From 3bbf346d9ca984faa0c3e67cd1387a13b2bd1e37 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 16 Mar 2022 18:20:50 +0900 Subject: [PATCH 001/535] [SPARK-38563][PYTHON] Upgrade to Py4J 0.10.9.4 ### What changes were proposed in this pull request? This PR upgrade Py4J 0.10.9.4, with relevant documentation changes. ### Why are the changes needed? Py4J 0.10.9.3 has a resource leak issue when pinned thread mode is enabled - it's enabled by default in PySpark at https://github.com/apache/spark/commit/41af409b7bcfe1b3960274c0b3085bcc1f9d1c98. We worked around this by enforcing users to use `InheritableThread` or `inhteritable_thread_target` as a workaround. After upgrading, we don't need to enforce users anymore because it automatically cleans up, see also https://github.com/py4j/py4j/pull/471 ### Does this PR introduce _any_ user-facing change? Yes, users don't have to use `InheritableThread` or `inhteritable_thread_target` to avoid resource leaking problem anymore. ### How was this patch tested? CI in this PR should test it out. Closes #35871 from HyukjinKwon/SPARK-38563. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 8193b405f02f867439dd2d2017bf7b3c814b5cc8) Signed-off-by: Hyukjin Kwon --- bin/pyspark | 2 +- bin/pyspark2.cmd | 2 +- core/pom.xml | 2 +- .../apache/spark/api/python/PythonUtils.scala | 2 +- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- docs/job-scheduling.md | 2 +- python/docs/Makefile | 2 +- python/docs/make2.bat | 2 +- .../docs/source/getting_started/install.rst | 2 +- python/lib/py4j-0.10.9.3-src.zip | Bin 42021 -> 0 bytes python/lib/py4j-0.10.9.4-src.zip | Bin 0 -> 42404 bytes python/pyspark/context.py | 6 +-- python/pyspark/util.py | 35 +++--------------- python/setup.py | 2 +- sbin/spark-config.sh | 2 +- 16 files changed, 20 insertions(+), 45 deletions(-) delete mode 100644 python/lib/py4j-0.10.9.3-src.zip create mode 100644 python/lib/py4j-0.10.9.4-src.zip diff --git a/bin/pyspark b/bin/pyspark index 4840589ffb7bd..1e16c56bc9724 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -50,7 +50,7 @@ export PYSPARK_DRIVER_PYTHON_OPTS # Add the PySpark classes to the Python path: export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH" -export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.3-src.zip:$PYTHONPATH" +export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.4-src.zip:$PYTHONPATH" # Load the PySpark shell.py script when ./pyspark is used interactively: export OLD_PYTHONSTARTUP="$PYTHONSTARTUP" diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd index a19627a3b220a..f20c320494757 100644 --- a/bin/pyspark2.cmd +++ b/bin/pyspark2.cmd @@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" ( ) set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH% -set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.3-src.zip;%PYTHONPATH% +set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.4-src.zip;%PYTHONPATH% set OLD_PYTHONSTARTUP=%PYTHONSTARTUP% set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py diff --git a/core/pom.xml b/core/pom.xml index 9d3b1709af2ac..953c76b73469f 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -423,7 +423,7 @@ net.sf.py4j py4j - 0.10.9.3 + 0.10.9.4 org.apache.spark diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 8daba86758412..a9c353691b466 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -27,7 +27,7 @@ import org.apache.spark.SparkContext import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} private[spark] object PythonUtils { - val PY4J_ZIP_NAME = "py4j-0.10.9.3-src.zip" + val PY4J_ZIP_NAME = "py4j-0.10.9.4-src.zip" /** Get the PYTHONPATH for PySpark, either from SPARK_HOME, if it is set, or from our JAR */ def sparkPythonPath: String = { diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index bcbf8b9908ae5..f2db663550407 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -233,7 +233,7 @@ parquet-hadoop/1.12.2//parquet-hadoop-1.12.2.jar parquet-jackson/1.12.2//parquet-jackson-1.12.2.jar pickle/1.2//pickle-1.2.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9.3//py4j-0.10.9.3.jar +py4j/0.10.9.4//py4j-0.10.9.4.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar rocksdbjni/6.20.3//rocksdbjni-6.20.3.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 8ca7880c7a34d..c56b4c9bb6826 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -221,7 +221,7 @@ parquet-hadoop/1.12.2//parquet-hadoop-1.12.2.jar parquet-jackson/1.12.2//parquet-jackson-1.12.2.jar pickle/1.2//pickle-1.2.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9.3//py4j-0.10.9.3.jar +py4j/0.10.9.4//py4j-0.10.9.4.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar rocksdbjni/6.20.3//rocksdbjni-6.20.3.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar diff --git a/docs/job-scheduling.md b/docs/job-scheduling.md index 4ed2aa9112224..f44ed8245e286 100644 --- a/docs/job-scheduling.md +++ b/docs/job-scheduling.md @@ -304,5 +304,5 @@ via `sc.setJobGroup` in a separate PVM thread, which also disallows to cancel th later. `pyspark.InheritableThread` is recommended to use together for a PVM thread to inherit the inheritable attributes - such as local properties in a JVM thread, and to avoid resource leak. + such as local properties in a JVM thread. diff --git a/python/docs/Makefile b/python/docs/Makefile index 9cb1a17ef584f..2628530cb20b3 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -21,7 +21,7 @@ SPHINXBUILD ?= sphinx-build SOURCEDIR ?= source BUILDDIR ?= build -export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9.3-src.zip) +export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9.4-src.zip) # Put it first so that "make" without argument is like "make help". help: diff --git a/python/docs/make2.bat b/python/docs/make2.bat index 2e4e2b543ab24..26ef220309c48 100644 --- a/python/docs/make2.bat +++ b/python/docs/make2.bat @@ -25,7 +25,7 @@ if "%SPHINXBUILD%" == "" ( set SOURCEDIR=source set BUILDDIR=build -set PYTHONPATH=..;..\lib\py4j-0.10.9.3-src.zip +set PYTHONPATH=..;..\lib\py4j-0.10.9.4-src.zip if "%1" == "" goto help diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 15a12403128d9..3503be03339fe 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -157,7 +157,7 @@ Package Minimum supported version Note `pandas` 1.0.5 Optional for Spark SQL `NumPy` 1.7 Required for MLlib DataFrame-based API `pyarrow` 1.0.0 Optional for Spark SQL -`Py4J` 0.10.9.3 Required +`Py4J` 0.10.9.4 Required `pandas` 1.0.5 Required for pandas API on Spark `pyarrow` 1.0.0 Required for pandas API on Spark `Numpy` 1.14 Required for pandas API on Spark diff --git a/python/lib/py4j-0.10.9.3-src.zip b/python/lib/py4j-0.10.9.3-src.zip deleted file mode 100644 index 428f3acd62b3c024c76cd331c7ddb0fbad923720..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42021 zcmafZV{|UevgRAxwr$(CZQHiJV`s;T7R0_2Oh6lXrN!kfSV04O8?0O8-}E>;%yMz${Wj$Z%sq9Za|eZ~L(q8bgk z_)T`iKc5EVz6Y4EFl<+8oMw>{o-I8Vvpgsl!bcv)@b8YfvX~TMhkZ42?)>PUdC;hU*g{ z2RUXrTR8Uem%|5NPTF|wE(4V0bf3giQK6=za4y&%prO?$(iu8`+6MMSkprEaC;H&}{JdVdqT zH&$#YEl*Ad?~c6#3ZJYJLR znyxvtp*AXb?CE??m8zb;Sq@@bfd(D6Cb)$s2Bh+SWzk@AA%=r9v?K`5HBuK?r~j%7 zXIlUc`Z!yTVi4xSxoHHR0Cbp8I_?;UG> z35gA7kDHrgZTk9JsPnMB>}r?&Tk~Vx!#iNpr7^*SSnype5as!|BJRLT0`NRoE__0@QHDBoWiyFInY z2tbOh*T;|B;dp?J{%mkb`p8b^L%b)QnRo$qA_Qun@%G~ylVC(d0*9Rk6XxT~=ZG_J zGKPpuoUkM6-~0{N6AR`4zE+$6%CfR#z-5x{S6S7^Pc@0UxJ~RqYVCQk8T!a zeZ~~%aI(-=VCn#2tP=&H=IiY+S{njGiDw|w72>$mTxLD4Upk;AKYsr{TVurX7B5SNT4L ze;{i6pm<3ro6Ff`-X@gj|I#|zEf1l1j1vztWKa)i%02HjT_J}BRkRgn5U8WEw0@2( zjP`IJ!th?FNm(JR4U{SA0puycw;lM5)Y5 zw(saJEJydo6xK$92f z8m+72p0xRq+bcp(=gHofO>>MqekOv``(~rD92P=D(L5T;gjF_?`$rlM1s4A;D7lhv z6cYs8oKLwRWlB<`CdLCqZWFQJ(BeKCr_V!gN@uRPA+0L$hRM_0X>{GeK*r*^J?gdV zheQ|Gb1aHg9K9hgrt_RWW*Mi(w~o`G$Ybulkbc0KtOMe=@j;y%Fc5ewW$$nN5W|h`l+@W+c#p|A@K9sNVAF*~h|pMrF>qZZ@B4B&fvB=9 zM1gj>T@rxp570Bl_Sx+Z3D+9J6Xe-WN#>Yibdq6<^%d#k zAGj2zQQmp9AzIYDTYN3Gs@{>Z%b)h(HXCML5pOZXU1_3x3R#0%p|B*mTZWuHf2Ac( zl&ccRvObBBIm3y-@l&iK;aj9^5Pc^Q_Pa;934R9{T0rdOl3Y%?XKAY!^o-3PmyO@@ zopbR)_j5G!W&8F$$jZEY%wKbkjX%FZQh1w*7lva|XU@feIFo;2t>8E_x`Z~u@Sb_; zsP(y*`Kd>?ZXGc@`YK)3P@TD^e^y7Jg!tzSWieY9-oydy6ypB8qwbjw8BfS2}pBtvD8@}zrq2jol&C`3O4ZK77HN? ze`F264jUAEA;14qG4`);QTZqeT?7FD77zgdq<@Etxz#_ln3cDg^M4aD*BaX~o8pK8 zs|HNE1lAZj#bk5DoM(EILWEXCfA~`mbYw_uT&naMx}pz5|K8?&C6KK@YExWPjG2JT}?`W`$?zX6=R_CtxCK5QEK4RSkr?5{3Ct(wmj0rpsAwsMSv0s-5Z$nddoI_%i zQl+D}F-5h>?1xbb8tkub60AYWNFP=)Xz>eCozpx9mMnfge+uo#Q9*#7xh&n@rMn^pMyLf#k zX7eVD=NlNrAN`z3#(iG?LYOsqx-3hZc;YKVhLy|cGnKVr4dhmGh`#tpigywS;jAu|6;L(xm-h$tuNpZ$F99mZ2SCZ#7VYZkD(+v32 zBwrDLX7r6Pb_mr3ap*O4E}*jQ!fwSqAZwVI8_wr=Y?bx%|DXdLctw*nOrsSwo1J5p76b+Clf&q5gFonhoaNh<$!v!7T>^Bs*wv@LcXeg)G?JQ?ER<9V-5a*x}Gg$DYqq}-@*oJ=OhDObF; zfGn}@jTeR_BhxtJ+-O8KY4z8%ZPr7pKRYya(^gCvdsMJeBJDG2tLd*`APe6;5p^4B z>3evhYaXqZe!@*s7%HYq;37x?7mKH3P+JUTNuZd^qS?;KWz^}h|8)|Cb^@bJjODPi zA&+zzrlpcIc{U}TR_;OutroUDN>+fV66)qvP|eYXbR4O9SjQNpo1w9Cf~{QinF9F` z7TFYuw0;$z<8LQ#8|*Y-95N?$ZcZv7t%SmNKyDOxfVLbsPfiQJsr9B%o%8z8kE3Jg{VdE+$PF*2~{ zxpGHtL#9h!8A-(I*JFMQA;Ch^^kcI1GNp_fO+*PYYGlcIv&7-gBrxCsBo-wUfnLR? zXyhTmN45ID?_gsnh)>lXG~u^Ls-BU8kk&UPo+%++gE0vD8Du<5fq*k^(C>p^n+OSL zjh9sF`^N>Qth49)TJa$VEP!W0td=@lXaaYL1o;nLxJxTD@bLL$gLlz41X<`@y-_E9 zgdN!6-~G4!mb?_FN9*e`0X=WW@3RcwR1)GQTo0lMU&I!%BN|2bmfz^$JVmJP=BoJI z#+FyTxKk1izAz_Jc$SPKt0vqx0FrN8&i3b!{i=S{-S2yd?!QrQR#Cr!SNGly-=FLp zd4T+b$A@hKI1$9tA$8od_f2`jF^a}H+jHdYzs~>Gz@fO~8jk7-Q}wt%w4?cg6oEpBShy*l4s`a zrn4~1J$tkNP9P*5EewzH)SIyi9WJZ={R15BcnCkyXQT=Yu`DDT3 z%blNn;<3o&$ojiq`R;gkPBeg+#Zl0#odJf8C}9!|GwdKVgLp1b7ZP2Q44tZ=>IVnH z&{L%cDe>8mv!es6@3h7msO6m`kGZeo1d_%0+uJy6yu;EMe_9U!=Ejm9rdh42^*v44wD@8sx3xPA`GIm*tdn zGwu7+l=}j{XwYwI zk9?CSEw`B&s&Ipt<4s0jcyk8I5O}oKUs7Un?!^&fE+Zo3G~MBrMt&a+WXB3QJBc?uRFaX8blZ zzWw*b9{CQe8-x`=J-S`X#qN!5;Kz=f0Gunuuu+WiVq_&g@_aWPS7P6$nRvmOI;<;)g+5n zBgOY3Q6%qIQ%VQ%AcxB!^i}SN&xp`F2SYeE<89{=E=srWEVpy(4C`j8vDdDbjZlv?x_IQ>PfU^+h=d{uY5-2?ZGWo%X=Y1{N#S$9QWa0;Iw zSZ1U33kF2eYv;+%pHnp$`{vB8^m+>mX1M8ve8clfl~~!Y0tB}t+$pWGLZ9$NSZN4l zik#zl@?hxQYnPRRlbl99SrKqHHXlMrk^H-v0@2a9!3O{v_eTQo#0{O6h0~M|&|g33 z687{~Y>PtI2w<+{P%zUhp@tj{&!g^d_%l+`hNaVKAf-{`DnY}9d@raL1p-B@Az;Ot z-~jPUXsm#s0Im>i{$Y3#eQ@FDRMc+W>YkWkV+0xTH`hM`Pqslq{YSv$w;3oZ_srMr zzUVP9EBUpM(lxAd!nKuRbk=`b)ltD8|Dax~rJ=k;k#}KUIiy~vGgXN)sF8N0N7D)> zkSBg*=n`!73G zHCs{PjHd3zmFw(RPHvuodJ^P6+-gIM<+OB<2-KP~wLV7c;il>}RXGM9*IstUdR^BW zqOem+J4N_$jC)?P9*+ABm6AeRDW~~iEW`;4BT-`fmN%>nyMgbRz9u?nH6JriUw$-spmn>F6(;@S0Qc6URbyHzVVb&8sEr-$rGOO z)J|7I-A?x&VTGt%@)8-O{QBNG4PTnIRLbdSj?8waP4b|}N-bKe>`!72qj=0XAOApA z?z{YZiBUeb-ZI90_k73P$#$V!V= za^Nnq&%HS-!?k1*0T$;L%hL?q|B2RsCn5#DMkE^o*KFaT*UY~>CH_GWQc-YfK^hmI zey~c@ZM^PC6?3*M;ad9OF`iOJ$4O6!naBlDA;?DgTyJDXxcC5wOhY@yXF@~0YNX{q zwW?E}W0dI+NgW|B`6YOOH6oT6lMiB);F_B9O1t+7zlo7iVv9AZCO|sip^houP>9Vz zxHP;D42A=J;8~xPINXDhLFn70Psy`gPTL{t@VXj+Z_ijWK>*C{Hk8hyUIz%yxr$P?5o z6rh%WpnF7Fr>^r8;WdmrMV8Txr#GPlU#SvGKt4OTNbHX5txG+V{%tZRo6DuHMNN$xG3kAxq`=Od-))DyO$@KZ8D(WFZ}s=K1l)Pm8Y zR#Qq}+HjO@G@?_cL=N#d`GiDFkoF4$i;L|P-&xamQ;~a{CJAf(4hD_}W>6Z_m81@* z5a!Zp!e!@F!JrAn^~Tn~-4}}vz0pn9vx5#+X$D0tL|W`+j#m_`6!oI!}j(FSXp> zoLzKq0u)=F3ZEt9dAqxY$BXR3R>YfQe4UC&egVT#wO4A4loN10|LT3G0GHziPm z2nkmJ!uC#BxXN~(y5al!Q|mt@tHzYeSI->PXAGT$6>s)-NZuwe&j)Ob-JT<=+ttgo zbk z?9I*=8)k#?&aBlSIJsP2qWTJ++^xPeY5epEG8ZwOS9jW&#Xl^Ip?#S04Yf(ye0~bw zrFFBc+SYnkbx%VToA+rJ=^i|&87F3PGT3cSq-Eo9QC$`tx4P9U;)+OQjpM}6GX z>X@mx>0U4j@b|2c(GFv~|s>F<^B;q!oqkD{4{A3gCqfLwYdP(HDcniD(nD zGTW(D3$jfb2ijD*X?Vm2w<=EK;RY>q=MvjY<~Zc+pxo`tHg=j`TCBy;Rj6muYDg(Ox}ZDlvUr3R4n6Jg)F%O{84o+1m+;ypQ!q10Cyr zzX@(xGgR6kFQ`gY(6pv@?eRDm^%7n{9~(~W_>@(B!Wi&{l<)w8yRFU?72 zx8iAa<>r45CPkn0fS(3ACLF4;I5va!e6V(kH0znf9Y(6eQ$MakfG_tf*NJ8ZW(sH$ z51X1{HHoT%J{*A}dEqTzibo)US~dyI(P?&rFHUiUU2WVM=ww8pe4~BhaiUBs#P6}L zE^0dZtBvpFkKX539rW%nrn9bv@Gl_qp!W4>=8J!|fIZv+nGa5P3WvsFl4eAIT~Nh- zmz1>Ly|1G4n;(m$1^k^sQ%Q>UGOy6kxO$~+9lgglXd6FyWmv;LfZ;rSqLoth3vs{@ zk#_y4GbId|BEd0S3uCW!Qk|Qyfr=09J*H~&dP!h62K4qefa*Xukd3J0raPXYuMXH) z1H(wU^>>}_sev%fq_@;pf_Akk_Vxk448&uOnO=pgEB)$yuyS6bD+C?!wA=|&p|{{n z=OF2^rLICf%NQpgE|su-&r*+84`YW^U$z5jAqpY_0BgSO;y085 zg?<%=i?8MwH4YG0XJ#FtQdfU_1sTPB&&IHI@+XhMCjN z#lW5P6vJ}UA3WH1<8d3>vWw35Zy{3D3v}S8LG&-LguB0`uy6BIZgl)DO>*R63+?nO zp6Y**3zoIJyZ?B*NAV^HQvbHpM^4wmSo3878nEUu8xJJYvwdB~0mfjuaMjO(Xk^?@ zH>o!t@Zk!wW7s=HP|Byb(5*W^t+J)BP*k|qba4K-n8y`PI@v%5Fekv!m(%W!zSUvt zHmc^6DXo1p_`h2;q=q1Ca{n}d9R892V3iCEtn95^4GjJpR!K!2vYho^8B$VqMqXCo zSej;HRxL7KfrgHXPC7PTV{D3UjDcE7QkHsVmWoa)HsX$G;>obltU_!!^W95V_x@gy zT(OK!Rue%g1?E4IT?vWGDjal#{VkdRK%FlT0QKKwm$i|*k%5JgtC@$9*T0_@z-{+m zHK?w1FWooAlfGxm&I(?$1EIhs<5;=|U7keJEG44si(+!BM;q_5_KyUjNxerG!8aiG-Y3k+hWgT^=W1E1JRL3Rt;lqG#DbE(@S&x zqwO}Mx>s@aitce$x~vY{A>y2A93+dz+(*c1-Kg80?8wfeH#fW9e-d>Esw)#cp-fMyRbpB=Q&>_bo5d@hEgzIv_1Gt+#S$ek%z$8N zA5n{|3u(o)%%2sl==LQC(PFYw}o;0S( zsqhg*<7m+s5{w}Tfz*c({k)cFR|ZKqWSua}{8Uu3lcYOW>St=ycAy!nv4T~HC$I&) zP;Pk8;m@uz^x$ZyDZ!nST$H~)tFl!Ub{zRfR<)B< zpa#=UmGEHJp|;nk(9#Z(78`D)N?(JhE(nGYQDVq9NTFQnDo}Kzl|ehU2IJ1S+_A!w zGb3q|WGcGT3(!8%;UPp>BGwK>mefVjrB^k1W>P1c8R&jk&6l?zw5rlhe_HlI;9a%s z|L{XShfrF&e4dlFaq66EbP&SLtOT|LEBNNeL~@DBf^L!g4RH16Cgo5h`@s9A34%?S z3{qr0%{8tQ^O$2vF)AFrL&|+l?EfZAkuuJDM{S2u>MV1317ao03gXckcMF)i{L}6K zZ0(DrhyV47>HTNzYtz6d;!xvrbyQ>idw+lU?ExA8HIMIS)e+>>NW?4l%7@f}KTf5E)SYG=UI@$^MW)pnmyc zkKcQO=2+YXLG7f*14tmx=vnxlZ*5lKsA*=IHCCgUvbvMwqGQ&rp?sjQ2(7WK5q6~U zsg$Jyu#Sd0APqExfGyH9Q)^<2EGv50umtv~8Wb>_FFgwBr|(<2l&w(kfI(r*#!NV|FfkM)*zPvNtT`4;|`f zvbVFDl2C?IvgSRuNye!GJGMt~SM8vTLD_=dI?GoG8Cpsly|+sV(3RJDoE7*t$Du)x z5h&2}V=P%}!;Qb}H5fhd&aUyeV;`{*+v2DkK_NLBMKu+5Vl+*>f9Vp+XAIV#(9MT- z5Z6?BMJFg2sR9kmjg{z+xC{lSOVq{7*Kv*n?F6=vapBE@;WU|# zH{wu?0`PYUy0Hgr)u2`(LBPz+t27`%L}G!FMulyk5*i_+49YU#?euR@n(($T*28rS zvvEa}O(D0V8X~V_a&DBlaJ)u?4p1zy#&$4?zno^HvRn)LxA!BC~ z7OUIJvM3c>Fs}*E1cD(#EoKL{zw6m_idWh9T(p9o95uj0WfFzew*qIqaLbGF<45xB z;9GeAcw^?^{* z1=Jki3zgBtc!yKIoo*|fIpE6dIUvso-WBC{q-vvjT!qW7Jw7%md<8UzBLEH2nl_oy zhtSdQvy8xrSsZ<{WjxQ$3PAC7-OXpW6i?YY+oBHC1(NcMszZp!srT8FKS|JCCicH=&n1qQhGi?l(#`-C+j*1!}yRF0QIAT&ANNV1TfHOGB7zutc?>5)S5@^uTN} zP_oGh4>V7Cg*1xsaRJ?&=wBZn|2{Wz?F?W>H|R*8pN9%Bm_L4=l*cKPwC`gL>iG&d z@_SpB;sV`C+7OQeN8kj%Fi&~>f^jFSPpqaSbx@!Xg*0BE4*KT1dl8wbHtdyeBr}X2 zgzqolq;nn?@Z+X)*0w3D{}CfFMN4hW^l3k?ltyYnT@tKguvH6Oq#ib4q5;F3Z*B^{ z>bzcP;5!KNxYaT<`b9M)a)wmT;2DOP`y9LPJ&tzbMFeyqx^2(XyB0qJM;7RvPGo&td$*!*t=ym43iL#3IHvL7Y zL;@9W|M>I z`Pxxy*^&=(4fGn0AN%5CYBjgVK}tOY(hrR=GKapWp5Ho~MzG=&1a2_q)6iNGUac>~ zOYR0@kkfA(qwrmU{Cn?J1mz^xUR$jyYBh+6f>BUV%(Od9U+G$`YG^3euEs88YC~cA%a8=NUUQgp@fvU_klM<`e1si3^tfgjH1@wX| zOAZ2`7$s8(0w)~_zHJi=66NDN*_vKaVRHmEDj!V})INq|1u7kHQqKTItIn=DDvR)P zAVMbXk;wWks2T&zL^Mf^DxuOqpT;N{o3J_{&ifq7<~rJ`uIpik_VKxf@?pf-89fJ} zFb%8NE6N2V4=bcK9*0&Nb)<0+#oLLfJi|!R>gs6pBdUQ(YRVLWkf4WHc;DpQLN=Uj z%8$lS+Mq-=IxI<`WfXQ1Q7#Z7(d!i9QBs_DUXQpBkYKz*r(9T_P*%GRi(#gcb1{t- zX$%|X&j%T4l1-&Y&^%ERN1bU9y*Pt3h;( zN*}y!K*T+HzswPGdG&J6t+& zO*;c`gcV}{Z07u%t|oz)fWKQ29`^mrYTrPSe64qcu#SPdntbe%MB4pqK%9Sq2(D5c^VTu1Aep&_wteoGhg2h=h5WO|oYmiIqMo^xh_M8M51YU0?nLuz-p1FQ;`i-)Vgg2|m z{dRu@9LY-?C?%(Y{7h$b$pfD=r<4BLa244?nqIx_n&=IDG~$&1=nwAvOQJ|RX4}nq z$^cEKIsO8gRKePz3MT%?;x*`*ws^=*Vqi@z%m)PM&zOeM#L|gGzpww!m+50__(C6} z+m)fUoqUcENVkXzye@4|3vs2|xH7Zop9BMnPTjP>IcP9rj-#!Qi=ekuvHhB;{H+L@ zp`N2|Cfa%Lfi*unkkb-X_IXG9cDW7~5LwrPh)ya68#gId6(28q!yW+$347)|Yt_41 zB~~gNXvZSpHH*2T+q4!zafe80IB&A-=Ag$6sZzt6B@5*VRlVkP}PnprEN$m}<{f@a4oMe5eT&K5-KzD7#CnRt;q|Z-blDor|L!g%~fyh0GxGkx@XN)|RKowl1^5{Hye-}2wY2d&x`_zx7*mHLAh2if-qWZXRef}H7!Ua^f_cy6 zRQ1;H^nqi8rwzi}d1pj_ofE$@46g?|?_F9&m>rwh5c^AhwlMjAeDXfb2qoFl9kfn= zJkII%INHHBzS5~Nks4hd3vnH2ejxem4*wj1JK6nZ6^>gQZeye()}hF>7|-(J zBM}1qoS%FXsmh{PpKL!QoF8>avCRKYNCg7M3X4m7JhXhv=GM0d@Qn|LeIGplN)_)| zWZbdz8xSgZPHT{HK|BRNQCi-eLz>nhkB<%zX%r@Rjo*)dgy`$sjX0T}?%;>ax})97 z%*6_?Oy_$QN^fhXdymg{_e}3#-x3{-p4OPDV85)VjkcZpw7a=@2$`L{*VdTXQ-^*!&B76H= z1=B5AeW4Xi0JNHn^>dZVa2*k}ccb}JT==R5W zyezbLe3uEUw~As*F1}3S4+5~2n2@G>AtMpCC%NHxtLlFG^8ru^b9B;5Vy2<0EXcOy zCTEJ$ALInIB^-qi2&qzedI_Hf-CB_1TUnYouvIP&aE7k zH~F2GG;Q3Z6RUkt^GVS|^I~=F0nbsS+@DB>*o>oon9izY+hJ2phhuttI!#=g zzEXPGERn)>g}m#^!9~6Yc;3Rc7A;xqgaYc0q7*E<`8&=*F0HVX+RxtD7o5E2ymGgj zTAdQ98usBP-1~u1PvT(ed04m=N-0lv>Sdlykmp2P`ZX_N_vCSCba9$D zRFkp#VUC}QsVP>qUSlZO2rS3)hRol~d6BflR?NaZ`~J?8YuaABzI4YZOvwrXQRuyq zGO(OIIPDmt8!B*Li3#t|X|w(`wrx>SlH!$Kb208GiGzj2zL;NG@t+!{B73u1`S2)4 z62AM#ZL1pd!~2^tvpU?JuTc-SQJ+1jL-i$8$3QBGgV5mC-* zwLp24L-0Q?32r9Jl^A)~Pn^-}h><}wd3RSat`ok6YaKN0NtsbjN@FKgS(XO9#aVKy zLRg1s9KaZk99t3e z)M*=v0XMc9%={X=(^;H>7HEVN5mD1x+AWlBo$pk8%`g8)?mf}tvAGDW+M?FG;#hZT z>HQ?_#%u}ZbgrV`@c`?*((E<#b6Ku{;Y-unuH{Z+dkc9R&cnkaFLMsQZB|>0&FUTL zwuTJ!pcf{$4L8Ey#BTA>(Jq_r-V&Ec-yg)!v5on z-nA*0=FGt9JiDRPE@q`|Pf7wuQx@0taGgg>i3lFkwiaf+-%k~8nDaGtk-|#P+<7`~ z{^Bo@fY|m>!5IScxd3Lq0}!(7RNG1Crf)-O9}6toQ%SeGeI>RR=LE8GGz}A}Y;tI;1V6W^E$s0I__@ zG%#RBXvE=rZsT2}UB^7r$1P@s9q4@0U2mLuyF}3)28!8do_eG`@JMMvRKjV4_e@?2 z0UKOkPUbWkk;Ew*XSIRL$U%Sst>X?x|5i7>3w7j&l(mG+0P*tcxK+GAS9oBm0yhwk z5kjbQk#Gsc&N{aF71Y`aP^J%?u44!TL> zseOs{7zoyOMuKmA#msbQ1tqm_@$RFQ-P`-bL4On{JAZ6%uWYN-*mAxHYe+ATE6vIx zUisdz!AmhVSo6!ZuQUL8V+iur?VgZNi4yB4aKkX^7f_1)^2#n49D*rt;als-ig`UP zEKo}ex!gN_4ziV%ihxAIV`1ukU^_QdJ_eYG=K?enRIz4>A(r+vui$9&dZv@f4Mt&{} ziR3s)MU$~D9kY;ExGF!3i^+@o=l$<7i--IFu-)rB=@2#BeyZ7dynIxKB+)E$28uq$ zP|Z7sZ>K*~@uvSqMK^v)hyU|knK*sEyRCO2k380Dgri#sN`nrqv z$08n_g)z&v!c36Ejv31AB3{mUHu&Jcf*=gL?Ohi z)*HPa{Hmy1I@yZ*U7GY%NIZpqJP*WNE?6D+vTVgfnLvdc=|?0Bxr~DvcQJ2aL@j%B zryZme!u!{Q@BCErk6`%uNVmf-C$n@~;WRnhK2HAm8b)?u9V!OsWMuaxrc3;B9DFNn z%h&8E56q_NA=?*2eE&UfR)qh%O*SFj^yD4S=~R{N7nfU0cb6HsoQ(!<>f0zg8U)rf*cX@S;eExWu2;Se6GWsKebGZOA+L z>^IFR_hQ5n@6{i-Pn-oy6VoQFKN@cC?#M`)k&o`g&oKD{$7h3($d8XSvFiq)X|ayV z;p67z0h>SW-*;FN!RNIi`ni-if#^%C)B)#?x!V@0FB!d;cEYRyy)hnln`N|QxC8T} zS$_n}zzv{lxI%Pl7|a=Ko>&vnA@6#In7J!c#03ZCgCF9NK`HCU3!&ovSu zSnf{+v1^p;!$qLuWpC{hMTeM{DG*8y{(Xdsu%Z0i^fnsVLx3tYEKC0rWXE28WBuD1 z1ZVxm*yArSE41O}eFQ|-2&r%+=@zA7rJIHbXU-hqUnfW)!@)e-Qc0ajBh>|VI+Y~g zJTk0HU1_B#+Ird%Q zD53Pg97t;L!6^~hz}WQELXsf`BThC^i$^~=@qk&FNhYJa_%ft9lRJ#pq640&KRAQ5 z``$G}LUlx{Q6q)$nYqg}@s*$XEd>4V~iEk6{SB*Wu7;}x7RI7bTlO1ubMGSAlT|)Gc{Niud7r2OD z!d#8yh^==zx~Pph4+NBKk$Y}x`O&z+9)6WkrJ2HDf)t)xEWs~ROAv7a zM%c^jdbQU}!fz06wV^h_@t7-CWMgj1hBEI%agZ3%#MWN2X9SU1N_F1pOEY|pbxtIDB-$?d zzY`tQpUKX})J=E79+9)5e)Y2w!wGsq4pyU6f&ba4=hYVn-CkA^Phv+ITq1TdxM@e? zN5;=U$}VUi4Ha%kv6b1s4FEu%3w9E^{$$LY)~&)gciSW0pU-SCRg7cj+y~zG z+@(vJa^Re=4A!7Ok=rk@o1YlAR^k^;Lbz4|-1eIE`_u-%Z=x@;`IN@cw<0suxI&6v zXKggdv9(lWkpLG9wyYQ23supptin0YI>&~W3^z+VAKPKUHSMXp`z|<)H|i(<18P8( zzmJAO1i~K-?UKKETrfnEfsbSdLx8&mj-KHy8(yx3gZ4UMh$MUXD6%FvF?`S%#&`)r zqM_C(NPw&jt@;`OBa8smp7W_u$V~8Y`T%K$t~1T2ao&S!WgMA|Hc8I&N)UW`*A#G$ z*S%6VoZU4m^P)4d%ef(yM`qx4V?CIpZH|TFmc8a&FQ^<*o|5$nUKH{(0EKFk@{m+> zRK$KqU^$3v8!G9YMY&8j<)jMG=Ws}{nvSx@iXy)!g?*J6fc(V$7RO-S@9ULry9lsLudqGUB>9hTwbn} z1qD8iPK*EIYQ6wG9i6_$dO%rCik(RE`ts|#! z-Ssxb+%JJ_?(PT3rtY_(t<33$mNQ?40;6&W-1SacG(Cr`5Td5*h<4$GbIBTh1BTe| z`q$x!!xU(NH-TUv2oVENMr6Ry*XONx&BIADw@LkwuD3voiH1l6Y8rEPmj64m;?R+OupV zWwTENh?Kqt*uS|2!gi8PaaK_T6CSy_^(*5b@cPngOk<8)1GXf!M$hNVa$KoQESSw< z9^WWe7V?gX850x4%3)!o>M=F$oM+H+`~4VtWCMvA`=c@R_OUSE+MhE;^uwo%4^t1_ zhvePi)%NH#IIeBx-9qKRn1~%zn@+@K&C+l+2ZKRa8Jx@5_s(YIEx;230R5KEDKF7v z&=hvi!j=}h_esj;jzHK*kcG`FytN>crGR95Nq_gAve47@&CMMqA@`-9$nF%Sj+Rp8 z7y%vFIE4(9xj(y4vK2VVPJ64vA#fMVO`Q)B`O})lfPgT-kNM>yPIv8@LY#Zcm+Nvl z*{}fpLY&jw9Ski(E{Ry6nB;F=Xx45dFSO2D90SZ>^+J|nalJNFclY6wbPe6Hww-pN zR&UG2lzjHSpS~z66D~e_K?cLC`b{;&l#1GsD;Qjx%W5JYlwzzPDZf}8-Q(6B8r1-=hdY#4J zn@1>i3h?USvE<{-|MD>=)J=6{-H)-$us)^gXEg_;?#U6xf6Q^?amhg&gPtfJ^Xzpa zPYx!0_dE$>1$=4*8J&E14Rme(Qyc4^=qam-r_5Npx)At_~S6GoGF29M5p{RYlw zV0A>j%Cd^!swr%8>j0xiEPNGdyLQlgNl*q!mj*9C)s63sMG1LGg@|LA)|VA=V?pX2 zG?fT!+^TjqQQ0;}_PKPL@+WRZaQ(NsmxE}%d-0*suwUL60W0958PrJ%Nl8mwRtF0t z!KP$Sy7Z$Xmxp7G0paqF#f*!3R+?XQ;?dyn1!oTPSFsSQ;5|_&6M4|r zpmRtGxmuGXiiyTRhYW5$oKKPfh`6xt-=;-T3Q!+eIWwU$<`I(=Ga^8Zk)8*_Zu#@E zq1vurJiC|X*}_OKPGpHsuX)Q-mKHdDp=XrG**pIzG@wloZw1A8*DLu-Lr`mCX0%&g zC$|-+8AJ0FSz}PAAS1Q@Gp-NoNJB;hGm`&d+WFYrel$4JTN~36#YPCOpJRdoz zv5;k7I)2^gpKUf@hM5WXm(S)2w2oLIFNEjlo9gajZtlUMB#jJ>Yk(*g6@rHa*bAXh zB2GhICw7U2O$(YbOq&tKz{kKRv&}sGOW=(BQ2|gQ#3mAoieRId7GEcdlG#p?PxdW> zbk3}Eg$oqbEQ&L;s%CvDvIPaa(u z@nce6Jp%OMM~n4z`smY7AH!1_guo z2Qrmy5vRDZjF~K|s5c56=#cpSm7h43*Ei zG86!aInDq=LH-#pCCbr+2{_oigNUVcO09{usNkUO~OXY9m8Hxt>L-QIgLjn5hB7gF;oiRagd7^q5{yt%(yp@bHcP@ z(;UUV+OL5v8-k~h8Q@jb1t%g?p|^z+Y&+eEe7rce952wWM}bef$v3hPifg3}Hc(8b zKqQoACweifYA*SDhAt>B1i%_5P#FHUUapLEQyi6$1hN{m#doIEEJ;()AK0d2-K10Y z9I#Tpnb#oupxVH7g^Si^$@v4?3ChbMNDXkXi3>BVHnfLqj_$hen(e)O{L1{#?~f0e zZHNBpa{CIj7<^RDPY?EvJabcGoSx@jLU|D3&u(aAQ-9jIG}qlma#M-MJDRi4kDOawKu=5CFd4`*%?y<(jX*k$UfVFup7ngvLE8B?5fr97rX^Nr|>i! z?PVjp6|G)EeQrWyNl+sJ`*`69D&b+FjLmyBvVc4zxE-V9)lchq`3!U0-Ry-+>CP<3 zglY&fD_$B7uG?8&gdl4WCfgkKKnkC&RFJ|&bqOLhrvwpt!C66= z2!m5NPfWKJ!i1$p5MgOhlWhfp7V->&d+dH)OI-@|;2kY>&OE0ZdlyTcGSBIzS}k?= zwYvufcNu}(2{F~URlS)EwD(iQ7Hm8ZcFQgQOK7AS>J0g0o}PSnJQQCMO{qhe)E4Br z)X8fzRx_^Vr~l2_Y5F8luMb`a)H_#6kvY&!{W=|{zPn3xF380xk#VG%w; zn9Zn`tV4w71%6BBF}s79mZpf5wq!DGC}c^!dtLkHZNC4atIb~NE%;?#BsOtq3qR}K z+RYjn?loqk6HPTZFH4HI**RQ-yE=?9lV3idx#lZLbz2mo2Nh&~H;2tIf0RvD8dxfi+%Vrl7=?tGOuN{C|AlRgZ2Lhb z3oPOsM0Z_>X@!@Kb@KlZc1d|iuFYL@GH4=&wi`M9Db&Tzr&4*FJmAs10qlen#i=%2 z0l%V6$vZ*(Lr}WYe*KrH+N0ewW#YiCkMOPMFC~j@-B>8%Lt?NWJ->Tey?Iov#t1IU zxu;Ysn3%}rYcnJZ36aFIvO^w}ic}ZgBw9kX{JG{6nq&?_pTl1yT}fcG^y1M}SAk0} zfKT!T2yiSxdc>~@ol*V_(T(w6(FtmKg}2Up8Tnu!$DN3@XQ8Y_RuSCK{DzQ)BebOV4-+T$YC zL)3V>YO-2*=DJd<@8+&mffws~I?*TZli=vdO;U73A)6!Jw>DE(rUyU^{*aR+@qDy! z6g~oLJacvC;UDk2;F{R);7=lbfqJqt_;FqbzAZ1K%u()i0afkkdnJQ-6)(8%OU+c< z4uH4JthN?9051u-EtdJ#RuA?udGvhTCLsaoO*z@wkM_C+d*FCrlnj2E5kvVX5~sDB^AEyJYt2CrtBgslL1(LLVz7@5ARKurafKiw_2c z3!@~wG*Ygg@yJkR$p~!Qh%!zkq?aW5>j#WR-!T|!h;W=ObpE!jXDK*L+Z>?CbyV+eD#h3zb;I=_JsE{qh;|tTML9-h zMA?qQkO6}Q0>F_hS+vz9JNC#32+Fn9g=8C1A>lG&tC4caFqShwfN~Dp0hr{sM~s8> zrIGiQl2`)Gzdg$PibzIR58sm{3g?d^el=BqN^DzCO@?mI7J|(1UXskv17zGJgyaAl zhqA4^#m~P4Utw`f07yhYM@w-DNPRk7%opovnRR5y3N7$?UEL+MJ;7S21V{lP*;tcv zASLLJ+Y>K%IoV3Ipjot3uOA^JS@#MVOsv0FZWiP5Zfa0{b0cY?8{JCfF>C=0R2cC8Ie08#&GE}&8a=|7y^H3&1p90 zj&U6Opu1kI-?)rZ*I_uN-)cMrLQ=%R)9Eak85%NI`% zPg}(c`MA68xUHAGG$J2gDhWA^%@J+x5N#=*_#LOOM&br__<$!O zhGJT@UQA4TT|J0_c{=J<7nUq!A>L&pUCE<^^RG^}+r{m7aGTwG-<`Ynu6>vt6V^Sq zIbLB9MTsm@D%FI}c>Cwm--i3pc0KlYrSY!kSU_Nb8B!<-R;P z-*i^DJEzJH^?s_5`yE|tE_Pv0$0z0%9({gbuPrpbI$DtepC=Y+_ky*usYmjPoOa*G}#nR zs`_bdTBMqr7nBA#P}A@xX9jZJiu5>u68j)%d-a-CJ3!=MH9td{$FH`m7KwTviTzu1 z6WmxwfixM(*IH?{T&EOY`|wkw_u8W@VEE;zSVmef{6Z_i8o*Z^vl+i(K*R@bWMp+8 zc+FbpEVRF41Snt`Gn(-f`Q`kbJ*` zrSBSePB0%^d{#UmFFJDReEiBW!e=k`_an4c-trH67=vTcY31{RK!&Jahyrua7?>bj z@?L)Z?U5h+1_S8z{v?{+4|6(Mw7LJGd|z?RKNr8x;kDA<%O|QR>0E~A-#A zBB!I+G`N(n*w(}XK3fwEU$%Wm5;6Gs$-T$~cN<#e{#?$J!{66kvh@U|+WV)!_Eo%(*ys6*9)?b&)FwP3erlkfkBfgE_WX8GVR19nd9 zfsBkgDpS)5H}tKowKTgizmcj|3AJ9;)4?fzmuN_RE4fo#Dm)ov-N^GMu3wC;MWc5E zR~j&C?ejZntgiB%PFZHtLD*jT5LlXCUe&< zsv`j?4reT<2{OO{CJ_FOezrB#-@0nhMxZdqdcb={(&G~rDWIB7m9nd+%H`yY@faz^ z3?7BCJxQeTBuFwle*v{qDZXiF6_HtLrD_KHAoYRX-p$uo^BtxGXb7~dPbjJro|tke zo1up6Yc9Vz_#VjO-{jcOm+HKK*6~!#yb?JjGk@)=nSq*;l9giJxfi8y){+k|Oqhlz zr!Tf0{-HI-@1!a=*XzyAu%3iSV;@FuY>pZ=-=?VK5>~^;c~lG$86G};kSR+)Y*aQf z&3>7t&jWKPU)@g>aZ7vwG1zgzc~f~hfQh*8CF`8;GMJIOxd8eD>MzWQo;1G|IOZ)2g&25 zi6&34U8be^X&$&GMq5UJ!hYIp+9Kn#_CV_kzWATVpc+3t-+8r3O4}@HZ>f9ZW$CF% zW8zYhuMB-P0#ugytw3&is8<2hqnAV4%cSNW6;vAYlOxXWE_F^|5ba)a5VE-!d}hVFPu(u(xz1^dFmx%?O&~@ zhKoal5MdiLbdeW+SFK-=;S1m52B2Tf@(e-ZBum|H(WNu?@>Or+(|updn=1utczE#L zc0NK`Dj&(}ZBu*s_*E1P@h6d)+f8(c)S~*zUN^&tgXp)VSKJ@dSvr4g6Iy1NuG#j! z{P<+&l{ramoMy&`nK@BUURkT=@4}pF)yF-KYKB8gvUBgo&G~j9b&b8|TCJKNmf^~p zE39xOWBgMxT*E$L-V8Z|4bvO%Z%NolS01ziMm}))tZlt$^xLv?aQK6P<-lqf8&nDf zSmDoE;Ewn}c8PPEY?~$)A^ZAHI1iEnZ2O}eydxJUSwzeGy=%H-Cey?;y(BTbjJ}enA%h+cr=7A7^c3CDR z!9)wfS+2KS36a5?^#xZPJcoP`HVhl=016S4~;g1znohk!>LCIr{z*3tVM5iEm*JI=%Pz z=U*AN=K1N#-@i}f`o@=iDDMW}Y3_luDw8=={#F@^yBq@gVQL&PGBgiA=pibQnhh6K z8?LzttN(-mWNW)~zJkk{#G zpX(9DOpH;DD?O&vh7}AIT^up+E&KTrt|pujr|OroUOOk?B79pnYO5Pa2G}7iL$L_O z>yX{+9Z^IrLGox;L?FCJ!4Noa5cmb)DK04iDw}&GCkkA|uzP)cI4#LN=(5*)nnnY6 z=CbP`c?Q1PEr>p>J|7gH)%NrvTjpQSw;dh)1ncg|kmeCRN}+#=-MSP3aX*F`*z>mo zc=ha;;Zn3*vOqpRp6f(3x7AWzfo3j{@h~&G4bpP*GO*{#=X+-d#lL?V@cg#~lW*p+ zo>k4bTvS8Q{35hzW@{9>ekxIH+gjzwMFM0RUXAD-*+lU9Oz`M-?ex6B>;3D>tQ1_J z3D`sFE*6~6*xR&REU?t8Cf$y@T}sgC^o!1u!DIL*I>v#yNuSSqbi+07(UDua?YeR0 z@6k!}1<&>uOS5_a$+n2A2@ggBcw0uCJ98<&`uyQls}bclQCWW&zGtiT_3PL*jwT}W zNiDs68EYXTvje*{QQABI+ipnkvdf!{R1eP| zv<4R&9I({fx1lw8W+NLfY7L5q$IoA!5BE=wzdblTSJ`qly*++gUWDm}px6*OF++Pd zS=d8;u71rFG-1NJl03s3aUUFY^p%qvyOUHZX9uTdjqly#8*oY$FO9@gl5=L*jUCD2 zbX)Z#QS9*PeFC#Q$N;6^(iIgjCAO)azknF06W@V{-`Sm!&u<1ChIn4zR7dr6S~qc; zxlHaDdLt#hQZ~Ub3=pl}iFY)I{T7rB@&+cG+NiF|CmIaJ!uzOwl;Fm(3Rd|Ns%=DF zb4YToh^m-h8+~5aemv5FPKTL1-1iH?Vo3 zZ%WMBdOUe}Vz(SeTlSS?zr?eqGezp`8lfs>UsAc-#CYC7g-OhO3 ztmZfHaCL$lfw+8N7BO`ZffpGv3<9YQHG~{pTxD)Ut;#;5_PgRbu#iDyNpR6&l7_x#$ zdSvLtU84IVgf5f{8v_-<7nLbq&zHTJ#KXev6ekM~@Sx}!BAmjv4vMETtC>x(?(O$_ zR{sQuM3g+nTsAsHx4-V0MiK- z++Y3b+S2UxG}b@Hm|+3n$?{mlh^F-$W0?brgJ zjl8~PEwX$Ngr}-q17ETrGBxsA2fB_UZzv-m(T$a9s)Q9X^w~D3^rGQ4-#a}GQdOoe zBA26Ue(xLZjthrXN=Y<2$S~nuQJd{Quo70f8@F!j#MAui?ynhakwXgDf`I5`v6Al> zcG!8ir857XnHmix*j%Yw=P3tml61X(Qqt#uykpGY4E!RWjp7R#40H#?oDr~ZBc4Q* zutT!md0UG4Lz`sw(5~tE4n5JQcf?On7QH6(cx5=|a5KZ?yxWSu;KLluInn|8Dbumd zWMsY0p%;CL^Ku#ZQcDwvRvGh1CI_F#zDK-u;%H|0?|UEB|BudMPO42uFoJ0tQ}UuR zfmb~t|K3|TjAgWc@H1}DE~bNh_k-)QtAC(FtDEthe1D6xtW&~=mOC{VUoiPoo_I>1 z{pUFDvMcX*&<$6e)seRH4dFrPil0I?>4`yyl%}LUFnidNLZ{_DY2C3Ty5{>BV(sNm zd2PO~szt_;(q}YN)MKcwN(;>Z<*L=_s+(>^2$en!rF?Xp1OGKh`Rb`*ZE~}si!u9n z&pi=@ymUtn2P2{7NKJPKni7JNItLV+H<%(k@6>J7qy zZmi*r8cV}EC$(=0!q%4hDYVKl0SR@?$))uA-`ar(yBMSz%zL}|9zdD9mh#=%;$C79 z(dHe9+fKi+>K>}xLVc}o@$b!1`I??uQb_PDO=TC=jAjcCveh=^N_&f=2Edf#?2q3+uxceYw8sHot@E-B~u_*!@6EsM}@L1dmJJf>-$I|bMs zAM$V3`}?w<0=6BcD#hTIyvC|?Rjq#b;gc`s^Nu+jcS_xGbZ7q9ho98HpO=Ig6VdPs zgkd)&Fj!m$2*>cIT#c{Iz{;x!uyk2w7CDfGom6{AxcPADc-=#J}CaS2mH~z+Bd9wmFMAS#9;a0il?d>)H6) zDrEwGqUn?C^J92MmZp#E-A<=Z_ul>nq-efaHM=ig`48x%u7>lOVe)GmAtRwm!^XUh z>r~8&E@8wgrdX5GQ14m&-_=mQXUau>emE5PF?hn+jtTmD_5~bXE$8cn4cH;SHE`0F zKaFoEiBXwbHEhg=Oj8W#GhD+Ud{K;lI$#Wg<^+h15z^dAR!pY6#ea+7%?9f0&qf^5 z!qF4%Rn>I(tg4EgOEbcqk-yog*8LKu1E9*_a^TvYQ4B9I)ASDD;}0FAqe$QYQS=Df zFwKamlbG{>`$S$FR5Z9q&7v9;i9i%H5Wg4K)eRUgr94vlh|A@p%Y!JvKssj*DU~?VrurL0o1vAyIPhucxnX^B8^3vD!QSi$#u^>8n_kO{d5V#N4DU|JvshW1gT&ogBlHb z@l#-_P9$^T;N-AHX~ZGfXMAY!^?{Bg6JEU znHNj=ue`Yd7@^u_3K2*bSOa?Z!aj?6GniCwX69iXxPUVZKwYd{U@d(vK@Bpgn(?w; zAlO}L-rovYfF)8U^V^{K4%P*1FnAIn28T}kst#=FbekHRiYOh4%VPaaIVPgVz7w%8(+zy2Y@wr<*16ZFooZXJ- zu|d&e3XbTM4TG6~_>GQHvN;J?H@v~Ec?jtS@|c47bGSg^W4OOsP9N?+Iy?OG@c0}; zV?SOIf4*EVR<4~xmPP#IY)E(059oFP5r7M9*2Ns6sp0qgENFsUcTu^6h1^av4{oyn z7r_F}*H_ok_9!kg*b#9Ch3+9#7i2qP+v&|D*HoV`m|@Hy2!hN@P)HEoe(v4%3Sn^=x_L;{X2d|KsLQ0m9O? zb=AdNr8XweVG@(=N3@AANcm1Yvd@QTfCmk-F^Ym!wPXe!2E}fxutWi&%xWqT$;n53 z&6?y%Cx}JC@BK(B3TbWnU2wuu{3k6s*`KzQo^oIK)wOV?ekKvi{NJV&gf>W=%g9{z zCoi5zUpucN2j>o5>@7wCK_V^KNTq;{z>=sY6!z+K&El^Df#(Jy+~lqYuj93vh!KmU z$x+Ux{*!j7){lxZR-^8{Qun^sVGY{HlfZ?6Br2!wHWrYMT z)YRHlU{JaxLCt7qEoC5DkO@P`Bk^DN1SuiMPfrid&h8m} z5sTvUsfgn9skbRW=$EEl*b-(JnR!LYE=Ks+pbQT}kW*1`7Lx-`05)aW<>+zyV*rBW zBUmE)*8QTueq%_7R+E{(W&_l$V8$dmk}bBf4-?ydFJK0_C;Ts8GWbsvL{2f^n*x>N zFmP^qi@IFWuii9MFz2nRx7&2DN{7SG0K#b^xLe#-fjfO#Hz zr7TIOn4W|Khl^ngE{8TtD$N$#KM9h@w#1|Wh!p^(0;unL*6j^6?I-COi=?7LG9nWfGDf@4QVi&}Bb(LY`8rZ0a!JK6tccy@kzuy>@F6+CN!_u@Xg zam%iB+EK_WJTNlyq??^ah%x)=!_|7h{6f8W^Y8uQU;V%M=rOQFa>M*?%^Jc_U=vA3 zO@UW?1#l}vch{jcy{M_uOj5Au6IV3!eFBLq;4^O;NPN}VV-GU&8e7`T5-XbQ@{9Km~PtpjKf7(r7bBCBHTc-(46 zOr6%;RP+VU&qi*?yG`hblr58BUfk#iRrU54427M6w#NL>_0p%Hk!5;4Z&n$P$jtoZ zb+$_Y)Hh4)-5ikjL9H>0EACws=|r)CMd7^&fh!EUqd+48Un&P}G${6TQ=u58Aw$_K z1qLVjK0s3d!yICAN5+hIQ=aHfb!*)JmH`3e#wss(?8CY70jF(uj-Hm)EO>2j*UZu; zU9r5!fHt=KZ%G1}_9E?^(JSiCgxP&VXghJ13t2kbZbF-POYk)(UtL^$na zF1NhB-arEJ5xB&z`#qlq)n9`M3L)KB;Mcl=yrst4SDo%|GtPWsR{8w2uFY21a2-9V zK)lJ83UW;>D*y;9jm!!& zCHIrtDpQym7U+^O!u!HaM3+9?y)|4;iJE{GV>tpoK_s4ha>`7!nYR^gD;GS3ZHhj> ziX%ar$|u#55j+umwkHiqrv1M30Y2&-iXxG`2_Oz7XtI_8$8%XDm`uBH8!Ml^Qhcp3 z^>{D!Rk&h{d9&L3-i8KT^~jOp&C3Q8mm0CvwrUiNJrKkof~Vbp>xZ=@)9ggArC3yv zQ%*d&?*kJljt$^SGl6a0U** z)GaQ{dfHHc(Ouzr)9X*T=94+!VszfgOT)ZoeMM6Gnuhd?L>>fJ89<=4U!8pB7e7*_ z)py>B%|ZC5bS@`$-yu<)buxiWvOhmbDOBOZN>w|18gDP2rr8Ca3m;8z-v)Fi`s)CK zuNGz!34|y4kTNjQusyE=Y6K2a?7ySon9|L%r<=>E=YsMHf1Zb^w>G_vYP<2JH!XDi z)(6!LH?Xr_T|WGePGX%X@;IE(mwTt5?|pe-X7daZK5jj?B$&UQhf}O zp5zUUUdI-gLmc=oreH4`Uy;O7lGA#oIYv82t0TMC8#2?dIYVQO5fFq z+bHFs?(}o8QSDGFAE_K{GdOrtg&rHE$!(vGR?_`ylX3V3U0n4DJcQVN{4NT$yq9h& z%z|th4EYrgOieqL(_&5U!^eHsa~sq&YySd9 zYzrO?Sy=0$2mtoP)i&J(ClO~K2ef%Udt0 zv4tHaek;z=-<#b@ssIVfss{1dj47J;>9AWgCPxV{O3px`5m;Ncu<1ewTnTdg5Ir!I z`=nWwXvqK{oHPSn4nzZG3X<(!%&hn=LVDnV|0l0Tn&FAs02lEJan^{*fiFW<*@wMM z{t)7OgakpIcOnQOA+Y!3rMowv7`*3IWA8ku_!ot3x)a~U6tZ#W4pN#!?WB>Ym$=b` z($GyqGx0aSz&i|Ib_Sca>cBANPa-pPP-Ik4hMxwO;B8&0l(jZiAcJAEhgI%~U*XF` z!@wpGcjN}c;}|9xqW~|PX3y?Rr13onqN;5ek6Gh2jFZ7kPGID*5*qsdr zB@o95Q4+wCU;r%Ocl3hj0NWOZUGj*PV;JT`j#kl|ZL_`rE|dfACiQsbTCxXhg!I|a z=25>GO{!^Ee(%8_Y>p&cANMC#SC9H`?id{kUqT^}knu{5HjcPq)6+w+PHo7P9wt6- z^WMh!?iN4);{SP8yeg++;COf+(n zEnZG0el1(c;!_hEePK?y5k$;US(gP!)|k4_K1IQ+XIPO$=rcOYZVk^TAWZt)~m!@{P*;WVAn^g~Ky2Ca^cA(*wH zYBpcb>*`MJW!oZYk5BB>RDGqJHdo~xn}$o~x9f>4aIu23-qxCJ$(oSVAVEO~NKo&- z0H%WFa-kR9y1C@>z1*TrAP%;{9)<>fUN-d@qeTogM0x%J{bINqBM_VWDM`H~&xbGr zUmTns9Pb|t&%b}p37w|p&BdfF$mJWE_%L!zC8^4m>Y0H7-pco%emy+<=J2`buntd- z&xH4%Lz=ob^%Bupkn?bVd6}&F%B;#QAnQpJSnB-r1uMb~wZkMEW?ef;NP5E&{y$Jl z0|XQR000O8PNO+c0pyj}(+vOsaYq0E7yujqaCtOpFKS_SVP9i!Y;0v?bZKvHb1ras zRa6N81LP$vUQnT}VRd*5009K{0RR956aWAK?Ob1P+c*+`_ov`FFVfa^lXf2tK~o@` zb??$k(jayhD7uA8%XF+(mVBa|#8>QhznP&(k^18}aoT2q8YpU$GsEF<=EoUQ$_{(_ z%TpGnlPI~|XYWR@pZ*hDI^Ax!bDB<;v0#~yi!_nZ7`{)_42Y8}k>$X^-ta39zvEbh zd6XtBjJcFTvOHzyt9*tnrn6IY_St9_Nj6u);aj@EZXUDoGG}qhCt||JD>e~RzKrvJ zr_(vg1ka%r^Gj}GZBmViv7+nMZzOVQYOk)!Mwkc5sItIRktF zJ$-AO&k!ELCAjW2U+K{>WZsTH!>oJs{Z!1y0wi3DJeUiRI#KOM=vk#K&A!OeJO#5c zF4cEyIMe4$?0og>8(IoA2F!iVlS#0Eh*2$>fZ#}iD;_Vwwwbt$B-kQ|lEo6qDcC_n z5@veSW$twQ5T+Cir2H|4Ri0*EIa^1Z&3A7Kpo(UDkMnVW^lcWGj z$)(7OX3(KQ4+Y?QKb1T&7KsR&mgU=YAqj+kqk8Rv=qS=tyG7n!(%@hoYVp<)-+ z#VBTv&Y<+Srl?$slr4!-&nh99l6C{^W6J&6b%qXtBYwi>Pr5xylxJu?WTA-h4DA(t z8p$siB`hQW&4e5wn!1K3c&8Us!1!#=?r*RkZl zDCA=k;~xYS)7p|HX^wMI%}PaCTBKa$qU%G2|0Aq<2^^lo*x(^OYUs6)Sstev+WgM! zFAYoFuBulGD7FTJMipsjIoGtnOC*7k7r?afi-R)&C7una?v~L8Ilo7=<-lfY;SG=! z{&X0}kdqWz0LrFPNpaMIUdr86Hg_f>wD+dj@+nEu* zh*fj^l1vY~ZFX5_?=jjzEGfheyM$-;+CpNHtc8xgjQ%G)hh-w-HEI)6CNRyaX?!bC z^??I&2G8$nfN_V0tuE(LA~!IZCjF0C)h(CbqDBB4@&aibwxX@VM+ploxQhbaue*Qk zv#)!9b^DO7&w1`4xA(=z-5tJM`MyJsN&OdCAVejT2C1R^s*2sOUqSj`rRXkrSO0Sw zB_6T)ZX$pNZ3s}z&02yJx~!;#Uw4Q~@XGd}41&Sw`RIKx9E@<+1X}OIN54x5y^?)M zV|pXrp{rbmwsXB;XStC||~@CzQ~SZ6qq zB)tko%$SkWU@VovFm}k7QYi%kK}1ly1UQaYjE9u#EwX4H<JVEg%6VMZZh-=VM7}1j=cZKpH%$3o4avWm^Mb3hj>35(@9SsxHR{ zmV(cLqh;t*-DVR)O3;`sCE_~3DIVy^a*a+&lcp_BEP$2@`u^d?#o_zl=N-EsN)KRY zt-|@?#mG?dL|%)`7{c|-z7#M-A%$LeE!1|# zAD$c^4Iak)2o%cgFiTZ>y1u$@fr#_(xIAmB9SH77!UmCZ0BTiZ(OOnE$ih~PFsNVr z@(fb>ji{i4vUCrMaqk!HAHIt*RW%|9kLz+BcyQvT4S`0mJrn?>HXuE#Ie;7PgfOJI zgsi(qyA>pron=(rT(`D2P~6?2l;ZC04#j=r?(XjH?ogz-Q@l`|;)UX{ad+7`l*1$M zc^gvhF);E|v;c*gaap{Df#de;?@Jse=B_p93*UrVEgunTJQF z3ZA9GI$53J)f+<8j^lEu&zE}0yZwH7 z?|ENL)+VW|RUHIo>F>H7_UahR*c)J zd|y$m!ort)iCWaQTnF7693zydx+xI{DWsPlLWX-(A~JfwN~3D_!;(7Vh|>k~>wDZ# zYWkK;>TLqbr=QT+CrKUHi8UcaR{9Ka*$~v!7sDvt6krlzTgN`ZLL1Gd@IhYu=X9l2 ztw(!UX{wMWSlE(bSR#!uZ^+|WUz7!vvgHhyv%FQ}x3VXl-Xz2N{V3G-+?*9U@L;=Y zkvVK6C%?s&fXiu%1@u^qnO&j&eIy(@y~X4MHPC$5!?qfBe7~qtXseW*%o!|ZY!8r` zb5e|P*c9P=8J$*QRrG-pXn(^rPD4qUsUKgzd1m6(6r+NWyu8x!Y1XV`CZ%%c5Nlj+ z)(S^Y5Cg`RKyw(A3SwN}_`jmWw>>}Ts`ZjmO z)rz;fzO7{3gBNd}YzOHIhkRh7eSZhh8ro_7f-+2Oo6PlAqAjE$fEjvB|7buTBEP(j z0a`eVsy)V_)1IRPq(O>@$vF?lsqRF6tlKdU%V63IM$o?5MCcS0F!ZsfEsG@-u{~KD zBHVPrNi2_~uUkAeYwAL?m^R(bB}7?H6GnwSMUODh+#3m+7dK(PTJ29$!y>WFM(rde z#GcPRJxcye={E_y^223IQ2sArpPH+j+qj#)9NOl% zF}-J7r7Ij$o6$An>G_QA`=#pV!KoKH!!7?nRy?5zn9{6g`Ut|Y%}&y#0wPo?!qg#g zbCi>S{DoKkx`52<$8&V7V^YK?oN$M7ii;*~_k;qVJ~Bi}sC~NOx((TcWr7pB4Ql@9 z(pCWvQFaAuo-8~FE5Q&j*eLPx;B;wv(~{6~09rDP;8BkZ`qD(Q!!|EVuRt|5dZ?mn zKI*-Gny%$M%wbF01M8YO=rsSA2h|FrtDTrCEyvRWx!#r=j$4M7GAI3Iddv(#>~Mm4 z)Y*5|S}0r!n`=yu%5pnTghp=hHXSeR^^f*-zCO08>M{FA94$Rtnv z&K($F$)QK-h4|{8bKKbkn_%%}V;$DnEPHM(p<{cJ!`G*w2A>q=e0<;{ukll~&v?z8 z5hF|h5@WcIdgZ!xtxZ1Tx>1NYBb%>?_mGP_!-0FMJ<7#evVNo7)mEgckXIFujD&$D z-#h;Pd9nSpF|gY+s|o+;hR0sD+@`}^+*u)5j*mgC$F71xgRJ}mKGvWW_9uD55I5gB z3L{rPp!B!PyQMgP{&jGNhvE7jan7E_VAWM8nDz%u-7u1bf603Ojmi_TwkRm?q3P%!gV2j20E`$hR5Tn&_4D-eUTR~DHX z))#yZKrp&7oUbhJTD_F2L-8JcxY0L;p@2FSGW~Qdk}&8in168_VV^1hJ{~!p+Plv|{`4@Z zCsi7_ZpnLNdwOHb5q!HfS)B9TyFnrB73!ZKi4XBQ zDqi|O`p@n*#frQsiga4-TU0qU<&QKbxtB&a^`^c zzJ%7EbDr9%M}M<5>#C%9=386gV}wvzV~$1lm>6xh=O2?L6PmJbx{{0ra=rq`VD=#x zHWn&r36}ukfn*XX47(&#*7W-79IoSzN90LNhW9`f3y2U7JB?Q7c;Y$Rb<(OEG|SK- z%tfdZ=}ZY_;W}%Yb`C)zWx8wC)qUtK08Cm_Gh{pi(hkd(n}dV0rd(2Kj9X`|S@s9_ zst)X9Rw-tyBq;sHBK)Y6jtYdpRApY;61#5po@L&gkz-r;GSbztliGAdj*PjzvV`q~z#uTC&8}a|bYO}A zL_t7}%Tj4mku^a3xwu`g_4=rrNI3*1AB&k(J7K^k+)-<%M`s-N2)$L{9md8dM$;FW z`SV7CNaRfGPl``TE13`n5S5Ll&t3W!H@+X^OY2b93Uo_hrKI+M#l)QTb~athomuBU zJ3s85u={!V-xhqoIIg|h+ysNnyuF^?y>Cf?`9wqt>$!PE@>4e|PLzk^vH6^K%|MyV zo^Oyuo0Y$8L2L2v-Zynm+M8BN=hYq^U7If!>YCu^w_>wnX!ox(RAzdhNUYiUKR$h} zrR(AH7l4kcIpVu1(GFHBaTyu+3n{5emqUwig=X@^zsGu;5Fq2QtXfMh+y!tQ{;=PK zq$#-(?&IV0Bf1giDVu8e=Tm4t1yuG&rW|Fq9ui!KH3O17KW=#g;sTOfqSpm5|BIvh z?}S4h0Deih&kWX%w8>j95tr)MAzz=+S3mfAbBTHOEp>f!iQnsuzX6D0L0JnCDFVPr zT}EN7YgUI}>{g}?cn!?GK81q!uAfjL80E6Pn)M;X9YWD>@Zymdjkwgh7_G1npI_|s zf9y264-F{lIIO~gnQhJt2|&3+Ro2kt_ou?^PQU!7V`k8J{c9lvJbDSW*;&^F$u0|M zDxjOAiIU$&mE>|oz9!6>om_jny`cZj5PxEUnS~?2MEF|bx`2`{S_^d2o!qrB?uSDV z>59t{9aWzh6A|kdEDz@$Onz4>iH?l*6u}GSR11Aa-U;8gX3le9^;6Q=?6EwR-6fNHyi%Pw^c}{$m9m*Kn!KuEgoeNUonjTnO2(2K*{?6U z8aI^C#85BOz4Z}oS%Zd!;8N*+>zbemHh^mIyW8ID1(%$bxjg!cP6{{Eyqg60X}N7D z(RAGxfN|mCr2Qe;HH%KbsgdbqWZooc%y!tK+VQ^hMyZG#l52MxH;?IwE=1<-{Gl&L zXIfJyxW#%I<30k%H4KWbRc6YnJ=`fno6n__s$N`~A)H+u`J+|%GEWu+`ZYAIz0CJC z;yZ2x9yIz`TP2+YsE=fdEa8h*o@fG`vblxvJ0Em#cZ%LBA_Y8O4m(!IYg3i))<#Oy zCPEnrYmj`%nfNF-XQx-=U=Oc4$-S)CUN;Ki%|n>$s;usDoc4YrPTKI6aC?^J2T`Vm znpp$-0%P<~A#v*>>!ROcA8xRfZAwH44C`UutH*b7vRCKwZOqeQsS!Ak+q>L0P_z{T zN`QC$q_2K$~bV ziit#u8}GK%gw5V`vZR&;h3ecp`LMYdKK#<;82&jWzT;c9Onk%B2jX!U?Yegaw5~PM z(IZ|$+wLa)zB`L>2_{&@MobswM@N{%;%(_ow#Zd)h1-H=HPljpy~hd9 z-)IBz80bZUjGXpv*gxV4n)6Z+qRCvl= z$IO{8vo)(iS5oQPifLr&ttUEFlz%IuU|; zb6eWjIYS^5eGqun;Ob6 zM_=i#g%gVVqeww_%%6`#y>yBLeJ8l$H;{sMJ!&?5ULOsPPp~YB5P=F5Nghe5XONyJ zbbDkcXLmeZ+`K%43|TlIP=hL4!=Tf+7l z_#Ff^X0Wj_KfwbfI>y{DFFB!(>rD3#Rx#R{i__3Sus#xE&X}T9(oaL7hy!o^I;s>1 zOiAu?FwayRt0@>Km@@CE1C*)A6y3w9cA;1U;F@*Fv1;O;)lLwCugV*PZqBhir_aoS zY$tAm`Kp))QHTv;uKR(`r_bJkh$yxVkYsOf5+8b6KA^}uw$ zuc0iuk7S#lJarP^e~sJe`xSs+SE*jnx-O?nB8ursk|$*Rclm8h5yaqvhjbhvj%P_x z>fN%z={FTk5G_1tnhtxx<|{MzE^|#Z`DVW2P*fqwYxA&$+s*l;K3!F*uq}rlw#(4B zh4?FOIQ#Y_eRvgQ&J;RpBX;TDjY!8yU!D*5knDovN#O_bS%v!99+j7M-ZGeyLS~+T z1%WeKqzLnG6KC&bb;(Td0r&l%7%59%EgB%<71VPMWFnA6R+lgZ!33&QPKjxRPGKut za8ZrBnnWd?_#Y$38|}F?$Rb_CaUkJ~etoWObB-vsjVjY(rhr|%B{x>cI5Z{czOo1> z?>KYU*h6eAkMy+`Je9nK#fwU?v5%qA(Cy5j|1bvK`-|u_7-*MKhKAc{ryT|uqah1( zsQ*3KaS@U&-wYF5f5z^!?_Heb{vCHI))}6o7`111RHbv=W_hh|ASW$%R~uq|%&Raf zj3oGj#fJG%5vbZ!SLU4PROxen`~_ExyD!sk<;(bnp922p@eT1+pSs|N@6fiKZ_0&# ztm$k4J~(lH?+o0{jl-ruOIpN&A={PLm3u4dUL|WuTa?nvzM5ebZ>G8T)*md%s~<ioE!zOYRnA z&9{qY@Syo`rw0g&W*_+`;aOR2K3nrr;dJ@Zv7FfsYad=qPZEM4Ibh$7I6*&N$6f73 z(IGLLMSM#}GH+|(?ymst%OiynJG*L8oYM|Dwr9R%-Xmm<0fNeCd>cJ3<#7~vR2P8hf$Lu8Vx8~7{cWEJ?{y_`;n*pT zY7^LrFic1-nby`8OFW{CZEJGR)WB?5_z4%EruUA{J-9DdkQF+R7l;{wC728AS=O37 zoc%1%)TKvIp!#kSY%LWsTF8wj{rB zR+Hs?d+rsy=If{2keFu~yUA8J3Hy@bw9|>-(y!a|wm5TF(Dz$C__-SO9*@lOVuTEW zfGW$H(k$++{moa4)sTdZ-&t)N)uJ@hpo)r<5N;RsU3~_~>5=(G#!7_>_8|qPcy;Mo z{adFpO3l%zwy&voj9u=Vah~gT^cc9mevaG z*Jp$n=k`1Q?+J&Wrd)s~O-MMWNCRo7k2!-77uN-TGOS={8TB$ew94ht#1;B49wCEM zD?|_+t=tP;@pqhsr!D%IyeEtY&7aWZleV~Z_>7PUg%Cj(=D@%&1^X3?PBFa^pEx6jlAo2O_idN2c}kzA?OVXiGr!Z4^XKVXL%xPlrJkQqu(zQZ~x*B zU?#Y;(~q%-rT%5O=_b?z_DHCBaT7&-o#wo$n1y;mBzG+{ZFvr`Xd7m%h}~fmx%IK* zbAqjlJB|9qbg9ub5$eDJ3vUywz;T4^i0Jc;)c6k9Yw4?&i%i$7slKiSvU$Vwmt*G? zv91-1)Ecux^{mvC-m9r$~2F7H)C1e!HRh)Qpq(rj(9Z~^iV;EJ}NYIC-yAJ5)R zGTU=BmM?gY@%C4-wz@dMv`ecQa7Ui5A+vwD=ti=TI#+AWk_{Q7Vakk%#a}7-n?sCu zDxR$Dou0@oOk9}3*?}dWxA7kAbqq?@eavIc`1<{Zy0EYn+19+px zN0`xIV5y(RHnPkf+|!1saGDHDHaOfemfC{+WyHRjbbVn-ww>~lEm-3MG^YQ za52I%Z(Tb1^VAPjlzu!k1|H5IX6{tDp35#u2B;tM_~!?^;-!m8G>Yequ4k8LGcM`V zM%!+3x8v$RNX)_GL|#60L}c_i8@3klVg?|T`WAcqEJK~+arDEx9H>`RsYXCpu+ZMR z2w4xfF9=Bg{yj7R#)Ov`Yg3gsM?las;DJ(r-{efgepP3Qrwg&dfMRoY*rO@_|YK)MD94iXnD@UCSOC&)3q;Jo*fcXI}XoB zMSFFeN2xZQ4`CBf&}idrJbG5 z3Dp{WR&uq8j@p!>6xn%h&61Fk2QsINp-EOI^2n?%0epkl6{7r>5^`0c+PcmdJn zmu$y*q))1=AVl=MCH;UW8gGpvX$WG}f3`zwBI&MOsU<0hDweg7e4XX6o*l`x=G58t zszNp#1Ep>r;}5xbb#=?Yl#<_-R{-!sQ!b>N9YQ}1&W9*zXQ6(=4c?LX5-UbUptV5c z$ww`MRa5n1!#Q)ayi900=al4MzzzIucC%mDx<0V+6fTg1Yd;^#YjkOjFsfg*Na^Ih z!2qga+XIg+mACQp@nXU&A;TD4_s}PR~Z>q;h#(VJ)J9kh#(1}$; zM$veY36~{(0eYrO_k@iuUDsR3$~yfAd5;J6RQ8-&p}TsOC0-RQEO|k(tyZ3KJrf%| z1sf-dC<#qgUVcGAUYSMN;UE}nnbT+p$DQ;O(p_VrLv~PuW``Kp3x4M1cxP~Iy)TDP z7lDLXIE~dj!vhgp1<#JXg%H5pdC0Vd2tvQkO0Ru<7YNiCOzYvNG6dMRjG_5;O_ zpAN1Mcb~3*flt9(TN^j7i)`o$W~{LW1`7U&z?=2RqMC>9BN96`y?{^G5B9lz)s47e zVx7y@{HNW-B)Pfem?kZoQBy5Cb{(M`FmxB=x12xt)&){8T&0Q@c$W(@mmMP(?VTPuAf6@Jo&JNbK)9*rPY5?|Ol~*UeH0I8tcjJJK*YyA)|KL3#d8|8=< zMG)erp3z+7F#%M*QrE#0Ci(<0K$ATn%Ib;acDx}+dD~r3AeZEozj-xf@_4Wa0qO7o z_{Cm(G8v_?{Sx~0xU+h(Z)Ui~Woq_v)msJ8-6cEZJp4ds3jqp30XST$C_=JTKrO`D6Asx-bQPr(yk+EfzAn6@A#+;J)q9} zmBc;FNfu5%M;%789HKl0V>^d_{;p-}C30uFRao~P<3udkht24_sCt##b4b#}(aZ9y z2=Blt>R!n8jH1`pK74)}YM*(J=kk#*&rZwk09I_rIrPx$3q|IPrNu%w0qKXMfyzOx zx#%$ZG_F!&>#Onr&v83{gA;atiHJ)Z?ihmwW#7=0doz@+V&8No7o?}WzPepB!AbnD zdfg*2?~%o5oryjVnj2qc{G7f%zmU(bO3@gWGM*^< z?K>Oyjp1}q6YvFL#*T4_l?|ir=SHH=gj^D*G3*}Z=A}{K=GcG^ zcmbQj1ECNr3sX3HMVt8Ex<`7rE^qd$bGvuRXO=0YVIiwm3vch80-Z{lM)3KC0!$9X zdw|bMGjhi`6JWYr%nJU+nULS;3`7P8AZEaquO=9%1%i$JCgiKaR7>t7g|wNKBeHKM zUO{A@E`ycoF_oMgJV^@p2c?~a5sa`O41d48J{HOEhnBo9z=P5K^MA;>+0ik^ILL4I z$$9^fm)Z*~X-^@KA|k?*e5DWK%Fg~}1+5_EE^pCAd~P0maWBbz9#=Y;a%6HdVbF~d zoqOkzj7Sq};Gp7+_DUTCYNl_&4a4-C%Hs{30iD~}HMimBUPw^#5f??<-aNf?5{_`I zSi?RM0Wu9Z#JUkB_JV@eZ=NCY#_DfYZe8$8O&c*2YjE-S_f`jMRpy_Dz#h4Vf{fF| zW_xn7Awqt_imLk#bXuuOTsdd30e!t^`&@Ul(9g>icDQohfvzKg{?+5D0-kNAcRhnp zh#E2}B&KfUoi~;9H7r5Ve#`I}udHd@{&SC~&P#?5B*%x&5&Gd@7027+ViRdm-|Jc* zk=lFlLKb09Y_{Wgg!{bBEF=d3NdkrNrZxZoajxO2e;)k3gMPEN{k5P0PDTDWa`a-i zW>Qy!20#J-Tdw#E^@nf)2l&GyH~Ho)Zh{5?;L!m9)W1=0(gvU;Na!CZ7b^>UBip}> za&OQb3VQwj&_Mn*zKw2IRX5ta5BP7F|FQi$ln>_b_}+vSmpA(zqodcKk%}Q@{im?< zuaSBeIw>G4BYW>dzCqyu0F-}=H18i!69+p-BiDa0J?Y|g|DS>W)%rh}s*lB?zn}ns z88iR@{ok15Z+H3gL7H2=HQvhmZyo5Z{r?9nGEC_T5(WS;#{&SM{Tue^&5rmd*3sF) z)#1&C`2P{?M??U(;bUiSJs-apCzX~z8Da_p~H q_usnRztUeL{-*zziuXqTL&f{Aq=0@)AOHa2ZPkBEhKA37oc#}Cf`+#M diff --git a/python/lib/py4j-0.10.9.4-src.zip b/python/lib/py4j-0.10.9.4-src.zip new file mode 100644 index 0000000000000000000000000000000000000000..51b3404d5ab3e9157bdb04df97920be616b1c855 GIT binary patch literal 42404 zcmagEV~}P+*QQ&xZC7=5+3d2-r;Mj;+jf_2+qSJP+qUiV&Ybxs&P>dS*%`TW|Hxdq z|E%lIxK}F5fJ4B6{I_S;$zJupfBcUZBnSbBy$72mBR~}v1pLK?*$~2&+0exu9t0HP z859Hr0^>gu75}sRzl|^;>n>)h|K2k~8zDhJU{OIpkpFABlZBbBfwdEZy~lqKB$GvH z*z%uX|68DHb-CCL4wUW>b(C>2J+5scPilnMSu(kL!3+>NM>sG>dL@oVwDJU1j_((r zK}MyPMC-Y8!}LS%ZVy%hJ~KvJy*e#UWkQb2!4Gvt-~8m^Mzfjk#pzYpX%>H?HN3-)zcX5CqgxVAI&sw5`la%VQCZs!KQy0umTF&#a2h-d>y zrQLSVn~&Q|f=%NUrxxsbIj=3f*NIZ)<2UPmbPMR9-R1<3@Wg;r?%xbr9B!0QD8}YE z!Py3ye9P3om7(nOU;*!^OA(C1-1v8`hG^jXO+S?;o;XdIh}mcDJlQ51aGf-N9mvz> zV>C+AQXS|aA z2a{s$?#<^7cWn`s9ePo2Npsn??ANm7eLFJIWHkRZ%@=8Lm zP*J$+xC30w%n6gi?)XfFFTz3K2lkn zO2`lS%wcX0Zw{1}M<43Cgn7-yNUPU$GhOD;(-J*HYgKkc1LZBTQ^syGL4@TzXTHW4 z;Thzp13zuCOd>2#nf>jK=UelQ?Vt^{BcK7^o_3=(LGaXs`ZAqC_KQuWmg9OQ1Df*V zcW*P*2CNU^=M)xq^SqWI4}sY-v*a7f;Wn+~{gZ_`*;Fq3kKz9!CmWp_t=No%l>tgG ziv%f?3h=%|f)%+TB&U%fPsSqF( zt;HDyYH7_apTY|w-CT#TJ=bWH<~O|%H6N&}sh2tzNI0x(U0!QS$5m4m zpObN}M{6t4D>4#o+q;U2(Z3YiGG}}r*ZF-u&lZ}&#+3!AW5k_Tp7aFxkaN*qvlDo8 zab?cRgJJeH{(xR#cDCP9G(GTmgz0KO+8VNJj8Vn@3*+*Q_fGCl2osbas!v!K=6&lc6ERK{O3yL&o} zuG#6!m_4;cymbDM>kzn)MX-ru)#pZaoH0Z#;a7Xta_JYk&E6F-4A__rv*?(u(2cAY zj(=EQQDQnE$V?vb#o2-Ar>#k1$~ed$6d|9_kJCW3(*JJm1}+2WRTg^D9`CdMX;!5D zozczFNppIOSrsLSIY=puAdX5_P~_$57&Q=f148-8VCb4;3j%`lU+Q9HXJc>R{9haV|AF2Dk}1bPd*c3DdclVv#NewN{>2K!P~4N<@+rO+3Rg{G%aZ!n;;YB1RcD z*4y!P*x;|&T9QO=Y-C<@*}H2Wlu(0PB{g=|KSz|Dgc#9d2&uv%q?l}h*aS|JcYQfr zpuaQBL_v3WoZ`T2_p#E(_Bd=0NLK4Z6`O-P(X?Nye1FiOHPsko@{J*-l-Wv@(kS{W zwP=jc6~)pNZB?rlwSna+CPqwme@RZ$bx!eES*=}4ePo>!$1c-D;{*s$AQZ~j2r2do z;_6zF?g$F{Ag2P_3}c-KknKC+{rS1E1OWh^2~MJV;t*#LP>)NwOyEC2>YL>ec^$h_ zXGv>)_47rxi&6tEKY;T43)v)fZ~=Cb8Q;mRA< z`4#Tv>%SPHUefh0#l7quCOS%Q;L=~ zcd02(nxo>+x;BZN{*Mcp^QTBf!n;seKk`-}B(q1k5osF?PC)GWf=W)gXK}L^;*{N& zfSup`jcegv=VLVedF$paz{0d_%vWQUoj#i2X%)claS10=RX)-Ucu zWv|GjhR;AoS!sw7MxLicrnA1)W}_l-oDvUcKS+uGNp3j? z$qL1mAuctzQlv#A_rtlVv3Q7*MkfiMUL+v3hD$Zhh?hPLmK4k>I~WW9x;Om7m_T7v zU7hrAH^V|Zzt0o8w*gUlBwC?-u_AP-!rG?Np&3k%Q(D`cvJ}mpk_*ywfX=@<8m)R5 zFe@2YTuog^+sx|yp>_0fXVU2P#xM=oQf$9tZ?BX~pva%sw)=vnb%^bkt==)PJ<$Yv zR18(O=gfD7-V|y@wVW0ms%D4q2Y7V>qb|2Q=QIk$)6sLX!NFN`45eK>ngb?MHX@6- z(G92XKRhzriCZzn5CYm_0WDxHn9>7Tf76;wF|wZO1(O%KV3Iw|_{IOZY{Qj&tWvo+ z+y)=5TI!?)WZ2QbqwORv;rYBnKr@z$;ShRTLFJT(ds)*}xycWDu~uEH8qINv3VimR zi|-D+MMmg^^!4mo&5U0J99jy&l_*zJ3)09K)PZ816N$)8Y~o>iQvsp^`oKKeKX-O?*cA9**jTZDUvmAG4@h^7;dHgThkE z4)xc&JnN(S_@TiTWJY*~e#U0_VFEs^8YR%wSXE!BGhUbvJ+(<`dW!jgQwt6mb>hl? ztI!aPPDnb@%R_`0TLCCK?q)+_4b>kTxArl)3y@kU*6z}FhYqp;;=L>l-UKIYI|fB5 zB}+$c;fQKc+76@V*V|rR$611xl1n(8#WW+bMao>%gm1v03AF8-*N6~Le_qzyIx2pg zPK(ZTQAPhKLc)F=`EHzIS`}$sGEA#Pm80B-F3nycojM9pJ;vJ;x?AsHqGCo;4DO65HrqXxd}?JFa3HEY7+ zDiTWKsD!e=*L(3LwBM1Kq+4QVp_lM2WRpQoUha2Vcw3rgkaz}`7eOybF;eJ2ed{}7yFAjEtqVNTtu84`CcVJYsE3iM41me z0(qrFaLkn)sj{f)HFM_E=rr+c(KGx+l`u9g1F8?#rDG^eL)yovU4Vwlan^E?rwUX< zxRg`m(t4GA_L&ZzR(L63_>>N6JY2s)HRB4V0`&UTNBBduGi?2B)gx((VK3sDN(8mM zdrLh;Q<rYYDLWw2}(pa?yrsycqNnD-_ByzHIvU%iluR#IZzp&Y$5+K0_P+67G1$q@5 zBhiKg9{_cqZ;)f?D34Wcw4pbLs_x-}FqYTF?nyzNgHgzOX_UN+{vfA35a0WMH<05n z8!mpU?H%PCv(23CX~qWavw}Pc;x^Y3z!AGb$H~9zAYNFQK!wgF>c5G;BFn<%=#Dz* zA#WoDX7=CooB#QBa=5k@<=69i^ftrz{aZrZi2Gg?`HRdfdPKeO&ioq-inkEM)l`*; z$I$$;mtac5&Kv$%iqM>CWW|W*8if4Ynyc+8Xs@y#W9R!0y6ZFI)gmGjY-RWL;O)`I zo)^?NaD3Rxj|*8m6-L`NYtQ&kD0bmES6jBc?bq37H6pqz0dV}s=9VMvZGrQ9k*H;q z=btswCo*WNANxBY==Y_QuU>Dv1H)Hk|JKtV{~Jfw?|h?fLS$QSZ{+XD>w|=uEMnIx zdxq_QUz$vQUX!B+cyuFvu$yJ-N~7&LfA!B#E@GUR({(V2Cb3==z}tn_;|N#FKP zP3^1ob@%Z~$iY?S9DwZKi^Ry4=^mrFvZZzQPhe@;Z%$urkgnp5=iJv^oI%=OlDz+3 zuR97dT(dU%Zv}!m2Z!BW<~wTSnUN(+8E*4N#iCVaYFWk)5vE1bzrbGD6xL$SN`Bb z1Km}6P~)F~TLoReXSinxi$f+nl5nx7X`|^D8Ly*G|~C4J|7#K@S0YaoxUhn!;DVoRM{? zQC<*ehlTQGYQAzP@Rstjd2LI~XuCvf@DfJSz+Wd8sL%>Jj5*sZU<+vw+6F{?a23th z8B3}*!b(96HgjSx6YWXsyz?6|N3h{~mfj*z&alG=8iL*f9r!@hsanPzp8b0-%Bbo8 zwe3w)sW1#3lyE=w-^_e+V%^Xk zdM8erZ?P~|5(Kcs8jZm7?hO2`*INJc0wX~m751K~Z6S8$i(+8<4Wv<9>Cr7EftZSn zDV(6J6d5Z|W&4zjBs5NptK8VDk(PC3X@ZQ-19&+Ii}nSb=83c*P|=9{5Q!*={O`n@A97Eacud=wozi}F@F?W1HtJu z)q>k_UH>Z`)^>wp?D7g05V;A(a9$&r%Fm8e+0|xq&-2ebIwPK{o<9NIK1ORW_l)jl-XJ*ENI6a>SJgZQNmi@{{c1a+Z(i|)B3XMmQhF1QS zeKbcE2)A?Pw48sO-JmNg0>#emMFJy|cl$42bTnr09)z9eJq~Q*nqJe)Vaf~i(+94Y zBeez3tiU-8j5{$H(l|q?K3m=Wu5 zCw>8k>lfh19i+uSj3lB5CH$0((WO(>6E$pzEF=Ev+%52E9U#17K_l*Xp6fc9{++C{7>1yA7E5J;k@eye6B;5Hf*;O`K z{Kg+m-i;~K-m940IE8R0&bz5=!o{Xsso~P zP)j?6`EZWAU$7mF`wW#(z*#7#_+Zb+2nwT8V`s_(D?+Z3+NZBb&)7`IOyih?QtQ0N z;8k3aJ9*65HUnz}e@N+Z7_L)Z;>R9%P^AdxT%V8L#~EX7a1k$Q)JL?<1MK@ge+0Rd znE_p(YW=8M10dQn^zQ28BWZ!%#nifPG#s+tSBT}(#vBE;yKn1{NhPrj%s9NEX^(C6 z#WZd7Z($ZF%Eiy&0m?6L9n(l98H*)c_NHj;w_4=)x@-@EcfY(n${7 zhWEKPWu!S5Pa-4W-{88NVEI1M>GOsqAytcHK@*tF-}jpMmLogM_DG9uYBPVq7-R@t;`K zs?9RV^amx6kQM(G+{Yaei;v0!H;8jiPI{r+{Xp8lPAj&?9R&zb47jP`h}Re3agra?UeB|H=@#BTuLKFU?7vx(L-i94nEc&Q2#PW2Z5_rmbXc$ zVbTH*&;Ls~w!;@WVyT$Biqy5PrC&%|8FpY?h4q4*2rAbhDymt;hP(rFu2_~kR8C)QLXF?*Czl7P5FN6be3EMDLU6LA? zVr6PUubb)n?k#JwMe&Jr%S!Pea+;^UUbo;QD%A>SLl%?Is9D^&!si(f(EA1K(-QFs z*n2dmIMwzV+TVl7n(O5eR5P-a;f;g^(zLYk^AcE}o5q9>wONCTZ7^w4(DF^ zt=&k#!KH#t8%*Gdr%tdZ78!i4lc;M07pT$%fl+|E(B4AaqSVH20d^C|y`M%!a`r7U z#rpw$T^r(k+d>!9Skq~5K>sJdpx*30AUKmQeCX~&b9b>P9<&e$?KX7(XYy#oVJEwX z^S+0DhVTf5BGRlSwoet>wFAe`?RgvT@B>x+qI=ej>!Mrq{aW+K$ER@}ljk>-2j4f}69jwfhy3gFb29d}Mx)lP9?mZS90mN{4}?K`G|4m)s=$gV$)f9BZ3&|W z`_5-1cupU6dhv~tNkTgT&it91J)nt88n)$$yQT?LrPRly*cxS?32SDh*V&N32p}O@ z1`)P(z$H+&an}jm)0OfICju_E7(Q>EqU1Nx%!BImm1;)K0xDGaw zawWr5B}nP)q242SaCP>FkyB1`>QJ1*pO(pG4c&=sik-uurN{wXmQV{}o$0WThei!2 znIP2zUIFQjj1~>-oo3~5mBpbRKs5n#w1#_6x&cmVZD>Wd`^patXkaW{J#e$RB?>op z#?|Dk86{^O88hs!HQA>?(W0wu!ia*b4WKA5a9svOG$}w8*bV8zS4Ex=lEq_A$jWRb zSI)~es_$$4&PgF8)4x%17!TENrazOQ!YDo`F(6=9D2kXJFxL#fb2&IA z7lmr*ldRv}5H`|M1@sG7jyHD)G{tlrU@(+i%OR2m%Be@y;5#R+KUekcC7ZVxotP~Rf_ zR@)yf798nk20T~R9F=Nn7Sdhb<0vt|UkFo^LO(3?WlSVp5!%`aiM);VM}i*de7_2A zS~6DHpv|jFmD9E)ckc4q8T4K)IfjKCffbp&jMI-nzVHbQZHZb$a@Vo7hAz%ZX0;G% zcIM=L3?@V#_duNl*vB2Hu-Z34^t`imh&1UM#TT9P(pnqe25OSeUE5z=ytt@ER z`vS&y^G5ITD))Q0nNrzSgZSsscrp5VH1fp1nj!CRK}`pzJA{K{a46Cuzs`S0e-{_G z+`g?~@tYorr1*XQ!&FI#^e`<~SHFCrYZ<*G(r+Ctu7+^bIH=A}Si!~y_a6Oj^>~isFa+uCt%uNttEU`MBS^JB#abD#vIK*d za_R3p*#&?z|4VJItAOZiQS9vldmf0z88f~NT2uPldvD>mN?!mm;%>ejpu%9rmC8xp zQA3N1YgVjtiJDRY4?iP`X466$|M5F0l5F6*KfpSmQ{x_d@C;omSgGimx(tfyJ0?n| zv7*9wT4$V?!&FKLmV%}d<1}rYYPdwg`Yl5(QZ0l7Ms3LkyqPqB6a-A;b%&_F6g2p& zAXI!M+n`~97^7*&RltJd@JfU^GZg!DJF06r0^7h+K%eOdf-KdLg~CumybFF-CkLBg z(p?PKMQ?C_&xO}zXwxP#&$pRWQP3rZoAXn>C!ni+Hl{ zc@AX8&d%P$&F-&PIqeRulunHu_23 zIlp&j@NHo45OE2g?tGWd+_cK3o4YUBRd!$K~1D8*zwC5S08ww|0;SLBU0dzV2K zpG-;3gZ}?oqM-nSugd+W0A%-{{y$wMeSHgC3uk@(|BI`nq6SmOcBc#@Df>@eR^dpR zc47t)9;-l0|C?SqI#zvbihhidMoChZ=HJY3da3BJThfU~V1r4y*l_xrhmOwOog$TD zDZQ))vQ!fM|Htfta5YnnYS$;r)c^r0_XY*Q_%E}|(!kX~-^{?-#Ld9tzkV(4|9m;& zQb*fuLp;Ivr1mubB?}ZCE``v{HSFY6D8*bN!nQCfv1%0B8cYt21BL{IIwSGm?Mgef z2eb%-@*mYlT~353*w)h05@1Ompnegt`eCRrB$jae5Z~7A(fp$STC{TWG81W>{Cf_A zY#JXh-iCV7q#17bt~V@)s1Sm&Gr{|Ny9m&jkj|BOM>d_D1{hWz+deAM#PE2rS-t3v z2hfzJSBN#nMQfBih>`EhF2fV$Yf|4`WwI)9Hp~Z2=v3W{0$g-dE3jwlP8?x%q6*1+ zRXh?06s1V-3s93IqmGo1U|IYa<2!bl1EPPsD&Dfl2$Wf~f)K^hP~s=UJ(rLu7C$2Kz^W883TQlW)cl?@?HB$>Jt(1)3a%e8a2iZB z+|%TAYc?pqjR%rSwzN5cn1=l#-J;8#ct9&D5356H7X7w1;Y|l|Sa`X?qT)n^Ik_3^ zIp7n84TKlx{zs!c8PEOE)PkY$QNO~H)sf!pJlW$YLz`4hOr_5fM)Scew!mj)|IYDY zf5QQQxfI7=5aX#o188KZK>i$R5rD7}K8mJfXn%36$$)9rIF?cktUx@IDi*A~Kb1G7 z9loMrSFlnnFVkQ@g8T4iSG@&NpHn_Yp@n#IVpPPKlo#k!3qd<=@m3e6TN`8jHpS?< zP^k1agEkJ-5|vQ|i?9lu46&@@AE_A9OjG~HTRbD6umN0T^myI`oZ!)G^v05XkwkiS z6J=P?|5Mj|Mhquc0pwMaEbc&)8!{-d=BHSo`~RB>J%p+d?H%-^j01J#Mv27QR3T_O zMe+<~k$uGAA86!fStSl;k&Rr=jm!=GO7~a{ps_fw<@AK|B zGwR{={(L`-_zR1XV6L$6?nutr$&sU{Ed;4FOya0t_G$!f%-EjqzCoyadX zloe4~F2lTS;RSHyAD;}+5P+sB{<;Gd`idh9YC}dX5E*nZ$cB8{0Ds(wGBTYBoM3HP z@Dd6JgcFu0J!350Gy`6Qv`EhIM6_SaZzTe)`^O8N^5qV9LDfU23MGDVj7deCabEUm znZ?GwH@zSIEW)T1q3l3pff(QKdBg1M&FUP>afQG=ZM2Fs@#AlVcMT@b!iN);G8hhP zqGnY_k;X|bCq8!wVJZk!9J-`kX~q{QEA9F5OE#XeQ1ci1ucg86EGCpadtp*M5AlZz zjpVs7b_6|WPp>ffgD$9{Y9lxT(1B<=RIJ5ivgI@)a_n$)ET$?C88(9&$qUnXvO^Ud zfIkzPW4ZbrSpDwP_(kFaevCsNC(uSp9-2*PlsFv%M^KW#AoQ(@i z5w*LgqpkaMaiI-0zNn=5NEdxTXNd-^3cR2Syig5;ol8ld76qXhqobHwdfVL;BcQ+3 zB_rs2QL6;|xj$~m*#}iO+OBpq+)Z|;IH4UDR$VhO1@|1m0-6uZnx=fT@Xjn#gkHf;uPsId zR7k{V04Cf_uySrxaMx`%Ov(hfy%r#JIePG)ENd;%5Rz4JnC_>&1(BZAuWXZ;J)3CA zA&2|+_(6ZiA4Pl3gkVGYq&BxLP8lZbBt?Jz{Jd28$*tZK0b!O|yG)w|hRMACbr6}u zZTEvW{d#y%5T0T1W;3Jz&6-JoA971e5`i4Q6cAsCS(gdRhc@a7CHUy2c~mC~`gBlS z;vPvpy!*Aq(jl|tStcUkPm|1=gzh?>Mx7&A5D1N2Bs8vduhkL{) zyt==g)z{+a?BMF^?n{tOoD)5qdTrwD7;0ed?DNsEF!DPApGhvy?kCrS*w;lFDWJ8e zCBCp9JSLz6_vrpMQWw^m!U}GDV*wU5Fs(^C&kwolD~^?RwKmZjHf_I7#Gd+Jl+K+( zu6$J1@;dIN-<+4GSg|!0k!^dqvM4&7Y#vdOMaM(suR4fnSMpWk zJFnLsNv97=DOxs$*4zwi5L*^BHPIx4RnSXugV&@V1=)fc0kN(61AHiRwcfsg<+#w) zFpauw*Ltviyi+8HMXU>8*p%-thedeJ-}?rs6k6mwDF*;R0MreDMx{aRDyO&4vOB;o zIir|3ARtV_k8GmJ*crJKrTiXUG^kZ2d{2FUZD-&MTcrBZ&09I|o;SpZ@Q?1&lFH(% zY$6&E83<8Se2dQ;eM!6bGXS<$<9mRGEhp93r(+PN0v zm#l2K6ns#&#yyUCI~{l*5u7z>r?Q(kY4U;Wnnv?v91K;W3uFr(aHijwWfvvAje=)3 zFW7vxdzmY)ZM4tAglVeP6r~^k8P|K%nCWGgS4tumPCysa=Hrx zlPbjdhGenLT7NH{vug%!9C_o32I$X z;O)w|OL*~sL>?*1Dg|ZB4L6j4L-N!Nvi~d6+hrIPkuA5Z%aW4*K&xB57e?)5sa6)b zf>+8&oBfWpL*5|}-_gm6&Xr%tLSd~*^t$Y$JOT<~qRnDf8dw~dGxu43%zM>g-5n%> zy6j|sX?`A@@OL@Er znkurJ@c;dGEOmG{mdi!PF+Nq!`2^8&WeLc(9TD;%+?xXfe1N#I!;CJ#*pI0kgsAjfmM^Zj`wT{#MGnZfTEYI9LILDw5+djQr*!VO>N7~an2 z+Wz8MVZn2r0k^i$ofROvl!$?QDCXdXYk7G1{*_t9mIM`At`eqX<#dREx0{{jxMEosvL$KK#Kmd8B-3SFIr!>eZ5+OYSbgjQ~|}Tkq3BHl)pfx9~nyYW3wNZqBdj zl1YJVm+0-~HEWGb7H$z@*{NPqe7>QAz4O8+7?^ow(CBNGQX;pdnuLJc6yu4;t~(7* zI7CjxxteZ01v8;flNi=2Q%suOk6pf91tVzw2%1nby_G}J+z)%5eX7~5qO*V)&0)Wb zqGoYb?qP=DC{XpyB&n5269R-sa9H~7p8f2|zZjBt1cZ)Kb?CHle)b7Vu(=hj0!5v> zp3G+}086(^(YUrotROE3M@#zUi9p&>a3iT9folWnhQImCQsh>T;*bMxra!cjCE&Qf z&{Fu3#P_GwXI=6A#-FiM$bpt5K=_w!=QD8k2DAp&N6P617v0{Jo%sv$r=9RPit+QV z+L73ZtE0#o0TlP22G@T~Dq^gYT^bgo!FznC$d{tTv@%T}@&af7KH5I+5FXb7VBml9 zD?qou@ZU0(^K{EY%!tzIboQs%kWKTyKZgqI({2wBu1znO*?VUhO#Nyos@O=>88#F` zaAKc(XTrx$p&r;tWjyoUK_5wur%s0RCD0rBYa~S`Fi^;^*g@NySvN+8VQbvttC)Xc z;$>2T2|+CohBy9E{HFwy!!9y&5dBB}eh^u}UPCAk(VhuHUA=-q4N&!%-eq5ZtiHFR z@0ne06rWiZOFk7mvsB&|dE76RzDOqfk|jG>*qTI)G0Z}hHRv~_A_{&AT$;8ll|Q#W z4uBa`3++j`uBfQHBJasFaOn@QAa?fX$?*~v3^;9Y+60!LBkRL;#~=q((=XRg<^d*# z<+|qEH7go|^9c{$^YbK7bB5cTKawpgRJY2tPCzjRXwBopBdquxz76)3vR)~js5mRp ziyHwWAl_ig3~8A`xVGy&xpw|S{k%AkjIu2%hI=DMuXgZ=E;Jj)-{vsAr3bmZ$^ofJ zIPL{WPWx}Ibv~`fdrRZ5kM3^6;Ju!FC4pgH7lfMx1K~5_+R4Z<3p*O8vv|960AzRz zM(-wDms7a#Wma9VJ?p8zHpb4rsM*modNH*KnQ20e!nG zRq8D#{Kt^Ov2sMiUL^fy)0-N5ZD;2!XF23;ytSXmt^`%EWB=4izA&%FXz+bJO~4Sd zJ?4{y@(c$dXYgo9PL9n8-rAgRITUbQ z-`hG|cSI+o<G5(_z`nk%J@uu@u>ZA)Ax({coz$M>g1}lkqDtr zfs^ezD1Tk+P7?(ibS}cBHL}|TzfwE4>AN+@3B>+Km9+bCLw`%wpMcJ+tuAlL5RXa3 zU8LfYm+6o~$oP)OSm1GRD>#|yI5rNW0ChShIO?b8y7UxDNsRhU;K`pH%VH&8!4DVd1d!Q&jP5V5JUcphOROj3& z;^?cnfln!-)v8K;FbV5}l}Wy#2R1!4X1XTF7m)wF>uC@MKRxJkWig9-?dx6D@0Ka% zZ`l%{C}HaTPtKKPk@F_DPK9@;=s_6VLEe#vf`T_WETXe&cC}8ybJj}x3b3%aT-*yXQsRIhI1lY z2HE;XR&ITW`R^fg5lLHpNUcRn&viEHHd}1JIC=3bwu%yK+sbGFsbV>^LYbEhy_4K; zaH6^O?P{`E>A4hrxoKGT^Y73-?3$4o(vR&CcerUyC0@Q!6&i)2b#!B$IOpSo&V&*0 zE7+B*RH8mC%o`FbAXjPHzn6aPr~MR!SmQO*osp!X(1e)S-{}n;v@2+0R+Qq!31Q< zXVYf{g50Qea>orv$wsrlHeclP=_4I=L{?dc7hlEze1a`>s`re$Hne@%HfB~rhW9D{%A23rA)QITC~ z(?AuklHSIuy?*{tIb>Z4?^_zO+6Zvt&tE`cok-yO3ftGk>|g$-L8PgQQFu(h+w1#{ zX5s|ZRz?~k18CiNL;5u7X5)rEv2cKA=`B>sn-z1Cxh!JQ3Ojk1xnK`gQoG&bGUad= z2tpLsAM0lJX2cG8@MtHa7l(5j%h)cfQbKwlpHs06{fS}8zm*ne#zH-=FnQ2W^cl%i zy3SvlwYG3R2cybXl_9<7$C0PLZN0reQ^BrnKFbpoQj@XncJ!nuKf1SFM_iBigBK~4 z2ei#l+}?EMyVs0F?sL5CUrX6j_kCuNA znB*+;PWpxI!*B-J4AbH)=lyznx#i&=HL~>q3WH|}`b199YcYcq{ImS_x>U)b{z+aaEiZOC!k3+oLgMaxzH#~`*YD3{| zZ;y7_^8_?F6?iyz$t<)^@***<@Z5KM2&lgf*YCt<{E)h~Hen%`f)^=*A)-Fi8cqG- zG9s229DlE;p|UYI;7L}FA)57P{{dY{s4 zS7`zXdo_HQ*h=#y_a-~rrn#bI!LK9PxLc?B^6|C@T7R(bQ8*I{0}%BEw~PMPFNKJW z5P)<~z0p+++*9TQofws}yxxZjN85?x)do9XXmzCu$lC8M`L=EPQ@vez?)$i-=Arhs z^KoJC{?4j_tj31%AHsG0YN&$`uI>vCg*Ov*ucfwcEY!vASym0`GUhdB{#nI@82ia<#_kN=+nw2@WV2OAHcHp9Mxxn z#_!EPbRZoh;QGW2`&CPz3)0IZ2-C02hB1HnejnZOmag`gU{PADYbW`w%znY#;XFQ( z)Ah&i?%w-#YdpVr61>lY)##>5JvH`QCZ4g%PM&ApnB360y`JL*w?E71+|Sd~@jt#d zm!F5{pFccYyc=Gwv+G-f`=5trQlR)sSpnaH>*z9hPeh{vKW@H7uRb^YVWzn#Jb@_S zvo3MwL5bx3^b6vGY>7N%z`+VhCF2`CLA{p(t9DT%Ec5s&Lc=jicH=>{4|;R^9{CB| zbS?PBn#*@JSvX{r#yL3;Kx`OAxHEZBLHE;sMhqiy#~`Y1l_`3p5OwHaQUmTp;)f1+ zaYy!E+>YBPR7Gd+Ls_W$(fvSHJn6wZ3pnn0?!Sso^aZjT>{kj9Yv|x>)+S&pGy9#S zaNiD-Yl=bYH2`s|7u{vkSxX~G$kYZc`9WIFMNg_8H=1(Z*|8PNGTnJ>(c`ZZ=DEN| z*h3wca5ah1KtXrwP@JBlcNTv7#F;ntKY3I_jAE0?^YNGJipBHgIH;LvSGnk2*sp^K z?CJ7_z5uHROw?f%i1>CKl8EDQ$O&JgE=H7`FH??QN>5TZ z+cgeg{pNXQrloRfStTkK=;^@fZA6pk_aNdy=60arQ304~!zoQPE$Oq|IWrQWLu`dx zlm%anIvp>sgsNU}6tC5B!Y*lu^TcwT5^q!-6H3wA3t~#FsOGbk9OmZxYktP@evI}f zn-4)NPxSGgH}ov_!|#rqo=&GCxN_RroX~>1?JaJ7r7{q@HuyI$b#+5+KN|C^F=rq( zr+bqMZdB`-boNoU4pEiYAAJBXET;JmOx1XffYZa?+ZWD%I&Ac$AcV_%UHEZ2c?8j& zQmz}xoou5%T1zSHXycp#2)#R)^pL{5Y`6})t6Fvj&%h3&j`0strljQsNyMulGpxaSW2!BMs27fXe`Yjf8$*lv|J}iqGHMjU`x;_Yg;8xjw7C);4P6)O3X^m1eZzX7%in+~0g@{n8Pa~;?f2x8`{1?jkp zA|MYwQt+K>Eo4l;5S!{TMCvvU#~<4_@nn+w^^M#_ooJO`i$K6{MuW40Xg)nVH7vvQk7Hd9mm)&M^7 zROC~&{Ifo@ZvV>AuPw|KcF%%1`ZiS7N=GP(n_SKsS+<6fG_v=F;x+R{mm+n{S_=s7 zqm~JSW&O?a?x#+8NNoq&-oX07e)Asy*jZqhOif z&AL8 zN!uI?#Vvcyxn58?qC6$*6}%|qX8;P-CgmZi=BSAMj=*vd*)~+tJBxCeZpuj&K+oZj zU^N|OjTbZXhG-nD3a*ZYvS|Qmhd^Aso|Nlw*9#A^dOY;i)6MkYfbd-io(jGfQib(e zP$^z4-;c|T`-|c+0#7vz28PfGz`Bglk-5BFDGLgG9Gw>b#npTPU^+T?jrD-CniM;c zt3k|;zJZfR3Ro)yB>M-3%cuVin(6`+1%X^kWJlhL0g&A4J~KB z3I#^x5V-4|v}k$`Ss{c>*AeZ)3Fk63{00oM-}SG<6NmZF0&fDrKoHi~fg&i{mf(wH z&NgC;eZJevO0l~+`PxRdS<=f?6x-mu#xrdH@5N*PcHHUbG4-?Cxue-XLXE4)CyyIWn0cR0ESP(}tnn+i1z_)7s_1g4ijjK`>Gj6@SS6R6iR2Yn>H`GkW| z<@{4%t{vnNS~F^(!;Z8=@USB<07r#%>@XwnhxFPrYjy=AM7|*1+!6vOXLWO(Vl6cT zbr^8D!IVI<(hKTeJx0ZGfOI+tPvOvQ&)q;Pj$8Sp@dPtwbsngNKv9eVie>0(8xrrb zM-aB=76R}SIY-i`g$ate1~(e$#4vtj=#GRFz3S{fW|Nrp@qExi^|5_&uca=sLN;8K ziMy-+Aye8Rt`KEH&)42-2~=x3^~hPBkW`Q3Ol7zZYkVK*UR)gn$c z!?tCqyC-Utv%6yCrIQ`8qf{telnz0G7l`1#E%w69kXnFx@Vi6#>Y@tCAgp%CLz93l z;y*h71|y3UnPz3`OC<5I_*neF^&EE4C$(qUNWNyD2oNcK1F(N{i-PSWo8qjZ2qrvo zbL&^eLE!bJ*O&$zw+3vjGlX>eTI%)5oke=!j|s5YI2 z%bKO(Y7Pd2urfH8vG1Kt$6J6W1OWOiol{<-$)G9hpoJ|hcJGss%^iWTkst+|S9ohd zCQAXy^pgJW17)G7>zkW9%s}o-Kat%j${Q`E$}s{uuyG0*D06>ypJXd=hMo3SheO~l zmYX^sBJ!s-jR65+fFJYAMV#x}Gle+!mS^j7IoYrP{X(46+#L)pLN0??pqS)uU1-*B zBrmkiS{wt+U-d$kVsX7TRCo8$lXMN;v9_Idp;m9p#gt_BzMsA*Diba~dO-%mtNLv< z#C(d{kt-Nno6Bl65sA+ZHx)aKMEO(FAfX}@o*>jZzQ~(s$N{3Pp3fdlDujx4t*J0e-~t{wxF@IVCq1`K{;4DuY=`}A{oE`t4mLPf zI3iZ7qQ$_m7ykUd$Hxzor=me27#+hvFwsY|gJm)(I8F)D{M(7K+unAy` zPGnK{LC2BnQT@S=Z`&Ev-O!Wn3Z(eN9(tX{-VK+Ce%%JWZjRk z%dkGB>Sr|vr0&TP#(&Il<8jGB8-t!G9`o#VBTo)yd-psEV+DL_1R0%tcnx%I{!<(4 zp6Ds7iKnz!yX0xpRquVq-<)6nzylvX;NO(6NC{TJk{y1F=^W7+BCIlH&{QU?l$}ZS zsL#2B-fMRo1zI2C$QP;F&-Ts_zTf*nc>WCyL`TBEVUdff+dLgE85MyK4d>|K=)}y% z4-(y6{^`NO68sb2Hqz`TY>`YCCBx13n0t5e3{hEhqsWaz!#BN4I3UN32)etP ztBMdJCO;`k#PxzT^7@K+ub8t*(C2=dnDr8Uc);D8#-5cHGQwB18EXAGJ3~^+*d~l7 z?+hND^ZG5E?LZTrfDDp(O8kS(r5WrHVh5p0*1_tCdX;4r!Btb(kO~pUFs&~u;>Lo!J7_8q*tk{gY@)Jlj_h;kH04j+is1Th zbuS0ediUZ(qhY_iF9KG;MKh?A6q1sbxU3EqNP4R>6CsP$u%AuR-UK5^}XBNffh;fesnmd^njT0T6Lv z-@i?Zq71*A1|4dHGrooU!Kb^)pZj>9u&(pYB14>>CrM>ttp>eLg7JCea+k=!g!}*M4OMLX!CysdZ}=+H0jI&eyH*B*^FLF zK26ecKgZb&ab=QEHGCKfb$D5>$|=E6ccsl<+-syOBnlTa8R{9~!%LQ8LR#2}kobe$ zZV_rl5l2CqNUap8T*B8yRaP-drtJY*Gy4HZSz;}8+nx}cT`Vje?J%tj8Oeu8?z;Hj zzIV45v($dT^9PnSc+V1eizI6P$(LdX4z6>)H?k}Is6KEZ= zLS6{Z(YMvz#oXM3L&+E!8rJ|(EGh&K3$PbLp+ua9yiV*A3!4@+WtcW2ih+-TPiC8W z_?N&L`C$Q2BE%*Vii%*Pm=@n8ijvt*kx%w5f?Uq5bA<~O)hvoLv#Mr&DY6Py_-H<< zrWAl06hEsfBA?f*)uP$??6a$Sb-lhAm_7OI!iXP}^6E1{AO38yo=!je{PV}~)C6rn zb}&FCr{}l*WOU+f*y=EBpaR=aE<&#er78Q+x5$A^Wn084ZY*Oaiz=$lJjx)kr78Ll zDy6y0#f&T|K-=(4?D`-jC&$CHgX5>JM;Alov#tyU0Ah|afKZTs#!HEEG+_b`Ht!%} zDVTqu(8B2A!N9pH1n-OeIX-<={(Q?PIS5#|w zE_6=g(MW`dFii}V0(czcqJ^jcbTBjS4dk3Kt=Ke2v9I=PV9SQ!DP#tCRdvCM$W-WU zp#^Z)?f9guioN8`|Vnb5q%~8H~fx`I) zPDaPf@EtIlN@e4uhNZQTS}UoDwuXo9-Y0 z#fH!k^!C&hb50w<8=tgIwz+L&t=(CBCE{)lq?jdpGEhV{H7@tyIaY?H`wY ze|9?vb0>U`gJZUxgtzoz@jWIcLNbQBElgO1j}T@vswL|X;dz1Il6lPTAf}}$BBd>v zOdASWQtw{ZzImJPzvybSS9%M6nHPyo9NNOqdbf77MuvNh+2};m&~iVeENvr}jy2TC zXNqj*;j!a0vFD#b|4cCZ81awvca0-VQ}E%a6Xqm4bo$}dlbzSWq4*kIe;P*2@U7i~ z{fT$ca3@48!SjAb$&rG#xHslyR4Amv6mcNysrfDIMYjj~Y@NYp!^X>!;%#;gm*B1r zV+^G`(xFq2ALZ22R{l!o);69zxu*yF2Z!GsxcCm0cr_005M(HHr@$0H-Fi?BB%N;0 z4>WMjrz6VGGn?Pq2MvjlKpkOZ1w|A9ll2v$D8XMPN342ZFn_N3N>bexh3G*AncvM} zGt3`lla&UR$|E<-w-81lAtuup%^t5#zKm*w13sufI3$ zwYgL+R!tnTPxLF z)qZL)vAQE#NO6soKjf>(B5z;gVi~#tz$fi-k?J98JY6+eEj)8wDb;s#*Q&sabv>Qv zllN(G^yDTfI--!xk?vcYsVmb1pap-($&q+IS~v}+yNw2I53K(<7VqVy%8t{SNR9P|t+cu(%QwixM zN&fl)qtSN^h8iLqXG>iHk5smFCwZp^L+}e4Uw7jppN{5+^JN*j@SK0d(*)QBzr$Gy z4%0RVC~_UuyPHZe_D9`tJxEVRAr_)tMnO@Ikr`38qcCK^V1WQ|Buf@;b;*uBG6I5f zZFM2pMpQ_+jM!?VTr!O13=p8419t!>`P~uY;5;+(zEToPp!s)4d0!F9=<4Bnl0@PB zQN*vN3Q&n{>#51m?b$+*8Qx2h8G3+>n}m=YVB=7>b+`EWm*6WbjtKyX2f{c(HZ1urLCi54`Amg@B*WF+fe zA%lta*UHUeJl;(Ws&8(@ZZMOWCC#L`LM}0v{%SmiwsrbC_@vTWnSD_?t;}vRLWkyO znPvr&Xs++{OoM5Lk(Vy-YtV})<^}uxFQGZN(jOux%f3VmSDn!O?V0}Ti}k8P{hkj^fGFYfm6e%=0!A`uOm4Qt{6W5|=yg~!zEV_-<9DcmdmZ@k}L zVw<4(6s7tYd;5X=s_pI}mk?dF@SdgowSD>G>EUUscp)EG`m4i(=TA3gy!N*;)(W@v zl9xv0<4YwWhp{=L%^jjG#S_2d^wmh*pbj7KM8r@`i`I*YX|JmXF)&X@z3Re}g)GGT zY@{oBba4Ll$#%QA{SI!kdmp-U_ujRSvtz=#=QhVH45BEJMM|Zb&>3(4eEQpPAKI?R z{=PKc^&AT*&O_ERkMuq#nU(OoA{l8tvAW!|gY!*ib-Qz_>`))33c26WwdP_M_H=w= zZsF0F2lm>+;}H5}zIyyRjNv84H>0?_MC|a1^~t~Xb%}W=86Y_NBEQFYSD7< zv$bT*PcrA+e&9z1@xxu4iw~>AUG@YTLqd~H;iRgc)}}?Oxp_fpfCDuRZ*pcJ*R4p8 z11PZ%g0@$$S+xU14p#FslzIGW%W9FR_fYKLnw#LpItrx8NWRuetK~YS_}Yh`BE8oh zWdXx4N5wMIg5eih3Dy9<;+W0&4Fe)Ra3dqD`@n0~I%lE%9V0*i%b3xOXEosZL2MfU z1}~Qu_RxG@lKWTRYmSZhs;06$)MyAao3I|Qz*YLf!MLs@$&dg)lB0tkmpA(H?ajyo z-!DaO>=)JGYG73pBXJouH3k6tI+&KTs{w6pkcl^;1ibudw#idqXv&xiZ~ps1nlWvi z->Arm&GW;^NdShGaswE~mi(~4`7a4)Hfuaua4NJK6KdU&9%@JRE zt_079BFo+DXLDma-r%QoZHCO?ypr*3K(D6Z>7i;4#Kk{grw3m>KiEG%JUQNiABMun z4u7-v-QMu|-tn{H{`0*v6y-J>3ccgDogw*t2TR{I@SI>iw)moWLSA&_()swcV}#FM z?(auvt-R$Q^)Lp-|YIyC3ItvS@Sv zL;0cNntv{SpTldVy;o0EQPQ~#&wqGvkOflOB+rv9ZVCx^eUyJYJLO11Ydf61-fnr!}-%Mt`+cw5#O>UmSYgRQ?VmtmY0 zl1)nuVn%!$zsuo2tK;QxE6>ZwBkbh(?ELiQ{`pC}SdtXLe>Kmr`-vUz?-~{nYZf6oC%ro_-SfXha%hEl6xQ}8=0u0Us3y(^(P zP7Ma{#CLUdD{+i(&Ck^O7hil)%+2z_Uk2=))&m(CbyTLN6K?2RTWe`{V}2u5trBXz zs;7ff{4UXu`c`tMxKwyD$hwi|OQs1DHVgH~QJuP=D*H zK^uX>9P0t^6-kdzSfqezHdV^5o+_7vOb}xPIzL8vah-P_TUE~i+_`2KVPcz{zb=A zG4o2~l+66Kr)CCfN=jCWb?07`!dXi`yf9%Jo}9kecKCq?Zy++>N7gdDJpT3=030@{ z8Dv;#O@epMelul*+OE(v%t{5*$Nsl_&kmBuOA}3=Ub{?7^V2+VON_RR0EPXu*|bH* zXYGO37ku@>)Ek;cTOBwrc&Y6Pe(^IL)3^iZz?s7Eh{ zw3kWEJu0X)<|jv--(BjQz#!Va{H|KRAj22F#SK8e zn&laS#7UOA-J(lp?A7bu#;5zfmN!=l*6{G)`|W&$vQ$2j)!U}_>hbF+7~)SNGq;=Q z5UEA=mA!6;5eLz4ORu;;rn7YZ*e0~hFkQ3lefjap&TDg$+BnUO4Ks71p1ihJ&EJJN z)vAws9MuemmSpGNkDK%DK$z@(df5j=iu-M1IvNcFgBPu!JD>bMrmivb$b`L7uyw$Kc@c`a2t;<)iruhsU75nUuRP@pFI!GIBWy1vPB!$m zG`oj+d`k*HC!o-aYIR#7UK2A;-yLc0wp3B2pB7QmqD}*TT4M;776(8TqA*r&*`h1i z#t}WhOmYbh%{62bqNPRs%Xa;8+be}bFu)dHP=moxa)Z!H$RQoF`wm7%GWP~^#%*mo z4$wo{e*IqQh@it8+I(0tVwSPbQp^J(0PV6&N`i?NgtJ_4xe_9SHR}tmICug1AZ!>m z*Z~wGCdqXI6ydeYEd_hs2Mz=6QVh1~k0m&ci)Do*%w-JlI_}mp)=naW1E9Br`0T)@ z#%_dCOGyKCc_g{jk*I2*wCE}g{Xt|ZL%{r@k`Jx<5wd(!hwe0cHzH6VkF5T2GCwz6 zb$cVc?w%7AwPH)E=2R`~NCmiU%W_gZnqOX8+JL6MSq*#<>GmOG$b7~5yCIFix&gkX zMe2>1yif#Yq1RDS@^ke4B^J2Ka1!6a9CUi`AI`rvY|V?)lfVCv$n}jc`%vBuzSG()YN1_bz zdiJtZ6mNm=Uqj*+vW756FQ+TU>dY=kI3Tan(LUEBikTRr8drKusSPU_D!Mpg;9K_d zC0tE7BTm&XWxaMzz(x4BZq!yckPNUxScYN|iq|2#*E^z!T7u-!tcXB(kAfj^-XQP` zz*AgO0#r8lNKO>Eh++5o_;6a1d(dUC_cVex&qBy zAmd?XbQ`4Qyak;35p!r2;)6CW=bp2GK*tWIG zk&6V#G`t$oJF+9FCYaC5P=95}_`7+i*L}mweX`-}u{3$k6c~%x6Bjcat0+wE#G&&dwo`|){6i%$!{OBHUu+o0cfE10@7;z4T9N* zr!)i@YDt)qyO-3k`?lMQTHETpCR$D2tnP*;!$YE(6|iooU*kNB!LkRoi|DZOS*Ofu z-Q>YmJbPwkBmHq;dCLycp0CU>)%gsSV%zXG1I??DMEU}5#EbcS+U@nXY9gn4iqA-% zrA;rZ4oDm<*12S3*=8+d_3`;B$asgx=fjgPzd6`HA09r?&AB@u0{rL6$Nbt06KIC5078G zJRk0#9DjFkdaknNYgX#c zH+CnfRL%}g%^Kgk$G6~=Dqb3irzGdhup2v)#p$-{Nut={)B6Nwd5{50zojcGU`lLL zJ%0f)PA9$t55KcJBcI<4I1KT;zNwDt>9lU*G;^8UG4w`CdZlcFVHhA9OB~;spy5^ANToF|;HXs$ zU3`jbNg2IFq0E;Haf)GhqMBj9Y0y8Ts2J*JU~nbmLIx}PovqttxKWr1`lQ?t8HU_y zvr3Bjx;KA1nM^S*Zlg^)@*q0iSA)8{%D$v>w~6uOWrdq8dO2%oO}IH^2tg~~@9k(ZC3ye?O{;Nj{7IRbI{z${|wA_6ZmWEcce z8)^tSy2x+8F$|Y0P|cdPVdvag6s;-8G~AREjGKU)j}@$?9rW7Mdc1NOtySm)e8BKF zBPSF%Yp%6ce|xm^)!0*r2p@)N9^Tlj@M*8 zMx>_^Rs?rB^L6s7>Dle!qW#SUeKAZj`t8^PUyQuIWi7IN5QL|yT?1dTATl-bS_it0 zBX1}pAkmGLX{v-3GW6LtsPv-YHQzfu4N_I6FCv$tYkuz=?v4wGRZ2-TI><2LT~V9u zKd=&3x*NA{>%`Oi>+Y`^Y>`6>*n)uQWU-R(7k1crxTP}xo|zgACD>f4TjwbUZIX1o zep1rsfV^YO-wgaBUyR}l84Pp>#GDbZZzG;Wl(0jx-g#S!`9qsz_Ry~B`3^nNr+36p zP!_!=^LS-A=5RB^55iSu$9_)<#~h*lZ%NG1oL z$G%6rb>e7d`0slk)&GyqVos_}M=*kE8&mS4GJ#h;A^+Z6IE-bqfATYK&n~8eegA{& zva5fjL#vzdoP2+av#e9Xhn71v7+)~?Q=WKApZ(`J?y@T%cF+x1oz;=H@(tlZ=!&00 zHR*{#hm@wIJ}`UOl0v8DJ!##sB)aB@7-H?^PkC*=sj5ZBk0Q`BRqu1X8d0OhLH z=&GA;LQ_*<3t0;Cr~D6zJwWPS;KSHq5rP&FT%pfo`nfjT%eCIw!Sn3BuNv`YE)^F#!p6 z%*mzn`rq1t2fG-g8q9mU_#Qx+yO#3Z+2USe5Ygrxh}%xTvFaYG+(Lb=Z}IQVQTc|R zT2e^xEKOw>)r@8f4zkrYEO(1x@KlS=p@klGn4h^EWaOa!x>Nb$wG~KTG;%HJ=wZ0GU8Db?>pp z{Ky1dz(=cPs0q@`Bqk2eu3q5o<+7@JMG(~+bfoqyr9}(Tj^G3!P%!QP+tLWbi*g0( zPIfpq>tyIWYQG5ET~LC9cqP-}Q~CxTBSbx3AjZwbe7&-eN8m{?i%$u!is^pG3wO3! zDyXR7#V#r5`1o3PZQZb4+8Bs`{Ro;wBD9Ut*;*8BUio&vTVr7FeXmb}KQb5*VW z@sCfxn$J7taNH?%!_l4jV;_D}|9)8#W=urGFA#>^l)zwd86X_Pn{qY2HUlfKN;n>5 z`I)V1$Yx$&%}Uc>u6H|~KHYo! z8<3*;V%6-vdhI`;le!wtXNJkIZG?=3Dh(U+I<8YOE4qXcub5&@N<+P8_5Z1c@;y^7 z^7F%?z>mQb&UQ@D*RwC+@M<|bfRPO@S$|B}=?u`7+PPOir zFdYC@2A2cZ_Kae9ftjXv03UzoARR>l2Z*9a(1vM7Or6A>2izy}+MuGrMQRq+m`DVo zn1T4cxUO!%cq!$P(nnk_A6*_q2?o+Ra~Pk01K*l%Pk{D0?w_4G%y<3^R5tXKBYy*7 zdS!M`_yjSNj-db|JBPbiH?MVh*6r)w|WQw82v=h!ANk%2d$} zElsX_=Fz~d(C(*87(TKENAAh-uOdhVBN^0a(2Jh}OLZcd3kN61Uv4EnFQyhUg8?6A zwvIPZN<+I1<@YJx)UX-RiUmY&2O3^hgcn5LxXip*!hhw>4ZsN1E>nm=y1*LHyBGFZ z%$vcadOI@@>%aw^VF2o4K%nWGyM_&-r=AUap;^z#Hs^eTBs(O8cyejyESsuuLU!|1^ zzARXTJl?9e7{)6u!NC!Z@QC0{16LwaRyiZNFj$tR=+a{Md7D~a?&z`a6*u+OHFC%t zMY*V!(>oG@CjqC+Zpu)0O=yl7k3Bcf^$juF^^C%j#f$LYFmuD#Cr1aLUDUJBe#96{->=UVK_-;Kxj9o7o=EML zNIXnT%QI|D!0y8Rsh!)Q@E|^S%Vz-V^M&42obI7uYf1C~JZu$Y; z4j=+>fz7&@Lo_w~exC(Rkn1igcd(G#Y39Lg7T_XSp!xdhI@%t^MFu+}&Y;jegzAE9 zM{GO2ndF-4^93`E83aL)c?k-ML`{Y0g|q}AXXJLWBQCF;Q~)_0*i#Xp@abpoBuR?` zu8QGy(6t3^3Cm%6F{7R>Z>%Oy(ZSNM_0z>GrFJDyU=oMzhqH+zNcmn2vd@HQbO(*F zF^VEpwO9u31;uWwq(sr6%x5b3$O%V%&6niqCdfp=@BK(53JGocQE-w{{3k6p*`Kx) zopN9J)wN`$ekS?K{NLsigf>W)%LrWdCoh;t-#M=$N9GP);4Ov#K^868NTq#^kdmk- z6!z+K&El_uf9D1wtmLkTt>d+t2oQ@y$x+Cr{*y+i){hD?R-^8s%h;iFe2TOaAvfpma+~l$bKQTk(jT0B9xHV za;T)a5AH}gnJi(Or1;*`r>6&JXZMV}h~@D4ROIma)Y}xG@=Mb$Yzgy<%(wY0%zcHRetG&!$vjM7AFkKQI$reM| z#|dn|7b=6?6aJSk8T==TAg6%uO;O5m3^+HvMO`lGS8tjr*z#7@yKTBxrNiN8(BL!? z+%0aa^3Afk9DHy7J*%`~>|ybIZceVGx`ceI%^ZX!^UT+X;t3eCKmieeR#8NJ?Y&48 zz|#d+_xS|zuBqX*Kl_^$9`r&N7JOat)FpoU}<*PAQCG;Lmjm zGuSOoVJ4w>L){F;;A)Ea359h5;GBnEDNE8RpeNzI;R4u#%b{(OO0C7lPr~D|Eiowo zVg)FvfatrPb$bI%JIc!;dSvgi%g+D;5zVne-xhrw`|eajT50pY;Mmd8qE=jU^iP+& z>C2zbPWHbYo}Hf_>>cT41-Dw@y|~YA+_Ed3_7d_64~&33>1L-90?dB?XtiE2zfdpU z{%61VcmFRwbPOz!+%Ugevxe{!7(|jGQ{d8G0oKaU-F0Y9FKVhZlN2oa#1#!4pFrFS z_{^IIvR-xe*nNy##+Ej%#L6ZHaAK2vdpdDmQFBVglj!HizwG?{eGi!8C)UL)*5 zW$$m-3@``AsbkxqT)(J1l!~MB?xGSXUQgI6@9_;vyt2JeiJ$(P0J*h7dJXWJ-z(} z<6vi?sxd!wz1%5iVVPdfn^ndgGBba9o$V3;_01A{HwWZ>P-TqbihCDDI#Fz3QFt#x z;0J@gD9}j2m&!pK4T?S8R47Jiyij&Yfx(Hg51MtppEVRN0I=hT}V4;^oqJOVRqjT+Rh*< zDn$1NyeKCXa7c+)(|?hAy>QS}l(rQ^VA7B$eJ+ZWvge(`0tgr$e#mkckHCM}=+yxU zBHW2ocFuAFdY66y(CqXRK4R5`2rjq)(LZTtTstzzB7px-7Td^dqjW^&w!m+l@7nhA^o6o5kj0OOvxK4UA zY`uH*-aZ8-!E8NDyJD{H;C5Lqch2NRj0e#j5D8=RX#thYqJ$LR!2`N5O4Ce$!)o%>K5|AR!hKt!?Un$Y z^v(q5H@zP5)*v}=*%;akEVY}sU6C-c^gpq@h#~vHCB8pq8h#)Iy9+;k4s++H~|{9#(}zkawfw(o-=GpaD8aKlA_fm<2D1uo*Rfv2ErrFvJ5*W}x>H$}h? zyb-setXs|5#iR;6f?N~J3edqyBeTLx$@k>8$`q!C1-E33*uHQR(Vq`@Zw;4ILMEWa zSdM^C5QZn8oH7$_=52-B$^{Q$o1)LJ;z-b@@=3L1giQpW?MXwDX}>RhfRB2IqA(!`}6klu1JKjru6|UG~-mJF1x1j-7J#wUY^HRaYrABPE ztr~@44+Jp?-)T4C`Y|oZ96Ql#DHc`aloL!e}oloB|)77!v6PquXY|i zc^x&uXUm&Gn!Jk>&-RncJnrcKo52Dwb&Jcgo;DO+bXRz;^!gL7`DD(w7@c?W(lD=C zUy+o)rXl?zkp#h21_UVWS0|tO#gCL`^}Tmua|He=oy&>ccgPZFolGE&?9We93RU>9 zQq|6$#@mahX?B6E;sZxrlrMpXVX!txa#E+HQR5O$%MW^+7em4eYE}myiCdlUOGTI}RuGZ142Ty=Mnz zHqRm3=QZ;Yw)-%su6+aK->B0HS)8pZh`Ps~Hn+3{gjzqaZ z7+^1FZ)PYPyF+aauYy*Uo{S+S)h7_Ow|62mRHSOP^z->dgn(1X*{lhdu2Qz4@6c@E79U{raXBJm_vC+!m>5Rm?i;)}y>wGy3S`q@=$`@>@Y$8Xn?!2chz39g4ur;MS(;&c z1x$uCf@G4#w!F$o2|MUA5~+iCAQriU1{gY;idb%r1&okg^M#N2D-l_|Z-nRaItmvy z*7YkrZdyPNm@-e-A1Qm?t4YU2-cDLBFQYQ^12E=#=47-a#I+pqXC*Zq_lOEuHJX#DGz}m8fO&8+dN|57+=z*c!C(WuvO9tTJq#5XP6dEW~kZkv2X2ow2asvvPMh}d>N|BKJI1mhY;T*Bnaxf6F~?GfxREE+`R$C;61Mzd*?yL zzbIhSo%k-MV2wL>kk1@yCyhkC#El-5hHe^~iNE;;-edT(GuX6M2Zs565^14>BBO%R z`!uiw@9IkBtF^HL833C-ta3;E3SS-?1~!49BR3cu#~8^N0(e=DRfvJKaZQ(6P`tSv z@Tr+lj>!?3C`v|pp|7Nsk?U*3?rbTBJ!xhn73G+Z*j zT~B0zixr&pw$^M*)`X-62?{!ZfqMT1FcmD93%%&p%_WcT=3XK7<+g>frR?c>iE{{=*B-<}@vDE+%C`F5k$+hmm6{ zLshm^&kPK}R(|;Wo8j5Fhc85jb$D`oCcOU~a@4)8mk7;*oQM0%t7OgBW>szhR!^G1 zQs<{HSrKNa9cI`t>)J`;(HoBN{{c`-0|XQR000O8+I44C0pyj}(+vOsaYq0E7yujq zaCtOpFKS_SVP9i!Y;0v?bZKvHb1rasRa6N8152DGVzzZLVs&^6009K{0RR956aWAK z?Ob1P+c*+`_ov`FFVfa^lXf2tK~o@`b??$k(jayhD7uA8%XF+(mVBa|#8>QhznP&( zk^18}aoT2q8YpU$GsEF<=EoUQ$_{(_%TpGnlPI~|XYWR@pZ*hDI^Ax!bDB<;v0#~y zi!_nZ7`{)_42Y8}k>$X^-ta39zvEbhd6XtBjJcFTvOHzyt9*tnrn6IY_St9_Nj6u) z;aj@EZXUDoGG}qhCt||JD>e~RzKrvJr_(vg1ka%r^Gj}GZBmViv7+nMZzOVQYOk)!Mwkc5sItIRktFJ$-AO&k!ELCAjW2U+K{>WZsTH!>oJs{Z!1y z0wi3DJeUiRI#KOM=vk#K&A!OeJO#5cF4cEyIMe4$?0og>8(IoA2F!iVlS#0Eh*2$> zfZ#}iD;_Vwwwbt$B-kQ|lEo6qDcC_n5@veSW$twQ5T+Cir2H|4Ri0*EIa^1Z&3A7Kpo(UDkMnVW^lcWGj$)(7OX3(KQ4+Y?QKb1T&7KsR&mgU=YAq zj+kqk8Rv=qS=tyG7n!(%@hoYVp<)-+#VBTv&Y<+Srl?$slr4!-&nh99l6C{^W6J&6 zb%qXtBYwi>Pr5xylxJu?WTA-h4DA(t8p$siB`hQW&4e5wn!1K3c&8Us!1!#=?r*RkZlDCA=k;~xYS)7p|HX^wMI%}PaCTBKa$qU%G2 z|0Aq<2^^lo*x(^OYUs6)Sstev+WgM!FAYoFuBulGD7FTJMipsjIoGtnOC*7k7r?af zi-R)&C7una?v~L8Ilo7=<-lfY;SG=!{&X0}kdqWz0LrFPNpaMIUdr86Hg_f>wD+dj@+nEu*h*fj^l1vY~ZFX5_?=jjzEGfheyM$-;+CpNH ztc8xgjQ%G)hh-w-HEI)6CNRyaX?!bC^??I&2G8$nfN_V0tuE(LA~!IZCjF0C)h(Cb zqDBB4@&aibwxX@VM+ploxQhbaue*Qkv#)!9b^DO7&w1`4xA(=z-5tJM`MyJsN&OdC zAVejT2C1R^s*2sOUqSj`rRXkrSO0SwB_6T)ZX$pNZ3s}z&02yJx~!;#Uw4Q~@XGd} z41&Sw`RIKx9E@<+1X}OIN54x5y^?)MV| zpXrp{rbmwsXB;XStC||~@CzQ~SZ6qqB)tko%$SkWU@VovFm}k7QYi%kK}1ly1UQaY zjE9u#EwX4H<JVEg%6VMZZh-=VM7}1j=cZ zKpH%$3o4avWm^Mb3hj>35(@9SsxHR{mV(cLqh;t*-DVR)O3;`sCE_~3DIVy^a*a+& zlcp_BEP$2@`u^d?#o_zl=N-EsN)KRYt-|@?#mG?dL|%)`7{c|-z7#M-A%$LeE!1|#AD$c^4Iak)2o%cgFiTZ>y1u$@fr#_(xIAmB z9SH77!UmCZ0BTiZ(OOnE$ih~PFsNVr@(fb>ji{i4vUCrMaqk!HAHIt*RW%|9kLz+B zcyQvT4S`0mJrn?>HXuE#Ie;7PgfOJIgsi(qyA>qZra-A}`e+8w0k`iUfNywG5d?lU z3yDd+0FPIlm7Qf&-Qc#a7f>9ETX8S$En3{6xa-2*S-5*~Dem5dyA^jYZpB@TEZh#; z?mm0(d#)rS8Ofg~^P6L4j3n=yZ{m}yKxXN%PnPG!EM4jTKpSuSWs^It;_IdMHLIwl zmGJJJ%K&{vs|}$l$MCq+=8N6rU06z1XW91k)3eh`YvLEHjY0zli>w_WNb|Rxc!0V@fR%4Nqj3#TtyI(r9M-1I@HIH z=Y#J(sUajnHV!>s@~k!LLi>3Mo{|-RYd+Y)OVI|^!NV60!V{^7ctRh~dcR*#E?G`- zKFj(|{MO*4&68wMzxVw|JMK?EbPyr7<-&6~D2_ghD}EPK<_j26nA5w0z56J*40?+R z2db;{?f2WNI5E8G{znWnFAfzk>J3P*swoWI4cMh?~WM?gL^&}8urSS>Q1`W`D`9QK__ByVHXWEhN zb+A^i>mX~bkj4d+ssOo!YgA|XhNNA?rJNl9+LSdVer2)cCXWY2baK@^|W7E2Jbn_{b-Auyw%io32 z;7>8a3^ewJ1LnnySucP0epkgNu}DX2BPGO{&pbU!$ffa_Lr@Fxv(S5RxTZ5;WZiB# z7E_ci>hoNozd&`v9Efa!9XJ9lhepNL>G!flZ`W$TOL$t}H8G~@L@s)RiRj>YJ{p_; zF+OHxy0D9|`ef9junmNne+ z^=HQy9EU4TdtwYD9NlaqUCJRslO#+U5Hov!;+MVf%3tM|diC%J1N)d1`4KnNzLfgB zP74&9qpgn$RTylSY`AVsK5h}~h+&PEom<=_;3mTP)ruz#AIkDe5Cj5@yEr&qn%cA= zwCIDC2>I}!M-F>oEYWI{m8O@YlJsrhNBevPnf`ZOi+Q-i#^!tWHM7;z>=`%O6=oM( zQ5AZw#|28gEmvIE6iua1j2Fp~(;uP+W6dJYI@v2>@Tje?u-w3E zc2(Y9Hc4ucm@Dv#Ek_<%#a`GevH{e|3~<~QLGftYi**Px7?CbY+7BCy^7J4HWQhde zp!OBm*y?6>X{?N<^$*?Y0|(-xiO;HWme?&w+s}eakH!P}6_jz@RtFzzUbL|e?_`=j zw-(#FJ;CMeRZ~Gg`u@*Y-+5N8+&8bWia8)NoF58vsE!(V-Kx?$n|U1=B*x6)E$lhq z3}QL}CE2|{UrW%hmA%{wR}u6m(H4}6C z8Ys)hB-&;BgIb-ul!5@e-x5boo-oMOdyX3D;-f9~^Wt_X+LwPF(&}cozDJy~XWn0S z*#@B>f~y)t5%(=z&%RcEB-Rqq&OS)7zAiRR)P&J4%T6DhbcU}#HnM9mBT=@r))!SD z{bjJH^!xR53dg}&qXE}Uz7%D^+7G9g5J&@0s-opKZ@^wjVSa8!nidv;^H%!{=;k+2 zk^=)QIi0aio?PcbP*zH0?9XoN!GG81*3G1K#3s(NZy*L;U=ACNp_viyyZ*b}NMpUw z%?Ac_##Bx0WOz1)F`+Pz>ZqK(SYZNr-l;HLCgx#kvxEgg?2h7<~7w@y9(t0RuFjisrbi?gW{ z=%4-JMa9^R8eQ3pT-}iYQ15P_0DyPcZ_Ry91+1O7KP9X`GgM!O<$<`sW7IkgMf3*W zC+FpU9@muEDC{{cVavN}%qm)pSD9!n#hYO;53Kbzc?p>ux% z@hQ*JzdNtGCw~|5OL7J< zR+7S?>%5_G0vgVGx!i7-h!4EfA*ggr{tF?Rwi8kR^0f5^D+cx^4eGMPdi?--;v$|-)L?#eK48e=$30sqrPoC9-7UIhn0w~Gtj zFQ-T1v3AyMIpZA+iSEOYV{fC|$@bJ$ZE6xCey;mrc3W#oMtmrzqq!xeJLSAuFrEUZ zsbkLJkvM*K9R#1Yy5N*&*0B6^zH;}uOWZS~;jJZ`Mb~h)|77QUi_MId5*Mq&%hd0R z6_0Dhm{MUF1*Fv2a>*@JQEqyt4fZ{R>+qE=UPqdg+Lw>VZd#}&@R?l2CC@{)V*m(1 zO^cmoy}Zuz3+CX=Lf9XLs_EYC?a0Gj)7y>J`+giZE(PC?hl?jhA2-LB9KoyO%G;?a z$f}yB>m|tZg+x1>ghbwtUr-=h|Db3~ZaR%Xz;@ZHA%Qi(1D3l-tz@aSoe)vT%qMYm zL={8et^GjBc(rKD06V`ug9TM>Xs3=U+aIEDKUbd4Z!PsS{OzAS;c9i}ZjYqv-d#xT zCoDbTN^3Bskl`O9>7TQo(+!=sGS9?n1w4g0ya%qazIrEbjv)MYu(wCkQkvfqy4u0_ z%$cq;5lY-K<*Ikv(n|3QmUf>Hfk zzZC5Y@r3Jt&%Ay7;p)EEY|h^kg4`=KBD)gc9sy#<`bb}=(gcnc*kmhP1* z@wwyztlf>^%lFIsH$(`M&lwz(Lnt)qb1s1!9o3{D^<*w%CEQIBwsS>3hG~v|YV1nz z_)~xN3GP)k{}~^~DmdO9^+CDS8a{(uqk@}m{}QVP_%0-OZ9txUm;P`!HxEdvDw<^^ z4XsuX4oCkYniask5cn7m3k$TbBjWsIL0QA@Gb_vLX!u$E*N1YrA?BU{lE^1q#&`Io z*6JbUJ(YtCp1E<3SzPRR=2txUBYR|ucBLrqiBM^Uj#UpSQu?3#6?vUDrgWw^;fIRA zJaX7(bKv(P(1^$NrL`Z!i|MLG83Y^P&DA1KWjVDDm6^*t69Z!cUJk4lMSo8iRV#X` z6apLOyY>kD>4c#lH|~#HWLJ*#lV?{Gcxz`K27?cZF1X1vDt!SpiZ^bI(R$f&npI07j1K{+}7{B7c7#wk5AddE*aBWvN(cu*0h!i&7ZK(>Gs#AK!H!hr& zXWq((%ti7M6emRT&ndQ@-l(P$7@pn>jKOJDz577#QX%ziI9hNYWZdh$vxpFDj9ma^ zIWIjrqGW9=$5TTcF&hwTj<2&pEt^!U4wzL}P15c@j(w`9_s3^q6b=A7?p(jUtVj) zWwM~bRuJWIeNfB=P0V!{auW{I#AuK3IwB*vV3CHTO%*ewO;TUj8>yX|Odtldo-H~{ zLeI!DLyccD4zN&d6sZuKV%yYDG;R(%IRU$Q5A#!DB1E&wWJ_<&Dl-&Y1_cca9j1Tz(c4d?7 ze~qW~GYn13#fRZv!rVMOScfYbG&=1r*z`lwTHA6?Mc<-fAnmG+T8b8p$*g;7i2|E^ z3!xM}gciT~Q4yd?#&?Wa)V3Ja|KsPl8KDw}0o01Wc7OAV`> z0DfJ?Y6YvRjCS#e$x8{Ips|9Z?l9raLvaVq`DH9V^guo>Pe0uad{N~ojU^#y>JC^CIHN}iGy6GyMlPdEZcG5U z>y=}sDSkDtfkynQmT@2*h9dlX3F`~wg9@!<+;>99=#|e15w*JuM1^exRN-T_b_VL? z;Vz-L(1`gnxs}bJumYQi5=~ZW_{AGaqpvB4CL|q~=Ao3WXCU=Guk-5;ySp z5$p56aaGEjL7%n6l92;Tc$Q%smYEgEnN3$}k)V=QM3B}EpCes2;kh!bQ1OAQmY}KY zWRBAymRf>cOe+ae{~zCrKxqs0?LiTr8F)tPpgU5oL^zNI-ic3ijYo;MX$?0gtnn=8 z355}FR^guB=|3fI5gX@uJ*^QN6KkY-V#NF*@7q0+4v2f3Virnl%f{CYY7~}`L57T? zh7Lt!B%?W*GssBe`!7{7+91njmKmba%CS)z9VtFw8V)P}1;>68;EkpIkSrt!pd z(r1gFGwf^hHFew;4NkZHcp2{E<*;3ZZm7mzUX^sGfoJHj?l1}`JMW~jaUSA(m}+%; zB;sg~dj!`xWSc(*OXGu0{IU!z;Ou4@FtgJ0ayB`=e}9FbZ|RSGAHJ@Xqe#4_PUo5w zu{R4b(K{liz@MR<4|bax?;=`}OpwqY8X|htJQ7=owNat`f;G?>%6oQ2>VA0&1;cZj z-8>HS#s7UbM-wt~deLHOKD34b>YY0W5RYlX+k6i#)S6ettR z#AIG*&gCnk=e@eo@1>Z!>HYZ3&#kal?-3HVks$koyW?0I_=Y3zJ*WxB9E=@fRJ?{V z@DccS4yt9=)S@<>I4Y!bcfEO|*pA=P=rAw$4Zh892-Gy58k`g8JN5^;3?P;H`^{3d z!ZVwq5jw~_Z+Jq=JW>2Hyc_II$^%%QR>`m~cf2+>K@lYL05_bN&x%p>-1rXd^vZ<2 zei02rl7&{8ffA!K^qNlBu(S&bN;|D|gd_#z66*33=Tq@SN&}?0-7XP{%6z`oFwaurEDJ#0~+j8%6cB{h3Q9h4x@!{{AqDWM6y#JiwMD&aT zME5DqO8-bwG4eA^dqLBdQV-p18PL;O;J;T~Ax@yy>9W}9>RV@LS{RC0H?;>vs~iZS z@&M{~El9$e^VZLEUohfP2&L6o%->}?AZc{6Fn0;Rmdd~^w^Go~*C2v^2p7IeTQ`EO z?lCE#Hc-A=Ej~u2Qsvu{kov2!Ys|>yrwebs(v}0lH+&6roxT<%9oWpNx`<@9fSsx^ z0}o7Aknz;aRa1>=KM&HA=jz`$me6R7Sa#JXp)t3EHly9wZ5c7~W`6bGKIbXLu5ciE zJ;WqWo5bH+&;|QPQnG>EeEdGzJsG*IhcbLM2F_WxDHA0d>mu)u(>4Q z6n)2Gf-kRlUoGt8-6Di$2o*!9yMI6CuJ^hY@3)NAXu(B-;I@#wp2xQUclVPW*u)Qu zgSEip3CI=ouP@}h>Q7yh!zV_ zfXU1*>T_0#yllUewgSE2MbQs>ffKskDr(8U2sy;DkM(>D6@EAECxdymYS z_k90TXrCQw!?;sD4Xu0Esj5nRrJzSaNzP*X^hCg;7E`CBUx%}tdkgC2Br`E zy@8)Ia33_~Zg`a*`L4lccU0S50D<*+TqW*>H!tkfBBQfp76zW1GV0Z&@yQR*GD=UG zVAwp62m(jhNBr{>$UnfZ0`t zM^ujpm{ft2UJ6#G-TXs(^9<)D%*;mIf2w>OOPp6#D%hE?=3L6{cmv}u@Ijr)9CH=> z`#n&wOBmTU)Cr~Xwq+PuEll&ILPhTnhH*+&iBW|dRJM*}zCbQKysUP>(CUu(nw@S} ztM3IFFVpjK2Fg73jmrk*-llTS+kNl3Y=}g+<^Tou#4S$O5)Bog4AO3U^-*a^clY6u z-ZxFW$DzABDAPMBk*ouUbLQ{cjn)rks2iPT#EKtBizso|{G2lTK(*yxwr2E{`|iFA z_xdASQW8mX;0fx7QM{Idg3|-(OMzDMt+VK%1WEG#!NxM)Ce)%~y9QFh*(I44nhVM~_1E~0g04(1 z)Y7x)lM;Db$2Dubiyy z+l24*yA^z8^z(y#MHz5OW$!6*`QCqb{yZ0ZICa0$6lOknxU{))_>mdq8Ove0t#o)~ zPdHsqkn#O5Hv=kGB^H9dCey;wZ3{DhWzS^RB%yDn{KjJ8XWIqM_3!n~J-W|g<#BID z8YYXp(*hb;9yNn&1$ZOrW=Tc|4I0x^zg~it&fp|Z5@fj8XJ&c37rmll5<+zgtlm@c zDa9L*@aDk?5mes{YEsnm(MX<;VekmuMxTI%5Y6IQsMI70xQ|faP>azB6z{1Zw$7I7 zTiMpfO#GX5AQCg$Jao6Fb}AU+gaaVoH$#ki#(iAR6z5+4J4BRn*9rxr_|A21GH)UK z9FvTZz|UbCGiYIge9B)!;|kJ)Ff^MYQ~U_~%EPLT8_NcL9CZ)J4su#0qIrij$IYzy zvs0R-kRN7{`cSDv!vPlyYG7EC zS|9@0soSTipF?}NYm}o^BeJ@JuT&>S0@WevF<)Lh(V*J!EwpCQll78kBy^4r?(wWd zk==NkY$rZp>9D{5Ufk0+S^w)~OcNb)s6Q4WlcBk5MMjCucab&6_!P3$PC5OK2d;wZWRWzM&6%QIOJq`f%ijv26M^hE|U2RyvsSM%MM|Sb})R{ zgX*_?=UamQ4%PKDGlx@iiMSsI?d`Ab+Fc`t&7-ghazelP_R>YcT)Z40+;O;ixN4MN zv(fk_c>-^J&pUpy*uuNC`J4+rZUuP*_4)&fBX7;SHx4MHT_kX_uOE>gUBGci<=1=c zq^U=CPu6|%(NGGd`jGv!aI=a|hIGNfdLLx3y~XiT$F@`wc?I8e?KA29Ya<-|P>46O zUK69av?Q25oY?xfQIw&v8d7umo5PjADSI|>HFIyHjr>7*ybpR7^|JJ_(hLLYbZxVR z!M#dYJ(qdOA+aR`GSwfOljiIiSZDzkjwIPrZ>qQ(O8Rd1>$0T#SToTmg?uNEr9&8jFGlgZuzIXf<|aW>)o9XZ;+4-xY8CSrXcC{wZmnM> z57CCz7)ZIICC7A6Kl7E?}#4<=nF@otU1v28mCQr3Pu6xs0CGk8{<+!_$g^R++&C$xeeW~CQb8++6D5&Fqw#VWOy`Oqghwl ziSJ5jFY9$7U*4frq-OiM)MU1e$vYEMaBHG>$m*U3RE`^GJKyM9qUZDgPQL9?62sz1 zIlT7L6}$F3N;L(tihzMiU0#4dv*=RE6S1?P+bm?t10v-S*h*&b2U^vAZ|F&hXgXkF zkU7H^p<|cC#c8VJJ)iTBk1^o=@%BmQT1?nDJ1@p>`ehPwvM6YwrDAgUIviwDO#pzd zZqi%H9esjEkLDH6_{GxH&9S$ZwG90$mV|Q&kCu8}vA;=YiI(rzkv?hJHX_}P_s8aiuwvfc-ILRP8Dp%n`P#B??Yip@ zGgh0}#$Pt=m*r;l@E#O;UEB&%Gn$y*m5%GIvF6Ho&FG~`xD8;izM}OVJf>s?UA^Q~ zIimTtD>r#;*WR5VTwRJyu-B?K1za z_i&~1{Nn(`Ez{=-Zcb-Ogw3HJPMh}y2dY;*!ayyeI=7m-{M#Y**KDzDo8l1r@?M^*hBF1%N0loRD z(YTvovZicm5OXdNs7fQSJAp#2^7 zrZ)i!0|ftpa<(+L1KRw}z4!)|&7}T!Vd}rfhiu}Ydd@#h`=)aN{<8fmln?Ij@x7^C z&TpPbW(SYIMk=BGA8OaXM~bS1>bVd721Nt_-v3LaS^t0<+uJ$-UH-xR9|Qg8=v5vH zf@fd=fN6990OMbnV{cdaYa-1o-v)2#`FC;d|1|!8Vo~9WS5PnkfH@uj0R3OEM{gd{ zzhWJn>|N~NyrKUe!BQE*EXu!S^i4hdH>wcX->C)$mUfmd1_u9+)OR3r)yNhDiX2S< z;Di<*)|IEU_dSd@OxzFS8@&0cevj6uS t|EHMx-|2^8f2aS4wE9N=i?sT0vZ|si>{|f=07!4E{#!Ay=KtmFzW~{AuE+oY literal 0 HcmV?d00001 diff --git a/python/pyspark/context.py b/python/pyspark/context.py index e47f162ca936c..59b5fa7f3a434 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -1365,7 +1365,7 @@ def setJobGroup(self, groupId: str, description: str, interruptOnCancel: bool = to HDFS-1208, where HDFS may respond to Thread.interrupt() by marking nodes as dead. If you run jobs in parallel, use :class:`pyspark.InheritableThread` for thread - local inheritance, and preventing resource leak. + local inheritance. Examples -------- @@ -1405,7 +1405,7 @@ def setLocalProperty(self, key: str, value: str) -> None: Notes ----- If you run jobs in parallel, use :class:`pyspark.InheritableThread` for thread - local inheritance, and preventing resource leak. + local inheritance. """ self._jsc.setLocalProperty(key, value) @@ -1423,7 +1423,7 @@ def setJobDescription(self, value: str) -> None: Notes ----- If you run jobs in parallel, use :class:`pyspark.InheritableThread` for thread - local inheritance, and preventing resource leak. + local inheritance. """ self._jsc.setJobDescription(value) diff --git a/python/pyspark/util.py b/python/pyspark/util.py index 5abbbb919636f..b7b972a5d35b8 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -331,13 +331,10 @@ def inheritable_thread_target(f: Callable) -> Callable: @functools.wraps(f) def wrapped(*args: Any, **kwargs: Any) -> Any: - try: - # Set local properties in child thread. - assert SparkContext._active_spark_context is not None - SparkContext._active_spark_context._jsc.sc().setLocalProperties(properties) - return f(*args, **kwargs) - finally: - InheritableThread._clean_py4j_conn_for_current_thread() + # Set local properties in child thread. + assert SparkContext._active_spark_context is not None + SparkContext._active_spark_context._jsc.sc().setLocalProperties(properties) + return f(*args, **kwargs) return wrapped else: @@ -377,10 +374,7 @@ def copy_local_properties(*a: Any, **k: Any) -> Any: assert hasattr(self, "_props") assert SparkContext._active_spark_context is not None SparkContext._active_spark_context._jsc.sc().setLocalProperties(self._props) - try: - return target(*a, **k) - finally: - InheritableThread._clean_py4j_conn_for_current_thread() + return target(*a, **k) super(InheritableThread, self).__init__( target=copy_local_properties, *args, **kwargs # type: ignore[misc] @@ -401,25 +395,6 @@ def start(self) -> None: self._props = SparkContext._active_spark_context._jsc.sc().getLocalProperties().clone() return super(InheritableThread, self).start() - @staticmethod - def _clean_py4j_conn_for_current_thread() -> None: - from pyspark import SparkContext - - jvm = SparkContext._jvm - assert jvm is not None - thread_connection = jvm._gateway_client.get_thread_connection() - if thread_connection is not None: - try: - # Dequeue is shared across other threads but it's thread-safe. - # If this function has to be invoked one more time in the same thead - # Py4J will create a new connection automatically. - jvm._gateway_client.deque.remove(thread_connection) - except ValueError: - # Should never reach this point - return - finally: - thread_connection.close() - if __name__ == "__main__": if "pypy" not in platform.python_implementation().lower() and sys.version_info[:2] >= (3, 7): diff --git a/python/setup.py b/python/setup.py index 673b146cb6c5d..ab9b64f79bc37 100755 --- a/python/setup.py +++ b/python/setup.py @@ -258,7 +258,7 @@ def run(self): license='http://www.apache.org/licenses/LICENSE-2.0', # Don't forget to update python/docs/source/getting_started/install.rst # if you're updating the versions or dependencies. - install_requires=['py4j==0.10.9.3'], + install_requires=['py4j==0.10.9.4'], extras_require={ 'ml': ['numpy>=1.15'], 'mllib': ['numpy>=1.15'], diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh index f27b6fe8d9a04..341eb053ed7b2 100755 --- a/sbin/spark-config.sh +++ b/sbin/spark-config.sh @@ -28,6 +28,6 @@ export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}" # Add the PySpark classes to the PYTHONPATH: if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}" - export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.3-src.zip:${PYTHONPATH}" + export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.4-src.zip:${PYTHONPATH}" export PYSPARK_PYTHONPATH_SET=1 fi From 1ec220f029f90a6ab109ef87f7c17337038d91d3 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 16 Mar 2022 20:50:14 +0900 Subject: [PATCH 002/535] [SPARK-38567][INFRA][3.3] Enable GitHub Action build_and_test on branch-3.3 ### What changes were proposed in this pull request? Like branch-3.2, this PR aims to update GitHub Action `build_and_test` in branch-3.3. ### Why are the changes needed? Currently, GitHub Action on branch-3.3 is not working. - https://github.com/apache/spark/commits/branch-3.3 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #35876 from MaxGekk/fix-github-actions-3.3. Authored-by: Max Gekk Signed-off-by: Hyukjin Kwon --- .github/workflows/ansi_sql_mode_test.yml | 2 +- .github/workflows/build_and_test.yml | 32 +++++++----------------- 2 files changed, 10 insertions(+), 24 deletions(-) diff --git a/.github/workflows/ansi_sql_mode_test.yml b/.github/workflows/ansi_sql_mode_test.yml index e68b04b5420f0..cc4ac575306d1 100644 --- a/.github/workflows/ansi_sql_mode_test.yml +++ b/.github/workflows/ansi_sql_mode_test.yml @@ -22,7 +22,7 @@ name: ANSI SQL mode test on: push: branches: - - master + - branch-3.3 jobs: ansi_sql_test: diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ebe17b5963f20..7baabc779867d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -23,20 +23,6 @@ on: push: branches: - '**' - - '!branch-*.*' - schedule: - # master, Hadoop 2 - - cron: '0 1 * * *' - # master - - cron: '0 4 * * *' - # branch-3.2 - - cron: '0 7 * * *' - # PySpark coverage for master branch - - cron: '0 10 * * *' - # Java 11 - - cron: '0 13 * * *' - # Java 17 - - cron: '0 16 * * *' workflow_call: inputs: ansi_enabled: @@ -96,7 +82,7 @@ jobs: echo '::set-output name=hadoop::hadoop3' else echo '::set-output name=java::8' - echo '::set-output name=branch::master' # Default branch to run on. CHANGE here when a branch is cut out. + echo '::set-output name=branch::branch-3.3' # Default branch to run on. CHANGE here when a branch is cut out. echo '::set-output name=type::regular' echo '::set-output name=envs::{"SPARK_ANSI_SQL_MODE": "${{ inputs.ansi_enabled }}"}' echo '::set-output name=hadoop::hadoop3' @@ -115,7 +101,7 @@ jobs: with: fetch-depth: 0 repository: apache/spark - ref: master + ref: branch-3.3 - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' run: | @@ -325,7 +311,7 @@ jobs: with: fetch-depth: 0 repository: apache/spark - ref: master + ref: branch-3.3 - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' run: | @@ -413,7 +399,7 @@ jobs: with: fetch-depth: 0 repository: apache/spark - ref: master + ref: branch-3.3 - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' run: | @@ -477,7 +463,7 @@ jobs: with: fetch-depth: 0 repository: apache/spark - ref: master + ref: branch-3.3 - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' run: | @@ -590,7 +576,7 @@ jobs: with: fetch-depth: 0 repository: apache/spark - ref: master + ref: branch-3.3 - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' run: | @@ -639,7 +625,7 @@ jobs: with: fetch-depth: 0 repository: apache/spark - ref: master + ref: branch-3.3 - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' run: | @@ -687,7 +673,7 @@ jobs: with: fetch-depth: 0 repository: apache/spark - ref: master + ref: branch-3.3 - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' run: | @@ -786,7 +772,7 @@ jobs: with: fetch-depth: 0 repository: apache/spark - ref: master + ref: branch-3.3 - name: Sync the current branch with the latest in Apache Spark if: github.repository != 'apache/spark' run: | From 8405ec352dbed6a3199fc2af3c60fae7186d15b5 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Wed, 16 Mar 2022 10:54:18 -0500 Subject: [PATCH 003/535] [SPARK-38194][YARN][MESOS][K8S] Make memory overhead factor configurable ### What changes were proposed in this pull request? Add a new config to set the memory overhead factor for drivers and executors. Currently the memory overhead is hard coded to 10% (except in Kubernetes), and the only way to set it higher is to set it to a specific memory amount. ### Why are the changes needed? In dynamic environments where different people or use cases need different memory requirements, it would be helpful to set a higher memory overhead factor instead of having to set a higher specific memory overhead value. The kubernetes resource manager already makes this configurable. This makes it configurable across the board. ### Does this PR introduce _any_ user-facing change? No change to default behavior, just adds a new config users can change. ### How was this patch tested? New UT to check the memory calculation. Closes #35504 from Kimahriman/yarn-configurable-memory-overhead-factor. Authored-by: Adam Binford Signed-off-by: Thomas Graves (cherry picked from commit 71e2110b799220adc107c9ac5ce737281f2b65cc) Signed-off-by: Thomas Graves --- .../scala/org/apache/spark/SparkConf.scala | 4 +- .../spark/internal/config/package.scala | 28 +++++++++ docs/configuration.md | 30 ++++++++- docs/running-on-kubernetes.md | 9 --- .../k8s/features/BasicDriverFeatureStep.scala | 13 ++-- .../features/BasicExecutorFeatureStep.scala | 7 ++- .../BasicDriverFeatureStepSuite.scala | 63 +++++++++++++++++-- .../BasicExecutorFeatureStepSuite.scala | 54 ++++++++++++++++ .../deploy/rest/mesos/MesosRestServer.scala | 5 +- .../cluster/mesos/MesosSchedulerUtils.scala | 9 +-- .../rest/mesos/MesosRestServerSuite.scala | 8 ++- .../org/apache/spark/deploy/yarn/Client.scala | 14 ++++- .../spark/deploy/yarn/YarnAllocator.scala | 5 +- .../deploy/yarn/YarnSparkHadoopUtil.scala | 5 +- .../deploy/yarn/YarnAllocatorSuite.scala | 29 +++++++++ 15 files changed, 248 insertions(+), 35 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 5f37a1abb1909..cf121749b7348 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -636,7 +636,9 @@ private[spark] object SparkConf extends Logging { DeprecatedConfig("spark.blacklist.killBlacklistedExecutors", "3.1.0", "Please use spark.excludeOnFailure.killExcludedExecutors"), DeprecatedConfig("spark.yarn.blacklist.executor.launch.blacklisting.enabled", "3.1.0", - "Please use spark.yarn.executor.launch.excludeOnFailure.enabled") + "Please use spark.yarn.executor.launch.excludeOnFailure.enabled"), + DeprecatedConfig("spark.kubernetes.memoryOverheadFactor", "3.3.0", + "Please use spark.driver.memoryOverheadFactor and spark.executor.memoryOverheadFactor") ) Map(configs.map { cfg => (cfg.key -> cfg) } : _*) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index dbec61a1fdb76..ffe4501248f43 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -105,6 +105,22 @@ package object config { .bytesConf(ByteUnit.MiB) .createOptional + private[spark] val DRIVER_MEMORY_OVERHEAD_FACTOR = + ConfigBuilder("spark.driver.memoryOverheadFactor") + .doc("Fraction of driver memory to be allocated as additional non-heap memory per driver " + + "process in cluster mode. This is memory that accounts for things like VM overheads, " + + "interned strings, other native overheads, etc. This tends to grow with the container " + + "size. This value defaults to 0.10 except for Kubernetes non-JVM jobs, which defaults to " + + "0.40. This is done as non-JVM tasks need more non-JVM heap space and such tasks " + + "commonly fail with \"Memory Overhead Exceeded\" errors. This preempts this error " + + "with a higher default. This value is ignored if spark.driver.memoryOverhead is set " + + "directly.") + .version("3.3.0") + .doubleConf + .checkValue(factor => factor > 0, + "Ensure that memory overhead is a double greater than 0") + .createWithDefault(0.1) + private[spark] val DRIVER_LOG_DFS_DIR = ConfigBuilder("spark.driver.log.dfsDir").version("3.0.0").stringConf.createOptional @@ -315,6 +331,18 @@ package object config { .bytesConf(ByteUnit.MiB) .createOptional + private[spark] val EXECUTOR_MEMORY_OVERHEAD_FACTOR = + ConfigBuilder("spark.executor.memoryOverheadFactor") + .doc("Fraction of executor memory to be allocated as additional non-heap memory per " + + "executor process. This is memory that accounts for things like VM overheads, " + + "interned strings, other native overheads, etc. This tends to grow with the container " + + "size. This value is ignored if spark.executor.memoryOverhead is set directly.") + .version("3.3.0") + .doubleConf + .checkValue(factor => factor > 0, + "Ensure that memory overhead is a double greater than 0") + .createWithDefault(0.1) + private[spark] val CORES_MAX = ConfigBuilder("spark.cores.max") .doc("When running on a standalone deploy cluster or a Mesos cluster in coarse-grained " + "sharing mode, the maximum amount of CPU cores to request for the application from across " + diff --git a/docs/configuration.md b/docs/configuration.md index ae3f422f34b3a..a2e6797b55e2f 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -183,7 +183,7 @@ of the most common options to set are: spark.driver.memoryOverhead - driverMemory * 0.10, with minimum of 384 + driverMemory * spark.driver.memoryOverheadFactor, with minimum of 384 Amount of non-heap memory to be allocated per driver process in cluster mode, in MiB unless otherwise specified. This is memory that accounts for things like VM overheads, interned strings, @@ -198,6 +198,21 @@ of the most common options to set are: 2.3.0 + + spark.driver.memoryOverheadFactor + 0.10 + + Fraction of driver memory to be allocated as additional non-heap memory per driver process in cluster mode. + This is memory that accounts for things like VM overheads, interned strings, + other native overheads, etc. This tends to grow with the container size. + This value defaults to 0.10 except for Kubernetes non-JVM jobs, which defaults to + 0.40. This is done as non-JVM tasks need more non-JVM heap space and such tasks + commonly fail with "Memory Overhead Exceeded" errors. This preempts this error + with a higher default. + This value is ignored if spark.driver.memoryOverhead is set directly. + + 3.3.0 + spark.driver.resource.{resourceName}.amount 0 @@ -272,7 +287,7 @@ of the most common options to set are: spark.executor.memoryOverhead - executorMemory * 0.10, with minimum of 384 + executorMemory * spark.executor.memoryOverheadFactor, with minimum of 384 Amount of additional memory to be allocated per executor process, in MiB unless otherwise specified. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. @@ -287,6 +302,17 @@ of the most common options to set are: 2.3.0 + + spark.executor.memoryOverheadFactor + 0.10 + + Fraction of executor memory to be allocated as additional non-heap memory per executor process. + This is memory that accounts for things like VM overheads, interned strings, + other native overheads, etc. This tends to grow with the container size. + This value is ignored if spark.executor.memoryOverhead is set directly. + + 3.3.0 + spark.executor.resource.{resourceName}.amount 0 diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index a5da80a68d32d..de37e22cc78d7 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1137,15 +1137,6 @@ See the [configuration page](configuration.html) for information on Spark config 3.0.0 - - spark.kubernetes.memoryOverheadFactor - 0.1 - - This sets the Memory Overhead Factor that will allocate memory to non-JVM memory, which includes off-heap memory allocations, non-JVM tasks, various systems processes, and tmpfs-based local directories when spark.kubernetes.local.dirs.tmpfs is true. For JVM-based jobs this value will default to 0.10 and 0.40 for non-JVM jobs. - This is done as non-JVM tasks need more non-JVM heap space and such tasks commonly fail with "Memory Overhead Exceeded" errors. This preempts this error with a higher default. - - 2.4.0 - spark.kubernetes.pyspark.pythonVersion "3" diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala index 3b2b5612566a1..97151494fc60c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala @@ -53,18 +53,23 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) // Memory settings private val driverMemoryMiB = conf.get(DRIVER_MEMORY) + private val memoryOverheadFactor = if (conf.contains(DRIVER_MEMORY_OVERHEAD_FACTOR)) { + conf.get(DRIVER_MEMORY_OVERHEAD_FACTOR) + } else { + conf.get(MEMORY_OVERHEAD_FACTOR) + } // The memory overhead factor to use. If the user has not set it, then use a different // value for non-JVM apps. This value is propagated to executors. private val overheadFactor = if (conf.mainAppResource.isInstanceOf[NonJVMResource]) { - if (conf.contains(MEMORY_OVERHEAD_FACTOR)) { - conf.get(MEMORY_OVERHEAD_FACTOR) + if (conf.contains(MEMORY_OVERHEAD_FACTOR) || conf.contains(DRIVER_MEMORY_OVERHEAD_FACTOR)) { + memoryOverheadFactor } else { NON_JVM_MEMORY_OVERHEAD_FACTOR } } else { - conf.get(MEMORY_OVERHEAD_FACTOR) + memoryOverheadFactor } private val memoryOverheadMiB = conf @@ -164,7 +169,7 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) KUBERNETES_DRIVER_POD_NAME.key -> driverPodName, "spark.app.id" -> conf.appId, KUBERNETES_DRIVER_SUBMIT_CHECK.key -> "true", - MEMORY_OVERHEAD_FACTOR.key -> overheadFactor.toString) + DRIVER_MEMORY_OVERHEAD_FACTOR.key -> overheadFactor.toString) // try upload local, resolvable files to a hadoop compatible file system Seq(JARS, FILES, ARCHIVES, SUBMIT_PYTHON_FILES).foreach { key => val uris = conf.get(key).filter(uri => KubernetesUtils.isLocalAndResolvable(uri)) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index a7625194bd6e6..15c69ad487f5f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -59,11 +59,16 @@ private[spark] class BasicExecutorFeatureStep( private val isDefaultProfile = resourceProfile.id == ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID private val isPythonApp = kubernetesConf.get(APP_RESOURCE_TYPE) == Some(APP_RESOURCE_TYPE_PYTHON) private val disableConfigMap = kubernetesConf.get(KUBERNETES_EXECUTOR_DISABLE_CONFIGMAP) + private val memoryOverheadFactor = if (kubernetesConf.contains(EXECUTOR_MEMORY_OVERHEAD_FACTOR)) { + kubernetesConf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) + } else { + kubernetesConf.get(MEMORY_OVERHEAD_FACTOR) + } val execResources = ResourceProfile.getResourcesForClusterManager( resourceProfile.id, resourceProfile.executorResources, - kubernetesConf.get(MEMORY_OVERHEAD_FACTOR), + memoryOverheadFactor, kubernetesConf.sparkConf, isPythonApp, Map.empty) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala index bf7fbcc912f54..d45f5f97da213 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala @@ -134,7 +134,7 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { KUBERNETES_DRIVER_POD_NAME.key -> "spark-driver-pod", "spark.app.id" -> KubernetesTestConf.APP_ID, "spark.kubernetes.submitInDriver" -> "true", - MEMORY_OVERHEAD_FACTOR.key -> MEMORY_OVERHEAD_FACTOR.defaultValue.get.toString) + DRIVER_MEMORY_OVERHEAD_FACTOR.key -> DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get.toString) assert(featureStep.getAdditionalPodSystemProperties() === expectedSparkConf) } @@ -193,7 +193,7 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { // Memory overhead tests. Tuples are: // test name, main resource, overhead factor, expected factor Seq( - ("java", JavaMainAppResource(None), None, MEMORY_OVERHEAD_FACTOR.defaultValue.get), + ("java", JavaMainAppResource(None), None, DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get), ("python default", PythonMainAppResource(null), None, NON_JVM_MEMORY_OVERHEAD_FACTOR), ("python w/ override", PythonMainAppResource(null), Some(0.9d), 0.9d), ("r default", RMainAppResource(null), None, NON_JVM_MEMORY_OVERHEAD_FACTOR) @@ -201,13 +201,13 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { test(s"memory overhead factor: $name") { // Choose a driver memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB val driverMem = - ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 + ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 // main app resource, overhead factor val sparkConf = new SparkConf(false) .set(CONTAINER_IMAGE, "spark-driver:latest") .set(DRIVER_MEMORY.key, s"${driverMem.toInt}m") - factor.foreach { value => sparkConf.set(MEMORY_OVERHEAD_FACTOR, value) } + factor.foreach { value => sparkConf.set(DRIVER_MEMORY_OVERHEAD_FACTOR, value) } val conf = KubernetesTestConf.createDriverConf( sparkConf = sparkConf, mainAppResource = resource) @@ -218,10 +218,63 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { assert(mem === s"${expected}Mi") val systemProperties = step.getAdditionalPodSystemProperties() - assert(systemProperties(MEMORY_OVERHEAD_FACTOR.key) === expectedFactor.toString) + assert(systemProperties(DRIVER_MEMORY_OVERHEAD_FACTOR.key) === expectedFactor.toString) } } + test(s"SPARK-38194: memory overhead factor precendence") { + // Choose a driver memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB + val driverMem = + ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 + + // main app resource, overhead factor + val sparkConf = new SparkConf(false) + .set(CONTAINER_IMAGE, "spark-driver:latest") + .set(DRIVER_MEMORY.key, s"${driverMem.toInt}m") + + // New config should take precedence + val expectedFactor = 0.2 + sparkConf.set(DRIVER_MEMORY_OVERHEAD_FACTOR, expectedFactor) + sparkConf.set(MEMORY_OVERHEAD_FACTOR, 0.3) + + val conf = KubernetesTestConf.createDriverConf( + sparkConf = sparkConf) + val step = new BasicDriverFeatureStep(conf) + val pod = step.configurePod(SparkPod.initialPod()) + val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) + val expected = (driverMem + driverMem * expectedFactor).toInt + assert(mem === s"${expected}Mi") + + val systemProperties = step.getAdditionalPodSystemProperties() + assert(systemProperties(DRIVER_MEMORY_OVERHEAD_FACTOR.key) === expectedFactor.toString) + } + + test(s"SPARK-38194: old memory factor settings is applied if new one isn't given") { + // Choose a driver memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB + val driverMem = + ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 + + // main app resource, overhead factor + val sparkConf = new SparkConf(false) + .set(CONTAINER_IMAGE, "spark-driver:latest") + .set(DRIVER_MEMORY.key, s"${driverMem.toInt}m") + + // Old config still works if new config isn't given + val expectedFactor = 0.3 + sparkConf.set(MEMORY_OVERHEAD_FACTOR, expectedFactor) + + val conf = KubernetesTestConf.createDriverConf( + sparkConf = sparkConf) + val step = new BasicDriverFeatureStep(conf) + val pod = step.configurePod(SparkPod.initialPod()) + val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) + val expected = (driverMem + driverMem * expectedFactor).toInt + assert(mem === s"${expected}Mi") + + val systemProperties = step.getAdditionalPodSystemProperties() + assert(systemProperties(DRIVER_MEMORY_OVERHEAD_FACTOR.key) === expectedFactor.toString) + } + test("SPARK-35493: make spark.blockManager.port be able to be fallen back to in driver pod") { val initPod = SparkPod.initialPod() val sparkConf = new SparkConf() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala index f5f2712481604..731a9b77d2059 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala @@ -441,6 +441,60 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { )) } + test(s"SPARK-38194: memory overhead factor precendence") { + // Choose an executor memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB + val defaultFactor = EXECUTOR_MEMORY_OVERHEAD_FACTOR.defaultValue.get + val executorMem = ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / defaultFactor * 2 + + // main app resource, overhead factor + val sparkConf = new SparkConf(false) + .set(CONTAINER_IMAGE, "spark-driver:latest") + .set(EXECUTOR_MEMORY.key, s"${executorMem.toInt}m") + + // New config should take precedence + val expectedFactor = 0.2 + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, expectedFactor) + sparkConf.set(MEMORY_OVERHEAD_FACTOR, 0.3) + + val conf = KubernetesTestConf.createExecutorConf( + sparkConf = sparkConf) + ResourceProfile.clearDefaultProfile() + val resourceProfile = ResourceProfile.getOrCreateDefaultProfile(sparkConf) + val step = new BasicExecutorFeatureStep(conf, new SecurityManager(baseConf), + resourceProfile) + val pod = step.configurePod(SparkPod.initialPod()) + val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) + val expected = (executorMem + executorMem * expectedFactor).toInt + assert(mem === s"${expected}Mi") + } + + test(s"SPARK-38194: old memory factor settings is applied if new one isn't given") { + // Choose an executor memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB + val defaultFactor = EXECUTOR_MEMORY_OVERHEAD_FACTOR.defaultValue.get + val executorMem = ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / defaultFactor * 2 + + // main app resource, overhead factor + val sparkConf = new SparkConf(false) + .set(CONTAINER_IMAGE, "spark-driver:latest") + .set(EXECUTOR_MEMORY.key, s"${executorMem.toInt}m") + + // New config should take precedence + val expectedFactor = 0.3 + sparkConf.set(MEMORY_OVERHEAD_FACTOR, expectedFactor) + + val conf = KubernetesTestConf.createExecutorConf( + sparkConf = sparkConf) + ResourceProfile.clearDefaultProfile() + val resourceProfile = ResourceProfile.getOrCreateDefaultProfile(sparkConf) + val step = new BasicExecutorFeatureStep(conf, new SecurityManager(baseConf), + resourceProfile) + val pod = step.configurePod(SparkPod.initialPod()) + val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) + val expected = (executorMem + executorMem * expectedFactor).toInt + assert(mem === s"${expected}Mi") + } + + // There is always exactly one controller reference, and it points to the driver pod. private def checkOwnerReferences(executor: Pod, driverPodUid: String): Unit = { assert(executor.getMetadata.getOwnerReferences.size() === 1) diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala index 2fd13a5903243..9e4187837b680 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala @@ -105,6 +105,7 @@ private[mesos] class MesosSubmitRequestServlet( val superviseDriver = sparkProperties.get(config.DRIVER_SUPERVISE.key) val driverMemory = sparkProperties.get(config.DRIVER_MEMORY.key) val driverMemoryOverhead = sparkProperties.get(config.DRIVER_MEMORY_OVERHEAD.key) + val driverMemoryOverheadFactor = sparkProperties.get(config.DRIVER_MEMORY_OVERHEAD_FACTOR.key) val driverCores = sparkProperties.get(config.DRIVER_CORES.key) val name = request.sparkProperties.getOrElse("spark.app.name", mainClass) @@ -121,8 +122,10 @@ private[mesos] class MesosSubmitRequestServlet( mainClass, appArgs, environmentVariables, extraClassPath, extraLibraryPath, javaOpts) val actualSuperviseDriver = superviseDriver.map(_.toBoolean).getOrElse(DEFAULT_SUPERVISE) val actualDriverMemory = driverMemory.map(Utils.memoryStringToMb).getOrElse(DEFAULT_MEMORY) + val actualDriverMemoryFactor = driverMemoryOverheadFactor.map(_.toDouble).getOrElse( + MEMORY_OVERHEAD_FACTOR) val actualDriverMemoryOverhead = driverMemoryOverhead.map(_.toInt).getOrElse( - math.max((MEMORY_OVERHEAD_FACTOR * actualDriverMemory).toInt, MEMORY_OVERHEAD_MIN)) + math.max((actualDriverMemoryFactor * actualDriverMemory).toInt, MEMORY_OVERHEAD_MIN)) val actualDriverCores = driverCores.map(_.toDouble).getOrElse(DEFAULT_CORES) val submitDate = new Date() val submissionId = newDriverId(submitDate) diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala index 38f83df00e428..524b1d514fafe 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala @@ -387,8 +387,7 @@ trait MesosSchedulerUtils extends Logging { } } - // These defaults copied from YARN - private val MEMORY_OVERHEAD_FRACTION = 0.10 + // This default copied from YARN private val MEMORY_OVERHEAD_MINIMUM = 384 /** @@ -400,8 +399,9 @@ trait MesosSchedulerUtils extends Logging { * (whichever is larger) */ def executorMemory(sc: SparkContext): Int = { + val memoryOverheadFactor = sc.conf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) sc.conf.get(mesosConfig.EXECUTOR_MEMORY_OVERHEAD).getOrElse( - math.max(MEMORY_OVERHEAD_FRACTION * sc.executorMemory, MEMORY_OVERHEAD_MINIMUM).toInt) + + math.max(memoryOverheadFactor * sc.executorMemory, MEMORY_OVERHEAD_MINIMUM).toInt) + sc.executorMemory } @@ -415,7 +415,8 @@ trait MesosSchedulerUtils extends Logging { * `MEMORY_OVERHEAD_FRACTION (=0.1) * driverMemory` */ def driverContainerMemory(driverDesc: MesosDriverDescription): Int = { - val defaultMem = math.max(MEMORY_OVERHEAD_FRACTION * driverDesc.mem, MEMORY_OVERHEAD_MINIMUM) + val memoryOverheadFactor = driverDesc.conf.get(DRIVER_MEMORY_OVERHEAD_FACTOR) + val defaultMem = math.max(memoryOverheadFactor * driverDesc.mem, MEMORY_OVERHEAD_MINIMUM) driverDesc.conf.get(mesosConfig.DRIVER_MEMORY_OVERHEAD).getOrElse(defaultMem.toInt) + driverDesc.mem } diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala index 344fc38c84fb1..8bed43a54d5d0 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala @@ -35,10 +35,16 @@ class MesosRestServerSuite extends SparkFunSuite testOverheadMemory(new SparkConf(), "2000M", 2384) } - test("test driver overhead memory with overhead factor") { + test("test driver overhead memory with default overhead factor") { testOverheadMemory(new SparkConf(), "5000M", 5500) } + test("test driver overhead memory with overhead factor") { + val conf = new SparkConf() + conf.set(config.DRIVER_MEMORY_OVERHEAD_FACTOR.key, "0.2") + testOverheadMemory(conf, "5000M", 6000) + } + test("test configured driver overhead memory") { val conf = new SparkConf() conf.set(config.DRIVER_MEMORY_OVERHEAD.key, "1000") diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index ae85ea8d6110a..f364b79216098 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -54,6 +54,7 @@ import org.apache.spark.api.python.PythonUtils import org.apache.spark.deploy.{SparkApplication, SparkHadoopUtil} import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.deploy.yarn.ResourceRequestHelper._ +import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._ import org.apache.spark.deploy.yarn.config._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ @@ -70,7 +71,6 @@ private[spark] class Client( extends Logging { import Client._ - import YarnSparkHadoopUtil._ private val yarnClient = YarnClient.createYarnClient private val hadoopConf = new YarnConfiguration(SparkHadoopUtil.newConfiguration(sparkConf)) @@ -85,6 +85,12 @@ private[spark] class Client( private var appMaster: ApplicationMaster = _ private var stagingDirPath: Path = _ + private val amMemoryOverheadFactor = if (isClusterMode) { + sparkConf.get(DRIVER_MEMORY_OVERHEAD_FACTOR) + } else { + AM_MEMORY_OVERHEAD_FACTOR + } + // AM related configurations private val amMemory = if (isClusterMode) { sparkConf.get(DRIVER_MEMORY).toInt @@ -94,7 +100,7 @@ private[spark] class Client( private val amMemoryOverhead = { val amMemoryOverheadEntry = if (isClusterMode) DRIVER_MEMORY_OVERHEAD else AM_MEMORY_OVERHEAD sparkConf.get(amMemoryOverheadEntry).getOrElse( - math.max((MEMORY_OVERHEAD_FACTOR * amMemory).toLong, + math.max((amMemoryOverheadFactor * amMemory).toLong, ResourceProfile.MEMORY_OVERHEAD_MIN_MIB)).toInt } private val amCores = if (isClusterMode) { @@ -107,8 +113,10 @@ private[spark] class Client( private val executorMemory = sparkConf.get(EXECUTOR_MEMORY) // Executor offHeap memory in MiB. protected val executorOffHeapMemory = Utils.executorOffHeapMemorySizeAsMb(sparkConf) + + private val executorMemoryOvereadFactor = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) private val executorMemoryOverhead = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD).getOrElse( - math.max((MEMORY_OVERHEAD_FACTOR * executorMemory).toLong, + math.max((executorMemoryOvereadFactor * executorMemory).toLong, ResourceProfile.MEMORY_OVERHEAD_MIN_MIB)).toInt private val isPython = sparkConf.get(IS_PYTHON_APP) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 54ab643f2755b..a85b7174673af 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -163,6 +163,8 @@ private[yarn] class YarnAllocator( private val isPythonApp = sparkConf.get(IS_PYTHON_APP) + private val memoryOverheadFactor = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) + private val launcherPool = ThreadUtils.newDaemonCachedThreadPool( "ContainerLauncher", sparkConf.get(CONTAINER_LAUNCH_MAX_THREADS)) @@ -280,9 +282,10 @@ private[yarn] class YarnAllocator( // track the resource profile if not already there getOrUpdateRunningExecutorForRPId(rp.id) logInfo(s"Resource profile ${rp.id} doesn't exist, adding it") + val resourcesWithDefaults = ResourceProfile.getResourcesForClusterManager(rp.id, rp.executorResources, - MEMORY_OVERHEAD_FACTOR, sparkConf, isPythonApp, resourceNameMapping) + memoryOverheadFactor, sparkConf, isPythonApp, resourceNameMapping) val customSparkResources = resourcesWithDefaults.customResources.map { case (name, execReq) => (name, execReq.amount.toString) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index f347e37ba24ab..1869c739e4844 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -34,11 +34,10 @@ import org.apache.spark.util.Utils object YarnSparkHadoopUtil { - // Additional memory overhead + // Additional memory overhead for application masters in client mode. // 10% was arrived at experimentally. In the interest of minimizing memory waste while covering // the common cases. Memory overhead tends to grow with container size. - - val MEMORY_OVERHEAD_FACTOR = 0.10 + val AM_MEMORY_OVERHEAD_FACTOR = 0.10 val ANY_HOST = "*" diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala index db65d128b07f0..ae010f11503dd 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala @@ -706,4 +706,33 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter sparkConf.set(MEMORY_OFFHEAP_SIZE, originalOffHeapSize) } } + + test("SPARK-38194: Configurable memory overhead factor") { + val executorMemory = sparkConf.get(EXECUTOR_MEMORY).toLong + try { + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.5) + val (handler, _) = createAllocator(maxExecutors = 1, + additionalConfigs = Map(EXECUTOR_MEMORY.key -> executorMemory.toString)) + val defaultResource = handler.rpIdToYarnResource.get(defaultRPId) + val memory = defaultResource.getMemory + assert(memory == (executorMemory * 1.5).toLong) + } finally { + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.1) + } + } + + test("SPARK-38194: Memory overhead takes precedence over factor") { + val executorMemory = sparkConf.get(EXECUTOR_MEMORY) + try { + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.5) + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD, (executorMemory * 0.4).toLong) + val (handler, _) = createAllocator(maxExecutors = 1, + additionalConfigs = Map(EXECUTOR_MEMORY.key -> executorMemory.toString)) + val defaultResource = handler.rpIdToYarnResource.get(defaultRPId) + val memory = defaultResource.getMemory + assert(memory == (executorMemory * 1.4).toLong) + } finally { + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.1) + } + } } From c284faad2d7d3b813c1c94c612b814c129b6dad3 Mon Sep 17 00:00:00 2001 From: Yihong He Date: Thu, 17 Mar 2022 10:03:42 +0900 Subject: [PATCH 004/535] [SPARK-38556][PYTHON] Disable Pandas usage logging for method calls inside @contextmanager functions ### What changes were proposed in this pull request? Wrap AbstractContextManager returned by contexmanager decorator function in function calls. The comment in the code change explain why it uses a wrapper class instead of wrapping functions of AbstractContextManager directly. ### Why are the changes needed? Currently, method calls inside contextmanager functions are treated as external for **with** statements. For example, the below code records config.set_option calls inside ps.option_context(...) ```python with ps.option_context("compute.ops_on_diff_frames", True): pass ``` We should disable usage logging for calls inside contextmanager functions to improve accuracy of the usage data ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Existing tests - Manual test by running `./bin/pyspark` and verified the output: ``` >>> sc.setLogLevel("info") >>> import pyspark.pandas as ps 22/03/15 17:10:50 INFO Log4jUsageLogger: pandasOnSparkImported=1.0, tags=List(), blob= >>> with ps.option_context("compute.ops_on_diff_frames", True): ... pass ... 22/03/15 17:11:17 INFO Log4jUsageLogger: pandasOnSparkFunctionCalled=1.0, tags=List(pandasOnSparkFunction=option_context(*args: Any) -> Iterator[NoneType], className=config, status=success), blob={"duration": 0.1615259999994123} 22/03/15 17:11:18 INFO Log4jUsageLogger: initialConfigLogging=1.0, tags=List(sparkApplicationId=local-1647360645198, sparkExecutionId=null, sparkJobGroupId=null), blob={"spark.sql.warehouse.dir":"file:/Users/yihong.he/spark/spark-warehouse","spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"10.120.131.148","spark.serializer.objectStreamReset":"100","spark.driver.port":"61238","spark.rdd.compress":"True","spark.app.name":"PySparkShell","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1647360644422","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1647360645198"} 22/03/15 17:11:19 INFO Log4jUsageLogger: pandasOnSparkFunctionCalled=1.0, tags=List(pandasOnSparkFunction=option_context.__enter__(), className=config, status=success), blob={"duration": 1594.1569399999978} 22/03/15 17:11:19 INFO Log4jUsageLogger: pandasOnSparkFunctionCalled=1.0, tags=List(pandasOnSparkFunction=option_context.__exit__(type, value, traceback), className=config, status=success), blob={"duration": 12.610170000002086} ``` Closes #35861 from heyihong/SPARK-38556. Authored-by: Yihong He Signed-off-by: Hyukjin Kwon (cherry picked from commit 7d1ff01299c88a1aadfac032ea0b3ef87f4ae50d) Signed-off-by: Hyukjin Kwon --- python/pyspark/instrumentation_utils.py | 30 +++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/python/pyspark/instrumentation_utils.py b/python/pyspark/instrumentation_utils.py index 908f5cbb3d473..b9aacf6c4c6b8 100644 --- a/python/pyspark/instrumentation_utils.py +++ b/python/pyspark/instrumentation_utils.py @@ -21,6 +21,7 @@ import threading import importlib import time +from contextlib import AbstractContextManager from types import ModuleType from typing import Tuple, Union, List, Callable, Any, Type @@ -30,6 +31,24 @@ _local = threading.local() +class _WrappedAbstractContextManager(AbstractContextManager): + def __init__( + self, acm: AbstractContextManager, class_name: str, function_name: str, logger: Any + ): + self._enter_func = _wrap_function( + class_name, "{}.__enter__".format(function_name), acm.__enter__, logger + ) + self._exit_func = _wrap_function( + class_name, "{}.__exit__".format(function_name), acm.__exit__, logger + ) + + def __enter__(self): # type: ignore[no-untyped-def] + return self._enter_func() + + def __exit__(self, exc_type, exc_val, exc_tb): # type: ignore[no-untyped-def] + return self._exit_func(exc_type, exc_val, exc_tb) + + def _wrap_function(class_name: str, function_name: str, func: Callable, logger: Any) -> Callable: signature = inspect.signature(func) @@ -44,6 +63,17 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: start = time.perf_counter() try: res = func(*args, **kwargs) + if isinstance(res, AbstractContextManager): + # Wrap AbstractContextManager's subclasses returned by @contextmanager decorator + # function so that wrapped function calls inside __enter__ and __exit__ + # are not recorded by usage logger. + # + # The reason to add a wrapped class after function calls instead of + # wrapping __enter__ and __exit__ methods of _GeneratorContextManager class is + # because usage logging should be disabled for functions with @contextmanager + # decorator in PySpark only. + res = _WrappedAbstractContextManager(res, class_name, function_name, logger) + logger.log_success( class_name, function_name, time.perf_counter() - start, signature ) From 801c330036ef93789061daeea82f7cbc8b9cdebb Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Thu, 17 Mar 2022 16:53:40 +0800 Subject: [PATCH 005/535] [SPARK-38560][SQL] If `Sum`, `Count`, `Any` accompany with distinct, cannot do partial agg push down ### What changes were proposed in this pull request? Spark could partial push down sum(distinct col), count(distinct col) if data source have multiple partitions, and Spark will sum the value again. So the result may not correctly. ### Why are the changes needed? Fix the bug push down sum(distinct col), count(distinct col) to data source and return incorrect result. ### Does this PR introduce _any_ user-facing change? 'Yes'. Users will see the correct behavior. ### How was this patch tested? New tests. Closes #35873 from beliefer/SPARK-38560. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../v2/V2ScanRelationPushDown.scala | 184 ++++++++++-------- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 14 +- 2 files changed, 111 insertions(+), 87 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala index 3ff917664b486..b4bd02773edfb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.planning.ScanOperation import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, LeafNode, Limit, LocalLimit, LogicalPlan, Project, Sample, Sort} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.expressions.SortOrder -import org.apache.spark.sql.connector.expressions.aggregate.{Aggregation, Avg, GeneralAggregateFunc} +import org.apache.spark.sql.connector.expressions.aggregate.{Aggregation, Avg, Count, GeneralAggregateFunc, Sum} import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownAggregates, SupportsPushDownFilters, V1Scan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.sources @@ -156,101 +156,106 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { } } - val pushedAggregates = finalTranslatedAggregates.filter(r.pushAggregation) - if (pushedAggregates.isEmpty) { + if (finalTranslatedAggregates.isEmpty) { aggNode // return original plan node - } else if (!supportPartialAggPushDown(pushedAggregates.get) && - !r.supportCompletePushDown(pushedAggregates.get)) { + } else if (!r.supportCompletePushDown(finalTranslatedAggregates.get) && + !supportPartialAggPushDown(finalTranslatedAggregates.get)) { aggNode // return original plan node } else { - // No need to do column pruning because only the aggregate columns are used as - // DataSourceV2ScanRelation output columns. All the other columns are not - // included in the output. - val scan = sHolder.builder.build() - - // scalastyle:off - // use the group by columns and aggregate columns as the output columns - // e.g. TABLE t (c1 INT, c2 INT, c3 INT) - // SELECT min(c1), max(c1) FROM t GROUP BY c2; - // Use c2, min(c1), max(c1) as output for DataSourceV2ScanRelation - // We want to have the following logical plan: - // == Optimized Logical Plan == - // Aggregate [c2#10], [min(min(c1)#21) AS min(c1)#17, max(max(c1)#22) AS max(c1)#18] - // +- RelationV2[c2#10, min(c1)#21, max(c1)#22] - // scalastyle:on - val newOutput = scan.readSchema().toAttributes - assert(newOutput.length == groupingExpressions.length + finalAggregates.length) - val groupAttrs = normalizedGroupingExpressions.zip(newOutput).map { - case (a: Attribute, b: Attribute) => b.withExprId(a.exprId) - case (_, b) => b - } - val aggOutput = newOutput.drop(groupAttrs.length) - val output = groupAttrs ++ aggOutput - - logInfo( - s""" - |Pushing operators to ${sHolder.relation.name} - |Pushed Aggregate Functions: - | ${pushedAggregates.get.aggregateExpressions.mkString(", ")} - |Pushed Group by: - | ${pushedAggregates.get.groupByColumns.mkString(", ")} - |Output: ${output.mkString(", ")} - """.stripMargin) - - val wrappedScan = getWrappedScan(scan, sHolder, pushedAggregates) - val scanRelation = DataSourceV2ScanRelation(sHolder.relation, wrappedScan, output) - if (r.supportCompletePushDown(pushedAggregates.get)) { - val projectExpressions = resultExpressions.map { expr => - // TODO At present, only push down group by attribute is supported. - // In future, more attribute conversion is extended here. e.g. GetStructField - expr.transform { - case agg: AggregateExpression => - val ordinal = aggExprToOutputOrdinal(agg.canonicalized) - val child = - addCastIfNeeded(aggOutput(ordinal), agg.resultAttribute.dataType) - Alias(child, agg.resultAttribute.name)(agg.resultAttribute.exprId) - } - }.asInstanceOf[Seq[NamedExpression]] - Project(projectExpressions, scanRelation) + val pushedAggregates = finalTranslatedAggregates.filter(r.pushAggregation) + if (pushedAggregates.isEmpty) { + aggNode // return original plan node } else { - val plan = Aggregate( - output.take(groupingExpressions.length), finalResultExpressions, scanRelation) + // No need to do column pruning because only the aggregate columns are used as + // DataSourceV2ScanRelation output columns. All the other columns are not + // included in the output. + val scan = sHolder.builder.build() // scalastyle:off - // Change the optimized logical plan to reflect the pushed down aggregate + // use the group by columns and aggregate columns as the output columns // e.g. TABLE t (c1 INT, c2 INT, c3 INT) // SELECT min(c1), max(c1) FROM t GROUP BY c2; - // The original logical plan is - // Aggregate [c2#10],[min(c1#9) AS min(c1)#17, max(c1#9) AS max(c1)#18] - // +- RelationV2[c1#9, c2#10] ... - // - // After change the V2ScanRelation output to [c2#10, min(c1)#21, max(c1)#22] - // we have the following - // !Aggregate [c2#10], [min(c1#9) AS min(c1)#17, max(c1#9) AS max(c1)#18] - // +- RelationV2[c2#10, min(c1)#21, max(c1)#22] ... - // - // We want to change it to + // Use c2, min(c1), max(c1) as output for DataSourceV2ScanRelation + // We want to have the following logical plan: // == Optimized Logical Plan == // Aggregate [c2#10], [min(min(c1)#21) AS min(c1)#17, max(max(c1)#22) AS max(c1)#18] - // +- RelationV2[c2#10, min(c1)#21, max(c1)#22] ... + // +- RelationV2[c2#10, min(c1)#21, max(c1)#22] // scalastyle:on - plan.transformExpressions { - case agg: AggregateExpression => - val ordinal = aggExprToOutputOrdinal(agg.canonicalized) - val aggAttribute = aggOutput(ordinal) - val aggFunction: aggregate.AggregateFunction = - agg.aggregateFunction match { - case max: aggregate.Max => - max.copy(child = addCastIfNeeded(aggAttribute, max.child.dataType)) - case min: aggregate.Min => - min.copy(child = addCastIfNeeded(aggAttribute, min.child.dataType)) - case sum: aggregate.Sum => - sum.copy(child = addCastIfNeeded(aggAttribute, sum.child.dataType)) - case _: aggregate.Count => - aggregate.Sum(addCastIfNeeded(aggAttribute, LongType)) - case other => other - } - agg.copy(aggregateFunction = aggFunction) + val newOutput = scan.readSchema().toAttributes + assert(newOutput.length == groupingExpressions.length + finalAggregates.length) + val groupAttrs = normalizedGroupingExpressions.zip(newOutput).map { + case (a: Attribute, b: Attribute) => b.withExprId(a.exprId) + case (_, b) => b + } + val aggOutput = newOutput.drop(groupAttrs.length) + val output = groupAttrs ++ aggOutput + + logInfo( + s""" + |Pushing operators to ${sHolder.relation.name} + |Pushed Aggregate Functions: + | ${pushedAggregates.get.aggregateExpressions.mkString(", ")} + |Pushed Group by: + | ${pushedAggregates.get.groupByColumns.mkString(", ")} + |Output: ${output.mkString(", ")} + """.stripMargin) + + val wrappedScan = getWrappedScan(scan, sHolder, pushedAggregates) + val scanRelation = + DataSourceV2ScanRelation(sHolder.relation, wrappedScan, output) + if (r.supportCompletePushDown(pushedAggregates.get)) { + val projectExpressions = resultExpressions.map { expr => + // TODO At present, only push down group by attribute is supported. + // In future, more attribute conversion is extended here. e.g. GetStructField + expr.transform { + case agg: AggregateExpression => + val ordinal = aggExprToOutputOrdinal(agg.canonicalized) + val child = + addCastIfNeeded(aggOutput(ordinal), agg.resultAttribute.dataType) + Alias(child, agg.resultAttribute.name)(agg.resultAttribute.exprId) + } + }.asInstanceOf[Seq[NamedExpression]] + Project(projectExpressions, scanRelation) + } else { + val plan = Aggregate(output.take(groupingExpressions.length), + finalResultExpressions, scanRelation) + + // scalastyle:off + // Change the optimized logical plan to reflect the pushed down aggregate + // e.g. TABLE t (c1 INT, c2 INT, c3 INT) + // SELECT min(c1), max(c1) FROM t GROUP BY c2; + // The original logical plan is + // Aggregate [c2#10],[min(c1#9) AS min(c1)#17, max(c1#9) AS max(c1)#18] + // +- RelationV2[c1#9, c2#10] ... + // + // After change the V2ScanRelation output to [c2#10, min(c1)#21, max(c1)#22] + // we have the following + // !Aggregate [c2#10], [min(c1#9) AS min(c1)#17, max(c1#9) AS max(c1)#18] + // +- RelationV2[c2#10, min(c1)#21, max(c1)#22] ... + // + // We want to change it to + // == Optimized Logical Plan == + // Aggregate [c2#10], [min(min(c1)#21) AS min(c1)#17, max(max(c1)#22) AS max(c1)#18] + // +- RelationV2[c2#10, min(c1)#21, max(c1)#22] ... + // scalastyle:on + plan.transformExpressions { + case agg: AggregateExpression => + val ordinal = aggExprToOutputOrdinal(agg.canonicalized) + val aggAttribute = aggOutput(ordinal) + val aggFunction: aggregate.AggregateFunction = + agg.aggregateFunction match { + case max: aggregate.Max => + max.copy(child = addCastIfNeeded(aggAttribute, max.child.dataType)) + case min: aggregate.Min => + min.copy(child = addCastIfNeeded(aggAttribute, min.child.dataType)) + case sum: aggregate.Sum => + sum.copy(child = addCastIfNeeded(aggAttribute, sum.child.dataType)) + case _: aggregate.Count => + aggregate.Sum(addCastIfNeeded(aggAttribute, LongType)) + case other => other + } + agg.copy(aggregateFunction = aggFunction) + } } } } @@ -279,7 +284,14 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { private def supportPartialAggPushDown(agg: Aggregation): Boolean = { // We don't know the agg buffer of `GeneralAggregateFunc`, so can't do partial agg push down. - agg.aggregateExpressions().forall(!_.isInstanceOf[GeneralAggregateFunc]) + // If `Sum`, `Count`, `Avg` with distinct, can't do partial agg push down. + agg.aggregateExpressions().exists { + case sum: Sum => !sum.isDistinct + case count: Count => !count.isDistinct + case avg: Avg => !avg.isDistinct + case _: GeneralAggregateFunc => false + case _ => true + } } private def addCastIfNeeded(expression: Expression, expectedDataType: DataType) = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 85ccf828873d1..17bd7f7a6d5bc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Sort} import org.apache.spark.sql.connector.expressions.{FieldReference, NullOrdering, SortDirection, SortValue} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanRelation, V1ScanWrapper} import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -import org.apache.spark.sql.functions.{avg, count, lit, sum, udf} +import org.apache.spark.sql.functions.{avg, count, count_distinct, lit, sum, udf} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.util.Utils @@ -506,6 +506,18 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(df, Seq(Row(3))) } + test("scan with aggregate push-down: cannot partial push down COUNT(DISTINCT col)") { + val df = spark.read + .option("partitionColumn", "dept") + .option("lowerBound", "0") + .option("upperBound", "2") + .option("numPartitions", "2") + .table("h2.test.employee") + .agg(count_distinct($"DEPT")) + checkAggregateRemoved(df, false) + checkAnswer(df, Seq(Row(3))) + } + test("scan with aggregate push-down: SUM without filer and group by") { val df = sql("SELECT SUM(SALARY) FROM h2.test.employee") checkAggregateRemoved(df) From ebaee4fad1a09d5e19d74ced940ef09f2a416f71 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Thu, 17 Mar 2022 16:58:22 +0800 Subject: [PATCH 006/535] [SPARK-37995][SQL] PlanAdaptiveDynamicPruningFilters should use prepareExecutedPlan rather than createSparkPlan to re-plan subquery ### What changes were proposed in this pull request? Use exists adaptive execution context to re-compile subquery. ### Why are the changes needed? If a subquery which is inferred by dpp contains a nested subquery, AQE can not compile it correctly. the added test will fail before this pr: ```java java.lang.ClassCastException: org.apache.spark.sql.catalyst.plans.logical.Aggregate cannot be cast to org.apache.spark.sql.execution.SparkPlan at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286) at scala.collection.Iterator.foreach(Iterator.scala:943) at scala.collection.Iterator.foreach$(Iterator.scala:943) at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) at scala.collection.IterableLike.foreach(IterableLike.scala:74) at scala.collection.IterableLike.foreach$(IterableLike.scala:73) at scala.collection.AbstractIterable.foreach(Iterable.scala:56) at scala.collection.TraversableLike.map(TraversableLike.scala:286) at scala.collection.TraversableLike.map$(TraversableLike.scala:279) at scala.collection.AbstractTraversable.map(Traversable.scala:108) at org.apache.spark.sql.execution.SparkPlanInfo$.fromSparkPlan(SparkPlanInfo.scala:75) at org.apache.spark.sql.execution.SparkPlanInfo$.$anonfun$fromSparkPlan$3(SparkPlanInfo.scala:75) ``` ### Does this PR introduce _any_ user-facing change? yes, bug fix ### How was this patch tested? add test Closes #35849 from ulysses-you/SPARK-37995. Authored-by: ulysses-you Signed-off-by: Wenchen Fan (cherry picked from commit 3afc4fb08b01597a5677ce706731639c687fd2dd) Signed-off-by: Wenchen Fan --- .../spark/sql/execution/QueryExecution.scala | 13 ++++++++++++ .../PlanAdaptiveDynamicPruningFilters.scala | 11 ++++------ .../sql/DynamicPartitionPruningSuite.scala | 20 ++++++++++++++++++- 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 1b089943a680e..9bf8de5ea6c4b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -485,6 +485,19 @@ object QueryExecution { prepareExecutedPlan(spark, sparkPlan) } + /** + * Prepare the [[SparkPlan]] for execution using exists adaptive execution context. + * This method is only called by [[PlanAdaptiveDynamicPruningFilters]]. + */ + def prepareExecutedPlan( + session: SparkSession, + plan: LogicalPlan, + context: AdaptiveExecutionContext): SparkPlan = { + val sparkPlan = createSparkPlan(session, session.sessionState.planner, plan.clone()) + val preparationRules = preparations(session, Option(InsertAdaptiveSparkPlan(context)), true) + prepareForExecution(preparationRules, sparkPlan.clone()) + } + private val currentCteMap = new ThreadLocal[mutable.HashMap[Long, CTERelationDef]]() def cteMap: mutable.HashMap[Long, CTERelationDef] = currentCteMap.get() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveDynamicPruningFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveDynamicPruningFilters.scala index 1cc39df0107d8..9a780c11eefab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveDynamicPruningFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/PlanAdaptiveDynamicPruningFilters.scala @@ -71,13 +71,10 @@ case class PlanAdaptiveDynamicPruningFilters( val aggregate = Aggregate(Seq(alias), Seq(alias), buildPlan) val session = adaptivePlan.context.session - val planner = session.sessionState.planner - // Here we can't call the QueryExecution.prepareExecutedPlan() method to - // get the sparkPlan as Non-AQE use case, which will cause the physical - // plan optimization rules be inserted twice, once in AQE framework and - // another in prepareExecutedPlan() method. - val sparkPlan = QueryExecution.createSparkPlan(session, planner, aggregate) - val newAdaptivePlan = adaptivePlan.copy(inputPlan = sparkPlan) + val sparkPlan = QueryExecution.prepareExecutedPlan( + session, aggregate, adaptivePlan.context) + assert(sparkPlan.isInstanceOf[AdaptiveSparkPlanExec]) + val newAdaptivePlan = sparkPlan.asInstanceOf[AdaptiveSparkPlanExec] val values = SubqueryExec(name, newAdaptivePlan) DynamicPruningExpression(InSubqueryExec(value, values, exprId)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala index 61885169ece4c..f74e0474eae1e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala @@ -1664,7 +1664,25 @@ class DynamicPartitionPruningV1SuiteAEOff extends DynamicPartitionPruningV1Suite with DisableAdaptiveExecutionSuite class DynamicPartitionPruningV1SuiteAEOn extends DynamicPartitionPruningV1Suite - with EnableAdaptiveExecutionSuite + with EnableAdaptiveExecutionSuite { + + test("SPARK-37995: PlanAdaptiveDynamicPruningFilters should use prepareExecutedPlan " + + "rather than createSparkPlan to re-plan subquery") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", + SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", + SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false") { + val df = sql( + """ + |SELECT f.date_id, f.store_id FROM fact_sk f + |JOIN dim_store s ON f.store_id = s.store_id AND s.country = 'NL' + |WHERE s.state_province != (SELECT max(state_province) FROM dim_stats) + """.stripMargin) + + checkPartitionPruningPredicate(df, true, false) + checkAnswer(df, Row(1000, 1) :: Row(1010, 2) :: Row(1020, 2) :: Nil) + } + } +} abstract class DynamicPartitionPruningV2Suite extends DynamicPartitionPruningDataSourceSuiteBase { override protected def runAnalyzeColumnCommands: Boolean = false From 1824c69618dc24dc385d74560ea99961d0315c48 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 17 Mar 2022 12:33:00 +0300 Subject: [PATCH 007/535] [SPARK-38566][SQL][3.3] Revert the parser changes for DEFAULT column support ### What changes were proposed in this pull request? Revert the commit https://github.com/apache/spark/commit/e21cb62d02c85a66771822cdd49c49dbb3e44502 from `branch-3.3`. ### Why are the changes needed? See discussion in the PR https://github.com/apache/spark/pull/35690. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By existing test suites. Closes #35885 from MaxGekk/revert-default-column-support-3.3. Authored-by: Max Gekk Signed-off-by: Max Gekk --- docs/sql-ref-ansi-compliance.md | 1 - .../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 1 - .../sql/catalyst/parser/SqlBaseParser.g4 | 22 +------ .../sql/catalyst/parser/AstBuilder.scala | 61 +------------------ .../spark/sql/errors/QueryParsingErrors.scala | 4 -- .../sql/catalyst/parser/DDLParserSuite.scala | 53 ---------------- .../spark/sql/execution/SparkSqlParser.scala | 2 +- 7 files changed, 6 insertions(+), 138 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index ccfc60122d31c..0769089f4dabe 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -395,7 +395,6 @@ Below is a list of all the keywords in Spark SQL. |DAY|non-reserved|non-reserved|non-reserved| |DAYOFYEAR|non-reserved|non-reserved|non-reserved| |DBPROPERTIES|non-reserved|non-reserved|non-reserved| -|DEFAULT|non-reserved|non-reserved|non-reserved| |DEFINED|non-reserved|non-reserved|non-reserved| |DELETE|non-reserved|non-reserved|reserved| |DELIMITED|non-reserved|non-reserved|non-reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index e84d4fa45eb99..6c731bb02bc39 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -146,7 +146,6 @@ DATABASES: 'DATABASES'; DATEADD: 'DATEADD'; DATEDIFF: 'DATEDIFF'; DBPROPERTIES: 'DBPROPERTIES'; -DEFAULT: 'DEFAULT'; DEFINED: 'DEFINED'; DELETE: 'DELETE'; DELIMITED: 'DELIMITED'; diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index fb3bccacaf94b..fe81f0ccb8a48 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -83,7 +83,7 @@ statement (RESTRICT | CASCADE)? #dropNamespace | SHOW namespaces ((FROM | IN) multipartIdentifier)? (LIKE? pattern=STRING)? #showNamespaces - | createTableHeader (LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN)? tableProvider? + | createTableHeader (LEFT_PAREN colTypeList RIGHT_PAREN)? tableProvider? createTableClauses (AS? query)? #createTable | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier @@ -93,7 +93,7 @@ statement createFileFormat | locationSpec | (TBLPROPERTIES tableProps=propertyList))* #createTableLike - | replaceTableHeader (LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN)? tableProvider? + | replaceTableHeader (LEFT_PAREN colTypeList RIGHT_PAREN)? tableProvider? createTableClauses (AS? query)? #replaceTable | ANALYZE TABLE multipartIdentifier partitionSpec? COMPUTE STATISTICS @@ -917,11 +917,7 @@ qualifiedColTypeWithPositionList ; qualifiedColTypeWithPosition - : name=multipartIdentifier dataType (NOT NULL)? defaultExpression? commentSpec? colPosition? - ; - -defaultExpression - : DEFAULT expression + : name=multipartIdentifier dataType (NOT NULL)? commentSpec? colPosition? ; colTypeList @@ -932,14 +928,6 @@ colType : colName=errorCapturingIdentifier dataType (NOT NULL)? commentSpec? ; -createOrReplaceTableColTypeList - : createOrReplaceTableColType (COMMA createOrReplaceTableColType)* - ; - -createOrReplaceTableColType - : colName=errorCapturingIdentifier dataType (NOT NULL)? defaultExpression? commentSpec? - ; - complexColTypeList : complexColType (COMMA complexColType)* ; @@ -1046,8 +1034,6 @@ alterColumnAction | commentSpec | colPosition | setOrDrop=(SET | DROP) NOT NULL - | SET defaultExpression - | dropDefault=DROP DEFAULT ; @@ -1105,7 +1091,6 @@ ansiNonReserved | DAY | DAYOFYEAR | DBPROPERTIES - | DEFAULT | DEFINED | DELETE | DELIMITED @@ -1361,7 +1346,6 @@ nonReserved | DAY | DAYOFYEAR | DBPROPERTIES - | DEFAULT | DEFINED | DELETE | DELIMITED diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 5eb72af6b2f09..3c8f0770e19af 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2728,13 +2728,6 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit StructType(Option(ctx).toSeq.flatMap(visitColTypeList)) } - /** - * Create top level table schema. - */ - protected def createSchema(ctx: CreateOrReplaceTableColTypeListContext): StructType = { - StructType(Option(ctx).toSeq.flatMap(visitCreateOrReplaceTableColTypeList)) - } - /** * Create a [[StructType]] from a number of column definitions. */ @@ -2761,41 +2754,6 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit metadata = builder.build()) } - /** - * Create a [[StructType]] from a number of CREATE TABLE column definitions. - */ - override def visitCreateOrReplaceTableColTypeList( - ctx: CreateOrReplaceTableColTypeListContext): Seq[StructField] = withOrigin(ctx) { - ctx.createOrReplaceTableColType().asScala.map(visitCreateOrReplaceTableColType).toSeq - } - - /** - * Create a top level [[StructField]] from a CREATE TABLE column definition. - */ - override def visitCreateOrReplaceTableColType( - ctx: CreateOrReplaceTableColTypeContext): StructField = withOrigin(ctx) { - import ctx._ - - val builder = new MetadataBuilder - // Add comment to metadata - Option(commentSpec()).map(visitCommentSpec).foreach { - builder.putString("comment", _) - } - - // Process the 'DEFAULT expression' clause in the column definition, if any. - val name: String = colName.getText - val defaultExpr = Option(ctx.defaultExpression()).map(visitDefaultExpression) - if (defaultExpr.isDefined) { - throw QueryParsingErrors.defaultColumnNotImplementedYetError(ctx) - } - - StructField( - name = name, - dataType = typedVisit[DataType](ctx.dataType), - nullable = NULL == null, - metadata = builder.build()) - } - /** * Create a [[StructType]] from a sequence of [[StructField]]s. */ @@ -3499,8 +3457,7 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit override def visitCreateTable(ctx: CreateTableContext): LogicalPlan = withOrigin(ctx) { val (table, temp, ifNotExists, external) = visitCreateTableHeader(ctx.createTableHeader) - val columns = Option(ctx.createOrReplaceTableColTypeList()) - .map(visitCreateOrReplaceTableColTypeList).getOrElse(Nil) + val columns = Option(ctx.colTypeList()).map(visitColTypeList).getOrElse(Nil) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText) val (partTransforms, partCols, bucketSpec, properties, options, location, comment, serdeInfo) = visitCreateTableClauses(ctx.createTableClauses()) @@ -3579,8 +3536,7 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit val orCreate = ctx.replaceTableHeader().CREATE() != null val (partTransforms, partCols, bucketSpec, properties, options, location, comment, serdeInfo) = visitCreateTableClauses(ctx.createTableClauses()) - val columns = Option(ctx.createOrReplaceTableColTypeList()) - .map(visitCreateOrReplaceTableColTypeList).getOrElse(Nil) + val columns = Option(ctx.colTypeList()).map(visitColTypeList).getOrElse(Nil) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText) if (provider.isDefined && serdeInfo.isDefined) { @@ -3699,10 +3655,6 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit override def visitQualifiedColTypeWithPosition( ctx: QualifiedColTypeWithPositionContext): QualifiedColType = withOrigin(ctx) { val name = typedVisit[Seq[String]](ctx.name) - val defaultExpr = Option(ctx.defaultExpression()).map(visitDefaultExpression) - if (defaultExpr.isDefined) { - throw QueryParsingErrors.defaultColumnNotImplementedYetError(ctx) - } QualifiedColType( path = if (name.length > 1) Some(UnresolvedFieldName(name.init)) else None, colName = name.last, @@ -3791,12 +3743,6 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit } else { None } - if (action.defaultExpression != null) { - throw QueryParsingErrors.defaultColumnNotImplementedYetError(ctx) - } - if (action.dropDefault != null) { - throw QueryParsingErrors.defaultColumnNotImplementedYetError(ctx) - } assert(Seq(dataType, nullable, comment, position).count(_.nonEmpty) == 1) @@ -3865,9 +3811,6 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit throw QueryParsingErrors.operationInHiveStyleCommandUnsupportedError( "Replacing with a nested column", "REPLACE COLUMNS", ctx) } - if (Option(colType.defaultExpression()).map(visitDefaultExpression).isDefined) { - throw QueryParsingErrors.defaultColumnNotImplementedYetError(ctx) - } col }.toSeq ) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index c03b1b45f644d..c09295884aa24 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -433,8 +433,4 @@ object QueryParsingErrors { new ParseException( s"DROP TEMPORARY FUNCTION requires a single part name but got: ${name.quoted}", ctx) } - - def defaultColumnNotImplementedYetError(ctx: ParserRuleContext): Throwable = { - new ParseException("Support for DEFAULT column values is not implemented yet", ctx) - } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index a339e6d33f5f3..507b17bbb5636 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2238,57 +2238,4 @@ class DDLParserSuite extends AnalysisTest { comparePlans(parsePlan(timestampTypeSql), insertPartitionPlan(timestamp)) comparePlans(parsePlan(binaryTypeSql), insertPartitionPlan(binaryStr)) } - - test("SPARK-38335: Implement parser support for DEFAULT values for columns in tables") { - // The following commands will support DEFAULT columns, but this has not been implemented yet. - for (sql <- Seq( - "ALTER TABLE t1 ADD COLUMN x int NOT NULL DEFAULT 42", - "ALTER TABLE t1 ALTER COLUMN a.b.c SET DEFAULT 42", - "ALTER TABLE t1 ALTER COLUMN a.b.c DROP DEFAULT", - "ALTER TABLE t1 REPLACE COLUMNS (x STRING DEFAULT 42)", - "CREATE TABLE my_tab(a INT COMMENT 'test', b STRING NOT NULL DEFAULT \"abc\") USING parquet", - "REPLACE TABLE my_tab(a INT COMMENT 'test', b STRING NOT NULL DEFAULT \"xyz\") USING parquet" - )) { - val exc = intercept[ParseException] { - parsePlan(sql); - } - assert(exc.getMessage.contains("Support for DEFAULT column values is not implemented yet")); - } - // In each of the following cases, the DEFAULT reference parses as an unresolved attribute - // reference. We can handle these cases after the parsing stage, at later phases of analysis. - comparePlans(parsePlan("VALUES (1, 2, DEFAULT) AS val"), - SubqueryAlias("val", - UnresolvedInlineTable(Seq("col1", "col2", "col3"), Seq(Seq(Literal(1), Literal(2), - UnresolvedAttribute("DEFAULT")))))) - comparePlans(parsePlan( - "INSERT INTO t PARTITION(part = date'2019-01-02') VALUES ('a', DEFAULT)"), - InsertIntoStatement( - UnresolvedRelation(Seq("t")), - Map("part" -> Some("2019-01-02")), - userSpecifiedCols = Seq.empty[String], - query = UnresolvedInlineTable(Seq("col1", "col2"), Seq(Seq(Literal("a"), - UnresolvedAttribute("DEFAULT")))), - overwrite = false, ifPartitionNotExists = false)) - parseCompare( - """ - |MERGE INTO testcat1.ns1.ns2.tbl AS target - |USING testcat2.ns1.ns2.tbl AS source - |ON target.col1 = source.col1 - |WHEN MATCHED AND (target.col2='delete') THEN DELETE - |WHEN MATCHED AND (target.col2='update') THEN UPDATE SET target.col2 = DEFAULT - |WHEN NOT MATCHED AND (target.col2='insert') - |THEN INSERT (target.col1, target.col2) VALUES (source.col1, DEFAULT) - """.stripMargin, - MergeIntoTable( - SubqueryAlias("target", UnresolvedRelation(Seq("testcat1", "ns1", "ns2", "tbl"))), - SubqueryAlias("source", UnresolvedRelation(Seq("testcat2", "ns1", "ns2", "tbl"))), - EqualTo(UnresolvedAttribute("target.col1"), UnresolvedAttribute("source.col1")), - Seq(DeleteAction(Some(EqualTo(UnresolvedAttribute("target.col2"), Literal("delete")))), - UpdateAction(Some(EqualTo(UnresolvedAttribute("target.col2"), Literal("update"))), - Seq(Assignment(UnresolvedAttribute("target.col2"), - UnresolvedAttribute("DEFAULT"))))), - Seq(InsertAction(Some(EqualTo(UnresolvedAttribute("target.col2"), Literal("insert"))), - Seq(Assignment(UnresolvedAttribute("target.col1"), UnresolvedAttribute("source.col1")), - Assignment(UnresolvedAttribute("target.col2"), UnresolvedAttribute("DEFAULT"))))))) - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index a4e72e04507b5..fed02dddecf78 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -318,7 +318,7 @@ class SparkSqlAstBuilder extends AstBuilder { val (_, _, _, _, options, location, _, _) = visitCreateTableClauses(ctx.createTableClauses()) val provider = Option(ctx.tableProvider).map(_.multipartIdentifier.getText).getOrElse( throw QueryParsingErrors.createTempTableNotSpecifyProviderError(ctx)) - val schema = Option(ctx.createOrReplaceTableColTypeList()).map(createSchema) + val schema = Option(ctx.colTypeList()).map(createSchema) logWarning(s"CREATE TEMPORARY TABLE ... USING ... is deprecated, please use " + "CREATE TEMPORARY VIEW ... USING ... instead") From 2ee57055fd917a1829adce06c2504db0a68e3ba2 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 17 Mar 2022 18:43:42 +0900 Subject: [PATCH 008/535] [SPARK-38586][INFRA] Trigger notifying workflow in branch-3.3 and other future branches This PR fixes `Notify test workflow` workflow to be triggered against PRs. In fact, we don't need to check if the branch is `master` since the event triggers the workflow that's found in the commit SHA. To link builds to the CI status in PRs. No, dev-only. Will be checked after it gets merged - it's pretty straightforward. Closes #35891 from HyukjinKwon/SPARK-38575-follow. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 5c4930ab96360fc8c9ec8f15316e36eb7d516560) Signed-off-by: Hyukjin Kwon --- .github/workflows/notify_test_workflow.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/notify_test_workflow.yml b/.github/workflows/notify_test_workflow.yml index 04e7ab8309025..eb0da84a797c3 100644 --- a/.github/workflows/notify_test_workflow.yml +++ b/.github/workflows/notify_test_workflow.yml @@ -34,7 +34,6 @@ jobs: steps: - name: "Notify test workflow" uses: actions/github-script@f05a81df23035049204b043b50c3322045ce7eb3 # pin@v3 - if: ${{ github.base_ref == 'master' }} with: github-token: ${{ secrets.GITHUB_TOKEN }} script: | From cf0afa8619544ab6008fcec8a25891c2ff43625a Mon Sep 17 00:00:00 2001 From: Thomas Graves Date: Thu, 17 Mar 2022 12:54:50 -0700 Subject: [PATCH 009/535] Revert "[SPARK-38194][YARN][MESOS][K8S] Make memory overhead factor configurable" ### What changes were proposed in this pull request? This reverts commit 8405ec352dbed6a3199fc2af3c60fae7186d15b5. ### Why are the changes needed? The original PR broke K8s integration tests so lets revert in branch-3.3 for now and fix on master. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CI. K8s IT is recovered like the following. ``` [info] KubernetesSuite: [info] - Run SparkPi with no resources (9 seconds, 832 milliseconds) [info] - Run SparkPi with no resources & statefulset allocation (9 seconds, 715 milliseconds) [info] - Run SparkPi with a very long application name. (8 seconds, 672 milliseconds) [info] - Use SparkLauncher.NO_RESOURCE (9 seconds, 614 milliseconds) [info] - Run SparkPi with a master URL without a scheme. (9 seconds, 616 milliseconds) [info] - Run SparkPi with an argument. (8 seconds, 633 milliseconds) [info] - Run SparkPi with custom labels, annotations, and environment variables. (8 seconds, 631 milliseconds) [info] - All pods have the same service account by default (8 seconds, 625 milliseconds) [info] - Run extraJVMOptions check on driver (4 seconds, 639 milliseconds) [info] - Run SparkRemoteFileTest using a remote data file (8 seconds, 699 milliseconds) [info] - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties (14 seconds, 31 milliseconds) [info] - Run SparkPi with env and mount secrets. (17 seconds, 878 milliseconds) [info] - Run PySpark on simple pi.py example (9 seconds, 642 milliseconds) [info] - Run PySpark to test a pyfiles example (11 seconds, 883 milliseconds) [info] - Run PySpark with memory customization (9 seconds, 602 milliseconds) [info] - Run in client mode. (6 seconds, 303 milliseconds) [info] - Start pod creation from template (8 seconds, 864 milliseconds) [info] - SPARK-38398: Schedule pod creation from template (8 seconds, 665 milliseconds) [info] - Test basic decommissioning (41 seconds, 74 milliseconds) [info] - Test basic decommissioning with shuffle cleanup (41 seconds, 318 milliseconds) [info] - Test decommissioning with dynamic allocation & shuffle cleanups (2 minutes, 40 seconds) [info] - Test decommissioning timeouts (41 seconds, 892 milliseconds) [info] - SPARK-37576: Rolling decommissioning (1 minute, 7 seconds) [info] - Run SparkR on simple dataframe.R example (11 seconds, 643 milliseconds) [info] VolcanoSuite: [info] - Run SparkPi with no resources (9 seconds, 585 milliseconds) [info] - Run SparkPi with no resources & statefulset allocation (10 seconds, 607 milliseconds) [info] - Run SparkPi with a very long application name. (9 seconds, 636 milliseconds) [info] - Use SparkLauncher.NO_RESOURCE (10 seconds, 681 milliseconds) [info] - Run SparkPi with a master URL without a scheme. (10 seconds, 628 milliseconds) [info] - Run SparkPi with an argument. (9 seconds, 638 milliseconds) [info] - Run SparkPi with custom labels, annotations, and environment variables. (9 seconds, 626 milliseconds) [info] - All pods have the same service account by default (10 seconds, 615 milliseconds) [info] - Run extraJVMOptions check on driver (4 seconds, 590 milliseconds) [info] - Run SparkRemoteFileTest using a remote data file (9 seconds, 660 milliseconds) [info] - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties (15 seconds, 277 milliseconds) [info] - Run SparkPi with env and mount secrets. (19 seconds, 300 milliseconds) [info] - Run PySpark on simple pi.py example (10 seconds, 641 milliseconds) [info] - Run PySpark to test a pyfiles example (12 seconds, 656 milliseconds) [info] - Run PySpark with memory customization (10 seconds, 599 milliseconds) [info] - Run in client mode. (7 seconds, 258 milliseconds) [info] - Start pod creation from template (10 seconds, 664 milliseconds) [info] - SPARK-38398: Schedule pod creation from template (10 seconds, 891 milliseconds) [info] - Test basic decommissioning (42 seconds, 85 milliseconds) [info] - Test basic decommissioning with shuffle cleanup (42 seconds, 384 milliseconds) [info] - Test decommissioning with dynamic allocation & shuffle cleanups (2 minutes, 42 seconds) [info] - Test decommissioning timeouts (42 seconds, 725 milliseconds) [info] - SPARK-37576: Rolling decommissioning (1 minute, 8 seconds) [info] - Run SparkR on simple dataframe.R example (12 seconds, 641 milliseconds) [info] - Run SparkPi with volcano scheduler (10 seconds, 652 milliseconds) [info] - SPARK-38187: Run SparkPi Jobs with minCPU (27 seconds, 590 milliseconds) [info] - SPARK-38187: Run SparkPi Jobs with minMemory (29 seconds, 600 milliseconds) [info] - SPARK-38188: Run SparkPi jobs with 2 queues (only 1 enabled) (13 seconds, 228 milliseconds) [info] - SPARK-38188: Run SparkPi jobs with 2 queues (all enabled) (22 seconds, 329 milliseconds) [info] - SPARK-38423: Run driver job to validate priority order (15 seconds, 367 milliseconds) [info] Run completed in 28 minutes, 52 seconds. [info] Total number of tests run: 54 [info] Suites: completed 2, aborted 0 [info] Tests: succeeded 54, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. [success] Total time: 1881 s (31:21), completed Mar 17, 2022 11:55:25 AM ``` Closes #35900 from tgravescs/revertoverhead. Authored-by: Thomas Graves Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/SparkConf.scala | 4 +- .../spark/internal/config/package.scala | 28 --------- docs/configuration.md | 30 +-------- docs/running-on-kubernetes.md | 9 +++ .../k8s/features/BasicDriverFeatureStep.scala | 13 ++-- .../features/BasicExecutorFeatureStep.scala | 7 +-- .../BasicDriverFeatureStepSuite.scala | 63 ++----------------- .../BasicExecutorFeatureStepSuite.scala | 54 ---------------- .../deploy/rest/mesos/MesosRestServer.scala | 5 +- .../cluster/mesos/MesosSchedulerUtils.scala | 9 ++- .../rest/mesos/MesosRestServerSuite.scala | 8 +-- .../org/apache/spark/deploy/yarn/Client.scala | 14 +---- .../spark/deploy/yarn/YarnAllocator.scala | 5 +- .../deploy/yarn/YarnSparkHadoopUtil.scala | 5 +- .../deploy/yarn/YarnAllocatorSuite.scala | 29 --------- 15 files changed, 35 insertions(+), 248 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index cf121749b7348..5f37a1abb1909 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -636,9 +636,7 @@ private[spark] object SparkConf extends Logging { DeprecatedConfig("spark.blacklist.killBlacklistedExecutors", "3.1.0", "Please use spark.excludeOnFailure.killExcludedExecutors"), DeprecatedConfig("spark.yarn.blacklist.executor.launch.blacklisting.enabled", "3.1.0", - "Please use spark.yarn.executor.launch.excludeOnFailure.enabled"), - DeprecatedConfig("spark.kubernetes.memoryOverheadFactor", "3.3.0", - "Please use spark.driver.memoryOverheadFactor and spark.executor.memoryOverheadFactor") + "Please use spark.yarn.executor.launch.excludeOnFailure.enabled") ) Map(configs.map { cfg => (cfg.key -> cfg) } : _*) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index ffe4501248f43..dbec61a1fdb76 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -105,22 +105,6 @@ package object config { .bytesConf(ByteUnit.MiB) .createOptional - private[spark] val DRIVER_MEMORY_OVERHEAD_FACTOR = - ConfigBuilder("spark.driver.memoryOverheadFactor") - .doc("Fraction of driver memory to be allocated as additional non-heap memory per driver " + - "process in cluster mode. This is memory that accounts for things like VM overheads, " + - "interned strings, other native overheads, etc. This tends to grow with the container " + - "size. This value defaults to 0.10 except for Kubernetes non-JVM jobs, which defaults to " + - "0.40. This is done as non-JVM tasks need more non-JVM heap space and such tasks " + - "commonly fail with \"Memory Overhead Exceeded\" errors. This preempts this error " + - "with a higher default. This value is ignored if spark.driver.memoryOverhead is set " + - "directly.") - .version("3.3.0") - .doubleConf - .checkValue(factor => factor > 0, - "Ensure that memory overhead is a double greater than 0") - .createWithDefault(0.1) - private[spark] val DRIVER_LOG_DFS_DIR = ConfigBuilder("spark.driver.log.dfsDir").version("3.0.0").stringConf.createOptional @@ -331,18 +315,6 @@ package object config { .bytesConf(ByteUnit.MiB) .createOptional - private[spark] val EXECUTOR_MEMORY_OVERHEAD_FACTOR = - ConfigBuilder("spark.executor.memoryOverheadFactor") - .doc("Fraction of executor memory to be allocated as additional non-heap memory per " + - "executor process. This is memory that accounts for things like VM overheads, " + - "interned strings, other native overheads, etc. This tends to grow with the container " + - "size. This value is ignored if spark.executor.memoryOverhead is set directly.") - .version("3.3.0") - .doubleConf - .checkValue(factor => factor > 0, - "Ensure that memory overhead is a double greater than 0") - .createWithDefault(0.1) - private[spark] val CORES_MAX = ConfigBuilder("spark.cores.max") .doc("When running on a standalone deploy cluster or a Mesos cluster in coarse-grained " + "sharing mode, the maximum amount of CPU cores to request for the application from across " + diff --git a/docs/configuration.md b/docs/configuration.md index a2e6797b55e2f..ae3f422f34b3a 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -183,7 +183,7 @@ of the most common options to set are: spark.driver.memoryOverhead - driverMemory * spark.driver.memoryOverheadFactor, with minimum of 384 + driverMemory * 0.10, with minimum of 384 Amount of non-heap memory to be allocated per driver process in cluster mode, in MiB unless otherwise specified. This is memory that accounts for things like VM overheads, interned strings, @@ -198,21 +198,6 @@ of the most common options to set are: 2.3.0 - - spark.driver.memoryOverheadFactor - 0.10 - - Fraction of driver memory to be allocated as additional non-heap memory per driver process in cluster mode. - This is memory that accounts for things like VM overheads, interned strings, - other native overheads, etc. This tends to grow with the container size. - This value defaults to 0.10 except for Kubernetes non-JVM jobs, which defaults to - 0.40. This is done as non-JVM tasks need more non-JVM heap space and such tasks - commonly fail with "Memory Overhead Exceeded" errors. This preempts this error - with a higher default. - This value is ignored if spark.driver.memoryOverhead is set directly. - - 3.3.0 - spark.driver.resource.{resourceName}.amount 0 @@ -287,7 +272,7 @@ of the most common options to set are: spark.executor.memoryOverhead - executorMemory * spark.executor.memoryOverheadFactor, with minimum of 384 + executorMemory * 0.10, with minimum of 384 Amount of additional memory to be allocated per executor process, in MiB unless otherwise specified. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. @@ -302,17 +287,6 @@ of the most common options to set are: 2.3.0 - - spark.executor.memoryOverheadFactor - 0.10 - - Fraction of executor memory to be allocated as additional non-heap memory per executor process. - This is memory that accounts for things like VM overheads, interned strings, - other native overheads, etc. This tends to grow with the container size. - This value is ignored if spark.executor.memoryOverhead is set directly. - - 3.3.0 - spark.executor.resource.{resourceName}.amount 0 diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index de37e22cc78d7..a5da80a68d32d 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1137,6 +1137,15 @@ See the [configuration page](configuration.html) for information on Spark config 3.0.0 + + spark.kubernetes.memoryOverheadFactor + 0.1 + + This sets the Memory Overhead Factor that will allocate memory to non-JVM memory, which includes off-heap memory allocations, non-JVM tasks, various systems processes, and tmpfs-based local directories when spark.kubernetes.local.dirs.tmpfs is true. For JVM-based jobs this value will default to 0.10 and 0.40 for non-JVM jobs. + This is done as non-JVM tasks need more non-JVM heap space and such tasks commonly fail with "Memory Overhead Exceeded" errors. This preempts this error with a higher default. + + 2.4.0 + spark.kubernetes.pyspark.pythonVersion "3" diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala index 97151494fc60c..3b2b5612566a1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala @@ -53,23 +53,18 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) // Memory settings private val driverMemoryMiB = conf.get(DRIVER_MEMORY) - private val memoryOverheadFactor = if (conf.contains(DRIVER_MEMORY_OVERHEAD_FACTOR)) { - conf.get(DRIVER_MEMORY_OVERHEAD_FACTOR) - } else { - conf.get(MEMORY_OVERHEAD_FACTOR) - } // The memory overhead factor to use. If the user has not set it, then use a different // value for non-JVM apps. This value is propagated to executors. private val overheadFactor = if (conf.mainAppResource.isInstanceOf[NonJVMResource]) { - if (conf.contains(MEMORY_OVERHEAD_FACTOR) || conf.contains(DRIVER_MEMORY_OVERHEAD_FACTOR)) { - memoryOverheadFactor + if (conf.contains(MEMORY_OVERHEAD_FACTOR)) { + conf.get(MEMORY_OVERHEAD_FACTOR) } else { NON_JVM_MEMORY_OVERHEAD_FACTOR } } else { - memoryOverheadFactor + conf.get(MEMORY_OVERHEAD_FACTOR) } private val memoryOverheadMiB = conf @@ -169,7 +164,7 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) KUBERNETES_DRIVER_POD_NAME.key -> driverPodName, "spark.app.id" -> conf.appId, KUBERNETES_DRIVER_SUBMIT_CHECK.key -> "true", - DRIVER_MEMORY_OVERHEAD_FACTOR.key -> overheadFactor.toString) + MEMORY_OVERHEAD_FACTOR.key -> overheadFactor.toString) // try upload local, resolvable files to a hadoop compatible file system Seq(JARS, FILES, ARCHIVES, SUBMIT_PYTHON_FILES).foreach { key => val uris = conf.get(key).filter(uri => KubernetesUtils.isLocalAndResolvable(uri)) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index 15c69ad487f5f..a7625194bd6e6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -59,16 +59,11 @@ private[spark] class BasicExecutorFeatureStep( private val isDefaultProfile = resourceProfile.id == ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID private val isPythonApp = kubernetesConf.get(APP_RESOURCE_TYPE) == Some(APP_RESOURCE_TYPE_PYTHON) private val disableConfigMap = kubernetesConf.get(KUBERNETES_EXECUTOR_DISABLE_CONFIGMAP) - private val memoryOverheadFactor = if (kubernetesConf.contains(EXECUTOR_MEMORY_OVERHEAD_FACTOR)) { - kubernetesConf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) - } else { - kubernetesConf.get(MEMORY_OVERHEAD_FACTOR) - } val execResources = ResourceProfile.getResourcesForClusterManager( resourceProfile.id, resourceProfile.executorResources, - memoryOverheadFactor, + kubernetesConf.get(MEMORY_OVERHEAD_FACTOR), kubernetesConf.sparkConf, isPythonApp, Map.empty) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala index d45f5f97da213..bf7fbcc912f54 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala @@ -134,7 +134,7 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { KUBERNETES_DRIVER_POD_NAME.key -> "spark-driver-pod", "spark.app.id" -> KubernetesTestConf.APP_ID, "spark.kubernetes.submitInDriver" -> "true", - DRIVER_MEMORY_OVERHEAD_FACTOR.key -> DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get.toString) + MEMORY_OVERHEAD_FACTOR.key -> MEMORY_OVERHEAD_FACTOR.defaultValue.get.toString) assert(featureStep.getAdditionalPodSystemProperties() === expectedSparkConf) } @@ -193,7 +193,7 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { // Memory overhead tests. Tuples are: // test name, main resource, overhead factor, expected factor Seq( - ("java", JavaMainAppResource(None), None, DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get), + ("java", JavaMainAppResource(None), None, MEMORY_OVERHEAD_FACTOR.defaultValue.get), ("python default", PythonMainAppResource(null), None, NON_JVM_MEMORY_OVERHEAD_FACTOR), ("python w/ override", PythonMainAppResource(null), Some(0.9d), 0.9d), ("r default", RMainAppResource(null), None, NON_JVM_MEMORY_OVERHEAD_FACTOR) @@ -201,13 +201,13 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { test(s"memory overhead factor: $name") { // Choose a driver memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB val driverMem = - ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 + ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 // main app resource, overhead factor val sparkConf = new SparkConf(false) .set(CONTAINER_IMAGE, "spark-driver:latest") .set(DRIVER_MEMORY.key, s"${driverMem.toInt}m") - factor.foreach { value => sparkConf.set(DRIVER_MEMORY_OVERHEAD_FACTOR, value) } + factor.foreach { value => sparkConf.set(MEMORY_OVERHEAD_FACTOR, value) } val conf = KubernetesTestConf.createDriverConf( sparkConf = sparkConf, mainAppResource = resource) @@ -218,63 +218,10 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { assert(mem === s"${expected}Mi") val systemProperties = step.getAdditionalPodSystemProperties() - assert(systemProperties(DRIVER_MEMORY_OVERHEAD_FACTOR.key) === expectedFactor.toString) + assert(systemProperties(MEMORY_OVERHEAD_FACTOR.key) === expectedFactor.toString) } } - test(s"SPARK-38194: memory overhead factor precendence") { - // Choose a driver memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB - val driverMem = - ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 - - // main app resource, overhead factor - val sparkConf = new SparkConf(false) - .set(CONTAINER_IMAGE, "spark-driver:latest") - .set(DRIVER_MEMORY.key, s"${driverMem.toInt}m") - - // New config should take precedence - val expectedFactor = 0.2 - sparkConf.set(DRIVER_MEMORY_OVERHEAD_FACTOR, expectedFactor) - sparkConf.set(MEMORY_OVERHEAD_FACTOR, 0.3) - - val conf = KubernetesTestConf.createDriverConf( - sparkConf = sparkConf) - val step = new BasicDriverFeatureStep(conf) - val pod = step.configurePod(SparkPod.initialPod()) - val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) - val expected = (driverMem + driverMem * expectedFactor).toInt - assert(mem === s"${expected}Mi") - - val systemProperties = step.getAdditionalPodSystemProperties() - assert(systemProperties(DRIVER_MEMORY_OVERHEAD_FACTOR.key) === expectedFactor.toString) - } - - test(s"SPARK-38194: old memory factor settings is applied if new one isn't given") { - // Choose a driver memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB - val driverMem = - ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 - - // main app resource, overhead factor - val sparkConf = new SparkConf(false) - .set(CONTAINER_IMAGE, "spark-driver:latest") - .set(DRIVER_MEMORY.key, s"${driverMem.toInt}m") - - // Old config still works if new config isn't given - val expectedFactor = 0.3 - sparkConf.set(MEMORY_OVERHEAD_FACTOR, expectedFactor) - - val conf = KubernetesTestConf.createDriverConf( - sparkConf = sparkConf) - val step = new BasicDriverFeatureStep(conf) - val pod = step.configurePod(SparkPod.initialPod()) - val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) - val expected = (driverMem + driverMem * expectedFactor).toInt - assert(mem === s"${expected}Mi") - - val systemProperties = step.getAdditionalPodSystemProperties() - assert(systemProperties(DRIVER_MEMORY_OVERHEAD_FACTOR.key) === expectedFactor.toString) - } - test("SPARK-35493: make spark.blockManager.port be able to be fallen back to in driver pod") { val initPod = SparkPod.initialPod() val sparkConf = new SparkConf() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala index 731a9b77d2059..f5f2712481604 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala @@ -441,60 +441,6 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { )) } - test(s"SPARK-38194: memory overhead factor precendence") { - // Choose an executor memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB - val defaultFactor = EXECUTOR_MEMORY_OVERHEAD_FACTOR.defaultValue.get - val executorMem = ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / defaultFactor * 2 - - // main app resource, overhead factor - val sparkConf = new SparkConf(false) - .set(CONTAINER_IMAGE, "spark-driver:latest") - .set(EXECUTOR_MEMORY.key, s"${executorMem.toInt}m") - - // New config should take precedence - val expectedFactor = 0.2 - sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, expectedFactor) - sparkConf.set(MEMORY_OVERHEAD_FACTOR, 0.3) - - val conf = KubernetesTestConf.createExecutorConf( - sparkConf = sparkConf) - ResourceProfile.clearDefaultProfile() - val resourceProfile = ResourceProfile.getOrCreateDefaultProfile(sparkConf) - val step = new BasicExecutorFeatureStep(conf, new SecurityManager(baseConf), - resourceProfile) - val pod = step.configurePod(SparkPod.initialPod()) - val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) - val expected = (executorMem + executorMem * expectedFactor).toInt - assert(mem === s"${expected}Mi") - } - - test(s"SPARK-38194: old memory factor settings is applied if new one isn't given") { - // Choose an executor memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB - val defaultFactor = EXECUTOR_MEMORY_OVERHEAD_FACTOR.defaultValue.get - val executorMem = ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / defaultFactor * 2 - - // main app resource, overhead factor - val sparkConf = new SparkConf(false) - .set(CONTAINER_IMAGE, "spark-driver:latest") - .set(EXECUTOR_MEMORY.key, s"${executorMem.toInt}m") - - // New config should take precedence - val expectedFactor = 0.3 - sparkConf.set(MEMORY_OVERHEAD_FACTOR, expectedFactor) - - val conf = KubernetesTestConf.createExecutorConf( - sparkConf = sparkConf) - ResourceProfile.clearDefaultProfile() - val resourceProfile = ResourceProfile.getOrCreateDefaultProfile(sparkConf) - val step = new BasicExecutorFeatureStep(conf, new SecurityManager(baseConf), - resourceProfile) - val pod = step.configurePod(SparkPod.initialPod()) - val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) - val expected = (executorMem + executorMem * expectedFactor).toInt - assert(mem === s"${expected}Mi") - } - - // There is always exactly one controller reference, and it points to the driver pod. private def checkOwnerReferences(executor: Pod, driverPodUid: String): Unit = { assert(executor.getMetadata.getOwnerReferences.size() === 1) diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala index 9e4187837b680..2fd13a5903243 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala @@ -105,7 +105,6 @@ private[mesos] class MesosSubmitRequestServlet( val superviseDriver = sparkProperties.get(config.DRIVER_SUPERVISE.key) val driverMemory = sparkProperties.get(config.DRIVER_MEMORY.key) val driverMemoryOverhead = sparkProperties.get(config.DRIVER_MEMORY_OVERHEAD.key) - val driverMemoryOverheadFactor = sparkProperties.get(config.DRIVER_MEMORY_OVERHEAD_FACTOR.key) val driverCores = sparkProperties.get(config.DRIVER_CORES.key) val name = request.sparkProperties.getOrElse("spark.app.name", mainClass) @@ -122,10 +121,8 @@ private[mesos] class MesosSubmitRequestServlet( mainClass, appArgs, environmentVariables, extraClassPath, extraLibraryPath, javaOpts) val actualSuperviseDriver = superviseDriver.map(_.toBoolean).getOrElse(DEFAULT_SUPERVISE) val actualDriverMemory = driverMemory.map(Utils.memoryStringToMb).getOrElse(DEFAULT_MEMORY) - val actualDriverMemoryFactor = driverMemoryOverheadFactor.map(_.toDouble).getOrElse( - MEMORY_OVERHEAD_FACTOR) val actualDriverMemoryOverhead = driverMemoryOverhead.map(_.toInt).getOrElse( - math.max((actualDriverMemoryFactor * actualDriverMemory).toInt, MEMORY_OVERHEAD_MIN)) + math.max((MEMORY_OVERHEAD_FACTOR * actualDriverMemory).toInt, MEMORY_OVERHEAD_MIN)) val actualDriverCores = driverCores.map(_.toDouble).getOrElse(DEFAULT_CORES) val submitDate = new Date() val submissionId = newDriverId(submitDate) diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala index 524b1d514fafe..38f83df00e428 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala @@ -387,7 +387,8 @@ trait MesosSchedulerUtils extends Logging { } } - // This default copied from YARN + // These defaults copied from YARN + private val MEMORY_OVERHEAD_FRACTION = 0.10 private val MEMORY_OVERHEAD_MINIMUM = 384 /** @@ -399,9 +400,8 @@ trait MesosSchedulerUtils extends Logging { * (whichever is larger) */ def executorMemory(sc: SparkContext): Int = { - val memoryOverheadFactor = sc.conf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) sc.conf.get(mesosConfig.EXECUTOR_MEMORY_OVERHEAD).getOrElse( - math.max(memoryOverheadFactor * sc.executorMemory, MEMORY_OVERHEAD_MINIMUM).toInt) + + math.max(MEMORY_OVERHEAD_FRACTION * sc.executorMemory, MEMORY_OVERHEAD_MINIMUM).toInt) + sc.executorMemory } @@ -415,8 +415,7 @@ trait MesosSchedulerUtils extends Logging { * `MEMORY_OVERHEAD_FRACTION (=0.1) * driverMemory` */ def driverContainerMemory(driverDesc: MesosDriverDescription): Int = { - val memoryOverheadFactor = driverDesc.conf.get(DRIVER_MEMORY_OVERHEAD_FACTOR) - val defaultMem = math.max(memoryOverheadFactor * driverDesc.mem, MEMORY_OVERHEAD_MINIMUM) + val defaultMem = math.max(MEMORY_OVERHEAD_FRACTION * driverDesc.mem, MEMORY_OVERHEAD_MINIMUM) driverDesc.conf.get(mesosConfig.DRIVER_MEMORY_OVERHEAD).getOrElse(defaultMem.toInt) + driverDesc.mem } diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala index 8bed43a54d5d0..344fc38c84fb1 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala @@ -35,14 +35,8 @@ class MesosRestServerSuite extends SparkFunSuite testOverheadMemory(new SparkConf(), "2000M", 2384) } - test("test driver overhead memory with default overhead factor") { - testOverheadMemory(new SparkConf(), "5000M", 5500) - } - test("test driver overhead memory with overhead factor") { - val conf = new SparkConf() - conf.set(config.DRIVER_MEMORY_OVERHEAD_FACTOR.key, "0.2") - testOverheadMemory(conf, "5000M", 6000) + testOverheadMemory(new SparkConf(), "5000M", 5500) } test("test configured driver overhead memory") { diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index f364b79216098..ae85ea8d6110a 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -54,7 +54,6 @@ import org.apache.spark.api.python.PythonUtils import org.apache.spark.deploy.{SparkApplication, SparkHadoopUtil} import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.deploy.yarn.ResourceRequestHelper._ -import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._ import org.apache.spark.deploy.yarn.config._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ @@ -71,6 +70,7 @@ private[spark] class Client( extends Logging { import Client._ + import YarnSparkHadoopUtil._ private val yarnClient = YarnClient.createYarnClient private val hadoopConf = new YarnConfiguration(SparkHadoopUtil.newConfiguration(sparkConf)) @@ -85,12 +85,6 @@ private[spark] class Client( private var appMaster: ApplicationMaster = _ private var stagingDirPath: Path = _ - private val amMemoryOverheadFactor = if (isClusterMode) { - sparkConf.get(DRIVER_MEMORY_OVERHEAD_FACTOR) - } else { - AM_MEMORY_OVERHEAD_FACTOR - } - // AM related configurations private val amMemory = if (isClusterMode) { sparkConf.get(DRIVER_MEMORY).toInt @@ -100,7 +94,7 @@ private[spark] class Client( private val amMemoryOverhead = { val amMemoryOverheadEntry = if (isClusterMode) DRIVER_MEMORY_OVERHEAD else AM_MEMORY_OVERHEAD sparkConf.get(amMemoryOverheadEntry).getOrElse( - math.max((amMemoryOverheadFactor * amMemory).toLong, + math.max((MEMORY_OVERHEAD_FACTOR * amMemory).toLong, ResourceProfile.MEMORY_OVERHEAD_MIN_MIB)).toInt } private val amCores = if (isClusterMode) { @@ -113,10 +107,8 @@ private[spark] class Client( private val executorMemory = sparkConf.get(EXECUTOR_MEMORY) // Executor offHeap memory in MiB. protected val executorOffHeapMemory = Utils.executorOffHeapMemorySizeAsMb(sparkConf) - - private val executorMemoryOvereadFactor = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) private val executorMemoryOverhead = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD).getOrElse( - math.max((executorMemoryOvereadFactor * executorMemory).toLong, + math.max((MEMORY_OVERHEAD_FACTOR * executorMemory).toLong, ResourceProfile.MEMORY_OVERHEAD_MIN_MIB)).toInt private val isPython = sparkConf.get(IS_PYTHON_APP) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index a85b7174673af..54ab643f2755b 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -163,8 +163,6 @@ private[yarn] class YarnAllocator( private val isPythonApp = sparkConf.get(IS_PYTHON_APP) - private val memoryOverheadFactor = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) - private val launcherPool = ThreadUtils.newDaemonCachedThreadPool( "ContainerLauncher", sparkConf.get(CONTAINER_LAUNCH_MAX_THREADS)) @@ -282,10 +280,9 @@ private[yarn] class YarnAllocator( // track the resource profile if not already there getOrUpdateRunningExecutorForRPId(rp.id) logInfo(s"Resource profile ${rp.id} doesn't exist, adding it") - val resourcesWithDefaults = ResourceProfile.getResourcesForClusterManager(rp.id, rp.executorResources, - memoryOverheadFactor, sparkConf, isPythonApp, resourceNameMapping) + MEMORY_OVERHEAD_FACTOR, sparkConf, isPythonApp, resourceNameMapping) val customSparkResources = resourcesWithDefaults.customResources.map { case (name, execReq) => (name, execReq.amount.toString) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index 1869c739e4844..f347e37ba24ab 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -34,10 +34,11 @@ import org.apache.spark.util.Utils object YarnSparkHadoopUtil { - // Additional memory overhead for application masters in client mode. + // Additional memory overhead // 10% was arrived at experimentally. In the interest of minimizing memory waste while covering // the common cases. Memory overhead tends to grow with container size. - val AM_MEMORY_OVERHEAD_FACTOR = 0.10 + + val MEMORY_OVERHEAD_FACTOR = 0.10 val ANY_HOST = "*" diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala index ae010f11503dd..db65d128b07f0 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala @@ -706,33 +706,4 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter sparkConf.set(MEMORY_OFFHEAP_SIZE, originalOffHeapSize) } } - - test("SPARK-38194: Configurable memory overhead factor") { - val executorMemory = sparkConf.get(EXECUTOR_MEMORY).toLong - try { - sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.5) - val (handler, _) = createAllocator(maxExecutors = 1, - additionalConfigs = Map(EXECUTOR_MEMORY.key -> executorMemory.toString)) - val defaultResource = handler.rpIdToYarnResource.get(defaultRPId) - val memory = defaultResource.getMemory - assert(memory == (executorMemory * 1.5).toLong) - } finally { - sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.1) - } - } - - test("SPARK-38194: Memory overhead takes precedence over factor") { - val executorMemory = sparkConf.get(EXECUTOR_MEMORY) - try { - sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.5) - sparkConf.set(EXECUTOR_MEMORY_OVERHEAD, (executorMemory * 0.4).toLong) - val (handler, _) = createAllocator(maxExecutors = 1, - additionalConfigs = Map(EXECUTOR_MEMORY.key -> executorMemory.toString)) - val defaultResource = handler.rpIdToYarnResource.get(defaultRPId) - val memory = defaultResource.getMemory - assert(memory == (executorMemory * 1.4).toLong) - } finally { - sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.1) - } - } } From ef8773ad5e90738a00408de87d2fe8566dc4acdc Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 17 Mar 2022 17:58:40 -0700 Subject: [PATCH 010/535] Revert "[SPARK-38563][PYTHON] Upgrade to Py4J 0.10.9.4" ### What changes were proposed in this pull request? This reverts commit 3bbf346d9ca984faa0c3e67cd1387a13b2bd1e37 from branch-3.3 to recover Apache Spark 3.3 on Python 3.10. ### Why are the changes needed? Py4J 0.10.9.4 has a regression which doesn't support Python 3.10. ### Does this PR introduce _any_ user-facing change? No. This is not released yet. ### How was this patch tested? Python UT with Python 3.10. Closes #35904 from dongjoon-hyun/SPARK-38563-3.3. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- bin/pyspark | 2 +- bin/pyspark2.cmd | 2 +- core/pom.xml | 2 +- .../apache/spark/api/python/PythonUtils.scala | 2 +- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- docs/job-scheduling.md | 2 +- python/docs/Makefile | 2 +- python/docs/make2.bat | 2 +- .../docs/source/getting_started/install.rst | 2 +- python/lib/py4j-0.10.9.3-src.zip | Bin 0 -> 42021 bytes python/lib/py4j-0.10.9.4-src.zip | Bin 42404 -> 0 bytes python/pyspark/context.py | 6 +-- python/pyspark/util.py | 35 +++++++++++++++--- python/setup.py | 2 +- sbin/spark-config.sh | 2 +- 16 files changed, 45 insertions(+), 20 deletions(-) create mode 100644 python/lib/py4j-0.10.9.3-src.zip delete mode 100644 python/lib/py4j-0.10.9.4-src.zip diff --git a/bin/pyspark b/bin/pyspark index 1e16c56bc9724..4840589ffb7bd 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -50,7 +50,7 @@ export PYSPARK_DRIVER_PYTHON_OPTS # Add the PySpark classes to the Python path: export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH" -export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.4-src.zip:$PYTHONPATH" +export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.3-src.zip:$PYTHONPATH" # Load the PySpark shell.py script when ./pyspark is used interactively: export OLD_PYTHONSTARTUP="$PYTHONSTARTUP" diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd index f20c320494757..a19627a3b220a 100644 --- a/bin/pyspark2.cmd +++ b/bin/pyspark2.cmd @@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" ( ) set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH% -set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.4-src.zip;%PYTHONPATH% +set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.3-src.zip;%PYTHONPATH% set OLD_PYTHONSTARTUP=%PYTHONSTARTUP% set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py diff --git a/core/pom.xml b/core/pom.xml index 953c76b73469f..9d3b1709af2ac 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -423,7 +423,7 @@ net.sf.py4j py4j - 0.10.9.4 + 0.10.9.3 org.apache.spark diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index a9c353691b466..8daba86758412 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -27,7 +27,7 @@ import org.apache.spark.SparkContext import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} private[spark] object PythonUtils { - val PY4J_ZIP_NAME = "py4j-0.10.9.4-src.zip" + val PY4J_ZIP_NAME = "py4j-0.10.9.3-src.zip" /** Get the PYTHONPATH for PySpark, either from SPARK_HOME, if it is set, or from our JAR */ def sparkPythonPath: String = { diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index f2db663550407..bcbf8b9908ae5 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -233,7 +233,7 @@ parquet-hadoop/1.12.2//parquet-hadoop-1.12.2.jar parquet-jackson/1.12.2//parquet-jackson-1.12.2.jar pickle/1.2//pickle-1.2.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9.4//py4j-0.10.9.4.jar +py4j/0.10.9.3//py4j-0.10.9.3.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar rocksdbjni/6.20.3//rocksdbjni-6.20.3.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index c56b4c9bb6826..8ca7880c7a34d 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -221,7 +221,7 @@ parquet-hadoop/1.12.2//parquet-hadoop-1.12.2.jar parquet-jackson/1.12.2//parquet-jackson-1.12.2.jar pickle/1.2//pickle-1.2.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9.4//py4j-0.10.9.4.jar +py4j/0.10.9.3//py4j-0.10.9.3.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar rocksdbjni/6.20.3//rocksdbjni-6.20.3.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar diff --git a/docs/job-scheduling.md b/docs/job-scheduling.md index f44ed8245e286..4ed2aa9112224 100644 --- a/docs/job-scheduling.md +++ b/docs/job-scheduling.md @@ -304,5 +304,5 @@ via `sc.setJobGroup` in a separate PVM thread, which also disallows to cancel th later. `pyspark.InheritableThread` is recommended to use together for a PVM thread to inherit the inheritable attributes - such as local properties in a JVM thread. + such as local properties in a JVM thread, and to avoid resource leak. diff --git a/python/docs/Makefile b/python/docs/Makefile index 2628530cb20b3..9cb1a17ef584f 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -21,7 +21,7 @@ SPHINXBUILD ?= sphinx-build SOURCEDIR ?= source BUILDDIR ?= build -export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9.4-src.zip) +export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9.3-src.zip) # Put it first so that "make" without argument is like "make help". help: diff --git a/python/docs/make2.bat b/python/docs/make2.bat index 26ef220309c48..2e4e2b543ab24 100644 --- a/python/docs/make2.bat +++ b/python/docs/make2.bat @@ -25,7 +25,7 @@ if "%SPHINXBUILD%" == "" ( set SOURCEDIR=source set BUILDDIR=build -set PYTHONPATH=..;..\lib\py4j-0.10.9.4-src.zip +set PYTHONPATH=..;..\lib\py4j-0.10.9.3-src.zip if "%1" == "" goto help diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 3503be03339fe..15a12403128d9 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -157,7 +157,7 @@ Package Minimum supported version Note `pandas` 1.0.5 Optional for Spark SQL `NumPy` 1.7 Required for MLlib DataFrame-based API `pyarrow` 1.0.0 Optional for Spark SQL -`Py4J` 0.10.9.4 Required +`Py4J` 0.10.9.3 Required `pandas` 1.0.5 Required for pandas API on Spark `pyarrow` 1.0.0 Required for pandas API on Spark `Numpy` 1.14 Required for pandas API on Spark diff --git a/python/lib/py4j-0.10.9.3-src.zip b/python/lib/py4j-0.10.9.3-src.zip new file mode 100644 index 0000000000000000000000000000000000000000..428f3acd62b3c024c76cd331c7ddb0fbad923720 GIT binary patch literal 42021 zcmafZV{|UevgRAxwr$(CZQHiJV`s;T7R0_2Oh6lXrN!kfSV04O8?0O8-}E>;%yMz${Wj$Z%sq9Za|eZ~L(q8bgk z_)T`iKc5EVz6Y4EFl<+8oMw>{o-I8Vvpgsl!bcv)@b8YfvX~TMhkZ42?)>PUdC;hU*g{ z2RUXrTR8Uem%|5NPTF|wE(4V0bf3giQK6=za4y&%prO?$(iu8`+6MMSkprEaC;H&}{JdVdqT zH&$#YEl*Ad?~c6#3ZJYJLR znyxvtp*AXb?CE??m8zb;Sq@@bfd(D6Cb)$s2Bh+SWzk@AA%=r9v?K`5HBuK?r~j%7 zXIlUc`Z!yTVi4xSxoHHR0Cbp8I_?;UG> z35gA7kDHrgZTk9JsPnMB>}r?&Tk~Vx!#iNpr7^*SSnype5as!|BJRLT0`NRoE__0@QHDBoWiyFInY z2tbOh*T;|B;dp?J{%mkb`p8b^L%b)QnRo$qA_Qun@%G~ylVC(d0*9Rk6XxT~=ZG_J zGKPpuoUkM6-~0{N6AR`4zE+$6%CfR#z-5x{S6S7^Pc@0UxJ~RqYVCQk8T!a zeZ~~%aI(-=VCn#2tP=&H=IiY+S{njGiDw|w72>$mTxLD4Upk;AKYsr{TVurX7B5SNT4L ze;{i6pm<3ro6Ff`-X@gj|I#|zEf1l1j1vztWKa)i%02HjT_J}BRkRgn5U8WEw0@2( zjP`IJ!th?FNm(JR4U{SA0puycw;lM5)Y5 zw(saJEJydo6xK$92f z8m+72p0xRq+bcp(=gHofO>>MqekOv``(~rD92P=D(L5T;gjF_?`$rlM1s4A;D7lhv z6cYs8oKLwRWlB<`CdLCqZWFQJ(BeKCr_V!gN@uRPA+0L$hRM_0X>{GeK*r*^J?gdV zheQ|Gb1aHg9K9hgrt_RWW*Mi(w~o`G$Ybulkbc0KtOMe=@j;y%Fc5ewW$$nN5W|h`l+@W+c#p|A@K9sNVAF*~h|pMrF>qZZ@B4B&fvB=9 zM1gj>T@rxp570Bl_Sx+Z3D+9J6Xe-WN#>Yibdq6<^%d#k zAGj2zQQmp9AzIYDTYN3Gs@{>Z%b)h(HXCML5pOZXU1_3x3R#0%p|B*mTZWuHf2Ac( zl&ccRvObBBIm3y-@l&iK;aj9^5Pc^Q_Pa;934R9{T0rdOl3Y%?XKAY!^o-3PmyO@@ zopbR)_j5G!W&8F$$jZEY%wKbkjX%FZQh1w*7lva|XU@feIFo;2t>8E_x`Z~u@Sb_; zsP(y*`Kd>?ZXGc@`YK)3P@TD^e^y7Jg!tzSWieY9-oydy6ypB8qwbjw8BfS2}pBtvD8@}zrq2jol&C`3O4ZK77HN? ze`F264jUAEA;14qG4`);QTZqeT?7FD77zgdq<@Etxz#_ln3cDg^M4aD*BaX~o8pK8 zs|HNE1lAZj#bk5DoM(EILWEXCfA~`mbYw_uT&naMx}pz5|K8?&C6KK@YExWPjG2JT}?`W`$?zX6=R_CtxCK5QEK4RSkr?5{3Ct(wmj0rpsAwsMSv0s-5Z$nddoI_%i zQl+D}F-5h>?1xbb8tkub60AYWNFP=)Xz>eCozpx9mMnfge+uo#Q9*#7xh&n@rMn^pMyLf#k zX7eVD=NlNrAN`z3#(iG?LYOsqx-3hZc;YKVhLy|cGnKVr4dhmGh`#tpigywS;jAu|6;L(xm-h$tuNpZ$F99mZ2SCZ#7VYZkD(+v32 zBwrDLX7r6Pb_mr3ap*O4E}*jQ!fwSqAZwVI8_wr=Y?bx%|DXdLctw*nOrsSwo1J5p76b+Clf&q5gFonhoaNh<$!v!7T>^Bs*wv@LcXeg)G?JQ?ER<9V-5a*x}Gg$DYqq}-@*oJ=OhDObF; zfGn}@jTeR_BhxtJ+-O8KY4z8%ZPr7pKRYya(^gCvdsMJeBJDG2tLd*`APe6;5p^4B z>3evhYaXqZe!@*s7%HYq;37x?7mKH3P+JUTNuZd^qS?;KWz^}h|8)|Cb^@bJjODPi zA&+zzrlpcIc{U}TR_;OutroUDN>+fV66)qvP|eYXbR4O9SjQNpo1w9Cf~{QinF9F` z7TFYuw0;$z<8LQ#8|*Y-95N?$ZcZv7t%SmNKyDOxfVLbsPfiQJsr9B%o%8z8kE3Jg{VdE+$PF*2~{ zxpGHtL#9h!8A-(I*JFMQA;Ch^^kcI1GNp_fO+*PYYGlcIv&7-gBrxCsBo-wUfnLR? zXyhTmN45ID?_gsnh)>lXG~u^Ls-BU8kk&UPo+%++gE0vD8Du<5fq*k^(C>p^n+OSL zjh9sF`^N>Qth49)TJa$VEP!W0td=@lXaaYL1o;nLxJxTD@bLL$gLlz41X<`@y-_E9 zgdN!6-~G4!mb?_FN9*e`0X=WW@3RcwR1)GQTo0lMU&I!%BN|2bmfz^$JVmJP=BoJI z#+FyTxKk1izAz_Jc$SPKt0vqx0FrN8&i3b!{i=S{-S2yd?!QrQR#Cr!SNGly-=FLp zd4T+b$A@hKI1$9tA$8od_f2`jF^a}H+jHdYzs~>Gz@fO~8jk7-Q}wt%w4?cg6oEpBShy*l4s`a zrn4~1J$tkNP9P*5EewzH)SIyi9WJZ={R15BcnCkyXQT=Yu`DDT3 z%blNn;<3o&$ojiq`R;gkPBeg+#Zl0#odJf8C}9!|GwdKVgLp1b7ZP2Q44tZ=>IVnH z&{L%cDe>8mv!es6@3h7msO6m`kGZeo1d_%0+uJy6yu;EMe_9U!=Ejm9rdh42^*v44wD@8sx3xPA`GIm*tdn zGwu7+l=}j{XwYwI zk9?CSEw`B&s&Ipt<4s0jcyk8I5O}oKUs7Un?!^&fE+Zo3G~MBrMt&a+WXB3QJBc?uRFaX8blZ zzWw*b9{CQe8-x`=J-S`X#qN!5;Kz=f0Gunuuu+WiVq_&g@_aWPS7P6$nRvmOI;<;)g+5n zBgOY3Q6%qIQ%VQ%AcxB!^i}SN&xp`F2SYeE<89{=E=srWEVpy(4C`j8vDdDbjZlv?x_IQ>PfU^+h=d{uY5-2?ZGWo%X=Y1{N#S$9QWa0;Iw zSZ1U33kF2eYv;+%pHnp$`{vB8^m+>mX1M8ve8clfl~~!Y0tB}t+$pWGLZ9$NSZN4l zik#zl@?hxQYnPRRlbl99SrKqHHXlMrk^H-v0@2a9!3O{v_eTQo#0{O6h0~M|&|g33 z687{~Y>PtI2w<+{P%zUhp@tj{&!g^d_%l+`hNaVKAf-{`DnY}9d@raL1p-B@Az;Ot z-~jPUXsm#s0Im>i{$Y3#eQ@FDRMc+W>YkWkV+0xTH`hM`Pqslq{YSv$w;3oZ_srMr zzUVP9EBUpM(lxAd!nKuRbk=`b)ltD8|Dax~rJ=k;k#}KUIiy~vGgXN)sF8N0N7D)> zkSBg*=n`!73G zHCs{PjHd3zmFw(RPHvuodJ^P6+-gIM<+OB<2-KP~wLV7c;il>}RXGM9*IstUdR^BW zqOem+J4N_$jC)?P9*+ABm6AeRDW~~iEW`;4BT-`fmN%>nyMgbRz9u?nH6JriUw$-spmn>F6(;@S0Qc6URbyHzVVb&8sEr-$rGOO z)J|7I-A?x&VTGt%@)8-O{QBNG4PTnIRLbdSj?8waP4b|}N-bKe>`!72qj=0XAOApA z?z{YZiBUeb-ZI90_k73P$#$V!V= za^Nnq&%HS-!?k1*0T$;L%hL?q|B2RsCn5#DMkE^o*KFaT*UY~>CH_GWQc-YfK^hmI zey~c@ZM^PC6?3*M;ad9OF`iOJ$4O6!naBlDA;?DgTyJDXxcC5wOhY@yXF@~0YNX{q zwW?E}W0dI+NgW|B`6YOOH6oT6lMiB);F_B9O1t+7zlo7iVv9AZCO|sip^houP>9Vz zxHP;D42A=J;8~xPINXDhLFn70Psy`gPTL{t@VXj+Z_ijWK>*C{Hk8hyUIz%yxr$P?5o z6rh%WpnF7Fr>^r8;WdmrMV8Txr#GPlU#SvGKt4OTNbHX5txG+V{%tZRo6DuHMNN$xG3kAxq`=Od-))DyO$@KZ8D(WFZ}s=K1l)Pm8Y zR#Qq}+HjO@G@?_cL=N#d`GiDFkoF4$i;L|P-&xamQ;~a{CJAf(4hD_}W>6Z_m81@* z5a!Zp!e!@F!JrAn^~Tn~-4}}vz0pn9vx5#+X$D0tL|W`+j#m_`6!oI!}j(FSXp> zoLzKq0u)=F3ZEt9dAqxY$BXR3R>YfQe4UC&egVT#wO4A4loN10|LT3G0GHziPm z2nkmJ!uC#BxXN~(y5al!Q|mt@tHzYeSI->PXAGT$6>s)-NZuwe&j)Ob-JT<=+ttgo zbk z?9I*=8)k#?&aBlSIJsP2qWTJ++^xPeY5epEG8ZwOS9jW&#Xl^Ip?#S04Yf(ye0~bw zrFFBc+SYnkbx%VToA+rJ=^i|&87F3PGT3cSq-Eo9QC$`tx4P9U;)+OQjpM}6GX z>X@mx>0U4j@b|2c(GFv~|s>F<^B;q!oqkD{4{A3gCqfLwYdP(HDcniD(nD zGTW(D3$jfb2ijD*X?Vm2w<=EK;RY>q=MvjY<~Zc+pxo`tHg=j`TCBy;Rj6muYDg(Ox}ZDlvUr3R4n6Jg)F%O{84o+1m+;ypQ!q10Cyr zzX@(xGgR6kFQ`gY(6pv@?eRDm^%7n{9~(~W_>@(B!Wi&{l<)w8yRFU?72 zx8iAa<>r45CPkn0fS(3ACLF4;I5va!e6V(kH0znf9Y(6eQ$MakfG_tf*NJ8ZW(sH$ z51X1{HHoT%J{*A}dEqTzibo)US~dyI(P?&rFHUiUU2WVM=ww8pe4~BhaiUBs#P6}L zE^0dZtBvpFkKX539rW%nrn9bv@Gl_qp!W4>=8J!|fIZv+nGa5P3WvsFl4eAIT~Nh- zmz1>Ly|1G4n;(m$1^k^sQ%Q>UGOy6kxO$~+9lgglXd6FyWmv;LfZ;rSqLoth3vs{@ zk#_y4GbId|BEd0S3uCW!Qk|Qyfr=09J*H~&dP!h62K4qefa*Xukd3J0raPXYuMXH) z1H(wU^>>}_sev%fq_@;pf_Akk_Vxk448&uOnO=pgEB)$yuyS6bD+C?!wA=|&p|{{n z=OF2^rLICf%NQpgE|su-&r*+84`YW^U$z5jAqpY_0BgSO;y085 zg?<%=i?8MwH4YG0XJ#FtQdfU_1sTPB&&IHI@+XhMCjN z#lW5P6vJ}UA3WH1<8d3>vWw35Zy{3D3v}S8LG&-LguB0`uy6BIZgl)DO>*R63+?nO zp6Y**3zoIJyZ?B*NAV^HQvbHpM^4wmSo3878nEUu8xJJYvwdB~0mfjuaMjO(Xk^?@ zH>o!t@Zk!wW7s=HP|Byb(5*W^t+J)BP*k|qba4K-n8y`PI@v%5Fekv!m(%W!zSUvt zHmc^6DXo1p_`h2;q=q1Ca{n}d9R892V3iCEtn95^4GjJpR!K!2vYho^8B$VqMqXCo zSej;HRxL7KfrgHXPC7PTV{D3UjDcE7QkHsVmWoa)HsX$G;>obltU_!!^W95V_x@gy zT(OK!Rue%g1?E4IT?vWGDjal#{VkdRK%FlT0QKKwm$i|*k%5JgtC@$9*T0_@z-{+m zHK?w1FWooAlfGxm&I(?$1EIhs<5;=|U7keJEG44si(+!BM;q_5_KyUjNxerG!8aiG-Y3k+hWgT^=W1E1JRL3Rt;lqG#DbE(@S&x zqwO}Mx>s@aitce$x~vY{A>y2A93+dz+(*c1-Kg80?8wfeH#fW9e-d>Esw)#cp-fMyRbpB=Q&>_bo5d@hEgzIv_1Gt+#S$ek%z$8N zA5n{|3u(o)%%2sl==LQC(PFYw}o;0S( zsqhg*<7m+s5{w}Tfz*c({k)cFR|ZKqWSua}{8Uu3lcYOW>St=ycAy!nv4T~HC$I&) zP;Pk8;m@uz^x$ZyDZ!nST$H~)tFl!Ub{zRfR<)B< zpa#=UmGEHJp|;nk(9#Z(78`D)N?(JhE(nGYQDVq9NTFQnDo}Kzl|ehU2IJ1S+_A!w zGb3q|WGcGT3(!8%;UPp>BGwK>mefVjrB^k1W>P1c8R&jk&6l?zw5rlhe_HlI;9a%s z|L{XShfrF&e4dlFaq66EbP&SLtOT|LEBNNeL~@DBf^L!g4RH16Cgo5h`@s9A34%?S z3{qr0%{8tQ^O$2vF)AFrL&|+l?EfZAkuuJDM{S2u>MV1317ao03gXckcMF)i{L}6K zZ0(DrhyV47>HTNzYtz6d;!xvrbyQ>idw+lU?ExA8HIMIS)e+>>NW?4l%7@f}KTf5E)SYG=UI@$^MW)pnmyc zkKcQO=2+YXLG7f*14tmx=vnxlZ*5lKsA*=IHCCgUvbvMwqGQ&rp?sjQ2(7WK5q6~U zsg$Jyu#Sd0APqExfGyH9Q)^<2EGv50umtv~8Wb>_FFgwBr|(<2l&w(kfI(r*#!NV|FfkM)*zPvNtT`4;|`f zvbVFDl2C?IvgSRuNye!GJGMt~SM8vTLD_=dI?GoG8Cpsly|+sV(3RJDoE7*t$Du)x z5h&2}V=P%}!;Qb}H5fhd&aUyeV;`{*+v2DkK_NLBMKu+5Vl+*>f9Vp+XAIV#(9MT- z5Z6?BMJFg2sR9kmjg{z+xC{lSOVq{7*Kv*n?F6=vapBE@;WU|# zH{wu?0`PYUy0Hgr)u2`(LBPz+t27`%L}G!FMulyk5*i_+49YU#?euR@n(($T*28rS zvvEa}O(D0V8X~V_a&DBlaJ)u?4p1zy#&$4?zno^HvRn)LxA!BC~ z7OUIJvM3c>Fs}*E1cD(#EoKL{zw6m_idWh9T(p9o95uj0WfFzew*qIqaLbGF<45xB z;9GeAcw^?^{* z1=Jki3zgBtc!yKIoo*|fIpE6dIUvso-WBC{q-vvjT!qW7Jw7%md<8UzBLEH2nl_oy zhtSdQvy8xrSsZ<{WjxQ$3PAC7-OXpW6i?YY+oBHC1(NcMszZp!srT8FKS|JCCicH=&n1qQhGi?l(#`-C+j*1!}yRF0QIAT&ANNV1TfHOGB7zutc?>5)S5@^uTN} zP_oGh4>V7Cg*1xsaRJ?&=wBZn|2{Wz?F?W>H|R*8pN9%Bm_L4=l*cKPwC`gL>iG&d z@_SpB;sV`C+7OQeN8kj%Fi&~>f^jFSPpqaSbx@!Xg*0BE4*KT1dl8wbHtdyeBr}X2 zgzqolq;nn?@Z+X)*0w3D{}CfFMN4hW^l3k?ltyYnT@tKguvH6Oq#ib4q5;F3Z*B^{ z>bzcP;5!KNxYaT<`b9M)a)wmT;2DOP`y9LPJ&tzbMFeyqx^2(XyB0qJM;7RvPGo&td$*!*t=ym43iL#3IHvL7Y zL;@9W|M>I z`Pxxy*^&=(4fGn0AN%5CYBjgVK}tOY(hrR=GKapWp5Ho~MzG=&1a2_q)6iNGUac>~ zOYR0@kkfA(qwrmU{Cn?J1mz^xUR$jyYBh+6f>BUV%(Od9U+G$`YG^3euEs88YC~cA%a8=NUUQgp@fvU_klM<`e1si3^tfgjH1@wX| zOAZ2`7$s8(0w)~_zHJi=66NDN*_vKaVRHmEDj!V})INq|1u7kHQqKTItIn=DDvR)P zAVMbXk;wWks2T&zL^Mf^DxuOqpT;N{o3J_{&ifq7<~rJ`uIpik_VKxf@?pf-89fJ} zFb%8NE6N2V4=bcK9*0&Nb)<0+#oLLfJi|!R>gs6pBdUQ(YRVLWkf4WHc;DpQLN=Uj z%8$lS+Mq-=IxI<`WfXQ1Q7#Z7(d!i9QBs_DUXQpBkYKz*r(9T_P*%GRi(#gcb1{t- zX$%|X&j%T4l1-&Y&^%ERN1bU9y*Pt3h;( zN*}y!K*T+HzswPGdG&J6t+& zO*;c`gcV}{Z07u%t|oz)fWKQ29`^mrYTrPSe64qcu#SPdntbe%MB4pqK%9Sq2(D5c^VTu1Aep&_wteoGhg2h=h5WO|oYmiIqMo^xh_M8M51YU0?nLuz-p1FQ;`i-)Vgg2|m z{dRu@9LY-?C?%(Y{7h$b$pfD=r<4BLa244?nqIx_n&=IDG~$&1=nwAvOQJ|RX4}nq z$^cEKIsO8gRKePz3MT%?;x*`*ws^=*Vqi@z%m)PM&zOeM#L|gGzpww!m+50__(C6} z+m)fUoqUcENVkXzye@4|3vs2|xH7Zop9BMnPTjP>IcP9rj-#!Qi=ekuvHhB;{H+L@ zp`N2|Cfa%Lfi*unkkb-X_IXG9cDW7~5LwrPh)ya68#gId6(28q!yW+$347)|Yt_41 zB~~gNXvZSpHH*2T+q4!zafe80IB&A-=Ag$6sZzt6B@5*VRlVkP}PnprEN$m}<{f@a4oMe5eT&K5-KzD7#CnRt;q|Z-blDor|L!g%~fyh0GxGkx@XN)|RKowl1^5{Hye-}2wY2d&x`_zx7*mHLAh2if-qWZXRef}H7!Ua^f_cy6 zRQ1;H^nqi8rwzi}d1pj_ofE$@46g?|?_F9&m>rwh5c^AhwlMjAeDXfb2qoFl9kfn= zJkII%INHHBzS5~Nks4hd3vnH2ejxem4*wj1JK6nZ6^>gQZeye()}hF>7|-(J zBM}1qoS%FXsmh{PpKL!QoF8>avCRKYNCg7M3X4m7JhXhv=GM0d@Qn|LeIGplN)_)| zWZbdz8xSgZPHT{HK|BRNQCi-eLz>nhkB<%zX%r@Rjo*)dgy`$sjX0T}?%;>ax})97 z%*6_?Oy_$QN^fhXdymg{_e}3#-x3{-p4OPDV85)VjkcZpw7a=@2$`L{*VdTXQ-^*!&B76H= z1=B5AeW4Xi0JNHn^>dZVa2*k}ccb}JT==R5W zyezbLe3uEUw~As*F1}3S4+5~2n2@G>AtMpCC%NHxtLlFG^8ru^b9B;5Vy2<0EXcOy zCTEJ$ALInIB^-qi2&qzedI_Hf-CB_1TUnYouvIP&aE7k zH~F2GG;Q3Z6RUkt^GVS|^I~=F0nbsS+@DB>*o>oon9izY+hJ2phhuttI!#=g zzEXPGERn)>g}m#^!9~6Yc;3Rc7A;xqgaYc0q7*E<`8&=*F0HVX+RxtD7o5E2ymGgj zTAdQ98usBP-1~u1PvT(ed04m=N-0lv>Sdlykmp2P`ZX_N_vCSCba9$D zRFkp#VUC}QsVP>qUSlZO2rS3)hRol~d6BflR?NaZ`~J?8YuaABzI4YZOvwrXQRuyq zGO(OIIPDmt8!B*Li3#t|X|w(`wrx>SlH!$Kb208GiGzj2zL;NG@t+!{B73u1`S2)4 z62AM#ZL1pd!~2^tvpU?JuTc-SQJ+1jL-i$8$3QBGgV5mC-* zwLp24L-0Q?32r9Jl^A)~Pn^-}h><}wd3RSat`ok6YaKN0NtsbjN@FKgS(XO9#aVKy zLRg1s9KaZk99t3e z)M*=v0XMc9%={X=(^;H>7HEVN5mD1x+AWlBo$pk8%`g8)?mf}tvAGDW+M?FG;#hZT z>HQ?_#%u}ZbgrV`@c`?*((E<#b6Ku{;Y-unuH{Z+dkc9R&cnkaFLMsQZB|>0&FUTL zwuTJ!pcf{$4L8Ey#BTA>(Jq_r-V&Ec-yg)!v5on z-nA*0=FGt9JiDRPE@q`|Pf7wuQx@0taGgg>i3lFkwiaf+-%k~8nDaGtk-|#P+<7`~ z{^Bo@fY|m>!5IScxd3Lq0}!(7RNG1Crf)-O9}6toQ%SeGeI>RR=LE8GGz}A}Y;tI;1V6W^E$s0I__@ zG%#RBXvE=rZsT2}UB^7r$1P@s9q4@0U2mLuyF}3)28!8do_eG`@JMMvRKjV4_e@?2 z0UKOkPUbWkk;Ew*XSIRL$U%Sst>X?x|5i7>3w7j&l(mG+0P*tcxK+GAS9oBm0yhwk z5kjbQk#Gsc&N{aF71Y`aP^J%?u44!TL> zseOs{7zoyOMuKmA#msbQ1tqm_@$RFQ-P`-bL4On{JAZ6%uWYN-*mAxHYe+ATE6vIx zUisdz!AmhVSo6!ZuQUL8V+iur?VgZNi4yB4aKkX^7f_1)^2#n49D*rt;als-ig`UP zEKo}ex!gN_4ziV%ihxAIV`1ukU^_QdJ_eYG=K?enRIz4>A(r+vui$9&dZv@f4Mt&{} ziR3s)MU$~D9kY;ExGF!3i^+@o=l$<7i--IFu-)rB=@2#BeyZ7dynIxKB+)E$28uq$ zP|Z7sZ>K*~@uvSqMK^v)hyU|knK*sEyRCO2k380Dgri#sN`nrqv z$08n_g)z&v!c36Ejv31AB3{mUHu&Jcf*=gL?Ohi z)*HPa{Hmy1I@yZ*U7GY%NIZpqJP*WNE?6D+vTVgfnLvdc=|?0Bxr~DvcQJ2aL@j%B zryZme!u!{Q@BCErk6`%uNVmf-C$n@~;WRnhK2HAm8b)?u9V!OsWMuaxrc3;B9DFNn z%h&8E56q_NA=?*2eE&UfR)qh%O*SFj^yD4S=~R{N7nfU0cb6HsoQ(!<>f0zg8U)rf*cX@S;eExWu2;Se6GWsKebGZOA+L z>^IFR_hQ5n@6{i-Pn-oy6VoQFKN@cC?#M`)k&o`g&oKD{$7h3($d8XSvFiq)X|ayV z;p67z0h>SW-*;FN!RNIi`ni-if#^%C)B)#?x!V@0FB!d;cEYRyy)hnln`N|QxC8T} zS$_n}zzv{lxI%Pl7|a=Ko>&vnA@6#In7J!c#03ZCgCF9NK`HCU3!&ovSu zSnf{+v1^p;!$qLuWpC{hMTeM{DG*8y{(Xdsu%Z0i^fnsVLx3tYEKC0rWXE28WBuD1 z1ZVxm*yArSE41O}eFQ|-2&r%+=@zA7rJIHbXU-hqUnfW)!@)e-Qc0ajBh>|VI+Y~g zJTk0HU1_B#+Ird%Q zD53Pg97t;L!6^~hz}WQELXsf`BThC^i$^~=@qk&FNhYJa_%ft9lRJ#pq640&KRAQ5 z``$G}LUlx{Q6q)$nYqg}@s*$XEd>4V~iEk6{SB*Wu7;}x7RI7bTlO1ubMGSAlT|)Gc{Niud7r2OD z!d#8yh^==zx~Pph4+NBKk$Y}x`O&z+9)6WkrJ2HDf)t)xEWs~ROAv7a zM%c^jdbQU}!fz06wV^h_@t7-CWMgj1hBEI%agZ3%#MWN2X9SU1N_F1pOEY|pbxtIDB-$?d zzY`tQpUKX})J=E79+9)5e)Y2w!wGsq4pyU6f&ba4=hYVn-CkA^Phv+ITq1TdxM@e? zN5;=U$}VUi4Ha%kv6b1s4FEu%3w9E^{$$LY)~&)gciSW0pU-SCRg7cj+y~zG z+@(vJa^Re=4A!7Ok=rk@o1YlAR^k^;Lbz4|-1eIE`_u-%Z=x@;`IN@cw<0suxI&6v zXKggdv9(lWkpLG9wyYQ23supptin0YI>&~W3^z+VAKPKUHSMXp`z|<)H|i(<18P8( zzmJAO1i~K-?UKKETrfnEfsbSdLx8&mj-KHy8(yx3gZ4UMh$MUXD6%FvF?`S%#&`)r zqM_C(NPw&jt@;`OBa8smp7W_u$V~8Y`T%K$t~1T2ao&S!WgMA|Hc8I&N)UW`*A#G$ z*S%6VoZU4m^P)4d%ef(yM`qx4V?CIpZH|TFmc8a&FQ^<*o|5$nUKH{(0EKFk@{m+> zRK$KqU^$3v8!G9YMY&8j<)jMG=Ws}{nvSx@iXy)!g?*J6fc(V$7RO-S@9ULry9lsLudqGUB>9hTwbn} z1qD8iPK*EIYQ6wG9i6_$dO%rCik(RE`ts|#! z-Ssxb+%JJ_?(PT3rtY_(t<33$mNQ?40;6&W-1SacG(Cr`5Td5*h<4$GbIBTh1BTe| z`q$x!!xU(NH-TUv2oVENMr6Ry*XONx&BIADw@LkwuD3voiH1l6Y8rEPmj64m;?R+OupV zWwTENh?Kqt*uS|2!gi8PaaK_T6CSy_^(*5b@cPngOk<8)1GXf!M$hNVa$KoQESSw< z9^WWe7V?gX850x4%3)!o>M=F$oM+H+`~4VtWCMvA`=c@R_OUSE+MhE;^uwo%4^t1_ zhvePi)%NH#IIeBx-9qKRn1~%zn@+@K&C+l+2ZKRa8Jx@5_s(YIEx;230R5KEDKF7v z&=hvi!j=}h_esj;jzHK*kcG`FytN>crGR95Nq_gAve47@&CMMqA@`-9$nF%Sj+Rp8 z7y%vFIE4(9xj(y4vK2VVPJ64vA#fMVO`Q)B`O})lfPgT-kNM>yPIv8@LY#Zcm+Nvl z*{}fpLY&jw9Ski(E{Ry6nB;F=Xx45dFSO2D90SZ>^+J|nalJNFclY6wbPe6Hww-pN zR&UG2lzjHSpS~z66D~e_K?cLC`b{;&l#1GsD;Qjx%W5JYlwzzPDZf}8-Q(6B8r1-=hdY#4J zn@1>i3h?USvE<{-|MD>=)J=6{-H)-$us)^gXEg_;?#U6xf6Q^?amhg&gPtfJ^Xzpa zPYx!0_dE$>1$=4*8J&E14Rme(Qyc4^=qam-r_5Npx)At_~S6GoGF29M5p{RYlw zV0A>j%Cd^!swr%8>j0xiEPNGdyLQlgNl*q!mj*9C)s63sMG1LGg@|LA)|VA=V?pX2 zG?fT!+^TjqQQ0;}_PKPL@+WRZaQ(NsmxE}%d-0*suwUL60W0958PrJ%Nl8mwRtF0t z!KP$Sy7Z$Xmxp7G0paqF#f*!3R+?XQ;?dyn1!oTPSFsSQ;5|_&6M4|r zpmRtGxmuGXiiyTRhYW5$oKKPfh`6xt-=;-T3Q!+eIWwU$<`I(=Ga^8Zk)8*_Zu#@E zq1vurJiC|X*}_OKPGpHsuX)Q-mKHdDp=XrG**pIzG@wloZw1A8*DLu-Lr`mCX0%&g zC$|-+8AJ0FSz}PAAS1Q@Gp-NoNJB;hGm`&d+WFYrel$4JTN~36#YPCOpJRdoz zv5;k7I)2^gpKUf@hM5WXm(S)2w2oLIFNEjlo9gajZtlUMB#jJ>Yk(*g6@rHa*bAXh zB2GhICw7U2O$(YbOq&tKz{kKRv&}sGOW=(BQ2|gQ#3mAoieRId7GEcdlG#p?PxdW> zbk3}Eg$oqbEQ&L;s%CvDvIPaa(u z@nce6Jp%OMM~n4z`smY7AH!1_guo z2Qrmy5vRDZjF~K|s5c56=#cpSm7h43*Ei zG86!aInDq=LH-#pCCbr+2{_oigNUVcO09{usNkUO~OXY9m8Hxt>L-QIgLjn5hB7gF;oiRagd7^q5{yt%(yp@bHcP@ z(;UUV+OL5v8-k~h8Q@jb1t%g?p|^z+Y&+eEe7rce952wWM}bef$v3hPifg3}Hc(8b zKqQoACweifYA*SDhAt>B1i%_5P#FHUUapLEQyi6$1hN{m#doIEEJ;()AK0d2-K10Y z9I#Tpnb#oupxVH7g^Si^$@v4?3ChbMNDXkXi3>BVHnfLqj_$hen(e)O{L1{#?~f0e zZHNBpa{CIj7<^RDPY?EvJabcGoSx@jLU|D3&u(aAQ-9jIG}qlma#M-MJDRi4kDOawKu=5CFd4`*%?y<(jX*k$UfVFup7ngvLE8B?5fr97rX^Nr|>i! z?PVjp6|G)EeQrWyNl+sJ`*`69D&b+FjLmyBvVc4zxE-V9)lchq`3!U0-Ry-+>CP<3 zglY&fD_$B7uG?8&gdl4WCfgkKKnkC&RFJ|&bqOLhrvwpt!C66= z2!m5NPfWKJ!i1$p5MgOhlWhfp7V->&d+dH)OI-@|;2kY>&OE0ZdlyTcGSBIzS}k?= zwYvufcNu}(2{F~URlS)EwD(iQ7Hm8ZcFQgQOK7AS>J0g0o}PSnJQQCMO{qhe)E4Br z)X8fzRx_^Vr~l2_Y5F8luMb`a)H_#6kvY&!{W=|{zPn3xF380xk#VG%w; zn9Zn`tV4w71%6BBF}s79mZpf5wq!DGC}c^!dtLkHZNC4atIb~NE%;?#BsOtq3qR}K z+RYjn?loqk6HPTZFH4HI**RQ-yE=?9lV3idx#lZLbz2mo2Nh&~H;2tIf0RvD8dxfi+%Vrl7=?tGOuN{C|AlRgZ2Lhb z3oPOsM0Z_>X@!@Kb@KlZc1d|iuFYL@GH4=&wi`M9Db&Tzr&4*FJmAs10qlen#i=%2 z0l%V6$vZ*(Lr}WYe*KrH+N0ewW#YiCkMOPMFC~j@-B>8%Lt?NWJ->Tey?Iov#t1IU zxu;Ysn3%}rYcnJZ36aFIvO^w}ic}ZgBw9kX{JG{6nq&?_pTl1yT}fcG^y1M}SAk0} zfKT!T2yiSxdc>~@ol*V_(T(w6(FtmKg}2Up8Tnu!$DN3@XQ8Y_RuSCK{DzQ)BebOV4-+T$YC zL)3V>YO-2*=DJd<@8+&mffws~I?*TZli=vdO;U73A)6!Jw>DE(rUyU^{*aR+@qDy! z6g~oLJacvC;UDk2;F{R);7=lbfqJqt_;FqbzAZ1K%u()i0afkkdnJQ-6)(8%OU+c< z4uH4JthN?9051u-EtdJ#RuA?udGvhTCLsaoO*z@wkM_C+d*FCrlnj2E5kvVX5~sDB^AEyJYt2CrtBgslL1(LLVz7@5ARKurafKiw_2c z3!@~wG*Ygg@yJkR$p~!Qh%!zkq?aW5>j#WR-!T|!h;W=ObpE!jXDK*L+Z>?CbyV+eD#h3zb;I=_JsE{qh;|tTML9-h zMA?qQkO6}Q0>F_hS+vz9JNC#32+Fn9g=8C1A>lG&tC4caFqShwfN~Dp0hr{sM~s8> zrIGiQl2`)Gzdg$PibzIR58sm{3g?d^el=BqN^DzCO@?mI7J|(1UXskv17zGJgyaAl zhqA4^#m~P4Utw`f07yhYM@w-DNPRk7%opovnRR5y3N7$?UEL+MJ;7S21V{lP*;tcv zASLLJ+Y>K%IoV3Ipjot3uOA^JS@#MVOsv0FZWiP5Zfa0{b0cY?8{JCfF>C=0R2cC8Ie08#&GE}&8a=|7y^H3&1p90 zj&U6Opu1kI-?)rZ*I_uN-)cMrLQ=%R)9Eak85%NI`% zPg}(c`MA68xUHAGG$J2gDhWA^%@J+x5N#=*_#LOOM&br__<$!O zhGJT@UQA4TT|J0_c{=J<7nUq!A>L&pUCE<^^RG^}+r{m7aGTwG-<`Ynu6>vt6V^Sq zIbLB9MTsm@D%FI}c>Cwm--i3pc0KlYrSY!kSU_Nb8B!<-R;P z-*i^DJEzJH^?s_5`yE|tE_Pv0$0z0%9({gbuPrpbI$DtepC=Y+_ky*usYmjPoOa*G}#nR zs`_bdTBMqr7nBA#P}A@xX9jZJiu5>u68j)%d-a-CJ3!=MH9td{$FH`m7KwTviTzu1 z6WmxwfixM(*IH?{T&EOY`|wkw_u8W@VEE;zSVmef{6Z_i8o*Z^vl+i(K*R@bWMp+8 zc+FbpEVRF41Snt`Gn(-f`Q`kbJ*` zrSBSePB0%^d{#UmFFJDReEiBW!e=k`_an4c-trH67=vTcY31{RK!&Jahyrua7?>bj z@?L)Z?U5h+1_S8z{v?{+4|6(Mw7LJGd|z?RKNr8x;kDA<%O|QR>0E~A-#A zBB!I+G`N(n*w(}XK3fwEU$%Wm5;6Gs$-T$~cN<#e{#?$J!{66kvh@U|+WV)!_Eo%(*ys6*9)?b&)FwP3erlkfkBfgE_WX8GVR19nd9 zfsBkgDpS)5H}tKowKTgizmcj|3AJ9;)4?fzmuN_RE4fo#Dm)ov-N^GMu3wC;MWc5E zR~j&C?ejZntgiB%PFZHtLD*jT5LlXCUe&< zsv`j?4reT<2{OO{CJ_FOezrB#-@0nhMxZdqdcb={(&G~rDWIB7m9nd+%H`yY@faz^ z3?7BCJxQeTBuFwle*v{qDZXiF6_HtLrD_KHAoYRX-p$uo^BtxGXb7~dPbjJro|tke zo1up6Yc9Vz_#VjO-{jcOm+HKK*6~!#yb?JjGk@)=nSq*;l9giJxfi8y){+k|Oqhlz zr!Tf0{-HI-@1!a=*XzyAu%3iSV;@FuY>pZ=-=?VK5>~^;c~lG$86G};kSR+)Y*aQf z&3>7t&jWKPU)@g>aZ7vwG1zgzc~f~hfQh*8CF`8;GMJIOxd8eD>MzWQo;1G|IOZ)2g&25 zi6&34U8be^X&$&GMq5UJ!hYIp+9Kn#_CV_kzWATVpc+3t-+8r3O4}@HZ>f9ZW$CF% zW8zYhuMB-P0#ugytw3&is8<2hqnAV4%cSNW6;vAYlOxXWE_F^|5ba)a5VE-!d}hVFPu(u(xz1^dFmx%?O&~@ zhKoal5MdiLbdeW+SFK-=;S1m52B2Tf@(e-ZBum|H(WNu?@>Or+(|updn=1utczE#L zc0NK`Dj&(}ZBu*s_*E1P@h6d)+f8(c)S~*zUN^&tgXp)VSKJ@dSvr4g6Iy1NuG#j! z{P<+&l{ramoMy&`nK@BUURkT=@4}pF)yF-KYKB8gvUBgo&G~j9b&b8|TCJKNmf^~p zE39xOWBgMxT*E$L-V8Z|4bvO%Z%NolS01ziMm}))tZlt$^xLv?aQK6P<-lqf8&nDf zSmDoE;Ewn}c8PPEY?~$)A^ZAHI1iEnZ2O}eydxJUSwzeGy=%H-Cey?;y(BTbjJ}enA%h+cr=7A7^c3CDR z!9)wfS+2KS36a5?^#xZPJcoP`HVhl=016S4~;g1znohk!>LCIr{z*3tVM5iEm*JI=%Pz z=U*AN=K1N#-@i}f`o@=iDDMW}Y3_luDw8=={#F@^yBq@gVQL&PGBgiA=pibQnhh6K z8?LzttN(-mWNW)~zJkk{#G zpX(9DOpH;DD?O&vh7}AIT^up+E&KTrt|pujr|OroUOOk?B79pnYO5Pa2G}7iL$L_O z>yX{+9Z^IrLGox;L?FCJ!4Noa5cmb)DK04iDw}&GCkkA|uzP)cI4#LN=(5*)nnnY6 z=CbP`c?Q1PEr>p>J|7gH)%NrvTjpQSw;dh)1ncg|kmeCRN}+#=-MSP3aX*F`*z>mo zc=ha;;Zn3*vOqpRp6f(3x7AWzfo3j{@h~&G4bpP*GO*{#=X+-d#lL?V@cg#~lW*p+ zo>k4bTvS8Q{35hzW@{9>ekxIH+gjzwMFM0RUXAD-*+lU9Oz`M-?ex6B>;3D>tQ1_J z3D`sFE*6~6*xR&REU?t8Cf$y@T}sgC^o!1u!DIL*I>v#yNuSSqbi+07(UDua?YeR0 z@6k!}1<&>uOS5_a$+n2A2@ggBcw0uCJ98<&`uyQls}bclQCWW&zGtiT_3PL*jwT}W zNiDs68EYXTvje*{QQABI+ipnkvdf!{R1eP| zv<4R&9I({fx1lw8W+NLfY7L5q$IoA!5BE=wzdblTSJ`qly*++gUWDm}px6*OF++Pd zS=d8;u71rFG-1NJl03s3aUUFY^p%qvyOUHZX9uTdjqly#8*oY$FO9@gl5=L*jUCD2 zbX)Z#QS9*PeFC#Q$N;6^(iIgjCAO)azknF06W@V{-`Sm!&u<1ChIn4zR7dr6S~qc; zxlHaDdLt#hQZ~Ub3=pl}iFY)I{T7rB@&+cG+NiF|CmIaJ!uzOwl;Fm(3Rd|Ns%=DF zb4YToh^m-h8+~5aemv5FPKTL1-1iH?Vo3 zZ%WMBdOUe}Vz(SeTlSS?zr?eqGezp`8lfs>UsAc-#CYC7g-OhO3 ztmZfHaCL$lfw+8N7BO`ZffpGv3<9YQHG~{pTxD)Ut;#;5_PgRbu#iDyNpR6&l7_x#$ zdSvLtU84IVgf5f{8v_-<7nLbq&zHTJ#KXev6ekM~@Sx}!BAmjv4vMETtC>x(?(O$_ zR{sQuM3g+nTsAsHx4-V0MiK- z++Y3b+S2UxG}b@Hm|+3n$?{mlh^F-$W0?brgJ zjl8~PEwX$Ngr}-q17ETrGBxsA2fB_UZzv-m(T$a9s)Q9X^w~D3^rGQ4-#a}GQdOoe zBA26Ue(xLZjthrXN=Y<2$S~nuQJd{Quo70f8@F!j#MAui?ynhakwXgDf`I5`v6Al> zcG!8ir857XnHmix*j%Yw=P3tml61X(Qqt#uykpGY4E!RWjp7R#40H#?oDr~ZBc4Q* zutT!md0UG4Lz`sw(5~tE4n5JQcf?On7QH6(cx5=|a5KZ?yxWSu;KLluInn|8Dbumd zWMsY0p%;CL^Ku#ZQcDwvRvGh1CI_F#zDK-u;%H|0?|UEB|BudMPO42uFoJ0tQ}UuR zfmb~t|K3|TjAgWc@H1}DE~bNh_k-)QtAC(FtDEthe1D6xtW&~=mOC{VUoiPoo_I>1 z{pUFDvMcX*&<$6e)seRH4dFrPil0I?>4`yyl%}LUFnidNLZ{_DY2C3Ty5{>BV(sNm zd2PO~szt_;(q}YN)MKcwN(;>Z<*L=_s+(>^2$en!rF?Xp1OGKh`Rb`*ZE~}si!u9n z&pi=@ymUtn2P2{7NKJPKni7JNItLV+H<%(k@6>J7qy zZmi*r8cV}EC$(=0!q%4hDYVKl0SR@?$))uA-`ar(yBMSz%zL}|9zdD9mh#=%;$C79 z(dHe9+fKi+>K>}xLVc}o@$b!1`I??uQb_PDO=TC=jAjcCveh=^N_&f=2Edf#?2q3+uxceYw8sHot@E-B~u_*!@6EsM}@L1dmJJf>-$I|bMs zAM$V3`}?w<0=6BcD#hTIyvC|?Rjq#b;gc`s^Nu+jcS_xGbZ7q9ho98HpO=Ig6VdPs zgkd)&Fj!m$2*>cIT#c{Iz{;x!uyk2w7CDfGom6{AxcPADc-=#J}CaS2mH~z+Bd9wmFMAS#9;a0il?d>)H6) zDrEwGqUn?C^J92MmZp#E-A<=Z_ul>nq-efaHM=ig`48x%u7>lOVe)GmAtRwm!^XUh z>r~8&E@8wgrdX5GQ14m&-_=mQXUau>emE5PF?hn+jtTmD_5~bXE$8cn4cH;SHE`0F zKaFoEiBXwbHEhg=Oj8W#GhD+Ud{K;lI$#Wg<^+h15z^dAR!pY6#ea+7%?9f0&qf^5 z!qF4%Rn>I(tg4EgOEbcqk-yog*8LKu1E9*_a^TvYQ4B9I)ASDD;}0FAqe$QYQS=Df zFwKamlbG{>`$S$FR5Z9q&7v9;i9i%H5Wg4K)eRUgr94vlh|A@p%Y!JvKssj*DU~?VrurL0o1vAyIPhucxnX^B8^3vD!QSi$#u^>8n_kO{d5V#N4DU|JvshW1gT&ogBlHb z@l#-_P9$^T;N-AHX~ZGfXMAY!^?{Bg6JEU znHNj=ue`Yd7@^u_3K2*bSOa?Z!aj?6GniCwX69iXxPUVZKwYd{U@d(vK@Bpgn(?w; zAlO}L-rovYfF)8U^V^{K4%P*1FnAIn28T}kst#=FbekHRiYOh4%VPaaIVPgVz7w%8(+zy2Y@wr<*16ZFooZXJ- zu|d&e3XbTM4TG6~_>GQHvN;J?H@v~Ec?jtS@|c47bGSg^W4OOsP9N?+Iy?OG@c0}; zV?SOIf4*EVR<4~xmPP#IY)E(059oFP5r7M9*2Ns6sp0qgENFsUcTu^6h1^av4{oyn z7r_F}*H_ok_9!kg*b#9Ch3+9#7i2qP+v&|D*HoV`m|@Hy2!hN@P)HEoe(v4%3Sn^=x_L;{X2d|KsLQ0m9O? zb=AdNr8XweVG@(=N3@AANcm1Yvd@QTfCmk-F^Ym!wPXe!2E}fxutWi&%xWqT$;n53 z&6?y%Cx}JC@BK(B3TbWnU2wuu{3k6s*`KzQo^oIK)wOV?ekKvi{NJV&gf>W=%g9{z zCoi5zUpucN2j>o5>@7wCK_V^KNTq;{z>=sY6!z+K&El^Df#(Jy+~lqYuj93vh!KmU z$x+Ux{*!j7){lxZR-^8{Qun^sVGY{HlfZ?6Br2!wHWrYMT z)YRHlU{JaxLCt7qEoC5DkO@P`Bk^DN1SuiMPfrid&h8m} z5sTvUsfgn9skbRW=$EEl*b-(JnR!LYE=Ks+pbQT}kW*1`7Lx-`05)aW<>+zyV*rBW zBUmE)*8QTueq%_7R+E{(W&_l$V8$dmk}bBf4-?ydFJK0_C;Ts8GWbsvL{2f^n*x>N zFmP^qi@IFWuii9MFz2nRx7&2DN{7SG0K#b^xLe#-fjfO#Hz zr7TIOn4W|Khl^ngE{8TtD$N$#KM9h@w#1|Wh!p^(0;unL*6j^6?I-COi=?7LG9nWfGDf@4QVi&}Bb(LY`8rZ0a!JK6tccy@kzuy>@F6+CN!_u@Xg zam%iB+EK_WJTNlyq??^ah%x)=!_|7h{6f8W^Y8uQU;V%M=rOQFa>M*?%^Jc_U=vA3 zO@UW?1#l}vch{jcy{M_uOj5Au6IV3!eFBLq;4^O;NPN}VV-GU&8e7`T5-XbQ@{9Km~PtpjKf7(r7bBCBHTc-(46 zOr6%;RP+VU&qi*?yG`hblr58BUfk#iRrU54427M6w#NL>_0p%Hk!5;4Z&n$P$jtoZ zb+$_Y)Hh4)-5ikjL9H>0EACws=|r)CMd7^&fh!EUqd+48Un&P}G${6TQ=u58Aw$_K z1qLVjK0s3d!yICAN5+hIQ=aHfb!*)JmH`3e#wss(?8CY70jF(uj-Hm)EO>2j*UZu; zU9r5!fHt=KZ%G1}_9E?^(JSiCgxP&VXghJ13t2kbZbF-POYk)(UtL^$na zF1NhB-arEJ5xB&z`#qlq)n9`M3L)KB;Mcl=yrst4SDo%|GtPWsR{8w2uFY21a2-9V zK)lJ83UW;>D*y;9jm!!& zCHIrtDpQym7U+^O!u!HaM3+9?y)|4;iJE{GV>tpoK_s4ha>`7!nYR^gD;GS3ZHhj> ziX%ar$|u#55j+umwkHiqrv1M30Y2&-iXxG`2_Oz7XtI_8$8%XDm`uBH8!Ml^Qhcp3 z^>{D!Rk&h{d9&L3-i8KT^~jOp&C3Q8mm0CvwrUiNJrKkof~Vbp>xZ=@)9ggArC3yv zQ%*d&?*kJljt$^SGl6a0U** z)GaQ{dfHHc(Ouzr)9X*T=94+!VszfgOT)ZoeMM6Gnuhd?L>>fJ89<=4U!8pB7e7*_ z)py>B%|ZC5bS@`$-yu<)buxiWvOhmbDOBOZN>w|18gDP2rr8Ca3m;8z-v)Fi`s)CK zuNGz!34|y4kTNjQusyE=Y6K2a?7ySon9|L%r<=>E=YsMHf1Zb^w>G_vYP<2JH!XDi z)(6!LH?Xr_T|WGePGX%X@;IE(mwTt5?|pe-X7daZK5jj?B$&UQhf}O zp5zUUUdI-gLmc=oreH4`Uy;O7lGA#oIYv82t0TMC8#2?dIYVQO5fFq z+bHFs?(}o8QSDGFAE_K{GdOrtg&rHE$!(vGR?_`ylX3V3U0n4DJcQVN{4NT$yq9h& z%z|th4EYrgOieqL(_&5U!^eHsa~sq&YySd9 zYzrO?Sy=0$2mtoP)i&J(ClO~K2ef%Udt0 zv4tHaek;z=-<#b@ssIVfss{1dj47J;>9AWgCPxV{O3px`5m;Ncu<1ewTnTdg5Ir!I z`=nWwXvqK{oHPSn4nzZG3X<(!%&hn=LVDnV|0l0Tn&FAs02lEJan^{*fiFW<*@wMM z{t)7OgakpIcOnQOA+Y!3rMowv7`*3IWA8ku_!ot3x)a~U6tZ#W4pN#!?WB>Ym$=b` z($GyqGx0aSz&i|Ib_Sca>cBANPa-pPP-Ik4hMxwO;B8&0l(jZiAcJAEhgI%~U*XF` z!@wpGcjN}c;}|9xqW~|PX3y?Rr13onqN;5ek6Gh2jFZ7kPGID*5*qsdr zB@o95Q4+wCU;r%Ocl3hj0NWOZUGj*PV;JT`j#kl|ZL_`rE|dfACiQsbTCxXhg!I|a z=25>GO{!^Ee(%8_Y>p&cANMC#SC9H`?id{kUqT^}knu{5HjcPq)6+w+PHo7P9wt6- z^WMh!?iN4);{SP8yeg++;COf+(n zEnZG0el1(c;!_hEePK?y5k$;US(gP!)|k4_K1IQ+XIPO$=rcOYZVk^TAWZt)~m!@{P*;WVAn^g~Ky2Ca^cA(*wH zYBpcb>*`MJW!oZYk5BB>RDGqJHdo~xn}$o~x9f>4aIu23-qxCJ$(oSVAVEO~NKo&- z0H%WFa-kR9y1C@>z1*TrAP%;{9)<>fUN-d@qeTogM0x%J{bINqBM_VWDM`H~&xbGr zUmTns9Pb|t&%b}p37w|p&BdfF$mJWE_%L!zC8^4m>Y0H7-pco%emy+<=J2`buntd- z&xH4%Lz=ob^%Bupkn?bVd6}&F%B;#QAnQpJSnB-r1uMb~wZkMEW?ef;NP5E&{y$Jl z0|XQR000O8PNO+c0pyj}(+vOsaYq0E7yujqaCtOpFKS_SVP9i!Y;0v?bZKvHb1ras zRa6N81LP$vUQnT}VRd*5009K{0RR956aWAK?Ob1P+c*+`_ov`FFVfa^lXf2tK~o@` zb??$k(jayhD7uA8%XF+(mVBa|#8>QhznP&(k^18}aoT2q8YpU$GsEF<=EoUQ$_{(_ z%TpGnlPI~|XYWR@pZ*hDI^Ax!bDB<;v0#~yi!_nZ7`{)_42Y8}k>$X^-ta39zvEbh zd6XtBjJcFTvOHzyt9*tnrn6IY_St9_Nj6u);aj@EZXUDoGG}qhCt||JD>e~RzKrvJ zr_(vg1ka%r^Gj}GZBmViv7+nMZzOVQYOk)!Mwkc5sItIRktF zJ$-AO&k!ELCAjW2U+K{>WZsTH!>oJs{Z!1y0wi3DJeUiRI#KOM=vk#K&A!OeJO#5c zF4cEyIMe4$?0og>8(IoA2F!iVlS#0Eh*2$>fZ#}iD;_Vwwwbt$B-kQ|lEo6qDcC_n z5@veSW$twQ5T+Cir2H|4Ri0*EIa^1Z&3A7Kpo(UDkMnVW^lcWGj z$)(7OX3(KQ4+Y?QKb1T&7KsR&mgU=YAqj+kqk8Rv=qS=tyG7n!(%@hoYVp<)-+ z#VBTv&Y<+Srl?$slr4!-&nh99l6C{^W6J&6b%qXtBYwi>Pr5xylxJu?WTA-h4DA(t z8p$siB`hQW&4e5wn!1K3c&8Us!1!#=?r*RkZl zDCA=k;~xYS)7p|HX^wMI%}PaCTBKa$qU%G2|0Aq<2^^lo*x(^OYUs6)Sstev+WgM! zFAYoFuBulGD7FTJMipsjIoGtnOC*7k7r?afi-R)&C7una?v~L8Ilo7=<-lfY;SG=! z{&X0}kdqWz0LrFPNpaMIUdr86Hg_f>wD+dj@+nEu* zh*fj^l1vY~ZFX5_?=jjzEGfheyM$-;+CpNHtc8xgjQ%G)hh-w-HEI)6CNRyaX?!bC z^??I&2G8$nfN_V0tuE(LA~!IZCjF0C)h(CbqDBB4@&aibwxX@VM+ploxQhbaue*Qk zv#)!9b^DO7&w1`4xA(=z-5tJM`MyJsN&OdCAVejT2C1R^s*2sOUqSj`rRXkrSO0Sw zB_6T)ZX$pNZ3s}z&02yJx~!;#Uw4Q~@XGd}41&Sw`RIKx9E@<+1X}OIN54x5y^?)M zV|pXrp{rbmwsXB;XStC||~@CzQ~SZ6qq zB)tko%$SkWU@VovFm}k7QYi%kK}1ly1UQaYjE9u#EwX4H<JVEg%6VMZZh-=VM7}1j=cZKpH%$3o4avWm^Mb3hj>35(@9SsxHR{ zmV(cLqh;t*-DVR)O3;`sCE_~3DIVy^a*a+&lcp_BEP$2@`u^d?#o_zl=N-EsN)KRY zt-|@?#mG?dL|%)`7{c|-z7#M-A%$LeE!1|# zAD$c^4Iak)2o%cgFiTZ>y1u$@fr#_(xIAmB9SH77!UmCZ0BTiZ(OOnE$ih~PFsNVr z@(fb>ji{i4vUCrMaqk!HAHIt*RW%|9kLz+BcyQvT4S`0mJrn?>HXuE#Ie;7PgfOJI zgsi(qyA>pron=(rT(`D2P~6?2l;ZC04#j=r?(XjH?ogz-Q@l`|;)UX{ad+7`l*1$M zc^gvhF);E|v;c*gaap{Df#de;?@Jse=B_p93*UrVEgunTJQF z3ZA9GI$53J)f+<8j^lEu&zE}0yZwH7 z?|ENL)+VW|RUHIo>F>H7_UahR*c)J zd|y$m!ort)iCWaQTnF7693zydx+xI{DWsPlLWX-(A~JfwN~3D_!;(7Vh|>k~>wDZ# zYWkK;>TLqbr=QT+CrKUHi8UcaR{9Ka*$~v!7sDvt6krlzTgN`ZLL1Gd@IhYu=X9l2 ztw(!UX{wMWSlE(bSR#!uZ^+|WUz7!vvgHhyv%FQ}x3VXl-Xz2N{V3G-+?*9U@L;=Y zkvVK6C%?s&fXiu%1@u^qnO&j&eIy(@y~X4MHPC$5!?qfBe7~qtXseW*%o!|ZY!8r` zb5e|P*c9P=8J$*QRrG-pXn(^rPD4qUsUKgzd1m6(6r+NWyu8x!Y1XV`CZ%%c5Nlj+ z)(S^Y5Cg`RKyw(A3SwN}_`jmWw>>}Ts`ZjmO z)rz;fzO7{3gBNd}YzOHIhkRh7eSZhh8ro_7f-+2Oo6PlAqAjE$fEjvB|7buTBEP(j z0a`eVsy)V_)1IRPq(O>@$vF?lsqRF6tlKdU%V63IM$o?5MCcS0F!ZsfEsG@-u{~KD zBHVPrNi2_~uUkAeYwAL?m^R(bB}7?H6GnwSMUODh+#3m+7dK(PTJ29$!y>WFM(rde z#GcPRJxcye={E_y^223IQ2sArpPH+j+qj#)9NOl% zF}-J7r7Ij$o6$An>G_QA`=#pV!KoKH!!7?nRy?5zn9{6g`Ut|Y%}&y#0wPo?!qg#g zbCi>S{DoKkx`52<$8&V7V^YK?oN$M7ii;*~_k;qVJ~Bi}sC~NOx((TcWr7pB4Ql@9 z(pCWvQFaAuo-8~FE5Q&j*eLPx;B;wv(~{6~09rDP;8BkZ`qD(Q!!|EVuRt|5dZ?mn zKI*-Gny%$M%wbF01M8YO=rsSA2h|FrtDTrCEyvRWx!#r=j$4M7GAI3Iddv(#>~Mm4 z)Y*5|S}0r!n`=yu%5pnTghp=hHXSeR^^f*-zCO08>M{FA94$Rtnv z&K($F$)QK-h4|{8bKKbkn_%%}V;$DnEPHM(p<{cJ!`G*w2A>q=e0<;{ukll~&v?z8 z5hF|h5@WcIdgZ!xtxZ1Tx>1NYBb%>?_mGP_!-0FMJ<7#evVNo7)mEgckXIFujD&$D z-#h;Pd9nSpF|gY+s|o+;hR0sD+@`}^+*u)5j*mgC$F71xgRJ}mKGvWW_9uD55I5gB z3L{rPp!B!PyQMgP{&jGNhvE7jan7E_VAWM8nDz%u-7u1bf603Ojmi_TwkRm?q3P%!gV2j20E`$hR5Tn&_4D-eUTR~DHX z))#yZKrp&7oUbhJTD_F2L-8JcxY0L;p@2FSGW~Qdk}&8in168_VV^1hJ{~!p+Plv|{`4@Z zCsi7_ZpnLNdwOHb5q!HfS)B9TyFnrB73!ZKi4XBQ zDqi|O`p@n*#frQsiga4-TU0qU<&QKbxtB&a^`^c zzJ%7EbDr9%M}M<5>#C%9=386gV}wvzV~$1lm>6xh=O2?L6PmJbx{{0ra=rq`VD=#x zHWn&r36}ukfn*XX47(&#*7W-79IoSzN90LNhW9`f3y2U7JB?Q7c;Y$Rb<(OEG|SK- z%tfdZ=}ZY_;W}%Yb`C)zWx8wC)qUtK08Cm_Gh{pi(hkd(n}dV0rd(2Kj9X`|S@s9_ zst)X9Rw-tyBq;sHBK)Y6jtYdpRApY;61#5po@L&gkz-r;GSbztliGAdj*PjzvV`q~z#uTC&8}a|bYO}A zL_t7}%Tj4mku^a3xwu`g_4=rrNI3*1AB&k(J7K^k+)-<%M`s-N2)$L{9md8dM$;FW z`SV7CNaRfGPl``TE13`n5S5Ll&t3W!H@+X^OY2b93Uo_hrKI+M#l)QTb~athomuBU zJ3s85u={!V-xhqoIIg|h+ysNnyuF^?y>Cf?`9wqt>$!PE@>4e|PLzk^vH6^K%|MyV zo^Oyuo0Y$8L2L2v-Zynm+M8BN=hYq^U7If!>YCu^w_>wnX!ox(RAzdhNUYiUKR$h} zrR(AH7l4kcIpVu1(GFHBaTyu+3n{5emqUwig=X@^zsGu;5Fq2QtXfMh+y!tQ{;=PK zq$#-(?&IV0Bf1giDVu8e=Tm4t1yuG&rW|Fq9ui!KH3O17KW=#g;sTOfqSpm5|BIvh z?}S4h0Deih&kWX%w8>j95tr)MAzz=+S3mfAbBTHOEp>f!iQnsuzX6D0L0JnCDFVPr zT}EN7YgUI}>{g}?cn!?GK81q!uAfjL80E6Pn)M;X9YWD>@Zymdjkwgh7_G1npI_|s zf9y264-F{lIIO~gnQhJt2|&3+Ro2kt_ou?^PQU!7V`k8J{c9lvJbDSW*;&^F$u0|M zDxjOAiIU$&mE>|oz9!6>om_jny`cZj5PxEUnS~?2MEF|bx`2`{S_^d2o!qrB?uSDV z>59t{9aWzh6A|kdEDz@$Onz4>iH?l*6u}GSR11Aa-U;8gX3le9^;6Q=?6EwR-6fNHyi%Pw^c}{$m9m*Kn!KuEgoeNUonjTnO2(2K*{?6U z8aI^C#85BOz4Z}oS%Zd!;8N*+>zbemHh^mIyW8ID1(%$bxjg!cP6{{Eyqg60X}N7D z(RAGxfN|mCr2Qe;HH%KbsgdbqWZooc%y!tK+VQ^hMyZG#l52MxH;?IwE=1<-{Gl&L zXIfJyxW#%I<30k%H4KWbRc6YnJ=`fno6n__s$N`~A)H+u`J+|%GEWu+`ZYAIz0CJC z;yZ2x9yIz`TP2+YsE=fdEa8h*o@fG`vblxvJ0Em#cZ%LBA_Y8O4m(!IYg3i))<#Oy zCPEnrYmj`%nfNF-XQx-=U=Oc4$-S)CUN;Ki%|n>$s;usDoc4YrPTKI6aC?^J2T`Vm znpp$-0%P<~A#v*>>!ROcA8xRfZAwH44C`UutH*b7vRCKwZOqeQsS!Ak+q>L0P_z{T zN`QC$q_2K$~bV ziit#u8}GK%gw5V`vZR&;h3ecp`LMYdKK#<;82&jWzT;c9Onk%B2jX!U?Yegaw5~PM z(IZ|$+wLa)zB`L>2_{&@MobswM@N{%;%(_ow#Zd)h1-H=HPljpy~hd9 z-)IBz80bZUjGXpv*gxV4n)6Z+qRCvl= z$IO{8vo)(iS5oQPifLr&ttUEFlz%IuU|; zb6eWjIYS^5eGqun;Ob6 zM_=i#g%gVVqeww_%%6`#y>yBLeJ8l$H;{sMJ!&?5ULOsPPp~YB5P=F5Nghe5XONyJ zbbDkcXLmeZ+`K%43|TlIP=hL4!=Tf+7l z_#Ff^X0Wj_KfwbfI>y{DFFB!(>rD3#Rx#R{i__3Sus#xE&X}T9(oaL7hy!o^I;s>1 zOiAu?FwayRt0@>Km@@CE1C*)A6y3w9cA;1U;F@*Fv1;O;)lLwCugV*PZqBhir_aoS zY$tAm`Kp))QHTv;uKR(`r_bJkh$yxVkYsOf5+8b6KA^}uw$ zuc0iuk7S#lJarP^e~sJe`xSs+SE*jnx-O?nB8ursk|$*Rclm8h5yaqvhjbhvj%P_x z>fN%z={FTk5G_1tnhtxx<|{MzE^|#Z`DVW2P*fqwYxA&$+s*l;K3!F*uq}rlw#(4B zh4?FOIQ#Y_eRvgQ&J;RpBX;TDjY!8yU!D*5knDovN#O_bS%v!99+j7M-ZGeyLS~+T z1%WeKqzLnG6KC&bb;(Td0r&l%7%59%EgB%<71VPMWFnA6R+lgZ!33&QPKjxRPGKut za8ZrBnnWd?_#Y$38|}F?$Rb_CaUkJ~etoWObB-vsjVjY(rhr|%B{x>cI5Z{czOo1> z?>KYU*h6eAkMy+`Je9nK#fwU?v5%qA(Cy5j|1bvK`-|u_7-*MKhKAc{ryT|uqah1( zsQ*3KaS@U&-wYF5f5z^!?_Heb{vCHI))}6o7`111RHbv=W_hh|ASW$%R~uq|%&Raf zj3oGj#fJG%5vbZ!SLU4PROxen`~_ExyD!sk<;(bnp922p@eT1+pSs|N@6fiKZ_0&# ztm$k4J~(lH?+o0{jl-ruOIpN&A={PLm3u4dUL|WuTa?nvzM5ebZ>G8T)*md%s~<ioE!zOYRnA z&9{qY@Syo`rw0g&W*_+`;aOR2K3nrr;dJ@Zv7FfsYad=qPZEM4Ibh$7I6*&N$6f73 z(IGLLMSM#}GH+|(?ymst%OiynJG*L8oYM|Dwr9R%-Xmm<0fNeCd>cJ3<#7~vR2P8hf$Lu8Vx8~7{cWEJ?{y_`;n*pT zY7^LrFic1-nby`8OFW{CZEJGR)WB?5_z4%EruUA{J-9DdkQF+R7l;{wC728AS=O37 zoc%1%)TKvIp!#kSY%LWsTF8wj{rB zR+Hs?d+rsy=If{2keFu~yUA8J3Hy@bw9|>-(y!a|wm5TF(Dz$C__-SO9*@lOVuTEW zfGW$H(k$++{moa4)sTdZ-&t)N)uJ@hpo)r<5N;RsU3~_~>5=(G#!7_>_8|qPcy;Mo z{adFpO3l%zwy&voj9u=Vah~gT^cc9mevaG z*Jp$n=k`1Q?+J&Wrd)s~O-MMWNCRo7k2!-77uN-TGOS={8TB$ew94ht#1;B49wCEM zD?|_+t=tP;@pqhsr!D%IyeEtY&7aWZleV~Z_>7PUg%Cj(=D@%&1^X3?PBFa^pEx6jlAo2O_idN2c}kzA?OVXiGr!Z4^XKVXL%xPlrJkQqu(zQZ~x*B zU?#Y;(~q%-rT%5O=_b?z_DHCBaT7&-o#wo$n1y;mBzG+{ZFvr`Xd7m%h}~fmx%IK* zbAqjlJB|9qbg9ub5$eDJ3vUywz;T4^i0Jc;)c6k9Yw4?&i%i$7slKiSvU$Vwmt*G? zv91-1)Ecux^{mvC-m9r$~2F7H)C1e!HRh)Qpq(rj(9Z~^iV;EJ}NYIC-yAJ5)R zGTU=BmM?gY@%C4-wz@dMv`ecQa7Ui5A+vwD=ti=TI#+AWk_{Q7Vakk%#a}7-n?sCu zDxR$Dou0@oOk9}3*?}dWxA7kAbqq?@eavIc`1<{Zy0EYn+19+px zN0`xIV5y(RHnPkf+|!1saGDHDHaOfemfC{+WyHRjbbVn-ww>~lEm-3MG^YQ za52I%Z(Tb1^VAPjlzu!k1|H5IX6{tDp35#u2B;tM_~!?^;-!m8G>Yequ4k8LGcM`V zM%!+3x8v$RNX)_GL|#60L}c_i8@3klVg?|T`WAcqEJK~+arDEx9H>`RsYXCpu+ZMR z2w4xfF9=Bg{yj7R#)Ov`Yg3gsM?las;DJ(r-{efgepP3Qrwg&dfMRoY*rO@_|YK)MD94iXnD@UCSOC&)3q;Jo*fcXI}XoB zMSFFeN2xZQ4`CBf&}idrJbG5 z3Dp{WR&uq8j@p!>6xn%h&61Fk2QsINp-EOI^2n?%0epkl6{7r>5^`0c+PcmdJn zmu$y*q))1=AVl=MCH;UW8gGpvX$WG}f3`zwBI&MOsU<0hDweg7e4XX6o*l`x=G58t zszNp#1Ep>r;}5xbb#=?Yl#<_-R{-!sQ!b>N9YQ}1&W9*zXQ6(=4c?LX5-UbUptV5c z$ww`MRa5n1!#Q)ayi900=al4MzzzIucC%mDx<0V+6fTg1Yd;^#YjkOjFsfg*Na^Ih z!2qga+XIg+mACQp@nXU&A;TD4_s}PR~Z>q;h#(VJ)J9kh#(1}$; zM$veY36~{(0eYrO_k@iuUDsR3$~yfAd5;J6RQ8-&p}TsOC0-RQEO|k(tyZ3KJrf%| z1sf-dC<#qgUVcGAUYSMN;UE}nnbT+p$DQ;O(p_VrLv~PuW``Kp3x4M1cxP~Iy)TDP z7lDLXIE~dj!vhgp1<#JXg%H5pdC0Vd2tvQkO0Ru<7YNiCOzYvNG6dMRjG_5;O_ zpAN1Mcb~3*flt9(TN^j7i)`o$W~{LW1`7U&z?=2RqMC>9BN96`y?{^G5B9lz)s47e zVx7y@{HNW-B)Pfem?kZoQBy5Cb{(M`FmxB=x12xt)&){8T&0Q@c$W(@mmMP(?VTPuAf6@Jo&JNbK)9*rPY5?|Ol~*UeH0I8tcjJJK*YyA)|KL3#d8|8=< zMG)erp3z+7F#%M*QrE#0Ci(<0K$ATn%Ib;acDx}+dD~r3AeZEozj-xf@_4Wa0qO7o z_{Cm(G8v_?{Sx~0xU+h(Z)Ui~Woq_v)msJ8-6cEZJp4ds3jqp30XST$C_=JTKrO`D6Asx-bQPr(yk+EfzAn6@A#+;J)q9} zmBc;FNfu5%M;%789HKl0V>^d_{;p-}C30uFRao~P<3udkht24_sCt##b4b#}(aZ9y z2=Blt>R!n8jH1`pK74)}YM*(J=kk#*&rZwk09I_rIrPx$3q|IPrNu%w0qKXMfyzOx zx#%$ZG_F!&>#Onr&v83{gA;atiHJ)Z?ihmwW#7=0doz@+V&8No7o?}WzPepB!AbnD zdfg*2?~%o5oryjVnj2qc{G7f%zmU(bO3@gWGM*^< z?K>Oyjp1}q6YvFL#*T4_l?|ir=SHH=gj^D*G3*}Z=A}{K=GcG^ zcmbQj1ECNr3sX3HMVt8Ex<`7rE^qd$bGvuRXO=0YVIiwm3vch80-Z{lM)3KC0!$9X zdw|bMGjhi`6JWYr%nJU+nULS;3`7P8AZEaquO=9%1%i$JCgiKaR7>t7g|wNKBeHKM zUO{A@E`ycoF_oMgJV^@p2c?~a5sa`O41d48J{HOEhnBo9z=P5K^MA;>+0ik^ILL4I z$$9^fm)Z*~X-^@KA|k?*e5DWK%Fg~}1+5_EE^pCAd~P0maWBbz9#=Y;a%6HdVbF~d zoqOkzj7Sq};Gp7+_DUTCYNl_&4a4-C%Hs{30iD~}HMimBUPw^#5f??<-aNf?5{_`I zSi?RM0Wu9Z#JUkB_JV@eZ=NCY#_DfYZe8$8O&c*2YjE-S_f`jMRpy_Dz#h4Vf{fF| zW_xn7Awqt_imLk#bXuuOTsdd30e!t^`&@Ul(9g>icDQohfvzKg{?+5D0-kNAcRhnp zh#E2}B&KfUoi~;9H7r5Ve#`I}udHd@{&SC~&P#?5B*%x&5&Gd@7027+ViRdm-|Jc* zk=lFlLKb09Y_{Wgg!{bBEF=d3NdkrNrZxZoajxO2e;)k3gMPEN{k5P0PDTDWa`a-i zW>Qy!20#J-Tdw#E^@nf)2l&GyH~Ho)Zh{5?;L!m9)W1=0(gvU;Na!CZ7b^>UBip}> za&OQb3VQwj&_Mn*zKw2IRX5ta5BP7F|FQi$ln>_b_}+vSmpA(zqodcKk%}Q@{im?< zuaSBeIw>G4BYW>dzCqyu0F-}=H18i!69+p-BiDa0J?Y|g|DS>W)%rh}s*lB?zn}ns z88iR@{ok15Z+H3gL7H2=HQvhmZyo5Z{r?9nGEC_T5(WS;#{&SM{Tue^&5rmd*3sF) z)#1&C`2P{?M??U(;bUiSJs-apCzX~z8Da_p~H q_usnRztUeL{-*zziuXqTL&f{Aq=0@)AOHa2ZPkBEhKA37oc#}Cf`+#M literal 0 HcmV?d00001 diff --git a/python/lib/py4j-0.10.9.4-src.zip b/python/lib/py4j-0.10.9.4-src.zip deleted file mode 100644 index 51b3404d5ab3e9157bdb04df97920be616b1c855..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42404 zcmagEV~}P+*QQ&xZC7=5+3d2-r;Mj;+jf_2+qSJP+qUiV&Ybxs&P>dS*%`TW|Hxdq z|E%lIxK}F5fJ4B6{I_S;$zJupfBcUZBnSbBy$72mBR~}v1pLK?*$~2&+0exu9t0HP z859Hr0^>gu75}sRzl|^;>n>)h|K2k~8zDhJU{OIpkpFABlZBbBfwdEZy~lqKB$GvH z*z%uX|68DHb-CCL4wUW>b(C>2J+5scPilnMSu(kL!3+>NM>sG>dL@oVwDJU1j_((r zK}MyPMC-Y8!}LS%ZVy%hJ~KvJy*e#UWkQb2!4Gvt-~8m^Mzfjk#pzYpX%>H?HN3-)zcX5CqgxVAI&sw5`la%VQCZs!KQy0umTF&#a2h-d>y zrQLSVn~&Q|f=%NUrxxsbIj=3f*NIZ)<2UPmbPMR9-R1<3@Wg;r?%xbr9B!0QD8}YE z!Py3ye9P3om7(nOU;*!^OA(C1-1v8`hG^jXO+S?;o;XdIh}mcDJlQ51aGf-N9mvz> zV>C+AQXS|aA z2a{s$?#<^7cWn`s9ePo2Npsn??ANm7eLFJIWHkRZ%@=8Lm zP*J$+xC30w%n6gi?)XfFFTz3K2lkn zO2`lS%wcX0Zw{1}M<43Cgn7-yNUPU$GhOD;(-J*HYgKkc1LZBTQ^syGL4@TzXTHW4 z;Thzp13zuCOd>2#nf>jK=UelQ?Vt^{BcK7^o_3=(LGaXs`ZAqC_KQuWmg9OQ1Df*V zcW*P*2CNU^=M)xq^SqWI4}sY-v*a7f;Wn+~{gZ_`*;Fq3kKz9!CmWp_t=No%l>tgG ziv%f?3h=%|f)%+TB&U%fPsSqF( zt;HDyYH7_apTY|w-CT#TJ=bWH<~O|%H6N&}sh2tzNI0x(U0!QS$5m4m zpObN}M{6t4D>4#o+q;U2(Z3YiGG}}r*ZF-u&lZ}&#+3!AW5k_Tp7aFxkaN*qvlDo8 zab?cRgJJeH{(xR#cDCP9G(GTmgz0KO+8VNJj8Vn@3*+*Q_fGCl2osbas!v!K=6&lc6ERK{O3yL&o} zuG#6!m_4;cymbDM>kzn)MX-ru)#pZaoH0Z#;a7Xta_JYk&E6F-4A__rv*?(u(2cAY zj(=EQQDQnE$V?vb#o2-Ar>#k1$~ed$6d|9_kJCW3(*JJm1}+2WRTg^D9`CdMX;!5D zozczFNppIOSrsLSIY=puAdX5_P~_$57&Q=f148-8VCb4;3j%`lU+Q9HXJc>R{9haV|AF2Dk}1bPd*c3DdclVv#NewN{>2K!P~4N<@+rO+3Rg{G%aZ!n;;YB1RcD z*4y!P*x;|&T9QO=Y-C<@*}H2Wlu(0PB{g=|KSz|Dgc#9d2&uv%q?l}h*aS|JcYQfr zpuaQBL_v3WoZ`T2_p#E(_Bd=0NLK4Z6`O-P(X?Nye1FiOHPsko@{J*-l-Wv@(kS{W zwP=jc6~)pNZB?rlwSna+CPqwme@RZ$bx!eES*=}4ePo>!$1c-D;{*s$AQZ~j2r2do z;_6zF?g$F{Ag2P_3}c-KknKC+{rS1E1OWh^2~MJV;t*#LP>)NwOyEC2>YL>ec^$h_ zXGv>)_47rxi&6tEKY;T43)v)fZ~=Cb8Q;mRA< z`4#Tv>%SPHUefh0#l7quCOS%Q;L=~ zcd02(nxo>+x;BZN{*Mcp^QTBf!n;seKk`-}B(q1k5osF?PC)GWf=W)gXK}L^;*{N& zfSup`jcegv=VLVedF$paz{0d_%vWQUoj#i2X%)claS10=RX)-Ucu zWv|GjhR;AoS!sw7MxLicrnA1)W}_l-oDvUcKS+uGNp3j? z$qL1mAuctzQlv#A_rtlVv3Q7*MkfiMUL+v3hD$Zhh?hPLmK4k>I~WW9x;Om7m_T7v zU7hrAH^V|Zzt0o8w*gUlBwC?-u_AP-!rG?Np&3k%Q(D`cvJ}mpk_*ywfX=@<8m)R5 zFe@2YTuog^+sx|yp>_0fXVU2P#xM=oQf$9tZ?BX~pva%sw)=vnb%^bkt==)PJ<$Yv zR18(O=gfD7-V|y@wVW0ms%D4q2Y7V>qb|2Q=QIk$)6sLX!NFN`45eK>ngb?MHX@6- z(G92XKRhzriCZzn5CYm_0WDxHn9>7Tf76;wF|wZO1(O%KV3Iw|_{IOZY{Qj&tWvo+ z+y)=5TI!?)WZ2QbqwORv;rYBnKr@z$;ShRTLFJT(ds)*}xycWDu~uEH8qINv3VimR zi|-D+MMmg^^!4mo&5U0J99jy&l_*zJ3)09K)PZ816N$)8Y~o>iQvsp^`oKKeKX-O?*cA9**jTZDUvmAG4@h^7;dHgThkE z4)xc&JnN(S_@TiTWJY*~e#U0_VFEs^8YR%wSXE!BGhUbvJ+(<`dW!jgQwt6mb>hl? ztI!aPPDnb@%R_`0TLCCK?q)+_4b>kTxArl)3y@kU*6z}FhYqp;;=L>l-UKIYI|fB5 zB}+$c;fQKc+76@V*V|rR$611xl1n(8#WW+bMao>%gm1v03AF8-*N6~Le_qzyIx2pg zPK(ZTQAPhKLc)F=`EHzIS`}$sGEA#Pm80B-F3nycojM9pJ;vJ;x?AsHqGCo;4DO65HrqXxd}?JFa3HEY7+ zDiTWKsD!e=*L(3LwBM1Kq+4QVp_lM2WRpQoUha2Vcw3rgkaz}`7eOybF;eJ2ed{}7yFAjEtqVNTtu84`CcVJYsE3iM41me z0(qrFaLkn)sj{f)HFM_E=rr+c(KGx+l`u9g1F8?#rDG^eL)yovU4Vwlan^E?rwUX< zxRg`m(t4GA_L&ZzR(L63_>>N6JY2s)HRB4V0`&UTNBBduGi?2B)gx((VK3sDN(8mM zdrLh;Q<rYYDLWw2}(pa?yrsycqNnD-_ByzHIvU%iluR#IZzp&Y$5+K0_P+67G1$q@5 zBhiKg9{_cqZ;)f?D34Wcw4pbLs_x-}FqYTF?nyzNgHgzOX_UN+{vfA35a0WMH<05n z8!mpU?H%PCv(23CX~qWavw}Pc;x^Y3z!AGb$H~9zAYNFQK!wgF>c5G;BFn<%=#Dz* zA#WoDX7=CooB#QBa=5k@<=69i^ftrz{aZrZi2Gg?`HRdfdPKeO&ioq-inkEM)l`*; z$I$$;mtac5&Kv$%iqM>CWW|W*8if4Ynyc+8Xs@y#W9R!0y6ZFI)gmGjY-RWL;O)`I zo)^?NaD3Rxj|*8m6-L`NYtQ&kD0bmES6jBc?bq37H6pqz0dV}s=9VMvZGrQ9k*H;q z=btswCo*WNANxBY==Y_QuU>Dv1H)Hk|JKtV{~Jfw?|h?fLS$QSZ{+XD>w|=uEMnIx zdxq_QUz$vQUX!B+cyuFvu$yJ-N~7&LfA!B#E@GUR({(V2Cb3==z}tn_;|N#FKP zP3^1ob@%Z~$iY?S9DwZKi^Ry4=^mrFvZZzQPhe@;Z%$urkgnp5=iJv^oI%=OlDz+3 zuR97dT(dU%Zv}!m2Z!BW<~wTSnUN(+8E*4N#iCVaYFWk)5vE1bzrbGD6xL$SN`Bb z1Km}6P~)F~TLoReXSinxi$f+nl5nx7X`|^D8Ly*G|~C4J|7#K@S0YaoxUhn!;DVoRM{? zQC<*ehlTQGYQAzP@Rstjd2LI~XuCvf@DfJSz+Wd8sL%>Jj5*sZU<+vw+6F{?a23th z8B3}*!b(96HgjSx6YWXsyz?6|N3h{~mfj*z&alG=8iL*f9r!@hsanPzp8b0-%Bbo8 zwe3w)sW1#3lyE=w-^_e+V%^Xk zdM8erZ?P~|5(Kcs8jZm7?hO2`*INJc0wX~m751K~Z6S8$i(+8<4Wv<9>Cr7EftZSn zDV(6J6d5Z|W&4zjBs5NptK8VDk(PC3X@ZQ-19&+Ii}nSb=83c*P|=9{5Q!*={O`n@A97Eacud=wozi}F@F?W1HtJu z)q>k_UH>Z`)^>wp?D7g05V;A(a9$&r%Fm8e+0|xq&-2ebIwPK{o<9NIK1ORW_l)jl-XJ*ENI6a>SJgZQNmi@{{c1a+Z(i|)B3XMmQhF1QS zeKbcE2)A?Pw48sO-JmNg0>#emMFJy|cl$42bTnr09)z9eJq~Q*nqJe)Vaf~i(+94Y zBeez3tiU-8j5{$H(l|q?K3m=Wu5 zCw>8k>lfh19i+uSj3lB5CH$0((WO(>6E$pzEF=Ev+%52E9U#17K_l*Xp6fc9{++C{7>1yA7E5J;k@eye6B;5Hf*;O`K z{Kg+m-i;~K-m940IE8R0&bz5=!o{Xsso~P zP)j?6`EZWAU$7mF`wW#(z*#7#_+Zb+2nwT8V`s_(D?+Z3+NZBb&)7`IOyih?QtQ0N z;8k3aJ9*65HUnz}e@N+Z7_L)Z;>R9%P^AdxT%V8L#~EX7a1k$Q)JL?<1MK@ge+0Rd znE_p(YW=8M10dQn^zQ28BWZ!%#nifPG#s+tSBT}(#vBE;yKn1{NhPrj%s9NEX^(C6 z#WZd7Z($ZF%Eiy&0m?6L9n(l98H*)c_NHj;w_4=)x@-@EcfY(n${7 zhWEKPWu!S5Pa-4W-{88NVEI1M>GOsqAytcHK@*tF-}jpMmLogM_DG9uYBPVq7-R@t;`K zs?9RV^amx6kQM(G+{Yaei;v0!H;8jiPI{r+{Xp8lPAj&?9R&zb47jP`h}Re3agra?UeB|H=@#BTuLKFU?7vx(L-i94nEc&Q2#PW2Z5_rmbXc$ zVbTH*&;Ls~w!;@WVyT$Biqy5PrC&%|8FpY?h4q4*2rAbhDymt;hP(rFu2_~kR8C)QLXF?*Czl7P5FN6be3EMDLU6LA? zVr6PUubb)n?k#JwMe&Jr%S!Pea+;^UUbo;QD%A>SLl%?Is9D^&!si(f(EA1K(-QFs z*n2dmIMwzV+TVl7n(O5eR5P-a;f;g^(zLYk^AcE}o5q9>wONCTZ7^w4(DF^ zt=&k#!KH#t8%*Gdr%tdZ78!i4lc;M07pT$%fl+|E(B4AaqSVH20d^C|y`M%!a`r7U z#rpw$T^r(k+d>!9Skq~5K>sJdpx*30AUKmQeCX~&b9b>P9<&e$?KX7(XYy#oVJEwX z^S+0DhVTf5BGRlSwoet>wFAe`?RgvT@B>x+qI=ej>!Mrq{aW+K$ER@}ljk>-2j4f}69jwfhy3gFb29d}Mx)lP9?mZS90mN{4}?K`G|4m)s=$gV$)f9BZ3&|W z`_5-1cupU6dhv~tNkTgT&it91J)nt88n)$$yQT?LrPRly*cxS?32SDh*V&N32p}O@ z1`)P(z$H+&an}jm)0OfICju_E7(Q>EqU1Nx%!BImm1;)K0xDGaw zawWr5B}nP)q242SaCP>FkyB1`>QJ1*pO(pG4c&=sik-uurN{wXmQV{}o$0WThei!2 znIP2zUIFQjj1~>-oo3~5mBpbRKs5n#w1#_6x&cmVZD>Wd`^patXkaW{J#e$RB?>op z#?|Dk86{^O88hs!HQA>?(W0wu!ia*b4WKA5a9svOG$}w8*bV8zS4Ex=lEq_A$jWRb zSI)~es_$$4&PgF8)4x%17!TENrazOQ!YDo`F(6=9D2kXJFxL#fb2&IA z7lmr*ldRv}5H`|M1@sG7jyHD)G{tlrU@(+i%OR2m%Be@y;5#R+KUekcC7ZVxotP~Rf_ zR@)yf798nk20T~R9F=Nn7Sdhb<0vt|UkFo^LO(3?WlSVp5!%`aiM);VM}i*de7_2A zS~6DHpv|jFmD9E)ckc4q8T4K)IfjKCffbp&jMI-nzVHbQZHZb$a@Vo7hAz%ZX0;G% zcIM=L3?@V#_duNl*vB2Hu-Z34^t`imh&1UM#TT9P(pnqe25OSeUE5z=ytt@ER z`vS&y^G5ITD))Q0nNrzSgZSsscrp5VH1fp1nj!CRK}`pzJA{K{a46Cuzs`S0e-{_G z+`g?~@tYorr1*XQ!&FI#^e`<~SHFCrYZ<*G(r+Ctu7+^bIH=A}Si!~y_a6Oj^>~isFa+uCt%uNttEU`MBS^JB#abD#vIK*d za_R3p*#&?z|4VJItAOZiQS9vldmf0z88f~NT2uPldvD>mN?!mm;%>ejpu%9rmC8xp zQA3N1YgVjtiJDRY4?iP`X466$|M5F0l5F6*KfpSmQ{x_d@C;omSgGimx(tfyJ0?n| zv7*9wT4$V?!&FKLmV%}d<1}rYYPdwg`Yl5(QZ0l7Ms3LkyqPqB6a-A;b%&_F6g2p& zAXI!M+n`~97^7*&RltJd@JfU^GZg!DJF06r0^7h+K%eOdf-KdLg~CumybFF-CkLBg z(p?PKMQ?C_&xO}zXwxP#&$pRWQP3rZoAXn>C!ni+Hl{ zc@AX8&d%P$&F-&PIqeRulunHu_23 zIlp&j@NHo45OE2g?tGWd+_cK3o4YUBRd!$K~1D8*zwC5S08ww|0;SLBU0dzV2K zpG-;3gZ}?oqM-nSugd+W0A%-{{y$wMeSHgC3uk@(|BI`nq6SmOcBc#@Df>@eR^dpR zc47t)9;-l0|C?SqI#zvbihhidMoChZ=HJY3da3BJThfU~V1r4y*l_xrhmOwOog$TD zDZQ))vQ!fM|Htfta5YnnYS$;r)c^r0_XY*Q_%E}|(!kX~-^{?-#Ld9tzkV(4|9m;& zQb*fuLp;Ivr1mubB?}ZCE``v{HSFY6D8*bN!nQCfv1%0B8cYt21BL{IIwSGm?Mgef z2eb%-@*mYlT~353*w)h05@1Ompnegt`eCRrB$jae5Z~7A(fp$STC{TWG81W>{Cf_A zY#JXh-iCV7q#17bt~V@)s1Sm&Gr{|Ny9m&jkj|BOM>d_D1{hWz+deAM#PE2rS-t3v z2hfzJSBN#nMQfBih>`EhF2fV$Yf|4`WwI)9Hp~Z2=v3W{0$g-dE3jwlP8?x%q6*1+ zRXh?06s1V-3s93IqmGo1U|IYa<2!bl1EPPsD&Dfl2$Wf~f)K^hP~s=UJ(rLu7C$2Kz^W883TQlW)cl?@?HB$>Jt(1)3a%e8a2iZB z+|%TAYc?pqjR%rSwzN5cn1=l#-J;8#ct9&D5356H7X7w1;Y|l|Sa`X?qT)n^Ik_3^ zIp7n84TKlx{zs!c8PEOE)PkY$QNO~H)sf!pJlW$YLz`4hOr_5fM)Scew!mj)|IYDY zf5QQQxfI7=5aX#o188KZK>i$R5rD7}K8mJfXn%36$$)9rIF?cktUx@IDi*A~Kb1G7 z9loMrSFlnnFVkQ@g8T4iSG@&NpHn_Yp@n#IVpPPKlo#k!3qd<=@m3e6TN`8jHpS?< zP^k1agEkJ-5|vQ|i?9lu46&@@AE_A9OjG~HTRbD6umN0T^myI`oZ!)G^v05XkwkiS z6J=P?|5Mj|Mhquc0pwMaEbc&)8!{-d=BHSo`~RB>J%p+d?H%-^j01J#Mv27QR3T_O zMe+<~k$uGAA86!fStSl;k&Rr=jm!=GO7~a{ps_fw<@AK|B zGwR{={(L`-_zR1XV6L$6?nutr$&sU{Ed;4FOya0t_G$!f%-EjqzCoyadX zloe4~F2lTS;RSHyAD;}+5P+sB{<;Gd`idh9YC}dX5E*nZ$cB8{0Ds(wGBTYBoM3HP z@Dd6JgcFu0J!350Gy`6Qv`EhIM6_SaZzTe)`^O8N^5qV9LDfU23MGDVj7deCabEUm znZ?GwH@zSIEW)T1q3l3pff(QKdBg1M&FUP>afQG=ZM2Fs@#AlVcMT@b!iN);G8hhP zqGnY_k;X|bCq8!wVJZk!9J-`kX~q{QEA9F5OE#XeQ1ci1ucg86EGCpadtp*M5AlZz zjpVs7b_6|WPp>ffgD$9{Y9lxT(1B<=RIJ5ivgI@)a_n$)ET$?C88(9&$qUnXvO^Ud zfIkzPW4ZbrSpDwP_(kFaevCsNC(uSp9-2*PlsFv%M^KW#AoQ(@i z5w*LgqpkaMaiI-0zNn=5NEdxTXNd-^3cR2Syig5;ol8ld76qXhqobHwdfVL;BcQ+3 zB_rs2QL6;|xj$~m*#}iO+OBpq+)Z|;IH4UDR$VhO1@|1m0-6uZnx=fT@Xjn#gkHf;uPsId zR7k{V04Cf_uySrxaMx`%Ov(hfy%r#JIePG)ENd;%5Rz4JnC_>&1(BZAuWXZ;J)3CA zA&2|+_(6ZiA4Pl3gkVGYq&BxLP8lZbBt?Jz{Jd28$*tZK0b!O|yG)w|hRMACbr6}u zZTEvW{d#y%5T0T1W;3Jz&6-JoA971e5`i4Q6cAsCS(gdRhc@a7CHUy2c~mC~`gBlS z;vPvpy!*Aq(jl|tStcUkPm|1=gzh?>Mx7&A5D1N2Bs8vduhkL{) zyt==g)z{+a?BMF^?n{tOoD)5qdTrwD7;0ed?DNsEF!DPApGhvy?kCrS*w;lFDWJ8e zCBCp9JSLz6_vrpMQWw^m!U}GDV*wU5Fs(^C&kwolD~^?RwKmZjHf_I7#Gd+Jl+K+( zu6$J1@;dIN-<+4GSg|!0k!^dqvM4&7Y#vdOMaM(suR4fnSMpWk zJFnLsNv97=DOxs$*4zwi5L*^BHPIx4RnSXugV&@V1=)fc0kN(61AHiRwcfsg<+#w) zFpauw*Ltviyi+8HMXU>8*p%-thedeJ-}?rs6k6mwDF*;R0MreDMx{aRDyO&4vOB;o zIir|3ARtV_k8GmJ*crJKrTiXUG^kZ2d{2FUZD-&MTcrBZ&09I|o;SpZ@Q?1&lFH(% zY$6&E83<8Se2dQ;eM!6bGXS<$<9mRGEhp93r(+PN0v zm#l2K6ns#&#yyUCI~{l*5u7z>r?Q(kY4U;Wnnv?v91K;W3uFr(aHijwWfvvAje=)3 zFW7vxdzmY)ZM4tAglVeP6r~^k8P|K%nCWGgS4tumPCysa=Hrx zlPbjdhGenLT7NH{vug%!9C_o32I$X z;O)w|OL*~sL>?*1Dg|ZB4L6j4L-N!Nvi~d6+hrIPkuA5Z%aW4*K&xB57e?)5sa6)b zf>+8&oBfWpL*5|}-_gm6&Xr%tLSd~*^t$Y$JOT<~qRnDf8dw~dGxu43%zM>g-5n%> zy6j|sX?`A@@OL@Er znkurJ@c;dGEOmG{mdi!PF+Nq!`2^8&WeLc(9TD;%+?xXfe1N#I!;CJ#*pI0kgsAjfmM^Zj`wT{#MGnZfTEYI9LILDw5+djQr*!VO>N7~an2 z+Wz8MVZn2r0k^i$ofROvl!$?QDCXdXYk7G1{*_t9mIM`At`eqX<#dREx0{{jxMEosvL$KK#Kmd8B-3SFIr!>eZ5+OYSbgjQ~|}Tkq3BHl)pfx9~nyYW3wNZqBdj zl1YJVm+0-~HEWGb7H$z@*{NPqe7>QAz4O8+7?^ow(CBNGQX;pdnuLJc6yu4;t~(7* zI7CjxxteZ01v8;flNi=2Q%suOk6pf91tVzw2%1nby_G}J+z)%5eX7~5qO*V)&0)Wb zqGoYb?qP=DC{XpyB&n5269R-sa9H~7p8f2|zZjBt1cZ)Kb?CHle)b7Vu(=hj0!5v> zp3G+}086(^(YUrotROE3M@#zUi9p&>a3iT9folWnhQImCQsh>T;*bMxra!cjCE&Qf z&{Fu3#P_GwXI=6A#-FiM$bpt5K=_w!=QD8k2DAp&N6P617v0{Jo%sv$r=9RPit+QV z+L73ZtE0#o0TlP22G@T~Dq^gYT^bgo!FznC$d{tTv@%T}@&af7KH5I+5FXb7VBml9 zD?qou@ZU0(^K{EY%!tzIboQs%kWKTyKZgqI({2wBu1znO*?VUhO#Nyos@O=>88#F` zaAKc(XTrx$p&r;tWjyoUK_5wur%s0RCD0rBYa~S`Fi^;^*g@NySvN+8VQbvttC)Xc z;$>2T2|+CohBy9E{HFwy!!9y&5dBB}eh^u}UPCAk(VhuHUA=-q4N&!%-eq5ZtiHFR z@0ne06rWiZOFk7mvsB&|dE76RzDOqfk|jG>*qTI)G0Z}hHRv~_A_{&AT$;8ll|Q#W z4uBa`3++j`uBfQHBJasFaOn@QAa?fX$?*~v3^;9Y+60!LBkRL;#~=q((=XRg<^d*# z<+|qEH7go|^9c{$^YbK7bB5cTKawpgRJY2tPCzjRXwBopBdquxz76)3vR)~js5mRp ziyHwWAl_ig3~8A`xVGy&xpw|S{k%AkjIu2%hI=DMuXgZ=E;Jj)-{vsAr3bmZ$^ofJ zIPL{WPWx}Ibv~`fdrRZ5kM3^6;Ju!FC4pgH7lfMx1K~5_+R4Z<3p*O8vv|960AzRz zM(-wDms7a#Wma9VJ?p8zHpb4rsM*modNH*KnQ20e!nG zRq8D#{Kt^Ov2sMiUL^fy)0-N5ZD;2!XF23;ytSXmt^`%EWB=4izA&%FXz+bJO~4Sd zJ?4{y@(c$dXYgo9PL9n8-rAgRITUbQ z-`hG|cSI+o<G5(_z`nk%J@uu@u>ZA)Ax({coz$M>g1}lkqDtr zfs^ezD1Tk+P7?(ibS}cBHL}|TzfwE4>AN+@3B>+Km9+bCLw`%wpMcJ+tuAlL5RXa3 zU8LfYm+6o~$oP)OSm1GRD>#|yI5rNW0ChShIO?b8y7UxDNsRhU;K`pH%VH&8!4DVd1d!Q&jP5V5JUcphOROj3& z;^?cnfln!-)v8K;FbV5}l}Wy#2R1!4X1XTF7m)wF>uC@MKRxJkWig9-?dx6D@0Ka% zZ`l%{C}HaTPtKKPk@F_DPK9@;=s_6VLEe#vf`T_WETXe&cC}8ybJj}x3b3%aT-*yXQsRIhI1lY z2HE;XR&ITW`R^fg5lLHpNUcRn&viEHHd}1JIC=3bwu%yK+sbGFsbV>^LYbEhy_4K; zaH6^O?P{`E>A4hrxoKGT^Y73-?3$4o(vR&CcerUyC0@Q!6&i)2b#!B$IOpSo&V&*0 zE7+B*RH8mC%o`FbAXjPHzn6aPr~MR!SmQO*osp!X(1e)S-{}n;v@2+0R+Qq!31Q< zXVYf{g50Qea>orv$wsrlHeclP=_4I=L{?dc7hlEze1a`>s`re$Hne@%HfB~rhW9D{%A23rA)QITC~ z(?AuklHSIuy?*{tIb>Z4?^_zO+6Zvt&tE`cok-yO3ftGk>|g$-L8PgQQFu(h+w1#{ zX5s|ZRz?~k18CiNL;5u7X5)rEv2cKA=`B>sn-z1Cxh!JQ3Ojk1xnK`gQoG&bGUad= z2tpLsAM0lJX2cG8@MtHa7l(5j%h)cfQbKwlpHs06{fS}8zm*ne#zH-=FnQ2W^cl%i zy3SvlwYG3R2cybXl_9<7$C0PLZN0reQ^BrnKFbpoQj@XncJ!nuKf1SFM_iBigBK~4 z2ei#l+}?EMyVs0F?sL5CUrX6j_kCuNA znB*+;PWpxI!*B-J4AbH)=lyznx#i&=HL~>q3WH|}`b199YcYcq{ImS_x>U)b{z+aaEiZOC!k3+oLgMaxzH#~`*YD3{| zZ;y7_^8_?F6?iyz$t<)^@***<@Z5KM2&lgf*YCt<{E)h~Hen%`f)^=*A)-Fi8cqG- zG9s229DlE;p|UYI;7L}FA)57P{{dY{s4 zS7`zXdo_HQ*h=#y_a-~rrn#bI!LK9PxLc?B^6|C@T7R(bQ8*I{0}%BEw~PMPFNKJW z5P)<~z0p+++*9TQofws}yxxZjN85?x)do9XXmzCu$lC8M`L=EPQ@vez?)$i-=Arhs z^KoJC{?4j_tj31%AHsG0YN&$`uI>vCg*Ov*ucfwcEY!vASym0`GUhdB{#nI@82ia<#_kN=+nw2@WV2OAHcHp9Mxxn z#_!EPbRZoh;QGW2`&CPz3)0IZ2-C02hB1HnejnZOmag`gU{PADYbW`w%znY#;XFQ( z)Ah&i?%w-#YdpVr61>lY)##>5JvH`QCZ4g%PM&ApnB360y`JL*w?E71+|Sd~@jt#d zm!F5{pFccYyc=Gwv+G-f`=5trQlR)sSpnaH>*z9hPeh{vKW@H7uRb^YVWzn#Jb@_S zvo3MwL5bx3^b6vGY>7N%z`+VhCF2`CLA{p(t9DT%Ec5s&Lc=jicH=>{4|;R^9{CB| zbS?PBn#*@JSvX{r#yL3;Kx`OAxHEZBLHE;sMhqiy#~`Y1l_`3p5OwHaQUmTp;)f1+ zaYy!E+>YBPR7Gd+Ls_W$(fvSHJn6wZ3pnn0?!Sso^aZjT>{kj9Yv|x>)+S&pGy9#S zaNiD-Yl=bYH2`s|7u{vkSxX~G$kYZc`9WIFMNg_8H=1(Z*|8PNGTnJ>(c`ZZ=DEN| z*h3wca5ah1KtXrwP@JBlcNTv7#F;ntKY3I_jAE0?^YNGJipBHgIH;LvSGnk2*sp^K z?CJ7_z5uHROw?f%i1>CKl8EDQ$O&JgE=H7`FH??QN>5TZ z+cgeg{pNXQrloRfStTkK=;^@fZA6pk_aNdy=60arQ304~!zoQPE$Oq|IWrQWLu`dx zlm%anIvp>sgsNU}6tC5B!Y*lu^TcwT5^q!-6H3wA3t~#FsOGbk9OmZxYktP@evI}f zn-4)NPxSGgH}ov_!|#rqo=&GCxN_RroX~>1?JaJ7r7{q@HuyI$b#+5+KN|C^F=rq( zr+bqMZdB`-boNoU4pEiYAAJBXET;JmOx1XffYZa?+ZWD%I&Ac$AcV_%UHEZ2c?8j& zQmz}xoou5%T1zSHXycp#2)#R)^pL{5Y`6})t6Fvj&%h3&j`0strljQsNyMulGpxaSW2!BMs27fXe`Yjf8$*lv|J}iqGHMjU`x;_Yg;8xjw7C);4P6)O3X^m1eZzX7%in+~0g@{n8Pa~;?f2x8`{1?jkp zA|MYwQt+K>Eo4l;5S!{TMCvvU#~<4_@nn+w^^M#_ooJO`i$K6{MuW40Xg)nVH7vvQk7Hd9mm)&M^7 zROC~&{Ifo@ZvV>AuPw|KcF%%1`ZiS7N=GP(n_SKsS+<6fG_v=F;x+R{mm+n{S_=s7 zqm~JSW&O?a?x#+8NNoq&-oX07e)Asy*jZqhOif z&AL8 zN!uI?#Vvcyxn58?qC6$*6}%|qX8;P-CgmZi=BSAMj=*vd*)~+tJBxCeZpuj&K+oZj zU^N|OjTbZXhG-nD3a*ZYvS|Qmhd^Aso|Nlw*9#A^dOY;i)6MkYfbd-io(jGfQib(e zP$^z4-;c|T`-|c+0#7vz28PfGz`Bglk-5BFDGLgG9Gw>b#npTPU^+T?jrD-CniM;c zt3k|;zJZfR3Ro)yB>M-3%cuVin(6`+1%X^kWJlhL0g&A4J~KB z3I#^x5V-4|v}k$`Ss{c>*AeZ)3Fk63{00oM-}SG<6NmZF0&fDrKoHi~fg&i{mf(wH z&NgC;eZJevO0l~+`PxRdS<=f?6x-mu#xrdH@5N*PcHHUbG4-?Cxue-XLXE4)CyyIWn0cR0ESP(}tnn+i1z_)7s_1g4ijjK`>Gj6@SS6R6iR2Yn>H`GkW| z<@{4%t{vnNS~F^(!;Z8=@USB<07r#%>@XwnhxFPrYjy=AM7|*1+!6vOXLWO(Vl6cT zbr^8D!IVI<(hKTeJx0ZGfOI+tPvOvQ&)q;Pj$8Sp@dPtwbsngNKv9eVie>0(8xrrb zM-aB=76R}SIY-i`g$ate1~(e$#4vtj=#GRFz3S{fW|Nrp@qExi^|5_&uca=sLN;8K ziMy-+Aye8Rt`KEH&)42-2~=x3^~hPBkW`Q3Ol7zZYkVK*UR)gn$c z!?tCqyC-Utv%6yCrIQ`8qf{telnz0G7l`1#E%w69kXnFx@Vi6#>Y@tCAgp%CLz93l z;y*h71|y3UnPz3`OC<5I_*neF^&EE4C$(qUNWNyD2oNcK1F(N{i-PSWo8qjZ2qrvo zbL&^eLE!bJ*O&$zw+3vjGlX>eTI%)5oke=!j|s5YI2 z%bKO(Y7Pd2urfH8vG1Kt$6J6W1OWOiol{<-$)G9hpoJ|hcJGss%^iWTkst+|S9ohd zCQAXy^pgJW17)G7>zkW9%s}o-Kat%j${Q`E$}s{uuyG0*D06>ypJXd=hMo3SheO~l zmYX^sBJ!s-jR65+fFJYAMV#x}Gle+!mS^j7IoYrP{X(46+#L)pLN0??pqS)uU1-*B zBrmkiS{wt+U-d$kVsX7TRCo8$lXMN;v9_Idp;m9p#gt_BzMsA*Diba~dO-%mtNLv< z#C(d{kt-Nno6Bl65sA+ZHx)aKMEO(FAfX}@o*>jZzQ~(s$N{3Pp3fdlDujx4t*J0e-~t{wxF@IVCq1`K{;4DuY=`}A{oE`t4mLPf zI3iZ7qQ$_m7ykUd$Hxzor=me27#+hvFwsY|gJm)(I8F)D{M(7K+unAy` zPGnK{LC2BnQT@S=Z`&Ev-O!Wn3Z(eN9(tX{-VK+Ce%%JWZjRk z%dkGB>Sr|vr0&TP#(&Il<8jGB8-t!G9`o#VBTo)yd-psEV+DL_1R0%tcnx%I{!<(4 zp6Ds7iKnz!yX0xpRquVq-<)6nzylvX;NO(6NC{TJk{y1F=^W7+BCIlH&{QU?l$}ZS zsL#2B-fMRo1zI2C$QP;F&-Ts_zTf*nc>WCyL`TBEVUdff+dLgE85MyK4d>|K=)}y% z4-(y6{^`NO68sb2Hqz`TY>`YCCBx13n0t5e3{hEhqsWaz!#BN4I3UN32)etP ztBMdJCO;`k#PxzT^7@K+ub8t*(C2=dnDr8Uc);D8#-5cHGQwB18EXAGJ3~^+*d~l7 z?+hND^ZG5E?LZTrfDDp(O8kS(r5WrHVh5p0*1_tCdX;4r!Btb(kO~pUFs&~u;>Lo!J7_8q*tk{gY@)Jlj_h;kH04j+is1Th zbuS0ediUZ(qhY_iF9KG;MKh?A6q1sbxU3EqNP4R>6CsP$u%AuR-UK5^}XBNffh;fesnmd^njT0T6Lv z-@i?Zq71*A1|4dHGrooU!Kb^)pZj>9u&(pYB14>>CrM>ttp>eLg7JCea+k=!g!}*M4OMLX!CysdZ}=+H0jI&eyH*B*^FLF zK26ecKgZb&ab=QEHGCKfb$D5>$|=E6ccsl<+-syOBnlTa8R{9~!%LQ8LR#2}kobe$ zZV_rl5l2CqNUap8T*B8yRaP-drtJY*Gy4HZSz;}8+nx}cT`Vje?J%tj8Oeu8?z;Hj zzIV45v($dT^9PnSc+V1eizI6P$(LdX4z6>)H?k}Is6KEZ= zLS6{Z(YMvz#oXM3L&+E!8rJ|(EGh&K3$PbLp+ua9yiV*A3!4@+WtcW2ih+-TPiC8W z_?N&L`C$Q2BE%*Vii%*Pm=@n8ijvt*kx%w5f?Uq5bA<~O)hvoLv#Mr&DY6Py_-H<< zrWAl06hEsfBA?f*)uP$??6a$Sb-lhAm_7OI!iXP}^6E1{AO38yo=!je{PV}~)C6rn zb}&FCr{}l*WOU+f*y=EBpaR=aE<&#er78Q+x5$A^Wn084ZY*Oaiz=$lJjx)kr78Ll zDy6y0#f&T|K-=(4?D`-jC&$CHgX5>JM;Alov#tyU0Ah|afKZTs#!HEEG+_b`Ht!%} zDVTqu(8B2A!N9pH1n-OeIX-<={(Q?PIS5#|w zE_6=g(MW`dFii}V0(czcqJ^jcbTBjS4dk3Kt=Ke2v9I=PV9SQ!DP#tCRdvCM$W-WU zp#^Z)?f9guioN8`|Vnb5q%~8H~fx`I) zPDaPf@EtIlN@e4uhNZQTS}UoDwuXo9-Y0 z#fH!k^!C&hb50w<8=tgIwz+L&t=(CBCE{)lq?jdpGEhV{H7@tyIaY?H`wY ze|9?vb0>U`gJZUxgtzoz@jWIcLNbQBElgO1j}T@vswL|X;dz1Il6lPTAf}}$BBd>v zOdASWQtw{ZzImJPzvybSS9%M6nHPyo9NNOqdbf77MuvNh+2};m&~iVeENvr}jy2TC zXNqj*;j!a0vFD#b|4cCZ81awvca0-VQ}E%a6Xqm4bo$}dlbzSWq4*kIe;P*2@U7i~ z{fT$ca3@48!SjAb$&rG#xHslyR4Amv6mcNysrfDIMYjj~Y@NYp!^X>!;%#;gm*B1r zV+^G`(xFq2ALZ22R{l!o);69zxu*yF2Z!GsxcCm0cr_005M(HHr@$0H-Fi?BB%N;0 z4>WMjrz6VGGn?Pq2MvjlKpkOZ1w|A9ll2v$D8XMPN342ZFn_N3N>bexh3G*AncvM} zGt3`lla&UR$|E<-w-81lAtuup%^t5#zKm*w13sufI3$ zwYgL+R!tnTPxLF z)qZL)vAQE#NO6soKjf>(B5z;gVi~#tz$fi-k?J98JY6+eEj)8wDb;s#*Q&sabv>Qv zllN(G^yDTfI--!xk?vcYsVmb1pap-($&q+IS~v}+yNw2I53K(<7VqVy%8t{SNR9P|t+cu(%QwixM zN&fl)qtSN^h8iLqXG>iHk5smFCwZp^L+}e4Uw7jppN{5+^JN*j@SK0d(*)QBzr$Gy z4%0RVC~_UuyPHZe_D9`tJxEVRAr_)tMnO@Ikr`38qcCK^V1WQ|Buf@;b;*uBG6I5f zZFM2pMpQ_+jM!?VTr!O13=p8419t!>`P~uY;5;+(zEToPp!s)4d0!F9=<4Bnl0@PB zQN*vN3Q&n{>#51m?b$+*8Qx2h8G3+>n}m=YVB=7>b+`EWm*6WbjtKyX2f{c(HZ1urLCi54`Amg@B*WF+fe zA%lta*UHUeJl;(Ws&8(@ZZMOWCC#L`LM}0v{%SmiwsrbC_@vTWnSD_?t;}vRLWkyO znPvr&Xs++{OoM5Lk(Vy-YtV})<^}uxFQGZN(jOux%f3VmSDn!O?V0}Ti}k8P{hkj^fGFYfm6e%=0!A`uOm4Qt{6W5|=yg~!zEV_-<9DcmdmZ@k}L zVw<4(6s7tYd;5X=s_pI}mk?dF@SdgowSD>G>EUUscp)EG`m4i(=TA3gy!N*;)(W@v zl9xv0<4YwWhp{=L%^jjG#S_2d^wmh*pbj7KM8r@`i`I*YX|JmXF)&X@z3Re}g)GGT zY@{oBba4Ll$#%QA{SI!kdmp-U_ujRSvtz=#=QhVH45BEJMM|Zb&>3(4eEQpPAKI?R z{=PKc^&AT*&O_ERkMuq#nU(OoA{l8tvAW!|gY!*ib-Qz_>`))33c26WwdP_M_H=w= zZsF0F2lm>+;}H5}zIyyRjNv84H>0?_MC|a1^~t~Xb%}W=86Y_NBEQFYSD7< zv$bT*PcrA+e&9z1@xxu4iw~>AUG@YTLqd~H;iRgc)}}?Oxp_fpfCDuRZ*pcJ*R4p8 z11PZ%g0@$$S+xU14p#FslzIGW%W9FR_fYKLnw#LpItrx8NWRuetK~YS_}Yh`BE8oh zWdXx4N5wMIg5eih3Dy9<;+W0&4Fe)Ra3dqD`@n0~I%lE%9V0*i%b3xOXEosZL2MfU z1}~Qu_RxG@lKWTRYmSZhs;06$)MyAao3I|Qz*YLf!MLs@$&dg)lB0tkmpA(H?ajyo z-!DaO>=)JGYG73pBXJouH3k6tI+&KTs{w6pkcl^;1ibudw#idqXv&xiZ~ps1nlWvi z->Arm&GW;^NdShGaswE~mi(~4`7a4)Hfuaua4NJK6KdU&9%@JRE zt_079BFo+DXLDma-r%QoZHCO?ypr*3K(D6Z>7i;4#Kk{grw3m>KiEG%JUQNiABMun z4u7-v-QMu|-tn{H{`0*v6y-J>3ccgDogw*t2TR{I@SI>iw)moWLSA&_()swcV}#FM z?(auvt-R$Q^)Lp-|YIyC3ItvS@Sv zL;0cNntv{SpTldVy;o0EQPQ~#&wqGvkOflOB+rv9ZVCx^eUyJYJLO11Ydf61-fnr!}-%Mt`+cw5#O>UmSYgRQ?VmtmY0 zl1)nuVn%!$zsuo2tK;QxE6>ZwBkbh(?ELiQ{`pC}SdtXLe>Kmr`-vUz?-~{nYZf6oC%ro_-SfXha%hEl6xQ}8=0u0Us3y(^(P zP7Ma{#CLUdD{+i(&Ck^O7hil)%+2z_Uk2=))&m(CbyTLN6K?2RTWe`{V}2u5trBXz zs;7ff{4UXu`c`tMxKwyD$hwi|OQs1DHVgH~QJuP=D*H zK^uX>9P0t^6-kdzSfqezHdV^5o+_7vOb}xPIzL8vah-P_TUE~i+_`2KVPcz{zb=A zG4o2~l+66Kr)CCfN=jCWb?07`!dXi`yf9%Jo}9kecKCq?Zy++>N7gdDJpT3=030@{ z8Dv;#O@epMelul*+OE(v%t{5*$Nsl_&kmBuOA}3=Ub{?7^V2+VON_RR0EPXu*|bH* zXYGO37ku@>)Ek;cTOBwrc&Y6Pe(^IL)3^iZz?s7Eh{ zw3kWEJu0X)<|jv--(BjQz#!Va{H|KRAj22F#SK8e zn&laS#7UOA-J(lp?A7bu#;5zfmN!=l*6{G)`|W&$vQ$2j)!U}_>hbF+7~)SNGq;=Q z5UEA=mA!6;5eLz4ORu;;rn7YZ*e0~hFkQ3lefjap&TDg$+BnUO4Ks71p1ihJ&EJJN z)vAws9MuemmSpGNkDK%DK$z@(df5j=iu-M1IvNcFgBPu!JD>bMrmivb$b`L7uyw$Kc@c`a2t;<)iruhsU75nUuRP@pFI!GIBWy1vPB!$m zG`oj+d`k*HC!o-aYIR#7UK2A;-yLc0wp3B2pB7QmqD}*TT4M;776(8TqA*r&*`h1i z#t}WhOmYbh%{62bqNPRs%Xa;8+be}bFu)dHP=moxa)Z!H$RQoF`wm7%GWP~^#%*mo z4$wo{e*IqQh@it8+I(0tVwSPbQp^J(0PV6&N`i?NgtJ_4xe_9SHR}tmICug1AZ!>m z*Z~wGCdqXI6ydeYEd_hs2Mz=6QVh1~k0m&ci)Do*%w-JlI_}mp)=naW1E9Br`0T)@ z#%_dCOGyKCc_g{jk*I2*wCE}g{Xt|ZL%{r@k`Jx<5wd(!hwe0cHzH6VkF5T2GCwz6 zb$cVc?w%7AwPH)E=2R`~NCmiU%W_gZnqOX8+JL6MSq*#<>GmOG$b7~5yCIFix&gkX zMe2>1yif#Yq1RDS@^ke4B^J2Ka1!6a9CUi`AI`rvY|V?)lfVCv$n}jc`%vBuzSG()YN1_bz zdiJtZ6mNm=Uqj*+vW756FQ+TU>dY=kI3Tan(LUEBikTRr8drKusSPU_D!Mpg;9K_d zC0tE7BTm&XWxaMzz(x4BZq!yckPNUxScYN|iq|2#*E^z!T7u-!tcXB(kAfj^-XQP` zz*AgO0#r8lNKO>Eh++5o_;6a1d(dUC_cVex&qBy zAmd?XbQ`4Qyak;35p!r2;)6CW=bp2GK*tWIG zk&6V#G`t$oJF+9FCYaC5P=95}_`7+i*L}mweX`-}u{3$k6c~%x6Bjcat0+wE#G&&dwo`|){6i%$!{OBHUu+o0cfE10@7;z4T9N* zr!)i@YDt)qyO-3k`?lMQTHETpCR$D2tnP*;!$YE(6|iooU*kNB!LkRoi|DZOS*Ofu z-Q>YmJbPwkBmHq;dCLycp0CU>)%gsSV%zXG1I??DMEU}5#EbcS+U@nXY9gn4iqA-% zrA;rZ4oDm<*12S3*=8+d_3`;B$asgx=fjgPzd6`HA09r?&AB@u0{rL6$Nbt06KIC5078G zJRk0#9DjFkdaknNYgX#c zH+CnfRL%}g%^Kgk$G6~=Dqb3irzGdhup2v)#p$-{Nut={)B6Nwd5{50zojcGU`lLL zJ%0f)PA9$t55KcJBcI<4I1KT;zNwDt>9lU*G;^8UG4w`CdZlcFVHhA9OB~;spy5^ANToF|;HXs$ zU3`jbNg2IFq0E;Haf)GhqMBj9Y0y8Ts2J*JU~nbmLIx}PovqttxKWr1`lQ?t8HU_y zvr3Bjx;KA1nM^S*Zlg^)@*q0iSA)8{%D$v>w~6uOWrdq8dO2%oO}IH^2tg~~@9k(ZC3ye?O{;Nj{7IRbI{z${|wA_6ZmWEcce z8)^tSy2x+8F$|Y0P|cdPVdvag6s;-8G~AREjGKU)j}@$?9rW7Mdc1NOtySm)e8BKF zBPSF%Yp%6ce|xm^)!0*r2p@)N9^Tlj@M*8 zMx>_^Rs?rB^L6s7>Dle!qW#SUeKAZj`t8^PUyQuIWi7IN5QL|yT?1dTATl-bS_it0 zBX1}pAkmGLX{v-3GW6LtsPv-YHQzfu4N_I6FCv$tYkuz=?v4wGRZ2-TI><2LT~V9u zKd=&3x*NA{>%`Oi>+Y`^Y>`6>*n)uQWU-R(7k1crxTP}xo|zgACD>f4TjwbUZIX1o zep1rsfV^YO-wgaBUyR}l84Pp>#GDbZZzG;Wl(0jx-g#S!`9qsz_Ry~B`3^nNr+36p zP!_!=^LS-A=5RB^55iSu$9_)<#~h*lZ%NG1oL z$G%6rb>e7d`0slk)&GyqVos_}M=*kE8&mS4GJ#h;A^+Z6IE-bqfATYK&n~8eegA{& zva5fjL#vzdoP2+av#e9Xhn71v7+)~?Q=WKApZ(`J?y@T%cF+x1oz;=H@(tlZ=!&00 zHR*{#hm@wIJ}`UOl0v8DJ!##sB)aB@7-H?^PkC*=sj5ZBk0Q`BRqu1X8d0OhLH z=&GA;LQ_*<3t0;Cr~D6zJwWPS;KSHq5rP&FT%pfo`nfjT%eCIw!Sn3BuNv`YE)^F#!p6 z%*mzn`rq1t2fG-g8q9mU_#Qx+yO#3Z+2USe5Ygrxh}%xTvFaYG+(Lb=Z}IQVQTc|R zT2e^xEKOw>)r@8f4zkrYEO(1x@KlS=p@klGn4h^EWaOa!x>Nb$wG~KTG;%HJ=wZ0GU8Db?>pp z{Ky1dz(=cPs0q@`Bqk2eu3q5o<+7@JMG(~+bfoqyr9}(Tj^G3!P%!QP+tLWbi*g0( zPIfpq>tyIWYQG5ET~LC9cqP-}Q~CxTBSbx3AjZwbe7&-eN8m{?i%$u!is^pG3wO3! zDyXR7#V#r5`1o3PZQZb4+8Bs`{Ro;wBD9Ut*;*8BUio&vTVr7FeXmb}KQb5*VW z@sCfxn$J7taNH?%!_l4jV;_D}|9)8#W=urGFA#>^l)zwd86X_Pn{qY2HUlfKN;n>5 z`I)V1$Yx$&%}Uc>u6H|~KHYo! z8<3*;V%6-vdhI`;le!wtXNJkIZG?=3Dh(U+I<8YOE4qXcub5&@N<+P8_5Z1c@;y^7 z^7F%?z>mQb&UQ@D*RwC+@M<|bfRPO@S$|B}=?u`7+PPOir zFdYC@2A2cZ_Kae9ftjXv03UzoARR>l2Z*9a(1vM7Or6A>2izy}+MuGrMQRq+m`DVo zn1T4cxUO!%cq!$P(nnk_A6*_q2?o+Ra~Pk01K*l%Pk{D0?w_4G%y<3^R5tXKBYy*7 zdS!M`_yjSNj-db|JBPbiH?MVh*6r)w|WQw82v=h!ANk%2d$} zElsX_=Fz~d(C(*87(TKENAAh-uOdhVBN^0a(2Jh}OLZcd3kN61Uv4EnFQyhUg8?6A zwvIPZN<+I1<@YJx)UX-RiUmY&2O3^hgcn5LxXip*!hhw>4ZsN1E>nm=y1*LHyBGFZ z%$vcadOI@@>%aw^VF2o4K%nWGyM_&-r=AUap;^z#Hs^eTBs(O8cyejyESsuuLU!|1^ zzARXTJl?9e7{)6u!NC!Z@QC0{16LwaRyiZNFj$tR=+a{Md7D~a?&z`a6*u+OHFC%t zMY*V!(>oG@CjqC+Zpu)0O=yl7k3Bcf^$juF^^C%j#f$LYFmuD#Cr1aLUDUJBe#96{->=UVK_-;Kxj9o7o=EML zNIXnT%QI|D!0y8Rsh!)Q@E|^S%Vz-V^M&42obI7uYf1C~JZu$Y; z4j=+>fz7&@Lo_w~exC(Rkn1igcd(G#Y39Lg7T_XSp!xdhI@%t^MFu+}&Y;jegzAE9 zM{GO2ndF-4^93`E83aL)c?k-ML`{Y0g|q}AXXJLWBQCF;Q~)_0*i#Xp@abpoBuR?` zu8QGy(6t3^3Cm%6F{7R>Z>%Oy(ZSNM_0z>GrFJDyU=oMzhqH+zNcmn2vd@HQbO(*F zF^VEpwO9u31;uWwq(sr6%x5b3$O%V%&6niqCdfp=@BK(53JGocQE-w{{3k6p*`Kx) zopN9J)wN`$ekS?K{NLsigf>W)%LrWdCoh;t-#M=$N9GP);4Ov#K^868NTq#^kdmk- z6!z+K&El_uf9D1wtmLkTt>d+t2oQ@y$x+Cr{*y+i){hD?R-^8s%h;iFe2TOaAvfpma+~l$bKQTk(jT0B9xHV za;T)a5AH}gnJi(Or1;*`r>6&JXZMV}h~@D4ROIma)Y}xG@=Mb$Yzgy<%(wY0%zcHRetG&!$vjM7AFkKQI$reM| z#|dn|7b=6?6aJSk8T==TAg6%uO;O5m3^+HvMO`lGS8tjr*z#7@yKTBxrNiN8(BL!? z+%0aa^3Afk9DHy7J*%`~>|ybIZceVGx`ceI%^ZX!^UT+X;t3eCKmieeR#8NJ?Y&48 zz|#d+_xS|zuBqX*Kl_^$9`r&N7JOat)FpoU}<*PAQCG;Lmjm zGuSOoVJ4w>L){F;;A)Ea359h5;GBnEDNE8RpeNzI;R4u#%b{(OO0C7lPr~D|Eiowo zVg)FvfatrPb$bI%JIc!;dSvgi%g+D;5zVne-xhrw`|eajT50pY;Mmd8qE=jU^iP+& z>C2zbPWHbYo}Hf_>>cT41-Dw@y|~YA+_Ed3_7d_64~&33>1L-90?dB?XtiE2zfdpU z{%61VcmFRwbPOz!+%Ugevxe{!7(|jGQ{d8G0oKaU-F0Y9FKVhZlN2oa#1#!4pFrFS z_{^IIvR-xe*nNy##+Ej%#L6ZHaAK2vdpdDmQFBVglj!HizwG?{eGi!8C)UL)*5 zW$$m-3@``AsbkxqT)(J1l!~MB?xGSXUQgI6@9_;vyt2JeiJ$(P0J*h7dJXWJ-z(} z<6vi?sxd!wz1%5iVVPdfn^ndgGBba9o$V3;_01A{HwWZ>P-TqbihCDDI#Fz3QFt#x z;0J@gD9}j2m&!pK4T?S8R47Jiyij&Yfx(Hg51MtppEVRN0I=hT}V4;^oqJOVRqjT+Rh*< zDn$1NyeKCXa7c+)(|?hAy>QS}l(rQ^VA7B$eJ+ZWvge(`0tgr$e#mkckHCM}=+yxU zBHW2ocFuAFdY66y(CqXRK4R5`2rjq)(LZTtTstzzB7px-7Td^dqjW^&w!m+l@7nhA^o6o5kj0OOvxK4UA zY`uH*-aZ8-!E8NDyJD{H;C5Lqch2NRj0e#j5D8=RX#thYqJ$LR!2`N5O4Ce$!)o%>K5|AR!hKt!?Un$Y z^v(q5H@zP5)*v}=*%;akEVY}sU6C-c^gpq@h#~vHCB8pq8h#)Iy9+;k4s++H~|{9#(}zkawfw(o-=GpaD8aKlA_fm<2D1uo*Rfv2ErrFvJ5*W}x>H$}h? zyb-setXs|5#iR;6f?N~J3edqyBeTLx$@k>8$`q!C1-E33*uHQR(Vq`@Zw;4ILMEWa zSdM^C5QZn8oH7$_=52-B$^{Q$o1)LJ;z-b@@=3L1giQpW?MXwDX}>RhfRB2IqA(!`}6klu1JKjru6|UG~-mJF1x1j-7J#wUY^HRaYrABPE ztr~@44+Jp?-)T4C`Y|oZ96Ql#DHc`aloL!e}oloB|)77!v6PquXY|i zc^x&uXUm&Gn!Jk>&-RncJnrcKo52Dwb&Jcgo;DO+bXRz;^!gL7`DD(w7@c?W(lD=C zUy+o)rXl?zkp#h21_UVWS0|tO#gCL`^}Tmua|He=oy&>ccgPZFolGE&?9We93RU>9 zQq|6$#@mahX?B6E;sZxrlrMpXVX!txa#E+HQR5O$%MW^+7em4eYE}myiCdlUOGTI}RuGZ142Ty=Mnz zHqRm3=QZ;Yw)-%su6+aK->B0HS)8pZh`Ps~Hn+3{gjzqaZ z7+^1FZ)PYPyF+aauYy*Uo{S+S)h7_Ow|62mRHSOP^z->dgn(1X*{lhdu2Qz4@6c@E79U{raXBJm_vC+!m>5Rm?i;)}y>wGy3S`q@=$`@>@Y$8Xn?!2chz39g4ur;MS(;&c z1x$uCf@G4#w!F$o2|MUA5~+iCAQriU1{gY;idb%r1&okg^M#N2D-l_|Z-nRaItmvy z*7YkrZdyPNm@-e-A1Qm?t4YU2-cDLBFQYQ^12E=#=47-a#I+pqXC*Zq_lOEuHJX#DGz}m8fO&8+dN|57+=z*c!C(WuvO9tTJq#5XP6dEW~kZkv2X2ow2asvvPMh}d>N|BKJI1mhY;T*Bnaxf6F~?GfxREE+`R$C;61Mzd*?yL zzbIhSo%k-MV2wL>kk1@yCyhkC#El-5hHe^~iNE;;-edT(GuX6M2Zs565^14>BBO%R z`!uiw@9IkBtF^HL833C-ta3;E3SS-?1~!49BR3cu#~8^N0(e=DRfvJKaZQ(6P`tSv z@Tr+lj>!?3C`v|pp|7Nsk?U*3?rbTBJ!xhn73G+Z*j zT~B0zixr&pw$^M*)`X-62?{!ZfqMT1FcmD93%%&p%_WcT=3XK7<+g>frR?c>iE{{=*B-<}@vDE+%C`F5k$+hmm6{ zLshm^&kPK}R(|;Wo8j5Fhc85jb$D`oCcOU~a@4)8mk7;*oQM0%t7OgBW>szhR!^G1 zQs<{HSrKNa9cI`t>)J`;(HoBN{{c`-0|XQR000O8+I44C0pyj}(+vOsaYq0E7yujq zaCtOpFKS_SVP9i!Y;0v?bZKvHb1rasRa6N8152DGVzzZLVs&^6009K{0RR956aWAK z?Ob1P+c*+`_ov`FFVfa^lXf2tK~o@`b??$k(jayhD7uA8%XF+(mVBa|#8>QhznP&( zk^18}aoT2q8YpU$GsEF<=EoUQ$_{(_%TpGnlPI~|XYWR@pZ*hDI^Ax!bDB<;v0#~y zi!_nZ7`{)_42Y8}k>$X^-ta39zvEbhd6XtBjJcFTvOHzyt9*tnrn6IY_St9_Nj6u) z;aj@EZXUDoGG}qhCt||JD>e~RzKrvJr_(vg1ka%r^Gj}GZBmViv7+nMZzOVQYOk)!Mwkc5sItIRktFJ$-AO&k!ELCAjW2U+K{>WZsTH!>oJs{Z!1y z0wi3DJeUiRI#KOM=vk#K&A!OeJO#5cF4cEyIMe4$?0og>8(IoA2F!iVlS#0Eh*2$> zfZ#}iD;_Vwwwbt$B-kQ|lEo6qDcC_n5@veSW$twQ5T+Cir2H|4Ri0*EIa^1Z&3A7Kpo(UDkMnVW^lcWGj$)(7OX3(KQ4+Y?QKb1T&7KsR&mgU=YAq zj+kqk8Rv=qS=tyG7n!(%@hoYVp<)-+#VBTv&Y<+Srl?$slr4!-&nh99l6C{^W6J&6 zb%qXtBYwi>Pr5xylxJu?WTA-h4DA(t8p$siB`hQW&4e5wn!1K3c&8Us!1!#=?r*RkZlDCA=k;~xYS)7p|HX^wMI%}PaCTBKa$qU%G2 z|0Aq<2^^lo*x(^OYUs6)Sstev+WgM!FAYoFuBulGD7FTJMipsjIoGtnOC*7k7r?af zi-R)&C7una?v~L8Ilo7=<-lfY;SG=!{&X0}kdqWz0LrFPNpaMIUdr86Hg_f>wD+dj@+nEu*h*fj^l1vY~ZFX5_?=jjzEGfheyM$-;+CpNH ztc8xgjQ%G)hh-w-HEI)6CNRyaX?!bC^??I&2G8$nfN_V0tuE(LA~!IZCjF0C)h(Cb zqDBB4@&aibwxX@VM+ploxQhbaue*Qkv#)!9b^DO7&w1`4xA(=z-5tJM`MyJsN&OdC zAVejT2C1R^s*2sOUqSj`rRXkrSO0SwB_6T)ZX$pNZ3s}z&02yJx~!;#Uw4Q~@XGd} z41&Sw`RIKx9E@<+1X}OIN54x5y^?)MV| zpXrp{rbmwsXB;XStC||~@CzQ~SZ6qqB)tko%$SkWU@VovFm}k7QYi%kK}1ly1UQaY zjE9u#EwX4H<JVEg%6VMZZh-=VM7}1j=cZ zKpH%$3o4avWm^Mb3hj>35(@9SsxHR{mV(cLqh;t*-DVR)O3;`sCE_~3DIVy^a*a+& zlcp_BEP$2@`u^d?#o_zl=N-EsN)KRYt-|@?#mG?dL|%)`7{c|-z7#M-A%$LeE!1|#AD$c^4Iak)2o%cgFiTZ>y1u$@fr#_(xIAmB z9SH77!UmCZ0BTiZ(OOnE$ih~PFsNVr@(fb>ji{i4vUCrMaqk!HAHIt*RW%|9kLz+B zcyQvT4S`0mJrn?>HXuE#Ie;7PgfOJIgsi(qyA>qZra-A}`e+8w0k`iUfNywG5d?lU z3yDd+0FPIlm7Qf&-Qc#a7f>9ETX8S$En3{6xa-2*S-5*~Dem5dyA^jYZpB@TEZh#; z?mm0(d#)rS8Ofg~^P6L4j3n=yZ{m}yKxXN%PnPG!EM4jTKpSuSWs^It;_IdMHLIwl zmGJJJ%K&{vs|}$l$MCq+=8N6rU06z1XW91k)3eh`YvLEHjY0zli>w_WNb|Rxc!0V@fR%4Nqj3#TtyI(r9M-1I@HIH z=Y#J(sUajnHV!>s@~k!LLi>3Mo{|-RYd+Y)OVI|^!NV60!V{^7ctRh~dcR*#E?G`- zKFj(|{MO*4&68wMzxVw|JMK?EbPyr7<-&6~D2_ghD}EPK<_j26nA5w0z56J*40?+R z2db;{?f2WNI5E8G{znWnFAfzk>J3P*swoWI4cMh?~WM?gL^&}8urSS>Q1`W`D`9QK__ByVHXWEhN zb+A^i>mX~bkj4d+ssOo!YgA|XhNNA?rJNl9+LSdVer2)cCXWY2baK@^|W7E2Jbn_{b-Auyw%io32 z;7>8a3^ewJ1LnnySucP0epkgNu}DX2BPGO{&pbU!$ffa_Lr@Fxv(S5RxTZ5;WZiB# z7E_ci>hoNozd&`v9Efa!9XJ9lhepNL>G!flZ`W$TOL$t}H8G~@L@s)RiRj>YJ{p_; zF+OHxy0D9|`ef9junmNne+ z^=HQy9EU4TdtwYD9NlaqUCJRslO#+U5Hov!;+MVf%3tM|diC%J1N)d1`4KnNzLfgB zP74&9qpgn$RTylSY`AVsK5h}~h+&PEom<=_;3mTP)ruz#AIkDe5Cj5@yEr&qn%cA= zwCIDC2>I}!M-F>oEYWI{m8O@YlJsrhNBevPnf`ZOi+Q-i#^!tWHM7;z>=`%O6=oM( zQ5AZw#|28gEmvIE6iua1j2Fp~(;uP+W6dJYI@v2>@Tje?u-w3E zc2(Y9Hc4ucm@Dv#Ek_<%#a`GevH{e|3~<~QLGftYi**Px7?CbY+7BCy^7J4HWQhde zp!OBm*y?6>X{?N<^$*?Y0|(-xiO;HWme?&w+s}eakH!P}6_jz@RtFzzUbL|e?_`=j zw-(#FJ;CMeRZ~Gg`u@*Y-+5N8+&8bWia8)NoF58vsE!(V-Kx?$n|U1=B*x6)E$lhq z3}QL}CE2|{UrW%hmA%{wR}u6m(H4}6C z8Ys)hB-&;BgIb-ul!5@e-x5boo-oMOdyX3D;-f9~^Wt_X+LwPF(&}cozDJy~XWn0S z*#@B>f~y)t5%(=z&%RcEB-Rqq&OS)7zAiRR)P&J4%T6DhbcU}#HnM9mBT=@r))!SD z{bjJH^!xR53dg}&qXE}Uz7%D^+7G9g5J&@0s-opKZ@^wjVSa8!nidv;^H%!{=;k+2 zk^=)QIi0aio?PcbP*zH0?9XoN!GG81*3G1K#3s(NZy*L;U=ACNp_viyyZ*b}NMpUw z%?Ac_##Bx0WOz1)F`+Pz>ZqK(SYZNr-l;HLCgx#kvxEgg?2h7<~7w@y9(t0RuFjisrbi?gW{ z=%4-JMa9^R8eQ3pT-}iYQ15P_0DyPcZ_Ry91+1O7KP9X`GgM!O<$<`sW7IkgMf3*W zC+FpU9@muEDC{{cVavN}%qm)pSD9!n#hYO;53Kbzc?p>ux% z@hQ*JzdNtGCw~|5OL7J< zR+7S?>%5_G0vgVGx!i7-h!4EfA*ggr{tF?Rwi8kR^0f5^D+cx^4eGMPdi?--;v$|-)L?#eK48e=$30sqrPoC9-7UIhn0w~Gtj zFQ-T1v3AyMIpZA+iSEOYV{fC|$@bJ$ZE6xCey;mrc3W#oMtmrzqq!xeJLSAuFrEUZ zsbkLJkvM*K9R#1Yy5N*&*0B6^zH;}uOWZS~;jJZ`Mb~h)|77QUi_MId5*Mq&%hd0R z6_0Dhm{MUF1*Fv2a>*@JQEqyt4fZ{R>+qE=UPqdg+Lw>VZd#}&@R?l2CC@{)V*m(1 zO^cmoy}Zuz3+CX=Lf9XLs_EYC?a0Gj)7y>J`+giZE(PC?hl?jhA2-LB9KoyO%G;?a z$f}yB>m|tZg+x1>ghbwtUr-=h|Db3~ZaR%Xz;@ZHA%Qi(1D3l-tz@aSoe)vT%qMYm zL={8et^GjBc(rKD06V`ug9TM>Xs3=U+aIEDKUbd4Z!PsS{OzAS;c9i}ZjYqv-d#xT zCoDbTN^3Bskl`O9>7TQo(+!=sGS9?n1w4g0ya%qazIrEbjv)MYu(wCkQkvfqy4u0_ z%$cq;5lY-K<*Ikv(n|3QmUf>Hfk zzZC5Y@r3Jt&%Ay7;p)EEY|h^kg4`=KBD)gc9sy#<`bb}=(gcnc*kmhP1* z@wwyztlf>^%lFIsH$(`M&lwz(Lnt)qb1s1!9o3{D^<*w%CEQIBwsS>3hG~v|YV1nz z_)~xN3GP)k{}~^~DmdO9^+CDS8a{(uqk@}m{}QVP_%0-OZ9txUm;P`!HxEdvDw<^^ z4XsuX4oCkYniask5cn7m3k$TbBjWsIL0QA@Gb_vLX!u$E*N1YrA?BU{lE^1q#&`Io z*6JbUJ(YtCp1E<3SzPRR=2txUBYR|ucBLrqiBM^Uj#UpSQu?3#6?vUDrgWw^;fIRA zJaX7(bKv(P(1^$NrL`Z!i|MLG83Y^P&DA1KWjVDDm6^*t69Z!cUJk4lMSo8iRV#X` z6apLOyY>kD>4c#lH|~#HWLJ*#lV?{Gcxz`K27?cZF1X1vDt!SpiZ^bI(R$f&npI07j1K{+}7{B7c7#wk5AddE*aBWvN(cu*0h!i&7ZK(>Gs#AK!H!hr& zXWq((%ti7M6emRT&ndQ@-l(P$7@pn>jKOJDz577#QX%ziI9hNYWZdh$vxpFDj9ma^ zIWIjrqGW9=$5TTcF&hwTj<2&pEt^!U4wzL}P15c@j(w`9_s3^q6b=A7?p(jUtVj) zWwM~bRuJWIeNfB=P0V!{auW{I#AuK3IwB*vV3CHTO%*ewO;TUj8>yX|Odtldo-H~{ zLeI!DLyccD4zN&d6sZuKV%yYDG;R(%IRU$Q5A#!DB1E&wWJ_<&Dl-&Y1_cca9j1Tz(c4d?7 ze~qW~GYn13#fRZv!rVMOScfYbG&=1r*z`lwTHA6?Mc<-fAnmG+T8b8p$*g;7i2|E^ z3!xM}gciT~Q4yd?#&?Wa)V3Ja|KsPl8KDw}0o01Wc7OAV`> z0DfJ?Y6YvRjCS#e$x8{Ips|9Z?l9raLvaVq`DH9V^guo>Pe0uad{N~ojU^#y>JC^CIHN}iGy6GyMlPdEZcG5U z>y=}sDSkDtfkynQmT@2*h9dlX3F`~wg9@!<+;>99=#|e15w*JuM1^exRN-T_b_VL? z;Vz-L(1`gnxs}bJumYQi5=~ZW_{AGaqpvB4CL|q~=Ao3WXCU=Guk-5;ySp z5$p56aaGEjL7%n6l92;Tc$Q%smYEgEnN3$}k)V=QM3B}EpCes2;kh!bQ1OAQmY}KY zWRBAymRf>cOe+ae{~zCrKxqs0?LiTr8F)tPpgU5oL^zNI-ic3ijYo;MX$?0gtnn=8 z355}FR^guB=|3fI5gX@uJ*^QN6KkY-V#NF*@7q0+4v2f3Virnl%f{CYY7~}`L57T? zh7Lt!B%?W*GssBe`!7{7+91njmKmba%CS)z9VtFw8V)P}1;>68;EkpIkSrt!pd z(r1gFGwf^hHFew;4NkZHcp2{E<*;3ZZm7mzUX^sGfoJHj?l1}`JMW~jaUSA(m}+%; zB;sg~dj!`xWSc(*OXGu0{IU!z;Ou4@FtgJ0ayB`=e}9FbZ|RSGAHJ@Xqe#4_PUo5w zu{R4b(K{liz@MR<4|bax?;=`}OpwqY8X|htJQ7=owNat`f;G?>%6oQ2>VA0&1;cZj z-8>HS#s7UbM-wt~deLHOKD34b>YY0W5RYlX+k6i#)S6ettR z#AIG*&gCnk=e@eo@1>Z!>HYZ3&#kal?-3HVks$koyW?0I_=Y3zJ*WxB9E=@fRJ?{V z@DccS4yt9=)S@<>I4Y!bcfEO|*pA=P=rAw$4Zh892-Gy58k`g8JN5^;3?P;H`^{3d z!ZVwq5jw~_Z+Jq=JW>2Hyc_II$^%%QR>`m~cf2+>K@lYL05_bN&x%p>-1rXd^vZ<2 zei02rl7&{8ffA!K^qNlBu(S&bN;|D|gd_#z66*33=Tq@SN&}?0-7XP{%6z`oFwaurEDJ#0~+j8%6cB{h3Q9h4x@!{{AqDWM6y#JiwMD&aT zME5DqO8-bwG4eA^dqLBdQV-p18PL;O;J;T~Ax@yy>9W}9>RV@LS{RC0H?;>vs~iZS z@&M{~El9$e^VZLEUohfP2&L6o%->}?AZc{6Fn0;Rmdd~^w^Go~*C2v^2p7IeTQ`EO z?lCE#Hc-A=Ej~u2Qsvu{kov2!Ys|>yrwebs(v}0lH+&6roxT<%9oWpNx`<@9fSsx^ z0}o7Aknz;aRa1>=KM&HA=jz`$me6R7Sa#JXp)t3EHly9wZ5c7~W`6bGKIbXLu5ciE zJ;WqWo5bH+&;|QPQnG>EeEdGzJsG*IhcbLM2F_WxDHA0d>mu)u(>4Q z6n)2Gf-kRlUoGt8-6Di$2o*!9yMI6CuJ^hY@3)NAXu(B-;I@#wp2xQUclVPW*u)Qu zgSEip3CI=ouP@}h>Q7yh!zV_ zfXU1*>T_0#yllUewgSE2MbQs>ffKskDr(8U2sy;DkM(>D6@EAECxdymYS z_k90TXrCQw!?;sD4Xu0Esj5nRrJzSaNzP*X^hCg;7E`CBUx%}tdkgC2Br`E zy@8)Ia33_~Zg`a*`L4lccU0S50D<*+TqW*>H!tkfBBQfp76zW1GV0Z&@yQR*GD=UG zVAwp62m(jhNBr{>$UnfZ0`t zM^ujpm{ft2UJ6#G-TXs(^9<)D%*;mIf2w>OOPp6#D%hE?=3L6{cmv}u@Ijr)9CH=> z`#n&wOBmTU)Cr~Xwq+PuEll&ILPhTnhH*+&iBW|dRJM*}zCbQKysUP>(CUu(nw@S} ztM3IFFVpjK2Fg73jmrk*-llTS+kNl3Y=}g+<^Tou#4S$O5)Bog4AO3U^-*a^clY6u z-ZxFW$DzABDAPMBk*ouUbLQ{cjn)rks2iPT#EKtBizso|{G2lTK(*yxwr2E{`|iFA z_xdASQW8mX;0fx7QM{Idg3|-(OMzDMt+VK%1WEG#!NxM)Ce)%~y9QFh*(I44nhVM~_1E~0g04(1 z)Y7x)lM;Db$2Dubiyy z+l24*yA^z8^z(y#MHz5OW$!6*`QCqb{yZ0ZICa0$6lOknxU{))_>mdq8Ove0t#o)~ zPdHsqkn#O5Hv=kGB^H9dCey;wZ3{DhWzS^RB%yDn{KjJ8XWIqM_3!n~J-W|g<#BID z8YYXp(*hb;9yNn&1$ZOrW=Tc|4I0x^zg~it&fp|Z5@fj8XJ&c37rmll5<+zgtlm@c zDa9L*@aDk?5mes{YEsnm(MX<;VekmuMxTI%5Y6IQsMI70xQ|faP>azB6z{1Zw$7I7 zTiMpfO#GX5AQCg$Jao6Fb}AU+gaaVoH$#ki#(iAR6z5+4J4BRn*9rxr_|A21GH)UK z9FvTZz|UbCGiYIge9B)!;|kJ)Ff^MYQ~U_~%EPLT8_NcL9CZ)J4su#0qIrij$IYzy zvs0R-kRN7{`cSDv!vPlyYG7EC zS|9@0soSTipF?}NYm}o^BeJ@JuT&>S0@WevF<)Lh(V*J!EwpCQll78kBy^4r?(wWd zk==NkY$rZp>9D{5Ufk0+S^w)~OcNb)s6Q4WlcBk5MMjCucab&6_!P3$PC5OK2d;wZWRWzM&6%QIOJq`f%ijv26M^hE|U2RyvsSM%MM|Sb})R{ zgX*_?=UamQ4%PKDGlx@iiMSsI?d`Ab+Fc`t&7-ghazelP_R>YcT)Z40+;O;ixN4MN zv(fk_c>-^J&pUpy*uuNC`J4+rZUuP*_4)&fBX7;SHx4MHT_kX_uOE>gUBGci<=1=c zq^U=CPu6|%(NGGd`jGv!aI=a|hIGNfdLLx3y~XiT$F@`wc?I8e?KA29Ya<-|P>46O zUK69av?Q25oY?xfQIw&v8d7umo5PjADSI|>HFIyHjr>7*ybpR7^|JJ_(hLLYbZxVR z!M#dYJ(qdOA+aR`GSwfOljiIiSZDzkjwIPrZ>qQ(O8Rd1>$0T#SToTmg?uNEr9&8jFGlgZuzIXf<|aW>)o9XZ;+4-xY8CSrXcC{wZmnM> z57CCz7)ZIICC7A6Kl7E?}#4<=nF@otU1v28mCQr3Pu6xs0CGk8{<+!_$g^R++&C$xeeW~CQb8++6D5&Fqw#VWOy`Oqghwl ziSJ5jFY9$7U*4frq-OiM)MU1e$vYEMaBHG>$m*U3RE`^GJKyM9qUZDgPQL9?62sz1 zIlT7L6}$F3N;L(tihzMiU0#4dv*=RE6S1?P+bm?t10v-S*h*&b2U^vAZ|F&hXgXkF zkU7H^p<|cC#c8VJJ)iTBk1^o=@%BmQT1?nDJ1@p>`ehPwvM6YwrDAgUIviwDO#pzd zZqi%H9esjEkLDH6_{GxH&9S$ZwG90$mV|Q&kCu8}vA;=YiI(rzkv?hJHX_}P_s8aiuwvfc-ILRP8Dp%n`P#B??Yip@ zGgh0}#$Pt=m*r;l@E#O;UEB&%Gn$y*m5%GIvF6Ho&FG~`xD8;izM}OVJf>s?UA^Q~ zIimTtD>r#;*WR5VTwRJyu-B?K1za z_i&~1{Nn(`Ez{=-Zcb-Ogw3HJPMh}y2dY;*!ayyeI=7m-{M#Y**KDzDo8l1r@?M^*hBF1%N0loRD z(YTvovZicm5OXdNs7fQSJAp#2^7 zrZ)i!0|ftpa<(+L1KRw}z4!)|&7}T!Vd}rfhiu}Ydd@#h`=)aN{<8fmln?Ij@x7^C z&TpPbW(SYIMk=BGA8OaXM~bS1>bVd721Nt_-v3LaS^t0<+uJ$-UH-xR9|Qg8=v5vH zf@fd=fN6990OMbnV{cdaYa-1o-v)2#`FC;d|1|!8Vo~9WS5PnkfH@uj0R3OEM{gd{ zzhWJn>|N~NyrKUe!BQE*EXu!S^i4hdH>wcX->C)$mUfmd1_u9+)OR3r)yNhDiX2S< z;Di<*)|IEU_dSd@OxzFS8@&0cevj6uS t|EHMx-|2^8f2aS4wE9N=i?sT0vZ|si>{|f=07!4E{#!Ay=KtmFzW~{AuE+oY diff --git a/python/pyspark/context.py b/python/pyspark/context.py index 59b5fa7f3a434..e47f162ca936c 100644 --- a/python/pyspark/context.py +++ b/python/pyspark/context.py @@ -1365,7 +1365,7 @@ def setJobGroup(self, groupId: str, description: str, interruptOnCancel: bool = to HDFS-1208, where HDFS may respond to Thread.interrupt() by marking nodes as dead. If you run jobs in parallel, use :class:`pyspark.InheritableThread` for thread - local inheritance. + local inheritance, and preventing resource leak. Examples -------- @@ -1405,7 +1405,7 @@ def setLocalProperty(self, key: str, value: str) -> None: Notes ----- If you run jobs in parallel, use :class:`pyspark.InheritableThread` for thread - local inheritance. + local inheritance, and preventing resource leak. """ self._jsc.setLocalProperty(key, value) @@ -1423,7 +1423,7 @@ def setJobDescription(self, value: str) -> None: Notes ----- If you run jobs in parallel, use :class:`pyspark.InheritableThread` for thread - local inheritance. + local inheritance, and preventing resource leak. """ self._jsc.setJobDescription(value) diff --git a/python/pyspark/util.py b/python/pyspark/util.py index b7b972a5d35b8..5abbbb919636f 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -331,10 +331,13 @@ def inheritable_thread_target(f: Callable) -> Callable: @functools.wraps(f) def wrapped(*args: Any, **kwargs: Any) -> Any: - # Set local properties in child thread. - assert SparkContext._active_spark_context is not None - SparkContext._active_spark_context._jsc.sc().setLocalProperties(properties) - return f(*args, **kwargs) + try: + # Set local properties in child thread. + assert SparkContext._active_spark_context is not None + SparkContext._active_spark_context._jsc.sc().setLocalProperties(properties) + return f(*args, **kwargs) + finally: + InheritableThread._clean_py4j_conn_for_current_thread() return wrapped else: @@ -374,7 +377,10 @@ def copy_local_properties(*a: Any, **k: Any) -> Any: assert hasattr(self, "_props") assert SparkContext._active_spark_context is not None SparkContext._active_spark_context._jsc.sc().setLocalProperties(self._props) - return target(*a, **k) + try: + return target(*a, **k) + finally: + InheritableThread._clean_py4j_conn_for_current_thread() super(InheritableThread, self).__init__( target=copy_local_properties, *args, **kwargs # type: ignore[misc] @@ -395,6 +401,25 @@ def start(self) -> None: self._props = SparkContext._active_spark_context._jsc.sc().getLocalProperties().clone() return super(InheritableThread, self).start() + @staticmethod + def _clean_py4j_conn_for_current_thread() -> None: + from pyspark import SparkContext + + jvm = SparkContext._jvm + assert jvm is not None + thread_connection = jvm._gateway_client.get_thread_connection() + if thread_connection is not None: + try: + # Dequeue is shared across other threads but it's thread-safe. + # If this function has to be invoked one more time in the same thead + # Py4J will create a new connection automatically. + jvm._gateway_client.deque.remove(thread_connection) + except ValueError: + # Should never reach this point + return + finally: + thread_connection.close() + if __name__ == "__main__": if "pypy" not in platform.python_implementation().lower() and sys.version_info[:2] >= (3, 7): diff --git a/python/setup.py b/python/setup.py index ab9b64f79bc37..673b146cb6c5d 100755 --- a/python/setup.py +++ b/python/setup.py @@ -258,7 +258,7 @@ def run(self): license='http://www.apache.org/licenses/LICENSE-2.0', # Don't forget to update python/docs/source/getting_started/install.rst # if you're updating the versions or dependencies. - install_requires=['py4j==0.10.9.4'], + install_requires=['py4j==0.10.9.3'], extras_require={ 'ml': ['numpy>=1.15'], 'mllib': ['numpy>=1.15'], diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh index 341eb053ed7b2..f27b6fe8d9a04 100755 --- a/sbin/spark-config.sh +++ b/sbin/spark-config.sh @@ -28,6 +28,6 @@ export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}" # Add the PySpark classes to the PYTHONPATH: if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}" - export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.4-src.zip:${PYTHONPATH}" + export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.3-src.zip:${PYTHONPATH}" export PYSPARK_PYTHONPATH_SET=1 fi From 7bb1d6f01148b037acad12de8166cf742cd30ea3 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 18 Mar 2022 14:00:48 +0900 Subject: [PATCH 011/535] [SPARK-38563][PYTHON] Upgrade to Py4J 0.10.9.5 ### What changes were proposed in this pull request? This PR is a retry of https://github.com/apache/spark/pull/35871 with bumping up the version to 0.10.9.5. It was reverted because of Python 3.10 is broken, and Python 3.10 was not officially supported in Py4J. In Py4J 0.10.9.5, the issue was fixed (https://github.com/py4j/py4j/pull/475), and it added Python 3.10 support officially with CI set up (https://github.com/py4j/py4j/pull/477). ### Why are the changes needed? See https://github.com/apache/spark/pull/35871 ### Does this PR introduce _any_ user-facing change? See https://github.com/apache/spark/pull/35871 ### How was this patch tested? Py4J sets up Python 3.10 CI now, and I manually tested PySpark with Python 3.10 with this patch: ```bash ./bin/pyspark ``` ``` import py4j py4j.__version__ spark.range(10).show() ``` ``` Using Python version 3.10.0 (default, Mar 3 2022 03:57:21) Spark context Web UI available at http://172.30.5.50:4040 Spark context available as 'sc' (master = local[*], app id = local-1647571387534). SparkSession available as 'spark'. >>> import py4j >>> py4j.__version__ '0.10.9.5' >>> spark.range(10).show() +---+ | id| +---+ ... ``` Closes #35907 from HyukjinKwon/SPARK-38563-followup. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 97335ea037a9a036c013c86ef62d74ca638f808e) Signed-off-by: Hyukjin Kwon --- bin/pyspark | 2 +- bin/pyspark2.cmd | 2 +- core/pom.xml | 2 +- .../apache/spark/api/python/PythonUtils.scala | 2 +- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- docs/job-scheduling.md | 2 +- python/docs/Makefile | 2 +- python/docs/make2.bat | 2 +- .../docs/source/getting_started/install.rst | 2 +- python/lib/py4j-0.10.9.3-src.zip | Bin 42021 -> 0 bytes python/lib/py4j-0.10.9.5-src.zip | Bin 0 -> 42404 bytes python/pyspark/context.py | 6 +-- python/pyspark/util.py | 35 +++--------------- python/setup.py | 2 +- sbin/spark-config.sh | 2 +- 16 files changed, 20 insertions(+), 45 deletions(-) delete mode 100644 python/lib/py4j-0.10.9.3-src.zip create mode 100644 python/lib/py4j-0.10.9.5-src.zip diff --git a/bin/pyspark b/bin/pyspark index 4840589ffb7bd..21a514e5e2c4a 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -50,7 +50,7 @@ export PYSPARK_DRIVER_PYTHON_OPTS # Add the PySpark classes to the Python path: export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH" -export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.3-src.zip:$PYTHONPATH" +export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.5-src.zip:$PYTHONPATH" # Load the PySpark shell.py script when ./pyspark is used interactively: export OLD_PYTHONSTARTUP="$PYTHONSTARTUP" diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd index a19627a3b220a..eec02a406b680 100644 --- a/bin/pyspark2.cmd +++ b/bin/pyspark2.cmd @@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" ( ) set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH% -set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.3-src.zip;%PYTHONPATH% +set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.5-src.zip;%PYTHONPATH% set OLD_PYTHONSTARTUP=%PYTHONSTARTUP% set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py diff --git a/core/pom.xml b/core/pom.xml index 9d3b1709af2ac..a753a592b0f33 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -423,7 +423,7 @@ net.sf.py4j py4j - 0.10.9.3 + 0.10.9.5 org.apache.spark diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala index 8daba86758412..63361713c9b94 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala @@ -27,7 +27,7 @@ import org.apache.spark.SparkContext import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} private[spark] object PythonUtils { - val PY4J_ZIP_NAME = "py4j-0.10.9.3-src.zip" + val PY4J_ZIP_NAME = "py4j-0.10.9.5-src.zip" /** Get the PYTHONPATH for PySpark, either from SPARK_HOME, if it is set, or from our JAR */ def sparkPythonPath: String = { diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index bcbf8b9908ae5..28cb7c1684173 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -233,7 +233,7 @@ parquet-hadoop/1.12.2//parquet-hadoop-1.12.2.jar parquet-jackson/1.12.2//parquet-jackson-1.12.2.jar pickle/1.2//pickle-1.2.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9.3//py4j-0.10.9.3.jar +py4j/0.10.9.5//py4j-0.10.9.5.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar rocksdbjni/6.20.3//rocksdbjni-6.20.3.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 8ca7880c7a34d..07549effc8706 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -221,7 +221,7 @@ parquet-hadoop/1.12.2//parquet-hadoop-1.12.2.jar parquet-jackson/1.12.2//parquet-jackson-1.12.2.jar pickle/1.2//pickle-1.2.jar protobuf-java/2.5.0//protobuf-java-2.5.0.jar -py4j/0.10.9.3//py4j-0.10.9.3.jar +py4j/0.10.9.5//py4j-0.10.9.5.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar rocksdbjni/6.20.3//rocksdbjni-6.20.3.jar scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar diff --git a/docs/job-scheduling.md b/docs/job-scheduling.md index 4ed2aa9112224..f44ed8245e286 100644 --- a/docs/job-scheduling.md +++ b/docs/job-scheduling.md @@ -304,5 +304,5 @@ via `sc.setJobGroup` in a separate PVM thread, which also disallows to cancel th later. `pyspark.InheritableThread` is recommended to use together for a PVM thread to inherit the inheritable attributes - such as local properties in a JVM thread, and to avoid resource leak. + such as local properties in a JVM thread. diff --git a/python/docs/Makefile b/python/docs/Makefile index 9cb1a17ef584f..14e5214fc1e0a 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -21,7 +21,7 @@ SPHINXBUILD ?= sphinx-build SOURCEDIR ?= source BUILDDIR ?= build -export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9.3-src.zip) +export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9.5-src.zip) # Put it first so that "make" without argument is like "make help". help: diff --git a/python/docs/make2.bat b/python/docs/make2.bat index 2e4e2b543ab24..d36b7a1abc0b0 100644 --- a/python/docs/make2.bat +++ b/python/docs/make2.bat @@ -25,7 +25,7 @@ if "%SPHINXBUILD%" == "" ( set SOURCEDIR=source set BUILDDIR=build -set PYTHONPATH=..;..\lib\py4j-0.10.9.3-src.zip +set PYTHONPATH=..;..\lib\py4j-0.10.9.5-src.zip if "%1" == "" goto help diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 15a12403128d9..479f8bf7cc88d 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -157,7 +157,7 @@ Package Minimum supported version Note `pandas` 1.0.5 Optional for Spark SQL `NumPy` 1.7 Required for MLlib DataFrame-based API `pyarrow` 1.0.0 Optional for Spark SQL -`Py4J` 0.10.9.3 Required +`Py4J` 0.10.9.5 Required `pandas` 1.0.5 Required for pandas API on Spark `pyarrow` 1.0.0 Required for pandas API on Spark `Numpy` 1.14 Required for pandas API on Spark diff --git a/python/lib/py4j-0.10.9.3-src.zip b/python/lib/py4j-0.10.9.3-src.zip deleted file mode 100644 index 428f3acd62b3c024c76cd331c7ddb0fbad923720..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 42021 zcmafZV{|UevgRAxwr$(CZQHiJV`s;T7R0_2Oh6lXrN!kfSV04O8?0O8-}E>;%yMz${Wj$Z%sq9Za|eZ~L(q8bgk z_)T`iKc5EVz6Y4EFl<+8oMw>{o-I8Vvpgsl!bcv)@b8YfvX~TMhkZ42?)>PUdC;hU*g{ z2RUXrTR8Uem%|5NPTF|wE(4V0bf3giQK6=za4y&%prO?$(iu8`+6MMSkprEaC;H&}{JdVdqT zH&$#YEl*Ad?~c6#3ZJYJLR znyxvtp*AXb?CE??m8zb;Sq@@bfd(D6Cb)$s2Bh+SWzk@AA%=r9v?K`5HBuK?r~j%7 zXIlUc`Z!yTVi4xSxoHHR0Cbp8I_?;UG> z35gA7kDHrgZTk9JsPnMB>}r?&Tk~Vx!#iNpr7^*SSnype5as!|BJRLT0`NRoE__0@QHDBoWiyFInY z2tbOh*T;|B;dp?J{%mkb`p8b^L%b)QnRo$qA_Qun@%G~ylVC(d0*9Rk6XxT~=ZG_J zGKPpuoUkM6-~0{N6AR`4zE+$6%CfR#z-5x{S6S7^Pc@0UxJ~RqYVCQk8T!a zeZ~~%aI(-=VCn#2tP=&H=IiY+S{njGiDw|w72>$mTxLD4Upk;AKYsr{TVurX7B5SNT4L ze;{i6pm<3ro6Ff`-X@gj|I#|zEf1l1j1vztWKa)i%02HjT_J}BRkRgn5U8WEw0@2( zjP`IJ!th?FNm(JR4U{SA0puycw;lM5)Y5 zw(saJEJydo6xK$92f z8m+72p0xRq+bcp(=gHofO>>MqekOv``(~rD92P=D(L5T;gjF_?`$rlM1s4A;D7lhv z6cYs8oKLwRWlB<`CdLCqZWFQJ(BeKCr_V!gN@uRPA+0L$hRM_0X>{GeK*r*^J?gdV zheQ|Gb1aHg9K9hgrt_RWW*Mi(w~o`G$Ybulkbc0KtOMe=@j;y%Fc5ewW$$nN5W|h`l+@W+c#p|A@K9sNVAF*~h|pMrF>qZZ@B4B&fvB=9 zM1gj>T@rxp570Bl_Sx+Z3D+9J6Xe-WN#>Yibdq6<^%d#k zAGj2zQQmp9AzIYDTYN3Gs@{>Z%b)h(HXCML5pOZXU1_3x3R#0%p|B*mTZWuHf2Ac( zl&ccRvObBBIm3y-@l&iK;aj9^5Pc^Q_Pa;934R9{T0rdOl3Y%?XKAY!^o-3PmyO@@ zopbR)_j5G!W&8F$$jZEY%wKbkjX%FZQh1w*7lva|XU@feIFo;2t>8E_x`Z~u@Sb_; zsP(y*`Kd>?ZXGc@`YK)3P@TD^e^y7Jg!tzSWieY9-oydy6ypB8qwbjw8BfS2}pBtvD8@}zrq2jol&C`3O4ZK77HN? ze`F264jUAEA;14qG4`);QTZqeT?7FD77zgdq<@Etxz#_ln3cDg^M4aD*BaX~o8pK8 zs|HNE1lAZj#bk5DoM(EILWEXCfA~`mbYw_uT&naMx}pz5|K8?&C6KK@YExWPjG2JT}?`W`$?zX6=R_CtxCK5QEK4RSkr?5{3Ct(wmj0rpsAwsMSv0s-5Z$nddoI_%i zQl+D}F-5h>?1xbb8tkub60AYWNFP=)Xz>eCozpx9mMnfge+uo#Q9*#7xh&n@rMn^pMyLf#k zX7eVD=NlNrAN`z3#(iG?LYOsqx-3hZc;YKVhLy|cGnKVr4dhmGh`#tpigywS;jAu|6;L(xm-h$tuNpZ$F99mZ2SCZ#7VYZkD(+v32 zBwrDLX7r6Pb_mr3ap*O4E}*jQ!fwSqAZwVI8_wr=Y?bx%|DXdLctw*nOrsSwo1J5p76b+Clf&q5gFonhoaNh<$!v!7T>^Bs*wv@LcXeg)G?JQ?ER<9V-5a*x}Gg$DYqq}-@*oJ=OhDObF; zfGn}@jTeR_BhxtJ+-O8KY4z8%ZPr7pKRYya(^gCvdsMJeBJDG2tLd*`APe6;5p^4B z>3evhYaXqZe!@*s7%HYq;37x?7mKH3P+JUTNuZd^qS?;KWz^}h|8)|Cb^@bJjODPi zA&+zzrlpcIc{U}TR_;OutroUDN>+fV66)qvP|eYXbR4O9SjQNpo1w9Cf~{QinF9F` z7TFYuw0;$z<8LQ#8|*Y-95N?$ZcZv7t%SmNKyDOxfVLbsPfiQJsr9B%o%8z8kE3Jg{VdE+$PF*2~{ zxpGHtL#9h!8A-(I*JFMQA;Ch^^kcI1GNp_fO+*PYYGlcIv&7-gBrxCsBo-wUfnLR? zXyhTmN45ID?_gsnh)>lXG~u^Ls-BU8kk&UPo+%++gE0vD8Du<5fq*k^(C>p^n+OSL zjh9sF`^N>Qth49)TJa$VEP!W0td=@lXaaYL1o;nLxJxTD@bLL$gLlz41X<`@y-_E9 zgdN!6-~G4!mb?_FN9*e`0X=WW@3RcwR1)GQTo0lMU&I!%BN|2bmfz^$JVmJP=BoJI z#+FyTxKk1izAz_Jc$SPKt0vqx0FrN8&i3b!{i=S{-S2yd?!QrQR#Cr!SNGly-=FLp zd4T+b$A@hKI1$9tA$8od_f2`jF^a}H+jHdYzs~>Gz@fO~8jk7-Q}wt%w4?cg6oEpBShy*l4s`a zrn4~1J$tkNP9P*5EewzH)SIyi9WJZ={R15BcnCkyXQT=Yu`DDT3 z%blNn;<3o&$ojiq`R;gkPBeg+#Zl0#odJf8C}9!|GwdKVgLp1b7ZP2Q44tZ=>IVnH z&{L%cDe>8mv!es6@3h7msO6m`kGZeo1d_%0+uJy6yu;EMe_9U!=Ejm9rdh42^*v44wD@8sx3xPA`GIm*tdn zGwu7+l=}j{XwYwI zk9?CSEw`B&s&Ipt<4s0jcyk8I5O}oKUs7Un?!^&fE+Zo3G~MBrMt&a+WXB3QJBc?uRFaX8blZ zzWw*b9{CQe8-x`=J-S`X#qN!5;Kz=f0Gunuuu+WiVq_&g@_aWPS7P6$nRvmOI;<;)g+5n zBgOY3Q6%qIQ%VQ%AcxB!^i}SN&xp`F2SYeE<89{=E=srWEVpy(4C`j8vDdDbjZlv?x_IQ>PfU^+h=d{uY5-2?ZGWo%X=Y1{N#S$9QWa0;Iw zSZ1U33kF2eYv;+%pHnp$`{vB8^m+>mX1M8ve8clfl~~!Y0tB}t+$pWGLZ9$NSZN4l zik#zl@?hxQYnPRRlbl99SrKqHHXlMrk^H-v0@2a9!3O{v_eTQo#0{O6h0~M|&|g33 z687{~Y>PtI2w<+{P%zUhp@tj{&!g^d_%l+`hNaVKAf-{`DnY}9d@raL1p-B@Az;Ot z-~jPUXsm#s0Im>i{$Y3#eQ@FDRMc+W>YkWkV+0xTH`hM`Pqslq{YSv$w;3oZ_srMr zzUVP9EBUpM(lxAd!nKuRbk=`b)ltD8|Dax~rJ=k;k#}KUIiy~vGgXN)sF8N0N7D)> zkSBg*=n`!73G zHCs{PjHd3zmFw(RPHvuodJ^P6+-gIM<+OB<2-KP~wLV7c;il>}RXGM9*IstUdR^BW zqOem+J4N_$jC)?P9*+ABm6AeRDW~~iEW`;4BT-`fmN%>nyMgbRz9u?nH6JriUw$-spmn>F6(;@S0Qc6URbyHzVVb&8sEr-$rGOO z)J|7I-A?x&VTGt%@)8-O{QBNG4PTnIRLbdSj?8waP4b|}N-bKe>`!72qj=0XAOApA z?z{YZiBUeb-ZI90_k73P$#$V!V= za^Nnq&%HS-!?k1*0T$;L%hL?q|B2RsCn5#DMkE^o*KFaT*UY~>CH_GWQc-YfK^hmI zey~c@ZM^PC6?3*M;ad9OF`iOJ$4O6!naBlDA;?DgTyJDXxcC5wOhY@yXF@~0YNX{q zwW?E}W0dI+NgW|B`6YOOH6oT6lMiB);F_B9O1t+7zlo7iVv9AZCO|sip^houP>9Vz zxHP;D42A=J;8~xPINXDhLFn70Psy`gPTL{t@VXj+Z_ijWK>*C{Hk8hyUIz%yxr$P?5o z6rh%WpnF7Fr>^r8;WdmrMV8Txr#GPlU#SvGKt4OTNbHX5txG+V{%tZRo6DuHMNN$xG3kAxq`=Od-))DyO$@KZ8D(WFZ}s=K1l)Pm8Y zR#Qq}+HjO@G@?_cL=N#d`GiDFkoF4$i;L|P-&xamQ;~a{CJAf(4hD_}W>6Z_m81@* z5a!Zp!e!@F!JrAn^~Tn~-4}}vz0pn9vx5#+X$D0tL|W`+j#m_`6!oI!}j(FSXp> zoLzKq0u)=F3ZEt9dAqxY$BXR3R>YfQe4UC&egVT#wO4A4loN10|LT3G0GHziPm z2nkmJ!uC#BxXN~(y5al!Q|mt@tHzYeSI->PXAGT$6>s)-NZuwe&j)Ob-JT<=+ttgo zbk z?9I*=8)k#?&aBlSIJsP2qWTJ++^xPeY5epEG8ZwOS9jW&#Xl^Ip?#S04Yf(ye0~bw zrFFBc+SYnkbx%VToA+rJ=^i|&87F3PGT3cSq-Eo9QC$`tx4P9U;)+OQjpM}6GX z>X@mx>0U4j@b|2c(GFv~|s>F<^B;q!oqkD{4{A3gCqfLwYdP(HDcniD(nD zGTW(D3$jfb2ijD*X?Vm2w<=EK;RY>q=MvjY<~Zc+pxo`tHg=j`TCBy;Rj6muYDg(Ox}ZDlvUr3R4n6Jg)F%O{84o+1m+;ypQ!q10Cyr zzX@(xGgR6kFQ`gY(6pv@?eRDm^%7n{9~(~W_>@(B!Wi&{l<)w8yRFU?72 zx8iAa<>r45CPkn0fS(3ACLF4;I5va!e6V(kH0znf9Y(6eQ$MakfG_tf*NJ8ZW(sH$ z51X1{HHoT%J{*A}dEqTzibo)US~dyI(P?&rFHUiUU2WVM=ww8pe4~BhaiUBs#P6}L zE^0dZtBvpFkKX539rW%nrn9bv@Gl_qp!W4>=8J!|fIZv+nGa5P3WvsFl4eAIT~Nh- zmz1>Ly|1G4n;(m$1^k^sQ%Q>UGOy6kxO$~+9lgglXd6FyWmv;LfZ;rSqLoth3vs{@ zk#_y4GbId|BEd0S3uCW!Qk|Qyfr=09J*H~&dP!h62K4qefa*Xukd3J0raPXYuMXH) z1H(wU^>>}_sev%fq_@;pf_Akk_Vxk448&uOnO=pgEB)$yuyS6bD+C?!wA=|&p|{{n z=OF2^rLICf%NQpgE|su-&r*+84`YW^U$z5jAqpY_0BgSO;y085 zg?<%=i?8MwH4YG0XJ#FtQdfU_1sTPB&&IHI@+XhMCjN z#lW5P6vJ}UA3WH1<8d3>vWw35Zy{3D3v}S8LG&-LguB0`uy6BIZgl)DO>*R63+?nO zp6Y**3zoIJyZ?B*NAV^HQvbHpM^4wmSo3878nEUu8xJJYvwdB~0mfjuaMjO(Xk^?@ zH>o!t@Zk!wW7s=HP|Byb(5*W^t+J)BP*k|qba4K-n8y`PI@v%5Fekv!m(%W!zSUvt zHmc^6DXo1p_`h2;q=q1Ca{n}d9R892V3iCEtn95^4GjJpR!K!2vYho^8B$VqMqXCo zSej;HRxL7KfrgHXPC7PTV{D3UjDcE7QkHsVmWoa)HsX$G;>obltU_!!^W95V_x@gy zT(OK!Rue%g1?E4IT?vWGDjal#{VkdRK%FlT0QKKwm$i|*k%5JgtC@$9*T0_@z-{+m zHK?w1FWooAlfGxm&I(?$1EIhs<5;=|U7keJEG44si(+!BM;q_5_KyUjNxerG!8aiG-Y3k+hWgT^=W1E1JRL3Rt;lqG#DbE(@S&x zqwO}Mx>s@aitce$x~vY{A>y2A93+dz+(*c1-Kg80?8wfeH#fW9e-d>Esw)#cp-fMyRbpB=Q&>_bo5d@hEgzIv_1Gt+#S$ek%z$8N zA5n{|3u(o)%%2sl==LQC(PFYw}o;0S( zsqhg*<7m+s5{w}Tfz*c({k)cFR|ZKqWSua}{8Uu3lcYOW>St=ycAy!nv4T~HC$I&) zP;Pk8;m@uz^x$ZyDZ!nST$H~)tFl!Ub{zRfR<)B< zpa#=UmGEHJp|;nk(9#Z(78`D)N?(JhE(nGYQDVq9NTFQnDo}Kzl|ehU2IJ1S+_A!w zGb3q|WGcGT3(!8%;UPp>BGwK>mefVjrB^k1W>P1c8R&jk&6l?zw5rlhe_HlI;9a%s z|L{XShfrF&e4dlFaq66EbP&SLtOT|LEBNNeL~@DBf^L!g4RH16Cgo5h`@s9A34%?S z3{qr0%{8tQ^O$2vF)AFrL&|+l?EfZAkuuJDM{S2u>MV1317ao03gXckcMF)i{L}6K zZ0(DrhyV47>HTNzYtz6d;!xvrbyQ>idw+lU?ExA8HIMIS)e+>>NW?4l%7@f}KTf5E)SYG=UI@$^MW)pnmyc zkKcQO=2+YXLG7f*14tmx=vnxlZ*5lKsA*=IHCCgUvbvMwqGQ&rp?sjQ2(7WK5q6~U zsg$Jyu#Sd0APqExfGyH9Q)^<2EGv50umtv~8Wb>_FFgwBr|(<2l&w(kfI(r*#!NV|FfkM)*zPvNtT`4;|`f zvbVFDl2C?IvgSRuNye!GJGMt~SM8vTLD_=dI?GoG8Cpsly|+sV(3RJDoE7*t$Du)x z5h&2}V=P%}!;Qb}H5fhd&aUyeV;`{*+v2DkK_NLBMKu+5Vl+*>f9Vp+XAIV#(9MT- z5Z6?BMJFg2sR9kmjg{z+xC{lSOVq{7*Kv*n?F6=vapBE@;WU|# zH{wu?0`PYUy0Hgr)u2`(LBPz+t27`%L}G!FMulyk5*i_+49YU#?euR@n(($T*28rS zvvEa}O(D0V8X~V_a&DBlaJ)u?4p1zy#&$4?zno^HvRn)LxA!BC~ z7OUIJvM3c>Fs}*E1cD(#EoKL{zw6m_idWh9T(p9o95uj0WfFzew*qIqaLbGF<45xB z;9GeAcw^?^{* z1=Jki3zgBtc!yKIoo*|fIpE6dIUvso-WBC{q-vvjT!qW7Jw7%md<8UzBLEH2nl_oy zhtSdQvy8xrSsZ<{WjxQ$3PAC7-OXpW6i?YY+oBHC1(NcMszZp!srT8FKS|JCCicH=&n1qQhGi?l(#`-C+j*1!}yRF0QIAT&ANNV1TfHOGB7zutc?>5)S5@^uTN} zP_oGh4>V7Cg*1xsaRJ?&=wBZn|2{Wz?F?W>H|R*8pN9%Bm_L4=l*cKPwC`gL>iG&d z@_SpB;sV`C+7OQeN8kj%Fi&~>f^jFSPpqaSbx@!Xg*0BE4*KT1dl8wbHtdyeBr}X2 zgzqolq;nn?@Z+X)*0w3D{}CfFMN4hW^l3k?ltyYnT@tKguvH6Oq#ib4q5;F3Z*B^{ z>bzcP;5!KNxYaT<`b9M)a)wmT;2DOP`y9LPJ&tzbMFeyqx^2(XyB0qJM;7RvPGo&td$*!*t=ym43iL#3IHvL7Y zL;@9W|M>I z`Pxxy*^&=(4fGn0AN%5CYBjgVK}tOY(hrR=GKapWp5Ho~MzG=&1a2_q)6iNGUac>~ zOYR0@kkfA(qwrmU{Cn?J1mz^xUR$jyYBh+6f>BUV%(Od9U+G$`YG^3euEs88YC~cA%a8=NUUQgp@fvU_klM<`e1si3^tfgjH1@wX| zOAZ2`7$s8(0w)~_zHJi=66NDN*_vKaVRHmEDj!V})INq|1u7kHQqKTItIn=DDvR)P zAVMbXk;wWks2T&zL^Mf^DxuOqpT;N{o3J_{&ifq7<~rJ`uIpik_VKxf@?pf-89fJ} zFb%8NE6N2V4=bcK9*0&Nb)<0+#oLLfJi|!R>gs6pBdUQ(YRVLWkf4WHc;DpQLN=Uj z%8$lS+Mq-=IxI<`WfXQ1Q7#Z7(d!i9QBs_DUXQpBkYKz*r(9T_P*%GRi(#gcb1{t- zX$%|X&j%T4l1-&Y&^%ERN1bU9y*Pt3h;( zN*}y!K*T+HzswPGdG&J6t+& zO*;c`gcV}{Z07u%t|oz)fWKQ29`^mrYTrPSe64qcu#SPdntbe%MB4pqK%9Sq2(D5c^VTu1Aep&_wteoGhg2h=h5WO|oYmiIqMo^xh_M8M51YU0?nLuz-p1FQ;`i-)Vgg2|m z{dRu@9LY-?C?%(Y{7h$b$pfD=r<4BLa244?nqIx_n&=IDG~$&1=nwAvOQJ|RX4}nq z$^cEKIsO8gRKePz3MT%?;x*`*ws^=*Vqi@z%m)PM&zOeM#L|gGzpww!m+50__(C6} z+m)fUoqUcENVkXzye@4|3vs2|xH7Zop9BMnPTjP>IcP9rj-#!Qi=ekuvHhB;{H+L@ zp`N2|Cfa%Lfi*unkkb-X_IXG9cDW7~5LwrPh)ya68#gId6(28q!yW+$347)|Yt_41 zB~~gNXvZSpHH*2T+q4!zafe80IB&A-=Ag$6sZzt6B@5*VRlVkP}PnprEN$m}<{f@a4oMe5eT&K5-KzD7#CnRt;q|Z-blDor|L!g%~fyh0GxGkx@XN)|RKowl1^5{Hye-}2wY2d&x`_zx7*mHLAh2if-qWZXRef}H7!Ua^f_cy6 zRQ1;H^nqi8rwzi}d1pj_ofE$@46g?|?_F9&m>rwh5c^AhwlMjAeDXfb2qoFl9kfn= zJkII%INHHBzS5~Nks4hd3vnH2ejxem4*wj1JK6nZ6^>gQZeye()}hF>7|-(J zBM}1qoS%FXsmh{PpKL!QoF8>avCRKYNCg7M3X4m7JhXhv=GM0d@Qn|LeIGplN)_)| zWZbdz8xSgZPHT{HK|BRNQCi-eLz>nhkB<%zX%r@Rjo*)dgy`$sjX0T}?%;>ax})97 z%*6_?Oy_$QN^fhXdymg{_e}3#-x3{-p4OPDV85)VjkcZpw7a=@2$`L{*VdTXQ-^*!&B76H= z1=B5AeW4Xi0JNHn^>dZVa2*k}ccb}JT==R5W zyezbLe3uEUw~As*F1}3S4+5~2n2@G>AtMpCC%NHxtLlFG^8ru^b9B;5Vy2<0EXcOy zCTEJ$ALInIB^-qi2&qzedI_Hf-CB_1TUnYouvIP&aE7k zH~F2GG;Q3Z6RUkt^GVS|^I~=F0nbsS+@DB>*o>oon9izY+hJ2phhuttI!#=g zzEXPGERn)>g}m#^!9~6Yc;3Rc7A;xqgaYc0q7*E<`8&=*F0HVX+RxtD7o5E2ymGgj zTAdQ98usBP-1~u1PvT(ed04m=N-0lv>Sdlykmp2P`ZX_N_vCSCba9$D zRFkp#VUC}QsVP>qUSlZO2rS3)hRol~d6BflR?NaZ`~J?8YuaABzI4YZOvwrXQRuyq zGO(OIIPDmt8!B*Li3#t|X|w(`wrx>SlH!$Kb208GiGzj2zL;NG@t+!{B73u1`S2)4 z62AM#ZL1pd!~2^tvpU?JuTc-SQJ+1jL-i$8$3QBGgV5mC-* zwLp24L-0Q?32r9Jl^A)~Pn^-}h><}wd3RSat`ok6YaKN0NtsbjN@FKgS(XO9#aVKy zLRg1s9KaZk99t3e z)M*=v0XMc9%={X=(^;H>7HEVN5mD1x+AWlBo$pk8%`g8)?mf}tvAGDW+M?FG;#hZT z>HQ?_#%u}ZbgrV`@c`?*((E<#b6Ku{;Y-unuH{Z+dkc9R&cnkaFLMsQZB|>0&FUTL zwuTJ!pcf{$4L8Ey#BTA>(Jq_r-V&Ec-yg)!v5on z-nA*0=FGt9JiDRPE@q`|Pf7wuQx@0taGgg>i3lFkwiaf+-%k~8nDaGtk-|#P+<7`~ z{^Bo@fY|m>!5IScxd3Lq0}!(7RNG1Crf)-O9}6toQ%SeGeI>RR=LE8GGz}A}Y;tI;1V6W^E$s0I__@ zG%#RBXvE=rZsT2}UB^7r$1P@s9q4@0U2mLuyF}3)28!8do_eG`@JMMvRKjV4_e@?2 z0UKOkPUbWkk;Ew*XSIRL$U%Sst>X?x|5i7>3w7j&l(mG+0P*tcxK+GAS9oBm0yhwk z5kjbQk#Gsc&N{aF71Y`aP^J%?u44!TL> zseOs{7zoyOMuKmA#msbQ1tqm_@$RFQ-P`-bL4On{JAZ6%uWYN-*mAxHYe+ATE6vIx zUisdz!AmhVSo6!ZuQUL8V+iur?VgZNi4yB4aKkX^7f_1)^2#n49D*rt;als-ig`UP zEKo}ex!gN_4ziV%ihxAIV`1ukU^_QdJ_eYG=K?enRIz4>A(r+vui$9&dZv@f4Mt&{} ziR3s)MU$~D9kY;ExGF!3i^+@o=l$<7i--IFu-)rB=@2#BeyZ7dynIxKB+)E$28uq$ zP|Z7sZ>K*~@uvSqMK^v)hyU|knK*sEyRCO2k380Dgri#sN`nrqv z$08n_g)z&v!c36Ejv31AB3{mUHu&Jcf*=gL?Ohi z)*HPa{Hmy1I@yZ*U7GY%NIZpqJP*WNE?6D+vTVgfnLvdc=|?0Bxr~DvcQJ2aL@j%B zryZme!u!{Q@BCErk6`%uNVmf-C$n@~;WRnhK2HAm8b)?u9V!OsWMuaxrc3;B9DFNn z%h&8E56q_NA=?*2eE&UfR)qh%O*SFj^yD4S=~R{N7nfU0cb6HsoQ(!<>f0zg8U)rf*cX@S;eExWu2;Se6GWsKebGZOA+L z>^IFR_hQ5n@6{i-Pn-oy6VoQFKN@cC?#M`)k&o`g&oKD{$7h3($d8XSvFiq)X|ayV z;p67z0h>SW-*;FN!RNIi`ni-if#^%C)B)#?x!V@0FB!d;cEYRyy)hnln`N|QxC8T} zS$_n}zzv{lxI%Pl7|a=Ko>&vnA@6#In7J!c#03ZCgCF9NK`HCU3!&ovSu zSnf{+v1^p;!$qLuWpC{hMTeM{DG*8y{(Xdsu%Z0i^fnsVLx3tYEKC0rWXE28WBuD1 z1ZVxm*yArSE41O}eFQ|-2&r%+=@zA7rJIHbXU-hqUnfW)!@)e-Qc0ajBh>|VI+Y~g zJTk0HU1_B#+Ird%Q zD53Pg97t;L!6^~hz}WQELXsf`BThC^i$^~=@qk&FNhYJa_%ft9lRJ#pq640&KRAQ5 z``$G}LUlx{Q6q)$nYqg}@s*$XEd>4V~iEk6{SB*Wu7;}x7RI7bTlO1ubMGSAlT|)Gc{Niud7r2OD z!d#8yh^==zx~Pph4+NBKk$Y}x`O&z+9)6WkrJ2HDf)t)xEWs~ROAv7a zM%c^jdbQU}!fz06wV^h_@t7-CWMgj1hBEI%agZ3%#MWN2X9SU1N_F1pOEY|pbxtIDB-$?d zzY`tQpUKX})J=E79+9)5e)Y2w!wGsq4pyU6f&ba4=hYVn-CkA^Phv+ITq1TdxM@e? zN5;=U$}VUi4Ha%kv6b1s4FEu%3w9E^{$$LY)~&)gciSW0pU-SCRg7cj+y~zG z+@(vJa^Re=4A!7Ok=rk@o1YlAR^k^;Lbz4|-1eIE`_u-%Z=x@;`IN@cw<0suxI&6v zXKggdv9(lWkpLG9wyYQ23supptin0YI>&~W3^z+VAKPKUHSMXp`z|<)H|i(<18P8( zzmJAO1i~K-?UKKETrfnEfsbSdLx8&mj-KHy8(yx3gZ4UMh$MUXD6%FvF?`S%#&`)r zqM_C(NPw&jt@;`OBa8smp7W_u$V~8Y`T%K$t~1T2ao&S!WgMA|Hc8I&N)UW`*A#G$ z*S%6VoZU4m^P)4d%ef(yM`qx4V?CIpZH|TFmc8a&FQ^<*o|5$nUKH{(0EKFk@{m+> zRK$KqU^$3v8!G9YMY&8j<)jMG=Ws}{nvSx@iXy)!g?*J6fc(V$7RO-S@9ULry9lsLudqGUB>9hTwbn} z1qD8iPK*EIYQ6wG9i6_$dO%rCik(RE`ts|#! z-Ssxb+%JJ_?(PT3rtY_(t<33$mNQ?40;6&W-1SacG(Cr`5Td5*h<4$GbIBTh1BTe| z`q$x!!xU(NH-TUv2oVENMr6Ry*XONx&BIADw@LkwuD3voiH1l6Y8rEPmj64m;?R+OupV zWwTENh?Kqt*uS|2!gi8PaaK_T6CSy_^(*5b@cPngOk<8)1GXf!M$hNVa$KoQESSw< z9^WWe7V?gX850x4%3)!o>M=F$oM+H+`~4VtWCMvA`=c@R_OUSE+MhE;^uwo%4^t1_ zhvePi)%NH#IIeBx-9qKRn1~%zn@+@K&C+l+2ZKRa8Jx@5_s(YIEx;230R5KEDKF7v z&=hvi!j=}h_esj;jzHK*kcG`FytN>crGR95Nq_gAve47@&CMMqA@`-9$nF%Sj+Rp8 z7y%vFIE4(9xj(y4vK2VVPJ64vA#fMVO`Q)B`O})lfPgT-kNM>yPIv8@LY#Zcm+Nvl z*{}fpLY&jw9Ski(E{Ry6nB;F=Xx45dFSO2D90SZ>^+J|nalJNFclY6wbPe6Hww-pN zR&UG2lzjHSpS~z66D~e_K?cLC`b{;&l#1GsD;Qjx%W5JYlwzzPDZf}8-Q(6B8r1-=hdY#4J zn@1>i3h?USvE<{-|MD>=)J=6{-H)-$us)^gXEg_;?#U6xf6Q^?amhg&gPtfJ^Xzpa zPYx!0_dE$>1$=4*8J&E14Rme(Qyc4^=qam-r_5Npx)At_~S6GoGF29M5p{RYlw zV0A>j%Cd^!swr%8>j0xiEPNGdyLQlgNl*q!mj*9C)s63sMG1LGg@|LA)|VA=V?pX2 zG?fT!+^TjqQQ0;}_PKPL@+WRZaQ(NsmxE}%d-0*suwUL60W0958PrJ%Nl8mwRtF0t z!KP$Sy7Z$Xmxp7G0paqF#f*!3R+?XQ;?dyn1!oTPSFsSQ;5|_&6M4|r zpmRtGxmuGXiiyTRhYW5$oKKPfh`6xt-=;-T3Q!+eIWwU$<`I(=Ga^8Zk)8*_Zu#@E zq1vurJiC|X*}_OKPGpHsuX)Q-mKHdDp=XrG**pIzG@wloZw1A8*DLu-Lr`mCX0%&g zC$|-+8AJ0FSz}PAAS1Q@Gp-NoNJB;hGm`&d+WFYrel$4JTN~36#YPCOpJRdoz zv5;k7I)2^gpKUf@hM5WXm(S)2w2oLIFNEjlo9gajZtlUMB#jJ>Yk(*g6@rHa*bAXh zB2GhICw7U2O$(YbOq&tKz{kKRv&}sGOW=(BQ2|gQ#3mAoieRId7GEcdlG#p?PxdW> zbk3}Eg$oqbEQ&L;s%CvDvIPaa(u z@nce6Jp%OMM~n4z`smY7AH!1_guo z2Qrmy5vRDZjF~K|s5c56=#cpSm7h43*Ei zG86!aInDq=LH-#pCCbr+2{_oigNUVcO09{usNkUO~OXY9m8Hxt>L-QIgLjn5hB7gF;oiRagd7^q5{yt%(yp@bHcP@ z(;UUV+OL5v8-k~h8Q@jb1t%g?p|^z+Y&+eEe7rce952wWM}bef$v3hPifg3}Hc(8b zKqQoACweifYA*SDhAt>B1i%_5P#FHUUapLEQyi6$1hN{m#doIEEJ;()AK0d2-K10Y z9I#Tpnb#oupxVH7g^Si^$@v4?3ChbMNDXkXi3>BVHnfLqj_$hen(e)O{L1{#?~f0e zZHNBpa{CIj7<^RDPY?EvJabcGoSx@jLU|D3&u(aAQ-9jIG}qlma#M-MJDRi4kDOawKu=5CFd4`*%?y<(jX*k$UfVFup7ngvLE8B?5fr97rX^Nr|>i! z?PVjp6|G)EeQrWyNl+sJ`*`69D&b+FjLmyBvVc4zxE-V9)lchq`3!U0-Ry-+>CP<3 zglY&fD_$B7uG?8&gdl4WCfgkKKnkC&RFJ|&bqOLhrvwpt!C66= z2!m5NPfWKJ!i1$p5MgOhlWhfp7V->&d+dH)OI-@|;2kY>&OE0ZdlyTcGSBIzS}k?= zwYvufcNu}(2{F~URlS)EwD(iQ7Hm8ZcFQgQOK7AS>J0g0o}PSnJQQCMO{qhe)E4Br z)X8fzRx_^Vr~l2_Y5F8luMb`a)H_#6kvY&!{W=|{zPn3xF380xk#VG%w; zn9Zn`tV4w71%6BBF}s79mZpf5wq!DGC}c^!dtLkHZNC4atIb~NE%;?#BsOtq3qR}K z+RYjn?loqk6HPTZFH4HI**RQ-yE=?9lV3idx#lZLbz2mo2Nh&~H;2tIf0RvD8dxfi+%Vrl7=?tGOuN{C|AlRgZ2Lhb z3oPOsM0Z_>X@!@Kb@KlZc1d|iuFYL@GH4=&wi`M9Db&Tzr&4*FJmAs10qlen#i=%2 z0l%V6$vZ*(Lr}WYe*KrH+N0ewW#YiCkMOPMFC~j@-B>8%Lt?NWJ->Tey?Iov#t1IU zxu;Ysn3%}rYcnJZ36aFIvO^w}ic}ZgBw9kX{JG{6nq&?_pTl1yT}fcG^y1M}SAk0} zfKT!T2yiSxdc>~@ol*V_(T(w6(FtmKg}2Up8Tnu!$DN3@XQ8Y_RuSCK{DzQ)BebOV4-+T$YC zL)3V>YO-2*=DJd<@8+&mffws~I?*TZli=vdO;U73A)6!Jw>DE(rUyU^{*aR+@qDy! z6g~oLJacvC;UDk2;F{R);7=lbfqJqt_;FqbzAZ1K%u()i0afkkdnJQ-6)(8%OU+c< z4uH4JthN?9051u-EtdJ#RuA?udGvhTCLsaoO*z@wkM_C+d*FCrlnj2E5kvVX5~sDB^AEyJYt2CrtBgslL1(LLVz7@5ARKurafKiw_2c z3!@~wG*Ygg@yJkR$p~!Qh%!zkq?aW5>j#WR-!T|!h;W=ObpE!jXDK*L+Z>?CbyV+eD#h3zb;I=_JsE{qh;|tTML9-h zMA?qQkO6}Q0>F_hS+vz9JNC#32+Fn9g=8C1A>lG&tC4caFqShwfN~Dp0hr{sM~s8> zrIGiQl2`)Gzdg$PibzIR58sm{3g?d^el=BqN^DzCO@?mI7J|(1UXskv17zGJgyaAl zhqA4^#m~P4Utw`f07yhYM@w-DNPRk7%opovnRR5y3N7$?UEL+MJ;7S21V{lP*;tcv zASLLJ+Y>K%IoV3Ipjot3uOA^JS@#MVOsv0FZWiP5Zfa0{b0cY?8{JCfF>C=0R2cC8Ie08#&GE}&8a=|7y^H3&1p90 zj&U6Opu1kI-?)rZ*I_uN-)cMrLQ=%R)9Eak85%NI`% zPg}(c`MA68xUHAGG$J2gDhWA^%@J+x5N#=*_#LOOM&br__<$!O zhGJT@UQA4TT|J0_c{=J<7nUq!A>L&pUCE<^^RG^}+r{m7aGTwG-<`Ynu6>vt6V^Sq zIbLB9MTsm@D%FI}c>Cwm--i3pc0KlYrSY!kSU_Nb8B!<-R;P z-*i^DJEzJH^?s_5`yE|tE_Pv0$0z0%9({gbuPrpbI$DtepC=Y+_ky*usYmjPoOa*G}#nR zs`_bdTBMqr7nBA#P}A@xX9jZJiu5>u68j)%d-a-CJ3!=MH9td{$FH`m7KwTviTzu1 z6WmxwfixM(*IH?{T&EOY`|wkw_u8W@VEE;zSVmef{6Z_i8o*Z^vl+i(K*R@bWMp+8 zc+FbpEVRF41Snt`Gn(-f`Q`kbJ*` zrSBSePB0%^d{#UmFFJDReEiBW!e=k`_an4c-trH67=vTcY31{RK!&Jahyrua7?>bj z@?L)Z?U5h+1_S8z{v?{+4|6(Mw7LJGd|z?RKNr8x;kDA<%O|QR>0E~A-#A zBB!I+G`N(n*w(}XK3fwEU$%Wm5;6Gs$-T$~cN<#e{#?$J!{66kvh@U|+WV)!_Eo%(*ys6*9)?b&)FwP3erlkfkBfgE_WX8GVR19nd9 zfsBkgDpS)5H}tKowKTgizmcj|3AJ9;)4?fzmuN_RE4fo#Dm)ov-N^GMu3wC;MWc5E zR~j&C?ejZntgiB%PFZHtLD*jT5LlXCUe&< zsv`j?4reT<2{OO{CJ_FOezrB#-@0nhMxZdqdcb={(&G~rDWIB7m9nd+%H`yY@faz^ z3?7BCJxQeTBuFwle*v{qDZXiF6_HtLrD_KHAoYRX-p$uo^BtxGXb7~dPbjJro|tke zo1up6Yc9Vz_#VjO-{jcOm+HKK*6~!#yb?JjGk@)=nSq*;l9giJxfi8y){+k|Oqhlz zr!Tf0{-HI-@1!a=*XzyAu%3iSV;@FuY>pZ=-=?VK5>~^;c~lG$86G};kSR+)Y*aQf z&3>7t&jWKPU)@g>aZ7vwG1zgzc~f~hfQh*8CF`8;GMJIOxd8eD>MzWQo;1G|IOZ)2g&25 zi6&34U8be^X&$&GMq5UJ!hYIp+9Kn#_CV_kzWATVpc+3t-+8r3O4}@HZ>f9ZW$CF% zW8zYhuMB-P0#ugytw3&is8<2hqnAV4%cSNW6;vAYlOxXWE_F^|5ba)a5VE-!d}hVFPu(u(xz1^dFmx%?O&~@ zhKoal5MdiLbdeW+SFK-=;S1m52B2Tf@(e-ZBum|H(WNu?@>Or+(|updn=1utczE#L zc0NK`Dj&(}ZBu*s_*E1P@h6d)+f8(c)S~*zUN^&tgXp)VSKJ@dSvr4g6Iy1NuG#j! z{P<+&l{ramoMy&`nK@BUURkT=@4}pF)yF-KYKB8gvUBgo&G~j9b&b8|TCJKNmf^~p zE39xOWBgMxT*E$L-V8Z|4bvO%Z%NolS01ziMm}))tZlt$^xLv?aQK6P<-lqf8&nDf zSmDoE;Ewn}c8PPEY?~$)A^ZAHI1iEnZ2O}eydxJUSwzeGy=%H-Cey?;y(BTbjJ}enA%h+cr=7A7^c3CDR z!9)wfS+2KS36a5?^#xZPJcoP`HVhl=016S4~;g1znohk!>LCIr{z*3tVM5iEm*JI=%Pz z=U*AN=K1N#-@i}f`o@=iDDMW}Y3_luDw8=={#F@^yBq@gVQL&PGBgiA=pibQnhh6K z8?LzttN(-mWNW)~zJkk{#G zpX(9DOpH;DD?O&vh7}AIT^up+E&KTrt|pujr|OroUOOk?B79pnYO5Pa2G}7iL$L_O z>yX{+9Z^IrLGox;L?FCJ!4Noa5cmb)DK04iDw}&GCkkA|uzP)cI4#LN=(5*)nnnY6 z=CbP`c?Q1PEr>p>J|7gH)%NrvTjpQSw;dh)1ncg|kmeCRN}+#=-MSP3aX*F`*z>mo zc=ha;;Zn3*vOqpRp6f(3x7AWzfo3j{@h~&G4bpP*GO*{#=X+-d#lL?V@cg#~lW*p+ zo>k4bTvS8Q{35hzW@{9>ekxIH+gjzwMFM0RUXAD-*+lU9Oz`M-?ex6B>;3D>tQ1_J z3D`sFE*6~6*xR&REU?t8Cf$y@T}sgC^o!1u!DIL*I>v#yNuSSqbi+07(UDua?YeR0 z@6k!}1<&>uOS5_a$+n2A2@ggBcw0uCJ98<&`uyQls}bclQCWW&zGtiT_3PL*jwT}W zNiDs68EYXTvje*{QQABI+ipnkvdf!{R1eP| zv<4R&9I({fx1lw8W+NLfY7L5q$IoA!5BE=wzdblTSJ`qly*++gUWDm}px6*OF++Pd zS=d8;u71rFG-1NJl03s3aUUFY^p%qvyOUHZX9uTdjqly#8*oY$FO9@gl5=L*jUCD2 zbX)Z#QS9*PeFC#Q$N;6^(iIgjCAO)azknF06W@V{-`Sm!&u<1ChIn4zR7dr6S~qc; zxlHaDdLt#hQZ~Ub3=pl}iFY)I{T7rB@&+cG+NiF|CmIaJ!uzOwl;Fm(3Rd|Ns%=DF zb4YToh^m-h8+~5aemv5FPKTL1-1iH?Vo3 zZ%WMBdOUe}Vz(SeTlSS?zr?eqGezp`8lfs>UsAc-#CYC7g-OhO3 ztmZfHaCL$lfw+8N7BO`ZffpGv3<9YQHG~{pTxD)Ut;#;5_PgRbu#iDyNpR6&l7_x#$ zdSvLtU84IVgf5f{8v_-<7nLbq&zHTJ#KXev6ekM~@Sx}!BAmjv4vMETtC>x(?(O$_ zR{sQuM3g+nTsAsHx4-V0MiK- z++Y3b+S2UxG}b@Hm|+3n$?{mlh^F-$W0?brgJ zjl8~PEwX$Ngr}-q17ETrGBxsA2fB_UZzv-m(T$a9s)Q9X^w~D3^rGQ4-#a}GQdOoe zBA26Ue(xLZjthrXN=Y<2$S~nuQJd{Quo70f8@F!j#MAui?ynhakwXgDf`I5`v6Al> zcG!8ir857XnHmix*j%Yw=P3tml61X(Qqt#uykpGY4E!RWjp7R#40H#?oDr~ZBc4Q* zutT!md0UG4Lz`sw(5~tE4n5JQcf?On7QH6(cx5=|a5KZ?yxWSu;KLluInn|8Dbumd zWMsY0p%;CL^Ku#ZQcDwvRvGh1CI_F#zDK-u;%H|0?|UEB|BudMPO42uFoJ0tQ}UuR zfmb~t|K3|TjAgWc@H1}DE~bNh_k-)QtAC(FtDEthe1D6xtW&~=mOC{VUoiPoo_I>1 z{pUFDvMcX*&<$6e)seRH4dFrPil0I?>4`yyl%}LUFnidNLZ{_DY2C3Ty5{>BV(sNm zd2PO~szt_;(q}YN)MKcwN(;>Z<*L=_s+(>^2$en!rF?Xp1OGKh`Rb`*ZE~}si!u9n z&pi=@ymUtn2P2{7NKJPKni7JNItLV+H<%(k@6>J7qy zZmi*r8cV}EC$(=0!q%4hDYVKl0SR@?$))uA-`ar(yBMSz%zL}|9zdD9mh#=%;$C79 z(dHe9+fKi+>K>}xLVc}o@$b!1`I??uQb_PDO=TC=jAjcCveh=^N_&f=2Edf#?2q3+uxceYw8sHot@E-B~u_*!@6EsM}@L1dmJJf>-$I|bMs zAM$V3`}?w<0=6BcD#hTIyvC|?Rjq#b;gc`s^Nu+jcS_xGbZ7q9ho98HpO=Ig6VdPs zgkd)&Fj!m$2*>cIT#c{Iz{;x!uyk2w7CDfGom6{AxcPADc-=#J}CaS2mH~z+Bd9wmFMAS#9;a0il?d>)H6) zDrEwGqUn?C^J92MmZp#E-A<=Z_ul>nq-efaHM=ig`48x%u7>lOVe)GmAtRwm!^XUh z>r~8&E@8wgrdX5GQ14m&-_=mQXUau>emE5PF?hn+jtTmD_5~bXE$8cn4cH;SHE`0F zKaFoEiBXwbHEhg=Oj8W#GhD+Ud{K;lI$#Wg<^+h15z^dAR!pY6#ea+7%?9f0&qf^5 z!qF4%Rn>I(tg4EgOEbcqk-yog*8LKu1E9*_a^TvYQ4B9I)ASDD;}0FAqe$QYQS=Df zFwKamlbG{>`$S$FR5Z9q&7v9;i9i%H5Wg4K)eRUgr94vlh|A@p%Y!JvKssj*DU~?VrurL0o1vAyIPhucxnX^B8^3vD!QSi$#u^>8n_kO{d5V#N4DU|JvshW1gT&ogBlHb z@l#-_P9$^T;N-AHX~ZGfXMAY!^?{Bg6JEU znHNj=ue`Yd7@^u_3K2*bSOa?Z!aj?6GniCwX69iXxPUVZKwYd{U@d(vK@Bpgn(?w; zAlO}L-rovYfF)8U^V^{K4%P*1FnAIn28T}kst#=FbekHRiYOh4%VPaaIVPgVz7w%8(+zy2Y@wr<*16ZFooZXJ- zu|d&e3XbTM4TG6~_>GQHvN;J?H@v~Ec?jtS@|c47bGSg^W4OOsP9N?+Iy?OG@c0}; zV?SOIf4*EVR<4~xmPP#IY)E(059oFP5r7M9*2Ns6sp0qgENFsUcTu^6h1^av4{oyn z7r_F}*H_ok_9!kg*b#9Ch3+9#7i2qP+v&|D*HoV`m|@Hy2!hN@P)HEoe(v4%3Sn^=x_L;{X2d|KsLQ0m9O? zb=AdNr8XweVG@(=N3@AANcm1Yvd@QTfCmk-F^Ym!wPXe!2E}fxutWi&%xWqT$;n53 z&6?y%Cx}JC@BK(B3TbWnU2wuu{3k6s*`KzQo^oIK)wOV?ekKvi{NJV&gf>W=%g9{z zCoi5zUpucN2j>o5>@7wCK_V^KNTq;{z>=sY6!z+K&El^Df#(Jy+~lqYuj93vh!KmU z$x+Ux{*!j7){lxZR-^8{Qun^sVGY{HlfZ?6Br2!wHWrYMT z)YRHlU{JaxLCt7qEoC5DkO@P`Bk^DN1SuiMPfrid&h8m} z5sTvUsfgn9skbRW=$EEl*b-(JnR!LYE=Ks+pbQT}kW*1`7Lx-`05)aW<>+zyV*rBW zBUmE)*8QTueq%_7R+E{(W&_l$V8$dmk}bBf4-?ydFJK0_C;Ts8GWbsvL{2f^n*x>N zFmP^qi@IFWuii9MFz2nRx7&2DN{7SG0K#b^xLe#-fjfO#Hz zr7TIOn4W|Khl^ngE{8TtD$N$#KM9h@w#1|Wh!p^(0;unL*6j^6?I-COi=?7LG9nWfGDf@4QVi&}Bb(LY`8rZ0a!JK6tccy@kzuy>@F6+CN!_u@Xg zam%iB+EK_WJTNlyq??^ah%x)=!_|7h{6f8W^Y8uQU;V%M=rOQFa>M*?%^Jc_U=vA3 zO@UW?1#l}vch{jcy{M_uOj5Au6IV3!eFBLq;4^O;NPN}VV-GU&8e7`T5-XbQ@{9Km~PtpjKf7(r7bBCBHTc-(46 zOr6%;RP+VU&qi*?yG`hblr58BUfk#iRrU54427M6w#NL>_0p%Hk!5;4Z&n$P$jtoZ zb+$_Y)Hh4)-5ikjL9H>0EACws=|r)CMd7^&fh!EUqd+48Un&P}G${6TQ=u58Aw$_K z1qLVjK0s3d!yICAN5+hIQ=aHfb!*)JmH`3e#wss(?8CY70jF(uj-Hm)EO>2j*UZu; zU9r5!fHt=KZ%G1}_9E?^(JSiCgxP&VXghJ13t2kbZbF-POYk)(UtL^$na zF1NhB-arEJ5xB&z`#qlq)n9`M3L)KB;Mcl=yrst4SDo%|GtPWsR{8w2uFY21a2-9V zK)lJ83UW;>D*y;9jm!!& zCHIrtDpQym7U+^O!u!HaM3+9?y)|4;iJE{GV>tpoK_s4ha>`7!nYR^gD;GS3ZHhj> ziX%ar$|u#55j+umwkHiqrv1M30Y2&-iXxG`2_Oz7XtI_8$8%XDm`uBH8!Ml^Qhcp3 z^>{D!Rk&h{d9&L3-i8KT^~jOp&C3Q8mm0CvwrUiNJrKkof~Vbp>xZ=@)9ggArC3yv zQ%*d&?*kJljt$^SGl6a0U** z)GaQ{dfHHc(Ouzr)9X*T=94+!VszfgOT)ZoeMM6Gnuhd?L>>fJ89<=4U!8pB7e7*_ z)py>B%|ZC5bS@`$-yu<)buxiWvOhmbDOBOZN>w|18gDP2rr8Ca3m;8z-v)Fi`s)CK zuNGz!34|y4kTNjQusyE=Y6K2a?7ySon9|L%r<=>E=YsMHf1Zb^w>G_vYP<2JH!XDi z)(6!LH?Xr_T|WGePGX%X@;IE(mwTt5?|pe-X7daZK5jj?B$&UQhf}O zp5zUUUdI-gLmc=oreH4`Uy;O7lGA#oIYv82t0TMC8#2?dIYVQO5fFq z+bHFs?(}o8QSDGFAE_K{GdOrtg&rHE$!(vGR?_`ylX3V3U0n4DJcQVN{4NT$yq9h& z%z|th4EYrgOieqL(_&5U!^eHsa~sq&YySd9 zYzrO?Sy=0$2mtoP)i&J(ClO~K2ef%Udt0 zv4tHaek;z=-<#b@ssIVfss{1dj47J;>9AWgCPxV{O3px`5m;Ncu<1ewTnTdg5Ir!I z`=nWwXvqK{oHPSn4nzZG3X<(!%&hn=LVDnV|0l0Tn&FAs02lEJan^{*fiFW<*@wMM z{t)7OgakpIcOnQOA+Y!3rMowv7`*3IWA8ku_!ot3x)a~U6tZ#W4pN#!?WB>Ym$=b` z($GyqGx0aSz&i|Ib_Sca>cBANPa-pPP-Ik4hMxwO;B8&0l(jZiAcJAEhgI%~U*XF` z!@wpGcjN}c;}|9xqW~|PX3y?Rr13onqN;5ek6Gh2jFZ7kPGID*5*qsdr zB@o95Q4+wCU;r%Ocl3hj0NWOZUGj*PV;JT`j#kl|ZL_`rE|dfACiQsbTCxXhg!I|a z=25>GO{!^Ee(%8_Y>p&cANMC#SC9H`?id{kUqT^}knu{5HjcPq)6+w+PHo7P9wt6- z^WMh!?iN4);{SP8yeg++;COf+(n zEnZG0el1(c;!_hEePK?y5k$;US(gP!)|k4_K1IQ+XIPO$=rcOYZVk^TAWZt)~m!@{P*;WVAn^g~Ky2Ca^cA(*wH zYBpcb>*`MJW!oZYk5BB>RDGqJHdo~xn}$o~x9f>4aIu23-qxCJ$(oSVAVEO~NKo&- z0H%WFa-kR9y1C@>z1*TrAP%;{9)<>fUN-d@qeTogM0x%J{bINqBM_VWDM`H~&xbGr zUmTns9Pb|t&%b}p37w|p&BdfF$mJWE_%L!zC8^4m>Y0H7-pco%emy+<=J2`buntd- z&xH4%Lz=ob^%Bupkn?bVd6}&F%B;#QAnQpJSnB-r1uMb~wZkMEW?ef;NP5E&{y$Jl z0|XQR000O8PNO+c0pyj}(+vOsaYq0E7yujqaCtOpFKS_SVP9i!Y;0v?bZKvHb1ras zRa6N81LP$vUQnT}VRd*5009K{0RR956aWAK?Ob1P+c*+`_ov`FFVfa^lXf2tK~o@` zb??$k(jayhD7uA8%XF+(mVBa|#8>QhznP&(k^18}aoT2q8YpU$GsEF<=EoUQ$_{(_ z%TpGnlPI~|XYWR@pZ*hDI^Ax!bDB<;v0#~yi!_nZ7`{)_42Y8}k>$X^-ta39zvEbh zd6XtBjJcFTvOHzyt9*tnrn6IY_St9_Nj6u);aj@EZXUDoGG}qhCt||JD>e~RzKrvJ zr_(vg1ka%r^Gj}GZBmViv7+nMZzOVQYOk)!Mwkc5sItIRktF zJ$-AO&k!ELCAjW2U+K{>WZsTH!>oJs{Z!1y0wi3DJeUiRI#KOM=vk#K&A!OeJO#5c zF4cEyIMe4$?0og>8(IoA2F!iVlS#0Eh*2$>fZ#}iD;_Vwwwbt$B-kQ|lEo6qDcC_n z5@veSW$twQ5T+Cir2H|4Ri0*EIa^1Z&3A7Kpo(UDkMnVW^lcWGj z$)(7OX3(KQ4+Y?QKb1T&7KsR&mgU=YAqj+kqk8Rv=qS=tyG7n!(%@hoYVp<)-+ z#VBTv&Y<+Srl?$slr4!-&nh99l6C{^W6J&6b%qXtBYwi>Pr5xylxJu?WTA-h4DA(t z8p$siB`hQW&4e5wn!1K3c&8Us!1!#=?r*RkZl zDCA=k;~xYS)7p|HX^wMI%}PaCTBKa$qU%G2|0Aq<2^^lo*x(^OYUs6)Sstev+WgM! zFAYoFuBulGD7FTJMipsjIoGtnOC*7k7r?afi-R)&C7una?v~L8Ilo7=<-lfY;SG=! z{&X0}kdqWz0LrFPNpaMIUdr86Hg_f>wD+dj@+nEu* zh*fj^l1vY~ZFX5_?=jjzEGfheyM$-;+CpNHtc8xgjQ%G)hh-w-HEI)6CNRyaX?!bC z^??I&2G8$nfN_V0tuE(LA~!IZCjF0C)h(CbqDBB4@&aibwxX@VM+ploxQhbaue*Qk zv#)!9b^DO7&w1`4xA(=z-5tJM`MyJsN&OdCAVejT2C1R^s*2sOUqSj`rRXkrSO0Sw zB_6T)ZX$pNZ3s}z&02yJx~!;#Uw4Q~@XGd}41&Sw`RIKx9E@<+1X}OIN54x5y^?)M zV|pXrp{rbmwsXB;XStC||~@CzQ~SZ6qq zB)tko%$SkWU@VovFm}k7QYi%kK}1ly1UQaYjE9u#EwX4H<JVEg%6VMZZh-=VM7}1j=cZKpH%$3o4avWm^Mb3hj>35(@9SsxHR{ zmV(cLqh;t*-DVR)O3;`sCE_~3DIVy^a*a+&lcp_BEP$2@`u^d?#o_zl=N-EsN)KRY zt-|@?#mG?dL|%)`7{c|-z7#M-A%$LeE!1|# zAD$c^4Iak)2o%cgFiTZ>y1u$@fr#_(xIAmB9SH77!UmCZ0BTiZ(OOnE$ih~PFsNVr z@(fb>ji{i4vUCrMaqk!HAHIt*RW%|9kLz+BcyQvT4S`0mJrn?>HXuE#Ie;7PgfOJI zgsi(qyA>pron=(rT(`D2P~6?2l;ZC04#j=r?(XjH?ogz-Q@l`|;)UX{ad+7`l*1$M zc^gvhF);E|v;c*gaap{Df#de;?@Jse=B_p93*UrVEgunTJQF z3ZA9GI$53J)f+<8j^lEu&zE}0yZwH7 z?|ENL)+VW|RUHIo>F>H7_UahR*c)J zd|y$m!ort)iCWaQTnF7693zydx+xI{DWsPlLWX-(A~JfwN~3D_!;(7Vh|>k~>wDZ# zYWkK;>TLqbr=QT+CrKUHi8UcaR{9Ka*$~v!7sDvt6krlzTgN`ZLL1Gd@IhYu=X9l2 ztw(!UX{wMWSlE(bSR#!uZ^+|WUz7!vvgHhyv%FQ}x3VXl-Xz2N{V3G-+?*9U@L;=Y zkvVK6C%?s&fXiu%1@u^qnO&j&eIy(@y~X4MHPC$5!?qfBe7~qtXseW*%o!|ZY!8r` zb5e|P*c9P=8J$*QRrG-pXn(^rPD4qUsUKgzd1m6(6r+NWyu8x!Y1XV`CZ%%c5Nlj+ z)(S^Y5Cg`RKyw(A3SwN}_`jmWw>>}Ts`ZjmO z)rz;fzO7{3gBNd}YzOHIhkRh7eSZhh8ro_7f-+2Oo6PlAqAjE$fEjvB|7buTBEP(j z0a`eVsy)V_)1IRPq(O>@$vF?lsqRF6tlKdU%V63IM$o?5MCcS0F!ZsfEsG@-u{~KD zBHVPrNi2_~uUkAeYwAL?m^R(bB}7?H6GnwSMUODh+#3m+7dK(PTJ29$!y>WFM(rde z#GcPRJxcye={E_y^223IQ2sArpPH+j+qj#)9NOl% zF}-J7r7Ij$o6$An>G_QA`=#pV!KoKH!!7?nRy?5zn9{6g`Ut|Y%}&y#0wPo?!qg#g zbCi>S{DoKkx`52<$8&V7V^YK?oN$M7ii;*~_k;qVJ~Bi}sC~NOx((TcWr7pB4Ql@9 z(pCWvQFaAuo-8~FE5Q&j*eLPx;B;wv(~{6~09rDP;8BkZ`qD(Q!!|EVuRt|5dZ?mn zKI*-Gny%$M%wbF01M8YO=rsSA2h|FrtDTrCEyvRWx!#r=j$4M7GAI3Iddv(#>~Mm4 z)Y*5|S}0r!n`=yu%5pnTghp=hHXSeR^^f*-zCO08>M{FA94$Rtnv z&K($F$)QK-h4|{8bKKbkn_%%}V;$DnEPHM(p<{cJ!`G*w2A>q=e0<;{ukll~&v?z8 z5hF|h5@WcIdgZ!xtxZ1Tx>1NYBb%>?_mGP_!-0FMJ<7#evVNo7)mEgckXIFujD&$D z-#h;Pd9nSpF|gY+s|o+;hR0sD+@`}^+*u)5j*mgC$F71xgRJ}mKGvWW_9uD55I5gB z3L{rPp!B!PyQMgP{&jGNhvE7jan7E_VAWM8nDz%u-7u1bf603Ojmi_TwkRm?q3P%!gV2j20E`$hR5Tn&_4D-eUTR~DHX z))#yZKrp&7oUbhJTD_F2L-8JcxY0L;p@2FSGW~Qdk}&8in168_VV^1hJ{~!p+Plv|{`4@Z zCsi7_ZpnLNdwOHb5q!HfS)B9TyFnrB73!ZKi4XBQ zDqi|O`p@n*#frQsiga4-TU0qU<&QKbxtB&a^`^c zzJ%7EbDr9%M}M<5>#C%9=386gV}wvzV~$1lm>6xh=O2?L6PmJbx{{0ra=rq`VD=#x zHWn&r36}ukfn*XX47(&#*7W-79IoSzN90LNhW9`f3y2U7JB?Q7c;Y$Rb<(OEG|SK- z%tfdZ=}ZY_;W}%Yb`C)zWx8wC)qUtK08Cm_Gh{pi(hkd(n}dV0rd(2Kj9X`|S@s9_ zst)X9Rw-tyBq;sHBK)Y6jtYdpRApY;61#5po@L&gkz-r;GSbztliGAdj*PjzvV`q~z#uTC&8}a|bYO}A zL_t7}%Tj4mku^a3xwu`g_4=rrNI3*1AB&k(J7K^k+)-<%M`s-N2)$L{9md8dM$;FW z`SV7CNaRfGPl``TE13`n5S5Ll&t3W!H@+X^OY2b93Uo_hrKI+M#l)QTb~athomuBU zJ3s85u={!V-xhqoIIg|h+ysNnyuF^?y>Cf?`9wqt>$!PE@>4e|PLzk^vH6^K%|MyV zo^Oyuo0Y$8L2L2v-Zynm+M8BN=hYq^U7If!>YCu^w_>wnX!ox(RAzdhNUYiUKR$h} zrR(AH7l4kcIpVu1(GFHBaTyu+3n{5emqUwig=X@^zsGu;5Fq2QtXfMh+y!tQ{;=PK zq$#-(?&IV0Bf1giDVu8e=Tm4t1yuG&rW|Fq9ui!KH3O17KW=#g;sTOfqSpm5|BIvh z?}S4h0Deih&kWX%w8>j95tr)MAzz=+S3mfAbBTHOEp>f!iQnsuzX6D0L0JnCDFVPr zT}EN7YgUI}>{g}?cn!?GK81q!uAfjL80E6Pn)M;X9YWD>@Zymdjkwgh7_G1npI_|s zf9y264-F{lIIO~gnQhJt2|&3+Ro2kt_ou?^PQU!7V`k8J{c9lvJbDSW*;&^F$u0|M zDxjOAiIU$&mE>|oz9!6>om_jny`cZj5PxEUnS~?2MEF|bx`2`{S_^d2o!qrB?uSDV z>59t{9aWzh6A|kdEDz@$Onz4>iH?l*6u}GSR11Aa-U;8gX3le9^;6Q=?6EwR-6fNHyi%Pw^c}{$m9m*Kn!KuEgoeNUonjTnO2(2K*{?6U z8aI^C#85BOz4Z}oS%Zd!;8N*+>zbemHh^mIyW8ID1(%$bxjg!cP6{{Eyqg60X}N7D z(RAGxfN|mCr2Qe;HH%KbsgdbqWZooc%y!tK+VQ^hMyZG#l52MxH;?IwE=1<-{Gl&L zXIfJyxW#%I<30k%H4KWbRc6YnJ=`fno6n__s$N`~A)H+u`J+|%GEWu+`ZYAIz0CJC z;yZ2x9yIz`TP2+YsE=fdEa8h*o@fG`vblxvJ0Em#cZ%LBA_Y8O4m(!IYg3i))<#Oy zCPEnrYmj`%nfNF-XQx-=U=Oc4$-S)CUN;Ki%|n>$s;usDoc4YrPTKI6aC?^J2T`Vm znpp$-0%P<~A#v*>>!ROcA8xRfZAwH44C`UutH*b7vRCKwZOqeQsS!Ak+q>L0P_z{T zN`QC$q_2K$~bV ziit#u8}GK%gw5V`vZR&;h3ecp`LMYdKK#<;82&jWzT;c9Onk%B2jX!U?Yegaw5~PM z(IZ|$+wLa)zB`L>2_{&@MobswM@N{%;%(_ow#Zd)h1-H=HPljpy~hd9 z-)IBz80bZUjGXpv*gxV4n)6Z+qRCvl= z$IO{8vo)(iS5oQPifLr&ttUEFlz%IuU|; zb6eWjIYS^5eGqun;Ob6 zM_=i#g%gVVqeww_%%6`#y>yBLeJ8l$H;{sMJ!&?5ULOsPPp~YB5P=F5Nghe5XONyJ zbbDkcXLmeZ+`K%43|TlIP=hL4!=Tf+7l z_#Ff^X0Wj_KfwbfI>y{DFFB!(>rD3#Rx#R{i__3Sus#xE&X}T9(oaL7hy!o^I;s>1 zOiAu?FwayRt0@>Km@@CE1C*)A6y3w9cA;1U;F@*Fv1;O;)lLwCugV*PZqBhir_aoS zY$tAm`Kp))QHTv;uKR(`r_bJkh$yxVkYsOf5+8b6KA^}uw$ zuc0iuk7S#lJarP^e~sJe`xSs+SE*jnx-O?nB8ursk|$*Rclm8h5yaqvhjbhvj%P_x z>fN%z={FTk5G_1tnhtxx<|{MzE^|#Z`DVW2P*fqwYxA&$+s*l;K3!F*uq}rlw#(4B zh4?FOIQ#Y_eRvgQ&J;RpBX;TDjY!8yU!D*5knDovN#O_bS%v!99+j7M-ZGeyLS~+T z1%WeKqzLnG6KC&bb;(Td0r&l%7%59%EgB%<71VPMWFnA6R+lgZ!33&QPKjxRPGKut za8ZrBnnWd?_#Y$38|}F?$Rb_CaUkJ~etoWObB-vsjVjY(rhr|%B{x>cI5Z{czOo1> z?>KYU*h6eAkMy+`Je9nK#fwU?v5%qA(Cy5j|1bvK`-|u_7-*MKhKAc{ryT|uqah1( zsQ*3KaS@U&-wYF5f5z^!?_Heb{vCHI))}6o7`111RHbv=W_hh|ASW$%R~uq|%&Raf zj3oGj#fJG%5vbZ!SLU4PROxen`~_ExyD!sk<;(bnp922p@eT1+pSs|N@6fiKZ_0&# ztm$k4J~(lH?+o0{jl-ruOIpN&A={PLm3u4dUL|WuTa?nvzM5ebZ>G8T)*md%s~<ioE!zOYRnA z&9{qY@Syo`rw0g&W*_+`;aOR2K3nrr;dJ@Zv7FfsYad=qPZEM4Ibh$7I6*&N$6f73 z(IGLLMSM#}GH+|(?ymst%OiynJG*L8oYM|Dwr9R%-Xmm<0fNeCd>cJ3<#7~vR2P8hf$Lu8Vx8~7{cWEJ?{y_`;n*pT zY7^LrFic1-nby`8OFW{CZEJGR)WB?5_z4%EruUA{J-9DdkQF+R7l;{wC728AS=O37 zoc%1%)TKvIp!#kSY%LWsTF8wj{rB zR+Hs?d+rsy=If{2keFu~yUA8J3Hy@bw9|>-(y!a|wm5TF(Dz$C__-SO9*@lOVuTEW zfGW$H(k$++{moa4)sTdZ-&t)N)uJ@hpo)r<5N;RsU3~_~>5=(G#!7_>_8|qPcy;Mo z{adFpO3l%zwy&voj9u=Vah~gT^cc9mevaG z*Jp$n=k`1Q?+J&Wrd)s~O-MMWNCRo7k2!-77uN-TGOS={8TB$ew94ht#1;B49wCEM zD?|_+t=tP;@pqhsr!D%IyeEtY&7aWZleV~Z_>7PUg%Cj(=D@%&1^X3?PBFa^pEx6jlAo2O_idN2c}kzA?OVXiGr!Z4^XKVXL%xPlrJkQqu(zQZ~x*B zU?#Y;(~q%-rT%5O=_b?z_DHCBaT7&-o#wo$n1y;mBzG+{ZFvr`Xd7m%h}~fmx%IK* zbAqjlJB|9qbg9ub5$eDJ3vUywz;T4^i0Jc;)c6k9Yw4?&i%i$7slKiSvU$Vwmt*G? zv91-1)Ecux^{mvC-m9r$~2F7H)C1e!HRh)Qpq(rj(9Z~^iV;EJ}NYIC-yAJ5)R zGTU=BmM?gY@%C4-wz@dMv`ecQa7Ui5A+vwD=ti=TI#+AWk_{Q7Vakk%#a}7-n?sCu zDxR$Dou0@oOk9}3*?}dWxA7kAbqq?@eavIc`1<{Zy0EYn+19+px zN0`xIV5y(RHnPkf+|!1saGDHDHaOfemfC{+WyHRjbbVn-ww>~lEm-3MG^YQ za52I%Z(Tb1^VAPjlzu!k1|H5IX6{tDp35#u2B;tM_~!?^;-!m8G>Yequ4k8LGcM`V zM%!+3x8v$RNX)_GL|#60L}c_i8@3klVg?|T`WAcqEJK~+arDEx9H>`RsYXCpu+ZMR z2w4xfF9=Bg{yj7R#)Ov`Yg3gsM?las;DJ(r-{efgepP3Qrwg&dfMRoY*rO@_|YK)MD94iXnD@UCSOC&)3q;Jo*fcXI}XoB zMSFFeN2xZQ4`CBf&}idrJbG5 z3Dp{WR&uq8j@p!>6xn%h&61Fk2QsINp-EOI^2n?%0epkl6{7r>5^`0c+PcmdJn zmu$y*q))1=AVl=MCH;UW8gGpvX$WG}f3`zwBI&MOsU<0hDweg7e4XX6o*l`x=G58t zszNp#1Ep>r;}5xbb#=?Yl#<_-R{-!sQ!b>N9YQ}1&W9*zXQ6(=4c?LX5-UbUptV5c z$ww`MRa5n1!#Q)ayi900=al4MzzzIucC%mDx<0V+6fTg1Yd;^#YjkOjFsfg*Na^Ih z!2qga+XIg+mACQp@nXU&A;TD4_s}PR~Z>q;h#(VJ)J9kh#(1}$; zM$veY36~{(0eYrO_k@iuUDsR3$~yfAd5;J6RQ8-&p}TsOC0-RQEO|k(tyZ3KJrf%| z1sf-dC<#qgUVcGAUYSMN;UE}nnbT+p$DQ;O(p_VrLv~PuW``Kp3x4M1cxP~Iy)TDP z7lDLXIE~dj!vhgp1<#JXg%H5pdC0Vd2tvQkO0Ru<7YNiCOzYvNG6dMRjG_5;O_ zpAN1Mcb~3*flt9(TN^j7i)`o$W~{LW1`7U&z?=2RqMC>9BN96`y?{^G5B9lz)s47e zVx7y@{HNW-B)Pfem?kZoQBy5Cb{(M`FmxB=x12xt)&){8T&0Q@c$W(@mmMP(?VTPuAf6@Jo&JNbK)9*rPY5?|Ol~*UeH0I8tcjJJK*YyA)|KL3#d8|8=< zMG)erp3z+7F#%M*QrE#0Ci(<0K$ATn%Ib;acDx}+dD~r3AeZEozj-xf@_4Wa0qO7o z_{Cm(G8v_?{Sx~0xU+h(Z)Ui~Woq_v)msJ8-6cEZJp4ds3jqp30XST$C_=JTKrO`D6Asx-bQPr(yk+EfzAn6@A#+;J)q9} zmBc;FNfu5%M;%789HKl0V>^d_{;p-}C30uFRao~P<3udkht24_sCt##b4b#}(aZ9y z2=Blt>R!n8jH1`pK74)}YM*(J=kk#*&rZwk09I_rIrPx$3q|IPrNu%w0qKXMfyzOx zx#%$ZG_F!&>#Onr&v83{gA;atiHJ)Z?ihmwW#7=0doz@+V&8No7o?}WzPepB!AbnD zdfg*2?~%o5oryjVnj2qc{G7f%zmU(bO3@gWGM*^< z?K>Oyjp1}q6YvFL#*T4_l?|ir=SHH=gj^D*G3*}Z=A}{K=GcG^ zcmbQj1ECNr3sX3HMVt8Ex<`7rE^qd$bGvuRXO=0YVIiwm3vch80-Z{lM)3KC0!$9X zdw|bMGjhi`6JWYr%nJU+nULS;3`7P8AZEaquO=9%1%i$JCgiKaR7>t7g|wNKBeHKM zUO{A@E`ycoF_oMgJV^@p2c?~a5sa`O41d48J{HOEhnBo9z=P5K^MA;>+0ik^ILL4I z$$9^fm)Z*~X-^@KA|k?*e5DWK%Fg~}1+5_EE^pCAd~P0maWBbz9#=Y;a%6HdVbF~d zoqOkzj7Sq};Gp7+_DUTCYNl_&4a4-C%Hs{30iD~}HMimBUPw^#5f??<-aNf?5{_`I zSi?RM0Wu9Z#JUkB_JV@eZ=NCY#_DfYZe8$8O&c*2YjE-S_f`jMRpy_Dz#h4Vf{fF| zW_xn7Awqt_imLk#bXuuOTsdd30e!t^`&@Ul(9g>icDQohfvzKg{?+5D0-kNAcRhnp zh#E2}B&KfUoi~;9H7r5Ve#`I}udHd@{&SC~&P#?5B*%x&5&Gd@7027+ViRdm-|Jc* zk=lFlLKb09Y_{Wgg!{bBEF=d3NdkrNrZxZoajxO2e;)k3gMPEN{k5P0PDTDWa`a-i zW>Qy!20#J-Tdw#E^@nf)2l&GyH~Ho)Zh{5?;L!m9)W1=0(gvU;Na!CZ7b^>UBip}> za&OQb3VQwj&_Mn*zKw2IRX5ta5BP7F|FQi$ln>_b_}+vSmpA(zqodcKk%}Q@{im?< zuaSBeIw>G4BYW>dzCqyu0F-}=H18i!69+p-BiDa0J?Y|g|DS>W)%rh}s*lB?zn}ns z88iR@{ok15Z+H3gL7H2=HQvhmZyo5Z{r?9nGEC_T5(WS;#{&SM{Tue^&5rmd*3sF) z)#1&C`2P{?M??U(;bUiSJs-apCzX~z8Da_p~H q_usnRztUeL{-*zziuXqTL&f{Aq=0@)AOHa2ZPkBEhKA37oc#}Cf`+#M diff --git a/python/lib/py4j-0.10.9.5-src.zip b/python/lib/py4j-0.10.9.5-src.zip new file mode 100644 index 0000000000000000000000000000000000000000..478d4b09bef7529c6b41789d0a5769f20928202f GIT binary patch literal 42404 zcmagEW0Yn=*QS}aZD&?i+N`wACygg<+jgaG+qSJr+qUg_yQjZi(>*h*Ppp07{0N*s z`?_P@I}~NWA>cs%+w-C1sQTYO{>KXvgaE|egUyl=pb85DF1^KSsPLa~afb&1g?I)9 z0fE5y&p^fhO#g2^49L2Rnd-myjL=3%5D-{Y5D?`5n(ky_W@}*W#9;67-xrd}qBIoz ze=k(6E*HDOfzthQ2-?p!$=<}bmA@F?|8meGY;zi*rlS8Kn~DfF9z}G*dxr?FLYGd{?rt5}6-D#M z)6i=Yf03o(4PR$oU4Rm3!M-lbtUHSlw|C^R>0N#rB)dc`%@wG%+7dBaXV)PdGORZj zo%H-BacwAHS6Z5!4%~~kQPXEwFbc*;mBfTx?##y2?cAVOx7I2krlZFW5p4jewA=1^ z^KpAguxY&F)Ph|v=e4ExI#H^8{AS&cZUG&%+nnGLo*0nI{hL9H!;KOO#n>DtINLyz zZ<+eHGL(HDEa3fgDS}a$8~?7=5Dk34>8H}f6Q>CiG5f5YC)-2=u9N1k19{qfjAkjK zahpkOpuo_pwn1=(RG%f)Zx?IX7S!-y+P}ymzpSo{)S6f;2vCup<|+l^j90S%U{b8z zz4^T1t}UXn&=+<&JM%l5#oh zDN?D}okkEf;fm{YP*_Ml&-4DF0}DK@%3Yz0HLU(+s^(BjaPI4CdU}vNobQ+Pq7o9hs^=NfI&{H7P8<^y##^-|{o35Rv9%S&%P_jt;;ppgCPxN556b285L zXl(_0MMk1+dsk60`j=u`=8W&-I=|27*+MhexUv9sjJWg4lb!${axU6yb^>oMuFP3^ zFwDNjAJ8kz&h|TsrUxF6FkS6OTSIn@F{;>qVO*Zq>kVZH(CUh&k+4Q=vhh4W(un8? zM6Usf6?~&O;83P~%K1rClIqn_Zs2koDE&aQyGZ;#H{B`i*`oTC%J^$$cTb1WH9LJ7 zv!}L*m(Cw@9Rl~U2sUx7`rN3FGlr-o{A%x7F8xBc*}DRU0UNVn79Fz{x{>w5@ej)@ zN=yd?naLx*I6Dyiv^7ag83*};BINV=aTqXxomKqwy>j2+Z8xW)z{oV=AV`7xqqk3JZG14tW; zX$|hRobXb_9;8~6Dp`4Zw#SS|1J`Yknd8%@&%%G|0A9g#M7pSr$8t{Jy{(jDM#=qY zYx1A=?Q4dbm&*|o*MsrK%ZcOtlHM*xjvqJ`QBK`fr_*}?Ko^G;=V5O$q69_G)u~Dy zFw|DKxljDx*#Cb^N2ZlqD74CnO;J?NHWWTG4qmzZ5?SHXN@ref|G2 zPG6sdkK`8<12Yo?Hv{`Gxc^mM5S0);Pr&}O11ty#(toLok)4gbf%AWD@c$z&&1y2X z8%!u$7Z~6s%qkH{QZ(@l%kYnegbD9jHHjEy*jR7J z(_w?ZVrxkfxv`OX&1LVdeNaLTZk5#7S^peSauQ-hk0GQAi;!Zn1!5C8N#6D4aDo2L zFcSsc;cO|9it@8argVt1IkjXcOlu~9ZQA(retJI<~ zLRS<^Q?ymBTGR%Xr=!2XJXfup;9zeG5g!kv?#u5YocqTZB>WM>~K|nn&_uYPU2)`G~qWAy(*`Sm3`G{byN@3;+lKs8BwTd%n z>d05Norpy%F=4i0n-uo<4|qq<**40V3wc|K7pJT-PCQ3An_XK?Hk=*fs~?%%o&|$j zWNKsdqNaQl?s!PVd~>HNcYmp|_8P7u&t3Rlr&GX6)Q49PBF}D5#hJ^N7lkWtSm#%` zm#_a~h5;{BVZb|< z#lx1TUY5rm+1fRftjNn$RiGM6b^naEKrz|RDf&W|4w8`__IQL-Wr9f_aLBAvz>gJG0!dXYc5MBbsyatwb8x@D{%v1A2<_?h94q3mr50$+l zlNvq)8D*s*Mi_aX5}D3gH(I~0Q$6yVkxqEgISzGJE}AC8?QbYZa!17Ign|{4xY>M= zLbojN>!4n-7v}qaJB+Ek7X~kYgMiGVfPkR>SGt&5{O2rY;c4Rdf5@0C^{uE4aTLE5 zeP$hEOKj~T%2_h5Q{71+5)0C9{^Wga8FDM9O5OU-$b;a|o9wSR%C-Bg_LVTQFV3F zzugQA?fgDZ=-viI>5*uK^2LhKr3!1CN{41JJx*zDbIMXQdrB@y(*ZjF>S(m;VZf|p zU~x5d9c?qK_lMTe%biK1*Biq$U`w(6j=jB7E`cI{Ufb>qn${t`^gP z-JUbw6?#*s71eTDbf}sg!XMz(1&q4f?wr#o5Kl+X$p!~!%`udA@n{a1NZE)i;zl=| zzW?yZa3^lX6hjDTiv_fRwO~pQWc^KRGR4SxsuxUN@N6VlhSYc(@|5pZZJ1XrS5O)W?xV^9Z*bxtHAH?fI_VS?b+un=G>Bf{v-6}U4E zD_sfejxxk8;bZJ^L|7iGVbz{ce$a-E$m0i_%kP!m+KG^rJsq??(=*CGzlxzJ7z9^E zt>y*YxH%2Zh}DyYnT)=Mhx@^y-1nCY#hh}S0qY{C${=HjV$yrZV-Ihevns42+=oYy ziSKdI1mlx%aBGg0kgDs45Qa+j-2cqVF*flT<$E})7q*RI9em89YRT&l)C~$tDLd3( z@A9mV>f?t7TaX#y8TuKU;fD$MuxgY*Q)5+qq0V?=KJ?TkrRgc=15PbCWYmc(`>jGl zFghXWL@y5!UTg)R=(w8=g*8-vY~0$%#}EUQZ=YOLEB2>u+0vZU&5;q74%74+p z-qFt4&dARC{{V+(UK^)%$@!fp+T&H^uz#ZUiHqA7PljS0W5b{JB zlPE6h%0FK&ayX|Y2RzoibjtnN__qGjzBoim7aWeY#a%trObxISDYUPQ#MP_`i>pW| ziK7zA{$B6Jm(YGkVv=r&orPY)w~$Q+IeEF?X_4O%^x$kHigC#mZO)@f zta%cKpvuTJ{BvwD_&sUyY1}&FrrDnroV;NzCX6>KSRs+}k+9kLDd^A2cSlOoN>TC_ z8tPfroe+Y73a-_~1E|AE{&dKJH&%Fi(Z2!V$gO)c;AsC~Ce3~t@ z=K(Tb{E?AFsd_o$Hxm*pFit(9Tq{*dtJXk~prk>QoHL0Z{z!lX8$e}MLKo;&Y>Y%3 z5_|yEeZE1Cp`$!jxzUE+9ICp93&L1l7rQ3~bq+=$>!nfhF8YI<@<4p=|J^{2!)&(Sc`i$r=Y#cevF;(JLs;@h*yho;Voulj2&(sOhA>R*jIz@2)g*n}J2kbh z*4N#~DwoR**}4$WxqLnwL!XyH=c7}b8!Y~e@XKGd%f-` z$Z*Zt=)V;RO2rHzq(1Rvs>DLfXuCV9rAmF51Jj`DkM?ugbJ{W^Jef9Djot}LJN|ZN z%2jn-2Ptm#54rewiTq*5ey)Bj0J7=`RUN!8oCO>GEIQOcIiV!`KK!bBv*mg;WA*0A z%Q|*jV76z=>{q@$+L;ygBV)A}G-+dmXD5xDgv1Hi4^AVS_1A&H(xAlponQHb4-Irz z=|PQu0&=ytsfk>Ksmz>A7}`A4|L!IQKxDdcX;;iy(pum|JSxR zO{KyxbWpWt(aB7_tF5)$mQqvZ{akDV>+ht=Yxjq{D;SKnDUgiz*MT>Pqcj%os zX}-n6SV<7T5^FR9&$~15w_a=g&kKwMeN@T|r-siu89h50KLn5hR3nxAKONWp~w7DYz+jb(^Ly? z!*%_ybXeOBim}TpSU}_^6vKIqU@AX5Qe{`0%{|XQ^XQCtiq`4X(ysEpz$89F$n*xw zXKd(%myYA@?h^pKZBzPYYMq%G3*z*A9`LL}C0h0?AK4{=U`lhWz$-K!K^j{5SN72y zRUq8XmD6(madv~QtOyi4yB7(JNZ#$ge9_UE!Fv#Pp7%JgiEDaIGlwZJ&`%$@Vvf`n zJhKAlFfi`KU`XQ(q55of_rtDlq*Dsf`o)uJP^D4BN9+dD?GDeq9RZrBgA+n74t8=%&qji8#{~;LFO&Yq&9m`dlH&zt< za$XILbTylta7~36y=8Zc8V1xuH^v1Z1^qdKsuSnZF8L~zxl)u7K+&EWNhcUb6(<^e z)kwK9{{h2fNK7m3>d|84;oUbHoKn0Desg!JGN-G3_pJakfyPImy_0a~yJT0{Wbqq+ zG9W#w%4oa=_8iQAH zMegJ=W7`a@5&R*g$6>fmd5Ir;;6arloO69XdLL(uwZTQaq){KyHV?4x`}`5)Qf3Bp zfvWYRY7KyB&(OQ8kB_7Ub{A9Yy3ugRdS4-yOB-_()b75mKPHvLHZbGxhNeBX(HGOS z(Z7XRpePqVhX*LXymd??m1HcIaM_!pvEOQu-|Mo`h}J0klAFRS9x=_u-v2K1Udmi# zl8>%4k8<5P+ctGOyRU8+y|8I9@5Jl=&|}8H3@}fH+C>*2?AUB&wjWG}?;RQq&gJ-H zjf)(BCrv;)7VQ{?<4JL!wlawHk+Q#Q<*5)dud1&1*z?v8^13V*%)oD8p-Cq>a2wv| z+LV#zTs(=4fPaJQZi40eNT<&mmV{I-k_Am*GJoG|;#-y!doKv1C^$7QO+dsjSgGMM zUVEsDGgBIOCB6R;OD&`Apew{e>I70Q$WHxKXJA6Ia1Romf_X&5jEQmCK*xV#QL8q~ zB-0<1JVI9dS8yM9L@Yik58NQmIXUTtZubLe13RtQ8g~>RKr!H^h9h2IfX7L)IJ^c1 zi4V8$UY8I*+=HI&g0)k|&)kSgdvPg^6oG+EMn?~s;W+qMe?a}qARPp*T3OyEp@vBd zJUss|<=75iqrWHb>1g-@txx08a5_%Myhnl2aN79PUPt6QVn=0|3=8R5T1Id6(Ju@pF1B5CYf0<*o66HTL0B_05G)dmQE5y^k|y+*Ft>Ii z0SA`~Hf=D0C!RXNo>*k?wN9e04P2l~69h&9>Oy-9af?zLy9L-y9QS@270KDR$Q17f z^mT2B_iYPZOk+)_y#f87{DOM3`+(p~y6~a956#`ho_NqgAhg@i{h!IB5r>`Z9?tt7 z_8Gz>6pBc*me@X3Xx9!LKey*?yu%Mv@r&+RH?E6r(f4c3A0MB_bxfYyygMUq(@xAx z;%i!S2r)(~e{(f{eQoe?cmohhUZg}igct7ZUQmxwSzU0lT!mh5{*U{&HEiS!^lC2A zmEuZzxO4Mb@~o&0bhJ8kBd;l{nIC-Ld`}SUtsnBQ&(F!&+Zv5pcf0#9Ku&M=4i*$K zy0vz>*CN`S&29a|c~(IS%JmVEc6m6z0B{uWcRvsY?a?IDNT>oUrX-85ceN#q8tglt zk>ELf)ak`HN+t>I1UU0&ZuWpCE@{}7C+?aiP?b_2lVWR>c_yrxm0o8<0waKgWEn)* z)&ZA5*~VQbbWd+;?T38Dkb3FziL>gIv4f=i)z${p(+K`)pPi}8eMEJuYKg8EuG|c! zBX=UJV$|qd-ik)$3X;FQTz(AXrVvwm+?>C^Wdz)IT8B)OJT7as@k%34F((mkqodi1 zMSr{_Vl3!&MfCdH~e~%+VU|J?REGskNaM+3qVpFrb04aP`2==9Vbj+!b!5!2zt&`*0!53iwh1E&vNnLCyufuC5YeOnRbV%y3tts^K1ddiIUy^vm0US5 z+o-;;^*bkpkWBwZ#bG>DznT6_f(oPfoWy{DU7;vqcEDUSd}Gd^az$vsHqGVWlw1_5 zp--}YcSG1nOBK*BTshv{9ncihae%>4axI5Q8Yrh8RfF%GwEkSxyO(UgBUNeeHDNZh_(PQxh(|@Q4tnkL{143`2d3@LO$v zv{-PYpBeC6S#wmXsaZ&Od5@#S{C*)!O$z<6%$G5dbVX=uBP8-R)*lIar1SkMxM|5) zVS_fWDpgL~lH9q=YiH1Vwd5EUas*an@-j|82KmA#FtjCV5y@T0)*8AvE1A_osM(p5 z_c53ddE5ha5?~*9pu%e31kv-()*;fQYZP-3t`bZ0umTOW)U#A8n(m)2pg}flY=YY; zstR#`2!ZNBxO5>Nh6-WcC^Sp2(FL_I#TjzBeygvY7J>ea`9a8qKCKYD%eJzhVebnV z-_0An%d6b)-DXN(R&)|7wQ3zXdfNobC_~j=`Zwi~KtO9sONg+;aQ2 zg2iuoB$DFy`43YiA=1ONTwVS0g|21vj!3_C{P=}&6>lG&>*SG6O4TRG4qHUp`MuVd z#BYimAGjLAQRARGJ7EPI8{B*JyVc`4j>8b7x3?Zb8?K&mM2#TT{uFCvz{(N~UdpAv z^JEtQ&ipU6xvm1DvqiDD5A1m$7H7=(GH6ZdZ|}W@<0^dt#E853c7O_l8CNPNc}EQ` zF0NUz&LwI}1w8zWB$`bNW&FqQph&WT>;3@ifKH8j@WC^5tzf01Yw9v6s_&R6nZ}9= z<7u67Vh&R&Ay^8UN{rLAajM}G3G25EwMexP4j8p18}Me*08$Vzjn^Hb`clx~uYyqV zm288C0b-1%9ajMhj>9Vv=FCv+)9t9PQbk>VJAPUe-?9qCyPIFOtv<>P_L!BNUgf~+UKjhQ=$o0Uzw{yeR^4V|L1lWjt|~bp8I9*J`&m&84BO}@b?5xv zox!((y+gz$e7f^pI&;%1n|ca`1*?twXAcXx+@Tbc^^_o{#MpXrT3wMh+U#8hReUlf zH4pm#Yl(&e2)-)!p8}BGf8u|-O8WX1wieF%`u_)4Nkt8&jO|VtMpE{lysW~JH0{I; zAUsxqmi{-rbabrx*cAO3BaM=zEX}`}-}F+^VYj3ckH7|#ab#`2UC51>tI@8r7~(ma72*Qtk~3g7IHwm!*NLfxel6vx%F5$AA4=+W+_Egi9T5 zw+-(Y{Ff|Hbhs2kGuN<_Q=t@di3r=msKlyKXlpPzG!7UN5bBJ?gSRW~)E>|x z49b60A9XnqqF`G~OG|(yeSrE!#OjBk!jM?P@k4xDw@34f{%g_7&C5)raq{mu46s9ec z98i=Zy)Qsbj*L1|K7wWOV~p?EWe$k`?W%aoA|p^{%?d&kOGAmD43pZ^pDQsfdjymdCsM^g)i#z*}MOIAmEv-4z+qYQ0QH8GVwOBl@uv)BTkmHj)%hy4u) z0OnF0e?g3={tTdzp#u4Hs6_z6M))Y2lA-;@u_gniS>sqrHLwElOsZJ0^8Qrbly>-v zhF!r*vAj%!{Rr;EpI!A9NPSNE7=;$%$%#=BV^UtAQ!NDTw8dLpm~L&1_1hGq=R%>< z+YH(`P)k%s5iG(ga5BWQihrbHNHb0S8*lN9fWiiFkW4b#G0RCf$slrCiD=hLbP|#k1`I_ksBovYg2`w=@iK` zltuOtgMXlrpJkOem_>Y%zzPnllNw~Hr(zgOeCujkGALp<8&gw*icqP zWw{LVwuKkKk$-$LKtlkUrugd)ROl;?ET|0`wLoOh!5|y*X#@OmBg)8hCUAnaWx-1* z7!Xcap7e~dbkhuY5z-<#!xPbdEx(lrwC*1-bjp`I+yzw+ohp?0#W5xoZN_=or)3r! z``+|^^s@+~QiQStkp*IWzvm6JuQ#i6EXNfB_q5R}(!`Iy5#BYJJPRLARLWpDtcjXc z8ATc=xt#diA%v+QRB`B%cBL6#pscj#$1mA<%0kUw=)aZ*yR(>3_Uwg8@jS#IDm0Sk z!q^e?pgq09H zY>wsXcVP9qPvaMf5BM<-d7MBSDS2o%p;7w$y>QZH1VPU{-Dtbo(Qr4}o#KRcSXgz<#1!0f1Pf?Z5L#*{GEM;4R4n)(3iU#F zL|UPBV4nY9y2lAYpd>1ZVcO}(U}9$f0d_ehxDNIw1RZn>Wb^*Y3U=w-)9{?zL1wo(nYgUZ3#>GZCkkpF zV_Z+)*|MOBMJxx6(m@EWABBAs>rNaPPsdvmxrzWu$=CqL%o= ze(;!p4&0;r+elqlYYHp4@r?yo)WEbR={!H=uCF*&+SS@bYuL2?IuU#7e^EMj3c2!8 zSJRXt%+-4P2A1POQ^Pds zvR&)J`teSY92T)IfMHX+^Q0UA00B@p02-ABwX2-oKFjU^yX1^w z;(&lK2|u!lCSzyhPL%R{bkU$zmGC|F{k5HeFKm(OOE+)jynEgdBf>wrOG_$?ud<0~ zKx809QSmK4Z}cVY-qYx{LZdUBwL#j$-~zty2e}3yBq}bmIwnnhwlDxKJ3U7+uwH> z8b#>WH5phh2K=2@hRx9Ybe*HSfpGdh`ZFN0+gju@{2ayA`ZK;XE?DsU?o}tfY{XB3 zs#qG#M;gapI%ai57G>sZ7g*pr__~_w(-VG0#eOJRy#LkxqdpDG4YF;;k}X^j_!O{ z;ZpEH*&6pa=IwOgeME59pqWc)*!{W0qZ%^fn5f*}P!$ z+3sboxVF(&Yp?W-cJUnh%OWnc@TjhM@w`VasE>p`Lz5tvsYf6qziNRqwp1drXfrhm zdIO!X0}FblAtd-Bj0e5-kcxtah+CCaamZ-Uuw@U(1qGohA!l&6UCdW3pS{9<~y$!`ER`s%mhV?v!`Riq*?v&;=RwmOy4N}H?zNv7^ z25FopgDxm|y3-tsq*9F~%Z-%}T3P z%Ue=umcHEi^LwoCd!U_79Wj7@Y;krvQqas6N}jzdQ3{0vJ z=Npp6Hf#O8bk4ekDMAm1b*aeMU)O1=oi_$xAq$1ICeiD%kMam8go!qbS!rN#V9wlU`7!TRhjn+51nRPr z{iXSNaKhi^1fNoVk1Tju9|CRQ5B@oFUM~hW2Ui2zcgAkbgH*R0ALdf&8 zTmRIE9d0|n{f-qH)`N<_hX?wb=C9bv7@+W?wcf+p*N)-B>Oc8OQikIp-5Z%atV{+jd0AgK%#S4DbQs#tt*O0B3)x!!ESrS={5) zdp6}_A<>b-rQ#USVS^mc>CX4(k#yxKyk!QzW2ntV;RIc8ob3TvBMCQronv@An``@v zXN3jNc?R6tLU&ex>{22I?xC228?NQy;rmx+6J93 zwRB1X>G|*n&*YKvpRH@aMo47f@s!JvX zvR$ILm)EQ{GFiAqh-IgGN%8rH3ii$mpI~6-l|iGgRZ5B6mTD3LZc~gW8oTZ^IN=aE z73XTY^%Ts6K22g+t4uLzc0YFcb`^}E`6FmT$@EqZMRPywb@r)dw~EdJVl;>SE{dAP zRk?>5f}=pyHHPhw|n-pBmZJZ-VqQwO4Xs$#`)PNEWzehvF{y~LPIhTnkOuGZog!a~64S~weaH)({rhP9xI=hc2Y`Y9$*%z2 z{=$FDRL;{a4>2Q3r_`RvHU}0+#F~%?pRo0;2kcueyDR61pvQ+-u`Zxe) zOf9r0;ku%t?uxu8&%mWWz=GJ>qbJ8pSTNwU!D$m%evYgU*Byf#P))yFKbZ%Z7?$gr zZ`Z772+k)wc+bz1K+PF$bN)!StWe!5*E#{k7@#$e509|oclb8gSIT;&bfV&{L@#ax zjDUE9DKn&H2I1PS^W@t33-$BjKr+g7Nf!U@iYNL%=Va1 z63R0igq*>nAvrlVCru@Kq~~=q2zmQrJ{zu%VJhJfTpoEOZ3l(|R{Y>?AsxlUPMukB z@0y@-%;Tg1f-nx3K1&%Y@Q)be!k0LMlk;Ss#C)L99@^6sher2#6MAcNzU5HBaeZ&= zaNQA|kd{NAJAd7Jpn!AE~tvgK=Y|yy~m)6K`6Z}f;*rxB+948R_A63%s!wvl{S$_gLv$ndtB||(W5qFV_ zOJ1f!3L)b=9%F&W!L8tArsLQ+i~`i@nBb_Np6k+6BqcHGHb0+TRli%Nn7?I9 zfTDz{_dhvTmPO8+*g6&7ouadgg-JqkWUZ)8ByECU-;fu}h+4+4eMTN;u2ZE(Z{BgT zIMcop#9~pU4@}InJa{at5vx4g^;eaInDshK(Y-kI<#w=bJ24}&@mn9ACX#PD_D1i9 ze(sIGm*~}8N}nQfj)&>Y70QZldXQZZpgRBVPVg?|^W4fFKg4NJ#h#h&${Ws!Y#C(h z8(F#aA?Ck_&_yI|^&zzuEj`!SsM~C@{o>@sv)C$1tZgf!0i=rM%nD^*HuO$%zrl&- z*0-z4Vx{L&^yQ{u+0Va2_pob5W=KD_N8I72HI;bzMpbAOiq_GMb>f_l4>}V@z^`Ce zu2PBmurO~(tbkmlY5!jOwV(D=5MqtjOm{|-icXWDbxt`I=@E_nEEf`QQs+93hK0kr zsfy2dS1W<6DZXwP9@q(Xk{EhP4A3qnKY%t+n$jZ*cr9bWLgK^90d1g-Gfdaa9%cc9gNu*Iiv|!3uLTp3C7(^7 z5eRal*2x_=93>mg0+(OeZ|Z&;-q^1!T_dNnS|}`A)M$))DT(NLs^Hi5Q6S!btC491 z=yH@q0N&Z_?=*mYbG^4gWRe;gm@IPI3(XsT*tsFh)gorA-4> zyh?f-tM>Z&N9B-pCA@EG$Z8|NjX!?@g>@oQ|UT? zZPwbt`5cTYTUCbio*ze^`nL7<{!9hCw)reiR7g$6y4%r{qWtLIb{%m&;tyV=R36Ya zLvef4mG53N61mUuvVSdQPvQGvztC6TpEnpJ;*}4hpBOIaVFkS-ML$~pJz$cv z%sc59whzM@U^7gMvz+(q?d6t-d(_C*2Ph1Vh5OX}wvTE1u<03D?Gwz{D*z|@MAYn= zt24WnTelR`=n7}H2=b+BX=b8)*K{%41 z(r!jO+k>(DPyO5iybKs{TbLB8Kj`S2C1&Q4DHc&hDMgpAWtf7f)Cnd5Ab-I`9lF1- zGjArmJun5OdOY#xY~n??MF0|qeVa%M;uM46FlTS}aCXBXb3Vw@ zGFMFGA(aHAuFR!0ir)D>F6S6WQL67PegS`71}MhJl|2seY7hS91Ksc>cB&19x4k{u zWzQ4P;8ft@+$FQnI?0Q~w8C@W?IEE4I$XaKpYcQL+S-JLTnb*K2!@FIP-`^xhs%gq zUU2-qo`%ZC+<+%pIfiJ~pZy1@HH2hZrbgO8;Ghroyfi$yh8y@O;%2I$z#|QY(44DJ z{Kqdo`p>XOo8d2gU#GgA3iyy%-2T3s-pokJ5QU zJ@rOcF>p_r4|HNw%JO<2DjaPmj#nG(e4*8qDj;jWx8&Ql=}+}`<+<2C^C(#(xOc^{b%{KDfFsI27JY)V-G4zOhgjw`W;3pv#!oocY%}3z#kQNzJTL zzEyVOIWF)U_gyn!1m?BUH{TmCNeyPr+S3OH6(uI@*?PDjHaLuGl#q6G7kvFi)W-P0 z0AItp1rTD*WW_jPUxr{F_OH>>$VxBZ&Akiim*0HB4ucFtQ3dQIHEaR!aB7|ibWcs=kwVm=gGmjz6Nw)>;Kd!; zdvQB%pHLN@y$@xf>PPnjS@EO??=0ZBmaY!PL!yzYpjk*|7a=uJCdMWvE#ZBS6UX%h77+)XboUv;dt!&pg zfc2Z_nVFW#sb!U@SfHl^tG5wNqThpv2btS}hDQZprVXbw(X^z`a_7uQgbuM4Zc!F| zHR^P{yb`KVaZD&hYcGf?v7(yKR&tn|@2~k8$NMqbpKLw^ ztvu1kd*0Bq*bl!ua(X(Qir~s=XLCXe?zXqM^_9v%=-S}lywueVwf$(!ug08#)ST{3 zD!5UtW7642**Zj3UVrofypTiPj?To;zL7W3i7o}l{l+-&o|K(p{MZ^;76HHn?1lSd zLuDS`-{+pdm?v%bMFOkEsbacS^Z# zBzLln`e-esu%nH01|anAVA4Yh^RnSO?5=9r89W0!j5@|YNSTtB7bFp{g3#dMX0uVR z5sy=>b6)$RmDg)H&Wnt{V8*;71<_nWlc9g|LRg{Kb@Dyg3AD`Z9mB$?krJ}`Lr4%G zW=aWa@-OW!LU4lwC}0Az^`4Jw}_HG z{lbp(#m7iD8r(0CBQ6-*ufG{K{DS7i8LBz(q!kgQClZGi9YVmuSE)_}coDJ^2z*fT zVK++LohI3qwqx6ksqnKy4NXke@3!U3as@zvG}Hw9#%k7w2#bbW|3+A!gI?nvoI(Sa zV+I7+#q#w|EpP+ZuXQ?O($Qb|XyPG|REf~mnM5m{fXa`Q+$W&9!Lej_$q#ELK9CG~ zCd|q;0fi|}gffq9{IpZ#R};N!~*ojph2R_FCJhjav@|6!lO*FDe8vxgl=e zRng^GL*PI_hes4+N?977ygkeyuF}hq3H%1wDr`D*Hp)Y8kKZ8@B@R$qju z)EHum_i3{NDC~uLPYmVRUdH+d;vM2m$AX;XJ5}d0^HEh}UGOKATz@P7?N+ng+2u)OOJp1Bh3fs z2%Ua2XV2_ZVIR70k!&txHybG@vhy5(9{B7r#*f)@%vXnL)6dFn7THWq^;-k@#8Z(^ z)$-5!%)0$6L%+5#SJ*uZ;^^B@St}i(ByMs!Yh>9PO47*Q7mC-+7hQ_fF>5U#xQ|*U z43_mb%e$XC;UTphXnO}wIDnhgQ^bfNFu?*i>y&koPpfQH=5`;uUtx=EwSsPmQH2_8!0jfRcQ=?#+;N$cG z(hOZ^nor}r2i3|rG8t`>oadFm_wueO)EuvSrEWO8YgXn(XIPhWLn@EV!0X0(FiG1S z3&kya&ADDsIifrz>lM5xjIjQfk?F#=CD3ow?|HajO0bn{hca8OcvYHe-k>vH| z*L8>%Z@=yj$B%u^o zGy_-9J~L3&{*ov}6mM6N&{QENQ@b8{^9#D`ZHl>H0@>W%50FjWZ$Vp`(+w?Wz6u3K zq`O;Pi+4D>2T(=^Kbs0Q4fsm|Uj(L?K#a$zXpBS?I1{MXG6#Jmz4?TLQ04qn zV6GkH5n3~9pu>)|L-4R8F91h{bnGxA@Q3u;Gi!DQBSgL+-P{raCuenYonkFD19cd1 zxxth`veFCcUp+>}ae#C>2v6bAZO`36D~?vO+dol!?2m z{~=S_A+8W*LeJOUYY9|qfKXf-vB8LgW`%77UWT>Qx%u6AG#CdalVLX@Vbvl|HN&=L zskPsZ?u=rT~!1Ww<&?mKL*+{--p9l~seFLz6bBluQB%9)_q6j8Da&zlf z#zElqrPr7S9JdB+$!Lw9&zI%6QfXK)mBT!~QLZc`9TPJqW`~u-!Z6iiYTP-`pyT$3 zG4#j=5;OKkW9aQ;VZOCLXNu^DPZuAi9=eaoyThyP(P?m8+swO#%6~BtJE%6Dh0B_y z;c5;BgRn9!K6*h0!>jsjHN39R39Tp*9WU)UgvqN|I+Q0^#z#$g zN$OYK7RUjjte(#vO)7+nb*-r|OW*?b|9P5!AQ7Ho(8ZT;LVFb+02S2!Y8 ztD?ogu^0aQzQ@N8lc%CVXl&j|!@cbgoPlS?6W=q)CVz7SDQqV;PXz~;YE%Ph!rPsZ zs`DIhPBTCI@(^L9FB=E6zs z*e2gp)k0+!5j}QeAr1m@>u#)(%0siEo+U6f%(VSQaWk|pp;wtp2kXy-Td)aWi%w)w z_d&;z>rwr|j&Iu;)ZNgN?h2&%#2$K`#owDpD0T|)>fo{DW_$NM31bC(Y6Kabe0U9XZT?dm>z?Q- ztBI$ySi9tD(^c<%#^0P@|G)zuKH%S!ut*73z>*z)i|HKE86vDQWzbY6tCXEd^{CIe zgWhX*8wFY);>Z`N+RygR55C{~L3sWR4Ma!6zhRM!s@psrE*TYp4-Mz&;ONB6#t#zR zT>k07^EkFNSxsRVCcp3eV^;lR7KHn>M;pMW9ZKb%0S4YDL|cKskw9ssIXgpA%Gf51ChrU$ zo%8xFob5mpo`4LJc}o0)&7~Ra5Ml?RO4h;ZhpWCP@^tjDZdr+)UB7sCFU_-skzSm|5}#i4mZdB$aQZ^e zD37yu{!wT^n;_l_it(;j@|A|5*2K(cx4cenE6y>7<|(qqpiV(XYW-(iAJ&nEj0k2V z|I4)ViMjn~au7ro0A?z0^#!Cga}D8cEuCrDG$oW zCu%U!iRsZYU9BmfTteYNQGLzaBEop5VnmydrD*ei1$wD)u{7z-0)D9R@!5=CND-Y$-EI+T zMG;3qnnnFXNbb7$-@bRZ z7qirU!1D)|HF(bwc#D)n(I(YufBbkpa!_L-%f58{y3s$|Y`zRL6Yejc%@b%Hu|i%5 z&(XKl-NoG8gG0#}85-9BQ7kG14-2ptLZL*QhP+Pf5(}FaG-a4JBZ`5Kflp?edH9#W z8TnxWP$I-85{imoqnH-oB#M&RPLWUcErMLmtaF766xA$>Gqb8@eJQdER`_T>siqWw z8WcaPDk7iPtJR{}`Ruc+dUd_N7??fz?81m2lk(~_Kp*~Wv7Syp`~36A@YDotKz1-d zC8y`N{bY3FZP@BCY@h<$P%c8R2&F0e(6`8eOl4cdDQ+xdCW|Vn&OFK>v85^c5Gtj) z%f*Z=DL~usOzip~B`3$jvxDQOu16O`<+H8~1ps1>Gk{Q#f5uCRax`HA4mR%~Vkw&Gi6ASOtdwu(HOM}wHgH|xqP1Ca{(yFZ@^T1L z0~~DP!VIeo?ID|^yY9PYd#@h9HvjYc<3ncKp?|vEz5*=ddgS{iq+>{un=lPdV z9z^)F8`{{^pLQP#6_PCb%tjuzeiF&nz2h*0!`yZ^d*M>LGYc}I8iLG zy)8-^o`4X5tg}y%a2}|bCZ6I9+(|Kw!4Pt=XYjdct{|N?K%AjSn}s(Eqy6B~>%!qdsuMlc5 z0`$w=y@Z(3Cp}C`-LBxv0?Mg$p-iV7(K)k1@4x73vsZcxewi1EO&r?7&w96Zvqpw{joIi#)6jB1r7UeDmX0;l$Y+Xd z=Hap9GqLBNLH|rJ`xx<$^mmOTOjGdTs1xQSJ9PTt)svmq!J+sXU4I%z%#6xI>qWN*`fQ!SXT!$JlHzT44wvAr4r2_Z zJJO+3k00gK(pLUT=hil!Jh`U_`v-^L9k}=om3TD{?hs@sbf>@+Kizsz4J4gz&kr9P7wbPlqnAOyu&l8IpyBNMc#pArDGLsta!t zEumWeT=NM{G6$j0;V+V|B(Pa}@o1{6z@-(3aoW%`r&}x4UDbYS zFtNHLT1at?l|SUG$RclF<6;@Q0l+8iagpjFYCK&vSuH$sT`ARfbJwcCi*-Gn=#%$p zaP;IRDLSH%&5`a~o2e_)1E2+e$jOm-K3X^mAAvQVxjOUkkM~`0P3(8@Cy~BDJ=q!j zIIjcWmRC{cD0jMms`m7~l0m$V7hLzHW~yxmz*}ZkTMHe4mxSCF%Y19A2m6FPdOm5B zkbv~2oa}6JOSFp1qCmDpj-vD>pyV3%y_Kd7R0ru(b;#foCJ!S6UTgZW)cY9}@iz5+ zGJEwCrunv1UtSNPkCl@5;qwq|%&gzygF)fKCZFR|xJu(7{3gdy+)q{87ZO zrV3DrZR@GY(CyhmkQv@fk{Nn{jGKgz9AM*6wsp7o`Iq1;ERG2Pi3sRuDJ}u2Plt>7 zVm&Rhjtp6$1wL=8yQH=!SPPW^DIg>pYjO^x1pRS);sq}!TZtAliDauH8)x=cOLj9f(O@Juj^OcpEg#t!0c{e1p36Z!9;UDl_-+O?4843%~ zB%=nPA88{aa!14%E?uTMwMQO9;4iH?&F0)Oj$=RcD|QejxASEKD6(cno3SBy%X%LB z-H*-P27NVUY@N*4hzM_FPE4%gfwR*}hX7mNF1&(yy*A6I)T^tkR@rzzYk{%^eBUSgY| z`4px47<>DH`>O5kA(s$cwD6v#{Iz}g;_2aOt9T(FSNf~NgXd2-X1w;dGS&*W^^%uH zvA&0R!qRkzmEyWYRD-5D2kwr?Sn$Q_<|9tw}a39*P$Ns)F z-t`;{D9%IHGLQ5=Cz+M-ydoKCJ+ZpnvxD=(Y5Ac7xr{~Vs7El zmk0LR!s8J7WWIX*I*j2Z#W$n4yF~2piS^09_H~JQCmA3)EPAY*he!C6Ds=0YVX@CPY4R3O0AlI!(j{_*N z4}!K=uUWMNL=IN-Gn9G!YRhVosP|Cp-t}OgJKo@@b!~>s;JlLYY(TH3;pw4j4#dSjV5bLPJwMn#KRh|!f**#$$PRzA z_uby``QGuf;r{czGZf`E8w$PSww)pQeg{k6HSnBZKDPLxctT!u0wPS?OUheNl zXsx{EAN4Q>$D-59=LLZbQNIuc=Abb!LAd0-{N}qOKllv>(Cht4G`k)Cg@3 zWhhJ8;m7XVL~URe2`#&t1_=!!= zd4#=es^!;4wBB2<=KFKVAd&JMyABjHGs=Tu7*;pVN>usE3QCiSG_BtIZh1* z@5Fa?bt`dsc4K}cRjm?gy{e~! zQ~WN`kos0~r?^yjGRV4-=S^I{7+Z@*?*^_kVAR@|chXp24g6EIEx#@6DL4s9+c@mv zoFIqG$prpi!}6C?Qae`7XD?{64PBVbUAL%?1fV#av79Ez00Wpn_&565)=+=zszDop z!W`=X?-fapPgtaYYBp8MuAVBFlQYI+q!crF6vp->k;aoC$?W_E)J~=NrlD0tW~r5` z8R&!52YP!qUt`Vpm=2&J(6TO zE+s@Pnww>QIj5+aR#9KEqQ zYSet2qLNEk4IAfCF+^l|`1C=hEd8)i*~m2eRhm8z%%OaJKS`W|?shW-9~w-mF4Dzg z4=RWo6~?R6E@5=e9V!okN64Wc1X*K#H{}5^Fm!@E8E+sm-bdClygdH)7yukLsTpKg zX-$H6&VDmxgW9gpG|Wl`)5rd|d(RG%$4e7Uo?g35OY_q_a7&D~i~xoGwAr*p#%Jw; z))##BzmGvRetN$1dXtp4S<>E8_r}Z8Q<28Rr6gY&`f3EIEc07|-1Jbd0;oqXhqRYT z%{?lpH0CEqoZnsQoWLO3z2qQdb1(P`_B5uOTzmE8G@GN1Q^cRtE&K$~pJ++aZ^Hd- zhSC9(Z136wZJKCNFZDVgwO20vcv9UMx2kRl#lh*p@%{notIXYYtqcNh=UzY^&*l_N zhAHYScWbTD-T6#_m_@|x*-UmpQ)rE{06l}y7-so<%uijY{TA~Uy}=n$zz z^_9JDh7kwRZ%ePZKc=&E{@5n8%rITE?S1+2$eZssMat0fwH{Rcpu#v7j zXa$UX;PP49deP{&W#{1V2LsE2)i5@w6bi7yU$DR(@qz3T=QPp~QLuHuf_V{&xd=pd){5P*a$T9ixvxCs3@=+wIU{T@B~CWKPRBji)wXSAzl+RPTw7A?zU7>rJoj2)1poTep+J)mKFy<6{0X!ZrP$M*~Sq) zz)W%p4$U=W6QZR>{mXX!a@#9~LomP=Ur>X=P;!INO2{D{vilB3Ml$yXbH;6LI}Xr8 z*?#?A>4>1i8`^wWGGdmo&r-|-Apq^NOiF@@7KF20Z@Cg8gEi|5t~huB`5?NSW3>5nBij*Df5B+O+D@H+0+GuBQbg9D(qg!t^hrp9iB zQcFn#ba^DX)sd)bptR^J4gEo6Dnr2hp^^`+`4O^wQ-|&}dp9CbAdjs6aWX$QU3GgS zyzZV86t!YYs^(NJ>qrH-ZOd{}J(^!$TH1i7zF7@?5$X0JW5|5P`MV*F!MXvyrbX(F zn7mK~W}(+nQSx*2{UsK-%5W0j!5nmY?;p;;Hf+s{)04mdkjV9oFZ)p54ZhRd17}qx zbEf>QG8A_?1oXqyIAUaI9)8e6R3J4QE~+*rQC8o;_e@5yH}f*Z3r%vls7Im<@p|^M zR1|N4?_WdW7P5vgM=z%<#_G&2NH`#`)6qWHBZ`?AqZ(IwOsNek7%I9rV&Gf$^CetO zI3rHgFJ--UPQXR@wrK2JXyd*Fo|Oe6?E;eOP@yC_bz0=|#58zn*V9 zI`|3J-H{>9BYKoV{}Q`(DFWht3^TCjZwK(|*)PMTXt`v8e11IFiD+)CrMd#mTp;6N zW^@~*<>X~x&yz3r&JK$I{5;_KZwV&f%ws*PnsK?PhM@UHXw%HrD0KZ)qS&^z%8`o% z$TYkf(L1t<;PaW_(e2vld4bpa*Ogf*xIh!IhtgdvIG?e%X}MTnsaH+99d)~upwa0U zohO6G@K1D%19Ou;pZDm7YuuwFw{+We5myr)j0Etuj5v4Z zQhxRM!>d*!%5S2w{xE#cR_p86v1=SnMCOxPdigTeLPTZ#i(1?2ye3*r-mLD1Cc{IbnH8{Zs9)nei@~x7wu|Vn@>!?MYTe|) zRy=!VWh4D@V0p_9(w?u(FxB}Cm15iQHUrJ8kVN_dZp4fEeA?~xw`wA%dWz3To~2DM zs}4vUEY`VXW7%dcWcBg+Dad$-$LGV7FTXk1KOY`G&2EXkR(4Yomo+7jhDj1QG|49e z%r4iSMvjH)hO0Q`{Q?HFtii1#+HrV&FdvN4*8TYQ2A%2{s>l4=3lnIX|DmVq}E;cw|skv`MYx2xSHeS>k6c3MIygVQ7 zpB#U8aC)w?Pe#5;nVvBW_geSO24HmDqu=%Q$2qH zF-|AG0}sEmJ0qXp3^)w&yuPW9>glv@;xu!a+%fb!Jr zOg6PqU6oHX7>b4WQTr&tjbjz8@+DN;h`Q#GXic~|WC%ek-|y{cG9`Hc0F+tHP4{2>|3oA4CEqsIRP%fp z)BKkEy4$wp!4hO?A!wlBaRR1FJte!H@w!>fZ{XqT1UUk6`M@k<>LLO!GGrJ8QX6Ur zIl9PizcCD#D^SgvwPEMnSrn})$28oO6O5aHn~xQ&r5*Iz(|Wve8Ld_51AM^nHX|$^ zjJR+o;2Gjupc_wBj<3a{do!P`F|io3f=GH~=)_&3`y+%dlnNUI6~GsjDPGT)y_m$q z!tE3%3l8w0=ouoM!nY2Jr!uRVO|b6m_j*?U1c*eGJjPr$K)G1eGc$c?fV?!tfsg!# zJkLe=bkV3iqj{r)KRyjut90$vlU{=E??qmOhS`R0$j)AROh0l$HaG8fFaqKOx7lM3 z_@4dK1H-Bf#cQs$R^%6lEY5G~{D!Qk^+Jq%-`d>W;&84ce;L>aH?W)N0OZwIKD27E zJ)a^VL=lq%EQNdDFFu_Z4sQmQ%_c__@RDGCJo^w(HgZjj(9Y&s8KvzqlqCr*?K)#p zfVJd;Z1`>QCW>A2^$Q@Eq%SuPIQ9V32^HL5{p#A%?DaHy^`!sruSe|VYL3@rJw~Lb z5mp3uIrDY$s_EJ7;iCP`1${A0GWzY<0$+^0zGW@4d=P}Es$BzLvLG@w@>&PFjw5d< zBOuX@m1(Mk6*BbMHmLNX;WghoJq=P-rY|CwqicTe8}5z^hgC{RG&;yI;ayRi?LV** zR=OLvZtKL;{Oj(o8Elb53fO{x=wz{y?-zF1dAOxA|DKr|4JFuIsaxkM2W^scy?#>C z=YYIp%-;G=*l(WiIBPf!-U zCi8e@IOcFO!{of%iof8)9Lzb=0s1M^vCU*;z0RQ*eTnmO8Te956NpwB^GGHKpU1vO zymjJeX87-WAJzYl&SFlgO-C?-X&Y1WqB4P3Jt6$0nV zq(iHl@tk~ri?gg#!iSbSH5gwo`BR>FN}v7bIPS75A9m0USDn?7w(d=9)#$35 zZbS%`J`JUObeseKHAwmDsbOt$v!aVJ`*_bi5rn*SM-B%gq2)+TcL$mhf|5E16r1HQ zp4nVHIN*D@q7>-nJWkh5{Wi?Dw9V=b!hvqA;f)$g!#XFmZwbQImij5Q$}s^6bl;iA=_F_m%Zw*k*qPW}Trr?Z61&i|4MB*D*5lP9* z&Rq}a!+$j%a0N~2NLkse_>$MNxAQkL5OPjHYju57V?Rs!W;LG|HvpMHGj;E=#{9?x zUBE}HWvB_#%OoZa&#qqJ?&Y$odPNY`8+4@hETu&Y(T?B*AW$&v|J%|C!;5kS>P~hz zH|u2RJZirP+g(tCgLoy=;Zyns9wS6OULeNJ#eBW8kVoK2FpEzKuZrn@#|wA1S}Lfh z;KeQ}=lJ+qcjYaM&~8Cwo+Lb`X`VX;*c~77Z`S+!vYrCA9i=M8;Fi3`s&iGX{_&4b zznafG=5X98b;Hq}`C}h`QvZHg5@t+9!!Hnq-ITy!aTy>S!<%w7zBU6ZuSz%`Wcit` zYRG0@U(HI>U*#Ks&gWE#JTq33vIm1|%MVp^_e<7=yw3HXVoPp;38;Tc()KCX8=oj%=r`x}s= z`C`@VzIyFHpp&{9&S!?nuWf{kgenai^E$3mF)O-+5wDnHO-e((XZ8Q7hVngAF7ork zp}>#96V7%_(ATps;P7fWUoULH4*9KtleYY6d^<^u%G|19V>V=(VnCnc8V2EuV*Jwq zV;D3iKx~YV=1#I=GUYA)TLf=5P+xyB;*b`Oo^Y?Kro(4dRqR}v5$=rq%}%xMmoOaw zRR)&>*Y=EJc!8OwcK{!M=pY?M0tbkqN6?08MogW=oCn+|^4g%H!9{8o)tE>GqL_jB zy|}J!z<4R;k2nEckV(~ym-Pa{?n?9iR>%S@kusUz2F3TVE?|SflL#?5 zbmCWaU`wal)JW#xSj-G)`bS?7)8?ORLE`5OkE-KbAgX$OgS;yEgIONPe_y4Q3BD{? zggoA=w;0ANF2TVOj_`=!OaoUUQdT)5xiDCkrs&dQ_j#LIU+(C!?-e)o)irX+97VaP zmeV^DfhPf{%Wle0c1>uG7>_+S&-D#4+VzaWu7MbV8W#2+^O=QP5zeBI4$bsxO-s~- z;-j9Vd_y9kd<6SUi^YrZ-!OB-*C$5@pIy|m&wj)hOW&{06+tGH!nrw97M@7$l}J2H zOv^KDOu+8K{i&VXq3|F+cgtr0>+^=Q+YvoBD0)o65uLJOF!K+;(J@LkC*kUbH@Gzq zA>BY8QxJa+7btuT_gBm5qy5j$4xb$!pF?QuCoAI5m+QsKwR6a_h<}_7>2CS~-3}lE zaDmObm_sx*{C=MWO_1v@DtEAu+iB*(Z5H4nSfKg(>N?sU#YF}?BF>=DJ%s9lY)5Q6 zy_w{i>hlFNj2Q$$ka-CTi9}6>=!LWdA!p=vvLi09oKyfg9oSP5pz!Hu?<7f!0&!rG6&)%KYEv6NENMmdglS_9rixNZ&cHB1h&9UEnQ-06`Wl*hr;)jgXS4CKUGS zbIszff`8`*BCO=Dhppqang|e!L&;Idrv8&gsMe1PF;=7QwLn3@zq#0_2M-=REcW-F zKmT%X|J&m1#limJSBLuoArv_AhA2Xy;R=kj$d@$>!_U-=dTo9mR|G4Y|w-U z@yDqUI19)DCjgtWymIui{V~u$^5H9yaqE5|V81b*L#w^aU$X(KRWMx=9my6$*~bZN zzZWWl+!OwnFB$wNiXf+e?@dw4aSS*&y+vIv=~r)>DcJH>)w^xFSEa+@XVBm@5!@|q ztMbjVx*U9O{ynR-VeDb?d~Qyzq`HKBtIZsQCG*VJh~fztvOob5fL2jNeC@qR6u{F3 zSNHh@@vf{hul40rjvci)(`hyJ5cLT`^v*Jf<8lp>A)K^HRZc04Vc^eo3NzR( zPGKgYcSGF_#o%g+_z8t|0pOg6UMWk`DWE6ez2O4bg3F<8l1i<`#!tfIu`Mwv0Ad9w zsetIao^^WzO*_iVA$nx*vdhl^0ujxzLf;mB9Q*E6Lt1I`zu?%>(V|vdbM#M_yXniH z&rbHg9iE+^9_$_IWd*ld;JvudZrrjfo%Ry)3J;8cJn3eq5dzG9{%EycFuzbQ-u`F5 z_;>#=K6DH$k=!u9TeF7n6BtC2AyeSeUIEt1(A{-tO)qMyG?Nr8`ot9t9iKql3i!;M z2C`mt_Sk)lT*j6*t;EVE25@4NeS11_UQu&O#gpjg$G`0S{N$IOpNlN9+Fm2<06 zpsF!HbiLduXknRN&zn`o9Wpb2d7bSN0QJohdp8HX}nN&NrAzMvJapXz$%A;+>s^Y{gfxVQ{5W(zhyK4`LN0h z9y@Sue86ejotLL&H49!F+%>bbNk1&_F`$j@{zsAkrd>!oXY`7?Ghue$5ZcZlDk?vz2SJQuydcAPaRFt+ALtxU7Cw(r8l(OfY!U6~w9)8Gj7mvVy*yz;(2_oEy zRCdmC0(zHz0nqI96Fy?qga|IU0MS2bXIwin$RdFMPZrzAY~z(2g5d=W3lE}3#4EaW zHt?a8j;XCIF#Va#I9;38b13(uUkuYs2#q)3KfrZ|cxQ{XV~pL3-IQ-CFc)<@%ZpCB zs(J%L>;d~te$3H$aU|)UHW5xcnaeG2uQ!lDd;~7B>weGYK=s!k!a+#)6*#qSAaAL$ z_Eo35+l(`xl~q1Jt!uLtHdaSZDiCk-waIO{rRo;)z*e-%djOIBX2N}2qV1Ldo%GHG z=Qq6`@zx+YZ`l~y3@o*qxLuJjvGrgT7tT-Me6f%-l6&zyv$4t>1&#c0}iEu`Qm=U zK(S|5eHwU_hL#HPcgP72pj3k_It0Gv)JcNhp{)|_I~elG66g2S%vbHyVp+*al-0=I zx~+CM6anB9P&uoKOVem0>ock_q;SJUfPq^X!38ejuYsqaYo&Tujo0McvNuJ*54;h# zqO4oZ*~O#^Jc3*k%L>rJN+YwvOv(4;w#pQyh6T4|jM%<#6Vaa!cW(`sQ$i-7#aNDj zPY{MDpPVuiZRTx-+sXwGVVk1Qui{A1rt(R(WQ0uwpY2IQl4-v$eSnX8hoUefZvqHH z37V{B!0}wxh$Pc4+{VghuM}Tv%sbvoeHE_QV&1H_zPF(PS3Pp1c=J-h#HB`TwXGV3 zVGjf`2;XTp;QBEw$s9Y;Ybh30g+6d`p#gCe7=pH?^Xw+I#WWbk-UoEx#G;6#L;-cEu+C1Hu*>-!oeZ7cUi znokZMVNtJ{u)=6BDxO3W#K(Mqad894emvoz{Qb4gnl82KCDU*EzGgoFF%Z3J@rp|7 zE^T{7l-P*yC{Hud;I!*qoKp;km$&Hs@QH>8Cnk#y2q#vS;Uz(x1H%6IdargKJ$W59 z!Dq{xL7Kda6VLXO%RKJr|C_-AFm;Q|vYs{+U36D?uJrm7uK8rnw-}vw^3pJ`SznQq zzNR7lB9R2aRR#no?N=wC`NfZvX7#;yVsixkDV@uS-FL_mXPrzSjqJ})QVLc0uu|2| zp2pjYr)hS9=fX!5e76DGiT*l($g72!L;~JPKBNpxG;GhSAR2*#6#MUJET(j`?CItb z>bZz~0-xt0>a9(0quOqK=}ik=zx6>i!wu}LSC^0etCLtK3Of!b^lb0+%e`j@W;V|u z+vC=AOCFSD$suzJg4xAWS5h@^?bb8TUKKwNK^R~! zXK!XG8@oeo4X=V$m7a_tCDkVo=1H#5=yGgPImCPaVhZ-6@fArNC3&o8nq#PQv^v7v z%@V^cxL5*3NZ+gXSkQyh)05M!ms264?s*Q)31C!tog(ohRwwNfBM^}OjN*&Oz|%}B z&^L9Ru?n42TLLK(t4Gj^sq|f)xQ$XC>P|n08P$%X@=zsUo58`GD(KiCM{fIcw36;u zn~Vc5=;EqJ$RWh;<9AV<<-K%MVG3l^VCbI$7Vz1Xz?(#B+lU501`dSAXIYwIdj(8} zG=gN3#kRc4NeMgXG7_nScOVwIg9aEnnu=I%js=X6UGs&H_$v`vyl;f(@;V9^HrDkk zJ#Jb+4wy1e*B>c+-K$B*Mcz(YE-#}p^8+yEdgf%bB*e8OFxJG)WgrI17XmH9CxS!# zgaIE8pB8I+A3o{3p4)JyS^F0-Vq5TF$ii9|ML@47uD0nWIEgs*I6&RISNIjJ+3P-! zr`P?I8cQ|PzG(dZZnbQBSl)U`jV#o#@!8hhtK#lI+E z)1CM(reKXbcaYBR%$vNPDURR@OoeiCV+gCe7X()%>9 z1n=rf<*T)^0vQ0CJ*;v^{0d(l8U{9jpd&XJ8^;*Q7y@`%j#Y?(v~f+BT2Q>X9q_4{ zP>#tFnJ7v|dZDkRm67Xf#O`c3D8V;Ie3Af@1fyR8yrUOH2iUeS=8{LO9K$dfat-f*OEP8Bc#uUHjnznXi`nP@_P^dU=t+a`nW%_x_Z=ibH~t7_!0_% zgp5~ev~k1@o1Pwlb!tPJ^f2proA)-(cenWY7yr+*>fMS|6OY|>hZ)*;A0yceF&bB< zTp|Q!Zvttn`NT*pD=bPBW1?Z3Z1Hk3@oU*i7Mz-(=nHeojUZx<%F?_`hO4Skw_~yR z1sh2X&5@NBLz;O-WOfOI8mx?by#+n59qzOre{_2A)#2X_af%f{yaO3?j_l80cZ(;n z8WuJc4yWmqrXNxwGiY^m48g1oRkO)@-c)yLFWVMLdwgQ2rs`|mw7Dwp*)&`-zg1_=r}fPs4d1uzvXmkYh<*3Bi4@8uS40&%bn_AoT?%d)A*7%gI` zAco+&gL{NZ!RWfK`!6O#D|e%DnnJa zRL=|yz*c_v{F~v~w}&r8hjn;zd?vjA9CFmXt(OSRf}Dr@%d2F~*Jf320aj0%z*6U@ zFIf?0s2yh5Fzeb$;?Wz9@c#i&O9KQH000080NQnDQ~~6b*V7FE0C7hE02lxq0C0IU zYA$C1OC`Gh%gk3jhHG_5lC@1QY-O0PS2~ zZ`(K$fA^>0IWN-Ib(3}<4nb2On|1HfOVS{A7bv=gO3QStR+fCCoWxh`cfXmTNRj&E zIC0u$ff^`klQYBNaOTGuQpyf{`pZ)mrjsbS+-L7bub=)CTRPotw{x0Kma$-&kc%{t z(HOo@(+r4{E0N{Ez~1mH4!`4Agn5)EER4C7Lb5z%=c{~%EvB6`_@beS(R5d0B;i>}Yd zVIK%QDS$Z77jpmEvq>7t{(?sCr`hE*82wosjWeFDo>h5g`8@VJdA8c`F!=0fjb9xA z7d4R)Ds0*k9#P=ubeVHl;#0m@z|!{EQ2ci(l2G(?k7=l~3`LIQx!(~tp;+YXm}C#K z43@@K>Ln=kyiuudqfjAKu4G5~!eaI7=YE*Z7d&@IL{ET!bMS>k+yMWKOP=Q$_*x{# zt6cCb<0~~(B-Pryz;l<1MH3rOm&XY;7fQV5onSkI( zf-4>`!M2&Wj3n41h?2z;$tl=DLlS0s(`D{-`w*rS45a)qhE<+sUO8J|c>)9_EU08$ zdqfQK$oNQ)vB4FpIPCxe$UK4&`((@i1b`IB>9wTjDO39_jie}MAU_mK>P=GB_66HYv;Tul@A$W!+jbIUr zbq~Tg5{aHZeoCA4H|93XNZ1e1LxY6R1v_9}{LwW8J8YD)aRf7tqp1j2Az%=|SdN%$ zn;GYc{#n`>85fzjg7GYAm!V=8)x{`gkIta|@IP*>#2v zf+K#y=1;mkN|a}4J!GMX@eJ)1eHzIx86_+v0L_FPA)3N}Od%ESS!oz9Gv%%_59M`erz$oNn6XPEQ71P?1C25XxQO!z4Sz4rA@N*V+^(uu3MjS)gGLo;XgSxkz)K{7k{7_V@r#2q z041IcsP2~0206b+wB^8NYT*r#6#jG=$B>g0S^&zXQsq9tX+%sw1ml@Ns8vo;x%d$QzkIYs%d;HQ1yWWat6=uYk+ZwhOI8=Q6e`mnI`>@Sk*0;-=anU z8}b5a9JZpZ!bb@UEVzpT-LJcU?X$0Ye|7tiug`hzA-DI%$K4&iT=~93k4gO(SRh0t zlLo1w{HluGuU|p>U!~|Scvt^(8YLdF`EDYB25ksX%*|SY6S}OZgkN`vO7P0|pbUb+ z>G|k=FdU3<*92Pc!$-eM2)&YhNMj^OhPa!i{W<>(HhIZjFcAYT5wYJb#DIWc1hP&J zM*|}XjIyaPZNVRn6?`&j#0qHdR%s!N;8`Oz_0;H95H>QXE*p(tz@oDeK~PbziXM|l zhCEx#R{%kkvuJK+g!Q8)dy{V3sUWgx7qd4wSlp&iO&OS=+FF-r$7UU&YDtg+#v7j?@J27VUx&Ryyg?8LUeg7DKl8!7z5nmr^MO13^Sky979nSB!_0 z>n*Zq9_7&$lwD9-Wr8bnY0~R7`$C5230Pyb5RlLp*82X?=mfxgK_@}UMBLc<4WE2o z%KU`O{NOc*Xtx6w!Vl2S_^9o;yX@rw3VEhXYQz$qT+$a0NNNt326 zPAq_y3i|%x#l_+K;OOn?>EYS$!P(*IfIa!s(b}f_7a+p$-EV8@O`aR1M@k^$^EX zGTTZzw+aYk9EEy21na{OX)V-t#vh&>9}OPH{0J1v?J!GKdb+;4Zh?sN@3=f`svQXK zNWun@a{y{pW6@exHps$Oj4-HQ{PGM^`HiTcg0gfEigE83?H|62F;z7p2aoG=9e8l! zrVW8cussw2q&6Tut2uxh?u0O;xP+{`N4pgy*rq_KZTe^i&;hsaAb@XpQV|4xH4BMJ zy#S9_ofws!WmFu|wyhfo7Tnz-XmBUE1W)6^X{>RlfySNS?h+)py97w^;1b;3-6e22 z_Rc-$y!)hUbd9P%U#-2z`cYML?wV9;SHBr?j}~U6EL|B-VNEuDv#Fex3H4KZn$D+%WT&qOM*He8N{_bFRkc(R*&u_k038ixr7Ixh>kgi;B;zKNr8J zzC6=@zp0KF-7NtOYWk=kcY=r$`9p`DcT|$K-}G%~5%cHRvhY3N^6r{xlqOJ&t((Z8 zVRGcf6uX3krnuDM@wZv)WJ2lIKJKu>Z%cfA8RQr_LZXv?LyU3ntE}jqj>}(D?I`xT zILp^{S1YYojkBm#{)+L}XwFIviQ7hp7mHAcfsIVmy@z0yd@IEwkCI=kU3V+#3g&V3 ztO`P8v)tuItPQLYfE|9w;Go9NeM^L4n;lQNM8?z^baO;))@mWjK&|hZSWWiT~F4e3AG059-Otp`_(CuUMQ?&iy%ddL1K7;X6Y zaT1D-AQYxOv4oNgt+!Fk=a6E2BuVU(GDko1&7OVvQ01F?`EZJbdq{!uh#z8KOm|kN z4T;IoF+hVU2(n8yTD7Jcv50ZRvc|~%TG%A)Cc&+2C6Go4V<{4Nbp?t&-#eb4Shpat z=z*6BCVtSTf5p)6Y>$4DTyxpZP**kfdiZgRtM&e9yUJzI>cLS8OY z#>DeDOQXNxitn28S%sJ7JUL>LII=&+{L4uvXC)i~o%JP-TWPVa``cQ6sb*ad?NwU4 zDlbo)M2!gSMMPlBfk#%MC+?zxKV3320>4FI9LDBc9g;j&go}#K!y1bc6GR3@CLYwU zb6IL)b@O|Exa?i)DdX`27xJU2_p(Wr)Gb8E*Mdj?od?PbB;&BH4lxG&ZEY3N$t-<( zC8l+Al*h}nrh|Jt5LdYRcW2gx(W)EV&(D@hxR!` z-tR#Y?a&SjTznzcr`LKG`>SnaMLzb}v?k&4$ zyJ8wZsOm?R_9W*-zh4{`&yBvjYH(T(f$m%Spt3J z!YchZnXyWhT<1bmR!nN*$7$>F;jYK+=X>n~4r#VsLn+vNFnko&=ZrX?)unP{tY{A+*Q$kiJy#1DvcFv7aXS;4v=_h8{+oen8@fWaYy*vc4VffJ9H z;xWBmBH3p*p>~PFSBJlj$9C>A&=l`S^*zDNVHLE0Th56eDUHfw#tLC9= zn-gmr4p%oDqxl(~&>H377r1{uB*sfRsqX@nVeg3m0MKhv&;LrqF|n~UvvYAabAtRc zAD*U%%UEog%h=T&1pxEr1_l6lgZmopb1ZD_yvZBC`iHstA~YAmQ#wqi+whIaFllT? z@%v#-kIPaKa{Ytn$7?vic^-==xf?}ZsUj`51>nz#4BQF<+sl*c zaH)sEigM3m*(}>j$fp~Gz@Lj&9jpZwr?r|}LmE^mwKgPiUpzWK4-otOXwxyD(_krU zVd;sop(tRv78r}LvqLxZ;#CJZ1M{!VvLID7i@_e!h1)b-yV zsSSUeH581(B3Lh!+ijBymCkpFsvJ^{BSq0yBhJOKN96e#OshIsd-3OK?i9HXp!C3? zGJ&n(Np%SaOnMGiR`M#7sg(#0A^Jny(np%E%rj=uMw8Tt-%X{tu;-Q3@Bob4_@%q$ zOm79OojEjbQiXo~!c&NEh!#7`{h*vVDq3^wb zDwJr=(tX(&AVY0^*Cvg3Ihs4rHiF@sGvf+>*H;CmtkLkU%H~$U40mBoYq5`WnRY_n zh^<_MXM@WL*s46_{F91j{mTIzw8H-4mApQPwB%rKkD;wHvmti5MF`EAtTPo$*fQg? zLYUjU=)cwK-=!FxoTUec7SX@(w9ATS8a|moVZ;zh*0Z_c1twE>5%#zDT^^#(Fl-HB z%C+NoqTI7mwBufy%0+)jfQ)vo*G*%Rxl&Ltd%q7aaw`#Qv&gV0o*5Klh>%T5`VGaK z8&qfkCP+Y1!XYMbkU2DsDh1+R*cXQs0?wAwEROT38IoHV;o)@8R2jaa^HQCkNIpX2 zZcA5US9#n)(K#eyW8P;n_)(EfpJboF@>b#%)S+9KRKwU{%wrQ*qQ~zB%TEX|3VDAB z;jDt<+|hJYTdfflsI)5h8Fw#mYCvy-zpnHtQEfBrZ-31NQK(C18Oyt zSYKSr5LU=oEy*m}fC$$3ay)cY7`v~^S_VxBi1vTpGn$iJ8a1v4Le&aD4KqJ>i2NAE zVISA-4_g!#4-ArjFUJekjz07U?R`7vr&6f&0n`9(z;0Ku_o)?~H|(6?9_8Jb-d_?n zrPnC54pU$05CjbkvdK9hl`x8)yDlBs%fIHmp;|n)$WM|et$qGUiv_#&dfm-Kqw+!(_mE!9Gf9cY{uwTo zso#KvSaX!j;SldFQ&ba{o5157_L2%f^?hU&b8PBX6>HsNr4oCc4DJB66HL&%ZzTOp z$O0xBBN<2*veRKivJ}upK9Y#W69=R~df8}t2Utg@ubgFZ?Xrv3^qQlOgZUstMfHU9YiH^=BeV z5zM$usFwz`8$+gdnR>*mn3?1&)~-ML=-`P(;wOLoQO$n|CB{aj;L3_e3oE#9sc@EK zR#We;!u|GiHA6&>RIGjDG*#~;kX>Po`kLZg*Ae_>7mcGDjRaLwyMeg&uq;(YrWxuo zMeH9<{3PMov#yiO7vyX9E#JCc5&k{)N7MHhI+N?t?-hqojjMPLRNdsmMwIJ9299dP zxS4cwHS_jTDXV^Q3`jyC(?T7l|!t=V3Mzoft^{{5K16q}`a{4uf)R9Pg?huwE zTsg@ufF0<4347p!(9hoXQt@Rt6}fz_oK5^{M1mXVtM@OY_XGR_Us3a1C}%*UwMlUd z?;|6+C~XyRLr%_0c^u~|<=FpxdE-dQSB9z|l~@bcCv5>Wo_x=d`TVRQ(QHrf^it^iJcgjIsY=jeD34J;B3>-n1TIcmSHEGZ%$80S5UDQsC%w|Im068ne9DT#= z$Vy%h*dsqv+}v0FQF_8}V@Rl=4-Msms>&eZaTUa%q|7cU=wvS`JcCDvFw-ejvp3FR zjDE8R81u5QS=ajD2ChodP@`;2?+6z#S+V};LHed?Nw(@u2|~s%yasmS0wV`0esBjV zmKO{$#g44xRpBs@ZA57LPOH5DIX{QZR_p z|C^#xrKqlhWgqSbkwbi9Wcyb^#k8qJa_SSY0jGB6F z87$O8@IR+Wj{|gUQ;INCYHhWH0Yi*b!S>b9y{%_~=}HX8W`A8>Z zo_vfSzXw&@hOHM?e)eP}Wo~Q3tPXkPrUjD+P=nXZ`|_5{P4(nXNsocA-_<+Qihy{r zJSaxbM=Mw@Bvm?gX~$f_QGOPGC^cDcC5gNZBjV;?fg0k#Y$<_5WiLy_#6 zBbJ!c+b(74NY#$Jor2Z?x_iR2i4bwOFxc<@^bjXl+OfBPWY_WM--Bod5?t4FPSfc0 z-geQVYSea)vT?)l9pHUY9vL5VAQrKshax$$Z4Yiiwcf@3dcYn@2j}wDNz}4lV`4?_ zV_A2#?U@@zAJfRuu$h`h`?9#^9e!*q!OB54(Ul~-y~p@)f8*z$>G^4C2Zll{bS-T$ z7=A?6MqLG_kpH}JgNiuaTJ*;qNaI-cmvhB6D*2On1uE z=g^WiLp$yyLe`h+3Y+krz;?sL{q^>^yk>9nfPBMMOi0iqSmTGN74|Y*N^K}wbD@BP z@Pmdh!Vjd51wPK19|X^KvB(}5!e~dvvyFRC$!e z^Udh+SXv$e5)O+PoR$;OVkR(Y%HDzR_GF_OHZdZMT6^S5$T|%e7*GyO;sHRuS65_L z=q1wb%?y_I5@d16mNir#_%wDg+l?7QvXf30dC7^8KT8x!A}0{-6T%)QGNr798>4Bq zB3WTw-WbP<#AOQrEvtw*PtD{-mKqA$EDCmNH*u0zJ=> zw~Qk81aAz+52my-%XcaxU(a4}Gyzf$oouWEMJnu8}DE9@=1-nA9c0gXjd}&6V`6-RuWMg^H!crx%hHRP-Lf}IiWW*0c5os z-B+PmT5dnRqL3ShJSfx7F7yH|j?SEYtGiWXFrM5xn50WyLrj2{Tb@##jnBo`moYk2 z_|2HMf<7tdZc;rd(&eFMwB~J_N{P$?njk-XnPEZ6S)+^xJ&HrU0f^2`Zmvm~Dy z507;!{PG(g*2xE1uFHo(onx34KC8P0$z5eBBOA-XJLvU9{0Mceu+n1M%SeO`IZb6L zm=zqSmssvW%YWWHRabqjpXJRW5bZWDsg2etFixwyl*i6K1!AZe2sPbSw>s$4RYXb9 zU~KauHbuohug^53+)DnCK{i5w(*9<|a*iF?@B~knZ@5nzF!m>0<|f7hAmG%oU%-P) zRG|Xqrgt%r0l5nxPSfxBLj;$Q$i_emmC`~U)Vdd0dl9bo`@5YWLCHp*{N~b9YFXUq zbIVLo!YD-Esuo$`kY;p6@N6JpYV`xfeiaV%E~ZhyS67f2I`-qON#S) zHz00rDzWs_+QfctKgGRl1>etx2hks=EFzTU=-i-od9^w0w}R5nw$jX3>#!5mrYf%A zN!T*;I*#E?9Imx5&mODx)f`*cQz}cW4bOAr58)4&%sM=w;W%JtzuA1yw9~r@B^67` z8#vPX@lgiEE$CFncc5!t@5;yNUJ4G#J(Ouio@8ypm39FR>QG&P7q+Biuyx!AUmih( zkvBD2)&9#U&iE6eh#5KSwZNuBei2$>>;@Xa*HSR3G>yk}?llulFEL@fbdeW#E@!gURzw7>U{R#taFwPK(ZJ{_ddh;9BJ`DL-3>%dg zY)QF&f98ReuQgk7LV^p?*^#UHZ8Yde~^u23i5_DS+zcNb546#LuLy;o-y zBzM~p{GR;Mpz~N&j7SZs->e?Es$Mp2J9-NDM<^$MndmRkM{^&5c&x~HKUD-U5RKK8 z>4}YzAz^726bt^`&=JXax>Nm(ex#|Mzx(NDV#hW5!&9yxVkHJmHsOt2xQ6%61B(Kw z&)K8f%jMX`t=8F?ks>`ajhFnbP9(;v;h$9UtWIxFcSvS7Sw~j%M%TC~Oi_D8dbtgG z!8~k_Jd8J%3@pxdyttREOt@gQIRe8sFtQ|a!RJ0MnV>xpk#L2>HP>WSkd8m}j_vM>9z_9^Z54n3?Tb~Ic{6`PUm-~$HB0r;{Vms%XW z`-VImL9|2|Yu`vZi8e0hH)EM8a4xF@m9&XQ`1ca3C5is|=%(XF4?Qr`suHWbDdu#q zLgES&u88|C2!j3A(d}A+bDZi6d1F43rqqy=U9Ot|-*8~gj~_a2Pqg^=k5O=4h*vjq zjf`CG&h9*a9Pa99woQ-(N(Bi0UEAF6o5Y8qcdd49&?SQYq|wOd66@c^YEjcRjeT{c zBtMcfSa!;6`N-df1-EExfis8BnpiW#f!^_5ereoFY;9Ta&gcUM-W%g zT-qBsXDS4(W>0ZGDu64VKV04D+IX#2gAb=i(t-3WUZ2G0t??FVm9OK! zd0x+8|8Z*6t{ROQ>je$nI;(_ap_}*t)SSz8`%zjM`cV3~9Q5MJU7_$`4lJY0NR7z^ z!kZ!K$nER(cmvj(G`ykd_9!eKg)OW#p+xYA)j_O+Mwta6a}^>@PQK0OEaPgnfVXs9 zyyz0-jA>a0dYOg=yWsupt0VV}S|cR>xN%sB=4@-opeExM8G#4#+#YTUPjvy4E0-!{DYk)?wr#;#$hH;75;kU zlD?^v#fkO(5)M^2)t^41mh9=+vtI6( zoIG^OEd3@t3p$YY&HiFB^U#)e@2t4kR1J<^jwEi1*q^xCDk|B|b}?W}4g$%JX-&Eg zIVHOk$Op~UG**h#8>1*tERG7R^*bNRy`LMgMDHNUv!7FB5nHB=X+5>V#~MEg&Tb31 zB{qdWh)3f~Ym=kUN4`%J(4vU=cG4mn^E*0&D_uKsoi`M59g2?=mq3qzf%5$yHA)Uy)^~}A3 zcmpC0nn4~dI!z}x@v0}XZmo8bj8UnfEXFM;=L8vFdJQE$j&;6#@W@pz$tI29iFk-m z;k#(8Comj3$k5Q4h6KvXv@dq`GoI6&e26Sht}^Slf|)nB)pdJ&P0%-AMZ=Q=x&R~K z!_|>l$|s!!@!mGNqZqdKpTyva~8yS&xm0P{WoB(zSh8qPa?T#kM`iumls3)7;#!b=KwZIY<^6F|cd!@NYJlE!J zDN`jClfqoS$V7HT&I?GNzQB{* z;?6O>FNmXJ4TRa6N(R|>X}UR0jjtncmF-qJs?s=JOCw!y6|s5-Iz+!(5a z2p$kKTAm?q_@N)#w~J0;6|hM@^?mM#AzEU4R2tJBzsJ$-v=#L=mX{QQnTP%wQ5#J; zVlr#DSM=?l-!-9H4#x3{b3NO2+_O9&EvaY7wd)i?dLE8h9*QzrNBr`5N#TJ58OWhiMMf_y-sATK&gWoV$EVWkMYEqjFN9~N%*fhN zEpt$vNQkI3$iVG}>`wsrq zZ}_(#Cg527FGmg@+*WKF>hJ(K`Tx|A{)YOCnuG*ca0RQ6KBR}#!2sDUn)>SXV*lz5{r@PI)(CD+>2*e5)x&?Iic$WZYG`O_XX#>S`2R?K0|Bc?v=~z7d None: Notes ----- If you run jobs in parallel, use :class:`pyspark.InheritableThread` for thread - local inheritance, and preventing resource leak. + local inheritance. """ self._jsc.setLocalProperty(key, value) @@ -1423,7 +1423,7 @@ def setJobDescription(self, value: str) -> None: Notes ----- If you run jobs in parallel, use :class:`pyspark.InheritableThread` for thread - local inheritance, and preventing resource leak. + local inheritance. """ self._jsc.setJobDescription(value) diff --git a/python/pyspark/util.py b/python/pyspark/util.py index 5abbbb919636f..b7b972a5d35b8 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -331,13 +331,10 @@ def inheritable_thread_target(f: Callable) -> Callable: @functools.wraps(f) def wrapped(*args: Any, **kwargs: Any) -> Any: - try: - # Set local properties in child thread. - assert SparkContext._active_spark_context is not None - SparkContext._active_spark_context._jsc.sc().setLocalProperties(properties) - return f(*args, **kwargs) - finally: - InheritableThread._clean_py4j_conn_for_current_thread() + # Set local properties in child thread. + assert SparkContext._active_spark_context is not None + SparkContext._active_spark_context._jsc.sc().setLocalProperties(properties) + return f(*args, **kwargs) return wrapped else: @@ -377,10 +374,7 @@ def copy_local_properties(*a: Any, **k: Any) -> Any: assert hasattr(self, "_props") assert SparkContext._active_spark_context is not None SparkContext._active_spark_context._jsc.sc().setLocalProperties(self._props) - try: - return target(*a, **k) - finally: - InheritableThread._clean_py4j_conn_for_current_thread() + return target(*a, **k) super(InheritableThread, self).__init__( target=copy_local_properties, *args, **kwargs # type: ignore[misc] @@ -401,25 +395,6 @@ def start(self) -> None: self._props = SparkContext._active_spark_context._jsc.sc().getLocalProperties().clone() return super(InheritableThread, self).start() - @staticmethod - def _clean_py4j_conn_for_current_thread() -> None: - from pyspark import SparkContext - - jvm = SparkContext._jvm - assert jvm is not None - thread_connection = jvm._gateway_client.get_thread_connection() - if thread_connection is not None: - try: - # Dequeue is shared across other threads but it's thread-safe. - # If this function has to be invoked one more time in the same thead - # Py4J will create a new connection automatically. - jvm._gateway_client.deque.remove(thread_connection) - except ValueError: - # Should never reach this point - return - finally: - thread_connection.close() - if __name__ == "__main__": if "pypy" not in platform.python_implementation().lower() and sys.version_info[:2] >= (3, 7): diff --git a/python/setup.py b/python/setup.py index 673b146cb6c5d..a8d16ff922951 100755 --- a/python/setup.py +++ b/python/setup.py @@ -258,7 +258,7 @@ def run(self): license='http://www.apache.org/licenses/LICENSE-2.0', # Don't forget to update python/docs/source/getting_started/install.rst # if you're updating the versions or dependencies. - install_requires=['py4j==0.10.9.3'], + install_requires=['py4j==0.10.9.5'], extras_require={ 'ml': ['numpy>=1.15'], 'mllib': ['numpy>=1.15'], diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh index f27b6fe8d9a04..6044de2599ef6 100755 --- a/sbin/spark-config.sh +++ b/sbin/spark-config.sh @@ -28,6 +28,6 @@ export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}" # Add the PySpark classes to the PYTHONPATH: if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}" - export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.3-src.zip:${PYTHONPATH}" + export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.5-src.zip:${PYTHONPATH}" export PYSPARK_PYTHONPATH_SET=1 fi From fa71029c1860830d0c47fe20b3f8831da31d4820 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 18 Mar 2022 16:44:12 +0900 Subject: [PATCH 012/535] [SPARK-38583][SQL] Restore the behavior of `to_timestamp` that allows numeric types This PR is a followup of https://github.com/apache/spark/commit/fab4ceb157baac870f6d50b942084bb9b2cd4ad2 that mistakenly removed the numeric type support in `to_timestamp(...)`. This PR restores the behavior back. To keep the previous behavior. To end users, no because the change has not been released yet. Unit test was added, and manually tested: ```scala spark.range(1).selectExpr("to_timestamp(id)").show() ``` **Before** ``` +----------------+ |to_timestamp(id)| +----------------+ | null| +----------------+ ``` **After** ``` +-------------------+ | to_timestamp(id)| +-------------------+ |1970-01-01 09:00:00| +-------------------+ ``` Closes #35887 from HyukjinKwon/SPARK-38583. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 681dfee7a0fa040b8928c65ef34471ee7239621c) Signed-off-by: Hyukjin Kwon --- .../catalyst/expressions/datetimeExpressions.scala | 6 ++++-- .../src/test/resources/sql-tests/inputs/timestamp.sql | 1 + .../sql-tests/results/ansi/timestamp.sql.out | 10 +++++++++- .../sql-tests/results/datetime-legacy.sql.out | 10 +++++++++- .../resources/sql-tests/results/timestamp.sql.out | 10 +++++++++- .../results/timestampNTZ/timestamp-ansi.sql.out | 11 ++++++++++- .../sql-tests/results/timestampNTZ/timestamp.sql.out | 10 +++++++++- 7 files changed, 51 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 013f11ac29786..15ab3a2abd63c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -2080,8 +2080,10 @@ case class ParseToTimestamp( override def inputTypes: Seq[AbstractDataType] = { // Note: ideally this function should only take string input, but we allow more types here to // be backward compatible. - TypeCollection(StringType, DateType, TimestampType, TimestampNTZType) +: - format.map(_ => StringType).toSeq + val types = Seq(StringType, DateType, TimestampType, TimestampNTZType) + TypeCollection( + (if (dataType.isInstanceOf[TimestampType]) types :+ NumericType else types): _* + ) +: format.map(_ => StringType).toSeq } override protected def withNewChildrenInternal( diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql index 21d27e98ab440..b0d958a24b149 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp.sql @@ -62,6 +62,7 @@ select UNIX_MILLIS(timestamp'2020-12-01 14:30:08Z'), UNIX_MILLIS(timestamp'2020- select UNIX_MICROS(timestamp'2020-12-01 14:30:08Z'), UNIX_MICROS(timestamp'2020-12-01 14:30:08.999999Z'), UNIX_MICROS(null); select to_timestamp(null), to_timestamp('2016-12-31 00:12:00'), to_timestamp('2016-12-31', 'yyyy-MM-dd'); +select to_timestamp(1); -- variable-length second fraction tests select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]'); select to_timestamp('2019-10-06 10:11:12.0', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out index 2946842e3f6e4..1bd579e93a943 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 97 +-- Number of queries: 98 -- !query @@ -326,6 +326,14 @@ struct +-- !query output +1969-12-31 16:00:01 + + -- !query select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]') -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index ebfdf60effdae..9b00d3815c134 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 194 +-- Number of queries: 195 -- !query @@ -1138,6 +1138,14 @@ struct +-- !query output +1969-12-31 16:00:01 + + -- !query select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]') -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out index 0ebdf4cc01615..792b0a52f7337 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 97 +-- Number of queries: 98 -- !query @@ -322,6 +322,14 @@ struct +-- !query output +1969-12-31 16:00:01 + + -- !query select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]') -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index f7552ed4f62cc..9cad5a32598b6 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 97 +-- Number of queries: 98 -- !query @@ -326,6 +326,15 @@ struct +-- !query output +java.time.DateTimeException +Cannot cast 1 to TimestampNTZType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + -- !query select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]') -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out index 06e255a09c3e3..4ae5a8d5e8ad4 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 97 +-- Number of queries: 98 -- !query @@ -322,6 +322,14 @@ struct +-- !query output +NULL + + -- !query select to_timestamp('2019-10-06 10:11:12.', 'yyyy-MM-dd HH:mm:ss.SSSSSS[zzz]') -- !query schema From 61278786adf04c5652335ec438c55bdb9c47fbf8 Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Fri, 18 Mar 2022 18:27:45 +0900 Subject: [PATCH 013/535] [SPARK-38593][SS] Carry over the metric of the number of dropped late events into SessionWindowStateStoreSaveExec ### What changes were proposed in this pull request? This PR proposes to carry over the metric of the number of dropped late events from SessionWindowStateStoreRestoreExec to SessionWindowStateStoreSaveExec, so that state operator progress for session window aggregation operator will have correct number. ### Why are the changes needed? The number of dropped late events for session window aggregation was missed in the mechanism of SS metrics (state operator progress). ### Does this PR introduce _any_ user-facing change? Yes, the number of dropped late events for session window aggregation will be taken into account for state operator progress in streaming listener, as well as a graph on late events in the SS UI page. ### How was this patch tested? Modified UT. Closes #35909 from HeartSaVioR/SPARK-38593. Authored-by: Jungtaek Lim Signed-off-by: Jungtaek Lim --- .../streaming/statefulOperators.scala | 23 +++++++++++++++++++ .../StreamingSessionWindowSuite.scala | 17 ++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala index e367637671cc8..e2a0598644258 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala @@ -743,6 +743,29 @@ case class SessionWindowStateStoreSaveExec( override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = copy(child = newChild) + + /** + * The class overrides this method since dropping late events are happening in the upstream node + * [[SessionWindowStateStoreRestoreExec]], and this class has responsibility to report the number + * of dropped late events as a part of StateOperatorProgress. + * + * This method should be called in the driver after this SparkPlan has been executed and metrics + * have been updated. + */ + override def getProgress(): StateOperatorProgress = { + val stateOpProgress = super.getProgress() + + // This should be safe, since the method is called in the driver after the plan has been + // executed and metrics have been updated. + val numRowsDroppedByWatermark = child.collectFirst { + case s: SessionWindowStateStoreRestoreExec => + s.longMetric("numRowsDroppedByWatermark").value + }.getOrElse(0L) + + stateOpProgress.copy( + newNumRowsUpdated = stateOpProgress.numRowsUpdated, + newNumRowsDroppedByWatermark = numRowsDroppedByWatermark) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowSuite.scala index d0f3a87acbc29..3ed23bad6a36c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingSessionWindowSuite.scala @@ -216,6 +216,7 @@ class StreamingSessionWindowSuite extends StreamTest // ("structured", 41, 51, 10, 1) CheckNewAnswer( ), + assertNumRowsDroppedByWatermark(2), // concatenating multiple previous sessions into one AddData(inputData, ("spark streaming", 30L)), @@ -319,6 +320,7 @@ class StreamingSessionWindowSuite extends StreamTest // ("spark", 40, 50, 10, 1), CheckNewAnswer( ), + assertNumRowsDroppedByWatermark(2), // concatenating multiple previous sessions into one AddData(inputData, ("spark streaming", 30L)), @@ -406,6 +408,21 @@ class StreamingSessionWindowSuite extends StreamTest } } + private def assertNumRowsDroppedByWatermark( + numRowsDroppedByWatermark: Long): AssertOnQuery = AssertOnQuery { q => + q.processAllAvailable() + val progressWithData = q.recentProgress.filterNot { p => + // filter out batches which are falling into one of types: + // 1) doesn't execute the batch run + // 2) empty input batch + p.inputRowsPerSecond == 0 + }.lastOption.get + assert(progressWithData.stateOperators(0).numRowsDroppedByWatermark + === numRowsDroppedByWatermark) + true + } + + private def sessionWindowQuery( input: MemoryStream[(String, Long)], sessionWindow: Column = session_window($"eventTime", "10 seconds")): DataFrame = { From 28f7d955c9125ccedb3b5b4ff0f97ac283fbcc88 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 18 Mar 2022 21:01:31 +0300 Subject: [PATCH 014/535] [SPARK-38600][SQL] Include `unit` into the sql string of `TIMESTAMPADD/DIFF` ### What changes were proposed in this pull request? In the PR, I propose to override the `sql()` method of the `TIMESTAMPADD` and `TIMESTAMPDIFF` expression to include `unit` as the first parameter of the functions. ### Why are the changes needed? To improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? By running the test suites: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z timestamp.sql" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z timestamp-ansi.sql" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z datetime-legacy.sql" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z timestamp-ntz.sql" ``` Closes #35911 from MaxGekk/timestampadd-unit-in-schema. Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 53eaaf867c3849e01ea19840820bbdfc031e5606) Signed-off-by: Max Gekk --- .../expressions/datetimeExpressions.scala | 10 ++++ .../sql-tests/results/ansi/date.sql.out | 40 ++++++------- .../sql-tests/results/ansi/timestamp.sql.out | 16 +++--- .../resources/sql-tests/results/date.sql.out | 40 ++++++------- .../sql-tests/results/datetime-legacy.sql.out | 56 +++++++++---------- .../sql-tests/results/timestamp-ntz.sql.out | 8 +-- .../sql-tests/results/timestamp.sql.out | 16 +++--- .../timestampNTZ/timestamp-ansi.sql.out | 16 +++--- .../results/timestampNTZ/timestamp.sql.out | 16 +++--- 9 files changed, 114 insertions(+), 104 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 15ab3a2abd63c..fbc670f5a8c81 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -3121,6 +3121,11 @@ case class TimestampAdd( override def prettyName: String = "timestampadd" + override def sql: String = { + val childrenSQL = (unit +: children.map(_.sql)).mkString(", ") + s"$prettyName($childrenSQL)" + } + override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): TimestampAdd = { @@ -3203,6 +3208,11 @@ case class TimestampDiff( override def prettyName: String = "timestampdiff" + override def sql: String = { + val childrenSQL = (unit +: children.map(_.sql)).mkString(", ") + s"$prettyName($childrenSQL)" + } + override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): TimestampDiff = { diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index 437b56e2ffa3e..98b1ec42a79ca 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -665,7 +665,7 @@ You may get a different result due to the upgrading to Spark >= 3.0: Fail to rec -- !query select dateadd(MICROSECOND, 1001, timestamp'2022-02-25 01:02:03.123') -- !query schema -struct +struct -- !query output 2022-02-25 01:02:03.124001 @@ -673,7 +673,7 @@ struct -- !query select dateadd(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456') -- !query schema -struct +struct -- !query output 2022-02-25 01:02:03.455 @@ -681,7 +681,7 @@ struct -- !query select dateadd(SECOND, 58, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-25 01:03:01 @@ -689,7 +689,7 @@ struct -- !query select dateadd(MINUTE, -100, date'2022-02-25') -- !query schema -struct +struct -- !query output 2022-02-24 22:20:00 @@ -697,7 +697,7 @@ struct -- !query select dateadd(HOUR, -1, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-25 00:02:03 @@ -705,7 +705,7 @@ struct -- !query select dateadd(DAY, 367, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-02-27 00:00:00 @@ -713,7 +713,7 @@ struct -- !query select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-28 01:02:03 @@ -721,7 +721,7 @@ struct -- !query select dateadd(MONTH, -1, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-25 01:02:03 @@ -729,7 +729,7 @@ struct -- !query select dateadd(QUARTER, 5, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-05-25 00:00:00 @@ -737,7 +737,7 @@ struct -- !query select dateadd(YEAR, 1, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-02-25 00:00:00 @@ -745,7 +745,7 @@ struct -- !query select datediff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') -- !query schema -struct +struct -- !query output 1001 @@ -753,7 +753,7 @@ struct +struct -- !query output -1 @@ -761,7 +761,7 @@ struct +struct -- !query output 58 @@ -769,7 +769,7 @@ struct +struct -- !query output -100 @@ -777,7 +777,7 @@ struct -- !query select datediff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') -- !query schema -struct +struct -- !query output -1 @@ -785,7 +785,7 @@ struct +struct -- !query output 367 @@ -793,7 +793,7 @@ struct -- !query select datediff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03') -- !query schema -struct +struct -- !query output -4 @@ -801,7 +801,7 @@ struct +struct -- !query output -1 @@ -809,7 +809,7 @@ struct +struct -- !query output 5 @@ -817,6 +817,6 @@ struct -- !query select datediff(YEAR, date'2022-02-25', date'2023-02-25') -- !query schema -struct +struct -- !query output 1 diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out index 1bd579e93a943..cf6af94818939 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out @@ -784,7 +784,7 @@ You may get a different result due to the upgrading to Spark >= 3.0: Fail to rec -- !query select timestampadd(MONTH, -1, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-14 01:02:03 @@ -792,7 +792,7 @@ struct -- !query select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-14 02:00:03 @@ -800,7 +800,7 @@ struct -- !query select timestampadd(YEAR, 1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2023-02-15 00:00:00 @@ -808,7 +808,7 @@ struct -- !query select timestampadd(SECOND, -1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2022-02-14 23:59:59 @@ -816,7 +816,7 @@ struct -- !query select timestampdiff(MONTH, timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') -- !query schema -struct +struct -- !query output -1 @@ -824,7 +824,7 @@ struct +struct -- !query output 58 @@ -832,7 +832,7 @@ struct +struct -- !query output 1 @@ -840,6 +840,6 @@ struct -- !query select timestampdiff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59') -- !query schema -struct +struct -- !query output -1 diff --git a/sql/core/src/test/resources/sql-tests/results/date.sql.out b/sql/core/src/test/resources/sql-tests/results/date.sql.out index 91c89ef5a93d7..83f59cbca2ae7 100644 --- a/sql/core/src/test/resources/sql-tests/results/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/date.sql.out @@ -664,7 +664,7 @@ You may get a different result due to the upgrading to Spark >= 3.0: Fail to rec -- !query select dateadd(MICROSECOND, 1001, timestamp'2022-02-25 01:02:03.123') -- !query schema -struct +struct -- !query output 2022-02-25 01:02:03.124001 @@ -672,7 +672,7 @@ struct -- !query select dateadd(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456') -- !query schema -struct +struct -- !query output 2022-02-25 01:02:03.455 @@ -680,7 +680,7 @@ struct -- !query select dateadd(SECOND, 58, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-25 01:03:01 @@ -688,7 +688,7 @@ struct -- !query select dateadd(MINUTE, -100, date'2022-02-25') -- !query schema -struct +struct -- !query output 2022-02-24 22:20:00 @@ -696,7 +696,7 @@ struct -- !query select dateadd(HOUR, -1, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-25 00:02:03 @@ -704,7 +704,7 @@ struct -- !query select dateadd(DAY, 367, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-02-27 00:00:00 @@ -712,7 +712,7 @@ struct -- !query select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-28 01:02:03 @@ -720,7 +720,7 @@ struct -- !query select dateadd(MONTH, -1, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-25 01:02:03 @@ -728,7 +728,7 @@ struct -- !query select dateadd(QUARTER, 5, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-05-25 00:00:00 @@ -736,7 +736,7 @@ struct -- !query select dateadd(YEAR, 1, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-02-25 00:00:00 @@ -744,7 +744,7 @@ struct -- !query select datediff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') -- !query schema -struct +struct -- !query output 1001 @@ -752,7 +752,7 @@ struct +struct -- !query output -1 @@ -760,7 +760,7 @@ struct +struct -- !query output 58 @@ -768,7 +768,7 @@ struct +struct -- !query output -100 @@ -776,7 +776,7 @@ struct -- !query select datediff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') -- !query schema -struct +struct -- !query output -1 @@ -784,7 +784,7 @@ struct +struct -- !query output 367 @@ -792,7 +792,7 @@ struct -- !query select datediff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03') -- !query schema -struct +struct -- !query output -4 @@ -800,7 +800,7 @@ struct +struct -- !query output -1 @@ -808,7 +808,7 @@ struct +struct -- !query output 5 @@ -816,6 +816,6 @@ struct -- !query select datediff(YEAR, date'2022-02-25', date'2023-02-25') -- !query schema -struct +struct -- !query output 1 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 9b00d3815c134..03ec7957ed668 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -661,7 +661,7 @@ struct> -- !query select dateadd(MICROSECOND, 1001, timestamp'2022-02-25 01:02:03.123') -- !query schema -struct +struct -- !query output 2022-02-25 01:02:03.124001 @@ -669,7 +669,7 @@ struct -- !query select dateadd(MILLISECOND, -1, timestamp'2022-02-25 01:02:03.456') -- !query schema -struct +struct -- !query output 2022-02-25 01:02:03.455 @@ -677,7 +677,7 @@ struct -- !query select dateadd(SECOND, 58, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-25 01:03:01 @@ -685,7 +685,7 @@ struct -- !query select dateadd(MINUTE, -100, date'2022-02-25') -- !query schema -struct +struct -- !query output 2022-02-24 22:20:00 @@ -693,7 +693,7 @@ struct -- !query select dateadd(HOUR, -1, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-25 00:02:03 @@ -701,7 +701,7 @@ struct -- !query select dateadd(DAY, 367, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-02-27 00:00:00 @@ -709,7 +709,7 @@ struct -- !query select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-28 01:02:03 @@ -717,7 +717,7 @@ struct -- !query select dateadd(MONTH, -1, timestamp'2022-02-25 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-25 01:02:03 @@ -725,7 +725,7 @@ struct -- !query select dateadd(QUARTER, 5, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-05-25 00:00:00 @@ -733,7 +733,7 @@ struct -- !query select dateadd(YEAR, 1, date'2022-02-25') -- !query schema -struct +struct -- !query output 2023-02-25 00:00:00 @@ -741,7 +741,7 @@ struct -- !query select datediff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') -- !query schema -struct +struct -- !query output 1001 @@ -749,7 +749,7 @@ struct +struct -- !query output -1 @@ -757,7 +757,7 @@ struct +struct -- !query output 58 @@ -765,7 +765,7 @@ struct +struct -- !query output -100 @@ -773,7 +773,7 @@ struct -- !query select datediff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') -- !query schema -struct +struct -- !query output -1 @@ -781,7 +781,7 @@ struct +struct -- !query output 367 @@ -789,7 +789,7 @@ struct -- !query select datediff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03') -- !query schema -struct +struct -- !query output -4 @@ -797,7 +797,7 @@ struct +struct -- !query output -1 @@ -805,7 +805,7 @@ struct +struct -- !query output 5 @@ -813,7 +813,7 @@ struct -- !query select datediff(YEAR, date'2022-02-25', date'2023-02-25') -- !query schema -struct +struct -- !query output 1 @@ -1588,7 +1588,7 @@ struct> -- !query select timestampadd(MONTH, -1, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-14 01:02:03 @@ -1596,7 +1596,7 @@ struct -- !query select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-14 02:00:03 @@ -1604,7 +1604,7 @@ struct -- !query select timestampadd(YEAR, 1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2023-02-15 00:00:00 @@ -1612,7 +1612,7 @@ struct -- !query select timestampadd(SECOND, -1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2022-02-14 23:59:59 @@ -1620,7 +1620,7 @@ struct -- !query select timestampdiff(MONTH, timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') -- !query schema -struct +struct -- !query output -1 @@ -1628,7 +1628,7 @@ struct +struct -- !query output 58 @@ -1636,7 +1636,7 @@ struct +struct -- !query output 1 @@ -1644,6 +1644,6 @@ struct -- !query select timestampdiff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59') -- !query schema -struct +struct -- !query output -1 diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out index c4fcff4c2b81b..f36ffffe5f74f 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out @@ -70,7 +70,7 @@ struct +struct -- !query output 1 @@ -78,7 +78,7 @@ struct +struct -- !query output 1 @@ -86,7 +86,7 @@ struct +struct -- !query output 1 @@ -94,6 +94,6 @@ struct +struct -- !query output 877 diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out index 792b0a52f7337..9974a26c76a65 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp.sql.out @@ -778,7 +778,7 @@ You may get a different result due to the upgrading to Spark >= 3.0: Fail to rec -- !query select timestampadd(MONTH, -1, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-14 01:02:03 @@ -786,7 +786,7 @@ struct -- !query select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-14 02:00:03 @@ -794,7 +794,7 @@ struct -- !query select timestampadd(YEAR, 1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2023-02-15 00:00:00 @@ -802,7 +802,7 @@ struct -- !query select timestampadd(SECOND, -1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2022-02-14 23:59:59 @@ -810,7 +810,7 @@ struct -- !query select timestampdiff(MONTH, timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') -- !query schema -struct +struct -- !query output -1 @@ -818,7 +818,7 @@ struct +struct -- !query output 58 @@ -826,7 +826,7 @@ struct +struct -- !query output 1 @@ -834,6 +834,6 @@ struct -- !query select timestampdiff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59') -- !query schema -struct +struct -- !query output -1 diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index 9cad5a32598b6..3f275b2a2bdeb 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out @@ -783,7 +783,7 @@ struct> -- !query select timestampadd(MONTH, -1, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-14 01:02:03 @@ -791,7 +791,7 @@ struct -- !query select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-14 02:00:03 @@ -799,7 +799,7 @@ struct -- !query select timestampadd(YEAR, 1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2023-02-15 00:00:00 @@ -807,7 +807,7 @@ struct -- !query select timestampadd(SECOND, -1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2022-02-14 23:59:59 @@ -815,7 +815,7 @@ struct -- !query select timestampdiff(MONTH, timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') -- !query schema -struct +struct -- !query output -1 @@ -823,7 +823,7 @@ struct +struct -- !query output 58 @@ -831,7 +831,7 @@ struct +struct -- !query output 1 @@ -839,6 +839,6 @@ struct -- !query select timestampdiff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59') -- !query schema -struct +struct -- !query output -1 diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out index 4ae5a8d5e8ad4..85fa0beb99061 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out @@ -776,7 +776,7 @@ struct> -- !query select timestampadd(MONTH, -1, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-01-14 01:02:03 @@ -784,7 +784,7 @@ struct -- !query select timestampadd(MINUTE, 58, timestamp'2022-02-14 01:02:03') -- !query schema -struct +struct -- !query output 2022-02-14 02:00:03 @@ -792,7 +792,7 @@ struct -- !query select timestampadd(YEAR, 1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2023-02-15 00:00:00 @@ -800,7 +800,7 @@ struct -- !query select timestampadd(SECOND, -1, date'2022-02-15') -- !query schema -struct +struct -- !query output 2022-02-14 23:59:59 @@ -808,7 +808,7 @@ struct -- !query select timestampdiff(MONTH, timestamp'2022-02-14 01:02:03', timestamp'2022-01-14 01:02:03') -- !query schema -struct +struct -- !query output -1 @@ -816,7 +816,7 @@ struct +struct -- !query output 58 @@ -824,7 +824,7 @@ struct +struct -- !query output 1 @@ -832,6 +832,6 @@ struct -- !query select timestampdiff(SECOND, date'2022-02-15', timestamp'2022-02-14 23:59:59') -- !query schema -struct +struct -- !query output -1 From 5daf367e11c71371b8dda139a2ea912e2427936f Mon Sep 17 00:00:00 2001 From: jackylee-ch Date: Sat, 19 Mar 2022 08:56:15 -0500 Subject: [PATCH 015/535] [SPARK-38544][BUILD] Upgrade log4j2 to 2.17.2 ### What changes were proposed in this pull request? This pr aims to upgrade log4j2 to 2.17.2. ### Why are the changes needed? This version brings a lot of fixes released to log1.x support, the release notes and change report as follows: - https://logging.apache.org/log4j/2.x/index.html#News - https://logging.apache.org/log4j/2.x/changes-report.html#a2.17.2 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? GA Closes #35898 from jackylee-ch/fix_update_log4j2_to_2.17.2. Authored-by: jackylee-ch Signed-off-by: Sean Owen (cherry picked from commit 4661455aa03e30af3a2fe911ad8c5c5a12e1790b) Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 8 ++++---- dev/deps/spark-deps-hadoop-3-hive-2.3 | 8 ++++---- pom.xml | 2 +- .../k8s/integrationtest/SparkConfPropagateSuite.scala | 3 ++- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 28cb7c1684173..06d5939be654b 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -187,10 +187,10 @@ lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar -log4j-1.2-api/2.17.1//log4j-1.2-api-2.17.1.jar -log4j-api/2.17.1//log4j-api-2.17.1.jar -log4j-core/2.17.1//log4j-core-2.17.1.jar -log4j-slf4j-impl/2.17.1//log4j-slf4j-impl-2.17.1.jar +log4j-1.2-api/2.17.2//log4j-1.2-api-2.17.2.jar +log4j-api/2.17.2//log4j-api-2.17.2.jar +log4j-core/2.17.2//log4j-core-2.17.2.jar +log4j-slf4j-impl/2.17.2//log4j-slf4j-impl-2.17.2.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 07549effc8706..2e9b0e3aa21a6 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -172,10 +172,10 @@ lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar libthrift/0.12.0//libthrift-0.12.0.jar -log4j-1.2-api/2.17.1//log4j-1.2-api-2.17.1.jar -log4j-api/2.17.1//log4j-api-2.17.1.jar -log4j-core/2.17.1//log4j-core-2.17.1.jar -log4j-slf4j-impl/2.17.1//log4j-slf4j-impl-2.17.1.jar +log4j-1.2-api/2.17.2//log4j-1.2-api-2.17.2.jar +log4j-api/2.17.2//log4j-api-2.17.2.jar +log4j-core/2.17.2//log4j-core-2.17.2.jar +log4j-slf4j-impl/2.17.2//log4j-slf4j-impl-2.17.2.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar diff --git a/pom.xml b/pom.xml index d8b5b87c7d97b..a751bdd3462fe 100644 --- a/pom.xml +++ b/pom.xml @@ -119,7 +119,7 @@ 1.6.0 spark 1.7.32 - 2.17.1 + 2.17.2 3.3.2 2.5.0 diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala index 7ed82e3517eb9..318b92833eb76 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/SparkConfPropagateSuite.scala @@ -41,8 +41,9 @@ private[spark] trait SparkConfPropagateSuite { k8sSuite: KubernetesSuite => sparkAppConf.set("spark.executor.extraJavaOptions", "-Dlog4j2.debug") sparkAppConf.set("spark.kubernetes.executor.deleteOnTermination", "false") + // since 2.17.2, log4j2 loads the original log4j2.properties instead of the soft link. val log4jExpectedLog = - Seq("Reconfiguration complete for context", "at URI /opt/spark/conf/log4j2.properties") + Seq("Reconfiguration complete for context", "at URI /opt/spark/conf", "/log4j2.properties") runSparkApplicationAndVerifyCompletion( appResource = containerLocalSparkDistroExamplesJar, From 1a720d30f77a8b63fe484c60fc4f4953f9ff1af8 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 21 Mar 2022 11:23:01 +0900 Subject: [PATCH 016/535] Revert "[SPARK-38556][PYTHON] Disable Pandas usage logging for method calls inside @contextmanager functions" This reverts commit c284faad2d7d3b813c1c94c612b814c129b6dad3. --- python/pyspark/instrumentation_utils.py | 30 ------------------------- 1 file changed, 30 deletions(-) diff --git a/python/pyspark/instrumentation_utils.py b/python/pyspark/instrumentation_utils.py index b9aacf6c4c6b8..908f5cbb3d473 100644 --- a/python/pyspark/instrumentation_utils.py +++ b/python/pyspark/instrumentation_utils.py @@ -21,7 +21,6 @@ import threading import importlib import time -from contextlib import AbstractContextManager from types import ModuleType from typing import Tuple, Union, List, Callable, Any, Type @@ -31,24 +30,6 @@ _local = threading.local() -class _WrappedAbstractContextManager(AbstractContextManager): - def __init__( - self, acm: AbstractContextManager, class_name: str, function_name: str, logger: Any - ): - self._enter_func = _wrap_function( - class_name, "{}.__enter__".format(function_name), acm.__enter__, logger - ) - self._exit_func = _wrap_function( - class_name, "{}.__exit__".format(function_name), acm.__exit__, logger - ) - - def __enter__(self): # type: ignore[no-untyped-def] - return self._enter_func() - - def __exit__(self, exc_type, exc_val, exc_tb): # type: ignore[no-untyped-def] - return self._exit_func(exc_type, exc_val, exc_tb) - - def _wrap_function(class_name: str, function_name: str, func: Callable, logger: Any) -> Callable: signature = inspect.signature(func) @@ -63,17 +44,6 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: start = time.perf_counter() try: res = func(*args, **kwargs) - if isinstance(res, AbstractContextManager): - # Wrap AbstractContextManager's subclasses returned by @contextmanager decorator - # function so that wrapped function calls inside __enter__ and __exit__ - # are not recorded by usage logger. - # - # The reason to add a wrapped class after function calls instead of - # wrapping __enter__ and __exit__ methods of _GeneratorContextManager class is - # because usage logging should be disabled for functions with @contextmanager - # decorator in PySpark only. - res = _WrappedAbstractContextManager(res, class_name, function_name, logger) - logger.log_success( class_name, function_name, time.perf_counter() - start, signature ) From f3020310ce8ff43e0cf037ad9706280fbe0936ea Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 21 Mar 2022 13:24:19 +0900 Subject: [PATCH 017/535] [SPARK-38607][INFRA] Test result report for ANSI mode ### What changes were proposed in this pull request? This PR proposes two: 1. Report test results for ANSI mode enabled too. All "Build and test" is reused, and all steps are ready. We can just post the status to GitHub Checks status (like https://github.com/apache/spark/runs/5618763442) 2. Rename `Build and test (ANSI)` -> `ANSI SQL mode test`, and `.github/workflows/ansi_sql_mode_test.yml` -> `.github/workflows/build_and_test_ansi.yml` for naming consistency. ### Why are the changes needed? 1. To easily navigate the test results. 2. The current naming looks a bit messy: Screen Shot 2022-03-21 at 10 12 37 AM After this PR, it would look more consistent. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? CI in this PR should tests. It should be monitored after it gets merged too. Closes #35916 from HyukjinKwon/SPARK-38607. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit cae51ea06eadc2400f4588ca76e4f20685019581) Signed-off-by: Hyukjin Kwon --- .../{ansi_sql_mode_test.yml => build_and_test_ansi.yml} | 6 +++--- .github/workflows/test_report.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) rename .github/workflows/{ansi_sql_mode_test.yml => build_and_test_ansi.yml} (92%) diff --git a/.github/workflows/ansi_sql_mode_test.yml b/.github/workflows/build_and_test_ansi.yml similarity index 92% rename from .github/workflows/ansi_sql_mode_test.yml rename to .github/workflows/build_and_test_ansi.yml index cc4ac575306d1..3b8e44ff80ec3 100644 --- a/.github/workflows/ansi_sql_mode_test.yml +++ b/.github/workflows/build_and_test_ansi.yml @@ -17,7 +17,7 @@ # under the License. # -name: ANSI SQL mode test +name: "Build and test (ANSI)" on: push: @@ -25,10 +25,10 @@ on: - branch-3.3 jobs: - ansi_sql_test: + call-build-and-test: + name: Call main build uses: ./.github/workflows/build_and_test.yml if: github.repository == 'apache/spark' with: ansi_enabled: true - diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml index e537e34f15212..a3f09c06ed989 100644 --- a/.github/workflows/test_report.yml +++ b/.github/workflows/test_report.yml @@ -20,7 +20,7 @@ name: Report test results on: workflow_run: - workflows: ["Build and test"] + workflows: ["Build and test", "Build and test (ANSI)"] types: - completed From 8310b249e3543150982468d0bece69c136cbde77 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 21 Mar 2022 14:26:46 +0800 Subject: [PATCH 018/535] [SPARK-38548][SQL] New SQL function: try_sum ### What changes were proposed in this pull request? Add a new SQL function: try_sum. It is identical to the function `sum`, except that it returns `NULL` result instead of throwing an exception on integral/decimal value overflow. Note it is different from sum when ANSI mode is off: | Function | Sum | TrySum | |------------------|------------------------------------|-------------| | Decimal overflow | Return NULL | Return NULL | | Integer overflow | Return lower 64 bits of the result | Return NULL | ### Why are the changes needed? * Users can manage to finish queries without interruptions in ANSI mode. * Users can get NULLs instead of unreasonable results if overflow occurs when ANSI mode is off. For example ``` > SELECT sum(col) FROM VALUES (9223372036854775807L), (1L) AS tab(col); -9223372036854775808 > SELECT try_sum(col) FROM VALUES (9223372036854775807L), (1L) AS tab(col); NULL ``` ### Does this PR introduce _any_ user-facing change? Yes, a new SQL function: try_sum which is identical to the function `sum`, except that it returns `NULL` result instead of throwing an exception on integral/decimal value overflow. ### How was this patch tested? UT Closes #35848 from gengliangwang/trySum2. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit c34fee4d20da9ab5b4f1f26185fc1a9a83b99d05) Signed-off-by: Gengliang Wang --- docs/sql-ref-ansi-compliance.md | 1 + .../catalyst/analysis/FunctionRegistry.scala | 1 + .../catalyst/expressions/aggregate/Sum.scala | 129 ++++++++++++++---- .../sql-functions/sql-expression-schema.md | 3 +- .../sql-tests/inputs/ansi/try_aggregates.sql | 1 + .../sql-tests/inputs/try_aggregates.sql | 13 ++ .../results/ansi/try_aggregates.sql.out | 82 +++++++++++ .../sql-tests/results/try_aggregates.sql.out | 82 +++++++++++ 8 files changed, 282 insertions(+), 30 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/ansi/try_aggregates.sql create mode 100644 sql/core/src/test/resources/sql-tests/inputs/try_aggregates.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/ansi/try_aggregates.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/try_aggregates.sql.out diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 0769089f4dabe..f296f2b4a820c 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -316,6 +316,7 @@ When ANSI mode is on, it throws exceptions for invalid operations. You can use t - `try_subtract`: identical to the add operator `-`, except that it returns `NULL` result instead of throwing an exception on integral value overflow. - `try_multiply`: identical to the add operator `*`, except that it returns `NULL` result instead of throwing an exception on integral value overflow. - `try_divide`: identical to the division operator `/`, except that it returns `NULL` result instead of throwing an exception on dividing 0. + - `try_sum`: identical to the function `sum`, except that it returns `NULL` result instead of throwing an exception on integral/decimal value overflow. - `try_element_at`: identical to the function `element_at`, except that it returns `NULL` result instead of throwing an exception on array's index out of bound or map's key not found. ### SQL Keywords (optional, disabled by default) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index e5954c8f26942..a37d4b2dab3c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -452,6 +452,7 @@ object FunctionRegistry { expression[TrySubtract]("try_subtract"), expression[TryMultiply]("try_multiply"), expression[TryElementAt]("try_element_at"), + expression[TrySum]("try_sum"), // aggregate functions expression[HyperLogLogPlusPlus]("approx_count_distinct"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala index ec7479af96af2..5d8fd702ba423 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala @@ -26,27 +26,11 @@ import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -@ExpressionDescription( - usage = "_FUNC_(expr) - Returns the sum calculated from values of a group.", - examples = """ - Examples: - > SELECT _FUNC_(col) FROM VALUES (5), (10), (15) AS tab(col); - 30 - > SELECT _FUNC_(col) FROM VALUES (NULL), (10), (15) AS tab(col); - 25 - > SELECT _FUNC_(col) FROM VALUES (NULL), (NULL) AS tab(col); - NULL - """, - group = "agg_funcs", - since = "1.0.0") -case class Sum( - child: Expression, - failOnError: Boolean = SQLConf.get.ansiEnabled) - extends DeclarativeAggregate +abstract class SumBase(child: Expression) extends DeclarativeAggregate with ImplicitCastInputTypes with UnaryLike[Expression] { - def this(child: Expression) = this(child, failOnError = SQLConf.get.ansiEnabled) + def failOnError: Boolean override def nullable: Boolean = true @@ -57,7 +41,7 @@ case class Sum( Seq(TypeCollection(NumericType, YearMonthIntervalType, DayTimeIntervalType)) override def checkInputDataTypes(): TypeCheckResult = - TypeUtils.checkForAnsiIntervalOrNumericType(child.dataType, "sum") + TypeUtils.checkForAnsiIntervalOrNumericType(child.dataType, prettyName) final override val nodePatterns: Seq[TreePattern] = Seq(SUM) @@ -86,16 +70,17 @@ case class Sum( case _ => Seq(Literal(null, resultType)) } - override lazy val updateExpressions: Seq[Expression] = { + protected def getUpdateExpressions: Seq[Expression] = { resultType match { case _: DecimalType => // For decimal type, the initial value of `sum` is 0. We need to keep `sum` unchanged if // the input is null, as SUM function ignores null input. The `sum` can only be null if // overflow happens under non-ansi mode. val sumExpr = if (child.nullable) { - If(child.isNull, sum, sum + KnownNotNull(child).cast(resultType)) + If(child.isNull, sum, + Add(sum, KnownNotNull(child).cast(resultType), failOnError = failOnError)) } else { - sum + child.cast(resultType) + Add(sum, child.cast(resultType), failOnError = failOnError) } // The buffer becomes non-empty after seeing the first not-null input. val isEmptyExpr = if (child.nullable) { @@ -110,9 +95,10 @@ case class Sum( // in case the input is nullable. The `sum` can only be null if there is no value, as // non-decimal type can produce overflowed value under non-ansi mode. if (child.nullable) { - Seq(coalesce(coalesce(sum, zero) + child.cast(resultType), sum)) + Seq(coalesce(Add(coalesce(sum, zero), child.cast(resultType), failOnError = failOnError), + sum)) } else { - Seq(coalesce(sum, zero) + child.cast(resultType)) + Seq(Add(coalesce(sum, zero), child.cast(resultType), failOnError = failOnError)) } } } @@ -129,7 +115,7 @@ case class Sum( * isEmpty: Set to false if either one of the left or right is set to false. This * means we have seen atleast a value that was not null. */ - override lazy val mergeExpressions: Seq[Expression] = { + protected def getMergeExpressions: Seq[Expression] = { resultType match { case _: DecimalType => val bufferOverflow = !isEmpty.left && sum.left.isNull @@ -143,7 +129,9 @@ case class Sum( // overflow happens. KnownNotNull(sum.left) + KnownNotNull(sum.right)), isEmpty.left && isEmpty.right) - case _ => Seq(coalesce(coalesce(sum.left, zero) + sum.right, sum.left)) + case _ => Seq(coalesce( + Add(coalesce(sum.left, zero), sum.right, failOnError = failOnError), + sum.left)) } } @@ -154,15 +142,98 @@ case class Sum( * So now, if ansi is enabled, then throw exception, if not then return null. * If sum is not null, then return the sum. */ - override lazy val evaluateExpression: Expression = resultType match { + protected def getEvaluateExpression: Expression = resultType match { case d: DecimalType => If(isEmpty, Literal.create(null, resultType), CheckOverflowInSum(sum, d, !failOnError)) case _ => sum } - override protected def withNewChildInternal(newChild: Expression): Sum = copy(child = newChild) - // The flag `failOnError` won't be shown in the `toString` or `toAggString` methods override def flatArguments: Iterator[Any] = Iterator(child) } + +@ExpressionDescription( + usage = "_FUNC_(expr) - Returns the sum calculated from values of a group.", + examples = """ + Examples: + > SELECT _FUNC_(col) FROM VALUES (5), (10), (15) AS tab(col); + 30 + > SELECT _FUNC_(col) FROM VALUES (NULL), (10), (15) AS tab(col); + 25 + > SELECT _FUNC_(col) FROM VALUES (NULL), (NULL) AS tab(col); + NULL + """, + group = "agg_funcs", + since = "1.0.0") +case class Sum( + child: Expression, + failOnError: Boolean = SQLConf.get.ansiEnabled) + extends SumBase(child) { + def this(child: Expression) = this(child, failOnError = SQLConf.get.ansiEnabled) + + override protected def withNewChildInternal(newChild: Expression): Sum = copy(child = newChild) + + override lazy val updateExpressions: Seq[Expression] = getUpdateExpressions + + override lazy val mergeExpressions: Seq[Expression] = getMergeExpressions + + override lazy val evaluateExpression: Expression = getEvaluateExpression +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(expr) - Returns the sum calculated from values of a group and the result is null on overflow.", + examples = """ + Examples: + > SELECT _FUNC_(col) FROM VALUES (5), (10), (15) AS tab(col); + 30 + > SELECT _FUNC_(col) FROM VALUES (NULL), (10), (15) AS tab(col); + 25 + > SELECT _FUNC_(col) FROM VALUES (NULL), (NULL) AS tab(col); + NULL + > SELECT _FUNC_(col) FROM VALUES (9223372036854775807L), (1L) AS tab(col); + NULL + """, + since = "3.3.0", + group = "agg_funcs") +// scalastyle:on line.size.limit +case class TrySum(child: Expression) extends SumBase(child) { + + override def failOnError: Boolean = dataType match { + // Double type won't fail, thus the failOnError is always false + // For decimal type, it returns NULL on overflow. It behaves the same as TrySum when + // `failOnError` is false. + case _: DoubleType | _: DecimalType => false + case _ => true + } + + override lazy val updateExpressions: Seq[Expression] = + if (failOnError) { + val expressions = getUpdateExpressions + // If the length of updateExpressions is larger than 1, the tail expressions are for + // tracking whether the input is empty, which doesn't need `TryEval` execution. + Seq(TryEval(expressions.head)) ++ expressions.tail + } else { + getUpdateExpressions + } + + override lazy val mergeExpressions: Seq[Expression] = + if (failOnError) { + getMergeExpressions.map(TryEval) + } else { + getMergeExpressions + } + + override lazy val evaluateExpression: Expression = + if (failOnError) { + TryEval(getEvaluateExpression) + } else { + getEvaluateExpression + } + + override protected def withNewChildInternal(newChild: Expression): Expression = + copy(child = newChild) + + override def prettyName: String = "try_sum" +} diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 386dd1fe0ae17..1afba46e16213 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 382 + - Number of queries: 383 - Number of expressions that missing example: 12 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions @@ -376,6 +376,7 @@ | org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | stddev | SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | stddev_samp | SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.Sum | sum | SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.TrySum | try_sum | SELECT try_sum(col) FROM VALUES (5), (10), (15) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop | var_pop | SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | var_samp | SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | variance | SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/try_aggregates.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/try_aggregates.sql new file mode 100644 index 0000000000000..f5b44d2b2aa76 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/try_aggregates.sql @@ -0,0 +1 @@ +--IMPORT try_aggregates.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/try_aggregates.sql b/sql/core/src/test/resources/sql-tests/inputs/try_aggregates.sql new file mode 100644 index 0000000000000..ffa8eefe82831 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/try_aggregates.sql @@ -0,0 +1,13 @@ +-- try_sum +SELECT try_sum(col) FROM VALUES (5), (10), (15) AS tab(col); +SELECT try_sum(col) FROM VALUES (5.0), (10.0), (15.0) AS tab(col); +SELECT try_sum(col) FROM VALUES (NULL), (10), (15) AS tab(col); +SELECT try_sum(col) FROM VALUES (NULL), (NULL) AS tab(col); +SELECT try_sum(col) FROM VALUES (9223372036854775807L), (1L) AS tab(col); +-- test overflow in Decimal(38, 0) +SELECT try_sum(col) FROM VALUES (98765432109876543210987654321098765432BD), (98765432109876543210987654321098765432BD) AS tab(col); + +SELECT try_sum(col) FROM VALUES (interval '1 months'), (interval '1 months') AS tab(col); +SELECT try_sum(col) FROM VALUES (interval '2147483647 months'), (interval '1 months') AS tab(col); +SELECT try_sum(col) FROM VALUES (interval '1 seconds'), (interval '1 seconds') AS tab(col); +SELECT try_sum(col) FROM VALUES (interval '106751991 DAYS'), (interval '1 DAYS') AS tab(col); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/try_aggregates.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/try_aggregates.sql.out new file mode 100644 index 0000000000000..7ae217ad7582b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/ansi/try_aggregates.sql.out @@ -0,0 +1,82 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 10 + + +-- !query +SELECT try_sum(col) FROM VALUES (5), (10), (15) AS tab(col) +-- !query schema +struct +-- !query output +30 + + +-- !query +SELECT try_sum(col) FROM VALUES (5.0), (10.0), (15.0) AS tab(col) +-- !query schema +struct +-- !query output +30.0 + + +-- !query +SELECT try_sum(col) FROM VALUES (NULL), (10), (15) AS tab(col) +-- !query schema +struct +-- !query output +25 + + +-- !query +SELECT try_sum(col) FROM VALUES (NULL), (NULL) AS tab(col) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_sum(col) FROM VALUES (9223372036854775807L), (1L) AS tab(col) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_sum(col) FROM VALUES (98765432109876543210987654321098765432BD), (98765432109876543210987654321098765432BD) AS tab(col) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_sum(col) FROM VALUES (interval '1 months'), (interval '1 months') AS tab(col) +-- !query schema +struct +-- !query output +0-2 + + +-- !query +SELECT try_sum(col) FROM VALUES (interval '2147483647 months'), (interval '1 months') AS tab(col) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_sum(col) FROM VALUES (interval '1 seconds'), (interval '1 seconds') AS tab(col) +-- !query schema +struct +-- !query output +0 00:00:02.000000000 + + +-- !query +SELECT try_sum(col) FROM VALUES (interval '106751991 DAYS'), (interval '1 DAYS') AS tab(col) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/try_aggregates.sql.out b/sql/core/src/test/resources/sql-tests/results/try_aggregates.sql.out new file mode 100644 index 0000000000000..7ae217ad7582b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/try_aggregates.sql.out @@ -0,0 +1,82 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 10 + + +-- !query +SELECT try_sum(col) FROM VALUES (5), (10), (15) AS tab(col) +-- !query schema +struct +-- !query output +30 + + +-- !query +SELECT try_sum(col) FROM VALUES (5.0), (10.0), (15.0) AS tab(col) +-- !query schema +struct +-- !query output +30.0 + + +-- !query +SELECT try_sum(col) FROM VALUES (NULL), (10), (15) AS tab(col) +-- !query schema +struct +-- !query output +25 + + +-- !query +SELECT try_sum(col) FROM VALUES (NULL), (NULL) AS tab(col) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_sum(col) FROM VALUES (9223372036854775807L), (1L) AS tab(col) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_sum(col) FROM VALUES (98765432109876543210987654321098765432BD), (98765432109876543210987654321098765432BD) AS tab(col) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_sum(col) FROM VALUES (interval '1 months'), (interval '1 months') AS tab(col) +-- !query schema +struct +-- !query output +0-2 + + +-- !query +SELECT try_sum(col) FROM VALUES (interval '2147483647 months'), (interval '1 months') AS tab(col) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_sum(col) FROM VALUES (interval '1 seconds'), (interval '1 seconds') AS tab(col) +-- !query schema +struct +-- !query output +0 00:00:02.000000000 + + +-- !query +SELECT try_sum(col) FROM VALUES (interval '106751991 DAYS'), (interval '1 DAYS') AS tab(col) +-- !query schema +struct +-- !query output +NULL From 03114717287e28384aaf4636ae0a9e005e453632 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 21 Mar 2022 17:22:07 +0900 Subject: [PATCH 019/535] [SPARK-38609][PYTHON] Add `PYSPARK_PANDAS_USAGE_LOGGER` environment variable as an alias of `KOALAS_USAGE_LOGGER` ### What changes were proposed in this pull request? This PR proposes to add a new alias `PYSPARK_PANDAS_USAGE_LOGGER` for `KOALAS_USAGE_LOGGER`. ### Why are the changes needed? To avoid legacy name, Koalas. ### Does this PR introduce _any_ user-facing change? Yes, uses can set `PYSPARK_PANDAS_USAGE_LOGGER` environment variable for using a usage logger. ### How was this patch tested? Manual positive tests: ```bash PYSPARK_PANDAS_USAGE_LOGGER=pyspark.pandas.usage_logging.usage_logger ./bin/pyspark ``` ```python >>> import pyspark.pandas as ps >>> df = ps.range(1) >>> import logging >>> import sys >>> root = logging.getLogger() >>> root.setLevel(logging.INFO) >>> handler = logging.StreamHandler(sys.stdout) >>> handler.setLevel(logging.INFO) >>> formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') >>> handler.setFormatter(formatter) >>> root.addHandler(handler) >>> df.id.sort_values() ... INFO:pyspark.pandas.usage_logger:A function `DataFrame.__getattr__(self, key)` was successfully finished after 0.136 ms. .... ``` ```bash KOALAS_USAGE_LOGGER=pyspark.pandas.usage_logging.usage_logger ./bin/pyspark ``` ```python >>> import pyspark.pandas as ps >>> df = ps.range(1) >>> import logging >>> import sys >>> root = logging.getLogger() >>> root.setLevel(logging.INFO) >>> handler = logging.StreamHandler(sys.stdout) >>> handler.setLevel(logging.INFO) >>> formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') >>> handler.setFormatter(formatter) >>> root.addHandler(handler) >>> df.id.sort_values() ... 2022-03-21 11:51:21,039 - pyspark.pandas.usage_logger - INFO - A function `DataFrame.__getattr__(self, key)` was successfully finished after 0.107 ms. ... ``` Manual negative tests: ```bash PYSPARK_PANDAS_USAGE_LOGGER=abc ./bin/pyspark ``` ```python >>> import pyspark.pandas as ps WARNING:pyspark.pandas.usage_logger:Tried to attach usage logger `abc`, but an exception was raised: module 'abc' has no attribute 'get_logger' ``` ```bash KOALAS_USAGE_LOGGER=abc ./bin/pyspark ``` ```python >>> import pyspark.pandas as ps WARNING:pyspark.pandas.usage_logger:Tried to attach usage logger `abc`, but an exception was raised: module 'abc' has no attribute 'get_logger' ``` Closes #35917 from HyukjinKwon/SPARK-38609. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit a627dac7d649412b2f0e823000a3b4d40b7975d7) Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/pandas/__init__.py b/python/pyspark/pandas/__init__.py index df84c118db5a6..a11c496e2ca88 100644 --- a/python/pyspark/pandas/__init__.py +++ b/python/pyspark/pandas/__init__.py @@ -101,8 +101,8 @@ def _auto_patch_spark() -> None: import os import logging - # Attach a usage logger. - logger_module = os.getenv("KOALAS_USAGE_LOGGER", "") + # Attach a usage logger. 'KOALAS_USAGE_LOGGER' is legacy, and it's for compatibility. + logger_module = os.getenv("PYSPARK_PANDAS_USAGE_LOGGER", os.getenv("KOALAS_USAGE_LOGGER", "")) if logger_module != "": try: from pyspark.pandas import usage_logging From bfaf9903dc08370d2deb3d116315be9650ff9026 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Mon, 21 Mar 2022 21:02:39 +0900 Subject: [PATCH 020/535] [SPARK-38612][PYTHON] Fix Inline type hint for duplicated.keep ### What changes were proposed in this pull request? Fix Inline type hint for `duplicated.keep` ### Why are the changes needed? `keep` can be "first", "last" and False in pandas ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed Closes #35920 from Yikun/SPARK-38612. Authored-by: Yikun Jiang Signed-off-by: Hyukjin Kwon (cherry picked from commit acb50d95a4952dea1cbbc27d4ddcc0b3432a13cf) Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/frame.py | 6 +++--- python/pyspark/pandas/series.py | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py index 41a0dde47a51b..b355708e97575 100644 --- a/python/pyspark/pandas/frame.py +++ b/python/pyspark/pandas/frame.py @@ -4307,7 +4307,7 @@ def op(psser: ps.Series) -> Union[ps.Series, Column]: def _mark_duplicates( self, subset: Optional[Union[Name, List[Name]]] = None, - keep: str = "first", + keep: Union[bool, str] = "first", ) -> Tuple[SparkDataFrame, str]: if subset is None: subset_list = self._internal.column_labels @@ -4350,7 +4350,7 @@ def _mark_duplicates( def duplicated( self, subset: Optional[Union[Name, List[Name]]] = None, - keep: str = "first", + keep: Union[bool, str] = "first", ) -> "Series": """ Return boolean Series denoting duplicate rows, optionally only considering certain columns. @@ -9037,7 +9037,7 @@ def describe(self, percentiles: Optional[List[float]] = None) -> "DataFrame": def drop_duplicates( self, subset: Optional[Union[Name, List[Name]]] = None, - keep: str = "first", + keep: Union[bool, str] = "first", inplace: bool = False, ) -> Optional["DataFrame"]: """ diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py index 35dc5acf21bee..9eb57ad95af04 100644 --- a/python/pyspark/pandas/series.py +++ b/python/pyspark/pandas/series.py @@ -1647,7 +1647,9 @@ def to_list(self) -> List: tolist = to_list - def drop_duplicates(self, keep: str = "first", inplace: bool = False) -> Optional["Series"]: + def drop_duplicates( + self, keep: Union[bool, str] = "first", inplace: bool = False + ) -> Optional["Series"]: """ Return Series with duplicate values removed. From 8a6d1444d6a55783b92e9da00954b3766aef8090 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Mon, 21 Mar 2022 08:51:04 -0500 Subject: [PATCH 021/535] [SPARK-38194][YARN][MESOS][K8S][3.3] Make memory overhead factor configurable ### What changes were proposed in this pull request? Add a new config to set the memory overhead factor for drivers and executors. Currently the memory overhead is hard coded to 10% (except in Kubernetes), and the only way to set it higher is to set it to a specific memory amount. ### Why are the changes needed? In dynamic environments where different people or use cases need different memory requirements, it would be helpful to set a higher memory overhead factor instead of having to set a higher specific memory overhead value. The kubernetes resource manager already makes this configurable. This makes it configurable across the board. ### Does this PR introduce _any_ user-facing change? No change to default behavior, just adds a new config users can change. ### How was this patch tested? New UT to check the memory calculation. Closes #35912 from Kimahriman/memory-overhead-factor-3.3. Authored-by: Adam Binford Signed-off-by: Thomas Graves --- .../scala/org/apache/spark/SparkConf.scala | 4 +- .../spark/internal/config/package.scala | 28 ++++++ docs/configuration.md | 30 +++++- docs/running-on-kubernetes.md | 9 -- .../k8s/features/BasicDriverFeatureStep.scala | 19 +++- .../features/BasicExecutorFeatureStep.scala | 7 +- .../BasicDriverFeatureStepSuite.scala | 96 ++++++++++++++++++- .../BasicExecutorFeatureStepSuite.scala | 54 +++++++++++ .../deploy/rest/mesos/MesosRestServer.scala | 5 +- .../cluster/mesos/MesosSchedulerUtils.scala | 9 +- .../rest/mesos/MesosRestServerSuite.scala | 8 +- .../org/apache/spark/deploy/yarn/Client.scala | 14 ++- .../spark/deploy/yarn/YarnAllocator.scala | 5 +- .../deploy/yarn/YarnSparkHadoopUtil.scala | 5 +- .../deploy/yarn/YarnAllocatorSuite.scala | 29 ++++++ 15 files changed, 287 insertions(+), 35 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 5f37a1abb1909..cf121749b7348 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -636,7 +636,9 @@ private[spark] object SparkConf extends Logging { DeprecatedConfig("spark.blacklist.killBlacklistedExecutors", "3.1.0", "Please use spark.excludeOnFailure.killExcludedExecutors"), DeprecatedConfig("spark.yarn.blacklist.executor.launch.blacklisting.enabled", "3.1.0", - "Please use spark.yarn.executor.launch.excludeOnFailure.enabled") + "Please use spark.yarn.executor.launch.excludeOnFailure.enabled"), + DeprecatedConfig("spark.kubernetes.memoryOverheadFactor", "3.3.0", + "Please use spark.driver.memoryOverheadFactor and spark.executor.memoryOverheadFactor") ) Map(configs.map { cfg => (cfg.key -> cfg) } : _*) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index dbec61a1fdb76..ffe4501248f43 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -105,6 +105,22 @@ package object config { .bytesConf(ByteUnit.MiB) .createOptional + private[spark] val DRIVER_MEMORY_OVERHEAD_FACTOR = + ConfigBuilder("spark.driver.memoryOverheadFactor") + .doc("Fraction of driver memory to be allocated as additional non-heap memory per driver " + + "process in cluster mode. This is memory that accounts for things like VM overheads, " + + "interned strings, other native overheads, etc. This tends to grow with the container " + + "size. This value defaults to 0.10 except for Kubernetes non-JVM jobs, which defaults to " + + "0.40. This is done as non-JVM tasks need more non-JVM heap space and such tasks " + + "commonly fail with \"Memory Overhead Exceeded\" errors. This preempts this error " + + "with a higher default. This value is ignored if spark.driver.memoryOverhead is set " + + "directly.") + .version("3.3.0") + .doubleConf + .checkValue(factor => factor > 0, + "Ensure that memory overhead is a double greater than 0") + .createWithDefault(0.1) + private[spark] val DRIVER_LOG_DFS_DIR = ConfigBuilder("spark.driver.log.dfsDir").version("3.0.0").stringConf.createOptional @@ -315,6 +331,18 @@ package object config { .bytesConf(ByteUnit.MiB) .createOptional + private[spark] val EXECUTOR_MEMORY_OVERHEAD_FACTOR = + ConfigBuilder("spark.executor.memoryOverheadFactor") + .doc("Fraction of executor memory to be allocated as additional non-heap memory per " + + "executor process. This is memory that accounts for things like VM overheads, " + + "interned strings, other native overheads, etc. This tends to grow with the container " + + "size. This value is ignored if spark.executor.memoryOverhead is set directly.") + .version("3.3.0") + .doubleConf + .checkValue(factor => factor > 0, + "Ensure that memory overhead is a double greater than 0") + .createWithDefault(0.1) + private[spark] val CORES_MAX = ConfigBuilder("spark.cores.max") .doc("When running on a standalone deploy cluster or a Mesos cluster in coarse-grained " + "sharing mode, the maximum amount of CPU cores to request for the application from across " + diff --git a/docs/configuration.md b/docs/configuration.md index ae3f422f34b3a..a2e6797b55e2f 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -183,7 +183,7 @@ of the most common options to set are: spark.driver.memoryOverhead - driverMemory * 0.10, with minimum of 384 + driverMemory * spark.driver.memoryOverheadFactor, with minimum of 384 Amount of non-heap memory to be allocated per driver process in cluster mode, in MiB unless otherwise specified. This is memory that accounts for things like VM overheads, interned strings, @@ -198,6 +198,21 @@ of the most common options to set are: 2.3.0 + + spark.driver.memoryOverheadFactor + 0.10 + + Fraction of driver memory to be allocated as additional non-heap memory per driver process in cluster mode. + This is memory that accounts for things like VM overheads, interned strings, + other native overheads, etc. This tends to grow with the container size. + This value defaults to 0.10 except for Kubernetes non-JVM jobs, which defaults to + 0.40. This is done as non-JVM tasks need more non-JVM heap space and such tasks + commonly fail with "Memory Overhead Exceeded" errors. This preempts this error + with a higher default. + This value is ignored if spark.driver.memoryOverhead is set directly. + + 3.3.0 + spark.driver.resource.{resourceName}.amount 0 @@ -272,7 +287,7 @@ of the most common options to set are: spark.executor.memoryOverhead - executorMemory * 0.10, with minimum of 384 + executorMemory * spark.executor.memoryOverheadFactor, with minimum of 384 Amount of additional memory to be allocated per executor process, in MiB unless otherwise specified. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. @@ -287,6 +302,17 @@ of the most common options to set are: 2.3.0 + + spark.executor.memoryOverheadFactor + 0.10 + + Fraction of executor memory to be allocated as additional non-heap memory per executor process. + This is memory that accounts for things like VM overheads, interned strings, + other native overheads, etc. This tends to grow with the container size. + This value is ignored if spark.executor.memoryOverhead is set directly. + + 3.3.0 + spark.executor.resource.{resourceName}.amount 0 diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index a5da80a68d32d..de37e22cc78d7 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1137,15 +1137,6 @@ See the [configuration page](configuration.html) for information on Spark config 3.0.0 - - spark.kubernetes.memoryOverheadFactor - 0.1 - - This sets the Memory Overhead Factor that will allocate memory to non-JVM memory, which includes off-heap memory allocations, non-JVM tasks, various systems processes, and tmpfs-based local directories when spark.kubernetes.local.dirs.tmpfs is true. For JVM-based jobs this value will default to 0.10 and 0.40 for non-JVM jobs. - This is done as non-JVM tasks need more non-JVM heap space and such tasks commonly fail with "Memory Overhead Exceeded" errors. This preempts this error with a higher default. - - 2.4.0 - spark.kubernetes.pyspark.pythonVersion "3" diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala index 3b2b5612566a1..413f5bca9dfca 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStep.scala @@ -54,9 +54,11 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) // Memory settings private val driverMemoryMiB = conf.get(DRIVER_MEMORY) - // The memory overhead factor to use. If the user has not set it, then use a different - // value for non-JVM apps. This value is propagated to executors. - private val overheadFactor = + // The default memory overhead factor to use, derived from the deprecated + // `spark.kubernetes.memoryOverheadFactor` config or the default overhead values. + // If the user has not set it, then use a different default for non-JVM apps. This value is + // propagated to executors and used if the executor overhead factor is not set explicitly. + private val defaultOverheadFactor = if (conf.mainAppResource.isInstanceOf[NonJVMResource]) { if (conf.contains(MEMORY_OVERHEAD_FACTOR)) { conf.get(MEMORY_OVERHEAD_FACTOR) @@ -67,9 +69,16 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) conf.get(MEMORY_OVERHEAD_FACTOR) } + // Prefer the driver memory overhead factor if set explicitly + private val memoryOverheadFactor = if (conf.contains(DRIVER_MEMORY_OVERHEAD_FACTOR)) { + conf.get(DRIVER_MEMORY_OVERHEAD_FACTOR) + } else { + defaultOverheadFactor + } + private val memoryOverheadMiB = conf .get(DRIVER_MEMORY_OVERHEAD) - .getOrElse(math.max((overheadFactor * driverMemoryMiB).toInt, + .getOrElse(math.max((memoryOverheadFactor * driverMemoryMiB).toInt, ResourceProfile.MEMORY_OVERHEAD_MIN_MIB)) private val driverMemoryWithOverheadMiB = driverMemoryMiB + memoryOverheadMiB @@ -164,7 +173,7 @@ private[spark] class BasicDriverFeatureStep(conf: KubernetesDriverConf) KUBERNETES_DRIVER_POD_NAME.key -> driverPodName, "spark.app.id" -> conf.appId, KUBERNETES_DRIVER_SUBMIT_CHECK.key -> "true", - MEMORY_OVERHEAD_FACTOR.key -> overheadFactor.toString) + MEMORY_OVERHEAD_FACTOR.key -> defaultOverheadFactor.toString) // try upload local, resolvable files to a hadoop compatible file system Seq(JARS, FILES, ARCHIVES, SUBMIT_PYTHON_FILES).foreach { key => val uris = conf.get(key).filter(uri => KubernetesUtils.isLocalAndResolvable(uri)) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index a7625194bd6e6..15c69ad487f5f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -59,11 +59,16 @@ private[spark] class BasicExecutorFeatureStep( private val isDefaultProfile = resourceProfile.id == ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID private val isPythonApp = kubernetesConf.get(APP_RESOURCE_TYPE) == Some(APP_RESOURCE_TYPE_PYTHON) private val disableConfigMap = kubernetesConf.get(KUBERNETES_EXECUTOR_DISABLE_CONFIGMAP) + private val memoryOverheadFactor = if (kubernetesConf.contains(EXECUTOR_MEMORY_OVERHEAD_FACTOR)) { + kubernetesConf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) + } else { + kubernetesConf.get(MEMORY_OVERHEAD_FACTOR) + } val execResources = ResourceProfile.getResourcesForClusterManager( resourceProfile.id, resourceProfile.executorResources, - kubernetesConf.get(MEMORY_OVERHEAD_FACTOR), + memoryOverheadFactor, kubernetesConf.sparkConf, isPythonApp, Map.empty) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala index bf7fbcc912f54..9a3b06af7f0b9 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicDriverFeatureStepSuite.scala @@ -192,13 +192,46 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { // Memory overhead tests. Tuples are: // test name, main resource, overhead factor, expected factor + val driverDefault = DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get + val oldConfigDefault = MEMORY_OVERHEAD_FACTOR.defaultValue.get + val nonJvm = NON_JVM_MEMORY_OVERHEAD_FACTOR Seq( - ("java", JavaMainAppResource(None), None, MEMORY_OVERHEAD_FACTOR.defaultValue.get), - ("python default", PythonMainAppResource(null), None, NON_JVM_MEMORY_OVERHEAD_FACTOR), + ("java", JavaMainAppResource(None), None, driverDefault, oldConfigDefault), + ("python default", PythonMainAppResource(null), None, nonJvm, nonJvm), + ("python w/ override", PythonMainAppResource(null), Some(0.9d), 0.9d, nonJvm), + ("r default", RMainAppResource(null), None, nonJvm, nonJvm) + ).foreach { case (name, resource, factor, expectedFactor, expectedPropFactor) => + test(s"memory overhead factor new config: $name") { + // Choose a driver memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB + val driverMem = + ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 + + // main app resource, overhead factor + val sparkConf = new SparkConf(false) + .set(CONTAINER_IMAGE, "spark-driver:latest") + .set(DRIVER_MEMORY.key, s"${driverMem.toInt}m") + factor.foreach { value => sparkConf.set(DRIVER_MEMORY_OVERHEAD_FACTOR, value) } + val conf = KubernetesTestConf.createDriverConf( + sparkConf = sparkConf, + mainAppResource = resource) + val step = new BasicDriverFeatureStep(conf) + val pod = step.configurePod(SparkPod.initialPod()) + val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) + val expected = (driverMem + driverMem * expectedFactor).toInt + assert(mem === s"${expected}Mi") + + val systemProperties = step.getAdditionalPodSystemProperties() + assert(systemProperties(MEMORY_OVERHEAD_FACTOR.key) === expectedPropFactor.toString) + } + } + + Seq( + ("java", JavaMainAppResource(None), None, driverDefault), + ("python default", PythonMainAppResource(null), None, nonJvm), ("python w/ override", PythonMainAppResource(null), Some(0.9d), 0.9d), - ("r default", RMainAppResource(null), None, NON_JVM_MEMORY_OVERHEAD_FACTOR) + ("r default", RMainAppResource(null), None, nonJvm) ).foreach { case (name, resource, factor, expectedFactor) => - test(s"memory overhead factor: $name") { + test(s"memory overhead factor old config: $name") { // Choose a driver memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB val driverMem = ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 @@ -222,6 +255,61 @@ class BasicDriverFeatureStepSuite extends SparkFunSuite { } } + test(s"SPARK-38194: memory overhead factor precendence") { + // Choose a driver memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB + val driverMem = + ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 + + // main app resource, overhead factor + val sparkConf = new SparkConf(false) + .set(CONTAINER_IMAGE, "spark-driver:latest") + .set(DRIVER_MEMORY.key, s"${driverMem.toInt}m") + + // New config should take precedence + val expectedFactor = 0.2 + val oldFactor = 0.3 + sparkConf.set(DRIVER_MEMORY_OVERHEAD_FACTOR, expectedFactor) + sparkConf.set(MEMORY_OVERHEAD_FACTOR, oldFactor) + + val conf = KubernetesTestConf.createDriverConf( + sparkConf = sparkConf) + val step = new BasicDriverFeatureStep(conf) + val pod = step.configurePod(SparkPod.initialPod()) + val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) + val expected = (driverMem + driverMem * expectedFactor).toInt + assert(mem === s"${expected}Mi") + + // The old config should be passed as a system property for use by the executor + val systemProperties = step.getAdditionalPodSystemProperties() + assert(systemProperties(MEMORY_OVERHEAD_FACTOR.key) === oldFactor.toString) + } + + test(s"SPARK-38194: old memory factor settings is applied if new one isn't given") { + // Choose a driver memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB + val driverMem = + ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / DRIVER_MEMORY_OVERHEAD_FACTOR.defaultValue.get * 2 + + // main app resource, overhead factor + val sparkConf = new SparkConf(false) + .set(CONTAINER_IMAGE, "spark-driver:latest") + .set(DRIVER_MEMORY.key, s"${driverMem.toInt}m") + + // Old config still works if new config isn't given + val expectedFactor = 0.3 + sparkConf.set(MEMORY_OVERHEAD_FACTOR, expectedFactor) + + val conf = KubernetesTestConf.createDriverConf( + sparkConf = sparkConf) + val step = new BasicDriverFeatureStep(conf) + val pod = step.configurePod(SparkPod.initialPod()) + val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) + val expected = (driverMem + driverMem * expectedFactor).toInt + assert(mem === s"${expected}Mi") + + val systemProperties = step.getAdditionalPodSystemProperties() + assert(systemProperties(MEMORY_OVERHEAD_FACTOR.key) === expectedFactor.toString) + } + test("SPARK-35493: make spark.blockManager.port be able to be fallen back to in driver pod") { val initPod = SparkPod.initialPod() val sparkConf = new SparkConf() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala index f5f2712481604..731a9b77d2059 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala @@ -441,6 +441,60 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { )) } + test(s"SPARK-38194: memory overhead factor precendence") { + // Choose an executor memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB + val defaultFactor = EXECUTOR_MEMORY_OVERHEAD_FACTOR.defaultValue.get + val executorMem = ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / defaultFactor * 2 + + // main app resource, overhead factor + val sparkConf = new SparkConf(false) + .set(CONTAINER_IMAGE, "spark-driver:latest") + .set(EXECUTOR_MEMORY.key, s"${executorMem.toInt}m") + + // New config should take precedence + val expectedFactor = 0.2 + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, expectedFactor) + sparkConf.set(MEMORY_OVERHEAD_FACTOR, 0.3) + + val conf = KubernetesTestConf.createExecutorConf( + sparkConf = sparkConf) + ResourceProfile.clearDefaultProfile() + val resourceProfile = ResourceProfile.getOrCreateDefaultProfile(sparkConf) + val step = new BasicExecutorFeatureStep(conf, new SecurityManager(baseConf), + resourceProfile) + val pod = step.configurePod(SparkPod.initialPod()) + val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) + val expected = (executorMem + executorMem * expectedFactor).toInt + assert(mem === s"${expected}Mi") + } + + test(s"SPARK-38194: old memory factor settings is applied if new one isn't given") { + // Choose an executor memory where the default memory overhead is > MEMORY_OVERHEAD_MIN_MIB + val defaultFactor = EXECUTOR_MEMORY_OVERHEAD_FACTOR.defaultValue.get + val executorMem = ResourceProfile.MEMORY_OVERHEAD_MIN_MIB / defaultFactor * 2 + + // main app resource, overhead factor + val sparkConf = new SparkConf(false) + .set(CONTAINER_IMAGE, "spark-driver:latest") + .set(EXECUTOR_MEMORY.key, s"${executorMem.toInt}m") + + // New config should take precedence + val expectedFactor = 0.3 + sparkConf.set(MEMORY_OVERHEAD_FACTOR, expectedFactor) + + val conf = KubernetesTestConf.createExecutorConf( + sparkConf = sparkConf) + ResourceProfile.clearDefaultProfile() + val resourceProfile = ResourceProfile.getOrCreateDefaultProfile(sparkConf) + val step = new BasicExecutorFeatureStep(conf, new SecurityManager(baseConf), + resourceProfile) + val pod = step.configurePod(SparkPod.initialPod()) + val mem = amountAndFormat(pod.container.getResources.getRequests.get("memory")) + val expected = (executorMem + executorMem * expectedFactor).toInt + assert(mem === s"${expected}Mi") + } + + // There is always exactly one controller reference, and it points to the driver pod. private def checkOwnerReferences(executor: Pod, driverPodUid: String): Unit = { assert(executor.getMetadata.getOwnerReferences.size() === 1) diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala index 2fd13a5903243..9e4187837b680 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/rest/mesos/MesosRestServer.scala @@ -105,6 +105,7 @@ private[mesos] class MesosSubmitRequestServlet( val superviseDriver = sparkProperties.get(config.DRIVER_SUPERVISE.key) val driverMemory = sparkProperties.get(config.DRIVER_MEMORY.key) val driverMemoryOverhead = sparkProperties.get(config.DRIVER_MEMORY_OVERHEAD.key) + val driverMemoryOverheadFactor = sparkProperties.get(config.DRIVER_MEMORY_OVERHEAD_FACTOR.key) val driverCores = sparkProperties.get(config.DRIVER_CORES.key) val name = request.sparkProperties.getOrElse("spark.app.name", mainClass) @@ -121,8 +122,10 @@ private[mesos] class MesosSubmitRequestServlet( mainClass, appArgs, environmentVariables, extraClassPath, extraLibraryPath, javaOpts) val actualSuperviseDriver = superviseDriver.map(_.toBoolean).getOrElse(DEFAULT_SUPERVISE) val actualDriverMemory = driverMemory.map(Utils.memoryStringToMb).getOrElse(DEFAULT_MEMORY) + val actualDriverMemoryFactor = driverMemoryOverheadFactor.map(_.toDouble).getOrElse( + MEMORY_OVERHEAD_FACTOR) val actualDriverMemoryOverhead = driverMemoryOverhead.map(_.toInt).getOrElse( - math.max((MEMORY_OVERHEAD_FACTOR * actualDriverMemory).toInt, MEMORY_OVERHEAD_MIN)) + math.max((actualDriverMemoryFactor * actualDriverMemory).toInt, MEMORY_OVERHEAD_MIN)) val actualDriverCores = driverCores.map(_.toDouble).getOrElse(DEFAULT_CORES) val submitDate = new Date() val submissionId = newDriverId(submitDate) diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala index 38f83df00e428..524b1d514fafe 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerUtils.scala @@ -387,8 +387,7 @@ trait MesosSchedulerUtils extends Logging { } } - // These defaults copied from YARN - private val MEMORY_OVERHEAD_FRACTION = 0.10 + // This default copied from YARN private val MEMORY_OVERHEAD_MINIMUM = 384 /** @@ -400,8 +399,9 @@ trait MesosSchedulerUtils extends Logging { * (whichever is larger) */ def executorMemory(sc: SparkContext): Int = { + val memoryOverheadFactor = sc.conf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) sc.conf.get(mesosConfig.EXECUTOR_MEMORY_OVERHEAD).getOrElse( - math.max(MEMORY_OVERHEAD_FRACTION * sc.executorMemory, MEMORY_OVERHEAD_MINIMUM).toInt) + + math.max(memoryOverheadFactor * sc.executorMemory, MEMORY_OVERHEAD_MINIMUM).toInt) + sc.executorMemory } @@ -415,7 +415,8 @@ trait MesosSchedulerUtils extends Logging { * `MEMORY_OVERHEAD_FRACTION (=0.1) * driverMemory` */ def driverContainerMemory(driverDesc: MesosDriverDescription): Int = { - val defaultMem = math.max(MEMORY_OVERHEAD_FRACTION * driverDesc.mem, MEMORY_OVERHEAD_MINIMUM) + val memoryOverheadFactor = driverDesc.conf.get(DRIVER_MEMORY_OVERHEAD_FACTOR) + val defaultMem = math.max(memoryOverheadFactor * driverDesc.mem, MEMORY_OVERHEAD_MINIMUM) driverDesc.conf.get(mesosConfig.DRIVER_MEMORY_OVERHEAD).getOrElse(defaultMem.toInt) + driverDesc.mem } diff --git a/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala b/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala index 344fc38c84fb1..8bed43a54d5d0 100644 --- a/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala +++ b/resource-managers/mesos/src/test/scala/org/apache/spark/deploy/rest/mesos/MesosRestServerSuite.scala @@ -35,10 +35,16 @@ class MesosRestServerSuite extends SparkFunSuite testOverheadMemory(new SparkConf(), "2000M", 2384) } - test("test driver overhead memory with overhead factor") { + test("test driver overhead memory with default overhead factor") { testOverheadMemory(new SparkConf(), "5000M", 5500) } + test("test driver overhead memory with overhead factor") { + val conf = new SparkConf() + conf.set(config.DRIVER_MEMORY_OVERHEAD_FACTOR.key, "0.2") + testOverheadMemory(conf, "5000M", 6000) + } + test("test configured driver overhead memory") { val conf = new SparkConf() conf.set(config.DRIVER_MEMORY_OVERHEAD.key, "1000") diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index ae85ea8d6110a..f364b79216098 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -54,6 +54,7 @@ import org.apache.spark.api.python.PythonUtils import org.apache.spark.deploy.{SparkApplication, SparkHadoopUtil} import org.apache.spark.deploy.security.HadoopDelegationTokenManager import org.apache.spark.deploy.yarn.ResourceRequestHelper._ +import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._ import org.apache.spark.deploy.yarn.config._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ @@ -70,7 +71,6 @@ private[spark] class Client( extends Logging { import Client._ - import YarnSparkHadoopUtil._ private val yarnClient = YarnClient.createYarnClient private val hadoopConf = new YarnConfiguration(SparkHadoopUtil.newConfiguration(sparkConf)) @@ -85,6 +85,12 @@ private[spark] class Client( private var appMaster: ApplicationMaster = _ private var stagingDirPath: Path = _ + private val amMemoryOverheadFactor = if (isClusterMode) { + sparkConf.get(DRIVER_MEMORY_OVERHEAD_FACTOR) + } else { + AM_MEMORY_OVERHEAD_FACTOR + } + // AM related configurations private val amMemory = if (isClusterMode) { sparkConf.get(DRIVER_MEMORY).toInt @@ -94,7 +100,7 @@ private[spark] class Client( private val amMemoryOverhead = { val amMemoryOverheadEntry = if (isClusterMode) DRIVER_MEMORY_OVERHEAD else AM_MEMORY_OVERHEAD sparkConf.get(amMemoryOverheadEntry).getOrElse( - math.max((MEMORY_OVERHEAD_FACTOR * amMemory).toLong, + math.max((amMemoryOverheadFactor * amMemory).toLong, ResourceProfile.MEMORY_OVERHEAD_MIN_MIB)).toInt } private val amCores = if (isClusterMode) { @@ -107,8 +113,10 @@ private[spark] class Client( private val executorMemory = sparkConf.get(EXECUTOR_MEMORY) // Executor offHeap memory in MiB. protected val executorOffHeapMemory = Utils.executorOffHeapMemorySizeAsMb(sparkConf) + + private val executorMemoryOvereadFactor = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) private val executorMemoryOverhead = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD).getOrElse( - math.max((MEMORY_OVERHEAD_FACTOR * executorMemory).toLong, + math.max((executorMemoryOvereadFactor * executorMemory).toLong, ResourceProfile.MEMORY_OVERHEAD_MIN_MIB)).toInt private val isPython = sparkConf.get(IS_PYTHON_APP) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 54ab643f2755b..a85b7174673af 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -163,6 +163,8 @@ private[yarn] class YarnAllocator( private val isPythonApp = sparkConf.get(IS_PYTHON_APP) + private val memoryOverheadFactor = sparkConf.get(EXECUTOR_MEMORY_OVERHEAD_FACTOR) + private val launcherPool = ThreadUtils.newDaemonCachedThreadPool( "ContainerLauncher", sparkConf.get(CONTAINER_LAUNCH_MAX_THREADS)) @@ -280,9 +282,10 @@ private[yarn] class YarnAllocator( // track the resource profile if not already there getOrUpdateRunningExecutorForRPId(rp.id) logInfo(s"Resource profile ${rp.id} doesn't exist, adding it") + val resourcesWithDefaults = ResourceProfile.getResourcesForClusterManager(rp.id, rp.executorResources, - MEMORY_OVERHEAD_FACTOR, sparkConf, isPythonApp, resourceNameMapping) + memoryOverheadFactor, sparkConf, isPythonApp, resourceNameMapping) val customSparkResources = resourcesWithDefaults.customResources.map { case (name, execReq) => (name, execReq.amount.toString) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index f347e37ba24ab..1869c739e4844 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -34,11 +34,10 @@ import org.apache.spark.util.Utils object YarnSparkHadoopUtil { - // Additional memory overhead + // Additional memory overhead for application masters in client mode. // 10% was arrived at experimentally. In the interest of minimizing memory waste while covering // the common cases. Memory overhead tends to grow with container size. - - val MEMORY_OVERHEAD_FACTOR = 0.10 + val AM_MEMORY_OVERHEAD_FACTOR = 0.10 val ANY_HOST = "*" diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala index db65d128b07f0..ae010f11503dd 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnAllocatorSuite.scala @@ -706,4 +706,33 @@ class YarnAllocatorSuite extends SparkFunSuite with Matchers with BeforeAndAfter sparkConf.set(MEMORY_OFFHEAP_SIZE, originalOffHeapSize) } } + + test("SPARK-38194: Configurable memory overhead factor") { + val executorMemory = sparkConf.get(EXECUTOR_MEMORY).toLong + try { + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.5) + val (handler, _) = createAllocator(maxExecutors = 1, + additionalConfigs = Map(EXECUTOR_MEMORY.key -> executorMemory.toString)) + val defaultResource = handler.rpIdToYarnResource.get(defaultRPId) + val memory = defaultResource.getMemory + assert(memory == (executorMemory * 1.5).toLong) + } finally { + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.1) + } + } + + test("SPARK-38194: Memory overhead takes precedence over factor") { + val executorMemory = sparkConf.get(EXECUTOR_MEMORY) + try { + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.5) + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD, (executorMemory * 0.4).toLong) + val (handler, _) = createAllocator(maxExecutors = 1, + additionalConfigs = Map(EXECUTOR_MEMORY.key -> executorMemory.toString)) + val defaultResource = handler.rpIdToYarnResource.get(defaultRPId) + val memory = defaultResource.getMemory + assert(memory == (executorMemory * 1.4).toLong) + } finally { + sparkConf.set(EXECUTOR_MEMORY_OVERHEAD_FACTOR, 0.1) + } + } } From 64165ca215a641cbd9d3817cb39cf9bec17a6488 Mon Sep 17 00:00:00 2001 From: Kevin Wallimann Date: Tue, 22 Mar 2022 00:27:51 +0800 Subject: [PATCH 022/535] [SPARK-34805][SQL] Propagate metadata from nested columns in Alias ### What changes were proposed in this pull request? The metadata of a `GetStructField` expression is propagated in the `Alias` expression. ### Why are the changes needed? Currently, in a dataframe with nested structs, when selecting an inner struct, the metadata of that inner struct is lost. For example, suppose `df.schema.head.dataType.head.metadata` returns a non-empty Metadata object, then `df.select('Field0.SubField0').schema.head.metadata` returns an empty Metadata object The following snippet demonstrates the issue ``` import org.apache.spark.sql.Row import org.apache.spark.sql.types.{LongType, MetadataBuilder, StructField, StructType} val metadataAbc = new MetadataBuilder().putString("my-metadata", "abc").build() val metadataXyz = new MetadataBuilder().putString("my-metadata", "xyz").build() val schema = StructType(Seq( StructField("abc", StructType(Seq( StructField("xyz", LongType, nullable = true, metadataXyz) )), metadata = metadataAbc))) import scala.collection.JavaConverters._ val data = Seq(Row(Row(1L))).asJava val df = spark.createDataFrame(data, schema) println(df.select("abc").schema.head.metadata) // OK, metadata is {"my-metadata":"abc"} println(df.select("abc.xyz").schema.head.metadata) // NOT OK, metadata is {}, expected {"my-metadata","xyz"} ``` The issue can be reproduced in versions 3.2.0, 3.1.2 and 2.4.8 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a new test Closes #35270 from kevinwallimann/SPARK-34805. Authored-by: Kevin Wallimann Signed-off-by: Wenchen Fan (cherry picked from commit f8fd0237772a68bd1a0fd00e24038f3fe442b2ef) Signed-off-by: Wenchen Fan --- .../expressions/complexTypeExtractors.scala | 2 ++ .../catalyst/expressions/namedExpressions.scala | 13 ++++++++----- .../expressions/NamedExpressionSuite.scala | 15 ++++++++++++++- .../apache/spark/sql/ColumnExpressionSuite.scala | 13 +++++++++++++ 4 files changed, 37 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 99b43df50a19f..cb7e06b9934a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -146,6 +146,8 @@ case class GetStructField(child: Expression, ordinal: Int, name: Option[String] override protected def withNewChildInternal(newChild: Expression): GetStructField = copy(child = newChild) + + def metadata: Metadata = childSchema(ordinal).metadata } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index d5df6a12aa45b..47cdf21a8729f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -168,11 +168,8 @@ case class Alias(child: Expression, name: String)( override def metadata: Metadata = { explicitMetadata.getOrElse { child match { - case named: NamedExpression => - val builder = new MetadataBuilder().withMetadata(named.metadata) - nonInheritableMetadataKeys.foreach(builder.remove) - builder.build() - + case named: NamedExpression => removeNonInheritableMetadata(named.metadata) + case structField: GetStructField => removeNonInheritableMetadata(structField.metadata) case _ => Metadata.empty } } @@ -207,6 +204,12 @@ case class Alias(child: Expression, name: String)( "" } + private def removeNonInheritableMetadata(metadata: Metadata): Metadata = { + val builder = new MetadataBuilder().withMetadata(metadata) + nonInheritableMetadataKeys.foreach(builder.remove) + builder.build() + } + override def toString: String = s"$child AS $name#${exprId.id}$typeSuffix$delaySuffix" override protected final def otherCopyArgs: Seq[AnyRef] = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NamedExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NamedExpressionSuite.scala index f6cc19abaf9df..3e6f40f3b1ca5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NamedExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/NamedExpressionSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute -import org.apache.spark.sql.types.IntegerType +import org.apache.spark.sql.types.{IntegerType, MetadataBuilder, StringType, StructField, StructType} class NamedExpressionSuite extends SparkFunSuite { @@ -51,4 +51,17 @@ class NamedExpressionSuite extends SparkFunSuite { val attr13 = UnresolvedAttribute("`a.b`") assert(attr13.sql === "`a.b`") } + + test("SPARK-34805: non inheritable metadata should be removed from child struct in Alias") { + val nonInheritableMetadataKey = "non-inheritable-key" + val metadata = new MetadataBuilder() + .putString(nonInheritableMetadataKey, "value1") + .putString("key", "value2") + .build() + val structType = StructType(Seq(StructField("value", StringType, metadata = metadata))) + val alias = Alias(GetStructField(AttributeReference("a", structType)(), 0), "my-alias")( + nonInheritableMetadataKeys = Seq(nonInheritableMetadataKey)) + assert(!alias.metadata.contains(nonInheritableMetadataKey)) + assert(alias.metadata.contains("key")) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 995bf5d903ad4..b392b7536f5f3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -137,6 +137,19 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { assert(newCol.expr.asInstanceOf[NamedExpression].metadata.getString("key") === "value") } + test("SPARK-34805: as propagates metadata from nested column") { + val metadata = new MetadataBuilder + metadata.putString("key", "value") + val df = spark.createDataFrame(sparkContext.emptyRDD[Row], + StructType(Seq( + StructField("parent", StructType(Seq( + StructField("child", StringType, metadata = metadata.build()) + )))) + )) + val newCol = df("parent.child") + assert(newCol.expr.asInstanceOf[NamedExpression].metadata.getString("key") === "value") + } + test("collect on column produced by a binary operator") { val df = Seq((1, 2, 3)).toDF("a", "b", "c") checkAnswer(df.select(df("a") + df("b")), Seq(Row(3))) From 444bac2fe96525947bcbbe12aec215205bc09a6f Mon Sep 17 00:00:00 2001 From: zhangxudong1 Date: Mon, 21 Mar 2022 13:48:20 -0500 Subject: [PATCH 023/535] [SPARK-38606][DOC] Update document to make a good guide of multiple versions of the Spark Shuffle Service ### What changes were proposed in this pull request? Update document "Running multiple versions of the Spark Shuffle Service" to use colon when writing %s.classpath instead of commas. ### Why are the changes needed? User may be confused when they following the current document to deploy multi-versions Spark Shuffle Service on YARN. We have tried to run multiple versions of the Spark Shuffle Service according https://github.com/apache/spark/blob/master/docs/running-on-yarn.md but, it wont work. Then we solved it by using colon when writing %s.classpath instead of commas. Related discussing is in https://issues.apache.org/jira/browse/YARN-4577?focusedCommentId=17493624&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-17493624 ### Does this PR introduce _any_ user-facing change? User document changes. ### How was this patch tested? ![image](https://user-images.githubusercontent.com/7348090/159159057-d85b5235-8979-43fb-a613-aa0edd2067e9.png) Closes #35914 from TonyDoen/SPARK-38606. Authored-by: zhangxudong1 Signed-off-by: Sean Owen (cherry picked from commit a876f005ecef77d94bb8048fa1ff0841b7f3607a) Signed-off-by: Sean Owen --- docs/running-on-yarn.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 63c03760b8beb..48b0c7dc315c4 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -925,8 +925,14 @@ have configuration like: ```properties yarn.nodemanager.aux-services = spark_shuffle_x,spark_shuffle_y - yarn.nodemanager.aux-services.spark_shuffle_x.classpath = /path/to/spark-x-yarn-shuffle.jar,/path/to/spark-x-config - yarn.nodemanager.aux-services.spark_shuffle_y.classpath = /path/to/spark-y-yarn-shuffle.jar,/path/to/spark-y-config + yarn.nodemanager.aux-services.spark_shuffle_x.classpath = /path/to/spark-x-path/fat.jar:/path/to/spark-x-config + yarn.nodemanager.aux-services.spark_shuffle_y.classpath = /path/to/spark-y-path/fat.jar:/path/to/spark-y-config +``` +Or +```properties + yarn.nodemanager.aux-services = spark_shuffle_x,spark_shuffle_y + yarn.nodemanager.aux-services.spark_shuffle_x.classpath = /path/to/spark-x-path/*:/path/to/spark-x-config + yarn.nodemanager.aux-services.spark_shuffle_y.classpath = /path/to/spark-y-path/*:/path/to/spark-y-config ``` The two `spark-*-config` directories each contain one file, `spark-shuffle-site.xml`. These are XML From 0396c0519bdb8b0d8ec084b18339787a14ee20ac Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Tue, 22 Mar 2022 09:21:51 +0900 Subject: [PATCH 024/535] [SPARK-38604][SQL] Keep ceil and floor with only a single argument the same as before This is just the fix. I didn't add any tests yet. I am happy to do it, I just wasn't sure where the right place to put the tests would be. Once I have tests I will cherry-pick this back to the 3.3 branch and put up a PR for that too. I am also happy to update the comments because this is a bit confusing that there is no indication that things have changed. Closes #35913 from revans2/ceil_floor_no_arg_behavior. Authored-by: Robert (Bobby) Evans Signed-off-by: Hyukjin Kwon (cherry picked from commit 692e4b0360202f6849de53fb179cddaa3dd3f090) Signed-off-by: Hyukjin Kwon --- .../scala/org/apache/spark/sql/functions.scala | 8 ++++++-- .../org/apache/spark/sql/MathFunctionsSuite.scala | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 58e855e2314ce..17e1d48bb2cd6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1783,7 +1783,9 @@ object functions { * @group math_funcs * @since 1.4.0 */ - def ceil(e: Column): Column = ceil(e, lit(0)) + def ceil(e: Column): Column = withExpr { + UnresolvedFunction(Seq("ceil"), Seq(e.expr), isDistinct = false) + } /** * Computes the ceiling of the given value of `e` to 0 decimal places. @@ -1913,7 +1915,9 @@ object functions { * @group math_funcs * @since 1.4.0 */ - def floor(e: Column): Column = floor(e, lit(0)) + def floor(e: Column): Column = withExpr { + UnresolvedFunction(Seq("floor"), Seq(e.expr), isDistinct = false) + } /** * Computes the floor of the given column value to 0 decimal places. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index ab52cb98208f2..1a00491ccb1b1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -202,6 +202,13 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("ceil and ceiling") { testOneToOneMathFunction(ceil, (d: Double) => math.ceil(d).toLong) + // testOneToOneMathFunction does not validate the resulting data type + assert( + spark.range(1).select(ceil(col("id")).alias("a")).schema == + types.StructType(Seq(types.StructField("a", types.LongType)))) + assert( + spark.range(1).select(ceil(col("id"), lit(0)).alias("a")).schema == + types.StructType(Seq(types.StructField("a", types.DecimalType(20, 0))))) checkAnswer( sql("SELECT ceiling(0), ceiling(1), ceiling(1.5)"), Row(0L, 1L, 2L)) @@ -250,6 +257,13 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { test("floor") { testOneToOneMathFunction(floor, (d: Double) => math.floor(d).toLong) + // testOneToOneMathFunction does not validate the resulting data type + assert( + spark.range(1).select(floor(col("id")).alias("a")).schema == + types.StructType(Seq(types.StructField("a", types.LongType)))) + assert( + spark.range(1).select(floor(col("id"), lit(0)).alias("a")).schema == + types.StructType(Seq(types.StructField("a", types.DecimalType(20, 0))))) } test("factorial") { From 8b90205ae971eb0ef6e79d849abb14243bb7dc0f Mon Sep 17 00:00:00 2001 From: tianhanhu Date: Tue, 22 Mar 2022 13:23:39 +0800 Subject: [PATCH 025/535] [SPARK-38574][DOCS] Enrich the documentation of option avroSchema ### What changes were proposed in this pull request? Enrich Avro data source documentation to emphasize the difference between `avroSchema` which is an option, and `jsonFormatSchema` which is a parameter to function `from_avro` . When using `from_avro`, `avroSchema` option can be set to a compatible and evolved schema, while `jsonFormatSchema` has to be the actual schema. Elsewise, the behavior is undefined. ### Why are the changes needed? Reduce confusion caused by option and parameter having similar namings. ### Does this PR introduce _any_ user-facing change? Yes, Avro data source documentation is enriched a bit. ### How was this patch tested? No testing required. Just a documentation change Closes #35880 from tianhanhu/SPARK-38574. Authored-by: tianhanhu Signed-off-by: Gengliang Wang (cherry picked from commit ee5121a56e10ba2c65ae67159da472713cc5edd4) Signed-off-by: Gengliang Wang --- docs/sql-data-sources-avro.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/sql-data-sources-avro.md b/docs/sql-data-sources-avro.md index a26d56ff10377..db3e03c7bfbab 100644 --- a/docs/sql-data-sources-avro.md +++ b/docs/sql-data-sources-avro.md @@ -231,10 +231,11 @@ Data source options of Avro can be set via: Optional schema provided by a user in JSON format.
  • - When reading Avro, this option can be set to an evolved schema, which is compatible but different with + When reading Avro files or calling function from_avro, this option can be set to an evolved schema, which is compatible but different with the actual Avro schema. The deserialization schema will be consistent with the evolved schema. For example, if we set an evolved schema containing one additional column with a default value, - the reading result in Spark will contain the new column too. + the reading result in Spark will contain the new column too. Note that when using this option with + from_avro, you still need to pass the actual Avro schema as a parameter to the function.
  • When writing Avro, this option can be set if the expected output Avro schema doesn't match the From a14f3a52e8f9d9e58f6fdcf53aebb0802f477436 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Tue, 22 Mar 2022 15:45:51 +0800 Subject: [PATCH 026/535] [SPARK-38432][SQL] Refactor framework so as JDBC dialect could compile filter by self way ### What changes were proposed in this pull request? Currently, Spark DS V2 could push down filters into JDBC source. However, only the most basic form of filter is supported. On the other hand, some JDBC source could not compile the filters by themselves way. This PR reactor the framework so as JDBC dialect could compile expression by self way. First, The framework translate catalyst expressions to DS V2 filters. Second, The JDBC dialect could compile DS V2 filters to different SQL syntax. ### Why are the changes needed? Make the framework be more common use. ### Does this PR introduce _any_ user-facing change? 'No'. The feature is not released. ### How was this patch tested? Exists tests. Closes #35768 from beliefer/SPARK-38432_new. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit 53df45650af1e48e01e392caed6c1f83c2e9e9f1) Signed-off-by: Wenchen Fan --- project/MimaExcludes.scala | 2 + .../sql/connector/expressions/Expression.java | 17 ++ .../expressions/GeneralScalarExpression.java | 98 ++----- .../sql/connector/expressions/Literal.java | 3 + .../connector/expressions/NamedReference.java | 6 + .../sql/connector/expressions/SortOrder.java | 3 + .../sql/connector/expressions/Transform.java | 8 +- .../connector/expressions/aggregate/Avg.java | 3 + .../expressions/aggregate/Count.java | 3 + .../expressions/aggregate/CountStar.java | 4 + .../aggregate/GeneralAggregateFunc.java | 13 +- .../connector/expressions/aggregate/Max.java | 3 + .../connector/expressions/aggregate/Min.java | 3 + .../connector/expressions/aggregate/Sum.java | 3 + .../expressions/filter/AlwaysFalse.java | 30 +- .../expressions/filter/AlwaysTrue.java | 28 +- .../sql/connector/expressions/filter/And.java | 14 +- .../expressions/filter/BinaryComparison.java | 60 ---- .../expressions/filter/BinaryFilter.java | 65 ----- .../expressions/filter/EqualNullSafe.java | 40 --- .../connector/expressions/filter/EqualTo.java | 39 --- .../connector/expressions/filter/Filter.java | 40 --- .../expressions/filter/GreaterThan.java | 39 --- .../filter/GreaterThanOrEqual.java | 39 --- .../sql/connector/expressions/filter/In.java | 76 ----- .../expressions/filter/IsNotNull.java | 58 ---- .../connector/expressions/filter/IsNull.java | 58 ---- .../expressions/filter/LessThan.java | 39 --- .../expressions/filter/LessThanOrEqual.java | 39 --- .../sql/connector/expressions/filter/Not.java | 31 +- .../sql/connector/expressions/filter/Or.java | 14 +- .../expressions/filter/Predicate.java | 149 ++++++++++ .../expressions/filter/StringContains.java | 39 --- .../expressions/filter/StringEndsWith.java | 39 --- .../expressions/filter/StringPredicate.java | 60 ---- .../expressions/filter/StringStartsWith.java | 41 --- .../read/SupportsPushDownV2Filters.java | 35 +-- .../util/V2ExpressionSQLBuilder.java | 100 +++++-- .../apache/spark/sql/sources/filters.scala | 60 ++++ .../expressions/TransformExtractorSuite.scala | 1 - .../catalyst/util/V2ExpressionBuilder.scala | 113 ++++++-- .../sql/execution/DataSourceScanExec.scala | 11 +- .../datasources/DataSourceStrategy.scala | 11 +- .../execution/datasources/jdbc/JDBCRDD.scala | 77 +---- .../datasources/jdbc/JDBCRelation.scala | 18 +- .../datasources/v2/DataSourceV2Strategy.scala | 137 +++------ .../datasources/v2/PushDownUtils.scala | 17 +- .../datasources/v2/PushedDownOperators.scala | 4 +- .../v2/V2ScanRelationPushDown.scala | 8 +- .../datasources/v2/jdbc/JDBCScan.scala | 9 +- .../datasources/v2/jdbc/JDBCScanBuilder.scala | 20 +- .../apache/spark/sql/jdbc/DB2Dialect.scala | 24 +- .../apache/spark/sql/jdbc/DerbyDialect.scala | 16 +- .../org/apache/spark/sql/jdbc/H2Dialect.scala | 28 +- .../apache/spark/sql/jdbc/JdbcDialects.scala | 22 +- .../spark/sql/jdbc/MsSqlServerDialect.scala | 16 +- .../apache/spark/sql/jdbc/MySQLDialect.scala | 16 +- .../apache/spark/sql/jdbc/OracleDialect.scala | 28 +- .../spark/sql/jdbc/PostgresDialect.scala | 28 +- .../spark/sql/jdbc/TeradataDialect.scala | 28 +- .../JavaAdvancedDataSourceV2WithV2Filter.java | 75 ++--- .../sql/connector/DataSourceV2Suite.scala | 43 +-- .../v2/DataSourceV2StrategySuite.scala | 10 +- .../datasources/v2/V2FiltersSuite.scala | 204 ------------- .../datasources/v2/V2PredicateSuite.scala | 188 ++++++++++++ .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 19 +- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 268 ++++++++++++++---- 67 files changed, 1214 insertions(+), 1626 deletions(-) delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/BinaryComparison.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/BinaryFilter.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/EqualNullSafe.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/EqualTo.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Filter.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/GreaterThan.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/GreaterThanOrEqual.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/In.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/IsNotNull.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/IsNull.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/LessThan.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/LessThanOrEqual.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Predicate.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringContains.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringEndsWith.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringPredicate.java delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringStartsWith.java delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2FiltersSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2PredicateSuite.scala diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index b045d4615d3c4..dc3b661406537 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -49,6 +49,8 @@ object MimaExcludes { ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.util.ExecutionListenerManager.this"), // [SPARK-37786][SQL] StreamingQueryListener support use SQLConf.get to get corresponding SessionState's SQLConf ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQueryManager.this"), + // [SPARK-38432][SQL] Reactor framework so as JDBC dialect could compile filter by self way + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.sources.Filter.toV2"), // [SPARK-37600][BUILD] Upgrade to Hadoop 3.3.2 ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4Compressor"), diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expression.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expression.java index 9f6c0975ae0e1..76dfe73f666cf 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expression.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Expression.java @@ -17,6 +17,8 @@ package org.apache.spark.sql.connector.expressions; +import java.util.Arrays; + import org.apache.spark.annotation.Evolving; /** @@ -26,8 +28,23 @@ */ @Evolving public interface Expression { + Expression[] EMPTY_EXPRESSION = new Expression[0]; + /** * Format the expression as a human readable SQL-like string. */ default String describe() { return this.toString(); } + + /** + * Returns an array of the children of this node. Children should not change. + */ + Expression[] children(); + + /** + * List of fields or columns that are referenced by this expression. + */ + default NamedReference[] references() { + return Arrays.stream(children()).map(e -> e.references()) + .flatMap(Arrays::stream).distinct().toArray(NamedReference[]::new); + } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java index b3dd2cbfe3d7d..8952761f9ef34 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java @@ -19,77 +19,19 @@ import java.io.Serializable; import java.util.Arrays; +import java.util.Objects; import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.connector.expressions.filter.Predicate; import org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder; -// scalastyle:off line.size.limit /** * The general representation of SQL scalar expressions, which contains the upper-cased - * expression name and all the children expressions. + * expression name and all the children expressions. Please also see {@link Predicate} + * for the supported predicate expressions. *

    * The currently supported SQL scalar expressions: *

      - *
    1. Name: IS_NULL - *
        - *
      • SQL semantic: expr IS NULL
      • - *
      • Since version: 3.3.0
      • - *
      - *
    2. - *
    3. Name: IS_NOT_NULL - *
        - *
      • SQL semantic: expr IS NOT NULL
      • - *
      • Since version: 3.3.0
      • - *
      - *
    4. - *
    5. Name: = - *
        - *
      • SQL semantic: expr1 = expr2
      • - *
      • Since version: 3.3.0
      • - *
      - *
    6. - *
    7. Name: != - *
        - *
      • SQL semantic: expr1 != expr2
      • - *
      • Since version: 3.3.0
      • - *
      - *
    8. - *
    9. Name: <> - *
        - *
      • SQL semantic: expr1 <> expr2
      • - *
      • Since version: 3.3.0
      • - *
      - *
    10. - *
    11. Name: <=> - *
        - *
      • SQL semantic: expr1 <=> expr2
      • - *
      • Since version: 3.3.0
      • - *
      - *
    12. - *
    13. Name: < - *
        - *
      • SQL semantic: expr1 < expr2
      • - *
      • Since version: 3.3.0
      • - *
      - *
    14. - *
    15. Name: <= - *
        - *
      • SQL semantic: expr1 <= expr2
      • - *
      • Since version: 3.3.0
      • - *
      - *
    16. - *
    17. Name: > - *
        - *
      • SQL semantic: expr1 > expr2
      • - *
      • Since version: 3.3.0
      • - *
      - *
    18. - *
    19. Name: >= - *
        - *
      • SQL semantic: expr1 >= expr2
      • - *
      • Since version: 3.3.0
      • - *
      - *
    20. *
    21. Name: + *
        *
      • SQL semantic: expr1 + expr2
      • @@ -138,24 +80,6 @@ *
      • Since version: 3.3.0
      • *
      *
    22. - *
    23. Name: AND - *
        - *
      • SQL semantic: expr1 AND expr2
      • - *
      • Since version: 3.3.0
      • - *
      - *
    24. - *
    25. Name: OR - *
        - *
      • SQL semantic: expr1 OR expr2
      • - *
      • Since version: 3.3.0
      • - *
      - *
    26. - *
    27. Name: NOT - *
        - *
      • SQL semantic: NOT expr
      • - *
      • Since version: 3.3.0
      • - *
      - *
    28. *
    29. Name: ~ *
        *
      • SQL semantic: ~ expr
      • @@ -176,7 +100,6 @@ * * @since 3.3.0 */ -// scalastyle:on line.size.limit @Evolving public class GeneralScalarExpression implements Expression, Serializable { private String name; @@ -190,6 +113,19 @@ public GeneralScalarExpression(String name, Expression[] children) { public String name() { return name; } public Expression[] children() { return children; } + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + GeneralScalarExpression that = (GeneralScalarExpression) o; + return Objects.equals(name, that.name) && Arrays.equals(children, that.children); + } + + @Override + public int hashCode() { + return Objects.hash(name, children); + } + @Override public String toString() { V2ExpressionSQLBuilder builder = new V2ExpressionSQLBuilder(); diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Literal.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Literal.java index df9e58fa319fd..5e8aeafe74515 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Literal.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Literal.java @@ -40,4 +40,7 @@ public interface Literal extends Expression { * Returns the SQL data type of the literal. */ DataType dataType(); + + @Override + default Expression[] children() { return EMPTY_EXPRESSION; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/NamedReference.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/NamedReference.java index 167432fa0e86a..8c0f029a35832 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/NamedReference.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/NamedReference.java @@ -32,4 +32,10 @@ public interface NamedReference extends Expression { * Each string in the returned array represents a field name. */ String[] fieldNames(); + + @Override + default Expression[] children() { return EMPTY_EXPRESSION; } + + @Override + default NamedReference[] references() { return new NamedReference[]{ this }; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortOrder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortOrder.java index 72252457df26e..51401786ca5d7 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortOrder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/SortOrder.java @@ -40,4 +40,7 @@ public interface SortOrder extends Expression { * Returns the null ordering. */ NullOrdering nullOrdering(); + + @Override + default Expression[] children() { return new Expression[]{ expression() }; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Transform.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Transform.java index 297205825c6a4..e9ead7fc5fd2a 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Transform.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Transform.java @@ -34,13 +34,11 @@ public interface Transform extends Expression { */ String name(); - /** - * Returns all field references in the transform arguments. - */ - NamedReference[] references(); - /** * Returns the arguments passed to the transform function. */ Expression[] arguments(); + + @Override + default Expression[] children() { return arguments(); } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Avg.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Avg.java index cc9d27ab8e59c..d09e5f7ba28a3 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Avg.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Avg.java @@ -38,6 +38,9 @@ public Avg(Expression column, boolean isDistinct) { public Expression column() { return input; } public boolean isDistinct() { return isDistinct; } + @Override + public Expression[] children() { return new Expression[]{ input }; } + @Override public String toString() { if (isDistinct) { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Count.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Count.java index 54c64b83c5d52..c840b29ad2546 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Count.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Count.java @@ -38,6 +38,9 @@ public Count(Expression column, boolean isDistinct) { public Expression column() { return input; } public boolean isDistinct() { return isDistinct; } + @Override + public Expression[] children() { return new Expression[]{ input }; } + @Override public String toString() { if (isDistinct) { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/CountStar.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/CountStar.java index 13801194b63cb..ff8639cbd05a2 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/CountStar.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/CountStar.java @@ -18,6 +18,7 @@ package org.apache.spark.sql.connector.expressions.aggregate; import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.connector.expressions.Expression; /** * An aggregate function that returns the number of rows in a group. @@ -30,6 +31,9 @@ public final class CountStar implements AggregateFunc { public CountStar() { } + @Override + public Expression[] children() { return EMPTY_EXPRESSION; } + @Override public String toString() { return "COUNT(*)"; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/GeneralAggregateFunc.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/GeneralAggregateFunc.java index 0ff26c8875b7a..7016644543447 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/GeneralAggregateFunc.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/GeneralAggregateFunc.java @@ -22,7 +22,6 @@ import org.apache.spark.annotation.Evolving; import org.apache.spark.sql.connector.expressions.Expression; -import org.apache.spark.sql.connector.expressions.NamedReference; /** * The general implementation of {@link AggregateFunc}, which contains the upper-cased function @@ -46,21 +45,23 @@ public final class GeneralAggregateFunc implements AggregateFunc { private final String name; private final boolean isDistinct; - private final NamedReference[] inputs; + private final Expression[] children; public String name() { return name; } public boolean isDistinct() { return isDistinct; } - public NamedReference[] inputs() { return inputs; } - public GeneralAggregateFunc(String name, boolean isDistinct, NamedReference[] inputs) { + public GeneralAggregateFunc(String name, boolean isDistinct, Expression[] children) { this.name = name; this.isDistinct = isDistinct; - this.inputs = inputs; + this.children = children; } + @Override + public Expression[] children() { return children; } + @Override public String toString() { - String inputsString = Arrays.stream(inputs) + String inputsString = Arrays.stream(children) .map(Expression::describe) .collect(Collectors.joining(", ")); if (isDistinct) { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Max.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Max.java index 971aac279e09b..089d2bd751763 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Max.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Max.java @@ -33,6 +33,9 @@ public final class Max implements AggregateFunc { public Expression column() { return input; } + @Override + public Expression[] children() { return new Expression[]{ input }; } + @Override public String toString() { return "MAX(" + input.describe() + ")"; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Min.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Min.java index 8d0644b0f0103..253cdea41dd76 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Min.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Min.java @@ -33,6 +33,9 @@ public final class Min implements AggregateFunc { public Expression column() { return input; } + @Override + public Expression[] children() { return new Expression[]{ input }; } + @Override public String toString() { return "MIN(" + input.describe() + ")"; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Sum.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Sum.java index 721ef31c9a817..4e01b92d8c369 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Sum.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/aggregate/Sum.java @@ -38,6 +38,9 @@ public Sum(Expression column, boolean isDistinct) { public Expression column() { return input; } public boolean isDistinct() { return isDistinct; } + @Override + public Expression[] children() { return new Expression[]{ input }; } + @Override public String toString() { if (isDistinct) { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/AlwaysFalse.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/AlwaysFalse.java index 72ed83f86df6d..accdd1acd7d0e 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/AlwaysFalse.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/AlwaysFalse.java @@ -17,34 +17,30 @@ package org.apache.spark.sql.connector.expressions.filter; -import java.util.Objects; - import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; +import org.apache.spark.sql.connector.expressions.Literal; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; /** - * A filter that always evaluates to {@code false}. + * A predicate that always evaluates to {@code false}. * * @since 3.3.0 */ @Evolving -public final class AlwaysFalse extends Filter { +public final class AlwaysFalse extends Predicate implements Literal { - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - return true; + public AlwaysFalse() { + super("ALWAYS_FALSE", new Predicate[]{}); } - @Override - public int hashCode() { - return Objects.hash(); + public Boolean value() { + return false; } - @Override - public String toString() { return "FALSE"; } + public DataType dataType() { + return DataTypes.BooleanType; + } - @Override - public NamedReference[] references() { return EMPTY_REFERENCE; } + public String toString() { return "FALSE"; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/AlwaysTrue.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/AlwaysTrue.java index b6d39c3f64a77..5a14f64b9b7e2 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/AlwaysTrue.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/AlwaysTrue.java @@ -17,34 +17,30 @@ package org.apache.spark.sql.connector.expressions.filter; -import java.util.Objects; - import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; +import org.apache.spark.sql.connector.expressions.Literal; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; /** - * A filter that always evaluates to {@code true}. + * A predicate that always evaluates to {@code true}. * * @since 3.3.0 */ @Evolving -public final class AlwaysTrue extends Filter { +public final class AlwaysTrue extends Predicate implements Literal { + + public AlwaysTrue() { + super("ALWAYS_TRUE", new Predicate[]{}); + } - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; + public Boolean value() { return true; } - @Override - public int hashCode() { - return Objects.hash(); + public DataType dataType() { + return DataTypes.BooleanType; } - @Override public String toString() { return "TRUE"; } - - @Override - public NamedReference[] references() { return EMPTY_REFERENCE; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/And.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/And.java index e0b8b13acb158..179a4b3c6349d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/And.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/And.java @@ -20,20 +20,18 @@ import org.apache.spark.annotation.Evolving; /** - * A filter that evaluates to {@code true} iff both {@code left} and {@code right} evaluate to + * A predicate that evaluates to {@code true} iff both {@code left} and {@code right} evaluate to * {@code true}. * * @since 3.3.0 */ @Evolving -public final class And extends BinaryFilter { +public final class And extends Predicate { - public And(Filter left, Filter right) { - super(left, right); + public And(Predicate left, Predicate right) { + super("AND", new Predicate[]{left, right}); } - @Override - public String toString() { - return String.format("(%s) AND (%s)", left.describe(), right.describe()); - } + public Predicate left() { return (Predicate) children()[0]; } + public Predicate right() { return (Predicate) children()[1]; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/BinaryComparison.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/BinaryComparison.java deleted file mode 100644 index 0ae6e5af3ca1a..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/BinaryComparison.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import java.util.Objects; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.Literal; -import org.apache.spark.sql.connector.expressions.NamedReference; - -/** - * Base class for {@link EqualNullSafe}, {@link EqualTo}, {@link GreaterThan}, - * {@link GreaterThanOrEqual}, {@link LessThan}, {@link LessThanOrEqual} - * - * @since 3.3.0 - */ -@Evolving -abstract class BinaryComparison extends Filter { - protected final NamedReference column; - protected final Literal value; - - protected BinaryComparison(NamedReference column, Literal value) { - this.column = column; - this.value = value; - } - - public NamedReference column() { return column; } - public Literal value() { return value; } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - BinaryComparison that = (BinaryComparison) o; - return Objects.equals(column, that.column) && Objects.equals(value, that.value); - } - - @Override - public int hashCode() { - return Objects.hash(column, value); - } - - @Override - public NamedReference[] references() { return new NamedReference[] { column }; } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/BinaryFilter.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/BinaryFilter.java deleted file mode 100644 index ac4b9f281e9ca..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/BinaryFilter.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import java.util.Objects; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; - -/** - * Base class for {@link And}, {@link Or} - * - * @since 3.3.0 - */ -@Evolving -abstract class BinaryFilter extends Filter { - protected final Filter left; - protected final Filter right; - - protected BinaryFilter(Filter left, Filter right) { - this.left = left; - this.right = right; - } - - public Filter left() { return left; } - public Filter right() { return right; } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - BinaryFilter and = (BinaryFilter) o; - return Objects.equals(left, and.left) && Objects.equals(right, and.right); - } - - @Override - public int hashCode() { - return Objects.hash(left, right); - } - - @Override - public NamedReference[] references() { - NamedReference[] refLeft = left.references(); - NamedReference[] refRight = right.references(); - NamedReference[] arr = new NamedReference[refLeft.length + refRight.length]; - System.arraycopy(refLeft, 0, arr, 0, refLeft.length); - System.arraycopy(refRight, 0, arr, refLeft.length, refRight.length); - return arr; - } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/EqualNullSafe.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/EqualNullSafe.java deleted file mode 100644 index 34b529194e075..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/EqualNullSafe.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.Literal; -import org.apache.spark.sql.connector.expressions.NamedReference; - -/** - * Performs equality comparison, similar to {@link EqualTo}. However, this differs from - * {@link EqualTo} in that it returns {@code true} (rather than NULL) if both inputs are NULL, - * and {@code false} (rather than NULL) if one of the input is NULL and the other is not NULL. - * - * @since 3.3.0 - */ -@Evolving -public final class EqualNullSafe extends BinaryComparison { - - public EqualNullSafe(NamedReference column, Literal value) { - super(column, value); - } - - @Override - public String toString() { return this.column.describe() + " <=> " + value.describe(); } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/EqualTo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/EqualTo.java deleted file mode 100644 index b9c4fe053b83c..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/EqualTo.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.Literal; -import org.apache.spark.sql.connector.expressions.NamedReference; - -/** - * A filter that evaluates to {@code true} iff the {@code column} evaluates to a value - * equal to {@code value}. - * - * @since 3.3.0 - */ -@Evolving -public final class EqualTo extends BinaryComparison { - - public EqualTo(NamedReference column, Literal value) { - super(column, value); - } - - @Override - public String toString() { return column.describe() + " = " + value.describe(); } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Filter.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Filter.java deleted file mode 100644 index af87e76d2ff7d..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Filter.java +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import java.io.Serializable; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.Expression; -import org.apache.spark.sql.connector.expressions.NamedReference; - -/** - * Filter base class - * - * @since 3.3.0 - */ -@Evolving -public abstract class Filter implements Expression, Serializable { - - protected static final NamedReference[] EMPTY_REFERENCE = new NamedReference[0]; - - /** - * Returns list of columns that are referenced by this filter. - */ - public abstract NamedReference[] references(); -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/GreaterThan.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/GreaterThan.java deleted file mode 100644 index a3374f359ea29..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/GreaterThan.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.Literal; -import org.apache.spark.sql.connector.expressions.NamedReference; - -/** - * A filter that evaluates to {@code true} iff the {@code column} evaluates to a value - * greater than {@code value}. - * - * @since 3.3.0 - */ -@Evolving -public final class GreaterThan extends BinaryComparison { - - public GreaterThan(NamedReference column, Literal value) { - super(column, value); - } - - @Override - public String toString() { return column.describe() + " > " + value.describe(); } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/GreaterThanOrEqual.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/GreaterThanOrEqual.java deleted file mode 100644 index 4ee921014da41..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/GreaterThanOrEqual.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.Literal; -import org.apache.spark.sql.connector.expressions.NamedReference; - -/** - * A filter that evaluates to {@code true} iff the {@code column} evaluates to a value - * greater than or equal to {@code value}. - * - * @since 3.3.0 - */ -@Evolving -public final class GreaterThanOrEqual extends BinaryComparison { - - public GreaterThanOrEqual(NamedReference column, Literal value) { - super(column, value); - } - - @Override - public String toString() { return column.describe() + " >= " + value.describe(); } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/In.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/In.java deleted file mode 100644 index 8d6490b8984fd..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/In.java +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import java.util.Arrays; -import java.util.Objects; -import java.util.stream.Collectors; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.Literal; -import org.apache.spark.sql.connector.expressions.NamedReference; - -/** - * A filter that evaluates to {@code true} iff the {@code column} evaluates to one of the - * {@code values} in the array. - * - * @since 3.3.0 - */ -@Evolving -public final class In extends Filter { - static final int MAX_LEN_TO_PRINT = 50; - private final NamedReference column; - private final Literal[] values; - - public In(NamedReference column, Literal[] values) { - this.column = column; - this.values = values; - } - - public NamedReference column() { return column; } - public Literal[] values() { return values; } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - In in = (In) o; - return Objects.equals(column, in.column) && values.length == in.values.length - && Arrays.asList(values).containsAll(Arrays.asList(in.values)); - } - - @Override - public int hashCode() { - int result = Objects.hash(column); - result = 31 * result + Arrays.hashCode(values); - return result; - } - - @Override - public String toString() { - String res = Arrays.stream(values).limit((MAX_LEN_TO_PRINT)).map(Literal::describe) - .collect(Collectors.joining(", ")); - if (values.length > MAX_LEN_TO_PRINT) { - res += "..."; - } - return column.describe() + " IN (" + res + ")"; - } - - @Override - public NamedReference[] references() { return new NamedReference[] { column }; } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/IsNotNull.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/IsNotNull.java deleted file mode 100644 index 2cf000e99878e..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/IsNotNull.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import java.util.Objects; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; - -/** - * A filter that evaluates to {@code true} iff the {@code column} evaluates to a non-null value. - * - * @since 3.3.0 - */ -@Evolving -public final class IsNotNull extends Filter { - private final NamedReference column; - - public IsNotNull(NamedReference column) { - this.column = column; - } - - public NamedReference column() { return column; } - - @Override - public String toString() { return column.describe() + " IS NOT NULL"; } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - IsNotNull isNotNull = (IsNotNull) o; - return Objects.equals(column, isNotNull.column); - } - - @Override - public int hashCode() { - return Objects.hash(column); - } - - @Override - public NamedReference[] references() { return new NamedReference[] { column }; } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/IsNull.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/IsNull.java deleted file mode 100644 index 1cd497c02242e..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/IsNull.java +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import java.util.Objects; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; - -/** - * A filter that evaluates to {@code true} iff the {@code column} evaluates to null. - * - * @since 3.3.0 - */ -@Evolving -public final class IsNull extends Filter { - private final NamedReference column; - - public IsNull(NamedReference column) { - this.column = column; - } - - public NamedReference column() { return column; } - - @Override - public String toString() { return column.describe() + " IS NULL"; } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - IsNull isNull = (IsNull) o; - return Objects.equals(column, isNull.column); - } - - @Override - public int hashCode() { - return Objects.hash(column); - } - - @Override - public NamedReference[] references() { return new NamedReference[] { column }; } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/LessThan.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/LessThan.java deleted file mode 100644 index 9fa5cfb87f527..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/LessThan.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.Literal; -import org.apache.spark.sql.connector.expressions.NamedReference; - -/** - * A filter that evaluates to {@code true} iff the {@code column} evaluates to a value - * less than {@code value}. - * - * @since 3.3.0 - */ -@Evolving -public final class LessThan extends BinaryComparison { - - public LessThan(NamedReference column, Literal value) { - super(column, value); - } - - @Override - public String toString() { return column.describe() + " < " + value.describe(); } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/LessThanOrEqual.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/LessThanOrEqual.java deleted file mode 100644 index a41b3c8045d5a..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/LessThanOrEqual.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.Literal; -import org.apache.spark.sql.connector.expressions.NamedReference; - -/** - * A filter that evaluates to {@code true} iff the {@code column} evaluates to a value - * less than or equal to {@code value}. - * - * @since 3.3.0 - */ -@Evolving -public final class LessThanOrEqual extends BinaryComparison { - - public LessThanOrEqual(NamedReference column, Literal value) { - super(column, value); - } - - @Override - public String toString() { return column.describe() + " <= " + value.describe(); } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Not.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Not.java index 69746f59ee933..d65c9f0b6c3d9 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Not.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Not.java @@ -17,40 +17,19 @@ package org.apache.spark.sql.connector.expressions.filter; -import java.util.Objects; - import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; /** - * A filter that evaluates to {@code true} iff {@code child} is evaluated to {@code false}. + * A predicate that evaluates to {@code true} iff {@code child} is evaluated to {@code false}. * * @since 3.3.0 */ @Evolving -public final class Not extends Filter { - private final Filter child; - - public Not(Filter child) { this.child = child; } - - public Filter child() { return child; } +public final class Not extends Predicate { - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - Not not = (Not) o; - return Objects.equals(child, not.child); + public Not(Predicate child) { + super("NOT", new Predicate[]{child}); } - @Override - public int hashCode() { - return Objects.hash(child); - } - - @Override - public String toString() { return "NOT (" + child.describe() + ")"; } - - @Override - public NamedReference[] references() { return child.references(); } + public Predicate child() { return (Predicate) children()[0]; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Or.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Or.java index baa33d849feef..7f1717cc7da58 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Or.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Or.java @@ -20,20 +20,18 @@ import org.apache.spark.annotation.Evolving; /** - * A filter that evaluates to {@code true} iff at least one of {@code left} or {@code right} + * A predicate that evaluates to {@code true} iff at least one of {@code left} or {@code right} * evaluates to {@code true}. * * @since 3.3.0 */ @Evolving -public final class Or extends BinaryFilter { +public final class Or extends Predicate { - public Or(Filter left, Filter right) { - super(left, right); + public Or(Predicate left, Predicate right) { + super("OR", new Predicate[]{left, right}); } - @Override - public String toString() { - return String.format("(%s) OR (%s)", left.describe(), right.describe()); - } + public Predicate left() { return (Predicate) children()[0]; } + public Predicate right() { return (Predicate) children()[1]; } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Predicate.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Predicate.java new file mode 100644 index 0000000000000..e58cddc274c5f --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/Predicate.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.expressions.filter; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.connector.expressions.Expression; +import org.apache.spark.sql.connector.expressions.GeneralScalarExpression; + +/** + * The general representation of predicate expressions, which contains the upper-cased expression + * name and all the children expressions. You can also use these concrete subclasses for better + * type safety: {@link And}, {@link Or}, {@link Not}, {@link AlwaysTrue}, {@link AlwaysFalse}. + *

        + * The currently supported predicate expressions: + *

          + *
        1. Name: IS_NULL + *
            + *
          • SQL semantic: expr IS NULL
          • + *
          • Since version: 3.3.0
          • + *
          + *
        2. + *
        3. Name: IS_NOT_NULL + *
            + *
          • SQL semantic: expr IS NOT NULL
          • + *
          • Since version: 3.3.0
          • + *
          + *
        4. + *
        5. Name: STARTS_WITH + *
            + *
          • SQL semantic: expr1 LIKE 'expr2%'
          • + *
          • Since version: 3.3.0
          • + *
          + *
        6. + *
        7. Name: ENDS_WITH + *
            + *
          • SQL semantic: expr1 LIKE '%expr2'
          • + *
          • Since version: 3.3.0
          • + *
          + *
        8. + *
        9. Name: CONTAINS + *
            + *
          • SQL semantic: expr1 LIKE '%expr2%'
          • + *
          • Since version: 3.3.0
          • + *
          + *
        10. + *
        11. Name: IN + *
            + *
          • SQL semantic: expr IN (expr1, expr2, ...)
          • + *
          • Since version: 3.3.0
          • + *
          + *
        12. + *
        13. Name: = + *
            + *
          • SQL semantic: expr1 = expr2
          • + *
          • Since version: 3.3.0
          • + *
          + *
        14. + *
        15. Name: <> + *
            + *
          • SQL semantic: expr1 <> expr2
          • + *
          • Since version: 3.3.0
          • + *
          + *
        16. + *
        17. Name: <=> + *
            + *
          • SQL semantic: null-safe version of expr1 = expr2
          • + *
          • Since version: 3.3.0
          • + *
          + *
        18. + *
        19. Name: < + *
            + *
          • SQL semantic: expr1 < expr2
          • + *
          • Since version: 3.3.0
          • + *
          + *
        20. + *
        21. Name: <= + *
            + *
          • SQL semantic: expr1 <= expr2
          • + *
          • Since version: 3.3.0
          • + *
          + *
        22. + *
        23. Name: > + *
            + *
          • SQL semantic: expr1 > expr2
          • + *
          • Since version: 3.3.0
          • + *
          + *
        24. + *
        25. Name: >= + *
            + *
          • SQL semantic: expr1 >= expr2
          • + *
          • Since version: 3.3.0
          • + *
          + *
        26. + *
        27. Name: AND + *
            + *
          • SQL semantic: expr1 AND expr2
          • + *
          • Since version: 3.3.0
          • + *
          + *
        28. + *
        29. Name: OR + *
            + *
          • SQL semantic: expr1 OR expr2
          • + *
          • Since version: 3.3.0
          • + *
          + *
        30. + *
        31. Name: NOT + *
            + *
          • SQL semantic: NOT expr
          • + *
          • Since version: 3.3.0
          • + *
          + *
        32. + *
        33. Name: ALWAYS_TRUE + *
            + *
          • SQL semantic: TRUE
          • + *
          • Since version: 3.3.0
          • + *
          + *
        34. + *
        35. Name: ALWAYS_FALSE + *
            + *
          • SQL semantic: FALSE
          • + *
          • Since version: 3.3.0
          • + *
          + *
        36. + *
        + * + * @since 3.3.0 + */ +@Evolving +public class Predicate extends GeneralScalarExpression { + + public Predicate(String name, Expression[] children) { + super(name, children); + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringContains.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringContains.java deleted file mode 100644 index 9a01e4d574888..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringContains.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; -import org.apache.spark.unsafe.types.UTF8String; - -/** - * A filter that evaluates to {@code true} iff the {@code column} evaluates to - * a string that contains {@code value}. - * - * @since 3.3.0 - */ -@Evolving -public final class StringContains extends StringPredicate { - - public StringContains(NamedReference column, UTF8String value) { - super(column, value); - } - - @Override - public String toString() { return "STRING_CONTAINS(" + column.describe() + ", " + value + ")"; } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringEndsWith.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringEndsWith.java deleted file mode 100644 index 11b8317ba4895..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringEndsWith.java +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; -import org.apache.spark.unsafe.types.UTF8String; - -/** - * A filter that evaluates to {@code true} iff the {@code column} evaluates to - * a string that ends with {@code value}. - * - * @since 3.3.0 - */ -@Evolving -public final class StringEndsWith extends StringPredicate { - - public StringEndsWith(NamedReference column, UTF8String value) { - super(column, value); - } - - @Override - public String toString() { return "STRING_ENDS_WITH(" + column.describe() + ", " + value + ")"; } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringPredicate.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringPredicate.java deleted file mode 100644 index ffe5d5dba45b3..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringPredicate.java +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import java.util.Objects; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; -import org.apache.spark.unsafe.types.UTF8String; - -/** - * Base class for {@link StringContains}, {@link StringStartsWith}, - * {@link StringEndsWith} - * - * @since 3.3.0 - */ -@Evolving -abstract class StringPredicate extends Filter { - protected final NamedReference column; - protected final UTF8String value; - - protected StringPredicate(NamedReference column, UTF8String value) { - this.column = column; - this.value = value; - } - - public NamedReference column() { return column; } - public UTF8String value() { return value; } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - StringPredicate that = (StringPredicate) o; - return Objects.equals(column, that.column) && Objects.equals(value, that.value); - } - - @Override - public int hashCode() { - return Objects.hash(column, value); - } - - @Override - public NamedReference[] references() { return new NamedReference[] { column }; } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringStartsWith.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringStartsWith.java deleted file mode 100644 index 38a5de1921cdc..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/filter/StringStartsWith.java +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.expressions.filter; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.NamedReference; -import org.apache.spark.unsafe.types.UTF8String; - -/** - * A filter that evaluates to {@code true} iff the {@code column} evaluates to - * a string that starts with {@code value}. - * - * @since 3.3.0 - */ -@Evolving -public final class StringStartsWith extends StringPredicate { - - public StringStartsWith(NamedReference column, UTF8String value) { - super(column, value); - } - - @Override - public String toString() { - return "STRING_STARTS_WITH(" + column.describe() + ", " + value + ")"; - } -} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SupportsPushDownV2Filters.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SupportsPushDownV2Filters.java index e1a5401a5ca9f..1fec939aeb474 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SupportsPushDownV2Filters.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SupportsPushDownV2Filters.java @@ -18,11 +18,11 @@ package org.apache.spark.sql.connector.read; import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.expressions.filter.Filter; +import org.apache.spark.sql.connector.expressions.filter.Predicate; /** * A mix-in interface for {@link ScanBuilder}. Data sources can implement this interface to - * push down V2 {@link Filter} to the data source and reduce the size of the data to be read. + * push down V2 {@link Predicate} to the data source and reduce the size of the data to be read. * Please Note that this interface is preferred over {@link SupportsPushDownFilters}, which uses * V1 {@link org.apache.spark.sql.sources.Filter} and is less efficient due to the * internal -> external data conversion. @@ -33,28 +33,31 @@ public interface SupportsPushDownV2Filters extends ScanBuilder { /** - * Pushes down filters, and returns filters that need to be evaluated after scanning. + * Pushes down predicates, and returns predicates that need to be evaluated after scanning. *

        - * Rows should be returned from the data source if and only if all of the filters match. That is, - * filters must be interpreted as ANDed together. + * Rows should be returned from the data source if and only if all of the predicates match. + * That is, predicates must be interpreted as ANDed together. */ - Filter[] pushFilters(Filter[] filters); + Predicate[] pushPredicates(Predicate[] predicates); /** - * Returns the filters that are pushed to the data source via {@link #pushFilters(Filter[])}. + * Returns the predicates that are pushed to the data source via + * {@link #pushPredicates(Predicate[])}. *

        - * There are 3 kinds of filters: + * There are 3 kinds of predicates: *

          - *
        1. pushable filters which don't need to be evaluated again after scanning.
        2. - *
        3. pushable filters which still need to be evaluated after scanning, e.g. parquet row - * group filter.
        4. - *
        5. non-pushable filters.
        6. + *
        7. pushable predicates which don't need to be evaluated again after scanning.
        8. + *
        9. pushable predicates which still need to be evaluated after scanning, e.g. parquet row + * group predicate.
        10. + *
        11. non-pushable predicates.
        12. *
        *

        - * Both case 1 and 2 should be considered as pushed filters and should be returned by this method. + * Both case 1 and 2 should be considered as pushed predicates and should be returned + * by this method. *

        - * It's possible that there is no filters in the query and {@link #pushFilters(Filter[])} - * is never called, empty array should be returned for this case. + * It's possible that there is no predicates in the query and + * {@link #pushPredicates(Predicate[])} is never called, + * empty array should be returned for this case. */ - Filter[] pushedFilters(); + Predicate[] pushedPredicates(); } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index 0af0d88b0f622..91dae749f974b 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -17,39 +17,53 @@ package org.apache.spark.sql.connector.util; -import java.util.ArrayList; +import java.util.Arrays; import java.util.List; +import java.util.stream.Collectors; import org.apache.spark.sql.connector.expressions.Expression; -import org.apache.spark.sql.connector.expressions.FieldReference; +import org.apache.spark.sql.connector.expressions.NamedReference; import org.apache.spark.sql.connector.expressions.GeneralScalarExpression; -import org.apache.spark.sql.connector.expressions.LiteralValue; +import org.apache.spark.sql.connector.expressions.Literal; /** * The builder to generate SQL from V2 expressions. */ public class V2ExpressionSQLBuilder { + public String build(Expression expr) { - if (expr instanceof LiteralValue) { - return visitLiteral((LiteralValue) expr); - } else if (expr instanceof FieldReference) { - return visitFieldReference((FieldReference) expr); + if (expr instanceof Literal) { + return visitLiteral((Literal) expr); + } else if (expr instanceof NamedReference) { + return visitNamedReference((NamedReference) expr); } else if (expr instanceof GeneralScalarExpression) { GeneralScalarExpression e = (GeneralScalarExpression) expr; String name = e.name(); switch (name) { + case "IN": { + List children = + Arrays.stream(e.children()).map(c -> build(c)).collect(Collectors.toList()); + return visitIn(children.get(0), children.subList(1, children.size())); + } case "IS_NULL": return visitIsNull(build(e.children()[0])); case "IS_NOT_NULL": return visitIsNotNull(build(e.children()[0])); + case "STARTS_WITH": + return visitStartsWith(build(e.children()[0]), build(e.children()[1])); + case "ENDS_WITH": + return visitEndsWith(build(e.children()[0]), build(e.children()[1])); + case "CONTAINS": + return visitContains(build(e.children()[0]), build(e.children()[1])); case "=": - case "!=": + case "<>": case "<=>": case "<": case "<=": case ">": case ">=": - return visitBinaryComparison(name, build(e.children()[0]), build(e.children()[1])); + return visitBinaryComparison( + name, inputToSQL(e.children()[0]), inputToSQL(e.children()[1])); case "+": case "*": case "/": @@ -57,12 +71,14 @@ public String build(Expression expr) { case "&": case "|": case "^": - return visitBinaryArithmetic(name, build(e.children()[0]), build(e.children()[1])); + return visitBinaryArithmetic( + name, inputToSQL(e.children()[0]), inputToSQL(e.children()[1])); case "-": if (e.children().length == 1) { return visitUnaryArithmetic(name, build(e.children()[0])); } else { - return visitBinaryArithmetic(name, build(e.children()[0]), build(e.children()[1])); + return visitBinaryArithmetic( + name, inputToSQL(e.children()[0]), inputToSQL(e.children()[1])); } case "AND": return visitAnd(name, build(e.children()[0]), build(e.children()[1])); @@ -72,12 +88,11 @@ public String build(Expression expr) { return visitNot(build(e.children()[0])); case "~": return visitUnaryArithmetic(name, build(e.children()[0])); - case "CASE_WHEN": - List children = new ArrayList<>(); - for (Expression child : e.children()) { - children.add(build(child)); - } + case "CASE_WHEN": { + List children = + Arrays.stream(e.children()).map(c -> build(c)).collect(Collectors.toList()); return visitCaseWhen(children.toArray(new String[e.children().length])); + } // TODO supports other expressions default: return visitUnexpectedExpr(expr); @@ -87,12 +102,19 @@ public String build(Expression expr) { } } - protected String visitLiteral(LiteralValue literalValue) { - return literalValue.toString(); + protected String visitLiteral(Literal literal) { + return literal.toString(); } - protected String visitFieldReference(FieldReference fieldRef) { - return fieldRef.toString(); + protected String visitNamedReference(NamedReference namedRef) { + return namedRef.toString(); + } + + protected String visitIn(String v, List list) { + if (list.isEmpty()) { + return "CASE WHEN " + v + " IS NULL THEN NULL ELSE FALSE END"; + } + return v + " IN (" + list.stream().collect(Collectors.joining(", ")) + ")"; } protected String visitIsNull(String v) { @@ -103,12 +125,46 @@ protected String visitIsNotNull(String v) { return v + " IS NOT NULL"; } + protected String visitStartsWith(String l, String r) { + // Remove quotes at the beginning and end. + // e.g. converts "'str'" to "str". + String value = r.substring(1, r.length() - 1); + return l + " LIKE '" + value + "%'"; + } + + protected String visitEndsWith(String l, String r) { + // Remove quotes at the beginning and end. + // e.g. converts "'str'" to "str". + String value = r.substring(1, r.length() - 1); + return l + " LIKE '%" + value + "'"; + } + + protected String visitContains(String l, String r) { + // Remove quotes at the beginning and end. + // e.g. converts "'str'" to "str". + String value = r.substring(1, r.length() - 1); + return l + " LIKE '%" + value + "%'"; + } + + private String inputToSQL(Expression input) { + if (input.children().length > 1) { + return "(" + build(input) + ")"; + } else { + return build(input); + } + } + protected String visitBinaryComparison(String name, String l, String r) { - return "(" + l + ") " + name + " (" + r + ")"; + switch (name) { + case "<=>": + return "(" + l + " = " + r + ") OR (" + l + " IS NULL AND " + r + " IS NULL)"; + default: + return l + " " + name + " " + r; + } } protected String visitBinaryArithmetic(String name, String l, String r) { - return "(" + l + ") " + name + " (" + r + ")"; + return l + " " + name + " " + r; } protected String visitAnd(String name, String l, String r) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala index 174dd088d4c66..9954821e7cebc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala @@ -18,7 +18,12 @@ package org.apache.spark.sql.sources import org.apache.spark.annotation.{Evolving, Stable} +import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.parseColumnPath +import org.apache.spark.sql.connector.expressions.{FieldReference, LiteralValue} +import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse => V2AlwaysFalse, AlwaysTrue => V2AlwaysTrue, Predicate} +import org.apache.spark.sql.types.StringType +import org.apache.spark.unsafe.types.UTF8String //////////////////////////////////////////////////////////////////////////////////////////////////// // This file defines all the filters that we can push down to the data sources. @@ -64,6 +69,11 @@ sealed abstract class Filter { private[sql] def containsNestedColumn: Boolean = { this.v2references.exists(_.length > 1) } + + /** + * Converts V1 filter to V2 filter + */ + private[sql] def toV2: Predicate } /** @@ -78,6 +88,11 @@ sealed abstract class Filter { @Stable case class EqualTo(attribute: String, value: Any) extends Filter { override def references: Array[String] = Array(attribute) ++ findReferences(value) + override def toV2: Predicate = { + val literal = Literal(value) + new Predicate("=", + Array(FieldReference(attribute), LiteralValue(literal.value, literal.dataType))) + } } /** @@ -93,6 +108,11 @@ case class EqualTo(attribute: String, value: Any) extends Filter { @Stable case class EqualNullSafe(attribute: String, value: Any) extends Filter { override def references: Array[String] = Array(attribute) ++ findReferences(value) + override def toV2: Predicate = { + val literal = Literal(value) + new Predicate("<=>", + Array(FieldReference(attribute), LiteralValue(literal.value, literal.dataType))) + } } /** @@ -107,6 +127,11 @@ case class EqualNullSafe(attribute: String, value: Any) extends Filter { @Stable case class GreaterThan(attribute: String, value: Any) extends Filter { override def references: Array[String] = Array(attribute) ++ findReferences(value) + override def toV2: Predicate = { + val literal = Literal(value) + new Predicate(">", + Array(FieldReference(attribute), LiteralValue(literal.value, literal.dataType))) + } } /** @@ -121,6 +146,11 @@ case class GreaterThan(attribute: String, value: Any) extends Filter { @Stable case class GreaterThanOrEqual(attribute: String, value: Any) extends Filter { override def references: Array[String] = Array(attribute) ++ findReferences(value) + override def toV2: Predicate = { + val literal = Literal(value) + new Predicate(">=", + Array(FieldReference(attribute), LiteralValue(literal.value, literal.dataType))) + } } /** @@ -135,6 +165,11 @@ case class GreaterThanOrEqual(attribute: String, value: Any) extends Filter { @Stable case class LessThan(attribute: String, value: Any) extends Filter { override def references: Array[String] = Array(attribute) ++ findReferences(value) + override def toV2: Predicate = { + val literal = Literal(value) + new Predicate("<", + Array(FieldReference(attribute), LiteralValue(literal.value, literal.dataType))) + } } /** @@ -149,6 +184,11 @@ case class LessThan(attribute: String, value: Any) extends Filter { @Stable case class LessThanOrEqual(attribute: String, value: Any) extends Filter { override def references: Array[String] = Array(attribute) ++ findReferences(value) + override def toV2: Predicate = { + val literal = Literal(value) + new Predicate("<=", + Array(FieldReference(attribute), LiteralValue(literal.value, literal.dataType))) + } } /** @@ -185,6 +225,13 @@ case class In(attribute: String, values: Array[Any]) extends Filter { } override def references: Array[String] = Array(attribute) ++ values.flatMap(findReferences) + override def toV2: Predicate = { + val literals = values.map { value => + val literal = Literal(value) + LiteralValue(literal.value, literal.dataType) + } + new Predicate("IN", FieldReference(attribute) +: literals) + } } /** @@ -198,6 +245,7 @@ case class In(attribute: String, values: Array[Any]) extends Filter { @Stable case class IsNull(attribute: String) extends Filter { override def references: Array[String] = Array(attribute) + override def toV2: Predicate = new Predicate("IS_NULL", Array(FieldReference(attribute))) } /** @@ -211,6 +259,7 @@ case class IsNull(attribute: String) extends Filter { @Stable case class IsNotNull(attribute: String) extends Filter { override def references: Array[String] = Array(attribute) + override def toV2: Predicate = new Predicate("IS_NOT_NULL", Array(FieldReference(attribute))) } /** @@ -221,6 +270,7 @@ case class IsNotNull(attribute: String) extends Filter { @Stable case class And(left: Filter, right: Filter) extends Filter { override def references: Array[String] = left.references ++ right.references + override def toV2: Predicate = new Predicate("AND", Seq(left, right).map(_.toV2).toArray) } /** @@ -231,6 +281,7 @@ case class And(left: Filter, right: Filter) extends Filter { @Stable case class Or(left: Filter, right: Filter) extends Filter { override def references: Array[String] = left.references ++ right.references + override def toV2: Predicate = new Predicate("OR", Seq(left, right).map(_.toV2).toArray) } /** @@ -241,6 +292,7 @@ case class Or(left: Filter, right: Filter) extends Filter { @Stable case class Not(child: Filter) extends Filter { override def references: Array[String] = child.references + override def toV2: Predicate = new Predicate("NOT", Array(child.toV2)) } /** @@ -255,6 +307,8 @@ case class Not(child: Filter) extends Filter { @Stable case class StringStartsWith(attribute: String, value: String) extends Filter { override def references: Array[String] = Array(attribute) + override def toV2: Predicate = new Predicate("STARTS_WITH", + Array(FieldReference(attribute), LiteralValue(UTF8String.fromString(value), StringType))) } /** @@ -269,6 +323,8 @@ case class StringStartsWith(attribute: String, value: String) extends Filter { @Stable case class StringEndsWith(attribute: String, value: String) extends Filter { override def references: Array[String] = Array(attribute) + override def toV2: Predicate = new Predicate("ENDS_WITH", + Array(FieldReference(attribute), LiteralValue(UTF8String.fromString(value), StringType))) } /** @@ -283,6 +339,8 @@ case class StringEndsWith(attribute: String, value: String) extends Filter { @Stable case class StringContains(attribute: String, value: String) extends Filter { override def references: Array[String] = Array(attribute) + override def toV2: Predicate = new Predicate("CONTAINS", + Array(FieldReference(attribute), LiteralValue(UTF8String.fromString(value), StringType))) } /** @@ -293,6 +351,7 @@ case class StringContains(attribute: String, value: String) extends Filter { @Evolving case class AlwaysTrue() extends Filter { override def references: Array[String] = Array.empty + override def toV2: Predicate = new V2AlwaysTrue() } @Evolving @@ -307,6 +366,7 @@ object AlwaysTrue extends AlwaysTrue { @Evolving case class AlwaysFalse() extends Filter { override def references: Array[String] = Array.empty + override def toV2: Predicate = new V2AlwaysFalse() } @Evolving diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/expressions/TransformExtractorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/expressions/TransformExtractorSuite.scala index 54ab1df3fa8f8..62cae3c861071 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/expressions/TransformExtractorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/expressions/TransformExtractorSuite.scala @@ -45,7 +45,6 @@ class TransformExtractorSuite extends SparkFunSuite { */ private def transform(func: String, ref: NamedReference): Transform = new Transform { override def name: String = func - override def references: Array[NamedReference] = Array(ref) override def arguments: Array[Expression] = Array(ref) override def toString: String = ref.describe } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala index 1e361695056a7..5c8e6a67ce3f0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -17,18 +17,23 @@ package org.apache.spark.sql.catalyst.util -import org.apache.spark.sql.catalyst.expressions.{Add, And, Attribute, BinaryComparison, BinaryOperator, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, CaseWhen, Divide, EqualTo, Expression, IsNotNull, IsNull, Literal, Multiply, Not, Or, Remainder, Subtract, UnaryMinus} +import org.apache.spark.sql.catalyst.expressions.{Add, And, BinaryComparison, BinaryOperator, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, CaseWhen, Contains, Divide, EndsWith, EqualTo, Expression, In, InSet, IsNotNull, IsNull, Literal, Multiply, Not, Or, Predicate, Remainder, StartsWith, StringPredicate, Subtract, UnaryMinus} import org.apache.spark.sql.connector.expressions.{Expression => V2Expression, FieldReference, GeneralScalarExpression, LiteralValue} +import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse, AlwaysTrue, And => V2And, Not => V2Not, Or => V2Or, Predicate => V2Predicate} +import org.apache.spark.sql.execution.datasources.PushableColumn +import org.apache.spark.sql.types.BooleanType /** * The builder to generate V2 expressions from catalyst expressions. */ -class V2ExpressionBuilder(e: Expression) { +class V2ExpressionBuilder( + e: Expression, nestedPredicatePushdownEnabled: Boolean = false, isPredicate: Boolean = false) { - def build(): Option[V2Expression] = generateExpression(e) + val pushableColumn = PushableColumn(nestedPredicatePushdownEnabled) + + def build(): Option[V2Expression] = generateExpression(e, isPredicate) private def canTranslate(b: BinaryOperator) = b match { - case _: And | _: Or => true case _: BinaryComparison => true case _: BitwiseAnd | _: BitwiseOr | _: BitwiseXor => true case add: Add => add.failOnError @@ -39,18 +44,83 @@ class V2ExpressionBuilder(e: Expression) { case _ => false } - private def generateExpression(expr: Expression): Option[V2Expression] = expr match { + private def generateExpression( + expr: Expression, isPredicate: Boolean = false): Option[V2Expression] = expr match { + case Literal(true, BooleanType) => Some(new AlwaysTrue()) + case Literal(false, BooleanType) => Some(new AlwaysFalse()) case Literal(value, dataType) => Some(LiteralValue(value, dataType)) - case attr: Attribute => Some(FieldReference.column(attr.name)) + case col @ pushableColumn(name) if nestedPredicatePushdownEnabled => + if (isPredicate && col.dataType.isInstanceOf[BooleanType]) { + Some(new V2Predicate("=", Array(FieldReference(name), LiteralValue(true, BooleanType)))) + } else { + Some(FieldReference(name)) + } + case pushableColumn(name) if !nestedPredicatePushdownEnabled => + Some(FieldReference.column(name)) + case in @ InSet(child, hset) => + generateExpression(child).map { v => + val children = + (v +: hset.toSeq.map(elem => LiteralValue(elem, in.dataType))).toArray[V2Expression] + new V2Predicate("IN", children) + } + // Because we only convert In to InSet in Optimizer when there are more than certain + // items. So it is possible we still get an In expression here that needs to be pushed + // down. + case In(value, list) => + val v = generateExpression(value) + val listExpressions = list.flatMap(generateExpression(_)) + if (v.isDefined && list.length == listExpressions.length) { + val children = (v.get +: listExpressions).toArray[V2Expression] + // The children looks like [expr, value1, ..., valueN] + Some(new V2Predicate("IN", children)) + } else { + None + } case IsNull(col) => generateExpression(col) - .map(c => new GeneralScalarExpression("IS_NULL", Array[V2Expression](c))) + .map(c => new V2Predicate("IS_NULL", Array[V2Expression](c))) case IsNotNull(col) => generateExpression(col) - .map(c => new GeneralScalarExpression("IS_NOT_NULL", Array[V2Expression](c))) - case b: BinaryOperator if canTranslate(b) => - val left = generateExpression(b.left) - val right = generateExpression(b.right) + .map(c => new V2Predicate("IS_NOT_NULL", Array[V2Expression](c))) + case p: StringPredicate => + val left = generateExpression(p.left) + val right = generateExpression(p.right) if (left.isDefined && right.isDefined) { - Some(new GeneralScalarExpression(b.sqlOperator, Array[V2Expression](left.get, right.get))) + val name = p match { + case _: StartsWith => "STARTS_WITH" + case _: EndsWith => "ENDS_WITH" + case _: Contains => "CONTAINS" + } + Some(new V2Predicate(name, Array[V2Expression](left.get, right.get))) + } else { + None + } + case and: And => + val l = generateExpression(and.left, true) + val r = generateExpression(and.right, true) + if (l.isDefined && r.isDefined) { + assert(l.get.isInstanceOf[V2Predicate] && r.get.isInstanceOf[V2Predicate]) + Some(new V2And(l.get.asInstanceOf[V2Predicate], r.get.asInstanceOf[V2Predicate])) + } else { + None + } + case or: Or => + val l = generateExpression(or.left, true) + val r = generateExpression(or.right, true) + if (l.isDefined && r.isDefined) { + assert(l.get.isInstanceOf[V2Predicate] && r.get.isInstanceOf[V2Predicate]) + Some(new V2Or(l.get.asInstanceOf[V2Predicate], r.get.asInstanceOf[V2Predicate])) + } else { + None + } + case b: BinaryOperator if canTranslate(b) => + val l = generateExpression(b.left) + val r = generateExpression(b.right) + if (l.isDefined && r.isDefined) { + b match { + case _: Predicate => + Some(new V2Predicate(b.sqlOperator, Array[V2Expression](l.get, r.get))) + case _ => + Some(new GeneralScalarExpression(b.sqlOperator, Array[V2Expression](l.get, r.get))) + } } else { None } @@ -58,32 +128,35 @@ class V2ExpressionBuilder(e: Expression) { val left = generateExpression(eq.left) val right = generateExpression(eq.right) if (left.isDefined && right.isDefined) { - Some(new GeneralScalarExpression("!=", Array[V2Expression](left.get, right.get))) + Some(new V2Predicate("<>", Array[V2Expression](left.get, right.get))) } else { None } - case Not(child) => generateExpression(child) - .map(v => new GeneralScalarExpression("NOT", Array[V2Expression](v))) + case Not(child) => generateExpression(child, true) // NOT expects predicate + .map { v => + assert(v.isInstanceOf[V2Predicate]) + new V2Not(v.asInstanceOf[V2Predicate]) + } case UnaryMinus(child, true) => generateExpression(child) .map(v => new GeneralScalarExpression("-", Array[V2Expression](v))) case BitwiseNot(child) => generateExpression(child) .map(v => new GeneralScalarExpression("~", Array[V2Expression](v))) case CaseWhen(branches, elseValue) => - val conditions = branches.map(_._1).flatMap(generateExpression) - val values = branches.map(_._2).flatMap(generateExpression) + val conditions = branches.map(_._1).flatMap(generateExpression(_, true)) + val values = branches.map(_._2).flatMap(generateExpression(_, true)) if (conditions.length == branches.length && values.length == branches.length) { val branchExpressions = conditions.zip(values).flatMap { case (c, v) => Seq[V2Expression](c, v) } if (elseValue.isDefined) { - elseValue.flatMap(generateExpression).map { v => + elseValue.flatMap(generateExpression(_)).map { v => val children = (branchExpressions :+ v).toArray[V2Expression] // The children looks like [condition1, value1, ..., conditionN, valueN, elseValue] - new GeneralScalarExpression("CASE_WHEN", children) + new V2Predicate("CASE_WHEN", children) } } else { // The children looks like [condition1, value1, ..., conditionN, valueN] - Some(new GeneralScalarExpression("CASE_WHEN", branchExpressions.toArray[V2Expression])) + Some(new V2Predicate("CASE_WHEN", branchExpressions.toArray[V2Expression])) } } else { None diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 1e2fa41ef0f49..e6de7d0e763b9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -153,9 +153,14 @@ case class RowDataSourceScanExec( pushedDownOperators.limit.map(value => "PushedLimit" -> s"LIMIT $value") } - Map( - "ReadSchema" -> requiredSchema.catalogString, - "PushedFilters" -> seqToString(markedFilters.toSeq)) ++ + val pushedFilters = if (pushedDownOperators.pushedPredicates.nonEmpty) { + seqToString(pushedDownOperators.pushedPredicates.map(_.describe())) + } else { + seqToString(markedFilters.toSeq) + } + + Map("ReadSchema" -> requiredSchema.catalogString, + "PushedFilters" -> pushedFilters) ++ pushedDownOperators.aggregation.fold(Map[String, String]()) { v => Map("PushedAggregates" -> seqToString(v.aggregateExpressions.map(_.describe())), "PushedGroupByColumns" -> seqToString(v.groupByColumns.map(_.describe())))} ++ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 4e5014cc83e13..1b14884e75994 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -340,7 +340,7 @@ object DataSourceStrategy l.output.toStructType, Set.empty, Set.empty, - PushedDownOperators(None, None, None, Seq.empty), + PushedDownOperators(None, None, None, Seq.empty, Seq.empty), toCatalystRDD(l, baseRelation.buildScan()), baseRelation, None) :: Nil @@ -414,7 +414,7 @@ object DataSourceStrategy requestedColumns.toStructType, pushedFilters.toSet, handledFilters, - PushedDownOperators(None, None, None, Seq.empty), + PushedDownOperators(None, None, None, Seq.empty, Seq.empty), scanBuilder(requestedColumns, candidatePredicates, pushedFilters), relation.relation, relation.catalogTable.map(_.identifier)) @@ -437,7 +437,7 @@ object DataSourceStrategy requestedColumns.toStructType, pushedFilters.toSet, handledFilters, - PushedDownOperators(None, None, None, Seq.empty), + PushedDownOperators(None, None, None, Seq.empty, Seq.empty), scanBuilder(requestedColumns, candidatePredicates, pushedFilters), relation.relation, relation.catalogTable.map(_.identifier)) @@ -864,8 +864,5 @@ object PushableColumnWithoutNestedColumn extends PushableColumnBase { * Get the expression of DS V2 to represent catalyst expression that can be pushed down. */ object PushableExpression { - def unapply(e: Expression): Option[V2Expression] = e match { - case PushableColumnWithoutNestedColumn(name) => Some(FieldReference.column(name)) - case _ => new V2ExpressionBuilder(e).build() - } + def unapply(e: Expression): Option[V2Expression] = new V2ExpressionBuilder(e).build() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index 3cd2e03828212..b30b460ac67db 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -24,13 +24,11 @@ import scala.util.control.NonFatal import org.apache.spark.{InterruptibleIterator, Partition, SparkContext, TaskContext} import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD -import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.connector.expressions.SortOrder -import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.connector.expressions.filter.Predicate import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects} -import org.apache.spark.sql.sources._ import org.apache.spark.sql.types._ import org.apache.spark.util.CompletionIterator @@ -94,68 +92,13 @@ object JDBCRDD extends Logging { new StructType(columns.map(name => fieldMap(name))) } - /** - * Turns a single Filter into a String representing a SQL expression. - * Returns None for an unhandled filter. - */ - def compileFilter(f: Filter, dialect: JdbcDialect): Option[String] = { - - def quote(colName: String): String = { - val nameParts = SparkSession.active.sessionState.sqlParser.parseMultipartIdentifier(colName) - if(nameParts.length > 1) { - throw QueryCompilationErrors.commandNotSupportNestedColumnError("Filter push down", colName) - } - dialect.quoteIdentifier(nameParts.head) - } - - Option(f match { - case EqualTo(attr, value) => s"${quote(attr)} = ${dialect.compileValue(value)}" - case EqualNullSafe(attr, value) => - val col = quote(attr) - s"(NOT ($col != ${dialect.compileValue(value)} OR $col IS NULL OR " + - s"${dialect.compileValue(value)} IS NULL) OR " + - s"($col IS NULL AND ${dialect.compileValue(value)} IS NULL))" - case LessThan(attr, value) => s"${quote(attr)} < ${dialect.compileValue(value)}" - case GreaterThan(attr, value) => s"${quote(attr)} > ${dialect.compileValue(value)}" - case LessThanOrEqual(attr, value) => s"${quote(attr)} <= ${dialect.compileValue(value)}" - case GreaterThanOrEqual(attr, value) => s"${quote(attr)} >= ${dialect.compileValue(value)}" - case IsNull(attr) => s"${quote(attr)} IS NULL" - case IsNotNull(attr) => s"${quote(attr)} IS NOT NULL" - case StringStartsWith(attr, value) => s"${quote(attr)} LIKE '${value}%'" - case StringEndsWith(attr, value) => s"${quote(attr)} LIKE '%${value}'" - case StringContains(attr, value) => s"${quote(attr)} LIKE '%${value}%'" - case In(attr, value) if value.isEmpty => - s"CASE WHEN ${quote(attr)} IS NULL THEN NULL ELSE FALSE END" - case In(attr, value) => s"${quote(attr)} IN (${dialect.compileValue(value)})" - case Not(f) => compileFilter(f, dialect).map(p => s"(NOT ($p))").getOrElse(null) - case Or(f1, f2) => - // We can't compile Or filter unless both sub-filters are compiled successfully. - // It applies too for the following And filter. - // If we can make sure compileFilter supports all filters, we can remove this check. - val or = Seq(f1, f2).flatMap(compileFilter(_, dialect)) - if (or.size == 2) { - or.map(p => s"($p)").mkString(" OR ") - } else { - null - } - case And(f1, f2) => - val and = Seq(f1, f2).flatMap(compileFilter(_, dialect)) - if (and.size == 2) { - and.map(p => s"($p)").mkString(" AND ") - } else { - null - } - case _ => null - }) - } - /** * Build and return JDBCRDD from the given information. * * @param sc - Your SparkContext. * @param schema - The Catalyst schema of the underlying database table. * @param requiredColumns - The names of the columns or aggregate columns to SELECT. - * @param filters - The filters to include in all WHERE clauses. + * @param predicates - The predicates to include in all WHERE clauses. * @param parts - An array of JDBCPartitions specifying partition ids and * per-partition WHERE clauses. * @param options - JDBC options that contains url, table and other information. @@ -164,7 +107,7 @@ object JDBCRDD extends Logging { * @param sample - The pushed down tableSample. * @param limit - The pushed down limit. If the value is 0, it means no limit or limit * is not pushed down. - * @param sortValues - The sort values cooperates with limit to realize top N. + * @param sortOrders - The sort orders cooperates with limit to realize top N. * * @return An RDD representing "SELECT requiredColumns FROM fqTable". */ @@ -173,7 +116,7 @@ object JDBCRDD extends Logging { sc: SparkContext, schema: StructType, requiredColumns: Array[String], - filters: Array[Filter], + predicates: Array[Predicate], parts: Array[Partition], options: JDBCOptions, outputSchema: Option[StructType] = None, @@ -194,7 +137,7 @@ object JDBCRDD extends Logging { dialect.createConnectionFactory(options), outputSchema.getOrElse(pruneSchema(schema, requiredColumns)), quotedColumns, - filters, + predicates, parts, url, options, @@ -216,7 +159,7 @@ private[jdbc] class JDBCRDD( getConnection: Int => Connection, schema: StructType, columns: Array[String], - filters: Array[Filter], + predicates: Array[Predicate], partitions: Array[Partition], url: String, options: JDBCOptions, @@ -239,10 +182,10 @@ private[jdbc] class JDBCRDD( /** * `filters`, but as a WHERE clause suitable for injection into a SQL query. */ - private val filterWhereClause: String = - filters - .flatMap(JDBCRDD.compileFilter(_, JdbcDialects.get(url))) - .map(p => s"($p)").mkString(" AND ") + private val filterWhereClause: String = { + val dialect = JdbcDialects.get(url) + predicates.flatMap(dialect.compileExpression(_)).map(p => s"($p)").mkString(" AND ") + } /** * A WHERE clause representing both `filters`, if any, and the current partition. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala index ecb207363cd59..0f1a1b6dc667b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter} import org.apache.spark.sql.catalyst.util.DateTimeUtils.{getZoneId, stringToDate, stringToTimestamp} import org.apache.spark.sql.connector.expressions.SortOrder +import org.apache.spark.sql.connector.expressions.filter.Predicate import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo import org.apache.spark.sql.internal.SQLConf @@ -270,10 +271,11 @@ private[sql] case class JDBCRelation( override val needConversion: Boolean = false - // Check if JDBCRDD.compileFilter can accept input filters + // Check if JdbcDialect can compile input filters override def unhandledFilters(filters: Array[Filter]): Array[Filter] = { if (jdbcOptions.pushDownPredicate) { - filters.filter(JDBCRDD.compileFilter(_, JdbcDialects.get(jdbcOptions.url)).isEmpty) + val dialect = JdbcDialects.get(jdbcOptions.url) + filters.filter(f => dialect.compileExpression(f.toV2).isEmpty) } else { filters } @@ -281,17 +283,17 @@ private[sql] case class JDBCRelation( override def buildScan(requiredColumns: Array[String], filters: Array[Filter]): RDD[Row] = { // When pushDownPredicate is false, all Filters that need to be pushed down should be ignored - val pushedFilters = if (jdbcOptions.pushDownPredicate) { - filters + val pushedPredicates = if (jdbcOptions.pushDownPredicate) { + filters.map(_.toV2) } else { - Array.empty[Filter] + Array.empty[Predicate] } // Rely on a type erasure hack to pass RDD[InternalRow] back as RDD[Row] JDBCRDD.scanTable( sparkSession.sparkContext, schema, requiredColumns, - pushedFilters, + pushedPredicates, parts, jdbcOptions).asInstanceOf[RDD[Row]] } @@ -299,7 +301,7 @@ private[sql] case class JDBCRelation( def buildScan( requiredColumns: Array[String], finalSchema: StructType, - filters: Array[Filter], + predicates: Array[Predicate], groupByColumns: Option[Array[String]], tableSample: Option[TableSampleInfo], limit: Int, @@ -309,7 +311,7 @@ private[sql] case class JDBCRelation( sparkSession.sparkContext, schema, requiredColumns, - filters, + predicates, parts, jdbcOptions, Some(finalSchema), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index e544a7c8767e7..c6d271b9d75c0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -24,27 +24,25 @@ import org.apache.spark.sql.{SparkSession, Strategy} import org.apache.spark.sql.catalyst.analysis.{ResolvedDBObjectName, ResolvedNamespace, ResolvedPartitionSpec, ResolvedTable} import org.apache.spark.sql.catalyst.catalog.CatalogUtils import org.apache.spark.sql.catalyst.expressions -import org.apache.spark.sql.catalyst.expressions.{And, Attribute, DynamicPruning, EmptyRow, Expression, Literal, NamedExpression, PredicateHelper, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{And, Attribute, DynamicPruning, Expression, NamedExpression, Not, Or, PredicateHelper, SubqueryExpression} import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.util.toPrettySQL +import org.apache.spark.sql.catalyst.util.{toPrettySQL, V2ExpressionBuilder} import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, Table, TableCapability, TableCatalog} import org.apache.spark.sql.connector.catalog.index.SupportsIndex -import org.apache.spark.sql.connector.expressions.{FieldReference, Literal => V2Literal, LiteralValue} -import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse => V2AlwaysFalse, AlwaysTrue => V2AlwaysTrue, And => V2And, EqualNullSafe => V2EqualNullSafe, EqualTo => V2EqualTo, Filter => V2Filter, GreaterThan => V2GreaterThan, GreaterThanOrEqual => V2GreaterThanOrEqual, In => V2In, IsNotNull => V2IsNotNull, IsNull => V2IsNull, LessThan => V2LessThan, LessThanOrEqual => V2LessThanOrEqual, Not => V2Not, Or => V2Or, StringContains => V2StringContains, StringEndsWith => V2StringEndsWith, StringStartsWith => V2StringStartsWith} +import org.apache.spark.sql.connector.expressions.{FieldReference} +import org.apache.spark.sql.connector.expressions.filter.{And => V2And, Not => V2Not, Or => V2Or, Predicate} import org.apache.spark.sql.connector.read.LocalScan import org.apache.spark.sql.connector.read.streaming.{ContinuousStream, MicroBatchStream} import org.apache.spark.sql.connector.write.V1Write import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.{FilterExec, LeafExecNode, LocalTableScanExec, ProjectExec, RowDataSourceScanExec, SparkPlan} -import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, PushableColumn, PushableColumnBase} +import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.execution.streaming.continuous.{WriteToContinuousDataSource, WriteToContinuousDataSourceExec} import org.apache.spark.sql.internal.StaticSQLConf.WAREHOUSE_PATH import org.apache.spark.sql.sources.{BaseRelation, TableScan} -import org.apache.spark.sql.types.{BooleanType, StringType} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.storage.StorageLevel -import org.apache.spark.unsafe.types.UTF8String class DataSourceV2Strategy(session: SparkSession) extends Strategy with PredicateHelper { @@ -473,74 +471,12 @@ private[sql] object DataSourceV2Strategy { private def translateLeafNodeFilterV2( predicate: Expression, - pushableColumn: PushableColumnBase): Option[V2Filter] = predicate match { - case expressions.EqualTo(pushableColumn(name), Literal(v, t)) => - Some(new V2EqualTo(FieldReference(name), LiteralValue(v, t))) - case expressions.EqualTo(Literal(v, t), pushableColumn(name)) => - Some(new V2EqualTo(FieldReference(name), LiteralValue(v, t))) - - case expressions.EqualNullSafe(pushableColumn(name), Literal(v, t)) => - Some(new V2EqualNullSafe(FieldReference(name), LiteralValue(v, t))) - case expressions.EqualNullSafe(Literal(v, t), pushableColumn(name)) => - Some(new V2EqualNullSafe(FieldReference(name), LiteralValue(v, t))) - - case expressions.GreaterThan(pushableColumn(name), Literal(v, t)) => - Some(new V2GreaterThan(FieldReference(name), LiteralValue(v, t))) - case expressions.GreaterThan(Literal(v, t), pushableColumn(name)) => - Some(new V2LessThan(FieldReference(name), LiteralValue(v, t))) - - case expressions.LessThan(pushableColumn(name), Literal(v, t)) => - Some(new V2LessThan(FieldReference(name), LiteralValue(v, t))) - case expressions.LessThan(Literal(v, t), pushableColumn(name)) => - Some(new V2GreaterThan(FieldReference(name), LiteralValue(v, t))) - - case expressions.GreaterThanOrEqual(pushableColumn(name), Literal(v, t)) => - Some(new V2GreaterThanOrEqual(FieldReference(name), LiteralValue(v, t))) - case expressions.GreaterThanOrEqual(Literal(v, t), pushableColumn(name)) => - Some(new V2LessThanOrEqual(FieldReference(name), LiteralValue(v, t))) - - case expressions.LessThanOrEqual(pushableColumn(name), Literal(v, t)) => - Some(new V2LessThanOrEqual(FieldReference(name), LiteralValue(v, t))) - case expressions.LessThanOrEqual(Literal(v, t), pushableColumn(name)) => - Some(new V2GreaterThanOrEqual(FieldReference(name), LiteralValue(v, t))) - - case in @ expressions.InSet(pushableColumn(name), set) => - val values: Array[V2Literal[_]] = - set.toSeq.map(elem => LiteralValue(elem, in.dataType)).toArray - Some(new V2In(FieldReference(name), values)) - - // Because we only convert In to InSet in Optimizer when there are more than certain - // items. So it is possible we still get an In expression here that needs to be pushed - // down. - case in @ expressions.In(pushableColumn(name), list) if list.forall(_.isInstanceOf[Literal]) => - val hSet = list.map(_.eval(EmptyRow)) - Some(new V2In(FieldReference(name), - hSet.toArray.map(LiteralValue(_, in.value.dataType)))) - - case expressions.IsNull(pushableColumn(name)) => - Some(new V2IsNull(FieldReference(name))) - case expressions.IsNotNull(pushableColumn(name)) => - Some(new V2IsNotNull(FieldReference(name))) - - case expressions.StartsWith(pushableColumn(name), Literal(v: UTF8String, StringType)) => - Some(new V2StringStartsWith(FieldReference(name), v)) - - case expressions.EndsWith(pushableColumn(name), Literal(v: UTF8String, StringType)) => - Some(new V2StringEndsWith(FieldReference(name), v)) - - case expressions.Contains(pushableColumn(name), Literal(v: UTF8String, StringType)) => - Some(new V2StringContains(FieldReference(name), v)) - - case expressions.Literal(true, BooleanType) => - Some(new V2AlwaysTrue) - - case expressions.Literal(false, BooleanType) => - Some(new V2AlwaysFalse) - - case e @ pushableColumn(name) if e.dataType.isInstanceOf[BooleanType] => - Some(new V2EqualTo(FieldReference(name), LiteralValue(true, BooleanType))) - - case _ => None + supportNestedPredicatePushdown: Boolean): Option[Predicate] = { + val pushablePredicate = PushablePredicate(supportNestedPredicatePushdown) + predicate match { + case pushablePredicate(expr) => Some(expr) + case _ => None + } } /** @@ -550,7 +486,7 @@ private[sql] object DataSourceV2Strategy { */ protected[sql] def translateFilterV2( predicate: Expression, - supportNestedPredicatePushdown: Boolean): Option[V2Filter] = { + supportNestedPredicatePushdown: Boolean): Option[Predicate] = { translateFilterV2WithMapping(predicate, None, supportNestedPredicatePushdown) } @@ -565,11 +501,11 @@ private[sql] object DataSourceV2Strategy { */ protected[sql] def translateFilterV2WithMapping( predicate: Expression, - translatedFilterToExpr: Option[mutable.HashMap[V2Filter, Expression]], + translatedFilterToExpr: Option[mutable.HashMap[Predicate, Expression]], nestedPredicatePushdownEnabled: Boolean) - : Option[V2Filter] = { + : Option[Predicate] = { predicate match { - case expressions.And(left, right) => + case And(left, right) => // See SPARK-12218 for detailed discussion // It is not safe to just convert one side if we do not understand the // other side. Here is an example used to explain the reason. @@ -586,7 +522,7 @@ private[sql] object DataSourceV2Strategy { right, translatedFilterToExpr, nestedPredicatePushdownEnabled) } yield new V2And(leftFilter, rightFilter) - case expressions.Or(left, right) => + case Or(left, right) => for { leftFilter <- translateFilterV2WithMapping( left, translatedFilterToExpr, nestedPredicatePushdownEnabled) @@ -594,13 +530,12 @@ private[sql] object DataSourceV2Strategy { right, translatedFilterToExpr, nestedPredicatePushdownEnabled) } yield new V2Or(leftFilter, rightFilter) - case expressions.Not(child) => + case Not(child) => translateFilterV2WithMapping(child, translatedFilterToExpr, nestedPredicatePushdownEnabled) .map(new V2Not(_)) case other => - val filter = translateLeafNodeFilterV2( - other, PushableColumn(nestedPredicatePushdownEnabled)) + val filter = translateLeafNodeFilterV2(other, nestedPredicatePushdownEnabled) if (filter.isDefined && translatedFilterToExpr.isDefined) { translatedFilterToExpr.get(filter.get) = predicate } @@ -609,20 +544,34 @@ private[sql] object DataSourceV2Strategy { } protected[sql] def rebuildExpressionFromFilter( - filter: V2Filter, - translatedFilterToExpr: mutable.HashMap[V2Filter, Expression]): Expression = { - filter match { + predicate: Predicate, + translatedFilterToExpr: mutable.HashMap[Predicate, Expression]): Expression = { + predicate match { case and: V2And => - expressions.And(rebuildExpressionFromFilter(and.left, translatedFilterToExpr), - rebuildExpressionFromFilter(and.right, translatedFilterToExpr)) + expressions.And( + rebuildExpressionFromFilter(and.left(), translatedFilterToExpr), + rebuildExpressionFromFilter(and.right(), translatedFilterToExpr)) case or: V2Or => - expressions.Or(rebuildExpressionFromFilter(or.left, translatedFilterToExpr), - rebuildExpressionFromFilter(or.right, translatedFilterToExpr)) + expressions.Or( + rebuildExpressionFromFilter(or.left(), translatedFilterToExpr), + rebuildExpressionFromFilter(or.right(), translatedFilterToExpr)) case not: V2Not => - expressions.Not(rebuildExpressionFromFilter(not.child, translatedFilterToExpr)) - case other => - translatedFilterToExpr.getOrElse(other, - throw new IllegalStateException("Failed to rebuild Expression for filter: " + filter)) + expressions.Not(rebuildExpressionFromFilter(not.child(), translatedFilterToExpr)) + case _ => + translatedFilterToExpr.getOrElse(predicate, + throw new IllegalStateException("Failed to rebuild Expression for filter: " + predicate)) } } } + +/** + * Get the expression of DS V2 to represent catalyst predicate that can be pushed down. + */ +case class PushablePredicate(nestedPredicatePushdownEnabled: Boolean) { + + def unapply(e: Expression): Option[Predicate] = + new V2ExpressionBuilder(e, nestedPredicatePushdownEnabled, true).build().map { v => + assert(v.isInstanceOf[Predicate]) + v.asInstanceOf[Predicate] + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala index 9953658b65488..1149bff7d2da7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala @@ -22,7 +22,7 @@ import scala.collection.mutable import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, Expression, NamedExpression, PredicateHelper, SchemaPruning} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.expressions.SortOrder -import org.apache.spark.sql.connector.expressions.filter.{Filter => V2Filter} +import org.apache.spark.sql.connector.expressions.filter.Predicate import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownLimit, SupportsPushDownRequiredColumns, SupportsPushDownTableSample, SupportsPushDownTopN, SupportsPushDownV2Filters} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.internal.SQLConf @@ -35,9 +35,8 @@ object PushDownUtils extends PredicateHelper { * * @return pushed filter and post-scan filters. */ - def pushFilters( - scanBuilder: ScanBuilder, - filters: Seq[Expression]): (Either[Seq[sources.Filter], Seq[V2Filter]], Seq[Expression]) = { + def pushFilters(scanBuilder: ScanBuilder, filters: Seq[Expression]) + : (Either[Seq[sources.Filter], Seq[Predicate]], Seq[Expression]) = { scanBuilder match { case r: SupportsPushDownFilters => // A map from translated data source leaf node filters to original catalyst filter @@ -73,8 +72,8 @@ object PushDownUtils extends PredicateHelper { // expressions. For a `And`/`Or` predicate, it is possible that the predicate is partially // pushed down. This map can be used to construct a catalyst filter expression from the // input filter, or a superset(partial push down filter) of the input filter. - val translatedFilterToExpr = mutable.HashMap.empty[V2Filter, Expression] - val translatedFilters = mutable.ArrayBuffer.empty[V2Filter] + val translatedFilterToExpr = mutable.HashMap.empty[Predicate, Expression] + val translatedFilters = mutable.ArrayBuffer.empty[Predicate] // Catalyst filter expression that can't be translated to data source filters. val untranslatableExprs = mutable.ArrayBuffer.empty[Expression] @@ -92,10 +91,10 @@ object PushDownUtils extends PredicateHelper { // Data source filters that need to be evaluated again after scanning. which means // the data source cannot guarantee the rows returned can pass these filters. // As a result we must return it so Spark can plan an extra filter operator. - val postScanFilters = r.pushFilters(translatedFilters.toArray).map { filter => - DataSourceV2Strategy.rebuildExpressionFromFilter(filter, translatedFilterToExpr) + val postScanFilters = r.pushPredicates(translatedFilters.toArray).map { predicate => + DataSourceV2Strategy.rebuildExpressionFromFilter(predicate, translatedFilterToExpr) } - (Right(r.pushedFilters), (untranslatableExprs ++ postScanFilters).toSeq) + (Right(r.pushedPredicates), (untranslatableExprs ++ postScanFilters).toSeq) case f: FileScanBuilder => val postScanFilters = f.pushFilters(filters) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushedDownOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushedDownOperators.scala index 20ced9c17f7e0..a95b4593fc397 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushedDownOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushedDownOperators.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.connector.expressions.SortOrder import org.apache.spark.sql.connector.expressions.aggregate.Aggregation +import org.apache.spark.sql.connector.expressions.filter.Predicate /** * Pushed down operators @@ -27,6 +28,7 @@ case class PushedDownOperators( aggregation: Option[Aggregation], sample: Option[TableSampleInfo], limit: Option[Int], - sortValues: Seq[SortOrder]) { + sortValues: Seq[SortOrder], + pushedPredicates: Seq[Predicate]) { assert((limit.isEmpty && sortValues.isEmpty) || limit.isDefined) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala index b4bd02773edfb..44cdff10aca45 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, LeafNode, import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.expressions.SortOrder import org.apache.spark.sql.connector.expressions.aggregate.{Aggregation, Avg, Count, GeneralAggregateFunc, Sum} +import org.apache.spark.sql.connector.expressions.filter.Predicate import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownAggregates, SupportsPushDownFilters, V1Scan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.sources @@ -72,6 +73,7 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { val pushedFiltersStr = if (pushedFilters.isLeft) { pushedFilters.left.get.mkString(", ") } else { + sHolder.pushedPredicates = pushedFilters.right.get pushedFilters.right.get.mkString(", ") } @@ -405,8 +407,8 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { f.pushedFilters() case _ => Array.empty[sources.Filter] } - val pushedDownOperators = PushedDownOperators(aggregation, - sHolder.pushedSample, sHolder.pushedLimit, sHolder.sortOrders) + val pushedDownOperators = PushedDownOperators(aggregation, sHolder.pushedSample, + sHolder.pushedLimit, sHolder.sortOrders, sHolder.pushedPredicates) V1ScanWrapper(v1, pushedFilters, pushedDownOperators) case _ => scan } @@ -422,6 +424,8 @@ case class ScanBuilderHolder( var sortOrders: Seq[SortOrder] = Seq.empty[SortOrder] var pushedSample: Option[TableSampleInfo] = None + + var pushedPredicates: Seq[Predicate] = Seq.empty[Predicate] } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScan.scala index 87ec9f43804e4..f68f78d51fd96 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScan.scala @@ -19,16 +19,17 @@ package org.apache.spark.sql.execution.datasources.v2.jdbc import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SQLContext} import org.apache.spark.sql.connector.expressions.SortOrder +import org.apache.spark.sql.connector.expressions.filter.Predicate import org.apache.spark.sql.connector.read.V1Scan import org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo -import org.apache.spark.sql.sources.{BaseRelation, Filter, TableScan} +import org.apache.spark.sql.sources.{BaseRelation, TableScan} import org.apache.spark.sql.types.StructType case class JDBCScan( relation: JDBCRelation, prunedSchema: StructType, - pushedFilters: Array[Filter], + pushedPredicates: Array[Predicate], pushedAggregateColumn: Array[String] = Array(), groupByColumns: Option[Array[String]], tableSample: Option[TableSampleInfo], @@ -48,7 +49,7 @@ case class JDBCScan( } else { pushedAggregateColumn } - relation.buildScan(columnList, prunedSchema, pushedFilters, groupByColumns, tableSample, + relation.buildScan(columnList, prunedSchema, pushedPredicates, groupByColumns, tableSample, pushedLimit, sortOrders) } }.asInstanceOf[T] @@ -63,7 +64,7 @@ case class JDBCScan( ("[]", "[]") } super.description() + ", prunedSchema: " + seqToString(prunedSchema) + - ", PushedFilters: " + seqToString(pushedFilters) + + ", PushedPredicates: " + seqToString(pushedPredicates) + ", PushedAggregates: " + aggString + ", PushedGroupBy: " + groupByString } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala index 61bf729bc8fbf..475f563856f82 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala @@ -22,12 +22,12 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.SparkSession import org.apache.spark.sql.connector.expressions.SortOrder import org.apache.spark.sql.connector.expressions.aggregate.Aggregation -import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownAggregates, SupportsPushDownFilters, SupportsPushDownLimit, SupportsPushDownRequiredColumns, SupportsPushDownTableSample, SupportsPushDownTopN} +import org.apache.spark.sql.connector.expressions.filter.Predicate +import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownAggregates, SupportsPushDownLimit, SupportsPushDownRequiredColumns, SupportsPushDownTableSample, SupportsPushDownTopN, SupportsPushDownV2Filters} import org.apache.spark.sql.execution.datasources.PartitioningUtils import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JDBCRDD, JDBCRelation} import org.apache.spark.sql.execution.datasources.v2.TableSampleInfo import org.apache.spark.sql.jdbc.JdbcDialects -import org.apache.spark.sql.sources.Filter import org.apache.spark.sql.types.StructType case class JDBCScanBuilder( @@ -35,7 +35,7 @@ case class JDBCScanBuilder( schema: StructType, jdbcOptions: JDBCOptions) extends ScanBuilder - with SupportsPushDownFilters + with SupportsPushDownV2Filters with SupportsPushDownRequiredColumns with SupportsPushDownAggregates with SupportsPushDownLimit @@ -45,7 +45,7 @@ case class JDBCScanBuilder( private val isCaseSensitive = session.sessionState.conf.caseSensitiveAnalysis - private var pushedFilter = Array.empty[Filter] + private var pushedPredicate = Array.empty[Predicate] private var finalSchema = schema @@ -55,18 +55,18 @@ case class JDBCScanBuilder( private var sortOrders: Array[SortOrder] = Array.empty[SortOrder] - override def pushFilters(filters: Array[Filter]): Array[Filter] = { + override def pushPredicates(predicates: Array[Predicate]): Array[Predicate] = { if (jdbcOptions.pushDownPredicate) { val dialect = JdbcDialects.get(jdbcOptions.url) - val (pushed, unSupported) = filters.partition(JDBCRDD.compileFilter(_, dialect).isDefined) - this.pushedFilter = pushed + val (pushed, unSupported) = predicates.partition(dialect.compileExpression(_).isDefined) + this.pushedPredicate = pushed unSupported } else { - filters + predicates } } - override def pushedFilters(): Array[Filter] = pushedFilter + override def pushedPredicates(): Array[Predicate] = pushedPredicate private var pushedAggregateList: Array[String] = Array() @@ -170,7 +170,7 @@ case class JDBCScanBuilder( // "DEPT","NAME",MAX("SALARY"),MIN("BONUS"), instead of getting column names from // prunedSchema and quote them (will become "MAX(SALARY)", "MIN(BONUS)" and can't // be used in sql string. - JDBCScan(JDBCRelation(schema, parts, jdbcOptions)(session), finalSchema, pushedFilter, + JDBCScan(JDBCRelation(schema, parts, jdbcOptions)(session), finalSchema, pushedPredicate, pushedAggregateList, pushedGroupByCols, tableSample, pushedLimit, sortOrders) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala index 6af5cc00ef5db..35293b38db780 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala @@ -35,27 +35,27 @@ private object DB2Dialect extends JdbcDialect { super.compileAggregate(aggFunction).orElse( aggFunction match { case f: GeneralAggregateFunc if f.name() == "VAR_POP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VARIANCE($distinct${f.inputs().head})") + Some(s"VARIANCE($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VARIANCE_SAMP($distinct${f.inputs().head})") + Some(s"VARIANCE_SAMP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDDEV($distinct${f.inputs().head})") + Some(s"STDDEV($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDDEV_SAMP($distinct${f.inputs().head})") + Some(s"STDDEV_SAMP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "COVAR_POP" && f.isDistinct == false => - assert(f.inputs().length == 2) - Some(s"COVARIANCE(${f.inputs().head}, ${f.inputs().last})") + assert(f.children().length == 2) + Some(s"COVARIANCE(${f.children().head}, ${f.children().last})") case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" && f.isDistinct == false => - assert(f.inputs().length == 2) - Some(s"COVARIANCE_SAMP(${f.inputs().head}, ${f.inputs().last})") + assert(f.children().length == 2) + Some(s"COVARIANCE_SAMP(${f.children().head}, ${f.children().last})") case _ => None } ) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala index bf838b8ed66eb..36c3c6be4a05c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala @@ -35,17 +35,17 @@ private object DerbyDialect extends JdbcDialect { super.compileAggregate(aggFunction).orElse( aggFunction match { case f: GeneralAggregateFunc if f.name() == "VAR_POP" && f.isDistinct == false => - assert(f.inputs().length == 1) - Some(s"VAR_POP(${f.inputs().head})") + assert(f.children().length == 1) + Some(s"VAR_POP(${f.children().head})") case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" && f.isDistinct == false => - assert(f.inputs().length == 1) - Some(s"VAR_SAMP(${f.inputs().head})") + assert(f.children().length == 1) + Some(s"VAR_SAMP(${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" && f.isDistinct == false => - assert(f.inputs().length == 1) - Some(s"STDDEV_POP(${f.inputs().head})") + assert(f.children().length == 1) + Some(s"STDDEV_POP(${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" && f.isDistinct == false => - assert(f.inputs().length == 1) - Some(s"STDDEV_SAMP(${f.inputs().head})") + assert(f.children().length == 1) + Some(s"STDDEV_SAMP(${f.children().head})") case _ => None } ) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala index 7bd51f809cd04..643376cdb126a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala @@ -32,33 +32,33 @@ private object H2Dialect extends JdbcDialect { super.compileAggregate(aggFunction).orElse( aggFunction match { case f: GeneralAggregateFunc if f.name() == "VAR_POP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VAR_POP($distinct${f.inputs().head})") + Some(s"VAR_POP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VAR_SAMP($distinct${f.inputs().head})") + Some(s"VAR_SAMP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDDEV_POP($distinct${f.inputs().head})") + Some(s"STDDEV_POP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDDEV_SAMP($distinct${f.inputs().head})") + Some(s"STDDEV_SAMP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "COVAR_POP" => - assert(f.inputs().length == 2) + assert(f.children().length == 2) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"COVAR_POP($distinct${f.inputs().head}, ${f.inputs().last})") + Some(s"COVAR_POP($distinct${f.children().head}, ${f.children().last})") case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" => - assert(f.inputs().length == 2) + assert(f.children().length == 2) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"COVAR_SAMP($distinct${f.inputs().head}, ${f.inputs().last})") + Some(s"COVAR_SAMP($distinct${f.children().head}, ${f.children().last})") case f: GeneralAggregateFunc if f.name() == "CORR" => - assert(f.inputs().length == 2) + assert(f.children().length == 2) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"CORR($distinct${f.inputs().head}, ${f.inputs().last})") + Some(s"CORR($distinct${f.children().head}, ${f.children().last})") case _ => None } ) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index c9dcbb2706cd4..4b28de26b59e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -22,17 +22,19 @@ import java.time.{Instant, LocalDate} import java.util import scala.collection.mutable.ArrayBuilder +import scala.util.control.NonFatal import org.apache.commons.lang3.StringUtils import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.CatalystTypeConverters import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter} import org.apache.spark.sql.connector.catalog.TableChange import org.apache.spark.sql.connector.catalog.TableChange._ import org.apache.spark.sql.connector.catalog.index.TableIndex -import org.apache.spark.sql.connector.expressions.{Expression, FieldReference, NamedReference} +import org.apache.spark.sql.connector.expressions.{Expression, Literal, NamedReference} import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, Avg, Count, CountStar, Max, Min, Sum} import org.apache.spark.sql.connector.util.V2ExpressionSQLBuilder import org.apache.spark.sql.errors.QueryCompilationErrors @@ -220,12 +222,18 @@ abstract class JdbcDialect extends Serializable with Logging{ } class JDBCSQLBuilder extends V2ExpressionSQLBuilder { - override def visitFieldReference(fieldRef: FieldReference): String = { - if (fieldRef.fieldNames().length != 1) { + override def visitLiteral(literal: Literal[_]): String = { + compileValue( + CatalystTypeConverters.convertToScala(literal.value(), literal.dataType())).toString + } + + override def visitNamedReference(namedRef: NamedReference): String = { + if (namedRef.fieldNames().length > 1) { throw new IllegalArgumentException( - "FieldReference with field name has multiple or zero parts unsupported: " + fieldRef); + QueryCompilationErrors.commandNotSupportNestedColumnError( + "Filter push down", namedRef.toString).getMessage); } - quoteIdentifier(fieldRef.fieldNames.head) + quoteIdentifier(namedRef.fieldNames.head) } } @@ -240,7 +248,9 @@ abstract class JdbcDialect extends Serializable with Logging{ try { Some(jdbcSQLBuilder.build(expr)) } catch { - case _: IllegalArgumentException => None + case NonFatal(e) => + logWarning("Error occurs while compiling V2 expression", e) + None } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala index 841f1c87319b5..8d2fbec55f919 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala @@ -47,21 +47,21 @@ private object MsSqlServerDialect extends JdbcDialect { super.compileAggregate(aggFunction).orElse( aggFunction match { case f: GeneralAggregateFunc if f.name() == "VAR_POP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VARP($distinct${f.inputs().head})") + Some(s"VARP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VAR($distinct${f.inputs().head})") + Some(s"VAR($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDEVP($distinct${f.inputs().head})") + Some(s"STDEVP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDEV($distinct${f.inputs().head})") + Some(s"STDEV($distinct${f.children().head})") case _ => None } ) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index d73721de962d7..24f9bac74f86d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -43,17 +43,17 @@ private case object MySQLDialect extends JdbcDialect with SQLConfHelper { super.compileAggregate(aggFunction).orElse( aggFunction match { case f: GeneralAggregateFunc if f.name() == "VAR_POP" && f.isDistinct == false => - assert(f.inputs().length == 1) - Some(s"VAR_POP(${f.inputs().head})") + assert(f.children().length == 1) + Some(s"VAR_POP(${f.children().head})") case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" && f.isDistinct == false => - assert(f.inputs().length == 1) - Some(s"VAR_SAMP(${f.inputs().head})") + assert(f.children().length == 1) + Some(s"VAR_SAMP(${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" && f.isDistinct == false => - assert(f.inputs().length == 1) - Some(s"STDDEV_POP(${f.inputs().head})") + assert(f.children().length == 1) + Some(s"STDDEV_POP(${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" && f.isDistinct == false => - assert(f.inputs().length == 1) - Some(s"STDDEV_SAMP(${f.inputs().head})") + assert(f.children().length == 1) + Some(s"STDDEV_SAMP(${f.children().head})") case _ => None } ) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index 71db7e9285f5e..40333c1757c4a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -41,26 +41,26 @@ private case object OracleDialect extends JdbcDialect { super.compileAggregate(aggFunction).orElse( aggFunction match { case f: GeneralAggregateFunc if f.name() == "VAR_POP" && f.isDistinct == false => - assert(f.inputs().length == 1) - Some(s"VAR_POP(${f.inputs().head})") + assert(f.children().length == 1) + Some(s"VAR_POP(${f.children().head})") case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" && f.isDistinct == false => - assert(f.inputs().length == 1) - Some(s"VAR_SAMP(${f.inputs().head})") + assert(f.children().length == 1) + Some(s"VAR_SAMP(${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" && f.isDistinct == false => - assert(f.inputs().length == 1) - Some(s"STDDEV_POP(${f.inputs().head})") + assert(f.children().length == 1) + Some(s"STDDEV_POP(${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" && f.isDistinct == false => - assert(f.inputs().length == 1) - Some(s"STDDEV_SAMP(${f.inputs().head})") + assert(f.children().length == 1) + Some(s"STDDEV_SAMP(${f.children().head})") case f: GeneralAggregateFunc if f.name() == "COVAR_POP" && f.isDistinct == false => - assert(f.inputs().length == 2) - Some(s"COVAR_POP(${f.inputs().head}, ${f.inputs().last})") + assert(f.children().length == 2) + Some(s"COVAR_POP(${f.children().head}, ${f.children().last})") case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" && f.isDistinct == false => - assert(f.inputs().length == 2) - Some(s"COVAR_SAMP(${f.inputs().head}, ${f.inputs().last})") + assert(f.children().length == 2) + Some(s"COVAR_SAMP(${f.children().head}, ${f.children().last})") case f: GeneralAggregateFunc if f.name() == "CORR" && f.isDistinct == false => - assert(f.inputs().length == 2) - Some(s"CORR(${f.inputs().head}, ${f.inputs().last})") + assert(f.children().length == 2) + Some(s"CORR(${f.children().head}, ${f.children().last})") case _ => None } ) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index e2023d110ae4b..a668d66ee2f9a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -41,33 +41,33 @@ private object PostgresDialect extends JdbcDialect with SQLConfHelper { super.compileAggregate(aggFunction).orElse( aggFunction match { case f: GeneralAggregateFunc if f.name() == "VAR_POP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VAR_POP($distinct${f.inputs().head})") + Some(s"VAR_POP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VAR_SAMP($distinct${f.inputs().head})") + Some(s"VAR_SAMP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDDEV_POP($distinct${f.inputs().head})") + Some(s"STDDEV_POP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDDEV_SAMP($distinct${f.inputs().head})") + Some(s"STDDEV_SAMP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "COVAR_POP" => - assert(f.inputs().length == 2) + assert(f.children().length == 2) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"COVAR_POP($distinct${f.inputs().head}, ${f.inputs().last})") + Some(s"COVAR_POP($distinct${f.children().head}, ${f.children().last})") case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" => - assert(f.inputs().length == 2) + assert(f.children().length == 2) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"COVAR_SAMP($distinct${f.inputs().head}, ${f.inputs().last})") + Some(s"COVAR_SAMP($distinct${f.children().head}, ${f.children().last})") case f: GeneralAggregateFunc if f.name() == "CORR" => - assert(f.inputs().length == 2) + assert(f.children().length == 2) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"CORR($distinct${f.inputs().head}, ${f.inputs().last})") + Some(s"CORR($distinct${f.children().head}, ${f.children().last})") case _ => None } ) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala index 13e16d24d048d..79fb710cf03b3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala @@ -35,30 +35,30 @@ private case object TeradataDialect extends JdbcDialect { super.compileAggregate(aggFunction).orElse( aggFunction match { case f: GeneralAggregateFunc if f.name() == "VAR_POP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VAR_POP($distinct${f.inputs().head})") + Some(s"VAR_POP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "VAR_SAMP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"VAR_SAMP($distinct${f.inputs().head})") + Some(s"VAR_SAMP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_POP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDDEV_POP($distinct${f.inputs().head})") + Some(s"STDDEV_POP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "STDDEV_SAMP" => - assert(f.inputs().length == 1) + assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"STDDEV_SAMP($distinct${f.inputs().head})") + Some(s"STDDEV_SAMP($distinct${f.children().head})") case f: GeneralAggregateFunc if f.name() == "COVAR_POP" && f.isDistinct == false => - assert(f.inputs().length == 2) - Some(s"COVAR_POP(${f.inputs().head}, ${f.inputs().last})") + assert(f.children().length == 2) + Some(s"COVAR_POP(${f.children().head}, ${f.children().last})") case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" && f.isDistinct == false => - assert(f.inputs().length == 2) - Some(s"COVAR_SAMP(${f.inputs().head}, ${f.inputs().last})") + assert(f.children().length == 2) + Some(s"COVAR_SAMP(${f.children().head}, ${f.children().last})") case f: GeneralAggregateFunc if f.name() == "CORR" && f.isDistinct == false => - assert(f.inputs().length == 2) - Some(s"CORR(${f.inputs().head}, ${f.inputs().last})") + assert(f.children().length == 2) + Some(s"CORR(${f.children().head}, ${f.children().last})") case _ => None } ) diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaAdvancedDataSourceV2WithV2Filter.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaAdvancedDataSourceV2WithV2Filter.java index b92206c6a5444..ec532da61042f 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaAdvancedDataSourceV2WithV2Filter.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaAdvancedDataSourceV2WithV2Filter.java @@ -17,21 +17,23 @@ package test.org.apache.spark.sql.connector; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; import org.apache.spark.sql.connector.TestingV2Source; import org.apache.spark.sql.connector.catalog.Table; -import org.apache.spark.sql.connector.expressions.filter.Filter; +import org.apache.spark.sql.connector.expressions.FieldReference; +import org.apache.spark.sql.connector.expressions.Literal; +import org.apache.spark.sql.connector.expressions.LiteralValue; +import org.apache.spark.sql.connector.expressions.filter.Predicate; import org.apache.spark.sql.connector.read.*; -import org.apache.spark.sql.connector.expressions.filter.GreaterThan; import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.util.CaseInsensitiveStringMap; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - public class JavaAdvancedDataSourceV2WithV2Filter implements TestingV2Source { @Override @@ -48,7 +50,7 @@ static class AdvancedScanBuilderWithV2Filter implements ScanBuilder, Scan, SupportsPushDownV2Filters, SupportsPushDownRequiredColumns { private StructType requiredSchema = TestingV2Source.schema(); - private Filter[] filters = new Filter[0]; + private Predicate[] predicates = new Predicate[0]; @Override public void pruneColumns(StructType requiredSchema) { @@ -61,32 +63,38 @@ public StructType readSchema() { } @Override - public Filter[] pushFilters(Filter[] filters) { - Filter[] supported = Arrays.stream(filters).filter(f -> { - if (f instanceof GreaterThan) { - GreaterThan gt = (GreaterThan) f; - return gt.column().describe().equals("i") && gt.value().value() instanceof Integer; + public Predicate[] pushPredicates(Predicate[] predicates) { + Predicate[] supported = Arrays.stream(predicates).filter(f -> { + if (f.name().equals(">")) { + assert(f.children()[0] instanceof FieldReference); + FieldReference column = (FieldReference) f.children()[0]; + assert(f.children()[1] instanceof LiteralValue); + Literal value = (Literal) f.children()[1]; + return column.describe().equals("i") && value.value() instanceof Integer; } else { return false; } - }).toArray(Filter[]::new); - - Filter[] unsupported = Arrays.stream(filters).filter(f -> { - if (f instanceof GreaterThan) { - GreaterThan gt = (GreaterThan) f; - return !gt.column().describe().equals("i") || !(gt.value().value() instanceof Integer); + }).toArray(Predicate[]::new); + + Predicate[] unsupported = Arrays.stream(predicates).filter(f -> { + if (f.name().equals(">")) { + assert(f.children()[0] instanceof FieldReference); + FieldReference column = (FieldReference) f.children()[0]; + assert(f.children()[1] instanceof LiteralValue); + Literal value = (LiteralValue) f.children()[1]; + return !column.describe().equals("i") || !(value.value() instanceof Integer); } else { return true; } - }).toArray(Filter[]::new); + }).toArray(Predicate[]::new); - this.filters = supported; + this.predicates = supported; return unsupported; } @Override - public Filter[] pushedFilters() { - return filters; + public Predicate[] pushedPredicates() { + return predicates; } @Override @@ -96,18 +104,18 @@ public Scan build() { @Override public Batch toBatch() { - return new AdvancedBatchWithV2Filter(requiredSchema, filters); + return new AdvancedBatchWithV2Filter(requiredSchema, predicates); } } public static class AdvancedBatchWithV2Filter implements Batch { // Exposed for testing. public StructType requiredSchema; - public Filter[] filters; + public Predicate[] predicates; - AdvancedBatchWithV2Filter(StructType requiredSchema, Filter[] filters) { + AdvancedBatchWithV2Filter(StructType requiredSchema, Predicate[] predicates) { this.requiredSchema = requiredSchema; - this.filters = filters; + this.predicates = predicates; } @Override @@ -115,11 +123,14 @@ public InputPartition[] planInputPartitions() { List res = new ArrayList<>(); Integer lowerBound = null; - for (Filter filter : filters) { - if (filter instanceof GreaterThan) { - GreaterThan f = (GreaterThan) filter; - if ("i".equals(f.column().describe()) && f.value().value() instanceof Integer) { - lowerBound = (Integer) f.value().value(); + for (Predicate predicate : predicates) { + if (predicate.name().equals(">")) { + assert(predicate.children()[0] instanceof FieldReference); + FieldReference column = (FieldReference) predicate.children()[0]; + assert(predicate.children()[1] instanceof LiteralValue); + Literal value = (Literal) predicate.children()[1]; + if ("i".equals(column.describe()) && value.value() instanceof Integer) { + lowerBound = (Integer) value.value(); break; } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala index 8f37e42b167be..b7533c6a09fda 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala @@ -27,8 +27,8 @@ import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.connector.catalog.{SupportsRead, Table, TableCapability, TableProvider} import org.apache.spark.sql.connector.catalog.TableCapability._ -import org.apache.spark.sql.connector.expressions.Transform -import org.apache.spark.sql.connector.expressions.filter.{Filter => V2Filter, GreaterThan => V2GreaterThan} +import org.apache.spark.sql.connector.expressions.{Literal, Transform} +import org.apache.spark.sql.connector.expressions.filter.Predicate import org.apache.spark.sql.connector.read._ import org.apache.spark.sql.connector.read.partitioning.{ClusteredDistribution, Distribution, Partitioning} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper @@ -155,11 +155,11 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS checkAnswer(q1, (0 until 10).map(i => Row(-i))) if (cls == classOf[AdvancedDataSourceV2WithV2Filter]) { val batch = getBatchWithV2Filter(q1) - assert(batch.filters.isEmpty) + assert(batch.predicates.isEmpty) assert(batch.requiredSchema.fieldNames === Seq("j")) } else { val batch = getJavaBatchWithV2Filter(q1) - assert(batch.filters.isEmpty) + assert(batch.predicates.isEmpty) assert(batch.requiredSchema.fieldNames === Seq("j")) } @@ -167,11 +167,11 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS checkAnswer(q2, (4 until 10).map(i => Row(i, -i))) if (cls == classOf[AdvancedDataSourceV2WithV2Filter]) { val batch = getBatchWithV2Filter(q2) - assert(batch.filters.flatMap(_.references.map(_.describe)).toSet == Set("i")) + assert(batch.predicates.flatMap(_.references.map(_.describe)).toSet == Set("i")) assert(batch.requiredSchema.fieldNames === Seq("i", "j")) } else { val batch = getJavaBatchWithV2Filter(q2) - assert(batch.filters.flatMap(_.references.map(_.describe)).toSet == Set("i")) + assert(batch.predicates.flatMap(_.references.map(_.describe)).toSet == Set("i")) assert(batch.requiredSchema.fieldNames === Seq("i", "j")) } @@ -179,11 +179,11 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS checkAnswer(q3, (7 until 10).map(i => Row(i))) if (cls == classOf[AdvancedDataSourceV2WithV2Filter]) { val batch = getBatchWithV2Filter(q3) - assert(batch.filters.flatMap(_.references.map(_.describe)).toSet == Set("i")) + assert(batch.predicates.flatMap(_.references.map(_.describe)).toSet == Set("i")) assert(batch.requiredSchema.fieldNames === Seq("i")) } else { val batch = getJavaBatchWithV2Filter(q3) - assert(batch.filters.flatMap(_.references.map(_.describe)).toSet == Set("i")) + assert(batch.predicates.flatMap(_.references.map(_.describe)).toSet == Set("i")) assert(batch.requiredSchema.fieldNames === Seq("i")) } @@ -192,12 +192,12 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS if (cls == classOf[AdvancedDataSourceV2WithV2Filter]) { val batch = getBatchWithV2Filter(q4) // Symbol("j") < 10 is not supported by the testing data source. - assert(batch.filters.isEmpty) + assert(batch.predicates.isEmpty) assert(batch.requiredSchema.fieldNames === Seq("j")) } else { val batch = getJavaBatchWithV2Filter(q4) // Symbol("j") < 10 is not supported by the testing data source. - assert(batch.filters.isEmpty) + assert(batch.predicates.isEmpty) assert(batch.requiredSchema.fieldNames === Seq("j")) } } @@ -704,7 +704,7 @@ class AdvancedScanBuilderWithV2Filter extends ScanBuilder with Scan with SupportsPushDownV2Filters with SupportsPushDownRequiredColumns { var requiredSchema = TestingV2Source.schema - var filters = Array.empty[V2Filter] + var predicates = Array.empty[Predicate] override def pruneColumns(requiredSchema: StructType): Unit = { this.requiredSchema = requiredSchema @@ -712,29 +712,32 @@ class AdvancedScanBuilderWithV2Filter extends ScanBuilder override def readSchema(): StructType = requiredSchema - override def pushFilters(filters: Array[V2Filter]): Array[V2Filter] = { - val (supported, unsupported) = filters.partition { - case _: V2GreaterThan => true + override def pushPredicates(predicates: Array[Predicate]): Array[Predicate] = { + val (supported, unsupported) = predicates.partition { + case p: Predicate if p.name() == ">" => true case _ => false } - this.filters = supported + this.predicates = supported unsupported } - override def pushedFilters(): Array[V2Filter] = filters + override def pushedPredicates(): Array[Predicate] = predicates override def build(): Scan = this - override def toBatch: Batch = new AdvancedBatchWithV2Filter(filters, requiredSchema) + override def toBatch: Batch = new AdvancedBatchWithV2Filter(predicates, requiredSchema) } class AdvancedBatchWithV2Filter( - val filters: Array[V2Filter], + val predicates: Array[Predicate], val requiredSchema: StructType) extends Batch { override def planInputPartitions(): Array[InputPartition] = { - val lowerBound = filters.collectFirst { - case gt: V2GreaterThan => gt.value + val lowerBound = predicates.collectFirst { + case p: Predicate if p.name().equals(">") => + val value = p.children()(1) + assert(value.isInstanceOf[Literal[_]]) + value.asInstanceOf[Literal[_]] } val res = scala.collection.mutable.ArrayBuffer.empty[InputPartition] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala index 0fb6fc58c400d..6296da47cca51 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala @@ -21,21 +21,21 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.connector.expressions.{FieldReference, LiteralValue} -import org.apache.spark.sql.connector.expressions.filter.{EqualTo => V2EqualTo, Filter => V2Filter} +import org.apache.spark.sql.connector.expressions.filter.Predicate import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.BooleanType class DataSourceV2StrategySuite extends PlanTest with SharedSparkSession { test("SPARK-36644: Push down boolean column filter") { testTranslateFilter(Symbol("col").boolean, - Some(new V2EqualTo(FieldReference("col"), LiteralValue(true, BooleanType)))) + Some(new Predicate("=", Array(FieldReference("col"), LiteralValue(true, BooleanType))))) } /** - * Translate the given Catalyst [[Expression]] into data source [[V2Filter]] - * then verify against the given [[V2Filter]]. + * Translate the given Catalyst [[Expression]] into data source V2 [[Predicate]] + * then verify against the given [[Predicate]]. */ - def testTranslateFilter(catalystFilter: Expression, result: Option[V2Filter]): Unit = { + def testTranslateFilter(catalystFilter: Expression, result: Option[Predicate]): Unit = { assertResult(result) { DataSourceV2Strategy.translateFilterV2(catalystFilter, true) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2FiltersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2FiltersSuite.scala deleted file mode 100644 index b457211b7f89f..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2FiltersSuite.scala +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.datasources.v2 - -import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.connector.expressions.{FieldReference, Literal, LiteralValue} -import org.apache.spark.sql.connector.expressions.filter._ -import org.apache.spark.sql.execution.datasources.v2.FiltersV2Suite.ref -import org.apache.spark.sql.types.IntegerType -import org.apache.spark.unsafe.types.UTF8String - -class FiltersV2Suite extends SparkFunSuite { - - test("nested columns") { - val filter1 = new EqualTo(ref("a", "B"), LiteralValue(1, IntegerType)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a.B")) - assert(filter1.describe.equals("a.B = 1")) - - val filter2 = new EqualTo(ref("a", "b.c"), LiteralValue(1, IntegerType)) - assert(filter2.references.map(_.describe()).toSeq == Seq("a.`b.c`")) - assert(filter2.describe.equals("a.`b.c` = 1")) - - val filter3 = new EqualTo(ref("`a`.b", "c"), LiteralValue(1, IntegerType)) - assert(filter3.references.map(_.describe()).toSeq == Seq("```a``.b`.c")) - assert(filter3.describe.equals("```a``.b`.c = 1")) - } - - test("AlwaysTrue") { - val filter1 = new AlwaysTrue - val filter2 = new AlwaysTrue - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).length == 0) - assert(filter1.describe.equals("TRUE")) - } - - test("AlwaysFalse") { - val filter1 = new AlwaysFalse - val filter2 = new AlwaysFalse - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).length == 0) - assert(filter1.describe.equals("FALSE")) - } - - test("EqualTo") { - val filter1 = new EqualTo(ref("a"), LiteralValue(1, IntegerType)) - val filter2 = new EqualTo(ref("a"), LiteralValue(1, IntegerType)) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a")) - assert(filter1.describe.equals("a = 1")) - } - - test("EqualNullSafe") { - val filter1 = new EqualNullSafe(ref("a"), LiteralValue(1, IntegerType)) - val filter2 = new EqualNullSafe(ref("a"), LiteralValue(1, IntegerType)) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a")) - assert(filter1.describe.equals("a <=> 1")) - } - - test("GreaterThan") { - val filter1 = new GreaterThan(ref("a"), LiteralValue(1, IntegerType)) - val filter2 = new GreaterThan(ref("a"), LiteralValue(1, IntegerType)) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a")) - assert(filter1.describe.equals("a > 1")) - } - - test("GreaterThanOrEqual") { - val filter1 = new GreaterThanOrEqual(ref("a"), LiteralValue(1, IntegerType)) - val filter2 = new GreaterThanOrEqual(ref("a"), LiteralValue(1, IntegerType)) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a")) - assert(filter1.describe.equals("a >= 1")) - } - - test("LessThan") { - val filter1 = new LessThan(ref("a"), LiteralValue(1, IntegerType)) - val filter2 = new LessThan(ref("a"), LiteralValue(1, IntegerType)) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a")) - assert(filter1.describe.equals("a < 1")) - } - - test("LessThanOrEqual") { - val filter1 = new LessThanOrEqual(ref("a"), LiteralValue(1, IntegerType)) - val filter2 = new LessThanOrEqual(ref("a"), LiteralValue(1, IntegerType)) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a")) - assert(filter1.describe.equals("a <= 1")) - } - - test("In") { - val filter1 = new In(ref("a"), - Array(LiteralValue(1, IntegerType), LiteralValue(2, IntegerType), - LiteralValue(3, IntegerType), LiteralValue(4, IntegerType))) - val filter2 = new In(ref("a"), - Array(LiteralValue(4, IntegerType), LiteralValue(2, IntegerType), - LiteralValue(3, IntegerType), LiteralValue(1, IntegerType))) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a")) - assert(filter1.describe.equals("a IN (1, 2, 3, 4)")) - val values: Array[Literal[_]] = new Array[Literal[_]](1000) - for (i <- 0 until 1000) { - values(i) = LiteralValue(i, IntegerType) - } - val filter3 = new In(ref("a"), values) - var expected = "a IN (" - for (i <- 0 until 50) { - expected += i + ", " - } - expected = expected.dropRight(2) // remove the last ", " - expected += "...)" - assert(filter3.describe.equals(expected)) - } - - test("IsNull") { - val filter1 = new IsNull(ref("a")) - val filter2 = new IsNull(ref("a")) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a")) - assert(filter1.describe.equals("a IS NULL")) - } - - test("IsNotNull") { - val filter1 = new IsNotNull(ref("a")) - val filter2 = new IsNotNull(ref("a")) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a")) - assert(filter1.describe.equals("a IS NOT NULL")) - } - - test("Not") { - val filter1 = new Not(new LessThan(ref("a"), LiteralValue(1, IntegerType))) - val filter2 = new Not(new LessThan(ref("a"), LiteralValue(1, IntegerType))) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a")) - assert(filter1.describe.equals("NOT (a < 1)")) - } - - test("And") { - val filter1 = new And(new EqualTo(ref("a"), LiteralValue(1, IntegerType)), - new EqualTo(ref("b"), LiteralValue(1, IntegerType))) - val filter2 = new And(new EqualTo(ref("a"), LiteralValue(1, IntegerType)), - new EqualTo(ref("b"), LiteralValue(1, IntegerType))) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a", "b")) - assert(filter1.describe.equals("(a = 1) AND (b = 1)")) - } - - test("Or") { - val filter1 = new Or(new EqualTo(ref("a"), LiteralValue(1, IntegerType)), - new EqualTo(ref("b"), LiteralValue(1, IntegerType))) - val filter2 = new Or(new EqualTo(ref("a"), LiteralValue(1, IntegerType)), - new EqualTo(ref("b"), LiteralValue(1, IntegerType))) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a", "b")) - assert(filter1.describe.equals("(a = 1) OR (b = 1)")) - } - - test("StringStartsWith") { - val filter1 = new StringStartsWith(ref("a"), UTF8String.fromString("str")) - val filter2 = new StringStartsWith(ref("a"), UTF8String.fromString("str")) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a")) - assert(filter1.describe.equals("STRING_STARTS_WITH(a, str)")) - } - - test("StringEndsWith") { - val filter1 = new StringEndsWith(ref("a"), UTF8String.fromString("str")) - val filter2 = new StringEndsWith(ref("a"), UTF8String.fromString("str")) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a")) - assert(filter1.describe.equals("STRING_ENDS_WITH(a, str)")) - } - - test("StringContains") { - val filter1 = new StringContains(ref("a"), UTF8String.fromString("str")) - val filter2 = new StringContains(ref("a"), UTF8String.fromString("str")) - assert(filter1.equals(filter2)) - assert(filter1.references.map(_.describe()).toSeq == Seq("a")) - assert(filter1.describe.equals("STRING_CONTAINS(a, str)")) - } -} - -object FiltersV2Suite { - private[sql] def ref(parts: String*): FieldReference = { - new FieldReference(parts) - } -} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2PredicateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2PredicateSuite.scala new file mode 100644 index 0000000000000..2d6e6fcf16174 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2PredicateSuite.scala @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.connector.expressions.{Expression, FieldReference, Literal, LiteralValue} +import org.apache.spark.sql.connector.expressions.filter._ +import org.apache.spark.sql.execution.datasources.v2.V2PredicateSuite.ref +import org.apache.spark.sql.types.{IntegerType, StringType} +import org.apache.spark.unsafe.types.UTF8String + +class V2PredicateSuite extends SparkFunSuite { + + test("nested columns") { + val predicate1 = + new Predicate("=", Array[Expression](ref("a", "B"), LiteralValue(1, IntegerType))) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a.B")) + assert(predicate1.describe.equals("a.B = 1")) + + val predicate2 = + new Predicate("=", Array[Expression](ref("a", "b.c"), LiteralValue(1, IntegerType))) + assert(predicate2.references.map(_.describe()).toSeq == Seq("a.`b.c`")) + assert(predicate2.describe.equals("a.`b.c` = 1")) + + val predicate3 = + new Predicate("=", Array[Expression](ref("`a`.b", "c"), LiteralValue(1, IntegerType))) + assert(predicate3.references.map(_.describe()).toSeq == Seq("```a``.b`.c")) + assert(predicate3.describe.equals("```a``.b`.c = 1")) + } + + test("AlwaysTrue") { + val predicate1 = new AlwaysTrue + val predicate2 = new AlwaysTrue + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).length == 0) + assert(predicate1.describe.equals("TRUE")) + } + + test("AlwaysFalse") { + val predicate1 = new AlwaysFalse + val predicate2 = new AlwaysFalse + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).length == 0) + assert(predicate1.describe.equals("FALSE")) + } + + test("EqualTo") { + val predicate1 = new Predicate("=", Array[Expression](ref("a"), LiteralValue(1, IntegerType))) + val predicate2 = new Predicate("=", Array[Expression](ref("a"), LiteralValue(1, IntegerType))) + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) + assert(predicate1.describe.equals("a = 1")) + } + + test("EqualNullSafe") { + val predicate1 = new Predicate("<=>", Array[Expression](ref("a"), LiteralValue(1, IntegerType))) + val predicate2 = new Predicate("<=>", Array[Expression](ref("a"), LiteralValue(1, IntegerType))) + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) + assert(predicate1.describe.equals("(a = 1) OR (a IS NULL AND 1 IS NULL)")) + } + + test("In") { + val predicate1 = new Predicate("IN", + Array(ref("a"), LiteralValue(1, IntegerType), LiteralValue(2, IntegerType), + LiteralValue(3, IntegerType), LiteralValue(4, IntegerType))) + val predicate2 = new Predicate("IN", + Array(ref("a"), LiteralValue(4, IntegerType), LiteralValue(2, IntegerType), + LiteralValue(3, IntegerType), LiteralValue(1, IntegerType))) + assert(!predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) + assert(predicate1.describe.equals("a IN (1, 2, 3, 4)")) + val values: Array[Literal[_]] = new Array[Literal[_]](1000) + var expected = "a IN (" + for (i <- 0 until 1000) { + values(i) = LiteralValue(i, IntegerType) + expected += i + ", " + } + val predicate3 = new Predicate("IN", (ref("a") +: values).toArray[Expression]) + expected = expected.dropRight(2) // remove the last ", " + expected += ")" + assert(predicate3.describe.equals(expected)) + } + + test("IsNull") { + val predicate1 = new Predicate("IS_NULL", Array[Expression](ref("a"))) + val predicate2 = new Predicate("IS_NULL", Array[Expression](ref("a"))) + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) + assert(predicate1.describe.equals("a IS NULL")) + } + + test("IsNotNull") { + val predicate1 = new Predicate("IS_NOT_NULL", Array[Expression](ref("a"))) + val predicate2 = new Predicate("IS_NOT_NULL", Array[Expression](ref("a"))) + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) + assert(predicate1.describe.equals("a IS NOT NULL")) + } + + test("Not") { + val predicate1 = new Not( + new Predicate("<", Array[Expression](ref("a"), LiteralValue(1, IntegerType)))) + val predicate2 = new Not( + new Predicate("<", Array[Expression](ref("a"), LiteralValue(1, IntegerType)))) + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) + assert(predicate1.describe.equals("NOT (a < 1)")) + } + + test("And") { + val predicate1 = new And( + new Predicate("=", Array[Expression](ref("a"), LiteralValue(1, IntegerType))), + new Predicate("=", Array[Expression](ref("b"), LiteralValue(1, IntegerType)))) + val predicate2 = new And( + new Predicate("=", Array[Expression](ref("a"), LiteralValue(1, IntegerType))), + new Predicate("=", Array[Expression](ref("b"), LiteralValue(1, IntegerType)))) + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a", "b")) + assert(predicate1.describe.equals("(a = 1) AND (b = 1)")) + } + + test("Or") { + val predicate1 = new Or( + new Predicate("=", Array[Expression](ref("a"), LiteralValue(1, IntegerType))), + new Predicate("=", Array[Expression](ref("b"), LiteralValue(1, IntegerType)))) + val predicate2 = new Or( + new Predicate("=", Array[Expression](ref("a"), LiteralValue(1, IntegerType))), + new Predicate("=", Array[Expression](ref("b"), LiteralValue(1, IntegerType)))) + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a", "b")) + assert(predicate1.describe.equals("(a = 1) OR (b = 1)")) + } + + test("StringStartsWith") { + val literal = LiteralValue(UTF8String.fromString("str"), StringType) + val predicate1 = new Predicate("STARTS_WITH", + Array[Expression](ref("a"), literal)) + val predicate2 = new Predicate("STARTS_WITH", + Array[Expression](ref("a"), literal)) + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) + assert(predicate1.describe.equals("a LIKE 'str%'")) + } + + test("StringEndsWith") { + val literal = LiteralValue(UTF8String.fromString("str"), StringType) + val predicate1 = new Predicate("ENDS_WITH", + Array[Expression](ref("a"), literal)) + val predicate2 = new Predicate("ENDS_WITH", + Array[Expression](ref("a"), literal)) + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) + assert(predicate1.describe.equals("a LIKE '%str'")) + } + + test("StringContains") { + val literal = LiteralValue(UTF8String.fromString("str"), StringType) + val predicate1 = new Predicate("CONTAINS", + Array[Expression](ref("a"), literal)) + val predicate2 = new Predicate("CONTAINS", + Array[Expression](ref("a"), literal)) + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) + assert(predicate1.describe.equals("a LIKE '%str%'")) + } +} + +object V2PredicateSuite { + private[sql] def ref(parts: String*): FieldReference = { + new FieldReference(parts) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index d32e958c7ca2b..b1f5fd00868db 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeTestUtils import org.apache.spark.sql.execution.{DataSourceScanExec, ExtendedMode} import org.apache.spark.sql.execution.command.{ExplainCommand, ShowCreateTableCommand} import org.apache.spark.sql.execution.datasources.LogicalRelation -import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JDBCPartition, JDBCRDD, JDBCRelation, JdbcUtils} +import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JDBCPartition, JDBCRelation, JdbcUtils} import org.apache.spark.sql.execution.metric.InputOutputMetricsHelper import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ @@ -773,12 +773,12 @@ class JDBCSuite extends QueryTest } test("compile filters") { - val compileFilter = PrivateMethod[Option[String]](Symbol("compileFilter")) def doCompileFilter(f: Filter): String = - JDBCRDD invokePrivate compileFilter(f, JdbcDialects.get("jdbc:")) getOrElse("") + JdbcDialects.get("jdbc:").compileExpression(f.toV2).getOrElse("") + Seq(("col0", "col1"), ("`col0`", "`col1`")).foreach { case(col0, col1) => assert(doCompileFilter(EqualTo(col0, 3)) === """"col0" = 3""") - assert(doCompileFilter(Not(EqualTo(col1, "abc"))) === """(NOT ("col1" = 'abc'))""") + assert(doCompileFilter(Not(EqualTo(col1, "abc"))) === """NOT ("col1" = 'abc')""") assert(doCompileFilter(And(EqualTo(col0, 0), EqualTo(col1, "def"))) === """("col0" = 0) AND ("col1" = 'def')""") assert(doCompileFilter(Or(EqualTo(col0, 2), EqualTo(col1, "ghi"))) @@ -795,17 +795,14 @@ class JDBCSuite extends QueryTest assert(doCompileFilter(In(col1, Array.empty)) === """CASE WHEN "col1" IS NULL THEN NULL ELSE FALSE END""") assert(doCompileFilter(Not(In(col1, Array("mno", "pqr")))) - === """(NOT ("col1" IN ('mno', 'pqr')))""") + === """NOT ("col1" IN ('mno', 'pqr'))""") assert(doCompileFilter(IsNull(col1)) === """"col1" IS NULL""") assert(doCompileFilter(IsNotNull(col1)) === """"col1" IS NOT NULL""") assert(doCompileFilter(And(EqualNullSafe(col0, "abc"), EqualTo(col1, "def"))) - === """((NOT ("col0" != 'abc' OR "col0" IS NULL OR 'abc' IS NULL) """ - + """OR ("col0" IS NULL AND 'abc' IS NULL))) AND ("col1" = 'def')""") + === """(("col0" = 'abc') OR ("col0" IS NULL AND 'abc' IS NULL))""" + + """ AND ("col1" = 'def')""") } - val e = intercept[AnalysisException] { - doCompileFilter(EqualTo("col0.nested", 3)) - }.getMessage - assert(e.contains("Filter push down does not support nested column: col0.nested")) + assert(doCompileFilter(EqualTo("col0.nested", 3)).isEmpty) } test("Dialect unregister") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 17bd7f7a6d5bc..d50a0551226a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Sort} import org.apache.spark.sql.connector.expressions.{FieldReference, NullOrdering, SortDirection, SortValue} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanRelation, V1ScanWrapper} import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -import org.apache.spark.sql.functions.{avg, count, count_distinct, lit, sum, udf} +import org.apache.spark.sql.functions.{avg, count, count_distinct, lit, not, sum, udf, when} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.util.Utils @@ -70,17 +70,17 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel conn.prepareStatement("INSERT INTO \"test\".\"people\" VALUES ('mary', 2)").executeUpdate() conn.prepareStatement( "CREATE TABLE \"test\".\"employee\" (dept INTEGER, name TEXT(32), salary NUMERIC(20, 2)," + - " bonus DOUBLE)").executeUpdate() - conn.prepareStatement("INSERT INTO \"test\".\"employee\" VALUES (1, 'amy', 10000, 1000)") - .executeUpdate() - conn.prepareStatement("INSERT INTO \"test\".\"employee\" VALUES (2, 'alex', 12000, 1200)") - .executeUpdate() - conn.prepareStatement("INSERT INTO \"test\".\"employee\" VALUES (1, 'cathy', 9000, 1200)") - .executeUpdate() - conn.prepareStatement("INSERT INTO \"test\".\"employee\" VALUES (2, 'david', 10000, 1300)") - .executeUpdate() - conn.prepareStatement("INSERT INTO \"test\".\"employee\" VALUES (6, 'jen', 12000, 1200)") - .executeUpdate() + " bonus DOUBLE, is_manager BOOLEAN)").executeUpdate() + conn.prepareStatement( + "INSERT INTO \"test\".\"employee\" VALUES (1, 'amy', 10000, 1000, true)").executeUpdate() + conn.prepareStatement( + "INSERT INTO \"test\".\"employee\" VALUES (2, 'alex', 12000, 1200, false)").executeUpdate() + conn.prepareStatement( + "INSERT INTO \"test\".\"employee\" VALUES (1, 'cathy', 9000, 1200, false)").executeUpdate() + conn.prepareStatement( + "INSERT INTO \"test\".\"employee\" VALUES (2, 'david', 10000, 1300, true)").executeUpdate() + conn.prepareStatement( + "INSERT INTO \"test\".\"employee\" VALUES (6, 'jen', 12000, 1200, true)").executeUpdate() conn.prepareStatement( "CREATE TABLE \"test\".\"dept\" (\"dept id\" INTEGER NOT NULL)").executeUpdate() conn.prepareStatement("INSERT INTO \"test\".\"dept\" VALUES (1)").executeUpdate() @@ -131,7 +131,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val df1 = spark.read.table("h2.test.employee") .where($"dept" === 1).limit(1) checkPushedLimit(df1, Some(1)) - checkAnswer(df1, Seq(Row(1, "amy", 10000.00, 1000.0))) + checkAnswer(df1, Seq(Row(1, "amy", 10000.00, 1000.0, true))) val df2 = spark.read .option("partitionColumn", "dept") @@ -142,7 +142,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .filter($"dept" > 1) .limit(1) checkPushedLimit(df2, Some(1)) - checkAnswer(df2, Seq(Row(2, "alex", 12000.00, 1200.0))) + checkAnswer(df2, Seq(Row(2, "alex", 12000.00, 1200.0, false))) val df3 = sql("SELECT name FROM h2.test.employee WHERE dept > 1 LIMIT 1") checkSchemaNames(df3, Seq("NAME")) @@ -191,12 +191,12 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .sort("salary") .limit(1) checkPushedLimit(df1, Some(1), createSortValues()) - checkAnswer(df1, Seq(Row(1, "cathy", 9000.00, 1200.0))) + checkAnswer(df1, Seq(Row(1, "cathy", 9000.00, 1200.0, false))) val df2 = spark.read.table("h2.test.employee") .where($"dept" === 1).orderBy($"salary").limit(1) checkPushedLimit(df2, Some(1), createSortValues()) - checkAnswer(df2, Seq(Row(1, "cathy", 9000.00, 1200.0))) + checkAnswer(df2, Seq(Row(1, "cathy", 9000.00, 1200.0, false))) val df3 = spark.read .option("partitionColumn", "dept") @@ -209,7 +209,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .limit(1) checkPushedLimit( df3, Some(1), createSortValues(SortDirection.DESCENDING, NullOrdering.NULLS_LAST)) - checkAnswer(df3, Seq(Row(2, "alex", 12000.00, 1200.0))) + checkAnswer(df3, Seq(Row(2, "alex", 12000.00, 1200.0, false))) val df4 = sql("SELECT name FROM h2.test.employee WHERE dept > 1 ORDER BY salary NULLS LAST LIMIT 1") @@ -220,7 +220,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val df5 = spark.read.table("h2.test.employee") .where($"dept" === 1).orderBy($"salary") checkPushedLimit(df5, None) - checkAnswer(df5, Seq(Row(1, "cathy", 9000.00, 1200.0), Row(1, "amy", 10000.00, 1000.0))) + checkAnswer(df5, + Seq(Row(1, "cathy", 9000.00, 1200.0, false), Row(1, "amy", 10000.00, 1000.0, true))) val df6 = spark.read .table("h2.test.employee") @@ -247,7 +248,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .sort(sub($"NAME")) .limit(1) checkPushedLimit(df8) - checkAnswer(df8, Seq(Row(2, "alex", 12000.00, 1200.0))) + checkAnswer(df8, Seq(Row(2, "alex", 12000.00, 1200.0, false))) } private def createSortValues( @@ -264,11 +265,165 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel df.queryExecution.optimizedPlan.collect { case _: DataSourceV2ScanRelation => val expected_plan_fragment = - "PushedFilters: [IsNotNull(ID), GreaterThan(ID,1)]" + "PushedFilters: [ID IS NOT NULL, ID > 1]" checkKeywordsExistsInExplain(df, expected_plan_fragment) } checkAnswer(df, Row("mary", 2)) + + val df2 = spark.table("h2.test.employee").filter($"name".isin("amy", "cathy")) + + checkFiltersRemoved(df2) + + df2.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedFilters: [NAME IN ('amy', 'cathy')]" + checkKeywordsExistsInExplain(df2, expected_plan_fragment) + } + + checkAnswer(df2, Seq(Row(1, "amy", 10000, 1000, true), Row(1, "cathy", 9000, 1200, false))) + + val df3 = spark.table("h2.test.employee").filter($"name".startsWith("a")) + + checkFiltersRemoved(df3) + + df3.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedFilters: [NAME IS NOT NULL, NAME LIKE 'a%']" + checkKeywordsExistsInExplain(df3, expected_plan_fragment) + } + + checkAnswer(df3, Seq(Row(1, "amy", 10000, 1000, true), Row(2, "alex", 12000, 1200, false))) + + val df4 = spark.table("h2.test.employee").filter($"is_manager") + + checkFiltersRemoved(df4) + + df4.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedFilters: [IS_MANAGER IS NOT NULL, IS_MANAGER = true]" + checkKeywordsExistsInExplain(df4, expected_plan_fragment) + } + + checkAnswer(df4, Seq(Row(1, "amy", 10000, 1000, true), Row(2, "david", 10000, 1300, true), + Row(6, "jen", 12000, 1200, true))) + + val df5 = spark.table("h2.test.employee").filter($"is_manager".and($"salary" > 10000)) + + checkFiltersRemoved(df5) + + df5.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedFilters: [IS_MANAGER IS NOT NULL, SALARY IS NOT NULL, " + + "IS_MANAGER = true, SALARY > 10000.00]" + checkKeywordsExistsInExplain(df5, expected_plan_fragment) + } + + checkAnswer(df5, Seq(Row(6, "jen", 12000, 1200, true))) + + val df6 = spark.table("h2.test.employee").filter($"is_manager".or($"salary" > 10000)) + + checkFiltersRemoved(df6) + + df6.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedFilters: [(IS_MANAGER = true) OR (SALARY > 10000.00)], " + checkKeywordsExistsInExplain(df6, expected_plan_fragment) + } + + checkAnswer(df6, Seq(Row(1, "amy", 10000, 1000, true), Row(2, "alex", 12000, 1200, false), + Row(2, "david", 10000, 1300, true), Row(6, "jen", 12000, 1200, true))) + + val df7 = spark.table("h2.test.employee").filter(not($"is_manager") === true) + + checkFiltersRemoved(df7) + + df7.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedFilters: [IS_MANAGER IS NOT NULL, NOT (IS_MANAGER = true)], " + checkKeywordsExistsInExplain(df7, expected_plan_fragment) + } + + checkAnswer(df7, Seq(Row(1, "cathy", 9000, 1200, false), Row(2, "alex", 12000, 1200, false))) + + val df8 = spark.table("h2.test.employee").filter($"is_manager" === true) + + checkFiltersRemoved(df8) + + df8.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedFilters: [IS_MANAGER IS NOT NULL, IS_MANAGER = true], " + checkKeywordsExistsInExplain(df8, expected_plan_fragment) + } + + checkAnswer(df8, Seq(Row(1, "amy", 10000, 1000, true), + Row(2, "david", 10000, 1300, true), Row(6, "jen", 12000, 1200, true))) + + val df9 = spark.table("h2.test.employee") + .filter(when($"dept" > 1, true).when($"is_manager", false).otherwise($"dept" > 3)) + + checkFiltersRemoved(df9) + + df9.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedFilters: [CASE WHEN DEPT > 1 THEN TRUE WHEN IS_MANAGER = true THEN FALSE" + + " ELSE DEPT > 3 END], " + checkKeywordsExistsInExplain(df9, expected_plan_fragment) + } + + checkAnswer(df9, Seq(Row(2, "alex", 12000, 1200, false), + Row(2, "david", 10000, 1300, true), Row(6, "jen", 12000, 1200, true))) + } + + test("scan with complex filter push-down") { + Seq(false, true).foreach { ansiMode => + withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiMode.toString) { + val df = spark.table("h2.test.people").filter($"id" + 1 > 1) + + checkFiltersRemoved(df, ansiMode) + + df.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = if (ansiMode) { + "PushedFilters: [ID IS NOT NULL, (ID + 1) > 1]" + } else { + "PushedFilters: [ID IS NOT NULL]" + } + checkKeywordsExistsInExplain(df, expected_plan_fragment) + } + + checkAnswer(df, Seq(Row("fred", 1), Row("mary", 2))) + + val df2 = sql(""" + |SELECT * FROM h2.test.employee + |WHERE (CASE WHEN SALARY > 10000 THEN BONUS ELSE BONUS + 200 END) > 1200 + |""".stripMargin) + + checkFiltersRemoved(df2, ansiMode) + + df2.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = if (ansiMode) { + "PushedFilters: [(CASE WHEN SALARY > 10000.00 THEN BONUS" + + " ELSE BONUS + 200.0 END) > 1200.0]" + } else { + "PushedFilters: []" + } + checkKeywordsExistsInExplain(df2, expected_plan_fragment) + } + + checkAnswer(df2, + Seq(Row(1, "cathy", 9000, 1200, false), Row(2, "david", 10000, 1300, true))) + } + } } test("scan with column pruning") { @@ -412,18 +567,22 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel case _: DataSourceV2ScanRelation => val expected_plan_fragment = "PushedAggregates: [MAX(SALARY), AVG(BONUS)], " + - "PushedFilters: [IsNotNull(DEPT), GreaterThan(DEPT,0)], " + - "PushedGroupByColumns: [DEPT]" + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + + "PushedGroupByColumns: [DEPT], " checkKeywordsExistsInExplain(df, expected_plan_fragment) } checkAnswer(df, Seq(Row(10000, 1100.0), Row(12000, 1250.0), Row(12000, 1200.0))) } - private def checkFiltersRemoved(df: DataFrame): Unit = { + private def checkFiltersRemoved(df: DataFrame, removed: Boolean = true): Unit = { val filters = df.queryExecution.optimizedPlan.collect { case f: Filter => f } - assert(filters.isEmpty) + if (removed) { + assert(filters.isEmpty) + } else { + assert(filters.nonEmpty) + } } test("scan with aggregate push-down: MAX AVG with filter without group by") { @@ -434,8 +593,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel case _: DataSourceV2ScanRelation => val expected_plan_fragment = "PushedAggregates: [MAX(ID), AVG(ID)], " + - "PushedFilters: [IsNotNull(ID), GreaterThan(ID,0)], " + - "PushedGroupByColumns: []" + "PushedFilters: [ID IS NOT NULL, ID > 0], " + + "PushedGroupByColumns: [], " checkKeywordsExistsInExplain(df, expected_plan_fragment) } checkAnswer(df, Seq(Row(2, 1.5))) @@ -550,7 +709,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val expected_plan_fragment = "PushedAggregates: [SUM(SALARY)], " + "PushedFilters: [], " + - "PushedGroupByColumns: [DEPT]" + "PushedGroupByColumns: [DEPT], " checkKeywordsExistsInExplain(df, expected_plan_fragment) } checkAnswer(df, Seq(Row(19000), Row(22000), Row(12000))) @@ -564,7 +723,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val expected_plan_fragment = "PushedAggregates: [SUM(DISTINCT SALARY)], " + "PushedFilters: [], " + - "PushedGroupByColumns: [DEPT]" + "PushedGroupByColumns: [DEPT], " checkKeywordsExistsInExplain(df, expected_plan_fragment) } checkAnswer(df, Seq(Row(19000), Row(22000), Row(12000))) @@ -579,8 +738,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel case _: DataSourceV2ScanRelation => val expected_plan_fragment = "PushedAggregates: [MAX(SALARY), MIN(BONUS)], " + - "PushedFilters: [IsNotNull(DEPT), GreaterThan(DEPT,0)], " + - "PushedGroupByColumns: [DEPT, NAME]" + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + + "PushedGroupByColumns: [DEPT, NAME], " checkKeywordsExistsInExplain(df, expected_plan_fragment) } checkAnswer(df, Seq(Row(9000, 1200), Row(12000, 1200), Row(10000, 1300), @@ -599,8 +758,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel case _: DataSourceV2ScanRelation => val expected_plan_fragment = "PushedAggregates: [MAX(SALARY)], " + - "PushedFilters: [IsNotNull(DEPT), GreaterThan(DEPT,0)], " + - "PushedGroupByColumns: [DEPT, NAME]" + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + + "PushedGroupByColumns: [DEPT, NAME], " checkKeywordsExistsInExplain(df1, expected_plan_fragment) } checkAnswer(df1, Seq(Row("1#amy", 10000), Row("1#cathy", 9000), Row("2#alex", 12000), @@ -617,8 +776,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel case _: DataSourceV2ScanRelation => val expected_plan_fragment = "PushedAggregates: [MAX(SALARY), MIN(BONUS)], " + - "PushedFilters: [IsNotNull(DEPT), GreaterThan(DEPT,0)], " + - "PushedGroupByColumns: [DEPT, NAME]" + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + + "PushedGroupByColumns: [DEPT, NAME], " checkKeywordsExistsInExplain(df2, expected_plan_fragment) } checkAnswer(df2, Seq(Row("1#amy", 11000), Row("1#cathy", 10200), Row("2#alex", 13200), @@ -634,7 +793,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel df3.queryExecution.optimizedPlan.collect { case _: DataSourceV2ScanRelation => val expected_plan_fragment = - "PushedFilters: [IsNotNull(DEPT), GreaterThan(DEPT,0)], " + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " checkKeywordsExistsInExplain(df3, expected_plan_fragment) } checkAnswer(df3, Seq(Row("1#amy", 11000), Row("1#cathy", 10200), Row("2#alex", 13200), @@ -653,8 +812,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel case _: DataSourceV2ScanRelation => val expected_plan_fragment = "PushedAggregates: [MAX(SALARY), MIN(BONUS)], " + - "PushedFilters: [IsNotNull(DEPT), GreaterThan(DEPT,0)], " + - "PushedGroupByColumns: [DEPT]" + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + + "PushedGroupByColumns: [DEPT], " checkKeywordsExistsInExplain(df, expected_plan_fragment) } checkAnswer(df, Seq(Row(12000, 1200), Row(12000, 1200))) @@ -670,7 +829,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val expected_plan_fragment = "PushedAggregates: [MIN(SALARY)], " + "PushedFilters: [], " + - "PushedGroupByColumns: [DEPT]" + "PushedGroupByColumns: [DEPT], " checkKeywordsExistsInExplain(df, expected_plan_fragment) } checkAnswer(df, Seq(Row(1, 9000), Row(2, 10000), Row(6, 12000))) @@ -693,8 +852,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel case _: DataSourceV2ScanRelation => val expected_plan_fragment = "PushedAggregates: [SUM(SALARY)], " + - "PushedFilters: [IsNotNull(DEPT), GreaterThan(DEPT,0)], " + - "PushedGroupByColumns: [DEPT]" + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + + "PushedGroupByColumns: [DEPT], " checkKeywordsExistsInExplain(query, expected_plan_fragment) } checkAnswer(query, Seq(Row(6, 12000), Row(1, 19000), Row(2, 22000))) @@ -736,8 +895,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel case _: DataSourceV2ScanRelation => val expected_plan_fragment = "PushedAggregates: [VAR_POP(BONUS), VAR_SAMP(BONUS)], " + - "PushedFilters: [IsNotNull(DEPT), GreaterThan(DEPT,0)], " + - "PushedGroupByColumns: [DEPT]" + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + + "PushedGroupByColumns: [DEPT], " checkKeywordsExistsInExplain(df, expected_plan_fragment) } checkAnswer(df, Seq(Row(10000d, 20000d), Row(2500d, 5000d), Row(0d, null))) @@ -752,8 +911,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel case _: DataSourceV2ScanRelation => val expected_plan_fragment = "PushedAggregates: [STDDEV_POP(BONUS), STDDEV_SAMP(BONUS)], " + - "PushedFilters: [IsNotNull(DEPT), GreaterThan(DEPT,0)], " + - "PushedGroupByColumns: [DEPT]" + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + + "PushedGroupByColumns: [DEPT], " checkKeywordsExistsInExplain(df, expected_plan_fragment) } checkAnswer(df, Seq(Row(100d, 141.4213562373095d), Row(50d, 70.71067811865476d), Row(0d, null))) @@ -768,8 +927,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel case _: DataSourceV2ScanRelation => val expected_plan_fragment = "PushedAggregates: [COVAR_POP(BONUS, BONUS), COVAR_SAMP(BONUS, BONUS)], " + - "PushedFilters: [IsNotNull(DEPT), GreaterThan(DEPT,0)], " + - "PushedGroupByColumns: [DEPT]" + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + + "PushedGroupByColumns: [DEPT], " checkKeywordsExistsInExplain(df, expected_plan_fragment) } checkAnswer(df, Seq(Row(10000d, 20000d), Row(2500d, 5000d), Row(0d, null))) @@ -784,15 +943,15 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel case _: DataSourceV2ScanRelation => val expected_plan_fragment = "PushedAggregates: [CORR(BONUS, BONUS)], " + - "PushedFilters: [IsNotNull(DEPT), GreaterThan(DEPT,0)], " + - "PushedGroupByColumns: [DEPT]" + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + + "PushedGroupByColumns: [DEPT], " checkKeywordsExistsInExplain(df, expected_plan_fragment) } checkAnswer(df, Seq(Row(1d), Row(1d), Row(null))) } test("scan with aggregate push-down: aggregate over alias NOT push down") { - val cols = Seq("a", "b", "c", "d") + val cols = Seq("a", "b", "c", "d", "e") val df1 = sql("select * from h2.test.employee").toDF(cols: _*) val df2 = df1.groupBy().sum("c") checkAggregateRemoved(df2, false) @@ -848,10 +1007,10 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel df.queryExecution.optimizedPlan.collect { case _: DataSourceV2ScanRelation => val expected_plan_fragment = - "PushedAggregates: [COUNT(CASE WHEN ((SALARY) > (8000.00)) AND ((SALARY) < (10000.00))" + - " THEN SALARY ELSE 0.00 END), C..., " + + "PushedAggregates: [COUNT(CASE WHEN (SALARY > 8000.00) AND (SALARY < 10000.00)" + + " THEN SALARY ELSE 0.00 END), COUNT(CAS..., " + "PushedFilters: [], " + - "PushedGroupByColumns: [DEPT]" + "PushedGroupByColumns: [DEPT], " checkKeywordsExistsInExplain(df, expected_plan_fragment) } checkAnswer(df, Seq(Row(1, 1, 1, 1, 1, 0d, 12000d, 0d, 12000d, 12000d, 0d, 0d, 2, 0d), @@ -865,8 +1024,9 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val df = sql("SELECT SUM(2147483647 + DEPT) FROM h2.test.employee") checkAggregateRemoved(df, ansiMode) val expected_plan_fragment = if (ansiMode) { - "PushedAggregates: [SUM((2147483647) + (DEPT))], " + - "PushedFilters: [], PushedGroupByColumns: []" + "PushedAggregates: [SUM(2147483647 + DEPT)], " + + "PushedFilters: [], " + + "PushedGroupByColumns: []" } else { "PushedFilters: []" } From bbe485fe3778de38680feee23785ebe1e2da9d4f Mon Sep 17 00:00:00 2001 From: Yimin Date: Tue, 22 Mar 2022 18:24:12 +0800 Subject: [PATCH 027/535] [SPARK-38579][SQL][WEBUI] Requesting Restful API can cause NullPointerException ### What changes were proposed in this pull request? Added null check for `exec.metricValues`. ### Why are the changes needed? When requesting Restful API {baseURL}/api/v1/applications/$appId/sql/$executionId which is introduced by this PR https://github.com/apache/spark/pull/28208, it can cause NullPointerException. The root cause is, when calling method doUpdate() of `LiveExecutionData`, `metricsValues` can be null. Then, when statement `printableMetrics(graph.allNodes, exec.metricValues)` is executed, it will throw NullPointerException. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Tested manually. Closes #35884 from yym1995/fix-npe. Lead-authored-by: Yimin Co-authored-by: Yimin Yang <26797163+yym1995@users.noreply.github.com> Signed-off-by: Yuming Wang (cherry picked from commit 99992a4e050a00564049be6938f5734876c17518) Signed-off-by: Yuming Wang --- .../org/apache/spark/status/api/v1/sql/SqlResource.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/SqlResource.scala b/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/SqlResource.scala index c7599f864dd97..4dd96e5ae252b 100644 --- a/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/SqlResource.scala +++ b/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/SqlResource.scala @@ -94,7 +94,11 @@ private[v1] class SqlResource extends BaseAppResource { val duration = exec.completionTime.getOrElse(new Date()).getTime - exec.submissionTime val planDetails = if (planDescription) exec.physicalPlanDescription else "" - val nodes = if (details) printableMetrics(graph.allNodes, exec.metricValues) else Seq.empty + val nodes = if (details) { + printableMetrics(graph.allNodes, Option(exec.metricValues).getOrElse(Map.empty)) + } else { + Seq.empty + } val edges = if (details) graph.edges else Seq.empty new ExecutionData( From 0265c86a0c735f7b39b1dac4a549bdf1e70d18c6 Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Tue, 22 Mar 2022 23:18:34 +0800 Subject: [PATCH 028/535] [SPARK-38456][SQL] Improve error messages of no viable alternative, extraneous input and missing token in ParseException MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR improves the "no viable alternative", "extraneous input" and "missing .. at " ANTLR error messages in ParseExceptions, as mentioned in https://issues.apache.org/jira/browse/SPARK-38456 **With this PR, all ANTLR exceptions are unified to the same error class, `PARSE_SYNTAX_ERROR`.** #### No viable alternative * Query ```sql select ( ``` * Before ``` no viable alternative at input ‘(‘(line 1, pos 8) ``` * After ``` Syntax error at or near end of input(line 1, pos 8) ``` #### Extraneous Input * Query ```sql CREATE TABLE my_tab(a: INT COMMENT 'test', b: STRING) USING parquet ``` * Before ``` extraneous input ':' expecting {'APPLY', 'CALLED', 'CHANGES', 'CLONE', 'COLLECT', 'CONTAINS', 'CONVERT', 'COPY', 'COPY_OPTIONS', 'CREDENTIAL', 'CREDENTIALS', 'DEEP', 'DEFINER', 'DELTA', 'DETERMINISTIC', 'ENCRYPTION', 'EXPECT', 'FAIL', 'FILES', 'FORMAT_OPTIONS', 'HISTORY', 'INCREMENTAL', 'INPUT', 'INVOKER', 'LANGUAGE', 'LIVE', 'MATERIALIZED', 'MODIFIES', 'OPTIMIZE', 'PATTERN', 'READS', 'RESTORE', 'RETURN', 'RETURNS', 'SAMPLE', 'SCD TYPE 1', 'SCD TYPE 2', 'SECURITY', 'SEQUENCE', 'SHALLOW', 'SNAPSHOT', 'SPECIFIC', 'SQL', 'STORAGE', 'STREAMING', 'UPDATES', 'UP_TO_DATE', 'VIOLATION', 'ZORDER', 'ADD', 'AFTER', 'ALL', 'ALTER', 'ALWAYS', 'ANALYZE', 'AND', 'ANTI', 'ANY', 'ARCHIVE', 'ARRAY', 'AS', 'ASC', 'AT', 'AUTHORIZATION', 'BETWEEN', 'BOTH', 'BUCKET', 'BUCKETS', 'BY', 'CACHE', 'CASCADE', 'CASE', 'CAST', 'CATALOG', 'CATALOGS', 'CHANGE', 'CHECK', 'CLEAR', 'CLUSTER', 'CLUSTERED', 'CODE', 'CODEGEN', 'COLLATE', 'COLLECTION', 'COLUMN', 'COLUMNS', 'COMMENT', 'COMMIT', 'COMPACT', 'COMPACTIONS', 'COMPUTE', 'CONCATENATE', 'CONSTRAINT', 'COST', 'CREATE', 'CROSS', 'CUBE', 'CURRENT', 'CURRENT_DATE', 'CURRENT_TIME', 'CURRENT_TIMESTAMP', 'CURRENT_USER', 'DAY', 'DATA', 'DATABASE', 'DATABASES', 'DATEADD', 'DATE_ADD', 'DATEDIFF', 'DATE_DIFF', 'DBPROPERTIES', 'DEFAULT', 'DEFINED', 'DELETE', 'DELIMITED', 'DESC', 'DESCRIBE', 'DFS', 'DIRECTORIES', 'DIRECTORY', 'DISTINCT', 'DISTRIBUTE', 'DIV', 'DROP', 'ELSE', 'END', 'ESCAPE', 'ESCAPED', 'EXCEPT', 'EXCHANGE', 'EXISTS', 'EXPLAIN', 'EXPORT', 'EXTENDED', 'EXTERNAL', 'EXTRACT', 'FALSE', 'FETCH', 'FIELDS', 'FILTER', 'FILEFORMAT', 'FIRST', 'FN', 'FOLLOWING', 'FOR', 'FOREIGN', 'FORMAT', 'FORMATTED', 'FROM', 'FULL', 'FUNCTION', 'FUNCTIONS', 'GENERATED', 'GLOBAL', 'GRANT', 'GRANTS', 'GROUP', 'GROUPING', 'HAVING', 'HOUR', 'IDENTITY', 'IF', 'IGNORE', 'IMPORT', 'IN', 'INCREMENT', 'INDEX', 'INDEXES', 'INNER', 'INPATH', 'INPUTFORMAT', 'INSERT', 'INTERSECT', 'INTERVAL', 'INTO', 'IS', 'ITEMS', 'JOIN', 'KEY', 'KEYS', 'LAST', 'LATERAL', 'LAZY', 'LEADING', 'LEFT', 'LIKE', 'ILIKE', 'LIMIT', 'LINES', 'LIST', 'LOAD', 'LOCAL', 'LOCATION', 'LOCK', 'LOCKS', 'LOGICAL', 'MACRO', 'MAP', 'MATCHED', 'MERGE', 'MINUTE', 'MONTH', 'MSCK', 'NAMESPACE', 'NAMESPACES', 'NATURAL', 'NO', NOT, 'NULL', 'NULLS', 'OF', 'ON', 'ONLY', 'OPTION', 'OPTIONS', 'OR', 'ORDER', 'OUT', 'OUTER', 'OUTPUTFORMAT', 'OVER', 'OVERLAPS', 'OVERLAY', 'OVERWRITE', 'PARTITION', 'PARTITIONED', 'PARTITIONS', 'PERCENTILE_CONT', 'PERCENT', 'PIVOT', 'PLACING', 'POSITION', 'PRECEDING', 'PRIMARY', 'PRINCIPALS', 'PROPERTIES', 'PROVIDER', 'PROVIDERS', 'PURGE', 'QUALIFY', 'QUERY', 'RANGE', 'RECIPIENT', 'RECIPIENTS', 'RECORDREADER', 'RECORDWRITER', 'RECOVER', 'REDUCE', 'REFERENCES', 'REFRESH', 'REMOVE', 'RENAME', 'REPAIR', 'REPEATABLE', 'REPLACE', 'REPLICAS', 'RESET', 'RESPECT', 'RESTRICT', 'REVOKE', 'RIGHT', RLIKE, 'ROLE', 'ROLES', 'ROLLBACK', 'ROLLUP', 'ROW', 'ROWS', 'SECOND', 'SCHEMA', 'SCHEMAS', 'SELECT', 'SEMI', 'SEPARATED', 'SERDE', 'SERDEPROPERTIES', 'SESSION_USER', 'SET', 'MINUS', 'SETS', 'SHARE', 'SHARES', 'SHOW', 'SKEWED', 'SOME', 'SORT', 'SORTED', 'START', 'STATISTICS', 'STORED', 'STRATIFY', 'STRUCT', 'SUBSTR', 'SUBSTRING', 'SYNC', 'SYSTEM_TIME', 'SYSTEM_VERSION', 'TABLE', 'TABLES', 'TABLESAMPLE', 'TBLPROPERTIES', TEMPORARY, 'TERMINATED', 'THEN', 'TIME', 'TIMESTAMP', 'TIMESTAMPADD', 'TIMESTAMPDIFF', 'TO', 'TOUCH', 'TRAILING', 'TRANSACTION', 'TRANSACTIONS', 'TRANSFORM', 'TRIM', 'TRUE', 'TRUNCATE', 'TRY_CAST', 'TYPE', 'UNARCHIVE', 'UNBOUNDED', 'UNCACHE', 'UNION', 'UNIQUE', 'UNKNOWN', 'UNLOCK', 'UNSET', 'UPDATE', 'USE', 'USER', 'USING', 'VALUES', 'VERSION', 'VIEW', 'VIEWS', 'WHEN', 'WHERE', 'WINDOW', 'WITH', 'WITHIN', 'YEAR', 'ZONE', IDENTIFIER, BACKQUOTED_IDENTIFIER}(line 1, pos 21) ``` * After ``` Syntax error at or near ':': extra input ':'(line 1, pos 21) ``` #### Missing token * Query ```sql select count(a from b ``` * Before ``` missing ')' at 'from'(line 2, pos 0) ``` * After ``` Syntax error at or near 'from': missing ')'(line 2, pos 0) ``` ### Why are the changes needed? https://issues.apache.org/jira/browse/SPARK-38384 The description states the reason for the change. TLDR, the error messages of ParseException directly coming from ANTLR are not user-friendly and we want to improve it. ### Does this PR introduce _any_ user-facing change? If the error messages changes are considered as user-facing change, then yes. Example cases are listed in the top of this PR description. ### How was this patch tested? Local unit test. Closes #35915 from anchovYu/rest-parse-exceptions. Authored-by: Xinyi Yu Signed-off-by: Wenchen Fan (cherry picked from commit 27455aee8e6c671dcfee757771be6cdd58c9b001) Signed-off-by: Wenchen Fan --- .../main/resources/error/error-classes.json | 4 +- ...ql-distributed-sql-engine-spark-sql-cli.md | 2 +- docs/sql-ref-identifier.md | 4 +- .../sql/catalyst/parser/ParseDriver.scala | 4 +- .../parser/SparkParserErrorStrategy.scala | 55 +++++++++++++++---- .../sql/catalyst/parser/DDLParserSuite.scala | 6 +- .../catalyst/parser/ErrorParserSuite.scala | 26 +++++---- .../parser/ExpressionParserSuite.scala | 8 +-- .../sql/catalyst/parser/PlanParserSuite.scala | 22 ++++---- .../parser/TableIdentifierParserSuite.scala | 2 +- .../sql-tests/results/ansi/interval.sql.out | 4 +- .../sql-tests/results/ansi/literals.sql.out | 2 +- .../sql-tests/results/csv-functions.sql.out | 2 +- .../resources/sql-tests/results/cte.sql.out | 2 +- .../sql-tests/results/grouping_set.sql.out | 4 +- .../sql-tests/results/interval.sql.out | 4 +- .../sql-tests/results/json-functions.sql.out | 2 +- .../sql-tests/results/literals.sql.out | 2 +- .../results/postgreSQL/window_part3.sql.out | 2 +- .../sql-tests/results/transform.sql.out | 6 +- ...terTableRecoverPartitionsParserSuite.scala | 2 +- .../command/PlanResolutionSuite.scala | 2 +- .../spark/sql/jdbc/JDBCWriteSuite.scala | 2 +- .../sql/hive/thriftserver/CliSuite.scala | 2 +- 24 files changed, 102 insertions(+), 69 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index c7a9c854cb486..cd47d50b345ff 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -142,8 +142,8 @@ "message" : [ "Syntax error, unexpected empty statement" ], "sqlState" : "42000" }, - "PARSE_INPUT_MISMATCHED" : { - "message" : [ "Syntax error at or near %s" ], + "PARSE_SYNTAX_ERROR" : { + "message" : [ "Syntax error at or near %s%s" ], "sqlState" : "42000" }, "PIVOT_VALUE_DATA_TYPE_MISMATCH" : { diff --git a/docs/sql-distributed-sql-engine-spark-sql-cli.md b/docs/sql-distributed-sql-engine-spark-sql-cli.md index f7f366952068d..c41715367cad9 100644 --- a/docs/sql-distributed-sql-engine-spark-sql-cli.md +++ b/docs/sql-distributed-sql-engine-spark-sql-cli.md @@ -113,7 +113,7 @@ Use `;` (semicolon) to terminate commands. Notice: /* This is a comment contains ; */ SELECT 1; ``` - However, if ';' is the end of the line, it terminates the SQL statement. The example above will be terminated into `/* This is a comment contains ` and `*/ SELECT 1`, Spark will submit these two commands separated and throw parser error (`unclosed bracketed comment` and `extraneous input '*/'`). + However, if ';' is the end of the line, it terminates the SQL statement. The example above will be terminated into `/* This is a comment contains ` and `*/ SELECT 1`, Spark will submit these two commands separated and throw parser error (`unclosed bracketed comment` and `Syntax error at or near '*/'`). diff --git a/docs/sql-ref-identifier.md b/docs/sql-ref-identifier.md index f65d491cc2fc4..bba8c67780ad4 100644 --- a/docs/sql-ref-identifier.md +++ b/docs/sql-ref-identifier.md @@ -58,7 +58,7 @@ An identifier is a string used to identify a database object such as a table, vi -- This CREATE TABLE fails with ParseException because of the illegal identifier name a.b CREATE TABLE test (a.b int); org.apache.spark.sql.catalyst.parser.ParseException: -no viable alternative at input 'CREATE TABLE test (a.'(line 1, pos 20) +Syntax error at or near '.': extra input '.'(line 1, pos 20) -- This CREATE TABLE works CREATE TABLE test (`a.b` int); @@ -66,7 +66,7 @@ CREATE TABLE test (`a.b` int); -- This CREATE TABLE fails with ParseException because special character ` is not escaped CREATE TABLE test1 (`a`b` int); org.apache.spark.sql.catalyst.parser.ParseException: -no viable alternative at input 'CREATE TABLE test (`a`b`'(line 1, pos 23) +Syntax error at or near '`'(line 1, pos 23) -- This CREATE TABLE works CREATE TABLE test (`a``b` int); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 5c9c382d08d04..82be4d61e911e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -289,8 +289,8 @@ class ParseException( } def withCommand(cmd: String): ParseException = { - // PARSE_EMPTY_STATEMENT error class overrides the PARSE_INPUT_MISMATCHED when cmd is empty - if (cmd.trim().isEmpty && errorClass.isDefined && errorClass.get == "PARSE_INPUT_MISMATCHED") { + // PARSE_EMPTY_STATEMENT error class overrides the PARSE_SYNTAX_ERROR when cmd is empty + if (cmd.trim().isEmpty && errorClass.isDefined && errorClass.get == "PARSE_SYNTAX_ERROR") { new ParseException(Option(cmd), start, stop, "PARSE_EMPTY_STATEMENT", Array[String]()) } else { new ParseException(Option(cmd), message, start, stop, errorClass, messageParameters) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala index 0ce514c4d2298..1b0b68620737f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/SparkParserErrorStrategy.scala @@ -17,8 +17,7 @@ package org.apache.spark.sql.catalyst.parser -import org.antlr.v4.runtime.{DefaultErrorStrategy, InputMismatchException, IntStream, Parser, - ParserRuleContext, RecognitionException, Recognizer} +import org.antlr.v4.runtime.{DefaultErrorStrategy, InputMismatchException, IntStream, NoViableAltException, Parser, ParserRuleContext, RecognitionException, Recognizer, Token} /** * A [[SparkRecognitionException]] extends the [[RecognitionException]] with more information @@ -49,6 +48,10 @@ class SparkRecognitionException( }, Some(errorClass), messageParameters) + + /** Construct with pure errorClass and messageParameter information. */ + def this(errorClass: String, messageParameters: Array[String]) = + this("", null, null, null, Some(errorClass), messageParameters) } /** @@ -61,21 +64,49 @@ class SparkRecognitionException( */ class SparkParserErrorStrategy() extends DefaultErrorStrategy { private val userWordDict : Map[String, String] = Map("''" -> "end of input") - private def getUserFacingLanguage(input: String) = { - userWordDict.getOrElse(input, input) + + /** Get the user-facing display of the error token. */ + override def getTokenErrorDisplay(t: Token): String = { + val tokenName = super.getTokenErrorDisplay(t) + userWordDict.getOrElse(tokenName, tokenName) } override def reportInputMismatch(recognizer: Parser, e: InputMismatchException): Unit = { - // Keep the original error message in ANTLR - val msg = "mismatched input " + - this.getTokenErrorDisplay(e.getOffendingToken) + - " expecting " + - e.getExpectedTokens.toString(recognizer.getVocabulary) + val exceptionWithErrorClass = new SparkRecognitionException( + e, + "PARSE_SYNTAX_ERROR", + Array(getTokenErrorDisplay(e.getOffendingToken), "")) + recognizer.notifyErrorListeners(e.getOffendingToken, "", exceptionWithErrorClass) + } + override def reportNoViableAlternative(recognizer: Parser, e: NoViableAltException): Unit = { val exceptionWithErrorClass = new SparkRecognitionException( e, - "PARSE_INPUT_MISMATCHED", - Array(getUserFacingLanguage(getTokenErrorDisplay(e.getOffendingToken)))) - recognizer.notifyErrorListeners(e.getOffendingToken, msg, exceptionWithErrorClass) + "PARSE_SYNTAX_ERROR", + Array(getTokenErrorDisplay(e.getOffendingToken), "")) + recognizer.notifyErrorListeners(e.getOffendingToken, "", exceptionWithErrorClass) + } + + override def reportUnwantedToken(recognizer: Parser): Unit = { + if (!this.inErrorRecoveryMode(recognizer)) { + this.beginErrorCondition(recognizer) + val errorTokenDisplay = getTokenErrorDisplay(recognizer.getCurrentToken) + val hint = ": extra input " + errorTokenDisplay + val exceptionWithErrorClass = new SparkRecognitionException( + "PARSE_SYNTAX_ERROR", + Array(errorTokenDisplay, hint)) + recognizer.notifyErrorListeners(recognizer.getCurrentToken, "", exceptionWithErrorClass) + } + } + + override def reportMissingToken(recognizer: Parser): Unit = { + if (!this.inErrorRecoveryMode(recognizer)) { + this.beginErrorCondition(recognizer) + val hint = ": missing " + getExpectedTokens(recognizer).toString(recognizer.getVocabulary) + val exceptionWithErrorClass = new SparkRecognitionException( + "PARSE_SYNTAX_ERROR", + Array(getTokenErrorDisplay(recognizer.getCurrentToken), hint)) + recognizer.notifyErrorListeners(recognizer.getCurrentToken, "", exceptionWithErrorClass) + } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 507b17bbb5636..d5d90ceca2909 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -74,7 +74,7 @@ class DDLParserSuite extends AnalysisTest { } intercept("CREATE TABLE my_tab(a: INT COMMENT 'test', b: STRING) USING parquet", - "extraneous input ':'") + "Syntax error at or near ':': extra input ':'") } test("create/replace table - with IF NOT EXISTS") { @@ -1777,9 +1777,9 @@ class DDLParserSuite extends AnalysisTest { allColumns = true)) intercept("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR ALL COLUMNS key, value", - Some("PARSE_INPUT_MISMATCHED"), "Syntax error at or near 'key'") // expecting {, ';'} + Some("PARSE_SYNTAX_ERROR"), "Syntax error at or near 'key'") // expecting {, ';'} intercept("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR ALL", - "missing 'COLUMNS' at ''") + "Syntax error at or near end of input: missing 'COLUMNS'") } test("LOAD DATA INTO table") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala index 71296f0a26e4a..20e17a8451674 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ErrorParserSuite.scala @@ -79,21 +79,22 @@ class ErrorParserSuite extends AnalysisTest { test("no viable input") { intercept("select ((r + 1) ", 1, 16, 16, - "no viable alternative at input", "----------------^^^") + "Syntax error at or near", "----------------^^^") } test("extraneous input") { - intercept("select 1 1", 1, 9, 10, "extraneous input '1' expecting", "---------^^^") - intercept("select *\nfrom r as q t", 2, 12, 13, "extraneous input", "------------^^^") + intercept("select 1 1", 1, 9, 10, + "Syntax error at or near '1': extra input '1'", "---------^^^") + intercept("select *\nfrom r as q t", 2, 12, 13, "Syntax error at or near", "------------^^^") } test("mismatched input") { - intercept("select * from r order by q from t", "PARSE_INPUT_MISMATCHED", + intercept("select * from r order by q from t", "PARSE_SYNTAX_ERROR", 1, 27, 31, "Syntax error at or near", "---------------------------^^^" ) - intercept("select *\nfrom r\norder by q\nfrom t", "PARSE_INPUT_MISMATCHED", + intercept("select *\nfrom r\norder by q\nfrom t", "PARSE_SYNTAX_ERROR", 4, 0, 4, "Syntax error at or near", "^^^") } @@ -107,9 +108,9 @@ class ErrorParserSuite extends AnalysisTest { test("jargon token substitute to user-facing language") { // '' -> end of input - intercept("select count(*", "PARSE_INPUT_MISMATCHED", + intercept("select count(*", "PARSE_SYNTAX_ERROR", 1, 14, 14, "Syntax error at or near end of input") - intercept("select 1 as a from", "PARSE_INPUT_MISMATCHED", + intercept("select 1 as a from", "PARSE_SYNTAX_ERROR", 1, 18, 18, "Syntax error at or near end of input") } @@ -120,11 +121,12 @@ class ErrorParserSuite extends AnalysisTest { } test("SPARK-21136: misleading error message due to problematic antlr grammar") { - intercept("select * from a left join_ b on a.id = b.id", None, "missing 'JOIN' at 'join_'") - intercept("select * from test where test.t is like 'test'", Some("PARSE_INPUT_MISMATCHED"), - SparkThrowableHelper.getMessage("PARSE_INPUT_MISMATCHED", Array("'is'"))) - intercept("SELECT * FROM test WHERE x NOT NULL", Some("PARSE_INPUT_MISMATCHED"), - SparkThrowableHelper.getMessage("PARSE_INPUT_MISMATCHED", Array("'NOT'"))) + intercept("select * from a left join_ b on a.id = b.id", None, + "Syntax error at or near 'join_': missing 'JOIN'") + intercept("select * from test where test.t is like 'test'", Some("PARSE_SYNTAX_ERROR"), + SparkThrowableHelper.getMessage("PARSE_SYNTAX_ERROR", Array("'is'", ""))) + intercept("SELECT * FROM test WHERE x NOT NULL", Some("PARSE_SYNTAX_ERROR"), + SparkThrowableHelper.getMessage("PARSE_SYNTAX_ERROR", Array("'NOT'", ""))) } test("hyphen in identifier - DDL tests") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 754ac8b91f738..9e63c817e7478 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -284,7 +284,7 @@ class ExpressionParserSuite extends AnalysisTest { assertEqual("foo(distinct a, b)", 'foo.distinctFunction('a, 'b)) assertEqual("grouping(distinct a, b)", 'grouping.distinctFunction('a, 'b)) assertEqual("`select`(all a, b)", 'select.function('a, 'b)) - intercept("foo(a x)", "extraneous input 'x'") + intercept("foo(a x)", "Syntax error at or near 'x': extra input 'x'") } private def lv(s: Symbol) = UnresolvedNamedLambdaVariable(Seq(s.name)) @@ -654,7 +654,7 @@ class ExpressionParserSuite extends AnalysisTest { // Note: Single quote follows 1.6 parsing behavior // when ESCAPED_STRING_LITERALS is enabled. val e = intercept[ParseException](parser.parseExpression("'\''")) - assert(e.message.contains("extraneous input '''")) + assert(e.message.contains("Syntax error at or near ''': extra input '''")) // The unescape special characters (e.g., "\\t") for 2.0+ don't work // when ESCAPED_STRING_LITERALS is enabled. They are parsed literally. @@ -866,7 +866,7 @@ class ExpressionParserSuite extends AnalysisTest { test("composed expressions") { assertEqual("1 + r.r As q", (Literal(1) + UnresolvedAttribute("r.r")).as("q")) assertEqual("1 - f('o', o(bar))", Literal(1) - 'f.function("o", 'o.function('bar))) - intercept("1 - f('o', o(bar)) hello * world", Some("PARSE_INPUT_MISMATCHED"), + intercept("1 - f('o', o(bar)) hello * world", Some("PARSE_SYNTAX_ERROR"), "Syntax error at or near '*'") } @@ -886,7 +886,7 @@ class ExpressionParserSuite extends AnalysisTest { test("SPARK-17832 function identifier contains backtick") { val complexName = FunctionIdentifier("`ba`r", Some("`fo`o")) assertEqual(complexName.quotedString, UnresolvedAttribute(Seq("`fo`o", "`ba`r"))) - intercept(complexName.unquotedString, Some("PARSE_INPUT_MISMATCHED"), + intercept(complexName.unquotedString, Some("PARSE_SYNTAX_ERROR"), "Syntax error at or near") // Function identifier contains continuous backticks should be treated correctly. val complexName2 = FunctionIdentifier("ba``r", Some("fo``o")) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 70138a3e688c7..3e2d917a8932f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -283,8 +283,8 @@ class PlanParserSuite extends AnalysisTest { assertEqual("from a select distinct b, c", Distinct(table("a").select('b, 'c))) // Weird "FROM table" queries, should be invalid anyway - intercept("from a", "no viable alternative at input 'from a'") - intercept("from (from a union all from b) c select *", "no viable alternative at input 'from") + intercept("from a", "Syntax error at or near end of input") + intercept("from (from a union all from b) c select *", "Syntax error at or near 'union'") } test("multi select query") { @@ -292,10 +292,10 @@ class PlanParserSuite extends AnalysisTest { "from a select * select * where s < 10", table("a").select(star()).union(table("a").where('s < 10).select(star()))) intercept( - "from a select * select * from x where a.s < 10", Some("PARSE_INPUT_MISMATCHED"), + "from a select * select * from x where a.s < 10", Some("PARSE_SYNTAX_ERROR"), "Syntax error at or near 'from'") intercept( - "from a select * from b", Some("PARSE_INPUT_MISMATCHED"), + "from a select * from b", Some("PARSE_SYNTAX_ERROR"), "Syntax error at or near 'from'") assertEqual( "from a insert into tbl1 select * insert into tbl2 select * where s < 10", @@ -404,7 +404,7 @@ class PlanParserSuite extends AnalysisTest { val m = intercept[ParseException] { parsePlan("SELECT a, b, count(distinct a, distinct b) as c FROM d GROUP BY a, b") }.getMessage - assert(m.contains("extraneous input 'b'")) + assert(m.contains("Syntax error at or near 'b': extra input 'b'")) } @@ -778,11 +778,11 @@ class PlanParserSuite extends AnalysisTest { test("select hint syntax") { // Hive compatibility: Missing parameter raises ParseException. - intercept("SELECT /*+ HINT() */ * FROM t", Some("PARSE_INPUT_MISMATCHED"), + intercept("SELECT /*+ HINT() */ * FROM t", Some("PARSE_SYNTAX_ERROR"), "Syntax error at or near") // Disallow space as the delimiter. - intercept("SELECT /*+ INDEX(a b c) */ * from default.t", Some("PARSE_INPUT_MISMATCHED"), + intercept("SELECT /*+ INDEX(a b c) */ * from default.t", Some("PARSE_SYNTAX_ERROR"), "Syntax error at or near 'b'") comparePlans( @@ -840,7 +840,7 @@ class PlanParserSuite extends AnalysisTest { UnresolvedHint("REPARTITION", Seq(Literal(100)), table("t").select(star())))) - intercept("SELECT /*+ COALESCE(30 + 50) */ * FROM t", Some("PARSE_INPUT_MISMATCHED"), + intercept("SELECT /*+ COALESCE(30 + 50) */ * FROM t", Some("PARSE_SYNTAX_ERROR"), "Syntax error at or near") comparePlans( @@ -965,9 +965,9 @@ class PlanParserSuite extends AnalysisTest { ) } - intercept("select ltrim(both 'S' from 'SS abc S'", Some("PARSE_INPUT_MISMATCHED"), + intercept("select ltrim(both 'S' from 'SS abc S'", Some("PARSE_SYNTAX_ERROR"), "Syntax error at or near 'from'") // expecting {')' - intercept("select rtrim(trailing 'S' from 'SS abc S'", Some("PARSE_INPUT_MISMATCHED"), + intercept("select rtrim(trailing 'S' from 'SS abc S'", Some("PARSE_SYNTAX_ERROR"), "Syntax error at or near 'from'") // expecting {')' assertTrimPlans( @@ -1113,7 +1113,7 @@ class PlanParserSuite extends AnalysisTest { val m1 = intercept[ParseException] { parsePlan("SELECT * FROM (INSERT INTO BAR VALUES (2))") }.getMessage - assert(m1.contains("missing ')' at 'BAR'")) + assert(m1.contains("Syntax error at or near 'BAR': missing ')'")) val m2 = intercept[ParseException] { parsePlan("SELECT * FROM S WHERE C1 IN (INSERT INTO T VALUES (2))") }.getMessage diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index a65f209d06c75..c2b240b3c496e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -310,7 +310,7 @@ class TableIdentifierParserSuite extends SQLKeywordUtils { val errMsg = intercept[ParseException] { parseTableIdentifier(keyword) }.getMessage - assert(errMsg.contains("no viable alternative at input")) + assert(errMsg.contains("Syntax error at or near")) assert(TableIdentifier(keyword) === parseTableIdentifier(s"`$keyword`")) assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.`$keyword`")) } diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index cfc77aa45fdeb..2f46111c8c499 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -1343,7 +1343,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -extraneous input 'day' expecting {, ';'}(line 1, pos 27) +Syntax error at or near 'day': extra input 'day'(line 1, pos 27) == SQL == select interval 30 day day day @@ -1375,7 +1375,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -extraneous input 'days' expecting {, ';'}(line 1, pos 29) +Syntax error at or near 'days': extra input 'days'(line 1, pos 29) == SQL == select interval 30 days days days diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out index b96baee7023c2..f13542dd4424c 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out @@ -217,7 +217,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'select .'(line 1, pos 7) +Syntax error at or near '.'(line 1, pos 7) == SQL == select .e3 diff --git a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out index 53cae3f935568..92b454bd2f6aa 100644 --- a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out @@ -34,7 +34,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException Cannot parse the data type: -extraneous input 'InvalidType' expecting (line 1, pos 2) +Syntax error at or near 'InvalidType': extra input 'InvalidType'(line 1, pos 2) == SQL == a InvalidType diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out index b8f666586ce45..6b572460b957f 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out @@ -133,7 +133,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'WITH t()'(line 1, pos 7) +Syntax error at or near ')'(line 1, pos 7) == SQL == WITH t() AS (SELECT 1) diff --git a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out index 6af8e7048c815..21c13af560dac 100644 --- a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out @@ -138,7 +138,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -extraneous input 'ROLLUP' expecting {, ';'}(line 1, pos 53) +Syntax error at or near 'ROLLUP': extra input 'ROLLUP'(line 1, pos 53) == SQL == SELECT a, b, c, count(d) FROM grouping GROUP BY WITH ROLLUP @@ -152,7 +152,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -extraneous input 'CUBE' expecting {, ';'}(line 1, pos 53) +Syntax error at or near 'CUBE': extra input 'CUBE'(line 1, pos 53) == SQL == SELECT a, b, c, count(d) FROM grouping GROUP BY WITH CUBE diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index f32836ee7a661..7aa0c699679ec 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1336,7 +1336,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -extraneous input 'day' expecting {, ';'}(line 1, pos 27) +Syntax error at or near 'day': extra input 'day'(line 1, pos 27) == SQL == select interval 30 day day day @@ -1368,7 +1368,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -extraneous input 'days' expecting {, ';'}(line 1, pos 29) +Syntax error at or near 'days': extra input 'days'(line 1, pos 29) == SQL == select interval 30 days days days diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out index 84610834fa7e7..48a7b7b7952ce 100644 --- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out @@ -125,7 +125,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException Cannot parse the data type: -extraneous input 'InvalidType' expecting (line 1, pos 2) +Syntax error at or near 'InvalidType': extra input 'InvalidType'(line 1, pos 2) == SQL == a InvalidType diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out index b96baee7023c2..f13542dd4424c 100644 --- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out @@ -217,7 +217,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'select .'(line 1, pos 7) +Syntax error at or near '.'(line 1, pos 7) == SQL == select .e3 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index fc19471bb5b32..680c5707a450d 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -361,7 +361,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -extraneous input 'BY' expecting {')', ',', 'ORDER', 'RANGE', 'ROWS', 'SORT'}(line 1, pos 45) +Syntax error at or near 'BY': extra input 'BY'(line 1, pos 45) == SQL == select rank() OVER (PARTITION BY four, ORDER BY ten) FROM tenk1 diff --git a/sql/core/src/test/resources/sql-tests/results/transform.sql.out b/sql/core/src/test/resources/sql-tests/results/transform.sql.out index c9a04c99b9fb2..be57390761ba3 100644 --- a/sql/core/src/test/resources/sql-tests/results/transform.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/transform.sql.out @@ -760,7 +760,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'SELECT TRANSFORM(b AS'(line 1, pos 19) +Syntax error at or near 'AS'(line 1, pos 19) == SQL == SELECT TRANSFORM(b AS b_1, MAX(a), CAST(sum(c) AS STRING)) @@ -782,7 +782,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'SELECT TRANSFORM(b b_1'(line 1, pos 19) +Syntax error at or near 'b_1'(line 1, pos 19) == SQL == SELECT TRANSFORM(b b_1, MAX(a), CAST(sum(c) AS STRING)) @@ -804,7 +804,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -no viable alternative at input 'SELECT TRANSFORM(b, MAX(a) AS'(line 1, pos 27) +Syntax error at or near 'AS'(line 1, pos 27) == SQL == SELECT TRANSFORM(b, MAX(a) AS max_a, CAST(sum(c) AS STRING)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRecoverPartitionsParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRecoverPartitionsParserSuite.scala index ebc1bd3468837..394392299ba4b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRecoverPartitionsParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRecoverPartitionsParserSuite.scala @@ -29,7 +29,7 @@ class AlterTableRecoverPartitionsParserSuite extends AnalysisTest with SharedSpa val errMsg = intercept[ParseException] { parsePlan("ALTER TABLE RECOVER PARTITIONS") }.getMessage - assert(errMsg.contains("no viable alternative at input 'ALTER TABLE RECOVER PARTITIONS'")) + assert(errMsg.contains("Syntax error at or near 'PARTITIONS'")) } test("recover partitions of a table") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 5399f9674377a..6cfdbdd2a21f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -1750,7 +1750,7 @@ class PlanResolutionSuite extends AnalysisTest { interceptParseException(parsePlan)( "CREATE TABLE my_tab(a: INT COMMENT 'test', b: STRING)", - "extraneous input ':'")() + "Syntax error at or near ':': extra input ':'")() } test("create hive table - table file format") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala index 79952e5a6c288..1a3e49186daae 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCWriteSuite.scala @@ -497,7 +497,7 @@ class JDBCWriteSuite extends SharedSparkSession with BeforeAndAfter { .option("createTableColumnTypes", "`name char(20)") // incorrectly quoted column .jdbc(url1, "TEST.USERDBTYPETEST", properties) }.getMessage() - assert(msg.contains("extraneous input")) + assert(msg.contains("Syntax error at or near '`': extra input '`'")) } test("SPARK-10849: jdbc CreateTableColumnTypes duplicate columns") { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 2f0fd858ba206..e1840d8622b54 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -642,7 +642,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { test("SPARK-37694: delete [jar|file|archive] shall use spark sql processor") { runCliWithin(2.minute, errorResponses = Seq("ParseException"))( - "delete jar dummy.jar;" -> "missing 'FROM' at 'jar'(line 1, pos 7)") + "delete jar dummy.jar;" -> "Syntax error at or near 'jar': missing 'FROM'(line 1, pos 7)") } test("SPARK-37906: Spark SQL CLI should not pass final comment") { From 4bb2967ea321dd656a28ec685fecc2f97391968e Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Tue, 22 Mar 2022 18:10:41 -0500 Subject: [PATCH 029/535] [SPARK-38194][FOLLOWUP] Update executor config description for memoryOverheadFactor Follow up for https://github.com/apache/spark/pull/35912#pullrequestreview-915755215, update the executor memoryOverheadFactor to mention the 0.4 default for non-jvm jobs as well. ### What changes were proposed in this pull request? Doc update ### Why are the changes needed? To clarify new configs ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UTs Closes #35934 from Kimahriman/memory-overhead-executor-docs. Authored-by: Adam Binford Signed-off-by: Sean Owen (cherry picked from commit 768ab55e00cb0ec639db1444250ef40471c4a417) Signed-off-by: Sean Owen --- .../scala/org/apache/spark/internal/config/package.scala | 6 +++++- docs/configuration.md | 4 ++++ docs/running-on-kubernetes.md | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index ffe4501248f43..fa048f54415a8 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -336,7 +336,11 @@ package object config { .doc("Fraction of executor memory to be allocated as additional non-heap memory per " + "executor process. This is memory that accounts for things like VM overheads, " + "interned strings, other native overheads, etc. This tends to grow with the container " + - "size. This value is ignored if spark.executor.memoryOverhead is set directly.") + "size. This value defaults to 0.10 except for Kubernetes non-JVM jobs, which defaults " + + "to 0.40. This is done as non-JVM tasks need more non-JVM heap space and such tasks " + + "commonly fail with \"Memory Overhead Exceeded\" errors. This preempts this error " + + "with a higher default. This value is ignored if spark.executor.memoryOverhead is set " + + "directly.") .version("3.3.0") .doubleConf .checkValue(factor => factor > 0, diff --git a/docs/configuration.md b/docs/configuration.md index a2e6797b55e2f..a2cf2338c398e 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -309,6 +309,10 @@ of the most common options to set are: Fraction of executor memory to be allocated as additional non-heap memory per executor process. This is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the container size. + This value defaults to 0.10 except for Kubernetes non-JVM jobs, which defaults to + 0.40. This is done as non-JVM tasks need more non-JVM heap space and such tasks + commonly fail with "Memory Overhead Exceeded" errors. This preempts this error + with a higher default. This value is ignored if spark.executor.memoryOverhead is set directly. diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index de37e22cc78d7..6fec9bab10dbf 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -359,7 +359,7 @@ If no volume is set as local storage, Spark uses temporary scratch space to spil `emptyDir` volumes use the nodes backing storage for ephemeral storage by default, this behaviour may not be appropriate for some compute environments. For example if you have diskless nodes with remote storage mounted over a network, having lots of executors doing IO to this remote storage may actually degrade performance. -In this case it may be desirable to set `spark.kubernetes.local.dirs.tmpfs=true` in your configuration which will cause the `emptyDir` volumes to be configured as `tmpfs` i.e. RAM backed volumes. When configured like this Spark's local storage usage will count towards your pods memory usage therefore you may wish to increase your memory requests by increasing the value of `spark.kubernetes.memoryOverheadFactor` as appropriate. +In this case it may be desirable to set `spark.kubernetes.local.dirs.tmpfs=true` in your configuration which will cause the `emptyDir` volumes to be configured as `tmpfs` i.e. RAM backed volumes. When configured like this Spark's local storage usage will count towards your pods memory usage therefore you may wish to increase your memory requests by increasing the value of `spark.{driver,executor}.memoryOverheadFactor` as appropriate. ## Introspection and Debugging From 099fe9717d8e1317e1001c0409720049abd95fa3 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Wed, 23 Mar 2022 09:11:48 +0800 Subject: [PATCH 030/535] [SPARK-38626][SQL] Make condition in DeleteFromTable plan required ### What changes were proposed in this pull request? This PR makes the condition in `DeleteFromTable` required. Right now, the condition is optional and `None` is equivalent to a true literal. As a consequence, rules that handle such statements have to catch these two different yet equivalent representations, which makes those rules more complex. Instead, we can simply default the condition while parsing and make it required. This change has been discussed and reviewed [here](https://github.com/apache/spark/pull/35395#discussion_r815234852). ### Why are the changes needed? These changes are needed to simplify rules that handle `DeleteFromTable` plans. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35941 from aokolnychyi/spark-38626. Authored-by: Anton Okolnychyi Signed-off-by: Wenchen Fan (cherry picked from commit a89d2897ec803c0c272d4812420f2741880c9612) Signed-off-by: Wenchen Fan --- .../optimizer/ReplaceNullWithFalseInPredicate.scala | 2 +- .../optimizer/SimplifyConditionalsInPredicate.scala | 2 +- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 4 ++-- .../spark/sql/catalyst/plans/logical/v2Commands.scala | 2 +- .../apache/spark/sql/errors/QueryCompilationErrors.scala | 2 +- .../optimizer/PullupCorrelatedPredicatesSuite.scala | 4 ++-- .../optimizer/ReplaceNullWithFalseInPredicateSuite.scala | 2 +- .../optimizer/SimplifyConditionalsInPredicateSuite.scala | 2 +- .../apache/spark/sql/catalyst/parser/DDLParserSuite.scala | 4 ++-- .../execution/datasources/v2/DataSourceV2Strategy.scala | 4 ++-- .../spark/sql/execution/command/PlanResolutionSuite.scala | 8 ++++---- 11 files changed, 18 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala index 3de19afa912aa..9ec498aa14e3c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala @@ -54,7 +54,7 @@ object ReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] { _.containsAnyPattern(NULL_LITERAL, TRUE_OR_FALSE_LITERAL, INSET), ruleId) { case f @ Filter(cond, _) => f.copy(condition = replaceNullWithFalse(cond)) case j @ Join(_, _, _, Some(cond), _) => j.copy(condition = Some(replaceNullWithFalse(cond))) - case d @ DeleteFromTable(_, Some(cond)) => d.copy(condition = Some(replaceNullWithFalse(cond))) + case d @ DeleteFromTable(_, cond) => d.copy(condition = replaceNullWithFalse(cond)) case u @ UpdateTable(_, _, Some(cond)) => u.copy(condition = Some(replaceNullWithFalse(cond))) case m @ MergeIntoTable(_, _, mergeCond, matchedActions, notMatchedActions) => m.copy( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala index c08bcbe8915bd..e1972b997c2be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala @@ -48,7 +48,7 @@ object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { _.containsAnyPattern(CASE_WHEN, IF), ruleId) { case f @ Filter(cond, _) => f.copy(condition = simplifyConditional(cond)) case j @ Join(_, _, _, Some(cond), _) => j.copy(condition = Some(simplifyConditional(cond))) - case d @ DeleteFromTable(_, Some(cond)) => d.copy(condition = Some(simplifyConditional(cond))) + case d @ DeleteFromTable(_, cond) => d.copy(condition = simplifyConditional(cond)) case u @ UpdateTable(_, _, Some(cond)) => u.copy(condition = Some(simplifyConditional(cond))) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 3c8f0770e19af..9266388341e16 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -365,9 +365,9 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit val tableAlias = getTableAliasWithoutColumnAlias(ctx.tableAlias(), "DELETE") val aliasedTable = tableAlias.map(SubqueryAlias(_, table)).getOrElse(table) val predicate = if (ctx.whereClause() != null) { - Some(expression(ctx.whereClause().booleanExpression())) + expression(ctx.whereClause().booleanExpression()) } else { - None + Literal.TrueLiteral } DeleteFromTable(aliasedTable, predicate) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index 45465b0f99d3b..b2ca34668a6f6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -451,7 +451,7 @@ object DescribeColumn { */ case class DeleteFromTable( table: LogicalPlan, - condition: Option[Expression]) extends UnaryCommand with SupportsSubquery { + condition: Expression) extends UnaryCommand with SupportsSubquery { override def child: LogicalPlan = table override protected def withNewChildInternal(newChild: LogicalPlan): DeleteFromTable = copy(table = newChild) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 6bf0ec8eb8c40..57ed7da7b2051 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -775,7 +775,7 @@ object QueryCompilationErrors { s"$v2WriteClassName is not an instance of $v1WriteClassName") } - def unsupportedDeleteByConditionWithSubqueryError(condition: Option[Expression]): Throwable = { + def unsupportedDeleteByConditionWithSubqueryError(condition: Expression): Throwable = { new AnalysisException( s"Delete by condition with subquery is not supported: $condition") } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala index c4b052b97dc62..3ffbb49b27dae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PullupCorrelatedPredicatesSuite.scala @@ -114,14 +114,14 @@ class PullupCorrelatedPredicatesSuite extends PlanTest { test("PullupCorrelatedPredicates should handle deletes") { val subPlan = testRelation2.where('a === 'c).select('c) val cond = InSubquery(Seq('a), ListQuery(subPlan)) - val deletePlan = DeleteFromTable(testRelation, Some(cond)).analyze + val deletePlan = DeleteFromTable(testRelation, cond).analyze assert(deletePlan.resolved) val optimized = Optimize.execute(deletePlan) assert(optimized.resolved) optimized match { - case DeleteFromTable(_, Some(s: InSubquery)) => + case DeleteFromTable(_, s: InSubquery) => val outerRefs = SubExprUtils.getOuterReferences(s.query.plan) assert(outerRefs.isEmpty, "should be no outer refs") case other => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala index e65174b1b5f51..57698d15522d8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicateSuite.scala @@ -461,7 +461,7 @@ class ReplaceNullWithFalseInPredicateSuite extends PlanTest { } private def testDelete(originalCond: Expression, expectedCond: Expression): Unit = { - test((rel, expr) => DeleteFromTable(rel, Some(expr)), originalCond, expectedCond) + test((rel, expr) => DeleteFromTable(rel, expr), originalCond, expectedCond) } private def testUpdate(originalCond: Expression, expectedCond: Expression): Unit = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala index 79db53e94f2c2..bb6ca5499d133 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala @@ -229,7 +229,7 @@ class SimplifyConditionalsInPredicateSuite extends PlanTest { } private def testDelete(originalCond: Expression, expectedCond: Expression): Unit = { - test((rel, expr) => DeleteFromTable(rel, Some(expr)), originalCond, expectedCond) + test((rel, expr) => DeleteFromTable(rel, expr), originalCond, expectedCond) } private def testUpdate(originalCond: Expression, expectedCond: Expression): Unit = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index d5d90ceca2909..472506fa9070b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -1369,14 +1369,14 @@ class DDLParserSuite extends AnalysisTest { parseCompare("DELETE FROM testcat.ns1.ns2.tbl", DeleteFromTable( UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl")), - None)) + Literal.TrueLiteral)) } test("delete from table: with alias and where clause") { parseCompare("DELETE FROM testcat.ns1.ns2.tbl AS t WHERE t.a = 2", DeleteFromTable( SubqueryAlias("t", UnresolvedRelation(Seq("testcat", "ns1", "ns2", "tbl"))), - Some(EqualTo(UnresolvedAttribute("t.a"), Literal(2))))) + EqualTo(UnresolvedAttribute("t.a"), Literal(2)))) } test("delete from table: columns aliases is not allowed") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index c6d271b9d75c0..c0b00a426143b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -257,12 +257,12 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat relation match { case DataSourceV2ScanRelation(r, _, output) => val table = r.table - if (condition.exists(SubqueryExpression.hasSubquery)) { + if (SubqueryExpression.hasSubquery(condition)) { throw QueryCompilationErrors.unsupportedDeleteByConditionWithSubqueryError(condition) } // fail if any filter cannot be converted. // correctness depends on removing all matching data. - val filters = DataSourceStrategy.normalizeExprs(condition.toSeq, output) + val filters = DataSourceStrategy.normalizeExprs(Seq(condition), output) .flatMap(splitConjunctivePredicates(_).map { f => DataSourceStrategy.translateFilter(f, true).getOrElse( throw QueryCompilationErrors.cannotTranslateExpressionToSourceFilterError(f)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 6cfdbdd2a21f5..24b6be07619f3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -922,14 +922,14 @@ class PlanResolutionSuite extends AnalysisTest { val parsed4 = parseAndResolve(sql4) parsed1 match { - case DeleteFromTable(AsDataSourceV2Relation(_), None) => + case DeleteFromTable(AsDataSourceV2Relation(_), Literal.TrueLiteral) => case _ => fail("Expect DeleteFromTable, but got:\n" + parsed1.treeString) } parsed2 match { case DeleteFromTable( AsDataSourceV2Relation(_), - Some(EqualTo(name: UnresolvedAttribute, StringLiteral("Robert")))) => + EqualTo(name: UnresolvedAttribute, StringLiteral("Robert"))) => assert(name.name == "name") case _ => fail("Expect DeleteFromTable, but got:\n" + parsed2.treeString) } @@ -937,7 +937,7 @@ class PlanResolutionSuite extends AnalysisTest { parsed3 match { case DeleteFromTable( SubqueryAlias(AliasIdentifier("t", Seq()), AsDataSourceV2Relation(_)), - Some(EqualTo(name: UnresolvedAttribute, StringLiteral("Robert")))) => + EqualTo(name: UnresolvedAttribute, StringLiteral("Robert"))) => assert(name.name == "t.name") case _ => fail("Expect DeleteFromTable, but got:\n" + parsed3.treeString) } @@ -945,7 +945,7 @@ class PlanResolutionSuite extends AnalysisTest { parsed4 match { case DeleteFromTable( SubqueryAlias(AliasIdentifier("t", Seq()), AsDataSourceV2Relation(_)), - Some(InSubquery(values, query))) => + InSubquery(values, query)) => assert(values.size == 1 && values.head.isInstanceOf[UnresolvedAttribute]) assert(values.head.asInstanceOf[UnresolvedAttribute].name == "t.name") query match { From 9dacbc0ffa6bfe062abbc479d109248640976897 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Wed, 23 Mar 2022 09:47:35 +0800 Subject: [PATCH 031/535] [SPARK-38432][SQL][FOLLOWUP] Supplement test case for overflow and add comments ### What changes were proposed in this pull request? This PR follows up https://github.com/apache/spark/pull/35768 and improves the code. 1. Supplement test case for overflow 2. Not throw IllegalArgumentException 3. Improve V2ExpressionSQLBuilder 4. Add comments in V2ExpressionBuilder ### Why are the changes needed? Supplement test case for overflow and add comments. ### Does this PR introduce _any_ user-facing change? 'No'. V2 aggregate pushdown not released yet. ### How was this patch tested? New tests. Closes #35933 from beliefer/SPARK-38432_followup. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit 4e606383a663919b7120789ae741a0f6698e3ff0) Signed-off-by: Wenchen Fan --- .../util/V2ExpressionSQLBuilder.java | 6 ++-- .../catalyst/util/V2ExpressionBuilder.scala | 2 ++ .../apache/spark/sql/jdbc/JdbcDialects.scala | 5 ++- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 34 ++++++++++++++++--- 4 files changed, 36 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index 91dae749f974b..1df01d29cbdd1 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -75,7 +75,7 @@ public String build(Expression expr) { name, inputToSQL(e.children()[0]), inputToSQL(e.children()[1])); case "-": if (e.children().length == 1) { - return visitUnaryArithmetic(name, build(e.children()[0])); + return visitUnaryArithmetic(name, inputToSQL(e.children()[0])); } else { return visitBinaryArithmetic( name, inputToSQL(e.children()[0]), inputToSQL(e.children()[1])); @@ -87,7 +87,7 @@ public String build(Expression expr) { case "NOT": return visitNot(build(e.children()[0])); case "~": - return visitUnaryArithmetic(name, build(e.children()[0])); + return visitUnaryArithmetic(name, inputToSQL(e.children()[0])); case "CASE_WHEN": { List children = Arrays.stream(e.children()).map(c -> build(c)).collect(Collectors.toList()); @@ -179,7 +179,7 @@ protected String visitNot(String v) { return "NOT (" + v + ")"; } - protected String visitUnaryArithmetic(String name, String v) { return name +" (" + v + ")"; } + protected String visitUnaryArithmetic(String name, String v) { return name + v; } protected String visitCaseWhen(String[] children) { StringBuilder sb = new StringBuilder("CASE"); diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala index 5c8e6a67ce3f0..fbd6884358b0a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -94,6 +94,7 @@ class V2ExpressionBuilder( None } case and: And => + // AND expects predicate val l = generateExpression(and.left, true) val r = generateExpression(and.right, true) if (l.isDefined && r.isDefined) { @@ -103,6 +104,7 @@ class V2ExpressionBuilder( None } case or: Or => + // OR expects predicate val l = generateExpression(or.left, true) val r = generateExpression(or.right, true) if (l.isDefined && r.isDefined) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 4b28de26b59e4..674ef005df2dc 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -229,9 +229,8 @@ abstract class JdbcDialect extends Serializable with Logging{ override def visitNamedReference(namedRef: NamedReference): String = { if (namedRef.fieldNames().length > 1) { - throw new IllegalArgumentException( - QueryCompilationErrors.commandNotSupportNestedColumnError( - "Filter push down", namedRef.toString).getMessage); + throw QueryCompilationErrors.commandNotSupportNestedColumnError( + "Filter push down", namedRef.toString) } quoteIdentifier(namedRef.fieldNames.head) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index d50a0551226a9..d6f098f1d5189 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -402,14 +402,38 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(df, Seq(Row("fred", 1), Row("mary", 2))) - val df2 = sql(""" + val df2 = spark.table("h2.test.people").filter($"id" + Int.MaxValue > 1) + + checkFiltersRemoved(df2, ansiMode) + + df2.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = if (ansiMode) { + "PushedFilters: [ID IS NOT NULL, (ID + 2147483647) > 1], " + } else { + "PushedFilters: [ID IS NOT NULL], " + } + checkKeywordsExistsInExplain(df2, expected_plan_fragment) + } + + if (ansiMode) { + val e = intercept[SparkException] { + checkAnswer(df2, Seq.empty) + } + assert(e.getMessage.contains( + "org.h2.jdbc.JdbcSQLDataException: Numeric value out of range: \"2147483648\"")) + } else { + checkAnswer(df2, Seq.empty) + } + + val df3 = sql(""" |SELECT * FROM h2.test.employee |WHERE (CASE WHEN SALARY > 10000 THEN BONUS ELSE BONUS + 200 END) > 1200 |""".stripMargin) - checkFiltersRemoved(df2, ansiMode) + checkFiltersRemoved(df3, ansiMode) - df2.queryExecution.optimizedPlan.collect { + df3.queryExecution.optimizedPlan.collect { case _: DataSourceV2ScanRelation => val expected_plan_fragment = if (ansiMode) { "PushedFilters: [(CASE WHEN SALARY > 10000.00 THEN BONUS" + @@ -417,10 +441,10 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel } else { "PushedFilters: []" } - checkKeywordsExistsInExplain(df2, expected_plan_fragment) + checkKeywordsExistsInExplain(df3, expected_plan_fragment) } - checkAnswer(df2, + checkAnswer(df3, Seq(Row(1, "cathy", 9000, 1200, false), Row(2, "david", 10000, 1300, true))) } } From 2b59a9616c5922d515db430752044869abd93746 Mon Sep 17 00:00:00 2001 From: Abhishek Somani Date: Wed, 23 Mar 2022 09:57:28 +0800 Subject: [PATCH 032/535] [SPARK-32268][SQL] Row-level Runtime Filtering ### What changes were proposed in this pull request? This PR proposes row-level runtime filters in Spark to reduce intermediate data volume for operators like shuffle, join and aggregate, and hence improve performance. We propose two mechanisms to do this: semi-join filters or bloom filters, and both mechanisms are proposed to co-exist side-by-side behind feature configs. [Design Doc](https://docs.google.com/document/d/16IEuyLeQlubQkH8YuVuXWKo2-grVIoDJqQpHZrE7q04/edit?usp=sharing) with more details. ### Why are the changes needed? With Semi-Join, we see 9 queries improve for the TPC DS 3TB benchmark, and no regressions. With Bloom Filter, we see 10 queries improve for the TPC DS 3TB benchmark, and no regressions. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added tests Closes #35789 from somani/rf. Lead-authored-by: Abhishek Somani Co-authored-by: Abhishek Somani Co-authored-by: Yuming Wang Signed-off-by: Wenchen Fan (cherry picked from commit 1f4e4c812a9dc6d7e35631c1663c1ba6f6d9b721) Signed-off-by: Wenchen Fan --- .../apache/spark/util/sketch/BloomFilter.java | 7 + .../spark/util/sketch/BloomFilterImpl.java | 5 + .../expressions/BloomFilterMightContain.scala | 113 ++++ .../aggregate/BloomFilterAggregate.scala | 179 +++++++ .../expressions/objects/objects.scala | 2 + .../sql/catalyst/expressions/predicates.scala | 16 + .../expressions/regexpExpressions.scala | 5 +- .../optimizer/InjectRuntimeFilter.scala | 303 +++++++++++ .../sql/catalyst/trees/TreePatterns.scala | 3 + .../apache/spark/sql/internal/SQLConf.scala | 80 +++ .../spark/sql/execution/SparkOptimizer.scala | 2 + .../dynamicpruning/PartitionPruning.scala | 15 - .../sql/BloomFilterAggregateQuerySuite.scala | 215 ++++++++ .../spark/sql/InjectRuntimeFilterSuite.scala | 503 ++++++++++++++++++ 14 files changed, 1432 insertions(+), 16 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/BloomFilterAggregate.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala diff --git a/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java b/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java index c53987ecf6e25..2a6e270a91267 100644 --- a/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java +++ b/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilter.java @@ -163,6 +163,13 @@ int getVersionNumber() { */ public abstract void writeTo(OutputStream out) throws IOException; + /** + * @return the number of set bits in this {@link BloomFilter}. + */ + public long cardinality() { + throw new UnsupportedOperationException("Not implemented"); + } + /** * Reads in a {@link BloomFilter} from an input stream. It is the caller's responsibility to close * the stream. diff --git a/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilterImpl.java b/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilterImpl.java index e7766ee903480..ccf1833af9945 100644 --- a/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilterImpl.java +++ b/common/sketch/src/main/java/org/apache/spark/util/sketch/BloomFilterImpl.java @@ -207,6 +207,11 @@ public BloomFilter intersectInPlace(BloomFilter other) throws IncompatibleMergeE return this; } + @Override + public long cardinality() { + return this.bits.cardinality(); + } + private BloomFilterImpl checkCompatibilityForMerge(BloomFilter other) throws IncompatibleMergeException { // Duplicates the logic of `isCompatible` here to provide better error message. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala new file mode 100644 index 0000000000000..cf052f865ea90 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import java.io.ByteArrayInputStream + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, JavaCode, TrueLiteral} +import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper +import org.apache.spark.sql.catalyst.trees.TreePattern.OUTER_REFERENCE +import org.apache.spark.sql.types._ +import org.apache.spark.util.sketch.BloomFilter + +/** + * An internal scalar function that returns the membership check result (either true or false) + * for values of `valueExpression` in the Bloom filter represented by `bloomFilterExpression`. + * Not that since the function is "might contain", always returning true regardless is not + * wrong. + * Note that this expression requires that `bloomFilterExpression` is either a constant value or + * an uncorrelated scalar subquery. This is sufficient for the Bloom filter join rewrite. + * + * @param bloomFilterExpression the Binary data of Bloom filter. + * @param valueExpression the Long value to be tested for the membership of `bloomFilterExpression`. + */ +case class BloomFilterMightContain( + bloomFilterExpression: Expression, + valueExpression: Expression) extends BinaryExpression { + + override def nullable: Boolean = true + override def left: Expression = bloomFilterExpression + override def right: Expression = valueExpression + override def prettyName: String = "might_contain" + override def dataType: DataType = BooleanType + + override def checkInputDataTypes(): TypeCheckResult = { + (left.dataType, right.dataType) match { + case (BinaryType, NullType) | (NullType, LongType) | (NullType, NullType) | + (BinaryType, LongType) => + bloomFilterExpression match { + case e : Expression if e.foldable => TypeCheckResult.TypeCheckSuccess + case subquery : PlanExpression[_] if !subquery.containsPattern(OUTER_REFERENCE) => + TypeCheckResult.TypeCheckSuccess + case _ => + TypeCheckResult.TypeCheckFailure(s"The Bloom filter binary input to $prettyName " + + "should be either a constant value or a scalar subquery expression") + } + case _ => TypeCheckResult.TypeCheckFailure(s"Input to function $prettyName should have " + + s"been ${BinaryType.simpleString} followed by a value with ${LongType.simpleString}, " + + s"but it's [${left.dataType.catalogString}, ${right.dataType.catalogString}].") + } + } + + override protected def withNewChildrenInternal( + newBloomFilterExpression: Expression, + newValueExpression: Expression): BloomFilterMightContain = + copy(bloomFilterExpression = newBloomFilterExpression, + valueExpression = newValueExpression) + + // The bloom filter created from `bloomFilterExpression`. + @transient private lazy val bloomFilter = { + val bytes = bloomFilterExpression.eval().asInstanceOf[Array[Byte]] + if (bytes == null) null else deserialize(bytes) + } + + override def eval(input: InternalRow): Any = { + if (bloomFilter == null) { + null + } else { + val value = valueExpression.eval(input) + if (value == null) null else bloomFilter.mightContainLong(value.asInstanceOf[Long]) + } + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + if (bloomFilter == null) { + ev.copy(isNull = TrueLiteral, value = JavaCode.defaultLiteral(dataType)) + } else { + val bf = ctx.addReferenceObj("bloomFilter", bloomFilter, classOf[BloomFilter].getName) + val valueEval = valueExpression.genCode(ctx) + ev.copy(code = code""" + ${valueEval.code} + boolean ${ev.isNull} = ${valueEval.isNull}; + ${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + if (!${ev.isNull}) { + ${ev.value} = $bf.mightContainLong((Long)${valueEval.value}); + }""") + } + } + + final def deserialize(bytes: Array[Byte]): BloomFilter = { + val in = new ByteArrayInputStream(bytes) + val bloomFilter = BloomFilter.readFrom(in) + in.close() + bloomFilter + } + +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/BloomFilterAggregate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/BloomFilterAggregate.scala new file mode 100644 index 0000000000000..c734bca3ef8d0 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/BloomFilterAggregate.scala @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions.aggregate + +import java.io.ByteArrayInputStream +import java.io.ByteArrayOutputStream + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.trees.TernaryLike +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ +import org.apache.spark.util.sketch.BloomFilter + +/** + * An internal aggregate function that creates a Bloom filter from input values. + * + * @param child Child expression of Long values for creating a Bloom filter. + * @param estimatedNumItemsExpression The number of estimated distinct items (optional). + * @param numBitsExpression The number of bits to use (optional). + */ +case class BloomFilterAggregate( + child: Expression, + estimatedNumItemsExpression: Expression, + numBitsExpression: Expression, + override val mutableAggBufferOffset: Int, + override val inputAggBufferOffset: Int) + extends TypedImperativeAggregate[BloomFilter] with TernaryLike[Expression] { + + def this(child: Expression, estimatedNumItemsExpression: Expression, + numBitsExpression: Expression) = { + this(child, estimatedNumItemsExpression, numBitsExpression, 0, 0) + } + + def this(child: Expression, estimatedNumItemsExpression: Expression) = { + this(child, estimatedNumItemsExpression, + // 1 byte per item. + Multiply(estimatedNumItemsExpression, Literal(8L))) + } + + def this(child: Expression) = { + this(child, Literal(SQLConf.get.getConf(SQLConf.RUNTIME_BLOOM_FILTER_EXPECTED_NUM_ITEMS)), + Literal(SQLConf.get.getConf(SQLConf.RUNTIME_BLOOM_FILTER_NUM_BITS))) + } + + override def checkInputDataTypes(): TypeCheckResult = { + (first.dataType, second.dataType, third.dataType) match { + case (_, NullType, _) | (_, _, NullType) => + TypeCheckResult.TypeCheckFailure("Null typed values cannot be used as size arguments") + case (LongType, LongType, LongType) => + if (!estimatedNumItemsExpression.foldable) { + TypeCheckFailure("The estimated number of items provided must be a constant literal") + } else if (estimatedNumItems <= 0L) { + TypeCheckFailure("The estimated number of items must be a positive value " + + s" (current value = $estimatedNumItems)") + } else if (!numBitsExpression.foldable) { + TypeCheckFailure("The number of bits provided must be a constant literal") + } else if (numBits <= 0L) { + TypeCheckFailure("The number of bits must be a positive value " + + s" (current value = $numBits)") + } else { + require(estimatedNumItems <= + SQLConf.get.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS)) + require(numBits <= SQLConf.get.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_BITS)) + TypeCheckSuccess + } + case _ => TypeCheckResult.TypeCheckFailure(s"Input to function $prettyName should have " + + s"been a ${LongType.simpleString} value followed with two ${LongType.simpleString} size " + + s"arguments, but it's [${first.dataType.catalogString}, " + + s"${second.dataType.catalogString}, ${third.dataType.catalogString}]") + } + } + override def nullable: Boolean = true + + override def dataType: DataType = BinaryType + + override def prettyName: String = "bloom_filter_agg" + + // Mark as lazy so that `estimatedNumItems` is not evaluated during tree transformation. + private lazy val estimatedNumItems: Long = + Math.min(estimatedNumItemsExpression.eval().asInstanceOf[Number].longValue, + SQLConf.get.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS)) + + // Mark as lazy so that `numBits` is not evaluated during tree transformation. + private lazy val numBits: Long = + Math.min(numBitsExpression.eval().asInstanceOf[Number].longValue, + SQLConf.get.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_BITS)) + + override def first: Expression = child + + override def second: Expression = estimatedNumItemsExpression + + override def third: Expression = numBitsExpression + + override protected def withNewChildrenInternal( + newChild: Expression, + newEstimatedNumItemsExpression: Expression, + newNumBitsExpression: Expression): BloomFilterAggregate = { + copy(child = newChild, estimatedNumItemsExpression = newEstimatedNumItemsExpression, + numBitsExpression = newNumBitsExpression) + } + + override def createAggregationBuffer(): BloomFilter = { + BloomFilter.create(estimatedNumItems, numBits) + } + + override def update(buffer: BloomFilter, inputRow: InternalRow): BloomFilter = { + val value = child.eval(inputRow) + // Ignore null values. + if (value == null) { + return buffer + } + buffer.putLong(value.asInstanceOf[Long]) + buffer + } + + override def merge(buffer: BloomFilter, other: BloomFilter): BloomFilter = { + buffer.mergeInPlace(other) + } + + override def eval(buffer: BloomFilter): Any = { + if (buffer.cardinality() == 0) { + // There's no set bit in the Bloom filter and hence no not-null value is processed. + return null + } + serialize(buffer) + } + + override def withNewMutableAggBufferOffset(newOffset: Int): BloomFilterAggregate = + copy(mutableAggBufferOffset = newOffset) + + override def withNewInputAggBufferOffset(newOffset: Int): BloomFilterAggregate = + copy(inputAggBufferOffset = newOffset) + + override def serialize(obj: BloomFilter): Array[Byte] = { + BloomFilterAggregate.serialize(obj) + } + + override def deserialize(bytes: Array[Byte]): BloomFilter = { + BloomFilterAggregate.deserialize(bytes) + } +} + +object BloomFilterAggregate { + final def serialize(obj: BloomFilter): Array[Byte] = { + // BloomFilterImpl.writeTo() writes 2 integers (version number and num hash functions), hence + // the +8 + val size = (obj.bitSize() / 8) + 8 + require(size <= Integer.MAX_VALUE, s"actual number of bits is too large $size") + val out = new ByteArrayOutputStream(size.intValue()) + obj.writeTo(out) + out.close() + out.toByteArray + } + + final def deserialize(bytes: Array[Byte]): BloomFilter = { + val in = new ByteArrayInputStream(bytes) + val bloomFilter = BloomFilter.readFrom(in) + in.close() + bloomFilter + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 6974ada8735c3..2c879beeed623 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -360,6 +360,8 @@ case class Invoke( lazy val argClasses = ScalaReflection.expressionJavaClasses(arguments) + final override val nodePatterns: Seq[TreePattern] = Seq(INVOKE) + override def nullable: Boolean = targetObject.nullable || needNullCheck || returnNullable override def children: Seq[Expression] = targetObject +: arguments override lazy val deterministic: Boolean = isDeterministic && arguments.forall(_.deterministic) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index a2fd668f495e0..d16e09c5ed95c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -287,6 +287,22 @@ trait PredicateHelper extends AliasHelper with Logging { } } } + + /** + * Returns whether an expression is likely to be selective + */ + def isLikelySelective(e: Expression): Boolean = e match { + case Not(expr) => isLikelySelective(expr) + case And(l, r) => isLikelySelective(l) || isLikelySelective(r) + case Or(l, r) => isLikelySelective(l) && isLikelySelective(r) + case _: StringRegexExpression => true + case _: BinaryComparison => true + case _: In | _: InSet => true + case _: StringPredicate => true + case BinaryPredicate(_) => true + case _: MultiLikeBase => true + case _ => false + } } @ExpressionDescription( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 368cbfd6be641..bfaaba514462f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -31,7 +31,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.trees.BinaryLike -import org.apache.spark.sql.catalyst.trees.TreePattern.{LIKE_FAMLIY, TreePattern} +import org.apache.spark.sql.catalyst.trees.TreePattern.{LIKE_FAMLIY, REGEXP_EXTRACT_FAMILY, REGEXP_REPLACE, TreePattern} import org.apache.spark.sql.catalyst.util.{GenericArrayData, StringUtils} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.types._ @@ -627,6 +627,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio @transient private var lastReplacementInUTF8: UTF8String = _ // result buffer write by Matcher @transient private lazy val result: StringBuffer = new StringBuffer + final override val nodePatterns: Seq[TreePattern] = Seq(REGEXP_REPLACE) override def nullSafeEval(s: Any, p: Any, r: Any, i: Any): Any = { if (!p.equals(lastRegex)) { @@ -751,6 +752,8 @@ abstract class RegExpExtractBase // last regex pattern, we cache it for performance concern @transient private var pattern: Pattern = _ + final override val nodePatterns: Seq[TreePattern] = Seq(REGEXP_EXTRACT_FAMILY) + override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType, IntegerType) override def first: Expression = subject override def second: Expression = regexp diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala new file mode 100644 index 0000000000000..35d0189f64651 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala @@ -0,0 +1,303 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, BloomFilterAggregate, Complete} +import org.apache.spark.sql.catalyst.planning.{ExtractEquiJoinKeys, PhysicalOperation} +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreePattern.{INVOKE, JSON_TO_STRUCT, LIKE_FAMLIY, PYTHON_UDF, REGEXP_EXTRACT_FAMILY, REGEXP_REPLACE, SCALA_UDF} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types._ + +/** + * Insert a filter on one side of the join if the other side has a selective predicate. + * The filter could be an IN subquery (converted to a semi join), a bloom filter, or something + * else in the future. + */ +object InjectRuntimeFilter extends Rule[LogicalPlan] with PredicateHelper with JoinSelectionHelper { + + // Wraps `expr` with a hash function if its byte size is larger than an integer. + private def mayWrapWithHash(expr: Expression): Expression = { + if (expr.dataType.defaultSize > IntegerType.defaultSize) { + new Murmur3Hash(Seq(expr)) + } else { + expr + } + } + + private def injectFilter( + filterApplicationSideExp: Expression, + filterApplicationSidePlan: LogicalPlan, + filterCreationSideExp: Expression, + filterCreationSidePlan: LogicalPlan): LogicalPlan = { + require(conf.runtimeFilterBloomFilterEnabled || conf.runtimeFilterSemiJoinReductionEnabled) + if (conf.runtimeFilterBloomFilterEnabled) { + injectBloomFilter( + filterApplicationSideExp, + filterApplicationSidePlan, + filterCreationSideExp, + filterCreationSidePlan + ) + } else { + injectInSubqueryFilter( + filterApplicationSideExp, + filterApplicationSidePlan, + filterCreationSideExp, + filterCreationSidePlan + ) + } + } + + private def injectBloomFilter( + filterApplicationSideExp: Expression, + filterApplicationSidePlan: LogicalPlan, + filterCreationSideExp: Expression, + filterCreationSidePlan: LogicalPlan): LogicalPlan = { + // Skip if the filter creation side is too big + if (filterCreationSidePlan.stats.sizeInBytes > conf.runtimeFilterCreationSideThreshold) { + return filterApplicationSidePlan + } + val rowCount = filterCreationSidePlan.stats.rowCount + val bloomFilterAgg = + if (rowCount.isDefined && rowCount.get.longValue > 0L) { + new BloomFilterAggregate(new XxHash64(Seq(filterCreationSideExp)), + Literal(rowCount.get.longValue)) + } else { + new BloomFilterAggregate(new XxHash64(Seq(filterCreationSideExp))) + } + val aggExp = AggregateExpression(bloomFilterAgg, Complete, isDistinct = false, None) + val alias = Alias(aggExp, "bloomFilter")() + val aggregate = ConstantFolding(Aggregate(Nil, Seq(alias), filterCreationSidePlan)) + val bloomFilterSubquery = ScalarSubquery(aggregate, Nil) + val filter = BloomFilterMightContain(bloomFilterSubquery, + new XxHash64(Seq(filterApplicationSideExp))) + Filter(filter, filterApplicationSidePlan) + } + + private def injectInSubqueryFilter( + filterApplicationSideExp: Expression, + filterApplicationSidePlan: LogicalPlan, + filterCreationSideExp: Expression, + filterCreationSidePlan: LogicalPlan): LogicalPlan = { + require(filterApplicationSideExp.dataType == filterCreationSideExp.dataType) + val actualFilterKeyExpr = mayWrapWithHash(filterCreationSideExp) + val alias = Alias(actualFilterKeyExpr, actualFilterKeyExpr.toString)() + val aggregate = Aggregate(Seq(alias), Seq(alias), filterCreationSidePlan) + if (!canBroadcastBySize(aggregate, conf)) { + // Skip the InSubquery filter if the size of `aggregate` is beyond broadcast join threshold, + // i.e., the semi-join will be a shuffled join, which is not worthwhile. + return filterApplicationSidePlan + } + val filter = InSubquery(Seq(mayWrapWithHash(filterApplicationSideExp)), + ListQuery(aggregate, childOutputs = aggregate.output)) + Filter(filter, filterApplicationSidePlan) + } + + /** + * Returns whether the plan is a simple filter over scan and the filter is likely selective + * Also check if the plan only has simple expressions (attribute reference, literals) so that we + * do not add a subquery that might have an expensive computation + */ + private def isSelectiveFilterOverScan(plan: LogicalPlan): Boolean = { + val ret = plan match { + case PhysicalOperation(_, filters, child) if child.isInstanceOf[LeafNode] => + filters.forall(isSimpleExpression) && + filters.exists(isLikelySelective) + case _ => false + } + !plan.isStreaming && ret + } + + private def isSimpleExpression(e: Expression): Boolean = { + !e.containsAnyPattern(PYTHON_UDF, SCALA_UDF, INVOKE, JSON_TO_STRUCT, LIKE_FAMLIY, + REGEXP_EXTRACT_FAMILY, REGEXP_REPLACE) + } + + private def canFilterLeft(joinType: JoinType): Boolean = joinType match { + case Inner | RightOuter => true + case _ => false + } + + private def canFilterRight(joinType: JoinType): Boolean = joinType match { + case Inner | LeftOuter => true + case _ => false + } + + private def isProbablyShuffleJoin(left: LogicalPlan, + right: LogicalPlan, hint: JoinHint): Boolean = { + !hintToBroadcastLeft(hint) && !hintToBroadcastRight(hint) && + !canBroadcastBySize(left, conf) && !canBroadcastBySize(right, conf) + } + + private def probablyHasShuffle(plan: LogicalPlan): Boolean = { + plan.collectFirst { + case j@Join(left, right, _, _, hint) + if isProbablyShuffleJoin(left, right, hint) => j + case a: Aggregate => a + }.nonEmpty + } + + // Returns the max scan byte size in the subtree rooted at `filterApplicationSide`. + private def maxScanByteSize(filterApplicationSide: LogicalPlan): BigInt = { + val defaultSizeInBytes = conf.getConf(SQLConf.DEFAULT_SIZE_IN_BYTES) + filterApplicationSide.collect({ + case leaf: LeafNode => leaf + }).map(scan => { + // DEFAULT_SIZE_IN_BYTES means there's no byte size information in stats. Since we avoid + // creating a Bloom filter when the filter application side is very small, so using 0 + // as the byte size when the actual size is unknown can avoid regression by applying BF + // on a small table. + if (scan.stats.sizeInBytes == defaultSizeInBytes) BigInt(0) else scan.stats.sizeInBytes + }).max + } + + // Returns true if `filterApplicationSide` satisfies the byte size requirement to apply a + // Bloom filter; false otherwise. + private def satisfyByteSizeRequirement(filterApplicationSide: LogicalPlan): Boolean = { + // In case `filterApplicationSide` is a union of many small tables, disseminating the Bloom + // filter to each small task might be more costly than scanning them itself. Thus, we use max + // rather than sum here. + val maxScanSize = maxScanByteSize(filterApplicationSide) + maxScanSize >= + conf.getConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD) + } + + /** + * Check that: + * - The filterApplicationSideJoinExp can be pushed down through joins and aggregates (ie the + * expression references originate from a single leaf node) + * - The filter creation side has a selective predicate + * - The current join is a shuffle join or a broadcast join that has a shuffle below it + * - The max filterApplicationSide scan size is greater than a configurable threshold + */ + private def filteringHasBenefit( + filterApplicationSide: LogicalPlan, + filterCreationSide: LogicalPlan, + filterApplicationSideExp: Expression, + hint: JoinHint): Boolean = { + findExpressionAndTrackLineageDown(filterApplicationSideExp, + filterApplicationSide).isDefined && isSelectiveFilterOverScan(filterCreationSide) && + (isProbablyShuffleJoin(filterApplicationSide, filterCreationSide, hint) || + probablyHasShuffle(filterApplicationSide)) && + satisfyByteSizeRequirement(filterApplicationSide) + } + + def hasRuntimeFilter(left: LogicalPlan, right: LogicalPlan, leftKey: Expression, + rightKey: Expression): Boolean = { + if (conf.runtimeFilterBloomFilterEnabled) { + hasBloomFilter(left, right, leftKey, rightKey) + } else { + hasInSubquery(left, right, leftKey, rightKey) + } + } + + // This checks if there is already a DPP filter, as this rule is called just after DPP. + def hasDynamicPruningSubquery( + left: LogicalPlan, + right: LogicalPlan, + leftKey: Expression, + rightKey: Expression): Boolean = { + (left, right) match { + case (Filter(DynamicPruningSubquery(pruningKey, _, _, _, _, _), plan), _) => + pruningKey.fastEquals(leftKey) || hasDynamicPruningSubquery(plan, right, leftKey, rightKey) + case (_, Filter(DynamicPruningSubquery(pruningKey, _, _, _, _, _), plan)) => + pruningKey.fastEquals(rightKey) || + hasDynamicPruningSubquery(left, plan, leftKey, rightKey) + case _ => false + } + } + + def hasBloomFilter( + left: LogicalPlan, + right: LogicalPlan, + leftKey: Expression, + rightKey: Expression): Boolean = { + findBloomFilterWithExp(left, leftKey) || findBloomFilterWithExp(right, rightKey) + } + + private def findBloomFilterWithExp(plan: LogicalPlan, key: Expression): Boolean = { + plan.find { + case Filter(condition, _) => + splitConjunctivePredicates(condition).exists { + case BloomFilterMightContain(_, XxHash64(Seq(valueExpression), _)) + if valueExpression.fastEquals(key) => true + case _ => false + } + case _ => false + }.isDefined + } + + def hasInSubquery(left: LogicalPlan, right: LogicalPlan, leftKey: Expression, + rightKey: Expression): Boolean = { + (left, right) match { + case (Filter(InSubquery(Seq(key), + ListQuery(Aggregate(Seq(Alias(_, _)), Seq(Alias(_, _)), _), _, _, _, _)), _), _) => + key.fastEquals(leftKey) || key.fastEquals(new Murmur3Hash(Seq(leftKey))) + case (_, Filter(InSubquery(Seq(key), + ListQuery(Aggregate(Seq(Alias(_, _)), Seq(Alias(_, _)), _), _, _, _, _)), _)) => + key.fastEquals(rightKey) || key.fastEquals(new Murmur3Hash(Seq(rightKey))) + case _ => false + } + } + + private def tryInjectRuntimeFilter(plan: LogicalPlan): LogicalPlan = { + var filterCounter = 0 + val numFilterThreshold = conf.getConf(SQLConf.RUNTIME_FILTER_NUMBER_THRESHOLD) + plan transformUp { + case join @ ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, _, _, left, right, hint) => + var newLeft = left + var newRight = right + (leftKeys, rightKeys).zipped.foreach((l, r) => { + // Check if: + // 1. There is already a DPP filter on the key + // 2. There is already a runtime filter (Bloom filter or IN subquery) on the key + // 3. The keys are simple cheap expressions + if (filterCounter < numFilterThreshold && + !hasDynamicPruningSubquery(left, right, l, r) && + !hasRuntimeFilter(newLeft, newRight, l, r) && + isSimpleExpression(l) && isSimpleExpression(r)) { + val oldLeft = newLeft + val oldRight = newRight + if (canFilterLeft(joinType) && filteringHasBenefit(left, right, l, hint)) { + newLeft = injectFilter(l, newLeft, r, right) + } + // Did we actually inject on the left? If not, try on the right + if (newLeft.fastEquals(oldLeft) && canFilterRight(joinType) && + filteringHasBenefit(right, left, r, hint)) { + newRight = injectFilter(r, newRight, l, left) + } + if (!newLeft.fastEquals(oldLeft) || !newRight.fastEquals(oldRight)) { + filterCounter = filterCounter + 1 + } + } + }) + join.withNewChildren(Seq(newLeft, newRight)) + } + } + + override def apply(plan: LogicalPlan): LogicalPlan = plan match { + case s: Subquery if s.correlated => plan + case _ if !conf.runtimeFilterSemiJoinReductionEnabled && + !conf.runtimeFilterBloomFilterEnabled => plan + case _ => tryInjectRuntimeFilter(plan) + } + +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala index b595966bcc235..3cf45d5f79f00 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala @@ -54,6 +54,7 @@ object TreePattern extends Enumeration { val IN_SUBQUERY: Value = Value val INSET: Value = Value val INTERSECT: Value = Value + val INVOKE: Value = Value val JSON_TO_STRUCT: Value = Value val LAMBDA_FUNCTION: Value = Value val LAMBDA_VARIABLE: Value = Value @@ -72,6 +73,8 @@ object TreePattern extends Enumeration { val PIVOT: Value = Value val PLAN_EXPRESSION: Value = Value val PYTHON_UDF: Value = Value + val REGEXP_EXTRACT_FAMILY: Value = Value + val REGEXP_REPLACE: Value = Value val RUNTIME_REPLACEABLE: Value = Value val SCALAR_SUBQUERY: Value = Value val SCALA_UDF: Value = Value diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 3314dd1916498..1bba8b6d866a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -341,6 +341,77 @@ object SQLConf { .booleanConf .createWithDefault(true) + val RUNTIME_FILTER_SEMI_JOIN_REDUCTION_ENABLED = + buildConf("spark.sql.optimizer.runtimeFilter.semiJoinReduction.enabled") + .doc("When true and if one side of a shuffle join has a selective predicate, we attempt " + + "to insert a semi join in the other side to reduce the amount of shuffle data.") + .version("3.3.0") + .booleanConf + .createWithDefault(false) + + val RUNTIME_FILTER_NUMBER_THRESHOLD = + buildConf("spark.sql.optimizer.runtimeFilter.number.threshold") + .doc("The total number of injected runtime filters (non-DPP) for a single " + + "query. This is to prevent driver OOMs with too many Bloom filters.") + .version("3.3.0") + .intConf + .checkValue(threshold => threshold >= 0, "The threshold should be >= 0") + .createWithDefault(10) + + val RUNTIME_BLOOM_FILTER_ENABLED = + buildConf("spark.sql.optimizer.runtime.bloomFilter.enabled") + .doc("When true and if one side of a shuffle join has a selective predicate, we attempt " + + "to insert a bloom filter in the other side to reduce the amount of shuffle data.") + .version("3.3.0") + .booleanConf + .createWithDefault(false) + + val RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD = + buildConf("spark.sql.optimizer.runtime.bloomFilter.creationSideThreshold") + .doc("Size threshold of the bloom filter creation side plan. Estimated size needs to be " + + "under this value to try to inject bloom filter.") + .version("3.3.0") + .bytesConf(ByteUnit.BYTE) + .createWithDefaultString("10MB") + + val RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD = + buildConf("spark.sql.optimizer.runtime.bloomFilter.applicationSideScanSizethreshold") + .doc("Byte size threshold of the Bloom filter application side plan's aggregated scan " + + "size. Aggregated scan byte size of the Bloom filter application side needs to be over " + + "this value to inject a bloom filter.") + .version("3.3.0") + .bytesConf(ByteUnit.BYTE) + .createWithDefaultString("10GB") + + val RUNTIME_BLOOM_FILTER_EXPECTED_NUM_ITEMS = + buildConf("spark.sql.optimizer.runtime.bloomFilter.expectedNumItems") + .doc("The default number of expected items for the runtime bloomfilter") + .version("3.3.0") + .longConf + .createWithDefault(1000000L) + + val RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS = + buildConf("spark.sql.optimizer.runtime.bloomFilter.maxNumItems") + .doc("The max allowed number of expected items for the runtime bloom filter") + .version("3.3.0") + .longConf + .createWithDefault(4000000L) + + + val RUNTIME_BLOOM_FILTER_NUM_BITS = + buildConf("spark.sql.optimizer.runtime.bloomFilter.numBits") + .doc("The default number of bits to use for the runtime bloom filter") + .version("3.3.0") + .longConf + .createWithDefault(8388608L) + + val RUNTIME_BLOOM_FILTER_MAX_NUM_BITS = + buildConf("spark.sql.optimizer.runtime.bloomFilter.maxNumBits") + .doc("The max number of bits to use for the runtime bloom filter") + .version("3.3.0") + .longConf + .createWithDefault(67108864L) + val COMPRESS_CACHED = buildConf("spark.sql.inMemoryColumnarStorage.compressed") .doc("When set to true Spark SQL will automatically select a compression codec for each " + "column based on statistics of the data.") @@ -3750,6 +3821,15 @@ class SQLConf extends Serializable with Logging { def dynamicPartitionPruningReuseBroadcastOnly: Boolean = getConf(DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY) + def runtimeFilterSemiJoinReductionEnabled: Boolean = + getConf(RUNTIME_FILTER_SEMI_JOIN_REDUCTION_ENABLED) + + def runtimeFilterBloomFilterEnabled: Boolean = + getConf(RUNTIME_BLOOM_FILTER_ENABLED) + + def runtimeFilterCreationSideThreshold: Long = + getConf(RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD) + def stateStoreProviderClass: String = getConf(STATE_STORE_PROVIDER_CLASS) def isStateSchemaCheckEnabled: Boolean = getConf(STATE_SCHEMA_CHECK_ENABLED) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index 7e8fb4a157262..743cb591b306f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -43,6 +43,8 @@ class SparkOptimizer( Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog)) :+ Batch("PartitionPruning", Once, PartitionPruning) :+ + Batch("InjectRuntimeFilter", FixedPoint(1), + InjectRuntimeFilter) :+ Batch("Pushdown Filters from PartitionPruning", fixedPoint, PushDownPredicates) :+ Batch("Cleanup filters that cannot be pushed down", Once, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala index 3b5fc4aea5d8b..89d66034f06cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala @@ -194,21 +194,6 @@ object PartitionPruning extends Rule[LogicalPlan] with PredicateHelper { scanOverhead + cachedOverhead } - /** - * Returns whether an expression is likely to be selective - */ - private def isLikelySelective(e: Expression): Boolean = e match { - case Not(expr) => isLikelySelective(expr) - case And(l, r) => isLikelySelective(l) || isLikelySelective(r) - case Or(l, r) => isLikelySelective(l) && isLikelySelective(r) - case _: StringRegexExpression => true - case _: BinaryComparison => true - case _: In | _: InSet => true - case _: StringPredicate => true - case BinaryPredicate(_) => true - case _: MultiLikeBase => true - case _ => false - } /** * Search a filtering predicate in a given logical plan diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala new file mode 100644 index 0000000000000..025593be4c959 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.catalyst.analysis.FunctionRegistry +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate.BloomFilterAggregate +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec +import org.apache.spark.sql.execution.aggregate.BaseAggregateExec +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + +/** + * Query tests for the Bloom filter aggregate and filter function. + */ +class BloomFilterAggregateQuerySuite extends QueryTest with SharedSparkSession { + import testImplicits._ + + val funcId_bloom_filter_agg = new FunctionIdentifier("bloom_filter_agg") + val funcId_might_contain = new FunctionIdentifier("might_contain") + + // Register 'bloom_filter_agg' to builtin. + FunctionRegistry.builtin.registerFunction(funcId_bloom_filter_agg, + new ExpressionInfo(classOf[BloomFilterAggregate].getName, "bloom_filter_agg"), + (children: Seq[Expression]) => children.size match { + case 1 => new BloomFilterAggregate(children.head) + case 2 => new BloomFilterAggregate(children.head, children(1)) + case 3 => new BloomFilterAggregate(children.head, children(1), children(2)) + }) + + // Register 'might_contain' to builtin. + FunctionRegistry.builtin.registerFunction(funcId_might_contain, + new ExpressionInfo(classOf[BloomFilterMightContain].getName, "might_contain"), + (children: Seq[Expression]) => BloomFilterMightContain(children.head, children(1))) + + override def afterAll(): Unit = { + FunctionRegistry.builtin.dropFunction(funcId_bloom_filter_agg) + FunctionRegistry.builtin.dropFunction(funcId_might_contain) + super.afterAll() + } + + test("Test bloom_filter_agg and might_contain") { + val conf = SQLConf.get + val table = "bloom_filter_test" + for (numEstimatedItems <- Seq(Long.MinValue, -10L, 0L, 4096L, 4194304L, Long.MaxValue, + conf.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_ITEMS))) { + for (numBits <- Seq(Long.MinValue, -10L, 0L, 4096L, 4194304L, Long.MaxValue, + conf.getConf(SQLConf.RUNTIME_BLOOM_FILTER_MAX_NUM_BITS))) { + val sqlString = s""" + |SELECT every(might_contain( + | (SELECT bloom_filter_agg(col, + | cast($numEstimatedItems as long), + | cast($numBits as long)) + | FROM $table), + | col)) positive_membership_test, + | every(might_contain( + | (SELECT bloom_filter_agg(col, + | cast($numEstimatedItems as long), + | cast($numBits as long)) + | FROM values (-1L), (100001L), (20000L) as t(col)), + | col)) negative_membership_test + |FROM $table + """.stripMargin + withTempView(table) { + (Seq(Long.MinValue, 0, Long.MaxValue) ++ (1L to 10000L)) + .toDF("col").createOrReplaceTempView(table) + // Validate error messages as well as answers when there's no error. + if (numEstimatedItems <= 0) { + val exception = intercept[AnalysisException] { + spark.sql(sqlString) + } + assert(exception.getMessage.contains( + "The estimated number of items must be a positive value")) + } else if (numBits <= 0) { + val exception = intercept[AnalysisException] { + spark.sql(sqlString) + } + assert(exception.getMessage.contains("The number of bits must be a positive value")) + } else { + checkAnswer(spark.sql(sqlString), Row(true, false)) + } + } + } + } + } + + test("Test that bloom_filter_agg errors out disallowed input value types") { + val exception1 = intercept[AnalysisException] { + spark.sql(""" + |SELECT bloom_filter_agg(a) + |FROM values (1.2), (2.5) as t(a)""" + .stripMargin) + } + assert(exception1.getMessage.contains( + "Input to function bloom_filter_agg should have been a bigint value")) + + val exception2 = intercept[AnalysisException] { + spark.sql(""" + |SELECT bloom_filter_agg(a, 2) + |FROM values (cast(1 as long)), (cast(2 as long)) as t(a)""" + .stripMargin) + } + assert(exception2.getMessage.contains( + "function bloom_filter_agg should have been a bigint value followed with two bigint")) + + val exception3 = intercept[AnalysisException] { + spark.sql(""" + |SELECT bloom_filter_agg(a, cast(2 as long), 5) + |FROM values (cast(1 as long)), (cast(2 as long)) as t(a)""" + .stripMargin) + } + assert(exception3.getMessage.contains( + "function bloom_filter_agg should have been a bigint value followed with two bigint")) + + val exception4 = intercept[AnalysisException] { + spark.sql(""" + |SELECT bloom_filter_agg(a, null, 5) + |FROM values (cast(1 as long)), (cast(2 as long)) as t(a)""" + .stripMargin) + } + assert(exception4.getMessage.contains("Null typed values cannot be used as size arguments")) + + val exception5 = intercept[AnalysisException] { + spark.sql(""" + |SELECT bloom_filter_agg(a, 5, null) + |FROM values (cast(1 as long)), (cast(2 as long)) as t(a)""" + .stripMargin) + } + assert(exception5.getMessage.contains("Null typed values cannot be used as size arguments")) + } + + test("Test that might_contain errors out disallowed input value types") { + val exception1 = intercept[AnalysisException] { + spark.sql("""|SELECT might_contain(1.0, 1L)""" + .stripMargin) + } + assert(exception1.getMessage.contains( + "Input to function might_contain should have been binary followed by a value with bigint")) + + val exception2 = intercept[AnalysisException] { + spark.sql("""|SELECT might_contain(NULL, 0.1)""" + .stripMargin) + } + assert(exception2.getMessage.contains( + "Input to function might_contain should have been binary followed by a value with bigint")) + } + + test("Test that might_contain errors out non-constant Bloom filter") { + val exception1 = intercept[AnalysisException] { + spark.sql(""" + |SELECT might_contain(cast(a as binary), cast(5 as long)) + |FROM values (cast(1 as long)), (cast(2 as long)) as t(a)""" + .stripMargin) + } + assert(exception1.getMessage.contains( + "The Bloom filter binary input to might_contain should be either a constant value or " + + "a scalar subquery expression")) + + val exception2 = intercept[AnalysisException] { + spark.sql(""" + |SELECT might_contain((select cast(a as binary)), cast(5 as long)) + |FROM values (cast(1 as long)), (cast(2 as long)) as t(a)""" + .stripMargin) + } + assert(exception2.getMessage.contains( + "The Bloom filter binary input to might_contain should be either a constant value or " + + "a scalar subquery expression")) + } + + test("Test that might_contain can take a constant value input") { + checkAnswer(spark.sql( + """SELECT might_contain( + |X'00000001000000050000000343A2EC6EA8C117E2D3CDB767296B144FC5BFBCED9737F267', + |cast(201 as long))""".stripMargin), + Row(false)) + } + + test("Test that bloom_filter_agg produces a NULL with empty input") { + checkAnswer(spark.sql("""SELECT bloom_filter_agg(cast(id as long)) from range(1, 1)"""), + Row(null)) + } + + test("Test NULL inputs for might_contain") { + checkAnswer(spark.sql( + s""" + |SELECT might_contain(null, null) both_null, + | might_contain(null, 1L) null_bf, + | might_contain((SELECT bloom_filter_agg(cast(id as long)) from range(1, 10000)), + | null) null_value + """.stripMargin), + Row(null, null, null)) + } + + test("Test that a query with bloom_filter_agg has partial aggregates") { + assert(spark.sql("""SELECT bloom_filter_agg(cast(id as long)) from range(1, 1000000)""") + .queryExecution.executedPlan.asInstanceOf[AdaptiveSparkPlanExec].inputPlan + .collect({case agg: BaseAggregateExec => agg}).size == 2) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala new file mode 100644 index 0000000000000..a5e27fbfda42a --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala @@ -0,0 +1,503 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql + +import org.apache.spark.sql.catalyst.expressions.{Alias, BloomFilterMightContain, Literal} +import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, BloomFilterAggregate} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, LogicalPlan} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} +import org.apache.spark.sql.types.{IntegerType, StructType} + +class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSparkSession { + + protected override def beforeAll(): Unit = { + super.beforeAll() + val schema = new StructType().add("a1", IntegerType, nullable = true) + .add("b1", IntegerType, nullable = true) + .add("c1", IntegerType, nullable = true) + .add("d1", IntegerType, nullable = true) + .add("e1", IntegerType, nullable = true) + .add("f1", IntegerType, nullable = true) + + val data1 = Seq(Seq(null, 47, null, 4, 6, 48), + Seq(73, 63, null, 92, null, null), + Seq(76, 10, 74, 98, 37, 5), + Seq(0, 63, null, null, null, null), + Seq(15, 77, null, null, null, null), + Seq(null, 57, 33, 55, null, 58), + Seq(4, 0, 86, null, 96, 14), + Seq(28, 16, 58, null, null, null), + Seq(1, 88, null, 8, null, 79), + Seq(59, null, null, null, 20, 25), + Seq(1, 50, null, 94, 94, null), + Seq(null, null, null, 67, 51, 57), + Seq(77, 50, 8, 90, 16, 21), + Seq(34, 28, null, 5, null, 64), + Seq(null, null, 88, 11, 63, 79), + Seq(92, 94, 23, 1, null, 64), + Seq(57, 56, null, 83, null, null), + Seq(null, 35, 8, 35, null, 70), + Seq(null, 8, null, 35, null, 87), + Seq(9, null, null, 60, null, 5), + Seq(null, 15, 66, null, 83, null)) + val rdd1 = spark.sparkContext.parallelize(data1) + val rddRow1 = rdd1.map(s => Row.fromSeq(s)) + spark.createDataFrame(rddRow1, schema).write.saveAsTable("bf1") + + val schema2 = new StructType().add("a2", IntegerType, nullable = true) + .add("b2", IntegerType, nullable = true) + .add("c2", IntegerType, nullable = true) + .add("d2", IntegerType, nullable = true) + .add("e2", IntegerType, nullable = true) + .add("f2", IntegerType, nullable = true) + + + val data2 = Seq(Seq(67, 17, 45, 91, null, null), + Seq(98, 63, 0, 89, null, 40), + Seq(null, 76, 68, 75, 20, 19), + Seq(8, null, null, null, 78, null), + Seq(48, 62, null, null, 11, 98), + Seq(84, null, 99, 65, 66, 51), + Seq(98, null, null, null, 42, 51), + Seq(10, 3, 29, null, 68, 8), + Seq(85, 36, 41, null, 28, 71), + Seq(89, null, 94, 95, 67, 21), + Seq(44, null, 24, 33, null, 6), + Seq(null, 6, 78, 31, null, 69), + Seq(59, 2, 63, 9, 66, 20), + Seq(5, 23, 10, 86, 68, null), + Seq(null, 63, 99, 55, 9, 65), + Seq(57, 62, 68, 5, null, 0), + Seq(75, null, 15, null, 81, null), + Seq(53, null, 6, 68, 28, 13), + Seq(null, null, null, null, 89, 23), + Seq(36, 73, 40, null, 8, null), + Seq(24, null, null, 40, null, null)) + val rdd2 = spark.sparkContext.parallelize(data2) + val rddRow2 = rdd2.map(s => Row.fromSeq(s)) + spark.createDataFrame(rddRow2, schema2).write.saveAsTable("bf2") + + val schema3 = new StructType().add("a3", IntegerType, nullable = true) + .add("b3", IntegerType, nullable = true) + .add("c3", IntegerType, nullable = true) + .add("d3", IntegerType, nullable = true) + .add("e3", IntegerType, nullable = true) + .add("f3", IntegerType, nullable = true) + + val data3 = Seq(Seq(67, 17, 45, 91, null, null), + Seq(98, 63, 0, 89, null, 40), + Seq(null, 76, 68, 75, 20, 19), + Seq(8, null, null, null, 78, null), + Seq(48, 62, null, null, 11, 98), + Seq(84, null, 99, 65, 66, 51), + Seq(98, null, null, null, 42, 51), + Seq(10, 3, 29, null, 68, 8), + Seq(85, 36, 41, null, 28, 71), + Seq(89, null, 94, 95, 67, 21), + Seq(44, null, 24, 33, null, 6), + Seq(null, 6, 78, 31, null, 69), + Seq(59, 2, 63, 9, 66, 20), + Seq(5, 23, 10, 86, 68, null), + Seq(null, 63, 99, 55, 9, 65), + Seq(57, 62, 68, 5, null, 0), + Seq(75, null, 15, null, 81, null), + Seq(53, null, 6, 68, 28, 13), + Seq(null, null, null, null, 89, 23), + Seq(36, 73, 40, null, 8, null), + Seq(24, null, null, 40, null, null)) + val rdd3 = spark.sparkContext.parallelize(data3) + val rddRow3 = rdd3.map(s => Row.fromSeq(s)) + spark.createDataFrame(rddRow3, schema3).write.saveAsTable("bf3") + + + val schema4 = new StructType().add("a4", IntegerType, nullable = true) + .add("b4", IntegerType, nullable = true) + .add("c4", IntegerType, nullable = true) + .add("d4", IntegerType, nullable = true) + .add("e4", IntegerType, nullable = true) + .add("f4", IntegerType, nullable = true) + + val data4 = Seq(Seq(67, 17, 45, 91, null, null), + Seq(98, 63, 0, 89, null, 40), + Seq(null, 76, 68, 75, 20, 19), + Seq(8, null, null, null, 78, null), + Seq(48, 62, null, null, 11, 98), + Seq(84, null, 99, 65, 66, 51), + Seq(98, null, null, null, 42, 51), + Seq(10, 3, 29, null, 68, 8), + Seq(85, 36, 41, null, 28, 71), + Seq(89, null, 94, 95, 67, 21), + Seq(44, null, 24, 33, null, 6), + Seq(null, 6, 78, 31, null, 69), + Seq(59, 2, 63, 9, 66, 20), + Seq(5, 23, 10, 86, 68, null), + Seq(null, 63, 99, 55, 9, 65), + Seq(57, 62, 68, 5, null, 0), + Seq(75, null, 15, null, 81, null), + Seq(53, null, 6, 68, 28, 13), + Seq(null, null, null, null, 89, 23), + Seq(36, 73, 40, null, 8, null), + Seq(24, null, null, 40, null, null)) + val rdd4 = spark.sparkContext.parallelize(data4) + val rddRow4 = rdd4.map(s => Row.fromSeq(s)) + spark.createDataFrame(rddRow4, schema4).write.saveAsTable("bf4") + + val schema5part = new StructType().add("a5", IntegerType, nullable = true) + .add("b5", IntegerType, nullable = true) + .add("c5", IntegerType, nullable = true) + .add("d5", IntegerType, nullable = true) + .add("e5", IntegerType, nullable = true) + .add("f5", IntegerType, nullable = true) + + val data5part = Seq(Seq(67, 17, 45, 91, null, null), + Seq(98, 63, 0, 89, null, 40), + Seq(null, 76, 68, 75, 20, 19), + Seq(8, null, null, null, 78, null), + Seq(48, 62, null, null, 11, 98), + Seq(84, null, 99, 65, 66, 51), + Seq(98, null, null, null, 42, 51), + Seq(10, 3, 29, null, 68, 8), + Seq(85, 36, 41, null, 28, 71), + Seq(89, null, 94, 95, 67, 21), + Seq(44, null, 24, 33, null, 6), + Seq(null, 6, 78, 31, null, 69), + Seq(59, 2, 63, 9, 66, 20), + Seq(5, 23, 10, 86, 68, null), + Seq(null, 63, 99, 55, 9, 65), + Seq(57, 62, 68, 5, null, 0), + Seq(75, null, 15, null, 81, null), + Seq(53, null, 6, 68, 28, 13), + Seq(null, null, null, null, 89, 23), + Seq(36, 73, 40, null, 8, null), + Seq(24, null, null, 40, null, null)) + val rdd5part = spark.sparkContext.parallelize(data5part) + val rddRow5part = rdd5part.map(s => Row.fromSeq(s)) + spark.createDataFrame(rddRow5part, schema5part).write.partitionBy("f5") + .saveAsTable("bf5part") + spark.createDataFrame(rddRow5part, schema5part).filter("a5 > 30") + .write.partitionBy("f5") + .saveAsTable("bf5filtered") + + sql("analyze table bf1 compute statistics for columns a1, b1, c1, d1, e1, f1") + sql("analyze table bf2 compute statistics for columns a2, b2, c2, d2, e2, f2") + sql("analyze table bf3 compute statistics for columns a3, b3, c3, d3, e3, f3") + sql("analyze table bf4 compute statistics for columns a4, b4, c4, d4, e4, f4") + sql("analyze table bf5part compute statistics for columns a5, b5, c5, d5, e5, f5") + sql("analyze table bf5filtered compute statistics for columns a5, b5, c5, d5, e5, f5") + } + + protected override def afterAll(): Unit = try { + sql("DROP TABLE IF EXISTS bf1") + sql("DROP TABLE IF EXISTS bf2") + sql("DROP TABLE IF EXISTS bf3") + sql("DROP TABLE IF EXISTS bf4") + sql("DROP TABLE IF EXISTS bf5part") + sql("DROP TABLE IF EXISTS bf5filtered") + } finally { + super.afterAll() + } + + def checkWithAndWithoutFeatureEnabled(query: String, testSemiJoin: Boolean, + shouldReplace: Boolean): Unit = { + var planDisabled: LogicalPlan = null + var planEnabled: LogicalPlan = null + var expectedAnswer: Array[Row] = null + + withSQLConf(SQLConf.RUNTIME_FILTER_SEMI_JOIN_REDUCTION_ENABLED.key -> "false", + SQLConf.RUNTIME_BLOOM_FILTER_ENABLED.key -> "false") { + planDisabled = sql(query).queryExecution.optimizedPlan + expectedAnswer = sql(query).collect() + } + + if (testSemiJoin) { + withSQLConf(SQLConf.RUNTIME_FILTER_SEMI_JOIN_REDUCTION_ENABLED.key -> "true", + SQLConf.RUNTIME_BLOOM_FILTER_ENABLED.key -> "false") { + planEnabled = sql(query).queryExecution.optimizedPlan + checkAnswer(sql(query), expectedAnswer) + } + if (shouldReplace) { + val normalizedEnabled = normalizePlan(normalizeExprIds(planEnabled)) + val normalizedDisabled = normalizePlan(normalizeExprIds(planDisabled)) + assert(normalizedEnabled != normalizedDisabled) + } else { + comparePlans(planDisabled, planEnabled) + } + } else { + withSQLConf(SQLConf.RUNTIME_FILTER_SEMI_JOIN_REDUCTION_ENABLED.key -> "false", + SQLConf.RUNTIME_BLOOM_FILTER_ENABLED.key -> "true") { + planEnabled = sql(query).queryExecution.optimizedPlan + checkAnswer(sql(query), expectedAnswer) + if (shouldReplace) { + assert(getNumBloomFilters(planEnabled) > getNumBloomFilters(planDisabled)) + } else { + assert(getNumBloomFilters(planEnabled) == getNumBloomFilters(planDisabled)) + } + } + } + } + + def getNumBloomFilters(plan: LogicalPlan): Integer = { + val numBloomFilterAggs = plan.collect { + case Filter(condition, _) => condition.collect { + case subquery: org.apache.spark.sql.catalyst.expressions.ScalarSubquery + => subquery.plan.collect { + case Aggregate(_, aggregateExpressions, _) => + aggregateExpressions.map { + case Alias(AggregateExpression(bfAgg : BloomFilterAggregate, _, _, _, _), + _) => + assert(bfAgg.estimatedNumItemsExpression.isInstanceOf[Literal]) + assert(bfAgg.numBitsExpression.isInstanceOf[Literal]) + 1 + }.sum + }.sum + }.sum + }.sum + val numMightContains = plan.collect { + case Filter(condition, _) => condition.collect { + case BloomFilterMightContain(_, _) => 1 + }.sum + }.sum + assert(numBloomFilterAggs == numMightContains) + numMightContains + } + + def assertRewroteSemiJoin(query: String): Unit = { + checkWithAndWithoutFeatureEnabled(query, testSemiJoin = true, shouldReplace = true) + } + + def assertDidNotRewriteSemiJoin(query: String): Unit = { + checkWithAndWithoutFeatureEnabled(query, testSemiJoin = true, shouldReplace = false) + } + + def assertRewroteWithBloomFilter(query: String): Unit = { + checkWithAndWithoutFeatureEnabled(query, testSemiJoin = false, shouldReplace = true) + } + + def assertDidNotRewriteWithBloomFilter(query: String): Unit = { + checkWithAndWithoutFeatureEnabled(query, testSemiJoin = false, shouldReplace = false) + } + + test("Runtime semi join reduction: simple") { + // Filter creation side is 3409 bytes + // Filter application side scan is 3362 bytes + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "2000") { + assertRewroteSemiJoin("select * from bf1 join bf2 on bf1.c1 = bf2.c2 where bf2.a2 = 62") + assertDidNotRewriteSemiJoin("select * from bf1 join bf2 on bf1.c1 = bf2.c2") + } + } + + test("Runtime semi join reduction: two joins") { + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "2000") { + assertRewroteSemiJoin("select * from bf1 join bf2 join bf3 on bf1.c1 = bf2.c2 " + + "and bf3.c3 = bf2.c2 where bf2.a2 = 5") + } + } + + test("Runtime semi join reduction: three joins") { + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "2000") { + assertRewroteSemiJoin("select * from bf1 join bf2 join bf3 join bf4 on " + + "bf1.c1 = bf2.c2 and bf2.c2 = bf3.c3 and bf3.c3 = bf4.c4 where bf1.a1 = 5") + } + } + + test("Runtime semi join reduction: simple expressions only") { + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "2000") { + val squared = (s: Long) => { + s * s + } + spark.udf.register("square", squared) + assertDidNotRewriteSemiJoin("select * from bf1 join bf2 on " + + "bf1.c1 = bf2.c2 where square(bf2.a2) = 62") + assertDidNotRewriteSemiJoin("select * from bf1 join bf2 on " + + "bf1.c1 = square(bf2.c2) where bf2.a2= 62") + } + } + + test("Runtime bloom filter join: simple") { + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "2000") { + assertRewroteWithBloomFilter("select * from bf1 join bf2 on bf1.c1 = bf2.c2 " + + "where bf2.a2 = 62") + assertDidNotRewriteWithBloomFilter("select * from bf1 join bf2 on bf1.c1 = bf2.c2") + } + } + + test("Runtime bloom filter join: two filters single join") { + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "2000") { + var planDisabled: LogicalPlan = null + var planEnabled: LogicalPlan = null + var expectedAnswer: Array[Row] = null + + val query = "select * from bf1 join bf2 on bf1.c1 = bf2.c2 and " + + "bf1.b1 = bf2.b2 where bf2.a2 = 62" + + withSQLConf(SQLConf.RUNTIME_FILTER_SEMI_JOIN_REDUCTION_ENABLED.key -> "false", + SQLConf.RUNTIME_BLOOM_FILTER_ENABLED.key -> "false") { + planDisabled = sql(query).queryExecution.optimizedPlan + expectedAnswer = sql(query).collect() + } + + withSQLConf(SQLConf.RUNTIME_FILTER_SEMI_JOIN_REDUCTION_ENABLED.key -> "false", + SQLConf.RUNTIME_BLOOM_FILTER_ENABLED.key -> "true") { + planEnabled = sql(query).queryExecution.optimizedPlan + checkAnswer(sql(query), expectedAnswer) + } + assert(getNumBloomFilters(planEnabled) == getNumBloomFilters(planDisabled) + 2) + } + } + + test("Runtime bloom filter join: test the number of filter threshold") { + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "2000") { + var planDisabled: LogicalPlan = null + var planEnabled: LogicalPlan = null + var expectedAnswer: Array[Row] = null + + val query = "select * from bf1 join bf2 on bf1.c1 = bf2.c2 and " + + "bf1.b1 = bf2.b2 where bf2.a2 = 62" + + withSQLConf(SQLConf.RUNTIME_FILTER_SEMI_JOIN_REDUCTION_ENABLED.key -> "false", + SQLConf.RUNTIME_BLOOM_FILTER_ENABLED.key -> "false") { + planDisabled = sql(query).queryExecution.optimizedPlan + expectedAnswer = sql(query).collect() + } + + for (numFilterThreshold <- 0 to 3) { + withSQLConf(SQLConf.RUNTIME_FILTER_SEMI_JOIN_REDUCTION_ENABLED.key -> "false", + SQLConf.RUNTIME_BLOOM_FILTER_ENABLED.key -> "true", + SQLConf.RUNTIME_FILTER_NUMBER_THRESHOLD.key -> numFilterThreshold.toString) { + planEnabled = sql(query).queryExecution.optimizedPlan + checkAnswer(sql(query), expectedAnswer) + } + if (numFilterThreshold < 3) { + assert(getNumBloomFilters(planEnabled) == getNumBloomFilters(planDisabled) + + numFilterThreshold) + } else { + assert(getNumBloomFilters(planEnabled) == getNumBloomFilters(planDisabled) + 2) + } + } + } + } + + test("Runtime bloom filter join: insert one bloom filter per column") { + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "2000") { + var planDisabled: LogicalPlan = null + var planEnabled: LogicalPlan = null + var expectedAnswer: Array[Row] = null + + val query = "select * from bf1 join bf2 on bf1.c1 = bf2.c2 and " + + "bf1.c1 = bf2.b2 where bf2.a2 = 62" + + withSQLConf(SQLConf.RUNTIME_FILTER_SEMI_JOIN_REDUCTION_ENABLED.key -> "false", + SQLConf.RUNTIME_BLOOM_FILTER_ENABLED.key -> "false") { + planDisabled = sql(query).queryExecution.optimizedPlan + expectedAnswer = sql(query).collect() + } + + withSQLConf(SQLConf.RUNTIME_FILTER_SEMI_JOIN_REDUCTION_ENABLED.key -> "false", + SQLConf.RUNTIME_BLOOM_FILTER_ENABLED.key -> "true") { + planEnabled = sql(query).queryExecution.optimizedPlan + checkAnswer(sql(query), expectedAnswer) + } + assert(getNumBloomFilters(planEnabled) == getNumBloomFilters(planDisabled) + 1) + } + } + + test("Runtime bloom filter join: do not add bloom filter if dpp filter exists " + + "on the same column") { + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "2000") { + assertDidNotRewriteWithBloomFilter("select * from bf5part join bf2 on " + + "bf5part.f5 = bf2.c2 where bf2.a2 = 62") + } + } + + test("Runtime bloom filter join: add bloom filter if dpp filter exists on " + + "a different column") { + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "2000") { + assertRewroteWithBloomFilter("select * from bf5part join bf2 on " + + "bf5part.c5 = bf2.c2 and bf5part.f5 = bf2.f2 where bf2.a2 = 62") + } + } + + test("Runtime bloom filter join: BF rewrite triggering threshold test") { + // Filter creation side data size is 3409 bytes. On the filter application side, an individual + // scan's byte size is 3362. + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "3000", + SQLConf.RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD.key -> "4000" + ) { + assertRewroteWithBloomFilter("select * from bf1 join bf2 on bf1.c1 = bf2.c2 " + + "where bf2.a2 = 62") + } + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "50", + SQLConf.RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD.key -> "50" + ) { + assertDidNotRewriteWithBloomFilter("select * from bf1 join bf2 on bf1.c1 = bf2.c2 " + + "where bf2.a2 = 62") + } + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "5000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "3000", + SQLConf.RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD.key -> "4000" + ) { + // Rewrite should not be triggered as the Bloom filter application side scan size is small. + assertDidNotRewriteWithBloomFilter("select * from bf1 join bf2 on bf1.c1 = bf2.c2 " + + "where bf2.a2 = 62") + } + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "32", + SQLConf.RUNTIME_BLOOM_FILTER_CREATION_SIDE_THRESHOLD.key -> "4000") { + // Test that the max scan size rather than an individual scan size on the filter + // application side matters. `bf5filtered` has 14168 bytes and `bf2` has 3409 bytes. + withSQLConf( + SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "5000") { + assertRewroteWithBloomFilter("select * from " + + "(select * from bf5filtered union all select * from bf2) t " + + "join bf3 on t.c5 = bf3.c3 where bf3.a3 = 5") + } + withSQLConf( + SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "15000") { + assertDidNotRewriteWithBloomFilter("select * from " + + "(select * from bf5filtered union all select * from bf2) t " + + "join bf3 on t.c5 = bf3.c3 where bf3.a3 = 5") + } + } + } + + test("Runtime bloom filter join: simple expressions only") { + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "2000") { + val squared = (s: Long) => { + s * s + } + spark.udf.register("square", squared) + assertDidNotRewriteWithBloomFilter("select * from bf1 join bf2 on " + + "bf1.c1 = bf2.c2 where square(bf2.a2) = 62" ) + assertDidNotRewriteWithBloomFilter("select * from bf1 join bf2 on " + + "bf1.c1 = square(bf2.c2) where bf2.a2 = 62" ) + } + } +} From 3a65ed9b423ab9ac6e3e36be86daf128930f3ec9 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Wed, 23 Mar 2022 11:01:18 +0900 Subject: [PATCH 033/535] [MINOR] Add @since for DSv2 API ### What changes were proposed in this pull request? Add missing `since` for DSv2 API ### Why are the changes needed? Let users know which version of the API was introduced. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The change does not touch the code. Closes #35935 from pan3793/minor. Authored-by: Cheng Pan Signed-off-by: Hyukjin Kwon (cherry picked from commit f73d5289155a9fb9b81fe9b62324491bcf06a142) Signed-off-by: Hyukjin Kwon --- .../org/apache/spark/sql/connector/read/HasPartitionKey.java | 1 + .../sql/connector/read/streaming/AcceptsLatestSeenOffset.java | 2 ++ 2 files changed, 3 insertions(+) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/HasPartitionKey.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/HasPartitionKey.java index 777693938c4e6..a4421aad80fff 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/HasPartitionKey.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/HasPartitionKey.java @@ -40,6 +40,7 @@ * * @see org.apache.spark.sql.connector.read.SupportsReportPartitioning * @see org.apache.spark.sql.connector.read.partitioning.Partitioning + * @since 3.3.0 */ public interface HasPartitionKey extends InputPartition { /** diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/AcceptsLatestSeenOffset.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/AcceptsLatestSeenOffset.java index e8515c063cffd..8e910b0825cbc 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/AcceptsLatestSeenOffset.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/streaming/AcceptsLatestSeenOffset.java @@ -25,6 +25,8 @@ * if the interface is implemented along with DSv1 streaming sources. * * The callback method will be called once per run. + * + * @since 3.3.0 */ public interface AcceptsLatestSeenOffset extends SparkDataStream { /** From 469bdff61529f11e4ee86dc66511d5b1ce51332b Mon Sep 17 00:00:00 2001 From: mcdull-zhang Date: Wed, 16 Mar 2022 14:17:18 +0800 Subject: [PATCH 034/535] [SPARK-38542][SQL] UnsafeHashedRelation should serialize numKeys out ### What changes were proposed in this pull request? UnsafeHashedRelation should serialize numKeys out ### Why are the changes needed? One case I found was this: We turned on ReusedExchange(BroadcastExchange), but the returned UnsafeHashedRelation is missing numKeys. The reason is that the current type of TorrentBroadcast._value is SoftReference, so the UnsafeHashedRelation obtained by deserialization loses numKeys, which will lead to incorrect calculation results. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a line of assert to an existing unit test Closes #35836 from mcdull-zhang/UnsafeHashed. Authored-by: mcdull-zhang Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/execution/joins/HashedRelation.scala | 4 +++- .../spark/sql/execution/joins/HashedRelationSuite.scala | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala index 698e7ed6fc57e..253f16e39d352 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala @@ -207,7 +207,7 @@ private[execution] class ValueRowWithKeyIndex { * A HashedRelation for UnsafeRow, which is backed BytesToBytesMap. * * It's serialized in the following format: - * [number of keys] + * [number of keys] [number of fields] * [size of key] [size of value] [key bytes] [bytes for value] */ private[joins] class UnsafeHashedRelation( @@ -364,6 +364,7 @@ private[joins] class UnsafeHashedRelation( writeInt: (Int) => Unit, writeLong: (Long) => Unit, writeBuffer: (Array[Byte], Int, Int) => Unit) : Unit = { + writeInt(numKeys) writeInt(numFields) // TODO: move these into BytesToBytesMap writeLong(binaryMap.numKeys()) @@ -397,6 +398,7 @@ private[joins] class UnsafeHashedRelation( readInt: () => Int, readLong: () => Long, readBuffer: (Array[Byte], Int, Int) => Unit): Unit = { + numKeys = readInt() numFields = readInt() resultRow = new UnsafeRow(numFields) val nKeys = readLong() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala index 2462fe31a9b66..6c87178f267c4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala @@ -93,6 +93,9 @@ class HashedRelationSuite extends SharedSparkSession { assert(hashed2.get(toUnsafe(InternalRow(10))) === null) assert(hashed2.get(unsafeData(2)).toArray === data2) + // SPARK-38542: UnsafeHashedRelation should serialize numKeys out + assert(hashed2.keys().map(_.copy()).forall(_.numFields == 1)) + val os2 = new ByteArrayOutputStream() val out2 = new ObjectOutputStream(os2) hashed2.writeExternal(out2) From fed4a65fe1b809051a111b50f4fa550bb9b2b204 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 22 Mar 2022 21:12:26 -0700 Subject: [PATCH 035/535] [SPARK-38630][K8S] K8s app name label should start and end with alphanumeric char ### What changes were proposed in this pull request? This PR aims to fix `getAppNameLabel` by removing the prefix and suffix `-` character from app name label. ### Why are the changes needed? Currently, `master/branch-3.3` has this regression. ``` io.fabric8.kubernetes.client.KubernetesClientException: Failure executing: POST at: https://kubernetes.default.svc/api/v1/namespaces/default/pods. Message: Pod "..." is invalid: metadata.labels: Invalid value: "...-tpcds-1000g-parquet-": a valid label must be an empty string or consist of alphanumeric characters, '-', '_' or '.', and must start and end with an alphanumeric character ``` ### Does this PR introduce _any_ user-facing change? This is a regression at Apache Spark 3.3.0. ### How was this patch tested? Pass the CI with the newly added test case. Closes #35943 from dongjoon-hyun/SPARK-38630. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 43487cb2b0f01cbb2c235cc52fcb515c5448d4a8) Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/deploy/k8s/KubernetesConf.scala | 5 +++-- .../org/apache/spark/deploy/k8s/KubernetesConfSuite.scala | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala index 118f4e5a61d3f..8a985c31b171c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala @@ -261,7 +261,8 @@ private[spark] object KubernetesConf { def getAppNameLabel(appName: String): String = { // According to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels, // must be 63 characters or less to follow the DNS label standard, so take the 63 characters - // of the appName name as the label. + // of the appName name as the label. In addition, label value must start and end with + // an alphanumeric character. StringUtils.abbreviate( s"$appName" .trim @@ -270,7 +271,7 @@ private[spark] object KubernetesConf { .replaceAll("-+", "-"), "", KUBERNETES_DNSNAME_MAX_LENGTH - ) + ).stripPrefix("-").stripSuffix("-") } /** diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala index eecaff262bf66..d33d79249104c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala @@ -244,4 +244,10 @@ class KubernetesConfSuite extends SparkFunSuite { assert(KubernetesConf.getAppNameLabel("a" * 64) === "a" * 63) assert(KubernetesConf.getAppNameLabel("a" * 253) === "a" * 63) } + + test("SPARK-38630: K8s label value should start and end with alphanumeric") { + assert(KubernetesConf.getAppNameLabel("-hello-") === "hello") + assert(KubernetesConf.getAppNameLabel("a" * 62 + "-aaa") === "a" * 62) + assert(KubernetesConf.getAppNameLabel("-" + "a" * 63) === "a" * 62) + } } From 27f57c9c962344028af40807d8c2654e5618d978 Mon Sep 17 00:00:00 2001 From: huangmaoyang2 Date: Wed, 23 Mar 2022 15:06:58 +0900 Subject: [PATCH 036/535] [SPARK-38629][SQL][DOCS] Two links beneath Spark SQL Guide/Data Sources do not work properly SPARK-38629 Two links beneath Spark SQL Guide/Data Sources do not work properly ### What changes were proposed in this pull request? Two typos have been corrected in sql-data-sources.md under Spark's docs directory. ### Why are the changes needed? Two links under latest documentation [Spark SQL Guide/Data Sources](https://spark.apache.org/docs/latest/sql-data-sources.html) do not work properly, when click 'Ignore Corrupt File' or 'Ignore Missing Files', it does redirect me to the right page, but does not scroll to the right section. This issue actually has been there since v3.0.0. ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? I've built the documentation locally and tested my change. Closes #35944 from morvenhuang/SPARK-38629. Authored-by: huangmaoyang2 Signed-off-by: Hyukjin Kwon (cherry picked from commit ac9ae98011424a030a6ef264caf077b8873e251d) Signed-off-by: Hyukjin Kwon --- docs/sql-data-sources.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sql-data-sources.md b/docs/sql-data-sources.md index 72a9f90766675..8c57550ffc1c6 100644 --- a/docs/sql-data-sources.md +++ b/docs/sql-data-sources.md @@ -34,8 +34,8 @@ goes into specific options that are available for the built-in data sources. * [Saving to Persistent Tables](sql-data-sources-load-save-functions.html#saving-to-persistent-tables) * [Bucketing, Sorting and Partitioning](sql-data-sources-load-save-functions.html#bucketing-sorting-and-partitioning) * [Generic File Source Options](sql-data-sources-generic-options.html) - * [Ignore Corrupt Files](sql-data-sources-generic-options.html#ignore-corrupt-iles) - * [Ignore Missing Files](sql-data-sources-generic-options.html#ignore-missing-iles) + * [Ignore Corrupt Files](sql-data-sources-generic-options.html#ignore-corrupt-files) + * [Ignore Missing Files](sql-data-sources-generic-options.html#ignore-missing-files) * [Path Global Filter](sql-data-sources-generic-options.html#path-global-filter) * [Recursive File Lookup](sql-data-sources-generic-options.html#recursive-file-lookup) * [Parquet Files](sql-data-sources-parquet.html) From 6e5f1811b180868303ea0ee2f44309c3a5ef914c Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Wed, 23 Mar 2022 15:22:48 +0800 Subject: [PATCH 037/535] [SPARK-38533][SQL] DS V2 aggregate push-down supports project with alias ### What changes were proposed in this pull request? Currently, Spark DS V2 aggregate push-down doesn't supports project with alias. Refer https://github.com/apache/spark/blob/c91c2e9afec0d5d5bbbd2e155057fe409c5bb928/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala#L96 This PR let it works good with alias. **The first example:** the origin plan show below: ``` Aggregate [DEPT#0], [DEPT#0, sum(mySalary#8) AS total#14] +- Project [DEPT#0, SALARY#2 AS mySalary#8] +- ScanBuilderHolder [DEPT#0, NAME#1, SALARY#2, BONUS#3], RelationV2[DEPT#0, NAME#1, SALARY#2, BONUS#3] test.employee, JDBCScanBuilder(org.apache.spark.sql.test.TestSparkSession77978658,StructType(StructField(DEPT,IntegerType,true),StructField(NAME,StringType,true),StructField(SALARY,DecimalType(20,2),true),StructField(BONUS,DoubleType,true)),org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions5f8da82) ``` If we can complete push down the aggregate, then the plan will be: ``` Project [DEPT#0, SUM(SALARY)#18 AS sum(SALARY#2)#13 AS total#14] +- RelationV2[DEPT#0, SUM(SALARY)#18] test.employee ``` If we can partial push down the aggregate, then the plan will be: ``` Aggregate [DEPT#0], [DEPT#0, sum(cast(SUM(SALARY)#18 as decimal(20,2))) AS total#14] +- RelationV2[DEPT#0, SUM(SALARY)#18] test.employee ``` **The second example:** the origin plan show below: ``` Aggregate [myDept#33], [myDept#33, sum(mySalary#34) AS total#40] +- Project [DEPT#25 AS myDept#33, SALARY#27 AS mySalary#34] +- ScanBuilderHolder [DEPT#25, NAME#26, SALARY#27, BONUS#28], RelationV2[DEPT#25, NAME#26, SALARY#27, BONUS#28] test.employee, JDBCScanBuilder(org.apache.spark.sql.test.TestSparkSession25c4f621,StructType(StructField(DEPT,IntegerType,true),StructField(NAME,StringType,true),StructField(SALARY,DecimalType(20,2),true),StructField(BONUS,DoubleType,true)),org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions345d641e) ``` If we can complete push down the aggregate, then the plan will be: ``` Project [DEPT#25 AS myDept#33, SUM(SALARY)#44 AS sum(SALARY#27)#39 AS total#40] +- RelationV2[DEPT#25, SUM(SALARY)#44] test.employee ``` If we can partial push down the aggregate, then the plan will be: ``` Aggregate [myDept#33], [DEPT#25 AS myDept#33, sum(cast(SUM(SALARY)#56 as decimal(20,2))) AS total#52] +- RelationV2[DEPT#25, SUM(SALARY)#56] test.employee ``` ### Why are the changes needed? Alias is more useful. ### Does this PR introduce _any_ user-facing change? 'Yes'. Users could see DS V2 aggregate push-down supports project with alias. ### How was this patch tested? New tests. Closes #35932 from beliefer/SPARK-38533_new. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit f327dade9cdb466574b4698c2b9da4bdaac300e0) Signed-off-by: Wenchen Fan --- .../v2/V2ScanRelationPushDown.scala | 22 +++-- .../FileSourceAggregatePushDownSuite.scala | 4 +- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 86 +++++++++++++++++-- 3 files changed, 97 insertions(+), 15 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala index 44cdff10aca45..c699e92cf0190 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala @@ -19,9 +19,10 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable -import org.apache.spark.sql.catalyst.expressions.{Alias, And, Attribute, AttributeReference, Cast, Divide, DivideDTInterval, DivideYMInterval, EqualTo, Expression, If, IntegerLiteral, Literal, NamedExpression, PredicateHelper, ProjectionOverSchema, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, AliasHelper, And, Attribute, AttributeReference, Cast, Divide, DivideDTInterval, DivideYMInterval, EqualTo, Expression, If, IntegerLiteral, Literal, NamedExpression, PredicateHelper, ProjectionOverSchema, SubqueryExpression} import org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.catalyst.optimizer.CollapseProject import org.apache.spark.sql.catalyst.planning.ScanOperation import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, LeafNode, Limit, LocalLimit, LogicalPlan, Project, Sample, Sort} import org.apache.spark.sql.catalyst.rules.Rule @@ -34,7 +35,7 @@ import org.apache.spark.sql.sources import org.apache.spark.sql.types.{DataType, DayTimeIntervalType, LongType, StructType, YearMonthIntervalType} import org.apache.spark.sql.util.SchemaUtils._ -object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { +object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper with AliasHelper { import DataSourceV2Implicits._ def apply(plan: LogicalPlan): LogicalPlan = { @@ -95,22 +96,27 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { case aggNode @ Aggregate(groupingExpressions, resultExpressions, child) => child match { case ScanOperation(project, filters, sHolder: ScanBuilderHolder) - if filters.isEmpty && project.forall(_.isInstanceOf[AttributeReference]) => + if filters.isEmpty && CollapseProject.canCollapseExpressions( + resultExpressions, project, alwaysInline = true) => sHolder.builder match { case r: SupportsPushDownAggregates => + val aliasMap = getAliasMap(project) + val actualResultExprs = resultExpressions.map(replaceAliasButKeepName(_, aliasMap)) + val actualGroupExprs = groupingExpressions.map(replaceAlias(_, aliasMap)) + val aggExprToOutputOrdinal = mutable.HashMap.empty[Expression, Int] - val aggregates = collectAggregates(resultExpressions, aggExprToOutputOrdinal) + val aggregates = collectAggregates(actualResultExprs, aggExprToOutputOrdinal) val normalizedAggregates = DataSourceStrategy.normalizeExprs( aggregates, sHolder.relation.output).asInstanceOf[Seq[AggregateExpression]] val normalizedGroupingExpressions = DataSourceStrategy.normalizeExprs( - groupingExpressions, sHolder.relation.output) + actualGroupExprs, sHolder.relation.output) val translatedAggregates = DataSourceStrategy.translateAggregation( normalizedAggregates, normalizedGroupingExpressions) val (finalResultExpressions, finalAggregates, finalTranslatedAggregates) = { if (translatedAggregates.isEmpty || r.supportCompletePushDown(translatedAggregates.get) || translatedAggregates.get.aggregateExpressions().forall(!_.isInstanceOf[Avg])) { - (resultExpressions, aggregates, translatedAggregates) + (actualResultExprs, aggregates, translatedAggregates) } else { // scalastyle:off // The data source doesn't support the complete push-down of this aggregation. @@ -127,7 +133,7 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { // Aggregate [c2#10],[sum(c1#9)/count(c1#9) AS avg(c1)#19] // +- ScanOperation[...] // scalastyle:on - val newResultExpressions = resultExpressions.map { expr => + val newResultExpressions = actualResultExprs.map { expr => expr.transform { case AggregateExpression(avg: aggregate.Average, _, isDistinct, _, _) => val sum = aggregate.Sum(avg.child).toAggregateExpression(isDistinct) @@ -206,7 +212,7 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper { val scanRelation = DataSourceV2ScanRelation(sHolder.relation, wrappedScan, output) if (r.supportCompletePushDown(pushedAggregates.get)) { - val projectExpressions = resultExpressions.map { expr => + val projectExpressions = finalResultExpressions.map { expr => // TODO At present, only push down group by attribute is supported. // In future, more attribute conversion is extended here. e.g. GetStructField expr.transform { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceAggregatePushDownSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceAggregatePushDownSuite.scala index 47740c5274616..26dfe1a50971f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceAggregatePushDownSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceAggregatePushDownSuite.scala @@ -184,7 +184,7 @@ trait FileSourceAggregatePushDownSuite } } - test("aggregate over alias not push down") { + test("aggregate over alias push down") { val data = Seq((-2, "abc", 2), (3, "def", 4), (6, "ghi", 2), (0, null, 19), (9, "mno", 7), (2, null, 6)) withDataSourceTable(data, "t") { @@ -194,7 +194,7 @@ trait FileSourceAggregatePushDownSuite query.queryExecution.optimizedPlan.collect { case _: DataSourceV2ScanRelation => val expected_plan_fragment = - "PushedAggregation: []" // aggregate alias not pushed down + "PushedAggregation: [MIN(_1)]" checkKeywordsExistsInExplain(query, expected_plan_fragment) } checkAnswer(query, Seq(Row(-2))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index d6f098f1d5189..31fdb022b625f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -974,15 +974,19 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(df, Seq(Row(1d), Row(1d), Row(null))) } - test("scan with aggregate push-down: aggregate over alias NOT push down") { + test("scan with aggregate push-down: aggregate over alias push down") { val cols = Seq("a", "b", "c", "d", "e") val df1 = sql("select * from h2.test.employee").toDF(cols: _*) val df2 = df1.groupBy().sum("c") - checkAggregateRemoved(df2, false) + checkAggregateRemoved(df2) df2.queryExecution.optimizedPlan.collect { - case relation: DataSourceV2ScanRelation => relation.scan match { - case v1: V1ScanWrapper => - assert(v1.pushedDownOperators.aggregation.isEmpty) + case relation: DataSourceV2ScanRelation => + val expectedPlanFragment = + "PushedAggregates: [SUM(SALARY)], PushedFilters: [], PushedGroupByColumns: []" + checkKeywordsExistsInExplain(df2, expectedPlanFragment) + relation.scan match { + case v1: V1ScanWrapper => + assert(v1.pushedDownOperators.aggregation.nonEmpty) } } checkAnswer(df2, Seq(Row(53000.00))) @@ -1228,4 +1232,76 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel |ON h2.test.view1.`|col1` = h2.test.view2.`|col1`""".stripMargin) checkAnswer(df, Seq.empty[Row]) } + + test("scan with aggregate push-down: complete push-down aggregate with alias") { + val df = spark.table("h2.test.employee") + .select($"DEPT", $"SALARY".as("mySalary")) + .groupBy($"DEPT") + .agg(sum($"mySalary").as("total")) + .filter($"total" > 1000) + checkAggregateRemoved(df) + df.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expectedPlanFragment = + "PushedAggregates: [SUM(SALARY)], PushedFilters: [], PushedGroupByColumns: [DEPT]" + checkKeywordsExistsInExplain(df, expectedPlanFragment) + } + checkAnswer(df, Seq(Row(1, 19000.00), Row(2, 22000.00), Row(6, 12000.00))) + + val df2 = spark.table("h2.test.employee") + .select($"DEPT".as("myDept"), $"SALARY".as("mySalary")) + .groupBy($"myDept") + .agg(sum($"mySalary").as("total")) + .filter($"total" > 1000) + checkAggregateRemoved(df2) + df2.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expectedPlanFragment = + "PushedAggregates: [SUM(SALARY)], PushedFilters: [], PushedGroupByColumns: [DEPT]" + checkKeywordsExistsInExplain(df2, expectedPlanFragment) + } + checkAnswer(df2, Seq(Row(1, 19000.00), Row(2, 22000.00), Row(6, 12000.00))) + } + + test("scan with aggregate push-down: partial push-down aggregate with alias") { + val df = spark.read + .option("partitionColumn", "DEPT") + .option("lowerBound", "0") + .option("upperBound", "2") + .option("numPartitions", "2") + .table("h2.test.employee") + .select($"NAME", $"SALARY".as("mySalary")) + .groupBy($"NAME") + .agg(sum($"mySalary").as("total")) + .filter($"total" > 1000) + checkAggregateRemoved(df, false) + df.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expectedPlanFragment = + "PushedAggregates: [SUM(SALARY)], PushedFilters: [], PushedGroupByColumns: [NAME]" + checkKeywordsExistsInExplain(df, expectedPlanFragment) + } + checkAnswer(df, Seq(Row("alex", 12000.00), Row("amy", 10000.00), + Row("cathy", 9000.00), Row("david", 10000.00), Row("jen", 12000.00))) + + val df2 = spark.read + .option("partitionColumn", "DEPT") + .option("lowerBound", "0") + .option("upperBound", "2") + .option("numPartitions", "2") + .table("h2.test.employee") + .select($"NAME".as("myName"), $"SALARY".as("mySalary")) + .groupBy($"myName") + .agg(sum($"mySalary").as("total")) + .filter($"total" > 1000) + checkAggregateRemoved(df2, false) + df2.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expectedPlanFragment = + "PushedAggregates: [SUM(SALARY)], PushedFilters: [], PushedGroupByColumns: [NAME]" + checkKeywordsExistsInExplain(df2, expectedPlanFragment) + } + checkAnswer(df2, Seq(Row("alex", 12000.00), Row("amy", 10000.00), + Row("cathy", 9000.00), Row("david", 10000.00), Row("jen", 12000.00))) + } } From ecc24c13263f5e1a95c34b4bc58644a200dcf7cc Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 23 Mar 2022 17:25:47 +0800 Subject: [PATCH 038/535] [SPARK-38587][SQL] Validating new location for rename command should use formatted names ### What changes were proposed in this pull request? Fix bug for `getDatabase` with a unformatted database name. ```java [info] - ALTER TABLE .. RENAME using V1 catalog V1 command: newName *** FAILED *** (61 milliseconds) [info] org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException: Database 'CaseUpperCaseLower' not found [info] at org.apache.spark.sql.catalyst.catalog.ExternalCatalog.requireDbExists(ExternalCatalog.scala:42) [info] at org.apache.spark.sql.catalyst.catalog.ExternalCatalog.requireDbExists$(ExternalCatalog.scala:40) [info] at org.apache.spark.sql.catalyst.catalog.InMemoryCatalog.requireDbExists(InMemoryCatalog.scala:47) [info] at org.apache.spark.sql.catalyst.catalog.InMemoryCatalog.getDatabase(InMemoryCatalog.scala:171) [info] at org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.getDatabase(ExternalCatalogWithListener.scala:65) [info] at org.apache.spark.sql.catalyst.catalog.SessionCatalog.validateNewLocationOfRename(SessionCatalog.scala:1863) [info] at org.apache.spark.sql.catalyst.catalog.SessionCatalog.renameTable(SessionCatalog.scala:739) [info] at org.apache.spark.sql.execution.command.AlterTableRenameCommand.run(tables.scala:209 ``` ### Why are the changes needed? bugfix ### Does this PR introduce _any_ user-facing change? no, bugfix ### How was this patch tested? added new tests Closes #35895 from yaooqinn/SPARK-38587. Authored-by: Kent Yao Signed-off-by: Kent Yao (cherry picked from commit a3776e01839d7639e534bd345bcfb4bc63dd2a65) Signed-off-by: Kent Yao --- .../spark/sql/catalyst/catalog/SessionCatalog.scala | 10 ++++++---- .../execution/command/AlterTableRenameSuiteBase.scala | 8 ++++++++ 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 3727bb3c101cc..5872f2ab925dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -734,10 +734,9 @@ class SessionCatalog( } else { requireDbExists(db) if (oldName.database.isDefined || !tempViews.contains(oldTableName)) { - requireTableExists(TableIdentifier(oldTableName, Some(db))) - requireTableNotExists(TableIdentifier(newTableName, Some(db))) validateName(newTableName) - validateNewLocationOfRename(oldName, newName) + validateNewLocationOfRename( + TableIdentifier(oldTableName, Some(db)), TableIdentifier(newTableName, Some(db))) externalCatalog.renameTable(db, oldTableName, newTableName) } else { if (newName.database.isDefined) { @@ -1856,10 +1855,13 @@ class SessionCatalog( private def validateNewLocationOfRename( oldName: TableIdentifier, newName: TableIdentifier): Unit = { + requireTableExists(oldName) + requireTableNotExists(newName) val oldTable = getTableMetadata(oldName) if (oldTable.tableType == CatalogTableType.MANAGED) { + assert(oldName.database.nonEmpty) val databaseLocation = - externalCatalog.getDatabase(oldName.database.getOrElse(currentDb)).locationUri + externalCatalog.getDatabase(oldName.database.get).locationUri val newTableLocation = new Path(new Path(databaseLocation), formatTableName(newName.table)) val fs = newTableLocation.getFileSystem(hadoopConf) if (fs.exists(newTableLocation)) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala index 1803ec046930b..2942d61f7fb7f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/AlterTableRenameSuiteBase.scala @@ -136,4 +136,12 @@ trait AlterTableRenameSuiteBase extends QueryTest with DDLCommandTestUtils { checkAnswer(spark.table(dst), Row(1, 2)) } } + + test("SPARK-38587: use formatted names") { + withNamespaceAndTable("CaseUpperCaseLower", "CaseUpperCaseLower") { t => + sql(s"CREATE TABLE ${t}_Old (i int) $defaultUsing") + sql(s"ALTER TABLE ${t}_Old RENAME TO CaseUpperCaseLower.CaseUpperCaseLower") + assert(spark.table(t).isEmpty) + } + } } From 90b01fc12b2eb24d5d864fc89883889e36a194ab Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 23 Mar 2022 19:43:44 +0900 Subject: [PATCH 039/535] [SPARK-38628][SQL] Complete the copy method in subclasses of InternalRow, ArrayData, and MapData to safely copy their instances ### What changes were proposed in this pull request? Completes the `copy` method in subclasses of `InternalRow`, `ArrayData`, and `MapData` to safely copy their instances. ### Why are the changes needed? Some subclasses of `InternalRow`, `ArrayData`, and `MapData` missing support for `StructType`, `ArrayType`, and `MapType` in their copy method. We should complete them to safely copy their instances and prevent potential issues. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35942 from ueshin/issues/SPARK-38628/copy. Authored-by: Takuya UESHIN Signed-off-by: Hyukjin Kwon (cherry picked from commit 861e8b4a8ba784da1a69bd6522a0a7fdac5d1091) Signed-off-by: Hyukjin Kwon --- .../java/org/apache/spark/sql/vectorized/ColumnarArray.java | 2 +- .../org/apache/spark/sql/vectorized/ColumnarBatchRow.java | 6 ++++++ .../java/org/apache/spark/sql/vectorized/ColumnarRow.java | 6 ++++++ .../spark/sql/execution/vectorized/MutableColumnarRow.java | 6 ++++++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java index 2fb6b3fc6648f..bd7c3d7c0fd49 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java @@ -68,7 +68,7 @@ public ArrayData copy() { } else if (dt instanceof DoubleType) { return UnsafeArrayData.fromPrimitiveArray(toDoubleArray()); } else { - return new GenericArrayData(toObjectArray(dt)); + return new GenericArrayData(toObjectArray(dt)).copy(); // ensure the elements are copied. } } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java index 8c32d5c54cc01..7f841266008f8 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java @@ -71,6 +71,12 @@ public InternalRow copy() { row.setInt(i, getInt(i)); } else if (dt instanceof TimestampType) { row.setLong(i, getLong(i)); + } else if (dt instanceof StructType) { + row.update(i, getStruct(i, ((StructType) dt).fields().length).copy()); + } else if (dt instanceof ArrayType) { + row.update(i, getArray(i).copy()); + } else if (dt instanceof MapType) { + row.update(i, getMap(i).copy()); } else { throw new RuntimeException("Not implemented. " + dt); } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java index da4b242be20ee..fd4e8ff5cab53 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarRow.java @@ -80,6 +80,12 @@ public InternalRow copy() { row.setInt(i, getInt(i)); } else if (dt instanceof TimestampType) { row.setLong(i, getLong(i)); + } else if (dt instanceof StructType) { + row.update(i, getStruct(i, ((StructType) dt).fields().length).copy()); + } else if (dt instanceof ArrayType) { + row.update(i, getArray(i).copy()); + } else if (dt instanceof MapType) { + row.update(i, getMap(i).copy()); } else { throw new RuntimeException("Not implemented. " + dt); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java index f4fdf50692c11..64568f18f6858 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/MutableColumnarRow.java @@ -80,6 +80,12 @@ public InternalRow copy() { row.setInt(i, getInt(i)); } else if (dt instanceof TimestampType) { row.setLong(i, getLong(i)); + } else if (dt instanceof StructType) { + row.update(i, getStruct(i, ((StructType) dt).fields().length).copy()); + } else if (dt instanceof ArrayType) { + row.update(i, getArray(i).copy()); + } else if (dt instanceof MapType) { + row.update(i, getMap(i).copy()); } else { throw new RuntimeException("Not implemented. " + dt); } From 737077af04c1a62a99bfd5dba731174dd29f97f4 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Wed, 23 Mar 2022 21:40:44 +0800 Subject: [PATCH 040/535] [SPARK-37483][SQL][FOLLOWUP] Rename `pushedTopN` to `PushedTopN` and improve JDBCV2Suite ### What changes were proposed in this pull request? This PR fix three issues. **First**, create method `checkPushedInfo` and `checkSortRemoved` to reuse code. **Second**, remove method `checkPushedLimit`, because `checkPushedInfo` can cover it. **Third**, rename `pushedTopN` to `PushedTopN`, so as consistent with other pushed information. ### Why are the changes needed? Reuse code and let pushed information more correctly. ### Does this PR introduce _any_ user-facing change? 'No'. New feature and improve the tests. ### How was this patch tested? Adjust existing tests. Closes #35921 from beliefer/SPARK-37483_followup. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit 4fe55c522d7bc34487f21d0e69fc7c230d61a3bf) Signed-off-by: Wenchen Fan --- .../sql/execution/DataSourceScanExec.scala | 2 +- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 517 +++++------------- 2 files changed, 132 insertions(+), 387 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index e6de7d0e763b9..5067cd7fa3ca1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -148,7 +148,7 @@ case class RowDataSourceScanExec( val pushedTopN = s"ORDER BY ${seqToString(pushedDownOperators.sortValues.map(_.describe()))}" + s" LIMIT ${pushedDownOperators.limit.get}" - Some("pushedTopN" -> pushedTopN) + Some("PushedTopN" -> pushedTopN) } else { pushedDownOperators.limit.map(value => "PushedLimit" -> s"LIMIT $value") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 31fdb022b625f..e7e9174463bbf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -24,7 +24,6 @@ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.sql.{DataFrame, ExplainSuiteHelper, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.CannotReplaceMissingTableException import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Sort} -import org.apache.spark.sql.connector.expressions.{FieldReference, NullOrdering, SortDirection, SortValue} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanRelation, V1ScanWrapper} import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.functions.{avg, count, count_distinct, lit, not, sum, udf, when} @@ -110,13 +109,20 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(sql("SELECT name, id FROM h2.test.people"), Seq(Row("fred", 1), Row("mary", 2))) } + private def checkPushedInfo(df: DataFrame, expectedPlanFragment: String): Unit = { + df.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + checkKeywordsExistsInExplain(df, expectedPlanFragment) + } + } + // TABLESAMPLE ({integer_expression | decimal_expression} PERCENT) and // TABLESAMPLE (BUCKET integer_expression OUT OF integer_expression) // are tested in JDBC dialect tests because TABLESAMPLE is not supported by all the DBMS test("TABLESAMPLE (integer_expression ROWS) is the same as LIMIT") { val df = sql("SELECT NAME FROM h2.test.employee TABLESAMPLE (3 ROWS)") checkSchemaNames(df, Seq("NAME")) - checkPushedLimit(df, Some(3)) + checkPushedInfo(df, "PushedFilters: [], PushedLimit: LIMIT 3, ") checkAnswer(df, Seq(Row("amy"), Row("alex"), Row("cathy"))) } @@ -130,7 +136,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel test("simple scan with LIMIT") { val df1 = spark.read.table("h2.test.employee") .where($"dept" === 1).limit(1) - checkPushedLimit(df1, Some(1)) + checkPushedInfo(df1, + "PushedFilters: [DEPT IS NOT NULL, DEPT = 1], PushedLimit: LIMIT 1, ") checkAnswer(df1, Seq(Row(1, "amy", 10000.00, 1000.0, true))) val df2 = spark.read @@ -141,19 +148,22 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .table("h2.test.employee") .filter($"dept" > 1) .limit(1) - checkPushedLimit(df2, Some(1)) + checkPushedInfo(df2, + "PushedFilters: [DEPT IS NOT NULL, DEPT > 1], PushedLimit: LIMIT 1, ") checkAnswer(df2, Seq(Row(2, "alex", 12000.00, 1200.0, false))) val df3 = sql("SELECT name FROM h2.test.employee WHERE dept > 1 LIMIT 1") checkSchemaNames(df3, Seq("NAME")) - checkPushedLimit(df3, Some(1)) + checkPushedInfo(df3, + "PushedFilters: [DEPT IS NOT NULL, DEPT > 1], PushedLimit: LIMIT 1, ") checkAnswer(df3, Seq(Row("alex"))) val df4 = spark.read .table("h2.test.employee") .groupBy("DEPT").sum("SALARY") .limit(1) - checkPushedLimit(df4, None) + checkPushedInfo(df4, + "PushedAggregates: [SUM(SALARY)], PushedFilters: [], PushedGroupByColumns: [DEPT], ") checkAnswer(df4, Seq(Row(1, 19000.00))) val name = udf { (x: String) => x.matches("cat|dav|amy") } @@ -164,24 +174,18 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .filter(name($"shortName")) .limit(1) // LIMIT is pushed down only if all the filters are pushed down - checkPushedLimit(df5, None) + checkPushedInfo(df5, "PushedFilters: [], ") checkAnswer(df5, Seq(Row(10000.00, 1000.0, "amy"))) } - private def checkPushedLimit(df: DataFrame, limit: Option[Int] = None, - sortValues: Seq[SortValue] = Nil): Unit = { - df.queryExecution.optimizedPlan.collect { - case relation: DataSourceV2ScanRelation => relation.scan match { - case v1: V1ScanWrapper => - assert(v1.pushedDownOperators.limit === limit) - assert(v1.pushedDownOperators.sortValues === sortValues) - } + private def checkSortRemoved(df: DataFrame, removed: Boolean = true): Unit = { + val sorts = df.queryExecution.optimizedPlan.collect { + case s: Sort => s } - if (sortValues.nonEmpty) { - val sorts = df.queryExecution.optimizedPlan.collect { - case s: Sort => s - } + if (removed) { assert(sorts.isEmpty) + } else { + assert(sorts.nonEmpty) } } @@ -190,12 +194,16 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .table("h2.test.employee") .sort("salary") .limit(1) - checkPushedLimit(df1, Some(1), createSortValues()) + checkSortRemoved(df1) + checkPushedInfo(df1, + "PushedFilters: [], PushedTopN: ORDER BY [salary ASC NULLS FIRST] LIMIT 1, ") checkAnswer(df1, Seq(Row(1, "cathy", 9000.00, 1200.0, false))) val df2 = spark.read.table("h2.test.employee") .where($"dept" === 1).orderBy($"salary").limit(1) - checkPushedLimit(df2, Some(1), createSortValues()) + checkSortRemoved(df2) + checkPushedInfo(df2, "PushedFilters: [DEPT IS NOT NULL, DEPT = 1], " + + "PushedTopN: ORDER BY [salary ASC NULLS FIRST] LIMIT 1, ") checkAnswer(df2, Seq(Row(1, "cathy", 9000.00, 1200.0, false))) val df3 = spark.read @@ -207,19 +215,23 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .filter($"dept" > 1) .orderBy($"salary".desc) .limit(1) - checkPushedLimit( - df3, Some(1), createSortValues(SortDirection.DESCENDING, NullOrdering.NULLS_LAST)) + checkSortRemoved(df3) + checkPushedInfo(df3, "PushedFilters: [DEPT IS NOT NULL, DEPT > 1], " + + "PushedTopN: ORDER BY [salary DESC NULLS LAST] LIMIT 1, ") checkAnswer(df3, Seq(Row(2, "alex", 12000.00, 1200.0, false))) val df4 = sql("SELECT name FROM h2.test.employee WHERE dept > 1 ORDER BY salary NULLS LAST LIMIT 1") checkSchemaNames(df4, Seq("NAME")) - checkPushedLimit(df4, Some(1), createSortValues(nullOrdering = NullOrdering.NULLS_LAST)) + checkSortRemoved(df4) + checkPushedInfo(df4, "PushedFilters: [DEPT IS NOT NULL, DEPT > 1], " + + "PushedTopN: ORDER BY [salary ASC NULLS LAST] LIMIT 1, ") checkAnswer(df4, Seq(Row("david"))) val df5 = spark.read.table("h2.test.employee") .where($"dept" === 1).orderBy($"salary") - checkPushedLimit(df5, None) + checkSortRemoved(df5, false) + checkPushedInfo(df5, "PushedFilters: [DEPT IS NOT NULL, DEPT = 1], ") checkAnswer(df5, Seq(Row(1, "cathy", 9000.00, 1200.0, false), Row(1, "amy", 10000.00, 1000.0, true))) @@ -228,7 +240,9 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .groupBy("DEPT").sum("SALARY") .orderBy("DEPT") .limit(1) - checkPushedLimit(df6) + checkSortRemoved(df6, false) + checkPushedInfo(df6, "PushedAggregates: [SUM(SALARY)]," + + " PushedFilters: [], PushedGroupByColumns: [DEPT], ") checkAnswer(df6, Seq(Row(1, 19000.00))) val name = udf { (x: String) => x.matches("cat|dav|amy") } @@ -240,145 +254,69 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .sort($"SALARY".desc) .limit(1) // LIMIT is pushed down only if all the filters are pushed down - checkPushedLimit(df7) + checkSortRemoved(df7, false) + checkPushedInfo(df7, "PushedFilters: [], ") checkAnswer(df7, Seq(Row(10000.00, 1000.0, "amy"))) val df8 = spark.read .table("h2.test.employee") .sort(sub($"NAME")) .limit(1) - checkPushedLimit(df8) + checkSortRemoved(df8, false) + checkPushedInfo(df8, "PushedFilters: [], ") checkAnswer(df8, Seq(Row(2, "alex", 12000.00, 1200.0, false))) } - private def createSortValues( - sortDirection: SortDirection = SortDirection.ASCENDING, - nullOrdering: NullOrdering = NullOrdering.NULLS_FIRST): Seq[SortValue] = { - Seq(SortValue(FieldReference("salary"), sortDirection, nullOrdering)) - } - test("scan with filter push-down") { val df = spark.table("h2.test.people").filter($"id" > 1) - checkFiltersRemoved(df) - - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedFilters: [ID IS NOT NULL, ID > 1]" - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } - + checkPushedInfo(df, "PushedFilters: [ID IS NOT NULL, ID > 1], ") checkAnswer(df, Row("mary", 2)) val df2 = spark.table("h2.test.employee").filter($"name".isin("amy", "cathy")) - checkFiltersRemoved(df2) - - df2.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedFilters: [NAME IN ('amy', 'cathy')]" - checkKeywordsExistsInExplain(df2, expected_plan_fragment) - } - + checkPushedInfo(df2, "PushedFilters: [NAME IN ('amy', 'cathy')]") checkAnswer(df2, Seq(Row(1, "amy", 10000, 1000, true), Row(1, "cathy", 9000, 1200, false))) val df3 = spark.table("h2.test.employee").filter($"name".startsWith("a")) - checkFiltersRemoved(df3) - - df3.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedFilters: [NAME IS NOT NULL, NAME LIKE 'a%']" - checkKeywordsExistsInExplain(df3, expected_plan_fragment) - } - + checkPushedInfo(df3, "PushedFilters: [NAME IS NOT NULL, NAME LIKE 'a%']") checkAnswer(df3, Seq(Row(1, "amy", 10000, 1000, true), Row(2, "alex", 12000, 1200, false))) val df4 = spark.table("h2.test.employee").filter($"is_manager") - checkFiltersRemoved(df4) - - df4.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedFilters: [IS_MANAGER IS NOT NULL, IS_MANAGER = true]" - checkKeywordsExistsInExplain(df4, expected_plan_fragment) - } - + checkPushedInfo(df4, "PushedFilters: [IS_MANAGER IS NOT NULL, IS_MANAGER = true]") checkAnswer(df4, Seq(Row(1, "amy", 10000, 1000, true), Row(2, "david", 10000, 1300, true), Row(6, "jen", 12000, 1200, true))) val df5 = spark.table("h2.test.employee").filter($"is_manager".and($"salary" > 10000)) - checkFiltersRemoved(df5) - - df5.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedFilters: [IS_MANAGER IS NOT NULL, SALARY IS NOT NULL, " + - "IS_MANAGER = true, SALARY > 10000.00]" - checkKeywordsExistsInExplain(df5, expected_plan_fragment) - } - + checkPushedInfo(df5, "PushedFilters: [IS_MANAGER IS NOT NULL, SALARY IS NOT NULL, " + + "IS_MANAGER = true, SALARY > 10000.00]") checkAnswer(df5, Seq(Row(6, "jen", 12000, 1200, true))) val df6 = spark.table("h2.test.employee").filter($"is_manager".or($"salary" > 10000)) - checkFiltersRemoved(df6) - - df6.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedFilters: [(IS_MANAGER = true) OR (SALARY > 10000.00)], " - checkKeywordsExistsInExplain(df6, expected_plan_fragment) - } - + checkPushedInfo(df6, "PushedFilters: [(IS_MANAGER = true) OR (SALARY > 10000.00)], ") checkAnswer(df6, Seq(Row(1, "amy", 10000, 1000, true), Row(2, "alex", 12000, 1200, false), Row(2, "david", 10000, 1300, true), Row(6, "jen", 12000, 1200, true))) val df7 = spark.table("h2.test.employee").filter(not($"is_manager") === true) - checkFiltersRemoved(df7) - - df7.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedFilters: [IS_MANAGER IS NOT NULL, NOT (IS_MANAGER = true)], " - checkKeywordsExistsInExplain(df7, expected_plan_fragment) - } - + checkPushedInfo(df7, "PushedFilters: [IS_MANAGER IS NOT NULL, NOT (IS_MANAGER = true)], ") checkAnswer(df7, Seq(Row(1, "cathy", 9000, 1200, false), Row(2, "alex", 12000, 1200, false))) val df8 = spark.table("h2.test.employee").filter($"is_manager" === true) - checkFiltersRemoved(df8) - - df8.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedFilters: [IS_MANAGER IS NOT NULL, IS_MANAGER = true], " - checkKeywordsExistsInExplain(df8, expected_plan_fragment) - } - + checkPushedInfo(df8, "PushedFilters: [IS_MANAGER IS NOT NULL, IS_MANAGER = true], ") checkAnswer(df8, Seq(Row(1, "amy", 10000, 1000, true), Row(2, "david", 10000, 1300, true), Row(6, "jen", 12000, 1200, true))) val df9 = spark.table("h2.test.employee") .filter(when($"dept" > 1, true).when($"is_manager", false).otherwise($"dept" > 3)) - checkFiltersRemoved(df9) - - df9.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedFilters: [CASE WHEN DEPT > 1 THEN TRUE WHEN IS_MANAGER = true THEN FALSE" + - " ELSE DEPT > 3 END], " - checkKeywordsExistsInExplain(df9, expected_plan_fragment) - } - + checkPushedInfo(df9, "PushedFilters: [CASE WHEN DEPT > 1 THEN TRUE " + + "WHEN IS_MANAGER = true THEN FALSE ELSE DEPT > 3 END], ") checkAnswer(df9, Seq(Row(2, "alex", 12000, 1200, false), Row(2, "david", 10000, 1300, true), Row(6, "jen", 12000, 1200, true))) } @@ -387,19 +325,13 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel Seq(false, true).foreach { ansiMode => withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiMode.toString) { val df = spark.table("h2.test.people").filter($"id" + 1 > 1) - checkFiltersRemoved(df, ansiMode) - - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = if (ansiMode) { - "PushedFilters: [ID IS NOT NULL, (ID + 1) > 1]" - } else { - "PushedFilters: [ID IS NOT NULL]" - } - checkKeywordsExistsInExplain(df, expected_plan_fragment) + val expectedPlanFragment = if (ansiMode) { + "PushedFilters: [ID IS NOT NULL, (ID + 1) > 1]" + } else { + "PushedFilters: [ID IS NOT NULL]" } - + checkPushedInfo(df, expectedPlanFragment) checkAnswer(df, Seq(Row("fred", 1), Row("mary", 2))) val df2 = spark.table("h2.test.people").filter($"id" + Int.MaxValue > 1) @@ -432,18 +364,13 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel |""".stripMargin) checkFiltersRemoved(df3, ansiMode) - - df3.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = if (ansiMode) { - "PushedFilters: [(CASE WHEN SALARY > 10000.00 THEN BONUS" + - " ELSE BONUS + 200.0 END) > 1200.0]" - } else { - "PushedFilters: []" - } - checkKeywordsExistsInExplain(df3, expected_plan_fragment) + val expectedPlanFragment3 = if (ansiMode) { + "PushedFilters: [(CASE WHEN SALARY > 10000.00 THEN BONUS" + + " ELSE BONUS + 200.0 END) > 1200.0]" + } else { + "PushedFilters: []" } - + checkPushedInfo(df3, expectedPlanFragment3) checkAnswer(df3, Seq(Row(1, "cathy", 9000, 1200, false), Row(2, "david", 10000, 1300, true))) } @@ -587,14 +514,9 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel " group by DePt") checkFiltersRemoved(df) checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [MAX(SALARY), AVG(BONUS)], " + - "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + - "PushedGroupByColumns: [DEPT], " - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [MAX(SALARY), AVG(BONUS)], " + + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + + "PushedGroupByColumns: [DEPT], ") checkAnswer(df, Seq(Row(10000, 1100.0), Row(12000, 1250.0), Row(12000, 1200.0))) } @@ -613,14 +535,9 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val df = sql("select MAX(ID), AVG(ID) FROM h2.test.people where id > 0") checkFiltersRemoved(df) checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [MAX(ID), AVG(ID)], " + - "PushedFilters: [ID IS NOT NULL, ID > 0], " + - "PushedGroupByColumns: [], " - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [MAX(ID), AVG(ID)], " + + "PushedFilters: [ID IS NOT NULL, ID > 0], " + + "PushedGroupByColumns: [], ") checkAnswer(df, Seq(Row(2, 1.5))) } @@ -650,42 +567,28 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel "PushedAggregates: [MAX(SALARY)]" checkKeywordsExistsInExplain(df, expected_plan_fragment) } + checkPushedInfo(df, "PushedAggregates: [MAX(SALARY)]") checkAnswer(df, Seq(Row(12001))) } test("scan with aggregate push-down: COUNT(*)") { val df = sql("select COUNT(*) FROM h2.test.employee") checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [COUNT(*)]" - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [COUNT(*)]") checkAnswer(df, Seq(Row(5))) } test("scan with aggregate push-down: COUNT(col)") { val df = sql("select COUNT(DEPT) FROM h2.test.employee") checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [COUNT(DEPT)]" - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [COUNT(DEPT)]") checkAnswer(df, Seq(Row(5))) } test("scan with aggregate push-down: COUNT(DISTINCT col)") { val df = sql("select COUNT(DISTINCT DEPT) FROM h2.test.employee") checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [COUNT(DISTINCT DEPT)]" - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [COUNT(DISTINCT DEPT)]") checkAnswer(df, Seq(Row(3))) } @@ -704,52 +607,30 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel test("scan with aggregate push-down: SUM without filer and group by") { val df = sql("SELECT SUM(SALARY) FROM h2.test.employee") checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [SUM(SALARY)]" - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [SUM(SALARY)]") checkAnswer(df, Seq(Row(53000))) } test("scan with aggregate push-down: DISTINCT SUM without filer and group by") { val df = sql("SELECT SUM(DISTINCT SALARY) FROM h2.test.employee") checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [SUM(DISTINCT SALARY)]" - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [SUM(DISTINCT SALARY)]") checkAnswer(df, Seq(Row(31000))) } test("scan with aggregate push-down: SUM with group by") { val df = sql("SELECT SUM(SALARY) FROM h2.test.employee GROUP BY DEPT") checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [SUM(SALARY)], " + - "PushedFilters: [], " + - "PushedGroupByColumns: [DEPT], " - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [SUM(SALARY)], " + + "PushedFilters: [], PushedGroupByColumns: [DEPT], ") checkAnswer(df, Seq(Row(19000), Row(22000), Row(12000))) } test("scan with aggregate push-down: DISTINCT SUM with group by") { val df = sql("SELECT SUM(DISTINCT SALARY) FROM h2.test.employee GROUP BY DEPT") checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [SUM(DISTINCT SALARY)], " + - "PushedFilters: [], " + - "PushedGroupByColumns: [DEPT], " - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [SUM(DISTINCT SALARY)], " + + "PushedFilters: [], PushedGroupByColumns: [DEPT]") checkAnswer(df, Seq(Row(19000), Row(22000), Row(12000))) } @@ -758,14 +639,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel " group by DEPT, NAME") checkFiltersRemoved(df) checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [MAX(SALARY), MIN(BONUS)], " + - "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + - "PushedGroupByColumns: [DEPT, NAME], " - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [MAX(SALARY), MIN(BONUS)], " + + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], PushedGroupByColumns: [DEPT, NAME]") checkAnswer(df, Seq(Row(9000, 1200), Row(12000, 1200), Row(10000, 1300), Row(10000, 1000), Row(12000, 1200))) } @@ -778,14 +653,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel } assert(filters1.isEmpty) checkAggregateRemoved(df1) - df1.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [MAX(SALARY)], " + - "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + - "PushedGroupByColumns: [DEPT, NAME], " - checkKeywordsExistsInExplain(df1, expected_plan_fragment) - } + checkPushedInfo(df1, "PushedAggregates: [MAX(SALARY)], " + + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], PushedGroupByColumns: [DEPT, NAME]") checkAnswer(df1, Seq(Row("1#amy", 10000), Row("1#cathy", 9000), Row("2#alex", 12000), Row("2#david", 10000), Row("6#jen", 12000))) @@ -796,30 +665,16 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel } assert(filters2.isEmpty) checkAggregateRemoved(df2) - df2.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [MAX(SALARY), MIN(BONUS)], " + - "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + - "PushedGroupByColumns: [DEPT, NAME], " - checkKeywordsExistsInExplain(df2, expected_plan_fragment) - } + checkPushedInfo(df2, "PushedAggregates: [MAX(SALARY), MIN(BONUS)], " + + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], PushedGroupByColumns: [DEPT, NAME]") checkAnswer(df2, Seq(Row("1#amy", 11000), Row("1#cathy", 10200), Row("2#alex", 13200), Row("2#david", 11300), Row("6#jen", 13200))) val df3 = sql("select concat_ws('#', DEPT, NAME), MAX(SALARY) + MIN(BONUS)" + " FROM h2.test.employee where dept > 0 group by concat_ws('#', DEPT, NAME)") - val filters3 = df3.queryExecution.optimizedPlan.collect { - case f: Filter => f - } - assert(filters3.isEmpty) + checkFiltersRemoved(df3) checkAggregateRemoved(df3, false) - df3.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " - checkKeywordsExistsInExplain(df3, expected_plan_fragment) - } + checkPushedInfo(df3, "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], ") checkAnswer(df3, Seq(Row("1#amy", 11000), Row("1#cathy", 10200), Row("2#alex", 13200), Row("2#david", 11300), Row("6#jen", 13200))) } @@ -827,19 +682,11 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel test("scan with aggregate push-down: with having clause") { val df = sql("select MAX(SALARY), MIN(BONUS) FROM h2.test.employee where dept > 0" + " group by DEPT having MIN(BONUS) > 1000") - val filters = df.queryExecution.optimizedPlan.collect { - case f: Filter => f // filter over aggregate not push down - } - assert(filters.nonEmpty) + // filter over aggregate not push down + checkFiltersRemoved(df, false) checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [MAX(SALARY), MIN(BONUS)], " + - "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + - "PushedGroupByColumns: [DEPT], " - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [MAX(SALARY), MIN(BONUS)], " + + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], PushedGroupByColumns: [DEPT]") checkAnswer(df, Seq(Row(12000, 1200), Row(12000, 1200))) } @@ -848,14 +695,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .groupBy($"DEPT") .min("SALARY").as("total") checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [MIN(SALARY)], " + - "PushedFilters: [], " + - "PushedGroupByColumns: [DEPT], " - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [MIN(SALARY)], " + + "PushedFilters: [], PushedGroupByColumns: [DEPT]") checkAnswer(df, Seq(Row(1, 9000), Row(2, 10000), Row(6, 12000))) } @@ -867,19 +708,10 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .agg(sum($"SALARY").as("total")) .filter($"total" > 1000) .orderBy($"total") - val filters = query.queryExecution.optimizedPlan.collect { - case f: Filter => f - } - assert(filters.nonEmpty) // filter over aggregate not pushed down - checkAggregateRemoved(df) - query.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [SUM(SALARY)], " + - "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + - "PushedGroupByColumns: [DEPT], " - checkKeywordsExistsInExplain(query, expected_plan_fragment) - } + checkFiltersRemoved(query, false)// filter over aggregate not pushed down + checkAggregateRemoved(query) + checkPushedInfo(query, "PushedAggregates: [SUM(SALARY)], " + + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], PushedGroupByColumns: [DEPT]") checkAnswer(query, Seq(Row(6, 12000), Row(1, 19000), Row(2, 22000))) } @@ -888,12 +720,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val decrease = udf { (x: Double, y: Double) => x - y } val query = df.select(decrease(sum($"SALARY"), sum($"BONUS")).as("value")) checkAggregateRemoved(query) - query.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [SUM(SALARY), SUM(BONUS)]" - checkKeywordsExistsInExplain(query, expected_plan_fragment) - } + checkPushedInfo(query, "PushedAggregates: [SUM(SALARY), SUM(BONUS)], ") checkAnswer(query, Seq(Row(47100.0))) } @@ -915,14 +742,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel " group by DePt") checkFiltersRemoved(df) checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [VAR_POP(BONUS), VAR_SAMP(BONUS)], " + - "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + - "PushedGroupByColumns: [DEPT], " - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [VAR_POP(BONUS), VAR_SAMP(BONUS)], " + + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], PushedGroupByColumns: [DEPT]") checkAnswer(df, Seq(Row(10000d, 20000d), Row(2500d, 5000d), Row(0d, null))) } @@ -931,14 +752,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel " where dept > 0 group by DePt") checkFiltersRemoved(df) checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [STDDEV_POP(BONUS), STDDEV_SAMP(BONUS)], " + - "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + - "PushedGroupByColumns: [DEPT], " - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [STDDEV_POP(BONUS), STDDEV_SAMP(BONUS)], " + + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], PushedGroupByColumns: [DEPT]") checkAnswer(df, Seq(Row(100d, 141.4213562373095d), Row(50d, 70.71067811865476d), Row(0d, null))) } @@ -947,14 +762,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel " FROM h2.test.employee where dept > 0 group by DePt") checkFiltersRemoved(df) checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [COVAR_POP(BONUS, BONUS), COVAR_SAMP(BONUS, BONUS)], " + - "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + - "PushedGroupByColumns: [DEPT], " - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [COVAR_POP(BONUS, BONUS), COVAR_SAMP(BONUS, BONUS)], " + + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], PushedGroupByColumns: [DEPT]") checkAnswer(df, Seq(Row(10000d, 20000d), Row(2500d, 5000d), Row(0d, null))) } @@ -963,14 +772,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel " group by DePt") checkFiltersRemoved(df) checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [CORR(BONUS, BONUS)], " + - "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], " + - "PushedGroupByColumns: [DEPT], " - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [CORR(BONUS, BONUS)], " + + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], PushedGroupByColumns: [DEPT]") checkAnswer(df, Seq(Row(1d), Row(1d), Row(null))) } @@ -1032,15 +835,11 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel |FROM h2.test.employee GROUP BY DEPT """.stripMargin) checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [COUNT(CASE WHEN (SALARY > 8000.00) AND (SALARY < 10000.00)" + - " THEN SALARY ELSE 0.00 END), COUNT(CAS..., " + - "PushedFilters: [], " + - "PushedGroupByColumns: [DEPT], " - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, + "PushedAggregates: [COUNT(CASE WHEN (SALARY > 8000.00) AND (SALARY < 10000.00)" + + " THEN SALARY ELSE 0.00 END), COUNT(CAS..., " + + "PushedFilters: [], " + + "PushedGroupByColumns: [DEPT], ") checkAnswer(df, Seq(Row(1, 1, 1, 1, 1, 0d, 12000d, 0d, 12000d, 12000d, 0d, 0d, 2, 0d), Row(2, 2, 2, 2, 2, 0d, 10000d, 0d, 10000d, 10000d, 0d, 0d, 2, 0d), Row(2, 2, 2, 2, 2, 0d, 12000d, 0d, 12000d, 12000d, 0d, 0d, 3, 0d))) @@ -1051,17 +850,14 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiMode.toString) { val df = sql("SELECT SUM(2147483647 + DEPT) FROM h2.test.employee") checkAggregateRemoved(df, ansiMode) - val expected_plan_fragment = if (ansiMode) { + val expectedPlanFragment = if (ansiMode) { "PushedAggregates: [SUM(2147483647 + DEPT)], " + "PushedFilters: [], " + "PushedGroupByColumns: []" } else { "PushedFilters: []" } - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, expectedPlanFragment) if (ansiMode) { val e = intercept[SparkException] { checkAnswer(df, Seq(Row(-10737418233L))) @@ -1080,12 +876,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val decrease = udf { (x: Double, y: Double) => x - y } val query = df.select(sum(decrease($"SALARY", $"BONUS")).as("value")) checkAggregateRemoved(query, false) - query.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedFilters: []" - checkKeywordsExistsInExplain(query, expected_plan_fragment) - } + checkPushedInfo(query, "PushedFilters: []") checkAnswer(query, Seq(Row(47100.0))) } @@ -1121,12 +912,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(sql("SELECT `dept id` FROM h2.test.dept"), Seq(Row(1), Row(2))) val df = sql("SELECT COUNT(`dept id`) FROM h2.test.dept") checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [COUNT(`dept id`)]" - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [COUNT(`dept id`)]") checkAnswer(df, Seq(Row(2))) } @@ -1135,12 +921,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(sql("SELECT `名` FROM h2.test.person"), Seq(Row(1), Row(2))) val df = sql("SELECT COUNT(`名`) FROM h2.test.person") checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [COUNT(`名`)]" - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [COUNT(`名`)]") checkAnswer(df, Seq(Row(2))) // scalastyle:on } @@ -1154,12 +935,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .table("h2.test.employee") .agg(sum($"SALARY").as("sum"), avg($"SALARY").as("avg"), count($"SALARY").as("count")) checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [SUM(SALARY), AVG(SALARY), COUNT(SALARY)]" - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [SUM(SALARY), AVG(SALARY), COUNT(SALARY)]") checkAnswer(df, Seq(Row(53000.00, 10600.000000, 5))) val df2 = spark.read @@ -1171,12 +947,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .groupBy($"name") .agg(sum($"SALARY").as("sum"), avg($"SALARY").as("avg"), count($"SALARY").as("count")) checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [SUM(SALARY), AVG(SALARY), COUNT(SALARY)]" - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [SUM(SALARY), AVG(SALARY), COUNT(SALARY)]") checkAnswer(df2, Seq( Row("alex", 12000.00, 12000.000000, 1), Row("amy", 10000.00, 10000.000000, 1), @@ -1194,12 +965,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .table("h2.test.employee") .agg(sum($"SALARY").as("sum"), avg($"SALARY").as("avg"), count($"SALARY").as("count")) checkAggregateRemoved(df, false) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [SUM(SALARY), COUNT(SALARY)]" - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [SUM(SALARY), COUNT(SALARY)]") checkAnswer(df, Seq(Row(53000.00, 10600.000000, 5))) val df2 = spark.read @@ -1211,12 +977,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .groupBy($"name") .agg(sum($"SALARY").as("sum"), avg($"SALARY").as("avg"), count($"SALARY").as("count")) checkAggregateRemoved(df, false) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = - "PushedAggregates: [SUM(SALARY), COUNT(SALARY)]" - checkKeywordsExistsInExplain(df, expected_plan_fragment) - } + checkPushedInfo(df, "PushedAggregates: [SUM(SALARY), COUNT(SALARY)]") checkAnswer(df2, Seq( Row("alex", 12000.00, 12000.000000, 1), Row("amy", 10000.00, 10000.000000, 1), @@ -1240,12 +1001,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .agg(sum($"mySalary").as("total")) .filter($"total" > 1000) checkAggregateRemoved(df) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expectedPlanFragment = - "PushedAggregates: [SUM(SALARY)], PushedFilters: [], PushedGroupByColumns: [DEPT]" - checkKeywordsExistsInExplain(df, expectedPlanFragment) - } + checkPushedInfo(df, + "PushedAggregates: [SUM(SALARY)], PushedFilters: [], PushedGroupByColumns: [DEPT]") checkAnswer(df, Seq(Row(1, 19000.00), Row(2, 22000.00), Row(6, 12000.00))) val df2 = spark.table("h2.test.employee") @@ -1254,12 +1011,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .agg(sum($"mySalary").as("total")) .filter($"total" > 1000) checkAggregateRemoved(df2) - df2.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expectedPlanFragment = - "PushedAggregates: [SUM(SALARY)], PushedFilters: [], PushedGroupByColumns: [DEPT]" - checkKeywordsExistsInExplain(df2, expectedPlanFragment) - } + checkPushedInfo(df2, + "PushedAggregates: [SUM(SALARY)], PushedFilters: [], PushedGroupByColumns: [DEPT]") checkAnswer(df2, Seq(Row(1, 19000.00), Row(2, 22000.00), Row(6, 12000.00))) } @@ -1275,12 +1028,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .agg(sum($"mySalary").as("total")) .filter($"total" > 1000) checkAggregateRemoved(df, false) - df.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expectedPlanFragment = - "PushedAggregates: [SUM(SALARY)], PushedFilters: [], PushedGroupByColumns: [NAME]" - checkKeywordsExistsInExplain(df, expectedPlanFragment) - } + checkPushedInfo(df, + "PushedAggregates: [SUM(SALARY)], PushedFilters: [], PushedGroupByColumns: [NAME]") checkAnswer(df, Seq(Row("alex", 12000.00), Row("amy", 10000.00), Row("cathy", 9000.00), Row("david", 10000.00), Row("jen", 12000.00))) @@ -1295,12 +1044,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .agg(sum($"mySalary").as("total")) .filter($"total" > 1000) checkAggregateRemoved(df2, false) - df2.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expectedPlanFragment = - "PushedAggregates: [SUM(SALARY)], PushedFilters: [], PushedGroupByColumns: [NAME]" - checkKeywordsExistsInExplain(df2, expectedPlanFragment) - } + checkPushedInfo(df2, + "PushedAggregates: [SUM(SALARY)], PushedFilters: [], PushedGroupByColumns: [NAME]") checkAnswer(df2, Seq(Row("alex", 12000.00), Row("amy", 10000.00), Row("cathy", 9000.00), Row("david", 10000.00), Row("jen", 12000.00))) } From eb5d8fab5a575ce54e43926e1fa3ec31080934f0 Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 23 Mar 2022 08:59:30 -0500 Subject: [PATCH 041/535] [SPARK-18621][PYTHON] Make sql type reprs eval-able ### What changes were proposed in this pull request? These changes update the `__repr__` methods of type classes in `pyspark.sql.types` to print string representations which are `eval`-able. In other words, any instance of a `DataType` will produce a repr which can be passed to `eval()` to create an identical instance. Similar changes previously submitted: https://github.com/apache/spark/pull/25495 ### Why are the changes needed? This [bug](https://issues.apache.org/jira/browse/SPARK-18621) has been around for a while. The current implementation returns a string representation which is valid in scala rather than python. These changes fix the repr to be valid with python. The [motivation](https://docs.python.org/3/library/functions.html#repr) is "to return a string that would yield an object with the same value when passed to eval()". ### Does this PR introduce _any_ user-facing change? Example: Current implementation: ```python from pyspark.sql.types import * struct = StructType([StructField('f1', StringType(), True)]) repr(struct) # StructType(List(StructField(f1,StringType,true))) new_struct = eval(repr(struct)) # Traceback (most recent call last): # File "", line 1, in # File "", line 1, in # NameError: name 'List' is not defined struct_field = StructField('f1', StringType(), True) repr(struct_field) # StructField(f1,StringType,true) new_struct_field = eval(repr(struct_field)) # Traceback (most recent call last): # File "", line 1, in # File "", line 1, in # NameError: name 'f1' is not defined ``` With changes: ```python from pyspark.sql.types import * struct = StructType([StructField('f1', StringType(), True)]) repr(struct) # StructType([StructField('f1', StringType(), True)]) new_struct = eval(repr(struct)) struct == new_struct # True struct_field = StructField('f1', StringType(), True) repr(struct_field) # StructField('f1', StringType(), True) new_struct_field = eval(repr(struct_field)) struct_field == new_struct_field # True ``` ### How was this patch tested? The changes include a test which asserts that an instance of each type is equal to the `eval` of its `repr`, as in the above example. Closes #34320 from crflynn/sql-types-repr. Lead-authored-by: flynn Co-authored-by: Flynn Signed-off-by: Sean Owen (cherry picked from commit c5ebdc6ded0479f557541a08b1312097a9c4244f) Signed-off-by: Sean Owen --- .../migration_guide/pyspark_3.2_to_3.3.rst | 1 + python/pyspark/ml/functions.py | 8 +- python/pyspark/pandas/extensions.py | 8 +- python/pyspark/pandas/internal.py | 78 ++++++++++--------- python/pyspark/pandas/spark/utils.py | 23 ++++-- python/pyspark/pandas/tests/test_groupby.py | 2 +- python/pyspark/pandas/tests/test_series.py | 2 +- python/pyspark/pandas/typedef/typehints.py | 52 ++++++------- python/pyspark/sql/dataframe.py | 3 +- python/pyspark/sql/tests/test_dataframe.py | 2 +- python/pyspark/sql/tests/test_types.py | 23 ++++++ python/pyspark/sql/types.py | 32 ++++---- 12 files changed, 135 insertions(+), 99 deletions(-) diff --git a/python/docs/source/migration_guide/pyspark_3.2_to_3.3.rst b/python/docs/source/migration_guide/pyspark_3.2_to_3.3.rst index f2701d4fb7216..d81008d63cbe9 100644 --- a/python/docs/source/migration_guide/pyspark_3.2_to_3.3.rst +++ b/python/docs/source/migration_guide/pyspark_3.2_to_3.3.rst @@ -23,3 +23,4 @@ Upgrading from PySpark 3.2 to 3.3 * In Spark 3.3, the ``pyspark.pandas.sql`` method follows [the standard Python string formatter](https://docs.python.org/3/library/string.html#format-string-syntax). To restore the previous behavior, set ``PYSPARK_PANDAS_SQL_LEGACY`` environment variable to ``1``. * In Spark 3.3, the ``drop`` method of pandas API on Spark DataFrame supports dropping rows by ``index``, and sets dropping by index instead of column by default. * In Spark 3.3, PySpark upgrades Pandas version, the new minimum required version changes from 0.23.2 to 1.0.5. +* In Spark 3.3, the ``repr`` return values of SQL DataTypes have been changed to yield an object with the same value when passed to ``eval``. diff --git a/python/pyspark/ml/functions.py b/python/pyspark/ml/functions.py index aecafb3d494df..9725d4b033bd8 100644 --- a/python/pyspark/ml/functions.py +++ b/python/pyspark/ml/functions.py @@ -58,11 +58,11 @@ def vector_to_array(col: Column, dtype: str = "float64") -> Column: [Row(vec=[1.0, 2.0, 3.0], oldVec=[10.0, 20.0, 30.0]), Row(vec=[2.0, 0.0, 3.0], oldVec=[20.0, 0.0, 30.0])] >>> df1.schema.fields - [StructField(vec,ArrayType(DoubleType,false),false), - StructField(oldVec,ArrayType(DoubleType,false),false)] + [StructField('vec', ArrayType(DoubleType(), False), False), + StructField('oldVec', ArrayType(DoubleType(), False), False)] >>> df2.schema.fields - [StructField(vec,ArrayType(FloatType,false),false), - StructField(oldVec,ArrayType(FloatType,false),false)] + [StructField('vec', ArrayType(FloatType(), False), False), + StructField('oldVec', ArrayType(FloatType(), False), False)] """ sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None diff --git a/python/pyspark/pandas/extensions.py b/python/pyspark/pandas/extensions.py index 69f742541a599..eeb02f06a85e9 100644 --- a/python/pyspark/pandas/extensions.py +++ b/python/pyspark/pandas/extensions.py @@ -109,7 +109,7 @@ def __init__(self, pandas_on_spark_obj): ... Traceback (most recent call last): ... - ValueError: Cannot call DatetimeMethods on type StringType + ValueError: Cannot call DatetimeMethods on type StringType() Note: This function is not meant to be used directly - instead, use register_dataframe_accessor, register_series_accessor, or register_index_accessor. @@ -169,7 +169,7 @@ def register_dataframe_accessor(name: str) -> Callable[[Type[T]], Type[T]]: ... Traceback (most recent call last): ... - ValueError: Cannot call DatetimeMethods on type StringType + ValueError: Cannot call DatetimeMethods on type StringType() Examples -------- @@ -250,7 +250,7 @@ def __init__(self, pandas_on_spark_obj): ... Traceback (most recent call last): ... - ValueError: Cannot call DatetimeMethods on type StringType + ValueError: Cannot call DatetimeMethods on type StringType() Examples -------- @@ -322,7 +322,7 @@ def __init__(self, pandas_on_spark_obj): ... Traceback (most recent call last): ... - ValueError: Cannot call DatetimeMethods on type StringType + ValueError: Cannot call DatetimeMethods on type StringType() Examples -------- diff --git a/python/pyspark/pandas/internal.py b/python/pyspark/pandas/internal.py index ffc86ba4c6134..f79f0ada73a8d 100644 --- a/python/pyspark/pandas/internal.py +++ b/python/pyspark/pandas/internal.py @@ -206,7 +206,7 @@ def __eq__(self, other: Any) -> bool: ) def __repr__(self) -> str: - return "InternalField(dtype={dtype},struct_field={struct_field})".format( + return "InternalField(dtype={dtype}, struct_field={struct_field})".format( dtype=self.dtype, struct_field=self.struct_field ) @@ -293,13 +293,13 @@ class InternalFrame: >>> internal.index_names [None] >>> internal.data_fields # doctest: +NORMALIZE_WHITESPACE - [InternalField(dtype=int64,struct_field=StructField(A,LongType,false)), - InternalField(dtype=int64,struct_field=StructField(B,LongType,false)), - InternalField(dtype=int64,struct_field=StructField(C,LongType,false)), - InternalField(dtype=int64,struct_field=StructField(D,LongType,false)), - InternalField(dtype=int64,struct_field=StructField(E,LongType,false))] + [InternalField(dtype=int64, struct_field=StructField('A', LongType(), False)), + InternalField(dtype=int64, struct_field=StructField('B', LongType(), False)), + InternalField(dtype=int64, struct_field=StructField('C', LongType(), False)), + InternalField(dtype=int64, struct_field=StructField('D', LongType(), False)), + InternalField(dtype=int64, struct_field=StructField('E', LongType(), False))] >>> internal.index_fields - [InternalField(dtype=int64,struct_field=StructField(__index_level_0__,LongType,false))] + [InternalField(dtype=int64, struct_field=StructField('__index_level_0__', LongType(), False))] >>> internal.to_internal_spark_frame.show() # doctest: +NORMALIZE_WHITESPACE +-----------------+---+---+---+---+---+ |__index_level_0__| A| B| C| D| E| @@ -355,13 +355,13 @@ class InternalFrame: ['A', 'B', 'C', 'D', 'E'] >>> internal.index_names [('A',)] - >>> internal.data_fields - [InternalField(dtype=int64,struct_field=StructField(B,LongType,false)), - InternalField(dtype=int64,struct_field=StructField(C,LongType,false)), - InternalField(dtype=int64,struct_field=StructField(D,LongType,false)), - InternalField(dtype=int64,struct_field=StructField(E,LongType,false))] + >>> internal.data_fields # doctest: +NORMALIZE_WHITESPACE + [InternalField(dtype=int64, struct_field=StructField('B', LongType(), False)), + InternalField(dtype=int64, struct_field=StructField('C', LongType(), False)), + InternalField(dtype=int64, struct_field=StructField('D', LongType(), False)), + InternalField(dtype=int64, struct_field=StructField('E', LongType(), False))] >>> internal.index_fields - [InternalField(dtype=int64,struct_field=StructField(A,LongType,false))] + [InternalField(dtype=int64, struct_field=StructField('A', LongType(), False))] >>> internal.to_internal_spark_frame.show() # doctest: +NORMALIZE_WHITESPACE +---+---+---+---+---+ | A| B| C| D| E| @@ -419,13 +419,13 @@ class InternalFrame: >>> internal.index_names [None, ('A',)] >>> internal.data_fields # doctest: +NORMALIZE_WHITESPACE - [InternalField(dtype=int64,struct_field=StructField(B,LongType,false)), - InternalField(dtype=int64,struct_field=StructField(C,LongType,false)), - InternalField(dtype=int64,struct_field=StructField(D,LongType,false)), - InternalField(dtype=int64,struct_field=StructField(E,LongType,false))] + [InternalField(dtype=int64, struct_field=StructField('B', LongType(), False)), + InternalField(dtype=int64, struct_field=StructField('C', LongType(), False)), + InternalField(dtype=int64, struct_field=StructField('D', LongType(), False)), + InternalField(dtype=int64, struct_field=StructField('E', LongType(), False))] >>> internal.index_fields # doctest: +NORMALIZE_WHITESPACE - [InternalField(dtype=int64,struct_field=StructField(__index_level_0__,LongType,false)), - InternalField(dtype=int64,struct_field=StructField(A,LongType,false))] + [InternalField(dtype=int64, struct_field=StructField('__index_level_0__', LongType(), False)), + InternalField(dtype=int64, struct_field=StructField('A', LongType(), False))] >>> internal.to_internal_spark_frame.show() # doctest: +NORMALIZE_WHITESPACE +-----------------+---+---+---+---+---+ |__index_level_0__| A| B| C| D| E| @@ -508,9 +508,9 @@ class InternalFrame: >>> internal.index_names [('A',)] >>> internal.data_fields - [InternalField(dtype=int64,struct_field=StructField(B,LongType,false))] + [InternalField(dtype=int64, struct_field=StructField('B', LongType(), False))] >>> internal.index_fields - [InternalField(dtype=int64,struct_field=StructField(A,LongType,false))] + [InternalField(dtype=int64, struct_field=StructField('A', LongType(), False))] >>> internal.to_internal_spark_frame.show() # doctest: +NORMALIZE_WHITESPACE +---+---+ | A| B| @@ -596,9 +596,12 @@ def __init__( [('row_index_a',), ('row_index_b',), ('a', 'x')] >>> internal.index_fields # doctest: +NORMALIZE_WHITESPACE - [InternalField(dtype=object,struct_field=StructField(__index_level_0__,StringType,false)), - InternalField(dtype=object,struct_field=StructField(__index_level_1__,StringType,false)), - InternalField(dtype=int64,struct_field=StructField((a, x),LongType,false))] + [InternalField(dtype=object, + struct_field=StructField('__index_level_0__', StringType(), False)), + InternalField(dtype=object, + struct_field=StructField('__index_level_1__', StringType(), False)), + InternalField(dtype=int64, + struct_field=StructField('(a, x)', LongType(), False))] >>> internal.column_labels [('a', 'y'), ('b', 'z')] @@ -607,8 +610,8 @@ def __init__( [Column<'(a, y)'>, Column<'(b, z)'>] >>> internal.data_fields # doctest: +NORMALIZE_WHITESPACE - [InternalField(dtype=int64,struct_field=StructField((a, y),LongType,false)), - InternalField(dtype=int64,struct_field=StructField((b, z),LongType,false))] + [InternalField(dtype=int64, struct_field=StructField('(a, y)', LongType(), False)), + InternalField(dtype=int64, struct_field=StructField('(b, z)', LongType(), False))] >>> internal.column_label_names [('column_labels_a',), ('column_labels_b',)] @@ -1505,13 +1508,14 @@ def prepare_pandas_frame( 2 30 c 1 >>> index_columns ['__index_level_0__'] - >>> index_fields - [InternalField(dtype=int64,struct_field=StructField(__index_level_0__,LongType,false))] + >>> index_fields # doctest: +NORMALIZE_WHITESPACE + [InternalField(dtype=int64, struct_field=StructField('__index_level_0__', + LongType(), False))] >>> data_columns ['(x, a)', '(y, b)'] >>> data_fields # doctest: +NORMALIZE_WHITESPACE - [InternalField(dtype=object,struct_field=StructField((x, a),StringType,false)), - InternalField(dtype=category,struct_field=StructField((y, b),ByteType,false))] + [InternalField(dtype=object, struct_field=StructField('(x, a)', StringType(), False)), + InternalField(dtype=category, struct_field=StructField('(y, b)', ByteType(), False))] >>> import datetime >>> pdf = pd.DataFrame({ @@ -1521,9 +1525,11 @@ def prepare_pandas_frame( >>> _, _, _, _, data_fields = ( ... InternalFrame.prepare_pandas_frame(pdf, prefer_timestamp_ntz=True) ... ) - >>> data_fields - [InternalField(dtype=datetime64[ns],struct_field=StructField(dt,TimestampNTZType,false)), - InternalField(dtype=object,struct_field=StructField(dt_obj,TimestampNTZType,false))] + >>> data_fields # doctest: +NORMALIZE_WHITESPACE + [InternalField(dtype=datetime64[ns], + struct_field=StructField('dt', TimestampNTZType(), False)), + InternalField(dtype=object, + struct_field=StructField('dt_obj', TimestampNTZType(), False))] >>> pdf = pd.DataFrame({ ... "td": [datetime.timedelta(0)], "td_obj": [datetime.timedelta(0)] @@ -1533,8 +1539,10 @@ def prepare_pandas_frame( ... InternalFrame.prepare_pandas_frame(pdf) ... ) >>> data_fields # doctest: +NORMALIZE_WHITESPACE - [InternalField(dtype=timedelta64[ns],struct_field=StructField(td,DayTimeIntervalType(0,3),false)), - InternalField(dtype=object,struct_field=StructField(td_obj,DayTimeIntervalType(0,3),false))] + [InternalField(dtype=timedelta64[ns], + struct_field=StructField('td', DayTimeIntervalType(0, 3), False)), + InternalField(dtype=object, + struct_field=StructField('td_obj', DayTimeIntervalType(0, 3), False))] """ pdf = pdf.copy() diff --git a/python/pyspark/pandas/spark/utils.py b/python/pyspark/pandas/spark/utils.py index 0940a5e508071..9b8b5bb7542ab 100644 --- a/python/pyspark/pandas/spark/utils.py +++ b/python/pyspark/pandas/spark/utils.py @@ -52,7 +52,7 @@ def as_nullable_spark_type(dt: DataType) -> DataType: >>> as_nullable_spark_type(StructType([ ... StructField("A", IntegerType(), True), ... StructField("B", FloatType(), False)])) # doctest: +NORMALIZE_WHITESPACE - StructType(List(StructField(A,IntegerType,true),StructField(B,FloatType,true))) + StructType([StructField('A', IntegerType(), True), StructField('B', FloatType(), True)]) >>> as_nullable_spark_type(StructType([ ... StructField("A", @@ -62,9 +62,12 @@ def as_nullable_spark_type(dt: DataType) -> DataType: ... ArrayType(IntegerType(), False), False), False), ... StructField('b', StringType(), True)])), ... StructField("B", FloatType(), False)])) # doctest: +NORMALIZE_WHITESPACE - StructType(List(StructField(A,StructType(List(StructField(a,MapType(IntegerType,ArrayType\ -(IntegerType,true),true),true),StructField(b,StringType,true))),true),\ -StructField(B,FloatType,true))) + StructType([StructField('A', + StructType([StructField('a', + MapType(IntegerType(), + ArrayType(IntegerType(), True), True), True), + StructField('b', StringType(), True)]), True), + StructField('B', FloatType(), True)]) """ if isinstance(dt, StructType): new_fields = [] @@ -132,7 +135,8 @@ def force_decimal_precision_scale( >>> force_decimal_precision_scale(StructType([ ... StructField("A", DecimalType(10, 0), True), ... StructField("B", DecimalType(14, 7), False)])) # doctest: +NORMALIZE_WHITESPACE - StructType(List(StructField(A,DecimalType(38,18),true),StructField(B,DecimalType(38,18),false))) + StructType([StructField('A', DecimalType(38,18), True), + StructField('B', DecimalType(38,18), False)]) >>> force_decimal_precision_scale(StructType([ ... StructField("A", @@ -143,9 +147,12 @@ def force_decimal_precision_scale( ... StructField('b', StringType(), True)])), ... StructField("B", DecimalType(30, 15), False)]), ... precision=30, scale=15) # doctest: +NORMALIZE_WHITESPACE - StructType(List(StructField(A,StructType(List(StructField(a,MapType(DecimalType(30,15),\ -ArrayType(DecimalType(30,15),false),false),false),StructField(b,StringType,true))),true),\ -StructField(B,DecimalType(30,15),false))) + StructType([StructField('A', + StructType([StructField('a', + MapType(DecimalType(30,15), + ArrayType(DecimalType(30,15), False), False), False), + StructField('b', StringType(), True)]), True), + StructField('B', DecimalType(30,15), False)]) """ if isinstance(dt, StructType): new_fields = [] diff --git a/python/pyspark/pandas/tests/test_groupby.py b/python/pyspark/pandas/tests/test_groupby.py index 661526b160050..ec17e0dba2799 100644 --- a/python/pyspark/pandas/tests/test_groupby.py +++ b/python/pyspark/pandas/tests/test_groupby.py @@ -2227,7 +2227,7 @@ def udf(col) -> int: with self.assertRaisesRegex( TypeError, "Expected the return type of this function to be of Series type, " - "but found type ScalarType\\[LongType\\]", + "but found type ScalarType\\[LongType\\(\\)\\]", ): psdf.groupby("a").transform(udf) diff --git a/python/pyspark/pandas/tests/test_series.py b/python/pyspark/pandas/tests/test_series.py index 4cfd7c63e312d..fc78bcf4cd436 100644 --- a/python/pyspark/pandas/tests/test_series.py +++ b/python/pyspark/pandas/tests/test_series.py @@ -2985,7 +2985,7 @@ def udf(col) -> ps.Series[int]: with self.assertRaisesRegex( ValueError, r"Expected the return type of this function to be of scalar type, " - r"but found type SeriesType\[LongType\]", + r"but found type SeriesType\[LongType\(\)\]", ): psser.apply(udf) diff --git a/python/pyspark/pandas/typedef/typehints.py b/python/pyspark/pandas/typedef/typehints.py index 695ed31af6f42..8a32a14b64e72 100644 --- a/python/pyspark/pandas/typedef/typehints.py +++ b/python/pyspark/pandas/typedef/typehints.py @@ -319,17 +319,17 @@ def pandas_on_spark_type(tpe: Union[str, type, Dtype]) -> Tuple[Dtype, types.Dat Examples -------- >>> pandas_on_spark_type(int) - (dtype('int64'), LongType) + (dtype('int64'), LongType()) >>> pandas_on_spark_type(str) - (dtype('>> pandas_on_spark_type(datetime.date) - (dtype('O'), DateType) + (dtype('O'), DateType()) >>> pandas_on_spark_type(datetime.datetime) - (dtype('>> pandas_on_spark_type(datetime.timedelta) - (dtype('>> pandas_on_spark_type(List[bool]) - (dtype('O'), ArrayType(BooleanType,true)) + (dtype('O'), ArrayType(BooleanType(), True)) """ try: dtype = pandas_dtype(tpe) @@ -381,7 +381,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtype dtype('int64') >>> inferred.spark_type - LongType + LongType() >>> def func() -> ps.Series[int]: ... pass @@ -389,7 +389,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtype dtype('int64') >>> inferred.spark_type - LongType + LongType() >>> def func() -> ps.DataFrame[np.float, str]: ... pass @@ -397,7 +397,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtypes [dtype('float64'), dtype('>> inferred.spark_type - StructType(List(StructField(c0,DoubleType,true),StructField(c1,StringType,true))) + StructType([StructField('c0', DoubleType(), True), StructField('c1', StringType(), True)]) >>> def func() -> ps.DataFrame[np.float]: ... pass @@ -405,7 +405,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtypes [dtype('float64')] >>> inferred.spark_type - StructType(List(StructField(c0,DoubleType,true))) + StructType([StructField('c0', DoubleType(), True)]) >>> def func() -> 'int': ... pass @@ -413,7 +413,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtype dtype('int64') >>> inferred.spark_type - LongType + LongType() >>> def func() -> 'ps.Series[int]': ... pass @@ -421,7 +421,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtype dtype('int64') >>> inferred.spark_type - LongType + LongType() >>> def func() -> 'ps.DataFrame[np.float, str]': ... pass @@ -429,7 +429,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtypes [dtype('float64'), dtype('>> inferred.spark_type - StructType(List(StructField(c0,DoubleType,true),StructField(c1,StringType,true))) + StructType([StructField('c0', DoubleType(), True), StructField('c1', StringType(), True)]) >>> def func() -> 'ps.DataFrame[np.float]': ... pass @@ -437,7 +437,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtypes [dtype('float64')] >>> inferred.spark_type - StructType(List(StructField(c0,DoubleType,true))) + StructType([StructField('c0', DoubleType(), True)]) >>> def func() -> ps.DataFrame['a': np.float, 'b': int]: ... pass @@ -445,7 +445,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtypes [dtype('float64'), dtype('int64')] >>> inferred.spark_type - StructType(List(StructField(a,DoubleType,true),StructField(b,LongType,true))) + StructType([StructField('a', DoubleType(), True), StructField('b', LongType(), True)]) >>> def func() -> "ps.DataFrame['a': np.float, 'b': int]": ... pass @@ -453,7 +453,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtypes [dtype('float64'), dtype('int64')] >>> inferred.spark_type - StructType(List(StructField(a,DoubleType,true),StructField(b,LongType,true))) + StructType([StructField('a', DoubleType(), True), StructField('b', LongType(), True)]) >>> pdf = pd.DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}) >>> def func() -> ps.DataFrame[pdf.dtypes]: @@ -462,7 +462,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtypes [dtype('int64'), dtype('int64')] >>> inferred.spark_type - StructType(List(StructField(c0,LongType,true),StructField(c1,LongType,true))) + StructType([StructField('c0', LongType(), True), StructField('c1', LongType(), True)]) >>> pdf = pd.DataFrame({"a": [1, 2, 3], "b": [3, 4, 5]}) >>> def func() -> ps.DataFrame[zip(pdf.columns, pdf.dtypes)]: @@ -471,7 +471,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtypes [dtype('int64'), dtype('int64')] >>> inferred.spark_type - StructType(List(StructField(a,LongType,true),StructField(b,LongType,true))) + StructType([StructField('a', LongType(), True), StructField('b', LongType(), True)]) >>> pdf = pd.DataFrame({("x", "a"): [1, 2, 3], ("y", "b"): [3, 4, 5]}) >>> def func() -> ps.DataFrame[zip(pdf.columns, pdf.dtypes)]: @@ -480,7 +480,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtypes [dtype('int64'), dtype('int64')] >>> inferred.spark_type - StructType(List(StructField((x, a),LongType,true),StructField((y, b),LongType,true))) + StructType([StructField('(x, a)', LongType(), True), StructField('(y, b)', LongType(), True)]) >>> pdf = pd.DataFrame({"a": [1, 2, 3], "b": pd.Categorical([3, 4, 5])}) >>> def func() -> ps.DataFrame[pdf.dtypes]: @@ -489,7 +489,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtypes [dtype('int64'), CategoricalDtype(categories=[3, 4, 5], ordered=False)] >>> inferred.spark_type - StructType(List(StructField(c0,LongType,true),StructField(c1,LongType,true))) + StructType([StructField('c0', LongType(), True), StructField('c1', LongType(), True)]) >>> def func() -> ps.DataFrame[zip(pdf.columns, pdf.dtypes)]: ... pass @@ -497,7 +497,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtypes [dtype('int64'), CategoricalDtype(categories=[3, 4, 5], ordered=False)] >>> inferred.spark_type - StructType(List(StructField(a,LongType,true),StructField(b,LongType,true))) + StructType([StructField('a', LongType(), True), StructField('b', LongType(), True)]) >>> def func() -> ps.Series[pdf.b.dtype]: ... pass @@ -505,7 +505,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.dtype CategoricalDtype(categories=[3, 4, 5], ordered=False) >>> inferred.spark_type - LongType + LongType() >>> def func() -> ps.DataFrame[int, [int, int]]: ... pass @@ -515,7 +515,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.spark_type.simpleString() 'struct<__index_level_0__:bigint,c0:bigint,c1:bigint>' >>> inferred.index_fields - [InternalField(dtype=int64,struct_field=StructField(__index_level_0__,LongType,true))] + [InternalField(dtype=int64, struct_field=StructField('__index_level_0__', LongType(), True))] >>> def func() -> ps.DataFrame[pdf.index.dtype, pdf.dtypes]: ... pass @@ -525,7 +525,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.spark_type.simpleString() 'struct<__index_level_0__:bigint,c0:bigint,c1:bigint>' >>> inferred.index_fields - [InternalField(dtype=int64,struct_field=StructField(__index_level_0__,LongType,true))] + [InternalField(dtype=int64, struct_field=StructField('__index_level_0__', LongType(), True))] >>> def func() -> ps.DataFrame[ ... ("index", CategoricalDtype(categories=[3, 4, 5], ordered=False)), @@ -537,7 +537,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.spark_type.simpleString() 'struct' >>> inferred.index_fields - [InternalField(dtype=category,struct_field=StructField(index,LongType,true))] + [InternalField(dtype=category, struct_field=StructField('index', LongType(), True))] >>> def func() -> ps.DataFrame[ ... (pdf.index.name, pdf.index.dtype), zip(pdf.columns, pdf.dtypes)]: @@ -548,7 +548,7 @@ def infer_return_type(f: Callable) -> Union[SeriesType, DataFrameType, ScalarTyp >>> inferred.spark_type.simpleString() 'struct<__index_level_0__:bigint,a:bigint,b:bigint>' >>> inferred.index_fields - [InternalField(dtype=int64,struct_field=StructField(__index_level_0__,LongType,true))] + [InternalField(dtype=int64, struct_field=StructField('__index_level_0__', LongType(), True))] """ # We should re-import to make sure the class 'SeriesType' is not treated as a class # within this module locally. See Series.__class_getitem__ which imports this class diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index c5de9fb79571f..ea30e5bc88839 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -364,7 +364,8 @@ def schema(self) -> StructType: Examples -------- >>> df.schema - StructType(List(StructField(age,IntegerType,true),StructField(name,StringType,true))) + StructType([StructField('age', IntegerType(), True), + StructField('name', StringType(), True)]) """ if self._schema is None: try: diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index 5f5e88fd46deb..be5e1d9a6e5dc 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -627,7 +627,7 @@ def test_toDF_with_schema_string(self): # field types mismatch will cause exception at runtime. self.assertRaisesRegex( Exception, - "FloatType can not accept", + "FloatType\\(\\) can not accept", lambda: rdd.toDF("key: float, value: string").collect(), ) diff --git a/python/pyspark/sql/tests/test_types.py b/python/pyspark/sql/tests/test_types.py index 9ae6c3a63457e..d9ad2344ac5db 100644 --- a/python/pyspark/sql/tests/test_types.py +++ b/python/pyspark/sql/tests/test_types.py @@ -949,6 +949,29 @@ def assertCollectSuccess(typecode, value): a = array.array(t) self.spark.createDataFrame([Row(myarray=a)]).collect() + def test_repr(self): + instances = [ + NullType(), + StringType(), + BinaryType(), + BooleanType(), + DateType(), + TimestampType(), + DecimalType(), + DoubleType(), + FloatType(), + ByteType(), + IntegerType(), + LongType(), + ShortType(), + ArrayType(StringType()), + MapType(StringType(), IntegerType()), + StructField("f1", StringType(), True), + StructType([StructField("f1", StringType(), True)]), + ] + for instance in instances: + self.assertEqual(eval(repr(instance)), instance) + def test_daytime_interval_type_constructor(self): # SPARK-37277: Test constructors in day time interval. self.assertEqual(DayTimeIntervalType().simpleString(), "interval day to second") diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 41db22b054049..23e54eb8889d9 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -80,7 +80,7 @@ class DataType: """Base class for data types.""" def __repr__(self) -> str: - return self.__class__.__name__ + return self.__class__.__name__ + "()" def __hash__(self) -> int: return hash(str(self)) @@ -364,7 +364,7 @@ def _str_repr(self) -> str: jsonValue = _str_repr def __repr__(self) -> str: - return "%s(%d,%d)" % (type(self).__name__, self.startField, self.endField) + return "%s(%d, %d)" % (type(self).__name__, self.startField, self.endField) def needConversion(self) -> bool: return True @@ -415,7 +415,7 @@ def simpleString(self) -> str: return "array<%s>" % self.elementType.simpleString() def __repr__(self) -> str: - return "ArrayType(%s,%s)" % (self.elementType, str(self.containsNull).lower()) + return "ArrayType(%s, %s)" % (self.elementType, str(self.containsNull)) def jsonValue(self) -> Dict[str, Any]: return { @@ -485,11 +485,7 @@ def simpleString(self) -> str: return "map<%s,%s>" % (self.keyType.simpleString(), self.valueType.simpleString()) def __repr__(self) -> str: - return "MapType(%s,%s,%s)" % ( - self.keyType, - self.valueType, - str(self.valueContainsNull).lower(), - ) + return "MapType(%s, %s, %s)" % (self.keyType, self.valueType, str(self.valueContainsNull)) def jsonValue(self) -> Dict[str, Any]: return { @@ -570,7 +566,7 @@ def simpleString(self) -> str: return "%s:%s" % (self.name, self.dataType.simpleString()) def __repr__(self) -> str: - return "StructField(%s,%s,%s)" % (self.name, self.dataType, str(self.nullable).lower()) + return "StructField('%s', %s, %s)" % (self.name, self.dataType, str(self.nullable)) def jsonValue(self) -> Dict[str, Any]: return { @@ -616,9 +612,9 @@ class StructType(DataType): -------- >>> struct1 = StructType([StructField("f1", StringType(), True)]) >>> struct1["f1"] - StructField(f1,StringType,true) + StructField('f1', StringType(), True) >>> struct1[0] - StructField(f1,StringType,true) + StructField('f1', StringType(), True) >>> struct1 = StructType([StructField("f1", StringType(), True)]) >>> struct2 = StructType([StructField("f1", StringType(), True)]) @@ -753,7 +749,7 @@ def simpleString(self) -> str: return "struct<%s>" % (",".join(f.simpleString() for f in self)) def __repr__(self) -> str: - return "StructType(List(%s))" % ",".join(str(field) for field in self) + return "StructType([%s])" % ", ".join(str(field) for field in self) def jsonValue(self) -> Dict[str, Any]: return {"type": self.typeName(), "fields": [f.jsonValue() for f in self]} @@ -979,17 +975,17 @@ def _parse_datatype_string(s: str) -> DataType: Examples -------- >>> _parse_datatype_string("int ") - IntegerType + IntegerType() >>> _parse_datatype_string("INT ") - IntegerType + IntegerType() >>> _parse_datatype_string("a: byte, b: decimal( 16 , 8 ) ") - StructType(List(StructField(a,ByteType,true),StructField(b,DecimalType(16,8),true))) + StructType([StructField('a', ByteType(), True), StructField('b', DecimalType(16,8), True)]) >>> _parse_datatype_string("a DOUBLE, b STRING") - StructType(List(StructField(a,DoubleType,true),StructField(b,StringType,true))) + StructType([StructField('a', DoubleType(), True), StructField('b', StringType(), True)]) >>> _parse_datatype_string("a: array< short>") - StructType(List(StructField(a,ArrayType(ShortType,true),true))) + StructType([StructField('a', ArrayType(ShortType(), True), True)]) >>> _parse_datatype_string(" map ") - MapType(StringType,StringType,true) + MapType(StringType(), StringType(), True) >>> # Error cases >>> _parse_datatype_string("blabla") # doctest: +IGNORE_EXCEPTION_DETAIL From bb0e8e2164188183eb2fb6aa23c45882151fd77f Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 24 Mar 2022 07:18:27 +0800 Subject: [PATCH 042/535] [SPARK-32268][TESTS][FOLLOWUP] Fix `BloomFilterAggregateQuerySuite` failed in ansi mode ### What changes were proposed in this pull request? `Test that might_contain errors out non-constant Bloom filter` in `BloomFilterAggregateQuerySuite ` failed in ansi mode due to `Numeric <=> Binary` is [not allowed in ansi mode](https://github.com/apache/spark/pull/30260), so the content of `exception.getMessage` is different from that of non-ans mode. This pr change the case to ensure that the error messages of `ansi` mode and `non-ansi` are consistent. ### Why are the changes needed? Bug fix. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GA - Local Test **Before** ``` export SPARK_ANSI_SQL_MODE=false mvn clean test -pl sql/core -am -Dtest=none -DwildcardSuites=org.apache.spark.sql.BloomFilterAggregateQuerySuite ``` ``` Run completed in 23 seconds, 537 milliseconds. Total number of tests run: 8 Suites: completed 2, aborted 0 Tests: succeeded 8, failed 0, canceled 0, ignored 0, pending 0 All tests passed. ``` ``` export SPARK_ANSI_SQL_MODE=true mvn clean test -pl sql/core -am -Dtest=none -DwildcardSuites=org.apache.spark.sql.BloomFilterAggregateQuerySuite ``` ``` - Test that might_contain errors out non-constant Bloom filter *** FAILED *** "cannot resolve 'CAST(t.a AS BINARY)' due to data type mismatch: cannot cast bigint to binary with ANSI mode on. If you have to cast bigint to binary, you can set spark.sql.ansi.enabled as false. ; line 2 pos 21; 'Project [unresolvedalias('might_contain(cast(a#2424L as binary), cast(5 as bigint)), None)] +- SubqueryAlias t +- LocalRelation [a#2424L] " did not contain "The Bloom filter binary input to might_contain should be either a constant value or a scalar subquery expression" (BloomFilterAggregateQuerySuite.scala:171) ``` **After** ``` export SPARK_ANSI_SQL_MODE=false mvn clean test -pl sql/core -am -Dtest=none -DwildcardSuites=org.apache.spark.sql.BloomFilterAggregateQuerySuite ``` ``` Run completed in 26 seconds, 544 milliseconds. Total number of tests run: 8 Suites: completed 2, aborted 0 Tests: succeeded 8, failed 0, canceled 0, ignored 0, pending 0 All tests passed. ``` ``` export SPARK_ANSI_SQL_MODE=true mvn clean test -pl sql/core -am -Dtest=none -DwildcardSuites=org.apache.spark.sql.BloomFilterAggregateQuerySuite ``` ``` Run completed in 25 seconds, 289 milliseconds. Total number of tests run: 8 Suites: completed 2, aborted 0 Tests: succeeded 8, failed 0, canceled 0, ignored 0, pending 0 All tests passed. ``` Closes #35953 from LuciferYang/SPARK-32268-FOLLOWUP. Authored-by: yangjie01 Signed-off-by: Yuming Wang (cherry picked from commit 716512364468cef3c12a85403661de2837cc6fe5) Signed-off-by: Yuming Wang --- .../org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala index 025593be4c959..7fc89ecc88ba3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala @@ -165,7 +165,7 @@ class BloomFilterAggregateQuerySuite extends QueryTest with SharedSparkSession { val exception1 = intercept[AnalysisException] { spark.sql(""" |SELECT might_contain(cast(a as binary), cast(5 as long)) - |FROM values (cast(1 as long)), (cast(2 as long)) as t(a)""" + |FROM values (cast(1 as string)), (cast(2 as string)) as t(a)""" .stripMargin) } assert(exception1.getMessage.contains( @@ -175,7 +175,7 @@ class BloomFilterAggregateQuerySuite extends QueryTest with SharedSparkSession { val exception2 = intercept[AnalysisException] { spark.sql(""" |SELECT might_contain((select cast(a as binary)), cast(5 as long)) - |FROM values (cast(1 as long)), (cast(2 as long)) as t(a)""" + |FROM values (cast(1 as string)), (cast(2 as string)) as t(a)""" .stripMargin) } assert(exception2.getMessage.contains( From c6cd2cc7f92f3905f7cc2d2517d7b7746e286a69 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Thu, 24 Mar 2022 10:07:43 +0800 Subject: [PATCH 043/535] [SPARK-38625][SQL] DataSource V2: Add APIs for group-based row-level operations ### What changes were proposed in this pull request? This PR contains row-level operation APIs for V2 data sources that can replace groups of data (e.g. files, partitions). It is a subset of the changes already reviewed in #35395. ### Why are the changes needed? These changes are needed to support row-level operations in Spark per SPIP [SPARK-35801](https://issues.apache.org/jira/browse/SPARK-35801). ### Does this PR introduce _any_ user-facing change? Yes, this PR adds new Data Source V2 APIs. ### How was this patch tested? Not applicable. Closes #35940 from aokolnychyi/spark-38625. Authored-by: Anton Okolnychyi Signed-off-by: Wenchen Fan (cherry picked from commit 6743aaaef9fff46c009cc235dd87d0508c5e5ba8) Signed-off-by: Wenchen Fan --- .../catalog/SupportsRowLevelOperations.java | 42 +++++++++ .../connector/write/RowLevelOperation.java | 92 +++++++++++++++++++ .../write/RowLevelOperationBuilder.java | 34 +++++++ .../write/RowLevelOperationInfo.java | 40 ++++++++ 4 files changed, 208 insertions(+) create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsRowLevelOperations.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperation.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperationBuilder.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperationInfo.java diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsRowLevelOperations.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsRowLevelOperations.java new file mode 100644 index 0000000000000..323d52fbbeea3 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsRowLevelOperations.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.catalog; + +import org.apache.spark.annotation.Experimental; +import org.apache.spark.sql.connector.write.RowLevelOperationBuilder; +import org.apache.spark.sql.connector.write.RowLevelOperation; +import org.apache.spark.sql.connector.write.RowLevelOperationInfo; + +/** + * A mix-in interface for {@link Table} row-level operations support. Data sources can implement + * this interface to indicate they support rewriting data for DELETE, UPDATE, MERGE operations. + * + * @since 3.3.0 + */ +@Experimental +public interface SupportsRowLevelOperations extends Table { + /** + * Returns a {@link RowLevelOperationBuilder} to build a {@link RowLevelOperation}. + * Spark will call this method while planning DELETE, UPDATE and MERGE operations + * that require rewriting data. + * + * @param info the row-level operation info such as command (e.g. DELETE) and options + * @return the row-level operation builder + */ + RowLevelOperationBuilder newRowLevelOperationBuilder(RowLevelOperationInfo info); +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperation.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperation.java new file mode 100644 index 0000000000000..04bbab11e10d7 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperation.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.write; + +import org.apache.spark.annotation.Experimental; +import org.apache.spark.sql.connector.expressions.NamedReference; +import org.apache.spark.sql.connector.read.Scan; +import org.apache.spark.sql.connector.read.ScanBuilder; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +/** + * A logical representation of a data source DELETE, UPDATE, or MERGE operation that requires + * rewriting data. + * + * @since 3.3.0 + */ +@Experimental +public interface RowLevelOperation { + + /** + * A row-level SQL command. + */ + enum Command { + DELETE, UPDATE, MERGE + } + + /** + * Returns the description associated with this row-level operation. + */ + default String description() { + return this.getClass().toString(); + } + + /** + * Returns the SQL command that is being performed. + */ + Command command(); + + /** + * Returns a {@link ScanBuilder} to configure a {@link Scan} for this row-level operation. + *

        + * Data sources fall into two categories: those that can handle a delta of rows and those that + * need to replace groups (e.g. partitions, files). Data sources that handle deltas allow Spark + * to quickly discard unchanged rows and have no requirements for input scans. Data sources that + * replace groups of rows can discard deleted rows but need to keep unchanged rows to be passed + * back into the source. This means that scans for such data sources must produce all rows + * in a group if any are returned. Some data sources will avoid pushing filters into files (file + * granularity), while others will avoid pruning files within a partition (partition granularity). + *

        + * For example, if a data source can only replace partitions, all rows from a partition must + * be returned by the scan, even if a filter can narrow the set of changes to a single file + * in the partition. Similarly, a data source that can swap individual files must produce all + * rows from files where at least one record must be changed, not just rows that must be changed. + */ + ScanBuilder newScanBuilder(CaseInsensitiveStringMap options); + + /** + * Returns a {@link WriteBuilder} to configure a {@link Write} for this row-level operation. + *

        + * Note that Spark will first configure the scan and then the write, allowing data sources to pass + * information from the scan to the write. For example, the scan can report which condition was + * used to read the data that may be needed by the write under certain isolation levels. + * Implementations may capture the built scan or required scan information and then use it + * while building the write. + */ + WriteBuilder newWriteBuilder(LogicalWriteInfo info); + + /** + * Returns metadata attributes that are required to perform this row-level operation. + *

        + * Data sources that can use this method to project metadata columns needed for writing + * the data back (e.g. metadata columns for grouping data). + */ + default NamedReference[] requiredMetadataAttributes() { + return new NamedReference[0]; + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperationBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperationBuilder.java new file mode 100644 index 0000000000000..bc2f5778037f1 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperationBuilder.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.write; + +import org.apache.spark.annotation.Experimental; + +/** + * An interface for building a {@link RowLevelOperation}. + * + * @since 3.3.0 + */ +@Experimental +public interface RowLevelOperationBuilder { + /** + * Returns a {@link RowLevelOperation} that controls how Spark rewrites data + * for DELETE, UPDATE, MERGE commands. + */ + RowLevelOperation build(); +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperationInfo.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperationInfo.java new file mode 100644 index 0000000000000..e3d7397aed91b --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperationInfo.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.write; + +import org.apache.spark.annotation.Experimental; +import org.apache.spark.sql.connector.write.RowLevelOperation.Command; +import org.apache.spark.sql.util.CaseInsensitiveStringMap; + +/** + * An interface with logical information for a row-level operation such as DELETE, UPDATE, MERGE. + * + * @since 3.3.0 + */ +@Experimental +public interface RowLevelOperationInfo { + /** + * Returns options that the user specified when performing the row-level operation. + */ + CaseInsensitiveStringMap options(); + + /** + * Returns the row-level SQL command (e.g. DELETE, UPDATE, MERGE). + */ + Command command(); +} From fb9c633db950d7af70376ff1d076d36297e21c36 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 24 Mar 2022 12:12:38 +0900 Subject: [PATCH 044/535] [SPARK-38631][CORE] Uses Java-based implementation for un-tarring at Utils.unpack ### What changes were proposed in this pull request? This PR proposes to use `FileUtil.unTarUsingJava` that is a Java implementation for un-tar `.tar` files. `unTarUsingJava` is not public but it exists in all Hadoop versions from 2.1+, see HADOOP-9264. The security issue reproduction requires a non-Windows platform, and a non-gzipped TAR archive file name (contents don't matter). ### Why are the changes needed? There is a risk for arbitrary shell command injection via `Utils.unpack` when the filename is controlled by a malicious user. This is due to an issue in Hadoop's `unTar`, that is not properly escaping the filename before passing to a shell command:https://github.com/apache/hadoop/blob/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java#L904 ### Does this PR introduce _any_ user-facing change? Yes, it prevents a security issue that, previously, allowed users to execute arbitrary shall command. ### How was this patch tested? Manually tested in local, and existing test cases should cover. Closes #35946 from HyukjinKwon/SPARK-38631. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 057c051285ec32c665fb458d0670c1c16ba536b2) Signed-off-by: Hyukjin Kwon --- .../scala/org/apache/spark/util/Utils.scala | 30 +++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index c8c7ea627b864..24b3d2b6191e3 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -600,16 +600,42 @@ private[spark] object Utils extends Logging { if (lowerSrc.endsWith(".jar")) { RunJar.unJar(source, dest, RunJar.MATCH_ANY) } else if (lowerSrc.endsWith(".zip")) { + // TODO(SPARK-37677): should keep file permissions. Java implementation doesn't. FileUtil.unZip(source, dest) - } else if ( - lowerSrc.endsWith(".tar.gz") || lowerSrc.endsWith(".tgz") || lowerSrc.endsWith(".tar")) { + } else if (lowerSrc.endsWith(".tar.gz") || lowerSrc.endsWith(".tgz")) { FileUtil.unTar(source, dest) + } else if (lowerSrc.endsWith(".tar")) { + // TODO(SPARK-38632): should keep file permissions. Java implementation doesn't. + unTarUsingJava(source, dest) } else { logWarning(s"Cannot unpack $source, just copying it to $dest.") copyRecursive(source, dest) } } + /** + * The method below was copied from `FileUtil.unTar` but uses Java-based implementation + * to work around a security issue, see also SPARK-38631. + */ + private def unTarUsingJava(source: File, dest: File): Unit = { + if (!dest.mkdirs && !dest.isDirectory) { + throw new IOException(s"Mkdirs failed to create $dest") + } else { + try { + // Should not fail because all Hadoop 2.1+ (from HADOOP-9264) + // have 'unTarUsingJava'. + val mth = classOf[FileUtil].getDeclaredMethod( + "unTarUsingJava", classOf[File], classOf[File], classOf[Boolean]) + mth.setAccessible(true) + mth.invoke(null, source, dest, java.lang.Boolean.FALSE) + } catch { + // Re-throw the original exception. + case e: java.lang.reflect.InvocationTargetException if e.getCause != null => + throw e.getCause + } + } + } + /** Records the duration of running `body`. */ def timeTakenMs[T](body: => T): (T, Long) = { val startTime = System.nanoTime() From 072968d730863e89635c903999a397fc0233ea87 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Thu, 24 Mar 2022 14:28:32 +0800 Subject: [PATCH 045/535] [SPARK-38063][SQL] Support split_part Function ### What changes were proposed in this pull request? `split_part()` is a commonly supported function by other systems such as Postgres and some other systems. The Spark equivalent is `element_at(split(arg, delim), part)` ### Why are the changes needed? Adding new SQL function. ### Does this PR introduce _any_ user-facing change? Yes. This PR adds a new function so there is no previous behavior. The following demonstrates more about the new function: syntax: `split_part(str, delimiter, partNum)` This function splits `str` by `delimiter` and return requested part of the split (1-based). If any input is null, returns null. If the index is out of range of split parts, returns empty string. If index is 0, throws an ArrayIndexOutOfBoundsException. `str` and `delimiter` are the same type as `string`. `partNum` is `integer` type Examples: ``` > SELECT _FUNC_('11.12.13', '.', 3); 13 > SELECT _FUNC_(NULL, '.', 3); NULL ``` ### How was this patch tested? Unit Test Closes #35352 from amaliujia/splitpart. Authored-by: Rui Wang Signed-off-by: Wenchen Fan (cherry picked from commit 3858bf0fbd02e3d8fd18e967f3841c50b9294414) Signed-off-by: Wenchen Fan --- .../apache/spark/unsafe/types/UTF8String.java | 21 ++++- .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/collectionOperations.scala | 22 ++++- .../expressions/stringExpressions.scala | 75 ++++++++++++++++- .../sql-functions/sql-expression-schema.md | 3 +- .../sql-tests/inputs/string-functions.sql | 12 +++ .../results/ansi/string-functions.sql.out | 83 ++++++++++++++++++- .../results/string-functions.sql.out | 83 ++++++++++++++++++- 8 files changed, 291 insertions(+), 9 deletions(-) diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 98c61cfd9bb9b..0f9d653a0eb32 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -23,6 +23,7 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Map; +import java.util.regex.Pattern; import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.KryoSerializable; @@ -999,13 +1000,31 @@ public static UTF8String concatWs(UTF8String separator, UTF8String... inputs) { } public UTF8String[] split(UTF8String pattern, int limit) { + return split(pattern.toString(), limit); + } + + public UTF8String[] splitSQL(UTF8String delimiter, int limit) { + // if delimiter is empty string, skip the regex based splitting directly as regex + // treats empty string as matching anything, thus use the input directly. + if (delimiter.numBytes() == 0) { + return new UTF8String[]{this}; + } else { + // we do not treat delimiter as a regex but consider the whole string of delimiter + // as the separator to split string. Java String's split, however, only accept + // regex as the pattern to split, thus we can quote the delimiter to escape special + // characters in the string. + return split(Pattern.quote(delimiter.toString()), limit); + } + } + + private UTF8String[] split(String delimiter, int limit) { // Java String's split method supports "ignore empty string" behavior when the limit is 0 // whereas other languages do not. To avoid this java specific behavior, we fall back to // -1 when the limit is 0. if (limit == 0) { limit = -1; } - String[] splits = toString().split(pattern.toString(), limit); + String[] splits = toString().split(delimiter, limit); UTF8String[] res = new UTF8String[splits.length]; for (int i = 0; i < res.length; i++) { res[i] = fromString(splits[i]); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index a37d4b2dab3c4..a06112a18ef89 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -549,6 +549,7 @@ object FunctionRegistry { expression[SoundEx]("soundex"), expression[StringSpace]("space"), expression[StringSplit]("split"), + expression[SplitPart]("split_part"), expression[Substring]("substr", true), expression[Substring]("substring"), expression[Left]("left"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 363c531b04272..ca008391d1bde 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -2095,10 +2095,12 @@ case class ArrayPosition(left: Expression, right: Expression) case class ElementAt( left: Expression, right: Expression, + // The value to return if index is out of bound + defaultValueOutOfBound: Option[Literal] = None, failOnError: Boolean = SQLConf.get.ansiEnabled) extends GetMapValueUtil with GetArrayItemUtil with NullIntolerant { - def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) + def this(left: Expression, right: Expression) = this(left, right, None, SQLConf.get.ansiEnabled) @transient private lazy val mapKeyType = left.dataType.asInstanceOf[MapType].keyType @@ -2179,7 +2181,10 @@ case class ElementAt( if (failOnError) { throw QueryExecutionErrors.invalidElementAtIndexError(index, array.numElements()) } else { - null + defaultValueOutOfBound match { + case Some(value) => value.eval() + case None => null + } } } else { val idx = if (index == 0) { @@ -2218,7 +2223,16 @@ case class ElementAt( val indexOutOfBoundBranch = if (failOnError) { s"throw QueryExecutionErrors.invalidElementAtIndexError($index, $eval1.numElements());" } else { - s"${ev.isNull} = true;" + defaultValueOutOfBound match { + case Some(value) => + val defaultValueEval = value.genCode(ctx) + s""" + ${defaultValueEval.code} + ${ev.isNull} = ${defaultValueEval.isNull} + ${ev.value} = ${defaultValueEval.value} + """.stripMargin + case None => s"${ev.isNull} = true;" + } } s""" @@ -2278,7 +2292,7 @@ case class ElementAt( case class TryElementAt(left: Expression, right: Expression, replacement: Expression) extends RuntimeReplaceable with InheritAnalysisRules { def this(left: Expression, right: Expression) = - this(left, right, ElementAt(left, right, failOnError = false)) + this(left, right, ElementAt(left, right, None, failOnError = false)) override def prettyName: String = "try_element_at" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index fc73216b296af..a08ab84ac6f4a 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.{TreePattern, UPPER_OR_LO import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData, TypeUtils} import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types._ +import org.apache.spark.sql.types.{StringType, _} import org.apache.spark.unsafe.UTF8StringBuilder import org.apache.spark.unsafe.array.ByteArrayMethods import org.apache.spark.unsafe.types.{ByteArray, UTF8String} @@ -2943,3 +2943,76 @@ case class Sentences( copy(str = newFirst, language = newSecond, country = newThird) } + +/** + * Splits a given string by a specified delimiter and return splits into a + * GenericArrayData. This expression is different from `split` function as + * `split` takes regex expression as the pattern to split strings while this + * expression take delimiter (a string without carrying special meaning on its + * characters, thus is not treated as regex) to split strings. + */ +case class StringSplitSQL( + str: Expression, + delimiter: Expression) extends BinaryExpression with NullIntolerant { + override def dataType: DataType = ArrayType(StringType, containsNull = false) + override def left: Expression = str + override def right: Expression = delimiter + + override def nullSafeEval(string: Any, delimiter: Any): Any = { + val strings = string.asInstanceOf[UTF8String].splitSQL( + delimiter.asInstanceOf[UTF8String], -1); + new GenericArrayData(strings.asInstanceOf[Array[Any]]) + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val arrayClass = classOf[GenericArrayData].getName + nullSafeCodeGen(ctx, ev, (str, delimiter) => { + // Array in java is covariant, so we don't need to cast UTF8String[] to Object[]. + s"${ev.value} = new $arrayClass($str.splitSQL($delimiter,-1));" + }) + } + + override def withNewChildrenInternal( + newFirst: Expression, newSecond: Expression): StringSplitSQL = + copy(str = newFirst, delimiter = newSecond) +} + +/** + * Splits a given string by a specified delimiter and returns the requested part. + * If any input is null, returns null. + * If index is out of range of split parts, return empty string. + * If index is 0, throws an ArrayIndexOutOfBoundsException. + */ +@ExpressionDescription( + usage = + """ + _FUNC_(str, delimiter, partNum) - Splits `str` by delimiter and return + requested part of the split (1-based). If any input is null, returns null. + if `partNum` is out of range of split parts, returns empty string. If `partNum` is 0, + throws an error. If `partNum` is negative, the parts are counted backward from the + end of the string. If the `delimiter` is an empty string, the `str` is not split. + """, + examples = + """ + Examples: + > SELECT _FUNC_('11.12.13', '.', 3); + 13 + """, + since = "3.3.0", + group = "string_funcs") +case class SplitPart ( + str: Expression, + delimiter: Expression, + partNum: Expression) + extends RuntimeReplaceable with ImplicitCastInputTypes { + override lazy val replacement: Expression = + ElementAt(StringSplitSQL(str, delimiter), partNum, Some(Literal.create("", StringType)), + false) + override def nodeName: String = "split_part" + override def inputTypes: Seq[DataType] = Seq(StringType, StringType, IntegerType) + def children: Seq[Expression] = Seq(str, delimiter, partNum) + protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = { + copy(str = newChildren.apply(0), delimiter = newChildren.apply(1), + partNum = newChildren.apply(2)) + } +} diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 1afba46e16213..166c7613d4af0 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 383 + - Number of queries: 384 - Number of expressions that missing example: 12 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions @@ -275,6 +275,7 @@ | org.apache.spark.sql.catalyst.expressions.SoundEx | soundex | SELECT soundex('Miller') | struct | | org.apache.spark.sql.catalyst.expressions.SparkPartitionID | spark_partition_id | SELECT spark_partition_id() | struct | | org.apache.spark.sql.catalyst.expressions.SparkVersion | version | SELECT version() | struct | +| org.apache.spark.sql.catalyst.expressions.SplitPart | split_part | SELECT split_part('11.12.13', '.', 3) | struct | | org.apache.spark.sql.catalyst.expressions.Sqrt | sqrt | SELECT sqrt(4) | struct | | org.apache.spark.sql.catalyst.expressions.Stack | stack | SELECT stack(2, 1, 2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.StartsWithExpressionBuilder$ | startswith | SELECT startswith('Spark SQL', 'Spark') | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index e7c01a69bc838..7d22e791570c3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -27,6 +27,18 @@ select right("abcd", -2), right("abcd", 0), right("abcd", 'a'); SELECT split('aa1cc2ee3', '[1-9]+'); SELECT split('aa1cc2ee3', '[1-9]+', 2); +-- split_part function +SELECT split_part('11.12.13', '.', 2); +SELECT split_part('11.12.13', '.', -1); +SELECT split_part('11.12.13', '.', -3); +SELECT split_part('11.12.13', '', 1); +SELECT split_part('11ab12ab13', 'ab', 1); +SELECT split_part('11.12.13', '.', 0); +SELECT split_part('11.12.13', '.', 4); +SELECT split_part('11.12.13', '.', 5); +SELECT split_part('11.12.13', '.', -5); +SELECT split_part(null, '.', 1); + -- substring function SELECT substr('Spark SQL', 5); SELECT substr('Spark SQL', -3); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index b182b5cb6b390..01213bd57ad73 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 131 +-- Number of queries: 141 -- !query @@ -126,6 +126,87 @@ struct> ["aa","cc2ee3"] +-- !query +SELECT split_part('11.12.13', '.', 2) +-- !query schema +struct +-- !query output +12 + + +-- !query +SELECT split_part('11.12.13', '.', -1) +-- !query schema +struct +-- !query output +13 + + +-- !query +SELECT split_part('11.12.13', '.', -3) +-- !query schema +struct +-- !query output +11 + + +-- !query +SELECT split_part('11.12.13', '', 1) +-- !query schema +struct +-- !query output +11.12.13 + + +-- !query +SELECT split_part('11ab12ab13', 'ab', 1) +-- !query schema +struct +-- !query output +11 + + +-- !query +SELECT split_part('11.12.13', '.', 0) +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +SQL array indices start at 1 + + +-- !query +SELECT split_part('11.12.13', '.', 4) +-- !query schema +struct +-- !query output + + + +-- !query +SELECT split_part('11.12.13', '.', 5) +-- !query schema +struct +-- !query output + + + +-- !query +SELECT split_part('11.12.13', '.', -5) +-- !query schema +struct +-- !query output + + + +-- !query +SELECT split_part(null, '.', 1) +-- !query schema +struct +-- !query output +NULL + + -- !query SELECT substr('Spark SQL', 5) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 4307df7e61683..3a7f197e36234 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 131 +-- Number of queries: 141 -- !query @@ -124,6 +124,87 @@ struct> ["aa","cc2ee3"] +-- !query +SELECT split_part('11.12.13', '.', 2) +-- !query schema +struct +-- !query output +12 + + +-- !query +SELECT split_part('11.12.13', '.', -1) +-- !query schema +struct +-- !query output +13 + + +-- !query +SELECT split_part('11.12.13', '.', -3) +-- !query schema +struct +-- !query output +11 + + +-- !query +SELECT split_part('11.12.13', '', 1) +-- !query schema +struct +-- !query output +11.12.13 + + +-- !query +SELECT split_part('11ab12ab13', 'ab', 1) +-- !query schema +struct +-- !query output +11 + + +-- !query +SELECT split_part('11.12.13', '.', 0) +-- !query schema +struct<> +-- !query output +java.lang.ArrayIndexOutOfBoundsException +SQL array indices start at 1 + + +-- !query +SELECT split_part('11.12.13', '.', 4) +-- !query schema +struct +-- !query output + + + +-- !query +SELECT split_part('11.12.13', '.', 5) +-- !query schema +struct +-- !query output + + + +-- !query +SELECT split_part('11.12.13', '.', -5) +-- !query schema +struct +-- !query output + + + +-- !query +SELECT split_part(null, '.', 1) +-- !query schema +struct +-- !query output +NULL + + -- !query SELECT substr('Spark SQL', 5) -- !query schema From 2238b05cdac131f286222cf35769d4257cfc5a67 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 24 Mar 2022 17:39:08 +0300 Subject: [PATCH 046/535] [SPARK-37568][SQL][3.3] Support 2-arguments by the convert_timezone() function ### What changes were proposed in this pull request? Add new constructor to the `ConvertTimezone` expression (see https://github.com/apache/spark/pull/34817) which accepts only 2 arguments: 1. `` - the time zone to which the input timestamp should be converted. 2. `` - the timestamp to convert. and sets `` to the current session time zone (see the SQL config `spark.sql.session.timeZone`). ### Why are the changes needed? To help users in migrations from other systems to Spark SQL. Other systems support optional first parameter: - https://docs.aws.amazon.com/redshift/latest/dg/CONVERT_TIMEZONE.html - https://docs.snowflake.com/en/sql-reference/functions/convert_timezone.html ### Does this PR introduce _any_ user-facing change? No, it extends the existing signature, and the function hasn't been released yet. ### How was this patch tested? By running new tests: ``` $ build/sbt "sql/test:testOnly org.apache.spark.sql.expressions.ExpressionInfoSuite" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z timestamp-ltz.sql" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z timestamp-ntz.sql" ``` Closes #35957 from MaxGekk/convert_timezone-2-params-3.3. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../sql/catalyst/expressions/datetimeExpressions.scala | 10 ++++++++-- .../test/resources/sql-tests/inputs/timestamp-ltz.sql | 2 ++ .../test/resources/sql-tests/inputs/timestamp-ntz.sql | 1 + .../resources/sql-tests/results/timestamp-ltz.sql.out | 10 +++++++++- .../resources/sql-tests/results/timestamp-ntz.sql.out | 10 +++++++++- 5 files changed, 29 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index fbc670f5a8c81..fc701d4f817ca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -2999,10 +2999,11 @@ object SubtractDates { // scalastyle:off line.size.limit @ExpressionDescription( - usage = "_FUNC_(sourceTz, targetTz, sourceTs) - Converts the timestamp without time zone `sourceTs` from the `sourceTz` time zone to `targetTz`. ", + usage = "_FUNC_([sourceTz, ]targetTz, sourceTs) - Converts the timestamp without time zone `sourceTs` from the `sourceTz` time zone to `targetTz`. ", arguments = """ Arguments: - * sourceTz - the time zone for the input timestamp + * sourceTz - the time zone for the input timestamp. + If it is missed, the current session time zone is used as the source time zone. * targetTz - the time zone to which the input timestamp should be converted * sourceTs - a timestamp without time zone """, @@ -3010,6 +3011,8 @@ object SubtractDates { Examples: > SELECT _FUNC_('Europe/Amsterdam', 'America/Los_Angeles', timestamp_ntz'2021-12-06 00:00:00'); 2021-12-05 15:00:00 + > SELECT _FUNC_('Europe/Amsterdam', timestamp_ntz'2021-12-05 15:00:00'); + 2021-12-06 00:00:00 """, group = "datetime_funcs", since = "3.3.0") @@ -3020,6 +3023,9 @@ case class ConvertTimezone( sourceTs: Expression) extends TernaryExpression with ImplicitCastInputTypes with NullIntolerant { + def this(targetTz: Expression, sourceTs: Expression) = + this(CurrentTimeZone(), targetTz, sourceTs) + override def first: Expression = sourceTz override def second: Expression = targetTz override def third: Expression = sourceTs diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz.sql index 3c2883ec03024..88ce0baa8cd24 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ltz.sql @@ -14,3 +14,5 @@ select to_timestamp_ltz(to_timestamp_ntz(null)), to_timestamp_ltz(to_timestamp_n SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678); SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 45.678, 'CET'); SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 60.007); + +SELECT convert_timezone('Europe/Amsterdam', timestamp_ltz'2022-03-23 00:00:00 America/Los_Angeles'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql index b7dc2872e50d3..bec31d324e41a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/timestamp-ntz.sql @@ -17,6 +17,7 @@ SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 45.678, 'CET'); SELECT make_timestamp_ntz(2021, 07, 11, 6, 30, 60.007); SELECT convert_timezone('Europe/Moscow', 'America/Los_Angeles', timestamp_ntz'2022-01-01 00:00:00'); +SELECT convert_timezone('Europe/Amsterdam', timestamp_ntz'2022-03-23 00:00:00'); -- Get the difference between timestamps w/o time zone in the specified units select timestampdiff(QUARTER, timestamp_ntz'2022-01-01 01:02:03', timestamp_ntz'2022-05-02 05:06:07'); diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz.sql.out index 057cdf1db845c..c2ede2f8953d5 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ltz.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ltz.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 7 +-- Number of queries: 8 -- !query @@ -56,3 +56,11 @@ SELECT make_timestamp_ltz(2021, 07, 11, 6, 30, 60.007) struct -- !query output NULL + + +-- !query +SELECT convert_timezone('Europe/Amsterdam', timestamp_ltz'2022-03-23 00:00:00 America/Los_Angeles') +-- !query schema +struct +-- !query output +2022-03-23 08:00:00 diff --git a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out index f36ffffe5f74f..146c403b87882 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestamp-ntz.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 12 +-- Number of queries: 13 -- !query @@ -67,6 +67,14 @@ struct +-- !query output +2022-03-23 08:00:00 + + -- !query select timestampdiff(QUARTER, timestamp_ntz'2022-01-01 01:02:03', timestamp_ntz'2022-05-02 05:06:07') -- !query schema From 5582f92046a3486dc6d30e6e4083446fdbd52667 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Thu, 24 Mar 2022 22:43:32 +0800 Subject: [PATCH 047/535] [SPARK-37463][SQL] Read/Write Timestamp ntz from/to Orc uses int64 ### What changes were proposed in this pull request? #33588 (comment) show Spark cannot read/write timestamp ntz and ltz correctly. Based on the discussion https://github.com/apache/spark/pull/34741#issuecomment-983660633, we just to fix read/write timestamp ntz to Orc uses int64. ### Why are the changes needed? Fix the bug about read/write timestamp ntz from/to Orc with different times zone. ### Does this PR introduce _any_ user-facing change? Yes. Orc timestamp ntz is a new feature. ### How was this patch tested? New tests. Closes #34984 from beliefer/SPARK-37463-int64. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit e410d98f57750080ad46932cc9211d2cf5154c24) Signed-off-by: Wenchen Fan --- .../orc/OrcAtomicColumnVector.java | 10 ---- .../datasources/orc/OrcDeserializer.scala | 5 +- .../datasources/orc/OrcFileFormat.scala | 9 ++-- .../datasources/orc/OrcFilters.scala | 11 ++--- .../datasources/orc/OrcSerializer.scala | 4 +- .../execution/datasources/orc/OrcUtils.scala | 46 ++++++------------- .../parquet/ParquetRowConverter.scala | 4 ++ .../v2/orc/OrcPartitionReaderFactory.scala | 16 +++---- .../datasources/v2/orc/OrcWrite.scala | 2 +- .../datasources/orc/OrcQuerySuite.scala | 26 +++++++++++ 10 files changed, 59 insertions(+), 74 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcAtomicColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcAtomicColumnVector.java index b4f7b99247158..c2d8334d928c0 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcAtomicColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcAtomicColumnVector.java @@ -27,7 +27,6 @@ import org.apache.spark.sql.types.DateType; import org.apache.spark.sql.types.Decimal; import org.apache.spark.sql.types.TimestampType; -import org.apache.spark.sql.types.TimestampNTZType; import org.apache.spark.sql.vectorized.ColumnarArray; import org.apache.spark.sql.vectorized.ColumnarMap; import org.apache.spark.unsafe.types.UTF8String; @@ -37,7 +36,6 @@ */ public class OrcAtomicColumnVector extends OrcColumnVector { private final boolean isTimestamp; - private final boolean isTimestampNTZ; private final boolean isDate; // Column vector for each type. Only 1 is populated for any type. @@ -56,12 +54,6 @@ public class OrcAtomicColumnVector extends OrcColumnVector { isTimestamp = false; } - if (type instanceof TimestampNTZType) { - isTimestampNTZ = true; - } else { - isTimestampNTZ = false; - } - if (type instanceof DateType) { isDate = true; } else { @@ -113,8 +105,6 @@ public long getLong(int rowId) { int index = getRowIndex(rowId); if (isTimestamp) { return DateTimeUtils.fromJavaTimestamp(timestampData.asScratchTimestamp(index)); - } else if (isTimestampNTZ) { - return OrcUtils.fromOrcNTZ(timestampData.asScratchTimestamp(index)); } else { return longData.vector[index]; } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala index 0c2856cd40cd7..564e42ecd284b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcDeserializer.scala @@ -105,7 +105,7 @@ class OrcDeserializer( case IntegerType | _: YearMonthIntervalType => (ordinal, value) => updater.setInt(ordinal, value.asInstanceOf[IntWritable].get) - case LongType | _: DayTimeIntervalType => (ordinal, value) => + case LongType | _: DayTimeIntervalType | _: TimestampNTZType => (ordinal, value) => updater.setLong(ordinal, value.asInstanceOf[LongWritable].get) case FloatType => (ordinal, value) => @@ -129,9 +129,6 @@ class OrcDeserializer( case TimestampType => (ordinal, value) => updater.setLong(ordinal, DateTimeUtils.fromJavaTimestamp(value.asInstanceOf[OrcTimestamp])) - case TimestampNTZType => (ordinal, value) => - updater.setLong(ordinal, OrcUtils.fromOrcNTZ(value.asInstanceOf[OrcTimestamp])) - case DecimalType.Fixed(precision, scale) => (ordinal, value) => val v = OrcShimUtils.getDecimal(value) v.changePrecision(precision, scale) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala index 39a8763160530..2b060c9015317 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala @@ -142,11 +142,10 @@ class OrcFileFormat val fs = filePath.getFileSystem(conf) val readerOptions = OrcFile.readerOptions(conf).filesystem(fs) - val resultedColPruneInfo = - Utils.tryWithResource(OrcFile.createReader(filePath, readerOptions)) { reader => - OrcUtils.requestedColumnIds( - isCaseSensitive, dataSchema, requiredSchema, reader, conf) - } + val orcSchema = + Utils.tryWithResource(OrcFile.createReader(filePath, readerOptions))(_.getSchema) + val resultedColPruneInfo = OrcUtils.requestedColumnIds( + isCaseSensitive, dataSchema, requiredSchema, orcSchema, conf) if (resultedColPruneInfo.isEmpty) { Iterator.empty diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala index 0d85a45dbfa03..4bb1c187c45fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution.datasources.orc -import java.sql.Timestamp import java.time.{Duration, Instant, LocalDate, LocalDateTime, Period} import org.apache.hadoop.hive.common.`type`.HiveDecimal @@ -143,11 +142,11 @@ private[sql] object OrcFilters extends OrcFiltersBase { def getPredicateLeafType(dataType: DataType): PredicateLeaf.Type = dataType match { case BooleanType => PredicateLeaf.Type.BOOLEAN case ByteType | ShortType | IntegerType | LongType | - _: AnsiIntervalType => PredicateLeaf.Type.LONG + _: AnsiIntervalType | TimestampNTZType => PredicateLeaf.Type.LONG case FloatType | DoubleType => PredicateLeaf.Type.FLOAT case StringType => PredicateLeaf.Type.STRING case DateType => PredicateLeaf.Type.DATE - case TimestampType | TimestampNTZType => PredicateLeaf.Type.TIMESTAMP + case TimestampType => PredicateLeaf.Type.TIMESTAMP case _: DecimalType => PredicateLeaf.Type.DECIMAL case _ => throw QueryExecutionErrors.unsupportedOperationForDataTypeError(dataType) } @@ -170,11 +169,7 @@ private[sql] object OrcFilters extends OrcFiltersBase { case _: TimestampType if value.isInstanceOf[Instant] => toJavaTimestamp(instantToMicros(value.asInstanceOf[Instant])) case _: TimestampNTZType if value.isInstanceOf[LocalDateTime] => - val orcTimestamp = OrcUtils.toOrcNTZ(localDateTimeToMicros(value.asInstanceOf[LocalDateTime])) - // Hive meets OrcTimestamp will throw ClassNotFoundException, So convert it. - val timestamp = new Timestamp(orcTimestamp.getTime) - timestamp.setNanos(orcTimestamp.getNanos) - timestamp + localDateTimeToMicros(value.asInstanceOf[LocalDateTime]) case _: YearMonthIntervalType => IntervalUtils.periodToMonths(value.asInstanceOf[Period]).longValue() case _: DayTimeIntervalType => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala index a928cd9a4f00a..5ed73c3f78b1d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcSerializer.scala @@ -98,7 +98,7 @@ class OrcSerializer(dataSchema: StructType) { } - case LongType | _: DayTimeIntervalType => + case LongType | _: DayTimeIntervalType | _: TimestampNTZType => if (reuseObj) { val result = new LongWritable() (getter, ordinal) => @@ -147,8 +147,6 @@ class OrcSerializer(dataSchema: StructType) { result.setNanos(ts.getNanos) result - case TimestampNTZType => (getter, ordinal) => OrcUtils.toOrcNTZ(getter.getLong(ordinal)) - case DecimalType.Fixed(precision, scale) => OrcShimUtils.getHiveDecimalWritable(precision, scale) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala index 1f05117462db8..a68ce1a86367f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.execution.datasources.orc import java.nio.charset.StandardCharsets.UTF_8 -import java.sql.Timestamp import java.util.Locale import scala.collection.JavaConverters._ @@ -29,7 +28,6 @@ import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.hive.serde2.io.DateWritable import org.apache.hadoop.io.{BooleanWritable, ByteWritable, DoubleWritable, FloatWritable, IntWritable, LongWritable, ShortWritable, WritableComparable} import org.apache.orc.{BooleanColumnStatistics, ColumnStatistics, DateColumnStatistics, DoubleColumnStatistics, IntegerColumnStatistics, OrcConf, OrcFile, Reader, TypeDescription, Writer} -import org.apache.orc.mapred.OrcTimestamp import org.apache.spark.{SPARK_VERSION_SHORT, SparkException} import org.apache.spark.deploy.SparkHadoopUtil @@ -39,8 +37,8 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.caseSensitiveResolution import org.apache.spark.sql.catalyst.expressions.JoinedRow import org.apache.spark.sql.catalyst.parser.CatalystSqlParser -import org.apache.spark.sql.catalyst.util.{quoteIdentifier, CharVarcharUtils, DateTimeUtils} -import org.apache.spark.sql.catalyst.util.DateTimeConstants._ +import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.connector.expressions.aggregate.{Aggregation, Count, CountStar, Max, Min} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.datasources.{AggregatePushDownUtils, SchemaMergeUtils} @@ -199,7 +197,7 @@ object OrcUtils extends Logging { isCaseSensitive: Boolean, dataSchema: StructType, requiredSchema: StructType, - reader: Reader, + orcSchema: TypeDescription, conf: Configuration): Option[(Array[Int], Boolean)] = { def checkTimestampCompatibility(orcCatalystSchema: StructType, dataSchema: StructType): Unit = { orcCatalystSchema.fields.map(_.dataType).zip(dataSchema.fields.map(_.dataType)).foreach { @@ -212,7 +210,6 @@ object OrcUtils extends Logging { } } - val orcSchema = reader.getSchema checkTimestampCompatibility(toCatalystSchema(orcSchema), dataSchema) val orcFieldNames = orcSchema.getFieldNames.asScala val forcePositionalEvolution = OrcConf.FORCE_POSITIONAL_EVOLUTION.getBoolean(conf) @@ -261,7 +258,6 @@ object OrcUtils extends Logging { if (matchedOrcFields.size > 1) { // Need to fail if there is ambiguity, i.e. more than one field is matched. val matchedOrcFieldsString = matchedOrcFields.mkString("[", ", ", "]") - reader.close() throw QueryExecutionErrors.foundDuplicateFieldInCaseInsensitiveModeError( requiredFieldName, matchedOrcFieldsString) } else { @@ -285,18 +281,17 @@ object OrcUtils extends Logging { * Given a `StructType` object, this methods converts it to corresponding string representation * in ORC. */ - def orcTypeDescriptionString(dt: DataType): String = dt match { + def getOrcSchemaString(dt: DataType): String = dt match { case s: StructType => val fieldTypes = s.fields.map { f => - s"${quoteIdentifier(f.name)}:${orcTypeDescriptionString(f.dataType)}" + s"${quoteIdentifier(f.name)}:${getOrcSchemaString(f.dataType)}" } s"struct<${fieldTypes.mkString(",")}>" case a: ArrayType => - s"array<${orcTypeDescriptionString(a.elementType)}>" + s"array<${getOrcSchemaString(a.elementType)}>" case m: MapType => - s"map<${orcTypeDescriptionString(m.keyType)},${orcTypeDescriptionString(m.valueType)}>" - case TimestampNTZType => TypeDescription.Category.TIMESTAMP.getName - case _: DayTimeIntervalType => LongType.catalogString + s"map<${getOrcSchemaString(m.keyType)},${getOrcSchemaString(m.valueType)}>" + case _: DayTimeIntervalType | _: TimestampNTZType => LongType.catalogString case _: YearMonthIntervalType => IntegerType.catalogString case _ => dt.catalogString } @@ -306,16 +301,14 @@ object OrcUtils extends Logging { dt match { case y: YearMonthIntervalType => val typeDesc = new TypeDescription(TypeDescription.Category.INT) - typeDesc.setAttribute( - CATALYST_TYPE_ATTRIBUTE_NAME, y.typeName) + typeDesc.setAttribute(CATALYST_TYPE_ATTRIBUTE_NAME, y.typeName) Some(typeDesc) case d: DayTimeIntervalType => val typeDesc = new TypeDescription(TypeDescription.Category.LONG) - typeDesc.setAttribute( - CATALYST_TYPE_ATTRIBUTE_NAME, d.typeName) + typeDesc.setAttribute(CATALYST_TYPE_ATTRIBUTE_NAME, d.typeName) Some(typeDesc) case n: TimestampNTZType => - val typeDesc = new TypeDescription(TypeDescription.Category.TIMESTAMP) + val typeDesc = new TypeDescription(TypeDescription.Category.LONG) typeDesc.setAttribute(CATALYST_TYPE_ATTRIBUTE_NAME, n.typeName) Some(typeDesc) case t: TimestampType => @@ -378,9 +371,9 @@ object OrcUtils extends Logging { partitionSchema: StructType, conf: Configuration): String = { val resultSchemaString = if (canPruneCols) { - OrcUtils.orcTypeDescriptionString(resultSchema) + OrcUtils.getOrcSchemaString(resultSchema) } else { - OrcUtils.orcTypeDescriptionString(StructType(dataSchema.fields ++ partitionSchema.fields)) + OrcUtils.getOrcSchemaString(StructType(dataSchema.fields ++ partitionSchema.fields)) } OrcConf.MAPRED_INPUT_SCHEMA.setString(conf, resultSchemaString) resultSchemaString @@ -532,17 +525,4 @@ object OrcUtils extends Logging { resultRow } } - - def fromOrcNTZ(ts: Timestamp): Long = { - DateTimeUtils.millisToMicros(ts.getTime) + - (ts.getNanos / NANOS_PER_MICROS) % MICROS_PER_MILLIS - } - - def toOrcNTZ(micros: Long): OrcTimestamp = { - val seconds = Math.floorDiv(micros, MICROS_PER_SECOND) - val nanos = (micros - seconds * MICROS_PER_SECOND) * NANOS_PER_MICROS - val result = new OrcTimestamp(seconds * MILLIS_PER_SECOND) - result.setNanos(nanos.toInt) - result - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index 63ad5ed6db82e..a955dd6fc76a3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -358,6 +358,8 @@ private[parquet] class ParquetRowConverter( case StringType => new ParquetStringConverter(updater) + // As long as the parquet type is INT64 timestamp, whether logical annotation + // `isAdjustedToUTC` is false or true, it will be read as Spark's TimestampLTZ type case TimestampType if parquetType.getLogicalTypeAnnotation.isInstanceOf[TimestampLogicalTypeAnnotation] && parquetType.getLogicalTypeAnnotation @@ -368,6 +370,8 @@ private[parquet] class ParquetRowConverter( } } + // As long as the parquet type is INT64 timestamp, whether logical annotation + // `isAdjustedToUTC` is false or true, it will be read as Spark's TimestampLTZ type case TimestampType if parquetType.getLogicalTypeAnnotation.isInstanceOf[TimestampLogicalTypeAnnotation] && parquetType.getLogicalTypeAnnotation diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala index ec6a3bbc26187..ef13beaf9b413 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala @@ -88,11 +88,9 @@ case class OrcPartitionReaderFactory( } val filePath = new Path(new URI(file.filePath)) - val resultedColPruneInfo = - Utils.tryWithResource(createORCReader(filePath, conf)) { reader => - OrcUtils.requestedColumnIds( - isCaseSensitive, dataSchema, readDataSchema, reader, conf) - } + val orcSchema = Utils.tryWithResource(createORCReader(filePath, conf))(_.getSchema) + val resultedColPruneInfo = OrcUtils.requestedColumnIds( + isCaseSensitive, dataSchema, readDataSchema, orcSchema, conf) if (resultedColPruneInfo.isEmpty) { new EmptyPartitionReader[InternalRow] @@ -131,11 +129,9 @@ case class OrcPartitionReaderFactory( } val filePath = new Path(new URI(file.filePath)) - val resultedColPruneInfo = - Utils.tryWithResource(createORCReader(filePath, conf)) { reader => - OrcUtils.requestedColumnIds( - isCaseSensitive, dataSchema, readDataSchema, reader, conf) - } + val orcSchema = Utils.tryWithResource(createORCReader(filePath, conf))(_.getSchema) + val resultedColPruneInfo = OrcUtils.requestedColumnIds( + isCaseSensitive, dataSchema, readDataSchema, orcSchema, conf) if (resultedColPruneInfo.isEmpty) { new EmptyPartitionReader diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcWrite.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcWrite.scala index 1ac9266e8d5e7..63c20abb95ebe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcWrite.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcWrite.scala @@ -43,7 +43,7 @@ case class OrcWrite( val conf = job.getConfiguration - conf.set(MAPRED_OUTPUT_SCHEMA.getAttribute, OrcUtils.orcTypeDescriptionString(dataSchema)) + conf.set(MAPRED_OUTPUT_SCHEMA.getAttribute, OrcUtils.getOrcSchemaString(dataSchema)) conf.set(COMPRESS.getAttribute, orcOptions.compressionCodec) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index 49b7cfa9d3724..f093a5fc73526 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -35,6 +35,7 @@ import org.apache.orc.mapreduce.OrcInputFormat import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation, RecordReaderIterator} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec @@ -803,6 +804,31 @@ abstract class OrcQuerySuite extends OrcQueryTest with SharedSparkSession { } } } + + test("SPARK-37463: read/write Timestamp ntz to Orc with different time zone") { + DateTimeTestUtils.withDefaultTimeZone(DateTimeTestUtils.LA) { + val sqlText = """ + |select + | timestamp_ntz '2021-06-01 00:00:00' ts_ntz1, + | timestamp_ntz '1883-11-16 00:00:00.0' as ts_ntz2, + | timestamp_ntz '2021-03-14 02:15:00.0' as ts_ntz3 + |""".stripMargin + + val df = sql(sqlText) + + df.write.mode("overwrite").orc("ts_ntz_orc") + + val query = "select * from `orc`.`ts_ntz_orc`" + + DateTimeTestUtils.outstandingZoneIds.foreach { zoneId => + DateTimeTestUtils.withDefaultTimeZone(zoneId) { + withAllNativeOrcReaders { + checkAnswer(sql(query), df) + } + } + } + } + } } class OrcV1QuerySuite extends OrcQuerySuite { From a338be2e68c808a292b108fd0e41d65564fc9abd Mon Sep 17 00:00:00 2001 From: mcdull-zhang Date: Fri, 25 Mar 2022 08:47:47 +0800 Subject: [PATCH 048/535] [SPARK-38570][SQL] Incorrect DynamicPartitionPruning caused by Literal ### What changes were proposed in this pull request? The return value of Literal.references is an empty AttributeSet, so Literal is mistaken for a partition column. For example, the sql in the test case will generate such a physical plan when the adaptive is closed: ```text *(4) Project [store_id#5281, date_id#5283, state_province#5292] +- *(4) BroadcastHashJoin [store_id#5281], [store_id#5291], Inner, BuildRight, false :- Union : :- *(1) Project [4 AS store_id#5281, date_id#5283] : : +- *(1) Filter ((isnotnull(date_id#5283) AND (date_id#5283 >= 1300)) AND dynamicpruningexpression(4 IN dynamicpruning#5300)) : : : +- ReusedSubquery SubqueryBroadcast dynamicpruning#5300, 0, [store_id#5291], [id=#336] : : +- *(1) ColumnarToRow : : +- FileScan parquet default.fact_sk[date_id#5283,store_id#5286] Batched: true, DataFilters: [isnotnull(date_id#5283), (date_id#5283 >= 1300)], Format: Parquet, Location: CatalogFileIndex(1 paths)[file:/Users/dongdongzhang/code/study/spark/spark-warehouse/org.apache.s..., PartitionFilters: [dynamicpruningexpression(4 IN dynamicpruning#5300)], PushedFilters: [IsNotNull(date_id), GreaterThanOrEqual(date_id,1300)], ReadSchema: struct : : +- SubqueryBroadcast dynamicpruning#5300, 0, [store_id#5291], [id=#336] : : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#335] : : +- *(1) Project [store_id#5291, state_province#5292] : : +- *(1) Filter (((isnotnull(country#5293) AND (country#5293 = US)) AND ((store_id#5291 <=> 4) OR (store_id#5291 <=> 5))) AND isnotnull(store_id#5291)) : : +- *(1) ColumnarToRow : : +- FileScan parquet default.dim_store[store_id#5291,state_province#5292,country#5293] Batched: true, DataFilters: [isnotnull(country#5293), (country#5293 = US), ((store_id#5291 <=> 4) OR (store_id#5291 <=> 5)), ..., Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/Users/dongdongzhang/code/study/spark/spark-warehouse/org.apache...., PartitionFilters: [], PushedFilters: [IsNotNull(country), EqualTo(country,US), Or(EqualNullSafe(store_id,4),EqualNullSafe(store_id,5))..., ReadSchema: struct : +- *(2) Project [5 AS store_id#5282, date_id#5287] : +- *(2) Filter ((isnotnull(date_id#5287) AND (date_id#5287 <= 1000)) AND dynamicpruningexpression(5 IN dynamicpruning#5300)) : : +- ReusedSubquery SubqueryBroadcast dynamicpruning#5300, 0, [store_id#5291], [id=#336] : +- *(2) ColumnarToRow : +- FileScan parquet default.fact_stats[date_id#5287,store_id#5290] Batched: true, DataFilters: [isnotnull(date_id#5287), (date_id#5287 <= 1000)], Format: Parquet, Location: CatalogFileIndex(1 paths)[file:/Users/dongdongzhang/code/study/spark/spark-warehouse/org.apache.s..., PartitionFilters: [dynamicpruningexpression(5 IN dynamicpruning#5300)], PushedFilters: [IsNotNull(date_id), LessThanOrEqual(date_id,1000)], ReadSchema: struct : +- ReusedSubquery SubqueryBroadcast dynamicpruning#5300, 0, [store_id#5291], [id=#336] +- ReusedExchange [store_id#5291, state_province#5292], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#335] ``` after this pr: ```text *(4) Project [store_id#5281, date_id#5283, state_province#5292] +- *(4) BroadcastHashJoin [store_id#5281], [store_id#5291], Inner, BuildRight, false :- Union : :- *(1) Project [4 AS store_id#5281, date_id#5283] : : +- *(1) Filter (isnotnull(date_id#5283) AND (date_id#5283 >= 1300)) : : +- *(1) ColumnarToRow : : +- FileScan parquet default.fact_sk[date_id#5283,store_id#5286] Batched: true, DataFilters: [isnotnull(date_id#5283), (date_id#5283 >= 1300)], Format: Parquet, Location: CatalogFileIndex(1 paths)[file:/Users/dongdongzhang/code/study/spark/spark-warehouse/org.apache.s..., PartitionFilters: [], PushedFilters: [IsNotNull(date_id), GreaterThanOrEqual(date_id,1300)], ReadSchema: struct : +- *(2) Project [5 AS store_id#5282, date_id#5287] : +- *(2) Filter (isnotnull(date_id#5287) AND (date_id#5287 <= 1000)) : +- *(2) ColumnarToRow : +- FileScan parquet default.fact_stats[date_id#5287,store_id#5290] Batched: true, DataFilters: [isnotnull(date_id#5287), (date_id#5287 <= 1000)], Format: Parquet, Location: CatalogFileIndex(1 paths)[file:/Users/dongdongzhang/code/study/spark/spark-warehouse/org.apache.s..., PartitionFilters: [], PushedFilters: [IsNotNull(date_id), LessThanOrEqual(date_id,1000)], ReadSchema: struct +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#326] +- *(3) Project [store_id#5291, state_province#5292] +- *(3) Filter (((isnotnull(country#5293) AND (country#5293 = US)) AND ((store_id#5291 <=> 4) OR (store_id#5291 <=> 5))) AND isnotnull(store_id#5291)) +- *(3) ColumnarToRow +- FileScan parquet default.dim_store[store_id#5291,state_province#5292,country#5293] Batched: true, DataFilters: [isnotnull(country#5293), (country#5293 = US), ((store_id#5291 <=> 4) OR (store_id#5291 <=> 5)), ..., Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/Users/dongdongzhang/code/study/spark/spark-warehouse/org.apache...., PartitionFilters: [], PushedFilters: [IsNotNull(country), EqualTo(country,US), Or(EqualNullSafe(store_id,4),EqualNullSafe(store_id,5))..., ReadSchema: struct ``` ### Why are the changes needed? Execution performance improvement ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added unit test Closes #35878 from mcdull-zhang/literal_dynamic_partition. Lead-authored-by: mcdull-zhang Co-authored-by: mcdull_zhang Signed-off-by: Yuming Wang (cherry picked from commit 4c51851c4227f22df9385a66280905108d529fba) Signed-off-by: Yuming Wang --- .../sql/catalyst/expressions/predicates.scala | 1 + .../sql/DynamicPartitionPruningSuite.scala | 28 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index d16e09c5ed95c..949ce97411652 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -128,6 +128,7 @@ trait PredicateHelper extends AliasHelper with Logging { def findExpressionAndTrackLineageDown( exp: Expression, plan: LogicalPlan): Option[(Expression, LogicalPlan)] = { + if (exp.references.isEmpty) return None plan match { case p: Project => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala index f74e0474eae1e..cfdd2e08a79ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala @@ -1528,6 +1528,34 @@ abstract class DynamicPartitionPruningSuiteBase } } } + + test("SPARK-38570: Fix incorrect DynamicPartitionPruning caused by Literal") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") { + val df = sql( + """ + |SELECT f.store_id, + | f.date_id, + | s.state_province + |FROM (SELECT 4 AS store_id, + | date_id, + | product_id + | FROM fact_sk + | WHERE date_id >= 1300 + | UNION ALL + | SELECT 5 AS store_id, + | date_id, + | product_id + | FROM fact_stats + | WHERE date_id <= 1000) f + |JOIN dim_store s + |ON f.store_id = s.store_id + |WHERE s.country = 'US' + |""".stripMargin) + + checkPartitionPruningPredicate(df, withSubquery = false, withBroadcast = false) + checkAnswer(df, Row(4, 1300, "California") :: Row(5, 1000, "Texas") :: Nil) + } + } } abstract class DynamicPartitionPruningDataSourceSuiteBase From 9277353b23df4b54dfb65e948e1b3d001806929b Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Fri, 25 Mar 2022 20:00:39 +0800 Subject: [PATCH 049/535] [SPARK-38644][SQL] DS V2 topN push-down supports project with alias ### What changes were proposed in this pull request? Currently, Spark DS V2 topN push-down doesn't supports project with alias. This PR let it works good with alias. **Example**: the origin plan show below: ``` Sort [mySalary#10 ASC NULLS FIRST], true +- Project [NAME#1, SALARY#2 AS mySalary#10] +- ScanBuilderHolder [DEPT#0, NAME#1, SALARY#2, BONUS#3, IS_MANAGER#4], RelationV2[DEPT#0, NAME#1, SALARY#2, BONUS#3, IS_MANAGER#4] test.employee, JDBCScanBuilder(org.apache.spark.sql.test.TestSparkSession7fd4b9ec,StructType(StructField(DEPT,IntegerType,true),StructField(NAME,StringType,true),StructField(SALARY,DecimalType(20,2),true),StructField(BONUS,DoubleType,true),StructField(IS_MANAGER,BooleanType,true)),org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions3c8e4a82) ``` The `pushedLimit` and `sortOrders` of `JDBCScanBuilder` are empty. If we can push down the top n, then the plan will be: ``` Project [NAME#1, SALARY#2 AS mySalary#10] +- ScanBuilderHolder [DEPT#0, NAME#1, SALARY#2, BONUS#3, IS_MANAGER#4], RelationV2[DEPT#0, NAME#1, SALARY#2, BONUS#3, IS_MANAGER#4] test.employee, JDBCScanBuilder(org.apache.spark.sql.test.TestSparkSession7fd4b9ec,StructType(StructField(DEPT,IntegerType,true),StructField(NAME,StringType,true),StructField(SALARY,DecimalType(20,2),true),StructField(BONUS,DoubleType,true),StructField(IS_MANAGER,BooleanType,true)),org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions3c8e4a82) ``` The `pushedLimit` of `JDBCScanBuilder` will be `1` and `sortOrders` of `JDBCScanBuilder` will be `SALARY ASC NULLS FIRST`. ### Why are the changes needed? Alias is more useful. ### Does this PR introduce _any_ user-facing change? 'Yes'. Users could see DS V2 topN push-down supports project with alias. ### How was this patch tested? New tests. Closes #35961 from beliefer/SPARK-38644. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../v2/V2ScanRelationPushDown.scala | 15 +++++++----- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 24 +++++++++++++++++++ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala index c699e92cf0190..eaa30f90b77f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala @@ -19,14 +19,14 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable -import org.apache.spark.sql.catalyst.expressions.{Alias, AliasHelper, And, Attribute, AttributeReference, Cast, Divide, DivideDTInterval, DivideYMInterval, EqualTo, Expression, If, IntegerLiteral, Literal, NamedExpression, PredicateHelper, ProjectionOverSchema, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, AliasHelper, And, Attribute, AttributeReference, Cast, Divide, DivideDTInterval, DivideYMInterval, EqualTo, Expression, If, IntegerLiteral, Literal, NamedExpression, PredicateHelper, ProjectionOverSchema, SortOrder, SubqueryExpression} import org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.optimizer.CollapseProject import org.apache.spark.sql.catalyst.planning.ScanOperation import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, LeafNode, Limit, LocalLimit, LogicalPlan, Project, Sample, Sort} import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.connector.expressions.SortOrder +import org.apache.spark.sql.connector.expressions.{SortOrder => V2SortOrder} import org.apache.spark.sql.connector.expressions.aggregate.{Aggregation, Avg, Count, GeneralAggregateFunc, Sum} import org.apache.spark.sql.connector.expressions.filter.Predicate import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownAggregates, SupportsPushDownFilters, V1Scan} @@ -374,9 +374,12 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper wit sHolder.pushedLimit = Some(limit) } operation - case s @ Sort(order, _, operation @ ScanOperation(_, filter, sHolder: ScanBuilderHolder)) - if filter.isEmpty => - val orders = DataSourceStrategy.translateSortOrders(order) + case s @ Sort(order, _, operation @ ScanOperation(project, filter, sHolder: ScanBuilderHolder)) + if filter.isEmpty && CollapseProject.canCollapseExpressions( + order, project, alwaysInline = true) => + val aliasMap = getAliasMap(project) + val newOrder = order.map(replaceAlias(_, aliasMap)).asInstanceOf[Seq[SortOrder]] + val orders = DataSourceStrategy.translateSortOrders(newOrder) if (orders.length == order.length) { val topNPushed = PushDownUtils.pushTopN(sHolder.builder, orders.toArray, limit) if (topNPushed) { @@ -427,7 +430,7 @@ case class ScanBuilderHolder( builder: ScanBuilder) extends LeafNode { var pushedLimit: Option[Int] = None - var sortOrders: Seq[SortOrder] = Seq.empty[SortOrder] + var sortOrders: Seq[V2SortOrder] = Seq.empty[V2SortOrder] var pushedSample: Option[TableSampleInfo] = None diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index e7e9174463bbf..3ab87ee3387e4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -267,6 +267,30 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(df8, Seq(Row(2, "alex", 12000.00, 1200.0, false))) } + test("simple scan with top N: order by with alias") { + val df1 = spark.read + .table("h2.test.employee") + .select($"NAME", $"SALARY".as("mySalary")) + .sort("mySalary") + .limit(1) + checkSortRemoved(df1) + checkPushedInfo(df1, + "PushedFilters: [], PushedTopN: ORDER BY [SALARY ASC NULLS FIRST] LIMIT 1, ") + checkAnswer(df1, Seq(Row("cathy", 9000.00))) + + val df2 = spark.read + .table("h2.test.employee") + .select($"DEPT", $"NAME", $"SALARY".as("mySalary")) + .filter($"DEPT" > 1) + .sort("mySalary") + .limit(1) + checkSortRemoved(df2) + checkPushedInfo(df2, + "PushedFilters: [DEPT IS NOT NULL, DEPT > 1], " + + "PushedTopN: ORDER BY [SALARY ASC NULLS FIRST] LIMIT 1, ") + checkAnswer(df2, Seq(Row(2, "david", 10000.00))) + } + test("scan with filter push-down") { val df = spark.table("h2.test.people").filter($"id" > 1) checkFiltersRemoved(df) From 332861569d09d404da48b63846c0fa5920da0a6e Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 25 Mar 2022 22:00:48 +0900 Subject: [PATCH 050/535] [SPARK-38654][SQL][PYTHON] Show default index type in SQL plans for pandas API on Spark ### What changes were proposed in this pull request? This PR proposes to show the default index type in SQL plans for pandas API on Spark. Note that this PR does not handle `sequence` case because that's discouraged in production, and tricky to insert an alias. ### Why are the changes needed? When users set `compute.default_index_type`, it's difficult to know which DataFrame users which index. We should at least note that in Spark SQL so users can tell which plans are for default index. ### Does this PR introduce _any_ user-facing change? Yes, when users call `pyspark.pandas.DataFrame.spark.explain(True)`: **distributed** ```python import pyspark.pandas as ps ps.set_option("compute.default_index_type", "distributed") ps.range(1).spark.explain() ``` ``` == Physical Plan == *(1) Project [distributed_index() AS __index_level_0__#15L, id#13L] +- *(1) Range (0, 1, step=1, splits=16) ``` **distributed-sequence** ```python import pyspark.pandas as ps ps.set_option("compute.default_index_type", "distributed-sequence") ps.range(1).spark.explain() ``` ``` == Physical Plan == AttachDistributedSequence[__index_level_0__#16L, id#13L] Index: __index_level_0__#16L +- *(1) Range (0, 1, step=1, splits=16) ``` ### How was this patch tested? Manually tested. Closes #35968 from HyukjinKwon/SPARK-38654. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 8ef0159550c143e07fa79b120b2d1fdf9d535fdc) Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/internal.py | 6 +++++- .../catalyst/expressions/MonotonicallyIncreasingID.scala | 2 +- .../catalyst/plans/logical/pythonLogicalOperators.scala | 7 +++++++ .../execution/python/AttachDistributedSequenceExec.scala | 7 +++++++ 4 files changed, 20 insertions(+), 2 deletions(-) diff --git a/python/pyspark/pandas/internal.py b/python/pyspark/pandas/internal.py index f79f0ada73a8d..b2e67492829da 100644 --- a/python/pyspark/pandas/internal.py +++ b/python/pyspark/pandas/internal.py @@ -887,7 +887,11 @@ def attach_sequence_column(sdf: SparkDataFrame, column_name: str) -> SparkDataFr @staticmethod def attach_distributed_column(sdf: SparkDataFrame, column_name: str) -> SparkDataFrame: scols = [scol_for(sdf, column) for column in sdf.columns] - return sdf.select(F.monotonically_increasing_id().alias(column_name), *scols) + jvm = sdf.sparkSession._jvm + tag = jvm.org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FUNC_ALIAS() + jexpr = F.monotonically_increasing_id()._jc.expr() + jexpr.setTagValue(tag, "distributed_index") + return sdf.select(Column(jvm.Column(jexpr)).alias(column_name), *scols) @staticmethod def attach_distributed_sequence_column(sdf: SparkDataFrame, column_name: str) -> SparkDataFrame: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala index f228b36ecd472..ecf254f65f5a1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala @@ -85,7 +85,7 @@ case class MonotonicallyIncreasingID() extends LeafExpression with Stateful { $countTerm++;""", isNull = FalseLiteral) } - override def prettyName: String = "monotonically_increasing_id" + override def nodeName: String = "monotonically_increasing_id" override def sql: String = s"$prettyName()" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/pythonLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/pythonLogicalOperators.scala index 13a40db1199cb..c2f74b3508342 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/pythonLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/pythonLogicalOperators.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, Expression, PythonUDF} +import org.apache.spark.sql.catalyst.util.truncatedString /** * FlatMap groups using a udf: pandas.Dataframe -> pandas.DataFrame. @@ -146,4 +147,10 @@ case class AttachDistributedSequence( override protected def withNewChildInternal(newChild: LogicalPlan): AttachDistributedSequence = copy(child = newChild) + + override def simpleString(maxFields: Int): String = { + val truncatedOutputString = truncatedString(output, "[", ", ", "]", maxFields) + val indexColumn = s"Index: $sequenceAttr" + s"$nodeName$truncatedOutputString $indexColumn" + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AttachDistributedSequenceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AttachDistributedSequenceExec.scala index 27bfb7f682572..203fb6d7d50b4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AttachDistributedSequenceExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AttachDistributedSequenceExec.scala @@ -21,6 +21,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.physical._ +import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} /** @@ -59,4 +60,10 @@ case class AttachDistributedSequenceExec( override protected def withNewChildInternal(newChild: SparkPlan): AttachDistributedSequenceExec = copy(child = newChild) + + override def simpleString(maxFields: Int): String = { + val truncatedOutputString = truncatedString(output, "[", ", ", "]", maxFields) + val indexColumn = s"Index: $sequenceAttr" + s"$nodeName$truncatedOutputString $indexColumn" + } } From 2d470763ecbcccde418956b03e503461352ab4c2 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Fri, 25 Mar 2022 13:00:17 -0500 Subject: [PATCH 051/535] [SPARK-37618][CORE] Remove shuffle blocks using the shuffle service for released executors ### What changes were proposed in this pull request? Add support for removing shuffle files on released executors via the external shuffle service. The shuffle service already supports removing shuffle service cached RDD blocks, so I reused this mechanism to remove shuffle blocks as well, so as not to require updating the shuffle service itself. To support this change functioning in a secure Yarn environment, I updated permissions on some of the block manager folders and files. Specifically: - Block manager sub directories have the group write posix permission added to them. This gives the shuffle service permission to delete files from within these folders. - Shuffle files have the world readable posix permission added to them. This is because when the sub directories are marked group writable, they lose the setgid bit that gets set in a secure Yarn environment. Without this, the permissions on the files would be `rw-r-----`, and since the group running Yarn (and therefore the shuffle service), is no longer the group owner of the file, it does not have access to read the file. The sub directories still do not have world execute permissions, so there's no security issue opening up these files. Both of these changes are done after creating a file so that umasks don't affect the resulting permissions. ### Why are the changes needed? External shuffle services are very useful for long running jobs and dynamic allocation. However, currently if an executor is removed (either through dynamic deallocation or through some error), the shuffle files created by that executor will live until the application finishes. This results in local disks slowly filling up over time, eventually causing problems for long running applications. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New unit test. Not sure if there's a better way I could have tested for the files being deleted or any other tests I should add. Closes #35085 from Kimahriman/shuffle-service-remove-shuffle-blocks. Authored-by: Adam Binford Signed-off-by: Thomas Graves (cherry picked from commit 9a7596e1dde0f1dd596aa6d3b2efbcb5d1ef70ea) Signed-off-by: Thomas Graves --- .../shuffle/ExternalBlockStoreClient.java | 4 +- .../io/LocalDiskShuffleMapOutputWriter.java | 3 +- .../org/apache/spark/ContextCleaner.scala | 4 +- .../scala/org/apache/spark/SparkEnv.scala | 6 +- .../spark/internal/config/package.scala | 10 ++ .../shuffle/IndexShuffleBlockResolver.scala | 18 ++- .../spark/shuffle/ShuffleBlockResolver.scala | 8 ++ .../storage/BlockManagerMasterEndpoint.scala | 89 +++++++++--- .../spark/storage/DiskBlockManager.scala | 61 ++++++++- .../org/apache/spark/storage/DiskStore.scala | 10 ++ .../sort/UnsafeShuffleWriterSuite.java | 8 ++ .../spark/ExternalShuffleServiceSuite.scala | 127 +++++++++++++++++- .../BypassMergeSortShuffleWriterSuite.scala | 11 ++ .../sort/IndexShuffleBlockResolverSuite.scala | 5 + ...LocalDiskShuffleMapOutputWriterSuite.scala | 5 + .../BlockManagerReplicationSuite.scala | 3 +- .../spark/storage/BlockManagerSuite.scala | 3 +- .../spark/storage/DiskBlockManagerSuite.scala | 26 +++- docs/configuration.md | 11 ++ .../streaming/ReceivedBlockHandlerSuite.scala | 3 +- 20 files changed, 372 insertions(+), 43 deletions(-) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java index d2df77658ccdd..b066d99e8ef8a 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java @@ -299,7 +299,7 @@ public void onSuccess(ByteBuffer response) { BlockTransferMessage msgObj = BlockTransferMessage.Decoder.fromByteBuffer(response); numRemovedBlocksFuture.complete(((BlocksRemoved) msgObj).numRemovedBlocks); } catch (Throwable t) { - logger.warn("Error trying to remove RDD blocks " + Arrays.toString(blockIds) + + logger.warn("Error trying to remove blocks " + Arrays.toString(blockIds) + " via external shuffle service from executor: " + execId, t); numRemovedBlocksFuture.complete(0); } @@ -307,7 +307,7 @@ public void onSuccess(ByteBuffer response) { @Override public void onFailure(Throwable e) { - logger.warn("Error trying to remove RDD blocks " + Arrays.toString(blockIds) + + logger.warn("Error trying to remove blocks " + Arrays.toString(blockIds) + " via external shuffle service from executor: " + execId, e); numRemovedBlocksFuture.complete(0); } diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java index 6c5025d1822f8..efe508d1361c7 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriter.java @@ -36,7 +36,6 @@ import org.apache.spark.internal.config.package$; import org.apache.spark.shuffle.IndexShuffleBlockResolver; import org.apache.spark.shuffle.api.metadata.MapOutputCommitMessage; -import org.apache.spark.util.Utils; /** * Implementation of {@link ShuffleMapOutputWriter} that replicates the functionality of shuffle @@ -87,7 +86,7 @@ public ShufflePartitionWriter getPartitionWriter(int reducePartitionId) throws I } lastPartitionId = reducePartitionId; if (outputTempFile == null) { - outputTempFile = Utils.tempFileWith(outputFile); + outputTempFile = blockResolver.createTempFile(outputFile); } if (outputFileChannel != null) { currChannelPosition = outputFileChannel.position(); diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index 091b5e1600d9e..a6fa28b8ae8ef 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -235,8 +235,10 @@ private[spark] class ContextCleaner( try { if (mapOutputTrackerMaster.containsShuffle(shuffleId)) { logDebug("Cleaning shuffle " + shuffleId) - mapOutputTrackerMaster.unregisterShuffle(shuffleId) + // Shuffle must be removed before it's unregistered from the output tracker + // to find blocks served by the shuffle service on deallocated executors shuffleDriverComponents.removeShuffle(shuffleId, blocking) + mapOutputTrackerMaster.unregisterShuffle(shuffleId) listeners.asScala.foreach(_.shuffleCleaned(shuffleId)) logDebug("Cleaned shuffle " + shuffleId) } else { diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index d07614a5e2129..19467e7eca12e 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -343,12 +343,14 @@ object SparkEnv extends Logging { isLocal, conf, listenerBus, - if (conf.get(config.SHUFFLE_SERVICE_FETCH_RDD_ENABLED)) { + if (conf.get(config.SHUFFLE_SERVICE_ENABLED)) { externalShuffleClient } else { None }, blockManagerInfo, - mapOutputTracker.asInstanceOf[MapOutputTrackerMaster], isDriver)), + mapOutputTracker.asInstanceOf[MapOutputTrackerMaster], + shuffleManager, + isDriver)), registerOrLookupEndpoint( BlockManagerMaster.DRIVER_HEARTBEAT_ENDPOINT_NAME, new BlockManagerMasterHeartbeatEndpoint(rpcEnv, isLocal, blockManagerInfo)), diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index fa048f54415a8..aa8f63e14efc7 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -686,6 +686,16 @@ package object config { .booleanConf .createWithDefault(false) + private[spark] val SHUFFLE_SERVICE_REMOVE_SHUFFLE_ENABLED = + ConfigBuilder("spark.shuffle.service.removeShuffle") + .doc("Whether to use the ExternalShuffleService for deleting shuffle blocks for " + + "deallocated executors when the shuffle is no longer needed. Without this enabled, " + + "shuffle data on executors that are deallocated will remain on disk until the " + + "application ends.") + .version("3.3.0") + .booleanConf + .createWithDefault(false) + private[spark] val SHUFFLE_SERVICE_FETCH_RDD_ENABLED = ConfigBuilder(Constants.SHUFFLE_SERVICE_FETCH_RDD_ENABLED) .doc("Whether to use the ExternalShuffleService for fetching disk persisted RDD blocks. " + diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala index f1485ec99789d..ba54555311e75 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala @@ -84,6 +84,11 @@ private[spark] class IndexShuffleBlockResolver( shuffleFiles.map(_.length()).sum } + /** Create a temporary file that will be renamed to the final resulting file */ + def createTempFile(file: File): File = { + blockManager.diskBlockManager.createTempFileWith(file) + } + /** * Get the shuffle data file. * @@ -234,7 +239,7 @@ private[spark] class IndexShuffleBlockResolver( throw new IllegalStateException(s"Unexpected shuffle block transfer ${blockId} as " + s"${blockId.getClass().getSimpleName()}") } - val fileTmp = Utils.tempFileWith(file) + val fileTmp = createTempFile(file) val channel = Channels.newChannel( serializerManager.wrapStream(blockId, new FileOutputStream(fileTmp))) @@ -335,7 +340,7 @@ private[spark] class IndexShuffleBlockResolver( checksums: Array[Long], dataTmp: File): Unit = { val indexFile = getIndexFile(shuffleId, mapId) - val indexTmp = Utils.tempFileWith(indexFile) + val indexTmp = createTempFile(indexFile) val checksumEnabled = checksums.nonEmpty val (checksumFileOpt, checksumTmpOpt) = if (checksumEnabled) { @@ -343,7 +348,7 @@ private[spark] class IndexShuffleBlockResolver( "The size of partition lengths and checksums should be equal") val checksumFile = getChecksumFile(shuffleId, mapId, conf.get(config.SHUFFLE_CHECKSUM_ALGORITHM)) - (Some(checksumFile), Some(Utils.tempFileWith(checksumFile))) + (Some(checksumFile), Some(createTempFile(checksumFile))) } else { (None, None) } @@ -597,6 +602,13 @@ private[spark] class IndexShuffleBlockResolver( } } + override def getBlocksForShuffle(shuffleId: Int, mapId: Long): Seq[BlockId] = { + Seq( + ShuffleIndexBlockId(shuffleId, mapId, NOOP_REDUCE_ID), + ShuffleDataBlockId(shuffleId, mapId, NOOP_REDUCE_ID) + ) + } + override def stop(): Unit = {} } diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockResolver.scala index 0f35f8c983d6f..c8fde8d2d39da 100644 --- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockResolver.scala +++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleBlockResolver.scala @@ -41,6 +41,14 @@ trait ShuffleBlockResolver { */ def getBlockData(blockId: BlockId, dirs: Option[Array[String]] = None): ManagedBuffer + /** + * Retrieve a list of BlockIds for a given shuffle map. Used to delete shuffle files + * from the external shuffle service after the associated executor has been removed. + */ + def getBlocksForShuffle(shuffleId: Int, mapId: Long): Seq[BlockId] = { + Seq.empty + } + /** * Retrieve the data for the specified merged shuffle block as multiple chunks. */ diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index b96befce2c0da..4d8ba9b3e4e0a 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -36,6 +36,7 @@ import org.apache.spark.network.shuffle.ExternalBlockStoreClient import org.apache.spark.rpc.{IsolatedRpcEndpoint, RpcCallContext, RpcEndpointRef, RpcEnv} import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.{CoarseGrainedClusterMessages, CoarseGrainedSchedulerBackend} +import org.apache.spark.shuffle.ShuffleManager import org.apache.spark.storage.BlockManagerMessages._ import org.apache.spark.util.{RpcUtils, ThreadUtils, Utils} @@ -52,6 +53,7 @@ class BlockManagerMasterEndpoint( externalBlockStoreClient: Option[ExternalBlockStoreClient], blockManagerInfo: mutable.Map[BlockManagerId, BlockManagerInfo], mapOutputTracker: MapOutputTrackerMaster, + shuffleManager: ShuffleManager, isDriver: Boolean) extends IsolatedRpcEndpoint with Logging { @@ -104,9 +106,11 @@ class BlockManagerMasterEndpoint( private val pushBasedShuffleEnabled = Utils.isPushBasedShuffleEnabled(conf, isDriver) logInfo("BlockManagerMasterEndpoint up") - // same as `conf.get(config.SHUFFLE_SERVICE_ENABLED) - // && conf.get(config.SHUFFLE_SERVICE_FETCH_RDD_ENABLED)` - private val externalShuffleServiceRddFetchEnabled: Boolean = externalBlockStoreClient.isDefined + + private val externalShuffleServiceRemoveShuffleEnabled: Boolean = + externalBlockStoreClient.isDefined && conf.get(config.SHUFFLE_SERVICE_REMOVE_SHUFFLE_ENABLED) + private val externalShuffleServiceRddFetchEnabled: Boolean = + externalBlockStoreClient.isDefined && conf.get(config.SHUFFLE_SERVICE_FETCH_RDD_ENABLED) private val externalShuffleServicePort: Int = StorageUtils.externalShuffleServicePort(conf) private lazy val driverEndpoint = @@ -294,33 +298,74 @@ class BlockManagerMasterEndpoint( } }.toSeq - val removeRddBlockViaExtShuffleServiceFutures = externalBlockStoreClient.map { shuffleClient => - blocksToDeleteByShuffleService.map { case (bmId, blockIds) => - Future[Int] { - val numRemovedBlocks = shuffleClient.removeBlocks( - bmId.host, - bmId.port, - bmId.executorId, - blockIds.map(_.toString).toArray) - numRemovedBlocks.get(defaultRpcTimeout.duration.toSeconds, TimeUnit.SECONDS) + val removeRddBlockViaExtShuffleServiceFutures = if (externalShuffleServiceRddFetchEnabled) { + externalBlockStoreClient.map { shuffleClient => + blocksToDeleteByShuffleService.map { case (bmId, blockIds) => + Future[Int] { + val numRemovedBlocks = shuffleClient.removeBlocks( + bmId.host, + bmId.port, + bmId.executorId, + blockIds.map(_.toString).toArray) + numRemovedBlocks.get(defaultRpcTimeout.duration.toSeconds, TimeUnit.SECONDS) + } } - } - }.getOrElse(Seq.empty) + }.getOrElse(Seq.empty) + } else { + Seq.empty + } Future.sequence(removeRddFromExecutorsFutures ++ removeRddBlockViaExtShuffleServiceFutures) } private def removeShuffle(shuffleId: Int): Future[Seq[Boolean]] = { - // Nothing to do in the BlockManagerMasterEndpoint data structures val removeMsg = RemoveShuffle(shuffleId) - Future.sequence( - blockManagerInfo.values.map { bm => - bm.storageEndpoint.ask[Boolean](removeMsg).recover { - // use false as default value means no shuffle data were removed - handleBlockRemovalFailure("shuffle", shuffleId.toString, bm.blockManagerId, false) + val removeShuffleFromExecutorsFutures = blockManagerInfo.values.map { bm => + bm.storageEndpoint.ask[Boolean](removeMsg).recover { + // use false as default value means no shuffle data were removed + handleBlockRemovalFailure("shuffle", shuffleId.toString, bm.blockManagerId, false) + } + }.toSeq + + // Find all shuffle blocks on executors that are no longer running + val blocksToDeleteByShuffleService = + new mutable.HashMap[BlockManagerId, mutable.HashSet[BlockId]] + if (externalShuffleServiceRemoveShuffleEnabled) { + mapOutputTracker.shuffleStatuses.get(shuffleId).foreach { shuffleStatus => + shuffleStatus.withMapStatuses { mapStatuses => + mapStatuses.foreach { mapStatus => + // Check if the executor has been deallocated + if (!blockManagerIdByExecutor.contains(mapStatus.location.executorId)) { + val blocksToDel = + shuffleManager.shuffleBlockResolver.getBlocksForShuffle(shuffleId, mapStatus.mapId) + if (blocksToDel.nonEmpty) { + val blocks = blocksToDeleteByShuffleService.getOrElseUpdate(mapStatus.location, + new mutable.HashSet[BlockId]) + blocks ++= blocksToDel + } + } + } } - }.toSeq - ) + } + } + + val removeShuffleFromShuffleServicesFutures = + externalBlockStoreClient.map { shuffleClient => + blocksToDeleteByShuffleService.map { case (bmId, blockIds) => + Future[Boolean] { + val numRemovedBlocks = shuffleClient.removeBlocks( + bmId.host, + bmId.port, + bmId.executorId, + blockIds.map(_.toString).toArray) + numRemovedBlocks.get(defaultRpcTimeout.duration.toSeconds, + TimeUnit.SECONDS) == blockIds.size + } + } + }.getOrElse(Seq.empty) + + Future.sequence(removeShuffleFromExecutorsFutures ++ + removeShuffleFromShuffleServicesFutures) } /** diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala index c6a22972d2a0f..e29f3fc1b8050 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala @@ -19,6 +19,7 @@ package org.apache.spark.storage import java.io.{File, IOException} import java.nio.file.Files +import java.nio.file.attribute.PosixFilePermission import java.util.UUID import scala.collection.mutable.HashMap @@ -77,6 +78,15 @@ private[spark] class DiskBlockManager( private val shutdownHook = addShutdownHook() + // If either of these features are enabled, we must change permissions on block manager + // directories and files to accomodate the shuffle service deleting files in a secure environment. + // Parent directories are assumed to be restrictive to prevent unauthorized users from accessing + // or modifying world readable files. + private val permissionChangingRequired = conf.get(config.SHUFFLE_SERVICE_ENABLED) && ( + conf.get(config.SHUFFLE_SERVICE_REMOVE_SHUFFLE_ENABLED) || + conf.get(config.SHUFFLE_SERVICE_FETCH_RDD_ENABLED) + ) + /** Looks up a file by hashing it into one of our local subdirectories. */ // This method should be kept in sync with // org.apache.spark.network.shuffle.ExecutorDiskUtils#getFilePath(). @@ -94,7 +104,16 @@ private[spark] class DiskBlockManager( } else { val newDir = new File(localDirs(dirId), "%02x".format(subDirId)) if (!newDir.exists()) { - Files.createDirectory(newDir.toPath) + val path = newDir.toPath + Files.createDirectory(path) + if (permissionChangingRequired) { + // SPARK-37618: Create dir as group writable so files within can be deleted by the + // shuffle service in a secure setup. This will remove the setgid bit so files created + // within won't be created with the parent folder group. + val currentPerms = Files.getPosixFilePermissions(path) + currentPerms.add(PosixFilePermission.GROUP_WRITE) + Files.setPosixFilePermissions(path, currentPerms) + } } subDirs(dirId)(subDirId) = newDir newDir @@ -166,6 +185,37 @@ private[spark] class DiskBlockManager( } } + /** + * SPARK-37618: Makes sure that the file is created as world readable. This is to get + * around the fact that making the block manager sub dirs group writable removes + * the setgid bit in secure Yarn environments, which prevents the shuffle service + * from being able to read shuffle files. The outer directories will still not be + * world executable, so this doesn't allow access to these files except for the + * running user and shuffle service. + */ + def createWorldReadableFile(file: File): Unit = { + val path = file.toPath + Files.createFile(path) + val currentPerms = Files.getPosixFilePermissions(path) + currentPerms.add(PosixFilePermission.OTHERS_READ) + Files.setPosixFilePermissions(path, currentPerms) + } + + /** + * Creates a temporary version of the given file with world readable permissions (if required). + * Used to create block files that will be renamed to the final version of the file. + */ + def createTempFileWith(file: File): File = { + val tmpFile = Utils.tempFileWith(file) + if (permissionChangingRequired) { + // SPARK-37618: we need to make the file world readable because the parent will + // lose the setgid bit when making it group writable. Without this the shuffle + // service can't read the shuffle files in a secure setup. + createWorldReadableFile(tmpFile) + } + tmpFile + } + /** Produces a unique block id and File suitable for storing local intermediate results. */ def createTempLocalBlock(): (TempLocalBlockId, File) = { var blockId = new TempLocalBlockId(UUID.randomUUID()) @@ -181,7 +231,14 @@ private[spark] class DiskBlockManager( while (getFile(blockId).exists()) { blockId = new TempShuffleBlockId(UUID.randomUUID()) } - (blockId, getFile(blockId)) + val tmpFile = getFile(blockId) + if (permissionChangingRequired) { + // SPARK-37618: we need to make the file world readable because the parent will + // lose the setgid bit when making it group writable. Without this the shuffle + // service can't read the shuffle files in a secure setup. + createWorldReadableFile(tmpFile) + } + (blockId, tmpFile) } /** diff --git a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala index f0334c56962cb..d45947db69343 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala @@ -50,6 +50,9 @@ private[spark] class DiskStore( private val maxMemoryMapBytes = conf.get(config.MEMORY_MAP_LIMIT_FOR_TESTS) private val blockSizes = new ConcurrentHashMap[BlockId, Long]() + private val shuffleServiceFetchRddEnabled = conf.get(config.SHUFFLE_SERVICE_ENABLED) && + conf.get(config.SHUFFLE_SERVICE_FETCH_RDD_ENABLED) + def getSize(blockId: BlockId): Long = blockSizes.get(blockId) /** @@ -71,6 +74,13 @@ private[spark] class DiskStore( logDebug(s"Attempting to put block $blockId") val startTimeNs = System.nanoTime() val file = diskManager.getFile(blockId) + + // SPARK-37618: If fetching cached RDDs from the shuffle service is enabled, we must make + // the file world readable, as it will not be owned by the group running the shuffle service + // in a secure environment. This is due to changing directory permissions to allow deletion, + if (shuffleServiceFetchRddEnabled) { + diskManager.createWorldReadableFile(file) + } val out = new CountingWritableChannel(openForWrite(file)) var threwException: Boolean = true try { diff --git a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java index f4e09b7a0a38a..8a3df5a9d098d 100644 --- a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java +++ b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java @@ -127,6 +127,10 @@ public void setUp() throws Exception { }); when(shuffleBlockResolver.getDataFile(anyInt(), anyLong())).thenReturn(mergedOutputFile); + when(shuffleBlockResolver.createTempFile(any(File.class))).thenAnswer(invocationOnMock -> { + File file = (File) invocationOnMock.getArguments()[0]; + return Utils.tempFileWith(file); + }); Answer renameTempAnswer = invocationOnMock -> { partitionSizesInMergedFile = (long[]) invocationOnMock.getArguments()[2]; @@ -158,6 +162,10 @@ public void setUp() throws Exception { spillFilesCreated.add(file); return Tuple2$.MODULE$.apply(blockId, file); }); + when(diskBlockManager.createTempFileWith(any(File.class))).thenAnswer(invocationOnMock -> { + File file = (File) invocationOnMock.getArguments()[0]; + return Utils.tempFileWith(file); + }); when(taskContext.taskMetrics()).thenReturn(taskMetrics); when(shuffleDep.serializer()).thenReturn(serializer); diff --git a/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala b/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala index 48c1cc5906f30..dd3d90f3124d5 100644 --- a/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala @@ -17,6 +17,13 @@ package org.apache.spark +import java.io.File +import java.nio.file.Files +import java.nio.file.attribute.PosixFilePermission + +import scala.concurrent.Promise +import scala.concurrent.duration.Duration + import org.scalatest.BeforeAndAfterAll import org.scalatest.concurrent.Eventually import org.scalatest.matchers.should.Matchers._ @@ -26,9 +33,9 @@ import org.apache.spark.internal.config import org.apache.spark.network.TransportContext import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.server.TransportServer -import org.apache.spark.network.shuffle.{ExternalBlockHandler, ExternalBlockStoreClient} -import org.apache.spark.storage.{RDDBlockId, StorageLevel} -import org.apache.spark.util.Utils +import org.apache.spark.network.shuffle.{ExecutorDiskUtils, ExternalBlockHandler, ExternalBlockStoreClient} +import org.apache.spark.storage.{RDDBlockId, ShuffleBlockId, ShuffleDataBlockId, ShuffleIndexBlockId, StorageLevel} +import org.apache.spark.util.{ThreadUtils, Utils} /** * This suite creates an external shuffle server and routes all shuffle fetches through it. @@ -101,7 +108,9 @@ class ExternalShuffleServiceSuite extends ShuffleSuite with BeforeAndAfterAll wi } test("SPARK-25888: using external shuffle service fetching disk persisted blocks") { - val confWithRddFetchEnabled = conf.clone.set(config.SHUFFLE_SERVICE_FETCH_RDD_ENABLED, true) + val confWithRddFetchEnabled = conf.clone + .set(config.SHUFFLE_HOST_LOCAL_DISK_READING_ENABLED, true) + .set(config.SHUFFLE_SERVICE_FETCH_RDD_ENABLED, true) sc = new SparkContext("local-cluster[1,1,1024]", "test", confWithRddFetchEnabled) sc.env.blockManager.externalShuffleServiceEnabled should equal(true) sc.env.blockManager.blockStoreClient.getClass should equal(classOf[ExternalBlockStoreClient]) @@ -113,13 +122,42 @@ class ExternalShuffleServiceSuite extends ShuffleSuite with BeforeAndAfterAll wi rdd.count() val blockId = RDDBlockId(rdd.id, 0) - eventually(timeout(2.seconds), interval(100.milliseconds)) { + val bms = eventually(timeout(2.seconds), interval(100.milliseconds)) { val locations = sc.env.blockManager.master.getLocations(blockId) assert(locations.size === 2) assert(locations.map(_.port).contains(server.getPort), "external shuffle service port should be contained") + locations } + val dirManager = sc.env.blockManager.hostLocalDirManager + .getOrElse(fail("No host local dir manager")) + + val promises = bms.map { case bmid => + val promise = Promise[File]() + dirManager.getHostLocalDirs(bmid.host, bmid.port, Seq(bmid.executorId).toArray) { + case scala.util.Success(res) => res.foreach { case (eid, dirs) => + val file = new File(ExecutorDiskUtils.getFilePath(dirs, + sc.env.blockManager.subDirsPerLocalDir, blockId.name)) + promise.success(file) + } + case scala.util.Failure(error) => promise.failure(error) + } + promise.future + } + val filesToCheck = promises.map(p => ThreadUtils.awaitResult(p, Duration(2, "sec"))) + + filesToCheck.foreach(f => { + val parentPerms = Files.getPosixFilePermissions(f.getParentFile.toPath) + assert(parentPerms.contains(PosixFilePermission.GROUP_WRITE)) + + // On most operating systems the default umask will make this test pass + // even if the permission isn't changed. To properly test this, run the + // test with a umask of 0027 + val perms = Files.getPosixFilePermissions(f.toPath) + assert(perms.contains(PosixFilePermission.OTHERS_READ)) + }) + sc.killExecutors(sc.getExecutorIds()) eventually(timeout(2.seconds), interval(100.milliseconds)) { @@ -138,4 +176,83 @@ class ExternalShuffleServiceSuite extends ShuffleSuite with BeforeAndAfterAll wi rpcHandler.applicationRemoved(sc.conf.getAppId, true) } } + + test("SPARK-37618: external shuffle service removes shuffle blocks from deallocated executors") { + for (enabled <- Seq(true, false)) { + // Use local disk reading to get location of shuffle files on disk + val confWithLocalDiskReading = conf.clone + .set(config.SHUFFLE_HOST_LOCAL_DISK_READING_ENABLED, true) + .set(config.SHUFFLE_SERVICE_REMOVE_SHUFFLE_ENABLED, enabled) + sc = new SparkContext("local-cluster[1,1,1024]", "test", confWithLocalDiskReading) + sc.env.blockManager.externalShuffleServiceEnabled should equal(true) + sc.env.blockManager.blockStoreClient.getClass should equal(classOf[ExternalBlockStoreClient]) + try { + val rdd = sc.parallelize(0 until 100, 2) + .map { i => (i, 1) } + .repartition(1) + + rdd.count() + + val mapOutputs = sc.env.mapOutputTracker.getMapSizesByExecutorId(0, 0).toSeq + + val dirManager = sc.env.blockManager.hostLocalDirManager + .getOrElse(fail("No host local dir manager")) + + val promises = mapOutputs.map { case (bmid, blocks) => + val promise = Promise[Seq[File]]() + dirManager.getHostLocalDirs(bmid.host, bmid.port, Seq(bmid.executorId).toArray) { + case scala.util.Success(res) => res.foreach { case (eid, dirs) => + val files = blocks.flatMap { case (blockId, _, _) => + val shuffleBlockId = blockId.asInstanceOf[ShuffleBlockId] + Seq( + ShuffleDataBlockId(shuffleBlockId.shuffleId, shuffleBlockId.mapId, + shuffleBlockId.reduceId).name, + ShuffleIndexBlockId(shuffleBlockId.shuffleId, shuffleBlockId.mapId, + shuffleBlockId.reduceId).name + ).map { blockId => + new File(ExecutorDiskUtils.getFilePath(dirs, + sc.env.blockManager.subDirsPerLocalDir, blockId)) + } + } + promise.success(files) + } + case scala.util.Failure(error) => promise.failure(error) + } + promise.future + } + val filesToCheck = promises.flatMap(p => ThreadUtils.awaitResult(p, Duration(2, "sec"))) + assert(filesToCheck.length == 4) + assert(filesToCheck.forall(_.exists())) + + if (enabled) { + filesToCheck.foreach(f => { + val parentPerms = Files.getPosixFilePermissions(f.getParentFile.toPath) + assert(parentPerms.contains(PosixFilePermission.GROUP_WRITE)) + + // On most operating systems the default umask will make this test pass + // even if the permission isn't changed. To properly test this, run the + // test with a umask of 0027 + val perms = Files.getPosixFilePermissions(f.toPath) + assert(perms.contains(PosixFilePermission.OTHERS_READ)) + }) + } + + sc.killExecutors(sc.getExecutorIds()) + eventually(timeout(2.seconds), interval(100.milliseconds)) { + assert(sc.env.blockManager.master.getExecutorEndpointRef("0").isEmpty) + } + + sc.cleaner.foreach(_.doCleanupShuffle(0, true)) + + if (enabled) { + assert(filesToCheck.forall(!_.exists())) + } else { + assert(filesToCheck.forall(_.exists())) + } + } finally { + rpcHandler.applicationRemoved(sc.conf.getAppId, true) + sc.stop() + } + } + } } diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala index 38ed702d0e4c7..83bd3b0a99779 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriterSuite.scala @@ -111,6 +111,12 @@ class BypassMergeSortShuffleWriterSuite blockId = args(0).asInstanceOf[BlockId]) } + when(blockResolver.createTempFile(any(classOf[File]))) + .thenAnswer { invocationOnMock => + val file = invocationOnMock.getArguments()(0).asInstanceOf[File] + Utils.tempFileWith(file) + } + when(diskBlockManager.createTempShuffleBlock()) .thenAnswer { _ => val blockId = new TempShuffleBlockId(UUID.randomUUID) @@ -266,6 +272,11 @@ class BypassMergeSortShuffleWriterSuite temporaryFilesCreated += file (blockId, file) } + when(diskBlockManager.createTempFileWith(any(classOf[File]))) + .thenAnswer { invocationOnMock => + val file = invocationOnMock.getArguments()(0).asInstanceOf[File] + Utils.tempFileWith(file) + } val numPartition = shuffleHandle.dependency.partitioner.numPartitions val writer = new BypassMergeSortShuffleWriter[Int, Int]( diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala index 21704b1c67325..de12f6840a1ad 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/IndexShuffleBlockResolverSuite.scala @@ -56,6 +56,11 @@ class IndexShuffleBlockResolverSuite extends SparkFunSuite with BeforeAndAfterEa any[BlockId], any[Option[Array[String]]])).thenAnswer( (invocation: InvocationOnMock) => new File(tempDir, invocation.getArguments.head.toString)) when(diskBlockManager.localDirs).thenReturn(Array(tempDir)) + when(diskBlockManager.createTempFileWith(any(classOf[File]))) + .thenAnswer { invocationOnMock => + val file = invocationOnMock.getArguments()(0).asInstanceOf[File] + Utils.tempFileWith(file) + } conf.set("spark.app.id", appId) } diff --git a/core/src/test/scala/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriterSuite.scala b/core/src/test/scala/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriterSuite.scala index 35d9b4ab1f766..6c9ec8b71a429 100644 --- a/core/src/test/scala/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriterSuite.scala +++ b/core/src/test/scala/org/apache/spark/shuffle/sort/io/LocalDiskShuffleMapOutputWriterSuite.scala @@ -74,6 +74,11 @@ class LocalDiskShuffleMapOutputWriterSuite extends SparkFunSuite with BeforeAndA .set("spark.app.id", "example.spark.app") .set("spark.shuffle.unsafe.file.output.buffer", "16k") when(blockResolver.getDataFile(anyInt, anyLong)).thenReturn(mergedOutputFile) + when(blockResolver.createTempFile(any(classOf[File]))) + .thenAnswer { invocationOnMock => + val file = invocationOnMock.getArguments()(0).asInstanceOf[File] + Utils.tempFileWith(file) + } when(blockResolver.writeMetadataFileAndCommit( anyInt, anyLong, any(classOf[Array[Long]]), any(classOf[Array[Long]]), any(classOf[File]))) .thenAnswer { invocationOnMock => diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala index fc7b7a440697e..14e1ee5b09d59 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerReplicationSuite.scala @@ -102,7 +102,8 @@ trait BlockManagerReplicationBehavior extends SparkFunSuite val blockManagerInfo = new mutable.HashMap[BlockManagerId, BlockManagerInfo]() master = new BlockManagerMaster(rpcEnv.setupEndpoint("blockmanager", new BlockManagerMasterEndpoint(rpcEnv, true, conf, - new LiveListenerBus(conf), None, blockManagerInfo, mapOutputTracker, isDriver = true)), + new LiveListenerBus(conf), None, blockManagerInfo, mapOutputTracker, sc.env.shuffleManager, + isDriver = true)), rpcEnv.setupEndpoint("blockmanagerHeartbeat", new BlockManagerMasterHeartbeatEndpoint(rpcEnv, true, blockManagerInfo)), conf, true) allStores.clear() diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 0f99ea819f67f..45e05b2cc2da1 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -188,7 +188,8 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE liveListenerBus = spy(new LiveListenerBus(conf)) master = spy(new BlockManagerMaster(rpcEnv.setupEndpoint("blockmanager", new BlockManagerMasterEndpoint(rpcEnv, true, conf, - liveListenerBus, None, blockManagerInfo, mapOutputTracker, isDriver = true)), + liveListenerBus, None, blockManagerInfo, mapOutputTracker, shuffleManager, + isDriver = true)), rpcEnv.setupEndpoint("blockmanagerHeartbeat", new BlockManagerMasterHeartbeatEndpoint(rpcEnv, true, blockManagerInfo)), conf, true)) } diff --git a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala index b36eeb767e2e1..58fe40f9adeb5 100644 --- a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.storage import java.io.{File, FileWriter} import java.nio.file.{Files, Paths} -import java.nio.file.attribute.PosixFilePermissions +import java.nio.file.attribute.{PosixFilePermission, PosixFilePermissions} import java.util.HashMap import com.fasterxml.jackson.core.`type`.TypeReference @@ -141,6 +141,30 @@ class DiskBlockManagerSuite extends SparkFunSuite with BeforeAndAfterEach with B assert(attemptId.equals("1")) } + test("SPARK-37618: Sub dirs are group writable when removing from shuffle service enabled") { + val conf = testConf.clone + conf.set("spark.local.dir", rootDirs) + conf.set("spark.shuffle.service.enabled", "true") + conf.set("spark.shuffle.service.removeShuffle", "false") + val diskBlockManager = new DiskBlockManager(conf, deleteFilesOnStop = true, isDriver = false) + val blockId = new TestBlockId("test") + val newFile = diskBlockManager.getFile(blockId) + val parentDir = newFile.getParentFile() + assert(parentDir.exists && parentDir.isDirectory) + val permission = Files.getPosixFilePermissions(parentDir.toPath) + assert(!permission.contains(PosixFilePermission.GROUP_WRITE)) + + assert(parentDir.delete()) + + conf.set("spark.shuffle.service.removeShuffle", "true") + val diskBlockManager2 = new DiskBlockManager(conf, deleteFilesOnStop = true, isDriver = false) + val newFile2 = diskBlockManager2.getFile(blockId) + val parentDir2 = newFile2.getParentFile() + assert(parentDir2.exists && parentDir2.isDirectory) + val permission2 = Files.getPosixFilePermissions(parentDir2.toPath) + assert(permission2.contains(PosixFilePermission.GROUP_WRITE)) + } + def writeToFile(file: File, numBytes: Int): Unit = { val writer = new FileWriter(file, true) for (i <- 0 until numBytes) writer.write(i) diff --git a/docs/configuration.md b/docs/configuration.md index a2cf2338c398e..4fa37792a335f 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -996,6 +996,17 @@ Apart from these, the following properties are also available, and may be useful

        + + + + + + diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala index a3b5b38904a2e..dcf82d5e2c28e 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala @@ -93,7 +93,8 @@ abstract class BaseReceivedBlockHandlerSuite(enableEncryption: Boolean) val blockManagerInfo = new mutable.HashMap[BlockManagerId, BlockManagerInfo]() blockManagerMaster = new BlockManagerMaster(rpcEnv.setupEndpoint("blockmanager", new BlockManagerMasterEndpoint(rpcEnv, true, conf, - new LiveListenerBus(conf), None, blockManagerInfo, mapOutputTracker, isDriver = true)), + new LiveListenerBus(conf), None, blockManagerInfo, mapOutputTracker, shuffleManager, + isDriver = true)), rpcEnv.setupEndpoint("blockmanagerHeartbeat", new BlockManagerMasterHeartbeatEndpoint(rpcEnv, true, blockManagerInfo)), conf, true) From f0e1ad9a4099f93b5fa373777a29fd6da71538f6 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Sun, 27 Mar 2022 09:23:19 +0900 Subject: [PATCH 052/535] [SPARK-37512][PYTHON][FOLLOWUP] Add test_timedelta_ops to modules ### What changes were proposed in this pull request? Add `test_timedelta_ops` to modules ### Why are the changes needed? `test_timedelta_ops` wasn't added to modules in original PR https://github.com/apache/spark/pull/34776. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - UT passed - `Build modules (master, regular job): pyspark-pandas` contains test_timedelta_ops. Closes #35980 from Yikun/patch-19. Authored-by: Yikun Jiang Signed-off-by: Hyukjin Kwon (cherry picked from commit 0a4de08566eb38bf2c5ddb5d6312195d503fa7d7) Signed-off-by: Hyukjin Kwon --- dev/sparktestsupport/modules.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 6e668bba8c803..aab63056e979a 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -613,6 +613,7 @@ def __hash__(self): "pyspark.pandas.tests.data_type_ops.test_num_ops", "pyspark.pandas.tests.data_type_ops.test_string_ops", "pyspark.pandas.tests.data_type_ops.test_udt_ops", + "pyspark.pandas.tests.data_type_ops.test_timedelta_ops", "pyspark.pandas.tests.indexes.test_category", "pyspark.pandas.tests.indexes.test_timedelta", "pyspark.pandas.tests.plot.test_frame_plot", From 7fcdd71fa976d910425143c9e733385c7cba37ca Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Sun, 27 Mar 2022 09:29:49 +0900 Subject: [PATCH 053/535] [SPARK-38308][SQL] Eagerly iterate over sequence of window expressions in `ExtractWindowExpressions` ### What changes were proposed in this pull request? Pass an `IndexedSeq` (likely a `Vector`) to `ExtractWindowExpressions.extract` and `ExtractWindowExpressions.addWindow` rather than whatever sequence type was specified by the user (in the `Dataset.select` method). To accomplish this, we only need to pass an `IndexedSeq` to `ExtractWindowExpressions.extract`. `ExtractWindowExpressions.extract` will then return another `IndexedSeq` that we will pass on to `ExtractWindowExpressions.addWindow` ### Why are the changes needed? Consider this query: ``` val df = spark.range(0, 20).map { x => (x % 4, x + 1, x + 2) }.toDF("a", "b", "c") import org.apache.spark.sql.expressions._ val w = Window.partitionBy("a").orderBy("b") val selectExprs = Stream( sum("c").over(w.rowsBetween(Window.unboundedPreceding, Window.currentRow)).as("sumc"), avg("c").over(w.rowsBetween(Window.unboundedPreceding, Window.currentRow)).as("avgc") ) df.select(selectExprs: _*).show(false) ``` It fails with ``` org.apache.spark.sql.AnalysisException: Resolved attribute(s) avgc#23 missing from c#16L,a#14L,b#15L,sumc#21L in operator !Project [c#16L, a#14L, b#15L, sumc#21L, sumc#21L, avgc#23].; ``` If you change `Stream` to a `Seq`, it succeeds. As with SPARK-38221 and SPARK-38528, this is due to the use of this code pattern: ``` def someMethod (seq: Seq[xxx]) { ... val outerDataStructure = val newSeq = seq.map { x => ... code that puts something in outerDataStructure ... } ... code that uses outerDataStructure (and expects it to be populated) ... } ``` If `seq` is a `Stream`, `seq.map` might be evaluated lazily, in which case `outerDataStructure` will not be fully populated before it is used. Both `ExtractWindowExpressions.extract` and `ExtractWindowExpressions.addWindow` use this pattern, but the above example failure is due to the pattern's use in `ExtractWindowExpressions.addWindow` (`extractedWindowExprBuffer` does not get fully populated, so the Window operator does not produce the output expected by its parent projection). I chose `IndexedSeq` not for its efficient indexing, but because `map` will eagerly iterate over it. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New unit test. Closes #35635 from bersprockets/window_expression_stream_issue. Authored-by: Bruce Robbins Signed-off-by: Hyukjin Kwon (cherry picked from commit eb30a27e53158e64fffaa6d32ff9369ffbae0384) Signed-off-by: Hyukjin Kwon --- .../sql/catalyst/analysis/Analyzer.scala | 2 +- .../sql/DataFrameWindowFunctionsSuite.scala | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 528998398ddeb..6d950673fa633 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3141,7 +3141,7 @@ class Analyzer(override val catalogManager: CatalogManager) // have been resolved. case p @ Project(projectList, child) if hasWindowFunction(projectList) && !p.expressions.exists(!_.resolved) => - val (windowExpressions, regularExpressions) = extract(projectList) + val (windowExpressions, regularExpressions) = extract(projectList.toIndexedSeq) // We add a project to get all needed expressions for window expressions from the child // of the original Project operator. val withProject = Project(regularExpressions, child) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 11b2309ee38eb..4676f8be9113e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -1122,4 +1122,28 @@ class DataFrameWindowFunctionsSuite extends QueryTest assert(shuffleByRequirement, "Can't find desired shuffle node from the query plan") } } + + test("SPARK-38308: Properly handle Stream of window expressions") { + val df = Seq( + (1, 2, 3), + (1, 3, 4), + (2, 4, 5), + (2, 5, 6) + ).toDF("a", "b", "c") + + val w = Window.partitionBy("a").orderBy("b") + val selectExprs = Stream( + sum("c").over(w.rowsBetween(Window.unboundedPreceding, Window.currentRow)).as("sumc"), + avg("c").over(w.rowsBetween(Window.unboundedPreceding, Window.currentRow)).as("avgc") + ) + checkAnswer( + df.select(selectExprs: _*), + Seq( + Row(3, 3), + Row(7, 3.5), + Row(5, 5), + Row(11, 5.5) + ) + ) + } } From f084e4d43c04ef8b044741f05b8082e1f1118daa Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Mon, 28 Mar 2022 00:59:21 +0800 Subject: [PATCH 054/535] [SPARK-38665][BUILD] Upgrade jackson due to CVE-2020-36518 ### What changes were proposed in this pull request? Upgrade jackson due to CVE-2020-36518 ### Why are the changes needed? https://github.com/FasterXML/jackson-databind/issues/2816 only jackson-databind has a 2.13.2.1 release other jackson jars should stay at 2.13.2 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests. Closes #35981 from pan3793/jackson. Authored-by: Cheng Pan Signed-off-by: Yuming Wang (cherry picked from commit c952b83deee3e1063b237a1253b65f3b739343a7) Signed-off-by: Yuming Wang --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 4 ++-- dev/deps/spark-deps-hadoop-3-hive-2.3 | 4 ++-- pom.xml | 8 +++++++- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 06d5939be654b..0202d9fff8803 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -115,10 +115,10 @@ ivy/2.5.0//ivy-2.5.0.jar jackson-annotations/2.13.2//jackson-annotations-2.13.2.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.13.2//jackson-core-2.13.2.jar -jackson-databind/2.13.2//jackson-databind-2.13.2.jar +jackson-databind/2.13.2.1//jackson-databind-2.13.2.1.jar jackson-dataformat-cbor/2.13.2//jackson-dataformat-cbor-2.13.2.jar jackson-dataformat-yaml/2.13.2//jackson-dataformat-yaml-2.13.2.jar -jackson-datatype-jsr310/2.13.1//jackson-datatype-jsr310-2.13.1.jar +jackson-datatype-jsr310/2.13.2//jackson-datatype-jsr310-2.13.2.jar jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-scala_2.12/2.13.2//jackson-module-scala_2.12-2.13.2.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 2e9b0e3aa21a6..600caf9dc9464 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -104,10 +104,10 @@ ivy/2.5.0//ivy-2.5.0.jar jackson-annotations/2.13.2//jackson-annotations-2.13.2.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.13.2//jackson-core-2.13.2.jar -jackson-databind/2.13.2//jackson-databind-2.13.2.jar +jackson-databind/2.13.2.1//jackson-databind-2.13.2.1.jar jackson-dataformat-cbor/2.13.2//jackson-dataformat-cbor-2.13.2.jar jackson-dataformat-yaml/2.13.2//jackson-dataformat-yaml-2.13.2.jar -jackson-datatype-jsr310/2.13.1//jackson-datatype-jsr310-2.13.1.jar +jackson-datatype-jsr310/2.13.2//jackson-datatype-jsr310-2.13.2.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar jackson-module-scala_2.12/2.13.2//jackson-module-scala_2.12-2.13.2.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar diff --git a/pom.xml b/pom.xml index a751bdd3462fe..6daeb70965d89 100644 --- a/pom.xml +++ b/pom.xml @@ -176,6 +176,7 @@ true 1.9.13 2.13.2 + 2.13.2.1 1.1.8.4 1.1.2 2.2.1 @@ -926,13 +927,18 @@ com.fasterxml.jackson.core jackson-databind - ${fasterxml.jackson.version} + ${fasterxml.jackson.databind.version} com.fasterxml.jackson.core jackson-annotations ${fasterxml.jackson.version} + + com.fasterxml.jackson.datatype + jackson-datatype-jsr310 + ${fasterxml.jackson.version} + From bcd01bb56f5d3eb78ad490b95a9f16f6d586a92d Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 28 Mar 2022 11:47:49 +0800 Subject: [PATCH 055/535] [SPARK-38616][SQL] Keep track of SQL query text in Catalyst TreeNode ### What changes were proposed in this pull request? Spark SQL uses the class Origin for tracking the position of each TreeNode in the SQL query text. When there is a parser error, we can show the position info in the error message: ``` > sql("create tabe foo(i int)") org.apache.spark.sql.catalyst.parser.ParseException: no viable alternative at input 'create tabe'(line 1, pos 7) == SQL == create tabe foo(i int) -------^^^ ``` It contains two fields: line and startPosition. This is enough for the parser since the SQL query text is known. However, the SQL query text is unknown in the execution phase. Spark SQL can't show the problematic SQL clause on ANSI runtime failures. This PR is to include the query text in Origin. After this, we can provide details in the error messages of Expressions which can throw runtime exceptions when ANSI mode is on. ### Why are the changes needed? Currently, there is not enough error context for runtime ANSI failures. In the following example, the error message only tells that there is a "divide by zero" error, without pointing out where the exact SQL statement is. ``` SELECT ws2.web_sales / ws1.web_sales web_q1_q2, ss2.store_sales / ss1.store_sales store_q1_q2, ws3.web_sales / ws2.web_sales web_q2_q3, ss3.store_sales / ss2.store_sales store_q2_q3 FROM ss ss1, ss ss2, ss ss3, ws ws1, ws ws2, ws ws3 WHERE .... AND CASE WHEN ws1.web_sales > 0 THEN ws2.web_sales / ws1.web_sales ELSE NULL END > CASE WHEN ss1.store_sales > 0 THEN ss2.store_sales / ss1.store_sales ELSE NULL END AND CASE WHEN ws2.web_sales > 0 THEN ws3.web_sales / ws2.web_sales ELSE NULL END > CASE WHEN ss2.store_sales > 0 THEN ss3.store_sales / ss2.store_sales ELSE NULL END ORDER BY ss1.ca_county ``` ``` org.apache.spark.SparkArithmeticException: divide by zero at org.apache.spark.sql.errors.QueryExecutionErrors$.divideByZeroError(QueryExecutionErrors.scala:140) at org.apache.spark.sql.catalyst.expressions.DivModLike.eval(arithmetic.scala:437) at org.apache.spark.sql.catalyst.expressions.DivModLike.eval$(arithmetic.scala:425) at org.apache.spark.sql.catalyst.expressions.Divide.eval(arithmetic.scala:534) ``` This PR is the initial PR for the project https://issues.apache.org/jira/browse/SPARK-38615 The project is able to provide such error context: ``` org.apache.spark.SparkArithmeticException: divide by zero SparkArithmeticException: divide by zero. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 2, position 43) == ws2.web_sales / ws1.web_sales web_q1_q2, ss2.store_sales / ss1.store_sales store_q1_q2 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #35936 from gengliangwang/trackText. Lead-authored-by: Gengliang Wang Co-authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit ecfe049cc271a3037c5e697be1a1e6af8f61f854) Signed-off-by: Gengliang Wang --- .../sql/catalyst/catalog/SessionCatalog.scala | 9 ++- .../sql/catalyst/parser/ParseDriver.scala | 26 ++++++--- .../sql/catalyst/parser/ParserUtils.scala | 28 +++++++++- .../spark/sql/catalyst/trees/TreeNode.scala | 12 +++- .../catalyst/parser/ParserUtilsSuite.scala | 56 +++++++++++++++++++ .../spark/sql/execution/SQLViewSuite.scala | 55 ++++++++++++++++++ 6 files changed, 174 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 5872f2ab925dd..322302e8a6f05 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -38,6 +38,7 @@ import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder import org.apache.spark.sql.catalyst.expressions.{Alias, Expression, ExpressionInfo, UpCast} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException, ParserInterface} import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, SubqueryAlias, View} +import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, Origin} import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils} import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.errors.QueryCompilationErrors @@ -872,9 +873,15 @@ class SessionCatalog( throw new IllegalStateException("Invalid view without text.") } val viewConfigs = metadata.viewSQLConfigs + val origin = Origin( + objectType = Some("VIEW"), + objectName = Some(metadata.qualifiedName) + ) val parsedPlan = SQLConf.withExistingConf(View.effectiveSQLConf(viewConfigs, isTempView)) { try { - parser.parseQuery(viewText) + CurrentOrigin.withOrigin(origin) { + parser.parseQuery(viewText) + } } catch { case _: ParseException => throw QueryCompilationErrors.invalidViewText(viewText, metadata.qualifiedName) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 82be4d61e911e..bf0ee9c115d8f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -26,6 +26,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.{FunctionIdentifier, SQLConfHelper, TableIdentifier} import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.parser.ParserUtils.withOrigin import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.errors.QueryParsingErrors @@ -43,7 +44,10 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with /** Creates Expression for a given SQL string. */ override def parseExpression(sqlText: String): Expression = parse(sqlText) { parser => - astBuilder.visitSingleExpression(parser.singleExpression()) + val ctx = parser.singleExpression() + withOrigin(ctx, Some(sqlText)) { + astBuilder.visitSingleExpression(ctx) + } } /** Creates TableIdentifier for a given SQL string. */ @@ -75,16 +79,22 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with /** Creates LogicalPlan for a given SQL string of query. */ override def parseQuery(sqlText: String): LogicalPlan = parse(sqlText) { parser => - astBuilder.visitQuery(parser.query()) + val ctx = parser.query() + withOrigin(ctx, Some(sqlText)) { + astBuilder.visitQuery(ctx) + } } /** Creates LogicalPlan for a given SQL string. */ override def parsePlan(sqlText: String): LogicalPlan = parse(sqlText) { parser => - astBuilder.visitSingleStatement(parser.singleStatement()) match { - case plan: LogicalPlan => plan - case _ => - val position = Origin(None, None) - throw QueryParsingErrors.sqlStatementUnsupportedError(sqlText, position) + val ctx = parser.singleStatement() + withOrigin(ctx, Some(sqlText)) { + astBuilder.visitSingleStatement(ctx) match { + case plan: LogicalPlan => plan + case _ => + val position = Origin(None, None) + throw QueryParsingErrors.sqlStatementUnsupportedError(sqlText, position) + } } } @@ -271,7 +281,7 @@ class ParseException( val builder = new StringBuilder builder ++= "\n" ++= message start match { - case Origin(Some(l), Some(p)) => + case Origin(Some(l), Some(p), _, _, _, _, _) => builder ++= s"(line $l, pos $p)\n" command.foreach { cmd => val (above, below) = cmd.split("\n").splitAt(l) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala index 694ddfcad1cb1..048012ac50e9d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserUtils.scala @@ -114,6 +114,24 @@ object ParserUtils { Origin(opt.map(_.getLine), opt.map(_.getCharPositionInLine)) } + def positionAndText( + startToken: Token, + stopToken: Token, + sqlText: String, + objectType: Option[String], + objectName: Option[String]): Origin = { + val startOpt = Option(startToken) + val stopOpt = Option(stopToken) + Origin( + line = startOpt.map(_.getLine), + startPosition = startOpt.map(_.getCharPositionInLine), + startIndex = startOpt.map(_.getStartIndex), + stopIndex = stopOpt.map(_.getStopIndex), + sqlText = Some(sqlText), + objectType = objectType, + objectName = objectName) + } + /** Validate the condition. If it doesn't throw a parse exception. */ def validate(f: => Boolean, message: String, ctx: ParserRuleContext): Unit = { if (!f) { @@ -126,9 +144,15 @@ object ParserUtils { * registered origin. This method restores the previously set origin after completion of the * closure. */ - def withOrigin[T](ctx: ParserRuleContext)(f: => T): T = { + def withOrigin[T](ctx: ParserRuleContext, sqlText: Option[String] = None)(f: => T): T = { val current = CurrentOrigin.get - CurrentOrigin.set(position(ctx.getStart)) + val text = sqlText.orElse(current.sqlText) + if (text.isEmpty) { + CurrentOrigin.set(position(ctx.getStart)) + } else { + CurrentOrigin.set(positionAndText(ctx.getStart, ctx.getStop, text.get, + current.objectType, current.objectName)) + } try { f } finally { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index ac60e18b2c1bf..84d92c15f24be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -52,9 +52,19 @@ import org.apache.spark.util.collection.BitSet /** Used by [[TreeNode.getNodeNumbered]] when traversing the tree for a given number */ private class MutableInt(var i: Int) +/** + * Contexts of TreeNodes, including location, SQL text, object type and object name. + * The only supported object type is "VIEW" now. In the future, we may support SQL UDF or other + * objects which contain SQL text. + */ case class Origin( line: Option[Int] = None, - startPosition: Option[Int] = None) + startPosition: Option[Int] = None, + startIndex: Option[Int] = None, + stopIndex: Option[Int] = None, + sqlText: Option[String] = None, + objectType: Option[String] = None, + objectName: Option[String] = None) /** * Provides a location for TreeNodes to ask about the context of their origin. For example, which diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala index 35f09001fd217..818ddb63104a5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ParserUtilsSuite.scala @@ -53,6 +53,24 @@ class ParserUtilsSuite extends SparkFunSuite { parser.statement().asInstanceOf[CreateNamespaceContext] } + val castClause = + """ + |CAST(1 /* Convert + | INT + | AS + | String */ as STRING)""".stripMargin.trim + + val castQuery = + s""" + |SELECT + |$castClause /* SHOULD NOT INCLUDE THIS */ + | AS s + |""".stripMargin + + val castQueryContext = buildContext(castQuery) { parser => + parser.statement().asInstanceOf[StatementDefaultContext] + } + val emptyContext = buildContext("") { parser => parser.statement } @@ -199,4 +217,42 @@ class ParserUtilsSuite extends SparkFunSuite { assert(origin == Origin(Some(3), Some(27))) assert(CurrentOrigin.get == current) } + + private def findCastContext(ctx: ParserRuleContext): Option[CastContext] = { + ctx match { + case context: CastContext => + Some(context) + case _ => + val it = ctx.children.iterator() + while(it.hasNext) { + it.next() match { + case p: ParserRuleContext => + val childResult = findCastContext(p) + if (childResult.isDefined) { + return childResult + } + case _ => + } + } + None + } + } + + test("withOrigin: setting SQL text") { + withOrigin(castQueryContext, Some(castQuery)) { + assert(CurrentOrigin.get.sqlText.contains(castQuery)) + val castContext = findCastContext(castQueryContext) + assert(castContext.isDefined) + withOrigin(castContext.get) { + val current = CurrentOrigin.get + assert(current.sqlText.contains(castQuery)) + assert(current.startIndex.isDefined) + assert(current.stopIndex.isDefined) + // With sqlText, startIndex, stopIndex, we can get the corresponding SQL text of the + // Cast clause. + assert(current.sqlText.get.substring(current.startIndex.get, current.stopIndex.get + 1) == + castClause) + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index 68eb15b4ae097..77513c560f0d7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -20,7 +20,10 @@ package org.apache.spark.sql.execution import org.apache.spark.SparkException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.expressions.{Add, Alias, Divide} import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.catalyst.plans.logical.Project +import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.internal.SQLConf._ import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} @@ -909,6 +912,58 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } } + test("CurrentOrigin is correctly set in and out of the View") { + withTable("t") { + Seq((1, 1), (2, 2)).toDF("a", "b").write.format("parquet").saveAsTable("t") + Seq("VIEW", "TEMPORARY VIEW").foreach { viewType => + val viewId = "v" + withView(viewId) { + val viewText = "SELECT a + b c FROM t" + sql( + s""" + |CREATE $viewType $viewId AS + |-- the body of the view + |$viewText + |""".stripMargin) + val plan = sql("select c / 2.0D d from v").logicalPlan + val add = plan.collectFirst { + case Project(Seq(Alias(a: Add, _)), _) => a + } + assert(add.isDefined) + val qualifiedName = if (viewType == "VIEW") { + s"default.$viewId" + } else { + viewId + } + val expectedAddOrigin = Origin( + line = Some(1), + startPosition = Some(7), + startIndex = Some(7), + stopIndex = Some(11), + sqlText = Some("SELECT a + b c FROM t"), + objectType = Some("VIEW"), + objectName = Some(qualifiedName) + ) + assert(add.get.origin == expectedAddOrigin) + + val divide = plan.collectFirst { + case Project(Seq(Alias(d: Divide, _)), _) => d + } + assert(divide.isDefined) + val expectedDivideOrigin = Origin( + line = Some(1), + startPosition = Some(7), + startIndex = Some(7), + stopIndex = Some(14), + sqlText = Some("select c / 2.0D d from v"), + objectType = None, + objectName = None) + assert(divide.get.origin == expectedDivideOrigin) + } + } + } + } + test("SPARK-37932: view join with same view") { withTable("t") { withView("v1") { From ac939182e0e216dfdd4d787581cf84a27934dd9f Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Mon, 28 Mar 2022 13:25:46 +0800 Subject: [PATCH 056/535] [SPARK-38391][SQL] Datasource v2 supports partial topN push-down ### What changes were proposed in this pull request? Currently , Spark supports push down topN completely . But for some data source (e.g. JDBC ) that have multiple partition , we should preserve partial push down topN. ### Why are the changes needed? Make behavior of sort pushdown correctly. ### Does this PR introduce _any_ user-facing change? 'No'. Just change the inner implement. ### How was this patch tested? New tests. Closes #35710 from beliefer/SPARK-38391. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit a8629a142b1658f520c8c7ab2b4712fd5bc5ecc3) Signed-off-by: Wenchen Fan --- .../sql/connector/read/SupportsPushDownTopN.java | 6 ++++++ .../execution/datasources/v2/PushDownUtils.scala | 11 +++++++---- .../datasources/v2/V2ScanRelationPushDown.scala | 11 ++++++++--- .../datasources/v2/jdbc/JDBCScanBuilder.scala | 2 ++ .../org/apache/spark/sql/jdbc/JDBCV2Suite.scala | 13 ++++++++++--- 5 files changed, 33 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SupportsPushDownTopN.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SupportsPushDownTopN.java index 0212895fde079..cba1592c4fa14 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SupportsPushDownTopN.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/SupportsPushDownTopN.java @@ -35,4 +35,10 @@ public interface SupportsPushDownTopN extends ScanBuilder { * Pushes down top N to the data source. */ boolean pushTopN(SortOrder[] orders, int limit); + + /** + * Whether the top N is partially pushed or not. If it returns true, then Spark will do top N + * again. This method will only be called when {@link #pushTopN} returns true. + */ + default boolean isPartiallyPushed() { return true; } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala index 1149bff7d2da7..f72310b5d7afa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala @@ -129,11 +129,14 @@ object PushDownUtils extends PredicateHelper { /** * Pushes down top N to the data source Scan */ - def pushTopN(scanBuilder: ScanBuilder, order: Array[SortOrder], limit: Int): Boolean = { + def pushTopN( + scanBuilder: ScanBuilder, + order: Array[SortOrder], + limit: Int): (Boolean, Boolean) = { scanBuilder match { - case s: SupportsPushDownTopN => - s.pushTopN(order, limit) - case _ => false + case s: SupportsPushDownTopN if s.pushTopN(order, limit) => + (true, s.isPartiallyPushed) + case _ => (false, false) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala index eaa30f90b77f5..c8ef8b00d0cf9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala @@ -381,11 +381,16 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper wit val newOrder = order.map(replaceAlias(_, aliasMap)).asInstanceOf[Seq[SortOrder]] val orders = DataSourceStrategy.translateSortOrders(newOrder) if (orders.length == order.length) { - val topNPushed = PushDownUtils.pushTopN(sHolder.builder, orders.toArray, limit) - if (topNPushed) { + val (isPushed, isPartiallyPushed) = + PushDownUtils.pushTopN(sHolder.builder, orders.toArray, limit) + if (isPushed) { sHolder.pushedLimit = Some(limit) sHolder.sortOrders = orders - operation + if (isPartiallyPushed) { + s + } else { + operation + } } else { s } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala index 475f563856f82..0a1542a42956d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCScanBuilder.scala @@ -146,6 +146,8 @@ case class JDBCScanBuilder( false } + override def isPartiallyPushed(): Boolean = jdbcOptions.numPartitions.map(_ > 1).getOrElse(false) + override def pruneColumns(requiredSchema: StructType): Unit = { // JDBC doesn't support nested column pruning. // TODO (SPARK-32593): JDBC support nested column and nested column pruning. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 3ab87ee3387e4..afbdc604b8a18 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -199,8 +199,15 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel "PushedFilters: [], PushedTopN: ORDER BY [salary ASC NULLS FIRST] LIMIT 1, ") checkAnswer(df1, Seq(Row(1, "cathy", 9000.00, 1200.0, false))) - val df2 = spark.read.table("h2.test.employee") - .where($"dept" === 1).orderBy($"salary").limit(1) + val df2 = spark.read + .option("partitionColumn", "dept") + .option("lowerBound", "0") + .option("upperBound", "2") + .option("numPartitions", "1") + .table("h2.test.employee") + .where($"dept" === 1) + .orderBy($"salary") + .limit(1) checkSortRemoved(df2) checkPushedInfo(df2, "PushedFilters: [DEPT IS NOT NULL, DEPT = 1], " + "PushedTopN: ORDER BY [salary ASC NULLS FIRST] LIMIT 1, ") @@ -215,7 +222,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel .filter($"dept" > 1) .orderBy($"salary".desc) .limit(1) - checkSortRemoved(df3) + checkSortRemoved(df3, false) checkPushedInfo(df3, "PushedFilters: [DEPT IS NOT NULL, DEPT > 1], " + "PushedTopN: ORDER BY [salary DESC NULLS LAST] LIMIT 1, ") checkAnswer(df3, Seq(Row(2, "alex", 12000.00, 1200.0, false))) From cc85b1ee138eeb2ea9aca9c545d48ab2c5b49c1c Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 28 Mar 2022 13:43:31 +0800 Subject: [PATCH 057/535] [SPARK-38623][SQL] Add more comments and tests for HashShuffleSpec ### What changes were proposed in this pull request? Add more comments and tests to explain the special handling of duplicated cluster keys. ### Why are the changes needed? improve code readability ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests. Closes #35937 from cloud-fan/join. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit c0cb5bce6623e98fa5161c1e3e866e730de87fa5) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/plans/physical/partitioning.scala | 7 ++++++- .../org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala | 8 ++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index 78d153c5a0e83..e4ff14b17a20c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -516,6 +516,11 @@ case class HashShuffleSpec( * A sequence where each element is a set of positions of the hash partition key to the cluster * keys. For instance, if cluster keys are [a, b, b] and hash partition keys are [a, b], the * result will be [(0), (1, 2)]. + * + * This is useful to check compatibility between two `HashShuffleSpec`s. If the cluster keys are + * [a, b, b] and [x, y, z] for the two join children, and the hash partition keys are + * [a, b] and [x, z], they are compatible. With the positions, we can do the compatibility check + * by looking at if the positions of hash partition keys from two sides have overlapping. */ lazy val hashKeyPositions: Seq[mutable.BitSet] = { val distKeyToPos = mutable.Map.empty[Expression, mutable.BitSet] @@ -533,7 +538,7 @@ case class HashShuffleSpec( // 1. both distributions have the same number of clustering expressions // 2. both partitioning have the same number of partitions // 3. both partitioning have the same number of expressions - // 4. each pair of expression from both has overlapping positions in their + // 4. each pair of partitioning expression from both sides has overlapping positions in their // corresponding distributions. distribution.clustering.length == otherDistribution.clustering.length && partitioning.numPartitions == otherPartitioning.numPartitions && diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala index 74ec949fe4470..7e11d4f68392f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala @@ -91,6 +91,14 @@ class ShuffleSpecSuite extends SparkFunSuite with SQLHelper { expected = true ) + checkCompatible( + HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), + ClusteredDistribution(Seq($"a", $"b", $"b"))), + HashShuffleSpec(HashPartitioning(Seq($"a", $"d"), 10), + ClusteredDistribution(Seq($"a", $"c", $"d"))), + expected = true + ) + checkCompatible( HashShuffleSpec(HashPartitioning(Seq($"a", $"b", $"a"), 10), ClusteredDistribution(Seq($"a", $"b", $"b"))), From 212cbf006daafe3b73ed7233362781703d5ef14a Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Mon, 28 Mar 2022 14:17:39 +0800 Subject: [PATCH 058/535] [SPARK-38655][SQL] `OffsetWindowFunctionFrameBase` cannot find the offset row whose input is not-null ### What changes were proposed in this pull request? ``` select x, nth_value(x, 5) IGNORE NULLS over (order by x rows between unbounded preceding and current row) from (select explode(sequence(1, 3)) x) ``` The sql output: ``` null null 3 ``` But it should returns ``` null null null ``` ### Why are the changes needed? Fix the bug UnboundedPrecedingOffsetWindowFunctionFrame works not good. ### Does this PR introduce _any_ user-facing change? 'Yes'. The output will be correct after fix this bug. ### How was this patch tested? New tests. Closes #35971 from beliefer/SPARK-38655. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../window/WindowFunctionFrame.scala | 26 ++++++++--- .../sql/DataFrameWindowFunctionsSuite.scala | 44 +++++++++++++++++++ 2 files changed, 63 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala index 7d0859564d18e..2b7f702a7f20a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala @@ -157,6 +157,9 @@ abstract class OffsetWindowFunctionFrameBase( /** find the offset row whose input is not null */ protected def findNextRowWithNonNullInput(): Unit = { + // In order to find the offset row whose input is not-null, + // offset < = input.length must be guaranteed. + assert(offset <= input.length) while (skippedNonNullCount < offset && inputIndex < input.length) { val r = WindowFunctionFrame.getNextOrNull(inputIterator) if (!nullCheck(r)) { @@ -165,6 +168,11 @@ abstract class OffsetWindowFunctionFrameBase( } inputIndex += 1 } + if (skippedNonNullCount < offset && inputIndex == input.length) { + // The size of non-null input is less than offset, cannot find the offset row whose input + // is not null. Therefore, reset `nextSelectedRow` with empty row. + nextSelectedRow = EmptyRow + } } override def currentLowerBound(): Int = throw new UnsupportedOperationException() @@ -362,14 +370,18 @@ class UnboundedPrecedingOffsetWindowFunctionFrame( assert(offset > 0) override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { - resetStates(rows) - if (ignoreNulls) { - findNextRowWithNonNullInput() + if (offset > rows.length) { + fillDefaultValue(EmptyRow) } else { - // drain the first few rows if offset is larger than one - while (inputIndex < offset) { - nextSelectedRow = WindowFunctionFrame.getNextOrNull(inputIterator) - inputIndex += 1 + resetStates(rows) + if (ignoreNulls) { + findNextRowWithNonNullInput() + } else { + // drain the first few rows if offset is larger than one + while (inputIndex < offset) { + nextSelectedRow = WindowFunctionFrame.getNextOrNull(inputIterator) + inputIndex += 1 + } } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 4676f8be9113e..25d676f5d93bc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -710,6 +710,50 @@ class DataFrameWindowFunctionsSuite extends QueryTest Row("a", 4, "x", "x", "y", "x", "x", "y"), Row("b", 1, null, null, null, null, null, null), Row("b", 2, null, null, null, null, null, null))) + + val df2 = Seq( + ("a", 1, "x"), + ("a", 2, "y"), + ("a", 3, "z")). + toDF("key", "order", "value") + checkAnswer( + df2.select( + $"key", + $"order", + nth_value($"value", 2).over(window1), + nth_value($"value", 2, ignoreNulls = true).over(window1), + nth_value($"value", 2).over(window2), + nth_value($"value", 2, ignoreNulls = true).over(window2), + nth_value($"value", 3).over(window1), + nth_value($"value", 3, ignoreNulls = true).over(window1), + nth_value($"value", 3).over(window2), + nth_value($"value", 3, ignoreNulls = true).over(window2), + nth_value($"value", 4).over(window1), + nth_value($"value", 4, ignoreNulls = true).over(window1), + nth_value($"value", 4).over(window2), + nth_value($"value", 4, ignoreNulls = true).over(window2)), + Seq( + Row("a", 1, "y", "y", null, null, "z", "z", null, null, null, null, null, null), + Row("a", 2, "y", "y", "y", "y", "z", "z", null, null, null, null, null, null), + Row("a", 3, "y", "y", "y", "y", "z", "z", "z", "z", null, null, null, null))) + + val df3 = Seq( + ("a", 1, "x"), + ("a", 2, nullStr), + ("a", 3, "z")). + toDF("key", "order", "value") + checkAnswer( + df3.select( + $"key", + $"order", + nth_value($"value", 3).over(window1), + nth_value($"value", 3, ignoreNulls = true).over(window1), + nth_value($"value", 3).over(window2), + nth_value($"value", 3, ignoreNulls = true).over(window2)), + Seq( + Row("a", 1, "z", null, null, null), + Row("a", 2, "z", null, null, null), + Row("a", 3, "z", null, "z", null))) } test("nth_value on descending ordered window") { From a5d6d0570bd60c776bbe1e5e467892e25109315a Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Mon, 28 Mar 2022 22:06:05 +0800 Subject: [PATCH 059/535] [SPARK-38432][SQL][FOLLOWUP] Add test case for push down filter with alias ### What changes were proposed in this pull request? DS V2 pushdown predicates to data source supports column with alias. But Spark missing the test case for push down filter with alias. ### Why are the changes needed? Add test case for push down filter with alias ### Does this PR introduce _any_ user-facing change? 'No'. Just add a test case. ### How was this patch tested? New tests. Closes #35988 from beliefer/SPARK-38432_followup2. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit 65608251f84a7ed8232aa288b9fdd36d0dd5189e) Signed-off-by: Wenchen Fan --- .../test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index afbdc604b8a18..791600a10e34e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -350,6 +350,13 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel "WHEN IS_MANAGER = true THEN FALSE ELSE DEPT > 3 END], ") checkAnswer(df9, Seq(Row(2, "alex", 12000, 1200, false), Row(2, "david", 10000, 1300, true), Row(6, "jen", 12000, 1200, true))) + + val df10 = spark.table("h2.test.people") + .select($"NAME".as("myName"), $"ID".as("myID")) + .filter($"myID" > 1) + checkFiltersRemoved(df10) + checkPushedInfo(df10, "PushedFilters: [ID IS NOT NULL, ID > 1], ") + checkAnswer(df10, Row("mary", 2)) } test("scan with complex filter push-down") { From 8e52fd308430b0d723e5d04b3f88d093b02058b0 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 28 Mar 2022 18:23:12 -0500 Subject: [PATCH 060/535] [SPARK-37853][CORE][SQL][FOLLOWUP] Clean up log4j2 deprecation api usage ### What changes were proposed in this pull request? This pr just fix a previously unnoticed deprecation api usage related to log4j2. ### Why are the changes needed? Clean up log4j2 deprecation api usage ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #35796 from LuciferYang/SPARK-37853-FOLLOWUP. Authored-by: yangjie01 Signed-off-by: Sean Owen (cherry picked from commit 84bc452bb59d2b4067b617f8dd32d35f875b1e72) Signed-off-by: Sean Owen --- .../org/apache/spark/launcher/ChildProcAppHandleSuite.java | 3 ++- .../apache/hive/service/cli/operation/LogDivertAppender.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java b/launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java index b0525a6ec061f..6a0c5845e80ca 100644 --- a/launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java +++ b/launcher/src/test/java/org/apache/spark/launcher/ChildProcAppHandleSuite.java @@ -28,6 +28,7 @@ import java.util.stream.Collectors; import static java.nio.file.attribute.PosixFilePermission.*; +import org.apache.logging.log4j.core.config.Property; import org.apache.logging.log4j.core.config.plugins.*; import org.apache.logging.log4j.core.Filter; import org.apache.logging.log4j.core.Layout; @@ -249,7 +250,7 @@ protected LogAppender(String name, Filter filter, Layout layout, boolean ignoreExceptions) { - super(name, filter, layout, ignoreExceptions); + super(name, filter, layout, ignoreExceptions, Property.EMPTY_ARRAY); } @Override diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/LogDivertAppender.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/LogDivertAppender.java index ca0fbe7eb67a9..64730f39bf37d 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/LogDivertAppender.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/LogDivertAppender.java @@ -38,6 +38,7 @@ import org.apache.logging.log4j.core.appender.AbstractWriterAppender; import org.apache.logging.log4j.core.appender.WriterManager; import com.google.common.base.Joiner; +import org.apache.logging.log4j.core.config.Property; import org.apache.logging.log4j.message.Message; /** @@ -278,7 +279,7 @@ private static StringLayout initLayout(OperationLog.LoggingLevel loggingMode) { public LogDivertAppender(OperationManager operationManager, OperationLog.LoggingLevel loggingMode) { - super("LogDivertAppender", initLayout(loggingMode), null, false, true, + super("LogDivertAppender", initLayout(loggingMode), null, false, true, Property.EMPTY_ARRAY, new WriterManager(new CharArrayWriter(), "LogDivertAppender", initLayout(loggingMode), true)); From 2b3a089d78c00103a4aaa183ab7f96fed39547e5 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Mar 2022 11:07:22 +0900 Subject: [PATCH 061/535] [SPARK-38680][INFRA] Set upperbound for pandas-stubs in CI ### What changes were proposed in this pull request? `pandas-stubs` released `1.2.0.54` but that seems like havng a breaking change: e.g., ) https://github.com/apache/spark/runs/5729037000?check_suite_focus=true ``` starting mypy annotations test... annotations failed mypy checks: python/pyspark/pandas/ml.py:62: error: Incompatible types in assignment (expression has type "Index", variable has type "MultiIndex") [assignment] python/pyspark/pandas/frame.py:6133: error: unused "type: ignore" comment python/pyspark/pandas/frame.py:6292: error: Incompatible types in assignment (expression has type "Index", variable has type "MultiIndex") [assignment] Found 3 errors in 2 files (checked 325 source files) ``` This PR proposes to pin pandas-stubs version for now to avoid breaking CI. ### Why are the changes needed? To have the stable CI status. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? It should be tested in this CI. Closes #35996 from HyukjinKwon/SPARK-38680. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 2d90659d09b153ebd4100db686d5c63e8dbe63db) Signed-off-by: Hyukjin Kwon --- .github/workflows/build_and_test.yml | 2 +- dev/requirements.txt | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 7baabc779867d..1f5df70cde936 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -503,7 +503,7 @@ jobs: # Jinja2 3.0.0+ causes error when building with Sphinx. # See also https://issues.apache.org/jira/browse/SPARK-35375. python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==21.12b0' - python3.9 -m pip install pandas-stubs + python3.9 -m pip install 'pandas-stubs==1.2.0.53' - name: Install R linter dependencies and SparkR run: | apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev diff --git a/dev/requirements.txt b/dev/requirements.txt index b17694aece12f..22e72d555434b 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -21,7 +21,8 @@ coverage mypy pytest-mypy-plugins==1.9.3 flake8==3.9.0 -pandas-stubs +# See SPARK-38680. +pandas-stubs<1.2.0.54 # Documentation (SQL) mkdocs From fbb9c2024054695adc6c15f52b06b157ea5bd211 Mon Sep 17 00:00:00 2001 From: Harutaka Kawamura Date: Tue, 29 Mar 2022 11:08:48 +0900 Subject: [PATCH 062/535] [MINOR][PYTHON] Fix `MultilayerPerceptronClassifierTest.test_raw_and_probability_prediction` ### What changes were proposed in this pull request? - A follow-up for https://github.com/apache/spark/pull/35778 - Increase `rtol` to de-flake the test. https://github.com/apache/spark/runs/5725350556 ``` Not equal to tolerance rtol=0.15, atol=1e-08 Mismatched elements: 3 / 3 (100%) Max absolute difference: 4.39314499 Max relative difference: 0.23356037 x: array([-14.190332, -10.063731, 26.570715]) y: array([-11.608192, -8.15828 , 22.17757 ]) ``` ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Closes #35997 from harupy/fix-test_raw_and_probability_prediction-2. Authored-by: Harutaka Kawamura Signed-off-by: Hyukjin Kwon (cherry picked from commit 264dbd7641ee30b01212d10de98f29c8be5c8943) Signed-off-by: Hyukjin Kwon --- python/pyspark/ml/tests/test_algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/ml/tests/test_algorithms.py b/python/pyspark/ml/tests/test_algorithms.py index 08da8592c043d..e677e79cecb85 100644 --- a/python/pyspark/ml/tests/test_algorithms.py +++ b/python/pyspark/ml/tests/test_algorithms.py @@ -106,7 +106,7 @@ def test_raw_and_probability_prediction(self): np.testing.assert_allclose( result.rawPrediction, expected_rawPrediction, - rtol=0.15, + rtol=0.3, # Use the same default value as `np.allclose` atol=1e-08, ) From 2a19c29aac76105fb06b09f26b84e58361715e22 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Mar 2022 12:00:22 +0900 Subject: [PATCH 063/535] [SPARK-38656][UI][PYTHON] Show options for Pandas API on Spark in UI ### What changes were proposed in this pull request? This PR proposes to show options for Pandas API on Spark in UI. The options in Pandas API on Spark (https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/options.html#available-options) are currently not shown properly in "SQL Properties". It is shown, under "SQL Properties", as something like `pandas_on_Spark.compute.ops_on_diff_frames` as a key and `false` as its value that is the format internally used. This PR extracts pandas-on-Spark specific options and show separately. Additionally, this PR also proposes to hide "SQL Properties" is none of configurations set. ### Why are the changes needed? For better readability and UX for pandas API on Spark. ### Does this PR introduce _any_ user-facing change? Yes. Now, we're able to show pandas-on-Spark specific options under "Pandas API on Spark: Properties" as below: Screen Shot 2022-03-28 at 9 31 54 AM Screen Shot 2022-03-28 at 9 31 20 AM ### How was this patch tested? Manually tested as above. Closes #35972 from HyukjinKwon/SPARK-38656. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 94abcd7037b05ac5068ce421e07306d45e957246) Signed-off-by: Hyukjin Kwon --- .../sql/execution/ui/ExecutionPage.scala | 53 ++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala index 6aacec34eb49e..5734760c5fdea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala @@ -21,12 +21,18 @@ import javax.servlet.http.HttpServletRequest import scala.xml.Node +import org.json4s.JNull +import org.json4s.JsonAST.{JBool, JString} +import org.json4s.jackson.JsonMethods.parse + import org.apache.spark.JobExecutionStatus import org.apache.spark.internal.Logging import org.apache.spark.ui.{UIUtils, WebUIPage} class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging { + private val pandasOnSparkConfPrefix = "pandas_on_Spark." + private val sqlStore = parent.sqlStore override def render(request: HttpServletRequest): Seq[Node] = { @@ -82,7 +88,11 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging summary ++ planVisualization(request, metrics, graph) ++ physicalPlanDescription(executionUIData.physicalPlanDescription) ++ - modifiedConfigs(executionUIData.modifiedConfigs) + modifiedConfigs( + executionUIData.modifiedConfigs.filterKeys( + !_.startsWith(pandasOnSparkConfPrefix)).toMap) ++ + modifiedPandasOnSparkConfigs( + executionUIData.modifiedConfigs.filterKeys(_.startsWith(pandasOnSparkConfPrefix)).toMap) }.getOrElse {
        No information to display for query {executionId}
        } @@ -148,6 +158,8 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging } private def modifiedConfigs(modifiedConfigs: Map[String, String]): Seq[Node] = { + if (Option(modifiedConfigs).exists(_.isEmpty)) return Nil + val configs = UIUtils.listingTable( propertyHeader, propertyRow, @@ -168,6 +180,45 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging
        } + private def modifiedPandasOnSparkConfigs( + modifiedPandasOnSparkConfigs: Map[String, String]): Seq[Node] = { + if (Option(modifiedPandasOnSparkConfigs).exists(_.isEmpty)) return Nil + + val modifiedOptions = modifiedPandasOnSparkConfigs.toSeq.map { case (k, v) => + // Remove prefix. + val key = k.slice(pandasOnSparkConfPrefix.length, k.length) + // The codes below is a simple version of Python's repr(). + // Pandas API on Spark does not support other types in the options yet. + val pyValue = parse(v) match { + case JNull => "None" + case JBool(v) => v.toString.capitalize + case JString(s) => s"'$s'" + case _ => v + } + (key, pyValue) + } + + val configs = UIUtils.listingTable( + propertyHeader, + propertyRow, + modifiedOptions.sorted, + fixedWidth = true + ) + +
        + + + Pandas API Properties + + +
        +
        + } + private def propertyHeader = Seq("Name", "Value") private def propertyRow(kv: (String, String)) =
        } From 7ce2d1555d4df0681656b330e5289a7ebd2aa58f Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Tue, 29 Mar 2022 11:25:58 +0800 Subject: [PATCH 064/535] [SPARK-38633][SQL] Support push down Cast to JDBC data source V2 ### What changes were proposed in this pull request? Cast is very useful and Spark always use Cast to convert data type automatically. ### Why are the changes needed? Let more aggregates and filters could be pushed down. ### Does this PR introduce _any_ user-facing change? 'Yes'. This PR after cut off 3.3.0. ### How was this patch tested? New tests. Closes #35947 from beliefer/SPARK-38633. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit cd222db3b7e31108c154eba5b0a2afb224a75ff5) Signed-off-by: Wenchen Fan --- .../spark/sql/connector/expressions/Cast.java | 45 +++++++++++++++++++ .../util/V2ExpressionSQLBuilder.java | 9 ++++ .../catalyst/util/V2ExpressionBuilder.scala | 6 ++- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 21 ++++++++- 4 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Cast.java diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Cast.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Cast.java new file mode 100644 index 0000000000000..26b97b46fe2ef --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/Cast.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.expressions; + +import java.io.Serializable; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.types.DataType; + +/** + * Represents a cast expression in the public logical expression API. + * + * @since 3.3.0 + */ +@Evolving +public class Cast implements Expression, Serializable { + private Expression expression; + private DataType dataType; + + public Cast(Expression expression, DataType dataType) { + this.expression = expression; + this.dataType = dataType; + } + + public Expression expression() { return expression; } + public DataType dataType() { return dataType; } + + @Override + public Expression[] children() { return new Expression[]{ expression() }; } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index 1df01d29cbdd1..c8d924db75aed 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -21,10 +21,12 @@ import java.util.List; import java.util.stream.Collectors; +import org.apache.spark.sql.connector.expressions.Cast; import org.apache.spark.sql.connector.expressions.Expression; import org.apache.spark.sql.connector.expressions.NamedReference; import org.apache.spark.sql.connector.expressions.GeneralScalarExpression; import org.apache.spark.sql.connector.expressions.Literal; +import org.apache.spark.sql.types.DataType; /** * The builder to generate SQL from V2 expressions. @@ -36,6 +38,9 @@ public String build(Expression expr) { return visitLiteral((Literal) expr); } else if (expr instanceof NamedReference) { return visitNamedReference((NamedReference) expr); + } else if (expr instanceof Cast) { + Cast cast = (Cast) expr; + return visitCast(build(cast.expression()), cast.dataType()); } else if (expr instanceof GeneralScalarExpression) { GeneralScalarExpression e = (GeneralScalarExpression) expr; String name = e.name(); @@ -167,6 +172,10 @@ protected String visitBinaryArithmetic(String name, String l, String r) { return l + " " + name + " " + r; } + protected String visitCast(String l, DataType dataType) { + return "CAST(" + l + " AS " + dataType.typeName() + ")"; + } + protected String visitAnd(String name, String l, String r) { return "(" + l + ") " + name + " (" + r + ")"; } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala index fbd6884358b0a..5fd01ac5636b1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -17,8 +17,8 @@ package org.apache.spark.sql.catalyst.util -import org.apache.spark.sql.catalyst.expressions.{Add, And, BinaryComparison, BinaryOperator, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, CaseWhen, Contains, Divide, EndsWith, EqualTo, Expression, In, InSet, IsNotNull, IsNull, Literal, Multiply, Not, Or, Predicate, Remainder, StartsWith, StringPredicate, Subtract, UnaryMinus} -import org.apache.spark.sql.connector.expressions.{Expression => V2Expression, FieldReference, GeneralScalarExpression, LiteralValue} +import org.apache.spark.sql.catalyst.expressions.{Add, And, BinaryComparison, BinaryOperator, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, CaseWhen, Cast, Contains, Divide, EndsWith, EqualTo, Expression, In, InSet, IsNotNull, IsNull, Literal, Multiply, Not, Or, Predicate, Remainder, StartsWith, StringPredicate, Subtract, UnaryMinus} +import org.apache.spark.sql.connector.expressions.{Cast => V2Cast, Expression => V2Expression, FieldReference, GeneralScalarExpression, LiteralValue} import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse, AlwaysTrue, And => V2And, Not => V2Not, Or => V2Or, Predicate => V2Predicate} import org.apache.spark.sql.execution.datasources.PushableColumn import org.apache.spark.sql.types.BooleanType @@ -93,6 +93,8 @@ class V2ExpressionBuilder( } else { None } + case Cast(child, dataType, _, true) => + generateExpression(child).map(v => new V2Cast(v, dataType)) case and: And => // AND expects predicate val l = generateExpression(and.left, true) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 791600a10e34e..a5e3a71f6cfe5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -359,7 +359,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(df10, Row("mary", 2)) } - test("scan with complex filter push-down") { + test("scan with filter push-down with ansi mode") { Seq(false, true).foreach { ansiMode => withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiMode.toString) { val df = spark.table("h2.test.people").filter($"id" + 1 > 1) @@ -411,6 +411,25 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkPushedInfo(df3, expectedPlanFragment3) checkAnswer(df3, Seq(Row(1, "cathy", 9000, 1200, false), Row(2, "david", 10000, 1300, true))) + + val df4 = spark.table("h2.test.employee") + .filter(($"salary" > 1000d).and($"salary" < 12000d)) + + checkFiltersRemoved(df4, ansiMode) + + df4.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = if (ansiMode) { + "PushedFilters: [SALARY IS NOT NULL, " + + "CAST(SALARY AS double) > 1000.0, CAST(SALARY AS double) < 12000.0], " + } else { + "PushedFilters: [SALARY IS NOT NULL], " + } + checkKeywordsExistsInExplain(df4, expected_plan_fragment) + } + + checkAnswer(df4, Seq(Row(1, "amy", 10000, 1000, true), + Row(1, "cathy", 9000, 1200, false), Row(2, "david", 10000, 1300, true))) } } } From c77f044c299fe6ffc6758b1548491e6e8f5da6a1 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 29 Mar 2022 15:15:48 +0900 Subject: [PATCH 065/535] [SPARK-38657][UI][SQL] Rename 'SQL' to 'SQL / DataFrame' in SQL UI page ### What changes were proposed in this pull request? This PR proposes to rename, in SQL UI page, - `SQL` -> `SQL / DataFrame` ### Why are the changes needed? - DataFrame executions are also included in this page. - Spark ML users will run DataFrame-based MLlib API, but they will have to check the "SQL" tab. - Pandas API on Spark arguably has no link to SQL itself conceptually. It makes less sense to users of pandas API. ### Does this PR introduce _any_ user-facing change? Yes. After this change, the rename proposed above will be applied as below: Screen Shot 2022-03-28 at 10 01 38 AM Screen Shot 2022-03-28 at 10 01 45 AM ### How was this patch tested? Manually tested as above. Closes #35973 from HyukjinKwon/SPARK-38657. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 3e12ec975c2ce75ef31f82dad22d5b5ecf3808ec) Signed-off-by: Hyukjin Kwon --- core/src/main/resources/org/apache/spark/ui/static/webui.css | 2 ++ .../org/apache/spark/sql/execution/ui/AllExecutionsPage.scala | 2 +- .../scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala | 2 +- .../main/scala/org/apache/spark/sql/execution/ui/SQLTab.scala | 2 ++ 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css index 1185b9fd8e3dd..0252bc800471f 100755 --- a/core/src/main/resources/org/apache/spark/ui/static/webui.css +++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css @@ -88,12 +88,14 @@ a:not([href]):hover { height: 50px; padding: 10px 15px 10px; line-height: 2; + white-space: nowrap; } .navbar .navbar-nav .nav-item.active .nav-link { background-color: #e5e5e5; box-shadow: inset 0 3px 8px rgba(0, 0, 0, 0.125); color: #555555; + white-space: nowrap; } table.sortable thead { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala index 71bfc98b9eeb7..b3f23cd1b5e07 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/AllExecutionsPage.scala @@ -147,7 +147,7 @@ private[ui] class AllExecutionsPage(parent: SQLTab) extends WebUIPage("") with L - UIUtils.headerSparkPage(request, "SQL", summary ++ content, parent) + UIUtils.headerSparkPage(request, "SQL / DataFrame", summary ++ content, parent) } private def executionsTable( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala index 5734760c5fdea..498bb2a6c1c99 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/ExecutionPage.scala @@ -171,7 +171,7 @@ class ExecutionPage(parent: SQLTab) extends WebUIPage("execution") with Logging - SQL Properties + SQL / DataFrame Properties
        CommandDescription
        3.3.02.3.0
        spark.shuffle.service.removeShufflefalse + Whether to use the ExternalShuffleService for deleting shuffle blocks for + deallocated executors when the shuffle is no longer needed. Without this enabled, + shuffle data on executors that are deallocated will remain on disk until the + application ends. + 3.3.0
        spark.shuffle.maxChunksBeingTransferred Long.MAX_VALUE
        {kv._1}{kv._2}
        @@ -457,7 +457,7 @@ In addition to the types listed above, it supports reading `union` types. The fo 3. `union(something, null)`, where something is any supported Avro type. This will be mapped to the same Spark SQL type as that of something, with nullable set to true. All other union types are considered complex. They will be mapped to StructType where field names are member0, member1, etc., in accordance with members of the union. This is consistent with the behavior when converting between Avro and Parquet. -It also supports reading the following Avro [logical types](https://avro.apache.org/docs/1.10.2/spec.html#Logical+Types): +It also supports reading the following Avro [logical types](https://avro.apache.org/docs/1.11.0/spec.html#Logical+Types):
        Avro typeSpark SQL type
        diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala index 9fe50079b24b1..48b2c3481a6ee 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala @@ -77,14 +77,14 @@ private[sql] class AvroOptions( /** * Top level record name in write result, which is required in Avro spec. - * See https://avro.apache.org/docs/1.10.2/spec.html#schema_record . + * See https://avro.apache.org/docs/1.11.0/spec.html#schema_record . * Default value is "topLevelRecord" */ val recordName: String = parameters.getOrElse("recordName", "topLevelRecord") /** * Record namespace in write result. Default value is "". - * See Avro spec for details: https://avro.apache.org/docs/1.10.2/spec.html#schema_record . + * See Avro spec for details: https://avro.apache.org/docs/1.11.0/spec.html#schema_record . */ val recordNamespace: String = parameters.getOrElse("recordNamespace", "") diff --git a/pom.xml b/pom.xml index a8a6a1353fc1b..cee7970ace10d 100644 --- a/pom.xml +++ b/pom.xml @@ -149,6 +149,7 @@ the link to metrics.dropwizard.io in docs/monitoring.md. --> 4.2.7 + 1.11.01.12.0 diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index b536b50532a05..934fa4a1fddd9 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -708,7 +708,7 @@ object DependencyOverrides { dependencyOverrides += "com.google.guava" % "guava" % guavaVersion, dependencyOverrides += "xerces" % "xercesImpl" % "2.12.0", dependencyOverrides += "jline" % "jline" % "2.14.6", - dependencyOverrides += "org.apache.avro" % "avro" % "1.10.2") + dependencyOverrides += "org.apache.avro" % "avro" % "1.11.0") } /** diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala index a23efd8ffd34d..ad0f9a56a8267 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala @@ -895,7 +895,7 @@ class HiveClientSuite(version: String, allVersions: Seq[String]) test("Decimal support of Avro Hive serde") { val tableName = "tab1" // TODO: add the other logical types. For details, see the link: - // https://avro.apache.org/docs/1.8.1/spec.html#Logical+Types + // https://avro.apache.org/docs/1.11.0/spec.html#Logical+Types val avroSchema = """{ | "name": "test_record", From d034ce5b90f44efc701ec8cef4a0cb42cfc18f66 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 31 Mar 2022 00:52:07 -0700 Subject: [PATCH 078/535] [SPARK-38709][SQL] Remove trailing $ from function class name in sql-expression-schema.md ### What changes were proposed in this pull request? It's a bit weird to see class names like `CeilExpressionBuilder$` in `sql-expression-schema.md`. This PR removes the trailing `$`. ### Why are the changes needed? code cleanup ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing tests Closes #36021 from cloud-fan/minor. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun (cherry picked from commit 794420fcddcacbb655ea88c4015d0a309b410bda) Signed-off-by: Dongjoon Hyun --- .../catalyst/analysis/FunctionRegistry.scala | 7 ++--- .../sql-functions/sql-expression-schema.md | 26 +++++++++---------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 3737f2ab7f1df..bb4aa701102fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -170,7 +170,7 @@ object FunctionRegistryBase { if (df != null) { if (df.extended().isEmpty) { new ExpressionInfo( - clazz.getCanonicalName, + clazz.getCanonicalName.stripSuffix("$"), null, name, df.usage(), @@ -184,10 +184,11 @@ object FunctionRegistryBase { } else { // This exists for the backward compatibility with old `ExpressionDescription`s defining // the extended description in `extended()`. - new ExpressionInfo(clazz.getCanonicalName, null, name, df.usage(), df.extended()) + new ExpressionInfo( + clazz.getCanonicalName.stripSuffix("$"), null, name, df.usage(), df.extended()) } } else { - new ExpressionInfo(clazz.getCanonicalName, name) + new ExpressionInfo(clazz.getCanonicalName.stripSuffix("$"), name) } } } diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 177a78db66c62..644bfa926dafb 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -69,14 +69,14 @@ | org.apache.spark.sql.catalyst.expressions.Cast | timestamp | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.Cast | tinyint | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.Cbrt | cbrt | SELECT cbrt(27.0) | struct | -| org.apache.spark.sql.catalyst.expressions.CeilExpressionBuilder$ | ceil | SELECT ceil(-0.1) | struct | -| org.apache.spark.sql.catalyst.expressions.CeilExpressionBuilder$ | ceiling | SELECT ceiling(-0.1) | struct | +| org.apache.spark.sql.catalyst.expressions.CeilExpressionBuilder | ceil | SELECT ceil(-0.1) | struct | +| org.apache.spark.sql.catalyst.expressions.CeilExpressionBuilder | ceiling | SELECT ceiling(-0.1) | struct | | org.apache.spark.sql.catalyst.expressions.Chr | char | SELECT char(65) | struct | | org.apache.spark.sql.catalyst.expressions.Chr | chr | SELECT chr(65) | struct | | org.apache.spark.sql.catalyst.expressions.Coalesce | coalesce | SELECT coalesce(NULL, 1, NULL) | struct | | org.apache.spark.sql.catalyst.expressions.Concat | concat | SELECT concat('Spark', 'SQL') | struct | | org.apache.spark.sql.catalyst.expressions.ConcatWs | concat_ws | SELECT concat_ws(' ', 'Spark', 'SQL') | struct | -| org.apache.spark.sql.catalyst.expressions.ContainsExpressionBuilder$ | contains | SELECT contains('Spark SQL', 'Spark') | struct | +| org.apache.spark.sql.catalyst.expressions.ContainsExpressionBuilder | contains | SELECT contains('Spark SQL', 'Spark') | struct | | org.apache.spark.sql.catalyst.expressions.Conv | conv | SELECT conv('100', 2, 10) | struct | | org.apache.spark.sql.catalyst.expressions.ConvertTimezone | convert_timezone | SELECT convert_timezone('Europe/Amsterdam', 'America/Los_Angeles', timestamp_ntz'2021-12-06 00:00:00') | struct | | org.apache.spark.sql.catalyst.expressions.Cos | cos | SELECT cos(0) | struct | @@ -100,7 +100,7 @@ | org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.DateFormatClass | date_format | SELECT date_format('2016-04-08', 'y') | struct | | org.apache.spark.sql.catalyst.expressions.DateFromUnixDate | date_from_unix_date | SELECT date_from_unix_date(1) | struct | -| org.apache.spark.sql.catalyst.expressions.DatePartExpressionBuilder$ | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct | +| org.apache.spark.sql.catalyst.expressions.DatePartExpressionBuilder | date_part | SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456') | struct | | org.apache.spark.sql.catalyst.expressions.DateSub | date_sub | SELECT date_sub('2016-07-30', 1) | struct | | org.apache.spark.sql.catalyst.expressions.DayOfMonth | day | SELECT day('2009-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.DayOfMonth | dayofmonth | SELECT dayofmonth('2009-07-30') | struct | @@ -112,7 +112,7 @@ | org.apache.spark.sql.catalyst.expressions.ElementAt | element_at | SELECT element_at(array(1, 2, 3), 2) | struct | | org.apache.spark.sql.catalyst.expressions.Elt | elt | SELECT elt(1, 'scala', 'java') | struct | | org.apache.spark.sql.catalyst.expressions.Encode | encode | SELECT encode('abc', 'utf-8') | struct | -| org.apache.spark.sql.catalyst.expressions.EndsWithExpressionBuilder$ | endswith | SELECT endswith('Spark SQL', 'SQL') | struct | +| org.apache.spark.sql.catalyst.expressions.EndsWithExpressionBuilder | endswith | SELECT endswith('Spark SQL', 'SQL') | struct | | org.apache.spark.sql.catalyst.expressions.EqualNullSafe | <=> | SELECT 2 <=> 2 | struct<(2 <=> 2):boolean> | | org.apache.spark.sql.catalyst.expressions.EqualTo | = | SELECT 2 = 2 | struct<(2 = 2):boolean> | | org.apache.spark.sql.catalyst.expressions.EqualTo | == | SELECT 2 == 2 | struct<(2 = 2):boolean> | @@ -125,7 +125,7 @@ | org.apache.spark.sql.catalyst.expressions.Factorial | factorial | SELECT factorial(5) | struct | | org.apache.spark.sql.catalyst.expressions.FindInSet | find_in_set | SELECT find_in_set('ab','abc,b,ab,c,def') | struct | | org.apache.spark.sql.catalyst.expressions.Flatten | flatten | SELECT flatten(array(array(1, 2), array(3, 4))) | struct> | -| org.apache.spark.sql.catalyst.expressions.FloorExpressionBuilder$ | floor | SELECT floor(-0.1) | struct | +| org.apache.spark.sql.catalyst.expressions.FloorExpressionBuilder | floor | SELECT floor(-0.1) | struct | | org.apache.spark.sql.catalyst.expressions.FormatNumber | format_number | SELECT format_number(12332.123456, 4) | struct | | org.apache.spark.sql.catalyst.expressions.FormatString | format_string | SELECT format_string("Hello World %d %s", 100, "days") | struct | | org.apache.spark.sql.catalyst.expressions.FormatString | printf | SELECT printf("Hello World %d %s", 100, "days") | struct | @@ -156,7 +156,7 @@ | org.apache.spark.sql.catalyst.expressions.JsonObjectKeys | json_object_keys | SELECT json_object_keys('{}') | struct> | | org.apache.spark.sql.catalyst.expressions.JsonToStructs | from_json | SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE') | struct> | | org.apache.spark.sql.catalyst.expressions.JsonTuple | json_tuple | SELECT json_tuple('{"a":1, "b":2}', 'a', 'b') | struct | -| org.apache.spark.sql.catalyst.expressions.LPadExpressionBuilder$ | lpad | SELECT lpad('hi', 5, '??') | struct | +| org.apache.spark.sql.catalyst.expressions.LPadExpressionBuilder | lpad | SELECT lpad('hi', 5, '??') | struct | | org.apache.spark.sql.catalyst.expressions.Lag | lag | SELECT a, b, lag(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.LastDay | last_day | SELECT last_day('2009-01-12') | struct | | org.apache.spark.sql.catalyst.expressions.Lead | lead | SELECT a, b, lead(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | @@ -182,8 +182,8 @@ | org.apache.spark.sql.catalyst.expressions.MakeDate | make_date | SELECT make_date(2013, 7, 15) | struct | | org.apache.spark.sql.catalyst.expressions.MakeInterval | make_interval | SELECT make_interval(100, 11, 1, 1, 12, 30, 01.001001) | struct | | org.apache.spark.sql.catalyst.expressions.MakeTimestamp | make_timestamp | SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887) | struct | -| org.apache.spark.sql.catalyst.expressions.MakeTimestampLTZExpressionBuilder$ | make_timestamp_ltz | SELECT make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887) | struct | -| org.apache.spark.sql.catalyst.expressions.MakeTimestampNTZExpressionBuilder$ | make_timestamp_ntz | SELECT make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887) | struct | +| org.apache.spark.sql.catalyst.expressions.MakeTimestampLTZExpressionBuilder | make_timestamp_ltz | SELECT make_timestamp_ltz(2014, 12, 28, 6, 30, 45.887) | struct | +| org.apache.spark.sql.catalyst.expressions.MakeTimestampNTZExpressionBuilder | make_timestamp_ntz | SELECT make_timestamp_ntz(2014, 12, 28, 6, 30, 45.887) | struct | | org.apache.spark.sql.catalyst.expressions.MakeYMInterval | make_ym_interval | SELECT make_ym_interval(1, 2) | struct | | org.apache.spark.sql.catalyst.expressions.MapConcat | map_concat | SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c')) | struct> | | org.apache.spark.sql.catalyst.expressions.MapContainsKey | map_contains_key | SELECT map_contains_key(map(1, 'a', 2, 'b'), 1) | struct | @@ -219,8 +219,8 @@ | org.apache.spark.sql.catalyst.expressions.Overlay | overlay | SELECT overlay('Spark SQL' PLACING '_' FROM 6) | struct | | org.apache.spark.sql.catalyst.expressions.ParseToDate | to_date | SELECT to_date('2009-07-30 04:17:52') | struct | | org.apache.spark.sql.catalyst.expressions.ParseToTimestamp | to_timestamp | SELECT to_timestamp('2016-12-31 00:12:00') | struct | -| org.apache.spark.sql.catalyst.expressions.ParseToTimestampLTZExpressionBuilder$ | to_timestamp_ltz | SELECT to_timestamp_ltz('2016-12-31 00:12:00') | struct | -| org.apache.spark.sql.catalyst.expressions.ParseToTimestampNTZExpressionBuilder$ | to_timestamp_ntz | SELECT to_timestamp_ntz('2016-12-31 00:12:00') | struct | +| org.apache.spark.sql.catalyst.expressions.ParseToTimestampLTZExpressionBuilder | to_timestamp_ltz | SELECT to_timestamp_ltz('2016-12-31 00:12:00') | struct | +| org.apache.spark.sql.catalyst.expressions.ParseToTimestampNTZExpressionBuilder | to_timestamp_ntz | SELECT to_timestamp_ntz('2016-12-31 00:12:00') | struct | | org.apache.spark.sql.catalyst.expressions.ParseUrl | parse_url | SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') | struct | | org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.Pi | pi | SELECT pi() | struct | @@ -233,7 +233,7 @@ | org.apache.spark.sql.catalyst.expressions.RLike | regexp | SELECT regexp('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct | | org.apache.spark.sql.catalyst.expressions.RLike | regexp_like | SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct | | org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct | -| org.apache.spark.sql.catalyst.expressions.RPadExpressionBuilder$ | rpad | SELECT rpad('hi', 5, '??') | struct | +| org.apache.spark.sql.catalyst.expressions.RPadExpressionBuilder | rpad | SELECT rpad('hi', 5, '??') | struct | | org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct | | org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct | | org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct | @@ -278,7 +278,7 @@ | org.apache.spark.sql.catalyst.expressions.SplitPart | split_part | SELECT split_part('11.12.13', '.', 3) | struct | | org.apache.spark.sql.catalyst.expressions.Sqrt | sqrt | SELECT sqrt(4) | struct | | org.apache.spark.sql.catalyst.expressions.Stack | stack | SELECT stack(2, 1, 2, 3) | struct | -| org.apache.spark.sql.catalyst.expressions.StartsWithExpressionBuilder$ | startswith | SELECT startswith('Spark SQL', 'Spark') | struct | +| org.apache.spark.sql.catalyst.expressions.StartsWithExpressionBuilder | startswith | SELECT startswith('Spark SQL', 'Spark') | struct | | org.apache.spark.sql.catalyst.expressions.StringInstr | instr | SELECT instr('SparkSQL', 'SQL') | struct | | org.apache.spark.sql.catalyst.expressions.StringLocate | locate | SELECT locate('bar', 'foobarbar') | struct | | org.apache.spark.sql.catalyst.expressions.StringLocate | position | SELECT position('bar', 'foobarbar') | struct | From 68dfff767aa3faaaf3c614514dcd8ef322256579 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Thu, 31 Mar 2022 19:18:58 +0800 Subject: [PATCH 079/535] [SPARK-37839][SQL][FOLLOWUP] Check overflow when DS V2 partial aggregate push-down `AVG` ### What changes were proposed in this pull request? https://github.com/apache/spark/pull/35130 supports partial aggregate push-down `AVG` for DS V2. The behavior doesn't consistent with `Average` if occurs overflow in ansi mode. This PR closely follows the implement of `Average` to respect overflow in ansi mode. ### Why are the changes needed? Make the behavior consistent with `Average` if occurs overflow in ansi mode. ### Does this PR introduce _any_ user-facing change? 'Yes'. Users could see the exception about overflow throws in ansi mode. ### How was this patch tested? New tests. Closes #35320 from beliefer/SPARK-37839_followup. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit e6839ad7340bc9eb5df03df2a62110bdda805e6b) Signed-off-by: Wenchen Fan --- .../expressions/aggregate/Average.scala | 4 +- .../v2/V2ScanRelationPushDown.scala | 21 +++------ .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 45 ++++++++++++++++++- 3 files changed, 52 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala index 05f7edaeb5d48..533f7f20b2530 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala @@ -76,8 +76,8 @@ case class Average( case _ => DoubleType } - private lazy val sum = AttributeReference("sum", sumDataType)() - private lazy val count = AttributeReference("count", LongType)() + lazy val sum = AttributeReference("sum", sumDataType)() + lazy val count = AttributeReference("count", LongType)() override lazy val aggBufferAttributes = sum :: count :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala index c8ef8b00d0cf9..5371829271d36 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import scala.collection.mutable -import org.apache.spark.sql.catalyst.expressions.{Alias, AliasHelper, And, Attribute, AttributeReference, Cast, Divide, DivideDTInterval, DivideYMInterval, EqualTo, Expression, If, IntegerLiteral, Literal, NamedExpression, PredicateHelper, ProjectionOverSchema, SortOrder, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, AliasHelper, And, Attribute, AttributeReference, Cast, Expression, IntegerLiteral, NamedExpression, PredicateHelper, ProjectionOverSchema, SortOrder, SubqueryExpression} import org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.optimizer.CollapseProject @@ -32,7 +32,7 @@ import org.apache.spark.sql.connector.expressions.filter.Predicate import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownAggregates, SupportsPushDownFilters, V1Scan} import org.apache.spark.sql.execution.datasources.DataSourceStrategy import org.apache.spark.sql.sources -import org.apache.spark.sql.types.{DataType, DayTimeIntervalType, LongType, StructType, YearMonthIntervalType} +import org.apache.spark.sql.types.{DataType, LongType, StructType} import org.apache.spark.sql.util.SchemaUtils._ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper with AliasHelper { @@ -138,18 +138,11 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper wit case AggregateExpression(avg: aggregate.Average, _, isDistinct, _, _) => val sum = aggregate.Sum(avg.child).toAggregateExpression(isDistinct) val count = aggregate.Count(avg.child).toAggregateExpression(isDistinct) - // Closely follow `Average.evaluateExpression` - avg.dataType match { - case _: YearMonthIntervalType => - If(EqualTo(count, Literal(0L)), - Literal(null, YearMonthIntervalType()), DivideYMInterval(sum, count)) - case _: DayTimeIntervalType => - If(EqualTo(count, Literal(0L)), - Literal(null, DayTimeIntervalType()), DivideDTInterval(sum, count)) - case _ => - // TODO deal with the overflow issue - Divide(addCastIfNeeded(sum, avg.dataType), - addCastIfNeeded(count, avg.dataType), false) + avg.evaluateExpression transform { + case a: Attribute if a.semanticEquals(avg.sum) => + addCastIfNeeded(sum, avg.sum.dataType) + case a: Attribute if a.semanticEquals(avg.count) => + addCastIfNeeded(count, avg.count.dataType) } } }.asInstanceOf[Seq[NamedExpression]] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index a5e3a71f6cfe5..67a02904660c3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -95,6 +95,14 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel """CREATE TABLE "test"."view1" ("|col1" INTEGER, "|col2" INTEGER)""").executeUpdate() conn.prepareStatement( """CREATE TABLE "test"."view2" ("|col1" INTEGER, "|col3" INTEGER)""").executeUpdate() + + conn.prepareStatement( + "CREATE TABLE \"test\".\"item\" (id INTEGER, name TEXT(32), price NUMERIC(23, 3))") + .executeUpdate() + conn.prepareStatement("INSERT INTO \"test\".\"item\" VALUES " + + "(1, 'bottle', 11111111111111111111.123)").executeUpdate() + conn.prepareStatement("INSERT INTO \"test\".\"item\" VALUES " + + "(1, 'bottle', 99999999999999999999.123)").executeUpdate() } } @@ -484,8 +492,8 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel test("show tables") { checkAnswer(sql("SHOW TABLES IN h2.test"), Seq(Row("test", "people", false), Row("test", "empty_table", false), - Row("test", "employee", false), Row("test", "dept", false), Row("test", "person", false), - Row("test", "view1", false), Row("test", "view2", false))) + Row("test", "employee", false), Row("test", "item", false), Row("test", "dept", false), + Row("test", "person", false), Row("test", "view1", false), Row("test", "view2", false))) } test("SQL API: create table as select") { @@ -1106,4 +1114,37 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(df2, Seq(Row("alex", 12000.00), Row("amy", 10000.00), Row("cathy", 9000.00), Row("david", 10000.00), Row("jen", 12000.00))) } + + test("scan with aggregate push-down: partial push-down AVG with overflow") { + def createDataFrame: DataFrame = spark.read + .option("partitionColumn", "id") + .option("lowerBound", "0") + .option("upperBound", "2") + .option("numPartitions", "2") + .table("h2.test.item") + .agg(avg($"PRICE").as("avg")) + + Seq(true, false).foreach { ansiEnabled => + withSQLConf((SQLConf.ANSI_ENABLED.key, ansiEnabled.toString)) { + val df = createDataFrame + checkAggregateRemoved(df, false) + df.queryExecution.optimizedPlan.collect { + case _: DataSourceV2ScanRelation => + val expected_plan_fragment = + "PushedAggregates: [SUM(PRICE), COUNT(PRICE)]" + checkKeywordsExistsInExplain(df, expected_plan_fragment) + } + if (ansiEnabled) { + val e = intercept[SparkException] { + df.collect() + } + assert(e.getCause.isInstanceOf[ArithmeticException]) + assert(e.getCause.getMessage.contains("cannot be represented as Decimal") || + e.getCause.getMessage.contains("Overflow in sum of decimals")) + } else { + checkAnswer(df, Seq(Row(null))) + } + } + } + } } From 6bf760ace4f96bd9ff2c9cfdebff60690ddb6db8 Mon Sep 17 00:00:00 2001 From: Dereck Li Date: Thu, 31 Mar 2022 21:33:08 +0800 Subject: [PATCH 080/535] [SPARK-38333][SQL] PlanExpression expression should skip addExprTree function in Executor ### What changes were proposed in this pull request? It is master branch pr [SPARK-38333](https://github.com/apache/spark/pull/35662) ### Why are the changes needed? Bug fix, it is potential issue. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #36012 from monkeyboy123/spark-38333. Authored-by: Dereck Li Signed-off-by: Wenchen Fan (cherry picked from commit a40acd4392a8611062763ce6ec7bc853d401c646) Signed-off-by: Wenchen Fan --- .../expressions/EquivalentExpressions.scala | 2 +- .../SubexpressionEliminationSuite.scala | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala index 903a6fd7bd014..472b6e871e73a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala @@ -197,7 +197,7 @@ class EquivalentExpressions { expr.exists(_.isInstanceOf[LambdaVariable]) || // `PlanExpression` wraps query plan. To compare query plans of `PlanExpression` on executor, // can cause error like NPE. - (expr.isInstanceOf[PlanExpression[_]] && Utils.isInRunningSparkTask) + (expr.exists(_.isInstanceOf[PlanExpression[_]]) && Utils.isInRunningSparkTask) if (!skip && !updateExprInMap(expr, map, useCount)) { val uc = useCount.signum diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala index fa3003b275783..3c96ba4300035 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/SubexpressionEliminationSuite.scala @@ -16,8 +16,9 @@ */ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkFunSuite, TaskContext, TaskContextImpl} import org.apache.spark.sql.catalyst.expressions.codegen._ +import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BinaryType, DataType, Decimal, IntegerType} @@ -419,6 +420,21 @@ class SubexpressionEliminationSuite extends SparkFunSuite with ExpressionEvalHel } } + test("SPARK-38333: PlanExpression expression should skip addExprTree function in Executor") { + try { + // suppose we are in executor + val context1 = new TaskContextImpl(0, 0, 0, 0, 0, null, null, null, cpus = 0) + TaskContext.setTaskContext(context1) + + val equivalence = new EquivalentExpressions + val expression = DynamicPruningExpression(Exists(LocalRelation())) + equivalence.addExprTree(expression) + assert(equivalence.getExprState(expression).isEmpty) + } finally { + TaskContext.unset() + } + } + test("SPARK-35886: PromotePrecision should not overwrite genCode") { val p = PromotePrecision(Literal(Decimal("10.1"))) From eb442790319a8227677ca424c90a0a938bba7daf Mon Sep 17 00:00:00 2001 From: stczwd Date: Thu, 31 Mar 2022 21:42:23 +0800 Subject: [PATCH 081/535] [SPARK-37831][CORE] add task partition id in TaskInfo and Task Metrics ### Why are the changes needed? There is no partition id in the current task metrics. It is difficult to track the task metrics of a specific partition or the stage metrics of processing data, especially when the stage was retried. ``` class TaskData private[spark]( val taskId: Long, val index: Int, val attempt: Int, val launchTime: Date, val resultFetchStart: Option[Date], JsonDeserialize(contentAs = classOf[JLong]) val duration: Option[Long], val executorId: String, val host: String, val status: String, val taskLocality: String, val speculative: Boolean, val accumulatorUpdates: Seq[AccumulableInfo], val errorMessage: Option[String] = None, val taskMetrics: Option[TaskMetrics] = None, val executorLogs: Map[String, String], val schedulerDelay: Long, val gettingResultTime: Long) ``` ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? add new tests Closes #35185 from jackylee-ch/SPARK-37831. Lead-authored-by: stczwd Co-authored-by: Jacky Lee Co-authored-by: jackylee-ch Signed-off-by: Wenchen Fan (cherry picked from commit 13cfc5bcaf8ebda8769595c58902327c4affdecf) Signed-off-by: Wenchen Fan --- .../org/apache/spark/scheduler/TaskInfo.scala | 22 + .../spark/scheduler/TaskSetManager.scala | 3 +- .../apache/spark/status/AppStatusStore.scala | 3 +- .../org/apache/spark/status/LiveEntity.scala | 1 + .../org/apache/spark/status/api/v1/api.scala | 1 + .../org/apache/spark/status/storeTypes.scala | 6 + .../org/apache/spark/util/JsonProtocol.scala | 7 +- .../application_list_json_expectation.json | 15 + .../completed_app_list_json_expectation.json | 15 + ...xcludeOnFailure_for_stage_expectation.json | 12 + ...eOnFailure_node_for_stage_expectation.json | 14 + .../limit_app_list_json_expectation.json | 34 +- .../minDate_app_list_json_expectation.json | 15 + .../minEndDate_app_list_json_expectation.json | 15 + .../one_stage_attempt_json_expectation.json | 8 + .../one_stage_json_expectation.json | 8 + ...e_stage_json_with_details_expectation.json | 8 + ...age_json_with_partitionId_expectation.json | 624 ++++++++++++++++++ .../stage_task_list_expectation.json | 21 + ...multi_attempt_app_json_1__expectation.json | 8 + ...multi_attempt_app_json_2__expectation.json | 8 + ...k_list_w__offset___length_expectation.json | 50 ++ ...stage_task_list_w__sortBy_expectation.json | 20 + ...tBy_short_names___runtime_expectation.json | 20 + ...rtBy_short_names__runtime_expectation.json | 20 + ..._status___offset___length_expectation.json | 2 + ...rtBy_short_names__runtime_expectation.json | 20 + ...stage_task_list_w__status_expectation.json | 10 + ...ask_list_with_partitionId_expectation.json | 401 +++++++++++ ...age_with_accumulable_json_expectation.json | 8 + .../stage_with_peak_metrics_expectation.json | 16 + ..._with_speculation_summary_expectation.json | 5 + .../stage_with_summaries_expectation.json | 16 + .../spark-events/local-1642039451826 | 115 ++++ .../ExecutorAllocationManagerSuite.scala | 7 +- .../deploy/history/HistoryServerSuite.scala | 2 + .../spark/scheduler/DAGSchedulerSuite.scala | 7 +- .../scheduler/EventLoggingListenerSuite.scala | 3 +- .../dynalloc/ExecutorMonitorSuite.scala | 3 +- .../spark/status/AppStatusListenerSuite.scala | 7 +- .../spark/status/AppStatusStoreSuite.scala | 8 +- .../spark/status/AppStatusUtilsSuite.scala | 2 + .../status/ListenerEventsTestHelper.scala | 5 +- .../org/apache/spark/ui/StagePageSuite.scala | 4 +- .../apache/spark/util/JsonProtocolSuite.scala | 23 +- dev/.rat-excludes | 1 + project/MimaExcludes.scala | 3 + .../ui/MetricsAggregationBenchmark.scala | 1 + .../ui/SQLAppStatusListenerSuite.scala | 1 + 49 files changed, 1581 insertions(+), 47 deletions(-) create mode 100644 core/src/test/resources/HistoryServerExpectations/one_stage_json_with_partitionId_expectation.json create mode 100644 core/src/test/resources/HistoryServerExpectations/stage_task_list_with_partitionId_expectation.json create mode 100644 core/src/test/resources/spark-events/local-1642039451826 diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala index 67799b2b45c18..b135a82145dcd 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskInfo.scala @@ -35,12 +35,34 @@ class TaskInfo( */ val index: Int, val attemptNumber: Int, + /** + * The actual RDD partition ID in this task. + * The ID of the RDD partition is always same across task attempts. + * This will be -1 for historical data, and available for all applications since Spark 3.3. + */ + val partitionId: Int, val launchTime: Long, val executorId: String, val host: String, val taskLocality: TaskLocality.TaskLocality, val speculative: Boolean) { + /** + * This api doesn't contains partitionId, please use the new api. + * Remain it for backward compatibility before Spark 3.3. + */ + def this( + taskId: Long, + index: Int, + attemptNumber: Int, + launchTime: Long, + executorId: String, + host: String, + taskLocality: TaskLocality.TaskLocality, + speculative: Boolean) = { + this(taskId, index, attemptNumber, -1, launchTime, executorId, host, taskLocality, speculative) + } + /** * The time when the task started remotely getting the result. Will not be set if the * task result was sent immediately when the task finished (as opposed to sending an diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index b7fae2a533f0e..8523dc4c9388d 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -509,7 +509,8 @@ private[spark] class TaskSetManager( // Do various bookkeeping copiesRunning(index) += 1 val attemptNum = taskAttempts(index).size - val info = new TaskInfo(taskId, index, attemptNum, launchTime, + val info = new TaskInfo( + taskId, index, attemptNum, task.partitionId, launchTime, execId, host, taskLocality, speculative) taskInfos(taskId) = info taskAttempts(index) = info :: taskAttempts(index) diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala index 398cd45a6e879..03767ee83a95d 100644 --- a/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala +++ b/core/src/main/scala/org/apache/spark/status/AppStatusStore.scala @@ -743,7 +743,8 @@ private[spark] class AppStatusStore( }) new v1.TaskData(taskDataOld.taskId, taskDataOld.index, - taskDataOld.attempt, taskDataOld.launchTime, taskDataOld.resultFetchStart, + taskDataOld.attempt, taskDataOld.partitionId, + taskDataOld.launchTime, taskDataOld.resultFetchStart, taskDataOld.duration, taskDataOld.executorId, taskDataOld.host, taskDataOld.status, taskDataOld.taskLocality, taskDataOld.speculative, taskDataOld.accumulatorUpdates, taskDataOld.errorMessage, taskDataOld.taskMetrics, diff --git a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala index c5d72d3cfea68..6437d2f0732bc 100644 --- a/core/src/main/scala/org/apache/spark/status/LiveEntity.scala +++ b/core/src/main/scala/org/apache/spark/status/LiveEntity.scala @@ -201,6 +201,7 @@ private class LiveTask( info.taskId, info.index, info.attemptNumber, + info.partitionId, info.launchTime, if (info.gettingResult) info.gettingResultTime else -1L, duration, diff --git a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala index 86ddd3bb22b0d..d1a4d602fc5e0 100644 --- a/core/src/main/scala/org/apache/spark/status/api/v1/api.scala +++ b/core/src/main/scala/org/apache/spark/status/api/v1/api.scala @@ -308,6 +308,7 @@ class TaskData private[spark]( val taskId: Long, val index: Int, val attempt: Int, + val partitionId: Int, val launchTime: Date, val resultFetchStart: Option[Date], @JsonDeserialize(contentAs = classOf[JLong]) diff --git a/core/src/main/scala/org/apache/spark/status/storeTypes.scala b/core/src/main/scala/org/apache/spark/status/storeTypes.scala index 39bf593274904..895fb586536b6 100644 --- a/core/src/main/scala/org/apache/spark/status/storeTypes.scala +++ b/core/src/main/scala/org/apache/spark/status/storeTypes.scala @@ -141,6 +141,7 @@ private[spark] object TaskIndexNames { final val STAGE = "stage" final val STATUS = "sta" final val TASK_INDEX = "idx" + final val TASK_PARTITION_ID = "partid" final val COMPLETION_TIME = "ct" } @@ -161,6 +162,10 @@ private[spark] class TaskDataWrapper( val index: Int, @KVIndexParam(value = TaskIndexNames.ATTEMPT, parent = TaskIndexNames.STAGE) val attempt: Int, + @KVIndexParam(value = TaskIndexNames.TASK_PARTITION_ID, parent = TaskIndexNames.STAGE) + // Different kvstores have different default values (eg 0 or -1), + // thus we use the default value here for backwards compatibility. + val partitionId: Int = -1, @KVIndexParam(value = TaskIndexNames.LAUNCH_TIME, parent = TaskIndexNames.STAGE) val launchTime: Long, val resultFetchStart: Long, @@ -286,6 +291,7 @@ private[spark] class TaskDataWrapper( taskId, index, attempt, + partitionId, new Date(launchTime), if (resultFetchStart > 0L) Some(new Date(resultFetchStart)) else None, if (duration > 0L) Some(duration) else None, diff --git a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala index f9b6ed37977cb..acbd3239df24c 100644 --- a/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/util/JsonProtocol.scala @@ -317,6 +317,7 @@ private[spark] object JsonProtocol { ("Task ID" -> taskInfo.taskId) ~ ("Index" -> taskInfo.index) ~ ("Attempt" -> taskInfo.attemptNumber) ~ + ("Partition ID" -> taskInfo.partitionId) ~ ("Launch Time" -> taskInfo.launchTime) ~ ("Executor ID" -> taskInfo.executorId) ~ ("Host" -> taskInfo.host) ~ @@ -916,6 +917,7 @@ private[spark] object JsonProtocol { val taskId = (json \ "Task ID").extract[Long] val index = (json \ "Index").extract[Int] val attempt = jsonOption(json \ "Attempt").map(_.extract[Int]).getOrElse(1) + val partitionId = jsonOption(json \ "Partition ID").map(_.extract[Int]).getOrElse(-1) val launchTime = (json \ "Launch Time").extract[Long] val executorId = weakIntern((json \ "Executor ID").extract[String]) val host = weakIntern((json \ "Host").extract[String]) @@ -930,8 +932,9 @@ private[spark] object JsonProtocol { case None => Seq.empty[AccumulableInfo] } - val taskInfo = - new TaskInfo(taskId, index, attempt, launchTime, executorId, host, taskLocality, speculative) + val taskInfo = new TaskInfo( + taskId, index, attempt, partitionId, launchTime, + executorId, host, taskLocality, speculative) taskInfo.gettingResultTime = gettingResultTime taskInfo.finishTime = finishTime taskInfo.failed = failed diff --git a/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json index ae1edbc684843..e15707cc12145 100644 --- a/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/application_list_json_expectation.json @@ -1,4 +1,19 @@ [ { + "id" : "local-1642039451826", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2022-01-13T02:04:10.519GMT", + "endTime" : "2022-01-13T02:05:36.564GMT", + "lastUpdated" : "", + "duration" : 86045, + "sparkUser" : "lijunqing", + "completed" : true, + "appSparkVersion" : "3.3.0-SNAPSHOT", + "startTimeEpoch" : 1642039450519, + "endTimeEpoch" : 1642039536564, + "lastUpdatedEpoch" : 0 + } ] +}, { "id" : "application_1628109047826_1317105", "name" : "Spark shell", "attempts" : [ { diff --git a/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json index ae1edbc684843..e15707cc12145 100644 --- a/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/completed_app_list_json_expectation.json @@ -1,4 +1,19 @@ [ { + "id" : "local-1642039451826", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2022-01-13T02:04:10.519GMT", + "endTime" : "2022-01-13T02:05:36.564GMT", + "lastUpdated" : "", + "duration" : 86045, + "sparkUser" : "lijunqing", + "completed" : true, + "appSparkVersion" : "3.3.0-SNAPSHOT", + "startTimeEpoch" : 1642039450519, + "endTimeEpoch" : 1642039536564, + "lastUpdatedEpoch" : 0 + } ] +}, { "id" : "application_1628109047826_1317105", "name" : "Spark shell", "attempts" : [ { diff --git a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json index 9be6b6eb123ad..69747c8f2ca80 100644 --- a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_for_stage_expectation.json @@ -46,6 +46,7 @@ "taskId" : 0, "index" : 0, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-09T10:21:18.347GMT", "duration" : 562, "executorId" : "0", @@ -100,6 +101,7 @@ "taskId" : 5, "index" : 3, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-09T10:21:18.958GMT", "duration" : 22, "executorId" : "1", @@ -153,6 +155,7 @@ "taskId" : 10, "index" : 8, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-09T10:21:19.034GMT", "duration" : 12, "executorId" : "1", @@ -206,6 +209,7 @@ "taskId" : 1, "index" : 1, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-09T10:21:18.364GMT", "duration" : 565, "executorId" : "1", @@ -259,6 +263,7 @@ "taskId" : 6, "index" : 4, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-09T10:21:18.980GMT", "duration" : 16, "executorId" : "1", @@ -312,6 +317,7 @@ "taskId" : 9, "index" : 7, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-09T10:21:19.022GMT", "duration" : 12, "executorId" : "1", @@ -365,6 +371,7 @@ "taskId" : 2, "index" : 2, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-09T10:21:18.899GMT", "duration" : 27, "executorId" : "0", @@ -419,6 +426,7 @@ "taskId" : 7, "index" : 5, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-09T10:21:18.996GMT", "duration" : 15, "executorId" : "1", @@ -472,6 +480,7 @@ "taskId" : 3, "index" : 0, "attempt" : 1, + "partitionId": -1, "launchTime" : "2018-01-09T10:21:18.919GMT", "duration" : 24, "executorId" : "1", @@ -525,6 +534,7 @@ "taskId" : 11, "index" : 9, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-09T10:21:19.045GMT", "duration" : 15, "executorId" : "1", @@ -578,6 +588,7 @@ "taskId" : 8, "index" : 6, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-09T10:21:19.011GMT", "duration" : 11, "executorId" : "1", @@ -631,6 +642,7 @@ "taskId" : 4, "index" : 2, "attempt" : 1, + "partitionId": -1, "launchTime" : "2018-01-09T10:21:18.943GMT", "duration" : 16, "executorId" : "1", diff --git a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json index 1661f7caf0525..35bee443eab67 100644 --- a/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/excludeOnFailure_node_for_stage_expectation.json @@ -46,6 +46,7 @@ "taskId" : 0, "index" : 0, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-18T18:33:12.816GMT", "duration" : 2064, "executorId" : "1", @@ -99,6 +100,7 @@ "taskId" : 5, "index" : 5, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-18T18:33:14.320GMT", "duration" : 73, "executorId" : "5", @@ -153,6 +155,7 @@ "taskId" : 10, "index" : 1, "attempt" : 1, + "partitionId": -1, "launchTime" : "2018-01-18T18:33:15.069GMT", "duration" : 132, "executorId" : "2", @@ -206,6 +209,7 @@ "taskId" : 1, "index" : 1, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-18T18:33:12.832GMT", "duration" : 1506, "executorId" : "5", @@ -260,6 +264,7 @@ "taskId" : 6, "index" : 6, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-18T18:33:14.323GMT", "duration" : 67, "executorId" : "4", @@ -314,6 +319,7 @@ "taskId" : 9, "index" : 4, "attempt" : 1, + "partitionId": -1, "launchTime" : "2018-01-18T18:33:14.973GMT", "duration" : 96, "executorId" : "2", @@ -367,6 +373,7 @@ "taskId" : 13, "index" : 9, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-18T18:33:15.200GMT", "duration" : 76, "executorId" : "2", @@ -420,6 +427,7 @@ "taskId" : 2, "index" : 2, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-18T18:33:12.832GMT", "duration" : 1774, "executorId" : "3", @@ -473,6 +481,7 @@ "taskId" : 12, "index" : 8, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-18T18:33:15.165GMT", "duration" : 60, "executorId" : "1", @@ -526,6 +535,7 @@ "taskId" : 7, "index" : 5, "attempt" : 1, + "partitionId": -1, "launchTime" : "2018-01-18T18:33:14.859GMT", "duration" : 115, "executorId" : "2", @@ -579,6 +589,7 @@ "taskId" : 3, "index" : 3, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-18T18:33:12.833GMT", "duration" : 2027, "executorId" : "2", @@ -632,6 +643,7 @@ "taskId" : 11, "index" : 7, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-18T18:33:15.072GMT", "duration" : 93, "executorId" : "1", @@ -685,6 +697,7 @@ "taskId" : 8, "index" : 6, "attempt" : 1, + "partitionId": -1, "launchTime" : "2018-01-18T18:33:14.879GMT", "duration" : 194, "executorId" : "1", @@ -738,6 +751,7 @@ "taskId" : 4, "index" : 4, "attempt" : 0, + "partitionId": -1, "launchTime" : "2018-01-18T18:33:12.833GMT", "duration" : 1522, "executorId" : "4", diff --git a/core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json index 91e3ebd5a4739..2a504ce953962 100644 --- a/core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/limit_app_list_json_expectation.json @@ -1,4 +1,19 @@ [ { + "id" : "local-1642039451826", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2022-01-13T02:04:10.519GMT", + "endTime" : "2022-01-13T02:05:36.564GMT", + "lastUpdated" : "", + "duration" : 86045, + "sparkUser" : "lijunqing", + "completed" : true, + "appSparkVersion" : "3.3.0-SNAPSHOT", + "startTimeEpoch" : 1642039450519, + "endTimeEpoch" : 1642039536564, + "lastUpdatedEpoch" : 0 + } ] +}, { "id" : "application_1628109047826_1317105", "name" : "Spark shell", "attempts" : [ { @@ -24,23 +39,8 @@ "sparkUser" : "terryk", "completed" : true, "appSparkVersion" : "3.1.0-SNAPSHOT", - "lastUpdatedEpoch" : 0, + "startTimeEpoch" : 1594091460235, "endTimeEpoch" : 1594091824231, - "startTimeEpoch" : 1594091460235 - } ] -}, { - "id" : "application_1578436911597_0052", - "name" : "Spark shell", - "attempts" : [ { - "startTime" : "2020-01-11T17:44:22.851GMT", - "endTime" : "2020-01-11T17:46:42.615GMT", - "lastUpdated" : "", - "duration" : 139764, - "sparkUser" : "tgraves", - "completed" : true, - "appSparkVersion" : "3.0.0-SNAPSHOT", - "endTimeEpoch" : 1578764802615, - "startTimeEpoch" : 1578764662851, "lastUpdatedEpoch" : 0 } ] -} ] +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json index 9885d36e22aa3..389206364550d 100644 --- a/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/minDate_app_list_json_expectation.json @@ -1,4 +1,19 @@ [ { + "id" : "local-1642039451826", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2022-01-13T02:04:10.519GMT", + "endTime" : "2022-01-13T02:05:36.564GMT", + "lastUpdated" : "", + "duration" : 86045, + "sparkUser" : "lijunqing", + "completed" : true, + "appSparkVersion" : "3.3.0-SNAPSHOT", + "startTimeEpoch" : 1642039450519, + "endTimeEpoch" : 1642039536564, + "lastUpdatedEpoch" : 0 + } ] +}, { "id" : "application_1628109047826_1317105", "name" : "Spark shell", "attempts" : [ { diff --git a/core/src/test/resources/HistoryServerExpectations/minEndDate_app_list_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/minEndDate_app_list_json_expectation.json index cf0bad7819be5..3bd637af524c0 100644 --- a/core/src/test/resources/HistoryServerExpectations/minEndDate_app_list_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/minEndDate_app_list_json_expectation.json @@ -1,4 +1,19 @@ [ { + "id" : "local-1642039451826", + "name" : "Spark shell", + "attempts" : [ { + "startTime" : "2022-01-13T02:04:10.519GMT", + "endTime" : "2022-01-13T02:05:36.564GMT", + "lastUpdated" : "", + "duration" : 86045, + "sparkUser" : "lijunqing", + "completed" : true, + "appSparkVersion" : "3.3.0-SNAPSHOT", + "startTimeEpoch" : 1642039450519, + "endTimeEpoch" : 1642039536564, + "lastUpdatedEpoch" : 0 + } ] +}, { "id" : "application_1628109047826_1317105", "name" : "Spark shell", "attempts" : [ { diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json index 94d343d68b497..9b9da9a11adf7 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_attempt_json_expectation.json @@ -46,6 +46,7 @@ "taskId" : 10, "index" : 2, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.830GMT", "duration" : 456, "executorId" : "driver", @@ -96,6 +97,7 @@ "taskId" : 14, "index" : 6, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.832GMT", "duration" : 450, "executorId" : "driver", @@ -146,6 +148,7 @@ "taskId" : 9, "index" : 1, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.830GMT", "duration" : 454, "executorId" : "driver", @@ -196,6 +199,7 @@ "taskId" : 13, "index" : 5, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.831GMT", "duration" : 452, "executorId" : "driver", @@ -246,6 +250,7 @@ "taskId" : 12, "index" : 4, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.831GMT", "duration" : 454, "executorId" : "driver", @@ -296,6 +301,7 @@ "taskId" : 11, "index" : 3, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.830GMT", "duration" : 454, "executorId" : "driver", @@ -346,6 +352,7 @@ "taskId" : 8, "index" : 0, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.829GMT", "duration" : 454, "executorId" : "driver", @@ -396,6 +403,7 @@ "taskId" : 15, "index" : 7, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.833GMT", "duration" : 450, "executorId" : "driver", diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json index 67134869f14a0..f57fecac0a62b 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_json_expectation.json @@ -46,6 +46,7 @@ "taskId" : 10, "index" : 2, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.830GMT", "duration" : 456, "executorId" : "driver", @@ -96,6 +97,7 @@ "taskId" : 14, "index" : 6, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.832GMT", "duration" : 450, "executorId" : "driver", @@ -146,6 +148,7 @@ "taskId" : 9, "index" : 1, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.830GMT", "duration" : 454, "executorId" : "driver", @@ -196,6 +199,7 @@ "taskId" : 13, "index" : 5, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.831GMT", "duration" : 452, "executorId" : "driver", @@ -246,6 +250,7 @@ "taskId" : 12, "index" : 4, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.831GMT", "duration" : 454, "executorId" : "driver", @@ -296,6 +301,7 @@ "taskId" : 11, "index" : 3, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.830GMT", "duration" : 454, "executorId" : "driver", @@ -346,6 +352,7 @@ "taskId" : 8, "index" : 0, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.829GMT", "duration" : 454, "executorId" : "driver", @@ -396,6 +403,7 @@ "taskId" : 15, "index" : 7, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.833GMT", "duration" : 450, "executorId" : "driver", diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_json_with_details_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_json_with_details_expectation.json index 67134869f14a0..f57fecac0a62b 100644 --- a/core/src/test/resources/HistoryServerExpectations/one_stage_json_with_details_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_json_with_details_expectation.json @@ -46,6 +46,7 @@ "taskId" : 10, "index" : 2, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.830GMT", "duration" : 456, "executorId" : "driver", @@ -96,6 +97,7 @@ "taskId" : 14, "index" : 6, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.832GMT", "duration" : 450, "executorId" : "driver", @@ -146,6 +148,7 @@ "taskId" : 9, "index" : 1, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.830GMT", "duration" : 454, "executorId" : "driver", @@ -196,6 +199,7 @@ "taskId" : 13, "index" : 5, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.831GMT", "duration" : 452, "executorId" : "driver", @@ -246,6 +250,7 @@ "taskId" : 12, "index" : 4, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.831GMT", "duration" : 454, "executorId" : "driver", @@ -296,6 +301,7 @@ "taskId" : 11, "index" : 3, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.830GMT", "duration" : 454, "executorId" : "driver", @@ -346,6 +352,7 @@ "taskId" : 8, "index" : 0, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.829GMT", "duration" : 454, "executorId" : "driver", @@ -396,6 +403,7 @@ "taskId" : 15, "index" : 7, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-02-03T16:43:05.833GMT", "duration" : 450, "executorId" : "driver", diff --git a/core/src/test/resources/HistoryServerExpectations/one_stage_json_with_partitionId_expectation.json b/core/src/test/resources/HistoryServerExpectations/one_stage_json_with_partitionId_expectation.json new file mode 100644 index 0000000000000..cc944e70e4298 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/one_stage_json_with_partitionId_expectation.json @@ -0,0 +1,624 @@ +[ { + "status" : "COMPLETE", + "stageId" : 2, + "attemptId" : 0, + "numTasks" : 10, + "numActiveTasks" : 0, + "numCompleteTasks" : 10, + "numFailedTasks" : 0, + "numKilledTasks" : 0, + "numCompletedIndices" : 10, + "submissionTime" : "2022-01-13T02:04:57.017GMT", + "firstTaskLaunchedTime" : "2022-01-13T02:04:57.053GMT", + "completionTime" : "2022-01-13T02:04:57.127GMT", + "executorDeserializeTime" : 33, + "executorDeserializeCpuTime" : 25578000, + "executorRunTime" : 455, + "executorCpuTime" : 173422000, + "resultSize" : 28480, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleRemoteBlocksFetched" : 0, + "shuffleLocalBlocksFetched" : 80, + "shuffleFetchWaitTime" : 0, + "shuffleRemoteBytesRead" : 0, + "shuffleRemoteBytesReadToDisk" : 0, + "shuffleLocalBytesRead" : 3760, + "shuffleReadBytes" : 3760, + "shuffleReadRecords" : 100, + "shuffleWriteBytes" : 590, + "shuffleWriteTime" : 10569751, + "shuffleWriteRecords" : 10, + "name" : "count at :23", + "details" : "org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line15.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line15.$read$$iw$$iw$$iw$$iw.(:33)\n$line15.$read$$iw$$iw$$iw.(:35)\n$line15.$read$$iw$$iw.(:37)\n$line15.$read$$iw.(:39)\n$line15.$read.(:41)\n$line15.$read$.(:45)\n$line15.$read$.()\n$line15.$eval$.$print$lzycompute(:7)\n$line15.$eval$.$print(:6)\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)", + "schedulingPool" : "default", + "rddIds" : [ 7, 6, 5 ], + "accumulatorUpdates" : [ ], + "tasks" : { + "10" : { + "taskId" : 10, + "index" : 2, + "attempt" : 0, + "partitionId" : 2, + "launchTime" : "2022-01-13T02:04:57.055GMT", + "duration" : 61, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "NODE_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorDeserializeCpuTime" : 2238000, + "executorRunTime" : 52, + "executorCpuTime" : 27082000, + "resultSize" : 2848, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 0 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 8, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 376, + "recordsRead" : 10 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 59, + "writeTime" : 1282083, + "recordsWritten" : 1 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 5, + "gettingResultTime" : 0 + }, + "14" : { + "taskId" : 14, + "index" : 6, + "attempt" : 0, + "partitionId" : 6, + "launchTime" : "2022-01-13T02:04:57.056GMT", + "duration" : 59, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "NODE_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorDeserializeCpuTime" : 2498000, + "executorRunTime" : 50, + "executorCpuTime" : 21709000, + "resultSize" : 2848, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 0 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 8, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 376, + "recordsRead" : 10 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 59, + "writeTime" : 976166, + "recordsWritten" : 1 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 5, + "gettingResultTime" : 0 + }, + "9" : { + "taskId" : 9, + "index" : 1, + "attempt" : 0, + "partitionId" : 1, + "launchTime" : "2022-01-13T02:04:57.055GMT", + "duration" : 65, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "NODE_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorDeserializeCpuTime" : 3175000, + "executorRunTime" : 57, + "executorCpuTime" : 18115000, + "resultSize" : 2848, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 0 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 8, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 372, + "recordsRead" : 9 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 59, + "writeTime" : 1490249, + "recordsWritten" : 1 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 4, + "gettingResultTime" : 0 + }, + "13" : { + "taskId" : 13, + "index" : 5, + "attempt" : 0, + "partitionId" : 5, + "launchTime" : "2022-01-13T02:04:57.056GMT", + "duration" : 67, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "NODE_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorDeserializeCpuTime" : 2177000, + "executorRunTime" : 60, + "executorCpuTime" : 18621000, + "resultSize" : 2848, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 0 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 8, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 372, + "recordsRead" : 9 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 59, + "writeTime" : 673209, + "recordsWritten" : 1 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 4, + "gettingResultTime" : 0 + }, + "17" : { + "taskId" : 17, + "index" : 9, + "attempt" : 0, + "partitionId" : 9, + "launchTime" : "2022-01-13T02:04:57.115GMT", + "duration" : 10, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "NODE_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorDeserializeCpuTime" : 1662000, + "executorRunTime" : 6, + "executorCpuTime" : 5470000, + "resultSize" : 2848, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 0 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 8, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 380, + "recordsRead" : 11 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 59, + "writeTime" : 506707, + "recordsWritten" : 1 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 3, + "gettingResultTime" : 0 + }, + "12" : { + "taskId" : 12, + "index" : 4, + "attempt" : 0, + "partitionId" : 4, + "launchTime" : "2022-01-13T02:04:57.056GMT", + "duration" : 62, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "NODE_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 4, + "executorDeserializeCpuTime" : 2614000, + "executorRunTime" : 54, + "executorCpuTime" : 21689000, + "resultSize" : 2848, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 0 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 8, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 372, + "recordsRead" : 9 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 59, + "writeTime" : 1093250, + "recordsWritten" : 1 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 4, + "gettingResultTime" : 0 + }, + "16" : { + "taskId" : 16, + "index" : 8, + "attempt" : 0, + "partitionId" : 8, + "launchTime" : "2022-01-13T02:04:57.114GMT", + "duration" : 12, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "NODE_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 1, + "executorDeserializeCpuTime" : 1875000, + "executorRunTime" : 7, + "executorCpuTime" : 5421000, + "resultSize" : 2848, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 0 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 8, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 380, + "recordsRead" : 11 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 59, + "writeTime" : 568086, + "recordsWritten" : 1 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 4, + "gettingResultTime" : 0 + }, + "11" : { + "taskId" : 11, + "index" : 3, + "attempt" : 0, + "partitionId" : 3, + "launchTime" : "2022-01-13T02:04:57.055GMT", + "duration" : 64, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "NODE_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 6, + "executorDeserializeCpuTime" : 3452000, + "executorRunTime" : 54, + "executorCpuTime" : 17668000, + "resultSize" : 2848, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 0 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 8, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 376, + "recordsRead" : 10 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 59, + "writeTime" : 1558374, + "recordsWritten" : 1 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 4, + "gettingResultTime" : 0 + }, + "8" : { + "taskId" : 8, + "index" : 0, + "attempt" : 0, + "partitionId" : 0, + "launchTime" : "2022-01-13T02:04:57.053GMT", + "duration" : 71, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "NODE_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorDeserializeCpuTime" : 3128000, + "executorRunTime" : 62, + "executorCpuTime" : 20132000, + "resultSize" : 2848, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 0 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 8, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 376, + "recordsRead" : 10 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 59, + "writeTime" : 1340668, + "recordsWritten" : 1 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 6, + "gettingResultTime" : 0 + }, + "15" : { + "taskId" : 15, + "index" : 7, + "attempt" : 0, + "partitionId" : 7, + "launchTime" : "2022-01-13T02:04:57.056GMT", + "duration" : 62, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "NODE_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 3, + "executorDeserializeCpuTime" : 2759000, + "executorRunTime" : 53, + "executorCpuTime" : 17515000, + "resultSize" : 2848, + "jvmGcTime" : 0, + "resultSerializationTime" : 0, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 0 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 8, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 380, + "recordsRead" : 11 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 59, + "writeTime" : 1080959, + "recordsWritten" : 1 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 6, + "gettingResultTime" : 0 + } + }, + "executorSummary" : { + "driver" : { + "taskTime" : 533, + "failedTasks" : 0, + "succeededTasks" : 10, + "killedTasks" : 0, + "inputBytes" : 0, + "inputRecords" : 0, + "outputBytes" : 0, + "outputRecords" : 0, + "shuffleRead" : 3760, + "shuffleReadRecords" : 100, + "shuffleWrite" : 590, + "shuffleWriteRecords" : 10, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "isBlacklistedForStage" : false, + "peakMemoryMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0, + "TotalGCTime" : 0 + }, + "isExcludedForStage" : false + } + }, + "killedTasksSummary" : { }, + "resourceProfileId" : 0, + "peakExecutorMetrics" : { + "JVMHeapMemory" : 0, + "JVMOffHeapMemory" : 0, + "OnHeapExecutionMemory" : 0, + "OffHeapExecutionMemory" : 0, + "OnHeapStorageMemory" : 0, + "OffHeapStorageMemory" : 0, + "OnHeapUnifiedMemory" : 0, + "OffHeapUnifiedMemory" : 0, + "DirectPoolMemory" : 0, + "MappedPoolMemory" : 0, + "ProcessTreeJVMVMemory" : 0, + "ProcessTreeJVMRSSMemory" : 0, + "ProcessTreePythonVMemory" : 0, + "ProcessTreePythonRSSMemory" : 0, + "ProcessTreeOtherVMemory" : 0, + "ProcessTreeOtherRSSMemory" : 0, + "MinorGCCount" : 0, + "MinorGCTime" : 0, + "MajorGCCount" : 0, + "MajorGCTime" : 0, + "TotalGCTime" : 0 + } +} ] \ No newline at end of file diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json index f859ab6fff240..f32d40c362dbd 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_expectation.json @@ -2,6 +2,7 @@ "taskId" : 0, "index" : 0, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.494GMT", "duration" : 435, "executorId" : "driver", @@ -51,6 +52,7 @@ "taskId" : 1, "index" : 1, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.502GMT", "duration" : 421, "executorId" : "driver", @@ -100,6 +102,7 @@ "taskId" : 2, "index" : 2, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.503GMT", "duration" : 419, "executorId" : "driver", @@ -149,6 +152,8 @@ "taskId" : 3, "index" : 3, "attempt" : 0, + "partitionId" : -1, +"partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.504GMT", "duration" : 423, "executorId" : "driver", @@ -198,6 +203,7 @@ "taskId" : 4, "index" : 4, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.504GMT", "duration" : 419, "executorId" : "driver", @@ -247,6 +253,7 @@ "taskId" : 5, "index" : 5, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.505GMT", "duration" : 414, "executorId" : "driver", @@ -296,6 +303,7 @@ "taskId" : 6, "index" : 6, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.505GMT", "duration" : 419, "executorId" : "driver", @@ -345,6 +353,7 @@ "taskId" : 7, "index" : 7, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.506GMT", "duration" : 423, "executorId" : "driver", @@ -394,6 +403,7 @@ "taskId" : 8, "index" : 8, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.914GMT", "duration" : 88, "executorId" : "driver", @@ -443,6 +453,7 @@ "taskId" : 9, "index" : 9, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.915GMT", "duration" : 101, "executorId" : "driver", @@ -492,6 +503,7 @@ "taskId" : 10, "index" : 10, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.916GMT", "duration" : 99, "executorId" : "driver", @@ -541,6 +553,7 @@ "taskId" : 11, "index" : 11, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.918GMT", "duration" : 89, "executorId" : "driver", @@ -590,6 +603,7 @@ "taskId" : 12, "index" : 12, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.923GMT", "duration" : 93, "executorId" : "driver", @@ -639,6 +653,7 @@ "taskId" : 13, "index" : 13, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.924GMT", "duration" : 138, "executorId" : "driver", @@ -688,6 +703,7 @@ "taskId" : 14, "index" : 14, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.925GMT", "duration" : 94, "executorId" : "driver", @@ -737,6 +753,7 @@ "taskId" : 15, "index" : 15, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.928GMT", "duration" : 83, "executorId" : "driver", @@ -786,6 +803,7 @@ "taskId" : 16, "index" : 16, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.001GMT", "duration" : 98, "executorId" : "driver", @@ -835,6 +853,7 @@ "taskId" : 17, "index" : 17, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.005GMT", "duration" : 123, "executorId" : "driver", @@ -884,6 +903,7 @@ "taskId" : 18, "index" : 18, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.010GMT", "duration" : 105, "executorId" : "driver", @@ -933,6 +953,7 @@ "taskId" : 19, "index" : 19, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.012GMT", "duration" : 94, "executorId" : "driver", diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json index 5a0d214ff3046..2ab5903bd3b54 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_1__expectation.json @@ -2,6 +2,7 @@ "taskId" : 0, "index" : 0, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-16T19:25:36.515GMT", "duration" : 61, "executorId" : "driver", @@ -56,6 +57,7 @@ "taskId" : 1, "index" : 1, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-16T19:25:36.521GMT", "duration" : 53, "executorId" : "driver", @@ -110,6 +112,7 @@ "taskId" : 2, "index" : 2, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-16T19:25:36.522GMT", "duration" : 48, "executorId" : "driver", @@ -164,6 +167,7 @@ "taskId" : 3, "index" : 3, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-16T19:25:36.522GMT", "duration" : 50, "executorId" : "driver", @@ -218,6 +222,7 @@ "taskId" : 4, "index" : 4, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-16T19:25:36.522GMT", "duration" : 52, "executorId" : "driver", @@ -272,6 +277,7 @@ "taskId" : 5, "index" : 5, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-16T19:25:36.523GMT", "duration" : 52, "executorId" : "driver", @@ -326,6 +332,7 @@ "taskId" : 6, "index" : 6, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-16T19:25:36.523GMT", "duration" : 51, "executorId" : "driver", @@ -380,6 +387,7 @@ "taskId" : 7, "index" : 7, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-16T19:25:36.524GMT", "duration" : 51, "executorId" : "driver", diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json index fb9a1699a9cd5..f50a37939494b 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_from_multi_attempt_app_json_2__expectation.json @@ -2,6 +2,7 @@ "taskId" : 0, "index" : 0, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-17T23:12:16.515GMT", "duration" : 61, "executorId" : "driver", @@ -56,6 +57,7 @@ "taskId" : 1, "index" : 1, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-17T23:12:16.521GMT", "duration" : 53, "executorId" : "driver", @@ -110,6 +112,7 @@ "taskId" : 2, "index" : 2, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-17T23:12:16.522GMT", "duration" : 48, "executorId" : "driver", @@ -164,6 +167,7 @@ "taskId" : 3, "index" : 3, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-17T23:12:16.522GMT", "duration" : 50, "executorId" : "driver", @@ -218,6 +222,7 @@ "taskId" : 4, "index" : 4, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-17T23:12:16.522GMT", "duration" : 52, "executorId" : "driver", @@ -272,6 +277,7 @@ "taskId" : 5, "index" : 5, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-17T23:12:16.523GMT", "duration" : 52, "executorId" : "driver", @@ -326,6 +332,7 @@ "taskId" : 6, "index" : 6, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-17T23:12:16.523GMT", "duration" : 51, "executorId" : "driver", @@ -380,6 +387,7 @@ "taskId" : 7, "index" : 7, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-03-17T23:12:16.524GMT", "duration" : 51, "executorId" : "driver", diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json index d83528d84972c..01500db125e1d 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__offset___length_expectation.json @@ -2,6 +2,7 @@ "taskId" : 10, "index" : 10, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.916GMT", "duration" : 99, "executorId" : "driver", @@ -51,6 +52,7 @@ "taskId" : 11, "index" : 11, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.918GMT", "duration" : 89, "executorId" : "driver", @@ -100,6 +102,7 @@ "taskId" : 12, "index" : 12, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.923GMT", "duration" : 93, "executorId" : "driver", @@ -149,6 +152,7 @@ "taskId" : 13, "index" : 13, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.924GMT", "duration" : 138, "executorId" : "driver", @@ -198,6 +202,7 @@ "taskId" : 14, "index" : 14, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.925GMT", "duration" : 94, "executorId" : "driver", @@ -247,6 +252,7 @@ "taskId" : 15, "index" : 15, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.928GMT", "duration" : 83, "executorId" : "driver", @@ -296,6 +302,7 @@ "taskId" : 16, "index" : 16, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.001GMT", "duration" : 98, "executorId" : "driver", @@ -345,6 +352,7 @@ "taskId" : 17, "index" : 17, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.005GMT", "duration" : 123, "executorId" : "driver", @@ -394,6 +402,7 @@ "taskId" : 18, "index" : 18, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.010GMT", "duration" : 105, "executorId" : "driver", @@ -443,6 +452,7 @@ "taskId" : 19, "index" : 19, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.012GMT", "duration" : 94, "executorId" : "driver", @@ -492,6 +502,7 @@ "taskId" : 20, "index" : 20, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.014GMT", "duration" : 90, "executorId" : "driver", @@ -541,6 +552,7 @@ "taskId" : 21, "index" : 21, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.015GMT", "duration" : 96, "executorId" : "driver", @@ -590,6 +602,7 @@ "taskId" : 22, "index" : 22, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.018GMT", "duration" : 101, "executorId" : "driver", @@ -639,6 +652,7 @@ "taskId" : 23, "index" : 23, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.031GMT", "duration" : 84, "executorId" : "driver", @@ -688,6 +702,7 @@ "taskId" : 24, "index" : 24, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.098GMT", "duration" : 52, "executorId" : "driver", @@ -737,6 +752,7 @@ "taskId" : 25, "index" : 25, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.103GMT", "duration" : 61, "executorId" : "driver", @@ -786,6 +802,7 @@ "taskId" : 26, "index" : 26, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.105GMT", "duration" : 52, "executorId" : "driver", @@ -835,6 +852,7 @@ "taskId" : 27, "index" : 27, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.110GMT", "duration" : 41, "executorId" : "driver", @@ -884,6 +902,7 @@ "taskId" : 28, "index" : 28, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.113GMT", "duration" : 49, "executorId" : "driver", @@ -933,6 +952,7 @@ "taskId" : 29, "index" : 29, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.114GMT", "duration" : 52, "executorId" : "driver", @@ -982,6 +1002,7 @@ "taskId" : 30, "index" : 30, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.118GMT", "duration" : 62, "executorId" : "driver", @@ -1031,6 +1052,7 @@ "taskId" : 31, "index" : 31, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.127GMT", "duration" : 74, "executorId" : "driver", @@ -1080,6 +1102,7 @@ "taskId" : 32, "index" : 32, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.148GMT", "duration" : 33, "executorId" : "driver", @@ -1129,6 +1152,7 @@ "taskId" : 33, "index" : 33, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.149GMT", "duration" : 58, "executorId" : "driver", @@ -1178,6 +1202,7 @@ "taskId" : 34, "index" : 34, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.156GMT", "duration" : 42, "executorId" : "driver", @@ -1227,6 +1252,7 @@ "taskId" : 35, "index" : 35, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.161GMT", "duration" : 50, "executorId" : "driver", @@ -1276,6 +1302,7 @@ "taskId" : 36, "index" : 36, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.164GMT", "duration" : 40, "executorId" : "driver", @@ -1325,6 +1352,7 @@ "taskId" : 37, "index" : 37, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.165GMT", "duration" : 42, "executorId" : "driver", @@ -1374,6 +1402,7 @@ "taskId" : 38, "index" : 38, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.166GMT", "duration" : 47, "executorId" : "driver", @@ -1423,6 +1452,7 @@ "taskId" : 39, "index" : 39, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.180GMT", "duration" : 32, "executorId" : "driver", @@ -1472,6 +1502,7 @@ "taskId" : 40, "index" : 40, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.197GMT", "duration" : 24, "executorId" : "driver", @@ -1521,6 +1552,7 @@ "taskId" : 41, "index" : 41, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.200GMT", "duration" : 24, "executorId" : "driver", @@ -1570,6 +1602,7 @@ "taskId" : 42, "index" : 42, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.203GMT", "duration" : 42, "executorId" : "driver", @@ -1619,6 +1652,7 @@ "taskId" : 43, "index" : 43, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.204GMT", "duration" : 39, "executorId" : "driver", @@ -1668,6 +1702,7 @@ "taskId" : 44, "index" : 44, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.205GMT", "duration" : 37, "executorId" : "driver", @@ -1717,6 +1752,7 @@ "taskId" : 45, "index" : 45, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.206GMT", "duration" : 37, "executorId" : "driver", @@ -1766,6 +1802,7 @@ "taskId" : 46, "index" : 46, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.210GMT", "duration" : 43, "executorId" : "driver", @@ -1815,6 +1852,7 @@ "taskId" : 47, "index" : 47, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.212GMT", "duration" : 33, "executorId" : "driver", @@ -1864,6 +1902,7 @@ "taskId" : 48, "index" : 48, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.220GMT", "duration" : 30, "executorId" : "driver", @@ -1913,6 +1952,7 @@ "taskId" : 49, "index" : 49, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.223GMT", "duration" : 34, "executorId" : "driver", @@ -1962,6 +2002,7 @@ "taskId" : 50, "index" : 50, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.240GMT", "duration" : 26, "executorId" : "driver", @@ -2011,6 +2052,7 @@ "taskId" : 51, "index" : 51, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.242GMT", "duration" : 21, "executorId" : "driver", @@ -2060,6 +2102,7 @@ "taskId" : 52, "index" : 52, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.243GMT", "duration" : 28, "executorId" : "driver", @@ -2109,6 +2152,7 @@ "taskId" : 53, "index" : 53, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.244GMT", "duration" : 29, "executorId" : "driver", @@ -2158,6 +2202,7 @@ "taskId" : 54, "index" : 54, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.244GMT", "duration" : 59, "executorId" : "driver", @@ -2207,6 +2252,7 @@ "taskId" : 55, "index" : 55, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.246GMT", "duration" : 30, "executorId" : "driver", @@ -2256,6 +2302,7 @@ "taskId" : 56, "index" : 56, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.249GMT", "duration" : 31, "executorId" : "driver", @@ -2305,6 +2352,7 @@ "taskId" : 57, "index" : 57, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.257GMT", "duration" : 21, "executorId" : "driver", @@ -2354,6 +2402,7 @@ "taskId" : 58, "index" : 58, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.263GMT", "duration" : 23, "executorId" : "driver", @@ -2403,6 +2452,7 @@ "taskId" : 59, "index" : 59, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.265GMT", "duration" : 23, "executorId" : "driver", diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json index 82e339c8f56dd..b1c71897f5c18 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_expectation.json @@ -2,6 +2,7 @@ "taskId" : 6, "index" : 6, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.505GMT", "duration" : 419, "executorId" : "driver", @@ -51,6 +52,7 @@ "taskId" : 5, "index" : 5, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.505GMT", "duration" : 414, "executorId" : "driver", @@ -100,6 +102,7 @@ "taskId" : 1, "index" : 1, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.502GMT", "duration" : 421, "executorId" : "driver", @@ -149,6 +152,7 @@ "taskId" : 7, "index" : 7, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.506GMT", "duration" : 423, "executorId" : "driver", @@ -198,6 +202,7 @@ "taskId" : 4, "index" : 4, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.504GMT", "duration" : 419, "executorId" : "driver", @@ -247,6 +252,7 @@ "taskId" : 3, "index" : 3, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.504GMT", "duration" : 423, "executorId" : "driver", @@ -296,6 +302,7 @@ "taskId" : 0, "index" : 0, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.494GMT", "duration" : 435, "executorId" : "driver", @@ -345,6 +352,7 @@ "taskId" : 2, "index" : 2, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.503GMT", "duration" : 419, "executorId" : "driver", @@ -394,6 +402,7 @@ "taskId" : 22, "index" : 22, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.018GMT", "duration" : 101, "executorId" : "driver", @@ -443,6 +452,7 @@ "taskId" : 18, "index" : 18, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.010GMT", "duration" : 105, "executorId" : "driver", @@ -492,6 +502,7 @@ "taskId" : 17, "index" : 17, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.005GMT", "duration" : 123, "executorId" : "driver", @@ -541,6 +552,7 @@ "taskId" : 21, "index" : 21, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.015GMT", "duration" : 96, "executorId" : "driver", @@ -590,6 +602,7 @@ "taskId" : 19, "index" : 19, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.012GMT", "duration" : 94, "executorId" : "driver", @@ -639,6 +652,7 @@ "taskId" : 16, "index" : 16, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.001GMT", "duration" : 98, "executorId" : "driver", @@ -688,6 +702,7 @@ "taskId" : 9, "index" : 9, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.915GMT", "duration" : 101, "executorId" : "driver", @@ -737,6 +752,7 @@ "taskId" : 20, "index" : 20, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.014GMT", "duration" : 90, "executorId" : "driver", @@ -786,6 +802,7 @@ "taskId" : 14, "index" : 14, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.925GMT", "duration" : 94, "executorId" : "driver", @@ -835,6 +852,7 @@ "taskId" : 8, "index" : 8, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.914GMT", "duration" : 88, "executorId" : "driver", @@ -884,6 +902,7 @@ "taskId" : 12, "index" : 12, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.923GMT", "duration" : 93, "executorId" : "driver", @@ -933,6 +952,7 @@ "taskId" : 15, "index" : 15, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.928GMT", "duration" : 83, "executorId" : "driver", diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json index 82e339c8f56dd..b1c71897f5c18 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names___runtime_expectation.json @@ -2,6 +2,7 @@ "taskId" : 6, "index" : 6, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.505GMT", "duration" : 419, "executorId" : "driver", @@ -51,6 +52,7 @@ "taskId" : 5, "index" : 5, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.505GMT", "duration" : 414, "executorId" : "driver", @@ -100,6 +102,7 @@ "taskId" : 1, "index" : 1, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.502GMT", "duration" : 421, "executorId" : "driver", @@ -149,6 +152,7 @@ "taskId" : 7, "index" : 7, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.506GMT", "duration" : 423, "executorId" : "driver", @@ -198,6 +202,7 @@ "taskId" : 4, "index" : 4, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.504GMT", "duration" : 419, "executorId" : "driver", @@ -247,6 +252,7 @@ "taskId" : 3, "index" : 3, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.504GMT", "duration" : 423, "executorId" : "driver", @@ -296,6 +302,7 @@ "taskId" : 0, "index" : 0, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.494GMT", "duration" : 435, "executorId" : "driver", @@ -345,6 +352,7 @@ "taskId" : 2, "index" : 2, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.503GMT", "duration" : 419, "executorId" : "driver", @@ -394,6 +402,7 @@ "taskId" : 22, "index" : 22, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.018GMT", "duration" : 101, "executorId" : "driver", @@ -443,6 +452,7 @@ "taskId" : 18, "index" : 18, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.010GMT", "duration" : 105, "executorId" : "driver", @@ -492,6 +502,7 @@ "taskId" : 17, "index" : 17, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.005GMT", "duration" : 123, "executorId" : "driver", @@ -541,6 +552,7 @@ "taskId" : 21, "index" : 21, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.015GMT", "duration" : 96, "executorId" : "driver", @@ -590,6 +602,7 @@ "taskId" : 19, "index" : 19, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.012GMT", "duration" : 94, "executorId" : "driver", @@ -639,6 +652,7 @@ "taskId" : 16, "index" : 16, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.001GMT", "duration" : 98, "executorId" : "driver", @@ -688,6 +702,7 @@ "taskId" : 9, "index" : 9, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.915GMT", "duration" : 101, "executorId" : "driver", @@ -737,6 +752,7 @@ "taskId" : 20, "index" : 20, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.014GMT", "duration" : 90, "executorId" : "driver", @@ -786,6 +802,7 @@ "taskId" : 14, "index" : 14, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.925GMT", "duration" : 94, "executorId" : "driver", @@ -835,6 +852,7 @@ "taskId" : 8, "index" : 8, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.914GMT", "duration" : 88, "executorId" : "driver", @@ -884,6 +902,7 @@ "taskId" : 12, "index" : 12, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.923GMT", "duration" : 93, "executorId" : "driver", @@ -933,6 +952,7 @@ "taskId" : 15, "index" : 15, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.928GMT", "duration" : 83, "executorId" : "driver", diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json index 01eef1b565bf6..3fa4cad79df2c 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__sortBy_short_names__runtime_expectation.json @@ -2,6 +2,7 @@ "taskId" : 40, "index" : 40, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.197GMT", "duration" : 24, "executorId" : "driver", @@ -51,6 +52,7 @@ "taskId" : 41, "index" : 41, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.200GMT", "duration" : 24, "executorId" : "driver", @@ -100,6 +102,7 @@ "taskId" : 43, "index" : 43, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.204GMT", "duration" : 39, "executorId" : "driver", @@ -149,6 +152,7 @@ "taskId" : 57, "index" : 57, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.257GMT", "duration" : 21, "executorId" : "driver", @@ -198,6 +202,7 @@ "taskId" : 58, "index" : 58, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.263GMT", "duration" : 23, "executorId" : "driver", @@ -247,6 +252,7 @@ "taskId" : 68, "index" : 68, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.306GMT", "duration" : 22, "executorId" : "driver", @@ -296,6 +302,7 @@ "taskId" : 86, "index" : 86, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.374GMT", "duration" : 28, "executorId" : "driver", @@ -345,6 +352,7 @@ "taskId" : 32, "index" : 32, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.148GMT", "duration" : 33, "executorId" : "driver", @@ -394,6 +402,7 @@ "taskId" : 39, "index" : 39, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.180GMT", "duration" : 32, "executorId" : "driver", @@ -443,6 +452,7 @@ "taskId" : 42, "index" : 42, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.203GMT", "duration" : 42, "executorId" : "driver", @@ -492,6 +502,7 @@ "taskId" : 51, "index" : 51, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.242GMT", "duration" : 21, "executorId" : "driver", @@ -541,6 +552,7 @@ "taskId" : 59, "index" : 59, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.265GMT", "duration" : 23, "executorId" : "driver", @@ -590,6 +602,7 @@ "taskId" : 63, "index" : 63, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.276GMT", "duration" : 40, "executorId" : "driver", @@ -639,6 +652,7 @@ "taskId" : 87, "index" : 87, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.374GMT", "duration" : 36, "executorId" : "driver", @@ -688,6 +702,7 @@ "taskId" : 90, "index" : 90, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.385GMT", "duration" : 23, "executorId" : "driver", @@ -737,6 +752,7 @@ "taskId" : 99, "index" : 99, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.426GMT", "duration" : 22, "executorId" : "driver", @@ -786,6 +802,7 @@ "taskId" : 44, "index" : 44, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.205GMT", "duration" : 37, "executorId" : "driver", @@ -835,6 +852,7 @@ "taskId" : 47, "index" : 47, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.212GMT", "duration" : 33, "executorId" : "driver", @@ -884,6 +902,7 @@ "taskId" : 50, "index" : 50, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.240GMT", "duration" : 26, "executorId" : "driver", @@ -933,6 +952,7 @@ "taskId" : 52, "index" : 52, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.243GMT", "duration" : 28, "executorId" : "driver", diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__status___offset___length_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__status___offset___length_expectation.json index 28509e33c5dcc..0cd3a45e0af7e 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__status___offset___length_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__status___offset___length_expectation.json @@ -2,6 +2,7 @@ "taskId" : 1, "index" : 1, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.502GMT", "duration" : 421, "executorId" : "driver", @@ -51,6 +52,7 @@ "taskId" : 2, "index" : 2, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:06.503GMT", "duration" : 419, "executorId" : "driver", diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__status___sortBy_short_names__runtime_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__status___sortBy_short_names__runtime_expectation.json index 01eef1b565bf6..3fa4cad79df2c 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__status___sortBy_short_names__runtime_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__status___sortBy_short_names__runtime_expectation.json @@ -2,6 +2,7 @@ "taskId" : 40, "index" : 40, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.197GMT", "duration" : 24, "executorId" : "driver", @@ -51,6 +52,7 @@ "taskId" : 41, "index" : 41, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.200GMT", "duration" : 24, "executorId" : "driver", @@ -100,6 +102,7 @@ "taskId" : 43, "index" : 43, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.204GMT", "duration" : 39, "executorId" : "driver", @@ -149,6 +152,7 @@ "taskId" : 57, "index" : 57, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.257GMT", "duration" : 21, "executorId" : "driver", @@ -198,6 +202,7 @@ "taskId" : 58, "index" : 58, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.263GMT", "duration" : 23, "executorId" : "driver", @@ -247,6 +252,7 @@ "taskId" : 68, "index" : 68, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.306GMT", "duration" : 22, "executorId" : "driver", @@ -296,6 +302,7 @@ "taskId" : 86, "index" : 86, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.374GMT", "duration" : 28, "executorId" : "driver", @@ -345,6 +352,7 @@ "taskId" : 32, "index" : 32, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.148GMT", "duration" : 33, "executorId" : "driver", @@ -394,6 +402,7 @@ "taskId" : 39, "index" : 39, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.180GMT", "duration" : 32, "executorId" : "driver", @@ -443,6 +452,7 @@ "taskId" : 42, "index" : 42, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.203GMT", "duration" : 42, "executorId" : "driver", @@ -492,6 +502,7 @@ "taskId" : 51, "index" : 51, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.242GMT", "duration" : 21, "executorId" : "driver", @@ -541,6 +552,7 @@ "taskId" : 59, "index" : 59, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.265GMT", "duration" : 23, "executorId" : "driver", @@ -590,6 +602,7 @@ "taskId" : 63, "index" : 63, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.276GMT", "duration" : 40, "executorId" : "driver", @@ -639,6 +652,7 @@ "taskId" : 87, "index" : 87, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.374GMT", "duration" : 36, "executorId" : "driver", @@ -688,6 +702,7 @@ "taskId" : 90, "index" : 90, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.385GMT", "duration" : 23, "executorId" : "driver", @@ -737,6 +752,7 @@ "taskId" : 99, "index" : 99, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.426GMT", "duration" : 22, "executorId" : "driver", @@ -786,6 +802,7 @@ "taskId" : 44, "index" : 44, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.205GMT", "duration" : 37, "executorId" : "driver", @@ -835,6 +852,7 @@ "taskId" : 47, "index" : 47, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.212GMT", "duration" : 33, "executorId" : "driver", @@ -884,6 +902,7 @@ "taskId" : 50, "index" : 50, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.240GMT", "duration" : 26, "executorId" : "driver", @@ -933,6 +952,7 @@ "taskId" : 52, "index" : 52, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2015-05-06T13:03:07.243GMT", "duration" : 28, "executorId" : "driver", diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__status_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__status_expectation.json index 9896aceb275de..d625c6f0e3bfc 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__status_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_w__status_expectation.json @@ -2,6 +2,7 @@ "taskId" : 1, "index" : 1, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2016-11-15T23:20:44.052GMT", "duration" : 675, "executorId" : "0", @@ -55,6 +56,7 @@ "taskId" : 3, "index" : 3, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2016-11-15T23:20:44.053GMT", "duration" : 725, "executorId" : "2", @@ -108,6 +110,7 @@ "taskId" : 5, "index" : 5, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2016-11-15T23:20:44.055GMT", "duration" : 665, "executorId" : "0", @@ -161,6 +164,7 @@ "taskId" : 7, "index" : 7, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2016-11-15T23:20:44.056GMT", "duration" : 685, "executorId" : "2", @@ -214,6 +218,7 @@ "taskId" : 9, "index" : 9, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2016-11-15T23:20:44.057GMT", "duration" : 732, "executorId" : "0", @@ -267,6 +272,7 @@ "taskId" : 11, "index" : 11, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2016-11-15T23:20:44.058GMT", "duration" : 678, "executorId" : "2", @@ -320,6 +326,7 @@ "taskId" : 13, "index" : 13, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2016-11-15T23:20:44.060GMT", "duration" : 669, "executorId" : "0", @@ -373,6 +380,7 @@ "taskId" : 15, "index" : 15, "attempt" : 0, + "partitionId" : -1, "launchTime" : "2016-11-15T23:20:44.065GMT", "duration" : 672, "executorId" : "2", @@ -426,6 +434,7 @@ "taskId" : 19, "index" : 11, "attempt" : 1, + "partitionId": -1, "launchTime" : "2016-11-15T23:20:44.736GMT", "duration" : 13, "executorId" : "2", @@ -479,6 +488,7 @@ "taskId" : 20, "index" : 15, "attempt" : 1, + "partitionId": -1, "launchTime" : "2016-11-15T23:20:44.737GMT", "duration" : 19, "executorId" : "2", diff --git a/core/src/test/resources/HistoryServerExpectations/stage_task_list_with_partitionId_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_task_list_with_partitionId_expectation.json new file mode 100644 index 0000000000000..9d8a38cde2d04 --- /dev/null +++ b/core/src/test/resources/HistoryServerExpectations/stage_task_list_with_partitionId_expectation.json @@ -0,0 +1,401 @@ +[ { + "taskId" : 0, + "index" : 0, + "attempt" : 0, + "partitionId" : 0, + "launchTime" : "2022-01-13T02:04:56.413GMT", + "duration" : 480, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 277, + "executorDeserializeCpuTime" : 142074000, + "executorRunTime" : 158, + "executorCpuTime" : 48671000, + "resultSize" : 1836, + "jvmGcTime" : 11, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 12 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 0, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 0, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 468, + "writeTime" : 33968833, + "recordsWritten" : 12 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 44, + "gettingResultTime" : 0 +}, { + "taskId" : 1, + "index" : 1, + "attempt" : 0, + "partitionId" : 1, + "launchTime" : "2022-01-13T02:04:56.425GMT", + "duration" : 468, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 277, + "executorDeserializeCpuTime" : 141303000, + "executorRunTime" : 158, + "executorCpuTime" : 52706000, + "resultSize" : 1836, + "jvmGcTime" : 11, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 13 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 0, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 0, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 472, + "writeTime" : 32707462, + "recordsWritten" : 13 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 32, + "gettingResultTime" : 0 +}, { + "taskId" : 2, + "index" : 2, + "attempt" : 0, + "partitionId" : 2, + "launchTime" : "2022-01-13T02:04:56.425GMT", + "duration" : 467, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 277, + "executorDeserializeCpuTime" : 144165000, + "executorRunTime" : 158, + "executorCpuTime" : 47904000, + "resultSize" : 1836, + "jvmGcTime" : 11, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 12 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 0, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 0, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 468, + "writeTime" : 31530251, + "recordsWritten" : 12 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 31, + "gettingResultTime" : 0 +}, { + "taskId" : 3, + "index" : 3, + "attempt" : 0, + "partitionId" : 3, + "launchTime" : "2022-01-13T02:04:56.425GMT", + "duration" : 468, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 277, + "executorDeserializeCpuTime" : 81219000, + "executorRunTime" : 158, + "executorCpuTime" : 50624000, + "resultSize" : 1836, + "jvmGcTime" : 11, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 13 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 0, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 0, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 472, + "writeTime" : 33237160, + "recordsWritten" : 13 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 32, + "gettingResultTime" : 0 +}, { + "taskId" : 4, + "index" : 4, + "attempt" : 0, + "partitionId" : 4, + "launchTime" : "2022-01-13T02:04:56.426GMT", + "duration" : 466, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 277, + "executorDeserializeCpuTime" : 143825000, + "executorRunTime" : 158, + "executorCpuTime" : 42837000, + "resultSize" : 1836, + "jvmGcTime" : 11, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 12 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 0, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 0, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 468, + "writeTime" : 32008329, + "recordsWritten" : 12 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 30, + "gettingResultTime" : 0 +}, { + "taskId" : 5, + "index" : 5, + "attempt" : 0, + "partitionId" : 5, + "launchTime" : "2022-01-13T02:04:56.426GMT", + "duration" : 465, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 277, + "executorDeserializeCpuTime" : 156363000, + "executorRunTime" : 158, + "executorCpuTime" : 60658000, + "resultSize" : 1836, + "jvmGcTime" : 11, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 13 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 0, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 0, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 472, + "writeTime" : 31370834, + "recordsWritten" : 13 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 29, + "gettingResultTime" : 0 +}, { + "taskId" : 6, + "index" : 6, + "attempt" : 0, + "partitionId" : 6, + "launchTime" : "2022-01-13T02:04:56.427GMT", + "duration" : 461, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 277, + "executorDeserializeCpuTime" : 131261000, + "executorRunTime" : 158, + "executorCpuTime" : 75295000, + "resultSize" : 1836, + "jvmGcTime" : 11, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 12 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 0, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 0, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 468, + "writeTime" : 32402787, + "recordsWritten" : 12 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 25, + "gettingResultTime" : 0 +}, { + "taskId" : 7, + "index" : 7, + "attempt" : 0, + "partitionId" : 7, + "launchTime" : "2022-01-13T02:04:56.427GMT", + "duration" : 466, + "executorId" : "driver", + "host" : "172.22.200.52", + "status" : "SUCCESS", + "taskLocality" : "PROCESS_LOCAL", + "speculative" : false, + "accumulatorUpdates" : [ ], + "taskMetrics" : { + "executorDeserializeTime" : 277, + "executorDeserializeCpuTime" : 145865000, + "executorRunTime" : 158, + "executorCpuTime" : 42833000, + "resultSize" : 1836, + "jvmGcTime" : 11, + "resultSerializationTime" : 1, + "memoryBytesSpilled" : 0, + "diskBytesSpilled" : 0, + "peakExecutionMemory" : 0, + "inputMetrics" : { + "bytesRead" : 0, + "recordsRead" : 13 + }, + "outputMetrics" : { + "bytesWritten" : 0, + "recordsWritten" : 0 + }, + "shuffleReadMetrics" : { + "remoteBlocksFetched" : 0, + "localBlocksFetched" : 0, + "fetchWaitTime" : 0, + "remoteBytesRead" : 0, + "remoteBytesReadToDisk" : 0, + "localBytesRead" : 0, + "recordsRead" : 0 + }, + "shuffleWriteMetrics" : { + "bytesWritten" : 472, + "writeTime" : 32328957, + "recordsWritten" : 13 + } + }, + "executorLogs" : { }, + "schedulerDelay" : 30, + "gettingResultTime" : 0 +} ] diff --git a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json index 281f7b35f2893..9a822b0d5ce8b 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_accumulable_json_expectation.json @@ -50,6 +50,7 @@ "taskId" : 0, "index" : 0, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-03-16T19:25:36.515GMT", "duration" : 61, "executorId" : "driver", @@ -105,6 +106,7 @@ "taskId" : 5, "index" : 5, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-03-16T19:25:36.523GMT", "duration" : 52, "executorId" : "driver", @@ -160,6 +162,7 @@ "taskId" : 1, "index" : 1, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-03-16T19:25:36.521GMT", "duration" : 53, "executorId" : "driver", @@ -215,6 +218,7 @@ "taskId" : 6, "index" : 6, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-03-16T19:25:36.523GMT", "duration" : 51, "executorId" : "driver", @@ -270,6 +274,7 @@ "taskId" : 2, "index" : 2, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-03-16T19:25:36.522GMT", "duration" : 48, "executorId" : "driver", @@ -325,6 +330,7 @@ "taskId" : 7, "index" : 7, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-03-16T19:25:36.524GMT", "duration" : 51, "executorId" : "driver", @@ -380,6 +386,7 @@ "taskId" : 3, "index" : 3, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-03-16T19:25:36.522GMT", "duration" : 50, "executorId" : "driver", @@ -435,6 +442,7 @@ "taskId" : 4, "index" : 4, "attempt" : 0, + "partitionId": -1, "launchTime" : "2015-03-16T19:25:36.522GMT", "duration" : 52, "executorId" : "driver", diff --git a/core/src/test/resources/HistoryServerExpectations/stage_with_peak_metrics_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_with_peak_metrics_expectation.json index 5b2c205077dd6..2ded2dede5a83 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_with_peak_metrics_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_peak_metrics_expectation.json @@ -46,6 +46,7 @@ "taskId" : 42, "index" : 10, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.120GMT", "duration" : 1923, "executorId" : "0", @@ -99,6 +100,7 @@ "taskId" : 37, "index" : 5, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.100GMT", "duration" : 1915, "executorId" : "0", @@ -152,6 +154,7 @@ "taskId" : 46, "index" : 14, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.132GMT", "duration" : 1905, "executorId" : "0", @@ -205,6 +208,7 @@ "taskId" : 38, "index" : 6, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.104GMT", "duration" : 1835, "executorId" : "0", @@ -258,6 +262,7 @@ "taskId" : 33, "index" : 1, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.082GMT", "duration" : 1943, "executorId" : "0", @@ -311,6 +316,7 @@ "taskId" : 41, "index" : 9, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.116GMT", "duration" : 1916, "executorId" : "0", @@ -364,6 +370,7 @@ "taskId" : 32, "index" : 0, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.077GMT", "duration" : 1960, "executorId" : "0", @@ -417,6 +424,7 @@ "taskId" : 34, "index" : 2, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.087GMT", "duration" : 1939, "executorId" : "0", @@ -470,6 +478,7 @@ "taskId" : 45, "index" : 13, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.129GMT", "duration" : 1895, "executorId" : "0", @@ -523,6 +532,7 @@ "taskId" : 44, "index" : 12, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.126GMT", "duration" : 1917, "executorId" : "0", @@ -576,6 +586,7 @@ "taskId" : 39, "index" : 7, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.109GMT", "duration" : 1915, "executorId" : "0", @@ -629,6 +640,7 @@ "taskId" : 35, "index" : 3, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.091GMT", "duration" : 1925, "executorId" : "0", @@ -682,6 +694,7 @@ "taskId" : 43, "index" : 11, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.123GMT", "duration" : 1906, "executorId" : "0", @@ -735,6 +748,7 @@ "taskId" : 40, "index" : 8, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.112GMT", "duration" : 1904, "executorId" : "0", @@ -788,6 +802,7 @@ "taskId" : 36, "index" : 4, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.095GMT", "duration" : 1920, "executorId" : "0", @@ -841,6 +856,7 @@ "taskId" : 47, "index" : 15, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.136GMT", "duration" : 1878, "executorId" : "0", diff --git a/core/src/test/resources/HistoryServerExpectations/stage_with_speculation_summary_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_with_speculation_summary_expectation.json index 32cf5719d25f0..f4c21829929ed 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_with_speculation_summary_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_speculation_summary_expectation.json @@ -46,6 +46,7 @@ "taskId" : 0, "index" : 0, "attempt" : 0, + "partitionId": -1, "launchTime" : "2021-08-10T23:27:53.885GMT", "duration" : 2234, "executorId" : "7", @@ -99,6 +100,7 @@ "taskId" : 1, "index" : 1, "attempt" : 0, + "partitionId": -1, "launchTime" : "2021-08-10T23:27:53.903GMT", "duration" : 2647, "executorId" : "5", @@ -152,6 +154,7 @@ "taskId" : 2, "index" : 2, "attempt" : 0, + "partitionId": -1, "launchTime" : "2021-08-10T23:27:53.904GMT", "duration" : 5124, "executorId" : "8", @@ -205,6 +208,7 @@ "taskId" : 3, "index" : 3, "attempt" : 0, + "partitionId": -1, "launchTime" : "2021-08-10T23:27:53.904GMT", "duration" : 63773, "executorId" : "10", @@ -258,6 +262,7 @@ "taskId" : 4, "index" : 3, "attempt" : 1, + "partitionId": -1, "launchTime" : "2021-08-10T23:28:04.499GMT", "duration" : 53201, "executorId" : "7", diff --git a/core/src/test/resources/HistoryServerExpectations/stage_with_summaries_expectation.json b/core/src/test/resources/HistoryServerExpectations/stage_with_summaries_expectation.json index 3be20df679692..164395ff24dc1 100644 --- a/core/src/test/resources/HistoryServerExpectations/stage_with_summaries_expectation.json +++ b/core/src/test/resources/HistoryServerExpectations/stage_with_summaries_expectation.json @@ -46,6 +46,7 @@ "taskId" : 42, "index" : 10, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.120GMT", "duration" : 1923, "executorId" : "0", @@ -99,6 +100,7 @@ "taskId" : 37, "index" : 5, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.100GMT", "duration" : 1915, "executorId" : "0", @@ -152,6 +154,7 @@ "taskId" : 46, "index" : 14, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.132GMT", "duration" : 1905, "executorId" : "0", @@ -205,6 +208,7 @@ "taskId" : 38, "index" : 6, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.104GMT", "duration" : 1835, "executorId" : "0", @@ -258,6 +262,7 @@ "taskId" : 33, "index" : 1, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.082GMT", "duration" : 1943, "executorId" : "0", @@ -311,6 +316,7 @@ "taskId" : 41, "index" : 9, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.116GMT", "duration" : 1916, "executorId" : "0", @@ -364,6 +370,7 @@ "taskId" : 32, "index" : 0, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.077GMT", "duration" : 1960, "executorId" : "0", @@ -417,6 +424,7 @@ "taskId" : 34, "index" : 2, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.087GMT", "duration" : 1939, "executorId" : "0", @@ -470,6 +478,7 @@ "taskId" : 45, "index" : 13, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.129GMT", "duration" : 1895, "executorId" : "0", @@ -523,6 +532,7 @@ "taskId" : 44, "index" : 12, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.126GMT", "duration" : 1917, "executorId" : "0", @@ -576,6 +586,7 @@ "taskId" : 39, "index" : 7, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.109GMT", "duration" : 1915, "executorId" : "0", @@ -629,6 +640,7 @@ "taskId" : 35, "index" : 3, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.091GMT", "duration" : 1925, "executorId" : "0", @@ -682,6 +694,7 @@ "taskId" : 43, "index" : 11, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.123GMT", "duration" : 1906, "executorId" : "0", @@ -735,6 +748,7 @@ "taskId" : 40, "index" : 8, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.112GMT", "duration" : 1904, "executorId" : "0", @@ -788,6 +802,7 @@ "taskId" : 36, "index" : 4, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.095GMT", "duration" : 1920, "executorId" : "0", @@ -841,6 +856,7 @@ "taskId" : 47, "index" : 15, "attempt" : 0, + "partitionId": -1, "launchTime" : "2020-07-07T03:11:21.136GMT", "duration" : 1878, "executorId" : "0", diff --git a/core/src/test/resources/spark-events/local-1642039451826 b/core/src/test/resources/spark-events/local-1642039451826 new file mode 100644 index 0000000000000..d4bf1c830d7d2 --- /dev/null +++ b/core/src/test/resources/spark-events/local-1642039451826 @@ -0,0 +1,115 @@ +{"Event":"SparkListenerLogStart","Spark Version":"3.3.0-SNAPSHOT"} +{"Event":"SparkListenerResourceProfileAdded","Resource Profile Id":0,"Executor Resource Requests":{"cores":{"Resource Name":"cores","Amount":1,"Discovery Script":"","Vendor":""},"memory":{"Resource Name":"memory","Amount":1024,"Discovery Script":"","Vendor":""},"offHeap":{"Resource Name":"offHeap","Amount":0,"Discovery Script":"","Vendor":""}},"Task Resource Requests":{"cpus":{"Resource Name":"cpus","Amount":1.0}}} +{"Event":"SparkListenerExecutorAdded","Timestamp":1642039451891,"Executor ID":"driver","Executor Info":{"Host":"172.22.200.52","Total Cores":8,"Log Urls":{},"Attributes":{},"Resources":{},"Resource Profile Id":0}} +{"Event":"SparkListenerBlockManagerAdded","Block Manager ID":{"Executor ID":"driver","Host":"172.22.200.52","Port":61039},"Maximum Memory":384093388,"Timestamp":1642039451909,"Maximum Onheap Memory":384093388,"Maximum Offheap Memory":0} +{"Event":"SparkListenerEnvironmentUpdate","JVM Information":{"Java Home":"/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre","Java Version":"1.8.0_312 (Azul Systems, Inc.)","Scala Version":"version 2.12.15"},"Spark Properties":{"spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"172.22.200.52","spark.eventLog.enabled":"true","spark.driver.port":"61038","spark.repl.class.uri":"spark://172.22.200.52:61038/classes","spark.jars":"","spark.repl.class.outputDir":"/private/var/folders/dm/1vhcj_l97j146n6mgr2cm9rw0000gp/T/spark-501ae231-0cb9-4ba2-845f-3fc3cb053141/repl-054c2c94-f7a2-4f4b-9f12-96a1cdb15bc6","spark.app.name":"Spark shell","spark.scheduler.mode":"FIFO","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1642039450519","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.home":"/Users/lijunqing/Code/stczwd/spark/dist","spark.eventLog.dir":"/Users/lijunqing/Code/stczwd/spark/dist/eventLog","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1642039451826"},"Hadoop Properties":{"hadoop.service.shutdown.timeout":"30s","yarn.resourcemanager.amlauncher.thread-count":"50","yarn.sharedcache.enabled":"false","fs.s3a.connection.maximum":"48","yarn.nodemanager.numa-awareness.numactl.cmd":"/usr/bin/numactl","fs.viewfs.overload.scheme.target.o3fs.impl":"org.apache.hadoop.fs.ozone.OzoneFileSystem","fs.s3a.impl":"org.apache.hadoop.fs.s3a.S3AFileSystem","yarn.app.mapreduce.am.scheduler.heartbeat.interval-ms":"1000","yarn.timeline-service.timeline-client.number-of-async-entities-to-merge":"10","hadoop.security.kms.client.timeout":"60","hadoop.http.authentication.kerberos.principal":"HTTP/_HOST@LOCALHOST","mapreduce.jobhistory.loadedjob.tasks.max":"-1","yarn.resourcemanager.application-tag-based-placement.enable":"false","mapreduce.framework.name":"local","yarn.sharedcache.uploader.server.thread-count":"50","yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds.min":"3600","yarn.nodemanager.linux-container-executor.nonsecure-mode.user-pattern":"^[_.A-Za-z0-9][-@_.A-Za-z0-9]{0,255}?[$]?$","tfile.fs.output.buffer.size":"262144","yarn.app.mapreduce.am.job.task.listener.thread-count":"30","yarn.nodemanager.node-attributes.resync-interval-ms":"120000","yarn.nodemanager.container-log-monitor.interval-ms":"60000","hadoop.security.groups.cache.background.reload.threads":"3","yarn.resourcemanager.webapp.cross-origin.enabled":"false","fs.AbstractFileSystem.ftp.impl":"org.apache.hadoop.fs.ftp.FtpFs","hadoop.registry.secure":"false","hadoop.shell.safely.delete.limit.num.files":"100","mapreduce.job.acl-view-job":" ","fs.s3a.s3guard.ddb.background.sleep":"25ms","fs.s3a.retry.limit":"7","mapreduce.jobhistory.loadedjobs.cache.size":"5","fs.s3a.s3guard.ddb.table.create":"false","fs.viewfs.overload.scheme.target.s3a.impl":"org.apache.hadoop.fs.s3a.S3AFileSystem","yarn.nodemanager.amrmproxy.enabled":"false","yarn.timeline-service.entity-group-fs-store.with-user-dir":"false","mapreduce.shuffle.pathcache.expire-after-access-minutes":"5","mapreduce.input.fileinputformat.split.minsize":"0","yarn.resourcemanager.container.liveness-monitor.interval-ms":"600000","yarn.resourcemanager.client.thread-count":"50","io.seqfile.compress.blocksize":"1000000","yarn.nodemanager.runtime.linux.docker.allowed-container-runtimes":"runc","fs.viewfs.overload.scheme.target.http.impl":"org.apache.hadoop.fs.http.HttpFileSystem","yarn.resourcemanager.nodemanagers.heartbeat-interval-slowdown-factor":"1.0","yarn.sharedcache.checksum.algo.impl":"org.apache.hadoop.yarn.sharedcache.ChecksumSHA256Impl","yarn.nodemanager.amrmproxy.interceptor-class.pipeline":"org.apache.hadoop.yarn.server.nodemanager.amrmproxy.DefaultRequestInterceptor","yarn.timeline-service.entity-group-fs-store.leveldb-cache-read-cache-size":"10485760","mapreduce.reduce.shuffle.fetch.retry.interval-ms":"1000","mapreduce.task.profile.maps":"0-2","yarn.scheduler.include-port-in-node-name":"false","yarn.nodemanager.admin-env":"MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX","yarn.resourcemanager.node-removal-untracked.timeout-ms":"60000","mapreduce.am.max-attempts":"2","hadoop.security.kms.client.failover.sleep.base.millis":"100","mapreduce.jobhistory.webapp.https.address":"0.0.0.0:19890","yarn.node-labels.fs-store.impl.class":"org.apache.hadoop.yarn.nodelabels.FileSystemNodeLabelsStore","yarn.nodemanager.collector-service.address":"${yarn.nodemanager.hostname}:8048","fs.trash.checkpoint.interval":"0","mapreduce.job.map.output.collector.class":"org.apache.hadoop.mapred.MapTask$MapOutputBuffer","yarn.resourcemanager.node-ip-cache.expiry-interval-secs":"-1","hadoop.http.authentication.signature.secret.file":"*********(redacted)","hadoop.jetty.logs.serve.aliases":"true","yarn.resourcemanager.placement-constraints.handler":"disabled","yarn.timeline-service.handler-thread-count":"10","yarn.resourcemanager.max-completed-applications":"1000","yarn.nodemanager.aux-services.manifest.enabled":"false","yarn.resourcemanager.system-metrics-publisher.enabled":"false","yarn.resourcemanager.placement-constraints.algorithm.class":"org.apache.hadoop.yarn.server.resourcemanager.scheduler.constraint.algorithm.DefaultPlacementAlgorithm","yarn.sharedcache.webapp.address":"0.0.0.0:8788","fs.s3a.select.input.csv.quote.escape.character":"\\\\","yarn.resourcemanager.delegation.token.renew-interval":"*********(redacted)","yarn.sharedcache.nm.uploader.replication.factor":"10","hadoop.security.groups.negative-cache.secs":"30","yarn.app.mapreduce.task.container.log.backups":"0","mapreduce.reduce.skip.proc-count.auto-incr":"true","fs.viewfs.overload.scheme.target.swift.impl":"org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem","hadoop.security.group.mapping.ldap.posix.attr.gid.name":"gidNumber","ipc.client.fallback-to-simple-auth-allowed":"false","yarn.nodemanager.resource.memory.enforced":"true","yarn.client.failover-proxy-provider":"org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider","yarn.timeline-service.http-authentication.simple.anonymous.allowed":"true","ha.health-monitor.check-interval.ms":"1000","yarn.nodemanager.runtime.linux.runc.host-pid-namespace.allowed":"false","hadoop.metrics.jvm.use-thread-mxbean":"false","ipc.[port_number].faircallqueue.multiplexer.weights":"8,4,2,1","yarn.acl.reservation-enable":"false","yarn.resourcemanager.store.class":"org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore","yarn.app.mapreduce.am.hard-kill-timeout-ms":"10000","fs.s3a.etag.checksum.enabled":"false","yarn.nodemanager.container-metrics.enable":"true","ha.health-monitor.rpc.connect.max.retries":"1","yarn.timeline-service.client.fd-clean-interval-secs":"60","yarn.resourcemanager.nodemanagers.heartbeat-interval-scaling-enable":"false","yarn.resourcemanager.nodemanagers.heartbeat-interval-ms":"1000","hadoop.common.configuration.version":"3.0.0","fs.s3a.s3guard.ddb.table.capacity.read":"0","yarn.nodemanager.remote-app-log-dir-suffix":"logs","yarn.nodemanager.container-log-monitor.dir-size-limit-bytes":"1000000000","yarn.nodemanager.windows-container.cpu-limit.enabled":"false","yarn.nodemanager.runtime.linux.docker.privileged-containers.allowed":"false","file.blocksize":"67108864","hadoop.http.idle_timeout.ms":"60000","hadoop.registry.zk.retry.ceiling.ms":"60000","yarn.scheduler.configuration.leveldb-store.path":"${hadoop.tmp.dir}/yarn/system/confstore","yarn.sharedcache.store.in-memory.initial-delay-mins":"10","mapreduce.jobhistory.principal":"jhs/_HOST@REALM.TLD","mapreduce.map.skip.proc-count.auto-incr":"true","fs.s3a.committer.name":"file","mapreduce.task.profile.reduces":"0-2","hadoop.zk.num-retries":"1000","yarn.webapp.xfs-filter.enabled":"true","fs.viewfs.overload.scheme.target.hdfs.impl":"org.apache.hadoop.hdfs.DistributedFileSystem","seq.io.sort.mb":"100","yarn.scheduler.configuration.max.version":"100","yarn.timeline-service.webapp.https.address":"${yarn.timeline-service.hostname}:8190","yarn.resourcemanager.scheduler.address":"${yarn.resourcemanager.hostname}:8030","yarn.node-labels.enabled":"false","yarn.resourcemanager.webapp.ui-actions.enabled":"true","mapreduce.task.timeout":"600000","yarn.sharedcache.client-server.thread-count":"50","hadoop.security.groups.shell.command.timeout":"0s","hadoop.security.crypto.cipher.suite":"AES/CTR/NoPadding","yarn.nodemanager.elastic-memory-control.oom-handler":"org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources.DefaultOOMHandler","yarn.resourcemanager.connect.max-wait.ms":"900000","fs.defaultFS":"file:///","yarn.minicluster.use-rpc":"false","ipc.[port_number].decay-scheduler.decay-factor":"0.5","fs.har.impl.disable.cache":"true","yarn.webapp.ui2.enable":"false","io.compression.codec.bzip2.library":"system-native","yarn.webapp.filter-invalid-xml-chars":"false","yarn.nodemanager.runtime.linux.runc.layer-mounts-interval-secs":"600","fs.s3a.select.input.csv.record.delimiter":"\\n","fs.s3a.change.detection.source":"etag","ipc.[port_number].backoff.enable":"false","yarn.nodemanager.distributed-scheduling.enabled":"false","mapreduce.shuffle.connection-keep-alive.timeout":"5","yarn.resourcemanager.webapp.https.address":"${yarn.resourcemanager.hostname}:8090","yarn.webapp.enable-rest-app-submissions":"true","mapreduce.jobhistory.address":"0.0.0.0:10020","yarn.resourcemanager.nm-tokens.master-key-rolling-interval-secs":"*********(redacted)","yarn.is.minicluster":"false","yarn.nodemanager.address":"${yarn.nodemanager.hostname}:0","fs.abfss.impl":"org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem","fs.AbstractFileSystem.s3a.impl":"org.apache.hadoop.fs.s3a.S3A","mapreduce.task.combine.progress.records":"10000","yarn.resourcemanager.epoch.range":"0","yarn.resourcemanager.am.max-attempts":"2","yarn.nodemanager.runtime.linux.runc.image-toplevel-dir":"/runc-root","yarn.nodemanager.linux-container-executor.cgroups.hierarchy":"/hadoop-yarn","fs.AbstractFileSystem.wasbs.impl":"org.apache.hadoop.fs.azure.Wasbs","yarn.timeline-service.entity-group-fs-store.cache-store-class":"org.apache.hadoop.yarn.server.timeline.MemoryTimelineStore","yarn.nodemanager.runtime.linux.runc.allowed-container-networks":"host,none,bridge","fs.ftp.transfer.mode":"BLOCK_TRANSFER_MODE","ipc.server.log.slow.rpc":"false","ipc.server.reuseaddr":"true","fs.ftp.timeout":"0","yarn.resourcemanager.node-labels.provider.fetch-interval-ms":"1800000","yarn.router.webapp.https.address":"0.0.0.0:8091","yarn.nodemanager.webapp.cross-origin.enabled":"false","fs.wasb.impl":"org.apache.hadoop.fs.azure.NativeAzureFileSystem","yarn.resourcemanager.auto-update.containers":"false","yarn.app.mapreduce.am.job.committer.cancel-timeout":"60000","yarn.scheduler.configuration.zk-store.parent-path":"/confstore","yarn.nodemanager.default-container-executor.log-dirs.permissions":"710","yarn.app.attempt.diagnostics.limit.kc":"64","fs.viewfs.overload.scheme.target.swebhdfs.impl":"org.apache.hadoop.hdfs.web.SWebHdfsFileSystem","yarn.client.failover-no-ha-proxy-provider":"org.apache.hadoop.yarn.client.DefaultNoHARMFailoverProxyProvider","fs.s3a.change.detection.mode":"server","ftp.bytes-per-checksum":"512","yarn.nodemanager.resource.memory-mb":"-1","fs.AbstractFileSystem.abfs.impl":"org.apache.hadoop.fs.azurebfs.Abfs","yarn.timeline-service.writer.flush-interval-seconds":"60","fs.s3a.fast.upload.active.blocks":"4","yarn.resourcemanager.submission-preprocessor.enabled":"false","hadoop.security.credential.clear-text-fallback":"true","yarn.nodemanager.collector-service.thread-count":"5","ipc.[port_number].scheduler.impl":"org.apache.hadoop.ipc.DefaultRpcScheduler","fs.azure.secure.mode":"false","mapreduce.jobhistory.joblist.cache.size":"20000","fs.ftp.host":"0.0.0.0","yarn.timeline-service.writer.async.queue.capacity":"100","yarn.resourcemanager.fs.state-store.num-retries":"0","yarn.resourcemanager.nodemanager-connect-retries":"10","yarn.nodemanager.log-aggregation.num-log-files-per-app":"30","hadoop.security.kms.client.encrypted.key.cache.low-watermark":"0.3f","fs.s3a.committer.magic.enabled":"true","yarn.timeline-service.client.max-retries":"30","dfs.ha.fencing.ssh.connect-timeout":"30000","yarn.log-aggregation-enable":"false","yarn.system-metrics-publisher.enabled":"false","mapreduce.reduce.markreset.buffer.percent":"0.0","fs.AbstractFileSystem.viewfs.impl":"org.apache.hadoop.fs.viewfs.ViewFs","yarn.resourcemanager.nodemanagers.heartbeat-interval-speedup-factor":"1.0","mapreduce.task.io.sort.factor":"10","yarn.nodemanager.amrmproxy.client.thread-count":"25","ha.failover-controller.new-active.rpc-timeout.ms":"60000","yarn.nodemanager.container-localizer.java.opts":"-Xmx256m","mapreduce.jobhistory.datestring.cache.size":"200000","mapreduce.job.acl-modify-job":" ","yarn.nodemanager.windows-container.memory-limit.enabled":"false","yarn.timeline-service.webapp.address":"${yarn.timeline-service.hostname}:8188","yarn.app.mapreduce.am.job.committer.commit-window":"10000","yarn.nodemanager.container-manager.thread-count":"20","yarn.minicluster.fixed.ports":"false","hadoop.tags.system":"YARN,HDFS,NAMENODE,DATANODE,REQUIRED,SECURITY,KERBEROS,PERFORMANCE,CLIENT\n ,SERVER,DEBUG,DEPRECATED,COMMON,OPTIONAL","yarn.cluster.max-application-priority":"0","yarn.timeline-service.ttl-enable":"true","mapreduce.jobhistory.recovery.store.fs.uri":"${hadoop.tmp.dir}/mapred/history/recoverystore","hadoop.caller.context.signature.max.size":"40","ipc.[port_number].decay-scheduler.backoff.responsetime.enable":"false","yarn.client.load.resource-types.from-server":"false","ha.zookeeper.session-timeout.ms":"10000","ipc.[port_number].decay-scheduler.metrics.top.user.count":"10","tfile.io.chunk.size":"1048576","fs.s3a.s3guard.ddb.table.capacity.write":"0","yarn.dispatcher.print-events-info.threshold":"5000","mapreduce.job.speculative.slowtaskthreshold":"1.0","io.serializations":"org.apache.hadoop.io.serializer.WritableSerialization, org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization, org.apache.hadoop.io.serializer.avro.AvroReflectSerialization","hadoop.security.kms.client.failover.sleep.max.millis":"2000","hadoop.security.group.mapping.ldap.directory.search.timeout":"10000","yarn.scheduler.configuration.store.max-logs":"1000","yarn.nodemanager.node-attributes.provider.fetch-interval-ms":"600000","fs.swift.impl":"org.apache.hadoop.fs.swift.snative.SwiftNativeFileSystem","yarn.nodemanager.local-cache.max-files-per-directory":"8192","hadoop.http.cross-origin.enabled":"false","hadoop.zk.acl":"world:anyone:rwcda","yarn.nodemanager.runtime.linux.runc.image-tag-to-manifest-plugin.num-manifests-to-cache":"10","mapreduce.map.sort.spill.percent":"0.80","yarn.timeline-service.entity-group-fs-store.scan-interval-seconds":"60","yarn.node-attribute.fs-store.impl.class":"org.apache.hadoop.yarn.server.resourcemanager.nodelabels.FileSystemNodeAttributeStore","fs.s3a.retry.interval":"500ms","yarn.timeline-service.client.best-effort":"false","yarn.resourcemanager.webapp.delegation-token-auth-filter.enabled":"*********(redacted)","hadoop.security.group.mapping.ldap.posix.attr.uid.name":"uidNumber","fs.AbstractFileSystem.swebhdfs.impl":"org.apache.hadoop.fs.SWebHdfs","yarn.nodemanager.elastic-memory-control.timeout-sec":"5","fs.s3a.select.enabled":"true","mapreduce.ifile.readahead":"true","yarn.timeline-service.leveldb-timeline-store.ttl-interval-ms":"300000","yarn.timeline-service.reader.webapp.address":"${yarn.timeline-service.webapp.address}","yarn.resourcemanager.placement-constraints.algorithm.pool-size":"1","yarn.timeline-service.hbase.coprocessor.jar.hdfs.location":"/hbase/coprocessor/hadoop-yarn-server-timelineservice.jar","hadoop.security.kms.client.encrypted.key.cache.num.refill.threads":"2","yarn.resourcemanager.scheduler.class":"org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler","yarn.app.mapreduce.am.command-opts":"-Xmx1024m","fs.s3a.metadatastore.fail.on.write.error":"true","hadoop.http.sni.host.check.enabled":"false","mapreduce.cluster.local.dir":"${hadoop.tmp.dir}/mapred/local","io.mapfile.bloom.error.rate":"0.005","fs.client.resolve.topology.enabled":"false","yarn.nodemanager.runtime.linux.allowed-runtimes":"default","yarn.sharedcache.store.class":"org.apache.hadoop.yarn.server.sharedcachemanager.store.InMemorySCMStore","ha.failover-controller.graceful-fence.rpc-timeout.ms":"5000","ftp.replication":"3","fs.getspaceused.jitterMillis":"60000","hadoop.security.uid.cache.secs":"14400","mapreduce.job.maxtaskfailures.per.tracker":"3","fs.s3a.metadatastore.impl":"org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore","io.skip.checksum.errors":"false","yarn.app.mapreduce.client-am.ipc.max-retries-on-timeouts":"3","yarn.timeline-service.webapp.xfs-filter.xframe-options":"SAMEORIGIN","fs.s3a.connection.timeout":"200000","yarn.app.mapreduce.am.webapp.https.enabled":"false","mapreduce.job.max.split.locations":"15","yarn.resourcemanager.nm-container-queuing.max-queue-length":"15","yarn.resourcemanager.delegation-token.always-cancel":"*********(redacted)","hadoop.registry.zk.session.timeout.ms":"60000","yarn.federation.cache-ttl.secs":"300","mapreduce.jvm.system-properties-to-log":"os.name,os.version,java.home,java.runtime.version,java.vendor,java.version,java.vm.name,java.class.path,java.io.tmpdir,user.dir,user.name","yarn.resourcemanager.opportunistic-container-allocation.nodes-used":"10","yarn.timeline-service.entity-group-fs-store.active-dir":"/tmp/entity-file-history/active","mapreduce.shuffle.transfer.buffer.size":"131072","yarn.timeline-service.client.retry-interval-ms":"1000","yarn.timeline-service.flowname.max-size":"0","yarn.http.policy":"HTTP_ONLY","fs.s3a.socket.send.buffer":"8192","fs.AbstractFileSystem.abfss.impl":"org.apache.hadoop.fs.azurebfs.Abfss","yarn.sharedcache.uploader.server.address":"0.0.0.0:8046","yarn.resourcemanager.delegation-token.max-conf-size-bytes":"*********(redacted)","hadoop.http.authentication.token.validity":"*********(redacted)","mapreduce.shuffle.max.connections":"0","yarn.minicluster.yarn.nodemanager.resource.memory-mb":"4096","mapreduce.job.emit-timeline-data":"false","yarn.nodemanager.resource.system-reserved-memory-mb":"-1","hadoop.kerberos.min.seconds.before.relogin":"60","mapreduce.jobhistory.move.thread-count":"3","yarn.resourcemanager.admin.client.thread-count":"1","yarn.dispatcher.drain-events.timeout":"300000","ipc.[port_number].decay-scheduler.backoff.responsetime.thresholds":"10s,20s,30s,40s","fs.s3a.buffer.dir":"${hadoop.tmp.dir}/s3a","hadoop.ssl.enabled.protocols":"TLSv1.2","mapreduce.jobhistory.admin.address":"0.0.0.0:10033","yarn.log-aggregation-status.time-out.ms":"600000","mapreduce.shuffle.port":"13562","yarn.resourcemanager.max-log-aggregation-diagnostics-in-memory":"10","yarn.nodemanager.health-checker.interval-ms":"600000","yarn.router.clientrm.interceptor-class.pipeline":"org.apache.hadoop.yarn.server.router.clientrm.DefaultClientRequestInterceptor","yarn.resourcemanager.zk-appid-node.split-index":"0","ftp.blocksize":"67108864","yarn.nodemanager.runtime.linux.sandbox-mode.local-dirs.permissions":"read","yarn.router.rmadmin.interceptor-class.pipeline":"org.apache.hadoop.yarn.server.router.rmadmin.DefaultRMAdminRequestInterceptor","yarn.nodemanager.log-container-debug-info.enabled":"true","yarn.resourcemanager.activities-manager.app-activities.max-queue-length":"100","yarn.resourcemanager.application-https.policy":"NONE","yarn.client.max-cached-nodemanagers-proxies":"0","yarn.nodemanager.linux-container-executor.cgroups.delete-delay-ms":"20","yarn.nodemanager.delete.debug-delay-sec":"0","yarn.nodemanager.pmem-check-enabled":"true","yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage":"90.0","mapreduce.app-submission.cross-platform":"false","yarn.resourcemanager.work-preserving-recovery.scheduling-wait-ms":"10000","yarn.nodemanager.container-retry-minimum-interval-ms":"1000","hadoop.security.groups.cache.secs":"300","yarn.federation.enabled":"false","yarn.workflow-id.tag-prefix":"workflowid:","fs.azure.local.sas.key.mode":"false","ipc.maximum.data.length":"134217728","fs.s3a.endpoint":"s3.amazonaws.com","mapreduce.shuffle.max.threads":"0","yarn.router.pipeline.cache-max-size":"25","yarn.resourcemanager.nm-container-queuing.load-comparator":"QUEUE_LENGTH","yarn.resourcemanager.resource-tracker.nm.ip-hostname-check":"false","hadoop.security.authorization":"false","mapreduce.job.complete.cancel.delegation.tokens":"*********(redacted)","fs.s3a.paging.maximum":"5000","nfs.exports.allowed.hosts":"* rw","yarn.nodemanager.amrmproxy.ha.enable":"false","mapreduce.jobhistory.http.policy":"HTTP_ONLY","yarn.sharedcache.store.in-memory.check-period-mins":"720","hadoop.security.group.mapping.ldap.ssl":"false","fs.s3a.downgrade.syncable.exceptions":"true","yarn.client.application-client-protocol.poll-interval-ms":"200","yarn.scheduler.configuration.leveldb-store.compaction-interval-secs":"86400","yarn.timeline-service.writer.class":"org.apache.hadoop.yarn.server.timelineservice.storage.HBaseTimelineWriterImpl","ha.zookeeper.parent-znode":"/hadoop-ha","yarn.resourcemanager.submission-preprocessor.file-refresh-interval-ms":"60000","yarn.nodemanager.log-aggregation.policy.class":"org.apache.hadoop.yarn.server.nodemanager.containermanager.logaggregation.AllContainerLogAggregationPolicy","mapreduce.reduce.shuffle.merge.percent":"0.66","hadoop.security.group.mapping.ldap.search.filter.group":"(objectClass=group)","yarn.resourcemanager.placement-constraints.scheduler.pool-size":"1","yarn.resourcemanager.activities-manager.cleanup-interval-ms":"5000","yarn.nodemanager.resourcemanager.minimum.version":"NONE","mapreduce.job.speculative.speculative-cap-running-tasks":"0.1","yarn.admin.acl":"*","ipc.[port_number].identity-provider.impl":"org.apache.hadoop.ipc.UserIdentityProvider","yarn.nodemanager.recovery.supervised":"false","yarn.sharedcache.admin.thread-count":"1","yarn.resourcemanager.ha.automatic-failover.enabled":"true","yarn.nodemanager.container-log-monitor.total-size-limit-bytes":"10000000000","mapreduce.reduce.skip.maxgroups":"0","mapreduce.reduce.shuffle.connect.timeout":"180000","yarn.nodemanager.health-checker.scripts":"script","yarn.resourcemanager.address":"${yarn.resourcemanager.hostname}:8032","ipc.client.ping":"true","mapreduce.task.local-fs.write-limit.bytes":"-1","fs.adl.oauth2.access.token.provider.type":"*********(redacted)","mapreduce.shuffle.ssl.file.buffer.size":"65536","yarn.resourcemanager.ha.automatic-failover.embedded":"true","yarn.nodemanager.resource-plugins.gpu.docker-plugin":"nvidia-docker-v1","fs.s3a.s3guard.consistency.retry.interval":"2s","hadoop.ssl.enabled":"false","fs.s3a.multipart.purge":"false","yarn.scheduler.configuration.store.class":"file","yarn.resourcemanager.nm-container-queuing.queue-limit-stdev":"1.0f","mapreduce.job.end-notification.max.attempts":"5","mapreduce.output.fileoutputformat.compress.codec":"org.apache.hadoop.io.compress.DefaultCodec","yarn.nodemanager.container-monitor.procfs-tree.smaps-based-rss.enabled":"false","ipc.client.bind.wildcard.addr":"false","yarn.resourcemanager.webapp.rest-csrf.enabled":"false","ha.health-monitor.connect-retry-interval.ms":"1000","yarn.nodemanager.keytab":"/etc/krb5.keytab","mapreduce.jobhistory.keytab":"/etc/security/keytab/jhs.service.keytab","fs.s3a.threads.max":"64","yarn.nodemanager.runtime.linux.docker.image-update":"false","mapreduce.reduce.shuffle.input.buffer.percent":"0.70","fs.viewfs.overload.scheme.target.abfss.impl":"org.apache.hadoop.fs.azurebfs.SecureAzureBlobFileSystem","yarn.dispatcher.cpu-monitor.samples-per-min":"60","yarn.nodemanager.runtime.linux.docker.allowed-container-networks":"host,none,bridge","yarn.nodemanager.node-labels.resync-interval-ms":"120000","hadoop.tmp.dir":"/tmp/hadoop-${user.name}","mapreduce.job.maps":"2","mapreduce.jobhistory.webapp.rest-csrf.custom-header":"X-XSRF-Header","mapreduce.job.end-notification.max.retry.interval":"5000","yarn.log-aggregation.retain-check-interval-seconds":"-1","yarn.resourcemanager.resource-tracker.client.thread-count":"50","yarn.nodemanager.containers-launcher.class":"org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainersLauncher","yarn.rm.system-metrics-publisher.emit-container-events":"false","yarn.timeline-service.leveldb-timeline-store.start-time-read-cache-size":"10000","yarn.resourcemanager.ha.automatic-failover.zk-base-path":"/yarn-leader-election","io.seqfile.local.dir":"${hadoop.tmp.dir}/io/local","fs.s3a.s3guard.ddb.throttle.retry.interval":"100ms","fs.AbstractFileSystem.wasb.impl":"org.apache.hadoop.fs.azure.Wasb","mapreduce.client.submit.file.replication":"10","mapreduce.jobhistory.minicluster.fixed.ports":"false","fs.s3a.multipart.threshold":"128M","yarn.resourcemanager.webapp.xfs-filter.xframe-options":"SAMEORIGIN","mapreduce.jobhistory.done-dir":"${yarn.app.mapreduce.am.staging-dir}/history/done","ipc.client.idlethreshold":"4000","yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage":"false","mapreduce.reduce.input.buffer.percent":"0.0","yarn.nodemanager.runtime.linux.docker.userremapping-gid-threshold":"1","yarn.nodemanager.webapp.rest-csrf.enabled":"false","fs.ftp.host.port":"21","ipc.ping.interval":"60000","yarn.resourcemanager.history-writer.multi-threaded-dispatcher.pool-size":"10","yarn.resourcemanager.admin.address":"${yarn.resourcemanager.hostname}:8033","file.client-write-packet-size":"65536","ipc.client.kill.max":"10","mapreduce.reduce.speculative":"true","hadoop.security.key.default.bitlength":"128","mapreduce.job.reducer.unconditional-preempt.delay.sec":"300","yarn.nodemanager.disk-health-checker.interval-ms":"120000","yarn.nodemanager.log.deletion-threads-count":"4","fs.s3a.committer.abort.pending.uploads":"true","yarn.webapp.filter-entity-list-by-user":"false","yarn.resourcemanager.activities-manager.app-activities.ttl-ms":"600000","ipc.client.connection.maxidletime":"10000","mapreduce.task.io.sort.mb":"100","yarn.nodemanager.localizer.client.thread-count":"5","io.erasurecode.codec.rs.rawcoders":"rs_native,rs_java","io.erasurecode.codec.rs-legacy.rawcoders":"rs-legacy_java","yarn.sharedcache.admin.address":"0.0.0.0:8047","yarn.resourcemanager.placement-constraints.algorithm.iterator":"SERIAL","yarn.nodemanager.localizer.cache.cleanup.interval-ms":"600000","hadoop.security.crypto.codec.classes.aes.ctr.nopadding":"org.apache.hadoop.crypto.OpensslAesCtrCryptoCodec, org.apache.hadoop.crypto.JceAesCtrCryptoCodec","mapreduce.job.cache.limit.max-resources-mb":"0","fs.s3a.connection.ssl.enabled":"true","yarn.nodemanager.process-kill-wait.ms":"5000","mapreduce.job.hdfs-servers":"${fs.defaultFS}","yarn.app.mapreduce.am.webapp.https.client.auth":"false","hadoop.workaround.non.threadsafe.getpwuid":"true","fs.df.interval":"60000","ipc.[port_number].decay-scheduler.thresholds":"13,25,50","fs.s3a.multiobjectdelete.enable":"true","yarn.sharedcache.cleaner.resource-sleep-ms":"0","yarn.nodemanager.disk-health-checker.min-healthy-disks":"0.25","hadoop.shell.missing.defaultFs.warning":"false","io.file.buffer.size":"65536","fs.viewfs.overload.scheme.target.wasb.impl":"org.apache.hadoop.fs.azure.NativeAzureFileSystem","hadoop.security.group.mapping.ldap.search.attr.member":"member","hadoop.security.random.device.file.path":"/dev/urandom","hadoop.security.sensitive-config-keys":"*********(redacted)","fs.s3a.s3guard.ddb.max.retries":"9","fs.viewfs.overload.scheme.target.file.impl":"org.apache.hadoop.fs.LocalFileSystem","hadoop.rpc.socket.factory.class.default":"org.apache.hadoop.net.StandardSocketFactory","yarn.intermediate-data-encryption.enable":"false","yarn.resourcemanager.connect.retry-interval.ms":"30000","yarn.nodemanager.container.stderr.pattern":"{*stderr*,*STDERR*}","yarn.scheduler.minimum-allocation-mb":"1024","yarn.app.mapreduce.am.staging-dir":"/tmp/hadoop-yarn/staging","mapreduce.reduce.shuffle.read.timeout":"180000","hadoop.http.cross-origin.max-age":"1800","io.erasurecode.codec.xor.rawcoders":"xor_native,xor_java","fs.s3a.s3guard.consistency.retry.limit":"7","fs.s3a.connection.establish.timeout":"5000","mapreduce.job.running.map.limit":"0","yarn.minicluster.control-resource-monitoring":"false","hadoop.ssl.require.client.cert":"false","hadoop.kerberos.kinit.command":"kinit","yarn.federation.state-store.class":"org.apache.hadoop.yarn.server.federation.store.impl.MemoryFederationStateStore","fs.s3a.delegation.tokens.enabled":"*********(redacted)","mapreduce.reduce.log.level":"INFO","hadoop.security.dns.log-slow-lookups.threshold.ms":"1000","mapreduce.job.ubertask.enable":"false","adl.http.timeout":"-1","yarn.resourcemanager.placement-constraints.retry-attempts":"3","hadoop.caller.context.enabled":"false","hadoop.security.group.mapping.ldap.num.attempts":"3","yarn.nodemanager.vmem-pmem-ratio":"2.1","hadoop.rpc.protection":"authentication","ha.health-monitor.rpc-timeout.ms":"45000","yarn.nodemanager.remote-app-log-dir":"/tmp/logs","hadoop.zk.timeout-ms":"10000","fs.s3a.s3guard.cli.prune.age":"86400000","yarn.nodemanager.resource.pcores-vcores-multiplier":"1.0","yarn.nodemanager.runtime.linux.sandbox-mode":"disabled","yarn.app.mapreduce.am.containerlauncher.threadpool-initial-size":"10","fs.viewfs.overload.scheme.target.webhdfs.impl":"org.apache.hadoop.hdfs.web.WebHdfsFileSystem","fs.s3a.committer.threads":"8","hadoop.zk.retry-interval-ms":"1000","hadoop.security.crypto.buffer.size":"8192","yarn.nodemanager.node-labels.provider.fetch-interval-ms":"600000","mapreduce.jobhistory.recovery.store.leveldb.path":"${hadoop.tmp.dir}/mapred/history/recoverystore","yarn.client.failover-retries-on-socket-timeouts":"0","fs.s3a.ssl.channel.mode":"default_jsse","yarn.nodemanager.resource.memory.enabled":"false","fs.azure.authorization.caching.enable":"true","hadoop.security.instrumentation.requires.admin":"false","yarn.nodemanager.delete.thread-count":"4","mapreduce.job.finish-when-all-reducers-done":"true","hadoop.registry.jaas.context":"Client","yarn.timeline-service.leveldb-timeline-store.path":"${hadoop.tmp.dir}/yarn/timeline","io.map.index.interval":"128","yarn.resourcemanager.nm-container-queuing.max-queue-wait-time-ms":"100","fs.abfs.impl":"org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem","mapreduce.job.counters.max":"120","mapreduce.jobhistory.webapp.rest-csrf.enabled":"false","yarn.timeline-service.store-class":"org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore","mapreduce.jobhistory.move.interval-ms":"180000","fs.s3a.change.detection.version.required":"true","yarn.nodemanager.localizer.fetch.thread-count":"4","yarn.resourcemanager.scheduler.client.thread-count":"50","hadoop.ssl.hostname.verifier":"DEFAULT","yarn.timeline-service.leveldb-state-store.path":"${hadoop.tmp.dir}/yarn/timeline","mapreduce.job.classloader":"false","mapreduce.task.profile.map.params":"${mapreduce.task.profile.params}","ipc.client.connect.timeout":"20000","hadoop.security.auth_to_local.mechanism":"hadoop","yarn.timeline-service.app-collector.linger-period.ms":"60000","yarn.nm.liveness-monitor.expiry-interval-ms":"600000","yarn.resourcemanager.reservation-system.planfollower.time-step":"1000","yarn.resourcemanager.activities-manager.scheduler-activities.ttl-ms":"600000","yarn.nodemanager.runtime.linux.docker.enable-userremapping.allowed":"true","yarn.webapp.api-service.enable":"false","yarn.nodemanager.recovery.enabled":"false","mapreduce.job.end-notification.retry.interval":"1000","fs.du.interval":"600000","fs.ftp.impl":"org.apache.hadoop.fs.ftp.FTPFileSystem","yarn.nodemanager.container.stderr.tail.bytes":"4096","yarn.nodemanager.disk-health-checker.disk-free-space-threshold.enabled":"true","hadoop.security.group.mapping.ldap.read.timeout.ms":"60000","hadoop.security.groups.cache.warn.after.ms":"5000","file.bytes-per-checksum":"512","mapreduce.outputcommitter.factory.scheme.s3a":"org.apache.hadoop.fs.s3a.commit.S3ACommitterFactory","hadoop.security.groups.cache.background.reload":"false","yarn.nodemanager.container-monitor.enabled":"true","yarn.nodemanager.elastic-memory-control.enabled":"false","net.topology.script.number.args":"100","mapreduce.task.merge.progress.records":"10000","yarn.nodemanager.localizer.address":"${yarn.nodemanager.hostname}:8040","yarn.timeline-service.keytab":"/etc/krb5.keytab","mapreduce.reduce.shuffle.fetch.retry.timeout-ms":"30000","yarn.resourcemanager.rm.container-allocation.expiry-interval-ms":"600000","yarn.nodemanager.container-executor.exit-code-file.timeout-ms":"2000","mapreduce.fileoutputcommitter.algorithm.version":"1","yarn.resourcemanager.work-preserving-recovery.enabled":"true","mapreduce.map.skip.maxrecords":"0","yarn.sharedcache.root-dir":"/sharedcache","fs.s3a.retry.throttle.limit":"20","hadoop.http.authentication.type":"simple","fs.viewfs.overload.scheme.target.oss.impl":"org.apache.hadoop.fs.aliyun.oss.AliyunOSSFileSystem","mapreduce.job.cache.limit.max-resources":"0","mapreduce.task.userlog.limit.kb":"0","ipc.[port_number].weighted-cost.handler":"1","yarn.resourcemanager.scheduler.monitor.enable":"false","ipc.client.connect.max.retries":"10","hadoop.registry.zk.retry.times":"5","yarn.nodemanager.resource-monitor.interval-ms":"3000","yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices":"auto","mapreduce.job.sharedcache.mode":"disabled","yarn.nodemanager.webapp.rest-csrf.custom-header":"X-XSRF-Header","mapreduce.shuffle.listen.queue.size":"128","yarn.scheduler.configuration.mutation.acl-policy.class":"org.apache.hadoop.yarn.server.resourcemanager.scheduler.DefaultConfigurationMutationACLPolicy","mapreduce.map.cpu.vcores":"1","yarn.log-aggregation.file-formats":"TFile","yarn.timeline-service.client.fd-retain-secs":"300","fs.s3a.select.output.csv.field.delimiter":",","yarn.nodemanager.health-checker.timeout-ms":"1200000","hadoop.user.group.static.mapping.overrides":"dr.who=;","fs.azure.sas.expiry.period":"90d","fs.s3a.select.output.csv.record.delimiter":"\\n","mapreduce.jobhistory.recovery.store.class":"org.apache.hadoop.mapreduce.v2.hs.HistoryServerFileSystemStateStoreService","fs.viewfs.overload.scheme.target.https.impl":"org.apache.hadoop.fs.http.HttpsFileSystem","fs.s3a.s3guard.ddb.table.sse.enabled":"false","yarn.resourcemanager.fail-fast":"${yarn.fail-fast}","yarn.resourcemanager.proxy-user-privileges.enabled":"false","yarn.router.webapp.interceptor-class.pipeline":"org.apache.hadoop.yarn.server.router.webapp.DefaultRequestInterceptorREST","yarn.nodemanager.resource.memory.cgroups.soft-limit-percentage":"90.0","mapreduce.job.reducer.preempt.delay.sec":"0","hadoop.util.hash.type":"murmur","yarn.nodemanager.disk-validator":"basic","yarn.app.mapreduce.client.job.max-retries":"3","fs.viewfs.overload.scheme.target.ftp.impl":"org.apache.hadoop.fs.ftp.FTPFileSystem","mapreduce.reduce.shuffle.retry-delay.max.ms":"60000","hadoop.security.group.mapping.ldap.connection.timeout.ms":"60000","mapreduce.task.profile.params":"-agentlib:hprof=cpu=samples,heap=sites,force=n,thread=y,verbose=n,file=%s","yarn.app.mapreduce.shuffle.log.backups":"0","yarn.nodemanager.container-diagnostics-maximum-size":"10000","hadoop.registry.zk.retry.interval.ms":"1000","yarn.nodemanager.linux-container-executor.cgroups.delete-timeout-ms":"1000","fs.AbstractFileSystem.file.impl":"org.apache.hadoop.fs.local.LocalFs","yarn.nodemanager.log-aggregation.roll-monitoring-interval-seconds":"-1","mapreduce.jobhistory.cleaner.interval-ms":"86400000","hadoop.registry.zk.quorum":"localhost:2181","yarn.nodemanager.runtime.linux.runc.allowed-container-runtimes":"runc","mapreduce.output.fileoutputformat.compress":"false","yarn.resourcemanager.am-rm-tokens.master-key-rolling-interval-secs":"*********(redacted)","fs.s3a.assumed.role.session.duration":"30m","hadoop.security.group.mapping.ldap.conversion.rule":"none","hadoop.ssl.server.conf":"ssl-server.xml","fs.s3a.retry.throttle.interval":"100ms","seq.io.sort.factor":"100","fs.viewfs.overload.scheme.target.ofs.impl":"org.apache.hadoop.fs.ozone.RootedOzoneFileSystem","yarn.sharedcache.cleaner.initial-delay-mins":"10","mapreduce.client.completion.pollinterval":"5000","hadoop.ssl.keystores.factory.class":"org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory","yarn.app.mapreduce.am.resource.cpu-vcores":"1","yarn.timeline-service.enabled":"false","yarn.nodemanager.runtime.linux.docker.capabilities":"CHOWN,DAC_OVERRIDE,FSETID,FOWNER,MKNOD,NET_RAW,SETGID,SETUID,SETFCAP,SETPCAP,NET_BIND_SERVICE,SYS_CHROOT,KILL,AUDIT_WRITE","yarn.acl.enable":"false","yarn.timeline-service.entity-group-fs-store.done-dir":"/tmp/entity-file-history/done/","hadoop.security.group.mapping.ldap.num.attempts.before.failover":"3","mapreduce.task.profile":"false","hadoop.prometheus.endpoint.enabled":"false","yarn.resourcemanager.fs.state-store.uri":"${hadoop.tmp.dir}/yarn/system/rmstore","mapreduce.jobhistory.always-scan-user-dir":"false","fs.s3a.metadatastore.metadata.ttl":"15m","yarn.nodemanager.opportunistic-containers-use-pause-for-preemption":"false","yarn.nodemanager.linux-container-executor.nonsecure-mode.local-user":"nobody","yarn.timeline-service.reader.class":"org.apache.hadoop.yarn.server.timelineservice.storage.HBaseTimelineReaderImpl","yarn.resourcemanager.configuration.provider-class":"org.apache.hadoop.yarn.LocalConfigurationProvider","yarn.nodemanager.runtime.linux.docker.userremapping-uid-threshold":"1","yarn.resourcemanager.configuration.file-system-based-store":"/yarn/conf","mapreduce.job.cache.limit.max-single-resource-mb":"0","yarn.nodemanager.runtime.linux.docker.stop.grace-period":"10","yarn.resourcemanager.resource-profiles.source-file":"resource-profiles.json","mapreduce.job.dfs.storage.capacity.kill-limit-exceed":"false","yarn.nodemanager.resource.percentage-physical-cpu-limit":"100","mapreduce.jobhistory.client.thread-count":"10","tfile.fs.input.buffer.size":"262144","mapreduce.client.progressmonitor.pollinterval":"1000","yarn.nodemanager.log-dirs":"${yarn.log.dir}/userlogs","yarn.resourcemanager.opportunistic.max.container-allocation.per.am.heartbeat":"-1","fs.automatic.close":"true","yarn.resourcemanager.delegation-token-renewer.thread-retry-interval":"*********(redacted)","fs.s3a.select.input.csv.quote.character":"\"","yarn.nodemanager.hostname":"0.0.0.0","ipc.[port_number].cost-provider.impl":"org.apache.hadoop.ipc.DefaultCostProvider","yarn.nodemanager.runtime.linux.runc.manifest-to-resources-plugin":"org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.runc.HdfsManifestToResourcesPlugin","yarn.nodemanager.remote-app-log-dir-include-older":"true","yarn.nodemanager.resource.memory.cgroups.swappiness":"0","ftp.stream-buffer-size":"4096","yarn.fail-fast":"false","yarn.nodemanager.runtime.linux.runc.layer-mounts-to-keep":"100","yarn.timeline-service.app-aggregation-interval-secs":"15","hadoop.security.group.mapping.ldap.search.filter.user":"(&(objectClass=user)(sAMAccountName={0}))","ipc.[port_number].weighted-cost.lockshared":"10","yarn.nodemanager.container-localizer.log.level":"INFO","yarn.timeline-service.address":"${yarn.timeline-service.hostname}:10200","mapreduce.job.ubertask.maxmaps":"9","fs.s3a.threads.keepalivetime":"60","mapreduce.jobhistory.webapp.rest-csrf.methods-to-ignore":"GET,OPTIONS,HEAD","mapreduce.task.files.preserve.failedtasks":"false","yarn.app.mapreduce.client.job.retry-interval":"2000","ha.failover-controller.graceful-fence.connection.retries":"1","fs.s3a.select.output.csv.quote.escape.character":"\\\\","yarn.resourcemanager.delegation.token.max-lifetime":"*********(redacted)","hadoop.kerberos.keytab.login.autorenewal.enabled":"false","yarn.timeline-service.client.drain-entities.timeout.ms":"2000","yarn.nodemanager.resource-plugins.fpga.vendor-plugin.class":"org.apache.hadoop.yarn.server.nodemanager.containermanager.resourceplugin.fpga.IntelFpgaOpenclPlugin","yarn.resourcemanager.nodemanagers.heartbeat-interval-min-ms":"1000","yarn.timeline-service.entity-group-fs-store.summary-store":"org.apache.hadoop.yarn.server.timeline.LeveldbTimelineStore","mapreduce.reduce.cpu.vcores":"1","mapreduce.job.encrypted-intermediate-data.buffer.kb":"128","fs.client.resolve.remote.symlinks":"true","yarn.nodemanager.webapp.https.address":"0.0.0.0:8044","hadoop.http.cross-origin.allowed-origins":"*","mapreduce.job.encrypted-intermediate-data":"false","yarn.nodemanager.disk-health-checker.disk-utilization-threshold.enabled":"true","fs.s3a.executor.capacity":"16","yarn.timeline-service.entity-group-fs-store.retain-seconds":"604800","yarn.resourcemanager.metrics.runtime.buckets":"60,300,1440","yarn.timeline-service.generic-application-history.max-applications":"10000","yarn.nodemanager.local-dirs":"${hadoop.tmp.dir}/nm-local-dir","mapreduce.shuffle.connection-keep-alive.enable":"false","yarn.node-labels.configuration-type":"centralized","fs.s3a.path.style.access":"false","yarn.nodemanager.aux-services.mapreduce_shuffle.class":"org.apache.hadoop.mapred.ShuffleHandler","yarn.sharedcache.store.in-memory.staleness-period-mins":"10080","fs.adl.impl":"org.apache.hadoop.fs.adl.AdlFileSystem","yarn.resourcemanager.application.max-tags":"10","hadoop.domainname.resolver.impl":"org.apache.hadoop.net.DNSDomainNameResolver","yarn.resourcemanager.nodemanager.minimum.version":"NONE","mapreduce.jobhistory.webapp.xfs-filter.xframe-options":"SAMEORIGIN","yarn.app.mapreduce.am.staging-dir.erasurecoding.enabled":"false","net.topology.impl":"org.apache.hadoop.net.NetworkTopology","io.map.index.skip":"0","yarn.timeline-service.reader.webapp.https.address":"${yarn.timeline-service.webapp.https.address}","fs.ftp.data.connection.mode":"ACTIVE_LOCAL_DATA_CONNECTION_MODE","mapreduce.job.local-fs.single-disk-limit.check.kill-limit-exceed":"true","yarn.scheduler.maximum-allocation-vcores":"4","hadoop.http.cross-origin.allowed-headers":"X-Requested-With,Content-Type,Accept,Origin","yarn.nodemanager.log-aggregation.compression-type":"none","yarn.timeline-service.version":"1.0f","yarn.ipc.rpc.class":"org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC","mapreduce.reduce.maxattempts":"4","hadoop.security.dns.log-slow-lookups.enabled":"false","mapreduce.job.committer.setup.cleanup.needed":"true","hadoop.security.secure.random.impl":"org.apache.hadoop.crypto.random.OpensslSecureRandom","mapreduce.job.running.reduce.limit":"0","fs.s3a.select.errors.include.sql":"false","fs.s3a.connection.request.timeout":"0","ipc.maximum.response.length":"134217728","yarn.resourcemanager.webapp.rest-csrf.methods-to-ignore":"GET,OPTIONS,HEAD","mapreduce.job.token.tracking.ids.enabled":"*********(redacted)","hadoop.caller.context.max.size":"128","yarn.nodemanager.runtime.linux.docker.host-pid-namespace.allowed":"false","yarn.nodemanager.runtime.linux.docker.delayed-removal.allowed":"false","hadoop.registry.system.acls":"sasl:yarn@, sasl:mapred@, sasl:hdfs@","yarn.nodemanager.recovery.dir":"${hadoop.tmp.dir}/yarn-nm-recovery","fs.s3a.fast.upload.buffer":"disk","mapreduce.jobhistory.intermediate-done-dir":"${yarn.app.mapreduce.am.staging-dir}/history/done_intermediate","yarn.app.mapreduce.shuffle.log.separate":"true","yarn.log-aggregation.debug.filesize":"104857600","fs.s3a.max.total.tasks":"32","fs.s3a.readahead.range":"64K","hadoop.http.authentication.simple.anonymous.allowed":"true","fs.s3a.attempts.maximum":"20","hadoop.registry.zk.connection.timeout.ms":"15000","yarn.resourcemanager.delegation-token-renewer.thread-count":"*********(redacted)","yarn.resourcemanager.delegation-token-renewer.thread-timeout":"*********(redacted)","yarn.timeline-service.leveldb-timeline-store.start-time-write-cache-size":"10000","yarn.nodemanager.aux-services.manifest.reload-ms":"0","yarn.nodemanager.emit-container-events":"true","yarn.resourcemanager.resource-profiles.enabled":"false","yarn.timeline-service.hbase-schema.prefix":"prod.","fs.azure.authorization":"false","mapreduce.map.log.level":"INFO","ha.failover-controller.active-standby-elector.zk.op.retries":"3","yarn.resourcemanager.decommissioning-nodes-watcher.poll-interval-secs":"20","mapreduce.output.fileoutputformat.compress.type":"RECORD","yarn.resourcemanager.leveldb-state-store.path":"${hadoop.tmp.dir}/yarn/system/rmstore","yarn.timeline-service.webapp.rest-csrf.custom-header":"X-XSRF-Header","mapreduce.ifile.readahead.bytes":"4194304","yarn.sharedcache.app-checker.class":"org.apache.hadoop.yarn.server.sharedcachemanager.RemoteAppChecker","yarn.nodemanager.linux-container-executor.nonsecure-mode.limit-users":"true","yarn.nodemanager.resource.detect-hardware-capabilities":"false","mapreduce.cluster.acls.enabled":"false","mapreduce.job.speculative.retry-after-no-speculate":"1000","fs.viewfs.overload.scheme.target.abfs.impl":"org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem","hadoop.security.group.mapping.ldap.search.group.hierarchy.levels":"0","yarn.resourcemanager.fs.state-store.retry-interval-ms":"1000","file.stream-buffer-size":"4096","yarn.resourcemanager.application-timeouts.monitor.interval-ms":"3000","mapreduce.map.output.compress.codec":"org.apache.hadoop.io.compress.DefaultCodec","mapreduce.map.speculative":"true","yarn.nodemanager.runtime.linux.runc.image-tag-to-manifest-plugin.hdfs-hash-file":"/runc-root/image-tag-to-hash","mapreduce.job.speculative.retry-after-speculate":"15000","yarn.nodemanager.linux-container-executor.cgroups.mount":"false","yarn.app.mapreduce.am.container.log.backups":"0","yarn.app.mapreduce.am.log.level":"INFO","yarn.nodemanager.runtime.linux.runc.image-tag-to-manifest-plugin":"org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.runtime.runc.ImageTagToManifestPlugin","io.bytes.per.checksum":"512","mapreduce.job.reduce.slowstart.completedmaps":"0.05","yarn.timeline-service.http-authentication.type":"simple","hadoop.security.group.mapping.ldap.search.attr.group.name":"cn","yarn.nodemanager.resource-plugins.fpga.allowed-fpga-devices":"auto","yarn.timeline-service.client.internal-timers-ttl-secs":"420","fs.s3a.select.output.csv.quote.character":"\"","hadoop.http.logs.enabled":"true","fs.s3a.block.size":"32M","yarn.sharedcache.client-server.address":"0.0.0.0:8045","yarn.nodemanager.logaggregation.threadpool-size-max":"100","yarn.resourcemanager.hostname":"0.0.0.0","yarn.resourcemanager.delegation.key.update-interval":"86400000","mapreduce.reduce.shuffle.fetch.retry.enabled":"${yarn.nodemanager.recovery.enabled}","mapreduce.map.memory.mb":"-1","mapreduce.task.skip.start.attempts":"2","fs.AbstractFileSystem.hdfs.impl":"org.apache.hadoop.fs.Hdfs","yarn.nodemanager.disk-health-checker.enable":"true","fs.s3a.select.output.csv.quote.fields":"always","ipc.client.tcpnodelay":"true","ipc.client.rpc-timeout.ms":"0","yarn.nodemanager.webapp.rest-csrf.methods-to-ignore":"GET,OPTIONS,HEAD","yarn.resourcemanager.delegation-token-renewer.thread-retry-max-attempts":"*********(redacted)","ipc.client.low-latency":"false","mapreduce.input.lineinputformat.linespermap":"1","yarn.router.interceptor.user.threadpool-size":"5","ipc.client.connect.max.retries.on.timeouts":"45","yarn.timeline-service.leveldb-timeline-store.read-cache-size":"104857600","fs.AbstractFileSystem.har.impl":"org.apache.hadoop.fs.HarFs","mapreduce.job.split.metainfo.maxsize":"10000000","yarn.am.liveness-monitor.expiry-interval-ms":"600000","yarn.resourcemanager.container-tokens.master-key-rolling-interval-secs":"*********(redacted)","yarn.timeline-service.entity-group-fs-store.app-cache-size":"10","yarn.nodemanager.runtime.linux.runc.hdfs-manifest-to-resources-plugin.stat-cache-timeout-interval-secs":"360","fs.s3a.socket.recv.buffer":"8192","yarn.resourcemanager.resource-tracker.address":"${yarn.resourcemanager.hostname}:8031","yarn.nodemanager.node-labels.provider.fetch-timeout-ms":"1200000","mapreduce.job.heap.memory-mb.ratio":"0.8","yarn.resourcemanager.leveldb-state-store.compaction-interval-secs":"3600","yarn.resourcemanager.webapp.rest-csrf.custom-header":"X-XSRF-Header","yarn.nodemanager.pluggable-device-framework.enabled":"false","yarn.scheduler.configuration.fs.path":"file://${hadoop.tmp.dir}/yarn/system/schedconf","mapreduce.client.output.filter":"FAILED","hadoop.http.filter.initializers":"org.apache.hadoop.http.lib.StaticUserWebFilter","mapreduce.reduce.memory.mb":"-1","yarn.timeline-service.hostname":"0.0.0.0","file.replication":"1","yarn.nodemanager.container-metrics.unregister-delay-ms":"10000","yarn.nodemanager.container-metrics.period-ms":"-1","mapreduce.fileoutputcommitter.task.cleanup.enabled":"false","yarn.nodemanager.log.retain-seconds":"10800","yarn.timeline-service.entity-group-fs-store.cleaner-interval-seconds":"3600","ipc.[port_number].callqueue.impl":"java.util.concurrent.LinkedBlockingQueue","yarn.resourcemanager.keytab":"/etc/krb5.keytab","hadoop.security.group.mapping.providers.combined":"true","mapreduce.reduce.merge.inmem.threshold":"1000","yarn.timeline-service.recovery.enabled":"false","fs.azure.saskey.usecontainersaskeyforallaccess":"true","yarn.sharedcache.nm.uploader.thread-count":"20","yarn.resourcemanager.nodemanager-graceful-decommission-timeout-secs":"3600","ipc.[port_number].weighted-cost.lockfree":"1","mapreduce.shuffle.ssl.enabled":"false","yarn.timeline-service.hbase.coprocessor.app-final-value-retention-milliseconds":"259200000","yarn.nodemanager.opportunistic-containers-max-queue-length":"0","yarn.resourcemanager.state-store.max-completed-applications":"${yarn.resourcemanager.max-completed-applications}","mapreduce.job.speculative.minimum-allowed-tasks":"10","fs.s3a.aws.credentials.provider":"\n org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider,\n org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider,\n com.amazonaws.auth.EnvironmentVariableCredentialsProvider,\n org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider\n ","yarn.log-aggregation.retain-seconds":"-1","yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb":"0","mapreduce.jobhistory.max-age-ms":"604800000","hadoop.http.cross-origin.allowed-methods":"GET,POST,HEAD","yarn.resourcemanager.opportunistic-container-allocation.enabled":"false","mapreduce.jobhistory.webapp.address":"0.0.0.0:19888","hadoop.system.tags":"YARN,HDFS,NAMENODE,DATANODE,REQUIRED,SECURITY,KERBEROS,PERFORMANCE,CLIENT\n ,SERVER,DEBUG,DEPRECATED,COMMON,OPTIONAL","yarn.log-aggregation.file-controller.TFile.class":"org.apache.hadoop.yarn.logaggregation.filecontroller.tfile.LogAggregationTFileController","yarn.client.nodemanager-connect.max-wait-ms":"180000","yarn.resourcemanager.webapp.address":"${yarn.resourcemanager.hostname}:8088","mapreduce.jobhistory.recovery.enable":"false","mapreduce.reduce.shuffle.parallelcopies":"5","fs.AbstractFileSystem.webhdfs.impl":"org.apache.hadoop.fs.WebHdfs","fs.trash.interval":"0","yarn.app.mapreduce.client.max-retries":"3","hadoop.security.authentication":"simple","mapreduce.task.profile.reduce.params":"${mapreduce.task.profile.params}","yarn.app.mapreduce.am.resource.mb":"1536","mapreduce.input.fileinputformat.list-status.num-threads":"1","yarn.nodemanager.container-executor.class":"org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor","io.mapfile.bloom.size":"1048576","yarn.timeline-service.ttl-ms":"604800000","yarn.resourcemanager.nm-container-queuing.min-queue-length":"5","yarn.nodemanager.resource.cpu-vcores":"-1","mapreduce.job.reduces":"1","fs.s3a.multipart.size":"64M","fs.s3a.select.input.csv.comment.marker":"#","yarn.scheduler.minimum-allocation-vcores":"1","mapreduce.job.speculative.speculative-cap-total-tasks":"0.01","hadoop.ssl.client.conf":"ssl-client.xml","mapreduce.job.queuename":"default","mapreduce.job.encrypted-intermediate-data-key-size-bits":"128","fs.s3a.metadatastore.authoritative":"false","ipc.[port_number].weighted-cost.response":"1","yarn.nodemanager.webapp.xfs-filter.xframe-options":"SAMEORIGIN","ha.health-monitor.sleep-after-disconnect.ms":"1000","yarn.app.mapreduce.shuffle.log.limit.kb":"0","hadoop.security.group.mapping":"org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback","yarn.client.application-client-protocol.poll-timeout-ms":"-1","mapreduce.jobhistory.jhist.format":"binary","mapreduce.task.stuck.timeout-ms":"600000","yarn.resourcemanager.application.max-tag.length":"100","yarn.resourcemanager.ha.enabled":"false","dfs.client.ignore.namenode.default.kms.uri":"false","hadoop.http.staticuser.user":"dr.who","mapreduce.task.exit.timeout.check-interval-ms":"20000","mapreduce.jobhistory.intermediate-user-done-dir.permissions":"770","mapreduce.task.exit.timeout":"60000","yarn.nodemanager.linux-container-executor.resources-handler.class":"org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler","mapreduce.reduce.shuffle.memory.limit.percent":"0.25","yarn.resourcemanager.reservation-system.enable":"false","mapreduce.map.output.compress":"false","ha.zookeeper.acl":"world:anyone:rwcda","ipc.server.max.connections":"0","yarn.nodemanager.runtime.linux.docker.default-container-network":"host","yarn.router.webapp.address":"0.0.0.0:8089","yarn.scheduler.maximum-allocation-mb":"8192","yarn.resourcemanager.scheduler.monitor.policies":"org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy","yarn.sharedcache.cleaner.period-mins":"1440","yarn.nodemanager.resource-plugins.gpu.docker-plugin.nvidia-docker-v1.endpoint":"http://localhost:3476/v1.0/docker/cli","yarn.app.mapreduce.am.container.log.limit.kb":"0","ipc.client.connect.retry.interval":"1000","yarn.timeline-service.http-cross-origin.enabled":"false","fs.wasbs.impl":"org.apache.hadoop.fs.azure.NativeAzureFileSystem$Secure","yarn.resourcemanager.nodemanagers.heartbeat-interval-max-ms":"1000","yarn.federation.subcluster-resolver.class":"org.apache.hadoop.yarn.server.federation.resolver.DefaultSubClusterResolverImpl","yarn.resourcemanager.zk-state-store.parent-path":"/rmstore","fs.s3a.select.input.csv.field.delimiter":",","mapreduce.jobhistory.cleaner.enable":"true","yarn.timeline-service.client.fd-flush-interval-secs":"10","hadoop.security.kms.client.encrypted.key.cache.expiry":"43200000","yarn.client.nodemanager-client-async.thread-pool-max-size":"500","mapreduce.map.maxattempts":"4","yarn.resourcemanager.nm-container-queuing.sorting-nodes-interval-ms":"1000","fs.s3a.committer.staging.tmp.path":"tmp/staging","yarn.nodemanager.sleep-delay-before-sigkill.ms":"250","yarn.resourcemanager.nm-container-queuing.min-queue-wait-time-ms":"10","mapreduce.job.end-notification.retry.attempts":"0","yarn.nodemanager.resource.count-logical-processors-as-cores":"false","hadoop.registry.zk.root":"/registry","adl.feature.ownerandgroup.enableupn":"false","yarn.resourcemanager.zk-max-znode-size.bytes":"1048576","mapreduce.job.reduce.shuffle.consumer.plugin.class":"org.apache.hadoop.mapreduce.task.reduce.Shuffle","yarn.resourcemanager.delayed.delegation-token.removal-interval-ms":"*********(redacted)","yarn.nodemanager.localizer.cache.target-size-mb":"10240","fs.s3a.committer.staging.conflict-mode":"append","mapreduce.client.libjars.wildcard":"true","fs.s3a.committer.staging.unique-filenames":"true","yarn.nodemanager.node-attributes.provider.fetch-timeout-ms":"1200000","fs.s3a.list.version":"2","ftp.client-write-packet-size":"65536","ipc.[port_number].weighted-cost.lockexclusive":"100","fs.AbstractFileSystem.adl.impl":"org.apache.hadoop.fs.adl.Adl","yarn.nodemanager.container-log-monitor.enable":"false","hadoop.security.key.default.cipher":"AES/CTR/NoPadding","yarn.client.failover-retries":"0","fs.s3a.multipart.purge.age":"86400","mapreduce.job.local-fs.single-disk-limit.check.interval-ms":"5000","net.topology.node.switch.mapping.impl":"org.apache.hadoop.net.ScriptBasedMapping","yarn.nodemanager.amrmproxy.address":"0.0.0.0:8049","ipc.server.listen.queue.size":"256","ipc.[port_number].decay-scheduler.period-ms":"5000","yarn.nodemanager.runtime.linux.runc.image-tag-to-manifest-plugin.cache-refresh-interval-secs":"60","map.sort.class":"org.apache.hadoop.util.QuickSort","fs.viewfs.rename.strategy":"SAME_MOUNTPOINT","hadoop.security.kms.client.authentication.retry-count":"1","fs.permissions.umask-mode":"022","fs.s3a.assumed.role.credentials.provider":"org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider","yarn.nodemanager.runtime.linux.runc.privileged-containers.allowed":"false","yarn.nodemanager.vmem-check-enabled":"true","yarn.nodemanager.numa-awareness.enabled":"false","yarn.nodemanager.recovery.compaction-interval-secs":"3600","yarn.app.mapreduce.client-am.ipc.max-retries":"3","yarn.federation.registry.base-dir":"yarnfederation/","yarn.nodemanager.health-checker.run-before-startup":"false","mapreduce.job.max.map":"-1","mapreduce.job.local-fs.single-disk-limit.bytes":"-1","mapreduce.shuffle.pathcache.concurrency-level":"16","mapreduce.job.ubertask.maxreduces":"1","mapreduce.shuffle.pathcache.max-weight":"10485760","hadoop.security.kms.client.encrypted.key.cache.size":"500","hadoop.security.java.secure.random.algorithm":"SHA1PRNG","ha.failover-controller.cli-check.rpc-timeout.ms":"20000","mapreduce.jobhistory.jobname.limit":"50","fs.s3a.select.input.compression":"none","yarn.client.nodemanager-connect.retry-interval-ms":"10000","ipc.[port_number].scheduler.priority.levels":"4","yarn.timeline-service.state-store-class":"org.apache.hadoop.yarn.server.timeline.recovery.LeveldbTimelineStateStore","yarn.nodemanager.env-whitelist":"JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ","yarn.sharedcache.nested-level":"3","yarn.timeline-service.webapp.rest-csrf.methods-to-ignore":"GET,OPTIONS,HEAD","fs.azure.user.agent.prefix":"unknown","yarn.resourcemanager.zk-delegation-token-node.split-index":"*********(redacted)","yarn.nodemanager.numa-awareness.read-topology":"false","yarn.nodemanager.webapp.address":"${yarn.nodemanager.hostname}:8042","rpc.metrics.quantile.enable":"false","yarn.registry.class":"org.apache.hadoop.registry.client.impl.FSRegistryOperationsService","mapreduce.jobhistory.admin.acl":"*","yarn.resourcemanager.system-metrics-publisher.dispatcher.pool-size":"10","yarn.scheduler.queue-placement-rules":"user-group","hadoop.http.authentication.kerberos.keytab":"${user.home}/hadoop.keytab","yarn.resourcemanager.recovery.enabled":"false","fs.s3a.select.input.csv.header":"none","yarn.nodemanager.runtime.linux.runc.hdfs-manifest-to-resources-plugin.stat-cache-size":"500","yarn.timeline-service.webapp.rest-csrf.enabled":"false","yarn.nodemanager.disk-health-checker.min-free-space-per-disk-watermark-high-mb":"0"},"System Properties":{"java.io.tmpdir":"/var/folders/dm/1vhcj_l97j146n6mgr2cm9rw0000gp/T/","line.separator":"\n","path.separator":":","sun.management.compiler":"HotSpot 64-Bit Tiered Compilers","SPARK_SUBMIT":"true","sun.cpu.endian":"little","java.specification.version":"1.8","java.vm.specification.name":"Java Virtual Machine Specification","java.vendor":"Azul Systems, Inc.","java.vm.specification.version":"1.8","user.home":"/Users/lijunqing","file.encoding.pkg":"sun.io","ftp.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","sun.arch.data.model":"64","sun.boot.library.path":"/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib","user.dir":"/Users/lijunqing/Code/stczwd/spark/dist","java.library.path":"/Users/lijunqing/Library/Java/Extensions:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java:.","sun.cpu.isalist":"","os.arch":"aarch64","java.vm.version":"25.312-b07","jetty.git.hash":"526006ecfa3af7f1a27ef3a288e2bef7ea9dd7e8","java.endorsed.dirs":"/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/endorsed","java.runtime.version":"1.8.0_312-b07","java.vm.info":"mixed mode","java.ext.dirs":"/Users/lijunqing/Library/Java/Extensions:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/ext:/Library/Java/Extensions:/Network/Library/Java/Extensions:/System/Library/Java/Extensions:/usr/lib/java","java.runtime.name":"OpenJDK Runtime Environment","jdk.vendor.version":"Zulu 8.58.0.13-CA-macos-aarch64","file.separator":"/","java.class.version":"52.0","scala.usejavacp":"true","java.specification.name":"Java Platform API Specification","sun.boot.class.path":"/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/resources.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/rt.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/sunrsasign.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/jsse.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/jce.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/charsets.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/lib/jfr.jar:/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre/classes","file.encoding":"UTF-8","user.timezone":"Asia/Shanghai","java.specification.vendor":"Oracle Corporation","sun.java.launcher":"SUN_STANDARD","os.version":"12.1","sun.os.patch.level":"unknown","gopherProxySet":"false","java.vm.specification.vendor":"Oracle Corporation","jdk.lang.Process.launchMechanism":"POSIX_SPAWN","user.country":"CN","sun.jnu.encoding":"UTF-8","http.nonProxyHosts":"local|*.local|169.254/16|*.169.254/16","user.language":"zh","socksNonProxyHosts":"local|*.local|169.254/16|*.169.254/16","java.vendor.url":"http://www.azul.com/","java.awt.printerjob":"sun.lwawt.macosx.CPrinterJob","java.awt.graphicsenv":"sun.awt.CGraphicsEnvironment","awt.toolkit":"sun.lwawt.macosx.LWCToolkit","os.name":"Mac OS X","java.vm.vendor":"Azul Systems, Inc.","java.vendor.url.bug":"http://www.azul.com/support/","user.name":"lijunqing","java.vm.name":"OpenJDK 64-Bit Server VM","sun.java.command":"org.apache.spark.deploy.SparkSubmit --class org.apache.spark.repl.Main --name Spark shell spark-shell","java.home":"/Library/Java/JavaVirtualMachines/zulu-8.jdk/Contents/Home/jre","java.version":"1.8.0_312","sun.io.unicode.encoding":"UnicodeBig"},"Classpath Entries":{"/Users/lijunqing/Code/stczwd/spark/dist/jars/antlr4-runtime-4.8.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/janino-3.0.16.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/json4s-jackson_2.12-3.7.0-M11.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-hive_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/HikariCP-2.5.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/velocity-1.5.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-metastore-2.3.9.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/shims-0.9.23.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-codec-1.15.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-catalyst_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/tink-1.6.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/httpclient-4.5.13.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-repl_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/paranamer-2.8.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/pickle-1.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/audience-annotations-0.5.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/lapack-2.2.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jakarta.annotation-api-1.3.5.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-transport-4.1.72.Final.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/log4j-core-2.17.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-tags_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/arpack-2.2.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jersey-container-servlet-core-2.34.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/arrow-vector-6.0.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-yarn_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jersey-container-servlet-2.34.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/scala-reflect-2.12.15.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/objenesis-3.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-handler-4.1.72.Final.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-math3-3.4.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/blas-2.2.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-service-rpc-3.1.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-transport-classes-epoll-4.1.72.Final.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spire-util_2.12-0.17.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/metrics-graphite-4.2.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-tcnative-classes-2.0.46.Final.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/leveldbjni-all-1.8.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/datanucleus-rdbms-4.1.19.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jersey-common-2.34.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-unsafe_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/metrics-jvm-4.2.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-network-common_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/threeten-extra-1.5.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spire-platform_2.12-0.17.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-kvstore_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-cli-1.5.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/cats-kernel_2.12-2.1.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/minlog-1.3.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jpam-1.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/log4j-1.2-api-2.17.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/flatbuffers-java-1.12.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/stax-api-1.0.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-collections-3.2.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-transport-native-unix-common-4.1.72.Final.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/json4s-scalap_2.12-3.7.0-M11.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jakarta.xml.bind-api-2.3.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hk2-utils-2.6.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-codec-4.1.72.Final.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/antlr-runtime-3.5.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-jdbc-2.3.9.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/ivy-2.5.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hadoop-shaded-guava-1.1.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/json4s-core_2.12-3.7.0-M11.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jakarta.inject-2.6.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jackson-databind-2.13.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-hive-thriftserver_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/arrow-format-6.0.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/opencsv-2.3.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/derby-10.14.2.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/parquet-jackson-1.12.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/breeze-macros_2.12-1.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/JLargeArrays-1.5.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/javax.jdo-3.2.0-m3.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/istack-commons-runtime-3.0.8.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/bonecp-0.8.0.RELEASE.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/algebra_2.12-2.0.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spire-macros_2.12-0.17.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/orc-mapreduce-1.7.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-transport-native-kqueue-4.1.72.Final-osx-aarch_64.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/json4s-ast_2.12-3.7.0-M11.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/parquet-column-1.12.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-io-2.11.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-net-3.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jackson-module-scala_2.12-2.13.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-text-1.6.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jline-2.14.6.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-shims-scheduler-2.3.9.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/parquet-hadoop-1.12.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-streaming_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hadoop-client-runtime-3.3.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-shims-common-2.3.9.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/kryo-shaded-4.0.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/guava-14.0.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/shapeless_2.12-2.3.3.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jackson-core-asl-1.9.13.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/orc-shims-1.7.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-shims-2.3.9.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/stream-2.9.6.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/xbean-asm9-shaded-4.20.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/xz-1.8.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jdo-api-3.0.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-buffer-4.1.72.Final.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/compress-lzf-1.0.3.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jackson-core-2.13.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jersey-server-2.34.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-common-2.3.9.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-lang-2.6.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/lz4-java-1.8.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/macro-compat_2.12-1.1.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/arrow-memory-netty-6.0.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jakarta.validation-api-2.0.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/metrics-core-4.2.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/snappy-java-1.1.8.4.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-resolver-4.1.72.Final.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/transaction-api-1.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-common-4.1.72.Final.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/curator-framework-2.13.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/zookeeper-3.6.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hk2-api-2.6.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jodd-core-3.5.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-mllib-local_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/log4j-slf4j-impl-2.17.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-storage-api-2.7.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-exec-2.3.9-core.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/scala-compiler-2.12.15.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-transport-native-kqueue-4.1.72.Final-osx-x86_64.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jakarta.ws.rs-api-2.1.6.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/httpcore-4.4.14.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-llap-common-2.3.9.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/curator-client-2.13.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/chill_2.12-0.10.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/javolution-5.5.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/log4j-api-2.17.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/zookeeper-jute-3.6.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-transport-native-epoll-4.1.72.Final-linux-aarch_64.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-lang3-3.12.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/zstd-jni-1.5.1-1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/scala-parser-combinators_2.12-1.1.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/json-1.8.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-shims-0.23-2.3.9.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/conf/":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/parquet-format-structures-1.12.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/protobuf-java-2.5.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/RoaringBitmap-0.9.23.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/arpack_combined_all-0.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/osgi-resource-locator-1.0.3.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/parquet-common-1.12.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jaxb-runtime-2.3.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-compiler-3.0.16.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jsr305-3.0.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-sql_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-launcher_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/curator-recipes-2.13.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/dropwizard-metrics-hadoop-metrics2-reporter-0.1.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-crypto-1.1.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hk2-locator-2.6.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jackson-annotations-2.13.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/scala-library-2.12.15.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/arrow-memory-core-6.0.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/metrics-json-4.2.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/core-1.1.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/activation-1.1.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/scala-xml_2.12-1.2.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jakarta.servlet-api-4.0.3.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-network-shuffle_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hadoop-client-api-3.3.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-transport-native-epoll-4.1.72.Final-linux-x86_64.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/breeze_2.12-1.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-core_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-compress-1.21.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/oro-2.0.8.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-logging-1.1.3.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/super-csv-2.2.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/avro-1.11.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/htrace-core4-4.1.0-incubating.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/metrics-jmx-4.2.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/chill-java-0.10.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/JTransforms-3.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jcl-over-slf4j-1.7.32.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-pool-1.5.4.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/slf4j-api-1.7.32.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-mllib_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-sketch_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/datanucleus-core-4.1.17.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/gson-2.2.4.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/avro-mapred-1.11.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spire_2.12-0.17.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/parquet-encoding-1.12.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jul-to-slf4j-1.7.32.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jackson-mapper-asl-1.9.13.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jersey-hk2-2.34.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/ST4-4.0.4.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/libfb303-0.9.3.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/aircompressor-0.21.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/scala-collection-compat_2.12-2.1.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-all-4.1.72.Final.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/commons-dbcp-1.4.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-beeline-2.3.9.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/annotations-17.0.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-vector-code-gen-2.3.9.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/rocksdbjni-6.20.3.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/py4j-0.10.9.3.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/libthrift-0.12.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/orc-core-1.7.2.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-serde-2.3.9.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/spark-graphx_2.12-3.3.0-SNAPSHOT.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/datanucleus-api-jdo-4.2.4.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/javassist-3.25.0-GA.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jta-1.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/joda-time-2.10.12.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hadoop-yarn-server-web-proxy-3.3.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/hive-cli-2.3.9.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/aopalliance-repackaged-2.6.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/avro-ipc-1.11.0.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/netty-transport-classes-kqueue-4.1.72.Final.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/univocity-parsers-2.9.1.jar":"System Classpath","/Users/lijunqing/Code/stczwd/spark/dist/jars/jersey-client-2.34.jar":"System Classpath"}} +{"Event":"SparkListenerApplicationStart","App Name":"Spark shell","App ID":"local-1642039451826","Timestamp":1642039450519,"User":"lijunqing"} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":0,"description":"count at :23","details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line15.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line15.$read$$iw$$iw$$iw$$iw.(:33)\n$line15.$read$$iw$$iw$$iw.(:35)\n$line15.$read$$iw$$iw.(:37)\n$line15.$read$$iw.(:39)\n$line15.$read.(:41)\n$line15.$read$.(:45)\n$line15.$read$.()\n$line15.$eval$.$print$lzycompute(:7)\n$line15.$eval$.$print(:6)\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","physicalPlanDescription":"== Physical Plan ==\nAdaptiveSparkPlan (7)\n+- HashAggregate (6)\n +- Exchange (5)\n +- HashAggregate (4)\n +- Exchange (3)\n +- Project (2)\n +- Range (1)\n\n\n(1) Range\nOutput [1]: [id#0L]\nArguments: Range (0, 100, step=1, splits=Some(8))\n\n(2) Project\nOutput: []\nInput [1]: [id#0L]\n\n(3) Exchange\nInput: []\nArguments: RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#15]\n\n(4) HashAggregate\nInput: []\nKeys: []\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count#8L]\nResults [1]: [count#9L]\n\n(5) Exchange\nInput [1]: [count#9L]\nArguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#19]\n\n(6) HashAggregate\nInput [1]: [count#9L]\nKeys: []\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#5L]\nResults [1]: [count(1)#5L AS count#6L]\n\n(7) AdaptiveSparkPlan\nOutput [1]: [count#6L]\nArguments: isFinalPlan=false\n\n","sparkPlanInfo":{"nodeName":"AdaptiveSparkPlan","simpleString":"AdaptiveSparkPlan isFinalPlan=false","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[count(1)])","children":[{"nodeName":"Exchange","simpleString":"Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#19]","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[partial_count(1)])","children":[{"nodeName":"Exchange","simpleString":"Exchange RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#15]","children":[{"nodeName":"Project","simpleString":"Project","children":[{"nodeName":"Range","simpleString":"Range (0, 100, step=1, splits=8)","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":36,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":34,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":35,"metricType":"nsTiming"},{"name":"records read","accumulatorId":32,"metricType":"sum"},{"name":"local bytes read","accumulatorId":30,"metricType":"size"},{"name":"fetch wait time","accumulatorId":31,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":28,"metricType":"size"},{"name":"local blocks read","accumulatorId":27,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":26,"metricType":"sum"},{"name":"data size","accumulatorId":24,"metricType":"size"},{"name":"number of partitions","accumulatorId":25,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":29,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":33,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":20,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":21,"metricType":"timing"},{"name":"peak memory","accumulatorId":19,"metricType":"size"},{"name":"number of output rows","accumulatorId":18,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":23,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":22,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":16,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":17,"metricType":"nsTiming"},{"name":"records read","accumulatorId":14,"metricType":"sum"},{"name":"local bytes read","accumulatorId":12,"metricType":"size"},{"name":"fetch wait time","accumulatorId":13,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":10,"metricType":"size"},{"name":"local blocks read","accumulatorId":9,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":8,"metricType":"sum"},{"name":"data size","accumulatorId":6,"metricType":"size"},{"name":"number of partitions","accumulatorId":7,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":11,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":15,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":2,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":3,"metricType":"timing"},{"name":"peak memory","accumulatorId":1,"metricType":"size"},{"name":"number of output rows","accumulatorId":0,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":5,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":4,"metricType":"average"}]}],"metadata":{},"metrics":[]},"time":1642039495581,"modifiedConfigs":{}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate","executionId":0,"physicalPlanDescription":"== Physical Plan ==\nAdaptiveSparkPlan (12)\n+- == Current Plan ==\n HashAggregate (7)\n +- Exchange (6)\n +- HashAggregate (5)\n +- ShuffleQueryStage (4)\n +- Exchange (3)\n +- * Project (2)\n +- * Range (1)\n+- == Initial Plan ==\n HashAggregate (11)\n +- Exchange (10)\n +- HashAggregate (9)\n +- Exchange (8)\n +- Project (2)\n +- Range (1)\n\n\n(1) Range [codegen id : 1]\nOutput [1]: [id#0L]\nArguments: Range (0, 100, step=1, splits=Some(8))\n\n(2) Project [codegen id : 1]\nOutput: []\nInput [1]: [id#0L]\n\n(3) Exchange\nInput: []\nArguments: RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#28]\n\n(4) ShuffleQueryStage\nOutput: []\nArguments: 0\n\n(5) HashAggregate\nInput: []\nKeys: []\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count#8L]\nResults [1]: [count#9L]\n\n(6) Exchange\nInput [1]: [count#9L]\nArguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#33]\n\n(7) HashAggregate\nInput [1]: [count#9L]\nKeys: []\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#5L]\nResults [1]: [count(1)#5L AS count#6L]\n\n(8) Exchange\nInput: []\nArguments: RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#15]\n\n(9) HashAggregate\nInput: []\nKeys: []\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count#8L]\nResults [1]: [count#9L]\n\n(10) Exchange\nInput [1]: [count#9L]\nArguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#19]\n\n(11) HashAggregate\nInput [1]: [count#9L]\nKeys: []\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#5L]\nResults [1]: [count(1)#5L AS count#6L]\n\n(12) AdaptiveSparkPlan\nOutput [1]: [count#6L]\nArguments: isFinalPlan=false\n\n","sparkPlanInfo":{"nodeName":"AdaptiveSparkPlan","simpleString":"AdaptiveSparkPlan isFinalPlan=false","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[count(1)])","children":[{"nodeName":"Exchange","simpleString":"Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#33]","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[partial_count(1)])","children":[{"nodeName":"ShuffleQueryStage","simpleString":"ShuffleQueryStage 0","children":[{"nodeName":"Exchange","simpleString":"Exchange RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#28]","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project","children":[{"nodeName":"Range","simpleString":"Range (0, 100, step=1, splits=8)","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":36,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":73,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":71,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":72,"metricType":"nsTiming"},{"name":"records read","accumulatorId":69,"metricType":"sum"},{"name":"local bytes read","accumulatorId":67,"metricType":"size"},{"name":"fetch wait time","accumulatorId":68,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":65,"metricType":"size"},{"name":"local blocks read","accumulatorId":64,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":63,"metricType":"sum"},{"name":"data size","accumulatorId":61,"metricType":"size"},{"name":"number of partitions","accumulatorId":62,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":66,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":70,"metricType":"size"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":57,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":58,"metricType":"timing"},{"name":"peak memory","accumulatorId":56,"metricType":"size"},{"name":"number of output rows","accumulatorId":55,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":60,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":59,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":53,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":54,"metricType":"nsTiming"},{"name":"records read","accumulatorId":51,"metricType":"sum"},{"name":"local bytes read","accumulatorId":49,"metricType":"size"},{"name":"fetch wait time","accumulatorId":50,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":47,"metricType":"size"},{"name":"local blocks read","accumulatorId":46,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":45,"metricType":"sum"},{"name":"data size","accumulatorId":43,"metricType":"size"},{"name":"number of partitions","accumulatorId":44,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":48,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":52,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":39,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":40,"metricType":"timing"},{"name":"peak memory","accumulatorId":38,"metricType":"size"},{"name":"number of output rows","accumulatorId":37,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":42,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":41,"metricType":"average"}]}],"metadata":{},"metrics":[]}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerDriverAccumUpdates","executionId":0,"accumUpdates":[[62,10]]} +{"Event":"SparkListenerJobStart","Job ID":0,"Submission Time":1642039496191,"Stage Infos":[{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":8,"RDD Info":[{"RDD ID":4,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[3],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"ParallelCollectionRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line15.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line15.$read$$iw$$iw$$iw$$iw.(:33)\n$line15.$read$$iw$$iw$$iw.(:35)\n$line15.$read$$iw$$iw.(:37)\n$line15.$read$$iw.(:39)\n$line15.$read.(:41)\n$line15.$read$.(:45)\n$line15.$read$.()\n$line15.$eval$.$print$lzycompute(:7)\n$line15.$eval$.$print(:6)\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[0],"Properties":{"spark.sql.warehouse.dir":"file:/Users/lijunqing/Code/stczwd/spark/dist/spark-warehouse","spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"172.22.200.52","spark.eventLog.enabled":"true","spark.driver.port":"61038","spark.repl.class.uri":"spark://172.22.200.52:61038/classes","spark.jars":"","spark.repl.class.outputDir":"/private/var/folders/dm/1vhcj_l97j146n6mgr2cm9rw0000gp/T/spark-501ae231-0cb9-4ba2-845f-3fc3cb053141/repl-054c2c94-f7a2-4f4b-9f12-96a1cdb15bc6","spark.app.name":"Spark shell","spark.rdd.scope":"{\"id\":\"0\",\"name\":\"Exchange\"}","spark.rdd.scope.noOverride":"true","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1642039450519","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.home":"/Users/lijunqing/Code/stczwd/spark/dist","spark.eventLog.dir":"/Users/lijunqing/Code/stczwd/spark/dist/eventLog","spark.sql.execution.id":"0","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1642039451826"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":8,"RDD Info":[{"RDD ID":4,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[3],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"ParallelCollectionRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line15.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line15.$read$$iw$$iw$$iw$$iw.(:33)\n$line15.$read$$iw$$iw$$iw.(:35)\n$line15.$read$$iw$$iw.(:37)\n$line15.$read$$iw.(:39)\n$line15.$read.(:41)\n$line15.$read$.(:45)\n$line15.$read$.()\n$line15.$eval$.$print$lzycompute(:7)\n$line15.$eval$.$print(:6)\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Submission Time":1642039496205,"Accumulables":[],"Resource Profile Id":0},"Properties":{"spark.sql.warehouse.dir":"file:/Users/lijunqing/Code/stczwd/spark/dist/spark-warehouse","spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"172.22.200.52","spark.eventLog.enabled":"true","spark.driver.port":"61038","spark.repl.class.uri":"spark://172.22.200.52:61038/classes","spark.jars":"","spark.repl.class.outputDir":"/private/var/folders/dm/1vhcj_l97j146n6mgr2cm9rw0000gp/T/spark-501ae231-0cb9-4ba2-845f-3fc3cb053141/repl-054c2c94-f7a2-4f4b-9f12-96a1cdb15bc6","spark.app.name":"Spark shell","spark.rdd.scope":"{\"id\":\"0\",\"name\":\"Exchange\"}","spark.rdd.scope.noOverride":"true","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1642039450519","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.home":"/Users/lijunqing/Code/stczwd/spark/dist","spark.eventLog.dir":"/Users/lijunqing/Code/stczwd/spark/dist/eventLog","spark.sql.execution.id":"0","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1642039451826"}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Partition ID":0,"Launch Time":1642039496413,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Partition ID":1,"Launch Time":1642039496425,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Partition ID":2,"Launch Time":1642039496425,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Partition ID":3,"Launch Time":1642039496425,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Partition ID":4,"Launch Time":1642039496426,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Partition ID":5,"Launch Time":1642039496426,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Partition ID":6,"Launch Time":1642039496427,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":0,"Stage Attempt ID":0,"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Partition ID":7,"Launch Time":1642039496427,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":6,"Index":6,"Attempt":0,"Partition ID":6,"Launch Time":1642039496427,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039496888,"Failed":false,"Killed":false,"Accumulables":[{"ID":36,"Name":"number of output rows","Update":"12","Value":"12","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"data size","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":70,"Name":"shuffle bytes written","Update":"468","Value":"468","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":71,"Name":"shuffle records written","Update":"12","Value":"12","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":72,"Name":"shuffle write time","Update":"32402787","Value":"32402787","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":73,"Name":"duration","Update":"27","Value":"27","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":74,"Name":"internal.metrics.executorDeserializeTime","Update":277,"Value":277,"Internal":true,"Count Failed Values":true},{"ID":75,"Name":"internal.metrics.executorDeserializeCpuTime","Update":131261000,"Value":131261000,"Internal":true,"Count Failed Values":true},{"ID":76,"Name":"internal.metrics.executorRunTime","Update":158,"Value":158,"Internal":true,"Count Failed Values":true},{"ID":77,"Name":"internal.metrics.executorCpuTime","Update":75295000,"Value":75295000,"Internal":true,"Count Failed Values":true},{"ID":78,"Name":"internal.metrics.resultSize","Update":1836,"Value":1836,"Internal":true,"Count Failed Values":true},{"ID":79,"Name":"internal.metrics.jvmGCTime","Update":11,"Value":11,"Internal":true,"Count Failed Values":true},{"ID":80,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":92,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":468,"Value":468,"Internal":true,"Count Failed Values":true},{"ID":93,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":12,"Value":12,"Internal":true,"Count Failed Values":true},{"ID":94,"Name":"internal.metrics.shuffle.write.writeTime","Update":32402787,"Value":32402787,"Internal":true,"Count Failed Values":true},{"ID":96,"Name":"internal.metrics.input.recordsRead","Update":12,"Value":12,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":319351248,"JVMOffHeapMemory":183565360,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":18842,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":18842,"OffHeapUnifiedMemory":0,"DirectPoolMemory":3897,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":13,"MinorGCTime":120,"MajorGCCount":4,"MajorGCTime":353,"TotalGCTime":473},"Task Metrics":{"Executor Deserialize Time":277,"Executor Deserialize CPU Time":131261000,"Executor Run Time":158,"Executor CPU Time":75295000,"Peak Execution Memory":0,"Result Size":1836,"JVM GC Time":11,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":468,"Shuffle Write Time":32402787,"Shuffle Records Written":12},"Input Metrics":{"Bytes Read":0,"Records Read":12},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":5,"Index":5,"Attempt":0,"Partition ID":5,"Launch Time":1642039496426,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039496891,"Failed":false,"Killed":false,"Accumulables":[{"ID":36,"Name":"number of output rows","Update":"13","Value":"25","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"data size","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":70,"Name":"shuffle bytes written","Update":"472","Value":"940","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":71,"Name":"shuffle records written","Update":"13","Value":"25","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":72,"Name":"shuffle write time","Update":"31370834","Value":"63773621","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":73,"Name":"duration","Update":"27","Value":"54","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":74,"Name":"internal.metrics.executorDeserializeTime","Update":277,"Value":554,"Internal":true,"Count Failed Values":true},{"ID":75,"Name":"internal.metrics.executorDeserializeCpuTime","Update":156363000,"Value":287624000,"Internal":true,"Count Failed Values":true},{"ID":76,"Name":"internal.metrics.executorRunTime","Update":158,"Value":316,"Internal":true,"Count Failed Values":true},{"ID":77,"Name":"internal.metrics.executorCpuTime","Update":60658000,"Value":135953000,"Internal":true,"Count Failed Values":true},{"ID":78,"Name":"internal.metrics.resultSize","Update":1836,"Value":3672,"Internal":true,"Count Failed Values":true},{"ID":79,"Name":"internal.metrics.jvmGCTime","Update":11,"Value":22,"Internal":true,"Count Failed Values":true},{"ID":80,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":92,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":472,"Value":940,"Internal":true,"Count Failed Values":true},{"ID":93,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":13,"Value":25,"Internal":true,"Count Failed Values":true},{"ID":94,"Name":"internal.metrics.shuffle.write.writeTime","Update":31370834,"Value":63773621,"Internal":true,"Count Failed Values":true},{"ID":96,"Name":"internal.metrics.input.recordsRead","Update":13,"Value":25,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":319351248,"JVMOffHeapMemory":183565360,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":18842,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":18842,"OffHeapUnifiedMemory":0,"DirectPoolMemory":3897,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":13,"MinorGCTime":120,"MajorGCCount":4,"MajorGCTime":353,"TotalGCTime":473},"Task Metrics":{"Executor Deserialize Time":277,"Executor Deserialize CPU Time":156363000,"Executor Run Time":158,"Executor CPU Time":60658000,"Peak Execution Memory":0,"Result Size":1836,"JVM GC Time":11,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":472,"Shuffle Write Time":31370834,"Shuffle Records Written":13},"Input Metrics":{"Bytes Read":0,"Records Read":13},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":4,"Index":4,"Attempt":0,"Partition ID":4,"Launch Time":1642039496426,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039496892,"Failed":false,"Killed":false,"Accumulables":[{"ID":36,"Name":"number of output rows","Update":"12","Value":"37","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"data size","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":70,"Name":"shuffle bytes written","Update":"468","Value":"1408","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":71,"Name":"shuffle records written","Update":"12","Value":"37","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":72,"Name":"shuffle write time","Update":"32008329","Value":"95781950","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":73,"Name":"duration","Update":"27","Value":"81","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":74,"Name":"internal.metrics.executorDeserializeTime","Update":277,"Value":831,"Internal":true,"Count Failed Values":true},{"ID":75,"Name":"internal.metrics.executorDeserializeCpuTime","Update":143825000,"Value":431449000,"Internal":true,"Count Failed Values":true},{"ID":76,"Name":"internal.metrics.executorRunTime","Update":158,"Value":474,"Internal":true,"Count Failed Values":true},{"ID":77,"Name":"internal.metrics.executorCpuTime","Update":42837000,"Value":178790000,"Internal":true,"Count Failed Values":true},{"ID":78,"Name":"internal.metrics.resultSize","Update":1836,"Value":5508,"Internal":true,"Count Failed Values":true},{"ID":79,"Name":"internal.metrics.jvmGCTime","Update":11,"Value":33,"Internal":true,"Count Failed Values":true},{"ID":80,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":92,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":468,"Value":1408,"Internal":true,"Count Failed Values":true},{"ID":93,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":12,"Value":37,"Internal":true,"Count Failed Values":true},{"ID":94,"Name":"internal.metrics.shuffle.write.writeTime","Update":32008329,"Value":95781950,"Internal":true,"Count Failed Values":true},{"ID":96,"Name":"internal.metrics.input.recordsRead","Update":12,"Value":37,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":319351248,"JVMOffHeapMemory":183565360,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":18842,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":18842,"OffHeapUnifiedMemory":0,"DirectPoolMemory":3897,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":13,"MinorGCTime":120,"MajorGCCount":4,"MajorGCTime":353,"TotalGCTime":473},"Task Metrics":{"Executor Deserialize Time":277,"Executor Deserialize CPU Time":143825000,"Executor Run Time":158,"Executor CPU Time":42837000,"Peak Execution Memory":0,"Result Size":1836,"JVM GC Time":11,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":468,"Shuffle Write Time":32008329,"Shuffle Records Written":12},"Input Metrics":{"Bytes Read":0,"Records Read":12},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":2,"Index":2,"Attempt":0,"Partition ID":2,"Launch Time":1642039496425,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039496892,"Failed":false,"Killed":false,"Accumulables":[{"ID":36,"Name":"number of output rows","Update":"12","Value":"49","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"data size","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":70,"Name":"shuffle bytes written","Update":"468","Value":"1876","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":71,"Name":"shuffle records written","Update":"12","Value":"49","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":72,"Name":"shuffle write time","Update":"31530251","Value":"127312201","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":73,"Name":"duration","Update":"27","Value":"108","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":74,"Name":"internal.metrics.executorDeserializeTime","Update":277,"Value":1108,"Internal":true,"Count Failed Values":true},{"ID":75,"Name":"internal.metrics.executorDeserializeCpuTime","Update":144165000,"Value":575614000,"Internal":true,"Count Failed Values":true},{"ID":76,"Name":"internal.metrics.executorRunTime","Update":158,"Value":632,"Internal":true,"Count Failed Values":true},{"ID":77,"Name":"internal.metrics.executorCpuTime","Update":47904000,"Value":226694000,"Internal":true,"Count Failed Values":true},{"ID":78,"Name":"internal.metrics.resultSize","Update":1836,"Value":7344,"Internal":true,"Count Failed Values":true},{"ID":79,"Name":"internal.metrics.jvmGCTime","Update":11,"Value":44,"Internal":true,"Count Failed Values":true},{"ID":80,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":4,"Internal":true,"Count Failed Values":true},{"ID":92,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":468,"Value":1876,"Internal":true,"Count Failed Values":true},{"ID":93,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":12,"Value":49,"Internal":true,"Count Failed Values":true},{"ID":94,"Name":"internal.metrics.shuffle.write.writeTime","Update":31530251,"Value":127312201,"Internal":true,"Count Failed Values":true},{"ID":96,"Name":"internal.metrics.input.recordsRead","Update":12,"Value":49,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":319351248,"JVMOffHeapMemory":183565360,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":18842,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":18842,"OffHeapUnifiedMemory":0,"DirectPoolMemory":3897,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":13,"MinorGCTime":120,"MajorGCCount":4,"MajorGCTime":353,"TotalGCTime":473},"Task Metrics":{"Executor Deserialize Time":277,"Executor Deserialize CPU Time":144165000,"Executor Run Time":158,"Executor CPU Time":47904000,"Peak Execution Memory":0,"Result Size":1836,"JVM GC Time":11,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":468,"Shuffle Write Time":31530251,"Shuffle Records Written":12},"Input Metrics":{"Bytes Read":0,"Records Read":12},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":0,"Index":0,"Attempt":0,"Partition ID":0,"Launch Time":1642039496413,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039496893,"Failed":false,"Killed":false,"Accumulables":[{"ID":36,"Name":"number of output rows","Update":"12","Value":"61","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"data size","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":70,"Name":"shuffle bytes written","Update":"468","Value":"2344","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":71,"Name":"shuffle records written","Update":"12","Value":"61","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":72,"Name":"shuffle write time","Update":"33968833","Value":"161281034","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":73,"Name":"duration","Update":"27","Value":"135","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":74,"Name":"internal.metrics.executorDeserializeTime","Update":277,"Value":1385,"Internal":true,"Count Failed Values":true},{"ID":75,"Name":"internal.metrics.executorDeserializeCpuTime","Update":142074000,"Value":717688000,"Internal":true,"Count Failed Values":true},{"ID":76,"Name":"internal.metrics.executorRunTime","Update":158,"Value":790,"Internal":true,"Count Failed Values":true},{"ID":77,"Name":"internal.metrics.executorCpuTime","Update":48671000,"Value":275365000,"Internal":true,"Count Failed Values":true},{"ID":78,"Name":"internal.metrics.resultSize","Update":1836,"Value":9180,"Internal":true,"Count Failed Values":true},{"ID":79,"Name":"internal.metrics.jvmGCTime","Update":11,"Value":55,"Internal":true,"Count Failed Values":true},{"ID":80,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":5,"Internal":true,"Count Failed Values":true},{"ID":92,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":468,"Value":2344,"Internal":true,"Count Failed Values":true},{"ID":93,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":12,"Value":61,"Internal":true,"Count Failed Values":true},{"ID":94,"Name":"internal.metrics.shuffle.write.writeTime","Update":33968833,"Value":161281034,"Internal":true,"Count Failed Values":true},{"ID":96,"Name":"internal.metrics.input.recordsRead","Update":12,"Value":61,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":319351248,"JVMOffHeapMemory":183565360,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":18842,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":18842,"OffHeapUnifiedMemory":0,"DirectPoolMemory":3897,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":13,"MinorGCTime":120,"MajorGCCount":4,"MajorGCTime":353,"TotalGCTime":473},"Task Metrics":{"Executor Deserialize Time":277,"Executor Deserialize CPU Time":142074000,"Executor Run Time":158,"Executor CPU Time":48671000,"Peak Execution Memory":0,"Result Size":1836,"JVM GC Time":11,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":468,"Shuffle Write Time":33968833,"Shuffle Records Written":12},"Input Metrics":{"Bytes Read":0,"Records Read":12},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":1,"Index":1,"Attempt":0,"Partition ID":1,"Launch Time":1642039496425,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039496893,"Failed":false,"Killed":false,"Accumulables":[{"ID":36,"Name":"number of output rows","Update":"13","Value":"74","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"data size","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":70,"Name":"shuffle bytes written","Update":"472","Value":"2816","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":71,"Name":"shuffle records written","Update":"13","Value":"74","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":72,"Name":"shuffle write time","Update":"32707462","Value":"193988496","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":73,"Name":"duration","Update":"27","Value":"162","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":74,"Name":"internal.metrics.executorDeserializeTime","Update":277,"Value":1662,"Internal":true,"Count Failed Values":true},{"ID":75,"Name":"internal.metrics.executorDeserializeCpuTime","Update":141303000,"Value":858991000,"Internal":true,"Count Failed Values":true},{"ID":76,"Name":"internal.metrics.executorRunTime","Update":158,"Value":948,"Internal":true,"Count Failed Values":true},{"ID":77,"Name":"internal.metrics.executorCpuTime","Update":52706000,"Value":328071000,"Internal":true,"Count Failed Values":true},{"ID":78,"Name":"internal.metrics.resultSize","Update":1836,"Value":11016,"Internal":true,"Count Failed Values":true},{"ID":79,"Name":"internal.metrics.jvmGCTime","Update":11,"Value":66,"Internal":true,"Count Failed Values":true},{"ID":80,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":6,"Internal":true,"Count Failed Values":true},{"ID":92,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":472,"Value":2816,"Internal":true,"Count Failed Values":true},{"ID":93,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":13,"Value":74,"Internal":true,"Count Failed Values":true},{"ID":94,"Name":"internal.metrics.shuffle.write.writeTime","Update":32707462,"Value":193988496,"Internal":true,"Count Failed Values":true},{"ID":96,"Name":"internal.metrics.input.recordsRead","Update":13,"Value":74,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":319351248,"JVMOffHeapMemory":183565360,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":18842,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":18842,"OffHeapUnifiedMemory":0,"DirectPoolMemory":3897,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":13,"MinorGCTime":120,"MajorGCCount":4,"MajorGCTime":353,"TotalGCTime":473},"Task Metrics":{"Executor Deserialize Time":277,"Executor Deserialize CPU Time":141303000,"Executor Run Time":158,"Executor CPU Time":52706000,"Peak Execution Memory":0,"Result Size":1836,"JVM GC Time":11,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":472,"Shuffle Write Time":32707462,"Shuffle Records Written":13},"Input Metrics":{"Bytes Read":0,"Records Read":13},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":7,"Index":7,"Attempt":0,"Partition ID":7,"Launch Time":1642039496427,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039496893,"Failed":false,"Killed":false,"Accumulables":[{"ID":36,"Name":"number of output rows","Update":"13","Value":"87","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"data size","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":70,"Name":"shuffle bytes written","Update":"472","Value":"3288","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":71,"Name":"shuffle records written","Update":"13","Value":"87","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":72,"Name":"shuffle write time","Update":"32328957","Value":"226317453","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":73,"Name":"duration","Update":"27","Value":"189","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":74,"Name":"internal.metrics.executorDeserializeTime","Update":277,"Value":1939,"Internal":true,"Count Failed Values":true},{"ID":75,"Name":"internal.metrics.executorDeserializeCpuTime","Update":145865000,"Value":1004856000,"Internal":true,"Count Failed Values":true},{"ID":76,"Name":"internal.metrics.executorRunTime","Update":158,"Value":1106,"Internal":true,"Count Failed Values":true},{"ID":77,"Name":"internal.metrics.executorCpuTime","Update":42833000,"Value":370904000,"Internal":true,"Count Failed Values":true},{"ID":78,"Name":"internal.metrics.resultSize","Update":1836,"Value":12852,"Internal":true,"Count Failed Values":true},{"ID":79,"Name":"internal.metrics.jvmGCTime","Update":11,"Value":77,"Internal":true,"Count Failed Values":true},{"ID":80,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":7,"Internal":true,"Count Failed Values":true},{"ID":92,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":472,"Value":3288,"Internal":true,"Count Failed Values":true},{"ID":93,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":13,"Value":87,"Internal":true,"Count Failed Values":true},{"ID":94,"Name":"internal.metrics.shuffle.write.writeTime","Update":32328957,"Value":226317453,"Internal":true,"Count Failed Values":true},{"ID":96,"Name":"internal.metrics.input.recordsRead","Update":13,"Value":87,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":319351248,"JVMOffHeapMemory":183565360,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":18842,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":18842,"OffHeapUnifiedMemory":0,"DirectPoolMemory":3897,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":13,"MinorGCTime":120,"MajorGCCount":4,"MajorGCTime":353,"TotalGCTime":473},"Task Metrics":{"Executor Deserialize Time":277,"Executor Deserialize CPU Time":145865000,"Executor Run Time":158,"Executor CPU Time":42833000,"Peak Execution Memory":0,"Result Size":1836,"JVM GC Time":11,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":472,"Shuffle Write Time":32328957,"Shuffle Records Written":13},"Input Metrics":{"Bytes Read":0,"Records Read":13},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":0,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":3,"Index":3,"Attempt":0,"Partition ID":3,"Launch Time":1642039496425,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039496893,"Failed":false,"Killed":false,"Accumulables":[{"ID":36,"Name":"number of output rows","Update":"13","Value":"100","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"data size","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":70,"Name":"shuffle bytes written","Update":"472","Value":"3760","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":71,"Name":"shuffle records written","Update":"13","Value":"100","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":72,"Name":"shuffle write time","Update":"33237160","Value":"259554613","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":73,"Name":"duration","Update":"27","Value":"216","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":74,"Name":"internal.metrics.executorDeserializeTime","Update":277,"Value":2216,"Internal":true,"Count Failed Values":true},{"ID":75,"Name":"internal.metrics.executorDeserializeCpuTime","Update":81219000,"Value":1086075000,"Internal":true,"Count Failed Values":true},{"ID":76,"Name":"internal.metrics.executorRunTime","Update":158,"Value":1264,"Internal":true,"Count Failed Values":true},{"ID":77,"Name":"internal.metrics.executorCpuTime","Update":50624000,"Value":421528000,"Internal":true,"Count Failed Values":true},{"ID":78,"Name":"internal.metrics.resultSize","Update":1836,"Value":14688,"Internal":true,"Count Failed Values":true},{"ID":79,"Name":"internal.metrics.jvmGCTime","Update":11,"Value":88,"Internal":true,"Count Failed Values":true},{"ID":80,"Name":"internal.metrics.resultSerializationTime","Update":1,"Value":8,"Internal":true,"Count Failed Values":true},{"ID":92,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":472,"Value":3760,"Internal":true,"Count Failed Values":true},{"ID":93,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":13,"Value":100,"Internal":true,"Count Failed Values":true},{"ID":94,"Name":"internal.metrics.shuffle.write.writeTime","Update":33237160,"Value":259554613,"Internal":true,"Count Failed Values":true},{"ID":96,"Name":"internal.metrics.input.recordsRead","Update":13,"Value":100,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":319351248,"JVMOffHeapMemory":183565360,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":18842,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":18842,"OffHeapUnifiedMemory":0,"DirectPoolMemory":3897,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":13,"MinorGCTime":120,"MajorGCCount":4,"MajorGCTime":353,"TotalGCTime":473},"Task Metrics":{"Executor Deserialize Time":277,"Executor Deserialize CPU Time":81219000,"Executor Run Time":158,"Executor CPU Time":50624000,"Peak Execution Memory":0,"Result Size":1836,"JVM GC Time":11,"Result Serialization Time":1,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":472,"Shuffle Write Time":33237160,"Shuffle Records Written":13},"Input Metrics":{"Bytes Read":0,"Records Read":13},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":0,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":8,"RDD Info":[{"RDD ID":4,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[3],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"ParallelCollectionRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line15.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line15.$read$$iw$$iw$$iw$$iw.(:33)\n$line15.$read$$iw$$iw$$iw.(:35)\n$line15.$read$$iw$$iw.(:37)\n$line15.$read$$iw.(:39)\n$line15.$read.(:41)\n$line15.$read$.(:45)\n$line15.$read$.()\n$line15.$eval$.$print$lzycompute(:7)\n$line15.$eval$.$print(:6)\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Submission Time":1642039496205,"Completion Time":1642039496907,"Accumulables":[{"ID":36,"Name":"number of output rows","Value":"100","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":61,"Name":"data size","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":70,"Name":"shuffle bytes written","Value":"3760","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":71,"Name":"shuffle records written","Value":"100","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":72,"Name":"shuffle write time","Value":"259554613","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":73,"Name":"duration","Value":"216","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":74,"Name":"internal.metrics.executorDeserializeTime","Value":2216,"Internal":true,"Count Failed Values":true},{"ID":75,"Name":"internal.metrics.executorDeserializeCpuTime","Value":1086075000,"Internal":true,"Count Failed Values":true},{"ID":76,"Name":"internal.metrics.executorRunTime","Value":1264,"Internal":true,"Count Failed Values":true},{"ID":77,"Name":"internal.metrics.executorCpuTime","Value":421528000,"Internal":true,"Count Failed Values":true},{"ID":78,"Name":"internal.metrics.resultSize","Value":14688,"Internal":true,"Count Failed Values":true},{"ID":79,"Name":"internal.metrics.jvmGCTime","Value":88,"Internal":true,"Count Failed Values":true},{"ID":80,"Name":"internal.metrics.resultSerializationTime","Value":8,"Internal":true,"Count Failed Values":true},{"ID":92,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":3760,"Internal":true,"Count Failed Values":true},{"ID":93,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":100,"Internal":true,"Count Failed Values":true},{"ID":94,"Name":"internal.metrics.shuffle.write.writeTime","Value":259554613,"Internal":true,"Count Failed Values":true},{"ID":96,"Name":"internal.metrics.input.recordsRead","Value":100,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":0,"Completion Time":1642039496914,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate","executionId":0,"physicalPlanDescription":"== Physical Plan ==\nAdaptiveSparkPlan (13)\n+- == Current Plan ==\n HashAggregate (8)\n +- ShuffleQueryStage (7)\n +- Exchange (6)\n +- * HashAggregate (5)\n +- ShuffleQueryStage (4)\n +- Exchange (3)\n +- * Project (2)\n +- * Range (1)\n+- == Initial Plan ==\n HashAggregate (12)\n +- Exchange (11)\n +- HashAggregate (10)\n +- Exchange (9)\n +- Project (2)\n +- Range (1)\n\n\n(1) Range [codegen id : 1]\nOutput [1]: [id#0L]\nArguments: Range (0, 100, step=1, splits=Some(8))\n\n(2) Project [codegen id : 1]\nOutput: []\nInput [1]: [id#0L]\n\n(3) Exchange\nInput: []\nArguments: RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#28]\n\n(4) ShuffleQueryStage\nOutput: []\nArguments: 0\n\n(5) HashAggregate [codegen id : 2]\nInput: []\nKeys: []\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count#8L]\nResults [1]: [count#9L]\n\n(6) Exchange\nInput [1]: [count#9L]\nArguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#49]\n\n(7) ShuffleQueryStage\nOutput [1]: [count#9L]\nArguments: 1\n\n(8) HashAggregate\nInput [1]: [count#9L]\nKeys: []\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#5L]\nResults [1]: [count(1)#5L AS count#6L]\n\n(9) Exchange\nInput: []\nArguments: RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#15]\n\n(10) HashAggregate\nInput: []\nKeys: []\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count#8L]\nResults [1]: [count#9L]\n\n(11) Exchange\nInput [1]: [count#9L]\nArguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#19]\n\n(12) HashAggregate\nInput [1]: [count#9L]\nKeys: []\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#5L]\nResults [1]: [count(1)#5L AS count#6L]\n\n(13) AdaptiveSparkPlan\nOutput [1]: [count#6L]\nArguments: isFinalPlan=false\n\n","sparkPlanInfo":{"nodeName":"AdaptiveSparkPlan","simpleString":"AdaptiveSparkPlan isFinalPlan=false","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[count(1)])","children":[{"nodeName":"ShuffleQueryStage","simpleString":"ShuffleQueryStage 1","children":[{"nodeName":"Exchange","simpleString":"Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#49]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[partial_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"ShuffleQueryStage","simpleString":"ShuffleQueryStage 0","children":[{"nodeName":"Exchange","simpleString":"Exchange RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#28]","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project","children":[{"nodeName":"Range","simpleString":"Range (0, 100, step=1, splits=8)","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":36,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":73,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":71,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":72,"metricType":"nsTiming"},{"name":"records read","accumulatorId":69,"metricType":"sum"},{"name":"local bytes read","accumulatorId":67,"metricType":"size"},{"name":"fetch wait time","accumulatorId":68,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":65,"metricType":"size"},{"name":"local blocks read","accumulatorId":64,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":63,"metricType":"sum"},{"name":"data size","accumulatorId":61,"metricType":"size"},{"name":"number of partitions","accumulatorId":62,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":66,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":70,"metricType":"size"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":120,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":121,"metricType":"timing"},{"name":"peak memory","accumulatorId":119,"metricType":"size"},{"name":"number of output rows","accumulatorId":118,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":123,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":122,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":117,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":115,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":116,"metricType":"nsTiming"},{"name":"records read","accumulatorId":113,"metricType":"sum"},{"name":"local bytes read","accumulatorId":111,"metricType":"size"},{"name":"fetch wait time","accumulatorId":112,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":109,"metricType":"size"},{"name":"local blocks read","accumulatorId":108,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":107,"metricType":"sum"},{"name":"data size","accumulatorId":105,"metricType":"size"},{"name":"number of partitions","accumulatorId":106,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":110,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":114,"metricType":"size"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":101,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":102,"metricType":"timing"},{"name":"peak memory","accumulatorId":100,"metricType":"size"},{"name":"number of output rows","accumulatorId":99,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":104,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":103,"metricType":"average"}]}],"metadata":{},"metrics":[]}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerDriverAccumUpdates","executionId":0,"accumUpdates":[[106,1]]} +{"Event":"SparkListenerJobStart","Job ID":1,"Submission Time":1642039497010,"Stage Infos":[{"Stage ID":1,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":8,"RDD Info":[{"RDD ID":4,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[3],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"ParallelCollectionRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line15.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line15.$read$$iw$$iw$$iw$$iw.(:33)\n$line15.$read$$iw$$iw$$iw.(:35)\n$line15.$read$$iw$$iw.(:37)\n$line15.$read$$iw.(:39)\n$line15.$read.(:41)\n$line15.$read$.(:45)\n$line15.$read$.()\n$line15.$eval$.$print$lzycompute(:7)\n$line15.$eval$.$print(:6)\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":10,"RDD Info":[{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"4\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"5\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"count at :23","Parent IDs":[5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"9\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[4],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[1],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line15.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line15.$read$$iw$$iw$$iw$$iw.(:33)\n$line15.$read$$iw$$iw$$iw.(:35)\n$line15.$read$$iw$$iw.(:37)\n$line15.$read$$iw.(:39)\n$line15.$read.(:41)\n$line15.$read$.(:45)\n$line15.$read$.()\n$line15.$eval$.$print$lzycompute(:7)\n$line15.$eval$.$print(:6)\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[1,2],"Properties":{"spark.sql.warehouse.dir":"file:/Users/lijunqing/Code/stczwd/spark/dist/spark-warehouse","spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"172.22.200.52","spark.eventLog.enabled":"true","spark.driver.port":"61038","__fetch_continuous_blocks_in_batch_enabled":"true","spark.repl.class.uri":"spark://172.22.200.52:61038/classes","spark.jars":"","spark.repl.class.outputDir":"/private/var/folders/dm/1vhcj_l97j146n6mgr2cm9rw0000gp/T/spark-501ae231-0cb9-4ba2-845f-3fc3cb053141/repl-054c2c94-f7a2-4f4b-9f12-96a1cdb15bc6","spark.app.name":"Spark shell","spark.rdd.scope":"{\"id\":\"4\",\"name\":\"Exchange\"}","spark.rdd.scope.noOverride":"true","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1642039450519","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.home":"/Users/lijunqing/Code/stczwd/spark/dist","spark.eventLog.dir":"/Users/lijunqing/Code/stczwd/spark/dist/eventLog","spark.sql.execution.id":"0","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1642039451826"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":10,"RDD Info":[{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"4\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"5\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"count at :23","Parent IDs":[5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"9\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[4],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[1],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line15.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line15.$read$$iw$$iw$$iw$$iw.(:33)\n$line15.$read$$iw$$iw$$iw.(:35)\n$line15.$read$$iw$$iw.(:37)\n$line15.$read$$iw.(:39)\n$line15.$read.(:41)\n$line15.$read$.(:45)\n$line15.$read$.()\n$line15.$eval$.$print$lzycompute(:7)\n$line15.$eval$.$print(:6)\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Submission Time":1642039497017,"Accumulables":[],"Resource Profile Id":0},"Properties":{"spark.sql.warehouse.dir":"file:/Users/lijunqing/Code/stczwd/spark/dist/spark-warehouse","spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"172.22.200.52","spark.eventLog.enabled":"true","spark.driver.port":"61038","__fetch_continuous_blocks_in_batch_enabled":"true","spark.repl.class.uri":"spark://172.22.200.52:61038/classes","spark.jars":"","spark.repl.class.outputDir":"/private/var/folders/dm/1vhcj_l97j146n6mgr2cm9rw0000gp/T/spark-501ae231-0cb9-4ba2-845f-3fc3cb053141/repl-054c2c94-f7a2-4f4b-9f12-96a1cdb15bc6","spark.app.name":"Spark shell","spark.rdd.scope":"{\"id\":\"4\",\"name\":\"Exchange\"}","spark.rdd.scope.noOverride":"true","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1642039450519","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.home":"/Users/lijunqing/Code/stczwd/spark/dist","spark.eventLog.dir":"/Users/lijunqing/Code/stczwd/spark/dist/eventLog","spark.sql.execution.id":"0","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1642039451826"}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Partition ID":0,"Launch Time":1642039497053,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Partition ID":1,"Launch Time":1642039497055,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Partition ID":2,"Launch Time":1642039497055,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Partition ID":3,"Launch Time":1642039497055,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Partition ID":4,"Launch Time":1642039497056,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Partition ID":5,"Launch Time":1642039497056,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Partition ID":6,"Launch Time":1642039497056,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Partition ID":7,"Launch Time":1642039497056,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":16,"Index":8,"Attempt":0,"Partition ID":8,"Launch Time":1642039497114,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":2,"Stage Attempt ID":0,"Task Info":{"Task ID":17,"Index":9,"Attempt":0,"Partition ID":9,"Launch Time":1642039497115,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":14,"Index":6,"Attempt":0,"Partition ID":6,"Launch Time":1642039497056,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039497115,"Failed":false,"Killed":false,"Accumulables":[{"ID":64,"Name":"local blocks read","Update":"8","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"local bytes read","Update":"376","Value":"376","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":68,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":69,"Name":"records read","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":105,"Name":"data size","Update":"16","Value":"16","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":114,"Name":"shuffle bytes written","Update":"59","Value":"59","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":115,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":116,"Name":"shuffle write time","Update":"976166","Value":"976166","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":117,"Name":"duration","Update":"12","Value":"12","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":118,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":121,"Name":"time in aggregation build","Update":"11","Value":"11","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":124,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":4,"Internal":true,"Count Failed Values":true},{"ID":125,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2498000,"Value":2498000,"Internal":true,"Count Failed Values":true},{"ID":126,"Name":"internal.metrics.executorRunTime","Update":50,"Value":50,"Internal":true,"Count Failed Values":true},{"ID":127,"Name":"internal.metrics.executorCpuTime","Update":21709000,"Value":21709000,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.resultSize","Update":2848,"Value":2848,"Internal":true,"Count Failed Values":true},{"ID":135,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":136,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":8,"Value":8,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":138,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":376,"Value":376,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.recordsRead","Update":10,"Value":10,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":59,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.write.writeTime","Update":976166,"Value":976166,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":2498000,"Executor Run Time":50,"Executor CPU Time":21709000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":376,"Total Records Read":10},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":976166,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":10,"Index":2,"Attempt":0,"Partition ID":2,"Launch Time":1642039497055,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039497116,"Failed":false,"Killed":false,"Accumulables":[{"ID":64,"Name":"local blocks read","Update":"8","Value":"16","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"local bytes read","Update":"376","Value":"752","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":68,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":69,"Name":"records read","Update":"10","Value":"20","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":105,"Name":"data size","Update":"16","Value":"32","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":114,"Name":"shuffle bytes written","Update":"59","Value":"118","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":115,"Name":"shuffle records written","Update":"1","Value":"2","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":116,"Name":"shuffle write time","Update":"1282083","Value":"2258249","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":117,"Name":"duration","Update":"13","Value":"25","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":118,"Name":"number of output rows","Update":"1","Value":"2","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":121,"Name":"time in aggregation build","Update":"12","Value":"23","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":124,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":8,"Internal":true,"Count Failed Values":true},{"ID":125,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2238000,"Value":4736000,"Internal":true,"Count Failed Values":true},{"ID":126,"Name":"internal.metrics.executorRunTime","Update":52,"Value":102,"Internal":true,"Count Failed Values":true},{"ID":127,"Name":"internal.metrics.executorCpuTime","Update":27082000,"Value":48791000,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.resultSize","Update":2848,"Value":5696,"Internal":true,"Count Failed Values":true},{"ID":135,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":136,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":8,"Value":16,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":138,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":376,"Value":752,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.recordsRead","Update":10,"Value":20,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":118,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.write.writeTime","Update":1282083,"Value":2258249,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":2238000,"Executor Run Time":52,"Executor CPU Time":27082000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":376,"Total Records Read":10},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":1282083,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":15,"Index":7,"Attempt":0,"Partition ID":7,"Launch Time":1642039497056,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039497118,"Failed":false,"Killed":false,"Accumulables":[{"ID":64,"Name":"local blocks read","Update":"8","Value":"24","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"local bytes read","Update":"380","Value":"1132","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":68,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":69,"Name":"records read","Update":"11","Value":"31","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":105,"Name":"data size","Update":"16","Value":"48","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":114,"Name":"shuffle bytes written","Update":"59","Value":"177","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":115,"Name":"shuffle records written","Update":"1","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":116,"Name":"shuffle write time","Update":"1080959","Value":"3339208","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":117,"Name":"duration","Update":"13","Value":"38","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":118,"Name":"number of output rows","Update":"1","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":121,"Name":"time in aggregation build","Update":"12","Value":"35","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":124,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":11,"Internal":true,"Count Failed Values":true},{"ID":125,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2759000,"Value":7495000,"Internal":true,"Count Failed Values":true},{"ID":126,"Name":"internal.metrics.executorRunTime","Update":53,"Value":155,"Internal":true,"Count Failed Values":true},{"ID":127,"Name":"internal.metrics.executorCpuTime","Update":17515000,"Value":66306000,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.resultSize","Update":2848,"Value":8544,"Internal":true,"Count Failed Values":true},{"ID":135,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":136,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":8,"Value":24,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":138,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":380,"Value":1132,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.recordsRead","Update":11,"Value":31,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":177,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.write.writeTime","Update":1080959,"Value":3339208,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":2759000,"Executor Run Time":53,"Executor CPU Time":17515000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":380,"Total Records Read":11},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":1080959,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":12,"Index":4,"Attempt":0,"Partition ID":4,"Launch Time":1642039497056,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039497118,"Failed":false,"Killed":false,"Accumulables":[{"ID":64,"Name":"local blocks read","Update":"8","Value":"32","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"local bytes read","Update":"372","Value":"1504","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":68,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":69,"Name":"records read","Update":"9","Value":"40","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":105,"Name":"data size","Update":"16","Value":"64","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":114,"Name":"shuffle bytes written","Update":"59","Value":"236","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":115,"Name":"shuffle records written","Update":"1","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":116,"Name":"shuffle write time","Update":"1093250","Value":"4432458","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":117,"Name":"duration","Update":"13","Value":"51","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":118,"Name":"number of output rows","Update":"1","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":121,"Name":"time in aggregation build","Update":"12","Value":"47","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":124,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":15,"Internal":true,"Count Failed Values":true},{"ID":125,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2614000,"Value":10109000,"Internal":true,"Count Failed Values":true},{"ID":126,"Name":"internal.metrics.executorRunTime","Update":54,"Value":209,"Internal":true,"Count Failed Values":true},{"ID":127,"Name":"internal.metrics.executorCpuTime","Update":21689000,"Value":87995000,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.resultSize","Update":2848,"Value":11392,"Internal":true,"Count Failed Values":true},{"ID":135,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":136,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":8,"Value":32,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":138,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":372,"Value":1504,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.recordsRead","Update":9,"Value":40,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":236,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":4,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.write.writeTime","Update":1093250,"Value":4432458,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":2614000,"Executor Run Time":54,"Executor CPU Time":21689000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":372,"Total Records Read":9},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":1093250,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":11,"Index":3,"Attempt":0,"Partition ID":3,"Launch Time":1642039497055,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039497119,"Failed":false,"Killed":false,"Accumulables":[{"ID":64,"Name":"local blocks read","Update":"8","Value":"40","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"local bytes read","Update":"376","Value":"1880","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":68,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":69,"Name":"records read","Update":"10","Value":"50","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":105,"Name":"data size","Update":"16","Value":"80","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":114,"Name":"shuffle bytes written","Update":"59","Value":"295","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":115,"Name":"shuffle records written","Update":"1","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":116,"Name":"shuffle write time","Update":"1558374","Value":"5990832","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":117,"Name":"duration","Update":"13","Value":"64","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":118,"Name":"number of output rows","Update":"1","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":121,"Name":"time in aggregation build","Update":"11","Value":"58","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":124,"Name":"internal.metrics.executorDeserializeTime","Update":6,"Value":21,"Internal":true,"Count Failed Values":true},{"ID":125,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3452000,"Value":13561000,"Internal":true,"Count Failed Values":true},{"ID":126,"Name":"internal.metrics.executorRunTime","Update":54,"Value":263,"Internal":true,"Count Failed Values":true},{"ID":127,"Name":"internal.metrics.executorCpuTime","Update":17668000,"Value":105663000,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.resultSize","Update":2848,"Value":14240,"Internal":true,"Count Failed Values":true},{"ID":135,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":136,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":8,"Value":40,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":138,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":376,"Value":1880,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.recordsRead","Update":10,"Value":50,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":295,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":5,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.write.writeTime","Update":1558374,"Value":5990832,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":6,"Executor Deserialize CPU Time":3452000,"Executor Run Time":54,"Executor CPU Time":17668000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":376,"Total Records Read":10},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":1558374,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":9,"Index":1,"Attempt":0,"Partition ID":1,"Launch Time":1642039497055,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039497120,"Failed":false,"Killed":false,"Accumulables":[{"ID":64,"Name":"local blocks read","Update":"8","Value":"48","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"local bytes read","Update":"372","Value":"2252","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":68,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":69,"Name":"records read","Update":"9","Value":"59","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":105,"Name":"data size","Update":"16","Value":"96","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":114,"Name":"shuffle bytes written","Update":"59","Value":"354","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":115,"Name":"shuffle records written","Update":"1","Value":"6","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":116,"Name":"shuffle write time","Update":"1490249","Value":"7481081","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":117,"Name":"duration","Update":"13","Value":"77","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":118,"Name":"number of output rows","Update":"1","Value":"6","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":121,"Name":"time in aggregation build","Update":"11","Value":"69","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":124,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":25,"Internal":true,"Count Failed Values":true},{"ID":125,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3175000,"Value":16736000,"Internal":true,"Count Failed Values":true},{"ID":126,"Name":"internal.metrics.executorRunTime","Update":57,"Value":320,"Internal":true,"Count Failed Values":true},{"ID":127,"Name":"internal.metrics.executorCpuTime","Update":18115000,"Value":123778000,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.resultSize","Update":2848,"Value":17088,"Internal":true,"Count Failed Values":true},{"ID":135,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":136,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":8,"Value":48,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":138,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":372,"Value":2252,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.recordsRead","Update":9,"Value":59,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":354,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":6,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.write.writeTime","Update":1490249,"Value":7481081,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":3175000,"Executor Run Time":57,"Executor CPU Time":18115000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":372,"Total Records Read":9},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":1490249,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":13,"Index":5,"Attempt":0,"Partition ID":5,"Launch Time":1642039497056,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039497123,"Failed":false,"Killed":false,"Accumulables":[{"ID":64,"Name":"local blocks read","Update":"8","Value":"56","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"local bytes read","Update":"372","Value":"2624","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":68,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":69,"Name":"records read","Update":"9","Value":"68","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":105,"Name":"data size","Update":"16","Value":"112","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":114,"Name":"shuffle bytes written","Update":"59","Value":"413","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":115,"Name":"shuffle records written","Update":"1","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":116,"Name":"shuffle write time","Update":"673209","Value":"8154290","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":117,"Name":"duration","Update":"12","Value":"89","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":118,"Name":"number of output rows","Update":"1","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":121,"Name":"time in aggregation build","Update":"11","Value":"80","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":124,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":28,"Internal":true,"Count Failed Values":true},{"ID":125,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2177000,"Value":18913000,"Internal":true,"Count Failed Values":true},{"ID":126,"Name":"internal.metrics.executorRunTime","Update":60,"Value":380,"Internal":true,"Count Failed Values":true},{"ID":127,"Name":"internal.metrics.executorCpuTime","Update":18621000,"Value":142399000,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.resultSize","Update":2848,"Value":19936,"Internal":true,"Count Failed Values":true},{"ID":135,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":136,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":8,"Value":56,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":138,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":372,"Value":2624,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.recordsRead","Update":9,"Value":68,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":413,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":7,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.write.writeTime","Update":673209,"Value":8154290,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":2177000,"Executor Run Time":60,"Executor CPU Time":18621000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":372,"Total Records Read":9},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":673209,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":8,"Index":0,"Attempt":0,"Partition ID":0,"Launch Time":1642039497053,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039497124,"Failed":false,"Killed":false,"Accumulables":[{"ID":64,"Name":"local blocks read","Update":"8","Value":"64","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"local bytes read","Update":"376","Value":"3000","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":68,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":69,"Name":"records read","Update":"10","Value":"78","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":105,"Name":"data size","Update":"16","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":114,"Name":"shuffle bytes written","Update":"59","Value":"472","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":115,"Name":"shuffle records written","Update":"1","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":116,"Name":"shuffle write time","Update":"1340668","Value":"9494958","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":117,"Name":"duration","Update":"13","Value":"102","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":118,"Name":"number of output rows","Update":"1","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":121,"Name":"time in aggregation build","Update":"11","Value":"91","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":124,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":31,"Internal":true,"Count Failed Values":true},{"ID":125,"Name":"internal.metrics.executorDeserializeCpuTime","Update":3128000,"Value":22041000,"Internal":true,"Count Failed Values":true},{"ID":126,"Name":"internal.metrics.executorRunTime","Update":62,"Value":442,"Internal":true,"Count Failed Values":true},{"ID":127,"Name":"internal.metrics.executorCpuTime","Update":20132000,"Value":162531000,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.resultSize","Update":2848,"Value":22784,"Internal":true,"Count Failed Values":true},{"ID":135,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":136,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":8,"Value":64,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":138,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":376,"Value":3000,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.recordsRead","Update":10,"Value":78,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":472,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":8,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.write.writeTime","Update":1340668,"Value":9494958,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":3128000,"Executor Run Time":62,"Executor CPU Time":20132000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":376,"Total Records Read":10},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":1340668,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":17,"Index":9,"Attempt":0,"Partition ID":9,"Launch Time":1642039497115,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039497125,"Failed":false,"Killed":false,"Accumulables":[{"ID":64,"Name":"local blocks read","Update":"8","Value":"72","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"local bytes read","Update":"380","Value":"3380","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":68,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":69,"Name":"records read","Update":"11","Value":"89","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":105,"Name":"data size","Update":"16","Value":"144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":114,"Name":"shuffle bytes written","Update":"59","Value":"531","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":115,"Name":"shuffle records written","Update":"1","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":116,"Name":"shuffle write time","Update":"506707","Value":"10001665","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":117,"Name":"duration","Update":"1","Value":"103","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":118,"Name":"number of output rows","Update":"1","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":121,"Name":"time in aggregation build","Update":"1","Value":"92","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":124,"Name":"internal.metrics.executorDeserializeTime","Update":1,"Value":32,"Internal":true,"Count Failed Values":true},{"ID":125,"Name":"internal.metrics.executorDeserializeCpuTime","Update":1662000,"Value":23703000,"Internal":true,"Count Failed Values":true},{"ID":126,"Name":"internal.metrics.executorRunTime","Update":6,"Value":448,"Internal":true,"Count Failed Values":true},{"ID":127,"Name":"internal.metrics.executorCpuTime","Update":5470000,"Value":168001000,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.resultSize","Update":2848,"Value":25632,"Internal":true,"Count Failed Values":true},{"ID":135,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":136,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":8,"Value":72,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":138,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":380,"Value":3380,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.recordsRead","Update":11,"Value":89,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":531,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":9,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.write.writeTime","Update":506707,"Value":10001665,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":1,"Executor Deserialize CPU Time":1662000,"Executor Run Time":6,"Executor CPU Time":5470000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":380,"Total Records Read":11},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":506707,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":2,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":16,"Index":8,"Attempt":0,"Partition ID":8,"Launch Time":1642039497114,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039497126,"Failed":false,"Killed":false,"Accumulables":[{"ID":64,"Name":"local blocks read","Update":"8","Value":"80","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"local bytes read","Update":"380","Value":"3760","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":68,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":69,"Name":"records read","Update":"11","Value":"100","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":105,"Name":"data size","Update":"16","Value":"160","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":114,"Name":"shuffle bytes written","Update":"59","Value":"590","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":115,"Name":"shuffle records written","Update":"1","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":116,"Name":"shuffle write time","Update":"568086","Value":"10569751","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":117,"Name":"duration","Update":"1","Value":"104","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":118,"Name":"number of output rows","Update":"1","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":121,"Name":"time in aggregation build","Update":"1","Value":"93","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":124,"Name":"internal.metrics.executorDeserializeTime","Update":1,"Value":33,"Internal":true,"Count Failed Values":true},{"ID":125,"Name":"internal.metrics.executorDeserializeCpuTime","Update":1875000,"Value":25578000,"Internal":true,"Count Failed Values":true},{"ID":126,"Name":"internal.metrics.executorRunTime","Update":7,"Value":455,"Internal":true,"Count Failed Values":true},{"ID":127,"Name":"internal.metrics.executorCpuTime","Update":5421000,"Value":173422000,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.resultSize","Update":2848,"Value":28480,"Internal":true,"Count Failed Values":true},{"ID":135,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":136,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":8,"Value":80,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":138,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":380,"Value":3760,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.recordsRead","Update":11,"Value":100,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":590,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":10,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.write.writeTime","Update":568086,"Value":10569751,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":1,"Executor Deserialize CPU Time":1875000,"Executor Run Time":7,"Executor CPU Time":5421000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":8,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":380,"Total Records Read":11},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":568086,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":2,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":10,"RDD Info":[{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"4\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"5\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"count at :23","Parent IDs":[5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"9\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[4],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[1],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line15.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line15.$read$$iw$$iw$$iw$$iw.(:33)\n$line15.$read$$iw$$iw$$iw.(:35)\n$line15.$read$$iw$$iw.(:37)\n$line15.$read$$iw.(:39)\n$line15.$read.(:41)\n$line15.$read$.(:45)\n$line15.$read$.()\n$line15.$eval$.$print$lzycompute(:7)\n$line15.$eval$.$print(:6)\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Submission Time":1642039497017,"Completion Time":1642039497127,"Accumulables":[{"ID":64,"Name":"local blocks read","Value":"80","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":67,"Name":"local bytes read","Value":"3760","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":68,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":69,"Name":"records read","Value":"100","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":105,"Name":"data size","Value":"160","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":114,"Name":"shuffle bytes written","Value":"590","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":115,"Name":"shuffle records written","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":116,"Name":"shuffle write time","Value":"10569751","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":117,"Name":"duration","Value":"104","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":118,"Name":"number of output rows","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":121,"Name":"time in aggregation build","Value":"93","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":124,"Name":"internal.metrics.executorDeserializeTime","Value":33,"Internal":true,"Count Failed Values":true},{"ID":125,"Name":"internal.metrics.executorDeserializeCpuTime","Value":25578000,"Internal":true,"Count Failed Values":true},{"ID":126,"Name":"internal.metrics.executorRunTime","Value":455,"Internal":true,"Count Failed Values":true},{"ID":127,"Name":"internal.metrics.executorCpuTime","Value":173422000,"Internal":true,"Count Failed Values":true},{"ID":128,"Name":"internal.metrics.resultSize","Value":28480,"Internal":true,"Count Failed Values":true},{"ID":135,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":136,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":80,"Internal":true,"Count Failed Values":true},{"ID":137,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":138,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":139,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":3760,"Internal":true,"Count Failed Values":true},{"ID":140,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":141,"Name":"internal.metrics.shuffle.read.recordsRead","Value":100,"Internal":true,"Count Failed Values":true},{"ID":142,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":590,"Internal":true,"Count Failed Values":true},{"ID":143,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":10,"Internal":true,"Count Failed Values":true},{"ID":144,"Name":"internal.metrics.shuffle.write.writeTime","Value":10569751,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":1,"Completion Time":1642039497127,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate","executionId":0,"physicalPlanDescription":"== Physical Plan ==\nAdaptiveSparkPlan (13)\n+- == Final Plan ==\n * HashAggregate (8)\n +- ShuffleQueryStage (7)\n +- Exchange (6)\n +- * HashAggregate (5)\n +- ShuffleQueryStage (4)\n +- Exchange (3)\n +- * Project (2)\n +- * Range (1)\n+- == Initial Plan ==\n HashAggregate (12)\n +- Exchange (11)\n +- HashAggregate (10)\n +- Exchange (9)\n +- Project (2)\n +- Range (1)\n\n\n(1) Range [codegen id : 1]\nOutput [1]: [id#0L]\nArguments: Range (0, 100, step=1, splits=Some(8))\n\n(2) Project [codegen id : 1]\nOutput: []\nInput [1]: [id#0L]\n\n(3) Exchange\nInput: []\nArguments: RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#28]\n\n(4) ShuffleQueryStage\nOutput: []\nArguments: 0\n\n(5) HashAggregate [codegen id : 2]\nInput: []\nKeys: []\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count#8L]\nResults [1]: [count#9L]\n\n(6) Exchange\nInput [1]: [count#9L]\nArguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#49]\n\n(7) ShuffleQueryStage\nOutput [1]: [count#9L]\nArguments: 1\n\n(8) HashAggregate [codegen id : 3]\nInput [1]: [count#9L]\nKeys: []\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#5L]\nResults [1]: [count(1)#5L AS count#6L]\n\n(9) Exchange\nInput: []\nArguments: RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#15]\n\n(10) HashAggregate\nInput: []\nKeys: []\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count#8L]\nResults [1]: [count#9L]\n\n(11) Exchange\nInput [1]: [count#9L]\nArguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#19]\n\n(12) HashAggregate\nInput [1]: [count#9L]\nKeys: []\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#5L]\nResults [1]: [count(1)#5L AS count#6L]\n\n(13) AdaptiveSparkPlan\nOutput [1]: [count#6L]\nArguments: isFinalPlan=true\n\n","sparkPlanInfo":{"nodeName":"AdaptiveSparkPlan","simpleString":"AdaptiveSparkPlan isFinalPlan=true","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"ShuffleQueryStage","simpleString":"ShuffleQueryStage 1","children":[{"nodeName":"Exchange","simpleString":"Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#49]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[partial_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"ShuffleQueryStage","simpleString":"ShuffleQueryStage 0","children":[{"nodeName":"Exchange","simpleString":"Exchange RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#28]","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project","children":[{"nodeName":"Range","simpleString":"Range (0, 100, step=1, splits=8)","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":36,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":73,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":71,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":72,"metricType":"nsTiming"},{"name":"records read","accumulatorId":69,"metricType":"sum"},{"name":"local bytes read","accumulatorId":67,"metricType":"size"},{"name":"fetch wait time","accumulatorId":68,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":65,"metricType":"size"},{"name":"local blocks read","accumulatorId":64,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":63,"metricType":"sum"},{"name":"data size","accumulatorId":61,"metricType":"size"},{"name":"number of partitions","accumulatorId":62,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":66,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":70,"metricType":"size"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":120,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":121,"metricType":"timing"},{"name":"peak memory","accumulatorId":119,"metricType":"size"},{"name":"number of output rows","accumulatorId":118,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":123,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":122,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":117,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":115,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":116,"metricType":"nsTiming"},{"name":"records read","accumulatorId":113,"metricType":"sum"},{"name":"local bytes read","accumulatorId":111,"metricType":"size"},{"name":"fetch wait time","accumulatorId":112,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":109,"metricType":"size"},{"name":"local blocks read","accumulatorId":108,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":107,"metricType":"sum"},{"name":"data size","accumulatorId":105,"metricType":"size"},{"name":"number of partitions","accumulatorId":106,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":110,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":114,"metricType":"size"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":152,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":153,"metricType":"timing"},{"name":"peak memory","accumulatorId":151,"metricType":"size"},{"name":"number of output rows","accumulatorId":150,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":155,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":154,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":149,"metricType":"timing"}]}],"metadata":{},"metrics":[]}} +{"Event":"SparkListenerJobStart","Job ID":2,"Submission Time":1642039497179,"Stage Infos":[{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":1,"RDD Info":[{"RDD ID":10,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"15\",\"name\":\"mapPartitionsInternal\"}","Callsite":"count at :23","Parent IDs":[9],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":9,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"10\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"count at :23","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"14\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[4],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line15.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line15.$read$$iw$$iw$$iw$$iw.(:33)\n$line15.$read$$iw$$iw$$iw.(:35)\n$line15.$read$$iw$$iw.(:37)\n$line15.$read$$iw.(:39)\n$line15.$read.(:41)\n$line15.$read$.(:45)\n$line15.$read$.()\n$line15.$eval$.$print$lzycompute(:7)\n$line15.$eval$.$print(:6)\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":3,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":8,"RDD Info":[{"RDD ID":4,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[3],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":1,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[0],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":3,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"0\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[2],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":2,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[1],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":0,"Name":"ParallelCollectionRDD","Scope":"{\"id\":\"1\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":8,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line15.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line15.$read$$iw$$iw$$iw$$iw.(:33)\n$line15.$read$$iw$$iw$$iw.(:35)\n$line15.$read$$iw$$iw.(:37)\n$line15.$read$$iw.(:39)\n$line15.$read.(:41)\n$line15.$read$.(:45)\n$line15.$read$.()\n$line15.$eval$.$print$lzycompute(:7)\n$line15.$eval$.$print(:6)\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":4,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":10,"RDD Info":[{"RDD ID":7,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"4\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[6],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":6,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"5\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"count at :23","Parent IDs":[5],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":5,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"9\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[4],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[3],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line15.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line15.$read$$iw$$iw$$iw$$iw.(:33)\n$line15.$read$$iw$$iw$$iw.(:35)\n$line15.$read$$iw$$iw.(:37)\n$line15.$read$$iw.(:39)\n$line15.$read.(:41)\n$line15.$read$.(:45)\n$line15.$read$.()\n$line15.$eval$.$print$lzycompute(:7)\n$line15.$eval$.$print(:6)\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[5,3,4],"Properties":{"spark.sql.warehouse.dir":"file:/Users/lijunqing/Code/stczwd/spark/dist/spark-warehouse","spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"172.22.200.52","spark.eventLog.enabled":"true","spark.driver.port":"61038","__fetch_continuous_blocks_in_batch_enabled":"true","spark.repl.class.uri":"spark://172.22.200.52:61038/classes","spark.jars":"","spark.repl.class.outputDir":"/private/var/folders/dm/1vhcj_l97j146n6mgr2cm9rw0000gp/T/spark-501ae231-0cb9-4ba2-845f-3fc3cb053141/repl-054c2c94-f7a2-4f4b-9f12-96a1cdb15bc6","spark.app.name":"Spark shell","spark.rdd.scope":"{\"id\":\"16\",\"name\":\"collect\"}","spark.rdd.scope.noOverride":"true","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1642039450519","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.home":"/Users/lijunqing/Code/stczwd/spark/dist","spark.eventLog.dir":"/Users/lijunqing/Code/stczwd/spark/dist/eventLog","spark.sql.execution.id":"0","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1642039451826"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":1,"RDD Info":[{"RDD ID":10,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"15\",\"name\":\"mapPartitionsInternal\"}","Callsite":"count at :23","Parent IDs":[9],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":9,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"10\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"count at :23","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"14\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[4],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line15.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line15.$read$$iw$$iw$$iw$$iw.(:33)\n$line15.$read$$iw$$iw$$iw.(:35)\n$line15.$read$$iw$$iw.(:37)\n$line15.$read$$iw.(:39)\n$line15.$read.(:41)\n$line15.$read$.(:45)\n$line15.$read$.()\n$line15.$eval$.$print$lzycompute(:7)\n$line15.$eval$.$print(:6)\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Submission Time":1642039497181,"Accumulables":[],"Resource Profile Id":0},"Properties":{"spark.sql.warehouse.dir":"file:/Users/lijunqing/Code/stczwd/spark/dist/spark-warehouse","spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"172.22.200.52","spark.eventLog.enabled":"true","spark.driver.port":"61038","__fetch_continuous_blocks_in_batch_enabled":"true","spark.repl.class.uri":"spark://172.22.200.52:61038/classes","spark.jars":"","spark.repl.class.outputDir":"/private/var/folders/dm/1vhcj_l97j146n6mgr2cm9rw0000gp/T/spark-501ae231-0cb9-4ba2-845f-3fc3cb053141/repl-054c2c94-f7a2-4f4b-9f12-96a1cdb15bc6","spark.app.name":"Spark shell","spark.rdd.scope":"{\"id\":\"16\",\"name\":\"collect\"}","spark.rdd.scope.noOverride":"true","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1642039450519","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.home":"/Users/lijunqing/Code/stczwd/spark/dist","spark.eventLog.dir":"/Users/lijunqing/Code/stczwd/spark/dist/eventLog","spark.sql.execution.id":"0","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1642039451826"}} +{"Event":"SparkListenerTaskStart","Stage ID":5,"Stage Attempt ID":0,"Task Info":{"Task ID":18,"Index":0,"Attempt":0,"Partition ID":0,"Launch Time":1642039497187,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":5,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":18,"Index":0,"Attempt":0,"Partition ID":0,"Launch Time":1642039497187,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039497248,"Failed":false,"Killed":false,"Accumulables":[{"ID":108,"Name":"local blocks read","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":111,"Name":"local bytes read","Update":"590","Value":"590","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":112,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":113,"Name":"records read","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":149,"Name":"duration","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":150,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":153,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":156,"Name":"internal.metrics.executorDeserializeTime","Update":52,"Value":52,"Internal":true,"Count Failed Values":true},{"ID":157,"Name":"internal.metrics.executorDeserializeCpuTime","Update":51010000,"Value":51010000,"Internal":true,"Count Failed Values":true},{"ID":158,"Name":"internal.metrics.executorRunTime","Update":6,"Value":6,"Internal":true,"Count Failed Values":true},{"ID":159,"Name":"internal.metrics.executorCpuTime","Update":6062000,"Value":6062000,"Internal":true,"Count Failed Values":true},{"ID":160,"Name":"internal.metrics.resultSize","Update":2656,"Value":2656,"Internal":true,"Count Failed Values":true},{"ID":167,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":168,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":10,"Value":10,"Internal":true,"Count Failed Values":true},{"ID":169,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":170,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":171,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":590,"Value":590,"Internal":true,"Count Failed Values":true},{"ID":172,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":173,"Name":"internal.metrics.shuffle.read.recordsRead","Update":10,"Value":10,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":52,"Executor Deserialize CPU Time":51010000,"Executor Run Time":6,"Executor CPU Time":6062000,"Peak Execution Memory":0,"Result Size":2656,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":10,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":590,"Total Records Read":10},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":5,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":1,"RDD Info":[{"RDD ID":10,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"15\",\"name\":\"mapPartitionsInternal\"}","Callsite":"count at :23","Parent IDs":[9],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":9,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"10\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"count at :23","Parent IDs":[8],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":8,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"14\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[7],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[4],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line15.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line15.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line15.$read$$iw$$iw$$iw$$iw.(:33)\n$line15.$read$$iw$$iw$$iw.(:35)\n$line15.$read$$iw$$iw.(:37)\n$line15.$read$$iw.(:39)\n$line15.$read.(:41)\n$line15.$read$.(:45)\n$line15.$read$.()\n$line15.$eval$.$print$lzycompute(:7)\n$line15.$eval$.$print(:6)\n$line15.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Submission Time":1642039497181,"Completion Time":1642039497249,"Accumulables":[{"ID":108,"Name":"local blocks read","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":111,"Name":"local bytes read","Value":"590","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":112,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":113,"Name":"records read","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":149,"Name":"duration","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":150,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":153,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":156,"Name":"internal.metrics.executorDeserializeTime","Value":52,"Internal":true,"Count Failed Values":true},{"ID":157,"Name":"internal.metrics.executorDeserializeCpuTime","Value":51010000,"Internal":true,"Count Failed Values":true},{"ID":158,"Name":"internal.metrics.executorRunTime","Value":6,"Internal":true,"Count Failed Values":true},{"ID":159,"Name":"internal.metrics.executorCpuTime","Value":6062000,"Internal":true,"Count Failed Values":true},{"ID":160,"Name":"internal.metrics.resultSize","Value":2656,"Internal":true,"Count Failed Values":true},{"ID":167,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":168,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":10,"Internal":true,"Count Failed Values":true},{"ID":169,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":170,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":171,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":590,"Internal":true,"Count Failed Values":true},{"ID":172,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":173,"Name":"internal.metrics.shuffle.read.recordsRead","Value":10,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":2,"Completion Time":1642039497251,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":0,"time":1642039497259} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart","executionId":1,"description":"count at :23","details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line16.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line16.$read$$iw$$iw$$iw$$iw.(:33)\n$line16.$read$$iw$$iw$$iw.(:35)\n$line16.$read$$iw$$iw.(:37)\n$line16.$read$$iw.(:39)\n$line16.$read.(:41)\n$line16.$read$.(:45)\n$line16.$read$.()\n$line16.$eval$.$print$lzycompute(:7)\n$line16.$eval$.$print(:6)\n$line16.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","physicalPlanDescription":"== Physical Plan ==\nAdaptiveSparkPlan (7)\n+- HashAggregate (6)\n +- Exchange (5)\n +- HashAggregate (4)\n +- Exchange (3)\n +- Project (2)\n +- Range (1)\n\n\n(1) Range\nOutput [1]: [id#10L]\nArguments: Range (0, 3347, step=13, splits=Some(5))\n\n(2) Project\nOutput: []\nInput [1]: [id#10L]\n\n(3) Exchange\nInput: []\nArguments: RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#76]\n\n(4) HashAggregate\nInput: []\nKeys: []\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count#18L]\nResults [1]: [count#19L]\n\n(5) Exchange\nInput [1]: [count#19L]\nArguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#80]\n\n(6) HashAggregate\nInput [1]: [count#19L]\nKeys: []\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#15L]\nResults [1]: [count(1)#15L AS count#16L]\n\n(7) AdaptiveSparkPlan\nOutput [1]: [count#16L]\nArguments: isFinalPlan=false\n\n","sparkPlanInfo":{"nodeName":"AdaptiveSparkPlan","simpleString":"AdaptiveSparkPlan isFinalPlan=false","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[count(1)])","children":[{"nodeName":"Exchange","simpleString":"Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#80]","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[partial_count(1)])","children":[{"nodeName":"Exchange","simpleString":"Exchange RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#76]","children":[{"nodeName":"Project","simpleString":"Project","children":[{"nodeName":"Range","simpleString":"Range (0, 3347, step=13, splits=5)","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":217,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":215,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":216,"metricType":"nsTiming"},{"name":"records read","accumulatorId":213,"metricType":"sum"},{"name":"local bytes read","accumulatorId":211,"metricType":"size"},{"name":"fetch wait time","accumulatorId":212,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":209,"metricType":"size"},{"name":"local blocks read","accumulatorId":208,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":207,"metricType":"sum"},{"name":"data size","accumulatorId":205,"metricType":"size"},{"name":"number of partitions","accumulatorId":206,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":210,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":214,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":201,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":202,"metricType":"timing"},{"name":"peak memory","accumulatorId":200,"metricType":"size"},{"name":"number of output rows","accumulatorId":199,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":204,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":203,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":197,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":198,"metricType":"nsTiming"},{"name":"records read","accumulatorId":195,"metricType":"sum"},{"name":"local bytes read","accumulatorId":193,"metricType":"size"},{"name":"fetch wait time","accumulatorId":194,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":191,"metricType":"size"},{"name":"local blocks read","accumulatorId":190,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":189,"metricType":"sum"},{"name":"data size","accumulatorId":187,"metricType":"size"},{"name":"number of partitions","accumulatorId":188,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":192,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":196,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":183,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":184,"metricType":"timing"},{"name":"peak memory","accumulatorId":182,"metricType":"size"},{"name":"number of output rows","accumulatorId":181,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":186,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":185,"metricType":"average"}]}],"metadata":{},"metrics":[]},"time":1642039528290,"modifiedConfigs":{}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate","executionId":1,"physicalPlanDescription":"== Physical Plan ==\nAdaptiveSparkPlan (12)\n+- == Current Plan ==\n HashAggregate (7)\n +- Exchange (6)\n +- HashAggregate (5)\n +- ShuffleQueryStage (4)\n +- Exchange (3)\n +- * Project (2)\n +- * Range (1)\n+- == Initial Plan ==\n HashAggregate (11)\n +- Exchange (10)\n +- HashAggregate (9)\n +- Exchange (8)\n +- Project (2)\n +- Range (1)\n\n\n(1) Range [codegen id : 1]\nOutput [1]: [id#10L]\nArguments: Range (0, 3347, step=13, splits=Some(5))\n\n(2) Project [codegen id : 1]\nOutput: []\nInput [1]: [id#10L]\n\n(3) Exchange\nInput: []\nArguments: RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#89]\n\n(4) ShuffleQueryStage\nOutput: []\nArguments: 0\n\n(5) HashAggregate\nInput: []\nKeys: []\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count#18L]\nResults [1]: [count#19L]\n\n(6) Exchange\nInput [1]: [count#19L]\nArguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#94]\n\n(7) HashAggregate\nInput [1]: [count#19L]\nKeys: []\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#15L]\nResults [1]: [count(1)#15L AS count#16L]\n\n(8) Exchange\nInput: []\nArguments: RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#76]\n\n(9) HashAggregate\nInput: []\nKeys: []\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count#18L]\nResults [1]: [count#19L]\n\n(10) Exchange\nInput [1]: [count#19L]\nArguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#80]\n\n(11) HashAggregate\nInput [1]: [count#19L]\nKeys: []\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#15L]\nResults [1]: [count(1)#15L AS count#16L]\n\n(12) AdaptiveSparkPlan\nOutput [1]: [count#16L]\nArguments: isFinalPlan=false\n\n","sparkPlanInfo":{"nodeName":"AdaptiveSparkPlan","simpleString":"AdaptiveSparkPlan isFinalPlan=false","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[count(1)])","children":[{"nodeName":"Exchange","simpleString":"Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#94]","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[partial_count(1)])","children":[{"nodeName":"ShuffleQueryStage","simpleString":"ShuffleQueryStage 0","children":[{"nodeName":"Exchange","simpleString":"Exchange RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#89]","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project","children":[{"nodeName":"Range","simpleString":"Range (0, 3347, step=13, splits=5)","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":217,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":254,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":252,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":253,"metricType":"nsTiming"},{"name":"records read","accumulatorId":250,"metricType":"sum"},{"name":"local bytes read","accumulatorId":248,"metricType":"size"},{"name":"fetch wait time","accumulatorId":249,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":246,"metricType":"size"},{"name":"local blocks read","accumulatorId":245,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":244,"metricType":"sum"},{"name":"data size","accumulatorId":242,"metricType":"size"},{"name":"number of partitions","accumulatorId":243,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":247,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":251,"metricType":"size"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":238,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":239,"metricType":"timing"},{"name":"peak memory","accumulatorId":237,"metricType":"size"},{"name":"number of output rows","accumulatorId":236,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":241,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":240,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":234,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":235,"metricType":"nsTiming"},{"name":"records read","accumulatorId":232,"metricType":"sum"},{"name":"local bytes read","accumulatorId":230,"metricType":"size"},{"name":"fetch wait time","accumulatorId":231,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":228,"metricType":"size"},{"name":"local blocks read","accumulatorId":227,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":226,"metricType":"sum"},{"name":"data size","accumulatorId":224,"metricType":"size"},{"name":"number of partitions","accumulatorId":225,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":229,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":233,"metricType":"size"}]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":220,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":221,"metricType":"timing"},{"name":"peak memory","accumulatorId":219,"metricType":"size"},{"name":"number of output rows","accumulatorId":218,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":223,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":222,"metricType":"average"}]}],"metadata":{},"metrics":[]}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerDriverAccumUpdates","executionId":1,"accumUpdates":[[243,10]]} +{"Event":"SparkListenerJobStart","Job ID":3,"Submission Time":1642039528323,"Stage Infos":[{"Stage ID":6,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":5,"RDD Info":[{"RDD ID":15,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"28\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[14],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":14,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"28\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[13],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":12,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[11],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":11,"Name":"ParallelCollectionRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":13,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[12],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line16.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line16.$read$$iw$$iw$$iw$$iw.(:33)\n$line16.$read$$iw$$iw$$iw.(:35)\n$line16.$read$$iw$$iw.(:37)\n$line16.$read$$iw.(:39)\n$line16.$read.(:41)\n$line16.$read$.(:45)\n$line16.$read$.()\n$line16.$eval$.$print$lzycompute(:7)\n$line16.$eval$.$print(:6)\n$line16.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[6],"Properties":{"spark.sql.warehouse.dir":"file:/Users/lijunqing/Code/stczwd/spark/dist/spark-warehouse","spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"172.22.200.52","spark.eventLog.enabled":"true","spark.driver.port":"61038","__fetch_continuous_blocks_in_batch_enabled":"true","spark.repl.class.uri":"spark://172.22.200.52:61038/classes","spark.jars":"","spark.repl.class.outputDir":"/private/var/folders/dm/1vhcj_l97j146n6mgr2cm9rw0000gp/T/spark-501ae231-0cb9-4ba2-845f-3fc3cb053141/repl-054c2c94-f7a2-4f4b-9f12-96a1cdb15bc6","spark.app.name":"Spark shell","spark.rdd.scope":"{\"id\":\"28\",\"name\":\"Exchange\"}","spark.rdd.scope.noOverride":"true","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1642039450519","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.home":"/Users/lijunqing/Code/stczwd/spark/dist","spark.eventLog.dir":"/Users/lijunqing/Code/stczwd/spark/dist/eventLog","spark.sql.execution.id":"1","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1642039451826"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":6,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":5,"RDD Info":[{"RDD ID":15,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"28\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[14],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":14,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"28\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[13],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":12,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[11],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":11,"Name":"ParallelCollectionRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":13,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[12],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line16.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line16.$read$$iw$$iw$$iw$$iw.(:33)\n$line16.$read$$iw$$iw$$iw.(:35)\n$line16.$read$$iw$$iw.(:37)\n$line16.$read$$iw.(:39)\n$line16.$read.(:41)\n$line16.$read$.(:45)\n$line16.$read$.()\n$line16.$eval$.$print$lzycompute(:7)\n$line16.$eval$.$print(:6)\n$line16.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Submission Time":1642039528324,"Accumulables":[],"Resource Profile Id":0},"Properties":{"spark.sql.warehouse.dir":"file:/Users/lijunqing/Code/stczwd/spark/dist/spark-warehouse","spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"172.22.200.52","spark.eventLog.enabled":"true","spark.driver.port":"61038","__fetch_continuous_blocks_in_batch_enabled":"true","spark.repl.class.uri":"spark://172.22.200.52:61038/classes","spark.jars":"","spark.repl.class.outputDir":"/private/var/folders/dm/1vhcj_l97j146n6mgr2cm9rw0000gp/T/spark-501ae231-0cb9-4ba2-845f-3fc3cb053141/repl-054c2c94-f7a2-4f4b-9f12-96a1cdb15bc6","spark.app.name":"Spark shell","spark.rdd.scope":"{\"id\":\"28\",\"name\":\"Exchange\"}","spark.rdd.scope.noOverride":"true","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1642039450519","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.home":"/Users/lijunqing/Code/stczwd/spark/dist","spark.eventLog.dir":"/Users/lijunqing/Code/stczwd/spark/dist/eventLog","spark.sql.execution.id":"1","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1642039451826"}} +{"Event":"SparkListenerTaskStart","Stage ID":6,"Stage Attempt ID":0,"Task Info":{"Task ID":19,"Index":0,"Attempt":0,"Partition ID":0,"Launch Time":1642039528357,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":6,"Stage Attempt ID":0,"Task Info":{"Task ID":20,"Index":1,"Attempt":0,"Partition ID":1,"Launch Time":1642039528358,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":6,"Stage Attempt ID":0,"Task Info":{"Task ID":21,"Index":2,"Attempt":0,"Partition ID":2,"Launch Time":1642039528358,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":6,"Stage Attempt ID":0,"Task Info":{"Task ID":22,"Index":3,"Attempt":0,"Partition ID":3,"Launch Time":1642039528358,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":6,"Stage Attempt ID":0,"Task Info":{"Task ID":23,"Index":4,"Attempt":0,"Partition ID":4,"Launch Time":1642039528358,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":6,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":19,"Index":0,"Attempt":0,"Partition ID":0,"Launch Time":1642039528357,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528383,"Failed":false,"Killed":false,"Accumulables":[{"ID":217,"Name":"number of output rows","Update":"51","Value":"51","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":242,"Name":"data size","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":251,"Name":"shuffle bytes written","Update":"520","Value":"520","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":252,"Name":"shuffle records written","Update":"51","Value":"51","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":253,"Name":"shuffle write time","Update":"5953700","Value":"5953700","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":254,"Name":"duration","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":255,"Name":"internal.metrics.executorDeserializeTime","Update":2,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":256,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2403000,"Value":2403000,"Internal":true,"Count Failed Values":true},{"ID":257,"Name":"internal.metrics.executorRunTime","Update":18,"Value":18,"Internal":true,"Count Failed Values":true},{"ID":258,"Name":"internal.metrics.executorCpuTime","Update":12538000,"Value":12538000,"Internal":true,"Count Failed Values":true},{"ID":259,"Name":"internal.metrics.resultSize","Update":1750,"Value":1750,"Internal":true,"Count Failed Values":true},{"ID":273,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":520,"Value":520,"Internal":true,"Count Failed Values":true},{"ID":274,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":51,"Value":51,"Internal":true,"Count Failed Values":true},{"ID":275,"Name":"internal.metrics.shuffle.write.writeTime","Update":5953700,"Value":5953700,"Internal":true,"Count Failed Values":true},{"ID":277,"Name":"internal.metrics.input.recordsRead","Update":51,"Value":51,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":2,"Executor Deserialize CPU Time":2403000,"Executor Run Time":18,"Executor CPU Time":12538000,"Peak Execution Memory":0,"Result Size":1750,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":520,"Shuffle Write Time":5953700,"Shuffle Records Written":51},"Input Metrics":{"Bytes Read":0,"Records Read":51},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":6,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":23,"Index":4,"Attempt":0,"Partition ID":4,"Launch Time":1642039528358,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528384,"Failed":false,"Killed":false,"Accumulables":[{"ID":217,"Name":"number of output rows","Update":"52","Value":"103","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":242,"Name":"data size","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":251,"Name":"shuffle bytes written","Update":"520","Value":"1040","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":252,"Name":"shuffle records written","Update":"52","Value":"103","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":253,"Name":"shuffle write time","Update":"7592966","Value":"13546666","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":254,"Name":"duration","Update":"1","Value":"2","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":255,"Name":"internal.metrics.executorDeserializeTime","Update":2,"Value":4,"Internal":true,"Count Failed Values":true},{"ID":256,"Name":"internal.metrics.executorDeserializeCpuTime","Update":1995000,"Value":4398000,"Internal":true,"Count Failed Values":true},{"ID":257,"Name":"internal.metrics.executorRunTime","Update":19,"Value":37,"Internal":true,"Count Failed Values":true},{"ID":258,"Name":"internal.metrics.executorCpuTime","Update":13485000,"Value":26023000,"Internal":true,"Count Failed Values":true},{"ID":259,"Name":"internal.metrics.resultSize","Update":1750,"Value":3500,"Internal":true,"Count Failed Values":true},{"ID":273,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":520,"Value":1040,"Internal":true,"Count Failed Values":true},{"ID":274,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":52,"Value":103,"Internal":true,"Count Failed Values":true},{"ID":275,"Name":"internal.metrics.shuffle.write.writeTime","Update":7592966,"Value":13546666,"Internal":true,"Count Failed Values":true},{"ID":277,"Name":"internal.metrics.input.recordsRead","Update":52,"Value":103,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":2,"Executor Deserialize CPU Time":1995000,"Executor Run Time":19,"Executor CPU Time":13485000,"Peak Execution Memory":0,"Result Size":1750,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":520,"Shuffle Write Time":7592966,"Shuffle Records Written":52},"Input Metrics":{"Bytes Read":0,"Records Read":52},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":6,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":20,"Index":1,"Attempt":0,"Partition ID":1,"Launch Time":1642039528358,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528384,"Failed":false,"Killed":false,"Accumulables":[{"ID":217,"Name":"number of output rows","Update":"52","Value":"155","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":242,"Name":"data size","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":251,"Name":"shuffle bytes written","Update":"520","Value":"1560","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":252,"Name":"shuffle records written","Update":"52","Value":"155","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":253,"Name":"shuffle write time","Update":"5810669","Value":"19357335","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":254,"Name":"duration","Update":"1","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":255,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":7,"Internal":true,"Count Failed Values":true},{"ID":256,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2006000,"Value":6404000,"Internal":true,"Count Failed Values":true},{"ID":257,"Name":"internal.metrics.executorRunTime","Update":20,"Value":57,"Internal":true,"Count Failed Values":true},{"ID":258,"Name":"internal.metrics.executorCpuTime","Update":10665000,"Value":36688000,"Internal":true,"Count Failed Values":true},{"ID":259,"Name":"internal.metrics.resultSize","Update":1750,"Value":5250,"Internal":true,"Count Failed Values":true},{"ID":273,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":520,"Value":1560,"Internal":true,"Count Failed Values":true},{"ID":274,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":52,"Value":155,"Internal":true,"Count Failed Values":true},{"ID":275,"Name":"internal.metrics.shuffle.write.writeTime","Update":5810669,"Value":19357335,"Internal":true,"Count Failed Values":true},{"ID":277,"Name":"internal.metrics.input.recordsRead","Update":52,"Value":155,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":2006000,"Executor Run Time":20,"Executor CPU Time":10665000,"Peak Execution Memory":0,"Result Size":1750,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":520,"Shuffle Write Time":5810669,"Shuffle Records Written":52},"Input Metrics":{"Bytes Read":0,"Records Read":52},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":6,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":22,"Index":3,"Attempt":0,"Partition ID":3,"Launch Time":1642039528358,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528386,"Failed":false,"Killed":false,"Accumulables":[{"ID":217,"Name":"number of output rows","Update":"52","Value":"207","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":242,"Name":"data size","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":251,"Name":"shuffle bytes written","Update":"520","Value":"2080","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":252,"Name":"shuffle records written","Update":"52","Value":"207","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":253,"Name":"shuffle write time","Update":"6453916","Value":"25811251","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":254,"Name":"duration","Update":"1","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":255,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":10,"Internal":true,"Count Failed Values":true},{"ID":256,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2176000,"Value":8580000,"Internal":true,"Count Failed Values":true},{"ID":257,"Name":"internal.metrics.executorRunTime","Update":21,"Value":78,"Internal":true,"Count Failed Values":true},{"ID":258,"Name":"internal.metrics.executorCpuTime","Update":12442000,"Value":49130000,"Internal":true,"Count Failed Values":true},{"ID":259,"Name":"internal.metrics.resultSize","Update":1750,"Value":7000,"Internal":true,"Count Failed Values":true},{"ID":273,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":520,"Value":2080,"Internal":true,"Count Failed Values":true},{"ID":274,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":52,"Value":207,"Internal":true,"Count Failed Values":true},{"ID":275,"Name":"internal.metrics.shuffle.write.writeTime","Update":6453916,"Value":25811251,"Internal":true,"Count Failed Values":true},{"ID":277,"Name":"internal.metrics.input.recordsRead","Update":52,"Value":207,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":2176000,"Executor Run Time":21,"Executor CPU Time":12442000,"Peak Execution Memory":0,"Result Size":1750,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":520,"Shuffle Write Time":6453916,"Shuffle Records Written":52},"Input Metrics":{"Bytes Read":0,"Records Read":52},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":6,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":21,"Index":2,"Attempt":0,"Partition ID":2,"Launch Time":1642039528358,"Executor ID":"driver","Host":"172.22.200.52","Locality":"PROCESS_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528388,"Failed":false,"Killed":false,"Accumulables":[{"ID":217,"Name":"number of output rows","Update":"51","Value":"258","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":242,"Name":"data size","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":251,"Name":"shuffle bytes written","Update":"520","Value":"2600","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":252,"Name":"shuffle records written","Update":"51","Value":"258","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":253,"Name":"shuffle write time","Update":"6818330","Value":"32629581","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":254,"Name":"duration","Update":"1","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":255,"Name":"internal.metrics.executorDeserializeTime","Update":3,"Value":13,"Internal":true,"Count Failed Values":true},{"ID":256,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2186000,"Value":10766000,"Internal":true,"Count Failed Values":true},{"ID":257,"Name":"internal.metrics.executorRunTime","Update":23,"Value":101,"Internal":true,"Count Failed Values":true},{"ID":258,"Name":"internal.metrics.executorCpuTime","Update":10929000,"Value":60059000,"Internal":true,"Count Failed Values":true},{"ID":259,"Name":"internal.metrics.resultSize","Update":1750,"Value":8750,"Internal":true,"Count Failed Values":true},{"ID":273,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":520,"Value":2600,"Internal":true,"Count Failed Values":true},{"ID":274,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":51,"Value":258,"Internal":true,"Count Failed Values":true},{"ID":275,"Name":"internal.metrics.shuffle.write.writeTime","Update":6818330,"Value":32629581,"Internal":true,"Count Failed Values":true},{"ID":277,"Name":"internal.metrics.input.recordsRead","Update":51,"Value":258,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":3,"Executor Deserialize CPU Time":2186000,"Executor Run Time":23,"Executor CPU Time":10929000,"Peak Execution Memory":0,"Result Size":1750,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":0,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":0,"Total Records Read":0},"Shuffle Write Metrics":{"Shuffle Bytes Written":520,"Shuffle Write Time":6818330,"Shuffle Records Written":51},"Input Metrics":{"Bytes Read":0,"Records Read":51},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":6,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":5,"RDD Info":[{"RDD ID":15,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"28\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[14],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":14,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"28\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[13],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":12,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[11],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":11,"Name":"ParallelCollectionRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":13,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[12],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line16.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line16.$read$$iw$$iw$$iw$$iw.(:33)\n$line16.$read$$iw$$iw$$iw.(:35)\n$line16.$read$$iw$$iw.(:37)\n$line16.$read$$iw.(:39)\n$line16.$read.(:41)\n$line16.$read$.(:45)\n$line16.$read$.()\n$line16.$eval$.$print$lzycompute(:7)\n$line16.$eval$.$print(:6)\n$line16.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Submission Time":1642039528324,"Completion Time":1642039528388,"Accumulables":[{"ID":217,"Name":"number of output rows","Value":"258","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":242,"Name":"data size","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":251,"Name":"shuffle bytes written","Value":"2600","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":252,"Name":"shuffle records written","Value":"258","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":253,"Name":"shuffle write time","Value":"32629581","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":254,"Name":"duration","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":255,"Name":"internal.metrics.executorDeserializeTime","Value":13,"Internal":true,"Count Failed Values":true},{"ID":256,"Name":"internal.metrics.executorDeserializeCpuTime","Value":10766000,"Internal":true,"Count Failed Values":true},{"ID":257,"Name":"internal.metrics.executorRunTime","Value":101,"Internal":true,"Count Failed Values":true},{"ID":258,"Name":"internal.metrics.executorCpuTime","Value":60059000,"Internal":true,"Count Failed Values":true},{"ID":259,"Name":"internal.metrics.resultSize","Value":8750,"Internal":true,"Count Failed Values":true},{"ID":273,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":2600,"Internal":true,"Count Failed Values":true},{"ID":274,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":258,"Internal":true,"Count Failed Values":true},{"ID":275,"Name":"internal.metrics.shuffle.write.writeTime","Value":32629581,"Internal":true,"Count Failed Values":true},{"ID":277,"Name":"internal.metrics.input.recordsRead","Value":258,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":3,"Completion Time":1642039528389,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate","executionId":1,"physicalPlanDescription":"== Physical Plan ==\nAdaptiveSparkPlan (13)\n+- == Current Plan ==\n HashAggregate (8)\n +- ShuffleQueryStage (7)\n +- Exchange (6)\n +- * HashAggregate (5)\n +- ShuffleQueryStage (4)\n +- Exchange (3)\n +- * Project (2)\n +- * Range (1)\n+- == Initial Plan ==\n HashAggregate (12)\n +- Exchange (11)\n +- HashAggregate (10)\n +- Exchange (9)\n +- Project (2)\n +- Range (1)\n\n\n(1) Range [codegen id : 1]\nOutput [1]: [id#10L]\nArguments: Range (0, 3347, step=13, splits=Some(5))\n\n(2) Project [codegen id : 1]\nOutput: []\nInput [1]: [id#10L]\n\n(3) Exchange\nInput: []\nArguments: RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#89]\n\n(4) ShuffleQueryStage\nOutput: []\nArguments: 0\n\n(5) HashAggregate [codegen id : 2]\nInput: []\nKeys: []\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count#18L]\nResults [1]: [count#19L]\n\n(6) Exchange\nInput [1]: [count#19L]\nArguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#110]\n\n(7) ShuffleQueryStage\nOutput [1]: [count#19L]\nArguments: 1\n\n(8) HashAggregate\nInput [1]: [count#19L]\nKeys: []\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#15L]\nResults [1]: [count(1)#15L AS count#16L]\n\n(9) Exchange\nInput: []\nArguments: RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#76]\n\n(10) HashAggregate\nInput: []\nKeys: []\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count#18L]\nResults [1]: [count#19L]\n\n(11) Exchange\nInput [1]: [count#19L]\nArguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#80]\n\n(12) HashAggregate\nInput [1]: [count#19L]\nKeys: []\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#15L]\nResults [1]: [count(1)#15L AS count#16L]\n\n(13) AdaptiveSparkPlan\nOutput [1]: [count#16L]\nArguments: isFinalPlan=false\n\n","sparkPlanInfo":{"nodeName":"AdaptiveSparkPlan","simpleString":"AdaptiveSparkPlan isFinalPlan=false","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[count(1)])","children":[{"nodeName":"ShuffleQueryStage","simpleString":"ShuffleQueryStage 1","children":[{"nodeName":"Exchange","simpleString":"Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#110]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[partial_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"ShuffleQueryStage","simpleString":"ShuffleQueryStage 0","children":[{"nodeName":"Exchange","simpleString":"Exchange RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#89]","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project","children":[{"nodeName":"Range","simpleString":"Range (0, 3347, step=13, splits=5)","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":217,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":254,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":252,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":253,"metricType":"nsTiming"},{"name":"records read","accumulatorId":250,"metricType":"sum"},{"name":"local bytes read","accumulatorId":248,"metricType":"size"},{"name":"fetch wait time","accumulatorId":249,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":246,"metricType":"size"},{"name":"local blocks read","accumulatorId":245,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":244,"metricType":"sum"},{"name":"data size","accumulatorId":242,"metricType":"size"},{"name":"number of partitions","accumulatorId":243,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":247,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":251,"metricType":"size"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":301,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":302,"metricType":"timing"},{"name":"peak memory","accumulatorId":300,"metricType":"size"},{"name":"number of output rows","accumulatorId":299,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":304,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":303,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":298,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":296,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":297,"metricType":"nsTiming"},{"name":"records read","accumulatorId":294,"metricType":"sum"},{"name":"local bytes read","accumulatorId":292,"metricType":"size"},{"name":"fetch wait time","accumulatorId":293,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":290,"metricType":"size"},{"name":"local blocks read","accumulatorId":289,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":288,"metricType":"sum"},{"name":"data size","accumulatorId":286,"metricType":"size"},{"name":"number of partitions","accumulatorId":287,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":291,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":295,"metricType":"size"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":282,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":283,"metricType":"timing"},{"name":"peak memory","accumulatorId":281,"metricType":"size"},{"name":"number of output rows","accumulatorId":280,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":285,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":284,"metricType":"average"}]}],"metadata":{},"metrics":[]}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerDriverAccumUpdates","executionId":1,"accumUpdates":[[287,1]]} +{"Event":"SparkListenerJobStart","Job ID":4,"Submission Time":1642039528402,"Stage Infos":[{"Stage ID":7,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":5,"RDD Info":[{"RDD ID":15,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"28\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[14],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":14,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"28\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[13],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":12,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[11],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":11,"Name":"ParallelCollectionRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":13,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[12],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line16.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line16.$read$$iw$$iw$$iw$$iw.(:33)\n$line16.$read$$iw$$iw$$iw.(:35)\n$line16.$read$$iw$$iw.(:37)\n$line16.$read$$iw.(:39)\n$line16.$read.(:41)\n$line16.$read$.(:45)\n$line16.$read$.()\n$line16.$eval$.$print$lzycompute(:7)\n$line16.$eval$.$print(:6)\n$line16.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":8,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":10,"RDD Info":[{"RDD ID":18,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"32\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[17],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":16,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"37\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[15],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":17,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"33\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"count at :23","Parent IDs":[16],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[7],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line16.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line16.$read$$iw$$iw$$iw$$iw.(:33)\n$line16.$read$$iw$$iw$$iw.(:35)\n$line16.$read$$iw$$iw.(:37)\n$line16.$read$$iw.(:39)\n$line16.$read.(:41)\n$line16.$read$.(:45)\n$line16.$read$.()\n$line16.$eval$.$print$lzycompute(:7)\n$line16.$eval$.$print(:6)\n$line16.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[7,8],"Properties":{"spark.sql.warehouse.dir":"file:/Users/lijunqing/Code/stczwd/spark/dist/spark-warehouse","spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"172.22.200.52","spark.eventLog.enabled":"true","spark.driver.port":"61038","__fetch_continuous_blocks_in_batch_enabled":"true","spark.repl.class.uri":"spark://172.22.200.52:61038/classes","spark.jars":"","spark.repl.class.outputDir":"/private/var/folders/dm/1vhcj_l97j146n6mgr2cm9rw0000gp/T/spark-501ae231-0cb9-4ba2-845f-3fc3cb053141/repl-054c2c94-f7a2-4f4b-9f12-96a1cdb15bc6","spark.app.name":"Spark shell","spark.rdd.scope":"{\"id\":\"32\",\"name\":\"Exchange\"}","spark.rdd.scope.noOverride":"true","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1642039450519","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.home":"/Users/lijunqing/Code/stczwd/spark/dist","spark.eventLog.dir":"/Users/lijunqing/Code/stczwd/spark/dist/eventLog","spark.sql.execution.id":"1","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1642039451826"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":8,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":10,"RDD Info":[{"RDD ID":18,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"32\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[17],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":16,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"37\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[15],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":17,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"33\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"count at :23","Parent IDs":[16],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[7],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line16.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line16.$read$$iw$$iw$$iw$$iw.(:33)\n$line16.$read$$iw$$iw$$iw.(:35)\n$line16.$read$$iw$$iw.(:37)\n$line16.$read$$iw.(:39)\n$line16.$read.(:41)\n$line16.$read$.(:45)\n$line16.$read$.()\n$line16.$eval$.$print$lzycompute(:7)\n$line16.$eval$.$print(:6)\n$line16.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Submission Time":1642039528404,"Accumulables":[],"Resource Profile Id":0},"Properties":{"spark.sql.warehouse.dir":"file:/Users/lijunqing/Code/stczwd/spark/dist/spark-warehouse","spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"172.22.200.52","spark.eventLog.enabled":"true","spark.driver.port":"61038","__fetch_continuous_blocks_in_batch_enabled":"true","spark.repl.class.uri":"spark://172.22.200.52:61038/classes","spark.jars":"","spark.repl.class.outputDir":"/private/var/folders/dm/1vhcj_l97j146n6mgr2cm9rw0000gp/T/spark-501ae231-0cb9-4ba2-845f-3fc3cb053141/repl-054c2c94-f7a2-4f4b-9f12-96a1cdb15bc6","spark.app.name":"Spark shell","spark.rdd.scope":"{\"id\":\"32\",\"name\":\"Exchange\"}","spark.rdd.scope.noOverride":"true","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1642039450519","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.home":"/Users/lijunqing/Code/stczwd/spark/dist","spark.eventLog.dir":"/Users/lijunqing/Code/stczwd/spark/dist/eventLog","spark.sql.execution.id":"1","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1642039451826"}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":24,"Index":0,"Attempt":0,"Partition ID":0,"Launch Time":1642039528410,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":25,"Index":1,"Attempt":0,"Partition ID":1,"Launch Time":1642039528410,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":26,"Index":2,"Attempt":0,"Partition ID":2,"Launch Time":1642039528411,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":27,"Index":3,"Attempt":0,"Partition ID":3,"Launch Time":1642039528411,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":28,"Index":4,"Attempt":0,"Partition ID":4,"Launch Time":1642039528411,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":29,"Index":5,"Attempt":0,"Partition ID":5,"Launch Time":1642039528411,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":30,"Index":6,"Attempt":0,"Partition ID":6,"Launch Time":1642039528411,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":31,"Index":7,"Attempt":0,"Partition ID":7,"Launch Time":1642039528411,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":32,"Index":8,"Attempt":0,"Partition ID":8,"Launch Time":1642039528425,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskStart","Stage ID":8,"Stage Attempt ID":0,"Task Info":{"Task ID":33,"Index":9,"Attempt":0,"Partition ID":9,"Launch Time":1642039528426,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":25,"Index":1,"Attempt":0,"Partition ID":1,"Launch Time":1642039528410,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528427,"Failed":false,"Killed":false,"Accumulables":[{"ID":245,"Name":"local blocks read","Update":"5","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":248,"Name":"local bytes read","Update":"260","Value":"260","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":249,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":250,"Name":"records read","Update":"26","Value":"26","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":286,"Name":"data size","Update":"16","Value":"16","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":295,"Name":"shuffle bytes written","Update":"59","Value":"59","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":296,"Name":"shuffle records written","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":297,"Name":"shuffle write time","Update":"621958","Value":"621958","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":298,"Name":"duration","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":299,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":305,"Name":"internal.metrics.executorDeserializeTime","Update":2,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":306,"Name":"internal.metrics.executorDeserializeCpuTime","Update":1997000,"Value":1997000,"Internal":true,"Count Failed Values":true},{"ID":307,"Name":"internal.metrics.executorRunTime","Update":9,"Value":9,"Internal":true,"Count Failed Values":true},{"ID":308,"Name":"internal.metrics.executorCpuTime","Update":5764000,"Value":5764000,"Internal":true,"Count Failed Values":true},{"ID":309,"Name":"internal.metrics.resultSize","Update":2848,"Value":2848,"Internal":true,"Count Failed Values":true},{"ID":316,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":317,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":5,"Value":5,"Internal":true,"Count Failed Values":true},{"ID":318,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":319,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":320,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":260,"Value":260,"Internal":true,"Count Failed Values":true},{"ID":321,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":322,"Name":"internal.metrics.shuffle.read.recordsRead","Update":26,"Value":26,"Internal":true,"Count Failed Values":true},{"ID":323,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":59,"Internal":true,"Count Failed Values":true},{"ID":324,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":1,"Internal":true,"Count Failed Values":true},{"ID":325,"Name":"internal.metrics.shuffle.write.writeTime","Update":621958,"Value":621958,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":2,"Executor Deserialize CPU Time":1997000,"Executor Run Time":9,"Executor CPU Time":5764000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":5,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":260,"Total Records Read":26},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":621958,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":30,"Index":6,"Attempt":0,"Partition ID":6,"Launch Time":1642039528411,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528427,"Failed":false,"Killed":false,"Accumulables":[{"ID":245,"Name":"local blocks read","Update":"5","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":248,"Name":"local bytes read","Update":"260","Value":"520","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":249,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":250,"Name":"records read","Update":"27","Value":"53","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":286,"Name":"data size","Update":"16","Value":"32","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":295,"Name":"shuffle bytes written","Update":"59","Value":"118","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":296,"Name":"shuffle records written","Update":"1","Value":"2","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":297,"Name":"shuffle write time","Update":"945999","Value":"1567957","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":298,"Name":"duration","Update":"1","Value":"2","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":299,"Name":"number of output rows","Update":"1","Value":"2","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":305,"Name":"internal.metrics.executorDeserializeTime","Update":2,"Value":4,"Internal":true,"Count Failed Values":true},{"ID":306,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2043000,"Value":4040000,"Internal":true,"Count Failed Values":true},{"ID":307,"Name":"internal.metrics.executorRunTime","Update":10,"Value":19,"Internal":true,"Count Failed Values":true},{"ID":308,"Name":"internal.metrics.executorCpuTime","Update":6022000,"Value":11786000,"Internal":true,"Count Failed Values":true},{"ID":309,"Name":"internal.metrics.resultSize","Update":2848,"Value":5696,"Internal":true,"Count Failed Values":true},{"ID":316,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":317,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":5,"Value":10,"Internal":true,"Count Failed Values":true},{"ID":318,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":319,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":320,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":260,"Value":520,"Internal":true,"Count Failed Values":true},{"ID":321,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":322,"Name":"internal.metrics.shuffle.read.recordsRead","Update":27,"Value":53,"Internal":true,"Count Failed Values":true},{"ID":323,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":118,"Internal":true,"Count Failed Values":true},{"ID":324,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":325,"Name":"internal.metrics.shuffle.write.writeTime","Update":945999,"Value":1567957,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":2,"Executor Deserialize CPU Time":2043000,"Executor Run Time":10,"Executor CPU Time":6022000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":5,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":260,"Total Records Read":27},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":945999,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":24,"Index":0,"Attempt":0,"Partition ID":0,"Launch Time":1642039528410,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528427,"Failed":false,"Killed":false,"Accumulables":[{"ID":245,"Name":"local blocks read","Update":"5","Value":"15","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":248,"Name":"local bytes read","Update":"260","Value":"780","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":249,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":250,"Name":"records read","Update":"25","Value":"78","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":286,"Name":"data size","Update":"16","Value":"48","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":295,"Name":"shuffle bytes written","Update":"59","Value":"177","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":296,"Name":"shuffle records written","Update":"1","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":297,"Name":"shuffle write time","Update":"489459","Value":"2057416","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":298,"Name":"duration","Update":"0","Value":"2","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":299,"Name":"number of output rows","Update":"1","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"time in aggregation build","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":305,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":8,"Internal":true,"Count Failed Values":true},{"ID":306,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2389000,"Value":6429000,"Internal":true,"Count Failed Values":true},{"ID":307,"Name":"internal.metrics.executorRunTime","Update":7,"Value":26,"Internal":true,"Count Failed Values":true},{"ID":308,"Name":"internal.metrics.executorCpuTime","Update":4527000,"Value":16313000,"Internal":true,"Count Failed Values":true},{"ID":309,"Name":"internal.metrics.resultSize","Update":2848,"Value":8544,"Internal":true,"Count Failed Values":true},{"ID":316,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":317,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":5,"Value":15,"Internal":true,"Count Failed Values":true},{"ID":318,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":319,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":320,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":260,"Value":780,"Internal":true,"Count Failed Values":true},{"ID":321,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":322,"Name":"internal.metrics.shuffle.read.recordsRead","Update":25,"Value":78,"Internal":true,"Count Failed Values":true},{"ID":323,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":177,"Internal":true,"Count Failed Values":true},{"ID":324,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":325,"Name":"internal.metrics.shuffle.write.writeTime","Update":489459,"Value":2057416,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":2389000,"Executor Run Time":7,"Executor CPU Time":4527000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":5,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":260,"Total Records Read":25},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":489459,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":31,"Index":7,"Attempt":0,"Partition ID":7,"Launch Time":1642039528411,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528432,"Failed":false,"Killed":false,"Accumulables":[{"ID":245,"Name":"local blocks read","Update":"5","Value":"20","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":248,"Name":"local bytes read","Update":"260","Value":"1040","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":249,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":250,"Name":"records read","Update":"26","Value":"104","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":286,"Name":"data size","Update":"16","Value":"64","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":295,"Name":"shuffle bytes written","Update":"59","Value":"236","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":296,"Name":"shuffle records written","Update":"1","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":297,"Name":"shuffle write time","Update":"1090458","Value":"3147874","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":298,"Name":"duration","Update":"2","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":299,"Name":"number of output rows","Update":"1","Value":"4","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"time in aggregation build","Update":"2","Value":"2","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":305,"Name":"internal.metrics.executorDeserializeTime","Update":5,"Value":13,"Internal":true,"Count Failed Values":true},{"ID":306,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2056000,"Value":8485000,"Internal":true,"Count Failed Values":true},{"ID":307,"Name":"internal.metrics.executorRunTime","Update":12,"Value":38,"Internal":true,"Count Failed Values":true},{"ID":308,"Name":"internal.metrics.executorCpuTime","Update":5051000,"Value":21364000,"Internal":true,"Count Failed Values":true},{"ID":309,"Name":"internal.metrics.resultSize","Update":2848,"Value":11392,"Internal":true,"Count Failed Values":true},{"ID":316,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":317,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":5,"Value":20,"Internal":true,"Count Failed Values":true},{"ID":318,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":319,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":320,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":260,"Value":1040,"Internal":true,"Count Failed Values":true},{"ID":321,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":322,"Name":"internal.metrics.shuffle.read.recordsRead","Update":26,"Value":104,"Internal":true,"Count Failed Values":true},{"ID":323,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":236,"Internal":true,"Count Failed Values":true},{"ID":324,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":4,"Internal":true,"Count Failed Values":true},{"ID":325,"Name":"internal.metrics.shuffle.write.writeTime","Update":1090458,"Value":3147874,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":5,"Executor Deserialize CPU Time":2056000,"Executor Run Time":12,"Executor CPU Time":5051000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":5,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":260,"Total Records Read":26},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":1090458,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":27,"Index":3,"Attempt":0,"Partition ID":3,"Launch Time":1642039528411,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528432,"Failed":false,"Killed":false,"Accumulables":[{"ID":245,"Name":"local blocks read","Update":"5","Value":"25","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":248,"Name":"local bytes read","Update":"260","Value":"1300","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":249,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":250,"Name":"records read","Update":"26","Value":"130","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":286,"Name":"data size","Update":"16","Value":"80","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":295,"Name":"shuffle bytes written","Update":"59","Value":"295","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":296,"Name":"shuffle records written","Update":"1","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":297,"Name":"shuffle write time","Update":"857626","Value":"4005500","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":298,"Name":"duration","Update":"1","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":299,"Name":"number of output rows","Update":"1","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"time in aggregation build","Update":"1","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":305,"Name":"internal.metrics.executorDeserializeTime","Update":7,"Value":20,"Internal":true,"Count Failed Values":true},{"ID":306,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2320000,"Value":10805000,"Internal":true,"Count Failed Values":true},{"ID":307,"Name":"internal.metrics.executorRunTime","Update":7,"Value":45,"Internal":true,"Count Failed Values":true},{"ID":308,"Name":"internal.metrics.executorCpuTime","Update":5718000,"Value":27082000,"Internal":true,"Count Failed Values":true},{"ID":309,"Name":"internal.metrics.resultSize","Update":2848,"Value":14240,"Internal":true,"Count Failed Values":true},{"ID":316,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":317,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":5,"Value":25,"Internal":true,"Count Failed Values":true},{"ID":318,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":319,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":320,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":260,"Value":1300,"Internal":true,"Count Failed Values":true},{"ID":321,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":322,"Name":"internal.metrics.shuffle.read.recordsRead","Update":26,"Value":130,"Internal":true,"Count Failed Values":true},{"ID":323,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":295,"Internal":true,"Count Failed Values":true},{"ID":324,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":5,"Internal":true,"Count Failed Values":true},{"ID":325,"Name":"internal.metrics.shuffle.write.writeTime","Update":857626,"Value":4005500,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":7,"Executor Deserialize CPU Time":2320000,"Executor Run Time":7,"Executor CPU Time":5718000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":5,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":260,"Total Records Read":26},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":857626,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":29,"Index":5,"Attempt":0,"Partition ID":5,"Launch Time":1642039528411,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528433,"Failed":false,"Killed":false,"Accumulables":[{"ID":245,"Name":"local blocks read","Update":"5","Value":"30","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":248,"Name":"local bytes read","Update":"260","Value":"1560","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":249,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":250,"Name":"records read","Update":"26","Value":"156","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":286,"Name":"data size","Update":"16","Value":"96","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":295,"Name":"shuffle bytes written","Update":"59","Value":"354","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":296,"Name":"shuffle records written","Update":"1","Value":"6","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":297,"Name":"shuffle write time","Update":"498790","Value":"4504290","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":298,"Name":"duration","Update":"0","Value":"5","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":299,"Name":"number of output rows","Update":"1","Value":"6","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"time in aggregation build","Update":"0","Value":"3","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":305,"Name":"internal.metrics.executorDeserializeTime","Update":6,"Value":26,"Internal":true,"Count Failed Values":true},{"ID":306,"Name":"internal.metrics.executorDeserializeCpuTime","Update":1613000,"Value":12418000,"Internal":true,"Count Failed Values":true},{"ID":307,"Name":"internal.metrics.executorRunTime","Update":11,"Value":56,"Internal":true,"Count Failed Values":true},{"ID":308,"Name":"internal.metrics.executorCpuTime","Update":4648000,"Value":31730000,"Internal":true,"Count Failed Values":true},{"ID":309,"Name":"internal.metrics.resultSize","Update":2848,"Value":17088,"Internal":true,"Count Failed Values":true},{"ID":316,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":317,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":5,"Value":30,"Internal":true,"Count Failed Values":true},{"ID":318,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":319,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":320,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":260,"Value":1560,"Internal":true,"Count Failed Values":true},{"ID":321,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":322,"Name":"internal.metrics.shuffle.read.recordsRead","Update":26,"Value":156,"Internal":true,"Count Failed Values":true},{"ID":323,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":354,"Internal":true,"Count Failed Values":true},{"ID":324,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":6,"Internal":true,"Count Failed Values":true},{"ID":325,"Name":"internal.metrics.shuffle.write.writeTime","Update":498790,"Value":4504290,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":6,"Executor Deserialize CPU Time":1613000,"Executor Run Time":11,"Executor CPU Time":4648000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":5,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":260,"Total Records Read":26},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":498790,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":26,"Index":2,"Attempt":0,"Partition ID":2,"Launch Time":1642039528411,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528435,"Failed":false,"Killed":false,"Accumulables":[{"ID":245,"Name":"local blocks read","Update":"5","Value":"35","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":248,"Name":"local bytes read","Update":"260","Value":"1820","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":249,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":250,"Name":"records read","Update":"25","Value":"181","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":286,"Name":"data size","Update":"16","Value":"112","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":295,"Name":"shuffle bytes written","Update":"59","Value":"413","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":296,"Name":"shuffle records written","Update":"1","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":297,"Name":"shuffle write time","Update":"877625","Value":"5381915","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":298,"Name":"duration","Update":"4","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":299,"Name":"number of output rows","Update":"1","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"time in aggregation build","Update":"4","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":305,"Name":"internal.metrics.executorDeserializeTime","Update":2,"Value":28,"Internal":true,"Count Failed Values":true},{"ID":306,"Name":"internal.metrics.executorDeserializeCpuTime","Update":2226000,"Value":14644000,"Internal":true,"Count Failed Values":true},{"ID":307,"Name":"internal.metrics.executorRunTime","Update":19,"Value":75,"Internal":true,"Count Failed Values":true},{"ID":308,"Name":"internal.metrics.executorCpuTime","Update":5887000,"Value":37617000,"Internal":true,"Count Failed Values":true},{"ID":309,"Name":"internal.metrics.resultSize","Update":2848,"Value":19936,"Internal":true,"Count Failed Values":true},{"ID":316,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":317,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":5,"Value":35,"Internal":true,"Count Failed Values":true},{"ID":318,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":319,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":320,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":260,"Value":1820,"Internal":true,"Count Failed Values":true},{"ID":321,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":322,"Name":"internal.metrics.shuffle.read.recordsRead","Update":25,"Value":181,"Internal":true,"Count Failed Values":true},{"ID":323,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":413,"Internal":true,"Count Failed Values":true},{"ID":324,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":7,"Internal":true,"Count Failed Values":true},{"ID":325,"Name":"internal.metrics.shuffle.write.writeTime","Update":877625,"Value":5381915,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":2,"Executor Deserialize CPU Time":2226000,"Executor Run Time":19,"Executor CPU Time":5887000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":5,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":260,"Total Records Read":25},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":877625,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":32,"Index":8,"Attempt":0,"Partition ID":8,"Launch Time":1642039528425,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528435,"Failed":false,"Killed":false,"Accumulables":[{"ID":245,"Name":"local blocks read","Update":"5","Value":"40","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":248,"Name":"local bytes read","Update":"260","Value":"2080","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":249,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":250,"Name":"records read","Update":"25","Value":"206","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":286,"Name":"data size","Update":"16","Value":"128","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":295,"Name":"shuffle bytes written","Update":"59","Value":"472","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":296,"Name":"shuffle records written","Update":"1","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":297,"Name":"shuffle write time","Update":"574458","Value":"5956373","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":298,"Name":"duration","Update":"0","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":299,"Name":"number of output rows","Update":"1","Value":"8","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"time in aggregation build","Update":"0","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":305,"Name":"internal.metrics.executorDeserializeTime","Update":2,"Value":30,"Internal":true,"Count Failed Values":true},{"ID":306,"Name":"internal.metrics.executorDeserializeCpuTime","Update":1749000,"Value":16393000,"Internal":true,"Count Failed Values":true},{"ID":307,"Name":"internal.metrics.executorRunTime","Update":5,"Value":80,"Internal":true,"Count Failed Values":true},{"ID":308,"Name":"internal.metrics.executorCpuTime","Update":5125000,"Value":42742000,"Internal":true,"Count Failed Values":true},{"ID":309,"Name":"internal.metrics.resultSize","Update":2848,"Value":22784,"Internal":true,"Count Failed Values":true},{"ID":316,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":317,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":5,"Value":40,"Internal":true,"Count Failed Values":true},{"ID":318,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":319,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":320,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":260,"Value":2080,"Internal":true,"Count Failed Values":true},{"ID":321,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":322,"Name":"internal.metrics.shuffle.read.recordsRead","Update":25,"Value":206,"Internal":true,"Count Failed Values":true},{"ID":323,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":472,"Internal":true,"Count Failed Values":true},{"ID":324,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":8,"Internal":true,"Count Failed Values":true},{"ID":325,"Name":"internal.metrics.shuffle.write.writeTime","Update":574458,"Value":5956373,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":2,"Executor Deserialize CPU Time":1749000,"Executor Run Time":5,"Executor CPU Time":5125000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":5,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":260,"Total Records Read":25},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":574458,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":28,"Index":4,"Attempt":0,"Partition ID":4,"Launch Time":1642039528411,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528436,"Failed":false,"Killed":false,"Accumulables":[{"ID":245,"Name":"local blocks read","Update":"5","Value":"45","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":248,"Name":"local bytes read","Update":"260","Value":"2340","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":249,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":250,"Name":"records read","Update":"26","Value":"232","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":286,"Name":"data size","Update":"16","Value":"144","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":295,"Name":"shuffle bytes written","Update":"59","Value":"531","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":296,"Name":"shuffle records written","Update":"1","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":297,"Name":"shuffle write time","Update":"695666","Value":"6652039","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":298,"Name":"duration","Update":"1","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":299,"Name":"number of output rows","Update":"1","Value":"9","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"time in aggregation build","Update":"0","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":305,"Name":"internal.metrics.executorDeserializeTime","Update":5,"Value":35,"Internal":true,"Count Failed Values":true},{"ID":306,"Name":"internal.metrics.executorDeserializeCpuTime","Update":1966000,"Value":18359000,"Internal":true,"Count Failed Values":true},{"ID":307,"Name":"internal.metrics.executorRunTime","Update":16,"Value":96,"Internal":true,"Count Failed Values":true},{"ID":308,"Name":"internal.metrics.executorCpuTime","Update":5508000,"Value":48250000,"Internal":true,"Count Failed Values":true},{"ID":309,"Name":"internal.metrics.resultSize","Update":2848,"Value":25632,"Internal":true,"Count Failed Values":true},{"ID":316,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":317,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":5,"Value":45,"Internal":true,"Count Failed Values":true},{"ID":318,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":319,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":320,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":260,"Value":2340,"Internal":true,"Count Failed Values":true},{"ID":321,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":322,"Name":"internal.metrics.shuffle.read.recordsRead","Update":26,"Value":232,"Internal":true,"Count Failed Values":true},{"ID":323,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":531,"Internal":true,"Count Failed Values":true},{"ID":324,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":9,"Internal":true,"Count Failed Values":true},{"ID":325,"Name":"internal.metrics.shuffle.write.writeTime","Update":695666,"Value":6652039,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":5,"Executor Deserialize CPU Time":1966000,"Executor Run Time":16,"Executor CPU Time":5508000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":5,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":260,"Total Records Read":26},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":695666,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":8,"Stage Attempt ID":0,"Task Type":"ShuffleMapTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":33,"Index":9,"Attempt":0,"Partition ID":9,"Launch Time":1642039528426,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528437,"Failed":false,"Killed":false,"Accumulables":[{"ID":245,"Name":"local blocks read","Update":"5","Value":"50","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":248,"Name":"local bytes read","Update":"260","Value":"2600","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":249,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":250,"Name":"records read","Update":"26","Value":"258","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":286,"Name":"data size","Update":"16","Value":"160","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":295,"Name":"shuffle bytes written","Update":"59","Value":"590","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":296,"Name":"shuffle records written","Update":"1","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":297,"Name":"shuffle write time","Update":"535084","Value":"7187123","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":298,"Name":"duration","Update":"0","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":299,"Name":"number of output rows","Update":"1","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"time in aggregation build","Update":"0","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":305,"Name":"internal.metrics.executorDeserializeTime","Update":4,"Value":39,"Internal":true,"Count Failed Values":true},{"ID":306,"Name":"internal.metrics.executorDeserializeCpuTime","Update":1688000,"Value":20047000,"Internal":true,"Count Failed Values":true},{"ID":307,"Name":"internal.metrics.executorRunTime","Update":4,"Value":100,"Internal":true,"Count Failed Values":true},{"ID":308,"Name":"internal.metrics.executorCpuTime","Update":4639000,"Value":52889000,"Internal":true,"Count Failed Values":true},{"ID":309,"Name":"internal.metrics.resultSize","Update":2848,"Value":28480,"Internal":true,"Count Failed Values":true},{"ID":316,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":317,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":5,"Value":50,"Internal":true,"Count Failed Values":true},{"ID":318,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":319,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":320,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":260,"Value":2600,"Internal":true,"Count Failed Values":true},{"ID":321,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":322,"Name":"internal.metrics.shuffle.read.recordsRead","Update":26,"Value":258,"Internal":true,"Count Failed Values":true},{"ID":323,"Name":"internal.metrics.shuffle.write.bytesWritten","Update":59,"Value":590,"Internal":true,"Count Failed Values":true},{"ID":324,"Name":"internal.metrics.shuffle.write.recordsWritten","Update":1,"Value":10,"Internal":true,"Count Failed Values":true},{"ID":325,"Name":"internal.metrics.shuffle.write.writeTime","Update":535084,"Value":7187123,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":4,"Executor Deserialize CPU Time":1688000,"Executor Run Time":4,"Executor CPU Time":4639000,"Peak Execution Memory":0,"Result Size":2848,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":5,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":260,"Total Records Read":26},"Shuffle Write Metrics":{"Shuffle Bytes Written":59,"Shuffle Write Time":535084,"Shuffle Records Written":1},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":8,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":10,"RDD Info":[{"RDD ID":18,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"32\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[17],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":16,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"37\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[15],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":17,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"33\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"count at :23","Parent IDs":[16],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[7],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line16.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line16.$read$$iw$$iw$$iw$$iw.(:33)\n$line16.$read$$iw$$iw$$iw.(:35)\n$line16.$read$$iw$$iw.(:37)\n$line16.$read$$iw.(:39)\n$line16.$read.(:41)\n$line16.$read$.(:45)\n$line16.$read$.()\n$line16.$eval$.$print$lzycompute(:7)\n$line16.$eval$.$print(:6)\n$line16.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Submission Time":1642039528404,"Completion Time":1642039528438,"Accumulables":[{"ID":245,"Name":"local blocks read","Value":"50","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":248,"Name":"local bytes read","Value":"2600","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":249,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":250,"Name":"records read","Value":"258","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":286,"Name":"data size","Value":"160","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":295,"Name":"shuffle bytes written","Value":"590","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":296,"Name":"shuffle records written","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":297,"Name":"shuffle write time","Value":"7187123","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":298,"Name":"duration","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":299,"Name":"number of output rows","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":302,"Name":"time in aggregation build","Value":"7","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":305,"Name":"internal.metrics.executorDeserializeTime","Value":39,"Internal":true,"Count Failed Values":true},{"ID":306,"Name":"internal.metrics.executorDeserializeCpuTime","Value":20047000,"Internal":true,"Count Failed Values":true},{"ID":307,"Name":"internal.metrics.executorRunTime","Value":100,"Internal":true,"Count Failed Values":true},{"ID":308,"Name":"internal.metrics.executorCpuTime","Value":52889000,"Internal":true,"Count Failed Values":true},{"ID":309,"Name":"internal.metrics.resultSize","Value":28480,"Internal":true,"Count Failed Values":true},{"ID":316,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":317,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":50,"Internal":true,"Count Failed Values":true},{"ID":318,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":319,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":320,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":2600,"Internal":true,"Count Failed Values":true},{"ID":321,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":322,"Name":"internal.metrics.shuffle.read.recordsRead","Value":258,"Internal":true,"Count Failed Values":true},{"ID":323,"Name":"internal.metrics.shuffle.write.bytesWritten","Value":590,"Internal":true,"Count Failed Values":true},{"ID":324,"Name":"internal.metrics.shuffle.write.recordsWritten","Value":10,"Internal":true,"Count Failed Values":true},{"ID":325,"Name":"internal.metrics.shuffle.write.writeTime","Value":7187123,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":4,"Completion Time":1642039528439,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate","executionId":1,"physicalPlanDescription":"== Physical Plan ==\nAdaptiveSparkPlan (13)\n+- == Final Plan ==\n * HashAggregate (8)\n +- ShuffleQueryStage (7)\n +- Exchange (6)\n +- * HashAggregate (5)\n +- ShuffleQueryStage (4)\n +- Exchange (3)\n +- * Project (2)\n +- * Range (1)\n+- == Initial Plan ==\n HashAggregate (12)\n +- Exchange (11)\n +- HashAggregate (10)\n +- Exchange (9)\n +- Project (2)\n +- Range (1)\n\n\n(1) Range [codegen id : 1]\nOutput [1]: [id#10L]\nArguments: Range (0, 3347, step=13, splits=Some(5))\n\n(2) Project [codegen id : 1]\nOutput: []\nInput [1]: [id#10L]\n\n(3) Exchange\nInput: []\nArguments: RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#89]\n\n(4) ShuffleQueryStage\nOutput: []\nArguments: 0\n\n(5) HashAggregate [codegen id : 2]\nInput: []\nKeys: []\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count#18L]\nResults [1]: [count#19L]\n\n(6) Exchange\nInput [1]: [count#19L]\nArguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#110]\n\n(7) ShuffleQueryStage\nOutput [1]: [count#19L]\nArguments: 1\n\n(8) HashAggregate [codegen id : 3]\nInput [1]: [count#19L]\nKeys: []\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#15L]\nResults [1]: [count(1)#15L AS count#16L]\n\n(9) Exchange\nInput: []\nArguments: RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#76]\n\n(10) HashAggregate\nInput: []\nKeys: []\nFunctions [1]: [partial_count(1)]\nAggregate Attributes [1]: [count#18L]\nResults [1]: [count#19L]\n\n(11) Exchange\nInput [1]: [count#19L]\nArguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#80]\n\n(12) HashAggregate\nInput [1]: [count#19L]\nKeys: []\nFunctions [1]: [count(1)]\nAggregate Attributes [1]: [count(1)#15L]\nResults [1]: [count(1)#15L AS count#16L]\n\n(13) AdaptiveSparkPlan\nOutput [1]: [count#16L]\nArguments: isFinalPlan=true\n\n","sparkPlanInfo":{"nodeName":"AdaptiveSparkPlan","simpleString":"AdaptiveSparkPlan isFinalPlan=true","children":[{"nodeName":"WholeStageCodegen (3)","simpleString":"WholeStageCodegen (3)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"ShuffleQueryStage","simpleString":"ShuffleQueryStage 1","children":[{"nodeName":"Exchange","simpleString":"Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#110]","children":[{"nodeName":"WholeStageCodegen (2)","simpleString":"WholeStageCodegen (2)","children":[{"nodeName":"HashAggregate","simpleString":"HashAggregate(keys=[], functions=[partial_count(1)])","children":[{"nodeName":"InputAdapter","simpleString":"InputAdapter","children":[{"nodeName":"ShuffleQueryStage","simpleString":"ShuffleQueryStage 0","children":[{"nodeName":"Exchange","simpleString":"Exchange RoundRobinPartitioning(10), REPARTITION_BY_NUM, [id=#89]","children":[{"nodeName":"WholeStageCodegen (1)","simpleString":"WholeStageCodegen (1)","children":[{"nodeName":"Project","simpleString":"Project","children":[{"nodeName":"Range","simpleString":"Range (0, 3347, step=13, splits=5)","children":[],"metadata":{},"metrics":[{"name":"number of output rows","accumulatorId":217,"metricType":"sum"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":254,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":252,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":253,"metricType":"nsTiming"},{"name":"records read","accumulatorId":250,"metricType":"sum"},{"name":"local bytes read","accumulatorId":248,"metricType":"size"},{"name":"fetch wait time","accumulatorId":249,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":246,"metricType":"size"},{"name":"local blocks read","accumulatorId":245,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":244,"metricType":"sum"},{"name":"data size","accumulatorId":242,"metricType":"size"},{"name":"number of partitions","accumulatorId":243,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":247,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":251,"metricType":"size"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":301,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":302,"metricType":"timing"},{"name":"peak memory","accumulatorId":300,"metricType":"size"},{"name":"number of output rows","accumulatorId":299,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":304,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":303,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":298,"metricType":"timing"}]}],"metadata":{},"metrics":[{"name":"shuffle records written","accumulatorId":296,"metricType":"sum"},{"name":"shuffle write time","accumulatorId":297,"metricType":"nsTiming"},{"name":"records read","accumulatorId":294,"metricType":"sum"},{"name":"local bytes read","accumulatorId":292,"metricType":"size"},{"name":"fetch wait time","accumulatorId":293,"metricType":"timing"},{"name":"remote bytes read","accumulatorId":290,"metricType":"size"},{"name":"local blocks read","accumulatorId":289,"metricType":"sum"},{"name":"remote blocks read","accumulatorId":288,"metricType":"sum"},{"name":"data size","accumulatorId":286,"metricType":"size"},{"name":"number of partitions","accumulatorId":287,"metricType":"sum"},{"name":"remote bytes read to disk","accumulatorId":291,"metricType":"size"},{"name":"shuffle bytes written","accumulatorId":295,"metricType":"size"}]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[]}],"metadata":{},"metrics":[{"name":"spill size","accumulatorId":333,"metricType":"size"},{"name":"time in aggregation build","accumulatorId":334,"metricType":"timing"},{"name":"peak memory","accumulatorId":332,"metricType":"size"},{"name":"number of output rows","accumulatorId":331,"metricType":"sum"},{"name":"number of sort fallback tasks","accumulatorId":336,"metricType":"sum"},{"name":"avg hash probe bucket list iters","accumulatorId":335,"metricType":"average"}]}],"metadata":{},"metrics":[{"name":"duration","accumulatorId":330,"metricType":"timing"}]}],"metadata":{},"metrics":[]}} +{"Event":"SparkListenerJobStart","Job ID":5,"Submission Time":1642039528461,"Stage Infos":[{"Stage ID":9,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":5,"RDD Info":[{"RDD ID":15,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"28\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[14],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":14,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"28\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[13],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":12,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[11],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":11,"Name":"ParallelCollectionRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":13,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"29\",\"name\":\"WholeStageCodegen (1)\"}","Callsite":"count at :23","Parent IDs":[12],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"DETERMINATE","Number of Partitions":5,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line16.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line16.$read$$iw$$iw$$iw$$iw.(:33)\n$line16.$read$$iw$$iw$$iw.(:35)\n$line16.$read$$iw$$iw.(:37)\n$line16.$read$$iw.(:39)\n$line16.$read.(:41)\n$line16.$read$.(:45)\n$line16.$read$.()\n$line16.$eval$.$print$lzycompute(:7)\n$line16.$eval$.$print(:6)\n$line16.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":10,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":10,"RDD Info":[{"RDD ID":18,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"32\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[17],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":16,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"37\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[15],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":17,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"33\",\"name\":\"WholeStageCodegen (2)\"}","Callsite":"count at :23","Parent IDs":[16],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":10,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[9],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line16.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line16.$read$$iw$$iw$$iw$$iw.(:33)\n$line16.$read$$iw$$iw$$iw.(:35)\n$line16.$read$$iw$$iw.(:37)\n$line16.$read$$iw.(:39)\n$line16.$read.(:41)\n$line16.$read$.(:45)\n$line16.$read$.()\n$line16.$eval$.$print$lzycompute(:7)\n$line16.$eval$.$print(:6)\n$line16.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Accumulables":[],"Resource Profile Id":0},{"Stage ID":11,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":1,"RDD Info":[{"RDD ID":21,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"43\",\"name\":\"mapPartitionsInternal\"}","Callsite":"count at :23","Parent IDs":[20],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":20,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"38\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"count at :23","Parent IDs":[19],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":19,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"42\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[18],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[10],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line16.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line16.$read$$iw$$iw$$iw$$iw.(:33)\n$line16.$read$$iw$$iw$$iw.(:35)\n$line16.$read$$iw$$iw.(:37)\n$line16.$read$$iw.(:39)\n$line16.$read.(:41)\n$line16.$read$.(:45)\n$line16.$read$.()\n$line16.$eval$.$print$lzycompute(:7)\n$line16.$eval$.$print(:6)\n$line16.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Accumulables":[],"Resource Profile Id":0}],"Stage IDs":[9,10,11],"Properties":{"spark.sql.warehouse.dir":"file:/Users/lijunqing/Code/stczwd/spark/dist/spark-warehouse","spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"172.22.200.52","spark.eventLog.enabled":"true","spark.driver.port":"61038","__fetch_continuous_blocks_in_batch_enabled":"true","spark.repl.class.uri":"spark://172.22.200.52:61038/classes","spark.jars":"","spark.repl.class.outputDir":"/private/var/folders/dm/1vhcj_l97j146n6mgr2cm9rw0000gp/T/spark-501ae231-0cb9-4ba2-845f-3fc3cb053141/repl-054c2c94-f7a2-4f4b-9f12-96a1cdb15bc6","spark.app.name":"Spark shell","spark.rdd.scope":"{\"id\":\"44\",\"name\":\"collect\"}","spark.rdd.scope.noOverride":"true","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1642039450519","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.home":"/Users/lijunqing/Code/stczwd/spark/dist","spark.eventLog.dir":"/Users/lijunqing/Code/stczwd/spark/dist/eventLog","spark.sql.execution.id":"1","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1642039451826"}} +{"Event":"SparkListenerStageSubmitted","Stage Info":{"Stage ID":11,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":1,"RDD Info":[{"RDD ID":21,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"43\",\"name\":\"mapPartitionsInternal\"}","Callsite":"count at :23","Parent IDs":[20],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":20,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"38\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"count at :23","Parent IDs":[19],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":19,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"42\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[18],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[10],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line16.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line16.$read$$iw$$iw$$iw$$iw.(:33)\n$line16.$read$$iw$$iw$$iw.(:35)\n$line16.$read$$iw$$iw.(:37)\n$line16.$read$$iw.(:39)\n$line16.$read.(:41)\n$line16.$read$.(:45)\n$line16.$read$.()\n$line16.$eval$.$print$lzycompute(:7)\n$line16.$eval$.$print(:6)\n$line16.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Submission Time":1642039528463,"Accumulables":[],"Resource Profile Id":0},"Properties":{"spark.sql.warehouse.dir":"file:/Users/lijunqing/Code/stczwd/spark/dist/spark-warehouse","spark.executor.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.driver.host":"172.22.200.52","spark.eventLog.enabled":"true","spark.driver.port":"61038","__fetch_continuous_blocks_in_batch_enabled":"true","spark.repl.class.uri":"spark://172.22.200.52:61038/classes","spark.jars":"","spark.repl.class.outputDir":"/private/var/folders/dm/1vhcj_l97j146n6mgr2cm9rw0000gp/T/spark-501ae231-0cb9-4ba2-845f-3fc3cb053141/repl-054c2c94-f7a2-4f4b-9f12-96a1cdb15bc6","spark.app.name":"Spark shell","spark.rdd.scope":"{\"id\":\"44\",\"name\":\"collect\"}","spark.rdd.scope.noOverride":"true","spark.submit.pyFiles":"","spark.ui.showConsoleProgress":"true","spark.app.startTime":"1642039450519","spark.executor.id":"driver","spark.driver.extraJavaOptions":"-XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED","spark.submit.deployMode":"client","spark.master":"local[*]","spark.home":"/Users/lijunqing/Code/stczwd/spark/dist","spark.eventLog.dir":"/Users/lijunqing/Code/stczwd/spark/dist/eventLog","spark.sql.execution.id":"1","spark.sql.catalogImplementation":"hive","spark.app.id":"local-1642039451826"}} +{"Event":"SparkListenerTaskStart","Stage ID":11,"Stage Attempt ID":0,"Task Info":{"Task ID":34,"Index":0,"Attempt":0,"Partition ID":0,"Launch Time":1642039528469,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":0,"Failed":false,"Killed":false,"Accumulables":[]}} +{"Event":"SparkListenerTaskEnd","Stage ID":11,"Stage Attempt ID":0,"Task Type":"ResultTask","Task End Reason":{"Reason":"Success"},"Task Info":{"Task ID":34,"Index":0,"Attempt":0,"Partition ID":0,"Launch Time":1642039528469,"Executor ID":"driver","Host":"172.22.200.52","Locality":"NODE_LOCAL","Speculative":false,"Getting Result Time":0,"Finish Time":1642039528478,"Failed":false,"Killed":false,"Accumulables":[{"ID":289,"Name":"local blocks read","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":292,"Name":"local bytes read","Update":"590","Value":"590","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":293,"Name":"fetch wait time","Update":"0","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":294,"Name":"records read","Update":"10","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":330,"Name":"duration","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":331,"Name":"number of output rows","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":334,"Name":"time in aggregation build","Update":"1","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":337,"Name":"internal.metrics.executorDeserializeTime","Update":2,"Value":2,"Internal":true,"Count Failed Values":true},{"ID":338,"Name":"internal.metrics.executorDeserializeCpuTime","Update":1866000,"Value":1866000,"Internal":true,"Count Failed Values":true},{"ID":339,"Name":"internal.metrics.executorRunTime","Update":3,"Value":3,"Internal":true,"Count Failed Values":true},{"ID":340,"Name":"internal.metrics.executorCpuTime","Update":3912000,"Value":3912000,"Internal":true,"Count Failed Values":true},{"ID":341,"Name":"internal.metrics.resultSize","Update":2656,"Value":2656,"Internal":true,"Count Failed Values":true},{"ID":348,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":349,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Update":10,"Value":10,"Internal":true,"Count Failed Values":true},{"ID":350,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":351,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":352,"Name":"internal.metrics.shuffle.read.localBytesRead","Update":590,"Value":590,"Internal":true,"Count Failed Values":true},{"ID":353,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Update":0,"Value":0,"Internal":true,"Count Failed Values":true},{"ID":354,"Name":"internal.metrics.shuffle.read.recordsRead","Update":10,"Value":10,"Internal":true,"Count Failed Values":true}]},"Task Executor Metrics":{"JVMHeapMemory":0,"JVMOffHeapMemory":0,"OnHeapExecutionMemory":0,"OffHeapExecutionMemory":0,"OnHeapStorageMemory":0,"OffHeapStorageMemory":0,"OnHeapUnifiedMemory":0,"OffHeapUnifiedMemory":0,"DirectPoolMemory":0,"MappedPoolMemory":0,"ProcessTreeJVMVMemory":0,"ProcessTreeJVMRSSMemory":0,"ProcessTreePythonVMemory":0,"ProcessTreePythonRSSMemory":0,"ProcessTreeOtherVMemory":0,"ProcessTreeOtherRSSMemory":0,"MinorGCCount":0,"MinorGCTime":0,"MajorGCCount":0,"MajorGCTime":0,"TotalGCTime":0},"Task Metrics":{"Executor Deserialize Time":2,"Executor Deserialize CPU Time":1866000,"Executor Run Time":3,"Executor CPU Time":3912000,"Peak Execution Memory":0,"Result Size":2656,"JVM GC Time":0,"Result Serialization Time":0,"Memory Bytes Spilled":0,"Disk Bytes Spilled":0,"Shuffle Read Metrics":{"Remote Blocks Fetched":0,"Local Blocks Fetched":10,"Fetch Wait Time":0,"Remote Bytes Read":0,"Remote Bytes Read To Disk":0,"Local Bytes Read":590,"Total Records Read":10},"Shuffle Write Metrics":{"Shuffle Bytes Written":0,"Shuffle Write Time":0,"Shuffle Records Written":0},"Input Metrics":{"Bytes Read":0,"Records Read":0},"Output Metrics":{"Bytes Written":0,"Records Written":0},"Updated Blocks":[]}} +{"Event":"SparkListenerStageCompleted","Stage Info":{"Stage ID":11,"Stage Attempt ID":0,"Stage Name":"count at :23","Number of Tasks":1,"RDD Info":[{"RDD ID":21,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"43\",\"name\":\"mapPartitionsInternal\"}","Callsite":"count at :23","Parent IDs":[20],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":20,"Name":"MapPartitionsRDD","Scope":"{\"id\":\"38\",\"name\":\"WholeStageCodegen (3)\"}","Callsite":"count at :23","Parent IDs":[19],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0},{"RDD ID":19,"Name":"ShuffledRowRDD","Scope":"{\"id\":\"42\",\"name\":\"Exchange\"}","Callsite":"count at :23","Parent IDs":[18],"Storage Level":{"Use Disk":false,"Use Memory":false,"Deserialized":false,"Replication":1},"Barrier":false,"DeterministicLevel":"UNORDERED","Number of Partitions":1,"Number of Cached Partitions":0,"Memory Size":0,"Disk Size":0}],"Parent IDs":[10],"Details":"org.apache.spark.sql.Dataset.count(Dataset.scala:3130)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:23)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.(:27)\n$line16.$read$$iw$$iw$$iw$$iw$$iw$$iw.(:29)\n$line16.$read$$iw$$iw$$iw$$iw$$iw.(:31)\n$line16.$read$$iw$$iw$$iw$$iw.(:33)\n$line16.$read$$iw$$iw$$iw.(:35)\n$line16.$read$$iw$$iw.(:37)\n$line16.$read$$iw.(:39)\n$line16.$read.(:41)\n$line16.$read$.(:45)\n$line16.$read$.()\n$line16.$eval$.$print$lzycompute(:7)\n$line16.$eval$.$print(:6)\n$line16.$eval.$print()\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\nscala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:747)","Submission Time":1642039528463,"Completion Time":1642039528479,"Accumulables":[{"ID":289,"Name":"local blocks read","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":292,"Name":"local bytes read","Value":"590","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":293,"Name":"fetch wait time","Value":"0","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":294,"Name":"records read","Value":"10","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":330,"Name":"duration","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":331,"Name":"number of output rows","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":334,"Name":"time in aggregation build","Value":"1","Internal":true,"Count Failed Values":true,"Metadata":"sql"},{"ID":337,"Name":"internal.metrics.executorDeserializeTime","Value":2,"Internal":true,"Count Failed Values":true},{"ID":338,"Name":"internal.metrics.executorDeserializeCpuTime","Value":1866000,"Internal":true,"Count Failed Values":true},{"ID":339,"Name":"internal.metrics.executorRunTime","Value":3,"Internal":true,"Count Failed Values":true},{"ID":340,"Name":"internal.metrics.executorCpuTime","Value":3912000,"Internal":true,"Count Failed Values":true},{"ID":341,"Name":"internal.metrics.resultSize","Value":2656,"Internal":true,"Count Failed Values":true},{"ID":348,"Name":"internal.metrics.shuffle.read.remoteBlocksFetched","Value":0,"Internal":true,"Count Failed Values":true},{"ID":349,"Name":"internal.metrics.shuffle.read.localBlocksFetched","Value":10,"Internal":true,"Count Failed Values":true},{"ID":350,"Name":"internal.metrics.shuffle.read.remoteBytesRead","Value":0,"Internal":true,"Count Failed Values":true},{"ID":351,"Name":"internal.metrics.shuffle.read.remoteBytesReadToDisk","Value":0,"Internal":true,"Count Failed Values":true},{"ID":352,"Name":"internal.metrics.shuffle.read.localBytesRead","Value":590,"Internal":true,"Count Failed Values":true},{"ID":353,"Name":"internal.metrics.shuffle.read.fetchWaitTime","Value":0,"Internal":true,"Count Failed Values":true},{"ID":354,"Name":"internal.metrics.shuffle.read.recordsRead","Value":10,"Internal":true,"Count Failed Values":true}],"Resource Profile Id":0}} +{"Event":"SparkListenerJobEnd","Job ID":5,"Completion Time":1642039528479,"Job Result":{"Result":"JobSucceeded"}} +{"Event":"org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd","executionId":1,"time":1642039528481} +{"Event":"SparkListenerApplicationEnd","Timestamp":1642039536564} diff --git a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala index 2fb5140d38b2c..c616c43fe1b1e 100644 --- a/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExecutorAllocationManagerSuite.scala @@ -1756,12 +1756,12 @@ class ExecutorAllocationManagerSuite extends SparkFunSuite { } private def onExecutorBusy(manager: ExecutorAllocationManager, id: String): Unit = { - val info = new TaskInfo(1, 1, 1, 0, id, "foo.example.com", TaskLocality.PROCESS_LOCAL, false) + val info = new TaskInfo(1, 1, 1, 1, 0, id, "foo.example.com", TaskLocality.PROCESS_LOCAL, false) post(SparkListenerTaskStart(1, 1, info)) } private def onExecutorIdle(manager: ExecutorAllocationManager, id: String): Unit = { - val info = new TaskInfo(1, 1, 1, 0, id, "foo.example.com", TaskLocality.PROCESS_LOCAL, false) + val info = new TaskInfo(1, 1, 1, 1, 0, id, "foo.example.com", TaskLocality.PROCESS_LOCAL, false) info.markFinished(TaskState.FINISHED, 1) post(SparkListenerTaskEnd(1, 1, "foo", Success, info, new ExecutorMetrics, null)) } @@ -1815,7 +1815,8 @@ private object ExecutorAllocationManagerSuite extends PrivateMethodTester { taskIndex: Int, executorId: String, speculative: Boolean = false): TaskInfo = { - new TaskInfo(taskId, taskIndex, 0, 0, executorId, "", TaskLocality.ANY, speculative) + new TaskInfo(taskId, taskIndex, 0, partitionId = taskIndex, + 0, executorId, "", TaskLocality.ANY, speculative) } /* ------------------------------------------------------- * diff --git a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala index c3bd4d880a5a5..25f962aaa65a6 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/HistoryServerSuite.scala @@ -146,6 +146,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers "one stage attempt json" -> "applications/local-1422981780767/stages/1/0", "one stage attempt json details with failed task" -> "applications/local-1422981780767/stages/1/0?details=true&taskStatus=failed", + "one stage json with partitionId" -> "applications/local-1642039451826/stages/2", "stage task summary w shuffle write" -> "applications/local-1430917381534/stages/0/0/taskSummary", @@ -169,6 +170,7 @@ class HistoryServerSuite extends SparkFunSuite with BeforeAndAfter with Matchers "applications/local-1430917381534/stages/0/0/taskList?status=success&offset=1&length=2", "stage task list w/ status & sortBy short names: runtime" -> "applications/local-1430917381534/stages/0/0/taskList?status=success&sortBy=runtime", + "stage task list with partitionId" -> "applications/local-1642039451826/stages/0/0/taskList", "stage list with accumulable json" -> "applications/local-1426533911241/1/stages", "stage with accumulable json" -> "applications/local-1426533911241/1/stages/0/0", diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 023e352ba1b02..c61cfe6874f7d 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -2082,7 +2082,8 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti // result task 0.0 succeed runEvent(makeCompletionEvent(taskSets(1).tasks(0), Success, 42)) // speculative result task 1.1 fetch failed - val info = new TaskInfo(4, index = 1, attemptNumber = 1, 0L, "", "", TaskLocality.ANY, true) + val info = new TaskInfo( + 4, index = 1, attemptNumber = 1, partitionId = 1, 0L, "", "", TaskLocality.ANY, true) runEvent(makeCompletionEvent( taskSets(1).tasks(1), FetchFailed(makeBlockManagerId("hostA"), shuffleDep.shuffleId, 0L, 0, 1, "ignored"), @@ -4371,13 +4372,13 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti // Nothing in this test should break if the task info's fields are null, but // OutputCommitCoordinator requires the task info itself to not be null. private def createFakeTaskInfo(): TaskInfo = { - val info = new TaskInfo(0, 0, 0, 0L, "", "", TaskLocality.ANY, false) + val info = new TaskInfo(0, 0, 0, 0, 0L, "", "", TaskLocality.ANY, false) info.finishTime = 1 info } private def createFakeTaskInfoWithId(taskId: Long): TaskInfo = { - val info = new TaskInfo(taskId, 0, 0, 0L, "", "", TaskLocality.ANY, false) + val info = new TaskInfo(taskId, 0, 0, 0, 0L, "", "", TaskLocality.ANY, false) info.finishTime = 1 info } diff --git a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala index edb2095004f71..d7cd63a7c21cf 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala @@ -599,7 +599,8 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit stageId: Int, taskType: String, executorMetrics: ExecutorMetrics): SparkListenerTaskEnd = { - val taskInfo = new TaskInfo(taskId, taskIndex, 0, 1553291556000L, executorId, "executor", + val taskInfo = new TaskInfo( + taskId, taskIndex, 0, partitionId = taskIndex, 1553291556000L, executorId, "executor", TaskLocality.NODE_LOCAL, false) val taskMetrics = TaskMetrics.empty SparkListenerTaskEnd(stageId, 0, taskType, Success, taskInfo, executorMetrics, taskMetrics) diff --git a/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala index 336198b182c87..39e1470d12071 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/dynalloc/ExecutorMonitorSuite.scala @@ -493,7 +493,8 @@ class ExecutorMonitorSuite extends SparkFunSuite { speculative: Boolean = false, duration: Long = -1L): TaskInfo = { val start = if (duration > 0) clock.getTimeMillis() - duration else clock.getTimeMillis() - val task = new TaskInfo(id, id, 1, start, execId, "foo.example.com", + val task = new TaskInfo( + id, id, 1, id, start, execId, "foo.example.com", TaskLocality.PROCESS_LOCAL, speculative) if (duration > 0) { task.markFinished(TaskState.FINISHED, math.max(1, clock.getTimeMillis())) diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala index 5e2e931c37689..047d59e1320af 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala @@ -1860,7 +1860,8 @@ abstract class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter private def newAttempt(orig: TaskInfo, nextId: Long): TaskInfo = { // Task reattempts have a different ID, but the same index as the original. - new TaskInfo(nextId, orig.index, orig.attemptNumber + 1, time, orig.executorId, + new TaskInfo( + nextId, orig.index, orig.attemptNumber + 1, orig.partitionId, time, orig.executorId, s"${orig.executorId}.example.com", TaskLocality.PROCESS_LOCAL, orig.speculative) } @@ -1868,7 +1869,9 @@ abstract class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter (1 to count).map { id => val exec = execs(id.toInt % execs.length) val taskId = nextTaskId() - new TaskInfo(taskId, taskId.toInt, 1, time, exec, s"$exec.example.com", + val taskIndex = id - 1 + val partitionId = taskIndex + new TaskInfo(taskId, taskIndex, 1, partitionId, time, exec, s"$exec.example.com", TaskLocality.PROCESS_LOCAL, id % 2 == 0) } } diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala index 53b01313d5d4c..70852164b890f 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusStoreSuite.scala @@ -219,7 +219,8 @@ class AppStatusStoreSuite extends SparkFunSuite { resourceProfileId = ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) (1 to 2).foreach { taskId => - val taskInfo = new TaskInfo(taskId, taskId, 0, 0, "0", "localhost", TaskLocality.ANY, + val taskInfo = new TaskInfo( + taskId, taskId, 0, taskId, 0, "0", "localhost", TaskLocality.ANY, false) listener.onStageSubmitted(SparkListenerStageSubmitted(stageInfo)) listener.onTaskStart(SparkListenerTaskStart(0, 0, taskInfo)) @@ -246,14 +247,15 @@ class AppStatusStoreSuite extends SparkFunSuite { private def newTaskData(i: Int, status: String = "SUCCESS"): TaskDataWrapper = { new TaskDataWrapper( - i.toLong, i, i, i, i, i, i.toString, i.toString, status, i.toString, false, Nil, None, true, + i.toLong, i, i, i, i, i, i, + i.toString, i.toString, status, i.toString, false, Nil, None, true, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, i, stageId, attemptId) } private def writeTaskDataToStore(i: Int, store: KVStore, status: String): Unit = { - val liveTask = new LiveTask(new TaskInfo( i.toLong, i, i, i.toLong, i.toString, + val liveTask = new LiveTask(new TaskInfo( i.toLong, i, i, i, i.toLong, i.toString, i.toString, TaskLocality.ANY, false), stageId, attemptId, None) if (status == "SUCCESS") { diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusUtilsSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusUtilsSuite.scala index a01b24d323d28..da14dcd541632 100644 --- a/core/src/test/scala/org/apache/spark/status/AppStatusUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/status/AppStatusUtilsSuite.scala @@ -28,6 +28,7 @@ class AppStatusUtilsSuite extends SparkFunSuite { taskId = 0, index = 0, attempt = 0, + partitionId = 0, launchTime = new Date(1L), resultFetchStart = None, duration = Some(100L), @@ -62,6 +63,7 @@ class AppStatusUtilsSuite extends SparkFunSuite { taskId = 0, index = 0, attempt = 0, + partitionId = 0, launchTime = new Date(1L), resultFetchStart = None, duration = Some(100L), diff --git a/core/src/test/scala/org/apache/spark/status/ListenerEventsTestHelper.scala b/core/src/test/scala/org/apache/spark/status/ListenerEventsTestHelper.scala index 99c0d9593ccae..f93c2bcea2c48 100644 --- a/core/src/test/scala/org/apache/spark/status/ListenerEventsTestHelper.scala +++ b/core/src/test/scala/org/apache/spark/status/ListenerEventsTestHelper.scala @@ -73,7 +73,7 @@ object ListenerEventsTestHelper { def createTasks(ids: Seq[Long], execs: Array[String], time: Long): Seq[TaskInfo] = { ids.zipWithIndex.map { case (id, idx) => val exec = execs(idx % execs.length) - new TaskInfo(id, idx, 1, time, exec, s"$exec.example.com", + new TaskInfo(id, idx, 1, idx, time, exec, s"$exec.example.com", TaskLocality.PROCESS_LOCAL, idx % 2 == 0) } } @@ -84,7 +84,8 @@ object ListenerEventsTestHelper { def createTaskWithNewAttempt(orig: TaskInfo, time: Long): TaskInfo = { // Task reattempts have a different ID, but the same index as the original. - new TaskInfo(nextTaskId(), orig.index, orig.attemptNumber + 1, time, orig.executorId, + new TaskInfo( + nextTaskId(), orig.index, orig.attemptNumber + 1, orig.partitionId, time, orig.executorId, s"${orig.executorId}.example.com", TaskLocality.PROCESS_LOCAL, orig.speculative) } diff --git a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala index 31d87180870dd..3448b4f95d89b 100644 --- a/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/StagePageSuite.scala @@ -140,8 +140,8 @@ class StagePageSuite extends SparkFunSuite with LocalSparkContext { // Simulate two tasks to test PEAK_EXECUTION_MEMORY correctness (1 to 2).foreach { taskId => - val taskInfo = new TaskInfo(taskId, taskId, 0, 0, "0", "localhost", TaskLocality.ANY, - false) + val taskInfo = new TaskInfo(taskId, taskId, 0, taskId, 0, + "0", "localhost", TaskLocality.ANY, false) listener.onStageSubmitted(SparkListenerStageSubmitted(stageInfo)) listener.onTaskStart(SparkListenerTaskStart(0, 0, taskInfo)) taskInfo.markFinished(TaskState.FINISHED, System.currentTimeMillis()) diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala index 4eea2256553f5..36b61f67e3b87 100644 --- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala @@ -45,23 +45,23 @@ class JsonProtocolSuite extends SparkFunSuite { val stageSubmitted = SparkListenerStageSubmitted(makeStageInfo(100, 200, 300, 400L, 500L), properties) val stageCompleted = SparkListenerStageCompleted(makeStageInfo(101, 201, 301, 401L, 501L)) - val taskStart = SparkListenerTaskStart(111, 0, makeTaskInfo(222L, 333, 1, 444L, false)) + val taskStart = SparkListenerTaskStart(111, 0, makeTaskInfo(222L, 333, 1, 333, 444L, false)) val taskGettingResult = - SparkListenerTaskGettingResult(makeTaskInfo(1000L, 2000, 5, 3000L, true)) + SparkListenerTaskGettingResult(makeTaskInfo(1000L, 2000, 5, 2000, 3000L, true)) val taskEnd = SparkListenerTaskEnd(1, 0, "ShuffleMapTask", Success, - makeTaskInfo(123L, 234, 67, 345L, false), + makeTaskInfo(123L, 234, 67, 234, 345L, false), new ExecutorMetrics(Array(543L, 123456L, 12345L, 1234L, 123L, 12L, 432L, 321L, 654L, 765L, 256912L, 123456L, 123456L, 61728L, 30364L, 15182L, 0, 0, 0, 0, 80001L)), makeTaskMetrics(300L, 400L, 500L, 600L, 700, 800, hasHadoopInput = false, hasOutput = false)) val taskEndWithHadoopInput = SparkListenerTaskEnd(1, 0, "ShuffleMapTask", Success, - makeTaskInfo(123L, 234, 67, 345L, false), + makeTaskInfo(123L, 234, 67, 234, 345L, false), new ExecutorMetrics(Array(543L, 123456L, 12345L, 1234L, 123L, 12L, 432L, 321L, 654L, 765L, 256912L, 123456L, 123456L, 61728L, 30364L, 15182L, 0, 0, 0, 0, 80001L)), makeTaskMetrics(300L, 400L, 500L, 600L, 700, 800, hasHadoopInput = true, hasOutput = false)) val taskEndWithOutput = SparkListenerTaskEnd(1, 0, "ResultTask", Success, - makeTaskInfo(123L, 234, 67, 345L, false), + makeTaskInfo(123L, 234, 67, 234, 345L, false), new ExecutorMetrics(Array(543L, 123456L, 12345L, 1234L, 123L, 12L, 432L, 321L, 654L, 765L, 256912L, 123456L, 123456L, 61728L, 30364L, 15182L, 0, 0, 0, 0, 80001L)), @@ -175,7 +175,7 @@ class JsonProtocolSuite extends SparkFunSuite { val attributes = Map("ContainerId" -> "ct1", "User" -> "spark").toMap testRDDInfo(makeRddInfo(2, 3, 4, 5L, 6L, DeterministicLevel.DETERMINATE)) testStageInfo(makeStageInfo(10, 20, 30, 40L, 50L)) - testTaskInfo(makeTaskInfo(999L, 888, 55, 777L, false)) + testTaskInfo(makeTaskInfo(999L, 888, 55, 888, 777L, false)) testTaskMetrics(makeTaskMetrics( 33333L, 44444L, 55555L, 66666L, 7, 8, hasHadoopInput = false, hasOutput = false)) testBlockManagerId(BlockManagerId("Hong", "Kong", 500)) @@ -1021,9 +1021,9 @@ private[spark] object JsonProtocolSuite extends Assertions { stageInfo } - private def makeTaskInfo(a: Long, b: Int, c: Int, d: Long, speculative: Boolean) = { - val taskInfo = new TaskInfo(a, b, c, d, "executor", "your kind sir", TaskLocality.NODE_LOCAL, - speculative) + private def makeTaskInfo(a: Long, b: Int, c: Int, d: Int, e: Long, speculative: Boolean) = { + val taskInfo = new TaskInfo(a, b, c, d, e, + "executor", "your kind sir", TaskLocality.NODE_LOCAL, speculative) taskInfo.setAccumulables( List(makeAccumulableInfo(1), makeAccumulableInfo(2), makeAccumulableInfo(3, internal = true))) taskInfo @@ -1226,6 +1226,7 @@ private[spark] object JsonProtocolSuite extends Assertions { | "Task ID": 222, | "Index": 333, | "Attempt": 1, + | "Partition ID": 333, | "Launch Time": 444, | "Executor ID": "executor", | "Host": "your kind sir", @@ -1273,6 +1274,7 @@ private[spark] object JsonProtocolSuite extends Assertions { | "Task ID": 1000, | "Index": 2000, | "Attempt": 5, + | "Partition ID": 2000, | "Launch Time": 3000, | "Executor ID": "executor", | "Host": "your kind sir", @@ -1326,6 +1328,7 @@ private[spark] object JsonProtocolSuite extends Assertions { | "Task ID": 123, | "Index": 234, | "Attempt": 67, + | "Partition ID": 234, | "Launch Time": 345, | "Executor ID": "executor", | "Host": "your kind sir", @@ -1451,6 +1454,7 @@ private[spark] object JsonProtocolSuite extends Assertions { | "Task ID": 123, | "Index": 234, | "Attempt": 67, + | "Partition ID": 234, | "Launch Time": 345, | "Executor ID": "executor", | "Host": "your kind sir", @@ -1576,6 +1580,7 @@ private[spark] object JsonProtocolSuite extends Assertions { | "Task ID": 123, | "Index": 234, | "Attempt": 67, + | "Partition ID": 234, | "Launch Time": 345, | "Executor ID": "executor", | "Host": "your kind sir", diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 7fdc3839d8a4f..464d5b234c835 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -96,6 +96,7 @@ local-1426633911242 local-1430917381534 local-1430917381535_1 local-1430917381535_2 +local-1642039451826 DESCRIPTION NAMESPACE test_support/* diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index dc3b661406537..8dc4fab1ab76e 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -52,6 +52,9 @@ object MimaExcludes { // [SPARK-38432][SQL] Reactor framework so as JDBC dialect could compile filter by self way ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.sources.Filter.toV2"), + // [SPARK-37831][CORE] Add task partition id in TaskInfo and Task Metrics + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.status.api.v1.TaskData.this"), + // [SPARK-37600][BUILD] Upgrade to Hadoop 3.3.2 ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4Compressor"), ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4Factory"), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/MetricsAggregationBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/MetricsAggregationBenchmark.scala index aa3988ae37e41..5d9bb8a0a4c58 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/MetricsAggregationBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/MetricsAggregationBenchmark.scala @@ -110,6 +110,7 @@ object MetricsAggregationBenchmark extends BenchmarkBase { taskId = taskOffset + i.toLong, index = i, attemptNumber = 0, + partitionId = i, // The following fields are not used. launchTime = 0, executorId = "", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala index 9b5b532d3ecdc..3dabaadf19f89 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala @@ -111,6 +111,7 @@ class SQLAppStatusListenerSuite extends SharedSparkSession with JsonTestUtils taskId = taskId, attemptNumber = attemptNumber, index = taskId.toInt, + partitionId = taskId.toInt, // The following fields are not used in tests launchTime = 0, executorId = "", From c2ed15de94f611972c463ff8bba9268c1e93fd30 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 31 Mar 2022 23:22:05 +0800 Subject: [PATCH 082/535] [SPARK-38710][SQL] Use SparkArithmeticException for arithmetic overflow runtime errors ### What changes were proposed in this pull request? On arithmetic overflow runtime errors, Spark should throw SparkArithmeticException instead of `java.lang.ArithmeticException` ### Why are the changes needed? Use a better error exception type. ### Does this PR introduce _any_ user-facing change? Yes, trivial change on the exception type: on arithmetic overflow runtime errors, Spark will throw SparkArithmeticException instead of `java.lang.ArithmeticException` ### How was this patch tested? UT Closes #36022 from gengliangwang/ArithmeticException. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit b70fa24304e524255df8b19c31666699e2642583) Signed-off-by: Gengliang Wang --- core/src/main/resources/error/error-classes.json | 4 ++++ .../spark/sql/errors/QueryExecutionErrors.scala | 5 ++--- .../sql-tests/results/ansi/interval.sql.out | 14 +++++++------- .../resources/sql-tests/results/interval.sql.out | 14 +++++++------- .../sql-tests/results/postgreSQL/int4.sql.out | 12 ++++++------ .../sql-tests/results/postgreSQL/int8.sql.out | 8 ++++---- .../results/postgreSQL/window_part2.sql.out | 4 ++-- .../apache/spark/sql/DataFrameAggregateSuite.scala | 8 ++++---- 8 files changed, 36 insertions(+), 33 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index d9e2e74c3ec9d..d7d77022399ba 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -3,6 +3,10 @@ "message" : [ "Field name %s is ambiguous and has %s matching fields in the struct." ], "sqlState" : "42000" }, + "ARITHMETIC_OVERFLOW" : { + "message" : [ "%s.%s If necessary set %s to false (except for ANSI interval type) to bypass this error.%s" ], + "sqlState" : "22003" + }, "CANNOT_CAST_DATATYPE" : { "message" : [ "Cannot cast %s to %s." ], "sqlState" : "22005" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index a83b7b27ea563..304801e39f6af 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -446,9 +446,8 @@ object QueryExecutionErrors { hint: String = "", errorContext: String = ""): ArithmeticException = { val alternative = if (hint.nonEmpty) s" To return NULL instead, use '$hint'." else "" - new ArithmeticException(s"$message.$alternative If necessary set " + - s"${SQLConf.ANSI_ENABLED.key} to false (except for ANSI interval type) to bypass this " + - "error." + errorContext) + new SparkArithmeticException("ARITHMETIC_OVERFLOW", + Array(message, alternative, SQLConf.ANSI_ENABLED.key, errorContext)) } def unaryMinusCauseOverflowError(originValue: AnyVal): ArithmeticException = { diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index e46861208dced..f28e530ae4faf 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -1755,7 +1755,7 @@ select -(a) from values (interval '-2147483648 months', interval '2147483647 mon -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. @@ -1764,7 +1764,7 @@ select a - b from values (interval '-2147483648 months', interval '2147483647 mo -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. @@ -1773,7 +1773,7 @@ select b + interval '1 month' from values (interval '-2147483648 months', interv -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. @@ -2002,7 +2002,7 @@ SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1 -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1 @@ -2014,7 +2014,7 @@ SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1L -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1L @@ -2060,7 +2060,7 @@ SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1 -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1 @@ -2072,7 +2072,7 @@ SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1L -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1L diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index df1db77a895e3..58c86c9ccf65c 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1744,7 +1744,7 @@ select -(a) from values (interval '-2147483648 months', interval '2147483647 mon -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. @@ -1753,7 +1753,7 @@ select a - b from values (interval '-2147483648 months', interval '2147483647 mo -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. @@ -1762,7 +1762,7 @@ select b + interval '1 month' from values (interval '-2147483648 months', interv -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. @@ -1991,7 +1991,7 @@ SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1 -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1 @@ -2003,7 +2003,7 @@ SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1L -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1L @@ -2049,7 +2049,7 @@ SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1 -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1 @@ -2061,7 +2061,7 @@ SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1L -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1L diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out index 993fe44cfe47a..144a01511f271 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out @@ -199,7 +199,7 @@ SELECT '' AS five, i.f1, i.f1 * smallint('2') AS x FROM INT4_TBL i -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 25) == SELECT '' AS five, i.f1, i.f1 * smallint('2') AS x FROM INT4_TBL i @@ -222,7 +222,7 @@ SELECT '' AS five, i.f1, i.f1 * int('2') AS x FROM INT4_TBL i -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 25) == SELECT '' AS five, i.f1, i.f1 * int('2') AS x FROM INT4_TBL i @@ -245,7 +245,7 @@ SELECT '' AS five, i.f1, i.f1 + smallint('2') AS x FROM INT4_TBL i -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 25) == SELECT '' AS five, i.f1, i.f1 + smallint('2') AS x FROM INT4_TBL i @@ -269,7 +269,7 @@ SELECT '' AS five, i.f1, i.f1 + int('2') AS x FROM INT4_TBL i -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 25) == SELECT '' AS five, i.f1, i.f1 + int('2') AS x FROM INT4_TBL i @@ -293,7 +293,7 @@ SELECT '' AS five, i.f1, i.f1 - smallint('2') AS x FROM INT4_TBL i -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 25) == SELECT '' AS five, i.f1, i.f1 - smallint('2') AS x FROM INT4_TBL i @@ -317,7 +317,7 @@ SELECT '' AS five, i.f1, i.f1 - int('2') AS x FROM INT4_TBL i -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 25) == SELECT '' AS five, i.f1, i.f1 - int('2') AS x FROM INT4_TBL i diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out index bb4a77025ad94..cc524b575d33e 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out @@ -391,7 +391,7 @@ SELECT '' AS three, q1, q2, q1 * q2 AS multiply FROM INT8_TBL -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException long overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 28) == SELECT '' AS three, q1, q2, q1 * q2 AS multiply FROM INT8_TBL @@ -753,7 +753,7 @@ SELECT bigint((-9223372036854775808)) * bigint((-1)) -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException long overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT bigint((-9223372036854775808)) * bigint((-1)) @@ -781,7 +781,7 @@ SELECT bigint((-9223372036854775808)) * int((-1)) -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException long overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT bigint((-9223372036854775808)) * int((-1)) @@ -809,7 +809,7 @@ SELECT bigint((-9223372036854775808)) * smallint((-1)) -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException long overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT bigint((-9223372036854775808)) * smallint((-1)) diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out index ab6170eb517f9..75c40ce92d2d7 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out @@ -224,7 +224,7 @@ from range(9223372036854775804, 9223372036854775807) x -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException long overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. @@ -234,7 +234,7 @@ from range(-9223372036854775806, -9223372036854775805) x -- !query schema struct<> -- !query output -java.lang.ArithmeticException +org.apache.spark.SparkArithmeticException long overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 157736f9777e6..425be96d6b8ab 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -1278,12 +1278,12 @@ class DataFrameAggregateSuite extends QueryTest val error = intercept[SparkException] { checkAnswer(df2.select(sum($"year-month")), Nil) } - assert(error.toString contains "java.lang.ArithmeticException: integer overflow") + assert(error.toString contains "SparkArithmeticException: integer overflow") val error2 = intercept[SparkException] { checkAnswer(df2.select(sum($"day")), Nil) } - assert(error2.toString contains "java.lang.ArithmeticException: long overflow") + assert(error2.toString contains "SparkArithmeticException: long overflow") } test("SPARK-34837: Support ANSI SQL intervals by the aggregate function `avg`") { @@ -1412,12 +1412,12 @@ class DataFrameAggregateSuite extends QueryTest val error = intercept[SparkException] { checkAnswer(df2.select(avg($"year-month")), Nil) } - assert(error.toString contains "java.lang.ArithmeticException: integer overflow") + assert(error.toString contains "SparkArithmeticException: integer overflow") val error2 = intercept[SparkException] { checkAnswer(df2.select(avg($"day")), Nil) } - assert(error2.toString contains "java.lang.ArithmeticException: long overflow") + assert(error2.toString contains "SparkArithmeticException: long overflow") val df3 = intervalData.filter($"class" > 4) val avgDF3 = df3.select(avg($"year-month"), avg($"day")) From 4e787dc0fc9e23a27f7dfdd5cf1c5bdf91e4775f Mon Sep 17 00:00:00 2001 From: Parth Chandra Date: Thu, 31 Mar 2022 13:06:50 -0700 Subject: [PATCH 083/535] [SPARK-37974][SQL] Implement vectorized DELTA_BYTE_ARRAY and DELTA_LENGTH_BYTE_ARRAY encodings for Parquet V2 support ### What changes were proposed in this pull request? This PR provides a vectorized implementation of the DELTA_BYTE_ARRAY encoding of Parquet V2. The PR also implements the DELTA_LENGTH_BYTE_ARRAY encoding which is needed by the former. ### Why are the changes needed? The current support for Parquet V2 in the vectorized reader uses a non-vectorized version of the above encoding and needs to be vectorized. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Reproduces all the tests for the encodings from the Parquet implementation. Also adds more cases to the Parquet Encoding test suite. Closes #35262 from parthchandra/SPARK-36879-PR3. Lead-authored-by: Parth Chandra Co-authored-by: Chao Sun Signed-off-by: Chao Sun --- .../DataSourceReadBenchmark-jdk11-results.txt | 424 ++++++++-------- .../DataSourceReadBenchmark-jdk17-results.txt | 470 +++++++++--------- .../DataSourceReadBenchmark-results.txt | 424 ++++++++-------- .../SpecificParquetRecordReaderBase.java | 11 + .../parquet/VectorizedColumnReader.java | 15 +- .../VectorizedDeltaBinaryPackedReader.java | 6 + .../VectorizedDeltaByteArrayReader.java | 113 ++++- .../VectorizedDeltaLengthByteArrayReader.java | 89 ++++ .../VectorizedParquetRecordReader.java | 3 +- .../parquet/VectorizedValuesReader.java | 16 + .../vectorized/OffHeapColumnVector.java | 5 + .../vectorized/OnHeapColumnVector.java | 6 + .../vectorized/WritableColumnVector.java | 7 + .../ParquetDeltaByteArrayEncodingSuite.scala | 143 ++++++ ...uetDeltaLengthByteArrayEncodingSuite.scala | 142 ++++++ .../parquet/ParquetEncodingSuite.scala | 93 ++-- 16 files changed, 1250 insertions(+), 717 deletions(-) create mode 100644 sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaLengthByteArrayReader.java create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaByteArrayEncodingSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaLengthByteArrayEncodingSuite.scala diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-jdk11-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-jdk11-results.txt index 25c43d8273df8..11fc93406c363 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-jdk11-results.txt @@ -2,322 +2,322 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9636 9771 191 1.6 612.6 1.0X -SQL Json 7960 8227 378 2.0 506.1 1.2X -SQL Parquet Vectorized: DataPageV1 113 129 12 139.7 7.2 85.6X -SQL Parquet Vectorized: DataPageV2 84 93 12 186.6 5.4 114.3X -SQL Parquet MR: DataPageV1 1466 1470 6 10.7 93.2 6.6X -SQL Parquet MR: DataPageV2 1334 1347 18 11.8 84.8 7.2X -SQL ORC Vectorized 163 197 27 96.3 10.4 59.0X -SQL ORC MR 1554 1558 6 10.1 98.8 6.2X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +SQL CSV 11809 12046 335 1.3 750.8 1.0X +SQL Json 8588 8592 7 1.8 546.0 1.4X +SQL Parquet Vectorized: DataPageV1 140 162 18 112.0 8.9 84.1X +SQL Parquet Vectorized: DataPageV2 103 117 12 152.6 6.6 114.6X +SQL Parquet MR: DataPageV1 1634 1648 20 9.6 103.9 7.2X +SQL Parquet MR: DataPageV2 1495 1501 9 10.5 95.1 7.9X +SQL ORC Vectorized 180 224 42 87.4 11.4 65.6X +SQL ORC MR 1536 1576 57 10.2 97.7 7.7X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 94 103 13 167.1 6.0 1.0X -ParquetReader Vectorized: DataPageV2 77 86 11 204.3 4.9 1.2X -ParquetReader Vectorized -> Row: DataPageV1 44 47 4 357.0 2.8 2.1X -ParquetReader Vectorized -> Row: DataPageV2 35 37 3 445.2 2.2 2.7X +ParquetReader Vectorized: DataPageV1 109 114 10 144.3 6.9 1.0X +ParquetReader Vectorized: DataPageV2 90 93 3 175.3 5.7 1.2X +ParquetReader Vectorized -> Row: DataPageV1 58 60 4 271.9 3.7 1.9X +ParquetReader Vectorized -> Row: DataPageV2 39 41 3 404.0 2.5 2.8X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11479 11919 622 1.4 729.8 1.0X -SQL Json 9894 9922 39 1.6 629.1 1.2X -SQL Parquet Vectorized: DataPageV1 123 156 30 128.3 7.8 93.6X -SQL Parquet Vectorized: DataPageV2 126 138 19 125.2 8.0 91.4X -SQL Parquet MR: DataPageV1 1986 2500 726 7.9 126.3 5.8X -SQL Parquet MR: DataPageV2 1810 1898 126 8.7 115.1 6.3X -SQL ORC Vectorized 174 210 30 90.5 11.0 66.1X -SQL ORC MR 1645 1652 9 9.6 104.6 7.0X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +SQL CSV 14515 14526 16 1.1 922.8 1.0X +SQL Json 9862 9863 2 1.6 627.0 1.5X +SQL Parquet Vectorized: DataPageV1 144 167 31 109.5 9.1 101.1X +SQL Parquet Vectorized: DataPageV2 139 159 27 113.4 8.8 104.6X +SQL Parquet MR: DataPageV1 1777 1780 3 8.8 113.0 8.2X +SQL Parquet MR: DataPageV2 1690 1691 2 9.3 107.4 8.6X +SQL ORC Vectorized 201 238 46 78.3 12.8 72.2X +SQL ORC MR 1513 1522 14 10.4 96.2 9.6X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 166 177 14 94.9 10.5 1.0X -ParquetReader Vectorized: DataPageV2 165 172 11 95.3 10.5 1.0X -ParquetReader Vectorized -> Row: DataPageV1 95 100 5 165.7 6.0 1.7X -ParquetReader Vectorized -> Row: DataPageV2 85 89 6 186.0 5.4 2.0X +ParquetReader Vectorized: DataPageV1 182 192 11 86.6 11.5 1.0X +ParquetReader Vectorized: DataPageV2 181 188 7 86.9 11.5 1.0X +ParquetReader Vectorized -> Row: DataPageV1 96 99 4 163.3 6.1 1.9X +ParquetReader Vectorized -> Row: DataPageV2 96 99 3 163.4 6.1 1.9X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 12176 12646 664 1.3 774.1 1.0X -SQL Json 9696 9729 46 1.6 616.5 1.3X -SQL Parquet Vectorized: DataPageV1 151 201 33 103.9 9.6 80.4X -SQL Parquet Vectorized: DataPageV2 216 235 15 72.7 13.8 56.3X -SQL Parquet MR: DataPageV1 1915 2017 145 8.2 121.8 6.4X -SQL Parquet MR: DataPageV2 1954 1978 33 8.0 124.3 6.2X -SQL ORC Vectorized 197 235 25 79.7 12.6 61.7X -SQL ORC MR 1769 1829 85 8.9 112.5 6.9X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +SQL CSV 15326 15437 156 1.0 974.4 1.0X +SQL Json 10281 10290 13 1.5 653.7 1.5X +SQL Parquet Vectorized: DataPageV1 164 212 36 95.9 10.4 93.4X +SQL Parquet Vectorized: DataPageV2 230 244 11 68.5 14.6 66.7X +SQL Parquet MR: DataPageV1 2108 2111 4 7.5 134.0 7.3X +SQL Parquet MR: DataPageV2 1940 1963 33 8.1 123.3 7.9X +SQL ORC Vectorized 229 279 34 68.7 14.6 66.9X +SQL ORC MR 1903 1906 3 8.3 121.0 8.1X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 230 237 12 68.5 14.6 1.0X -ParquetReader Vectorized: DataPageV2 293 298 9 53.6 18.7 0.8X -ParquetReader Vectorized -> Row: DataPageV1 215 265 23 73.2 13.7 1.1X -ParquetReader Vectorized -> Row: DataPageV2 279 301 32 56.3 17.8 0.8X +ParquetReader Vectorized: DataPageV1 253 262 10 62.2 16.1 1.0X +ParquetReader Vectorized: DataPageV2 323 327 9 48.8 20.5 0.8X +ParquetReader Vectorized -> Row: DataPageV1 280 288 8 56.3 17.8 0.9X +ParquetReader Vectorized -> Row: DataPageV2 301 314 21 52.2 19.1 0.8X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 13069 13409 482 1.2 830.9 1.0X -SQL Json 10599 10621 32 1.5 673.9 1.2X -SQL Parquet Vectorized: DataPageV1 142 177 34 110.6 9.0 91.9X -SQL Parquet Vectorized: DataPageV2 313 359 28 50.2 19.9 41.7X -SQL Parquet MR: DataPageV1 1979 2044 92 7.9 125.8 6.6X -SQL Parquet MR: DataPageV2 1958 2030 101 8.0 124.5 6.7X -SQL ORC Vectorized 277 303 21 56.7 17.6 47.1X -SQL ORC MR 1692 1782 128 9.3 107.6 7.7X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +SQL CSV 16756 16776 28 0.9 1065.3 1.0X +SQL Json 10690 10692 3 1.5 679.6 1.6X +SQL Parquet Vectorized: DataPageV1 160 208 45 98.1 10.2 104.5X +SQL Parquet Vectorized: DataPageV2 390 423 23 40.3 24.8 43.0X +SQL Parquet MR: DataPageV1 2196 2201 8 7.2 139.6 7.6X +SQL Parquet MR: DataPageV2 2065 2072 10 7.6 131.3 8.1X +SQL ORC Vectorized 323 338 10 48.7 20.5 51.9X +SQL ORC MR 1899 1906 11 8.3 120.7 8.8X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 253 269 18 62.1 16.1 1.0X -ParquetReader Vectorized: DataPageV2 1197 1199 3 13.1 76.1 0.2X -ParquetReader Vectorized -> Row: DataPageV1 273 361 110 57.7 17.3 0.9X -ParquetReader Vectorized -> Row: DataPageV2 379 438 37 41.5 24.1 0.7X +ParquetReader Vectorized: DataPageV1 278 285 9 56.6 17.7 1.0X +ParquetReader Vectorized: DataPageV2 514 518 2 30.6 32.7 0.5X +ParquetReader Vectorized -> Row: DataPageV1 308 316 11 51.0 19.6 0.9X +ParquetReader Vectorized -> Row: DataPageV2 498 525 27 31.6 31.6 0.6X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 17143 17467 458 0.9 1089.9 1.0X -SQL Json 11507 12198 977 1.4 731.6 1.5X -SQL Parquet Vectorized: DataPageV1 238 253 19 66.0 15.2 71.9X -SQL Parquet Vectorized: DataPageV2 502 567 48 31.3 31.9 34.1X -SQL Parquet MR: DataPageV1 2333 2335 3 6.7 148.4 7.3X -SQL Parquet MR: DataPageV2 1948 1972 34 8.1 123.8 8.8X -SQL ORC Vectorized 389 408 20 40.5 24.7 44.1X -SQL ORC MR 1726 1817 128 9.1 109.7 9.9X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +SQL CSV 21841 21851 14 0.7 1388.6 1.0X +SQL Json 12828 12843 21 1.2 815.6 1.7X +SQL Parquet Vectorized: DataPageV1 241 279 19 65.2 15.3 90.6X +SQL Parquet Vectorized: DataPageV2 554 596 29 28.4 35.2 39.5X +SQL Parquet MR: DataPageV1 2404 2428 34 6.5 152.8 9.1X +SQL Parquet MR: DataPageV2 2153 2166 18 7.3 136.9 10.1X +SQL ORC Vectorized 417 464 62 37.7 26.5 52.4X +SQL ORC MR 2136 2146 14 7.4 135.8 10.2X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 289 340 43 54.4 18.4 1.0X -ParquetReader Vectorized: DataPageV2 572 609 27 27.5 36.4 0.5X -ParquetReader Vectorized -> Row: DataPageV1 329 353 48 47.8 20.9 0.9X -ParquetReader Vectorized -> Row: DataPageV2 639 654 18 24.6 40.6 0.5X +ParquetReader Vectorized: DataPageV1 324 357 34 48.6 20.6 1.0X +ParquetReader Vectorized: DataPageV2 694 702 11 22.6 44.2 0.5X +ParquetReader Vectorized -> Row: DataPageV1 378 385 8 41.6 24.0 0.9X +ParquetReader Vectorized -> Row: DataPageV2 701 708 8 22.4 44.6 0.5X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 13721 13812 129 1.1 872.4 1.0X -SQL Json 12147 17632 2196 1.3 772.3 1.1X -SQL Parquet Vectorized: DataPageV1 138 164 25 113.9 8.8 99.4X -SQL Parquet Vectorized: DataPageV2 151 180 26 104.4 9.6 91.1X -SQL Parquet MR: DataPageV1 2006 2078 101 7.8 127.6 6.8X -SQL Parquet MR: DataPageV2 2038 2040 2 7.7 129.6 6.7X -SQL ORC Vectorized 465 475 10 33.8 29.6 29.5X -SQL ORC MR 1814 1860 64 8.7 115.4 7.6X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +SQL CSV 17238 17239 2 0.9 1096.0 1.0X +SQL Json 12295 12307 18 1.3 781.7 1.4X +SQL Parquet Vectorized: DataPageV1 162 203 27 96.8 10.3 106.1X +SQL Parquet Vectorized: DataPageV2 157 194 32 100.4 10.0 110.0X +SQL Parquet MR: DataPageV1 2163 2165 3 7.3 137.5 8.0X +SQL Parquet MR: DataPageV2 2014 2014 1 7.8 128.0 8.6X +SQL ORC Vectorized 458 462 5 34.4 29.1 37.7X +SQL ORC MR 1984 1984 0 7.9 126.1 8.7X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 275 404 187 57.2 17.5 1.0X -ParquetReader Vectorized: DataPageV2 275 287 12 57.2 17.5 1.0X -ParquetReader Vectorized -> Row: DataPageV1 227 265 24 69.2 14.4 1.2X -ParquetReader Vectorized -> Row: DataPageV2 228 259 28 69.1 14.5 1.2X +ParquetReader Vectorized: DataPageV1 252 259 10 62.3 16.0 1.0X +ParquetReader Vectorized: DataPageV2 252 256 9 62.3 16.0 1.0X +ParquetReader Vectorized -> Row: DataPageV1 259 307 40 60.7 16.5 1.0X +ParquetReader Vectorized -> Row: DataPageV2 260 295 25 60.5 16.5 1.0X -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 17269 17620 496 0.9 1097.9 1.0X -SQL Json 15636 15952 447 1.0 994.1 1.1X -SQL Parquet Vectorized: DataPageV1 238 267 18 66.0 15.1 72.5X -SQL Parquet Vectorized: DataPageV2 222 260 21 70.9 14.1 77.9X -SQL Parquet MR: DataPageV1 2418 2457 56 6.5 153.7 7.1X -SQL Parquet MR: DataPageV2 2194 2207 18 7.2 139.5 7.9X -SQL ORC Vectorized 519 528 14 30.3 33.0 33.3X -SQL ORC MR 1760 1770 14 8.9 111.9 9.8X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +SQL CSV 22485 22536 72 0.7 1429.5 1.0X +SQL Json 16281 16286 8 1.0 1035.1 1.4X +SQL Parquet Vectorized: DataPageV1 232 288 35 67.9 14.7 97.1X +SQL Parquet Vectorized: DataPageV2 277 290 9 56.8 17.6 81.2X +SQL Parquet MR: DataPageV1 2331 2341 15 6.7 148.2 9.6X +SQL Parquet MR: DataPageV2 2216 2229 18 7.1 140.9 10.1X +SQL ORC Vectorized 561 569 9 28.0 35.7 40.1X +SQL ORC MR 2118 2137 27 7.4 134.6 10.6X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 284 305 30 55.3 18.1 1.0X -ParquetReader Vectorized: DataPageV2 286 286 1 55.1 18.2 1.0X -ParquetReader Vectorized -> Row: DataPageV1 325 337 16 48.4 20.6 0.9X -ParquetReader Vectorized -> Row: DataPageV2 346 361 16 45.5 22.0 0.8X +ParquetReader Vectorized: DataPageV1 355 356 1 44.3 22.6 1.0X +ParquetReader Vectorized: DataPageV2 355 356 1 44.3 22.6 1.0X +ParquetReader Vectorized -> Row: DataPageV1 379 386 9 41.5 24.1 0.9X +ParquetReader Vectorized -> Row: DataPageV2 379 389 10 41.5 24.1 0.9X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 12428 12714 405 0.8 1185.2 1.0X -SQL Json 11088 11251 231 0.9 1057.4 1.1X -SQL Parquet Vectorized: DataPageV1 1990 1997 10 5.3 189.8 6.2X -SQL Parquet Vectorized: DataPageV2 2551 2618 95 4.1 243.3 4.9X -SQL Parquet MR: DataPageV1 3903 3913 15 2.7 372.2 3.2X -SQL Parquet MR: DataPageV2 3734 3920 263 2.8 356.1 3.3X -SQL ORC Vectorized 2153 2155 3 4.9 205.3 5.8X -SQL ORC MR 3485 3549 91 3.0 332.4 3.6X +SQL CSV 15733 15738 8 0.7 1500.4 1.0X +SQL Json 11953 11969 22 0.9 1140.0 1.3X +SQL Parquet Vectorized: DataPageV1 2100 2137 52 5.0 200.2 7.5X +SQL Parquet Vectorized: DataPageV2 2525 2535 14 4.2 240.8 6.2X +SQL Parquet MR: DataPageV1 4075 4110 49 2.6 388.6 3.9X +SQL Parquet MR: DataPageV2 3991 4014 34 2.6 380.6 3.9X +SQL ORC Vectorized 2323 2355 45 4.5 221.5 6.8X +SQL ORC MR 3776 3882 150 2.8 360.1 4.2X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7116 7167 72 1.5 678.7 1.0X -SQL Json 6700 6741 58 1.6 639.0 1.1X -SQL Parquet Vectorized: DataPageV1 526 556 36 19.9 50.1 13.5X -SQL Parquet Vectorized: DataPageV2 518 533 15 20.2 49.4 13.7X -SQL Parquet MR: DataPageV1 1504 1656 216 7.0 143.4 4.7X -SQL Parquet MR: DataPageV2 1676 1676 1 6.3 159.8 4.2X -SQL ORC Vectorized 497 518 20 21.1 47.4 14.3X -SQL ORC MR 1657 1787 183 6.3 158.1 4.3X +SQL CSV 8921 8966 63 1.2 850.7 1.0X +SQL Json 7215 7218 5 1.5 688.1 1.2X +SQL Parquet Vectorized: DataPageV1 604 627 23 17.3 57.6 14.8X +SQL Parquet Vectorized: DataPageV2 606 620 18 17.3 57.8 14.7X +SQL Parquet MR: DataPageV1 1686 1693 10 6.2 160.8 5.3X +SQL Parquet MR: DataPageV2 1660 1665 8 6.3 158.3 5.4X +SQL ORC Vectorized 541 548 7 19.4 51.6 16.5X +SQL ORC MR 1920 1930 13 5.5 183.1 4.6X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 18247 18411 232 0.9 1160.1 1.0X -Data column - Json 10860 11264 571 1.4 690.5 1.7X -Data column - Parquet Vectorized: DataPageV1 223 274 26 70.6 14.2 81.9X -Data column - Parquet Vectorized: DataPageV2 537 559 23 29.3 34.1 34.0X -Data column - Parquet MR: DataPageV1 2411 2517 150 6.5 153.3 7.6X -Data column - Parquet MR: DataPageV2 2299 2356 81 6.8 146.2 7.9X -Data column - ORC Vectorized 417 433 11 37.7 26.5 43.8X -Data column - ORC MR 2107 2178 101 7.5 134.0 8.7X -Partition column - CSV 6090 6186 136 2.6 387.2 3.0X -Partition column - Json 9479 9603 176 1.7 602.7 1.9X -Partition column - Parquet Vectorized: DataPageV1 49 69 28 322.0 3.1 373.6X -Partition column - Parquet Vectorized: DataPageV2 49 63 23 322.1 3.1 373.7X -Partition column - Parquet MR: DataPageV1 1200 1225 36 13.1 76.3 15.2X -Partition column - Parquet MR: DataPageV2 1199 1240 57 13.1 76.3 15.2X -Partition column - ORC Vectorized 53 77 26 295.0 3.4 342.2X -Partition column - ORC MR 1287 1346 83 12.2 81.8 14.2X -Both columns - CSV 17671 18140 663 0.9 1123.5 1.0X -Both columns - Json 11675 12167 696 1.3 742.3 1.6X -Both columns - Parquet Vectorized: DataPageV1 298 303 9 52.9 18.9 61.3X -Both columns - Parquet Vectorized: DataPageV2 541 580 36 29.1 34.4 33.7X -Both columns - Parquet MR: DataPageV1 2448 2491 60 6.4 155.6 7.5X -Both columns - Parquet MR: DataPageV2 2303 2352 69 6.8 146.4 7.9X -Both columns - ORC Vectorized 385 406 25 40.9 24.5 47.4X -Both columns - ORC MR 2118 2202 120 7.4 134.6 8.6X +Data column - CSV 21951 21976 36 0.7 1395.6 1.0X +Data column - Json 12896 12905 14 1.2 819.9 1.7X +Data column - Parquet Vectorized: DataPageV1 247 307 48 63.6 15.7 88.7X +Data column - Parquet Vectorized: DataPageV2 657 686 25 23.9 41.8 33.4X +Data column - Parquet MR: DataPageV1 2705 2708 3 5.8 172.0 8.1X +Data column - Parquet MR: DataPageV2 2621 2621 0 6.0 166.6 8.4X +Data column - ORC Vectorized 440 468 30 35.7 28.0 49.9X +Data column - ORC MR 2553 2565 17 6.2 162.3 8.6X +Partition column - CSV 6640 6641 1 2.4 422.2 3.3X +Partition column - Json 10499 10512 19 1.5 667.5 2.1X +Partition column - Parquet Vectorized: DataPageV1 60 79 24 261.4 3.8 364.8X +Partition column - Parquet Vectorized: DataPageV2 58 81 26 270.2 3.7 377.0X +Partition column - Parquet MR: DataPageV1 1387 1412 35 11.3 88.2 15.8X +Partition column - Parquet MR: DataPageV2 1383 1407 34 11.4 87.9 15.9X +Partition column - ORC Vectorized 61 85 25 256.8 3.9 358.4X +Partition column - ORC MR 1552 1553 1 10.1 98.7 14.1X +Both columns - CSV 21896 21919 32 0.7 1392.1 1.0X +Both columns - Json 13645 13664 27 1.2 867.5 1.6X +Both columns - Parquet Vectorized: DataPageV1 307 351 33 51.3 19.5 71.6X +Both columns - Parquet Vectorized: DataPageV2 698 740 36 22.5 44.4 31.4X +Both columns - Parquet MR: DataPageV1 2804 2821 24 5.6 178.3 7.8X +Both columns - Parquet MR: DataPageV2 2624 2636 16 6.0 166.8 8.4X +Both columns - ORC Vectorized 462 521 53 34.0 29.4 47.5X +Both columns - ORC MR 2564 2580 22 6.1 163.0 8.6X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7966 12723 2892 1.3 759.7 1.0X -SQL Json 9897 10008 157 1.1 943.9 0.8X -SQL Parquet Vectorized: DataPageV1 1176 1264 125 8.9 112.1 6.8X -SQL Parquet Vectorized: DataPageV2 2224 2326 144 4.7 212.1 3.6X -SQL Parquet MR: DataPageV1 3431 3483 73 3.1 327.2 2.3X -SQL Parquet MR: DataPageV2 3845 4043 280 2.7 366.7 2.1X -ParquetReader Vectorized: DataPageV1 1055 1056 2 9.9 100.6 7.6X -ParquetReader Vectorized: DataPageV2 2093 2119 37 5.0 199.6 3.8X -SQL ORC Vectorized 1129 1217 125 9.3 107.7 7.1X -SQL ORC MR 2931 2982 72 3.6 279.5 2.7X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +SQL CSV 10818 10826 11 1.0 1031.6 1.0X +SQL Json 10812 10833 29 1.0 1031.2 1.0X +SQL Parquet Vectorized: DataPageV1 1301 1312 15 8.1 124.1 8.3X +SQL Parquet Vectorized: DataPageV2 1953 1982 42 5.4 186.2 5.5X +SQL Parquet MR: DataPageV1 3677 3680 5 2.9 350.6 2.9X +SQL Parquet MR: DataPageV2 3970 3972 2 2.6 378.6 2.7X +ParquetReader Vectorized: DataPageV1 1004 1016 16 10.4 95.8 10.8X +ParquetReader Vectorized: DataPageV2 1606 1622 22 6.5 153.2 6.7X +SQL ORC Vectorized 1160 1182 30 9.0 110.7 9.3X +SQL ORC MR 3266 3330 90 3.2 311.4 3.3X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 6338 6508 240 1.7 604.4 1.0X -SQL Json 7149 7247 138 1.5 681.8 0.9X -SQL Parquet Vectorized: DataPageV1 937 984 45 11.2 89.3 6.8X -SQL Parquet Vectorized: DataPageV2 1582 1608 37 6.6 150.9 4.0X -SQL Parquet MR: DataPageV1 2525 2721 277 4.2 240.8 2.5X -SQL Parquet MR: DataPageV2 2969 2974 7 3.5 283.1 2.1X -ParquetReader Vectorized: DataPageV1 933 940 12 11.2 88.9 6.8X -ParquetReader Vectorized: DataPageV2 1535 1549 20 6.8 146.4 4.1X -SQL ORC Vectorized 1144 1204 86 9.2 109.1 5.5X -SQL ORC MR 2816 2822 8 3.7 268.6 2.3X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +SQL CSV 7971 7981 15 1.3 760.2 1.0X +SQL Json 8266 8269 3 1.3 788.4 1.0X +SQL Parquet Vectorized: DataPageV1 1025 1036 15 10.2 97.8 7.8X +SQL Parquet Vectorized: DataPageV2 1432 1440 11 7.3 136.6 5.6X +SQL Parquet MR: DataPageV1 2792 2806 20 3.8 266.3 2.9X +SQL Parquet MR: DataPageV2 2958 2992 47 3.5 282.1 2.7X +ParquetReader Vectorized: DataPageV1 1010 1024 20 10.4 96.3 7.9X +ParquetReader Vectorized: DataPageV2 1331 1335 4 7.9 127.0 6.0X +SQL ORC Vectorized 1266 1271 6 8.3 120.8 6.3X +SQL ORC MR 3032 3089 81 3.5 289.2 2.6X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 4443 4504 86 2.4 423.7 1.0X -SQL Json 4528 4563 49 2.3 431.8 1.0X -SQL Parquet Vectorized: DataPageV1 213 233 15 49.2 20.3 20.8X -SQL Parquet Vectorized: DataPageV2 267 294 22 39.3 25.4 16.7X -SQL Parquet MR: DataPageV1 1691 1700 13 6.2 161.2 2.6X -SQL Parquet MR: DataPageV2 1515 1565 70 6.9 144.5 2.9X -ParquetReader Vectorized: DataPageV1 228 231 2 46.0 21.7 19.5X -ParquetReader Vectorized: DataPageV2 285 296 9 36.8 27.1 15.6X -SQL ORC Vectorized 369 425 82 28.4 35.2 12.1X -SQL ORC MR 1457 1463 9 7.2 138.9 3.0X +SQL CSV 5829 5833 5 1.8 555.9 1.0X +SQL Json 4966 4978 17 2.1 473.6 1.2X +SQL Parquet Vectorized: DataPageV1 236 244 7 44.5 22.5 24.7X +SQL Parquet Vectorized: DataPageV2 305 315 13 34.4 29.1 19.1X +SQL Parquet MR: DataPageV1 1777 1784 10 5.9 169.5 3.3X +SQL Parquet MR: DataPageV2 1635 1637 4 6.4 155.9 3.6X +ParquetReader Vectorized: DataPageV1 242 246 2 43.2 23.1 24.0X +ParquetReader Vectorized: DataPageV2 309 313 7 34.0 29.5 18.9X +SQL ORC Vectorized 391 419 53 26.8 37.3 14.9X +SQL ORC MR 1686 1687 1 6.2 160.8 3.5X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 2374 2377 5 0.4 2264.2 1.0X -SQL Json 2693 2726 46 0.4 2568.5 0.9X -SQL Parquet Vectorized: DataPageV1 44 62 16 23.8 42.0 54.0X -SQL Parquet Vectorized: DataPageV2 63 81 21 16.5 60.5 37.5X -SQL Parquet MR: DataPageV1 173 198 27 6.1 164.6 13.8X -SQL Parquet MR: DataPageV2 161 193 30 6.5 153.5 14.8X -SQL ORC Vectorized 53 71 18 19.9 50.2 45.1X -SQL ORC MR 149 182 34 7.0 142.3 15.9X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +SQL CSV 2301 2305 6 0.5 2194.0 1.0X +SQL Json 2874 2895 29 0.4 2741.1 0.8X +SQL Parquet Vectorized: DataPageV1 47 66 20 22.3 44.8 48.9X +SQL Parquet Vectorized: DataPageV2 74 90 25 14.2 70.5 31.1X +SQL Parquet MR: DataPageV1 198 219 26 5.3 189.0 11.6X +SQL Parquet MR: DataPageV2 178 207 45 5.9 170.1 12.9X +SQL ORC Vectorized 59 76 20 17.6 56.7 38.7X +SQL ORC MR 173 193 24 6.1 164.6 13.3X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5149 5193 62 0.2 4910.9 1.0X -SQL Json 10556 10891 475 0.1 10066.5 0.5X -SQL Parquet Vectorized: DataPageV1 64 96 28 16.3 61.3 80.1X -SQL Parquet Vectorized: DataPageV2 83 106 22 12.6 79.1 62.0X -SQL Parquet MR: DataPageV1 196 232 25 5.3 187.4 26.2X -SQL Parquet MR: DataPageV2 184 221 28 5.7 175.1 28.0X -SQL ORC Vectorized 74 98 31 14.1 70.8 69.3X -SQL ORC MR 182 214 38 5.8 173.9 28.2X - -OpenJDK 64-Bit Server VM 11.0.13+8-LTS on Linux 5.11.0-1025-azure +SQL CSV 5418 5425 9 0.2 5167.2 1.0X +SQL Json 11463 11574 156 0.1 10932.3 0.5X +SQL Parquet Vectorized: DataPageV1 66 103 28 15.8 63.4 81.5X +SQL Parquet Vectorized: DataPageV2 90 115 27 11.7 85.5 60.4X +SQL Parquet MR: DataPageV1 218 234 23 4.8 208.3 24.8X +SQL Parquet MR: DataPageV2 199 225 29 5.3 190.1 27.2X +SQL ORC Vectorized 76 106 31 13.7 72.8 71.0X +SQL ORC MR 193 216 28 5.4 184.2 28.0X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9077 9107 43 0.1 8656.2 1.0X -SQL Json 20131 20886 1067 0.1 19198.5 0.5X -SQL Parquet Vectorized: DataPageV1 93 124 26 11.3 88.8 97.5X -SQL Parquet Vectorized: DataPageV2 103 128 29 10.2 98.5 87.9X -SQL Parquet MR: DataPageV1 218 257 35 4.8 207.6 41.7X -SQL Parquet MR: DataPageV2 213 255 29 4.9 202.7 42.7X -SQL ORC Vectorized 80 95 20 13.0 76.6 112.9X -SQL ORC MR 187 207 20 5.6 178.0 48.6X +SQL CSV 9430 9430 0 0.1 8993.3 1.0X +SQL Json 21268 21347 111 0.0 20283.1 0.4X +SQL Parquet Vectorized: DataPageV1 97 124 24 10.9 92.1 97.6X +SQL Parquet Vectorized: DataPageV2 119 136 19 8.8 113.6 79.2X +SQL Parquet MR: DataPageV1 254 285 35 4.1 242.1 37.1X +SQL Parquet MR: DataPageV2 231 260 30 4.5 220.0 40.9X +SQL ORC Vectorized 95 119 31 11.1 90.4 99.5X +SQL ORC MR 214 219 5 4.9 203.6 44.2X diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-jdk17-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-jdk17-results.txt index ecba57c0c3cc3..8ff176457af10 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-jdk17-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-jdk17-results.txt @@ -2,322 +2,322 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 15972 16369 561 1.0 1015.5 1.0X -SQL Json 9543 9580 54 1.6 606.7 1.7X -SQL Parquet Vectorized: DataPageV1 115 144 19 136.3 7.3 138.4X -SQL Parquet Vectorized: DataPageV2 95 109 15 165.1 6.1 167.6X -SQL Parquet MR: DataPageV1 2098 2119 30 7.5 133.4 7.6X -SQL Parquet MR: DataPageV2 2007 2012 6 7.8 127.6 8.0X -SQL ORC Vectorized 211 225 16 74.5 13.4 75.7X -SQL ORC MR 2077 2103 36 7.6 132.1 7.7X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL CSV 9610 10067 646 1.6 611.0 1.0X +SQL Json 8316 8410 133 1.9 528.7 1.2X +SQL Parquet Vectorized: DataPageV1 123 145 10 127.7 7.8 78.0X +SQL Parquet Vectorized: DataPageV2 93 108 12 170.0 5.9 103.8X +SQL Parquet MR: DataPageV1 1766 1768 2 8.9 112.3 5.4X +SQL Parquet MR: DataPageV2 1540 1543 3 10.2 97.9 6.2X +SQL ORC Vectorized 175 182 6 89.6 11.2 54.8X +SQL ORC MR 1517 1533 22 10.4 96.5 6.3X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 43 47 2 369.4 2.7 1.0X -ParquetReader Vectorized: DataPageV2 30 34 2 518.5 1.9 1.4X -ParquetReader Vectorized -> Row: DataPageV1 47 50 2 333.6 3.0 0.9X -ParquetReader Vectorized -> Row: DataPageV2 31 35 2 504.8 2.0 1.4X +ParquetReader Vectorized: DataPageV1 61 63 2 256.3 3.9 1.0X +ParquetReader Vectorized: DataPageV2 44 45 2 356.3 2.8 1.4X +ParquetReader Vectorized -> Row: DataPageV1 51 51 1 311.3 3.2 1.2X +ParquetReader Vectorized -> Row: DataPageV2 32 33 2 492.4 2.0 1.9X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 17468 17543 105 0.9 1110.6 1.0X -SQL Json 11059 11065 8 1.4 703.1 1.6X -SQL Parquet Vectorized: DataPageV1 128 142 15 123.1 8.1 136.7X -SQL Parquet Vectorized: DataPageV2 126 141 8 125.2 8.0 139.1X -SQL Parquet MR: DataPageV1 2305 2331 36 6.8 146.5 7.6X -SQL Parquet MR: DataPageV2 2075 2095 28 7.6 131.9 8.4X -SQL ORC Vectorized 172 191 16 91.5 10.9 101.6X -SQL ORC MR 1777 1796 26 8.8 113.0 9.8X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL CSV 14866 14885 26 1.1 945.2 1.0X +SQL Json 9585 9586 3 1.6 609.4 1.6X +SQL Parquet Vectorized: DataPageV1 119 131 12 132.4 7.6 125.2X +SQL Parquet Vectorized: DataPageV2 119 125 5 132.0 7.6 124.7X +SQL Parquet MR: DataPageV1 1954 2025 101 8.0 124.2 7.6X +SQL Parquet MR: DataPageV2 1800 1824 35 8.7 114.4 8.3X +SQL ORC Vectorized 169 176 6 93.0 10.8 87.9X +SQL ORC MR 1432 1467 50 11.0 91.0 10.4X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 72 77 5 219.4 4.6 1.0X -ParquetReader Vectorized: DataPageV2 72 77 3 217.9 4.6 1.0X -ParquetReader Vectorized -> Row: DataPageV1 76 83 6 206.6 4.8 0.9X -ParquetReader Vectorized -> Row: DataPageV2 75 80 3 210.3 4.8 1.0X +ParquetReader Vectorized: DataPageV1 118 120 2 133.0 7.5 1.0X +ParquetReader Vectorized: DataPageV2 119 120 2 132.6 7.5 1.0X +ParquetReader Vectorized -> Row: DataPageV1 72 73 2 218.1 4.6 1.6X +ParquetReader Vectorized -> Row: DataPageV2 72 74 2 217.7 4.6 1.6X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 18330 18332 3 0.9 1165.4 1.0X -SQL Json 11383 11429 66 1.4 723.7 1.6X -SQL Parquet Vectorized: DataPageV1 179 197 13 88.0 11.4 102.5X -SQL Parquet Vectorized: DataPageV2 239 263 18 65.7 15.2 76.6X -SQL Parquet MR: DataPageV1 2552 2567 21 6.2 162.3 7.2X -SQL Parquet MR: DataPageV2 2389 2436 67 6.6 151.9 7.7X -SQL ORC Vectorized 246 263 14 64.0 15.6 74.6X -SQL ORC MR 1965 2002 52 8.0 124.9 9.3X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL CSV 14601 14699 139 1.1 928.3 1.0X +SQL Json 9446 9517 101 1.7 600.5 1.5X +SQL Parquet Vectorized: DataPageV1 156 168 15 101.1 9.9 93.8X +SQL Parquet Vectorized: DataPageV2 197 213 15 79.6 12.6 73.9X +SQL Parquet MR: DataPageV1 2113 2130 23 7.4 134.4 6.9X +SQL Parquet MR: DataPageV2 1739 1784 64 9.0 110.5 8.4X +SQL ORC Vectorized 192 205 10 81.9 12.2 76.0X +SQL ORC MR 1518 1588 100 10.4 96.5 9.6X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 253 263 11 62.2 16.1 1.0X -ParquetReader Vectorized: DataPageV2 306 317 7 51.4 19.4 0.8X -ParquetReader Vectorized -> Row: DataPageV1 246 250 4 64.0 15.6 1.0X -ParquetReader Vectorized -> Row: DataPageV2 316 321 4 49.8 20.1 0.8X +ParquetReader Vectorized: DataPageV1 215 221 6 73.2 13.7 1.0X +ParquetReader Vectorized: DataPageV2 269 278 8 58.5 17.1 0.8X +ParquetReader Vectorized -> Row: DataPageV1 206 208 2 76.2 13.1 1.0X +ParquetReader Vectorized -> Row: DataPageV2 244 262 10 64.4 15.5 0.9X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 19573 19822 352 0.8 1244.4 1.0X -SQL Json 12141 12217 107 1.3 771.9 1.6X -SQL Parquet Vectorized: DataPageV1 192 222 28 81.8 12.2 101.8X -SQL Parquet Vectorized: DataPageV2 345 373 24 45.6 21.9 56.7X -SQL Parquet MR: DataPageV1 2736 2741 7 5.7 173.9 7.2X -SQL Parquet MR: DataPageV2 2467 2536 97 6.4 156.9 7.9X -SQL ORC Vectorized 332 356 20 47.4 21.1 59.0X -SQL ORC MR 2188 2193 7 7.2 139.1 8.9X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL CSV 15886 16086 282 1.0 1010.0 1.0X +SQL Json 9872 9880 12 1.6 627.6 1.6X +SQL Parquet Vectorized: DataPageV1 174 195 22 90.4 11.1 91.3X +SQL Parquet Vectorized: DataPageV2 393 409 16 40.0 25.0 40.4X +SQL Parquet MR: DataPageV1 1953 2064 157 8.1 124.2 8.1X +SQL Parquet MR: DataPageV2 2215 2231 23 7.1 140.8 7.2X +SQL ORC Vectorized 280 314 22 56.1 17.8 56.7X +SQL ORC MR 1681 1706 35 9.4 106.9 9.5X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 291 295 4 54.1 18.5 1.0X -ParquetReader Vectorized: DataPageV2 493 518 39 31.9 31.3 0.6X -ParquetReader Vectorized -> Row: DataPageV1 300 306 8 52.5 19.1 1.0X -ParquetReader Vectorized -> Row: DataPageV2 471 483 11 33.4 30.0 0.6X +ParquetReader Vectorized: DataPageV1 253 263 8 62.1 16.1 1.0X +ParquetReader Vectorized: DataPageV2 450 461 15 34.9 28.6 0.6X +ParquetReader Vectorized -> Row: DataPageV1 241 253 12 65.2 15.3 1.1X +ParquetReader Vectorized -> Row: DataPageV2 437 448 14 36.0 27.8 0.6X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 24692 24718 37 0.6 1569.9 1.0X -SQL Json 14839 14875 50 1.1 943.5 1.7X -SQL Parquet Vectorized: DataPageV1 295 316 29 53.3 18.7 83.7X -SQL Parquet Vectorized: DataPageV2 477 505 24 32.9 30.4 51.7X -SQL Parquet MR: DataPageV1 2841 2981 197 5.5 180.6 8.7X -SQL Parquet MR: DataPageV2 2616 2632 23 6.0 166.3 9.4X -SQL ORC Vectorized 388 403 11 40.5 24.7 63.6X -SQL ORC MR 2274 2372 138 6.9 144.6 10.9X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL CSV 20641 20744 145 0.8 1312.3 1.0X +SQL Json 13055 13122 95 1.2 830.0 1.6X +SQL Parquet Vectorized: DataPageV1 246 267 16 63.8 15.7 83.8X +SQL Parquet Vectorized: DataPageV2 513 532 16 30.7 32.6 40.2X +SQL Parquet MR: DataPageV1 2354 2387 47 6.7 149.7 8.8X +SQL Parquet MR: DataPageV2 2118 2148 43 7.4 134.6 9.7X +SQL ORC Vectorized 418 437 17 37.6 26.6 49.4X +SQL ORC MR 1808 1852 61 8.7 115.0 11.4X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 376 387 9 41.9 23.9 1.0X -ParquetReader Vectorized: DataPageV2 585 591 6 26.9 37.2 0.6X -ParquetReader Vectorized -> Row: DataPageV1 377 387 9 41.8 23.9 1.0X -ParquetReader Vectorized -> Row: DataPageV2 576 586 10 27.3 36.6 0.7X +ParquetReader Vectorized: DataPageV1 306 315 5 51.5 19.4 1.0X +ParquetReader Vectorized: DataPageV2 584 591 11 26.9 37.1 0.5X +ParquetReader Vectorized -> Row: DataPageV1 288 299 14 54.6 18.3 1.1X +ParquetReader Vectorized -> Row: DataPageV2 549 557 8 28.6 34.9 0.6X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 20566 20651 119 0.8 1307.6 1.0X -SQL Json 14337 14409 101 1.1 911.5 1.4X -SQL Parquet Vectorized: DataPageV1 154 167 8 101.9 9.8 133.2X -SQL Parquet Vectorized: DataPageV2 157 178 14 99.9 10.0 130.6X -SQL Parquet MR: DataPageV1 2730 2730 1 5.8 173.5 7.5X -SQL Parquet MR: DataPageV2 2459 2491 45 6.4 156.3 8.4X -SQL ORC Vectorized 479 501 15 32.9 30.4 43.0X -SQL ORC MR 2293 2343 71 6.9 145.8 9.0X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL CSV 17024 17292 378 0.9 1082.4 1.0X +SQL Json 11724 11904 255 1.3 745.4 1.5X +SQL Parquet Vectorized: DataPageV1 174 186 11 90.6 11.0 98.1X +SQL Parquet Vectorized: DataPageV2 173 189 14 90.9 11.0 98.4X +SQL Parquet MR: DataPageV1 1932 2037 148 8.1 122.9 8.8X +SQL Parquet MR: DataPageV2 1947 1976 41 8.1 123.8 8.7X +SQL ORC Vectorized 432 459 36 36.4 27.5 39.4X +SQL ORC MR 1984 1985 1 7.9 126.1 8.6X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 272 283 9 57.9 17.3 1.0X -ParquetReader Vectorized: DataPageV2 250 288 27 62.9 15.9 1.1X -ParquetReader Vectorized -> Row: DataPageV1 291 301 6 54.1 18.5 0.9X -ParquetReader Vectorized -> Row: DataPageV2 293 305 14 53.6 18.6 0.9X +ParquetReader Vectorized: DataPageV1 257 259 2 61.2 16.3 1.0X +ParquetReader Vectorized: DataPageV2 239 254 10 65.8 15.2 1.1X +ParquetReader Vectorized -> Row: DataPageV1 259 260 1 60.8 16.4 1.0X +ParquetReader Vectorized -> Row: DataPageV2 258 262 6 61.0 16.4 1.0X -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 25753 25874 171 0.6 1637.3 1.0X -SQL Json 19097 19391 416 0.8 1214.2 1.3X -SQL Parquet Vectorized: DataPageV1 273 288 11 57.6 17.4 94.3X -SQL Parquet Vectorized: DataPageV2 240 277 25 65.5 15.3 107.3X -SQL Parquet MR: DataPageV1 2969 3042 103 5.3 188.8 8.7X -SQL Parquet MR: DataPageV2 2692 2747 78 5.8 171.1 9.6X -SQL ORC Vectorized 601 626 20 26.2 38.2 42.8X -SQL ORC MR 2458 2467 13 6.4 156.3 10.5X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL CSV 22592 22594 4 0.7 1436.3 1.0X +SQL Json 16252 16271 26 1.0 1033.3 1.4X +SQL Parquet Vectorized: DataPageV1 247 271 22 63.6 15.7 91.3X +SQL Parquet Vectorized: DataPageV2 252 266 14 62.4 16.0 89.6X +SQL Parquet MR: DataPageV1 2337 2352 21 6.7 148.6 9.7X +SQL Parquet MR: DataPageV2 2187 2223 50 7.2 139.1 10.3X +SQL ORC Vectorized 489 526 25 32.2 31.1 46.2X +SQL ORC MR 1816 1892 107 8.7 115.5 12.4X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 354 363 7 44.4 22.5 1.0X -ParquetReader Vectorized: DataPageV2 345 359 12 45.5 22.0 1.0X -ParquetReader Vectorized -> Row: DataPageV1 337 345 8 46.7 21.4 1.1X -ParquetReader Vectorized -> Row: DataPageV2 335 364 21 46.9 21.3 1.1X +ParquetReader Vectorized: DataPageV1 291 304 8 54.0 18.5 1.0X +ParquetReader Vectorized: DataPageV2 298 309 7 52.9 18.9 1.0X +ParquetReader Vectorized -> Row: DataPageV1 330 338 16 47.7 21.0 0.9X +ParquetReader Vectorized -> Row: DataPageV2 331 338 12 47.5 21.1 0.9X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 18074 18101 37 0.6 1723.7 1.0X -SQL Json 13211 13214 5 0.8 1259.9 1.4X -SQL Parquet Vectorized: DataPageV1 2249 2286 53 4.7 214.5 8.0X -SQL Parquet Vectorized: DataPageV2 2804 2818 20 3.7 267.4 6.4X -SQL Parquet MR: DataPageV1 4708 4779 100 2.2 449.0 3.8X -SQL Parquet MR: DataPageV2 4868 5046 251 2.2 464.3 3.7X -SQL ORC Vectorized 2145 2160 20 4.9 204.6 8.4X -SQL ORC MR 4180 4308 182 2.5 398.6 4.3X +SQL CSV 14365 14780 587 0.7 1369.9 1.0X +SQL Json 10718 10772 76 1.0 1022.2 1.3X +SQL Parquet Vectorized: DataPageV1 1932 1988 80 5.4 184.2 7.4X +SQL Parquet Vectorized: DataPageV2 2298 2317 27 4.6 219.2 6.2X +SQL Parquet MR: DataPageV1 3829 3957 181 2.7 365.1 3.8X +SQL Parquet MR: DataPageV2 4176 4208 46 2.5 398.3 3.4X +SQL ORC Vectorized 2026 2046 28 5.2 193.2 7.1X +SQL ORC MR 3566 3580 21 2.9 340.0 4.0X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11320 11376 78 0.9 1079.6 1.0X -SQL Json 7593 7664 101 1.4 724.1 1.5X -SQL Parquet Vectorized: DataPageV1 633 639 9 16.6 60.3 17.9X -SQL Parquet Vectorized: DataPageV2 621 644 20 16.9 59.2 18.2X -SQL Parquet MR: DataPageV1 2111 2157 65 5.0 201.3 5.4X -SQL Parquet MR: DataPageV2 2018 2064 65 5.2 192.4 5.6X -SQL ORC Vectorized 505 540 36 20.8 48.2 22.4X -SQL ORC MR 2302 2360 82 4.6 219.5 4.9X +SQL CSV 9372 9373 1 1.1 893.8 1.0X +SQL Json 6862 6865 4 1.5 654.4 1.4X +SQL Parquet Vectorized: DataPageV1 606 613 8 17.3 57.8 15.5X +SQL Parquet Vectorized: DataPageV2 611 615 3 17.2 58.3 15.3X +SQL Parquet MR: DataPageV1 1713 1721 11 6.1 163.3 5.5X +SQL Parquet MR: DataPageV2 1721 1724 4 6.1 164.1 5.4X +SQL ORC Vectorized 467 469 2 22.5 44.5 20.1X +SQL ORC MR 1816 1818 2 5.8 173.2 5.2X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 24867 25261 556 0.6 1581.0 1.0X -Data column - Json 13937 13987 70 1.1 886.1 1.8X -Data column - Parquet Vectorized: DataPageV1 252 264 8 62.3 16.0 98.5X -Data column - Parquet Vectorized: DataPageV2 547 560 13 28.8 34.7 45.5X -Data column - Parquet MR: DataPageV1 3492 3509 25 4.5 222.0 7.1X -Data column - Parquet MR: DataPageV2 3148 3208 84 5.0 200.2 7.9X -Data column - ORC Vectorized 493 512 21 31.9 31.3 50.5X -Data column - ORC MR 2925 2943 26 5.4 185.9 8.5X -Partition column - CSV 7847 7851 5 2.0 498.9 3.2X -Partition column - Json 11759 11908 210 1.3 747.6 2.1X -Partition column - Parquet Vectorized: DataPageV1 60 67 7 262.3 3.8 414.7X -Partition column - Parquet Vectorized: DataPageV2 57 65 9 274.2 3.6 433.5X -Partition column - Parquet MR: DataPageV1 1762 1768 8 8.9 112.1 14.1X -Partition column - Parquet MR: DataPageV2 1742 1783 59 9.0 110.7 14.3X -Partition column - ORC Vectorized 59 71 7 265.6 3.8 419.9X -Partition column - ORC MR 1743 1764 29 9.0 110.8 14.3X -Both columns - CSV 25859 25924 92 0.6 1644.1 1.0X -Both columns - Json 14693 14764 101 1.1 934.2 1.7X -Both columns - Parquet Vectorized: DataPageV1 341 395 66 46.2 21.7 73.0X -Both columns - Parquet Vectorized: DataPageV2 624 643 13 25.2 39.7 39.9X -Both columns - Parquet MR: DataPageV1 3541 3611 99 4.4 225.2 7.0X -Both columns - Parquet MR: DataPageV2 3279 3301 32 4.8 208.4 7.6X -Both columns - ORC Vectorized 434 483 40 36.2 27.6 57.3X -Both columns - ORC MR 2946 2964 26 5.3 187.3 8.4X +Data column - CSV 21799 22053 360 0.7 1385.9 1.0X +Data column - Json 12978 12985 10 1.2 825.1 1.7X +Data column - Parquet Vectorized: DataPageV1 261 277 15 60.4 16.6 83.7X +Data column - Parquet Vectorized: DataPageV2 601 647 42 26.2 38.2 36.3X +Data column - Parquet MR: DataPageV1 2796 2798 2 5.6 177.8 7.8X +Data column - Parquet MR: DataPageV2 2595 2626 43 6.1 165.0 8.4X +Data column - ORC Vectorized 428 449 25 36.8 27.2 50.9X +Data column - ORC MR 2162 2274 159 7.3 137.5 10.1X +Partition column - CSV 5804 5922 167 2.7 369.0 3.8X +Partition column - Json 10410 10455 64 1.5 661.8 2.1X +Partition column - Parquet Vectorized: DataPageV1 56 60 6 280.9 3.6 389.3X +Partition column - Parquet Vectorized: DataPageV2 55 59 5 286.5 3.5 397.1X +Partition column - Parquet MR: DataPageV1 1357 1357 1 11.6 86.3 16.1X +Partition column - Parquet MR: DataPageV2 1339 1339 0 11.7 85.1 16.3X +Partition column - ORC Vectorized 57 61 5 276.3 3.6 382.9X +Partition column - ORC MR 1346 1351 7 11.7 85.6 16.2X +Both columns - CSV 20812 21349 759 0.8 1323.2 1.0X +Both columns - Json 13061 13372 440 1.2 830.4 1.7X +Both columns - Parquet Vectorized: DataPageV1 265 275 6 59.3 16.9 82.1X +Both columns - Parquet Vectorized: DataPageV2 619 637 20 25.4 39.4 35.2X +Both columns - Parquet MR: DataPageV1 2827 2830 4 5.6 179.8 7.7X +Both columns - Parquet MR: DataPageV2 2593 2603 14 6.1 164.8 8.4X +Both columns - ORC Vectorized 391 432 37 40.2 24.9 55.7X +Both columns - ORC MR 2438 2455 25 6.5 155.0 8.9X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 13698 13783 121 0.8 1306.3 1.0X -SQL Json 11030 11144 161 1.0 1051.9 1.2X -SQL Parquet Vectorized: DataPageV1 1695 1699 7 6.2 161.6 8.1X -SQL Parquet Vectorized: DataPageV2 2740 2744 5 3.8 261.3 5.0X -SQL Parquet MR: DataPageV1 4547 4594 66 2.3 433.7 3.0X -SQL Parquet MR: DataPageV2 5382 5455 103 1.9 513.3 2.5X -ParquetReader Vectorized: DataPageV1 1238 1238 0 8.5 118.0 11.1X -ParquetReader Vectorized: DataPageV2 2312 2325 19 4.5 220.5 5.9X -SQL ORC Vectorized 1134 1147 18 9.2 108.2 12.1X -SQL ORC MR 3966 4015 69 2.6 378.2 3.5X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL CSV 10697 10736 56 1.0 1020.1 1.0X +SQL Json 9722 9963 341 1.1 927.2 1.1X +SQL Parquet Vectorized: DataPageV1 1337 1342 6 7.8 127.6 8.0X +SQL Parquet Vectorized: DataPageV2 1731 1757 38 6.1 165.1 6.2X +SQL Parquet MR: DataPageV1 3581 3584 4 2.9 341.5 3.0X +SQL Parquet MR: DataPageV2 3996 4001 7 2.6 381.1 2.7X +ParquetReader Vectorized: DataPageV1 1006 1015 13 10.4 96.0 10.6X +ParquetReader Vectorized: DataPageV2 1476 1477 2 7.1 140.7 7.2X +SQL ORC Vectorized 957 1042 120 11.0 91.3 11.2X +SQL ORC MR 3060 3068 11 3.4 291.8 3.5X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 10613 10658 64 1.0 1012.1 1.0X -SQL Json 8973 8996 33 1.2 855.7 1.2X -SQL Parquet Vectorized: DataPageV1 1208 1221 18 8.7 115.2 8.8X -SQL Parquet Vectorized: DataPageV2 1949 1950 1 5.4 185.9 5.4X -SQL Parquet MR: DataPageV1 3701 3716 21 2.8 353.0 2.9X -SQL Parquet MR: DataPageV2 4150 4192 60 2.5 395.8 2.6X -ParquetReader Vectorized: DataPageV1 1191 1192 1 8.8 113.6 8.9X -ParquetReader Vectorized: DataPageV2 1874 1917 61 5.6 178.7 5.7X -SQL ORC Vectorized 1338 1365 38 7.8 127.6 7.9X -SQL ORC MR 3659 3674 21 2.9 349.0 2.9X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL CSV 7299 7300 1 1.4 696.1 1.0X +SQL Json 7453 7659 292 1.4 710.8 1.0X +SQL Parquet Vectorized: DataPageV1 896 916 32 11.7 85.4 8.1X +SQL Parquet Vectorized: DataPageV2 1282 1283 1 8.2 122.3 5.7X +SQL Parquet MR: DataPageV1 2586 2678 130 4.1 246.6 2.8X +SQL Parquet MR: DataPageV2 3061 3066 6 3.4 291.9 2.4X +ParquetReader Vectorized: DataPageV1 913 915 3 11.5 87.0 8.0X +ParquetReader Vectorized: DataPageV2 1181 1183 3 8.9 112.6 6.2X +SQL ORC Vectorized 1102 1111 13 9.5 105.1 6.6X +SQL ORC MR 2916 3002 121 3.6 278.1 2.5X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 8714 8809 134 1.2 831.0 1.0X -SQL Json 5801 5819 25 1.8 553.2 1.5X -SQL Parquet Vectorized: DataPageV1 297 316 11 35.3 28.3 29.3X -SQL Parquet Vectorized: DataPageV2 363 382 12 28.9 34.6 24.0X -SQL Parquet MR: DataPageV1 2350 2366 22 4.5 224.1 3.7X -SQL Parquet MR: DataPageV2 2132 2183 73 4.9 203.3 4.1X -ParquetReader Vectorized: DataPageV1 296 310 13 35.4 28.2 29.4X -ParquetReader Vectorized: DataPageV2 368 372 3 28.5 35.1 23.7X -SQL ORC Vectorized 474 487 10 22.1 45.2 18.4X -SQL ORC MR 2025 2031 9 5.2 193.1 4.3X +SQL CSV 4615 4619 6 2.3 440.1 1.0X +SQL Json 4926 4927 1 2.1 469.8 0.9X +SQL Parquet Vectorized: DataPageV1 240 246 5 43.8 22.9 19.3X +SQL Parquet Vectorized: DataPageV2 287 295 4 36.5 27.4 16.1X +SQL Parquet MR: DataPageV1 1774 1781 10 5.9 169.2 2.6X +SQL Parquet MR: DataPageV2 1772 1773 1 5.9 169.0 2.6X +ParquetReader Vectorized: DataPageV1 238 240 2 44.0 22.7 19.4X +ParquetReader Vectorized: DataPageV2 285 288 3 36.8 27.2 16.2X +SQL ORC Vectorized 382 392 6 27.4 36.5 12.1X +SQL ORC MR 1616 1617 2 6.5 154.1 2.9X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 2677 2687 14 0.4 2553.2 1.0X -SQL Json 3581 3588 10 0.3 3414.8 0.7X -SQL Parquet Vectorized: DataPageV1 52 59 7 20.2 49.6 51.5X -SQL Parquet Vectorized: DataPageV2 68 75 7 15.4 65.0 39.3X -SQL Parquet MR: DataPageV1 245 257 9 4.3 233.6 10.9X -SQL Parquet MR: DataPageV2 224 237 8 4.7 213.7 11.9X -SQL ORC Vectorized 64 70 5 16.3 61.3 41.7X -SQL ORC MR 208 216 8 5.0 198.2 12.9X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL CSV 2051 2052 2 0.5 1956.1 1.0X +SQL Json 3230 3232 3 0.3 3080.6 0.6X +SQL Parquet Vectorized: DataPageV1 45 50 7 23.2 43.2 45.3X +SQL Parquet Vectorized: DataPageV2 67 72 8 15.6 64.1 30.5X +SQL Parquet MR: DataPageV1 191 198 8 5.5 181.9 10.8X +SQL Parquet MR: DataPageV2 176 181 6 6.0 167.7 11.7X +SQL ORC Vectorized 55 60 6 19.0 52.7 37.1X +SQL ORC MR 164 168 4 6.4 156.1 12.5X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5753 5771 25 0.2 5486.7 1.0X -SQL Json 13801 13851 71 0.1 13161.9 0.4X -SQL Parquet Vectorized: DataPageV1 75 83 9 14.1 71.1 77.2X -SQL Parquet Vectorized: DataPageV2 84 93 7 12.4 80.6 68.1X -SQL Parquet MR: DataPageV1 269 280 7 3.9 256.5 21.4X -SQL Parquet MR: DataPageV2 251 258 8 4.2 238.9 23.0X -SQL ORC Vectorized 82 88 6 12.8 78.3 70.1X -SQL ORC MR 223 239 8 4.7 213.0 25.8X - -OpenJDK 64-Bit Server VM 17.0.1+12-LTS on Linux 5.11.0-1025-azure -Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL CSV 4530 4530 0 0.2 4320.0 1.0X +SQL Json 12530 12536 9 0.1 11949.2 0.4X +SQL Parquet Vectorized: DataPageV1 60 65 6 17.4 57.6 75.0X +SQL Parquet Vectorized: DataPageV2 83 91 8 12.6 79.1 54.6X +SQL Parquet MR: DataPageV1 211 216 7 5.0 201.2 21.5X +SQL Parquet MR: DataPageV2 195 204 12 5.4 186.0 23.2X +SQL ORC Vectorized 70 75 5 14.9 67.1 64.4X +SQL ORC MR 182 191 11 5.8 173.5 24.9X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9487 9503 24 0.1 9047.1 1.0X -SQL Json 26109 26240 186 0.0 24899.2 0.4X -SQL Parquet Vectorized: DataPageV1 100 110 10 10.4 95.8 94.5X -SQL Parquet Vectorized: DataPageV2 113 119 6 9.3 107.3 84.3X -SQL Parquet MR: DataPageV1 280 296 11 3.7 267.2 33.9X -SQL Parquet MR: DataPageV2 281 321 68 3.7 268.0 33.8X -SQL ORC Vectorized 92 101 8 11.4 87.5 103.4X -SQL ORC MR 228 245 10 4.6 217.7 41.6X +SQL CSV 7758 7763 7 0.1 7398.8 1.0X +SQL Json 24530 24546 23 0.0 23393.2 0.3X +SQL Parquet Vectorized: DataPageV1 91 96 6 11.5 87.1 84.9X +SQL Parquet Vectorized: DataPageV2 113 118 6 9.2 108.1 68.4X +SQL Parquet MR: DataPageV1 246 254 8 4.3 234.2 31.6X +SQL Parquet MR: DataPageV2 229 235 6 4.6 218.7 33.8X +SQL ORC Vectorized 88 92 6 11.9 83.8 88.3X +SQL ORC MR 205 214 9 5.1 195.2 37.9X diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-results.txt index 6a2b6bfb4a0a8..1a7ebe51057be 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-results.txt @@ -2,322 +2,322 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11570 12144 812 1.4 735.6 1.0X -SQL Json 7542 7568 37 2.1 479.5 1.5X -SQL Parquet Vectorized: DataPageV1 129 144 16 121.9 8.2 89.7X -SQL Parquet Vectorized: DataPageV2 92 106 20 170.3 5.9 125.2X -SQL Parquet MR: DataPageV1 1416 1419 3 11.1 90.0 8.2X -SQL Parquet MR: DataPageV2 1281 1359 110 12.3 81.4 9.0X -SQL ORC Vectorized 161 176 10 97.4 10.3 71.6X -SQL ORC MR 1525 1545 29 10.3 96.9 7.6X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +SQL CSV 12972 13210 337 1.2 824.8 1.0X +SQL Json 7440 7634 275 2.1 473.0 1.7X +SQL Parquet Vectorized: DataPageV1 125 137 10 125.8 8.0 103.7X +SQL Parquet Vectorized: DataPageV2 93 103 20 168.4 5.9 138.9X +SQL Parquet MR: DataPageV1 1621 1657 52 9.7 103.0 8.0X +SQL Parquet MR: DataPageV2 1396 1420 34 11.3 88.7 9.3X +SQL ORC Vectorized 178 186 16 88.5 11.3 73.0X +SQL ORC MR 1501 1503 4 10.5 95.4 8.6X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 111 118 6 142.3 7.0 1.0X -ParquetReader Vectorized: DataPageV2 116 117 2 135.7 7.4 1.0X -ParquetReader Vectorized -> Row: DataPageV1 48 49 1 324.9 3.1 2.3X -ParquetReader Vectorized -> Row: DataPageV2 39 39 1 405.8 2.5 2.9X +ParquetReader Vectorized: DataPageV1 132 134 4 119.3 8.4 1.0X +ParquetReader Vectorized: DataPageV2 115 117 3 136.7 7.3 1.1X +ParquetReader Vectorized -> Row: DataPageV1 57 58 1 275.1 3.6 2.3X +ParquetReader Vectorized -> Row: DataPageV2 41 41 1 387.9 2.6 3.3X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 13807 14535 1030 1.1 877.8 1.0X -SQL Json 8079 8094 21 1.9 513.6 1.7X -SQL Parquet Vectorized: DataPageV1 139 152 12 113.0 8.9 99.2X -SQL Parquet Vectorized: DataPageV2 140 147 5 112.5 8.9 98.7X -SQL Parquet MR: DataPageV1 1637 1741 148 9.6 104.1 8.4X -SQL Parquet MR: DataPageV2 1522 1636 161 10.3 96.8 9.1X -SQL ORC Vectorized 147 160 10 106.9 9.4 93.8X -SQL ORC MR 1542 1545 4 10.2 98.1 9.0X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +SQL CSV 15808 15867 83 1.0 1005.0 1.0X +SQL Json 9119 9174 78 1.7 579.8 1.7X +SQL Parquet Vectorized: DataPageV1 157 163 7 100.2 10.0 100.7X +SQL Parquet Vectorized: DataPageV2 156 161 5 100.6 9.9 101.1X +SQL Parquet MR: DataPageV1 1846 1871 36 8.5 117.4 8.6X +SQL Parquet MR: DataPageV2 1702 1707 7 9.2 108.2 9.3X +SQL ORC Vectorized 130 134 2 120.7 8.3 121.3X +SQL ORC MR 1536 1542 9 10.2 97.7 10.3X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 166 171 8 94.7 10.6 1.0X -ParquetReader Vectorized: DataPageV2 166 169 4 94.7 10.6 1.0X -ParquetReader Vectorized -> Row: DataPageV1 156 157 2 100.7 9.9 1.1X -ParquetReader Vectorized -> Row: DataPageV2 156 157 2 100.7 9.9 1.1X +ParquetReader Vectorized: DataPageV1 198 202 5 79.3 12.6 1.0X +ParquetReader Vectorized: DataPageV2 197 199 3 79.8 12.5 1.0X +ParquetReader Vectorized -> Row: DataPageV1 188 190 3 83.4 12.0 1.1X +ParquetReader Vectorized -> Row: DataPageV2 188 190 3 83.5 12.0 1.1X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 15327 15421 133 1.0 974.5 1.0X -SQL Json 8564 8799 332 1.8 544.5 1.8X -SQL Parquet Vectorized: DataPageV1 202 219 11 77.8 12.8 75.8X -SQL Parquet Vectorized: DataPageV2 203 210 8 77.7 12.9 75.7X -SQL Parquet MR: DataPageV1 1874 2004 183 8.4 119.2 8.2X -SQL Parquet MR: DataPageV2 1606 1709 146 9.8 102.1 9.5X -SQL ORC Vectorized 167 179 10 94.1 10.6 91.7X -SQL ORC MR 1404 1408 6 11.2 89.3 10.9X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +SQL CSV 16474 16493 27 1.0 1047.4 1.0X +SQL Json 9477 9478 1 1.7 602.6 1.7X +SQL Parquet Vectorized: DataPageV1 211 216 7 74.4 13.4 77.9X +SQL Parquet Vectorized: DataPageV2 215 221 5 73.0 13.7 76.5X +SQL Parquet MR: DataPageV1 2114 2133 28 7.4 134.4 7.8X +SQL Parquet MR: DataPageV2 1792 1808 22 8.8 113.9 9.2X +SQL ORC Vectorized 179 182 4 88.0 11.4 92.2X +SQL ORC MR 1586 1588 2 9.9 100.8 10.4X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 222 236 13 70.7 14.1 1.0X -ParquetReader Vectorized: DataPageV2 259 268 14 60.8 16.5 0.9X -ParquetReader Vectorized -> Row: DataPageV1 228 248 11 68.9 14.5 1.0X -ParquetReader Vectorized -> Row: DataPageV2 264 293 13 59.5 16.8 0.8X +ParquetReader Vectorized: DataPageV1 254 257 5 62.0 16.1 1.0X +ParquetReader Vectorized: DataPageV2 299 302 4 52.6 19.0 0.8X +ParquetReader Vectorized -> Row: DataPageV1 236 238 4 66.7 15.0 1.1X +ParquetReader Vectorized -> Row: DataPageV2 281 283 4 56.0 17.9 0.9X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 17479 17651 243 0.9 1111.3 1.0X -SQL Json 9565 9582 25 1.6 608.1 1.8X -SQL Parquet Vectorized: DataPageV1 152 159 8 103.2 9.7 114.7X -SQL Parquet Vectorized: DataPageV2 290 308 18 54.2 18.4 60.3X -SQL Parquet MR: DataPageV1 1861 1980 169 8.5 118.3 9.4X -SQL Parquet MR: DataPageV2 1647 1748 142 9.5 104.7 10.6X -SQL ORC Vectorized 230 251 12 68.3 14.6 75.9X -SQL ORC MR 1645 1648 3 9.6 104.6 10.6X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +SQL CSV 18049 18086 52 0.9 1147.5 1.0X +SQL Json 10073 10074 1 1.6 640.4 1.8X +SQL Parquet Vectorized: DataPageV1 177 184 9 89.1 11.2 102.3X +SQL Parquet Vectorized: DataPageV2 301 306 6 52.2 19.1 59.9X +SQL Parquet MR: DataPageV1 2120 2134 21 7.4 134.8 8.5X +SQL Parquet MR: DataPageV2 1855 1893 54 8.5 117.9 9.7X +SQL ORC Vectorized 246 249 1 63.8 15.7 73.2X +SQL ORC MR 1655 1660 6 9.5 105.2 10.9X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 208 213 9 75.7 13.2 1.0X -ParquetReader Vectorized: DataPageV2 355 382 14 44.3 22.6 0.6X -ParquetReader Vectorized -> Row: DataPageV1 212 233 8 74.1 13.5 1.0X -ParquetReader Vectorized -> Row: DataPageV2 350 353 7 45.0 22.2 0.6X +ParquetReader Vectorized: DataPageV1 239 243 5 65.8 15.2 1.0X +ParquetReader Vectorized: DataPageV2 384 387 4 40.9 24.4 0.6X +ParquetReader Vectorized -> Row: DataPageV1 223 224 3 70.7 14.2 1.1X +ParquetReader Vectorized -> Row: DataPageV2 366 370 7 43.0 23.3 0.7X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 21825 21944 169 0.7 1387.6 1.0X -SQL Json 11877 11927 71 1.3 755.1 1.8X -SQL Parquet Vectorized: DataPageV1 229 242 18 68.8 14.5 95.5X -SQL Parquet Vectorized: DataPageV2 435 452 23 36.1 27.7 50.1X -SQL Parquet MR: DataPageV1 2050 2184 190 7.7 130.3 10.6X -SQL Parquet MR: DataPageV2 1829 1927 138 8.6 116.3 11.9X -SQL ORC Vectorized 287 308 14 54.8 18.3 76.0X -SQL ORC MR 1579 1603 34 10.0 100.4 13.8X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +SQL CSV 22703 22737 48 0.7 1443.4 1.0X +SQL Json 12723 12743 28 1.2 808.9 1.8X +SQL Parquet Vectorized: DataPageV1 228 261 76 69.1 14.5 99.7X +SQL Parquet Vectorized: DataPageV2 465 472 7 33.8 29.5 48.9X +SQL Parquet MR: DataPageV1 2166 2168 3 7.3 137.7 10.5X +SQL Parquet MR: DataPageV2 1921 1936 21 8.2 122.1 11.8X +SQL ORC Vectorized 307 313 10 51.2 19.5 73.9X +SQL ORC MR 1730 1745 21 9.1 110.0 13.1X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 299 341 86 52.6 19.0 1.0X -ParquetReader Vectorized: DataPageV2 551 607 110 28.5 35.1 0.5X -ParquetReader Vectorized -> Row: DataPageV1 341 344 4 46.2 21.7 0.9X -ParquetReader Vectorized -> Row: DataPageV2 508 557 33 31.0 32.3 0.6X +ParquetReader Vectorized: DataPageV1 309 316 10 51.0 19.6 1.0X +ParquetReader Vectorized: DataPageV2 559 563 5 28.1 35.5 0.6X +ParquetReader Vectorized -> Row: DataPageV1 292 296 6 53.9 18.6 1.1X +ParquetReader Vectorized -> Row: DataPageV2 541 547 8 29.1 34.4 0.6X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 17585 17926 482 0.9 1118.0 1.0X -SQL Json 11927 12180 357 1.3 758.3 1.5X -SQL Parquet Vectorized: DataPageV1 150 161 11 104.6 9.6 116.9X -SQL Parquet Vectorized: DataPageV2 150 160 8 104.7 9.5 117.1X -SQL Parquet MR: DataPageV1 1830 1867 52 8.6 116.4 9.6X -SQL Parquet MR: DataPageV2 1715 1828 160 9.2 109.1 10.3X -SQL ORC Vectorized 328 358 15 48.0 20.8 53.6X -SQL ORC MR 1584 1687 145 9.9 100.7 11.1X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +SQL CSV 18790 18808 25 0.8 1194.6 1.0X +SQL Json 11572 11579 10 1.4 735.7 1.6X +SQL Parquet Vectorized: DataPageV1 155 158 5 101.7 9.8 121.6X +SQL Parquet Vectorized: DataPageV2 158 162 6 99.6 10.0 119.0X +SQL Parquet MR: DataPageV1 2041 2050 12 7.7 129.8 9.2X +SQL Parquet MR: DataPageV2 1903 1905 3 8.3 121.0 9.9X +SQL ORC Vectorized 357 359 2 44.1 22.7 52.7X +SQL ORC MR 1745 1755 15 9.0 110.9 10.8X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 207 211 8 76.0 13.2 1.0X -ParquetReader Vectorized: DataPageV2 207 220 11 75.8 13.2 1.0X -ParquetReader Vectorized -> Row: DataPageV1 208 214 9 75.7 13.2 1.0X -ParquetReader Vectorized -> Row: DataPageV2 208 213 9 75.6 13.2 1.0X +ParquetReader Vectorized: DataPageV1 239 243 4 65.7 15.2 1.0X +ParquetReader Vectorized: DataPageV2 240 243 4 65.7 15.2 1.0X +ParquetReader Vectorized -> Row: DataPageV1 221 225 4 71.1 14.1 1.1X +ParquetReader Vectorized -> Row: DataPageV2 223 225 4 70.6 14.2 1.1X -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 22569 22614 63 0.7 1434.9 1.0X -SQL Json 15590 15600 15 1.0 991.2 1.4X -SQL Parquet Vectorized: DataPageV1 225 241 17 69.9 14.3 100.3X -SQL Parquet Vectorized: DataPageV2 219 236 13 72.0 13.9 103.3X -SQL Parquet MR: DataPageV1 2013 2109 136 7.8 128.0 11.2X -SQL Parquet MR: DataPageV2 1850 1967 165 8.5 117.6 12.2X -SQL ORC Vectorized 396 416 25 39.7 25.2 56.9X -SQL ORC MR 1707 1763 79 9.2 108.5 13.2X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +SQL CSV 23476 23478 3 0.7 1492.6 1.0X +SQL Json 14568 15103 757 1.1 926.2 1.6X +SQL Parquet Vectorized: DataPageV1 212 230 16 74.2 13.5 110.7X +SQL Parquet Vectorized: DataPageV2 209 218 8 75.4 13.3 112.5X +SQL Parquet MR: DataPageV1 1943 2080 194 8.1 123.5 12.1X +SQL Parquet MR: DataPageV2 1824 1830 9 8.6 116.0 12.9X +SQL ORC Vectorized 395 419 20 39.9 25.1 59.5X +SQL ORC MR 1844 1855 15 8.5 117.2 12.7X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 280 298 13 56.2 17.8 1.0X -ParquetReader Vectorized: DataPageV2 278 300 21 56.6 17.7 1.0X -ParquetReader Vectorized -> Row: DataPageV1 280 299 13 56.2 17.8 1.0X -ParquetReader Vectorized -> Row: DataPageV2 304 307 4 51.8 19.3 0.9X +ParquetReader Vectorized: DataPageV1 280 322 88 56.1 17.8 1.0X +ParquetReader Vectorized: DataPageV2 282 301 19 55.8 17.9 1.0X +ParquetReader Vectorized -> Row: DataPageV1 284 290 4 55.3 18.1 1.0X +ParquetReader Vectorized -> Row: DataPageV2 287 293 9 54.8 18.3 1.0X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 15548 16002 641 0.7 1482.8 1.0X -SQL Json 10801 11108 434 1.0 1030.1 1.4X -SQL Parquet Vectorized: DataPageV1 1858 1966 152 5.6 177.2 8.4X -SQL Parquet Vectorized: DataPageV2 2342 2466 175 4.5 223.4 6.6X -SQL Parquet MR: DataPageV1 3873 3908 49 2.7 369.4 4.0X -SQL Parquet MR: DataPageV2 3764 3869 148 2.8 358.9 4.1X -SQL ORC Vectorized 2018 2020 3 5.2 192.5 7.7X -SQL ORC MR 3247 3450 287 3.2 309.7 4.8X +SQL CSV 14663 15652 1399 0.7 1398.4 1.0X +SQL Json 10757 10845 125 1.0 1025.9 1.4X +SQL Parquet Vectorized: DataPageV1 1815 1933 166 5.8 173.1 8.1X +SQL Parquet Vectorized: DataPageV2 2244 2297 75 4.7 214.0 6.5X +SQL Parquet MR: DataPageV1 3491 3685 273 3.0 333.0 4.2X +SQL Parquet MR: DataPageV2 3600 3627 37 2.9 343.4 4.1X +SQL ORC Vectorized 1804 1895 129 5.8 172.0 8.1X +SQL ORC MR 3181 3379 280 3.3 303.4 4.6X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 8028 8337 436 1.3 765.6 1.0X -SQL Json 6362 6488 178 1.6 606.7 1.3X -SQL Parquet Vectorized: DataPageV1 642 673 51 16.3 61.3 12.5X -SQL Parquet Vectorized: DataPageV2 646 678 40 16.2 61.6 12.4X -SQL Parquet MR: DataPageV1 1504 1604 141 7.0 143.5 5.3X -SQL Parquet MR: DataPageV2 1645 1646 1 6.4 156.9 4.9X -SQL ORC Vectorized 386 415 25 27.2 36.8 20.8X -SQL ORC MR 1704 1730 37 6.2 162.5 4.7X +SQL CSV 8466 8778 441 1.2 807.4 1.0X +SQL Json 6389 6454 93 1.6 609.3 1.3X +SQL Parquet Vectorized: DataPageV1 644 675 52 16.3 61.4 13.1X +SQL Parquet Vectorized: DataPageV2 640 668 44 16.4 61.0 13.2X +SQL Parquet MR: DataPageV1 1579 1602 33 6.6 150.6 5.4X +SQL Parquet MR: DataPageV2 1536 1539 4 6.8 146.5 5.5X +SQL ORC Vectorized 439 443 4 23.9 41.9 19.3X +SQL ORC MR 1787 1806 27 5.9 170.5 4.7X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 21472 21514 59 0.7 1365.2 1.0X -Data column - Json 11537 11606 97 1.4 733.5 1.9X -Data column - Parquet Vectorized: DataPageV1 238 256 11 66.1 15.1 90.2X -Data column - Parquet Vectorized: DataPageV2 482 507 17 32.6 30.6 44.6X -Data column - Parquet MR: DataPageV1 2213 2355 200 7.1 140.7 9.7X -Data column - Parquet MR: DataPageV2 2036 2163 179 7.7 129.4 10.5X -Data column - ORC Vectorized 289 310 20 54.4 18.4 74.3X -Data column - ORC MR 1898 1936 54 8.3 120.7 11.3X -Partition column - CSV 6307 6364 80 2.5 401.0 3.4X -Partition column - Json 9167 9253 121 1.7 582.8 2.3X -Partition column - Parquet Vectorized: DataPageV1 62 66 3 253.5 3.9 346.1X -Partition column - Parquet Vectorized: DataPageV2 61 65 2 259.2 3.9 353.8X -Partition column - Parquet MR: DataPageV1 1086 1088 3 14.5 69.0 19.8X -Partition column - Parquet MR: DataPageV2 1091 1146 78 14.4 69.4 19.7X -Partition column - ORC Vectorized 63 67 2 251.1 4.0 342.9X -Partition column - ORC MR 1173 1175 3 13.4 74.6 18.3X -Both columns - CSV 21458 22038 820 0.7 1364.3 1.0X -Both columns - Json 12697 12712 22 1.2 807.2 1.7X -Both columns - Parquet Vectorized: DataPageV1 275 288 10 57.2 17.5 78.0X -Both columns - Parquet Vectorized: DataPageV2 505 525 24 31.2 32.1 42.5X -Both columns - Parquet MR: DataPageV1 2541 2547 9 6.2 161.5 8.5X -Both columns - Parquet MR: DataPageV2 2059 2060 2 7.6 130.9 10.4X -Both columns - ORC Vectorized 326 349 16 48.3 20.7 66.0X -Both columns - ORC MR 2116 2151 50 7.4 134.5 10.1X +Data column - CSV 22527 22546 26 0.7 1432.3 1.0X +Data column - Json 12533 12712 254 1.3 796.8 1.8X +Data column - Parquet Vectorized: DataPageV1 229 244 14 68.7 14.6 98.3X +Data column - Parquet Vectorized: DataPageV2 508 519 16 31.0 32.3 44.3X +Data column - Parquet MR: DataPageV1 2525 2535 13 6.2 160.6 8.9X +Data column - Parquet MR: DataPageV2 2194 2209 21 7.2 139.5 10.3X +Data column - ORC Vectorized 315 317 2 50.0 20.0 71.6X +Data column - ORC MR 2098 2100 3 7.5 133.4 10.7X +Partition column - CSV 6747 6753 9 2.3 429.0 3.3X +Partition column - Json 10080 10102 32 1.6 640.8 2.2X +Partition column - Parquet Vectorized: DataPageV1 60 63 2 262.8 3.8 376.4X +Partition column - Parquet Vectorized: DataPageV2 58 63 8 270.2 3.7 387.1X +Partition column - Parquet MR: DataPageV1 1152 1155 4 13.6 73.3 19.5X +Partition column - Parquet MR: DataPageV2 1149 1149 1 13.7 73.0 19.6X +Partition column - ORC Vectorized 61 64 3 259.8 3.8 372.1X +Partition column - ORC MR 1332 1332 0 11.8 84.7 16.9X +Both columns - CSV 23030 23042 17 0.7 1464.2 1.0X +Both columns - Json 13569 13581 16 1.2 862.7 1.7X +Both columns - Parquet Vectorized: DataPageV1 268 277 11 58.7 17.0 84.0X +Both columns - Parquet Vectorized: DataPageV2 551 557 7 28.6 35.0 40.9X +Both columns - Parquet MR: DataPageV1 2556 2557 0 6.2 162.5 8.8X +Both columns - Parquet MR: DataPageV2 2287 2292 7 6.9 145.4 9.9X +Both columns - ORC Vectorized 361 363 2 43.6 22.9 62.5X +Both columns - ORC MR 2158 2161 5 7.3 137.2 10.4X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 10074 10372 422 1.0 960.7 1.0X -SQL Json 10037 10147 156 1.0 957.2 1.0X -SQL Parquet Vectorized: DataPageV1 1192 1226 47 8.8 113.7 8.4X -SQL Parquet Vectorized: DataPageV2 2349 2423 105 4.5 224.0 4.3X -SQL Parquet MR: DataPageV1 2995 3114 168 3.5 285.6 3.4X -SQL Parquet MR: DataPageV2 3847 3900 75 2.7 366.9 2.6X -ParquetReader Vectorized: DataPageV1 888 918 51 11.8 84.7 11.3X -ParquetReader Vectorized: DataPageV2 2128 2159 43 4.9 203.0 4.7X -SQL ORC Vectorized 837 908 61 12.5 79.8 12.0X -SQL ORC MR 2792 2882 127 3.8 266.3 3.6X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +SQL CSV 11418 11463 63 0.9 1088.9 1.0X +SQL Json 9698 9938 339 1.1 924.9 1.2X +SQL Parquet Vectorized: DataPageV1 1176 1207 45 8.9 112.1 9.7X +SQL Parquet Vectorized: DataPageV2 1652 1669 24 6.3 157.6 6.9X +SQL Parquet MR: DataPageV1 3041 3119 109 3.4 290.0 3.8X +SQL Parquet MR: DataPageV2 4030 4110 114 2.6 384.3 2.8X +ParquetReader Vectorized: DataPageV1 1008 1014 8 10.4 96.2 11.3X +ParquetReader Vectorized: DataPageV2 1247 1305 82 8.4 118.9 9.2X +SQL ORC Vectorized 820 856 56 12.8 78.2 13.9X +SQL ORC MR 2762 2807 64 3.8 263.4 4.1X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7808 7810 3 1.3 744.6 1.0X -SQL Json 7434 7491 82 1.4 708.9 1.1X -SQL Parquet Vectorized: DataPageV1 1037 1044 10 10.1 98.9 7.5X -SQL Parquet Vectorized: DataPageV2 1528 1529 3 6.9 145.7 5.1X -SQL Parquet MR: DataPageV1 2300 2411 156 4.6 219.4 3.4X -SQL Parquet MR: DataPageV2 2637 2639 4 4.0 251.5 3.0X -ParquetReader Vectorized: DataPageV1 843 907 56 12.4 80.4 9.3X -ParquetReader Vectorized: DataPageV2 1424 1446 30 7.4 135.8 5.5X -SQL ORC Vectorized 1131 1132 1 9.3 107.8 6.9X -SQL ORC MR 2781 2856 106 3.8 265.3 2.8X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +SQL CSV 6752 6756 5 1.6 644.0 1.0X +SQL Json 7469 7549 112 1.4 712.3 0.9X +SQL Parquet Vectorized: DataPageV1 912 990 67 11.5 87.0 7.4X +SQL Parquet Vectorized: DataPageV2 1141 1215 104 9.2 108.8 5.9X +SQL Parquet MR: DataPageV1 2256 2418 229 4.6 215.1 3.0X +SQL Parquet MR: DataPageV2 2712 2882 241 3.9 258.6 2.5X +ParquetReader Vectorized: DataPageV1 956 960 6 11.0 91.2 7.1X +ParquetReader Vectorized: DataPageV2 1211 1211 1 8.7 115.5 5.6X +SQL ORC Vectorized 1135 1135 1 9.2 108.2 6.0X +SQL ORC MR 2716 2766 70 3.9 259.0 2.5X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5357 5538 255 2.0 510.9 1.0X -SQL Json 4354 4387 47 2.4 415.2 1.2X -SQL Parquet Vectorized: DataPageV1 212 226 15 49.5 20.2 25.3X -SQL Parquet Vectorized: DataPageV2 265 276 16 39.6 25.2 20.2X -SQL Parquet MR: DataPageV1 1575 1578 4 6.7 150.2 3.4X -SQL Parquet MR: DataPageV2 1624 1638 21 6.5 154.8 3.3X -ParquetReader Vectorized: DataPageV1 219 234 14 47.8 20.9 24.4X -ParquetReader Vectorized: DataPageV2 274 294 17 38.2 26.2 19.5X -SQL ORC Vectorized 370 393 12 28.4 35.3 14.5X -SQL ORC MR 1540 1545 7 6.8 146.9 3.5X +SQL CSV 4496 4710 303 2.3 428.8 1.0X +SQL Json 4324 4343 28 2.4 412.3 1.0X +SQL Parquet Vectorized: DataPageV1 221 244 9 47.5 21.0 20.4X +SQL Parquet Vectorized: DataPageV2 270 288 13 38.8 25.8 16.6X +SQL Parquet MR: DataPageV1 1451 1461 15 7.2 138.3 3.1X +SQL Parquet MR: DataPageV2 1364 1368 5 7.7 130.0 3.3X +ParquetReader Vectorized: DataPageV1 256 258 2 40.9 24.5 17.5X +ParquetReader Vectorized: DataPageV2 273 291 17 38.4 26.0 16.5X +SQL ORC Vectorized 345 367 24 30.4 32.9 13.0X +SQL ORC MR 1508 1509 2 7.0 143.8 3.0X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 2159 2212 74 0.5 2059.3 1.0X -SQL Json 2836 2896 84 0.4 2704.5 0.8X -SQL Parquet Vectorized: DataPageV1 54 59 9 19.5 51.4 40.1X -SQL Parquet Vectorized: DataPageV2 66 72 8 15.9 63.1 32.7X -SQL Parquet MR: DataPageV1 173 186 10 6.1 164.5 12.5X -SQL Parquet MR: DataPageV2 159 172 8 6.6 151.8 13.6X -SQL ORC Vectorized 54 60 10 19.2 52.0 39.6X -SQL ORC MR 150 161 7 7.0 143.3 14.4X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +SQL CSV 2036 2140 147 0.5 1941.4 1.0X +SQL Json 2796 2927 186 0.4 2666.5 0.7X +SQL Parquet Vectorized: DataPageV1 47 52 7 22.2 45.0 43.1X +SQL Parquet Vectorized: DataPageV2 64 69 7 16.4 61.2 31.7X +SQL Parquet MR: DataPageV1 176 190 11 5.9 168.1 11.5X +SQL Parquet MR: DataPageV2 157 171 6 6.7 149.3 13.0X +SQL ORC Vectorized 52 56 10 20.3 49.2 39.5X +SQL ORC MR 142 152 8 7.4 135.9 14.3X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5877 5883 8 0.2 5605.0 1.0X -SQL Json 11474 11587 159 0.1 10942.9 0.5X -SQL Parquet Vectorized: DataPageV1 66 72 7 15.9 63.1 88.9X -SQL Parquet Vectorized: DataPageV2 83 90 8 12.6 79.4 70.6X -SQL Parquet MR: DataPageV1 191 201 9 5.5 182.6 30.7X -SQL Parquet MR: DataPageV2 179 187 9 5.9 170.3 32.9X -SQL ORC Vectorized 70 76 12 14.9 67.1 83.5X -SQL ORC MR 167 175 7 6.3 159.2 35.2X - -OpenJDK 64-Bit Server VM 1.8.0_312-b07 on Linux 5.11.0-1025-azure +SQL CSV 5384 5560 249 0.2 5134.8 1.0X +SQL Json 10934 11224 410 0.1 10427.1 0.5X +SQL Parquet Vectorized: DataPageV1 62 67 7 16.8 59.5 86.3X +SQL Parquet Vectorized: DataPageV2 79 85 7 13.3 75.3 68.1X +SQL Parquet MR: DataPageV1 198 211 9 5.3 188.6 27.2X +SQL Parquet MR: DataPageV2 177 188 9 5.9 168.7 30.4X +SQL ORC Vectorized 67 73 10 15.6 64.0 80.2X +SQL ORC MR 160 172 8 6.6 152.3 33.7X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9695 9965 382 0.1 9245.8 1.0X -SQL Json 22119 23566 2045 0.0 21094.6 0.4X -SQL Parquet Vectorized: DataPageV1 96 104 7 10.9 91.6 100.9X -SQL Parquet Vectorized: DataPageV2 113 121 8 9.3 107.8 85.8X -SQL Parquet MR: DataPageV1 227 243 9 4.6 216.2 42.8X -SQL Parquet MR: DataPageV2 210 225 12 5.0 200.2 46.2X -SQL ORC Vectorized 90 96 10 11.7 85.7 107.9X -SQL ORC MR 188 199 9 5.6 178.9 51.7X +SQL CSV 9602 9882 396 0.1 9157.0 1.0X +SQL Json 21369 21987 874 0.0 20379.5 0.4X +SQL Parquet Vectorized: DataPageV1 90 97 7 11.7 85.4 107.2X +SQL Parquet Vectorized: DataPageV2 107 115 7 9.8 102.0 89.8X +SQL Parquet MR: DataPageV1 227 234 14 4.6 216.1 42.4X +SQL Parquet MR: DataPageV2 204 216 10 5.1 194.4 47.1X +SQL ORC Vectorized 81 89 8 12.9 77.6 118.1X +SQL ORC MR 181 195 12 5.8 172.3 53.2X diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java index 07e35c158c8cb..5669534cd111a 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java @@ -29,6 +29,8 @@ import java.util.Set; import com.google.common.annotations.VisibleForTesting; +import org.apache.parquet.VersionParser; +import org.apache.parquet.VersionParser.ParsedVersion; import org.apache.parquet.column.page.PageReadStore; import scala.Option; @@ -69,6 +71,9 @@ public abstract class SpecificParquetRecordReaderBase extends RecordReader fileMetadata = fileReader.getFileMetaData().getKeyValueMetaData(); ReadSupport readSupport = getReadSupportInstance(getReadSupportClass(configuration)); ReadSupport.ReadContext readContext = readSupport.init(new InitContext( diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java index 57a307b1b7b6b..ee09d2b2a3be9 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java @@ -21,6 +21,8 @@ import java.time.ZoneId; import java.util.PrimitiveIterator; +import org.apache.parquet.CorruptDeltaByteArrays; +import org.apache.parquet.VersionParser.ParsedVersion; import org.apache.parquet.bytes.ByteBufferInputStream; import org.apache.parquet.bytes.BytesInput; import org.apache.parquet.bytes.BytesUtils; @@ -28,6 +30,7 @@ import org.apache.parquet.column.Dictionary; import org.apache.parquet.column.Encoding; import org.apache.parquet.column.page.*; +import org.apache.parquet.column.values.RequiresPreviousReader; import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.schema.LogicalTypeAnnotation; import org.apache.parquet.schema.LogicalTypeAnnotation.DateLogicalTypeAnnotation; @@ -86,6 +89,7 @@ public class VectorizedColumnReader { private final ColumnDescriptor descriptor; private final LogicalTypeAnnotation logicalTypeAnnotation; private final String datetimeRebaseMode; + private final ParsedVersion writerVersion; public VectorizedColumnReader( ColumnDescriptor descriptor, @@ -96,7 +100,8 @@ public VectorizedColumnReader( String datetimeRebaseMode, String datetimeRebaseTz, String int96RebaseMode, - String int96RebaseTz) throws IOException { + String int96RebaseTz, + ParsedVersion writerVersion) throws IOException { this.descriptor = descriptor; this.pageReader = pageReader; this.readState = new ParquetReadState(descriptor.getMaxDefinitionLevel(), rowIndexes); @@ -129,6 +134,7 @@ public VectorizedColumnReader( this.datetimeRebaseMode = datetimeRebaseMode; assert "LEGACY".equals(int96RebaseMode) || "EXCEPTION".equals(int96RebaseMode) || "CORRECTED".equals(int96RebaseMode); + this.writerVersion = writerVersion; } private boolean isLazyDecodingSupported(PrimitiveType.PrimitiveTypeName typeName) { @@ -259,6 +265,7 @@ private void initDataReader( int pageValueCount, Encoding dataEncoding, ByteBufferInputStream in) throws IOException { + ValuesReader previousReader = this.dataColumn; if (dataEncoding.usesDictionary()) { this.dataColumn = null; if (dictionary == null) { @@ -283,6 +290,12 @@ private void initDataReader( } catch (IOException e) { throw new IOException("could not read page in col " + descriptor, e); } + // for PARQUET-246 (See VectorizedDeltaByteArrayReader.setPreviousValues) + if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding) && + previousReader instanceof RequiresPreviousReader) { + // previousReader can only be set if reading sequentially + ((RequiresPreviousReader) dataColumn).setPreviousReader(previousReader); + } } private ValuesReader getValuesReader(Encoding encoding) { diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaBinaryPackedReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaBinaryPackedReader.java index 7b2aac3118e5f..9c6596aa1b5cc 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaBinaryPackedReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaBinaryPackedReader.java @@ -90,6 +90,7 @@ public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOExce Preconditions.checkArgument(miniSize % 8 == 0, "miniBlockSize must be multiple of 8, but it's " + miniSize); this.miniBlockSizeInValues = (int) miniSize; + // True value count. May be less than valueCount because of nulls this.totalValueCount = BytesUtils.readUnsignedVarInt(in); this.bitWidths = new int[miniBlockNumInABlock]; this.unpackedValuesBuffer = new long[miniBlockSizeInValues]; @@ -97,6 +98,11 @@ public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOExce firstValue = BytesUtils.readZigZagVarLong(in); } + // True value count. May be less than valueCount because of nulls + int getTotalValueCount() { + return totalValueCount; + } + @Override public byte readByte() { readValues(1, null, 0, (w, r, v) -> byteVal = (byte) v); diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaByteArrayReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaByteArrayReader.java index 72b760d426eac..b3fc54a8d152c 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaByteArrayReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaByteArrayReader.java @@ -16,50 +16,127 @@ */ package org.apache.spark.sql.execution.datasources.parquet; +import static org.apache.spark.sql.types.DataTypes.BinaryType; +import static org.apache.spark.sql.types.DataTypes.IntegerType; + import org.apache.parquet.bytes.ByteBufferInputStream; -import org.apache.parquet.column.values.deltastrings.DeltaByteArrayReader; +import org.apache.parquet.column.values.RequiresPreviousReader; +import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.io.api.Binary; +import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector; import org.apache.spark.sql.execution.vectorized.WritableColumnVector; import java.io.IOException; import java.nio.ByteBuffer; /** - * An implementation of the Parquet DELTA_BYTE_ARRAY decoder that supports the vectorized interface. + * An implementation of the Parquet DELTA_BYTE_ARRAY decoder that supports the vectorized + * interface. */ -public class VectorizedDeltaByteArrayReader extends VectorizedReaderBase { - private final DeltaByteArrayReader deltaByteArrayReader = new DeltaByteArrayReader(); +public class VectorizedDeltaByteArrayReader extends VectorizedReaderBase + implements VectorizedValuesReader, RequiresPreviousReader { + + private final VectorizedDeltaBinaryPackedReader prefixLengthReader; + private final VectorizedDeltaLengthByteArrayReader suffixReader; + private WritableColumnVector prefixLengthVector; + private ByteBuffer previous; + private int currentRow = 0; + + // Temporary variable used by readBinary + private final WritableColumnVector binaryValVector; + // Temporary variable used by skipBinary + private final WritableColumnVector tempBinaryValVector; + + VectorizedDeltaByteArrayReader() { + this.prefixLengthReader = new VectorizedDeltaBinaryPackedReader(); + this.suffixReader = new VectorizedDeltaLengthByteArrayReader(); + binaryValVector = new OnHeapColumnVector(1, BinaryType); + tempBinaryValVector = new OnHeapColumnVector(1, BinaryType); + } @Override public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { - deltaByteArrayReader.initFromPage(valueCount, in); + prefixLengthVector = new OnHeapColumnVector(valueCount, IntegerType); + prefixLengthReader.initFromPage(valueCount, in); + prefixLengthReader.readIntegers(prefixLengthReader.getTotalValueCount(), + prefixLengthVector, 0); + suffixReader.initFromPage(valueCount, in); } @Override public Binary readBinary(int len) { - return deltaByteArrayReader.readBytes(); + readValues(1, binaryValVector, 0); + return Binary.fromConstantByteArray(binaryValVector.getBinary(0)); } - @Override - public void readBinary(int total, WritableColumnVector c, int rowId) { + private void readValues(int total, WritableColumnVector c, int rowId) { for (int i = 0; i < total; i++) { - Binary binary = deltaByteArrayReader.readBytes(); - ByteBuffer buffer = binary.toByteBuffer(); - if (buffer.hasArray()) { - c.putByteArray(rowId + i, buffer.array(), buffer.arrayOffset() + buffer.position(), - binary.length()); - } else { - byte[] bytes = new byte[binary.length()]; - buffer.get(bytes); - c.putByteArray(rowId + i, bytes); + // NOTE: due to PARQUET-246, it is important that we + // respect prefixLength which was read from prefixLengthReader, + // even for the *first* value of a page. Even though the first + // value of the page should have an empty prefix, it may not + // because of PARQUET-246. + int prefixLength = prefixLengthVector.getInt(currentRow); + ByteBuffer suffix = suffixReader.getBytes(currentRow); + byte[] suffixArray = suffix.array(); + int suffixLength = suffix.limit() - suffix.position(); + int length = prefixLength + suffixLength; + + // We have to do this to materialize the output + WritableColumnVector arrayData = c.arrayData(); + int offset = arrayData.getElementsAppended(); + if (prefixLength != 0) { + arrayData.appendBytes(prefixLength, previous.array(), previous.position()); } + arrayData.appendBytes(suffixLength, suffixArray, suffix.position()); + c.putArray(rowId + i, offset, length); + previous = arrayData.getByteBuffer(offset, length); + currentRow++; + } + } + + @Override + public void readBinary(int total, WritableColumnVector c, int rowId) { + readValues(total, c, rowId); + } + + /** + * There was a bug (PARQUET-246) in which DeltaByteArrayWriter's reset() method did not clear the + * previous value state that it tracks internally. This resulted in the first value of all pages + * (except for the first page) to be a delta from the last value of the previous page. In order to + * read corrupted files written with this bug, when reading a new page we need to recover the + * previous page's last value to use it (if needed) to read the first value. + */ + public void setPreviousReader(ValuesReader reader) { + if (reader != null) { + this.previous = ((VectorizedDeltaByteArrayReader) reader).previous; } } @Override public void skipBinary(int total) { + WritableColumnVector c1 = tempBinaryValVector; + WritableColumnVector c2 = binaryValVector; + for (int i = 0; i < total; i++) { - deltaByteArrayReader.skip(); + int prefixLength = prefixLengthVector.getInt(currentRow); + ByteBuffer suffix = suffixReader.getBytes(currentRow); + byte[] suffixArray = suffix.array(); + int suffixLength = suffix.limit() - suffix.position(); + int length = prefixLength + suffixLength; + + WritableColumnVector arrayData = c1.arrayData(); + c1.reset(); + if (prefixLength != 0) { + arrayData.appendBytes(prefixLength, previous.array(), previous.position()); + } + arrayData.appendBytes(suffixLength, suffixArray, suffix.position()); + previous = arrayData.getByteBuffer(0, length); + currentRow++; + + WritableColumnVector tmp = c1; + c1 = c2; + c2 = tmp; } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaLengthByteArrayReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaLengthByteArrayReader.java new file mode 100644 index 0000000000000..ac5b8527f5e13 --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedDeltaLengthByteArrayReader.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.datasources.parquet; + +import static org.apache.spark.sql.types.DataTypes.IntegerType; + +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; +import org.apache.parquet.bytes.ByteBufferInputStream; +import org.apache.parquet.io.ParquetDecodingException; +import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector; +import org.apache.spark.sql.execution.vectorized.WritableColumnVector; + +/** + * An implementation of the Parquet DELTA_LENGTH_BYTE_ARRAY decoder that supports the vectorized + * interface. + */ +public class VectorizedDeltaLengthByteArrayReader extends VectorizedReaderBase implements + VectorizedValuesReader { + + private final VectorizedDeltaBinaryPackedReader lengthReader; + private ByteBufferInputStream in; + private WritableColumnVector lengthsVector; + private int currentRow = 0; + + VectorizedDeltaLengthByteArrayReader() { + lengthReader = new VectorizedDeltaBinaryPackedReader(); + } + + @Override + public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { + lengthsVector = new OnHeapColumnVector(valueCount, IntegerType); + lengthReader.initFromPage(valueCount, in); + lengthReader.readIntegers(lengthReader.getTotalValueCount(), lengthsVector, 0); + this.in = in.remainingStream(); + } + + @Override + public void readBinary(int total, WritableColumnVector c, int rowId) { + ByteBuffer buffer; + ByteBufferOutputWriter outputWriter = ByteBufferOutputWriter::writeArrayByteBuffer; + int length; + for (int i = 0; i < total; i++) { + length = lengthsVector.getInt(rowId + i); + try { + buffer = in.slice(length); + } catch (EOFException e) { + throw new ParquetDecodingException("Failed to read " + length + " bytes"); + } + outputWriter.write(c, rowId + i, buffer, length); + } + currentRow += total; + } + + public ByteBuffer getBytes(int rowId) { + int length = lengthsVector.getInt(rowId); + try { + return in.slice(length); + } catch (EOFException e) { + throw new ParquetDecodingException("Failed to read " + length + " bytes"); + } + } + + @Override + public void skipBinary(int total) { + for (int i = 0; i < total; i++) { + int remaining = lengthsVector.getInt(currentRow + i); + while (remaining > 0) { + remaining -= in.skip(remaining); + } + } + currentRow += total; + } +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java index 0e976be2f652e..da23b5fcec28f 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java @@ -367,7 +367,8 @@ private void checkEndOfRowGroup() throws IOException { datetimeRebaseMode, datetimeRebaseTz, int96RebaseMode, - int96RebaseTz); + int96RebaseTz, + writerVersion); } totalCountLoadedSoFar += pages.getRowCount(); } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedValuesReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedValuesReader.java index 7ddece068e099..4308614338499 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedValuesReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedValuesReader.java @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.datasources.parquet; +import java.nio.ByteBuffer; + import org.apache.spark.sql.execution.vectorized.WritableColumnVector; import org.apache.parquet.io.api.Binary; @@ -86,4 +88,18 @@ interface IntegerOutputWriter { void write(WritableColumnVector outputColumnVector, int rowId, long val); } + @FunctionalInterface + interface ByteBufferOutputWriter { + void write(WritableColumnVector c, int rowId, ByteBuffer val, int length); + + static void writeArrayByteBuffer(WritableColumnVector c, int rowId, ByteBuffer val, + int length) { + c.putByteArray(rowId, + val.array(), + val.arrayOffset() + val.position(), + length); + } + + static void skipWrite(WritableColumnVector c, int rowId, ByteBuffer val, int length) { } + } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java index bbe96819a618b..42552c7afc624 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java @@ -221,6 +221,11 @@ protected UTF8String getBytesAsUTF8String(int rowId, int count) { return UTF8String.fromAddress(null, data + rowId, count); } + @Override + public ByteBuffer getByteBuffer(int rowId, int count) { + return ByteBuffer.wrap(getBytes(rowId, count)); + } + // // APIs dealing with shorts // diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java index 833a93f2a2bdb..d246a3c24e4a6 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java @@ -219,6 +219,12 @@ protected UTF8String getBytesAsUTF8String(int rowId, int count) { return UTF8String.fromBytes(byteData, rowId, count); } + @Override + public ByteBuffer getByteBuffer(int rowId, int count) { + return ByteBuffer.wrap(byteData, rowId, count); + } + + // // APIs dealing with Shorts // diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java index 5e01c372793f1..ae457a16123d2 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java @@ -18,6 +18,7 @@ import java.math.BigDecimal; import java.math.BigInteger; +import java.nio.ByteBuffer; import com.google.common.annotations.VisibleForTesting; @@ -443,6 +444,12 @@ public byte[] getBinary(int rowId) { } } + /** + * Gets the values of bytes from [rowId, rowId + count), as a ByteBuffer. + * This method is similar to {@link ColumnVector#getBytes(int, int)}, but avoids making a copy. + */ + public abstract ByteBuffer getByteBuffer(int rowId, int count); + /** * Append APIs. These APIs all behave similarly and will append data to the current vector. It * is not valid to mix the put and append APIs. The append APIs are slower and should only be diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaByteArrayEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaByteArrayEncodingSuite.scala new file mode 100644 index 0000000000000..c54eef348f342 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaByteArrayEncodingSuite.scala @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.datasources.parquet + +import org.apache.parquet.bytes.DirectByteBufferAllocator +import org.apache.parquet.column.values.Utils +import org.apache.parquet.column.values.deltastrings.DeltaByteArrayWriter + +import org.apache.spark.sql.execution.vectorized.{OnHeapColumnVector, WritableColumnVector} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{IntegerType, StringType} + +/** + * Read tests for vectorized Delta byte array reader. + * Translated from * org.apache.parquet.column.values.delta.TestDeltaByteArray + */ +class ParquetDeltaByteArrayEncodingSuite extends ParquetCompatibilityTest with SharedSparkSession { + val values: Array[String] = Array("parquet-mr", "parquet", "parquet-format"); + val randvalues: Array[String] = Utils.getRandomStringSamples(10000, 32) + + var writer: DeltaByteArrayWriter = _ + var reader: VectorizedDeltaByteArrayReader = _ + private var writableColumnVector: WritableColumnVector = _ + + protected override def beforeEach(): Unit = { + writer = new DeltaByteArrayWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator) + reader = new VectorizedDeltaByteArrayReader() + super.beforeAll() + } + + test("test Serialization") { + assertReadWrite(writer, reader, values) + } + + test("random strings") { + assertReadWrite(writer, reader, randvalues) + } + + test("random strings with skip") { + assertReadWriteWithSkip(writer, reader, randvalues) + } + + test("random strings with skipN") { + assertReadWriteWithSkipN(writer, reader, randvalues) + } + + test("test lengths") { + var reader = new VectorizedDeltaBinaryPackedReader + Utils.writeData(writer, values) + val data = writer.getBytes.toInputStream + val length = values.length + writableColumnVector = new OnHeapColumnVector(length, IntegerType) + reader.initFromPage(length, data) + reader.readIntegers(length, writableColumnVector, 0) + // test prefix lengths + assert(0 == writableColumnVector.getInt(0)) + assert(7 == writableColumnVector.getInt(1)) + assert(7 == writableColumnVector.getInt(2)) + + reader = new VectorizedDeltaBinaryPackedReader + writableColumnVector = new OnHeapColumnVector(length, IntegerType) + reader.initFromPage(length, data) + reader.readIntegers(length, writableColumnVector, 0) + // test suffix lengths + assert(10 == writableColumnVector.getInt(0)) + assert(0 == writableColumnVector.getInt(1)) + assert(7 == writableColumnVector.getInt(2)) + } + + private def assertReadWrite( + writer: DeltaByteArrayWriter, + reader: VectorizedDeltaByteArrayReader, + vals: Array[String]): Unit = { + Utils.writeData(writer, vals) + val length = vals.length + val is = writer.getBytes.toInputStream + + writableColumnVector = new OnHeapColumnVector(length, StringType) + + reader.initFromPage(length, is) + reader.readBinary(length, writableColumnVector, 0) + + for (i <- 0 until length) { + assert(vals(i).getBytes() sameElements writableColumnVector.getBinary(i)) + } + } + + private def assertReadWriteWithSkip( + writer: DeltaByteArrayWriter, + reader: VectorizedDeltaByteArrayReader, + vals: Array[String]): Unit = { + Utils.writeData(writer, vals) + val length = vals.length + val is = writer.getBytes.toInputStream + writableColumnVector = new OnHeapColumnVector(length, StringType) + reader.initFromPage(length, is) + var i = 0 + while ( { + i < vals.length + }) { + reader.readBinary(1, writableColumnVector, i) + assert(vals(i).getBytes() sameElements writableColumnVector.getBinary(i)) + reader.skipBinary(1) + i += 2 + } + } + + private def assertReadWriteWithSkipN( + writer: DeltaByteArrayWriter, + reader: VectorizedDeltaByteArrayReader, + vals: Array[String]): Unit = { + Utils.writeData(writer, vals) + val length = vals.length + val is = writer.getBytes.toInputStream + writableColumnVector = new OnHeapColumnVector(length, StringType) + reader.initFromPage(length, is) + var skipCount = 0 + var i = 0 + while ( { + i < vals.length + }) { + skipCount = (vals.length - i) / 2 + reader.readBinary(1, writableColumnVector, i) + assert(vals(i).getBytes() sameElements writableColumnVector.getBinary(i)) + reader.skipBinary(skipCount) + i += skipCount + 1 + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaLengthByteArrayEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaLengthByteArrayEncodingSuite.scala new file mode 100644 index 0000000000000..17dc70df42a6d --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetDeltaLengthByteArrayEncodingSuite.scala @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.datasources.parquet + +import java.util.Random + +import org.apache.commons.lang3.RandomStringUtils +import org.apache.parquet.bytes.{ByteBufferInputStream, DirectByteBufferAllocator} +import org.apache.parquet.column.values.Utils +import org.apache.parquet.column.values.deltalengthbytearray.DeltaLengthByteArrayValuesWriter +import org.apache.parquet.io.api.Binary + +import org.apache.spark.sql.execution.vectorized.{OnHeapColumnVector, WritableColumnVector} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{IntegerType, StringType} + +/** + * Read tests for vectorized Delta length byte array reader. + * Translated from + * org.apache.parquet.column.values.delta.TestDeltaLengthByteArray + */ +class ParquetDeltaLengthByteArrayEncodingSuite + extends ParquetCompatibilityTest + with SharedSparkSession { + val values: Array[String] = Array("parquet", "hadoop", "mapreduce") + var writer: DeltaLengthByteArrayValuesWriter = _ + var reader: VectorizedDeltaLengthByteArrayReader = _ + private var writableColumnVector: WritableColumnVector = _ + + protected override def beforeEach(): Unit = { + writer = + new DeltaLengthByteArrayValuesWriter(64 * 1024, 64 * 1024, new DirectByteBufferAllocator) + reader = new VectorizedDeltaLengthByteArrayReader() + super.beforeAll() + } + + test("test serialization") { + writeData(writer, values) + readAndValidate(reader, writer.getBytes.toInputStream, values.length, values) + } + + test("random strings") { + val values = Utils.getRandomStringSamples(1000, 32) + writeData(writer, values) + readAndValidate(reader, writer.getBytes.toInputStream, values.length, values) + } + + test("random strings with empty strings") { + val values = getRandomStringSamplesWithEmptyStrings(1000, 32) + writeData(writer, values) + readAndValidate(reader, writer.getBytes.toInputStream, values.length, values) + } + + test("skip with random strings") { + val values = Utils.getRandomStringSamples(1000, 32) + writeData(writer, values) + reader.initFromPage(values.length, writer.getBytes.toInputStream) + writableColumnVector = new OnHeapColumnVector(values.length, StringType) + var i = 0 + while (i < values.length) { + reader.readBinary(1, writableColumnVector, i) + assert(values(i).getBytes() sameElements writableColumnVector.getBinary(i)) + reader.skipBinary(1) + i += 2 + } + reader = new VectorizedDeltaLengthByteArrayReader() + reader.initFromPage(values.length, writer.getBytes.toInputStream) + writableColumnVector = new OnHeapColumnVector(values.length, StringType) + var skipCount = 0 + i = 0 + while (i < values.length) { + skipCount = (values.length - i) / 2 + reader.readBinary(1, writableColumnVector, i) + assert(values(i).getBytes() sameElements writableColumnVector.getBinary(i)) + reader.skipBinary(skipCount) + i += skipCount + 1 + } + } + + // Read the lengths from the beginning of the buffer and compare with the lengths of the values + test("test lengths") { + val reader = new VectorizedDeltaBinaryPackedReader + writeData(writer, values) + val length = values.length + writableColumnVector = new OnHeapColumnVector(length, IntegerType) + reader.initFromPage(length, writer.getBytes.toInputStream) + reader.readIntegers(length, writableColumnVector, 0) + for (i <- 0 until length) { + assert(values(i).length == writableColumnVector.getInt(i)) + } + } + + private def writeData(writer: DeltaLengthByteArrayValuesWriter, values: Array[String]): Unit = { + for (i <- values.indices) { + writer.writeBytes(Binary.fromString(values(i))) + } + } + + private def readAndValidate( + reader: VectorizedDeltaLengthByteArrayReader, + is: ByteBufferInputStream, + length: Int, + expectedValues: Array[String]): Unit = { + + writableColumnVector = new OnHeapColumnVector(length, StringType) + + reader.initFromPage(length, is) + reader.readBinary(length, writableColumnVector, 0) + + for (i <- 0 until length) { + assert(expectedValues(i).getBytes() sameElements writableColumnVector.getBinary(i)) + } + } + + def getRandomStringSamplesWithEmptyStrings(numSamples: Int, maxLength: Int): Array[String] = { + val randomLen = new Random + val randomEmpty = new Random + val samples: Array[String] = new Array[String](numSamples) + for (i <- 0 until numSamples) { + var maxLen: Int = randomLen.nextInt(maxLength) + if(randomEmpty.nextInt() % 11 != 0) { + maxLen = 0; + } + samples(i) = RandomStringUtils.randomAlphanumeric(0, maxLen) + } + samples + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala index f7100a53444aa..07e2849ce6f19 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala @@ -27,6 +27,7 @@ import org.apache.parquet.column.{Encoding, ParquetProperties} import org.apache.parquet.hadoop.ParquetOutputFormat import org.apache.spark.TestUtils +import org.apache.spark.memory.MemoryMode import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.internal.SQLConf @@ -47,6 +48,13 @@ class ParquetEncodingSuite extends ParquetCompatibilityTest with SharedSparkSess null.asInstanceOf[Duration], null.asInstanceOf[java.lang.Boolean]) + private def withMemoryModes(f: String => Unit): Unit = { + Seq(MemoryMode.OFF_HEAP, MemoryMode.ON_HEAP).foreach(mode => { + val offHeap = if (mode == MemoryMode.OFF_HEAP) "true" else "false" + f(offHeap) + }) + } + test("All Types Dictionary") { (1 :: 1000 :: Nil).foreach { n => { withTempPath { dir => @@ -141,45 +149,54 @@ class ParquetEncodingSuite extends ParquetCompatibilityTest with SharedSparkSess ) val hadoopConf = spark.sessionState.newHadoopConfWithOptions(extraOptions) - withSQLConf( - SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", - ParquetOutputFormat.JOB_SUMMARY_LEVEL -> "ALL") { - withTempPath { dir => - val path = s"${dir.getCanonicalPath}/test.parquet" - - val data = (1 to 3).map { i => - ( i, i.toLong, i.toShort, Array[Byte](i.toByte), s"test_${i}", - DateTimeUtils.fromJavaDate(Date.valueOf(s"2021-11-0" + i)), - DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf(s"2020-11-01 12:00:0" + i)), - Period.of(1, i, 0), Duration.ofMillis(i * 100), - new BigDecimal(java.lang.Long.toUnsignedString(i*100000)) - ) + withMemoryModes { offHeapMode => + withSQLConf( + SQLConf.COLUMN_VECTOR_OFFHEAP_ENABLED.key -> offHeapMode, + SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true", + ParquetOutputFormat.JOB_SUMMARY_LEVEL -> "ALL") { + withTempPath { dir => + val path = s"${dir.getCanonicalPath}/test.parquet" + // Have more than 2 * 4096 records (so we have multiple tasks and each task + // reads at least twice from the reader). This will catch any issues with state + // maintained by the reader(s) + // Add at least one string with a null + val data = (1 to 8193).map { i => + (i, + i.toLong, i.toShort, Array[Byte](i.toByte), + if (i % 2 == 1) s"test_$i" else null, + DateTimeUtils.fromJavaDate(Date.valueOf(s"2021-11-0" + ((i % 9) + 1))), + DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf(s"2020-11-01 12:00:0" + (i % 10))), + Period.of(1, (i % 11) + 1, 0), + Duration.ofMillis(((i % 9) + 1) * 100), + new BigDecimal(java.lang.Long.toUnsignedString(i * 100000)) + ) + } + + spark.createDataFrame(data) + .write.options(extraOptions).mode("overwrite").parquet(path) + + val blockMetadata = readFooter(new Path(path), hadoopConf).getBlocks.asScala.head + val columnChunkMetadataList = blockMetadata.getColumns.asScala + + // Verify that indeed delta encoding is used for each column + assert(columnChunkMetadataList.length === 10) + assert(columnChunkMetadataList(0).getEncodings.contains(Encoding.DELTA_BINARY_PACKED)) + assert(columnChunkMetadataList(1).getEncodings.contains(Encoding.DELTA_BINARY_PACKED)) + assert(columnChunkMetadataList(2).getEncodings.contains(Encoding.DELTA_BINARY_PACKED)) + // Both fixed-length byte array and variable-length byte array (also called BINARY) + // are use DELTA_BYTE_ARRAY for encoding + assert(columnChunkMetadataList(3).getEncodings.contains(Encoding.DELTA_BYTE_ARRAY)) + assert(columnChunkMetadataList(4).getEncodings.contains(Encoding.DELTA_BYTE_ARRAY)) + + assert(columnChunkMetadataList(5).getEncodings.contains(Encoding.DELTA_BINARY_PACKED)) + assert(columnChunkMetadataList(6).getEncodings.contains(Encoding.DELTA_BINARY_PACKED)) + assert(columnChunkMetadataList(7).getEncodings.contains(Encoding.DELTA_BINARY_PACKED)) + assert(columnChunkMetadataList(8).getEncodings.contains(Encoding.DELTA_BINARY_PACKED)) + assert(columnChunkMetadataList(9).getEncodings.contains(Encoding.DELTA_BYTE_ARRAY)) + + val actual = spark.read.parquet(path).collect() + assert(actual.sortBy(_.getInt(0)) === data.map(Row.fromTuple)); } - - spark.createDataFrame(data) - .write.options(extraOptions).mode("overwrite").parquet(path) - - val blockMetadata = readFooter(new Path(path), hadoopConf).getBlocks.asScala.head - val columnChunkMetadataList = blockMetadata.getColumns.asScala - - // Verify that indeed delta encoding is used for each column - assert(columnChunkMetadataList.length === 10) - assert(columnChunkMetadataList(0).getEncodings.contains(Encoding.DELTA_BINARY_PACKED)) - assert(columnChunkMetadataList(1).getEncodings.contains(Encoding.DELTA_BINARY_PACKED)) - assert(columnChunkMetadataList(2).getEncodings.contains(Encoding.DELTA_BINARY_PACKED)) - // Both fixed-length byte array and variable-length byte array (also called BINARY) - // are use DELTA_BYTE_ARRAY for encoding - assert(columnChunkMetadataList(3).getEncodings.contains(Encoding.DELTA_BYTE_ARRAY)) - assert(columnChunkMetadataList(4).getEncodings.contains(Encoding.DELTA_BYTE_ARRAY)) - - assert(columnChunkMetadataList(5).getEncodings.contains(Encoding.DELTA_BINARY_PACKED)) - assert(columnChunkMetadataList(6).getEncodings.contains(Encoding.DELTA_BINARY_PACKED)) - assert(columnChunkMetadataList(7).getEncodings.contains(Encoding.DELTA_BINARY_PACKED)) - assert(columnChunkMetadataList(8).getEncodings.contains(Encoding.DELTA_BINARY_PACKED)) - assert(columnChunkMetadataList(9).getEncodings.contains(Encoding.DELTA_BYTE_ARRAY)) - - val actual = spark.read.parquet(path).collect() - assert(actual.sortBy(_.getInt(0)) === data.map(Row.fromTuple)); } } } From a210e3929bb96086894a9a5a72f0fe5946b1659d Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 1 Apr 2022 11:40:32 +0800 Subject: [PATCH 084/535] [SPARK-38716][SQL] Provide query context in map key not exists error ### What changes were proposed in this pull request? Provide query context in `map key does not exist` runtime error with ANSI SQL mode on, including - operator `[]` - function `element_at()` ### Why are the changes needed? Provide SQL query context of runtime errors to users, so that they can understand it better. ### Does this PR introduce _any_ user-facing change? Yes, improve the runtime error message of "map key does not exist" ### How was this patch tested? UT Closes #36025 from gengliangwang/mapKeyContext. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit f0fc991f1d2e7b04b0e3967481f7e75e4322ae15) Signed-off-by: Gengliang Wang --- core/src/main/resources/error/error-classes.json | 4 ++-- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 5 ++++- .../sql/catalyst/expressions/complexTypeExtractors.scala | 6 ++++-- .../apache/spark/sql/errors/QueryExecutionErrors.scala | 9 ++++++--- .../test/resources/sql-tests/results/ansi/map.sql.out | 9 +++++++++ 5 files changed, 25 insertions(+), 8 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index d7d77022399ba..c8c18413a9d0c 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -121,10 +121,10 @@ "sqlState" : "42000" }, "MAP_KEY_DOES_NOT_EXIST" : { - "message" : [ "Key %s does not exist. If necessary set %s to false to bypass this error." ] + "message" : [ "Key %s does not exist. If necessary set %s to false to bypass this error.%s" ] }, "MAP_KEY_DOES_NOT_EXIST_IN_ELEMENT_AT" : { - "message" : [ "Key %s does not exist. To return NULL instead, use 'try_element_at'. If necessary set %s to false to bypass this error." ] + "message" : [ "Key %s does not exist. To return NULL instead, use 'try_element_at'. If necessary set %s to false to bypass this error.%s" ] }, "MISSING_COLUMN" : { "message" : [ "Column '%s' does not exist. Did you mean one of the following? [%s]" ], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6d950673fa633..6b44483ab1d2d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -41,6 +41,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.streaming.StreamingRelationV2 import org.apache.spark.sql.catalyst.trees.{AlwaysProcess, CurrentOrigin} +import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.catalyst.util.{toPrettySQL, CharVarcharUtils} import org.apache.spark.sql.connector.catalog._ @@ -1749,7 +1750,9 @@ class Analyzer(override val catalogManager: CatalogManager) case u @ UnresolvedExtractValue(child, fieldName) => val newChild = innerResolve(child, isTopLevel = false) if (newChild.resolved) { - ExtractValue(newChild, fieldName, resolver) + withOrigin(u.origin) { + ExtractValue(newChild, fieldName, resolver) + } } else { u.copy(child = newChild) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index cb7e06b9934a4..3cd404a9c0d5f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -367,7 +367,7 @@ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { if (!found) { if (failOnError) { - throw QueryExecutionErrors.mapKeyNotExistError(ordinal, isElementAtFunction) + throw QueryExecutionErrors.mapKeyNotExistError(ordinal, isElementAtFunction, origin.context) } else { null } @@ -400,9 +400,11 @@ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { } val keyJavaType = CodeGenerator.javaType(keyType) + lazy val errorContext = ctx.addReferenceObj("errCtx", origin.context) nullSafeCodeGen(ctx, ev, (eval1, eval2) => { val keyNotFoundBranch = if (failOnError) { - s"throw QueryExecutionErrors.mapKeyNotExistError($eval2, $isElementAtFunction);" + s"throw QueryExecutionErrors.mapKeyNotExistError(" + + s"$eval2, $isElementAtFunction, $errorContext);" } else { s"${ev.isNull} = true;" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 304801e39f6af..2a9b3c0005cbc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -165,13 +165,16 @@ object QueryExecutionErrors { messageParameters = Array(index.toString, numElements.toString, SQLConf.ANSI_ENABLED.key)) } - def mapKeyNotExistError(key: Any, isElementAtFunction: Boolean): NoSuchElementException = { + def mapKeyNotExistError( + key: Any, + isElementAtFunction: Boolean, + context: String): NoSuchElementException = { if (isElementAtFunction) { new SparkNoSuchElementException(errorClass = "MAP_KEY_DOES_NOT_EXIST_IN_ELEMENT_AT", - messageParameters = Array(key.toString, SQLConf.ANSI_ENABLED.key)) + messageParameters = Array(key.toString, SQLConf.ANSI_ENABLED.key, context)) } else { new SparkNoSuchElementException(errorClass = "MAP_KEY_DOES_NOT_EXIST", - messageParameters = Array(key.toString, SQLConf.ANSI_STRICT_INDEX_OPERATOR.key)) + messageParameters = Array(key.toString, SQLConf.ANSI_STRICT_INDEX_OPERATOR.key, context)) } } diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out index 5f7bd9faa79e9..5ba37278fbcb5 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out @@ -9,6 +9,9 @@ struct<> -- !query output org.apache.spark.SparkNoSuchElementException Key 5 does not exist. To return NULL instead, use 'try_element_at'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select element_at(map(1, 'a', 2, 'b'), 5) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -18,6 +21,9 @@ struct<> -- !query output org.apache.spark.SparkNoSuchElementException Key 5 does not exist. If necessary set spark.sql.ansi.strictIndexOperator to false to bypass this error. +== SQL(line 1, position 7) == +select map(1, 'a', 2, 'b')[5] + ^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -109,3 +115,6 @@ struct<> -- !query output org.apache.spark.SparkNoSuchElementException Key 5 does not exist. To return NULL instead, use 'try_element_at'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select element_at(map(1, 'a', 2, 'b'), 5) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From b4f996aa49adafda81a14ec272a896019206221e Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Fri, 1 Apr 2022 13:34:41 +0800 Subject: [PATCH 085/535] [SPARK-37960][SQL][FOLLOWUP] Make the testing CASE WHEN query more reasonable ### What changes were proposed in this pull request? Some testing CASE WHEN queries are not carefully written and do not make sense. In the future, the optimizer may get smarter and get rid of the CASE WHEN completely, and then we loose test coverage. This PR updates some CASE WHEN queries to make them more reasonable. ### Why are the changes needed? future-proof test coverage. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? N/A Closes #36032 from beliefer/SPARK-37960_followup2. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit eb353aa959ca2298b28ff0b9671edcfa3ac62822) Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/jdbc/JDBCV2Suite.scala | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 67a02904660c3..6a0a55b77881e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -888,13 +888,12 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel | COUNT(CASE WHEN SALARY > 11000 OR SALARY < 10000 THEN SALARY ELSE 0 END), | COUNT(CASE WHEN SALARY >= 12000 OR SALARY < 9000 THEN SALARY ELSE 0 END), | COUNT(CASE WHEN SALARY >= 12000 OR NOT(SALARY >= 9000) THEN SALARY ELSE 0 END), - | MAX(CASE WHEN NOT(SALARY > 8000) AND SALARY >= 8000 THEN SALARY ELSE 0 END), - | MAX(CASE WHEN NOT(SALARY > 8000) OR SALARY > 8000 THEN SALARY ELSE 0 END), - | MAX(CASE WHEN NOT(SALARY > 8000) AND NOT(SALARY < 8000) THEN SALARY ELSE 0 END), + | MAX(CASE WHEN NOT(SALARY > 10000) AND SALARY >= 8000 THEN SALARY ELSE 0 END), + | MAX(CASE WHEN NOT(SALARY > 10000) OR SALARY > 8000 THEN SALARY ELSE 0 END), + | MAX(CASE WHEN NOT(SALARY > 10000) AND NOT(SALARY < 8000) THEN SALARY ELSE 0 END), | MAX(CASE WHEN NOT(SALARY != 0) OR NOT(SALARY < 8000) THEN SALARY ELSE 0 END), | MAX(CASE WHEN NOT(SALARY > 8000 AND SALARY > 8000) THEN 0 ELSE SALARY END), | MIN(CASE WHEN NOT(SALARY > 8000 OR SALARY IS NULL) THEN SALARY ELSE 0 END), - | SUM(CASE WHEN NOT(SALARY > 8000 AND SALARY IS NOT NULL) THEN SALARY ELSE 0 END), | SUM(CASE WHEN SALARY > 10000 THEN 2 WHEN SALARY > 8000 THEN 1 END), | AVG(CASE WHEN NOT(SALARY > 8000 OR SALARY IS NOT NULL) THEN SALARY ELSE 0 END) |FROM h2.test.employee GROUP BY DEPT @@ -905,9 +904,9 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel " THEN SALARY ELSE 0.00 END), COUNT(CAS..., " + "PushedFilters: [], " + "PushedGroupByColumns: [DEPT], ") - checkAnswer(df, Seq(Row(1, 1, 1, 1, 1, 0d, 12000d, 0d, 12000d, 12000d, 0d, 0d, 2, 0d), - Row(2, 2, 2, 2, 2, 0d, 10000d, 0d, 10000d, 10000d, 0d, 0d, 2, 0d), - Row(2, 2, 2, 2, 2, 0d, 12000d, 0d, 12000d, 12000d, 0d, 0d, 3, 0d))) + checkAnswer(df, Seq(Row(1, 1, 1, 1, 1, 0d, 12000d, 0d, 12000d, 12000d, 0d, 2, 0d), + Row(2, 2, 2, 2, 2, 10000d, 10000d, 10000d, 10000d, 10000d, 0d, 2, 0d), + Row(2, 2, 2, 2, 2, 10000d, 12000d, 10000d, 12000d, 12000d, 0d, 3, 0d))) } test("scan with aggregate push-down: aggregate function with binary arithmetic") { From 8a072ef6badad69ef5cfdd656d0c068979f6ea76 Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Fri, 1 Apr 2022 18:21:48 +0900 Subject: [PATCH 086/535] [SPARK-38684][SS] Fix correctness issue on stream-stream outer join with RocksDB state store provider ### What changes were proposed in this pull request? (Credit to alex-balikov for the inspiration of the root cause observation, and anishshri-db for looking into the issue together.) This PR fixes the correctness issue on stream-stream outer join with RocksDB state store provider, which can occur in certain condition, like below: * stream-stream time interval outer join * left outer join has an issue on left side, right outer join has an issue on right side, full outer join has an issue on both sides * At batch N, produce non-late row(s) on the problematic side * At the same batch (batch N), some row(s) on the problematic side are evicted by the condition of watermark The root cause is same as [SPARK-38320](https://issues.apache.org/jira/browse/SPARK-38320) - weak read consistency on iterator, especially with RocksDB state store provider. (Quoting from SPARK-38320: The problem is due to the StateStore.iterator not reflecting StateStore changes made after its creation.) More specifically, if updates are performed during processing input rows and somehow updates the number of values for grouping key, the update is not seen in SymmetricHashJoinStateManager.removeByValueCondition, and the method does the eviction with the number of values in out of sync. Making it more worse, if the method performs the eviction and updates the number of values for grouping key, it "overwrites" the number of value, effectively drop all rows being inserted in the same batch. Below code blocks are references on understanding the details of the issue. https://github.com/apache/spark/blob/ca7200b0008dc6101a252020e6c34ef7b72d81d6/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala#L327-L339 https://github.com/apache/spark/blob/ca7200b0008dc6101a252020e6c34ef7b72d81d6/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala#L619-L627 https://github.com/apache/spark/blob/ca7200b0008dc6101a252020e6c34ef7b72d81d6/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala#L195-L201 https://github.com/apache/spark/blob/ca7200b0008dc6101a252020e6c34ef7b72d81d6/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala#L208-L223 This PR fixes the outer iterators as late evaluation to ensure all updates on processing input rows are reflected "before" outer iterators are initialized. ### Why are the changes needed? The bug is described in above section. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New UT added. Closes #36002 from HeartSaVioR/SPARK-38684. Authored-by: Jungtaek Lim Signed-off-by: Jungtaek Lim (cherry picked from commit 2f8613f22c0750c00cf1dcfb2f31c431d8dc1be7) Signed-off-by: Jungtaek Lim --- .../StreamingSymmetricHashJoinExec.scala | 81 ++++++++++++++----- .../sql/streaming/StreamingJoinSuite.scala | 63 ++++++++++++++- 2 files changed, 121 insertions(+), 23 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala index 81888e0f7e189..aa888c148ddf4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingSymmetricHashJoinExec.scala @@ -324,17 +324,22 @@ case class StreamingSymmetricHashJoinExec( } } + val initIterFn = { () => + val removedRowIter = leftSideJoiner.removeOldState() + removedRowIter.filterNot { kv => + stateFormatVersion match { + case 1 => matchesWithRightSideState(new UnsafeRowPair(kv.key, kv.value)) + case 2 => kv.matched + case _ => throwBadStateFormatVersionException() + } + }.map(pair => joinedRow.withLeft(pair.value).withRight(nullRight)) + } + // NOTE: we need to make sure `outerOutputIter` is evaluated "after" exhausting all of - // elements in `innerOutputIter`, because evaluation of `innerOutputIter` may update - // the match flag which the logic for outer join is relying on. - val removedRowIter = leftSideJoiner.removeOldState() - val outerOutputIter = removedRowIter.filterNot { kv => - stateFormatVersion match { - case 1 => matchesWithRightSideState(new UnsafeRowPair(kv.key, kv.value)) - case 2 => kv.matched - case _ => throwBadStateFormatVersionException() - } - }.map(pair => joinedRow.withLeft(pair.value).withRight(nullRight)) + // elements in `hashJoinOutputIter`, otherwise it may lead to out of sync according to + // the interface contract on StateStore.iterator and end up with correctness issue. + // Please refer SPARK-38684 for more details. + val outerOutputIter = new LazilyInitializingJoinedRowIterator(initIterFn) hashJoinOutputIter ++ outerOutputIter case RightOuter => @@ -344,14 +349,23 @@ case class StreamingSymmetricHashJoinExec( postJoinFilter(joinedRow.withLeft(leftValue).withRight(rightKeyValue.value)) } } - val removedRowIter = rightSideJoiner.removeOldState() - val outerOutputIter = removedRowIter.filterNot { kv => - stateFormatVersion match { - case 1 => matchesWithLeftSideState(new UnsafeRowPair(kv.key, kv.value)) - case 2 => kv.matched - case _ => throwBadStateFormatVersionException() - } - }.map(pair => joinedRow.withLeft(nullLeft).withRight(pair.value)) + + val initIterFn = { () => + val removedRowIter = rightSideJoiner.removeOldState() + removedRowIter.filterNot { kv => + stateFormatVersion match { + case 1 => matchesWithLeftSideState(new UnsafeRowPair(kv.key, kv.value)) + case 2 => kv.matched + case _ => throwBadStateFormatVersionException() + } + }.map(pair => joinedRow.withLeft(nullLeft).withRight(pair.value)) + } + + // NOTE: we need to make sure `outerOutputIter` is evaluated "after" exhausting all of + // elements in `hashJoinOutputIter`, otherwise it may lead to out of sync according to + // the interface contract on StateStore.iterator and end up with correctness issue. + // Please refer SPARK-38684 for more details. + val outerOutputIter = new LazilyInitializingJoinedRowIterator(initIterFn) hashJoinOutputIter ++ outerOutputIter case FullOuter => @@ -360,10 +374,25 @@ case class StreamingSymmetricHashJoinExec( case 2 => kv.matched case _ => throwBadStateFormatVersionException() } - val leftSideOutputIter = leftSideJoiner.removeOldState().filterNot( - isKeyToValuePairMatched).map(pair => joinedRow.withLeft(pair.value).withRight(nullRight)) - val rightSideOutputIter = rightSideJoiner.removeOldState().filterNot( - isKeyToValuePairMatched).map(pair => joinedRow.withLeft(nullLeft).withRight(pair.value)) + + val leftSideInitIterFn = { () => + val removedRowIter = leftSideJoiner.removeOldState() + removedRowIter.filterNot(isKeyToValuePairMatched) + .map(pair => joinedRow.withLeft(pair.value).withRight(nullRight)) + } + + val rightSideInitIterFn = { () => + val removedRowIter = rightSideJoiner.removeOldState() + removedRowIter.filterNot(isKeyToValuePairMatched) + .map(pair => joinedRow.withLeft(nullLeft).withRight(pair.value)) + } + + // NOTE: we need to make sure both `leftSideOutputIter` and `rightSideOutputIter` are + // evaluated "after" exhausting all of elements in `hashJoinOutputIter`, otherwise it may + // lead to out of sync according to the interface contract on StateStore.iterator and + // end up with correctness issue. Please refer SPARK-38684 for more details. + val leftSideOutputIter = new LazilyInitializingJoinedRowIterator(leftSideInitIterFn) + val rightSideOutputIter = new LazilyInitializingJoinedRowIterator(rightSideInitIterFn) hashJoinOutputIter ++ leftSideOutputIter ++ rightSideOutputIter case _ => throwBadJoinTypeException() @@ -638,4 +667,12 @@ case class StreamingSymmetricHashJoinExec( override protected def withNewChildrenInternal( newLeft: SparkPlan, newRight: SparkPlan): StreamingSymmetricHashJoinExec = copy(left = newLeft, right = newRight) + + private class LazilyInitializingJoinedRowIterator( + initFn: () => Iterator[JoinedRow]) extends Iterator[JoinedRow] { + private lazy val iter: Iterator[JoinedRow] = initFn() + + override def hasNext: Boolean = iter.hasNext + override def next(): JoinedRow = iter.next() + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index 29caaf7289d6f..491b8da213e10 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec import org.apache.spark.sql.execution.streaming.{MemoryStream, StatefulOperatorStateInfo, StreamingSymmetricHashJoinExec, StreamingSymmetricHashJoinHelper} -import org.apache.spark.sql.execution.streaming.state.{StateStore, StateStoreProviderId} +import org.apache.spark.sql.execution.streaming.state.{RocksDBStateStoreProvider, StateStore, StateStoreProviderId} import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.Utils @@ -1353,6 +1353,67 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { ).select(Symbol("leftKey1"), Symbol("rightKey1"), Symbol("leftKey2"), Symbol("rightKey2"), $"leftWindow.end".cast("long"), Symbol("leftValue"), Symbol("rightValue")) } + + test("SPARK-38684: outer join works correctly even if processing input rows and " + + "evicting state rows for same grouping key happens in the same micro-batch") { + + // The test is to demonstrate the correctness issue in outer join before SPARK-38684. + withSQLConf( + SQLConf.STREAMING_NO_DATA_MICRO_BATCHES_ENABLED.key -> "false", + SQLConf.STATE_STORE_PROVIDER_CLASS.key -> classOf[RocksDBStateStoreProvider].getName) { + + val input1 = MemoryStream[(Timestamp, String, String)] + val df1 = input1.toDF + .selectExpr("_1 as eventTime", "_2 as id", "_3 as comment") + .withWatermark("eventTime", "0 second") + + val input2 = MemoryStream[(Timestamp, String, String)] + val df2 = input2.toDF + .selectExpr("_1 as eventTime", "_2 as id", "_3 as comment") + .withWatermark("eventTime", "0 second") + + val joined = df1.as("left") + .join(df2.as("right"), + expr(""" + |left.id = right.id AND left.eventTime BETWEEN + | right.eventTime - INTERVAL 30 seconds AND + | right.eventTime + INTERVAL 30 seconds + """.stripMargin), + joinType = "leftOuter") + + testStream(joined)( + MultiAddData( + (input1, Seq((Timestamp.valueOf("2020-01-02 00:00:00"), "abc", "left in batch 1"))), + (input2, Seq((Timestamp.valueOf("2020-01-02 00:01:00"), "abc", "right in batch 1"))) + ), + CheckNewAnswer(), + MultiAddData( + (input1, Seq((Timestamp.valueOf("2020-01-02 01:00:00"), "abc", "left in batch 2"))), + (input2, Seq((Timestamp.valueOf("2020-01-02 01:01:00"), "abc", "right in batch 2"))) + ), + // watermark advanced to "2020-01-02 00:00:00" + CheckNewAnswer(), + AddData(input1, (Timestamp.valueOf("2020-01-02 01:30:00"), "abc", "left in batch 3")), + // watermark advanced to "2020-01-02 01:00:00" + CheckNewAnswer( + (Timestamp.valueOf("2020-01-02 00:00:00"), "abc", "left in batch 1", null, null, null) + ), + // left side state should still contain "left in batch 2" and "left in batch 3" + // we should see both rows in the left side since + // - "left in batch 2" is going to be evicted in this batch + // - "left in batch 3" is going to be matched with new row in right side + AddData(input2, + (Timestamp.valueOf("2020-01-02 01:30:10"), "abc", "match with left in batch 3")), + // watermark advanced to "2020-01-02 01:01:00" + CheckNewAnswer( + (Timestamp.valueOf("2020-01-02 01:00:00"), "abc", "left in batch 2", + null, null, null), + (Timestamp.valueOf("2020-01-02 01:30:00"), "abc", "left in batch 3", + Timestamp.valueOf("2020-01-02 01:30:10"), "abc", "match with left in batch 3") + ) + ) + } + } } class StreamingFullOuterJoinSuite extends StreamingJoinSuite { From 09d2b0e92ae423a329c02824cb554482c80aa44f Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 1 Apr 2022 19:10:11 -0700 Subject: [PATCH 087/535] [SPARK-34863][SQL] Support complex types for Parquet vectorized reader ### What changes were proposed in this pull request? This PR adds support for complex types (e.g., list, map, array) for Spark's vectorized Parquet reader. In particular, this introduces the following changes: 1. Added a new class `ParquetColumnVector` which encapsulates all the necessary information needed when reading a Parquet column, including the `ParquetColumn` for the Parquet column, the repetition & definition levels (only allocated for a leaf-node of a complex type), as well as the reader for the column. In addition, it also contains logic for assembling nested columnar batches, via interpreting Parquet repetition & definition levels. 2. Changes are made in `VectorizedParquetRecordReader` to initialize a list of `ParquetColumnVector` for the columns read. 3. `VectorizedColumnReader` now also creates a reader for repetition column. Depending on whether maximum repetition level is 0, the batch read is now split into two code paths, e.g., `readBatch` versus `readBatchNested`. 4. Added logic to handle complex type in `VectorizedRleValuesReader`. For data types involving only struct or primitive types, it still goes with the old `readBatch` method which now also saves definition levels into a vector for later assembly. Otherwise, for data types involving array or map, a separate code path `readBatchNested` is introduced to handle repetition levels. This PR also introduced a new flag `spark.sql.parquet.enableNestedColumnVectorizedReader` which turns the feature on or off. By default it is on to facilitates all the Parquet related test coverage. ### Why are the changes needed? Whenever read schema containing complex types, at the moment Spark will fallback to the row-based reader in parquet-mr, which is much slower. As benchmark shows, by adding support into the vectorized reader, we can get ~15x on average speed up on reading struct fields, and ~1.5x when reading array of struct and map. ### Does this PR introduce _any_ user-facing change? With the PR Spark should now support reading complex types in its vectorized Parquet reader. A new config `spark.sql.parquet.enableNestedColumnVectorizedReader` is introduced to turn the feature on or off. ### How was this patch tested? Added new unit tests. Closes #34659 from sunchao/SPARK-34863-new. Authored-by: Chao Sun Signed-off-by: Liang-Chi Hsieh (cherry picked from commit deac8f950edb1d893fe4bf2cc7c4adbd29d1db22) Signed-off-by: Liang-Chi Hsieh --- .../apache/spark/sql/internal/SQLConf.scala | 11 + .../parquet/ParquetColumnVector.java | 381 ++++++++++++++++ .../datasources/parquet/ParquetReadState.java | 60 ++- .../SpecificParquetRecordReaderBase.java | 15 +- .../parquet/VectorizedColumnReader.java | 84 ++-- .../VectorizedParquetRecordReader.java | 160 ++++--- .../parquet/VectorizedRleValuesReader.java | 413 ++++++++++++++++-- .../vectorized/OnHeapColumnVector.java | 2 +- .../vectorized/WritableColumnVector.java | 48 +- .../parquet/ParquetFileFormat.scala | 8 +- .../parquet/ParquetSchemaConverter.scala | 17 +- .../datasources/parquet/ParquetUtils.scala | 27 +- .../ParquetPartitionReaderFactory.scala | 4 +- .../sql-tests/results/explain-aqe.sql.out | 3 +- .../sql-tests/results/explain.sql.out | 3 +- .../datasources/FileBasedDataSourceTest.scala | 9 +- .../execution/datasources/orc/OrcTest.scala | 2 + .../orc/OrcV1SchemaPruningSuite.scala | 2 + .../orc/OrcV2SchemaPruningSuite.scala | 2 + .../parquet/ParquetColumnIndexSuite.scala | 13 + .../parquet/ParquetFileFormatSuite.scala | 37 ++ .../datasources/parquet/ParquetIOSuite.scala | 351 +++++++++++++++ .../parquet/ParquetSchemaPruningSuite.scala | 2 + .../datasources/parquet/ParquetTest.scala | 2 + .../parquet/ParquetVectorizedSuite.scala | 330 ++++++++++++++ 25 files changed, 1813 insertions(+), 173 deletions(-) create mode 100644 sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetColumnVector.java diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 1bba8b6d866a6..5bf59923787a7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1009,6 +1009,14 @@ object SQLConf { .booleanConf .createWithDefault(true) + val PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED = + buildConf("spark.sql.parquet.enableNestedColumnVectorizedReader") + .doc("Enables vectorized Parquet decoding for nested columns (e.g., struct, list, map). " + + s"Requires ${PARQUET_VECTORIZED_READER_ENABLED.key} to be enabled.") + .version("3.3.0") + .booleanConf + .createWithDefault(true) + val PARQUET_RECORD_FILTER_ENABLED = buildConf("spark.sql.parquet.recordLevelFilter.enabled") .doc("If true, enables Parquet's native record-level filtering using the pushed down " + "filters. " + @@ -3904,6 +3912,9 @@ class SQLConf extends Serializable with Logging { def parquetVectorizedReaderEnabled: Boolean = getConf(PARQUET_VECTORIZED_READER_ENABLED) + def parquetVectorizedReaderNestedColumnEnabled: Boolean = + getConf(PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED) + def parquetVectorizedReaderBatchSize: Int = getConf(PARQUET_VECTORIZED_READER_BATCH_SIZE) def columnBatchSize: Int = getConf(COLUMN_BATCH_SIZE) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetColumnVector.java new file mode 100644 index 0000000000000..4b29520d30ffa --- /dev/null +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetColumnVector.java @@ -0,0 +1,381 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.parquet; + +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import com.google.common.base.Preconditions; + +import org.apache.spark.memory.MemoryMode; +import org.apache.spark.sql.execution.vectorized.OffHeapColumnVector; +import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector; +import org.apache.spark.sql.execution.vectorized.WritableColumnVector; +import org.apache.spark.sql.types.ArrayType; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.MapType; +import org.apache.spark.sql.types.StructType; + +/** + * Contains necessary information representing a Parquet column, either of primitive or nested type. + */ +final class ParquetColumnVector { + private final ParquetColumn column; + private final List children; + private final WritableColumnVector vector; + + /** + * Repetition & Definition levels + * These are allocated only for leaf columns; for non-leaf columns, they simply maintain + * references to that of the former. + */ + private WritableColumnVector repetitionLevels; + private WritableColumnVector definitionLevels; + + /** Whether this column is primitive (i.e., leaf column) */ + private final boolean isPrimitive; + + /** Reader for this column - only set if 'isPrimitive' is true */ + private VectorizedColumnReader columnReader; + + ParquetColumnVector( + ParquetColumn column, + WritableColumnVector vector, + int capacity, + MemoryMode memoryMode, + Set missingColumns) { + + DataType sparkType = column.sparkType(); + if (!sparkType.sameType(vector.dataType())) { + throw new IllegalArgumentException("Spark type: " + sparkType + + " doesn't match the type: " + vector.dataType() + " in column vector"); + } + + this.column = column; + this.vector = vector; + this.children = new ArrayList<>(); + this.isPrimitive = column.isPrimitive(); + + if (missingColumns.contains(column)) { + vector.setAllNull(); + return; + } + + if (isPrimitive) { + // TODO: avoid allocating these if not necessary, for instance, the node is of top-level + // and is not repeated, or the node is not top-level but its max repetition level is 0. + repetitionLevels = allocateLevelsVector(capacity, memoryMode); + definitionLevels = allocateLevelsVector(capacity, memoryMode); + } else { + Preconditions.checkArgument(column.children().size() == vector.getNumChildren()); + for (int i = 0; i < column.children().size(); i++) { + ParquetColumnVector childCv = new ParquetColumnVector(column.children().apply(i), + vector.getChild(i), capacity, memoryMode, missingColumns); + children.add(childCv); + + // Only use levels from non-missing child, this can happen if only some but not all + // fields of a struct are missing. + if (!childCv.vector.isAllNull()) { + this.repetitionLevels = childCv.repetitionLevels; + this.definitionLevels = childCv.definitionLevels; + } + } + + // This can happen if all the fields of a struct are missing, in which case we should mark + // the struct itself as a missing column + if (repetitionLevels == null) { + vector.setAllNull(); + } + } + } + + /** + * Returns all the children of this column. + */ + List getChildren() { + return children; + } + + /** + * Returns all the leaf columns in depth-first order. + */ + List getLeaves() { + List result = new ArrayList<>(); + getLeavesHelper(this, result); + return result; + } + + private static void getLeavesHelper(ParquetColumnVector vector, List coll) { + if (vector.isPrimitive) { + coll.add(vector); + } else { + for (ParquetColumnVector child : vector.children) { + getLeavesHelper(child, coll); + } + } + } + + /** + * Assembles this column and calculate collection offsets recursively. + * This is a no-op for primitive columns. + */ + void assemble() { + // nothing to do if the column itself is missing + if (vector.isAllNull()) return; + + DataType type = column.sparkType(); + if (type instanceof ArrayType || type instanceof MapType) { + for (ParquetColumnVector child : children) { + child.assemble(); + } + assembleCollection(); + } else if (type instanceof StructType) { + for (ParquetColumnVector child : children) { + child.assemble(); + } + assembleStruct(); + } + } + + /** + * Resets this Parquet column vector, which includes resetting all the writable column vectors + * (used to store values, definition levels, and repetition levels) for this and all its children. + */ + void reset() { + // nothing to do if the column itself is missing + if (vector.isAllNull()) return; + + vector.reset(); + repetitionLevels.reset(); + definitionLevels.reset(); + for (ParquetColumnVector child : children) { + child.reset(); + } + } + + /** + * Returns the {@link ParquetColumn} of this column vector. + */ + ParquetColumn getColumn() { + return this.column; + } + + /** + * Returns the writable column vector used to store values. + */ + WritableColumnVector getValueVector() { + return this.vector; + } + + /** + * Returns the writable column vector used to store repetition levels. + */ + WritableColumnVector getRepetitionLevelVector() { + return this.repetitionLevels; + } + + /** + * Returns the writable column vector used to store definition levels. + */ + WritableColumnVector getDefinitionLevelVector() { + return this.definitionLevels; + } + + /** + * Returns the column reader for reading a Parquet column. + */ + VectorizedColumnReader getColumnReader() { + return this.columnReader; + } + + /** + * Sets the column vector to 'reader'. Note this can only be called on a primitive Parquet + * column. + */ + void setColumnReader(VectorizedColumnReader reader) { + if (!isPrimitive) { + throw new IllegalStateException("Can't set reader for non-primitive column"); + } + this.columnReader = reader; + } + + /** + * Assemble collections, e.g., array, map. + */ + private void assembleCollection() { + int maxDefinitionLevel = column.definitionLevel(); + int maxElementRepetitionLevel = column.repetitionLevel(); + + // There are 4 cases when calculating definition levels: + // 1. definitionLevel == maxDefinitionLevel + // ==> value is defined and not null + // 2. definitionLevel == maxDefinitionLevel - 1 + // ==> value is null + // 3. definitionLevel < maxDefinitionLevel - 1 + // ==> value doesn't exist since one of its optional parents is null + // 4. definitionLevel > maxDefinitionLevel + // ==> value is a nested element within an array or map + // + // `i` is the index over all leaf elements of this array, while `offset` is the index over + // all top-level elements of this array. + int rowId = 0; + for (int i = 0, offset = 0; i < definitionLevels.getElementsAppended(); + i = getNextCollectionStart(maxElementRepetitionLevel, i)) { + vector.reserve(rowId + 1); + int definitionLevel = definitionLevels.getInt(i); + if (definitionLevel <= maxDefinitionLevel) { + // This means the value is not an array element, but a collection that is either null or + // empty. In this case, we should increase offset to skip it when returning an array + // starting from the offset. + // + // For instance, considering an array of strings with 3 elements like the following: + // null, [], [a, b, c] + // the child array (which is of String type) in this case will be: + // null: 1 1 0 0 0 + // length: 0 0 1 1 1 + // offset: 0 0 0 1 2 + // and the array itself will be: + // null: 1 0 0 + // length: 0 0 3 + // offset: 0 1 2 + // + // It's important that for the third element `[a, b, c]`, the offset in the array + // (not the elements) starts from 2 since otherwise we'd include the first & second null + // element from child array in the result. + offset += 1; + } + if (definitionLevel <= maxDefinitionLevel - 1) { + // Collection is null or one of its optional parents is null + vector.putNull(rowId++); + } else if (definitionLevel == maxDefinitionLevel) { + // Collection is defined but empty + vector.putNotNull(rowId); + vector.putArray(rowId, offset, 0); + rowId++; + } else if (definitionLevel > maxDefinitionLevel) { + // Collection is defined and non-empty: find out how many top elements are there until the + // start of the next array. + vector.putNotNull(rowId); + int length = getCollectionSize(maxElementRepetitionLevel, i); + vector.putArray(rowId, offset, length); + offset += length; + rowId++; + } + } + vector.addElementsAppended(rowId); + } + + private void assembleStruct() { + int maxRepetitionLevel = column.repetitionLevel(); + int maxDefinitionLevel = column.definitionLevel(); + + vector.reserve(definitionLevels.getElementsAppended()); + + int rowId = 0; + boolean hasRepetitionLevels = repetitionLevels.getElementsAppended() > 0; + for (int i = 0; i < definitionLevels.getElementsAppended(); i++) { + // If repetition level > maxRepetitionLevel, the value is a nested element (e.g., an array + // element in struct>), and we should skip the definition level since it doesn't + // represent with the struct. + if (!hasRepetitionLevels || repetitionLevels.getInt(i) <= maxRepetitionLevel) { + if (definitionLevels.getInt(i) <= maxDefinitionLevel - 1) { + // Struct is null + vector.putNull(rowId); + rowId++; + } else if (definitionLevels.getInt(i) >= maxDefinitionLevel) { + vector.putNotNull(rowId); + rowId++; + } + } + } + vector.addElementsAppended(rowId); + } + + private static WritableColumnVector allocateLevelsVector(int capacity, MemoryMode memoryMode) { + switch (memoryMode) { + case ON_HEAP: + return new OnHeapColumnVector(capacity, DataTypes.IntegerType); + case OFF_HEAP: + return new OffHeapColumnVector(capacity, DataTypes.IntegerType); + default: + throw new IllegalArgumentException("Unknown memory mode: " + memoryMode); + } + } + + /** + * For a collection (i.e., array or map) element at index 'idx', returns the starting index of + * the next collection after it. + * + * @param maxRepetitionLevel the maximum repetition level for the elements in this collection + * @param idx the index of this collection in the Parquet column + * @return the starting index of the next collection + */ + private int getNextCollectionStart(int maxRepetitionLevel, int idx) { + idx += 1; + for (; idx < repetitionLevels.getElementsAppended(); idx++) { + if (repetitionLevels.getInt(idx) <= maxRepetitionLevel) { + break; + } + } + return idx; + } + + /** + * Gets the size of a collection (i.e., array or map) element, starting at 'idx'. + * + * @param maxRepetitionLevel the maximum repetition level for the elements in this collection + * @param idx the index of this collection in the Parquet column + * @return the size of this collection + */ + private int getCollectionSize(int maxRepetitionLevel, int idx) { + int size = 1; + for (idx += 1; idx < repetitionLevels.getElementsAppended(); idx++) { + if (repetitionLevels.getInt(idx) <= maxRepetitionLevel) { + break; + } else if (repetitionLevels.getInt(idx) <= maxRepetitionLevel + 1) { + // Only count elements which belong to the current collection + // For instance, suppose we have the following Parquet schema: + // + // message schema { max rl max dl + // optional group col (LIST) { 0 1 + // repeated group list { 1 2 + // optional group element (LIST) { 1 3 + // repeated group list { 2 4 + // required int32 element; 2 4 + // } + // } + // } + // } + // } + // + // For a list such as: [[[0, 1], [2, 3]], [[4, 5], [6, 7]]], the repetition & definition + // levels would be: + // + // repetition levels: [0, 2, 1, 2, 0, 2, 1, 2] + // definition levels: [2, 2, 2, 2, 2, 2, 2, 2] + // + // When calculating collection size for the outer array, we should only count repetition + // levels whose value is <= 1 (which is the max repetition level for the inner array) + size++; + } + } + return size; + } +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetReadState.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetReadState.java index b26088753465e..bde69402241c2 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetReadState.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetReadState.java @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.datasources.parquet; +import org.apache.parquet.column.ColumnDescriptor; + import java.util.ArrayList; import java.util.Iterator; import java.util.List; @@ -42,24 +44,52 @@ final class ParquetReadState { /** The current row range */ private RowRange currentRange; + /** Maximum repetition level for the Parquet column */ + final int maxRepetitionLevel; + /** Maximum definition level for the Parquet column */ final int maxDefinitionLevel; + /** Whether this column is required */ + final boolean isRequired; + /** The current index over all rows within the column chunk. This is used to check if the * current row should be skipped by comparing against the row ranges. */ long rowId; - /** The offset in the current batch to put the next value */ - int offset; + /** The offset in the current batch to put the next value in value vector */ + int valueOffset; + + /** The offset in the current batch to put the next value in repetition & definition vector */ + int levelOffset; /** The remaining number of values to read in the current page */ int valuesToReadInPage; - /** The remaining number of values to read in the current batch */ - int valuesToReadInBatch; + /** The remaining number of rows to read in the current batch */ + int rowsToReadInBatch; + + + /* The following fields are only used when reading repeated values */ + + /** When processing repeated values, whether we've found the beginning of the first list after the + * current batch. */ + boolean lastListCompleted; - ParquetReadState(int maxDefinitionLevel, PrimitiveIterator.OfLong rowIndexes) { - this.maxDefinitionLevel = maxDefinitionLevel; + /** When processing repeated types, the number of accumulated definition levels to process */ + int numBatchedDefLevels; + + /** When processing repeated types, whether we should skip the current batch of definition + * levels. */ + boolean shouldSkip; + + ParquetReadState( + ColumnDescriptor descriptor, + boolean isRequired, + PrimitiveIterator.OfLong rowIndexes) { + this.maxRepetitionLevel = descriptor.getMaxRepetitionLevel(); + this.maxDefinitionLevel = descriptor.getMaxDefinitionLevel(); + this.isRequired = isRequired; this.rowRanges = constructRanges(rowIndexes); nextRange(); } @@ -101,8 +131,12 @@ private Iterator constructRanges(PrimitiveIterator.OfLong rowIndexes) * Must be called at the beginning of reading a new batch. */ void resetForNewBatch(int batchSize) { - this.offset = 0; - this.valuesToReadInBatch = batchSize; + this.valueOffset = 0; + this.levelOffset = 0; + this.rowsToReadInBatch = batchSize; + this.lastListCompleted = this.maxRepetitionLevel == 0; // always true for non-repeated column + this.numBatchedDefLevels = 0; + this.shouldSkip = false; } /** @@ -127,16 +161,6 @@ long currentRangeEnd() { return currentRange.end; } - /** - * Advance the current offset and rowId to the new values. - */ - void advanceOffsetAndRowId(int newOffset, long newRowId) { - valuesToReadInBatch -= (newOffset - offset); - valuesToReadInPage -= (newRowId - rowId); - offset = newOffset; - rowId = newRowId; - } - /** * Advance to the next range. */ diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java index 5669534cd111a..292a0f98af1ca 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java @@ -74,6 +74,7 @@ public abstract class SpecificParquetRecordReaderBase extends RecordReader columns) throws IOException } } fileReader.setRequestedSchema(requestedSchema); - this.sparkSchema = new ParquetToSparkSchemaConverter(config).convert(requestedSchema); + this.parquetColumn = new ParquetToSparkSchemaConverter(config) + .convertParquetColumn(requestedSchema, Option.empty()); + this.sparkSchema = (StructType) parquetColumn.sparkType(); this.totalRowCount = fileReader.getFilteredRecordCount(); } @@ -191,7 +198,9 @@ protected void initialize( config.setBoolean(SQLConf.PARQUET_BINARY_AS_STRING().key() , false); config.setBoolean(SQLConf.PARQUET_INT96_AS_TIMESTAMP().key(), false); config.setBoolean(SQLConf.CASE_SENSITIVE().key(), false); - this.sparkSchema = new ParquetToSparkSchemaConverter(config).convert(requestedSchema); + this.parquetColumn = new ParquetToSparkSchemaConverter(config) + .convertParquetColumn(requestedSchema, Option.empty()); + this.sparkSchema = (StructType) parquetColumn.sparkType(); this.totalRowCount = totalRowCount; } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java index ee09d2b2a3be9..c2e85da3884ab 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.time.ZoneId; -import java.util.PrimitiveIterator; import org.apache.parquet.CorruptDeltaByteArrays; import org.apache.parquet.VersionParser.ParsedVersion; @@ -41,7 +40,6 @@ import org.apache.spark.sql.execution.vectorized.WritableColumnVector; import org.apache.spark.sql.types.Decimal; -import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BOOLEAN; import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64; @@ -69,6 +67,11 @@ public class VectorizedColumnReader { */ private VectorizedRleValuesReader defColumn; + /** + * Vectorized RLE decoder for repetition levels + */ + private VectorizedRleValuesReader repColumn; + /** * Factory to get type-specific vector updater. */ @@ -93,9 +96,8 @@ public class VectorizedColumnReader { public VectorizedColumnReader( ColumnDescriptor descriptor, - LogicalTypeAnnotation logicalTypeAnnotation, - PageReader pageReader, - PrimitiveIterator.OfLong rowIndexes, + boolean isRequired, + PageReadStore pageReadStore, ZoneId convertTz, String datetimeRebaseMode, String datetimeRebaseTz, @@ -103,9 +105,10 @@ public VectorizedColumnReader( String int96RebaseTz, ParsedVersion writerVersion) throws IOException { this.descriptor = descriptor; - this.pageReader = pageReader; - this.readState = new ParquetReadState(descriptor.getMaxDefinitionLevel(), rowIndexes); - this.logicalTypeAnnotation = logicalTypeAnnotation; + this.pageReader = pageReadStore.getPageReader(descriptor); + this.readState = new ParquetReadState(descriptor, isRequired, + pageReadStore.getRowIndexes().orElse(null)); + this.logicalTypeAnnotation = descriptor.getPrimitiveType().getLogicalTypeAnnotation(); this.updaterFactory = new ParquetVectorUpdaterFactory( logicalTypeAnnotation, convertTz, @@ -161,9 +164,13 @@ private boolean isLazyDecodingSupported(PrimitiveType.PrimitiveTypeName typeName } /** - * Reads `total` values from this columnReader into column. + * Reads `total` rows from this columnReader into column. */ - void readBatch(int total, WritableColumnVector column) throws IOException { + void readBatch( + int total, + WritableColumnVector column, + WritableColumnVector repetitionLevels, + WritableColumnVector definitionLevels) throws IOException { WritableColumnVector dictionaryIds = null; ParquetVectorUpdater updater = updaterFactory.getUpdater(descriptor, column.dataType()); @@ -174,22 +181,32 @@ void readBatch(int total, WritableColumnVector column) throws IOException { dictionaryIds = column.reserveDictionaryIds(total); } readState.resetForNewBatch(total); - while (readState.valuesToReadInBatch > 0) { + while (readState.rowsToReadInBatch > 0 || !readState.lastListCompleted) { if (readState.valuesToReadInPage == 0) { int pageValueCount = readPage(); + if (pageValueCount < 0) { + // we've read all the pages; this could happen when we're reading a repeated list and we + // don't know where the list will end until we've seen all the pages. + break; + } readState.resetForNewPage(pageValueCount, pageFirstRowIndex); } PrimitiveType.PrimitiveTypeName typeName = descriptor.getPrimitiveType().getPrimitiveTypeName(); if (isCurrentPageDictionaryEncoded) { // Save starting offset in case we need to decode dictionary IDs. - int startOffset = readState.offset; + int startOffset = readState.valueOffset; // Save starting row index so we can check if we need to eagerly decode dict ids later long startRowId = readState.rowId; // Read and decode dictionary ids. - defColumn.readIntegers(readState, dictionaryIds, column, - (VectorizedValuesReader) dataColumn); + if (readState.maxRepetitionLevel == 0) { + defColumn.readIntegers(readState, dictionaryIds, column, definitionLevels, + (VectorizedValuesReader) dataColumn); + } else { + repColumn.readIntegersRepeated(readState, repetitionLevels, defColumn, definitionLevels, + dictionaryIds, column, (VectorizedValuesReader) dataColumn); + } // TIMESTAMP_MILLIS encoded as INT64 can't be lazily decoded as we need to post process // the values to add microseconds precision. @@ -220,24 +237,32 @@ void readBatch(int total, WritableColumnVector column) throws IOException { boolean needTransform = castLongToInt || isUnsignedInt32 || isUnsignedInt64; column.setDictionary(new ParquetDictionary(dictionary, needTransform)); } else { - updater.decodeDictionaryIds(readState.offset - startOffset, startOffset, column, + updater.decodeDictionaryIds(readState.valueOffset - startOffset, startOffset, column, dictionaryIds, dictionary); } } else { - if (column.hasDictionary() && readState.offset != 0) { + if (column.hasDictionary() && readState.valueOffset != 0) { // This batch already has dictionary encoded values but this new page is not. The batch // does not support a mix of dictionary and not so we will decode the dictionary. - updater.decodeDictionaryIds(readState.offset, 0, column, dictionaryIds, dictionary); + updater.decodeDictionaryIds(readState.valueOffset, 0, column, dictionaryIds, dictionary); } column.setDictionary(null); VectorizedValuesReader valuesReader = (VectorizedValuesReader) dataColumn; - defColumn.readBatch(readState, column, valuesReader, updater); + if (readState.maxRepetitionLevel == 0) { + defColumn.readBatch(readState, column, definitionLevels, valuesReader, updater); + } else { + repColumn.readBatchRepeated(readState, repetitionLevels, defColumn, definitionLevels, + column, valuesReader, updater); + } } } } private int readPage() { DataPage page = pageReader.readPage(); + if (page == null) { + return -1; + } this.pageFirstRowIndex = page.getFirstRowIndex().orElse(0L); return page.accept(new DataPage.Visitor() { @@ -328,18 +353,18 @@ private int readPageV1(DataPageV1 page) throws IOException { } int pageValueCount = page.getValueCount(); - int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); - this.defColumn = new VectorizedRleValuesReader(bitWidth); + int rlBitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxRepetitionLevel()); + this.repColumn = new VectorizedRleValuesReader(rlBitWidth); + + int dlBitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); + this.defColumn = new VectorizedRleValuesReader(dlBitWidth); + try { BytesInput bytes = page.getBytes(); ByteBufferInputStream in = bytes.toInputStream(); - // only used now to consume the repetition level data - page.getRlEncoding() - .getValuesReader(descriptor, REPETITION_LEVEL) - .initFromPage(pageValueCount, in); - + repColumn.initFromPage(pageValueCount, in); defColumn.initFromPage(pageValueCount, in); initDataReader(pageValueCount, page.getValueEncoding(), in); return pageValueCount; @@ -350,11 +375,16 @@ private int readPageV1(DataPageV1 page) throws IOException { private int readPageV2(DataPageV2 page) throws IOException { int pageValueCount = page.getValueCount(); - int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); // do not read the length from the stream. v2 pages handle dividing the page bytes. - defColumn = new VectorizedRleValuesReader(bitWidth, false); + int rlBitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxRepetitionLevel()); + repColumn = new VectorizedRleValuesReader(rlBitWidth, false); + repColumn.initFromPage(pageValueCount, page.getRepetitionLevels().toInputStream()); + + int dlBitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()); + defColumn = new VectorizedRleValuesReader(dlBitWidth, false); defColumn.initFromPage(pageValueCount, page.getDefinitionLevels().toInputStream()); + try { initDataReader(pageValueCount, page.getDataEncoding(), page.getData().toInputStream()); return pageValueCount; diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java index da23b5fcec28f..80f6f88810a19 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedParquetRecordReader.java @@ -20,13 +20,17 @@ import java.io.IOException; import java.time.ZoneId; import java.util.Arrays; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import scala.collection.JavaConverters; import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.column.page.PageReadStore; +import org.apache.parquet.schema.GroupType; import org.apache.parquet.schema.MessageType; import org.apache.parquet.schema.Type; @@ -44,7 +48,7 @@ * A specialized RecordReader that reads into InternalRows or ColumnarBatches directly using the * Parquet column APIs. This is somewhat based on parquet-mr's ColumnReader. * - * TODO: handle complex types, decimal requiring more than 8 bytes, INT96. Schema mismatch. + * TODO: decimal requiring more than 8 bytes, INT96. Schema mismatch. * All of these can be handled efficiently and easily with codegen. * * This class can either return InternalRows or ColumnarBatches. With whole stage codegen @@ -64,10 +68,10 @@ public class VectorizedParquetRecordReader extends SpecificParquetRecordReaderBa private int numBatched = 0; /** - * For each request column, the reader to read this column. This is NULL if this column - * is missing from the file, in which case we populate the attribute with NULL. + * Encapsulate writable column vectors with other Parquet related info such as + * repetition / definition levels. */ - private VectorizedColumnReader[] columnReaders; + private ParquetColumnVector[] columnVectors; /** * The number of rows that have been returned. @@ -80,9 +84,10 @@ public class VectorizedParquetRecordReader extends SpecificParquetRecordReaderBa private long totalCountLoadedSoFar = 0; /** - * For each column, true if the column is missing in the file and we'll instead return NULLs. + * For each leaf column, if it is in the set, it means the column is missing in the file and + * we'll instead return NULLs. */ - private boolean[] missingColumns; + private Set missingColumns; /** * The timezone that timestamp INT96 values should be converted to. Null if no conversion. Here to @@ -120,8 +125,6 @@ public class VectorizedParquetRecordReader extends SpecificParquetRecordReaderBa */ private ColumnarBatch columnarBatch; - private WritableColumnVector[] columnVectors; - /** * If true, this class returns batches instead of rows. */ @@ -246,25 +249,25 @@ private void initBatch( } } + WritableColumnVector[] vectors; if (memMode == MemoryMode.OFF_HEAP) { - columnVectors = OffHeapColumnVector.allocateColumns(capacity, batchSchema); + vectors = OffHeapColumnVector.allocateColumns(capacity, batchSchema); } else { - columnVectors = OnHeapColumnVector.allocateColumns(capacity, batchSchema); + vectors = OnHeapColumnVector.allocateColumns(capacity, batchSchema); + } + columnarBatch = new ColumnarBatch(vectors); + + columnVectors = new ParquetColumnVector[sparkSchema.fields().length]; + for (int i = 0; i < columnVectors.length; i++) { + columnVectors[i] = new ParquetColumnVector(parquetColumn.children().apply(i), + vectors[i], capacity, memMode, missingColumns); } - columnarBatch = new ColumnarBatch(columnVectors); + if (partitionColumns != null) { int partitionIdx = sparkSchema.fields().length; for (int i = 0; i < partitionColumns.fields().length; i++) { - ColumnVectorUtils.populate(columnVectors[i + partitionIdx], partitionValues, i); - columnVectors[i + partitionIdx].setIsConstant(); - } - } - - // Initialize missing columns with nulls. - for (int i = 0; i < missingColumns.length; i++) { - if (missingColumns[i]) { - columnVectors[i].putNulls(0, capacity); - columnVectors[i].setIsConstant(); + ColumnVectorUtils.populate(vectors[i + partitionIdx], partitionValues, i); + vectors[i + partitionIdx].setIsConstant(); } } } @@ -298,7 +301,7 @@ public void enableReturningBatches() { * Advances to the next batch of rows. Returns false if there are no more. */ public boolean nextBatch() throws IOException { - for (WritableColumnVector vector : columnVectors) { + for (ParquetColumnVector vector : columnVectors) { vector.reset(); } columnarBatch.setNumRows(0); @@ -306,10 +309,17 @@ public boolean nextBatch() throws IOException { checkEndOfRowGroup(); int num = (int) Math.min(capacity, totalCountLoadedSoFar - rowsReturned); - for (int i = 0; i < columnReaders.length; ++i) { - if (columnReaders[i] == null) continue; - columnReaders[i].readBatch(num, columnVectors[i]); + for (ParquetColumnVector cv : columnVectors) { + for (ParquetColumnVector leafCv : cv.getLeaves()) { + VectorizedColumnReader columnReader = leafCv.getColumnReader(); + if (columnReader != null) { + columnReader.readBatch(num, leafCv.getValueVector(), + leafCv.getRepetitionLevelVector(), leafCv.getDefinitionLevelVector()); + } + } + cv.assemble(); } + rowsReturned += num; columnarBatch.setNumRows(num); numBatched = num; @@ -318,34 +328,61 @@ public boolean nextBatch() throws IOException { } private void initializeInternal() throws IOException, UnsupportedOperationException { - // Check that the requested schema is supported. - missingColumns = new boolean[requestedSchema.getFieldCount()]; - List columns = requestedSchema.getColumns(); - List paths = requestedSchema.getPaths(); - for (int i = 0; i < requestedSchema.getFieldCount(); ++i) { - Type t = requestedSchema.getFields().get(i); - if (!t.isPrimitive() || t.isRepetition(Type.Repetition.REPEATED)) { - throw new UnsupportedOperationException("Complex types not supported."); - } + missingColumns = new HashSet<>(); + for (ParquetColumn column : JavaConverters.seqAsJavaList(parquetColumn.children())) { + checkColumn(column); + } + } - String[] colPath = paths.get(i); - if (fileSchema.containsPath(colPath)) { - ColumnDescriptor fd = fileSchema.getColumnDescription(colPath); - if (!fd.equals(columns.get(i))) { + /** + * Check whether a column from requested schema is missing from the file schema, or whether it + * conforms to the type of the file schema. + */ + private void checkColumn(ParquetColumn column) throws IOException { + String[] path = JavaConverters.seqAsJavaList(column.path()).toArray(new String[0]); + if (containsPath(fileSchema, path)) { + if (column.isPrimitive()) { + ColumnDescriptor desc = column.descriptor().get(); + ColumnDescriptor fd = fileSchema.getColumnDescription(desc.getPath()); + if (!fd.equals(desc)) { throw new UnsupportedOperationException("Schema evolution not supported."); } - missingColumns[i] = false; } else { - if (columns.get(i).getMaxDefinitionLevel() == 0) { - // Column is missing in data but the required data is non-nullable. This file is invalid. - throw new IOException("Required column is missing in data file. Col: " + - Arrays.toString(colPath)); + for (ParquetColumn childColumn : JavaConverters.seqAsJavaList(column.children())) { + checkColumn(childColumn); } - missingColumns[i] = true; } + } else { // A missing column which is either primitive or complex + if (column.required()) { + // Column is missing in data but the required data is non-nullable. This file is invalid. + throw new IOException("Required column is missing in data file. Col: " + + Arrays.toString(path)); + } + missingColumns.add(column); } } + /** + * Checks whether the given 'path' exists in 'parquetType'. The difference between this and + * {@link MessageType#containsPath(String[])} is that the latter only support paths to leaf + * nodes, while this support paths both to leaf and non-leaf nodes. + */ + private boolean containsPath(Type parquetType, String[] path) { + return containsPath(parquetType, path, 0); + } + + private boolean containsPath(Type parquetType, String[] path, int depth) { + if (path.length == depth) return true; + if (parquetType instanceof GroupType) { + String fieldName = path[depth]; + GroupType parquetGroupType = (GroupType) parquetType; + if (parquetGroupType.containsField(fieldName)) { + return containsPath(parquetGroupType.getType(fieldName), path, depth + 1); + } + } + return false; + } + private void checkEndOfRowGroup() throws IOException { if (rowsReturned != totalCountLoadedSoFar) return; PageReadStore pages = reader.readNextRowGroup(); @@ -353,23 +390,26 @@ private void checkEndOfRowGroup() throws IOException { throw new IOException("expecting more rows but reached last block. Read " + rowsReturned + " out of " + totalRowCount); } - List columns = requestedSchema.getColumns(); - List types = requestedSchema.asGroupType().getFields(); - columnReaders = new VectorizedColumnReader[columns.size()]; - for (int i = 0; i < columns.size(); ++i) { - if (missingColumns[i]) continue; - columnReaders[i] = new VectorizedColumnReader( - columns.get(i), - types.get(i).getLogicalTypeAnnotation(), - pages.getPageReader(columns.get(i)), - pages.getRowIndexes().orElse(null), - convertTz, - datetimeRebaseMode, - datetimeRebaseTz, - int96RebaseMode, - int96RebaseTz, - writerVersion); + for (ParquetColumnVector cv : columnVectors) { + initColumnReader(pages, cv); } totalCountLoadedSoFar += pages.getRowCount(); } + + private void initColumnReader(PageReadStore pages, ParquetColumnVector cv) throws IOException { + if (!missingColumns.contains(cv.getColumn())) { + if (cv.getColumn().isPrimitive()) { + ParquetColumn column = cv.getColumn(); + VectorizedColumnReader reader = new VectorizedColumnReader( + column.descriptor().get(), column.required(), pages, convertTz, datetimeRebaseMode, + datetimeRebaseTz, int96RebaseMode, int96RebaseTz, writerVersion); + cv.setColumnReader(reader); + } else { + // Not in missing columns and is a complex type: this must be a struct + for (ParquetColumnVector childCv : cv.getChildren()) { + initColumnReader(pages, childCv); + } + } + } + } } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java index bd7cbc7e17188..2cc763a5b725e 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java @@ -96,13 +96,13 @@ public VectorizedRleValuesReader(int bitWidth, boolean readLength) { public void initFromPage(int valueCount, ByteBufferInputStream in) throws IOException { this.in = in; if (fixedWidth) { - // initialize for repetition and definition levels + // Initialize for repetition and definition levels if (readLength) { int length = readIntLittleEndian(); this.in = in.sliceStream(length); } } else { - // initialize for values + // Initialize for values if (in.available() > 0) { init(in.read()); } @@ -157,47 +157,52 @@ public int readInteger() { } /** - * Reads a batch of values into vector `values`, using `valueReader`. The related states such - * as row index, offset, number of values left in the batch and page, etc, are tracked by - * `state`. The type-specific `updater` is used to update or skip values. + * Reads a batch of definition levels and values into vector 'defLevels' and 'values' + * respectively. The values are read using 'valueReader'. *

        - * This reader reads the definition levels and then will read from `valueReader` for the - * non-null values. If the value is null, `values` will be populated with null value. + * The related states such as row index, offset, number of values left in the batch and page, + * are tracked by 'state'. The type-specific 'updater' is used to update or skip values. + *

        + * This reader reads the definition levels and then will read from 'valueReader' for the + * non-null values. If the value is null, 'values' will be populated with null value. */ public void readBatch( ParquetReadState state, WritableColumnVector values, + WritableColumnVector defLevels, VectorizedValuesReader valueReader, ParquetVectorUpdater updater) { - readBatchInternal(state, values, values, valueReader, updater); + readBatchInternal(state, values, values, defLevels, valueReader, updater); } /** - * Decoding for dictionary ids. The IDs are populated into `values` and the nullability is - * populated into `nulls`. + * Decoding for dictionary ids. The IDs are populated into 'values' and the nullability is + * populated into 'nulls'. */ public void readIntegers( ParquetReadState state, WritableColumnVector values, WritableColumnVector nulls, - VectorizedValuesReader data) { - readBatchInternal(state, values, nulls, data, new ParquetVectorUpdaterFactory.IntegerUpdater()); + WritableColumnVector defLevels, + VectorizedValuesReader valueReader) { + readBatchInternal(state, values, nulls, defLevels, valueReader, + new ParquetVectorUpdaterFactory.IntegerUpdater()); } private void readBatchInternal( ParquetReadState state, WritableColumnVector values, WritableColumnVector nulls, + WritableColumnVector defLevels, VectorizedValuesReader valueReader, ParquetVectorUpdater updater) { - int offset = state.offset; long rowId = state.rowId; - int leftInBatch = state.valuesToReadInBatch; + int leftInBatch = state.rowsToReadInBatch; int leftInPage = state.valuesToReadInPage; while (leftInBatch > 0 && leftInPage > 0) { - if (this.currentCount == 0) this.readNextGroup(); + if (currentCount == 0 && !readNextGroup()) break; int n = Math.min(leftInBatch, Math.min(leftInPage, this.currentCount)); long rangeStart = state.currentRangeStart(); @@ -210,11 +215,11 @@ private void readBatchInternal( } else if (rowId > rangeEnd) { state.nextRange(); } else { - // the range [rowId, rowId + n) overlaps with the current row range in state + // The range [rowId, rowId + n) overlaps with the current row range in state long start = Math.max(rangeStart, rowId); long end = Math.min(rangeEnd, rowId + n - 1); - // skip the part [rowId, start) + // Skip the part [rowId, start) int toSkip = (int) (start - rowId); if (toSkip > 0) { skipValues(toSkip, state, valueReader, updater); @@ -222,36 +227,347 @@ private void readBatchInternal( leftInPage -= toSkip; } - // read the part [start, end] + // Read the part [start, end] n = (int) (end - start + 1); + readValuesN(n, state, defLevels, values, nulls, valueReader, updater); + + state.levelOffset += n; + leftInBatch -= n; + rowId += n; + leftInPage -= n; + currentCount -= n; + defLevels.addElementsAppended(n); + } + } + + state.rowsToReadInBatch = leftInBatch; + state.valuesToReadInPage = leftInPage; + state.rowId = rowId; + } - switch (mode) { - case RLE: - if (currentValue == state.maxDefinitionLevel) { - updater.readValues(n, offset, values, valueReader); + /** + * Reads a batch of repetition levels, definition levels and values into 'repLevels', + * 'defLevels' and 'values' respectively. The definition levels and values are read via + * 'defLevelsReader' and 'valueReader' respectively. + *

        + * The related states such as row index, offset, number of rows left in the batch and page, + * are tracked by 'state'. The type-specific 'updater' is used to update or skip values. + */ + public void readBatchRepeated( + ParquetReadState state, + WritableColumnVector repLevels, + VectorizedRleValuesReader defLevelsReader, + WritableColumnVector defLevels, + WritableColumnVector values, + VectorizedValuesReader valueReader, + ParquetVectorUpdater updater) { + readBatchRepeatedInternal(state, repLevels, defLevelsReader, defLevels, values, values, true, + valueReader, updater); + } + + /** + * Reads a batch of repetition levels, definition levels and integer values into 'repLevels', + * 'defLevels', 'values' and 'nulls' respectively. The definition levels and values are read via + * 'defLevelsReader' and 'valueReader' respectively. + *

        + * The 'values' vector is used to hold non-null values, while 'nulls' vector is used to hold + * null values. + *

        + * The related states such as row index, offset, number of rows left in the batch and page, + * are tracked by 'state'. + *

        + * Unlike 'readBatchRepeated', this is used to decode dictionary indices in dictionary encoding. + */ + public void readIntegersRepeated( + ParquetReadState state, + WritableColumnVector repLevels, + VectorizedRleValuesReader defLevelsReader, + WritableColumnVector defLevels, + WritableColumnVector values, + WritableColumnVector nulls, + VectorizedValuesReader valueReader) { + readBatchRepeatedInternal(state, repLevels, defLevelsReader, defLevels, values, nulls, false, + valueReader, new ParquetVectorUpdaterFactory.IntegerUpdater()); + } + + /** + * Keep reading repetition level values from the page until either: 1) we've read enough + * top-level rows to fill the current batch, or 2) we've drained the data page completely. + * + * @param valuesReused whether 'values' vector is reused for 'nulls' + */ + public void readBatchRepeatedInternal( + ParquetReadState state, + WritableColumnVector repLevels, + VectorizedRleValuesReader defLevelsReader, + WritableColumnVector defLevels, + WritableColumnVector values, + WritableColumnVector nulls, + boolean valuesReused, + VectorizedValuesReader valueReader, + ParquetVectorUpdater updater) { + + int leftInBatch = state.rowsToReadInBatch; + int leftInPage = state.valuesToReadInPage; + long rowId = state.rowId; + + DefLevelProcessor defLevelProcessor = new DefLevelProcessor(defLevelsReader, state, defLevels, + values, nulls, valuesReused, valueReader, updater); + + while ((leftInBatch > 0 || !state.lastListCompleted) && leftInPage > 0) { + if (currentCount == 0 && !readNextGroup()) break; + + // Values to read in the current RLE/PACKED block, must be <= what's left in the page + int valuesLeftInBlock = Math.min(leftInPage, currentCount); + + // The current row range start and end + long rangeStart = state.currentRangeStart(); + long rangeEnd = state.currentRangeEnd(); + + switch (mode) { + case RLE: + // This RLE block is consist of top-level rows, so we'll need to check + // if the rows should be skipped according to row indexes. + if (currentValue == 0) { + if (leftInBatch == 0) { + state.lastListCompleted = true; } else { - nulls.putNulls(offset, n); + // # of rows to read in the block, must be <= what's left in the current batch + int n = Math.min(leftInBatch, valuesLeftInBlock); + + if (rowId + n < rangeStart) { + // Need to skip all rows in [rowId, rowId + n) + defLevelProcessor.skipValues(n); + rowId += n; + currentCount -= n; + leftInPage -= n; + } else if (rowId > rangeEnd) { + // The current row index already beyond the current range: move to the next range + // and repeat + state.nextRange(); + } else { + // The range [rowId, rowId + n) overlaps with the current row range + long start = Math.max(rangeStart, rowId); + long end = Math.min(rangeEnd, rowId + n - 1); + + // Skip the rows in [rowId, start) + int toSkip = (int) (start - rowId); + if (toSkip > 0) { + defLevelProcessor.skipValues(toSkip); + rowId += toSkip; + currentCount -= toSkip; + leftInPage -= toSkip; + } + + // Read the rows in [start, end] + n = (int) (end - start + 1); + + if (n > 0) { + repLevels.appendInts(n, 0); + defLevelProcessor.readValues(n); + } + + rowId += n; + currentCount -= n; + leftInBatch -= n; + leftInPage -= n; + } + } + } else { + // Not a top-level row: just read all the repetition levels in the block if the row + // should be included according to row indexes, else skip the rows. + if (!state.shouldSkip) { + repLevels.appendInts(valuesLeftInBlock, currentValue); } - break; - case PACKED: - for (int i = 0; i < n; ++i) { - if (currentBuffer[currentBufferIdx++] == state.maxDefinitionLevel) { - updater.readValue(offset + i, values, valueReader); + state.numBatchedDefLevels += valuesLeftInBlock; + leftInPage -= valuesLeftInBlock; + currentCount -= valuesLeftInBlock; + } + break; + case PACKED: + int i = 0; + + for (; i < valuesLeftInBlock; i++) { + int currentValue = currentBuffer[currentBufferIdx + i]; + if (currentValue == 0) { + if (leftInBatch == 0) { + state.lastListCompleted = true; + break; + } else if (rowId < rangeStart) { + // This is a top-level row, therefore check if we should skip it with row indexes + // the row is before the current range, skip it + defLevelProcessor.skipValues(1); + } else if (rowId > rangeEnd) { + // The row is after the current range, move to the next range and compare again + state.nextRange(); + break; } else { - nulls.putNull(offset + i); + // The row is in the current range, decrement the row counter and read it + leftInBatch--; + repLevels.appendInt(0); + defLevelProcessor.readValues(1); + } + rowId++; + } else { + if (!state.shouldSkip) { + repLevels.appendInt(currentValue); } + state.numBatchedDefLevels += 1; } - break; + } + + leftInPage -= i; + currentCount -= i; + currentBufferIdx += i; + break; + } + } + + // Process all the batched def levels + defLevelProcessor.finish(); + + state.rowsToReadInBatch = leftInBatch; + state.valuesToReadInPage = leftInPage; + state.rowId = rowId; + } + + private static class DefLevelProcessor { + private final VectorizedRleValuesReader reader; + private final ParquetReadState state; + private final WritableColumnVector defLevels; + private final WritableColumnVector values; + private final WritableColumnVector nulls; + private final boolean valuesReused; + private final VectorizedValuesReader valueReader; + private final ParquetVectorUpdater updater; + + DefLevelProcessor( + VectorizedRleValuesReader reader, + ParquetReadState state, + WritableColumnVector defLevels, + WritableColumnVector values, + WritableColumnVector nulls, + boolean valuesReused, + VectorizedValuesReader valueReader, + ParquetVectorUpdater updater) { + this.reader = reader; + this.state = state; + this.defLevels = defLevels; + this.values = values; + this.nulls = nulls; + this.valuesReused = valuesReused; + this.valueReader = valueReader; + this.updater = updater; + } + + void readValues(int n) { + if (!state.shouldSkip) { + state.numBatchedDefLevels += n; + } else { + reader.skipValues(state.numBatchedDefLevels, state, valueReader, updater); + state.numBatchedDefLevels = n; + state.shouldSkip = false; + } + } + + void skipValues(int n) { + if (state.shouldSkip) { + state.numBatchedDefLevels += n; + } else { + reader.readValues(state.numBatchedDefLevels, state, defLevels, values, nulls, valuesReused, + valueReader, updater); + state.numBatchedDefLevels = n; + state.shouldSkip = true; + } + } + + void finish() { + if (state.numBatchedDefLevels > 0) { + if (state.shouldSkip) { + reader.skipValues(state.numBatchedDefLevels, state, valueReader, updater); + } else { + reader.readValues(state.numBatchedDefLevels, state, defLevels, values, nulls, + valuesReused, valueReader, updater); } - offset += n; - leftInBatch -= n; - rowId += n; - leftInPage -= n; - currentCount -= n; + state.numBatchedDefLevels = 0; } } + } + + /** + * Read the next 'total' values (either null or non-null) from this definition level reader and + * 'valueReader'. The definition levels are read into 'defLevels'. If a value is not + * null, it is appended to 'values'. Otherwise, a null bit will be set in 'nulls'. + * + * This is only used when reading repeated values. + */ + private void readValues( + int total, + ParquetReadState state, + WritableColumnVector defLevels, + WritableColumnVector values, + WritableColumnVector nulls, + boolean valuesReused, + VectorizedValuesReader valueReader, + ParquetVectorUpdater updater) { + + defLevels.reserveAdditional(total); + values.reserveAdditional(total); + if (!valuesReused) { + // 'nulls' is a separate column vector so we'll have to reserve it separately + nulls.reserveAdditional(total); + } + + int n = total; + int initialValueOffset = state.valueOffset; + while (n > 0) { + if (currentCount == 0 && !readNextGroup()) break; + int num = Math.min(n, this.currentCount); + readValuesN(num, state, defLevels, values, nulls, valueReader, updater); + state.levelOffset += num; + currentCount -= num; + n -= num; + } + + defLevels.addElementsAppended(total); + + int valuesRead = state.valueOffset - initialValueOffset; + values.addElementsAppended(valuesRead); + if (!valuesReused) { + nulls.addElementsAppended(valuesRead); + } + } - state.advanceOffsetAndRowId(offset, rowId); + private void readValuesN( + int n, + ParquetReadState state, + WritableColumnVector defLevels, + WritableColumnVector values, + WritableColumnVector nulls, + VectorizedValuesReader valueReader, + ParquetVectorUpdater updater) { + switch (mode) { + case RLE: + if (currentValue == state.maxDefinitionLevel) { + updater.readValues(n, state.valueOffset, values, valueReader); + } else { + nulls.putNulls(state.valueOffset, n); + } + state.valueOffset += n; + defLevels.putInts(state.levelOffset, n, currentValue); + break; + case PACKED: + for (int i = 0; i < n; ++i) { + int currentValue = currentBuffer[currentBufferIdx++]; + if (currentValue == state.maxDefinitionLevel) { + updater.readValue(state.valueOffset++, values, valueReader); + } else { + nulls.putNull(state.valueOffset++); + } + defLevels.putInt(state.levelOffset + i, currentValue); + } + break; + } } /** @@ -264,11 +580,11 @@ private void skipValues( VectorizedValuesReader valuesReader, ParquetVectorUpdater updater) { while (n > 0) { - if (this.currentCount == 0) this.readNextGroup(); + if (currentCount == 0 && !readNextGroup()) break; int num = Math.min(n, this.currentCount); switch (mode) { case RLE: - // we only need to skip non-null values from `valuesReader` since nulls are represented + // We only need to skip non-null values from `valuesReader` since nulls are represented // via definition levels which are skipped here via decrementing `currentCount`. if (currentValue == state.maxDefinitionLevel) { updater.skipValues(num, valuesReader); @@ -276,7 +592,7 @@ private void skipValues( break; case PACKED: for (int i = 0; i < num; ++i) { - // same as above, only skip non-null values from `valuesReader` + // Same as above, only skip non-null values from `valuesReader` if (currentBuffer[currentBufferIdx++] == state.maxDefinitionLevel) { updater.skipValues(1, valuesReader); } @@ -295,7 +611,7 @@ private void skipValues( public void readIntegers(int total, WritableColumnVector c, int rowId) { int left = total; while (left > 0) { - if (this.currentCount == 0) this.readNextGroup(); + if (currentCount == 0 && !readNextGroup()) break; int n = Math.min(left, this.currentCount); switch (mode) { case RLE: @@ -505,9 +821,14 @@ private int readIntLittleEndianPaddedOnBitWidth() throws IOException { } /** - * Reads the next group. + * Reads the next group. Returns false if no more group available. */ - private void readNextGroup() { + private boolean readNextGroup() { + if (in.available() <= 0) { + currentCount = 0; + return false; + } + try { int header = readUnsignedVarInt(); this.mode = (header & 1) == 0 ? MODE.RLE : MODE.PACKED; @@ -515,7 +836,7 @@ private void readNextGroup() { case RLE: this.currentCount = header >>> 1; this.currentValue = readIntLittleEndianPaddedOnBitWidth(); - return; + break; case PACKED: int numGroups = header >>> 1; this.currentCount = numGroups * 8; @@ -531,13 +852,15 @@ private void readNextGroup() { this.packer.unpack8Values(buffer, buffer.position(), this.currentBuffer, valueIndex); valueIndex += 8; } - return; + break; default: throw new ParquetDecodingException("not a valid mode " + this.mode); } } catch (IOException e) { throw new ParquetDecodingException("Failed to read from input stream", e); } + + return true; } /** @@ -546,7 +869,7 @@ private void readNextGroup() { private void skipValues(int n) { int left = n; while (left > 0) { - if (this.currentCount == 0) this.readNextGroup(); + if (this.currentCount == 0 && !readNextGroup()) break; int num = Math.min(left, this.currentCount); switch (mode) { case RLE: diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java index d246a3c24e4a6..505377bdb683e 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OnHeapColumnVector.java @@ -127,7 +127,7 @@ public void putNotNulls(int rowId, int count) { @Override public boolean isNullAt(int rowId) { - return nulls[rowId] == 1; + return isAllNull || nulls[rowId] == 1; } // diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java index ae457a16123d2..9ffb733a461ad 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/WritableColumnVector.java @@ -53,7 +53,7 @@ public abstract class WritableColumnVector extends ColumnVector { * Resets this column for writing. The currently stored values are no longer accessible. */ public void reset() { - if (isConstant) return; + if (isConstant || isAllNull) return; if (childColumns != null) { for (WritableColumnVector c: childColumns) { @@ -83,6 +83,10 @@ public void close() { dictionary = null; } + public void reserveAdditional(int additionalCapacity) { + reserve(elementsAppended + additionalCapacity); + } + public void reserve(int requiredCapacity) { if (requiredCapacity < 0) { throwUnsupportedException(requiredCapacity, null); @@ -117,7 +121,7 @@ private void throwUnsupportedException(int requiredCapacity, Throwable cause) { @Override public boolean hasNull() { - return numNulls > 0; + return isAllNull || numNulls > 0; } @Override @@ -714,15 +718,47 @@ public WritableColumnVector arrayData() { public WritableColumnVector getChild(int ordinal) { return childColumns[ordinal]; } /** - * Returns the elements appended. + * Returns the number of child vectors. + */ + public int getNumChildren() { + return childColumns.length; + } + + /** + * Returns the elements appended. This is useful */ public final int getElementsAppended() { return elementsAppended; } + /** + * Increment number of elements appended by 'num'. + * + * This is useful when one wants to use the 'putXXX' API to add new elements to the vector, but + * still want to keep count of how many elements have been added (since the 'putXXX' APIs don't + * increment count). + */ + public final void addElementsAppended(int num) { + elementsAppended += num; + } + /** * Marks this column as being constant. */ public final void setIsConstant() { isConstant = true; } + /** + * Marks this column only contains null values. + */ + public final void setAllNull() { + isAllNull = true; + } + + /** + * Whether this column only contains null values. + */ + public final boolean isAllNull() { + return isAllNull; + } + /** * Maximum number of rows that can be stored in this column. */ @@ -745,6 +781,12 @@ public WritableColumnVector arrayData() { */ protected boolean isConstant; + /** + * True if this column only contains nulls. This means the column values never change, even + * across resets. Comparing to 'isConstant' above, this doesn't require any allocation of space. + */ + protected boolean isAllNull; + /** * Default size of each array length value. This grows as necessary. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 18876dedb951e..44dc145d36e68 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -45,6 +45,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.GenerateUnsafeProjectio import org.apache.spark.sql.catalyst.parser.LegacyTypeStringParser import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.execution.WholeStageCodegenExec import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.vectorized.{OffHeapColumnVector, OnHeapColumnVector} import org.apache.spark.sql.internal.SQLConf @@ -173,8 +174,8 @@ class ParquetFileFormat override def supportBatch(sparkSession: SparkSession, schema: StructType): Boolean = { val conf = sparkSession.sessionState.conf conf.parquetVectorizedReaderEnabled && conf.wholeStageEnabled && - schema.length <= conf.wholeStageMaxNumFields && - schema.forall(_.dataType.isInstanceOf[AtomicType]) + ParquetUtils.isBatchReadSupportedForSchema(conf, schema) && + !WholeStageCodegenExec.isTooManyFields(conf, schema) } override def vectorTypes( @@ -240,8 +241,7 @@ class ParquetFileFormat val sqlConf = sparkSession.sessionState.conf val enableOffHeapColumnVector = sqlConf.offHeapColumnVectorEnabled val enableVectorizedReader: Boolean = - sqlConf.parquetVectorizedReaderEnabled && - resultSchema.forall(_.dataType.isInstanceOf[AtomicType]) + ParquetUtils.isBatchReadSupportedForSchema(sqlConf, resultSchema) val enableRecordFilter: Boolean = sqlConf.parquetRecordFilterEnabled val timestampConversion: Boolean = sqlConf.isParquetINT96TimestampConversion val capacity = sqlConf.parquetVectorizedReaderBatchSize diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala index 34a4eb8c002d6..0e065f19a88a4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.datasources.parquet +import java.util.Locale + import org.apache.hadoop.conf.Configuration import org.apache.parquet.io.{ColumnIO, ColumnIOFactory, GroupColumnIO, PrimitiveColumnIO} import org.apache.parquet.schema._ @@ -92,10 +94,16 @@ class ParquetToSparkSchemaConverter( private def convertInternal( groupColumn: GroupColumnIO, sparkReadSchema: Option[StructType] = None): ParquetColumn = { + // First convert the read schema into a map from field name to the field itself, to avoid O(n) + // lookup cost below. + val schemaMapOpt = sparkReadSchema.map { schema => + schema.map(f => normalizeFieldName(f.name) -> f).toMap + } + val converted = (0 until groupColumn.getChildrenCount).map { i => val field = groupColumn.getChild(i) - val fieldFromReadSchema = sparkReadSchema.flatMap { schema => - schema.find(f => isSameFieldName(f.name, field.getName, caseSensitive)) + val fieldFromReadSchema = schemaMapOpt.flatMap { schemaMap => + schemaMap.get(normalizeFieldName(field.getName)) } var fieldReadType = fieldFromReadSchema.map(_.dataType) @@ -146,9 +154,8 @@ class ParquetToSparkSchemaConverter( ParquetColumn(StructType(converted.map(_._1)), groupColumn, converted.map(_._2)) } - private def isSameFieldName(left: String, right: String, caseSensitive: Boolean): Boolean = - if (!caseSensitive) left.equalsIgnoreCase(right) - else left == right + private def normalizeFieldName(name: String): String = + if (caseSensitive) name else name.toLowerCase(Locale.ROOT) /** * Converts a Parquet [[Type]] to a [[ParquetColumn]] which wraps a Spark SQL [[DataType]] with diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala index 2c565c8890e70..9f2e6580ecb46 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetUtils.scala @@ -36,8 +36,9 @@ import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec import org.apache.spark.sql.connector.expressions.aggregate.{Aggregation, Count, CountStar, Max, Min} import org.apache.spark.sql.execution.datasources.AggregatePushDownUtils import org.apache.spark.sql.execution.datasources.v2.V2ColumnUtils +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.{LegacyBehaviorPolicy, PARQUET_AGGREGATE_PUSHDOWN_ENABLED} -import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructField, StructType} +import org.apache.spark.sql.types.{ArrayType, AtomicType, DataType, MapType, StructField, StructType} object ParquetUtils { def inferSchema( @@ -187,6 +188,30 @@ object ParquetUtils { } } + /** + * Whether columnar read is supported for the input `schema`. + */ + def isBatchReadSupportedForSchema(sqlConf: SQLConf, schema: StructType): Boolean = + sqlConf.parquetVectorizedReaderEnabled && + schema.forall(f => isBatchReadSupported(sqlConf, f.dataType)) + + def isBatchReadSupported(sqlConf: SQLConf, dt: DataType): Boolean = dt match { + case _: AtomicType => + true + case at: ArrayType => + sqlConf.parquetVectorizedReaderNestedColumnEnabled && + isBatchReadSupported(sqlConf, at.elementType) + case mt: MapType => + sqlConf.parquetVectorizedReaderNestedColumnEnabled && + isBatchReadSupported(sqlConf, mt.keyType) && + isBatchReadSupported(sqlConf, mt.valueType) + case st: StructType => + sqlConf.parquetVectorizedReaderNestedColumnEnabled && + st.fields.forall(f => isBatchReadSupported(sqlConf, f.dataType)) + case _ => + false + } + /** * When the partial aggregates (Max/Min/Count) are pushed down to Parquet, we don't need to * createRowBaseReader to read data from Parquet and aggregate at Spark layer. Instead we want diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala index 12b8a631196ae..ea4f5e0d287ab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala @@ -70,8 +70,8 @@ case class ParquetPartitionReaderFactory( private val isCaseSensitive = sqlConf.caseSensitiveAnalysis private val resultSchema = StructType(partitionSchema.fields ++ readDataSchema.fields) private val enableOffHeapColumnVector = sqlConf.offHeapColumnVectorEnabled - private val enableVectorizedReader: Boolean = sqlConf.parquetVectorizedReaderEnabled && - resultSchema.forall(_.dataType.isInstanceOf[AtomicType]) + private val enableVectorizedReader: Boolean = + ParquetUtils.isBatchReadSupportedForSchema(sqlConf, resultSchema) private val enableRecordFilter: Boolean = sqlConf.parquetRecordFilterEnabled private val timestampConversion: Boolean = sqlConf.isParquetINT96TimestampConversion private val capacity = sqlConf.parquetVectorizedReaderBatchSize diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index f5e5b46d29ce6..f98fb1eb2a57a 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -1125,7 +1125,8 @@ struct -- !query output == Physical Plan == *Filter v#x IN ([a],null) -+- FileScan parquet default.t[v#x] Batched: false, DataFilters: [v#x IN ([a],null)], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/t], PartitionFilters: [], PushedFilters: [In(v, [[a],null])], ReadSchema: struct> ++- *ColumnarToRow + +- FileScan parquet default.t[v#x] Batched: true, DataFilters: [v#x IN ([a],null)], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/t], PartitionFilters: [], PushedFilters: [In(v, [[a],null])], ReadSchema: struct> -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index 4e552d51a3953..a563eda1e7b04 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -1067,7 +1067,8 @@ struct -- !query output == Physical Plan == *Filter v#x IN ([a],null) -+- FileScan parquet default.t[v#x] Batched: false, DataFilters: [v#x IN ([a],null)], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/t], PartitionFilters: [], PushedFilters: [In(v, [[a],null])], ReadSchema: struct> ++- *ColumnarToRow + +- FileScan parquet default.t[v#x] Batched: true, DataFilters: [v#x IN ([a],null)], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/t], PartitionFilters: [], PushedFilters: [In(v, [[a],null])], ReadSchema: struct> -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileBasedDataSourceTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileBasedDataSourceTest.scala index c2dc20b0099a3..a154a6566307a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileBasedDataSourceTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileBasedDataSourceTest.scala @@ -38,6 +38,8 @@ private[sql] trait FileBasedDataSourceTest extends SQLTestUtils { protected val dataSourceName: String // The SQL config key for enabling vectorized reader. protected val vectorizedReaderEnabledKey: String + // The SQL config key for enabling vectorized reader for nested types. + protected val vectorizedReaderNestedEnabledKey: String /** * Reads data source file from given `path` as `DataFrame` and passes it to given function. @@ -52,8 +54,11 @@ private[sql] trait FileBasedDataSourceTest extends SQLTestUtils { f(spark.read.format(dataSourceName).load(path.toString)) } if (testVectorized) { - withSQLConf(vectorizedReaderEnabledKey -> "true") { - f(spark.read.format(dataSourceName).load(path.toString)) + Seq(true, false).foreach { enableNested => + withSQLConf(vectorizedReaderEnabledKey -> "true", + vectorizedReaderNestedEnabledKey -> enableNested.toString) { + f(spark.read.format(dataSourceName).load(path)) + } } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala index c36bfd9362466..46a7f8d3d90de 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala @@ -56,6 +56,8 @@ trait OrcTest extends QueryTest with FileBasedDataSourceTest with BeforeAndAfter override protected val dataSourceName: String = "orc" override protected val vectorizedReaderEnabledKey: String = SQLConf.ORC_VECTORIZED_READER_ENABLED.key + override protected val vectorizedReaderNestedEnabledKey: String = + SQLConf.ORC_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key protected override def beforeAll(): Unit = { super.beforeAll() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1SchemaPruningSuite.scala index 2ce38dae47db4..4d33eacecc130 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV1SchemaPruningSuite.scala @@ -25,6 +25,8 @@ class OrcV1SchemaPruningSuite extends SchemaPruningSuite { override protected val dataSourceName: String = "orc" override protected val vectorizedReaderEnabledKey: String = SQLConf.ORC_VECTORIZED_READER_ENABLED.key + override protected val vectorizedReaderNestedEnabledKey: String = + SQLConf.ORC_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key override protected def sparkConf: SparkConf = super diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala index 47254f4231d57..107a2b7912029 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala @@ -29,6 +29,8 @@ class OrcV2SchemaPruningSuite extends SchemaPruningSuite with AdaptiveSparkPlanH override protected val dataSourceName: String = "orc" override protected val vectorizedReaderEnabledKey: String = SQLConf.ORC_VECTORIZED_READER_ENABLED.key + override protected val vectorizedReaderNestedEnabledKey: String = + SQLConf.ORC_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key override protected def sparkConf: SparkConf = super diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetColumnIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetColumnIndexSuite.scala index bdcc1a4a5b959..64bfcdadcf45e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetColumnIndexSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetColumnIndexSuite.scala @@ -102,4 +102,17 @@ class ParquetColumnIndexSuite extends QueryTest with ParquetTest with SharedSpar }.toDF() checkUnalignedPages(df)(actions: _*) } + + test("reading unaligned pages - struct type") { + val df = (0 until 2000).map(i => Tuple1((i.toLong, i + ":" + "o" * (i / 100)))).toDF("s") + checkUnalignedPages(df)( + df => df.filter("s._1 = 500"), + df => df.filter("s._1 = 500 or s._1 = 1500"), + df => df.filter("s._1 = 500 or s._1 = 501 or s._1 = 1500"), + df => df.filter("s._1 = 500 or s._1 = 501 or s._1 = 1000 or s._1 = 1500"), + // range filter + df => df.filter("s._1 >= 500 and s._1 < 1000"), + df => df.filter("(s._1 >= 500 and s._1 < 1000) or (s._1 >= 1500 and s._1 < 1600)") + ) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala index 4eab5c3a09279..a9a8dacc374f0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormatSuite.scala @@ -89,6 +89,43 @@ abstract class ParquetFileFormatSuite } } } + + test("support batch reads for schema") { + val testUDT = new TestUDT.MyDenseVectorUDT + Seq(true, false).foreach { enabled => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> enabled.toString) { + Seq( + Seq(StructField("f1", IntegerType), StructField("f2", BooleanType)) -> true, + Seq(StructField("f1", IntegerType), StructField("f2", ArrayType(IntegerType))) -> enabled, + Seq(StructField("f1", BooleanType), StructField("f2", testUDT)) -> false + ).foreach { case (schema, expected) => + assert(ParquetUtils.isBatchReadSupportedForSchema(conf, StructType(schema)) == expected) + } + } + } + } + + test("support batch reads for data type") { + val testUDT = new TestUDT.MyDenseVectorUDT + Seq(true, false).foreach { enabled => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> enabled.toString) { + Seq( + IntegerType -> true, + BooleanType -> true, + ArrayType(TimestampType) -> enabled, + StructType(Seq(StructField("f1", DecimalType.SYSTEM_DEFAULT), + StructField("f2", StringType))) -> enabled, + MapType(keyType = LongType, valueType = DateType) -> enabled, + testUDT -> false, + ArrayType(testUDT) -> false, + StructType(Seq(StructField("f1", ByteType), StructField("f2", testUDT))) -> false, + MapType(keyType = testUDT, valueType = BinaryType) -> false + ).foreach { case (dt, expected) => + assert(ParquetUtils.isBatchReadSupported(conf, dt) == expected) + } + } + } + } } class ParquetFileFormatV1Suite extends ParquetFileFormatSuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala index 99b2d9844ed1b..4d01db999fba5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetIOSuite.scala @@ -358,6 +358,357 @@ class ParquetIOSuite extends QueryTest with ParquetTest with SharedSparkSession } } + test("vectorized reader: array") { + val data = Seq( + Tuple1(null), + Tuple1(Seq()), + Tuple1(Seq("a", "b", "c")), + Tuple1(Seq(null)) + ) + + withParquetFile(data) { file => + readParquetFile(file) { df => + checkAnswer(df.sort("_1"), + Row(null) :: Row(Seq()) :: Row(Seq(null)) :: Row(Seq("a", "b", "c")) :: Nil + ) + } + } + } + + test("vectorized reader: missing array") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true") { + val data = Seq( + Tuple1(null), + Tuple1(Seq()), + Tuple1(Seq("a", "b", "c")), + Tuple1(Seq(null)) + ) + + val readSchema = new StructType().add("_2", new ArrayType( + new StructType().add("a", LongType, nullable = true), + containsNull = true) + ) + + withParquetFile(data) { file => + checkAnswer(spark.read.schema(readSchema).parquet(file), + Row(null) :: Row(null) :: Row(null) :: Row(null) :: Nil + ) + } + } + } + + test("vectorized reader: array of array") { + val data = Seq( + Tuple1(Seq(Seq(0, 1), Seq(2, 3))), + Tuple1(Seq(Seq(4, 5), Seq(6, 7))) + ) + + withParquetFile(data) { file => + readParquetFile(file) { df => + checkAnswer(df.sort("_1"), + Row(Seq(Seq(0, 1), Seq(2, 3))) :: Row(Seq(Seq(4, 5), Seq(6, 7))) :: Nil + ) + } + } + } + + test("vectorized reader: struct of array") { + val data = Seq( + Tuple1(Tuple2("a", null)), + Tuple1(null), + Tuple1(Tuple2(null, null)), + Tuple1(Tuple2(null, Seq("b", "c"))), + Tuple1(Tuple2("d", Seq("e", "f"))), + Tuple1(null) + ) + + withParquetFile(data) { file => + readParquetFile(file) { df => + checkAnswer(df, + Row(Row("a", null)) :: Row(null) :: Row(Row(null, null)) :: + Row(Row(null, Seq("b", "c"))) :: Row(Row("d", Seq("e", "f"))) :: Row(null) :: Nil + ) + } + } + } + + test("vectorized reader: array of struct") { + val data = Seq( + Tuple1(null), + Tuple1(Seq()), + Tuple1(Seq(Tuple2("a", null), Tuple2(null, "b"))), + Tuple1(Seq(null)), + Tuple1(Seq(Tuple2(null, null), Tuple2("c", null), null)), + Tuple1(Seq()) + ) + + withParquetFile(data) { file => + readParquetFile(file) { df => + checkAnswer(df, + Row(null) :: + Row(Seq()) :: + Row(Seq(Row("a", null), Row(null, "b"))) :: + Row(Seq(null)) :: + Row(Seq(Row(null, null), Row("c", null), null)) :: + Row(Seq()) :: + Nil) + } + } + } + + + test("vectorized reader: array of nested struct") { + val data = Seq( + Tuple1(Tuple2("a", null)), + Tuple1(Tuple2("b", Seq(Tuple2("c", "d")))), + Tuple1(null), + Tuple1(Tuple2("e", Seq(Tuple2("f", null), Tuple2(null, "g")))), + Tuple1(Tuple2(null, null)), + Tuple1(Tuple2(null, Seq(null))), + Tuple1(Tuple2(null, Seq(Tuple2(null, null), Tuple2("h", null), null))), + Tuple1(Tuple2("i", Seq())), + Tuple1(null) + ) + + withParquetFile(data) { file => + readParquetFile(file) { df => + checkAnswer(df, + Row(Row("a", null)) :: + Row(Row("b", Seq(Row("c", "d")))) :: + Row(null) :: + Row(Row("e", Seq(Row("f", null), Row(null, "g")))) :: + Row(Row(null, null)) :: + Row(Row(null, Seq(null))) :: + Row(Row(null, Seq(Row(null, null), Row("h", null), null))) :: + Row(Row("i", Seq())) :: + Row(null) :: + Nil) + } + } + } + + test("vectorized reader: required array with required elements") { + Seq(true, false).foreach { dictionaryEnabled => + def makeRawParquetFile(path: Path, expected: Seq[Seq[String]]): Unit = { + val schemaStr = + """message spark_schema { + | required group _1 (LIST) { + | repeated group list { + | required binary element (UTF8); + | } + | } + |} + """.stripMargin + val schema = MessageTypeParser.parseMessageType(schemaStr) + val writer = createParquetWriter(schema, path, dictionaryEnabled) + + val factory = new SimpleGroupFactory(schema) + expected.foreach { values => + val group = factory.newGroup() + val list = group.addGroup(0) + values.foreach { value => + list.addGroup(0).append("element", value) + } + writer.write(group) + } + writer.close() + } + + // write the following into the Parquet file: + // 0: [ "a", "b" ] + // 1: [ ] + // 2: [ "c", "d" ] + withTempDir { dir => + val path = new Path(dir.toURI.toString, "part-r-0.parquet") + val expected = Seq(Seq("a", "b"), Seq(), Seq("c", "d")) + makeRawParquetFile(path, expected) + readParquetFile(path.toString) { df => checkAnswer(df, expected.map(Row(_))) } + } + } + } + + test("vectorized reader: optional array with required elements") { + Seq(true, false).foreach { dictionaryEnabled => + def makeRawParquetFile(path: Path, expected: Seq[Seq[String]]): Unit = { + val schemaStr = + """message spark_schema { + | optional group _1 (LIST) { + | repeated group list { + | required binary element (UTF8); + | } + | } + |} + """.stripMargin + val schema = MessageTypeParser.parseMessageType(schemaStr) + val writer = createParquetWriter(schema, path, dictionaryEnabled) + + val factory = new SimpleGroupFactory(schema) + expected.foreach { values => + val group = factory.newGroup() + if (values != null) { + val list = group.addGroup(0) + values.foreach { value => + list.addGroup(0).append("element", value) + } + } + writer.write(group) + } + writer.close() + } + + // write the following into the Parquet file: + // 0: [ "a", "b" ] + // 1: null + // 2: [ "c", "d" ] + // 3: [ ] + // 4: [ "e", "f" ] + withTempDir { dir => + val path = new Path(dir.toURI.toString, "part-r-0.parquet") + val expected = Seq(Seq("a", "b"), null, Seq("c", "d"), Seq(), Seq("e", "f")) + makeRawParquetFile(path, expected) + readParquetFile(path.toString) { df => checkAnswer(df, expected.map(Row(_))) } + } + } + } + + test("vectorized reader: required array with optional elements") { + Seq(true, false).foreach { dictionaryEnabled => + def makeRawParquetFile(path: Path, expected: Seq[Seq[String]]): Unit = { + val schemaStr = + """message spark_schema { + | required group _1 (LIST) { + | repeated group list { + | optional binary element (UTF8); + | } + | } + |} + """.stripMargin + val schema = MessageTypeParser.parseMessageType(schemaStr) + val writer = createParquetWriter(schema, path, dictionaryEnabled) + + val factory = new SimpleGroupFactory(schema) + expected.foreach { values => + val group = factory.newGroup() + if (values != null) { + val list = group.addGroup(0) + values.foreach { value => + val group = list.addGroup(0) + if (value != null) group.append("element", value) + } + } + writer.write(group) + } + writer.close() + } + + // write the following into the Parquet file: + // 0: [ "a", null ] + // 3: [ ] + // 4: [ null, "b" ] + withTempDir { dir => + val path = new Path(dir.toURI.toString, "part-r-0.parquet") + val expected = Seq(Seq("a", null), Seq(), Seq(null, "b")) + makeRawParquetFile(path, expected) + readParquetFile(path.toString) { df => checkAnswer(df, expected.map(Row(_))) } + } + } + } + + test("vectorized reader: required array with legacy format") { + Seq(true, false).foreach { dictionaryEnabled => + def makeRawParquetFile(path: Path, expected: Seq[Seq[String]]): Unit = { + val schemaStr = + """message spark_schema { + | repeated binary element (UTF8); + |} + """.stripMargin + val schema = MessageTypeParser.parseMessageType(schemaStr) + val writer = createParquetWriter(schema, path, dictionaryEnabled) + + val factory = new SimpleGroupFactory(schema) + expected.foreach { values => + val group = factory.newGroup() + values.foreach(group.append("element", _)) + writer.write(group) + } + writer.close() + } + + // write the following into the Parquet file: + // 0: [ "a", "b" ] + // 3: [ ] + // 4: [ "c", "d" ] + withTempDir { dir => + val path = new Path(dir.toURI.toString, "part-r-0.parquet") + val expected = Seq(Seq("a", "b"), Seq(), Seq("c", "d")) + makeRawParquetFile(path, expected) + readParquetFile(path.toString) { df => checkAnswer(df, expected.map(Row(_))) } + } + } + } + + test("vectorized reader: struct") { + val data = Seq( + Tuple1(null), + Tuple1((1, "a")), + Tuple1((2, null)), + Tuple1((3, "b")), + Tuple1(null) + ) + + withParquetFile(data) { file => + readParquetFile(file) { df => + checkAnswer(df.sort("_1"), + Row(null) :: Row(null) :: Row(Row(1, "a")) :: Row(Row(2, null)) :: Row(Row(3, "b")) :: Nil + ) + } + } + } + + test("vectorized reader: missing all struct fields") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true") { + val data = Seq( + Tuple1((1, "a")), + Tuple1((2, null)), + Tuple1(null) + ) + + val readSchema = new StructType().add("_1", + new StructType() + .add("_3", IntegerType, nullable = true) + .add("_4", LongType, nullable = true), + nullable = true) + + withParquetFile(data) { file => + checkAnswer(spark.read.schema(readSchema).parquet(file), + Row(null) :: Row(null) :: Row(null) :: Nil + ) + } + } + } + + test("vectorized reader: missing some struct fields") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true") { + val data = Seq( + Tuple1((1, "a")), + Tuple1((2, null)), + Tuple1(null) + ) + + val readSchema = new StructType().add("_1", + new StructType() + .add("_1", IntegerType, nullable = true) + .add("_3", LongType, nullable = true), + nullable = true) + + withParquetFile(data) { file => + checkAnswer(spark.read.schema(readSchema).parquet(file), + Row(null) :: Row(Row(1, null)) :: Row(Row(2, null)) :: Nil + ) + } + } + } + test("SPARK-34817: Support for unsigned Parquet logical types") { val parquetSchema = MessageTypeParser.parseMessageType( """message root { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala index cab93bd96fff4..6a93b72472c73 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala @@ -31,6 +31,8 @@ abstract class ParquetSchemaPruningSuite extends SchemaPruningSuite with Adaptiv override protected val dataSourceName: String = "parquet" override protected val vectorizedReaderEnabledKey: String = SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key + override protected val vectorizedReaderNestedEnabledKey: String = + SQLConf.ORC_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala index 18690844d484c..9eca308f16fcc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetTest.scala @@ -51,6 +51,8 @@ private[sql] trait ParquetTest extends FileBasedDataSourceTest { override protected val dataSourceName: String = "parquet" override protected val vectorizedReaderEnabledKey: String = SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key + override protected val vectorizedReaderNestedEnabledKey: String = + SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key /** * Reads the parquet file at `path` diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorizedSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorizedSuite.scala index 36a52b60688e9..94509185cc121 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorizedSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorizedSuite.scala @@ -195,6 +195,290 @@ class ParquetVectorizedSuite extends QueryTest with ParquetTest with SharedSpark } } + test("nested type - single page, no column index") { + (1 to 4).foreach { batchSize => + Seq(true, false).foreach { dictionaryEnabled => + testNestedStringArrayOneLevel(None, None, Seq(4), + Seq(Seq("a", "b", "c", "d")), + Seq(0, 1, 1, 1), Seq(3, 3, 3, 3), Seq("a", "b", "c", "d"), batchSize, + dictionaryEnabled = dictionaryEnabled) + + testNestedStringArrayOneLevel(None, None, Seq(4), + Seq(Seq("a", "b"), Seq("c", "d")), + Seq(0, 1, 0, 1), Seq(3, 3, 3, 3), Seq("a", "b", "c", "d"), batchSize, + dictionaryEnabled = dictionaryEnabled) + + testNestedStringArrayOneLevel(None, None, Seq(4), + Seq(Seq("a"), Seq("b"), Seq("c"), Seq("d")), + Seq(0, 0, 0, 0), Seq(3, 3, 3, 3), Seq("a", "b", "c", "d"), batchSize, + dictionaryEnabled = dictionaryEnabled) + + testNestedStringArrayOneLevel(None, None, Seq(4), + Seq(Seq("a"), Seq(null), Seq("c"), Seq(null)), + Seq(0, 0, 0, 0), Seq(3, 2, 3, 2), Seq("a", null, "c", null), batchSize, + dictionaryEnabled = dictionaryEnabled) + + testNestedStringArrayOneLevel(None, None, Seq(4), + Seq(Seq("a"), Seq(null, null, null)), + Seq(0, 0, 1, 1), Seq(3, 2, 2, 2), Seq("a", null, null, null), batchSize, + dictionaryEnabled = dictionaryEnabled) + + testNestedStringArrayOneLevel(None, None, Seq(6), + Seq(Seq("a"), Seq(null, null, null), null, Seq()), + Seq(0, 0, 1, 1, 0, 0), Seq(3, 2, 2, 2, 0, 1), Seq("a", null, null, null, null, null), + batchSize, dictionaryEnabled = dictionaryEnabled) + + testNestedStringArrayOneLevel(None, None, Seq(8), + Seq(Seq("a"), Seq(), Seq(), null, Seq("b", null, "c"), null), + Seq(0, 0, 0, 0, 0, 1, 1, 0), Seq(3, 1, 1, 0, 3, 2, 3, 0), + Seq("a", null, null, null, "b", null, "c", null), batchSize, + dictionaryEnabled = dictionaryEnabled) + } + } + } + + test("nested type - multiple page, no column index") { + BATCH_SIZE_CONFIGS.foreach { batchSize => + Seq(Seq(2, 3, 2, 3)).foreach { pageSizes => + Seq(true, false).foreach { dictionaryEnabled => + testNestedStringArrayOneLevel(None, None, pageSizes, + Seq(Seq("a"), Seq(), Seq("b", null, "c"), Seq("d", "e"), Seq(null), Seq(), null), + Seq(0, 0, 0, 1, 1, 0, 1, 0, 0, 0), Seq(3, 1, 3, 2, 3, 3, 3, 2, 1, 0), + Seq("a", null, "b", null, "c", "d", "e", null, null, null), batchSize, + dictionaryEnabled = dictionaryEnabled) + } + } + } + } + + test("nested type - multiple page, no column index, batch span multiple pages") { + (1 to 6).foreach { batchSize => + Seq(true, false).foreach { dictionaryEnabled => + // a list across multiple pages + testNestedStringArrayOneLevel(None, None, Seq(1, 5), + Seq(Seq("a"), Seq("b", "c", "d", "e", "f")), + Seq(0, 0, 1, 1, 1, 1), Seq.fill(6)(3), Seq("a", "b", "c", "d", "e", "f"), batchSize, + dictionaryEnabled = dictionaryEnabled) + + testNestedStringArrayOneLevel(None, None, Seq(1, 3, 2), + Seq(Seq("a"), Seq("b", "c", "d"), Seq("e", "f")), + Seq(0, 0, 1, 1, 0, 1), Seq.fill(6)(3), Seq("a", "b", "c", "d", "e", "f"), batchSize, + dictionaryEnabled = dictionaryEnabled) + + testNestedStringArrayOneLevel(None, None, Seq(2, 2, 2), + Seq(Seq("a", "b"), Seq("c", "d"), Seq("e", "f")), + Seq(0, 1, 0, 1, 0, 1), Seq.fill(6)(3), Seq("a", "b", "c", "d", "e", "f"), batchSize, + dictionaryEnabled = dictionaryEnabled) + } + } + } + + test("nested type - RLE encoding") { + (1 to 8).foreach { batchSize => + Seq(Seq(26), Seq(4, 3, 11, 4, 4), Seq(18, 8)).foreach { pageSizes => + Seq(true, false).foreach { dictionaryEnabled => + testNestedStringArrayOneLevel(None, None, pageSizes, + (0 to 6).map(i => Seq(('a' + i).toChar.toString)) ++ + Seq((7 to 17).map(i => ('a' + i).toChar.toString)) ++ + (18 to 25).map(i => Seq(('a' + i).toChar.toString)), + Seq.fill(8)(0) ++ Seq.fill(10)(1) ++ Seq.fill(8)(0), Seq.fill(26)(3), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + } + } + } + } + + test("nested type - column index with ranges") { + (1 to 8).foreach { batchSize => + Seq(Seq(8), Seq(6, 2), Seq(1, 5, 2)).foreach { pageSizes => + Seq(true, false).foreach { dictionaryEnabled => + var ranges = Seq((1L, 2L)) + testNestedStringArrayOneLevel(None, Some(ranges), pageSizes, + Seq(Seq("b", "c", "d", "e", "f"), Seq("g", "h")), + Seq(0, 0, 1, 1, 1, 1, 0, 1), Seq.fill(8)(3), + Seq("a", "b", "c", "d", "e", "f", "g", "h"), + batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((3L, 5L)) + testNestedStringArrayOneLevel(None, Some(ranges), pageSizes, + Seq(), + Seq(0, 0, 1, 1, 1, 1, 0, 1), Seq.fill(8)(3), + Seq("a", "b", "c", "d", "e", "f", "g", "h"), + batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((0L, 0L)) + testNestedStringArrayOneLevel(None, Some(ranges), pageSizes, + Seq(Seq("a")), + Seq(0, 0, 1, 1, 1, 1, 0, 1), Seq.fill(8)(3), + Seq("a", "b", "c", "d", "e", "f", "g", "h"), + batchSize, dictionaryEnabled = dictionaryEnabled) + } + } + } + } + + test("nested type - column index with ranges and RLE encoding") { + BATCH_SIZE_CONFIGS.foreach { batchSize => + Seq(Seq(26), Seq(4, 3, 11, 4, 4), Seq(18, 8)).foreach { pageSizes => + Seq(true, false).foreach { dictionaryEnabled => + var ranges = Seq((0L, 2L)) + testNestedStringArrayOneLevel(None, Some(ranges), pageSizes, + Seq(Seq("a"), Seq("b"), Seq("c")), + Seq.fill(8)(0) ++ Seq.fill(10)(1) ++ Seq.fill(8)(0), Seq.fill(26)(3), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((4L, 6L)) + testNestedStringArrayOneLevel(None, Some(ranges), pageSizes, + Seq(Seq("e"), Seq("f"), Seq("g")), + Seq.fill(8)(0) ++ Seq.fill(10)(1) ++ Seq.fill(8)(0), Seq.fill(26)(3), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((6L, 9L)) + testNestedStringArrayOneLevel(None, Some(ranges), pageSizes, + Seq(Seq("g")) ++ Seq((7 to 17).map(i => ('a' + i).toChar.toString)) ++ + Seq(Seq("s"), Seq("t")), + Seq.fill(8)(0) ++ Seq.fill(10)(1) ++ Seq.fill(8)(0), Seq.fill(26)(3), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((4L, 6L), (14L, 20L)) + testNestedStringArrayOneLevel(None, Some(ranges), pageSizes, + Seq(Seq("e"), Seq("f"), Seq("g"), Seq("y"), Seq("z")), + Seq.fill(8)(0) ++ Seq.fill(10)(1) ++ Seq.fill(8)(0), Seq.fill(26)(3), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + } + } + } + } + + test("nested type - column index with ranges and nulls") { + BATCH_SIZE_CONFIGS.foreach { batchSize => + Seq(Seq(16), Seq(8, 8), Seq(4, 4, 4, 4), Seq(2, 6, 4, 4)).foreach { pageSizes => + Seq(true, false).foreach { dictionaryEnabled => + testNestedStringArrayOneLevel(None, None, pageSizes, + Seq(Seq("a", null), Seq("c", "d"), Seq(), Seq("f", null, "h"), + Seq("i", "j", "k", null), Seq(), null, null, Seq()), + Seq(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0), + Seq(3, 2, 3, 3, 1, 3, 2, 3, 3, 3, 3, 2, 1, 0, 0, 1), + (0 to 15), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + var ranges = Seq((0L, 15L)) + testNestedStringArrayOneLevel(None, Some(ranges), pageSizes, + Seq(Seq("a", null), Seq("c", "d"), Seq(), Seq("f", null, "h"), + Seq("i", "j", "k", null), Seq(), null, null, Seq()), + Seq(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0), + Seq(3, 2, 3, 3, 1, 3, 2, 3, 3, 3, 3, 2, 1, 0, 0, 1), + (0 to 15), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((0L, 2L)) + testNestedStringArrayOneLevel(None, Some(ranges), pageSizes, + Seq(Seq("a", null), Seq("c", "d"), Seq()), + Seq(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0), + Seq(3, 2, 3, 3, 1, 3, 2, 3, 3, 3, 3, 2, 1, 0, 0, 1), + (0 to 15), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((3L, 7L)) + testNestedStringArrayOneLevel(None, Some(ranges), pageSizes, + Seq(Seq("f", null, "h"), Seq("i", "j", "k", null), Seq(), null, null), + Seq(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0), + Seq(3, 2, 3, 3, 1, 3, 2, 3, 3, 3, 3, 2, 1, 0, 0, 1), + (0 to 15), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((5, 12L)) + testNestedStringArrayOneLevel(None, Some(ranges), pageSizes, + Seq(Seq(), null, null, Seq()), + Seq(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0), + Seq(3, 2, 3, 3, 1, 3, 2, 3, 3, 3, 3, 2, 1, 0, 0, 1), + (0 to 15), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((5, 12L)) + testNestedStringArrayOneLevel(None, Some(ranges), pageSizes, + Seq(Seq(), null, null, Seq()), + Seq(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0), + Seq(3, 2, 3, 3, 1, 3, 2, 3, 3, 3, 3, 2, 1, 0, 0, 1), + (0 to 15), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((0L, 0L), (2, 3), (5, 7), (8, 10)) + testNestedStringArrayOneLevel(None, Some(ranges), pageSizes, + Seq(Seq("a", null), Seq(), Seq("f", null, "h"), Seq(), null, null, Seq()), + Seq(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0), + Seq(3, 2, 3, 3, 1, 3, 2, 3, 3, 3, 3, 2, 1, 0, 0, 1), + (0 to 15), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + } + } + } + } + + test("nested type - column index with ranges, nulls and first row indexes") { + BATCH_SIZE_CONFIGS.foreach { batchSize => + Seq(true, false).foreach { dictionaryEnabled => + val pageSizes = Seq(4, 4, 4, 4) + var firstRowIndexes = Seq(10L, 20, 30, 40) + var ranges = Seq((0L, 5L)) + testNestedStringArrayOneLevel(Some(firstRowIndexes), Some(ranges), pageSizes, + Seq(), + Seq(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0), + Seq(3, 2, 3, 3, 1, 3, 2, 3, 3, 3, 3, 2, 1, 0, 0, 1), + (0 to 15), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((5L, 15)) + testNestedStringArrayOneLevel(Some(firstRowIndexes), Some(ranges), pageSizes, + Seq(Seq("a", null), Seq("c", "d")), + Seq(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0), + Seq(3, 2, 3, 3, 1, 3, 2, 3, 3, 3, 3, 2, 1, 0, 0, 1), + (0 to 15), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((25, 28)) + testNestedStringArrayOneLevel(Some(firstRowIndexes), Some(ranges), pageSizes, + Seq(), + Seq(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0), + Seq(3, 2, 3, 3, 1, 3, 2, 3, 3, 3, 3, 2, 1, 0, 0, 1), + (0 to 15), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((35, 45)) + testNestedStringArrayOneLevel(Some(firstRowIndexes), Some(ranges), pageSizes, + Seq(Seq(), null, null, Seq()), + Seq(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0), + Seq(3, 2, 3, 3, 1, 3, 2, 3, 3, 3, 3, 2, 1, 0, 0, 1), + (0 to 15), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((45, 55)) + testNestedStringArrayOneLevel(Some(firstRowIndexes), Some(ranges), pageSizes, + Seq(), + Seq(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0), + Seq(3, 2, 3, 3, 1, 3, 2, 3, 3, 3, 3, 2, 1, 0, 0, 1), + (0 to 15), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((45, 55)) + testNestedStringArrayOneLevel(Some(firstRowIndexes), Some(ranges), pageSizes, + Seq(), + Seq(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0), + Seq(3, 2, 3, 3, 1, 3, 2, 3, 3, 3, 3, 2, 1, 0, 0, 1), + (0 to 15), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + + ranges = Seq((15, 29), (31, 35)) + testNestedStringArrayOneLevel(Some(firstRowIndexes), Some(ranges), pageSizes, + Seq(Seq(), Seq("f", null, "h")), + Seq(0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0), + Seq(3, 2, 3, 3, 1, 3, 2, 3, 3, 3, 3, 2, 1, 0, 0, 1), + (0 to 15), + batchSize = batchSize, dictionaryEnabled = dictionaryEnabled) + } + } + } + private def testPrimitiveString( firstRowIndexesOpt: Option[Seq[Long]], rangesOpt: Option[Seq[(Long, Long)]], @@ -236,6 +520,52 @@ class ParquetVectorizedSuite extends QueryTest with ParquetTest with SharedSpark rangesOpt), expectedValues.map(i => Row(i)), batchSize) } + private def testNestedStringArrayOneLevel( + firstRowIndexesOpt: Option[Seq[Long]], + rangesOpt: Option[Seq[(Long, Long)]], + pageSizes: Seq[Int], + expected: Seq[Seq[String]], + rls: Seq[Int], + dls: Seq[Int], + values: Seq[String] = VALUES, + batchSize: Int, + dictionaryEnabled: Boolean = false): Unit = { + assert(pageSizes.sum == rls.length && rls.length == dls.length) + firstRowIndexesOpt.foreach(a => assert(pageSizes.length == a.length)) + + val parquetSchema = MessageTypeParser.parseMessageType( + s"""message root { + | optional group _1 (LIST) { + | repeated group list { + | optional binary a(UTF8); + | } + | } + |} + |""".stripMargin + ) + + val maxRepLevel = 1 + val maxDefLevel = 3 + val ty = parquetSchema.getType("_1", "list", "a").asPrimitiveType() + val cd = new ColumnDescriptor(Seq("_1", "list", "a").toArray, ty, maxRepLevel, maxDefLevel) + + var i = 0 + var numRows = 0 + val memPageStore = new MemPageStore(expected.length) + val pageFirstRowIndexes = ArrayBuffer.empty[Long] + pageSizes.foreach { size => + pageFirstRowIndexes += numRows + numRows += rls.slice(i, i + size).count(_ == 0) + writeDataPage(cd, memPageStore, rls.slice(i, i + size), dls.slice(i, i + size), + values.slice(i, i + size), maxDefLevel, dictionaryEnabled) + i += size + } + + checkAnswer(expected.length, parquetSchema, + TestPageReadStore(memPageStore, firstRowIndexesOpt.getOrElse(pageFirstRowIndexes).toSeq, + rangesOpt), expected.map(i => Row(i)), batchSize) + } + /** * Write a single data page using repetition levels, definition levels and values provided. * From ae13b453f6b239af4c7f57cff99e7b8ef939cc9e Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Sun, 3 Apr 2022 09:52:48 +0900 Subject: [PATCH 088/535] [SPARK-38763][PYTHON] Support lambda `column` parameter of `DataFrame.rename` ### What changes were proposed in this pull request? Support lambda `column` parameter of `DataFrame.rename`. We may want to backport this to 3.3 since this is a regression. ### Why are the changes needed? To reach parity with Pandas. ### Does this PR introduce _any_ user-facing change? Yes. The regression is fixed; lambda `column` is supported again. ```py >>> psdf = ps.DataFrame({'x': [1, 2], 'y': [3, 4]}) >>> psdf.rename(columns=lambda x: x + 'o') xo yo 0 1 3 1 2 4 ``` ### How was this patch tested? Unit tests. Closes #36042 from xinrong-databricks/frame.rename. Authored-by: Xinrong Meng Signed-off-by: Hyukjin Kwon (cherry picked from commit 037d07c8acb864f495ea74afba94531c28c163ce) Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/frame.py | 16 ++++++++++------ python/pyspark/pandas/tests/test_dataframe.py | 9 +++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py index b355708e97575..6e8f69ad6e7ac 100644 --- a/python/pyspark/pandas/frame.py +++ b/python/pyspark/pandas/frame.py @@ -10580,7 +10580,7 @@ def rename( """ def gen_mapper_fn( - mapper: Union[Dict, Callable[[Any], Any]] + mapper: Union[Dict, Callable[[Any], Any]], skip_return_type: bool = False ) -> Tuple[Callable[[Any], Any], Dtype, DataType]: if isinstance(mapper, dict): mapper_dict = mapper @@ -10598,21 +10598,25 @@ def mapper_fn(x: Any) -> Any: raise KeyError("Index include value which is not in the `mapper`") return x + return mapper_fn, dtype, spark_return_type elif callable(mapper): mapper_callable = cast(Callable, mapper) - return_type = cast(ScalarType, infer_return_type(mapper)) - dtype = return_type.dtype - spark_return_type = return_type.spark_type def mapper_fn(x: Any) -> Any: return mapper_callable(x) + if skip_return_type: + return mapper_fn, None, None + else: + return_type = cast(ScalarType, infer_return_type(mapper)) + dtype = return_type.dtype + spark_return_type = return_type.spark_type + return mapper_fn, dtype, spark_return_type else: raise ValueError( "`mapper` or `index` or `columns` should be " "either dict-like or function type." ) - return mapper_fn, dtype, spark_return_type index_mapper_fn = None index_mapper_ret_stype = None @@ -10633,7 +10637,7 @@ def mapper_fn(x: Any) -> Any: index ) if columns: - columns_mapper_fn, _, _ = gen_mapper_fn(columns) + columns_mapper_fn, _, _ = gen_mapper_fn(columns, skip_return_type=True) if not index and not columns: raise ValueError("Either `index` or `columns` should be provided.") diff --git a/python/pyspark/pandas/tests/test_dataframe.py b/python/pyspark/pandas/tests/test_dataframe.py index 6f3c1c41653ad..1cc03bf06f8ca 100644 --- a/python/pyspark/pandas/tests/test_dataframe.py +++ b/python/pyspark/pandas/tests/test_dataframe.py @@ -817,11 +817,20 @@ def mul10(x) -> int: pdf1.rename(columns=str_lower, index={1: 10, 2: 20}), ) + self.assert_eq( + psdf1.rename(columns=lambda x: str.lower(x)), + pdf1.rename(columns=lambda x: str.lower(x)), + ) + idx = pd.MultiIndex.from_tuples([("X", "A"), ("X", "B"), ("Y", "C"), ("Y", "D")]) pdf2 = pd.DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=idx) psdf2 = ps.from_pandas(pdf2) self.assert_eq(psdf2.rename(columns=str_lower), pdf2.rename(columns=str_lower)) + self.assert_eq( + psdf2.rename(columns=lambda x: str.lower(x)), + pdf2.rename(columns=lambda x: str.lower(x)), + ) self.assert_eq( psdf2.rename(columns=str_lower, level=0), pdf2.rename(columns=str_lower, level=0) From 3d4d1edd534e0d19d9c7d978c345ad4973cb9456 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 3 Apr 2022 10:10:15 -0700 Subject: [PATCH 089/535] [SPARK-38776][MLLIB][TESTS] Disable ANSI_ENABLED explicitly in `ALSSuite` This PR aims to disable `ANSI_ENABLED` explicitly in the following tests of `ALSSuite`. ``` test("ALS validate input dataset") { test("input type validation") { ``` After SPARK-38490, this test became flaky in ANSI mode GitHub Action. ![Screen Shot 2022-04-03 at 12 07 29 AM](https://user-images.githubusercontent.com/9700541/161416006-7b76596f-c19a-4212-91d2-8602df569608.png) - https://github.com/apache/spark/runs/5800714463?check_suite_focus=true - https://github.com/apache/spark/runs/5803714260?check_suite_focus=true - https://github.com/apache/spark/runs/5803745768?check_suite_focus=true ``` [info] ALSSuite: ... [info] - ALS validate input dataset *** FAILED *** (2 seconds, 449 milliseconds) [info] Invalid Long: out of range "Job aborted due to stage failure: Task 0 in stage 100.0 failed 1 times, most recent failure: Lost task 0.0 in stage 100.0 (TID 348) (localhost executor driver): org.apache.spark.SparkArithmeticException: Casting 1231000000000 to int causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. ``` No. This is a test-only bug and fix. Pass the CIs. Closes #36051 from dongjoon-hyun/SPARK-38776. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit d18fd7bcbdfe028e2e985ec6a8ec2f78bd5599c4) Signed-off-by: Dongjoon Hyun --- .../spark/ml/recommendation/ALSSuite.scala | 71 ++++++++++--------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index b63c777bc3eb0..7372b2d7673f1 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -39,6 +39,7 @@ import org.apache.spark.scheduler.{SparkListener, SparkListenerStageCompleted} import org.apache.spark.sql.{DataFrame, Encoder, Row, SparkSession} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.functions.{col, lit} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.StreamingQueryException import org.apache.spark.sql.types._ import org.apache.spark.storage.StorageLevel @@ -213,7 +214,9 @@ class ALSSuite extends MLTest with DefaultReadWriteTest with Logging { } withClue("Valid Long Ids") { - df.select(checkedCast(lit(1231L))).collect() + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + df.select(checkedCast(lit(1231L))).collect() + } } withClue("Valid Decimal Ids") { @@ -677,40 +680,42 @@ class ALSSuite extends MLTest with DefaultReadWriteTest with Logging { (1, 1L, 1d, 0, 0L, 0d, 5.0) ).toDF("user", "user_big", "user_small", "item", "item_big", "item_small", "rating") val msg = "either out of Integer range or contained a fractional part" - withClue("fit should fail when ids exceed integer range. ") { - assert(intercept[SparkException] { - als.fit(df.select(df("user_big").as("user"), df("item"), df("rating"))) - }.getCause.getMessage.contains(msg)) - assert(intercept[SparkException] { - als.fit(df.select(df("user_small").as("user"), df("item"), df("rating"))) - }.getCause.getMessage.contains(msg)) - assert(intercept[SparkException] { - als.fit(df.select(df("item_big").as("item"), df("user"), df("rating"))) - }.getCause.getMessage.contains(msg)) - assert(intercept[SparkException] { - als.fit(df.select(df("item_small").as("item"), df("user"), df("rating"))) - }.getCause.getMessage.contains(msg)) - } - withClue("transform should fail when ids exceed integer range. ") { - val model = als.fit(df) - def testTransformIdExceedsIntRange[A : Encoder](dataFrame: DataFrame): Unit = { - val e1 = intercept[SparkException] { - model.transform(dataFrame).collect() - } - TestUtils.assertExceptionMsg(e1, msg) - val e2 = intercept[StreamingQueryException] { - testTransformer[A](dataFrame, model, "prediction") { _ => } + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + withClue("fit should fail when ids exceed integer range. ") { + assert(intercept[SparkException] { + als.fit(df.select(df("user_big").as("user"), df("item"), df("rating"))) + }.getCause.getMessage.contains(msg)) + assert(intercept[SparkException] { + als.fit(df.select(df("user_small").as("user"), df("item"), df("rating"))) + }.getCause.getMessage.contains(msg)) + assert(intercept[SparkException] { + als.fit(df.select(df("item_big").as("item"), df("user"), df("rating"))) + }.getCause.getMessage.contains(msg)) + assert(intercept[SparkException] { + als.fit(df.select(df("item_small").as("item"), df("user"), df("rating"))) + }.getCause.getMessage.contains(msg)) + } + withClue("transform should fail when ids exceed integer range. ") { + val model = als.fit(df) + def testTransformIdExceedsIntRange[A : Encoder](dataFrame: DataFrame): Unit = { + val e1 = intercept[SparkException] { + model.transform(dataFrame).collect() + } + TestUtils.assertExceptionMsg(e1, msg) + val e2 = intercept[StreamingQueryException] { + testTransformer[A](dataFrame, model, "prediction") { _ => } + } + TestUtils.assertExceptionMsg(e2, msg) } - TestUtils.assertExceptionMsg(e2, msg) + testTransformIdExceedsIntRange[(Long, Int)](df.select(df("user_big").as("user"), + df("item"))) + testTransformIdExceedsIntRange[(Double, Int)](df.select(df("user_small").as("user"), + df("item"))) + testTransformIdExceedsIntRange[(Long, Int)](df.select(df("item_big").as("item"), + df("user"))) + testTransformIdExceedsIntRange[(Double, Int)](df.select(df("item_small").as("item"), + df("user"))) } - testTransformIdExceedsIntRange[(Long, Int)](df.select(df("user_big").as("user"), - df("item"))) - testTransformIdExceedsIntRange[(Double, Int)](df.select(df("user_small").as("user"), - df("item"))) - testTransformIdExceedsIntRange[(Long, Int)](df.select(df("item_big").as("item"), - df("user"))) - testTransformIdExceedsIntRange[(Double, Int)](df.select(df("item_small").as("item"), - df("user"))) } } From fe95b03eff2bab17d4aab7e9814f04abb2991b11 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sun, 3 Apr 2022 10:30:38 -0700 Subject: [PATCH 090/535] [SPARK-38446][CORE] Fix deadlock between ExecutorClassLoader and FileDownloadCallback caused by Log4j ### What changes were proposed in this pull request? While `log4j.ignoreTCL/log4j2.ignoreTCL` is false, which is the default, it uses the context ClassLoader for the current Thread, see `org.apache.logging.log4j.util.LoaderUtil.loadClass`. While ExecutorClassLoader try to loadClass through remotely though the FileDownload, if error occurs, we will long on debug level, and `log4j...LoaderUtil` will be blocked by ExecutorClassLoader acquired classloading lock. Fortunately, it only happens when ThresholdFilter's level is `debug`. or we can set `log4j.ignoreTCL/log4j2.ignoreTCL` to true, but I don't know what else it will cause. So in this PR, I simply remove the debug log which cause this deadlock ### Why are the changes needed? fix deadlock ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? https://github.com/apache/incubator-kyuubi/pull/2046#discussion_r821414439, with a ut in kyuubi project, resolved(https://github.com/apache/incubator-kyuubi/actions/runs/1950222737) ### Additional Resources [ut.jstack.txt](https://github.com/apache/spark/files/8206457/ut.jstack.txt) Closes #35765 from yaooqinn/SPARK-38446. Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun (cherry picked from commit aef674564ff12e78bd2f30846e3dcb69988249ae) Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala b/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala index 5864e9e2ceac0..464b6cbc6b0a6 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala @@ -451,7 +451,6 @@ private[netty] class NettyRpcEnv( } override def onFailure(streamId: String, cause: Throwable): Unit = { - logDebug(s"Error downloading stream $streamId.", cause) source.setError(cause) sink.close() } From 6ab8b384d03d9ba1a046327a4ba9b4c7406ad706 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 3 Apr 2022 13:31:34 -0700 Subject: [PATCH 091/535] [SPARK-38776][MLLIB][TESTS][FOLLOWUP] Disable ANSI_ENABLED more for `Out of Range` failures This is a follow-up of https://github.com/apache/spark/pull/36051. After fixing `Overflow` errors, `Out Of Range` failures are observed in the rest of test code in the same test case. To make GitHub Action ANSI test CI pass. No. At this time, I used the following to simulate GitHub Action ANSI job. ``` $ SPARK_ANSI_SQL_MODE=true build/sbt "mllib/testOnly *.ALSSuite" ... [info] All tests passed. [success] Total time: 80 s (01:20), completed Apr 3, 2022 1:05:50 PM ``` Closes #36054 from dongjoon-hyun/SPARK-38776-2. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit fbcab01ffb672dda98f6f472da44aed26b59b2a5) Signed-off-by: Dongjoon Hyun --- .../spark/ml/recommendation/ALSSuite.scala | 30 +++++++++++-------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala index 7372b2d7673f1..e925f7b574edc 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/recommendation/ALSSuite.scala @@ -228,18 +228,20 @@ class ALSSuite extends MLTest with DefaultReadWriteTest with Logging { } val msg = "either out of Integer range or contained a fractional part" - withClue("Invalid Long: out of range") { - val e: SparkException = intercept[SparkException] { - df.select(checkedCast(lit(1231000000000L))).collect() + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + withClue("Invalid Long: out of range") { + val e: SparkException = intercept[SparkException] { + df.select(checkedCast(lit(1231000000000L))).collect() + } + assert(e.getMessage.contains(msg)) } - assert(e.getMessage.contains(msg)) - } - withClue("Invalid Decimal: out of range") { - val e: SparkException = intercept[SparkException] { - df.select(checkedCast(lit(1231000000000.0).cast(DecimalType(15, 2)))).collect() + withClue("Invalid Decimal: out of range") { + val e: SparkException = intercept[SparkException] { + df.select(checkedCast(lit(1231000000000.0).cast(DecimalType(15, 2)))).collect() + } + assert(e.getMessage.contains(msg)) } - assert(e.getMessage.contains(msg)) } withClue("Invalid Decimal: fractional part") { @@ -249,11 +251,13 @@ class ALSSuite extends MLTest with DefaultReadWriteTest with Logging { assert(e.getMessage.contains(msg)) } - withClue("Invalid Double: out of range") { - val e: SparkException = intercept[SparkException] { - df.select(checkedCast(lit(1231000000000.0))).collect() + withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + withClue("Invalid Double: out of range") { + val e: SparkException = intercept[SparkException] { + df.select(checkedCast(lit(1231000000000.0))).collect() + } + assert(e.getMessage.contains(msg)) } - assert(e.getMessage.contains(msg)) } withClue("Invalid Double: fractional part") { From 8f9aa50f730a48c1697cbb4f4c9ba707963a37a1 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sun, 3 Apr 2022 18:35:10 -0700 Subject: [PATCH 092/535] [SPARK-34863][SQL][FOLLOWUP] Disable `spark.sql.parquet.enableNestedColumnVectorizedReader` by default ### What changes were proposed in this pull request? This PR disables `spark.sql.parquet.enableNestedColumnVectorizedReader` by default. ### Why are the changes needed? In #34659 the config was turned mainly for testing reason. As the feature is new, we should turn it off by default. ### Does this PR introduce _any_ user-facing change? The config `spark.sql.parquet.enableNestedColumnVectorizedReader` is turned off by default now. ### How was this patch tested? Existing tests. Closes #36055 from sunchao/disable. Authored-by: Chao Sun Signed-off-by: Liang-Chi Hsieh (cherry picked from commit 1b08673a6d92e3e0fceb4a686a0ba77a87f1ebbc) Signed-off-by: Liang-Chi Hsieh --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- .../src/test/resources/sql-tests/results/explain-aqe.sql.out | 3 +-- sql/core/src/test/resources/sql-tests/results/explain.sql.out | 3 +-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 5bf59923787a7..56849b8718e19 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1015,7 +1015,7 @@ object SQLConf { s"Requires ${PARQUET_VECTORIZED_READER_ENABLED.key} to be enabled.") .version("3.3.0") .booleanConf - .createWithDefault(true) + .createWithDefault(false) val PARQUET_RECORD_FILTER_ENABLED = buildConf("spark.sql.parquet.recordLevelFilter.enabled") .doc("If true, enables Parquet's native record-level filtering using the pushed down " + diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index f98fb1eb2a57a..f5e5b46d29ce6 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -1125,8 +1125,7 @@ struct -- !query output == Physical Plan == *Filter v#x IN ([a],null) -+- *ColumnarToRow - +- FileScan parquet default.t[v#x] Batched: true, DataFilters: [v#x IN ([a],null)], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/t], PartitionFilters: [], PushedFilters: [In(v, [[a],null])], ReadSchema: struct> ++- FileScan parquet default.t[v#x] Batched: false, DataFilters: [v#x IN ([a],null)], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/t], PartitionFilters: [], PushedFilters: [In(v, [[a],null])], ReadSchema: struct> -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index a563eda1e7b04..4e552d51a3953 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -1067,8 +1067,7 @@ struct -- !query output == Physical Plan == *Filter v#x IN ([a],null) -+- *ColumnarToRow - +- FileScan parquet default.t[v#x] Batched: true, DataFilters: [v#x IN ([a],null)], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/t], PartitionFilters: [], PushedFilters: [In(v, [[a],null])], ReadSchema: struct> ++- FileScan parquet default.t[v#x] Batched: false, DataFilters: [v#x IN ([a],null)], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/t], PartitionFilters: [], PushedFilters: [In(v, [[a],null])], ReadSchema: struct> -- !query From 21e5ec529e1df114e94f81ac8d4abfbd943f1a05 Mon Sep 17 00:00:00 2001 From: Yang Liu Date: Mon, 4 Apr 2022 11:03:20 +0800 Subject: [PATCH 093/535] [SPARK-32268][SQL][FOLLOWUP] Add ColumnPruning in injectBloomFilter ### What changes were proposed in this pull request? Add `ColumnPruning` in `InjectRuntimeFilter.injectBloomFilter` to optimize the BoomFilter creation query. ### Why are the changes needed? It seems BloomFilter subqueries injected by `InjectRuntimeFilter` will read as many columns as filterCreationSidePlan. This does not match "Only scan the required columns" as the design said. We can check this by a simple case in `InjectRuntimeFilterSuite`: ```scala withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_ENABLED.key -> "true", SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "2000") { val query = "select * from bf1 join bf2 on bf1.c1 = bf2.c2 where bf2.a2 = 62" sql(query).explain() } ``` The reason is subqueries have not been optimized by `ColumnPruning`, and this pr will fix it. ### Does this PR introduce _any_ user-facing change? No, not released ### How was this patch tested? Improve the test by adding `columnPruningTakesEffect` to check the optimizedPlan of bloom filter join. Closes #36047 from Flyangz/SPARK-32268-FOllOWUP. Authored-by: Yang Liu Signed-off-by: Yuming Wang (cherry picked from commit c98725a2b9574ba3c9a10567af740db7467df59d) Signed-off-by: Yuming Wang --- .../catalyst/optimizer/InjectRuntimeFilter.scala | 3 ++- .../spark/sql/InjectRuntimeFilterSuite.scala | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala index 35d0189f64651..a69cda25ef4f9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala @@ -85,7 +85,8 @@ object InjectRuntimeFilter extends Rule[LogicalPlan] with PredicateHelper with J } val aggExp = AggregateExpression(bloomFilterAgg, Complete, isDistinct = false, None) val alias = Alias(aggExp, "bloomFilter")() - val aggregate = ConstantFolding(Aggregate(Nil, Seq(alias), filterCreationSidePlan)) + val aggregate = + ConstantFolding(ColumnPruning(Aggregate(Nil, Seq(alias), filterCreationSidePlan))) val bloomFilterSubquery = ScalarSubquery(aggregate, Nil) val filter = BloomFilterMightContain(bloomFilterSubquery, new XxHash64(Seq(filterApplicationSideExp))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala index 0da3667382c16..097a18cabd58c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala @@ -255,6 +255,7 @@ class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSp planEnabled = sql(query).queryExecution.optimizedPlan checkAnswer(sql(query), expectedAnswer) if (shouldReplace) { + assert(!columnPruningTakesEffect(planEnabled)) assert(getNumBloomFilters(planEnabled) > getNumBloomFilters(planDisabled)) } else { assert(getNumBloomFilters(planEnabled) == getNumBloomFilters(planDisabled)) @@ -288,6 +289,20 @@ class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSp numMightContains } + def columnPruningTakesEffect(plan: LogicalPlan): Boolean = { + def takesEffect(plan: LogicalPlan): Boolean = { + val result = org.apache.spark.sql.catalyst.optimizer.ColumnPruning.apply(plan) + !result.fastEquals(plan) + } + + plan.collectFirst { + case Filter(condition, _) if condition.collectFirst { + case subquery: org.apache.spark.sql.catalyst.expressions.ScalarSubquery + if takesEffect(subquery.plan) => true + }.nonEmpty => true + }.nonEmpty + } + def assertRewroteSemiJoin(query: String): Unit = { checkWithAndWithoutFeatureEnabled(query, testSemiJoin = true, shouldReplace = true) } From 1695b30e1b2120b52a84f3df2784d6435874867c Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 4 Apr 2022 15:15:39 +0900 Subject: [PATCH 094/535] [MINOR][DOCS] Remove PySpark doc build warnings This PR fixes a various documentation build warnings in PySpark documentation To render the docs better. Yes, it changes the documentation to be prettier. Pretty minor though. I manually tested it by building the PySpark documentation Closes #36057 from HyukjinKwon/remove-pyspark-build-warnings. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit a252c109b32bd3bbb269d6790f0c35e0a4ae705f) Signed-off-by: Hyukjin Kwon --- .../source/user_guide/sql/arrow_pandas.rst | 18 +++++++++--------- python/pyspark/pandas/generic.py | 4 ++-- python/pyspark/pandas/indexes/base.py | 2 +- python/pyspark/sql/streaming.py | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/python/docs/source/user_guide/sql/arrow_pandas.rst b/python/docs/source/user_guide/sql/arrow_pandas.rst index 20a9f935d586f..9675b1096f037 100644 --- a/python/docs/source/user_guide/sql/arrow_pandas.rst +++ b/python/docs/source/user_guide/sql/arrow_pandas.rst @@ -53,7 +53,7 @@ This can be controlled by ``spark.sql.execution.arrow.pyspark.fallback.enabled`` .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 35-48 + :lines: 37-52 :dedent: 4 Using the above optimizations with Arrow will produce the same results as when Arrow is not @@ -90,7 +90,7 @@ specify the type hints of ``pandas.Series`` and ``pandas.DataFrame`` as below: .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 54-78 + :lines: 56-80 :dedent: 4 In the following sections, it describes the combinations of the supported type hints. For simplicity, @@ -113,7 +113,7 @@ The following example shows how to create this Pandas UDF that computes the prod .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 82-112 + :lines: 84-114 :dedent: 4 For detailed usage, please see :func:`pandas_udf`. @@ -152,7 +152,7 @@ The following example shows how to create this Pandas UDF: .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 116-138 + :lines: 118-140 :dedent: 4 For detailed usage, please see :func:`pandas_udf`. @@ -174,7 +174,7 @@ The following example shows how to create this Pandas UDF: .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 142-165 + :lines: 144-167 :dedent: 4 For detailed usage, please see :func:`pandas_udf`. @@ -205,7 +205,7 @@ and window operations: .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 169-210 + :lines: 171-212 :dedent: 4 .. currentmodule:: pyspark.sql.functions @@ -270,7 +270,7 @@ in the group. .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 214-232 + :lines: 216-234 :dedent: 4 For detailed usage, please see please see :meth:`GroupedData.applyInPandas` @@ -288,7 +288,7 @@ The following example shows how to use :meth:`DataFrame.mapInPandas`: .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 236-247 + :lines: 238-249 :dedent: 4 For detailed usage, please see :meth:`DataFrame.mapInPandas`. @@ -327,7 +327,7 @@ The following example shows how to use ``DataFrame.groupby().cogroup().applyInPa .. literalinclude:: ../../../../../examples/src/main/python/sql/arrow.py :language: python - :lines: 251-273 + :lines: 253-275 :dedent: 4 diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py index 49375da516629..7750126868437 100644 --- a/python/pyspark/pandas/generic.py +++ b/python/pyspark/pandas/generic.py @@ -952,7 +952,7 @@ def to_json( This parameter only works when `path` is specified. Returns - -------- + ------- str or None Examples @@ -2317,7 +2317,7 @@ def bool(self) -> bool: the object does not have exactly 1 element, or that element is not boolean Returns - -------- + ------- bool Examples diff --git a/python/pyspark/pandas/indexes/base.py b/python/pyspark/pandas/indexes/base.py index 1705ef83261bf..9c86cc1fdfce7 100644 --- a/python/pyspark/pandas/indexes/base.py +++ b/python/pyspark/pandas/indexes/base.py @@ -1312,7 +1312,7 @@ def get_level_values(self, level: Union[int, Name]) -> "Index": """ Return Index if a valid level is given. - Examples: + Examples -------- >>> psidx = ps.Index(['a', 'b', 'c'], name='ks') >>> psidx.get_level_values(0) diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py index 7517a41337f90..36ace1c88e82a 100644 --- a/python/pyspark/sql/streaming.py +++ b/python/pyspark/sql/streaming.py @@ -823,7 +823,7 @@ def table(self, tableName: str) -> "DataFrame": string, for the name of the table. Returns - -------- + ------- :class:`DataFrame` Notes From ea8adb9722d22bea170877a41af8108b884bee07 Mon Sep 17 00:00:00 2001 From: itholic Date: Mon, 4 Apr 2022 15:19:46 +0900 Subject: [PATCH 095/535] [SPARK-38780][PYTHON][DOCS] PySpark docs build should fail when there is warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR proposes to add option "-W" when running PySpark documentation build via Sphinx. ### Why are the changes needed? To make documentation build failing when the documentation violates the Sphinx warning rules. So far, the docs build is passed although the docs has warnings as below: ``` … build succeeded, 14 warnings. ``` With this change, the warnings treated as error, so the build should be failed as below for an example: ``` Warning, treated as error: …:153:Malformed table. Text in column margin in table line 38. … make: *** [html] Error 2 ``` ### Does this PR introduce _any_ user-facing change? This would make docs a bit more prettier. ### How was this patch tested? The existing build & tests should be passed. Closes #36058 from itholic/SPARK-38780. Authored-by: itholic Signed-off-by: Hyukjin Kwon (cherry picked from commit bd8100b78e7f357725bfa54697a27a0c421ef496) Signed-off-by: Hyukjin Kwon --- python/docs/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/docs/Makefile b/python/docs/Makefile index 14e5214fc1e0a..65ab6cfa3ecb8 100644 --- a/python/docs/Makefile +++ b/python/docs/Makefile @@ -16,7 +16,7 @@ # Minimal makefile for Sphinx documentation # You can set these variables from the command line. -SPHINXOPTS ?= +SPHINXOPTS ?= "-W" SPHINXBUILD ?= sphinx-build SOURCEDIR ?= source BUILDDIR ?= build From f60bfc2bb0d69f485035ca93e793f7af6e9fdad9 Mon Sep 17 00:00:00 2001 From: itholic Date: Mon, 4 Apr 2022 16:03:19 +0900 Subject: [PATCH 096/535] [SPARK-38780][FOLLOWUP][PYTHON][DOCS] PySpark docs build should fail when there is warning ### What changes were proposed in this pull request? This PR proposes to remove `ForeachBatchFunction` and `StreamingQueryException` from `python/docs/source/reference/pyspark.ss.rst` since they're not API so the doc build is failed. ### Why are the changes needed? To fix the document build failure. ### Does this PR introduce _any_ user-facing change? `ForeachBatchFunction` and `StreamingQueryException` are removed from the documents since they're not actually APIs. ### How was this patch tested? The existing doc build should be passed. Closes #36059 from itholic/SPARK-38780-followup. Authored-by: itholic Signed-off-by: Hyukjin Kwon (cherry picked from commit 835b46d1204eef73a5e07b0d93a406bf9471f879) Signed-off-by: Hyukjin Kwon --- python/docs/source/reference/pyspark.ss.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/docs/source/reference/pyspark.ss.rst b/python/docs/source/reference/pyspark.ss.rst index a7936a4f2a59c..cace2d5a6bbb3 100644 --- a/python/docs/source/reference/pyspark.ss.rst +++ b/python/docs/source/reference/pyspark.ss.rst @@ -30,9 +30,7 @@ Core Classes DataStreamReader DataStreamWriter - ForeachBatchFunction StreamingQuery - StreamingQueryException StreamingQueryManager Input and Output From 4cd395ef2c2ea842c570b4187778c0a580596f3c Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 4 Apr 2022 16:59:04 +0800 Subject: [PATCH 097/535] [SPARK-38778][INFRA][BUILD] Replace http with https for project url in pom ### What changes were proposed in this pull request? change http://spark.apache.org/ to https://spark.apache.org/ in the project URL of all pom files ### Why are the changes needed? fix home page in maven central https://mvnrepository.com/artifact/org.apache.spark/spark-sql_2.13/3.2.1 #### From License | Apache 2.0 -- | -- Categories |Hadoop Query Engines HomePage|http://spark.apache.org/ Date | (Jan 26, 2022) #### to License | Apache 2.0 -- | -- Categories |Hadoop Query Engines HomePage|https://spark.apache.org/ Date | (Jan 26, 2022) ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? pass GHA Closes #36053 from yaooqinn/SPARK-38778. Authored-by: Kent Yao Signed-off-by: Yuming Wang (cherry picked from commit 65d347d145f0039d5246431431573ccd34724276) Signed-off-by: Yuming Wang --- R/pkg/R/DataFrame.R | 2 +- R/pkg/R/RDD.R | 2 +- R/pkg/R/sparkR.R | 6 +++--- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- dev/checkstyle.xml | 2 +- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- .../spark/examples/streaming/JavaKinesisWordCountASL.java | 6 +++--- .../main/python/examples/streaming/kinesis_wordcount_asl.py | 4 ++-- .../spark/examples/streaming/KinesisWordCountASL.scala | 4 ++-- graphx/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 4 ++-- repl/pom.xml | 2 +- .../kubernetes/docker/src/main/dockerfiles/spark/Dockerfile | 2 +- .../docker/src/main/dockerfiles/spark/Dockerfile.java17 | 2 +- sql/catalyst/pom.xml | 2 +- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- .../scala/org/apache/spark/streaming/StreamingContext.scala | 2 +- .../spark/streaming/api/java/JavaStreamingContext.scala | 2 +- tools/pom.xml | 2 +- 40 files changed, 47 insertions(+), 47 deletions(-) diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index db616626f8fc8..e143cbd8256f9 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -608,7 +608,7 @@ setMethod("cache", #' #' Persist this SparkDataFrame with the specified storage level. For details of the #' supported storage levels, refer to -#' \url{http://spark.apache.org/docs/latest/rdd-programming-guide.html#rdd-persistence}. +#' \url{https://spark.apache.org/docs/latest/rdd-programming-guide.html#rdd-persistence}. #' #' @param x the SparkDataFrame to persist. #' @param newLevel storage level chosen for the persistence. See available options in diff --git a/R/pkg/R/RDD.R b/R/pkg/R/RDD.R index 408a3ff25b2b2..31f69c594aa4c 100644 --- a/R/pkg/R/RDD.R +++ b/R/pkg/R/RDD.R @@ -227,7 +227,7 @@ setMethod("cacheRDD", #' #' Persist this RDD with the specified storage level. For details of the #' supported storage levels, refer to -#'\url{http://spark.apache.org/docs/latest/rdd-programming-guide.html#rdd-persistence}. +#'\url{https://spark.apache.org/docs/latest/rdd-programming-guide.html#rdd-persistence}. #' #' @param x The RDD to persist #' @param newLevel The new storage level to be assigned diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index fcda4ff1d74c2..f18a6c7e25f1b 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -344,10 +344,10 @@ sparkRHive.init <- function(jsc = NULL) { #' the warehouse, an accompanied metastore may also be automatically created in the current #' directory when a new SparkSession is initialized with \code{enableHiveSupport} set to #' \code{TRUE}, which is the default. For more details, refer to Hive configuration at -#' \url{http://spark.apache.org/docs/latest/sql-programming-guide.html#hive-tables}. +#' \url{https://spark.apache.org/docs/latest/sql-programming-guide.html#hive-tables}. #' #' For details on how to initialize and use SparkR, refer to SparkR programming guide at -#' \url{http://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession}. +#' \url{https://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession}. #' #' @param master the Spark master URL. #' @param appName application name to register with cluster manager. @@ -598,7 +598,7 @@ sparkConfToSubmitOps[["spark.kerberos.principal"]] <- "--principal" # # A few Spark Application and Runtime environment properties cannot take effect after driver # JVM has started, as documented in: -# http://spark.apache.org/docs/latest/configuration.html#application-properties +# https://spark.apache.org/docs/latest/configuration.html#application-properties # When starting SparkR without using spark-submit, for example, from Rstudio, add them to # spark-submit commandline if not already set in SPARKR_SUBMIT_ARGS so that they can be effective. getClientModeSparkSubmitOpts <- function(submitOps, sparkEnvirMap) { diff --git a/assembly/pom.xml b/assembly/pom.xml index 74c2f44121fca..0f88fe4feaf6b 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -27,7 +27,7 @@ spark-assembly_2.12 Spark Project Assembly - http://spark.apache.org/ + https://spark.apache.org/ pom diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index e4ccc96eb41b7..15f7b8fa828a2 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -29,7 +29,7 @@ spark-kvstore_2.12 jar Spark Project Local DB - http://spark.apache.org/ + https://spark.apache.org/ kvstore diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 3303fa906720e..d652b6d1c8d19 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -29,7 +29,7 @@ spark-network-common_2.12 jar Spark Project Networking - http://spark.apache.org/ + https://spark.apache.org/ network-common diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 19cc5f4581a2c..db36da4799fd6 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -29,7 +29,7 @@ spark-network-shuffle_2.12 jar Spark Project Shuffle Streaming Service - http://spark.apache.org/ + https://spark.apache.org/ network-shuffle diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 1f43c903a527c..9e0a202edd15c 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -29,7 +29,7 @@ spark-network-yarn_2.12 jar Spark Project YARN Shuffle Service - http://spark.apache.org/ + https://spark.apache.org/ network-yarn diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 31df0a0b1c087..068ef60b77fb5 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -29,7 +29,7 @@ spark-sketch_2.12 jar Spark Project Sketch - http://spark.apache.org/ + https://spark.apache.org/ sketch diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1de13c243b543..5081579e38d12 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -29,7 +29,7 @@ spark-tags_2.12 jar Spark Project Tags - http://spark.apache.org/ + https://spark.apache.org/ tags diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index ec14ea9d609a1..500f408380581 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -29,7 +29,7 @@ spark-unsafe_2.12 jar Spark Project Unsafe - http://spark.apache.org/ + https://spark.apache.org/ unsafe diff --git a/core/pom.xml b/core/pom.xml index e696853b9e363..6f30a8076f8da 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -28,7 +28,7 @@ spark-core_2.12 jar Spark Project Core - http://spark.apache.org/ + https://spark.apache.org/ core diff --git a/dev/checkstyle.xml b/dev/checkstyle.xml index 6c93ff94fd9f2..72c10f210f286 100644 --- a/dev/checkstyle.xml +++ b/dev/checkstyle.xml @@ -28,7 +28,7 @@ with Spark-specific changes from: - http://spark.apache.org/contributing.html#code-style-guide + https://spark.apache.org/contributing.html#code-style-guide Checkstyle is very configurable. Be sure to read the documentation at http://checkstyle.sf.net (or in your downloaded distribution). diff --git a/examples/pom.xml b/examples/pom.xml index 422048f2b3eb9..694c2f5c3ce27 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -28,7 +28,7 @@ spark-examples_2.12 jar Spark Project Examples - http://spark.apache.org/ + https://spark.apache.org/ examples diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 7e414be577a28..dd3fb1ae413db 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -31,7 +31,7 @@ jar Spark Avro - http://spark.apache.org/ + https://spark.apache.org/ diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index e3070f462c1ff..0d8022c3269d4 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -29,7 +29,7 @@ spark-docker-integration-tests_2.12 jar Spark Project Docker Integration Tests - http://spark.apache.org/ + https://spark.apache.org/ docker-integration-tests diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 0c8194cad4d20..a6267ec53e25c 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -28,7 +28,7 @@ spark-streaming-kafka-0-10-assembly_2.12 jar Spark Integration for Kafka 0.10 Assembly - http://spark.apache.org/ + https://spark.apache.org/ streaming-kafka-0-10-assembly diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 45902a9061495..455e242685cf3 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -32,7 +32,7 @@ jar Kafka 0.10+ Token Provider for Streaming - http://spark.apache.org/ + https://spark.apache.org/ diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index b689f2b02b758..6e2f8e823a64f 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -31,7 +31,7 @@ jar Spark Integration for Kafka 0.10 - http://spark.apache.org/ + https://spark.apache.org/ diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 01541eb7816d0..7fe69a90156a2 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -28,7 +28,7 @@ spark-streaming-kinesis-asl-assembly_2.12 jar Spark Project Kinesis Assembly - http://spark.apache.org/ + https://spark.apache.org/ streaming-kinesis-asl-assembly diff --git a/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java b/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java index 244873af70de9..636af9f6c6060 100644 --- a/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java +++ b/external/kinesis-asl/src/main/java/org/apache/spark/examples/streaming/JavaKinesisWordCountASL.java @@ -73,9 +73,9 @@ * Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs * Instance profile credentials - delivered through the Amazon EC2 metadata service * For more information, see - * http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/credentials.html + * https://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/credentials.html * - * See http://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more details on + * See https://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more details on * the Kinesis Spark Streaming integration. */ public final class JavaKinesisWordCountASL { // needs to be public for access from run-example @@ -91,7 +91,7 @@ public static void main(String[] args) throws Exception { " is the endpoint of the Kinesis service\n" + " (e.g. https://kinesis.us-east-1.amazonaws.com)\n" + "Generate data for the Kinesis stream using the example KinesisWordProducerASL.\n" + - "See http://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more\n" + + "See https://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more\n" + "details.\n" ); System.exit(1); diff --git a/external/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py b/external/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py index 5b134988f7509..e66763538d15a 100644 --- a/external/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py +++ b/external/kinesis-asl/src/main/python/examples/streaming/kinesis_wordcount_asl.py @@ -50,9 +50,9 @@ Credential profiles file - default location (~/.aws/credentials) shared by all AWS SDKs Instance profile credentials - delivered through the Amazon EC2 metadata service For more information, see - http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/credentials.html + https://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/credentials.html - See http://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more details on + See https://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more details on the Kinesis Spark Streaming integration. """ import sys diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala index e96a669041633..7d12af3256f1f 100644 --- a/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala +++ b/external/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala @@ -70,7 +70,7 @@ import org.apache.spark.streaming.kinesis.KinesisInputDStream * For more information, see * http://docs.aws.amazon.com/AWSSdkDocsJava/latest/DeveloperGuide/credentials.html * - * See http://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more details on + * See https://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more details on * the Kinesis Spark Streaming integration. */ object KinesisWordCountASL extends Logging { @@ -87,7 +87,7 @@ object KinesisWordCountASL extends Logging { | (e.g. https://kinesis.us-east-1.amazonaws.com) | |Generate input data for Kinesis stream using the example KinesisWordProducerASL. - |See http://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more + |See https://spark.apache.org/docs/latest/streaming-kinesis-integration.html for more |details. """.stripMargin) System.exit(1) diff --git a/graphx/pom.xml b/graphx/pom.xml index c119027c87f42..5d42f43dd3288 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -31,7 +31,7 @@ jar Spark Project GraphX - http://spark.apache.org/ + https://spark.apache.org/ diff --git a/launcher/pom.xml b/launcher/pom.xml index 348033a776aeb..5889a004513a1 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -29,7 +29,7 @@ spark-launcher_2.12 jar Spark Project Launcher - http://spark.apache.org/ + https://spark.apache.org/ launcher diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 246d4fbae50b2..4a92bdd29e488 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -31,7 +31,7 @@ jar Spark Project ML Local Library - http://spark.apache.org/ + https://spark.apache.org/ diff --git a/mllib/pom.xml b/mllib/pom.xml index af10a0dd21574..74161d23d2337 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -31,7 +31,7 @@ jar Spark Project ML Library - http://spark.apache.org/ + https://spark.apache.org/ diff --git a/pom.xml b/pom.xml index cee7970ace10d..64d6e0762151b 100644 --- a/pom.xml +++ b/pom.xml @@ -29,7 +29,7 @@ 3.3.0-SNAPSHOT pom Spark Project Parent POM - http://spark.apache.org/ + https://spark.apache.org/ Apache 2.0 License @@ -50,7 +50,7 @@ matei.zaharia@gmail.com https://cs.stanford.edu/people/matei Apache Software Foundation - http://spark.apache.org + https://spark.apache.org diff --git a/repl/pom.xml b/repl/pom.xml index 32ca7925c24e5..e4ee7b6d78c75 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -28,7 +28,7 @@ spark-repl_2.12 jar Spark Project REPL - http://spark.apache.org/ + https://spark.apache.org/ repl diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile index 8c3db7e243d8b..6ed03624e5955 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile @@ -21,7 +21,7 @@ FROM openjdk:${java_image_tag} ARG spark_uid=185 # Before building the docker image, first build and make a Spark distribution following -# the instructions in http://spark.apache.org/docs/latest/building-spark.html. +# the instructions in https://spark.apache.org/docs/latest/building-spark.html. # If this docker file is being used in the context of building your images from a Spark # distribution, the docker build command should be invoked from the top level directory # of the Spark distribution. E.g.: diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 index f9ab64e94a54c..7fdaac1153022 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/Dockerfile.java17 @@ -21,7 +21,7 @@ FROM debian:bullseye-slim ARG spark_uid=185 # Before building the docker image, first build and make a Spark distribution following -# the instructions in http://spark.apache.org/docs/latest/building-spark.html. +# the instructions in https://spark.apache.org/docs/latest/building-spark.html. # If this docker file is being used in the context of building your images from a Spark # distribution, the docker build command should be invoked from the top level directory # of the Spark distribution. E.g.: diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 6fedd4f146ea6..db6910da3cddc 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -29,7 +29,7 @@ spark-catalyst_2.12 jar Spark Project Catalyst - http://spark.apache.org/ + https://spark.apache.org/ catalyst diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 2a9b3c0005cbc..ccf0219f2e12f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -520,7 +520,7 @@ object QueryExecutionErrors { new ClassNotFoundException( s""" |Failed to find data source: $provider. Please find packages at - |http://spark.apache.org/third-party-projects.html + |https://spark.apache.org/third-party-projects.html """.stripMargin, error) } diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 3002a3b4a876d..f9da9543e9616 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -29,7 +29,7 @@ spark-sql_2.12 jar Spark Project SQL - http://spark.apache.org/ + https://spark.apache.org/ sql diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 764712fcbb482..20c51873dbc7d 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -29,7 +29,7 @@ spark-hive-thriftserver_2.12 jar Spark Project Hive Thrift Server - http://spark.apache.org/ + https://spark.apache.org/ hive-thriftserver diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 893362ae28bc1..906ab911812ef 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -29,7 +29,7 @@ spark-hive_2.12 jar Spark Project Hive - http://spark.apache.org/ + https://spark.apache.org/ hive diff --git a/streaming/pom.xml b/streaming/pom.xml index ea1415162a373..a728089002e2a 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -31,7 +31,7 @@ jar Spark Project Streaming - http://spark.apache.org/ + https://spark.apache.org/ diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index e3459c96335b3..3232c5b6b1395 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -275,7 +275,7 @@ class StreamingContext private[streaming] ( /** * Create an input stream with any arbitrary user implemented receiver. - * Find more details at http://spark.apache.org/docs/latest/streaming-custom-receivers.html + * Find more details at https://spark.apache.org/docs/latest/streaming-custom-receivers.html * @param receiver Custom implementation of Receiver */ def receiverStream[T: ClassTag](receiver: Receiver[T]): ReceiverInputDStream[T] = { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala index af3f5a060f54b..f288fae6598c3 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala @@ -421,7 +421,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable { /** * Create an input stream with any arbitrary user implemented receiver. - * Find more details at: http://spark.apache.org/docs/latest/streaming-custom-receivers.html + * Find more details at: https://spark.apache.org/docs/latest/streaming-custom-receivers.html * @param receiver Custom implementation of Receiver */ def receiverStream[T](receiver: Receiver[T]): JavaReceiverInputDStream[T] = { diff --git a/tools/pom.xml b/tools/pom.xml index bee1c0eb466c6..a3ecea583eb15 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -30,7 +30,7 @@ jar Spark Project Tools - http://spark.apache.org/ + https://spark.apache.org/ From a84bb171df206e320f2ceef936fce40328ef86c0 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Mon, 4 Apr 2022 23:09:14 -0700 Subject: [PATCH 098/535] [SPARK-37377][SQL][3.3] Initial implementation of Storage-Partitioned Join This is a backport of #35657 to `branch-3.3` ### What changes were proposed in this pull request? This PR introduces the initial implementation of Storage-Partitioned Join ([SPIP](https://docs.google.com/document/d/1foTkDSM91VxKgkEcBMsuAvEjNybjja-uHk-r3vtXWFE)). Changes: - `org.apache.spark.sql.connector.read.partitioning.Partitioning` currently is very limited (as mentioned in the SPIP), and cannot be extended to handle join cases. This PR completely replace it following the catalyst `Partitioning` interface, and added two concrete sub-classes: `KeyGroupedPartitioning` and `UnknownPartitioning`. This allows a V2 data source to report to Spark it's partition transform expressions, via `SupportsReportPartitioning` interface. - with the above change, `org.apache.spark.sql.connector.read.partitioning.Distribution` and `org.apache.spark.sql.connector.read.partitioning.ClusteredDistribution` now are replaced by classes with the same name in `org.apache.spark.sql.connector.distributions` package. Therefore, this PR marks the former two as deprecated. - `DataSourcePartitioning` used to be in `org.apache.spark.sql.execution.datasources.v2`. This moves it into package `org.apache.spark.sql.catalyst.plans.physical` and renames it to `KeyGroupedPartitioning`, so that it can be extended for more non-V2 use cases, such as Hive bucketing. In addition, it is also changed to accommodate the Storage-Partitioned Join feature. - a new expression type: `TransformExpression`, is introduced to bind syntactic partition transforms with their semantic meaning, represented by a V2 function. This expression is un-evaluable for now, and is used later in `EnsureRequirements` to check whether join children are compatible with each other. - a new optimizer rule: `V2ScanPartitioning`, is added to recognize `Scan`s implement `SupportsReportPartitioning`. If they do, this rule converts V2 partition transform expressions into their counterparts in catalyst, and annotate `DataSourceV2ScanRelation` with the result. These are later propagated into `DataSourceV2ScanExecBase`. - changes are made in `DataSourceV2ScanExecBase` to create `KeyGroupedPartitioning` for scan if 1) the scan is annotated with catalyst partition transform expressions, and 2) if all input splits implement `HasPartitionKey`. - A new config: `spark.sql.sources.v2.bucketing.enabled` is introduced to turn on or off the behavior. By default it is false. ### Why are the changes needed? Spark currently support bucketing in DataSource V1, but not in V2. This is the first step to support bucket join, and is general form, storage-partitioned join, for V2 data sources. In addition, the work here can potentially used to support Hive bucketing as well. Please check the SPIP for details. ### Does this PR introduce _any_ user-facing change? With the changes, a user can now: - have V2 data sources to report distribution and ordering to Spark on read path - Spark will recognize the distribution property and eliminate shuffle in join/aggregate/window, etc, when the source distribution matches the required distribution from these. - a new config `spark.sql.sources.v2.bucketing.enabled` is introduced to turn on/off the above behavior ### How was this patch tested? - Added a new test suite `KeyGroupedPartitioningSuite` covers end-to-end tests on the new feature - Extended `EnsureRequirementsSuite` to cover `KeyGroupedPartitioning` - Some existing test classes, such as `InMemoryTable` are extended to cover the changes Closes #36068 from sunchao/SPARK-37377-3.3. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../apache/spark/util/collection/Utils.scala | 9 + .../spark/sql/avro/AvroRowReaderSuite.scala | 2 +- .../org/apache/spark/sql/avro/AvroSuite.scala | 6 +- .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 2 +- project/MimaExcludes.scala | 8 +- .../read/partitioning/Distribution.java | 44 -- .../partitioning/KeyGroupedPartitioning.java | 55 ++ .../read/partitioning/Partitioning.java | 26 +- ...ribution.java => UnknownPartitioning.java} | 22 +- .../sql/catalyst/util/InternalRowSet.scala | 65 +++ .../sql/catalyst/util/InternalRowSet.scala | 69 +++ .../expressions/TransformExpression.scala | 61 +++ .../expressions/V2ExpressionUtils.scala | 87 +++- .../plans/physical/partitioning.scala | 134 +++++ .../catalog/CatalogV2Implicits.scala | 4 + .../datasources/v2/DataSourceV2Relation.scala | 7 +- .../apache/spark/sql/internal/SQLConf.scala | 11 + .../connector/catalog/InMemoryCatalog.scala | 4 + .../sql/connector/catalog/InMemoryTable.scala | 18 +- .../scala/org/apache/spark/sql/Dataset.scala | 3 +- .../spark/sql/execution/SparkOptimizer.scala | 9 +- .../datasources/v2/BatchScanExec.scala | 44 +- .../datasources/v2/ContinuousScanExec.scala | 8 +- .../v2/DataSourcePartitioning.scala | 56 --- .../datasources/v2/DataSourceRDD.scala | 73 ++- .../v2/DataSourceV2ScanExecBase.scala | 79 ++- .../datasources/v2/DataSourceV2Strategy.scala | 9 +- .../v2/DistributionAndOrderingUtils.scala | 21 +- .../datasources/v2/MicroBatchScanExec.scala | 7 +- .../datasources/v2/V2ScanPartitioning.scala | 51 ++ .../dynamicpruning/PartitionPruning.scala | 2 +- .../exchange/EnsureRequirements.scala | 41 +- .../JavaPartitionAwareDataSource.java | 35 +- .../spark/sql/FileBasedDataSourceSuite.scala | 4 +- .../sql/connector/DataSourceV2Suite.scala | 81 ++- .../DistributionAndOrderingSuiteBase.scala | 103 ++++ .../KeyGroupedPartitioningSuite.scala | 475 ++++++++++++++++++ .../WriteDistributionAndOrderingSuite.scala | 43 +- .../functions/transformFunctions.scala | 78 +++ .../datasources/PrunePartitionSuiteBase.scala | 2 +- .../datasources/orc/OrcFilterSuite.scala | 2 +- .../execution/datasources/orc/OrcTest.scala | 3 +- .../orc/OrcV2SchemaPruningSuite.scala | 2 +- .../parquet/ParquetFilterSuite.scala | 2 +- .../exchange/EnsureRequirementsSuite.scala | 310 +++++++++++- 45 files changed, 1839 insertions(+), 338 deletions(-) delete mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/Distribution.java create mode 100644 sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/KeyGroupedPartitioning.java rename sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/{ClusteredDistribution.java => UnknownPartitioning.java} (61%) create mode 100644 sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/util/InternalRowSet.scala create mode 100644 sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/util/InternalRowSet.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TransformExpression.scala delete mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourcePartitioning.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/connector/DistributionAndOrderingSuiteBase.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/connector/catalog/functions/transformFunctions.scala diff --git a/core/src/main/scala/org/apache/spark/util/collection/Utils.scala b/core/src/main/scala/org/apache/spark/util/collection/Utils.scala index 4939b600dbfbd..8b543f1642a05 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/Utils.scala @@ -36,4 +36,13 @@ private[spark] object Utils { } ordering.leastOf(input.asJava, num).iterator.asScala } + + /** + * Only returns `Some` iff ALL elements in `input` are defined. In this case, it is + * equivalent to `Some(input.flatten)`. + * + * Otherwise, returns `None`. + */ + def sequenceToOption[T](input: Seq[Option[T]]): Option[Seq[T]] = + if (input.forall(_.isDefined)) Some(input.flatten) else None } diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroRowReaderSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroRowReaderSuite.scala index 890b615b18c5f..08c61381c5780 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroRowReaderSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroRowReaderSuite.scala @@ -59,7 +59,7 @@ class AvroRowReaderSuite val df = spark.read.format("avro").load(dir.getCanonicalPath) val fileScan = df.queryExecution.executedPlan collectFirst { - case BatchScanExec(_, f: AvroScan, _) => f + case BatchScanExec(_, f: AvroScan, _, _) => f } val filePath = fileScan.get.fileIndex.inputFiles(0) val fileSize = new File(new URI(filePath)).length diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index a70fbc0d833e8..e93c1c09c9fc2 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -2335,7 +2335,7 @@ class AvroV2Suite extends AvroSuite with ExplainSuiteHelper { }) val fileScan = df.queryExecution.executedPlan collectFirst { - case BatchScanExec(_, f: AvroScan, _) => f + case BatchScanExec(_, f: AvroScan, _, _) => f } assert(fileScan.nonEmpty) assert(fileScan.get.partitionFilters.nonEmpty) @@ -2368,7 +2368,7 @@ class AvroV2Suite extends AvroSuite with ExplainSuiteHelper { assert(filterCondition.isDefined) val fileScan = df.queryExecution.executedPlan collectFirst { - case BatchScanExec(_, f: AvroScan, _) => f + case BatchScanExec(_, f: AvroScan, _, _) => f } assert(fileScan.nonEmpty) assert(fileScan.get.partitionFilters.isEmpty) @@ -2449,7 +2449,7 @@ class AvroV2Suite extends AvroSuite with ExplainSuiteHelper { .where("value = 'a'") val fileScan = df.queryExecution.executedPlan collectFirst { - case BatchScanExec(_, f: AvroScan, _) => f + case BatchScanExec(_, f: AvroScan, _, _) => f } assert(fileScan.nonEmpty) if (filtersPushdown) { diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index ebd5b844cbc9b..9f93fbf96d2bd 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -372,7 +372,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu private def checkAggregatePushed(df: DataFrame, funcName: String): Unit = { df.queryExecution.optimizedPlan.collect { - case DataSourceV2ScanRelation(_, scan, _) => + case DataSourceV2ScanRelation(_, scan, _, _) => assert(scan.isInstanceOf[V1ScanWrapper]) val wrapper = scan.asInstanceOf[V1ScanWrapper] assert(wrapper.pushedDownOperators.aggregation.isDefined) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 8dc4fab1ab76e..d832d68c999aa 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -58,7 +58,13 @@ object MimaExcludes { // [SPARK-37600][BUILD] Upgrade to Hadoop 3.3.2 ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4Compressor"), ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4Factory"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4SafeDecompressor") + ProblemFilters.exclude[MissingClassProblem]("org.apache.hadoop.shaded.net.jpountz.lz4.LZ4SafeDecompressor"), + + // [SPARK-37377][SQL] Initial implementation of Storage-Partitioned Join + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.connector.read.partitioning.ClusteredDistribution"), + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.connector.read.partitioning.Distribution"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.connector.read.partitioning.Partitioning.*"), + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.connector.read.partitioning.Partitioning.*") ) // Exclude rules for 3.2.x from 3.1.1 diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/Distribution.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/Distribution.java deleted file mode 100644 index a5911a820ac10..0000000000000 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/Distribution.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.connector.read.partitioning; - -import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.read.PartitionReader; - -/** - * An interface to represent data distribution requirement, which specifies how the records should - * be distributed among the data partitions (one {@link PartitionReader} outputs data for one - * partition). - *

        - * Note that this interface has nothing to do with the data ordering inside one - * partition(the output records of a single {@link PartitionReader}). - *

        - * The instance of this interface is created and provided by Spark, then consumed by - * {@link Partitioning#satisfy(Distribution)}. This means data source developers don't need to - * implement this interface, but need to catch as more concrete implementations of this interface - * as possible in {@link Partitioning#satisfy(Distribution)}. - *

        - * Concrete implementations until now: - *

          - *
        • {@link ClusteredDistribution}
        • - *
        - * - * @since 3.0.0 - */ -@Evolving -public interface Distribution {} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/KeyGroupedPartitioning.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/KeyGroupedPartitioning.java new file mode 100644 index 0000000000000..552d92ad0e8b8 --- /dev/null +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/KeyGroupedPartitioning.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.read.partitioning; + +import org.apache.spark.annotation.Evolving; +import org.apache.spark.sql.connector.expressions.Expression; + +/** + * Represents a partitioning where rows are split across partitions based on the + * partition transform expressions returned by {@link KeyGroupedPartitioning#keys}. + *

        + * Note: Data source implementations should make sure for a single partition, all of its rows + * must be evaluated to the same partition value after being applied by + * {@link KeyGroupedPartitioning#keys} expressions. Different partitions can share the same + * partition value: Spark will group these into a single logical partition during planning phase. + * + * @since 3.3.0 + */ +@Evolving +public class KeyGroupedPartitioning implements Partitioning { + private final Expression[] keys; + private final int numPartitions; + + public KeyGroupedPartitioning(Expression[] keys, int numPartitions) { + this.keys = keys; + this.numPartitions = numPartitions; + } + + /** + * Returns the partition transform expressions for this partitioning. + */ + public Expression[] keys() { + return keys; + } + + @Override + public int numPartitions() { + return numPartitions; + } +} diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/Partitioning.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/Partitioning.java index 7befab4ec5365..09f05d84e7ffb 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/Partitioning.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/Partitioning.java @@ -18,33 +18,25 @@ package org.apache.spark.sql.connector.read.partitioning; import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.read.InputPartition; import org.apache.spark.sql.connector.read.SupportsReportPartitioning; /** * An interface to represent the output data partitioning for a data source, which is returned by - * {@link SupportsReportPartitioning#outputPartitioning()}. Note that this should work - * like a snapshot. Once created, it should be deterministic and always report the same number of - * partitions and the same "satisfy" result for a certain distribution. + * {@link SupportsReportPartitioning#outputPartitioning()}. + *

        + * Note: implementors should NOT directly implement this interface. Instead, they should + * use one of the following subclasses: + *

          + *
        • {@link KeyGroupedPartitioning}
        • + *
        • {@link UnknownPartitioning}
        • + *
        * * @since 3.0.0 */ @Evolving public interface Partitioning { - /** - * Returns the number of partitions(i.e., {@link InputPartition}s) the data source outputs. + * Returns the number of partitions that the data is split across. */ int numPartitions(); - - /** - * Returns true if this partitioning can satisfy the given distribution, which means Spark does - * not need to shuffle the output data of this data source for some certain operations. - *

        - * Note that, Spark may add new concrete implementations of {@link Distribution} in new releases. - * This method should be aware of it and always return false for unrecognized distributions. It's - * recommended to check every Spark new release and support new distributions if possible, to - * avoid shuffle at Spark side for more cases. - */ - boolean satisfy(Distribution distribution); } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/ClusteredDistribution.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/UnknownPartitioning.java similarity index 61% rename from sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/ClusteredDistribution.java rename to sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/UnknownPartitioning.java index ed0354484d7be..a2ae360d9a51f 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/ClusteredDistribution.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/read/partitioning/UnknownPartitioning.java @@ -18,24 +18,22 @@ package org.apache.spark.sql.connector.read.partitioning; import org.apache.spark.annotation.Evolving; -import org.apache.spark.sql.connector.read.PartitionReader; /** - * A concrete implementation of {@link Distribution}. Represents a distribution where records that - * share the same values for the {@link #clusteredColumns} will be produced by the same - * {@link PartitionReader}. + * Represents a partitioning where rows are split across partitions in an unknown pattern. * - * @since 3.0.0 + * @since 3.3.0 */ @Evolving -public class ClusteredDistribution implements Distribution { +public class UnknownPartitioning implements Partitioning { + private final int numPartitions; - /** - * The names of the clustered columns. Note that they are order insensitive. - */ - public final String[] clusteredColumns; + public UnknownPartitioning(int numPartitions) { + this.numPartitions = numPartitions; + } - public ClusteredDistribution(String[] clusteredColumns) { - this.clusteredColumns = clusteredColumns; + @Override + public int numPartitions() { + return numPartitions; } } diff --git a/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/util/InternalRowSet.scala b/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/util/InternalRowSet.scala new file mode 100644 index 0000000000000..9e8ec042694d0 --- /dev/null +++ b/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/util/InternalRowSet.scala @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Murmur3HashFunction, RowOrdering} +import org.apache.spark.sql.types.{DataType, StructField, StructType} + +/** + * A mutable Set with [[InternalRow]] as its element type. It uses Spark's internal murmur hash to + * compute hash code from an row, and uses [[RowOrdering]] to perform equality checks. + * + * @param dataTypes the data types for the row keys this set holds + */ +class InternalRowSet(val dataTypes: Seq[DataType]) extends mutable.Set[InternalRow] { + private val baseSet = new mutable.HashSet[InternalRowContainer] + + private val structType = StructType(dataTypes.map(t => StructField("f", t))) + private val ordering = RowOrdering.createNaturalAscendingOrdering(dataTypes) + + override def contains(row: InternalRow): Boolean = + baseSet.contains(new InternalRowContainer(row)) + + private class InternalRowContainer(val row: InternalRow) { + override def hashCode(): Int = Murmur3HashFunction.hash(row, structType, 42L).toInt + + override def equals(other: Any): Boolean = other match { + case r: InternalRowContainer => ordering.compare(row, r.row) == 0 + case r => this == r + } + } + + override def +=(row: InternalRow): InternalRowSet.this.type = { + val rowKey = new InternalRowContainer(row) + baseSet += rowKey + this + } + + override def -=(row: InternalRow): InternalRowSet.this.type = { + val rowKey = new InternalRowContainer(row) + baseSet -= rowKey + this + } + + override def iterator: Iterator[InternalRow] = { + baseSet.iterator.map(_.row) + } +} diff --git a/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/util/InternalRowSet.scala b/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/util/InternalRowSet.scala new file mode 100644 index 0000000000000..66090fdf1872f --- /dev/null +++ b/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/util/InternalRowSet.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Murmur3HashFunction, RowOrdering} +import org.apache.spark.sql.types.{DataType, StructField, StructType} + +/** + * A mutable Set with [[InternalRow]] as its element type. It uses Spark's internal murmur hash to + * compute hash code from an row, and uses [[RowOrdering]] to perform equality checks. + * + * @param dataTypes the data types for the row keys this set holds + */ +class InternalRowSet(val dataTypes: Seq[DataType]) extends mutable.Set[InternalRow] { + private val baseSet = new mutable.HashSet[InternalRowContainer] + + private val structType = StructType(dataTypes.map(t => StructField("f", t))) + private val ordering = RowOrdering.createNaturalAscendingOrdering(dataTypes) + + override def contains(row: InternalRow): Boolean = + baseSet.contains(new InternalRowContainer(row)) + + private class InternalRowContainer(val row: InternalRow) { + override def hashCode(): Int = Murmur3HashFunction.hash(row, structType, 42L).toInt + + override def equals(other: Any): Boolean = other match { + case r: InternalRowContainer => ordering.compare(row, r.row) == 0 + case r => this == r + } + } + + override def addOne(row: InternalRow): InternalRowSet.this.type = { + val rowKey = new InternalRowContainer(row) + baseSet += rowKey + this + } + + override def subtractOne(row: InternalRow): InternalRowSet.this.type = { + val rowKey = new InternalRowContainer(row) + baseSet -= rowKey + this + } + + override def clear(): Unit = { + baseSet.clear() + } + + override def iterator: Iterator[InternalRow] = { + baseSet.iterator.map(_.row) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TransformExpression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TransformExpression.scala new file mode 100644 index 0000000000000..8412de554b711 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TransformExpression.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.sql.connector.catalog.functions.BoundFunction +import org.apache.spark.sql.types.DataType + +/** + * Represents a partition transform expression, for instance, `bucket`, `days`, `years`, etc. + * + * @param function the transform function itself. Spark will use it to decide whether two + * partition transform expressions are compatible. + * @param numBucketsOpt the number of buckets if the transform is `bucket`. Unset otherwise. + */ +case class TransformExpression( + function: BoundFunction, + children: Seq[Expression], + numBucketsOpt: Option[Int] = None) extends Expression with Unevaluable { + + override def nullable: Boolean = true + + /** + * Whether this [[TransformExpression]] has the same semantics as `other`. + * For instance, `bucket(32, c)` is equal to `bucket(32, d)`, but not to `bucket(16, d)` or + * `year(c)`. + * + * This will be used, for instance, by Spark to determine whether storage-partitioned join can + * be triggered, by comparing partition transforms from both sides of the join and checking + * whether they are compatible. + * + * @param other the transform expression to compare to + * @return true if this and `other` has the same semantics w.r.t to transform, false otherwise. + */ + def isSameFunction(other: TransformExpression): Boolean = other match { + case TransformExpression(otherFunction, _, otherNumBucketsOpt) => + function.canonicalName() == otherFunction.canonicalName() && + numBucketsOpt == otherNumBucketsOpt + case _ => + false + } + + override def dataType: DataType = function.resultType() + + override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = + copy(children = newChildren) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala index 72d072ff1a4a4..596d5d8b565df 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala @@ -17,16 +17,22 @@ package org.apache.spark.sql.catalyst.expressions +import org.apache.spark.internal.Logging import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.analysis.NoSuchFunctionException import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.connector.expressions.{Expression => V2Expression, FieldReference, IdentityTransform, NamedReference, NullOrdering => V2NullOrdering, SortDirection => V2SortDirection, SortValue} +import org.apache.spark.sql.connector.catalog.{FunctionCatalog, Identifier} +import org.apache.spark.sql.connector.catalog.functions._ +import org.apache.spark.sql.connector.expressions.{BucketTransform, Expression => V2Expression, FieldReference, IdentityTransform, NamedReference, NamedTransform, NullOrdering => V2NullOrdering, SortDirection => V2SortDirection, SortOrder => V2SortOrder, SortValue, Transform} import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.types._ +import org.apache.spark.util.collection.Utils.sequenceToOption /** * A utility class that converts public connector expressions into Catalyst expressions. */ -object V2ExpressionUtils extends SQLConfHelper { +object V2ExpressionUtils extends SQLConfHelper with Logging { import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper def resolveRef[T <: NamedExpression](ref: NamedReference, plan: LogicalPlan): T = { @@ -44,20 +50,85 @@ object V2ExpressionUtils extends SQLConfHelper { refs.map(ref => resolveRef[T](ref, plan)) } - def toCatalyst(expr: V2Expression, query: LogicalPlan): Expression = { + /** + * Converts the array of input V2 [[V2SortOrder]] into their counterparts in catalyst. + */ + def toCatalystOrdering(ordering: Array[V2SortOrder], query: LogicalPlan): Seq[SortOrder] = { + sequenceToOption(ordering.map(toCatalyst(_, query))).asInstanceOf[Option[Seq[SortOrder]]] + .getOrElse(Seq.empty) + } + + def toCatalyst( + expr: V2Expression, + query: LogicalPlan, + funCatalogOpt: Option[FunctionCatalog] = None): Option[Expression] = { expr match { + case t: Transform => + toCatalystTransform(t, query, funCatalogOpt) case SortValue(child, direction, nullOrdering) => - val catalystChild = toCatalyst(child, query) - SortOrder(catalystChild, toCatalyst(direction), toCatalyst(nullOrdering), Seq.empty) - case IdentityTransform(ref) => - resolveRef[NamedExpression](ref, query) + toCatalyst(child, query, funCatalogOpt).map { catalystChild => + SortOrder(catalystChild, toCatalyst(direction), toCatalyst(nullOrdering), Seq.empty) + } case ref: FieldReference => - resolveRef[NamedExpression](ref, query) + Some(resolveRef[NamedExpression](ref, query)) case _ => throw new AnalysisException(s"$expr is not currently supported") } } + def toCatalystTransform( + trans: Transform, + query: LogicalPlan, + funCatalogOpt: Option[FunctionCatalog] = None): Option[Expression] = trans match { + case IdentityTransform(ref) => + Some(resolveRef[NamedExpression](ref, query)) + case BucketTransform(numBuckets, refs, sorted) + if sorted.isEmpty && refs.length == 1 && refs.forall(_.isInstanceOf[NamedReference]) => + val resolvedRefs = refs.map(r => resolveRef[NamedExpression](r, query)) + // Create a dummy reference for `numBuckets` here and use that, together with `refs`, to + // look up the V2 function. + val numBucketsRef = AttributeReference("numBuckets", IntegerType, nullable = false)() + funCatalogOpt.flatMap { catalog => + loadV2Function(catalog, "bucket", Seq(numBucketsRef) ++ resolvedRefs).map { bound => + TransformExpression(bound, resolvedRefs, Some(numBuckets)) + } + } + case NamedTransform(name, refs) + if refs.length == 1 && refs.forall(_.isInstanceOf[NamedReference]) => + val resolvedRefs = refs.map(_.asInstanceOf[NamedReference]).map { r => + resolveRef[NamedExpression](r, query) + } + funCatalogOpt.flatMap { catalog => + loadV2Function(catalog, name, resolvedRefs).map { bound => + TransformExpression(bound, resolvedRefs) + } + } + case _ => + throw new AnalysisException(s"Transform $trans is not currently supported") + } + + private def loadV2Function( + catalog: FunctionCatalog, + name: String, + args: Seq[Expression]): Option[BoundFunction] = { + val inputType = StructType(args.zipWithIndex.map { + case (exp, pos) => StructField(s"_$pos", exp.dataType, exp.nullable) + }) + try { + val unbound = catalog.loadFunction(Identifier.of(Array.empty, name)) + Some(unbound.bind(inputType)) + } catch { + case _: NoSuchFunctionException => + val parameterString = args.map(_.dataType.typeName).mkString("(", ", ", ")") + logWarning(s"V2 function $name with parameter types $parameterString is used in " + + "partition transforms, but its definition couldn't be found in the function catalog " + + "provided") + None + case _: UnsupportedOperationException => + None + } + } + private def toCatalyst(direction: V2SortDirection): SortDirection = direction match { case V2SortDirection.ASCENDING => Ascending case V2SortDirection.DESCENDING => Descending diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index e4ff14b17a20c..69eeab426ed01 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.plans.physical import scala.collection.mutable +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, IntegerType} @@ -305,6 +306,63 @@ case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int) newChildren: IndexedSeq[Expression]): HashPartitioning = copy(expressions = newChildren) } +/** + * Represents a partitioning where rows are split across partitions based on transforms defined + * by `expressions`. `partitionValuesOpt`, if defined, should contain value of partition key(s) in + * ascending order, after evaluated by the transforms in `expressions`, for each input partition. + * In addition, its length must be the same as the number of input partitions (and thus is a 1-1 + * mapping), and each row in `partitionValuesOpt` must be unique. + * + * For example, if `expressions` is `[years(ts_col)]`, then a valid value of `partitionValuesOpt` is + * `[0, 1, 2]`, which represents 3 input partitions with distinct partition values. All rows + * in each partition have the same value for column `ts_col` (which is of timestamp type), after + * being applied by the `years` transform. + * + * On the other hand, `[0, 0, 1]` is not a valid value for `partitionValuesOpt` since `0` is + * duplicated twice. + * + * @param expressions partition expressions for the partitioning. + * @param numPartitions the number of partitions + * @param partitionValuesOpt if set, the values for the cluster keys of the distribution, must be + * in ascending order. + */ +case class KeyGroupedPartitioning( + expressions: Seq[Expression], + numPartitions: Int, + partitionValuesOpt: Option[Seq[InternalRow]] = None) extends Partitioning { + + override def satisfies0(required: Distribution): Boolean = { + super.satisfies0(required) || { + required match { + case c @ ClusteredDistribution(requiredClustering, requireAllClusterKeys, _) => + if (requireAllClusterKeys) { + // Checks whether this partitioning is partitioned on exactly same clustering keys of + // `ClusteredDistribution`. + c.areAllClusterKeysMatched(expressions) + } else { + // We'll need to find leaf attributes from the partition expressions first. + val attributes = expressions.flatMap(_.collectLeaves()) + attributes.forall(x => requiredClustering.exists(_.semanticEquals(x))) + } + + case _ => + false + } + } + } + + override def createShuffleSpec(distribution: ClusteredDistribution): ShuffleSpec = + KeyGroupedShuffleSpec(this, distribution) +} + +object KeyGroupedPartitioning { + def apply( + expressions: Seq[Expression], + partitionValues: Seq[InternalRow]): KeyGroupedPartitioning = { + KeyGroupedPartitioning(expressions, partitionValues.size, Some(partitionValues)) + } +} + /** * Represents a partitioning where rows are split across partitions based on some total ordering of * the expressions specified in `ordering`. When data is partitioned in this manner, it guarantees: @@ -456,6 +514,8 @@ trait ShuffleSpec { * A true return value means that the data partitioning from this spec can be seen as * co-partitioned with the `other`, and therefore no shuffle is required when joining the two * sides. + * + * Note that Spark assumes this to be reflexive, symmetric and transitive. */ def isCompatibleWith(other: ShuffleSpec): Boolean @@ -574,6 +634,80 @@ case class HashShuffleSpec( override def numPartitions: Int = partitioning.numPartitions } +case class KeyGroupedShuffleSpec( + partitioning: KeyGroupedPartitioning, + distribution: ClusteredDistribution) extends ShuffleSpec { + + /** + * A sequence where each element is a set of positions of the partition expression to the cluster + * keys. For instance, if cluster keys are [a, b, b] and partition expressions are + * [bucket(4, a), years(b)], the result will be [(0), (1, 2)]. + * + * Note that we only allow each partition expression to contain a single partition key. + * Therefore the mapping here is very similar to that from `HashShuffleSpec`. + */ + lazy val keyPositions: Seq[mutable.BitSet] = { + val distKeyToPos = mutable.Map.empty[Expression, mutable.BitSet] + distribution.clustering.zipWithIndex.foreach { case (distKey, distKeyPos) => + distKeyToPos.getOrElseUpdate(distKey.canonicalized, mutable.BitSet.empty).add(distKeyPos) + } + partitioning.expressions.map { e => + val leaves = e.collectLeaves() + assert(leaves.size == 1, s"Expected exactly one child from $e, but found ${leaves.size}") + distKeyToPos.getOrElse(leaves.head.canonicalized, mutable.BitSet.empty) + } + } + + private lazy val ordering: Ordering[InternalRow] = + RowOrdering.createNaturalAscendingOrdering(partitioning.expressions.map(_.dataType)) + + override def numPartitions: Int = partitioning.numPartitions + + override def isCompatibleWith(other: ShuffleSpec): Boolean = other match { + // Here we check: + // 1. both distributions have the same number of clustering keys + // 2. both partitioning have the same number of partitions + // 3. partition expressions from both sides are compatible, which means: + // 3.1 both sides have the same number of partition expressions + // 3.2 for each pair of partition expressions at the same index, the corresponding + // partition keys must share overlapping positions in their respective clustering keys. + // 3.3 each pair of partition expressions at the same index must share compatible + // transform functions. + // 4. the partition values, if present on both sides, are following the same order. + case otherSpec @ KeyGroupedShuffleSpec(otherPartitioning, otherDistribution) => + val expressions = partitioning.expressions + val otherExpressions = otherPartitioning.expressions + + distribution.clustering.length == otherDistribution.clustering.length && + numPartitions == other.numPartitions && + expressions.length == otherExpressions.length && { + val otherKeyPositions = otherSpec.keyPositions + keyPositions.zip(otherKeyPositions).forall { case (left, right) => + left.intersect(right).nonEmpty + } + } && expressions.zip(otherExpressions).forall { + case (l, r) => isExpressionCompatible(l, r) + } && partitioning.partitionValuesOpt.zip(otherPartitioning.partitionValuesOpt).forall { + case (left, right) => left.zip(right).forall { case (l, r) => + ordering.compare(l, r) == 0 + } + } + case ShuffleSpecCollection(specs) => + specs.exists(isCompatibleWith) + case _ => false + } + + private def isExpressionCompatible(left: Expression, right: Expression): Boolean = + (left, right) match { + case (_: LeafExpression, _: LeafExpression) => true + case (left: TransformExpression, right: TransformExpression) => + left.isSameFunction(right) + case _ => false + } + + override def canCreatePartitioning: Boolean = false +} + case class ShuffleSpecCollection(specs: Seq[ShuffleSpec]) extends ShuffleSpec { override def isCompatibleWith(other: ShuffleSpec): Boolean = { specs.exists(_.isCompatibleWith(other)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala index 04af7eda6aaa9..91809b6176c8a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Implicits.scala @@ -188,4 +188,8 @@ private[sql] object CatalogV2Implicits { def parseColumnPath(name: String): Seq[String] = { CatalystSqlParser.parseMultipartIdentifier(name) } + + def parseFunctionName(name: String): Seq[String] = { + CatalystSqlParser.parseMultipartIdentifier(name) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala index d93864991fc3d..6b0760ca1637b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.analysis.{MultiInstanceRelation, NamedRelation} -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} import org.apache.spark.sql.catalyst.plans.logical.{ExposesMetadataColumns, LeafNode, LogicalPlan, Statistics} import org.apache.spark.sql.catalyst.util.{truncatedString, CharVarcharUtils} import org.apache.spark.sql.connector.catalog.{CatalogPlugin, Identifier, MetadataColumn, SupportsMetadataColumns, Table, TableCapability} @@ -113,11 +113,14 @@ case class DataSourceV2Relation( * @param relation a [[DataSourceV2Relation]] * @param scan a DSv2 [[Scan]] * @param output the output attributes of this relation + * @param keyGroupedPartitioning if set, the partitioning expressions that are used to split the + * rows in the scan across different partitions */ case class DataSourceV2ScanRelation( relation: DataSourceV2Relation, scan: Scan, - output: Seq[AttributeReference]) extends LeafNode with NamedRelation { + output: Seq[AttributeReference], + keyGroupedPartitioning: Option[Seq[Expression]] = None) extends LeafNode with NamedRelation { override def name: String = relation.table.name() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 56849b8718e19..223017d17304d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1334,6 +1334,15 @@ object SQLConf { .booleanConf .createWithDefault(true) + val V2_BUCKETING_ENABLED = buildConf("spark.sql.sources.v2.bucketing.enabled") + .doc(s"Similar to ${BUCKETING_ENABLED.key}, this config is used to enable bucketing for V2 " + + "data sources. When turned on, Spark will recognize the specific distribution " + + "reported by a V2 data source through SupportsReportPartitioning, and will try to " + + "avoid shuffle if necessary.") + .version("3.3.0") + .booleanConf + .createWithDefault(false) + val BUCKETING_MAX_BUCKETS = buildConf("spark.sql.sources.bucketing.maxBuckets") .doc("The maximum number of buckets allowed.") .version("2.4.0") @@ -4147,6 +4156,8 @@ class SQLConf extends Serializable with Logging { def autoBucketedScanEnabled: Boolean = getConf(SQLConf.AUTO_BUCKETED_SCAN_ENABLED) + def v2BucketingEnabled: Boolean = getConf(SQLConf.V2_BUCKETING_ENABLED) + def dataFrameSelfJoinAutoResolveAmbiguity: Boolean = getConf(DATAFRAME_SELF_JOIN_AUTO_RESOLVE_AMBIGUITY) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryCatalog.scala index 202b03f28f082..be3baf9252006 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryCatalog.scala @@ -55,4 +55,8 @@ class InMemoryCatalog extends InMemoryTableCatalog with FunctionCatalog { def createFunction(ident: Identifier, fn: UnboundFunction): UnboundFunction = { functions.put(ident, fn) } + + def clearFunctions(): Unit = { + functions.clear() + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala index 5d72b2060bfd8..a762b0f87839f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala @@ -29,10 +29,11 @@ import org.scalatest.Assertions._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, JoinedRow} import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, DateTimeUtils} -import org.apache.spark.sql.connector.distributions.{Distribution, Distributions} +import org.apache.spark.sql.connector.distributions.{ClusteredDistribution, Distribution, Distributions} import org.apache.spark.sql.connector.expressions._ import org.apache.spark.sql.connector.metric.{CustomMetric, CustomTaskMetric} import org.apache.spark.sql.connector.read._ +import org.apache.spark.sql.connector.read.partitioning.{KeyGroupedPartitioning, Partitioning, UnknownPartitioning} import org.apache.spark.sql.connector.write._ import org.apache.spark.sql.connector.write.streaming.{StreamingDataWriterFactory, StreamingWrite} import org.apache.spark.sql.internal.connector.SupportsStreamingUpdateAsAppend @@ -260,7 +261,8 @@ class InMemoryTable( var data: Seq[InputPartition], readSchema: StructType, tableSchema: StructType) - extends Scan with Batch with SupportsRuntimeFiltering with SupportsReportStatistics { + extends Scan with Batch with SupportsRuntimeFiltering with SupportsReportStatistics + with SupportsReportPartitioning { override def toBatch: Batch = this @@ -278,6 +280,13 @@ class InMemoryTable( InMemoryStats(OptionalLong.of(sizeInBytes), OptionalLong.of(numRows)) } + override def outputPartitioning(): Partitioning = { + InMemoryTable.this.distribution match { + case cd: ClusteredDistribution => new KeyGroupedPartitioning(cd.clustering(), data.size) + case _ => new UnknownPartitioning(data.size) + } + } + override def planInputPartitions(): Array[InputPartition] = data.toArray override def createReaderFactory(): PartitionReaderFactory = { @@ -293,9 +302,10 @@ class InMemoryTable( } override def filter(filters: Array[Filter]): Unit = { - if (partitioning.length == 1) { + if (partitioning.length == 1 && partitioning.head.references().length == 1) { + val ref = partitioning.head.references().head filters.foreach { - case In(attrName, values) if attrName == partitioning.head.name => + case In(attrName, values) if attrName == ref.toString => val matchingKeys = values.map(_.toString).toSet data = data.filter(partition => { val key = partition.asInstanceOf[BufferedRows].keyString diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 62dea96614a5f..564eed1ecfda2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -3619,7 +3619,8 @@ class Dataset[T] private[sql]( fr.inputFiles case r: HiveTableRelation => r.tableMeta.storage.locationUri.map(_.toString).toArray - case DataSourceV2ScanRelation(DataSourceV2Relation(table: FileTable, _, _, _, _), _, _) => + case DataSourceV2ScanRelation(DataSourceV2Relation(table: FileTable, _, _, _, _), + _, _, _) => table.fileIndex.inputFiles }.flatten files.toSet.toArray diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index dcb02ab8556ec..bfe4bd2924118 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions import org.apache.spark.sql.execution.datasources.SchemaPruning -import org.apache.spark.sql.execution.datasources.v2.{V2ScanRelationPushDown, V2Writes} +import org.apache.spark.sql.execution.datasources.v2.{V2ScanPartitioning, V2ScanRelationPushDown, V2Writes} import org.apache.spark.sql.execution.dynamicpruning.{CleanupDynamicPruningFilters, PartitionPruning} import org.apache.spark.sql.execution.python.{ExtractGroupingPythonUDFFromAggregate, ExtractPythonUDFFromAggregate, ExtractPythonUDFs} @@ -37,7 +37,11 @@ class SparkOptimizer( override def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = // TODO: move SchemaPruning into catalyst - SchemaPruning :: V2ScanRelationPushDown :: V2Writes :: PruneFileSourcePartitions :: Nil + Seq(SchemaPruning) :+ + V2ScanRelationPushDown :+ + V2ScanPartitioning :+ + V2Writes :+ + PruneFileSourcePartitions override def defaultBatches: Seq[Batch] = (preOptimizationBatches ++ super.defaultBatches :+ Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog)) :+ @@ -75,6 +79,7 @@ class SparkOptimizer( ExtractPythonUDFFromAggregate.ruleName :+ ExtractGroupingPythonUDFFromAggregate.ruleName :+ ExtractPythonUDFs.ruleName :+ V2ScanRelationPushDown.ruleName :+ + V2ScanPartitioning.ruleName :+ V2Writes.ruleName /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala index 937d18d9eb76f..0b813d52ceed1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala @@ -24,9 +24,10 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.QueryPlan -import org.apache.spark.sql.catalyst.plans.physical.SinglePartition +import org.apache.spark.sql.catalyst.plans.physical.{KeyGroupedPartitioning, SinglePartition} +import org.apache.spark.sql.catalyst.util.InternalRowSet import org.apache.spark.sql.catalyst.util.truncatedString -import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory, Scan, SupportsRuntimeFiltering} +import org.apache.spark.sql.connector.read.{HasPartitionKey, InputPartition, PartitionReaderFactory, Scan, SupportsRuntimeFiltering} import org.apache.spark.sql.execution.datasources.DataSourceStrategy /** @@ -35,7 +36,8 @@ import org.apache.spark.sql.execution.datasources.DataSourceStrategy case class BatchScanExec( output: Seq[AttributeReference], @transient scan: Scan, - runtimeFilters: Seq[Expression]) extends DataSourceV2ScanExecBase { + runtimeFilters: Seq[Expression], + keyGroupedPartitioning: Option[Seq[Expression]] = None) extends DataSourceV2ScanExecBase { @transient lazy val batch = scan.toBatch @@ -49,9 +51,9 @@ case class BatchScanExec( override def hashCode(): Int = Objects.hashCode(batch, runtimeFilters) - @transient override lazy val partitions: Seq[InputPartition] = batch.planInputPartitions() + @transient override lazy val inputPartitions: Seq[InputPartition] = batch.planInputPartitions() - @transient private lazy val filteredPartitions: Seq[InputPartition] = { + @transient private lazy val filteredPartitions: Seq[Seq[InputPartition]] = { val dataSourceFilters = runtimeFilters.flatMap { case DynamicPruningExpression(e) => DataSourceStrategy.translateRuntimeFilter(e) case _ => None @@ -68,16 +70,36 @@ case class BatchScanExec( val newPartitions = scan.toBatch.planInputPartitions() originalPartitioning match { - case p: DataSourcePartitioning if p.numPartitions != newPartitions.size => - throw new SparkException( - "Data source must have preserved the original partitioning during runtime filtering; " + - s"reported num partitions: ${p.numPartitions}, " + - s"num partitions after runtime filtering: ${newPartitions.size}") + case p: KeyGroupedPartitioning => + if (newPartitions.exists(!_.isInstanceOf[HasPartitionKey])) { + throw new SparkException("Data source must have preserved the original partitioning " + + "during runtime filtering: not all partitions implement HasPartitionKey after " + + "filtering") + } + + val newRows = new InternalRowSet(p.expressions.map(_.dataType)) + newRows ++= newPartitions.map(_.asInstanceOf[HasPartitionKey].partitionKey()) + val oldRows = p.partitionValuesOpt.get + + if (oldRows.size != newRows.size) { + throw new SparkException("Data source must have preserved the original partitioning " + + "during runtime filtering: the number of unique partition values obtained " + + s"through HasPartitionKey changed: before ${oldRows.size}, after ${newRows.size}") + } + + if (!oldRows.forall(newRows.contains)) { + throw new SparkException("Data source must have preserved the original partitioning " + + "during runtime filtering: the number of unique partition values obtained " + + s"through HasPartitionKey remain the same but do not exactly match") + } + + groupPartitions(newPartitions).get.map(_._2) + case _ => // no validation is needed as the data source did not report any specific partitioning + newPartitions.map(Seq(_)) } - newPartitions } else { partitions } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ContinuousScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ContinuousScanExec.scala index b19db8b0e5110..5f973e10b80f1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ContinuousScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ContinuousScanExec.scala @@ -31,7 +31,8 @@ case class ContinuousScanExec( output: Seq[Attribute], @transient scan: Scan, @transient stream: ContinuousStream, - @transient start: Offset) extends DataSourceV2ScanExecBase { + @transient start: Offset, + keyGroupedPartitioning: Option[Seq[Expression]] = None) extends DataSourceV2ScanExecBase { // TODO: unify the equal/hashCode implementation for all data source v2 query plans. override def equals(other: Any): Boolean = other match { @@ -41,13 +42,14 @@ case class ContinuousScanExec( override def hashCode(): Int = stream.hashCode() - override lazy val partitions: Seq[InputPartition] = stream.planInputPartitions(start) + override lazy val inputPartitions: Seq[InputPartition] = stream.planInputPartitions(start) override lazy val readerFactory: ContinuousPartitionReaderFactory = { stream.createContinuousReaderFactory() } override lazy val inputRDD: RDD[InternalRow] = { + assert(partitions.forall(_.length == 1), "should only contain a single partition") EpochCoordinatorRef.get( sparkContext.getLocalProperty(ContinuousExecution.EPOCH_COORDINATOR_ID_KEY), sparkContext.env) @@ -56,7 +58,7 @@ case class ContinuousScanExec( sparkContext, conf.continuousStreamingExecutorQueueSize, conf.continuousStreamingExecutorPollIntervalMs, - partitions, + partitions.map(_.head), schema, readerFactory, customMetrics) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourcePartitioning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourcePartitioning.scala deleted file mode 100644 index 9211ec25525fa..0000000000000 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourcePartitioning.scala +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.datasources.v2 - -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, Expression} -import org.apache.spark.sql.catalyst.plans.physical -import org.apache.spark.sql.connector.read.partitioning.{ClusteredDistribution, Partitioning} - -/** - * An adapter from public data source partitioning to catalyst internal `Partitioning`. - */ -class DataSourcePartitioning( - partitioning: Partitioning, - colNames: AttributeMap[String]) extends physical.Partitioning { - - override val numPartitions: Int = partitioning.numPartitions() - - override def satisfies0(required: physical.Distribution): Boolean = { - super.satisfies0(required) || { - required match { - case d: physical.ClusteredDistribution if isCandidate(d.clustering) => - val attrs = d.clustering.map(_.asInstanceOf[Attribute]) - partitioning.satisfy( - new ClusteredDistribution(attrs.map { a => - val name = colNames.get(a) - assert(name.isDefined, s"Attribute ${a.name} is not found in the data source output") - name.get - }.toArray)) - - case _ => false - } - } - } - - private def isCandidate(clustering: Seq[Expression]): Boolean = { - clustering.forall { - case a: Attribute => colNames.contains(a) - case _ => false - } - } -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceRDD.scala index a1eb857c4ed41..09c8756ca0189 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceRDD.scala @@ -29,14 +29,14 @@ import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.metric.{CustomMetrics, SQLMetric} import org.apache.spark.sql.vectorized.ColumnarBatch -class DataSourceRDDPartition(val index: Int, val inputPartition: InputPartition) +class DataSourceRDDPartition(val index: Int, val inputPartitions: Seq[InputPartition]) extends Partition with Serializable // TODO: we should have 2 RDDs: an RDD[InternalRow] for row-based scan, an `RDD[ColumnarBatch]` for // columnar scan. class DataSourceRDD( sc: SparkContext, - @transient private val inputPartitions: Seq[InputPartition], + @transient private val inputPartitions: Seq[Seq[InputPartition]], partitionReaderFactory: PartitionReaderFactory, columnarReads: Boolean, customMetrics: Map[String, SQLMetric]) @@ -44,7 +44,7 @@ class DataSourceRDD( override protected def getPartitions: Array[Partition] = { inputPartitions.zipWithIndex.map { - case (inputPartition, index) => new DataSourceRDDPartition(index, inputPartition) + case (inputPartitions, index) => new DataSourceRDDPartition(index, inputPartitions) }.toArray } @@ -54,31 +54,56 @@ class DataSourceRDD( } override def compute(split: Partition, context: TaskContext): Iterator[InternalRow] = { - val inputPartition = castPartition(split).inputPartition - val (iter, reader) = if (columnarReads) { - val batchReader = partitionReaderFactory.createColumnarReader(inputPartition) - val iter = new MetricsBatchIterator( - new PartitionIterator[ColumnarBatch](batchReader, customMetrics)) - (iter, batchReader) - } else { - val rowReader = partitionReaderFactory.createReader(inputPartition) - val iter = new MetricsRowIterator( - new PartitionIterator[InternalRow](rowReader, customMetrics)) - (iter, rowReader) - } - context.addTaskCompletionListener[Unit] { _ => - // In case of early stopping before consuming the entire iterator, - // we need to do one more metric update at the end of the task. - CustomMetrics.updateMetrics(reader.currentMetricsValues, customMetrics) - iter.forceUpdateMetrics() - reader.close() + + val iterator = new Iterator[Object] { + private val inputPartitions = castPartition(split).inputPartitions + private var currentIter: Option[Iterator[Object]] = None + private var currentIndex: Int = 0 + + override def hasNext: Boolean = currentIter.exists(_.hasNext) || advanceToNextIter() + + override def next(): Object = { + if (!hasNext) throw new NoSuchElementException("No more elements") + currentIter.get.next() + } + + private def advanceToNextIter(): Boolean = { + if (currentIndex >= inputPartitions.length) { + false + } else { + val inputPartition = inputPartitions(currentIndex) + currentIndex += 1 + + // TODO: SPARK-25083 remove the type erasure hack in data source scan + val (iter, reader) = if (columnarReads) { + val batchReader = partitionReaderFactory.createColumnarReader(inputPartition) + val iter = new MetricsBatchIterator( + new PartitionIterator[ColumnarBatch](batchReader, customMetrics)) + (iter, batchReader) + } else { + val rowReader = partitionReaderFactory.createReader(inputPartition) + val iter = new MetricsRowIterator( + new PartitionIterator[InternalRow](rowReader, customMetrics)) + (iter, rowReader) + } + context.addTaskCompletionListener[Unit] { _ => + // In case of early stopping before consuming the entire iterator, + // we need to do one more metric update at the end of the task. + CustomMetrics.updateMetrics(reader.currentMetricsValues, customMetrics) + iter.forceUpdateMetrics() + reader.close() + } + currentIter = Some(iter) + hasNext + } + } } - // TODO: SPARK-25083 remove the type erasure hack in data source scan - new InterruptibleIterator(context, iter.asInstanceOf[Iterator[InternalRow]]) + + new InterruptibleIterator(context, iterator).asInstanceOf[Iterator[InternalRow]] } override def getPreferredLocations(split: Partition): Seq[String] = { - castPartition(split).inputPartition.preferredLocations() + castPartition(split).inputPartitions.flatMap(_.preferredLocations()) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala index 92f454c1bcd1e..42909986fce05 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2ScanExecBase.scala @@ -19,13 +19,14 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.AttributeMap +import org.apache.spark.sql.catalyst.expressions.{Expression, RowOrdering} import org.apache.spark.sql.catalyst.plans.physical -import org.apache.spark.sql.catalyst.plans.physical.SinglePartition +import org.apache.spark.sql.catalyst.plans.physical.{KeyGroupedPartitioning, SinglePartition} import org.apache.spark.sql.catalyst.util.truncatedString -import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory, Scan, SupportsReportPartitioning} +import org.apache.spark.sql.connector.read.{HasPartitionKey, InputPartition, PartitionReaderFactory, Scan} import org.apache.spark.sql.execution.{ExplainUtils, LeafExecNode} import org.apache.spark.sql.execution.metric.SQLMetrics +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.connector.SupportsMetadata import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.Utils @@ -43,16 +44,23 @@ trait DataSourceV2ScanExecBase extends LeafExecNode { def scan: Scan - def partitions: Seq[InputPartition] - def readerFactory: PartitionReaderFactory + /** Optional partitioning expressions provided by the V2 data sources, through + * `SupportsReportPartitioning` */ + def keyGroupedPartitioning: Option[Seq[Expression]] + + protected def inputPartitions: Seq[InputPartition] + override def simpleString(maxFields: Int): String = { val result = s"$nodeName${truncatedString(output, "[", ", ", "]", maxFields)} ${scan.description()}" redact(result) } + def partitions: Seq[Seq[InputPartition]] = + groupedPartitions.map(_.map(_._2)).getOrElse(inputPartitions.map(Seq(_))) + /** * Shorthand for calling redact() without specifying redacting rules */ @@ -78,23 +86,64 @@ trait DataSourceV2ScanExecBase extends LeafExecNode { |""".stripMargin } - override def outputPartitioning: physical.Partitioning = scan match { - case _ if partitions.length == 1 => - SinglePartition + override def outputPartitioning: physical.Partitioning = { + if (partitions.length == 1) SinglePartition + else groupedPartitions.map { partitionValues => + KeyGroupedPartitioning(keyGroupedPartitioning.get, + partitionValues.size, Some(partitionValues.map(_._1))) + }.getOrElse(super.outputPartitioning) + } - case s: SupportsReportPartitioning => - new DataSourcePartitioning( - s.outputPartitioning(), AttributeMap(output.map(a => a -> a.name))) + @transient lazy val groupedPartitions: Option[Seq[(InternalRow, Seq[InputPartition])]] = + groupPartitions(inputPartitions) - case _ => super.outputPartitioning + /** + * Group partition values for all the input partitions. This returns `Some` iff: + * - [[SQLConf.V2_BUCKETING_ENABLED]] is turned on + * - all input partitions implement [[HasPartitionKey]] + * - `keyGroupedPartitioning` is set + * + * The result, if defined, is a list of tuples where the first element is a partition value, + * and the second element is a list of input partitions that share the same partition value. + * + * A non-empty result means each partition is clustered on a single key and therefore eligible + * for further optimizations to eliminate shuffling in some operations such as join and aggregate. + */ + def groupPartitions( + inputPartitions: Seq[InputPartition]): Option[Seq[(InternalRow, Seq[InputPartition])]] = { + if (!SQLConf.get.v2BucketingEnabled) return None + keyGroupedPartitioning.flatMap { expressions => + val results = inputPartitions.takeWhile { + case _: HasPartitionKey => true + case _ => false + }.map(p => (p.asInstanceOf[HasPartitionKey].partitionKey(), p)) + + if (results.length != inputPartitions.length || inputPartitions.isEmpty) { + // Not all of the `InputPartitions` implements `HasPartitionKey`, therefore skip here. + None + } else { + val partKeyType = expressions.map(_.dataType) + + val groupedPartitions = results.groupBy(_._1).toSeq.map { case (key, s) => + (key, s.map(_._2)) + } + + // also sort the input partitions according to their partition key order. This ensures + // a canonical order from both sides of a bucketed join, for example. + val keyOrdering: Ordering[(InternalRow, Seq[InputPartition])] = { + RowOrdering.createNaturalAscendingOrdering(partKeyType).on(_._1) + } + Some(groupedPartitions.sorted(keyOrdering)) + } + } } override def supportsColumnar: Boolean = { - require(partitions.forall(readerFactory.supportColumnarReads) || - !partitions.exists(readerFactory.supportColumnarReads), + require(inputPartitions.forall(readerFactory.supportColumnarReads) || + !inputPartitions.exists(readerFactory.supportColumnarReads), "Cannot mix row-based and columnar input partitions.") - partitions.exists(readerFactory.supportColumnarReads) + inputPartitions.exists(readerFactory.supportColumnarReads) } def inputRDD: RDD[InternalRow] diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index c0b00a426143b..45540fb4a1122 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -104,7 +104,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case PhysicalOperation(project, filters, DataSourceV2ScanRelation( - _, V1ScanWrapper(scan, pushed, pushedDownOperators), output)) => + _, V1ScanWrapper(scan, pushed, pushedDownOperators), output, _)) => val v1Relation = scan.toV1TableScan[BaseRelation with TableScan](session.sqlContext) if (v1Relation.schema != scan.readSchema()) { throw QueryExecutionErrors.fallbackV1RelationReportsInconsistentSchemaError( @@ -125,7 +125,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat withProjectAndFilter(project, filters, dsScan, needsUnsafeConversion = false) :: Nil case PhysicalOperation(project, filters, - DataSourceV2ScanRelation(_, scan: LocalScan, output)) => + DataSourceV2ScanRelation(_, scan: LocalScan, output, _)) => val localScanExec = LocalTableScanExec(output, scan.rows().toSeq) withProjectAndFilter(project, filters, localScanExec, needsUnsafeConversion = false) :: Nil @@ -137,7 +137,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case _: DynamicPruning => true case _ => false } - val batchExec = BatchScanExec(relation.output, relation.scan, runtimeFilters) + val batchExec = BatchScanExec(relation.output, relation.scan, runtimeFilters, + relation.keyGroupedPartitioning) withProjectAndFilter(project, postScanFilters, batchExec, !batchExec.supportsColumnar) :: Nil case PhysicalOperation(p, f, r: StreamingDataSourceV2Relation) @@ -255,7 +256,7 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case DeleteFromTable(relation, condition) => relation match { - case DataSourceV2ScanRelation(r, _, output) => + case DataSourceV2ScanRelation(r, _, output, _) => val table = r.table if (SubqueryExpression.hasSubquery(condition)) { throw QueryCompilationErrors.unsupportedDeleteByConditionWithSubqueryError(condition) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DistributionAndOrderingUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DistributionAndOrderingUtils.scala index 0d9146d31c883..275255c9a3d39 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DistributionAndOrderingUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DistributionAndOrderingUtils.scala @@ -17,23 +17,27 @@ package org.apache.spark.sql.execution.datasources.v2 -import org.apache.spark.sql.catalyst.expressions.{Expression, SortOrder} -import org.apache.spark.sql.catalyst.expressions.V2ExpressionUtils.toCatalyst +import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.expressions.V2ExpressionUtils._ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, RepartitionByExpression, Sort} -import org.apache.spark.sql.connector.distributions.{ClusteredDistribution, OrderedDistribution, UnspecifiedDistribution} +import org.apache.spark.sql.connector.distributions._ import org.apache.spark.sql.connector.write.{RequiresDistributionAndOrdering, Write} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.util.collection.Utils.sequenceToOption object DistributionAndOrderingUtils { def prepareQuery(write: Write, query: LogicalPlan, conf: SQLConf): LogicalPlan = write match { case write: RequiresDistributionAndOrdering => val numPartitions = write.requiredNumPartitions() + val distribution = write.requiredDistribution match { - case d: OrderedDistribution => d.ordering.map(e => toCatalyst(e, query)) - case d: ClusteredDistribution => d.clustering.map(e => toCatalyst(e, query)) - case _: UnspecifiedDistribution => Array.empty[Expression] + case d: OrderedDistribution => toCatalystOrdering(d.ordering(), query) + case d: ClusteredDistribution => + sequenceToOption(d.clustering.map(e => toCatalyst(e, query))) + .getOrElse(Seq.empty[Expression]) + case _: UnspecifiedDistribution => Seq.empty[Expression] } val queryWithDistribution = if (distribution.nonEmpty) { @@ -52,10 +56,7 @@ object DistributionAndOrderingUtils { query } - val ordering = write.requiredOrdering.toSeq - .map(e => toCatalyst(e, query)) - .asInstanceOf[Seq[SortOrder]] - + val ordering = toCatalystOrdering(write.requiredOrdering, query) val queryWithDistributionAndOrdering = if (ordering.nonEmpty) { Sort(ordering, global = false, queryWithDistribution) } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MicroBatchScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MicroBatchScanExec.scala index 1430a32c8e81a..3db7fb7851249 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MicroBatchScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/MicroBatchScanExec.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory, Scan} import org.apache.spark.sql.connector.read.streaming.{MicroBatchStream, Offset} @@ -31,7 +31,8 @@ case class MicroBatchScanExec( @transient scan: Scan, @transient stream: MicroBatchStream, @transient start: Offset, - @transient end: Offset) extends DataSourceV2ScanExecBase { + @transient end: Offset, + keyGroupedPartitioning: Option[Seq[Expression]] = None) extends DataSourceV2ScanExecBase { // TODO: unify the equal/hashCode implementation for all data source v2 query plans. override def equals(other: Any): Boolean = other match { @@ -41,7 +42,7 @@ case class MicroBatchScanExec( override def hashCode(): Int = stream.hashCode() - override lazy val partitions: Seq[InputPartition] = stream.planInputPartitions(start, end) + override lazy val inputPartitions: Seq[InputPartition] = stream.planInputPartitions(start, end) override lazy val readerFactory: PartitionReaderFactory = stream.createReaderFactory() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala new file mode 100644 index 0000000000000..8d2b3a8880cd3 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.SQLConfHelper +import org.apache.spark.sql.catalyst.expressions.V2ExpressionUtils +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.FunctionCatalog +import org.apache.spark.sql.connector.read.SupportsReportPartitioning +import org.apache.spark.sql.connector.read.partitioning.{KeyGroupedPartitioning, UnknownPartitioning} +import org.apache.spark.util.collection.Utils.sequenceToOption + +/** + * Extracts [[DataSourceV2ScanRelation]] from the input logical plan, converts any V2 partitioning + * reported by data sources to their catalyst counterparts. Then, annotates the plan with the + * result. + */ +object V2ScanPartitioning extends Rule[LogicalPlan] with SQLConfHelper { + override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { + case d @ DataSourceV2ScanRelation(relation, scan: SupportsReportPartitioning, _, _) => + val funCatalogOpt = relation.catalog.flatMap { + case c: FunctionCatalog => Some(c) + case _ => None + } + + val catalystPartitioning = scan.outputPartitioning() match { + case kgp: KeyGroupedPartitioning => sequenceToOption(kgp.keys().map( + V2ExpressionUtils.toCatalyst(_, relation, funCatalogOpt))) + case _: UnknownPartitioning => None + case p => throw new IllegalArgumentException("Unsupported data source V2 partitioning " + + "type: " + p.getClass.getSimpleName) + } + + d.copy(keyGroupedPartitioning = catalystPartitioning) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala index 89d66034f06cd..114d58c739e29 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala @@ -78,7 +78,7 @@ object PartitionPruning extends Rule[LogicalPlan] with PredicateHelper { } else { None } - case (resExp, r @ DataSourceV2ScanRelation(_, scan: SupportsRuntimeFiltering, _)) => + case (resExp, r @ DataSourceV2ScanRelation(_, scan: SupportsRuntimeFiltering, _, _)) => val filterAttrs = V2ExpressionUtils.resolveRefs[Attribute](scan.filterAttributes, r) if (resExp.references.subsetOf(AttributeSet(filterAttrs))) { Some(r) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index de1806ab87b4c..67a58da89625e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.joins.{ShuffledHashJoinExec, SortMergeJoinExec} +import org.apache.spark.sql.internal.SQLConf /** * Ensures that the [[org.apache.spark.sql.catalyst.plans.physical.Partitioning Partitioning]] @@ -137,8 +138,16 @@ case class EnsureRequirements( Some(finalCandidateSpecs.values.maxBy(_.numPartitions)) } + // Check if 1) all children are of `KeyGroupedPartitioning` and 2) they are all compatible + // with each other. If both are true, skip shuffle. + val allCompatible = childrenIndexes.sliding(2).forall { + case Seq(a, b) => + checkKeyGroupedSpec(specs(a)) && checkKeyGroupedSpec(specs(b)) && + specs(a).isCompatibleWith(specs(b)) + } + children = children.zip(requiredChildDistributions).zipWithIndex.map { - case ((child, _), idx) if !childrenIndexes.contains(idx) => + case ((child, _), idx) if allCompatible || !childrenIndexes.contains(idx) => child case ((child, dist), idx) => if (bestSpecOpt.isDefined && bestSpecOpt.get.isCompatibleWith(specs(idx))) { @@ -177,6 +186,26 @@ case class EnsureRequirements( children } + private def checkKeyGroupedSpec(shuffleSpec: ShuffleSpec): Boolean = { + def check(spec: KeyGroupedShuffleSpec): Boolean = { + val attributes = spec.partitioning.expressions.flatMap(_.collectLeaves()) + val clustering = spec.distribution.clustering + + if (SQLConf.get.getConf(SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION)) { + attributes.length == clustering.length && attributes.zip(clustering).forall { + case (l, r) => l.semanticEquals(r) + } + } else { + true // already validated in `KeyGroupedPartitioning.satisfies` + } + } + shuffleSpec match { + case spec: KeyGroupedShuffleSpec => check(spec) + case ShuffleSpecCollection(specs) => specs.exists(checkKeyGroupedSpec) + case _ => false + } + } + private def reorder( leftKeys: IndexedSeq[Expression], rightKeys: IndexedSeq[Expression], @@ -256,6 +285,16 @@ case class EnsureRequirements( reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, rightExpressions, rightKeys) .orElse(reorderJoinKeysRecursively( leftKeys, rightKeys, leftPartitioning, None)) + case (Some(KeyGroupedPartitioning(clustering, _, _)), _) => + val leafExprs = clustering.flatMap(_.collectLeaves()) + reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, leafExprs, leftKeys) + .orElse(reorderJoinKeysRecursively( + leftKeys, rightKeys, None, rightPartitioning)) + case (_, Some(KeyGroupedPartitioning(clustering, _, _))) => + val leafExprs = clustering.flatMap(_.collectLeaves()) + reorder(leftKeys.toIndexedSeq, rightKeys.toIndexedSeq, leafExprs, rightKeys) + .orElse(reorderJoinKeysRecursively( + leftKeys, rightKeys, leftPartitioning, None)) case (Some(PartitioningCollection(partitionings)), _) => partitionings.foldLeft(Option.empty[(Seq[Expression], Seq[Expression])]) { (res, p) => res.orElse(reorderJoinKeysRecursively(leftKeys, rightKeys, Some(p), rightPartitioning)) diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaPartitionAwareDataSource.java b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaPartitionAwareDataSource.java index e5c50beeaf611..08be0ce9543a7 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaPartitionAwareDataSource.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/connector/JavaPartitionAwareDataSource.java @@ -18,18 +18,17 @@ package test.org.apache.spark.sql.connector; import java.io.IOException; -import java.util.Arrays; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.expressions.GenericInternalRow; import org.apache.spark.sql.connector.TestingV2Source; +import org.apache.spark.sql.connector.catalog.Table; +import org.apache.spark.sql.connector.expressions.Expression; import org.apache.spark.sql.connector.expressions.Expressions; import org.apache.spark.sql.connector.expressions.Transform; -import org.apache.spark.sql.connector.catalog.Table; import org.apache.spark.sql.connector.read.*; -import org.apache.spark.sql.connector.read.partitioning.ClusteredDistribution; -import org.apache.spark.sql.connector.read.partitioning.Distribution; import org.apache.spark.sql.connector.read.partitioning.Partitioning; +import org.apache.spark.sql.connector.read.partitioning.KeyGroupedPartitioning; import org.apache.spark.sql.util.CaseInsensitiveStringMap; public class JavaPartitionAwareDataSource implements TestingV2Source { @@ -51,7 +50,8 @@ public PartitionReaderFactory createReaderFactory() { @Override public Partitioning outputPartitioning() { - return new MyPartitioning(); + Expression[] clustering = new Transform[] { Expressions.identity("i") }; + return new KeyGroupedPartitioning(clustering, 2); } } @@ -70,25 +70,7 @@ public ScanBuilder newScanBuilder(CaseInsensitiveStringMap options) { }; } - static class MyPartitioning implements Partitioning { - - @Override - public int numPartitions() { - return 2; - } - - @Override - public boolean satisfy(Distribution distribution) { - if (distribution instanceof ClusteredDistribution) { - String[] clusteredCols = ((ClusteredDistribution) distribution).clusteredColumns; - return Arrays.asList(clusteredCols).contains("i"); - } - - return false; - } - } - - static class SpecificInputPartition implements InputPartition { + static class SpecificInputPartition implements InputPartition, HasPartitionKey { int[] i; int[] j; @@ -97,6 +79,11 @@ static class SpecificInputPartition implements InputPartition { this.i = i; this.j = j; } + + @Override + public InternalRow partitionKey() { + return new GenericInternalRow(new Object[] {i[0]}); + } } static class SpecificReaderFactory implements PartitionReaderFactory { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index bc7a7b2977aca..17dfde65ca14e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -842,7 +842,7 @@ class FileBasedDataSourceSuite extends QueryTest }) val fileScan = df.queryExecution.executedPlan collectFirst { - case BatchScanExec(_, f: FileScan, _) => f + case BatchScanExec(_, f: FileScan, _, _) => f } assert(fileScan.nonEmpty) assert(fileScan.get.partitionFilters.nonEmpty) @@ -882,7 +882,7 @@ class FileBasedDataSourceSuite extends QueryTest assert(filterCondition.isDefined) val fileScan = df.queryExecution.executedPlan collectFirst { - case BatchScanExec(_, f: FileScan, _) => f + case BatchScanExec(_, f: FileScan, _, _) => f } assert(fileScan.nonEmpty) assert(fileScan.get.partitionFilters.isEmpty) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala index b7533c6a09fda..491d27e546483 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2Suite.scala @@ -27,15 +27,16 @@ import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.connector.catalog.{SupportsRead, Table, TableCapability, TableProvider} import org.apache.spark.sql.connector.catalog.TableCapability._ -import org.apache.spark.sql.connector.expressions.{Literal, Transform} +import org.apache.spark.sql.connector.expressions.{FieldReference, Literal, Transform} import org.apache.spark.sql.connector.expressions.filter.Predicate import org.apache.spark.sql.connector.read._ -import org.apache.spark.sql.connector.read.partitioning.{ClusteredDistribution, Distribution, Partitioning} +import org.apache.spark.sql.connector.read.partitioning.{KeyGroupedPartitioning, Partitioning} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.v2.{BatchScanExec, DataSourceV2Relation, DataSourceV2ScanRelation} import org.apache.spark.sql.execution.exchange.{Exchange, ShuffleExchangeExec} import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.{Filter, GreaterThan} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{IntegerType, StructType} @@ -245,34 +246,36 @@ class DataSourceV2Suite extends QueryTest with SharedSparkSession with AdaptiveS test("partitioning reporting") { import org.apache.spark.sql.functions.{count, sum} - Seq(classOf[PartitionAwareDataSource], classOf[JavaPartitionAwareDataSource]).foreach { cls => - withClue(cls.getName) { - val df = spark.read.format(cls.getName).load() - checkAnswer(df, Seq(Row(1, 4), Row(1, 4), Row(3, 6), Row(2, 6), Row(4, 2), Row(4, 2))) - - val groupByColA = df.groupBy(Symbol("i")).agg(sum(Symbol("j"))) - checkAnswer(groupByColA, Seq(Row(1, 8), Row(2, 6), Row(3, 6), Row(4, 4))) - assert(collectFirst(groupByColA.queryExecution.executedPlan) { - case e: ShuffleExchangeExec => e - }.isEmpty) - - val groupByColAB = df.groupBy(Symbol("i"), Symbol("j")).agg(count("*")) - checkAnswer(groupByColAB, Seq(Row(1, 4, 2), Row(2, 6, 1), Row(3, 6, 1), Row(4, 2, 2))) - assert(collectFirst(groupByColAB.queryExecution.executedPlan) { - case e: ShuffleExchangeExec => e - }.isEmpty) - - val groupByColB = df.groupBy(Symbol("j")).agg(sum(Symbol("i"))) - checkAnswer(groupByColB, Seq(Row(2, 8), Row(4, 2), Row(6, 5))) - assert(collectFirst(groupByColB.queryExecution.executedPlan) { - case e: ShuffleExchangeExec => e - }.isDefined) - - val groupByAPlusB = df.groupBy(Symbol("i") + Symbol("j")).agg(count("*")) - checkAnswer(groupByAPlusB, Seq(Row(5, 2), Row(6, 2), Row(8, 1), Row(9, 1))) - assert(collectFirst(groupByAPlusB.queryExecution.executedPlan) { - case e: ShuffleExchangeExec => e - }.isDefined) + withSQLConf(SQLConf.V2_BUCKETING_ENABLED.key -> "true") { + Seq(classOf[PartitionAwareDataSource], classOf[JavaPartitionAwareDataSource]).foreach { cls => + withClue(cls.getName) { + val df = spark.read.format(cls.getName).load() + checkAnswer(df, Seq(Row(1, 4), Row(1, 4), Row(3, 6), Row(2, 6), Row(4, 2), Row(4, 2))) + + val groupByColA = df.groupBy(Symbol("i")).agg(sum(Symbol("j"))) + checkAnswer(groupByColA, Seq(Row(1, 8), Row(2, 6), Row(3, 6), Row(4, 4))) + assert(collectFirst(groupByColA.queryExecution.executedPlan) { + case e: ShuffleExchangeExec => e + }.isEmpty) + + val groupByColAB = df.groupBy(Symbol("i"), Symbol("j")).agg(count("*")) + checkAnswer(groupByColAB, Seq(Row(1, 4, 2), Row(2, 6, 1), Row(3, 6, 1), Row(4, 2, 2))) + assert(collectFirst(groupByColAB.queryExecution.executedPlan) { + case e: ShuffleExchangeExec => e + }.isEmpty) + + val groupByColB = df.groupBy(Symbol("j")).agg(sum(Symbol("i"))) + checkAnswer(groupByColB, Seq(Row(2, 8), Row(4, 2), Row(6, 5))) + assert(collectFirst(groupByColB.queryExecution.executedPlan) { + case e: ShuffleExchangeExec => e + }.isDefined) + + val groupByAPlusB = df.groupBy(Symbol("i") + Symbol("j")).agg(count("*")) + checkAnswer(groupByAPlusB, Seq(Row(5, 2), Row(6, 2), Row(8, 1), Row(9, 1))) + assert(collectFirst(groupByAPlusB.queryExecution.executedPlan) { + case e: ShuffleExchangeExec => e + }.isDefined) + } } } } @@ -896,7 +899,8 @@ class PartitionAwareDataSource extends TestingV2Source { SpecificReaderFactory } - override def outputPartitioning(): Partitioning = new MyPartitioning + override def outputPartitioning(): Partitioning = + new KeyGroupedPartitioning(Array(FieldReference("i")), 2) } override def getTable(options: CaseInsensitiveStringMap): Table = new SimpleBatchTable { @@ -904,18 +908,13 @@ class PartitionAwareDataSource extends TestingV2Source { new MyScanBuilder() } } - - class MyPartitioning extends Partitioning { - override def numPartitions(): Int = 2 - - override def satisfy(distribution: Distribution): Boolean = distribution match { - case c: ClusteredDistribution => c.clusteredColumns.contains("i") - case _ => false - } - } } -case class SpecificInputPartition(i: Array[Int], j: Array[Int]) extends InputPartition +case class SpecificInputPartition( + i: Array[Int], + j: Array[Int]) extends InputPartition with HasPartitionKey { + override def partitionKey(): InternalRow = InternalRow.fromSeq(Seq(i(0))) +} object SpecificReaderFactory extends PartitionReaderFactory { override def createReader(partition: InputPartition): PartitionReader[InternalRow] = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DistributionAndOrderingSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DistributionAndOrderingSuiteBase.scala new file mode 100644 index 0000000000000..f4317e632761c --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DistributionAndOrderingSuiteBase.scala @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.connector + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.catalyst +import org.apache.spark.sql.catalyst.analysis.Resolver +import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.catalyst.expressions.SortOrder +import org.apache.spark.sql.catalyst.plans.QueryPlan +import org.apache.spark.sql.catalyst.plans.physical +import org.apache.spark.sql.catalyst.plans.physical._ +import org.apache.spark.sql.connector.catalog.InMemoryCatalog +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.test.SharedSparkSession + +abstract class DistributionAndOrderingSuiteBase + extends QueryTest with SharedSparkSession with BeforeAndAfter with AdaptiveSparkPlanHelper { + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + + override def beforeAll(): Unit = { + super.beforeAll() + spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryCatalog].getName) + } + + override def afterAll(): Unit = { + spark.sessionState.conf.unsetConf("spark.sql.catalog.testcat") + super.afterAll() + } + + protected val resolver: Resolver = conf.resolver + + protected def resolvePartitioning[T <: QueryPlan[T]]( + partitioning: Partitioning, + plan: QueryPlan[T]): Partitioning = partitioning match { + case HashPartitioning(exprs, numPartitions) => + HashPartitioning(exprs.map(resolveAttrs(_, plan)), numPartitions) + case KeyGroupedPartitioning(clustering, numPartitions, partitionValues) => + KeyGroupedPartitioning(clustering.map(resolveAttrs(_, plan)), numPartitions, + partitionValues) + case PartitioningCollection(partitionings) => + PartitioningCollection(partitionings.map(resolvePartitioning(_, plan))) + case RangePartitioning(ordering, numPartitions) => + RangePartitioning(ordering.map(resolveAttrs(_, plan).asInstanceOf[SortOrder]), numPartitions) + case p @ SinglePartition => + p + case p: UnknownPartitioning => + p + case p => + fail(s"unexpected partitioning: $p") + } + + protected def resolveDistribution[T <: QueryPlan[T]]( + distribution: physical.Distribution, + plan: QueryPlan[T]): physical.Distribution = distribution match { + case physical.ClusteredDistribution(clustering, numPartitions, _) => + physical.ClusteredDistribution(clustering.map(resolveAttrs(_, plan)), numPartitions) + case physical.OrderedDistribution(ordering) => + physical.OrderedDistribution(ordering.map(resolveAttrs(_, plan).asInstanceOf[SortOrder])) + case physical.UnspecifiedDistribution => + physical.UnspecifiedDistribution + case d => + fail(s"unexpected distribution: $d") + } + + protected def resolveAttrs[T <: QueryPlan[T]]( + expr: catalyst.expressions.Expression, + plan: QueryPlan[T]): catalyst.expressions.Expression = { + + expr.transform { + case UnresolvedAttribute(Seq(attrName)) => + plan.output.find(attr => resolver(attr.name, attrName)).get + case UnresolvedAttribute(nameParts) => + val attrName = nameParts.mkString(".") + fail(s"cannot resolve a nested attr: $attrName") + } + } + + protected def attr(name: String): UnresolvedAttribute = { + UnresolvedAttribute(name) + } + + protected def catalog: InMemoryCatalog = { + val catalog = spark.sessionState.catalogManager.catalog("testcat") + catalog.asTableCatalog.asInstanceOf[InMemoryCatalog] + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala new file mode 100644 index 0000000000000..834faedd1ceef --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala @@ -0,0 +1,475 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.connector + +import java.util.Collections + +import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Ascending, SortOrder => catalystSortOrder, TransformExpression} +import org.apache.spark.sql.catalyst.plans.physical +import org.apache.spark.sql.connector.catalog.Identifier +import org.apache.spark.sql.connector.catalog.InMemoryTableCatalog +import org.apache.spark.sql.connector.catalog.functions._ +import org.apache.spark.sql.connector.distributions.Distribution +import org.apache.spark.sql.connector.distributions.Distributions +import org.apache.spark.sql.connector.expressions._ +import org.apache.spark.sql.connector.expressions.Expressions._ +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.datasources.v2.BatchScanExec +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation +import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.execution.joins.SortMergeJoinExec +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf._ +import org.apache.spark.sql.types._ + +class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { + private var originalV2BucketingEnabled: Boolean = false + private var originalAutoBroadcastJoinThreshold: Long = -1 + + override def beforeAll(): Unit = { + super.beforeAll() + originalV2BucketingEnabled = conf.getConf(V2_BUCKETING_ENABLED) + conf.setConf(V2_BUCKETING_ENABLED, true) + originalAutoBroadcastJoinThreshold = conf.getConf(AUTO_BROADCASTJOIN_THRESHOLD) + conf.setConf(AUTO_BROADCASTJOIN_THRESHOLD, -1L) + } + + override def afterAll(): Unit = { + try { + super.afterAll() + } finally { + conf.setConf(V2_BUCKETING_ENABLED, originalV2BucketingEnabled) + conf.setConf(AUTO_BROADCASTJOIN_THRESHOLD, originalAutoBroadcastJoinThreshold) + } + } + + before { + Seq(UnboundYearsFunction, UnboundDaysFunction, UnboundBucketFunction).foreach { f => + catalog.createFunction(Identifier.of(Array.empty, f.name()), f) + } + } + + after { + catalog.clearTables() + catalog.clearFunctions() + } + + private val emptyProps: java.util.Map[String, String] = { + Collections.emptyMap[String, String] + } + private val table: String = "tbl" + private val schema = new StructType() + .add("id", IntegerType) + .add("data", StringType) + .add("ts", TimestampType) + + test("clustered distribution: output partitioning should be KeyGroupedPartitioning") { + val partitions: Array[Transform] = Array(Expressions.years("ts")) + + // create a table with 3 partitions, partitioned by `years` transform + createTable(table, schema, partitions, + Distributions.clustered(partitions.map(_.asInstanceOf[Expression]))) + sql(s"INSERT INTO testcat.ns.$table VALUES " + + s"(0, 'aaa', CAST('2022-01-01' AS timestamp)), " + + s"(1, 'bbb', CAST('2021-01-01' AS timestamp)), " + + s"(2, 'ccc', CAST('2020-01-01' AS timestamp))") + + var df = sql(s"SELECT count(*) FROM testcat.ns.$table GROUP BY ts") + val catalystDistribution = physical.ClusteredDistribution( + Seq(TransformExpression(YearsFunction, Seq(attr("ts"))))) + val partitionValues = Seq(50, 51, 52).map(v => InternalRow.fromSeq(Seq(v))) + + checkQueryPlan(df, catalystDistribution, + physical.KeyGroupedPartitioning(catalystDistribution.clustering, partitionValues)) + + // multiple group keys should work too as long as partition keys are subset of them + df = sql(s"SELECT count(*) FROM testcat.ns.$table GROUP BY id, ts") + checkQueryPlan(df, catalystDistribution, + physical.KeyGroupedPartitioning(catalystDistribution.clustering, partitionValues)) + } + + test("non-clustered distribution: fallback to super.partitioning") { + val partitions: Array[Transform] = Array(years("ts")) + val ordering: Array[SortOrder] = Array(sort(FieldReference("ts"), + SortDirection.ASCENDING, NullOrdering.NULLS_FIRST)) + + createTable(table, schema, partitions, Distributions.ordered(ordering), ordering) + sql(s"INSERT INTO testcat.ns.$table VALUES " + + s"(0, 'aaa', CAST('2022-01-01' AS timestamp)), " + + s"(1, 'bbb', CAST('2021-01-01' AS timestamp)), " + + s"(2, 'ccc', CAST('2020-01-01' AS timestamp))") + + val df = sql(s"SELECT * FROM testcat.ns.$table") + val catalystOrdering = Seq(catalystSortOrder(attr("ts"), Ascending)) + val catalystDistribution = physical.OrderedDistribution(catalystOrdering) + + checkQueryPlan(df, catalystDistribution, physical.UnknownPartitioning(0)) + } + + test("non-clustered distribution: no partition") { + val partitions: Array[Transform] = Array(bucket(32, "ts")) + createTable(table, schema, partitions, + Distributions.clustered(partitions.map(_.asInstanceOf[Expression]))) + + val df = sql(s"SELECT * FROM testcat.ns.$table") + val distribution = physical.ClusteredDistribution( + Seq(TransformExpression(BucketFunction, Seq(attr("ts")), Some(32)))) + + checkQueryPlan(df, distribution, physical.UnknownPartitioning(0)) + } + + test("non-clustered distribution: single partition") { + val partitions: Array[Transform] = Array(bucket(32, "ts")) + createTable(table, schema, partitions, + Distributions.clustered(partitions.map(_.asInstanceOf[Expression]))) + sql(s"INSERT INTO testcat.ns.$table VALUES (0, 'aaa', CAST('2020-01-01' AS timestamp))") + + val df = sql(s"SELECT * FROM testcat.ns.$table") + val distribution = physical.ClusteredDistribution( + Seq(TransformExpression(BucketFunction, Seq(attr("ts")), Some(32)))) + + checkQueryPlan(df, distribution, physical.SinglePartition) + } + + test("non-clustered distribution: no V2 catalog") { + spark.conf.set("spark.sql.catalog.testcat2", classOf[InMemoryTableCatalog].getName) + val nonFunctionCatalog = spark.sessionState.catalogManager.catalog("testcat2") + .asInstanceOf[InMemoryTableCatalog] + val partitions: Array[Transform] = Array(bucket(32, "ts")) + createTable(table, schema, partitions, + Distributions.clustered(partitions.map(_.asInstanceOf[Expression])), + catalog = nonFunctionCatalog) + sql(s"INSERT INTO testcat2.ns.$table VALUES " + + s"(0, 'aaa', CAST('2022-01-01' AS timestamp)), " + + s"(1, 'bbb', CAST('2021-01-01' AS timestamp)), " + + s"(2, 'ccc', CAST('2020-01-01' AS timestamp))") + + val df = sql(s"SELECT * FROM testcat2.ns.$table") + val distribution = physical.UnspecifiedDistribution + + try { + checkQueryPlan(df, distribution, physical.UnknownPartitioning(0)) + } finally { + spark.conf.unset("spark.sql.catalog.testcat2") + } + } + + test("non-clustered distribution: no V2 function provided") { + catalog.clearFunctions() + + val partitions: Array[Transform] = Array(bucket(32, "ts")) + createTable(table, schema, partitions, + Distributions.clustered(partitions.map(_.asInstanceOf[Expression]))) + sql(s"INSERT INTO testcat.ns.$table VALUES " + + s"(0, 'aaa', CAST('2022-01-01' AS timestamp)), " + + s"(1, 'bbb', CAST('2021-01-01' AS timestamp)), " + + s"(2, 'ccc', CAST('2020-01-01' AS timestamp))") + + val df = sql(s"SELECT * FROM testcat.ns.$table") + val distribution = physical.UnspecifiedDistribution + + checkQueryPlan(df, distribution, physical.UnknownPartitioning(0)) + } + + test("non-clustered distribution: V2 bucketing disabled") { + withSQLConf(SQLConf.V2_BUCKETING_ENABLED.key -> "false") { + val partitions: Array[Transform] = Array(bucket(32, "ts")) + createTable(table, schema, partitions, + Distributions.clustered(partitions.map(_.asInstanceOf[Expression]))) + sql(s"INSERT INTO testcat.ns.$table VALUES " + + s"(0, 'aaa', CAST('2022-01-01' AS timestamp)), " + + s"(1, 'bbb', CAST('2021-01-01' AS timestamp)), " + + s"(2, 'ccc', CAST('2020-01-01' AS timestamp))") + + val df = sql(s"SELECT * FROM testcat.ns.$table") + val distribution = physical.ClusteredDistribution( + Seq(TransformExpression(BucketFunction, Seq(attr("ts")), Some(32)))) + + checkQueryPlan(df, distribution, physical.UnknownPartitioning(0)) + } + } + + /** + * Check whether the query plan from `df` has the expected `distribution`, `ordering` and + * `partitioning`. + */ + private def checkQueryPlan( + df: DataFrame, + distribution: physical.Distribution, + partitioning: physical.Partitioning): Unit = { + // check distribution & ordering are correctly populated in logical plan + val relation = df.queryExecution.optimizedPlan.collect { + case r: DataSourceV2ScanRelation => r + }.head + + resolveDistribution(distribution, relation) match { + case physical.ClusteredDistribution(clustering, _, _) => + assert(relation.keyGroupedPartitioning.isDefined && + relation.keyGroupedPartitioning.get == clustering) + case _ => + assert(relation.keyGroupedPartitioning.isEmpty) + } + + // check distribution, ordering and output partitioning are correctly populated in physical plan + val scan = collect(df.queryExecution.executedPlan) { + case s: BatchScanExec => s + }.head + + val expectedPartitioning = resolvePartitioning(partitioning, scan) + assert(expectedPartitioning == scan.outputPartitioning) + } + + private def createTable( + table: String, + schema: StructType, + partitions: Array[Transform], + distribution: Distribution = Distributions.unspecified(), + ordering: Array[expressions.SortOrder] = Array.empty, + catalog: InMemoryTableCatalog = catalog): Unit = { + catalog.createTable(Identifier.of(Array("ns"), table), + schema, partitions, emptyProps, distribution, ordering, None) + } + + private val customers: String = "customers" + private val customers_schema = new StructType() + .add("customer_name", StringType) + .add("customer_age", IntegerType) + .add("customer_id", LongType) + + private val orders: String = "orders" + private val orders_schema = new StructType() + .add("order_amount", DoubleType) + .add("customer_id", LongType) + + private def testWithCustomersAndOrders( + customers_partitions: Array[Transform], + customers_distribution: Distribution, + orders_partitions: Array[Transform], + orders_distribution: Distribution, + expectedNumOfShuffleExecs: Int): Unit = { + createTable(customers, customers_schema, customers_partitions, customers_distribution) + sql(s"INSERT INTO testcat.ns.$customers VALUES " + + s"('aaa', 10, 1), ('bbb', 20, 2), ('ccc', 30, 3)") + + createTable(orders, orders_schema, orders_partitions, orders_distribution) + sql(s"INSERT INTO testcat.ns.$orders VALUES " + + s"(100.0, 1), (200.0, 1), (150.0, 2), (250.0, 2), (350.0, 2), (400.50, 3)") + + val df = sql("SELECT customer_name, customer_age, order_amount " + + s"FROM testcat.ns.$customers c JOIN testcat.ns.$orders o " + + "ON c.customer_id = o.customer_id ORDER BY c.customer_id, order_amount") + + val shuffles = collectShuffles(df.queryExecution.executedPlan) + assert(shuffles.length == expectedNumOfShuffleExecs) + + checkAnswer(df, + Seq(Row("aaa", 10, 100.0), Row("aaa", 10, 200.0), Row("bbb", 20, 150.0), + Row("bbb", 20, 250.0), Row("bbb", 20, 350.0), Row("ccc", 30, 400.50))) + } + + private def collectShuffles(plan: SparkPlan): Seq[ShuffleExchangeExec] = { + // here we skip collecting shuffle operators that are not associated with SMJ + collect(plan) { + case s: SortMergeJoinExec => s + }.flatMap(smj => + collect(smj) { + case s: ShuffleExchangeExec => s + }) + } + + test("partitioned join: exact distribution (same number of buckets) from both sides") { + val customers_partitions = Array(bucket(4, "customer_id")) + val orders_partitions = Array(bucket(4, "customer_id")) + + testWithCustomersAndOrders(customers_partitions, + Distributions.clustered(customers_partitions.toArray), + orders_partitions, + Distributions.clustered(orders_partitions.toArray), + 0) + } + + test("partitioned join: number of buckets mismatch should trigger shuffle") { + val customers_partitions = Array(bucket(4, "customer_id")) + val orders_partitions = Array(bucket(2, "customer_id")) + + // should shuffle both sides when number of buckets are not the same + testWithCustomersAndOrders(customers_partitions, + Distributions.clustered(customers_partitions.toArray), + orders_partitions, + Distributions.clustered(orders_partitions.toArray), + 2) + } + + test("partitioned join: only one side reports partitioning") { + val customers_partitions = Array(bucket(4, "customer_id")) + val orders_partitions = Array(bucket(2, "customer_id")) + + testWithCustomersAndOrders(customers_partitions, + Distributions.clustered(customers_partitions.toArray), + orders_partitions, + Distributions.unspecified(), + 2) + } + + private val items: String = "items" + private val items_schema: StructType = new StructType() + .add("id", LongType) + .add("name", StringType) + .add("price", FloatType) + .add("arrive_time", TimestampType) + + private val purchases: String = "purchases" + private val purchases_schema: StructType = new StructType() + .add("item_id", LongType) + .add("price", FloatType) + .add("time", TimestampType) + + test("partitioned join: join with two partition keys and matching & sorted partitions") { + val items_partitions = Array(bucket(8, "id"), days("arrive_time")) + createTable(items, items_schema, items_partitions, + Distributions.clustered(items_partitions.toArray)) + sql(s"INSERT INTO testcat.ns.$items VALUES " + + s"(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " + + s"(1, 'aa', 41.0, cast('2020-01-15' as timestamp)), " + + s"(2, 'bb', 10.0, cast('2020-01-01' as timestamp)), " + + s"(2, 'bb', 10.5, cast('2020-01-01' as timestamp)), " + + s"(3, 'cc', 15.5, cast('2020-02-01' as timestamp))") + + val purchases_partitions = Array(bucket(8, "item_id"), days("time")) + createTable(purchases, purchases_schema, purchases_partitions, + Distributions.clustered(purchases_partitions.toArray)) + sql(s"INSERT INTO testcat.ns.$purchases VALUES " + + s"(1, 42.0, cast('2020-01-01' as timestamp)), " + + s"(1, 44.0, cast('2020-01-15' as timestamp)), " + + s"(1, 45.0, cast('2020-01-15' as timestamp)), " + + s"(2, 11.0, cast('2020-01-01' as timestamp)), " + + s"(3, 19.5, cast('2020-02-01' as timestamp))") + + val df = sql("SELECT id, name, i.price as purchase_price, p.price as sale_price " + + s"FROM testcat.ns.$items i JOIN testcat.ns.$purchases p " + + "ON i.id = p.item_id AND i.arrive_time = p.time ORDER BY id, purchase_price, sale_price") + + val shuffles = collectShuffles(df.queryExecution.executedPlan) + assert(shuffles.isEmpty, "should not add shuffle for both sides of the join") + checkAnswer(df, + Seq(Row(1, "aa", 40.0, 42.0), Row(1, "aa", 41.0, 44.0), Row(1, "aa", 41.0, 45.0), + Row(2, "bb", 10.0, 11.0), Row(2, "bb", 10.5, 11.0), Row(3, "cc", 15.5, 19.5)) + ) + } + + test("partitioned join: join with two partition keys and unsorted partitions") { + val items_partitions = Array(bucket(8, "id"), days("arrive_time")) + createTable(items, items_schema, items_partitions, + Distributions.clustered(items_partitions.toArray)) + sql(s"INSERT INTO testcat.ns.$items VALUES " + + s"(3, 'cc', 15.5, cast('2020-02-01' as timestamp)), " + + s"(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " + + s"(1, 'aa', 41.0, cast('2020-01-15' as timestamp)), " + + s"(2, 'bb', 10.0, cast('2020-01-01' as timestamp)), " + + s"(2, 'bb', 10.5, cast('2020-01-01' as timestamp))") + + val purchases_partitions = Array(bucket(8, "item_id"), days("time")) + createTable(purchases, purchases_schema, purchases_partitions, + Distributions.clustered(purchases_partitions.toArray)) + sql(s"INSERT INTO testcat.ns.$purchases VALUES " + + s"(2, 11.0, cast('2020-01-01' as timestamp)), " + + s"(1, 42.0, cast('2020-01-01' as timestamp)), " + + s"(1, 44.0, cast('2020-01-15' as timestamp)), " + + s"(1, 45.0, cast('2020-01-15' as timestamp)), " + + s"(3, 19.5, cast('2020-02-01' as timestamp))") + + val df = sql("SELECT id, name, i.price as purchase_price, p.price as sale_price " + + s"FROM testcat.ns.$items i JOIN testcat.ns.$purchases p " + + "ON i.id = p.item_id AND i.arrive_time = p.time ORDER BY id, purchase_price, sale_price") + + val shuffles = collectShuffles(df.queryExecution.executedPlan) + assert(shuffles.isEmpty, "should not add shuffle for both sides of the join") + checkAnswer(df, + Seq(Row(1, "aa", 40.0, 42.0), Row(1, "aa", 41.0, 44.0), Row(1, "aa", 41.0, 45.0), + Row(2, "bb", 10.0, 11.0), Row(2, "bb", 10.5, 11.0), Row(3, "cc", 15.5, 19.5)) + ) + } + + test("partitioned join: join with two partition keys and different # of partition keys") { + val items_partitions = Array(bucket(8, "id"), days("arrive_time")) + createTable(items, items_schema, items_partitions, + Distributions.clustered(items_partitions.toArray)) + + sql(s"INSERT INTO testcat.ns.$items VALUES " + + s"(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " + + s"(2, 'bb', 10.0, cast('2020-01-01' as timestamp)), " + + s"(3, 'cc', 15.5, cast('2020-02-01' as timestamp))") + + val purchases_partitions = Array(bucket(8, "item_id"), days("time")) + createTable(purchases, purchases_schema, purchases_partitions, + Distributions.clustered(purchases_partitions.toArray)) + sql(s"INSERT INTO testcat.ns.$purchases VALUES " + + s"(1, 42.0, cast('2020-01-01' as timestamp)), " + + s"(2, 11.0, cast('2020-01-01' as timestamp))") + + val df = sql("SELECT id, name, i.price as purchase_price, p.price as sale_price " + + s"FROM testcat.ns.$items i JOIN testcat.ns.$purchases p " + + "ON i.id = p.item_id AND i.arrive_time = p.time ORDER BY id, purchase_price, sale_price") + + val shuffles = collectShuffles(df.queryExecution.executedPlan) + assert(shuffles.nonEmpty, "should add shuffle when partition keys mismatch") + } + + test("data source partitioning + dynamic partition filtering") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "10MB", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", + SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", + SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", + SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "10") { + val items_partitions = Array(identity("id")) + createTable(items, items_schema, items_partitions, + Distributions.clustered(items_partitions.toArray)) + sql(s"INSERT INTO testcat.ns.$items VALUES " + + s"(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " + + s"(1, 'aa', 41.0, cast('2020-01-15' as timestamp)), " + + s"(2, 'bb', 10.0, cast('2020-01-01' as timestamp)), " + + s"(2, 'bb', 10.5, cast('2020-01-01' as timestamp)), " + + s"(3, 'cc', 15.5, cast('2020-02-01' as timestamp))") + + val purchases_partitions = Array(identity("item_id")) + createTable(purchases, purchases_schema, purchases_partitions, + Distributions.clustered(purchases_partitions.toArray)) + sql(s"INSERT INTO testcat.ns.$purchases VALUES " + + s"(1, 42.0, cast('2020-01-01' as timestamp)), " + + s"(1, 44.0, cast('2020-01-15' as timestamp)), " + + s"(1, 45.0, cast('2020-01-15' as timestamp)), " + + s"(2, 11.0, cast('2020-01-01' as timestamp)), " + + s"(3, 19.5, cast('2020-02-01' as timestamp))") + + // number of unique partitions changed after dynamic filtering - should throw exception + var df = sql(s"SELECT sum(p.price) from testcat.ns.$items i, testcat.ns.$purchases p WHERE " + + s"i.id = p.item_id AND i.price > 40.0") + val e = intercept[Exception](df.collect()) + assert(e.getMessage.contains("number of unique partition values")) + + // dynamic filtering doesn't change partitioning so storage-partitioned join should kick in + df = sql(s"SELECT sum(p.price) from testcat.ns.$items i, testcat.ns.$purchases p WHERE " + + s"i.id = p.item_id AND i.price >= 10.0") + val shuffles = collectShuffles(df.queryExecution.executedPlan) + assert(shuffles.isEmpty, "should not add shuffle for both sides of the join") + checkAnswer(df, Seq(Row(303.5))) + } + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala index 5f8684a144778..36efe5ec1d2ee 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala @@ -19,41 +19,28 @@ package org.apache.spark.sql.connector import java.util.Collections -import org.scalatest.BeforeAndAfter - -import org.apache.spark.sql.{catalyst, AnalysisException, DataFrame, QueryTest, Row} -import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute +import org.apache.spark.sql.{catalyst, AnalysisException, DataFrame, Row} import org.apache.spark.sql.catalyst.plans.physical import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, RangePartitioning, UnknownPartitioning} -import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryTableCatalog} +import org.apache.spark.sql.connector.catalog.Identifier import org.apache.spark.sql.connector.distributions.{Distribution, Distributions} import org.apache.spark.sql.connector.expressions.{Expression, FieldReference, NullOrdering, SortDirection, SortOrder} import org.apache.spark.sql.connector.expressions.LogicalExpressions._ import org.apache.spark.sql.execution.{QueryExecution, SortExec, SparkPlan} -import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec import org.apache.spark.sql.execution.exchange.ShuffleExchangeLike import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.execution.streaming.sources.ContinuousMemoryStream import org.apache.spark.sql.functions.lit import org.apache.spark.sql.streaming.{StreamingQueryException, Trigger} -import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{IntegerType, StringType, StructType} import org.apache.spark.sql.util.QueryExecutionListener -class WriteDistributionAndOrderingSuite - extends QueryTest with SharedSparkSession with BeforeAndAfter with AdaptiveSparkPlanHelper { - - import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ +class WriteDistributionAndOrderingSuite extends DistributionAndOrderingSuiteBase { import testImplicits._ - before { - spark.conf.set("spark.sql.catalog.testcat", classOf[InMemoryTableCatalog].getName) - } - after { spark.sessionState.catalogManager.reset() - spark.sessionState.conf.unsetConf("spark.sql.catalog.testcat") } private val microBatchPrefix = "micro_batch_" @@ -65,8 +52,6 @@ class WriteDistributionAndOrderingSuite .add("id", IntegerType) .add("data", StringType) - private val resolver = conf.resolver - test("ordered distribution and sort with same exprs: append") { checkOrderedDistributionAndSortWithSameExprs("append") } @@ -1027,28 +1012,6 @@ class WriteDistributionAndOrderingSuite assert(actualOrdering == expectedOrdering, "ordering must match") } - private def resolveAttrs( - expr: catalyst.expressions.Expression, - plan: SparkPlan): catalyst.expressions.Expression = { - - expr.transform { - case UnresolvedAttribute(Seq(attrName)) => - plan.output.find(attr => resolver(attr.name, attrName)).get - case UnresolvedAttribute(nameParts) => - val attrName = nameParts.mkString(".") - fail(s"cannot resolve a nested attr: $attrName") - } - } - - private def attr(name: String): UnresolvedAttribute = { - UnresolvedAttribute(name) - } - - private def catalog: InMemoryTableCatalog = { - val catalog = spark.sessionState.catalogManager.catalog("testcat") - catalog.asTableCatalog.asInstanceOf[InMemoryTableCatalog] - } - // executes a write operation and keeps the executed physical plan private def execute(writeFunc: => Unit): SparkPlan = { var executedPlan: SparkPlan = null diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/catalog/functions/transformFunctions.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/catalog/functions/transformFunctions.scala new file mode 100644 index 0000000000000..1994874d3289e --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/catalog/functions/transformFunctions.scala @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.connector.catalog.functions + +import org.apache.spark.sql.types._ + +object UnboundYearsFunction extends UnboundFunction { + override def bind(inputType: StructType): BoundFunction = { + if (inputType.size == 1 && isValidType(inputType.head.dataType)) YearsFunction + else throw new UnsupportedOperationException( + "'years' only take date or timestamp as input type") + } + + private def isValidType(dt: DataType): Boolean = dt match { + case DateType | TimestampType => true + case _ => false + } + + override def description(): String = name() + override def name(): String = "years" +} + +object YearsFunction extends BoundFunction { + override def inputTypes(): Array[DataType] = Array(TimestampType) + override def resultType(): DataType = LongType + override def name(): String = "years" + override def canonicalName(): String = name() +} + +object DaysFunction extends BoundFunction { + override def inputTypes(): Array[DataType] = Array(TimestampType) + override def resultType(): DataType = LongType + override def name(): String = "days" + override def canonicalName(): String = name() +} + +object UnboundDaysFunction extends UnboundFunction { + override def bind(inputType: StructType): BoundFunction = { + if (inputType.size == 1 && isValidType(inputType.head.dataType)) DaysFunction + else throw new UnsupportedOperationException( + "'days' only take date or timestamp as input type") + } + + private def isValidType(dt: DataType): Boolean = dt match { + case DateType | TimestampType => true + case _ => false + } + + override def description(): String = name() + override def name(): String = "days" +} + +object UnboundBucketFunction extends UnboundFunction { + override def bind(inputType: StructType): BoundFunction = BucketFunction + override def description(): String = name() + override def name(): String = "bucket" +} + +object BucketFunction extends BoundFunction { + override def inputTypes(): Array[DataType] = Array(IntegerType, IntegerType) + override def resultType(): DataType = IntegerType + override def name(): String = "bucket" + override def canonicalName(): String = name() +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PrunePartitionSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PrunePartitionSuiteBase.scala index 9909996059dac..775f34f1f6156 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PrunePartitionSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/PrunePartitionSuiteBase.scala @@ -95,7 +95,7 @@ abstract class PrunePartitionSuiteBase extends StatisticsCollectionTestBase { assert(getScanExecPartitionSize(plan) == expectedPartitionCount) val collectFn: PartialFunction[SparkPlan, Seq[Expression]] = collectPartitionFiltersFn orElse { - case BatchScanExec(_, scan: FileScan, _) => scan.partitionFilters + case BatchScanExec(_, scan: FileScan, _, _) => scan.partitionFilters } val pushedDownPartitionFilters = plan.collectFirst(collectFn) .map(exps => exps.filterNot(e => e.isInstanceOf[IsNotNull])) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala index a62ce9226a6a6..a9cb01b6d5657 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala @@ -59,7 +59,7 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { .where(Column(predicate)) query.queryExecution.optimizedPlan match { - case PhysicalOperation(_, filters, DataSourceV2ScanRelation(_, o: OrcScan, _)) => + case PhysicalOperation(_, filters, DataSourceV2ScanRelation(_, o: OrcScan, _, _)) => assert(filters.nonEmpty, "No filter is analyzed from the given query") assert(o.pushedFilters.nonEmpty, "No filter is pushed down") val maybeFilter = OrcFilters.createFilter(query.schema, o.pushedFilters) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala index 46a7f8d3d90de..53d2ccdc5af68 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcTest.scala @@ -120,8 +120,7 @@ trait OrcTest extends QueryTest with FileBasedDataSourceTest with BeforeAndAfter .where(Column(predicate)) query.queryExecution.optimizedPlan match { - case PhysicalOperation(_, filters, - DataSourceV2ScanRelation(_, o: OrcScan, _)) => + case PhysicalOperation(_, filters, DataSourceV2ScanRelation(_, o: OrcScan, _, _)) => assert(filters.nonEmpty, "No filter is analyzed from the given query") if (noneSupported) { assert(o.pushedFilters.isEmpty, "Unsupported filters should not show in pushed filters") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala index 107a2b7912029..7fb6d4c36968d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcV2SchemaPruningSuite.scala @@ -40,7 +40,7 @@ class OrcV2SchemaPruningSuite extends SchemaPruningSuite with AdaptiveSparkPlanH override def checkScanSchemata(df: DataFrame, expectedSchemaCatalogStrings: String*): Unit = { val fileSourceScanSchemata = collect(df.queryExecution.executedPlan) { - case BatchScanExec(_, scan: OrcScan, _) => scan.readDataSchema + case BatchScanExec(_, scan: OrcScan, _, _) => scan.readDataSchema } assert(fileSourceScanSchemata.size === expectedSchemaCatalogStrings.size, s"Found ${fileSourceScanSchemata.size} file sources in dataframe, " + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index d5180a393f61a..64a2ec6308cde 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -2016,7 +2016,7 @@ class ParquetV2FilterSuite extends ParquetFilterSuite { query.queryExecution.optimizedPlan.collectFirst { case PhysicalOperation(_, filters, - DataSourceV2ScanRelation(_, scan: ParquetScan, _)) => + DataSourceV2ScanRelation(_, scan: ParquetScan, _, _)) => assert(filters.nonEmpty, "No filter is analyzed from the given query") val sourceFilters = filters.flatMap(DataSourceStrategy.translateFilter(_, true)).toArray val pushedFilters = scan.pushedFilters diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala index db99557466d95..7237cc5f0fa51 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala @@ -17,13 +17,15 @@ package org.apache.spark.sql.execution.exchange -import org.apache.spark.sql.catalyst.expressions.{Ascending, Literal, SortOrder} +import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.Inner import org.apache.spark.sql.catalyst.plans.physical._ +import org.apache.spark.sql.connector.catalog.functions._ import org.apache.spark.sql.execution.{DummySparkPlan, SortExec} import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.joins.SortMergeJoinExec import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION import org.apache.spark.sql.test.SharedSparkSession class EnsureRequirementsSuite extends SharedSparkSession { @@ -79,6 +81,48 @@ class EnsureRequirementsSuite extends SharedSparkSession { } } + test("reorder should handle KeyGroupedPartitioning") { + // partitioning on the left + val plan1 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning(Seq( + years(exprA), bucket(4, exprB), days(exprC)), 4) + ) + val plan2 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning(Seq( + years(exprB), bucket(4, exprA), days(exprD)), 4) + ) + val smjExec = SortMergeJoinExec( + exprB :: exprC :: exprA :: Nil, exprA :: exprD :: exprB :: Nil, + Inner, None, plan1, plan2 + ) + EnsureRequirements.apply(smjExec) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, DummySparkPlan(_, _, _: KeyGroupedPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, _: KeyGroupedPartitioning, _, _), _), _) => + assert(leftKeys === Seq(exprA, exprB, exprC)) + assert(rightKeys === Seq(exprB, exprA, exprD)) + case other => fail(other.toString) + } + + // partitioning on the right + val plan3 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning(Seq( + bucket(4, exprD), days(exprA), years(exprC)), 4) + ) + val smjExec2 = SortMergeJoinExec( + exprB :: exprD :: exprC :: Nil, exprA :: exprC :: exprD :: Nil, + Inner, None, plan1, plan3 + ) + EnsureRequirements.apply(smjExec2) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), + SortExec(_, _, ShuffleExchangeExec(_: HashPartitioning, _, _), _), _) => + assert(leftKeys === Seq(exprC, exprB, exprD)) + assert(rightKeys === Seq(exprD, exprA, exprC)) + case other => fail(other.toString) + } + } + test("reorder should fallback to the other side partitioning") { val plan1 = DummySparkPlan( outputPartitioning = HashPartitioning(exprA :: exprB :: exprC :: Nil, 5)) @@ -645,4 +689,268 @@ class EnsureRequirementsSuite extends SharedSparkSession { } } } + + test("Check with KeyGroupedPartitioning") { + // simplest case: identity transforms + var plan1 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning(exprA :: exprB :: Nil, 5)) + var plan2 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning(exprA :: exprC :: Nil, 5)) + var smjExec = SortMergeJoinExec( + exprA :: exprB :: Nil, exprA :: exprC :: Nil, Inner, None, plan1, plan2) + EnsureRequirements.apply(smjExec) match { + case SortMergeJoinExec(_, _, _, _, + SortExec(_, _, DummySparkPlan(_, _, left: KeyGroupedPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, right: KeyGroupedPartitioning, _, _), _), _) => + assert(left.expressions === Seq(exprA, exprB)) + assert(right.expressions === Seq(exprA, exprC)) + case other => fail(other.toString) + } + + // matching bucket transforms from both sides + plan1 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + bucket(4, exprA) :: bucket(16, exprB) :: Nil, 4) + ) + plan2 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + bucket(4, exprA) :: bucket(16, exprC) :: Nil, 4) + ) + smjExec = SortMergeJoinExec( + exprA :: exprB :: Nil, exprA :: exprC :: Nil, Inner, None, plan1, plan2) + EnsureRequirements.apply(smjExec) match { + case SortMergeJoinExec(_, _, _, _, + SortExec(_, _, DummySparkPlan(_, _, left: KeyGroupedPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, right: KeyGroupedPartitioning, _, _), _), _) => + assert(left.expressions === Seq(bucket(4, exprA), bucket(16, exprB))) + assert(right.expressions === Seq(bucket(4, exprA), bucket(16, exprC))) + case other => fail(other.toString) + } + + // partition collections + plan1 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + bucket(4, exprA) :: bucket(16, exprB) :: Nil, 4) + ) + plan2 = DummySparkPlan( + outputPartitioning = PartitioningCollection(Seq( + KeyGroupedPartitioning(bucket(4, exprA) :: bucket(16, exprC) :: Nil, 4), + HashPartitioning(exprA :: exprC :: Nil, 4)) + ) + ) + smjExec = SortMergeJoinExec( + exprA :: exprB :: Nil, exprA :: exprC :: Nil, Inner, None, plan1, plan2) + EnsureRequirements.apply(smjExec) match { + case SortMergeJoinExec(_, _, _, _, + SortExec(_, _, DummySparkPlan(_, _, left: KeyGroupedPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), _) => + assert(left.expressions === Seq(bucket(4, exprA), bucket(16, exprB))) + case other => fail(other.toString) + } + smjExec = SortMergeJoinExec( + exprA :: exprC :: Nil, exprA :: exprB :: Nil, Inner, None, plan2, plan1) + EnsureRequirements.apply(smjExec) match { + case SortMergeJoinExec(_, _, _, _, + SortExec(_, _, DummySparkPlan(_, _, _: PartitioningCollection, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, right: KeyGroupedPartitioning, _, _), _), _) => + assert(right.expressions === Seq(bucket(4, exprA), bucket(16, exprB))) + case other => fail(other.toString) + } + + // bucket + years transforms from both sides + plan1 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning(bucket(4, exprA) :: years(exprB) :: Nil, 4) + ) + plan2 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning(bucket(4, exprA) :: years(exprC) :: Nil, 4) + ) + smjExec = SortMergeJoinExec( + exprA :: exprB :: Nil, exprA :: exprC :: Nil, Inner, None, plan1, plan2) + EnsureRequirements.apply(smjExec) match { + case SortMergeJoinExec(_, _, _, _, + SortExec(_, _, DummySparkPlan(_, _, left: KeyGroupedPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, right: KeyGroupedPartitioning, _, _), _), _) => + assert(left.expressions === Seq(bucket(4, exprA), years(exprB))) + assert(right.expressions === Seq(bucket(4, exprA), years(exprC))) + case other => fail(other.toString) + } + + // by default spark.sql.requireAllClusterKeysForCoPartition is true, so when there isn't + // exact match on all partition keys, Spark will fallback to shuffle. + plan1 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + bucket(4, exprA) :: bucket(4, exprB) :: Nil, 4) + ) + plan2 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + bucket(4, exprA) :: bucket(4, exprC) :: Nil, 4) + ) + smjExec = SortMergeJoinExec( + exprA :: exprB :: exprB :: Nil, exprA :: exprC :: exprC :: Nil, Inner, None, plan1, plan2) + EnsureRequirements.apply(smjExec) match { + case SortMergeJoinExec(_, _, _, _, + SortExec(_, _, ShuffleExchangeExec(left: HashPartitioning, _, _), _), + SortExec(_, _, ShuffleExchangeExec(right: HashPartitioning, _, _), _), _) => + assert(left.expressions === Seq(exprA, exprB, exprB)) + assert(right.expressions === Seq(exprA, exprC, exprC)) + case other => fail(other.toString) + } + } + + test(s"KeyGroupedPartitioning with ${REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION.key} = false") { + var plan1 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + bucket(4, exprB) :: years(exprC) :: Nil, 4) + ) + var plan2 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + bucket(4, exprC) :: years(exprB) :: Nil, 4) + ) + + // simple case + var smjExec = SortMergeJoinExec( + exprA :: exprB :: exprC :: Nil, exprA :: exprC :: exprB :: Nil, Inner, None, plan1, plan2) + applyEnsureRequirementsWithSubsetKeys(smjExec) match { + case SortMergeJoinExec(_, _, _, _, + SortExec(_, _, DummySparkPlan(_, _, left: KeyGroupedPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, right: KeyGroupedPartitioning, _, _), _), _) => + assert(left.expressions === Seq(bucket(4, exprB), years(exprC))) + assert(right.expressions === Seq(bucket(4, exprC), years(exprB))) + case other => fail(other.toString) + } + + // should also work with distributions with duplicated keys + plan1 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + bucket(4, exprA) :: years(exprB) :: Nil, 4) + ) + plan2 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + bucket(4, exprA) :: years(exprC) :: Nil, 4) + ) + smjExec = SortMergeJoinExec( + exprA :: exprB :: exprB :: Nil, exprA :: exprC :: exprC :: Nil, Inner, None, plan1, plan2) + applyEnsureRequirementsWithSubsetKeys(smjExec) match { + case SortMergeJoinExec(_, _, _, _, + SortExec(_, _, DummySparkPlan(_, _, left: KeyGroupedPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, right: KeyGroupedPartitioning, _, _), _), _) => + assert(left.expressions === Seq(bucket(4, exprA), years(exprB))) + assert(right.expressions === Seq(bucket(4, exprA), years(exprC))) + case other => fail(other.toString) + } + + // both partitioning and distribution have duplicated keys + plan1 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + years(exprA) :: bucket(4, exprB) :: days(exprA) :: Nil, 5)) + plan2 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + years(exprA) :: bucket(4, exprC) :: days(exprA) :: Nil, 5)) + smjExec = SortMergeJoinExec( + exprA :: exprB :: exprB :: Nil, exprA :: exprC :: exprC :: Nil, Inner, None, plan1, plan2) + applyEnsureRequirementsWithSubsetKeys(smjExec) match { + case SortMergeJoinExec(_, _, _, _, + SortExec(_, _, DummySparkPlan(_, _, left: KeyGroupedPartitioning, _, _), _), + SortExec(_, _, DummySparkPlan(_, _, right: KeyGroupedPartitioning, _, _), _), _) => + assert(left.expressions === Seq(years(exprA), bucket(4, exprB), days(exprA))) + assert(right.expressions === Seq(years(exprA), bucket(4, exprC), days(exprA))) + case other => fail(other.toString) + } + + // invalid case: partitioning key positions don't match + plan1 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + bucket(4, exprA) :: bucket(4, exprB) :: Nil, 4) + ) + plan2 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + bucket(4, exprB) :: bucket(4, exprC) :: Nil, 4) + ) + + smjExec = SortMergeJoinExec( + exprA :: exprB :: exprC :: Nil, exprA :: exprB :: exprC :: Nil, Inner, None, plan1, plan2) + applyEnsureRequirementsWithSubsetKeys(smjExec) match { + case SortMergeJoinExec(_, _, _, _, + SortExec(_, _, ShuffleExchangeExec(left: HashPartitioning, _, _), _), + SortExec(_, _, ShuffleExchangeExec(right: HashPartitioning, _, _), _), _) => + assert(left.expressions === Seq(exprA, exprB, exprC)) + assert(right.expressions === Seq(exprA, exprB, exprC)) + case other => fail(other.toString) + } + + // invalid case: different number of buckets (we don't support coalescing/repartitioning yet) + plan1 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + bucket(4, exprA) :: bucket(4, exprB) :: Nil, 4) + ) + plan2 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + bucket(4, exprA) :: bucket(8, exprC) :: Nil, 4) + ) + smjExec = SortMergeJoinExec( + exprA :: exprB :: exprB :: Nil, exprA :: exprC :: exprC :: Nil, Inner, None, plan1, plan2) + applyEnsureRequirementsWithSubsetKeys(smjExec) match { + case SortMergeJoinExec(_, _, _, _, + SortExec(_, _, ShuffleExchangeExec(left: HashPartitioning, _, _), _), + SortExec(_, _, ShuffleExchangeExec(right: HashPartitioning, _, _), _), _) => + assert(left.expressions === Seq(exprA, exprB, exprB)) + assert(right.expressions === Seq(exprA, exprC, exprC)) + case other => fail(other.toString) + } + + // invalid case: partition key positions match but with different transforms + plan1 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning(years(exprA) :: bucket(4, exprB) :: Nil, 4) + ) + plan2 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning(days(exprA) :: bucket(4, exprC) :: Nil, 4) + ) + smjExec = SortMergeJoinExec( + exprA :: exprB :: exprB :: Nil, exprA :: exprC :: exprC :: Nil, Inner, None, plan1, plan2) + applyEnsureRequirementsWithSubsetKeys(smjExec) match { + case SortMergeJoinExec(_, _, _, _, + SortExec(_, _, ShuffleExchangeExec(left: HashPartitioning, _, _), _), + SortExec(_, _, ShuffleExchangeExec(right: HashPartitioning, _, _), _), _) => + assert(left.expressions === Seq(exprA, exprB, exprB)) + assert(right.expressions === Seq(exprA, exprC, exprC)) + case other => fail(other.toString) + } + + + // invalid case: multiple references in transform + plan1 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + years(exprA) :: buckets(4, Seq(exprB, exprC)) :: Nil, 4) + ) + plan2 = DummySparkPlan( + outputPartitioning = KeyGroupedPartitioning( + years(exprA) :: buckets(4, Seq(exprB, exprC)) :: Nil, 4) + ) + smjExec = SortMergeJoinExec( + exprA :: exprB :: exprB :: Nil, exprA :: exprC :: exprC :: Nil, Inner, None, plan1, plan2) + applyEnsureRequirementsWithSubsetKeys(smjExec) match { + case SortMergeJoinExec(_, _, _, _, + SortExec(_, _, ShuffleExchangeExec(left: HashPartitioning, _, _), _), + SortExec(_, _, ShuffleExchangeExec(right: HashPartitioning, _, _), _), _) => + assert(left.expressions === Seq(exprA, exprB, exprB)) + assert(right.expressions === Seq(exprA, exprC, exprC)) + case other => fail(other.toString) + } + } + + def bucket(numBuckets: Int, expr: Expression): TransformExpression = { + TransformExpression(BucketFunction, Seq(expr), Some(numBuckets)) + } + + def buckets(numBuckets: Int, expr: Seq[Expression]): TransformExpression = { + TransformExpression(BucketFunction, expr, Some(numBuckets)) + } + + def years(expr: Expression): TransformExpression = { + TransformExpression(YearsFunction, Seq(expr)) + } + + def days(expr: Expression): TransformExpression = { + TransformExpression(DaysFunction, Seq(expr)) + } } From b852645f69b3b7a0a2140a732c4c03b302f8795a Mon Sep 17 00:00:00 2001 From: dch nguyen Date: Tue, 5 Apr 2022 16:19:01 +0200 Subject: [PATCH 099/535] [SPARK-37423][PYTHON] Inline type hints for fpm.py in python/pyspark/mllib ### What changes were proposed in this pull request? Inline type hints for fpm.py, test.py in python/pyspark/mllib/ ### Why are the changes needed? We can take advantage of static type checking within the functions by inlining the type hints. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #35067 from dchvn/fpm_2. Authored-by: dch nguyen Signed-off-by: zero323 (cherry picked from commit 93f646dd00ba8b3370bb904ba91862c407c62cc2) Signed-off-by: zero323 --- python/pyspark/mllib/fpm.py | 42 +++++++++++++++++++-------- python/pyspark/mllib/fpm.pyi | 55 ------------------------------------ 2 files changed, 30 insertions(+), 67 deletions(-) delete mode 100644 python/pyspark/mllib/fpm.pyi diff --git a/python/pyspark/mllib/fpm.py b/python/pyspark/mllib/fpm.py index a349a55193e52..4b26ca6422969 100644 --- a/python/pyspark/mllib/fpm.py +++ b/python/pyspark/mllib/fpm.py @@ -17,17 +17,20 @@ import sys -from collections import namedtuple +from typing import Any, Generic, List, NamedTuple, TypeVar -from pyspark import since +from pyspark import since, SparkContext from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc from pyspark.mllib.util import JavaSaveable, JavaLoader, inherit_doc +from pyspark.rdd import RDD __all__ = ["FPGrowth", "FPGrowthModel", "PrefixSpan", "PrefixSpanModel"] +T = TypeVar("T") + @inherit_doc -class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader): +class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader["FPGrowthModel"]): """ A FP-Growth model for mining frequent itemsets using the Parallel FP-Growth algorithm. @@ -49,7 +52,7 @@ class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader): """ @since("1.4.0") - def freqItemsets(self): + def freqItemsets(self) -> RDD["FPGrowth.FreqItemset"]: """ Returns the frequent itemsets of this model. """ @@ -57,11 +60,12 @@ def freqItemsets(self): @classmethod @since("2.0.0") - def load(cls, sc, path): + def load(cls, sc: SparkContext, path: str) -> "FPGrowthModel": """ Load a model from the given path. """ model = cls._load_java(sc, path) + assert sc._jvm is not None wrapper = sc._jvm.org.apache.spark.mllib.api.python.FPGrowthModelWrapper(model) return FPGrowthModel(wrapper) @@ -74,7 +78,9 @@ class FPGrowth: """ @classmethod - def train(cls, data, minSupport=0.3, numPartitions=-1): + def train( + cls, data: RDD[List[T]], minSupport: float = 0.3, numPartitions: int = -1 + ) -> "FPGrowthModel": """ Computes an FP-Growth model that contains frequent itemsets. @@ -95,16 +101,19 @@ def train(cls, data, minSupport=0.3, numPartitions=-1): model = callMLlibFunc("trainFPGrowthModel", data, float(minSupport), int(numPartitions)) return FPGrowthModel(model) - class FreqItemset(namedtuple("FreqItemset", ["items", "freq"])): + class FreqItemset(NamedTuple): """ Represents an (items, freq) tuple. .. versionadded:: 1.4.0 """ + items: List[Any] + freq: int + @inherit_doc -class PrefixSpanModel(JavaModelWrapper): +class PrefixSpanModel(JavaModelWrapper, Generic[T]): """ Model fitted by PrefixSpan @@ -124,7 +133,7 @@ class PrefixSpanModel(JavaModelWrapper): """ @since("1.6.0") - def freqSequences(self): + def freqSequences(self) -> RDD["PrefixSpan.FreqSequence"]: """Gets frequent sequences""" return self.call("getFreqSequences").map(lambda x: PrefixSpan.FreqSequence(x[0], x[1])) @@ -144,7 +153,13 @@ class PrefixSpan: """ @classmethod - def train(cls, data, minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=32000000): + def train( + cls, + data: RDD[List[List[T]]], + minSupport: float = 0.1, + maxPatternLength: int = 10, + maxLocalProjDBSize: int = 32000000, + ) -> PrefixSpanModel[T]: """ Finds the complete set of frequent sequential patterns in the input sequences of itemsets. @@ -177,15 +192,18 @@ def train(cls, data, minSupport=0.1, maxPatternLength=10, maxLocalProjDBSize=320 ) return PrefixSpanModel(model) - class FreqSequence(namedtuple("FreqSequence", ["sequence", "freq"])): + class FreqSequence(NamedTuple): """ Represents a (sequence, freq) tuple. .. versionadded:: 1.6.0 """ + sequence: List[List[Any]] + freq: int + -def _test(): +def _test() -> None: import doctest from pyspark.sql import SparkSession import pyspark.mllib.fpm diff --git a/python/pyspark/mllib/fpm.pyi b/python/pyspark/mllib/fpm.pyi deleted file mode 100644 index 4c5d9c5004326..0000000000000 --- a/python/pyspark/mllib/fpm.pyi +++ /dev/null @@ -1,55 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Generic, List, TypeVar -from pyspark.context import SparkContext -from pyspark.rdd import RDD -from pyspark.mllib.common import JavaModelWrapper -from pyspark.mllib.util import JavaSaveable, JavaLoader - -T = TypeVar("T") - -class FPGrowthModel(JavaModelWrapper, JavaSaveable, JavaLoader[FPGrowthModel], Generic[T]): - def freqItemsets(self) -> RDD[FPGrowth.FreqItemset[T]]: ... - @classmethod - def load(cls, sc: SparkContext, path: str) -> FPGrowthModel: ... - -class FPGrowth: - @classmethod - def train( - cls, data: RDD[List[T]], minSupport: float = ..., numPartitions: int = ... - ) -> FPGrowthModel[T]: ... - class FreqItemset(Generic[T]): - items: List[T] - freq: int - -class PrefixSpanModel(JavaModelWrapper, Generic[T]): - def freqSequences(self) -> RDD[PrefixSpan.FreqSequence[T]]: ... - -class PrefixSpan: - @classmethod - def train( - cls, - data: RDD[List[List[T]]], - minSupport: float = ..., - maxPatternLength: int = ..., - maxLocalProjDBSize: int = ..., - ) -> PrefixSpanModel[T]: ... - class FreqSequence(tuple, Generic[T]): - sequence: List[T] - freq: int From 23fa70e9b2b5c896a12f95173dd581d9044b85a7 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Sun, 3 Apr 2022 00:47:40 -0700 Subject: [PATCH 100/535] [SPARK-28090][SQL] Improve `replaceAliasButKeepName` performance ### What changes were proposed in this pull request? SPARK-28090 ticket description contains an example query with multiple nested struct creation and field extraction. The following is is an example of the query when the sample code range is set to only 3: ``` Project [struct(num1, numerics#23.num1, num10, numerics#23.num10, num11, numerics#23.num11, num12, numerics#23.num12, num13, numerics#23.num13, num14, numerics#23.num14, num15, numerics#23.num15, num2, numerics#23.num2, num3, numerics#23.num3, num4, numerics#23.num4, num5, numerics#23.num5, num6, numerics#23.num6, num7, numerics#23.num7, num8, numerics#23.num8, num9, numerics#23.num9, out_num1, numerics#23.out_num1, out_num2, -numerics#23.num2) AS numerics#42] +- Project [struct(num1, numerics#5.num1, num10, numerics#5.num10, num11, numerics#5.num11, num12, numerics#5.num12, num13, numerics#5.num13, num14, numerics#5.num14, num15, numerics#5.num15, num2, numerics#5.num2, num3, numerics#5.num3, num4, numerics#5.num4, num5, numerics#5.num5, num6, numerics#5.num6, num7, numerics#5.num7, num8, numerics#5.num8, num9, numerics#5.num9, out_num1, -numerics#5.num1) AS numerics#23] +- LogicalRDD [numerics#5], false ``` If the level of nesting reaches 7 the query plan generation becomes extremely slow on Spark 2.4. That is because both - `CollapseProject` rule is slow and - some of the expression optimization rules running on the huge, not yet simplified expression tree of the single, collapsed `Project` node are slow. On Spark 3.3, after SPARK-36718, `CollapseProject` doesn't collapse such plans so the above issues don't occur, but `PhysicalOperation` extractor has an issue that it also builds up that huge expression tree and then traverses and modifies it in `AliasHelper.replaceAliasButKeepName()`. With a small change in that function we can avoid such costly operations. ### Why are the changes needed? The suggested change reduced the plan generation time of the example query from minutes (range = 7) or hours (range = 8+) to seconds. ### Does this PR introduce _any_ user-facing change? The example query can be executed. ### How was this patch tested? Existing UTs + manual test of the example query in the ticket description. Closes #35382 from peter-toth/SPARK-28090-improve-replacealiasbutkeepname. Authored-by: Peter Toth Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/expressions/AliasHelper.scala | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala index dea7ea0f144bf..888a9869e074e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AliasHelper.scala @@ -70,11 +70,17 @@ trait AliasHelper { protected def replaceAliasButKeepName( expr: NamedExpression, aliasMap: AttributeMap[Alias]): NamedExpression = { - // Use transformUp to prevent infinite recursion when the replacement expression - // redefines the same ExprId, - trimNonTopLevelAliases(expr.transformUp { + expr match { + // We need to keep the `Alias` if we replace a top-level Attribute, so that it's still a + // `NamedExpression`. We also need to keep the name of the original Attribute. case a: Attribute => aliasMap.get(a).map(_.withName(a.name)).getOrElse(a) - }).asInstanceOf[NamedExpression] + case o => + // Use transformUp to prevent infinite recursion when the replacement expression + // redefines the same ExprId. + o.mapChildren(_.transformUp { + case a: Attribute => aliasMap.get(a).map(_.child).getOrElse(a) + }).asInstanceOf[NamedExpression] + } } protected def trimAliases(e: Expression): Expression = { From fa9cbe21889a3d032687a152ec795ce1dd2db0ff Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 6 Apr 2022 17:26:17 +0900 Subject: [PATCH 101/535] [MINOR][SQL][SS][DOCS] Add varargs to Dataset.observe(String, ..) with a documentation fix ### What changes were proposed in this pull request? This PR proposes two minor changes: - Fixes the example at `Dataset.observe(String, ...)` - Adds `varargs` to be consistent with another overloaded version: `Dataset.observe(Observation, ..)` ### Why are the changes needed? To provide a correct example, support Java APIs properly with `varargs` and API consistency. ### Does this PR introduce _any_ user-facing change? Yes, the example is fixed in the documentation. Additionally Java users should be able to use `Dataset.observe(String, ..)` per `varargs`. ### How was this patch tested? Manually tested. CI should verify the changes too. Closes #36084 from HyukjinKwon/minor-docs. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit fb3f380b3834ca24947a82cb8d87efeae6487664) Signed-off-by: Hyukjin Kwon --- sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 564eed1ecfda2..7d16a2f5eee14 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -1979,6 +1979,7 @@ class Dataset[T] private[sql]( * {{{ * // Monitor the metrics using a listener. * spark.streams.addListener(new StreamingQueryListener() { + * override def onQueryStarted(event: QueryStartedEvent): Unit = {} * override def onQueryProgress(event: QueryProgressEvent): Unit = { * event.progress.observedMetrics.asScala.get("my_event").foreach { row => * // Trigger if the number of errors exceeds 5 percent @@ -1990,8 +1991,7 @@ class Dataset[T] private[sql]( * } * } * } - * def onQueryStarted(event: QueryStartedEvent): Unit = {} - * def onQueryTerminated(event: QueryTerminatedEvent): Unit = {} + * override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {} * }) * // Observe row count (rc) and error row count (erc) in the streaming Dataset * val observed_ds = ds.observe("my_event", count(lit(1)).as("rc"), count($"error").as("erc")) @@ -2001,6 +2001,7 @@ class Dataset[T] private[sql]( * @group typedrel * @since 3.0.0 */ + @varargs def observe(name: String, expr: Column, exprs: Column*): Dataset[T] = withTypedPlan { CollectMetrics(name, (expr +: exprs).map(_.named), logicalPlan) } From ae890785c4babdca555272ec318bd8439746848e Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 7 Apr 2022 01:27:28 +0800 Subject: [PATCH 102/535] [SPARK-38762][SQL] Provide query context in Decimal overflow errors ### What changes were proposed in this pull request? Provide query context in Decimal overflow errors: * explicit casting other data types as decimal * implicit casting in decimal operations, including add/subtract/multiply/divide/reminder/pmod ### Why are the changes needed? Provide SQL query context of runtime errors to users, so that they can understand it better. ### Does this PR introduce _any_ user-facing change? Yes, improve the runtime error message of decimal overflow ### How was this patch tested? Existing UT Also test `sql/core/src/test/resources/sql-tests/inputs/cast.sql` under ANSI mode. Closes #36040 from gengliangwang/decimalContext. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 7de321005b02dcfe61d2954eeb569bf54ac6e78f) Signed-off-by: Gengliang Wang --- .../main/resources/error/error-classes.json | 2 +- .../spark/sql/catalyst/expressions/Cast.scala | 31 +- .../expressions/decimalExpressions.scala | 29 +- .../expressions/mathExpressions.scala | 2 +- .../sql/errors/QueryExecutionErrors.scala | 11 +- .../org/apache/spark/sql/types/Decimal.scala | 6 +- .../resources/sql-tests/inputs/ansi/cast.sql | 1 + .../test/resources/sql-tests/inputs/cast.sql | 3 + .../sql-tests/results/ansi/cast.sql.out | 534 ++++++++++++++++++ .../ansi/decimalArithmeticOperations.sql.out | 12 + .../sql-tests/results/ansi/interval.sql.out | 3 + .../resources/sql-tests/results/cast.sql.out | 18 +- 12 files changed, 626 insertions(+), 26 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/ansi/cast.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index c8c18413a9d0c..855d3c5cd6e0e 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -12,7 +12,7 @@ "sqlState" : "22005" }, "CANNOT_CHANGE_DECIMAL_PRECISION" : { - "message" : [ "%s cannot be represented as Decimal(%s, %s). If necessary set %s to false to bypass this error." ], + "message" : [ "%s cannot be represented as Decimal(%s, %s). If necessary set %s to false to bypass this error.%s" ], "sqlState" : "22005" }, "CANNOT_PARSE_DECIMAL" : { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 39463ed122b6e..03ecaecca066c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -793,7 +793,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit null } else { throw QueryExecutionErrors.cannotChangeDecimalPrecisionError( - value, decimalType.precision, decimalType.scale) + value, decimalType.precision, decimalType.scale, origin.context) } } } @@ -984,6 +984,10 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit } } + def errorContextCode(codegenContext: CodegenContext): String = { + codegenContext.addReferenceObj("errCtx", origin.context) + } + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val eval = child.genCode(ctx) val nullSafeCast = nullSafeCastFunction(child.dataType, dataType, ctx) @@ -1320,8 +1324,13 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit } } - private[this] def changePrecision(d: ExprValue, decimalType: DecimalType, - evPrim: ExprValue, evNull: ExprValue, canNullSafeCast: Boolean): Block = { + private[this] def changePrecision( + d: ExprValue, + decimalType: DecimalType, + evPrim: ExprValue, + evNull: ExprValue, + canNullSafeCast: Boolean, + ctx: CodegenContext): Block = { if (canNullSafeCast) { code""" |$d.changePrecision(${decimalType.precision}, ${decimalType.scale}); @@ -1333,7 +1342,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit } else { s""" |throw QueryExecutionErrors.cannotChangeDecimalPrecisionError( - | $d, ${decimalType.precision}, ${decimalType.scale}); + | $d, ${decimalType.precision}, ${decimalType.scale}, ${errorContextCode(ctx)}); """.stripMargin } code""" @@ -1360,20 +1369,20 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit if ($tmp == null) { $evNull = true; } else { - ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast)} + ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast, ctx)} } """ case StringType if ansiEnabled => (c, evPrim, evNull) => code""" Decimal $tmp = Decimal.fromStringANSI($c); - ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast)} + ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast, ctx)} """ case BooleanType => (c, evPrim, evNull) => code""" Decimal $tmp = $c ? Decimal.apply(1) : Decimal.apply(0); - ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast)} + ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast, ctx)} """ case DateType => // date can't cast to decimal in Hive @@ -1384,19 +1393,19 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit code""" Decimal $tmp = Decimal.apply( scala.math.BigDecimal.valueOf(${timestampToDoubleCode(c)})); - ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast)} + ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast, ctx)} """ case DecimalType() => (c, evPrim, evNull) => code""" Decimal $tmp = $c.clone(); - ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast)} + ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast, ctx)} """ case x: IntegralType => (c, evPrim, evNull) => code""" Decimal $tmp = Decimal.apply((long) $c); - ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast)} + ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast, ctx)} """ case x: FractionalType => // All other numeric types can be represented precisely as Doubles @@ -1404,7 +1413,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit code""" try { Decimal $tmp = Decimal.apply(scala.math.BigDecimal.valueOf((double) $c)); - ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast)} + ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast, ctx)} } catch (java.lang.NumberFormatException e) { $evNull = true; } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala index 8116537d7b06d..4a4b8e0fc0dfd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala @@ -137,15 +137,23 @@ case class CheckOverflow( dataType.precision, dataType.scale, Decimal.ROUND_HALF_UP, - nullOnOverflow) + nullOnOverflow, + origin.context) override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val errorContextCode = if (nullOnOverflow) { + ctx.addReferenceObj("errCtx", origin.context) + } else { + "\"\"" + } nullSafeCodeGen(ctx, ev, eval => { + // scalastyle:off line.size.limit s""" |${ev.value} = $eval.toPrecision( - | ${dataType.precision}, ${dataType.scale}, Decimal.ROUND_HALF_UP(), $nullOnOverflow); + | ${dataType.precision}, ${dataType.scale}, Decimal.ROUND_HALF_UP(), $nullOnOverflow, $errorContextCode); |${ev.isNull} = ${ev.value} == null; """.stripMargin + // scalastyle:on line.size.limit }) } @@ -168,23 +176,31 @@ case class CheckOverflowInSum( override def eval(input: InternalRow): Any = { val value = child.eval(input) if (value == null) { - if (nullOnOverflow) null else throw QueryExecutionErrors.overflowInSumOfDecimalError + if (nullOnOverflow) null + else throw QueryExecutionErrors.overflowInSumOfDecimalError(origin.context) } else { value.asInstanceOf[Decimal].toPrecision( dataType.precision, dataType.scale, Decimal.ROUND_HALF_UP, - nullOnOverflow) + nullOnOverflow, + origin.context) } } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val childGen = child.genCode(ctx) + val errorContextCode = if (nullOnOverflow) { + ctx.addReferenceObj("errCtx", origin.context) + } else { + "\"\"" + } val nullHandling = if (nullOnOverflow) { "" } else { - s"throw QueryExecutionErrors.overflowInSumOfDecimalError();" + s"throw QueryExecutionErrors.overflowInSumOfDecimalError($errorContextCode);" } + // scalastyle:off line.size.limit val code = code""" |${childGen.code} |boolean ${ev.isNull} = ${childGen.isNull}; @@ -193,10 +209,11 @@ case class CheckOverflowInSum( | $nullHandling |} else { | ${ev.value} = ${childGen.value}.toPrecision( - | ${dataType.precision}, ${dataType.scale}, Decimal.ROUND_HALF_UP(), $nullOnOverflow); + | ${dataType.precision}, ${dataType.scale}, Decimal.ROUND_HALF_UP(), $nullOnOverflow, $errorContextCode); | ${ev.isNull} = ${ev.value} == null; |} |""".stripMargin + // scalastyle:on line.size.limit ev.copy(code = code) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala index f64b6ea078a46..d8a20f1a6c8b7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala @@ -1530,7 +1530,7 @@ abstract class RoundBase(child: Expression, scale: Expression, if (_scale >= 0) { s""" ${ev.value} = ${ce.value}.toPrecision(${ce.value}.precision(), $s, - Decimal.$modeStr(), true); + Decimal.$modeStr(), true, ""); ${ev.isNull} = ${ev.value} == null;""" } else { s""" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index ccf0219f2e12f..4c75bdf234155 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -95,10 +95,13 @@ object QueryExecutionErrors { } def cannotChangeDecimalPrecisionError( - value: Decimal, decimalPrecision: Int, decimalScale: Int): ArithmeticException = { + value: Decimal, + decimalPrecision: Int, + decimalScale: Int, + context: String): ArithmeticException = { new SparkArithmeticException(errorClass = "CANNOT_CHANGE_DECIMAL_PRECISION", messageParameters = Array(value.toDebugString, - decimalPrecision.toString, decimalScale.toString, SQLConf.ANSI_ENABLED.key)) + decimalPrecision.toString, decimalScale.toString, SQLConf.ANSI_ENABLED.key, context)) } def invalidInputSyntaxForNumericError(e: NumberFormatException): NumberFormatException = { @@ -215,8 +218,8 @@ object QueryExecutionErrors { ansiIllegalArgumentError(e.getMessage) } - def overflowInSumOfDecimalError(): ArithmeticException = { - arithmeticOverflowError("Overflow in sum of decimals") + def overflowInSumOfDecimalError(context: String): ArithmeticException = { + arithmeticOverflowError("Overflow in sum of decimals", errorContext = context) } def overflowInIntegralDivideError(context: String): ArithmeticException = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index bbf902849e7f9..39c7e6ba58007 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -356,7 +356,8 @@ final class Decimal extends Ordered[Decimal] with Serializable { precision: Int, scale: Int, roundMode: BigDecimal.RoundingMode.Value = ROUND_HALF_UP, - nullOnOverflow: Boolean = true): Decimal = { + nullOnOverflow: Boolean = true, + context: String = ""): Decimal = { val copy = clone() if (copy.changePrecision(precision, scale, roundMode)) { copy @@ -364,7 +365,8 @@ final class Decimal extends Ordered[Decimal] with Serializable { if (nullOnOverflow) { null } else { - throw QueryExecutionErrors.cannotChangeDecimalPrecisionError(this, precision, scale) + throw QueryExecutionErrors.cannotChangeDecimalPrecisionError( + this, precision, scale, context) } } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/cast.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/cast.sql new file mode 100644 index 0000000000000..b16ee89ac67ec --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/cast.sql @@ -0,0 +1 @@ +--IMPORT cast.sql diff --git a/sql/core/src/test/resources/sql-tests/inputs/cast.sql b/sql/core/src/test/resources/sql-tests/inputs/cast.sql index 21737cd0aab0a..39095cb8ce032 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cast.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cast.sql @@ -84,3 +84,6 @@ select cast('1中文' as bigint); select cast('\t\t true \n\r ' as boolean); select cast('\t\n false \t\r' as boolean); select cast('\t\n xyz \t\r' as boolean); + +select cast('23.45' as decimal(4, 2)); +select cast('123.45' as decimal(4, 2)); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out new file mode 100644 index 0000000000000..6e45fe8dce938 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -0,0 +1,534 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 57 + + +-- !query +SELECT CAST('1.23' AS int) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: 1.23. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST('1.23' AS long) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: 1.23. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST('-4.56' AS int) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: -4.56. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST('-4.56' AS long) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: -4.56. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST('abc' AS int) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: abc. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST('abc' AS long) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: abc. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST('1234567890123' AS int) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: 1234567890123. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST('12345678901234567890123' AS long) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: 12345678901234567890123. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST('' AS int) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: . To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST('' AS long) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: . To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST(NULL AS int) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT CAST(NULL AS long) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT CAST('123.a' AS int) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: 123.a. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST('123.a' AS long) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: 123.a. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST('-2147483648' AS int) +-- !query schema +struct +-- !query output +-2147483648 + + +-- !query +SELECT CAST('-2147483649' AS int) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: -2147483649. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST('2147483647' AS int) +-- !query schema +struct +-- !query output +2147483647 + + +-- !query +SELECT CAST('2147483648' AS int) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: 2147483648. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST('-9223372036854775808' AS long) +-- !query schema +struct +-- !query output +-9223372036854775808 + + +-- !query +SELECT CAST('-9223372036854775809' AS long) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: -9223372036854775809. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT CAST('9223372036854775807' AS long) +-- !query schema +struct +-- !query output +9223372036854775807 + + +-- !query +SELECT CAST('9223372036854775808' AS long) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: 9223372036854775808. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +SELECT HEX(CAST('abc' AS binary)) +-- !query schema +struct +-- !query output +616263 + + +-- !query +SELECT HEX(CAST(CAST(123 AS byte) AS binary)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(CAST(123 AS TINYINT) AS BINARY)' due to data type mismatch: + cannot cast tinyint to binary with ANSI mode on. + If you have to cast tinyint to binary, you can set spark.sql.ansi.enabled as false. +; line 1 pos 11 + + +-- !query +SELECT HEX(CAST(CAST(-123 AS byte) AS binary)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(CAST(-123 AS TINYINT) AS BINARY)' due to data type mismatch: + cannot cast tinyint to binary with ANSI mode on. + If you have to cast tinyint to binary, you can set spark.sql.ansi.enabled as false. +; line 1 pos 11 + + +-- !query +SELECT HEX(CAST(123S AS binary)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(123S AS BINARY)' due to data type mismatch: + cannot cast smallint to binary with ANSI mode on. + If you have to cast smallint to binary, you can set spark.sql.ansi.enabled as false. +; line 1 pos 11 + + +-- !query +SELECT HEX(CAST(-123S AS binary)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(-123S AS BINARY)' due to data type mismatch: + cannot cast smallint to binary with ANSI mode on. + If you have to cast smallint to binary, you can set spark.sql.ansi.enabled as false. +; line 1 pos 11 + + +-- !query +SELECT HEX(CAST(123 AS binary)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(123 AS BINARY)' due to data type mismatch: + cannot cast int to binary with ANSI mode on. + If you have to cast int to binary, you can set spark.sql.ansi.enabled as false. +; line 1 pos 11 + + +-- !query +SELECT HEX(CAST(-123 AS binary)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(-123 AS BINARY)' due to data type mismatch: + cannot cast int to binary with ANSI mode on. + If you have to cast int to binary, you can set spark.sql.ansi.enabled as false. +; line 1 pos 11 + + +-- !query +SELECT HEX(CAST(123L AS binary)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(123L AS BINARY)' due to data type mismatch: + cannot cast bigint to binary with ANSI mode on. + If you have to cast bigint to binary, you can set spark.sql.ansi.enabled as false. +; line 1 pos 11 + + +-- !query +SELECT HEX(CAST(-123L AS binary)) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +cannot resolve 'CAST(-123L AS BINARY)' due to data type mismatch: + cannot cast bigint to binary with ANSI mode on. + If you have to cast bigint to binary, you can set spark.sql.ansi.enabled as false. +; line 1 pos 11 + + +-- !query +DESC FUNCTION boolean +-- !query schema +struct +-- !query output +Class: org.apache.spark.sql.catalyst.expressions.Cast +Function: boolean +Usage: boolean(expr) - Casts the value `expr` to the target data type `boolean`. + + +-- !query +DESC FUNCTION EXTENDED boolean +-- !query schema +struct +-- !query output +Class: org.apache.spark.sql.catalyst.expressions.Cast +Extended Usage: + No example/argument for boolean. + + Since: 2.0.1 + +Function: boolean +Usage: boolean(expr) - Casts the value `expr` to the target data type `boolean`. + + +-- !query +SELECT CAST('interval 3 month 1 hour' AS interval) +-- !query schema +struct +-- !query output +3 months 1 hours + + +-- !query +SELECT CAST("interval '3-1' year to month" AS interval year to month) +-- !query schema +struct +-- !query output +3-1 + + +-- !query +SELECT CAST("interval '3 00:00:01' day to second" AS interval day to second) +-- !query schema +struct +-- !query output +3 00:00:01.000000000 + + +-- !query +SELECT CAST(interval 3 month 1 hour AS string) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.catalyst.parser.ParseException + +Cannot mix year-month and day-time fields: interval 3 month 1 hour(line 1, pos 12) + +== SQL == +SELECT CAST(interval 3 month 1 hour AS string) +------------^^^ + + +-- !query +SELECT CAST(interval 3 year 1 month AS string) +-- !query schema +struct +-- !query output +INTERVAL '3-1' YEAR TO MONTH + + +-- !query +SELECT CAST(interval 3 day 1 second AS string) +-- !query schema +struct +-- !query output +INTERVAL '3 00:00:01' DAY TO SECOND + + +-- !query +select cast(' 1' as tinyint) +-- !query schema +struct +-- !query output +1 + + +-- !query +select cast(' 1\t' as tinyint) +-- !query schema +struct +-- !query output +1 + + +-- !query +select cast(' 1' as smallint) +-- !query schema +struct +-- !query output +1 + + +-- !query +select cast(' 1' as INT) +-- !query schema +struct +-- !query output +1 + + +-- !query +select cast(' 1' as bigint) +-- !query schema +struct +-- !query output +1 + + +-- !query +select cast(' 1' as float) +-- !query schema +struct +-- !query output +1.0 + + +-- !query +select cast(' 1 ' as DOUBLE) +-- !query schema +struct +-- !query output +1.0 + + +-- !query +select cast('1.0 ' as DEC) +-- !query schema +struct +-- !query output +1 + + +-- !query +select cast('1中文' as tinyint) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: 1中文. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select cast('1中文' as smallint) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: 1中文. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select cast('1中文' as INT) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: 1中文. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select cast('中文1' as bigint) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: 中文1. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select cast('1中文' as bigint) +-- !query schema +struct<> +-- !query output +java.lang.NumberFormatException +invalid input syntax for type numeric: 1中文. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select cast('\t\t true \n\r ' as boolean) +-- !query schema +struct +-- !query output +true + + +-- !query +select cast('\t\n false \t\r' as boolean) +-- !query schema +struct +-- !query output +false + + +-- !query +select cast('\t\n xyz \t\r' as boolean) +-- !query schema +struct<> +-- !query output +java.lang.UnsupportedOperationException +invalid input syntax for type boolean: + xyz . To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. + + +-- !query +select cast('23.45' as decimal(4, 2)) +-- !query schema +struct +-- !query output +23.45 + + +-- !query +select cast('123.45' as decimal(4, 2)) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +Decimal(expanded,123.45,5,2}) cannot be represented as Decimal(4, 2). If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast('123.45' as decimal(4, 2)) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out index 375c1e332fc04..8cd1f6fba3b90 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out @@ -77,6 +77,9 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Decimal(expanded,10000000000000000000000000000000000000.1,39,1}) cannot be represented as Decimal(38, 1). If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select (5e36BD + 0.1) + 5e36BD + ^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -86,6 +89,9 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Decimal(expanded,-11000000000000000000000000000000000000.1,39,1}) cannot be represented as Decimal(38, 1). If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select (-4e36BD - 0.1) - 7e36BD + ^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -95,6 +101,9 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Decimal(expanded,152415787532388367501905199875019052100,39,0}) cannot be represented as Decimal(38, 2). If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select 12345678901234567890.0 * 12345678901234567890.0 + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -104,6 +113,9 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Decimal(expanded,1000000000000000000000000000000000000,37,0}) cannot be represented as Decimal(38, 6). If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select 1e35BD / 0.1 + ^^^^^^^^^^^^ -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index f28e530ae4faf..1f82263843232 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -647,6 +647,9 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Decimal(expanded,1234567890123456789,20,0}) cannot be represented as Decimal(18, 6). If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/cast.sql.out index 9bd3a4e26a9e8..79a1e28a143b5 100644 --- a/sql/core/src/test/resources/sql-tests/results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cast.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 55 +-- Number of queries: 57 -- !query @@ -458,3 +458,19 @@ struct -- !query output NULL + + +-- !query +select cast('23.45' as decimal(4, 2)) +-- !query schema +struct +-- !query output +23.45 + + +-- !query +select cast('123.45' as decimal(4, 2)) +-- !query schema +struct +-- !query output +NULL From 00292543ab6b1f86ef920a4840dc23759e0b9e44 Mon Sep 17 00:00:00 2001 From: Anish Shrigondekar Date: Thu, 7 Apr 2022 05:51:57 +0900 Subject: [PATCH 103/535] [SPARK-38787][SS] Replace found value with non-null element in the remaining list for key and remove remaining null elements from values in keyWithIndexToValue store for stream-stream joins MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? In stream-stream joins, for removing old state (watermark by value), we call the `removeByValue` function with a removal condition. Within the iterator returned, if we find null at the end for matched value at non-last index, we are currently not removing and swapping the matched value. With this change, we will find the first non-null value from end and swap current index with that value and remove all elements from index + 1 to the end and then drop the last element as before. ### Why are the changes needed? This change fixes a bug where we were not replacing found/matching values for `removeByValue` when encountering nulls in the symmetric hash join code. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a unit test for this change with nulls added. Here is a sample output: ``` Executing tests from //sql/core:org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManagerSuite-hive-2.3__hadoop-3.2 ----------------------------------------------------------------------------- 2022-04-01 21:11:59,641 INFO CodeGenerator - Code generated in 225.884757 ms 2022-04-01 21:11:59,662 INFO CodeGenerator - Code generated in 10.870786 ms Run starting. Expected test count is: 4 … ===== TEST OUTPUT FOR o.a.s.sql.execution.streaming.state.SymmetricHashJoinStateManagerSuite: 'StreamingJoinStateManager V2 - all operations with nulls' ===== 2022-04-01 21:12:03,487 INFO StateStore - State Store maintenance task started 2022-04-01 21:12:03,508 INFO CheckpointFileManager - Writing atomically to /tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyToNumValues/_metadata/schema using temp file /tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyToNumValues/_metadata/.schema.9bcc39c9-721e-4ee0-b369-fb4f516c4fd6.tmp 2022-04-01 21:12:03,524 INFO CheckpointFileManager - Renamed temp file /tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyToNumValues/_metadata/.schema.9bcc39c9-721e-4ee0-b369-fb4f516c4fd6.tmp to /tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyToNumValues/_metadata/schema 2022-04-01 21:12:03,525 INFO StateStore - Retrieved reference to StateStoreCoordinator: org.apache.spark.sql.execution.streaming.state.StateStoreCoordinatorRef374ccb9 2022-04-01 21:12:03,525 INFO StateStore - Reported that the loaded instance StateStoreProviderId(StateStoreId(/tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292,0,0,left-keyToNumValues),47925997-9891-4025-a36a-3e18bc758b50) is active 2022-04-01 21:12:03,525 INFO HDFSBackedStateStoreProvider - Retrieved version 0 of HDFSStateStoreProvider[id = (op=0,part=0),dir = /tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyToNumValues] for update 2022-04-01 21:12:03,525 INFO SymmetricHashJoinStateManager$KeyToNumValuesStore - Loaded store StateStoreId(/tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292,0,0,left-keyToNumValues) 2022-04-01 21:12:03,541 INFO CheckpointFileManager - Writing atomically to /tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyWithIndexToValue/_metadata/schema using temp file /tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyWithIndexToValue/_metadata/.schema.fcde2229-a4fa-409b-b3eb-751572f06c08.tmp 2022-04-01 21:12:03,556 INFO CheckpointFileManager - Renamed temp file /tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyWithIndexToValue/_metadata/.schema.fcde2229-a4fa-409b-b3eb-751572f06c08.tmp to /tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyWithIndexToValue/_metadata/schema 2022-04-01 21:12:03,558 INFO StateStore - Retrieved reference to StateStoreCoordinator: org.apache.spark.sql.execution.streaming.state.StateStoreCoordinatorRef1ea930eb 2022-04-01 21:12:03,559 INFO StateStore - Reported that the loaded instance StateStoreProviderId(StateStoreId(/tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292,0,0,left-keyWithIndexToValue),47925997-9891-4025-a36a-3e18bc758b50) is active 2022-04-01 21:12:03,559 INFO HDFSBackedStateStoreProvider - Retrieved version 0 of HDFSStateStoreProvider[id = (op=0,part=0),dir = /tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyWithIndexToValue] for update 2022-04-01 21:12:03,559 INFO SymmetricHashJoinStateManager$KeyWithIndexToValueStore - Loaded store StateStoreId(/tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292,0,0,left-keyWithIndexToValue) 2022-04-01 21:12:03,564 INFO CheckpointFileManager - Writing atomically to /tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyWithIndexToValue/1.delta using temp file /tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyWithIndexToValue/.1.delta.86db3ac9-aa68-4a6b-9729-df93dc4b8a45.tmp 2022-04-01 21:12:03,568 INFO CheckpointFileManager - Writing atomically to /tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyToNumValues/1.delta using temp file /tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyToNumValues/.1.delta.9673bc1b-2bbe-412d-a0af-69f237cde31e.tmp 2022-04-01 21:12:03,572 WARN SymmetricHashJoinStateManager - `keyWithIndexToValue` returns a null value for index 4 at current key [false,40,10.0]. 2022-04-01 21:12:03,574 WARN SymmetricHashJoinStateManager - `keyWithIndexToValue` returns a null value for index 3 at current key [false,40,10.0]. 2022-04-01 21:12:03,576 WARN SymmetricHashJoinStateManager - `keyWithIndexToValue` returns a null value for index 3 at current key [false,60,10.0]. 2022-04-01 21:12:03,576 WARN SymmetricHashJoinStateManager - `keyWithIndexToValue` returns a null value for index 1 at current key [false,40,10.0]. 2022-04-01 21:12:03,577 INFO SymmetricHashJoinStateManager$KeyToNumValuesStore - Aborted store StateStoreId(/tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292,0,0,left-keyToNumValues) 2022-04-01 21:12:03,577 INFO HDFSBackedStateStoreProvider - Aborted version 1 for HDFSStateStore[id=(op=0,part=0),dir=/tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyToNumValues] 2022-04-01 21:12:03,577 INFO SymmetricHashJoinStateManager$KeyWithIndexToValueStore - Aborted store StateStoreId(/tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292,0,0,left-keyWithIndexToValue) 2022-04-01 21:12:03,577 INFO HDFSBackedStateStoreProvider - Aborted version 1 for HDFSStateStore[id=(op=0,part=0),dir=/tmp/spark-d94b9f11-e04c-4871-aeea-1d0b5c62e292/0/0/left-keyWithIndexToValue] 2022-04-01 21:12:03,580 INFO StateStore - StateStore stopped 2022-04-01 21:12:03,580 INFO SymmetricHashJoinStateManagerSuite - ===== FINISHED o.a.s.sql.execution.streaming.state.SymmetricHashJoinStateManagerSuite: 'StreamingJoinStateManager V2 - all operations with nulls' ===== … 2022-04-01 21:12:04,205 INFO StateStore - StateStore stopped Run completed in 5 seconds, 908 milliseconds. Total number of tests run: 4 Suites: completed 1, aborted 0 Tests: succeeded 4, failed 0, canceled 0, ignored 0, pending 0 All tests passed. 2022-04-01 21:12:04,605 INFO ShutdownHookManager - Shutdown hook called 2022-04-01 21:12:04,605 INFO ShutdownHookManager - Deleting directory /tmp/spark-37347802-bee5-4e7f-bffe-acb13eda1c5c 2022-04-01 21:12:04,608 INFO ShutdownHookManager - Deleting directory /tmp/spark-9e79a2e1-cec7-4fbf-804a-92e63913f516 ``` Closes #36073 from anishshri-db/bfix/SPARK-38787. Authored-by: Anish Shrigondekar Signed-off-by: Jungtaek Lim (cherry picked from commit 6d9bfb675f3e58c6e7d9facd8cf3f22069c4cc48) Signed-off-by: Jungtaek Lim --- .../state/SymmetricHashJoinStateManager.scala | 46 +++++++++++++- .../SymmetricHashJoinStateManagerSuite.scala | 60 +++++++++++++++++-- 2 files changed, 97 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala index f301d233cb0a0..56c47d564a3b3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala @@ -256,6 +256,16 @@ class SymmetricHashJoinStateManager( return null } + /** + * Find the first non-null value index starting from end + * and going up-to stopIndex. + */ + private def getRightMostNonNullIndex(stopIndex: Long): Option[Long] = { + (numValues - 1 to stopIndex by -1).find { idx => + keyWithIndexToValue.get(currentKey, idx) != null + } + } + override def getNext(): KeyToValuePair = { val currentValue = findNextValueForIndex() @@ -272,12 +282,33 @@ class SymmetricHashJoinStateManager( if (index != numValues - 1) { val valuePairAtMaxIndex = keyWithIndexToValue.get(currentKey, numValues - 1) if (valuePairAtMaxIndex != null) { + // Likely case where last element is non-null and we can simply swap with index. keyWithIndexToValue.put(currentKey, index, valuePairAtMaxIndex.value, valuePairAtMaxIndex.matched) } else { - val projectedKey = getInternalRowOfKeyWithIndex(currentKey) - logWarning(s"`keyWithIndexToValue` returns a null value for index ${numValues - 1} " + - s"at current key $projectedKey.") + // Find the rightmost non null index and swap values with that index, + // if index returned is not the same as the passed one + val nonNullIndex = getRightMostNonNullIndex(index + 1).getOrElse(index) + if (nonNullIndex != index) { + val valuePair = keyWithIndexToValue.get(currentKey, nonNullIndex) + keyWithIndexToValue.put(currentKey, index, valuePair.value, + valuePair.matched) + } + + // If nulls were found at the end, log a warning for the range of null indices. + if (nonNullIndex != numValues - 1) { + logWarning(s"`keyWithIndexToValue` returns a null value for indices " + + s"with range from startIndex=${nonNullIndex + 1} " + + s"and endIndex=${numValues - 1}.") + } + + // Remove all null values from nonNullIndex + 1 onwards + // The nonNullIndex itself will be handled as removing the last entry, + // similar to finding the value as the last element + (numValues - 1 to nonNullIndex + 1 by -1).foreach { removeIndex => + keyWithIndexToValue.remove(currentKey, removeIndex) + numValues -= 1 + } } } keyWithIndexToValue.remove(currentKey, numValues - 1) @@ -324,6 +355,15 @@ class SymmetricHashJoinStateManager( ) } + /** + * Update number of values for a key. + * NOTE: this function is only intended for use in unit tests + * to simulate null values. + */ + private[state] def updateNumValuesTestOnly(key: UnsafeRow, numValues: Long): Unit = { + keyToNumValues.put(key, numValues) + } + /* ===================================================== Private methods and inner classes diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala index 8a03d46d00007..deeebe1fc42bf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala @@ -46,6 +46,12 @@ class SymmetricHashJoinStateManagerSuite extends StreamTest with BeforeAndAfter } } + SymmetricHashJoinStateManager.supportedVersions.foreach { version => + test(s"StreamingJoinStateManager V${version} - all operations with nulls") { + testAllOperationsWithNulls(version) + } + } + SymmetricHashJoinStateManager.supportedVersions.foreach { version => test(s"SPARK-35689: StreamingJoinStateManager V${version} - " + "printable key of keyWithIndexToValue") { @@ -68,7 +74,6 @@ class SymmetricHashJoinStateManagerSuite extends StreamTest with BeforeAndAfter } } - private def testAllOperations(stateFormatVersion: Int): Unit = { withJoinStateManager(inputValueAttribs, joinKeyExprs, stateFormatVersion) { manager => implicit val mgr = manager @@ -99,11 +104,6 @@ class SymmetricHashJoinStateManagerSuite extends StreamTest with BeforeAndAfter assert(get(30) === Seq.empty) // should remove 30 assert(numRows === 0) - def appendAndTest(key: Int, values: Int*): Unit = { - values.foreach { value => append(key, value)} - require(get(key) === values) - } - appendAndTest(40, 100, 200, 300) appendAndTest(50, 125) appendAndTest(60, 275) // prepare for testing removeByValue @@ -130,6 +130,43 @@ class SymmetricHashJoinStateManagerSuite extends StreamTest with BeforeAndAfter assert(numRows === 0) } } + + /* Test removeByValue with nulls simulated by updating numValues on the state manager */ + private def testAllOperationsWithNulls(stateFormatVersion: Int): Unit = { + withJoinStateManager(inputValueAttribs, joinKeyExprs, stateFormatVersion) { manager => + implicit val mgr = manager + + appendAndTest(40, 100, 200, 300) + appendAndTest(50, 125) + appendAndTest(60, 275) // prepare for testing removeByValue + assert(numRows === 5) + + updateNumValues(40, 5) // update total values to 5 to create 2 nulls + removeByValue(125) + assert(get(40) === Seq(200, 300)) + assert(get(50) === Seq.empty) + assert(get(60) === Seq(275)) // should remove only some values, not all and nulls + assert(numRows === 3) + + append(40, 50) + assert(get(40) === Seq(50, 200, 300)) + assert(numRows === 4) + updateNumValues(40, 4) // update total values to 4 to create 1 null + + removeByValue(200) + assert(get(40) === Seq(300)) + assert(get(60) === Seq(275)) // should remove only some values, not all and nulls + assert(numRows === 2) + updateNumValues(40, 2) // update total values to simulate nulls + updateNumValues(60, 4) + + removeByValue(300) + assert(get(40) === Seq.empty) + assert(get(60) === Seq.empty) // should remove all values now including nulls + assert(numRows === 0) + } + } + val watermarkMetadata = new MetadataBuilder().putLong(EventTimeWatermark.delayKey, 10).build() val inputValueSchema = new StructType() .add(StructField("time", IntegerType, metadata = watermarkMetadata)) @@ -157,6 +194,17 @@ class SymmetricHashJoinStateManagerSuite extends StreamTest with BeforeAndAfter manager.append(toJoinKeyRow(key), toInputValue(value), matched = false) } + def appendAndTest(key: Int, values: Int*) + (implicit manager: SymmetricHashJoinStateManager): Unit = { + values.foreach { value => append(key, value)} + require(get(key) === values) + } + + def updateNumValues(key: Int, numValues: Long) + (implicit manager: SymmetricHashJoinStateManager): Unit = { + manager.updateNumValuesTestOnly(toJoinKeyRow(key), numValues) + } + def get(key: Int)(implicit manager: SymmetricHashJoinStateManager): Seq[Int] = { manager.get(toJoinKeyRow(key)).map(toValueInt).toSeq.sorted } From 01cdfb4b7858e85a89162435ee176dc64b63b700 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 7 Apr 2022 08:52:50 +0300 Subject: [PATCH 104/535] [SPARK-38590][SQL] New SQL function: try_to_binary ### What changes were proposed in this pull request? Add a new SQL function: `try_to_binary`. It is identical to the function `to_binary`, except that it returns NULL results instead of throwing an exception on encoding errors. There is a similar function in Snowflake: https://docs.snowflake.com/en/sql-reference/functions/try_to_binary.html ### Why are the changes needed? Users can manage to finish queries without interruptions by encoding errors. ### Does this PR introduce _any_ user-facing change? Yes, adding a new SQL function: `try_to_binary`. It is identical to the function `to_binary`, except that it returns NULL results instead of throwing an exception on encoding errors. ### How was this patch tested? UT Closes #35897 from gengliangwang/try_to_binary. Authored-by: Gengliang Wang Signed-off-by: Max Gekk (cherry picked from commit becda3339381b3975ed567c156260eda036d7a1b) Signed-off-by: Max Gekk --- .../catalyst/analysis/FunctionRegistry.scala | 1 + .../sql/catalyst/expressions/TryEval.scala | 35 ++++++++++++++++++ .../expressions/stringExpressions.scala | 24 +++++++----- .../sql-functions/sql-expression-schema.md | 3 +- .../sql-tests/inputs/string-functions.sql | 4 +- .../sql-tests/inputs/try-string-functions.sql | 21 +++++++++++ .../results/ansi/string-functions.sql.out | 11 +++++- .../results/string-functions.sql.out | 11 +++++- .../results/try-string-functions.sql.out | Bin 0 -> 1925 bytes 9 files changed, 97 insertions(+), 13 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/try-string-functions.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/try-string-functions.sql.out diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index bb4aa701102fe..5befa779d166e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -454,6 +454,7 @@ object FunctionRegistry { expression[TryMultiply]("try_multiply"), expression[TryElementAt]("try_element_at"), expression[TrySum]("try_sum"), + expression[TryToBinary]("try_to_binary"), // aggregate functions expression[HyperLogLogPlusPlus]("approx_count_distinct"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala index 7a8a689a1bd3e..589e5801424e7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala @@ -181,3 +181,38 @@ case class TryMultiply(left: Expression, right: Expression, replacement: Express override protected def withNewChildInternal(newChild: Expression): Expression = this.copy(replacement = newChild) } + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(str[, fmt]) - This is a special version of `to_binary` that performs the same operation, but returns a NULL value instead of raising an error if the conversion cannot be performed.", + examples = """ + Examples: + > SELECT _FUNC_('abc', 'utf-8'); + abc + > select _FUNC_('a!', 'base64'); + NULL + > select _FUNC_('abc', 'invalidFormat'); + NULL + """, + since = "3.3.0", + group = "string_funcs") +// scalastyle:on line.size.limit +case class TryToBinary( + expr: Expression, + format: Option[Expression], + replacement: Expression) extends RuntimeReplaceable + with InheritAnalysisRules { + def this(expr: Expression) = + this(expr, None, TryEval(ToBinary(expr, None, nullOnInvalidFormat = true))) + + def this(expr: Expression, formatExpression: Expression) = + this(expr, Some(formatExpression), + TryEval(ToBinary(expr, Some(formatExpression), nullOnInvalidFormat = true))) + + override def prettyName: String = "try_to_binary" + + override def parameters: Seq[Expression] = expr +: format.toSeq + + override protected def withNewChildInternal(newChild: Expression): Expression = + this.copy(replacement = newChild) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index a08ab84ac6f4a..88045f85bca55 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -2638,7 +2638,10 @@ case class Encode(value: Expression, charset: Expression) since = "3.3.0", group = "string_funcs") // scalastyle:on line.size.limit -case class ToBinary(expr: Expression, format: Option[Expression]) extends RuntimeReplaceable +case class ToBinary( + expr: Expression, + format: Option[Expression], + nullOnInvalidFormat: Boolean = false) extends RuntimeReplaceable with ImplicitCastInputTypes { override lazy val replacement: Expression = format.map { f => @@ -2651,6 +2654,7 @@ case class ToBinary(expr: Expression, format: Option[Expression]) extends Runtim case "hex" => Unhex(expr) case "utf-8" => Encode(expr, Literal("UTF-8")) case "base64" => UnBase64(expr) + case _ if nullOnInvalidFormat => Literal(null, BinaryType) case other => throw QueryCompilationErrors.invalidStringLiteralParameter( "to_binary", "format", other, Some("The value has to be a case-insensitive string literal of " + @@ -2659,16 +2663,18 @@ case class ToBinary(expr: Expression, format: Option[Expression]) extends Runtim } }.getOrElse(Unhex(expr)) - def this(expr: Expression) = this(expr, None) + def this(expr: Expression) = this(expr, None, false) def this(expr: Expression, format: Expression) = this(expr, Some({ - // We perform this check in the constructor to make it eager and not go through type coercion. - if (format.foldable && (format.dataType == StringType || format.dataType == NullType)) { - format - } else { - throw QueryCompilationErrors.requireLiteralParameter("to_binary", "format", "string") - } - })) + // We perform this check in the constructor to make it eager and not go through type coercion. + if (format.foldable && (format.dataType == StringType || format.dataType == NullType)) { + format + } else { + throw QueryCompilationErrors.requireLiteralParameter("to_binary", "format", "string") + } + }), + false + ) override def prettyName: String = "to_binary" diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 644bfa926dafb..1dbf9678af9e3 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 385 + - Number of queries: 386 - Number of expressions that missing example: 12 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions @@ -316,6 +316,7 @@ | org.apache.spark.sql.catalyst.expressions.TryElementAt | try_element_at | SELECT try_element_at(array(1, 2, 3), 2) | struct | | org.apache.spark.sql.catalyst.expressions.TryMultiply | try_multiply | SELECT try_multiply(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.TrySubtract | try_subtract | SELECT try_subtract(2, 1) | struct | +| org.apache.spark.sql.catalyst.expressions.TryToBinary | try_to_binary | SELECT try_to_binary('abc', 'utf-8') | struct | | org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct | | org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct | | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct | diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index 7d22e791570c3..0db28ad9f3ecc 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -184,5 +184,7 @@ select to_binary(null, cast(null as string)); -- 'format' parameter must be string type or void type. select to_binary(null, cast(null as int)); select to_binary('abc', 1); --- invalid inputs. +-- invalid format select to_binary('abc', 'invalidFormat'); +-- invalid string input +select to_binary('a!', 'base64'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/try-string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/try-string-functions.sql new file mode 100644 index 0000000000000..20f02374e78c6 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/try-string-functions.sql @@ -0,0 +1,21 @@ +-- try_to_binary +select try_to_binary('abc'); +select try_to_binary('abc', 'utf-8'); +select try_to_binary('abc', 'base64'); +select try_to_binary('abc', 'hex'); +-- 'format' parameter can be any foldable string value, not just literal. +select try_to_binary('abc', concat('utf', '-8')); +-- 'format' parameter is case insensitive. +select try_to_binary('abc', 'Hex'); +-- null inputs lead to null result. +select try_to_binary('abc', null); +select try_to_binary(null, 'utf-8'); +select try_to_binary(null, null); +select try_to_binary(null, cast(null as string)); +-- 'format' parameter must be string type or void type. +select try_to_binary(null, cast(null as int)); +select try_to_binary('abc', 1); +-- invalid format +select try_to_binary('abc', 'invalidFormat'); +-- invalid string input +select try_to_binary('a!', 'base64'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 01213bd57ad73..52d70e22a44dd 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 141 +-- Number of queries: 142 -- !query @@ -1140,3 +1140,12 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException Invalid value for the 'format' parameter of function 'to_binary': invalidformat. The value has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'. + + +-- !query +select to_binary('a!', 'base64') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Last unit does not have enough valid bits diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 3a7f197e36234..ff14da143da7b 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 141 +-- Number of queries: 142 -- !query @@ -1136,3 +1136,12 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException Invalid value for the 'format' parameter of function 'to_binary': invalidformat. The value has to be a case-insensitive string literal of 'hex', 'utf-8', or 'base64'. + + +-- !query +select to_binary('a!', 'base64') +-- !query schema +struct<> +-- !query output +java.lang.IllegalArgumentException +Last unit does not have enough valid bits diff --git a/sql/core/src/test/resources/sql-tests/results/try-string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/try-string-functions.sql.out new file mode 100644 index 0000000000000000000000000000000000000000..bda723fd19e6481b68ba1a722ba393e4662e1114 GIT binary patch literal 1925 zcmb`IQES355Xbl2PjNoALi-@d5E%m*Y!js5+C57eqXufaC6}>&xUrA4udu`ln=i44 zzLe73zyDwEE|)BWnZ;($P;yq+Eo_AnnxR-j)k3jcEUnOODGU}?qKL8#a=WjDhGqlr zZcPd^h0!>Q{L;XGX(JZoWem80C0Hy{VQg~?U)A8X#xb&<)A;pPJGh*ogiRN z%0F)uXPEdh#I72?6of8F;iz}=6tc-MfJLgxGc_en5b!3YMA^C~Pa8j zw=^ekQsE_J^Srzq>oBrY7?FB)baAPUwy{(ntd{G?M!V~YHos1!R{$FL(M28PgA~rr I@k4+70zUABasU7T literal 0 HcmV?d00001 From 3d93bd13decf270d41d2b279be809fe0388ac2ff Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 8 Apr 2022 01:10:13 +0800 Subject: [PATCH 105/535] [SPARK-38818][DOC] Fix the docs of try_multiply/try_subtract/ANSI cast ### What changes were proposed in this pull request? - Fix the valid combinations of ANSI CAST: Numeric types can be cast as Timestamp instead of Interval. - Fix the usage of try_multiply/try_subtract, from `expr1 FUNC expr2` to `FUNC(expr1< expr2)` ### Why are the changes needed? Fix wrong documention. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Preview: image Closes #36099 from gengliangwang/fixDoc. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 26428fe812bd5600a61c4d4efc4dcb0f32646222) Signed-off-by: Gengliang Wang --- docs/sql-ref-ansi-compliance.md | 2 +- .../org/apache/spark/sql/catalyst/expressions/TryEval.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index f296f2b4a820c..ab11e9fede2e4 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -81,7 +81,7 @@ Besides, the ANSI SQL mode disallows the following type conversions which are al | Source\Target | Numeric | String | Date | Timestamp | Interval | Boolean | Binary | Array | Map | Struct | |-----------|---------|--------|------|-----------|----------|---------|--------|-------|-----|--------| -| Numeric | **Y** | Y | N | N | **Y** | Y | N | N | N | N | +| Numeric | **Y** | Y | N | **Y** | N | Y | N | N | N | N | | String | **Y** | Y | **Y** | **Y** | **Y** | **Y** | Y | N | N | N | | Date | N | Y | Y | Y | N | N | N | N | N | N | | Timestamp | **Y** | Y | Y | Y | N | N | N | N | N | N | diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala index 589e5801424e7..c179c83befb4c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TryEval.scala @@ -123,7 +123,7 @@ case class TryDivide(left: Expression, right: Expression, replacement: Expressio } @ExpressionDescription( - usage = "expr1 _FUNC_ expr2 - Returns `expr1`-`expr2` and the result is null on overflow. " + + usage = "_FUNC_(expr1, expr2) - Returns `expr1`-`expr2` and the result is null on overflow. " + "The acceptable input types are the same with the `-` operator.", examples = """ Examples: @@ -156,7 +156,7 @@ case class TrySubtract(left: Expression, right: Expression, replacement: Express } @ExpressionDescription( - usage = "expr1 _FUNC_ expr2 - Returns `expr1`*`expr2` and the result is null on overflow. " + + usage = "_FUNC_(expr1, expr2) - Returns `expr1`*`expr2` and the result is null on overflow. " + "The acceptable input types are the same with the `*` operator.", examples = """ Examples: From cc5f074c4c91bc1666b20efa40cfedd65d4137fd Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 7 Apr 2022 15:19:46 -0700 Subject: [PATCH 106/535] [SPARK-38817][K8S][BUILD] Upgrade kubernetes-client to 5.12.2 ### What changes were proposed in this pull request? Upgrade kubernetes-client to 5.12.2: Changes list: - https://github.com/fabric8io/kubernetes-client/releases/tag/v5.12.2 Especially, `Supports Queue (cluster) API for Volcano extension` will help us setting queue capacity dynamically in K8s IT. ### Why are the changes needed? The next kubernetes client version will be 6.x with breaking changes: https://github.com/fabric8io/kubernetes-client/blob/master/CHANGELOG.md#note-breaking-changes-in-the-api . We'd better to upgrade to latest 5.X first to reduce follow upgrade cost. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - CI - integration test Closes #36098 from Yikun/k8scli-5.12.2. Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun (cherry picked from commit 5dae4bebea4e677e098fcf815389b1652dbcba52) Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 42 +++++++++++++-------------- dev/deps/spark-deps-hadoop-3-hive-2.3 | 42 +++++++++++++-------------- pom.xml | 2 +- 3 files changed, 43 insertions(+), 43 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 0202d9fff8803..c0b1502743013 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -162,27 +162,27 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.32//jul-to-slf4j-1.7.32.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/5.12.1//kubernetes-client-5.12.1.jar -kubernetes-model-admissionregistration/5.12.1//kubernetes-model-admissionregistration-5.12.1.jar -kubernetes-model-apiextensions/5.12.1//kubernetes-model-apiextensions-5.12.1.jar -kubernetes-model-apps/5.12.1//kubernetes-model-apps-5.12.1.jar -kubernetes-model-autoscaling/5.12.1//kubernetes-model-autoscaling-5.12.1.jar -kubernetes-model-batch/5.12.1//kubernetes-model-batch-5.12.1.jar -kubernetes-model-certificates/5.12.1//kubernetes-model-certificates-5.12.1.jar -kubernetes-model-common/5.12.1//kubernetes-model-common-5.12.1.jar -kubernetes-model-coordination/5.12.1//kubernetes-model-coordination-5.12.1.jar -kubernetes-model-core/5.12.1//kubernetes-model-core-5.12.1.jar -kubernetes-model-discovery/5.12.1//kubernetes-model-discovery-5.12.1.jar -kubernetes-model-events/5.12.1//kubernetes-model-events-5.12.1.jar -kubernetes-model-extensions/5.12.1//kubernetes-model-extensions-5.12.1.jar -kubernetes-model-flowcontrol/5.12.1//kubernetes-model-flowcontrol-5.12.1.jar -kubernetes-model-metrics/5.12.1//kubernetes-model-metrics-5.12.1.jar -kubernetes-model-networking/5.12.1//kubernetes-model-networking-5.12.1.jar -kubernetes-model-node/5.12.1//kubernetes-model-node-5.12.1.jar -kubernetes-model-policy/5.12.1//kubernetes-model-policy-5.12.1.jar -kubernetes-model-rbac/5.12.1//kubernetes-model-rbac-5.12.1.jar -kubernetes-model-scheduling/5.12.1//kubernetes-model-scheduling-5.12.1.jar -kubernetes-model-storageclass/5.12.1//kubernetes-model-storageclass-5.12.1.jar +kubernetes-client/5.12.2//kubernetes-client-5.12.2.jar +kubernetes-model-admissionregistration/5.12.2//kubernetes-model-admissionregistration-5.12.2.jar +kubernetes-model-apiextensions/5.12.2//kubernetes-model-apiextensions-5.12.2.jar +kubernetes-model-apps/5.12.2//kubernetes-model-apps-5.12.2.jar +kubernetes-model-autoscaling/5.12.2//kubernetes-model-autoscaling-5.12.2.jar +kubernetes-model-batch/5.12.2//kubernetes-model-batch-5.12.2.jar +kubernetes-model-certificates/5.12.2//kubernetes-model-certificates-5.12.2.jar +kubernetes-model-common/5.12.2//kubernetes-model-common-5.12.2.jar +kubernetes-model-coordination/5.12.2//kubernetes-model-coordination-5.12.2.jar +kubernetes-model-core/5.12.2//kubernetes-model-core-5.12.2.jar +kubernetes-model-discovery/5.12.2//kubernetes-model-discovery-5.12.2.jar +kubernetes-model-events/5.12.2//kubernetes-model-events-5.12.2.jar +kubernetes-model-extensions/5.12.2//kubernetes-model-extensions-5.12.2.jar +kubernetes-model-flowcontrol/5.12.2//kubernetes-model-flowcontrol-5.12.2.jar +kubernetes-model-metrics/5.12.2//kubernetes-model-metrics-5.12.2.jar +kubernetes-model-networking/5.12.2//kubernetes-model-networking-5.12.2.jar +kubernetes-model-node/5.12.2//kubernetes-model-node-5.12.2.jar +kubernetes-model-policy/5.12.2//kubernetes-model-policy-5.12.2.jar +kubernetes-model-rbac/5.12.2//kubernetes-model-rbac-5.12.2.jar +kubernetes-model-scheduling/5.12.2//kubernetes-model-scheduling-5.12.2.jar +kubernetes-model-storageclass/5.12.2//kubernetes-model-storageclass-5.12.2.jar lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index d1593d75a7cf4..20a727521aa97 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -148,27 +148,27 @@ jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar jul-to-slf4j/1.7.32//jul-to-slf4j-1.7.32.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar -kubernetes-client/5.12.1//kubernetes-client-5.12.1.jar -kubernetes-model-admissionregistration/5.12.1//kubernetes-model-admissionregistration-5.12.1.jar -kubernetes-model-apiextensions/5.12.1//kubernetes-model-apiextensions-5.12.1.jar -kubernetes-model-apps/5.12.1//kubernetes-model-apps-5.12.1.jar -kubernetes-model-autoscaling/5.12.1//kubernetes-model-autoscaling-5.12.1.jar -kubernetes-model-batch/5.12.1//kubernetes-model-batch-5.12.1.jar -kubernetes-model-certificates/5.12.1//kubernetes-model-certificates-5.12.1.jar -kubernetes-model-common/5.12.1//kubernetes-model-common-5.12.1.jar -kubernetes-model-coordination/5.12.1//kubernetes-model-coordination-5.12.1.jar -kubernetes-model-core/5.12.1//kubernetes-model-core-5.12.1.jar -kubernetes-model-discovery/5.12.1//kubernetes-model-discovery-5.12.1.jar -kubernetes-model-events/5.12.1//kubernetes-model-events-5.12.1.jar -kubernetes-model-extensions/5.12.1//kubernetes-model-extensions-5.12.1.jar -kubernetes-model-flowcontrol/5.12.1//kubernetes-model-flowcontrol-5.12.1.jar -kubernetes-model-metrics/5.12.1//kubernetes-model-metrics-5.12.1.jar -kubernetes-model-networking/5.12.1//kubernetes-model-networking-5.12.1.jar -kubernetes-model-node/5.12.1//kubernetes-model-node-5.12.1.jar -kubernetes-model-policy/5.12.1//kubernetes-model-policy-5.12.1.jar -kubernetes-model-rbac/5.12.1//kubernetes-model-rbac-5.12.1.jar -kubernetes-model-scheduling/5.12.1//kubernetes-model-scheduling-5.12.1.jar -kubernetes-model-storageclass/5.12.1//kubernetes-model-storageclass-5.12.1.jar +kubernetes-client/5.12.2//kubernetes-client-5.12.2.jar +kubernetes-model-admissionregistration/5.12.2//kubernetes-model-admissionregistration-5.12.2.jar +kubernetes-model-apiextensions/5.12.2//kubernetes-model-apiextensions-5.12.2.jar +kubernetes-model-apps/5.12.2//kubernetes-model-apps-5.12.2.jar +kubernetes-model-autoscaling/5.12.2//kubernetes-model-autoscaling-5.12.2.jar +kubernetes-model-batch/5.12.2//kubernetes-model-batch-5.12.2.jar +kubernetes-model-certificates/5.12.2//kubernetes-model-certificates-5.12.2.jar +kubernetes-model-common/5.12.2//kubernetes-model-common-5.12.2.jar +kubernetes-model-coordination/5.12.2//kubernetes-model-coordination-5.12.2.jar +kubernetes-model-core/5.12.2//kubernetes-model-core-5.12.2.jar +kubernetes-model-discovery/5.12.2//kubernetes-model-discovery-5.12.2.jar +kubernetes-model-events/5.12.2//kubernetes-model-events-5.12.2.jar +kubernetes-model-extensions/5.12.2//kubernetes-model-extensions-5.12.2.jar +kubernetes-model-flowcontrol/5.12.2//kubernetes-model-flowcontrol-5.12.2.jar +kubernetes-model-metrics/5.12.2//kubernetes-model-metrics-5.12.2.jar +kubernetes-model-networking/5.12.2//kubernetes-model-networking-5.12.2.jar +kubernetes-model-node/5.12.2//kubernetes-model-node-5.12.2.jar +kubernetes-model-policy/5.12.2//kubernetes-model-policy-5.12.2.jar +kubernetes-model-rbac/5.12.2//kubernetes-model-rbac-5.12.2.jar +kubernetes-model-scheduling/5.12.2//kubernetes-model-scheduling-5.12.2.jar +kubernetes-model-storageclass/5.12.2//kubernetes-model-storageclass-5.12.2.jar lapack/2.2.1//lapack-2.2.1.jar leveldbjni-all/1.8//leveldbjni-all-1.8.jar libfb303/0.9.3//libfb303-0.9.3.jar diff --git a/pom.xml b/pom.xml index 64d6e0762151b..77fbdc0c5e139 100644 --- a/pom.xml +++ b/pom.xml @@ -215,7 +215,7 @@ 7.0.0 org.fusesource.leveldbjni - 5.12.1 + 5.12.2 ${java.home} From cf7e3574efc1d4bb7233f18fcf344e94d26c2ac1 Mon Sep 17 00:00:00 2001 From: huaxingao Date: Thu, 7 Apr 2022 16:08:45 -0700 Subject: [PATCH 107/535] [SPARK-38825][SQL][TEST] Add a test to cover parquet notIn filter ### What changes were proposed in this pull request? Currently we don't have a test for parquet `notIn` filter, so add a test for this ### Why are the changes needed? to make tests more complete ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new test Closes #36109 from huaxingao/inFilter. Authored-by: huaxingao Signed-off-by: huaxingao (cherry picked from commit d6fd0405b60875ac5e2c9daee1ec785f74e9b7a3) Signed-off-by: huaxingao --- .../parquet/ParquetFilterSuite.scala | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 64a2ec6308cde..71ea474409c6f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -1901,6 +1901,27 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } } + + test("SPARK-38825: in and notIn filters") { + import testImplicits._ + withTempPath { file => + Seq(1, 2, 0, -1, 99, 1000, 3, 7, 2).toDF("id").coalesce(1).write.mode("overwrite") + .parquet(file.getCanonicalPath) + var df = spark.read.parquet(file.getCanonicalPath) + var in = df.filter(col("id").isin(100, 3, 11, 12, 13)) + var notIn = df.filter(!col("id").isin(100, 3, 11, 12, 13)) + checkAnswer(in, Seq(Row(3))) + checkAnswer(notIn, Seq(Row(1), Row(2), Row(0), Row(-1), Row(99), Row(1000), Row(7), Row(2))) + + Seq("mary", "martin", "lucy", "alex", "mary", "dan").toDF("name").coalesce(1) + .write.mode("overwrite").parquet(file.getCanonicalPath) + df = spark.read.parquet(file.getCanonicalPath) + in = df.filter(col("name").isin("mary", "victor", "leo", "alex")) + notIn = df.filter(!col("name").isin("mary", "victor", "leo", "alex")) + checkAnswer(in, Seq(Row("mary"), Row("alex"), Row("mary"))) + checkAnswer(notIn, Seq(Row("martin"), Row("lucy"), Row("dan"))) + } + } } @ExtendedSQLTest From b27e8842c349bd8bd9937b30b153546060ec56bb Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 8 Apr 2022 09:32:21 +0800 Subject: [PATCH 108/535] [SPARK-38548][SQL][FOLLOWUP] try_sum: return null if overflow happens before merging ### What changes were proposed in this pull request? This PR is to fix a bug in the new function `try_sum`. It should return null if overflow happens before merging the sums from map tasks. For example: MAP TASK 1: partial aggregation TRY_SUM(large_numbers_column) -> overflows, turns into NULL MAP TASK 2: partial aggregation TRY_SUM(large_numbers_column) -> succeeds, returns 12345 REDUCE TASK: merge TRY_SUM(NULL, 12345) -> returns 12345 We should use a new slot buffer `isEmpty` to track if there is a non-empty value in partial aggregation. If the partial result is null and there is non-empty value, the merge result should be `NULL`. ### Why are the changes needed? Bug fix ### Does this PR introduce _any_ user-facing change? No, the new function is not release yet. ### How was this patch tested? UT Closes #36097 from gengliangwang/fixTrySum. Lead-authored-by: Gengliang Wang Co-authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 061127b67d2fbae0042505f1dabfad10eed4a782) Signed-off-by: Gengliang Wang --- .../catalyst/expressions/aggregate/Sum.scala | 131 ++++++++++-------- .../org/apache/spark/sql/SQLQuerySuite.scala | 13 ++ 2 files changed, 86 insertions(+), 58 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala index 5d8fd702ba423..fd27edfc8fc10 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala @@ -32,6 +32,8 @@ abstract class SumBase(child: Expression) extends DeclarativeAggregate def failOnError: Boolean + protected def shouldTrackIsEmpty: Boolean + override def nullable: Boolean = true // Return data type. @@ -45,7 +47,7 @@ abstract class SumBase(child: Expression) extends DeclarativeAggregate final override val nodePatterns: Seq[TreePattern] = Seq(SUM) - private lazy val resultType = child.dataType match { + protected lazy val resultType = child.dataType match { case DecimalType.Fixed(precision, scale) => DecimalType.bounded(precision + 10, scale) case _: IntegralType => LongType @@ -60,51 +62,51 @@ abstract class SumBase(child: Expression) extends DeclarativeAggregate private lazy val zero = Literal.default(resultType) - override lazy val aggBufferAttributes = resultType match { - case _: DecimalType => sum :: isEmpty :: Nil - case _ => sum :: Nil + override lazy val aggBufferAttributes = if (shouldTrackIsEmpty) { + sum :: isEmpty :: Nil + } else { + sum :: Nil } - override lazy val initialValues: Seq[Expression] = resultType match { - case _: DecimalType => Seq(zero, Literal(true, BooleanType)) - case _ => Seq(Literal(null, resultType)) - } + override lazy val initialValues: Seq[Expression] = + if (shouldTrackIsEmpty) { + Seq(zero, Literal(true, BooleanType)) + } else { + Seq(Literal(null, resultType)) + } - protected def getUpdateExpressions: Seq[Expression] = { - resultType match { - case _: DecimalType => - // For decimal type, the initial value of `sum` is 0. We need to keep `sum` unchanged if - // the input is null, as SUM function ignores null input. The `sum` can only be null if - // overflow happens under non-ansi mode. - val sumExpr = if (child.nullable) { - If(child.isNull, sum, - Add(sum, KnownNotNull(child).cast(resultType), failOnError = failOnError)) - } else { - Add(sum, child.cast(resultType), failOnError = failOnError) - } - // The buffer becomes non-empty after seeing the first not-null input. - val isEmptyExpr = if (child.nullable) { - isEmpty && child.isNull - } else { - Literal(false, BooleanType) - } - Seq(sumExpr, isEmptyExpr) - case _ => - // For non-decimal type, the initial value of `sum` is null, which indicates no value. - // We need `coalesce(sum, zero)` to start summing values. And we need an outer `coalesce` - // in case the input is nullable. The `sum` can only be null if there is no value, as - // non-decimal type can produce overflowed value under non-ansi mode. - if (child.nullable) { - Seq(coalesce(Add(coalesce(sum, zero), child.cast(resultType), failOnError = failOnError), - sum)) - } else { - Seq(Add(coalesce(sum, zero), child.cast(resultType), failOnError = failOnError)) - } + protected def getUpdateExpressions: Seq[Expression] = if (shouldTrackIsEmpty) { + // If shouldTrackIsEmpty is true, the initial value of `sum` is 0. We need to keep `sum` + // unchanged if the input is null, as SUM function ignores null input. The `sum` can only be + // null if overflow happens under non-ansi mode. + val sumExpr = if (child.nullable) { + If(child.isNull, sum, + Add(sum, KnownNotNull(child).cast(resultType), failOnError = failOnError)) + } else { + Add(sum, child.cast(resultType), failOnError = failOnError) + } + // The buffer becomes non-empty after seeing the first not-null input. + val isEmptyExpr = if (child.nullable) { + isEmpty && child.isNull + } else { + Literal(false, BooleanType) + } + Seq(sumExpr, isEmptyExpr) + } else { + // If shouldTrackIsEmpty is false, the initial value of `sum` is null, which indicates no value. + // We need `coalesce(sum, zero)` to start summing values. And we need an outer `coalesce` + // in case the input is nullable. The `sum` can only be null if there is no value, as + // non-decimal type can produce overflowed value under non-ansi mode. + if (child.nullable) { + Seq(coalesce(Add(coalesce(sum, zero), child.cast(resultType), failOnError = failOnError), + sum)) + } else { + Seq(Add(coalesce(sum, zero), child.cast(resultType), failOnError = failOnError)) } } /** - * For decimal type: + * When shouldTrackIsEmpty is true: * If isEmpty is false and if sum is null, then it means we have had an overflow. * * update of the sum is as follows: @@ -113,26 +115,24 @@ abstract class SumBase(child: Expression) extends DeclarativeAggregate * If it did not have overflow, then add the sum.left and sum.right * * isEmpty: Set to false if either one of the left or right is set to false. This - * means we have seen atleast a value that was not null. + * means we have seen at least a value that was not null. */ - protected def getMergeExpressions: Seq[Expression] = { - resultType match { - case _: DecimalType => - val bufferOverflow = !isEmpty.left && sum.left.isNull - val inputOverflow = !isEmpty.right && sum.right.isNull - Seq( - If( - bufferOverflow || inputOverflow, - Literal.create(null, resultType), - // If both the buffer and the input do not overflow, just add them, as they can't be - // null. See the comments inside `updateExpressions`: `sum` can only be null if - // overflow happens. - KnownNotNull(sum.left) + KnownNotNull(sum.right)), - isEmpty.left && isEmpty.right) - case _ => Seq(coalesce( - Add(coalesce(sum.left, zero), sum.right, failOnError = failOnError), - sum.left)) - } + protected def getMergeExpressions: Seq[Expression] = if (shouldTrackIsEmpty) { + val bufferOverflow = !isEmpty.left && sum.left.isNull + val inputOverflow = !isEmpty.right && sum.right.isNull + Seq( + If( + bufferOverflow || inputOverflow, + Literal.create(null, resultType), + // If both the buffer and the input do not overflow, just add them, as they can't be + // null. See the comments inside `updateExpressions`: `sum` can only be null if + // overflow happens. + Add(KnownNotNull(sum.left), KnownNotNull(sum.right), failOnError)), + isEmpty.left && isEmpty.right) + } else { + Seq(coalesce( + Add(coalesce(sum.left, zero), sum.right, failOnError = failOnError), + sum.left)) } /** @@ -146,6 +146,8 @@ abstract class SumBase(child: Expression) extends DeclarativeAggregate case d: DecimalType => If(isEmpty, Literal.create(null, resultType), CheckOverflowInSum(sum, d, !failOnError)) + case _ if shouldTrackIsEmpty => + If(isEmpty, Literal.create(null, resultType), sum) case _ => sum } @@ -172,6 +174,11 @@ case class Sum( extends SumBase(child) { def this(child: Expression) = this(child, failOnError = SQLConf.get.ansiEnabled) + override def shouldTrackIsEmpty: Boolean = resultType match { + case _: DecimalType => true + case _ => false + } + override protected def withNewChildInternal(newChild: Expression): Sum = copy(child = newChild) override lazy val updateExpressions: Seq[Expression] = getUpdateExpressions @@ -208,6 +215,14 @@ case class TrySum(child: Expression) extends SumBase(child) { case _ => true } + override def shouldTrackIsEmpty: Boolean = resultType match { + // The sum of following data types can cause overflow. + case _: DecimalType | _: IntegralType | _: YearMonthIntervalType | _: DayTimeIntervalType => + true + case _ => + false + } + override lazy val updateExpressions: Seq[Expression] = if (failOnError) { val expressions = getUpdateExpressions diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index c28dde9cea09a..f43fbeffab003 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4328,6 +4328,19 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark Row(3, 2, 6) :: Nil) } } + + test("SPARK-38548: try_sum should return null if overflow happens before merging") { + val longDf = Seq(Long.MaxValue, Long.MaxValue, 2).toDF("v") + val yearMonthDf = Seq(Int.MaxValue, Int.MaxValue, 2) + .map(Period.ofMonths) + .toDF("v") + val dayTimeDf = Seq(106751991L, 106751991L, 2L) + .map(Duration.ofDays) + .toDF("v") + Seq(longDf, yearMonthDf, dayTimeDf).foreach { df => + checkAnswer(df.repartitionByRange(2, col("v")).selectExpr("try_sum(v)"), Row(null)) + } + } } case class Foo(bar: Option[String]) From 4d5c85edf78853bf9a21be86bec004a892d1a842 Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Fri, 8 Apr 2022 09:39:23 +0800 Subject: [PATCH 109/535] [SPARK-37013][SQL][FOLLOWUP] Add legacy flag for the breaking change of forbidding %0$ usage in format_string ### What changes were proposed in this pull request? Adds a legacy flag `spark.sql.legacy.allowZeroIndexInFormatString` for the breaking change introduced in https://github.com/apache/spark/pull/34313 and https://github.com/apache/spark/pull/34454 (followup). The flag is disabled by default. But when it is enabled, restore the pre-change behavior that allows the 0 based index in `format_string`. ### Why are the changes needed? The original commit is a breaking change, and breaking changes should be encouraged to add a flag to turn it off for smooth migration between versions. ### Does this PR introduce _any_ user-facing change? With the default value of the conf, there is no user-facing difference. If users turn this conf on, they can restore the pre-change behavior. ### How was this patch tested? Through unit tests. Closes #36101 from anchovYu/flags-format-string-java. Authored-by: Xinyi Yu Signed-off-by: Wenchen Fan (cherry picked from commit b7af2b3bac1a6d57e98c46831aa37a250d812c70) Signed-off-by: Wenchen Fan --- .../catalyst/expressions/stringExpressions.scala | 4 +++- .../org/apache/spark/sql/internal/SQLConf.scala | 11 +++++++++++ .../sql/errors/QueryCompilationErrorsSuite.scala | 13 ++++++++----- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 88045f85bca55..851261f87a4bc 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1808,7 +1808,9 @@ case class ParseUrl(children: Seq[Expression], failOnError: Boolean = SQLConf.ge case class FormatString(children: Expression*) extends Expression with ImplicitCastInputTypes { require(children.nonEmpty, s"$prettyName() should take at least 1 argument") - checkArgumentIndexNotZero(children(0)) + if (!SQLConf.get.getConf(SQLConf.ALLOW_ZERO_INDEX_IN_FORMAT_STRING)) { + checkArgumentIndexNotZero(children(0)) + } override def foldable: Boolean = children.forall(_.foldable) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 223017d17304d..fbd56968c1d9e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2015,6 +2015,17 @@ object SQLConf { .booleanConf .createWithDefault(false) + val ALLOW_ZERO_INDEX_IN_FORMAT_STRING = + buildConf("spark.sql.legacy.allowZeroIndexInFormatString") + .internal() + .doc("When false, the `strfmt` in `format_string(strfmt, obj, ...)` and " + + "`printf(strfmt, obj, ...)` will no longer support to use \"0$\" to specify the first " + + "argument, the first argument should always reference by \"1$\" when use argument index " + + "to indicating the position of the argument in the argument list.") + .version("3.3") + .booleanConf + .createWithDefault(false) + val USE_CURRENT_SQL_CONFIGS_FOR_VIEW = buildConf("spark.sql.legacy.useCurrentConfigsForView") .internal() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index d5cbfc844ccdd..4d776caacf319 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.errors import org.apache.spark.sql.{AnalysisException, IntegratedUDFTestUtils, QueryTest} import org.apache.spark.sql.functions.{grouping, grouping_id, sum} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession case class StringLongClass(a: String, b: Long) @@ -94,12 +95,14 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { } test("ILLEGAL_SUBSTRING: the argument_index of string format is invalid") { - val e = intercept[AnalysisException] { - sql("select format_string('%0$s', 'Hello')") + withSQLConf(SQLConf.ALLOW_ZERO_INDEX_IN_FORMAT_STRING.key -> "false") { + val e = intercept[AnalysisException] { + sql("select format_string('%0$s', 'Hello')") + } + assert(e.errorClass === Some("ILLEGAL_SUBSTRING")) + assert(e.message === + "The argument_index of string format cannot contain position 0$.") } - assert(e.errorClass === Some("ILLEGAL_SUBSTRING")) - assert(e.message === - "The argument_index of string format cannot contain position 0$.") } test("CANNOT_USE_MIXTURE: Using aggregate function with grouped aggregate pandas UDF") { From bdf76b629ce4a85affb534dc596357dc49a8e894 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 8 Apr 2022 10:29:39 +0800 Subject: [PATCH 110/535] [SPARK-36194][SQL][FOLLOWUP] Propagate distinct keys more precisely ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/35779 , to propagate distinct keys more precisely in 2 cases: 1. For `LIMIT 1`, each output attribute is a distinct key, not the entire tuple. 2. For aggregate, we can still propagate distinct keys from child. ### Why are the changes needed? make the optimization cover more cases ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests Closes #36100 from cloud-fan/followup. Authored-by: Wenchen Fan Signed-off-by: Yuming Wang (cherry picked from commit fbe82fb8ffaa0243c4085627e6e9a2813dc93e57) Signed-off-by: Yuming Wang --- .../plans/logical/DistinctKeyVisitor.scala | 20 +++++++++++++++++-- .../logical/DistinctKeyVisitorSuite.scala | 7 ++++++- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitor.scala index bb2bc4e3d2f93..726c52592887f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitor.scala @@ -50,11 +50,27 @@ object DistinctKeyVisitor extends LogicalPlanVisitor[Set[ExpressionSet]] { }.filter(_.nonEmpty) } + /** + * Add a new ExpressionSet S into distinctKeys D. + * To minimize the size of D: + * 1. If there is a subset of S in D, return D. + * 2. Otherwise, remove all the ExpressionSet containing S from D, and add the new one. + */ + private def addDistinctKey( + keys: Set[ExpressionSet], + newExpressionSet: ExpressionSet): Set[ExpressionSet] = { + if (keys.exists(_.subsetOf(newExpressionSet))) { + keys + } else { + keys.filterNot(s => newExpressionSet.subsetOf(s)) + newExpressionSet + } + } + override def default(p: LogicalPlan): Set[ExpressionSet] = Set.empty[ExpressionSet] override def visitAggregate(p: Aggregate): Set[ExpressionSet] = { val groupingExps = ExpressionSet(p.groupingExpressions) // handle group by a, a - projectDistinctKeys(Set(groupingExps), p.aggregateExpressions) + projectDistinctKeys(addDistinctKey(p.child.distinctKeys, groupingExps), p.aggregateExpressions) } override def visitDistinct(p: Distinct): Set[ExpressionSet] = Set(ExpressionSet(p.output)) @@ -70,7 +86,7 @@ object DistinctKeyVisitor extends LogicalPlanVisitor[Set[ExpressionSet]] { override def visitGlobalLimit(p: GlobalLimit): Set[ExpressionSet] = { p.maxRows match { - case Some(value) if value <= 1 => Set(ExpressionSet(p.output)) + case Some(value) if value <= 1 => p.output.map(attr => ExpressionSet(Seq(attr))).toSet case _ => p.child.distinctKeys } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitorSuite.scala index 131155f8c04d1..1868fe1c79149 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/logical/DistinctKeyVisitorSuite.scala @@ -66,6 +66,10 @@ class DistinctKeyVisitorSuite extends PlanTest { Set(ExpressionSet(Seq(a)), ExpressionSet(Seq(d.toAttribute)))) checkDistinctAttributes(t1.groupBy(f.child, 'b)(f, 'b, sum('c)), Set(ExpressionSet(Seq(f.toAttribute, b)))) + + // Aggregate should also propagate distinct keys from child + checkDistinctAttributes(t1.limit(1).groupBy($"a", $"b")($"a", $"b"), + Set(ExpressionSet(Seq(a)), ExpressionSet(Seq(b)))) } test("Distinct's distinct attributes") { @@ -86,7 +90,8 @@ class DistinctKeyVisitorSuite extends PlanTest { test("Limit's distinct attributes") { checkDistinctAttributes(Distinct(t1).limit(10), Set(ExpressionSet(Seq(a, b, c)))) checkDistinctAttributes(LocalLimit(10, Distinct(t1)), Set(ExpressionSet(Seq(a, b, c)))) - checkDistinctAttributes(t1.limit(1), Set(ExpressionSet(Seq(a, b, c)))) + checkDistinctAttributes(t1.limit(1), + Set(ExpressionSet(Seq(a)), ExpressionSet(Seq(b)), ExpressionSet(Seq(c)))) } test("Intersect's distinct attributes") { From 17c56fc03b8e7269b293d6957c542eab9d723d52 Mon Sep 17 00:00:00 2001 From: minyyy Date: Fri, 8 Apr 2022 10:43:38 +0800 Subject: [PATCH 111/535] [SPARK-38531][SQL] Fix the condition of "Prune unrequired child index" branch of ColumnPruning ### What changes were proposed in this pull request? The "prune unrequired references" branch has the condition: `case p Project(_, g: Generate) if p.references != g.outputSet => ` This is wrong as generators like Inline will always enter this branch as long as it does not use all the generator output. Example: input: , b: int>>> Project(a.a as x) \- Generate(Inline(col1), ..., a, b) p.references is [a] g.outputSet is [a, b] This bug makes us never enter the GeneratorNestedColumnAliasing branch below thus miss some optimization opportunities. This PR changes the condition to check whether the child output is not used by the project and it is either not used by the generator or not already put into unrequiredChildOutput. ### Why are the changes needed? The wrong condition prevents some expressions like Inline, PosExplode from being optimized by rules after it. Before the PR, the test query added in the PR is not optimized since the optimization rule is not able to apply to it. After the PR the optimization rule can be correctly applied to the query. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests. Closes #35864 from minyyy/gnca_wrong_cond. Authored-by: minyyy Signed-off-by: Wenchen Fan (cherry picked from commit 4b9343593eca780ca30ffda45244a71413577884) Signed-off-by: Wenchen Fan --- .../optimizer/NestedColumnAliasing.scala | 19 +++++++++++ .../sql/catalyst/optimizer/Optimizer.scala | 15 ++++----- .../optimizer/ColumnPruningSuite.scala | 32 +++++++++++++++++++ 3 files changed, 58 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala index 4c7130e51e0b3..9cf2925cdd2a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala @@ -312,6 +312,25 @@ object NestedColumnAliasing { } } +object GeneratorUnrequiredChildrenPruning { + def unapply(plan: LogicalPlan): Option[LogicalPlan] = plan match { + case p @ Project(_, g: Generate) => + val requiredAttrs = p.references ++ g.generator.references + val newChild = ColumnPruning.prunedChild(g.child, requiredAttrs) + val unrequired = g.generator.references -- p.references + val unrequiredIndices = newChild.output.zipWithIndex.filter(t => unrequired.contains(t._1)) + .map(_._2) + if (!newChild.fastEquals(g.child) || + unrequiredIndices.toSet != g.unrequiredChildIndex.toSet) { + Some(p.copy(child = g.copy(child = newChild, unrequiredChildIndex = unrequiredIndices))) + } else { + None + } + case _ => None + } +} + + /** * This prunes unnecessary nested columns from [[Generate]], or [[Project]] -> [[Generate]] */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index debd5a66adb23..66c2ad84ccee8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -831,13 +831,12 @@ object ColumnPruning extends Rule[LogicalPlan] { e.copy(child = prunedChild(child, e.references)) // prune unrequired references - case p @ Project(_, g: Generate) if p.references != g.outputSet => - val requiredAttrs = p.references -- g.producedAttributes ++ g.generator.references - val newChild = prunedChild(g.child, requiredAttrs) - val unrequired = g.generator.references -- p.references - val unrequiredIndices = newChild.output.zipWithIndex.filter(t => unrequired.contains(t._1)) - .map(_._2) - p.copy(child = g.copy(child = newChild, unrequiredChildIndex = unrequiredIndices)) + // There are 2 types of pruning here: + // 1. For attributes in g.child.outputSet that is not used by the generator nor the project, + // we directly remove it from the output list of g.child. + // 2. For attributes that is not used by the project but it is used by the generator, we put + // it in g.unrequiredChildIndex to save memory usage. + case GeneratorUnrequiredChildrenPruning(rewrittenPlan) => rewrittenPlan // prune unrequired nested fields from `Generate`. case GeneratorNestedColumnAliasing(rewrittenPlan) => rewrittenPlan @@ -897,7 +896,7 @@ object ColumnPruning extends Rule[LogicalPlan] { }) /** Applies a projection only when the child is producing unnecessary attributes */ - private def prunedChild(c: LogicalPlan, allReferences: AttributeSet) = + def prunedChild(c: LogicalPlan, allReferences: AttributeSet): LogicalPlan = if (!c.outputSet.subsetOf(allReferences)) { Project(c.output.filter(allReferences.contains), c) } else { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala index 0655acbcb1bab..0101c855152d6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.optimizer.NestedColumnAliasingSuite.collectGeneratedAliases import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -459,4 +460,35 @@ class ColumnPruningSuite extends PlanTest { val correctAnswer1 = Project(Seq('a), input).analyze comparePlans(Optimize.execute(plan1.analyze), correctAnswer1) } + + test("SPARK-38531: Nested field pruning for Project and PosExplode") { + val name = StructType.fromDDL("first string, middle string, last string") + val employer = StructType.fromDDL("id int, company struct") + val contact = LocalRelation( + 'id.int, + 'name.struct(name), + 'address.string, + 'friends.array(name), + 'relatives.map(StringType, name), + 'employer.struct(employer)) + + val query = contact + .select('id, 'friends) + .generate(PosExplode('friends)) + .select('col.getField("middle")) + .analyze + val optimized = Optimize.execute(query) + + val aliases = collectGeneratedAliases(optimized) + + val expected = contact + // GetStructField is pushed down, unused id column is pruned. + .select( + 'friends.getField("middle").as(aliases(0))) + .generate(PosExplode($"${aliases(0)}"), + unrequiredChildIndex = Seq(0)) // unrequiredChildIndex is added. + .select('col.as("col.middle")) + .analyze + comparePlans(optimized, expected) + } } From 3e5407dbbfb8ea955e9c44df1893ad24a9449a28 Mon Sep 17 00:00:00 2001 From: Anish Shrigondekar Date: Fri, 8 Apr 2022 12:19:50 +0900 Subject: [PATCH 112/535] [SPARK-38809][SS] Implement option to skip null values in symmetric hash implementation of stream-stream joins ### What changes were proposed in this pull request? In the symmetric has join state manager, we can receive entries with null values for a key and that can cause the `removeByValue` and get iterators to fail and run into the NullPointerException. This is possible if the state recovered is written from an old spark version or its corrupted on disk or due to issues with the iterators. Since we don't have a utility to query this state, we would like to provide a conf option to skip nulls for the symmetric hash implementation in stream stream joins. ### Why are the changes needed? Without these changes, if we encounter null values for stream-stream joins, the executor task will repeatedly fail with NullPointerException and will terminate the stage and eventually the query as well. This change allows the user to set a config option to continue iterating by skipping null values for symmetric hash based implementation of stream-stream joins. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added unit tests to test the new functionality by adding nulls in between and forcing the iteration/get calls with nulls in the mix and tested the behavior with the config disabled as well as enabled. Sample output: ``` [info] SymmetricHashJoinStateManagerSuite: 15:07:50.627 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable [info] - StreamingJoinStateManager V1 - all operations (588 milliseconds) [info] - StreamingJoinStateManager V2 - all operations (251 milliseconds) 15:07:52.669 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=3 and endIndex=4. 15:07:52.671 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=3 and endIndex=3. 15:07:52.672 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=1 and endIndex=3. 15:07:52.672 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=1 and endIndex=1. [info] - StreamingJoinStateManager V1 - all operations with nulls (252 milliseconds) 15:07:52.896 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=3 and endIndex=4. 15:07:52.897 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=3 and endIndex=3. 15:07:52.898 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=1 and endIndex=3. 15:07:52.898 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=1 and endIndex=1. [info] - StreamingJoinStateManager V2 - all operations with nulls (221 milliseconds) 15:07:53.114 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=5 and endIndex=6. 15:07:53.116 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=3 and endIndex=6. 15:07:53.331 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=5 and endIndex=6. 15:07:53.331 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=3 and endIndex=3. [info] - StreamingJoinStateManager V1 - all operations with nulls in middle (435 milliseconds) 15:07:53.549 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=5 and endIndex=6. 15:07:53.551 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=3 and endIndex=6. 15:07:53.785 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=5 and endIndex=6. 15:07:53.785 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManager: `keyWithIndexToValue` returns a null value for indices with range from startIndex=3 and endIndex=3. [info] - StreamingJoinStateManager V2 - all operations with nulls in middle (456 milliseconds) [info] - SPARK-35689: StreamingJoinStateManager V1 - printable key of keyWithIndexToValue (390 milliseconds) [info] - SPARK-35689: StreamingJoinStateManager V2 - printable key of keyWithIndexToValue (216 milliseconds) 15:07:54.640 WARN org.apache.spark.sql.execution.streaming.state.SymmetricHashJoinStateManagerSuite: ===== POSSIBLE THREAD LEAK IN SUITE o.a.s.sql.execution.streaming.state.SymmetricHashJoinStateManagerSuite, threads: rpc-boss-3-1 (daemon=true), shuffle-boss-6-1 (daemon=true) ===== [info] Run completed in 5 seconds, 714 milliseconds. [info] Total number of tests run: 8 [info] Suites: completed 1, aborted 0 [info] Tests: succeeded 8, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. ``` Closes #36090 from anishshri-db/bfix/SPARK-38809. Authored-by: Anish Shrigondekar Signed-off-by: Jungtaek Lim (cherry picked from commit 61c489ea7ef51d7d0217f770ec358ed7a7b76b42) Signed-off-by: Jungtaek Lim --- .../apache/spark/sql/internal/SQLConf.scala | 16 ++++ .../streaming/state/StateStoreConf.scala | 3 + .../state/SymmetricHashJoinStateManager.scala | 38 +++++--- .../SymmetricHashJoinStateManagerSuite.scala | 91 ++++++++++++++++--- 4 files changed, 123 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index fbd56968c1d9e..9b7d4aee745f8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1897,6 +1897,19 @@ object SQLConf { .booleanConf .createWithDefault(false) + /** + * SPARK-38809 - Config option to allow skipping null values for hash based stream-stream joins. + * Its possible for us to see nulls if state was written with an older version of Spark, + * the state was corrupted on disk or if we had an issue with the state iterators. + */ + val STATE_STORE_SKIP_NULLS_FOR_STREAM_STREAM_JOINS = + buildConf("spark.sql.streaming.stateStore.skipNullsForStreamStreamJoins.enabled") + .internal() + .doc("When true, this config will skip null values in hash based stream-stream joins.") + .version("3.3.0") + .booleanConf + .createWithDefault(false) + val VARIABLE_SUBSTITUTE_ENABLED = buildConf("spark.sql.variable.substitute") .doc("This enables substitution using syntax like `${var}`, `${system:var}`, " + @@ -3866,6 +3879,9 @@ class SQLConf extends Serializable with Logging { def stateStoreFormatValidationEnabled: Boolean = getConf(STATE_STORE_FORMAT_VALIDATION_ENABLED) + def stateStoreSkipNullsForStreamStreamJoins: Boolean = + getConf(STATE_STORE_SKIP_NULLS_FOR_STREAM_STREAM_JOINS) + def checkpointLocation: Option[String] = getConf(CHECKPOINT_LOCATION) def isUnsupportedOperationCheckEnabled: Boolean = getConf(UNSUPPORTED_OPERATION_CHECK_ENABLED) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala index 58af8272d1c09..529db2609cd45 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala @@ -52,6 +52,9 @@ class StateStoreConf( val formatValidationCheckValue: Boolean = extraOptions.getOrElse(StateStoreConf.FORMAT_VALIDATION_CHECK_VALUE_CONFIG, "true") == "true" + /** Whether to skip null values for hash based stream-stream joins. */ + val skipNullsForStreamStreamJoins: Boolean = sqlConf.stateStoreSkipNullsForStreamStreamJoins + /** The compression codec used to compress delta and snapshot files. */ val compressionCodec: String = sqlConf.stateStoreCompressionCodec diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala index 56c47d564a3b3..d17c6e8e862ca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManager.scala @@ -222,8 +222,12 @@ class SymmetricHashJoinStateManager( valueRemoved = false } - // Find the next value satisfying the condition, updating `currentKey` and `numValues` if - // needed. Returns null when no value can be found. + /** + * Find the next value satisfying the condition, updating `currentKey` and `numValues` if + * needed. Returns null when no value can be found. + * Note that we will skip nulls explicitly if config setting for the same is + * set to true via STATE_STORE_SKIP_NULLS_FOR_STREAM_STREAM_JOINS. + */ private def findNextValueForIndex(): ValueAndMatchPair = { // Loop across all values for the current key, and then all other keys, until we find a // value satisfying the removal condition. @@ -233,7 +237,9 @@ class SymmetricHashJoinStateManager( if (hasMoreValuesForCurrentKey) { // First search the values for the current key. val valuePair = keyWithIndexToValue.get(currentKey, index) - if (removalCondition(valuePair.value)) { + if (valuePair == null && storeConf.skipNullsForStreamStreamJoins) { + index += 1 + } else if (removalCondition(valuePair.value)) { return valuePair } else { index += 1 @@ -597,22 +603,30 @@ class SymmetricHashJoinStateManager( /** * Get all values and indices for the provided key. * Should not return null. + * Note that we will skip nulls explicitly if config setting for the same is + * set to true via STATE_STORE_SKIP_NULLS_FOR_STREAM_STREAM_JOINS. */ def getAll(key: UnsafeRow, numValues: Long): Iterator[KeyWithIndexAndValue] = { - val keyWithIndexAndValue = new KeyWithIndexAndValue() - var index = 0 new NextIterator[KeyWithIndexAndValue] { + private val keyWithIndexAndValue = new KeyWithIndexAndValue() + private var index: Long = 0L + + private def hasMoreValues = index < numValues override protected def getNext(): KeyWithIndexAndValue = { - if (index >= numValues) { - finished = true - null - } else { + while (hasMoreValues) { val keyWithIndex = keyWithIndexRow(key, index) val valuePair = valueRowConverter.convertValue(stateStore.get(keyWithIndex)) - keyWithIndexAndValue.withNew(key, index, valuePair) - index += 1 - keyWithIndexAndValue + if (valuePair == null && storeConf.skipNullsForStreamStreamJoins) { + index += 1 + } else { + keyWithIndexAndValue.withNew(key, index, valuePair) + index += 1 + return keyWithIndexAndValue + } } + + finished = true + return null } override protected def close(): Unit = {} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala index deeebe1fc42bf..30d39ebcc4a91 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/SymmetricHashJoinStateManagerSuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.GeneratePredicate import org.apache.spark.sql.catalyst.plans.logical.EventTimeWatermark import org.apache.spark.sql.execution.streaming.StatefulOperatorStateInfo import org.apache.spark.sql.execution.streaming.StreamingSymmetricHashJoinHelper.LeftSide +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.streaming.StreamTest import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -52,6 +53,12 @@ class SymmetricHashJoinStateManagerSuite extends StreamTest with BeforeAndAfter } } + SymmetricHashJoinStateManager.supportedVersions.foreach { version => + test(s"StreamingJoinStateManager V${version} - all operations with nulls in middle") { + testAllOperationsWithNullsInMiddle(version) + } + } + SymmetricHashJoinStateManager.supportedVersions.foreach { version => test(s"SPARK-35689: StreamingJoinStateManager V${version} - " + "printable key of keyWithIndexToValue") { @@ -167,6 +174,55 @@ class SymmetricHashJoinStateManagerSuite extends StreamTest with BeforeAndAfter } } + /* Test removeByValue with nulls in middle simulated by updating numValues on the state manager */ + private def testAllOperationsWithNullsInMiddle(stateFormatVersion: Int): Unit = { + // Test with skipNullsForStreamStreamJoins set to false which would throw a + // NullPointerException while iterating and also return null values as part of get + withJoinStateManager(inputValueAttribs, joinKeyExprs, stateFormatVersion) { manager => + implicit val mgr = manager + + val ex = intercept[Exception] { + appendAndTest(40, 50, 200, 300) + assert(numRows === 3) + updateNumValues(40, 4) // create a null at the end + append(40, 400) + updateNumValues(40, 7) // create nulls in between and end + removeByValue(50) + } + assert(ex.isInstanceOf[NullPointerException]) + assert(getNumValues(40) === 7) // we should get 7 with no nulls skipped + + removeByValue(300) + assert(getNumValues(40) === 1) // only 400 should remain + assert(get(40) === Seq(400)) + removeByValue(400) + assert(get(40) === Seq.empty) + assert(numRows === 0) // ensure all elements removed + } + + // Test with skipNullsForStreamStreamJoins set to true which would skip nulls + // and continue iterating as part of removeByValue as well as get + withJoinStateManager(inputValueAttribs, joinKeyExprs, stateFormatVersion, true) { manager => + implicit val mgr = manager + + appendAndTest(40, 50, 200, 300) + assert(numRows === 3) + updateNumValues(40, 4) // create a null at the end + append(40, 400) + updateNumValues(40, 7) // create nulls in between and end + + removeByValue(50) + assert(getNumValues(40) === 3) // we should now get (400, 200, 300) with nulls skipped + + removeByValue(300) + assert(getNumValues(40) === 1) // only 400 should remain + assert(get(40) === Seq(400)) + removeByValue(400) + assert(get(40) === Seq.empty) + assert(numRows === 0) // ensure all elements removed + } + } + val watermarkMetadata = new MetadataBuilder().putLong(EventTimeWatermark.delayKey, 10).build() val inputValueSchema = new StructType() .add(StructField("time", IntegerType, metadata = watermarkMetadata)) @@ -205,6 +261,11 @@ class SymmetricHashJoinStateManagerSuite extends StreamTest with BeforeAndAfter manager.updateNumValuesTestOnly(toJoinKeyRow(key), numValues) } + def getNumValues(key: Int) + (implicit manager: SymmetricHashJoinStateManager): Int = { + manager.get(toJoinKeyRow(key)).size + } + def get(key: Int)(implicit manager: SymmetricHashJoinStateManager): Seq[Int] = { manager.get(toJoinKeyRow(key)).map(toValueInt).toSeq.sorted } @@ -232,22 +293,26 @@ class SymmetricHashJoinStateManagerSuite extends StreamTest with BeforeAndAfter manager.metrics.numKeys } - def withJoinStateManager( - inputValueAttribs: Seq[Attribute], - joinKeyExprs: Seq[Expression], - stateFormatVersion: Int)(f: SymmetricHashJoinStateManager => Unit): Unit = { + inputValueAttribs: Seq[Attribute], + joinKeyExprs: Seq[Expression], + stateFormatVersion: Int, + skipNullsForStreamStreamJoins: Boolean = false) + (f: SymmetricHashJoinStateManager => Unit): Unit = { withTempDir { file => - val storeConf = new StateStoreConf() - val stateInfo = StatefulOperatorStateInfo(file.getAbsolutePath, UUID.randomUUID, 0, 0, 5) - val manager = new SymmetricHashJoinStateManager( - LeftSide, inputValueAttribs, joinKeyExprs, Some(stateInfo), storeConf, new Configuration, - partitionId = 0, stateFormatVersion) - try { - f(manager) - } finally { - manager.abortIfNeeded() + withSQLConf(SQLConf.STATE_STORE_SKIP_NULLS_FOR_STREAM_STREAM_JOINS.key -> + skipNullsForStreamStreamJoins.toString) { + val storeConf = new StateStoreConf(spark.sqlContext.conf) + val stateInfo = StatefulOperatorStateInfo(file.getAbsolutePath, UUID.randomUUID, 0, 0, 5) + val manager = new SymmetricHashJoinStateManager( + LeftSide, inputValueAttribs, joinKeyExprs, Some(stateInfo), storeConf, new Configuration, + partitionId = 0, stateFormatVersion) + try { + f(manager) + } finally { + manager.abortIfNeeded() + } } } StateStore.stop() From baa93ec66e3068dcbcb52b83fb94101ddaf0a7e4 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 8 Apr 2022 21:00:06 +0800 Subject: [PATCH 113/535] [SPARK-38813][SQL][3.3] Remove TimestampNTZ type support in Spark 3.3 ### What changes were proposed in this pull request? Remove TimestampNTZ type support in the production code of Spark 3.3. To archive the goal, this PR adds the check "Utils.isTesting" in the following code branches: - keyword "timestamp_ntz" and "timestamp_ltz" in parser - New expressions from https://issues.apache.org/jira/browse/SPARK-35662 - Using java.time.localDateTime as the external type for TimestampNTZType - `SQLConf.timestampType` which determines the default timestamp type of Spark SQL. This is to minimize the code difference between the master branch. So that future users won't think TimestampNTZ is already available in Spark 3.3. The downside is that users can still find TimestampNTZType under package `org.apache.spark.sql.types`. There should be nothing left other than this. ### Why are the changes needed? The TimestampNTZ project is not finished yet: * Lack Hive metastore support * Lack JDBC support * Need to spend time scanning the codebase to find out any missing support. The current code usages of `TimestampType` are larger than `TimestampNTZType` ### Does this PR introduce _any_ user-facing change? No, the TimestampNTZ type is not released yet. ### How was this patch tested? UT Closes #36094 from gengliangwang/disableNTZ. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../sql/catalyst/expressions/UnsafeRow.java | 2 +- .../org/apache/spark/sql/types/DataTypes.java | 5 --- .../scala/org/apache/spark/sql/Encoders.scala | 8 ----- .../sql/catalyst/CatalystTypeConverters.scala | 4 ++- .../sql/catalyst/JavaTypeInference.scala | 11 +++++-- .../spark/sql/catalyst/ScalaReflection.scala | 12 +++++-- .../sql/catalyst/analysis/CheckAnalysis.scala | 5 +++ .../catalyst/analysis/FunctionRegistry.scala | 31 +++++++++++++------ .../spark/sql/catalyst/dsl/package.scala | 4 --- .../sql/catalyst/encoders/RowEncoder.scala | 10 ++++-- .../expressions/datetimeExpressions.scala | 12 +++---- .../sql/catalyst/expressions/literals.scala | 14 ++++++--- .../sql/catalyst/parser/AstBuilder.scala | 10 +++--- .../apache/spark/sql/internal/SQLConf.scala | 15 +++++---- .../spark/sql/types/TimestampNTZType.scala | 11 ++----- .../parquet/ParquetVectorUpdaterFactory.java | 6 ++-- .../org/apache/spark/sql/SQLImplicits.scala | 3 -- .../apache/spark/sql/JavaDatasetSuite.java | 8 ----- .../apache/spark/sql/CsvFunctionsSuite.scala | 17 +--------- .../spark/sql/DataFrameAggregateSuite.scala | 13 +------- .../org/apache/spark/sql/DatasetSuite.scala | 5 --- .../apache/spark/sql/JsonFunctionsSuite.scala | 14 +-------- .../scala/org/apache/spark/sql/UDFSuite.scala | 28 ----------------- 23 files changed, 94 insertions(+), 154 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java index 476201c9a8d8e..b8abd01d2e11e 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java @@ -91,7 +91,7 @@ public static int calculateBitSetWidthInBytes(int numFields) { DoubleType, DateType, TimestampType, - TimestampNTZType + TimestampNTZType$.MODULE$ ))); } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java b/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java index 9454c3c259856..90a2baf130384 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/types/DataTypes.java @@ -54,11 +54,6 @@ public class DataTypes { */ public static final DataType TimestampType = TimestampType$.MODULE$; - /** - * Gets the TimestampNTZType object. - */ - public static final DataType TimestampNTZType = TimestampNTZType$.MODULE$; - /** * Gets the CalendarIntervalType object. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala index 98d2f1a57120a..e6894847209d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoders.scala @@ -114,14 +114,6 @@ object Encoders { */ def LOCALDATE: Encoder[java.time.LocalDate] = ExpressionEncoder() - /** - * Creates an encoder that serializes instances of the `java.time.LocalDateTime` class - * to the internal representation of nullable Catalyst's TimestampNTZType. - * - * @since 3.3.0 - */ - def LOCALDATETIME: Encoder[java.time.LocalDateTime] = ExpressionEncoder() - /** * An encoder for nullable timestamp type. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala index 3e6d31e79b773..a28540a4ce9d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala @@ -35,6 +35,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.types.DayTimeIntervalType._ import org.apache.spark.sql.types.YearMonthIntervalType._ import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.util.Utils /** * Functions to convert Scala types to Catalyst types and vice versa. @@ -503,7 +504,8 @@ object CatalystTypeConverters { case ld: LocalDate => LocalDateConverter.toCatalyst(ld) case t: Timestamp => TimestampConverter.toCatalyst(t) case i: Instant => InstantConverter.toCatalyst(i) - case l: LocalDateTime => TimestampNTZConverter.toCatalyst(l) + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + case l: LocalDateTime if Utils.isTesting => TimestampNTZConverter.toCatalyst(l) case d: BigDecimal => new DecimalConverter(DecimalType(d.precision, d.scale)).toCatalyst(d) case d: JavaBigDecimal => new DecimalConverter(DecimalType(d.precision, d.scale)).toCatalyst(d) case seq: Seq[Any] => new GenericArrayData(seq.map(convertToCatalyst).toArray) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index 903072ae29d8c..1f93933327e8d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -35,6 +35,7 @@ import org.apache.spark.sql.catalyst.expressions.objects._ import org.apache.spark.sql.catalyst.util.ArrayBasedMapData import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /** * Type-inference utilities for POJOs and Java collections. @@ -119,7 +120,9 @@ object JavaTypeInference { case c: Class[_] if c == classOf[java.sql.Date] => (DateType, true) case c: Class[_] if c == classOf[java.time.Instant] => (TimestampType, true) case c: Class[_] if c == classOf[java.sql.Timestamp] => (TimestampType, true) - case c: Class[_] if c == classOf[java.time.LocalDateTime] => (TimestampNTZType, true) + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + case c: Class[_] if c == classOf[java.time.LocalDateTime] && Utils.isTesting => + (TimestampNTZType, true) case c: Class[_] if c == classOf[java.time.Duration] => (DayTimeIntervalType(), true) case c: Class[_] if c == classOf[java.time.Period] => (YearMonthIntervalType(), true) @@ -251,7 +254,8 @@ object JavaTypeInference { case c if c == classOf[java.sql.Timestamp] => createDeserializerForSqlTimestamp(path) - case c if c == classOf[java.time.LocalDateTime] => + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + case c if c == classOf[java.time.LocalDateTime] && Utils.isTesting => createDeserializerForLocalDateTime(path) case c if c == classOf[java.time.Duration] => @@ -413,7 +417,8 @@ object JavaTypeInference { case c if c == classOf[java.sql.Timestamp] => createSerializerForSqlTimestamp(inputObject) - case c if c == classOf[java.time.LocalDateTime] => + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + case c if c == classOf[java.time.LocalDateTime] && Utils.isTesting => createSerializerForLocalDateTime(inputObject) case c if c == classOf[java.time.LocalDate] => createSerializerForJavaLocalDate(inputObject) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index fced82c97b445..e2b624f8e13a3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.util.{ArrayData, MapData} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.util.Utils /** @@ -752,7 +753,8 @@ object ScalaReflection extends ScalaReflection { Schema(TimestampType, nullable = true) case t if isSubtype(t, localTypeOf[java.sql.Timestamp]) => Schema(TimestampType, nullable = true) - case t if isSubtype(t, localTypeOf[java.time.LocalDateTime]) => + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + case t if isSubtype(t, localTypeOf[java.time.LocalDateTime]) && Utils.isTesting => Schema(TimestampNTZType, nullable = true) case t if isSubtype(t, localTypeOf[java.time.LocalDate]) => Schema(DateType, nullable = true) case t if isSubtype(t, localTypeOf[java.sql.Date]) => Schema(DateType, nullable = true) @@ -858,7 +860,9 @@ object ScalaReflection extends ScalaReflection { StringType -> classOf[UTF8String], DateType -> classOf[DateType.InternalType], TimestampType -> classOf[TimestampType.InternalType], - TimestampNTZType -> classOf[TimestampNTZType.InternalType], + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + TimestampNTZType -> + (if (Utils.isTesting) classOf[TimestampNTZType.InternalType] else classOf[java.lang.Object]), BinaryType -> classOf[BinaryType.InternalType], CalendarIntervalType -> classOf[CalendarInterval] ) @@ -873,7 +877,9 @@ object ScalaReflection extends ScalaReflection { DoubleType -> classOf[java.lang.Double], DateType -> classOf[java.lang.Integer], TimestampType -> classOf[java.lang.Long], - TimestampNTZType -> classOf[java.lang.Long] + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + TimestampNTZType -> + (if (Utils.isTesting) classOf[java.lang.Long] else classOf[java.lang.Object]) ) def dataTypeJavaClass(dt: DataType): Class[_] = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index c05b9326d2304..3b8a73717afee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils +import org.apache.spark.util.Utils /** * Throws user facing errors when passed invalid queries that fail to analyze. @@ -157,6 +158,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case _: ShowTableExtended => throw QueryCompilationErrors.commandUnsupportedInV2TableError("SHOW TABLE EXTENDED") + case operator: LogicalPlan + if !Utils.isTesting && operator.output.exists(_.dataType.isInstanceOf[TimestampNTZType]) => + operator.failAnalysis("TimestampNTZ type is not supported in Spark 3.3.") + case operator: LogicalPlan => // Check argument data types of higher-order functions downwards first. // If the arguments of the higher-order functions are resolved but the type check fails, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 5befa779d166e..f6bd9891681fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Range} import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /** @@ -358,6 +359,26 @@ object FunctionRegistry { // then create a `RuntimeReplaceable` expression to call the Java method with `Invoke` or // `StaticInvoke` expression. By doing so we don't need to implement codegen for new functions // anymore. See `AesEncrypt`/`AesDecrypt` as an example. + val expressionsForTimestampNTZSupport: Map[String, (ExpressionInfo, FunctionBuilder)] = + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + if (Utils.isTesting) { + Map( + expression[LocalTimestamp]("localtimestamp"), + expression[ConvertTimezone]("convert_timezone"), + // We keep the 2 expression builders below to have different function docs. + expressionBuilder( + "to_timestamp_ntz", ParseToTimestampNTZExpressionBuilder, setAlias = true), + expressionBuilder( + "to_timestamp_ltz", ParseToTimestampLTZExpressionBuilder, setAlias = true), + // We keep the 2 expression builders below to have different function docs. + expressionBuilder("make_timestamp_ntz", MakeTimestampNTZExpressionBuilder, setAlias = true), + expressionBuilder("make_timestamp_ltz", MakeTimestampLTZExpressionBuilder, setAlias = true) + ) + } else { + Map.empty + } + + // Note: Whenever we add a new entry here, make sure we also update ExpressionToSQLSuite val expressions: Map[String, (ExpressionInfo, FunctionBuilder)] = Map( // misc non-aggregate functions expression[Abs]("abs"), @@ -580,7 +601,6 @@ object FunctionRegistry { expression[CurrentDate]("current_date"), expression[CurrentTimestamp]("current_timestamp"), expression[CurrentTimeZone]("current_timezone"), - expression[LocalTimestamp]("localtimestamp"), expression[DateDiff]("datediff"), expression[DateAdd]("date_add"), expression[DateFormatClass]("date_format"), @@ -604,9 +624,6 @@ object FunctionRegistry { expression[ToBinary]("to_binary"), expression[ToUnixTimestamp]("to_unix_timestamp"), expression[ToUTCTimestamp]("to_utc_timestamp"), - // We keep the 2 expression builders below to have different function docs. - expressionBuilder("to_timestamp_ntz", ParseToTimestampNTZExpressionBuilder, setAlias = true), - expressionBuilder("to_timestamp_ltz", ParseToTimestampLTZExpressionBuilder, setAlias = true), expression[TruncDate]("trunc"), expression[TruncTimestamp]("date_trunc"), expression[UnixTimestamp]("unix_timestamp"), @@ -618,9 +635,6 @@ object FunctionRegistry { expression[SessionWindow]("session_window"), expression[MakeDate]("make_date"), expression[MakeTimestamp]("make_timestamp"), - // We keep the 2 expression builders below to have different function docs. - expressionBuilder("make_timestamp_ntz", MakeTimestampNTZExpressionBuilder, setAlias = true), - expressionBuilder("make_timestamp_ltz", MakeTimestampLTZExpressionBuilder, setAlias = true), expression[MakeInterval]("make_interval"), expression[MakeDTInterval]("make_dt_interval"), expression[MakeYMInterval]("make_ym_interval"), @@ -635,7 +649,6 @@ object FunctionRegistry { expression[UnixSeconds]("unix_seconds"), expression[UnixMillis]("unix_millis"), expression[UnixMicros]("unix_micros"), - expression[ConvertTimezone]("convert_timezone"), // collection functions expression[CreateArray]("array"), @@ -782,7 +795,7 @@ object FunctionRegistry { expression[CsvToStructs]("from_csv"), expression[SchemaOfCsv]("schema_of_csv"), expression[StructsToCsv]("to_csv") - ) + ) ++ expressionsForTimestampNTZSupport val builtin: SimpleFunctionRegistry = { val fr = new SimpleFunctionRegistry diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index 62b3ee7440745..d47e34b110dc8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -315,10 +315,6 @@ package object dsl { /** Creates a new AttributeReference of type timestamp */ def timestamp: AttributeReference = AttributeReference(s, TimestampType, nullable = true)() - /** Creates a new AttributeReference of type timestamp without time zone */ - def timestampNTZ: AttributeReference = - AttributeReference(s, TimestampNTZType, nullable = true)() - /** Creates a new AttributeReference of the day-time interval type */ def dayTimeInterval(startField: Byte, endField: Byte): AttributeReference = { AttributeReference(s, DayTimeIntervalType(startField, endField), nullable = true)() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala index d7e497fafa86a..7b6865d0af4af 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala @@ -32,6 +32,7 @@ import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /** * A factory for constructing encoders that convert external row to/from the Spark SQL @@ -112,7 +113,8 @@ object RowEncoder { createSerializerForSqlTimestamp(inputObject) } - case TimestampNTZType => createSerializerForLocalDateTime(inputObject) + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + case TimestampNTZType if Utils.isTesting => createSerializerForLocalDateTime(inputObject) case DateType => if (lenient) { @@ -243,7 +245,8 @@ object RowEncoder { } else { ObjectType(classOf[java.sql.Timestamp]) } - case TimestampNTZType => + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + case TimestampNTZType if Utils.isTesting => ObjectType(classOf[java.time.LocalDateTime]) case DateType => if (SQLConf.get.datetimeJava8ApiEnabled) { @@ -301,7 +304,8 @@ object RowEncoder { createDeserializerForSqlTimestamp(input) } - case TimestampNTZType => + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + case TimestampNTZType if Utils.isTesting => createDeserializerForLocalDateTime(input) case DateType => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index fc701d4f817ca..ff3d898942c33 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -219,7 +219,7 @@ case class Now() extends CurrentTimestampLike { 2020-04-25 15:49:11.914 """, group = "datetime_funcs", - since = "3.3.0") + since = "3.4.0") case class LocalTimestamp(timeZoneId: Option[String] = None) extends LeafExpression with TimeZoneAwareExpression with CodegenFallback { @@ -1111,7 +1111,7 @@ case class GetTimestamp( 2016-12-31 00:00:00 """, group = "datetime_funcs", - since = "3.3.0") + since = "3.4.0") // scalastyle:on line.size.limit object ParseToTimestampNTZExpressionBuilder extends ExpressionBuilder { override def build(funcName: String, expressions: Seq[Expression]): Expression = { @@ -1148,7 +1148,7 @@ object ParseToTimestampNTZExpressionBuilder extends ExpressionBuilder { 2016-12-31 00:00:00 """, group = "datetime_funcs", - since = "3.3.0") + since = "3.4.0") // scalastyle:on line.size.limit object ParseToTimestampLTZExpressionBuilder extends ExpressionBuilder { override def build(funcName: String, expressions: Seq[Expression]): Expression = { @@ -2440,7 +2440,7 @@ case class MakeDate( NULL """, group = "datetime_funcs", - since = "3.3.0") + since = "3.4.0") // scalastyle:on line.size.limit object MakeTimestampNTZExpressionBuilder extends ExpressionBuilder { override def build(funcName: String, expressions: Seq[Expression]): Expression = { @@ -2487,7 +2487,7 @@ object MakeTimestampNTZExpressionBuilder extends ExpressionBuilder { NULL """, group = "datetime_funcs", - since = "3.3.0") + since = "3.4.0") // scalastyle:on line.size.limit object MakeTimestampLTZExpressionBuilder extends ExpressionBuilder { override def build(funcName: String, expressions: Seq[Expression]): Expression = { @@ -3015,7 +3015,7 @@ object SubtractDates { 2021-12-06 00:00:00 """, group = "datetime_funcs", - since = "3.3.0") + since = "3.4.0") // scalastyle:on line.size.limit case class ConvertTimezone( sourceTz: Expression, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index af10a18e4d16d..6262bdef7f799 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -80,7 +80,9 @@ object Literal { case d: Decimal => Literal(d, DecimalType(Math.max(d.precision, d.scale), d.scale)) case i: Instant => Literal(instantToMicros(i), TimestampType) case t: Timestamp => Literal(DateTimeUtils.fromJavaTimestamp(t), TimestampType) - case l: LocalDateTime => Literal(DateTimeUtils.localDateTimeToMicros(l), TimestampNTZType) + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + case l: LocalDateTime if Utils.isTesting => + Literal(DateTimeUtils.localDateTimeToMicros(l), TimestampNTZType) case ld: LocalDate => Literal(ld.toEpochDay.toInt, DateType) case d: Date => Literal(DateTimeUtils.fromJavaDate(d), DateType) case d: Duration => Literal(durationToMicros(d), DayTimeIntervalType()) @@ -120,7 +122,8 @@ object Literal { case _ if clz == classOf[Date] => DateType case _ if clz == classOf[Instant] => TimestampType case _ if clz == classOf[Timestamp] => TimestampType - case _ if clz == classOf[LocalDateTime] => TimestampNTZType + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + case _ if clz == classOf[LocalDateTime] && Utils.isTesting => TimestampNTZType case _ if clz == classOf[Duration] => DayTimeIntervalType() case _ if clz == classOf[Period] => YearMonthIntervalType() case _ if clz == classOf[JavaBigDecimal] => DecimalType.SYSTEM_DEFAULT @@ -186,7 +189,8 @@ object Literal { case dt: DecimalType => Literal(Decimal(0, dt.precision, dt.scale)) case DateType => create(0, DateType) case TimestampType => create(0L, TimestampType) - case TimestampNTZType => create(0L, TimestampNTZType) + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + case TimestampNTZType if Utils.isTesting => create(0L, TimestampNTZType) case it: DayTimeIntervalType => create(0L, it) case it: YearMonthIntervalType => create(0, it) case StringType => Literal("") @@ -208,7 +212,9 @@ object Literal { case ByteType => v.isInstanceOf[Byte] case ShortType => v.isInstanceOf[Short] case IntegerType | DateType | _: YearMonthIntervalType => v.isInstanceOf[Int] - case LongType | TimestampType | TimestampNTZType | _: DayTimeIntervalType => + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + case TimestampNTZType if Utils.isTesting => v.isInstanceOf[Long] + case LongType | TimestampType | _: DayTimeIntervalType => v.isInstanceOf[Long] case FloatType => v.isInstanceOf[Float] case DoubleType => v.isInstanceOf[Double] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 3a22c5ee745cd..d334b5780f78a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -48,6 +48,7 @@ import org.apache.spark.sql.errors.QueryParsingErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} +import org.apache.spark.util.Utils.isTesting import org.apache.spark.util.random.RandomSampler /** @@ -2203,11 +2204,11 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit val zoneId = getZoneId(conf.sessionLocalTimeZone) val specialDate = convertSpecialDate(value, zoneId).map(Literal(_, DateType)) specialDate.getOrElse(toLiteral(stringToDate, DateType)) - case "TIMESTAMP_NTZ" => + case "TIMESTAMP_NTZ" if isTesting => convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone)) .map(Literal(_, TimestampNTZType)) .getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType)) - case "TIMESTAMP_LTZ" => + case "TIMESTAMP_LTZ" if isTesting => constructTimestampLTZLiteral(value) case "TIMESTAMP" => SQLConf.get.timestampType match { @@ -2658,8 +2659,9 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit case ("double", Nil) => DoubleType case ("date", Nil) => DateType case ("timestamp", Nil) => SQLConf.get.timestampType - case ("timestamp_ntz", Nil) => TimestampNTZType - case ("timestamp_ltz", Nil) => TimestampType + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + case ("timestamp_ntz", Nil) if isTesting => TimestampNTZType + case ("timestamp_ltz", Nil) if isTesting => TimestampType case ("string", Nil) => StringType case ("character" | "char", length :: Nil) => CharType(length.getText.toInt) case ("varchar", length :: Nil) => VarcharType(length.getText.toInt) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 9b7d4aee745f8..c4ffc844135d4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3215,9 +3215,10 @@ object SQLConf { s"and type literal. Setting the configuration as ${TimestampTypes.TIMESTAMP_NTZ} will " + "use TIMESTAMP WITHOUT TIME ZONE as the default type while putting it as " + s"${TimestampTypes.TIMESTAMP_LTZ} will use TIMESTAMP WITH LOCAL TIME ZONE. " + - "Before the 3.3.0 release, Spark only supports the TIMESTAMP WITH " + + "Before the 3.4.0 release, Spark only supports the TIMESTAMP WITH " + "LOCAL TIME ZONE type.") - .version("3.3.0") + .version("3.4.0") + .internal() .stringConf .transform(_.toUpperCase(Locale.ROOT)) .checkValues(TimestampTypes.values.map(_.toString)) @@ -4359,12 +4360,14 @@ class SQLConf extends Serializable with Logging { def strictIndexOperator: Boolean = ansiEnabled && getConf(ANSI_STRICT_INDEX_OPERATOR) def timestampType: AtomicType = getConf(TIMESTAMP_TYPE) match { - case "TIMESTAMP_LTZ" => + // SPARK-38813: Remove TimestampNTZ type support in Spark 3.3 with minimal code changes. + // The configuration `TIMESTAMP_TYPE` is only effective for testing in Spark 3.3. + case "TIMESTAMP_NTZ" if Utils.isTesting => + TimestampNTZType + + case _ => // For historical reason, the TimestampType maps to TIMESTAMP WITH LOCAL TIME ZONE TimestampType - - case "TIMESTAMP_NTZ" => - TimestampNTZType } def nestedSchemaPruningEnabled: Boolean = getConf(NESTED_SCHEMA_PRUNING_ENABLED) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampNTZType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampNTZType.scala index ef653100e8148..d2a1f2c34c19c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampNTZType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/TimestampNTZType.scala @@ -20,8 +20,6 @@ package org.apache.spark.sql.types import scala.math.Ordering import scala.reflect.runtime.universe.typeTag -import org.apache.spark.annotation.Unstable - /** * The timestamp without time zone type represents a local time in microsecond precision, * which is independent of time zone. @@ -29,10 +27,8 @@ import org.apache.spark.annotation.Unstable * To represent an absolute point in time, use `TimestampType` instead. * * Please use the singleton `DataTypes.TimestampNTZType` to refer the type. - * @since 3.3.0 */ -@Unstable -class TimestampNTZType private() extends DatetimeType { +private[spark] class TimestampNTZType private() extends DatetimeType { /** * Internally, a timestamp is stored as the number of microseconds from * the epoch of 1970-01-01T00:00:00.000000(Unix system time zero) @@ -58,8 +54,5 @@ class TimestampNTZType private() extends DatetimeType { * the TimestampNTZType class. Otherwise, the companion object would be of type * "TimestampNTZType" in byte code. Defined with a private constructor so the companion * object is the only possible instantiation. - * - * @since 3.3.0 */ -@Unstable -case object TimestampNTZType extends TimestampNTZType +private[spark] case object TimestampNTZType extends TimestampNTZType diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java index cce53001ea621..53606f58dcfd2 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java @@ -140,7 +140,7 @@ public ParquetVectorUpdater getUpdater(ColumnDescriptor descriptor, DataType spa } break; case INT96: - if (sparkType == DataTypes.TimestampNTZType) { + if (sparkType == TimestampNTZType$.MODULE$) { convertErrorForTimestampNTZ(typeName.name()); } else if (sparkType == DataTypes.TimestampType) { final boolean failIfRebase = "EXCEPTION".equals(int96RebaseMode); @@ -197,14 +197,14 @@ void validateTimestampType(DataType sparkType) { // Throw an exception if the Parquet type is TimestampLTZ and the Catalyst type is TimestampNTZ. // This is to avoid mistakes in reading the timestamp values. if (((TimestampLogicalTypeAnnotation) logicalTypeAnnotation).isAdjustedToUTC() && - sparkType == DataTypes.TimestampNTZType) { + sparkType == TimestampNTZType$.MODULE$) { convertErrorForTimestampNTZ("int64 time(" + logicalTypeAnnotation + ")"); } } void convertErrorForTimestampNTZ(String parquetType) { throw new RuntimeException("Unable to create Parquet converter for data type " + - DataTypes.TimestampNTZType.json() + " whose Parquet type is " + parquetType); + TimestampNTZType$.MODULE$.json() + " whose Parquet type is " + parquetType); } boolean isUnsignedIntTypeMatched(int bitWidth) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala index 044231f4b5a49..90188cadfd3c3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLImplicits.scala @@ -82,9 +82,6 @@ abstract class SQLImplicits extends LowPrioritySQLImplicits { /** @since 3.0.0 */ implicit def newLocalDateEncoder: Encoder[java.time.LocalDate] = Encoders.LOCALDATE - /** @since 3.3.0 */ - implicit def newLocalDateTimeEncoder: Encoder[java.time.LocalDateTime] = Encoders.LOCALDATETIME - /** @since 2.2.0 */ implicit def newTimeStampEncoder: Encoder[java.sql.Timestamp] = Encoders.TIMESTAMP diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java index 22978fb8c286e..87105a4d8a7fe 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java @@ -607,14 +607,6 @@ public void testLocalDateAndInstantEncoders() { Assert.assertEquals(data, ds.collectAsList()); } - @Test - public void testLocalDateTimeEncoder() { - Encoder encoder = Encoders.LOCALDATETIME(); - List data = Arrays.asList(LocalDateTime.of(1, 1, 1, 1, 1)); - Dataset ds = spark.createDataset(data, encoder); - Assert.assertEquals(data, ds.collectAsList()); - } - @Test public void testDurationEncoder() { Encoder encoder = Encoders.DURATION(); diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala index 461bbd8987cef..b676c26023a25 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql import java.text.SimpleDateFormat -import java.time.{Duration, LocalDateTime, Period} +import java.time.{Duration, Period} import java.util.Locale import scala.collection.JavaConverters._ @@ -354,21 +354,6 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession { } } - test("SPARK-36490: Make from_csv/to_csv to handle timestamp_ntz type properly") { - val localDT = LocalDateTime.parse("2021-08-12T15:16:23") - val df = Seq(localDT).toDF - val toCsvDF = df.select(to_csv(struct($"value")) as "csv") - checkAnswer(toCsvDF, Row("2021-08-12T15:16:23.000")) - val fromCsvDF = toCsvDF - .select( - from_csv( - $"csv", - StructType(StructField("a", TimestampNTZType) :: Nil), - Map.empty[String, String]) as "value") - .selectExpr("value.a") - checkAnswer(fromCsvDF, Row(localDT)) - } - test("SPARK-37326: Handle incorrectly formatted timestamp_ntz values in from_csv") { val fromCsvDF = Seq("2021-08-12T15:16:23.000+11:00").toDF("csv") .select( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 425be96d6b8ab..72e2d77bba624 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql -import java.time.{Duration, LocalDateTime, Period} +import java.time.{Duration, Period} import scala.util.Random @@ -1434,17 +1434,6 @@ class DataFrameAggregateSuite extends QueryTest checkAnswer(df2, Row(Period.ofYears(1), 1)) } - test("SPARK-36054: Support group by TimestampNTZ column") { - val ts1 = "2021-01-01T00:00:00" - val ts2 = "2021-01-01T00:00:01" - val localDateTime = Seq(ts1, ts1, ts2).map(LocalDateTime.parse) - val df = localDateTime.toDF("ts").groupBy("ts").count().orderBy("ts") - val expectedSchema = - new StructType().add(StructField("ts", TimestampNTZType)).add("count", LongType, false) - assert (df.schema == expectedSchema) - checkAnswer(df, Seq(Row(LocalDateTime.parse(ts1), 2), Row(LocalDateTime.parse(ts2), 1))) - } - test("SPARK-36926: decimal average mistakenly overflow") { val df = (1 to 10).map(_ => "9999999999.99").toDF("d") val res = df.select($"d".cast("decimal(12, 2)").as("d")).agg(avg($"d").cast("string")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index c846441e9e009..cbdf31a6eaf1b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -2106,11 +2106,6 @@ class DatasetSuite extends QueryTest checkAnswer(withUDF, Row(Row(1), null, null) :: Row(Row(1), null, null) :: Nil) } - test("SPARK-35664: implicit encoder for java.time.LocalDateTime") { - val localDateTime = java.time.LocalDateTime.parse("2021-06-08T12:31:58.999999") - assert(Seq(localDateTime).toDS().head() === localDateTime) - } - test("SPARK-34605: implicit encoder for java.time.Duration") { val duration = java.time.Duration.ofMinutes(10) assert(spark.range(1).map { _ => duration }.head === duration) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index e18c087a26279..c86e1f6e297a6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql import java.text.SimpleDateFormat -import java.time.{Duration, LocalDateTime, Period} +import java.time.{Duration, Period} import java.util.Locale import collection.JavaConverters._ @@ -918,16 +918,4 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { } } } - - test("SPARK-36491: Make from_json/to_json to handle timestamp_ntz type properly") { - val localDT = LocalDateTime.parse("2021-08-12T15:16:23") - val df = Seq(localDT).toDF - val toJsonDF = df.select(to_json(map(lit("key"), $"value")) as "json") - checkAnswer(toJsonDF, Row("""{"key":"2021-08-12T15:16:23.000"}""")) - val fromJsonDF = toJsonDF - .select( - from_json($"json", StructType(StructField("key", TimestampNTZType) :: Nil)) as "value") - .selectExpr("value['key']") - checkAnswer(fromJsonDF, Row(localDT)) - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala index e651459394fd9..912811bfda7f6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala @@ -848,34 +848,6 @@ class UDFSuite extends QueryTest with SharedSparkSession { } } - test("SPARK-35674: using java.time.LocalDateTime in UDF") { - // Regular case - val input = Seq(java.time.LocalDateTime.parse("2021-01-01T00:00:00")).toDF("dateTime") - val plusYear = udf((l: java.time.LocalDateTime) => l.plusYears(1)) - val result = input.select(plusYear($"dateTime").as("newDateTime")) - checkAnswer(result, Row(java.time.LocalDateTime.parse("2022-01-01T00:00:00")) :: Nil) - assert(result.schema === new StructType().add("newDateTime", TimestampNTZType)) - // UDF produces `null` - val nullFunc = udf((_: java.time.LocalDateTime) => null.asInstanceOf[java.time.LocalDateTime]) - val nullResult = input.select(nullFunc($"dateTime").as("nullDateTime")) - checkAnswer(nullResult, Row(null) :: Nil) - assert(nullResult.schema === new StructType().add("nullDateTime", TimestampNTZType)) - // Input parameter of UDF is null - val nullInput = Seq(null.asInstanceOf[java.time.LocalDateTime]).toDF("nullDateTime") - val constDuration = udf((_: java.time.LocalDateTime) => - java.time.LocalDateTime.parse("2021-01-01T00:00:00")) - val constResult = nullInput.select(constDuration($"nullDateTime").as("firstDayOf2021")) - checkAnswer(constResult, Row(java.time.LocalDateTime.parse("2021-01-01T00:00:00")) :: Nil) - assert(constResult.schema === new StructType().add("firstDayOf2021", TimestampNTZType)) - // Error in the conversion of UDF result to the internal representation of timestamp without - // time zone - val overflowFunc = udf((l: java.time.LocalDateTime) => l.plusDays(Long.MaxValue)) - val e = intercept[SparkException] { - input.select(overflowFunc($"dateTime")).collect() - }.getCause.getCause - assert(e.isInstanceOf[java.lang.ArithmeticException]) - } - test("SPARK-34663, SPARK-35730: using java.time.Duration in UDF") { // Regular case val input = Seq(java.time.Duration.ofHours(23)).toDF("d") From 46e9e3b49a59b9d84a9b073d07c2bbd689fac5d9 Mon Sep 17 00:00:00 2001 From: hi-zir Date: Sun, 10 Apr 2022 01:57:05 +0200 Subject: [PATCH 114/535] [SPARK-37398][PYTHON][ML] Inline type hints for pyspark.ml.classification ### What changes were proposed in this pull request Migration of type hints for pyspark.ml.evaluation from stub file to inline type hints. ### Why are the changes needed? Part of migration of type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #36071 from hi-zir/SPARK-37398. Authored-by: hi-zir Signed-off-by: zero323 (cherry picked from commit 40cdb6d51c2befcfeac8fb5cf5faf178d1a5ee7b) Signed-off-by: zero323 --- python/pyspark/ml/classification.py | 1477 ++++++++++------- python/pyspark/ml/classification.pyi | 951 ----------- .../ml/tests/typing/test_classification.yml | 4 +- 3 files changed, 847 insertions(+), 1585 deletions(-) delete mode 100644 python/pyspark/ml/classification.pyi diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index b791e6f169d44..40a2a87c5db92 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -23,6 +23,21 @@ from abc import ABCMeta, abstractmethod from multiprocessing.pool import ThreadPool +from typing import ( + Any, + Dict, + Generic, + Iterable, + List, + Optional, + Type, + TypeVar, + Union, + cast, + overload, + TYPE_CHECKING, +) + from pyspark import keyword_only, since, SparkContext, inheritable_thread_target from pyspark.ml import Estimator, Predictor, PredictionModel, Model from pyspark.ml.param.shared import ( @@ -74,12 +89,22 @@ ) from pyspark.ml.wrapper import JavaParams, JavaPredictor, JavaPredictionModel, JavaWrapper from pyspark.ml.common import inherit_doc -from pyspark.ml.linalg import Vectors, VectorUDT -from pyspark.sql import DataFrame +from pyspark.ml.linalg import Matrix, Vector, Vectors, VectorUDT +from pyspark.sql import DataFrame, Row from pyspark.sql.functions import udf, when from pyspark.sql.types import ArrayType, DoubleType from pyspark.storagelevel import StorageLevel + +if TYPE_CHECKING: + from pyspark.ml._typing import P, ParamMap + from py4j.java_gateway import JavaObject + + +T = TypeVar("T") +JPM = TypeVar("JPM", bound=JavaPredictionModel) +CM = TypeVar("CM", bound="ClassificationModel") + __all__ = [ "LinearSVC", "LinearSVCModel", @@ -127,14 +152,14 @@ class _ClassifierParams(HasRawPredictionCol, _PredictorParams): @inherit_doc -class Classifier(Predictor, _ClassifierParams, metaclass=ABCMeta): +class Classifier(Predictor[CM], _ClassifierParams, Generic[CM], metaclass=ABCMeta): """ Classifier for classification tasks. Classes are indexed {0, 1, ..., numClasses - 1}. """ @since("3.0.0") - def setRawPredictionCol(self, value): + def setRawPredictionCol(self: "P", value: str) -> "P": """ Sets the value of :py:attr:`rawPredictionCol`. """ @@ -149,16 +174,16 @@ class ClassificationModel(PredictionModel, _ClassifierParams, metaclass=ABCMeta) """ @since("3.0.0") - def setRawPredictionCol(self, value): + def setRawPredictionCol(self: "P", value: str) -> "P": """ Sets the value of :py:attr:`rawPredictionCol`. """ return self._set(rawPredictionCol=value) - @property + @property # type: ignore[misc] @abstractmethod @since("2.1.0") - def numClasses(self): + def numClasses(self) -> int: """ Number of classes (values which the label can take). """ @@ -166,7 +191,7 @@ def numClasses(self): @abstractmethod @since("3.0.0") - def predictRaw(self, value): + def predictRaw(self, value: Vector) -> Vector: """ Raw prediction for each possible label. """ @@ -191,14 +216,14 @@ class ProbabilisticClassifier(Classifier, _ProbabilisticClassifierParams, metacl """ @since("3.0.0") - def setProbabilityCol(self, value): + def setProbabilityCol(self: "P", value: str) -> "P": """ Sets the value of :py:attr:`probabilityCol`. """ return self._set(probabilityCol=value) @since("3.0.0") - def setThresholds(self, value): + def setThresholds(self: "P", value: List[float]) -> "P": """ Sets the value of :py:attr:`thresholds`. """ @@ -214,14 +239,14 @@ class ProbabilisticClassificationModel( """ @since("3.0.0") - def setProbabilityCol(self, value): + def setProbabilityCol(self: CM, value: str) -> CM: """ Sets the value of :py:attr:`probabilityCol`. """ return self._set(probabilityCol=value) @since("3.0.0") - def setThresholds(self, value): + def setThresholds(self: CM, value: List[float]) -> CM: """ Sets the value of :py:attr:`thresholds`. """ @@ -229,7 +254,7 @@ def setThresholds(self, value): @abstractmethod @since("3.0.0") - def predictProbability(self, value): + def predictProbability(self, value: Vector) -> Vector: """ Predict the probability of each class given the features. """ @@ -237,14 +262,14 @@ def predictProbability(self, value): @inherit_doc -class _JavaClassifier(Classifier, JavaPredictor, metaclass=ABCMeta): +class _JavaClassifier(Classifier, JavaPredictor[JPM], Generic[JPM], metaclass=ABCMeta): """ Java Classifier for classification tasks. Classes are indexed {0, 1, ..., numClasses - 1}. """ @since("3.0.0") - def setRawPredictionCol(self, value): + def setRawPredictionCol(self: "P", value: str) -> "P": """ Sets the value of :py:attr:`rawPredictionCol`. """ @@ -252,23 +277,23 @@ def setRawPredictionCol(self, value): @inherit_doc -class _JavaClassificationModel(ClassificationModel, JavaPredictionModel): +class _JavaClassificationModel(ClassificationModel, JavaPredictionModel[T]): """ Java Model produced by a ``Classifier``. Classes are indexed {0, 1, ..., numClasses - 1}. To be mixed in with :class:`pyspark.ml.JavaModel` """ - @property + @property # type: ignore[misc] @since("2.1.0") - def numClasses(self): + def numClasses(self) -> int: """ Number of classes (values which the label can take). """ return self._call_java("numClasses") @since("3.0.0") - def predictRaw(self, value): + def predictRaw(self, value: Vector) -> Vector: """ Raw prediction for each possible label. """ @@ -276,7 +301,9 @@ def predictRaw(self, value): @inherit_doc -class _JavaProbabilisticClassifier(ProbabilisticClassifier, _JavaClassifier, metaclass=ABCMeta): +class _JavaProbabilisticClassifier( + ProbabilisticClassifier, _JavaClassifier[JPM], Generic[JPM], metaclass=ABCMeta +): """ Java Probabilistic Classifier for classification tasks. """ @@ -286,14 +313,14 @@ class _JavaProbabilisticClassifier(ProbabilisticClassifier, _JavaClassifier, met @inherit_doc class _JavaProbabilisticClassificationModel( - ProbabilisticClassificationModel, _JavaClassificationModel + ProbabilisticClassificationModel, _JavaClassificationModel[T] ): """ Java Model produced by a ``ProbabilisticClassifier``. """ @since("3.0.0") - def predictProbability(self, value): + def predictProbability(self, value: Vector) -> Vector: """ Predict the probability of each class given the features. """ @@ -308,34 +335,34 @@ class _ClassificationSummary(JavaWrapper): .. versionadded:: 3.1.0 """ - @property + @property # type: ignore[misc] @since("3.1.0") - def predictions(self): + def predictions(self) -> DataFrame: """ Dataframe outputted by the model's `transform` method. """ return self._call_java("predictions") - @property + @property # type: ignore[misc] @since("3.1.0") - def predictionCol(self): + def predictionCol(self) -> str: """ Field in "predictions" which gives the prediction of each class. """ return self._call_java("predictionCol") - @property + @property # type: ignore[misc] @since("3.1.0") - def labelCol(self): + def labelCol(self) -> str: """ Field in "predictions" which gives the true label of each instance. """ return self._call_java("labelCol") - @property + @property # type: ignore[misc] @since("3.1.0") - def weightCol(self): + def weightCol(self) -> str: """ Field in "predictions" which gives the weight of each instance as a vector. @@ -343,7 +370,7 @@ def weightCol(self): return self._call_java("weightCol") @property - def labels(self): + def labels(self) -> List[str]: """ Returns the sequence of labels in ascending order. This order matches the order used in metrics which are specified as arrays over labels, e.g., truePositiveRateByLabel. @@ -359,48 +386,48 @@ def labels(self): """ return self._call_java("labels") - @property + @property # type: ignore[misc] @since("3.1.0") - def truePositiveRateByLabel(self): + def truePositiveRateByLabel(self) -> List[float]: """ Returns true positive rate for each label (category). """ return self._call_java("truePositiveRateByLabel") - @property + @property # type: ignore[misc] @since("3.1.0") - def falsePositiveRateByLabel(self): + def falsePositiveRateByLabel(self) -> List[float]: """ Returns false positive rate for each label (category). """ return self._call_java("falsePositiveRateByLabel") - @property + @property # type: ignore[misc] @since("3.1.0") - def precisionByLabel(self): + def precisionByLabel(self) -> List[float]: """ Returns precision for each label (category). """ return self._call_java("precisionByLabel") - @property + @property # type: ignore[misc] @since("3.1.0") - def recallByLabel(self): + def recallByLabel(self) -> List[float]: """ Returns recall for each label (category). """ return self._call_java("recallByLabel") @since("3.1.0") - def fMeasureByLabel(self, beta=1.0): + def fMeasureByLabel(self, beta: float = 1.0) -> List[float]: """ Returns f-measure for each label (category). """ return self._call_java("fMeasureByLabel", beta) - @property + @property # type: ignore[misc] @since("3.1.0") - def accuracy(self): + def accuracy(self) -> float: """ Returns accuracy. (equals to the total number of correctly classified instances @@ -408,42 +435,42 @@ def accuracy(self): """ return self._call_java("accuracy") - @property + @property # type: ignore[misc] @since("3.1.0") - def weightedTruePositiveRate(self): + def weightedTruePositiveRate(self) -> float: """ Returns weighted true positive rate. (equals to precision, recall and f-measure) """ return self._call_java("weightedTruePositiveRate") - @property + @property # type: ignore[misc] @since("3.1.0") - def weightedFalsePositiveRate(self): + def weightedFalsePositiveRate(self) -> float: """ Returns weighted false positive rate. """ return self._call_java("weightedFalsePositiveRate") - @property + @property # type: ignore[misc] @since("3.1.0") - def weightedRecall(self): + def weightedRecall(self) -> float: """ Returns weighted averaged recall. (equals to precision, recall and f-measure) """ return self._call_java("weightedRecall") - @property + @property # type: ignore[misc] @since("3.1.0") - def weightedPrecision(self): + def weightedPrecision(self) -> float: """ Returns weighted averaged precision. """ return self._call_java("weightedPrecision") @since("3.1.0") - def weightedFMeasure(self, beta=1.0): + def weightedFMeasure(self, beta: float = 1.0) -> float: """ Returns weighted averaged f-measure. """ @@ -458,9 +485,9 @@ class _TrainingSummary(JavaWrapper): .. versionadded:: 3.1.0 """ - @property + @property # type: ignore[misc] @since("3.1.0") - def objectiveHistory(self): + def objectiveHistory(self) -> List[float]: """ Objective function (scaled loss + regularization) at each iteration. It contains one more element, the initial state, @@ -468,9 +495,9 @@ def objectiveHistory(self): """ return self._call_java("objectiveHistory") - @property + @property # type: ignore[misc] @since("3.1.0") - def totalIterations(self): + def totalIterations(self) -> int: """ Number of training iterations until termination. """ @@ -485,9 +512,9 @@ class _BinaryClassificationSummary(_ClassificationSummary): .. versionadded:: 3.1.0 """ - @property + @property # type: ignore[misc] @since("3.1.0") - def scoreCol(self): + def scoreCol(self) -> str: """ Field in "predictions" which gives the probability or raw prediction of each class as a vector. @@ -495,7 +522,7 @@ def scoreCol(self): return self._call_java("scoreCol") @property - def roc(self): + def roc(self) -> DataFrame: """ Returns the receiver operating characteristic (ROC) curve, which is a Dataframe having two fields (FPR, TPR) with @@ -509,18 +536,18 @@ def roc(self): """ return self._call_java("roc") - @property + @property # type: ignore[misc] @since("3.1.0") - def areaUnderROC(self): + def areaUnderROC(self) -> float: """ Computes the area under the receiver operating characteristic (ROC) curve. """ return self._call_java("areaUnderROC") - @property + @property # type: ignore[misc] @since("3.1.0") - def pr(self): + def pr(self) -> DataFrame: """ Returns the precision-recall curve, which is a Dataframe containing two fields recall, precision with (0.0, 1.0) prepended @@ -528,18 +555,18 @@ def pr(self): """ return self._call_java("pr") - @property + @property # type: ignore[misc] @since("3.1.0") - def fMeasureByThreshold(self): + def fMeasureByThreshold(self) -> DataFrame: """ Returns a dataframe with two fields (threshold, F-Measure) curve with beta = 1.0. """ return self._call_java("fMeasureByThreshold") - @property + @property # type: ignore[misc] @since("3.1.0") - def precisionByThreshold(self): + def precisionByThreshold(self) -> DataFrame: """ Returns a dataframe with two fields (threshold, precision) curve. Every possible probability obtained in transforming the dataset @@ -547,9 +574,9 @@ def precisionByThreshold(self): """ return self._call_java("precisionByThreshold") - @property + @property # type: ignore[misc] @since("3.1.0") - def recallByThreshold(self): + def recallByThreshold(self) -> DataFrame: """ Returns a dataframe with two fields (threshold, recall) curve. Every possible probability obtained in transforming the dataset @@ -576,7 +603,7 @@ class _LinearSVCParams( .. versionadded:: 3.0.0 """ - threshold = Param( + threshold: Param[float] = Param( Params._dummy(), "threshold", "The threshold in binary classification applied to the linear model" @@ -585,7 +612,7 @@ class _LinearSVCParams( typeConverter=TypeConverters.toFloat, ) - def __init__(self, *args): + def __init__(self, *args: Any) -> None: super(_LinearSVCParams, self).__init__(*args) self._setDefault( maxIter=100, @@ -600,7 +627,12 @@ def __init__(self, *args): @inherit_doc -class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadable): +class LinearSVC( + _JavaClassifier["LinearSVCModel"], + _LinearSVCParams, + JavaMLWritable, + JavaMLReadable["LinearSVC"], +): """ This binary classifier optimizes the Hinge Loss using the OWLQN optimizer. Only supports L2 regularization currently. @@ -676,23 +708,25 @@ class LinearSVC(_JavaClassifier, _LinearSVCParams, JavaMLWritable, JavaMLReadabl True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxIter=100, - regParam=0.0, - tol=1e-6, - rawPredictionCol="rawPrediction", - fitIntercept=True, - standardization=True, - threshold=0.0, - weightCol=None, - aggregationDepth=2, - maxBlockSizeInMB=0.0, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxIter: int = 100, + regParam: float = 0.0, + tol: float = 1e-6, + rawPredictionCol: str = "rawPrediction", + fitIntercept: bool = True, + standardization: bool = True, + threshold: float = 0.0, + weightCol: Optional[str] = None, + aggregationDepth: int = 2, + maxBlockSizeInMB: float = 0.0, ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -712,20 +746,20 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxIter=100, - regParam=0.0, - tol=1e-6, - rawPredictionCol="rawPrediction", - fitIntercept=True, - standardization=True, - threshold=0.0, - weightCol=None, - aggregationDepth=2, - maxBlockSizeInMB=0.0, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxIter: int = 100, + regParam: float = 0.0, + tol: float = 1e-6, + rawPredictionCol: str = "rawPrediction", + fitIntercept: bool = True, + standardization: bool = True, + threshold: float = 0.0, + weightCol: Optional[str] = None, + aggregationDepth: int = 2, + maxBlockSizeInMB: float = 0.0, + ) -> "LinearSVC": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, tol=1e-6, rawPredictionCol="rawPrediction", \ @@ -736,67 +770,67 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "LinearSVCModel": return LinearSVCModel(java_model) @since("2.2.0") - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "LinearSVC": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) @since("2.2.0") - def setRegParam(self, value): + def setRegParam(self, value: float) -> "LinearSVC": """ Sets the value of :py:attr:`regParam`. """ return self._set(regParam=value) @since("2.2.0") - def setTol(self, value): + def setTol(self, value: float) -> "LinearSVC": """ Sets the value of :py:attr:`tol`. """ return self._set(tol=value) @since("2.2.0") - def setFitIntercept(self, value): + def setFitIntercept(self, value: bool) -> "LinearSVC": """ Sets the value of :py:attr:`fitIntercept`. """ return self._set(fitIntercept=value) @since("2.2.0") - def setStandardization(self, value): + def setStandardization(self, value: bool) -> "LinearSVC": """ Sets the value of :py:attr:`standardization`. """ return self._set(standardization=value) @since("2.2.0") - def setThreshold(self, value): + def setThreshold(self, value: float) -> "LinearSVC": """ Sets the value of :py:attr:`threshold`. """ return self._set(threshold=value) @since("2.2.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "LinearSVC": """ Sets the value of :py:attr:`weightCol`. """ return self._set(weightCol=value) @since("2.2.0") - def setAggregationDepth(self, value): + def setAggregationDepth(self, value: int) -> "LinearSVC": """ Sets the value of :py:attr:`aggregationDepth`. """ return self._set(aggregationDepth=value) @since("3.1.0") - def setMaxBlockSizeInMB(self, value): + def setMaxBlockSizeInMB(self, value: float) -> "LinearSVC": """ Sets the value of :py:attr:`maxBlockSizeInMB`. """ @@ -804,7 +838,11 @@ def setMaxBlockSizeInMB(self, value): class LinearSVCModel( - _JavaClassificationModel, _LinearSVCParams, JavaMLWritable, JavaMLReadable, HasTrainingSummary + _JavaClassificationModel[Vector], + _LinearSVCParams, + JavaMLWritable, + JavaMLReadable["LinearSVCModel"], + HasTrainingSummary["LinearSVCTrainingSummary"], ): """ Model fitted by LinearSVC. @@ -813,30 +851,30 @@ class LinearSVCModel( """ @since("3.0.0") - def setThreshold(self, value): + def setThreshold(self, value: float) -> "LinearSVCModel": """ Sets the value of :py:attr:`threshold`. """ return self._set(threshold=value) - @property + @property # type: ignore[misc] @since("2.2.0") - def coefficients(self): + def coefficients(self) -> Vector: """ Model coefficients of Linear SVM Classifier. """ return self._call_java("coefficients") - @property + @property # type: ignore[misc] @since("2.2.0") - def intercept(self): + def intercept(self) -> float: """ Model intercept of Linear SVM Classifier. """ return self._call_java("intercept") @since("3.1.0") - def summary(self): + def summary(self) -> "LinearSVCTrainingSummary": """ Gets summary (accuracy/precision/recall, objective history, total iterations) of model trained on the training set. An exception is thrown if `trainingSummary is None`. @@ -848,7 +886,7 @@ def summary(self): "No training summary available for this %s" % self.__class__.__name__ ) - def evaluate(self, dataset): + def evaluate(self, dataset: DataFrame) -> "LinearSVCSummary": """ Evaluates the model on a test dataset. @@ -905,7 +943,7 @@ class _LogisticRegressionParams( .. versionadded:: 3.0.0 """ - threshold = Param( + threshold: Param[float] = Param( Params._dummy(), "threshold", "Threshold in binary classification prediction, in range [0, 1]." @@ -914,7 +952,7 @@ class _LogisticRegressionParams( typeConverter=TypeConverters.toFloat, ) - family = Param( + family: Param[str] = Param( Params._dummy(), "family", "The name of family which is a description of the label distribution to " @@ -922,7 +960,7 @@ class _LogisticRegressionParams( typeConverter=TypeConverters.toString, ) - lowerBoundsOnCoefficients = Param( + lowerBoundsOnCoefficients: Param[Matrix] = Param( Params._dummy(), "lowerBoundsOnCoefficients", "The lower bounds on coefficients if fitting under bound " @@ -934,7 +972,7 @@ class _LogisticRegressionParams( typeConverter=TypeConverters.toMatrix, ) - upperBoundsOnCoefficients = Param( + upperBoundsOnCoefficients: Param[Matrix] = Param( Params._dummy(), "upperBoundsOnCoefficients", "The upper bounds on coefficients if fitting under bound " @@ -946,7 +984,7 @@ class _LogisticRegressionParams( typeConverter=TypeConverters.toMatrix, ) - lowerBoundsOnIntercepts = Param( + lowerBoundsOnIntercepts: Param[Vector] = Param( Params._dummy(), "lowerBoundsOnIntercepts", "The lower bounds on intercepts if fitting under bound " @@ -956,7 +994,7 @@ class _LogisticRegressionParams( typeConverter=TypeConverters.toVector, ) - upperBoundsOnIntercepts = Param( + upperBoundsOnIntercepts: Param[Vector] = Param( Params._dummy(), "upperBoundsOnIntercepts", "The upper bounds on intercepts if fitting under bound " @@ -966,24 +1004,24 @@ class _LogisticRegressionParams( typeConverter=TypeConverters.toVector, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_LogisticRegressionParams, self).__init__(*args) self._setDefault( maxIter=100, regParam=0.0, tol=1e-6, threshold=0.5, family="auto", maxBlockSizeInMB=0.0 ) @since("1.4.0") - def setThreshold(self, value): + def setThreshold(self: "P", value: float) -> "P": """ Sets the value of :py:attr:`threshold`. Clears value of :py:attr:`thresholds` if it has been set. """ self._set(threshold=value) - self.clear(self.thresholds) + self.clear(self.thresholds) # type: ignore[attr-defined] return self @since("1.4.0") - def getThreshold(self): + def getThreshold(self) -> float: """ Get threshold for binary classification. @@ -1006,17 +1044,17 @@ def getThreshold(self): return self.getOrDefault(self.threshold) @since("1.5.0") - def setThresholds(self, value): + def setThresholds(self: "P", value: List[float]) -> "P": """ Sets the value of :py:attr:`thresholds`. Clears value of :py:attr:`threshold` if it has been set. """ self._set(thresholds=value) - self.clear(self.threshold) + self.clear(self.threshold) # type: ignore[attr-defined] return self @since("1.5.0") - def getThresholds(self): + def getThresholds(self) -> List[float]: """ If :py:attr:`thresholds` is set, return its value. Otherwise, if :py:attr:`threshold` is set, return the equivalent thresholds for binary @@ -1030,7 +1068,7 @@ def getThresholds(self): else: return self.getOrDefault(self.thresholds) - def _checkThresholdConsistency(self): + def _checkThresholdConsistency(self) -> None: if self.isSet(self.threshold) and self.isSet(self.thresholds): ts = self.getOrDefault(self.thresholds) if len(ts) != 2: @@ -1048,35 +1086,35 @@ def _checkThresholdConsistency(self): ) @since("2.1.0") - def getFamily(self): + def getFamily(self) -> str: """ Gets the value of :py:attr:`family` or its default value. """ return self.getOrDefault(self.family) @since("2.3.0") - def getLowerBoundsOnCoefficients(self): + def getLowerBoundsOnCoefficients(self) -> Matrix: """ Gets the value of :py:attr:`lowerBoundsOnCoefficients` """ return self.getOrDefault(self.lowerBoundsOnCoefficients) @since("2.3.0") - def getUpperBoundsOnCoefficients(self): + def getUpperBoundsOnCoefficients(self) -> Matrix: """ Gets the value of :py:attr:`upperBoundsOnCoefficients` """ return self.getOrDefault(self.upperBoundsOnCoefficients) @since("2.3.0") - def getLowerBoundsOnIntercepts(self): + def getLowerBoundsOnIntercepts(self) -> Vector: """ Gets the value of :py:attr:`lowerBoundsOnIntercepts` """ return self.getOrDefault(self.lowerBoundsOnIntercepts) @since("2.3.0") - def getUpperBoundsOnIntercepts(self): + def getUpperBoundsOnIntercepts(self) -> Vector: """ Gets the value of :py:attr:`upperBoundsOnIntercepts` """ @@ -1085,7 +1123,10 @@ def getUpperBoundsOnIntercepts(self): @inherit_doc class LogisticRegression( - _JavaProbabilisticClassifier, _LogisticRegressionParams, JavaMLWritable, JavaMLReadable + _JavaProbabilisticClassifier["LogisticRegressionModel"], + _LogisticRegressionParams, + JavaMLWritable, + JavaMLReadable["LogisticRegression"], ): """ Logistic regression. @@ -1180,33 +1221,88 @@ class LogisticRegression( True """ - @keyword_only + _input_kwargs: Dict[str, Any] + + @overload + def __init__( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + elasticNetParam: float = ..., + tol: float = ..., + fitIntercept: bool = ..., + threshold: float = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + standardization: bool = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + family: str = ..., + lowerBoundsOnCoefficients: Optional[Matrix] = ..., + upperBoundsOnCoefficients: Optional[Matrix] = ..., + lowerBoundsOnIntercepts: Optional[Vector] = ..., + upperBoundsOnIntercepts: Optional[Vector] = ..., + maxBlockSizeInMB: float = ..., + ): + ... + + @overload def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxIter=100, - regParam=0.0, - elasticNetParam=0.0, - tol=1e-6, - fitIntercept=True, - threshold=0.5, - thresholds=None, - probabilityCol="probability", - rawPredictionCol="rawPrediction", - standardization=True, - weightCol=None, - aggregationDepth=2, - family="auto", - lowerBoundsOnCoefficients=None, - upperBoundsOnCoefficients=None, - lowerBoundsOnIntercepts=None, - upperBoundsOnIntercepts=None, - maxBlockSizeInMB=0.0, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + elasticNetParam: float = ..., + tol: float = ..., + fitIntercept: bool = ..., + thresholds: Optional[List[float]] = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + standardization: bool = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + family: str = ..., + lowerBoundsOnCoefficients: Optional[Matrix] = ..., + upperBoundsOnCoefficients: Optional[Matrix] = ..., + lowerBoundsOnIntercepts: Optional[Vector] = ..., + upperBoundsOnIntercepts: Optional[Vector] = ..., + maxBlockSizeInMB: float = ..., ): + ... + @keyword_only + def __init__( + self, + *, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxIter: int = 100, + regParam: float = 0.0, + elasticNetParam: float = 0.0, + tol: float = 1e-6, + fitIntercept: bool = True, + threshold: float = 0.5, + thresholds: Optional[List[float]] = None, + probabilityCol: str = "probability", + rawPredictionCol: str = "rawPrediction", + standardization: bool = True, + weightCol: Optional[str] = None, + aggregationDepth: int = 2, + family: str = "auto", + lowerBoundsOnCoefficients: Optional[Matrix] = None, + upperBoundsOnCoefficients: Optional[Matrix] = None, + lowerBoundsOnIntercepts: Optional[Vector] = None, + upperBoundsOnIntercepts: Optional[Vector] = None, + maxBlockSizeInMB: float = 0.0, + ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ @@ -1226,33 +1322,87 @@ def __init__( self.setParams(**kwargs) self._checkThresholdConsistency() + @overload + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + elasticNetParam: float = ..., + tol: float = ..., + fitIntercept: bool = ..., + threshold: float = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + standardization: bool = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + family: str = ..., + lowerBoundsOnCoefficients: Optional[Matrix] = ..., + upperBoundsOnCoefficients: Optional[Matrix] = ..., + lowerBoundsOnIntercepts: Optional[Vector] = ..., + upperBoundsOnIntercepts: Optional[Vector] = ..., + maxBlockSizeInMB: float = ..., + ) -> "LogisticRegression": + ... + + @overload + def setParams( + self, + *, + featuresCol: str = ..., + labelCol: str = ..., + predictionCol: str = ..., + maxIter: int = ..., + regParam: float = ..., + elasticNetParam: float = ..., + tol: float = ..., + fitIntercept: bool = ..., + thresholds: Optional[List[float]] = ..., + probabilityCol: str = ..., + rawPredictionCol: str = ..., + standardization: bool = ..., + weightCol: Optional[str] = ..., + aggregationDepth: int = ..., + family: str = ..., + lowerBoundsOnCoefficients: Optional[Matrix] = ..., + upperBoundsOnCoefficients: Optional[Matrix] = ..., + lowerBoundsOnIntercepts: Optional[Vector] = ..., + upperBoundsOnIntercepts: Optional[Vector] = ..., + maxBlockSizeInMB: float = ..., + ) -> "LogisticRegression": + ... + @keyword_only @since("1.3.0") def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxIter=100, - regParam=0.0, - elasticNetParam=0.0, - tol=1e-6, - fitIntercept=True, - threshold=0.5, - thresholds=None, - probabilityCol="probability", - rawPredictionCol="rawPrediction", - standardization=True, - weightCol=None, - aggregationDepth=2, - family="auto", - lowerBoundsOnCoefficients=None, - upperBoundsOnCoefficients=None, - lowerBoundsOnIntercepts=None, - upperBoundsOnIntercepts=None, - maxBlockSizeInMB=0.0, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxIter: int = 100, + regParam: float = 0.0, + elasticNetParam: float = 0.0, + tol: float = 1e-6, + fitIntercept: bool = True, + threshold: float = 0.5, + thresholds: Optional[List[float]] = None, + probabilityCol: str = "probability", + rawPredictionCol: str = "rawPrediction", + standardization: bool = True, + weightCol: Optional[str] = None, + aggregationDepth: int = 2, + family: str = "auto", + lowerBoundsOnCoefficients: Optional[Matrix] = None, + upperBoundsOnCoefficients: Optional[Matrix] = None, + lowerBoundsOnIntercepts: Optional[Vector] = None, + upperBoundsOnIntercepts: Optional[Vector] = None, + maxBlockSizeInMB: float = 0.0, + ) -> "LogisticRegression": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, regParam=0.0, elasticNetParam=0.0, tol=1e-6, fitIntercept=True, \ @@ -1270,94 +1420,94 @@ def setParams( self._checkThresholdConsistency() return self - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "LogisticRegressionModel": return LogisticRegressionModel(java_model) @since("2.1.0") - def setFamily(self, value): + def setFamily(self, value: str) -> "LogisticRegression": """ Sets the value of :py:attr:`family`. """ return self._set(family=value) @since("2.3.0") - def setLowerBoundsOnCoefficients(self, value): + def setLowerBoundsOnCoefficients(self, value: Matrix) -> "LogisticRegression": """ Sets the value of :py:attr:`lowerBoundsOnCoefficients` """ return self._set(lowerBoundsOnCoefficients=value) @since("2.3.0") - def setUpperBoundsOnCoefficients(self, value): + def setUpperBoundsOnCoefficients(self, value: Matrix) -> "LogisticRegression": """ Sets the value of :py:attr:`upperBoundsOnCoefficients` """ return self._set(upperBoundsOnCoefficients=value) @since("2.3.0") - def setLowerBoundsOnIntercepts(self, value): + def setLowerBoundsOnIntercepts(self, value: Vector) -> "LogisticRegression": """ Sets the value of :py:attr:`lowerBoundsOnIntercepts` """ return self._set(lowerBoundsOnIntercepts=value) @since("2.3.0") - def setUpperBoundsOnIntercepts(self, value): + def setUpperBoundsOnIntercepts(self, value: Vector) -> "LogisticRegression": """ Sets the value of :py:attr:`upperBoundsOnIntercepts` """ return self._set(upperBoundsOnIntercepts=value) - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "LogisticRegression": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) - def setRegParam(self, value): + def setRegParam(self, value: float) -> "LogisticRegression": """ Sets the value of :py:attr:`regParam`. """ return self._set(regParam=value) - def setTol(self, value): + def setTol(self, value: float) -> "LogisticRegression": """ Sets the value of :py:attr:`tol`. """ return self._set(tol=value) - def setElasticNetParam(self, value): + def setElasticNetParam(self, value: float) -> "LogisticRegression": """ Sets the value of :py:attr:`elasticNetParam`. """ return self._set(elasticNetParam=value) - def setFitIntercept(self, value): + def setFitIntercept(self, value: bool) -> "LogisticRegression": """ Sets the value of :py:attr:`fitIntercept`. """ return self._set(fitIntercept=value) - def setStandardization(self, value): + def setStandardization(self, value: bool) -> "LogisticRegression": """ Sets the value of :py:attr:`standardization`. """ return self._set(standardization=value) - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "LogisticRegression": """ Sets the value of :py:attr:`weightCol`. """ return self._set(weightCol=value) - def setAggregationDepth(self, value): + def setAggregationDepth(self, value: int) -> "LogisticRegression": """ Sets the value of :py:attr:`aggregationDepth`. """ return self._set(aggregationDepth=value) @since("3.1.0") - def setMaxBlockSizeInMB(self, value): + def setMaxBlockSizeInMB(self, value: float) -> "LogisticRegression": """ Sets the value of :py:attr:`maxBlockSizeInMB`. """ @@ -1365,11 +1515,11 @@ def setMaxBlockSizeInMB(self, value): class LogisticRegressionModel( - _JavaProbabilisticClassificationModel, + _JavaProbabilisticClassificationModel[Vector], _LogisticRegressionParams, JavaMLWritable, - JavaMLReadable, - HasTrainingSummary, + JavaMLReadable["LogisticRegressionModel"], + HasTrainingSummary["LogisticRegressionTrainingSummary"], ): """ Model fitted by LogisticRegression. @@ -1377,43 +1527,43 @@ class LogisticRegressionModel( .. versionadded:: 1.3.0 """ - @property + @property # type: ignore[misc] @since("2.0.0") - def coefficients(self): + def coefficients(self) -> Vector: """ Model coefficients of binomial logistic regression. An exception is thrown in the case of multinomial logistic regression. """ return self._call_java("coefficients") - @property + @property # type: ignore[misc] @since("1.4.0") - def intercept(self): + def intercept(self) -> float: """ Model intercept of binomial logistic regression. An exception is thrown in the case of multinomial logistic regression. """ return self._call_java("intercept") - @property + @property # type: ignore[misc] @since("2.1.0") - def coefficientMatrix(self): + def coefficientMatrix(self) -> Matrix: """ Model coefficients. """ return self._call_java("coefficientMatrix") - @property + @property # type: ignore[misc] @since("2.1.0") - def interceptVector(self): + def interceptVector(self) -> Vector: """ Model intercept. """ return self._call_java("interceptVector") - @property + @property # type: ignore[misc] @since("2.0.0") - def summary(self): + def summary(self) -> "LogisticRegressionTrainingSummary": """ Gets summary (accuracy/precision/recall, objective history, total iterations) of model trained on the training set. An exception is thrown if `trainingSummary is None`. @@ -1432,7 +1582,7 @@ def summary(self): "No training summary available for this %s" % self.__class__.__name__ ) - def evaluate(self, dataset): + def evaluate(self, dataset: DataFrame) -> "LogisticRegressionSummary": """ Evaluates the model on a test dataset. @@ -1459,18 +1609,18 @@ class LogisticRegressionSummary(_ClassificationSummary): .. versionadded:: 2.0.0 """ - @property + @property # type: ignore[misc] @since("2.0.0") - def probabilityCol(self): + def probabilityCol(self) -> str: """ Field in "predictions" which gives the probability of each class as a vector. """ return self._call_java("probabilityCol") - @property + @property # type: ignore[misc] @since("2.0.0") - def featuresCol(self): + def featuresCol(self) -> str: """ Field in "predictions" which gives the features of each instance as a vector. @@ -1519,7 +1669,7 @@ class _DecisionTreeClassifierParams(_DecisionTreeParams, _TreeClassifierParams): Params for :py:class:`DecisionTreeClassifier` and :py:class:`DecisionTreeClassificationModel`. """ - def __init__(self, *args): + def __init__(self, *args: Any): super(_DecisionTreeClassifierParams, self).__init__(*args) self._setDefault( maxDepth=5, @@ -1537,7 +1687,10 @@ def __init__(self, *args): @inherit_doc class DecisionTreeClassifier( - _JavaProbabilisticClassifier, _DecisionTreeClassifierParams, JavaMLWritable, JavaMLReadable + _JavaProbabilisticClassifier["DecisionTreeClassificationModel"], + _DecisionTreeClassifierParams, + JavaMLWritable, + JavaMLReadable["DecisionTreeClassifier"], ): """ `Decision tree `_ @@ -1619,27 +1772,29 @@ class DecisionTreeClassifier( DecisionTreeClassificationModel...depth=1, numNodes=3... """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - probabilityCol="probability", - rawPredictionCol="rawPrediction", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - maxMemoryInMB=256, - cacheNodeIds=False, - checkpointInterval=10, - impurity="gini", - seed=None, - weightCol=None, - leafCol="", - minWeightFractionPerNode=0.0, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + probabilityCol: str = "probability", + rawPredictionCol: str = "rawPrediction", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + maxMemoryInMB: int = 256, + cacheNodeIds: bool = False, + checkpointInterval: int = 10, + impurity: str = "gini", + seed: Optional[int] = None, + weightCol: Optional[str] = None, + leafCol: str = "", + minWeightFractionPerNode: float = 0.0, ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -1660,24 +1815,24 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - probabilityCol="probability", - rawPredictionCol="rawPrediction", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - maxMemoryInMB=256, - cacheNodeIds=False, - checkpointInterval=10, - impurity="gini", - seed=None, - weightCol=None, - leafCol="", - minWeightFractionPerNode=0.0, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + probabilityCol: str = "probability", + rawPredictionCol: str = "rawPrediction", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + maxMemoryInMB: int = 256, + cacheNodeIds: bool = False, + checkpointInterval: int = 10, + impurity: str = "gini", + seed: Optional[int] = None, + weightCol: Optional[str] = None, + leafCol: str = "", + minWeightFractionPerNode: float = 0.0, + ) -> "DecisionTreeClassifier": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ probabilityCol="probability", rawPredictionCol="rawPrediction", \ @@ -1689,74 +1844,74 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "DecisionTreeClassificationModel": return DecisionTreeClassificationModel(java_model) - def setMaxDepth(self, value): + def setMaxDepth(self, value: int) -> "DecisionTreeClassifier": """ Sets the value of :py:attr:`maxDepth`. """ return self._set(maxDepth=value) - def setMaxBins(self, value): + def setMaxBins(self, value: int) -> "DecisionTreeClassifier": """ Sets the value of :py:attr:`maxBins`. """ return self._set(maxBins=value) - def setMinInstancesPerNode(self, value): + def setMinInstancesPerNode(self, value: int) -> "DecisionTreeClassifier": """ Sets the value of :py:attr:`minInstancesPerNode`. """ return self._set(minInstancesPerNode=value) @since("3.0.0") - def setMinWeightFractionPerNode(self, value): + def setMinWeightFractionPerNode(self, value: float) -> "DecisionTreeClassifier": """ Sets the value of :py:attr:`minWeightFractionPerNode`. """ return self._set(minWeightFractionPerNode=value) - def setMinInfoGain(self, value): + def setMinInfoGain(self, value: float) -> "DecisionTreeClassifier": """ Sets the value of :py:attr:`minInfoGain`. """ return self._set(minInfoGain=value) - def setMaxMemoryInMB(self, value): + def setMaxMemoryInMB(self, value: int) -> "DecisionTreeClassifier": """ Sets the value of :py:attr:`maxMemoryInMB`. """ return self._set(maxMemoryInMB=value) - def setCacheNodeIds(self, value): + def setCacheNodeIds(self, value: bool) -> "DecisionTreeClassifier": """ Sets the value of :py:attr:`cacheNodeIds`. """ return self._set(cacheNodeIds=value) @since("1.4.0") - def setImpurity(self, value): + def setImpurity(self, value: str) -> "DecisionTreeClassifier": """ Sets the value of :py:attr:`impurity`. """ return self._set(impurity=value) @since("1.4.0") - def setCheckpointInterval(self, value): + def setCheckpointInterval(self, value: int) -> "DecisionTreeClassifier": """ Sets the value of :py:attr:`checkpointInterval`. """ return self._set(checkpointInterval=value) - def setSeed(self, value): + def setSeed(self, value: int) -> "DecisionTreeClassifier": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) @since("3.0.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "DecisionTreeClassifier": """ Sets the value of :py:attr:`weightCol`. """ @@ -1766,10 +1921,10 @@ def setWeightCol(self, value): @inherit_doc class DecisionTreeClassificationModel( _DecisionTreeModel, - _JavaProbabilisticClassificationModel, + _JavaProbabilisticClassificationModel[Vector], _DecisionTreeClassifierParams, JavaMLWritable, - JavaMLReadable, + JavaMLReadable["DecisionTreeClassificationModel"], ): """ Model fitted by DecisionTreeClassifier. @@ -1778,7 +1933,7 @@ class DecisionTreeClassificationModel( """ @property - def featureImportances(self): + def featureImportances(self) -> Vector: """ Estimate of the importance of each feature. @@ -1808,7 +1963,7 @@ class _RandomForestClassifierParams(_RandomForestParams, _TreeClassifierParams): Params for :py:class:`RandomForestClassifier` and :py:class:`RandomForestClassificationModel`. """ - def __init__(self, *args): + def __init__(self, *args: Any): super(_RandomForestClassifierParams, self).__init__(*args) self._setDefault( maxDepth=5, @@ -1830,7 +1985,10 @@ def __init__(self, *args): @inherit_doc class RandomForestClassifier( - _JavaProbabilisticClassifier, _RandomForestClassifierParams, JavaMLWritable, JavaMLReadable + _JavaProbabilisticClassifier["RandomForestClassificationModel"], + _RandomForestClassifierParams, + JavaMLWritable, + JavaMLReadable["RandomForestClassifier"], ): """ `Random Forest `_ @@ -1906,31 +2064,33 @@ class RandomForestClassifier( True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - probabilityCol="probability", - rawPredictionCol="rawPrediction", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - maxMemoryInMB=256, - cacheNodeIds=False, - checkpointInterval=10, - impurity="gini", - numTrees=20, - featureSubsetStrategy="auto", - seed=None, - subsamplingRate=1.0, - leafCol="", - minWeightFractionPerNode=0.0, - weightCol=None, - bootstrap=True, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + probabilityCol: str = "probability", + rawPredictionCol: str = "rawPrediction", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + maxMemoryInMB: int = 256, + cacheNodeIds: bool = False, + checkpointInterval: int = 10, + impurity: str = "gini", + numTrees: int = 20, + featureSubsetStrategy: str = "auto", + seed: Optional[int] = None, + subsamplingRate: float = 1.0, + leafCol: str = "", + minWeightFractionPerNode: float = 0.0, + weightCol: Optional[str] = None, + bootstrap: Optional[bool] = True, ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -1952,28 +2112,28 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - probabilityCol="probability", - rawPredictionCol="rawPrediction", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - maxMemoryInMB=256, - cacheNodeIds=False, - checkpointInterval=10, - seed=None, - impurity="gini", - numTrees=20, - featureSubsetStrategy="auto", - subsamplingRate=1.0, - leafCol="", - minWeightFractionPerNode=0.0, - weightCol=None, - bootstrap=True, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + probabilityCol: str = "probability", + rawPredictionCol: str = "rawPrediction", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + maxMemoryInMB: int = 256, + cacheNodeIds: bool = False, + checkpointInterval: int = 10, + impurity: str = "gini", + numTrees: int = 20, + featureSubsetStrategy: str = "auto", + seed: Optional[int] = None, + subsamplingRate: float = 1.0, + leafCol: str = "", + minWeightFractionPerNode: float = 0.0, + weightCol: Optional[str] = None, + bootstrap: Optional[bool] = True, + ) -> "RandomForestClassifier": """ setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \ probabilityCol="probability", rawPredictionCol="rawPrediction", \ @@ -1986,101 +2146,101 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "RandomForestClassificationModel": return RandomForestClassificationModel(java_model) - def setMaxDepth(self, value): + def setMaxDepth(self, value: int) -> "RandomForestClassifier": """ Sets the value of :py:attr:`maxDepth`. """ return self._set(maxDepth=value) - def setMaxBins(self, value): + def setMaxBins(self, value: int) -> "RandomForestClassifier": """ Sets the value of :py:attr:`maxBins`. """ return self._set(maxBins=value) - def setMinInstancesPerNode(self, value): + def setMinInstancesPerNode(self, value: int) -> "RandomForestClassifier": """ Sets the value of :py:attr:`minInstancesPerNode`. """ return self._set(minInstancesPerNode=value) - def setMinInfoGain(self, value): + def setMinInfoGain(self, value: float) -> "RandomForestClassifier": """ Sets the value of :py:attr:`minInfoGain`. """ return self._set(minInfoGain=value) - def setMaxMemoryInMB(self, value): + def setMaxMemoryInMB(self, value: int) -> "RandomForestClassifier": """ Sets the value of :py:attr:`maxMemoryInMB`. """ return self._set(maxMemoryInMB=value) - def setCacheNodeIds(self, value): + def setCacheNodeIds(self, value: bool) -> "RandomForestClassifier": """ Sets the value of :py:attr:`cacheNodeIds`. """ return self._set(cacheNodeIds=value) @since("1.4.0") - def setImpurity(self, value): + def setImpurity(self, value: str) -> "RandomForestClassifier": """ Sets the value of :py:attr:`impurity`. """ return self._set(impurity=value) @since("1.4.0") - def setNumTrees(self, value): + def setNumTrees(self, value: int) -> "RandomForestClassifier": """ Sets the value of :py:attr:`numTrees`. """ return self._set(numTrees=value) @since("3.0.0") - def setBootstrap(self, value): + def setBootstrap(self, value: bool) -> "RandomForestClassifier": """ Sets the value of :py:attr:`bootstrap`. """ return self._set(bootstrap=value) @since("1.4.0") - def setSubsamplingRate(self, value): + def setSubsamplingRate(self, value: float) -> "RandomForestClassifier": """ Sets the value of :py:attr:`subsamplingRate`. """ return self._set(subsamplingRate=value) @since("2.4.0") - def setFeatureSubsetStrategy(self, value): + def setFeatureSubsetStrategy(self, value: str) -> "RandomForestClassifier": """ Sets the value of :py:attr:`featureSubsetStrategy`. """ return self._set(featureSubsetStrategy=value) - def setSeed(self, value): + def setSeed(self, value: int) -> "RandomForestClassifier": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) - def setCheckpointInterval(self, value): + def setCheckpointInterval(self, value: int) -> "RandomForestClassifier": """ Sets the value of :py:attr:`checkpointInterval`. """ return self._set(checkpointInterval=value) @since("3.0.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "RandomForestClassifier": """ Sets the value of :py:attr:`weightCol`. """ return self._set(weightCol=value) @since("3.0.0") - def setMinWeightFractionPerNode(self, value): + def setMinWeightFractionPerNode(self, value: float) -> "RandomForestClassifier": """ Sets the value of :py:attr:`minWeightFractionPerNode`. """ @@ -2089,11 +2249,11 @@ def setMinWeightFractionPerNode(self, value): class RandomForestClassificationModel( _TreeEnsembleModel, - _JavaProbabilisticClassificationModel, + _JavaProbabilisticClassificationModel[Vector], _RandomForestClassifierParams, JavaMLWritable, - JavaMLReadable, - HasTrainingSummary, + JavaMLReadable["RandomForestClassificationModel"], + HasTrainingSummary["RandomForestClassificationTrainingSummary"], ): """ Model fitted by RandomForestClassifier. @@ -2102,7 +2262,7 @@ class RandomForestClassificationModel( """ @property - def featureImportances(self): + def featureImportances(self) -> Vector: """ Estimate of the importance of each feature. @@ -2119,15 +2279,15 @@ def featureImportances(self): """ return self._call_java("featureImportances") - @property + @property # type: ignore[misc] @since("2.0.0") - def trees(self): + def trees(self) -> List[DecisionTreeClassificationModel]: """Trees in this ensemble. Warning: These have null parent Estimators.""" return [DecisionTreeClassificationModel(m) for m in list(self._call_java("trees"))] - @property + @property # type: ignore[misc] @since("3.1.0") - def summary(self): + def summary(self) -> "RandomForestClassificationTrainingSummary": """ Gets summary (accuracy/precision/recall, objective history, total iterations) of model trained on the training set. An exception is thrown if `trainingSummary is None`. @@ -2146,7 +2306,9 @@ def summary(self): "No training summary available for this %s" % self.__class__.__name__ ) - def evaluate(self, dataset): + def evaluate( + self, dataset: DataFrame + ) -> Union["BinaryRandomForestClassificationSummary", "RandomForestClassificationSummary"]: """ Evaluates the model on a test dataset. @@ -2220,9 +2382,9 @@ class _GBTClassifierParams(_GBTParams, _HasVarianceImpurity): .. versionadded:: 3.0.0 """ - supportedLossTypes = ["logistic"] + supportedLossTypes: List[str] = ["logistic"] - lossType = Param( + lossType: Param[str] = Param( Params._dummy(), "lossType", "Loss function which GBT tries to minimize (case-insensitive). " @@ -2231,7 +2393,7 @@ class _GBTClassifierParams(_GBTParams, _HasVarianceImpurity): typeConverter=TypeConverters.toString, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_GBTClassifierParams, self).__init__(*args) self._setDefault( maxDepth=5, @@ -2253,7 +2415,7 @@ def __init__(self, *args): ) @since("1.4.0") - def getLossType(self): + def getLossType(self) -> str: """ Gets the value of lossType or its default value. """ @@ -2262,7 +2424,10 @@ def getLossType(self): @inherit_doc class GBTClassifier( - _JavaProbabilisticClassifier, _GBTClassifierParams, JavaMLWritable, JavaMLReadable + _JavaProbabilisticClassifier["GBTClassificationModel"], + _GBTClassifierParams, + JavaMLWritable, + JavaMLReadable["GBTClassifier"], ): """ `Gradient-Boosted Trees (GBTs) `_ @@ -2368,32 +2533,34 @@ class GBTClassifier( 0.01 """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - maxMemoryInMB=256, - cacheNodeIds=False, - checkpointInterval=10, - lossType="logistic", - maxIter=20, - stepSize=0.1, - seed=None, - subsamplingRate=1.0, - impurity="variance", - featureSubsetStrategy="all", - validationTol=0.01, - validationIndicatorCol=None, - leafCol="", - minWeightFractionPerNode=0.0, - weightCol=None, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + maxMemoryInMB: int = 256, + cacheNodeIds: bool = False, + checkpointInterval: int = 10, + lossType: str = "logistic", + maxIter: int = 20, + stepSize: float = 0.1, + seed: Optional[int] = None, + subsamplingRate: float = 1.0, + impurity: str = "variance", + featureSubsetStrategy: str = "all", + validationTol: float = 0.01, + validationIndicatorCol: Optional[str] = None, + leafCol: str = "", + minWeightFractionPerNode: float = 0.0, + weightCol: Optional[str] = None, ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -2416,29 +2583,29 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxDepth=5, - maxBins=32, - minInstancesPerNode=1, - minInfoGain=0.0, - maxMemoryInMB=256, - cacheNodeIds=False, - checkpointInterval=10, - lossType="logistic", - maxIter=20, - stepSize=0.1, - seed=None, - subsamplingRate=1.0, - impurity="variance", - featureSubsetStrategy="all", - validationTol=0.01, - validationIndicatorCol=None, - leafCol="", - minWeightFractionPerNode=0.0, - weightCol=None, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxDepth: int = 5, + maxBins: int = 32, + minInstancesPerNode: int = 1, + minInfoGain: float = 0.0, + maxMemoryInMB: int = 256, + cacheNodeIds: bool = False, + checkpointInterval: int = 10, + lossType: str = "logistic", + maxIter: int = 20, + stepSize: float = 0.1, + seed: Optional[int] = None, + subsamplingRate: float = 1.0, + impurity: str = "variance", + featureSubsetStrategy: str = "all", + validationTol: float = 0.01, + validationIndicatorCol: Optional[str] = None, + leafCol: str = "", + minWeightFractionPerNode: float = 0.0, + weightCol: Optional[str] = None, + ) -> "GBTClassifier": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxDepth=5, maxBins=32, minInstancesPerNode=1, minInfoGain=0.0, \ @@ -2452,117 +2619,117 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "GBTClassificationModel": return GBTClassificationModel(java_model) - def setMaxDepth(self, value): + def setMaxDepth(self, value: int) -> "GBTClassifier": """ Sets the value of :py:attr:`maxDepth`. """ return self._set(maxDepth=value) - def setMaxBins(self, value): + def setMaxBins(self, value: int) -> "GBTClassifier": """ Sets the value of :py:attr:`maxBins`. """ return self._set(maxBins=value) - def setMinInstancesPerNode(self, value): + def setMinInstancesPerNode(self, value: int) -> "GBTClassifier": """ Sets the value of :py:attr:`minInstancesPerNode`. """ return self._set(minInstancesPerNode=value) - def setMinInfoGain(self, value): + def setMinInfoGain(self, value: float) -> "GBTClassifier": """ Sets the value of :py:attr:`minInfoGain`. """ return self._set(minInfoGain=value) - def setMaxMemoryInMB(self, value): + def setMaxMemoryInMB(self, value: int) -> "GBTClassifier": """ Sets the value of :py:attr:`maxMemoryInMB`. """ return self._set(maxMemoryInMB=value) - def setCacheNodeIds(self, value): + def setCacheNodeIds(self, value: bool) -> "GBTClassifier": """ Sets the value of :py:attr:`cacheNodeIds`. """ return self._set(cacheNodeIds=value) @since("1.4.0") - def setImpurity(self, value): + def setImpurity(self, value: str) -> "GBTClassifier": """ Sets the value of :py:attr:`impurity`. """ return self._set(impurity=value) @since("1.4.0") - def setLossType(self, value): + def setLossType(self, value: str) -> "GBTClassifier": """ Sets the value of :py:attr:`lossType`. """ return self._set(lossType=value) @since("1.4.0") - def setSubsamplingRate(self, value): + def setSubsamplingRate(self, value: float) -> "GBTClassifier": """ Sets the value of :py:attr:`subsamplingRate`. """ return self._set(subsamplingRate=value) @since("2.4.0") - def setFeatureSubsetStrategy(self, value): + def setFeatureSubsetStrategy(self, value: str) -> "GBTClassifier": """ Sets the value of :py:attr:`featureSubsetStrategy`. """ return self._set(featureSubsetStrategy=value) @since("3.0.0") - def setValidationIndicatorCol(self, value): + def setValidationIndicatorCol(self, value: str) -> "GBTClassifier": """ Sets the value of :py:attr:`validationIndicatorCol`. """ return self._set(validationIndicatorCol=value) @since("1.4.0") - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "GBTClassifier": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) @since("1.4.0") - def setCheckpointInterval(self, value): + def setCheckpointInterval(self, value: int) -> "GBTClassifier": """ Sets the value of :py:attr:`checkpointInterval`. """ return self._set(checkpointInterval=value) @since("1.4.0") - def setSeed(self, value): + def setSeed(self, value: int) -> "GBTClassifier": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) @since("1.4.0") - def setStepSize(self, value): + def setStepSize(self, value: int) -> "GBTClassifier": """ Sets the value of :py:attr:`stepSize`. """ return self._set(stepSize=value) @since("3.0.0") - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "GBTClassifier": """ Sets the value of :py:attr:`weightCol`. """ return self._set(weightCol=value) @since("3.0.0") - def setMinWeightFractionPerNode(self, value): + def setMinWeightFractionPerNode(self, value: float) -> "GBTClassifier": """ Sets the value of :py:attr:`minWeightFractionPerNode`. """ @@ -2571,10 +2738,10 @@ def setMinWeightFractionPerNode(self, value): class GBTClassificationModel( _TreeEnsembleModel, - _JavaProbabilisticClassificationModel, + _JavaProbabilisticClassificationModel[Vector], _GBTClassifierParams, JavaMLWritable, - JavaMLReadable, + JavaMLReadable["GBTClassificationModel"], ): """ Model fitted by GBTClassifier. @@ -2583,7 +2750,7 @@ class GBTClassificationModel( """ @property - def featureImportances(self): + def featureImportances(self) -> Vector: """ Estimate of the importance of each feature. @@ -2600,13 +2767,13 @@ def featureImportances(self): """ return self._call_java("featureImportances") - @property + @property # type: ignore[misc] @since("2.0.0") - def trees(self): + def trees(self) -> List[DecisionTreeRegressionModel]: """Trees in this ensemble. Warning: These have null parent Estimators.""" return [DecisionTreeRegressionModel(m) for m in list(self._call_java("trees"))] - def evaluateEachIteration(self, dataset): + def evaluateEachIteration(self, dataset: DataFrame) -> List[float]: """ Method to compute error or loss for every iteration of gradient boosting. @@ -2627,13 +2794,13 @@ class _NaiveBayesParams(_PredictorParams, HasWeightCol): .. versionadded:: 3.0.0 """ - smoothing = Param( + smoothing: Param[float] = Param( Params._dummy(), "smoothing", "The smoothing parameter, should be >= 0, " + "default is 1.0", typeConverter=TypeConverters.toFloat, ) - modelType = Param( + modelType: Param[str] = Param( Params._dummy(), "modelType", "The model type which is a string " @@ -2642,19 +2809,19 @@ class _NaiveBayesParams(_PredictorParams, HasWeightCol): typeConverter=TypeConverters.toString, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_NaiveBayesParams, self).__init__(*args) self._setDefault(smoothing=1.0, modelType="multinomial") @since("1.5.0") - def getSmoothing(self): + def getSmoothing(self) -> float: """ Gets the value of smoothing or its default value. """ return self.getOrDefault(self.smoothing) @since("1.5.0") - def getModelType(self): + def getModelType(self) -> str: """ Gets the value of modelType or its default value. """ @@ -2663,12 +2830,12 @@ def getModelType(self): @inherit_doc class NaiveBayes( - _JavaProbabilisticClassifier, + _JavaProbabilisticClassifier["NaiveBayesModel"], _NaiveBayesParams, HasThresholds, HasWeightCol, JavaMLWritable, - JavaMLReadable, + JavaMLReadable["NaiveBayes"], ): """ Naive Bayes Classifiers. @@ -2763,19 +2930,21 @@ class NaiveBayes( DenseMatrix(0, 0, [...], ...) """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - probabilityCol="probability", - rawPredictionCol="rawPrediction", - smoothing=1.0, - modelType="multinomial", - thresholds=None, - weightCol=None, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + probabilityCol: str = "probability", + rawPredictionCol: str = "rawPrediction", + smoothing: float = 1.0, + modelType: str = "multinomial", + thresholds: Optional[List[float]] = None, + weightCol: Optional[str] = None, ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -2794,16 +2963,16 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - probabilityCol="probability", - rawPredictionCol="rawPrediction", - smoothing=1.0, - modelType="multinomial", - thresholds=None, - weightCol=None, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + probabilityCol: str = "probability", + rawPredictionCol: str = "rawPrediction", + smoothing: float = 1.0, + modelType: str = "multinomial", + thresholds: Optional[List[float]] = None, + weightCol: Optional[str] = None, + ) -> "NaiveBayes": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ probabilityCol="probability", rawPredictionCol="rawPrediction", smoothing=1.0, \ @@ -2813,24 +2982,24 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "NaiveBayesModel": return NaiveBayesModel(java_model) @since("1.5.0") - def setSmoothing(self, value): + def setSmoothing(self, value: float) -> "NaiveBayes": """ Sets the value of :py:attr:`smoothing`. """ return self._set(smoothing=value) @since("1.5.0") - def setModelType(self, value): + def setModelType(self, value: str) -> "NaiveBayes": """ Sets the value of :py:attr:`modelType`. """ return self._set(modelType=value) - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "NaiveBayes": """ Sets the value of :py:attr:`weightCol`. """ @@ -2838,7 +3007,10 @@ def setWeightCol(self, value): class NaiveBayesModel( - _JavaProbabilisticClassificationModel, _NaiveBayesParams, JavaMLWritable, JavaMLReadable + _JavaProbabilisticClassificationModel[Vector], + _NaiveBayesParams, + JavaMLWritable, + JavaMLReadable["NaiveBayesModel"], ): """ Model fitted by NaiveBayes. @@ -2846,25 +3018,25 @@ class NaiveBayesModel( .. versionadded:: 1.5.0 """ - @property + @property # type: ignore[misc] @since("2.0.0") - def pi(self): + def pi(self) -> Vector: """ log of class priors. """ return self._call_java("pi") - @property + @property # type: ignore[misc] @since("2.0.0") - def theta(self): + def theta(self) -> Matrix: """ log of class conditional probabilities. """ return self._call_java("theta") - @property + @property # type: ignore[misc] @since("3.0.0") - def sigma(self): + def sigma(self) -> Matrix: """ variance of each feature. """ @@ -2886,7 +3058,7 @@ class _MultilayerPerceptronParams( .. versionadded:: 3.0.0 """ - layers = Param( + layers: Param[List[int]] = Param( Params._dummy(), "layers", "Sizes of layers from input layer to output layer " @@ -2894,32 +3066,32 @@ class _MultilayerPerceptronParams( + "neurons and output layer of 10 neurons.", typeConverter=TypeConverters.toListInt, ) - solver = Param( + solver: Param[str] = Param( Params._dummy(), "solver", "The solver algorithm for optimization. Supported " + "options: l-bfgs, gd.", typeConverter=TypeConverters.toString, ) - initialWeights = Param( + initialWeights: Param[Vector] = Param( Params._dummy(), "initialWeights", "The initial weights of the model.", typeConverter=TypeConverters.toVector, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_MultilayerPerceptronParams, self).__init__(*args) self._setDefault(maxIter=100, tol=1e-6, blockSize=128, stepSize=0.03, solver="l-bfgs") @since("1.6.0") - def getLayers(self): + def getLayers(self) -> List[int]: """ Gets the value of layers or its default value. """ return self.getOrDefault(self.layers) @since("2.0.0") - def getInitialWeights(self): + def getInitialWeights(self) -> Vector: """ Gets the value of initialWeights or its default value. """ @@ -2928,7 +3100,10 @@ def getInitialWeights(self): @inherit_doc class MultilayerPerceptronClassifier( - _JavaProbabilisticClassifier, _MultilayerPerceptronParams, JavaMLWritable, JavaMLReadable + _JavaProbabilisticClassifier["MultilayerPerceptronClassificationModel"], + _MultilayerPerceptronParams, + JavaMLWritable, + JavaMLReadable["MultilayerPerceptronClassifier"], ): """ Classifier trainer based on the Multilayer Perceptron. @@ -3005,23 +3180,25 @@ class MultilayerPerceptronClassifier( True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxIter=100, - tol=1e-6, - seed=None, - layers=None, - blockSize=128, - stepSize=0.03, - solver="l-bfgs", - initialWeights=None, - probabilityCol="probability", - rawPredictionCol="rawPrediction", + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxIter: int = 100, + tol: float = 1e-6, + seed: Optional[int] = None, + layers: Optional[List[int]] = None, + blockSize: int = 128, + stepSize: float = 0.03, + solver: str = "l-bfgs", + initialWeights: Optional[Vector] = None, + probabilityCol: str = "probability", + rawPredictionCol: str = "rawPrediction", ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -3041,20 +3218,19 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - maxIter=100, - tol=1e-6, - seed=None, - layers=None, - blockSize=128, - stepSize=0.03, - solver="l-bfgs", - initialWeights=None, - probabilityCol="probability", - rawPredictionCol="rawPrediction", - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + maxIter: int = 100, + tol: float = 1e-6, + seed: Optional[int] = None, + layers: Optional[List[int]] = None, + blockSize: int = 128, + stepSize: float = 0.03, + solver: str = "l-bfgs", + initialWeights: Optional[Vector] = None, + probabilityCol: str = "probability", + ) -> "MultilayerPerceptronClassifier": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ maxIter=100, tol=1e-6, seed=None, layers=None, blockSize=128, stepSize=0.03, \ @@ -3065,56 +3241,56 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "MultilayerPerceptronClassificationModel": return MultilayerPerceptronClassificationModel(java_model) @since("1.6.0") - def setLayers(self, value): + def setLayers(self, value: List[int]) -> "MultilayerPerceptronClassifier": """ Sets the value of :py:attr:`layers`. """ return self._set(layers=value) @since("1.6.0") - def setBlockSize(self, value): + def setBlockSize(self, value: int) -> "MultilayerPerceptronClassifier": """ Sets the value of :py:attr:`blockSize`. """ return self._set(blockSize=value) @since("2.0.0") - def setInitialWeights(self, value): + def setInitialWeights(self, value: Vector) -> "MultilayerPerceptronClassifier": """ Sets the value of :py:attr:`initialWeights`. """ return self._set(initialWeights=value) - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "MultilayerPerceptronClassifier": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) - def setSeed(self, value): + def setSeed(self, value: int) -> "MultilayerPerceptronClassifier": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) - def setTol(self, value): + def setTol(self, value: float) -> "MultilayerPerceptronClassifier": """ Sets the value of :py:attr:`tol`. """ return self._set(tol=value) @since("2.0.0") - def setStepSize(self, value): + def setStepSize(self, value: float) -> "MultilayerPerceptronClassifier": """ Sets the value of :py:attr:`stepSize`. """ return self._set(stepSize=value) - def setSolver(self, value): + def setSolver(self, value: str) -> "MultilayerPerceptronClassifier": """ Sets the value of :py:attr:`solver`. """ @@ -3122,11 +3298,11 @@ def setSolver(self, value): class MultilayerPerceptronClassificationModel( - _JavaProbabilisticClassificationModel, + _JavaProbabilisticClassificationModel[Vector], _MultilayerPerceptronParams, JavaMLWritable, - JavaMLReadable, - HasTrainingSummary, + JavaMLReadable["MultilayerPerceptronClassificationModel"], + HasTrainingSummary["MultilayerPerceptronClassificationTrainingSummary"], ): """ Model fitted by MultilayerPerceptronClassifier. @@ -3134,16 +3310,16 @@ class MultilayerPerceptronClassificationModel( .. versionadded:: 1.6.0 """ - @property + @property # type: ignore[misc] @since("2.0.0") - def weights(self): + def weights(self) -> Vector: """ the weights of layers. """ return self._call_java("weights") @since("3.1.0") - def summary(self): + def summary(self) -> "MultilayerPerceptronClassificationTrainingSummary": """ Gets summary (accuracy/precision/recall, objective history, total iterations) of model trained on the training set. An exception is thrown if `trainingSummary is None`. @@ -3157,7 +3333,7 @@ def summary(self): "No training summary available for this %s" % self.__class__.__name__ ) - def evaluate(self, dataset): + def evaluate(self, dataset: DataFrame) -> "MultilayerPerceptronClassificationSummary": """ Evaluates the model on a test dataset. @@ -3202,10 +3378,10 @@ class _OneVsRestParams(_ClassifierParams, HasWeightCol): Params for :py:class:`OneVsRest` and :py:class:`OneVsRestModelModel`. """ - classifier = Param(Params._dummy(), "classifier", "base binary classifier") + classifier: Param[Classifier] = Param(Params._dummy(), "classifier", "base binary classifier") @since("2.0.0") - def getClassifier(self): + def getClassifier(self) -> Classifier: """ Gets the value of classifier or its default value. """ @@ -3213,7 +3389,14 @@ def getClassifier(self): @inherit_doc -class OneVsRest(Estimator, _OneVsRestParams, HasParallelism, MLReadable, MLWritable): +class OneVsRest( + Estimator["OneVsRestModel"], + _OneVsRestParams, + HasParallelism, + MLReadable["OneVsRest"], + MLWritable, + Generic[CM], +): """ Reduction of Multiclass Classification to Binary Classification. Performs reduction using one against all strategy. @@ -3264,17 +3447,19 @@ class OneVsRest(Estimator, _OneVsRestParams, HasParallelism, MLReadable, MLWrita ['features', 'rawPrediction', 'newPrediction'] """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - rawPredictionCol="rawPrediction", - classifier=None, - weightCol=None, - parallelism=1, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + rawPredictionCol: str = "rawPrediction", + classifier: Optional[Classifier[CM]] = None, + weightCol: Optional[str] = None, + parallelism: int = 1, ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -3290,14 +3475,14 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - rawPredictionCol="rawPrediction", - classifier=None, - weightCol=None, - parallelism=1, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + rawPredictionCol: str = "rawPrediction", + classifier: Optional[Classifier[CM]] = None, + weightCol: Optional[str] = None, + parallelism: int = 1, + ) -> "OneVsRest": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ rawPredictionCol="rawPrediction", classifier=None, weightCol=None, parallelism=1): @@ -3307,55 +3492,57 @@ def setParams( return self._set(**kwargs) @since("2.0.0") - def setClassifier(self, value): + def setClassifier(self, value: Classifier[CM]) -> "OneVsRest": """ Sets the value of :py:attr:`classifier`. """ return self._set(classifier=value) - def setLabelCol(self, value): + def setLabelCol(self, value: str) -> "OneVsRest": """ Sets the value of :py:attr:`labelCol`. """ return self._set(labelCol=value) - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "OneVsRest": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "OneVsRest": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) - def setRawPredictionCol(self, value): + def setRawPredictionCol(self, value: str) -> "OneVsRest": """ Sets the value of :py:attr:`rawPredictionCol`. """ return self._set(rawPredictionCol=value) - def setWeightCol(self, value): + def setWeightCol(self, value: str) -> "OneVsRest": """ Sets the value of :py:attr:`weightCol`. """ return self._set(weightCol=value) - def setParallelism(self, value): + def setParallelism(self, value: int) -> "OneVsRest": """ Sets the value of :py:attr:`parallelism`. """ return self._set(parallelism=value) - def _fit(self, dataset): + def _fit(self, dataset: DataFrame) -> "OneVsRestModel": labelCol = self.getLabelCol() featuresCol = self.getFeaturesCol() predictionCol = self.getPredictionCol() classifier = self.getClassifier() - numClasses = int(dataset.agg({labelCol: "max"}).head()["max(" + labelCol + ")"]) + 1 + numClasses = ( + int(cast(Row, dataset.agg({labelCol: "max"}).head())["max(" + labelCol + ")"]) + 1 + ) weightCol = None if self.isDefined(self.weightCol) and self.getWeightCol(): @@ -3376,7 +3563,7 @@ def _fit(self, dataset): if handlePersistence: multiclassLabeled.persist(StorageLevel.MEMORY_AND_DISK) - def trainSingleClass(index): + def trainSingleClass(index: int) -> CM: binaryLabelCol = "mc2b$" + str(index) trainingDataset = multiclassLabeled.withColumn( binaryLabelCol, @@ -3390,7 +3577,7 @@ def trainSingleClass(index): ] ) if weightCol: - paramMap[classifier.weightCol] = weightCol + paramMap[cast(HasWeightCol, classifier).weightCol] = weightCol return classifier.fit(trainingDataset, paramMap) pool = ThreadPool(processes=min(self.getParallelism(), numClasses)) @@ -3402,7 +3589,7 @@ def trainSingleClass(index): return self._copyValues(OneVsRestModel(models=models)) - def copy(self, extra=None): + def copy(self, extra: Optional["ParamMap"] = None) -> "OneVsRest": """ Creates a copy of this instance with a randomly generated uid and some extra params. This creates a deep copy of the embedded paramMap, @@ -3428,7 +3615,7 @@ def copy(self, extra=None): return newOvr @classmethod - def _from_java(cls, java_stage): + def _from_java(cls, java_stage: "JavaObject") -> "OneVsRest": """ Given a Java OneVsRest, create and return a Python wrapper of it. Used for ML persistence. @@ -3437,7 +3624,7 @@ def _from_java(cls, java_stage): labelCol = java_stage.getLabelCol() predictionCol = java_stage.getPredictionCol() rawPredictionCol = java_stage.getRawPredictionCol() - classifier = JavaParams._from_java(java_stage.getClassifier()) + classifier: Classifier = JavaParams._from_java(java_stage.getClassifier()) parallelism = java_stage.getParallelism() py_stage = cls( featuresCol=featuresCol, @@ -3452,7 +3639,7 @@ def _from_java(cls, java_stage): py_stage._resetUid(java_stage.uid()) return py_stage - def _to_java(self): + def _to_java(self) -> "JavaObject": """ Transfer this instance to a Java OneVsRest. Used for ML persistence. @@ -3464,7 +3651,7 @@ def _to_java(self): _java_obj = JavaParams._new_java_obj( "org.apache.spark.ml.classification.OneVsRest", self.uid ) - _java_obj.setClassifier(self.getClassifier()._to_java()) + _java_obj.setClassifier(cast(_JavaClassifier, self.getClassifier())._to_java()) _java_obj.setParallelism(self.getParallelism()) _java_obj.setFeaturesCol(self.getFeaturesCol()) _java_obj.setLabelCol(self.getLabelCol()) @@ -3475,35 +3662,40 @@ def _to_java(self): return _java_obj @classmethod - def read(cls): + def read(cls) -> "OneVsRestReader": return OneVsRestReader(cls) - def write(self): + def write(self) -> MLWriter: if isinstance(self.getClassifier(), JavaMLWritable): - return JavaMLWriter(self) + return JavaMLWriter(self) # type: ignore[arg-type] else: return OneVsRestWriter(self) class _OneVsRestSharedReadWrite: @staticmethod - def saveImpl(instance, sc, path, extraMetadata=None): + def saveImpl( + instance: Union[OneVsRest, "OneVsRestModel"], + sc: SparkContext, + path: str, + extraMetadata: Optional[Dict[str, Any]] = None, + ) -> None: skipParams = ["classifier"] jsonParams = DefaultParamsWriter.extractJsonParams(instance, skipParams) DefaultParamsWriter.saveMetadata( instance, path, sc, paramMap=jsonParams, extraMetadata=extraMetadata ) classifierPath = os.path.join(path, "classifier") - instance.getClassifier().save(classifierPath) + cast(MLWritable, instance.getClassifier()).save(classifierPath) @staticmethod - def loadClassifier(path, sc): + def loadClassifier(path: str, sc: SparkContext) -> Union[OneVsRest, "OneVsRestModel"]: classifierPath = os.path.join(path, "classifier") return DefaultParamsReader.loadParamsInstance(classifierPath, sc) @staticmethod - def validateParams(instance): - elems_to_check = [instance.getClassifier()] + def validateParams(instance: Union[OneVsRest, "OneVsRestModel"]) -> None: + elems_to_check: List[Params] = [instance.getClassifier()] if isinstance(instance, OneVsRestModel): elems_to_check.extend(instance.models) @@ -3516,34 +3708,39 @@ def validateParams(instance): @inherit_doc -class OneVsRestReader(MLReader): - def __init__(self, cls): +class OneVsRestReader(MLReader[OneVsRest]): + def __init__(self, cls: Type[OneVsRest]) -> None: super(OneVsRestReader, self).__init__() self.cls = cls - def load(self, path): + def load(self, path: str) -> OneVsRest: metadata = DefaultParamsReader.loadMetadata(path, self.sc) if not DefaultParamsReader.isPythonParamsInstance(metadata): - return JavaMLReader(self.cls).load(path) + return JavaMLReader(self.cls).load(path) # type: ignore[arg-type] else: - classifier = _OneVsRestSharedReadWrite.loadClassifier(path, self.sc) - ova = OneVsRest(classifier=classifier)._resetUid(metadata["uid"]) + classifier = cast(Classifier, _OneVsRestSharedReadWrite.loadClassifier(path, self.sc)) + ova: OneVsRest = OneVsRest(classifier=classifier)._resetUid(metadata["uid"]) DefaultParamsReader.getAndSetParams(ova, metadata, skipParams=["classifier"]) return ova @inherit_doc class OneVsRestWriter(MLWriter): - def __init__(self, instance): + def __init__(self, instance: OneVsRest): super(OneVsRestWriter, self).__init__() self.instance = instance - def saveImpl(self, path): + def saveImpl(self, path: str) -> None: _OneVsRestSharedReadWrite.validateParams(self.instance) _OneVsRestSharedReadWrite.saveImpl(self.instance, self.sc, path) -class OneVsRestModel(Model, _OneVsRestParams, MLReadable, MLWritable): +class OneVsRestModel( + Model, + _OneVsRestParams, + MLReadable["OneVsRestModel"], + MLWritable, +): """ Model fitted by OneVsRest. This stores the models resulting from training k binary classifiers: one for each class. @@ -3553,32 +3750,34 @@ class OneVsRestModel(Model, _OneVsRestParams, MLReadable, MLWritable): .. versionadded:: 2.0.0 """ - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "OneVsRestModel": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) - def setPredictionCol(self, value): + def setPredictionCol(self, value: str) -> "OneVsRestModel": """ Sets the value of :py:attr:`predictionCol`. """ return self._set(predictionCol=value) - def setRawPredictionCol(self, value): + def setRawPredictionCol(self, value: str) -> "OneVsRestModel": """ Sets the value of :py:attr:`rawPredictionCol`. """ return self._set(rawPredictionCol=value) - def __init__(self, models): + def __init__(self, models: List[ClassificationModel]): super(OneVsRestModel, self).__init__() self.models = models if not isinstance(models[0], JavaMLWritable): return # set java instance - java_models = [model._to_java() for model in self.models] + java_models = [cast(_JavaClassificationModel, model)._to_java() for model in self.models] sc = SparkContext._active_spark_context + assert sc is not None and sc._gateway is not None + java_models_array = JavaWrapper._new_java_array( java_models, sc._gateway.jvm.org.apache.spark.ml.classification.ClassificationModel ) @@ -3591,7 +3790,7 @@ def __init__(self, models): java_models_array, ) - def _transform(self, dataset): + def _transform(self, dataset: DataFrame) -> DataFrame: # determine the input columns: these need to be passed through origCols = dataset.columns @@ -3635,8 +3834,8 @@ def _transform(self, dataset): if self.getRawPredictionCol(): - def func(predictions): - predArray = [] + def func(predictions: Iterable[float]) -> Vector: + predArray: List[float] = [] for x in predictions: predArray.append(x) return Vectors.dense(predArray) @@ -3659,7 +3858,7 @@ def func(predictions): ) return aggregatedDataset.drop(accColName) - def copy(self, extra=None): + def copy(self, extra: Optional["ParamMap"] = None) -> "OneVsRestModel": """ Creates a copy of this instance with a randomly generated uid and some extra params. This creates a deep copy of the embedded paramMap, @@ -3684,7 +3883,7 @@ def copy(self, extra=None): return newModel @classmethod - def _from_java(cls, java_stage): + def _from_java(cls, java_stage: "JavaObject") -> "OneVsRestModel": """ Given a Java OneVsRestModel, create and return a Python wrapper of it. Used for ML persistence. @@ -3692,8 +3891,10 @@ def _from_java(cls, java_stage): featuresCol = java_stage.getFeaturesCol() labelCol = java_stage.getLabelCol() predictionCol = java_stage.getPredictionCol() - classifier = JavaParams._from_java(java_stage.getClassifier()) - models = [JavaParams._from_java(model) for model in java_stage.models()] + classifier: Classifier = JavaParams._from_java(java_stage.getClassifier()) + models: List[ClassificationModel] = [ + JavaParams._from_java(model) for model in java_stage.models() + ] py_stage = cls(models=models).setPredictionCol(predictionCol).setFeaturesCol(featuresCol) py_stage._set(labelCol=labelCol) if java_stage.isDefined(java_stage.getParam("weightCol")): @@ -3702,7 +3903,7 @@ def _from_java(cls, java_stage): py_stage._resetUid(java_stage.uid()) return py_stage - def _to_java(self): + def _to_java(self) -> "JavaObject": """ Transfer this instance to a Java OneVsRestModel. Used for ML persistence. @@ -3712,7 +3913,9 @@ def _to_java(self): Java object equivalent to this instance. """ sc = SparkContext._active_spark_context - java_models = [model._to_java() for model in self.models] + assert sc is not None and sc._gateway is not None + + java_models = [cast(_JavaClassificationModel, model)._to_java() for model in self.models] java_models_array = JavaWrapper._new_java_array( java_models, sc._gateway.jvm.org.apache.spark.ml.classification.ClassificationModel ) @@ -3723,7 +3926,7 @@ def _to_java(self): metadata.empty(), java_models_array, ) - _java_obj.set("classifier", self.getClassifier()._to_java()) + _java_obj.set("classifier", cast(_JavaClassifier, self.getClassifier())._to_java()) _java_obj.set("featuresCol", self.getFeaturesCol()) _java_obj.set("labelCol", self.getLabelCol()) _java_obj.set("predictionCol", self.getPredictionCol()) @@ -3732,28 +3935,31 @@ def _to_java(self): return _java_obj @classmethod - def read(cls): + def read(cls) -> "OneVsRestModelReader": return OneVsRestModelReader(cls) - def write(self): + def write(self) -> MLWriter: if all( - map(lambda elem: isinstance(elem, JavaMLWritable), [self.getClassifier()] + self.models) + map( + lambda elem: isinstance(elem, JavaMLWritable), + [self.getClassifier()] + self.models, # type: ignore[operator] + ) ): - return JavaMLWriter(self) + return JavaMLWriter(self) # type: ignore[arg-type] else: return OneVsRestModelWriter(self) @inherit_doc -class OneVsRestModelReader(MLReader): - def __init__(self, cls): +class OneVsRestModelReader(MLReader[OneVsRestModel]): + def __init__(self, cls: Type[OneVsRestModel]): super(OneVsRestModelReader, self).__init__() self.cls = cls - def load(self, path): + def load(self, path: str) -> OneVsRestModel: metadata = DefaultParamsReader.loadMetadata(path, self.sc) if not DefaultParamsReader.isPythonParamsInstance(metadata): - return JavaMLReader(self.cls).load(path) + return JavaMLReader(self.cls).load(path) # type: ignore[arg-type] else: classifier = _OneVsRestSharedReadWrite.loadClassifier(path, self.sc) numClasses = metadata["numClasses"] @@ -3761,7 +3967,9 @@ def load(self, path): for idx in range(numClasses): subModelPath = os.path.join(path, f"model_{idx}") subModels[idx] = DefaultParamsReader.loadParamsInstance(subModelPath, self.sc) - ovaModel = OneVsRestModel(subModels)._resetUid(metadata["uid"]) + ovaModel = OneVsRestModel(cast(List[ClassificationModel], subModels))._resetUid( + metadata["uid"] + ) ovaModel.set(ovaModel.classifier, classifier) DefaultParamsReader.getAndSetParams(ovaModel, metadata, skipParams=["classifier"]) return ovaModel @@ -3769,11 +3977,11 @@ def load(self, path): @inherit_doc class OneVsRestModelWriter(MLWriter): - def __init__(self, instance): + def __init__(self, instance: OneVsRestModel): super(OneVsRestModelWriter, self).__init__() self.instance = instance - def saveImpl(self, path): + def saveImpl(self, path: str) -> None: _OneVsRestSharedReadWrite.validateParams(self.instance) instance = self.instance numClasses = len(instance.models) @@ -3781,12 +3989,15 @@ def saveImpl(self, path): _OneVsRestSharedReadWrite.saveImpl(instance, self.sc, path, extraMetadata=extraMetadata) for idx in range(numClasses): subModelPath = os.path.join(path, f"model_{idx}") - instance.models[idx].save(subModelPath) + cast(MLWritable, instance.models[idx]).save(subModelPath) @inherit_doc class FMClassifier( - _JavaProbabilisticClassifier, _FactorizationMachinesParams, JavaMLWritable, JavaMLReadable + _JavaProbabilisticClassifier["FMClassificationModel"], + _FactorizationMachinesParams, + JavaMLWritable, + JavaMLReadable["FMClassifier"], ): """ Factorization Machines learning algorithm for classification. @@ -3849,27 +4060,29 @@ class FMClassifier( True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - probabilityCol="probability", - rawPredictionCol="rawPrediction", - factorSize=8, - fitIntercept=True, - fitLinear=True, - regParam=0.0, - miniBatchFraction=1.0, - initStd=0.01, - maxIter=100, - stepSize=1.0, - tol=1e-6, - solver="adamW", - thresholds=None, - seed=None, + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + probabilityCol: str = "probability", + rawPredictionCol: str = "rawPrediction", + factorSize: int = 8, + fitIntercept: bool = True, + fitLinear: bool = True, + regParam: float = 0.0, + miniBatchFraction: float = 1.0, + initStd: float = 0.01, + maxIter: int = 100, + stepSize: float = 1.0, + tol: float = 1e-6, + solver: str = "adamW", + thresholds: Optional[List[float]] = None, + seed: Optional[int] = None, ): """ __init__(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ @@ -3890,24 +4103,24 @@ def __init__( def setParams( self, *, - featuresCol="features", - labelCol="label", - predictionCol="prediction", - probabilityCol="probability", - rawPredictionCol="rawPrediction", - factorSize=8, - fitIntercept=True, - fitLinear=True, - regParam=0.0, - miniBatchFraction=1.0, - initStd=0.01, - maxIter=100, - stepSize=1.0, - tol=1e-6, - solver="adamW", - thresholds=None, - seed=None, - ): + featuresCol: str = "features", + labelCol: str = "label", + predictionCol: str = "prediction", + probabilityCol: str = "probability", + rawPredictionCol: str = "rawPrediction", + factorSize: int = 8, + fitIntercept: bool = True, + fitLinear: bool = True, + regParam: float = 0.0, + miniBatchFraction: float = 1.0, + initStd: float = 0.01, + maxIter: int = 100, + stepSize: float = 1.0, + tol: float = 1e-6, + solver: str = "adamW", + thresholds: Optional[List[float]] = None, + seed: Optional[int] = None, + ) -> "FMClassifier": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ probabilityCol="probability", rawPredictionCol="rawPrediction", \ @@ -3919,81 +4132,81 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "FMClassificationModel": return FMClassificationModel(java_model) @since("3.0.0") - def setFactorSize(self, value): + def setFactorSize(self, value: int) -> "FMClassifier": """ Sets the value of :py:attr:`factorSize`. """ return self._set(factorSize=value) @since("3.0.0") - def setFitLinear(self, value): + def setFitLinear(self, value: bool) -> "FMClassifier": """ Sets the value of :py:attr:`fitLinear`. """ return self._set(fitLinear=value) @since("3.0.0") - def setMiniBatchFraction(self, value): + def setMiniBatchFraction(self, value: float) -> "FMClassifier": """ Sets the value of :py:attr:`miniBatchFraction`. """ return self._set(miniBatchFraction=value) @since("3.0.0") - def setInitStd(self, value): + def setInitStd(self, value: float) -> "FMClassifier": """ Sets the value of :py:attr:`initStd`. """ return self._set(initStd=value) @since("3.0.0") - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "FMClassifier": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) @since("3.0.0") - def setStepSize(self, value): + def setStepSize(self, value: float) -> "FMClassifier": """ Sets the value of :py:attr:`stepSize`. """ return self._set(stepSize=value) @since("3.0.0") - def setTol(self, value): + def setTol(self, value: float) -> "FMClassifier": """ Sets the value of :py:attr:`tol`. """ return self._set(tol=value) @since("3.0.0") - def setSolver(self, value): + def setSolver(self, value: str) -> "FMClassifier": """ Sets the value of :py:attr:`solver`. """ return self._set(solver=value) @since("3.0.0") - def setSeed(self, value): + def setSeed(self, value: int) -> "FMClassifier": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) @since("3.0.0") - def setFitIntercept(self, value): + def setFitIntercept(self, value: bool) -> "FMClassifier": """ Sets the value of :py:attr:`fitIntercept`. """ return self._set(fitIntercept=value) @since("3.0.0") - def setRegParam(self, value): + def setRegParam(self, value: float) -> "FMClassifier": """ Sets the value of :py:attr:`regParam`. """ @@ -4001,10 +4214,10 @@ def setRegParam(self, value): class FMClassificationModel( - _JavaProbabilisticClassificationModel, + _JavaProbabilisticClassificationModel[Vector], _FactorizationMachinesParams, JavaMLWritable, - JavaMLReadable, + JavaMLReadable["FMClassificationModel"], HasTrainingSummary, ): """ @@ -4013,32 +4226,32 @@ class FMClassificationModel( .. versionadded:: 3.0.0 """ - @property + @property # type: ignore[misc] @since("3.0.0") - def intercept(self): + def intercept(self) -> float: """ Model intercept. """ return self._call_java("intercept") - @property + @property # type: ignore[misc] @since("3.0.0") - def linear(self): + def linear(self) -> Vector: """ Model linear term. """ return self._call_java("linear") - @property + @property # type: ignore[misc] @since("3.0.0") - def factors(self): + def factors(self) -> Matrix: """ Model factor term. """ return self._call_java("factors") @since("3.1.0") - def summary(self): + def summary(self) -> "FMClassificationTrainingSummary": """ Gets summary (accuracy/precision/recall, objective history, total iterations) of model trained on the training set. An exception is thrown if `trainingSummary is None`. @@ -4050,7 +4263,7 @@ def summary(self): "No training summary available for this %s" % self.__class__.__name__ ) - def evaluate(self, dataset): + def evaluate(self, dataset: DataFrame) -> "FMClassificationSummary": """ Evaluates the model on a test dataset. diff --git a/python/pyspark/ml/classification.pyi b/python/pyspark/ml/classification.pyi deleted file mode 100644 index 16c31924defde..0000000000000 --- a/python/pyspark/ml/classification.pyi +++ /dev/null @@ -1,951 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any, Generic, List, Optional, Type -from pyspark.ml._typing import JM, M, P, T, ParamMap - -import abc -from abc import abstractmethod -from pyspark.ml import Estimator, Model, PredictionModel, Predictor, Transformer -from pyspark.ml.base import _PredictorParams -from pyspark.ml.param.shared import ( - HasAggregationDepth, - HasBlockSize, - HasMaxBlockSizeInMB, - HasElasticNetParam, - HasFitIntercept, - HasMaxIter, - HasParallelism, - HasProbabilityCol, - HasRawPredictionCol, - HasRegParam, - HasSeed, - HasSolver, - HasStandardization, - HasStepSize, - HasThreshold, - HasThresholds, - HasTol, - HasWeightCol, -) -from pyspark.ml.regression import _FactorizationMachinesParams -from pyspark.ml.tree import ( - _DecisionTreeModel, - _DecisionTreeParams, - _GBTParams, - _HasVarianceImpurity, - _RandomForestParams, - _TreeClassifierParams, - _TreeEnsembleModel, -) -from pyspark.ml.util import ( - HasTrainingSummary, - JavaMLReadable, - JavaMLWritable, - MLReader, - MLReadable, - MLWriter, - MLWritable, -) -from pyspark.ml.wrapper import JavaPredictionModel, JavaPredictor, JavaWrapper - -from pyspark.ml.linalg import Matrix, Vector -from pyspark.ml.param import Param -from pyspark.ml.regression import DecisionTreeRegressionModel -from pyspark.sql.dataframe import DataFrame - -from py4j.java_gateway import JavaObject - -class _ClassifierParams(HasRawPredictionCol, _PredictorParams): ... - -class Classifier(Predictor, _ClassifierParams, metaclass=abc.ABCMeta): - def setRawPredictionCol(self: P, value: str) -> P: ... - -class ClassificationModel(PredictionModel, _ClassifierParams, metaclass=abc.ABCMeta): - def setRawPredictionCol(self: P, value: str) -> P: ... - @property - @abc.abstractmethod - def numClasses(self) -> int: ... - @abstractmethod - def predictRaw(self, value: Vector) -> Vector: ... - -class _ProbabilisticClassifierParams(HasProbabilityCol, HasThresholds, _ClassifierParams): ... - -class ProbabilisticClassifier(Classifier, _ProbabilisticClassifierParams, metaclass=abc.ABCMeta): - def setProbabilityCol(self: P, value: str) -> P: ... - def setThresholds(self: P, value: List[float]) -> P: ... - -class ProbabilisticClassificationModel( - ClassificationModel, _ProbabilisticClassifierParams, metaclass=abc.ABCMeta -): - def setProbabilityCol(self: M, value: str) -> M: ... - def setThresholds(self: M, value: List[float]) -> M: ... - @abstractmethod - def predictProbability(self, value: Vector) -> Vector: ... - -class _JavaClassifier(Classifier, JavaPredictor[JM], Generic[JM], metaclass=abc.ABCMeta): - def setRawPredictionCol(self: P, value: str) -> P: ... - -class _JavaClassificationModel(ClassificationModel, JavaPredictionModel[T]): - @property - def numClasses(self) -> int: ... - def predictRaw(self, value: Vector) -> Vector: ... - -class _JavaProbabilisticClassifier( - ProbabilisticClassifier, _JavaClassifier[JM], Generic[JM], metaclass=abc.ABCMeta -): ... - -class _JavaProbabilisticClassificationModel( - ProbabilisticClassificationModel, _JavaClassificationModel[T] -): - def predictProbability(self, value: Vector) -> Vector: ... - -class _ClassificationSummary(JavaWrapper): - @property - def predictions(self) -> DataFrame: ... - @property - def predictionCol(self) -> str: ... - @property - def labelCol(self) -> str: ... - @property - def weightCol(self) -> str: ... - @property - def labels(self) -> List[str]: ... - @property - def truePositiveRateByLabel(self) -> List[float]: ... - @property - def falsePositiveRateByLabel(self) -> List[float]: ... - @property - def precisionByLabel(self) -> List[float]: ... - @property - def recallByLabel(self) -> List[float]: ... - def fMeasureByLabel(self, beta: float = ...) -> List[float]: ... - @property - def accuracy(self) -> float: ... - @property - def weightedTruePositiveRate(self) -> float: ... - @property - def weightedFalsePositiveRate(self) -> float: ... - @property - def weightedRecall(self) -> float: ... - @property - def weightedPrecision(self) -> float: ... - def weightedFMeasure(self, beta: float = ...) -> float: ... - -class _TrainingSummary(JavaWrapper): - @property - def objectiveHistory(self) -> List[float]: ... - @property - def totalIterations(self) -> int: ... - -class _BinaryClassificationSummary(_ClassificationSummary): - @property - def scoreCol(self) -> str: ... - @property - def roc(self) -> DataFrame: ... - @property - def areaUnderROC(self) -> float: ... - @property - def pr(self) -> DataFrame: ... - @property - def fMeasureByThreshold(self) -> DataFrame: ... - @property - def precisionByThreshold(self) -> DataFrame: ... - @property - def recallByThreshold(self) -> DataFrame: ... - -class _LinearSVCParams( - _ClassifierParams, - HasRegParam, - HasMaxIter, - HasFitIntercept, - HasTol, - HasStandardization, - HasWeightCol, - HasAggregationDepth, - HasThreshold, - HasMaxBlockSizeInMB, -): - threshold: Param[float] - def __init__(self, *args: Any) -> None: ... - -class LinearSVC( - _JavaClassifier[LinearSVCModel], - _LinearSVCParams, - JavaMLWritable, - JavaMLReadable[LinearSVC], -): - def __init__( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxIter: int = ..., - regParam: float = ..., - tol: float = ..., - rawPredictionCol: str = ..., - fitIntercept: bool = ..., - standardization: bool = ..., - threshold: float = ..., - weightCol: Optional[str] = ..., - aggregationDepth: int = ..., - maxBlockSizeInMB: float = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxIter: int = ..., - regParam: float = ..., - tol: float = ..., - rawPredictionCol: str = ..., - fitIntercept: bool = ..., - standardization: bool = ..., - threshold: float = ..., - weightCol: Optional[str] = ..., - aggregationDepth: int = ..., - maxBlockSizeInMB: float = ..., - ) -> LinearSVC: ... - def setMaxIter(self, value: int) -> LinearSVC: ... - def setRegParam(self, value: float) -> LinearSVC: ... - def setTol(self, value: float) -> LinearSVC: ... - def setFitIntercept(self, value: bool) -> LinearSVC: ... - def setStandardization(self, value: bool) -> LinearSVC: ... - def setThreshold(self, value: float) -> LinearSVC: ... - def setWeightCol(self, value: str) -> LinearSVC: ... - def setAggregationDepth(self, value: int) -> LinearSVC: ... - def setMaxBlockSizeInMB(self, value: float) -> LinearSVC: ... - def _create_model(self, java_model: JavaObject) -> LinearSVCModel: ... - -class LinearSVCModel( - _JavaClassificationModel[Vector], - _LinearSVCParams, - JavaMLWritable, - JavaMLReadable[LinearSVCModel], - HasTrainingSummary[LinearSVCTrainingSummary], -): - def setThreshold(self, value: float) -> LinearSVCModel: ... - @property - def coefficients(self) -> Vector: ... - @property - def intercept(self) -> float: ... - def summary(self) -> LinearSVCTrainingSummary: ... - def evaluate(self, dataset: DataFrame) -> LinearSVCSummary: ... - -class LinearSVCSummary(_BinaryClassificationSummary): ... -class LinearSVCTrainingSummary(LinearSVCSummary, _TrainingSummary): ... - -class _LogisticRegressionParams( - _ProbabilisticClassifierParams, - HasRegParam, - HasElasticNetParam, - HasMaxIter, - HasFitIntercept, - HasTol, - HasStandardization, - HasWeightCol, - HasAggregationDepth, - HasThreshold, - HasMaxBlockSizeInMB, -): - threshold: Param[float] - family: Param[str] - lowerBoundsOnCoefficients: Param[Matrix] - upperBoundsOnCoefficients: Param[Matrix] - lowerBoundsOnIntercepts: Param[Vector] - upperBoundsOnIntercepts: Param[Vector] - def __init__(self, *args: Any): ... - def setThreshold(self: P, value: float) -> P: ... - def getThreshold(self) -> float: ... - def setThresholds(self: P, value: List[float]) -> P: ... - def getThresholds(self) -> List[float]: ... - def getFamily(self) -> str: ... - def getLowerBoundsOnCoefficients(self) -> Matrix: ... - def getUpperBoundsOnCoefficients(self) -> Matrix: ... - def getLowerBoundsOnIntercepts(self) -> Vector: ... - def getUpperBoundsOnIntercepts(self) -> Vector: ... - -class LogisticRegression( - _JavaProbabilisticClassifier[LogisticRegressionModel], - _LogisticRegressionParams, - JavaMLWritable, - JavaMLReadable[LogisticRegression], -): - def __init__( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxIter: int = ..., - regParam: float = ..., - elasticNetParam: float = ..., - tol: float = ..., - fitIntercept: bool = ..., - threshold: float = ..., - thresholds: Optional[List[float]] = ..., - probabilityCol: str = ..., - rawPredictionCol: str = ..., - standardization: bool = ..., - weightCol: Optional[str] = ..., - aggregationDepth: int = ..., - family: str = ..., - lowerBoundsOnCoefficients: Optional[Matrix] = ..., - upperBoundsOnCoefficients: Optional[Matrix] = ..., - lowerBoundsOnIntercepts: Optional[Vector] = ..., - upperBoundsOnIntercepts: Optional[Vector] = ..., - maxBlockSizeInMB: float = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxIter: int = ..., - regParam: float = ..., - elasticNetParam: float = ..., - tol: float = ..., - fitIntercept: bool = ..., - threshold: float = ..., - thresholds: Optional[List[float]] = ..., - probabilityCol: str = ..., - rawPredictionCol: str = ..., - standardization: bool = ..., - weightCol: Optional[str] = ..., - aggregationDepth: int = ..., - family: str = ..., - lowerBoundsOnCoefficients: Optional[Matrix] = ..., - upperBoundsOnCoefficients: Optional[Matrix] = ..., - lowerBoundsOnIntercepts: Optional[Vector] = ..., - upperBoundsOnIntercepts: Optional[Vector] = ..., - maxBlockSizeInMB: float = ..., - ) -> LogisticRegression: ... - def setFamily(self, value: str) -> LogisticRegression: ... - def setLowerBoundsOnCoefficients(self, value: Matrix) -> LogisticRegression: ... - def setUpperBoundsOnCoefficients(self, value: Matrix) -> LogisticRegression: ... - def setLowerBoundsOnIntercepts(self, value: Vector) -> LogisticRegression: ... - def setUpperBoundsOnIntercepts(self, value: Vector) -> LogisticRegression: ... - def setMaxIter(self, value: int) -> LogisticRegression: ... - def setRegParam(self, value: float) -> LogisticRegression: ... - def setTol(self, value: float) -> LogisticRegression: ... - def setElasticNetParam(self, value: float) -> LogisticRegression: ... - def setFitIntercept(self, value: bool) -> LogisticRegression: ... - def setStandardization(self, value: bool) -> LogisticRegression: ... - def setWeightCol(self, value: str) -> LogisticRegression: ... - def setAggregationDepth(self, value: int) -> LogisticRegression: ... - def setMaxBlockSizeInMB(self, value: float) -> LogisticRegression: ... - def _create_model(self, java_model: JavaObject) -> LogisticRegressionModel: ... - -class LogisticRegressionModel( - _JavaProbabilisticClassificationModel[Vector], - _LogisticRegressionParams, - JavaMLWritable, - JavaMLReadable[LogisticRegressionModel], - HasTrainingSummary[LogisticRegressionTrainingSummary], -): - @property - def coefficients(self) -> Vector: ... - @property - def intercept(self) -> float: ... - @property - def coefficientMatrix(self) -> Matrix: ... - @property - def interceptVector(self) -> Vector: ... - @property - def summary(self) -> LogisticRegressionTrainingSummary: ... - def evaluate(self, dataset: DataFrame) -> LogisticRegressionSummary: ... - -class LogisticRegressionSummary(_ClassificationSummary): - @property - def probabilityCol(self) -> str: ... - @property - def featuresCol(self) -> str: ... - -class LogisticRegressionTrainingSummary(LogisticRegressionSummary, _TrainingSummary): ... -class BinaryLogisticRegressionSummary(_BinaryClassificationSummary, LogisticRegressionSummary): ... -class BinaryLogisticRegressionTrainingSummary( - BinaryLogisticRegressionSummary, LogisticRegressionTrainingSummary -): ... - -class _DecisionTreeClassifierParams(_DecisionTreeParams, _TreeClassifierParams): - def __init__(self, *args: Any): ... - -class DecisionTreeClassifier( - _JavaProbabilisticClassifier[DecisionTreeClassificationModel], - _DecisionTreeClassifierParams, - JavaMLWritable, - JavaMLReadable[DecisionTreeClassifier], -): - def __init__( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - probabilityCol: str = ..., - rawPredictionCol: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - minInstancesPerNode: int = ..., - minInfoGain: float = ..., - maxMemoryInMB: int = ..., - cacheNodeIds: bool = ..., - checkpointInterval: int = ..., - impurity: str = ..., - seed: Optional[int] = ..., - weightCol: Optional[str] = ..., - leafCol: str = ..., - minWeightFractionPerNode: float = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - probabilityCol: str = ..., - rawPredictionCol: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - minInstancesPerNode: int = ..., - minInfoGain: float = ..., - maxMemoryInMB: int = ..., - cacheNodeIds: bool = ..., - checkpointInterval: int = ..., - impurity: str = ..., - seed: Optional[int] = ..., - weightCol: Optional[str] = ..., - leafCol: str = ..., - minWeightFractionPerNode: float = ..., - ) -> DecisionTreeClassifier: ... - def setMaxDepth(self, value: int) -> DecisionTreeClassifier: ... - def setMaxBins(self, value: int) -> DecisionTreeClassifier: ... - def setMinInstancesPerNode(self, value: int) -> DecisionTreeClassifier: ... - def setMinWeightFractionPerNode(self, value: float) -> DecisionTreeClassifier: ... - def setMinInfoGain(self, value: float) -> DecisionTreeClassifier: ... - def setMaxMemoryInMB(self, value: int) -> DecisionTreeClassifier: ... - def setCacheNodeIds(self, value: bool) -> DecisionTreeClassifier: ... - def setImpurity(self, value: str) -> DecisionTreeClassifier: ... - def setCheckpointInterval(self, value: int) -> DecisionTreeClassifier: ... - def setSeed(self, value: int) -> DecisionTreeClassifier: ... - def setWeightCol(self, value: str) -> DecisionTreeClassifier: ... - def _create_model(self, java_model: JavaObject) -> DecisionTreeClassificationModel: ... - -class DecisionTreeClassificationModel( - _DecisionTreeModel, - _JavaProbabilisticClassificationModel[Vector], - _DecisionTreeClassifierParams, - JavaMLWritable, - JavaMLReadable[DecisionTreeClassificationModel], -): - @property - def featureImportances(self) -> Vector: ... - -class _RandomForestClassifierParams(_RandomForestParams, _TreeClassifierParams): - def __init__(self, *args: Any): ... - -class RandomForestClassifier( - _JavaProbabilisticClassifier[RandomForestClassificationModel], - _RandomForestClassifierParams, - JavaMLWritable, - JavaMLReadable[RandomForestClassifier], -): - def __init__( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - probabilityCol: str = ..., - rawPredictionCol: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - minInstancesPerNode: int = ..., - minInfoGain: float = ..., - maxMemoryInMB: int = ..., - cacheNodeIds: bool = ..., - checkpointInterval: int = ..., - impurity: str = ..., - numTrees: int = ..., - featureSubsetStrategy: str = ..., - seed: Optional[int] = ..., - subsamplingRate: float = ..., - leafCol: str = ..., - minWeightFractionPerNode: float = ..., - weightCol: Optional[str] = ..., - bootstrap: Optional[bool] = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - probabilityCol: str = ..., - rawPredictionCol: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - minInstancesPerNode: int = ..., - minInfoGain: float = ..., - maxMemoryInMB: int = ..., - cacheNodeIds: bool = ..., - checkpointInterval: int = ..., - seed: Optional[int] = ..., - impurity: str = ..., - numTrees: int = ..., - featureSubsetStrategy: str = ..., - subsamplingRate: float = ..., - leafCol: str = ..., - minWeightFractionPerNode: float = ..., - weightCol: Optional[str] = ..., - bootstrap: Optional[bool] = ..., - ) -> RandomForestClassifier: ... - def setMaxDepth(self, value: int) -> RandomForestClassifier: ... - def setMaxBins(self, value: int) -> RandomForestClassifier: ... - def setMinInstancesPerNode(self, value: int) -> RandomForestClassifier: ... - def setMinInfoGain(self, value: float) -> RandomForestClassifier: ... - def setMaxMemoryInMB(self, value: int) -> RandomForestClassifier: ... - def setCacheNodeIds(self, value: bool) -> RandomForestClassifier: ... - def setImpurity(self, value: str) -> RandomForestClassifier: ... - def setNumTrees(self, value: int) -> RandomForestClassifier: ... - def setBootstrap(self, value: bool) -> RandomForestClassifier: ... - def setSubsamplingRate(self, value: float) -> RandomForestClassifier: ... - def setFeatureSubsetStrategy(self, value: str) -> RandomForestClassifier: ... - def setSeed(self, value: int) -> RandomForestClassifier: ... - def setCheckpointInterval(self, value: int) -> RandomForestClassifier: ... - def setWeightCol(self, value: str) -> RandomForestClassifier: ... - def setMinWeightFractionPerNode(self, value: float) -> RandomForestClassifier: ... - def _create_model(self, java_model: JavaObject) -> RandomForestClassificationModel: ... - -class RandomForestClassificationModel( - _TreeEnsembleModel, - _JavaProbabilisticClassificationModel[Vector], - _RandomForestClassifierParams, - JavaMLWritable, - JavaMLReadable[RandomForestClassificationModel], - HasTrainingSummary[RandomForestClassificationTrainingSummary], -): - @property - def featureImportances(self) -> Vector: ... - @property - def trees(self) -> List[DecisionTreeClassificationModel]: ... - def summary(self) -> RandomForestClassificationTrainingSummary: ... - def evaluate(self, dataset: DataFrame) -> RandomForestClassificationSummary: ... - -class RandomForestClassificationSummary(_ClassificationSummary): ... -class RandomForestClassificationTrainingSummary( - RandomForestClassificationSummary, _TrainingSummary -): ... -class BinaryRandomForestClassificationSummary(_BinaryClassificationSummary): ... -class BinaryRandomForestClassificationTrainingSummary( - BinaryRandomForestClassificationSummary, RandomForestClassificationTrainingSummary -): ... - -class _GBTClassifierParams(_GBTParams, _HasVarianceImpurity): - supportedLossTypes: List[str] - lossType: Param[str] - def __init__(self, *args: Any): ... - def getLossType(self) -> str: ... - -class GBTClassifier( - _JavaProbabilisticClassifier[GBTClassificationModel], - _GBTClassifierParams, - JavaMLWritable, - JavaMLReadable[GBTClassifier], -): - def __init__( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - minInstancesPerNode: int = ..., - minInfoGain: float = ..., - maxMemoryInMB: int = ..., - cacheNodeIds: bool = ..., - checkpointInterval: int = ..., - lossType: str = ..., - maxIter: int = ..., - stepSize: float = ..., - seed: Optional[int] = ..., - subsamplingRate: float = ..., - featureSubsetStrategy: str = ..., - validationTol: float = ..., - validationIndicatorCol: Optional[str] = ..., - leafCol: str = ..., - minWeightFractionPerNode: float = ..., - weightCol: Optional[str] = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxDepth: int = ..., - maxBins: int = ..., - minInstancesPerNode: int = ..., - minInfoGain: float = ..., - maxMemoryInMB: int = ..., - cacheNodeIds: bool = ..., - checkpointInterval: int = ..., - lossType: str = ..., - maxIter: int = ..., - stepSize: float = ..., - seed: Optional[int] = ..., - subsamplingRate: float = ..., - featureSubsetStrategy: str = ..., - validationTol: float = ..., - validationIndicatorCol: Optional[str] = ..., - leafCol: str = ..., - minWeightFractionPerNode: float = ..., - weightCol: Optional[str] = ..., - ) -> GBTClassifier: ... - def setMaxDepth(self, value: int) -> GBTClassifier: ... - def setMaxBins(self, value: int) -> GBTClassifier: ... - def setMinInstancesPerNode(self, value: int) -> GBTClassifier: ... - def setMinInfoGain(self, value: float) -> GBTClassifier: ... - def setMaxMemoryInMB(self, value: int) -> GBTClassifier: ... - def setCacheNodeIds(self, value: bool) -> GBTClassifier: ... - def setImpurity(self, value: str) -> GBTClassifier: ... - def setLossType(self, value: str) -> GBTClassifier: ... - def setSubsamplingRate(self, value: float) -> GBTClassifier: ... - def setFeatureSubsetStrategy(self, value: str) -> GBTClassifier: ... - def setValidationIndicatorCol(self, value: str) -> GBTClassifier: ... - def setMaxIter(self, value: int) -> GBTClassifier: ... - def setCheckpointInterval(self, value: int) -> GBTClassifier: ... - def setSeed(self, value: int) -> GBTClassifier: ... - def setStepSize(self, value: float) -> GBTClassifier: ... - def setWeightCol(self, value: str) -> GBTClassifier: ... - def setMinWeightFractionPerNode(self, value: float) -> GBTClassifier: ... - def _create_model(self, java_model: JavaObject) -> GBTClassificationModel: ... - -class GBTClassificationModel( - _TreeEnsembleModel, - _JavaProbabilisticClassificationModel[Vector], - _GBTClassifierParams, - JavaMLWritable, - JavaMLReadable[GBTClassificationModel], -): - @property - def featureImportances(self) -> Vector: ... - @property - def trees(self) -> List[DecisionTreeRegressionModel]: ... - def evaluateEachIteration(self, dataset: DataFrame) -> List[float]: ... - -class _NaiveBayesParams(_PredictorParams, HasWeightCol): - smoothing: Param[float] - modelType: Param[str] - def __init__(self, *args: Any): ... - def getSmoothing(self) -> float: ... - def getModelType(self) -> str: ... - -class NaiveBayes( - _JavaProbabilisticClassifier[NaiveBayesModel], - _NaiveBayesParams, - HasThresholds, - HasWeightCol, - JavaMLWritable, - JavaMLReadable[NaiveBayes], -): - def __init__( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - probabilityCol: str = ..., - rawPredictionCol: str = ..., - smoothing: float = ..., - modelType: str = ..., - thresholds: Optional[List[float]] = ..., - weightCol: Optional[str] = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - probabilityCol: str = ..., - rawPredictionCol: str = ..., - smoothing: float = ..., - modelType: str = ..., - thresholds: Optional[List[float]] = ..., - weightCol: Optional[str] = ..., - ) -> NaiveBayes: ... - def setSmoothing(self, value: float) -> NaiveBayes: ... - def setModelType(self, value: str) -> NaiveBayes: ... - def setWeightCol(self, value: str) -> NaiveBayes: ... - def _create_model(self, java_model: JavaObject) -> NaiveBayesModel: ... - -class NaiveBayesModel( - _JavaProbabilisticClassificationModel[Vector], - _NaiveBayesParams, - JavaMLWritable, - JavaMLReadable[NaiveBayesModel], -): - @property - def pi(self) -> Vector: ... - @property - def theta(self) -> Matrix: ... - @property - def sigma(self) -> Matrix: ... - -class _MultilayerPerceptronParams( - _ProbabilisticClassifierParams, - HasSeed, - HasMaxIter, - HasTol, - HasStepSize, - HasSolver, - HasBlockSize, -): - layers: Param[List[int]] - solver: Param[str] - initialWeights: Param[Vector] - def __init__(self, *args: Any): ... - def getLayers(self) -> List[int]: ... - def getInitialWeights(self) -> Vector: ... - -class MultilayerPerceptronClassifier( - _JavaProbabilisticClassifier[MultilayerPerceptronClassificationModel], - _MultilayerPerceptronParams, - JavaMLWritable, - JavaMLReadable[MultilayerPerceptronClassifier], -): - def __init__( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxIter: int = ..., - tol: float = ..., - seed: Optional[int] = ..., - layers: Optional[List[int]] = ..., - blockSize: int = ..., - stepSize: float = ..., - solver: str = ..., - initialWeights: Optional[Vector] = ..., - probabilityCol: str = ..., - rawPredictionCol: str = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - maxIter: int = ..., - tol: float = ..., - seed: Optional[int] = ..., - layers: Optional[List[int]] = ..., - blockSize: int = ..., - stepSize: float = ..., - solver: str = ..., - initialWeights: Optional[Vector] = ..., - probabilityCol: str = ..., - rawPredictionCol: str = ..., - ) -> MultilayerPerceptronClassifier: ... - def setLayers(self, value: List[int]) -> MultilayerPerceptronClassifier: ... - def setBlockSize(self, value: int) -> MultilayerPerceptronClassifier: ... - def setInitialWeights(self, value: Vector) -> MultilayerPerceptronClassifier: ... - def setMaxIter(self, value: int) -> MultilayerPerceptronClassifier: ... - def setSeed(self, value: int) -> MultilayerPerceptronClassifier: ... - def setTol(self, value: float) -> MultilayerPerceptronClassifier: ... - def setStepSize(self, value: float) -> MultilayerPerceptronClassifier: ... - def setSolver(self, value: str) -> MultilayerPerceptronClassifier: ... - def _create_model(self, java_model: JavaObject) -> MultilayerPerceptronClassificationModel: ... - -class MultilayerPerceptronClassificationModel( - _JavaProbabilisticClassificationModel[Vector], - _MultilayerPerceptronParams, - JavaMLWritable, - JavaMLReadable[MultilayerPerceptronClassificationModel], - HasTrainingSummary[MultilayerPerceptronClassificationTrainingSummary], -): - @property - def weights(self) -> Vector: ... - def summary(self) -> MultilayerPerceptronClassificationTrainingSummary: ... - def evaluate(self, dataset: DataFrame) -> MultilayerPerceptronClassificationSummary: ... - -class MultilayerPerceptronClassificationSummary(_ClassificationSummary): ... -class MultilayerPerceptronClassificationTrainingSummary( - MultilayerPerceptronClassificationSummary, _TrainingSummary -): ... - -class _OneVsRestParams(_ClassifierParams, HasWeightCol): - classifier: Param[Estimator] - def getClassifier(self) -> Estimator[M]: ... - -class OneVsRest( - Estimator[OneVsRestModel], - _OneVsRestParams, - HasParallelism, - MLReadable[OneVsRest], - MLWritable, -): - def __init__( - self, - *, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - rawPredictionCol: str = ..., - classifier: Optional[Estimator[M]] = ..., - weightCol: Optional[str] = ..., - parallelism: int = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: Optional[str] = ..., - labelCol: Optional[str] = ..., - predictionCol: Optional[str] = ..., - rawPredictionCol: str = ..., - classifier: Optional[Estimator[M]] = ..., - weightCol: Optional[str] = ..., - parallelism: int = ..., - ) -> OneVsRest: ... - def _fit(self, dataset: DataFrame) -> OneVsRestModel: ... - def setClassifier(self, value: Estimator[M]) -> OneVsRest: ... - def setLabelCol(self, value: str) -> OneVsRest: ... - def setFeaturesCol(self, value: str) -> OneVsRest: ... - def setPredictionCol(self, value: str) -> OneVsRest: ... - def setRawPredictionCol(self, value: str) -> OneVsRest: ... - def setWeightCol(self, value: str) -> OneVsRest: ... - def setParallelism(self, value: int) -> OneVsRest: ... - def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRest: ... - -class OneVsRestModel(Model, _OneVsRestParams, MLReadable[OneVsRestModel], MLWritable): - models: List[Transformer] - def __init__(self, models: List[Transformer]) -> None: ... - def _transform(self, dataset: DataFrame) -> DataFrame: ... - def setFeaturesCol(self, value: str) -> OneVsRestModel: ... - def setPredictionCol(self, value: str) -> OneVsRestModel: ... - def setRawPredictionCol(self, value: str) -> OneVsRestModel: ... - def copy(self, extra: Optional[ParamMap] = ...) -> OneVsRestModel: ... - -class OneVsRestWriter(MLWriter): - instance: OneVsRest - def __init__(self, instance: OneVsRest) -> None: ... - def saveImpl(self, path: str) -> None: ... - -class OneVsRestReader(MLReader[OneVsRest]): - cls: Type[OneVsRest] - def __init__(self, cls: Type[OneVsRest]) -> None: ... - def load(self, path: str) -> OneVsRest: ... - -class OneVsRestModelWriter(MLWriter): - instance: OneVsRestModel - def __init__(self, instance: OneVsRestModel) -> None: ... - def saveImpl(self, path: str) -> None: ... - -class OneVsRestModelReader(MLReader[OneVsRestModel]): - cls: Type[OneVsRestModel] - def __init__(self, cls: Type[OneVsRestModel]) -> None: ... - def load(self, path: str) -> OneVsRestModel: ... - -class FMClassifier( - _JavaProbabilisticClassifier[FMClassificationModel], - _FactorizationMachinesParams, - JavaMLWritable, - JavaMLReadable[FMClassifier], -): - factorSize: Param[int] - fitLinear: Param[bool] - miniBatchFraction: Param[float] - initStd: Param[float] - solver: Param[str] - def __init__( - self, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - probabilityCol: str = ..., - rawPredictionCol: str = ..., - factorSize: int = ..., - fitIntercept: bool = ..., - fitLinear: bool = ..., - regParam: float = ..., - miniBatchFraction: float = ..., - initStd: float = ..., - maxIter: int = ..., - stepSize: float = ..., - tol: float = ..., - solver: str = ..., - thresholds: Optional[Any] = ..., - seed: Optional[Any] = ..., - ) -> None: ... - def setParams( - self, - featuresCol: str = ..., - labelCol: str = ..., - predictionCol: str = ..., - probabilityCol: str = ..., - rawPredictionCol: str = ..., - factorSize: int = ..., - fitIntercept: bool = ..., - fitLinear: bool = ..., - regParam: float = ..., - miniBatchFraction: float = ..., - initStd: float = ..., - maxIter: int = ..., - stepSize: float = ..., - tol: float = ..., - solver: str = ..., - thresholds: Optional[Any] = ..., - seed: Optional[Any] = ..., - ) -> FMClassifier: ... - def setFactorSize(self, value: int) -> FMClassifier: ... - def setFitLinear(self, value: bool) -> FMClassifier: ... - def setMiniBatchFraction(self, value: float) -> FMClassifier: ... - def setInitStd(self, value: float) -> FMClassifier: ... - def setMaxIter(self, value: int) -> FMClassifier: ... - def setStepSize(self, value: float) -> FMClassifier: ... - def setTol(self, value: float) -> FMClassifier: ... - def setSolver(self, value: str) -> FMClassifier: ... - def setSeed(self, value: int) -> FMClassifier: ... - def setFitIntercept(self, value: bool) -> FMClassifier: ... - def setRegParam(self, value: float) -> FMClassifier: ... - def _create_model(self, java_model: JavaObject) -> FMClassificationModel: ... - -class FMClassificationModel( - _JavaProbabilisticClassificationModel[Vector], - _FactorizationMachinesParams, - JavaMLWritable, - JavaMLReadable[FMClassificationModel], -): - @property - def intercept(self) -> float: ... - @property - def linear(self) -> Vector: ... - @property - def factors(self) -> Matrix: ... - def summary(self) -> FMClassificationTrainingSummary: ... - def evaluate(self, dataset: DataFrame) -> FMClassificationSummary: ... - -class FMClassificationSummary(_BinaryClassificationSummary): ... -class FMClassificationTrainingSummary(FMClassificationSummary, _TrainingSummary): ... diff --git a/python/pyspark/ml/tests/typing/test_classification.yml b/python/pyspark/ml/tests/typing/test_classification.yml index a6efc76a301a5..45e5f1ccafba2 100644 --- a/python/pyspark/ml/tests/typing/test_classification.yml +++ b/python/pyspark/ml/tests/typing/test_classification.yml @@ -23,8 +23,8 @@ # Should support OneVsRest(classifier=LogisticRegression()) - OneVsRest(classifier=LogisticRegressionModel.load("/foo")) # E: Argument "classifier" to "OneVsRest" has incompatible type "LogisticRegressionModel"; expected "Optional[Estimator[]]" [arg-type] - OneVsRest(classifier="foo") # E: Argument "classifier" to "OneVsRest" has incompatible type "str"; expected "Optional[Estimator[]]" [arg-type] + OneVsRest(classifier=LogisticRegressionModel.load("/foo")) # E: Argument "classifier" to "OneVsRest" has incompatible type "LogisticRegressionModel"; expected "Optional[Classifier[]]" [arg-type] + OneVsRest(classifier="foo") # E: Argument "classifier" to "OneVsRest" has incompatible type "str"; expected "Optional[Classifier[]]" [arg-type] - case: fitFMClassifier From 60ce69df029b1e1d7cf7f7eece02e668de24cca8 Mon Sep 17 00:00:00 2001 From: dch nguyen Date: Sun, 10 Apr 2022 14:14:33 +0200 Subject: [PATCH 115/535] [SPARK-37234][PYTHON] Inline type hints for python/pyspark/mllib/stat/_statistics.py ### What changes were proposed in this pull request? Inline type hints for python/pyspark/mllib/stat/_statistics.py ### Why are the changes needed? We can take advantage of static type checking within the functions by inlining the type hints. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #34513 from dchvn/SPARK-37234. Lead-authored-by: dch nguyen Co-authored-by: dch nguyen Signed-off-by: zero323 (cherry picked from commit c3dcdb118ca403a8fbefc3308a116d9e12a1f038) Signed-off-by: zero323 --- python/pyspark/mllib/_typing.pyi | 5 ++ python/pyspark/mllib/stat/_statistics.py | 94 +++++++++++++++++------ python/pyspark/mllib/stat/_statistics.pyi | 63 --------------- 3 files changed, 74 insertions(+), 88 deletions(-) delete mode 100644 python/pyspark/mllib/stat/_statistics.pyi diff --git a/python/pyspark/mllib/_typing.pyi b/python/pyspark/mllib/_typing.pyi index 6a1a0f53a5950..4fbaeca39beb6 100644 --- a/python/pyspark/mllib/_typing.pyi +++ b/python/pyspark/mllib/_typing.pyi @@ -17,7 +17,9 @@ # under the License. from typing import List, Tuple, TypeVar, Union + from typing_extensions import Literal + from pyspark.mllib.linalg import Vector from numpy import ndarray # noqa: F401 from py4j.java_gateway import JavaObject @@ -25,4 +27,7 @@ from py4j.java_gateway import JavaObject VectorLike = Union[ndarray, Vector, List[float], Tuple[float, ...]] C = TypeVar("C", bound=type) JavaObjectOrPickleDump = Union[JavaObject, bytearray, bytes] + +CorrelationMethod = Union[Literal["spearman"], Literal["pearson"]] +DistName = Literal["norm"] NormType = Union[None, float, Literal["fro"], Literal["nuc"]] diff --git a/python/pyspark/mllib/stat/_statistics.py b/python/pyspark/mllib/stat/_statistics.py index 34a373d5358e3..25095d99dd9d4 100644 --- a/python/pyspark/mllib/stat/_statistics.py +++ b/python/pyspark/mllib/stat/_statistics.py @@ -16,13 +16,19 @@ # import sys +from typing import cast, overload, List, Optional, TYPE_CHECKING, Union + +from numpy import ndarray +from py4j.java_gateway import JavaObject from pyspark.rdd import RDD from pyspark.mllib.common import callMLlibFunc, JavaModelWrapper -from pyspark.mllib.linalg import Matrix, _convert_to_vector +from pyspark.mllib.linalg import Matrix, Vector, _convert_to_vector from pyspark.mllib.regression import LabeledPoint from pyspark.mllib.stat.test import ChiSqTestResult, KolmogorovSmirnovTestResult +if TYPE_CHECKING: + from pyspark.mllib._typing import CorrelationMethod, DistName __all__ = ["MultivariateStatisticalSummary", "Statistics"] @@ -33,34 +39,34 @@ class MultivariateStatisticalSummary(JavaModelWrapper): Trait for multivariate statistical summary of a data matrix. """ - def mean(self): - return self.call("mean").toArray() + def mean(self) -> ndarray: + return cast(JavaObject, self.call("mean")).toArray() - def variance(self): - return self.call("variance").toArray() + def variance(self) -> ndarray: + return cast(JavaObject, self.call("variance")).toArray() - def count(self): + def count(self) -> int: return int(self.call("count")) - def numNonzeros(self): - return self.call("numNonzeros").toArray() + def numNonzeros(self) -> ndarray: + return cast(JavaObject, self.call("numNonzeros")).toArray() - def max(self): - return self.call("max").toArray() + def max(self) -> ndarray: + return cast(JavaObject, self.call("max")).toArray() - def min(self): - return self.call("min").toArray() + def min(self) -> ndarray: + return cast(JavaObject, self.call("min")).toArray() - def normL1(self): - return self.call("normL1").toArray() + def normL1(self) -> ndarray: + return cast(JavaObject, self.call("normL1")).toArray() - def normL2(self): - return self.call("normL2").toArray() + def normL2(self) -> ndarray: + return cast(JavaObject, self.call("normL2")).toArray() class Statistics: @staticmethod - def colStats(rdd): + def colStats(rdd: RDD[Vector]) -> MultivariateStatisticalSummary: """ Computes column-wise summary statistics for the input RDD[Vector]. @@ -98,8 +104,22 @@ def colStats(rdd): cStats = callMLlibFunc("colStats", rdd.map(_convert_to_vector)) return MultivariateStatisticalSummary(cStats) + @overload + @staticmethod + def corr(x: RDD[Vector], *, method: Optional["CorrelationMethod"] = ...) -> Matrix: + ... + + @overload @staticmethod - def corr(x, y=None, method=None): + def corr(x: RDD[float], y: RDD[float], method: Optional["CorrelationMethod"] = ...) -> float: + ... + + @staticmethod + def corr( + x: Union[RDD[Vector], RDD[float]], + y: Optional[RDD[float]] = None, + method: Optional["CorrelationMethod"] = None, + ) -> Union[float, Matrix]: """ Compute the correlation (matrix) for the input RDD(s) using the specified method. @@ -168,12 +188,34 @@ def corr(x, y=None, method=None): raise TypeError("Use 'method=' to specify method name.") if not y: - return callMLlibFunc("corr", x.map(_convert_to_vector), method).toArray() + return cast( + JavaObject, callMLlibFunc("corr", x.map(_convert_to_vector), method) + ).toArray() else: - return callMLlibFunc("corr", x.map(float), y.map(float), method) + return cast( + float, + callMLlibFunc("corr", cast(RDD[float], x).map(float), y.map(float), method), + ) + + @overload + @staticmethod + def chiSqTest(observed: Matrix) -> ChiSqTestResult: + ... + + @overload + @staticmethod + def chiSqTest(observed: Vector, expected: Optional[Vector] = ...) -> ChiSqTestResult: + ... + + @overload + @staticmethod + def chiSqTest(observed: RDD[LabeledPoint]) -> List[ChiSqTestResult]: + ... @staticmethod - def chiSqTest(observed, expected=None): + def chiSqTest( + observed: Union[Matrix, RDD[LabeledPoint], Vector], expected: Optional[Vector] = None + ) -> Union[ChiSqTestResult, List[ChiSqTestResult]]: """ If `observed` is Vector, conduct Pearson's chi-squared goodness of fit test of the observed data against the expected distribution, @@ -270,7 +312,9 @@ def chiSqTest(observed, expected=None): return ChiSqTestResult(jmodel) @staticmethod - def kolmogorovSmirnovTest(data, distName="norm", *params): + def kolmogorovSmirnovTest( + data: RDD[float], distName: "DistName" = "norm", *params: float + ) -> KolmogorovSmirnovTestResult: """ Performs the Kolmogorov-Smirnov (KS) test for data sampled from a continuous distribution. It tests the null hypothesis that @@ -334,13 +378,13 @@ def kolmogorovSmirnovTest(data, distName="norm", *params): if not isinstance(distName, str): raise TypeError("distName should be a string, got %s." % type(distName)) - params = [float(param) for param in params] + param_list = [float(param) for param in params] return KolmogorovSmirnovTestResult( - callMLlibFunc("kolmogorovSmirnovTest", data, distName, params) + callMLlibFunc("kolmogorovSmirnovTest", data, distName, param_list) ) -def _test(): +def _test() -> None: import doctest import numpy from pyspark.sql import SparkSession diff --git a/python/pyspark/mllib/stat/_statistics.pyi b/python/pyspark/mllib/stat/_statistics.pyi deleted file mode 100644 index 1bf76dd3af0e5..0000000000000 --- a/python/pyspark/mllib/stat/_statistics.pyi +++ /dev/null @@ -1,63 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import List, Optional, overload, Union -from typing_extensions import Literal - -from numpy import ndarray - -from pyspark.mllib.common import JavaModelWrapper -from pyspark.mllib.linalg import Vector, Matrix -from pyspark.mllib.regression import LabeledPoint -from pyspark.mllib.stat.test import ChiSqTestResult, KolmogorovSmirnovTestResult -from pyspark.rdd import RDD - -CorrelationMethod = Union[Literal["spearman"], Literal["pearson"]] - -class MultivariateStatisticalSummary(JavaModelWrapper): - def mean(self) -> ndarray: ... - def variance(self) -> ndarray: ... - def count(self) -> int: ... - def numNonzeros(self) -> ndarray: ... - def max(self) -> ndarray: ... - def min(self) -> ndarray: ... - def normL1(self) -> ndarray: ... - def normL2(self) -> ndarray: ... - -class Statistics: - @staticmethod - def colStats(rdd: RDD[Vector]) -> MultivariateStatisticalSummary: ... - @overload - @staticmethod - def corr(x: RDD[Vector], *, method: Optional[CorrelationMethod] = ...) -> Matrix: ... - @overload - @staticmethod - def corr(x: RDD[float], y: RDD[float], method: Optional[CorrelationMethod] = ...) -> float: ... - @overload - @staticmethod - def chiSqTest(observed: Matrix) -> ChiSqTestResult: ... - @overload - @staticmethod - def chiSqTest(observed: Vector, expected: Optional[Vector] = ...) -> ChiSqTestResult: ... - @overload - @staticmethod - def chiSqTest(observed: RDD[LabeledPoint]) -> List[ChiSqTestResult]: ... - @staticmethod - def kolmogorovSmirnovTest( - data: RDD[float], distName: Literal["norm"] = ..., *params: float - ) -> KolmogorovSmirnovTestResult: ... From 82581684c64dcd388d757ebde8ec0b9eb2a30816 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 10 Apr 2022 17:44:39 -0700 Subject: [PATCH 116/535] [SPARK-38830][CORE] Warn on corrupted block messages ### What changes were proposed in this pull request? This PR aims to warn when `NettyBlockRpcServer` received a corrupted block RPC message or under attack. - `IllegalArgumentException`: When the type is unknown/invalid when decoding. This fails at Spark layer. - `NegativeArraySizeException`: When the size read is negative. This fails at Spark layer during buffer creation. - `IndexOutOfBoundsException`: When the data field isn't matched with the size. This fails at Netty later. ### Why are the changes needed? When the RPC messages are corrupted or the servers are under attack, Spark shows `IndexOutOfBoundsException` due to the failure from `Decoder`. Instead of `Exception`, we had better ignore the message with a directional warning message. ``` java.lang.IndexOutOfBoundsException: readerIndex(5) + length(602416) exceeds writerIndex(172): UnpooledUnsafeDirectByteBuf(ridx: 5, widx: 172, cap: 172/172) at io.netty.buffer.AbstractByteBuf.checkReadableBytes0(AbstractByteBuf.java:1477) at io.netty.buffer.AbstractByteBuf.checkReadableBytes(AbstractByteBuf.java:1463) at io.netty.buffer.UnpooledDirectByteBuf.readBytes(UnpooledDirectByteBuf.java:316) at io.netty.buffer.AbstractByteBuf.readBytes(AbstractByteBuf.java:904) at org.apache.spark.network.protocol.Encoders$Strings.decode(Encoders.java:45) at org.apache.spark.network.shuffle.protocol.UploadBlock.decode(UploadBlock.java:112) at org.apache.spark.network.shuffle.protocol.BlockTransferMessage$Decoder.fromByteBuffer(BlockTransferMessage.java:71) at org.apache.spark.network.netty.NettyBlockRpcServer.receive(NettyBlockRpcServer.scala:53) at org.apache.spark.network.server.TransportRequestHandler.processRpcRequest(TransportRequestHandler.java:161) at org.apache.spark.network.server.TransportRequestHandler.handle(TransportRequestHandler.java:109) at org.apache.spark.network.server.TransportChannelHandler.channelRead0(TransportChannelHandler.java:140) at org.apache.spark.network.server.TransportChannelHandler.channelRead0(TransportChannelHandler.java:53) ``` ### Does this PR introduce _any_ user-facing change? Yes, but this clarify the log messages from exceptions, `IndexOutOfBoundsException`. ### How was this patch tested? Pass the CIs with newly added test suite. Closes #36116 from dongjoon-hyun/SPARK-38830. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 8ac06474f8cfa8e5619f817aaeea29a77ec8a2a4) Signed-off-by: Dongjoon Hyun --- .../network/netty/NettyBlockRpcServer.scala | 18 +++++- .../netty/NettyBlockRpcServerSuite.scala | 59 +++++++++++++++++++ 2 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 core/src/test/scala/org/apache/spark/network/netty/NettyBlockRpcServerSuite.scala diff --git a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala index 81c878d17c695..f2a1fe49fcf37 100644 --- a/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala @@ -50,7 +50,23 @@ class NettyBlockRpcServer( client: TransportClient, rpcMessage: ByteBuffer, responseContext: RpcResponseCallback): Unit = { - val message = BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage) + val message = try { + BlockTransferMessage.Decoder.fromByteBuffer(rpcMessage) + } catch { + case e: IllegalArgumentException if e.getMessage.startsWith("Unknown message type") => + logWarning(s"This could be a corrupted RPC message (capacity: ${rpcMessage.capacity()}) " + + s"from ${client.getSocketAddress}. Please use `spark.authenticate.*` configurations " + + "in case of security incidents.") + throw e + + case _: IndexOutOfBoundsException | _: NegativeArraySizeException => + // Netty may throw non-'IOException's for corrupted buffers. In this case, + // we ignore the entire message with warnings because we cannot trust any contents. + logWarning(s"Ignored a corrupted RPC message (capacity: ${rpcMessage.capacity()}) " + + s"from ${client.getSocketAddress}. Please use `spark.authenticate.*` configurations " + + "in case of security incidents.") + return + } logTrace(s"Received request: $message") message match { diff --git a/core/src/test/scala/org/apache/spark/network/netty/NettyBlockRpcServerSuite.scala b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockRpcServerSuite.scala new file mode 100644 index 0000000000000..54e83e7bda50c --- /dev/null +++ b/core/src/test/scala/org/apache/spark/network/netty/NettyBlockRpcServerSuite.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.netty + +import java.nio.ByteBuffer + +import org.mockito.Mockito.mock + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.network.client.TransportClient +import org.apache.spark.serializer.JavaSerializer + +class NettyBlockRpcServerSuite extends SparkFunSuite { + + test("SPARK-38830: Rethrow IllegalArgumentException due to `Unknown message type`") { + val serializer = new JavaSerializer(new SparkConf) + val server = new NettyBlockRpcServer("enhanced-rpc-server", serializer, null) + val bytes = Array[Byte](100.toByte) + val message = ByteBuffer.wrap(bytes) + val client = mock(classOf[TransportClient]) + val m = intercept[IllegalArgumentException] { + server.receive(client, message) + }.getMessage + assert(m.startsWith("Unknown message type: 100")) + } + + test("SPARK-38830: Warn and ignore NegativeArraySizeException due to the corruption") { + val serializer = new JavaSerializer(new SparkConf) + val server = new NettyBlockRpcServer("enhanced-rpc-server", serializer, null) + val bytes = Array[Byte](0.toByte, 0xFF.toByte, 0xFF.toByte, 0xFF.toByte, 0xFF.toByte) + val message = ByteBuffer.wrap(bytes) + val client = mock(classOf[TransportClient]) + server.receive(client, message) + } + + test("SPARK-38830: Warn and ignore IndexOutOfBoundsException due to the corruption") { + val serializer = new JavaSerializer(new SparkConf) + val server = new NettyBlockRpcServer("enhanced-rpc-server", serializer, null) + val bytes = Array[Byte](1.toByte) + val message = ByteBuffer.wrap(bytes) + val client = mock(classOf[TransportClient]) + server.receive(client, message) + } +} From 41798d9436ceecc1ebfe4866d150a4d40757045c Mon Sep 17 00:00:00 2001 From: itholic Date: Sun, 10 Apr 2022 18:19:25 -0700 Subject: [PATCH 117/535] [SPARK-38800][DOCS][PYTHON][3.3] Explicitly document the supported pandas version ### What changes were proposed in this pull request? This PR proposes to document the supported pandas version for pandas API on Spark. https://github.com/apache/spark/pull/36095 is corresponding PR for master branch. ### Why are the changes needed? Since the behavior of pandas is different per its version, it would be better explicitly documenting the supported pandas version so that users won't confuse. pandas API on Spark aims matching behavior to pandas 1.3. ### Does this PR introduce _any_ user-facing change? Yes, now the supported pandas version is mentioned to the PySpark API reference page. ### How was this patch tested? This existing doc build should cover Closes #36114 from itholic/SPARK-38800-3.3. Authored-by: itholic Signed-off-by: Dongjoon Hyun --- python/docs/source/reference/index.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/docs/source/reference/index.rst b/python/docs/source/reference/index.rst index 6d11e10df18c4..f023b5a8c9947 100644 --- a/python/docs/source/reference/index.rst +++ b/python/docs/source/reference/index.rst @@ -22,6 +22,8 @@ API Reference This page lists an overview of all public PySpark modules, classes, functions and methods. +Pandas API on Spark follows the API specifications of pandas 1.3. + .. toctree:: :maxdepth: 2 From f5561b1237321ae13cb6f9f986421344b911e2c0 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 11 Apr 2022 12:08:54 +0900 Subject: [PATCH 118/535] [SPARK-36681][CORE][TESTS][FOLLOW-UP] Tests only when Snappy native library is available in low Hadoop versions ### What changes were proposed in this pull request? This PR is a minor followup to only test when Snappy native library is available in low Hadoop versions. From Hadoop 3.3.1 with `HADOOP-17125`, the tests should pass but it fails in lower versions of Hadoop when Snappy native library is unavailable (see also https://github.com/apache/spark/pull/35784#issuecomment-1081290978). ### Why are the changes needed? To make the tests robust. ### Does this PR introduce _any_ user-facing change? Nope, this is test-only. ### How was this patch tested? Should monitor CI Closes #36136 from HyukjinKwon/SPARK-36681. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit bf19a5e1918bd2aec52a98428ccfe184102ea464) Signed-off-by: Hyukjin Kwon --- .../test/scala/org/apache/spark/FileSuite.scala | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala index ac7670014eb9d..97795c55c82ff 100644 --- a/core/src/test/scala/org/apache/spark/FileSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileSuite.scala @@ -136,10 +136,18 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { } // Hadoop "gzip" and "zstd" codecs require native library installed for sequence files - val codecs = Seq((new DefaultCodec(), "default"), (new BZip2Codec(), "bzip2"), - (new SnappyCodec(), "snappy")) ++ { - if (VersionUtils.isHadoop3) Seq((new Lz4Codec(), "lz4")) else Seq() + private val codecs = Seq((new DefaultCodec(), "default"), (new BZip2Codec(), "bzip2")) ++ { + scala.util.Try { + // See HADOOP-17125. Hadoop lower than 3.3.1 can throw an exception when its native + // library for Snappy is unavailable. Here it calls `SnappyCodec.getCompressorType` + // to indirectly test if the Snappy native library is available in lower Hadoop versions. + new SnappyCodec().getCompressorType + (new SnappyCodec(), "snappy") + }.toOption + } ++ { + if (VersionUtils.isHadoop3) Seq((new Lz4Codec(), "lz4")) else Seq.empty } + codecs.foreach { case (codec, codecName) => runSequenceFileCodecTest(codec, codecName) } From 1617eaded434069a38cd26cb1335d3fea2501bb0 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Sun, 10 Apr 2022 20:36:58 -0700 Subject: [PATCH 119/535] [SPARK-38391][SPARK-38768][SQL][FOLLOWUP] Add comments for `pushLimit` and `pushTopN` of `PushDownUtils` ### What changes were proposed in this pull request? `pushLimit` and `pushTopN` of `PushDownUtils` returns tuple of boolean. It will be good to explain what the boolean value represents. ### Why are the changes needed? Make DS V2 API more friendly to developers. ### Does this PR introduce _any_ user-facing change? 'No'. Just update comments. ### How was this patch tested? N/A Closes #36092 from beliefer/SPARK-38391_SPARK-38768_followup. Authored-by: Jiaan Geng Signed-off-by: Dongjoon Hyun (cherry picked from commit c4397cb3dee4f9fa16297c224da15475b2d5a297) Signed-off-by: Dongjoon Hyun --- .../sql/execution/datasources/v2/PushDownUtils.scala | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala index f72310b5d7afa..862189ed3afff 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala @@ -116,7 +116,11 @@ object PushDownUtils extends PredicateHelper { } /** - * Pushes down LIMIT to the data source Scan + * Pushes down LIMIT to the data source Scan. + * + * @return the tuple of Boolean. The first Boolean value represents whether to push down, and + * the second Boolean value represents whether to push down partially, which means + * Spark will keep the Limit and do it again. */ def pushLimit(scanBuilder: ScanBuilder, limit: Int): Boolean = { scanBuilder match { @@ -127,7 +131,11 @@ object PushDownUtils extends PredicateHelper { } /** - * Pushes down top N to the data source Scan + * Pushes down top N to the data source Scan. + * + * @return the tuple of Boolean. The first Boolean value represents whether to push down, and + * the second Boolean value represents whether to push down partially, which means + * Spark will keep the Sort and Limit and do it again. */ def pushTopN( scanBuilder: ScanBuilder, From df786adb6a1ba97f232ea0fec14a0db493b9ca3d Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Mon, 11 Apr 2022 13:38:04 +0900 Subject: [PATCH 120/535] [SPARK-38837][PYTHON] Implement `dropna` parameter of `SeriesGroupBy.value_counts` Implement `dropna` parameter of `SeriesGroupBy.value_counts` to exclude counts of NaN. It also fixes the behavior of `self._dropna` in the context of `SeriesGroupBy.value_counts`. To reach parity with pandas. Yes. `dropna` parameter of `SeriesGroupBy.value_counts` is supported. ```py >>> psdf = ps.DataFrame( ... {"A": [np.nan, 2, 2, 3, 3, 3], "B": [1, 1, 2, 3, 3, np.nan]}, columns=["A", "B"] ... ) >>> psdf.groupby("A")["B"].value_counts(dropna=False).sort_index() A B 2.0 1.0 1 2.0 1 3.0 3.0 2 NaN 1 Name: B, dtype: int64 >>> psdf.groupby("A", dropna=False)["B"].value_counts(dropna=False).sort_index() # self.dropna=False A B 2.0 1.0 1 2.0 1 3.0 3.0 2 NaN 1 NaN 1.0 1 Name: B, dtype: int64 >>> psdf.groupby("A")["B"].value_counts(dropna=True).sort_index() A B 2.0 1.0 1 2.0 1 3.0 3.0 2 Name: B, dtype: int64 ``` Unit tests. Closes #36093 from xinrong-databricks/SeriesGroupBy.value_counts. Authored-by: Xinrong Meng Signed-off-by: Hyukjin Kwon (cherry picked from commit 2f122ba6d13ea26411fa4bf3e636ced449a8a205) Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/groupby.py | 45 +++++++++++++++------ python/pyspark/pandas/tests/test_groupby.py | 38 +++++++++++++++-- 2 files changed, 68 insertions(+), 15 deletions(-) diff --git a/python/pyspark/pandas/groupby.py b/python/pyspark/pandas/groupby.py index addb53d8cd5c1..6ef698015dd79 100644 --- a/python/pyspark/pandas/groupby.py +++ b/python/pyspark/pandas/groupby.py @@ -3205,23 +3205,35 @@ def value_counts( Examples -------- >>> df = ps.DataFrame({'A': [1, 2, 2, 3, 3, 3], - ... 'B': [1, 1, 2, 3, 3, 3]}, + ... 'B': [1, 1, 2, 3, 3, np.nan]}, ... columns=['A', 'B']) >>> df - A B - 0 1 1 - 1 2 1 - 2 2 2 - 3 3 3 - 4 3 3 - 5 3 3 + A B + 0 1 1.0 + 1 2 1.0 + 2 2 2.0 + 3 3 3.0 + 4 3 3.0 + 5 3 NaN >>> df.groupby('A')['B'].value_counts().sort_index() # doctest: +NORMALIZE_WHITESPACE A B - 1 1 1 - 2 1 1 - 2 1 - 3 3 3 + 1 1.0 1 + 2 1.0 1 + 2.0 1 + 3 3.0 2 + Name: B, dtype: int64 + + Don't include counts of NaN when dropna is False. + + >>> df.groupby('A')['B'].value_counts( + ... dropna=False).sort_index() # doctest: +NORMALIZE_WHITESPACE + A B + 1 1.0 1 + 2 1.0 1 + 2.0 1 + 3 3.0 2 + NaN 1 Name: B, dtype: int64 """ groupkeys = self._groupkeys + self._agg_columns @@ -3229,9 +3241,18 @@ def value_counts( groupkey_cols = [s.spark.column.alias(name) for s, name in zip(groupkeys, groupkey_names)] sdf = self._psdf._internal.spark_frame + agg_column = self._agg_columns[0]._internal.data_spark_column_names[0] sdf = sdf.groupby(*groupkey_cols).count().withColumnRenamed("count", agg_column) + if self._dropna: + _groupkey_column_names = groupkey_names[: len(self._groupkeys)] + sdf = sdf.dropna(subset=_groupkey_column_names) + + if dropna: + _agg_columns_names = groupkey_names[len(self._groupkeys) :] + sdf = sdf.dropna(subset=_agg_columns_names) + if sort: if ascending: sdf = sdf.orderBy(scol_for(sdf, agg_column).asc()) diff --git a/python/pyspark/pandas/tests/test_groupby.py b/python/pyspark/pandas/tests/test_groupby.py index ec17e0dba2799..8beedcabf54ac 100644 --- a/python/pyspark/pandas/tests/test_groupby.py +++ b/python/pyspark/pandas/tests/test_groupby.py @@ -1054,24 +1054,56 @@ def test_unique(self): self.assertTrue(sorted(act) == sorted(exp)) def test_value_counts(self): - pdf = pd.DataFrame({"A": [1, 2, 2, 3, 3, 3], "B": [1, 1, 2, 3, 3, 3]}, columns=["A", "B"]) + pdf = pd.DataFrame( + {"A": [np.nan, 2, 2, 3, 3, 3], "B": [1, 1, 2, 3, 3, np.nan]}, columns=["A", "B"] + ) psdf = ps.from_pandas(pdf) self.assert_eq( psdf.groupby("A")["B"].value_counts().sort_index(), pdf.groupby("A")["B"].value_counts().sort_index(), ) + self.assert_eq( + psdf.groupby("A")["B"].value_counts(dropna=False).sort_index(), + pdf.groupby("A")["B"].value_counts(dropna=False).sort_index(), + ) + self.assert_eq( + psdf.groupby("A", dropna=False)["B"].value_counts(dropna=False).sort_index(), + pdf.groupby("A", dropna=False)["B"].value_counts(dropna=False).sort_index(), + # Returns are the same considering values and types, + # disable check_exact to pass the assert_eq + check_exact=False, + ) self.assert_eq( psdf.groupby("A")["B"].value_counts(sort=True, ascending=False).sort_index(), pdf.groupby("A")["B"].value_counts(sort=True, ascending=False).sort_index(), ) self.assert_eq( - psdf.groupby("A")["B"].value_counts(sort=True, ascending=True).sort_index(), - pdf.groupby("A")["B"].value_counts(sort=True, ascending=True).sort_index(), + psdf.groupby("A")["B"] + .value_counts(sort=True, ascending=False, dropna=False) + .sort_index(), + pdf.groupby("A")["B"] + .value_counts(sort=True, ascending=False, dropna=False) + .sort_index(), + ) + self.assert_eq( + psdf.groupby("A")["B"] + .value_counts(sort=True, ascending=True, dropna=False) + .sort_index(), + pdf.groupby("A")["B"] + .value_counts(sort=True, ascending=True, dropna=False) + .sort_index(), ) self.assert_eq( psdf.B.rename().groupby(psdf.A).value_counts().sort_index(), pdf.B.rename().groupby(pdf.A).value_counts().sort_index(), ) + self.assert_eq( + psdf.B.rename().groupby(psdf.A, dropna=False).value_counts().sort_index(), + pdf.B.rename().groupby(pdf.A, dropna=False).value_counts().sort_index(), + # Returns are the same considering values and types, + # disable check_exact to pass the assert_eq + check_exact=False, + ) self.assert_eq( psdf.B.groupby(psdf.A.rename()).value_counts().sort_index(), pdf.B.groupby(pdf.A.rename()).value_counts().sort_index(), From 5c3ef79544814473e49d356f36cf100aca9afe57 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Mon, 11 Apr 2022 13:48:07 +0800 Subject: [PATCH 121/535] [SPARK-37960][SQL][FOLLOWUP] Make the testing CASE WHEN query more reasonable ### What changes were proposed in this pull request? Some testing CASE WHEN queries are not carefully written and do not make sense. In the future, the optimizer may get smarter and get rid of the CASE WHEN completely, and then we loose test coverage. This PR updates some CASE WHEN queries to make them more reasonable. ### Why are the changes needed? future-proof test coverage. ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? N/A Closes #36125 from beliefer/SPARK-37960_followup3. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit 4e118383f58d23d5515ce6b00d3935e3ac51fb03) Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 6a0a55b77881e..23138b8899bda 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -889,10 +889,10 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel | COUNT(CASE WHEN SALARY >= 12000 OR SALARY < 9000 THEN SALARY ELSE 0 END), | COUNT(CASE WHEN SALARY >= 12000 OR NOT(SALARY >= 9000) THEN SALARY ELSE 0 END), | MAX(CASE WHEN NOT(SALARY > 10000) AND SALARY >= 8000 THEN SALARY ELSE 0 END), - | MAX(CASE WHEN NOT(SALARY > 10000) OR SALARY > 8000 THEN SALARY ELSE 0 END), + | MAX(CASE WHEN NOT(SALARY > 9000) OR SALARY > 10000 THEN SALARY ELSE 0 END), | MAX(CASE WHEN NOT(SALARY > 10000) AND NOT(SALARY < 8000) THEN SALARY ELSE 0 END), | MAX(CASE WHEN NOT(SALARY != 0) OR NOT(SALARY < 8000) THEN SALARY ELSE 0 END), - | MAX(CASE WHEN NOT(SALARY > 8000 AND SALARY > 8000) THEN 0 ELSE SALARY END), + | MAX(CASE WHEN NOT(SALARY > 8000 AND SALARY < 10000) THEN 0 ELSE SALARY END), | MIN(CASE WHEN NOT(SALARY > 8000 OR SALARY IS NULL) THEN SALARY ELSE 0 END), | SUM(CASE WHEN SALARY > 10000 THEN 2 WHEN SALARY > 8000 THEN 1 END), | AVG(CASE WHEN NOT(SALARY > 8000 OR SALARY IS NOT NULL) THEN SALARY ELSE 0 END) @@ -904,9 +904,9 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel " THEN SALARY ELSE 0.00 END), COUNT(CAS..., " + "PushedFilters: [], " + "PushedGroupByColumns: [DEPT], ") - checkAnswer(df, Seq(Row(1, 1, 1, 1, 1, 0d, 12000d, 0d, 12000d, 12000d, 0d, 2, 0d), - Row(2, 2, 2, 2, 2, 10000d, 10000d, 10000d, 10000d, 10000d, 0d, 2, 0d), - Row(2, 2, 2, 2, 2, 10000d, 12000d, 10000d, 12000d, 12000d, 0d, 3, 0d))) + checkAnswer(df, Seq(Row(1, 1, 1, 1, 1, 0d, 12000d, 0d, 12000d, 0d, 0d, 2, 0d), + Row(2, 2, 2, 2, 2, 10000d, 12000d, 10000d, 12000d, 0d, 0d, 3, 0d), + Row(2, 2, 2, 2, 2, 10000d, 9000d, 10000d, 10000d, 9000d, 0d, 2, 0d))) } test("scan with aggregate push-down: aggregate function with binary arithmetic") { From b3cd07b236f46e8c402b06820d6f3a25fe608593 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Mon, 11 Apr 2022 13:50:57 +0800 Subject: [PATCH 122/535] [SPARK-38761][SQL] DS V2 supports push down misc non-aggregate functions ### What changes were proposed in this pull request? Currently, Spark have some misc non-aggregate functions of ANSI standard. Please refer https://github.com/apache/spark/blob/2f8613f22c0750c00cf1dcfb2f31c431d8dc1be7/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L362. These functions show below: `abs`, `coalesce`, `nullif`, `CASE WHEN` DS V2 should supports push down these misc non-aggregate functions. Because DS V2 already support push down `CASE WHEN`, so this PR no need do the job again. Because `nullif` extends `RuntimeReplaceable`, so this PR no need do the job too. ### Why are the changes needed? DS V2 supports push down misc non-aggregate functions ### Does this PR introduce _any_ user-facing change? 'No'. New feature. ### How was this patch tested? New tests. Closes #36039 from beliefer/SPARK-38761. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit 9ce4ba02d3f67116a4a9786af453d869596fb3ec) Signed-off-by: Wenchen Fan --- .../util/V2ExpressionSQLBuilder.java | 8 +++ .../catalyst/util/V2ExpressionBuilder.scala | 11 +++- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 50 ++++++++++--------- 3 files changed, 44 insertions(+), 25 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index c8d924db75aed..a7d1ed7f85e84 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -93,6 +93,10 @@ public String build(Expression expr) { return visitNot(build(e.children()[0])); case "~": return visitUnaryArithmetic(name, inputToSQL(e.children()[0])); + case "ABS": + case "COALESCE": + return visitSQLFunction(name, + Arrays.stream(e.children()).map(c -> build(c)).toArray(String[]::new)); case "CASE_WHEN": { List children = Arrays.stream(e.children()).map(c -> build(c)).collect(Collectors.toList()); @@ -210,6 +214,10 @@ protected String visitCaseWhen(String[] children) { return sb.toString(); } + protected String visitSQLFunction(String funcName, String[] inputs) { + return funcName + "(" + Arrays.stream(inputs).collect(Collectors.joining(", ")) + ")"; + } + protected String visitUnexpectedExpr(Expression expr) throws IllegalArgumentException { throw new IllegalArgumentException("Unexpected V2 expression: " + expr); } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala index 5fd01ac5636b1..37db499470aa3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.util -import org.apache.spark.sql.catalyst.expressions.{Add, And, BinaryComparison, BinaryOperator, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, CaseWhen, Cast, Contains, Divide, EndsWith, EqualTo, Expression, In, InSet, IsNotNull, IsNull, Literal, Multiply, Not, Or, Predicate, Remainder, StartsWith, StringPredicate, Subtract, UnaryMinus} +import org.apache.spark.sql.catalyst.expressions.{Abs, Add, And, BinaryComparison, BinaryOperator, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, CaseWhen, Cast, Coalesce, Contains, Divide, EndsWith, EqualTo, Expression, In, InSet, IsNotNull, IsNull, Literal, Multiply, Not, Or, Predicate, Remainder, StartsWith, StringPredicate, Subtract, UnaryMinus} import org.apache.spark.sql.connector.expressions.{Cast => V2Cast, Expression => V2Expression, FieldReference, GeneralScalarExpression, LiteralValue} import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse, AlwaysTrue, And => V2And, Not => V2Not, Or => V2Or, Predicate => V2Predicate} import org.apache.spark.sql.execution.datasources.PushableColumn @@ -95,6 +95,15 @@ class V2ExpressionBuilder( } case Cast(child, dataType, _, true) => generateExpression(child).map(v => new V2Cast(v, dataType)) + case Abs(child, true) => generateExpression(child) + .map(v => new GeneralScalarExpression("ABS", Array[V2Expression](v))) + case Coalesce(children) => + val childrenExpressions = children.flatMap(generateExpression(_)) + if (children.length == childrenExpressions.length) { + Some(new GeneralScalarExpression("COALESCE", childrenExpressions.toArray[V2Expression])) + } else { + None + } case and: And => // AND expects predicate val l = generateExpression(and.left, true) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 23138b8899bda..858781f2cde1b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.analysis.CannotReplaceMissingTableException import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Sort} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanRelation, V1ScanWrapper} import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -import org.apache.spark.sql.functions.{avg, count, count_distinct, lit, not, sum, udf, when} +import org.apache.spark.sql.functions.{abs, avg, coalesce, count, count_distinct, lit, not, sum, udf, when} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.util.Utils @@ -381,19 +381,13 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(df, Seq(Row("fred", 1), Row("mary", 2))) val df2 = spark.table("h2.test.people").filter($"id" + Int.MaxValue > 1) - checkFiltersRemoved(df2, ansiMode) - - df2.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = if (ansiMode) { - "PushedFilters: [ID IS NOT NULL, (ID + 2147483647) > 1], " - } else { - "PushedFilters: [ID IS NOT NULL], " - } - checkKeywordsExistsInExplain(df2, expected_plan_fragment) + val expectedPlanFragment2 = if (ansiMode) { + "PushedFilters: [ID IS NOT NULL, (ID + 2147483647) > 1], " + } else { + "PushedFilters: [ID IS NOT NULL], " } - + checkPushedInfo(df2, expectedPlanFragment2) if (ansiMode) { val e = intercept[SparkException] { checkAnswer(df2, Seq.empty) @@ -422,22 +416,30 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val df4 = spark.table("h2.test.employee") .filter(($"salary" > 1000d).and($"salary" < 12000d)) - checkFiltersRemoved(df4, ansiMode) - - df4.queryExecution.optimizedPlan.collect { - case _: DataSourceV2ScanRelation => - val expected_plan_fragment = if (ansiMode) { - "PushedFilters: [SALARY IS NOT NULL, " + - "CAST(SALARY AS double) > 1000.0, CAST(SALARY AS double) < 12000.0], " - } else { - "PushedFilters: [SALARY IS NOT NULL], " - } - checkKeywordsExistsInExplain(df4, expected_plan_fragment) + val expectedPlanFragment4 = if (ansiMode) { + "PushedFilters: [SALARY IS NOT NULL, " + + "CAST(SALARY AS double) > 1000.0, CAST(SALARY AS double) < 12000.0], " + } else { + "PushedFilters: [SALARY IS NOT NULL], " } - + checkPushedInfo(df4, expectedPlanFragment4) checkAnswer(df4, Seq(Row(1, "amy", 10000, 1000, true), Row(1, "cathy", 9000, 1200, false), Row(2, "david", 10000, 1300, true))) + + val df5 = spark.table("h2.test.employee") + .filter(abs($"dept" - 3) > 1) + .filter(coalesce($"salary", $"bonus") > 2000) + checkFiltersRemoved(df5, ansiMode) + val expectedPlanFragment5 = if (ansiMode) { + "PushedFilters: [DEPT IS NOT NULL, ABS(DEPT - 3) > 1, " + + "(COALESCE(CAST(SALARY AS double), BONUS)) > 2000.0]" + } else { + "PushedFilters: [DEPT IS NOT NULL]" + } + checkPushedInfo(df5, expectedPlanFragment5) + checkAnswer(df5, Seq(Row(1, "amy", 10000, 1000, true), + Row(1, "cathy", 9000, 1200, false), Row(6, "jen", 12000, 1200, true))) } } } From e54dc43e750be23062422ca096d1e8439178a1d1 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Mon, 11 Apr 2022 15:43:41 +0800 Subject: [PATCH 123/535] [SPARK-38565][SQL] Support Left Semi join in row level runtime filters ### What changes were proposed in this pull request? 1. Support Left Semi join in row level runtime filters. 2. Rename `spark.sql.optimizer.runtime.bloomFilter.applicationSideScanSizethreshold` to `spark.sql.optimizer.runtime.bloomFilter.applicationSideScanSizeThreshold`. ### Why are the changes needed? Improve query performance and make the code easier to maintain. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing UT. Closes #36131 from wangyum/SPARK-38565. Authored-by: Yuming Wang Signed-off-by: Yuming Wang (cherry picked from commit 073fd2ad5c16d193725954e76ce357e4a9d97449) Signed-off-by: Yuming Wang --- .../optimizer/InjectRuntimeFilter.scala | 29 ++++++------------- .../spark/sql/catalyst/optimizer/joins.scala | 10 +++++++ .../apache/spark/sql/internal/SQLConf.scala | 2 +- .../dynamicpruning/PartitionPruning.scala | 14 ++------- .../spark/sql/InjectRuntimeFilterSuite.scala | 13 +++++++++ 5 files changed, 35 insertions(+), 33 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala index a69cda25ef4f9..134292ae30da1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InjectRuntimeFilter.scala @@ -20,7 +20,6 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, BloomFilterAggregate, Complete} import org.apache.spark.sql.catalyst.planning.{ExtractEquiJoinKeys, PhysicalOperation} -import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern.{INVOKE, JSON_TO_STRUCT, LIKE_FAMLIY, PYTHON_UDF, REGEXP_EXTRACT_FAMILY, REGEXP_REPLACE, SCALA_UDF} @@ -132,16 +131,6 @@ object InjectRuntimeFilter extends Rule[LogicalPlan] with PredicateHelper with J REGEXP_EXTRACT_FAMILY, REGEXP_REPLACE) } - private def canFilterLeft(joinType: JoinType): Boolean = joinType match { - case Inner | RightOuter => true - case _ => false - } - - private def canFilterRight(joinType: JoinType): Boolean = joinType match { - case Inner | LeftOuter => true - case _ => false - } - private def isProbablyShuffleJoin(left: LogicalPlan, right: LogicalPlan, hint: JoinHint): Boolean = { !hintToBroadcastLeft(hint) && !hintToBroadcastRight(hint) && @@ -149,11 +138,11 @@ object InjectRuntimeFilter extends Rule[LogicalPlan] with PredicateHelper with J } private def probablyHasShuffle(plan: LogicalPlan): Boolean = { - plan.collectFirst { - case j@Join(left, right, _, _, hint) - if isProbablyShuffleJoin(left, right, hint) => j - case a: Aggregate => a - }.nonEmpty + plan.exists { + case Join(left, right, _, _, hint) => isProbablyShuffleJoin(left, right, hint) + case _: Aggregate => true + case _ => false + } } // Returns the max scan byte size in the subtree rooted at `filterApplicationSide`. @@ -235,7 +224,7 @@ object InjectRuntimeFilter extends Rule[LogicalPlan] with PredicateHelper with J } private def findBloomFilterWithExp(plan: LogicalPlan, key: Expression): Boolean = { - plan.find { + plan.exists { case Filter(condition, _) => splitConjunctivePredicates(condition).exists { case BloomFilterMightContain(_, XxHash64(Seq(valueExpression), _)) @@ -243,7 +232,7 @@ object InjectRuntimeFilter extends Rule[LogicalPlan] with PredicateHelper with J case _ => false } case _ => false - }.isDefined + } } def hasInSubquery(left: LogicalPlan, right: LogicalPlan, leftKey: Expression, @@ -277,11 +266,11 @@ object InjectRuntimeFilter extends Rule[LogicalPlan] with PredicateHelper with J isSimpleExpression(l) && isSimpleExpression(r)) { val oldLeft = newLeft val oldRight = newRight - if (canFilterLeft(joinType) && filteringHasBenefit(left, right, l, hint)) { + if (canPruneLeft(joinType) && filteringHasBenefit(left, right, l, hint)) { newLeft = injectFilter(l, newLeft, r, right) } // Did we actually inject on the left? If not, try on the right - if (newLeft.fastEquals(oldLeft) && canFilterRight(joinType) && + if (newLeft.fastEquals(oldLeft) && canPruneRight(joinType) && filteringHasBenefit(right, left, r, hint)) { newRight = injectFilter(r, newRight, l, left) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala index 6d683a7a11384..45d8c54ea195f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/joins.scala @@ -347,6 +347,16 @@ trait JoinSelectionHelper { join.hint, hintOnly = false, conf).isDefined } + def canPruneLeft(joinType: JoinType): Boolean = joinType match { + case Inner | LeftSemi | RightOuter => true + case _ => false + } + + def canPruneRight(joinType: JoinType): Boolean = joinType match { + case Inner | LeftSemi | LeftOuter => true + case _ => false + } + def hintToBroadcastLeft(hint: JoinHint): Boolean = { hint.leftHint.exists(_.strategy.contains(BROADCAST)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index c4ffc844135d4..365a9a378cb68 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -375,7 +375,7 @@ object SQLConf { .createWithDefaultString("10MB") val RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD = - buildConf("spark.sql.optimizer.runtime.bloomFilter.applicationSideScanSizethreshold") + buildConf("spark.sql.optimizer.runtime.bloomFilter.applicationSideScanSizeThreshold") .doc("Byte size threshold of the Bloom filter application side plan's aggregated scan " + "size. Aggregated scan byte size of the Bloom filter application side needs to be over " + "this value to inject a bloom filter.") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala index 114d58c739e29..402c59bc3de5f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/PartitionPruning.scala @@ -19,8 +19,8 @@ package org.apache.spark.sql.execution.dynamicpruning import org.apache.spark.sql.catalyst.catalog.HiveTableRelation import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.optimizer.JoinSelectionHelper import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys -import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.read.SupportsRuntimeFiltering @@ -49,7 +49,7 @@ import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation * subquery query twice, we keep the duplicated subquery * (3) otherwise, we drop the subquery. */ -object PartitionPruning extends Rule[LogicalPlan] with PredicateHelper { +object PartitionPruning extends Rule[LogicalPlan] with PredicateHelper with JoinSelectionHelper { /** * Searches for a table scan that can be filtered for a given column in a logical plan. @@ -215,16 +215,6 @@ object PartitionPruning extends Rule[LogicalPlan] with PredicateHelper { !plan.isStreaming && hasSelectivePredicate(plan) } - private def canPruneLeft(joinType: JoinType): Boolean = joinType match { - case Inner | LeftSemi | RightOuter => true - case _ => false - } - - private def canPruneRight(joinType: JoinType): Boolean = joinType match { - case Inner | LeftSemi | LeftOuter => true - case _ => false - } - private def prune(plan: LogicalPlan): LogicalPlan = { plan transformUp { // skip this rule if there's already a DPP subquery on the LHS of a join diff --git a/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala index 097a18cabd58c..726fa341b5c71 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala @@ -526,4 +526,17 @@ class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSp "bf1.c1 = square(bf2.c2) where bf2.a2 = 62" ) } } + + test("Support Left Semi join in row level runtime filters") { + withSQLConf(SQLConf.RUNTIME_BLOOM_FILTER_APPLICATION_SIDE_SCAN_SIZE_THRESHOLD.key -> "3000", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "32") { + assertRewroteWithBloomFilter( + """ + |SELECT * + |FROM bf1 LEFT SEMI + |JOIN (SELECT * FROM bf2 WHERE bf2.a2 = 62) tmp + |ON bf1.c1 = tmp.c2 + """.stripMargin) + } + } } From f4457e6310f6bd900d7634b279606436a6faf8fb Mon Sep 17 00:00:00 2001 From: zero323 Date: Mon, 11 Apr 2022 11:29:47 +0200 Subject: [PATCH 124/535] [SPARK-37402][PYTHON][MLLIB] Inline typehints for pyspark.mllib.clustering ### What changes were proposed in this pull request? This PR migrates type `pyspark.mllib.clustering` annotations from stub file to inline type hints. ### Why are the changes needed? Part of ongoing migration of type hints. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #35578 from zero323/SPARK-37402. Authored-by: zero323 Signed-off-by: zero323 (cherry picked from commit e71cf3907b9ff2036dfe45bc8fe939f20cca741b) Signed-off-by: zero323 --- python/pyspark/mllib/clustering.py | 247 ++++++++++++++++++---------- python/pyspark/mllib/clustering.pyi | 188 --------------------- 2 files changed, 163 insertions(+), 272 deletions(-) delete mode 100644 python/pyspark/mllib/clustering.pyi diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index a9e4fd82089cd..fd33887fd9e2b 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -19,7 +19,9 @@ import array as pyarray from math import exp, log from collections import namedtuple +from typing import Any, List, Optional, Tuple, TypeVar, Union, overload, TYPE_CHECKING +import numpy as np from numpy import array, random, tile from pyspark import SparkContext, since @@ -30,6 +32,12 @@ from pyspark.mllib.util import Saveable, Loader, inherit_doc, JavaLoader, JavaSaveable from pyspark.streaming import DStream +if TYPE_CHECKING: + from py4j.java_gateway import JavaObject + from pyspark.mllib._typing import VectorLike + +T = TypeVar("T") + __all__ = [ "BisectingKMeansModel", "BisectingKMeans", @@ -67,24 +75,32 @@ class BisectingKMeansModel(JavaModelWrapper): 0.0 """ - def __init__(self, java_model): + def __init__(self, java_model: "JavaObject"): super(BisectingKMeansModel, self).__init__(java_model) self.centers = [c.toArray() for c in self.call("clusterCenters")] - @property + @property # type: ignore[misc] @since("2.0.0") - def clusterCenters(self): + def clusterCenters(self) -> List[np.ndarray]: """Get the cluster centers, represented as a list of NumPy arrays.""" return self.centers - @property + @property # type: ignore[misc] @since("2.0.0") - def k(self): + def k(self) -> int: """Get the number of clusters""" return self.call("k") - def predict(self, x): + @overload + def predict(self, x: "VectorLike") -> int: + ... + + @overload + def predict(self, x: RDD["VectorLike"]) -> RDD[int]: + ... + + def predict(self, x: Union["VectorLike", RDD["VectorLike"]]) -> Union[int, RDD[int]]: """ Find the cluster that each of the points belongs to in this model. @@ -111,7 +127,7 @@ def predict(self, x): x = _convert_to_vector(x) return self.call("predict", x) - def computeCost(self, x): + def computeCost(self, x: Union["VectorLike", RDD["VectorLike"]]) -> float: """ Return the Bisecting K-means cost (sum of squared distances of points to their nearest center) for this model on the given @@ -159,7 +175,14 @@ class BisectingKMeans: """ @classmethod - def train(self, rdd, k=4, maxIterations=20, minDivisibleClusterSize=1.0, seed=-1888008604): + def train( + self, + rdd: RDD["VectorLike"], + k: int = 4, + maxIterations: int = 20, + minDivisibleClusterSize: float = 1.0, + seed: int = -1888008604, + ) -> BisectingKMeansModel: """ Runs the bisecting k-means algorithm return the model. @@ -197,7 +220,7 @@ def train(self, rdd, k=4, maxIterations=20, minDivisibleClusterSize=1.0, seed=-1 @inherit_doc -class KMeansModel(Saveable, Loader): +class KMeansModel(Saveable, Loader["KMeansModel"]): """A clustering model derived from the k-means method. @@ -255,22 +278,30 @@ class KMeansModel(Saveable, Loader): [array([-1000., -1000.]), array([ 5., 5.]), array([ 1000., 1000.])] """ - def __init__(self, centers): + def __init__(self, centers: List["VectorLike"]): self.centers = centers - @property + @property # type: ignore[misc] @since("1.0.0") - def clusterCenters(self): + def clusterCenters(self) -> List["VectorLike"]: """Get the cluster centers, represented as a list of NumPy arrays.""" return self.centers - @property + @property # type: ignore[misc] @since("1.4.0") - def k(self): + def k(self) -> int: """Total number of clusters.""" return len(self.centers) - def predict(self, x): + @overload + def predict(self, x: "VectorLike") -> int: + ... + + @overload + def predict(self, x: RDD["VectorLike"]) -> RDD[int]: + ... + + def predict(self, x: Union["VectorLike", RDD["VectorLike"]]) -> Union[int, RDD[int]]: """ Find the cluster that each of the points belongs to in this model. @@ -297,13 +328,13 @@ def predict(self, x): x = _convert_to_vector(x) for i in range(len(self.centers)): - distance = x.squared_distance(self.centers[i]) + distance = x.squared_distance(self.centers[i]) # type: ignore[attr-defined] if distance < best_distance: best = i best_distance = distance return best - def computeCost(self, rdd): + def computeCost(self, rdd: RDD["VectorLike"]) -> float: """ Return the K-means cost (sum of squared distances of points to their nearest center) for this model on the given @@ -324,20 +355,24 @@ def computeCost(self, rdd): return cost @since("1.4.0") - def save(self, sc, path): + def save(self, sc: SparkContext, path: str) -> None: """ Save this model to the given path. """ + assert sc._jvm is not None + java_centers = _py2java(sc, [_convert_to_vector(c) for c in self.centers]) java_model = sc._jvm.org.apache.spark.mllib.clustering.KMeansModel(java_centers) java_model.save(sc._jsc.sc(), path) @classmethod @since("1.4.0") - def load(cls, sc, path): + def load(cls, sc: SparkContext, path: str) -> "KMeansModel": """ Load a model from the given path. """ + assert sc._jvm is not None + java_model = sc._jvm.org.apache.spark.mllib.clustering.KMeansModel.load(sc._jsc.sc(), path) return KMeansModel(_java2py(sc, java_model.clusterCenters())) @@ -352,16 +387,16 @@ class KMeans: @classmethod def train( cls, - rdd, - k, - maxIterations=100, - initializationMode="k-means||", - seed=None, - initializationSteps=2, - epsilon=1e-4, - initialModel=None, - distanceMeasure="euclidean", - ): + rdd: RDD["VectorLike"], + k: int, + maxIterations: int = 100, + initializationMode: str = "k-means||", + seed: Optional[int] = None, + initializationSteps: int = 2, + epsilon: float = 1e-4, + initialModel: Optional[KMeansModel] = None, + distanceMeasure: str = "euclidean", + ) -> "KMeansModel": """ Train a k-means clustering model. @@ -428,7 +463,7 @@ def train( @inherit_doc -class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader): +class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader["GaussianMixtureModel"]): """ A clustering model derived from the Gaussian Mixture Model method. @@ -497,18 +532,18 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader): True """ - @property + @property # type: ignore[misc] @since("1.4.0") - def weights(self): + def weights(self) -> np.ndarray: """ Weights for each Gaussian distribution in the mixture, where weights[i] is the weight for Gaussian i, and weights.sum == 1. """ return array(self.call("weights")) - @property + @property # type: ignore[misc] @since("1.4.0") - def gaussians(self): + def gaussians(self) -> List[MultivariateGaussian]: """ Array of MultivariateGaussian where gaussians[i] represents the Multivariate Gaussian (Normal) Distribution for Gaussian i. @@ -517,13 +552,21 @@ def gaussians(self): MultivariateGaussian(gaussian[0], gaussian[1]) for gaussian in self.call("gaussians") ] - @property + @property # type: ignore[misc] @since("1.4.0") - def k(self): + def k(self) -> int: """Number of gaussians in mixture.""" return len(self.weights) - def predict(self, x): + @overload + def predict(self, x: "VectorLike") -> np.int64: + ... + + @overload + def predict(self, x: RDD["VectorLike"]) -> RDD[int]: + ... + + def predict(self, x: Union["VectorLike", RDD["VectorLike"]]) -> Union[np.int64, RDD[int]]: """ Find the cluster to which the point 'x' or each point in RDD 'x' has maximum membership in this model. @@ -548,7 +591,17 @@ def predict(self, x): z = self.predictSoft(x) return z.argmax() - def predictSoft(self, x): + @overload + def predictSoft(self, x: "VectorLike") -> np.ndarray: + ... + + @overload + def predictSoft(self, x: RDD["VectorLike"]) -> RDD[pyarray.array]: + ... + + def predictSoft( + self, x: Union["VectorLike", RDD["VectorLike"]] + ) -> Union[np.ndarray, RDD[pyarray.array]]: """ Find the membership of point 'x' or each point in RDD 'x' to all mixture components. @@ -579,7 +632,7 @@ def predictSoft(self, x): return self.call("predictSoft", _convert_to_vector(x)).toArray() @classmethod - def load(cls, sc, path): + def load(cls, sc: SparkContext, path: str) -> "GaussianMixtureModel": """Load the GaussianMixtureModel from disk. .. versionadded:: 1.5.0 @@ -590,6 +643,8 @@ def load(cls, sc, path): path : str Path to where the model is stored. """ + assert sc._jvm is not None + model = cls._load_java(sc, path) wrapper = sc._jvm.org.apache.spark.mllib.api.python.GaussianMixtureModelWrapper(model) return cls(wrapper) @@ -603,7 +658,15 @@ class GaussianMixture: """ @classmethod - def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None, initialModel=None): + def train( + cls, + rdd: RDD["VectorLike"], + k: int, + convergenceTol: float = 1e-3, + maxIterations: int = 100, + seed: Optional[int] = None, + initialModel: Optional[GaussianMixtureModel] = None, + ) -> GaussianMixtureModel: """ Train a Gaussian Mixture clustering model. @@ -658,7 +721,9 @@ def train(cls, rdd, k, convergenceTol=1e-3, maxIterations=100, seed=None, initia return GaussianMixtureModel(java_model) -class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader): +class PowerIterationClusteringModel( + JavaModelWrapper, JavaSaveable, JavaLoader["PowerIterationClusteringModel"] +): """ Model produced by :py:class:`PowerIterationClustering`. @@ -711,16 +776,16 @@ class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader): ... pass """ - @property + @property # type: ignore[misc] @since("1.5.0") - def k(self): + def k(self) -> int: """ Returns the number of clusters. """ return self.call("k") @since("1.5.0") - def assignments(self): + def assignments(self) -> RDD["PowerIterationClustering.Assignment"]: """ Returns the cluster assignments of this model. """ @@ -728,10 +793,12 @@ def assignments(self): @classmethod @since("1.5.0") - def load(cls, sc, path): + def load(cls, sc: SparkContext, path: str) -> "PowerIterationClusteringModel": """ Load a model from the given path. """ + assert sc._jvm is not None + model = cls._load_java(sc, path) wrapper = sc._jvm.org.apache.spark.mllib.api.python.PowerIterationClusteringModelWrapper( model @@ -757,7 +824,13 @@ class PowerIterationClustering: """ @classmethod - def train(cls, rdd, k, maxIterations=100, initMode="random"): + def train( + cls, + rdd: RDD[Tuple[int, int, float]], + k: int, + maxIterations: int = 100, + initMode: str = "random", + ) -> PowerIterationClusteringModel: r""" Train PowerIterationClusteringModel @@ -867,18 +940,20 @@ class StreamingKMeansModel(KMeansModel): 1 """ - def __init__(self, clusterCenters, clusterWeights): + def __init__(self, clusterCenters: List["VectorLike"], clusterWeights: "VectorLike"): super(StreamingKMeansModel, self).__init__(centers=clusterCenters) - self._clusterWeights = list(clusterWeights) + self._clusterWeights = list(clusterWeights) # type: ignore[arg-type] - @property + @property # type: ignore[misc] @since("1.5.0") - def clusterWeights(self): + def clusterWeights(self) -> List[np.float64]: """Return the cluster weights.""" return self._clusterWeights @since("1.5.0") - def update(self, data, decayFactor, timeUnit): + def update( + self, data: RDD["VectorLike"], decayFactor: float, timeUnit: str + ) -> "StreamingKMeansModel": """Update the centroids, according to data .. versionadded:: 1.5.0 @@ -909,7 +984,7 @@ def update(self, data, decayFactor, timeUnit): decayFactor, timeUnit, ) - self.centers = array(updatedModel[0]) + self.centers = array(updatedModel[0]) # type: ignore[assignment] self._clusterWeights = list(updatedModel[1]) return self @@ -938,20 +1013,20 @@ class StreamingKMeans: (default: "batches") """ - def __init__(self, k=2, decayFactor=1.0, timeUnit="batches"): + def __init__(self, k: int = 2, decayFactor: float = 1.0, timeUnit: str = "batches"): self._k = k self._decayFactor = decayFactor if timeUnit not in ["batches", "points"]: raise ValueError("timeUnit should be 'batches' or 'points', got %s." % timeUnit) self._timeUnit = timeUnit - self._model = None + self._model: Optional[StreamingKMeansModel] = None @since("1.5.0") - def latestModel(self): + def latestModel(self) -> Optional[StreamingKMeansModel]: """Return the latest model""" return self._model - def _validate(self, dstream): + def _validate(self, dstream: Any) -> None: if self._model is None: raise ValueError( "Initial centers should be set either by setInitialCenters " "or setRandomCenters." @@ -962,19 +1037,19 @@ def _validate(self, dstream): ) @since("1.5.0") - def setK(self, k): + def setK(self, k: int) -> "StreamingKMeans": """Set number of clusters.""" self._k = k return self @since("1.5.0") - def setDecayFactor(self, decayFactor): + def setDecayFactor(self, decayFactor: float) -> "StreamingKMeans": """Set decay factor.""" self._decayFactor = decayFactor return self @since("1.5.0") - def setHalfLife(self, halfLife, timeUnit): + def setHalfLife(self, halfLife: float, timeUnit: str) -> "StreamingKMeans": """ Set number of batches after which the centroids of that particular batch has half the weightage. @@ -984,7 +1059,9 @@ def setHalfLife(self, halfLife, timeUnit): return self @since("1.5.0") - def setInitialCenters(self, centers, weights): + def setInitialCenters( + self, centers: List["VectorLike"], weights: List[float] + ) -> "StreamingKMeans": """ Set initial centers. Should be set before calling trainOn. """ @@ -992,7 +1069,7 @@ def setInitialCenters(self, centers, weights): return self @since("1.5.0") - def setRandomCenters(self, dim, weight, seed): + def setRandomCenters(self, dim: int, weight: float, seed: int) -> "StreamingKMeans": """ Set the initial centers to be random samples from a gaussian population with constant weights. @@ -1000,39 +1077,39 @@ def setRandomCenters(self, dim, weight, seed): rng = random.RandomState(seed) clusterCenters = rng.randn(self._k, dim) clusterWeights = tile(weight, self._k) - self._model = StreamingKMeansModel(clusterCenters, clusterWeights) + self._model = StreamingKMeansModel(clusterCenters, clusterWeights) # type: ignore[arg-type] return self @since("1.5.0") - def trainOn(self, dstream): + def trainOn(self, dstream: "DStream[VectorLike]") -> None: """Train the model on the incoming dstream.""" self._validate(dstream) - def update(rdd): - self._model.update(rdd, self._decayFactor, self._timeUnit) + def update(rdd: RDD["VectorLike"]) -> None: + self._model.update(rdd, self._decayFactor, self._timeUnit) # type: ignore[union-attr] dstream.foreachRDD(update) @since("1.5.0") - def predictOn(self, dstream): + def predictOn(self, dstream: "DStream[VectorLike]") -> "DStream[int]": """ Make predictions on a dstream. Returns a transformed dstream object """ self._validate(dstream) - return dstream.map(lambda x: self._model.predict(x)) + return dstream.map(lambda x: self._model.predict(x)) # type: ignore[union-attr] @since("1.5.0") - def predictOnValues(self, dstream): + def predictOnValues(self, dstream: "DStream[Tuple[T, VectorLike]]") -> "DStream[Tuple[T, int]]": """ Make predictions on a keyed dstream. Returns a transformed dstream object. """ self._validate(dstream) - return dstream.mapValues(lambda x: self._model.predict(x)) + return dstream.mapValues(lambda x: self._model.predict(x)) # type: ignore[union-attr] -class LDAModel(JavaModelWrapper, JavaSaveable, Loader): +class LDAModel(JavaModelWrapper, JavaSaveable, Loader["LDAModel"]): """A clustering model derived from the LDA method. @@ -1089,16 +1166,18 @@ class LDAModel(JavaModelWrapper, JavaSaveable, Loader): """ @since("1.5.0") - def topicsMatrix(self): + def topicsMatrix(self) -> np.ndarray: """Inferred topics, where each topic is represented by a distribution over terms.""" return self.call("topicsMatrix").toArray() @since("1.5.0") - def vocabSize(self): + def vocabSize(self) -> int: """Vocabulary size (number of terms or terms in the vocabulary)""" return self.call("vocabSize") - def describeTopics(self, maxTermsPerTopic=None): + def describeTopics( + self, maxTermsPerTopic: Optional[int] = None + ) -> List[Tuple[List[int], List[float]]]: """Return the topics described by weighted terms. .. versionadded:: 1.6.0 @@ -1124,7 +1203,7 @@ def describeTopics(self, maxTermsPerTopic=None): return topics @classmethod - def load(cls, sc, path): + def load(cls, sc: SparkContext, path: str) -> "LDAModel": """Load the LDAModel from disk. .. versionadded:: 1.5.0 @@ -1153,15 +1232,15 @@ class LDA: @classmethod def train( cls, - rdd, - k=10, - maxIterations=20, - docConcentration=-1.0, - topicConcentration=-1.0, - seed=None, - checkpointInterval=10, - optimizer="em", - ): + rdd: RDD[Tuple[int, "VectorLike"]], + k: int = 10, + maxIterations: int = 20, + docConcentration: float = -1.0, + topicConcentration: float = -1.0, + seed: Optional[int] = None, + checkpointInterval: int = 10, + optimizer: str = "em", + ) -> LDAModel: """Train a LDA model. .. versionadded:: 1.5.0 @@ -1215,7 +1294,7 @@ def train( return LDAModel(model) -def _test(): +def _test() -> None: import doctest import numpy import pyspark.mllib.clustering diff --git a/python/pyspark/mllib/clustering.pyi b/python/pyspark/mllib/clustering.pyi deleted file mode 100644 index 8a8401d35657f..0000000000000 --- a/python/pyspark/mllib/clustering.pyi +++ /dev/null @@ -1,188 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import overload -from typing import List, NamedTuple, Optional, Tuple, TypeVar - -import array - -from numpy import float64, int64, ndarray -from py4j.java_gateway import JavaObject - -from pyspark.mllib._typing import VectorLike -from pyspark.context import SparkContext -from pyspark.rdd import RDD -from pyspark.mllib.common import JavaModelWrapper -from pyspark.mllib.stat.distribution import MultivariateGaussian -from pyspark.mllib.util import Saveable, Loader, JavaLoader, JavaSaveable -from pyspark.streaming.dstream import DStream - -T = TypeVar("T") - -class BisectingKMeansModel(JavaModelWrapper): - centers: List[VectorLike] - def __init__(self, java_model: JavaObject) -> None: ... - @property - def clusterCenters(self) -> List[ndarray]: ... - @property - def k(self) -> int: ... - @overload - def predict(self, x: VectorLike) -> int: ... - @overload - def predict(self, x: RDD[VectorLike]) -> RDD[int]: ... - @overload - def computeCost(self, x: VectorLike) -> float: ... - @overload - def computeCost(self, x: RDD[VectorLike]) -> float: ... - -class BisectingKMeans: - @classmethod - def train( - self, - rdd: RDD[VectorLike], - k: int = ..., - maxIterations: int = ..., - minDivisibleClusterSize: float = ..., - seed: int = ..., - ) -> BisectingKMeansModel: ... - -class KMeansModel(Saveable, Loader[KMeansModel]): - centers: List[VectorLike] - def __init__(self, centers: List[VectorLike]) -> None: ... - @property - def clusterCenters(self) -> List[ndarray]: ... - @property - def k(self) -> int: ... - @overload - def predict(self, x: VectorLike) -> int: ... - @overload - def predict(self, x: RDD[VectorLike]) -> RDD[int]: ... - def computeCost(self, rdd: RDD[VectorLike]) -> float: ... - def save(self, sc: SparkContext, path: str) -> None: ... - @classmethod - def load(cls, sc: SparkContext, path: str) -> KMeansModel: ... - -class KMeans: - @classmethod - def train( - cls, - rdd: RDD[VectorLike], - k: int, - maxIterations: int = ..., - initializationMode: str = ..., - seed: Optional[int] = ..., - initializationSteps: int = ..., - epsilon: float = ..., - initialModel: Optional[KMeansModel] = ..., - ) -> KMeansModel: ... - -class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader[GaussianMixtureModel]): - @property - def weights(self) -> ndarray: ... - @property - def gaussians(self) -> List[MultivariateGaussian]: ... - @property - def k(self) -> int: ... - @overload - def predict(self, x: VectorLike) -> int64: ... - @overload - def predict(self, x: RDD[VectorLike]) -> RDD[int]: ... - @overload - def predictSoft(self, x: VectorLike) -> ndarray: ... - @overload - def predictSoft(self, x: RDD[VectorLike]) -> RDD[array.array]: ... - @classmethod - def load(cls, sc: SparkContext, path: str) -> GaussianMixtureModel: ... - -class GaussianMixture: - @classmethod - def train( - cls, - rdd: RDD[VectorLike], - k: int, - convergenceTol: float = ..., - maxIterations: int = ..., - seed: Optional[int] = ..., - initialModel: Optional[GaussianMixtureModel] = ..., - ) -> GaussianMixtureModel: ... - -class PowerIterationClusteringModel( - JavaModelWrapper, JavaSaveable, JavaLoader[PowerIterationClusteringModel] -): - @property - def k(self) -> int: ... - def assignments(self) -> RDD[PowerIterationClustering.Assignment]: ... - @classmethod - def load(cls, sc: SparkContext, path: str) -> PowerIterationClusteringModel: ... - -class PowerIterationClustering: - @classmethod - def train( - cls, - rdd: RDD[Tuple[int, int, float]], - k: int, - maxIterations: int = ..., - initMode: str = ..., - ) -> PowerIterationClusteringModel: ... - class Assignment(NamedTuple("Assignment", [("id", int), ("cluster", int)])): ... - -class StreamingKMeansModel(KMeansModel): - def __init__(self, clusterCenters: List[VectorLike], clusterWeights: VectorLike) -> None: ... - @property - def clusterWeights(self) -> List[float64]: ... - centers: List[VectorLike] - def update( - self, data: RDD[VectorLike], decayFactor: float, timeUnit: str - ) -> StreamingKMeansModel: ... - -class StreamingKMeans: - def __init__(self, k: int = ..., decayFactor: float = ..., timeUnit: str = ...) -> None: ... - def latestModel(self) -> StreamingKMeansModel: ... - def setK(self, k: int) -> StreamingKMeans: ... - def setDecayFactor(self, decayFactor: float) -> StreamingKMeans: ... - def setHalfLife(self, halfLife: float, timeUnit: str) -> StreamingKMeans: ... - def setInitialCenters( - self, centers: List[VectorLike], weights: List[float] - ) -> StreamingKMeans: ... - def setRandomCenters(self, dim: int, weight: float, seed: int) -> StreamingKMeans: ... - def trainOn(self, dstream: DStream[VectorLike]) -> None: ... - def predictOn(self, dstream: DStream[VectorLike]) -> DStream[int]: ... - def predictOnValues(self, dstream: DStream[Tuple[T, VectorLike]]) -> DStream[Tuple[T, int]]: ... - -class LDAModel(JavaModelWrapper, JavaSaveable, Loader[LDAModel]): - def topicsMatrix(self) -> ndarray: ... - def vocabSize(self) -> int: ... - def describeTopics( - self, maxTermsPerTopic: Optional[int] = ... - ) -> List[Tuple[List[int], List[float]]]: ... - @classmethod - def load(cls, sc: SparkContext, path: str) -> LDAModel: ... - -class LDA: - @classmethod - def train( - cls, - rdd: RDD[Tuple[int, VectorLike]], - k: int = ..., - maxIterations: int = ..., - docConcentration: float = ..., - topicConcentration: float = ..., - seed: Optional[int] = ..., - checkpointInterval: int = ..., - optimizer: str = ..., - ) -> LDAModel: ... From 83ae6f4084f1ebbff96df27f076a283515424c9c Mon Sep 17 00:00:00 2001 From: Daniel Tenedorio Date: Mon, 11 Apr 2022 17:40:39 +0800 Subject: [PATCH 125/535] [SPARK-38796][SQL] Implement the to_number and try_to_number SQL functions according to a new specification ### What changes were proposed in this pull request? This PR implements the `to_number` and `try_to_number` SQL function expressions according to new semantics described below. The former is equivalent to the latter except that it throws an exception instead of returning NULL for cases where the input string does not match the format string. ----------- # `try_to_number` function Returns `expr` cast to DECIMAL using formatting `fmt`, or `NULL` if `expr` is not a valid match for the given format. ## Syntax ``` try_to_number(expr, fmt) fmt { ' [ S ] [ L | $ ] [ 0 | 9 | G | , ] [...] [ . | D ] [ 0 | 9 ] [...] [ L | $ ] [ PR | MI | S ] ' } ``` ## Arguments - `expr`: A STRING expression representing a number. `expr` may include leading or trailing spaces. - `fmt`: An STRING literal, specifying the expected format of `expr`. ## Returns A DECIMAL(p, s) where `p` is the total number of digits (`0` or `9`) and `s` is the number of digits after the decimal point, or 0 if there is none. `fmt` can contain the following elements (case insensitive): - **`0`** or **`9`** Specifies an expected digit between `0` and `9`. A `0` to the left of the decimal points indicates that `expr` must have at least as many digits. A leading `9` indicates that `expr` may omit these digits. `expr` must not be larger than the number of digits to the left of the decimal point allowed by the format string. Digits to the right of the decimal point in the format string indicate the most digits that `expr` may have to the right of the decimal point. - **`.`** or **`D`** Specifies the position of the decimal point. `expr` does not need to include a decimal point. - **`,`** or **`G`** Specifies the position of the `,` grouping (thousands) separator. There must be a `0` or `9` to the left of the rightmost grouping separator. `expr` must match the grouping separator relevant for the size of the number. - **`L`** or **`$`** Specifies the location of the `$` currency sign. This character may only be specified once. - **`S`** Specifies the position of an option '+' or '-' sign. This character may only be specified once. - **`MI`** Specifies that `expr` has an optional `-` sign at the end, but no `+`. - **`PR`** Specifies that `expr` indicates a negative number with wrapping angled brackets (`<1>`). If `expr` contains any characters other than `0` through `9` and those permitted in `fmt` a `NULL` is returned. ## Examples ```sql -- The format expects: -- * an optional sign at the beginning, -- * followed by a dollar sign, -- * followed by a number between 3 and 6 digits long, -- * thousands separators, -- * up to two dights beyond the decimal point. > SELECT try_to_number('-$12,345.67', 'S$999,099.99'); -12345.67 -- The plus sign is optional, and so are fractional digits. > SELECT try_to_number('$345', 'S$999,099.99'); 345.00 -- The format requires at least three digits. > SELECT try_to_number('$45', 'S$999,099.99'); NULL -- The format requires at least three digits. > SELECT try_to_number('$045', 'S$999,099.99'); 45.00 -- Using brackets to denote negative values > SELECT try_to_number('<1234>', '999999PR'); -1234 ``` ### Why are the changes needed? The new semantics bring Spark into consistency with other engines and grant the user flexibility about how to handle cases where inputs do not match the format string. ### Does this PR introduce _any_ user-facing change? Yes. * The minus sign `-` is no longer supported in the format string (`S` replaces it). * `MI` and `PR` are new options in the format string. * `to_number` and `try_to_number` are separate functions with different error behavior. ### How was this patch tested? * New positive and negative unit tests cover both `to_number` and `try_to_number` functions. * Query tests update as needed according to the behavior changes. Closes #36066 from dtenedor/to-number. Authored-by: Daniel Tenedorio Signed-off-by: Wenchen Fan (cherry picked from commit 7a6b98965bf40993ea2e7837ded1c79813bec5d8) Signed-off-by: Wenchen Fan --- .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/numberFormatExpressions.scala | 109 +++- .../sql/catalyst/util/NumberFormatter.scala | 243 -------- .../sql/catalyst/util/ToNumberParser.scala | 579 ++++++++++++++++++ .../expressions/StringExpressionsSuite.scala | 297 +++++---- .../catalyst/util/NumberFormatterSuite.scala | 315 ---------- .../sql-functions/sql-expression-schema.md | 5 +- .../sql-tests/inputs/string-functions.sql | 7 +- .../results/ansi/string-functions.sql.out | 22 +- .../results/postgreSQL/numeric.sql.out | 18 +- .../results/string-functions.sql.out | 22 +- 11 files changed, 859 insertions(+), 759 deletions(-) delete mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberFormatter.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala delete mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberFormatterSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index f6bd9891681fa..91bc34bef371d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -541,6 +541,7 @@ object FunctionRegistry { expression[FormatNumber]("format_number"), expression[FormatString]("format_string"), expression[ToNumber]("to_number"), + expression[TryToNumber]("try_to_number"), expression[GetJsonObject]("get_json_object"), expression[InitCap]("initcap"), expression[StringInstr]("instr"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala index e29a425eef199..88947c5c87ab5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala @@ -22,48 +22,58 @@ import java.util.Locale import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode} import org.apache.spark.sql.catalyst.expressions.codegen.Block.BlockHelper -import org.apache.spark.sql.catalyst.util.NumberFormatter +import org.apache.spark.sql.catalyst.util.ToNumberParser import org.apache.spark.sql.types.{DataType, StringType} import org.apache.spark.unsafe.types.UTF8String /** - * A function that converts string to numeric. + * A function that converts strings to decimal values, returning an exception if the input string + * fails to match the format string. */ @ExpressionDescription( usage = """ - _FUNC_(strExpr, formatExpr) - Convert `strExpr` to a number based on the `formatExpr`. - The format can consist of the following characters: - '0' or '9': digit position - '.' or 'D': decimal point (only allowed once) - ',' or 'G': group (thousands) separator - '-' or 'S': sign anchored to number (only allowed once) - '$': value with a leading dollar sign (only allowed once) + _FUNC_(expr, fmt) - Convert string 'expr' to a number based on the string format 'fmt'. + Throws an exception if the conversion fails. The format can consist of the following + characters, case insensitive: + '0' or '9': Specifies an expected digit between 0 and 9. A sequence of 0 or 9 in the format + string matches a sequence of digits in the input string. If the 0/9 sequence starts with + 0 and is before the decimal point, it can only match a digit sequence of the same size. + Otherwise, if the sequence starts with 9 or is after the decimal poin, it can match a + digit sequence that has the same or smaller size. + '.' or 'D': Specifies the position of the decimal point (optional, only allowed once). + ',' or 'G': Specifies the position of the grouping (thousands) separator (,). There must be + one or more 0 or 9 to the left of the rightmost grouping separator. 'expr' must match the + grouping separator relevant for the size of the number. + '$': Specifies the location of the $ currency sign. This character may only be specified + once. + 'S': Specifies the position of a '-' or '+' sign (optional, only allowed once). + 'MI': Specifies that 'expr' has an optional '-' sign, but no '+' (only allowed once). + 'PR': Only allowed at the end of the format string; specifies that 'expr' indicates a + negative number with wrapping angled brackets. + ('<1>'). """, examples = """ Examples: > SELECT _FUNC_('454', '999'); 454 - > SELECT _FUNC_('454.00', '000D00'); + > SELECT _FUNC_('454.00', '000.00'); 454.00 - > SELECT _FUNC_('12,454', '99G999'); + > SELECT _FUNC_('12,454', '99,999'); 12454 > SELECT _FUNC_('$78.12', '$99.99'); 78.12 - > SELECT _FUNC_('12,454.8-', '99G999D9S'); + > SELECT _FUNC_('12,454.8-', '99,999.9S'); -12454.8 """, since = "3.3.0", group = "string_funcs") case class ToNumber(left: Expression, right: Expression) extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { - private lazy val numberFormat = right.eval().toString.toUpperCase(Locale.ROOT) - private lazy val numberFormatter = new NumberFormatter(numberFormat) + private lazy val numberFormatter = new ToNumberParser(numberFormat, true) override def dataType: DataType = numberFormatter.parsedDecimalType - override def inputTypes: Seq[DataType] = Seq(StringType, StringType) - override def checkInputDataTypes(): TypeCheckResult = { val inputTypeCheck = super.checkInputDataTypes() if (inputTypeCheck.isSuccess) { @@ -76,17 +86,14 @@ case class ToNumber(left: Expression, right: Expression) inputTypeCheck } } - override def prettyName: String = "to_number" - override def nullSafeEval(string: Any, format: Any): Any = { val input = string.asInstanceOf[UTF8String] numberFormatter.parse(input) } - override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val builder = - ctx.addReferenceObj("builder", numberFormatter, classOf[NumberFormatter].getName) + ctx.addReferenceObj("builder", numberFormatter, classOf[ToNumberParser].getName) val eval = left.genCode(ctx) ev.copy(code = code""" @@ -98,8 +105,66 @@ case class ToNumber(left: Expression, right: Expression) |} """.stripMargin) } - override protected def withNewChildrenInternal( - newLeft: Expression, newRight: Expression): ToNumber = copy(left = newLeft, right = newRight) + newLeft: Expression, newRight: Expression): ToNumber = + copy(left = newLeft, right = newRight) } +/** + * A function that converts strings to decimal values, returning NULL if the input string fails to + * match the format string. + */ +@ExpressionDescription( + usage = """ + _FUNC_(expr, fmt) - Convert string 'expr' to a number based on the string format `fmt`. + Returns NULL if the string 'expr' does not match the expected format. The format follows the + same semantics as the to_number function. + """, + examples = """ + Examples: + > SELECT _FUNC_('454', '999'); + 454 + > SELECT _FUNC_('454.00', '000.00'); + 454.00 + > SELECT _FUNC_('12,454', '99,999'); + 12454 + > SELECT _FUNC_('$78.12', '$99.99'); + 78.12 + > SELECT _FUNC_('12,454.8-', '99,999.9S'); + -12454.8 + """, + since = "3.3.0", + group = "string_funcs") +case class TryToNumber(left: Expression, right: Expression) + extends BinaryExpression with ImplicitCastInputTypes with NullIntolerant { + private lazy val numberFormat = right.eval().toString.toUpperCase(Locale.ROOT) + private lazy val numberFormatter = new ToNumberParser(numberFormat, false) + + override def dataType: DataType = numberFormatter.parsedDecimalType + override def inputTypes: Seq[DataType] = Seq(StringType, StringType) + override def nullable: Boolean = true + override def checkInputDataTypes(): TypeCheckResult = ToNumber(left, right).checkInputDataTypes() + override def prettyName: String = "try_to_number" + override def nullSafeEval(string: Any, format: Any): Any = { + val input = string.asInstanceOf[UTF8String] + numberFormatter.parse(input) + } + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val builder = + ctx.addReferenceObj("builder", numberFormatter, classOf[ToNumberParser].getName) + val eval = left.genCode(ctx) + ev.copy(code = + code""" + |${eval.code} + |boolean ${ev.isNull} = ${eval.isNull}; + |${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + |if (!${ev.isNull}) { + | ${ev.value} = $builder.parse(${eval.value}); + |} + """.stripMargin) + } + override protected def withNewChildrenInternal( + newLeft: Expression, + newRight: Expression): TryToNumber = + copy(left = newLeft, right = newRight) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberFormatter.scala deleted file mode 100644 index a14aceb692291..0000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberFormatter.scala +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.util - -import java.math.BigDecimal -import java.text.{DecimalFormat, ParsePosition} -import java.util.Locale - -import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.TypeCheckResult -import org.apache.spark.sql.errors.QueryExecutionErrors -import org.apache.spark.sql.types.{Decimal, DecimalType} -import org.apache.spark.unsafe.types.UTF8String - -object NumberFormatter { - final val POINT_SIGN = '.' - final val POINT_LETTER = 'D' - final val COMMA_SIGN = ',' - final val COMMA_LETTER = 'G' - final val MINUS_SIGN = '-' - final val MINUS_LETTER = 'S' - final val DOLLAR_SIGN = '$' - final val NINE_DIGIT = '9' - final val ZERO_DIGIT = '0' - final val POUND_SIGN = '#' - - final val COMMA_SIGN_STRING = COMMA_SIGN.toString - final val POUND_SIGN_STRING = POUND_SIGN.toString - - final val SIGN_SET = Set(POINT_SIGN, COMMA_SIGN, MINUS_SIGN, DOLLAR_SIGN) -} - -class NumberFormatter(originNumberFormat: String, isParse: Boolean = true) extends Serializable { - import NumberFormatter._ - - protected val normalizedNumberFormat = normalize(originNumberFormat) - - private val transformedFormat = transform(normalizedNumberFormat) - - private lazy val numberDecimalFormat = { - val decimalFormat = new DecimalFormat(transformedFormat) - decimalFormat.setParseBigDecimal(true) - decimalFormat - } - - private lazy val (precision, scale) = { - val formatSplits = normalizedNumberFormat.split(POINT_SIGN).map(_.filterNot(isSign)) - assert(formatSplits.length <= 2) - val precision = formatSplits.map(_.length).sum - val scale = if (formatSplits.length == 2) formatSplits.last.length else 0 - (precision, scale) - } - - def parsedDecimalType: DecimalType = DecimalType(precision, scale) - - /** - * DecimalFormat provides '#' and '0' as placeholder of digit, ',' as grouping separator, - * '.' as decimal separator, '-' as minus, '$' as dollar, but not '9', 'G', 'D', 'S'. So we need - * replace them show below: - * 1. '9' -> '#' - * 2. 'G' -> ',' - * 3. 'D' -> '.' - * 4. 'S' -> '-' - * - * Note: When calling format, we must preserve the digits after decimal point, so the digits - * after decimal point should be replaced as '0'. For example: '999.9' will be normalized as - * '###.0' and '999.99' will be normalized as '###.00', so if the input is 454, the format - * output will be 454.0 and 454.00 respectively. - * - * @param format number format string - * @return normalized number format string - */ - private def normalize(format: String): String = { - var notFindDecimalPoint = true - val normalizedFormat = format.toUpperCase(Locale.ROOT).map { - case NINE_DIGIT if notFindDecimalPoint => POUND_SIGN - case ZERO_DIGIT if isParse && notFindDecimalPoint => POUND_SIGN - case NINE_DIGIT if !notFindDecimalPoint => ZERO_DIGIT - case COMMA_LETTER => COMMA_SIGN - case POINT_LETTER | POINT_SIGN => - notFindDecimalPoint = false - POINT_SIGN - case MINUS_LETTER => MINUS_SIGN - case other => other - } - // If the comma is at the beginning or end of number format, then DecimalFormat will be - // invalid. For example, "##,###," or ",###,###" for DecimalFormat is invalid, so we must use - // "##,###" or "###,###". - normalizedFormat.stripPrefix(COMMA_SIGN_STRING).stripSuffix(COMMA_SIGN_STRING) - } - - private def isSign(c: Char): Boolean = { - SIGN_SET.contains(c) - } - - private def transform(format: String): String = { - if (format.contains(MINUS_SIGN)) { - // For example: '#.######' represents a positive number, - // but '#.######;#.######-' represents a negative number. - val positiveFormatString = format.replaceAll("-", "") - s"$positiveFormatString;$format" - } else { - format - } - } - - def check(): TypeCheckResult = { - def invalidSignPosition(c: Char): Boolean = { - val signIndex = normalizedNumberFormat.indexOf(c) - signIndex > 0 && signIndex < normalizedNumberFormat.length - 1 - } - - def multipleSignInNumberFormatError(message: String): String = { - s"At most one $message is allowed in the number format: '$originNumberFormat'" - } - - def nonFistOrLastCharInNumberFormatError(message: String): String = { - s"$message must be the first or last char in the number format: '$originNumberFormat'" - } - - if (normalizedNumberFormat.length == 0) { - TypeCheckResult.TypeCheckFailure("Number format cannot be empty") - } else if (normalizedNumberFormat.count(_ == POINT_SIGN) > 1) { - TypeCheckResult.TypeCheckFailure( - multipleSignInNumberFormatError(s"'$POINT_LETTER' or '$POINT_SIGN'")) - } else if (normalizedNumberFormat.count(_ == MINUS_SIGN) > 1) { - TypeCheckResult.TypeCheckFailure( - multipleSignInNumberFormatError(s"'$MINUS_LETTER' or '$MINUS_SIGN'")) - } else if (normalizedNumberFormat.count(_ == DOLLAR_SIGN) > 1) { - TypeCheckResult.TypeCheckFailure(multipleSignInNumberFormatError(s"'$DOLLAR_SIGN'")) - } else if (invalidSignPosition(MINUS_SIGN)) { - TypeCheckResult.TypeCheckFailure( - nonFistOrLastCharInNumberFormatError(s"'$MINUS_LETTER' or '$MINUS_SIGN'")) - } else if (invalidSignPosition(DOLLAR_SIGN)) { - TypeCheckResult.TypeCheckFailure( - nonFistOrLastCharInNumberFormatError(s"'$DOLLAR_SIGN'")) - } else { - TypeCheckResult.TypeCheckSuccess - } - } - - /** - * Convert string to numeric based on the given number format. - * The format can consist of the following characters: - * '0' or '9': digit position - * '.' or 'D': decimal point (only allowed once) - * ',' or 'G': group (thousands) separator - * '-' or 'S': sign anchored to number (only allowed once) - * '$': value with a leading dollar sign (only allowed once) - * - * @param input the string need to converted - * @return decimal obtained from string parsing - */ - def parse(input: UTF8String): Decimal = { - val inputStr = input.toString.trim - val inputSplits = inputStr.split(POINT_SIGN) - assert(inputSplits.length <= 2) - if (inputSplits.length == 1) { - if (inputStr.filterNot(isSign).length > precision - scale) { - throw QueryExecutionErrors.invalidNumberFormatError(input, originNumberFormat) - } - } else if (inputSplits(0).filterNot(isSign).length > precision - scale || - inputSplits(1).filterNot(isSign).length > scale) { - throw QueryExecutionErrors.invalidNumberFormatError(input, originNumberFormat) - } - - try { - val number = numberDecimalFormat.parse(inputStr, new ParsePosition(0)) - assert(number.isInstanceOf[BigDecimal]) - Decimal(number.asInstanceOf[BigDecimal]) - } catch { - case _: IllegalArgumentException => - throw QueryExecutionErrors.invalidNumberFormatError(input, originNumberFormat) - } - } - - /** - * Convert numeric to string based on the given number format. - * The format can consist of the following characters: - * '9': digit position (can be dropped if insignificant) - * '0': digit position (will not be dropped, even if insignificant) - * '.' or 'D': decimal point (only allowed once) - * ',' or 'G': group (thousands) separator - * '-' or 'S': sign anchored to number (only allowed once) - * '$': value with a leading dollar sign (only allowed once) - * - * @param input the decimal to format - * @param numberFormat the format string - * @return The string after formatting input decimal - */ - def format(input: Decimal): String = { - val bigDecimal = input.toJavaBigDecimal - val decimalPlainStr = bigDecimal.toPlainString - if (decimalPlainStr.length > transformedFormat.length) { - transformedFormat.replaceAll("0", POUND_SIGN_STRING) - } else { - var resultStr = numberDecimalFormat.format(bigDecimal) - // Since we trimmed the comma at the beginning or end of number format in function - // `normalize`, we restore the comma to the result here. - // For example, if the specified number format is "99,999," or ",999,999", function - // `normalize` normalize them to "##,###" or "###,###". - // new DecimalFormat("##,###").parse(12454) and new DecimalFormat("###,###").parse(124546) - // will return "12,454" and "124,546" respectively. So we add ',' at the end and head of - // the result, then the final output are "12,454," or ",124,546". - if (originNumberFormat.last == COMMA_SIGN || originNumberFormat.last == COMMA_LETTER) { - resultStr = resultStr + COMMA_SIGN - } - if (originNumberFormat.charAt(0) == COMMA_SIGN || - originNumberFormat.charAt(0) == COMMA_LETTER) { - resultStr = COMMA_SIGN + resultStr - } - - resultStr - } - } -} - -// Visible for testing -class TestNumberFormatter(originNumberFormat: String, isParse: Boolean = true) - extends NumberFormatter(originNumberFormat, isParse) { - def checkWithException(): Unit = { - check() match { - case TypeCheckResult.TypeCheckFailure(message) => - throw new AnalysisException(message) - case _ => - } - } -} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala new file mode 100644 index 0000000000000..afba683efad94 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala @@ -0,0 +1,579 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult +import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.types.{Decimal, DecimalType} +import org.apache.spark.unsafe.types.UTF8String + +// This object contains some definitions of characters and tokens for the parser below. +object ToNumberParser { + final val ANGLE_BRACKET_CLOSE = '>' + final val ANGLE_BRACKET_OPEN = '<' + final val COMMA_LETTER = 'G' + final val COMMA_SIGN = ',' + final val DOLLAR_SIGN = '$' + final val MINUS_SIGN = '-' + final val NINE_DIGIT = '9' + final val OPTIONAL_PLUS_OR_MINUS_LETTER = 'S' + final val PLUS_SIGN = '+' + final val POINT_LETTER = 'D' + final val POINT_SIGN = '.' + final val ZERO_DIGIT = '0' + + final val OPTIONAL_MINUS_STRING = "MI" + final val WRAPPING_ANGLE_BRACKETS_TO_NEGATIVE_NUMBER = "PR" + + final val OPTIONAL_MINUS_STRING_START = 'M' + final val OPTIONAL_MINUS_STRING_END = 'I' + + final val WRAPPING_ANGLE_BRACKETS_TO_NEGATIVE_NUMBER_START = 'P' + final val WRAPPING_ANGLE_BRACKETS_TO_NEGATIVE_NUMBER_END = 'R' + + // This class represents one or more characters that we expect to be present in the input string + // based on the format string. + abstract class InputToken() + // Represents some number of digits (0-9). + abstract class Digits extends InputToken + // Represents exactly 'num' digits (0-9). + case class ExactlyAsManyDigits(num: Int) extends Digits + // Represents at most 'num' digits (0-9). + case class AtMostAsManyDigits(num: Int) extends Digits + // Represents one decimal point (.). + case class DecimalPoint() extends InputToken + // Represents one thousands separator (,). + case class ThousandsSeparator() extends InputToken + // Represents one or more groups of Digits (0-9) with ThousandsSeparators (,) between each group. + // The 'tokens' are the Digits and ThousandsSeparators in order; the 'digits' are just the Digits. + case class DigitGroups(tokens: Seq[InputToken], digits: Seq[Digits]) extends InputToken + // Represents one dollar sign ($). + case class DollarSign() extends InputToken + // Represents one optional plus sign (+) or minus sign (-). + case class OptionalPlusOrMinusSign() extends InputToken + // Represents one optional minus sign (-). + case class OptionalMinusSign() extends InputToken + // Represents one opening angle bracket (<). + case class OpeningAngleBracket() extends InputToken + // Represents one closing angle bracket (>). + case class ClosingAngleBracket() extends InputToken + // Represents any unrecognized character other than the above. + case class InvalidUnrecognizedCharacter(char: Char) extends InputToken +} + +/** + * This class represents a parser to implement the to_number or try_to_number SQL functions. + * + * It works by consuming an input string and a format string. This class accepts the format string + * as a field, and proceeds to iterate through the format string to generate a sequence of tokens + * (or throw an exception if the format string is invalid). Then when the function is called with an + * input string, this class steps through the sequence of tokens and compares them against the input + * string, returning a Spark Decimal object if they match (or throwing an exception otherwise). + * + * @param numberFormat the format string describing the expected inputs. + * @param errorOnFail true if evaluation should throw an exception if the input string fails to + * match the format string. Otherwise, returns NULL instead. + */ +class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Serializable { + import ToNumberParser._ + + // Consumes the format string and produce a sequence of input tokens expected from each input + // string. + private lazy val formatTokens: Seq[InputToken] = { + val tokens = mutable.Buffer.empty[InputToken] + var i = 0 + var reachedDecimalPoint = false + val len = numberFormat.length + while (i < len) { + val char: Char = numberFormat(i) + char match { + case ZERO_DIGIT => + val prevI = i + do { + i += 1 + } while (i < len && (numberFormat(i) == ZERO_DIGIT || numberFormat(i) == NINE_DIGIT)) + if (reachedDecimalPoint) { + tokens.append(AtMostAsManyDigits(i - prevI)) + } else { + tokens.append(ExactlyAsManyDigits(i - prevI)) + } + case NINE_DIGIT => + val prevI = i + do { + i += 1 + } while (i < len && (numberFormat(i) == ZERO_DIGIT || numberFormat(i) == NINE_DIGIT)) + tokens.append(AtMostAsManyDigits(i - prevI)) + case POINT_SIGN | POINT_LETTER => + tokens.append(DecimalPoint()) + reachedDecimalPoint = true + i += 1 + case COMMA_SIGN | COMMA_LETTER => + tokens.append(ThousandsSeparator()) + i += 1 + case DOLLAR_SIGN => + tokens.append(DollarSign()) + i += 1 + case OPTIONAL_PLUS_OR_MINUS_LETTER => + tokens.append(OptionalPlusOrMinusSign()) + i += 1 + case OPTIONAL_MINUS_STRING_START if i < len - 1 && + OPTIONAL_MINUS_STRING_END == numberFormat(i + 1) => + tokens.append(OptionalMinusSign()) + i += 2 + case WRAPPING_ANGLE_BRACKETS_TO_NEGATIVE_NUMBER_START if i < len - 1 && + WRAPPING_ANGLE_BRACKETS_TO_NEGATIVE_NUMBER_END == numberFormat(i + 1) => + tokens.prepend(OpeningAngleBracket()) + tokens.append(ClosingAngleBracket()) + i += 2 + case c: Char => + tokens.append(InvalidUnrecognizedCharacter(c)) + i += 1 + } + } + + // Combine each group of consecutive Digits and ThousandsSeparator tokens into a DigitGroups. + val groupedTokens = mutable.Buffer.empty[InputToken] + var currentGroup = mutable.Buffer.empty[InputToken] + var currentDigits = mutable.Buffer.empty[Digits] + for (token <- tokens) { + token match { + case digits: Digits => + currentGroup.append(token) + currentDigits.append(digits) + case _: ThousandsSeparator => + currentGroup.append(token) + case other => + if (currentGroup.nonEmpty) { + // We reverse the expected digit tokens in this new DigitGroups here, and we do the same + // for actual groups of 0-9 characters in each input string. In this way, we can safely + // ignore any leading optional groups of digits in the format string. + groupedTokens.append( + DigitGroups(currentGroup.reverse.toSeq, currentDigits.reverse.toSeq)) + currentGroup = mutable.Buffer.empty[InputToken] + currentDigits = mutable.Buffer.empty[Digits] + } + groupedTokens.append(other) + } + } + if (currentGroup.nonEmpty) { + groupedTokens.append(DigitGroups(currentGroup.reverse.toSeq, currentDigits.reverse.toSeq)) + } + groupedTokens.toSeq + } + + /** + * Precision is the number of digits in a number. Scale is the number of digits to the right of + * the decimal point in a number. For example, the number 123.45 has a precision of 5 and a + * scale of 2. + */ + private lazy val precision: Int = { + val lengths = formatTokens.map { + case DigitGroups(_, digits) => digits.map { + case ExactlyAsManyDigits(num) => num + case AtMostAsManyDigits(num) => num + }.sum + case _ => 0 + } + lengths.sum + } + + private lazy val scale: Int = { + val index = formatTokens.indexOf(DecimalPoint()) + if (index != -1) { + val suffix: Seq[InputToken] = formatTokens.drop(index) + val lengths: Seq[Int] = suffix.map { + case DigitGroups(_, digits) => digits.map { + case ExactlyAsManyDigits(num) => num + case AtMostAsManyDigits(num) => num + }.sum + case _ => 0 + } + lengths.sum + } else { + 0 + } + } + + // Holds all digits (0-9) before the decimal point (.) while parsing each input string. + private lazy val beforeDecimalPoint = new StringBuilder(precision) + // Holds all digits (0-9) after the decimal point (.) while parsing each input string. + private lazy val afterDecimalPoint = new StringBuilder(scale) + // Number of digits (0-9) in each group of the input string, split by thousands separators. + private lazy val parsedDigitGroupSizes = mutable.Buffer.empty[Int] + // Increments to count the number of digits (0-9) in the current group within the input string. + private var numDigitsInCurrentGroup: Int = 0 + + /** + * The result type of this parsing is a Decimal value with the appropriate precision and scale. + */ + def parsedDecimalType: DecimalType = DecimalType(precision, scale) + + /** + * Consumes the format string to check validity and computes an appropriate Decimal output type. + */ + def check(): TypeCheckResult = { + val validateResult: String = validateFormatString + if (validateResult.nonEmpty) { + TypeCheckResult.TypeCheckFailure(validateResult) + } else { + TypeCheckResult.TypeCheckSuccess + } + } + + /** + * This implementation of the [[check]] method returns any error, or the empty string on success. + */ + private def validateFormatString: String = { + def multipleSignInNumberFormatError(message: String) = { + s"At most one $message is allowed in the number format: '$numberFormat'" + } + + def notAtEndOfNumberFormatError(message: String) = { + s"$message must be at the end of the number format: '$numberFormat'" + } + + val inputTokenCounts = formatTokens.groupBy(identity).mapValues(_.size) + + val firstDollarSignIndex: Int = formatTokens.indexOf(DollarSign()) + val firstDigitIndex: Int = formatTokens.indexWhere { + case _: DigitGroups => true + case _ => false + } + val firstDecimalPointIndex: Int = formatTokens.indexOf(DecimalPoint()) + val digitGroupsBeforeDecimalPoint: Seq[DigitGroups] = + formatTokens.zipWithIndex.flatMap { + case (d@DigitGroups(_, _), i) + if firstDecimalPointIndex == -1 || + i < firstDecimalPointIndex => + Seq(d) + case _ => Seq() + } + val digitGroupsAfterDecimalPoint: Seq[DigitGroups] = + formatTokens.zipWithIndex.flatMap { + case (d@DigitGroups(_, _), i) + if firstDecimalPointIndex != -1 && + i > firstDecimalPointIndex => + Seq(d) + case _ => Seq() + } + + // Make sure the format string contains at least one token. + if (numberFormat.isEmpty) { + "The format string cannot be empty" + } + // Make sure the format string does not contain any unrecognized characters. + else if (formatTokens.exists(_.isInstanceOf[InvalidUnrecognizedCharacter])) { + val unrecognizedChars = + formatTokens.filter { + _.isInstanceOf[InvalidUnrecognizedCharacter] + }.map { + case i: InvalidUnrecognizedCharacter => i.char + } + val char: Char = unrecognizedChars.head + s"Encountered invalid character $char in the number format: '$numberFormat'" + } + // Make sure the format string contains at least one digit. + else if (!formatTokens.exists( + token => token.isInstanceOf[DigitGroups])) { + "The format string requires at least one number digit" + } + // Make sure the format string contains at most one decimal point. + else if (inputTokenCounts.getOrElse(DecimalPoint(), 0) > 1) { + multipleSignInNumberFormatError(s"'$POINT_LETTER' or '$POINT_SIGN'") + } + // Make sure the format string contains at most one plus or minus sign. + else if (inputTokenCounts.getOrElse(OptionalPlusOrMinusSign(), 0) > 1) { + multipleSignInNumberFormatError(s"'$OPTIONAL_PLUS_OR_MINUS_LETTER'") + } + // Make sure the format string contains at most one dollar sign. + else if (inputTokenCounts.getOrElse(DollarSign(), 0) > 1) { + multipleSignInNumberFormatError(s"'$DOLLAR_SIGN'") + } + // Make sure the format string contains at most one "MI" sequence. + else if (inputTokenCounts.getOrElse(OptionalMinusSign(), 0) > 1) { + multipleSignInNumberFormatError(s"'$OPTIONAL_MINUS_STRING'") + } + // Make sure the format string contains at most one closing angle bracket at the end. + else if (inputTokenCounts.getOrElse(ClosingAngleBracket(), 0) > 1 || + (inputTokenCounts.getOrElse(ClosingAngleBracket(), 0) == 1 && + formatTokens.last != ClosingAngleBracket())) { + notAtEndOfNumberFormatError(s"'$WRAPPING_ANGLE_BRACKETS_TO_NEGATIVE_NUMBER'") + } + // Make sure that any dollar sign in the format string occurs before any digits. + else if (firstDigitIndex < firstDollarSignIndex) { + s"Currency characters must appear before digits in the number format: '$numberFormat'" + } + // Make sure that any dollar sign in the format string occurs before any decimal point. + else if (firstDecimalPointIndex != -1 && + firstDecimalPointIndex < firstDollarSignIndex) { + "Currency characters must appear before any decimal point in the " + + s"number format: '$numberFormat'" + } + // Make sure that any thousands separators in the format string have digits before and after. + else if (digitGroupsBeforeDecimalPoint.exists { + case DigitGroups(tokens, _) => + tokens.zipWithIndex.exists({ + case (_: ThousandsSeparator, j: Int) if j == 0 || j == tokens.length - 1 => + true + case (_: ThousandsSeparator, j: Int) if tokens(j - 1).isInstanceOf[ThousandsSeparator] => + true + case (_: ThousandsSeparator, j: Int) if tokens(j + 1).isInstanceOf[ThousandsSeparator] => + true + case _ => + false + }) + }) { + "Thousands separators (,) must have digits in between them " + + s"in the number format: '$numberFormat'" + } + // Thousands separators are not allowed after the decimal point, if any. + else if (digitGroupsAfterDecimalPoint.exists { + case DigitGroups(tokens, digits) => + tokens.length > digits.length + }) { + "Thousands separators (,) may not appear after the decimal point " + + s"in the number format: '$numberFormat'" + } + // Validation of the format string finished successfully. + else { + "" + } + } + + /** + * Convert string to numeric based on the given number format. + * + * Iterates through the [[formatTokens]] obtained from processing the format string, while also + * keeping a parallel index into the input string. Throws an exception if the latter does not + * contain expected characters at any point. + * + * @param input the string that needs to converted + * @return the result Decimal value obtained from string parsing + */ + def parse(input: UTF8String): Decimal = { + val inputString = input.toString + val inputLength = inputString.length + // Build strings representing all digits before and after the decimal point, respectively. + beforeDecimalPoint.clear() + afterDecimalPoint.clear() + var reachedDecimalPoint = false + // Record whether the input specified a negative result, such as with a minus sign. + var negateResult = false + // This is an index into the characters of the provided input string. + var inputIndex = 0 + // This is an index into the tokens of the provided format string. + var formatIndex = 0 + + // Iterate through the tokens representing the provided format string, in order. + while (formatIndex < formatTokens.size) { + val token: InputToken = formatTokens(formatIndex) + token match { + case d: DigitGroups => + inputIndex = parseDigitGroups(d, inputString, inputIndex, reachedDecimalPoint).getOrElse( + return formatMatchFailure(input, numberFormat)) + case DecimalPoint() => + if (inputIndex < inputLength && + inputString(inputIndex) == POINT_SIGN) { + reachedDecimalPoint = true + inputIndex += 1 + } else { + // There is no decimal point. Consume the token and remain at the same character in the + // input string. + } + case DollarSign() => + if (inputIndex >= inputLength || + inputString(inputIndex) != DOLLAR_SIGN) { + // The input string did not contain an expected dollar sign. + return formatMatchFailure(input, numberFormat) + } + inputIndex += 1 + case OptionalPlusOrMinusSign() => + if (inputIndex < inputLength && + inputString(inputIndex) == PLUS_SIGN) { + inputIndex += 1 + } else if (inputIndex < inputLength && + inputString(inputIndex) == MINUS_SIGN) { + negateResult = !negateResult + inputIndex += 1 + } else { + // There is no plus or minus sign. Consume the token and remain at the same character in + // the input string. + } + case OptionalMinusSign() => + if (inputIndex < inputLength && + inputString(inputIndex) == MINUS_SIGN) { + negateResult = !negateResult + inputIndex += 1 + } else { + // There is no minus sign. Consume the token and remain at the same character in the + // input string. + } + case OpeningAngleBracket() => + if (inputIndex >= inputLength || + inputString(inputIndex) != ANGLE_BRACKET_OPEN) { + // The input string did not contain an expected opening angle bracket. + return formatMatchFailure(input, numberFormat) + } + inputIndex += 1 + case ClosingAngleBracket() => + if (inputIndex >= inputLength || + inputString(inputIndex) != ANGLE_BRACKET_CLOSE) { + // The input string did not contain an expected closing angle bracket. + return formatMatchFailure(input, numberFormat) + } + negateResult = !negateResult + inputIndex += 1 + } + formatIndex += 1 + } + if (inputIndex < inputLength) { + // If we have consumed all the tokens in the format string, but characters remain unconsumed + // in the input string, then the input string does not match the format string. + formatMatchFailure(input, numberFormat) + } else { + getDecimal(negateResult) + } + } + + /** + * Handle parsing the input string for the given expected DigitGroups from the format string. + * + * @param digitGroups the expected DigitGroups from the format string + * @param inputString the input string provided to the original parsing method + * @param startingInputIndex the input index within the input string to begin parsing here + * @param reachedDecimalPoint true if we have already parsed past the decimal point + * @return the new updated index within the input string to resume parsing, or None on error + */ + private def parseDigitGroups( + digitGroups: DigitGroups, + inputString: String, + startingInputIndex: Int, + reachedDecimalPoint: Boolean): Option[Int] = { + val expectedDigits: Seq[Digits] = digitGroups.digits + val inputLength = inputString.length + // Consume characters from the current input index forwards in the input string as long as + // they are digits (0-9) or the thousands separator (,). + numDigitsInCurrentGroup = 0 + var inputIndex = startingInputIndex + parsedDigitGroupSizes.clear() + + while (inputIndex < inputLength && + matchesDigitOrComma(inputString(inputIndex), reachedDecimalPoint)) { + inputIndex += 1 + } + if (inputIndex == inputLength) { + parsedDigitGroupSizes.prepend(numDigitsInCurrentGroup) + } + // Compare the number of digits encountered in each group (separated by thousands + // separators) with the expected numbers from the format string. + if (parsedDigitGroupSizes.length > expectedDigits.length) { + // The input contains more thousands separators than the format string. + return None + } + for (i <- 0 until expectedDigits.length) { + val expectedToken: Digits = expectedDigits(i) + val actualNumDigits: Int = + if (i < parsedDigitGroupSizes.length) { + parsedDigitGroupSizes(i) + } else { + 0 + } + expectedToken match { + case ExactlyAsManyDigits(expectedNumDigits) + if actualNumDigits != expectedNumDigits => + // The input contained more or fewer digits than required. + return None + case AtMostAsManyDigits(expectedMaxDigits) + if actualNumDigits > expectedMaxDigits => + // The input contained more digits than allowed. + return None + case _ => + } + } + Some(inputIndex) + } + + /** + * Returns true if the given character matches a digit (0-9) or a comma, updating fields of + * this class related to parsing during the process. + */ + private def matchesDigitOrComma(char: Char, reachedDecimalPoint: Boolean): Boolean = { + char match { + case _ if char.isWhitespace => + // Ignore whitespace and keep advancing through the input string. + true + case _ if char >= ZERO_DIGIT && char <= NINE_DIGIT => + numDigitsInCurrentGroup += 1 + // Append each group of input digits to the appropriate before/afterDecimalPoint + // string for later use in constructing the result Decimal value. + if (reachedDecimalPoint) { + afterDecimalPoint.append(char) + } else { + beforeDecimalPoint.append(char) + } + true + case COMMA_SIGN => + parsedDigitGroupSizes.prepend(numDigitsInCurrentGroup) + numDigitsInCurrentGroup = 0 + true + case _ => + parsedDigitGroupSizes.prepend(numDigitsInCurrentGroup) + false + } + } + + /** + * This method executes when the input string fails to match the format string. It throws an + * exception if indicated on construction of this class, or returns NULL otherwise. + */ + private def formatMatchFailure(input: UTF8String, originNumberFormat: String): Decimal = { + if (errorOnFail) { + throw QueryExecutionErrors.invalidNumberFormatError(input, originNumberFormat) + } + null + } + + /** + * Computes the final Decimal value from the beforeDecimalPoint and afterDecimalPoint fields of + * this class, as a result of parsing. + * + * @param negateResult whether the input string specified to negate the result + * @return a Decimal value with the value indicated by the input string and the precision and + * scale indicated by the format string + */ + private def getDecimal(negateResult: Boolean): Decimal = { + // Append zeros to the afterDecimalPoint until it comprises the same number of digits as the + // scale. This is necessary because we must determine the scale from the format string alone but + // each input string may include a variable number of digits after the decimal point. + val extraZeros = "0" * (scale - afterDecimalPoint.length) + val afterDecimalPadded = afterDecimalPoint.toString + extraZeros + val prefix = if (negateResult) "-" else "" + val suffix = if (afterDecimalPadded.nonEmpty) "." + afterDecimalPadded else "" + val numStr = s"$prefix$beforeDecimalPoint$suffix" + val javaDecimal = new java.math.BigDecimal(numStr) + if (precision <= Decimal.MAX_LONG_DIGITS) { + // Constructs a `Decimal` with an unscaled `Long` value if possible. + Decimal(javaDecimal.unscaledValue().longValue(), precision, scale) + } else { + // Otherwise, resorts to an unscaled `BigInteger` instead. + Decimal(javaDecimal, precision, scale) + } + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index b05142add0bab..4936bce7bf214 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.expressions +import java.math.{BigDecimal => JavaBigDecimal} + import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.dsl.expressions._ @@ -892,169 +894,160 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { Literal.create(null, IntegerType), Literal.create(null, IntegerType)), null) } - test("ToNumber") { - ToNumber(Literal("454"), Literal("")).checkInputDataTypes() match { - case TypeCheckResult.TypeCheckFailure(msg) => - assert(msg.contains("Number format cannot be empty")) - } - ToNumber(Literal("454"), NonFoldableLiteral.create("999", StringType)) - .checkInputDataTypes() match { - case TypeCheckResult.TypeCheckFailure(msg) => - assert(msg.contains("Format expression must be foldable")) - } - - // Test '0' and '9' - - Seq("454", "054", "54", "450").foreach { input => - val invalidFormat1 = 0.until(input.length - 1).map(_ => '0').mkString - val invalidFormat2 = 0.until(input.length - 2).map(_ => '0').mkString - val invalidFormat3 = 0.until(input.length - 1).map(_ => '9').mkString - val invalidFormat4 = 0.until(input.length - 2).map(_ => '9').mkString - Seq(invalidFormat1, invalidFormat2, invalidFormat3, invalidFormat4) - .filter(_.nonEmpty).foreach { format => - checkExceptionInExpression[IllegalArgumentException]( - ToNumber(Literal(input), Literal(format)), - s"The input string '$input' does not match the given number format: '$format'") - } - - val format1 = 0.until(input.length).map(_ => '0').mkString - val format2 = 0.until(input.length).map(_ => '9').mkString - val format3 = 0.until(input.length).map(i => i % 2 * 9).mkString - val format4 = 0.until(input.length + 1).map(_ => '0').mkString - val format5 = 0.until(input.length + 1).map(_ => '9').mkString - val format6 = 0.until(input.length + 1).map(i => i % 2 * 9).mkString - Seq(format1, format2, format3, format4, format5, format6).foreach { format => - checkEvaluation(ToNumber(Literal(input), Literal(format)), Decimal(input)) - } - } - - // Test '.' and 'D' - checkExceptionInExpression[IllegalArgumentException]( - ToNumber(Literal("454.2"), Literal("999")), - "The input string '454.2' does not match the given number format: '999'") - Seq("999.9", "000.0", "99.99", "00.00", "0000.0", "9999.9", "00.000", "99.999") - .foreach { format => - checkExceptionInExpression[IllegalArgumentException]( - ToNumber(Literal("454.23"), Literal(format)), - s"The input string '454.23' does not match the given number format: '$format'") - val format2 = format.replace('.', 'D') - checkExceptionInExpression[IllegalArgumentException]( - ToNumber(Literal("454.23"), Literal(format2)), - s"The input string '454.23' does not match the given number format: '$format2'") - } - + test("ToNumber: positive tests") { Seq( - ("454.2", "000.0") -> Decimal(454.2), - ("454.23", "000.00") -> Decimal(454.23), - ("454.2", "000.00") -> Decimal(454.2), - ("454.0", "000.0") -> Decimal(454), - ("454.00", "000.00") -> Decimal(454), - (".4542", ".0000") -> Decimal(0.4542), - ("4542.", "0000.") -> Decimal(4542) - ).foreach { case ((str, format), expected) => - checkEvaluation(ToNumber(Literal(str), Literal(format)), expected) - val format2 = format.replace('.', 'D') - checkEvaluation(ToNumber(Literal(str), Literal(format2)), expected) - val format3 = format.replace('0', '9') - checkEvaluation(ToNumber(Literal(str), Literal(format3)), expected) - val format4 = format3.replace('.', 'D') - checkEvaluation(ToNumber(Literal(str), Literal(format4)), expected) + ("$345", "S$999,099.99") -> Decimal(345), + ("-$12,345.67", "S$999,099.99") -> Decimal(-12345.67), + ("454,123", "999,099") -> Decimal(454123), + ("$045", "S$999,099.99") -> Decimal(45), + ("454", "099") -> Decimal(454), + ("454.", "099.99") -> Decimal(454.0), + ("454.6", "099D99") -> Decimal(454.6), + ("454.67", "099.00") -> Decimal(454.67), + ("454", "000") -> Decimal(454), + (" 454 ", "9099") -> Decimal(454), + ("454", "099") -> Decimal(454), + ("454.67", "099.99") -> Decimal(454.67), + ("$454", "$999") -> Decimal(454), + (" 454,123 ", "999G099") -> Decimal(454123), + ("$454,123", "$999G099") -> Decimal(454123), + ("+$89,1,2,3,45.123", "S$999,0,0,0,999.00000") -> Decimal(8912345.123), + ("-454", "S999") -> Decimal(-454), + ("+454", "S999") -> Decimal(454), + ("<454>", "999PR") -> Decimal(-454), + ("454-", "999MI") -> Decimal(-454), + ("-$54", "MI$99") -> Decimal(-54), + ("$4-4", "$9MI9") -> Decimal(-44), + // The input string contains more digits than fit in a long integer. + ("123,456,789,123,456,789,123", "999,999,999,999,999,999,999") -> + Decimal(new JavaBigDecimal("123456789123456789123")) + ).foreach { case ((str: String, format: String), expected: Decimal) => + val toNumberExpr = ToNumber(Literal(str), Literal(format)) + assert(toNumberExpr.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess) + checkEvaluation(toNumberExpr, expected) + + val tryToNumberExpr = TryToNumber(Literal(str), Literal(format)) + assert(tryToNumberExpr.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess) + checkEvaluation(tryToNumberExpr, expected) } - Seq("999.9.9", "999D9D9", "999.9D9", "999D9.9").foreach { str => - ToNumber(Literal("454.3.2"), Literal(str)).checkInputDataTypes() match { - case TypeCheckResult.TypeCheckFailure(msg) => - assert(msg.contains(s"At most one 'D' or '.' is allowed in the number format: '$str'")) + for (i <- 0 to 2) { + for (j <- 3 to 5) { + for (k <- 6 to 9) { + Seq( + (s"$i$j$k", "999") -> Decimal(s"$i$j$k".toInt), + (s"$i$j$k", "S099.") -> Decimal(s"$i$j$k".toInt), + (s"$i$j.$k", "99.9") -> Decimal(s"$i$j.$k".toDouble), + (s"$i,$j,$k", "999,999,0") -> Decimal(s"$i$j$k".toInt) + ).foreach { case ((str: String, format: String), expected: Decimal) => + val toNumberExpr = ToNumber(Literal(str), Literal(format)) + assert(toNumberExpr.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess) + checkEvaluation(toNumberExpr, expected) + + val tryToNumberExpr = TryToNumber(Literal(str), Literal(format)) + assert(tryToNumberExpr.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess) + checkEvaluation(tryToNumberExpr, expected) + } + } } } + } - // Test ',' and 'G' - checkExceptionInExpression[IllegalArgumentException]( - ToNumber(Literal("123,456"), Literal("9G9")), - "The input string '123,456' does not match the given number format: '9G9'") - checkExceptionInExpression[IllegalArgumentException]( - ToNumber(Literal("123,456,789"), Literal("999,999")), - "The input string '123,456,789' does not match the given number format: '999,999'") - - Seq( - ("12,454", "99,999") -> Decimal(12454), - ("12,454", "99,999,999") -> Decimal(12454), - ("12,454,367", "99,999,999") -> Decimal(12454367), - ("12,454,", "99,999,") -> Decimal(12454), - (",454,367", ",999,999") -> Decimal(454367), - (",454,367", "999,999") -> Decimal(454367) - ).foreach { case ((str, format), expected) => - checkEvaluation(ToNumber(Literal(str), Literal(format)), expected) - val format2 = format.replace(',', 'G') - checkEvaluation(ToNumber(Literal(str), Literal(format2)), expected) - val format3 = format.replace('9', '0') - checkEvaluation(ToNumber(Literal(str), Literal(format3)), expected) - val format4 = format3.replace(',', 'G') - checkEvaluation(ToNumber(Literal(str), Literal(format4)), expected) - val format5 = s"${format}9" - checkEvaluation(ToNumber(Literal(str), Literal(format5)), expected) - val format6 = s"${format}0" - checkEvaluation(ToNumber(Literal(str), Literal(format6)), expected) - val format7 = s"9${format}9" - checkEvaluation(ToNumber(Literal(str), Literal(format7)), expected) - val format8 = s"0${format}0" - checkEvaluation(ToNumber(Literal(str), Literal(format8)), expected) - val format9 = s"${format3}9" - checkEvaluation(ToNumber(Literal(str), Literal(format9)), expected) - val format10 = s"${format3}0" - checkEvaluation(ToNumber(Literal(str), Literal(format10)), expected) - val format11 = s"9${format3}9" - checkEvaluation(ToNumber(Literal(str), Literal(format11)), expected) - val format12 = s"0${format3}0" - checkEvaluation(ToNumber(Literal(str), Literal(format12)), expected) - } - - // Test '$' + test("ToNumber: negative tests (the format string is invalid)") { + val invalidCharacter = "Encountered invalid character" + val thousandsSeparatorDigitsBetween = + "Thousands separators (,) must have digits in between them" + val mustBeAtEnd = "must be at the end of the number format" + val atMostOne = "At most one" Seq( - ("$78.12", "$99.99") -> Decimal(78.12), - ("$78.12", "$00.00") -> Decimal(78.12), - ("78.12$", "99.99$") -> Decimal(78.12), - ("78.12$", "00.00$") -> Decimal(78.12) - ).foreach { case ((str, format), expected) => - checkEvaluation(ToNumber(Literal(str), Literal(format)), expected) - } + // The format string must not be empty. + ("454", "") -> "The format string cannot be empty", + // Make sure the format string does not contain any unrecognized characters. + ("454", "999@") -> invalidCharacter, + ("454", "999M") -> invalidCharacter, + ("454", "999P") -> invalidCharacter, + // Make sure the format string contains at least one digit. + ("454", "$") -> "The format string requires at least one number digit", + // Make sure the format string contains at most one decimal point. + ("454", "99.99.99") -> atMostOne, + // Make sure the format string contains at most one dollar sign. + ("454", "$$99") -> atMostOne, + // Make sure the format string contains at most one minus sign at the end. + ("--$54", "SS$99") -> atMostOne, + ("-$54", "MI$99MI") -> atMostOne, + ("$4-4", "$9MI9MI") -> atMostOne, + // Make sure the format string contains at most one closing angle bracket at the end. + ("<$45>", "PR$99") -> mustBeAtEnd, + ("$4<4>", "$9PR9") -> mustBeAtEnd, + ("<<454>>", "999PRPR") -> mustBeAtEnd, + // Make sure that any dollar sign in the format string occurs before any digits. + ("4$54", "9$99") -> "Currency characters must appear before digits", + // Make sure that any dollar sign in the format string occurs before any decimal point. + (".$99", ".$99") -> "Currency characters must appear before any decimal point", + // Thousands separators must have digits in between them. + (",123", ",099") -> thousandsSeparatorDigitsBetween, + (",123,456", ",999,099") -> thousandsSeparatorDigitsBetween, + (",,345", "9,,09.99") -> thousandsSeparatorDigitsBetween, + (",,345", "9,99,.99") -> thousandsSeparatorDigitsBetween, + (",,345", "9,99,") -> thousandsSeparatorDigitsBetween, + (",,345", ",,999,099.99") -> thousandsSeparatorDigitsBetween, + // Thousands separators must not appear after the decimal point. + ("123.45,6", "099.99,9") -> "Thousands separators (,) may not appear after the decimal point" + ).foreach { case ((str: String, format: String), expectedErrMsg: String) => + val toNumberResult = ToNumber(Literal(str), Literal(format)).checkInputDataTypes() + assert(toNumberResult != TypeCheckResult.TypeCheckSuccess, + s"The format string should have been invalid: $format") + toNumberResult match { + case TypeCheckResult.TypeCheckFailure(message) => + assert(message.contains(expectedErrMsg)) + } - ToNumber(Literal("$78$.12"), Literal("$99$.99")).checkInputDataTypes() match { - case TypeCheckResult.TypeCheckFailure(msg) => - assert(msg.contains("At most one '$' is allowed in the number format: '$99$.99'")) - } - ToNumber(Literal("78$.12"), Literal("99$.99")).checkInputDataTypes() match { - case TypeCheckResult.TypeCheckFailure(msg) => - assert(msg.contains("'$' must be the first or last char in the number format: '99$.99'")) + val tryToNumberResult = TryToNumber(Literal(str), Literal(format)).checkInputDataTypes() + assert(tryToNumberResult != TypeCheckResult.TypeCheckSuccess, + s"The format string should have been invalid: $format") + tryToNumberResult match { + case TypeCheckResult.TypeCheckFailure(message) => + assert(message.contains(expectedErrMsg)) + } } + } - // Test '-' and 'S' + test("ToNumber: negative tests (the input string does not match the format string)") { Seq( - ("454-", "999-") -> Decimal(-454), - ("-454", "-999") -> Decimal(-454), - ("12,454.8-", "99G999D9-") -> Decimal(-12454.8), - ("00,454.8-", "99G999.9-") -> Decimal(-454.8) - ).foreach { case ((str, format), expected) => - checkEvaluation(ToNumber(Literal(str), Literal(format)), expected) - val format2 = format.replace('9', '0') - checkEvaluation(ToNumber(Literal(str), Literal(format2)), expected) - val format3 = format.replace('-', 'S') - checkEvaluation(ToNumber(Literal(str), Literal(format3)), expected) - val format4 = format2.replace('-', 'S') - checkEvaluation(ToNumber(Literal(str), Literal(format4)), expected) - } - - ToNumber(Literal("454.3--"), Literal("999D9SS")).checkInputDataTypes() match { - case TypeCheckResult.TypeCheckFailure(msg) => - assert(msg.contains("At most one 'S' or '-' is allowed in the number format: '999D9SS'")) - } - - Seq("9S99", "9-99").foreach { str => - ToNumber(Literal("-454"), Literal(str)).checkInputDataTypes() match { - case TypeCheckResult.TypeCheckFailure(msg) => - assert(msg.contains( - s"'S' or '-' must be the first or last char in the number format: '$str'")) - } + // The input contained more thousands separators than the format string. + ("45", "0,9"), + // The input contained more or fewer digits than required. + ("4", "09"), + ("454", "09"), + // The input contained more digits than allowed. + ("454", "99"), + // The input string did not contain an expected dollar sign. + ("45", "$99"), + // The input string did not contain an expected opening angle bracket. + ("45>", "99PR"), + // The input string did not contain an expected closing angle bracket. + ("<45", "99PR"), + // The trailing MI did not match against any trailing +. + ("454+", "999MI"), + // The trailing PR required exactly one leading < and trailing >. + ("<454", "999PR"), + ("454>", "999PR"), + ("<<454>>", "999PR"), + // At least three digits were required. + ("45", "S$999,099.99"), + // Groups of digits with leading zeros are not optional. + ("$345", "S$099,099.99"), + // The letter 'D' is allowed in the format string, but not the input string. + ("4D5", "0D9") + ).foreach { case (str: String, format: String) => + val toNumberExpr = ToNumber(Literal(str), Literal(format)) + assert(toNumberExpr.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess) + checkExceptionInExpression[IllegalArgumentException]( + toNumberExpr, "does not match the given number format") + + val tryToNumberExpr = TryToNumber(Literal(str), Literal(format)) + assert(tryToNumberExpr.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess) + checkEvaluation(tryToNumberExpr, null) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberFormatterSuite.scala deleted file mode 100644 index 81264f4e85080..0000000000000 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberFormatterSuite.scala +++ /dev/null @@ -1,315 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.util - -import org.apache.spark.SparkFunSuite -import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.types.Decimal -import org.apache.spark.unsafe.types.UTF8String - -class NumberFormatterSuite extends SparkFunSuite { - - private def invalidNumberFormat(numberFormat: String, errorMsg: String): Unit = { - val testNumberFormatter = new TestNumberFormatter(numberFormat) - val e = intercept[AnalysisException](testNumberFormatter.checkWithException()) - assert(e.getMessage.contains(errorMsg)) - } - - private def failParseWithInvalidInput( - input: UTF8String, numberFormat: String, errorMsg: String): Unit = { - val testNumberFormatter = new TestNumberFormatter(numberFormat) - val e = intercept[IllegalArgumentException](testNumberFormatter.parse(input)) - assert(e.getMessage.contains(errorMsg)) - } - - test("parse") { - invalidNumberFormat("", "Number format cannot be empty") - - // Test '9' and '0' - failParseWithInvalidInput(UTF8String.fromString("454"), "9", - "The input string '454' does not match the given number format: '9'") - failParseWithInvalidInput(UTF8String.fromString("454"), "99", - "The input string '454' does not match the given number format: '99'") - - Seq( - ("454", "999") -> Decimal(454), - ("054", "999") -> Decimal(54), - ("54", "999") -> Decimal(54), - ("404", "999") -> Decimal(404), - ("450", "999") -> Decimal(450), - ("454", "9999") -> Decimal(454), - ("054", "9999") -> Decimal(54), - ("404", "9999") -> Decimal(404), - ("450", "9999") -> Decimal(450) - ).foreach { case ((str, format), expected) => - val builder = new TestNumberFormatter(format) - builder.check() - assert(builder.parse(UTF8String.fromString(str)) === expected) - } - - failParseWithInvalidInput(UTF8String.fromString("454"), "0", - "The input string '454' does not match the given number format: '0'") - failParseWithInvalidInput(UTF8String.fromString("454"), "00", - "The input string '454' does not match the given number format: '00'") - - Seq( - ("454", "000") -> Decimal(454), - ("054", "000") -> Decimal(54), - ("54", "000") -> Decimal(54), - ("404", "000") -> Decimal(404), - ("450", "000") -> Decimal(450), - ("454", "0000") -> Decimal(454), - ("054", "0000") -> Decimal(54), - ("404", "0000") -> Decimal(404), - ("450", "0000") -> Decimal(450) - ).foreach { case ((str, format), expected) => - val builder = new TestNumberFormatter(format) - builder.check() - assert(builder.parse(UTF8String.fromString(str)) === expected) - } - - // Test '.' and 'D' - failParseWithInvalidInput(UTF8String.fromString("454.2"), "999", - "The input string '454.2' does not match the given number format: '999'") - failParseWithInvalidInput(UTF8String.fromString("454.23"), "999.9", - "The input string '454.23' does not match the given number format: '999.9'") - - Seq( - ("454.2", "999.9") -> Decimal(454.2), - ("454.2", "000.0") -> Decimal(454.2), - ("454.2", "999D9") -> Decimal(454.2), - ("454.2", "000D0") -> Decimal(454.2), - ("454.23", "999.99") -> Decimal(454.23), - ("454.23", "000.00") -> Decimal(454.23), - ("454.23", "999D99") -> Decimal(454.23), - ("454.23", "000D00") -> Decimal(454.23), - ("454.0", "999.9") -> Decimal(454), - ("454.0", "000.0") -> Decimal(454), - ("454.0", "999D9") -> Decimal(454), - ("454.0", "000D0") -> Decimal(454), - ("454.00", "999.99") -> Decimal(454), - ("454.00", "000.00") -> Decimal(454), - ("454.00", "999D99") -> Decimal(454), - ("454.00", "000D00") -> Decimal(454), - (".4542", ".9999") -> Decimal(0.4542), - (".4542", ".0000") -> Decimal(0.4542), - (".4542", "D9999") -> Decimal(0.4542), - (".4542", "D0000") -> Decimal(0.4542), - ("4542.", "9999.") -> Decimal(4542), - ("4542.", "0000.") -> Decimal(4542), - ("4542.", "9999D") -> Decimal(4542), - ("4542.", "0000D") -> Decimal(4542) - ).foreach { case ((str, format), expected) => - val builder = new TestNumberFormatter(format) - builder.check() - assert(builder.parse(UTF8String.fromString(str)) === expected) - } - - invalidNumberFormat( - "999.9.9", "At most one 'D' or '.' is allowed in the number format: '999.9.9'") - invalidNumberFormat( - "999D9D9", "At most one 'D' or '.' is allowed in the number format: '999D9D9'") - invalidNumberFormat( - "999.9D9", "At most one 'D' or '.' is allowed in the number format: '999.9D9'") - invalidNumberFormat( - "999D9.9", "At most one 'D' or '.' is allowed in the number format: '999D9.9'") - - // Test ',' and 'G' - Seq( - ("12,454", "99,999") -> Decimal(12454), - ("12,454", "00,000") -> Decimal(12454), - ("12,454", "99G999") -> Decimal(12454), - ("12,454", "00G000") -> Decimal(12454), - ("12,454,367", "99,999,999") -> Decimal(12454367), - ("12,454,367", "00,000,000") -> Decimal(12454367), - ("12,454,367", "99G999G999") -> Decimal(12454367), - ("12,454,367", "00G000G000") -> Decimal(12454367), - ("12,454,", "99,999,") -> Decimal(12454), - ("12,454,", "00,000,") -> Decimal(12454), - ("12,454,", "99G999G") -> Decimal(12454), - ("12,454,", "00G000G") -> Decimal(12454), - (",454,367", ",999,999") -> Decimal(454367), - (",454,367", ",000,000") -> Decimal(454367), - (",454,367", "G999G999") -> Decimal(454367), - (",454,367", "G000G000") -> Decimal(454367), - (",454,367", "999,999") -> Decimal(454367), - (",454,367", "000,000") -> Decimal(454367), - (",454,367", "999G999") -> Decimal(454367), - (",454,367", "000G000") -> Decimal(454367) - ).foreach { case ((str, format), expected) => - val builder = new TestNumberFormatter(format) - builder.check() - assert(builder.parse(UTF8String.fromString(str)) === expected) - } - - // Test '$' - Seq( - ("$78.12", "$99.99") -> Decimal(78.12), - ("$78.12", "$00.00") -> Decimal(78.12), - ("78.12$", "99.99$") -> Decimal(78.12), - ("78.12$", "00.00$") -> Decimal(78.12) - ).foreach { case ((str, format), expected) => - val builder = new TestNumberFormatter(format) - builder.check() - assert(builder.parse(UTF8String.fromString(str)) === expected) - } - - invalidNumberFormat( - "99$.99", "'$' must be the first or last char in the number format: '99$.99'") - invalidNumberFormat("$99.99$", "At most one '$' is allowed in the number format: '$99.99$'") - - // Test '-' and 'S' - Seq( - ("454-", "999-") -> Decimal(-454), - ("454-", "999S") -> Decimal(-454), - ("-454", "-999") -> Decimal(-454), - ("-454", "S999") -> Decimal(-454), - ("454-", "000-") -> Decimal(-454), - ("454-", "000S") -> Decimal(-454), - ("-454", "-000") -> Decimal(-454), - ("-454", "S000") -> Decimal(-454), - ("12,454.8-", "99G999D9S") -> Decimal(-12454.8), - ("00,454.8-", "99G999.9S") -> Decimal(-454.8) - ).foreach { case ((str, format), expected) => - val builder = new TestNumberFormatter(format) - builder.check() - assert(builder.parse(UTF8String.fromString(str)) === expected) - } - - invalidNumberFormat( - "9S99", "'S' or '-' must be the first or last char in the number format: '9S99'") - invalidNumberFormat( - "9-99", "'S' or '-' must be the first or last char in the number format: '9-99'") - invalidNumberFormat( - "999D9SS", "At most one 'S' or '-' is allowed in the number format: '999D9SS'") - } - - test("format") { - - // Test '9' and '0' - Seq( - (Decimal(454), "9") -> "#", - (Decimal(454), "99") -> "##", - (Decimal(454), "999") -> "454", - (Decimal(54), "999") -> "54", - (Decimal(404), "999") -> "404", - (Decimal(450), "999") -> "450", - (Decimal(454), "9999") -> "454", - (Decimal(54), "9999") -> "54", - (Decimal(404), "9999") -> "404", - (Decimal(450), "9999") -> "450", - (Decimal(454), "0") -> "#", - (Decimal(454), "00") -> "##", - (Decimal(454), "000") -> "454", - (Decimal(54), "000") -> "054", - (Decimal(404), "000") -> "404", - (Decimal(450), "000") -> "450", - (Decimal(454), "0000") -> "0454", - (Decimal(54), "0000") -> "0054", - (Decimal(404), "0000") -> "0404", - (Decimal(450), "0000") -> "0450" - ).foreach { case ((decimal, format), expected) => - val builder = new TestNumberFormatter(format, false) - builder.check() - assert(builder.format(decimal) === expected) - } - - // Test '.' and 'D' - Seq( - (Decimal(454.2), "999.9") -> "454.2", - (Decimal(454.2), "000.0") -> "454.2", - (Decimal(454.2), "999D9") -> "454.2", - (Decimal(454.2), "000D0") -> "454.2", - (Decimal(454), "999.9") -> "454.0", - (Decimal(454), "000.0") -> "454.0", - (Decimal(454), "999D9") -> "454.0", - (Decimal(454), "000D0") -> "454.0", - (Decimal(454), "999.99") -> "454.00", - (Decimal(454), "000.00") -> "454.00", - (Decimal(454), "999D99") -> "454.00", - (Decimal(454), "000D00") -> "454.00", - (Decimal(0.4542), ".9999") -> ".####", - (Decimal(0.4542), ".0000") -> ".####", - (Decimal(0.4542), "D9999") -> ".####", - (Decimal(0.4542), "D0000") -> ".####", - (Decimal(4542), "9999.") -> "4542.", - (Decimal(4542), "0000.") -> "4542.", - (Decimal(4542), "9999D") -> "4542.", - (Decimal(4542), "0000D") -> "4542." - ).foreach { case ((decimal, format), expected) => - val builder = new TestNumberFormatter(format, false) - builder.check() - assert(builder.format(decimal) === expected) - } - - // Test ',' and 'G' - Seq( - (Decimal(12454), "99,999") -> "12,454", - (Decimal(12454), "00,000") -> "12,454", - (Decimal(12454), "99G999") -> "12,454", - (Decimal(12454), "00G000") -> "12,454", - (Decimal(12454367), "99,999,999") -> "12,454,367", - (Decimal(12454367), "00,000,000") -> "12,454,367", - (Decimal(12454367), "99G999G999") -> "12,454,367", - (Decimal(12454367), "00G000G000") -> "12,454,367", - (Decimal(12454), "99,999,") -> "12,454,", - (Decimal(12454), "00,000,") -> "12,454,", - (Decimal(12454), "99G999G") -> "12,454,", - (Decimal(12454), "00G000G") -> "12,454,", - (Decimal(454367), ",999,999") -> ",454,367", - (Decimal(454367), ",000,000") -> ",454,367", - (Decimal(454367), "G999G999") -> ",454,367", - (Decimal(454367), "G000G000") -> ",454,367" - ).foreach { case ((decimal, format), expected) => - val builder = new TestNumberFormatter(format, false) - builder.check() - assert(builder.format(decimal) === expected) - } - - // Test '$' - Seq( - (Decimal(78.12), "$99.99") -> "$78.12", - (Decimal(78.12), "$00.00") -> "$78.12", - (Decimal(78.12), "99.99$") -> "78.12$", - (Decimal(78.12), "00.00$") -> "78.12$" - ).foreach { case ((decimal, format), expected) => - val builder = new TestNumberFormatter(format, false) - builder.check() - assert(builder.format(decimal) === expected) - } - - // Test '-' and 'S' - Seq( - (Decimal(-454), "999-") -> "454-", - (Decimal(-454), "999S") -> "454-", - (Decimal(-454), "-999") -> "-454", - (Decimal(-454), "S999") -> "-454", - (Decimal(-454), "000-") -> "454-", - (Decimal(-454), "000S") -> "454-", - (Decimal(-454), "-000") -> "-454", - (Decimal(-454), "S000") -> "-454", - (Decimal(-12454.8), "99G999D9S") -> "12,454.8-", - (Decimal(-454.8), "99G999.9S") -> "454.8-" - ).foreach { case ((decimal, format), expected) => - val builder = new TestNumberFormatter(format, false) - builder.check() - assert(builder.format(decimal) === expected) - } - } - -} diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 1dbf9678af9e3..14902b0854987 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 386 + - Number of queries: 387 - Number of expressions that missing example: 12 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions @@ -317,6 +317,7 @@ | org.apache.spark.sql.catalyst.expressions.TryMultiply | try_multiply | SELECT try_multiply(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.TrySubtract | try_subtract | SELECT try_subtract(2, 1) | struct | | org.apache.spark.sql.catalyst.expressions.TryToBinary | try_to_binary | SELECT try_to_binary('abc', 'utf-8') | struct | +| org.apache.spark.sql.catalyst.expressions.TryToNumber | try_to_number | SELECT try_to_number('454', '999') | struct | | org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct | | org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct | | org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct | @@ -391,4 +392,4 @@ | org.apache.spark.sql.catalyst.expressions.xml.XPathList | xpath | SELECT xpath('b1b2b3c1c2','a/b/text()') | structb1b2b3c1c2, a/b/text()):array> | | org.apache.spark.sql.catalyst.expressions.xml.XPathLong | xpath_long | SELECT xpath_long('12', 'sum(a/b)') | struct12, sum(a/b)):bigint> | | org.apache.spark.sql.catalyst.expressions.xml.XPathShort | xpath_short | SELECT xpath_short('12', 'sum(a/b)') | struct12, sum(a/b)):smallint> | -| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('bcc','a/c') | structbcc, a/c):string> | \ No newline at end of file +| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('bcc','a/c') | structbcc, a/c):string> | diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index 0db28ad9f3ecc..e1c97b468f27a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -162,10 +162,11 @@ select to_number('454', '000'); select to_number('454.2', '000.0'); select to_number('12,454', '00,000'); select to_number('$78.12', '$00.00'); -select to_number('-454', '-000'); +select to_number('+454', 'S000'); select to_number('-454', 'S000'); -select to_number('12,454.8-', '00,000.9-'); -select to_number('00,454.8-', '00,000.9-'); +select to_number('12,454.8-', '00,000.9MI'); +select to_number('00,454.8-', '00,000.9MI'); +select to_number('<00,454.8>', '00,000.9PR'); -- to_binary select to_binary('abc'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 52d70e22a44dd..e330cafa73a82 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -1004,11 +1004,11 @@ struct -- !query -select to_number('-454', '-000') +select to_number('+454', 'S000') -- !query schema -struct +struct -- !query output --454 +454 -- !query @@ -1020,17 +1020,25 @@ struct -- !query -select to_number('12,454.8-', '00,000.9-') +select to_number('12,454.8-', '00,000.9MI') -- !query schema -struct +struct -- !query output -12454.8 -- !query -select to_number('00,454.8-', '00,000.9-') +select to_number('00,454.8-', '00,000.9MI') +-- !query schema +struct +-- !query output +-454.8 + + +-- !query +select to_number('<00,454.8>', '00,000.9PR') -- !query schema -struct +struct, 00,000.9PR):decimal(6,1)> -- !query output -454.8 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out index 41fc9908d0c2b..9a6cc7eac027b 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out @@ -4597,9 +4597,10 @@ struct<> -- !query SELECT '' AS to_number_1, to_number('-34,338,492', '99G999G999') -- !query schema -struct +struct<> -- !query output - -34338492 +java.lang.IllegalArgumentException +The input string '-34,338,492' does not match the given number format: '99G999G999' -- !query @@ -4607,8 +4608,8 @@ SELECT '' AS to_number_2, to_number('-34,338,492.654,878', '99G999G999D999G999' -- !query schema struct<> -- !query output -java.lang.IllegalArgumentException -The input string '-34,338,492.654,878' does not match the given number format: '99G999G999D999G999' +org.apache.spark.sql.AnalysisException +cannot resolve 'to_number('-34,338,492.654,878', '99G999G999D999G999')' due to data type mismatch: Thousands separators (,) may not appear after the decimal point in the number format: '99G999G999D999G999'; line 1 pos 27 -- !query @@ -4656,16 +4657,17 @@ SELECT '' AS to_number_15, to_number('123,000','999G') -- !query schema struct<> -- !query output -java.lang.IllegalArgumentException -The input string '123,000' does not match the given number format: '999G' +org.apache.spark.sql.AnalysisException +cannot resolve 'to_number('123,000', '999G')' due to data type mismatch: Thousands separators (,) must have digits in between them in the number format: '999G'; line 1 pos 27 -- !query SELECT '' AS to_number_16, to_number('123456','999G999') -- !query schema -struct +struct<> -- !query output - 123456 +java.lang.IllegalArgumentException +The input string '123456' does not match the given number format: '999G999' -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index ff14da143da7b..af861e3913b6e 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1000,11 +1000,11 @@ struct -- !query -select to_number('-454', '-000') +select to_number('+454', 'S000') -- !query schema -struct +struct -- !query output --454 +454 -- !query @@ -1016,17 +1016,25 @@ struct -- !query -select to_number('12,454.8-', '00,000.9-') +select to_number('12,454.8-', '00,000.9MI') -- !query schema -struct +struct -- !query output -12454.8 -- !query -select to_number('00,454.8-', '00,000.9-') +select to_number('00,454.8-', '00,000.9MI') +-- !query schema +struct +-- !query output +-454.8 + + +-- !query +select to_number('<00,454.8>', '00,000.9PR') -- !query schema -struct +struct, 00,000.9PR):decimal(6,1)> -- !query output -454.8 From fae6a1c5e08ef75bd480f7ce2569b4b3959259ea Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 11 Apr 2022 20:22:40 +0300 Subject: [PATCH 126/535] [SPARK-38791][SQL][3.3] Output parameter values of error classes in the SQL style ### What changes were proposed in this pull request? In the PR, I propose new trait `QueryErrorsBase` which is supposed to be used by `Query.*Errors`, and new method `toSQLValue()`. The method converts a parameter value of error classes to its SQL representation. ### Why are the changes needed? To improve user experience with Spark SQL. Users should see values in error messages in unified SQL style. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "test:testOnly *QueryExecutionErrorsSuite" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit bc8c264851457d8ef59f5b332c79296651ec5d1e) Signed-off-by: Max Gekk Closes #36143 from MaxGekk/cleanup-error-classes-3.3. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../apache/spark/unsafe/types/UTF8String.java | 8 ++-- .../sql/catalyst/util/IntervalUtils.scala | 15 ++++-- .../spark/sql/errors/QueryErrorsBase.scala | 47 +++++++++++++++++++ .../sql/errors/QueryExecutionErrors.scala | 36 +++++++------- .../expressions/AnsiCastSuiteBase.scala | 16 +++---- .../sql/catalyst/expressions/CastSuite.scala | 24 ++++++++-- .../sql-tests/results/ansi/cast.sql.out | 42 ++++++++--------- .../sql-tests/results/ansi/date.sql.out | 4 +- .../sql-tests/results/ansi/interval.sql.out | 12 ++--- .../results/ansi/string-functions.sql.out | 8 ++-- .../results/postgreSQL/float4.sql.out | 8 ++-- .../results/postgreSQL/float8.sql.out | 10 ++-- .../sql-tests/results/postgreSQL/int8.sql.out | 8 ++-- .../sql-tests/results/postgreSQL/text.sql.out | 4 +- .../results/postgreSQL/window_part2.sql.out | 2 +- .../results/postgreSQL/window_part4.sql.out | 2 +- .../apache/spark/sql/SQLInsertTestSuite.scala | 2 +- .../errors/QueryExecutionErrorsSuite.scala | 2 +- .../spark/sql/sources/InsertSuite.scala | 8 ++-- 19 files changed, 164 insertions(+), 94 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index 0f9d653a0eb32..bf11814c981fb 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -1315,7 +1315,7 @@ public long toLongExact() { if (toLong(result, false)) { return result.value; } - throw new NumberFormatException("invalid input syntax for type numeric: " + this); + throw new NumberFormatException("invalid input syntax for type numeric: '" + this + "'"); } /** @@ -1329,7 +1329,7 @@ public int toIntExact() { if (toInt(result, false)) { return result.value; } - throw new NumberFormatException("invalid input syntax for type numeric: " + this); + throw new NumberFormatException("invalid input syntax for type numeric: '" + this + "'"); } public short toShortExact() { @@ -1338,7 +1338,7 @@ public short toShortExact() { if (result == value) { return result; } - throw new NumberFormatException("invalid input syntax for type numeric: " + this); + throw new NumberFormatException("invalid input syntax for type numeric: '" + this + "'"); } public byte toByteExact() { @@ -1347,7 +1347,7 @@ public byte toByteExact() { if (result == value) { return result; } - throw new NumberFormatException("invalid input syntax for type numeric: " + this); + throw new NumberFormatException("invalid input syntax for type numeric: '" + this + "'"); } @Override diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index f05e3203c078e..a90a6a798cd54 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -1289,7 +1289,8 @@ object IntervalUtils { val vShort = vInt.toShort if (vInt != vShort) { throw QueryExecutionErrors.castingCauseOverflowError( - toYearMonthIntervalString(v, ANSI_STYLE, startField, endField), ShortType) + Literal(v, YearMonthIntervalType(startField, endField)), + ShortType) } vShort } @@ -1299,7 +1300,8 @@ object IntervalUtils { val vByte = vInt.toByte if (vInt != vByte) { throw QueryExecutionErrors.castingCauseOverflowError( - toYearMonthIntervalString(v, ANSI_STYLE, startField, endField), ByteType) + Literal(v, YearMonthIntervalType(startField, endField)), + ByteType) } vByte } @@ -1347,7 +1349,8 @@ object IntervalUtils { val vInt = vLong.toInt if (vLong != vInt) { throw QueryExecutionErrors.castingCauseOverflowError( - toDayTimeIntervalString(v, ANSI_STYLE, startField, endField), IntegerType) + Literal(v, DayTimeIntervalType(startField, endField)), + IntegerType) } vInt } @@ -1357,7 +1360,8 @@ object IntervalUtils { val vShort = vLong.toShort if (vLong != vShort) { throw QueryExecutionErrors.castingCauseOverflowError( - toDayTimeIntervalString(v, ANSI_STYLE, startField, endField), ShortType) + Literal(v, DayTimeIntervalType(startField, endField)), + ShortType) } vShort } @@ -1367,7 +1371,8 @@ object IntervalUtils { val vByte = vLong.toByte if (vLong != vByte) { throw QueryExecutionErrors.castingCauseOverflowError( - toDayTimeIntervalString(v, ANSI_STYLE, startField, endField), ByteType) + Literal(v, DayTimeIntervalType(startField, endField)), + ByteType) } vByte } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala new file mode 100644 index 0000000000000..e69e1382ecf62 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.errors + +import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.types.{DataType, DoubleType, FloatType} + +trait QueryErrorsBase { + private def litToErrorValue(l: Literal): String = l match { + case Literal(null, _) => "NULL" + case Literal(v: Float, FloatType) => + if (v.isNaN) "NaN" + else if (v.isPosInfinity) "Infinity" + else if (v.isNegInfinity) "-Infinity" + else v.toString + case Literal(v: Double, DoubleType) => + if (v.isNaN) "NaN" + else if (v.isPosInfinity) "Infinity" + else if (v.isNegInfinity) "-Infinity" + else l.sql + case l => l.sql + } + + // Converts an error class parameter to its SQL representation + def toSQLValue(v: Any): String = { + litToErrorValue(Literal(v)) + } + + def toSQLValue(v: Any, t: DataType): String = { + litToErrorValue(Literal.create(v, t)) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 4c75bdf234155..3a89147c4b53b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -66,7 +66,7 @@ import org.apache.spark.util.CircularBuffer * This does not include exceptions thrown during the eager execution of commands, which are * grouped into [[QueryCompilationErrors]]. */ -object QueryExecutionErrors { +object QueryExecutionErrors extends QueryErrorsBase { def logicalHintOperatorNotRemovedDuringAnalysisError(): Throwable = { new SparkIllegalStateException(errorClass = "INTERNAL_ERROR", @@ -91,7 +91,7 @@ object QueryExecutionErrors { def castingCauseOverflowError(t: Any, dataType: DataType): ArithmeticException = { new SparkArithmeticException(errorClass = "CAST_CAUSES_OVERFLOW", - messageParameters = Array(t.toString, dataType.catalogString, SQLConf.ANSI_ENABLED.key)) + messageParameters = Array(toSQLValue(t), dataType.catalogString, SQLConf.ANSI_ENABLED.key)) } def cannotChangeDecimalPrecisionError( @@ -111,7 +111,7 @@ object QueryExecutionErrors { def invalidInputSyntaxForNumericError(s: UTF8String): NumberFormatException = { new SparkNumberFormatException(errorClass = "INVALID_INPUT_SYNTAX_FOR_NUMERIC_TYPE", - messageParameters = Array(s.toString, SQLConf.ANSI_ENABLED.key)) + messageParameters = Array(toSQLValue(s, StringType), SQLConf.ANSI_ENABLED.key)) } def cannotCastFromNullTypeError(to: DataType): Throwable = { @@ -158,14 +158,16 @@ object QueryExecutionErrors { numElements: Int, key: String): ArrayIndexOutOfBoundsException = { new SparkArrayIndexOutOfBoundsException(errorClass = "INVALID_ARRAY_INDEX", - messageParameters = Array(index.toString, numElements.toString, key)) + messageParameters = Array(toSQLValue(index), toSQLValue(numElements), key)) } def invalidElementAtIndexError( index: Int, numElements: Int): ArrayIndexOutOfBoundsException = { - new SparkArrayIndexOutOfBoundsException(errorClass = "INVALID_ARRAY_INDEX_IN_ELEMENT_AT", - messageParameters = Array(index.toString, numElements.toString, SQLConf.ANSI_ENABLED.key)) + new SparkArrayIndexOutOfBoundsException( + errorClass = "INVALID_ARRAY_INDEX_IN_ELEMENT_AT", + messageParameters = + Array(toSQLValue(index), toSQLValue(numElements), SQLConf.ANSI_ENABLED.key)) } def mapKeyNotExistError( @@ -174,10 +176,10 @@ object QueryExecutionErrors { context: String): NoSuchElementException = { if (isElementAtFunction) { new SparkNoSuchElementException(errorClass = "MAP_KEY_DOES_NOT_EXIST_IN_ELEMENT_AT", - messageParameters = Array(key.toString, SQLConf.ANSI_ENABLED.key, context)) + messageParameters = Array(toSQLValue(key), SQLConf.ANSI_ENABLED.key, context)) } else { new SparkNoSuchElementException(errorClass = "MAP_KEY_DOES_NOT_EXIST", - messageParameters = Array(key.toString, SQLConf.ANSI_STRICT_INDEX_OPERATOR.key, context)) + messageParameters = Array(toSQLValue(key), SQLConf.ANSI_STRICT_INDEX_OPERATOR.key, context)) } } @@ -457,12 +459,12 @@ object QueryExecutionErrors { } def unaryMinusCauseOverflowError(originValue: AnyVal): ArithmeticException = { - arithmeticOverflowError(s"- $originValue caused overflow") + arithmeticOverflowError(s"- ${toSQLValue(originValue)} caused overflow") } def binaryArithmeticCauseOverflowError( eval1: Short, symbol: String, eval2: Short): ArithmeticException = { - arithmeticOverflowError(s"$eval1 $symbol $eval2 caused overflow") + arithmeticOverflowError(s"${toSQLValue(eval1)} $symbol ${toSQLValue(eval2)} caused overflow") } def failedSplitSubExpressionMsg(length: Int): String = { @@ -1070,7 +1072,7 @@ object QueryExecutionErrors { def cannotParseStringAsDataTypeError(pattern: String, value: String, dataType: DataType) : Throwable = { new RuntimeException( - s"Cannot parse field value ${value} for pattern ${pattern} " + + s"Cannot parse field value ${toSQLValue(value)} for pattern ${toSQLValue(pattern)} " + s"as target spark data type [$dataType].") } @@ -1135,7 +1137,7 @@ object QueryExecutionErrors { } def paramIsNotIntegerError(paramName: String, value: String): Throwable = { - new RuntimeException(s"$paramName should be an integer. Found $value") + new RuntimeException(s"$paramName should be an integer. Found ${toSQLValue(value)}") } def paramIsNotBooleanValueError(paramName: String): Throwable = { @@ -1341,7 +1343,7 @@ object QueryExecutionErrors { } def indexOutOfBoundsOfArrayDataError(idx: Int): Throwable = { - new SparkIndexOutOfBoundsException(errorClass = "INDEX_OUT_OF_BOUNDS", Array(idx.toString)) + new SparkIndexOutOfBoundsException(errorClass = "INDEX_OUT_OF_BOUNDS", Array(toSQLValue(idx))) } def malformedRecordsDetectedInRecordParsingError(e: BadRecordException): Throwable = { @@ -1378,7 +1380,8 @@ object QueryExecutionErrors { } def dynamicPartitionKeyNotAmongWrittenPartitionPathsError(key: String): Throwable = { - new SparkException(s"Dynamic partition key $key is not among written partition paths.") + new SparkException( + s"Dynamic partition key ${toSQLValue(key)} is not among written partition paths.") } def cannotRemovePartitionDirError(partitionPath: Path): Throwable = { @@ -1661,7 +1664,7 @@ object QueryExecutionErrors { } def valueIsNullError(index: Int): Throwable = { - new NullPointerException(s"Value at index $index is null") + new NullPointerException(s"Value at index ${toSQLValue(index)} is null") } def onlySupportDataSourcesProvidingFileFormatError(providingClass: String): Throwable = { @@ -2005,6 +2008,7 @@ object QueryExecutionErrors { def timestampAddOverflowError(micros: Long, amount: Int, unit: String): ArithmeticException = { new SparkArithmeticException( errorClass = "DATETIME_OVERFLOW", - messageParameters = Array(s"add $amount $unit to '${DateTimeUtils.microsToInstant(micros)}'")) + messageParameters = Array( + s"add ${toSQLValue(amount)} $unit to ${toSQLValue(DateTimeUtils.microsToInstant(micros))}")) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala index 7fb04fe8b7f76..6494fb29fda59 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala @@ -175,28 +175,28 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { // cast to IntegerType Seq(IntegerType, ShortType, ByteType, LongType).foreach { dataType => checkExceptionInExpression[NumberFormatException]( - cast("string", dataType), "invalid input syntax for type numeric: string") + cast("string", dataType), "invalid input syntax for type numeric: 'string'") checkExceptionInExpression[NumberFormatException]( - cast("123-string", dataType), "invalid input syntax for type numeric: 123-string") + cast("123-string", dataType), "invalid input syntax for type numeric: '123-string'") checkExceptionInExpression[NumberFormatException]( - cast("2020-07-19", dataType), "invalid input syntax for type numeric: 2020-07-19") + cast("2020-07-19", dataType), "invalid input syntax for type numeric: '2020-07-19'") checkExceptionInExpression[NumberFormatException]( - cast("1.23", dataType), "invalid input syntax for type numeric: 1.23") + cast("1.23", dataType), "invalid input syntax for type numeric: '1.23'") } Seq(DoubleType, FloatType, DecimalType.USER_DEFAULT).foreach { dataType => checkExceptionInExpression[NumberFormatException]( - cast("string", dataType), "invalid input syntax for type numeric: string") + cast("string", dataType), "invalid input syntax for type numeric: 'string'") checkExceptionInExpression[NumberFormatException]( - cast("123.000.00", dataType), "invalid input syntax for type numeric: 123.000.00") + cast("123.000.00", dataType), "invalid input syntax for type numeric: '123.000.00'") checkExceptionInExpression[NumberFormatException]( - cast("abc.com", dataType), "invalid input syntax for type numeric: abc.com") + cast("abc.com", dataType), "invalid input syntax for type numeric: 'abc.com'") } } protected def checkCastToNumericError(l: Literal, to: DataType, tryCastResult: Any): Unit = { checkExceptionInExpression[NumberFormatException]( - cast(l, to), "invalid input syntax for type numeric: true") + cast(l, to), "invalid input syntax for type numeric: 'true'") } test("cast from invalid string array to numeric array should throw NumberFormatException") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index ca110502c6b3a..b6c347cfedb75 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -600,7 +600,7 @@ class CastSuite extends CastSuiteBase { val e3 = intercept[ArithmeticException] { Cast(Literal(Int.MaxValue + 1L), IntegerType).eval() }.getMessage - assert(e3.contains("Casting 2147483648 to int causes overflow")) + assert(e3.contains("Casting 2147483648L to int causes overflow")) } } @@ -773,7 +773,14 @@ class CastSuite extends CastSuiteBase { Seq( (Int.MaxValue, DayTimeIntervalType(DAY)), - (Int.MinValue, DayTimeIntervalType(DAY)), + (Int.MinValue, DayTimeIntervalType(DAY)) + ).foreach { + case (v, toType) => + checkExceptionInExpression[ArithmeticException](cast(v, toType), + s"Casting $v to ${toType.catalogString} causes overflow") + } + + Seq( (Long.MaxValue, DayTimeIntervalType(DAY)), (Long.MinValue, DayTimeIntervalType(DAY)), (Long.MaxValue, DayTimeIntervalType(HOUR)), @@ -785,7 +792,7 @@ class CastSuite extends CastSuiteBase { ).foreach { case (v, toType) => checkExceptionInExpression[ArithmeticException](cast(v, toType), - s"Casting $v to ${toType.catalogString} causes overflow") + s"Casting ${v}L to ${toType.catalogString} causes overflow") } } @@ -876,7 +883,14 @@ class CastSuite extends CastSuiteBase { Seq( (Int.MaxValue, YearMonthIntervalType(YEAR)), - (Int.MinValue, YearMonthIntervalType(YEAR)), + (Int.MinValue, YearMonthIntervalType(YEAR)) + ).foreach { + case (v, toType) => + checkExceptionInExpression[ArithmeticException](cast(v, toType), + s"Casting $v to ${toType.catalogString} causes overflow") + } + + Seq( (Long.MaxValue, YearMonthIntervalType(YEAR)), (Long.MinValue, YearMonthIntervalType(YEAR)), (Long.MaxValue, YearMonthIntervalType(MONTH)), @@ -884,7 +898,7 @@ class CastSuite extends CastSuiteBase { ).foreach { case (v, toType) => checkExceptionInExpression[ArithmeticException](cast(v, toType), - s"Casting $v to ${toType.catalogString} causes overflow") + s"Casting ${v}L to ${toType.catalogString} causes overflow") } } } diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out index 6e45fe8dce938..6b705274dc885 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -8,7 +8,7 @@ SELECT CAST('1.23' AS int) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 1.23. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '1.23'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -17,7 +17,7 @@ SELECT CAST('1.23' AS long) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 1.23. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '1.23'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -26,7 +26,7 @@ SELECT CAST('-4.56' AS int) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: -4.56. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '-4.56'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -35,7 +35,7 @@ SELECT CAST('-4.56' AS long) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: -4.56. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '-4.56'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -44,7 +44,7 @@ SELECT CAST('abc' AS int) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: abc. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'abc'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -53,7 +53,7 @@ SELECT CAST('abc' AS long) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: abc. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'abc'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -62,7 +62,7 @@ SELECT CAST('1234567890123' AS int) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 1234567890123. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '1234567890123'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -71,7 +71,7 @@ SELECT CAST('12345678901234567890123' AS long) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 12345678901234567890123. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '12345678901234567890123'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -80,7 +80,7 @@ SELECT CAST('' AS int) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: . To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: ''. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -89,7 +89,7 @@ SELECT CAST('' AS long) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: . To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: ''. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -114,7 +114,7 @@ SELECT CAST('123.a' AS int) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 123.a. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '123.a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -123,7 +123,7 @@ SELECT CAST('123.a' AS long) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 123.a. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '123.a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -140,7 +140,7 @@ SELECT CAST('-2147483649' AS int) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: -2147483649. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '-2147483649'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -157,7 +157,7 @@ SELECT CAST('2147483648' AS int) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 2147483648. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '2147483648'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -174,7 +174,7 @@ SELECT CAST('-9223372036854775809' AS long) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: -9223372036854775809. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '-9223372036854775809'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -191,7 +191,7 @@ SELECT CAST('9223372036854775808' AS long) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 9223372036854775808. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '9223372036854775808'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -447,7 +447,7 @@ select cast('1中文' as tinyint) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 1中文. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -456,7 +456,7 @@ select cast('1中文' as smallint) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 1中文. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -465,7 +465,7 @@ select cast('1中文' as INT) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 1中文. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -474,7 +474,7 @@ select cast('中文1' as bigint) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 中文1. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '中文1'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -483,7 +483,7 @@ select cast('1中文' as bigint) struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 1中文. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index 98b1ec42a79ca..c7058cd7e3be4 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -324,7 +324,7 @@ select date_add('2011-11-11', '1.2') struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 1.2. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '1.2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -432,7 +432,7 @@ select date_sub(date'2011-11-11', '1.2') struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: 1.2. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: '1.2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 1f82263843232..8f88727f66fb8 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -122,7 +122,7 @@ select interval 2 second * 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: a. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -131,7 +131,7 @@ select interval 2 second / 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: a. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -140,7 +140,7 @@ select interval 2 year * 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: a. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -149,7 +149,7 @@ select interval 2 year / 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: a. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -174,7 +174,7 @@ select 'a' * interval 2 second struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: a. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -183,7 +183,7 @@ select 'a' * interval 2 year struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: a. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index e330cafa73a82..c65384673c2b1 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -82,7 +82,7 @@ select left("abcd", -2), left("abcd", 0), left("abcd", 'a') struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: a. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -107,7 +107,7 @@ select right("abcd", -2), right("abcd", 0), right("abcd", 'a') struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: a. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -413,7 +413,7 @@ SELECT lpad('hi', 'invalid_length') struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: invalid_length. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'invalid_length'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -422,7 +422,7 @@ SELECT rpad('hi', 'invalid_length') struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: invalid_length. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'invalid_length'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out index 690fd7cd2cbbc..eccfdbae75768 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out @@ -96,7 +96,7 @@ SELECT float('N A N') struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: N A N. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'N A N'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -105,7 +105,7 @@ SELECT float('NaN x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: NaN x. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'NaN x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -114,7 +114,7 @@ SELECT float(' INFINITY x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: INFINITY x. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: ' INFINITY x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -147,7 +147,7 @@ SELECT float(decimal('nan')) struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: nan. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out index 2b71be5a5d96c..d143e1f1c5991 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out @@ -128,7 +128,7 @@ SELECT double('N A N') struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: N A N. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'N A N'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -137,7 +137,7 @@ SELECT double('NaN x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: NaN x. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'NaN x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -146,7 +146,7 @@ SELECT double(' INFINITY x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: INFINITY x. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: ' INFINITY x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -179,7 +179,7 @@ SELECT double(decimal('nan')) struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: nan. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -833,7 +833,7 @@ SELECT bigint(double('-9223372036854780000')) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting -9.22337203685478E18 to bigint causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting -9.22337203685478E18D to bigint causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out index cc524b575d33e..b7185fcbf1fea 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out @@ -619,7 +619,7 @@ SELECT CAST(q1 AS int) FROM int8_tbl WHERE q2 <> 456 struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting 4567890123456789 to int causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting 4567890123456789L to int causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -636,7 +636,7 @@ SELECT CAST(q1 AS smallint) FROM int8_tbl WHERE q2 <> 456 struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting 4567890123456789 to smallint causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting 4567890123456789L to smallint causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -673,7 +673,7 @@ SELECT CAST(double('922337203685477580700.0') AS bigint) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting 9.223372036854776E20 to bigint causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting 9.223372036854776E20D to bigint causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -745,7 +745,7 @@ SELECT string(int(shiftleft(bigint(-1), 63))+1) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting -9223372036854775808 to int causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting -9223372036854775808L to int causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out index 56f50ec3a1d29..9f9f212c731b2 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out @@ -65,7 +65,7 @@ select string('four: ') || 2+2 struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: four: 2. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'four: 2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -74,7 +74,7 @@ select 'four: ' || 2+2 struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: four: 2. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'four: 2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out index 75c40ce92d2d7..158196e7c8280 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out @@ -462,7 +462,7 @@ window w as (order by f_numeric range between struct<> -- !query output java.lang.NumberFormatException -invalid input syntax for type numeric: NaN. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +invalid input syntax for type numeric: 'NaN'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out index 691df3c45a28a..c937d6637716a 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out @@ -501,4 +501,4 @@ FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('nan' AS INT): invalid input syntax for type numeric: nan. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error.; line 3 pos 6 +failed to evaluate expression CAST('nan' AS INT): invalid input syntax for type numeric: 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error.; line 3 pos 6 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala index fad01db82ca0e..97623a2e8dc50 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala @@ -302,7 +302,7 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils { val errorMsg = intercept[NumberFormatException] { sql("insert into t partition(a='ansi') values('ansi')") }.getMessage - assert(errorMsg.contains("invalid input syntax for type numeric: ansi")) + assert(errorMsg.contains("invalid input syntax for type numeric: 'ansi'")) } else { sql("insert into t partition(a='ansi') values('ansi')") checkAnswer(sql("select * from t"), Row("ansi", null) :: Nil) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 9268be43ba490..a7625e17b4ae6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -276,6 +276,6 @@ class QueryExecutionErrorsSuite extends QueryTest assert(e.getErrorClass === "DATETIME_OVERFLOW") assert(e.getSqlState === "22008") assert(e.getMessage === - "Datetime operation overflow: add 1000000 YEAR to '2022-03-09T09:02:03Z'.") + "Datetime operation overflow: add 1000000 YEAR to TIMESTAMP '2022-03-09 01:02:03'.") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 1fb4737c45a61..aca0675e260e0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -713,13 +713,13 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { var msg = intercept[SparkException] { sql(s"insert into t values($outOfRangeValue1)") }.getCause.getMessage - assert(msg.contains(s"Casting $outOfRangeValue1 to int causes overflow")) + assert(msg.contains(s"Casting ${outOfRangeValue1}L to int causes overflow")) val outOfRangeValue2 = (Int.MinValue - 1L).toString msg = intercept[SparkException] { sql(s"insert into t values($outOfRangeValue2)") }.getCause.getMessage - assert(msg.contains(s"Casting $outOfRangeValue2 to int causes overflow")) + assert(msg.contains(s"Casting ${outOfRangeValue2}L to int causes overflow")) } } } @@ -733,13 +733,13 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { var msg = intercept[SparkException] { sql(s"insert into t values(${outOfRangeValue1}D)") }.getCause.getMessage - assert(msg.contains(s"Casting $outOfRangeValue1 to bigint causes overflow")) + assert(msg.contains(s"Casting ${outOfRangeValue1}D to bigint causes overflow")) val outOfRangeValue2 = Math.nextDown(Long.MinValue) msg = intercept[SparkException] { sql(s"insert into t values(${outOfRangeValue2}D)") }.getCause.getMessage - assert(msg.contains(s"Casting $outOfRangeValue2 to bigint causes overflow")) + assert(msg.contains(s"Casting ${outOfRangeValue2}D to bigint causes overflow")) } } } From 47c7ba2fcb573ac7f39fbe0518b3abbcde905522 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Mon, 11 Apr 2022 19:17:10 -0700 Subject: [PATCH 127/535] [SPARK-34863][SQL][FOLLOWUP] Add benchmark for Parquet & ORC nested column scan ### What changes were proposed in this pull request? This adds benchmark for Parquet & ORC nested column scan, e.g., struct, list and map. ### Why are the changes needed? Both Parquet and ORC now support vectorized reader for nested column now, but there is no benchmark to measure the performance yet. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A - benchmark only. Closes #36123 from sunchao/SPARK-34863-bench. Authored-by: Chao Sun Signed-off-by: Chao Sun --- .../DataSourceReadBenchmark-jdk11-results.txt | 578 +++++++++++------- .../DataSourceReadBenchmark-jdk17-results.txt | 578 +++++++++++------- .../DataSourceReadBenchmark-results.txt | 578 +++++++++++------- .../benchmark/DataSourceReadBenchmark.scala | 150 ++++- 4 files changed, 1176 insertions(+), 708 deletions(-) diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-jdk11-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-jdk11-results.txt index 11fc93406c363..2759244a03f8a 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-jdk11-results.txt @@ -2,322 +2,430 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11809 12046 335 1.3 750.8 1.0X -SQL Json 8588 8592 7 1.8 546.0 1.4X -SQL Parquet Vectorized: DataPageV1 140 162 18 112.0 8.9 84.1X -SQL Parquet Vectorized: DataPageV2 103 117 12 152.6 6.6 114.6X -SQL Parquet MR: DataPageV1 1634 1648 20 9.6 103.9 7.2X -SQL Parquet MR: DataPageV2 1495 1501 9 10.5 95.1 7.9X -SQL ORC Vectorized 180 224 42 87.4 11.4 65.6X -SQL ORC MR 1536 1576 57 10.2 97.7 7.7X - -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 11004 11065 86 1.4 699.6 1.0X +SQL Json 7986 8011 35 2.0 507.7 1.4X +SQL Parquet Vectorized: DataPageV1 124 148 16 127.0 7.9 88.9X +SQL Parquet Vectorized: DataPageV2 101 115 12 155.0 6.5 108.4X +SQL Parquet MR: DataPageV1 1614 1620 8 9.7 102.6 6.8X +SQL Parquet MR: DataPageV2 1445 1446 2 10.9 91.9 7.6X +SQL ORC Vectorized 163 204 41 96.2 10.4 67.3X +SQL ORC MR 1407 1429 31 11.2 89.4 7.8X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 109 114 10 144.3 6.9 1.0X -ParquetReader Vectorized: DataPageV2 90 93 3 175.3 5.7 1.2X -ParquetReader Vectorized -> Row: DataPageV1 58 60 4 271.9 3.7 1.9X -ParquetReader Vectorized -> Row: DataPageV2 39 41 3 404.0 2.5 2.8X +ParquetReader Vectorized: DataPageV1 123 140 14 128.3 7.8 1.0X +ParquetReader Vectorized: DataPageV2 105 114 11 150.3 6.7 1.2X +ParquetReader Vectorized -> Row: DataPageV1 56 61 5 279.9 3.6 2.2X +ParquetReader Vectorized -> Row: DataPageV2 39 43 4 399.4 2.5 3.1X -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 14515 14526 16 1.1 922.8 1.0X -SQL Json 9862 9863 2 1.6 627.0 1.5X -SQL Parquet Vectorized: DataPageV1 144 167 31 109.5 9.1 101.1X -SQL Parquet Vectorized: DataPageV2 139 159 27 113.4 8.8 104.6X -SQL Parquet MR: DataPageV1 1777 1780 3 8.8 113.0 8.2X -SQL Parquet MR: DataPageV2 1690 1691 2 9.3 107.4 8.6X -SQL ORC Vectorized 201 238 46 78.3 12.8 72.2X -SQL ORC MR 1513 1522 14 10.4 96.2 9.6X - -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 13262 13310 67 1.2 843.2 1.0X +SQL Json 9104 9173 98 1.7 578.8 1.5X +SQL Parquet Vectorized: DataPageV1 136 172 31 115.4 8.7 97.3X +SQL Parquet Vectorized: DataPageV2 138 153 17 114.0 8.8 96.1X +SQL Parquet MR: DataPageV1 1789 1805 22 8.8 113.7 7.4X +SQL Parquet MR: DataPageV2 1631 1662 44 9.6 103.7 8.1X +SQL ORC Vectorized 210 252 33 74.8 13.4 63.0X +SQL ORC MR 1412 1437 36 11.1 89.7 9.4X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 182 192 11 86.6 11.5 1.0X -ParquetReader Vectorized: DataPageV2 181 188 7 86.9 11.5 1.0X -ParquetReader Vectorized -> Row: DataPageV1 96 99 4 163.3 6.1 1.9X -ParquetReader Vectorized -> Row: DataPageV2 96 99 3 163.4 6.1 1.9X +ParquetReader Vectorized: DataPageV1 171 183 14 92.0 10.9 1.0X +ParquetReader Vectorized: DataPageV2 175 184 9 90.1 11.1 1.0X +ParquetReader Vectorized -> Row: DataPageV1 88 95 12 179.0 5.6 1.9X +ParquetReader Vectorized -> Row: DataPageV2 88 92 4 179.0 5.6 1.9X -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 15326 15437 156 1.0 974.4 1.0X -SQL Json 10281 10290 13 1.5 653.7 1.5X -SQL Parquet Vectorized: DataPageV1 164 212 36 95.9 10.4 93.4X -SQL Parquet Vectorized: DataPageV2 230 244 11 68.5 14.6 66.7X -SQL Parquet MR: DataPageV1 2108 2111 4 7.5 134.0 7.3X -SQL Parquet MR: DataPageV2 1940 1963 33 8.1 123.3 7.9X -SQL ORC Vectorized 229 279 34 68.7 14.6 66.9X -SQL ORC MR 1903 1906 3 8.3 121.0 8.1X - -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 14022 14236 303 1.1 891.5 1.0X +SQL Json 9763 9929 235 1.6 620.7 1.4X +SQL Parquet Vectorized: DataPageV1 173 226 38 90.7 11.0 80.9X +SQL Parquet Vectorized: DataPageV2 222 241 13 70.7 14.1 63.1X +SQL Parquet MR: DataPageV1 2069 2086 24 7.6 131.5 6.8X +SQL Parquet MR: DataPageV2 1771 1806 49 8.9 112.6 7.9X +SQL ORC Vectorized 203 263 37 77.6 12.9 69.2X +SQL ORC MR 1528 1552 34 10.3 97.2 9.2X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 253 262 10 62.2 16.1 1.0X -ParquetReader Vectorized: DataPageV2 323 327 9 48.8 20.5 0.8X -ParquetReader Vectorized -> Row: DataPageV1 280 288 8 56.3 17.8 0.9X -ParquetReader Vectorized -> Row: DataPageV2 301 314 21 52.2 19.1 0.8X +ParquetReader Vectorized: DataPageV1 246 256 11 63.9 15.6 1.0X +ParquetReader Vectorized: DataPageV2 301 313 17 52.3 19.1 0.8X +ParquetReader Vectorized -> Row: DataPageV1 257 292 18 61.2 16.3 1.0X +ParquetReader Vectorized -> Row: DataPageV2 296 318 25 53.1 18.8 0.8X -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 16756 16776 28 0.9 1065.3 1.0X -SQL Json 10690 10692 3 1.5 679.6 1.6X -SQL Parquet Vectorized: DataPageV1 160 208 45 98.1 10.2 104.5X -SQL Parquet Vectorized: DataPageV2 390 423 23 40.3 24.8 43.0X -SQL Parquet MR: DataPageV1 2196 2201 8 7.2 139.6 7.6X -SQL Parquet MR: DataPageV2 2065 2072 10 7.6 131.3 8.1X -SQL ORC Vectorized 323 338 10 48.7 20.5 51.9X -SQL ORC MR 1899 1906 11 8.3 120.7 8.8X - -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 16153 16252 140 1.0 1027.0 1.0X +SQL Json 10406 10547 200 1.5 661.6 1.6X +SQL Parquet Vectorized: DataPageV1 159 207 33 99.1 10.1 101.8X +SQL Parquet Vectorized: DataPageV2 337 402 40 46.6 21.4 47.9X +SQL Parquet MR: DataPageV1 2160 2193 46 7.3 137.4 7.5X +SQL Parquet MR: DataPageV2 1892 1900 11 8.3 120.3 8.5X +SQL ORC Vectorized 297 340 42 53.0 18.9 54.5X +SQL ORC MR 1705 1732 38 9.2 108.4 9.5X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 278 285 9 56.6 17.7 1.0X -ParquetReader Vectorized: DataPageV2 514 518 2 30.6 32.7 0.5X -ParquetReader Vectorized -> Row: DataPageV1 308 316 11 51.0 19.6 0.9X -ParquetReader Vectorized -> Row: DataPageV2 498 525 27 31.6 31.6 0.6X +ParquetReader Vectorized: DataPageV1 251 262 10 62.6 16.0 1.0X +ParquetReader Vectorized: DataPageV2 418 431 13 37.7 26.6 0.6X +ParquetReader Vectorized -> Row: DataPageV1 247 288 30 63.7 15.7 1.0X +ParquetReader Vectorized -> Row: DataPageV2 412 455 39 38.1 26.2 0.6X -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 21841 21851 14 0.7 1388.6 1.0X -SQL Json 12828 12843 21 1.2 815.6 1.7X -SQL Parquet Vectorized: DataPageV1 241 279 19 65.2 15.3 90.6X -SQL Parquet Vectorized: DataPageV2 554 596 29 28.4 35.2 39.5X -SQL Parquet MR: DataPageV1 2404 2428 34 6.5 152.8 9.1X -SQL Parquet MR: DataPageV2 2153 2166 18 7.3 136.9 10.1X -SQL ORC Vectorized 417 464 62 37.7 26.5 52.4X -SQL ORC MR 2136 2146 14 7.4 135.8 10.2X - -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 19896 20026 183 0.8 1264.9 1.0X +SQL Json 12540 12634 132 1.3 797.3 1.6X +SQL Parquet Vectorized: DataPageV1 221 271 30 71.3 14.0 90.1X +SQL Parquet Vectorized: DataPageV2 546 564 23 28.8 34.7 36.5X +SQL Parquet MR: DataPageV1 2196 2211 21 7.2 139.6 9.1X +SQL Parquet MR: DataPageV2 2085 2089 6 7.5 132.5 9.5X +SQL ORC Vectorized 379 416 39 41.5 24.1 52.5X +SQL ORC MR 1858 1859 2 8.5 118.1 10.7X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 324 357 34 48.6 20.6 1.0X -ParquetReader Vectorized: DataPageV2 694 702 11 22.6 44.2 0.5X -ParquetReader Vectorized -> Row: DataPageV1 378 385 8 41.6 24.0 0.9X -ParquetReader Vectorized -> Row: DataPageV2 701 708 8 22.4 44.6 0.5X +ParquetReader Vectorized: DataPageV1 311 340 20 50.5 19.8 1.0X +ParquetReader Vectorized: DataPageV2 639 647 11 24.6 40.6 0.5X +ParquetReader Vectorized -> Row: DataPageV1 359 376 13 43.9 22.8 0.9X +ParquetReader Vectorized -> Row: DataPageV2 653 658 9 24.1 41.5 0.5X -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 17238 17239 2 0.9 1096.0 1.0X -SQL Json 12295 12307 18 1.3 781.7 1.4X -SQL Parquet Vectorized: DataPageV1 162 203 27 96.8 10.3 106.1X -SQL Parquet Vectorized: DataPageV2 157 194 32 100.4 10.0 110.0X -SQL Parquet MR: DataPageV1 2163 2165 3 7.3 137.5 8.0X -SQL Parquet MR: DataPageV2 2014 2014 1 7.8 128.0 8.6X -SQL ORC Vectorized 458 462 5 34.4 29.1 37.7X -SQL ORC MR 1984 1984 0 7.9 126.1 8.7X - -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 17338 17710 526 0.9 1102.3 1.0X +SQL Json 11844 12121 392 1.3 753.0 1.5X +SQL Parquet Vectorized: DataPageV1 148 187 28 106.2 9.4 117.0X +SQL Parquet Vectorized: DataPageV2 147 183 31 106.8 9.4 117.7X +SQL Parquet MR: DataPageV1 2027 2033 9 7.8 128.9 8.6X +SQL Parquet MR: DataPageV2 1966 1981 21 8.0 125.0 8.8X +SQL ORC Vectorized 399 425 25 39.4 25.4 43.4X +SQL ORC MR 1748 1756 11 9.0 111.2 9.9X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 252 259 10 62.3 16.0 1.0X -ParquetReader Vectorized: DataPageV2 252 256 9 62.3 16.0 1.0X -ParquetReader Vectorized -> Row: DataPageV1 259 307 40 60.7 16.5 1.0X -ParquetReader Vectorized -> Row: DataPageV2 260 295 25 60.5 16.5 1.0X +ParquetReader Vectorized: DataPageV1 226 240 15 69.6 14.4 1.0X +ParquetReader Vectorized: DataPageV2 225 237 15 69.9 14.3 1.0X +ParquetReader Vectorized -> Row: DataPageV1 247 299 38 63.6 15.7 0.9X +ParquetReader Vectorized -> Row: DataPageV2 245 296 25 64.1 15.6 0.9X -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 22485 22536 72 0.7 1429.5 1.0X -SQL Json 16281 16286 8 1.0 1035.1 1.4X -SQL Parquet Vectorized: DataPageV1 232 288 35 67.9 14.7 97.1X -SQL Parquet Vectorized: DataPageV2 277 290 9 56.8 17.6 81.2X -SQL Parquet MR: DataPageV1 2331 2341 15 6.7 148.2 9.6X -SQL Parquet MR: DataPageV2 2216 2229 18 7.1 140.9 10.1X -SQL ORC Vectorized 561 569 9 28.0 35.7 40.1X -SQL ORC MR 2118 2137 27 7.4 134.6 10.6X - -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 21052 21617 799 0.7 1338.4 1.0X +SQL Json 15822 16049 321 1.0 1005.9 1.3X +SQL Parquet Vectorized: DataPageV1 266 286 19 59.0 16.9 79.0X +SQL Parquet Vectorized: DataPageV2 277 291 14 56.8 17.6 76.0X +SQL Parquet MR: DataPageV1 2267 2275 12 6.9 144.1 9.3X +SQL Parquet MR: DataPageV2 2046 2064 26 7.7 130.1 10.3X +SQL ORC Vectorized 535 545 10 29.4 34.0 39.3X +SQL ORC MR 1976 2000 34 8.0 125.6 10.7X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 355 356 1 44.3 22.6 1.0X -ParquetReader Vectorized: DataPageV2 355 356 1 44.3 22.6 1.0X -ParquetReader Vectorized -> Row: DataPageV1 379 386 9 41.5 24.1 0.9X -ParquetReader Vectorized -> Row: DataPageV2 379 389 10 41.5 24.1 0.9X +ParquetReader Vectorized: DataPageV1 314 337 25 50.1 20.0 1.0X +ParquetReader Vectorized: DataPageV2 309 323 14 50.8 19.7 1.0X +ParquetReader Vectorized -> Row: DataPageV1 331 348 13 47.5 21.1 0.9X +ParquetReader Vectorized -> Row: DataPageV2 332 347 11 47.4 21.1 0.9X + + +================================================================================================ +SQL Single Numeric Column Scan in Struct +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL Single TINYINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 2193 2196 4 7.2 139.5 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2211 2222 16 7.1 140.6 1.0X +SQL ORC Vectorized (Nested Column Enabled) 268 310 32 58.7 17.0 8.2X +SQL Parquet MR: DataPageV1 2243 2280 53 7.0 142.6 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2747 2758 16 5.7 174.6 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 155 174 22 101.7 9.8 14.2X +SQL Parquet MR: DataPageV2 2193 2203 13 7.2 139.5 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2709 2733 33 5.8 172.3 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 150 174 27 104.7 9.6 14.6X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL Single SMALLINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 2322 2391 97 6.8 147.6 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2362 2374 17 6.7 150.2 1.0X +SQL ORC Vectorized (Nested Column Enabled) 412 419 9 38.2 26.2 5.6X +SQL Parquet MR: DataPageV1 2393 2400 10 6.6 152.1 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 2919 2922 4 5.4 185.6 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 228 281 54 69.0 14.5 10.2X +SQL Parquet MR: DataPageV2 2223 2240 25 7.1 141.3 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2692 2712 28 5.8 171.2 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 341 361 31 46.1 21.7 6.8X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL Single INT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 2376 2380 6 6.6 151.0 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2333 2378 64 6.7 148.4 1.0X +SQL ORC Vectorized (Nested Column Enabled) 430 451 20 36.6 27.3 5.5X +SQL Parquet MR: DataPageV1 2485 2501 22 6.3 158.0 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3017 3062 65 5.2 191.8 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 189 235 34 83.1 12.0 12.6X +SQL Parquet MR: DataPageV2 2356 2376 29 6.7 149.8 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2823 2831 12 5.6 179.5 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 408 447 25 38.5 26.0 5.8X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL Single BIGINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 2614 2633 26 6.0 166.2 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2711 2776 91 5.8 172.4 1.0X +SQL ORC Vectorized (Nested Column Enabled) 556 598 35 28.3 35.4 4.7X +SQL Parquet MR: DataPageV1 2671 2673 2 5.9 169.8 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3148 3172 34 5.0 200.2 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 282 369 63 55.8 17.9 9.3X +SQL Parquet MR: DataPageV2 2430 2443 19 6.5 154.5 1.1X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3047 3119 101 5.2 193.7 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 577 628 62 27.3 36.7 4.5X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL Single FLOAT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 2516 2527 15 6.3 160.0 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2588 2591 4 6.1 164.5 1.0X +SQL ORC Vectorized (Nested Column Enabled) 577 589 10 27.3 36.7 4.4X +SQL Parquet MR: DataPageV1 2446 2480 49 6.4 155.5 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3011 3018 10 5.2 191.4 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 173 263 70 91.2 11.0 14.6X +SQL Parquet MR: DataPageV2 2204 2216 16 7.1 140.2 1.1X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 2733 2758 35 5.8 173.8 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 173 207 24 90.8 11.0 14.5X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL Single DOUBLE Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 2566 2632 92 6.1 163.2 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2627 2642 21 6.0 167.0 1.0X +SQL ORC Vectorized (Nested Column Enabled) 678 690 16 23.2 43.1 3.8X +SQL Parquet MR: DataPageV1 2497 2501 6 6.3 158.8 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3726 3752 36 4.2 236.9 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 295 401 60 53.4 18.7 8.7X +SQL Parquet MR: DataPageV2 2417 2464 66 6.5 153.7 1.1X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3682 3697 20 4.3 234.1 0.7X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 293 303 14 53.7 18.6 8.8X + + +================================================================================================ +SQL Nested Column Scan +================================================================================================ + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL Nested Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 20991 21114 172 0.0 20018.5 1.0X +SQL ORC Vectorized (Nested Column Disabled) 20899 21160 293 0.1 19931.0 1.0X +SQL ORC Vectorized (Nested Column Enabled) 9512 9580 75 0.1 9071.5 2.2X +SQL Parquet MR: DataPageV1 16203 16490 305 0.1 15452.5 1.3X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 16718 16851 128 0.1 15943.3 1.3X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 11510 11671 130 0.1 10976.8 1.8X +SQL Parquet MR: DataPageV2 15935 16063 109 0.1 15197.1 1.3X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 16648 16869 193 0.1 15876.4 1.3X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 8865 8996 87 0.1 8454.6 2.4X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 15733 15738 8 0.7 1500.4 1.0X -SQL Json 11953 11969 22 0.9 1140.0 1.3X -SQL Parquet Vectorized: DataPageV1 2100 2137 52 5.0 200.2 7.5X -SQL Parquet Vectorized: DataPageV2 2525 2535 14 4.2 240.8 6.2X -SQL Parquet MR: DataPageV1 4075 4110 49 2.6 388.6 3.9X -SQL Parquet MR: DataPageV2 3991 4014 34 2.6 380.6 3.9X -SQL ORC Vectorized 2323 2355 45 4.5 221.5 6.8X -SQL ORC MR 3776 3882 150 2.8 360.1 4.2X +SQL CSV 14365 14389 34 0.7 1369.9 1.0X +SQL Json 11768 11819 73 0.9 1122.2 1.2X +SQL Parquet Vectorized: DataPageV1 2037 2047 14 5.1 194.2 7.1X +SQL Parquet Vectorized: DataPageV2 2460 2468 11 4.3 234.6 5.8X +SQL Parquet MR: DataPageV1 4289 4334 64 2.4 409.0 3.3X +SQL Parquet MR: DataPageV2 4098 4149 72 2.6 390.8 3.5X +SQL ORC Vectorized 2183 2206 33 4.8 208.2 6.6X +SQL ORC MR 3657 3697 56 2.9 348.8 3.9X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 8921 8966 63 1.2 850.7 1.0X -SQL Json 7215 7218 5 1.5 688.1 1.2X -SQL Parquet Vectorized: DataPageV1 604 627 23 17.3 57.6 14.8X -SQL Parquet Vectorized: DataPageV2 606 620 18 17.3 57.8 14.7X -SQL Parquet MR: DataPageV1 1686 1693 10 6.2 160.8 5.3X -SQL Parquet MR: DataPageV2 1660 1665 8 6.3 158.3 5.4X -SQL ORC Vectorized 541 548 7 19.4 51.6 16.5X -SQL ORC MR 1920 1930 13 5.5 183.1 4.6X +SQL CSV 8224 8262 53 1.3 784.3 1.0X +SQL Json 6795 6809 20 1.5 648.1 1.2X +SQL Parquet Vectorized: DataPageV1 587 602 14 17.9 56.0 14.0X +SQL Parquet Vectorized: DataPageV2 563 592 26 18.6 53.7 14.6X +SQL Parquet MR: DataPageV1 1682 1693 15 6.2 160.4 4.9X +SQL Parquet MR: DataPageV2 1562 1593 44 6.7 149.0 5.3X +SQL ORC Vectorized 447 491 52 23.5 42.6 18.4X +SQL ORC MR 1803 1835 46 5.8 171.9 4.6X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 21951 21976 36 0.7 1395.6 1.0X -Data column - Json 12896 12905 14 1.2 819.9 1.7X -Data column - Parquet Vectorized: DataPageV1 247 307 48 63.6 15.7 88.7X -Data column - Parquet Vectorized: DataPageV2 657 686 25 23.9 41.8 33.4X -Data column - Parquet MR: DataPageV1 2705 2708 3 5.8 172.0 8.1X -Data column - Parquet MR: DataPageV2 2621 2621 0 6.0 166.6 8.4X -Data column - ORC Vectorized 440 468 30 35.7 28.0 49.9X -Data column - ORC MR 2553 2565 17 6.2 162.3 8.6X -Partition column - CSV 6640 6641 1 2.4 422.2 3.3X -Partition column - Json 10499 10512 19 1.5 667.5 2.1X -Partition column - Parquet Vectorized: DataPageV1 60 79 24 261.4 3.8 364.8X -Partition column - Parquet Vectorized: DataPageV2 58 81 26 270.2 3.7 377.0X -Partition column - Parquet MR: DataPageV1 1387 1412 35 11.3 88.2 15.8X -Partition column - Parquet MR: DataPageV2 1383 1407 34 11.4 87.9 15.9X -Partition column - ORC Vectorized 61 85 25 256.8 3.9 358.4X -Partition column - ORC MR 1552 1553 1 10.1 98.7 14.1X -Both columns - CSV 21896 21919 32 0.7 1392.1 1.0X -Both columns - Json 13645 13664 27 1.2 867.5 1.6X -Both columns - Parquet Vectorized: DataPageV1 307 351 33 51.3 19.5 71.6X -Both columns - Parquet Vectorized: DataPageV2 698 740 36 22.5 44.4 31.4X -Both columns - Parquet MR: DataPageV1 2804 2821 24 5.6 178.3 7.8X -Both columns - Parquet MR: DataPageV2 2624 2636 16 6.0 166.8 8.4X -Both columns - ORC Vectorized 462 521 53 34.0 29.4 47.5X -Both columns - ORC MR 2564 2580 22 6.1 163.0 8.6X +Data column - CSV 19758 19973 303 0.8 1256.2 1.0X +Data column - Json 12715 12800 120 1.2 808.4 1.6X +Data column - Parquet Vectorized: DataPageV1 260 274 12 60.5 16.5 76.0X +Data column - Parquet Vectorized: DataPageV2 639 673 37 24.6 40.6 30.9X +Data column - Parquet MR: DataPageV1 2598 2638 57 6.1 165.2 7.6X +Data column - Parquet MR: DataPageV2 2535 2543 11 6.2 161.2 7.8X +Data column - ORC Vectorized 397 424 21 39.6 25.3 49.7X +Data column - ORC MR 2252 2378 178 7.0 143.2 8.8X +Partition column - CSV 6023 6057 48 2.6 382.9 3.3X +Partition column - Json 10133 10275 202 1.6 644.2 1.9X +Partition column - Parquet Vectorized: DataPageV1 50 67 18 313.5 3.2 393.9X +Partition column - Parquet Vectorized: DataPageV2 49 63 15 319.8 3.1 401.8X +Partition column - Parquet MR: DataPageV1 1288 1339 72 12.2 81.9 15.3X +Partition column - Parquet MR: DataPageV2 1347 1357 15 11.7 85.6 14.7X +Partition column - ORC Vectorized 53 69 19 299.6 3.3 376.3X +Partition column - ORC MR 1456 1512 79 10.8 92.6 13.6X +Both columns - CSV 19667 19925 364 0.8 1250.4 1.0X +Both columns - Json 14112 14138 36 1.1 897.2 1.4X +Both columns - Parquet Vectorized: DataPageV1 319 334 11 49.4 20.3 62.0X +Both columns - Parquet Vectorized: DataPageV2 725 735 14 21.7 46.1 27.3X +Both columns - Parquet MR: DataPageV1 2815 2848 47 5.6 179.0 7.0X +Both columns - Parquet MR: DataPageV2 2582 2638 80 6.1 164.1 7.7X +Both columns - ORC Vectorized 449 509 48 35.0 28.5 44.0X +Both columns - ORC MR 2437 2439 2 6.5 155.0 8.1X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 10818 10826 11 1.0 1031.6 1.0X -SQL Json 10812 10833 29 1.0 1031.2 1.0X -SQL Parquet Vectorized: DataPageV1 1301 1312 15 8.1 124.1 8.3X -SQL Parquet Vectorized: DataPageV2 1953 1982 42 5.4 186.2 5.5X -SQL Parquet MR: DataPageV1 3677 3680 5 2.9 350.6 2.9X -SQL Parquet MR: DataPageV2 3970 3972 2 2.6 378.6 2.7X -ParquetReader Vectorized: DataPageV1 1004 1016 16 10.4 95.8 10.8X -ParquetReader Vectorized: DataPageV2 1606 1622 22 6.5 153.2 6.7X -SQL ORC Vectorized 1160 1182 30 9.0 110.7 9.3X -SQL ORC MR 3266 3330 90 3.2 311.4 3.3X - -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 9544 9547 5 1.1 910.2 1.0X +SQL Json 10015 10102 123 1.0 955.1 1.0X +SQL Parquet Vectorized: DataPageV1 1381 1399 26 7.6 131.7 6.9X +SQL Parquet Vectorized: DataPageV2 1778 1780 2 5.9 169.6 5.4X +SQL Parquet MR: DataPageV1 3675 3708 47 2.9 350.5 2.6X +SQL Parquet MR: DataPageV2 3778 3812 47 2.8 360.3 2.5X +ParquetReader Vectorized: DataPageV1 937 954 24 11.2 89.4 10.2X +ParquetReader Vectorized: DataPageV2 1438 1440 4 7.3 137.1 6.6X +SQL ORC Vectorized 1061 1065 5 9.9 101.2 9.0X +SQL ORC MR 2899 2937 54 3.6 276.5 3.3X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7971 7981 15 1.3 760.2 1.0X -SQL Json 8266 8269 3 1.3 788.4 1.0X -SQL Parquet Vectorized: DataPageV1 1025 1036 15 10.2 97.8 7.8X -SQL Parquet Vectorized: DataPageV2 1432 1440 11 7.3 136.6 5.6X -SQL Parquet MR: DataPageV1 2792 2806 20 3.8 266.3 2.9X -SQL Parquet MR: DataPageV2 2958 2992 47 3.5 282.1 2.7X -ParquetReader Vectorized: DataPageV1 1010 1024 20 10.4 96.3 7.9X -ParquetReader Vectorized: DataPageV2 1331 1335 4 7.9 127.0 6.0X -SQL ORC Vectorized 1266 1271 6 8.3 120.8 6.3X -SQL ORC MR 3032 3089 81 3.5 289.2 2.6X - -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 7267 7292 35 1.4 693.0 1.0X +SQL Json 7658 7728 99 1.4 730.4 0.9X +SQL Parquet Vectorized: DataPageV1 1021 1055 48 10.3 97.4 7.1X +SQL Parquet Vectorized: DataPageV2 1411 1412 1 7.4 134.6 5.1X +SQL Parquet MR: DataPageV1 2866 2883 24 3.7 273.3 2.5X +SQL Parquet MR: DataPageV2 2880 2899 26 3.6 274.7 2.5X +ParquetReader Vectorized: DataPageV1 1000 1065 93 10.5 95.3 7.3X +ParquetReader Vectorized: DataPageV2 1288 1294 9 8.1 122.8 5.6X +SQL ORC Vectorized 1274 1311 51 8.2 121.5 5.7X +SQL ORC MR 2818 2884 92 3.7 268.8 2.6X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5829 5833 5 1.8 555.9 1.0X -SQL Json 4966 4978 17 2.1 473.6 1.2X -SQL Parquet Vectorized: DataPageV1 236 244 7 44.5 22.5 24.7X -SQL Parquet Vectorized: DataPageV2 305 315 13 34.4 29.1 19.1X -SQL Parquet MR: DataPageV1 1777 1784 10 5.9 169.5 3.3X -SQL Parquet MR: DataPageV2 1635 1637 4 6.4 155.9 3.6X -ParquetReader Vectorized: DataPageV1 242 246 2 43.2 23.1 24.0X -ParquetReader Vectorized: DataPageV2 309 313 7 34.0 29.5 18.9X -SQL ORC Vectorized 391 419 53 26.8 37.3 14.9X -SQL ORC MR 1686 1687 1 6.2 160.8 3.5X +SQL CSV 5408 5434 38 1.9 515.7 1.0X +SQL Json 4570 4693 175 2.3 435.8 1.2X +SQL Parquet Vectorized: DataPageV1 254 274 25 41.3 24.2 21.3X +SQL Parquet Vectorized: DataPageV2 316 336 22 33.2 30.1 17.1X +SQL Parquet MR: DataPageV1 1738 1768 42 6.0 165.8 3.1X +SQL Parquet MR: DataPageV2 1613 1619 10 6.5 153.8 3.4X +ParquetReader Vectorized: DataPageV1 265 274 10 39.5 25.3 20.4X +ParquetReader Vectorized: DataPageV2 326 335 9 32.1 31.1 16.6X +SQL ORC Vectorized 383 407 37 27.4 36.5 14.1X +SQL ORC MR 1543 1550 11 6.8 147.1 3.5X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 2301 2305 6 0.5 2194.0 1.0X -SQL Json 2874 2895 29 0.4 2741.1 0.8X -SQL Parquet Vectorized: DataPageV1 47 66 20 22.3 44.8 48.9X -SQL Parquet Vectorized: DataPageV2 74 90 25 14.2 70.5 31.1X -SQL Parquet MR: DataPageV1 198 219 26 5.3 189.0 11.6X -SQL Parquet MR: DataPageV2 178 207 45 5.9 170.1 12.9X -SQL ORC Vectorized 59 76 20 17.6 56.7 38.7X -SQL ORC MR 173 193 24 6.1 164.6 13.3X - -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 2192 2196 6 0.5 2090.2 1.0X +SQL Json 2994 3016 31 0.4 2855.4 0.7X +SQL Parquet Vectorized: DataPageV1 42 58 16 25.2 39.6 52.8X +SQL Parquet Vectorized: DataPageV2 62 79 19 16.8 59.5 35.2X +SQL Parquet MR: DataPageV1 184 201 22 5.7 175.6 11.9X +SQL Parquet MR: DataPageV2 171 192 26 6.1 163.0 12.8X +SQL ORC Vectorized 52 74 27 20.2 49.5 42.2X +SQL ORC MR 143 167 25 7.4 136.0 15.4X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5418 5425 9 0.2 5167.2 1.0X -SQL Json 11463 11574 156 0.1 10932.3 0.5X -SQL Parquet Vectorized: DataPageV1 66 103 28 15.8 63.4 81.5X -SQL Parquet Vectorized: DataPageV2 90 115 27 11.7 85.5 60.4X -SQL Parquet MR: DataPageV1 218 234 23 4.8 208.3 24.8X -SQL Parquet MR: DataPageV2 199 225 29 5.3 190.1 27.2X -SQL ORC Vectorized 76 106 31 13.7 72.8 71.0X -SQL ORC MR 193 216 28 5.4 184.2 28.0X - -OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 5245 5333 126 0.2 5001.7 1.0X +SQL Json 11916 12064 210 0.1 11363.9 0.4X +SQL Parquet Vectorized: DataPageV1 56 91 28 18.6 53.7 93.1X +SQL Parquet Vectorized: DataPageV2 76 99 22 13.7 72.9 68.6X +SQL Parquet MR: DataPageV1 194 221 33 5.4 185.5 27.0X +SQL Parquet MR: DataPageV2 184 211 23 5.7 175.0 28.6X +SQL ORC Vectorized 64 89 27 16.5 60.7 82.4X +SQL ORC MR 151 177 30 6.9 144.3 34.7X + +OpenJDK 64-Bit Server VM 11.0.14+9-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9430 9430 0 0.1 8993.3 1.0X -SQL Json 21268 21347 111 0.0 20283.1 0.4X -SQL Parquet Vectorized: DataPageV1 97 124 24 10.9 92.1 97.6X -SQL Parquet Vectorized: DataPageV2 119 136 19 8.8 113.6 79.2X -SQL Parquet MR: DataPageV1 254 285 35 4.1 242.1 37.1X -SQL Parquet MR: DataPageV2 231 260 30 4.5 220.0 40.9X -SQL ORC Vectorized 95 119 31 11.1 90.4 99.5X -SQL ORC MR 214 219 5 4.9 203.6 44.2X +SQL CSV 9296 9407 157 0.1 8865.0 1.0X +SQL Json 23191 23509 450 0.0 22116.5 0.4X +SQL Parquet Vectorized: DataPageV1 87 119 24 12.1 82.5 107.5X +SQL Parquet Vectorized: DataPageV2 105 125 21 10.0 100.2 88.5X +SQL Parquet MR: DataPageV1 226 253 34 4.6 215.7 41.1X +SQL Parquet MR: DataPageV2 232 263 31 4.5 221.2 40.1X +SQL ORC Vectorized 78 101 20 13.4 74.6 118.9X +SQL ORC MR 173 191 28 6.1 164.8 53.8X diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-jdk17-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-jdk17-results.txt index 8ff176457af10..6f52338a78df9 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-jdk17-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-jdk17-results.txt @@ -2,322 +2,430 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9610 10067 646 1.6 611.0 1.0X -SQL Json 8316 8410 133 1.9 528.7 1.2X -SQL Parquet Vectorized: DataPageV1 123 145 10 127.7 7.8 78.0X -SQL Parquet Vectorized: DataPageV2 93 108 12 170.0 5.9 103.8X -SQL Parquet MR: DataPageV1 1766 1768 2 8.9 112.3 5.4X -SQL Parquet MR: DataPageV2 1540 1543 3 10.2 97.9 6.2X -SQL ORC Vectorized 175 182 6 89.6 11.2 54.8X -SQL ORC MR 1517 1533 22 10.4 96.5 6.3X - -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 12773 12910 194 1.2 812.1 1.0X +SQL Json 9907 10034 178 1.6 629.9 1.3X +SQL Parquet Vectorized: DataPageV1 135 164 24 116.1 8.6 94.3X +SQL Parquet Vectorized: DataPageV2 100 110 8 157.6 6.3 127.9X +SQL Parquet MR: DataPageV1 2176 2196 29 7.2 138.3 5.9X +SQL Parquet MR: DataPageV2 1974 1995 30 8.0 125.5 6.5X +SQL ORC Vectorized 203 215 9 77.4 12.9 62.9X +SQL ORC MR 1897 1909 18 8.3 120.6 6.7X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 61 63 2 256.3 3.9 1.0X -ParquetReader Vectorized: DataPageV2 44 45 2 356.3 2.8 1.4X -ParquetReader Vectorized -> Row: DataPageV1 51 51 1 311.3 3.2 1.2X -ParquetReader Vectorized -> Row: DataPageV2 32 33 2 492.4 2.0 1.9X +ParquetReader Vectorized: DataPageV1 78 79 2 201.6 5.0 1.0X +ParquetReader Vectorized: DataPageV2 54 56 2 291.5 3.4 1.4X +ParquetReader Vectorized -> Row: DataPageV1 58 61 4 273.1 3.7 1.4X +ParquetReader Vectorized -> Row: DataPageV2 34 36 2 459.2 2.2 2.3X -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 14866 14885 26 1.1 945.2 1.0X -SQL Json 9585 9586 3 1.6 609.4 1.6X -SQL Parquet Vectorized: DataPageV1 119 131 12 132.4 7.6 125.2X -SQL Parquet Vectorized: DataPageV2 119 125 5 132.0 7.6 124.7X -SQL Parquet MR: DataPageV1 1954 2025 101 8.0 124.2 7.6X -SQL Parquet MR: DataPageV2 1800 1824 35 8.7 114.4 8.3X -SQL ORC Vectorized 169 176 6 93.0 10.8 87.9X -SQL ORC MR 1432 1467 50 11.0 91.0 10.4X - -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 15279 15317 53 1.0 971.4 1.0X +SQL Json 11724 11763 54 1.3 745.4 1.3X +SQL Parquet Vectorized: DataPageV1 136 149 10 115.5 8.7 112.2X +SQL Parquet Vectorized: DataPageV2 134 145 9 117.2 8.5 113.8X +SQL Parquet MR: DataPageV1 2340 2412 101 6.7 148.8 6.5X +SQL Parquet MR: DataPageV2 2356 2359 5 6.7 149.8 6.5X +SQL ORC Vectorized 200 216 11 78.5 12.7 76.2X +SQL ORC MR 1808 1814 9 8.7 114.9 8.5X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 118 120 2 133.0 7.5 1.0X -ParquetReader Vectorized: DataPageV2 119 120 2 132.6 7.5 1.0X -ParquetReader Vectorized -> Row: DataPageV1 72 73 2 218.1 4.6 1.6X -ParquetReader Vectorized -> Row: DataPageV2 72 74 2 217.7 4.6 1.6X +ParquetReader Vectorized: DataPageV1 134 141 5 117.3 8.5 1.0X +ParquetReader Vectorized: DataPageV2 131 140 7 119.7 8.4 1.0X +ParquetReader Vectorized -> Row: DataPageV1 83 89 5 188.4 5.3 1.6X +ParquetReader Vectorized -> Row: DataPageV2 83 88 4 188.4 5.3 1.6X -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 14601 14699 139 1.1 928.3 1.0X -SQL Json 9446 9517 101 1.7 600.5 1.5X -SQL Parquet Vectorized: DataPageV1 156 168 15 101.1 9.9 93.8X -SQL Parquet Vectorized: DataPageV2 197 213 15 79.6 12.6 73.9X -SQL Parquet MR: DataPageV1 2113 2130 23 7.4 134.4 6.9X -SQL Parquet MR: DataPageV2 1739 1784 64 9.0 110.5 8.4X -SQL ORC Vectorized 192 205 10 81.9 12.2 76.0X -SQL ORC MR 1518 1588 100 10.4 96.5 9.6X - -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 16066 16127 87 1.0 1021.4 1.0X +SQL Json 12056 12123 94 1.3 766.5 1.3X +SQL Parquet Vectorized: DataPageV1 159 174 9 98.7 10.1 100.8X +SQL Parquet Vectorized: DataPageV2 235 253 9 66.8 15.0 68.2X +SQL Parquet MR: DataPageV1 2563 2571 11 6.1 163.0 6.3X +SQL Parquet MR: DataPageV2 2457 2494 51 6.4 156.2 6.5X +SQL ORC Vectorized 239 257 15 65.9 15.2 67.3X +SQL ORC MR 2067 2071 4 7.6 131.4 7.8X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 215 221 6 73.2 13.7 1.0X -ParquetReader Vectorized: DataPageV2 269 278 8 58.5 17.1 0.8X -ParquetReader Vectorized -> Row: DataPageV1 206 208 2 76.2 13.1 1.0X -ParquetReader Vectorized -> Row: DataPageV2 244 262 10 64.4 15.5 0.9X +ParquetReader Vectorized: DataPageV1 247 257 7 63.7 15.7 1.0X +ParquetReader Vectorized: DataPageV2 323 337 16 48.7 20.5 0.8X +ParquetReader Vectorized -> Row: DataPageV1 244 250 5 64.5 15.5 1.0X +ParquetReader Vectorized -> Row: DataPageV2 306 315 10 51.5 19.4 0.8X -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 15886 16086 282 1.0 1010.0 1.0X -SQL Json 9872 9880 12 1.6 627.6 1.6X -SQL Parquet Vectorized: DataPageV1 174 195 22 90.4 11.1 91.3X -SQL Parquet Vectorized: DataPageV2 393 409 16 40.0 25.0 40.4X -SQL Parquet MR: DataPageV1 1953 2064 157 8.1 124.2 8.1X -SQL Parquet MR: DataPageV2 2215 2231 23 7.1 140.8 7.2X -SQL ORC Vectorized 280 314 22 56.1 17.8 56.7X -SQL ORC MR 1681 1706 35 9.4 106.9 9.5X - -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 17633 17752 168 0.9 1121.1 1.0X +SQL Json 12979 13019 57 1.2 825.2 1.4X +SQL Parquet Vectorized: DataPageV1 158 167 7 99.5 10.0 111.6X +SQL Parquet Vectorized: DataPageV2 335 346 12 46.9 21.3 52.6X +SQL Parquet MR: DataPageV1 2597 2601 6 6.1 165.1 6.8X +SQL Parquet MR: DataPageV2 2497 2529 45 6.3 158.7 7.1X +SQL ORC Vectorized 285 294 8 55.2 18.1 61.9X +SQL ORC MR 2022 2107 120 7.8 128.6 8.7X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 253 263 8 62.1 16.1 1.0X -ParquetReader Vectorized: DataPageV2 450 461 15 34.9 28.6 0.6X -ParquetReader Vectorized -> Row: DataPageV1 241 253 12 65.2 15.3 1.1X -ParquetReader Vectorized -> Row: DataPageV2 437 448 14 36.0 27.8 0.6X +ParquetReader Vectorized: DataPageV1 243 265 15 64.7 15.4 1.0X +ParquetReader Vectorized: DataPageV2 416 420 5 37.8 26.5 0.6X +ParquetReader Vectorized -> Row: DataPageV1 282 288 6 55.8 17.9 0.9X +ParquetReader Vectorized -> Row: DataPageV2 445 456 11 35.4 28.3 0.5X -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 20641 20744 145 0.8 1312.3 1.0X -SQL Json 13055 13122 95 1.2 830.0 1.6X -SQL Parquet Vectorized: DataPageV1 246 267 16 63.8 15.7 83.8X -SQL Parquet Vectorized: DataPageV2 513 532 16 30.7 32.6 40.2X -SQL Parquet MR: DataPageV1 2354 2387 47 6.7 149.7 8.8X -SQL Parquet MR: DataPageV2 2118 2148 43 7.4 134.6 9.7X -SQL ORC Vectorized 418 437 17 37.6 26.6 49.4X -SQL ORC MR 1808 1852 61 8.7 115.0 11.4X - -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 23061 23236 247 0.7 1466.2 1.0X +SQL Json 16191 16295 147 1.0 1029.4 1.4X +SQL Parquet Vectorized: DataPageV1 230 241 8 68.5 14.6 100.5X +SQL Parquet Vectorized: DataPageV2 465 474 17 33.8 29.5 49.6X +SQL Parquet MR: DataPageV1 2863 2949 122 5.5 182.0 8.1X +SQL Parquet MR: DataPageV2 2556 2578 31 6.2 162.5 9.0X +SQL ORC Vectorized 400 421 25 39.3 25.4 57.6X +SQL ORC MR 2333 2352 26 6.7 148.4 9.9X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 306 315 5 51.5 19.4 1.0X -ParquetReader Vectorized: DataPageV2 584 591 11 26.9 37.1 0.5X -ParquetReader Vectorized -> Row: DataPageV1 288 299 14 54.6 18.3 1.1X -ParquetReader Vectorized -> Row: DataPageV2 549 557 8 28.6 34.9 0.6X +ParquetReader Vectorized: DataPageV1 343 349 7 45.8 21.8 1.0X +ParquetReader Vectorized: DataPageV2 577 590 24 27.2 36.7 0.6X +ParquetReader Vectorized -> Row: DataPageV1 390 402 14 40.3 24.8 0.9X +ParquetReader Vectorized -> Row: DataPageV2 620 634 10 25.4 39.4 0.6X -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 17024 17292 378 0.9 1082.4 1.0X -SQL Json 11724 11904 255 1.3 745.4 1.5X -SQL Parquet Vectorized: DataPageV1 174 186 11 90.6 11.0 98.1X -SQL Parquet Vectorized: DataPageV2 173 189 14 90.9 11.0 98.4X -SQL Parquet MR: DataPageV1 1932 2037 148 8.1 122.9 8.8X -SQL Parquet MR: DataPageV2 1947 1976 41 8.1 123.8 8.7X -SQL ORC Vectorized 432 459 36 36.4 27.5 39.4X -SQL ORC MR 1984 1985 1 7.9 126.1 8.6X - -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 18313 18370 81 0.9 1164.3 1.0X +SQL Json 15439 15522 118 1.0 981.6 1.2X +SQL Parquet Vectorized: DataPageV1 190 219 28 82.9 12.1 96.5X +SQL Parquet Vectorized: DataPageV2 166 197 21 94.6 10.6 110.2X +SQL Parquet MR: DataPageV1 2588 2667 111 6.1 164.5 7.1X +SQL Parquet MR: DataPageV2 2350 2438 124 6.7 149.4 7.8X +SQL ORC Vectorized 446 476 23 35.3 28.3 41.1X +SQL ORC MR 2280 2305 36 6.9 145.0 8.0X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 257 259 2 61.2 16.3 1.0X -ParquetReader Vectorized: DataPageV2 239 254 10 65.8 15.2 1.1X -ParquetReader Vectorized -> Row: DataPageV1 259 260 1 60.8 16.4 1.0X -ParquetReader Vectorized -> Row: DataPageV2 258 262 6 61.0 16.4 1.0X +ParquetReader Vectorized: DataPageV1 287 291 4 54.8 18.2 1.0X +ParquetReader Vectorized: DataPageV2 271 289 9 57.9 17.3 1.1X +ParquetReader Vectorized -> Row: DataPageV1 260 274 11 60.5 16.5 1.1X +ParquetReader Vectorized -> Row: DataPageV2 258 274 15 61.0 16.4 1.1X -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 22592 22594 4 0.7 1436.3 1.0X -SQL Json 16252 16271 26 1.0 1033.3 1.4X -SQL Parquet Vectorized: DataPageV1 247 271 22 63.6 15.7 91.3X -SQL Parquet Vectorized: DataPageV2 252 266 14 62.4 16.0 89.6X -SQL Parquet MR: DataPageV1 2337 2352 21 6.7 148.6 9.7X -SQL Parquet MR: DataPageV2 2187 2223 50 7.2 139.1 10.3X -SQL ORC Vectorized 489 526 25 32.2 31.1 46.2X -SQL ORC MR 1816 1892 107 8.7 115.5 12.4X - -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 23580 23698 167 0.7 1499.2 1.0X +SQL Json 20420 20476 79 0.8 1298.2 1.2X +SQL Parquet Vectorized: DataPageV1 259 297 51 60.6 16.5 90.9X +SQL Parquet Vectorized: DataPageV2 246 280 26 64.1 15.6 96.0X +SQL Parquet MR: DataPageV1 2743 2795 74 5.7 174.4 8.6X +SQL Parquet MR: DataPageV2 2495 2512 24 6.3 158.6 9.5X +SQL ORC Vectorized 505 554 34 31.1 32.1 46.7X +SQL ORC MR 2245 2269 33 7.0 142.7 10.5X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 291 304 8 54.0 18.5 1.0X -ParquetReader Vectorized: DataPageV2 298 309 7 52.9 18.9 1.0X -ParquetReader Vectorized -> Row: DataPageV1 330 338 16 47.7 21.0 0.9X -ParquetReader Vectorized -> Row: DataPageV2 331 338 12 47.5 21.1 0.9X +ParquetReader Vectorized: DataPageV1 362 369 7 43.5 23.0 1.0X +ParquetReader Vectorized: DataPageV2 358 368 6 43.9 22.8 1.0X +ParquetReader Vectorized -> Row: DataPageV1 359 363 5 43.9 22.8 1.0X +ParquetReader Vectorized -> Row: DataPageV2 359 362 3 43.8 22.8 1.0X + + +================================================================================================ +SQL Single Numeric Column Scan in Struct +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL Single TINYINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 2949 2960 15 5.3 187.5 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2814 2861 67 5.6 178.9 1.0X +SQL ORC Vectorized (Nested Column Enabled) 316 337 33 49.8 20.1 9.3X +SQL Parquet MR: DataPageV1 2954 2986 44 5.3 187.8 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3668 3701 46 4.3 233.2 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 188 262 91 83.9 11.9 15.7X +SQL Parquet MR: DataPageV2 2726 2795 97 5.8 173.3 1.1X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3511 3551 56 4.5 223.2 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 185 199 10 84.9 11.8 15.9X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL Single SMALLINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 2823 2929 150 5.6 179.5 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2836 2895 83 5.5 180.3 1.0X +SQL ORC Vectorized (Nested Column Enabled) 465 519 48 33.8 29.5 6.1X +SQL Parquet MR: DataPageV1 3441 3457 22 4.6 218.8 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3948 4013 92 4.0 251.0 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 181 220 21 86.8 11.5 15.6X +SQL Parquet MR: DataPageV2 2928 2995 95 5.4 186.2 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3577 3621 62 4.4 227.4 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 432 459 31 36.4 27.4 6.5X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL Single INT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 3029 3032 4 5.2 192.6 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2961 3007 64 5.3 188.3 1.0X +SQL ORC Vectorized (Nested Column Enabled) 488 527 30 32.2 31.0 6.2X +SQL Parquet MR: DataPageV1 3403 3434 45 4.6 216.3 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 4088 4266 252 3.8 259.9 0.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 209 227 17 75.3 13.3 14.5X +SQL Parquet MR: DataPageV2 3189 3192 5 4.9 202.7 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3820 3843 32 4.1 242.9 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 505 528 17 31.2 32.1 6.0X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL Single BIGINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 3153 3203 69 5.0 200.5 1.0X +SQL ORC Vectorized (Nested Column Disabled) 3002 3023 29 5.2 190.9 1.1X +SQL ORC Vectorized (Nested Column Enabled) 626 651 27 25.1 39.8 5.0X +SQL Parquet MR: DataPageV1 3701 3705 5 4.2 235.3 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 4168 4189 30 3.8 265.0 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 296 316 36 53.2 18.8 10.7X +SQL Parquet MR: DataPageV2 3076 3179 145 5.1 195.6 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3769 3869 141 4.2 239.6 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 485 500 13 32.4 30.9 6.5X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL Single FLOAT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 3035 3113 111 5.2 192.9 1.0X +SQL ORC Vectorized (Nested Column Disabled) 3214 3249 49 4.9 204.4 0.9X +SQL ORC Vectorized (Nested Column Enabled) 706 713 7 22.3 44.9 4.3X +SQL Parquet MR: DataPageV1 3190 3225 50 4.9 202.8 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3711 3795 119 4.2 235.9 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 167 179 7 94.0 10.6 18.1X +SQL Parquet MR: DataPageV2 2722 2725 4 5.8 173.1 1.1X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3357 3429 102 4.7 213.4 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 176 194 19 89.2 11.2 17.2X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL Single DOUBLE Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 3146 3157 15 5.0 200.0 1.0X +SQL ORC Vectorized (Nested Column Disabled) 3059 3089 42 5.1 194.5 1.0X +SQL ORC Vectorized (Nested Column Enabled) 729 742 17 21.6 46.4 4.3X +SQL Parquet MR: DataPageV1 3409 3429 28 4.6 216.7 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 4156 4193 51 3.8 264.2 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 238 288 35 66.0 15.2 13.2X +SQL Parquet MR: DataPageV2 3147 3198 72 5.0 200.1 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3725 3737 18 4.2 236.8 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 265 271 4 59.4 16.8 11.9X + + +================================================================================================ +SQL Nested Column Scan +================================================================================================ + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz +SQL Nested Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 22187 22637 480 0.0 21158.8 1.0X +SQL ORC Vectorized (Nested Column Disabled) 21946 22361 192 0.0 20929.5 1.0X +SQL ORC Vectorized (Nested Column Enabled) 10302 10575 237 0.1 9824.5 2.2X +SQL Parquet MR: DataPageV1 14303 14615 186 0.1 13640.4 1.6X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 15471 15847 183 0.1 14754.1 1.4X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 10542 10794 190 0.1 10053.7 2.1X +SQL Parquet MR: DataPageV2 15047 15436 456 0.1 14350.0 1.5X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 15663 15953 304 0.1 14937.9 1.4X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 7861 8123 237 0.1 7497.2 2.8X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 14365 14780 587 0.7 1369.9 1.0X -SQL Json 10718 10772 76 1.0 1022.2 1.3X -SQL Parquet Vectorized: DataPageV1 1932 1988 80 5.4 184.2 7.4X -SQL Parquet Vectorized: DataPageV2 2298 2317 27 4.6 219.2 6.2X -SQL Parquet MR: DataPageV1 3829 3957 181 2.7 365.1 3.8X -SQL Parquet MR: DataPageV2 4176 4208 46 2.5 398.3 3.4X -SQL ORC Vectorized 2026 2046 28 5.2 193.2 7.1X -SQL ORC MR 3566 3580 21 2.9 340.0 4.0X +SQL CSV 16721 16823 143 0.6 1594.7 1.0X +SQL Json 14569 14617 68 0.7 1389.4 1.1X +SQL Parquet Vectorized: DataPageV1 2554 2641 122 4.1 243.6 6.5X +SQL Parquet Vectorized: DataPageV2 2821 2881 85 3.7 269.1 5.9X +SQL Parquet MR: DataPageV1 5548 5563 20 1.9 529.1 3.0X +SQL Parquet MR: DataPageV2 5504 5544 56 1.9 524.9 3.0X +SQL ORC Vectorized 2580 2598 25 4.1 246.1 6.5X +SQL ORC MR 4902 5036 190 2.1 467.5 3.4X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9372 9373 1 1.1 893.8 1.0X -SQL Json 6862 6865 4 1.5 654.4 1.4X -SQL Parquet Vectorized: DataPageV1 606 613 8 17.3 57.8 15.5X -SQL Parquet Vectorized: DataPageV2 611 615 3 17.2 58.3 15.3X -SQL Parquet MR: DataPageV1 1713 1721 11 6.1 163.3 5.5X -SQL Parquet MR: DataPageV2 1721 1724 4 6.1 164.1 5.4X -SQL ORC Vectorized 467 469 2 22.5 44.5 20.1X -SQL ORC MR 1816 1818 2 5.8 173.2 5.2X +SQL CSV 9365 9468 145 1.1 893.2 1.0X +SQL Json 8506 8528 31 1.2 811.2 1.1X +SQL Parquet Vectorized: DataPageV1 669 692 24 15.7 63.8 14.0X +SQL Parquet Vectorized: DataPageV2 674 707 31 15.5 64.3 13.9X +SQL Parquet MR: DataPageV1 2316 2392 109 4.5 220.8 4.0X +SQL Parquet MR: DataPageV2 2177 2212 49 4.8 207.6 4.3X +SQL ORC Vectorized 564 600 44 18.6 53.8 16.6X +SQL ORC MR 2412 2427 21 4.3 230.1 3.9X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 21799 22053 360 0.7 1385.9 1.0X -Data column - Json 12978 12985 10 1.2 825.1 1.7X -Data column - Parquet Vectorized: DataPageV1 261 277 15 60.4 16.6 83.7X -Data column - Parquet Vectorized: DataPageV2 601 647 42 26.2 38.2 36.3X -Data column - Parquet MR: DataPageV1 2796 2798 2 5.6 177.8 7.8X -Data column - Parquet MR: DataPageV2 2595 2626 43 6.1 165.0 8.4X -Data column - ORC Vectorized 428 449 25 36.8 27.2 50.9X -Data column - ORC MR 2162 2274 159 7.3 137.5 10.1X -Partition column - CSV 5804 5922 167 2.7 369.0 3.8X -Partition column - Json 10410 10455 64 1.5 661.8 2.1X -Partition column - Parquet Vectorized: DataPageV1 56 60 6 280.9 3.6 389.3X -Partition column - Parquet Vectorized: DataPageV2 55 59 5 286.5 3.5 397.1X -Partition column - Parquet MR: DataPageV1 1357 1357 1 11.6 86.3 16.1X -Partition column - Parquet MR: DataPageV2 1339 1339 0 11.7 85.1 16.3X -Partition column - ORC Vectorized 57 61 5 276.3 3.6 382.9X -Partition column - ORC MR 1346 1351 7 11.7 85.6 16.2X -Both columns - CSV 20812 21349 759 0.8 1323.2 1.0X -Both columns - Json 13061 13372 440 1.2 830.4 1.7X -Both columns - Parquet Vectorized: DataPageV1 265 275 6 59.3 16.9 82.1X -Both columns - Parquet Vectorized: DataPageV2 619 637 20 25.4 39.4 35.2X -Both columns - Parquet MR: DataPageV1 2827 2830 4 5.6 179.8 7.7X -Both columns - Parquet MR: DataPageV2 2593 2603 14 6.1 164.8 8.4X -Both columns - ORC Vectorized 391 432 37 40.2 24.9 55.7X -Both columns - ORC MR 2438 2455 25 6.5 155.0 8.9X +Data column - CSV 23111 23239 181 0.7 1469.3 1.0X +Data column - Json 15498 15602 147 1.0 985.3 1.5X +Data column - Parquet Vectorized: DataPageV1 231 244 8 68.2 14.7 100.2X +Data column - Parquet Vectorized: DataPageV2 521 532 16 30.2 33.1 44.4X +Data column - Parquet MR: DataPageV1 3561 3632 100 4.4 226.4 6.5X +Data column - Parquet MR: DataPageV2 3222 3263 59 4.9 204.8 7.2X +Data column - ORC Vectorized 478 505 33 32.9 30.4 48.4X +Data column - ORC MR 3118 3208 128 5.0 198.2 7.4X +Partition column - CSV 7929 8099 241 2.0 504.1 2.9X +Partition column - Json 12790 12807 24 1.2 813.2 1.8X +Partition column - Parquet Vectorized: DataPageV1 62 67 6 252.0 4.0 370.3X +Partition column - Parquet Vectorized: DataPageV2 61 68 7 257.6 3.9 378.5X +Partition column - Parquet MR: DataPageV1 1603 1654 72 9.8 101.9 14.4X +Partition column - Parquet MR: DataPageV2 1611 1654 60 9.8 102.4 14.3X +Partition column - ORC Vectorized 62 67 7 254.7 3.9 374.3X +Partition column - ORC MR 1811 1860 69 8.7 115.2 12.8X +Both columns - CSV 23937 24052 162 0.7 1521.9 1.0X +Both columns - Json 16604 16619 21 0.9 1055.7 1.4X +Both columns - Parquet Vectorized: DataPageV1 321 343 20 49.1 20.4 72.1X +Both columns - Parquet Vectorized: DataPageV2 620 650 22 25.4 39.4 37.3X +Both columns - Parquet MR: DataPageV1 3546 3668 173 4.4 225.5 6.5X +Both columns - Parquet MR: DataPageV2 3460 3466 9 4.5 220.0 6.7X +Both columns - ORC Vectorized 516 527 12 30.5 32.8 44.8X +Both columns - ORC MR 3122 3163 57 5.0 198.5 7.4X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 10697 10736 56 1.0 1020.1 1.0X -SQL Json 9722 9963 341 1.1 927.2 1.1X -SQL Parquet Vectorized: DataPageV1 1337 1342 6 7.8 127.6 8.0X -SQL Parquet Vectorized: DataPageV2 1731 1757 38 6.1 165.1 6.2X -SQL Parquet MR: DataPageV1 3581 3584 4 2.9 341.5 3.0X -SQL Parquet MR: DataPageV2 3996 4001 7 2.6 381.1 2.7X -ParquetReader Vectorized: DataPageV1 1006 1015 13 10.4 96.0 10.6X -ParquetReader Vectorized: DataPageV2 1476 1477 2 7.1 140.7 7.2X -SQL ORC Vectorized 957 1042 120 11.0 91.3 11.2X -SQL ORC MR 3060 3068 11 3.4 291.8 3.5X - -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 11666 11668 3 0.9 1112.5 1.0X +SQL Json 13075 13170 134 0.8 1246.9 0.9X +SQL Parquet Vectorized: DataPageV1 1755 1771 23 6.0 167.4 6.6X +SQL Parquet Vectorized: DataPageV2 1983 2021 53 5.3 189.2 5.9X +SQL Parquet MR: DataPageV1 5326 5384 82 2.0 508.0 2.2X +SQL Parquet MR: DataPageV2 5471 5512 58 1.9 521.7 2.1X +ParquetReader Vectorized: DataPageV1 1290 1336 64 8.1 123.1 9.0X +ParquetReader Vectorized: DataPageV2 1510 1516 9 6.9 144.0 7.7X +SQL ORC Vectorized 1271 1301 43 8.3 121.2 9.2X +SQL ORC MR 3890 3899 13 2.7 371.0 3.0X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7299 7300 1 1.4 696.1 1.0X -SQL Json 7453 7659 292 1.4 710.8 1.0X -SQL Parquet Vectorized: DataPageV1 896 916 32 11.7 85.4 8.1X -SQL Parquet Vectorized: DataPageV2 1282 1283 1 8.2 122.3 5.7X -SQL Parquet MR: DataPageV1 2586 2678 130 4.1 246.6 2.8X -SQL Parquet MR: DataPageV2 3061 3066 6 3.4 291.9 2.4X -ParquetReader Vectorized: DataPageV1 913 915 3 11.5 87.0 8.0X -ParquetReader Vectorized: DataPageV2 1181 1183 3 8.9 112.6 6.2X -SQL ORC Vectorized 1102 1111 13 9.5 105.1 6.6X -SQL ORC MR 2916 3002 121 3.6 278.1 2.5X - -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 8582 8845 372 1.2 818.4 1.0X +SQL Json 10106 10115 12 1.0 963.8 0.8X +SQL Parquet Vectorized: DataPageV1 1337 1389 75 7.8 127.5 6.4X +SQL Parquet Vectorized: DataPageV2 1494 1521 38 7.0 142.5 5.7X +SQL Parquet MR: DataPageV1 3965 3984 27 2.6 378.1 2.2X +SQL Parquet MR: DataPageV2 3911 3920 13 2.7 373.0 2.2X +ParquetReader Vectorized: DataPageV1 1311 1354 62 8.0 125.0 6.5X +ParquetReader Vectorized: DataPageV2 1554 1556 4 6.7 148.2 5.5X +SQL ORC Vectorized 1385 1389 6 7.6 132.1 6.2X +SQL ORC MR 3896 3904 11 2.7 371.5 2.2X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 4615 4619 6 2.3 440.1 1.0X -SQL Json 4926 4927 1 2.1 469.8 0.9X -SQL Parquet Vectorized: DataPageV1 240 246 5 43.8 22.9 19.3X -SQL Parquet Vectorized: DataPageV2 287 295 4 36.5 27.4 16.1X -SQL Parquet MR: DataPageV1 1774 1781 10 5.9 169.2 2.6X -SQL Parquet MR: DataPageV2 1772 1773 1 5.9 169.0 2.6X -ParquetReader Vectorized: DataPageV1 238 240 2 44.0 22.7 19.4X -ParquetReader Vectorized: DataPageV2 285 288 3 36.8 27.2 16.2X -SQL ORC Vectorized 382 392 6 27.4 36.5 12.1X -SQL ORC MR 1616 1617 2 6.5 154.1 2.9X +SQL CSV 5974 6029 78 1.8 569.8 1.0X +SQL Json 6444 6451 10 1.6 614.5 0.9X +SQL Parquet Vectorized: DataPageV1 356 382 28 29.5 33.9 16.8X +SQL Parquet Vectorized: DataPageV2 397 413 14 26.4 37.9 15.0X +SQL Parquet MR: DataPageV1 2602 2671 98 4.0 248.2 2.3X +SQL Parquet MR: DataPageV2 2328 2332 5 4.5 222.1 2.6X +ParquetReader Vectorized: DataPageV1 362 366 2 29.0 34.5 16.5X +ParquetReader Vectorized: DataPageV2 397 407 13 26.4 37.9 15.0X +SQL ORC Vectorized 457 480 27 23.0 43.6 13.1X +SQL ORC MR 1981 1990 13 5.3 188.9 3.0X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 2051 2052 2 0.5 1956.1 1.0X -SQL Json 3230 3232 3 0.3 3080.6 0.6X -SQL Parquet Vectorized: DataPageV1 45 50 7 23.2 43.2 45.3X -SQL Parquet Vectorized: DataPageV2 67 72 8 15.6 64.1 30.5X -SQL Parquet MR: DataPageV1 191 198 8 5.5 181.9 10.8X -SQL Parquet MR: DataPageV2 176 181 6 6.0 167.7 11.7X -SQL ORC Vectorized 55 60 6 19.0 52.7 37.1X -SQL ORC MR 164 168 4 6.4 156.1 12.5X - -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 2378 2441 89 0.4 2267.9 1.0X +SQL Json 3840 3919 112 0.3 3662.1 0.6X +SQL Parquet Vectorized: DataPageV1 50 56 6 21.0 47.7 47.6X +SQL Parquet Vectorized: DataPageV2 66 72 6 15.9 62.9 36.1X +SQL Parquet MR: DataPageV1 241 255 11 4.3 230.0 9.9X +SQL Parquet MR: DataPageV2 218 231 9 4.8 207.9 10.9X +SQL ORC Vectorized 59 66 6 17.7 56.6 40.1X +SQL ORC MR 203 208 5 5.2 193.8 11.7X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 4530 4530 0 0.2 4320.0 1.0X -SQL Json 12530 12536 9 0.1 11949.2 0.4X -SQL Parquet Vectorized: DataPageV1 60 65 6 17.4 57.6 75.0X -SQL Parquet Vectorized: DataPageV2 83 91 8 12.6 79.1 54.6X -SQL Parquet MR: DataPageV1 211 216 7 5.0 201.2 21.5X -SQL Parquet MR: DataPageV2 195 204 12 5.4 186.0 23.2X -SQL ORC Vectorized 70 75 5 14.9 67.1 64.4X -SQL ORC MR 182 191 11 5.8 173.5 24.9X - -OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 5236 5239 4 0.2 4993.6 1.0X +SQL Json 16124 16203 112 0.1 15376.6 0.3X +SQL Parquet Vectorized: DataPageV1 68 74 6 15.4 64.9 77.0X +SQL Parquet Vectorized: DataPageV2 82 87 5 12.8 78.2 63.9X +SQL Parquet MR: DataPageV1 260 271 9 4.0 248.0 20.1X +SQL Parquet MR: DataPageV2 235 250 10 4.5 223.9 22.3X +SQL ORC Vectorized 81 89 7 13.0 77.0 64.8X +SQL ORC MR 226 235 7 4.6 215.2 23.2X + +OpenJDK 64-Bit Server VM 17.0.2+8-LTS on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 7758 7763 7 0.1 7398.8 1.0X -SQL Json 24530 24546 23 0.0 23393.2 0.3X -SQL Parquet Vectorized: DataPageV1 91 96 6 11.5 87.1 84.9X -SQL Parquet Vectorized: DataPageV2 113 118 6 9.2 108.1 68.4X -SQL Parquet MR: DataPageV1 246 254 8 4.3 234.2 31.6X -SQL Parquet MR: DataPageV2 229 235 6 4.6 218.7 33.8X -SQL ORC Vectorized 88 92 6 11.9 83.8 88.3X -SQL ORC MR 205 214 9 5.1 195.2 37.9X +SQL CSV 8774 8858 119 0.1 8367.3 1.0X +SQL Json 29846 30043 278 0.0 28463.7 0.3X +SQL Parquet Vectorized: DataPageV1 108 114 7 9.7 103.0 81.2X +SQL Parquet Vectorized: DataPageV2 118 124 5 8.9 112.5 74.3X +SQL Parquet MR: DataPageV1 297 315 12 3.5 283.7 29.5X +SQL Parquet MR: DataPageV2 278 293 12 3.8 265.1 31.6X +SQL ORC Vectorized 109 116 5 9.6 104.3 80.2X +SQL ORC MR 268 278 7 3.9 256.0 32.7X diff --git a/sql/core/benchmarks/DataSourceReadBenchmark-results.txt b/sql/core/benchmarks/DataSourceReadBenchmark-results.txt index 1a7ebe51057be..e23b5a1f59eb7 100644 --- a/sql/core/benchmarks/DataSourceReadBenchmark-results.txt +++ b/sql/core/benchmarks/DataSourceReadBenchmark-results.txt @@ -2,322 +2,430 @@ SQL Single Numeric Column Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz SQL Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 12972 13210 337 1.2 824.8 1.0X -SQL Json 7440 7634 275 2.1 473.0 1.7X -SQL Parquet Vectorized: DataPageV1 125 137 10 125.8 8.0 103.7X -SQL Parquet Vectorized: DataPageV2 93 103 20 168.4 5.9 138.9X -SQL Parquet MR: DataPageV1 1621 1657 52 9.7 103.0 8.0X -SQL Parquet MR: DataPageV2 1396 1420 34 11.3 88.7 9.3X -SQL ORC Vectorized 178 186 16 88.5 11.3 73.0X -SQL ORC MR 1501 1503 4 10.5 95.4 8.6X - -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 16494 17271 1098 1.0 1048.7 1.0X +SQL Json 10189 10357 237 1.5 647.8 1.6X +SQL Parquet Vectorized: DataPageV1 161 170 7 97.9 10.2 102.6X +SQL Parquet Vectorized: DataPageV2 129 157 31 121.7 8.2 127.6X +SQL Parquet MR: DataPageV1 2307 2310 5 6.8 146.7 7.2X +SQL Parquet MR: DataPageV2 2070 2090 29 7.6 131.6 8.0X +SQL ORC Vectorized 234 245 10 67.1 14.9 70.4X +SQL ORC MR 2102 2105 4 7.5 133.6 7.8X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Parquet Reader Single BOOLEAN Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 132 134 4 119.3 8.4 1.0X -ParquetReader Vectorized: DataPageV2 115 117 3 136.7 7.3 1.1X -ParquetReader Vectorized -> Row: DataPageV1 57 58 1 275.1 3.6 2.3X -ParquetReader Vectorized -> Row: DataPageV2 41 41 1 387.9 2.6 3.3X +ParquetReader Vectorized: DataPageV1 130 138 5 120.9 8.3 1.0X +ParquetReader Vectorized: DataPageV2 120 128 7 130.6 7.7 1.1X +ParquetReader Vectorized -> Row: DataPageV1 63 65 3 251.2 4.0 2.1X +ParquetReader Vectorized -> Row: DataPageV2 52 57 3 302.5 3.3 2.5X -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz SQL Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 15808 15867 83 1.0 1005.0 1.0X -SQL Json 9119 9174 78 1.7 579.8 1.7X -SQL Parquet Vectorized: DataPageV1 157 163 7 100.2 10.0 100.7X -SQL Parquet Vectorized: DataPageV2 156 161 5 100.6 9.9 101.1X -SQL Parquet MR: DataPageV1 1846 1871 36 8.5 117.4 8.6X -SQL Parquet MR: DataPageV2 1702 1707 7 9.2 108.2 9.3X -SQL ORC Vectorized 130 134 2 120.7 8.3 121.3X -SQL ORC MR 1536 1542 9 10.2 97.7 10.3X - -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 19383 19602 310 0.8 1232.3 1.0X +SQL Json 11614 11661 67 1.4 738.4 1.7X +SQL Parquet Vectorized: DataPageV1 178 198 18 88.3 11.3 108.8X +SQL Parquet Vectorized: DataPageV2 177 193 12 89.1 11.2 109.8X +SQL Parquet MR: DataPageV1 2396 2494 139 6.6 152.3 8.1X +SQL Parquet MR: DataPageV2 2158 2182 34 7.3 137.2 9.0X +SQL ORC Vectorized 160 172 9 98.3 10.2 121.1X +SQL ORC MR 2178 2185 11 7.2 138.4 8.9X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Parquet Reader Single TINYINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 198 202 5 79.3 12.6 1.0X -ParquetReader Vectorized: DataPageV2 197 199 3 79.8 12.5 1.0X -ParquetReader Vectorized -> Row: DataPageV1 188 190 3 83.4 12.0 1.1X -ParquetReader Vectorized -> Row: DataPageV2 188 190 3 83.5 12.0 1.1X +ParquetReader Vectorized: DataPageV1 236 243 5 66.7 15.0 1.0X +ParquetReader Vectorized: DataPageV2 226 237 7 69.7 14.3 1.0X +ParquetReader Vectorized -> Row: DataPageV1 214 221 4 73.4 13.6 1.1X +ParquetReader Vectorized -> Row: DataPageV2 213 223 11 74.0 13.5 1.1X -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz SQL Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 16474 16493 27 1.0 1047.4 1.0X -SQL Json 9477 9478 1 1.7 602.6 1.7X -SQL Parquet Vectorized: DataPageV1 211 216 7 74.4 13.4 77.9X -SQL Parquet Vectorized: DataPageV2 215 221 5 73.0 13.7 76.5X -SQL Parquet MR: DataPageV1 2114 2133 28 7.4 134.4 7.8X -SQL Parquet MR: DataPageV2 1792 1808 22 8.8 113.9 9.2X -SQL ORC Vectorized 179 182 4 88.0 11.4 92.2X -SQL ORC MR 1586 1588 2 9.9 100.8 10.4X - -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 20127 20210 117 0.8 1279.6 1.0X +SQL Json 12106 12149 61 1.3 769.7 1.7X +SQL Parquet Vectorized: DataPageV1 278 284 7 56.6 17.7 72.5X +SQL Parquet Vectorized: DataPageV2 269 284 11 58.6 17.1 74.9X +SQL Parquet MR: DataPageV1 2885 2885 0 5.5 183.4 7.0X +SQL Parquet MR: DataPageV2 2540 2553 18 6.2 161.5 7.9X +SQL ORC Vectorized 230 244 15 68.5 14.6 87.7X +SQL ORC MR 2232 2274 60 7.0 141.9 9.0X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Parquet Reader Single SMALLINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 254 257 5 62.0 16.1 1.0X -ParquetReader Vectorized: DataPageV2 299 302 4 52.6 19.0 0.8X -ParquetReader Vectorized -> Row: DataPageV1 236 238 4 66.7 15.0 1.1X -ParquetReader Vectorized -> Row: DataPageV2 281 283 4 56.0 17.9 0.9X +ParquetReader Vectorized: DataPageV1 373 382 7 42.2 23.7 1.0X +ParquetReader Vectorized: DataPageV2 371 383 12 42.4 23.6 1.0X +ParquetReader Vectorized -> Row: DataPageV1 350 361 8 44.9 22.3 1.1X +ParquetReader Vectorized -> Row: DataPageV2 346 360 9 45.4 22.0 1.1X -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz SQL Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 18049 18086 52 0.9 1147.5 1.0X -SQL Json 10073 10074 1 1.6 640.4 1.8X -SQL Parquet Vectorized: DataPageV1 177 184 9 89.1 11.2 102.3X -SQL Parquet Vectorized: DataPageV2 301 306 6 52.2 19.1 59.9X -SQL Parquet MR: DataPageV1 2120 2134 21 7.4 134.8 8.5X -SQL Parquet MR: DataPageV2 1855 1893 54 8.5 117.9 9.7X -SQL ORC Vectorized 246 249 1 63.8 15.7 73.2X -SQL ORC MR 1655 1660 6 9.5 105.2 10.9X - -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 21775 21892 165 0.7 1384.4 1.0X +SQL Json 12758 12820 88 1.2 811.1 1.7X +SQL Parquet Vectorized: DataPageV1 215 228 11 73.0 13.7 101.1X +SQL Parquet Vectorized: DataPageV2 379 396 16 41.5 24.1 57.5X +SQL Parquet MR: DataPageV1 2866 2965 140 5.5 182.2 7.6X +SQL Parquet MR: DataPageV2 2654 2656 2 5.9 168.8 8.2X +SQL ORC Vectorized 308 327 13 51.0 19.6 70.7X +SQL ORC MR 2350 2382 45 6.7 149.4 9.3X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Parquet Reader Single INT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 239 243 5 65.8 15.2 1.0X -ParquetReader Vectorized: DataPageV2 384 387 4 40.9 24.4 0.6X -ParquetReader Vectorized -> Row: DataPageV1 223 224 3 70.7 14.2 1.1X -ParquetReader Vectorized -> Row: DataPageV2 366 370 7 43.0 23.3 0.7X +ParquetReader Vectorized: DataPageV1 342 349 9 45.9 21.8 1.0X +ParquetReader Vectorized: DataPageV2 516 522 8 30.5 32.8 0.7X +ParquetReader Vectorized -> Row: DataPageV1 295 303 7 53.3 18.8 1.2X +ParquetReader Vectorized -> Row: DataPageV2 464 487 18 33.9 29.5 0.7X -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz SQL Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 22703 22737 48 0.7 1443.4 1.0X -SQL Json 12723 12743 28 1.2 808.9 1.8X -SQL Parquet Vectorized: DataPageV1 228 261 76 69.1 14.5 99.7X -SQL Parquet Vectorized: DataPageV2 465 472 7 33.8 29.5 48.9X -SQL Parquet MR: DataPageV1 2166 2168 3 7.3 137.7 10.5X -SQL Parquet MR: DataPageV2 1921 1936 21 8.2 122.1 11.8X -SQL ORC Vectorized 307 313 10 51.2 19.5 73.9X -SQL ORC MR 1730 1745 21 9.1 110.0 13.1X - -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 27672 27888 305 0.6 1759.3 1.0X +SQL Json 15870 15989 169 1.0 1009.0 1.7X +SQL Parquet Vectorized: DataPageV1 303 309 5 51.8 19.3 91.2X +SQL Parquet Vectorized: DataPageV2 601 622 34 26.2 38.2 46.1X +SQL Parquet MR: DataPageV1 3139 3203 91 5.0 199.6 8.8X +SQL Parquet MR: DataPageV2 2719 2719 1 5.8 172.9 10.2X +SQL ORC Vectorized 384 408 27 41.0 24.4 72.1X +SQL ORC MR 2398 2404 9 6.6 152.5 11.5X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Parquet Reader Single BIGINT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 309 316 10 51.0 19.6 1.0X -ParquetReader Vectorized: DataPageV2 559 563 5 28.1 35.5 0.6X -ParquetReader Vectorized -> Row: DataPageV1 292 296 6 53.9 18.6 1.1X -ParquetReader Vectorized -> Row: DataPageV2 541 547 8 29.1 34.4 0.6X +ParquetReader Vectorized: DataPageV1 393 411 15 40.0 25.0 1.0X +ParquetReader Vectorized: DataPageV2 715 736 23 22.0 45.5 0.5X +ParquetReader Vectorized -> Row: DataPageV1 398 403 6 39.5 25.3 1.0X +ParquetReader Vectorized -> Row: DataPageV2 692 710 17 22.7 44.0 0.6X -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz SQL Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 18790 18808 25 0.8 1194.6 1.0X -SQL Json 11572 11579 10 1.4 735.7 1.6X -SQL Parquet Vectorized: DataPageV1 155 158 5 101.7 9.8 121.6X -SQL Parquet Vectorized: DataPageV2 158 162 6 99.6 10.0 119.0X -SQL Parquet MR: DataPageV1 2041 2050 12 7.7 129.8 9.2X -SQL Parquet MR: DataPageV2 1903 1905 3 8.3 121.0 9.9X -SQL ORC Vectorized 357 359 2 44.1 22.7 52.7X -SQL ORC MR 1745 1755 15 9.0 110.9 10.8X - -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 22773 22926 216 0.7 1447.9 1.0X +SQL Json 15200 15399 281 1.0 966.4 1.5X +SQL Parquet Vectorized: DataPageV1 189 204 11 83.2 12.0 120.4X +SQL Parquet Vectorized: DataPageV2 193 202 9 81.6 12.3 118.2X +SQL Parquet MR: DataPageV1 2729 2772 60 5.8 173.5 8.3X +SQL Parquet MR: DataPageV2 2583 2609 37 6.1 164.2 8.8X +SQL ORC Vectorized 443 449 7 35.5 28.1 51.4X +SQL ORC MR 2393 2435 59 6.6 152.2 9.5X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Parquet Reader Single FLOAT Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 239 243 4 65.7 15.2 1.0X -ParquetReader Vectorized: DataPageV2 240 243 4 65.7 15.2 1.0X -ParquetReader Vectorized -> Row: DataPageV1 221 225 4 71.1 14.1 1.1X -ParquetReader Vectorized -> Row: DataPageV2 223 225 4 70.6 14.2 1.1X +ParquetReader Vectorized: DataPageV1 304 311 5 51.8 19.3 1.0X +ParquetReader Vectorized: DataPageV2 305 312 4 51.5 19.4 1.0X +ParquetReader Vectorized -> Row: DataPageV1 281 288 5 56.0 17.9 1.1X +ParquetReader Vectorized -> Row: DataPageV2 284 296 9 55.3 18.1 1.1X -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz SQL Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 23476 23478 3 0.7 1492.6 1.0X -SQL Json 14568 15103 757 1.1 926.2 1.6X -SQL Parquet Vectorized: DataPageV1 212 230 16 74.2 13.5 110.7X -SQL Parquet Vectorized: DataPageV2 209 218 8 75.4 13.3 112.5X -SQL Parquet MR: DataPageV1 1943 2080 194 8.1 123.5 12.1X -SQL Parquet MR: DataPageV2 1824 1830 9 8.6 116.0 12.9X -SQL ORC Vectorized 395 419 20 39.9 25.1 59.5X -SQL ORC MR 1844 1855 15 8.5 117.2 12.7X - -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 28871 28941 99 0.5 1835.6 1.0X +SQL Json 20205 20296 129 0.8 1284.6 1.4X +SQL Parquet Vectorized: DataPageV1 289 293 7 54.5 18.4 100.0X +SQL Parquet Vectorized: DataPageV2 281 289 8 55.9 17.9 102.6X +SQL Parquet MR: DataPageV1 2926 2953 38 5.4 186.0 9.9X +SQL Parquet MR: DataPageV2 2779 2804 36 5.7 176.7 10.4X +SQL ORC Vectorized 525 531 8 29.9 33.4 55.0X +SQL ORC MR 2495 2533 55 6.3 158.6 11.6X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Parquet Reader Single DOUBLE Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------- -ParquetReader Vectorized: DataPageV1 280 322 88 56.1 17.8 1.0X -ParquetReader Vectorized: DataPageV2 282 301 19 55.8 17.9 1.0X -ParquetReader Vectorized -> Row: DataPageV1 284 290 4 55.3 18.1 1.0X -ParquetReader Vectorized -> Row: DataPageV2 287 293 9 54.8 18.3 1.0X +ParquetReader Vectorized: DataPageV1 383 435 106 41.1 24.4 1.0X +ParquetReader Vectorized: DataPageV2 381 433 97 41.3 24.2 1.0X +ParquetReader Vectorized -> Row: DataPageV1 415 429 18 37.9 26.4 0.9X +ParquetReader Vectorized -> Row: DataPageV2 412 424 13 38.2 26.2 0.9X + + +================================================================================================ +SQL Single Numeric Column Scan in Struct +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL Single TINYINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 2871 2888 23 5.5 182.6 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2916 2926 15 5.4 185.4 1.0X +SQL ORC Vectorized (Nested Column Enabled) 371 382 8 42.4 23.6 7.7X +SQL Parquet MR: DataPageV1 3038 3079 57 5.2 193.2 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3681 3717 51 4.3 234.0 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 376 382 4 41.9 23.9 7.6X +SQL Parquet MR: DataPageV2 2921 2941 29 5.4 185.7 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3444 3503 85 4.6 218.9 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 374 383 6 42.0 23.8 7.7X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL Single SMALLINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 2945 2955 14 5.3 187.3 1.0X +SQL ORC Vectorized (Nested Column Disabled) 2925 2966 58 5.4 185.9 1.0X +SQL ORC Vectorized (Nested Column Enabled) 405 422 24 38.9 25.7 7.3X +SQL Parquet MR: DataPageV1 3379 3386 10 4.7 214.8 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3919 3938 27 4.0 249.1 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 435 447 12 36.2 27.6 6.8X +SQL Parquet MR: DataPageV2 2985 3041 80 5.3 189.8 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3484 3492 10 4.5 221.5 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 552 556 4 28.5 35.1 5.3X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL Single INT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 3019 3085 93 5.2 192.0 1.0X +SQL ORC Vectorized (Nested Column Disabled) 3050 3055 7 5.2 193.9 1.0X +SQL ORC Vectorized (Nested Column Enabled) 515 525 14 30.5 32.8 5.9X +SQL Parquet MR: DataPageV1 3277 3313 50 4.8 208.4 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 4008 4023 22 3.9 254.8 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 368 373 6 42.8 23.4 8.2X +SQL Parquet MR: DataPageV2 2994 3012 25 5.3 190.4 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3550 3603 74 4.4 225.7 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 608 618 13 25.9 38.7 5.0X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL Single BIGINT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 3199 3252 75 4.9 203.4 1.0X +SQL ORC Vectorized (Nested Column Disabled) 3224 3236 17 4.9 205.0 1.0X +SQL ORC Vectorized (Nested Column Enabled) 584 592 6 26.9 37.2 5.5X +SQL Parquet MR: DataPageV1 3386 3412 36 4.6 215.3 0.9X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 4100 4139 54 3.8 260.7 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 464 470 9 33.9 29.5 6.9X +SQL Parquet MR: DataPageV2 3165 3211 65 5.0 201.2 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3740 3748 11 4.2 237.8 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 787 808 21 20.0 50.1 4.1X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL Single FLOAT Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 3242 3249 10 4.9 206.1 1.0X +SQL ORC Vectorized (Nested Column Disabled) 3214 3225 16 4.9 204.3 1.0X +SQL ORC Vectorized (Nested Column Enabled) 622 635 9 25.3 39.5 5.2X +SQL Parquet MR: DataPageV1 3206 3210 5 4.9 203.8 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 3946 4029 117 4.0 250.9 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 355 383 24 44.3 22.6 9.1X +SQL Parquet MR: DataPageV2 3131 3136 8 5.0 199.1 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3618 3622 6 4.3 230.0 0.9X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 362 376 14 43.5 23.0 9.0X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL Single DOUBLE Column Scan in Struct: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 3309 3340 45 4.8 210.4 1.0X +SQL ORC Vectorized (Nested Column Disabled) 3249 3260 15 4.8 206.6 1.0X +SQL ORC Vectorized (Nested Column Enabled) 704 719 14 22.3 44.8 4.7X +SQL Parquet MR: DataPageV1 3407 3440 46 4.6 216.6 1.0X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 4035 4085 70 3.9 256.5 0.8X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 458 470 11 34.3 29.1 7.2X +SQL Parquet MR: DataPageV2 3257 3266 14 4.8 207.1 1.0X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 3894 3956 87 4.0 247.6 0.8X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 447 486 40 35.2 28.4 7.4X + + +================================================================================================ +SQL Nested Column Scan +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz +SQL Nested Column Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------------------------------------------------- +SQL ORC MR 29134 29647 241 0.0 27783.9 1.0X +SQL ORC Vectorized (Nested Column Disabled) 29452 30131 375 0.0 28087.7 1.0X +SQL ORC Vectorized (Nested Column Enabled) 11260 11493 186 0.1 10738.3 2.6X +SQL Parquet MR: DataPageV1 17360 17659 219 0.1 16555.9 1.7X +SQL Parquet Vectorized: DataPageV1 (Nested Column Disabled) 18195 18529 233 0.1 17352.3 1.6X +SQL Parquet Vectorized: DataPageV1 (Nested Column Enabled) 11704 12025 192 0.1 11162.1 2.5X +SQL Parquet MR: DataPageV2 19841 20287 299 0.1 18921.5 1.5X +SQL Parquet Vectorized: DataPageV2 (Nested Column Disabled) 20579 20769 193 0.1 19625.8 1.4X +SQL Parquet Vectorized: DataPageV2 (Nested Column Enabled) 8834 9022 214 0.1 8424.7 3.3X ================================================================================================ Int and String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Int and String Scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 14663 15652 1399 0.7 1398.4 1.0X -SQL Json 10757 10845 125 1.0 1025.9 1.4X -SQL Parquet Vectorized: DataPageV1 1815 1933 166 5.8 173.1 8.1X -SQL Parquet Vectorized: DataPageV2 2244 2297 75 4.7 214.0 6.5X -SQL Parquet MR: DataPageV1 3491 3685 273 3.0 333.0 4.2X -SQL Parquet MR: DataPageV2 3600 3627 37 2.9 343.4 4.1X -SQL ORC Vectorized 1804 1895 129 5.8 172.0 8.1X -SQL ORC MR 3181 3379 280 3.3 303.4 4.6X +SQL CSV 19151 19633 682 0.5 1826.4 1.0X +SQL Json 14145 14192 67 0.7 1348.9 1.4X +SQL Parquet Vectorized: DataPageV1 2408 2456 68 4.4 229.7 8.0X +SQL Parquet Vectorized: DataPageV2 2789 2805 23 3.8 266.0 6.9X +SQL Parquet MR: DataPageV1 5074 5093 27 2.1 483.9 3.8X +SQL Parquet MR: DataPageV2 5452 5479 38 1.9 519.9 3.5X +SQL ORC Vectorized 2358 2377 26 4.4 224.9 8.1X +SQL ORC MR 4464 4492 39 2.3 425.8 4.3X ================================================================================================ Repeated String Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Repeated String: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 8466 8778 441 1.2 807.4 1.0X -SQL Json 6389 6454 93 1.6 609.3 1.3X -SQL Parquet Vectorized: DataPageV1 644 675 52 16.3 61.4 13.1X -SQL Parquet Vectorized: DataPageV2 640 668 44 16.4 61.0 13.2X -SQL Parquet MR: DataPageV1 1579 1602 33 6.6 150.6 5.4X -SQL Parquet MR: DataPageV2 1536 1539 4 6.8 146.5 5.5X -SQL ORC Vectorized 439 443 4 23.9 41.9 19.3X -SQL ORC MR 1787 1806 27 5.9 170.5 4.7X +SQL CSV 10950 11139 268 1.0 1044.3 1.0X +SQL Json 8888 8997 154 1.2 847.6 1.2X +SQL Parquet Vectorized: DataPageV1 843 874 27 12.4 80.4 13.0X +SQL Parquet Vectorized: DataPageV2 817 835 19 12.8 78.0 13.4X +SQL Parquet MR: DataPageV1 2234 2241 10 4.7 213.1 4.9X +SQL Parquet MR: DataPageV2 2171 2181 14 4.8 207.0 5.0X +SQL ORC Vectorized 524 533 12 20.0 49.9 20.9X +SQL ORC MR 2318 2329 16 4.5 221.0 4.7X ================================================================================================ Partitioned Table Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Partitioned Table: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -Data column - CSV 22527 22546 26 0.7 1432.3 1.0X -Data column - Json 12533 12712 254 1.3 796.8 1.8X -Data column - Parquet Vectorized: DataPageV1 229 244 14 68.7 14.6 98.3X -Data column - Parquet Vectorized: DataPageV2 508 519 16 31.0 32.3 44.3X -Data column - Parquet MR: DataPageV1 2525 2535 13 6.2 160.6 8.9X -Data column - Parquet MR: DataPageV2 2194 2209 21 7.2 139.5 10.3X -Data column - ORC Vectorized 315 317 2 50.0 20.0 71.6X -Data column - ORC MR 2098 2100 3 7.5 133.4 10.7X -Partition column - CSV 6747 6753 9 2.3 429.0 3.3X -Partition column - Json 10080 10102 32 1.6 640.8 2.2X -Partition column - Parquet Vectorized: DataPageV1 60 63 2 262.8 3.8 376.4X -Partition column - Parquet Vectorized: DataPageV2 58 63 8 270.2 3.7 387.1X -Partition column - Parquet MR: DataPageV1 1152 1155 4 13.6 73.3 19.5X -Partition column - Parquet MR: DataPageV2 1149 1149 1 13.7 73.0 19.6X -Partition column - ORC Vectorized 61 64 3 259.8 3.8 372.1X -Partition column - ORC MR 1332 1332 0 11.8 84.7 16.9X -Both columns - CSV 23030 23042 17 0.7 1464.2 1.0X -Both columns - Json 13569 13581 16 1.2 862.7 1.7X -Both columns - Parquet Vectorized: DataPageV1 268 277 11 58.7 17.0 84.0X -Both columns - Parquet Vectorized: DataPageV2 551 557 7 28.6 35.0 40.9X -Both columns - Parquet MR: DataPageV1 2556 2557 0 6.2 162.5 8.8X -Both columns - Parquet MR: DataPageV2 2287 2292 7 6.9 145.4 9.9X -Both columns - ORC Vectorized 361 363 2 43.6 22.9 62.5X -Both columns - ORC MR 2158 2161 5 7.3 137.2 10.4X +Data column - CSV 27169 27463 416 0.6 1727.3 1.0X +Data column - Json 15259 15337 110 1.0 970.1 1.8X +Data column - Parquet Vectorized: DataPageV1 287 299 10 54.8 18.2 94.7X +Data column - Parquet Vectorized: DataPageV2 622 624 2 25.3 39.5 43.7X +Data column - Parquet MR: DataPageV1 3264 3280 22 4.8 207.5 8.3X +Data column - Parquet MR: DataPageV2 3068 3073 8 5.1 195.0 8.9X +Data column - ORC Vectorized 381 394 8 41.2 24.2 71.2X +Data column - ORC MR 2704 2745 58 5.8 171.9 10.0X +Partition column - CSV 7918 7935 24 2.0 503.4 3.4X +Partition column - Json 12063 12236 245 1.3 766.9 2.3X +Partition column - Parquet Vectorized: DataPageV1 63 66 2 250.9 4.0 433.4X +Partition column - Parquet Vectorized: DataPageV2 61 68 4 256.8 3.9 443.6X +Partition column - Parquet MR: DataPageV1 1619 1637 25 9.7 102.9 16.8X +Partition column - Parquet MR: DataPageV2 1623 1642 26 9.7 103.2 16.7X +Partition column - ORC Vectorized 66 72 4 238.0 4.2 411.1X +Partition column - ORC MR 1887 1888 1 8.3 120.0 14.4X +Both columns - CSV 26235 26475 340 0.6 1668.0 1.0X +Both columns - Json 15890 16037 207 1.0 1010.3 1.7X +Both columns - Parquet Vectorized: DataPageV1 333 346 11 47.3 21.2 81.7X +Both columns - Parquet Vectorized: DataPageV2 672 680 6 23.4 42.8 40.4X +Both columns - Parquet MR: DataPageV1 3374 3388 19 4.7 214.5 8.1X +Both columns - Parquet MR: DataPageV2 3115 3131 22 5.0 198.1 8.7X +Both columns - ORC Vectorized 417 426 17 37.7 26.5 65.2X +Both columns - ORC MR 2732 2748 22 5.8 173.7 9.9X ================================================================================================ String with Nulls Scan ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz String with Nulls Scan (0.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 11418 11463 63 0.9 1088.9 1.0X -SQL Json 9698 9938 339 1.1 924.9 1.2X -SQL Parquet Vectorized: DataPageV1 1176 1207 45 8.9 112.1 9.7X -SQL Parquet Vectorized: DataPageV2 1652 1669 24 6.3 157.6 6.9X -SQL Parquet MR: DataPageV1 3041 3119 109 3.4 290.0 3.8X -SQL Parquet MR: DataPageV2 4030 4110 114 2.6 384.3 2.8X -ParquetReader Vectorized: DataPageV1 1008 1014 8 10.4 96.2 11.3X -ParquetReader Vectorized: DataPageV2 1247 1305 82 8.4 118.9 9.2X -SQL ORC Vectorized 820 856 56 12.8 78.2 13.9X -SQL ORC MR 2762 2807 64 3.8 263.4 4.1X - -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 12593 12635 60 0.8 1201.0 1.0X +SQL Json 12873 13025 216 0.8 1227.6 1.0X +SQL Parquet Vectorized: DataPageV1 1597 1613 23 6.6 152.3 7.9X +SQL Parquet Vectorized: DataPageV2 2055 2060 7 5.1 196.0 6.1X +SQL Parquet MR: DataPageV1 4275 4321 65 2.5 407.7 2.9X +SQL Parquet MR: DataPageV2 5410 5476 93 1.9 515.9 2.3X +ParquetReader Vectorized: DataPageV1 1202 1218 23 8.7 114.6 10.5X +ParquetReader Vectorized: DataPageV2 1873 1887 20 5.6 178.6 6.7X +SQL ORC Vectorized 1211 1215 6 8.7 115.5 10.4X +SQL ORC MR 4208 4230 32 2.5 401.3 3.0X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz String with Nulls Scan (50.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 6752 6756 5 1.6 644.0 1.0X -SQL Json 7469 7549 112 1.4 712.3 0.9X -SQL Parquet Vectorized: DataPageV1 912 990 67 11.5 87.0 7.4X -SQL Parquet Vectorized: DataPageV2 1141 1215 104 9.2 108.8 5.9X -SQL Parquet MR: DataPageV1 2256 2418 229 4.6 215.1 3.0X -SQL Parquet MR: DataPageV2 2712 2882 241 3.9 258.6 2.5X -ParquetReader Vectorized: DataPageV1 956 960 6 11.0 91.2 7.1X -ParquetReader Vectorized: DataPageV2 1211 1211 1 8.7 115.5 5.6X -SQL ORC Vectorized 1135 1135 1 9.2 108.2 6.0X -SQL ORC MR 2716 2766 70 3.9 259.0 2.5X - -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 8497 8533 51 1.2 810.3 1.0X +SQL Json 9766 9829 88 1.1 931.4 0.9X +SQL Parquet Vectorized: DataPageV1 1277 1286 13 8.2 121.8 6.7X +SQL Parquet Vectorized: DataPageV2 1541 1553 16 6.8 147.0 5.5X +SQL Parquet MR: DataPageV1 3294 3335 58 3.2 314.1 2.6X +SQL Parquet MR: DataPageV2 4003 4029 36 2.6 381.8 2.1X +ParquetReader Vectorized: DataPageV1 1158 1172 20 9.1 110.5 7.3X +ParquetReader Vectorized: DataPageV2 1464 1477 18 7.2 139.7 5.8X +SQL ORC Vectorized 1217 1219 3 8.6 116.0 7.0X +SQL ORC MR 3371 3422 73 3.1 321.4 2.5X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz String with Nulls Scan (95.0%): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 4496 4710 303 2.3 428.8 1.0X -SQL Json 4324 4343 28 2.4 412.3 1.0X -SQL Parquet Vectorized: DataPageV1 221 244 9 47.5 21.0 20.4X -SQL Parquet Vectorized: DataPageV2 270 288 13 38.8 25.8 16.6X -SQL Parquet MR: DataPageV1 1451 1461 15 7.2 138.3 3.1X -SQL Parquet MR: DataPageV2 1364 1368 5 7.7 130.0 3.3X -ParquetReader Vectorized: DataPageV1 256 258 2 40.9 24.5 17.5X -ParquetReader Vectorized: DataPageV2 273 291 17 38.4 26.0 16.5X -SQL ORC Vectorized 345 367 24 30.4 32.9 13.0X -SQL ORC MR 1508 1509 2 7.0 143.8 3.0X +SQL CSV 5118 5124 9 2.0 488.1 1.0X +SQL Json 5598 5604 8 1.9 533.9 0.9X +SQL Parquet Vectorized: DataPageV1 303 309 6 34.6 28.9 16.9X +SQL Parquet Vectorized: DataPageV2 360 379 20 29.2 34.3 14.2X +SQL Parquet MR: DataPageV1 2104 2142 55 5.0 200.6 2.4X +SQL Parquet MR: DataPageV2 2153 2168 20 4.9 205.3 2.4X +ParquetReader Vectorized: DataPageV1 305 323 17 34.4 29.1 16.8X +ParquetReader Vectorized: DataPageV2 366 371 4 28.6 34.9 14.0X +SQL ORC Vectorized 405 409 5 25.9 38.7 12.6X +SQL ORC MR 1854 1894 57 5.7 176.8 2.8X ================================================================================================ Single Column Scan From Wide Columns ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Single Column Scan from 10 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 2036 2140 147 0.5 1941.4 1.0X -SQL Json 2796 2927 186 0.4 2666.5 0.7X -SQL Parquet Vectorized: DataPageV1 47 52 7 22.2 45.0 43.1X -SQL Parquet Vectorized: DataPageV2 64 69 7 16.4 61.2 31.7X -SQL Parquet MR: DataPageV1 176 190 11 5.9 168.1 11.5X -SQL Parquet MR: DataPageV2 157 171 6 6.7 149.3 13.0X -SQL ORC Vectorized 52 56 10 20.3 49.2 39.5X -SQL ORC MR 142 152 8 7.4 135.9 14.3X - -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 2589 2590 1 0.4 2469.2 1.0X +SQL Json 3426 3574 210 0.3 3267.2 0.8X +SQL Parquet Vectorized: DataPageV1 55 60 7 19.1 52.4 47.2X +SQL Parquet Vectorized: DataPageV2 75 81 7 13.9 71.9 34.4X +SQL Parquet MR: DataPageV1 229 237 9 4.6 218.0 11.3X +SQL Parquet MR: DataPageV2 219 227 4 4.8 208.8 11.8X +SQL ORC Vectorized 60 67 11 17.6 56.8 43.4X +SQL ORC MR 190 199 6 5.5 181.6 13.6X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Single Column Scan from 50 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 5384 5560 249 0.2 5134.8 1.0X -SQL Json 10934 11224 410 0.1 10427.1 0.5X -SQL Parquet Vectorized: DataPageV1 62 67 7 16.8 59.5 86.3X -SQL Parquet Vectorized: DataPageV2 79 85 7 13.3 75.3 68.1X -SQL Parquet MR: DataPageV1 198 211 9 5.3 188.6 27.2X -SQL Parquet MR: DataPageV2 177 188 9 5.9 168.7 30.4X -SQL ORC Vectorized 67 73 10 15.6 64.0 80.2X -SQL ORC MR 160 172 8 6.6 152.3 33.7X - -OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.11.0-1028-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +SQL CSV 6409 6455 65 0.2 6111.9 1.0X +SQL Json 13331 13615 402 0.1 12713.3 0.5X +SQL Parquet Vectorized: DataPageV1 72 78 7 14.5 68.8 88.9X +SQL Parquet Vectorized: DataPageV2 92 100 11 11.4 87.6 69.7X +SQL Parquet MR: DataPageV1 255 264 9 4.1 243.2 25.1X +SQL Parquet MR: DataPageV2 239 243 4 4.4 227.5 26.9X +SQL ORC Vectorized 77 84 8 13.6 73.8 82.9X +SQL ORC MR 203 215 7 5.2 193.4 31.6X + +OpenJDK 64-Bit Server VM 1.8.0_322-b06 on Linux 5.13.0-1021-azure +Intel(R) Xeon(R) CPU E5-2673 v3 @ 2.40GHz Single Column Scan from 100 columns: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------ -SQL CSV 9602 9882 396 0.1 9157.0 1.0X -SQL Json 21369 21987 874 0.0 20379.5 0.4X -SQL Parquet Vectorized: DataPageV1 90 97 7 11.7 85.4 107.2X -SQL Parquet Vectorized: DataPageV2 107 115 7 9.8 102.0 89.8X -SQL Parquet MR: DataPageV1 227 234 14 4.6 216.1 42.4X -SQL Parquet MR: DataPageV2 204 216 10 5.1 194.4 47.1X -SQL ORC Vectorized 81 89 8 12.9 77.6 118.1X -SQL ORC MR 181 195 12 5.8 172.3 53.2X +SQL CSV 10921 11068 208 0.1 10414.9 1.0X +SQL Json 25389 25466 108 0.0 24213.1 0.4X +SQL Parquet Vectorized: DataPageV1 105 113 9 10.0 100.0 104.1X +SQL Parquet Vectorized: DataPageV2 130 137 13 8.1 123.9 84.0X +SQL Parquet MR: DataPageV1 285 303 21 3.7 271.8 38.3X +SQL Parquet MR: DataPageV2 281 287 4 3.7 268.2 38.8X +SQL ORC Vectorized 99 108 7 10.6 94.3 110.4X +SQL ORC MR 231 240 6 4.5 220.6 47.2X diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala index 7c9fa58d77f42..b35aa73e146f7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/DataSourceReadBenchmark.scala @@ -69,15 +69,21 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { try f finally tableNames.foreach(spark.catalog.dropTempView) } - private def prepareTable(dir: File, df: DataFrame, partition: Option[String] = None): Unit = { + private def prepareTable( + dir: File, + df: DataFrame, + partition: Option[String] = None, + onlyParquetOrc: Boolean = false): Unit = { val testDf = if (partition.isDefined) { df.write.partitionBy(partition.get) } else { df.write } - saveAsCsvTable(testDf, dir.getCanonicalPath + "/csv") - saveAsJsonTable(testDf, dir.getCanonicalPath + "/json") + if (!onlyParquetOrc) { + saveAsCsvTable(testDf, dir.getCanonicalPath + "/csv") + saveAsJsonTable(testDf, dir.getCanonicalPath + "/json") + } saveAsParquetV1Table(testDf, dir.getCanonicalPath + "/parquetV1") saveAsParquetV2Table(testDf, dir.getCanonicalPath + "/parquetV2") saveAsOrcTable(testDf, dir.getCanonicalPath + "/orc") @@ -262,6 +268,136 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { } } + /** + * Similar to [[numericScanBenchmark]] but accessed column is a struct field. + */ + def nestedNumericScanBenchmark(values: Int, dataType: DataType): Unit = { + val sqlBenchmark = new Benchmark( + s"SQL Single ${dataType.sql} Column Scan in Struct", + values, + output = output) + + withTempPath { dir => + withTempTable("t1", "parquetV1Table", "parquetV2Table", "orcTable") { + import spark.implicits._ + spark.range(values).map(_ => Random.nextLong).createOrReplaceTempView("t1") + + prepareTable(dir, + spark.sql(s"SELECT named_struct('f', CAST(value as ${dataType.sql})) as col FROM t1"), + onlyParquetOrc = true) + + sqlBenchmark.addCase(s"SQL ORC MR") { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql(s"select sum(col.f) from orcTable").noop() + } + } + + sqlBenchmark.addCase(s"SQL ORC Vectorized (Nested Column Disabled)") { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "false") { + spark.sql(s"select sum(col.f) from orcTable").noop() + } + } + + sqlBenchmark.addCase(s"SQL ORC Vectorized (Nested Column Enabled)") { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true") { + spark.sql(s"select sum(col.f) from orcTable").noop() + } + } + + withParquetVersions { version => + sqlBenchmark.addCase(s"SQL Parquet MR: DataPage$version") { _ => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql(s"select sum(col.f) from parquet${version}Table").noop() + } + } + + sqlBenchmark.addCase(s"SQL Parquet Vectorized: DataPage$version " + + "(Nested Column Disabled)") { _ => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "false") { + spark.sql(s"select sum(col.f) from parquet${version}Table").noop() + } + } + + sqlBenchmark.addCase(s"SQL Parquet Vectorized: DataPage$version " + + "(Nested Column Enabled)") { _ => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true") { + spark.sql(s"select sum(col.f) from parquet${version}Table").noop() + } + } + } + + sqlBenchmark.run() + } + } + } + + def nestedColumnScanBenchmark(values: Int): Unit = { + val benchmark = new Benchmark(s"SQL Nested Column Scan", values, minNumIters = 10, + output = output) + + withTempPath { dir => + withTempTable("t1", "parquetV1Table", "parquetV2Table", "orcTable") { + import spark.implicits._ + spark.range(values).map(_ => Random.nextLong).map { x => + val arrayOfStructColumn = (0 until 5).map(i => (x + i, s"$x" * 5)) + val mapOfStructColumn = Map( + s"$x" -> (x * 0.1, (x, s"$x" * 100)), + (s"$x" * 2) -> (x * 0.2, (x, s"$x" * 200)), + (s"$x" * 3) -> (x * 0.3, (x, s"$x" * 300))) + (arrayOfStructColumn, mapOfStructColumn) + }.toDF("col1", "col2").createOrReplaceTempView("t1") + + prepareTable(dir, spark.sql(s"SELECT * FROM t1"), onlyParquetOrc = true) + + benchmark.addCase("SQL ORC MR") { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql("SELECT SUM(SIZE(col1)), SUM(SIZE(col2)) FROM orcTable").noop() + } + } + + benchmark.addCase("SQL ORC Vectorized (Nested Column Disabled)") { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "false") { + spark.sql("SELECT SUM(SIZE(col1)), SUM(SIZE(col2)) FROM orcTable").noop() + } + } + + benchmark.addCase("SQL ORC Vectorized (Nested Column Enabled)") { _ => + withSQLConf(SQLConf.ORC_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true") { + spark.sql("SELECT SUM(SIZE(col1)), SUM(SIZE(col2)) FROM orcTable").noop() + } + } + + + withParquetVersions { version => + benchmark.addCase(s"SQL Parquet MR: DataPage$version") { _ => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { + spark.sql(s"SELECT SUM(SIZE(col1)), SUM(SIZE(col2)) FROM parquet${version}Table") + .noop() + } + } + + benchmark.addCase(s"SQL Parquet Vectorized: DataPage$version " + + s"(Nested Column Disabled)") { _ => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "false") { + spark.sql(s"SELECT SUM(SIZE(col1)), SUM(SIZE(col2)) FROM parquet${version}Table") + .noop() + } + } + + benchmark.addCase(s"SQL Parquet Vectorized: DataPage$version " + + s"(Nested Column Enabled)") { _ => + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true") { + spark.sql(s"SELECT SUM(SIZE(col1)), SUM(SIZE(col2)) FROM parquet${version}Table") + .noop() + } + } + } + + benchmark.run() + } + } + } + def intStringScanBenchmark(values: Int): Unit = { val benchmark = new Benchmark("Int and String Scan", values, output = output) @@ -615,6 +751,14 @@ object DataSourceReadBenchmark extends SqlBasedBenchmark { dataType => numericScanBenchmark(1024 * 1024 * 15, dataType) } } + runBenchmark("SQL Single Numeric Column Scan in Struct") { + Seq(ByteType, ShortType, IntegerType, LongType, FloatType, DoubleType).foreach { + dataType => nestedNumericScanBenchmark(1024 * 1024 * 15, dataType) + } + } + runBenchmark("SQL Nested Column Scan") { + nestedColumnScanBenchmark(1024 * 1024) + } runBenchmark("Int and String Scan") { intStringScanBenchmark(1024 * 1024 * 10) } From 9926b093056a8896e84aedb42ad62e08a3f4950c Mon Sep 17 00:00:00 2001 From: Ankur Dave Date: Tue, 12 Apr 2022 12:01:19 +0900 Subject: [PATCH 128/535] [SPARK-38677][PYSPARK] Python MonitorThread should detect deadlock due to blocking I/O ### What changes were proposed in this pull request? When calling a Python UDF on a DataFrame with large rows, a deadlock can occur involving the following three threads: 1. The Scala task executor thread. During task execution, this is responsible for reading output produced by the Python process. However, in this case the task has finished early, and this thread is no longer reading output produced by the Python process. Instead, it is waiting for the Scala WriterThread to exit so that it can finish the task. 2. The Scala WriterThread. This is trying to send a large row to the Python process, and is waiting for the Python process to read that row. 3. The Python process. This is trying to send a large output to the Scala task executor thread, and is waiting for that thread to read that output, which will never happen. We considered the following three solutions for the deadlock: 1. When the task completes, make the Scala task executor thread close the socket before waiting for the Scala WriterThread to exit. If the WriterThread is blocked on a large write, this would interrupt that write and allow the WriterThread to exit. However, it would prevent Python worker reuse. 2. Modify PythonWorkerFactory to use interruptible I/O. [java.nio.channels.SocketChannel](https://docs.oracle.com/javase/6/docs/api/java/nio/channels/SocketChannel.html#write(java.nio.ByteBuffer)) supports interruptible blocking operations. The goal is that when the WriterThread is interrupted, it should exit even if it was blocked on a large write. However, this would be invasive. 3. Add a watchdog thread similar to the existing PythonRunner.MonitorThread to detect this deadlock and kill the Python worker. The MonitorThread currently kills the Python worker only if the task itself is interrupted. In this case, the task completes normally, so the MonitorThread does not take action. We want the new watchdog thread (WriterMonitorThread) to detect that the task is completed but the Python writer thread has not stopped, indicating a deadlock. This PR implements Option 3. ### Why are the changes needed? To fix a deadlock that can cause PySpark queries to hang. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added a test that previously encountered the deadlock and timed out, and now succeeds. Closes #36065 from ankurdave/SPARK-38677. Authored-by: Ankur Dave Signed-off-by: Hyukjin Kwon (cherry picked from commit 088e05d2518883aa27d0b8144107e45f41dd6b90) Signed-off-by: Hyukjin Kwon --- .../spark/api/python/PythonRunner.scala | 49 +++++++++++++++++++ python/pyspark/tests/test_rdd.py | 35 +++++++++++++ 2 files changed, 84 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala index 6a4871ba26916..15707ab9157dc 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRunner.scala @@ -183,6 +183,7 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( } writerThread.start() + new WriterMonitorThread(SparkEnv.get, worker, writerThread, context).start() if (reuseWorker) { val key = (worker, context.taskAttemptId) // SPARK-35009: avoid creating multiple monitor threads for the same python worker @@ -646,6 +647,54 @@ private[spark] abstract class BasePythonRunner[IN, OUT]( } } } + + /** + * This thread monitors the WriterThread and kills it in case of deadlock. + * + * A deadlock can arise if the task completes while the writer thread is sending input to the + * Python process (e.g. due to the use of `take()`), and the Python process is still producing + * output. When the inputs are sufficiently large, this can result in a deadlock due to the use of + * blocking I/O (SPARK-38677). To resolve the deadlock, we need to close the socket. + */ + class WriterMonitorThread( + env: SparkEnv, worker: Socket, writerThread: WriterThread, context: TaskContext) + extends Thread(s"Writer Monitor for $pythonExec (writer thread id ${writerThread.getId})") { + + /** + * How long to wait before closing the socket if the writer thread has not exited after the task + * ends. + */ + private val taskKillTimeout = env.conf.get(PYTHON_TASK_KILL_TIMEOUT) + + setDaemon(true) + + override def run(): Unit = { + // Wait until the task is completed (or the writer thread exits, in which case this thread has + // nothing to do). + while (!context.isCompleted && writerThread.isAlive) { + Thread.sleep(2000) + } + if (writerThread.isAlive) { + Thread.sleep(taskKillTimeout) + // If the writer thread continues running, this indicates a deadlock. Kill the worker to + // resolve the deadlock. + if (writerThread.isAlive) { + try { + // Mimic the task name used in `Executor` to help the user find out the task to blame. + val taskName = s"${context.partitionId}.${context.attemptNumber} " + + s"in stage ${context.stageId} (TID ${context.taskAttemptId})" + logWarning( + s"Detected deadlock while completing task $taskName: " + + "Attempting to kill Python Worker") + env.destroyPythonWorker(pythonExec, envVars.asScala.toMap, worker) + } catch { + case e: Exception => + logError("Exception when trying to kill worker", e) + } + } + } + } + } } private[spark] object PythonRunner { diff --git a/python/pyspark/tests/test_rdd.py b/python/pyspark/tests/test_rdd.py index bf066e80b6b3b..d5d6cdbae8a20 100644 --- a/python/pyspark/tests/test_rdd.py +++ b/python/pyspark/tests/test_rdd.py @@ -34,6 +34,7 @@ UTF8Deserializer, NoOpSerializer, ) +from pyspark.sql import SparkSession from pyspark.testing.utils import ReusedPySparkTestCase, SPARK_HOME, QuietTest @@ -697,6 +698,40 @@ def test_take_on_jrdd(self): rdd = self.sc.parallelize(range(1 << 20)).map(lambda x: str(x)) rdd._jrdd.first() + def test_take_on_jrdd_with_large_rows_should_not_cause_deadlock(self): + # Regression test for SPARK-38677. + # + # Create a DataFrame with many columns, call a Python function on each row, and take only + # the first result row. + # + # This produces large rows that trigger a deadlock involving the following three threads: + # + # 1. The Scala task executor thread. During task execution, this is responsible for reading + # output produced by the Python process. However, in this case the task has finished + # early, and this thread is no longer reading output produced by the Python process. + # Instead, it is waiting for the Scala WriterThread to exit so that it can finish the + # task. + # + # 2. The Scala WriterThread. This is trying to send a large row to the Python process, and + # is waiting for the Python process to read that row. + # + # 3. The Python process. This is trying to send a large output to the Scala task executor + # thread, and is waiting for that thread to read that output. + # + # For this test to succeed rather than hanging, the Scala MonitorThread must detect this + # deadlock and kill the Python worker. + import numpy as np + import pandas as pd + + num_rows = 100000 + num_columns = 134 + data = np.zeros((num_rows, num_columns)) + columns = map(str, range(num_columns)) + df = SparkSession(self.sc).createDataFrame(pd.DataFrame(data, columns=columns)) + actual = CPickleSerializer().loads(df.rdd.map(list)._jrdd.first()) + expected = [list(data[0])] + self.assertEqual(expected, actual) + def test_sortByKey_uses_all_partitions_not_only_first_and_last(self): # Regression test for SPARK-5969 seq = [(i * 59 % 101, i) for i in range(101)] # unsorted sequence From b2960b25b7f506c5543e7137a46d409cba44ff4c Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 12 Apr 2022 20:39:08 +0800 Subject: [PATCH 129/535] [SPARK-38589][SQL] New SQL function: try_avg ### What changes were proposed in this pull request? Add a new SQL function: try_avg. It is identical to the function `avg`, except that it returns NULL result instead of throwing an exception on decimal/interval value overflow. Note it is also different from `avg` when ANSI mode is off on interval overflows | Function | avg | try_avg | |------------------|------------------------------------|-------------| | year-month interval overflow | Error | Return NULL | | day-time interval overflow | Error | Return NULL | ### Why are the changes needed? * Users can manage to finish queries without interruptions in ANSI mode. * Users can get NULLs instead of runtime errors if interval overflow occurs when ANSI mode is off. For example ``` > SELECT avg(col) FROM VALUES (interval '2147483647 months'),(interval '1 months') AS tab(col) java.lang.ArithmeticException: integer overflow. > SELECT try_avg(col) FROM VALUES (interval '2147483647 months'),(interval '1 months') AS tab(col) NULL ``` ### Does this PR introduce _any_ user-facing change? Yes, adding a new SQL function: try_avg. It is identical to the function `avg`, except that it returns NULL result instead of throwing an exception on decimal/interval value overflow. ### How was this patch tested? UT Closes #35896 from gengliangwang/tryAvg. Lead-authored-by: Gengliang Wang Co-authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit a7f0adb2dd8449af6f9e9b5a25f11b5dcf5868f1) Signed-off-by: Gengliang Wang --- docs/sql-ref-ansi-compliance.md | 3 +- .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/aggregate/Average.scala | 125 ++++++++++++++---- .../catalyst/expressions/aggregate/Sum.scala | 35 ++--- .../sql-functions/sql-expression-schema.md | 5 +- .../sql-tests/inputs/try_aggregates.sql | 14 ++ .../results/ansi/try_aggregates.sql.out | 82 +++++++++++- .../sql-tests/results/try_aggregates.sql.out | 82 +++++++++++- .../org/apache/spark/sql/SQLQuerySuite.scala | 12 ++ 9 files changed, 313 insertions(+), 46 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index ab11e9fede2e4..692ea60b52ce0 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -316,7 +316,8 @@ When ANSI mode is on, it throws exceptions for invalid operations. You can use t - `try_subtract`: identical to the add operator `-`, except that it returns `NULL` result instead of throwing an exception on integral value overflow. - `try_multiply`: identical to the add operator `*`, except that it returns `NULL` result instead of throwing an exception on integral value overflow. - `try_divide`: identical to the division operator `/`, except that it returns `NULL` result instead of throwing an exception on dividing 0. - - `try_sum`: identical to the function `sum`, except that it returns `NULL` result instead of throwing an exception on integral/decimal value overflow. + - `try_sum`: identical to the function `sum`, except that it returns `NULL` result instead of throwing an exception on integral/decimal/interval value overflow. + - `try_avg`: identical to the function `avg`, except that it returns `NULL` result instead of throwing an exception on decimal/interval value overflow. - `try_element_at`: identical to the function `element_at`, except that it returns `NULL` result instead of throwing an exception on array's index out of bound or map's key not found. ### SQL Keywords (optional, disabled by default) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 91bc34bef371d..b4f77302bdd8a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -474,6 +474,7 @@ object FunctionRegistry { expression[TrySubtract]("try_subtract"), expression[TryMultiply]("try_multiply"), expression[TryElementAt]("try_element_at"), + expression[TryAverage]("try_avg"), expression[TrySum]("try_sum"), expression[TryToBinary]("try_to_binary"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala index 533f7f20b2530..14914576091be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala @@ -26,25 +26,13 @@ import org.apache.spark.sql.catalyst.util.TypeUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -@ExpressionDescription( - usage = "_FUNC_(expr) - Returns the mean calculated from values of a group.", - examples = """ - Examples: - > SELECT _FUNC_(col) FROM VALUES (1), (2), (3) AS tab(col); - 2.0 - > SELECT _FUNC_(col) FROM VALUES (1), (2), (NULL) AS tab(col); - 1.5 - """, - group = "agg_funcs", - since = "1.0.0") -case class Average( - child: Expression, - failOnError: Boolean = SQLConf.get.ansiEnabled) +abstract class AverageBase extends DeclarativeAggregate with ImplicitCastInputTypes with UnaryLike[Expression] { - def this(child: Expression) = this(child, failOnError = SQLConf.get.ansiEnabled) + // Whether to use ANSI add or not during the execution. + def useAnsiAdd: Boolean override def prettyName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("avg") @@ -61,7 +49,7 @@ case class Average( final override val nodePatterns: Seq[TreePattern] = Seq(AVERAGE) - private lazy val resultType = child.dataType match { + protected lazy val resultType = child.dataType match { case DecimalType.Fixed(p, s) => DecimalType.bounded(p + 4, s + 4) case _: YearMonthIntervalType => YearMonthIntervalType() @@ -86,18 +74,18 @@ case class Average( /* count = */ Literal(0L) ) - override lazy val mergeExpressions = Seq( - /* sum = */ sum.left + sum.right, + protected def getMergeExpressions = Seq( + /* sum = */ Add(sum.left, sum.right, useAnsiAdd), /* count = */ count.left + count.right ) // If all input are nulls, count will be 0 and we will get null after the division. // We can't directly use `/` as it throws an exception under ansi mode. - override lazy val evaluateExpression = child.dataType match { + protected def getEvaluateExpression = child.dataType match { case _: DecimalType => DecimalPrecision.decimalAndDecimal()( Divide( - CheckOverflowInSum(sum, sumDataType.asInstanceOf[DecimalType], !failOnError), + CheckOverflowInSum(sum, sumDataType.asInstanceOf[DecimalType], !useAnsiAdd), count.cast(DecimalType.LongDecimal), failOnError = false)).cast(resultType) case _: YearMonthIntervalType => If(EqualTo(count, Literal(0L)), @@ -109,17 +97,106 @@ case class Average( Divide(sum.cast(resultType), count.cast(resultType), failOnError = false) } - override lazy val updateExpressions: Seq[Expression] = Seq( + protected def getUpdateExpressions: Seq[Expression] = Seq( /* sum = */ Add( sum, - coalesce(child.cast(sumDataType), Literal.default(sumDataType))), + coalesce(child.cast(sumDataType), Literal.default(sumDataType)), + failOnError = useAnsiAdd), /* count = */ If(child.isNull, count, count + 1L) ) + // The flag `useAnsiAdd` won't be shown in the `toString` or `toAggString` methods + override def flatArguments: Iterator[Any] = Iterator(child) +} + +@ExpressionDescription( + usage = "_FUNC_(expr) - Returns the mean calculated from values of a group.", + examples = """ + Examples: + > SELECT _FUNC_(col) FROM VALUES (1), (2), (3) AS tab(col); + 2.0 + > SELECT _FUNC_(col) FROM VALUES (1), (2), (NULL) AS tab(col); + 1.5 + """, + group = "agg_funcs", + since = "1.0.0") +case class Average( + child: Expression, + useAnsiAdd: Boolean = SQLConf.get.ansiEnabled) extends AverageBase { + def this(child: Expression) = this(child, useAnsiAdd = SQLConf.get.ansiEnabled) + override protected def withNewChildInternal(newChild: Expression): Average = copy(child = newChild) - // The flag `failOnError` won't be shown in the `toString` or `toAggString` methods - override def flatArguments: Iterator[Any] = Iterator(child) + override lazy val updateExpressions: Seq[Expression] = getUpdateExpressions + + override lazy val mergeExpressions: Seq[Expression] = getMergeExpressions + + override lazy val evaluateExpression: Expression = getEvaluateExpression +} + +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(expr) - Returns the mean calculated from values of a group and the result is null on overflow.", + examples = """ + Examples: + > SELECT _FUNC_(col) FROM VALUES (1), (2), (3) AS tab(col); + 2.0 + > SELECT _FUNC_(col) FROM VALUES (1), (2), (NULL) AS tab(col); + 1.5 + > SELECT _FUNC_(col) FROM VALUES (interval '2147483647 months'), (interval '1 months') AS tab(col); + NULL + """, + group = "agg_funcs", + since = "3.3.0") +// scalastyle:on line.size.limit +case class TryAverage(child: Expression) extends AverageBase { + override def useAnsiAdd: Boolean = resultType match { + // Double type won't fail, thus we can always use non-Ansi Add. + // For decimal type, it returns NULL on overflow. It behaves the same as TrySum when + // `failOnError` is false. + case _: DoubleType | _: DecimalType => false + case _ => true + } + + private def addTryEvalIfNeeded(expression: Expression): Expression = { + if (useAnsiAdd) { + TryEval(expression) + } else { + expression + } + } + + override lazy val updateExpressions: Seq[Expression] = { + val expressions = getUpdateExpressions + addTryEvalIfNeeded(expressions.head) +: expressions.tail + } + + override lazy val mergeExpressions: Seq[Expression] = { + val expressions = getMergeExpressions + if (useAnsiAdd) { + val bufferOverflow = sum.left.isNull && count.left > 0L + val inputOverflow = sum.right.isNull && count.right > 0L + Seq( + If( + bufferOverflow || inputOverflow, + Literal.create(null, resultType), + // If both the buffer and the input do not overflow, just add them, as they can't be + // null. + TryEval(Add(KnownNotNull(sum.left), KnownNotNull(sum.right), useAnsiAdd))), + expressions(1)) + } else { + expressions + } + } + + override lazy val evaluateExpression: Expression = { + addTryEvalIfNeeded(getEvaluateExpression) + } + + override protected def withNewChildInternal(newChild: Expression): Expression = + copy(child = newChild) + + override def prettyName: String = "try_avg" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala index fd27edfc8fc10..f2c6925b837e9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala @@ -30,7 +30,8 @@ abstract class SumBase(child: Expression) extends DeclarativeAggregate with ImplicitCastInputTypes with UnaryLike[Expression] { - def failOnError: Boolean + // Whether to use ANSI add or not during the execution. + def useAnsiAdd: Boolean protected def shouldTrackIsEmpty: Boolean @@ -81,9 +82,9 @@ abstract class SumBase(child: Expression) extends DeclarativeAggregate // null if overflow happens under non-ansi mode. val sumExpr = if (child.nullable) { If(child.isNull, sum, - Add(sum, KnownNotNull(child).cast(resultType), failOnError = failOnError)) + Add(sum, KnownNotNull(child).cast(resultType), failOnError = useAnsiAdd)) } else { - Add(sum, child.cast(resultType), failOnError = failOnError) + Add(sum, child.cast(resultType), failOnError = useAnsiAdd) } // The buffer becomes non-empty after seeing the first not-null input. val isEmptyExpr = if (child.nullable) { @@ -98,10 +99,10 @@ abstract class SumBase(child: Expression) extends DeclarativeAggregate // in case the input is nullable. The `sum` can only be null if there is no value, as // non-decimal type can produce overflowed value under non-ansi mode. if (child.nullable) { - Seq(coalesce(Add(coalesce(sum, zero), child.cast(resultType), failOnError = failOnError), + Seq(coalesce(Add(coalesce(sum, zero), child.cast(resultType), failOnError = useAnsiAdd), sum)) } else { - Seq(Add(coalesce(sum, zero), child.cast(resultType), failOnError = failOnError)) + Seq(Add(coalesce(sum, zero), child.cast(resultType), failOnError = useAnsiAdd)) } } @@ -127,11 +128,11 @@ abstract class SumBase(child: Expression) extends DeclarativeAggregate // If both the buffer and the input do not overflow, just add them, as they can't be // null. See the comments inside `updateExpressions`: `sum` can only be null if // overflow happens. - Add(KnownNotNull(sum.left), KnownNotNull(sum.right), failOnError)), + Add(KnownNotNull(sum.left), KnownNotNull(sum.right), useAnsiAdd)), isEmpty.left && isEmpty.right) } else { Seq(coalesce( - Add(coalesce(sum.left, zero), sum.right, failOnError = failOnError), + Add(coalesce(sum.left, zero), sum.right, failOnError = useAnsiAdd), sum.left)) } @@ -145,13 +146,13 @@ abstract class SumBase(child: Expression) extends DeclarativeAggregate protected def getEvaluateExpression: Expression = resultType match { case d: DecimalType => If(isEmpty, Literal.create(null, resultType), - CheckOverflowInSum(sum, d, !failOnError)) + CheckOverflowInSum(sum, d, !useAnsiAdd)) case _ if shouldTrackIsEmpty => If(isEmpty, Literal.create(null, resultType), sum) case _ => sum } - // The flag `failOnError` won't be shown in the `toString` or `toAggString` methods + // The flag `useAnsiAdd` won't be shown in the `toString` or `toAggString` methods override def flatArguments: Iterator[Any] = Iterator(child) } @@ -170,9 +171,9 @@ abstract class SumBase(child: Expression) extends DeclarativeAggregate since = "1.0.0") case class Sum( child: Expression, - failOnError: Boolean = SQLConf.get.ansiEnabled) + useAnsiAdd: Boolean = SQLConf.get.ansiEnabled) extends SumBase(child) { - def this(child: Expression) = this(child, failOnError = SQLConf.get.ansiEnabled) + def this(child: Expression) = this(child, useAnsiAdd = SQLConf.get.ansiEnabled) override def shouldTrackIsEmpty: Boolean = resultType match { case _: DecimalType => true @@ -207,10 +208,10 @@ case class Sum( // scalastyle:on line.size.limit case class TrySum(child: Expression) extends SumBase(child) { - override def failOnError: Boolean = dataType match { - // Double type won't fail, thus the failOnError is always false + override def useAnsiAdd: Boolean = dataType match { + // Double type won't fail, thus useAnsiAdd is always false // For decimal type, it returns NULL on overflow. It behaves the same as TrySum when - // `failOnError` is false. + // `useAnsiAdd` is false. case _: DoubleType | _: DecimalType => false case _ => true } @@ -224,7 +225,7 @@ case class TrySum(child: Expression) extends SumBase(child) { } override lazy val updateExpressions: Seq[Expression] = - if (failOnError) { + if (useAnsiAdd) { val expressions = getUpdateExpressions // If the length of updateExpressions is larger than 1, the tail expressions are for // tracking whether the input is empty, which doesn't need `TryEval` execution. @@ -234,14 +235,14 @@ case class TrySum(child: Expression) extends SumBase(child) { } override lazy val mergeExpressions: Seq[Expression] = - if (failOnError) { + if (useAnsiAdd) { getMergeExpressions.map(TryEval) } else { getMergeExpressions } override lazy val evaluateExpression: Expression = - if (failOnError) { + if (useAnsiAdd) { TryEval(getEvaluateExpression) } else { getEvaluateExpression diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 14902b0854987..9f8faf517a4ba 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,6 +1,6 @@ ## Summary - - Number of queries: 387 + - Number of queries: 388 - Number of expressions that missing example: 12 - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions @@ -380,6 +380,7 @@ | org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | stddev | SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.StddevSamp | stddev_samp | SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.Sum | sum | SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.TryAverage | try_avg | SELECT try_avg(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.TrySum | try_sum | SELECT try_sum(col) FROM VALUES (5), (10), (15) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.VariancePop | var_pop | SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.VarianceSamp | var_samp | SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col) | struct | @@ -392,4 +393,4 @@ | org.apache.spark.sql.catalyst.expressions.xml.XPathList | xpath | SELECT xpath('b1b2b3c1c2','a/b/text()') | structb1b2b3c1c2, a/b/text()):array> | | org.apache.spark.sql.catalyst.expressions.xml.XPathLong | xpath_long | SELECT xpath_long('12', 'sum(a/b)') | struct12, sum(a/b)):bigint> | | org.apache.spark.sql.catalyst.expressions.xml.XPathShort | xpath_short | SELECT xpath_short('12', 'sum(a/b)') | struct12, sum(a/b)):smallint> | -| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('bcc','a/c') | structbcc, a/c):string> | +| org.apache.spark.sql.catalyst.expressions.xml.XPathString | xpath_string | SELECT xpath_string('bcc','a/c') | structbcc, a/c):string> | \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/inputs/try_aggregates.sql b/sql/core/src/test/resources/sql-tests/inputs/try_aggregates.sql index ffa8eefe82831..cdd2e6323198e 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/try_aggregates.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/try_aggregates.sql @@ -11,3 +11,17 @@ SELECT try_sum(col) FROM VALUES (interval '1 months'), (interval '1 months') AS SELECT try_sum(col) FROM VALUES (interval '2147483647 months'), (interval '1 months') AS tab(col); SELECT try_sum(col) FROM VALUES (interval '1 seconds'), (interval '1 seconds') AS tab(col); SELECT try_sum(col) FROM VALUES (interval '106751991 DAYS'), (interval '1 DAYS') AS tab(col); + +-- try_avg +SELECT try_avg(col) FROM VALUES (5), (10), (15) AS tab(col); +SELECT try_avg(col) FROM VALUES (5.0), (10.0), (15.0) AS tab(col); +SELECT try_avg(col) FROM VALUES (NULL), (10), (15) AS tab(col); +SELECT try_avg(col) FROM VALUES (NULL), (NULL) AS tab(col); +SELECT try_avg(col) FROM VALUES (9223372036854775807L), (1L) AS tab(col); +-- test overflow in Decimal(38, 0) +SELECT try_avg(col) FROM VALUES (98765432109876543210987654321098765432BD), (98765432109876543210987654321098765432BD) AS tab(col); + +SELECT try_avg(col) FROM VALUES (interval '1 months'), (interval '1 months') AS tab(col); +SELECT try_avg(col) FROM VALUES (interval '2147483647 months'), (interval '1 months') AS tab(col); +SELECT try_avg(col) FROM VALUES (interval '1 seconds'), (interval '1 seconds') AS tab(col); +SELECT try_avg(col) FROM VALUES (interval '106751991 DAYS'), (interval '1 DAYS') AS tab(col); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/try_aggregates.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/try_aggregates.sql.out index 7ae217ad7582b..724553f6bd10c 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/try_aggregates.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/try_aggregates.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 10 +-- Number of queries: 20 -- !query @@ -80,3 +80,83 @@ SELECT try_sum(col) FROM VALUES (interval '106751991 DAYS'), (interval '1 DAYS') struct -- !query output NULL + + +-- !query +SELECT try_avg(col) FROM VALUES (5), (10), (15) AS tab(col) +-- !query schema +struct +-- !query output +10.0 + + +-- !query +SELECT try_avg(col) FROM VALUES (5.0), (10.0), (15.0) AS tab(col) +-- !query schema +struct +-- !query output +10.00000 + + +-- !query +SELECT try_avg(col) FROM VALUES (NULL), (10), (15) AS tab(col) +-- !query schema +struct +-- !query output +12.5 + + +-- !query +SELECT try_avg(col) FROM VALUES (NULL), (NULL) AS tab(col) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_avg(col) FROM VALUES (9223372036854775807L), (1L) AS tab(col) +-- !query schema +struct +-- !query output +4.6116860184273879E18 + + +-- !query +SELECT try_avg(col) FROM VALUES (98765432109876543210987654321098765432BD), (98765432109876543210987654321098765432BD) AS tab(col) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_avg(col) FROM VALUES (interval '1 months'), (interval '1 months') AS tab(col) +-- !query schema +struct +-- !query output +0-1 + + +-- !query +SELECT try_avg(col) FROM VALUES (interval '2147483647 months'), (interval '1 months') AS tab(col) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_avg(col) FROM VALUES (interval '1 seconds'), (interval '1 seconds') AS tab(col) +-- !query schema +struct +-- !query output +0 00:00:01.000000000 + + +-- !query +SELECT try_avg(col) FROM VALUES (interval '106751991 DAYS'), (interval '1 DAYS') AS tab(col) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/try_aggregates.sql.out b/sql/core/src/test/resources/sql-tests/results/try_aggregates.sql.out index 7ae217ad7582b..724553f6bd10c 100644 --- a/sql/core/src/test/resources/sql-tests/results/try_aggregates.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/try_aggregates.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 10 +-- Number of queries: 20 -- !query @@ -80,3 +80,83 @@ SELECT try_sum(col) FROM VALUES (interval '106751991 DAYS'), (interval '1 DAYS') struct -- !query output NULL + + +-- !query +SELECT try_avg(col) FROM VALUES (5), (10), (15) AS tab(col) +-- !query schema +struct +-- !query output +10.0 + + +-- !query +SELECT try_avg(col) FROM VALUES (5.0), (10.0), (15.0) AS tab(col) +-- !query schema +struct +-- !query output +10.00000 + + +-- !query +SELECT try_avg(col) FROM VALUES (NULL), (10), (15) AS tab(col) +-- !query schema +struct +-- !query output +12.5 + + +-- !query +SELECT try_avg(col) FROM VALUES (NULL), (NULL) AS tab(col) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_avg(col) FROM VALUES (9223372036854775807L), (1L) AS tab(col) +-- !query schema +struct +-- !query output +4.6116860184273879E18 + + +-- !query +SELECT try_avg(col) FROM VALUES (98765432109876543210987654321098765432BD), (98765432109876543210987654321098765432BD) AS tab(col) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_avg(col) FROM VALUES (interval '1 months'), (interval '1 months') AS tab(col) +-- !query schema +struct +-- !query output +0-1 + + +-- !query +SELECT try_avg(col) FROM VALUES (interval '2147483647 months'), (interval '1 months') AS tab(col) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT try_avg(col) FROM VALUES (interval '1 seconds'), (interval '1 seconds') AS tab(col) +-- !query schema +struct +-- !query output +0 00:00:01.000000000 + + +-- !query +SELECT try_avg(col) FROM VALUES (interval '106751991 DAYS'), (interval '1 DAYS') AS tab(col) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index f43fbeffab003..309396543d46c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4341,6 +4341,18 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark checkAnswer(df.repartitionByRange(2, col("v")).selectExpr("try_sum(v)"), Row(null)) } } + + test("SPARK-38589: try_avg should return null if overflow happens before merging") { + val yearMonthDf = Seq(Int.MaxValue, Int.MaxValue, 2) + .map(Period.ofMonths) + .toDF("v") + val dayTimeDf = Seq(106751991L, 106751991L, 2L) + .map(Duration.ofDays) + .toDF("v") + Seq(yearMonthDf, dayTimeDf).foreach { df => + checkAnswer(df.repartitionByRange(2, col("v")).selectExpr("try_avg(v)"), Row(null)) + } + } } case class Foo(bar: Option[String]) From 76fa565fac36f4fd94b181b213611fa716092a99 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Wed, 13 Apr 2022 09:21:55 +0900 Subject: [PATCH 130/535] [SPARK-38882][PYTHON] Fix usage logger attachment to handle static methods properly ### What changes were proposed in this pull request? Fixes usage logger attachment to handle static methods properly. ### Why are the changes needed? The usage logger attachment logic has an issue when handling static methods. For example, ``` $ PYSPARK_PANDAS_USAGE_LOGGER=pyspark.pandas.usage_logging.usage_logger ./bin/pyspark ``` ```py >>> import pyspark.pandas as ps >>> psdf = ps.DataFrame({"a": [1,2,3], "b": [4,5,6]}) >>> psdf.from_records([(1, 2), (3, 4)]) A function `DataFrame.from_records(data, index, exclude, columns, coerce_float, nrows)` was failed after 2007.430 ms: 0 Traceback (most recent call last): ... ``` without usage logger: ```py >>> import pyspark.pandas as ps >>> psdf = ps.DataFrame({"a": [1,2,3], "b": [4,5,6]}) >>> psdf.from_records([(1, 2), (3, 4)]) 0 1 0 1 2 1 3 4 ``` ### Does this PR introduce _any_ user-facing change? Yes, for a user attaches the usage logger, static methods will work as static methods. ### How was this patch tested? Manually tested. ```py >>> import pyspark.pandas as ps >>> import logging >>> import sys >>> root = logging.getLogger() >>> root.setLevel(logging.INFO) >>> handler = logging.StreamHandler(sys.stdout) >>> handler.setLevel(logging.INFO) >>> >>> formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') >>> handler.setFormatter(formatter) >>> root.addHandler(handler) >>> psdf = ps.DataFrame({"a": [1,2,3], "b": [4,5,6]}) 2022-04-12 14:43:52,254 - pyspark.pandas.usage_logger - INFO - A function `DataFrame.__init__(self, data, index, columns, dtype, copy)` was successfully finished after 2714.896 ms. >>> psdf.from_records([(1, 2), (3, 4)]) 2022-04-12 14:43:59,765 - pyspark.pandas.usage_logger - INFO - A function `DataFrame.from_records(data, index, exclude, columns, coerce_float, nrows)` was successfully finished after 51.105 ms. 2022-04-12 14:44:01,371 - pyspark.pandas.usage_logger - INFO - A function `DataFrame.__repr__(self)` was successfully finished after 1605.759 ms. 0 1 0 1 2 1 3 4 >>> ps.DataFrame.from_records([(1, 2), (3, 4)]) 2022-04-12 14:44:25,301 - pyspark.pandas.usage_logger - INFO - A function `DataFrame.from_records(data, index, exclude, columns, coerce_float, nrows)` was successfully finished after 43.446 ms. 2022-04-12 14:44:25,493 - pyspark.pandas.usage_logger - INFO - A function `DataFrame.__repr__(self)` was successfully finished after 192.053 ms. 0 1 0 1 2 1 3 4 ``` Closes #36167 from ueshin/issues/SPARK-38882/staticmethod. Authored-by: Takuya UESHIN Signed-off-by: Hyukjin Kwon (cherry picked from commit 1c1216f18f3008b410a601516b2fde49a9e27f7d) Signed-off-by: Hyukjin Kwon --- python/pyspark/instrumentation_utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/python/pyspark/instrumentation_utils.py b/python/pyspark/instrumentation_utils.py index 908f5cbb3d473..256c09068cdc5 100644 --- a/python/pyspark/instrumentation_utils.py +++ b/python/pyspark/instrumentation_utils.py @@ -163,7 +163,14 @@ def _attach( for name, func in inspect.getmembers(target_class, inspect.isfunction): if name.startswith("_") and name not in special_functions: continue - setattr(target_class, name, _wrap_function(target_class.__name__, name, func, logger)) + try: + isstatic = isinstance(inspect.getattr_static(target_class, name), staticmethod) + except AttributeError: + isstatic = False + wrapped_function = _wrap_function(target_class.__name__, name, func, logger) + setattr( + target_class, name, staticmethod(wrapped_function) if isstatic else wrapped_function + ) for name, prop in inspect.getmembers(target_class, lambda o: isinstance(o, property)): if name.startswith("_"): From 2e0a21ae1d33a5247af8ea228e7c687040f7cd5f Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Tue, 12 Apr 2022 22:17:54 -0700 Subject: [PATCH 131/535] [SPARK-38865][SQL][DOCS] Update document of JDBC options for `pushDownAggregate` and `pushDownLimit` ### What changes were proposed in this pull request? Because the DS v2 pushdown framework refactored, we need to add more doc in `sql-data-sources-jdbc.md` to reflect the new changes. ### Why are the changes needed? Add doc for new changes for `pushDownAggregate` and `pushDownLimit`. ### Does this PR introduce _any_ user-facing change? 'No'. Updated for new feature. ### How was this patch tested? N/A Closes #36152 from beliefer/SPARK-38865. Authored-by: Jiaan Geng Signed-off-by: huaxingao (cherry picked from commit 988af33af8d35316aa131dab01814fd31fc6b59a) Signed-off-by: huaxingao --- docs/sql-data-sources-jdbc.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sql-data-sources-jdbc.md b/docs/sql-data-sources-jdbc.md index c57bc072df9e2..e17c8f686fce5 100644 --- a/docs/sql-data-sources-jdbc.md +++ b/docs/sql-data-sources-jdbc.md @@ -241,7 +241,7 @@ logging into the data sources.

        @@ -250,7 +250,7 @@ logging into the data sources. From 4ccd530f639e3652b7aad7c8bcfa379847dc2b68 Mon Sep 17 00:00:00 2001 From: Anton Okolnychyi Date: Wed, 13 Apr 2022 13:47:00 +0800 Subject: [PATCH 132/535] [SPARK-38085][SQL] DataSource V2: Handle DELETE commands for group-based sources This PR contains changes to rewrite DELETE operations for V2 data sources that can replace groups of data (e.g. files, partitions). These changes are needed to support row-level operations in Spark per SPIP SPARK-35801. No. This PR comes with tests. Closes #35395 from aokolnychyi/spark-38085. Authored-by: Anton Okolnychyi Signed-off-by: Wenchen Fan (cherry picked from commit 5a92eccd514b7bc0513feaecb041aee2f8cd5a24) Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 1 + .../analysis/RewriteDeleteFromTable.scala | 89 +++ .../analysis/RewriteRowLevelCommand.scala | 71 ++ .../ReplaceNullWithFalseInPredicate.scala | 3 +- .../SimplifyConditionalsInPredicate.scala | 1 + .../sql/catalyst/planning/patterns.scala | 51 ++ .../catalyst/plans/logical/v2Commands.scala | 92 ++- .../write/RowLevelOperationInfoImpl.scala | 25 + .../write/RowLevelOperationTable.scala | 51 ++ .../sql/errors/QueryCompilationErrors.scala | 4 + .../v2/DataSourceV2Implicits.scala | 10 + .../InMemoryRowLevelOperationTable.scala | 96 +++ ...nMemoryRowLevelOperationTableCatalog.scala | 46 ++ .../sql/connector/catalog/InMemoryTable.scala | 22 +- .../spark/sql/execution/SparkOptimizer.scala | 7 +- .../datasources/v2/DataSourceV2Strategy.scala | 22 +- ...upBasedRowLevelOperationScanPlanning.scala | 83 +++ .../OptimizeMetadataOnlyDeleteFromTable.scala | 84 +++ .../datasources/v2/PushDownUtils.scala | 2 +- .../execution/datasources/v2/V2Writes.scala | 24 +- .../v2/WriteToDataSourceV2Exec.scala | 15 + .../sql/connector/DeleteFromTableSuite.scala | 629 ++++++++++++++++++ .../command/PlanResolutionSuite.scala | 4 +- 23 files changed, 1407 insertions(+), 25 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteDeleteFromTable.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteRowLevelCommand.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/connector/write/RowLevelOperationInfoImpl.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/connector/write/RowLevelOperationTable.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTable.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTableCatalog.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/GroupBasedRowLevelOperationScanPlanning.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/OptimizeMetadataOnlyDeleteFromTable.scala create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/connector/DeleteFromTableSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6b44483ab1d2d..9fdc466b4259c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -318,6 +318,7 @@ class Analyzer(override val catalogManager: CatalogManager) ResolveRandomSeed :: ResolveBinaryArithmetic :: ResolveUnion :: + RewriteDeleteFromTable :: typeCoercionRules ++ Seq(ResolveWithCTE) ++ extendedResolutionRules : _*), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteDeleteFromTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteDeleteFromTable.scala new file mode 100644 index 0000000000000..85af999902e26 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteDeleteFromTable.scala @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.expressions.{EqualNullSafe, Expression, Not} +import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral +import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, Filter, LogicalPlan, ReplaceData} +import org.apache.spark.sql.connector.catalog.{SupportsDelete, SupportsRowLevelOperations, TruncatableTable} +import org.apache.spark.sql.connector.write.RowLevelOperation.Command.DELETE +import org.apache.spark.sql.connector.write.RowLevelOperationTable +import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +/** + * A rule that rewrites DELETE operations using plans that operate on individual or groups of rows. + * + * If a table implements [[SupportsDelete]] and [[SupportsRowLevelOperations]], this rule will + * still rewrite the DELETE operation but the optimizer will check whether this particular DELETE + * statement can be handled by simply passing delete filters to the connector. If so, the optimizer + * will discard the rewritten plan and will allow the data source to delete using filters. + */ +object RewriteDeleteFromTable extends RewriteRowLevelCommand { + + override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { + case d @ DeleteFromTable(aliasedTable, cond) if d.resolved => + EliminateSubqueryAliases(aliasedTable) match { + case DataSourceV2Relation(_: TruncatableTable, _, _, _, _) if cond == TrueLiteral => + // don't rewrite as the table supports truncation + d + + case r @ DataSourceV2Relation(t: SupportsRowLevelOperations, _, _, _, _) => + val table = buildOperationTable(t, DELETE, CaseInsensitiveStringMap.empty()) + buildReplaceDataPlan(r, table, cond) + + case DataSourceV2Relation(_: SupportsDelete, _, _, _, _) => + // don't rewrite as the table supports deletes only with filters + d + + case DataSourceV2Relation(t, _, _, _, _) => + throw QueryCompilationErrors.tableDoesNotSupportDeletesError(t) + + case _ => + d + } + } + + // build a rewrite plan for sources that support replacing groups of data (e.g. files, partitions) + private def buildReplaceDataPlan( + relation: DataSourceV2Relation, + operationTable: RowLevelOperationTable, + cond: Expression): ReplaceData = { + + // resolve all required metadata attrs that may be used for grouping data on write + // for instance, JDBC data source may cluster data by shard/host before writing + val metadataAttrs = resolveRequiredMetadataAttrs(relation, operationTable.operation) + + // construct a read relation and include all required metadata columns + val readRelation = buildRelationWithAttrs(relation, operationTable, metadataAttrs) + + // construct a plan that contains unmatched rows in matched groups that must be carried over + // such rows do not match the condition but have to be copied over as the source can replace + // only groups of rows (e.g. if a source supports replacing files, unmatched rows in matched + // files must be carried over) + // it is safe to negate the condition here as the predicate pushdown for group-based row-level + // operations is handled in a special way + val remainingRowsFilter = Not(EqualNullSafe(cond, TrueLiteral)) + val remainingRowsPlan = Filter(remainingRowsFilter, readRelation) + + // build a plan to replace read groups in the table + val writeRelation = relation.copy(table = operationTable) + ReplaceData(writeRelation, cond, remainingRowsPlan, relation) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteRowLevelCommand.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteRowLevelCommand.scala new file mode 100644 index 0000000000000..bf8c3e27f4d3a --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/RewriteRowLevelCommand.scala @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, ExprId, V2ExpressionUtils} +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.SupportsRowLevelOperations +import org.apache.spark.sql.connector.write.{RowLevelOperation, RowLevelOperationInfoImpl, RowLevelOperationTable} +import org.apache.spark.sql.connector.write.RowLevelOperation.Command +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +trait RewriteRowLevelCommand extends Rule[LogicalPlan] { + + protected def buildOperationTable( + table: SupportsRowLevelOperations, + command: Command, + options: CaseInsensitiveStringMap): RowLevelOperationTable = { + val info = RowLevelOperationInfoImpl(command, options) + val operation = table.newRowLevelOperationBuilder(info).build() + RowLevelOperationTable(table, operation) + } + + protected def buildRelationWithAttrs( + relation: DataSourceV2Relation, + table: RowLevelOperationTable, + metadataAttrs: Seq[AttributeReference]): DataSourceV2Relation = { + + val attrs = dedupAttrs(relation.output ++ metadataAttrs) + relation.copy(table = table, output = attrs) + } + + protected def dedupAttrs(attrs: Seq[AttributeReference]): Seq[AttributeReference] = { + val exprIds = mutable.Set.empty[ExprId] + attrs.flatMap { attr => + if (exprIds.contains(attr.exprId)) { + None + } else { + exprIds += attr.exprId + Some(attr) + } + } + } + + protected def resolveRequiredMetadataAttrs( + relation: DataSourceV2Relation, + operation: RowLevelOperation): Seq[AttributeReference] = { + + V2ExpressionUtils.resolveRefs[AttributeReference]( + operation.requiredMetadataAttributes, + relation) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala index 9ec498aa14e3c..d060a8be5dad6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceNullWithFalseInPredicate.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.catalyst.expressions.{And, ArrayExists, ArrayFilter, CaseWhen, EqualNullSafe, Expression, If, In, InSet, LambdaFunction, Literal, MapFilter, Not, Or} import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} -import org.apache.spark.sql.catalyst.plans.logical.{DeleteAction, DeleteFromTable, Filter, InsertAction, InsertStarAction, Join, LogicalPlan, MergeAction, MergeIntoTable, UpdateAction, UpdateStarAction, UpdateTable} +import org.apache.spark.sql.catalyst.plans.logical.{DeleteAction, DeleteFromTable, Filter, InsertAction, InsertStarAction, Join, LogicalPlan, MergeAction, MergeIntoTable, ReplaceData, UpdateAction, UpdateStarAction, UpdateTable} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern.{INSET, NULL_LITERAL, TRUE_OR_FALSE_LITERAL} import org.apache.spark.sql.types.BooleanType @@ -54,6 +54,7 @@ object ReplaceNullWithFalseInPredicate extends Rule[LogicalPlan] { _.containsAnyPattern(NULL_LITERAL, TRUE_OR_FALSE_LITERAL, INSET), ruleId) { case f @ Filter(cond, _) => f.copy(condition = replaceNullWithFalse(cond)) case j @ Join(_, _, _, Some(cond), _) => j.copy(condition = Some(replaceNullWithFalse(cond))) + case rd @ ReplaceData(_, cond, _, _, _) => rd.copy(condition = replaceNullWithFalse(cond)) case d @ DeleteFromTable(_, cond) => d.copy(condition = replaceNullWithFalse(cond)) case u @ UpdateTable(_, _, Some(cond)) => u.copy(condition = Some(replaceNullWithFalse(cond))) case m @ MergeIntoTable(_, _, mergeCond, matchedActions, notMatchedActions) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala index e1972b997c2be..34773b24cacbe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala @@ -48,6 +48,7 @@ object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { _.containsAnyPattern(CASE_WHEN, IF), ruleId) { case f @ Filter(cond, _) => f.copy(condition = simplifyConditional(cond)) case j @ Join(_, _, _, Some(cond), _) => j.copy(condition = Some(simplifyConditional(cond))) + case rd @ ReplaceData(_, cond, _, _, _) => rd.copy(condition = simplifyConditional(cond)) case d @ DeleteFromTable(_, cond) => d.copy(condition = simplifyConditional(cond)) case u @ UpdateTable(_, _, Some(cond)) => u.copy(condition = Some(simplifyConditional(cond))) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala index 8c41ab2797bf1..382909d6d6f71 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala @@ -18,12 +18,15 @@ package org.apache.spark.sql.catalyst.planning import org.apache.spark.internal.Logging +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.optimizer.JoinSelectionHelper import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.connector.catalog.Table import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, DataSourceV2ScanRelation} import org.apache.spark.sql.internal.SQLConf trait OperationHelper extends AliasHelper with PredicateHelper { @@ -388,3 +391,51 @@ object ExtractSingleColumnNullAwareAntiJoin extends JoinSelectionHelper with Pre case _ => None } } + +/** + * An extractor for row-level commands such as DELETE, UPDATE, MERGE that were rewritten using plans + * that operate on groups of rows. + * + * This class extracts the following entities: + * - the group-based rewrite plan; + * - the condition that defines matching groups; + * - the read relation that can be either [[DataSourceV2Relation]] or [[DataSourceV2ScanRelation]] + * depending on whether the planning has already happened; + */ +object GroupBasedRowLevelOperation { + type ReturnType = (ReplaceData, Expression, LogicalPlan) + + def unapply(plan: LogicalPlan): Option[ReturnType] = plan match { + case rd @ ReplaceData(DataSourceV2Relation(table, _, _, _, _), cond, query, _, _) => + val readRelation = findReadRelation(table, query) + readRelation.map((rd, cond, _)) + + case _ => + None + } + + private def findReadRelation( + table: Table, + plan: LogicalPlan): Option[LogicalPlan] = { + + val readRelations = plan.collect { + case r: DataSourceV2Relation if r.table eq table => r + case r: DataSourceV2ScanRelation if r.relation.table eq table => r + } + + // in some cases, the optimizer replaces the v2 read relation with a local relation + // for example, there is no reason to query the table if the condition is always false + // that's why it is valid not to find the corresponding v2 read relation + + readRelations match { + case relations if relations.isEmpty => + None + + case Seq(relation) => + Some(relation) + + case relations => + throw new AnalysisException(s"Expected only one row-level read relation: $relations") + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala index b2ca34668a6f6..b1b8843aa33be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2Commands.scala @@ -17,16 +17,18 @@ package org.apache.spark.sql.catalyst.plans.logical -import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, FieldName, NamedRelation, PartitionSpec, ResolvedDBObjectName, UnresolvedException} +import org.apache.spark.sql.{sources, AnalysisException} +import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, EliminateSubqueryAliases, FieldName, NamedRelation, PartitionSpec, ResolvedDBObjectName, UnresolvedException} import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.catalog.FunctionResource -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression, Unevaluable} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, Expression, MetadataAttribute, Unevaluable} import org.apache.spark.sql.catalyst.plans.DescribeCommandSchema import org.apache.spark.sql.catalyst.trees.BinaryLike import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.expressions.Transform -import org.apache.spark.sql.connector.write.Write +import org.apache.spark.sql.connector.write.{RowLevelOperation, RowLevelOperationTable, Write} +import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation import org.apache.spark.sql.types.{BooleanType, DataType, MetadataBuilder, StringType, StructType} /** @@ -176,6 +178,80 @@ object OverwritePartitionsDynamic { } } +trait RowLevelWrite extends V2WriteCommand with SupportsSubquery { + def operation: RowLevelOperation + def condition: Expression + def originalTable: NamedRelation +} + +/** + * Replace groups of data in an existing table during a row-level operation. + * + * This node is constructed in rules that rewrite DELETE, UPDATE, MERGE operations for data sources + * that can replace groups of data (e.g. files, partitions). + * + * @param table a plan that references a row-level operation table + * @param condition a condition that defines matching groups + * @param query a query with records that should replace the records that were read + * @param originalTable a plan for the original table for which the row-level command was triggered + * @param write a logical write, if already constructed + */ +case class ReplaceData( + table: NamedRelation, + condition: Expression, + query: LogicalPlan, + originalTable: NamedRelation, + write: Option[Write] = None) extends RowLevelWrite { + + override val isByName: Boolean = false + override val stringArgs: Iterator[Any] = Iterator(table, query, write) + + override lazy val references: AttributeSet = query.outputSet + + lazy val operation: RowLevelOperation = { + EliminateSubqueryAliases(table) match { + case DataSourceV2Relation(RowLevelOperationTable(_, operation), _, _, _, _) => + operation + case _ => + throw new AnalysisException(s"Cannot retrieve row-level operation from $table") + } + } + + // the incoming query may include metadata columns + lazy val dataInput: Seq[Attribute] = { + query.output.filter { + case MetadataAttribute(_) => false + case _ => true + } + } + + override def outputResolved: Boolean = { + assert(table.resolved && query.resolved, + "`outputResolved` can only be called when `table` and `query` are both resolved.") + + // take into account only incoming data columns and ignore metadata columns in the query + // they will be discarded after the logical write is built in the optimizer + // metadata columns may be needed to request a correct distribution or ordering + // but are not passed back to the data source during writes + + table.skipSchemaResolution || (dataInput.size == table.output.size && + dataInput.zip(table.output).forall { case (inAttr, outAttr) => + val outType = CharVarcharUtils.getRawType(outAttr.metadata).getOrElse(outAttr.dataType) + // names and types must match, nullability must be compatible + inAttr.name == outAttr.name && + DataType.equalsIgnoreCompatibleNullability(inAttr.dataType, outType) && + (outAttr.nullable || !inAttr.nullable) + }) + } + + override def withNewQuery(newQuery: LogicalPlan): ReplaceData = copy(query = newQuery) + + override def withNewTable(newTable: NamedRelation): ReplaceData = copy(table = newTable) + + override protected def withNewChildInternal(newChild: LogicalPlan): ReplaceData = { + copy(query = newChild) + } +} /** A trait used for logical plan nodes that create or replace V2 table definitions. */ trait V2CreateTablePlan extends LogicalPlan { @@ -457,6 +533,16 @@ case class DeleteFromTable( copy(table = newChild) } +/** + * The logical plan of the DELETE FROM command that can be executed using data source filters. + * + * As opposed to [[DeleteFromTable]], this node represents a DELETE operation where the condition + * was converted into filters and the data source reported that it can handle all of them. + */ +case class DeleteFromTableWithFilters( + table: LogicalPlan, + condition: Seq[sources.Filter]) extends LeafCommand + /** * The logical plan of the UPDATE TABLE command. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/write/RowLevelOperationInfoImpl.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/write/RowLevelOperationInfoImpl.scala new file mode 100644 index 0000000000000..9d499cdef361b --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/write/RowLevelOperationInfoImpl.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.write + +import org.apache.spark.sql.connector.write.RowLevelOperation.Command +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +private[sql] case class RowLevelOperationInfoImpl( + command: Command, + options: CaseInsensitiveStringMap) extends RowLevelOperationInfo diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/write/RowLevelOperationTable.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/write/RowLevelOperationTable.scala new file mode 100644 index 0000000000000..d1f7ba000c62a --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/write/RowLevelOperationTable.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.write + +import java.util + +import org.apache.spark.sql.connector.catalog.{SupportsRead, SupportsRowLevelOperations, SupportsWrite, Table, TableCapability} +import org.apache.spark.sql.connector.read.ScanBuilder +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +/** + * An internal v2 table implementation that wraps the original table and a logical row-level + * operation for DELETE, UPDATE, MERGE commands that require rewriting data. + * + * The purpose of this table is to make the existing scan and write planning rules work + * with commands that require coordination between the scan and the write (so that the write + * knows what to replace). + */ +private[sql] case class RowLevelOperationTable( + table: Table with SupportsRowLevelOperations, + operation: RowLevelOperation) extends Table with SupportsRead with SupportsWrite { + + override def name: String = table.name + override def schema: StructType = table.schema + override def capabilities: util.Set[TableCapability] = table.capabilities + override def toString: String = table.toString + + override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { + operation.newScanBuilder(options) + } + + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = { + operation.newWriteBuilder(info) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 57ed7da7b2051..0532a953ef4e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -926,6 +926,10 @@ object QueryCompilationErrors { tableDoesNotSupportError("atomic partition management", table) } + def tableIsNotRowLevelOperationTableError(table: Table): Throwable = { + throw new AnalysisException(s"Table ${table.name} is not a row-level operation table") + } + def cannotRenameTableWithAlterViewError(): Throwable = { new AnalysisException( "Cannot rename a table with ALTER VIEW. Please use ALTER TABLE instead.") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala index efd3ffebf5c1f..16d5a9cc70d18 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Implicits.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.analysis.{PartitionSpec, ResolvedPartitionS import org.apache.spark.sql.catalyst.expressions.AttributeReference import org.apache.spark.sql.catalyst.util.METADATA_COL_ATTR_KEY import org.apache.spark.sql.connector.catalog.{MetadataColumn, SupportsAtomicPartitionManagement, SupportsDelete, SupportsPartitionManagement, SupportsRead, SupportsWrite, Table, TableCapability, TruncatableTable} +import org.apache.spark.sql.connector.write.RowLevelOperationTable import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.types.{MetadataBuilder, StructField, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap @@ -82,6 +83,15 @@ object DataSourceV2Implicits { } } + def asRowLevelOperationTable: RowLevelOperationTable = { + table match { + case rowLevelOperationTable: RowLevelOperationTable => + rowLevelOperationTable + case _ => + throw QueryCompilationErrors.tableIsNotRowLevelOperationTableError(table) + } + } + def supports(capability: TableCapability): Boolean = table.capabilities.contains(capability) def supportsAny(capabilities: TableCapability*): Boolean = capabilities.exists(supports) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTable.scala new file mode 100644 index 0000000000000..cb061602ec151 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTable.scala @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.catalog + +import java.util + +import org.apache.spark.sql.connector.distributions.{Distribution, Distributions} +import org.apache.spark.sql.connector.expressions.{FieldReference, LogicalExpressions, NamedReference, SortDirection, SortOrder, Transform} +import org.apache.spark.sql.connector.read.{Scan, ScanBuilder} +import org.apache.spark.sql.connector.write.{BatchWrite, LogicalWriteInfo, RequiresDistributionAndOrdering, RowLevelOperation, RowLevelOperationBuilder, RowLevelOperationInfo, Write, WriteBuilder, WriterCommitMessage} +import org.apache.spark.sql.connector.write.RowLevelOperation.Command +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.CaseInsensitiveStringMap + +class InMemoryRowLevelOperationTable( + name: String, + schema: StructType, + partitioning: Array[Transform], + properties: util.Map[String, String]) + extends InMemoryTable(name, schema, partitioning, properties) with SupportsRowLevelOperations { + + override def newRowLevelOperationBuilder( + info: RowLevelOperationInfo): RowLevelOperationBuilder = { + () => PartitionBasedOperation(info.command) + } + + case class PartitionBasedOperation(command: Command) extends RowLevelOperation { + private final val PARTITION_COLUMN_REF = FieldReference(PartitionKeyColumn.name) + + var configuredScan: InMemoryBatchScan = _ + + override def requiredMetadataAttributes(): Array[NamedReference] = { + Array(PARTITION_COLUMN_REF) + } + + override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = { + new InMemoryScanBuilder(schema) { + override def build: Scan = { + val scan = super.build() + configuredScan = scan.asInstanceOf[InMemoryBatchScan] + scan + } + } + } + + override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = new WriteBuilder { + + override def build(): Write = new Write with RequiresDistributionAndOrdering { + override def requiredDistribution(): Distribution = { + Distributions.clustered(Array(PARTITION_COLUMN_REF)) + } + + override def requiredOrdering(): Array[SortOrder] = { + Array[SortOrder]( + LogicalExpressions.sort( + PARTITION_COLUMN_REF, + SortDirection.ASCENDING, + SortDirection.ASCENDING.defaultNullOrdering()) + ) + } + + override def toBatch: BatchWrite = PartitionBasedReplaceData(configuredScan) + + override def description(): String = "InMemoryWrite" + } + } + + override def description(): String = "InMemoryPartitionReplaceOperation" + } + + private case class PartitionBasedReplaceData(scan: InMemoryBatchScan) extends TestBatchWrite { + + override def commit(messages: Array[WriterCommitMessage]): Unit = dataMap.synchronized { + val newData = messages.map(_.asInstanceOf[BufferedRows]) + val readRows = scan.data.flatMap(_.asInstanceOf[BufferedRows].rows) + val readPartitions = readRows.map(r => getKey(r, schema)) + dataMap --= readPartitions + withData(newData, schema) + } + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTableCatalog.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTableCatalog.scala new file mode 100644 index 0000000000000..2d9a9f04785e7 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryRowLevelOperationTableCatalog.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector.catalog + +import java.util + +import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +import org.apache.spark.sql.connector.expressions.Transform +import org.apache.spark.sql.types.StructType + +class InMemoryRowLevelOperationTableCatalog extends InMemoryTableCatalog { + import CatalogV2Implicits._ + + override def createTable( + ident: Identifier, + schema: StructType, + partitions: Array[Transform], + properties: util.Map[String, String]): Table = { + if (tables.containsKey(ident)) { + throw new TableAlreadyExistsException(ident) + } + + InMemoryTableCatalog.maybeSimulateFailedTableCreation(properties) + + val tableName = s"$name.${ident.quoted}" + val table = new InMemoryRowLevelOperationTable(tableName, schema, partitions, properties) + tables.put(ident, table) + namespaces.putIfAbsent(ident.namespace.toList, Map()) + table + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala index a762b0f87839f..beed9111a308a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala @@ -56,7 +56,7 @@ class InMemoryTable( extends Table with SupportsRead with SupportsWrite with SupportsDelete with SupportsMetadataColumns { - private object PartitionKeyColumn extends MetadataColumn { + protected object PartitionKeyColumn extends MetadataColumn { override def name: String = "_partition" override def dataType: DataType = StringType override def comment: String = "Partition key used to store the row" @@ -104,7 +104,11 @@ class InMemoryTable( private val UTC = ZoneId.of("UTC") private val EPOCH_LOCAL_DATE = Instant.EPOCH.atZone(UTC).toLocalDate - private def getKey(row: InternalRow): Seq[Any] = { + protected def getKey(row: InternalRow): Seq[Any] = { + getKey(row, schema) + } + + protected def getKey(row: InternalRow, rowSchema: StructType): Seq[Any] = { @scala.annotation.tailrec def extractor( fieldNames: Array[String], @@ -124,7 +128,7 @@ class InMemoryTable( } } - val cleanedSchema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(schema) + val cleanedSchema = CharVarcharUtils.replaceCharVarcharWithStringInSchema(rowSchema) partitioning.map { case IdentityTransform(ref) => extractor(ref.fieldNames, cleanedSchema, row)._1 @@ -219,9 +223,15 @@ class InMemoryTable( dataMap(key).clear() } - def withData(data: Array[BufferedRows]): InMemoryTable = dataMap.synchronized { + def withData(data: Array[BufferedRows]): InMemoryTable = { + withData(data, schema) + } + + def withData( + data: Array[BufferedRows], + writeSchema: StructType): InMemoryTable = dataMap.synchronized { data.foreach(_.rows.foreach { row => - val key = getKey(row) + val key = getKey(row, writeSchema) dataMap += dataMap.get(key) .map(key -> _.withRow(row)) .getOrElse(key -> new BufferedRows(key).withRow(row)) @@ -372,7 +382,7 @@ class InMemoryTable( } } - private abstract class TestBatchWrite extends BatchWrite { + protected abstract class TestBatchWrite extends BatchWrite { override def createBatchWriterFactory(info: PhysicalWriteInfo): DataWriterFactory = { BufferedRowsWriterFactory } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index bfe4bd2924118..8c134363af112 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions import org.apache.spark.sql.execution.datasources.SchemaPruning -import org.apache.spark.sql.execution.datasources.v2.{V2ScanPartitioning, V2ScanRelationPushDown, V2Writes} +import org.apache.spark.sql.execution.datasources.v2.{GroupBasedRowLevelOperationScanPlanning, OptimizeMetadataOnlyDeleteFromTable, V2ScanPartitioning, V2ScanRelationPushDown, V2Writes} import org.apache.spark.sql.execution.dynamicpruning.{CleanupDynamicPruningFilters, PartitionPruning} import org.apache.spark.sql.execution.python.{ExtractGroupingPythonUDFFromAggregate, ExtractPythonUDFFromAggregate, ExtractPythonUDFs} @@ -38,11 +38,15 @@ class SparkOptimizer( override def earlyScanPushDownRules: Seq[Rule[LogicalPlan]] = // TODO: move SchemaPruning into catalyst Seq(SchemaPruning) :+ + GroupBasedRowLevelOperationScanPlanning :+ V2ScanRelationPushDown :+ V2ScanPartitioning :+ V2Writes :+ PruneFileSourcePartitions + override def preCBORules: Seq[Rule[LogicalPlan]] = + OptimizeMetadataOnlyDeleteFromTable :: Nil + override def defaultBatches: Seq[Batch] = (preOptimizationBatches ++ super.defaultBatches :+ Batch("Optimize Metadata Only Query", Once, OptimizeMetadataOnlyQuery(catalog)) :+ Batch("PartitionPruning", Once, @@ -78,6 +82,7 @@ class SparkOptimizer( ExtractPythonUDFFromJoinCondition.ruleName :+ ExtractPythonUDFFromAggregate.ruleName :+ ExtractGroupingPythonUDFFromAggregate.ruleName :+ ExtractPythonUDFs.ruleName :+ + GroupBasedRowLevelOperationScanPlanning.ruleName :+ V2ScanRelationPushDown.ruleName :+ V2ScanPartitioning.ruleName :+ V2Writes.ruleName diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 45540fb4a1122..95418027187cb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -25,10 +25,11 @@ import org.apache.spark.sql.catalyst.analysis.{ResolvedDBObjectName, ResolvedNam import org.apache.spark.sql.catalyst.catalog.CatalogUtils import org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.expressions.{And, Attribute, DynamicPruning, Expression, NamedExpression, Not, Or, PredicateHelper, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.util.{toPrettySQL, V2ExpressionBuilder} -import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, Table, TableCapability, TableCatalog} +import org.apache.spark.sql.connector.catalog.{Identifier, StagingTableCatalog, SupportsDelete, SupportsNamespaces, SupportsPartitionManagement, SupportsWrite, Table, TableCapability, TableCatalog, TruncatableTable} import org.apache.spark.sql.connector.catalog.index.SupportsIndex import org.apache.spark.sql.connector.expressions.{FieldReference} import org.apache.spark.sql.connector.expressions.filter.{And => V2And, Not => V2Not, Or => V2Or, Predicate} @@ -254,6 +255,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat case OverwritePartitionsDynamic(r: DataSourceV2Relation, query, _, _, Some(write)) => OverwritePartitionsDynamicExec(planLater(query), refreshCache(r), write) :: Nil + case DeleteFromTableWithFilters(r: DataSourceV2Relation, filters) => + DeleteFromTableExec(r.table.asDeletable, filters.toArray, refreshCache(r)) :: Nil + case DeleteFromTable(relation, condition) => relation match { case DataSourceV2ScanRelation(r, _, output, _) => @@ -269,15 +273,25 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat throw QueryCompilationErrors.cannotTranslateExpressionToSourceFilterError(f)) }).toArray - if (!table.asDeletable.canDeleteWhere(filters)) { - throw QueryCompilationErrors.cannotDeleteTableWhereFiltersError(table, filters) + table match { + case t: SupportsDelete if t.canDeleteWhere(filters) => + DeleteFromTableExec(t, filters, refreshCache(r)) :: Nil + case t: SupportsDelete => + throw QueryCompilationErrors.cannotDeleteTableWhereFiltersError(t, filters) + case t: TruncatableTable if condition == TrueLiteral => + TruncateTableExec(t, refreshCache(r)) :: Nil + case _ => + throw QueryCompilationErrors.tableDoesNotSupportDeletesError(table) } - DeleteFromTableExec(table.asDeletable, filters, refreshCache(r)) :: Nil case _ => throw QueryCompilationErrors.deleteOnlySupportedWithV2TablesError() } + case ReplaceData(_: DataSourceV2Relation, _, query, r: DataSourceV2Relation, Some(write)) => + // use the original relation to refresh the cache + ReplaceDataExec(planLater(query), refreshCache(r), write) :: Nil + case WriteToContinuousDataSource(writer, query, customMetrics) => WriteToContinuousDataSourceExec(writer, planLater(query), customMetrics) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/GroupBasedRowLevelOperationScanPlanning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/GroupBasedRowLevelOperationScanPlanning.scala new file mode 100644 index 0000000000000..48dee3f652c6f --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/GroupBasedRowLevelOperationScanPlanning.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.expressions.{AttributeReference, AttributeSet, Expression, PredicateHelper, SubqueryExpression} +import org.apache.spark.sql.catalyst.planning.GroupBasedRowLevelOperation +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, ReplaceData} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.expressions.filter.{Predicate => V2Filter} +import org.apache.spark.sql.connector.read.ScanBuilder +import org.apache.spark.sql.execution.datasources.DataSourceStrategy +import org.apache.spark.sql.sources.Filter + +/** + * A rule that builds scans for group-based row-level operations. + * + * Note this rule must be run before [[V2ScanRelationPushDown]] as scans for group-based + * row-level operations must be planned in a special way. + */ +object GroupBasedRowLevelOperationScanPlanning extends Rule[LogicalPlan] with PredicateHelper { + + import DataSourceV2Implicits._ + + override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { + // push down the filter from the command condition instead of the filter in the rewrite plan, + // which is negated for data sources that only support replacing groups of data (e.g. files) + case GroupBasedRowLevelOperation(rd: ReplaceData, cond, relation: DataSourceV2Relation) => + val table = relation.table.asRowLevelOperationTable + val scanBuilder = table.newScanBuilder(relation.options) + + val (pushedFilters, remainingFilters) = pushFilters(cond, relation.output, scanBuilder) + val pushedFiltersStr = if (pushedFilters.isLeft) { + pushedFilters.left.get.mkString(", ") + } else { + pushedFilters.right.get.mkString(", ") + } + + val (scan, output) = PushDownUtils.pruneColumns(scanBuilder, relation, relation.output, Nil) + + logInfo( + s""" + |Pushing operators to ${relation.name} + |Pushed filters: $pushedFiltersStr + |Filters that were not pushed: ${remainingFilters.mkString(", ")} + |Output: ${output.mkString(", ")} + """.stripMargin) + + // replace DataSourceV2Relation with DataSourceV2ScanRelation for the row operation table + rd transform { + case r: DataSourceV2Relation if r eq relation => + DataSourceV2ScanRelation(r, scan, PushDownUtils.toOutputAttrs(scan.readSchema(), r)) + } + } + + private def pushFilters( + cond: Expression, + tableAttrs: Seq[AttributeReference], + scanBuilder: ScanBuilder): (Either[Seq[Filter], Seq[V2Filter]], Seq[Expression]) = { + + val tableAttrSet = AttributeSet(tableAttrs) + val filters = splitConjunctivePredicates(cond).filter(_.references.subsetOf(tableAttrSet)) + val normalizedFilters = DataSourceStrategy.normalizeExprs(filters, tableAttrs) + val (_, normalizedFiltersWithoutSubquery) = + normalizedFilters.partition(SubqueryExpression.hasSubquery) + + PushDownUtils.pushFilters(scanBuilder, normalizedFiltersWithoutSubquery) + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/OptimizeMetadataOnlyDeleteFromTable.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/OptimizeMetadataOnlyDeleteFromTable.scala new file mode 100644 index 0000000000000..bc45dbe9fef96 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/OptimizeMetadataOnlyDeleteFromTable.scala @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.datasources.v2 + +import org.apache.spark.sql.catalyst.expressions.{Expression, PredicateHelper, SubqueryExpression} +import org.apache.spark.sql.catalyst.expressions.Literal.TrueLiteral +import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, DeleteFromTableWithFilters, LogicalPlan, ReplaceData, RowLevelWrite} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.connector.catalog.{SupportsDelete, TruncatableTable} +import org.apache.spark.sql.connector.write.RowLevelOperation +import org.apache.spark.sql.connector.write.RowLevelOperation.Command.DELETE +import org.apache.spark.sql.execution.datasources.DataSourceStrategy +import org.apache.spark.sql.sources + +/** + * A rule that replaces a rewritten DELETE operation with a delete using filters if the data source + * can handle this DELETE command without executing the plan that operates on individual or groups + * of rows. + * + * Note this rule must be run after expression optimization but before scan planning. + */ +object OptimizeMetadataOnlyDeleteFromTable extends Rule[LogicalPlan] with PredicateHelper { + + override def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case RewrittenRowLevelCommand(rowLevelPlan, DELETE, cond, relation: DataSourceV2Relation) => + relation.table match { + case table: SupportsDelete if !SubqueryExpression.hasSubquery(cond) => + val predicates = splitConjunctivePredicates(cond) + val normalizedPredicates = DataSourceStrategy.normalizeExprs(predicates, relation.output) + val filters = toDataSourceFilters(normalizedPredicates) + val allPredicatesTranslated = normalizedPredicates.size == filters.length + if (allPredicatesTranslated && table.canDeleteWhere(filters)) { + logDebug(s"Switching to delete with filters: ${filters.mkString("[", ", ", "]")}") + DeleteFromTableWithFilters(relation, filters) + } else { + rowLevelPlan + } + + case _: TruncatableTable if cond == TrueLiteral => + DeleteFromTable(relation, cond) + + case _ => + rowLevelPlan + } + } + + private def toDataSourceFilters(predicates: Seq[Expression]): Array[sources.Filter] = { + predicates.flatMap { p => + val filter = DataSourceStrategy.translateFilter(p, supportNestedPredicatePushdown = true) + if (filter.isEmpty) { + logDebug(s"Cannot translate expression to data source filter: $p") + } + filter + }.toArray + } + + private object RewrittenRowLevelCommand { + type ReturnType = (RowLevelWrite, RowLevelOperation.Command, Expression, LogicalPlan) + + def unapply(plan: LogicalPlan): Option[ReturnType] = plan match { + case rd @ ReplaceData(_, cond, _, originalTable, _) => + val command = rd.operation.command + Some(rd, command, cond, originalTable) + + case _ => + None + } + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala index 862189ed3afff..8ac91e02579c5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala @@ -187,7 +187,7 @@ object PushDownUtils extends PredicateHelper { } } - private def toOutputAttrs( + def toOutputAttrs( schema: StructType, relation: DataSourceV2Relation): Seq[AttributeReference] = { val nameToAttr = relation.output.map(_.name).zip(relation.output).toMap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala index 38f741532d786..2fd1d52fd981d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2Writes.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.v2 import java.util.UUID import org.apache.spark.sql.catalyst.expressions.PredicateHelper -import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, Project, ReplaceData} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} @@ -31,6 +31,7 @@ import org.apache.spark.sql.execution.streaming.sources.{MicroBatchWrite, WriteT import org.apache.spark.sql.internal.connector.SupportsStreamingUpdateAsAppend import org.apache.spark.sql.sources.{AlwaysTrue, Filter} import org.apache.spark.sql.streaming.OutputMode +import org.apache.spark.sql.types.StructType /** * A rule that constructs logical writes. @@ -41,7 +42,7 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper { override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { case a @ AppendData(r: DataSourceV2Relation, query, options, _, None) => - val writeBuilder = newWriteBuilder(r.table, query, options) + val writeBuilder = newWriteBuilder(r.table, options, query.schema) val write = writeBuilder.build() val newQuery = DistributionAndOrderingUtils.prepareQuery(write, query, conf) a.copy(write = Some(write), query = newQuery) @@ -57,7 +58,7 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper { }.toArray val table = r.table - val writeBuilder = newWriteBuilder(table, query, options) + val writeBuilder = newWriteBuilder(table, options, query.schema) val write = writeBuilder match { case builder: SupportsTruncate if isTruncate(filters) => builder.truncate().build() @@ -72,7 +73,7 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper { case o @ OverwritePartitionsDynamic(r: DataSourceV2Relation, query, options, _, None) => val table = r.table - val writeBuilder = newWriteBuilder(table, query, options) + val writeBuilder = newWriteBuilder(table, options, query.schema) val write = writeBuilder match { case builder: SupportsDynamicOverwrite => builder.overwriteDynamicPartitions().build() @@ -85,12 +86,21 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper { case WriteToMicroBatchDataSource( relation, table, query, queryId, writeOptions, outputMode, Some(batchId)) => - val writeBuilder = newWriteBuilder(table, query, writeOptions, queryId) + val writeBuilder = newWriteBuilder(table, writeOptions, query.schema, queryId) val write = buildWriteForMicroBatch(table, writeBuilder, outputMode) val microBatchWrite = new MicroBatchWrite(batchId, write.toStreaming) val customMetrics = write.supportedCustomMetrics.toSeq val newQuery = DistributionAndOrderingUtils.prepareQuery(write, query, conf) WriteToDataSourceV2(relation, microBatchWrite, newQuery, customMetrics) + + case rd @ ReplaceData(r: DataSourceV2Relation, _, query, _, None) => + val rowSchema = StructType.fromAttributes(rd.dataInput) + val writeBuilder = newWriteBuilder(r.table, Map.empty, rowSchema) + val write = writeBuilder.build() + val newQuery = DistributionAndOrderingUtils.prepareQuery(write, query, conf) + // project away any metadata columns that could be used for distribution and ordering + rd.copy(write = Some(write), query = Project(rd.dataInput, newQuery)) + } private def buildWriteForMicroBatch( @@ -119,11 +129,11 @@ object V2Writes extends Rule[LogicalPlan] with PredicateHelper { private def newWriteBuilder( table: Table, - query: LogicalPlan, writeOptions: Map[String, String], + rowSchema: StructType, queryId: String = UUID.randomUUID().toString): WriteBuilder = { - val info = LogicalWriteInfoImpl(queryId, query.schema, writeOptions.asOptions) + val info = LogicalWriteInfoImpl(queryId, rowSchema, writeOptions.asOptions) table.asWritable.newWriteBuilder(info) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala index 65c49283dd763..d23a9e51f6580 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/WriteToDataSourceV2Exec.scala @@ -284,6 +284,21 @@ case class OverwritePartitionsDynamicExec( copy(query = newChild) } +/** + * Physical plan node to replace data in existing tables. + */ +case class ReplaceDataExec( + query: SparkPlan, + refreshCache: () => Unit, + write: Write) extends V2ExistingTableWriteExec { + + override val stringArgs: Iterator[Any] = Iterator(query, write) + + override protected def withNewChildInternal(newChild: SparkPlan): ReplaceDataExec = { + copy(query = newChild) + } +} + case class WriteToDataSourceV2Exec( batchWrite: BatchWrite, refreshCache: () => Unit, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DeleteFromTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DeleteFromTableSuite.scala new file mode 100644 index 0000000000000..a2cfdde2671f6 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DeleteFromTableSuite.scala @@ -0,0 +1,629 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import java.util.Collections + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.sql.{AnalysisException, DataFrame, Encoders, QueryTest, Row} +import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryRowLevelOperationTableCatalog} +import org.apache.spark.sql.connector.expressions.LogicalExpressions._ +import org.apache.spark.sql.execution.{QueryExecution, SparkPlan} +import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.datasources.v2.{DeleteFromTableExec, ReplaceDataExec} +import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.util.QueryExecutionListener + +abstract class DeleteFromTableSuiteBase + extends QueryTest with SharedSparkSession with BeforeAndAfter with AdaptiveSparkPlanHelper { + + import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ + import testImplicits._ + + before { + spark.conf.set("spark.sql.catalog.cat", classOf[InMemoryRowLevelOperationTableCatalog].getName) + } + + after { + spark.sessionState.catalogManager.reset() + spark.sessionState.conf.unsetConf("spark.sql.catalog.cat") + } + + private val namespace = Array("ns1") + private val ident = Identifier.of(namespace, "test_table") + private val tableNameAsString = "cat." + ident.toString + + private def catalog: InMemoryRowLevelOperationTableCatalog = { + val catalog = spark.sessionState.catalogManager.catalog("cat") + catalog.asTableCatalog.asInstanceOf[InMemoryRowLevelOperationTableCatalog] + } + + test("EXPLAIN only delete") { + createAndInitTable("id INT, dep STRING", """{ "id": 1, "dep": "hr" }""") + + sql(s"EXPLAIN DELETE FROM $tableNameAsString WHERE id <= 10") + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(1, "hr") :: Nil) + } + + test("delete from empty tables") { + createTable("id INT, dep STRING") + + sql(s"DELETE FROM $tableNameAsString WHERE id <= 1") + + checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), Nil) + } + + test("delete with basic filters") { + createAndInitTable("id INT, dep STRING", + """{ "id": 1, "dep": "hr" } + |{ "id": 2, "dep": "software" } + |{ "id": 3, "dep": "hr" } + |""".stripMargin) + + sql(s"DELETE FROM $tableNameAsString WHERE id <= 1") + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(2, "software") :: Row(3, "hr") :: Nil) + } + + test("delete with aliases") { + createAndInitTable("id INT, dep STRING", + """{ "id": 1, "dep": "hr" } + |{ "id": 2, "dep": "software" } + |{ "id": 3, "dep": "hr" } + |""".stripMargin) + + sql(s"DELETE FROM $tableNameAsString AS t WHERE t.id <= 1 OR t.dep = 'hr'") + + checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), Row(2, "software") :: Nil) + } + + test("delete with IN predicates") { + createAndInitTable("id INT, dep STRING", + """{ "id": 1, "dep": "hr" } + |{ "id": 2, "dep": "software" } + |{ "id": null, "dep": "hr" } + |""".stripMargin) + + sql(s"DELETE FROM $tableNameAsString WHERE id IN (1, null)") + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(2, "software") :: Row(null, "hr") :: Nil) + } + + test("delete with NOT IN predicates") { + createAndInitTable("id INT, dep STRING", + """{ "id": 1, "dep": "hr" } + |{ "id": 2, "dep": "software" } + |{ "id": null, "dep": "hr" } + |""".stripMargin) + + sql(s"DELETE FROM $tableNameAsString WHERE id NOT IN (null, 1)") + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(1, "hr") :: Row(2, "software") :: Row(null, "hr") :: Nil) + + sql(s"DELETE FROM $tableNameAsString WHERE id NOT IN (1, 10)") + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(1, "hr") :: Row(null, "hr") :: Nil) + } + + test("delete with conditions on nested columns") { + createAndInitTable("id INT, complex STRUCT, dep STRING", + """{ "id": 1, "complex": { "c1": 3, "c2": "v1" }, "dep": "hr" } + |{ "id": 2, "complex": { "c1": 2, "c2": "v2" }, "dep": "software" } + |""".stripMargin) + + sql(s"DELETE FROM $tableNameAsString WHERE complex.c1 = id + 2") + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(2, Row(2, "v2"), "software") :: Nil) + + sql(s"DELETE FROM $tableNameAsString t WHERE t.complex.c1 = id") + + checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), Nil) + } + + test("delete with IN subqueries") { + withTempView("deleted_id", "deleted_dep") { + createAndInitTable("id INT, dep STRING", + """{ "id": 1, "dep": "hr" } + |{ "id": 2, "dep": "hardware" } + |{ "id": null, "dep": "hr" } + |""".stripMargin) + + val deletedIdDF = Seq(Some(0), Some(1), None).toDF() + deletedIdDF.createOrReplaceTempView("deleted_id") + + val deletedDepDF = Seq("software", "hr").toDF() + deletedDepDF.createOrReplaceTempView("deleted_dep") + + sql( + s"""DELETE FROM $tableNameAsString + |WHERE + | id IN (SELECT * FROM deleted_id) + | AND + | dep IN (SELECT * FROM deleted_dep) + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(2, "hardware") :: Row(null, "hr") :: Nil) + + append("id INT, dep STRING", + """{ "id": 1, "dep": "hr" } + |{ "id": -1, "dep": "hr" } + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(-1, "hr") :: Row(1, "hr") :: Row(2, "hardware") :: Row(null, "hr") :: Nil) + + sql( + s"""DELETE FROM $tableNameAsString + |WHERE + | id IS NULL + | OR + | id IN (SELECT value + 2 FROM deleted_id) + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(-1, "hr") :: Row(1, "hr") :: Nil) + + append("id INT, dep STRING", + """{ "id": null, "dep": "hr" } + |{ "id": 2, "dep": "hr" } + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(-1, "hr") :: Row(1, "hr") :: Row(2, "hr") :: Row(null, "hr") :: Nil) + + sql( + s"""DELETE FROM $tableNameAsString + |WHERE + | id IN (SELECT value + 2 FROM deleted_id) + | AND + | dep = 'hr' + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(-1, "hr") :: Row(1, "hr") :: Row(null, "hr") :: Nil) + } + } + + test("delete with multi-column IN subqueries") { + withTempView("deleted_employee") { + createAndInitTable("id INT, dep STRING", + """{ "id": 1, "dep": "hr" } + |{ "id": 2, "dep": "hardware" } + |{ "id": null, "dep": "hr" } + |""".stripMargin) + + val deletedEmployeeDF = Seq((None, "hr"), (Some(1), "hr")).toDF() + deletedEmployeeDF.createOrReplaceTempView("deleted_employee") + + sql( + s"""DELETE FROM $tableNameAsString + |WHERE + | (id, dep) IN (SELECT * FROM deleted_employee) + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(2, "hardware") :: Row(null, "hr") :: Nil) + } + } + + test("delete with NOT IN subqueries") { + withTempView("deleted_id", "deleted_dep") { + createAndInitTable("id INT, dep STRING", + """{ "id": 1, "dep": "hr" } + |{ "id": 2, "dep": "hardware" } + |{ "id": null, "dep": "hr" } + |""".stripMargin) + + val deletedIdDF = Seq(Some(-1), Some(-2), None).toDF() + deletedIdDF.createOrReplaceTempView("deleted_id") + + val deletedDepDF = Seq("software", "hr").toDF() + deletedDepDF.createOrReplaceTempView("deleted_dep") + + sql( + s"""DELETE FROM $tableNameAsString + |WHERE + | id NOT IN (SELECT * FROM deleted_id) + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(1, "hr") :: Row(2, "hardware") :: Row(null, "hr") :: Nil) + + sql( + s"""DELETE FROM $tableNameAsString + |WHERE + | id NOT IN (SELECT * FROM deleted_id WHERE value IS NOT NULL) + |""".stripMargin) + + checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), Row(null, "hr") :: Nil) + + append("id INT, dep STRING", + """{ "id": 1, "dep": "hr" } + |{ "id": 2, "dep": "hardware" } + |{ "id": null, "dep": "hr" } + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(1, "hr") :: Row(2, "hardware") :: Row(null, "hr") :: Row(null, "hr") :: Nil) + + sql( + s"""DELETE FROM $tableNameAsString + |WHERE + | id NOT IN (SELECT * FROM deleted_id) + | OR + | dep IN ('software', 'hr') + |""".stripMargin) + + checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), Row(2, "hardware") :: Nil) + + sql( + s"""DELETE FROM $tableNameAsString + |WHERE + | id NOT IN (SELECT * FROM deleted_id WHERE value IS NOT NULL) + | AND + | EXISTS (SELECT 1 FROM FROM deleted_dep WHERE dep = deleted_dep.value) + |""".stripMargin) + + checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), Row(2, "hardware") :: Nil) + + sql( + s"""DELETE FROM $tableNameAsString t + |WHERE + | t.id NOT IN (SELECT * FROM deleted_id WHERE value IS NOT NULL) + | OR + | EXISTS (SELECT 1 FROM FROM deleted_dep WHERE t.dep = deleted_dep.value) + |""".stripMargin) + + checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), Nil) + } + } + + test("delete with EXISTS subquery") { + withTempView("deleted_id", "deleted_dep") { + createAndInitTable("id INT, dep STRING", + """{ "id": 1, "dep": "hr" } + |{ "id": 2, "dep": "hardware" } + |{ "id": null, "dep": "hr" } + |""".stripMargin) + + val deletedIdDF = Seq(Some(-1), Some(-2), None).toDF() + deletedIdDF.createOrReplaceTempView("deleted_id") + + val deletedDepDF = Seq("software", "hr").toDF() + deletedDepDF.createOrReplaceTempView("deleted_dep") + + sql( + s"""DELETE FROM $tableNameAsString t + |WHERE + | EXISTS (SELECT 1 FROM deleted_id d WHERE t.id = d.value) + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(1, "hr") :: Row(2, "hardware") :: Row(null, "hr") :: Nil) + + sql( + s"""DELETE FROM $tableNameAsString t + |WHERE + | EXISTS (SELECT 1 FROM deleted_id d WHERE t.id = d.value + 2) + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(2, "hardware") :: Row(null, "hr") :: Nil) + + sql( + s"""DELETE FROM $tableNameAsString t + |WHERE + | EXISTS (SELECT 1 FROM deleted_id d WHERE t.id = d.value) OR t.id IS NULL + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(2, "hardware") :: Nil) + + sql( + s"""DELETE FROM $tableNameAsString t + |WHERE + | EXISTS (SELECT 1 FROM deleted_id di WHERE t.id = di.value) + | AND + | EXISTS (SELECT 1 FROM deleted_dep dd WHERE t.dep = dd.value) + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(2, "hardware") :: Nil) + } + } + + test("delete with NOT EXISTS subquery") { + withTempView("deleted_id", "deleted_dep") { + createAndInitTable("id INT, dep STRING", + """{ "id": 1, "dep": "hr" } + |{ "id": 2, "dep": "hardware" } + |{ "id": null, "dep": "hr" } + |""".stripMargin) + + val deletedIdDF = Seq(Some(-1), Some(-2), None).toDF() + deletedIdDF.createOrReplaceTempView("deleted_id") + + val deletedDepDF = Seq("software", "hr").toDF() + deletedDepDF.createOrReplaceTempView("deleted_dep") + + sql( + s"""DELETE FROM $tableNameAsString t + |WHERE + | NOT EXISTS (SELECT 1 FROM deleted_id di WHERE t.id = di.value + 2) + | AND + | NOT EXISTS (SELECT 1 FROM deleted_dep dd WHERE t.dep = dd.value) + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(1, "hr") :: Row(null, "hr") :: Nil) + + sql( + s"""DELETE FROM $tableNameAsString t + |WHERE + | NOT EXISTS (SELECT 1 FROM deleted_id d WHERE t.id = d.value + 2) + |""".stripMargin) + + checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), Row(1, "hr") :: Nil) + + sql( + s"""DELETE FROM $tableNameAsString t + |WHERE + | NOT EXISTS (SELECT 1 FROM deleted_id d WHERE t.id = d.value + 2) + | OR + | t.id = 1 + |""".stripMargin) + + checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), Nil) + } + } + + test("delete with a scalar subquery") { + withTempView("deleted_id") { + createAndInitTable("id INT, dep STRING", + """{ "id": 1, "dep": "hr" } + |{ "id": 2, "dep": "hardware" } + |{ "id": null, "dep": "hr" } + |""".stripMargin) + + val deletedIdDF = Seq(Some(1), Some(100), None).toDF() + deletedIdDF.createOrReplaceTempView("deleted_id") + + sql( + s"""DELETE FROM $tableNameAsString t + |WHERE + | id <= (SELECT min(value) FROM deleted_id) + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(2, "hardware") :: Row(null, "hr") :: Nil) + } + } + + test("delete refreshes relation cache") { + withTempView("temp") { + withCache("temp") { + createAndInitTable("id INT, dep STRING", + """{ "id": 1, "dep": "hr" } + |{ "id": 1, "dep": "hardware" } + |{ "id": 2, "dep": "hardware" } + |{ "id": 3, "dep": "hr" } + |""".stripMargin) + + // define a view on top of the table + val query = sql(s"SELECT * FROM $tableNameAsString WHERE id = 1") + query.createOrReplaceTempView("temp") + + // cache the view + sql("CACHE TABLE temp") + + // verify the view returns expected results + checkAnswer( + sql("SELECT * FROM temp"), + Row(1, "hr") :: Row(1, "hardware") :: Nil) + + // delete some records from the table + sql(s"DELETE FROM $tableNameAsString WHERE id <= 1") + + // verify the delete was successful + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(2, "hardware") :: Row(3, "hr") :: Nil) + + // verify the view reflects the changes in the table + checkAnswer(sql("SELECT * FROM temp"), Nil) + } + } + } + + test("delete with nondeterministic conditions") { + createAndInitTable("id INT, dep STRING", + """{ "id": 1, "dep": "hr" } + |{ "id": 2, "dep": "software" } + |{ "id": 3, "dep": "hr" } + |""".stripMargin) + + val e = intercept[AnalysisException] { + sql(s"DELETE FROM $tableNameAsString WHERE id <= 1 AND rand() > 0.5") + } + assert(e.message.contains("nondeterministic expressions are only allowed")) + } + + test("delete without condition executed as delete with filters") { + createAndInitTable("id INT, dep INT", + """{ "id": 1, "dep": 100 } + |{ "id": 2, "dep": 200 } + |{ "id": 3, "dep": 100 } + |""".stripMargin) + + executeDeleteWithFilters(s"DELETE FROM $tableNameAsString") + + checkAnswer(sql(s"SELECT * FROM $tableNameAsString"), Nil) + } + + test("delete with supported predicates gets converted into delete with filters") { + createAndInitTable("id INT, dep INT", + """{ "id": 1, "dep": 100 } + |{ "id": 2, "dep": 200 } + |{ "id": 3, "dep": 100 } + |""".stripMargin) + + executeDeleteWithFilters(s"DELETE FROM $tableNameAsString WHERE dep = 100") + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(2, 200) :: Nil) + } + + test("delete with unsupported predicates cannot be converted into delete with filters") { + createAndInitTable("id INT, dep INT", + """{ "id": 1, "dep": 100 } + |{ "id": 2, "dep": 200 } + |{ "id": 3, "dep": 100 } + |""".stripMargin) + + executeDeleteWithRewrite(s"DELETE FROM $tableNameAsString WHERE dep = 100 OR dep < 200") + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(2, 200) :: Nil) + } + + test("delete with subquery cannot be converted into delete with filters") { + withTempView("deleted_id") { + createAndInitTable("id INT, dep INT", + """{ "id": 1, "dep": 100 } + |{ "id": 2, "dep": 200 } + |{ "id": 3, "dep": 100 } + |""".stripMargin) + + val deletedIdDF = Seq(Some(1), Some(100), None).toDF() + deletedIdDF.createOrReplaceTempView("deleted_id") + + val q = s"DELETE FROM $tableNameAsString WHERE dep = 100 AND id IN (SELECT * FROM deleted_id)" + executeDeleteWithRewrite(q) + + checkAnswer( + sql(s"SELECT * FROM $tableNameAsString"), + Row(2, 200) :: Row(3, 100) :: Nil) + } + } + + private def createTable(schemaString: String): Unit = { + val schema = StructType.fromDDL(schemaString) + val tableProps = Collections.emptyMap[String, String] + catalog.createTable(ident, schema, Array(identity(reference(Seq("dep")))), tableProps) + } + + private def createAndInitTable(schemaString: String, jsonData: String): Unit = { + createTable(schemaString) + append(schemaString, jsonData) + } + + private def append(schemaString: String, jsonData: String): Unit = { + val df = toDF(jsonData, schemaString) + df.coalesce(1).writeTo(tableNameAsString).append() + } + + private def toDF(jsonData: String, schemaString: String = null): DataFrame = { + val jsonRows = jsonData.split("\\n").filter(str => str.trim.nonEmpty) + val jsonDS = spark.createDataset(jsonRows)(Encoders.STRING) + if (schemaString == null) { + spark.read.json(jsonDS) + } else { + spark.read.schema(schemaString).json(jsonDS) + } + } + + private def executeDeleteWithFilters(query: String): Unit = { + val executedPlan = executeAndKeepPlan { + sql(query) + } + + executedPlan match { + case _: DeleteFromTableExec => + // OK + case other => + fail("unexpected executed plan: " + other) + } + } + + private def executeDeleteWithRewrite(query: String): Unit = { + val executedPlan = executeAndKeepPlan { + sql(query) + } + + executedPlan match { + case _: ReplaceDataExec => + // OK + case other => + fail("unexpected executed plan: " + other) + } + } + + // executes an operation and keeps the executed plan + private def executeAndKeepPlan(func: => Unit): SparkPlan = { + var executedPlan: SparkPlan = null + + val listener = new QueryExecutionListener { + override def onSuccess(funcName: String, qe: QueryExecution, durationNs: Long): Unit = { + executedPlan = qe.executedPlan + } + override def onFailure(funcName: String, qe: QueryExecution, exception: Exception): Unit = { + } + } + spark.listenerManager.register(listener) + + func + + sparkContext.listenerBus.waitUntilEmpty() + + stripAQEPlan(executedPlan) + } +} + +class GroupBasedDeleteFromTableSuite extends DeleteFromTableSuiteBase diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 24b6be07619f3..6a20ee212942b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} import org.apache.spark.sql.catalyst.plans.logical.{AlterColumn, AnalysisOnlyCommand, AppendData, Assignment, CreateTable, CreateTableAsSelect, DeleteAction, DeleteFromTable, DescribeRelation, DropTable, InsertAction, LocalRelation, LogicalPlan, MergeIntoTable, OneRowRelation, Project, SetTableLocation, SetTableProperties, ShowTableProperties, SubqueryAlias, UnsetTableProperties, UpdateAction, UpdateTable} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.connector.FakeV2Provider -import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogNotFoundException, Identifier, Table, TableCapability, TableCatalog, V1Table} +import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogNotFoundException, Identifier, SupportsDelete, Table, TableCapability, TableCatalog, V1Table} import org.apache.spark.sql.connector.expressions.Transform import org.apache.spark.sql.execution.datasources.{CreateTable => CreateTableV1} import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation @@ -49,7 +49,7 @@ class PlanResolutionSuite extends AnalysisTest { private val v2Format = classOf[FakeV2Provider].getName private val table: Table = { - val t = mock(classOf[Table]) + val t = mock(classOf[SupportsDelete]) when(t.schema()).thenReturn(new StructType().add("i", "int").add("s", "string")) when(t.partitioning()).thenReturn(Array.empty[Transform]) t From 2ef56c295fc5c5e1060001cfe7158a2d00aa0d91 Mon Sep 17 00:00:00 2001 From: minyyy Date: Wed, 13 Apr 2022 14:01:27 +0800 Subject: [PATCH 133/535] [SPARK-38530][SQL] Fix a bug that GeneratorNestedColumnAliasing can be incorrectly applied to some expressions ### What changes were proposed in this pull request? This PR makes GeneratorNestedColumnAliasing only be able to apply to GetStructField*(_: AttributeReference), here GetStructField* means nested GetStructField. The current way to collect expressions is a top-down way and it actually only checks 2 levels which is wrong. The rule is simple - If we see expressions other than GetStructField, we are done. When an expression E is pushed down into an Explode, the thing happens is: E(x) is now pushed down to apply to E(array(x)). So only expressions that can operate on both x and array(x) can be pushed. GetStructField is special since we have GetArrayStructFields and when GetStructField is pushed down, it becomes GetArrayStructFields. Any other expressions are not applicable. We also do not even need to check the child type is Array(Array()) or whether the rewritten expression has the pattern GetArrayStructFields(GetArrayStructFields()). 1. When the child input type is Array(Array()), the ExtractValues expressions we get will always start from an innermost GetArrayStructFields, it does not align with GetStructField*(x). 2. When we see GetArrayStructFields(GetArrayStructFields()) in the rewritten generator, we must have seen a GetArrayStructFields in the expressions before pushdown. ### Why are the changes needed? It fixes some correctness issues. See the above section for more details. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests. Closes #35866 from minyyy/gnca_wrong_expr. Lead-authored-by: minyyy Co-authored-by: minyyy <98760575+minyyy@users.noreply.github.com> Signed-off-by: Wenchen Fan (cherry picked from commit 13edafab9f45cc80aee41e2f82475367d88357ec) Signed-off-by: Wenchen Fan --- .../optimizer/NestedColumnAliasing.scala | 50 ++++++++++++------- .../optimizer/NestedColumnAliasingSuite.scala | 40 ++++++++++++++- 2 files changed, 69 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala index 9cf2925cdd2a6..45f84c21b7d66 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala @@ -240,12 +240,14 @@ object NestedColumnAliasing { */ def getAttributeToExtractValues( exprList: Seq[Expression], - exclusiveAttrs: Seq[Attribute]): Map[Attribute, Seq[ExtractValue]] = { + exclusiveAttrs: Seq[Attribute], + extractor: (Expression) => Seq[Expression] = collectRootReferenceAndExtractValue) + : Map[Attribute, Seq[ExtractValue]] = { val nestedFieldReferences = new mutable.ArrayBuffer[ExtractValue]() val otherRootReferences = new mutable.ArrayBuffer[AttributeReference]() exprList.foreach { e => - collectRootReferenceAndExtractValue(e).foreach { + extractor(e).foreach { // we can not alias the attr from lambda variable whose expr id is not available case ev: ExtractValue if !ev.exists(_.isInstanceOf[NamedLambdaVariable]) => if (ev.references.size == 1) { @@ -350,23 +352,44 @@ object GeneratorNestedColumnAliasing { return None } val generatorOutputSet = AttributeSet(g.qualifiedGeneratorOutput) - val (attrToExtractValuesOnGenerator, attrToExtractValuesNotOnGenerator) = + var (attrToExtractValuesOnGenerator, attrToExtractValuesNotOnGenerator) = attrToExtractValues.partition { case (attr, _) => attr.references.subsetOf(generatorOutputSet) } val pushedThrough = NestedColumnAliasing.rewritePlanWithAliases( plan, attrToExtractValuesNotOnGenerator) - // If the generator output is `ArrayType`, we cannot push through the extractor. - // It is because we don't allow field extractor on two-level array, - // i.e., attr.field when attr is a ArrayType(ArrayType(...)). - // Similarily, we also cannot push through if the child of generator is `MapType`. + // We cannot push through if the child of generator is `MapType`. g.generator.children.head.dataType match { case _: MapType => return Some(pushedThrough) case ArrayType(_: ArrayType, _) => return Some(pushedThrough) case _ => } + // This function collects all GetStructField*(attribute) from the passed in expression. + // GetStructField* means arbitrary levels of nesting. + def collectNestedGetStructFields(e: Expression): Seq[Expression] = { + // The helper function returns a tuple of + // (nested GetStructField including the current level, all other nested GetStructField) + def helper(e: Expression): (Seq[Expression], Seq[Expression]) = e match { + case _: AttributeReference => (Seq(e), Seq.empty) + case gsf: GetStructField => + val child_res = helper(gsf.child) + (child_res._1.map(p => gsf.withNewChildren(Seq(p))), child_res._2) + case other => + val child_res = other.children.map(helper) + val child_res_combined = (child_res.flatMap(_._1), child_res.flatMap(_._2)) + (Seq.empty, child_res_combined._1 ++ child_res_combined._2) + } + + val res = helper(e) + (res._1 ++ res._2).filterNot(_.isInstanceOf[Attribute]) + } + + attrToExtractValuesOnGenerator = NestedColumnAliasing.getAttributeToExtractValues( + attrToExtractValuesOnGenerator.flatMap(_._2).toSeq, Seq.empty, + collectNestedGetStructFields) + // Pruning on `Generator`'s output. We only process single field case. // For multiple field case, we cannot directly move field extractor into // the generator expression. A workaround is to re-construct array of struct @@ -391,17 +414,6 @@ object GeneratorNestedColumnAliasing { e.withNewChildren(Seq(extractor)) } - // If after replacing generator expression with nested extractor, there - // is invalid extractor pattern like - // `GetArrayStructFields(GetArrayStructFields(...), ...), we cannot do - // pruning but fallback to original query plan. - val invalidExtractor = rewrittenG.generator.children.head.collect { - case GetArrayStructFields(_: GetArrayStructFields, _, _, _, _) => true - } - if (invalidExtractor.nonEmpty) { - return Some(pushedThrough) - } - // As we change the child of the generator, its output data type must be updated. val updatedGeneratorOutput = rewrittenG.generatorOutput .zip(rewrittenG.generator.elementSchema.toAttributes) @@ -416,7 +428,7 @@ object GeneratorNestedColumnAliasing { // Replace nested column accessor with generator output. val attrExprIdsOnGenerator = attrToExtractValuesOnGenerator.keys.map(_.exprId).toSet val updatedProject = p.withNewChildren(Seq(updatedGenerate)).transformExpressions { - case f: ExtractValue if nestedFieldsOnGenerator.contains(f) => + case f: GetStructField if nestedFieldsOnGenerator.contains(f) => updatedGenerate.output .find(a => attrExprIdsOnGenerator.contains(a.exprId)) .getOrElse(f) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala index ff3414d901208..42323aac7e869 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala @@ -20,14 +20,14 @@ package org.apache.spark.sql.catalyst.optimizer import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.SchemaPruningTest -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer +import org.apache.spark.sql.catalyst.analysis.{SimpleAnalyzer, UnresolvedExtractValue} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.Cross import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} +import org.apache.spark.sql.types.{ArrayType, IntegerType, StringType, StructField, StructType} class NestedColumnAliasingSuite extends SchemaPruningTest { @@ -812,6 +812,42 @@ class NestedColumnAliasingSuite extends SchemaPruningTest { val expected3 = contact.select($"name").rebalance($"name").select($"name.first").analyze comparePlans(optimized3, expected3) } + + test("SPARK-38530: Do not push down nested ExtractValues with other expressions") { + val inputType = StructType.fromDDL( + "a int, b struct, c2: int>") + val simpleStruct = StructType.fromDDL( + "b struct, c2 int>" + ) + val input = LocalRelation( + 'id.int, + 'col1.array(ArrayType(inputType))) + + val query = input + .generate(Explode('col1)) + .select( + UnresolvedExtractValue( + UnresolvedExtractValue( + CaseWhen(Seq(('col.getField("a") === 1, + Literal.default(simpleStruct)))), + Literal("b")), + Literal("c")).as("result")) + .analyze + val optimized = Optimize.execute(query) + + val aliases = collectGeneratedAliases(optimized) + + // Only the inner-most col.a should be pushed down. + val expected = input + .select('col1.getField("a").as(aliases(0))) + .generate(Explode($"${aliases(0)}"), unrequiredChildIndex = Seq(0)) + .select(UnresolvedExtractValue(UnresolvedExtractValue( + CaseWhen(Seq(('col === 1, + Literal.default(simpleStruct)))), Literal("b")), Literal("c")).as("result")) + .analyze + + comparePlans(optimized, expected) + } } object NestedColumnAliasingSuite { From 9db11628ff7c8ebf2fc9bd8c7cd7c36003d8436f Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Wed, 13 Apr 2022 14:25:20 +0800 Subject: [PATCH 134/535] [SPARK-37047][SQL][FOLLOWUP] Add legacy flag for the breaking change of lpad and rpad for binary type ### What changes were proposed in this pull request? Add a legacy flag `spark.sql.legacy.lpadRpadForBinaryType.enabled` for the breaking change introduced in https://github.com/apache/spark/pull/34154. The flag is enabled by default. When it is disabled, restore the pre-change behavior that there is no special handling on `BINARY` input types. ### Why are the changes needed? The original commit is a breaking change, and breaking changes should be encouraged to add a flag to turn it off for smooth migration between versions. ### Does this PR introduce _any_ user-facing change? With the default value of the conf, there is no user-facing difference. If users turn this conf off, they can restore the pre-change behavior. ### How was this patch tested? Through unit tests. Closes #36103 from anchovYu/flags-lpad-rpad-binary. Authored-by: Xinyi Yu Signed-off-by: Wenchen Fan (cherry picked from commit e2683c2f3c6e758ef852355533793c707fd5e061) Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 +- .../expressions/stringExpressions.scala | 6 +- .../apache/spark/sql/internal/SQLConf.scala | 12 ++++ .../expressions/StringExpressionsSuite.scala | 55 +++++++++++++++++++ .../org/apache/spark/sql/functions.scala | 4 +- 5 files changed, 74 insertions(+), 5 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 32c55d1826e6f..607100b0850b7 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -28,7 +28,7 @@ license: | - Since Spark 3.3, `DayTimeIntervalType` in Spark SQL is mapped to Arrow's `Duration` type in `ArrowWriter` and `ArrowColumnVector` developer APIs. Previously, `DayTimeIntervalType` was mapped to Arrow's `Interval` type which does not match with the types of other languages Spark SQL maps. For example, `DayTimeIntervalType` is mapped to `java.time.Duration` in Java. - - Since Spark 3.3, the functions `lpad` and `rpad` have been overloaded to support byte sequences. When the first argument is a byte sequence, the optional padding pattern must also be a byte sequence and the result is a BINARY value. The default padding pattern in this case is the zero byte. + - Since Spark 3.3, the functions `lpad` and `rpad` have been overloaded to support byte sequences. When the first argument is a byte sequence, the optional padding pattern must also be a byte sequence and the result is a BINARY value. The default padding pattern in this case is the zero byte. To restore the legacy behavior of always returning string types, set `spark.sql.legacy.lpadRpadAlwaysReturnString` to `true`. - Since Spark 3.3, Spark turns a non-nullable schema into nullable for API `DataFrameReader.schema(schema: StructType).json(jsonDataset: Dataset[String])` and `DataFrameReader.schema(schema: StructType).csv(csvDataset: Dataset[String])` when the schema is specified by the user and contains non-nullable fields. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 851261f87a4bc..976caeb3502a9 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1461,15 +1461,17 @@ case class StringLocate(substr: Expression, str: Expression, start: Expression) trait PadExpressionBuilderBase extends ExpressionBuilder { override def build(funcName: String, expressions: Seq[Expression]): Expression = { + val behaviorChangeEnabled = !SQLConf.get.getConf(SQLConf.LEGACY_LPAD_RPAD_BINARY_TYPE_AS_STRING) val numArgs = expressions.length if (numArgs == 2) { - if (expressions(0).dataType == BinaryType) { + if (expressions(0).dataType == BinaryType && behaviorChangeEnabled) { BinaryPad(funcName, expressions(0), expressions(1), Literal(Array[Byte](0))) } else { createStringPad(expressions(0), expressions(1), Literal(" ")) } } else if (numArgs == 3) { - if (expressions(0).dataType == BinaryType && expressions(2).dataType == BinaryType) { + if (expressions(0).dataType == BinaryType && expressions(2).dataType == BinaryType + && behaviorChangeEnabled) { BinaryPad(funcName, expressions(0), expressions(1), expressions(2)) } else { createStringPad(expressions(0), expressions(1), expressions(2)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 365a9a378cb68..9e4496a2c331f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3712,6 +3712,18 @@ object SQLConf { .booleanConf .createWithDefault(true) + val LEGACY_LPAD_RPAD_BINARY_TYPE_AS_STRING = + buildConf("spark.sql.legacy.lpadRpadAlwaysReturnString") + .internal() + .doc("When set to false, when the first argument and the optional padding pattern is a " + + "byte sequence, the result is a BINARY value. The default padding pattern in this case " + + "is the zero byte. " + + "When set to true, it restores the legacy behavior of always returning string types " + + "even for binary inputs.") + .version("3.3.0") + .booleanConf + .createWithDefault(false) + /** * Holds information about keys that have been deprecated. * diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 4936bce7bf214..afb05dd4d77b3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -773,6 +773,61 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(StringRPad(Literal("hi"), Literal(1)), "h") } + test("PadExpressionBuilderBase") { + // test if the correct lpad/rpad expression is created given different parameter types + Seq(true, false).foreach { confVal => + SQLConf.get.setConf(SQLConf.LEGACY_LPAD_RPAD_BINARY_TYPE_AS_STRING, confVal) + + val lpadExp1 = LPadExpressionBuilder.build("lpad", Seq(Literal("hi"), Literal(5))) + val lpadExp2 = LPadExpressionBuilder.build("lpad", Seq(Literal(Array[Byte]()), Literal(5))) + val lpadExp3 = LPadExpressionBuilder.build("lpad", + Seq(Literal("hi"), Literal(5), Literal("somepadding"))) + val lpadExp4 = LPadExpressionBuilder.build("lpad", + Seq(Literal(Array[Byte](1, 2)), Literal(5), Literal("somepadding"))) + val lpadExp5 = LPadExpressionBuilder.build("lpad", + Seq(Literal(Array[Byte](1, 2)), Literal(5), Literal(Array[Byte](1)))) + + val rpadExp1 = RPadExpressionBuilder.build("rpad", Seq(Literal("hi"), Literal(5))) + val rpadExp2 = RPadExpressionBuilder.build("rpad", Seq(Literal(Array[Byte]()), Literal(5))) + val rpadExp3 = RPadExpressionBuilder.build("rpad", + Seq(Literal("hi"), Literal(5), Literal("somepadding"))) + val rpadExp4 = RPadExpressionBuilder.build("rpad", + Seq(Literal(Array[Byte](1, 2)), Literal(5), Literal("somepadding"))) + val rpadExp5 = RPadExpressionBuilder.build("rpad", + Seq(Literal(Array[Byte](1, 2)), Literal(5), Literal(Array[Byte](1)))) + + assert(lpadExp1 == StringLPad(Literal("hi"), Literal(5), Literal(" "))) + assert(lpadExp3 == StringLPad(Literal("hi"), Literal(5), Literal("somepadding"))) + assert(lpadExp4 == StringLPad(Literal(Array[Byte](1, 2)), Literal(5), Literal("somepadding"))) + + assert(rpadExp1 == StringRPad(Literal("hi"), Literal(5), Literal(" "))) + assert(rpadExp3 == StringRPad(Literal("hi"), Literal(5), Literal("somepadding"))) + assert(rpadExp4 == StringRPad(Literal(Array[Byte](1, 2)), Literal(5), Literal("somepadding"))) + + if (!SQLConf.get.getConf(SQLConf.LEGACY_LPAD_RPAD_BINARY_TYPE_AS_STRING)) { + assert(lpadExp2 == + BinaryPad("lpad", Literal(Array[Byte]()), Literal(5), Literal(Array[Byte](0)))) + assert(lpadExp5 == + BinaryPad("lpad", Literal(Array[Byte](1, 2)), Literal(5), Literal(Array[Byte](1)))) + + assert(rpadExp2 == + BinaryPad("rpad", Literal(Array[Byte]()), Literal(5), Literal(Array[Byte](0)))) + assert(rpadExp5 == + BinaryPad("rpad", Literal(Array[Byte](1, 2)), Literal(5), Literal(Array[Byte](1)))) + } else { + assert(lpadExp2 == + StringLPad(Literal(Array[Byte]()), Literal(5), Literal(" "))) + assert(lpadExp5 == + StringLPad(Literal(Array[Byte](1, 2)), Literal(5), Literal(Array[Byte](1)))) + + assert(rpadExp2 == + StringRPad(Literal(Array[Byte]()), Literal(5), Literal(" "))) + assert(rpadExp5 == + StringRPad(Literal(Array[Byte](1, 2)), Literal(5), Literal(Array[Byte](1)))) + } + } + } + test("REPEAT") { val s1 = 'a.string.at(0) val s2 = 'b.int.at(1) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 17e1d48bb2cd6..f6c3bc7e3cece 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2776,7 +2776,7 @@ object functions { * @since 3.3.0 */ def lpad(str: Column, len: Int, pad: Array[Byte]): Column = withExpr { - BinaryPad("lpad", str.expr, lit(len).expr, lit(pad).expr) + UnresolvedFunction("lpad", Seq(str.expr, lit(len).expr, lit(pad).expr), isDistinct = false) } /** @@ -2865,7 +2865,7 @@ object functions { * @since 3.3.0 */ def rpad(str: Column, len: Int, pad: Array[Byte]): Column = withExpr { - BinaryPad("rpad", str.expr, lit(len).expr, lit(pad).expr) + UnresolvedFunction("rpad", Seq(str.expr, lit(len).expr, lit(pad).expr), isDistinct = false) } /** From 96c8b4f47c2d0df249efb088882b248b5c230188 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Wed, 13 Apr 2022 14:41:47 +0800 Subject: [PATCH 135/535] [SPARK-38855][SQL] DS V2 supports push down math functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Currently, Spark have some math functions of ANSI standard. Please refer https://github.com/apache/spark/blob/2f8613f22c0750c00cf1dcfb2f31c431d8dc1be7/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L388 These functions show below: `LN`, `EXP`, `POWER`, `SQRT`, `FLOOR`, `CEIL`, `WIDTH_BUCKET` The mainstream databases support these functions show below. | 函数 | PostgreSQL | ClickHouse | H2 | MySQL | Oracle | Redshift | Presto | Teradata | Snowflake | DB2 | Vertica | Exasol | SqlServer | Yellowbrick | Impala | Mariadb | Druid | Pig | SQLite | Influxdata | Singlestore | ElasticSearch | | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | | `LN` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | | `EXP` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | | `POWER` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | No | Yes | Yes | Yes | Yes | | `SQRT` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | | `FLOOR` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | | `CEIL` | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | Yes | | `WIDTH_BUCKET` | Yes | No | No | No | Yes | No | Yes | Yes | Yes | Yes | Yes | No | No | No | Yes | No | No | No | No | No | No | No | DS V2 should supports push down these math functions. ### Why are the changes needed? DS V2 supports push down math functions ### Does this PR introduce _any_ user-facing change? 'No'. New feature. ### How was this patch tested? New tests. Closes #36140 from beliefer/SPARK-38855. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit bf75b495e18ed87d0c118bfd5f1ceb52d720cad9) Signed-off-by: Wenchen Fan --- .../expressions/GeneralScalarExpression.java | 54 +++++++++++++++++++ .../util/V2ExpressionSQLBuilder.java | 7 +++ .../sql/errors/QueryCompilationErrors.scala | 4 ++ .../catalyst/util/V2ExpressionBuilder.scala | 28 +++++++++- .../org/apache/spark/sql/jdbc/H2Dialect.scala | 26 +++++++++ .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 28 +++++++++- 6 files changed, 145 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java index 8952761f9ef34..58082d5ee09c1 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/expressions/GeneralScalarExpression.java @@ -94,6 +94,60 @@ *
      • Since version: 3.3.0
      • * * + *
      • Name: ABS + *
          + *
        • SQL semantic: ABS(expr)
        • + *
        • Since version: 3.3.0
        • + *
        + *
      • + *
      • Name: COALESCE + *
          + *
        • SQL semantic: COALESCE(expr1, expr2)
        • + *
        • Since version: 3.3.0
        • + *
        + *
      • + *
      • Name: LN + *
          + *
        • SQL semantic: LN(expr)
        • + *
        • Since version: 3.3.0
        • + *
        + *
      • + *
      • Name: EXP + *
          + *
        • SQL semantic: EXP(expr)
        • + *
        • Since version: 3.3.0
        • + *
        + *
      • + *
      • Name: POWER + *
          + *
        • SQL semantic: POWER(expr, number)
        • + *
        • Since version: 3.3.0
        • + *
        + *
      • + *
      • Name: SQRT + *
          + *
        • SQL semantic: SQRT(expr)
        • + *
        • Since version: 3.3.0
        • + *
        + *
      • + *
      • Name: FLOOR + *
          + *
        • SQL semantic: FLOOR(expr)
        • + *
        • Since version: 3.3.0
        • + *
        + *
      • + *
      • Name: CEIL + *
          + *
        • SQL semantic: CEIL(expr)
        • + *
        • Since version: 3.3.0
        • + *
        + *
      • + *
      • Name: WIDTH_BUCKET + *
          + *
        • SQL semantic: WIDTH_BUCKET(expr)
        • + *
        • Since version: 3.3.0
        • + *
        + *
      • * * Note: SQL semantic conforms ANSI standard, so some expressions are not supported when ANSI off, * including: add, subtract, multiply, divide, remainder, pmod. diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index a7d1ed7f85e84..c9dfa2003e3c1 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -95,6 +95,13 @@ public String build(Expression expr) { return visitUnaryArithmetic(name, inputToSQL(e.children()[0])); case "ABS": case "COALESCE": + case "LN": + case "EXP": + case "POWER": + case "SQRT": + case "FLOOR": + case "CEIL": + case "WIDTH_BUCKET": return visitSQLFunction(name, Arrays.stream(e.children()).map(c -> build(c)).toArray(String[]::new)); case "CASE_WHEN": { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 0532a953ef4e0..f1357f91f9d2f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -2392,4 +2392,8 @@ object QueryCompilationErrors { new AnalysisException( "Sinks cannot request distribution and ordering in continuous execution mode") } + + def noSuchFunctionError(database: String, funcInfo: String): Throwable = { + new AnalysisException(s"$database does not support function: $funcInfo") + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala index 37db499470aa3..487b809d48a01 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.util -import org.apache.spark.sql.catalyst.expressions.{Abs, Add, And, BinaryComparison, BinaryOperator, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, CaseWhen, Cast, Coalesce, Contains, Divide, EndsWith, EqualTo, Expression, In, InSet, IsNotNull, IsNull, Literal, Multiply, Not, Or, Predicate, Remainder, StartsWith, StringPredicate, Subtract, UnaryMinus} +import org.apache.spark.sql.catalyst.expressions.{Abs, Add, And, BinaryComparison, BinaryOperator, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, CaseWhen, Cast, Ceil, Coalesce, Contains, Divide, EndsWith, EqualTo, Exp, Expression, Floor, In, InSet, IsNotNull, IsNull, Literal, Log, Multiply, Not, Or, Pow, Predicate, Remainder, Sqrt, StartsWith, StringPredicate, Subtract, UnaryMinus, WidthBucket} import org.apache.spark.sql.connector.expressions.{Cast => V2Cast, Expression => V2Expression, FieldReference, GeneralScalarExpression, LiteralValue} import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse, AlwaysTrue, And => V2And, Not => V2Not, Or => V2Or, Predicate => V2Predicate} import org.apache.spark.sql.execution.datasources.PushableColumn @@ -104,6 +104,32 @@ class V2ExpressionBuilder( } else { None } + case Log(child) => generateExpression(child) + .map(v => new GeneralScalarExpression("LN", Array[V2Expression](v))) + case Exp(child) => generateExpression(child) + .map(v => new GeneralScalarExpression("EXP", Array[V2Expression](v))) + case Pow(left, right) => + val l = generateExpression(left) + val r = generateExpression(right) + if (l.isDefined && r.isDefined) { + Some(new GeneralScalarExpression("POWER", Array[V2Expression](l.get, r.get))) + } else { + None + } + case Sqrt(child) => generateExpression(child) + .map(v => new GeneralScalarExpression("SQRT", Array[V2Expression](v))) + case Floor(child) => generateExpression(child) + .map(v => new GeneralScalarExpression("FLOOR", Array[V2Expression](v))) + case Ceil(child) => generateExpression(child) + .map(v => new GeneralScalarExpression("CEIL", Array[V2Expression](v))) + case wb: WidthBucket => + val childrenExpressions = wb.children.flatMap(generateExpression(_)) + if (childrenExpressions.length == wb.children.length) { + Some(new GeneralScalarExpression("WIDTH_BUCKET", + childrenExpressions.toArray[V2Expression])) + } else { + None + } case and: And => // AND expects predicate val l = generateExpression(and.left, true) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala index 643376cdb126a..0aa971c0d3ab1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala @@ -20,14 +20,40 @@ package org.apache.spark.sql.jdbc import java.sql.SQLException import java.util.Locale +import scala.util.control.NonFatal + import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.connector.expressions.Expression import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, GeneralAggregateFunc} +import org.apache.spark.sql.errors.QueryCompilationErrors private object H2Dialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:h2") + class H2SQLBuilder extends JDBCSQLBuilder { + override def visitSQLFunction(funcName: String, inputs: Array[String]): String = { + funcName match { + case "WIDTH_BUCKET" => + val functionInfo = super.visitSQLFunction(funcName, inputs) + throw QueryCompilationErrors.noSuchFunctionError("H2", functionInfo) + case _ => super.visitSQLFunction(funcName, inputs) + } + } + } + + override def compileExpression(expr: Expression): Option[String] = { + val h2SQLBuilder = new H2SQLBuilder() + try { + Some(h2SQLBuilder.build(expr)) + } catch { + case NonFatal(e) => + logWarning("Error occurs while compiling V2 expression", e) + None + } + } + override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { super.compileAggregate(aggFunction).orElse( aggFunction match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 858781f2cde1b..e28d9ba9ba815 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.analysis.CannotReplaceMissingTableException import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Filter, Sort} import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2ScanRelation, V1ScanWrapper} import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog -import org.apache.spark.sql.functions.{abs, avg, coalesce, count, count_distinct, lit, not, sum, udf, when} +import org.apache.spark.sql.functions.{abs, avg, ceil, coalesce, count, count_distinct, exp, floor, lit, log => ln, not, pow, sqrt, sum, udf, when} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.util.Utils @@ -440,6 +440,32 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkPushedInfo(df5, expectedPlanFragment5) checkAnswer(df5, Seq(Row(1, "amy", 10000, 1000, true), Row(1, "cathy", 9000, 1200, false), Row(6, "jen", 12000, 1200, true))) + + val df6 = spark.table("h2.test.employee") + .filter(ln($"dept") > 1) + .filter(exp($"salary") > 2000) + .filter(pow($"dept", 2) > 4) + .filter(sqrt($"salary") > 100) + .filter(floor($"dept") > 1) + .filter(ceil($"dept") > 1) + checkFiltersRemoved(df6, ansiMode) + val expectedPlanFragment6 = if (ansiMode) { + "PushedFilters: [DEPT IS NOT NULL, SALARY IS NOT NULL, " + + "LN(CAST(DEPT AS double)) > 1.0, EXP(CAST(SALARY AS double)...," + } else { + "PushedFilters: [DEPT IS NOT NULL, SALARY IS NOT NULL]" + } + checkPushedInfo(df6, expectedPlanFragment6) + checkAnswer(df6, Seq(Row(6, "jen", 12000, 1200, true))) + + // H2 does not support width_bucket + val df7 = sql(""" + |SELECT * FROM h2.test.employee + |WHERE width_bucket(dept, 1, 6, 3) > 1 + |""".stripMargin) + checkFiltersRemoved(df7, false) + checkPushedInfo(df7, "PushedFilters: [DEPT IS NOT NULL]") + checkAnswer(df7, Seq(Row(6, "jen", 12000, 1200, true))) } } } From c44020b961ffe44e30ee617af6ffb84effbd28fe Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Wed, 13 Apr 2022 17:07:27 +0900 Subject: [PATCH 136/535] [SPARK-38833][PYTHON][SQL] Allow applyInPandas to return empty DataFrame without columns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Methods `wrap_cogrouped_map_pandas_udf` and `wrap_grouped_map_pandas_udf` in `python/pyspark/worker.py` do not need to reject `pd.DataFrame`s with no columns return by udf when that DataFrame is empty (zero rows). This allows to return empty DataFrames without the need to define columns. The DataFrame is empty after all! **The proposed behaviour is consistent with the current behaviour of `DataFrame.mapInPandas`.** ### Why are the changes needed? Returning an empty DataFrame from the lambda given to `applyInPandas` should be as easy as this: ```python return pd.DataFrame([]) ``` However, PySpark requires that empty DataFrame to have the right _number_ of columns. This seems redundant as the schema is already defined in the `applyInPandas` call. Returning a non-empty DataFrame does not require defining columns. Behaviour of `applyInPandas` should be consistent with `mapInPandas`. Here is an example to reproduce: ```python import pandas as pd from pyspark.sql.functions import pandas_udf, ceil df = spark.createDataFrame( [(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0), (2, 10.0)], ("id", "v")) def mean_func(key, pdf): if key == (1,): return pd.DataFrame([]) else: return pd.DataFrame([key + (pdf.v.mean(),)]) df.groupby("id").applyInPandas(mean_func, schema="id long, v double").show() ``` ### Does this PR introduce _any_ user-facing change? It changes the behaviour of the following calls to allow returning empty `pd.DataFrame` without defining columns. The PySpark DataFrame returned by `applyInPandas` is unchanged: - `df.groupby(…).applyInPandas(…)` - `df.cogroup(…).applyInPandas(…)` ### How was this patch tested? Tests are added that test `applyInPandas` and `mapInPandas` when returning - empty DataFrame with no columns - empty DataFrame with the wrong number of columns - non-empty DataFrame with wrong number of columns - something other than `pd.DataFrame` NOTE: It is not an error for `mapInPandas` to return DataFrames with more columns than specified in the `mapInPandas` schema. Closes #36120 from EnricoMi/branch-empty-pd-dataframes. Authored-by: Enrico Minack Signed-off-by: Hyukjin Kwon (cherry picked from commit 556c74578eb2379fc6e0ec8d147674d0b10e5a2c) Signed-off-by: Hyukjin Kwon --- .../sql/tests/test_pandas_cogrouped_map.py | 97 +++++++++++++++++++ .../sql/tests/test_pandas_grouped_map.py | 76 +++++++++++++++ python/pyspark/sql/tests/test_pandas_map.py | 71 ++++++++++++-- python/pyspark/worker.py | 12 ++- 4 files changed, 246 insertions(+), 10 deletions(-) diff --git a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/test_pandas_cogrouped_map.py index 58022fa6e838d..3f403d9c9d6fc 100644 --- a/python/pyspark/sql/tests/test_pandas_cogrouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_cogrouped_map.py @@ -20,6 +20,7 @@ from pyspark.sql.functions import array, explode, col, lit, udf, pandas_udf from pyspark.sql.types import DoubleType, StructType, StructField, Row +from pyspark.sql.utils import PythonException from pyspark.testing.sqlutils import ( ReusedSQLTestCase, have_pandas, @@ -124,6 +125,102 @@ def merge_pandas(lft, rgt): assert_frame_equal(expected, result) + def test_apply_in_pandas_not_returning_pandas_dataframe(self): + left = self.data1 + right = self.data2 + + def merge_pandas(lft, rgt): + return lft.size + rgt.size + + with QuietTest(self.sc): + with self.assertRaisesRegex( + PythonException, + "Return type of the user-defined function should be pandas.DataFrame, " + "but is ", + ): + ( + left.groupby("id") + .cogroup(right.groupby("id")) + .applyInPandas(merge_pandas, "id long, k int, v int, v2 int") + .collect() + ) + + def test_apply_in_pandas_returning_wrong_number_of_columns(self): + left = self.data1 + right = self.data2 + + def merge_pandas(lft, rgt): + if 0 in lft["id"] and lft["id"][0] % 2 == 0: + lft["add"] = 0 + if 0 in rgt["id"] and rgt["id"][0] % 3 == 0: + rgt["more"] = 1 + return pd.merge(lft, rgt, on=["id", "k"]) + + with QuietTest(self.sc): + with self.assertRaisesRegex( + PythonException, + "Number of columns of the returned pandas.DataFrame " + "doesn't match specified schema. Expected: 4 Actual: 6", + ): + ( + # merge_pandas returns two columns for even keys while we set schema to four + left.groupby("id") + .cogroup(right.groupby("id")) + .applyInPandas(merge_pandas, "id long, k int, v int, v2 int") + .collect() + ) + + def test_apply_in_pandas_returning_empty_dataframe(self): + left = self.data1 + right = self.data2 + + def merge_pandas(lft, rgt): + if 0 in lft["id"] and lft["id"][0] % 2 == 0: + return pd.DataFrame([]) + if 0 in rgt["id"] and rgt["id"][0] % 3 == 0: + return pd.DataFrame([]) + return pd.merge(lft, rgt, on=["id", "k"]) + + result = ( + left.groupby("id") + .cogroup(right.groupby("id")) + .applyInPandas(merge_pandas, "id long, k int, v int, v2 int") + .sort(["id", "k"]) + .toPandas() + ) + + left = left.toPandas() + right = right.toPandas() + + expected = pd.merge( + left[left["id"] % 2 != 0], right[right["id"] % 3 != 0], on=["id", "k"] + ).sort_values(by=["id", "k"]) + + assert_frame_equal(expected, result) + + def test_apply_in_pandas_returning_empty_dataframe_and_wrong_number_of_columns(self): + left = self.data1 + right = self.data2 + + def merge_pandas(lft, rgt): + if 0 in lft["id"] and lft["id"][0] % 2 == 0: + return pd.DataFrame([], columns=["id", "k"]) + return pd.merge(lft, rgt, on=["id", "k"]) + + with QuietTest(self.sc): + with self.assertRaisesRegex( + PythonException, + "Number of columns of the returned pandas.DataFrame doesn't " + "match specified schema. Expected: 4 Actual: 2", + ): + ( + # merge_pandas returns two columns for even keys while we set schema to four + left.groupby("id") + .cogroup(right.groupby("id")) + .applyInPandas(merge_pandas, "id long, k int, v int, v2 int") + .collect() + ) + def test_mixed_scalar_udfs_followed_by_cogrouby_apply(self): df = self.spark.range(0, 10).toDF("v1") df = df.withColumn("v2", udf(lambda x: x + 1, "int")(df["v1"])).withColumn( diff --git a/python/pyspark/sql/tests/test_pandas_grouped_map.py b/python/pyspark/sql/tests/test_pandas_grouped_map.py index bc1593069ed14..4fd5207f73a7b 100644 --- a/python/pyspark/sql/tests/test_pandas_grouped_map.py +++ b/python/pyspark/sql/tests/test_pandas_grouped_map.py @@ -51,6 +51,7 @@ NullType, TimestampType, ) +from pyspark.sql.utils import PythonException from pyspark.testing.sqlutils import ( ReusedSQLTestCase, have_pandas, @@ -268,6 +269,81 @@ def normalize(pdf): expected = expected.assign(norm=expected.norm.astype("float64")) assert_frame_equal(expected, result) + def test_apply_in_pandas_not_returning_pandas_dataframe(self): + df = self.data + + def stats(key, _): + return key + + with QuietTest(self.sc): + with self.assertRaisesRegex( + PythonException, + "Return type of the user-defined function should be pandas.DataFrame, " + "but is ", + ): + df.groupby("id").applyInPandas(stats, schema="id integer, m double").collect() + + def test_apply_in_pandas_returning_wrong_number_of_columns(self): + df = self.data + + def stats(key, pdf): + v = pdf.v + # returning three columns + res = pd.DataFrame([key + (v.mean(), v.std())]) + return res + + with QuietTest(self.sc): + with self.assertRaisesRegex( + PythonException, + "Number of columns of the returned pandas.DataFrame doesn't match " + "specified schema. Expected: 2 Actual: 3", + ): + # stats returns three columns while here we set schema with two columns + df.groupby("id").applyInPandas(stats, schema="id integer, m double").collect() + + def test_apply_in_pandas_returning_empty_dataframe(self): + df = self.data + + def odd_means(key, pdf): + if key[0] % 2 == 0: + return pd.DataFrame([]) + else: + return pd.DataFrame([key + (pdf.v.mean(),)]) + + expected_ids = {row[0] for row in self.data.collect() if row[0] % 2 != 0} + + result = ( + df.groupby("id") + .applyInPandas(odd_means, schema="id integer, m double") + .sort("id", "m") + .collect() + ) + + actual_ids = {row[0] for row in result} + self.assertSetEqual(expected_ids, actual_ids) + + self.assertEqual(len(expected_ids), len(result)) + for row in result: + self.assertEqual(24.5, row[1]) + + def test_apply_in_pandas_returning_empty_dataframe_and_wrong_number_of_columns(self): + df = self.data + + def odd_means(key, pdf): + if key[0] % 2 == 0: + return pd.DataFrame([], columns=["id"]) + else: + return pd.DataFrame([key + (pdf.v.mean(),)]) + + with QuietTest(self.sc): + with self.assertRaisesRegex( + PythonException, + "Number of columns of the returned pandas.DataFrame doesn't match " + "specified schema. Expected: 2 Actual: 1", + ): + # stats returns one column for even keys while here we set schema with two columns + df.groupby("id").applyInPandas(odd_means, schema="id integer, m double").collect() + def test_datatype_string(self): df = self.data diff --git a/python/pyspark/sql/tests/test_pandas_map.py b/python/pyspark/sql/tests/test_pandas_map.py index 360d20050a3ad..11da879da3828 100644 --- a/python/pyspark/sql/tests/test_pandas_map.py +++ b/python/pyspark/sql/tests/test_pandas_map.py @@ -22,6 +22,8 @@ from typing import cast from pyspark.sql import Row +from pyspark.sql.functions import lit +from pyspark.sql.utils import PythonException from pyspark.testing.sqlutils import ( ReusedSQLTestCase, have_pandas, @@ -29,6 +31,7 @@ pandas_requirement_message, pyarrow_requirement_message, ) +from pyspark.testing.utils import QuietTest if have_pandas: import pandas as pd @@ -60,14 +63,14 @@ def tearDownClass(cls): time.tzset() ReusedSQLTestCase.tearDownClass() - def test_map_partitions_in_pandas(self): + def test_map_in_pandas(self): def func(iterator): for pdf in iterator: assert isinstance(pdf, pd.DataFrame) assert pdf.columns == ["id"] yield pdf - df = self.spark.range(10) + df = self.spark.range(10, numPartitions=3) actual = df.mapInPandas(func, "id long").collect() expected = df.collect() self.assertEqual(actual, expected) @@ -95,17 +98,69 @@ def func(iterator): actual = df.repartition(1).mapInPandas(func, "a long").collect() self.assertEqual(set((r.a for r in actual)), set(range(100))) + def test_other_than_dataframe(self): + def bad_iter(_): + return iter([1]) + + with QuietTest(self.sc): + with self.assertRaisesRegex( + PythonException, + "Return type of the user-defined function should be Pandas.DataFrame, " + "but is ", + ): + ( + self.spark.range(10, numPartitions=3) + .mapInPandas(bad_iter, "a int, b string") + .count() + ) + def test_empty_iterator(self): def empty_iter(_): return iter([]) - self.assertEqual(self.spark.range(10).mapInPandas(empty_iter, "a int, b string").count(), 0) + mapped = self.spark.range(10, numPartitions=3).mapInPandas(empty_iter, "a int, b string") + self.assertEqual(mapped.count(), 0) - def test_empty_rows(self): - def empty_rows(_): + def test_empty_dataframes(self): + def empty_dataframes(_): return iter([pd.DataFrame({"a": []})]) - self.assertEqual(self.spark.range(10).mapInPandas(empty_rows, "a int").count(), 0) + mapped = self.spark.range(10, numPartitions=3).mapInPandas(empty_dataframes, "a int") + self.assertEqual(mapped.count(), 0) + + def test_empty_dataframes_without_columns(self): + def empty_dataframes_wo_columns(iterator): + for pdf in iterator: + yield pdf + # after yielding all elements of the iterator, also yield one dataframe without columns + yield pd.DataFrame([]) + + mapped = ( + self.spark.range(10, numPartitions=3) + .toDF("id") + .mapInPandas(empty_dataframes_wo_columns, "id int") + ) + self.assertEqual(mapped.count(), 10) + + def test_empty_dataframes_with_less_columns(self): + def empty_dataframes_with_less_columns(iterator): + for pdf in iterator: + yield pdf + # after yielding all elements of the iterator, also yield a dataframe with less columns + yield pd.DataFrame([(1,)], columns=["id"]) + + with QuietTest(self.sc): + with self.assertRaisesRegex( + PythonException, + "KeyError: 'value'", + ): + ( + self.spark.range(10, numPartitions=3) + .withColumn("value", lit(0)) + .toDF("id", "value") + .mapInPandas(empty_dataframes_with_less_columns, "id int, value int") + .collect() + ) def test_chain_map_partitions_in_pandas(self): def func(iterator): @@ -114,14 +169,14 @@ def func(iterator): assert pdf.columns == ["id"] yield pdf - df = self.spark.range(10) + df = self.spark.range(10, numPartitions=3) actual = df.mapInPandas(func, "id long").mapInPandas(func, "id long").collect() expected = df.collect() self.assertEqual(actual, expected) def test_self_join(self): # SPARK-34319: self-join with MapInPandas - df1 = self.spark.range(10) + df1 = self.spark.range(10, numPartitions=3) df2 = df1.mapInPandas(lambda iter: iter, "id long") actual = df2.join(df2).collect() expected = df1.join(df1).collect() diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index 8784abfb33379..c486b7bed1d81 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -162,7 +162,11 @@ def wrapped(left_key_series, left_value_series, right_key_series, right_value_se "Return type of the user-defined function should be " "pandas.DataFrame, but is {}".format(type(result)) ) - if not len(result.columns) == len(return_type): + # the number of columns of result have to match the return type + # but it is fine for result to have no columns at all if it is empty + if not ( + len(result.columns) == len(return_type) or len(result.columns) == 0 and result.empty + ): raise RuntimeError( "Number of columns of the returned pandas.DataFrame " "doesn't match specified schema. " @@ -188,7 +192,11 @@ def wrapped(key_series, value_series): "Return type of the user-defined function should be " "pandas.DataFrame, but is {}".format(type(result)) ) - if not len(result.columns) == len(return_type): + # the number of columns of result have to match the return type + # but it is fine for result to have no columns at all if it is empty + if not ( + len(result.columns) == len(return_type) or len(result.columns) == 0 and result.empty + ): raise RuntimeError( "Number of columns of the returned pandas.DataFrame " "doesn't match specified schema. " From 76f40eef8b97e23f4a16e471366ae410a3e6cc20 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Wed, 13 Apr 2022 17:06:03 +0800 Subject: [PATCH 137/535] [SPARK-38829][SQL][3.3] Remove TimestampNTZ type support in Parquet for Spark 3.3 ### What changes were proposed in this pull request? This is a follow-up for https://github.com/apache/spark/pull/36094. I added `Utils.isTesting` whenever we perform schema conversion or row conversion for TimestampNTZType. I verified that the tests, e.g. ParquetIOSuite, fail with unsupported data type when running in non-testing mode: ``` [info] Cause: org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 40.0 failed 1 times, most recent failure: Lost task 1.0 in stage 40.0 (TID 66) (ip-10-110-16-208.us-west-2.compute.internal executor driver): org.apache.spark.sql.AnalysisException: Unsupported data type timestamp_ntz [info] at org.apache.spark.sql.errors.QueryCompilationErrors$.cannotConvertDataTypeToParquetTypeError(QueryCompilationErrors.scala:1304) [info] at org.apache.spark.sql.execution.datasources.parquet.SparkToParquetSchemaConverter.convertField(ParquetSchemaConverter.scala:707) [info] at org.apache.spark.sql.execution.datasources.parquet.SparkToParquetSchemaConverter.convertField(ParquetSchemaConverter.scala:479) [info] at org.apache.spark.sql.execution.datasources.parquet.SparkToParquetSchemaConverter.$anonfun$convert$1(ParquetSchemaConverter.scala:471) ``` ### Why are the changes needed? We have to disable TimestampNTZType as other parts of the codebase do not yet support this type. ### Does this PR introduce _any_ user-facing change? No, the TimestampNTZ type is not released yet. ### How was this patch tested? I tested the changes manually by rerunning the test suites that verify TimestampNTZType in the non-testing mode. Closes #36137 from sadikovi/SPARK-38829-parquet-ntz-off. Authored-by: Ivan Sadikov Signed-off-by: Gengliang Wang --- .../datasources/parquet/ParquetRowConverter.scala | 5 ++++- .../datasources/parquet/ParquetSchemaConverter.scala | 7 +++++-- .../datasources/parquet/ParquetWriteSupport.scala | 4 +++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala index a955dd6fc76a3..ffd90fd722bda 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala @@ -41,6 +41,7 @@ import org.apache.spark.sql.execution.datasources.DataSourceUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String +import org.apache.spark.util.Utils /** * A [[ParentContainerUpdater]] is used by a Parquet converter to set converted values to some @@ -487,7 +488,9 @@ private[parquet] class ParquetRowConverter( parquetType.asPrimitiveType().getPrimitiveTypeName == INT64 && parquetType.getLogicalTypeAnnotation.isInstanceOf[TimestampLogicalTypeAnnotation] && !parquetType.getLogicalTypeAnnotation - .asInstanceOf[TimestampLogicalTypeAnnotation].isAdjustedToUTC + .asInstanceOf[TimestampLogicalTypeAnnotation].isAdjustedToUTC && + // SPARK-38829: Remove TimestampNTZ type support in Parquet for Spark 3.3 + Utils.isTesting /** * Parquet converter for strings. A dictionary is used to minimize string decoding cost. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala index 0e065f19a88a4..3419bf15f8e97 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /** * This converter class is used to convert Parquet [[MessageType]] to Spark SQL [[StructType]] @@ -253,7 +254,8 @@ class ParquetToSparkSchemaConverter( if (timestamp.isAdjustedToUTC) { TimestampType } else { - TimestampNTZType + // SPARK-38829: Remove TimestampNTZ type support in Parquet for Spark 3.3 + if (Utils.isTesting) TimestampNTZType else TimestampType } case _ => illegalType() } @@ -547,7 +549,8 @@ class SparkToParquetSchemaConverter( .as(LogicalTypeAnnotation.timestampType(true, TimeUnit.MILLIS)).named(field.name) } - case TimestampNTZType => + // SPARK-38829: Remove TimestampNTZ type support in Parquet for Spark 3.3 + case TimestampNTZType if Utils.isTesting => Types.primitive(INT64, repetition) .as(LogicalTypeAnnotation.timestampType(false, TimeUnit.MICROS)).named(field.name) case BinaryType => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala index 9d38c967cb031..e71863657dd25 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala @@ -39,6 +39,7 @@ import org.apache.spark.sql.execution.datasources.DataSourceUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy import org.apache.spark.sql.types._ +import org.apache.spark.util.Utils /** * A Parquet [[WriteSupport]] implementation that writes Catalyst [[InternalRow]]s as Parquet @@ -227,7 +228,8 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { recordConsumer.addLong(millis) } - case TimestampNTZType => + // SPARK-38829: Remove TimestampNTZ type support in Parquet for Spark 3.3 + case TimestampNTZType if Utils.isTesting => // For TimestampNTZType column, Spark always output as INT64 with Timestamp annotation in // MICROS time unit. (row: SpecializedGetters, ordinal: Int) => recordConsumer.addLong(row.getLong(ordinal)) From baaa3bbecd9f63aa0a71cf76de4b53d3c1dcf7a4 Mon Sep 17 00:00:00 2001 From: dch nguyen Date: Thu, 14 Apr 2022 02:03:24 +0200 Subject: [PATCH 138/535] [SPARK-37014][PYTHON] Inline type hints for python/pyspark/streaming/context.py ### What changes were proposed in this pull request? Inline type hints for python/pyspark/streaming/context.py from Inline type hints for python/pyspark/streaming/context.pyi. ### Why are the changes needed? Currently, there is type hint stub files python/pyspark/streaming/context.pyi to show the expected types for functions, but we can also take advantage of static type checking within the functions by inlining the type hints. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing test. Closes #34293 from dchvn/SPARK-37014. Authored-by: dch nguyen Signed-off-by: zero323 (cherry picked from commit c0c1f35cd9279bc1a7a50119be72a297162a9b55) Signed-off-by: zero323 --- python/pyspark/streaming/context.py | 123 +++++++++++++++++++-------- python/pyspark/streaming/context.pyi | 71 ---------------- python/pyspark/streaming/kinesis.py | 9 +- 3 files changed, 91 insertions(+), 112 deletions(-) delete mode 100644 python/pyspark/streaming/context.pyi diff --git a/python/pyspark/streaming/context.py b/python/pyspark/streaming/context.py index cc9875d6575e1..52e5efed06308 100644 --- a/python/pyspark/streaming/context.py +++ b/python/pyspark/streaming/context.py @@ -14,18 +14,22 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from typing import Any, Callable, List, Optional, TypeVar -from py4j.java_gateway import java_import, is_instance_of +from py4j.java_gateway import java_import, is_instance_of, JavaObject from pyspark import RDD, SparkConf from pyspark.serializers import NoOpSerializer, UTF8Deserializer, CloudPickleSerializer from pyspark.context import SparkContext from pyspark.storagelevel import StorageLevel from pyspark.streaming.dstream import DStream +from pyspark.streaming.listener import StreamingListener from pyspark.streaming.util import TransformFunction, TransformFunctionSerializer __all__ = ["StreamingContext"] +T = TypeVar("T") + class StreamingContext: """ @@ -51,27 +55,35 @@ class StreamingContext: # Reference to a currently active StreamingContext _activeContext = None - def __init__(self, sparkContext, batchDuration=None, jssc=None): - + def __init__( + self, + sparkContext: SparkContext, + batchDuration: Optional[int] = None, + jssc: Optional[JavaObject] = None, + ): self._sc = sparkContext self._jvm = self._sc._jvm self._jssc = jssc or self._initialize_context(self._sc, batchDuration) - def _initialize_context(self, sc, duration): + def _initialize_context(self, sc: SparkContext, duration: Optional[int]) -> JavaObject: self._ensure_initialized() + assert self._jvm is not None and duration is not None return self._jvm.JavaStreamingContext(sc._jsc, self._jduration(duration)) - def _jduration(self, seconds): + def _jduration(self, seconds: int) -> JavaObject: """ Create Duration object given number of seconds """ + assert self._jvm is not None return self._jvm.Duration(int(seconds * 1000)) @classmethod - def _ensure_initialized(cls): + def _ensure_initialized(cls) -> None: SparkContext._ensure_initialized() gw = SparkContext._gateway + assert gw is not None + java_import(gw.jvm, "org.apache.spark.streaming.*") java_import(gw.jvm, "org.apache.spark.streaming.api.java.*") java_import(gw.jvm, "org.apache.spark.streaming.api.python.*") @@ -83,11 +95,15 @@ def _ensure_initialized(cls): # register serializer for TransformFunction # it happens before creating SparkContext when loading from checkpointing cls._transformerSerializer = TransformFunctionSerializer( - SparkContext._active_spark_context, CloudPickleSerializer(), gw + SparkContext._active_spark_context, + CloudPickleSerializer(), + gw, ) @classmethod - def getOrCreate(cls, checkpointPath, setupFunc): + def getOrCreate( + cls, checkpointPath: str, setupFunc: Callable[[], "StreamingContext"] + ) -> "StreamingContext": """ Either recreate a StreamingContext from checkpoint data or create a new StreamingContext. If checkpoint data exists in the provided `checkpointPath`, then StreamingContext will be @@ -104,6 +120,8 @@ def getOrCreate(cls, checkpointPath, setupFunc): cls._ensure_initialized() gw = SparkContext._gateway + assert gw is not None + # Check whether valid checkpoint information exists in the given path ssc_option = gw.jvm.StreamingContextPythonHelper().tryRecoverFromCheckpoint(checkpointPath) if ssc_option.isEmpty(): @@ -121,12 +139,15 @@ def getOrCreate(cls, checkpointPath, setupFunc): sc = SparkContext._active_spark_context + assert sc is not None + # update ctx in serializer + assert cls._transformerSerializer is not None cls._transformerSerializer.ctx = sc return StreamingContext(sc, None, jssc) @classmethod - def getActive(cls): + def getActive(cls) -> Optional["StreamingContext"]: """ Return either the currently active StreamingContext (i.e., if there is a context started but not stopped) or None. @@ -149,7 +170,9 @@ def getActive(cls): return cls._activeContext @classmethod - def getActiveOrCreate(cls, checkpointPath, setupFunc): + def getActiveOrCreate( + cls, checkpointPath: str, setupFunc: Callable[[], "StreamingContext"] + ) -> "StreamingContext": """ Either return the active StreamingContext (i.e. currently started but not stopped), or recreate a StreamingContext from checkpoint data or create a new StreamingContext @@ -178,20 +201,20 @@ def getActiveOrCreate(cls, checkpointPath, setupFunc): return setupFunc() @property - def sparkContext(self): + def sparkContext(self) -> SparkContext: """ Return SparkContext which is associated with this StreamingContext. """ return self._sc - def start(self): + def start(self) -> None: """ Start the execution of the streams. """ self._jssc.start() StreamingContext._activeContext = self - def awaitTermination(self, timeout=None): + def awaitTermination(self, timeout: Optional[int] = None) -> None: """ Wait for the execution to stop. @@ -205,7 +228,7 @@ def awaitTermination(self, timeout=None): else: self._jssc.awaitTerminationOrTimeout(int(timeout * 1000)) - def awaitTerminationOrTimeout(self, timeout): + def awaitTerminationOrTimeout(self, timeout: int) -> None: """ Wait for the execution to stop. Return `true` if it's stopped; or throw the reported error during the execution; or `false` if the @@ -218,7 +241,7 @@ def awaitTerminationOrTimeout(self, timeout): """ return self._jssc.awaitTerminationOrTimeout(int(timeout * 1000)) - def stop(self, stopSparkContext=True, stopGraceFully=False): + def stop(self, stopSparkContext: bool = True, stopGraceFully: bool = False) -> None: """ Stop the execution of the streams, with option of ensuring all received data has been processed. @@ -236,7 +259,7 @@ def stop(self, stopSparkContext=True, stopGraceFully=False): if stopSparkContext: self._sc.stop() - def remember(self, duration): + def remember(self, duration: int) -> None: """ Set each DStreams in this context to remember RDDs it generated in the last given duration. DStreams remember RDDs only for a @@ -252,7 +275,7 @@ def remember(self, duration): """ self._jssc.remember(self._jduration(duration)) - def checkpoint(self, directory): + def checkpoint(self, directory: str) -> None: """ Sets the context to periodically checkpoint the DStream operations for master fault-tolerance. The graph will be checkpointed every batch interval. @@ -264,7 +287,9 @@ def checkpoint(self, directory): """ self._jssc.checkpoint(directory) - def socketTextStream(self, hostname, port, storageLevel=StorageLevel.MEMORY_AND_DISK_2): + def socketTextStream( + self, hostname: str, port: int, storageLevel: StorageLevel = StorageLevel.MEMORY_AND_DISK_2 + ) -> "DStream[str]": """ Create an input from TCP source hostname:port. Data is received using a TCP socket and receive byte is interpreted as UTF8 encoded ``\\n`` delimited @@ -284,7 +309,7 @@ def socketTextStream(self, hostname, port, storageLevel=StorageLevel.MEMORY_AND_ self._jssc.socketTextStream(hostname, port, jlevel), self, UTF8Deserializer() ) - def textFileStream(self, directory): + def textFileStream(self, directory: str) -> "DStream[str]": """ Create an input stream that monitors a Hadoop-compatible file system for new files and reads them as text files. Files must be written to the @@ -294,7 +319,7 @@ def textFileStream(self, directory): """ return DStream(self._jssc.textFileStream(directory), self, UTF8Deserializer()) - def binaryRecordsStream(self, directory, recordLength): + def binaryRecordsStream(self, directory: str, recordLength: int) -> "DStream[bytes]": """ Create an input stream that monitors a Hadoop-compatible file system for new files and reads them as flat binary files with records of @@ -313,14 +338,19 @@ def binaryRecordsStream(self, directory, recordLength): self._jssc.binaryRecordsStream(directory, recordLength), self, NoOpSerializer() ) - def _check_serializers(self, rdds): + def _check_serializers(self, rdds: List[RDD[T]]) -> None: # make sure they have same serializer if len(set(rdd._jrdd_deserializer for rdd in rdds)) > 1: for i in range(len(rdds)): # reset them to sc.serializer rdds[i] = rdds[i]._reserialize() - def queueStream(self, rdds, oneAtATime=True, default=None): + def queueStream( + self, + rdds: List[RDD[T]], + oneAtATime: bool = True, + default: Optional[RDD[T]] = None, + ) -> "DStream[T]": """ Create an input stream from a queue of RDDs or list. In each batch, it will process either one or all of the RDDs returned by the queue. @@ -339,42 +369,48 @@ def queueStream(self, rdds, oneAtATime=True, default=None): Changes to the queue after the stream is created will not be recognized. """ if default and not isinstance(default, RDD): - default = self._sc.parallelize(default) + default = self._sc.parallelize(default) # type: ignore[arg-type] if not rdds and default: - rdds = [rdds] + rdds = [rdds] # type: ignore[list-item] if rdds and not isinstance(rdds[0], RDD): - rdds = [self._sc.parallelize(input) for input in rdds] + rdds = [self._sc.parallelize(input) for input in rdds] # type: ignore[arg-type] self._check_serializers(rdds) + assert self._jvm is not None queue = self._jvm.PythonDStream.toRDDQueue([r._jrdd for r in rdds]) if default: default = default._reserialize(rdds[0]._jrdd_deserializer) + assert default is not None jdstream = self._jssc.queueStream(queue, oneAtATime, default._jrdd) else: jdstream = self._jssc.queueStream(queue, oneAtATime) return DStream(jdstream, self, rdds[0]._jrdd_deserializer) - def transform(self, dstreams, transformFunc): + def transform( + self, dstreams: List["DStream[Any]"], transformFunc: Callable[..., RDD[T]] + ) -> "DStream[T]": """ Create a new DStream in which each RDD is generated by applying a function on RDDs of the DStreams. The order of the JavaRDDs in the transform function parameter will be the same as the order of corresponding DStreams in the list. """ - jdstreams = [d._jdstream for d in dstreams] + jdstreams = [d._jdstream for d in dstreams] # type: ignore[attr-defined] # change the final serializer to sc.serializer func = TransformFunction( self._sc, lambda t, *rdds: transformFunc(rdds), - *[d._jrdd_deserializer for d in dstreams], + *[d._jrdd_deserializer for d in dstreams], # type: ignore[attr-defined] ) + + assert self._jvm is not None jfunc = self._jvm.TransformFunction(func) jdstream = self._jssc.transform(jdstreams, jfunc) return DStream(jdstream, self, self._sc.serializer) - def union(self, *dstreams): + def union(self, *dstreams: "DStream[T]") -> "DStream[T]": """ Create a unified DStream from multiple DStreams of the same type and same slide duration. @@ -383,30 +419,43 @@ def union(self, *dstreams): raise ValueError("should have at least one DStream to union") if len(dstreams) == 1: return dstreams[0] - if len(set(s._jrdd_deserializer for s in dstreams)) > 1: + if len(set(s._jrdd_deserializer for s in dstreams)) > 1: # type: ignore[attr-defined] raise ValueError("All DStreams should have same serializer") - if len(set(s._slideDuration for s in dstreams)) > 1: + if len(set(s._slideDuration for s in dstreams)) > 1: # type: ignore[attr-defined] raise ValueError("All DStreams should have same slide duration") + + assert SparkContext._jvm is not None jdstream_cls = SparkContext._jvm.org.apache.spark.streaming.api.java.JavaDStream jpair_dstream_cls = SparkContext._jvm.org.apache.spark.streaming.api.java.JavaPairDStream gw = SparkContext._gateway - if is_instance_of(gw, dstreams[0]._jdstream, jdstream_cls): + if is_instance_of(gw, dstreams[0]._jdstream, jdstream_cls): # type: ignore[attr-defined] cls = jdstream_cls - elif is_instance_of(gw, dstreams[0]._jdstream, jpair_dstream_cls): + elif is_instance_of( + gw, dstreams[0]._jdstream, jpair_dstream_cls # type: ignore[attr-defined] + ): cls = jpair_dstream_cls else: - cls_name = dstreams[0]._jdstream.getClass().getCanonicalName() + cls_name = ( + dstreams[0]._jdstream.getClass().getCanonicalName() # type: ignore[attr-defined] + ) raise TypeError("Unsupported Java DStream class %s" % cls_name) + + assert gw is not None jdstreams = gw.new_array(cls, len(dstreams)) for i in range(0, len(dstreams)): - jdstreams[i] = dstreams[i]._jdstream - return DStream(self._jssc.union(jdstreams), self, dstreams[0]._jrdd_deserializer) + jdstreams[i] = dstreams[i]._jdstream # type: ignore[attr-defined] + return DStream( + self._jssc.union(jdstreams), + self, + dstreams[0]._jrdd_deserializer, # type: ignore[attr-defined] + ) - def addStreamingListener(self, streamingListener): + def addStreamingListener(self, streamingListener: StreamingListener) -> None: """ Add a [[org.apache.spark.streaming.scheduler.StreamingListener]] object for receiving system events related to streaming. """ + assert self._jvm is not None self._jssc.addStreamingListener( self._jvm.JavaStreamingListenerWrapper( self._jvm.PythonStreamingListenerWrapper(streamingListener) diff --git a/python/pyspark/streaming/context.pyi b/python/pyspark/streaming/context.pyi deleted file mode 100644 index 0d1b2aca7395f..0000000000000 --- a/python/pyspark/streaming/context.pyi +++ /dev/null @@ -1,71 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import Any, Callable, List, Optional, TypeVar - -from py4j.java_gateway import JavaObject - -from pyspark.context import SparkContext -from pyspark.rdd import RDD -from pyspark.storagelevel import StorageLevel -from pyspark.streaming.dstream import DStream -from pyspark.streaming.listener import StreamingListener - -T = TypeVar("T") - -class StreamingContext: - def __init__( - self, - sparkContext: SparkContext, - batchDuration: int = ..., - jssc: Optional[JavaObject] = ..., - ) -> None: ... - @classmethod - def getOrCreate( - cls, checkpointPath: str, setupFunc: Callable[[], StreamingContext] - ) -> StreamingContext: ... - @classmethod - def getActive(cls) -> StreamingContext: ... - @classmethod - def getActiveOrCreate( - cls, checkpointPath: str, setupFunc: Callable[[], StreamingContext] - ) -> StreamingContext: ... - @property - def sparkContext(self) -> SparkContext: ... - def start(self) -> None: ... - def awaitTermination(self, timeout: Optional[int] = ...) -> None: ... - def awaitTerminationOrTimeout(self, timeout: int) -> None: ... - def stop(self, stopSparkContext: bool = ..., stopGraceFully: bool = ...) -> None: ... - def remember(self, duration: int) -> None: ... - def checkpoint(self, directory: str) -> None: ... - def socketTextStream( - self, hostname: str, port: int, storageLevel: StorageLevel = ... - ) -> DStream[str]: ... - def textFileStream(self, directory: str) -> DStream[str]: ... - def binaryRecordsStream(self, directory: str, recordLength: int) -> DStream[bytes]: ... - def queueStream( - self, - rdds: List[RDD[T]], - oneAtATime: bool = ..., - default: Optional[RDD[T]] = ..., - ) -> DStream[T]: ... - def transform( - self, dstreams: List[DStream[Any]], transformFunc: Callable[..., RDD[T]] - ) -> DStream[T]: ... - def union(self, *dstreams: DStream[T]) -> DStream[T]: ... - def addStreamingListener(self, streamingListener: StreamingListener) -> None: ... diff --git a/python/pyspark/streaming/kinesis.py b/python/pyspark/streaming/kinesis.py index 26d66c394ab83..150fb79f5727b 100644 --- a/python/pyspark/streaming/kinesis.py +++ b/python/pyspark/streaming/kinesis.py @@ -153,10 +153,11 @@ def createStream( The given AWS credentials will get saved in DStream checkpoints if checkpointing is enabled. Make sure that your checkpoint directory is secure. """ - jlevel = ssc._sc._getJavaStorageLevel(storageLevel) # type: ignore[attr-defined] - jduration = ssc._jduration(checkpointInterval) # type: ignore[attr-defined] + jlevel = ssc._sc._getJavaStorageLevel(storageLevel) + jduration = ssc._jduration(checkpointInterval) - jvm = ssc._jvm # type: ignore[attr-defined] + jvm = ssc._jvm + assert jvm is not None try: helper = jvm.org.apache.spark.streaming.kinesis.KinesisUtilsPythonHelper() @@ -170,7 +171,7 @@ def createStream( ) raise jstream = helper.createStream( - ssc._jssc, # type: ignore[attr-defined] + ssc._jssc, kinesisAppName, streamName, endpointUrl, From 30c6802574e5993e6f0f10d4c189c6e8325bcc5c Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Thu, 14 Apr 2022 13:11:00 +0900 Subject: [PATCH 139/535] [SPARK-38889][SQL] Compile boolean column filters to use the bit type for MSSQL data source ### What changes were proposed in this pull request? This PR compiles the boolean data type to the bit data type for pushed column filters while querying the MSSQL data soruce. Microsoft SQL Server does not support the boolean type, so the JDBC dialect should use the bit data type instead. ### Why are the changes needed? To fix a bug that was exposed by the boolean column filter pushdown to SQL server data source. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a new integration test. Closes #36182 from allisonwang-db/spark-38889-mssql-predicate-pushdown. Authored-by: allisonwang-db Signed-off-by: Hyukjin Kwon (cherry picked from commit 320f88d54440e05228a90ef5663991e28ae07c95) Signed-off-by: Hyukjin Kwon --- .../sql/jdbc/MsSqlServerIntegrationSuite.scala | 17 +++++++++++++++++ .../spark/sql/jdbc/MsSqlServerDialect.scala | 10 ++++++++++ 2 files changed, 27 insertions(+) diff --git a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala index 5992253a958e6..e293f9a8f7ba9 100644 --- a/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala +++ b/external/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MsSqlServerIntegrationSuite.scala @@ -22,6 +22,7 @@ import java.sql.{Connection, Date, Timestamp} import java.util.Properties import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ +import org.apache.spark.sql.functions.col import org.apache.spark.sql.internal.SQLConf import org.apache.spark.tags.DockerTest @@ -140,6 +141,14 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { |'MULTIPOLYGON(((2 2, 2 -2, -2 -2, -2 2, 2 2)),((1 1, 3 1, 3 3, 1 3, 1 1)))', |'GEOMETRYCOLLECTION(LINESTRING(1 1, 3 5),POLYGON((-1 -1, -1 -5, -5 -5, -5 -1, -1 -1)))') """.stripMargin).executeUpdate() + conn.prepareStatement( + """ + |CREATE TABLE bits(a INT, b INT, c BIT) + |""".stripMargin).executeUpdate() + conn.prepareStatement( + """ + |INSERT INTO bits VALUES (1, 2, 1) + """.stripMargin).executeUpdate() } test("Basic test") { @@ -357,4 +366,12 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationSuite { 0, 3, 0, 0, 0, -1, -1, -1, -1, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 0, 0, 3)) } + + test("SPARK-38889: MsSqlServerDialect should handle boolean filter push down") { + val df = spark.read.jdbc(jdbcUrl, "bits", new Properties) + val rows = df.collect() + assert(rows.length == 1) + val filtered = df.where(col("c") === 0).collect() + assert(filtered.length == 0) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala index 8d2fbec55f919..a42129dbe8da8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala @@ -40,6 +40,16 @@ private object MsSqlServerDialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:sqlserver") + // Microsoft SQL Server does not have the boolean type. + // Compile the boolean value to the bit data type instead. + // scalastyle:off line.size.limit + // See https://docs.microsoft.com/en-us/sql/t-sql/data-types/data-types-transact-sql?view=sql-server-ver15 + // scalastyle:on line.size.limit + override def compileValue(value: Any): Any = value match { + case booleanValue: Boolean => if (booleanValue) 1 else 0 + case other => super.compileValue(other) + } + // scalastyle:off line.size.limit // See https://docs.microsoft.com/en-us/sql/t-sql/functions/aggregate-functions-transact-sql?view=sql-server-ver15 // scalastyle:on line.size.limit From dc2212ed05bb9c9dc340d7290458da77813222d5 Mon Sep 17 00:00:00 2001 From: dch nguyen Date: Thu, 14 Apr 2022 21:11:32 +0200 Subject: [PATCH 140/535] [SPARK-37405][PYTHON] Inline type hints for python/pyspark/ml/feature.py ### What changes were proposed in this pull request? Inline type hints for python/pyspark/ml/feature.py ### Why are the changes needed? We can take advantage of static type checking within the functions by inlining the type hints. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #35530 from dchvn/SPARK-37405. Lead-authored-by: dch nguyen Co-authored-by: dch nguyen Signed-off-by: zero323 (cherry picked from commit 8ce8a70e2f8a04c92a2b0d2f45fcdc8c7c8014be) Signed-off-by: zero323 --- python/pyspark/ml/feature.py | 2337 ++++++++++++++++++++++----------- python/pyspark/ml/feature.pyi | 1586 ---------------------- 2 files changed, 1583 insertions(+), 2340 deletions(-) delete mode 100644 python/pyspark/ml/feature.pyi diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 18731ae01ae7c..8cebea2363dc0 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -14,9 +14,23 @@ # See the License for the specific language governing permissions and # limitations under the License. # +from typing import ( + cast, + overload, + Any, + Dict, + Generic, + List, + Optional, + Tuple, + TypeVar, + Union, + TYPE_CHECKING, +) -from pyspark import since, keyword_only, SparkContext -from pyspark.ml.linalg import _convert_to_vector +from pyspark import keyword_only, since, SparkContext +from pyspark.ml.linalg import _convert_to_vector, DenseMatrix, DenseVector, Vector +from pyspark.sql.dataframe import DataFrame from pyspark.ml.param.shared import ( HasThreshold, HasThresholds, @@ -40,6 +54,12 @@ from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, JavaTransformer, _jvm from pyspark.ml.common import inherit_doc +if TYPE_CHECKING: + from py4j.java_gateway import JavaObject + +JM = TypeVar("JM", bound=JavaTransformer) +P = TypeVar("P", bound=Params) + __all__ = [ "Binarizer", "BucketedRandomProjectionLSH", @@ -108,7 +128,7 @@ class Binarizer( HasOutputCol, HasInputCols, HasOutputCols, - JavaMLReadable, + JavaMLReadable["Binarizer"], JavaMLWritable, ): """ @@ -157,7 +177,7 @@ class Binarizer( ... """ - threshold = Param( + threshold: Param[float] = Param( Params._dummy(), "threshold", "Param for threshold used to binarize continuous features. " @@ -165,7 +185,7 @@ class Binarizer( + "The features equal to or less than the threshold will be binarized to 0.0", typeConverter=TypeConverters.toFloat, ) - thresholds = Param( + thresholds: Param[List[float]] = Param( Params._dummy(), "thresholds", "Param for array of threshold used to binarize continuous features. " @@ -175,16 +195,38 @@ class Binarizer( typeConverter=TypeConverters.toListFloat, ) + _input_kwargs: Dict[str, Any] + + @overload + def __init__( + self, + *, + threshold: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + ): + ... + + @overload + def __init__( + self, + *, + thresholds: Optional[List[float]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + ): + ... + @keyword_only def __init__( self, *, - threshold=0.0, - inputCol=None, - outputCol=None, - thresholds=None, - inputCols=None, - outputCols=None, + threshold: float = 0.0, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + thresholds: Optional[List[float]] = None, + inputCols: Optional[List[str]] = None, + outputCols: Optional[List[str]] = None, ): """ __init__(self, \\*, threshold=0.0, inputCol=None, outputCol=None, thresholds=None, \ @@ -196,18 +238,38 @@ def __init__( kwargs = self._input_kwargs self.setParams(**kwargs) + @overload + def setParams( + self, + *, + threshold: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + ) -> "Binarizer": + ... + + @overload + def setParams( + self, + *, + thresholds: Optional[List[float]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + ) -> "Binarizer": + ... + @keyword_only @since("1.4.0") def setParams( self, *, - threshold=0.0, - inputCol=None, - outputCol=None, - thresholds=None, - inputCols=None, - outputCols=None, - ): + threshold: float = 0.0, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + thresholds: Optional[List[float]] = None, + inputCols: Optional[List[str]] = None, + outputCols: Optional[List[str]] = None, + ) -> "Binarizer": """ setParams(self, \\*, threshold=0.0, inputCol=None, outputCol=None, thresholds=None, \ inputCols=None, outputCols=None) @@ -217,40 +279,40 @@ def setParams( return self._set(**kwargs) @since("1.4.0") - def setThreshold(self, value): + def setThreshold(self, value: float) -> "Binarizer": """ Sets the value of :py:attr:`threshold`. """ return self._set(threshold=value) @since("3.0.0") - def setThresholds(self, value): + def setThresholds(self, value: List[float]) -> "Binarizer": """ Sets the value of :py:attr:`thresholds`. """ return self._set(thresholds=value) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "Binarizer": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setInputCols(self, value): + def setInputCols(self, value: List[str]) -> "Binarizer": """ Sets the value of :py:attr:`inputCols`. """ return self._set(inputCols=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "Binarizer": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) @since("3.0.0") - def setOutputCols(self, value): + def setOutputCols(self, value: List[str]) -> "Binarizer": """ Sets the value of :py:attr:`outputCols`. """ @@ -262,7 +324,7 @@ class _LSHParams(HasInputCol, HasOutputCol): Mixin for Locality Sensitive Hashing (LSH) algorithm parameters. """ - numHashTables = Param( + numHashTables: Param[int] = Param( Params._dummy(), "numHashTables", "number of hash tables, where " @@ -271,35 +333,35 @@ class _LSHParams(HasInputCol, HasOutputCol): typeConverter=TypeConverters.toInt, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_LSHParams, self).__init__(*args) self._setDefault(numHashTables=1) - def getNumHashTables(self): + def getNumHashTables(self) -> int: """ Gets the value of numHashTables or its default value. """ return self.getOrDefault(self.numHashTables) -class _LSH(JavaEstimator, _LSHParams, JavaMLReadable, JavaMLWritable): +class _LSH(JavaEstimator[JM], _LSHParams, JavaMLReadable, JavaMLWritable, Generic[JM]): """ Mixin for Locality Sensitive Hashing (LSH). """ - def setNumHashTables(self, value): + def setNumHashTables(self: P, value: int) -> P: """ Sets the value of :py:attr:`numHashTables`. """ return self._set(numHashTables=value) - def setInputCol(self, value): + def setInputCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`outputCol`. """ @@ -311,19 +373,25 @@ class _LSHModel(JavaModel, _LSHParams): Mixin for Locality Sensitive Hashing (LSH) models. """ - def setInputCol(self, value): + def setInputCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def approxNearestNeighbors(self, dataset, key, numNearestNeighbors, distCol="distCol"): + def approxNearestNeighbors( + self, + dataset: DataFrame, + key: Vector, + numNearestNeighbors: int, + distCol: str = "distCol", + ) -> DataFrame: """ Given a large dataset and an item, approximately find at most k items which have the closest distance to the item. If the :py:attr:`outputCol` is missing, the method will @@ -354,7 +422,13 @@ def approxNearestNeighbors(self, dataset, key, numNearestNeighbors, distCol="dis """ return self._call_java("approxNearestNeighbors", dataset, key, numNearestNeighbors, distCol) - def approxSimilarityJoin(self, datasetA, datasetB, threshold, distCol="distCol"): + def approxSimilarityJoin( + self, + datasetA: DataFrame, + datasetB: DataFrame, + threshold: float, + distCol: str = "distCol", + ) -> DataFrame: """ Join two datasets to approximately find all pairs of rows whose distance are smaller than the threshold. If the :py:attr:`outputCol` is missing, the method will transform the data; @@ -392,7 +466,7 @@ class _BucketedRandomProjectionLSHParams: .. versionadded:: 3.0.0 """ - bucketLength = Param( + bucketLength: Param[float] = Param( Params._dummy(), "bucketLength", "the length of each hash bucket, " + "a larger bucket lowers the false negative rate.", @@ -400,16 +474,21 @@ class _BucketedRandomProjectionLSHParams: ) @since("2.2.0") - def getBucketLength(self): + def getBucketLength(self) -> float: """ Gets the value of bucketLength or its default value. """ - return self.getOrDefault(self.bucketLength) + return (cast(Params, self)).getOrDefault(self.bucketLength) @inherit_doc class BucketedRandomProjectionLSH( - _LSH, _BucketedRandomProjectionLSHParams, HasSeed, JavaMLReadable, JavaMLWritable + _LSH["BucketedRandomProjectionLSHModel"], + _LSHParams, + _BucketedRandomProjectionLSHParams, + HasSeed, + JavaMLReadable["BucketedRandomProjectionLSH"], + JavaMLWritable, ): """ LSH class for Euclidean distance metrics. @@ -490,9 +569,17 @@ class BucketedRandomProjectionLSH( True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( - self, *, inputCol=None, outputCol=None, seed=None, numHashTables=1, bucketLength=None + self, + *, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + seed: Optional[int] = None, + numHashTables: int = 1, + bucketLength: Optional[float] = None, ): """ __init__(self, \\*, inputCol=None, outputCol=None, seed=None, numHashTables=1, \ @@ -508,8 +595,14 @@ def __init__( @keyword_only @since("2.2.0") def setParams( - self, *, inputCol=None, outputCol=None, seed=None, numHashTables=1, bucketLength=None - ): + self, + *, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + seed: Optional[int] = None, + numHashTables: int = 1, + bucketLength: Optional[float] = None, + ) -> "BucketedRandomProjectionLSH": """ setParams(self, \\*, inputCol=None, outputCol=None, seed=None, numHashTables=1, \ bucketLength=None) @@ -519,24 +612,27 @@ def setParams( return self._set(**kwargs) @since("2.2.0") - def setBucketLength(self, value): + def setBucketLength(self, value: float) -> "BucketedRandomProjectionLSH": """ Sets the value of :py:attr:`bucketLength`. """ return self._set(bucketLength=value) - def setSeed(self, value): + def setSeed(self, value: int) -> "BucketedRandomProjectionLSH": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "BucketedRandomProjectionLSHModel": return BucketedRandomProjectionLSHModel(java_model) class BucketedRandomProjectionLSHModel( - _LSHModel, _BucketedRandomProjectionLSHParams, JavaMLReadable, JavaMLWritable + _LSHModel, + _BucketedRandomProjectionLSHParams, + JavaMLReadable["BucketedRandomProjectionLSHModel"], + JavaMLWritable, ): r""" Model fitted by :py:class:`BucketedRandomProjectionLSH`, where multiple random vectors are @@ -557,7 +653,7 @@ class Bucketizer( HasInputCols, HasOutputCols, HasHandleInvalid, - JavaMLReadable, + JavaMLReadable["Bucketizer"], JavaMLWritable, ): """ @@ -625,7 +721,7 @@ class Bucketizer( ... """ - splits = Param( + splits: Param[List[float]] = Param( Params._dummy(), "splits", "Split points for mapping continuous features into buckets. With n+1 splits, " @@ -637,7 +733,7 @@ class Bucketizer( typeConverter=TypeConverters.toListFloat, ) - handleInvalid = Param( + handleInvalid: Param[str] = Param( Params._dummy(), "handleInvalid", "how to handle invalid entries " @@ -652,7 +748,7 @@ class Bucketizer( typeConverter=TypeConverters.toString, ) - splitsArray = Param( + splitsArray: Param[List[List[float]]] = Param( Params._dummy(), "splitsArray", "The array of split points for mapping " @@ -666,17 +762,41 @@ class Bucketizer( typeConverter=TypeConverters.toListListFloat, ) + _input_kwargs: Dict[str, Any] + + @overload + def __init__( + self, + *, + splits: Optional[List[float]] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ..., + ): + ... + + @overload + def __init__( + self, + *, + handleInvalid: str = ..., + splitsArray: Optional[List[List[float]]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + ): + ... + @keyword_only def __init__( self, *, - splits=None, - inputCol=None, - outputCol=None, - handleInvalid="error", - splitsArray=None, - inputCols=None, - outputCols=None, + splits: Optional[List[float]] = None, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + handleInvalid: str = "error", + splitsArray: Optional[List[List[float]]] = None, + inputCols: Optional[List[str]] = None, + outputCols: Optional[List[str]] = None, ): """ __init__(self, \\*, splits=None, inputCol=None, outputCol=None, handleInvalid="error", \ @@ -688,19 +808,41 @@ def __init__( kwargs = self._input_kwargs self.setParams(**kwargs) + @overload + def setParams( + self, + *, + splits: Optional[List[float]] = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ..., + ) -> "Bucketizer": + ... + + @overload + def setParams( + self, + *, + handleInvalid: str = ..., + splitsArray: Optional[List[List[float]]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + ) -> "Bucketizer": + ... + @keyword_only @since("1.4.0") def setParams( self, *, - splits=None, - inputCol=None, - outputCol=None, - handleInvalid="error", - splitsArray=None, - inputCols=None, - outputCols=None, - ): + splits: Optional[List[float]] = None, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + handleInvalid: str = "error", + splitsArray: Optional[List[List[float]]] = None, + inputCols: Optional[List[str]] = None, + outputCols: Optional[List[str]] = None, + ) -> "Bucketizer": """ setParams(self, \\*, splits=None, inputCol=None, outputCol=None, handleInvalid="error", \ splitsArray=None, inputCols=None, outputCols=None) @@ -710,60 +852,60 @@ def setParams( return self._set(**kwargs) @since("1.4.0") - def setSplits(self, value): + def setSplits(self, value: List[float]) -> "Bucketizer": """ Sets the value of :py:attr:`splits`. """ return self._set(splits=value) @since("1.4.0") - def getSplits(self): + def getSplits(self) -> List[float]: """ Gets the value of threshold or its default value. """ return self.getOrDefault(self.splits) @since("3.0.0") - def setSplitsArray(self, value): + def setSplitsArray(self, value: List[List[float]]) -> "Bucketizer": """ Sets the value of :py:attr:`splitsArray`. """ return self._set(splitsArray=value) @since("3.0.0") - def getSplitsArray(self): + def getSplitsArray(self) -> List[List[float]]: """ Gets the array of split points or its default value. """ return self.getOrDefault(self.splitsArray) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "Bucketizer": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setInputCols(self, value): + def setInputCols(self, value: List[str]) -> "Bucketizer": """ Sets the value of :py:attr:`inputCols`. """ return self._set(inputCols=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "Bucketizer": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) @since("3.0.0") - def setOutputCols(self, value): + def setOutputCols(self, value: List[str]) -> "Bucketizer": """ Sets the value of :py:attr:`outputCols`. """ return self._set(outputCols=value) - def setHandleInvalid(self, value): + def setHandleInvalid(self, value: str) -> "Bucketizer": """ Sets the value of :py:attr:`handleInvalid`. """ @@ -775,7 +917,7 @@ class _CountVectorizerParams(JavaParams, HasInputCol, HasOutputCol): Params for :py:class:`CountVectorizer` and :py:class:`CountVectorizerModel`. """ - minTF = Param( + minTF: Param[float] = Param( Params._dummy(), "minTF", "Filter to ignore rare words in" @@ -786,7 +928,7 @@ class _CountVectorizerParams(JavaParams, HasInputCol, HasOutputCol): + "only used in transform of CountVectorizerModel and does not affect fitting. Default 1.0", typeConverter=TypeConverters.toFloat, ) - minDF = Param( + minDF: Param[float] = Param( Params._dummy(), "minDF", "Specifies the minimum number of" @@ -796,7 +938,7 @@ class _CountVectorizerParams(JavaParams, HasInputCol, HasOutputCol): + " Default 1.0", typeConverter=TypeConverters.toFloat, ) - maxDF = Param( + maxDF: Param[float] = Param( Params._dummy(), "maxDF", "Specifies the maximum number of" @@ -808,13 +950,13 @@ class _CountVectorizerParams(JavaParams, HasInputCol, HasOutputCol): + " Default (2^63) - 1", typeConverter=TypeConverters.toFloat, ) - vocabSize = Param( + vocabSize: Param[int] = Param( Params._dummy(), "vocabSize", "max size of the vocabulary. Default 1 << 18.", typeConverter=TypeConverters.toInt, ) - binary = Param( + binary: Param[bool] = Param( Params._dummy(), "binary", "Binary toggle to control the output vector values." @@ -824,40 +966,40 @@ class _CountVectorizerParams(JavaParams, HasInputCol, HasOutputCol): typeConverter=TypeConverters.toBoolean, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_CountVectorizerParams, self).__init__(*args) self._setDefault(minTF=1.0, minDF=1.0, maxDF=2 ** 63 - 1, vocabSize=1 << 18, binary=False) @since("1.6.0") - def getMinTF(self): + def getMinTF(self) -> float: """ Gets the value of minTF or its default value. """ return self.getOrDefault(self.minTF) @since("1.6.0") - def getMinDF(self): + def getMinDF(self) -> float: """ Gets the value of minDF or its default value. """ return self.getOrDefault(self.minDF) @since("2.4.0") - def getMaxDF(self): + def getMaxDF(self) -> float: """ Gets the value of maxDF or its default value. """ return self.getOrDefault(self.maxDF) @since("1.6.0") - def getVocabSize(self): + def getVocabSize(self) -> int: """ Gets the value of vocabSize or its default value. """ return self.getOrDefault(self.vocabSize) @since("2.0.0") - def getBinary(self): + def getBinary(self) -> bool: """ Gets the value of binary or its default value. """ @@ -865,7 +1007,12 @@ def getBinary(self): @inherit_doc -class CountVectorizer(JavaEstimator, _CountVectorizerParams, JavaMLReadable, JavaMLWritable): +class CountVectorizer( + JavaEstimator["CountVectorizerModel"], + _CountVectorizerParams, + JavaMLReadable["CountVectorizer"], + JavaMLWritable, +): """ Extracts a vocabulary from document collections and generates a :py:attr:`CountVectorizerModel`. @@ -922,17 +1069,19 @@ class CountVectorizer(JavaEstimator, _CountVectorizerParams, JavaMLReadable, Jav ... """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - minTF=1.0, - minDF=1.0, - maxDF=2 ** 63 - 1, - vocabSize=1 << 18, - binary=False, - inputCol=None, - outputCol=None, + minTF: float = 1.0, + minDF: float = 1.0, + maxDF: float = 2 ** 63 - 1, + vocabSize: int = 1 << 18, + binary: bool = False, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, ): """ __init__(self, \\*, minTF=1.0, minDF=1.0, maxDF=2 ** 63 - 1, vocabSize=1 << 18,\ @@ -948,14 +1097,14 @@ def __init__( def setParams( self, *, - minTF=1.0, - minDF=1.0, - maxDF=2 ** 63 - 1, - vocabSize=1 << 18, - binary=False, - inputCol=None, - outputCol=None, - ): + minTF: float = 1.0, + minDF: float = 1.0, + maxDF: float = 2 ** 63 - 1, + vocabSize: int = 1 << 18, + binary: bool = False, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ) -> "CountVectorizer": """ setParams(self, \\*, minTF=1.0, minDF=1.0, maxDF=2 ** 63 - 1, vocabSize=1 << 18,\ binary=False, inputCol=None, outputCol=None) @@ -965,58 +1114,60 @@ def setParams( return self._set(**kwargs) @since("1.6.0") - def setMinTF(self, value): + def setMinTF(self, value: float) -> "CountVectorizer": """ Sets the value of :py:attr:`minTF`. """ return self._set(minTF=value) @since("1.6.0") - def setMinDF(self, value): + def setMinDF(self, value: float) -> "CountVectorizer": """ Sets the value of :py:attr:`minDF`. """ return self._set(minDF=value) @since("2.4.0") - def setMaxDF(self, value): + def setMaxDF(self, value: float) -> "CountVectorizer": """ Sets the value of :py:attr:`maxDF`. """ return self._set(maxDF=value) @since("1.6.0") - def setVocabSize(self, value): + def setVocabSize(self, value: int) -> "CountVectorizer": """ Sets the value of :py:attr:`vocabSize`. """ return self._set(vocabSize=value) @since("2.0.0") - def setBinary(self, value): + def setBinary(self, value: bool) -> "CountVectorizer": """ Sets the value of :py:attr:`binary`. """ return self._set(binary=value) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "CountVectorizer": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "CountVectorizer": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "CountVectorizerModel": return CountVectorizerModel(java_model) @inherit_doc -class CountVectorizerModel(JavaModel, _CountVectorizerParams, JavaMLReadable, JavaMLWritable): +class CountVectorizerModel( + JavaModel, _CountVectorizerParams, JavaMLReadable["CountVectorizerModel"], JavaMLWritable +): """ Model fitted by :py:class:`CountVectorizer`. @@ -1024,14 +1175,14 @@ class CountVectorizerModel(JavaModel, _CountVectorizerParams, JavaMLReadable, Ja """ @since("3.0.0") - def setInputCol(self, value): + def setInputCol(self, value: str) -> "CountVectorizerModel": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "CountVectorizerModel": """ Sets the value of :py:attr:`outputCol`. """ @@ -1039,12 +1190,20 @@ def setOutputCol(self, value): @classmethod @since("2.4.0") - def from_vocabulary(cls, vocabulary, inputCol, outputCol=None, minTF=None, binary=None): + def from_vocabulary( + cls, + vocabulary: List[str], + inputCol: str, + outputCol: Optional[str] = None, + minTF: Optional[float] = None, + binary: Optional[bool] = None, + ) -> "CountVectorizerModel": """ Construct the model directly from a vocabulary list of strings, requires an active SparkContext. """ sc = SparkContext._active_spark_context + assert sc is not None and sc._gateway is not None java_class = sc._gateway.jvm.java.lang.String jvocab = CountVectorizerModel._new_java_array(vocabulary, java_class) model = CountVectorizerModel._create_from_java_class( @@ -1060,23 +1219,23 @@ def from_vocabulary(cls, vocabulary, inputCol, outputCol=None, minTF=None, binar model._set(vocabSize=len(vocabulary)) return model - @property + @property # type: ignore[misc] @since("1.6.0") - def vocabulary(self): + def vocabulary(self) -> List[str]: """ An array of terms in the vocabulary. """ return self._call_java("vocabulary") @since("2.4.0") - def setMinTF(self, value): + def setMinTF(self, value: float) -> "CountVectorizerModel": """ Sets the value of :py:attr:`minTF`. """ return self._set(minTF=value) @since("2.4.0") - def setBinary(self, value): + def setBinary(self, value: bool) -> "CountVectorizerModel": """ Sets the value of :py:attr:`binary`. """ @@ -1084,7 +1243,7 @@ def setBinary(self, value): @inherit_doc -class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): +class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable["DCT"], JavaMLWritable): """ A feature transformer that takes the 1D discrete cosine transform of a real vector. No zero padding is performed on the input vector. @@ -1125,15 +1284,23 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWrit False """ - inverse = Param( + inverse: Param[bool] = Param( Params._dummy(), "inverse", "Set transformer to perform inverse DCT, " + "default False.", typeConverter=TypeConverters.toBoolean, ) + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, inverse=False, inputCol=None, outputCol=None): + def __init__( + self, + *, + inverse: bool = False, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ): """ __init__(self, \\*, inverse=False, inputCol=None, outputCol=None) """ @@ -1145,7 +1312,13 @@ def __init__(self, *, inverse=False, inputCol=None, outputCol=None): @keyword_only @since("1.6.0") - def setParams(self, *, inverse=False, inputCol=None, outputCol=None): + def setParams( + self, + *, + inverse: bool = False, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ) -> "DCT": """ setParams(self, \\*, inverse=False, inputCol=None, outputCol=None) Sets params for this DCT. @@ -1154,26 +1327,26 @@ def setParams(self, *, inverse=False, inputCol=None, outputCol=None): return self._set(**kwargs) @since("1.6.0") - def setInverse(self, value): + def setInverse(self, value: bool) -> "DCT": """ Sets the value of :py:attr:`inverse`. """ return self._set(inverse=value) @since("1.6.0") - def getInverse(self): + def getInverse(self) -> bool: """ Gets the value of inverse or its default value. """ return self.getOrDefault(self.inverse) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "DCT": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "DCT": """ Sets the value of :py:attr:`outputCol`. """ @@ -1182,7 +1355,11 @@ def setOutputCol(self, value): @inherit_doc class ElementwiseProduct( - JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable["ElementwiseProduct"], + JavaMLWritable, ): """ Outputs the Hadamard product (i.e., the element-wise product) of each input vector @@ -1215,15 +1392,23 @@ class ElementwiseProduct( True """ - scalingVec = Param( + scalingVec: Param[Vector] = Param( Params._dummy(), "scalingVec", "Vector for hadamard product.", typeConverter=TypeConverters.toVector, ) + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, scalingVec=None, inputCol=None, outputCol=None): + def __init__( + self, + *, + scalingVec: Optional[Vector] = None, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ): """ __init__(self, \\*, scalingVec=None, inputCol=None, outputCol=None) """ @@ -1236,7 +1421,13 @@ def __init__(self, *, scalingVec=None, inputCol=None, outputCol=None): @keyword_only @since("1.5.0") - def setParams(self, *, scalingVec=None, inputCol=None, outputCol=None): + def setParams( + self, + *, + scalingVec: Optional[Vector] = None, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ) -> "ElementwiseProduct": """ setParams(self, \\*, scalingVec=None, inputCol=None, outputCol=None) Sets params for this ElementwiseProduct. @@ -1245,26 +1436,26 @@ def setParams(self, *, scalingVec=None, inputCol=None, outputCol=None): return self._set(**kwargs) @since("2.0.0") - def setScalingVec(self, value): + def setScalingVec(self, value: Vector) -> "ElementwiseProduct": """ Sets the value of :py:attr:`scalingVec`. """ return self._set(scalingVec=value) @since("2.0.0") - def getScalingVec(self): + def getScalingVec(self) -> Vector: """ Gets the value of scalingVec or its default value. """ return self.getOrDefault(self.scalingVec) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "ElementwiseProduct": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "ElementwiseProduct": """ Sets the value of :py:attr:`outputCol`. """ @@ -1273,7 +1464,12 @@ def setOutputCol(self, value): @inherit_doc class FeatureHasher( - JavaTransformer, HasInputCols, HasOutputCol, HasNumFeatures, JavaMLReadable, JavaMLWritable + JavaTransformer, + HasInputCols, + HasOutputCol, + HasNumFeatures, + JavaMLReadable["FeatureHasher"], + JavaMLWritable, ): """ Feature hashing projects a set of categorical or numerical features into a feature vector of @@ -1332,16 +1528,23 @@ class FeatureHasher( True """ - categoricalCols = Param( + categoricalCols: Param[List[str]] = Param( Params._dummy(), "categoricalCols", "numeric columns to treat as categorical", typeConverter=TypeConverters.toListString, ) + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( - self, *, numFeatures=1 << 18, inputCols=None, outputCol=None, categoricalCols=None + self, + *, + numFeatures: int = 1 << 18, + inputCols: Optional[List[str]] = None, + outputCol: Optional[str] = None, + categoricalCols: Optional[List[str]] = None, ): """ __init__(self, \\*, numFeatures=1 << 18, inputCols=None, outputCol=None, \ @@ -1356,8 +1559,13 @@ def __init__( @keyword_only @since("2.3.0") def setParams( - self, *, numFeatures=1 << 18, inputCols=None, outputCol=None, categoricalCols=None - ): + self, + *, + numFeatures: int = 1 << 18, + inputCols: Optional[List[str]] = None, + outputCol: Optional[str] = None, + categoricalCols: Optional[List[str]] = None, + ) -> "FeatureHasher": """ setParams(self, \\*, numFeatures=1 << 18, inputCols=None, outputCol=None, \ categoricalCols=None) @@ -1367,32 +1575,32 @@ def setParams( return self._set(**kwargs) @since("2.3.0") - def setCategoricalCols(self, value): + def setCategoricalCols(self, value: List[str]) -> "FeatureHasher": """ Sets the value of :py:attr:`categoricalCols`. """ return self._set(categoricalCols=value) @since("2.3.0") - def getCategoricalCols(self): + def getCategoricalCols(self) -> List[str]: """ Gets the value of binary or its default value. """ return self.getOrDefault(self.categoricalCols) - def setInputCols(self, value): + def setInputCols(self, value: List[str]) -> "FeatureHasher": """ Sets the value of :py:attr:`inputCols`. """ return self._set(inputCols=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "FeatureHasher": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def setNumFeatures(self, value): + def setNumFeatures(self, value: int) -> "FeatureHasher": """ Sets the value of :py:attr:`numFeatures`. """ @@ -1401,7 +1609,12 @@ def setNumFeatures(self, value): @inherit_doc class HashingTF( - JavaTransformer, HasInputCol, HasOutputCol, HasNumFeatures, JavaMLReadable, JavaMLWritable + JavaTransformer, + HasInputCol, + HasOutputCol, + HasNumFeatures, + JavaMLReadable["HashingTF"], + JavaMLWritable, ): """ Maps a sequence of terms to their term frequencies using the hashing trick. @@ -1437,7 +1650,7 @@ class HashingTF( 5 """ - binary = Param( + binary: Param[bool] = Param( Params._dummy(), "binary", "If True, all non zero counts are set to 1. " @@ -1446,8 +1659,17 @@ class HashingTF( typeConverter=TypeConverters.toBoolean, ) + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, numFeatures=1 << 18, binary=False, inputCol=None, outputCol=None): + def __init__( + self, + *, + numFeatures: int = 1 << 18, + binary: bool = False, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ): """ __init__(self, \\*, numFeatures=1 << 18, binary=False, inputCol=None, outputCol=None) """ @@ -1459,7 +1681,14 @@ def __init__(self, *, numFeatures=1 << 18, binary=False, inputCol=None, outputCo @keyword_only @since("1.3.0") - def setParams(self, *, numFeatures=1 << 18, binary=False, inputCol=None, outputCol=None): + def setParams( + self, + *, + numFeatures: int = 1 << 18, + binary: bool = False, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ) -> "HashingTF": """ setParams(self, \\*, numFeatures=1 << 18, binary=False, inputCol=None, outputCol=None) Sets params for this HashingTF. @@ -1468,43 +1697,44 @@ def setParams(self, *, numFeatures=1 << 18, binary=False, inputCol=None, outputC return self._set(**kwargs) @since("2.0.0") - def setBinary(self, value): + def setBinary(self, value: bool) -> "HashingTF": """ Sets the value of :py:attr:`binary`. """ return self._set(binary=value) @since("2.0.0") - def getBinary(self): + def getBinary(self) -> bool: """ Gets the value of binary or its default value. """ return self.getOrDefault(self.binary) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "HashingTF": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "HashingTF": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def setNumFeatures(self, value): + def setNumFeatures(self, value: int) -> "HashingTF": """ Sets the value of :py:attr:`numFeatures`. """ return self._set(numFeatures=value) @since("3.0.0") - def indexOf(self, term): + def indexOf(self, term: Any) -> int: """ Returns the index of the input term. """ self._transfer_params_to_java() + assert self._java_obj is not None return self._java_obj.indexOf(term) @@ -1515,7 +1745,7 @@ class _IDFParams(HasInputCol, HasOutputCol): .. versionadded:: 3.0.0 """ - minDocFreq = Param( + minDocFreq: Param[int] = Param( Params._dummy(), "minDocFreq", "minimum number of documents in which a term should appear for filtering", @@ -1523,19 +1753,19 @@ class _IDFParams(HasInputCol, HasOutputCol): ) @since("1.4.0") - def getMinDocFreq(self): + def getMinDocFreq(self) -> int: """ Gets the value of minDocFreq or its default value. """ return self.getOrDefault(self.minDocFreq) - def __init__(self, *args): + def __init__(self, *args: Any): super(_IDFParams, self).__init__(*args) self._setDefault(minDocFreq=0) @inherit_doc -class IDF(JavaEstimator, _IDFParams, JavaMLReadable, JavaMLWritable): +class IDF(JavaEstimator["IDFModel"], _IDFParams, JavaMLReadable["IDF"], JavaMLWritable): """ Compute the Inverse Document Frequency (IDF) given a collection of documents. @@ -1581,8 +1811,16 @@ class IDF(JavaEstimator, _IDFParams, JavaMLReadable, JavaMLWritable): True """ + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, minDocFreq=0, inputCol=None, outputCol=None): + def __init__( + self, + *, + minDocFreq: int = 0, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ): """ __init__(self, \\*, minDocFreq=0, inputCol=None, outputCol=None) """ @@ -1593,7 +1831,13 @@ def __init__(self, *, minDocFreq=0, inputCol=None, outputCol=None): @keyword_only @since("1.4.0") - def setParams(self, *, minDocFreq=0, inputCol=None, outputCol=None): + def setParams( + self, + *, + minDocFreq: int = 0, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ) -> "IDF": """ setParams(self, \\*, minDocFreq=0, inputCol=None, outputCol=None) Sets params for this IDF. @@ -1602,29 +1846,29 @@ def setParams(self, *, minDocFreq=0, inputCol=None, outputCol=None): return self._set(**kwargs) @since("1.4.0") - def setMinDocFreq(self, value): + def setMinDocFreq(self, value: int) -> "IDF": """ Sets the value of :py:attr:`minDocFreq`. """ return self._set(minDocFreq=value) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "IDF": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "IDF": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "IDFModel": return IDFModel(java_model) -class IDFModel(JavaModel, _IDFParams, JavaMLReadable, JavaMLWritable): +class IDFModel(JavaModel, _IDFParams, JavaMLReadable["IDFModel"], JavaMLWritable): """ Model fitted by :py:class:`IDF`. @@ -1632,38 +1876,38 @@ class IDFModel(JavaModel, _IDFParams, JavaMLReadable, JavaMLWritable): """ @since("3.0.0") - def setInputCol(self, value): + def setInputCol(self, value: str) -> "IDFModel": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "IDFModel": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - @property + @property # type: ignore[misc] @since("2.0.0") - def idf(self): + def idf(self) -> Vector: """ Returns the IDF vector. """ return self._call_java("idf") - @property + @property # type: ignore[misc] @since("3.0.0") - def docFreq(self): + def docFreq(self) -> List[int]: """ Returns the document frequency. """ return self._call_java("docFreq") - @property + @property # type: ignore[misc] @since("3.0.0") - def numDocs(self): + def numDocs(self) -> int: """ Returns number of documents evaluated to compute idf """ @@ -1677,7 +1921,7 @@ class _ImputerParams(HasInputCol, HasInputCols, HasOutputCol, HasOutputCols, Has .. versionadded:: 3.0.0 """ - strategy = Param( + strategy: Param[str] = Param( Params._dummy(), "strategy", "strategy for imputation. If mean, then replace missing values using the mean " @@ -1687,7 +1931,7 @@ class _ImputerParams(HasInputCol, HasInputCols, HasOutputCol, HasOutputCols, Has typeConverter=TypeConverters.toString, ) - missingValue = Param( + missingValue: Param[float] = Param( Params._dummy(), "missingValue", "The placeholder for the missing values. All occurrences of missingValue " @@ -1695,19 +1939,19 @@ class _ImputerParams(HasInputCol, HasInputCols, HasOutputCol, HasOutputCols, Has typeConverter=TypeConverters.toFloat, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_ImputerParams, self).__init__(*args) self._setDefault(strategy="mean", missingValue=float("nan"), relativeError=0.001) @since("2.2.0") - def getStrategy(self): + def getStrategy(self) -> str: """ Gets the value of :py:attr:`strategy` or its default value. """ return self.getOrDefault(self.strategy) @since("2.2.0") - def getMissingValue(self): + def getMissingValue(self) -> float: """ Gets the value of :py:attr:`missingValue` or its default value. """ @@ -1715,7 +1959,9 @@ def getMissingValue(self): @inherit_doc -class Imputer(JavaEstimator, _ImputerParams, JavaMLReadable, JavaMLWritable): +class Imputer( + JavaEstimator["ImputerModel"], _ImputerParams, JavaMLReadable["Imputer"], JavaMLWritable +): """ Imputation estimator for completing missing values, using the mean, median or mode of the columns in which the missing values are located. The input columns should be of @@ -1829,17 +2075,43 @@ class Imputer(JavaEstimator, _ImputerParams, JavaMLReadable, JavaMLWritable): True """ + _input_kwargs: Dict[str, Any] + + @overload + def __init__( + self, + *, + strategy: str = ..., + missingValue: float = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + relativeError: float = ..., + ): + ... + + @overload + def __init__( + self, + *, + strategy: str = ..., + missingValue: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + relativeError: float = ..., + ): + ... + @keyword_only def __init__( self, *, - strategy="mean", - missingValue=float("nan"), - inputCols=None, - outputCols=None, - inputCol=None, - outputCol=None, - relativeError=0.001, + strategy: str = "mean", + missingValue: float = float("nan"), + inputCols: Optional[List[str]] = None, + outputCols: Optional[List[str]] = None, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + relativeError: float = 0.001, ): """ __init__(self, \\*, strategy="mean", missingValue=float("nan"), inputCols=None, \ @@ -1850,19 +2122,43 @@ def __init__( kwargs = self._input_kwargs self.setParams(**kwargs) + @overload + def setParams( + self, + *, + strategy: str = ..., + missingValue: float = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + relativeError: float = ..., + ) -> "Imputer": + ... + + @overload + def setParams( + self, + *, + strategy: str = ..., + missingValue: float = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + relativeError: float = ..., + ) -> "Imputer": + ... + @keyword_only @since("2.2.0") def setParams( self, *, - strategy="mean", - missingValue=float("nan"), - inputCols=None, - outputCols=None, - inputCol=None, - outputCol=None, - relativeError=0.001, - ): + strategy: str = "mean", + missingValue: float = float("nan"), + inputCols: Optional[List[str]] = None, + outputCols: Optional[List[str]] = None, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + relativeError: float = 0.001, + ) -> "Imputer": """ setParams(self, \\*, strategy="mean", missingValue=float("nan"), inputCols=None, \ outputCols=None, inputCol=None, outputCol=None, relativeError=0.001) @@ -1872,59 +2168,59 @@ def setParams( return self._set(**kwargs) @since("2.2.0") - def setStrategy(self, value): + def setStrategy(self, value: str) -> "Imputer": """ Sets the value of :py:attr:`strategy`. """ return self._set(strategy=value) @since("2.2.0") - def setMissingValue(self, value): + def setMissingValue(self, value: float) -> "Imputer": """ Sets the value of :py:attr:`missingValue`. """ return self._set(missingValue=value) @since("2.2.0") - def setInputCols(self, value): + def setInputCols(self, value: List[str]) -> "Imputer": """ Sets the value of :py:attr:`inputCols`. """ return self._set(inputCols=value) @since("2.2.0") - def setOutputCols(self, value): + def setOutputCols(self, value: List[str]) -> "Imputer": """ Sets the value of :py:attr:`outputCols`. """ return self._set(outputCols=value) @since("3.0.0") - def setInputCol(self, value): + def setInputCol(self, value: str) -> "Imputer": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "Imputer": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) @since("3.0.0") - def setRelativeError(self, value): + def setRelativeError(self, value: float) -> "Imputer": """ Sets the value of :py:attr:`relativeError`. """ return self._set(relativeError=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "ImputerModel": return ImputerModel(java_model) -class ImputerModel(JavaModel, _ImputerParams, JavaMLReadable, JavaMLWritable): +class ImputerModel(JavaModel, _ImputerParams, JavaMLReadable["ImputerModel"], JavaMLWritable): """ Model fitted by :py:class:`Imputer`. @@ -1932,36 +2228,36 @@ class ImputerModel(JavaModel, _ImputerParams, JavaMLReadable, JavaMLWritable): """ @since("3.0.0") - def setInputCols(self, value): + def setInputCols(self, value: List[str]) -> "ImputerModel": """ Sets the value of :py:attr:`inputCols`. """ return self._set(inputCols=value) @since("3.0.0") - def setOutputCols(self, value): + def setOutputCols(self, value: List[str]) -> "ImputerModel": """ Sets the value of :py:attr:`outputCols`. """ return self._set(outputCols=value) @since("3.0.0") - def setInputCol(self, value): + def setInputCol(self, value: str) -> "ImputerModel": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "ImputerModel": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - @property + @property # type: ignore[misc] @since("2.2.0") - def surrogateDF(self): + def surrogateDF(self) -> DataFrame: """ Returns a DataFrame containing inputCols and their corresponding surrogates, which are used to replace the missing values in the input DataFrame. @@ -1970,7 +2266,13 @@ def surrogateDF(self): @inherit_doc -class Interaction(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadable, JavaMLWritable): +class Interaction( + JavaTransformer, + HasInputCols, + HasOutputCol, + JavaMLReadable["Interaction"], + JavaMLWritable, +): """ Implements the feature interaction transform. This transformer takes in Double and Vector type columns and outputs a flattened vector of their feature interactions. To handle interaction, @@ -2006,8 +2308,10 @@ class Interaction(JavaTransformer, HasInputCols, HasOutputCol, JavaMLReadable, J True """ + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, inputCols=None, outputCol=None): + def __init__(self, *, inputCols: Optional[List[str]] = None, outputCol: Optional[str] = None): """ __init__(self, \\*, inputCols=None, outputCol=None): """ @@ -2019,7 +2323,9 @@ def __init__(self, *, inputCols=None, outputCol=None): @keyword_only @since("3.0.0") - def setParams(self, *, inputCols=None, outputCol=None): + def setParams( + self, *, inputCols: Optional[List[str]] = None, outputCol: Optional[str] = None + ) -> "Interaction": """ setParams(self, \\*, inputCols=None, outputCol=None) Sets params for this Interaction. @@ -2028,14 +2334,14 @@ def setParams(self, *, inputCols=None, outputCol=None): return self._set(**kwargs) @since("3.0.0") - def setInputCols(self, value): + def setInputCols(self, value: List[str]) -> "Interaction": """ Sets the value of :py:attr:`inputCols`. """ return self._set(inputCols=value) @since("3.0.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "Interaction": """ Sets the value of :py:attr:`outputCol`. """ @@ -2053,7 +2359,12 @@ class _MaxAbsScalerParams(HasInputCol, HasOutputCol): @inherit_doc -class MaxAbsScaler(JavaEstimator, _MaxAbsScalerParams, JavaMLReadable, JavaMLWritable): +class MaxAbsScaler( + JavaEstimator["MaxAbsScalerModel"], + _MaxAbsScalerParams, + JavaMLReadable["MaxAbsScaler"], + JavaMLWritable, +): """ Rescale each feature individually to range [-1, 1] by dividing through the largest maximum absolute value in each feature. It does not shift/center the data, and thus does not destroy @@ -2095,8 +2406,10 @@ class MaxAbsScaler(JavaEstimator, _MaxAbsScalerParams, JavaMLReadable, JavaMLWri True """ + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, inputCol=None, outputCol=None): + def __init__(self, *, inputCol: Optional[str] = None, outputCol: Optional[str] = None): """ __init__(self, \\*, inputCol=None, outputCol=None) """ @@ -2108,7 +2421,9 @@ def __init__(self, *, inputCol=None, outputCol=None): @keyword_only @since("2.0.0") - def setParams(self, *, inputCol=None, outputCol=None): + def setParams( + self, *, inputCol: Optional[str] = None, outputCol: Optional[str] = None + ) -> "MaxAbsScaler": """ setParams(self, \\*, inputCol=None, outputCol=None) Sets params for this MaxAbsScaler. @@ -2116,23 +2431,25 @@ def setParams(self, *, inputCol=None, outputCol=None): kwargs = self._input_kwargs return self._set(**kwargs) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "MaxAbsScaler": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "MaxAbsScaler": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "MaxAbsScalerModel": return MaxAbsScalerModel(java_model) -class MaxAbsScalerModel(JavaModel, _MaxAbsScalerParams, JavaMLReadable, JavaMLWritable): +class MaxAbsScalerModel( + JavaModel, _MaxAbsScalerParams, JavaMLReadable["MaxAbsScalerModel"], JavaMLWritable +): """ Model fitted by :py:class:`MaxAbsScaler`. @@ -2140,22 +2457,22 @@ class MaxAbsScalerModel(JavaModel, _MaxAbsScalerParams, JavaMLReadable, JavaMLWr """ @since("3.0.0") - def setInputCol(self, value): + def setInputCol(self, value: str) -> "MaxAbsScalerModel": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "MaxAbsScalerModel": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - @property + @property # type: ignore[misc] @since("2.0.0") - def maxAbs(self): + def maxAbs(self) -> Vector: """ Max Abs vector. """ @@ -2163,7 +2480,14 @@ def maxAbs(self): @inherit_doc -class MinHashLSH(_LSH, HasInputCol, HasOutputCol, HasSeed, JavaMLReadable, JavaMLWritable): +class MinHashLSH( + _LSH["MinHashLSHModel"], + HasInputCol, + HasOutputCol, + HasSeed, + JavaMLReadable["MinHashLSH"], + JavaMLWritable, +): """ LSH class for Jaccard distance. @@ -2228,8 +2552,17 @@ class MinHashLSH(_LSH, HasInputCol, HasOutputCol, HasSeed, JavaMLReadable, JavaM True """ + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, inputCol=None, outputCol=None, seed=None, numHashTables=1): + def __init__( + self, + *, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + seed: Optional[int] = None, + numHashTables: int = 1, + ): """ __init__(self, \\*, inputCol=None, outputCol=None, seed=None, numHashTables=1) """ @@ -2240,7 +2573,14 @@ def __init__(self, *, inputCol=None, outputCol=None, seed=None, numHashTables=1) @keyword_only @since("2.2.0") - def setParams(self, *, inputCol=None, outputCol=None, seed=None, numHashTables=1): + def setParams( + self, + *, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + seed: Optional[int] = None, + numHashTables: int = 1, + ) -> "MinHashLSH": """ setParams(self, \\*, inputCol=None, outputCol=None, seed=None, numHashTables=1) Sets params for this MinHashLSH. @@ -2248,13 +2588,13 @@ def setParams(self, *, inputCol=None, outputCol=None, seed=None, numHashTables=1 kwargs = self._input_kwargs return self._set(**kwargs) - def setSeed(self, value): + def setSeed(self, value: int) -> "MinHashLSH": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "MinHashLSHModel": return MinHashLSHModel(java_model) @@ -2282,32 +2622,32 @@ class _MinMaxScalerParams(HasInputCol, HasOutputCol): .. versionadded:: 3.0.0 """ - min = Param( + min: Param[float] = Param( Params._dummy(), "min", "Lower bound of the output feature range", typeConverter=TypeConverters.toFloat, ) - max = Param( + max: Param[float] = Param( Params._dummy(), "max", "Upper bound of the output feature range", typeConverter=TypeConverters.toFloat, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_MinMaxScalerParams, self).__init__(*args) self._setDefault(min=0.0, max=1.0) @since("1.6.0") - def getMin(self): + def getMin(self) -> float: """ Gets the value of min or its default value. """ return self.getOrDefault(self.min) @since("1.6.0") - def getMax(self): + def getMax(self) -> float: """ Gets the value of max or its default value. """ @@ -2315,7 +2655,12 @@ def getMax(self): @inherit_doc -class MinMaxScaler(JavaEstimator, _MinMaxScalerParams, JavaMLReadable, JavaMLWritable): +class MinMaxScaler( + JavaEstimator["MinMaxScalerModel"], + _MinMaxScalerParams, + JavaMLReadable["MinMaxScaler"], + JavaMLWritable, +): """ Rescale each feature individually to a common range [min, max] linearly using column summary statistics, which is also known as min-max normalization or Rescaling. The rescaled value for @@ -2372,8 +2717,17 @@ class MinMaxScaler(JavaEstimator, _MinMaxScalerParams, JavaMLReadable, JavaMLWri True """ + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, min=0.0, max=1.0, inputCol=None, outputCol=None): + def __init__( + self, + *, + min: float = 0.0, + max: float = 1.0, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ): """ __init__(self, \\*, min=0.0, max=1.0, inputCol=None, outputCol=None) """ @@ -2384,7 +2738,14 @@ def __init__(self, *, min=0.0, max=1.0, inputCol=None, outputCol=None): @keyword_only @since("1.6.0") - def setParams(self, *, min=0.0, max=1.0, inputCol=None, outputCol=None): + def setParams( + self, + *, + min: float = 0.0, + max: float = 1.0, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ) -> "MinMaxScaler": """ setParams(self, \\*, min=0.0, max=1.0, inputCol=None, outputCol=None) Sets params for this MinMaxScaler. @@ -2393,36 +2754,38 @@ def setParams(self, *, min=0.0, max=1.0, inputCol=None, outputCol=None): return self._set(**kwargs) @since("1.6.0") - def setMin(self, value): + def setMin(self, value: float) -> "MinMaxScaler": """ Sets the value of :py:attr:`min`. """ return self._set(min=value) @since("1.6.0") - def setMax(self, value): + def setMax(self, value: float) -> "MinMaxScaler": """ Sets the value of :py:attr:`max`. """ return self._set(max=value) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "MinMaxScaler": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "MinMaxScaler": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "MinMaxScalerModel": return MinMaxScalerModel(java_model) -class MinMaxScalerModel(JavaModel, _MinMaxScalerParams, JavaMLReadable, JavaMLWritable): +class MinMaxScalerModel( + JavaModel, _MinMaxScalerParams, JavaMLReadable["MinMaxScalerModel"], JavaMLWritable +): """ Model fitted by :py:class:`MinMaxScaler`. @@ -2430,44 +2793,44 @@ class MinMaxScalerModel(JavaModel, _MinMaxScalerParams, JavaMLReadable, JavaMLWr """ @since("3.0.0") - def setInputCol(self, value): + def setInputCol(self, value: str) -> "MinMaxScalerModel": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "MinMaxScalerModel": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) @since("3.0.0") - def setMin(self, value): + def setMin(self, value: float) -> "MinMaxScalerModel": """ Sets the value of :py:attr:`min`. """ return self._set(min=value) @since("3.0.0") - def setMax(self, value): + def setMax(self, value: float) -> "MinMaxScalerModel": """ Sets the value of :py:attr:`max`. """ return self._set(max=value) - @property + @property # type: ignore[misc] @since("2.0.0") - def originalMin(self): + def originalMin(self) -> Vector: """ Min value for each original column during fitting. """ return self._call_java("originalMin") - @property + @property # type: ignore[misc] @since("2.0.0") - def originalMax(self): + def originalMax(self) -> Vector: """ Max value for each original column during fitting. """ @@ -2475,7 +2838,7 @@ def originalMax(self): @inherit_doc -class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): +class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable["NGram"], JavaMLWritable): """ A feature transformer that converts the input array of strings into an array of n-grams. Null values in the input array are ignored. @@ -2519,15 +2882,19 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWr True """ - n = Param( + n: Param[int] = Param( Params._dummy(), "n", "number of elements per n-gram (>=1)", typeConverter=TypeConverters.toInt, ) + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, n=2, inputCol=None, outputCol=None): + def __init__( + self, *, n: int = 2, inputCol: Optional[str] = None, outputCol: Optional[str] = None + ): """ __init__(self, \\*, n=2, inputCol=None, outputCol=None) """ @@ -2539,7 +2906,9 @@ def __init__(self, *, n=2, inputCol=None, outputCol=None): @keyword_only @since("1.5.0") - def setParams(self, *, n=2, inputCol=None, outputCol=None): + def setParams( + self, *, n: int = 2, inputCol: Optional[str] = None, outputCol: Optional[str] = None + ) -> "NGram": """ setParams(self, \\*, n=2, inputCol=None, outputCol=None) Sets params for this NGram. @@ -2548,26 +2917,26 @@ def setParams(self, *, n=2, inputCol=None, outputCol=None): return self._set(**kwargs) @since("1.5.0") - def setN(self, value): + def setN(self, value: int) -> "NGram": """ Sets the value of :py:attr:`n`. """ return self._set(n=value) @since("1.5.0") - def getN(self): + def getN(self) -> int: """ Gets the value of n or its default value. """ return self.getOrDefault(self.n) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "NGram": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "NGram": """ Sets the value of :py:attr:`outputCol`. """ @@ -2575,7 +2944,13 @@ def setOutputCol(self, value): @inherit_doc -class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): +class Normalizer( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable["Normalizer"], + JavaMLWritable, +): """ Normalize a vector to have unit norm using the given p-norm. @@ -2609,8 +2984,12 @@ class Normalizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Jav p = Param(Params._dummy(), "p", "the p norm value.", typeConverter=TypeConverters.toFloat) + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, p=2.0, inputCol=None, outputCol=None): + def __init__( + self, *, p: float = 2.0, inputCol: Optional[str] = None, outputCol: Optional[str] = None + ): """ __init__(self, \\*, p=2.0, inputCol=None, outputCol=None) """ @@ -2622,7 +3001,9 @@ def __init__(self, *, p=2.0, inputCol=None, outputCol=None): @keyword_only @since("1.4.0") - def setParams(self, *, p=2.0, inputCol=None, outputCol=None): + def setParams( + self, *, p: float = 2.0, inputCol: Optional[str] = None, outputCol: Optional[str] = None + ) -> "Normalizer": """ setParams(self, \\*, p=2.0, inputCol=None, outputCol=None) Sets params for this Normalizer. @@ -2631,26 +3012,26 @@ def setParams(self, *, p=2.0, inputCol=None, outputCol=None): return self._set(**kwargs) @since("1.4.0") - def setP(self, value): + def setP(self, value: float) -> "Normalizer": """ Sets the value of :py:attr:`p`. """ return self._set(p=value) @since("1.4.0") - def getP(self): + def getP(self) -> float: """ Gets the value of p or its default value. """ return self.getOrDefault(self.p) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "Normalizer": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "Normalizer": """ Sets the value of :py:attr:`outputCol`. """ @@ -2666,7 +3047,7 @@ class _OneHotEncoderParams( .. versionadded:: 3.0.0 """ - handleInvalid = Param( + handleInvalid: Param[str] = Param( Params._dummy(), "handleInvalid", "How to handle invalid data during " @@ -2677,19 +3058,19 @@ class _OneHotEncoderParams( typeConverter=TypeConverters.toString, ) - dropLast = Param( + dropLast: Param[bool] = Param( Params._dummy(), "dropLast", "whether to drop the last category", typeConverter=TypeConverters.toBoolean, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_OneHotEncoderParams, self).__init__(*args) self._setDefault(handleInvalid="error", dropLast=True) @since("2.3.0") - def getDropLast(self): + def getDropLast(self) -> bool: """ Gets the value of dropLast or its default value. """ @@ -2697,7 +3078,12 @@ def getDropLast(self): @inherit_doc -class OneHotEncoder(JavaEstimator, _OneHotEncoderParams, JavaMLReadable, JavaMLWritable): +class OneHotEncoder( + JavaEstimator["OneHotEncoderModel"], + _OneHotEncoderParams, + JavaMLReadable["OneHotEncoder"], + JavaMLWritable, +): """ A one-hot encoder that maps a column of category indices to a column of binary vectors, with at most a single one-value per row that indicates the input category index. @@ -2760,16 +3146,40 @@ class OneHotEncoder(JavaEstimator, _OneHotEncoderParams, JavaMLReadable, JavaMLW True """ + _input_kwargs: Dict[str, Any] + + @overload + def __init__( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + handleInvalid: str = ..., + dropLast: bool = ..., + ): + ... + + @overload + def __init__( + self, + *, + handleInvalid: str = ..., + dropLast: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + ): + ... + @keyword_only def __init__( self, *, - inputCols=None, - outputCols=None, - handleInvalid="error", - dropLast=True, - inputCol=None, - outputCol=None, + inputCols: Optional[List[str]] = None, + outputCols: Optional[List[str]] = None, + handleInvalid: str = "error", + dropLast: bool = True, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, ): """ __init__(self, \\*, inputCols=None, outputCols=None, handleInvalid="error", dropLast=True, \ @@ -2780,18 +3190,40 @@ def __init__( kwargs = self._input_kwargs self.setParams(**kwargs) + @overload + def setParams( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + handleInvalid: str = ..., + dropLast: bool = ..., + ) -> "OneHotEncoder": + ... + + @overload + def setParams( + self, + *, + handleInvalid: str = ..., + dropLast: bool = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + ) -> "OneHotEncoder": + ... + @keyword_only @since("2.3.0") def setParams( self, *, - inputCols=None, - outputCols=None, - handleInvalid="error", - dropLast=True, - inputCol=None, - outputCol=None, - ): + inputCols: Optional[List[str]] = None, + outputCols: Optional[List[str]] = None, + handleInvalid: str = "error", + dropLast: bool = True, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ) -> "OneHotEncoder": """ setParams(self, \\*, inputCols=None, outputCols=None, handleInvalid="error", \ dropLast=True, inputCol=None, outputCol=None) @@ -2801,52 +3233,54 @@ def setParams( return self._set(**kwargs) @since("2.3.0") - def setDropLast(self, value): + def setDropLast(self, value: bool) -> "OneHotEncoder": """ Sets the value of :py:attr:`dropLast`. """ return self._set(dropLast=value) @since("3.0.0") - def setInputCols(self, value): + def setInputCols(self, value: List[str]) -> "OneHotEncoder": """ Sets the value of :py:attr:`inputCols`. """ return self._set(inputCols=value) @since("3.0.0") - def setOutputCols(self, value): + def setOutputCols(self, value: List[str]) -> "OneHotEncoder": """ Sets the value of :py:attr:`outputCols`. """ return self._set(outputCols=value) @since("3.0.0") - def setHandleInvalid(self, value): + def setHandleInvalid(self, value: str) -> "OneHotEncoder": """ Sets the value of :py:attr:`handleInvalid`. """ return self._set(handleInvalid=value) @since("3.0.0") - def setInputCol(self, value): + def setInputCol(self, value: str) -> "OneHotEncoder": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "OneHotEncoder": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "OneHotEncoderModel": return OneHotEncoderModel(java_model) -class OneHotEncoderModel(JavaModel, _OneHotEncoderParams, JavaMLReadable, JavaMLWritable): +class OneHotEncoderModel( + JavaModel, _OneHotEncoderParams, JavaMLReadable["OneHotEncoderModel"], JavaMLWritable +): """ Model fitted by :py:class:`OneHotEncoder`. @@ -2854,50 +3288,50 @@ class OneHotEncoderModel(JavaModel, _OneHotEncoderParams, JavaMLReadable, JavaML """ @since("3.0.0") - def setDropLast(self, value): + def setDropLast(self, value: bool) -> "OneHotEncoderModel": """ Sets the value of :py:attr:`dropLast`. """ return self._set(dropLast=value) @since("3.0.0") - def setInputCols(self, value): + def setInputCols(self, value: List[str]) -> "OneHotEncoderModel": """ Sets the value of :py:attr:`inputCols`. """ return self._set(inputCols=value) @since("3.0.0") - def setOutputCols(self, value): + def setOutputCols(self, value: List[str]) -> "OneHotEncoderModel": """ Sets the value of :py:attr:`outputCols`. """ return self._set(outputCols=value) @since("3.0.0") - def setInputCol(self, value): + def setInputCol(self, value: str) -> "OneHotEncoderModel": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "OneHotEncoderModel": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) @since("3.0.0") - def setHandleInvalid(self, value): + def setHandleInvalid(self, value: str) -> "OneHotEncoderModel": """ Sets the value of :py:attr:`handleInvalid`. """ return self._set(handleInvalid=value) - @property + @property # type: ignore[misc] @since("2.3.0") - def categorySizes(self): + def categorySizes(self) -> List[int]: """ Original number of categories for each feature being encoded. The array contains one value for each input column, in order. @@ -2907,7 +3341,11 @@ def categorySizes(self): @inherit_doc class PolynomialExpansion( - JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable["PolynomialExpansion"], + JavaMLWritable, ): """ Perform feature expansion in a polynomial space. As said in `wikipedia of Polynomial Expansion @@ -2940,15 +3378,19 @@ class PolynomialExpansion( True """ - degree = Param( + degree: Param[int] = Param( Params._dummy(), "degree", "the polynomial degree to expand (>= 1)", typeConverter=TypeConverters.toInt, ) + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, degree=2, inputCol=None, outputCol=None): + def __init__( + self, *, degree: int = 2, inputCol: Optional[str] = None, outputCol: Optional[str] = None + ): """ __init__(self, \\*, degree=2, inputCol=None, outputCol=None) """ @@ -2962,7 +3404,9 @@ def __init__(self, *, degree=2, inputCol=None, outputCol=None): @keyword_only @since("1.4.0") - def setParams(self, *, degree=2, inputCol=None, outputCol=None): + def setParams( + self, *, degree: int = 2, inputCol: Optional[str] = None, outputCol: Optional[str] = None + ) -> "PolynomialExpansion": """ setParams(self, \\*, degree=2, inputCol=None, outputCol=None) Sets params for this PolynomialExpansion. @@ -2971,26 +3415,26 @@ def setParams(self, *, degree=2, inputCol=None, outputCol=None): return self._set(**kwargs) @since("1.4.0") - def setDegree(self, value): + def setDegree(self, value: int) -> "PolynomialExpansion": """ Sets the value of :py:attr:`degree`. """ return self._set(degree=value) @since("1.4.0") - def getDegree(self): + def getDegree(self) -> int: """ Gets the value of degree or its default value. """ return self.getOrDefault(self.degree) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "PolynomialExpansion": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "PolynomialExpansion": """ Sets the value of :py:attr:`outputCol`. """ @@ -3006,7 +3450,7 @@ class QuantileDiscretizer( HasOutputCols, HasHandleInvalid, HasRelativeError, - JavaMLReadable, + JavaMLReadable["QuantileDiscretizer"], JavaMLWritable, ): """ @@ -3102,7 +3546,7 @@ class QuantileDiscretizer( ... """ - numBuckets = Param( + numBuckets: Param[int] = Param( Params._dummy(), "numBuckets", "Maximum number of buckets (quantiles, or " @@ -3110,7 +3554,7 @@ class QuantileDiscretizer( typeConverter=TypeConverters.toInt, ) - handleInvalid = Param( + handleInvalid: Param[str] = Param( Params._dummy(), "handleInvalid", "how to handle invalid entries. " @@ -3124,7 +3568,7 @@ class QuantileDiscretizer( typeConverter=TypeConverters.toString, ) - numBucketsArray = Param( + numBucketsArray: Param[List[int]] = Param( Params._dummy(), "numBucketsArray", "Array of number of buckets " @@ -3135,18 +3579,44 @@ class QuantileDiscretizer( typeConverter=TypeConverters.toListInt, ) + _input_kwargs: Dict[str, Any] + + @overload + def __init__( + self, + *, + numBuckets: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + relativeError: float = ..., + handleInvalid: str = ..., + ): + ... + + @overload + def __init__( + self, + *, + relativeError: float = ..., + handleInvalid: str = ..., + numBucketsArray: Optional[List[int]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + ): + ... + @keyword_only def __init__( self, *, - numBuckets=2, - inputCol=None, - outputCol=None, - relativeError=0.001, - handleInvalid="error", - numBucketsArray=None, - inputCols=None, - outputCols=None, + numBuckets: int = 2, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + relativeError: float = 0.001, + handleInvalid: str = "error", + numBucketsArray: Optional[List[int]] = None, + inputCols: Optional[List[str]] = None, + outputCols: Optional[List[str]] = None, ): """ __init__(self, \\*, numBuckets=2, inputCol=None, outputCol=None, relativeError=0.001, \ @@ -3160,20 +3630,44 @@ def __init__( kwargs = self._input_kwargs self.setParams(**kwargs) + @overload + def setParams( + self, + *, + numBuckets: int = ..., + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + relativeError: float = ..., + handleInvalid: str = ..., + ) -> "QuantileDiscretizer": + ... + + @overload + def setParams( + self, + *, + relativeError: float = ..., + handleInvalid: str = ..., + numBucketsArray: Optional[List[int]] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + ) -> "QuantileDiscretizer": + ... + @keyword_only @since("2.0.0") def setParams( self, *, - numBuckets=2, - inputCol=None, - outputCol=None, - relativeError=0.001, - handleInvalid="error", - numBucketsArray=None, - inputCols=None, - outputCols=None, - ): + numBuckets: int = 2, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + relativeError: float = 0.001, + handleInvalid: str = "error", + numBucketsArray: Optional[List[int]] = None, + inputCols: Optional[List[str]] = None, + outputCols: Optional[List[str]] = None, + ) -> "QuantileDiscretizer": """ setParams(self, \\*, numBuckets=2, inputCol=None, outputCol=None, relativeError=0.001, \ handleInvalid="error", numBucketsArray=None, inputCols=None, outputCols=None) @@ -3183,73 +3677,73 @@ def setParams( return self._set(**kwargs) @since("2.0.0") - def setNumBuckets(self, value): + def setNumBuckets(self, value: int) -> "QuantileDiscretizer": """ Sets the value of :py:attr:`numBuckets`. """ return self._set(numBuckets=value) @since("2.0.0") - def getNumBuckets(self): + def getNumBuckets(self) -> int: """ Gets the value of numBuckets or its default value. """ return self.getOrDefault(self.numBuckets) @since("3.0.0") - def setNumBucketsArray(self, value): + def setNumBucketsArray(self, value: List[int]) -> "QuantileDiscretizer": """ Sets the value of :py:attr:`numBucketsArray`. """ return self._set(numBucketsArray=value) @since("3.0.0") - def getNumBucketsArray(self): + def getNumBucketsArray(self) -> List[int]: """ Gets the value of numBucketsArray or its default value. """ return self.getOrDefault(self.numBucketsArray) @since("2.0.0") - def setRelativeError(self, value): + def setRelativeError(self, value: float) -> "QuantileDiscretizer": """ Sets the value of :py:attr:`relativeError`. """ return self._set(relativeError=value) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "QuantileDiscretizer": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setInputCols(self, value): + def setInputCols(self, value: List[str]) -> "QuantileDiscretizer": """ Sets the value of :py:attr:`inputCols`. """ return self._set(inputCols=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "QuantileDiscretizer": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) @since("3.0.0") - def setOutputCols(self, value): + def setOutputCols(self, value: List[str]) -> "QuantileDiscretizer": """ Sets the value of :py:attr:`outputCols`. """ return self._set(outputCols=value) - def setHandleInvalid(self, value): + def setHandleInvalid(self, value: str) -> "QuantileDiscretizer": """ Sets the value of :py:attr:`handleInvalid`. """ return self._set(handleInvalid=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> Bucketizer: """ Private method to convert the java_model to a Python model. """ @@ -3277,60 +3771,60 @@ class _RobustScalerParams(HasInputCol, HasOutputCol, HasRelativeError): .. versionadded:: 3.0.0 """ - lower = Param( + lower: Param[float] = Param( Params._dummy(), "lower", "Lower quantile to calculate quantile range", typeConverter=TypeConverters.toFloat, ) - upper = Param( + upper: Param[float] = Param( Params._dummy(), "upper", "Upper quantile to calculate quantile range", typeConverter=TypeConverters.toFloat, ) - withCentering = Param( + withCentering: Param[bool] = Param( Params._dummy(), "withCentering", "Whether to center data with median", typeConverter=TypeConverters.toBoolean, ) - withScaling = Param( + withScaling: Param[bool] = Param( Params._dummy(), "withScaling", "Whether to scale the data to " "quantile range", typeConverter=TypeConverters.toBoolean, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_RobustScalerParams, self).__init__(*args) self._setDefault( lower=0.25, upper=0.75, withCentering=False, withScaling=True, relativeError=0.001 ) @since("3.0.0") - def getLower(self): + def getLower(self) -> float: """ Gets the value of lower or its default value. """ return self.getOrDefault(self.lower) @since("3.0.0") - def getUpper(self): + def getUpper(self) -> float: """ Gets the value of upper or its default value. """ return self.getOrDefault(self.upper) @since("3.0.0") - def getWithCentering(self): + def getWithCentering(self) -> bool: """ Gets the value of withCentering or its default value. """ return self.getOrDefault(self.withCentering) @since("3.0.0") - def getWithScaling(self): + def getWithScaling(self) -> bool: """ Gets the value of withScaling or its default value. """ @@ -3338,7 +3832,9 @@ def getWithScaling(self): @inherit_doc -class RobustScaler(JavaEstimator, _RobustScalerParams, JavaMLReadable, JavaMLWritable): +class RobustScaler( + JavaEstimator, _RobustScalerParams, JavaMLReadable["RobustScaler"], JavaMLWritable +): """ RobustScaler removes the median and scales the data according to the quantile range. The quantile range is by default IQR (Interquartile Range, quantile range between the @@ -3391,17 +3887,19 @@ class RobustScaler(JavaEstimator, _RobustScalerParams, JavaMLReadable, JavaMLWri True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - lower=0.25, - upper=0.75, - withCentering=False, - withScaling=True, - inputCol=None, - outputCol=None, - relativeError=0.001, + lower: float = 0.25, + upper: float = 0.75, + withCentering: bool = False, + withScaling: bool = True, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + relativeError: float = 0.001, ): """ __init__(self, \\*, lower=0.25, upper=0.75, withCentering=False, withScaling=True, \ @@ -3417,14 +3915,14 @@ def __init__( def setParams( self, *, - lower=0.25, - upper=0.75, - withCentering=False, - withScaling=True, - inputCol=None, - outputCol=None, - relativeError=0.001, - ): + lower: float = 0.25, + upper: float = 0.75, + withCentering: bool = False, + withScaling: bool = True, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + relativeError: float = 0.001, + ) -> "RobustScaler": """ setParams(self, \\*, lower=0.25, upper=0.75, withCentering=False, withScaling=True, \ inputCol=None, outputCol=None, relativeError=0.001) @@ -3434,59 +3932,61 @@ def setParams( return self._set(**kwargs) @since("3.0.0") - def setLower(self, value): + def setLower(self, value: float) -> "RobustScaler": """ Sets the value of :py:attr:`lower`. """ return self._set(lower=value) @since("3.0.0") - def setUpper(self, value): + def setUpper(self, value: float) -> "RobustScaler": """ Sets the value of :py:attr:`upper`. """ return self._set(upper=value) @since("3.0.0") - def setWithCentering(self, value): + def setWithCentering(self, value: bool) -> "RobustScaler": """ Sets the value of :py:attr:`withCentering`. """ return self._set(withCentering=value) @since("3.0.0") - def setWithScaling(self, value): + def setWithScaling(self, value: bool) -> "RobustScaler": """ Sets the value of :py:attr:`withScaling`. """ return self._set(withScaling=value) @since("3.0.0") - def setInputCol(self, value): + def setInputCol(self, value: str) -> "RobustScaler": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "RobustScaler": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) @since("3.0.0") - def setRelativeError(self, value): + def setRelativeError(self, value: float) -> "RobustScaler": """ Sets the value of :py:attr:`relativeError`. """ return self._set(relativeError=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "RobustScalerModel": return RobustScalerModel(java_model) -class RobustScalerModel(JavaModel, _RobustScalerParams, JavaMLReadable, JavaMLWritable): +class RobustScalerModel( + JavaModel, _RobustScalerParams, JavaMLReadable["RobustScalerModel"], JavaMLWritable +): """ Model fitted by :py:class:`RobustScaler`. @@ -3494,30 +3994,30 @@ class RobustScalerModel(JavaModel, _RobustScalerParams, JavaMLReadable, JavaMLWr """ @since("3.0.0") - def setInputCol(self, value): + def setInputCol(self, value: str) -> "RobustScalerModel": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "RobustScalerModel": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - @property + @property # type: ignore[misc] @since("3.0.0") - def median(self): + def median(self) -> Vector: """ Median of the RobustScalerModel. """ return self._call_java("median") - @property + @property # type: ignore[misc] @since("3.0.0") - def range(self): + def range(self) -> Vector: """ Quantile range of the RobustScalerModel. """ @@ -3525,7 +4025,13 @@ def range(self): @inherit_doc -class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): +class RegexTokenizer( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable["RegexTokenizer"], + JavaMLWritable, +): """ A regex based tokenizer that extracts tokens either by using the provided regex pattern (in Java dialect) to split the text @@ -3570,40 +4076,42 @@ class RegexTokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, True """ - minTokenLength = Param( + minTokenLength: Param[int] = Param( Params._dummy(), "minTokenLength", "minimum token length (>= 0)", typeConverter=TypeConverters.toInt, ) - gaps = Param( + gaps: Param[bool] = Param( Params._dummy(), "gaps", "whether regex splits on gaps (True) or matches tokens " + "(False)", ) - pattern = Param( + pattern: Param[str] = Param( Params._dummy(), "pattern", "regex pattern (Java dialect) used for tokenizing", typeConverter=TypeConverters.toString, ) - toLowercase = Param( + toLowercase: Param[bool] = Param( Params._dummy(), "toLowercase", "whether to convert all characters to " + "lowercase before tokenizing", typeConverter=TypeConverters.toBoolean, ) + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - minTokenLength=1, - gaps=True, - pattern="\\s+", - inputCol=None, - outputCol=None, - toLowercase=True, + minTokenLength: int = 1, + gaps: bool = True, + pattern: str = "\\s+", + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + toLowercase: bool = True, ): """ __init__(self, \\*, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, \ @@ -3620,13 +4128,13 @@ def __init__( def setParams( self, *, - minTokenLength=1, - gaps=True, - pattern="\\s+", - inputCol=None, - outputCol=None, - toLowercase=True, - ): + minTokenLength: int = 1, + gaps: bool = True, + pattern: str = "\\s+", + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + toLowercase: bool = True, + ) -> "RegexTokenizer": """ setParams(self, \\*, minTokenLength=1, gaps=True, pattern="\\s+", inputCol=None, \ outputCol=None, toLowercase=True) @@ -3636,68 +4144,68 @@ def setParams( return self._set(**kwargs) @since("1.4.0") - def setMinTokenLength(self, value): + def setMinTokenLength(self, value: int) -> "RegexTokenizer": """ Sets the value of :py:attr:`minTokenLength`. """ return self._set(minTokenLength=value) @since("1.4.0") - def getMinTokenLength(self): + def getMinTokenLength(self) -> int: """ Gets the value of minTokenLength or its default value. """ return self.getOrDefault(self.minTokenLength) @since("1.4.0") - def setGaps(self, value): + def setGaps(self, value: bool) -> "RegexTokenizer": """ Sets the value of :py:attr:`gaps`. """ return self._set(gaps=value) @since("1.4.0") - def getGaps(self): + def getGaps(self) -> bool: """ Gets the value of gaps or its default value. """ return self.getOrDefault(self.gaps) @since("1.4.0") - def setPattern(self, value): + def setPattern(self, value: str) -> "RegexTokenizer": """ Sets the value of :py:attr:`pattern`. """ return self._set(pattern=value) @since("1.4.0") - def getPattern(self): + def getPattern(self) -> str: """ Gets the value of pattern or its default value. """ return self.getOrDefault(self.pattern) @since("2.0.0") - def setToLowercase(self, value): + def setToLowercase(self, value: bool) -> "RegexTokenizer": """ Sets the value of :py:attr:`toLowercase`. """ return self._set(toLowercase=value) @since("2.0.0") - def getToLowercase(self): + def getToLowercase(self) -> bool: """ Gets the value of toLowercase or its default value. """ return self.getOrDefault(self.toLowercase) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "RegexTokenizer": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "RegexTokenizer": """ Sets the value of :py:attr:`outputCol`. """ @@ -3705,7 +4213,7 @@ def setOutputCol(self, value): @inherit_doc -class SQLTransformer(JavaTransformer, JavaMLReadable, JavaMLWritable): +class SQLTransformer(JavaTransformer, JavaMLReadable["SQLTransformer"], JavaMLWritable): """ Implements the transforms which are defined by SQL statement. Currently we only support SQL syntax like `SELECT ... FROM __THIS__` @@ -3733,8 +4241,10 @@ class SQLTransformer(JavaTransformer, JavaMLReadable, JavaMLWritable): Params._dummy(), "statement", "SQL statement", typeConverter=TypeConverters.toString ) + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, statement=None): + def __init__(self, *, statement: Optional[str] = None): """ __init__(self, \\*, statement=None) """ @@ -3745,7 +4255,7 @@ def __init__(self, *, statement=None): @keyword_only @since("1.6.0") - def setParams(self, *, statement=None): + def setParams(self, *, statement: Optional[str] = None) -> "SQLTransformer": """ setParams(self, \\*, statement=None) Sets params for this SQLTransformer. @@ -3754,14 +4264,14 @@ def setParams(self, *, statement=None): return self._set(**kwargs) @since("1.6.0") - def setStatement(self, value): + def setStatement(self, value: str) -> "SQLTransformer": """ Sets the value of :py:attr:`statement`. """ return self._set(statement=value) @since("1.6.0") - def getStatement(self): + def getStatement(self) -> str: """ Gets the value of statement or its default value. """ @@ -3775,29 +4285,29 @@ class _StandardScalerParams(HasInputCol, HasOutputCol): .. versionadded:: 3.0.0 """ - withMean = Param( + withMean: Param[bool] = Param( Params._dummy(), "withMean", "Center data with mean", typeConverter=TypeConverters.toBoolean ) - withStd = Param( + withStd: Param[bool] = Param( Params._dummy(), "withStd", "Scale to unit standard deviation", typeConverter=TypeConverters.toBoolean, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_StandardScalerParams, self).__init__(*args) self._setDefault(withMean=False, withStd=True) @since("1.4.0") - def getWithMean(self): + def getWithMean(self) -> bool: """ Gets the value of withMean or its default value. """ return self.getOrDefault(self.withMean) @since("1.4.0") - def getWithStd(self): + def getWithStd(self) -> bool: """ Gets the value of withStd or its default value. """ @@ -3805,7 +4315,12 @@ def getWithStd(self): @inherit_doc -class StandardScaler(JavaEstimator, _StandardScalerParams, JavaMLReadable, JavaMLWritable): +class StandardScaler( + JavaEstimator["StandardScalerModel"], + _StandardScalerParams, + JavaMLReadable["StandardScaler"], + JavaMLWritable, +): """ Standardizes features by removing the mean and scaling to unit variance using column summary statistics on the samples in the training set. @@ -3854,8 +4369,17 @@ class StandardScaler(JavaEstimator, _StandardScalerParams, JavaMLReadable, JavaM True """ + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, withMean=False, withStd=True, inputCol=None, outputCol=None): + def __init__( + self, + *, + withMean: bool = False, + withStd: bool = True, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ): """ __init__(self, \\*, withMean=False, withStd=True, inputCol=None, outputCol=None) """ @@ -3866,7 +4390,14 @@ def __init__(self, *, withMean=False, withStd=True, inputCol=None, outputCol=Non @keyword_only @since("1.4.0") - def setParams(self, *, withMean=False, withStd=True, inputCol=None, outputCol=None): + def setParams( + self, + *, + withMean: bool = False, + withStd: bool = True, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ) -> "StandardScaler": """ setParams(self, \\*, withMean=False, withStd=True, inputCol=None, outputCol=None) Sets params for this StandardScaler. @@ -3875,65 +4406,70 @@ def setParams(self, *, withMean=False, withStd=True, inputCol=None, outputCol=No return self._set(**kwargs) @since("1.4.0") - def setWithMean(self, value): + def setWithMean(self, value: bool) -> "StandardScaler": """ Sets the value of :py:attr:`withMean`. """ return self._set(withMean=value) @since("1.4.0") - def setWithStd(self, value): + def setWithStd(self, value: bool) -> "StandardScaler": """ Sets the value of :py:attr:`withStd`. """ return self._set(withStd=value) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "StandardScaler": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "StandardScaler": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "StandardScalerModel": return StandardScalerModel(java_model) -class StandardScalerModel(JavaModel, _StandardScalerParams, JavaMLReadable, JavaMLWritable): +class StandardScalerModel( + JavaModel, + _StandardScalerParams, + JavaMLReadable["StandardScalerModel"], + JavaMLWritable, +): """ Model fitted by :py:class:`StandardScaler`. .. versionadded:: 1.4.0 """ - def setInputCol(self, value): + def setInputCol(self, value: str) -> "StandardScalerModel": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "StandardScalerModel": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - @property + @property # type: ignore[misc] @since("2.0.0") - def std(self): + def std(self) -> Vector: """ Standard deviation of the StandardScalerModel. """ return self._call_java("std") - @property + @property # type: ignore[misc] @since("2.0.0") - def mean(self): + def mean(self) -> Vector: """ Mean of the StandardScalerModel. """ @@ -3947,7 +4483,7 @@ class _StringIndexerParams( Params for :py:class:`StringIndexer` and :py:class:`StringIndexerModel`. """ - stringOrderType = Param( + stringOrderType: Param[str] = Param( Params._dummy(), "stringOrderType", "How to order labels of string column. The first label after " @@ -3959,7 +4495,7 @@ class _StringIndexerParams( typeConverter=TypeConverters.toString, ) - handleInvalid = Param( + handleInvalid: Param[str] = Param( Params._dummy(), "handleInvalid", "how to handle invalid data (unseen " @@ -3970,12 +4506,12 @@ class _StringIndexerParams( typeConverter=TypeConverters.toString, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_StringIndexerParams, self).__init__(*args) self._setDefault(handleInvalid="error", stringOrderType="frequencyDesc") @since("2.3.0") - def getStringOrderType(self): + def getStringOrderType(self) -> str: """ Gets the value of :py:attr:`stringOrderType` or its default value 'frequencyDesc'. """ @@ -3983,7 +4519,12 @@ def getStringOrderType(self): @inherit_doc -class StringIndexer(JavaEstimator, _StringIndexerParams, JavaMLReadable, JavaMLWritable): +class StringIndexer( + JavaEstimator["StringIndexerModel"], + _StringIndexerParams, + JavaMLReadable["StringIndexer"], + JavaMLWritable, +): """ A label indexer that maps a string column of labels to an ML column of label indices. If the input column is numeric, we cast it to string and index the string values. @@ -4066,16 +4607,40 @@ class StringIndexer(JavaEstimator, _StringIndexerParams, JavaMLReadable, JavaMLW [(0, 0.0, 0.0), (1, 1.0, 1.0), (2, 2.0, 0.0), (3, 0.0, 1.0), (4, 0.0, 1.0), (5, 2.0, 1.0)] """ + _input_kwargs: Dict[str, Any] + + @overload + def __init__( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ..., + stringOrderType: str = ..., + ): + ... + + @overload + def __init__( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + handleInvalid: str = ..., + stringOrderType: str = ..., + ): + ... + @keyword_only def __init__( self, *, - inputCol=None, - outputCol=None, - inputCols=None, - outputCols=None, - handleInvalid="error", - stringOrderType="frequencyDesc", + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + inputCols: Optional[List[str]] = None, + outputCols: Optional[List[str]] = None, + handleInvalid: str = "error", + stringOrderType: str = "frequencyDesc", ): """ __init__(self, \\*, inputCol=None, outputCol=None, inputCols=None, outputCols=None, \ @@ -4086,18 +4651,40 @@ def __init__( kwargs = self._input_kwargs self.setParams(**kwargs) + @overload + def setParams( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + handleInvalid: str = ..., + stringOrderType: str = ..., + ) -> "StringIndexer": + ... + + @overload + def setParams( + self, + *, + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + handleInvalid: str = ..., + stringOrderType: str = ..., + ) -> "StringIndexer": + ... + @keyword_only @since("1.4.0") def setParams( self, *, - inputCol=None, - outputCol=None, - inputCols=None, - outputCols=None, - handleInvalid="error", - stringOrderType="frequencyDesc", - ): + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + inputCols: Optional[List[str]] = None, + outputCols: Optional[List[str]] = None, + handleInvalid: str = "error", + stringOrderType: str = "frequencyDesc", + ) -> "StringIndexer": """ setParams(self, \\*, inputCol=None, outputCol=None, inputCols=None, outputCols=None, \ handleInvalid="error", stringOrderType="frequencyDesc") @@ -4106,84 +4693,86 @@ def setParams( kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "StringIndexerModel": return StringIndexerModel(java_model) @since("2.3.0") - def setStringOrderType(self, value): + def setStringOrderType(self, value: str) -> "StringIndexer": """ Sets the value of :py:attr:`stringOrderType`. """ return self._set(stringOrderType=value) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "StringIndexer": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setInputCols(self, value): + def setInputCols(self, value: List[str]) -> "StringIndexer": """ Sets the value of :py:attr:`inputCols`. """ return self._set(inputCols=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "StringIndexer": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) @since("3.0.0") - def setOutputCols(self, value): + def setOutputCols(self, value: List[str]) -> "StringIndexer": """ Sets the value of :py:attr:`outputCols`. """ return self._set(outputCols=value) - def setHandleInvalid(self, value): + def setHandleInvalid(self, value: str) -> "StringIndexer": """ Sets the value of :py:attr:`handleInvalid`. """ return self._set(handleInvalid=value) -class StringIndexerModel(JavaModel, _StringIndexerParams, JavaMLReadable, JavaMLWritable): +class StringIndexerModel( + JavaModel, _StringIndexerParams, JavaMLReadable["StringIndexerModel"], JavaMLWritable +): """ Model fitted by :py:class:`StringIndexer`. .. versionadded:: 1.4.0 """ - def setInputCol(self, value): + def setInputCol(self, value: str) -> "StringIndexerModel": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setInputCols(self, value): + def setInputCols(self, value: List[str]) -> "StringIndexerModel": """ Sets the value of :py:attr:`inputCols`. """ return self._set(inputCols=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "StringIndexerModel": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) @since("3.0.0") - def setOutputCols(self, value): + def setOutputCols(self, value: List[str]) -> "StringIndexerModel": """ Sets the value of :py:attr:`outputCols`. """ return self._set(outputCols=value) @since("2.4.0") - def setHandleInvalid(self, value): + def setHandleInvalid(self, value: str) -> "StringIndexerModel": """ Sets the value of :py:attr:`handleInvalid`. """ @@ -4191,12 +4780,19 @@ def setHandleInvalid(self, value): @classmethod @since("2.4.0") - def from_labels(cls, labels, inputCol, outputCol=None, handleInvalid=None): + def from_labels( + cls, + labels: List[str], + inputCol: str, + outputCol: Optional[str] = None, + handleInvalid: Optional[str] = None, + ) -> "StringIndexerModel": """ Construct the model directly from an array of label strings, requires an active SparkContext. """ sc = SparkContext._active_spark_context + assert sc is not None and sc._gateway is not None java_class = sc._gateway.jvm.java.lang.String jlabels = StringIndexerModel._new_java_array(labels, java_class) model = StringIndexerModel._create_from_java_class( @@ -4211,12 +4807,19 @@ def from_labels(cls, labels, inputCol, outputCol=None, handleInvalid=None): @classmethod @since("3.0.0") - def from_arrays_of_labels(cls, arrayOfLabels, inputCols, outputCols=None, handleInvalid=None): + def from_arrays_of_labels( + cls, + arrayOfLabels: List[List[str]], + inputCols: List[str], + outputCols: Optional[List[str]] = None, + handleInvalid: Optional[str] = None, + ) -> "StringIndexerModel": """ Construct the model directly from an array of array of label strings, requires an active SparkContext. """ sc = SparkContext._active_spark_context + assert sc is not None and sc._gateway is not None java_class = sc._gateway.jvm.java.lang.String jlabels = StringIndexerModel._new_java_array(arrayOfLabels, java_class) model = StringIndexerModel._create_from_java_class( @@ -4229,9 +4832,9 @@ def from_arrays_of_labels(cls, arrayOfLabels, inputCols, outputCols=None, handle model.setHandleInvalid(handleInvalid) return model - @property + @property # type: ignore[misc] @since("1.5.0") - def labels(self): + def labels(self) -> List[str]: """ Ordered list of labels, corresponding to indices to be assigned. @@ -4240,9 +4843,9 @@ def labels(self): """ return self._call_java("labels") - @property + @property # type: ignore[misc] @since("3.0.2") - def labelsArray(self): + def labelsArray(self) -> List[str]: """ Array of ordered list of labels, corresponding to indices to be assigned for each input column. @@ -4251,7 +4854,13 @@ def labelsArray(self): @inherit_doc -class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): +class IndexToString( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable["IndexToString"], + JavaMLWritable, +): """ A :py:class:`pyspark.ml.base.Transformer` that maps a column of indices back to a new column of corresponding string values. @@ -4265,7 +4874,7 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, StringIndexer : for converting categorical values into category indices """ - labels = Param( + labels: Param[List[str]] = Param( Params._dummy(), "labels", "Optional array of labels specifying index-string mapping." @@ -4273,8 +4882,16 @@ class IndexToString(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, typeConverter=TypeConverters.toListString, ) + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, inputCol=None, outputCol=None, labels=None): + def __init__( + self, + *, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + labels: Optional[List[str]] = None, + ): """ __init__(self, \\*, inputCol=None, outputCol=None, labels=None) """ @@ -4285,7 +4902,13 @@ def __init__(self, *, inputCol=None, outputCol=None, labels=None): @keyword_only @since("1.6.0") - def setParams(self, *, inputCol=None, outputCol=None, labels=None): + def setParams( + self, + *, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + labels: Optional[List[str]] = None, + ) -> "IndexToString": """ setParams(self, \\*, inputCol=None, outputCol=None, labels=None) Sets params for this IndexToString. @@ -4294,26 +4917,26 @@ def setParams(self, *, inputCol=None, outputCol=None, labels=None): return self._set(**kwargs) @since("1.6.0") - def setLabels(self, value): + def setLabels(self, value: List[str]) -> "IndexToString": """ Sets the value of :py:attr:`labels`. """ return self._set(labels=value) @since("1.6.0") - def getLabels(self): + def getLabels(self) -> List[str]: """ Gets the value of :py:attr:`labels` or its default value. """ return self.getOrDefault(self.labels) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "IndexToString": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "IndexToString": """ Sets the value of :py:attr:`outputCol`. """ @@ -4326,7 +4949,7 @@ class StopWordsRemover( HasOutputCol, HasInputCols, HasOutputCols, - JavaMLReadable, + JavaMLReadable["StopWordsRemover"], JavaMLWritable, ): """ @@ -4373,36 +4996,62 @@ class StopWordsRemover( ... """ - stopWords = Param( + stopWords: Param[List[str]] = Param( Params._dummy(), "stopWords", "The words to be filtered out", typeConverter=TypeConverters.toListString, ) - caseSensitive = Param( + caseSensitive: Param[bool] = Param( Params._dummy(), "caseSensitive", "whether to do a case sensitive " + "comparison over the stop words", typeConverter=TypeConverters.toBoolean, ) - locale = Param( + locale: Param[str] = Param( Params._dummy(), "locale", "locale of the input. ignored when case sensitive " + "is true", typeConverter=TypeConverters.toString, ) + _input_kwargs: Dict[str, Any] + + @overload + def __init__( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + stopWords: Optional[List[str]] = ..., + caseSensitive: bool = ..., + locale: Optional[str] = ..., + ): + ... + + @overload + def __init__( + self, + *, + stopWords: Optional[List[str]] = ..., + caseSensitive: bool = ..., + locale: Optional[str] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + ): + ... + @keyword_only def __init__( self, *, - inputCol=None, - outputCol=None, - stopWords=None, - caseSensitive=False, - locale=None, - inputCols=None, - outputCols=None, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + stopWords: Optional[List[str]] = None, + caseSensitive: bool = False, + locale: Optional[str] = None, + inputCols: Optional[List[str]] = None, + outputCols: Optional[List[str]] = None, ): """ __init__(self, \\*, inputCol=None, outputCol=None, stopWords=None, caseSensitive=false, \ @@ -4420,19 +5069,43 @@ def __init__( kwargs = self._input_kwargs self.setParams(**kwargs) + @overload + def setParams( + self, + *, + inputCol: Optional[str] = ..., + outputCol: Optional[str] = ..., + stopWords: Optional[List[str]] = ..., + caseSensitive: bool = ..., + locale: Optional[str] = ..., + ) -> "StopWordsRemover": + ... + + @overload + def setParams( + self, + *, + stopWords: Optional[List[str]] = ..., + caseSensitive: bool = ..., + locale: Optional[str] = ..., + inputCols: Optional[List[str]] = ..., + outputCols: Optional[List[str]] = ..., + ) -> "StopWordsRemover": + ... + @keyword_only @since("1.6.0") def setParams( self, *, - inputCol=None, - outputCol=None, - stopWords=None, - caseSensitive=False, - locale=None, - inputCols=None, - outputCols=None, - ): + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + stopWords: Optional[List[str]] = None, + caseSensitive: bool = False, + locale: Optional[str] = None, + inputCols: Optional[List[str]] = None, + outputCols: Optional[List[str]] = None, + ) -> "StopWordsRemover": """ setParams(self, \\*, inputCol=None, outputCol=None, stopWords=None, caseSensitive=false, \ locale=None, inputCols=None, outputCols=None) @@ -4442,68 +5115,68 @@ def setParams( return self._set(**kwargs) @since("1.6.0") - def setStopWords(self, value): + def setStopWords(self, value: List[str]) -> "StopWordsRemover": """ Sets the value of :py:attr:`stopWords`. """ return self._set(stopWords=value) @since("1.6.0") - def getStopWords(self): + def getStopWords(self) -> List[str]: """ Gets the value of :py:attr:`stopWords` or its default value. """ return self.getOrDefault(self.stopWords) @since("1.6.0") - def setCaseSensitive(self, value): + def setCaseSensitive(self, value: bool) -> "StopWordsRemover": """ Sets the value of :py:attr:`caseSensitive`. """ return self._set(caseSensitive=value) @since("1.6.0") - def getCaseSensitive(self): + def getCaseSensitive(self) -> bool: """ Gets the value of :py:attr:`caseSensitive` or its default value. """ return self.getOrDefault(self.caseSensitive) @since("2.4.0") - def setLocale(self, value): + def setLocale(self, value: str) -> "StopWordsRemover": """ Sets the value of :py:attr:`locale`. """ return self._set(locale=value) @since("2.4.0") - def getLocale(self): + def getLocale(self) -> str: """ Gets the value of :py:attr:`locale`. """ return self.getOrDefault(self.locale) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "StopWordsRemover": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "StopWordsRemover": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) @since("3.0.0") - def setInputCols(self, value): + def setInputCols(self, value: List[str]) -> "StopWordsRemover": """ Sets the value of :py:attr:`inputCols`. """ return self._set(inputCols=value) @since("3.0.0") - def setOutputCols(self, value): + def setOutputCols(self, value: List[str]) -> "StopWordsRemover": """ Sets the value of :py:attr:`outputCols`. """ @@ -4511,7 +5184,7 @@ def setOutputCols(self, value): @staticmethod @since("2.0.0") - def loadDefaultStopWords(language): + def loadDefaultStopWords(language: str) -> List[str]: """ Loads the default stop words for the given language. Supported languages: danish, dutch, english, finnish, french, german, hungarian, @@ -4522,7 +5195,13 @@ def loadDefaultStopWords(language): @inherit_doc -class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): +class Tokenizer( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable["Tokenizer"], + JavaMLWritable, +): """ A tokenizer that converts the input string to lowercase and then splits it by white spaces. @@ -4557,8 +5236,10 @@ class Tokenizer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, Java True """ + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, inputCol=None, outputCol=None): + def __init__(self, *, inputCol: Optional[str] = None, outputCol: Optional[str] = None): """ __init__(self, \\*, inputCol=None, outputCol=None) """ @@ -4569,7 +5250,9 @@ def __init__(self, *, inputCol=None, outputCol=None): @keyword_only @since("1.3.0") - def setParams(self, *, inputCol=None, outputCol=None): + def setParams( + self, *, inputCol: Optional[str] = None, outputCol: Optional[str] = None + ) -> "Tokenizer": """ setParams(self, \\*, inputCol=None, outputCol=None) Sets params for this Tokenizer. @@ -4577,13 +5260,13 @@ def setParams(self, *, inputCol=None, outputCol=None): kwargs = self._input_kwargs return self._set(**kwargs) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "Tokenizer": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "Tokenizer": """ Sets the value of :py:attr:`outputCol`. """ @@ -4592,7 +5275,12 @@ def setOutputCol(self, value): @inherit_doc class VectorAssembler( - JavaTransformer, HasInputCols, HasOutputCol, HasHandleInvalid, JavaMLReadable, JavaMLWritable + JavaTransformer, + HasInputCols, + HasOutputCol, + HasHandleInvalid, + JavaMLReadable["VectorAssembler"], + JavaMLWritable, ): """ A feature transformer that merges multiple columns into a vector column. @@ -4639,7 +5327,7 @@ class VectorAssembler( ... """ - handleInvalid = Param( + handleInvalid: Param[str] = Param( Params._dummy(), "handleInvalid", "How to handle invalid data (NULL " @@ -4653,8 +5341,16 @@ class VectorAssembler( typeConverter=TypeConverters.toString, ) + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, inputCols=None, outputCol=None, handleInvalid="error"): + def __init__( + self, + *, + inputCols: Optional[List[str]] = None, + outputCol: Optional[str] = None, + handleInvalid: str = "error", + ): """ __init__(self, \\*, inputCols=None, outputCol=None, handleInvalid="error") """ @@ -4666,7 +5362,13 @@ def __init__(self, *, inputCols=None, outputCol=None, handleInvalid="error"): @keyword_only @since("1.4.0") - def setParams(self, *, inputCols=None, outputCol=None, handleInvalid="error"): + def setParams( + self, + *, + inputCols: Optional[List[str]] = None, + outputCol: Optional[str] = None, + handleInvalid: str = "error", + ) -> "VectorAssembler": """ setParams(self, \\*, inputCols=None, outputCol=None, handleInvalid="error") Sets params for this VectorAssembler. @@ -4674,19 +5376,19 @@ def setParams(self, *, inputCols=None, outputCol=None, handleInvalid="error"): kwargs = self._input_kwargs return self._set(**kwargs) - def setInputCols(self, value): + def setInputCols(self, value: List[str]) -> "VectorAssembler": """ Sets the value of :py:attr:`inputCols`. """ return self._set(inputCols=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "VectorAssembler": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def setHandleInvalid(self, value): + def setHandleInvalid(self, value: str) -> "VectorAssembler": """ Sets the value of :py:attr:`handleInvalid`. """ @@ -4700,7 +5402,7 @@ class _VectorIndexerParams(HasInputCol, HasOutputCol, HasHandleInvalid): .. versionadded:: 3.0.0 """ - maxCategories = Param( + maxCategories: Param[int] = Param( Params._dummy(), "maxCategories", "Threshold for the number of values a categorical feature can take " @@ -4709,7 +5411,7 @@ class _VectorIndexerParams(HasInputCol, HasOutputCol, HasHandleInvalid): typeConverter=TypeConverters.toInt, ) - handleInvalid = Param( + handleInvalid: Param[str] = Param( Params._dummy(), "handleInvalid", "How to handle invalid data " @@ -4720,12 +5422,12 @@ class _VectorIndexerParams(HasInputCol, HasOutputCol, HasHandleInvalid): typeConverter=TypeConverters.toString, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_VectorIndexerParams, self).__init__(*args) self._setDefault(maxCategories=20, handleInvalid="error") @since("1.4.0") - def getMaxCategories(self): + def getMaxCategories(self) -> int: """ Gets the value of maxCategories or its default value. """ @@ -4733,7 +5435,13 @@ def getMaxCategories(self): @inherit_doc -class VectorIndexer(JavaEstimator, _VectorIndexerParams, JavaMLReadable, JavaMLWritable): +class VectorIndexer( + JavaEstimator["VectorIndexerModel"], + _VectorIndexerParams, + HasHandleInvalid, + JavaMLReadable["VectorIndexer"], + JavaMLWritable, +): """ Class for indexing categorical feature columns in a dataset of `Vector`. @@ -4821,8 +5529,17 @@ class VectorIndexer(JavaEstimator, _VectorIndexerParams, JavaMLReadable, JavaMLW DenseVector([2.0, 1.0]) """ + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, maxCategories=20, inputCol=None, outputCol=None, handleInvalid="error"): + def __init__( + self, + *, + maxCategories: int = 20, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + handleInvalid: str = "error", + ): """ __init__(self, \\*, maxCategories=20, inputCol=None, outputCol=None, handleInvalid="error") """ @@ -4833,7 +5550,14 @@ def __init__(self, *, maxCategories=20, inputCol=None, outputCol=None, handleInv @keyword_only @since("1.4.0") - def setParams(self, *, maxCategories=20, inputCol=None, outputCol=None, handleInvalid="error"): + def setParams( + self, + *, + maxCategories: int = 20, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + handleInvalid: str = "error", + ) -> "VectorIndexer": """ setParams(self, \\*, maxCategories=20, inputCol=None, outputCol=None, handleInvalid="error") Sets params for this VectorIndexer. @@ -4842,35 +5566,37 @@ def setParams(self, *, maxCategories=20, inputCol=None, outputCol=None, handleIn return self._set(**kwargs) @since("1.4.0") - def setMaxCategories(self, value): + def setMaxCategories(self, value: int) -> "VectorIndexer": """ Sets the value of :py:attr:`maxCategories`. """ return self._set(maxCategories=value) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "VectorIndexer": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "VectorIndexer": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def setHandleInvalid(self, value): + def setHandleInvalid(self, value: str) -> "VectorIndexer": """ Sets the value of :py:attr:`handleInvalid`. """ return self._set(handleInvalid=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "VectorIndexerModel": return VectorIndexerModel(java_model) -class VectorIndexerModel(JavaModel, _VectorIndexerParams, JavaMLReadable, JavaMLWritable): +class VectorIndexerModel( + JavaModel, _VectorIndexerParams, JavaMLReadable["VectorIndexerModel"], JavaMLWritable +): """ Model fitted by :py:class:`VectorIndexer`. @@ -4888,30 +5614,30 @@ class VectorIndexerModel(JavaModel, _VectorIndexerParams, JavaMLReadable, JavaML """ @since("3.0.0") - def setInputCol(self, value): + def setInputCol(self, value: str) -> "VectorIndexerModel": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "VectorIndexerModel": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - @property + @property # type: ignore[misc] @since("1.4.0") - def numFeatures(self): + def numFeatures(self) -> int: """ Number of features, i.e., length of Vectors which this transforms. """ return self._call_java("numFeatures") - @property + @property # type: ignore[misc] @since("1.4.0") - def categoryMaps(self): + def categoryMaps(self) -> Dict[int, Tuple[float, int]]: """ Feature value index. Keys are categorical feature indices (column indices). Values are maps from original features values to 0-based category indices. @@ -4921,7 +5647,13 @@ def categoryMaps(self): @inherit_doc -class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): +class VectorSlicer( + JavaTransformer, + HasInputCol, + HasOutputCol, + JavaMLReadable["VectorSlicer"], + JavaMLWritable, +): """ This class takes a feature vector and outputs a new feature vector with a subarray of the original features. @@ -4958,14 +5690,14 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, J True """ - indices = Param( + indices: Param[List[int]] = Param( Params._dummy(), "indices", "An array of indices to select features from " + "a vector column. There can be no overlap with names.", typeConverter=TypeConverters.toListInt, ) - names = Param( + names: Param[List[str]] = Param( Params._dummy(), "names", "An array of feature names to select features from " @@ -4975,8 +5707,17 @@ class VectorSlicer(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable, J typeConverter=TypeConverters.toListString, ) + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, inputCol=None, outputCol=None, indices=None, names=None): + def __init__( + self, + *, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + indices: Optional[List[int]] = None, + names: Optional[List[str]] = None, + ): """ __init__(self, \\*, inputCol=None, outputCol=None, indices=None, names=None) """ @@ -4988,7 +5729,14 @@ def __init__(self, *, inputCol=None, outputCol=None, indices=None, names=None): @keyword_only @since("1.6.0") - def setParams(self, *, inputCol=None, outputCol=None, indices=None, names=None): + def setParams( + self, + *, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + indices: Optional[List[int]] = None, + names: Optional[List[str]] = None, + ) -> "VectorSlicer": """ setParams(self, \\*, inputCol=None, outputCol=None, indices=None, names=None): Sets params for this VectorSlicer. @@ -4997,40 +5745,40 @@ def setParams(self, *, inputCol=None, outputCol=None, indices=None, names=None): return self._set(**kwargs) @since("1.6.0") - def setIndices(self, value): + def setIndices(self, value: List[int]) -> "VectorSlicer": """ Sets the value of :py:attr:`indices`. """ return self._set(indices=value) @since("1.6.0") - def getIndices(self): + def getIndices(self) -> List[int]: """ Gets the value of indices or its default value. """ return self.getOrDefault(self.indices) @since("1.6.0") - def setNames(self, value): + def setNames(self, value: List[str]) -> "VectorSlicer": """ Sets the value of :py:attr:`names`. """ return self._set(names=value) @since("1.6.0") - def getNames(self): + def getNames(self) -> List[str]: """ Gets the value of names or its default value. """ return self.getOrDefault(self.names) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "VectorSlicer": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "VectorSlicer": """ Sets the value of :py:attr:`outputCol`. """ @@ -5044,32 +5792,32 @@ class _Word2VecParams(HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCo .. versionadded:: 3.0.0 """ - vectorSize = Param( + vectorSize: Param[int] = Param( Params._dummy(), "vectorSize", "the dimension of codes after transforming from words", typeConverter=TypeConverters.toInt, ) - numPartitions = Param( + numPartitions: Param[int] = Param( Params._dummy(), "numPartitions", "number of partitions for sentences of words", typeConverter=TypeConverters.toInt, ) - minCount = Param( + minCount: Param[int] = Param( Params._dummy(), "minCount", "the minimum number of times a token must appear to be included in the " + "word2vec model's vocabulary", typeConverter=TypeConverters.toInt, ) - windowSize = Param( + windowSize: Param[int] = Param( Params._dummy(), "windowSize", "the window size (context words from [-window, window]). Default value is 5", typeConverter=TypeConverters.toInt, ) - maxSentenceLength = Param( + maxSentenceLength: Param[int] = Param( Params._dummy(), "maxSentenceLength", "Maximum length (in words) of each sentence in the input data. " @@ -5078,7 +5826,7 @@ class _Word2VecParams(HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCo typeConverter=TypeConverters.toInt, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_Word2VecParams, self).__init__(*args) self._setDefault( vectorSize=100, @@ -5091,35 +5839,35 @@ def __init__(self, *args): ) @since("1.4.0") - def getVectorSize(self): + def getVectorSize(self) -> int: """ Gets the value of vectorSize or its default value. """ return self.getOrDefault(self.vectorSize) @since("1.4.0") - def getNumPartitions(self): + def getNumPartitions(self) -> int: """ Gets the value of numPartitions or its default value. """ return self.getOrDefault(self.numPartitions) @since("1.4.0") - def getMinCount(self): + def getMinCount(self) -> int: """ Gets the value of minCount or its default value. """ return self.getOrDefault(self.minCount) @since("2.0.0") - def getWindowSize(self): + def getWindowSize(self) -> int: """ Gets the value of windowSize or its default value. """ return self.getOrDefault(self.windowSize) @since("2.0.0") - def getMaxSentenceLength(self): + def getMaxSentenceLength(self) -> int: """ Gets the value of maxSentenceLength or its default value. """ @@ -5127,7 +5875,12 @@ def getMaxSentenceLength(self): @inherit_doc -class Word2Vec(JavaEstimator, _Word2VecParams, JavaMLReadable, JavaMLWritable): +class Word2Vec( + JavaEstimator["Word2VecModel"], + _Word2VecParams, + JavaMLReadable["Word2Vec"], + JavaMLWritable, +): """ Word2Vec trains a model of `Map(String, Vector)`, i.e. transforms a word into a code for further natural language processing or machine learning process. @@ -5191,20 +5944,22 @@ class Word2Vec(JavaEstimator, _Word2VecParams, JavaMLReadable, JavaMLWritable): True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - vectorSize=100, - minCount=5, - numPartitions=1, - stepSize=0.025, - maxIter=1, - seed=None, - inputCol=None, - outputCol=None, - windowSize=5, - maxSentenceLength=1000, + vectorSize: int = 100, + minCount: int = 5, + numPartitions: int = 1, + stepSize: float = 0.025, + maxIter: int = 1, + seed: Optional[int] = None, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + windowSize: int = 5, + maxSentenceLength: int = 1000, ): """ __init__(self, \\*, vectorSize=100, minCount=5, numPartitions=1, stepSize=0.025, \ @@ -5221,17 +5976,17 @@ def __init__( def setParams( self, *, - vectorSize=100, - minCount=5, - numPartitions=1, - stepSize=0.025, - maxIter=1, - seed=None, - inputCol=None, - outputCol=None, - windowSize=5, - maxSentenceLength=1000, - ): + vectorSize: int = 100, + minCount: int = 5, + numPartitions: int = 1, + stepSize: float = 0.025, + maxIter: int = 1, + seed: Optional[int] = None, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + windowSize: int = 5, + maxSentenceLength: int = 1000, + ) -> "Word2Vec": """ setParams(self, \\*, minCount=5, numPartitions=1, stepSize=0.025, maxIter=1, \ seed=None, inputCol=None, outputCol=None, windowSize=5, \ @@ -5242,76 +5997,76 @@ def setParams( return self._set(**kwargs) @since("1.4.0") - def setVectorSize(self, value): + def setVectorSize(self, value: int) -> "Word2Vec": """ Sets the value of :py:attr:`vectorSize`. """ return self._set(vectorSize=value) @since("1.4.0") - def setNumPartitions(self, value): + def setNumPartitions(self, value: int) -> "Word2Vec": """ Sets the value of :py:attr:`numPartitions`. """ return self._set(numPartitions=value) @since("1.4.0") - def setMinCount(self, value): + def setMinCount(self, value: int) -> "Word2Vec": """ Sets the value of :py:attr:`minCount`. """ return self._set(minCount=value) @since("2.0.0") - def setWindowSize(self, value): + def setWindowSize(self, value: int) -> "Word2Vec": """ Sets the value of :py:attr:`windowSize`. """ return self._set(windowSize=value) @since("2.0.0") - def setMaxSentenceLength(self, value): + def setMaxSentenceLength(self, value: int) -> "Word2Vec": """ Sets the value of :py:attr:`maxSentenceLength`. """ return self._set(maxSentenceLength=value) - def setMaxIter(self, value): + def setMaxIter(self, value: int) -> "Word2Vec": """ Sets the value of :py:attr:`maxIter`. """ return self._set(maxIter=value) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "Word2Vec": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "Word2Vec": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def setSeed(self, value): + def setSeed(self, value: int) -> "Word2Vec": """ Sets the value of :py:attr:`seed`. """ return self._set(seed=value) @since("1.4.0") - def setStepSize(self, value): + def setStepSize(self, value: float) -> "Word2Vec": """ Sets the value of :py:attr:`stepSize`. """ return self._set(stepSize=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "Word2VecModel": return Word2VecModel(java_model) -class Word2VecModel(JavaModel, _Word2VecParams, JavaMLReadable, JavaMLWritable): +class Word2VecModel(JavaModel, _Word2VecParams, JavaMLReadable["Word2VecModel"], JavaMLWritable): """ Model fitted by :py:class:`Word2Vec`. @@ -5319,27 +6074,27 @@ class Word2VecModel(JavaModel, _Word2VecParams, JavaMLReadable, JavaMLWritable): """ @since("1.5.0") - def getVectors(self): + def getVectors(self) -> DataFrame: """ Returns the vector representation of the words as a dataframe with two fields, word and vector. """ return self._call_java("getVectors") - def setInputCol(self, value): + def setInputCol(self, value: str) -> "Word2VecModel": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "Word2VecModel": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) @since("1.5.0") - def findSynonyms(self, word, num): + def findSynonyms(self, word: Union[str, Vector], num: int) -> DataFrame: """ Find "num" number of words closest in similarity to "word". word can be a string or vector representation. @@ -5351,7 +6106,7 @@ def findSynonyms(self, word, num): return self._call_java("findSynonyms", word, num) @since("2.3.0") - def findSynonymsArray(self, word, num): + def findSynonymsArray(self, word: Union[Vector, str], num: int) -> List[Tuple[str, float]]: """ Find "num" number of words closest in similarity to "word". word can be a string or vector representation. @@ -5360,6 +6115,7 @@ def findSynonymsArray(self, word, num): """ if not isinstance(word, str): word = _convert_to_vector(word) + assert self._java_obj is not None tuples = self._java_obj.findSynonymsArray(word, num) return list(map(lambda st: (st._1(), st._2()), list(tuples))) @@ -5371,7 +6127,7 @@ class _PCAParams(HasInputCol, HasOutputCol): .. versionadded:: 3.0.0 """ - k = Param( + k: Param[int] = Param( Params._dummy(), "k", "the number of principal components", @@ -5379,7 +6135,7 @@ class _PCAParams(HasInputCol, HasOutputCol): ) @since("1.5.0") - def getK(self): + def getK(self) -> int: """ Gets the value of k or its default value. """ @@ -5387,7 +6143,7 @@ def getK(self): @inherit_doc -class PCA(JavaEstimator, _PCAParams, JavaMLReadable, JavaMLWritable): +class PCA(JavaEstimator["PCAModel"], _PCAParams, JavaMLReadable["PCA"], JavaMLWritable): """ PCA trains a model to project vectors to a lower dimensional space of the top :py:attr:`k` principal components. @@ -5429,8 +6185,16 @@ class PCA(JavaEstimator, _PCAParams, JavaMLReadable, JavaMLWritable): True """ + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, k=None, inputCol=None, outputCol=None): + def __init__( + self, + *, + k: Optional[int] = None, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ): """ __init__(self, \\*, k=None, inputCol=None, outputCol=None) """ @@ -5441,7 +6205,13 @@ def __init__(self, *, k=None, inputCol=None, outputCol=None): @keyword_only @since("1.5.0") - def setParams(self, *, k=None, inputCol=None, outputCol=None): + def setParams( + self, + *, + k: Optional[int] = None, + inputCol: Optional[str] = None, + outputCol: Optional[str] = None, + ) -> "PCA": """ setParams(self, \\*, k=None, inputCol=None, outputCol=None) Set params for this PCA. @@ -5450,29 +6220,29 @@ def setParams(self, *, k=None, inputCol=None, outputCol=None): return self._set(**kwargs) @since("1.5.0") - def setK(self, value): + def setK(self, value: int) -> "PCA": """ Sets the value of :py:attr:`k`. """ return self._set(k=value) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "PCA": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "PCA": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "PCAModel": return PCAModel(java_model) -class PCAModel(JavaModel, _PCAParams, JavaMLReadable, JavaMLWritable): +class PCAModel(JavaModel, _PCAParams, JavaMLReadable["PCAModel"], JavaMLWritable): """ Model fitted by :py:class:`PCA`. Transforms vectors to a lower dimensional space. @@ -5480,31 +6250,31 @@ class PCAModel(JavaModel, _PCAParams, JavaMLReadable, JavaMLWritable): """ @since("3.0.0") - def setInputCol(self, value): + def setInputCol(self, value: str) -> "PCAModel": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) @since("3.0.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "PCAModel": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - @property + @property # type: ignore[misc] @since("2.0.0") - def pc(self): + def pc(self) -> DenseMatrix: """ Returns a principal components Matrix. Each column is one principal component. """ return self._call_java("pc") - @property + @property # type: ignore[misc] @since("2.0.0") - def explainedVariance(self): + def explainedVariance(self) -> DenseVector: """ Returns a vector of proportions of variance explained by each principal component. @@ -5519,18 +6289,18 @@ class _RFormulaParams(HasFeaturesCol, HasLabelCol, HasHandleInvalid): .. versionadded:: 3.0.0 """ - formula = Param( + formula: Param[str] = Param( Params._dummy(), "formula", "R model formula", typeConverter=TypeConverters.toString ) - forceIndexLabel = Param( + forceIndexLabel: Param[bool] = Param( Params._dummy(), "forceIndexLabel", "Force to index label whether it is numeric or string", typeConverter=TypeConverters.toBoolean, ) - stringIndexerOrderType = Param( + stringIndexerOrderType: Param[str] = Param( Params._dummy(), "stringIndexerOrderType", "How to order categories of a string feature column used by " @@ -5542,7 +6312,7 @@ class _RFormulaParams(HasFeaturesCol, HasLabelCol, HasHandleInvalid): typeConverter=TypeConverters.toString, ) - handleInvalid = Param( + handleInvalid: Param[str] = Param( Params._dummy(), "handleInvalid", "how to handle invalid entries. " @@ -5552,28 +6322,28 @@ class _RFormulaParams(HasFeaturesCol, HasLabelCol, HasHandleInvalid): typeConverter=TypeConverters.toString, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_RFormulaParams, self).__init__(*args) self._setDefault( forceIndexLabel=False, stringIndexerOrderType="frequencyDesc", handleInvalid="error" ) @since("1.5.0") - def getFormula(self): + def getFormula(self) -> str: """ Gets the value of :py:attr:`formula`. """ return self.getOrDefault(self.formula) @since("2.1.0") - def getForceIndexLabel(self): + def getForceIndexLabel(self) -> bool: """ Gets the value of :py:attr:`forceIndexLabel`. """ return self.getOrDefault(self.forceIndexLabel) @since("2.3.0") - def getStringIndexerOrderType(self): + def getStringIndexerOrderType(self) -> str: """ Gets the value of :py:attr:`stringIndexerOrderType` or its default value 'frequencyDesc'. """ @@ -5581,7 +6351,12 @@ def getStringIndexerOrderType(self): @inherit_doc -class RFormula(JavaEstimator, _RFormulaParams, JavaMLReadable, JavaMLWritable): +class RFormula( + JavaEstimator["RFormulaModel"], + _RFormulaParams, + JavaMLReadable["RFormula"], + JavaMLWritable, +): """ Implements the transforms required for fitting a dataset against an R model formula. Currently we support a limited subset of the R @@ -5654,16 +6429,18 @@ class RFormula(JavaEstimator, _RFormulaParams, JavaMLReadable, JavaMLWritable): 'RFormulaModel(ResolvedRFormula(label=y, terms=[x,s], hasIntercept=true)) (uid=...)' """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - formula=None, - featuresCol="features", - labelCol="label", - forceIndexLabel=False, - stringIndexerOrderType="frequencyDesc", - handleInvalid="error", + formula: Optional[str] = None, + featuresCol: str = "features", + labelCol: str = "label", + forceIndexLabel: bool = False, + stringIndexerOrderType: str = "frequencyDesc", + handleInvalid: str = "error", ): """ __init__(self, \\*, formula=None, featuresCol="features", labelCol="label", \ @@ -5680,13 +6457,13 @@ def __init__( def setParams( self, *, - formula=None, - featuresCol="features", - labelCol="label", - forceIndexLabel=False, - stringIndexerOrderType="frequencyDesc", - handleInvalid="error", - ): + formula: Optional[str] = None, + featuresCol: str = "features", + labelCol: str = "label", + forceIndexLabel: bool = False, + stringIndexerOrderType: str = "frequencyDesc", + handleInvalid: str = "error", + ) -> "RFormula": """ setParams(self, \\*, formula=None, featuresCol="features", labelCol="label", \ forceIndexLabel=False, stringIndexerOrderType="frequencyDesc", \ @@ -5697,53 +6474,53 @@ def setParams( return self._set(**kwargs) @since("1.5.0") - def setFormula(self, value): + def setFormula(self, value: str) -> "RFormula": """ Sets the value of :py:attr:`formula`. """ return self._set(formula=value) @since("2.1.0") - def setForceIndexLabel(self, value): + def setForceIndexLabel(self, value: bool) -> "RFormula": """ Sets the value of :py:attr:`forceIndexLabel`. """ return self._set(forceIndexLabel=value) @since("2.3.0") - def setStringIndexerOrderType(self, value): + def setStringIndexerOrderType(self, value: str) -> "RFormula": """ Sets the value of :py:attr:`stringIndexerOrderType`. """ return self._set(stringIndexerOrderType=value) - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "RFormula": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) - def setLabelCol(self, value): + def setLabelCol(self, value: str) -> "RFormula": """ Sets the value of :py:attr:`labelCol`. """ return self._set(labelCol=value) - def setHandleInvalid(self, value): + def setHandleInvalid(self, value: str) -> "RFormula": """ Sets the value of :py:attr:`handleInvalid`. """ return self._set(handleInvalid=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "RFormulaModel": return RFormulaModel(java_model) - def __str__(self): + def __str__(self) -> str: formulaStr = self.getFormula() if self.isDefined(self.formula) else "" return "RFormula(%s) (uid=%s)" % (formulaStr, self.uid) -class RFormulaModel(JavaModel, _RFormulaParams, JavaMLReadable, JavaMLWritable): +class RFormulaModel(JavaModel, _RFormulaParams, JavaMLReadable["RFormulaModel"], JavaMLWritable): """ Model fitted by :py:class:`RFormula`. Fitting is required to determine the factor levels of formula terms. @@ -5751,7 +6528,7 @@ class RFormulaModel(JavaModel, _RFormulaParams, JavaMLReadable, JavaMLWritable): .. versionadded:: 1.5.0 """ - def __str__(self): + def __str__(self) -> str: resolvedFormula = self._call_java("resolvedFormula") return "RFormulaModel(%s) (uid=%s)" % (resolvedFormula, self.uid) @@ -5763,7 +6540,7 @@ class _SelectorParams(HasFeaturesCol, HasOutputCol, HasLabelCol): .. versionadded:: 3.1.0 """ - selectorType = Param( + selectorType: Param[str] = Param( Params._dummy(), "selectorType", "The selector type. " @@ -5771,7 +6548,7 @@ class _SelectorParams(HasFeaturesCol, HasOutputCol, HasLabelCol): typeConverter=TypeConverters.toString, ) - numTopFeatures = Param( + numTopFeatures: Param[int] = Param( Params._dummy(), "numTopFeatures", "Number of features that selector will select, ordered by ascending p-value. " @@ -5780,35 +6557,35 @@ class _SelectorParams(HasFeaturesCol, HasOutputCol, HasLabelCol): typeConverter=TypeConverters.toInt, ) - percentile = Param( + percentile: Param[float] = Param( Params._dummy(), "percentile", "Percentile of features that selector " + "will select, ordered by ascending p-value.", typeConverter=TypeConverters.toFloat, ) - fpr = Param( + fpr: Param[float] = Param( Params._dummy(), "fpr", "The highest p-value for features to be kept.", typeConverter=TypeConverters.toFloat, ) - fdr = Param( + fdr: Param[float] = Param( Params._dummy(), "fdr", "The upper bound of the expected false discovery rate.", typeConverter=TypeConverters.toFloat, ) - fwe = Param( + fwe: Param[float] = Param( Params._dummy(), "fwe", "The upper bound of the expected family-wise error rate.", typeConverter=TypeConverters.toFloat, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_SelectorParams, self).__init__(*args) self._setDefault( numTopFeatures=50, @@ -5820,62 +6597,62 @@ def __init__(self, *args): ) @since("2.1.0") - def getSelectorType(self): + def getSelectorType(self) -> str: """ Gets the value of selectorType or its default value. """ return self.getOrDefault(self.selectorType) @since("2.0.0") - def getNumTopFeatures(self): + def getNumTopFeatures(self) -> int: """ Gets the value of numTopFeatures or its default value. """ return self.getOrDefault(self.numTopFeatures) @since("2.1.0") - def getPercentile(self): + def getPercentile(self) -> float: """ Gets the value of percentile or its default value. """ return self.getOrDefault(self.percentile) @since("2.1.0") - def getFpr(self): + def getFpr(self) -> float: """ Gets the value of fpr or its default value. """ return self.getOrDefault(self.fpr) @since("2.2.0") - def getFdr(self): + def getFdr(self) -> float: """ Gets the value of fdr or its default value. """ return self.getOrDefault(self.fdr) @since("2.2.0") - def getFwe(self): + def getFwe(self) -> float: """ Gets the value of fwe or its default value. """ return self.getOrDefault(self.fwe) -class _Selector(JavaEstimator, _SelectorParams, JavaMLReadable, JavaMLWritable): +class _Selector(JavaEstimator[JM], _SelectorParams, JavaMLReadable, JavaMLWritable, Generic[JM]): """ Mixin for Selectors. """ @since("2.1.0") - def setSelectorType(self, value): + def setSelectorType(self: P, value: str) -> P: """ Sets the value of :py:attr:`selectorType`. """ return self._set(selectorType=value) @since("2.0.0") - def setNumTopFeatures(self, value): + def setNumTopFeatures(self: P, value: int) -> P: """ Sets the value of :py:attr:`numTopFeatures`. Only applicable when selectorType = "numTopFeatures". @@ -5883,7 +6660,7 @@ def setNumTopFeatures(self, value): return self._set(numTopFeatures=value) @since("2.1.0") - def setPercentile(self, value): + def setPercentile(self: P, value: float) -> P: """ Sets the value of :py:attr:`percentile`. Only applicable when selectorType = "percentile". @@ -5891,7 +6668,7 @@ def setPercentile(self, value): return self._set(percentile=value) @since("2.1.0") - def setFpr(self, value): + def setFpr(self: P, value: float) -> P: """ Sets the value of :py:attr:`fpr`. Only applicable when selectorType = "fpr". @@ -5899,7 +6676,7 @@ def setFpr(self, value): return self._set(fpr=value) @since("2.2.0") - def setFdr(self, value): + def setFdr(self: P, value: float) -> P: """ Sets the value of :py:attr:`fdr`. Only applicable when selectorType = "fdr". @@ -5907,26 +6684,26 @@ def setFdr(self, value): return self._set(fdr=value) @since("2.2.0") - def setFwe(self, value): + def setFwe(self: P, value: float) -> P: """ Sets the value of :py:attr:`fwe`. Only applicable when selectorType = "fwe". """ return self._set(fwe=value) - def setFeaturesCol(self, value): + def setFeaturesCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) - def setOutputCol(self, value): + def setOutputCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def setLabelCol(self, value): + def setLabelCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`labelCol`. """ @@ -5939,22 +6716,22 @@ class _SelectorModel(JavaModel, _SelectorParams): """ @since("3.0.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("3.0.0") - def setOutputCol(self, value): + def setOutputCol(self: P, value: str) -> P: """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - @property + @property # type: ignore[misc] @since("2.0.0") - def selectedFeatures(self): + def selectedFeatures(self) -> List[int]: """ List of indices to select (filter). """ @@ -5962,7 +6739,11 @@ def selectedFeatures(self): @inherit_doc -class ChiSqSelector(_Selector, JavaMLReadable, JavaMLWritable): +class ChiSqSelector( + _Selector["ChiSqSelectorModel"], + JavaMLReadable["ChiSqSelector"], + JavaMLWritable, +): """ Chi-Squared feature selection, which selects categorical features to use for predicting a categorical label. @@ -6024,19 +6805,21 @@ class ChiSqSelector(_Selector, JavaMLReadable, JavaMLWritable): True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - numTopFeatures=50, - featuresCol="features", - outputCol=None, - labelCol="label", - selectorType="numTopFeatures", - percentile=0.1, - fpr=0.05, - fdr=0.05, - fwe=0.05, + numTopFeatures: int = 50, + featuresCol: str = "features", + outputCol: Optional[str] = None, + labelCol: str = "label", + selectorType: str = "numTopFeatures", + percentile: float = 0.1, + fpr: float = 0.05, + fdr: float = 0.05, + fwe: float = 0.05, ): """ __init__(self, \\*, numTopFeatures=50, featuresCol="features", outputCol=None, \ @@ -6053,30 +6836,30 @@ def __init__( def setParams( self, *, - numTopFeatures=50, - featuresCol="features", - outputCol=None, - labelCol="labels", - selectorType="numTopFeatures", - percentile=0.1, - fpr=0.05, - fdr=0.05, - fwe=0.05, - ): + numTopFeatures: int = 50, + featuresCol: str = "features", + outputCol: Optional[str] = None, + labelCol: str = "label", + selectorType: str = "numTopFeatures", + percentile: float = 0.1, + fpr: float = 0.05, + fdr: float = 0.05, + fwe: float = 0.05, + ) -> "ChiSqSelector": """ setParams(self, \\*, numTopFeatures=50, featuresCol="features", outputCol=None, \ - labelCol="labels", selectorType="numTopFeatures", percentile=0.1, fpr=0.05, \ + labelCol="label", selectorType="numTopFeatures", percentile=0.1, fpr=0.05, \ fdr=0.05, fwe=0.05) Sets params for this ChiSqSelector. """ kwargs = self._input_kwargs return self._set(**kwargs) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "ChiSqSelectorModel": return ChiSqSelectorModel(java_model) -class ChiSqSelectorModel(_SelectorModel, JavaMLReadable, JavaMLWritable): +class ChiSqSelectorModel(_SelectorModel, JavaMLReadable["ChiSqSelectorModel"], JavaMLWritable): """ Model fitted by :py:class:`ChiSqSelector`. @@ -6086,7 +6869,11 @@ class ChiSqSelectorModel(_SelectorModel, JavaMLReadable, JavaMLWritable): @inherit_doc class VectorSizeHint( - JavaTransformer, HasInputCol, HasHandleInvalid, JavaMLReadable, JavaMLWritable + JavaTransformer, + HasInputCol, + HasHandleInvalid, + JavaMLReadable["VectorSizeHint"], + JavaMLWritable, ): """ A feature transformer that adds size information to the metadata of a vector column. @@ -6122,11 +6909,11 @@ class VectorSizeHint( True """ - size = Param( + size: Param[int] = Param( Params._dummy(), "size", "Size of vectors in column.", typeConverter=TypeConverters.toInt ) - handleInvalid = Param( + handleInvalid: Param[str] = Param( Params._dummy(), "handleInvalid", "How to handle invalid vectors in inputCol. Invalid vectors include " @@ -6137,8 +6924,16 @@ class VectorSizeHint( TypeConverters.toString, ) + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, inputCol=None, size=None, handleInvalid="error"): + def __init__( + self, + *, + inputCol: Optional[str] = None, + size: Optional[int] = None, + handleInvalid: str = "error", + ): """ __init__(self, \\*, inputCol=None, size=None, handleInvalid="error") """ @@ -6149,7 +6944,13 @@ def __init__(self, *, inputCol=None, size=None, handleInvalid="error"): @keyword_only @since("2.3.0") - def setParams(self, *, inputCol=None, size=None, handleInvalid="error"): + def setParams( + self, + *, + inputCol: Optional[str] = None, + size: Optional[str] = None, + handleInvalid: str = "error", + ) -> "VectorSizeHint": """ setParams(self, \\*, inputCol=None, size=None, handleInvalid="error") Sets params for this VectorSizeHint. @@ -6158,22 +6959,22 @@ def setParams(self, *, inputCol=None, size=None, handleInvalid="error"): return self._set(**kwargs) @since("2.3.0") - def getSize(self): + def getSize(self) -> int: """Gets size param, the size of vectors in `inputCol`.""" return self.getOrDefault(self.size) @since("2.3.0") - def setSize(self, value): + def setSize(self, value: int) -> "VectorSizeHint": """Sets size param, the size of vectors in `inputCol`.""" return self._set(size=value) - def setInputCol(self, value): + def setInputCol(self, value: str) -> "VectorSizeHint": """ Sets the value of :py:attr:`inputCol`. """ return self._set(inputCol=value) - def setHandleInvalid(self, value): + def setHandleInvalid(self, value: str) -> "VectorSizeHint": """ Sets the value of :py:attr:`handleInvalid`. """ @@ -6188,7 +6989,7 @@ class _VarianceThresholdSelectorParams(HasFeaturesCol, HasOutputCol): .. versionadded:: 3.1.0 """ - varianceThreshold = Param( + varianceThreshold: Param[float] = Param( Params._dummy(), "varianceThreshold", "Param for variance threshold. Features with a variance not " @@ -6198,7 +6999,7 @@ class _VarianceThresholdSelectorParams(HasFeaturesCol, HasOutputCol): ) @since("3.1.0") - def getVarianceThreshold(self): + def getVarianceThreshold(self) -> float: """ Gets the value of varianceThreshold or its default value. """ @@ -6207,7 +7008,10 @@ def getVarianceThreshold(self): @inherit_doc class VarianceThresholdSelector( - JavaEstimator, _VarianceThresholdSelectorParams, JavaMLReadable, JavaMLWritable + JavaEstimator["VarianceThresholdSelectorModel"], + _VarianceThresholdSelectorParams, + JavaMLReadable["VarianceThresholdSelector"], + JavaMLWritable, ): """ Feature selector that removes all low-variance features. Features with a @@ -6252,8 +7056,16 @@ class VarianceThresholdSelector( True """ + _input_kwargs: Dict[str, Any] + @keyword_only - def __init__(self, *, featuresCol="features", outputCol=None, varianceThreshold=0.0): + def __init__( + self, + *, + featuresCol: str = "features", + outputCol: Optional[str] = None, + varianceThreshold: float = 0.0, + ): """ __init__(self, \\*, featuresCol="features", outputCol=None, varianceThreshold=0.0) """ @@ -6267,7 +7079,13 @@ def __init__(self, *, featuresCol="features", outputCol=None, varianceThreshold= @keyword_only @since("3.1.0") - def setParams(self, *, featuresCol="features", outputCol=None, varianceThreshold=0.0): + def setParams( + self, + *, + featuresCol: str = "features", + outputCol: Optional[str] = None, + varianceThreshold: float = 0.0, + ) -> "VarianceThresholdSelector": """ setParams(self, \\*, featuresCol="features", outputCol=None, varianceThreshold=0.0) Sets params for this VarianceThresholdSelector. @@ -6276,32 +7094,35 @@ def setParams(self, *, featuresCol="features", outputCol=None, varianceThreshold return self._set(**kwargs) @since("3.1.0") - def setVarianceThreshold(self, value): + def setVarianceThreshold(self, value: float) -> "VarianceThresholdSelector": """ Sets the value of :py:attr:`varianceThreshold`. """ return self._set(varianceThreshold=value) @since("3.1.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "VarianceThresholdSelector": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("3.1.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "VarianceThresholdSelector": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "VarianceThresholdSelectorModel": return VarianceThresholdSelectorModel(java_model) class VarianceThresholdSelectorModel( - JavaModel, _VarianceThresholdSelectorParams, JavaMLReadable, JavaMLWritable + JavaModel, + _VarianceThresholdSelectorParams, + JavaMLReadable["VarianceThresholdSelectorModel"], + JavaMLWritable, ): """ Model fitted by :py:class:`VarianceThresholdSelector`. @@ -6310,22 +7131,22 @@ class VarianceThresholdSelectorModel( """ @since("3.1.0") - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "VarianceThresholdSelectorModel": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("3.1.0") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "VarianceThresholdSelectorModel": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - @property + @property # type: ignore[misc] @since("3.1.0") - def selectedFeatures(self): + def selectedFeatures(self) -> List[int]: """ List of indices to select (filter). """ @@ -6340,21 +7161,21 @@ class _UnivariateFeatureSelectorParams(HasFeaturesCol, HasOutputCol, HasLabelCol .. versionadded:: 3.1.0 """ - featureType = Param( + featureType: Param[str] = Param( Params._dummy(), "featureType", "The feature type. " + "Supported options: categorical, continuous.", typeConverter=TypeConverters.toString, ) - labelType = Param( + labelType: Param[str] = Param( Params._dummy(), "labelType", "The label type. " + "Supported options: categorical, continuous.", typeConverter=TypeConverters.toString, ) - selectionMode = Param( + selectionMode: Param[str] = Param( Params._dummy(), "selectionMode", "The selection mode. " @@ -6363,40 +7184,40 @@ class _UnivariateFeatureSelectorParams(HasFeaturesCol, HasOutputCol, HasLabelCol typeConverter=TypeConverters.toString, ) - selectionThreshold = Param( + selectionThreshold: Param[float] = Param( Params._dummy(), "selectionThreshold", "The upper bound of the " + "features that selector will select.", typeConverter=TypeConverters.toFloat, ) - def __init__(self, *args): + def __init__(self, *args: Any): super(_UnivariateFeatureSelectorParams, self).__init__(*args) self._setDefault(selectionMode="numTopFeatures") @since("3.1.1") - def getFeatureType(self): + def getFeatureType(self) -> str: """ Gets the value of featureType or its default value. """ return self.getOrDefault(self.featureType) @since("3.1.1") - def getLabelType(self): + def getLabelType(self) -> str: """ Gets the value of labelType or its default value. """ return self.getOrDefault(self.labelType) @since("3.1.1") - def getSelectionMode(self): + def getSelectionMode(self) -> str: """ Gets the value of selectionMode or its default value. """ return self.getOrDefault(self.selectionMode) @since("3.1.1") - def getSelectionThreshold(self): + def getSelectionThreshold(self) -> float: """ Gets the value of selectionThreshold or its default value. """ @@ -6405,7 +7226,10 @@ def getSelectionThreshold(self): @inherit_doc class UnivariateFeatureSelector( - JavaEstimator, _UnivariateFeatureSelectorParams, JavaMLReadable, JavaMLWritable + JavaEstimator["UnivariateFeatureSelectorModel"], + _UnivariateFeatureSelectorParams, + JavaMLReadable["UnivariateFeatureSelector"], + JavaMLWritable, ): """ UnivariateFeatureSelector @@ -6479,14 +7303,16 @@ class UnivariateFeatureSelector( True """ + _input_kwargs: Dict[str, Any] + @keyword_only def __init__( self, *, - featuresCol="features", - outputCol=None, - labelCol="label", - selectionMode="numTopFeatures", + featuresCol: str = "features", + outputCol: Optional[str] = None, + labelCol: str = "label", + selectionMode: str = "numTopFeatures", ): """ __init__(self, \\*, featuresCol="features", outputCol=None, \ @@ -6504,71 +7330,74 @@ def __init__( def setParams( self, *, - featuresCol="features", - outputCol=None, - labelCol="labels", - selectionMode="numTopFeatures", - ): + featuresCol: str = "features", + outputCol: Optional[str] = None, + labelCol: str = "label", + selectionMode: str = "numTopFeatures", + ) -> "UnivariateFeatureSelector": """ setParams(self, \\*, featuresCol="features", outputCol=None, \ - labelCol="labels", selectionMode="numTopFeatures") + labelCol="label", selectionMode="numTopFeatures") Sets params for this UnivariateFeatureSelector. """ kwargs = self._input_kwargs return self._set(**kwargs) @since("3.1.1") - def setFeatureType(self, value): + def setFeatureType(self, value: str) -> "UnivariateFeatureSelector": """ Sets the value of :py:attr:`featureType`. """ return self._set(featureType=value) @since("3.1.1") - def setLabelType(self, value): + def setLabelType(self, value: str) -> "UnivariateFeatureSelector": """ Sets the value of :py:attr:`labelType`. """ return self._set(labelType=value) @since("3.1.1") - def setSelectionMode(self, value): + def setSelectionMode(self, value: str) -> "UnivariateFeatureSelector": """ Sets the value of :py:attr:`selectionMode`. """ return self._set(selectionMode=value) @since("3.1.1") - def setSelectionThreshold(self, value): + def setSelectionThreshold(self, value: float) -> "UnivariateFeatureSelector": """ Sets the value of :py:attr:`selectionThreshold`. """ return self._set(selectionThreshold=value) - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "UnivariateFeatureSelector": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "UnivariateFeatureSelector": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - def setLabelCol(self, value): + def setLabelCol(self, value: str) -> "UnivariateFeatureSelector": """ Sets the value of :py:attr:`labelCol`. """ return self._set(labelCol=value) - def _create_model(self, java_model): + def _create_model(self, java_model: "JavaObject") -> "UnivariateFeatureSelectorModel": return UnivariateFeatureSelectorModel(java_model) class UnivariateFeatureSelectorModel( - JavaModel, _UnivariateFeatureSelectorParams, JavaMLReadable, JavaMLWritable + JavaModel, + _UnivariateFeatureSelectorParams, + JavaMLReadable["UnivariateFeatureSelectorModel"], + JavaMLWritable, ): """ Model fitted by :py:class:`UnivariateFeatureSelector`. @@ -6577,22 +7406,22 @@ class UnivariateFeatureSelectorModel( """ @since("3.1.1") - def setFeaturesCol(self, value): + def setFeaturesCol(self, value: str) -> "UnivariateFeatureSelectorModel": """ Sets the value of :py:attr:`featuresCol`. """ return self._set(featuresCol=value) @since("3.1.1") - def setOutputCol(self, value): + def setOutputCol(self, value: str) -> "UnivariateFeatureSelectorModel": """ Sets the value of :py:attr:`outputCol`. """ return self._set(outputCol=value) - @property + @property # type: ignore[misc] @since("3.1.1") - def selectedFeatures(self): + def selectedFeatures(self) -> List[int]: """ List of indices to select (filter). """ diff --git a/python/pyspark/ml/feature.pyi b/python/pyspark/ml/feature.pyi deleted file mode 100644 index 6545bcd1c516a..0000000000000 --- a/python/pyspark/ml/feature.pyi +++ /dev/null @@ -1,1586 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import overload -from typing import Any, Dict, Generic, List, Optional, Tuple -from pyspark.ml._typing import JM, P - -from pyspark.ml.param.shared import ( - HasFeaturesCol, - HasHandleInvalid, - HasInputCol, - HasInputCols, - HasLabelCol, - HasMaxIter, - HasNumFeatures, - HasOutputCol, - HasOutputCols, - HasRelativeError, - HasSeed, - HasStepSize, - HasThreshold, - HasThresholds, -) -from pyspark.ml.util import JavaMLReadable, JavaMLWritable -from pyspark.ml.wrapper import JavaEstimator, JavaModel, JavaParams, JavaTransformer -from pyspark.ml.linalg import Vector, DenseVector, DenseMatrix -from pyspark.sql.dataframe import DataFrame -from pyspark.ml.param import Param - -from py4j.java_gateway import JavaObject - -class Binarizer( - JavaTransformer, - HasThreshold, - HasThresholds, - HasInputCol, - HasOutputCol, - HasInputCols, - HasOutputCols, - JavaMLReadable[Binarizer], - JavaMLWritable, -): - threshold: Param[float] - thresholds: Param[List[float]] - @overload - def __init__( - self, - *, - threshold: float = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> None: ... - @overload - def __init__( - self, - *, - thresholds: Optional[List[float]] = ..., - inputCols: Optional[List[str]] = ..., - outputCols: Optional[List[str]] = ..., - ) -> None: ... - @overload - def setParams( - self, - *, - threshold: float = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> Binarizer: ... - @overload - def setParams( - self, - *, - thresholds: Optional[List[float]] = ..., - inputCols: Optional[List[str]] = ..., - outputCols: Optional[List[str]] = ..., - ) -> Binarizer: ... - def setThreshold(self, value: float) -> Binarizer: ... - def setThresholds(self, value: List[float]) -> Binarizer: ... - def setInputCol(self, value: str) -> Binarizer: ... - def setInputCols(self, value: List[str]) -> Binarizer: ... - def setOutputCol(self, value: str) -> Binarizer: ... - def setOutputCols(self, value: List[str]) -> Binarizer: ... - -class _LSHParams(HasInputCol, HasOutputCol): - numHashTables: Param[int] - def __init__(self, *args: Any): ... - def getNumHashTables(self) -> int: ... - -class _LSH(Generic[JM], JavaEstimator[JM], _LSHParams, JavaMLReadable, JavaMLWritable): - def setNumHashTables(self: P, value: int) -> P: ... - def setInputCol(self: P, value: str) -> P: ... - def setOutputCol(self: P, value: str) -> P: ... - def _create_model(self, java_model: JavaObject) -> JM: ... - -class _LSHModel(JavaModel, _LSHParams): - def setInputCol(self: P, value: str) -> P: ... - def setOutputCol(self: P, value: str) -> P: ... - def approxNearestNeighbors( - self, - dataset: DataFrame, - key: Vector, - numNearestNeighbors: int, - distCol: str = ..., - ) -> DataFrame: ... - def approxSimilarityJoin( - self, - datasetA: DataFrame, - datasetB: DataFrame, - threshold: float, - distCol: str = ..., - ) -> DataFrame: ... - -class _BucketedRandomProjectionLSHParams: - bucketLength: Param[float] - def getBucketLength(self) -> float: ... - -class BucketedRandomProjectionLSH( - _LSH[BucketedRandomProjectionLSHModel], - _LSHParams, - HasSeed, - JavaMLReadable[BucketedRandomProjectionLSH], - JavaMLWritable, -): - def __init__( - self, - *, - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - seed: Optional[int] = ..., - numHashTables: int = ..., - bucketLength: Optional[float] = ..., - ) -> None: ... - def setParams( - self, - *, - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - seed: Optional[int] = ..., - numHashTables: int = ..., - bucketLength: Optional[float] = ..., - ) -> BucketedRandomProjectionLSH: ... - def setBucketLength(self, value: float) -> BucketedRandomProjectionLSH: ... - def setSeed(self, value: int) -> BucketedRandomProjectionLSH: ... - -class BucketedRandomProjectionLSHModel( - _LSHModel, - _BucketedRandomProjectionLSHParams, - JavaMLReadable[BucketedRandomProjectionLSHModel], - JavaMLWritable, -): ... - -class Bucketizer( - JavaTransformer, - HasInputCol, - HasOutputCol, - HasInputCols, - HasOutputCols, - HasHandleInvalid, - JavaMLReadable[Bucketizer], - JavaMLWritable, -): - splits: Param[List[float]] - handleInvalid: Param[str] - splitsArray: Param[List[List[float]]] - @overload - def __init__( - self, - *, - splits: Optional[List[float]] = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - handleInvalid: str = ..., - ) -> None: ... - @overload - def __init__( - self, - *, - handleInvalid: str = ..., - splitsArray: Optional[List[List[float]]] = ..., - inputCols: Optional[List[str]] = ..., - outputCols: Optional[List[str]] = ..., - ) -> None: ... - @overload - def setParams( - self, - *, - splits: Optional[List[float]] = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - handleInvalid: str = ..., - ) -> Bucketizer: ... - @overload - def setParams( - self, - *, - handleInvalid: str = ..., - splitsArray: Optional[List[List[float]]] = ..., - inputCols: Optional[List[str]] = ..., - outputCols: Optional[List[str]] = ..., - ) -> Bucketizer: ... - def setSplits(self, value: List[float]) -> Bucketizer: ... - def getSplits(self) -> List[float]: ... - def setSplitsArray(self, value: List[List[float]]) -> Bucketizer: ... - def getSplitsArray(self) -> List[List[float]]: ... - def setInputCol(self, value: str) -> Bucketizer: ... - def setInputCols(self, value: List[str]) -> Bucketizer: ... - def setOutputCol(self, value: str) -> Bucketizer: ... - def setOutputCols(self, value: List[str]) -> Bucketizer: ... - def setHandleInvalid(self, value: str) -> Bucketizer: ... - -class _CountVectorizerParams(JavaParams, HasInputCol, HasOutputCol): - minTF: Param[float] - minDF: Param[float] - maxDF: Param[float] - vocabSize: Param[int] - binary: Param[bool] - def __init__(self, *args: Any) -> None: ... - def getMinTF(self) -> float: ... - def getMinDF(self) -> float: ... - def getMaxDF(self) -> float: ... - def getVocabSize(self) -> int: ... - def getBinary(self) -> bool: ... - -class CountVectorizer( - JavaEstimator[CountVectorizerModel], - _CountVectorizerParams, - JavaMLReadable[CountVectorizer], - JavaMLWritable, -): - def __init__( - self, - *, - minTF: float = ..., - minDF: float = ..., - maxDF: float = ..., - vocabSize: int = ..., - binary: bool = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> None: ... - def setParams( - self, - *, - minTF: float = ..., - minDF: float = ..., - maxDF: float = ..., - vocabSize: int = ..., - binary: bool = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> CountVectorizer: ... - def setMinTF(self, value: float) -> CountVectorizer: ... - def setMinDF(self, value: float) -> CountVectorizer: ... - def setMaxDF(self, value: float) -> CountVectorizer: ... - def setVocabSize(self, value: int) -> CountVectorizer: ... - def setBinary(self, value: bool) -> CountVectorizer: ... - def setInputCol(self, value: str) -> CountVectorizer: ... - def setOutputCol(self, value: str) -> CountVectorizer: ... - def _create_model(self, java_model: JavaObject) -> CountVectorizerModel: ... - -class CountVectorizerModel(JavaModel, JavaMLReadable[CountVectorizerModel], JavaMLWritable): - def setInputCol(self, value: str) -> CountVectorizerModel: ... - def setOutputCol(self, value: str) -> CountVectorizerModel: ... - def setMinTF(self, value: float) -> CountVectorizerModel: ... - def setBinary(self, value: bool) -> CountVectorizerModel: ... - @classmethod - def from_vocabulary( - cls, - vocabulary: List[str], - inputCol: str, - outputCol: Optional[str] = ..., - minTF: Optional[float] = ..., - binary: Optional[bool] = ..., - ) -> CountVectorizerModel: ... - @property - def vocabulary(self) -> List[str]: ... - -class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable[DCT], JavaMLWritable): - inverse: Param[bool] - def __init__( - self, *, inverse: bool = ..., inputCol: Optional[str] = ..., outputCol: Optional[str] = ... - ) -> None: ... - def setParams( - self, *, inverse: bool = ..., inputCol: Optional[str] = ..., outputCol: Optional[str] = ... - ) -> DCT: ... - def setInverse(self, value: bool) -> DCT: ... - def getInverse(self) -> bool: ... - def setInputCol(self, value: str) -> DCT: ... - def setOutputCol(self, value: str) -> DCT: ... - -class ElementwiseProduct( - JavaTransformer, - HasInputCol, - HasOutputCol, - JavaMLReadable[ElementwiseProduct], - JavaMLWritable, -): - scalingVec: Param[Vector] - def __init__( - self, - *, - scalingVec: Optional[Vector] = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> None: ... - def setParams( - self, - *, - scalingVec: Optional[Vector] = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> ElementwiseProduct: ... - def setScalingVec(self, value: Vector) -> ElementwiseProduct: ... - def getScalingVec(self) -> Vector: ... - def setInputCol(self, value: str) -> ElementwiseProduct: ... - def setOutputCol(self, value: str) -> ElementwiseProduct: ... - -class FeatureHasher( - JavaTransformer, - HasInputCols, - HasOutputCol, - HasNumFeatures, - JavaMLReadable[FeatureHasher], - JavaMLWritable, -): - categoricalCols: Param[List[str]] - def __init__( - self, - *, - numFeatures: int = ..., - inputCols: Optional[List[str]] = ..., - outputCol: Optional[str] = ..., - categoricalCols: Optional[List[str]] = ..., - ) -> None: ... - def setParams( - self, - *, - numFeatures: int = ..., - inputCols: Optional[List[str]] = ..., - outputCol: Optional[str] = ..., - categoricalCols: Optional[List[str]] = ..., - ) -> FeatureHasher: ... - def setCategoricalCols(self, value: List[str]) -> FeatureHasher: ... - def getCategoricalCols(self) -> List[str]: ... - def setInputCols(self, value: List[str]) -> FeatureHasher: ... - def setOutputCol(self, value: str) -> FeatureHasher: ... - def setNumFeatures(self, value: int) -> FeatureHasher: ... - -class HashingTF( - JavaTransformer, - HasInputCol, - HasOutputCol, - HasNumFeatures, - JavaMLReadable[HashingTF], - JavaMLWritable, -): - binary: Param[bool] - def __init__( - self, - *, - numFeatures: int = ..., - binary: bool = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> None: ... - def setParams( - self, - *, - numFeatures: int = ..., - binary: bool = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> HashingTF: ... - def setBinary(self, value: bool) -> HashingTF: ... - def getBinary(self) -> bool: ... - def setInputCol(self, value: str) -> HashingTF: ... - def setOutputCol(self, value: str) -> HashingTF: ... - def setNumFeatures(self, value: int) -> HashingTF: ... - def indexOf(self, term: Any) -> int: ... - -class _IDFParams(HasInputCol, HasOutputCol): - minDocFreq: Param[int] - def __init__(self, *args: Any): ... - def getMinDocFreq(self) -> int: ... - -class IDF(JavaEstimator[IDFModel], _IDFParams, JavaMLReadable[IDF], JavaMLWritable): - def __init__( - self, - *, - minDocFreq: int = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> None: ... - def setParams( - self, - *, - minDocFreq: int = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> IDF: ... - def setMinDocFreq(self, value: int) -> IDF: ... - def setInputCol(self, value: str) -> IDF: ... - def setOutputCol(self, value: str) -> IDF: ... - def _create_model(self, java_model: JavaObject) -> IDFModel: ... - -class IDFModel(JavaModel, _IDFParams, JavaMLReadable[IDFModel], JavaMLWritable): - def setInputCol(self, value: str) -> IDFModel: ... - def setOutputCol(self, value: str) -> IDFModel: ... - @property - def idf(self) -> Vector: ... - @property - def docFreq(self) -> List[int]: ... - @property - def numDocs(self) -> int: ... - -class _ImputerParams(HasInputCol, HasInputCols, HasOutputCol, HasOutputCols, HasRelativeError): - strategy: Param[str] - missingValue: Param[float] - def getStrategy(self) -> str: ... - def getMissingValue(self) -> float: ... - -class Imputer(JavaEstimator[ImputerModel], _ImputerParams, JavaMLReadable[Imputer], JavaMLWritable): - @overload - def __init__( - self, - *, - strategy: str = ..., - missingValue: float = ..., - inputCols: Optional[List[str]] = ..., - outputCols: Optional[List[str]] = ..., - relativeError: float = ..., - ) -> None: ... - @overload - def __init__( - self, - *, - strategy: str = ..., - missingValue: float = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - relativeError: float = ..., - ) -> None: ... - @overload - def setParams( - self, - *, - strategy: str = ..., - missingValue: float = ..., - inputCols: Optional[List[str]] = ..., - outputCols: Optional[List[str]] = ..., - relativeError: float = ..., - ) -> Imputer: ... - @overload - def setParams( - self, - *, - strategy: str = ..., - missingValue: float = ..., - inputCol: Optional[str] = ..., - outputCols: Optional[str] = ..., - relativeError: float = ..., - ) -> Imputer: ... - def setStrategy(self, value: str) -> Imputer: ... - def setMissingValue(self, value: float) -> Imputer: ... - def setInputCols(self, value: List[str]) -> Imputer: ... - def setOutputCols(self, value: List[str]) -> Imputer: ... - def setInputCol(self, value: str) -> Imputer: ... - def setOutputCol(self, value: str) -> Imputer: ... - def setRelativeError(self, value: float) -> Imputer: ... - def _create_model(self, java_model: JavaObject) -> ImputerModel: ... - -class ImputerModel(JavaModel, _ImputerParams, JavaMLReadable[ImputerModel], JavaMLWritable): - def setInputCols(self, value: List[str]) -> ImputerModel: ... - def setOutputCols(self, value: List[str]) -> ImputerModel: ... - def setInputCol(self, value: str) -> ImputerModel: ... - def setOutputCol(self, value: str) -> ImputerModel: ... - @property - def surrogateDF(self) -> DataFrame: ... - -class Interaction( - JavaTransformer, - HasInputCols, - HasOutputCol, - JavaMLReadable[Interaction], - JavaMLWritable, -): - def __init__( - self, *, inputCols: Optional[List[str]] = ..., outputCol: Optional[str] = ... - ) -> None: ... - def setParams( - self, *, inputCols: Optional[List[str]] = ..., outputCol: Optional[str] = ... - ) -> Interaction: ... - def setInputCols(self, value: List[str]) -> Interaction: ... - def setOutputCol(self, value: str) -> Interaction: ... - -class _MaxAbsScalerParams(HasInputCol, HasOutputCol): ... - -class MaxAbsScaler( - JavaEstimator[MaxAbsScalerModel], - _MaxAbsScalerParams, - JavaMLReadable[MaxAbsScaler], - JavaMLWritable, -): - def __init__( - self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ... - ) -> None: ... - def setParams( - self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ... - ) -> MaxAbsScaler: ... - def setInputCol(self, value: str) -> MaxAbsScaler: ... - def setOutputCol(self, value: str) -> MaxAbsScaler: ... - def _create_model(self, java_model: JavaObject) -> MaxAbsScalerModel: ... - -class MaxAbsScalerModel( - JavaModel, _MaxAbsScalerParams, JavaMLReadable[MaxAbsScalerModel], JavaMLWritable -): - def setInputCol(self, value: str) -> MaxAbsScalerModel: ... - def setOutputCol(self, value: str) -> MaxAbsScalerModel: ... - @property - def maxAbs(self) -> Vector: ... - -class MinHashLSH( - _LSH[MinHashLSHModel], - HasInputCol, - HasOutputCol, - HasSeed, - JavaMLReadable[MinHashLSH], - JavaMLWritable, -): - def __init__( - self, - *, - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - seed: Optional[int] = ..., - numHashTables: int = ..., - ) -> None: ... - def setParams( - self, - *, - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - seed: Optional[int] = ..., - numHashTables: int = ..., - ) -> MinHashLSH: ... - def setSeed(self, value: int) -> MinHashLSH: ... - -class MinHashLSHModel(_LSHModel, JavaMLReadable[MinHashLSHModel], JavaMLWritable): ... - -class _MinMaxScalerParams(HasInputCol, HasOutputCol): - min: Param[float] - max: Param[float] - def __init__(self, *args: Any): ... - def getMin(self) -> float: ... - def getMax(self) -> float: ... - -class MinMaxScaler( - JavaEstimator[MinMaxScalerModel], - _MinMaxScalerParams, - JavaMLReadable[MinMaxScaler], - JavaMLWritable, -): - def __init__( - self, - *, - min: float = ..., - max: float = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> None: ... - def setParams( - self, - *, - min: float = ..., - max: float = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> MinMaxScaler: ... - def setMin(self, value: float) -> MinMaxScaler: ... - def setMax(self, value: float) -> MinMaxScaler: ... - def setInputCol(self, value: str) -> MinMaxScaler: ... - def setOutputCol(self, value: str) -> MinMaxScaler: ... - def _create_model(self, java_model: JavaObject) -> MinMaxScalerModel: ... - -class MinMaxScalerModel( - JavaModel, _MinMaxScalerParams, JavaMLReadable[MinMaxScalerModel], JavaMLWritable -): - def setInputCol(self, value: str) -> MinMaxScalerModel: ... - def setOutputCol(self, value: str) -> MinMaxScalerModel: ... - def setMin(self, value: float) -> MinMaxScalerModel: ... - def setMax(self, value: float) -> MinMaxScalerModel: ... - @property - def originalMin(self) -> Vector: ... - @property - def originalMax(self) -> Vector: ... - -class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable[NGram], JavaMLWritable): - n: Param[int] - def __init__( - self, *, n: int = ..., inputCol: Optional[str] = ..., outputCol: Optional[str] = ... - ) -> None: ... - def setParams( - self, *, n: int = ..., inputCol: Optional[str] = ..., outputCol: Optional[str] = ... - ) -> NGram: ... - def setN(self, value: int) -> NGram: ... - def getN(self) -> int: ... - def setInputCol(self, value: str) -> NGram: ... - def setOutputCol(self, value: str) -> NGram: ... - -class Normalizer( - JavaTransformer, - HasInputCol, - HasOutputCol, - JavaMLReadable[Normalizer], - JavaMLWritable, -): - p: Param[float] - def __init__( - self, *, p: float = ..., inputCol: Optional[str] = ..., outputCol: Optional[str] = ... - ) -> None: ... - def setParams( - self, *, p: float = ..., inputCol: Optional[str] = ..., outputCol: Optional[str] = ... - ) -> Normalizer: ... - def setP(self, value: float) -> Normalizer: ... - def getP(self) -> float: ... - def setInputCol(self, value: str) -> Normalizer: ... - def setOutputCol(self, value: str) -> Normalizer: ... - -class _OneHotEncoderParams(HasInputCols, HasOutputCols, HasHandleInvalid): - handleInvalid: Param[str] - dropLast: Param[bool] - def __init__(self, *args: Any): ... - def getDropLast(self) -> bool: ... - -class OneHotEncoder( - JavaEstimator[OneHotEncoderModel], - _OneHotEncoderParams, - JavaMLReadable[OneHotEncoder], - JavaMLWritable, -): - @overload - def __init__( - self, - *, - inputCols: Optional[List[str]] = ..., - outputCols: Optional[List[str]] = ..., - handleInvalid: str = ..., - dropLast: bool = ..., - ) -> None: ... - @overload - def __init__( - self, - *, - handleInvalid: str = ..., - dropLast: bool = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> None: ... - @overload - def setParams( - self, - *, - inputCols: Optional[List[str]] = ..., - outputCols: Optional[List[str]] = ..., - handleInvalid: str = ..., - dropLast: bool = ..., - ) -> OneHotEncoder: ... - @overload - def setParams( - self, - *, - handleInvalid: str = ..., - dropLast: bool = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> OneHotEncoder: ... - def setDropLast(self, value: bool) -> OneHotEncoder: ... - def setInputCols(self, value: List[str]) -> OneHotEncoder: ... - def setOutputCols(self, value: List[str]) -> OneHotEncoder: ... - def setHandleInvalid(self, value: str) -> OneHotEncoder: ... - def setInputCol(self, value: str) -> OneHotEncoder: ... - def setOutputCol(self, value: str) -> OneHotEncoder: ... - def _create_model(self, java_model: JavaObject) -> OneHotEncoderModel: ... - -class OneHotEncoderModel( - JavaModel, _OneHotEncoderParams, JavaMLReadable[OneHotEncoderModel], JavaMLWritable -): - def setDropLast(self, value: bool) -> OneHotEncoderModel: ... - def setInputCols(self, value: List[str]) -> OneHotEncoderModel: ... - def setOutputCols(self, value: List[str]) -> OneHotEncoderModel: ... - def setInputCol(self, value: str) -> OneHotEncoderModel: ... - def setOutputCol(self, value: str) -> OneHotEncoderModel: ... - def setHandleInvalid(self, value: str) -> OneHotEncoderModel: ... - @property - def categorySizes(self) -> List[int]: ... - -class PolynomialExpansion( - JavaTransformer, - HasInputCol, - HasOutputCol, - JavaMLReadable[PolynomialExpansion], - JavaMLWritable, -): - degree: Param[int] - def __init__( - self, *, degree: int = ..., inputCol: Optional[str] = ..., outputCol: Optional[str] = ... - ) -> None: ... - def setParams( - self, *, degree: int = ..., inputCol: Optional[str] = ..., outputCol: Optional[str] = ... - ) -> PolynomialExpansion: ... - def setDegree(self, value: int) -> PolynomialExpansion: ... - def getDegree(self) -> int: ... - def setInputCol(self, value: str) -> PolynomialExpansion: ... - def setOutputCol(self, value: str) -> PolynomialExpansion: ... - -class QuantileDiscretizer( - JavaEstimator[Bucketizer], - HasInputCol, - HasOutputCol, - HasInputCols, - HasOutputCols, - HasHandleInvalid, - HasRelativeError, - JavaMLReadable[QuantileDiscretizer], - JavaMLWritable, -): - numBuckets: Param[int] - handleInvalid: Param[str] - numBucketsArray: Param[List[int]] - @overload - def __init__( - self, - *, - numBuckets: int = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - relativeError: float = ..., - handleInvalid: str = ..., - ) -> None: ... - @overload - def __init__( - self, - *, - relativeError: float = ..., - handleInvalid: str = ..., - numBucketsArray: Optional[List[int]] = ..., - inputCols: Optional[List[str]] = ..., - outputCols: Optional[List[str]] = ..., - ) -> None: ... - @overload - def setParams( - self, - *, - numBuckets: int = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - relativeError: float = ..., - handleInvalid: str = ..., - ) -> QuantileDiscretizer: ... - @overload - def setParams( - self, - *, - relativeError: float = ..., - handleInvalid: str = ..., - numBucketsArray: Optional[List[int]] = ..., - inputCols: Optional[List[str]] = ..., - outputCols: Optional[List[str]] = ..., - ) -> QuantileDiscretizer: ... - def setNumBuckets(self, value: int) -> QuantileDiscretizer: ... - def getNumBuckets(self) -> int: ... - def setNumBucketsArray(self, value: List[int]) -> QuantileDiscretizer: ... - def getNumBucketsArray(self) -> List[int]: ... - def setRelativeError(self, value: float) -> QuantileDiscretizer: ... - def setInputCol(self, value: str) -> QuantileDiscretizer: ... - def setInputCols(self, value: List[str]) -> QuantileDiscretizer: ... - def setOutputCol(self, value: str) -> QuantileDiscretizer: ... - def setOutputCols(self, value: List[str]) -> QuantileDiscretizer: ... - def setHandleInvalid(self, value: str) -> QuantileDiscretizer: ... - def _create_model(self, java_model: JavaObject) -> Bucketizer: ... - -class _RobustScalerParams(HasInputCol, HasOutputCol, HasRelativeError): - lower: Param[float] - upper: Param[float] - withCentering: Param[bool] - withScaling: Param[bool] - def __init__(self, *args: Any): ... - def getLower(self) -> float: ... - def getUpper(self) -> float: ... - def getWithCentering(self) -> bool: ... - def getWithScaling(self) -> bool: ... - -class RobustScaler( - JavaEstimator, _RobustScalerParams, JavaMLReadable[RobustScaler], JavaMLWritable -): - def __init__( - self, - *, - lower: float = ..., - upper: float = ..., - withCentering: bool = ..., - withScaling: bool = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - relativeError: float = ..., - ) -> None: ... - def setParams( - self, - *, - lower: float = ..., - upper: float = ..., - withCentering: bool = ..., - withScaling: bool = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - relativeError: float = ..., - ) -> RobustScaler: ... - def setLower(self, value: float) -> RobustScaler: ... - def setUpper(self, value: float) -> RobustScaler: ... - def setWithCentering(self, value: bool) -> RobustScaler: ... - def setWithScaling(self, value: bool) -> RobustScaler: ... - def setInputCol(self, value: str) -> RobustScaler: ... - def setOutputCol(self, value: str) -> RobustScaler: ... - def setRelativeError(self, value: float) -> RobustScaler: ... - def _create_model(self, java_model: JavaObject) -> RobustScalerModel: ... - -class RobustScalerModel( - JavaModel, _RobustScalerParams, JavaMLReadable[RobustScalerModel], JavaMLWritable -): - def setInputCol(self, value: str) -> RobustScalerModel: ... - def setOutputCol(self, value: str) -> RobustScalerModel: ... - @property - def median(self) -> Vector: ... - @property - def range(self) -> Vector: ... - -class RegexTokenizer( - JavaTransformer, - HasInputCol, - HasOutputCol, - JavaMLReadable[RegexTokenizer], - JavaMLWritable, -): - minTokenLength: Param[int] - gaps: Param[bool] - pattern: Param[str] - toLowercase: Param[bool] - def __init__( - self, - *, - minTokenLength: int = ..., - gaps: bool = ..., - pattern: str = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - toLowercase: bool = ..., - ) -> None: ... - def setParams( - self, - *, - minTokenLength: int = ..., - gaps: bool = ..., - pattern: str = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - toLowercase: bool = ..., - ) -> RegexTokenizer: ... - def setMinTokenLength(self, value: int) -> RegexTokenizer: ... - def getMinTokenLength(self) -> int: ... - def setGaps(self, value: bool) -> RegexTokenizer: ... - def getGaps(self) -> bool: ... - def setPattern(self, value: str) -> RegexTokenizer: ... - def getPattern(self) -> str: ... - def setToLowercase(self, value: bool) -> RegexTokenizer: ... - def getToLowercase(self) -> bool: ... - def setInputCol(self, value: str) -> RegexTokenizer: ... - def setOutputCol(self, value: str) -> RegexTokenizer: ... - -class SQLTransformer(JavaTransformer, JavaMLReadable[SQLTransformer], JavaMLWritable): - statement: Param[str] - def __init__(self, *, statement: Optional[str] = ...) -> None: ... - def setParams(self, *, statement: Optional[str] = ...) -> SQLTransformer: ... - def setStatement(self, value: str) -> SQLTransformer: ... - def getStatement(self) -> str: ... - -class _StandardScalerParams(HasInputCol, HasOutputCol): - withMean: Param[bool] - withStd: Param[bool] - def __init__(self, *args: Any): ... - def getWithMean(self) -> bool: ... - def getWithStd(self) -> bool: ... - -class StandardScaler( - JavaEstimator[StandardScalerModel], - _StandardScalerParams, - JavaMLReadable[StandardScaler], - JavaMLWritable, -): - def __init__( - self, - *, - withMean: bool = ..., - withStd: bool = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> None: ... - def setParams( - self, - *, - withMean: bool = ..., - withStd: bool = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> StandardScaler: ... - def setWithMean(self, value: bool) -> StandardScaler: ... - def setWithStd(self, value: bool) -> StandardScaler: ... - def setInputCol(self, value: str) -> StandardScaler: ... - def setOutputCol(self, value: str) -> StandardScaler: ... - def _create_model(self, java_model: JavaObject) -> StandardScalerModel: ... - -class StandardScalerModel( - JavaModel, - _StandardScalerParams, - JavaMLReadable[StandardScalerModel], - JavaMLWritable, -): - def setInputCol(self, value: str) -> StandardScalerModel: ... - def setOutputCol(self, value: str) -> StandardScalerModel: ... - @property - def std(self) -> Vector: ... - @property - def mean(self) -> Vector: ... - -class _StringIndexerParams( - JavaParams, HasHandleInvalid, HasInputCol, HasOutputCol, HasInputCols, HasOutputCols -): - stringOrderType: Param[str] - handleInvalid: Param[str] - def __init__(self, *args: Any) -> None: ... - def getStringOrderType(self) -> str: ... - -class StringIndexer( - JavaEstimator[StringIndexerModel], - _StringIndexerParams, - JavaMLReadable[StringIndexer], - JavaMLWritable, -): - @overload - def __init__( - self, - *, - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - handleInvalid: str = ..., - stringOrderType: str = ..., - ) -> None: ... - @overload - def __init__( - self, - *, - inputCols: Optional[List[str]] = ..., - outputCols: Optional[List[str]] = ..., - handleInvalid: str = ..., - stringOrderType: str = ..., - ) -> None: ... - @overload - def setParams( - self, - *, - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - handleInvalid: str = ..., - stringOrderType: str = ..., - ) -> StringIndexer: ... - @overload - def setParams( - self, - *, - inputCols: Optional[List[str]] = ..., - outputCols: Optional[List[str]] = ..., - handleInvalid: str = ..., - stringOrderType: str = ..., - ) -> StringIndexer: ... - def setStringOrderType(self, value: str) -> StringIndexer: ... - def setInputCol(self, value: str) -> StringIndexer: ... - def setInputCols(self, value: List[str]) -> StringIndexer: ... - def setOutputCol(self, value: str) -> StringIndexer: ... - def setOutputCols(self, value: List[str]) -> StringIndexer: ... - def setHandleInvalid(self, value: str) -> StringIndexer: ... - def _create_model(self, java_model: JavaObject) -> StringIndexerModel: ... - -class StringIndexerModel( - JavaModel, _StringIndexerParams, JavaMLReadable[StringIndexerModel], JavaMLWritable -): - def setInputCol(self, value: str) -> StringIndexerModel: ... - def setInputCols(self, value: List[str]) -> StringIndexerModel: ... - def setOutputCol(self, value: str) -> StringIndexerModel: ... - def setOutputCols(self, value: List[str]) -> StringIndexerModel: ... - def setHandleInvalid(self, value: str) -> StringIndexerModel: ... - @classmethod - def from_labels( - cls, - labels: List[str], - inputCol: str, - outputCol: Optional[str] = ..., - handleInvalid: Optional[str] = ..., - ) -> StringIndexerModel: ... - @classmethod - def from_arrays_of_labels( - cls, - arrayOfLabels: List[List[str]], - inputCols: List[str], - outputCols: Optional[List[str]] = ..., - handleInvalid: Optional[str] = ..., - ) -> StringIndexerModel: ... - @property - def labels(self) -> List[str]: ... - -class IndexToString( - JavaTransformer, - HasInputCol, - HasOutputCol, - JavaMLReadable[IndexToString], - JavaMLWritable, -): - labels: Param[List[str]] - def __init__( - self, - *, - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - labels: Optional[List[str]] = ..., - ) -> None: ... - def setParams( - self, - *, - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - labels: Optional[List[str]] = ..., - ) -> IndexToString: ... - def setLabels(self, value: List[str]) -> IndexToString: ... - def getLabels(self) -> List[str]: ... - def setInputCol(self, value: str) -> IndexToString: ... - def setOutputCol(self, value: str) -> IndexToString: ... - -class StopWordsRemover( - JavaTransformer, - HasInputCol, - HasOutputCol, - HasInputCols, - HasOutputCols, - JavaMLReadable[StopWordsRemover], - JavaMLWritable, -): - stopWords: Param[List[str]] - caseSensitive: Param[bool] - locale: Param[str] - @overload - def __init__( - self, - *, - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - stopWords: Optional[List[str]] = ..., - caseSensitive: bool = ..., - locale: Optional[str] = ..., - ) -> None: ... - @overload - def __init__( - self, - *, - stopWords: Optional[List[str]] = ..., - caseSensitive: bool = ..., - locale: Optional[str] = ..., - inputCols: Optional[List[str]] = ..., - outputCols: Optional[List[str]] = ..., - ) -> None: ... - @overload - def setParams( - self, - *, - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - stopWords: Optional[List[str]] = ..., - caseSensitive: bool = ..., - locale: Optional[str] = ..., - ) -> StopWordsRemover: ... - @overload - def setParams( - self, - *, - stopWords: Optional[List[str]] = ..., - caseSensitive: bool = ..., - locale: Optional[str] = ..., - inputCols: Optional[List[str]] = ..., - outputCols: Optional[List[str]] = ..., - ) -> StopWordsRemover: ... - def setStopWords(self, value: List[str]) -> StopWordsRemover: ... - def getStopWords(self) -> List[str]: ... - def setCaseSensitive(self, value: bool) -> StopWordsRemover: ... - def getCaseSensitive(self) -> bool: ... - def setLocale(self, value: str) -> StopWordsRemover: ... - def getLocale(self) -> str: ... - def setInputCol(self, value: str) -> StopWordsRemover: ... - def setOutputCol(self, value: str) -> StopWordsRemover: ... - def setInputCols(self, value: List[str]) -> StopWordsRemover: ... - def setOutputCols(self, value: List[str]) -> StopWordsRemover: ... - @staticmethod - def loadDefaultStopWords(language: str) -> List[str]: ... - -class Tokenizer( - JavaTransformer, - HasInputCol, - HasOutputCol, - JavaMLReadable[Tokenizer], - JavaMLWritable, -): - def __init__( - self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ... - ) -> None: ... - def setParams( - self, *, inputCol: Optional[str] = ..., outputCol: Optional[str] = ... - ) -> Tokenizer: ... - def setInputCol(self, value: str) -> Tokenizer: ... - def setOutputCol(self, value: str) -> Tokenizer: ... - -class VectorAssembler( - JavaTransformer, - HasInputCols, - HasOutputCol, - HasHandleInvalid, - JavaMLReadable[VectorAssembler], - JavaMLWritable, -): - handleInvalid: Param[str] - def __init__( - self, - *, - inputCols: Optional[List[str]] = ..., - outputCol: Optional[str] = ..., - handleInvalid: str = ..., - ) -> None: ... - def setParams( - self, - *, - inputCols: Optional[List[str]] = ..., - outputCol: Optional[str] = ..., - handleInvalid: str = ..., - ) -> VectorAssembler: ... - def setInputCols(self, value: List[str]) -> VectorAssembler: ... - def setOutputCol(self, value: str) -> VectorAssembler: ... - def setHandleInvalid(self, value: str) -> VectorAssembler: ... - -class _VectorIndexerParams(HasInputCol, HasOutputCol, HasHandleInvalid): - maxCategories: Param[int] - handleInvalid: Param[str] - def __init__(self, *args: Any): ... - def getMaxCategories(self) -> int: ... - -class VectorIndexer( - JavaEstimator[VectorIndexerModel], - _VectorIndexerParams, - HasHandleInvalid, - JavaMLReadable[VectorIndexer], - JavaMLWritable, -): - def __init__( - self, - *, - maxCategories: int = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - handleInvalid: str = ..., - ) -> None: ... - def setParams( - self, - *, - maxCategories: int = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - handleInvalid: str = ..., - ) -> VectorIndexer: ... - def setMaxCategories(self, value: int) -> VectorIndexer: ... - def setInputCol(self, value: str) -> VectorIndexer: ... - def setOutputCol(self, value: str) -> VectorIndexer: ... - def setHandleInvalid(self, value: str) -> VectorIndexer: ... - def _create_model(self, java_model: JavaObject) -> VectorIndexerModel: ... - -class VectorIndexerModel( - JavaModel, _VectorIndexerParams, JavaMLReadable[VectorIndexerModel], JavaMLWritable -): - def setInputCol(self, value: str) -> VectorIndexerModel: ... - def setOutputCol(self, value: str) -> VectorIndexerModel: ... - @property - def numFeatures(self) -> int: ... - @property - def categoryMaps(self) -> Dict[int, Tuple[float, int]]: ... - -class VectorSlicer( - JavaTransformer, - HasInputCol, - HasOutputCol, - JavaMLReadable[VectorSlicer], - JavaMLWritable, -): - indices: Param[List[int]] - names: Param[List[str]] - def __init__( - self, - *, - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - indices: Optional[List[int]] = ..., - names: Optional[List[str]] = ..., - ) -> None: ... - def setParams( - self, - *, - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - indices: Optional[List[int]] = ..., - names: Optional[List[str]] = ..., - ) -> VectorSlicer: ... - def setIndices(self, value: List[int]) -> VectorSlicer: ... - def getIndices(self) -> List[int]: ... - def setNames(self, value: List[str]) -> VectorSlicer: ... - def getNames(self) -> List[str]: ... - def setInputCol(self, value: str) -> VectorSlicer: ... - def setOutputCol(self, value: str) -> VectorSlicer: ... - -class _Word2VecParams(HasStepSize, HasMaxIter, HasSeed, HasInputCol, HasOutputCol): - vectorSize: Param[int] - numPartitions: Param[int] - minCount: Param[int] - windowSize: Param[int] - maxSentenceLength: Param[int] - def __init__(self, *args: Any): ... - def getVectorSize(self) -> int: ... - def getNumPartitions(self) -> int: ... - def getMinCount(self) -> int: ... - def getWindowSize(self) -> int: ... - def getMaxSentenceLength(self) -> int: ... - -class Word2Vec( - JavaEstimator[Word2VecModel], - _Word2VecParams, - JavaMLReadable[Word2Vec], - JavaMLWritable, -): - def __init__( - self, - *, - vectorSize: int = ..., - minCount: int = ..., - numPartitions: int = ..., - stepSize: float = ..., - maxIter: int = ..., - seed: Optional[int] = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - windowSize: int = ..., - maxSentenceLength: int = ..., - ) -> None: ... - def setParams( - self, - *, - vectorSize: int = ..., - minCount: int = ..., - numPartitions: int = ..., - stepSize: float = ..., - maxIter: int = ..., - seed: Optional[int] = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - windowSize: int = ..., - maxSentenceLength: int = ..., - ) -> Word2Vec: ... - def setVectorSize(self, value: int) -> Word2Vec: ... - def setNumPartitions(self, value: int) -> Word2Vec: ... - def setMinCount(self, value: int) -> Word2Vec: ... - def setWindowSize(self, value: int) -> Word2Vec: ... - def setMaxSentenceLength(self, value: int) -> Word2Vec: ... - def setMaxIter(self, value: int) -> Word2Vec: ... - def setInputCol(self, value: str) -> Word2Vec: ... - def setOutputCol(self, value: str) -> Word2Vec: ... - def setSeed(self, value: int) -> Word2Vec: ... - def setStepSize(self, value: float) -> Word2Vec: ... - def _create_model(self, java_model: JavaObject) -> Word2VecModel: ... - -class Word2VecModel(JavaModel, _Word2VecParams, JavaMLReadable[Word2VecModel], JavaMLWritable): - def getVectors(self) -> DataFrame: ... - def setInputCol(self, value: str) -> Word2VecModel: ... - def setOutputCol(self, value: str) -> Word2VecModel: ... - @overload - def findSynonyms(self, word: str, num: int) -> DataFrame: ... - @overload - def findSynonyms(self, word: Vector, num: int) -> DataFrame: ... - @overload - def findSynonymsArray(self, word: str, num: int) -> List[Tuple[str, float]]: ... - @overload - def findSynonymsArray(self, word: Vector, num: int) -> List[Tuple[str, float]]: ... - -class _PCAParams(HasInputCol, HasOutputCol): - k: Param[int] - def getK(self) -> int: ... - -class PCA(JavaEstimator[PCAModel], _PCAParams, JavaMLReadable[PCA], JavaMLWritable): - def __init__( - self, - *, - k: Optional[int] = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> None: ... - def setParams( - self, - *, - k: Optional[int] = ..., - inputCol: Optional[str] = ..., - outputCol: Optional[str] = ..., - ) -> PCA: ... - def setK(self, value: int) -> PCA: ... - def setInputCol(self, value: str) -> PCA: ... - def setOutputCol(self, value: str) -> PCA: ... - def _create_model(self, java_model: JavaObject) -> PCAModel: ... - -class PCAModel(JavaModel, _PCAParams, JavaMLReadable[PCAModel], JavaMLWritable): - def setInputCol(self, value: str) -> PCAModel: ... - def setOutputCol(self, value: str) -> PCAModel: ... - @property - def pc(self) -> DenseMatrix: ... - @property - def explainedVariance(self) -> DenseVector: ... - -class _RFormulaParams(HasFeaturesCol, HasLabelCol, HasHandleInvalid): - formula: Param[str] - forceIndexLabel: Param[bool] - stringIndexerOrderType: Param[str] - handleInvalid: Param[str] - def __init__(self, *args: Any): ... - def getFormula(self) -> str: ... - def getForceIndexLabel(self) -> bool: ... - def getStringIndexerOrderType(self) -> str: ... - -class RFormula( - JavaEstimator[RFormulaModel], - _RFormulaParams, - JavaMLReadable[RFormula], - JavaMLWritable, -): - def __init__( - self, - *, - formula: Optional[str] = ..., - featuresCol: str = ..., - labelCol: str = ..., - forceIndexLabel: bool = ..., - stringIndexerOrderType: str = ..., - handleInvalid: str = ..., - ) -> None: ... - def setParams( - self, - *, - formula: Optional[str] = ..., - featuresCol: str = ..., - labelCol: str = ..., - forceIndexLabel: bool = ..., - stringIndexerOrderType: str = ..., - handleInvalid: str = ..., - ) -> RFormula: ... - def setFormula(self, value: str) -> RFormula: ... - def setForceIndexLabel(self, value: bool) -> RFormula: ... - def setStringIndexerOrderType(self, value: str) -> RFormula: ... - def setFeaturesCol(self, value: str) -> RFormula: ... - def setLabelCol(self, value: str) -> RFormula: ... - def setHandleInvalid(self, value: str) -> RFormula: ... - def _create_model(self, java_model: JavaObject) -> RFormulaModel: ... - -class RFormulaModel(JavaModel, _RFormulaParams, JavaMLReadable[RFormulaModel], JavaMLWritable): ... - -class _SelectorParams(HasFeaturesCol, HasOutputCol, HasLabelCol): - selectorType: Param[str] - numTopFeatures: Param[int] - percentile: Param[float] - fpr: Param[float] - fdr: Param[float] - fwe: Param[float] - def __init__(self, *args: Any): ... - def getSelectorType(self) -> str: ... - def getNumTopFeatures(self) -> int: ... - def getPercentile(self) -> float: ... - def getFpr(self) -> float: ... - def getFdr(self) -> float: ... - def getFwe(self) -> float: ... - -class _Selector(JavaEstimator[JM], _SelectorParams, JavaMLReadable, JavaMLWritable, Generic[JM]): - def setSelectorType(self: P, value: str) -> P: ... - def setNumTopFeatures(self: P, value: int) -> P: ... - def setPercentile(self: P, value: float) -> P: ... - def setFpr(self: P, value: float) -> P: ... - def setFdr(self: P, value: float) -> P: ... - def setFwe(self: P, value: float) -> P: ... - def setFeaturesCol(self: P, value: str) -> P: ... - def setOutputCol(self: P, value: str) -> P: ... - def setLabelCol(self: P, value: str) -> P: ... - def _create_model(self, java_model: JavaObject) -> JM: ... - -class _SelectorModel(JavaModel, _SelectorParams): - def setFeaturesCol(self: P, value: str) -> P: ... - def setOutputCol(self: P, value: str) -> P: ... - @property - def selectedFeatures(self) -> List[int]: ... - -class ChiSqSelector( - _Selector[ChiSqSelectorModel], - JavaMLReadable[ChiSqSelector], - JavaMLWritable, -): - def __init__( - self, - *, - numTopFeatures: int = ..., - featuresCol: str = ..., - outputCol: Optional[str] = ..., - labelCol: str = ..., - selectorType: str = ..., - percentile: float = ..., - fpr: float = ..., - fdr: float = ..., - fwe: float = ..., - ) -> None: ... - def setParams( - self, - *, - numTopFeatures: int = ..., - featuresCol: str = ..., - outputCol: Optional[str] = ..., - labelCol: str = ..., - selectorType: str = ..., - percentile: float = ..., - fpr: float = ..., - fdr: float = ..., - fwe: float = ..., - ) -> ChiSqSelector: ... - def setSelectorType(self, value: str) -> ChiSqSelector: ... - def setNumTopFeatures(self, value: int) -> ChiSqSelector: ... - def setPercentile(self, value: float) -> ChiSqSelector: ... - def setFpr(self, value: float) -> ChiSqSelector: ... - def setFdr(self, value: float) -> ChiSqSelector: ... - def setFwe(self, value: float) -> ChiSqSelector: ... - def setFeaturesCol(self, value: str) -> ChiSqSelector: ... - def setOutputCol(self, value: str) -> ChiSqSelector: ... - def setLabelCol(self, value: str) -> ChiSqSelector: ... - def _create_model(self, java_model: JavaObject) -> ChiSqSelectorModel: ... - -class ChiSqSelectorModel(_SelectorModel, JavaMLReadable[ChiSqSelectorModel], JavaMLWritable): - def setFeaturesCol(self, value: str) -> ChiSqSelectorModel: ... - def setOutputCol(self, value: str) -> ChiSqSelectorModel: ... - @property - def selectedFeatures(self) -> List[int]: ... - -class VectorSizeHint( - JavaTransformer, - HasInputCol, - HasHandleInvalid, - JavaMLReadable[VectorSizeHint], - JavaMLWritable, -): - size: Param[int] - handleInvalid: Param[str] - def __init__( - self, *, inputCol: Optional[str] = ..., size: Optional[int] = ..., handleInvalid: str = ... - ) -> None: ... - def setParams( - self, *, inputCol: Optional[str] = ..., size: Optional[int] = ..., handleInvalid: str = ... - ) -> VectorSizeHint: ... - def setSize(self, value: int) -> VectorSizeHint: ... - def getSize(self) -> int: ... - def setInputCol(self, value: str) -> VectorSizeHint: ... - def setHandleInvalid(self, value: str) -> VectorSizeHint: ... - -class _VarianceThresholdSelectorParams(HasFeaturesCol, HasOutputCol): - varianceThreshold: Param[float] = ... - def getVarianceThreshold(self) -> float: ... - -class VarianceThresholdSelector( - JavaEstimator[VarianceThresholdSelectorModel], - _VarianceThresholdSelectorParams, - JavaMLReadable[VarianceThresholdSelector], - JavaMLWritable, -): - def __init__( - self, - featuresCol: str = ..., - outputCol: Optional[str] = ..., - varianceThreshold: float = ..., - ) -> None: ... - def setParams( - self, - featuresCol: str = ..., - outputCol: Optional[str] = ..., - varianceThreshold: float = ..., - ) -> VarianceThresholdSelector: ... - def setVarianceThreshold(self, value: float) -> VarianceThresholdSelector: ... - def setFeaturesCol(self, value: str) -> VarianceThresholdSelector: ... - def setOutputCol(self, value: str) -> VarianceThresholdSelector: ... - def _create_model(self, java_model: JavaObject) -> VarianceThresholdSelectorModel: ... - -class VarianceThresholdSelectorModel( - JavaModel, - _VarianceThresholdSelectorParams, - JavaMLReadable[VarianceThresholdSelectorModel], - JavaMLWritable, -): - def setFeaturesCol(self, value: str) -> VarianceThresholdSelectorModel: ... - def setOutputCol(self, value: str) -> VarianceThresholdSelectorModel: ... - @property - def selectedFeatures(self) -> List[int]: ... - -class _UnivariateFeatureSelectorParams(HasFeaturesCol, HasOutputCol, HasLabelCol): - featureType: Param[str] = ... - labelType: Param[str] = ... - selectionMode: Param[str] = ... - selectionThreshold: Param[float] = ... - def __init__(self, *args: Any): ... - def getFeatureType(self) -> str: ... - def getLabelType(self) -> str: ... - def getSelectionMode(self) -> str: ... - def getSelectionThreshold(self) -> float: ... - -class UnivariateFeatureSelector( - JavaEstimator[UnivariateFeatureSelectorModel], - _UnivariateFeatureSelectorParams, - JavaMLReadable[UnivariateFeatureSelector], - JavaMLWritable, -): - def __init__( - self, - *, - featuresCol: str = ..., - outputCol: Optional[str] = ..., - labelCol: str = ..., - selectionMode: str = ..., - ) -> None: ... - def setParams( - self, - *, - featuresCol: str = ..., - outputCol: Optional[str] = ..., - labelCol: str = ..., - selectionMode: str = ..., - ) -> UnivariateFeatureSelector: ... - def setFeatureType(self, value: str) -> UnivariateFeatureSelector: ... - def setLabelType(self, value: str) -> UnivariateFeatureSelector: ... - def setSelectionMode(self, value: str) -> UnivariateFeatureSelector: ... - def setSelectionThreshold(self, value: float) -> UnivariateFeatureSelector: ... - def setFeaturesCol(self, value: str) -> UnivariateFeatureSelector: ... - def setOutputCol(self, value: str) -> UnivariateFeatureSelector: ... - def setLabelCol(self, value: str) -> UnivariateFeatureSelector: ... - def _create_model(self, java_model: JavaObject) -> UnivariateFeatureSelectorModel: ... - -class UnivariateFeatureSelectorModel( - JavaModel, - _UnivariateFeatureSelectorParams, - JavaMLReadable[UnivariateFeatureSelectorModel], - JavaMLWritable, -): - def setFeaturesCol(self, value: str) -> UnivariateFeatureSelectorModel: ... - def setOutputCol(self, value: str) -> UnivariateFeatureSelectorModel: ... - @property - def selectedFeatures(self) -> List[int]: ... From a7480e647fe1ed930c0cd2ad1679b3685a675d02 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Fri, 15 Apr 2022 08:37:43 +0900 Subject: [PATCH 141/535] [SPARK-38823][SQL] Make `NewInstance` non-foldable to fix aggregation buffer corruption issue ### What changes were proposed in this pull request? Make `NewInstance` non-foldable. ### Why are the changes needed? When handling Java beans as input, Spark creates `NewInstance` with no arguments. On master and 3.3, `NewInstance` with no arguments is considered foldable. As a result, the `ConstantFolding` rule converts `NewInstance` into a `Literal` holding an instance of the user's specified Java bean. The instance becomes a singleton that gets reused for each input record (although its fields get updated by `InitializeJavaBean`). Because the instance gets reused, sometimes multiple buffers in `AggregationIterator` are actually referring to the same Java bean instance. Take, for example, the test I added in this PR, or the `spark-shell` example I added to SPARK-38823 as a comment. The input is: ``` new Item("a", 1), new Item("b", 3), new Item("c", 2), new Item("a", 7) ``` As `ObjectAggregationIterator` reads the input, the buffers get set up as follows (note that the first field of Item should be the same as the key): ``` - Read Item("a", 1) - Buffers are now: Key "a" --> Item("a", 1) - Read Item("b", 3) - Buffers are now: Key "a" -> Item("b", 3) Key "b" -> Item("b", 3) ``` The buffer for key "a" now contains `Item("b", 3)`. That's because both buffers contain a reference to the same Item instance, and that Item instance's fields were updated when `Item("b", 3)` was read. This PR makes `NewInstance` non-foldable, so it will not get optimized away, thus ensuring a new instance of the Java bean for each input record. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New unit test. Closes #36183 from bersprockets/newinstance_issue. Authored-by: Bruce Robbins Signed-off-by: Hyukjin Kwon (cherry picked from commit cc7cb7a803d5de03c526480c8968bbb2c3e82484) Signed-off-by: Hyukjin Kwon --- .../expressions/objects/objects.scala | 3 + .../optimizer/ConstantFoldingSuite.scala | 15 +-- .../sql/JavaBeanDeserializationSuite.java | 93 +++++++++++++++++++ 3 files changed, 99 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index 2c879beeed623..fe982b238296f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -516,6 +516,9 @@ case class NewInstance( override def nullable: Boolean = needNullCheck + // Non-foldable to prevent the optimizer from replacing NewInstance with a singleton instance + // of the specified class. + override def foldable: Boolean = false override def children: Seq[Expression] = arguments final override val nodePatterns: Seq[TreePattern] = Seq(NEW_INSTANCE) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index a2ee2a2fb6813..b06e001e41243 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -21,11 +21,10 @@ import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, Unresol import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, NewInstance, StaticInvoke} +import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, StaticInvoke} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.catalyst.util.GenericArrayData import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.ByteArray @@ -318,14 +317,7 @@ class ConstantFoldingSuite extends PlanTest { Literal.create("a", StringType), "substring", StringType, - Seq(Literal(0), Literal(1))).as("c2"), - NewInstance( - cls = classOf[GenericArrayData], - arguments = Literal.fromObject(List(1, 2, 3)) :: Nil, - inputTypes = Nil, - propagateNull = false, - dataType = ArrayType(IntegerType), - outerPointer = None).as("c3")) + Seq(Literal(0), Literal(1))).as("c2")) val optimized = Optimize.execute(originalQuery.analyze) @@ -333,8 +325,7 @@ class ConstantFoldingSuite extends PlanTest { testRelation .select( Literal("WWSpark".getBytes()).as("c1"), - Literal.create("a", StringType).as("c2"), - Literal.create(new GenericArrayData(List(1, 2, 3)), ArrayType(IntegerType)).as("c3")) + Literal.create("a", StringType).as("c2")) .analyze comparePlans(optimized, correctAnswer) diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java index af0a22b036030..06a5c50b30c64 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java @@ -26,6 +26,8 @@ import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.commons.lang3.builder.ToStringStyle; +import org.apache.spark.api.java.function.MapFunction; +import org.apache.spark.api.java.function.ReduceFunction; import org.junit.*; import org.apache.spark.sql.*; @@ -37,6 +39,7 @@ import org.apache.spark.sql.types.StructType; import org.apache.spark.sql.test.TestSparkSession; +import scala.Tuple2; public class JavaBeanDeserializationSuite implements Serializable { @@ -562,6 +565,96 @@ public void testBeanWithLocalDateAndInstant() { } } + @Test + public void testSPARK38823NoBeanReuse() { + List items = Arrays.asList( + new Item("a", 1), + new Item("b", 3), + new Item("c", 2), + new Item("a", 7)); + + Encoder encoder = Encoders.bean(Item.class); + + Dataset ds = spark.createDataFrame(items, Item.class) + .as(encoder) + .coalesce(1); + + MapFunction mf = new MapFunction() { + @Override + public String call(Item item) throws Exception { + return item.getK(); + } + }; + + ReduceFunction rf = new ReduceFunction() { + @Override + public Item call(Item item1, Item item2) throws Exception { + Assert.assertNotSame(item1, item2); + return item1.addValue(item2.getV()); + } + }; + + Dataset> finalDs = ds + .groupByKey(mf, Encoders.STRING()) + .reduceGroups(rf); + + List> expectedRecords = Arrays.asList( + new Tuple2("a", new Item("a", 8)), + new Tuple2("b", new Item("b", 3)), + new Tuple2("c", new Item("c", 2))); + + List> result = finalDs.collectAsList(); + + Assert.assertEquals(expectedRecords, result); + } + + public static class Item implements Serializable { + private String k; + private int v; + + public String getK() { + return k; + } + + public int getV() { + return v; + } + + public void setK(String k) { + this.k = k; + } + + public void setV(int v) { + this.v = v; + } + + public Item() { } + + public Item(String k, int v) { + this.k = k; + this.v = v; + } + + public Item addValue(int inc) { + return new Item(k, v + inc); + } + + public String toString() { + return "Item(" + k + "," + v + ")"; + } + + public boolean equals(Object o) { + if (!(o instanceof Item)) { + return false; + } + Item other = (Item) o; + if (other.getK().equals(k) && other.getV() == v) { + return true; + } + return false; + } + } + public static final class LocalDateInstantRecord { private String localDateField; private String instantField; From 7101e88201d88cf24057187f360c828d1b376589 Mon Sep 17 00:00:00 2001 From: zero323 Date: Fri, 15 Apr 2022 10:32:13 +0200 Subject: [PATCH 142/535] [SPARK-37405][FOLLOW-UP][PYTHON][ML] Move _input_kwargs hints to consistent positions ### What changes were proposed in this pull request? This PR moves `_input_kwargs` hints to beginning of the bodies of the annotated classes. ### Why are the changes needed? Consistency with other modules. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #36203 from zero323/SPARK-37405-FOLLOW-UP. Authored-by: zero323 Signed-off-by: zero323 (cherry picked from commit 797abc069348a2770742d5b57fd8c0fab0abe8d4) Signed-off-by: zero323 --- python/pyspark/ml/feature.py | 68 ++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 8cebea2363dc0..7136c29f156b5 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -177,6 +177,8 @@ class Binarizer( ... """ + _input_kwargs: Dict[str, Any] + threshold: Param[float] = Param( Params._dummy(), "threshold", @@ -195,8 +197,6 @@ class Binarizer( typeConverter=TypeConverters.toListFloat, ) - _input_kwargs: Dict[str, Any] - @overload def __init__( self, @@ -721,6 +721,8 @@ class Bucketizer( ... """ + _input_kwargs: Dict[str, Any] + splits: Param[List[float]] = Param( Params._dummy(), "splits", @@ -762,8 +764,6 @@ class Bucketizer( typeConverter=TypeConverters.toListListFloat, ) - _input_kwargs: Dict[str, Any] - @overload def __init__( self, @@ -1284,6 +1284,8 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable["DCT"], Jav False """ + _input_kwargs: Dict[str, Any] + inverse: Param[bool] = Param( Params._dummy(), "inverse", @@ -1291,8 +1293,6 @@ class DCT(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable["DCT"], Jav typeConverter=TypeConverters.toBoolean, ) - _input_kwargs: Dict[str, Any] - @keyword_only def __init__( self, @@ -1392,6 +1392,8 @@ class ElementwiseProduct( True """ + _input_kwargs: Dict[str, Any] + scalingVec: Param[Vector] = Param( Params._dummy(), "scalingVec", @@ -1399,8 +1401,6 @@ class ElementwiseProduct( typeConverter=TypeConverters.toVector, ) - _input_kwargs: Dict[str, Any] - @keyword_only def __init__( self, @@ -1528,6 +1528,8 @@ class FeatureHasher( True """ + _input_kwargs: Dict[str, Any] + categoricalCols: Param[List[str]] = Param( Params._dummy(), "categoricalCols", @@ -1535,8 +1537,6 @@ class FeatureHasher( typeConverter=TypeConverters.toListString, ) - _input_kwargs: Dict[str, Any] - @keyword_only def __init__( self, @@ -1650,6 +1650,8 @@ class HashingTF( 5 """ + _input_kwargs: Dict[str, Any] + binary: Param[bool] = Param( Params._dummy(), "binary", @@ -1659,8 +1661,6 @@ class HashingTF( typeConverter=TypeConverters.toBoolean, ) - _input_kwargs: Dict[str, Any] - @keyword_only def __init__( self, @@ -2882,6 +2882,8 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable["NGram"], True """ + _input_kwargs: Dict[str, Any] + n: Param[int] = Param( Params._dummy(), "n", @@ -2889,8 +2891,6 @@ class NGram(JavaTransformer, HasInputCol, HasOutputCol, JavaMLReadable["NGram"], typeConverter=TypeConverters.toInt, ) - _input_kwargs: Dict[str, Any] - @keyword_only def __init__( self, *, n: int = 2, inputCol: Optional[str] = None, outputCol: Optional[str] = None @@ -2982,10 +2982,10 @@ class Normalizer( True """ - p = Param(Params._dummy(), "p", "the p norm value.", typeConverter=TypeConverters.toFloat) - _input_kwargs: Dict[str, Any] + p = Param(Params._dummy(), "p", "the p norm value.", typeConverter=TypeConverters.toFloat) + @keyword_only def __init__( self, *, p: float = 2.0, inputCol: Optional[str] = None, outputCol: Optional[str] = None @@ -3378,6 +3378,8 @@ class PolynomialExpansion( True """ + _input_kwargs: Dict[str, Any] + degree: Param[int] = Param( Params._dummy(), "degree", @@ -3385,8 +3387,6 @@ class PolynomialExpansion( typeConverter=TypeConverters.toInt, ) - _input_kwargs: Dict[str, Any] - @keyword_only def __init__( self, *, degree: int = 2, inputCol: Optional[str] = None, outputCol: Optional[str] = None @@ -3546,6 +3546,8 @@ class QuantileDiscretizer( ... """ + _input_kwargs: Dict[str, Any] + numBuckets: Param[int] = Param( Params._dummy(), "numBuckets", @@ -3579,8 +3581,6 @@ class QuantileDiscretizer( typeConverter=TypeConverters.toListInt, ) - _input_kwargs: Dict[str, Any] - @overload def __init__( self, @@ -4076,6 +4076,8 @@ class RegexTokenizer( True """ + _input_kwargs: Dict[str, Any] + minTokenLength: Param[int] = Param( Params._dummy(), "minTokenLength", @@ -4100,8 +4102,6 @@ class RegexTokenizer( typeConverter=TypeConverters.toBoolean, ) - _input_kwargs: Dict[str, Any] - @keyword_only def __init__( self, @@ -4237,12 +4237,12 @@ class SQLTransformer(JavaTransformer, JavaMLReadable["SQLTransformer"], JavaMLWr True """ + _input_kwargs: Dict[str, Any] + statement = Param( Params._dummy(), "statement", "SQL statement", typeConverter=TypeConverters.toString ) - _input_kwargs: Dict[str, Any] - @keyword_only def __init__(self, *, statement: Optional[str] = None): """ @@ -4874,6 +4874,8 @@ class IndexToString( StringIndexer : for converting categorical values into category indices """ + _input_kwargs: Dict[str, Any] + labels: Param[List[str]] = Param( Params._dummy(), "labels", @@ -4882,8 +4884,6 @@ class IndexToString( typeConverter=TypeConverters.toListString, ) - _input_kwargs: Dict[str, Any] - @keyword_only def __init__( self, @@ -4996,6 +4996,8 @@ class StopWordsRemover( ... """ + _input_kwargs: Dict[str, Any] + stopWords: Param[List[str]] = Param( Params._dummy(), "stopWords", @@ -5015,8 +5017,6 @@ class StopWordsRemover( typeConverter=TypeConverters.toString, ) - _input_kwargs: Dict[str, Any] - @overload def __init__( self, @@ -5327,6 +5327,8 @@ class VectorAssembler( ... """ + _input_kwargs: Dict[str, Any] + handleInvalid: Param[str] = Param( Params._dummy(), "handleInvalid", @@ -5341,8 +5343,6 @@ class VectorAssembler( typeConverter=TypeConverters.toString, ) - _input_kwargs: Dict[str, Any] - @keyword_only def __init__( self, @@ -5690,6 +5690,8 @@ class VectorSlicer( True """ + _input_kwargs: Dict[str, Any] + indices: Param[List[int]] = Param( Params._dummy(), "indices", @@ -5707,8 +5709,6 @@ class VectorSlicer( typeConverter=TypeConverters.toListString, ) - _input_kwargs: Dict[str, Any] - @keyword_only def __init__( self, @@ -6909,6 +6909,8 @@ class VectorSizeHint( True """ + _input_kwargs: Dict[str, Any] + size: Param[int] = Param( Params._dummy(), "size", "Size of vectors in column.", typeConverter=TypeConverters.toInt ) @@ -6924,8 +6926,6 @@ class VectorSizeHint( TypeConverters.toString, ) - _input_kwargs: Dict[str, Any] - @keyword_only def __init__( self, From 811c92f7c5f0e1bc4c12d9b121912a91fc67c208 Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Fri, 15 Apr 2022 16:45:47 +0800 Subject: [PATCH 143/535] [SPARK-37575][SQL][FOLLOWUP] Add legacy flag for the breaking change of write null value in csv to unquoted empty string ### What changes were proposed in this pull request? Add a legacy flag `spark.sql.legacy.nullValueWrittenAsQuotedEmptyStringCsv` for the breaking change introduced in https://github.com/apache/spark/pull/34853 and https://github.com/apache/spark/pull/34905 (followup). The flag is disabled by default, so the null values written as csv will output an unquoted empty string. When the legacy flag is enabled, the null will output quoted empty string. ### Why are the changes needed? The original commit is a breaking change, and breaking changes should be encouraged to add a flag to turn it off for smooth migration between versions. ### Does this PR introduce _any_ user-facing change? With the default value of the conf, there is no user-facing difference. If users turn this conf off, they can restore the pre-change behavior. ### How was this patch tested? Through unit tests. Closes #36110 from anchovYu/flags-null-to-csv. Authored-by: Xinyi Yu Signed-off-by: Wenchen Fan (cherry picked from commit 965f872500a3554142cab3078a7a4d513d2d2ee8) Signed-off-by: Wenchen Fan --- .../sql/catalyst/csv/UnivocityGenerator.scala | 4 ++++ .../apache/spark/sql/internal/SQLConf.scala | 10 ++++++++++ .../execution/datasources/csv/CSVSuite.scala | 20 +++++++++++++------ 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala index 5dd8c35e4c2e5..d124a055f63a4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala @@ -24,6 +24,7 @@ import com.univocity.parsers.csv.CsvWriter import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, IntervalStringStyles, IntervalUtils, TimestampFormatter} import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ class UnivocityGenerator( @@ -95,6 +96,9 @@ class UnivocityGenerator( while (i < row.numFields) { if (!row.isNullAt(i)) { values(i) = valueConverters(i).apply(row, i) + } else if ( + SQLConf.get.getConf(SQLConf.LEGACY_NULL_VALUE_WRITTEN_AS_QUOTED_EMPTY_STRING_CSV)) { + values(i) = options.nullValue } i += 1 } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 9e4496a2c331f..5f803ed963beb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3724,6 +3724,16 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_NULL_VALUE_WRITTEN_AS_QUOTED_EMPTY_STRING_CSV = + buildConf("spark.sql.legacy.nullValueWrittenAsQuotedEmptyStringCsv") + .internal() + .doc("When set to false, nulls are written as unquoted empty strings in CSV data source. " + + "If set to false, it restores the legacy behavior that nulls were written as quoted " + + "empty strings, `\"\"`.") + .version("3.3.0") + .booleanConf + .createWithDefault(false) + /** * Holds information about keys that have been deprecated. * diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 41b4f909ce958..7cbe6ed9fceef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -807,12 +807,20 @@ abstract class CSVSuite test("SPARK-37575: null values should be saved as nothing rather than " + "quoted empty Strings \"\" with default settings") { - withTempPath { path => - Seq(("Tesla", null: String, "")) - .toDF("make", "comment", "blank") - .write - .csv(path.getCanonicalPath) - checkAnswer(spark.read.text(path.getCanonicalPath), Row("Tesla,,\"\"")) + Seq("true", "false").foreach { confVal => + withSQLConf(SQLConf.LEGACY_NULL_VALUE_WRITTEN_AS_QUOTED_EMPTY_STRING_CSV.key -> confVal) { + withTempPath { path => + Seq(("Tesla", null: String, "")) + .toDF("make", "comment", "blank") + .write + .csv(path.getCanonicalPath) + if (confVal == "false") { + checkAnswer(spark.read.text(path.getCanonicalPath), Row("Tesla,,\"\"")) + } else { + checkAnswer(spark.read.text(path.getCanonicalPath), Row("Tesla,\"\",\"\"")) + } + } + } } } From a3ea6b4e00df1a6a5712db5dd228819044c09dc9 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 15 Apr 2022 19:20:31 +0800 Subject: [PATCH 144/535] [SPARK-38908][SQL] Provide query context in runtime error of Casting from String to Number/Date/Timestamp/Boolean ### What changes were proposed in this pull request? Provide query context in runtime error of Casting from String to Number/Date/Timestamp/Boolean. Casting Double/Float to Timestamp shares the same error method as casting String to Timestamp, so this PR also provides query context in its error. ### Why are the changes needed? Provide SQL query context of runtime errors to users, so that they can understand it better. ### Does this PR introduce _any_ user-facing change? Yes, improve the runtime error message of Casting from String to Number/Date/Timestamp/Boolean ### How was this patch tested? UT Closes #36206 from gengliangwang/castStringContext. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 49fa2e0720d3ca681d817981cbc2c7b811de2706) Signed-off-by: Gengliang Wang --- .../main/resources/error/error-classes.json | 2 +- project/MimaExcludes.scala | 6 +- .../spark/sql/catalyst/expressions/Cast.scala | 74 ++++---- .../sql/catalyst/util/DateTimeUtils.scala | 16 +- .../sql/catalyst/util/UTF8StringUtils.scala | 16 +- .../sql/errors/QueryExecutionErrors.scala | 22 ++- .../org/apache/spark/sql/types/Decimal.scala | 4 +- .../test/resources/sql-tests/inputs/cast.sql | 11 ++ .../sql-tests/results/ansi/cast.sql.out | 164 +++++++++++++++++- .../sql-tests/results/ansi/date.sql.out | 9 + .../ansi/datetime-parsing-invalid.sql.out | 6 + .../sql-tests/results/ansi/interval.sql.out | 30 ++++ .../results/ansi/string-functions.sql.out | 14 +- .../resources/sql-tests/results/cast.sql.out | 74 +++++++- .../results/postgreSQL/boolean.sql.out | 51 +++++- .../results/postgreSQL/float4.sql.out | 12 ++ .../results/postgreSQL/float8.sql.out | 12 ++ .../sql-tests/results/postgreSQL/text.sql.out | 6 + .../results/postgreSQL/window_part2.sql.out | 5 + .../results/postgreSQL/window_part3.sql.out | 6 +- .../results/postgreSQL/window_part4.sql.out | 6 +- 21 files changed, 484 insertions(+), 62 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 855d3c5cd6e0e..a0fa042fd4828 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -106,7 +106,7 @@ "sqlState" : "22023" }, "INVALID_INPUT_SYNTAX_FOR_NUMERIC_TYPE" : { - "message" : [ "invalid input syntax for type numeric: %s. To return NULL instead, use 'try_cast'. If necessary set %s to false to bypass this error." ], + "message" : [ "invalid input syntax for type numeric: %s. To return NULL instead, use 'try_cast'. If necessary set %s to false to bypass this error.%s" ], "sqlState" : "42000" }, "INVALID_JSON_SCHEMA_MAPTYPE" : { diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index d832d68c999aa..8f3bd43ec6597 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -64,7 +64,11 @@ object MimaExcludes { ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.connector.read.partitioning.ClusteredDistribution"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.connector.read.partitioning.Distribution"), ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.connector.read.partitioning.Partitioning.*"), - ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.connector.read.partitioning.Partitioning.*") + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.sql.connector.read.partitioning.Partitioning.*"), + + // [SPARK-38908][SQL] Provide query context in runtime error of Casting from String to + // Number/Date/Timestamp/Boolean + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.types.Decimal.fromStringANSI") ) // Exclude rules for 3.2.x from 3.1.1 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 03ecaecca066c..e522c211cb228 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -467,7 +467,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit false } else { if (ansiEnabled) { - throw QueryExecutionErrors.invalidInputSyntaxForBooleanError(s) + throw QueryExecutionErrors.invalidInputSyntaxForBooleanError(s, origin.context) } else { null } @@ -499,7 +499,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case StringType => buildCast[UTF8String](_, utfs => { if (ansiEnabled) { - DateTimeUtils.stringToTimestampAnsi(utfs, zoneId) + DateTimeUtils.stringToTimestampAnsi(utfs, zoneId, origin.context) } else { DateTimeUtils.stringToTimestamp(utfs, zoneId).orNull } @@ -524,14 +524,14 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // TimestampWritable.doubleToTimestamp case DoubleType => if (ansiEnabled) { - buildCast[Double](_, d => doubleToTimestampAnsi(d)) + buildCast[Double](_, d => doubleToTimestampAnsi(d, origin.context)) } else { buildCast[Double](_, d => doubleToTimestamp(d)) } // TimestampWritable.floatToTimestamp case FloatType => if (ansiEnabled) { - buildCast[Float](_, f => doubleToTimestampAnsi(f.toDouble)) + buildCast[Float](_, f => doubleToTimestampAnsi(f.toDouble, origin.context)) } else { buildCast[Float](_, f => doubleToTimestamp(f.toDouble)) } @@ -541,7 +541,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case StringType => buildCast[UTF8String](_, utfs => { if (ansiEnabled) { - DateTimeUtils.stringToTimestampWithoutTimeZoneAnsi(utfs) + DateTimeUtils.stringToTimestampWithoutTimeZoneAnsi(utfs, origin.context) } else { DateTimeUtils.stringToTimestampWithoutTimeZone(utfs).orNull } @@ -574,7 +574,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToDate(from: DataType): Any => Any = from match { case StringType => if (ansiEnabled) { - buildCast[UTF8String](_, s => DateTimeUtils.stringToDateAnsi(s)) + buildCast[UTF8String](_, s => DateTimeUtils.stringToDateAnsi(s, origin.context)) } else { buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s).orNull) } @@ -631,7 +631,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // LongConverter private[this] def castToLong(from: DataType): Any => Any = from match { case StringType if ansiEnabled => - buildCast[UTF8String](_, UTF8StringUtils.toLongExact) + buildCast[UTF8String](_, v => UTF8StringUtils.toLongExact(v, origin.context)) case StringType => val result = new LongWrapper() buildCast[UTF8String](_, s => if (s.toLong(result)) result.value else null) @@ -654,7 +654,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // IntConverter private[this] def castToInt(from: DataType): Any => Any = from match { case StringType if ansiEnabled => - buildCast[UTF8String](_, UTF8StringUtils.toIntExact) + buildCast[UTF8String](_, v => UTF8StringUtils.toIntExact(v, origin.context)) case StringType => val result = new IntWrapper() buildCast[UTF8String](_, s => if (s.toInt(result)) result.value else null) @@ -686,7 +686,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // ShortConverter private[this] def castToShort(from: DataType): Any => Any = from match { case StringType if ansiEnabled => - buildCast[UTF8String](_, UTF8StringUtils.toShortExact) + buildCast[UTF8String](_, v => UTF8StringUtils.toShortExact(v, origin.context)) case StringType => val result = new IntWrapper() buildCast[UTF8String](_, s => if (s.toShort(result)) { @@ -733,7 +733,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // ByteConverter private[this] def castToByte(from: DataType): Any => Any = from match { case StringType if ansiEnabled => - buildCast[UTF8String](_, UTF8StringUtils.toByteExact) + buildCast[UTF8String](_, v => UTF8StringUtils.toByteExact(v, origin.context)) case StringType => val result = new IntWrapper() buildCast[UTF8String](_, s => if (s.toByte(result)) { @@ -815,7 +815,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit if (d == null) null else changePrecision(d, target) }) case StringType if ansiEnabled => - buildCast[UTF8String](_, s => changePrecision(Decimal.fromStringANSI(s), target)) + buildCast[UTF8String](_, + s => changePrecision(Decimal.fromStringANSI(s, origin.context), target)) case BooleanType => buildCast[Boolean](_, b => toPrecision(if (b) Decimal.ONE else Decimal.ZERO, target)) case DateType => @@ -844,7 +845,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case _: NumberFormatException => val d = Cast.processFloatingPointSpecialLiterals(doubleStr, false) if(ansiEnabled && d == null) { - throw QueryExecutionErrors.invalidInputSyntaxForNumericError(s) + throw QueryExecutionErrors.invalidInputSyntaxForNumericError(s, origin.context) } else { d } @@ -869,7 +870,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case _: NumberFormatException => val f = Cast.processFloatingPointSpecialLiterals(floatStr, true) if (ansiEnabled && f == null) { - throw QueryExecutionErrors.invalidInputSyntaxForNumericError(s) + throw QueryExecutionErrors.invalidInputSyntaxForNumericError(s, origin.context) } else { f } @@ -1016,7 +1017,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case CalendarIntervalType => castToIntervalCode(from) case it: DayTimeIntervalType => castToDayTimeIntervalCode(from, it) case it: YearMonthIntervalType => castToYearMonthIntervalCode(from, it) - case BooleanType => castToBooleanCode(from) + case BooleanType => castToBooleanCode(from, ctx) case ByteType => castToByteCode(from, ctx) case ShortType => castToShortCode(from, ctx) case IntegerType => castToIntCode(from, ctx) @@ -1295,8 +1296,9 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val intOpt = ctx.freshVariable("intOpt", classOf[Option[Integer]]) (c, evPrim, evNull) => if (ansiEnabled) { + val errorContext = ctx.addReferenceObj("errCtx", origin.context) code""" - $evPrim = org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToDateAnsi($c); + $evPrim = $dateTimeUtilsCls.stringToDateAnsi($c, $errorContext); """ } else { code""" @@ -1373,9 +1375,10 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit } """ case StringType if ansiEnabled => + val errorContext = ctx.addReferenceObj("errCtx", origin.context) (c, evPrim, evNull) => code""" - Decimal $tmp = Decimal.fromStringANSI($c); + Decimal $tmp = Decimal.fromStringANSI($c, $errorContext); ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast, ctx)} """ case BooleanType => @@ -1432,9 +1435,9 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]]) (c, evPrim, evNull) => if (ansiEnabled) { + val errorContext = ctx.addReferenceObj("errCtx", origin.context) code""" - $evPrim = - org.apache.spark.sql.catalyst.util.DateTimeUtils.stringToTimestampAnsi($c, $zid); + $evPrim = $dateTimeUtilsCls.stringToTimestampAnsi($c, $zid, $errorContext); """ } else { code""" @@ -1471,7 +1474,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case DoubleType => (c, evPrim, evNull) => if (ansiEnabled) { - code"$evPrim = $dateTimeUtilsCls.doubleToTimestampAnsi($c);" + val errorContext = ctx.addReferenceObj("errCtx", origin.context) + code"$evPrim = $dateTimeUtilsCls.doubleToTimestampAnsi($c, $errorContext);" } else { code""" if (Double.isNaN($c) || Double.isInfinite($c)) { @@ -1484,7 +1488,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case FloatType => (c, evPrim, evNull) => if (ansiEnabled) { - code"$evPrim = $dateTimeUtilsCls.doubleToTimestampAnsi((double)$c);" + val errorContext = ctx.addReferenceObj("errCtx", origin.context) + code"$evPrim = $dateTimeUtilsCls.doubleToTimestampAnsi((double)$c, $errorContext);" } else { code""" if (Float.isNaN($c) || Float.isInfinite($c)) { @@ -1503,9 +1508,9 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]]) (c, evPrim, evNull) => if (ansiEnabled) { + val errorContext = ctx.addReferenceObj("errCtx", origin.context) code""" - $evPrim = - $dateTimeUtilsCls.stringToTimestampWithoutTimeZoneAnsi($c); + $evPrim = $dateTimeUtilsCls.stringToTimestampWithoutTimeZoneAnsi($c, $errorContext); """ } else { code""" @@ -1613,12 +1618,15 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def timestampToDoubleCode(ts: ExprValue): Block = code"$ts / (double)$MICROS_PER_SECOND" - private[this] def castToBooleanCode(from: DataType): CastFunction = from match { + private[this] def castToBooleanCode( + from: DataType, + ctx: CodegenContext): CastFunction = from match { case StringType => val stringUtils = inline"${StringUtils.getClass.getName.stripSuffix("$")}" (c, evPrim, evNull) => val castFailureCode = if (ansiEnabled) { - s"throw QueryExecutionErrors.invalidInputSyntaxForBooleanError($c);" + val errorContext = ctx.addReferenceObj("errCtx", origin.context) + s"throw QueryExecutionErrors.invalidInputSyntaxForBooleanError($c, $errorContext);" } else { s"$evNull = true;" } @@ -1746,7 +1754,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToByteCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType if ansiEnabled => val stringUtils = UTF8StringUtils.getClass.getCanonicalName.stripSuffix("$") - (c, evPrim, evNull) => code"$evPrim = $stringUtils.toByteExact($c);" + val errorContext = ctx.addReferenceObj("errCtx", origin.context) + (c, evPrim, evNull) => code"$evPrim = $stringUtils.toByteExact($c, $errorContext);" case StringType => val wrapper = ctx.freshVariable("intWrapper", classOf[UTF8String.IntWrapper]) (c, evPrim, evNull) => @@ -1782,7 +1791,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit ctx: CodegenContext): CastFunction = from match { case StringType if ansiEnabled => val stringUtils = UTF8StringUtils.getClass.getCanonicalName.stripSuffix("$") - (c, evPrim, evNull) => code"$evPrim = $stringUtils.toShortExact($c);" + val errorContext = ctx.addReferenceObj("errCtx", origin.context) + (c, evPrim, evNull) => code"$evPrim = $stringUtils.toShortExact($c, $errorContext);" case StringType => val wrapper = ctx.freshVariable("intWrapper", classOf[UTF8String.IntWrapper]) (c, evPrim, evNull) => @@ -1816,7 +1826,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToIntCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType if ansiEnabled => val stringUtils = UTF8StringUtils.getClass.getCanonicalName.stripSuffix("$") - (c, evPrim, evNull) => code"$evPrim = $stringUtils.toIntExact($c);" + val errorContext = ctx.addReferenceObj("errCtx", origin.context) + (c, evPrim, evNull) => code"$evPrim = $stringUtils.toIntExact($c, $errorContext);" case StringType => val wrapper = ctx.freshVariable("intWrapper", classOf[UTF8String.IntWrapper]) (c, evPrim, evNull) => @@ -1850,7 +1861,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToLongCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType if ansiEnabled => val stringUtils = UTF8StringUtils.getClass.getCanonicalName.stripSuffix("$") - (c, evPrim, evNull) => code"$evPrim = $stringUtils.toLongExact($c);" + val errorContext = ctx.addReferenceObj("errCtx", origin.context) + (c, evPrim, evNull) => code"$evPrim = $stringUtils.toLongExact($c, $errorContext);" case StringType => val wrapper = ctx.freshVariable("longWrapper", classOf[UTF8String.LongWrapper]) (c, evPrim, evNull) => @@ -1886,7 +1898,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val floatStr = ctx.freshVariable("floatStr", StringType) (c, evPrim, evNull) => val handleNull = if (ansiEnabled) { - s"throw QueryExecutionErrors.invalidInputSyntaxForNumericError($c);" + val errorContext = ctx.addReferenceObj("errCtx", origin.context) + s"throw QueryExecutionErrors.invalidInputSyntaxForNumericError($c, $errorContext);" } else { s"$evNull = true;" } @@ -1922,7 +1935,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val doubleStr = ctx.freshVariable("doubleStr", StringType) (c, evPrim, evNull) => val handleNull = if (ansiEnabled) { - s"throw QueryExecutionErrors.invalidInputSyntaxForNumericError($c);" + val errorContext = ctx.addReferenceObj("errCtx", origin.context) + s"throw QueryExecutionErrors.invalidInputSyntaxForNumericError($c, $errorContext);" } else { s"$evNull = true;" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 65da5e9cb4251..97ad3e3c10a08 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -448,15 +448,15 @@ object DateTimeUtils { } } - def stringToTimestampAnsi(s: UTF8String, timeZoneId: ZoneId): Long = { + def stringToTimestampAnsi(s: UTF8String, timeZoneId: ZoneId, errorContext: String = ""): Long = { stringToTimestamp(s, timeZoneId).getOrElse { - throw QueryExecutionErrors.cannotCastToDateTimeError(s, TimestampType) + throw QueryExecutionErrors.cannotCastToDateTimeError(s, TimestampType, errorContext) } } - def doubleToTimestampAnsi(d: Double): Long = { + def doubleToTimestampAnsi(d: Double, errorContext: String): Long = { if (d.isNaN || d.isInfinite) { - throw QueryExecutionErrors.cannotCastToDateTimeError(d, TimestampType) + throw QueryExecutionErrors.cannotCastToDateTimeError(d, TimestampType, errorContext) } else { DoubleExactNumeric.toLong(d * MICROS_PER_SECOND) } @@ -503,9 +503,9 @@ object DateTimeUtils { stringToTimestampWithoutTimeZone(s, true) } - def stringToTimestampWithoutTimeZoneAnsi(s: UTF8String): Long = { + def stringToTimestampWithoutTimeZoneAnsi(s: UTF8String, errorContext: String): Long = { stringToTimestampWithoutTimeZone(s, true).getOrElse { - throw QueryExecutionErrors.cannotCastToDateTimeError(s, TimestampNTZType) + throw QueryExecutionErrors.cannotCastToDateTimeError(s, TimestampNTZType, errorContext) } } @@ -621,9 +621,9 @@ object DateTimeUtils { } } - def stringToDateAnsi(s: UTF8String): Int = { + def stringToDateAnsi(s: UTF8String, errorContext: String = ""): Int = { stringToDate(s).getOrElse { - throw QueryExecutionErrors.cannotCastToDateTimeError(s, DateType) + throw QueryExecutionErrors.cannotCastToDateTimeError(s, DateType, errorContext) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala index 7fb564d1bd35d..9589cf3774ee1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala @@ -25,20 +25,24 @@ import org.apache.spark.unsafe.types.UTF8String */ object UTF8StringUtils { - def toLongExact(s: UTF8String): Long = withException(s.toLongExact) + def toLongExact(s: UTF8String, errorContext: String): Long = + withException(s.toLongExact, errorContext) - def toIntExact(s: UTF8String): Int = withException(s.toIntExact) + def toIntExact(s: UTF8String, errorContext: String): Int = + withException(s.toIntExact, errorContext) - def toShortExact(s: UTF8String): Short = withException(s.toShortExact) + def toShortExact(s: UTF8String, errorContext: String): Short = + withException(s.toShortExact, errorContext) - def toByteExact(s: UTF8String): Byte = withException(s.toByteExact) + def toByteExact(s: UTF8String, errorContext: String): Byte = + withException(s.toByteExact, errorContext) - private def withException[A](f: => A): A = { + private def withException[A](f: => A, errorContext: String): A = { try { f } catch { case e: NumberFormatException => - throw QueryExecutionErrors.invalidInputSyntaxForNumericError(e) + throw QueryExecutionErrors.invalidInputSyntaxForNumericError(e, errorContext) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 3a89147c4b53b..fec4788c33307 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -104,14 +104,18 @@ object QueryExecutionErrors extends QueryErrorsBase { decimalPrecision.toString, decimalScale.toString, SQLConf.ANSI_ENABLED.key, context)) } - def invalidInputSyntaxForNumericError(e: NumberFormatException): NumberFormatException = { + def invalidInputSyntaxForNumericError( + e: NumberFormatException, + errorContext: String): NumberFormatException = { new NumberFormatException(s"${e.getMessage}. To return NULL instead, use 'try_cast'. " + - s"If necessary set ${SQLConf.ANSI_ENABLED.key} to false to bypass this error.") + s"If necessary set ${SQLConf.ANSI_ENABLED.key} to false to bypass this error." + errorContext) } - def invalidInputSyntaxForNumericError(s: UTF8String): NumberFormatException = { + def invalidInputSyntaxForNumericError( + s: UTF8String, + errorContext: String): NumberFormatException = { new SparkNumberFormatException(errorClass = "INVALID_INPUT_SYNTAX_FOR_NUMERIC_TYPE", - messageParameters = Array(toSQLValue(s, StringType), SQLConf.ANSI_ENABLED.key)) + messageParameters = Array(toSQLValue(s, StringType), SQLConf.ANSI_ENABLED.key, errorContext)) } def cannotCastFromNullTypeError(to: DataType): Throwable = { @@ -1044,9 +1048,9 @@ object QueryExecutionErrors extends QueryErrorsBase { e) } - def cannotCastToDateTimeError(value: Any, to: DataType): Throwable = { + def cannotCastToDateTimeError(value: Any, to: DataType, errorContext: String): Throwable = { new DateTimeException(s"Cannot cast $value to $to. To return NULL instead, use 'try_cast'. " + - s"If necessary set ${SQLConf.ANSI_ENABLED.key} to false to bypass this error.") + s"If necessary set ${SQLConf.ANSI_ENABLED.key} to false to bypass this error." + errorContext) } def registeringStreamingQueryListenerError(e: Exception): Throwable = { @@ -1180,10 +1184,12 @@ object QueryExecutionErrors extends QueryErrorsBase { "SQLUserDefinedType nor registered with UDTRegistration.}") } - def invalidInputSyntaxForBooleanError(s: UTF8String): UnsupportedOperationException = { + def invalidInputSyntaxForBooleanError( + s: UTF8String, + errorContext: String): UnsupportedOperationException = { new UnsupportedOperationException(s"invalid input syntax for type boolean: $s. " + s"To return NULL instead, use 'try_cast'. If necessary set ${SQLConf.ANSI_ENABLED.key} " + - "to false to bypass this error.") + "to false to bypass this error." + errorContext) } def unsupportedOperandTypeForSizeFunctionError(dataType: DataType): Throwable = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 39c7e6ba58007..ac6ac33451cdf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -613,7 +613,7 @@ object Decimal { } } - def fromStringANSI(str: UTF8String): Decimal = { + def fromStringANSI(str: UTF8String, errorContext: String = ""): Decimal = { try { val bigDecimal = stringToJavaBigDecimal(str) // We fast fail because constructing a very large JavaBigDecimal to Decimal is very slow. @@ -626,7 +626,7 @@ object Decimal { } } catch { case _: NumberFormatException => - throw QueryExecutionErrors.invalidInputSyntaxForNumericError(str) + throw QueryExecutionErrors.invalidInputSyntaxForNumericError(str, errorContext) } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/cast.sql b/sql/core/src/test/resources/sql-tests/inputs/cast.sql index 39095cb8ce032..e391c31690fd7 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cast.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cast.sql @@ -87,3 +87,14 @@ select cast('\t\n xyz \t\r' as boolean); select cast('23.45' as decimal(4, 2)); select cast('123.45' as decimal(4, 2)); +select cast('xyz' as decimal(4, 2)); + +select cast('2022-01-01' as date); +select cast('a' as date); +select cast('2022-01-01 00:00:00' as timestamp); +select cast('a' as timestamp); +select cast('2022-01-01 00:00:00' as timestamp_ntz); +select cast('a' as timestamp_ntz); + +select cast(cast('inf' as double) as timestamp); +select cast(cast('inf' as float) as timestamp); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out index 6b705274dc885..3de9c1f743def 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 57 +-- Number of queries: 66 -- !query @@ -9,6 +9,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '1.23'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('1.23' AS int) + ^^^^^^^^^^^^^^^^^^^ -- !query @@ -18,6 +21,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '1.23'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('1.23' AS long) + ^^^^^^^^^^^^^^^^^^^^ -- !query @@ -27,6 +33,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '-4.56'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('-4.56' AS int) + ^^^^^^^^^^^^^^^^^^^^ -- !query @@ -36,6 +45,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '-4.56'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('-4.56' AS long) + ^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -45,6 +57,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: 'abc'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('abc' AS int) + ^^^^^^^^^^^^^^^^^^ -- !query @@ -54,6 +69,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: 'abc'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('abc' AS long) + ^^^^^^^^^^^^^^^^^^^ -- !query @@ -63,6 +81,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '1234567890123'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('1234567890123' AS int) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -72,6 +93,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '12345678901234567890123'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('12345678901234567890123' AS long) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -81,6 +105,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: ''. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('' AS int) + ^^^^^^^^^^^^^^^ -- !query @@ -90,6 +117,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: ''. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('' AS long) + ^^^^^^^^^^^^^^^^ -- !query @@ -115,6 +145,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '123.a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('123.a' AS int) + ^^^^^^^^^^^^^^^^^^^^ -- !query @@ -124,6 +157,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '123.a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('123.a' AS long) + ^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -141,6 +177,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '-2147483649'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('-2147483649' AS int) + ^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -158,6 +197,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '2147483648'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('2147483648' AS int) + ^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -175,6 +217,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '-9223372036854775809'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('-9223372036854775809' AS long) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -192,6 +237,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '9223372036854775808'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('9223372036854775808' AS long) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -448,6 +496,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast('1中文' as tinyint) + ^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -457,6 +508,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast('1中文' as smallint) + ^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -466,6 +520,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast('1中文' as INT) + ^^^^^^^^^^^^^^^^^^ -- !query @@ -475,6 +532,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '中文1'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast('中文1' as bigint) + ^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -484,6 +544,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast('1中文' as bigint) + ^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -512,6 +575,9 @@ struct<> java.lang.UnsupportedOperationException invalid input syntax for type boolean: xyz . To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast('\t\n xyz \t\r' as boolean) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -532,3 +598,99 @@ Decimal(expanded,123.45,5,2}) cannot be represented as Decimal(4, 2). If necessa == SQL(line 1, position 7) == select cast('123.45' as decimal(4, 2)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +-- !query +select cast('xyz' as decimal(4, 2)) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +invalid input syntax for type numeric: 'xyz'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast('xyz' as decimal(4, 2)) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +-- !query +select cast('2022-01-01' as date) +-- !query schema +struct +-- !query output +2022-01-01 + + +-- !query +select cast('a' as date) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast a to DateType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast('a' as date) + ^^^^^^^^^^^^^^^^^ + + +-- !query +select cast('2022-01-01 00:00:00' as timestamp) +-- !query schema +struct +-- !query output +2022-01-01 00:00:00 + + +-- !query +select cast('a' as timestamp) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast a to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast('a' as timestamp) + ^^^^^^^^^^^^^^^^^^^^^^ + + +-- !query +select cast('2022-01-01 00:00:00' as timestamp_ntz) +-- !query schema +struct +-- !query output +2022-01-01 00:00:00 + + +-- !query +select cast('a' as timestamp_ntz) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast a to TimestampNTZType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast('a' as timestamp_ntz) + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +-- !query +select cast(cast('inf' as double) as timestamp) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast Infinity to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast(cast('inf' as double) as timestamp) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +-- !query +select cast(cast('inf' as float) as timestamp) +-- !query schema +struct<> +-- !query output +java.time.DateTimeException +Cannot cast Infinity to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast(cast('inf' as float) as timestamp) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index c7058cd7e3be4..d9777b53d21a7 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -233,6 +233,9 @@ struct<> -- !query output java.time.DateTimeException Cannot cast xx to DateType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select next_day("xx", "Mon") + ^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -325,6 +328,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '1.2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select date_add('2011-11-11', '1.2') + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -433,6 +439,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: '1.2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select date_sub(date'2011-11-11', '1.2') + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out index 59761d5ac53f0..57e39bbfe3a00 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out @@ -243,6 +243,9 @@ struct<> -- !query output java.time.DateTimeException Cannot cast Unparseable to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast("Unparseable" as timestamp) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -252,3 +255,6 @@ struct<> -- !query output java.time.DateTimeException Cannot cast Unparseable to DateType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select cast("Unparseable" as date) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 8f88727f66fb8..3b8d95bca0ad2 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -123,6 +123,9 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select interval 2 second * 'a' + ^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -132,6 +135,9 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select interval 2 second / 'a' + ^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -141,6 +147,9 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select interval 2 year * 'a' + ^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -150,6 +159,9 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select interval 2 year / 'a' + ^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -175,6 +187,9 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select 'a' * interval 2 second + ^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -184,6 +199,9 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select 'a' * interval 2 year + ^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -1499,6 +1517,9 @@ struct<> -- !query output java.time.DateTimeException Cannot cast 4 11:11 to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select '4 11:11' - interval '4 22:12' day to minute + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -1508,6 +1529,9 @@ struct<> -- !query output java.time.DateTimeException Cannot cast 4 12:12:12 to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select '4 12:12:12' + interval '4 22:12' day to minute + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -1543,6 +1567,9 @@ struct<> -- !query output java.time.DateTimeException Cannot cast 1 to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select str - interval '4 22:12' day to minute from interval_view + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -1552,6 +1579,9 @@ struct<> -- !query output java.time.DateTimeException Cannot cast 1 to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select str + interval '4 22:12' day to minute from interval_view + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index c65384673c2b1..7d07282ab6763 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 142 +-- Number of queries: 143 -- !query @@ -83,6 +83,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 42) == +...t("abcd", -2), left("abcd", 0), left("abcd", 'a') + ^^^^^^^^^^^^^^^^^ -- !query @@ -108,6 +111,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 44) == +...("abcd", -2), right("abcd", 0), right("abcd", 'a') + ^^^^^^^^^^^^^^^^^^ -- !query @@ -414,6 +420,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: 'invalid_length'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT lpad('hi', 'invalid_length') + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -423,6 +432,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: 'invalid_length'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT rpad('hi', 'invalid_length') + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/cast.sql.out index 79a1e28a143b5..9ed02e3bed2c6 100644 --- a/sql/core/src/test/resources/sql-tests/results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cast.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 57 +-- Number of queries: 66 -- !query @@ -474,3 +474,75 @@ select cast('123.45' as decimal(4, 2)) struct -- !query output NULL + + +-- !query +select cast('xyz' as decimal(4, 2)) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast('2022-01-01' as date) +-- !query schema +struct +-- !query output +2022-01-01 + + +-- !query +select cast('a' as date) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast('2022-01-01 00:00:00' as timestamp) +-- !query schema +struct +-- !query output +2022-01-01 00:00:00 + + +-- !query +select cast('a' as timestamp) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast('2022-01-01 00:00:00' as timestamp_ntz) +-- !query schema +struct +-- !query output +2022-01-01 00:00:00 + + +-- !query +select cast('a' as timestamp_ntz) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast(cast('inf' as double) as timestamp) +-- !query schema +struct +-- !query output +NULL + + +-- !query +select cast(cast('inf' as float) as timestamp) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out index 4aba60b0220d5..166bea4a722a7 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out @@ -57,6 +57,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: test. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean('test') AS error + ^^^^^^^^^^^^^^^ -- !query @@ -74,6 +77,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: foo. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean('foo') AS error + ^^^^^^^^^^^^^^ -- !query @@ -99,6 +105,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: yeah. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean('yeah') AS error + ^^^^^^^^^^^^^^^ -- !query @@ -124,6 +133,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: nay. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean('nay') AS error + ^^^^^^^^^^^^^^ -- !query @@ -133,6 +145,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: on. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean('on') AS true + ^^^^^^^^^^^^^ -- !query @@ -142,6 +157,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: off. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean('off') AS `false` + ^^^^^^^^^^^^^^ -- !query @@ -151,6 +169,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: of. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean('of') AS `false` + ^^^^^^^^^^^^^ -- !query @@ -160,6 +181,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: o. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean('o') AS error + ^^^^^^^^^^^^ -- !query @@ -169,6 +193,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: on_. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean('on_') AS error + ^^^^^^^^^^^^^^ -- !query @@ -178,6 +205,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: off_. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean('off_') AS error + ^^^^^^^^^^^^^^^ -- !query @@ -195,6 +225,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: 11. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean('11') AS error + ^^^^^^^^^^^^^ -- !query @@ -212,6 +245,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: 000. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean('000') AS error + ^^^^^^^^^^^^^^ -- !query @@ -221,6 +257,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: . To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean('') AS error + ^^^^^^^^^^^ -- !query @@ -327,6 +366,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: tru e . To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean(string(' tru e ')) AS invalid + ^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -336,6 +378,9 @@ struct<> -- !query output java.lang.UnsupportedOperationException invalid input syntax for type boolean: . To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT boolean(string('')) AS invalid + ^^^^^^^^^^^^^^^^^^^ -- !query @@ -479,7 +524,11 @@ INSERT INTO BOOLTBL2 struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('XXX' AS BOOLEAN): invalid input syntax for type boolean: XXX. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error.; line 2 pos 3 +failed to evaluate expression CAST('XXX' AS BOOLEAN): invalid input syntax for type boolean: XXX. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 2, position 11) == + VALUES (boolean('XXX')) + ^^^^^^^^^^^^^^ +; line 2 pos 3 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out index eccfdbae75768..39636e02159eb 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out @@ -97,6 +97,9 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException invalid input syntax for type numeric: 'N A N'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT float('N A N') + ^^^^^^^^^^^^^^ -- !query @@ -106,6 +109,9 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException invalid input syntax for type numeric: 'NaN x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT float('NaN x') + ^^^^^^^^^^^^^^ -- !query @@ -115,6 +121,9 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException invalid input syntax for type numeric: ' INFINITY x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT float(' INFINITY x') + ^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -148,6 +157,9 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException invalid input syntax for type numeric: 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 13) == +SELECT float(decimal('nan')) + ^^^^^^^^^^^^^^ -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out index d143e1f1c5991..b2f61306c7c7c 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out @@ -129,6 +129,9 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException invalid input syntax for type numeric: 'N A N'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT double('N A N') + ^^^^^^^^^^^^^^^ -- !query @@ -138,6 +141,9 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException invalid input syntax for type numeric: 'NaN x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT double('NaN x') + ^^^^^^^^^^^^^^^ -- !query @@ -147,6 +153,9 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException invalid input syntax for type numeric: ' INFINITY x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT double(' INFINITY x') + ^^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -180,6 +189,9 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException invalid input syntax for type numeric: 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 14) == +SELECT double(decimal('nan')) + ^^^^^^^^^^^^^^ -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out index 9f9f212c731b2..836370935f64d 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out @@ -66,6 +66,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: 'four: 2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select string('four: ') || 2+2 + ^^^^^^^^^^^^^^^^^^^^^^^ -- !query @@ -75,6 +78,9 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: 'four: 2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +select 'four: ' || 2+2 + ^^^^^^^^^^^^^^^ -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out index 158196e7c8280..1d48d7c7b92c4 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out @@ -463,6 +463,11 @@ struct<> -- !query output java.lang.NumberFormatException invalid input syntax for type numeric: 'NaN'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 3, position 12) == +window w as (order by f_numeric range between + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + 1.1 preceding and 'NaN' following) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index 680c5707a450d..c799d65985d5d 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -72,7 +72,11 @@ insert into datetimes values struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): Cannot cast 11:00 BST to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error.; line 1 pos 22 +failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): Cannot cast 11:00 BST to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 2, position 23) == +(1, timestamp '11:00', cast ('11:00 BST' as timestamp), cast ('1 year' as timestamp), ... + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +; line 1 pos 22 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out index c937d6637716a..87beeacc0bc15 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out @@ -501,4 +501,8 @@ FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('nan' AS INT): invalid input syntax for type numeric: 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error.; line 3 pos 6 +failed to evaluate expression CAST('nan' AS INT): invalid input syntax for type numeric: 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 3, position 28) == +FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) + ^^^^^^^^^^^^^^^^^^ +; line 3 pos 6 From 194ed0c74415700e13ec8c4fade053c523542efc Mon Sep 17 00:00:00 2001 From: fhygh <283452027@qq.com> Date: Fri, 15 Apr 2022 20:42:18 +0900 Subject: [PATCH 145/535] [SPARK-38892][SQL][TESTS] Fix a test case schema assertion of ParquetPartitionDiscoverySuite ### What changes were proposed in this pull request? in ParquetPartitionDiscoverySuite, thare are some assert have no parctical significance. `assert(input.schema.sameType(input.schema))` ### Why are the changes needed? fix this to assert the actual result. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? updated testsuites Closes #36189 from fhygh/assertutfix. Authored-by: fhygh <283452027@qq.com> Signed-off-by: Hyukjin Kwon (cherry picked from commit 4835946de2ef71b176da5106e9b6c2706e182722) Signed-off-by: Hyukjin Kwon --- .../datasources/parquet/ParquetPartitionDiscoverySuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index f3751562c332e..ee905fba74527 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -1076,7 +1076,7 @@ abstract class ParquetPartitionDiscoverySuite val input = spark.read.parquet(path.getAbsolutePath).select("id", "date_month", "date_hour", "date_t_hour", "data") - assert(input.schema.sameType(input.schema)) + assert(data.schema.sameType(input.schema)) checkAnswer(input, data) } } From dfb668d2541c5e15c7b41dfa74c9dea7291fe9e1 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sat, 16 Apr 2022 00:31:41 -0700 Subject: [PATCH 146/535] [SPARK-38866][BUILD] Update ORC to 1.7.4 ### What changes were proposed in this pull request? This PR aims to update ORC to version 1.7.4. ### Why are the changes needed? This will bring the following bug fixes. - https://github.com/apache/orc/milestone/7?closed=1 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #36153 from williamhyun/orc174RC0. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 7caf487c76abfdc76fc79a3bd4787d2e6c8034eb) Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 6 +++--- dev/deps/spark-deps-hadoop-3-hive-2.3 | 6 +++--- pom.xml | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index c0b1502743013..9847f794e0b73 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -219,9 +219,9 @@ objenesis/3.2//objenesis-3.2.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.7.3//orc-core-1.7.3.jar -orc-mapreduce/1.7.3//orc-mapreduce-1.7.3.jar -orc-shims/1.7.3//orc-shims-1.7.3.jar +orc-core/1.7.4//orc-core-1.7.4.jar +orc-mapreduce/1.7.4//orc-mapreduce-1.7.4.jar +orc-shims/1.7.4//orc-shims-1.7.4.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 20a727521aa97..5d26abb88cde2 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -208,9 +208,9 @@ opencsv/2.3//opencsv-2.3.jar opentracing-api/0.33.0//opentracing-api-0.33.0.jar opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar opentracing-util/0.33.0//opentracing-util-0.33.0.jar -orc-core/1.7.3//orc-core-1.7.3.jar -orc-mapreduce/1.7.3//orc-mapreduce-1.7.3.jar -orc-shims/1.7.3//orc-shims-1.7.3.jar +orc-core/1.7.4//orc-core-1.7.4.jar +orc-mapreduce/1.7.4//orc-mapreduce-1.7.4.jar +orc-shims/1.7.4//orc-shims-1.7.4.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/pom.xml b/pom.xml index 77fbdc0c5e139..8d60f880af4ea 100644 --- a/pom.xml +++ b/pom.xml @@ -138,7 +138,7 @@ 10.14.2.0 1.12.2 - 1.7.3 + 1.7.4 9.4.44.v20210927 4.0.3 0.10.0 From baeaaeb8cbb8a69b15fac1df7063186dfa81e6a8 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Sat, 16 Apr 2022 20:31:34 -0700 Subject: [PATCH 147/535] [SPARK-38784][CORE] Upgrade Jetty to 9.4.46 ### What changes were proposed in this pull request? Upgrade Jetty to 9.4.46 ### Why are the changes needed? Three CVEs, which don't necessarily appear to affect Spark, are fixed in this version. Just housekeeping. CVE-2021-28169 CVE-2021-34428 CVE-2021-34429 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #36229 from srowen/SPARK-38784. Authored-by: Sean Owen Signed-off-by: Dongjoon Hyun (cherry picked from commit 619b7b4345013684e814499f8cec3b99ba9d88c2) Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 4 ++-- pom.xml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 9847f794e0b73..7499a9b94c05d 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -146,7 +146,7 @@ jersey-hk2/2.34//jersey-hk2-2.34.jar jersey-server/2.34//jersey-server-2.34.jar jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar jetty-util/6.1.26//jetty-util-6.1.26.jar -jetty-util/9.4.44.v20210927//jetty-util-9.4.44.v20210927.jar +jetty-util/9.4.46.v20220331//jetty-util-9.4.46.v20220331.jar jetty/6.1.26//jetty-6.1.26.jar jline/2.14.6//jline-2.14.6.jar joda-time/2.10.13//joda-time-2.10.13.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 5d26abb88cde2..94cd002122397 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -133,8 +133,8 @@ jersey-container-servlet/2.34//jersey-container-servlet-2.34.jar jersey-hk2/2.34//jersey-hk2-2.34.jar jersey-server/2.34//jersey-server-2.34.jar jettison/1.1//jettison-1.1.jar -jetty-util-ajax/9.4.44.v20210927//jetty-util-ajax-9.4.44.v20210927.jar -jetty-util/9.4.44.v20210927//jetty-util-9.4.44.v20210927.jar +jetty-util-ajax/9.4.46.v20220331//jetty-util-ajax-9.4.46.v20220331.jar +jetty-util/9.4.46.v20220331//jetty-util-9.4.46.v20220331.jar jline/2.14.6//jline-2.14.6.jar joda-time/2.10.13//joda-time-2.10.13.jar jodd-core/3.5.2//jodd-core-3.5.2.jar diff --git a/pom.xml b/pom.xml index 8d60f880af4ea..072556a59975d 100644 --- a/pom.xml +++ b/pom.xml @@ -139,7 +139,7 @@ 10.14.2.0 1.12.2 1.7.4 - 9.4.44.v20210927 + 9.4.46.v20220331 4.0.3 0.10.0 2.5.0 From c10160b4163be00b8009cb462b1e33704b0ff3d6 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Sun, 17 Apr 2022 08:39:27 -0500 Subject: [PATCH 148/535] [SPARK-38640][CORE] Fix NPE with memory-only cache blocks and RDD fetching ### What changes were proposed in this pull request? Fixes a bug where if `spark.shuffle.service.fetch.rdd.enabled=true`, memory-only cached blocks will fail to unpersist. ### Why are the changes needed? In https://github.com/apache/spark/pull/33020, when all RDD blocks are removed from `externalShuffleServiceBlockStatus`, the underlying Map is nulled to reduce memory. When persisting blocks we check if it's using disk before adding it to `externalShuffleServiceBlockStatus`, but when removing them there is no check, so a memory-only cache block will keep `externalShuffleServiceBlockStatus` null, and when unpersisting it throw an NPE because it tries to remove from the null Map. This adds checks to the removal as well to only remove if the block is on disk, and therefore should have been added to `externalShuffleServiceBlockStatus` in the first place. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New and updated UT Closes #35959 from Kimahriman/fetch-rdd-memory-only-unpersist. Authored-by: Adam Binford Signed-off-by: Sean Owen (cherry picked from commit e0939f0f7c3d3bd4baa89e720038dbd3c7363a72) Signed-off-by: Sean Owen --- .../storage/BlockManagerMasterEndpoint.scala | 8 ++++--- .../spark/ExternalShuffleServiceSuite.scala | 22 +++++++++++++++++++ .../spark/storage/BlockManagerInfoSuite.scala | 2 ++ 3 files changed, 29 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index 4d8ba9b3e4e0a..adeb507941c0e 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -838,9 +838,11 @@ private[spark] class BlockStatusPerBlockId { } def remove(blockId: BlockId): Unit = { - blocks.remove(blockId) - if (blocks.isEmpty) { - blocks = null + if (blocks != null) { + blocks.remove(blockId) + if (blocks.isEmpty) { + blocks = null + } } } diff --git a/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala b/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala index dd3d90f3124d5..1ca78d572c7ad 100644 --- a/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala +++ b/core/src/test/scala/org/apache/spark/ExternalShuffleServiceSuite.scala @@ -255,4 +255,26 @@ class ExternalShuffleServiceSuite extends ShuffleSuite with BeforeAndAfterAll wi } } } + + test("SPARK-38640: memory only blocks can unpersist using shuffle service cache fetching") { + for (enabled <- Seq(true, false)) { + val confWithRddFetch = + conf.clone.set(config.SHUFFLE_SERVICE_FETCH_RDD_ENABLED, enabled) + sc = new SparkContext("local-cluster[1,1,1024]", "test", confWithRddFetch) + sc.env.blockManager.externalShuffleServiceEnabled should equal(true) + sc.env.blockManager.blockStoreClient.getClass should equal(classOf[ExternalBlockStoreClient]) + try { + val rdd = sc.parallelize(0 until 100, 2) + .map { i => (i, 1) } + .persist(StorageLevel.MEMORY_ONLY) + + rdd.count() + rdd.unpersist(true) + assert(sc.persistentRdds.isEmpty) + } finally { + rpcHandler.applicationRemoved(sc.conf.getAppId, true) + sc.stop() + } + } + } } diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerInfoSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerInfoSuite.scala index f0c19c5ccce12..85f012aece3b4 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerInfoSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerInfoSuite.scala @@ -63,6 +63,8 @@ class BlockManagerInfoSuite extends SparkFunSuite { if (svcEnabled) { assert(getEssBlockStatus(bmInfo, rddId).isEmpty) } + bmInfo.updateBlockInfo(rddId, StorageLevel.NONE, memSize = 0, diskSize = 0) + assert(bmInfo.remainingMem === 30000) } testWithShuffleServiceOnOff("RDD block with MEMORY_AND_DISK") { (svcEnabled, bmInfo) => From 6846cc98896f0cd3f98b1fae0000b74084dd08ac Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Mon, 18 Apr 2022 02:25:17 +0800 Subject: [PATCH 149/535] [SPARK-38924][UI] Update datatables to 1.10.25 ### What changes were proposed in this pull request? Update javascript library datatables, used in the UI, to 1.10.25 ### Why are the changes needed? https://nvd.nist.gov/vuln/detail/CVE-2020-28458 affects the current version of datatables, 1.10.20, and would be safer to just update. ### Does this PR introduce _any_ user-facing change? Should not. ### How was this patch tested? Existing tests, with some minor manual local testing. Closes #36226 from srowen/SPARK-38924. Authored-by: Sean Owen Signed-off-by: Gengliang Wang (cherry picked from commit 74b336858cc94194ef22483f0d684f0bcdd29599) Signed-off-by: Gengliang Wang --- .../dataTables.bootstrap4.1.10.20.min.css | 1 - .../dataTables.bootstrap4.1.10.20.min.js | 11 -- .../dataTables.bootstrap4.1.10.25.min.css | 1 + .../dataTables.bootstrap4.1.10.25.min.js | 14 ++ .../static/jquery.dataTables.1.10.20.min.css | 1 - .../static/jquery.dataTables.1.10.20.min.js | 180 ----------------- .../static/jquery.dataTables.1.10.25.min.css | 1 + .../static/jquery.dataTables.1.10.25.min.js | 184 ++++++++++++++++++ .../spark/ui/static/webui-dataTables.css | 4 + .../scala/org/apache/spark/ui/UIUtils.scala | 8 +- dev/.rat-excludes | 8 +- 11 files changed, 212 insertions(+), 201 deletions(-) delete mode 100644 core/src/main/resources/org/apache/spark/ui/static/dataTables.bootstrap4.1.10.20.min.css delete mode 100644 core/src/main/resources/org/apache/spark/ui/static/dataTables.bootstrap4.1.10.20.min.js create mode 100644 core/src/main/resources/org/apache/spark/ui/static/dataTables.bootstrap4.1.10.25.min.css create mode 100644 core/src/main/resources/org/apache/spark/ui/static/dataTables.bootstrap4.1.10.25.min.js delete mode 100644 core/src/main/resources/org/apache/spark/ui/static/jquery.dataTables.1.10.20.min.css delete mode 100644 core/src/main/resources/org/apache/spark/ui/static/jquery.dataTables.1.10.20.min.js create mode 100644 core/src/main/resources/org/apache/spark/ui/static/jquery.dataTables.1.10.25.min.css create mode 100644 core/src/main/resources/org/apache/spark/ui/static/jquery.dataTables.1.10.25.min.js diff --git a/core/src/main/resources/org/apache/spark/ui/static/dataTables.bootstrap4.1.10.20.min.css b/core/src/main/resources/org/apache/spark/ui/static/dataTables.bootstrap4.1.10.20.min.css deleted file mode 100644 index f1930be0e23f6..0000000000000 --- a/core/src/main/resources/org/apache/spark/ui/static/dataTables.bootstrap4.1.10.20.min.css +++ /dev/null @@ -1 +0,0 @@ -table.dataTable{clear:both;margin-top:6px !important;margin-bottom:6px !important;max-width:none !important;border-collapse:separate !important;border-spacing:0}table.dataTable td,table.dataTable th{-webkit-box-sizing:content-box;box-sizing:content-box}table.dataTable td.dataTables_empty,table.dataTable th.dataTables_empty{text-align:center}table.dataTable.nowrap th,table.dataTable.nowrap td{white-space:nowrap}div.dataTables_wrapper div.dataTables_length label{font-weight:normal;text-align:left;white-space:nowrap}div.dataTables_wrapper div.dataTables_length select{width:auto;display:inline-block}div.dataTables_wrapper div.dataTables_filter{text-align:right}div.dataTables_wrapper div.dataTables_filter label{font-weight:normal;white-space:nowrap;text-align:left}div.dataTables_wrapper div.dataTables_filter input{margin-left:0.5em;display:inline-block;width:auto}div.dataTables_wrapper div.dataTables_info{padding-top:0.85em;white-space:nowrap}div.dataTables_wrapper div.dataTables_paginate{margin:0;white-space:nowrap;text-align:right}div.dataTables_wrapper div.dataTables_paginate ul.pagination{margin:2px 0;white-space:nowrap;justify-content:flex-end}div.dataTables_wrapper div.dataTables_processing{position:absolute;top:50%;left:50%;width:200px;margin-left:-100px;margin-top:-26px;text-align:center;padding:1em 0}table.dataTable thead>tr>th.sorting_asc,table.dataTable thead>tr>th.sorting_desc,table.dataTable thead>tr>th.sorting,table.dataTable thead>tr>td.sorting_asc,table.dataTable thead>tr>td.sorting_desc,table.dataTable thead>tr>td.sorting{padding-right:30px}table.dataTable thead>tr>th:active,table.dataTable thead>tr>td:active{outline:none}table.dataTable thead .sorting,table.dataTable thead .sorting_asc,table.dataTable thead .sorting_desc,table.dataTable thead .sorting_asc_disabled,table.dataTable thead .sorting_desc_disabled{cursor:pointer;position:relative}table.dataTable thead .sorting:before,table.dataTable thead .sorting:after,table.dataTable thead .sorting_asc:before,table.dataTable thead .sorting_asc:after,table.dataTable thead .sorting_desc:before,table.dataTable thead .sorting_desc:after,table.dataTable thead .sorting_asc_disabled:before,table.dataTable thead .sorting_asc_disabled:after,table.dataTable thead .sorting_desc_disabled:before,table.dataTable thead .sorting_desc_disabled:after{position:absolute;bottom:0.9em;display:block;opacity:0.3}table.dataTable thead .sorting:before,table.dataTable thead .sorting_asc:before,table.dataTable thead .sorting_desc:before,table.dataTable thead .sorting_asc_disabled:before,table.dataTable thead .sorting_desc_disabled:before{right:1em;content:"\2191"}table.dataTable thead .sorting:after,table.dataTable thead .sorting_asc:after,table.dataTable thead .sorting_desc:after,table.dataTable thead .sorting_asc_disabled:after,table.dataTable thead .sorting_desc_disabled:after{right:0.5em;content:"\2193"}table.dataTable thead .sorting_asc:before,table.dataTable thead .sorting_desc:after{opacity:1}table.dataTable thead .sorting_asc_disabled:before,table.dataTable thead .sorting_desc_disabled:after{opacity:0}div.dataTables_scrollHead table.dataTable{margin-bottom:0 !important}div.dataTables_scrollBody table{border-top:none;margin-top:0 !important;margin-bottom:0 !important}div.dataTables_scrollBody table thead .sorting:before,div.dataTables_scrollBody table thead .sorting_asc:before,div.dataTables_scrollBody table thead .sorting_desc:before,div.dataTables_scrollBody table thead .sorting:after,div.dataTables_scrollBody table thead .sorting_asc:after,div.dataTables_scrollBody table thead .sorting_desc:after{display:none}div.dataTables_scrollBody table tbody tr:first-child th,div.dataTables_scrollBody table tbody tr:first-child td{border-top:none}div.dataTables_scrollFoot>.dataTables_scrollFootInner{box-sizing:content-box}div.dataTables_scrollFoot>.dataTables_scrollFootInner>table{margin-top:0 !important;border-top:none}@media screen and (max-width: 767px){div.dataTables_wrapper div.dataTables_length,div.dataTables_wrapper div.dataTables_filter,div.dataTables_wrapper div.dataTables_info,div.dataTables_wrapper div.dataTables_paginate{text-align:center}}table.dataTable.table-sm>thead>tr>th{padding-right:20px}table.dataTable.table-sm .sorting:before,table.dataTable.table-sm .sorting_asc:before,table.dataTable.table-sm .sorting_desc:before{top:5px;right:0.85em}table.dataTable.table-sm .sorting:after,table.dataTable.table-sm .sorting_asc:after,table.dataTable.table-sm .sorting_desc:after{top:5px}table.table-bordered.dataTable th,table.table-bordered.dataTable td{border-left-width:0}table.table-bordered.dataTable th:last-child,table.table-bordered.dataTable th:last-child,table.table-bordered.dataTable td:last-child,table.table-bordered.dataTable td:last-child{border-right-width:0}table.table-bordered.dataTable tbody th,table.table-bordered.dataTable tbody td{border-bottom-width:0}div.dataTables_scrollHead table.table-bordered{border-bottom-width:0}div.table-responsive>div.dataTables_wrapper>div.row{margin:0}div.table-responsive>div.dataTables_wrapper>div.row>div[class^="col-"]:first-child{padding-left:0}div.table-responsive>div.dataTables_wrapper>div.row>div[class^="col-"]:last-child{padding-right:0} diff --git a/core/src/main/resources/org/apache/spark/ui/static/dataTables.bootstrap4.1.10.20.min.js b/core/src/main/resources/org/apache/spark/ui/static/dataTables.bootstrap4.1.10.20.min.js deleted file mode 100644 index 8cecad738d64c..0000000000000 --- a/core/src/main/resources/org/apache/spark/ui/static/dataTables.bootstrap4.1.10.20.min.js +++ /dev/null @@ -1,11 +0,0 @@ -/*! - DataTables Bootstrap 4 integration - ©2011-2017 SpryMedia Ltd - datatables.net/license -*/ -var $jscomp=$jscomp||{};$jscomp.scope={};$jscomp.findInternal=function(a,b,c){a instanceof String&&(a=String(a));for(var e=a.length,d=0;d<'col-sm-12 col-md-6'f>><'row'<'col-sm-12'tr>><'row'<'col-sm-12 col-md-5'i><'col-sm-12 col-md-7'p>>", -renderer:"bootstrap"});a.extend(d.ext.classes,{sWrapper:"dataTables_wrapper dt-bootstrap4",sFilterInput:"form-control form-control-sm",sLengthSelect:"custom-select custom-select-sm form-control form-control-sm",sProcessing:"dataTables_processing card",sPageButton:"paginate_button page-item"});d.ext.renderer.pageButton.bootstrap=function(b,l,v,w,m,r){var k=new d.Api(b),x=b.oClasses,n=b.oLanguage.oPaginate,y=b.oLanguage.oAria.paginate||{},g,h,t=0,u=function(c,d){var e,l=function(b){b.preventDefault(); -a(b.currentTarget).hasClass("disabled")||k.page()==b.data.action||k.page(b.data.action).draw("page")};var q=0;for(e=d.length;q",{"class":x.sPageButton+" "+h,id:0===v&&"string"===typeof f?b.sTableId+"_"+f:null}).append(a("",{href:"#","aria-controls":b.sTableId,"aria-label":y[f],"data-dt-idx":t,tabindex:b.iTabIndex,"class":"page-link"}).html(g)).appendTo(c);b.oApi._fnBindAction(p,{action:f},l);t++}}}};try{var p=a(l).find(c.activeElement).data("dt-idx")}catch(z){}u(a(l).empty().html('
        ").appendTo(w));r.nTBody=b[0];b=w.children("tfoot");0===b.length&&0").appendTo(w));0===b.length||0===b.children().length?w.addClass(x.sNoFooter):0/g,cc=/^\d{2,4}[\.\/\-]\d{1,2}[\.\/\-]\d{1,2}([T ]{1}\d{1,2}[:\.]\d{2}([\.:]\d{2})?)?$/,dc=/(\/|\.|\*|\+|\?|\||\(|\)|\[|\]|\{|\}|\\|\$|\^|\-)/g,bb=/[',$£€¥%\u2009\u202F\u20BD\u20a9\u20BArfkɃΞ]/gi,P=function(a){return a&&!0!==a&&"-"!==a?!1: -!0},Sb=function(a){var b=parseInt(a,10);return!isNaN(b)&&isFinite(a)?b:null},Tb=function(a,b){cb[b]||(cb[b]=new RegExp(Ua(b),"g"));return"string"===typeof a&&"."!==b?a.replace(/\./g,"").replace(cb[b],"."):a},db=function(a,b,c){var d="string"===typeof a;if(P(a))return!0;b&&d&&(a=Tb(a,b));c&&d&&(a=a.replace(bb,""));return!isNaN(parseFloat(a))&&isFinite(a)},Ub=function(a,b,c){return P(a)?!0:P(a)||"string"===typeof a?db(a.replace(Ea,""),b,c)?!0:null:null},J=function(a,b,c){var d=[],e=0,h=a.length;if(c!== -p)for(;ea.length)){var b=a.slice().sort();for(var c=b[0],d=1, -e=b.length;d")[0],$b=ya.textContent!==p,bc=/<.*?>/g,Sa=q.util.throttle,Wb=[],G=Array.prototype,ec=function(a){var b,c=q.settings,d=f.map(c,function(a,b){return a.nTable});if(a){if(a.nTable&&a.oApi)return[a];if(a.nodeName&&"table"===a.nodeName.toLowerCase()){var e=f.inArray(a,d);return-1!==e?[c[e]]:null}if(a&&"function"===typeof a.settings)return a.settings().toArray();"string"===typeof a?b=f(a):a instanceof f&&(b=a)}else return[];if(b)return b.map(function(a){e=f.inArray(this, -d);return-1!==e?c[e]:null}).toArray()};var v=function(a,b){if(!(this instanceof v))return new v(a,b);var c=[],d=function(a){(a=ec(a))&&c.push.apply(c,a)};if(f.isArray(a))for(var e=0,h=a.length;ea?new v(b[a],this[a]):null},filter:function(a){var b=[];if(G.filter)b=G.filter.call(this,a,this);else for(var c=0,d=this.length;c").addClass(c),f("td",d).addClass(c).html(b)[0].colSpan=W(a),e.push(d[0]))};h(c,d);b._details&&b._details.detach();b._details=f(e);b._detailsShow&&b._details.insertAfter(b.nTr)},hb=function(a,b){var c=a.context;c.length&&(a=c[0].aoData[b!==p?b:a[0]])&&a._details&&(a._details.remove(),a._detailsShow=p,a._details=p)},Yb=function(a,b){var c=a.context;c.length&&a.length&&(a=c[0].aoData[a[0]],a._details&&((a._detailsShow=b)?a._details.insertAfter(a.nTr): -a._details.detach(),ic(c[0])))},ic=function(a){var b=new v(a),c=a.aoData;b.off("draw.dt.DT_details column-visibility.dt.DT_details destroy.dt.DT_details");0g){var m=f.map(d,function(a,b){return a.bVisible?b:null});return[m[m.length+g]]}return[ba(a,g)];case "name":return f.map(e,function(a,b){return a===n[1]?b:null});default:return[]}if(b.nodeName&&b._DT_CellIndex)return[b._DT_CellIndex.column];g=f(h).filter(b).map(function(){return f.inArray(this, -h)}).toArray();if(g.length||!b.nodeName)return g;g=f(b).closest("*[data-dt-column]");return g.length?[g.data("dt-column")]:[]},a,c)};t("columns()",function(a,b){a===p?a="":f.isPlainObject(a)&&(b=a,a="");b=fb(b);var c=this.iterator("table",function(c){return kc(c,a,b)},1);c.selector.cols=a;c.selector.opts=b;return c});x("columns().header()","column().header()",function(a,b){return this.iterator("column",function(a,b){return a.aoColumns[b].nTh},1)});x("columns().footer()","column().footer()",function(a, -b){return this.iterator("column",function(a,b){return a.aoColumns[b].nTf},1)});x("columns().data()","column().data()",function(){return this.iterator("column-rows",Zb,1)});x("columns().dataSrc()","column().dataSrc()",function(){return this.iterator("column",function(a,b){return a.aoColumns[b].mData},1)});x("columns().cache()","column().cache()",function(a){return this.iterator("column-rows",function(b,c,d,e,f){return la(b.aoData,f,"search"===a?"_aFilterData":"_aSortData",c)},1)});x("columns().nodes()", -"column().nodes()",function(){return this.iterator("column-rows",function(a,b,c,d,e){return la(a.aoData,e,"anCells",b)},1)});x("columns().visible()","column().visible()",function(a,b){var c=this,d=this.iterator("column",function(b,c){if(a===p)return b.aoColumns[c].bVisible;var d=b.aoColumns,e=d[c],h=b.aoData,n;if(a!==p&&e.bVisible!==a){if(a){var m=f.inArray(!0,J(d,"bVisible"),c+1);d=0;for(n=h.length;dd;return!0};q.isDataTable=q.fnIsDataTable=function(a){var b=f(a).get(0),c=!1;if(a instanceof -q.Api)return!0;f.each(q.settings,function(a,e){a=e.nScrollHead?f("table",e.nScrollHead)[0]:null;var d=e.nScrollFoot?f("table",e.nScrollFoot)[0]:null;if(e.nTable===b||a===b||d===b)c=!0});return c};q.tables=q.fnTables=function(a){var b=!1;f.isPlainObject(a)&&(b=a.api,a=a.visible);var c=f.map(q.settings,function(b){if(!a||a&&f(b.nTable).is(":visible"))return b.nTable});return b?new v(c):c};q.camelToHungarian=L;t("$()",function(a,b){b=this.rows(b).nodes();b=f(b);return f([].concat(b.filter(a).toArray(), -b.find(a).toArray()))});f.each(["on","one","off"],function(a,b){t(b+"()",function(){var a=Array.prototype.slice.call(arguments);a[0]=f.map(a[0].split(/\s/),function(a){return a.match(/\.dt\b/)?a:a+".dt"}).join(" ");var d=f(this.tables().nodes());d[b].apply(d,a);return this})});t("clear()",function(){return this.iterator("table",function(a){qa(a)})});t("settings()",function(){return new v(this.context,this.context)});t("init()",function(){var a=this.context;return a.length?a[0].oInit:null});t("data()", -function(){return this.iterator("table",function(a){return J(a.aoData,"_aData")}).flatten()});t("destroy()",function(a){a=a||!1;return this.iterator("table",function(b){var c=b.nTableWrapper.parentNode,d=b.oClasses,e=b.nTable,h=b.nTBody,g=b.nTHead,k=b.nTFoot,l=f(e);h=f(h);var n=f(b.nTableWrapper),m=f.map(b.aoData,function(a){return a.nTr}),p;b.bDestroying=!0;A(b,"aoDestroyCallback","destroy",[b]);a||(new v(b)).columns().visible(!0);n.off(".DT").find(":not(tbody *)").off(".DT");f(z).off(".DT-"+b.sInstance); -e!=g.parentNode&&(l.children("thead").detach(),l.append(g));k&&e!=k.parentNode&&(l.children("tfoot").detach(),l.append(k));b.aaSorting=[];b.aaSortingFixed=[];Aa(b);f(m).removeClass(b.asStripeClasses.join(" "));f("th, td",g).removeClass(d.sSortable+" "+d.sSortableAsc+" "+d.sSortableDesc+" "+d.sSortableNone);h.children().detach();h.append(m);g=a?"remove":"detach";l[g]();n[g]();!a&&c&&(c.insertBefore(e,b.nTableReinsertBefore),l.css("width",b.sDestroyWidth).removeClass(d.sTable),(p=b.asDestroyStripes.length)&& -h.children().each(function(a){f(this).addClass(b.asDestroyStripes[a%p])}));c=f.inArray(b,q.settings);-1!==c&&q.settings.splice(c,1)})});f.each(["column","row","cell"],function(a,b){t(b+"s().every()",function(a){var c=this.selector.opts,e=this;return this.iterator(b,function(d,f,k,l,n){a.call(e[b](f,"cell"===b?k:c,"cell"===b?c:p),f,k,l,n)})})});t("i18n()",function(a,b,c){var d=this.context[0];a=U(a)(d.oLanguage);a===p&&(a=b);c!==p&&f.isPlainObject(a)&&(a=a[c]!==p?a[c]:a._);return a.replace("%d",c)}); -q.version="1.10.20";q.settings=[];q.models={};q.models.oSearch={bCaseInsensitive:!0,sSearch:"",bRegex:!1,bSmart:!0};q.models.oRow={nTr:null,anCells:null,_aData:[],_aSortData:null,_aFilterData:null,_sFilterRow:null,_sRowStripe:"",src:null,idx:-1};q.models.oColumn={idx:null,aDataSort:null,asSorting:null,bSearchable:null,bSortable:null,bVisible:null,_sManualType:null,_bAttrSrc:!1,fnCreatedCell:null,fnGetData:null,fnSetData:null,mData:null,mRender:null,nTh:null,nTf:null,sClass:null,sContentPadding:null, -sDefaultContent:null,sName:null,sSortDataType:"std",sSortingClass:null,sSortingClassJUI:null,sTitle:null,sType:null,sWidth:null,sWidthOrig:null};q.defaults={aaData:null,aaSorting:[[0,"asc"]],aaSortingFixed:[],ajax:null,aLengthMenu:[10,25,50,100],aoColumns:null,aoColumnDefs:null,aoSearchCols:[],asStripeClasses:null,bAutoWidth:!0,bDeferRender:!1,bDestroy:!1,bFilter:!0,bInfo:!0,bLengthChange:!0,bPaginate:!0,bProcessing:!1,bRetrieve:!1,bScrollCollapse:!1,bServerSide:!1,bSort:!0,bSortMulti:!0,bSortCellsTop:!1, -bSortClasses:!0,bStateSave:!1,fnCreatedRow:null,fnDrawCallback:null,fnFooterCallback:null,fnFormatNumber:function(a){return a.toString().replace(/\B(?=(\d{3})+(?!\d))/g,this.oLanguage.sThousands)},fnHeaderCallback:null,fnInfoCallback:null,fnInitComplete:null,fnPreDrawCallback:null,fnRowCallback:null,fnServerData:null,fnServerParams:null,fnStateLoadCallback:function(a){try{return JSON.parse((-1===a.iStateDuration?sessionStorage:localStorage).getItem("DataTables_"+a.sInstance+"_"+location.pathname))}catch(b){}}, -fnStateLoadParams:null,fnStateLoaded:null,fnStateSaveCallback:function(a,b){try{(-1===a.iStateDuration?sessionStorage:localStorage).setItem("DataTables_"+a.sInstance+"_"+location.pathname,JSON.stringify(b))}catch(c){}},fnStateSaveParams:null,iStateDuration:7200,iDeferLoading:null,iDisplayLength:10,iDisplayStart:0,iTabIndex:0,oClasses:{},oLanguage:{oAria:{sSortAscending:": activate to sort column ascending",sSortDescending:": activate to sort column descending"},oPaginate:{sFirst:"First",sLast:"Last", -sNext:"Next",sPrevious:"Previous"},sEmptyTable:"No data available in table",sInfo:"Showing _START_ to _END_ of _TOTAL_ entries",sInfoEmpty:"Showing 0 to 0 of 0 entries",sInfoFiltered:"(filtered from _MAX_ total entries)",sInfoPostFix:"",sDecimal:"",sThousands:",",sLengthMenu:"Show _MENU_ entries",sLoadingRecords:"Loading...",sProcessing:"Processing...",sSearch:"Search:",sSearchPlaceholder:"",sUrl:"",sZeroRecords:"No matching records found"},oSearch:f.extend({},q.models.oSearch),sAjaxDataProp:"data", -sAjaxSource:null,sDom:"lfrtip",searchDelay:null,sPaginationType:"simple_numbers",sScrollX:"",sScrollXInner:"",sScrollY:"",sServerMethod:"GET",renderer:null,rowId:"DT_RowId"};H(q.defaults);q.defaults.column={aDataSort:null,iDataSort:-1,asSorting:["asc","desc"],bSearchable:!0,bSortable:!0,bVisible:!0,fnCreatedCell:null,mData:null,mRender:null,sCellType:"td",sClass:"",sContentPadding:"",sDefaultContent:null,sName:"",sSortDataType:"std",sTitle:null,sType:null,sWidth:null};H(q.defaults.column);q.models.oSettings= -{oFeatures:{bAutoWidth:null,bDeferRender:null,bFilter:null,bInfo:null,bLengthChange:null,bPaginate:null,bProcessing:null,bServerSide:null,bSort:null,bSortMulti:null,bSortClasses:null,bStateSave:null},oScroll:{bCollapse:null,iBarWidth:0,sX:null,sXInner:null,sY:null},oLanguage:{fnInfoCallback:null},oBrowser:{bScrollOversize:!1,bScrollbarLeft:!1,bBounding:!1,barWidth:0},ajax:null,aanFeatures:[],aoData:[],aiDisplay:[],aiDisplayMaster:[],aIds:{},aoColumns:[],aoHeader:[],aoFooter:[],oPreviousSearch:{}, -aoPreSearchCols:[],aaSorting:null,aaSortingFixed:[],asStripeClasses:null,asDestroyStripes:[],sDestroyWidth:0,aoRowCallback:[],aoHeaderCallback:[],aoFooterCallback:[],aoDrawCallback:[],aoRowCreatedCallback:[],aoPreDrawCallback:[],aoInitComplete:[],aoStateSaveParams:[],aoStateLoadParams:[],aoStateLoaded:[],sTableId:"",nTable:null,nTHead:null,nTFoot:null,nTBody:null,nTableWrapper:null,bDeferLoading:!1,bInitialised:!1,aoOpenRows:[],sDom:null,searchDelay:null,sPaginationType:"two_button",iStateDuration:0, -aoStateSave:[],aoStateLoad:[],oSavedState:null,oLoadedState:null,sAjaxSource:null,sAjaxDataProp:null,bAjaxDataGet:!0,jqXHR:null,json:p,oAjaxData:p,fnServerData:null,aoServerParams:[],sServerMethod:null,fnFormatNumber:null,aLengthMenu:null,iDraw:0,bDrawing:!1,iDrawError:-1,_iDisplayLength:10,_iDisplayStart:0,_iRecordsTotal:0,_iRecordsDisplay:0,oClasses:{},bFiltered:!1,bSorted:!1,bSortCellsTop:null,oInit:null,aoDestroyCallback:[],fnRecordsTotal:function(){return"ssp"==D(this)?1*this._iRecordsTotal: -this.aiDisplayMaster.length},fnRecordsDisplay:function(){return"ssp"==D(this)?1*this._iRecordsDisplay:this.aiDisplay.length},fnDisplayEnd:function(){var a=this._iDisplayLength,b=this._iDisplayStart,c=b+a,d=this.aiDisplay.length,e=this.oFeatures,f=e.bPaginate;return e.bServerSide?!1===f||-1===a?b+d:Math.min(b+a,this._iRecordsDisplay):!f||c>d||-1===a?d:c},oInstance:null,sInstance:null,iTabIndex:0,nScrollHead:null,nScrollFoot:null,aLastSort:[],oPlugins:{},rowIdFn:null,rowId:null};q.ext=C={buttons:{}, -classes:{},builder:"-source-",errMode:"alert",feature:[],search:[],selector:{cell:[],column:[],row:[]},internal:{},legacy:{ajax:null},pager:{},renderer:{pageButton:{},header:{}},order:{},type:{detect:[],search:{},order:{}},_unique:0,fnVersionCheck:q.fnVersionCheck,iApiIndex:0,oJUIClasses:{},sVersion:q.version};f.extend(C,{afnFiltering:C.search,aTypes:C.type.detect,ofnSearch:C.type.search,oSort:C.type.order,afnSortData:C.order,aoFeatures:C.feature,oApi:C.internal,oStdClasses:C.classes,oPagination:C.pager}); -f.extend(q.ext.classes,{sTable:"dataTable",sNoFooter:"no-footer",sPageButton:"paginate_button",sPageButtonActive:"current",sPageButtonDisabled:"disabled",sStripeOdd:"odd",sStripeEven:"even",sRowEmpty:"dataTables_empty",sWrapper:"dataTables_wrapper",sFilter:"dataTables_filter",sInfo:"dataTables_info",sPaging:"dataTables_paginate paging_",sLength:"dataTables_length",sProcessing:"dataTables_processing",sSortAsc:"sorting_asc",sSortDesc:"sorting_desc",sSortable:"sorting",sSortableAsc:"sorting_asc_disabled", -sSortableDesc:"sorting_desc_disabled",sSortableNone:"sorting_disabled",sSortColumn:"sorting_",sFilterInput:"",sLengthSelect:"",sScrollWrapper:"dataTables_scroll",sScrollHead:"dataTables_scrollHead",sScrollHeadInner:"dataTables_scrollHeadInner",sScrollBody:"dataTables_scrollBody",sScrollFoot:"dataTables_scrollFoot",sScrollFootInner:"dataTables_scrollFootInner",sHeaderTH:"",sFooterTH:"",sSortJUIAsc:"",sSortJUIDesc:"",sSortJUI:"",sSortJUIAscAllowed:"",sSortJUIDescAllowed:"",sSortJUIWrapper:"",sSortIcon:"", -sJUIHeader:"",sJUIFooter:""});var Pb=q.ext.pager;f.extend(Pb,{simple:function(a,b){return["previous","next"]},full:function(a,b){return["first","previous","next","last"]},numbers:function(a,b){return[ka(a,b)]},simple_numbers:function(a,b){return["previous",ka(a,b),"next"]},full_numbers:function(a,b){return["first","previous",ka(a,b),"next","last"]},first_last_numbers:function(a,b){return["first",ka(a,b),"last"]},_numbers:ka,numbers_length:7});f.extend(!0,q.ext.renderer,{pageButton:{_:function(a,b, -c,d,e,h){var g=a.oClasses,k=a.oLanguage.oPaginate,l=a.oLanguage.oAria.paginate||{},n,m,q=0,t=function(b,d){var p,r=g.sPageButtonDisabled,u=function(b){Xa(a,b.data.action,!0)};var w=0;for(p=d.length;w").appendTo(b);t(x,v)}else{n=null;m=v;x=a.iTabIndex;switch(v){case "ellipsis":b.append('');break;case "first":n=k.sFirst;0===e&&(x=-1,m+=" "+r);break;case "previous":n=k.sPrevious;0===e&&(x=-1,m+= -" "+r);break;case "next":n=k.sNext;e===h-1&&(x=-1,m+=" "+r);break;case "last":n=k.sLast;e===h-1&&(x=-1,m+=" "+r);break;default:n=v+1,m=e===v?g.sPageButtonActive:""}null!==n&&(x=f("",{"class":g.sPageButton+" "+m,"aria-controls":a.sTableId,"aria-label":l[v],"data-dt-idx":q,tabindex:x,id:0===c&&"string"===typeof v?a.sTableId+"_"+v:null}).html(n).appendTo(b),$a(x,{action:v},u),q++)}}};try{var v=f(b).find(y.activeElement).data("dt-idx")}catch(mc){}t(f(b).empty(),d);v!==p&&f(b).find("[data-dt-idx="+ -v+"]").focus()}}});f.extend(q.ext.type.detect,[function(a,b){b=b.oLanguage.sDecimal;return db(a,b)?"num"+b:null},function(a,b){if(a&&!(a instanceof Date)&&!cc.test(a))return null;b=Date.parse(a);return null!==b&&!isNaN(b)||P(a)?"date":null},function(a,b){b=b.oLanguage.sDecimal;return db(a,b,!0)?"num-fmt"+b:null},function(a,b){b=b.oLanguage.sDecimal;return Ub(a,b)?"html-num"+b:null},function(a,b){b=b.oLanguage.sDecimal;return Ub(a,b,!0)?"html-num-fmt"+b:null},function(a,b){return P(a)||"string"=== -typeof a&&-1!==a.indexOf("<")?"html":null}]);f.extend(q.ext.type.search,{html:function(a){return P(a)?a:"string"===typeof a?a.replace(Rb," ").replace(Ea,""):""},string:function(a){return P(a)?a:"string"===typeof a?a.replace(Rb," "):a}});var Da=function(a,b,c,d){if(0!==a&&(!a||"-"===a))return-Infinity;b&&(a=Tb(a,b));a.replace&&(c&&(a=a.replace(c,"")),d&&(a=a.replace(d,"")));return 1*a};f.extend(C.type.order,{"date-pre":function(a){a=Date.parse(a);return isNaN(a)?-Infinity:a},"html-pre":function(a){return P(a)? -"":a.replace?a.replace(/<.*?>/g,"").toLowerCase():a+""},"string-pre":function(a){return P(a)?"":"string"===typeof a?a.toLowerCase():a.toString?a.toString():""},"string-asc":function(a,b){return ab?1:0},"string-desc":function(a,b){return ab?-1:0}});Ha("");f.extend(!0,q.ext.renderer,{header:{_:function(a,b,c,d){f(a.nTable).on("order.dt.DT",function(e,f,g,k){a===f&&(e=c.idx,b.removeClass(c.sSortingClass+" "+d.sSortAsc+" "+d.sSortDesc).addClass("asc"==k[e]?d.sSortAsc:"desc"==k[e]?d.sSortDesc: -c.sSortingClass))})},jqueryui:function(a,b,c,d){f("
        ").addClass(d.sSortJUIWrapper).append(b.contents()).append(f("").addClass(d.sSortIcon+" "+c.sSortingClassJUI)).appendTo(b);f(a.nTable).on("order.dt.DT",function(e,f,g,k){a===f&&(e=c.idx,b.removeClass(d.sSortAsc+" "+d.sSortDesc).addClass("asc"==k[e]?d.sSortAsc:"desc"==k[e]?d.sSortDesc:c.sSortingClass),b.find("span."+d.sSortIcon).removeClass(d.sSortJUIAsc+" "+d.sSortJUIDesc+" "+d.sSortJUI+" "+d.sSortJUIAscAllowed+" "+d.sSortJUIDescAllowed).addClass("asc"== -k[e]?d.sSortJUIAsc:"desc"==k[e]?d.sSortJUIDesc:c.sSortingClassJUI))})}}});var ib=function(a){return"string"===typeof a?a.replace(//g,">").replace(/"/g,"""):a};q.render={number:function(a,b,c,d,e){return{display:function(f){if("number"!==typeof f&&"string"!==typeof f)return f;var g=0>f?"-":"",h=parseFloat(f);if(isNaN(h))return ib(f);h=h.toFixed(c);f=Math.abs(h);h=parseInt(f,10);f=c?b+(f-h).toFixed(c).substring(2):"";return g+(d||"")+h.toString().replace(/\B(?=(\d{3})+(?!\d))/g, -a)+f+(e||"")}}},text:function(){return{display:ib,filter:ib}}};f.extend(q.ext.internal,{_fnExternApiFunc:Qb,_fnBuildAjax:va,_fnAjaxUpdate:qb,_fnAjaxParameters:zb,_fnAjaxUpdateDraw:Ab,_fnAjaxDataSrc:wa,_fnAddColumn:Ia,_fnColumnOptions:ma,_fnAdjustColumnSizing:aa,_fnVisibleToColumnIndex:ba,_fnColumnIndexToVisible:ca,_fnVisbleColumns:W,_fnGetColumns:oa,_fnColumnTypes:Ka,_fnApplyColumnDefs:nb,_fnHungarianMap:H,_fnCamelToHungarian:L,_fnLanguageCompat:Ga,_fnBrowserDetect:lb,_fnAddData:R,_fnAddTr:pa,_fnNodeToDataIndex:function(a, -b){return b._DT_RowIndex!==p?b._DT_RowIndex:null},_fnNodeToColumnIndex:function(a,b,c){return f.inArray(c,a.aoData[b].anCells)},_fnGetCellData:I,_fnSetCellData:ob,_fnSplitObjNotation:Na,_fnGetObjectDataFn:U,_fnSetObjectDataFn:Q,_fnGetDataMaster:Oa,_fnClearTable:qa,_fnDeleteIndex:ra,_fnInvalidate:ea,_fnGetRowElements:Ma,_fnCreateTr:La,_fnBuildHead:pb,_fnDrawHead:ha,_fnDraw:S,_fnReDraw:V,_fnAddOptionsHtml:sb,_fnDetectHeader:fa,_fnGetUniqueThs:ua,_fnFeatureHtmlFilter:ub,_fnFilterComplete:ia,_fnFilterCustom:Db, -_fnFilterColumn:Cb,_fnFilter:Bb,_fnFilterCreateSearch:Ta,_fnEscapeRegex:Ua,_fnFilterData:Eb,_fnFeatureHtmlInfo:xb,_fnUpdateInfo:Hb,_fnInfoMacros:Ib,_fnInitialise:ja,_fnInitComplete:xa,_fnLengthChange:Va,_fnFeatureHtmlLength:tb,_fnFeatureHtmlPaginate:yb,_fnPageChange:Xa,_fnFeatureHtmlProcessing:vb,_fnProcessingDisplay:K,_fnFeatureHtmlTable:wb,_fnScrollDraw:na,_fnApplyToChildren:N,_fnCalculateColumnWidths:Ja,_fnThrottle:Sa,_fnConvertToWidth:Jb,_fnGetWidestNode:Kb,_fnGetMaxLenString:Lb,_fnStringToCss:B, -_fnSortFlatten:Y,_fnSort:rb,_fnSortAria:Nb,_fnSortListener:Za,_fnSortAttachListener:Qa,_fnSortingClasses:Aa,_fnSortData:Mb,_fnSaveState:Ba,_fnLoadState:Ob,_fnSettingsFromNode:Ca,_fnLog:O,_fnMap:M,_fnBindAction:$a,_fnCallbackReg:E,_fnCallbackFire:A,_fnLengthOverflow:Wa,_fnRenderer:Ra,_fnDataSource:D,_fnRowAttributes:Pa,_fnExtend:ab,_fnCalculateEnd:function(){}});f.fn.dataTable=q;q.$=f;f.fn.dataTableSettings=q.settings;f.fn.dataTableExt=q.ext;f.fn.DataTable=function(a){return f(this).dataTable(a).api()}; -f.each(q,function(a,b){f.fn.DataTable[a]=b});return f.fn.dataTable}); diff --git a/core/src/main/resources/org/apache/spark/ui/static/jquery.dataTables.1.10.25.min.css b/core/src/main/resources/org/apache/spark/ui/static/jquery.dataTables.1.10.25.min.css new file mode 100644 index 0000000000000..6e60559741ccb --- /dev/null +++ b/core/src/main/resources/org/apache/spark/ui/static/jquery.dataTables.1.10.25.min.css @@ -0,0 +1 @@ +table.dataTable{width:100%;margin:0 auto;clear:both;border-collapse:separate;border-spacing:0}table.dataTable thead th,table.dataTable tfoot th{font-weight:bold}table.dataTable thead th,table.dataTable thead td{padding:10px 18px;border-bottom:1px solid #111}table.dataTable thead th:active,table.dataTable thead td:active{outline:none}table.dataTable tfoot th,table.dataTable tfoot td{padding:10px 18px 6px 18px;border-top:1px solid #111}table.dataTable thead .sorting,table.dataTable thead .sorting_asc,table.dataTable thead .sorting_desc,table.dataTable thead .sorting_asc_disabled,table.dataTable thead .sorting_desc_disabled{cursor:pointer;*cursor:hand;background-repeat:no-repeat;background-position:center right}table.dataTable thead .sorting{background-image:url("../images/sort_both.png")}table.dataTable thead .sorting_asc{background-image:url("../images/sort_asc.png") !important}table.dataTable thead .sorting_desc{background-image:url("../images/sort_desc.png") !important}table.dataTable thead .sorting_asc_disabled{background-image:url("../images/sort_asc_disabled.png")}table.dataTable thead .sorting_desc_disabled{background-image:url("../images/sort_desc_disabled.png")}table.dataTable tbody tr{background-color:#fff}table.dataTable tbody tr.selected{background-color:#b0bed9}table.dataTable tbody th,table.dataTable tbody td{padding:8px 10px}table.dataTable.row-border tbody th,table.dataTable.row-border tbody td,table.dataTable.display tbody th,table.dataTable.display tbody td{border-top:1px solid #ddd}table.dataTable.row-border tbody tr:first-child th,table.dataTable.row-border tbody tr:first-child td,table.dataTable.display tbody tr:first-child th,table.dataTable.display tbody tr:first-child td{border-top:none}table.dataTable.cell-border tbody th,table.dataTable.cell-border tbody td{border-top:1px solid #ddd;border-right:1px solid #ddd}table.dataTable.cell-border tbody tr th:first-child,table.dataTable.cell-border tbody tr td:first-child{border-left:1px solid #ddd}table.dataTable.cell-border tbody tr:first-child th,table.dataTable.cell-border tbody tr:first-child td{border-top:none}table.dataTable.stripe tbody tr.odd,table.dataTable.display tbody tr.odd{background-color:#f9f9f9}table.dataTable.stripe tbody tr.odd.selected,table.dataTable.display tbody tr.odd.selected{background-color:#acbad4}table.dataTable.hover tbody tr:hover,table.dataTable.display tbody tr:hover{background-color:#f6f6f6}table.dataTable.hover tbody tr:hover.selected,table.dataTable.display tbody tr:hover.selected{background-color:#aab7d1}table.dataTable.order-column tbody tr>.sorting_1,table.dataTable.order-column tbody tr>.sorting_2,table.dataTable.order-column tbody tr>.sorting_3,table.dataTable.display tbody tr>.sorting_1,table.dataTable.display tbody tr>.sorting_2,table.dataTable.display tbody tr>.sorting_3{background-color:#fafafa}table.dataTable.order-column tbody tr.selected>.sorting_1,table.dataTable.order-column tbody tr.selected>.sorting_2,table.dataTable.order-column tbody tr.selected>.sorting_3,table.dataTable.display tbody tr.selected>.sorting_1,table.dataTable.display tbody tr.selected>.sorting_2,table.dataTable.display tbody tr.selected>.sorting_3{background-color:#acbad5}table.dataTable.display tbody tr.odd>.sorting_1,table.dataTable.order-column.stripe tbody tr.odd>.sorting_1{background-color:#f1f1f1}table.dataTable.display tbody tr.odd>.sorting_2,table.dataTable.order-column.stripe tbody tr.odd>.sorting_2{background-color:#f3f3f3}table.dataTable.display tbody tr.odd>.sorting_3,table.dataTable.order-column.stripe tbody tr.odd>.sorting_3{background-color:whitesmoke}table.dataTable.display tbody tr.odd.selected>.sorting_1,table.dataTable.order-column.stripe tbody tr.odd.selected>.sorting_1{background-color:#a6b4cd}table.dataTable.display tbody tr.odd.selected>.sorting_2,table.dataTable.order-column.stripe tbody tr.odd.selected>.sorting_2{background-color:#a8b5cf}table.dataTable.display tbody tr.odd.selected>.sorting_3,table.dataTable.order-column.stripe tbody tr.odd.selected>.sorting_3{background-color:#a9b7d1}table.dataTable.display tbody tr.even>.sorting_1,table.dataTable.order-column.stripe tbody tr.even>.sorting_1{background-color:#fafafa}table.dataTable.display tbody tr.even>.sorting_2,table.dataTable.order-column.stripe tbody tr.even>.sorting_2{background-color:#fcfcfc}table.dataTable.display tbody tr.even>.sorting_3,table.dataTable.order-column.stripe tbody tr.even>.sorting_3{background-color:#fefefe}table.dataTable.display tbody tr.even.selected>.sorting_1,table.dataTable.order-column.stripe tbody tr.even.selected>.sorting_1{background-color:#acbad5}table.dataTable.display tbody tr.even.selected>.sorting_2,table.dataTable.order-column.stripe tbody tr.even.selected>.sorting_2{background-color:#aebcd6}table.dataTable.display tbody tr.even.selected>.sorting_3,table.dataTable.order-column.stripe tbody tr.even.selected>.sorting_3{background-color:#afbdd8}table.dataTable.display tbody tr:hover>.sorting_1,table.dataTable.order-column.hover tbody tr:hover>.sorting_1{background-color:#eaeaea}table.dataTable.display tbody tr:hover>.sorting_2,table.dataTable.order-column.hover tbody tr:hover>.sorting_2{background-color:#ececec}table.dataTable.display tbody tr:hover>.sorting_3,table.dataTable.order-column.hover tbody tr:hover>.sorting_3{background-color:#efefef}table.dataTable.display tbody tr:hover.selected>.sorting_1,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_1{background-color:#a2aec7}table.dataTable.display tbody tr:hover.selected>.sorting_2,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_2{background-color:#a3b0c9}table.dataTable.display tbody tr:hover.selected>.sorting_3,table.dataTable.order-column.hover tbody tr:hover.selected>.sorting_3{background-color:#a5b2cb}table.dataTable.no-footer{border-bottom:1px solid #111}table.dataTable.nowrap th,table.dataTable.nowrap td{white-space:nowrap}table.dataTable.compact thead th,table.dataTable.compact thead td{padding:4px 17px}table.dataTable.compact tfoot th,table.dataTable.compact tfoot td{padding:4px}table.dataTable.compact tbody th,table.dataTable.compact tbody td{padding:4px}table.dataTable th.dt-left,table.dataTable td.dt-left{text-align:left}table.dataTable th.dt-center,table.dataTable td.dt-center,table.dataTable td.dataTables_empty{text-align:center}table.dataTable th.dt-right,table.dataTable td.dt-right{text-align:right}table.dataTable th.dt-justify,table.dataTable td.dt-justify{text-align:justify}table.dataTable th.dt-nowrap,table.dataTable td.dt-nowrap{white-space:nowrap}table.dataTable thead th.dt-head-left,table.dataTable thead td.dt-head-left,table.dataTable tfoot th.dt-head-left,table.dataTable tfoot td.dt-head-left{text-align:left}table.dataTable thead th.dt-head-center,table.dataTable thead td.dt-head-center,table.dataTable tfoot th.dt-head-center,table.dataTable tfoot td.dt-head-center{text-align:center}table.dataTable thead th.dt-head-right,table.dataTable thead td.dt-head-right,table.dataTable tfoot th.dt-head-right,table.dataTable tfoot td.dt-head-right{text-align:right}table.dataTable thead th.dt-head-justify,table.dataTable thead td.dt-head-justify,table.dataTable tfoot th.dt-head-justify,table.dataTable tfoot td.dt-head-justify{text-align:justify}table.dataTable thead th.dt-head-nowrap,table.dataTable thead td.dt-head-nowrap,table.dataTable tfoot th.dt-head-nowrap,table.dataTable tfoot td.dt-head-nowrap{white-space:nowrap}table.dataTable tbody th.dt-body-left,table.dataTable tbody td.dt-body-left{text-align:left}table.dataTable tbody th.dt-body-center,table.dataTable tbody td.dt-body-center{text-align:center}table.dataTable tbody th.dt-body-right,table.dataTable tbody td.dt-body-right{text-align:right}table.dataTable tbody th.dt-body-justify,table.dataTable tbody td.dt-body-justify{text-align:justify}table.dataTable tbody th.dt-body-nowrap,table.dataTable tbody td.dt-body-nowrap{white-space:nowrap}table.dataTable,table.dataTable th,table.dataTable td{box-sizing:content-box}.dataTables_wrapper{position:relative;clear:both;*zoom:1;zoom:1}.dataTables_wrapper .dataTables_length{float:left}.dataTables_wrapper .dataTables_length select{border:1px solid #aaa;border-radius:3px;padding:5px;background-color:transparent;padding:4px}.dataTables_wrapper .dataTables_filter{float:right;text-align:right}.dataTables_wrapper .dataTables_filter input{border:1px solid #aaa;border-radius:3px;padding:5px;background-color:transparent;margin-left:3px}.dataTables_wrapper .dataTables_info{clear:both;float:left;padding-top:.755em}.dataTables_wrapper .dataTables_paginate{float:right;text-align:right;padding-top:.25em}.dataTables_wrapper .dataTables_paginate .paginate_button{box-sizing:border-box;display:inline-block;min-width:1.5em;padding:.5em 1em;margin-left:2px;text-align:center;text-decoration:none !important;cursor:pointer;*cursor:hand;color:#333 !important;border:1px solid transparent;border-radius:2px}.dataTables_wrapper .dataTables_paginate .paginate_button.current,.dataTables_wrapper .dataTables_paginate .paginate_button.current:hover{color:#333 !important;border:1px solid #979797;background-color:white;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, white), color-stop(100%, #dcdcdc));background:-webkit-linear-gradient(top, white 0%, #dcdcdc 100%);background:-moz-linear-gradient(top, white 0%, #dcdcdc 100%);background:-ms-linear-gradient(top, white 0%, #dcdcdc 100%);background:-o-linear-gradient(top, white 0%, #dcdcdc 100%);background:linear-gradient(to bottom, white 0%, #dcdcdc 100%)}.dataTables_wrapper .dataTables_paginate .paginate_button.disabled,.dataTables_wrapper .dataTables_paginate .paginate_button.disabled:hover,.dataTables_wrapper .dataTables_paginate .paginate_button.disabled:active{cursor:default;color:#666 !important;border:1px solid transparent;background:transparent;box-shadow:none}.dataTables_wrapper .dataTables_paginate .paginate_button:hover{color:white !important;border:1px solid #111;background-color:#585858;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #585858), color-stop(100%, #111));background:-webkit-linear-gradient(top, #585858 0%, #111 100%);background:-moz-linear-gradient(top, #585858 0%, #111 100%);background:-ms-linear-gradient(top, #585858 0%, #111 100%);background:-o-linear-gradient(top, #585858 0%, #111 100%);background:linear-gradient(to bottom, #585858 0%, #111 100%)}.dataTables_wrapper .dataTables_paginate .paginate_button:active{outline:none;background-color:#2b2b2b;background:-webkit-gradient(linear, left top, left bottom, color-stop(0%, #2b2b2b), color-stop(100%, #0c0c0c));background:-webkit-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-moz-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-ms-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:-o-linear-gradient(top, #2b2b2b 0%, #0c0c0c 100%);background:linear-gradient(to bottom, #2b2b2b 0%, #0c0c0c 100%);box-shadow:inset 0 0 3px #111}.dataTables_wrapper .dataTables_paginate .ellipsis{padding:0 1em}.dataTables_wrapper .dataTables_processing{position:absolute;top:50%;left:50%;width:100%;height:40px;margin-left:-50%;margin-top:-25px;padding-top:20px;text-align:center;font-size:1.2em;background-color:white;background:-webkit-gradient(linear, left top, right top, color-stop(0%, rgba(255, 255, 255, 0)), color-stop(25%, rgba(255, 255, 255, 0.9)), color-stop(75%, rgba(255, 255, 255, 0.9)), color-stop(100%, rgba(255, 255, 255, 0)));background:-webkit-linear-gradient(left, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, rgba(255, 255, 255, 0) 100%);background:-moz-linear-gradient(left, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, rgba(255, 255, 255, 0) 100%);background:-ms-linear-gradient(left, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, rgba(255, 255, 255, 0) 100%);background:-o-linear-gradient(left, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, rgba(255, 255, 255, 0) 100%);background:linear-gradient(to right, rgba(255, 255, 255, 0) 0%, rgba(255, 255, 255, 0.9) 25%, rgba(255, 255, 255, 0.9) 75%, rgba(255, 255, 255, 0) 100%)}.dataTables_wrapper .dataTables_length,.dataTables_wrapper .dataTables_filter,.dataTables_wrapper .dataTables_info,.dataTables_wrapper .dataTables_processing,.dataTables_wrapper .dataTables_paginate{color:#333}.dataTables_wrapper .dataTables_scroll{clear:both}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody{*margin-top:-1px;-webkit-overflow-scrolling:touch}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>th,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>td,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>th,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>td{vertical-align:middle}.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>th>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>thead>tr>td>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>th>div.dataTables_sizing,.dataTables_wrapper .dataTables_scroll div.dataTables_scrollBody>table>tbody>tr>td>div.dataTables_sizing{height:0;overflow:hidden;margin:0 !important;padding:0 !important}.dataTables_wrapper.no-footer .dataTables_scrollBody{border-bottom:1px solid #111}.dataTables_wrapper.no-footer div.dataTables_scrollHead table.dataTable,.dataTables_wrapper.no-footer div.dataTables_scrollBody>table{border-bottom:none}.dataTables_wrapper:after{visibility:hidden;display:block;content:"";clear:both;height:0}@media screen and (max-width: 767px){.dataTables_wrapper .dataTables_info,.dataTables_wrapper .dataTables_paginate{float:none;text-align:center}.dataTables_wrapper .dataTables_paginate{margin-top:.5em}}@media screen and (max-width: 640px){.dataTables_wrapper .dataTables_length,.dataTables_wrapper .dataTables_filter{float:none;text-align:center}.dataTables_wrapper .dataTables_filter{margin-top:.5em}} \ No newline at end of file diff --git a/core/src/main/resources/org/apache/spark/ui/static/jquery.dataTables.1.10.25.min.js b/core/src/main/resources/org/apache/spark/ui/static/jquery.dataTables.1.10.25.min.js new file mode 100644 index 0000000000000..5c85b24f8e112 --- /dev/null +++ b/core/src/main/resources/org/apache/spark/ui/static/jquery.dataTables.1.10.25.min.js @@ -0,0 +1,184 @@ +/*! + Copyright 2008-2021 SpryMedia Ltd. + + This source file is free software, available under the following license: + MIT license - http://datatables.net/license + + This source file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the license files for details. + + For details please refer to: http://www.datatables.net + DataTables 1.10.25 + ©2008-2021 SpryMedia Ltd - datatables.net/license +*/ +var $jscomp=$jscomp||{};$jscomp.scope={};$jscomp.findInternal=function(k,y,z){k instanceof String&&(k=String(k));for(var q=k.length,G=0;G").css({position:"fixed",top:0,left:-1*k(y).scrollLeft(),height:1, +width:1,overflow:"hidden"}).append(k("
        ").css({position:"absolute",top:1,left:1,width:100,overflow:"scroll"}).append(k("
        ").css({width:"100%",height:10}))).appendTo("body"),d=c.children(),e=d.children();b.barWidth=d[0].offsetWidth-d[0].clientWidth;b.bScrollOversize=100===e[0].offsetWidth&&100!==d[0].clientWidth;b.bScrollbarLeft=1!==Math.round(e.offset().left);b.bBounding=c[0].getBoundingClientRect().width?!0:!1;c.remove()}k.extend(a.oBrowser,u.__browser);a.oScroll.iBarWidth=u.__browser.barWidth} +function Cb(a,b,c,d,e,f){var g=!1;if(c!==q){var h=c;g=!0}for(;d!==e;)a.hasOwnProperty(d)&&(h=g?b(h,a[d],d,a):a[d],g=!0,d+=f);return h}function Xa(a,b){var c=u.defaults.column,d=a.aoColumns.length;c=k.extend({},u.models.oColumn,c,{nTh:b?b:z.createElement("th"),sTitle:c.sTitle?c.sTitle:b?b.innerHTML:"",aDataSort:c.aDataSort?c.aDataSort:[d],mData:c.mData?c.mData:d,idx:d});a.aoColumns.push(c);c=a.aoPreSearchCols;c[d]=k.extend({},u.models.oSearch,c[d]);Ea(a,d,k(b).data())}function Ea(a,b,c){b=a.aoColumns[b]; +var d=a.oClasses,e=k(b.nTh);if(!b.sWidthOrig){b.sWidthOrig=e.attr("width")||null;var f=(e.attr("style")||"").match(/width:\s*(\d+[pxem%]+)/);f&&(b.sWidthOrig=f[1])}c!==q&&null!==c&&(Ab(c),O(u.defaults.column,c,!0),c.mDataProp===q||c.mData||(c.mData=c.mDataProp),c.sType&&(b._sManualType=c.sType),c.className&&!c.sClass&&(c.sClass=c.className),c.sClass&&e.addClass(c.sClass),k.extend(b,c),V(b,c,"sWidth","sWidthOrig"),c.iDataSort!==q&&(b.aDataSort=[c.iDataSort]),V(b,c,"aDataSort"));var g=b.mData,h=ia(g), +l=b.mRender?ia(b.mRender):null;c=function(n){return"string"===typeof n&&-1!==n.indexOf("@")};b._bAttrSrc=k.isPlainObject(g)&&(c(g.sort)||c(g.type)||c(g.filter));b._setter=null;b.fnGetData=function(n,m,p){var t=h(n,m,q,p);return l&&m?l(t,m,n,p):t};b.fnSetData=function(n,m,p){return da(g)(n,m,p)};"number"!==typeof g&&(a._rowReadObject=!0);a.oFeatures.bSort||(b.bSortable=!1,e.addClass(d.sSortableNone));a=-1!==k.inArray("asc",b.asSorting);c=-1!==k.inArray("desc",b.asSorting);b.bSortable&&(a||c)?a&&!c? +(b.sSortingClass=d.sSortableAsc,b.sSortingClassJUI=d.sSortJUIAscAllowed):!a&&c?(b.sSortingClass=d.sSortableDesc,b.sSortingClassJUI=d.sSortJUIDescAllowed):(b.sSortingClass=d.sSortable,b.sSortingClassJUI=d.sSortJUI):(b.sSortingClass=d.sSortableNone,b.sSortingClassJUI="")}function ra(a){if(!1!==a.oFeatures.bAutoWidth){var b=a.aoColumns;Ya(a);for(var c=0,d=b.length;cn[m])d(h.length+n[m],l);else if("string"===typeof n[m]){var p=0;for(g=h.length;pb&&a[e]--; -1!=d&&c===q&&a.splice(d,1)}function va(a,b,c,d){var e=a.aoData[b],f,g=function(l,n){for(;l.childNodes.length;)l.removeChild(l.firstChild);l.innerHTML=S(a,b,n,"display")};if("dom"!==c&&(c&&"auto"!==c||"dom"!==e.src)){var h=e.anCells;if(h)if(d!==q)g(h[d],d);else for(c=0,f=h.length;c").appendTo(d));var l=0;for(b=h.length;l=a.fnRecordsDisplay()?0:h,a.iInitDisplayStart=-1);h=a._iDisplayStart;var m=a.fnDisplayEnd();if(a.bDeferLoading)a.bDeferLoading=!1,a.iDraw++,U(a,!1);else if(!l)a.iDraw++;else if(!a.bDestroying&&!b){Gb(a);return}if(0!==n.length)for(b=l?a.aoData.length:m,g=l?0:h;g",{"class":f?e[0]:""}).append(k("
        ").insertAfter(E));r.nTBody=aa[0];E=p.children("tfoot"); +0===E.length&&0").appendTo(p));0===E.length||0===E.children().length?p.addClass(A.sNoFooter):0/g,tc=/^\d{2,4}[\.\/\-]\d{1,2}[\.\/\-]\d{1,2}([T ]{1}\d{1,2}[:\.]\d{2}([\.:]\d{2})?)?$/,uc=/(\/|\.|\*|\+|\?|\||\(|\)|\[|\]|\{|\}|\\|\$|\^|\-)/g,rb=/['\u00A0,$£€¥%\u2009\u202F\u20BD\u20a9\u20BArfkɃΞ]/gi,Z=function(a){return a&&!0!==a&&"-"!==a?!1:!0},hc=function(a){var b=parseInt(a,10);return!isNaN(b)&&isFinite(a)?b:null},ic=function(a,b){sb[b]||(sb[b]=new RegExp(ib(b),"g"));return"string"===typeof a&&"."!==b?a.replace(/\./g,"").replace(sb[b],"."): +a},tb=function(a,b,c){var d="string"===typeof a;if(Z(a))return!0;b&&d&&(a=ic(a,b));c&&d&&(a=a.replace(rb,""));return!isNaN(parseFloat(a))&&isFinite(a)},jc=function(a,b,c){return Z(a)?!0:Z(a)||"string"===typeof a?tb(a.replace(Ua,""),b,c)?!0:null:null},T=function(a,b,c){var d=[],e=0,f=a.length;if(c!==q)for(;ea.length)){var b=a.slice().sort();for(var c=b[0],d=1,e=b.length;d")[0],rc=Pa.textContent!==q,sc=/<.*?>/g,gb=u.util.throttle,mc=[],N=Array.prototype,vc=function(a){var b,c=u.settings,d=k.map(c,function(f,g){return f.nTable});if(a){if(a.nTable&&a.oApi)return[a];if(a.nodeName&&"table"===a.nodeName.toLowerCase()){var e=k.inArray(a,d);return-1!==e?[c[e]]:null}if(a&&"function"===typeof a.settings)return a.settings().toArray(); +"string"===typeof a?b=k(a):a instanceof k&&(b=a)}else return[];if(b)return b.map(function(f){e=k.inArray(this,d);return-1!==e?c[e]:null}).toArray()};var C=function(a,b){if(!(this instanceof C))return new C(a,b);var c=[],d=function(g){(g=vc(g))&&c.push.apply(c,g)};if(Array.isArray(a))for(var e=0,f=a.length;ea?new C(b[a],this[a]):null},filter:function(a){var b=[];if(N.filter)b=N.filter.call(this,a,this);else for(var c=0,d=this.length;c").addClass(h),k("td",l).addClass(h).html(g)[0].colSpan=na(a),e.push(l[0]))};f(c,d);b._details&&b._details.detach();b._details=k(e);b._detailsShow&&b._details.insertAfter(b.nTr)},xb=function(a,b){var c=a.context;c.length&& +(a=c[0].aoData[b!==q?b:a[0]])&&a._details&&(a._details.remove(),a._detailsShow=q,a._details=q)},pc=function(a,b){var c=a.context;c.length&&a.length&&(a=c[0].aoData[a[0]],a._details&&((a._detailsShow=b)?a._details.insertAfter(a.nTr):a._details.detach(),yc(c[0])))},yc=function(a){var b=new C(a),c=a.aoData;b.off("draw.dt.DT_details column-visibility.dt.DT_details destroy.dt.DT_details");0h){var m=k.map(d,function(p, +t){return p.bVisible?t:null});return[m[m.length+h]]}return[sa(a,h)];case "name":return k.map(e,function(p,t){return p===n[1]?t:null});default:return[]}if(g.nodeName&&g._DT_CellIndex)return[g._DT_CellIndex.column];h=k(f).filter(g).map(function(){return k.inArray(this,f)}).toArray();if(h.length||!g.nodeName)return h;h=k(g).closest("*[data-dt-column]");return h.length?[h.data("dt-column")]:[]},a,c)};w("columns()",function(a,b){a===q?a="":k.isPlainObject(a)&&(b=a,a="");b=vb(b);var c=this.iterator("table", +function(d){return Ac(d,a,b)},1);c.selector.cols=a;c.selector.opts=b;return c});J("columns().header()","column().header()",function(a,b){return this.iterator("column",function(c,d){return c.aoColumns[d].nTh},1)});J("columns().footer()","column().footer()",function(a,b){return this.iterator("column",function(c,d){return c.aoColumns[d].nTf},1)});J("columns().data()","column().data()",function(){return this.iterator("column-rows",qc,1)});J("columns().dataSrc()","column().dataSrc()",function(){return this.iterator("column", +function(a,b){return a.aoColumns[b].mData},1)});J("columns().cache()","column().cache()",function(a){return this.iterator("column-rows",function(b,c,d,e,f){return Da(b.aoData,f,"search"===a?"_aFilterData":"_aSortData",c)},1)});J("columns().nodes()","column().nodes()",function(){return this.iterator("column-rows",function(a,b,c,d,e){return Da(a.aoData,e,"anCells",b)},1)});J("columns().visible()","column().visible()",function(a,b){var c=this,d=this.iterator("column",function(e,f){if(a===q)return e.aoColumns[f].bVisible; +var g=e.aoColumns,h=g[f],l=e.aoData,n;if(a!==q&&h.bVisible!==a){if(a){var m=k.inArray(!0,T(g,"bVisible"),f+1);g=0;for(n=l.length;gd;return!0};u.isDataTable=u.fnIsDataTable=function(a){var b=k(a).get(0),c=!1;if(a instanceof u.Api)return!0;k.each(u.settings,function(d,e){d=e.nScrollHead?k("table",e.nScrollHead)[0]:null;var f=e.nScrollFoot?k("table",e.nScrollFoot)[0]:null;if(e.nTable===b||d===b||f===b)c=!0});return c};u.tables=u.fnTables=function(a){var b=!1;k.isPlainObject(a)&&(b=a.api,a=a.visible); +var c=k.map(u.settings,function(d){if(!a||a&&k(d.nTable).is(":visible"))return d.nTable});return b?new C(c):c};u.camelToHungarian=O;w("$()",function(a,b){b=this.rows(b).nodes();b=k(b);return k([].concat(b.filter(a).toArray(),b.find(a).toArray()))});k.each(["on","one","off"],function(a,b){w(b+"()",function(){var c=Array.prototype.slice.call(arguments);c[0]=k.map(c[0].split(/\s/),function(e){return e.match(/\.dt\b/)?e:e+".dt"}).join(" ");var d=k(this.tables().nodes());d[b].apply(d,c);return this})}); +w("clear()",function(){return this.iterator("table",function(a){Ia(a)})});w("settings()",function(){return new C(this.context,this.context)});w("init()",function(){var a=this.context;return a.length?a[0].oInit:null});w("data()",function(){return this.iterator("table",function(a){return T(a.aoData,"_aData")}).flatten()});w("destroy()",function(a){a=a||!1;return this.iterator("table",function(b){var c=b.nTableWrapper.parentNode,d=b.oClasses,e=b.nTable,f=b.nTBody,g=b.nTHead,h=b.nTFoot,l=k(e);f=k(f); +var n=k(b.nTableWrapper),m=k.map(b.aoData,function(t){return t.nTr}),p;b.bDestroying=!0;H(b,"aoDestroyCallback","destroy",[b]);a||(new C(b)).columns().visible(!0);n.off(".DT").find(":not(tbody *)").off(".DT");k(y).off(".DT-"+b.sInstance);e!=g.parentNode&&(l.children("thead").detach(),l.append(g));h&&e!=h.parentNode&&(l.children("tfoot").detach(),l.append(h));b.aaSorting=[];b.aaSortingFixed=[];Qa(b);k(m).removeClass(b.asStripeClasses.join(" "));k("th, td",g).removeClass(d.sSortable+" "+d.sSortableAsc+ +" "+d.sSortableDesc+" "+d.sSortableNone);f.children().detach();f.append(m);g=a?"remove":"detach";l[g]();n[g]();!a&&c&&(c.insertBefore(e,b.nTableReinsertBefore),l.css("width",b.sDestroyWidth).removeClass(d.sTable),(p=b.asDestroyStripes.length)&&f.children().each(function(t){k(this).addClass(b.asDestroyStripes[t%p])}));c=k.inArray(b,u.settings);-1!==c&&u.settings.splice(c,1)})});k.each(["column","row","cell"],function(a,b){w(b+"s().every()",function(c){var d=this.selector.opts,e=this;return this.iterator(b, +function(f,g,h,l,n){c.call(e[b](g,"cell"===b?h:d,"cell"===b?d:q),g,h,l,n)})})});w("i18n()",function(a,b,c){var d=this.context[0];a=ia(a)(d.oLanguage);a===q&&(a=b);c!==q&&k.isPlainObject(a)&&(a=a[c]!==q?a[c]:a._);return a.replace("%d",c)});u.version="1.10.25";u.settings=[];u.models={};u.models.oSearch={bCaseInsensitive:!0,sSearch:"",bRegex:!1,bSmart:!0};u.models.oRow={nTr:null,anCells:null,_aData:[],_aSortData:null,_aFilterData:null,_sFilterRow:null,_sRowStripe:"",src:null,idx:-1};u.models.oColumn= +{idx:null,aDataSort:null,asSorting:null,bSearchable:null,bSortable:null,bVisible:null,_sManualType:null,_bAttrSrc:!1,fnCreatedCell:null,fnGetData:null,fnSetData:null,mData:null,mRender:null,nTh:null,nTf:null,sClass:null,sContentPadding:null,sDefaultContent:null,sName:null,sSortDataType:"std",sSortingClass:null,sSortingClassJUI:null,sTitle:null,sType:null,sWidth:null,sWidthOrig:null};u.defaults={aaData:null,aaSorting:[[0,"asc"]],aaSortingFixed:[],ajax:null,aLengthMenu:[10,25,50,100],aoColumns:null, +aoColumnDefs:null,aoSearchCols:[],asStripeClasses:null,bAutoWidth:!0,bDeferRender:!1,bDestroy:!1,bFilter:!0,bInfo:!0,bLengthChange:!0,bPaginate:!0,bProcessing:!1,bRetrieve:!1,bScrollCollapse:!1,bServerSide:!1,bSort:!0,bSortMulti:!0,bSortCellsTop:!1,bSortClasses:!0,bStateSave:!1,fnCreatedRow:null,fnDrawCallback:null,fnFooterCallback:null,fnFormatNumber:function(a){return a.toString().replace(/\B(?=(\d{3})+(?!\d))/g,this.oLanguage.sThousands)},fnHeaderCallback:null,fnInfoCallback:null,fnInitComplete:null, +fnPreDrawCallback:null,fnRowCallback:null,fnServerData:null,fnServerParams:null,fnStateLoadCallback:function(a){try{return JSON.parse((-1===a.iStateDuration?sessionStorage:localStorage).getItem("DataTables_"+a.sInstance+"_"+location.pathname))}catch(b){return{}}},fnStateLoadParams:null,fnStateLoaded:null,fnStateSaveCallback:function(a,b){try{(-1===a.iStateDuration?sessionStorage:localStorage).setItem("DataTables_"+a.sInstance+"_"+location.pathname,JSON.stringify(b))}catch(c){}},fnStateSaveParams:null, +iStateDuration:7200,iDeferLoading:null,iDisplayLength:10,iDisplayStart:0,iTabIndex:0,oClasses:{},oLanguage:{oAria:{sSortAscending:": activate to sort column ascending",sSortDescending:": activate to sort column descending"},oPaginate:{sFirst:"First",sLast:"Last",sNext:"Next",sPrevious:"Previous"},sEmptyTable:"No data available in table",sInfo:"Showing _START_ to _END_ of _TOTAL_ entries",sInfoEmpty:"Showing 0 to 0 of 0 entries",sInfoFiltered:"(filtered from _MAX_ total entries)",sInfoPostFix:"",sDecimal:"", +sThousands:",",sLengthMenu:"Show _MENU_ entries",sLoadingRecords:"Loading...",sProcessing:"Processing...",sSearch:"Search:",sSearchPlaceholder:"",sUrl:"",sZeroRecords:"No matching records found"},oSearch:k.extend({},u.models.oSearch),sAjaxDataProp:"data",sAjaxSource:null,sDom:"lfrtip",searchDelay:null,sPaginationType:"simple_numbers",sScrollX:"",sScrollXInner:"",sScrollY:"",sServerMethod:"GET",renderer:null,rowId:"DT_RowId"};G(u.defaults);u.defaults.column={aDataSort:null,iDataSort:-1,asSorting:["asc", +"desc"],bSearchable:!0,bSortable:!0,bVisible:!0,fnCreatedCell:null,mData:null,mRender:null,sCellType:"td",sClass:"",sContentPadding:"",sDefaultContent:null,sName:"",sSortDataType:"std",sTitle:null,sType:null,sWidth:null};G(u.defaults.column);u.models.oSettings={oFeatures:{bAutoWidth:null,bDeferRender:null,bFilter:null,bInfo:null,bLengthChange:null,bPaginate:null,bProcessing:null,bServerSide:null,bSort:null,bSortMulti:null,bSortClasses:null,bStateSave:null},oScroll:{bCollapse:null,iBarWidth:0,sX:null, +sXInner:null,sY:null},oLanguage:{fnInfoCallback:null},oBrowser:{bScrollOversize:!1,bScrollbarLeft:!1,bBounding:!1,barWidth:0},ajax:null,aanFeatures:[],aoData:[],aiDisplay:[],aiDisplayMaster:[],aIds:{},aoColumns:[],aoHeader:[],aoFooter:[],oPreviousSearch:{},aoPreSearchCols:[],aaSorting:null,aaSortingFixed:[],asStripeClasses:null,asDestroyStripes:[],sDestroyWidth:0,aoRowCallback:[],aoHeaderCallback:[],aoFooterCallback:[],aoDrawCallback:[],aoRowCreatedCallback:[],aoPreDrawCallback:[],aoInitComplete:[], +aoStateSaveParams:[],aoStateLoadParams:[],aoStateLoaded:[],sTableId:"",nTable:null,nTHead:null,nTFoot:null,nTBody:null,nTableWrapper:null,bDeferLoading:!1,bInitialised:!1,aoOpenRows:[],sDom:null,searchDelay:null,sPaginationType:"two_button",iStateDuration:0,aoStateSave:[],aoStateLoad:[],oSavedState:null,oLoadedState:null,sAjaxSource:null,sAjaxDataProp:null,jqXHR:null,json:q,oAjaxData:q,fnServerData:null,aoServerParams:[],sServerMethod:null,fnFormatNumber:null,aLengthMenu:null,iDraw:0,bDrawing:!1, +iDrawError:-1,_iDisplayLength:10,_iDisplayStart:0,_iRecordsTotal:0,_iRecordsDisplay:0,oClasses:{},bFiltered:!1,bSorted:!1,bSortCellsTop:null,oInit:null,aoDestroyCallback:[],fnRecordsTotal:function(){return"ssp"==P(this)?1*this._iRecordsTotal:this.aiDisplayMaster.length},fnRecordsDisplay:function(){return"ssp"==P(this)?1*this._iRecordsDisplay:this.aiDisplay.length},fnDisplayEnd:function(){var a=this._iDisplayLength,b=this._iDisplayStart,c=b+a,d=this.aiDisplay.length,e=this.oFeatures,f=e.bPaginate; +return e.bServerSide?!1===f||-1===a?b+d:Math.min(b+a,this._iRecordsDisplay):!f||c>d||-1===a?d:c},oInstance:null,sInstance:null,iTabIndex:0,nScrollHead:null,nScrollFoot:null,aLastSort:[],oPlugins:{},rowIdFn:null,rowId:null};u.ext=L={buttons:{},classes:{},builder:"-source-",errMode:"alert",feature:[],search:[],selector:{cell:[],column:[],row:[]},internal:{},legacy:{ajax:null},pager:{},renderer:{pageButton:{},header:{}},order:{},type:{detect:[],search:{},order:{}},_unique:0,fnVersionCheck:u.fnVersionCheck, +iApiIndex:0,oJUIClasses:{},sVersion:u.version};k.extend(L,{afnFiltering:L.search,aTypes:L.type.detect,ofnSearch:L.type.search,oSort:L.type.order,afnSortData:L.order,aoFeatures:L.feature,oApi:L.internal,oStdClasses:L.classes,oPagination:L.pager});k.extend(u.ext.classes,{sTable:"dataTable",sNoFooter:"no-footer",sPageButton:"paginate_button",sPageButtonActive:"current",sPageButtonDisabled:"disabled",sStripeOdd:"odd",sStripeEven:"even",sRowEmpty:"dataTables_empty",sWrapper:"dataTables_wrapper",sFilter:"dataTables_filter", +sInfo:"dataTables_info",sPaging:"dataTables_paginate paging_",sLength:"dataTables_length",sProcessing:"dataTables_processing",sSortAsc:"sorting_asc",sSortDesc:"sorting_desc",sSortable:"sorting",sSortableAsc:"sorting_desc_disabled",sSortableDesc:"sorting_asc_disabled",sSortableNone:"sorting_disabled",sSortColumn:"sorting_",sFilterInput:"",sLengthSelect:"",sScrollWrapper:"dataTables_scroll",sScrollHead:"dataTables_scrollHead",sScrollHeadInner:"dataTables_scrollHeadInner",sScrollBody:"dataTables_scrollBody", +sScrollFoot:"dataTables_scrollFoot",sScrollFootInner:"dataTables_scrollFootInner",sHeaderTH:"",sFooterTH:"",sSortJUIAsc:"",sSortJUIDesc:"",sSortJUI:"",sSortJUIAscAllowed:"",sSortJUIDescAllowed:"",sSortJUIWrapper:"",sSortIcon:"",sJUIHeader:"",sJUIFooter:""});var ec=u.ext.pager;k.extend(ec,{simple:function(a,b){return["previous","next"]},full:function(a,b){return["first","previous","next","last"]},numbers:function(a,b){return[Ca(a,b)]},simple_numbers:function(a,b){return["previous",Ca(a,b),"next"]}, +full_numbers:function(a,b){return["first","previous",Ca(a,b),"next","last"]},first_last_numbers:function(a,b){return["first",Ca(a,b),"last"]},_numbers:Ca,numbers_length:7});k.extend(!0,u.ext.renderer,{pageButton:{_:function(a,b,c,d,e,f){var g=a.oClasses,h=a.oLanguage.oPaginate,l=a.oLanguage.oAria.paginate||{},n,m,p=0,t=function(x,r){var A,D=g.sPageButtonDisabled,I=function(E){lb(a,E.data.action,!0)};var W=0;for(A=r.length;W").appendTo(x); +t(B,M)}else{n=null;m=M;B=a.iTabIndex;switch(M){case "ellipsis":x.append('');break;case "first":n=h.sFirst;0===e&&(B=-1,m+=" "+D);break;case "previous":n=h.sPrevious;0===e&&(B=-1,m+=" "+D);break;case "next":n=h.sNext;if(0===f||e===f-1)B=-1,m+=" "+D;break;case "last":n=h.sLast;if(0===f||e===f-1)B=-1,m+=" "+D;break;default:n=a.fnFormatNumber(M+1),m=e===M?g.sPageButtonActive:""}null!==n&&(B=k("",{"class":g.sPageButton+" "+m,"aria-controls":a.sTableId,"aria-label":l[M], +"data-dt-idx":p,tabindex:B,id:0===c&&"string"===typeof M?a.sTableId+"_"+M:null}).html(n).appendTo(x),pb(B,{action:M},I),p++)}}};try{var v=k(b).find(z.activeElement).data("dt-idx")}catch(x){}t(k(b).empty(),d);v!==q&&k(b).find("[data-dt-idx="+v+"]").trigger("focus")}}});k.extend(u.ext.type.detect,[function(a,b){b=b.oLanguage.sDecimal;return tb(a,b)?"num"+b:null},function(a,b){if(a&&!(a instanceof Date)&&!tc.test(a))return null;b=Date.parse(a);return null!==b&&!isNaN(b)||Z(a)?"date":null},function(a, +b){b=b.oLanguage.sDecimal;return tb(a,b,!0)?"num-fmt"+b:null},function(a,b){b=b.oLanguage.sDecimal;return jc(a,b)?"html-num"+b:null},function(a,b){b=b.oLanguage.sDecimal;return jc(a,b,!0)?"html-num-fmt"+b:null},function(a,b){return Z(a)||"string"===typeof a&&-1!==a.indexOf("<")?"html":null}]);k.extend(u.ext.type.search,{html:function(a){return Z(a)?a:"string"===typeof a?a.replace(gc," ").replace(Ua,""):""},string:function(a){return Z(a)?a:"string"===typeof a?a.replace(gc," "):a}});var Ta=function(a, +b,c,d){if(0!==a&&(!a||"-"===a))return-Infinity;b&&(a=ic(a,b));a.replace&&(c&&(a=a.replace(c,"")),d&&(a=a.replace(d,"")));return 1*a};k.extend(L.type.order,{"date-pre":function(a){a=Date.parse(a);return isNaN(a)?-Infinity:a},"html-pre":function(a){return Z(a)?"":a.replace?a.replace(/<.*?>/g,"").toLowerCase():a+""},"string-pre":function(a){return Z(a)?"":"string"===typeof a?a.toLowerCase():a.toString?a.toString():""},"string-asc":function(a,b){return ab?1:0},"string-desc":function(a,b){return a< +b?1:a>b?-1:0}});Wa("");k.extend(!0,u.ext.renderer,{header:{_:function(a,b,c,d){k(a.nTable).on("order.dt.DT",function(e,f,g,h){a===f&&(e=c.idx,b.removeClass(d.sSortAsc+" "+d.sSortDesc).addClass("asc"==h[e]?d.sSortAsc:"desc"==h[e]?d.sSortDesc:c.sSortingClass))})},jqueryui:function(a,b,c,d){k("
        ").addClass(d.sSortJUIWrapper).append(b.contents()).append(k("").addClass(d.sSortIcon+" "+c.sSortingClassJUI)).appendTo(b);k(a.nTable).on("order.dt.DT",function(e,f,g,h){a===f&&(e=c.idx,b.removeClass(d.sSortAsc+ +" "+d.sSortDesc).addClass("asc"==h[e]?d.sSortAsc:"desc"==h[e]?d.sSortDesc:c.sSortingClass),b.find("span."+d.sSortIcon).removeClass(d.sSortJUIAsc+" "+d.sSortJUIDesc+" "+d.sSortJUI+" "+d.sSortJUIAscAllowed+" "+d.sSortJUIDescAllowed).addClass("asc"==h[e]?d.sSortJUIAsc:"desc"==h[e]?d.sSortJUIDesc:c.sSortingClassJUI))})}}});var yb=function(a){return"string"===typeof a?a.replace(/&/g,"&").replace(//g,">").replace(/"/g,"""):a};u.render={number:function(a,b,c,d,e){return{display:function(f){if("number"!== +typeof f&&"string"!==typeof f)return f;var g=0>f?"-":"",h=parseFloat(f);if(isNaN(h))return yb(f);h=h.toFixed(c);f=Math.abs(h);h=parseInt(f,10);f=c?b+(f-h).toFixed(c).substring(2):"";0===h&&0===parseFloat(f)&&(g="");return g+(d||"")+h.toString().replace(/\B(?=(\d{3})+(?!\d))/g,a)+f+(e||"")}}},text:function(){return{display:yb,filter:yb}}};k.extend(u.ext.internal,{_fnExternApiFunc:fc,_fnBuildAjax:Ma,_fnAjaxUpdate:Gb,_fnAjaxParameters:Pb,_fnAjaxUpdateDraw:Qb,_fnAjaxDataSrc:Na,_fnAddColumn:Xa,_fnColumnOptions:Ea, +_fnAdjustColumnSizing:ra,_fnVisibleToColumnIndex:sa,_fnColumnIndexToVisible:ta,_fnVisbleColumns:na,_fnGetColumns:Ga,_fnColumnTypes:Za,_fnApplyColumnDefs:Db,_fnHungarianMap:G,_fnCamelToHungarian:O,_fnLanguageCompat:ma,_fnBrowserDetect:Bb,_fnAddData:ea,_fnAddTr:Ha,_fnNodeToDataIndex:function(a,b){return b._DT_RowIndex!==q?b._DT_RowIndex:null},_fnNodeToColumnIndex:function(a,b,c){return k.inArray(c,a.aoData[b].anCells)},_fnGetCellData:S,_fnSetCellData:Eb,_fnSplitObjNotation:bb,_fnGetObjectDataFn:ia, +_fnSetObjectDataFn:da,_fnGetDataMaster:cb,_fnClearTable:Ia,_fnDeleteIndex:Ja,_fnInvalidate:va,_fnGetRowElements:ab,_fnCreateTr:$a,_fnBuildHead:Fb,_fnDrawHead:xa,_fnDraw:fa,_fnReDraw:ja,_fnAddOptionsHtml:Ib,_fnDetectHeader:wa,_fnGetUniqueThs:La,_fnFeatureHtmlFilter:Kb,_fnFilterComplete:ya,_fnFilterCustom:Tb,_fnFilterColumn:Sb,_fnFilter:Rb,_fnFilterCreateSearch:hb,_fnEscapeRegex:ib,_fnFilterData:Ub,_fnFeatureHtmlInfo:Nb,_fnUpdateInfo:Xb,_fnInfoMacros:Yb,_fnInitialise:za,_fnInitComplete:Oa,_fnLengthChange:jb, +_fnFeatureHtmlLength:Jb,_fnFeatureHtmlPaginate:Ob,_fnPageChange:lb,_fnFeatureHtmlProcessing:Lb,_fnProcessingDisplay:U,_fnFeatureHtmlTable:Mb,_fnScrollDraw:Fa,_fnApplyToChildren:ba,_fnCalculateColumnWidths:Ya,_fnThrottle:gb,_fnConvertToWidth:Zb,_fnGetWidestNode:$b,_fnGetMaxLenString:ac,_fnStringToCss:K,_fnSortFlatten:pa,_fnSort:Hb,_fnSortAria:cc,_fnSortListener:ob,_fnSortAttachListener:eb,_fnSortingClasses:Qa,_fnSortData:bc,_fnSaveState:Ra,_fnLoadState:dc,_fnSettingsFromNode:Sa,_fnLog:ca,_fnMap:V, +_fnBindAction:pb,_fnCallbackReg:Q,_fnCallbackFire:H,_fnLengthOverflow:kb,_fnRenderer:fb,_fnDataSource:P,_fnRowAttributes:db,_fnExtend:qb,_fnCalculateEnd:function(){}});k.fn.dataTable=u;u.$=k;k.fn.dataTableSettings=u.settings;k.fn.dataTableExt=u.ext;k.fn.DataTable=function(a){return k(this).dataTable(a).api()};k.each(u,function(a,b){k.fn.DataTable[a]=b});return k.fn.dataTable}); \ No newline at end of file diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css b/core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css index be40cd98b6689..8aac1307bc144 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css +++ b/core/src/main/resources/org/apache/spark/ui/static/webui-dataTables.css @@ -63,3 +63,7 @@ table.dataTable thead .sorting_desc { background: url('images/sort_desc.png') no .dataTables_wrapper .dataTables_paginate .paginate_button.disabled:active { border: none; } + +div.dataTables_wrapper div.dataTables_length select { + width: 100%; +} \ No newline at end of file diff --git a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index 60db87c18a977..111e8f8b3ad4b 100644 --- a/core/src/main/scala/org/apache/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -253,18 +253,18 @@ private[spark] object UIUtils extends Logging { def dataTablesHeaderNodes(request: HttpServletRequest): Seq[Node] = { + "/static/jquery.dataTables.1.10.25.min.css")} type="text/css"/> - + - + } diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 464d5b234c835..2ce8abc0f8736 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -33,13 +33,13 @@ graphlib-dot.min.js sorttable.js vis-timeline-graph2d.min.js vis-timeline-graph2d.min.css -dataTables.bootstrap4.1.10.20.min.css -dataTables.bootstrap4.1.10.20.min.js +dataTables.bootstrap4.1.10.25.min.css +dataTables.bootstrap4.1.10.25.min.js dataTables.rowsGroup.js jquery.blockUI.min.js jquery.cookies.2.2.0.min.js -jquery.dataTables.1.10.20.min.css -jquery.dataTables.1.10.20.min.js +jquery.dataTables.1.10.25.min.css +jquery.dataTables.1.10.25.min.js jquery.mustache.js jsonFormatter.min.css jsonFormatter.min.js From 2f1abc1cf121958d8646c26becd57e121b4ed6ec Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sun, 17 Apr 2022 12:55:44 -0700 Subject: [PATCH 150/535] [SPARK-38927][TESTS] Skip NumPy/Pandas tests in `test_rdd.py` if not available ### What changes were proposed in this pull request? This PR aims to skip NumPy/Pandas tests in `test_rdd.py` if they are not available. ### Why are the changes needed? Currently, the tests that involve NumPy or Pandas are failing because NumPy and Pandas are unavailable in underlying Python. The tests should be skipped instead instead of showing failure. **BEFORE** ``` ====================================================================== ERROR: test_take_on_jrdd_with_large_rows_should_not_cause_deadlock (pyspark.tests.test_rdd.RDDTests) ---------------------------------------------------------------------- Traceback (most recent call last): File ".../test_rdd.py", line 723, in test_take_on_jrdd_with_large_rows_should_not_cause_deadlock import numpy as np ModuleNotFoundError: No module named 'numpy' ---------------------------------------------------------------------- Ran 1 test in 1.990s FAILED (errors=1) ``` **AFTER** ``` Finished test(python3.9): pyspark.tests.test_rdd RDDTests.test_take_on_jrdd_with_large_rows_should_not_cause_deadlock (1s) ... 1 tests were skipped Tests passed in 1 seconds Skipped tests in pyspark.tests.test_rdd RDDTests.test_take_on_jrdd_with_large_rows_should_not_cause_deadlock with python3.9: test_take_on_jrdd_with_large_rows_should_not_cause_deadlock (pyspark.tests.test_rdd.RDDTests) ... skipped 'NumPy or Pandas not installed' ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #36235 from williamhyun/skipnumpy. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit c34140d8d744dc75d130af60080a2a8e25d501b1) Signed-off-by: Dongjoon Hyun --- python/pyspark/tests/test_rdd.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/pyspark/tests/test_rdd.py b/python/pyspark/tests/test_rdd.py index d5d6cdbae8a20..23e41d6c0367b 100644 --- a/python/pyspark/tests/test_rdd.py +++ b/python/pyspark/tests/test_rdd.py @@ -20,6 +20,7 @@ import random import tempfile import time +import unittest from glob import glob from py4j.protocol import Py4JJavaError @@ -35,7 +36,8 @@ NoOpSerializer, ) from pyspark.sql import SparkSession -from pyspark.testing.utils import ReusedPySparkTestCase, SPARK_HOME, QuietTest +from pyspark.testing.utils import ReusedPySparkTestCase, SPARK_HOME, QuietTest, have_numpy +from pyspark.testing.sqlutils import have_pandas global_func = lambda: "Hi" # noqa: E731 @@ -698,6 +700,7 @@ def test_take_on_jrdd(self): rdd = self.sc.parallelize(range(1 << 20)).map(lambda x: str(x)) rdd._jrdd.first() + @unittest.skipIf(not have_numpy or not have_pandas, "NumPy or Pandas not installed") def test_take_on_jrdd_with_large_rows_should_not_cause_deadlock(self): # Regression test for SPARK-38677. # From 6d23b40273a84c9e65f2d78af312956954e47897 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Mon, 18 Apr 2022 08:54:45 +0900 Subject: [PATCH 151/535] [SPARK-38816][ML][DOCS] Fix comment about choice of initial factors in ALS ### What changes were proposed in this pull request? Change a comment in ALS code to match impl. The comment refers to taking the absolute value of a Normal(0,1) value, but it doesn't. ### Why are the changes needed? The docs and impl are inconsistent. The current behavior actually seems fine, desirable, so, change the comments. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests Closes #36228 from srowen/SPARK-38816. Authored-by: Sean Owen Signed-off-by: Hyukjin Kwon (cherry picked from commit b2b350b1566b8b45c6dba2f79ccbc2dc4e95816d) Signed-off-by: Hyukjin Kwon --- .../main/scala/org/apache/spark/ml/recommendation/ALS.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index a0ddf7129c9b5..35c9cca3d7aea 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -1268,9 +1268,8 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { inBlocks: RDD[(Int, InBlock[ID])], rank: Int, seed: Long): RDD[(Int, FactorBlock)] = { - // Choose a unit vector uniformly at random from the unit sphere, but from the - // "first quadrant" where all elements are nonnegative. This can be done by choosing - // elements distributed as Normal(0,1) and taking the absolute value, and then normalizing. + // Choose a unit vector uniformly at random from the unit sphere. This can be done by choosing + // elements distributed as Normal(0,1), and then normalizing. // This appears to create factorizations that have a slightly better reconstruction // (<1%) compared picking elements uniformly at random in [0,1]. inBlocks.mapPartitions({ iter => From 0d1005e8ccdd379f93ae3ce9a61bb0469ec0d695 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sun, 17 Apr 2022 19:29:16 -0700 Subject: [PATCH 152/535] [SPARK-38928][TESTS][SQL] Skip Pandas UDF test in `QueryCompilationErrorsSuite` if not available ### What changes were proposed in this pull request? This PR aims to skip Pandas UDF tests in `QueryCompilationErrorsSuite` if not available. ### Why are the changes needed? The tests should be skipped instead of showing failure. **BEFORE** ``` $ build/sbt "sql/testOnly org.apache.spark.sql.errors.QueryCompilationErrorsSuite" ... [info] *** 2 TESTS FAILED *** [error] Failed tests: [error] org.apache.spark.sql.errors.QueryCompilationErrorsSuite [error] (sql / Test / testOnly) sbt.TestsFailedException: Tests unsuccessful ``` **AFTER** ``` $ build/sbt "sql/testOnly org.apache.spark.sql.errors.QueryCompilationErrorsSuite" ... [info] Tests: succeeded 13, failed 0, canceled 2, ignored 0, pending 0 [info] All tests passed. ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #36236 from williamhyun/skippandas. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 1f55a2af225b9c6226004180d9b83d2424bbe154) Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/errors/QueryCompilationErrorsSuite.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 4d776caacf319..02f639008aac4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -107,6 +107,7 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { test("CANNOT_USE_MIXTURE: Using aggregate function with grouped aggregate pandas UDF") { import IntegratedUDFTestUtils._ + assume(shouldTestGroupedAggPandasUDFs) val df = Seq( (536361, "85123A", 2, 17850), @@ -153,6 +154,7 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { test("UNSUPPORTED_FEATURE: Using pandas UDF aggregate expression with pivot") { import IntegratedUDFTestUtils._ + assume(shouldTestGroupedAggPandasUDFs) val df = Seq( (536361, "85123A", 2, 17850), From 7f317c93383b51bd3ce163b9f8b481f2203760f7 Mon Sep 17 00:00:00 2001 From: fhygh <283452027@qq.com> Date: Mon, 18 Apr 2022 23:11:32 +0800 Subject: [PATCH 153/535] [SPARK-37643][SQL] when charVarcharAsString is true, for char datatype predicate query should skip rpadding rule ### What changes were proposed in this pull request? after add ApplyCharTypePadding rule, when predicate query column data type is char, if column value length is less then defined, will be right-padding, then query will get incorrect result ### Why are the changes needed? fix query incorrect issue when predicate column data type is char, so in this case when charVarcharAsString is true, we should skip the rpadding rule. ### Does this PR introduce _any_ user-facing change? before this fix, if we query with char data type for predicate, then we should be careful to set charVarcharAsString to true. ### How was this patch tested? add new UT. Closes #36187 from fhygh/charpredicatequery. Authored-by: fhygh <283452027@qq.com> Signed-off-by: Wenchen Fan (cherry picked from commit c1ea8b446d00dd0123a0fad93a3e143933419a76) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/Analyzer.scala | 3 +++ .../org/apache/spark/sql/CharVarcharTestSuite.scala | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 9fdc466b4259c..1bc8814b33453 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -4185,6 +4185,9 @@ object ApplyCharTypePadding extends Rule[LogicalPlan] { } override def apply(plan: LogicalPlan): LogicalPlan = { + if (SQLConf.get.charVarcharAsString) { + return plan + } plan.resolveOperatorsUpWithPruning(_.containsAnyPattern(BINARY_COMPARISON, IN)) { case operator => operator.transformExpressionsUpWithPruning( _.containsAnyPattern(BINARY_COMPARISON, IN)) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index 6ade7a7c99e37..978e3f8d36d1b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -100,6 +100,19 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { } } + test("char type values should not be padded when charVarcharAsString is true") { + withSQLConf(SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key -> "true") { + withTable("t") { + sql(s"CREATE TABLE t(a STRING, b CHAR(5), c CHAR(5)) USING $format partitioned by (c)") + sql("INSERT INTO t VALUES ('abc', 'abc', 'abc')") + checkAnswer(sql("SELECT b FROM t WHERE b='abc'"), Row("abc")) + checkAnswer(sql("SELECT b FROM t WHERE b in ('abc')"), Row("abc")) + checkAnswer(sql("SELECT c FROM t WHERE c='abc'"), Row("abc")) + checkAnswer(sql("SELECT c FROM t WHERE c in ('abc')"), Row("abc")) + } + } + } + test("varchar type values length check and trim: partitioned columns") { (0 to 5).foreach { n => // SPARK-34192: we need to create a a new table for each round of test because of From 5544cce15885b1f12ae5826cd3bd2d151e1d544a Mon Sep 17 00:00:00 2001 From: dch nguyen Date: Mon, 18 Apr 2022 17:38:32 +0200 Subject: [PATCH 154/535] [SPARK-37015][PYTHON] Inline type hints for python/pyspark/streaming/dstream.py ### What changes were proposed in this pull request? Inline type hints for python/pyspark/streaming/dstream.py ### Why are the changes needed? We can take advantage of static type checking within the functions by inlining the type hints. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #34324 from dchvn/SPARK-37015. Lead-authored-by: dch nguyen Co-authored-by: dch nguyen Signed-off-by: zero323 (cherry picked from commit dff52d649d1e27baf3b107f75636624e0cfe780f) Signed-off-by: zero323 --- python/pyspark/streaming/context.py | 22 +- python/pyspark/streaming/dstream.py | 369 +++++++++++++++++++++------ python/pyspark/streaming/dstream.pyi | 211 --------------- 3 files changed, 296 insertions(+), 306 deletions(-) delete mode 100644 python/pyspark/streaming/dstream.pyi diff --git a/python/pyspark/streaming/context.py b/python/pyspark/streaming/context.py index 52e5efed06308..0be0c7b034a0b 100644 --- a/python/pyspark/streaming/context.py +++ b/python/pyspark/streaming/context.py @@ -397,12 +397,12 @@ def transform( the transform function parameter will be the same as the order of corresponding DStreams in the list. """ - jdstreams = [d._jdstream for d in dstreams] # type: ignore[attr-defined] + jdstreams = [d._jdstream for d in dstreams] # change the final serializer to sc.serializer func = TransformFunction( self._sc, lambda t, *rdds: transformFunc(rdds), - *[d._jrdd_deserializer for d in dstreams], # type: ignore[attr-defined] + *[d._jrdd_deserializer for d in dstreams], ) assert self._jvm is not None @@ -419,35 +419,31 @@ def union(self, *dstreams: "DStream[T]") -> "DStream[T]": raise ValueError("should have at least one DStream to union") if len(dstreams) == 1: return dstreams[0] - if len(set(s._jrdd_deserializer for s in dstreams)) > 1: # type: ignore[attr-defined] + if len(set(s._jrdd_deserializer for s in dstreams)) > 1: raise ValueError("All DStreams should have same serializer") - if len(set(s._slideDuration for s in dstreams)) > 1: # type: ignore[attr-defined] + if len(set(s._slideDuration for s in dstreams)) > 1: raise ValueError("All DStreams should have same slide duration") assert SparkContext._jvm is not None jdstream_cls = SparkContext._jvm.org.apache.spark.streaming.api.java.JavaDStream jpair_dstream_cls = SparkContext._jvm.org.apache.spark.streaming.api.java.JavaPairDStream gw = SparkContext._gateway - if is_instance_of(gw, dstreams[0]._jdstream, jdstream_cls): # type: ignore[attr-defined] + if is_instance_of(gw, dstreams[0]._jdstream, jdstream_cls): cls = jdstream_cls - elif is_instance_of( - gw, dstreams[0]._jdstream, jpair_dstream_cls # type: ignore[attr-defined] - ): + elif is_instance_of(gw, dstreams[0]._jdstream, jpair_dstream_cls): cls = jpair_dstream_cls else: - cls_name = ( - dstreams[0]._jdstream.getClass().getCanonicalName() # type: ignore[attr-defined] - ) + cls_name = dstreams[0]._jdstream.getClass().getCanonicalName() raise TypeError("Unsupported Java DStream class %s" % cls_name) assert gw is not None jdstreams = gw.new_array(cls, len(dstreams)) for i in range(0, len(dstreams)): - jdstreams[i] = dstreams[i]._jdstream # type: ignore[attr-defined] + jdstreams[i] = dstreams[i]._jdstream return DStream( self._jssc.union(jdstreams), self, - dstreams[0]._jrdd_deserializer, # type: ignore[attr-defined] + dstreams[0]._jrdd_deserializer, ) def addStreamingListener(self, streamingListener: StreamingListener) -> None: diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py index f445a78bd9530..934b3ae5783db 100644 --- a/python/pyspark/streaming/dstream.py +++ b/python/pyspark/streaming/dstream.py @@ -19,19 +19,45 @@ import time from itertools import chain from datetime import datetime +from typing import ( + Any, + Callable, + Generic, + Hashable, + Iterable, + List, + Optional, + Tuple, + TypeVar, + Union, + TYPE_CHECKING, + cast, + overload, +) from py4j.protocol import Py4JJavaError -from pyspark import RDD from pyspark.storagelevel import StorageLevel from pyspark.streaming.util import rddToFileName, TransformFunction -from pyspark.rdd import portable_hash +from pyspark.rdd import portable_hash, RDD from pyspark.resultiterable import ResultIterable +from py4j.java_gateway import JavaObject + +if TYPE_CHECKING: + from pyspark.serializers import Serializer + from pyspark.streaming.context import StreamingContext __all__ = ["DStream"] +S = TypeVar("S") +T = TypeVar("T") +T_co = TypeVar("T_co", covariant=True) +U = TypeVar("U") +K = TypeVar("K", bound=Hashable) +V = TypeVar("V") + -class DStream: +class DStream(Generic[T_co]): """ A Discretized Stream (DStream), the basic abstraction in Spark Streaming, is a continuous sequence of RDDs (of the same type) representing a @@ -51,7 +77,12 @@ class DStream: - A function that is used to generate an RDD after each time interval """ - def __init__(self, jdstream, ssc, jrdd_deserializer): + def __init__( + self, + jdstream: JavaObject, + ssc: "StreamingContext", + jrdd_deserializer: "Serializer", + ): self._jdstream = jdstream self._ssc = ssc self._sc = ssc._sc @@ -59,76 +90,94 @@ def __init__(self, jdstream, ssc, jrdd_deserializer): self.is_cached = False self.is_checkpointed = False - def context(self): + def context(self) -> "StreamingContext": """ Return the StreamingContext associated with this DStream """ return self._ssc - def count(self): + def count(self) -> "DStream[int]": """ Return a new DStream in which each RDD has a single element generated by counting each RDD of this DStream. """ return self.mapPartitions(lambda i: [sum(1 for _ in i)]).reduce(operator.add) - def filter(self, f): + def filter(self: "DStream[T]", f: Callable[[T], bool]) -> "DStream[T]": """ Return a new DStream containing only the elements that satisfy predicate. """ - def func(iterator): + def func(iterator: Iterable[T]) -> Iterable[T]: return filter(f, iterator) return self.mapPartitions(func, True) - def flatMap(self, f, preservesPartitioning=False): + def flatMap( + self: "DStream[T]", + f: Callable[[T], Iterable[U]], + preservesPartitioning: bool = False, + ) -> "DStream[U]": """ Return a new DStream by applying a function to all elements of this DStream, and then flattening the results """ - def func(s, iterator): + def func(s: int, iterator: Iterable[T]) -> Iterable[U]: return chain.from_iterable(map(f, iterator)) return self.mapPartitionsWithIndex(func, preservesPartitioning) - def map(self, f, preservesPartitioning=False): + def map( + self: "DStream[T]", f: Callable[[T], U], preservesPartitioning: bool = False + ) -> "DStream[U]": """ Return a new DStream by applying a function to each element of DStream. """ - def func(iterator): + def func(iterator: Iterable[T]) -> Iterable[U]: return map(f, iterator) return self.mapPartitions(func, preservesPartitioning) - def mapPartitions(self, f, preservesPartitioning=False): + def mapPartitions( + self: "DStream[T]", + f: Callable[[Iterable[T]], Iterable[U]], + preservesPartitioning: bool = False, + ) -> "DStream[U]": """ Return a new DStream in which each RDD is generated by applying mapPartitions() to each RDDs of this DStream. """ - def func(s, iterator): + def func(s: int, iterator: Iterable[T]) -> Iterable[U]: return f(iterator) return self.mapPartitionsWithIndex(func, preservesPartitioning) - def mapPartitionsWithIndex(self, f, preservesPartitioning=False): + def mapPartitionsWithIndex( + self: "DStream[T]", + f: Callable[[int, Iterable[T]], Iterable[U]], + preservesPartitioning: bool = False, + ) -> "DStream[U]": """ Return a new DStream in which each RDD is generated by applying mapPartitionsWithIndex() to each RDDs of this DStream. """ return self.transform(lambda rdd: rdd.mapPartitionsWithIndex(f, preservesPartitioning)) - def reduce(self, func): + def reduce(self: "DStream[T]", func: Callable[[T, T], T]) -> "DStream[T]": """ Return a new DStream in which each RDD has a single element generated by reducing each RDD of this DStream. """ return self.map(lambda x: (None, x)).reduceByKey(func, 1).map(lambda x: x[1]) - def reduceByKey(self, func, numPartitions=None): + def reduceByKey( + self: "DStream[Tuple[K, V]]", + func: Callable[[V, V], V], + numPartitions: Optional[int] = None, + ) -> "DStream[Tuple[K, V]]": """ Return a new DStream by applying reduceByKey to each RDD. """ @@ -136,40 +185,62 @@ def reduceByKey(self, func, numPartitions=None): numPartitions = self._sc.defaultParallelism return self.combineByKey(lambda x: x, func, func, numPartitions) - def combineByKey(self, createCombiner, mergeValue, mergeCombiners, numPartitions=None): + def combineByKey( + self: "DStream[Tuple[K, V]]", + createCombiner: Callable[[V], U], + mergeValue: Callable[[U, V], U], + mergeCombiners: Callable[[U, U], U], + numPartitions: Optional[int] = None, + ) -> "DStream[Tuple[K, U]]": """ Return a new DStream by applying combineByKey to each RDD. """ if numPartitions is None: numPartitions = self._sc.defaultParallelism - def func(rdd): + def func(rdd: RDD[Tuple[K, V]]) -> RDD[Tuple[K, U]]: return rdd.combineByKey(createCombiner, mergeValue, mergeCombiners, numPartitions) return self.transform(func) - def partitionBy(self, numPartitions, partitionFunc=portable_hash): + def partitionBy( + self: "DStream[Tuple[K, V]]", + numPartitions: int, + partitionFunc: Callable[[K], int] = portable_hash, + ) -> "DStream[Tuple[K, V]]": """ Return a copy of the DStream in which each RDD are partitioned using the specified partitioner. """ return self.transform(lambda rdd: rdd.partitionBy(numPartitions, partitionFunc)) - def foreachRDD(self, func): + @overload + def foreachRDD(self: "DStream[T]", func: Callable[[RDD[T]], None]) -> None: + ... + + @overload + def foreachRDD(self: "DStream[T]", func: Callable[[datetime, RDD[T]], None]) -> None: + ... + + def foreachRDD( + self: "DStream[T]", + func: Union[Callable[[RDD[T]], None], Callable[[datetime, RDD[T]], None]], + ) -> None: """ Apply a function to each RDD in this DStream. """ if func.__code__.co_argcount == 1: old_func = func - def func(_, rdd): - return old_func(rdd) + def func(_: datetime, rdd: "RDD[T]") -> None: + return old_func(rdd) # type: ignore[call-arg, arg-type] jfunc = TransformFunction(self._sc, func, self._jrdd_deserializer) + assert self._ssc._jvm is not None api = self._ssc._jvm.PythonDStream api.callForeachRDD(self._jdstream, jfunc) - def pprint(self, num=10): + def pprint(self, num: int = 10) -> None: """ Print the first num elements of each RDD generated in this DStream. @@ -179,7 +250,7 @@ def pprint(self, num=10): the number of elements from the first will be printed. """ - def takeAndPrint(time, rdd): + def takeAndPrint(time: datetime, rdd: RDD[T]) -> None: taken = rdd.take(num + 1) print("-------------------------------------------") print("Time: %s" % time) @@ -192,40 +263,42 @@ def takeAndPrint(time, rdd): self.foreachRDD(takeAndPrint) - def mapValues(self, f): + def mapValues(self: "DStream[Tuple[K, V]]", f: Callable[[V], U]) -> "DStream[Tuple[K, U]]": """ Return a new DStream by applying a map function to the value of each key-value pairs in this DStream without changing the key. """ - def map_values_fn(kv): + def map_values_fn(kv: Tuple[K, V]) -> Tuple[K, U]: return kv[0], f(kv[1]) return self.map(map_values_fn, preservesPartitioning=True) - def flatMapValues(self, f): + def flatMapValues( + self: "DStream[Tuple[K, V]]", f: Callable[[V], Iterable[U]] + ) -> "DStream[Tuple[K, U]]": """ Return a new DStream by applying a flatmap function to the value of each key-value pairs in this DStream without changing the key. """ - def flat_map_fn(kv): + def flat_map_fn(kv: Tuple[K, V]) -> Iterable[Tuple[K, U]]: return ((kv[0], x) for x in f(kv[1])) return self.flatMap(flat_map_fn, preservesPartitioning=True) - def glom(self): + def glom(self: "DStream[T]") -> "DStream[List[T]]": """ Return a new DStream in which RDD is generated by applying glom() to RDD of this DStream. """ - def func(iterator): + def func(iterator: Iterable[T]) -> Iterable[List[T]]: yield list(iterator) return self.mapPartitions(func) - def cache(self): + def cache(self: "DStream[T]") -> "DStream[T]": """ Persist the RDDs of this DStream with the default storage level (`MEMORY_ONLY`). @@ -234,7 +307,7 @@ def cache(self): self.persist(StorageLevel.MEMORY_ONLY) return self - def persist(self, storageLevel): + def persist(self: "DStream[T]", storageLevel: StorageLevel) -> "DStream[T]": """ Persist the RDDs of this DStream with the given storage level """ @@ -243,7 +316,7 @@ def persist(self, storageLevel): self._jdstream.persist(javaStorageLevel) return self - def checkpoint(self, interval): + def checkpoint(self: "DStream[T]", interval: int) -> "DStream[T]": """ Enable periodic checkpointing of RDDs of this DStream @@ -257,7 +330,9 @@ def checkpoint(self, interval): self._jdstream.checkpoint(self._ssc._jduration(interval)) return self - def groupByKey(self, numPartitions=None): + def groupByKey( + self: "DStream[Tuple[K, V]]", numPartitions: Optional[int] = None + ) -> "DStream[Tuple[K, Iterable[V]]]": """ Return a new DStream by applying groupByKey on each RDD. """ @@ -265,20 +340,20 @@ def groupByKey(self, numPartitions=None): numPartitions = self._sc.defaultParallelism return self.transform(lambda rdd: rdd.groupByKey(numPartitions)) - def countByValue(self): + def countByValue(self: "DStream[K]") -> "DStream[Tuple[K, int]]": """ Return a new DStream in which each RDD contains the counts of each distinct value in each RDD of this DStream. """ return self.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y) - def saveAsTextFiles(self, prefix, suffix=None): + def saveAsTextFiles(self, prefix: str, suffix: Optional[str] = None) -> None: """ Save each RDD in this DStream as at text file, using string representation of elements. """ - def saveAsTextFile(t, rdd): + def saveAsTextFile(t: Optional[datetime], rdd: RDD[T]) -> None: path = rddToFileName(prefix, suffix, t) try: rdd.saveAsTextFile(path) @@ -307,7 +382,20 @@ def saveAsTextFile(t, rdd): # raise # return self.foreachRDD(saveAsPickleFile) - def transform(self, func): + @overload + def transform(self: "DStream[T]", func: Callable[[RDD[T]], RDD[U]]) -> "TransformedDStream[U]": + ... + + @overload + def transform( + self: "DStream[T]", func: Callable[[datetime, RDD[T]], RDD[U]] + ) -> "TransformedDStream[U]": + ... + + def transform( + self: "DStream[T]", + func: Union[Callable[[RDD[T]], RDD[U]], Callable[[datetime, RDD[T]], RDD[U]]], + ) -> "TransformedDStream[U]": """ Return a new DStream in which each RDD is generated by applying a function on each RDD of this DStream. @@ -318,13 +406,39 @@ def transform(self, func): if func.__code__.co_argcount == 1: oldfunc = func - def func(_, rdd): - return oldfunc(rdd) + def func(_: datetime, rdd: RDD[T]) -> RDD[U]: + return oldfunc(rdd) # type: ignore[arg-type, call-arg] assert func.__code__.co_argcount == 2, "func should take one or two arguments" return TransformedDStream(self, func) - def transformWith(self, func, other, keepSerializer=False): + @overload + def transformWith( + self: "DStream[T]", + func: Callable[[RDD[T], RDD[U]], RDD[V]], + other: "DStream[U]", + keepSerializer: bool = ..., + ) -> "DStream[V]": + ... + + @overload + def transformWith( + self: "DStream[T]", + func: Callable[[datetime, RDD[T], RDD[U]], RDD[V]], + other: "DStream[U]", + keepSerializer: bool = ..., + ) -> "DStream[V]": + ... + + def transformWith( + self: "DStream[T]", + func: Union[ + Callable[[RDD[T], RDD[U]], RDD[V]], + Callable[[datetime, RDD[T], RDD[U]], RDD[V]], + ], + other: "DStream[U]", + keepSerializer: bool = False, + ) -> "DStream[V]": """ Return a new DStream in which each RDD is generated by applying a function on each RDD of this DStream and 'other' DStream. @@ -335,31 +449,37 @@ def transformWith(self, func, other, keepSerializer=False): if func.__code__.co_argcount == 2: oldfunc = func - def func(_, a, b): - return oldfunc(a, b) + def func(_: datetime, a: RDD[T], b: RDD[U]) -> RDD[V]: + return oldfunc(a, b) # type: ignore[call-arg, arg-type] assert func.__code__.co_argcount == 3, "func should take two or three arguments" - jfunc = TransformFunction(self._sc, func, self._jrdd_deserializer, other._jrdd_deserializer) + jfunc = TransformFunction( + self._sc, + func, + self._jrdd_deserializer, + other._jrdd_deserializer, + ) + assert self._sc._jvm is not None dstream = self._sc._jvm.PythonTransformed2DStream( self._jdstream.dstream(), other._jdstream.dstream(), jfunc ) jrdd_serializer = self._jrdd_deserializer if keepSerializer else self._sc.serializer return DStream(dstream.asJavaDStream(), self._ssc, jrdd_serializer) - def repartition(self, numPartitions): + def repartition(self: "DStream[T]", numPartitions: int) -> "DStream[T]": """ Return a new DStream with an increased or decreased level of parallelism. """ return self.transform(lambda rdd: rdd.repartition(numPartitions)) @property - def _slideDuration(self): + def _slideDuration(self) -> None: """ Return the slideDuration in seconds of this DStream """ return self._jdstream.dstream().slideDuration().milliseconds() / 1000.0 - def union(self, other): + def union(self: "DStream[T]", other: "DStream[U]") -> "DStream[Union[T, U]]": """ Return a new DStream by unifying data of another DStream with this DStream. @@ -373,7 +493,11 @@ def union(self, other): raise ValueError("the two DStream should have same slide duration") return self.transformWith(lambda a, b: a.union(b), other, True) - def cogroup(self, other, numPartitions=None): + def cogroup( + self: "DStream[Tuple[K, V]]", + other: "DStream[Tuple[K, U]]", + numPartitions: Optional[int] = None, + ) -> "DStream[Tuple[K, Tuple[ResultIterable[V], ResultIterable[U]]]]": """ Return a new DStream by applying 'cogroup' between RDDs of this DStream and `other` DStream. @@ -382,9 +506,16 @@ def cogroup(self, other, numPartitions=None): """ if numPartitions is None: numPartitions = self._sc.defaultParallelism - return self.transformWith(lambda a, b: a.cogroup(b, numPartitions), other) + return self.transformWith( + lambda a, b: a.cogroup(b, numPartitions), + other, + ) - def join(self, other, numPartitions=None): + def join( + self: "DStream[Tuple[K, V]]", + other: "DStream[Tuple[K, U]]", + numPartitions: Optional[int] = None, + ) -> "DStream[Tuple[K, Tuple[V, U]]]": """ Return a new DStream by applying 'join' between RDDs of this DStream and `other` DStream. @@ -396,7 +527,11 @@ def join(self, other, numPartitions=None): numPartitions = self._sc.defaultParallelism return self.transformWith(lambda a, b: a.join(b, numPartitions), other) - def leftOuterJoin(self, other, numPartitions=None): + def leftOuterJoin( + self: "DStream[Tuple[K, V]]", + other: "DStream[Tuple[K, U]]", + numPartitions: Optional[int] = None, + ) -> "DStream[Tuple[K, Tuple[V, Optional[U]]]]": """ Return a new DStream by applying 'left outer join' between RDDs of this DStream and `other` DStream. @@ -408,7 +543,11 @@ def leftOuterJoin(self, other, numPartitions=None): numPartitions = self._sc.defaultParallelism return self.transformWith(lambda a, b: a.leftOuterJoin(b, numPartitions), other) - def rightOuterJoin(self, other, numPartitions=None): + def rightOuterJoin( + self: "DStream[Tuple[K, V]]", + other: "DStream[Tuple[K, U]]", + numPartitions: Optional[int] = None, + ) -> "DStream[Tuple[K, Tuple[Optional[V], U]]]": """ Return a new DStream by applying 'right outer join' between RDDs of this DStream and `other` DStream. @@ -420,7 +559,11 @@ def rightOuterJoin(self, other, numPartitions=None): numPartitions = self._sc.defaultParallelism return self.transformWith(lambda a, b: a.rightOuterJoin(b, numPartitions), other) - def fullOuterJoin(self, other, numPartitions=None): + def fullOuterJoin( + self: "DStream[Tuple[K, V]]", + other: "DStream[Tuple[K, U]]", + numPartitions: Optional[int] = None, + ) -> "DStream[Tuple[K, Tuple[Optional[V], Optional[U]]]]": """ Return a new DStream by applying 'full outer join' between RDDs of this DStream and `other` DStream. @@ -432,13 +575,14 @@ def fullOuterJoin(self, other, numPartitions=None): numPartitions = self._sc.defaultParallelism return self.transformWith(lambda a, b: a.fullOuterJoin(b, numPartitions), other) - def _jtime(self, timestamp): + def _jtime(self, timestamp: Union[datetime, int, float]) -> JavaObject: """Convert datetime or unix_timestamp into Time""" if isinstance(timestamp, datetime): timestamp = time.mktime(timestamp.timetuple()) + assert self._sc._jvm is not None return self._sc._jvm.Time(int(timestamp * 1000)) - def slice(self, begin, end): + def slice(self, begin: Union[datetime, int], end: Union[datetime, int]) -> List[RDD[T]]: """ Return all the RDDs between 'begin' to 'end' (both included) @@ -447,7 +591,7 @@ def slice(self, begin, end): jrdds = self._jdstream.slice(self._jtime(begin), self._jtime(end)) return [RDD(jrdd, self._sc, self._jrdd_deserializer) for jrdd in jrdds] - def _validate_window_param(self, window, slide): + def _validate_window_param(self, window: int, slide: Optional[int]) -> None: duration = self._jdstream.dstream().slideDuration().milliseconds() if int(window * 1000) % duration != 0: raise ValueError( @@ -460,7 +604,7 @@ def _validate_window_param(self, window, slide): "dstream's slide (batch) duration (%d ms)" % duration ) - def window(self, windowDuration, slideDuration=None): + def window(self, windowDuration: int, slideDuration: Optional[int] = None) -> "DStream[T]": """ Return a new DStream in which each RDD contains all the elements in seen in a sliding window of time over this DStream. @@ -482,7 +626,13 @@ def window(self, windowDuration, slideDuration=None): s = self._ssc._jduration(slideDuration) return DStream(self._jdstream.window(d, s), self._ssc, self._jrdd_deserializer) - def reduceByWindow(self, reduceFunc, invReduceFunc, windowDuration, slideDuration): + def reduceByWindow( + self: "DStream[T]", + reduceFunc: Callable[[T, T], T], + invReduceFunc: Optional[Callable[[T, T], T]], + windowDuration: int, + slideDuration: int, + ) -> "DStream[T]": """ Return a new DStream in which each RDD has a single element generated by reducing all elements in a sliding window over this DStream. @@ -517,7 +667,9 @@ def reduceByWindow(self, reduceFunc, invReduceFunc, windowDuration, slideDuratio ) return reduced.map(lambda kv: kv[1]) - def countByWindow(self, windowDuration, slideDuration): + def countByWindow( + self: "DStream[T]", windowDuration: int, slideDuration: int + ) -> "DStream[int]": """ Return a new DStream in which each RDD has a single element generated by counting the number of elements in a window over this DStream. @@ -530,7 +682,12 @@ def countByWindow(self, windowDuration, slideDuration): operator.add, operator.sub, windowDuration, slideDuration ) - def countByValueAndWindow(self, windowDuration, slideDuration, numPartitions=None): + def countByValueAndWindow( + self: "DStream[T]", + windowDuration: int, + slideDuration: int, + numPartitions: Optional[int] = None, + ) -> "DStream[Tuple[T, int]]": """ Return a new DStream in which each RDD contains the count of distinct elements in RDDs in a sliding window over this DStream. @@ -553,7 +710,12 @@ def countByValueAndWindow(self, windowDuration, slideDuration, numPartitions=Non ) return counted.filter(lambda kv: kv[1] > 0) - def groupByKeyAndWindow(self, windowDuration, slideDuration, numPartitions=None): + def groupByKeyAndWindow( + self: "DStream[Tuple[K, V]]", + windowDuration: int, + slideDuration: int, + numPartitions: Optional[int] = None, + ) -> "DStream[Tuple[K, Iterable[V]]]": """ Return a new DStream by applying `groupByKey` over a sliding window. Similar to `DStream.groupByKey()`, but applies it over a sliding window. @@ -572,7 +734,7 @@ def groupByKeyAndWindow(self, windowDuration, slideDuration, numPartitions=None) """ ls = self.mapValues(lambda x: [x]) grouped = ls.reduceByKeyAndWindow( - lambda a, b: a.extend(b) or a, + lambda a, b: a.extend(b) or a, # type: ignore[func-returns-value] lambda a, b: a[len(b) :], windowDuration, slideDuration, @@ -581,8 +743,14 @@ def groupByKeyAndWindow(self, windowDuration, slideDuration, numPartitions=None) return grouped.mapValues(ResultIterable) def reduceByKeyAndWindow( - self, func, invFunc, windowDuration, slideDuration=None, numPartitions=None, filterFunc=None - ): + self: "DStream[Tuple[K, V]]", + func: Callable[[V, V], V], + invFunc: Optional[Callable[[V, V], V]], + windowDuration: int, + slideDuration: Optional[int] = None, + numPartitions: Optional[int] = None, + filterFunc: Optional[Callable[[Tuple[K, V]], bool]] = None, + ) -> "DStream[Tuple[K, V]]": """ Return a new DStream by applying incremental `reduceByKey` over a sliding window. @@ -621,36 +789,46 @@ def reduceByKeyAndWindow( if invFunc: - def reduceFunc(t, a, b): + def reduceFunc(t: datetime, a: Any, b: Any) -> Any: b = b.reduceByKey(func, numPartitions) r = a.union(b).reduceByKey(func, numPartitions) if a else b if filterFunc: r = r.filter(filterFunc) return r - def invReduceFunc(t, a, b): + def invReduceFunc(t: datetime, a: Any, b: Any) -> Any: b = b.reduceByKey(func, numPartitions) joined = a.leftOuterJoin(b, numPartitions) return joined.mapValues( - lambda kv: invFunc(kv[0], kv[1]) if kv[1] is not None else kv[0] + lambda kv: invFunc(kv[0], kv[1]) # type: ignore[misc] + if kv[1] is not None + else kv[0] ) jreduceFunc = TransformFunction(self._sc, reduceFunc, reduced._jrdd_deserializer) jinvReduceFunc = TransformFunction(self._sc, invReduceFunc, reduced._jrdd_deserializer) if slideDuration is None: slideDuration = self._slideDuration + assert self._sc._jvm is not None dstream = self._sc._jvm.PythonReducedWindowedDStream( reduced._jdstream.dstream(), jreduceFunc, jinvReduceFunc, self._ssc._jduration(windowDuration), - self._ssc._jduration(slideDuration), + self._ssc._jduration(slideDuration), # type: ignore[arg-type] ) return DStream(dstream.asJavaDStream(), self._ssc, self._sc.serializer) else: - return reduced.window(windowDuration, slideDuration).reduceByKey(func, numPartitions) + return reduced.window(windowDuration, slideDuration).reduceByKey( + func, numPartitions # type: ignore[arg-type] + ) - def updateStateByKey(self, updateFunc, numPartitions=None, initialRDD=None): + def updateStateByKey( + self: "DStream[Tuple[K, V]]", + updateFunc: Callable[[Iterable[V], Optional[S]], S], + numPartitions: Optional[int] = None, + initialRDD: Optional[Union[RDD[Tuple[K, S]], Iterable[Tuple[K, S]]]] = None, + ) -> "DStream[Tuple[K, S]]": """ Return a new "state" DStream where the state for each key is updated by applying the given function on the previous state of the key and the new values of the key. @@ -667,30 +845,37 @@ def updateStateByKey(self, updateFunc, numPartitions=None, initialRDD=None): if initialRDD and not isinstance(initialRDD, RDD): initialRDD = self._sc.parallelize(initialRDD) - def reduceFunc(t, a, b): + def reduceFunc(t: datetime, a: Any, b: Any) -> Any: if a is None: g = b.groupByKey(numPartitions).mapValues(lambda vs: (list(vs), None)) else: - g = a.cogroup(b.partitionBy(numPartitions), numPartitions) + g = a.cogroup(b.partitionBy(cast(int, numPartitions)), numPartitions) g = g.mapValues(lambda ab: (list(ab[1]), list(ab[0])[0] if len(ab[0]) else None)) state = g.mapValues(lambda vs_s: updateFunc(vs_s[0], vs_s[1])) return state.filter(lambda k_v: k_v[1] is not None) jreduceFunc = TransformFunction( - self._sc, reduceFunc, self._sc.serializer, self._jrdd_deserializer + self._sc, + reduceFunc, + self._sc.serializer, + self._jrdd_deserializer, ) if initialRDD: - initialRDD = initialRDD._reserialize(self._jrdd_deserializer) + initialRDD = cast(RDD[Tuple[K, S]], initialRDD)._reserialize(self._jrdd_deserializer) + assert self._sc._jvm is not None dstream = self._sc._jvm.PythonStateDStream( - self._jdstream.dstream(), jreduceFunc, initialRDD._jrdd + self._jdstream.dstream(), + jreduceFunc, + initialRDD._jrdd, ) else: + assert self._sc._jvm is not None dstream = self._sc._jvm.PythonStateDStream(self._jdstream.dstream(), jreduceFunc) return DStream(dstream.asJavaDStream(), self._ssc, self._sc.serializer) -class TransformedDStream(DStream): +class TransformedDStream(DStream[U]): """ TransformedDStream is a DStream generated by an Python function transforming each RDD of a DStream to another RDDs. @@ -699,7 +884,23 @@ class TransformedDStream(DStream): one transformation. """ - def __init__(self, prev, func): + @overload + def __init__(self: DStream[U], prev: DStream[T], func: Callable[[RDD[T]], RDD[U]]): + ... + + @overload + def __init__( + self: DStream[U], + prev: DStream[T], + func: Callable[[datetime, RDD[T]], RDD[U]], + ): + ... + + def __init__( + self, + prev: DStream[T], + func: Union[Callable[[RDD[T]], RDD[U]], Callable[[datetime, RDD[T]], RDD[U]]], + ): self._ssc = prev._ssc self._sc = self._ssc._sc self._jrdd_deserializer = self._sc.serializer @@ -710,19 +911,23 @@ def __init__(self, prev, func): # Using type() to avoid folding the functions and compacting the DStreams which is not # not strictly an object of TransformedDStream. if type(prev) is TransformedDStream and not prev.is_cached and not prev.is_checkpointed: - prev_func = prev.func - self.func = lambda t, rdd: func(t, prev_func(t, rdd)) - self.prev = prev.prev + prev_func: Callable = prev.func + func = cast(Callable[[datetime, RDD[T]], RDD[U]], func) + self.func: Union[ + Callable[[RDD[T]], RDD[U]], Callable[[datetime, RDD[T]], RDD[U]] + ] = lambda t, rdd: func(t, prev_func(t, rdd)) + self.prev: DStream[T] = prev.prev else: self.prev = prev self.func = func @property - def _jdstream(self): + def _jdstream(self) -> JavaObject: if self._jdstream_val is not None: return self._jdstream_val jfunc = TransformFunction(self._sc, self.func, self.prev._jrdd_deserializer) + assert self._sc._jvm is not None dstream = self._sc._jvm.PythonTransformedDStream(self.prev._jdstream.dstream(), jfunc) self._jdstream_val = dstream.asJavaDStream() return self._jdstream_val diff --git a/python/pyspark/streaming/dstream.pyi b/python/pyspark/streaming/dstream.pyi deleted file mode 100644 index c9f31b37f0457..0000000000000 --- a/python/pyspark/streaming/dstream.pyi +++ /dev/null @@ -1,211 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -from typing import overload -from typing import ( - Callable, - Generic, - Hashable, - Iterable, - List, - Optional, - Tuple, - TypeVar, - Union, -) -import datetime -from pyspark.rdd import RDD -import pyspark.serializers -from pyspark.storagelevel import StorageLevel -import pyspark.streaming.context - -from py4j.java_gateway import JavaObject - -S = TypeVar("S") -T = TypeVar("T") -T_co = TypeVar("T_co", covariant=True) -U = TypeVar("U") -K = TypeVar("K", bound=Hashable) -V = TypeVar("V") - -class DStream(Generic[T_co]): - is_cached: bool - is_checkpointed: bool - def __init__( - self, - jdstream: JavaObject, - ssc: pyspark.streaming.context.StreamingContext, - jrdd_deserializer: pyspark.serializers.Serializer, - ) -> None: ... - def context(self) -> pyspark.streaming.context.StreamingContext: ... - def count(self) -> DStream[int]: ... - def filter(self, f: Callable[[T_co], bool]) -> DStream[T_co]: ... - def flatMap( - self: DStream[T_co], - f: Callable[[T_co], Iterable[U]], - preservesPartitioning: bool = ..., - ) -> DStream[U]: ... - def map( - self: DStream[T_co], f: Callable[[T_co], U], preservesPartitioning: bool = ... - ) -> DStream[U]: ... - def mapPartitions( - self, f: Callable[[Iterable[T_co]], Iterable[U]], preservesPartitioning: bool = ... - ) -> DStream[U]: ... - def mapPartitionsWithIndex( - self, - f: Callable[[int, Iterable[T_co]], Iterable[U]], - preservesPartitioning: bool = ..., - ) -> DStream[U]: ... - def reduce(self, func: Callable[[T_co, T_co], T_co]) -> DStream[T_co]: ... - def reduceByKey( - self: DStream[Tuple[K, V]], - func: Callable[[V, V], V], - numPartitions: Optional[int] = ..., - ) -> DStream[Tuple[K, V]]: ... - def combineByKey( - self: DStream[Tuple[K, V]], - createCombiner: Callable[[V], U], - mergeValue: Callable[[U, V], U], - mergeCombiners: Callable[[U, U], U], - numPartitions: Optional[int] = ..., - ) -> DStream[Tuple[K, U]]: ... - def partitionBy( - self: DStream[Tuple[K, V]], - numPartitions: int, - partitionFunc: Callable[[K], int] = ..., - ) -> DStream[Tuple[K, V]]: ... - @overload - def foreachRDD(self, func: Callable[[RDD[T_co]], None]) -> None: ... - @overload - def foreachRDD(self, func: Callable[[datetime.datetime, RDD[T_co]], None]) -> None: ... - def pprint(self, num: int = ...) -> None: ... - def mapValues(self: DStream[Tuple[K, V]], f: Callable[[V], U]) -> DStream[Tuple[K, U]]: ... - def flatMapValues( - self: DStream[Tuple[K, V]], f: Callable[[V], Iterable[U]] - ) -> DStream[Tuple[K, U]]: ... - def glom(self) -> DStream[List[T_co]]: ... - def cache(self) -> DStream[T_co]: ... - def persist(self, storageLevel: StorageLevel) -> DStream[T_co]: ... - def checkpoint(self, interval: int) -> DStream[T_co]: ... - def groupByKey( - self: DStream[Tuple[K, V]], numPartitions: Optional[int] = ... - ) -> DStream[Tuple[K, Iterable[V]]]: ... - def countByValue(self) -> DStream[Tuple[T_co, int]]: ... - def saveAsTextFiles(self, prefix: str, suffix: Optional[str] = ...) -> None: ... - @overload - def transform(self, func: Callable[[RDD[T_co]], RDD[U]]) -> TransformedDStream[U]: ... - @overload - def transform( - self, func: Callable[[datetime.datetime, RDD[T_co]], RDD[U]] - ) -> TransformedDStream[U]: ... - @overload - def transformWith( - self, - func: Callable[[RDD[T_co], RDD[U]], RDD[V]], - other: RDD[U], - keepSerializer: bool = ..., - ) -> DStream[V]: ... - @overload - def transformWith( - self, - func: Callable[[datetime.datetime, RDD[T_co], RDD[U]], RDD[V]], - other: RDD[U], - keepSerializer: bool = ..., - ) -> DStream[V]: ... - def repartition(self, numPartitions: int) -> DStream[T_co]: ... - def union(self, other: DStream[U]) -> DStream[Union[T_co, U]]: ... - def cogroup( - self: DStream[Tuple[K, V]], - other: DStream[Tuple[K, U]], - numPartitions: Optional[int] = ..., - ) -> DStream[Tuple[K, Tuple[List[V], List[U]]]]: ... - def join( - self: DStream[Tuple[K, V]], - other: DStream[Tuple[K, U]], - numPartitions: Optional[int] = ..., - ) -> DStream[Tuple[K, Tuple[V, U]]]: ... - def leftOuterJoin( - self: DStream[Tuple[K, V]], - other: DStream[Tuple[K, U]], - numPartitions: Optional[int] = ..., - ) -> DStream[Tuple[K, Tuple[V, Optional[U]]]]: ... - def rightOuterJoin( - self: DStream[Tuple[K, V]], - other: DStream[Tuple[K, U]], - numPartitions: Optional[int] = ..., - ) -> DStream[Tuple[K, Tuple[Optional[V], U]]]: ... - def fullOuterJoin( - self: DStream[Tuple[K, V]], - other: DStream[Tuple[K, U]], - numPartitions: Optional[int] = ..., - ) -> DStream[Tuple[K, Tuple[Optional[V], Optional[U]]]]: ... - def slice( - self, begin: Union[datetime.datetime, int], end: Union[datetime.datetime, int] - ) -> List[RDD[T_co]]: ... - def window(self, windowDuration: int, slideDuration: Optional[int] = ...) -> DStream[T_co]: ... - def reduceByWindow( - self, - reduceFunc: Callable[[T_co, T_co], T_co], - invReduceFunc: Optional[Callable[[T_co, T_co], T_co]], - windowDuration: int, - slideDuration: int, - ) -> DStream[T_co]: ... - def countByWindow( - self, windowDuration: int, slideDuration: int - ) -> DStream[Tuple[T_co, int]]: ... - def countByValueAndWindow( - self, - windowDuration: int, - slideDuration: int, - numPartitions: Optional[int] = ..., - ) -> DStream[Tuple[T_co, int]]: ... - def groupByKeyAndWindow( - self: DStream[Tuple[K, V]], - windowDuration: int, - slideDuration: int, - numPartitions: Optional[int] = ..., - ) -> DStream[Tuple[K, Iterable[V]]]: ... - def reduceByKeyAndWindow( - self: DStream[Tuple[K, V]], - func: Callable[[V, V], V], - invFunc: Optional[Callable[[V, V], V]], - windowDuration: int, - slideDuration: Optional[int] = ..., - numPartitions: Optional[int] = ..., - filterFunc: Optional[Callable[[Tuple[K, V]], bool]] = ..., - ) -> DStream[Tuple[K, V]]: ... - def updateStateByKey( - self: DStream[Tuple[K, V]], - updateFunc: Callable[[Iterable[V], Optional[S]], S], - numPartitions: Optional[int] = ..., - initialRDD: Optional[RDD[Tuple[K, S]]] = ..., - ) -> DStream[Tuple[K, S]]: ... - -class TransformedDStream(DStream[U]): - is_cached: bool - is_checkpointed: bool - func: Callable - prev: DStream - @overload - def __init__(self: DStream[U], prev: DStream[T], func: Callable[[RDD[T]], RDD[U]]) -> None: ... - @overload - def __init__( - self: DStream[U], - prev: DStream[T], - func: Callable[[datetime.datetime, RDD[T]], RDD[U]], - ) -> None: ... From 142b933eebec4feb58ea3643cc55e9480204fe8a Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Tue, 19 Apr 2022 10:41:39 +0900 Subject: [PATCH 155/535] [SPARK-38933][SQL][DOCS] Add examples of window functions into SQL docs Currently, Spark SQL docs display the window functions without examples. ![image](https://user-images.githubusercontent.com/8486025/163788857-38313a9c-48b2-4b72-bc60-38056d91124e.png) In fact, Mkdocs also generates the doc `generated-window-funcs-examples.html` This PR just updates the `sql-ref-functions-builtin.md` ![image](https://user-images.githubusercontent.com/8486025/163789775-17255e1a-7f7e-4b79-b780-3b04ba55dde7.png) Let SQL docs display the examples of window functions. 'No'. Just update docs. Manual tests. Closes #36243 from beliefer/SPARK-38933. Authored-by: Jiaan Geng Signed-off-by: Hyukjin Kwon (cherry picked from commit d2c5c53a5c21a72b3e00ecc48e6cac6ae73c3c23) Signed-off-by: Hyukjin Kwon --- docs/sql-ref-functions-builtin.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/sql-ref-functions-builtin.md b/docs/sql-ref-functions-builtin.md index cabb83e09fde9..08e620b0f4f86 100644 --- a/docs/sql-ref-functions-builtin.md +++ b/docs/sql-ref-functions-builtin.md @@ -31,6 +31,8 @@ license: | {% if static_file.name == 'generated-window-funcs-table.html' %} ### Window Functions {% include_relative generated-window-funcs-table.html %} +#### Examples +{% include_relative generated-window-funcs-examples.html %} {% break %} {% endif %} {% endfor %} From 671539de00c1da817859de66345e122cac01a2ee Mon Sep 17 00:00:00 2001 From: Maryann Xue Date: Tue, 19 Apr 2022 10:50:07 +0800 Subject: [PATCH 156/535] [SPARK-37670][SQL] Support predicate pushdown and column pruning for de-duped CTEs This PR adds predicate push-down and column pruning to CTEs that are not inlined as well as fixes a few potential correctness issues: 1) Replace (previously not inlined) CTE refs with Repartition operations at the end of logical plan optimization so that WithCTE is not carried over to physical plan. As a result, we can simplify the logic of physical planning, as well as avoid a correctness issue where the logical link of a physical plan node can point to `WithCTE` and lead to unexpected behaviors in AQE, e.g., class cast exceptions in DPP. 2) Pull (not inlined) CTE defs from subqueries up to the main query level, in order to avoid creating copies of the same CTE def during predicate push-downs and other transformations. 3) Make CTE IDs more deterministic by starting from 0 for each query. Improve de-duped CTEs' performance with predicate pushdown and column pruning; fixes de-duped CTEs' correctness issues. No. Added UTs. Closes #34929 from maryannxue/cte-followup. Lead-authored-by: Maryann Xue Co-authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit 175e429cca29c2314ee029bf009ed5222c0bffad) Signed-off-by: Wenchen Fan --- .../catalyst/analysis/CTESubstitution.scala | 30 ++- .../sql/catalyst/analysis/CheckAnalysis.scala | 8 +- .../sql/catalyst/optimizer/InlineCTE.scala | 56 ++--- .../sql/catalyst/optimizer/Optimizer.scala | 64 ++--- ...wnPredicatesAndPruneColumnsForCTEDef.scala | 175 +++++++++++++ .../ReplaceCTERefWithRepartition.scala | 84 +++++++ .../spark/sql/catalyst/plans/QueryPlan.scala | 31 +++ .../plans/logical/basicLogicalOperators.scala | 9 +- .../sql/catalyst/analysis/AnalysisTest.scala | 3 +- .../spark/sql/execution/QueryExecution.scala | 23 +- .../spark/sql/execution/SparkOptimizer.scala | 3 +- .../spark/sql/execution/SparkPlanner.scala | 1 - .../spark/sql/execution/SparkStrategies.scala | 31 --- .../adaptive/AdaptiveSparkPlanExec.scala | 7 +- .../scalar-subquery-select.sql | 42 ++++ .../scalar-subquery-select.sql.out | 103 +++++++- .../q23a.sf100/explain.txt | 166 ++++++------- .../q23b.sf100/explain.txt | 190 +++++++-------- .../org/apache/spark/sql/CTEInlineSuite.scala | 229 +++++++++++++++++- .../org/apache/spark/sql/SQLQuerySuite.scala | 15 ++ 20 files changed, 962 insertions(+), 308 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceCTERefWithRepartition.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala index c0ba3598e4ba1..976a5d385d874 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala @@ -69,13 +69,13 @@ object CTESubstitution extends Rule[LogicalPlan] { if (cteDefs.isEmpty) { substituted } else if (substituted eq lastSubstituted.get) { - WithCTE(substituted, cteDefs.toSeq) + WithCTE(substituted, cteDefs.sortBy(_.id).toSeq) } else { var done = false substituted.resolveOperatorsWithPruning(_ => !done) { case p if p eq lastSubstituted.get => done = true - WithCTE(p, cteDefs.toSeq) + WithCTE(p, cteDefs.sortBy(_.id).toSeq) } } } @@ -203,6 +203,7 @@ object CTESubstitution extends Rule[LogicalPlan] { cteDefs: mutable.ArrayBuffer[CTERelationDef]): Seq[(String, CTERelationDef)] = { val resolvedCTERelations = new mutable.ArrayBuffer[(String, CTERelationDef)](relations.size) for ((name, relation) <- relations) { + val lastCTEDefCount = cteDefs.length val innerCTEResolved = if (isLegacy) { // In legacy mode, outer CTE relations take precedence. Here we don't resolve the inner // `With` nodes, later we will substitute `UnresolvedRelation`s with outer CTE relations. @@ -211,8 +212,33 @@ object CTESubstitution extends Rule[LogicalPlan] { } else { // A CTE definition might contain an inner CTE that has a higher priority, so traverse and // substitute CTE defined in `relation` first. + // NOTE: we must call `traverseAndSubstituteCTE` before `substituteCTE`, as the relations + // in the inner CTE have higher priority over the relations in the outer CTE when resolving + // inner CTE relations. For example: + // WITH t1 AS (SELECT 1) + // t2 AS ( + // WITH t1 AS (SELECT 2) + // WITH t3 AS (SELECT * FROM t1) + // ) + // t3 should resolve the t1 to `SELECT 2` instead of `SELECT 1`. traverseAndSubstituteCTE(relation, isCommand, cteDefs)._1 } + + if (cteDefs.length > lastCTEDefCount) { + // We have added more CTE relations to the `cteDefs` from the inner CTE, and these relations + // should also be substituted with `resolvedCTERelations` as inner CTE relation can refer to + // outer CTE relation. For example: + // WITH t1 AS (SELECT 1) + // t2 AS ( + // WITH t3 AS (SELECT * FROM t1) + // ) + for (i <- lastCTEDefCount until cteDefs.length) { + val substituted = + substituteCTE(cteDefs(i).child, isLegacy || isCommand, resolvedCTERelations.toSeq) + cteDefs(i) = cteDefs(i).copy(child = substituted) + } + } + // CTE definition can reference a previous one val substituted = substituteCTE(innerCTEResolved, isLegacy || isCommand, resolvedCTERelations.toSeq) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 3b8a73717afee..1c2de771a3d00 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression -import org.apache.spark.sql.catalyst.optimizer.{BooleanSimplification, DecorrelateInnerQuery} +import org.apache.spark.sql.catalyst.optimizer.{BooleanSimplification, DecorrelateInnerQuery, InlineCTE} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreeNodeTag @@ -94,8 +94,10 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { def checkAnalysis(plan: LogicalPlan): Unit = { // We transform up and order the rules so as to catch the first possible failure instead - // of the result of cascading resolution failures. - plan.foreachUp { + // of the result of cascading resolution failures. Inline all CTEs in the plan to help check + // query plan structures in subqueries. + val inlineCTE = InlineCTE(alwaysInline = true) + inlineCTE(plan).foreachUp { case p if p.analyzed => // Skip already analyzed sub-plans diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala index 61577b1d21ea4..a740b92933fa4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InlineCTE.scala @@ -28,26 +28,37 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.{CTE, PLAN_EXPRESSION} /** * Inlines CTE definitions into corresponding references if either of the conditions satisfies: - * 1. The CTE definition does not contain any non-deterministic expressions. If this CTE - * definition references another CTE definition that has non-deterministic expressions, it - * is still OK to inline the current CTE definition. + * 1. The CTE definition does not contain any non-deterministic expressions or contains attribute + * references to an outer query. If this CTE definition references another CTE definition that + * has non-deterministic expressions, it is still OK to inline the current CTE definition. * 2. The CTE definition is only referenced once throughout the main query and all the subqueries. * - * In addition, due to the complexity of correlated subqueries, all CTE references in correlated - * subqueries are inlined regardless of the conditions above. + * CTE definitions that appear in subqueries and are not inlined will be pulled up to the main + * query level. + * + * @param alwaysInline if true, inline all CTEs in the query plan. */ -object InlineCTE extends Rule[LogicalPlan] { +case class InlineCTE(alwaysInline: Boolean = false) extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = { if (!plan.isInstanceOf[Subquery] && plan.containsPattern(CTE)) { val cteMap = mutable.HashMap.empty[Long, (CTERelationDef, Int)] buildCTEMap(plan, cteMap) - inlineCTE(plan, cteMap, forceInline = false) + val notInlined = mutable.ArrayBuffer.empty[CTERelationDef] + val inlined = inlineCTE(plan, cteMap, notInlined) + // CTEs in SQL Commands have been inlined by `CTESubstitution` already, so it is safe to add + // WithCTE as top node here. + if (notInlined.isEmpty) { + inlined + } else { + WithCTE(inlined, notInlined.toSeq) + } } else { plan } } - private def shouldInline(cteDef: CTERelationDef, refCount: Int): Boolean = { + private def shouldInline(cteDef: CTERelationDef, refCount: Int): Boolean = alwaysInline || { // We do not need to check enclosed `CTERelationRef`s for `deterministic` or `OuterReference`, // because: // 1) It is fine to inline a CTE if it references another CTE that is non-deterministic; @@ -93,25 +104,24 @@ object InlineCTE extends Rule[LogicalPlan] { private def inlineCTE( plan: LogicalPlan, cteMap: mutable.HashMap[Long, (CTERelationDef, Int)], - forceInline: Boolean): LogicalPlan = { - val (stripped, notInlined) = plan match { + notInlined: mutable.ArrayBuffer[CTERelationDef]): LogicalPlan = { + plan match { case WithCTE(child, cteDefs) => - val notInlined = mutable.ArrayBuffer.empty[CTERelationDef] cteDefs.foreach { cteDef => val (cte, refCount) = cteMap(cteDef.id) if (refCount > 0) { - val inlined = cte.copy(child = inlineCTE(cte.child, cteMap, forceInline)) + val inlined = cte.copy(child = inlineCTE(cte.child, cteMap, notInlined)) cteMap.update(cteDef.id, (inlined, refCount)) - if (!forceInline && !shouldInline(inlined, refCount)) { + if (!shouldInline(inlined, refCount)) { notInlined.append(inlined) } } } - (inlineCTE(child, cteMap, forceInline), notInlined.toSeq) + inlineCTE(child, cteMap, notInlined) case ref: CTERelationRef => val (cteDef, refCount) = cteMap(ref.cteId) - val newRef = if (forceInline || shouldInline(cteDef, refCount)) { + if (shouldInline(cteDef, refCount)) { if (ref.outputSet == cteDef.outputSet) { cteDef.child } else { @@ -125,24 +135,16 @@ object InlineCTE extends Rule[LogicalPlan] { } else { ref } - (newRef, Seq.empty) case _ if plan.containsPattern(CTE) => - val newPlan = plan - .withNewChildren(plan.children.map(child => inlineCTE(child, cteMap, forceInline))) + plan + .withNewChildren(plan.children.map(child => inlineCTE(child, cteMap, notInlined))) .transformExpressionsWithPruning(_.containsAllPatterns(PLAN_EXPRESSION, CTE)) { case e: SubqueryExpression => - e.withNewPlan(inlineCTE(e.plan, cteMap, forceInline = e.isCorrelated)) + e.withNewPlan(inlineCTE(e.plan, cteMap, notInlined)) } - (newPlan, Seq.empty) - case _ => (plan, Seq.empty) - } - - if (notInlined.isEmpty) { - stripped - } else { - WithCTE(stripped, notInlined) + case _ => plan } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 66c2ad84ccee8..dc3e4c3da34a5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -128,7 +128,8 @@ abstract class Optimizer(catalogManager: CatalogManager) OptimizeUpdateFields, SimplifyExtractValueOps, OptimizeCsvJsonExprs, - CombineConcats) ++ + CombineConcats, + PushdownPredicatesAndPruneColumnsForCTEDef) ++ extendedOperatorOptimizationRules val operatorOptimizationBatch: Seq[Batch] = { @@ -147,22 +148,7 @@ abstract class Optimizer(catalogManager: CatalogManager) } val batches = (Batch("Eliminate Distinct", Once, EliminateDistinct) :: - // Technically some of the rules in Finish Analysis are not optimizer rules and belong more - // in the analyzer, because they are needed for correctness (e.g. ComputeCurrentTime). - // However, because we also use the analyzer to canonicalized queries (for view definition), - // we do not eliminate subqueries or compute current time in the analyzer. - Batch("Finish Analysis", Once, - EliminateResolvedHint, - EliminateSubqueryAliases, - EliminateView, - InlineCTE, - ReplaceExpressions, - RewriteNonCorrelatedExists, - PullOutGroupingExpressions, - ComputeCurrentTime, - ReplaceCurrentLike(catalogManager), - SpecialDatetimeValues, - RewriteAsOfJoin) :: + Batch("Finish Analysis", Once, FinishAnalysis) :: ////////////////////////////////////////////////////////////////////////////////////////// // Optimizer rules start here ////////////////////////////////////////////////////////////////////////////////////////// @@ -171,6 +157,8 @@ abstract class Optimizer(catalogManager: CatalogManager) // extra operators between two adjacent Union operators. // - Call CombineUnions again in Batch("Operator Optimizations"), // since the other rules might make two separate Unions operators adjacent. + Batch("Inline CTE", Once, + InlineCTE()) :: Batch("Union", Once, RemoveNoopOperators, CombineUnions, @@ -207,6 +195,7 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveLiteralFromGroupExpressions, RemoveRepetitionFromGroupExpressions) :: Nil ++ operatorOptimizationBatch) :+ + Batch("Clean Up Temporary CTE Info", Once, CleanUpTempCTEInfo) :+ // This batch rewrites plans after the operator optimization and // before any batches that depend on stats. Batch("Pre CBO Rules", Once, preCBORules: _*) :+ @@ -265,14 +254,7 @@ abstract class Optimizer(catalogManager: CatalogManager) * (defaultBatches - (excludedRules - nonExcludableRules)). */ def nonExcludableRules: Seq[String] = - EliminateDistinct.ruleName :: - EliminateResolvedHint.ruleName :: - EliminateSubqueryAliases.ruleName :: - EliminateView.ruleName :: - ReplaceExpressions.ruleName :: - ComputeCurrentTime.ruleName :: - SpecialDatetimeValues.ruleName :: - ReplaceCurrentLike(catalogManager).ruleName :: + FinishAnalysis.ruleName :: RewriteDistinctAggregates.ruleName :: ReplaceDeduplicateWithAggregate.ruleName :: ReplaceIntersectWithSemiJoin.ruleName :: @@ -286,10 +268,38 @@ abstract class Optimizer(catalogManager: CatalogManager) RewritePredicateSubquery.ruleName :: NormalizeFloatingNumbers.ruleName :: ReplaceUpdateFieldsExpression.ruleName :: - PullOutGroupingExpressions.ruleName :: - RewriteAsOfJoin.ruleName :: RewriteLateralSubquery.ruleName :: Nil + /** + * Apply finish-analysis rules for the entire plan including all subqueries. + */ + object FinishAnalysis extends Rule[LogicalPlan] { + // Technically some of the rules in Finish Analysis are not optimizer rules and belong more + // in the analyzer, because they are needed for correctness (e.g. ComputeCurrentTime). + // However, because we also use the analyzer to canonicalized queries (for view definition), + // we do not eliminate subqueries or compute current time in the analyzer. + private val rules = Seq( + EliminateResolvedHint, + EliminateSubqueryAliases, + EliminateView, + ReplaceExpressions, + RewriteNonCorrelatedExists, + PullOutGroupingExpressions, + ComputeCurrentTime, + ReplaceCurrentLike(catalogManager), + SpecialDatetimeValues, + RewriteAsOfJoin) + + override def apply(plan: LogicalPlan): LogicalPlan = { + rules.foldLeft(plan) { case (sp, rule) => rule.apply(sp) } + .transformAllExpressionsWithPruning(_.containsPattern(PLAN_EXPRESSION)) { + case s: SubqueryExpression => + val Subquery(newPlan, _) = apply(Subquery.fromExpression(s)) + s.withNewPlan(newPlan) + } + } + } + /** * Optimize all the subqueries inside expression. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala new file mode 100644 index 0000000000000..ab9f20edb0bb9 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeSet, Expression, Literal, Or, SubqueryExpression} +import org.apache.spark.sql.catalyst.planning.ScanOperation +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreePattern.CTE + +/** + * Infer predicates and column pruning for [[CTERelationDef]] from its reference points, and push + * the disjunctive predicates as well as the union of attributes down the CTE plan. + */ +object PushdownPredicatesAndPruneColumnsForCTEDef extends Rule[LogicalPlan] { + + // CTE_id - (CTE_definition, precedence, predicates_to_push_down, attributes_to_prune) + private type CTEMap = mutable.HashMap[Long, (CTERelationDef, Int, Seq[Expression], AttributeSet)] + + override def apply(plan: LogicalPlan): LogicalPlan = { + if (!plan.isInstanceOf[Subquery] && plan.containsPattern(CTE)) { + val cteMap = new CTEMap + gatherPredicatesAndAttributes(plan, cteMap) + pushdownPredicatesAndAttributes(plan, cteMap) + } else { + plan + } + } + + private def restoreCTEDefAttrs( + input: Seq[Expression], + mapping: Map[Attribute, Expression]): Seq[Expression] = { + input.map(e => e.transform { + case a: Attribute => + mapping.keys.find(_.semanticEquals(a)).map(mapping).getOrElse(a) + }) + } + + /** + * Gather all the predicates and referenced attributes on different points of CTE references + * using pattern `ScanOperation` (which takes care of determinism) and combine those predicates + * and attributes that belong to the same CTE definition. + * For the same CTE definition, if any of its references does not have predicates, the combined + * predicate will be a TRUE literal, which means there will be no predicate push-down. + */ + private def gatherPredicatesAndAttributes(plan: LogicalPlan, cteMap: CTEMap): Unit = { + plan match { + case WithCTE(child, cteDefs) => + cteDefs.zipWithIndex.foreach { case (cteDef, precedence) => + gatherPredicatesAndAttributes(cteDef.child, cteMap) + cteMap.put(cteDef.id, (cteDef, precedence, Seq.empty, AttributeSet.empty)) + } + gatherPredicatesAndAttributes(child, cteMap) + + case ScanOperation(projects, predicates, ref: CTERelationRef) => + val (cteDef, precedence, preds, attrs) = cteMap(ref.cteId) + val attrMapping = ref.output.zip(cteDef.output).map{ case (r, d) => r -> d }.toMap + val newPredicates = if (isTruePredicate(preds)) { + preds + } else { + // Make sure we only push down predicates that do not contain forward CTE references. + val filteredPredicates = restoreCTEDefAttrs(predicates.filter(_.find { + case s: SubqueryExpression => s.plan.find { + case r: CTERelationRef => + // If the ref's ID does not exist in the map or if ref's corresponding precedence + // is bigger than that of the current CTE we are pushing predicates for, it + // indicates a forward reference and we should exclude this predicate. + !cteMap.contains(r.cteId) || cteMap(r.cteId)._2 >= precedence + case _ => false + }.nonEmpty + case _ => false + }.isEmpty), attrMapping).filter(_.references.forall(cteDef.outputSet.contains)) + if (filteredPredicates.isEmpty) { + Seq(Literal.TrueLiteral) + } else { + preds :+ filteredPredicates.reduce(And) + } + } + val newAttributes = attrs ++ + AttributeSet(restoreCTEDefAttrs(projects.flatMap(_.references), attrMapping)) ++ + AttributeSet(restoreCTEDefAttrs(predicates.flatMap(_.references), attrMapping)) + + cteMap.update(ref.cteId, (cteDef, precedence, newPredicates, newAttributes)) + plan.subqueriesAll.foreach(s => gatherPredicatesAndAttributes(s, cteMap)) + + case _ => + plan.children.foreach(c => gatherPredicatesAndAttributes(c, cteMap)) + plan.subqueries.foreach(s => gatherPredicatesAndAttributes(s, cteMap)) + } + } + + /** + * Push down the combined predicate and attribute references to each CTE definition plan. + * + * In order to guarantee idempotency, we keep the predicates (if any) being pushed down by the + * last iteration of this rule in a temporary field of `CTERelationDef`, so that on the current + * iteration, we only push down predicates for a CTE def if there exists any new predicate that + * has not been pushed before. Also, since part of a new predicate might overlap with some + * existing predicate and it can be hard to extract only the non-overlapping part, we also keep + * the original CTE definition plan without any predicate push-down in that temporary field so + * that when we do a new predicate push-down, we can construct a new plan with all latest + * predicates over the original plan without having to figure out the exact predicate difference. + */ + private def pushdownPredicatesAndAttributes( + plan: LogicalPlan, + cteMap: CTEMap): LogicalPlan = plan.transformWithSubqueries { + case cteDef @ CTERelationDef(child, id, originalPlanWithPredicates) => + val (_, _, newPreds, newAttrSet) = cteMap(id) + val originalPlan = originalPlanWithPredicates.map(_._1).getOrElse(child) + val preds = originalPlanWithPredicates.map(_._2).getOrElse(Seq.empty) + if (!isTruePredicate(newPreds) && + newPreds.exists(newPred => !preds.exists(_.semanticEquals(newPred)))) { + val newCombinedPred = newPreds.reduce(Or) + val newChild = if (needsPruning(originalPlan, newAttrSet)) { + Project(newAttrSet.toSeq, originalPlan) + } else { + originalPlan + } + CTERelationDef(Filter(newCombinedPred, newChild), id, Some((originalPlan, newPreds))) + } else if (needsPruning(cteDef.child, newAttrSet)) { + CTERelationDef(Project(newAttrSet.toSeq, cteDef.child), id, Some((originalPlan, preds))) + } else { + cteDef + } + + case cteRef @ CTERelationRef(cteId, _, output, _) => + val (cteDef, _, _, newAttrSet) = cteMap(cteId) + if (newAttrSet.size < output.size) { + val indices = newAttrSet.toSeq.map(cteDef.output.indexOf) + val newOutput = indices.map(output) + cteRef.copy(output = newOutput) + } else { + // Do not change the order of output columns if no column is pruned, in which case there + // might be no Project and the order is important. + cteRef + } + } + + private def isTruePredicate(predicates: Seq[Expression]): Boolean = { + predicates.length == 1 && predicates.head == Literal.TrueLiteral + } + + private def needsPruning(sourcePlan: LogicalPlan, attributeSet: AttributeSet): Boolean = { + attributeSet.size < sourcePlan.outputSet.size && attributeSet.subsetOf(sourcePlan.outputSet) + } +} + +/** + * Clean up temporary info from [[CTERelationDef]] nodes. This rule should be called after all + * iterations of [[PushdownPredicatesAndPruneColumnsForCTEDef]] are done. + */ +object CleanUpTempCTEInfo extends Rule[LogicalPlan] { + override def apply(plan: LogicalPlan): LogicalPlan = + plan.transformWithPruning(_.containsPattern(CTE)) { + case cteDef @ CTERelationDef(_, _, Some(_)) => + cteDef.copy(originalPlanWithPredicates = None) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceCTERefWithRepartition.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceCTERefWithRepartition.scala new file mode 100644 index 0000000000000..e0d0417ce5161 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceCTERefWithRepartition.scala @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import scala.collection.mutable + +import org.apache.spark.sql.catalyst.analysis.DeduplicateRelations +import org.apache.spark.sql.catalyst.expressions.{Alias, SubqueryExpression} +import org.apache.spark.sql.catalyst.plans.Inner +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreePattern.{CTE, PLAN_EXPRESSION} + +/** + * Replaces CTE references that have not been previously inlined with [[Repartition]] operations + * which will then be planned as shuffles and reused across different reference points. + * + * Note that this rule should be called at the very end of the optimization phase to best guarantee + * that CTE repartition shuffles are reused. + */ +object ReplaceCTERefWithRepartition extends Rule[LogicalPlan] { + + override def apply(plan: LogicalPlan): LogicalPlan = plan match { + case _: Subquery => plan + case _ => + replaceWithRepartition(plan, mutable.HashMap.empty[Long, LogicalPlan]) + } + + private def replaceWithRepartition( + plan: LogicalPlan, + cteMap: mutable.HashMap[Long, LogicalPlan]): LogicalPlan = plan match { + case WithCTE(child, cteDefs) => + cteDefs.foreach { cteDef => + val inlined = replaceWithRepartition(cteDef.child, cteMap) + val withRepartition = if (inlined.isInstanceOf[RepartitionOperation]) { + // If the CTE definition plan itself is a repartition operation, we do not need to add an + // extra repartition shuffle. + inlined + } else { + Repartition(conf.numShufflePartitions, shuffle = true, inlined) + } + cteMap.put(cteDef.id, withRepartition) + } + replaceWithRepartition(child, cteMap) + + case ref: CTERelationRef => + val cteDefPlan = cteMap(ref.cteId) + if (ref.outputSet == cteDefPlan.outputSet) { + cteDefPlan + } else { + val ctePlan = DeduplicateRelations( + Join(cteDefPlan, cteDefPlan, Inner, None, JoinHint(None, None))).children(1) + val projectList = ref.output.zip(ctePlan.output).map { case (tgtAttr, srcAttr) => + Alias(srcAttr, tgtAttr.name)(exprId = tgtAttr.exprId) + } + Project(projectList, ctePlan) + } + + case _ if plan.containsPattern(CTE) => + plan + .withNewChildren(plan.children.map(c => replaceWithRepartition(c, cteMap))) + .transformExpressionsWithPruning(_.containsAllPatterns(PLAN_EXPRESSION, CTE)) { + case e: SubqueryExpression => + e.withNewPlan(replaceWithRepartition(e.plan, cteMap)) + } + + case _ => plan + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 5d749b8fc4b53..0f8df5df3764a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -448,6 +448,14 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] subqueries ++ subqueries.flatMap(_.subqueriesAll) } + /** + * This method is similar to the transform method, but also applies the given partial function + * also to all the plans in the subqueries of a node. This method is useful when we want + * to rewrite the whole plan, include its subqueries, in one go. + */ + def transformWithSubqueries(f: PartialFunction[PlanType, PlanType]): PlanType = + transformDownWithSubqueries(f) + /** * Returns a copy of this node where the given partial function has been recursively applied * first to the subqueries in this node's children, then this node's children, and finally @@ -465,6 +473,29 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] } } + /** + * This method is the top-down (pre-order) counterpart of transformUpWithSubqueries. + * Returns a copy of this node where the given partial function has been recursively applied + * first to this node, then this node's subqueries and finally this node's children. + * When the partial function does not apply to a given node, it is left unchanged. + */ + def transformDownWithSubqueries(f: PartialFunction[PlanType, PlanType]): PlanType = { + val g: PartialFunction[PlanType, PlanType] = new PartialFunction[PlanType, PlanType] { + override def isDefinedAt(x: PlanType): Boolean = true + + override def apply(plan: PlanType): PlanType = { + val transformed = f.applyOrElse[PlanType, PlanType](plan, identity) + transformed transformExpressionsDown { + case planExpression: PlanExpression[PlanType] => + val newPlan = planExpression.plan.transformDownWithSubqueries(f) + planExpression.withNewPlan(newPlan) + } + } + } + + transformDown(g) + } + /** * A variant of `collect`. This method not only apply the given function to all elements in this * plan, also considering all the plans in its (nested) subqueries diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 895eeb772075d..e5eab691d14fd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -659,8 +659,15 @@ case class UnresolvedWith( * A wrapper for CTE definition plan with a unique ID. * @param child The CTE definition query plan. * @param id The unique ID for this CTE definition. + * @param originalPlanWithPredicates The original query plan before predicate pushdown and the + * predicates that have been pushed down into `child`. This is + * a temporary field used by optimization rules for CTE predicate + * pushdown to help ensure rule idempotency. */ -case class CTERelationDef(child: LogicalPlan, id: Long = CTERelationDef.newId) extends UnaryNode { +case class CTERelationDef( + child: LogicalPlan, + id: Long = CTERelationDef.newId, + originalPlanWithPredicates: Option[(LogicalPlan, Seq[Expression])] = None) extends UnaryNode { final override val nodePatterns: Seq[TreePattern] = Seq(CTE) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala index 804f1edbe06fd..7dde85014e7c7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala @@ -108,7 +108,8 @@ trait AnalysisTest extends PlanTest { case v: View if v.isTempViewStoringAnalyzedPlan => v.child } val actualPlan = if (inlineCTE) { - InlineCTE(transformed) + val inlineCTE = InlineCTE() + inlineCTE(transformed) } else { transformed } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 9bf8de5ea6c4b..5dcdebfbe0ea7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -21,8 +21,6 @@ import java.io.{BufferedWriter, OutputStreamWriter} import java.util.UUID import java.util.concurrent.atomic.AtomicLong -import scala.collection.mutable - import org.apache.hadoop.fs.Path import org.apache.spark.internal.Logging @@ -32,7 +30,7 @@ import org.apache.spark.sql.catalyst.{InternalRow, QueryPlanningTracker} import org.apache.spark.sql.catalyst.analysis.UnsupportedOperationChecker import org.apache.spark.sql.catalyst.expressions.codegen.ByteCodeStats import org.apache.spark.sql.catalyst.plans.QueryPlan -import org.apache.spark.sql.catalyst.plans.logical.{AppendData, Command, CommandResult, CreateTableAsSelect, CTERelationDef, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, ReplaceTableAsSelect, ReturnAnswer} +import org.apache.spark.sql.catalyst.plans.logical.{AppendData, Command, CommandResult, CreateTableAsSelect, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic, ReplaceTableAsSelect, ReturnAnswer} import org.apache.spark.sql.catalyst.rules.{PlanChangeLogger, Rule} import org.apache.spark.sql.catalyst.util.StringUtils.PlanStringConcat import org.apache.spark.sql.catalyst.util.truncatedString @@ -64,17 +62,6 @@ class QueryExecution( // TODO: Move the planner an optimizer into here from SessionState. protected def planner = sparkSession.sessionState.planner - // The CTE map for the planner shared by the main query and all subqueries. - private val cteMap = mutable.HashMap.empty[Long, CTERelationDef] - - def withCteMap[T](f: => T): T = { - val old = QueryExecution.currentCteMap.get() - QueryExecution.currentCteMap.set(cteMap) - try f finally { - QueryExecution.currentCteMap.set(old) - } - } - def assertAnalyzed(): Unit = analyzed def assertSupported(): Unit = { @@ -147,7 +134,7 @@ class QueryExecution( private def assertOptimized(): Unit = optimizedPlan - lazy val sparkPlan: SparkPlan = withCteMap { + lazy val sparkPlan: SparkPlan = { // We need to materialize the optimizedPlan here because sparkPlan is also tracked under // the planning phase assertOptimized() @@ -160,7 +147,7 @@ class QueryExecution( // executedPlan should not be used to initialize any SparkPlan. It should be // only used for execution. - lazy val executedPlan: SparkPlan = withCteMap { + lazy val executedPlan: SparkPlan = { // We need to materialize the optimizedPlan here, before tracking the planning phase, to ensure // that the optimization time is not counted as part of the planning phase. assertOptimized() @@ -497,8 +484,4 @@ object QueryExecution { val preparationRules = preparations(session, Option(InsertAdaptiveSparkPlan(context)), true) prepareForExecution(preparationRules, sparkPlan.clone()) } - - private val currentCteMap = new ThreadLocal[mutable.HashMap[Long, CTERelationDef]]() - - def cteMap: mutable.HashMap[Long, CTERelationDef] = currentCteMap.get() } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index 8c134363af112..d9457a20d91c9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -76,7 +76,8 @@ class SparkOptimizer( ColumnPruning, PushPredicateThroughNonJoin, RemoveNoopOperators) :+ - Batch("User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*) + Batch("User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*) :+ + Batch("Replace CTE with Repartition", Once, ReplaceCTERefWithRepartition) override def nonExcludableRules: Seq[String] = super.nonExcludableRules :+ ExtractPythonUDFFromJoinCondition.ruleName :+ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala index 32ac58f8353ab..6994aaf47dfba 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlanner.scala @@ -44,7 +44,6 @@ class SparkPlanner(val session: SparkSession, val experimentalMethods: Experimen JoinSelection :: InMemoryScans :: SparkScripts :: - WithCTEStrategy :: BasicOperators :: Nil) /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 675b158100394..3b8a70ffe94c3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -29,7 +29,6 @@ import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide import org.apache.spark.sql.catalyst.planning._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.plans.physical.RoundRobinPartitioning import org.apache.spark.sql.catalyst.streaming.{InternalOutputModes, StreamingRelationV2} import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.execution.aggregate.AggUtils @@ -675,36 +674,6 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { } } - /** - * Strategy to plan CTE relations left not inlined. - */ - object WithCTEStrategy extends Strategy { - override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { - case WithCTE(plan, cteDefs) => - val cteMap = QueryExecution.cteMap - cteDefs.foreach { cteDef => - cteMap.put(cteDef.id, cteDef) - } - planLater(plan) :: Nil - - case r: CTERelationRef => - val ctePlan = QueryExecution.cteMap(r.cteId).child - val projectList = r.output.zip(ctePlan.output).map { case (tgtAttr, srcAttr) => - Alias(srcAttr, tgtAttr.name)(exprId = tgtAttr.exprId) - } - val newPlan = Project(projectList, ctePlan) - // Plan CTE ref as a repartition shuffle so that all refs of the same CTE def will share - // an Exchange reuse at runtime. - // TODO create a new identity partitioning instead of using RoundRobinPartitioning. - exchange.ShuffleExchangeExec( - RoundRobinPartitioning(conf.numShufflePartitions), - planLater(newPlan), - REPARTITION_BY_COL) :: Nil - - case _ => Nil - } - } - object BasicOperators extends Strategy { def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case d: DataWritingCommand => DataWritingCommandExec(d, planLater(d.query)) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index c6505a0ea5f73..df302e5dc7577 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -148,9 +148,7 @@ case class AdaptiveSparkPlanExec( collapseCodegenStagesRule ) - private def optimizeQueryStage( - plan: SparkPlan, - isFinalStage: Boolean): SparkPlan = context.qe.withCteMap { + private def optimizeQueryStage(plan: SparkPlan, isFinalStage: Boolean): SparkPlan = { val optimized = queryStageOptimizerRules.foldLeft(plan) { case (latestPlan, rule) => val applied = rule.apply(latestPlan) val result = rule match { @@ -640,8 +638,7 @@ case class AdaptiveSparkPlanExec( /** * Re-optimize and run physical planning on the current logical plan based on the latest stats. */ - private def reOptimize( - logicalPlan: LogicalPlan): (SparkPlan, LogicalPlan) = context.qe.withCteMap { + private def reOptimize(logicalPlan: LogicalPlan): (SparkPlan, LogicalPlan) = { logicalPlan.invalidateStatsCache() val optimized = optimizer.execute(logicalPlan) val sparkPlan = context.session.sessionState.planner.plan(ReturnAnswer(optimized)).next() diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql index a76a010722090..4c80b268c20c3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql @@ -145,3 +145,45 @@ SELECT t1c, (SELECT t1c WHERE t1c = 8) FROM t1; SELECT t1c, t1d, (SELECT c + d FROM (SELECT t1c AS c, t1d AS d)) FROM t1; SELECT t1c, (SELECT SUM(c) FROM (SELECT t1c AS c)) FROM t1; SELECT t1a, (SELECT SUM(t2b) FROM t2 JOIN (SELECT t1a AS a) ON t2a = a) FROM t1; + +-- CTE in correlated scalar subqueries +CREATE OR REPLACE TEMPORARY VIEW t1 AS VALUES (0, 1), (1, 2) t1(c1, c2); +CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (0, 2), (0, 3) t2(c1, c2); + +-- Single row subquery +SELECT c1, (WITH t AS (SELECT 1 AS a) SELECT a + c1 FROM t) FROM t1; +-- Correlation in CTE. +SELECT c1, (WITH t AS (SELECT * FROM t2 WHERE c1 = t1.c1) SELECT SUM(c2) FROM t) FROM t1; +-- Multiple CTE definitions. +SELECT c1, ( + WITH t3 AS (SELECT c1 + 1 AS c1, c2 + 1 AS c2 FROM t2), + t4 AS (SELECT * FROM t3 WHERE t1.c1 = c1) + SELECT SUM(c2) FROM t4 +) FROM t1; +-- Multiple CTE references. +SELECT c1, ( + WITH t AS (SELECT * FROM t2) + SELECT SUM(c2) FROM (SELECT c1, c2 FROM t UNION SELECT c2, c1 FROM t) r(c1, c2) + WHERE c1 = t1.c1 +) FROM t1; +-- Reference CTE in both the main query and the subquery. +WITH v AS (SELECT * FROM t2) +SELECT * FROM t1 WHERE c1 > ( + WITH t AS (SELECT * FROM t2) + SELECT COUNT(*) FROM v WHERE c1 = t1.c1 AND c1 > (SELECT SUM(c2) FROM t WHERE c1 = v.c1) +); +-- Single row subquery that references CTE in the main query. +WITH t AS (SELECT 1 AS a) +SELECT c1, (SELECT a FROM t WHERE a = c1) FROM t1; +-- Multiple CTE references with non-deterministic CTEs. +WITH +v1 AS (SELECT c1, c2, rand(0) c3 FROM t1), +v2 AS (SELECT c1, c2, rand(0) c4 FROM v1 WHERE c3 IN (SELECT c3 FROM v1)) +SELECT c1, ( + WITH v3 AS (SELECT c1, c2, rand(0) c5 FROM t2) + SELECT COUNT(*) FROM ( + SELECT * FROM v2 WHERE c1 > 0 + UNION SELECT * FROM v2 WHERE c2 > 0 + UNION SELECT * FROM v3 WHERE c2 > 0 + ) WHERE c1 = v1.c1 +) FROM v1; diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out index 8fac940f8efd0..3eb1c6ffba187 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 17 +-- Number of queries: 26 -- !query @@ -317,3 +317,104 @@ val1d NULL val1e 8 val1e 8 val1e 8 + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW t1 AS VALUES (0, 1), (1, 2) t1(c1, c2) +-- !query schema +struct<> +-- !query output + + + +-- !query +CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (0, 2), (0, 3) t2(c1, c2) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT c1, (WITH t AS (SELECT 1 AS a) SELECT a + c1 FROM t) FROM t1 +-- !query schema +struct +-- !query output +0 1 +1 2 + + +-- !query +SELECT c1, (WITH t AS (SELECT * FROM t2 WHERE c1 = t1.c1) SELECT SUM(c2) FROM t) FROM t1 +-- !query schema +struct +-- !query output +0 5 +1 NULL + + +-- !query +SELECT c1, ( + WITH t3 AS (SELECT c1 + 1 AS c1, c2 + 1 AS c2 FROM t2), + t4 AS (SELECT * FROM t3 WHERE t1.c1 = c1) + SELECT SUM(c2) FROM t4 +) FROM t1 +-- !query schema +struct +-- !query output +0 NULL +1 7 + + +-- !query +SELECT c1, ( + WITH t AS (SELECT * FROM t2) + SELECT SUM(c2) FROM (SELECT c1, c2 FROM t UNION SELECT c2, c1 FROM t) r(c1, c2) + WHERE c1 = t1.c1 +) FROM t1 +-- !query schema +struct +-- !query output +0 5 +1 NULL + + +-- !query +WITH v AS (SELECT * FROM t2) +SELECT * FROM t1 WHERE c1 > ( + WITH t AS (SELECT * FROM t2) + SELECT COUNT(*) FROM v WHERE c1 = t1.c1 AND c1 > (SELECT SUM(c2) FROM t WHERE c1 = v.c1) +) +-- !query schema +struct +-- !query output +1 2 + + +-- !query +WITH t AS (SELECT 1 AS a) +SELECT c1, (SELECT a FROM t WHERE a = c1) FROM t1 +-- !query schema +struct +-- !query output +0 NULL +1 1 + + +-- !query +WITH +v1 AS (SELECT c1, c2, rand(0) c3 FROM t1), +v2 AS (SELECT c1, c2, rand(0) c4 FROM v1 WHERE c3 IN (SELECT c3 FROM v1)) +SELECT c1, ( + WITH v3 AS (SELECT c1, c2, rand(0) c5 FROM t2) + SELECT COUNT(*) FROM ( + SELECT * FROM v2 WHERE c1 > 0 + UNION SELECT * FROM v2 WHERE c2 > 0 + UNION SELECT * FROM v3 WHERE c2 > 0 + ) WHERE c1 = v1.c1 +) FROM v1 +-- !query schema +struct +-- !query output +0 3 +1 1 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt index 5bf5193487b07..7f419ce3eaf6d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt @@ -360,19 +360,19 @@ Right keys [1]: [i_item_sk#14] Join condition: None (61) Project [codegen id : 25] -Output [3]: [d_date#12, i_item_sk#14, substr(i_item_desc#15, 1, 30) AS _groupingexpression#47] +Output [3]: [d_date#12, i_item_sk#14, substr(i_item_desc#15, 1, 30) AS _groupingexpression#17] Input [4]: [ss_item_sk#8, d_date#12, i_item_sk#14, i_item_desc#15] (62) HashAggregate [codegen id : 25] -Input [3]: [d_date#12, i_item_sk#14, _groupingexpression#47] -Keys [3]: [_groupingexpression#47, i_item_sk#14, d_date#12] +Input [3]: [d_date#12, i_item_sk#14, _groupingexpression#17] +Keys [3]: [_groupingexpression#17, i_item_sk#14, d_date#12] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#18] -Results [4]: [_groupingexpression#47, i_item_sk#14, d_date#12, count#19] +Results [4]: [_groupingexpression#17, i_item_sk#14, d_date#12, count#19] (63) HashAggregate [codegen id : 25] -Input [4]: [_groupingexpression#47, i_item_sk#14, d_date#12, count#19] -Keys [3]: [_groupingexpression#47, i_item_sk#14, d_date#12] +Input [4]: [_groupingexpression#17, i_item_sk#14, d_date#12, count#19] +Keys [3]: [_groupingexpression#17, i_item_sk#14, d_date#12] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#20] Results [2]: [i_item_sk#14 AS item_sk#21, count(1)#20 AS cnt#22] @@ -400,7 +400,7 @@ Input [5]: [ws_item_sk#41, ws_bill_customer_sk#42, ws_quantity#43, ws_list_price (69) Exchange Input [4]: [ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] -Arguments: hashpartitioning(ws_bill_customer_sk#42, 5), ENSURE_REQUIREMENTS, [id=#48] +Arguments: hashpartitioning(ws_bill_customer_sk#42, 5), ENSURE_REQUIREMENTS, [id=#47] (70) Sort [codegen id : 27] Input [4]: [ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] @@ -433,11 +433,11 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#49, isEmpty#50] -Results [3]: [c_customer_sk#29, sum#51, isEmpty#52] +Aggregate Attributes [2]: [sum#48, isEmpty#49] +Results [3]: [c_customer_sk#29, sum#50, isEmpty#51] (78) HashAggregate [codegen id : 32] -Input [3]: [c_customer_sk#29, sum#51, isEmpty#52] +Input [3]: [c_customer_sk#29, sum#50, isEmpty#51] Keys [1]: [c_customer_sk#29] Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] @@ -465,16 +465,16 @@ Output [3]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] Input [4]: [ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] (84) ReusedExchange [Reuses operator id: 95] -Output [1]: [d_date_sk#53] +Output [1]: [d_date_sk#52] (85) BroadcastHashJoin [codegen id : 34] Left keys [1]: [ws_sold_date_sk#45] -Right keys [1]: [d_date_sk#53] +Right keys [1]: [d_date_sk#52] Join condition: None (86) Project [codegen id : 34] -Output [1]: [CheckOverflow((promote_precision(cast(ws_quantity#43 as decimal(12,2))) * promote_precision(cast(ws_list_price#44 as decimal(12,2)))), DecimalType(18,2)) AS sales#54] -Input [4]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45, d_date_sk#53] +Output [1]: [CheckOverflow((promote_precision(cast(ws_quantity#43 as decimal(12,2))) * promote_precision(cast(ws_list_price#44 as decimal(12,2)))), DecimalType(18,2)) AS sales#53] +Input [4]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45, d_date_sk#52] (87) Union @@ -482,19 +482,19 @@ Input [4]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45, d_date_sk#53] Input [1]: [sales#40] Keys: [] Functions [1]: [partial_sum(sales#40)] -Aggregate Attributes [2]: [sum#55, isEmpty#56] -Results [2]: [sum#57, isEmpty#58] +Aggregate Attributes [2]: [sum#54, isEmpty#55] +Results [2]: [sum#56, isEmpty#57] (89) Exchange -Input [2]: [sum#57, isEmpty#58] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#59] +Input [2]: [sum#56, isEmpty#57] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#58] (90) HashAggregate [codegen id : 36] -Input [2]: [sum#57, isEmpty#58] +Input [2]: [sum#56, isEmpty#57] Keys: [] Functions [1]: [sum(sales#40)] -Aggregate Attributes [1]: [sum(sales#40)#60] -Results [1]: [sum(sales#40)#60 AS sum(sales)#61] +Aggregate Attributes [1]: [sum(sales#40)#59] +Results [1]: [sum(sales#40)#59 AS sum(sales)#60] ===== Subqueries ===== @@ -507,26 +507,26 @@ BroadcastExchange (95) (91) Scan parquet default.date_dim -Output [3]: [d_date_sk#39, d_year#62, d_moy#63] +Output [3]: [d_date_sk#39, d_year#61, d_moy#62] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (92) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#62, d_moy#63] +Input [3]: [d_date_sk#39, d_year#61, d_moy#62] (93) Filter [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#62, d_moy#63] -Condition : ((((isnotnull(d_year#62) AND isnotnull(d_moy#63)) AND (d_year#62 = 2000)) AND (d_moy#63 = 2)) AND isnotnull(d_date_sk#39)) +Input [3]: [d_date_sk#39, d_year#61, d_moy#62] +Condition : ((((isnotnull(d_year#61) AND isnotnull(d_moy#62)) AND (d_year#61 = 2000)) AND (d_moy#62 = 2)) AND isnotnull(d_date_sk#39)) (94) Project [codegen id : 1] Output [1]: [d_date_sk#39] -Input [3]: [d_date_sk#39, d_year#62, d_moy#63] +Input [3]: [d_date_sk#39, d_year#61, d_moy#62] (95) BroadcastExchange Input [1]: [d_date_sk#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#63] Subquery:2 Hosting operator id = 5 Hosting Expression = ss_sold_date_sk#9 IN dynamicpruning#10 BroadcastExchange (100) @@ -537,26 +537,26 @@ BroadcastExchange (100) (96) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_date#12, d_year#65] +Output [3]: [d_date_sk#11, d_date#12, d_year#64] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (97) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_date#12, d_year#65] +Input [3]: [d_date_sk#11, d_date#12, d_year#64] (98) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_date#12, d_year#65] -Condition : (d_year#65 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#11, d_date#12, d_year#64] +Condition : (d_year#64 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#11)) (99) Project [codegen id : 1] Output [2]: [d_date_sk#11, d_date#12] -Input [3]: [d_date_sk#11, d_date#12, d_year#65] +Input [3]: [d_date_sk#11, d_date#12, d_year#64] (100) BroadcastExchange Input [2]: [d_date_sk#11, d_date#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#66] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#65] Subquery:3 Hosting operator id = 44 Hosting Expression = Subquery scalar-subquery#37, [id=#38] * HashAggregate (117) @@ -579,89 +579,89 @@ Subquery:3 Hosting operator id = 44 Hosting Expression = Subquery scalar-subquer (101) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69, ss_sold_date_sk#70] +Output [4]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68, ss_sold_date_sk#69] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#70), dynamicpruningexpression(ss_sold_date_sk#70 IN dynamicpruning#71)] +PartitionFilters: [isnotnull(ss_sold_date_sk#69), dynamicpruningexpression(ss_sold_date_sk#69 IN dynamicpruning#70)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (102) ColumnarToRow [codegen id : 2] -Input [4]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69, ss_sold_date_sk#70] +Input [4]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68, ss_sold_date_sk#69] (103) Filter [codegen id : 2] -Input [4]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69, ss_sold_date_sk#70] -Condition : isnotnull(ss_customer_sk#67) +Input [4]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68, ss_sold_date_sk#69] +Condition : isnotnull(ss_customer_sk#66) (104) ReusedExchange [Reuses operator id: 122] -Output [1]: [d_date_sk#72] +Output [1]: [d_date_sk#71] (105) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#70] -Right keys [1]: [d_date_sk#72] +Left keys [1]: [ss_sold_date_sk#69] +Right keys [1]: [d_date_sk#71] Join condition: None (106) Project [codegen id : 2] -Output [3]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69] -Input [5]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69, ss_sold_date_sk#70, d_date_sk#72] +Output [3]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68] +Input [5]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68, ss_sold_date_sk#69, d_date_sk#71] (107) Exchange -Input [3]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69] -Arguments: hashpartitioning(ss_customer_sk#67, 5), ENSURE_REQUIREMENTS, [id=#73] +Input [3]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68] +Arguments: hashpartitioning(ss_customer_sk#66, 5), ENSURE_REQUIREMENTS, [id=#72] (108) Sort [codegen id : 3] -Input [3]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69] -Arguments: [ss_customer_sk#67 ASC NULLS FIRST], false, 0 +Input [3]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68] +Arguments: [ss_customer_sk#66 ASC NULLS FIRST], false, 0 (109) ReusedExchange [Reuses operator id: 38] -Output [1]: [c_customer_sk#74] +Output [1]: [c_customer_sk#73] (110) Sort [codegen id : 5] -Input [1]: [c_customer_sk#74] -Arguments: [c_customer_sk#74 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#73] +Arguments: [c_customer_sk#73 ASC NULLS FIRST], false, 0 (111) SortMergeJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#67] -Right keys [1]: [c_customer_sk#74] +Left keys [1]: [ss_customer_sk#66] +Right keys [1]: [c_customer_sk#73] Join condition: None (112) Project [codegen id : 6] -Output [3]: [ss_quantity#68, ss_sales_price#69, c_customer_sk#74] -Input [4]: [ss_customer_sk#67, ss_quantity#68, ss_sales_price#69, c_customer_sk#74] +Output [3]: [ss_quantity#67, ss_sales_price#68, c_customer_sk#73] +Input [4]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68, c_customer_sk#73] (113) HashAggregate [codegen id : 6] -Input [3]: [ss_quantity#68, ss_sales_price#69, c_customer_sk#74] -Keys [1]: [c_customer_sk#74] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#75, isEmpty#76] -Results [3]: [c_customer_sk#74, sum#77, isEmpty#78] +Input [3]: [ss_quantity#67, ss_sales_price#68, c_customer_sk#73] +Keys [1]: [c_customer_sk#73] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#67 as decimal(12,2))) * promote_precision(cast(ss_sales_price#68 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#74, isEmpty#75] +Results [3]: [c_customer_sk#73, sum#76, isEmpty#77] (114) HashAggregate [codegen id : 6] -Input [3]: [c_customer_sk#74, sum#77, isEmpty#78] -Keys [1]: [c_customer_sk#74] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2)))#79] -Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_sales_price#69 as decimal(12,2)))), DecimalType(18,2)))#79 AS csales#80] +Input [3]: [c_customer_sk#73, sum#76, isEmpty#77] +Keys [1]: [c_customer_sk#73] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#67 as decimal(12,2))) * promote_precision(cast(ss_sales_price#68 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#67 as decimal(12,2))) * promote_precision(cast(ss_sales_price#68 as decimal(12,2)))), DecimalType(18,2)))#78] +Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#67 as decimal(12,2))) * promote_precision(cast(ss_sales_price#68 as decimal(12,2)))), DecimalType(18,2)))#78 AS csales#79] (115) HashAggregate [codegen id : 6] -Input [1]: [csales#80] +Input [1]: [csales#79] Keys: [] -Functions [1]: [partial_max(csales#80)] -Aggregate Attributes [1]: [max#81] -Results [1]: [max#82] +Functions [1]: [partial_max(csales#79)] +Aggregate Attributes [1]: [max#80] +Results [1]: [max#81] (116) Exchange -Input [1]: [max#82] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#83] +Input [1]: [max#81] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#82] (117) HashAggregate [codegen id : 7] -Input [1]: [max#82] +Input [1]: [max#81] Keys: [] -Functions [1]: [max(csales#80)] -Aggregate Attributes [1]: [max(csales#80)#84] -Results [1]: [max(csales#80)#84 AS tpcds_cmax#85] +Functions [1]: [max(csales#79)] +Aggregate Attributes [1]: [max(csales#79)#83] +Results [1]: [max(csales#79)#83 AS tpcds_cmax#84] -Subquery:4 Hosting operator id = 101 Hosting Expression = ss_sold_date_sk#70 IN dynamicpruning#71 +Subquery:4 Hosting operator id = 101 Hosting Expression = ss_sold_date_sk#69 IN dynamicpruning#70 BroadcastExchange (122) +- * Project (121) +- * Filter (120) @@ -670,26 +670,26 @@ BroadcastExchange (122) (118) Scan parquet default.date_dim -Output [2]: [d_date_sk#72, d_year#86] +Output [2]: [d_date_sk#71, d_year#85] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (119) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#72, d_year#86] +Input [2]: [d_date_sk#71, d_year#85] (120) Filter [codegen id : 1] -Input [2]: [d_date_sk#72, d_year#86] -Condition : (d_year#86 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#72)) +Input [2]: [d_date_sk#71, d_year#85] +Condition : (d_year#85 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#71)) (121) Project [codegen id : 1] -Output [1]: [d_date_sk#72] -Input [2]: [d_date_sk#72, d_year#86] +Output [1]: [d_date_sk#71] +Input [2]: [d_date_sk#71, d_year#85] (122) BroadcastExchange -Input [1]: [d_date_sk#72] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#87] +Input [1]: [d_date_sk#71] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#86] Subquery:5 Hosting operator id = 52 Hosting Expression = ws_sold_date_sk#45 IN dynamicpruning#6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt index 3de1f24613451..4d1109078e346 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt @@ -508,19 +508,19 @@ Right keys [1]: [i_item_sk#14] Join condition: None (84) Project [codegen id : 35] -Output [3]: [d_date#12, i_item_sk#14, substr(i_item_desc#15, 1, 30) AS _groupingexpression#57] +Output [3]: [d_date#12, i_item_sk#14, substr(i_item_desc#15, 1, 30) AS _groupingexpression#17] Input [4]: [ss_item_sk#8, d_date#12, i_item_sk#14, i_item_desc#15] (85) HashAggregate [codegen id : 35] -Input [3]: [d_date#12, i_item_sk#14, _groupingexpression#57] -Keys [3]: [_groupingexpression#57, i_item_sk#14, d_date#12] +Input [3]: [d_date#12, i_item_sk#14, _groupingexpression#17] +Keys [3]: [_groupingexpression#17, i_item_sk#14, d_date#12] Functions [1]: [partial_count(1)] Aggregate Attributes [1]: [count#18] -Results [4]: [_groupingexpression#57, i_item_sk#14, d_date#12, count#19] +Results [4]: [_groupingexpression#17, i_item_sk#14, d_date#12, count#19] (86) HashAggregate [codegen id : 35] -Input [4]: [_groupingexpression#57, i_item_sk#14, d_date#12, count#19] -Keys [3]: [_groupingexpression#57, i_item_sk#14, d_date#12] +Input [4]: [_groupingexpression#17, i_item_sk#14, d_date#12, count#19] +Keys [3]: [_groupingexpression#17, i_item_sk#14, d_date#12] Functions [1]: [count(1)] Aggregate Attributes [1]: [count(1)#20] Results [2]: [i_item_sk#14 AS item_sk#21, count(1)#20 AS cnt#22] @@ -548,7 +548,7 @@ Input [5]: [ws_item_sk#51, ws_bill_customer_sk#52, ws_quantity#53, ws_list_price (92) Exchange Input [4]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55] -Arguments: hashpartitioning(ws_bill_customer_sk#52, 5), ENSURE_REQUIREMENTS, [id=#58] +Arguments: hashpartitioning(ws_bill_customer_sk#52, 5), ENSURE_REQUIREMENTS, [id=#57] (93) Sort [codegen id : 37] Input [4]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55] @@ -581,11 +581,11 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#59, isEmpty#60] -Results [3]: [c_customer_sk#29, sum#61, isEmpty#62] +Aggregate Attributes [2]: [sum#58, isEmpty#59] +Results [3]: [c_customer_sk#29, sum#60, isEmpty#61] (101) HashAggregate [codegen id : 42] -Input [3]: [c_customer_sk#29, sum#61, isEmpty#62] +Input [3]: [c_customer_sk#29, sum#60, isEmpty#61] Keys [1]: [c_customer_sk#29] Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] @@ -609,23 +609,23 @@ Right keys [1]: [c_customer_sk#29] Join condition: None (106) ReusedExchange [Reuses operator id: 134] -Output [1]: [d_date_sk#63] +Output [1]: [d_date_sk#62] (107) BroadcastHashJoin [codegen id : 44] Left keys [1]: [ws_sold_date_sk#55] -Right keys [1]: [d_date_sk#63] +Right keys [1]: [d_date_sk#62] Join condition: None (108) Project [codegen id : 44] Output [3]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54] -Input [5]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55, d_date_sk#63] +Input [5]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55, d_date_sk#62] (109) ReusedExchange [Reuses operator id: 55] -Output [3]: [c_customer_sk#64, c_first_name#65, c_last_name#66] +Output [3]: [c_customer_sk#63, c_first_name#64, c_last_name#65] (110) Sort [codegen id : 46] -Input [3]: [c_customer_sk#64, c_first_name#65, c_last_name#66] -Arguments: [c_customer_sk#64 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#63, c_first_name#64, c_last_name#65] +Arguments: [c_customer_sk#63 ASC NULLS FIRST], false, 0 (111) ReusedExchange [Reuses operator id: 34] Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] @@ -654,11 +654,11 @@ Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk# Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] Keys [1]: [c_customer_sk#29] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#59, isEmpty#60] -Results [3]: [c_customer_sk#29, sum#61, isEmpty#62] +Aggregate Attributes [2]: [sum#58, isEmpty#59] +Results [3]: [c_customer_sk#29, sum#60, isEmpty#61] (118) HashAggregate [codegen id : 51] -Input [3]: [c_customer_sk#29, sum#61, isEmpty#62] +Input [3]: [c_customer_sk#29, sum#60, isEmpty#61] Keys [1]: [c_customer_sk#29] Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] @@ -677,36 +677,36 @@ Input [1]: [c_customer_sk#29] Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 (122) SortMergeJoin [codegen id : 52] -Left keys [1]: [c_customer_sk#64] +Left keys [1]: [c_customer_sk#63] Right keys [1]: [c_customer_sk#29] Join condition: None (123) SortMergeJoin [codegen id : 53] Left keys [1]: [ws_bill_customer_sk#52] -Right keys [1]: [c_customer_sk#64] +Right keys [1]: [c_customer_sk#63] Join condition: None (124) Project [codegen id : 53] -Output [4]: [ws_quantity#53, ws_list_price#54, c_first_name#65, c_last_name#66] -Input [6]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, c_customer_sk#64, c_first_name#65, c_last_name#66] +Output [4]: [ws_quantity#53, ws_list_price#54, c_first_name#64, c_last_name#65] +Input [6]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, c_customer_sk#63, c_first_name#64, c_last_name#65] (125) HashAggregate [codegen id : 53] -Input [4]: [ws_quantity#53, ws_list_price#54, c_first_name#65, c_last_name#66] -Keys [2]: [c_last_name#66, c_first_name#65] +Input [4]: [ws_quantity#53, ws_list_price#54, c_first_name#64, c_last_name#65] +Keys [2]: [c_last_name#65, c_first_name#64] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#67, isEmpty#68] -Results [4]: [c_last_name#66, c_first_name#65, sum#69, isEmpty#70] +Aggregate Attributes [2]: [sum#66, isEmpty#67] +Results [4]: [c_last_name#65, c_first_name#64, sum#68, isEmpty#69] (126) Exchange -Input [4]: [c_last_name#66, c_first_name#65, sum#69, isEmpty#70] -Arguments: hashpartitioning(c_last_name#66, c_first_name#65, 5), ENSURE_REQUIREMENTS, [id=#71] +Input [4]: [c_last_name#65, c_first_name#64, sum#68, isEmpty#69] +Arguments: hashpartitioning(c_last_name#65, c_first_name#64, 5), ENSURE_REQUIREMENTS, [id=#70] (127) HashAggregate [codegen id : 54] -Input [4]: [c_last_name#66, c_first_name#65, sum#69, isEmpty#70] -Keys [2]: [c_last_name#66, c_first_name#65] +Input [4]: [c_last_name#65, c_first_name#64, sum#68, isEmpty#69] +Keys [2]: [c_last_name#65, c_first_name#64] Functions [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2)))#72] -Results [3]: [c_last_name#66, c_first_name#65, sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2)))#72 AS sales#73] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2)))#71] +Results [3]: [c_last_name#65, c_first_name#64, sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2)))#71 AS sales#72] (128) Union @@ -725,26 +725,26 @@ BroadcastExchange (134) (130) Scan parquet default.date_dim -Output [3]: [d_date_sk#39, d_year#74, d_moy#75] +Output [3]: [d_date_sk#39, d_year#73, d_moy#74] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (131) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#74, d_moy#75] +Input [3]: [d_date_sk#39, d_year#73, d_moy#74] (132) Filter [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#74, d_moy#75] -Condition : ((((isnotnull(d_year#74) AND isnotnull(d_moy#75)) AND (d_year#74 = 2000)) AND (d_moy#75 = 2)) AND isnotnull(d_date_sk#39)) +Input [3]: [d_date_sk#39, d_year#73, d_moy#74] +Condition : ((((isnotnull(d_year#73) AND isnotnull(d_moy#74)) AND (d_year#73 = 2000)) AND (d_moy#74 = 2)) AND isnotnull(d_date_sk#39)) (133) Project [codegen id : 1] Output [1]: [d_date_sk#39] -Input [3]: [d_date_sk#39, d_year#74, d_moy#75] +Input [3]: [d_date_sk#39, d_year#73, d_moy#74] (134) BroadcastExchange Input [1]: [d_date_sk#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#76] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#75] Subquery:2 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#9 IN dynamicpruning#10 BroadcastExchange (139) @@ -755,26 +755,26 @@ BroadcastExchange (139) (135) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_date#12, d_year#77] +Output [3]: [d_date_sk#11, d_date#12, d_year#76] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (136) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_date#12, d_year#77] +Input [3]: [d_date_sk#11, d_date#12, d_year#76] (137) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_date#12, d_year#77] -Condition : (d_year#77 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#11, d_date#12, d_year#76] +Condition : (d_year#76 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#11)) (138) Project [codegen id : 1] Output [2]: [d_date_sk#11, d_date#12] -Input [3]: [d_date_sk#11, d_date#12, d_year#77] +Input [3]: [d_date_sk#11, d_date#12, d_year#76] (139) BroadcastExchange Input [2]: [d_date_sk#11, d_date#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#78] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#77] Subquery:3 Hosting operator id = 45 Hosting Expression = Subquery scalar-subquery#37, [id=#38] * HashAggregate (156) @@ -797,89 +797,89 @@ Subquery:3 Hosting operator id = 45 Hosting Expression = Subquery scalar-subquer (140) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81, ss_sold_date_sk#82] +Output [4]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80, ss_sold_date_sk#81] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#82), dynamicpruningexpression(ss_sold_date_sk#82 IN dynamicpruning#83)] +PartitionFilters: [isnotnull(ss_sold_date_sk#81), dynamicpruningexpression(ss_sold_date_sk#81 IN dynamicpruning#82)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (141) ColumnarToRow [codegen id : 2] -Input [4]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81, ss_sold_date_sk#82] +Input [4]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80, ss_sold_date_sk#81] (142) Filter [codegen id : 2] -Input [4]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81, ss_sold_date_sk#82] -Condition : isnotnull(ss_customer_sk#79) +Input [4]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80, ss_sold_date_sk#81] +Condition : isnotnull(ss_customer_sk#78) (143) ReusedExchange [Reuses operator id: 161] -Output [1]: [d_date_sk#84] +Output [1]: [d_date_sk#83] (144) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#82] -Right keys [1]: [d_date_sk#84] +Left keys [1]: [ss_sold_date_sk#81] +Right keys [1]: [d_date_sk#83] Join condition: None (145) Project [codegen id : 2] -Output [3]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81] -Input [5]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81, ss_sold_date_sk#82, d_date_sk#84] +Output [3]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80] +Input [5]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80, ss_sold_date_sk#81, d_date_sk#83] (146) Exchange -Input [3]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81] -Arguments: hashpartitioning(ss_customer_sk#79, 5), ENSURE_REQUIREMENTS, [id=#85] +Input [3]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80] +Arguments: hashpartitioning(ss_customer_sk#78, 5), ENSURE_REQUIREMENTS, [id=#84] (147) Sort [codegen id : 3] -Input [3]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81] -Arguments: [ss_customer_sk#79 ASC NULLS FIRST], false, 0 +Input [3]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80] +Arguments: [ss_customer_sk#78 ASC NULLS FIRST], false, 0 (148) ReusedExchange [Reuses operator id: 39] -Output [1]: [c_customer_sk#86] +Output [1]: [c_customer_sk#85] (149) Sort [codegen id : 5] -Input [1]: [c_customer_sk#86] -Arguments: [c_customer_sk#86 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#85] +Arguments: [c_customer_sk#85 ASC NULLS FIRST], false, 0 (150) SortMergeJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#79] -Right keys [1]: [c_customer_sk#86] +Left keys [1]: [ss_customer_sk#78] +Right keys [1]: [c_customer_sk#85] Join condition: None (151) Project [codegen id : 6] -Output [3]: [ss_quantity#80, ss_sales_price#81, c_customer_sk#86] -Input [4]: [ss_customer_sk#79, ss_quantity#80, ss_sales_price#81, c_customer_sk#86] +Output [3]: [ss_quantity#79, ss_sales_price#80, c_customer_sk#85] +Input [4]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80, c_customer_sk#85] (152) HashAggregate [codegen id : 6] -Input [3]: [ss_quantity#80, ss_sales_price#81, c_customer_sk#86] -Keys [1]: [c_customer_sk#86] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#87, isEmpty#88] -Results [3]: [c_customer_sk#86, sum#89, isEmpty#90] +Input [3]: [ss_quantity#79, ss_sales_price#80, c_customer_sk#85] +Keys [1]: [c_customer_sk#85] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#79 as decimal(12,2))) * promote_precision(cast(ss_sales_price#80 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#86, isEmpty#87] +Results [3]: [c_customer_sk#85, sum#88, isEmpty#89] (153) HashAggregate [codegen id : 6] -Input [3]: [c_customer_sk#86, sum#89, isEmpty#90] -Keys [1]: [c_customer_sk#86] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2)))#91] -Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#80 as decimal(12,2))) * promote_precision(cast(ss_sales_price#81 as decimal(12,2)))), DecimalType(18,2)))#91 AS csales#92] +Input [3]: [c_customer_sk#85, sum#88, isEmpty#89] +Keys [1]: [c_customer_sk#85] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#79 as decimal(12,2))) * promote_precision(cast(ss_sales_price#80 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#79 as decimal(12,2))) * promote_precision(cast(ss_sales_price#80 as decimal(12,2)))), DecimalType(18,2)))#90] +Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#79 as decimal(12,2))) * promote_precision(cast(ss_sales_price#80 as decimal(12,2)))), DecimalType(18,2)))#90 AS csales#91] (154) HashAggregate [codegen id : 6] -Input [1]: [csales#92] +Input [1]: [csales#91] Keys: [] -Functions [1]: [partial_max(csales#92)] -Aggregate Attributes [1]: [max#93] -Results [1]: [max#94] +Functions [1]: [partial_max(csales#91)] +Aggregate Attributes [1]: [max#92] +Results [1]: [max#93] (155) Exchange -Input [1]: [max#94] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#95] +Input [1]: [max#93] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#94] (156) HashAggregate [codegen id : 7] -Input [1]: [max#94] +Input [1]: [max#93] Keys: [] -Functions [1]: [max(csales#92)] -Aggregate Attributes [1]: [max(csales#92)#96] -Results [1]: [max(csales#92)#96 AS tpcds_cmax#97] +Functions [1]: [max(csales#91)] +Aggregate Attributes [1]: [max(csales#91)#95] +Results [1]: [max(csales#91)#95 AS tpcds_cmax#96] -Subquery:4 Hosting operator id = 140 Hosting Expression = ss_sold_date_sk#82 IN dynamicpruning#83 +Subquery:4 Hosting operator id = 140 Hosting Expression = ss_sold_date_sk#81 IN dynamicpruning#82 BroadcastExchange (161) +- * Project (160) +- * Filter (159) @@ -888,26 +888,26 @@ BroadcastExchange (161) (157) Scan parquet default.date_dim -Output [2]: [d_date_sk#84, d_year#98] +Output [2]: [d_date_sk#83, d_year#97] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (158) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#84, d_year#98] +Input [2]: [d_date_sk#83, d_year#97] (159) Filter [codegen id : 1] -Input [2]: [d_date_sk#84, d_year#98] -Condition : (d_year#98 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#84)) +Input [2]: [d_date_sk#83, d_year#97] +Condition : (d_year#97 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#83)) (160) Project [codegen id : 1] -Output [1]: [d_date_sk#84] -Input [2]: [d_date_sk#84, d_year#98] +Output [1]: [d_date_sk#83] +Input [2]: [d_date_sk#83, d_year#97] (161) BroadcastExchange -Input [1]: [d_date_sk#84] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#99] +Input [1]: [d_date_sk#83] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#98] Subquery:5 Hosting operator id = 65 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala index dd30ff68da417..7d45102ac83d3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala @@ -17,7 +17,8 @@ package org.apache.spark.sql -import org.apache.spark.sql.catalyst.plans.logical.WithCTE +import org.apache.spark.sql.catalyst.expressions.{And, GreaterThan, LessThan, Literal, Or} +import org.apache.spark.sql.catalyst.plans.logical.{Filter, Project, RepartitionOperation, WithCTE} import org.apache.spark.sql.execution.adaptive._ import org.apache.spark.sql.execution.exchange.ReusedExchangeExec import org.apache.spark.sql.internal.SQLConf @@ -42,7 +43,7 @@ abstract class CTEInlineSuiteBase """.stripMargin) checkAnswer(df, Nil) assert( - df.queryExecution.optimizedPlan.exists(_.isInstanceOf[WithCTE]), + df.queryExecution.optimizedPlan.exists(_.isInstanceOf[RepartitionOperation]), "Non-deterministic With-CTE with multiple references should be not inlined.") } } @@ -59,7 +60,7 @@ abstract class CTEInlineSuiteBase """.stripMargin) checkAnswer(df, Nil) assert( - df.queryExecution.optimizedPlan.exists(_.isInstanceOf[WithCTE]), + df.queryExecution.optimizedPlan.exists(_.isInstanceOf[RepartitionOperation]), "Non-deterministic With-CTE with multiple references should be not inlined.") } } @@ -79,7 +80,7 @@ abstract class CTEInlineSuiteBase df.queryExecution.analyzed.exists(_.isInstanceOf[WithCTE]), "With-CTE should not be inlined in analyzed plan.") assert( - !df.queryExecution.optimizedPlan.exists(_.isInstanceOf[WithCTE]), + !df.queryExecution.optimizedPlan.exists(_.isInstanceOf[RepartitionOperation]), "With-CTE with one reference should be inlined in optimized plan.") } } @@ -107,8 +108,8 @@ abstract class CTEInlineSuiteBase "With-CTE should contain 2 CTE defs after analysis.") assert( df.queryExecution.optimizedPlan.collect { - case WithCTE(_, cteDefs) => cteDefs - }.head.length == 2, + case r: RepartitionOperation => r + }.length == 6, "With-CTE should contain 2 CTE def after optimization.") } } @@ -136,8 +137,8 @@ abstract class CTEInlineSuiteBase "With-CTE should contain 2 CTE defs after analysis.") assert( df.queryExecution.optimizedPlan.collect { - case WithCTE(_, cteDefs) => cteDefs - }.head.length == 1, + case r: RepartitionOperation => r + }.length == 4, "One CTE def should be inlined after optimization.") } } @@ -163,7 +164,7 @@ abstract class CTEInlineSuiteBase "With-CTE should contain 2 CTE defs after analysis.") assert( df.queryExecution.optimizedPlan.collect { - case WithCTE(_, cteDefs) => cteDefs + case r: RepartitionOperation => r }.isEmpty, "CTEs with one reference should all be inlined after optimization.") } @@ -248,7 +249,7 @@ abstract class CTEInlineSuiteBase "With-CTE should contain 2 CTE defs after analysis.") assert( df.queryExecution.optimizedPlan.collect { - case WithCTE(_, cteDefs) => cteDefs + case r: RepartitionOperation => r }.isEmpty, "Deterministic CTEs should all be inlined after optimization.") } @@ -272,6 +273,214 @@ abstract class CTEInlineSuiteBase assert(ex.message.contains("Table or view not found: v1")) } } + + test("CTE Predicate push-down and column pruning") { + withView("t") { + Seq((0, 1), (1, 2)).toDF("c1", "c2").createOrReplaceTempView("t") + val df = sql( + s"""with + |v as ( + | select c1, c2, 's' c3, rand() c4 from t + |), + |vv as ( + | select v1.c1, v1.c2, rand() c5 from v v1, v v2 + | where v1.c1 > 0 and v1.c3 = 's' and v1.c2 = v2.c2 + |) + |select vv1.c1, vv1.c2, vv2.c1, vv2.c2 from vv vv1, vv vv2 + |where vv1.c2 > 0 and vv2.c2 > 0 and vv1.c1 = vv2.c1 + """.stripMargin) + checkAnswer(df, Row(1, 2, 1, 2) :: Nil) + assert( + df.queryExecution.analyzed.collect { + case WithCTE(_, cteDefs) => cteDefs + }.head.length == 2, + "With-CTE should contain 2 CTE defs after analysis.") + val cteRepartitions = df.queryExecution.optimizedPlan.collect { + case r: RepartitionOperation => r + } + assert(cteRepartitions.length == 6, + "CTE should not be inlined after optimization.") + val distinctCteRepartitions = cteRepartitions.map(_.canonicalized).distinct + // Check column pruning and predicate push-down. + assert(distinctCteRepartitions.length == 2) + assert(distinctCteRepartitions(1).collectFirst { + case p: Project if p.projectList.length == 3 => p + }.isDefined, "CTE columns should be pruned.") + assert(distinctCteRepartitions(1).collectFirst { + case f: Filter if f.condition.semanticEquals(GreaterThan(f.output(1), Literal(0))) => f + }.isDefined, "Predicate 'c2 > 0' should be pushed down to the CTE def 'v'.") + assert(distinctCteRepartitions(0).collectFirst { + case f: Filter if f.condition.find(_.semanticEquals(f.output(0))).isDefined => f + }.isDefined, "CTE 'vv' definition contains predicate 'c1 > 0'.") + assert(distinctCteRepartitions(1).collectFirst { + case f: Filter if f.condition.find(_.semanticEquals(f.output(0))).isDefined => f + }.isEmpty, "Predicate 'c1 > 0' should be not pushed down to the CTE def 'v'.") + // Check runtime repartition reuse. + assert( + collectWithSubqueries(df.queryExecution.executedPlan) { + case r: ReusedExchangeExec => r + }.length == 2, + "CTE repartition is reused.") + } + } + + test("CTE Predicate push-down and column pruning - combined predicate") { + withView("t") { + Seq((0, 1, 2), (1, 2, 3)).toDF("c1", "c2", "c3").createOrReplaceTempView("t") + val df = sql( + s"""with + |v as ( + | select c1, c2, c3, rand() c4 from t + |), + |vv as ( + | select v1.c1, v1.c2, rand() c5 from v v1, v v2 + | where v1.c1 > 0 and v2.c3 < 5 and v1.c2 = v2.c2 + |) + |select vv1.c1, vv1.c2, vv2.c1, vv2.c2 from vv vv1, vv vv2 + |where vv1.c2 > 0 and vv2.c2 > 0 and vv1.c1 = vv2.c1 + """.stripMargin) + checkAnswer(df, Row(1, 2, 1, 2) :: Nil) + assert( + df.queryExecution.analyzed.collect { + case WithCTE(_, cteDefs) => cteDefs + }.head.length == 2, + "With-CTE should contain 2 CTE defs after analysis.") + val cteRepartitions = df.queryExecution.optimizedPlan.collect { + case r: RepartitionOperation => r + } + assert(cteRepartitions.length == 6, + "CTE should not be inlined after optimization.") + val distinctCteRepartitions = cteRepartitions.map(_.canonicalized).distinct + // Check column pruning and predicate push-down. + assert(distinctCteRepartitions.length == 2) + assert(distinctCteRepartitions(1).collectFirst { + case p: Project if p.projectList.length == 3 => p + }.isDefined, "CTE columns should be pruned.") + assert( + distinctCteRepartitions(1).collectFirst { + case f: Filter + if f.condition.semanticEquals( + And( + GreaterThan(f.output(1), Literal(0)), + Or( + GreaterThan(f.output(0), Literal(0)), + LessThan(f.output(2), Literal(5))))) => + f + }.isDefined, + "Predicate 'c2 > 0 AND (c1 > 0 OR c3 < 5)' should be pushed down to the CTE def 'v'.") + // Check runtime repartition reuse. + assert( + collectWithSubqueries(df.queryExecution.executedPlan) { + case r: ReusedExchangeExec => r + }.length == 2, + "CTE repartition is reused.") + } + } + + test("Views with CTEs - 1 temp view") { + withView("t", "t2") { + Seq((0, 1), (1, 2)).toDF("c1", "c2").createOrReplaceTempView("t") + sql( + s"""with + |v as ( + | select c1 + c2 c3 from t + |) + |select sum(c3) s from v + """.stripMargin).createOrReplaceTempView("t2") + val df = sql( + s"""with + |v as ( + | select c1 * c2 c3 from t + |) + |select sum(c3) from v except select s from t2 + """.stripMargin) + checkAnswer(df, Row(2) :: Nil) + } + } + + test("Views with CTEs - 2 temp views") { + withView("t", "t2", "t3") { + Seq((0, 1), (1, 2)).toDF("c1", "c2").createOrReplaceTempView("t") + sql( + s"""with + |v as ( + | select c1 + c2 c3 from t + |) + |select sum(c3) s from v + """.stripMargin).createOrReplaceTempView("t2") + sql( + s"""with + |v as ( + | select c1 * c2 c3 from t + |) + |select sum(c3) s from v + """.stripMargin).createOrReplaceTempView("t3") + val df = sql("select s from t3 except select s from t2") + checkAnswer(df, Row(2) :: Nil) + } + } + + test("Views with CTEs - temp view + sql view") { + withTable("t") { + withView ("t2", "t3") { + Seq((0, 1), (1, 2)).toDF("c1", "c2").write.saveAsTable("t") + sql( + s"""with + |v as ( + | select c1 + c2 c3 from t + |) + |select sum(c3) s from v + """.stripMargin).createOrReplaceTempView("t2") + sql( + s"""create view t3 as + |with + |v as ( + | select c1 * c2 c3 from t + |) + |select sum(c3) s from v + """.stripMargin) + val df = sql("select s from t3 except select s from t2") + checkAnswer(df, Row(2) :: Nil) + } + } + } + + test("Union of Dataframes with CTEs") { + val a = spark.sql("with t as (select 1 as n) select * from t ") + val b = spark.sql("with t as (select 2 as n) select * from t ") + val df = a.union(b) + checkAnswer(df, Row(1) :: Row(2) :: Nil) + } + + test("CTE definitions out of original order when not inlined") { + withView("t1", "t2") { + Seq((1, 2, 10, 100), (2, 3, 20, 200)).toDF("workspace_id", "issue_id", "shard_id", "field_id") + .createOrReplaceTempView("issue_current") + withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> + "org.apache.spark.sql.catalyst.optimizer.InlineCTE") { + val df = sql( + """ + |WITH cte_0 AS ( + | SELECT workspace_id, issue_id, shard_id, field_id FROM issue_current + |), + |cte_1 AS ( + | WITH filtered_source_table AS ( + | SELECT * FROM cte_0 WHERE shard_id in ( 10 ) + | ) + | SELECT source_table.workspace_id, field_id FROM cte_0 source_table + | INNER JOIN ( + | SELECT workspace_id, issue_id FROM filtered_source_table GROUP BY 1, 2 + | ) target_table + | ON source_table.issue_id = target_table.issue_id + | AND source_table.workspace_id = target_table.workspace_id + | WHERE source_table.shard_id IN ( 10 ) + |) + |SELECT * FROM cte_1 + """.stripMargin) + checkAnswer(df, Row(1, 100) :: Nil) + } + } + } } class CTEInlineSuiteAEOff extends CTEInlineSuiteBase with DisableAdaptiveExecutionSuite diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 309396543d46c..42945e7f1c5a7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -593,6 +593,21 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark |select * from q1 union all select * from q2""".stripMargin), Row(5, "5") :: Row(4, "4") :: Nil) + // inner CTE relation refers to outer CTE relation. + withSQLConf(SQLConf.LEGACY_CTE_PRECEDENCE_POLICY.key -> "CORRECTED") { + checkAnswer( + sql( + """ + |with temp1 as (select 1 col), + |temp2 as ( + | with temp1 as (select col + 1 AS col from temp1), + | temp3 as (select col + 1 from temp1) + | select * from temp3 + |) + |select * from temp2 + |""".stripMargin), + Row(3)) + } } test("Allow only a single WITH clause per query") { From 8bd7d886e0570ed6d01ebbadca83c77821aee93f Mon Sep 17 00:00:00 2001 From: Daniel Tenedorio Date: Tue, 19 Apr 2022 11:18:56 +0800 Subject: [PATCH 157/535] [SPARK-38796][SQL] Update to_number and try_to_number functions to restrict S and MI sequence to start or end only ### What changes were proposed in this pull request? Update `to_number` and `try_to_number` functions to restrict MI sequence to start or end only. This satisfies the following specification: ``` to_number(expr, fmt) fmt { ' [ MI | S ] [ L | $ ] [ 0 | 9 | G | , ] [...] [ . | D ] [ 0 | 9 ] [...] [ L | $ ] [ PR | MI | S ] ' } ``` ### Why are the changes needed? After reviewing the specification, this behavior makes the most sense. ### Does this PR introduce _any_ user-facing change? Yes, a slight change in the behavior of the format string. ### How was this patch tested? Existing and updated unit test coverage. Closes #36154 from dtenedor/mi-anywhere. Authored-by: Daniel Tenedorio Signed-off-by: Wenchen Fan (cherry picked from commit 242ee22c00394c29e21bc3de0a93cb6d9746d93c) Signed-off-by: Wenchen Fan --- .../expressions/numberFormatExpressions.scala | 4 +- .../sql/catalyst/util/ToNumberParser.scala | 163 ++++++++++-------- .../expressions/StringExpressionsSuite.scala | 20 ++- 3 files changed, 106 insertions(+), 81 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala index 88947c5c87ab5..c866bb9af9eca 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala @@ -46,8 +46,8 @@ import org.apache.spark.unsafe.types.UTF8String grouping separator relevant for the size of the number. '$': Specifies the location of the $ currency sign. This character may only be specified once. - 'S': Specifies the position of a '-' or '+' sign (optional, only allowed once). - 'MI': Specifies that 'expr' has an optional '-' sign, but no '+' (only allowed once). + 'S' or 'MI': Specifies the position of a '-' or '+' sign (optional, only allowed once at + the beginning or end of the format string). Note that 'S' allows '-' but 'MI' does not. 'PR': Only allowed at the end of the format string; specifies that 'expr' indicates a negative number with wrapping angled brackets. ('<1>'). diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala index afba683efad94..716224983e0d0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala @@ -49,33 +49,56 @@ object ToNumberParser { final val WRAPPING_ANGLE_BRACKETS_TO_NEGATIVE_NUMBER_END = 'R' // This class represents one or more characters that we expect to be present in the input string - // based on the format string. + // based on the format string. The toString method returns a representation of each token suitable + // for use in error messages. abstract class InputToken() // Represents some number of digits (0-9). abstract class Digits extends InputToken // Represents exactly 'num' digits (0-9). - case class ExactlyAsManyDigits(num: Int) extends Digits + case class ExactlyAsManyDigits(num: Int) extends Digits { + override def toString: String = "digit sequence" + } // Represents at most 'num' digits (0-9). - case class AtMostAsManyDigits(num: Int) extends Digits + case class AtMostAsManyDigits(num: Int) extends Digits { + override def toString: String = "digit sequence" + } // Represents one decimal point (.). - case class DecimalPoint() extends InputToken + case class DecimalPoint() extends InputToken { + override def toString: String = ". or D" + } // Represents one thousands separator (,). - case class ThousandsSeparator() extends InputToken + case class ThousandsSeparator() extends InputToken { + override def toString: String = ", or G" + } // Represents one or more groups of Digits (0-9) with ThousandsSeparators (,) between each group. // The 'tokens' are the Digits and ThousandsSeparators in order; the 'digits' are just the Digits. - case class DigitGroups(tokens: Seq[InputToken], digits: Seq[Digits]) extends InputToken + case class DigitGroups(tokens: Seq[InputToken], digits: Seq[Digits]) extends InputToken { + override def toString: String = "digit sequence" + } // Represents one dollar sign ($). - case class DollarSign() extends InputToken + case class DollarSign() extends InputToken { + override def toString: String = "$" + } // Represents one optional plus sign (+) or minus sign (-). - case class OptionalPlusOrMinusSign() extends InputToken + case class OptionalPlusOrMinusSign() extends InputToken { + override def toString: String = "S" + } // Represents one optional minus sign (-). - case class OptionalMinusSign() extends InputToken + case class OptionalMinusSign() extends InputToken { + override def toString: String = "MI" + } // Represents one opening angle bracket (<). - case class OpeningAngleBracket() extends InputToken + case class OpeningAngleBracket() extends InputToken { + override def toString: String = "PR" + } // Represents one closing angle bracket (>). - case class ClosingAngleBracket() extends InputToken + case class ClosingAngleBracket() extends InputToken { + override def toString: String = "PR" + } // Represents any unrecognized character other than the above. - case class InvalidUnrecognizedCharacter(char: Char) extends InputToken + case class InvalidUnrecognizedCharacter(char: Char) extends InputToken { + override def toString: String = s"character '$char''" + } } /** @@ -241,16 +264,6 @@ class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Seriali * This implementation of the [[check]] method returns any error, or the empty string on success. */ private def validateFormatString: String = { - def multipleSignInNumberFormatError(message: String) = { - s"At most one $message is allowed in the number format: '$numberFormat'" - } - - def notAtEndOfNumberFormatError(message: String) = { - s"$message must be at the end of the number format: '$numberFormat'" - } - - val inputTokenCounts = formatTokens.groupBy(identity).mapValues(_.size) - val firstDollarSignIndex: Int = formatTokens.indexOf(DollarSign()) val firstDigitIndex: Int = formatTokens.indexWhere { case _: DigitGroups => true @@ -276,58 +289,25 @@ class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Seriali // Make sure the format string contains at least one token. if (numberFormat.isEmpty) { - "The format string cannot be empty" - } - // Make sure the format string does not contain any unrecognized characters. - else if (formatTokens.exists(_.isInstanceOf[InvalidUnrecognizedCharacter])) { - val unrecognizedChars = - formatTokens.filter { - _.isInstanceOf[InvalidUnrecognizedCharacter] - }.map { - case i: InvalidUnrecognizedCharacter => i.char - } - val char: Char = unrecognizedChars.head - s"Encountered invalid character $char in the number format: '$numberFormat'" + return "The format string cannot be empty" } // Make sure the format string contains at least one digit. - else if (!formatTokens.exists( + if (!formatTokens.exists( token => token.isInstanceOf[DigitGroups])) { - "The format string requires at least one number digit" - } - // Make sure the format string contains at most one decimal point. - else if (inputTokenCounts.getOrElse(DecimalPoint(), 0) > 1) { - multipleSignInNumberFormatError(s"'$POINT_LETTER' or '$POINT_SIGN'") - } - // Make sure the format string contains at most one plus or minus sign. - else if (inputTokenCounts.getOrElse(OptionalPlusOrMinusSign(), 0) > 1) { - multipleSignInNumberFormatError(s"'$OPTIONAL_PLUS_OR_MINUS_LETTER'") - } - // Make sure the format string contains at most one dollar sign. - else if (inputTokenCounts.getOrElse(DollarSign(), 0) > 1) { - multipleSignInNumberFormatError(s"'$DOLLAR_SIGN'") - } - // Make sure the format string contains at most one "MI" sequence. - else if (inputTokenCounts.getOrElse(OptionalMinusSign(), 0) > 1) { - multipleSignInNumberFormatError(s"'$OPTIONAL_MINUS_STRING'") - } - // Make sure the format string contains at most one closing angle bracket at the end. - else if (inputTokenCounts.getOrElse(ClosingAngleBracket(), 0) > 1 || - (inputTokenCounts.getOrElse(ClosingAngleBracket(), 0) == 1 && - formatTokens.last != ClosingAngleBracket())) { - notAtEndOfNumberFormatError(s"'$WRAPPING_ANGLE_BRACKETS_TO_NEGATIVE_NUMBER'") + return "The format string requires at least one number digit" } // Make sure that any dollar sign in the format string occurs before any digits. - else if (firstDigitIndex < firstDollarSignIndex) { - s"Currency characters must appear before digits in the number format: '$numberFormat'" + if (firstDigitIndex < firstDollarSignIndex) { + return s"Currency characters must appear before digits in the number format: '$numberFormat'" } // Make sure that any dollar sign in the format string occurs before any decimal point. - else if (firstDecimalPointIndex != -1 && + if (firstDecimalPointIndex != -1 && firstDecimalPointIndex < firstDollarSignIndex) { - "Currency characters must appear before any decimal point in the " + + return "Currency characters must appear before any decimal point in the " + s"number format: '$numberFormat'" } // Make sure that any thousands separators in the format string have digits before and after. - else if (digitGroupsBeforeDecimalPoint.exists { + if (digitGroupsBeforeDecimalPoint.exists { case DigitGroups(tokens, _) => tokens.zipWithIndex.exists({ case (_: ThousandsSeparator, j: Int) if j == 0 || j == tokens.length - 1 => @@ -340,21 +320,64 @@ class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Seriali false }) }) { - "Thousands separators (,) must have digits in between them " + + return "Thousands separators (,) must have digits in between them " + s"in the number format: '$numberFormat'" } - // Thousands separators are not allowed after the decimal point, if any. - else if (digitGroupsAfterDecimalPoint.exists { + // Make sure that thousands separators does not appear after the decimal point, if any. + if (digitGroupsAfterDecimalPoint.exists { case DigitGroups(tokens, digits) => tokens.length > digits.length }) { - "Thousands separators (,) may not appear after the decimal point " + + return "Thousands separators (,) may not appear after the decimal point " + s"in the number format: '$numberFormat'" } - // Validation of the format string finished successfully. - else { - "" + // Make sure that the format string does not contain any prohibited duplicate tokens. + val inputTokenCounts = formatTokens.groupBy(identity).mapValues(_.size) + Seq(DecimalPoint(), + OptionalPlusOrMinusSign(), + OptionalMinusSign(), + DollarSign(), + ClosingAngleBracket()).foreach { + token => if (inputTokenCounts.getOrElse(token, 0) > 1) { + return s"At most one ${token.toString} is allowed in the number format: '$numberFormat'" + } + } + // Enforce the ordering of tokens in the format string according to this specification: + // [ MI | S ] [ $ ] + // [ 0 | 9 | G | , ] [...] + // [ . | D ] + // [ 0 | 9 ] [...] + // [ $ ] [ PR | MI | S ] + val allowedFormatTokens: Seq[Seq[InputToken]] = Seq( + Seq(OpeningAngleBracket()), + Seq(OptionalMinusSign(), OptionalPlusOrMinusSign()), + Seq(DollarSign()), + Seq(DigitGroups(Seq(), Seq())), + Seq(DecimalPoint()), + Seq(DigitGroups(Seq(), Seq())), + Seq(DollarSign()), + Seq(OptionalMinusSign(), OptionalPlusOrMinusSign(), ClosingAngleBracket()) + ) + var formatTokenIndex = 0 + for (allowedTokens: Seq[InputToken] <- allowedFormatTokens) { + def tokensMatch(lhs: InputToken, rhs: InputToken): Boolean = { + lhs match { + case _: DigitGroups => rhs.isInstanceOf[DigitGroups] + case _ => lhs == rhs + } + } + if (formatTokenIndex < formatTokens.length && + allowedTokens.exists(tokensMatch(_, formatTokens(formatTokenIndex)))) { + formatTokenIndex += 1 + } } + if (formatTokenIndex < formatTokens.length) { + return s"Unexpected ${formatTokens(formatTokenIndex).toString} found in the format string " + + s"'$numberFormat'; the structure of the format string must match: " + + "[MI|S] [$] [0|9|G|,]* [.|D] [0|9]* [$] [PR|MI|S]" + } + // Validation of the format string finished successfully. + "" } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index afb05dd4d77b3..91b3d0c69b8dd 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -972,7 +972,6 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { ("<454>", "999PR") -> Decimal(-454), ("454-", "999MI") -> Decimal(-454), ("-$54", "MI$99") -> Decimal(-54), - ("$4-4", "$9MI9") -> Decimal(-44), // The input string contains more digits than fit in a long integer. ("123,456,789,123,456,789,123", "999,999,999,999,999,999,999") -> Decimal(new JavaBigDecimal("123456789123456789123")) @@ -1009,7 +1008,8 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { } test("ToNumber: negative tests (the format string is invalid)") { - val invalidCharacter = "Encountered invalid character" + val unexpectedCharacter = "the structure of the format string must match: " + + "[MI|S] [$] [0|9|G|,]* [.|D] [0|9]* [$] [PR|MI|S]" val thousandsSeparatorDigitsBetween = "Thousands separators (,) must have digits in between them" val mustBeAtEnd = "must be at the end of the number format" @@ -1018,23 +1018,25 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { // The format string must not be empty. ("454", "") -> "The format string cannot be empty", // Make sure the format string does not contain any unrecognized characters. - ("454", "999@") -> invalidCharacter, - ("454", "999M") -> invalidCharacter, - ("454", "999P") -> invalidCharacter, + ("454", "999@") -> unexpectedCharacter, + ("454", "999M") -> unexpectedCharacter, + ("454", "999P") -> unexpectedCharacter, // Make sure the format string contains at least one digit. ("454", "$") -> "The format string requires at least one number digit", // Make sure the format string contains at most one decimal point. ("454", "99.99.99") -> atMostOne, // Make sure the format string contains at most one dollar sign. ("454", "$$99") -> atMostOne, - // Make sure the format string contains at most one minus sign at the end. + // Make sure the format string contains at most one minus sign at the beginning or end. + ("$4-4", "$9MI9") -> unexpectedCharacter, + ("--4", "SMI9") -> unexpectedCharacter, ("--$54", "SS$99") -> atMostOne, ("-$54", "MI$99MI") -> atMostOne, ("$4-4", "$9MI9MI") -> atMostOne, // Make sure the format string contains at most one closing angle bracket at the end. - ("<$45>", "PR$99") -> mustBeAtEnd, - ("$4<4>", "$9PR9") -> mustBeAtEnd, - ("<<454>>", "999PRPR") -> mustBeAtEnd, + ("<$45>", "PR$99") -> unexpectedCharacter, + ("$4<4>", "$9PR9") -> unexpectedCharacter, + ("<<454>>", "999PRPR") -> atMostOne, // Make sure that any dollar sign in the format string occurs before any digits. ("4$54", "9$99") -> "Currency characters must appear before digits", // Make sure that any dollar sign in the format string occurs before any decimal point. From dd6eca7550c25dbcad9f12caf9fccfcad981d33f Mon Sep 17 00:00:00 2001 From: huaxingao Date: Mon, 18 Apr 2022 21:27:57 -0700 Subject: [PATCH 158/535] [SPARK-38825][SQL][TEST][FOLLOWUP] Add test for in(null) and notIn(null) ### What changes were proposed in this pull request? Add test for filter `in(null)` and `notIn(null)` ### Why are the changes needed? to make tests more complete ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? new test Closes #36248 from huaxingao/inNotIn. Authored-by: huaxingao Signed-off-by: huaxingao (cherry picked from commit b760e4a686939bdb837402286b8d3d8b445c5ed4) Signed-off-by: huaxingao --- .../parquet/ParquetFilterSuite.scala | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 71ea474409c6f..7a09011f27c7e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -1905,21 +1905,33 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared test("SPARK-38825: in and notIn filters") { import testImplicits._ withTempPath { file => - Seq(1, 2, 0, -1, 99, 1000, 3, 7, 2).toDF("id").coalesce(1).write.mode("overwrite") + Seq(1, 2, 0, -1, 99, Integer.MAX_VALUE, 1000, 3, 7, Integer.MIN_VALUE, 2) + .toDF("id").coalesce(1).write.mode("overwrite") .parquet(file.getCanonicalPath) var df = spark.read.parquet(file.getCanonicalPath) - var in = df.filter(col("id").isin(100, 3, 11, 12, 13)) - var notIn = df.filter(!col("id").isin(100, 3, 11, 12, 13)) - checkAnswer(in, Seq(Row(3))) + var in = df.filter(col("id").isin(100, 3, 11, 12, 13, Integer.MAX_VALUE, Integer.MIN_VALUE)) + var notIn = + df.filter(!col("id").isin(100, 3, 11, 12, 13, Integer.MAX_VALUE, Integer.MIN_VALUE)) + checkAnswer(in, Seq(Row(3), Row(-2147483648), Row(2147483647))) checkAnswer(notIn, Seq(Row(1), Row(2), Row(0), Row(-1), Row(99), Row(1000), Row(7), Row(2))) - Seq("mary", "martin", "lucy", "alex", "mary", "dan").toDF("name").coalesce(1) + Seq("mary", "martin", "lucy", "alex", null, "mary", "dan").toDF("name").coalesce(1) .write.mode("overwrite").parquet(file.getCanonicalPath) df = spark.read.parquet(file.getCanonicalPath) in = df.filter(col("name").isin("mary", "victor", "leo", "alex")) notIn = df.filter(!col("name").isin("mary", "victor", "leo", "alex")) checkAnswer(in, Seq(Row("mary"), Row("alex"), Row("mary"))) checkAnswer(notIn, Seq(Row("martin"), Row("lucy"), Row("dan"))) + + in = df.filter(col("name").isin("mary", "victor", "leo", "alex", null)) + notIn = df.filter(!col("name").isin("mary", "victor", "leo", "alex", null)) + checkAnswer(in, Seq(Row("mary"), Row("alex"), Row("mary"))) + checkAnswer(notIn, Seq()) + + in = df.filter(col("name").isin(null)) + notIn = df.filter(!col("name").isin(null)) + checkAnswer(in, Seq()) + checkAnswer(notIn, Seq()) } } } From 74043ddd0d60111717a290902014b02e9e9972da Mon Sep 17 00:00:00 2001 From: William Hyun Date: Mon, 18 Apr 2022 22:57:45 -0700 Subject: [PATCH 159/535] [SPARK-38941][TESTS][SQL][3.3] Skip RocksDB-based test case in StreamingJoinSuite on Apple Silicon ### What changes were proposed in this pull request? This PR aims to skip RocksDB-based test case in `StreamingJoinSuite` on Apple Silicon. ### Why are the changes needed? Currently, it is broken on Apple Silicon. **BEFORE** ``` $ build/sbt "sql/testOnly org.apache.spark.sql.streaming.Streaming*JoinSuite" ... [info] Run completed in 2 minutes, 47 seconds. [info] Total number of tests run: 43 [info] Suites: completed 4, aborted 0 [info] Tests: succeeded 42, failed 1, canceled 0, ignored 0, pending 0 [info] *** 1 TEST FAILED *** [error] Failed tests: [error] org.apache.spark.sql.streaming.StreamingOuterJoinSuite [error] (sql / Test / testOnly) sbt.TestsFailedException: Tests unsuccessful ``` **AFTER** ``` $ build/sbt "sql/testOnly org.apache.spark.sql.streaming.Streaming*JoinSuite" ... [info] Run completed in 2 minutes, 52 seconds. [info] Total number of tests run: 42 [info] Suites: completed 4, aborted 0 [info] Tests: succeeded 42, failed 0, canceled 1, ignored 0, pending 0 [info] All tests passed. ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually test on Apple Silicon. Closes #36254 from williamhyun/SPARK-38941. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/streaming/StreamingJoinSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala index 491b8da213e10..5b899453283cd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingJoinSuite.scala @@ -1356,6 +1356,7 @@ class StreamingOuterJoinSuite extends StreamingJoinSuite { test("SPARK-38684: outer join works correctly even if processing input rows and " + "evicting state rows for same grouping key happens in the same micro-batch") { + assume(!Utils.isMacOnAppleSilicon) // The test is to demonstrate the correctness issue in outer join before SPARK-38684. withSQLConf( From f7268008b139e2dcf6987432ddb33c4d4489a399 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 18 Apr 2022 23:02:30 -0700 Subject: [PATCH 160/535] [SPARK-38926][SQL][3.3] Output types in error messages in SQL style ### What changes were proposed in this pull request? In the PR, I propose to upper case SQL types in error messages similar to the SQL standard. I added new util functions `toSQLType()` to the trait `QueryErrorsBase`, and applied it in `Query.*Errors` (also modified tests in `Query.*ErrorsSuite`). For example: Before: ```sql Cannot up cast b.`b` from decimal(38,18) to bigint. ``` After: ```sql Cannot up cast b.`b` from DECIMAL(38,18) to BIGINT. ``` ### Why are the changes needed? To improve user experience with Spark SQL. The changes highlight SQL types in error massages and make them more visible for users. ### Does this PR introduce _any_ user-facing change? No since error classes haven't been released yet. ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "test:testOnly *QueryParsingErrorsSuite" $ build/sbt "test:testOnly *QueryCompilationErrorsSuite" $ build/sbt "test:testOnly *QueryExecutionErrorsSuite" $ build/sbt "testOnly *CastSuite" $ build/sbt "testOnly *AnsiCastSuiteWithAnsiModeOn" $ build/sbt "testOnly *EncoderResolutionSuite" $ build/sbt "test:testOnly *DatasetSuite" $ build/sbt "test:testOnly *InsertSuite" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 0d16159bfa85ed346843e0952f37922a579c011e) Signed-off-by: Max Gekk Closes #36247 from MaxGekk/error-class-toSQLType-3.3. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../sql/errors/QueryCompilationErrors.scala | 6 +- .../spark/sql/errors/QueryErrorsBase.scala | 4 ++ .../sql/errors/QueryExecutionErrors.scala | 18 ++--- .../spark/sql/errors/QueryParsingErrors.scala | 11 +++- .../encoders/EncoderResolutionSuite.scala | 8 +-- .../expressions/AnsiCastSuiteBase.scala | 4 +- .../sql/catalyst/expressions/CastSuite.scala | 66 +++++++++---------- .../results/postgreSQL/float4.sql.out | 6 +- .../results/postgreSQL/float8.sql.out | 2 +- .../sql-tests/results/postgreSQL/int8.sql.out | 8 +-- .../org/apache/spark/sql/DatasetSuite.scala | 2 +- .../errors/QueryCompilationErrorsSuite.scala | 4 +- .../errors/QueryExecutionErrorsSuite.scala | 8 +-- .../spark/sql/sources/InsertSuite.scala | 8 +-- 14 files changed, 83 insertions(+), 72 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index f1357f91f9d2f..65b59655be07c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -46,7 +46,7 @@ import org.apache.spark.sql.types._ * As commands are executed eagerly, this also includes errors thrown during the execution of * commands, which users can see immediately. */ -object QueryCompilationErrors { +object QueryCompilationErrors extends QueryErrorsBase { def groupingIDMismatchError(groupingID: GroupingID, groupByExprs: Seq[Expression]): Throwable = { new AnalysisException( @@ -161,8 +161,8 @@ object QueryCompilationErrors { errorClass = "CANNOT_UP_CAST_DATATYPE", messageParameters = Array( fromStr, - from.dataType.catalogString, - to.catalogString, + toSQLType(from.dataType), + toSQLType(to), s"The type path of the target object is:\n" + walkedTypePath.mkString("", "\n", "\n") + "You can either add an explicit cast to the input data or choose a higher precision " + "type of the field in the target object" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index e69e1382ecf62..7002f19f9fc84 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -44,4 +44,8 @@ trait QueryErrorsBase { def toSQLValue(v: Any, t: DataType): String = { litToErrorValue(Literal.create(v, t)) } + + def toSQLType(t: DataType): String = { + t.sql + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index fec4788c33307..970c5f22a5259 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -91,7 +91,7 @@ object QueryExecutionErrors extends QueryErrorsBase { def castingCauseOverflowError(t: Any, dataType: DataType): ArithmeticException = { new SparkArithmeticException(errorClass = "CAST_CAUSES_OVERFLOW", - messageParameters = Array(toSQLValue(t), dataType.catalogString, SQLConf.ANSI_ENABLED.key)) + messageParameters = Array(toSQLValue(t), toSQLType(dataType), SQLConf.ANSI_ENABLED.key)) } def cannotChangeDecimalPrecisionError( @@ -252,8 +252,7 @@ object QueryExecutionErrors extends QueryErrorsBase { new SparkRuntimeException( errorClass = "UNSUPPORTED_FEATURE", messageParameters = Array( - s"pivoting by the value '${v.toString}' of the column data type" + - s" '${dataType.catalogString}'.")) + s"pivoting by the value '${v.toString}' of the column data type ${toSQLType(dataType)}.")) } def noDefaultForDataTypeError(dataType: DataType): RuntimeException = { @@ -1651,8 +1650,8 @@ object QueryExecutionErrors extends QueryErrorsBase { new SparkUnsupportedOperationException( errorClass = "UNSUPPORTED_OPERATION", messageParameters = Array( - s"${TimestampType.catalogString} must supply timeZoneId parameter " + - s"while converting to ArrowType") + s"${toSQLType(TimestampType)} must supply timeZoneId parameter " + + s"while converting to the arrow timestamp type.") ) } @@ -1969,14 +1968,17 @@ object QueryExecutionErrors extends QueryErrorsBase { def cannotConvertOrcTimestampToTimestampNTZError(): Throwable = { new SparkUnsupportedOperationException( errorClass = "UNSUPPORTED_OPERATION", - messageParameters = Array("Unable to convert timestamp of Orc to data type 'timestamp_ntz'")) + messageParameters = Array( + s"Unable to convert ${toSQLType(TimestampType)} of Orc to " + + s"data type ${toSQLType(TimestampNTZType)}.")) } def cannotConvertOrcTimestampNTZToTimestampLTZError(): Throwable = { new SparkUnsupportedOperationException( errorClass = "UNSUPPORTED_OPERATION", - messageParameters = - Array("Unable to convert timestamp ntz of Orc to data type 'timestamp_ltz'")) + messageParameters = Array( + s"Unable to convert ${toSQLType(TimestampNTZType)} of Orc to " + + s"data type ${toSQLType(TimestampType)}.")) } def writePartitionExceedConfigSizeWhenDynamicPartitionError( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index e41c4cd9098ad..ad0973ccbb44d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -23,12 +23,13 @@ import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ +import org.apache.spark.sql.types.StringType /** * Object for grouping all error messages of the query parsing. * Currently it includes all ParseException. */ -object QueryParsingErrors { +object QueryParsingErrors extends QueryErrorsBase { def invalidInsertIntoError(ctx: InsertIntoContext): Throwable = { new ParseException("Invalid InsertIntoContext", ctx) @@ -301,8 +302,12 @@ object QueryParsingErrors { } def showFunctionsInvalidPatternError(pattern: String, ctx: ParserRuleContext): Throwable = { - new ParseException(s"Invalid pattern in SHOW FUNCTIONS: $pattern. It must be " + - "a string literal.", ctx) + new ParseException( + errorClass = "INVALID_SQL_SYNTAX", + messageParameters = Array( + s"Invalid pattern in SHOW FUNCTIONS: $pattern. " + + s"It must be a ${toSQLType(StringType)} literal."), + ctx) } def duplicateCteDefinitionNamesError(duplicateNames: String, ctx: CtesContext): Throwable = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala index 3a02c837aba3c..a96196669b3eb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala @@ -88,7 +88,7 @@ class EncoderResolutionSuite extends PlanTest { val attrs = Seq('arr.array(StringType)) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == s""" - |Cannot up cast array element from string to bigint. + |Cannot up cast array element from STRING to BIGINT. |The type path of the target object is: |- array element class: "scala.Long" |- field (class: "scala.Array", name: "arr") @@ -211,7 +211,7 @@ class EncoderResolutionSuite extends PlanTest { val attrs = Seq(attr) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == s""" - |Cannot up cast a from ${attr.dataType.catalogString} to string. + |Cannot up cast a from ${attr.dataType.sql} to STRING. |The type path of the target object is: |- root class: "java.lang.String" |You can either add an explicit cast to the input data or choose a higher precision type @@ -225,7 +225,7 @@ class EncoderResolutionSuite extends PlanTest { }.message assert(msg1 == s""" - |Cannot up cast b from bigint to int. + |Cannot up cast b from BIGINT to INT. |The type path of the target object is: |- field (class: "scala.Int", name: "b") |- root class: "org.apache.spark.sql.catalyst.encoders.StringIntClass" @@ -238,7 +238,7 @@ class EncoderResolutionSuite extends PlanTest { }.message assert(msg2 == s""" - |Cannot up cast b.`b` from decimal(38,18) to bigint. + |Cannot up cast b.`b` from DECIMAL(38,18) to BIGINT. |The type path of the target object is: |- field (class: "scala.Long", name: "b") |- field (class: "org.apache.spark.sql.catalyst.encoders.StringLongClass", name: "b") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala index 6494fb29fda59..785fd95692e9b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala @@ -279,7 +279,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { assert(negativeTs.getTime < 0) Seq(ByteType, ShortType, IntegerType).foreach { dt => checkExceptionInExpression[SparkArithmeticException]( - cast(negativeTs, dt), s"to ${dt.catalogString} causes overflow") + cast(negativeTs, dt), s"to ${dt.sql} causes overflow") } } } @@ -290,7 +290,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { assert(negativeTs.getTime < 0) Seq(ByteType, ShortType, IntegerType).foreach { dt => checkExceptionInExpression[SparkArithmeticException]( - cast(negativeTs, dt), s"to ${dt.catalogString} causes overflow") + cast(negativeTs, dt), s"to ${dt.sql} causes overflow") } val expectedSecs = Math.floorDiv(negativeTs.getTime, MILLIS_PER_SECOND) checkEvaluation(cast(negativeTs, LongType), expectedSecs) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index b6c347cfedb75..fe53dd0e5816a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -592,15 +592,15 @@ class CastSuite extends CastSuiteBase { val e1 = intercept[ArithmeticException] { Cast(Literal(Byte.MaxValue + 1), ByteType).eval() }.getMessage - assert(e1.contains("Casting 128 to tinyint causes overflow")) + assert(e1.contains("Casting 128 to TINYINT causes overflow")) val e2 = intercept[ArithmeticException] { Cast(Literal(Short.MaxValue + 1), ShortType).eval() }.getMessage - assert(e2.contains("Casting 32768 to smallint causes overflow")) + assert(e2.contains("Casting 32768 to SMALLINT causes overflow")) val e3 = intercept[ArithmeticException] { Cast(Literal(Int.MaxValue + 1L), IntegerType).eval() }.getMessage - assert(e3.contains("Casting 2147483648L to int causes overflow")) + assert(e3.contains("Casting 2147483648L to INT causes overflow")) } } @@ -642,15 +642,15 @@ class CastSuite extends CastSuiteBase { checkEvaluation(cast(v2, LongType), 25L) case MINUTE => checkExceptionInExpression[ArithmeticException](cast(v2, ByteType), - s"Casting $v2 to tinyint causes overflow") + s"Casting $v2 to TINYINT causes overflow") checkEvaluation(cast(v2, ShortType), (MINUTES_PER_HOUR * 25 + 1).toShort) checkEvaluation(cast(v2, IntegerType), (MINUTES_PER_HOUR * 25 + 1).toInt) checkEvaluation(cast(v2, LongType), MINUTES_PER_HOUR * 25 + 1) case SECOND => checkExceptionInExpression[ArithmeticException](cast(v2, ByteType), - s"Casting $v2 to tinyint causes overflow") + s"Casting $v2 to TINYINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v2, ShortType), - s"Casting $v2 to smallint causes overflow") + s"Casting $v2 to SMALLINT causes overflow") checkEvaluation(cast(v2, IntegerType), num.toInt) checkEvaluation(cast(v2, LongType), num) } @@ -659,34 +659,34 @@ class CastSuite extends CastSuiteBase { dt.endField match { case DAY => checkExceptionInExpression[ArithmeticException](cast(v3, ByteType), - s"Casting $v3 to tinyint causes overflow") + s"Casting $v3 to TINYINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v3, ShortType), - s"Casting $v3 to smallint causes overflow") + s"Casting $v3 to SMALLINT causes overflow") checkEvaluation(cast(v3, IntegerType), (Long.MaxValue / MICROS_PER_DAY).toInt) checkEvaluation(cast(v3, LongType), Long.MaxValue / MICROS_PER_DAY) case HOUR => checkExceptionInExpression[ArithmeticException](cast(v3, ByteType), - s"Casting $v3 to tinyint causes overflow") + s"Casting $v3 to TINYINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v3, ShortType), - s"Casting $v3 to smallint causes overflow") + s"Casting $v3 to SMALLINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v3, IntegerType), - s"Casting $v3 to int causes overflow") + s"Casting $v3 to INT causes overflow") checkEvaluation(cast(v3, LongType), Long.MaxValue / MICROS_PER_HOUR) case MINUTE => checkExceptionInExpression[ArithmeticException](cast(v3, ByteType), - s"Casting $v3 to tinyint causes overflow") + s"Casting $v3 to TINYINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v3, ShortType), - s"Casting $v3 to smallint causes overflow") + s"Casting $v3 to SMALLINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v3, IntegerType), - s"Casting $v3 to int causes overflow") + s"Casting $v3 to INT causes overflow") checkEvaluation(cast(v3, LongType), Long.MaxValue / MICROS_PER_MINUTE) case SECOND => checkExceptionInExpression[ArithmeticException](cast(v3, ByteType), - s"Casting $v3 to tinyint causes overflow") + s"Casting $v3 to TINYINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v3, ShortType), - s"Casting $v3 to smallint causes overflow") + s"Casting $v3 to SMALLINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v3, IntegerType), - s"Casting $v3 to int causes overflow") + s"Casting $v3 to INT causes overflow") checkEvaluation(cast(v3, LongType), Long.MaxValue / MICROS_PER_SECOND) } @@ -694,34 +694,34 @@ class CastSuite extends CastSuiteBase { dt.endField match { case DAY => checkExceptionInExpression[ArithmeticException](cast(v4, ByteType), - s"Casting $v4 to tinyint causes overflow") + s"Casting $v4 to TINYINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v4, ShortType), - s"Casting $v4 to smallint causes overflow") + s"Casting $v4 to SMALLINT causes overflow") checkEvaluation(cast(v4, IntegerType), (Long.MinValue / MICROS_PER_DAY).toInt) checkEvaluation(cast(v4, LongType), Long.MinValue / MICROS_PER_DAY) case HOUR => checkExceptionInExpression[ArithmeticException](cast(v4, ByteType), - s"Casting $v4 to tinyint causes overflow") + s"Casting $v4 to TINYINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v4, ShortType), - s"Casting $v4 to smallint causes overflow") + s"Casting $v4 to SMALLINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v4, IntegerType), - s"Casting $v4 to int causes overflow") + s"Casting $v4 to INT causes overflow") checkEvaluation(cast(v4, LongType), Long.MinValue / MICROS_PER_HOUR) case MINUTE => checkExceptionInExpression[ArithmeticException](cast(v4, ByteType), - s"Casting $v4 to tinyint causes overflow") + s"Casting $v4 to TINYINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v4, ShortType), - s"Casting $v4 to smallint causes overflow") + s"Casting $v4 to SMALLINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v4, IntegerType), - s"Casting $v4 to int causes overflow") + s"Casting $v4 to INT causes overflow") checkEvaluation(cast(v4, LongType), Long.MinValue / MICROS_PER_MINUTE) case SECOND => checkExceptionInExpression[ArithmeticException](cast(v4, ByteType), - s"Casting $v4 to tinyint causes overflow") + s"Casting $v4 to TINYINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v4, ShortType), - s"Casting $v4 to smallint causes overflow") + s"Casting $v4 to SMALLINT causes overflow") checkExceptionInExpression[ArithmeticException](cast(v4, IntegerType), - s"Casting $v4 to int causes overflow") + s"Casting $v4 to INT causes overflow") checkEvaluation(cast(v4, LongType), Long.MinValue / MICROS_PER_SECOND) } } @@ -777,7 +777,7 @@ class CastSuite extends CastSuiteBase { ).foreach { case (v, toType) => checkExceptionInExpression[ArithmeticException](cast(v, toType), - s"Casting $v to ${toType.catalogString} causes overflow") + s"Casting $v to ${toType.sql} causes overflow") } Seq( @@ -792,7 +792,7 @@ class CastSuite extends CastSuiteBase { ).foreach { case (v, toType) => checkExceptionInExpression[ArithmeticException](cast(v, toType), - s"Casting ${v}L to ${toType.catalogString} causes overflow") + s"Casting ${v}L to ${toType.sql} causes overflow") } } @@ -829,7 +829,7 @@ class CastSuite extends CastSuiteBase { case (v, dt, toType) => val value = Literal.create(v, dt) checkExceptionInExpression[ArithmeticException](cast(value, toType), - s"Casting $value to ${toType.catalogString} causes overflow") + s"Casting $value to ${toType.sql} causes overflow") } Seq( @@ -887,7 +887,7 @@ class CastSuite extends CastSuiteBase { ).foreach { case (v, toType) => checkExceptionInExpression[ArithmeticException](cast(v, toType), - s"Casting $v to ${toType.catalogString} causes overflow") + s"Casting $v to ${toType.sql} causes overflow") } Seq( @@ -898,7 +898,7 @@ class CastSuite extends CastSuiteBase { ).foreach { case (v, toType) => checkExceptionInExpression[ArithmeticException](cast(v, toType), - s"Casting ${v}L to ${toType.catalogString} causes overflow") + s"Casting ${v}L to ${toType.sql} causes overflow") } } } diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out index 39636e02159eb..a7ee7400e58bd 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out @@ -340,7 +340,7 @@ SELECT int(float('2147483647')) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting 2.14748365E9 to int causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting 2.14748365E9 to INT causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -357,7 +357,7 @@ SELECT int(float('-2147483900')) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting -2.1474839E9 to int causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting -2.1474839E9 to INT causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -390,7 +390,7 @@ SELECT bigint(float('-9223380000000000000')) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting -9.22338E18 to bigint causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting -9.22338E18 to BIGINT causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out index b2f61306c7c7c..3237969ea8736 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out @@ -845,7 +845,7 @@ SELECT bigint(double('-9223372036854780000')) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting -9.22337203685478E18D to bigint causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting -9.22337203685478E18D to BIGINT causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out index b7185fcbf1fea..be1fce4b41e7c 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out @@ -619,7 +619,7 @@ SELECT CAST(q1 AS int) FROM int8_tbl WHERE q2 <> 456 struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting 4567890123456789L to int causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting 4567890123456789L to INT causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -636,7 +636,7 @@ SELECT CAST(q1 AS smallint) FROM int8_tbl WHERE q2 <> 456 struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting 4567890123456789L to smallint causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting 4567890123456789L to SMALLINT causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -673,7 +673,7 @@ SELECT CAST(double('922337203685477580700.0') AS bigint) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting 9.223372036854776E20D to bigint causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting 9.223372036854776E20D to BIGINT causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -745,7 +745,7 @@ SELECT string(int(shiftleft(bigint(-1), 63))+1) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting -9223372036854775808L to int causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Casting -9223372036854775808L to INT causes overflow. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index cbdf31a6eaf1b..f3dccc224a72a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1951,7 +1951,7 @@ class DatasetSuite extends QueryTest .map(b => b - 1) .collect() } - assert(thrownException.message.contains("Cannot up cast id from bigint to tinyint")) + assert(thrownException.message.contains("Cannot up cast id from BIGINT to TINYINT")) } test("SPARK-26690: checkpoints should be executed with an execution id") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 02f639008aac4..cac1ef67fac40 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -37,7 +37,7 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { }.message assert(msg1 === s""" - |Cannot up cast b from bigint to int. + |Cannot up cast b from BIGINT to INT. |The type path of the target object is: |- field (class: "scala.Int", name: "b") |- root class: "org.apache.spark.sql.errors.StringIntClass" @@ -51,7 +51,7 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { }.message assert(msg2 === s""" - |Cannot up cast b.`b` from decimal(38,18) to bigint. + |Cannot up cast b.`b` from DECIMAL(38,18) to BIGINT. |The type path of the target object is: |- field (class: "scala.Long", name: "b") |- field (class: "org.apache.spark.sql.errors.StringLongClass", name: "b") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index a7625e17b4ae6..f73d1e1c3c5b1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -142,7 +142,7 @@ class QueryExecutionErrorsSuite extends QueryTest .collect() } assert(e2.getMessage === "The feature is not supported: pivoting by the value" + - """ '[dotnet,Dummies]' of the column data type 'struct'.""") + """ '[dotnet,Dummies]' of the column data type STRUCT.""") } test("UNSUPPORTED_FEATURE: unsupported pivot operations") { @@ -236,7 +236,7 @@ class QueryExecutionErrorsSuite extends QueryTest assert(e.getErrorClass === "UNSUPPORTED_OPERATION") assert(e.getMessage === "The operation is not supported: " + - "timestamp must supply timeZoneId parameter while converting to ArrowType") + "TIMESTAMP must supply timeZoneId parameter while converting to the arrow timestamp type.") } test("UNSUPPORTED_OPERATION - SPARK-36346: can't read Timestamp as TimestampNTZ") { @@ -249,7 +249,7 @@ class QueryExecutionErrorsSuite extends QueryTest assert(e.getErrorClass === "UNSUPPORTED_OPERATION") assert(e.getMessage === "The operation is not supported: " + - "Unable to convert timestamp of Orc to data type 'timestamp_ntz'") + "Unable to convert TIMESTAMP of Orc to data type TIMESTAMP_NTZ.") } } } @@ -264,7 +264,7 @@ class QueryExecutionErrorsSuite extends QueryTest assert(e.getErrorClass === "UNSUPPORTED_OPERATION") assert(e.getMessage === "The operation is not supported: " + - "Unable to convert timestamp ntz of Orc to data type 'timestamp_ltz'") + "Unable to convert TIMESTAMP_NTZ of Orc to data type TIMESTAMP.") } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index aca0675e260e0..52d03e6f956b7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -713,13 +713,13 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { var msg = intercept[SparkException] { sql(s"insert into t values($outOfRangeValue1)") }.getCause.getMessage - assert(msg.contains(s"Casting ${outOfRangeValue1}L to int causes overflow")) + assert(msg.contains(s"Casting ${outOfRangeValue1}L to INT causes overflow")) val outOfRangeValue2 = (Int.MinValue - 1L).toString msg = intercept[SparkException] { sql(s"insert into t values($outOfRangeValue2)") }.getCause.getMessage - assert(msg.contains(s"Casting ${outOfRangeValue2}L to int causes overflow")) + assert(msg.contains(s"Casting ${outOfRangeValue2}L to INT causes overflow")) } } } @@ -733,13 +733,13 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { var msg = intercept[SparkException] { sql(s"insert into t values(${outOfRangeValue1}D)") }.getCause.getMessage - assert(msg.contains(s"Casting ${outOfRangeValue1}D to bigint causes overflow")) + assert(msg.contains(s"Casting ${outOfRangeValue1}D to BIGINT causes overflow")) val outOfRangeValue2 = Math.nextDown(Long.MinValue) msg = intercept[SparkException] { sql(s"insert into t values(${outOfRangeValue2}D)") }.getCause.getMessage - assert(msg.contains(s"Casting ${outOfRangeValue2}D to bigint causes overflow")) + assert(msg.contains(s"Casting ${outOfRangeValue2}D to BIGINT causes overflow")) } } } From bb5e3aa0e6326305a788d189d2a9cc813000bd1c Mon Sep 17 00:00:00 2001 From: William Hyun Date: Mon, 18 Apr 2022 23:50:09 -0700 Subject: [PATCH 161/535] [SPARK-38942][TESTS][SQL][3.3] Skip RocksDB-based test case in FlatMapGroupsWithStateSuite on Apple Silicon ### What changes were proposed in this pull request? This PR aims to skip RocksDB-based test case in FlatMapGroupsWithStateSuite on Apple Silicon. ### Why are the changes needed? Currently, it is broken on Apple Silicon. **BEFORE** ``` $ build/sbt "sql/testOnly org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite" ... [info] *** 1 TEST FAILED *** [error] Failed tests: [error] org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite [error] (sql / Test / testOnly) sbt.TestsFailedException: Tests unsuccessful ``` **AFTER** ``` $ build/sbt "sql/testOnly org.apache.spark.sql.streaming.FlatMapGroupsWithStateSuite" ... [info] Run completed in 32 seconds, 692 milliseconds. [info] Total number of tests run: 105 [info] Suites: completed 1, aborted 0 [info] Tests: succeeded 105, failed 0, canceled 1, ignored 0, pending 0 [info] All tests passed. ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Test manually on Apple Silicon. Closes #36256 from williamhyun/SPARK-38942. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala index 9d34ceea8dd47..7012dec91ecf7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FlatMapGroupsWithStateSuite.scala @@ -1521,6 +1521,7 @@ class FlatMapGroupsWithStateSuite extends StateStoreMetricsTest { } test("SPARK-38320 - flatMapGroupsWithState state with data should not timeout") { + assume(!Utils.isMacOnAppleSilicon) withTempDir { dir => withSQLConf( (SQLConf.STREAMING_NO_DATA_MICRO_BATCHES_ENABLED.key -> "false"), From 8811e8caaa8540d1fa05fb77152043addc607b82 Mon Sep 17 00:00:00 2001 From: Yun Tang Date: Tue, 19 Apr 2022 20:31:04 +0900 Subject: [PATCH 162/535] [SPARK-38931][SS] Create root dfs directory for RocksDBFileManager with unknown number of keys on 1st checkpoint ### What changes were proposed in this pull request? Create root dfs directory for RocksDBFileManager with unknown number of keys on 1st checkpoint. ### Why are the changes needed? If this fix is not introduced, we might meet exception below: ~~~java File /private/var/folders/rk/wyr101_562ngn8lp7tbqt7_00000gp/T/spark-ce4a0607-b1d8-43b8-becd-638c6b030019/state/1/1 does not exist java.io.FileNotFoundException: File /private/var/folders/rk/wyr101_562ngn8lp7tbqt7_00000gp/T/spark-ce4a0607-b1d8-43b8-becd-638c6b030019/state/1/1 does not exist at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:779) at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:1100) at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:769) at org.apache.hadoop.fs.DelegateToFileSystem.getFileStatus(DelegateToFileSystem.java:128) at org.apache.hadoop.fs.DelegateToFileSystem.createInternal(DelegateToFileSystem.java:93) at org.apache.hadoop.fs.ChecksumFs$ChecksumFSOutputSummer.(ChecksumFs.java:353) at org.apache.hadoop.fs.ChecksumFs.createInternal(ChecksumFs.java:400) at org.apache.hadoop.fs.AbstractFileSystem.create(AbstractFileSystem.java:626) at org.apache.hadoop.fs.FileContext$3.next(FileContext.java:701) at org.apache.hadoop.fs.FileContext$3.next(FileContext.java:697) at org.apache.hadoop.fs.FSLinkResolver.resolve(FSLinkResolver.java:90) at org.apache.hadoop.fs.FileContext.create(FileContext.java:703) at org.apache.spark.sql.execution.streaming.FileContextBasedCheckpointFileManager.createTempFile(CheckpointFileManager.scala:327) at org.apache.spark.sql.execution.streaming.CheckpointFileManager$RenameBasedFSDataOutputStream.(CheckpointFileManager.scala:140) at org.apache.spark.sql.execution.streaming.CheckpointFileManager$RenameBasedFSDataOutputStream.(CheckpointFileManager.scala:143) at org.apache.spark.sql.execution.streaming.FileContextBasedCheckpointFileManager.createAtomic(CheckpointFileManager.scala:333) at org.apache.spark.sql.execution.streaming.state.RocksDBFileManager.zipToDfsFile(RocksDBFileManager.scala:438) at org.apache.spark.sql.execution.streaming.state.RocksDBFileManager.saveCheckpointToDfs(RocksDBFileManager.scala:174) at org.apache.spark.sql.execution.streaming.state.RocksDBSuite.saveCheckpointFiles(RocksDBSuite.scala:566) at org.apache.spark.sql.execution.streaming.state.RocksDBSuite.$anonfun$new$35(RocksDBSuite.scala:179) ........ ~~~ ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Tested via RocksDBSuite. Closes #36242 from Myasuka/SPARK-38931. Authored-by: Yun Tang Signed-off-by: Jungtaek Lim (cherry picked from commit abb1df9d190e35a17b693f2b013b092af4f2528a) Signed-off-by: Jungtaek Lim --- .../streaming/state/RocksDBFileManager.scala | 4 +++- .../streaming/state/RocksDBSuite.scala | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala index 4f2ce9b123707..26084747c3240 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala @@ -161,11 +161,13 @@ class RocksDBFileManager( metadata.writeToFile(metadataFile) logInfo(s"Written metadata for version $version:\n${metadata.prettyJson}") - if (version <= 1 && numKeys == 0) { + if (version <= 1 && numKeys <= 0) { // If we're writing the initial version and there's no data, we have to explicitly initialize // the root directory. Normally saveImmutableFilesToDfs will do this initialization, but // when there's no data that method won't write any files, and zipToDfsFile uses the // CheckpointFileManager.createAtomic API which doesn't auto-initialize parent directories. + // Moreover, once we disable to track the number of keys, in which the numKeys is -1, we + // still need to create the initial dfs root directory anyway. val path = new Path(dfsRootDir) if (!fm.exists(path)) fm.mkdirs(path) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala index 91cd91b639a3b..75717d2768726 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala @@ -170,6 +170,25 @@ class RocksDBSuite extends SparkFunSuite { } } + test("RocksDBFileManager: create init dfs directory with unknown number of keys") { + val dfsRootDir = new File(Utils.createTempDir().getAbsolutePath + "/state/1/1") + try { + val verificationDir = Utils.createTempDir().getAbsolutePath + val fileManager = new RocksDBFileManager( + dfsRootDir.getAbsolutePath, Utils.createTempDir(), new Configuration) + // Save a version of empty checkpoint files + val cpFiles = Seq() + generateFiles(verificationDir, cpFiles) + assert(!dfsRootDir.exists()) + saveCheckpointFiles(fileManager, cpFiles, version = 1, numKeys = -1) + // The dfs root dir is created even with unknown number of keys + assert(dfsRootDir.exists()) + loadAndVerifyCheckpointFiles(fileManager, verificationDir, version = 1, Nil, -1) + } finally { + Utils.deleteRecursively(dfsRootDir) + } + } + test("RocksDBFileManager: upload only new immutable files") { withTempDir { dir => val dfsRootDir = dir.getAbsolutePath From fb58c3e507113e2e9e398cb77703e54603bfa29a Mon Sep 17 00:00:00 2001 From: itholic Date: Wed, 20 Apr 2022 10:49:07 +0900 Subject: [PATCH 163/535] [SPARK-38828][PYTHON] Remove TimestampNTZ type Python support in Spark 3.3 This PR proposes to remove `TimestampNTZ` type Python support in Spark 3.3 from documentation and `pyspark.sql.types` module. The purpose of this PR is just hide `TimestampNTZ` type from end-users. Because the `TimestampNTZ` project is not finished yet: - Lack Hive metastore support - Lack JDBC support - Need to spend time scanning the codebase to find out any missing support. The current code usages of TimestampType are larger than TimestampNTZType No. The existing tests should cover. Closes #36255 from itholic/SPARK-38828. Authored-by: itholic Signed-off-by: Hyukjin Kwon (cherry picked from commit 581000de24377ca373df7fa94b214baa7e9b0462) Signed-off-by: Hyukjin Kwon --- python/docs/source/reference/pyspark.sql.rst | 1 - python/pyspark/sql/types.py | 1 - 2 files changed, 2 deletions(-) diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst index 1d34961a91a61..adc1958822ed4 100644 --- a/python/docs/source/reference/pyspark.sql.rst +++ b/python/docs/source/reference/pyspark.sql.rst @@ -302,7 +302,6 @@ Data Types StringType StructField StructType - TimestampNTZType TimestampType DayTimeIntervalType diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 23e54eb8889d9..2a41508d634d0 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -59,7 +59,6 @@ "BooleanType", "DateType", "TimestampType", - "TimestampNTZType", "DecimalType", "DoubleType", "FloatType", From 2b3df38b430b92e4a8392854988f071b795d543c Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Wed, 20 Apr 2022 11:02:58 +0900 Subject: [PATCH 164/535] [SPARK-37613][SQL][FOLLOWUP] Supplement docs for regr_count ### What changes were proposed in this pull request? https://github.com/apache/spark/pull/34880 supported ANSI Aggregate Function: regr_count. But the docs of regr_count is not good enough. ### Why are the changes needed? Make the docs of regr_count more detailed. ### Does this PR introduce _any_ user-facing change? 'No'. New feature. ### How was this patch tested? N/A Closes #36258 from beliefer/SPARK-37613_followup. Authored-by: Jiaan Geng Signed-off-by: Hyukjin Kwon (cherry picked from commit 1b106ea32d567dd32ac697ed0d6cfd40ea7e6e08) Signed-off-by: Hyukjin Kwon --- .../catalyst/expressions/aggregate/linearRegression.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala index 7463ef59c78f7..ce37e69d9fd96 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/linearRegression.scala @@ -22,10 +22,9 @@ import org.apache.spark.sql.catalyst.expressions.{And, Expression, ExpressionDes import org.apache.spark.sql.catalyst.trees.BinaryLike import org.apache.spark.sql.types.{AbstractDataType, DoubleType, NumericType} +// scalastyle:off line.size.limit @ExpressionDescription( - usage = """ - _FUNC_(expr) - Returns the number of non-null number pairs in a group. - """, + usage = "_FUNC_(y, x) - Returns the number of non-null number pairs in a group, where `y` is the dependent variable and `x` is the independent variable.", examples = """ Examples: > SELECT _FUNC_(y, x) FROM VALUES (1, 2), (2, 2), (2, 3), (2, 4) AS tab(y, x); @@ -37,6 +36,7 @@ import org.apache.spark.sql.types.{AbstractDataType, DoubleType, NumericType} """, group = "agg_funcs", since = "3.3.0") +// scalastyle:on line.size.limit case class RegrCount(left: Expression, right: Expression) extends AggregateFunction with RuntimeReplaceableAggregate From 27c75eae92333add3ba6854b6c46410ec8e6743f Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Wed, 20 Apr 2022 10:48:00 +0800 Subject: [PATCH 165/535] [SPARK-37575][SQL][FOLLOWUP] Update the migration guide for added legacy flag for the breaking change of write null value in csv to unquoted empty string ### What changes were proposed in this pull request? This is a follow-up of updating the migration guide for https://github.com/apache/spark/pull/36110 which adds a legacy flag to restore the pre-change behavior. It also fixes a typo in the previous flag description. ### Why are the changes needed? The flag needs to be documented. ### Does this PR introduce _any_ user-facing change? It changes the migration doc for users. ### How was this patch tested? No tests Closes #36268 from anchovYu/flags-null-to-csv-migration-guide. Authored-by: Xinyi Yu Signed-off-by: Wenchen Fan (cherry picked from commit a67acbaa29d1ab9071910cac09323c2544d65303) Signed-off-by: Wenchen Fan --- docs/sql-migration-guide.md | 2 +- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 607100b0850b7..b6bfb0ed2be1e 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -54,7 +54,7 @@ license: | - Since Spark 3.3, the `strfmt` in `format_string(strfmt, obj, ...)` and `printf(strfmt, obj, ...)` will no longer support to use "0$" to specify the first argument, the first argument should always reference by "1$" when use argument index to indicating the position of the argument in the argument list. - - Since Spark 3.3, nulls are written as empty strings in CSV data source by default. In Spark 3.2 or earlier, nulls were written as empty strings as quoted empty strings, `""`. To restore the previous behavior, set `nullValue` to `""`. + - Since Spark 3.3, nulls are written as empty strings in CSV data source by default. In Spark 3.2 or earlier, nulls were written as empty strings as quoted empty strings, `""`. To restore the previous behavior, set `nullValue` to `""`, or set the configuration `spark.sql.legacy.nullValueWrittenAsQuotedEmptyStringCsv` to `true`. - Since Spark 3.3, DESCRIBE FUNCTION fails if the function does not exist. In Spark 3.2 or earlier, DESCRIBE FUNCTION can still run and print "Function: func_name not found". diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 5f803ed963beb..e8d99a2d44d72 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3728,7 +3728,7 @@ object SQLConf { buildConf("spark.sql.legacy.nullValueWrittenAsQuotedEmptyStringCsv") .internal() .doc("When set to false, nulls are written as unquoted empty strings in CSV data source. " + - "If set to false, it restores the legacy behavior that nulls were written as quoted " + + "If set to true, it restores the legacy behavior that nulls were written as quoted " + "empty strings, `\"\"`.") .version("3.3.0") .booleanConf From 83a365edf163bdd30974756c6c58fdca2e16f7f3 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 20 Apr 2022 14:38:26 +0800 Subject: [PATCH 166/535] [SPARK-38922][CORE] TaskLocation.apply throw NullPointerException ### What changes were proposed in this pull request? TaskLocation.apply w/o NULL check may throw NPE and fail job scheduling ``` Caused by: java.lang.NullPointerException at scala.collection.immutable.StringLike$class.stripPrefix(StringLike.scala:155) at scala.collection.immutable.StringOps.stripPrefix(StringOps.scala:29) at org.apache.spark.scheduler.TaskLocation$.apply(TaskLocation.scala:71) at org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$getPreferredLocsInternal ``` For instance, `org.apache.spark.rdd.HadoopRDD#convertSplitLocationInfo` might generate unexpected `Some(null)` elements where should be replace by `Option.apply` ### Why are the changes needed? fix NPE ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests Closes #36222 from yaooqinn/SPARK-38922. Authored-by: Kent Yao Signed-off-by: Kent Yao (cherry picked from commit 33e07f3cd926105c6d28986eb6218f237505549e) Signed-off-by: Kent Yao --- .../org/apache/spark/rdd/HadoopRDD.scala | 2 +- .../apache/spark/scheduler/DAGScheduler.scala | 2 +- .../org/apache/spark/rdd/HadoopRDDSuite.scala | 30 +++++++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 core/src/test/scala/org/apache/spark/rdd/HadoopRDDSuite.scala diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index fcc2275585e83..0d905b46953c0 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -454,7 +454,7 @@ private[spark] object HadoopRDD extends Logging { infos: Array[SplitLocationInfo]): Option[Seq[String]] = { Option(infos).map(_.flatMap { loc => val locationStr = loc.getLocation - if (locationStr != "localhost") { + if (locationStr != null && locationStr != "localhost") { if (loc.isInMemory) { logDebug(s"Partition $locationStr is cached by Hadoop.") Some(HDFSCacheTaskLocation(locationStr).toString) diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index ffaabba71e8cc..ea3a333b19e2c 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -2736,7 +2736,7 @@ private[spark] class DAGScheduler( // If the RDD has some placement preferences (as is the case for input RDDs), get those val rddPrefs = rdd.preferredLocations(rdd.partitions(partition)).toList if (rddPrefs.nonEmpty) { - return rddPrefs.map(TaskLocation(_)) + return rddPrefs.filter(_ != null).map(TaskLocation(_)) } // If the RDD has narrow dependencies, pick the first partition of the first narrow dependency diff --git a/core/src/test/scala/org/apache/spark/rdd/HadoopRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/HadoopRDDSuite.scala new file mode 100644 index 0000000000000..b43d76c114c9a --- /dev/null +++ b/core/src/test/scala/org/apache/spark/rdd/HadoopRDDSuite.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.rdd + +import org.apache.hadoop.mapred.SplitLocationInfo + +import org.apache.spark.SparkFunSuite + +class HadoopRDDSuite extends SparkFunSuite { + + test("SPARK-38922: HadoopRDD convertSplitLocationInfo contains Some(null) cause NPE") { + val locs = Array(new SplitLocationInfo(null, false)) + assert(HadoopRDD.convertSplitLocationInfo(locs).get.isEmpty) + } +} From 9d0650a1a7fd61611be525a7c263bcfc54b7ad25 Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Wed, 20 Apr 2022 13:08:38 +0300 Subject: [PATCH 167/535] [SPARK-38929][SQL][3.3] Improve error messages for cast failures in ANSI ### What changes were proposed in this pull request? Improve the error messages for cast failures in ANSI. As mentioned in https://issues.apache.org/jira/browse/SPARK-38929, this PR targets two cast-to types: numeric types and date types. * For numeric(`int`, `smallint`, `double`, `float`, `decimal` ..) types, it embeds the cast-to types in the error message. For example, ``` Invalid input value for type INT: '1.0'. To return NULL instead, use 'try_cast'. If necessary set %s to false to bypass this error. ``` It uses the `toSQLType` and `toSQLValue` to wrap the corresponding types and literals. * For date types, it does similarly as above. For example, ``` Invalid input value for type TIMESTAMP: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. ``` ### Why are the changes needed? To improve the error message in general. ### Does this PR introduce _any_ user-facing change? It changes the error messages. ### How was this patch tested? The related unit tests are updated. Authored-by: Xinyi Yu Signed-off-by: Max Gekk (cherry picked from commit f76b3e766f79b4c2d4f1ecffaad25aeb962336b7) Closes #36275 from anchovYu/ansi-error-improve-3.3. Authored-by: Xinyi Yu Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 8 +- .../spark/sql/catalyst/expressions/Cast.scala | 17 +- .../sql/catalyst/util/UTF8StringUtils.scala | 13 +- .../sql/errors/QueryExecutionErrors.scala | 16 +- .../org/apache/spark/sql/types/Decimal.scala | 7 +- .../expressions/AnsiCastSuiteBase.scala | 58 +++--- .../catalyst/expressions/TryCastSuite.scala | 3 +- .../catalyst/util/DateFormatterSuite.scala | 2 +- .../util/TimestampFormatterSuite.scala | 2 +- .../apache/spark/sql/types/DecimalSuite.scala | 3 +- .../test/resources/sql-tests/inputs/cast.sql | 10 +- .../sql-tests/results/ansi/cast.sql.out | 170 +++++++++++++----- .../sql-tests/results/ansi/date.sql.out | 10 +- .../ansi/datetime-parsing-invalid.sql.out | 4 +- .../sql-tests/results/ansi/interval.sql.out | 20 +-- .../results/ansi/string-functions.sql.out | 16 +- .../resources/sql-tests/results/cast.sql.out | 50 +++++- .../results/postgreSQL/float4.sql.out | 8 +- .../results/postgreSQL/float8.sql.out | 8 +- .../sql-tests/results/postgreSQL/text.sql.out | 8 +- .../results/postgreSQL/window_part2.sql.out | 4 +- .../results/postgreSQL/window_part3.sql.out | 2 +- .../results/postgreSQL/window_part4.sql.out | 2 +- .../results/string-functions.sql.out | 2 +- .../timestampNTZ/timestamp-ansi.sql.out | 2 +- .../apache/spark/sql/SQLInsertTestSuite.scala | 2 +- 26 files changed, 297 insertions(+), 150 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index a0fa042fd4828..09ceca1414fcb 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -105,10 +105,6 @@ "message" : [ "The fraction of sec must be zero. Valid range is [0, 60]. If necessary set %s to false to bypass this error. " ], "sqlState" : "22023" }, - "INVALID_INPUT_SYNTAX_FOR_NUMERIC_TYPE" : { - "message" : [ "invalid input syntax for type numeric: %s. To return NULL instead, use 'try_cast'. If necessary set %s to false to bypass this error.%s" ], - "sqlState" : "42000" - }, "INVALID_JSON_SCHEMA_MAPTYPE" : { "message" : [ "Input schema %s can only contain StringType as a key type for a MapType." ] }, @@ -120,6 +116,10 @@ "message" : [ "Invalid SQL syntax: %s" ], "sqlState" : "42000" }, + "INVALID_SYNTAX_FOR_CAST" : { + "message" : [ "Invalid input syntax for type %s: %s. To return NULL instead, use 'try_cast'. If necessary set %s to false to bypass this error.%s" ], + "sqlState" : "42000" + }, "MAP_KEY_DOES_NOT_EXIST" : { "message" : [ "Key %s does not exist. If necessary set %s to false to bypass this error.%s" ] }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index e522c211cb228..865202caa5fc2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -816,7 +816,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit }) case StringType if ansiEnabled => buildCast[UTF8String](_, - s => changePrecision(Decimal.fromStringANSI(s, origin.context), target)) + s => changePrecision(Decimal.fromStringANSI(s, target, origin.context), target)) case BooleanType => buildCast[Boolean](_, b => toPrecision(if (b) Decimal.ONE else Decimal.ZERO, target)) case DateType => @@ -845,7 +845,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case _: NumberFormatException => val d = Cast.processFloatingPointSpecialLiterals(doubleStr, false) if(ansiEnabled && d == null) { - throw QueryExecutionErrors.invalidInputSyntaxForNumericError(s, origin.context) + throw QueryExecutionErrors.invalidInputSyntaxForNumericError( + DoubleType, s, origin.context) } else { d } @@ -870,7 +871,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case _: NumberFormatException => val f = Cast.processFloatingPointSpecialLiterals(floatStr, true) if (ansiEnabled && f == null) { - throw QueryExecutionErrors.invalidInputSyntaxForNumericError(s, origin.context) + throw QueryExecutionErrors.invalidInputSyntaxForNumericError( + FloatType, s, origin.context) } else { f } @@ -1376,9 +1378,10 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit """ case StringType if ansiEnabled => val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val toType = ctx.addReferenceObj("toType", target) (c, evPrim, evNull) => code""" - Decimal $tmp = Decimal.fromStringANSI($c, $errorContext); + Decimal $tmp = Decimal.fromStringANSI($c, $toType, $errorContext); ${changePrecision(tmp, target, evPrim, evNull, canNullSafeCast, ctx)} """ case BooleanType => @@ -1899,7 +1902,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => val handleNull = if (ansiEnabled) { val errorContext = ctx.addReferenceObj("errCtx", origin.context) - s"throw QueryExecutionErrors.invalidInputSyntaxForNumericError($c, $errorContext);" + s"throw QueryExecutionErrors.invalidInputSyntaxForNumericError(" + + s"org.apache.spark.sql.types.FloatType$$.MODULE$$,$c, $errorContext);" } else { s"$evNull = true;" } @@ -1936,7 +1940,8 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit (c, evPrim, evNull) => val handleNull = if (ansiEnabled) { val errorContext = ctx.addReferenceObj("errCtx", origin.context) - s"throw QueryExecutionErrors.invalidInputSyntaxForNumericError($c, $errorContext);" + s"throw QueryExecutionErrors.invalidInputSyntaxForNumericError(" + + s"org.apache.spark.sql.types.DoubleType$$.MODULE$$, $c, $errorContext);" } else { s"$evNull = true;" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala index 9589cf3774ee1..c01fcbe6ca2bb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.catalyst.util import org.apache.spark.sql.errors.QueryExecutionErrors +import org.apache.spark.sql.types.{ByteType, DataType, IntegerType, LongType, ShortType} import org.apache.spark.unsafe.types.UTF8String /** @@ -26,23 +27,23 @@ import org.apache.spark.unsafe.types.UTF8String object UTF8StringUtils { def toLongExact(s: UTF8String, errorContext: String): Long = - withException(s.toLongExact, errorContext) + withException(s.toLongExact, errorContext, LongType, s) def toIntExact(s: UTF8String, errorContext: String): Int = - withException(s.toIntExact, errorContext) + withException(s.toIntExact, errorContext, IntegerType, s) def toShortExact(s: UTF8String, errorContext: String): Short = - withException(s.toShortExact, errorContext) + withException(s.toShortExact, errorContext, ShortType, s) def toByteExact(s: UTF8String, errorContext: String): Byte = - withException(s.toByteExact, errorContext) + withException(s.toByteExact, errorContext, ByteType, s) - private def withException[A](f: => A, errorContext: String): A = { + private def withException[A](f: => A, errorContext: String, to: DataType, s: UTF8String): A = { try { f } catch { case e: NumberFormatException => - throw QueryExecutionErrors.invalidInputSyntaxForNumericError(e, errorContext) + throw QueryExecutionErrors.invalidInputSyntaxForNumericError(to, s, errorContext) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 970c5f22a5259..3cc8c3a6667f6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -112,10 +112,12 @@ object QueryExecutionErrors extends QueryErrorsBase { } def invalidInputSyntaxForNumericError( + to: DataType, s: UTF8String, errorContext: String): NumberFormatException = { - new SparkNumberFormatException(errorClass = "INVALID_INPUT_SYNTAX_FOR_NUMERIC_TYPE", - messageParameters = Array(toSQLValue(s, StringType), SQLConf.ANSI_ENABLED.key, errorContext)) + new SparkNumberFormatException(errorClass = "INVALID_SYNTAX_FOR_CAST", + messageParameters = Array(toSQLType(to), toSQLValue(s, StringType), + SQLConf.ANSI_ENABLED.key, errorContext)) } def cannotCastFromNullTypeError(to: DataType): Throwable = { @@ -1048,8 +1050,14 @@ object QueryExecutionErrors extends QueryErrorsBase { } def cannotCastToDateTimeError(value: Any, to: DataType, errorContext: String): Throwable = { - new DateTimeException(s"Cannot cast $value to $to. To return NULL instead, use 'try_cast'. " + - s"If necessary set ${SQLConf.ANSI_ENABLED.key} to false to bypass this error." + errorContext) + val valueString = if (value.isInstanceOf[UTF8String]) { + toSQLValue(value, StringType) + } else { + toSQLValue(value) + } + new DateTimeException(s"Invalid input syntax for type ${toSQLType(to)}: $valueString. " + + s"To return NULL instead, use 'try_cast'. If necessary set ${SQLConf.ANSI_ENABLED.key} " + + s"to false to bypass this error." + errorContext) } def registeringStreamingQueryListenerError(e: Exception): Throwable = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index ac6ac33451cdf..22e57fae52de4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -613,7 +613,10 @@ object Decimal { } } - def fromStringANSI(str: UTF8String, errorContext: String = ""): Decimal = { + def fromStringANSI( + str: UTF8String, + to: DecimalType = DecimalType.USER_DEFAULT, + errorContext: String = ""): Decimal = { try { val bigDecimal = stringToJavaBigDecimal(str) // We fast fail because constructing a very large JavaBigDecimal to Decimal is very slow. @@ -626,7 +629,7 @@ object Decimal { } } catch { case _: NumberFormatException => - throw QueryExecutionErrors.invalidInputSyntaxForNumericError(str, errorContext) + throw QueryExecutionErrors.invalidInputSyntaxForNumericError(to, str, errorContext) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala index 785fd95692e9b..9be144efd7738 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.DateTimeConstants.MILLIS_PER_SECOND import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, UTC} +import org.apache.spark.sql.errors.QueryExecutionErrors.toSQLValue import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -174,42 +175,43 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { test("cast from invalid string to numeric should throw NumberFormatException") { // cast to IntegerType Seq(IntegerType, ShortType, ByteType, LongType).foreach { dataType => - checkExceptionInExpression[NumberFormatException]( - cast("string", dataType), "invalid input syntax for type numeric: 'string'") - checkExceptionInExpression[NumberFormatException]( - cast("123-string", dataType), "invalid input syntax for type numeric: '123-string'") - checkExceptionInExpression[NumberFormatException]( - cast("2020-07-19", dataType), "invalid input syntax for type numeric: '2020-07-19'") - checkExceptionInExpression[NumberFormatException]( - cast("1.23", dataType), "invalid input syntax for type numeric: '1.23'") + checkExceptionInExpression[NumberFormatException](cast("string", dataType), + s"Invalid input syntax for type ${dataType.sql}: 'string'") + checkExceptionInExpression[NumberFormatException](cast("123-string", dataType), + s"Invalid input syntax for type ${dataType.sql}: '123-string'") + checkExceptionInExpression[NumberFormatException](cast("2020-07-19", dataType), + s"Invalid input syntax for type ${dataType.sql}: '2020-07-19'") + checkExceptionInExpression[NumberFormatException](cast("1.23", dataType), + s"Invalid input syntax for type ${dataType.sql}: '1.23'") } Seq(DoubleType, FloatType, DecimalType.USER_DEFAULT).foreach { dataType => - checkExceptionInExpression[NumberFormatException]( - cast("string", dataType), "invalid input syntax for type numeric: 'string'") - checkExceptionInExpression[NumberFormatException]( - cast("123.000.00", dataType), "invalid input syntax for type numeric: '123.000.00'") - checkExceptionInExpression[NumberFormatException]( - cast("abc.com", dataType), "invalid input syntax for type numeric: 'abc.com'") + checkExceptionInExpression[NumberFormatException](cast("string", dataType), + s"Invalid input syntax for type ${dataType.sql}: 'string'") + checkExceptionInExpression[NumberFormatException](cast("123.000.00", dataType), + s"Invalid input syntax for type ${dataType.sql}: '123.000.00'") + checkExceptionInExpression[NumberFormatException](cast("abc.com", dataType), + s"Invalid input syntax for type ${dataType.sql}: 'abc.com'") } } - protected def checkCastToNumericError(l: Literal, to: DataType, tryCastResult: Any): Unit = { + protected def checkCastToNumericError(l: Literal, to: DataType, + expectedDataTypeInErrorMsg: DataType, tryCastResult: Any): Unit = { checkExceptionInExpression[NumberFormatException]( - cast(l, to), "invalid input syntax for type numeric: 'true'") + cast(l, to), s"Invalid input syntax for type ${expectedDataTypeInErrorMsg.sql}: 'true'") } test("cast from invalid string array to numeric array should throw NumberFormatException") { val array = Literal.create(Seq("123", "true", "f", null), ArrayType(StringType, containsNull = true)) - checkCastToNumericError(array, ArrayType(ByteType, containsNull = true), + checkCastToNumericError(array, ArrayType(ByteType, containsNull = true), ByteType, Seq(123.toByte, null, null, null)) - checkCastToNumericError(array, ArrayType(ShortType, containsNull = true), + checkCastToNumericError(array, ArrayType(ShortType, containsNull = true), ShortType, Seq(123.toShort, null, null, null)) - checkCastToNumericError(array, ArrayType(IntegerType, containsNull = true), + checkCastToNumericError(array, ArrayType(IntegerType, containsNull = true), IntegerType, Seq(123, null, null, null)) - checkCastToNumericError(array, ArrayType(LongType, containsNull = true), + checkCastToNumericError(array, ArrayType(LongType, containsNull = true), LongType, Seq(123L, null, null, null)) } @@ -243,7 +245,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { checkExceptionInExpression[NumberFormatException]( cast("abcd", DecimalType(38, 1)), - "invalid input syntax for type numeric") + s"Invalid input syntax for type ${DecimalType(38, 1).sql}: 'abcd'") } protected def checkCastToBooleanError(l: Literal, to: DataType, tryCastResult: Any): Unit = { @@ -258,7 +260,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { protected def checkCastToTimestampError(l: Literal, to: DataType): Unit = { checkExceptionInExpression[DateTimeException]( - cast(l, to), s"Cannot cast $l to $to") + cast(l, to), s"Invalid input syntax for type TIMESTAMP: ${toSQLValue(l)}") } test("cast from timestamp II") { @@ -369,7 +371,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { assert(ret.resolved == !isTryCast) if (!isTryCast) { checkExceptionInExpression[NumberFormatException]( - ret, "invalid input syntax for type numeric") + ret, s"Invalid input syntax for type ${IntegerType.sql}") } } @@ -387,7 +389,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { assert(ret.resolved == !isTryCast) if (!isTryCast) { checkExceptionInExpression[NumberFormatException]( - ret, "invalid input syntax for type numeric") + ret, s"Invalid input syntax for type ${IntegerType.sql}") } } } @@ -512,7 +514,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { assert(ret.resolved === !isTryCast) if (!isTryCast) { checkExceptionInExpression[NumberFormatException]( - ret, "invalid input syntax for type numeric") + ret, s"Invalid input syntax for type ${IntegerType.sql}") } } @@ -521,7 +523,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { def checkCastWithParseError(str: String): Unit = { checkExceptionInExpression[DateTimeException]( cast(Literal(str), TimestampType, Option(zid.getId)), - s"Cannot cast $str to TimestampType.") + s"Invalid input syntax for type TIMESTAMP: '$str'") } checkCastWithParseError("123") @@ -542,7 +544,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { def checkCastWithParseError(str: String): Unit = { checkExceptionInExpression[DateTimeException]( cast(Literal(str), DateType, Option(zid.getId)), - s"Cannot cast $str to DateType.") + s"Invalid input syntax for type DATE: '$str'") } checkCastWithParseError("2015-13-18") @@ -570,7 +572,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { "2021-06-17 00:00:00ABC").foreach { invalidInput => checkExceptionInExpression[DateTimeException]( cast(invalidInput, TimestampNTZType), - s"Cannot cast $invalidInput to TimestampNTZType") + s"Invalid input syntax for type TIMESTAMP_NTZ: '$invalidInput'") } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryCastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryCastSuite.scala index 1394ec8c8e2fc..bb9ab88894741 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryCastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/TryCastSuite.scala @@ -45,7 +45,8 @@ class TryCastSuite extends AnsiCastSuiteBase { checkEvaluation(cast(l, to), tryCastResult, InternalRow(l.value)) } - override def checkCastToNumericError(l: Literal, to: DataType, tryCastResult: Any): Unit = { + override def checkCastToNumericError(l: Literal, to: DataType, + expectedDataTypeInErrorMsg: DataType, tryCastResult: Any): Unit = { checkEvaluation(cast(l, to), tryCastResult, InternalRow(l.value)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateFormatterSuite.scala index 44c90db7630ac..71351f6263fbe 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateFormatterSuite.scala @@ -208,6 +208,6 @@ class DateFormatterSuite extends DatetimeFormatterSuite { val errMsg = intercept[DateTimeException] { formatter.parse("x123") }.getMessage - assert(errMsg.contains("Cannot cast x123 to DateType")) + assert(errMsg.contains("Invalid input syntax for type DATE: 'x123'")) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 661e624efa592..204fe93e2d1b6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -453,7 +453,7 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { val errMsg = intercept[DateTimeException] { formatter.parse("x123") }.getMessage - assert(errMsg.contains("Cannot cast x123 to TimestampType")) + assert(errMsg.contains("Invalid input syntax for type TIMESTAMP: 'x123'")) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala index 5433c561a0379..77b07ce533ece 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala @@ -284,7 +284,8 @@ class DecimalSuite extends SparkFunSuite with PrivateMethodTester with SQLHelper assert(Decimal.fromString(UTF8String.fromString("str")) === null) val e = intercept[NumberFormatException](Decimal.fromStringANSI(UTF8String.fromString("str"))) - assert(e.getMessage.contains("invalid input syntax for type numeric")) + assert(e.getMessage.contains("Invalid input syntax for type " + + s"${DecimalType.USER_DEFAULT.sql}: 'str'")) } test("SPARK-35841: Casting string to decimal type doesn't work " + diff --git a/sql/core/src/test/resources/sql-tests/inputs/cast.sql b/sql/core/src/test/resources/sql-tests/inputs/cast.sql index e391c31690fd7..4610716902e5d 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cast.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cast.sql @@ -4,9 +4,11 @@ SELECT CAST('1.23' AS long); SELECT CAST('-4.56' AS int); SELECT CAST('-4.56' AS long); --- cast string which are not numbers to integral should return null +-- cast string which are not numbers to numeric types SELECT CAST('abc' AS int); SELECT CAST('abc' AS long); +SELECT CAST('abc' AS float); +SELECT CAST('abc' AS double); -- cast string representing a very large number to integral should return null SELECT CAST('1234567890123' AS int); @@ -15,14 +17,18 @@ SELECT CAST('12345678901234567890123' AS long); -- cast empty string to integral should return null SELECT CAST('' AS int); SELECT CAST('' AS long); +SELECT CAST('' AS float); +SELECT CAST('' AS double); -- cast null to integral should return null SELECT CAST(NULL AS int); SELECT CAST(NULL AS long); --- cast invalid decimal string to integral should return null +-- cast invalid decimal string to numeric types SELECT CAST('123.a' AS int); SELECT CAST('123.a' AS long); +SELECT CAST('123.a' AS float); +SELECT CAST('123.a' AS double); -- '-2147483648' is the smallest int value SELECT CAST('-2147483648' AS int); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out index 3de9c1f743def..a2cb4ca11252d 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 66 +-- Number of queries: 72 -- !query @@ -7,8 +7,8 @@ SELECT CAST('1.23' AS int) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '1.23'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: '1.23'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('1.23' AS int) ^^^^^^^^^^^^^^^^^^^ @@ -19,8 +19,8 @@ SELECT CAST('1.23' AS long) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '1.23'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type BIGINT: '1.23'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('1.23' AS long) ^^^^^^^^^^^^^^^^^^^^ @@ -31,8 +31,8 @@ SELECT CAST('-4.56' AS int) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '-4.56'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: '-4.56'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-4.56' AS int) ^^^^^^^^^^^^^^^^^^^^ @@ -43,8 +43,8 @@ SELECT CAST('-4.56' AS long) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '-4.56'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type BIGINT: '-4.56'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-4.56' AS long) ^^^^^^^^^^^^^^^^^^^^^ @@ -55,8 +55,8 @@ SELECT CAST('abc' AS int) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: 'abc'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: 'abc'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('abc' AS int) ^^^^^^^^^^^^^^^^^^ @@ -67,20 +67,44 @@ SELECT CAST('abc' AS long) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: 'abc'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type BIGINT: 'abc'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('abc' AS long) ^^^^^^^^^^^^^^^^^^^ +-- !query +SELECT CAST('abc' AS float) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type FLOAT: 'abc'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('abc' AS float) + ^^^^^^^^^^^^^^^^^^^^ + + +-- !query +SELECT CAST('abc' AS double) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type DOUBLE: 'abc'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('abc' AS double) + ^^^^^^^^^^^^^^^^^^^^^ + + -- !query SELECT CAST('1234567890123' AS int) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '1234567890123'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: '1234567890123'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('1234567890123' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -91,8 +115,8 @@ SELECT CAST('12345678901234567890123' AS long) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '12345678901234567890123'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type BIGINT: '12345678901234567890123'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('12345678901234567890123' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -103,8 +127,8 @@ SELECT CAST('' AS int) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: ''. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: ''. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('' AS int) ^^^^^^^^^^^^^^^ @@ -115,13 +139,37 @@ SELECT CAST('' AS long) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: ''. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type BIGINT: ''. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('' AS long) ^^^^^^^^^^^^^^^^ +-- !query +SELECT CAST('' AS float) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type FLOAT: ''. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('' AS float) + ^^^^^^^^^^^^^^^^^ + + +-- !query +SELECT CAST('' AS double) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type DOUBLE: ''. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('' AS double) + ^^^^^^^^^^^^^^^^^^ + + -- !query SELECT CAST(NULL AS int) -- !query schema @@ -143,8 +191,8 @@ SELECT CAST('123.a' AS int) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '123.a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: '123.a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('123.a' AS int) ^^^^^^^^^^^^^^^^^^^^ @@ -155,13 +203,37 @@ SELECT CAST('123.a' AS long) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '123.a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type BIGINT: '123.a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('123.a' AS long) ^^^^^^^^^^^^^^^^^^^^^ +-- !query +SELECT CAST('123.a' AS float) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type FLOAT: '123.a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('123.a' AS float) + ^^^^^^^^^^^^^^^^^^^^^^ + + +-- !query +SELECT CAST('123.a' AS double) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type DOUBLE: '123.a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +== SQL(line 1, position 7) == +SELECT CAST('123.a' AS double) + ^^^^^^^^^^^^^^^^^^^^^^^ + + -- !query SELECT CAST('-2147483648' AS int) -- !query schema @@ -175,8 +247,8 @@ SELECT CAST('-2147483649' AS int) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '-2147483649'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: '-2147483649'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-2147483649' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -195,8 +267,8 @@ SELECT CAST('2147483648' AS int) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '2147483648'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: '2147483648'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('2147483648' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -215,8 +287,8 @@ SELECT CAST('-9223372036854775809' AS long) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '-9223372036854775809'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type BIGINT: '-9223372036854775809'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-9223372036854775809' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -235,8 +307,8 @@ SELECT CAST('9223372036854775808' AS long) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '9223372036854775808'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type BIGINT: '9223372036854775808'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('9223372036854775808' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -494,8 +566,8 @@ select cast('1中文' as tinyint) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type TINYINT: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as tinyint) ^^^^^^^^^^^^^^^^^^^^^^ @@ -506,8 +578,8 @@ select cast('1中文' as smallint) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type SMALLINT: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as smallint) ^^^^^^^^^^^^^^^^^^^^^^^ @@ -518,8 +590,8 @@ select cast('1中文' as INT) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as INT) ^^^^^^^^^^^^^^^^^^ @@ -530,8 +602,8 @@ select cast('中文1' as bigint) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '中文1'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type BIGINT: '中文1'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast('中文1' as bigint) ^^^^^^^^^^^^^^^^^^^^^ @@ -542,8 +614,8 @@ select cast('1中文' as bigint) -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type BIGINT: '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as bigint) ^^^^^^^^^^^^^^^^^^^^^ @@ -606,7 +678,7 @@ select cast('xyz' as decimal(4, 2)) struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: 'xyz'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DECIMAL(4,2): 'xyz'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast('xyz' as decimal(4, 2)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -626,7 +698,7 @@ select cast('a' as date) struct<> -- !query output java.time.DateTimeException -Cannot cast a to DateType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DATE: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast('a' as date) ^^^^^^^^^^^^^^^^^ @@ -646,7 +718,7 @@ select cast('a' as timestamp) struct<> -- !query output java.time.DateTimeException -Cannot cast a to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type TIMESTAMP: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast('a' as timestamp) ^^^^^^^^^^^^^^^^^^^^^^ @@ -666,7 +738,7 @@ select cast('a' as timestamp_ntz) struct<> -- !query output java.time.DateTimeException -Cannot cast a to TimestampNTZType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type TIMESTAMP_NTZ: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast('a' as timestamp_ntz) ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -678,7 +750,7 @@ select cast(cast('inf' as double) as timestamp) struct<> -- !query output java.time.DateTimeException -Cannot cast Infinity to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type TIMESTAMP: Infinity. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast(cast('inf' as double) as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -690,7 +762,7 @@ select cast(cast('inf' as float) as timestamp) struct<> -- !query output java.time.DateTimeException -Cannot cast Infinity to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type TIMESTAMP: Infinity. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast(cast('inf' as float) as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index d9777b53d21a7..fa65b4dd07110 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -232,7 +232,7 @@ select next_day("xx", "Mon") struct<> -- !query output java.time.DateTimeException -Cannot cast xx to DateType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DATE: 'xx'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select next_day("xx", "Mon") ^^^^^^^^^^^^^^^^^^^^^ @@ -326,8 +326,8 @@ select date_add('2011-11-11', '1.2') -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '1.2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: '1.2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select date_add('2011-11-11', '1.2') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -437,8 +437,8 @@ select date_sub(date'2011-11-11', '1.2') -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: '1.2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: '1.2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select date_sub(date'2011-11-11', '1.2') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out index 57e39bbfe3a00..e30b592020d97 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out @@ -242,7 +242,7 @@ select cast("Unparseable" as timestamp) struct<> -- !query output java.time.DateTimeException -Cannot cast Unparseable to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type TIMESTAMP: 'Unparseable'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast("Unparseable" as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -254,7 +254,7 @@ select cast("Unparseable" as date) struct<> -- !query output java.time.DateTimeException -Cannot cast Unparseable to DateType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DATE: 'Unparseable'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast("Unparseable" as date) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 3b8d95bca0ad2..d7975dfb58a5f 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -122,7 +122,7 @@ select interval 2 second * 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DOUBLE: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select interval 2 second * 'a' ^^^^^^^^^^^^^^^^^^^^^^^ @@ -134,7 +134,7 @@ select interval 2 second / 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DOUBLE: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select interval 2 second / 'a' ^^^^^^^^^^^^^^^^^^^^^^^ @@ -146,7 +146,7 @@ select interval 2 year * 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DOUBLE: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select interval 2 year * 'a' ^^^^^^^^^^^^^^^^^^^^^ @@ -158,7 +158,7 @@ select interval 2 year / 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DOUBLE: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select interval 2 year / 'a' ^^^^^^^^^^^^^^^^^^^^^ @@ -186,7 +186,7 @@ select 'a' * interval 2 second struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DOUBLE: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select 'a' * interval 2 second ^^^^^^^^^^^^^^^^^^^^^^^ @@ -198,7 +198,7 @@ select 'a' * interval 2 year struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DOUBLE: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select 'a' * interval 2 year ^^^^^^^^^^^^^^^^^^^^^ @@ -1516,7 +1516,7 @@ select '4 11:11' - interval '4 22:12' day to minute struct<> -- !query output java.time.DateTimeException -Cannot cast 4 11:11 to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type TIMESTAMP: '4 11:11'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select '4 11:11' - interval '4 22:12' day to minute ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1528,7 +1528,7 @@ select '4 12:12:12' + interval '4 22:12' day to minute struct<> -- !query output java.time.DateTimeException -Cannot cast 4 12:12:12 to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type TIMESTAMP: '4 12:12:12'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select '4 12:12:12' + interval '4 22:12' day to minute ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1566,7 +1566,7 @@ select str - interval '4 22:12' day to minute from interval_view struct<> -- !query output java.time.DateTimeException -Cannot cast 1 to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type TIMESTAMP: '1'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select str - interval '4 22:12' day to minute from interval_view ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1578,7 +1578,7 @@ select str + interval '4 22:12' day to minute from interval_view struct<> -- !query output java.time.DateTimeException -Cannot cast 1 to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type TIMESTAMP: '1'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select str + interval '4 22:12' day to minute from interval_view ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 7d07282ab6763..083471b15d4d4 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -81,8 +81,8 @@ select left("abcd", -2), left("abcd", 0), left("abcd", 'a') -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 42) == ...t("abcd", -2), left("abcd", 0), left("abcd", 'a') ^^^^^^^^^^^^^^^^^ @@ -109,8 +109,8 @@ select right("abcd", -2), right("abcd", 0), right("abcd", 'a') -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 44) == ...("abcd", -2), right("abcd", 0), right("abcd", 'a') ^^^^^^^^^^^^^^^^^^ @@ -418,8 +418,8 @@ SELECT lpad('hi', 'invalid_length') -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: 'invalid_length'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: 'invalid_length'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT lpad('hi', 'invalid_length') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -430,8 +430,8 @@ SELECT rpad('hi', 'invalid_length') -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: 'invalid_length'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: 'invalid_length'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT rpad('hi', 'invalid_length') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/cast.sql.out index 9ed02e3bed2c6..aaa82e4351351 100644 --- a/sql/core/src/test/resources/sql-tests/results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cast.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 66 +-- Number of queries: 72 -- !query @@ -50,6 +50,22 @@ struct NULL +-- !query +SELECT CAST('abc' AS float) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT CAST('abc' AS double) +-- !query schema +struct +-- !query output +NULL + + -- !query SELECT CAST('1234567890123' AS int) -- !query schema @@ -82,6 +98,22 @@ struct NULL +-- !query +SELECT CAST('' AS float) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT CAST('' AS double) +-- !query schema +struct +-- !query output +NULL + + -- !query SELECT CAST(NULL AS int) -- !query schema @@ -114,6 +146,22 @@ struct NULL +-- !query +SELECT CAST('123.a' AS float) +-- !query schema +struct +-- !query output +NULL + + +-- !query +SELECT CAST('123.a' AS double) +-- !query schema +struct +-- !query output +NULL + + -- !query SELECT CAST('-2147483648' AS int) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out index a7ee7400e58bd..b63d2d1307ec3 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out @@ -96,7 +96,7 @@ SELECT float('N A N') struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: 'N A N'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type FLOAT: 'N A N'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT float('N A N') ^^^^^^^^^^^^^^ @@ -108,7 +108,7 @@ SELECT float('NaN x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: 'NaN x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type FLOAT: 'NaN x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT float('NaN x') ^^^^^^^^^^^^^^ @@ -120,7 +120,7 @@ SELECT float(' INFINITY x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: ' INFINITY x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type FLOAT: ' INFINITY x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT float(' INFINITY x') ^^^^^^^^^^^^^^^^^^^^^^^ @@ -156,7 +156,7 @@ SELECT float(decimal('nan')) struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DECIMAL(10,0): 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 13) == SELECT float(decimal('nan')) ^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out index 3237969ea8736..b0582c0952387 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out @@ -128,7 +128,7 @@ SELECT double('N A N') struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: 'N A N'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DOUBLE: 'N A N'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT double('N A N') ^^^^^^^^^^^^^^^ @@ -140,7 +140,7 @@ SELECT double('NaN x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: 'NaN x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DOUBLE: 'NaN x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT double('NaN x') ^^^^^^^^^^^^^^^ @@ -152,7 +152,7 @@ SELECT double(' INFINITY x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: ' INFINITY x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DOUBLE: ' INFINITY x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == SELECT double(' INFINITY x') ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -188,7 +188,7 @@ SELECT double(decimal('nan')) struct<> -- !query output org.apache.spark.SparkNumberFormatException -invalid input syntax for type numeric: 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type DECIMAL(10,0): 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 14) == SELECT double(decimal('nan')) ^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out index 836370935f64d..cff6bf280401f 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out @@ -64,8 +64,8 @@ select string('four: ') || 2+2 -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: 'four: 2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type BIGINT: 'four: 2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select string('four: ') || 2+2 ^^^^^^^^^^^^^^^^^^^^^^^ @@ -76,8 +76,8 @@ select 'four: ' || 2+2 -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: 'four: 2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type BIGINT: 'four: 2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select 'four: ' || 2+2 ^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out index 1d48d7c7b92c4..c48d92a99007f 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out @@ -461,8 +461,8 @@ window w as (order by f_numeric range between -- !query schema struct<> -- !query output -java.lang.NumberFormatException -invalid input syntax for type numeric: 'NaN'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkNumberFormatException +Invalid input syntax for type INT: 'NaN'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 3, position 12) == window w as (order by f_numeric range between ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index c799d65985d5d..b5281d4c6051c 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -72,7 +72,7 @@ insert into datetimes values struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): Cannot cast 11:00 BST to TimestampType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): Invalid input syntax for type TIMESTAMP: '11:00 BST'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 2, position 23) == (1, timestamp '11:00', cast ('11:00 BST' as timestamp), cast ('1 year' as timestamp), ... ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out index 87beeacc0bc15..6beb6fd595817 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out @@ -501,7 +501,7 @@ FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('nan' AS INT): invalid input syntax for type numeric: 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +failed to evaluate expression CAST('nan' AS INT): Invalid input syntax for type INT: 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 3, position 28) == FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) ^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index af861e3913b6e..dc72dfe137d7e 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 142 +-- Number of queries: 143 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index 3f275b2a2bdeb..c09a7a1811c71 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out @@ -332,7 +332,7 @@ select to_timestamp(1) struct<> -- !query output java.time.DateTimeException -Cannot cast 1 to TimestampNTZType. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Invalid input syntax for type TIMESTAMP_NTZ: '1'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala index 97623a2e8dc50..748c8fefa707e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala @@ -302,7 +302,7 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils { val errorMsg = intercept[NumberFormatException] { sql("insert into t partition(a='ansi') values('ansi')") }.getMessage - assert(errorMsg.contains("invalid input syntax for type numeric: 'ansi'")) + assert(errorMsg.contains("Invalid input syntax for type INT: 'ansi'")) } else { sql("insert into t partition(a='ansi') values('ansi')") checkAnswer(sql("select * from t"), Row("ansi", null) :: Nil) From 5c5a68c03cf06a0e3b3b2f24cbd4841c489b89dc Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Wed, 20 Apr 2022 21:18:11 +0800 Subject: [PATCH 168/535] [SPARK-38219][SPARK-37691][3.3] Support ANSI Aggregation Function: percentile_cont and percentile_disc ### What changes were proposed in this pull request? This PR backport https://github.com/apache/spark/pull/35531 and https://github.com/apache/spark/pull/35041 to branch-3.3 ### Why are the changes needed? `percentile_cont` and `percentile_disc` in Spark3.3 release. ### Does this PR introduce _any_ user-facing change? 'No'. New feature. ### How was this patch tested? New tests. Closes #36277 from beliefer/SPARK-38219_SPARK-37691_backport_3.3. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- docs/sql-ref-ansi-compliance.md | 1 + .../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 1 + .../sql/catalyst/parser/SqlBaseParser.g4 | 5 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 7 +- .../{Percentile.scala => percentiles.scala} | 261 ++++++++++++------ .../sql/catalyst/parser/AstBuilder.scala | 25 +- .../aggregate/PercentileSuite.scala | 22 +- .../sql/catalyst/parser/PlanParserSuite.scala | 28 +- .../resources/sql-tests/inputs/group-by.sql | 17 +- .../inputs/postgreSQL/aggregates_part4.sql | 8 +- .../udf/postgreSQL/udf-aggregates_part4.sql | 8 +- .../resources/sql-tests/inputs/window.sql | 112 ++++++++ .../sql-tests/results/group-by.sql.out | 39 ++- .../postgreSQL/aggregates_part4.sql.out | 31 ++- .../postgreSQL/udf-aggregates_part4.sql.out | 31 ++- .../sql-tests/results/window.sql.out | 229 ++++++++++++++- 16 files changed, 698 insertions(+), 127 deletions(-) rename sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/{Percentile.scala => percentiles.scala} (69%) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 692ea60b52ce0..94ef94a5e7bac 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -511,6 +511,7 @@ Below is a list of all the keywords in Spark SQL. |PARTITIONS|non-reserved|non-reserved|non-reserved| |PERCENT|non-reserved|non-reserved|non-reserved| |PERCENTILE_CONT|reserved|non-reserved|non-reserved| +|PERCENTILE_DISC|reserved|non-reserved|non-reserved| |PIVOT|non-reserved|non-reserved|non-reserved| |PLACING|non-reserved|non-reserved|non-reserved| |POSITION|non-reserved|non-reserved|reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index 6c731bb02bc39..e2c4c5444e5bc 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -258,6 +258,7 @@ PARTITION: 'PARTITION'; PARTITIONED: 'PARTITIONED'; PARTITIONS: 'PARTITIONS'; PERCENTILE_CONT: 'PERCENTILE_CONT'; +PERCENTILE_DISC: 'PERCENTILE_DISC'; PERCENTLIT: 'PERCENT'; PIVOT: 'PIVOT'; PLACING: 'PLACING'; diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index fe81f0ccb8a48..9da39a1a96d9e 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -844,8 +844,8 @@ primaryExpression FROM srcStr=valueExpression RIGHT_PAREN #trim | OVERLAY LEFT_PAREN input=valueExpression PLACING replace=valueExpression FROM position=valueExpression (FOR length=valueExpression)? RIGHT_PAREN #overlay - | PERCENTILE_CONT LEFT_PAREN percentage=valueExpression RIGHT_PAREN - WITHIN GROUP LEFT_PAREN ORDER BY sortItem RIGHT_PAREN #percentile + | name=(PERCENTILE_CONT | PERCENTILE_DISC) LEFT_PAREN percentage=valueExpression RIGHT_PAREN + WITHIN GROUP LEFT_PAREN ORDER BY sortItem RIGHT_PAREN ( OVER windowSpec)? #percentile ; constant @@ -1449,6 +1449,7 @@ nonReserved | PARTITIONED | PARTITIONS | PERCENTILE_CONT + | PERCENTILE_DISC | PERCENTLIT | PIVOT | PLACING diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 1c2de771a3d00..b714c5f9ceb4b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ -import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, PercentileCont, PercentileDisc} import org.apache.spark.sql.catalyst.optimizer.{BooleanSimplification, DecorrelateInnerQuery, InlineCTE} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -233,6 +233,11 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { // Only allow window functions with an aggregate expression or an offset window // function or a Pandas window UDF. w.windowFunction match { + case agg @ AggregateExpression(_: PercentileCont | _: PercentileDisc, _, _, _, _) + if w.windowSpec.orderSpec.nonEmpty || w.windowSpec.frameSpecification != + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, UnboundedFollowing) => + failAnalysis( + s"Cannot specify order by or frame for '${agg.aggregateFunction.prettyName}'.") case _: AggregateExpression | _: FrameLessOffsetWindowFunction | _: AggregateWindowFunction => // OK case f: PythonUDF if PythonUDF.isWindowPandasUDF(f) => // OK diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala similarity index 69% rename from sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala rename to sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala index a98585e0ff1e7..e861fb370ca1e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Percentile.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala @@ -24,93 +24,36 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{TypeCheckFailure, TypeCheckSuccess} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.trees.TernaryLike +import org.apache.spark.sql.catalyst.trees.{BinaryLike, TernaryLike} import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.types._ import org.apache.spark.util.collection.OpenHashMap -/** - * The Percentile aggregate function returns the exact percentile(s) of numeric column `expr` at - * the given percentage(s) with value range in [0.0, 1.0]. - * - * Because the number of elements and their partial order cannot be determined in advance. - * Therefore we have to store all the elements in memory, and so notice that too many elements can - * cause GC paused and eventually OutOfMemory Errors. - * - * @param child child expression that produce numeric column value with `child.eval(inputRow)` - * @param percentageExpression Expression that represents a single percentage value or an array of - * percentage values. Each percentage value must be in the range - * [0.0, 1.0]. - */ -// scalastyle:off line.size.limit -@ExpressionDescription( - usage = - """ - _FUNC_(col, percentage [, frequency]) - Returns the exact percentile value of numeric - or ansi interval column `col` at the given percentage. The value of percentage must be - between 0.0 and 1.0. The value of frequency should be positive integral +abstract class PercentileBase extends TypedImperativeAggregate[OpenHashMap[AnyRef, Long]] + with ImplicitCastInputTypes { - _FUNC_(col, array(percentage1 [, percentage2]...) [, frequency]) - Returns the exact - percentile value array of numeric column `col` at the given percentage(s). Each value - of the percentage array must be between 0.0 and 1.0. The value of frequency should be - positive integral + val child: Expression + val percentageExpression: Expression + val frequencyExpression : Expression - """, - examples = """ - Examples: - > SELECT _FUNC_(col, 0.3) FROM VALUES (0), (10) AS tab(col); - 3.0 - > SELECT _FUNC_(col, array(0.25, 0.75)) FROM VALUES (0), (10) AS tab(col); - [2.5,7.5] - > SELECT _FUNC_(col, 0.5) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '10' MONTH) AS tab(col); - 5.0 - > SELECT _FUNC_(col, array(0.2, 0.5)) FROM VALUES (INTERVAL '0' SECOND), (INTERVAL '10' SECOND) AS tab(col); - [2000000.0,5000000.0] - """, - group = "agg_funcs", - since = "2.1.0") -// scalastyle:on line.size.limit -case class Percentile( - child: Expression, - percentageExpression: Expression, - frequencyExpression : Expression, - mutableAggBufferOffset: Int = 0, - inputAggBufferOffset: Int = 0) - extends TypedImperativeAggregate[OpenHashMap[AnyRef, Long]] with ImplicitCastInputTypes - with TernaryLike[Expression] { + // Whether to reverse calculate percentile value + val reverse: Boolean - def this(child: Expression, percentageExpression: Expression) = { - this(child, percentageExpression, Literal(1L), 0, 0) - } - - def this(child: Expression, percentageExpression: Expression, frequency: Expression) = { - this(child, percentageExpression, frequency, 0, 0) - } - - override def prettyName: String = "percentile" - - override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): Percentile = - copy(mutableAggBufferOffset = newMutableAggBufferOffset) - - override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): Percentile = - copy(inputAggBufferOffset = newInputAggBufferOffset) + // Whether the value is discrete + protected def discrete: Boolean // Mark as lazy so that percentageExpression is not evaluated during tree transformation. @transient private lazy val returnPercentileArray = percentageExpression.dataType.isInstanceOf[ArrayType] @transient - private lazy val percentages = percentageExpression.eval() match { + protected lazy val percentages = percentageExpression.eval() match { case null => null case num: Double => Array(num) case arrayData: ArrayData => arrayData.toDoubleArray() } - override def first: Expression = child - override def second: Expression = percentageExpression - override def third: Expression = frequencyExpression - // Returns null for empty inputs override def nullable: Boolean = true @@ -151,7 +94,7 @@ case class Percentile( } } - private def toDoubleValue(d: Any): Double = d match { + protected def toDoubleValue(d: Any): Double = d match { case d: Decimal => d.toDouble case n: Number => n.doubleValue } @@ -204,7 +147,11 @@ case class Percentile( case intervalType: DayTimeIntervalType => intervalType.ordering case otherType => QueryExecutionErrors.unsupportedTypeError(otherType) } - val sortedCounts = buffer.toSeq.sortBy(_._1)(ordering.asInstanceOf[Ordering[AnyRef]]) + val sortedCounts = if (reverse) { + buffer.toSeq.sortBy(_._1)(ordering.asInstanceOf[Ordering[AnyRef]].reverse) + } else { + buffer.toSeq.sortBy(_._1)(ordering.asInstanceOf[Ordering[AnyRef]]) + } val accumulatedCounts = sortedCounts.scanLeft((sortedCounts.head._1, 0L)) { case ((key1, count1), (key2, count2)) => (key2, count1 + count2) }.tail @@ -227,40 +174,44 @@ case class Percentile( /** * Get the percentile value. - * * This function has been based upon similar function from HIVE * `org.apache.hadoop.hive.ql.udf.UDAFPercentile.getPercentile()`. */ - private def getPercentile(aggreCounts: Seq[(AnyRef, Long)], position: Double): Double = { + private def getPercentile( + accumulatedCounts: Seq[(AnyRef, Long)], position: Double): Double = { // We may need to do linear interpolation to get the exact percentile val lower = position.floor.toLong val higher = position.ceil.toLong // Use binary search to find the lower and the higher position. - val countsArray = aggreCounts.map(_._2).toArray[Long] - val lowerIndex = binarySearchCount(countsArray, 0, aggreCounts.size, lower + 1) - val higherIndex = binarySearchCount(countsArray, 0, aggreCounts.size, higher + 1) + val countsArray = accumulatedCounts.map(_._2).toArray[Long] + val lowerIndex = binarySearchCount(countsArray, 0, accumulatedCounts.size, lower + 1) + val higherIndex = binarySearchCount(countsArray, 0, accumulatedCounts.size, higher + 1) - val lowerKey = aggreCounts(lowerIndex)._1 + val lowerKey = accumulatedCounts(lowerIndex)._1 if (higher == lower) { // no interpolation needed because position does not have a fraction return toDoubleValue(lowerKey) } - val higherKey = aggreCounts(higherIndex)._1 + val higherKey = accumulatedCounts(higherIndex)._1 if (higherKey == lowerKey) { // no interpolation needed because lower position and higher position has the same key return toDoubleValue(lowerKey) } - // Linear interpolation to get the exact percentile - (higher - position) * toDoubleValue(lowerKey) + (position - lower) * toDoubleValue(higherKey) + if (discrete) { + toDoubleValue(lowerKey) + } else { + // Linear interpolation to get the exact percentile + (higher - position) * toDoubleValue(lowerKey) + (position - lower) * toDoubleValue(higherKey) + } } /** * use a binary search to find the index of the position closest to the current value. */ - private def binarySearchCount( + protected def binarySearchCount( countsArray: Array[Long], start: Int, end: Int, value: Long): Int = { util.Arrays.binarySearch(countsArray, 0, end, value) match { case ix if ix < 0 => -(ix + 1) @@ -268,12 +219,13 @@ case class Percentile( } } + private lazy val projection = UnsafeProjection.create(Array[DataType](child.dataType, LongType)) + override def serialize(obj: OpenHashMap[AnyRef, Long]): Array[Byte] = { val buffer = new Array[Byte](4 << 10) // 4K val bos = new ByteArrayOutputStream() val out = new DataOutputStream(bos) try { - val projection = UnsafeProjection.create(Array[DataType](child.dataType, LongType)) // Write pairs in counts map to byte buffer. obj.foreach { case (key, count) => val row = InternalRow.apply(key, count) @@ -316,6 +268,86 @@ case class Percentile( bis.close() } } +} + +/** + * The Percentile aggregate function returns the exact percentile(s) of numeric column `expr` at + * the given percentage(s) with value range in [0.0, 1.0]. + * + * Because the number of elements and their partial order cannot be determined in advance. + * Therefore we have to store all the elements in memory, and so notice that too many elements can + * cause GC paused and eventually OutOfMemory Errors. + * + * @param child child expression that produce numeric column value with `child.eval(inputRow)` + * @param percentageExpression Expression that represents a single percentage value or an array of + * percentage values. Each percentage value must be in the range + * [0.0, 1.0]. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = + """ + _FUNC_(col, percentage [, frequency]) - Returns the exact percentile value of numeric + or ansi interval column `col` at the given percentage. The value of percentage must be + between 0.0 and 1.0. The value of frequency should be positive integral + _FUNC_(col, array(percentage1 [, percentage2]...) [, frequency]) - Returns the exact + percentile value array of numeric column `col` at the given percentage(s). Each value + of the percentage array must be between 0.0 and 1.0. The value of frequency should be + positive integral + """, + examples = """ + Examples: + > SELECT _FUNC_(col, 0.3) FROM VALUES (0), (10) AS tab(col); + 3.0 + > SELECT _FUNC_(col, array(0.25, 0.75)) FROM VALUES (0), (10) AS tab(col); + [2.5,7.5] + > SELECT _FUNC_(col, 0.5) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '10' MONTH) AS tab(col); + 5.0 + > SELECT _FUNC_(col, array(0.2, 0.5)) FROM VALUES (INTERVAL '0' SECOND), (INTERVAL '10' SECOND) AS tab(col); + [2000000.0,5000000.0] + """, + group = "agg_funcs", + since = "2.1.0") +// scalastyle:on line.size.limit +case class Percentile( + child: Expression, + percentageExpression: Expression, + frequencyExpression : Expression, + mutableAggBufferOffset: Int = 0, + inputAggBufferOffset: Int = 0, + reverse: Boolean = false) extends PercentileBase with TernaryLike[Expression] { + + def this(child: Expression, percentageExpression: Expression) = { + this(child, percentageExpression, Literal(1L), 0, 0) + } + + def this(child: Expression, percentageExpression: Expression, frequency: Expression) = { + this(child, percentageExpression, frequency, 0, 0) + } + + def this(child: Expression, percentageExpression: Expression, reverse: Boolean) = { + this(child, percentageExpression, Literal(1L), reverse = reverse) + } + + override def first: Expression = child + override def second: Expression = percentageExpression + override def third: Expression = frequencyExpression + + override def prettyName: String = "percentile" + + override def discrete: Boolean = false + + override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): Percentile = + copy(mutableAggBufferOffset = newMutableAggBufferOffset) + + override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): Percentile = + copy(inputAggBufferOffset = newInputAggBufferOffset) + + override protected def stringArgs: Iterator[Any] = if (discrete) { + super.stringArgs ++ Some(discrete) + } else { + super.stringArgs + } override protected def withNewChildrenInternal( newFirst: Expression, newSecond: Expression, newThird: Expression): Percentile = copy( @@ -324,3 +356,70 @@ case class Percentile( frequencyExpression = newThird ) } + +/** + * Return a percentile value based on a continuous distribution of + * numeric or ansi interval column at the given percentage (specified in ORDER BY clause). + * The value of percentage must be between 0.0 and 1.0. + */ +case class PercentileCont(left: Expression, right: Expression, reverse: Boolean = false) + extends AggregateFunction + with RuntimeReplaceableAggregate + with ImplicitCastInputTypes + with BinaryLike[Expression] { + private lazy val percentile = new Percentile(left, right, reverse) + override def replacement: Expression = percentile + override def nodeName: String = "percentile_cont" + override def inputTypes: Seq[AbstractDataType] = percentile.inputTypes + override def sql(isDistinct: Boolean): String = { + val distinct = if (isDistinct) "DISTINCT " else "" + val direction = if (reverse) " DESC" else "" + s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY v$direction)" + } + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): PercentileCont = + this.copy(left = newLeft, right = newRight) +} + +/** + * The Percentile aggregate function returns the percentile(s) based on a discrete distribution of + * numeric column `expr` at the given percentage(s) with value range in [0.0, 1.0]. + * + * Because the number of elements and their partial order cannot be determined in advance. + * Therefore we have to store all the elements in memory, and so notice that too many elements can + * cause GC paused and eventually OutOfMemory Errors. + */ +case class PercentileDisc( + child: Expression, + percentageExpression: Expression, + reverse: Boolean = false, + mutableAggBufferOffset: Int = 0, + inputAggBufferOffset: Int = 0) extends PercentileBase with BinaryLike[Expression] { + + val frequencyExpression: Expression = Literal(1L) + + override def left: Expression = child + override def right: Expression = percentageExpression + + override def prettyName: String = "percentile_disc" + + override def discrete: Boolean = true + + override def withNewMutableAggBufferOffset(newMutableAggBufferOffset: Int): PercentileDisc = + copy(mutableAggBufferOffset = newMutableAggBufferOffset) + + override def withNewInputAggBufferOffset(newInputAggBufferOffset: Int): PercentileDisc = + copy(inputAggBufferOffset = newInputAggBufferOffset) + + override def sql(isDistinct: Boolean): String = { + val distinct = if (isDistinct) "DISTINCT " else "" + val direction = if (reverse) " DESC" else "" + s"$prettyName($distinct${right.sql}) WITHIN GROUP (ORDER BY v$direction)" + } + + override protected def withNewChildrenInternal( + newLeft: Expression, newRight: Expression): PercentileDisc = copy( + child = newLeft, + percentageExpression = newRight + ) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index d334b5780f78a..e788368604fa1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, SQLConfHelper, TableId import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog.{BucketSpec, CatalogStorageFormat} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last, Percentile} +import org.apache.spark.sql.catalyst.expressions.aggregate.{First, Last, PercentileCont, PercentileDisc} import org.apache.spark.sql.catalyst.parser.SqlBaseParser._ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -1836,11 +1836,26 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit override def visitPercentile(ctx: PercentileContext): Expression = withOrigin(ctx) { val percentage = expression(ctx.percentage) val sortOrder = visitSortItem(ctx.sortItem) - val percentile = sortOrder.direction match { - case Ascending => new Percentile(sortOrder.child, percentage) - case Descending => new Percentile(sortOrder.child, Subtract(Literal(1), percentage)) + val percentile = ctx.name.getType match { + case SqlBaseParser.PERCENTILE_CONT => + sortOrder.direction match { + case Ascending => PercentileCont(sortOrder.child, percentage) + case Descending => PercentileCont(sortOrder.child, percentage, true) + } + case SqlBaseParser.PERCENTILE_DISC => + sortOrder.direction match { + case Ascending => PercentileDisc(sortOrder.child, percentage) + case Descending => PercentileDisc(sortOrder.child, percentage, true) + } + } + val aggregateExpression = percentile.toAggregateExpression() + ctx.windowSpec match { + case spec: WindowRefContext => + UnresolvedWindowExpression(aggregateExpression, visitWindowRef(spec)) + case spec: WindowDefContext => + WindowExpression(aggregateExpression, visitWindowDef(spec)) + case _ => aggregateExpression } - percentile.toAggregateExpression() } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala index b5882b1ab4064..7b85be05e3b3d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala @@ -83,8 +83,8 @@ class PercentileSuite extends SparkFunSuite { } private def runTest(agg: Percentile, - rows : Seq[Seq[Any]], - expectedPercentiles : Seq[Double]): Unit = { + rows : Seq[Seq[Any]], + expectedPercentiles : Seq[Double]): Unit = { assert(agg.nullable) val group1 = (0 until rows.length / 2) val group1Buffer = agg.createAggregationBuffer() @@ -218,7 +218,7 @@ class PercentileSuite extends SparkFunSuite { val percentile2 = new Percentile(child, percentage) assertEqual(percentile2.checkInputDataTypes(), TypeCheckFailure(s"Percentage(s) must be between 0.0 and 1.0, " + - s"but got ${percentage.simpleString(100)}")) + s"but got ${percentage.simpleString(100)}")) } val nonFoldablePercentage = Seq(NonFoldableLiteral(0.5), @@ -270,7 +270,6 @@ class PercentileSuite extends SparkFunSuite { } test("nulls in percentage expression") { - assert(new Percentile( AttributeReference("a", DoubleType)(), percentageExpression = Literal(null, DoubleType)).checkInputDataTypes() === @@ -280,14 +279,13 @@ class PercentileSuite extends SparkFunSuite { Seq(CreateArray(Seq(null).map(Literal(_))), CreateArray(Seq(0.1D, null).map(Literal(_)))) nullPercentageExprs.foreach { percentageExpression => - val wrongPercentage = new Percentile( - AttributeReference("a", DoubleType)(), - percentageExpression = percentageExpression) - assert( - wrongPercentage.checkInputDataTypes() match { - case TypeCheckFailure(msg) if msg.contains("argument 2 requires array") => true - case _ => false - }) + val wrongPercentage = new Percentile( + AttributeReference("a", DoubleType)(), + percentageExpression = percentageExpression) + assert(wrongPercentage.checkInputDataTypes() match { + case TypeCheckFailure(msg) if msg.contains("argument 2 requires array") => true + case _ => false + }) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 3e2d917a8932f..688c0d1237320 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.parser import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.{AnalysisTest, RelationTimeTravel, UnresolvedAlias, UnresolvedAttribute, UnresolvedFunction, UnresolvedGenerator, UnresolvedInlineTable, UnresolvedRelation, UnresolvedStar, UnresolvedSubqueryColumnAliases, UnresolvedTableValuedFunction} import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate.Percentile +import org.apache.spark.sql.catalyst.expressions.aggregate.{PercentileCont, PercentileDisc} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.internal.SQLConf @@ -1303,24 +1303,36 @@ class PlanParserSuite extends AnalysisTest { "timestamp expression cannot contain subqueries") } - test("PERCENTILE_CONT function") { - def assertPercentileContPlans(inputSQL: String, expectedExpression: Expression): Unit = { + test("PERCENTILE_CONT & PERCENTILE_DISC") { + def assertPercentilePlans(inputSQL: String, expectedExpression: Expression): Unit = { comparePlans( parsePlan(inputSQL), Project(Seq(UnresolvedAlias(expectedExpression)), OneRowRelation()) ) } - assertPercentileContPlans( + assertPercentilePlans( "SELECT PERCENTILE_CONT(0.1) WITHIN GROUP (ORDER BY col)", - new Percentile(UnresolvedAttribute("col"), Literal(Decimal(0.1), DecimalType(1, 1))) + PercentileCont(UnresolvedAttribute("col"), Literal(Decimal(0.1), DecimalType(1, 1))) .toAggregateExpression() ) - assertPercentileContPlans( + assertPercentilePlans( "SELECT PERCENTILE_CONT(0.1) WITHIN GROUP (ORDER BY col DESC)", - new Percentile(UnresolvedAttribute("col"), - Subtract(Literal(1), Literal(Decimal(0.1), DecimalType(1, 1)))).toAggregateExpression() + PercentileCont(UnresolvedAttribute("col"), + Literal(Decimal(0.1), DecimalType(1, 1)), true).toAggregateExpression() + ) + + assertPercentilePlans( + "SELECT PERCENTILE_DISC(0.1) WITHIN GROUP (ORDER BY col)", + PercentileDisc(UnresolvedAttribute("col"), Literal(Decimal(0.1), DecimalType(1, 1))) + .toAggregateExpression() + ) + + assertPercentilePlans( + "SELECT PERCENTILE_DISC(0.1) WITHIN GROUP (ORDER BY col DESC)", + PercentileDisc(UnresolvedAttribute("col"), + Literal(Decimal(0.1), DecimalType(1, 1)), true).toAggregateExpression() ) } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql index b97b24140ede8..291a8478c7a81 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/group-by.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/group-by.sql @@ -272,8 +272,8 @@ SELECT k, avg(x) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), avg(y) FILTER ( -- SPARK-37676: Support ANSI Aggregation Function: percentile_cont SELECT - percentile_cont(0.25) WITHIN GROUP (ORDER BY v), - percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) + percentile_cont(0.25) WITHIN GROUP (ORDER BY v), + percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) FROM aggr; SELECT k, @@ -282,3 +282,16 @@ SELECT FROM aggr GROUP BY k ORDER BY k; + +-- SPARK-37691: Support ANSI Aggregation Function: percentile_disc +SELECT + percentile_disc(0.25) WITHIN GROUP (ORDER BY v), + percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC) +FROM aggr; +SELECT + k, + percentile_disc(0.25) WITHIN GROUP (ORDER BY v), + percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC) +FROM aggr +GROUP BY k +ORDER BY k; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part4.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part4.sql index 0d255bed24e9c..3cf57b1f0e502 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part4.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/aggregates_part4.sql @@ -33,10 +33,10 @@ -- (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p) -- group by p order by p; --- select percentile_cont(0.5) within group (order by b) from aggtest; --- select percentile_cont(0.5) within group (order by b), sum(b) from aggtest; --- select percentile_cont(0.5) within group (order by thousand) from tenk1; --- select percentile_disc(0.5) within group (order by thousand) from tenk1; +select percentile_cont(0.5) within group (order by b) from aggtest; +select percentile_cont(0.5) within group (order by b), sum(b) from aggtest; +select percentile_cont(0.5) within group (order by thousand) from tenk1; +select percentile_disc(0.5) within group (order by thousand) from tenk1; -- [SPARK-28661] Hypothetical-Set Aggregate Functions -- select rank(3) within group (order by x) -- from (values (1),(1),(2),(2),(3),(3),(4)) v(x); diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part4.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part4.sql index 8aea00073eee8..dd14a7db0db16 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part4.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-aggregates_part4.sql @@ -30,10 +30,10 @@ -- (values (0::float8),(0.1),(0.25),(0.4),(0.5),(0.6),(0.75),(0.9),(1)) v(p) -- group by p order by p; --- select percentile_cont(0.5) within group (order by b) from aggtest; --- select percentile_cont(0.5) within group (order by b), sum(b) from aggtest; --- select percentile_cont(0.5) within group (order by thousand) from tenk1; --- select percentile_disc(0.5) within group (order by thousand) from tenk1; +select percentile_cont(0.5) within group (order by b) from aggtest; +select percentile_cont(0.5) within group (order by b), sum(b) from aggtest; +select percentile_cont(0.5) within group (order by thousand) from tenk1; +select percentile_disc(0.5) within group (order by thousand) from tenk1; -- [SPARK-28661] Hypothetical-Set Aggregate Functions -- select rank(3) within group (order by x) -- from (values (1),(1),(2),(2),(3),(3),(4)) v(x); diff --git a/sql/core/src/test/resources/sql-tests/inputs/window.sql b/sql/core/src/test/resources/sql-tests/inputs/window.sql index 666c0577f13bb..e982683250ce5 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/window.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/window.sql @@ -440,3 +440,115 @@ SELECT SUM(salary) OVER w sum_salary FROM basic_pays; + +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department), + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department) +FROM basic_pays +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary), + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary) +FROM basic_pays +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary) +FROM basic_pays +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +FROM basic_pays +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +FROM basic_pays +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department) +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.5) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w, + percentile_disc(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WHERE salary > 8900 +WINDOW w AS (PARTITION BY department) +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ORDER BY salary) +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ORDER BY salary) +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +ORDER BY salary; + +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +ORDER BY salary; diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 04d34972c3061..ba06b148d2dee 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 99 +-- Number of queries: 101 -- !query @@ -974,11 +974,11 @@ struct +struct -- !query output 10.0 30.0 @@ -992,10 +992,39 @@ FROM aggr GROUP BY k ORDER BY k -- !query schema -struct +struct -- !query output 0 10.0 30.0 1 12.5 17.5 2 17.5 26.25 3 60.0 60.0 4 NULL NULL + + +-- !query +SELECT + percentile_disc(0.25) WITHIN GROUP (ORDER BY v), + percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC) +FROM aggr +-- !query schema +struct +-- !query output +10.0 30.0 + + +-- !query +SELECT + k, + percentile_disc(0.25) WITHIN GROUP (ORDER BY v), + percentile_disc(0.25) WITHIN GROUP (ORDER BY v DESC) +FROM aggr +GROUP BY k +ORDER BY k +-- !query schema +struct +-- !query output +0 10.0 30.0 +1 10.0 20.0 +2 10.0 30.0 +3 60.0 60.0 +4 NULL NULL diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out index b7bbdc50dfb41..8c21a5067bf7c 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/aggregates_part4.sql.out @@ -1,5 +1,34 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 0 +-- Number of queries: 4 +-- !query +select percentile_cont(0.5) within group (order by b) from aggtest +-- !query schema +struct +-- !query output +53.44850015640259 + +-- !query +select percentile_cont(0.5) within group (order by b), sum(b) from aggtest +-- !query schema +struct +-- !query output +53.44850015640259 431.77260909229517 + + +-- !query +select percentile_cont(0.5) within group (order by thousand) from tenk1 +-- !query schema +struct +-- !query output +499.5 + + +-- !query +select percentile_disc(0.5) within group (order by thousand) from tenk1 +-- !query schema +struct +-- !query output +499.0 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out index b7bbdc50dfb41..8c21a5067bf7c 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-aggregates_part4.sql.out @@ -1,5 +1,34 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 0 +-- Number of queries: 4 +-- !query +select percentile_cont(0.5) within group (order by b) from aggtest +-- !query schema +struct +-- !query output +53.44850015640259 + +-- !query +select percentile_cont(0.5) within group (order by b), sum(b) from aggtest +-- !query schema +struct +-- !query output +53.44850015640259 431.77260909229517 + + +-- !query +select percentile_cont(0.5) within group (order by thousand) from tenk1 +-- !query schema +struct +-- !query output +499.5 + + +-- !query +select percentile_disc(0.5) within group (order by thousand) from tenk1 +-- !query schema +struct +-- !query output +499.0 diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index d13411e333371..dcce285c30925 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 55 +-- Number of queries: 66 -- !query @@ -1197,3 +1197,230 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException Window specification w is not defined in the WINDOW clause. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department), + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department) +FROM basic_pays +ORDER BY salary +-- !query schema +struct +-- !query output +Leslie Thompson IT 5186 5917.75 5186.0 7381.25 8113.0 +Anthony Bow Accounting 6627 8543.75 8435.0 9746.5 9998.0 +Foon Yue Tseng Sales 6660 8550.75 6660.0 9721.5 10563.0 +Gerard Hernandez SCM 6949 10449.0 10449.0 11303.0 11303.0 +Leslie Jennings IT 8113 5917.75 5186.0 7381.25 8113.0 +Diane Murphy Accounting 8435 8543.75 8435.0 9746.5 9998.0 +William Patterson Accounting 8870 8543.75 8435.0 9746.5 9998.0 +Jeff Firrelli Accounting 8992 8543.75 8435.0 9746.5 9998.0 +Julie Firrelli Sales 9181 8550.75 6660.0 9721.5 10563.0 +Steve Patterson Sales 9441 8550.75 6660.0 9721.5 10563.0 +Mary Patterson Accounting 9998 8543.75 8435.0 9746.5 9998.0 +Loui Bondur SCM 10449 10449.0 10449.0 11303.0 11303.0 +George Vanauf Sales 10563 8550.75 6660.0 9721.5 10563.0 +Barry Jones SCM 10586 10449.0 10449.0 11303.0 11303.0 +Pamela Castillo SCM 11303 10449.0 10449.0 11303.0 11303.0 +Gerard Bondur Accounting 11472 8543.75 8435.0 9746.5 9998.0 +Larry Bott SCM 11798 10449.0 10449.0 11303.0 11303.0 + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary), + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary) +FROM basic_pays +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_cont'. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ORDER BY salary), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ORDER BY salary) +FROM basic_pays +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_disc'. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +FROM basic_pays +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_cont'. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING), + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +FROM basic_pays +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_disc'. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department) +ORDER BY salary +-- !query schema +struct +-- !query output +Leslie Thompson IT 5186 5917.75 5186.0 7381.25 8113.0 +Anthony Bow Accounting 6627 8543.75 8435.0 9746.5 9998.0 +Foon Yue Tseng Sales 6660 8550.75 6660.0 9721.5 10563.0 +Gerard Hernandez SCM 6949 10449.0 10449.0 11303.0 11303.0 +Leslie Jennings IT 8113 5917.75 5186.0 7381.25 8113.0 +Diane Murphy Accounting 8435 8543.75 8435.0 9746.5 9998.0 +William Patterson Accounting 8870 8543.75 8435.0 9746.5 9998.0 +Jeff Firrelli Accounting 8992 8543.75 8435.0 9746.5 9998.0 +Julie Firrelli Sales 9181 8550.75 6660.0 9721.5 10563.0 +Steve Patterson Sales 9441 8550.75 6660.0 9721.5 10563.0 +Mary Patterson Accounting 9998 8543.75 8435.0 9746.5 9998.0 +Loui Bondur SCM 10449 10449.0 10449.0 11303.0 11303.0 +George Vanauf Sales 10563 8550.75 6660.0 9721.5 10563.0 +Barry Jones SCM 10586 10449.0 10449.0 11303.0 11303.0 +Pamela Castillo SCM 11303 10449.0 10449.0 11303.0 11303.0 +Gerard Bondur Accounting 11472 8543.75 8435.0 9746.5 9998.0 +Larry Bott SCM 11798 10449.0 10449.0 11303.0 11303.0 + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_cont(0.5) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.5) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w, + percentile_disc(0.5) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WHERE salary > 8900 +WINDOW w AS (PARTITION BY department) +ORDER BY salary +-- !query schema +struct +-- !query output +Jeff Firrelli Accounting 8992 9998.0 9998.0 9998.0 9998.0 +Julie Firrelli Sales 9181 9441.0 9441.0 9441.0 9441.0 +Steve Patterson Sales 9441 9441.0 9441.0 9441.0 9441.0 +Mary Patterson Accounting 9998 9998.0 9998.0 9998.0 9998.0 +Loui Bondur SCM 10449 10944.5 10586.0 10944.5 11303.0 +George Vanauf Sales 10563 9441.0 9441.0 9441.0 9441.0 +Barry Jones SCM 10586 10944.5 10586.0 10944.5 11303.0 +Pamela Castillo SCM 11303 10944.5 10586.0 10944.5 11303.0 +Gerard Bondur Accounting 11472 9998.0 9998.0 9998.0 9998.0 +Larry Bott SCM 11798 10944.5 10586.0 10944.5 11303.0 + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ORDER BY salary) +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_cont'. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ORDER BY salary) +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_disc'. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_cont(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_cont'. + + +-- !query +SELECT + employee_name, + department, + salary, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary) OVER w, + percentile_disc(0.25) WITHIN GROUP (ORDER BY salary DESC) OVER w +FROM basic_pays +WINDOW w AS (PARTITION BY department ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) +ORDER BY salary +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Cannot specify order by or frame for 'percentile_disc'. From 1e7cdda24175ad076ccb059499b82384ce47f868 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Wed, 20 Apr 2022 21:36:47 +0800 Subject: [PATCH 169/535] [SPARK-34079][SQL] Merge non-correlated scalar subqueries ### What changes were proposed in this pull request? This PR adds a new optimizer rule `MergeScalarSubqueries` to merge multiple non-correlated `ScalarSubquery`s to compute multiple scalar values once. E.g. the following query: ``` SELECT (SELECT avg(a) FROM t), (SELECT sum(b) FROM t) ``` is optimized from: ``` == Optimized Logical Plan == Project [scalar-subquery#242 [] AS scalarsubquery()#253, scalar-subquery#243 [] AS scalarsubquery()#254L] : :- Aggregate [avg(a#244) AS avg(a)#247] : : +- Project [a#244] : : +- Relation default.t[a#244,b#245] parquet : +- Aggregate [sum(a#251) AS sum(a)#250L] : +- Project [a#251] : +- Relation default.t[a#251,b#252] parquet +- OneRowRelation ``` to: ``` == Optimized Logical Plan == Project [scalar-subquery#242 [].avg(a) AS scalarsubquery()#253, scalar-subquery#243 [].sum(a) AS scalarsubquery()#254L] : :- Project [named_struct(avg(a), avg(a)#247, sum(a), sum(a)#250L) AS mergedValue#260] : : +- Aggregate [avg(a#244) AS avg(a)#247, sum(a#244) AS sum(a)#250L] : : +- Project [a#244] : : +- Relation default.t[a#244,b#245] parquet : +- Project [named_struct(avg(a), avg(a)#247, sum(a), sum(a)#250L) AS mergedValue#260] : +- Aggregate [avg(a#244) AS avg(a)#247, sum(a#244) AS sum(a)#250L] : +- Project [a#244] : +- Relation default.t[a#244,b#245] parquet +- OneRowRelation ``` and in the physical plan subqueries are reused: ``` == Physical Plan == AdaptiveSparkPlan isFinalPlan=true +- == Final Plan == *(1) Project [Subquery subquery#242, [id=#113].avg(a) AS scalarsubquery()#253, ReusedSubquery Subquery subquery#242, [id=#113].sum(a) AS scalarsubquery()#254L] : :- Subquery subquery#242, [id=#113] : : +- AdaptiveSparkPlan isFinalPlan=true +- == Final Plan == *(2) Project [named_struct(avg(a), avg(a)#247, sum(a), sum(a)#250L) AS mergedValue#260] +- *(2) HashAggregate(keys=[], functions=[avg(a#244), sum(a#244)], output=[avg(a)#247, sum(a)#250L]) +- ShuffleQueryStage 0 +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#158] +- *(1) HashAggregate(keys=[], functions=[partial_avg(a#244), partial_sum(a#244)], output=[sum#262, count#263L, sum#264L]) +- *(1) ColumnarToRow +- FileScan parquet default.t[a#244] Batched: true, DataFilters: [], Format: Parquet, Location: ..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct +- == Initial Plan == Project [named_struct(avg(a), avg(a)#247, sum(a), sum(a)#250L) AS mergedValue#260] +- HashAggregate(keys=[], functions=[avg(a#244), sum(a#244)], output=[avg(a)#247, sum(a)#250L]) +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#110] +- HashAggregate(keys=[], functions=[partial_avg(a#244), partial_sum(a#244)], output=[sum#262, count#263L, sum#264L]) +- FileScan parquet default.t[a#244] Batched: true, DataFilters: [], Format: Parquet, Location: ..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct : +- ReusedSubquery Subquery subquery#242, [id=#113] +- *(1) Scan OneRowRelation[] +- == Initial Plan == ... ``` Please note that the above simple example could be easily optimized into a common select expression without reuse node, but this PR can handle more complex queries as well. ### Why are the changes needed? Performance improvement. ``` [info] TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative [info] ------------------------------------------------------------------------------------------------------------------------ [info] q9 - MergeScalarSubqueries off 50798 52521 1423 0.0 Infinity 1.0X [info] q9 - MergeScalarSubqueries on 19484 19675 226 0.0 Infinity 2.6X [info] TPCDS Snappy: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative [info] ------------------------------------------------------------------------------------------------------------------------ [info] q9b - MergeScalarSubqueries off 15430 17803 NaN 0.0 Infinity 1.0X [info] q9b - MergeScalarSubqueries on 3862 4002 196 0.0 Infinity 4.0X ``` Please find `q9b` in the description of SPARK-34079. It is a variant of [q9.sql](https://github.com/apache/spark/blob/master/sql/core/src/test/resources/tpcds/q9.sql) using CTE. The performance improvement in case of `q9` comes from merging 15 subqueries into 5 and in case of `q9b` it comes from merging 5 subqueries into 1. ### Does this PR introduce _any_ user-facing change? No. But this optimization can be disabled with `spark.sql.optimizer.excludedRules` config. ### How was this patch tested? Existing and new UTs. Closes #32298 from peter-toth/SPARK-34079-multi-column-scalar-subquery. Lead-authored-by: Peter Toth Co-authored-by: attilapiros Signed-off-by: Wenchen Fan (cherry picked from commit e00b81ee9b37067ce8e8242907b26d3ae200f401) Signed-off-by: Wenchen Fan --- .../expressions/BloomFilterMightContain.scala | 3 + .../optimizer/MergeScalarSubqueries.scala | 389 +++++++++ ...wnPredicatesAndPruneColumnsForCTEDef.scala | 4 +- .../ReplaceCTERefWithRepartition.scala | 15 +- .../plans/logical/basicLogicalOperators.scala | 39 +- .../sql/catalyst/trees/TreePatterns.scala | 1 + .../MergeScalarSubqueriesSuite.scala | 578 +++++++++++++ .../UnsafeFixedWidthAggregationMap.java | 9 +- .../spark/sql/execution/SparkOptimizer.scala | 2 + .../sql/execution/aggregate/AggUtils.scala | 5 +- .../aggregate/HashAggregateExec.scala | 10 +- .../aggregate/ObjectHashAggregateExec.scala | 9 - .../approved-plans-v1_4/q9.sf100/explain.txt | 781 ++++-------------- .../q9.sf100/simplified.txt | 235 ++---- .../approved-plans-v1_4/q9/explain.txt | 781 ++++-------------- .../approved-plans-v1_4/q9/simplified.txt | 235 ++---- .../spark/sql/InjectRuntimeFilterSuite.scala | 35 +- .../org/apache/spark/sql/SubquerySuite.scala | 167 ++++ .../LogicalPlanTagInSparkPlanSuite.scala | 8 +- 19 files changed, 1706 insertions(+), 1600 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueries.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueriesSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala index cf052f865ea90..ba958b3db031b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BloomFilterMightContain.scala @@ -56,6 +56,9 @@ case class BloomFilterMightContain( case e : Expression if e.foldable => TypeCheckResult.TypeCheckSuccess case subquery : PlanExpression[_] if !subquery.containsPattern(OUTER_REFERENCE) => TypeCheckResult.TypeCheckSuccess + case GetStructField(subquery: PlanExpression[_], _, _) + if !subquery.containsPattern(OUTER_REFERENCE) => + TypeCheckResult.TypeCheckSuccess case _ => TypeCheckResult.TypeCheckFailure(s"The Bloom filter binary input to $prettyName " + "should be either a constant value or a scalar subquery expression") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueries.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueries.scala new file mode 100644 index 0000000000000..44f3b653de75c --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueries.scala @@ -0,0 +1,389 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, CTERelationDef, CTERelationRef, Filter, Join, LogicalPlan, Project, Subquery, WithCTE} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreePattern.{SCALAR_SUBQUERY, SCALAR_SUBQUERY_REFERENCE, TreePattern} +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.DataType + +/** + * This rule tries to merge multiple non-correlated [[ScalarSubquery]]s to compute multiple scalar + * values once. + * + * The process is the following: + * - While traversing through the plan each [[ScalarSubquery]] plan is tried to merge into the cache + * of already seen subquery plans. If merge is possible then cache is updated with the merged + * subquery plan, if not then the new subquery plan is added to the cache. + * During this first traversal each [[ScalarSubquery]] expression is replaced to a temporal + * [[ScalarSubqueryReference]] reference pointing to its cached version. + * The cache uses a flag to keep track of if a cache entry is a result of merging 2 or more + * plans, or it is a plan that was seen only once. + * Merged plans in the cache get a "Header", that contains the list of attributes form the scalar + * return value of a merged subquery. + * - A second traversal checks if there are merged subqueries in the cache and builds a `WithCTE` + * node from these queries. The `CTERelationDef` nodes contain the merged subquery in the + * following form: + * `Project(Seq(CreateNamedStruct(name1, attribute1, ...) AS mergedValue), mergedSubqueryPlan)` + * and the definitions are flagged that they host a subquery, that can return maximum one row. + * During the second traversal [[ScalarSubqueryReference]] expressions that pont to a merged + * subquery is either transformed to a `GetStructField(ScalarSubquery(CTERelationRef(...)))` + * expression or restored to the original [[ScalarSubquery]]. + * + * Eg. the following query: + * + * SELECT + * (SELECT avg(a) FROM t), + * (SELECT sum(b) FROM t) + * + * is optimized from: + * + * == Optimized Logical Plan == + * Project [scalar-subquery#242 [] AS scalarsubquery()#253, + * scalar-subquery#243 [] AS scalarsubquery()#254L] + * : :- Aggregate [avg(a#244) AS avg(a)#247] + * : : +- Project [a#244] + * : : +- Relation default.t[a#244,b#245] parquet + * : +- Aggregate [sum(a#251) AS sum(a)#250L] + * : +- Project [a#251] + * : +- Relation default.t[a#251,b#252] parquet + * +- OneRowRelation + * + * to: + * + * == Optimized Logical Plan == + * Project [scalar-subquery#242 [].avg(a) AS scalarsubquery()#253, + * scalar-subquery#243 [].sum(a) AS scalarsubquery()#254L] + * : :- Project [named_struct(avg(a), avg(a)#247, sum(a), sum(a)#250L) AS mergedValue#260] + * : : +- Aggregate [avg(a#244) AS avg(a)#247, sum(a#244) AS sum(a)#250L] + * : : +- Project [a#244] + * : : +- Relation default.t[a#244,b#245] parquet + * : +- Project [named_struct(avg(a), avg(a)#247, sum(a), sum(a)#250L) AS mergedValue#260] + * : +- Aggregate [avg(a#244) AS avg(a)#247, sum(a#244) AS sum(a)#250L] + * : +- Project [a#244] + * : +- Relation default.t[a#244,b#245] parquet + * +- OneRowRelation + * + * == Physical Plan == + * *(1) Project [Subquery scalar-subquery#242, [id=#125].avg(a) AS scalarsubquery()#253, + * ReusedSubquery + * Subquery scalar-subquery#242, [id=#125].sum(a) AS scalarsubquery()#254L] + * : :- Subquery scalar-subquery#242, [id=#125] + * : : +- *(2) Project [named_struct(avg(a), avg(a)#247, sum(a), sum(a)#250L) AS mergedValue#260] + * : : +- *(2) HashAggregate(keys=[], functions=[avg(a#244), sum(a#244)], + * output=[avg(a)#247, sum(a)#250L]) + * : : +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#120] + * : : +- *(1) HashAggregate(keys=[], functions=[partial_avg(a#244), partial_sum(a#244)], + * output=[sum#262, count#263L, sum#264L]) + * : : +- *(1) ColumnarToRow + * : : +- FileScan parquet default.t[a#244] ... + * : +- ReusedSubquery Subquery scalar-subquery#242, [id=#125] + * +- *(1) Scan OneRowRelation[] + */ +object MergeScalarSubqueries extends Rule[LogicalPlan] with PredicateHelper { + def apply(plan: LogicalPlan): LogicalPlan = { + plan match { + // Subquery reuse needs to be enabled for this optimization. + case _ if !conf.getConf(SQLConf.SUBQUERY_REUSE_ENABLED) => plan + + // This rule does a whole plan traversal, no need to run on subqueries. + case _: Subquery => plan + + // Plans with CTEs are not supported for now. + case _: WithCTE => plan + + case _ => extractCommonScalarSubqueries(plan) + } + } + + /** + * An item in the cache of merged scalar subqueries. + * + * @param attributes Attributes that form the struct scalar return value of a merged subquery. + * @param plan The plan of a merged scalar subquery. + * @param merged A flag to identify if this item is the result of merging subqueries. + * Please note that `attributes.size == 1` doesn't always mean that the plan is not + * merged as there can be subqueries that are different ([[checkIdenticalPlans]] is + * false) due to an extra [[Project]] node in one of them. In that case + * `attributes.size` remains 1 after merging, but the merged flag becomes true. + */ + case class Header(attributes: Seq[Attribute], plan: LogicalPlan, merged: Boolean) + + private def extractCommonScalarSubqueries(plan: LogicalPlan) = { + val cache = ArrayBuffer.empty[Header] + val planWithReferences = insertReferences(plan, cache) + cache.zipWithIndex.foreach { case (header, i) => + cache(i) = cache(i).copy(plan = + if (header.merged) { + CTERelationDef( + createProject(header.attributes, removeReferences(header.plan, cache)), + underSubquery = true) + } else { + removeReferences(header.plan, cache) + }) + } + val newPlan = removeReferences(planWithReferences, cache) + val subqueryCTEs = cache.filter(_.merged).map(_.plan.asInstanceOf[CTERelationDef]) + if (subqueryCTEs.nonEmpty) { + WithCTE(newPlan, subqueryCTEs.toSeq) + } else { + newPlan + } + } + + // First traversal builds up the cache and inserts `ScalarSubqueryReference`s to the plan. + private def insertReferences(plan: LogicalPlan, cache: ArrayBuffer[Header]): LogicalPlan = { + plan.transformUpWithSubqueries { + case n => n.transformExpressionsUpWithPruning(_.containsAnyPattern(SCALAR_SUBQUERY)) { + case s: ScalarSubquery if !s.isCorrelated && s.deterministic => + val (subqueryIndex, headerIndex) = cacheSubquery(s.plan, cache) + ScalarSubqueryReference(subqueryIndex, headerIndex, s.dataType, s.exprId) + } + } + } + + // Caching returns the index of the subquery in the cache and the index of scalar member in the + // "Header". + private def cacheSubquery(plan: LogicalPlan, cache: ArrayBuffer[Header]): (Int, Int) = { + val output = plan.output.head + cache.zipWithIndex.collectFirst(Function.unlift { case (header, subqueryIndex) => + checkIdenticalPlans(plan, header.plan).map { outputMap => + val mappedOutput = mapAttributes(output, outputMap) + val headerIndex = header.attributes.indexWhere(_.exprId == mappedOutput.exprId) + subqueryIndex -> headerIndex + }.orElse(tryMergePlans(plan, header.plan).map { + case (mergedPlan, outputMap) => + val mappedOutput = mapAttributes(output, outputMap) + var headerIndex = header.attributes.indexWhere(_.exprId == mappedOutput.exprId) + val newHeaderAttributes = if (headerIndex == -1) { + headerIndex = header.attributes.size + header.attributes :+ mappedOutput + } else { + header.attributes + } + cache(subqueryIndex) = Header(newHeaderAttributes, mergedPlan, true) + subqueryIndex -> headerIndex + }) + }).getOrElse { + cache += Header(Seq(output), plan, false) + cache.length - 1 -> 0 + } + } + + // If 2 plans are identical return the attribute mapping from the new to the cached version. + private def checkIdenticalPlans( + newPlan: LogicalPlan, + cachedPlan: LogicalPlan): Option[AttributeMap[Attribute]] = { + if (newPlan.canonicalized == cachedPlan.canonicalized) { + Some(AttributeMap(newPlan.output.zip(cachedPlan.output))) + } else { + None + } + } + + // Recursively traverse down and try merging 2 plans. If merge is possible then return the merged + // plan with the attribute mapping from the new to the merged version. + // Please note that merging arbitrary plans can be complicated, the current version supports only + // some of the most important nodes. + private def tryMergePlans( + newPlan: LogicalPlan, + cachedPlan: LogicalPlan): Option[(LogicalPlan, AttributeMap[Attribute])] = { + checkIdenticalPlans(newPlan, cachedPlan).map(cachedPlan -> _).orElse( + (newPlan, cachedPlan) match { + case (np: Project, cp: Project) => + tryMergePlans(np.child, cp.child).map { case (mergedChild, outputMap) => + val (mergedProjectList, newOutputMap) = + mergeNamedExpressions(np.projectList, outputMap, cp.projectList) + val mergedPlan = Project(mergedProjectList, mergedChild) + mergedPlan -> newOutputMap + } + case (np, cp: Project) => + tryMergePlans(np, cp.child).map { case (mergedChild, outputMap) => + val (mergedProjectList, newOutputMap) = + mergeNamedExpressions(np.output, outputMap, cp.projectList) + val mergedPlan = Project(mergedProjectList, mergedChild) + mergedPlan -> newOutputMap + } + case (np: Project, cp) => + tryMergePlans(np.child, cp).map { case (mergedChild, outputMap) => + val (mergedProjectList, newOutputMap) = + mergeNamedExpressions(np.projectList, outputMap, cp.output) + val mergedPlan = Project(mergedProjectList, mergedChild) + mergedPlan -> newOutputMap + } + case (np: Aggregate, cp: Aggregate) if supportedAggregateMerge(np, cp) => + tryMergePlans(np.child, cp.child).flatMap { case (mergedChild, outputMap) => + val mappedNewGroupingExpression = + np.groupingExpressions.map(mapAttributes(_, outputMap)) + // Order of grouping expression does matter as merging different grouping orders can + // introduce "extra" shuffles/sorts that might not present in all of the original + // subqueries. + if (mappedNewGroupingExpression.map(_.canonicalized) == + cp.groupingExpressions.map(_.canonicalized)) { + val (mergedAggregateExpressions, newOutputMap) = + mergeNamedExpressions(np.aggregateExpressions, outputMap, cp.aggregateExpressions) + val mergedPlan = + Aggregate(cp.groupingExpressions, mergedAggregateExpressions, mergedChild) + Some(mergedPlan -> newOutputMap) + } else { + None + } + } + + case (np: Filter, cp: Filter) => + tryMergePlans(np.child, cp.child).flatMap { case (mergedChild, outputMap) => + val mappedNewCondition = mapAttributes(np.condition, outputMap) + // Comparing the canonicalized form is required to ignore different forms of the same + // expression. + if (mappedNewCondition.canonicalized == cp.condition.canonicalized) { + val mergedPlan = cp.withNewChildren(Seq(mergedChild)) + Some(mergedPlan -> outputMap) + } else { + None + } + } + + case (np: Join, cp: Join) if np.joinType == cp.joinType && np.hint == cp.hint => + tryMergePlans(np.left, cp.left).flatMap { case (mergedLeft, leftOutputMap) => + tryMergePlans(np.right, cp.right).flatMap { case (mergedRight, rightOutputMap) => + val outputMap = leftOutputMap ++ rightOutputMap + val mappedNewCondition = np.condition.map(mapAttributes(_, outputMap)) + // Comparing the canonicalized form is required to ignore different forms of the same + // expression and `AttributeReference.quailifier`s in `cp.condition`. + if (mappedNewCondition.map(_.canonicalized) == cp.condition.map(_.canonicalized)) { + val mergedPlan = cp.withNewChildren(Seq(mergedLeft, mergedRight)) + Some(mergedPlan -> outputMap) + } else { + None + } + } + } + + // Otherwise merging is not possible. + case _ => None + }) + } + + private def createProject(attributes: Seq[Attribute], plan: LogicalPlan): Project = { + Project( + Seq(Alias( + CreateNamedStruct(attributes.flatMap(a => Seq(Literal(a.name), a))), + "mergedValue")()), + plan) + } + + private def mapAttributes[T <: Expression](expr: T, outputMap: AttributeMap[Attribute]) = { + expr.transform { + case a: Attribute => outputMap.getOrElse(a, a) + }.asInstanceOf[T] + } + + // Applies `outputMap` attribute mapping on attributes of `newExpressions` and merges them into + // `cachedExpressions`. Returns the merged expressions and the attribute mapping from the new to + // the merged version that can be propagated up during merging nodes. + private def mergeNamedExpressions( + newExpressions: Seq[NamedExpression], + outputMap: AttributeMap[Attribute], + cachedExpressions: Seq[NamedExpression]) = { + val mergedExpressions = ArrayBuffer[NamedExpression](cachedExpressions: _*) + val newOutputMap = AttributeMap(newExpressions.map { ne => + val mapped = mapAttributes(ne, outputMap) + val withoutAlias = mapped match { + case Alias(child, _) => child + case e => e + } + ne.toAttribute -> mergedExpressions.find { + case Alias(child, _) => child semanticEquals withoutAlias + case e => e semanticEquals withoutAlias + }.getOrElse { + mergedExpressions += mapped + mapped + }.toAttribute + }) + (mergedExpressions.toSeq, newOutputMap) + } + + // Only allow aggregates of the same implementation because merging different implementations + // could cause performance regression. + private def supportedAggregateMerge(newPlan: Aggregate, cachedPlan: Aggregate) = { + val newPlanAggregateExpressions = newPlan.aggregateExpressions.flatMap(_.collect { + case a: AggregateExpression => a + }) + val cachedPlanAggregateExpressions = cachedPlan.aggregateExpressions.flatMap(_.collect { + case a: AggregateExpression => a + }) + val newPlanSupportsHashAggregate = Aggregate.supportsHashAggregate( + newPlanAggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes)) + val cachedPlanSupportsHashAggregate = Aggregate.supportsHashAggregate( + cachedPlanAggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes)) + newPlanSupportsHashAggregate && cachedPlanSupportsHashAggregate || + newPlanSupportsHashAggregate == cachedPlanSupportsHashAggregate && { + val newPlanSupportsObjectHashAggregate = + Aggregate.supportsObjectHashAggregate(newPlanAggregateExpressions) + val cachedPlanSupportsObjectHashAggregate = + Aggregate.supportsObjectHashAggregate(cachedPlanAggregateExpressions) + newPlanSupportsObjectHashAggregate && cachedPlanSupportsObjectHashAggregate || + newPlanSupportsObjectHashAggregate == cachedPlanSupportsObjectHashAggregate + } + } + + // Second traversal replaces `ScalarSubqueryReference`s to either + // `GetStructField(ScalarSubquery(CTERelationRef to the merged plan)` if the plan is merged from + // multiple subqueries or `ScalarSubquery(original plan)` if it isn't. + private def removeReferences( + plan: LogicalPlan, + cache: ArrayBuffer[Header]) = { + plan.transformUpWithSubqueries { + case n => + n.transformExpressionsWithPruning(_.containsAnyPattern(SCALAR_SUBQUERY_REFERENCE)) { + case ssr: ScalarSubqueryReference => + val header = cache(ssr.subqueryIndex) + if (header.merged) { + val subqueryCTE = header.plan.asInstanceOf[CTERelationDef] + GetStructField( + ScalarSubquery( + CTERelationRef(subqueryCTE.id, _resolved = true, subqueryCTE.output), + exprId = ssr.exprId), + ssr.headerIndex) + } else { + ScalarSubquery(header.plan, exprId = ssr.exprId) + } + } + } + } +} + +/** + * Temporal reference to a subquery. + */ +case class ScalarSubqueryReference( + subqueryIndex: Int, + headerIndex: Int, + dataType: DataType, + exprId: ExprId) extends LeafExpression with Unevaluable { + override def nullable: Boolean = true + + final override val nodePatterns: Seq[TreePattern] = Seq(SCALAR_SUBQUERY_REFERENCE) + + override def stringArgs: Iterator[Any] = Iterator(subqueryIndex, headerIndex, dataType, exprId.id) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala index ab9f20edb0bb9..2195eef2fc93b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala @@ -121,7 +121,7 @@ object PushdownPredicatesAndPruneColumnsForCTEDef extends Rule[LogicalPlan] { private def pushdownPredicatesAndAttributes( plan: LogicalPlan, cteMap: CTEMap): LogicalPlan = plan.transformWithSubqueries { - case cteDef @ CTERelationDef(child, id, originalPlanWithPredicates) => + case cteDef @ CTERelationDef(child, id, originalPlanWithPredicates, _) => val (_, _, newPreds, newAttrSet) = cteMap(id) val originalPlan = originalPlanWithPredicates.map(_._1).getOrElse(child) val preds = originalPlanWithPredicates.map(_._2).getOrElse(Seq.empty) @@ -169,7 +169,7 @@ object PushdownPredicatesAndPruneColumnsForCTEDef extends Rule[LogicalPlan] { object CleanUpTempCTEInfo extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning(_.containsPattern(CTE)) { - case cteDef @ CTERelationDef(_, _, Some(_)) => + case cteDef @ CTERelationDef(_, _, Some(_), _) => cteDef.copy(originalPlanWithPredicates = None) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceCTERefWithRepartition.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceCTERefWithRepartition.scala index e0d0417ce5161..0190fa2a2ab09 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceCTERefWithRepartition.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceCTERefWithRepartition.scala @@ -47,13 +47,14 @@ object ReplaceCTERefWithRepartition extends Rule[LogicalPlan] { case WithCTE(child, cteDefs) => cteDefs.foreach { cteDef => val inlined = replaceWithRepartition(cteDef.child, cteMap) - val withRepartition = if (inlined.isInstanceOf[RepartitionOperation]) { - // If the CTE definition plan itself is a repartition operation, we do not need to add an - // extra repartition shuffle. - inlined - } else { - Repartition(conf.numShufflePartitions, shuffle = true, inlined) - } + val withRepartition = + if (inlined.isInstanceOf[RepartitionOperation] || cteDef.underSubquery) { + // If the CTE definition plan itself is a repartition operation or if it hosts a merged + // scalar subquery, we do not need to add an extra repartition shuffle. + inlined + } else { + Repartition(conf.numShufflePartitions, shuffle = true, inlined) + } cteMap.put(cteDef.id, withRepartition) } replaceWithRepartition(child, cteMap) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index e5eab691d14fd..692601be75d10 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.analysis.{AnsiTypeCoercion, MultiInstanceRe import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable} import org.apache.spark.sql.catalyst.catalog.CatalogTable.VIEW_STORING_ANALYZED_PLAN import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression +import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, TypedImperativeAggregate} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning, SinglePartition} import org.apache.spark.sql.catalyst.trees.TreeNodeTag @@ -663,11 +663,14 @@ case class UnresolvedWith( * predicates that have been pushed down into `child`. This is * a temporary field used by optimization rules for CTE predicate * pushdown to help ensure rule idempotency. + * @param underSubquery If true, it means we don't need to add a shuffle for this CTE relation as + * subquery reuse will be applied to reuse CTE relation output. */ case class CTERelationDef( child: LogicalPlan, id: Long = CTERelationDef.newId, - originalPlanWithPredicates: Option[(LogicalPlan, Seq[Expression])] = None) extends UnaryNode { + originalPlanWithPredicates: Option[(LogicalPlan, Seq[Expression])] = None, + underSubquery: Boolean = false) extends UnaryNode { final override val nodePatterns: Seq[TreePattern] = Seq(CTE) @@ -678,17 +681,19 @@ case class CTERelationDef( } object CTERelationDef { - private val curId = new java.util.concurrent.atomic.AtomicLong() + private[sql] val curId = new java.util.concurrent.atomic.AtomicLong() def newId: Long = curId.getAndIncrement() } /** * Represents the relation of a CTE reference. - * @param cteId The ID of the corresponding CTE definition. - * @param _resolved Whether this reference is resolved. - * @param output The output attributes of this CTE reference, which can be different from - * the output of its corresponding CTE definition after attribute de-duplication. - * @param statsOpt The optional statistics inferred from the corresponding CTE definition. + * @param cteId The ID of the corresponding CTE definition. + * @param _resolved Whether this reference is resolved. + * @param output The output attributes of this CTE reference, which can be different + * from the output of its corresponding CTE definition after attribute + * de-duplication. + * @param statsOpt The optional statistics inferred from the corresponding CTE + * definition. */ case class CTERelationRef( cteId: Long, @@ -1014,6 +1019,24 @@ case class Aggregate( } } +object Aggregate { + def isAggregateBufferMutable(schema: StructType): Boolean = { + schema.forall(f => UnsafeRow.isMutable(f.dataType)) + } + + def supportsHashAggregate(aggregateBufferAttributes: Seq[Attribute]): Boolean = { + val aggregationBufferSchema = StructType.fromAttributes(aggregateBufferAttributes) + isAggregateBufferMutable(aggregationBufferSchema) + } + + def supportsObjectHashAggregate(aggregateExpressions: Seq[AggregateExpression]): Boolean = { + aggregateExpressions.map(_.aggregateFunction).exists { + case _: TypedImperativeAggregate[_] => true + case _ => false + } + } +} + case class Window( windowExpressions: Seq[NamedExpression], partitionSpec: Seq[Expression], diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala index 3cf45d5f79f00..93273b5a2c7a7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala @@ -77,6 +77,7 @@ object TreePattern extends Enumeration { val REGEXP_REPLACE: Value = Value val RUNTIME_REPLACEABLE: Value = Value val SCALAR_SUBQUERY: Value = Value + val SCALAR_SUBQUERY_REFERENCE: Value = Value val SCALA_UDF: Value = Value val SORT: Value = Value val SUBQUERY_ALIAS: Value = Value diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueriesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueriesSuite.scala new file mode 100644 index 0000000000000..8af0e02855b12 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueriesSuite.scala @@ -0,0 +1,578 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions.{Attribute, CreateNamedStruct, GetStructField, Literal, ScalarSubquery} +import org.apache.spark.sql.catalyst.expressions.aggregate.{CollectList, CollectSet} +import org.apache.spark.sql.catalyst.plans._ +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules._ + +class MergeScalarSubqueriesSuite extends PlanTest { + + override def beforeEach(): Unit = { + CTERelationDef.curId.set(0) + } + + private object Optimize extends RuleExecutor[LogicalPlan] { + val batches = Batch("MergeScalarSubqueries", Once, MergeScalarSubqueries) :: Nil + } + + val testRelation = LocalRelation('a.int, 'b.int, 'c.string) + + private def definitionNode(plan: LogicalPlan, cteIndex: Int) = { + CTERelationDef(plan, cteIndex, underSubquery = true) + } + + private def extractorExpression(cteIndex: Int, output: Seq[Attribute], fieldIndex: Int) = { + GetStructField(ScalarSubquery(CTERelationRef(cteIndex, _resolved = true, output)), fieldIndex) + .as("scalarsubquery()") + } + + test("Merging subqueries with projects") { + val subquery1 = ScalarSubquery(testRelation.select(('a + 1).as("a_plus1"))) + val subquery2 = ScalarSubquery(testRelation.select(('a + 2).as("a_plus2"))) + val subquery3 = ScalarSubquery(testRelation.select('b)) + val subquery4 = ScalarSubquery(testRelation.select(('a + 1).as("a_plus1_2"))) + val subquery5 = ScalarSubquery(testRelation.select(('a + 2).as("a_plus2_2"))) + val subquery6 = ScalarSubquery(testRelation.select('b.as("b_2"))) + val originalQuery = testRelation + .select( + subquery1, + subquery2, + subquery3, + subquery4, + subquery5, + subquery6) + + val mergedSubquery = testRelation + .select( + ('a + 1).as("a_plus1"), + ('a + 2).as("a_plus2"), + 'b) + .select( + CreateNamedStruct(Seq( + Literal("a_plus1"), 'a_plus1, + Literal("a_plus2"), 'a_plus2, + Literal("b"), 'b + )).as("mergedValue")) + val analyzedMergedSubquery = mergedSubquery.analyze + val correctAnswer = WithCTE( + testRelation + .select( + extractorExpression(0, analyzedMergedSubquery.output, 0), + extractorExpression(0, analyzedMergedSubquery.output, 1), + extractorExpression(0, analyzedMergedSubquery.output, 2), + extractorExpression(0, analyzedMergedSubquery.output, 0), + extractorExpression(0, analyzedMergedSubquery.output, 1), + extractorExpression(0, analyzedMergedSubquery.output, 2)), + Seq(definitionNode(analyzedMergedSubquery, 0))) + + comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze) + } + + test("Merging subqueries with aggregates") { + val subquery1 = ScalarSubquery(testRelation.groupBy('b)(max('a).as("max_a"))) + val subquery2 = ScalarSubquery(testRelation.groupBy('b)(sum('a).as("sum_a"))) + val subquery3 = ScalarSubquery(testRelation.groupBy('b)('b)) + val subquery4 = ScalarSubquery(testRelation.groupBy('b)(max('a).as("max_a_2"))) + val subquery5 = ScalarSubquery(testRelation.groupBy('b)(sum('a).as("sum_a_2"))) + val subquery6 = ScalarSubquery(testRelation.groupBy('b)('b.as("b_2"))) + val originalQuery = testRelation + .select( + subquery1, + subquery2, + subquery3, + subquery4, + subquery5, + subquery6) + + val mergedSubquery = testRelation + .groupBy('b)( + max('a).as("max_a"), + sum('a).as("sum_a"), + 'b) + .select(CreateNamedStruct(Seq( + Literal("max_a"), 'max_a, + Literal("sum_a"), 'sum_a, + Literal("b"), 'b + )).as("mergedValue")) + val analyzedMergedSubquery = mergedSubquery.analyze + val correctAnswer = WithCTE( + testRelation + .select( + extractorExpression(0, analyzedMergedSubquery.output, 0), + extractorExpression(0, analyzedMergedSubquery.output, 1), + extractorExpression(0, analyzedMergedSubquery.output, 2), + extractorExpression(0, analyzedMergedSubquery.output, 0), + extractorExpression(0, analyzedMergedSubquery.output, 1), + extractorExpression(0, analyzedMergedSubquery.output, 2)), + Seq(definitionNode(analyzedMergedSubquery, 0))) + + comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze) + } + + test("Merging subqueries with aggregates with complex grouping expressions") { + val subquery1 = ScalarSubquery(testRelation.groupBy('b > 1 && 'a === 2)(max('a).as("max_a"))) + val subquery2 = ScalarSubquery( + testRelation + .select('a, 'b.as("b_2")) + .groupBy(Literal(2) === 'a && Literal(1) < 'b_2)(sum('a).as("sum_a"))) + + val originalQuery = testRelation + .select( + subquery1, + subquery2) + + val mergedSubquery = testRelation + .select('a, 'b, 'c) + .groupBy('b > 1 && 'a === 2)( + max('a).as("max_a"), + sum('a).as("sum_a")) + .select(CreateNamedStruct(Seq( + Literal("max_a"), 'max_a, + Literal("sum_a"), 'sum_a + )).as("mergedValue")) + val analyzedMergedSubquery = mergedSubquery.analyze + val correctAnswer = WithCTE( + testRelation + .select( + extractorExpression(0, analyzedMergedSubquery.output, 0), + extractorExpression(0, analyzedMergedSubquery.output, 1)), + Seq(definitionNode(analyzedMergedSubquery, 0))) + + comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze) + } + + test("Merging subqueries with aggregates with multiple grouping expressions") { + // supports HashAggregate + val subquery1 = ScalarSubquery(testRelation.groupBy('b, 'c)(max('a).as("max_a"))) + val subquery2 = ScalarSubquery(testRelation.groupBy('b, 'c)(min('a).as("min_a"))) + + val originalQuery = testRelation + .select( + subquery1, + subquery2) + + val hashAggregates = testRelation + .groupBy('b, 'c)( + max('a).as("max_a"), + min('a).as("min_a")) + .select(CreateNamedStruct(Seq( + Literal("max_a"), 'max_a, + Literal("min_a"), 'min_a + )).as("mergedValue")) + val analyzedHashAggregates = hashAggregates.analyze + val correctAnswer = WithCTE( + testRelation + .select( + extractorExpression(0, analyzedHashAggregates.output, 0), + extractorExpression(0, analyzedHashAggregates.output, 1)), + Seq(definitionNode(analyzedHashAggregates, 0))) + + comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze) + } + + test("Merging subqueries with filters") { + val subquery1 = ScalarSubquery(testRelation.where('a > 1).select('a)) + // Despite having an extra Project node, `subquery2` is mergeable with `subquery1` + val subquery2 = ScalarSubquery(testRelation.where('a > 1).select('b.as("b_1")).select('b_1)) + // Despite lacking a Project node, `subquery3` is mergeable with the result of merging + // `subquery1` and `subquery2` + val subquery3 = ScalarSubquery(testRelation.select('a.as("a_2")).where('a_2 > 1).select('a_2)) + val subquery4 = ScalarSubquery( + testRelation.select('a.as("a_2"), 'b).where('a_2 > 1).select('b.as("b_2"))) + val originalQuery = testRelation + .select( + subquery1, + subquery2, + subquery3, + subquery4) + + val mergedSubquery = testRelation + .select('a, 'b, 'c) + .where('a > 1) + .select('a, 'b, 'c) + .select('a, 'b) + .select(CreateNamedStruct(Seq( + Literal("a"), 'a, + Literal("b"), 'b + )).as("mergedValue")) + val analyzedMergedSubquery = mergedSubquery.analyze + val correctAnswer = WithCTE( + testRelation + .select( + extractorExpression(0, analyzedMergedSubquery.output, 0), + extractorExpression(0, analyzedMergedSubquery.output, 1), + extractorExpression(0, analyzedMergedSubquery.output, 0), + extractorExpression(0, analyzedMergedSubquery.output, 1)), + Seq(definitionNode(analyzedMergedSubquery, 0))) + + comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze) + } + + test("Merging subqueries with complex filter conditions") { + val subquery1 = ScalarSubquery(testRelation.where('a > 1 && 'b === 2).select('a)) + val subquery2 = ScalarSubquery( + testRelation + .select('a.as("a_2"), 'b) + .where(Literal(2) === 'b && Literal(1) < 'a_2) + .select('b.as("b_2"))) + val originalQuery = testRelation + .select( + subquery1, + subquery2) + + val mergedSubquery = testRelation + .select('a, 'b, 'c) + .where('a > 1 && 'b === 2) + .select('a, 'b.as("b_2")) + .select(CreateNamedStruct(Seq( + Literal("a"), 'a, + Literal("b_2"), 'b_2 + )).as("mergedValue")) + val analyzedMergedSubquery = mergedSubquery.analyze + val correctAnswer = WithCTE( + testRelation + .select( + extractorExpression(0, analyzedMergedSubquery.output, 0), + extractorExpression(0, analyzedMergedSubquery.output, 1)), + Seq(definitionNode(analyzedMergedSubquery, 0))) + + comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze) + } + + test("Do not merge subqueries with different filter conditions") { + val subquery1 = ScalarSubquery(testRelation.where('a > 1).select('a)) + val subquery2 = ScalarSubquery(testRelation.where('a < 1).select('a)) + + val originalQuery = testRelation + .select( + subquery1, + subquery2) + + comparePlans(Optimize.execute(originalQuery.analyze), originalQuery.analyze) + } + + test("Merging subqueries with aggregate filters") { + val subquery1 = ScalarSubquery( + testRelation.having('b)(max('a).as("max_a"))(max('a) > 1)) + val subquery2 = ScalarSubquery( + testRelation.having('b)(sum('a).as("sum_a"))(max('a) > 1)) + val originalQuery = testRelation.select( + subquery1, + subquery2) + + val mergedSubquery = testRelation + .having('b)( + max('a).as("max_a"), + sum('a).as("sum_a"))('max_a > 1) + .select( + 'max_a, + 'sum_a) + .select(CreateNamedStruct(Seq( + Literal("max_a"), 'max_a, + Literal("sum_a"), 'sum_a + )).as("mergedValue")) + val analyzedMergedSubquery = mergedSubquery.analyze + val correctAnswer = WithCTE( + testRelation + .select( + extractorExpression(0, analyzedMergedSubquery.output, 0), + extractorExpression(0, analyzedMergedSubquery.output, 1)), + Seq(definitionNode(analyzedMergedSubquery, 0))) + + comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze) + } + + test("Merging subqueries with joins") { + val subquery1 = ScalarSubquery(testRelation.as("t1") + .join( + testRelation.as("t2"), + Inner, + Some($"t1.b" === $"t2.b")) + .select($"t1.a").analyze) + val subquery2 = ScalarSubquery(testRelation.as("t1") + .select('a.as("a_1"), 'b.as("b_1"), 'c.as("c_1")) + .join( + testRelation.as("t2").select('a.as("a_2"), 'b.as("b_2"), 'c.as("c_2")), + Inner, + Some('b_1 === 'b_2)) + .select('c_2).analyze) + val originalQuery = testRelation.select( + subquery1, + subquery2) + + val mergedSubquery = testRelation.as("t1") + .select('a, 'b, 'c) + .join( + testRelation.as("t2").select('a, 'b, 'c), + Inner, + Some($"t1.b" === $"t2.b")) + .select($"t1.a", $"t2.c") + .select(CreateNamedStruct(Seq( + Literal("a"), 'a, + Literal("c"), 'c + )).as("mergedValue")) + val analyzedMergedSubquery = mergedSubquery.analyze + val correctAnswer = WithCTE( + testRelation + .select( + extractorExpression(0, analyzedMergedSubquery.output, 0), + extractorExpression(0, analyzedMergedSubquery.output, 1)), + Seq(definitionNode(analyzedMergedSubquery, 0))) + + comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze) + } + + test("Merge subqueries with complex join conditions") { + val subquery1 = ScalarSubquery(testRelation.as("t1") + .join( + testRelation.as("t2"), + Inner, + Some($"t1.b" < $"t2.b" && $"t1.a" === $"t2.c")) + .select($"t1.a").analyze) + val subquery2 = ScalarSubquery(testRelation.as("t1") + .select('a.as("a_1"), 'b.as("b_1"), 'c.as("c_1")) + .join( + testRelation.as("t2").select('a.as("a_2"), 'b.as("b_2"), 'c.as("c_2")), + Inner, + Some('c_2 === 'a_1 && 'b_1 < 'b_2)) + .select('c_2).analyze) + val originalQuery = testRelation.select( + subquery1, + subquery2) + + val mergedSubquery = testRelation.as("t1") + .select('a, 'b, 'c) + .join( + testRelation.as("t2").select('a, 'b, 'c), + Inner, + Some($"t1.b" < $"t2.b" && $"t1.a" === $"t2.c")) + .select($"t1.a", $"t2.c") + .select(CreateNamedStruct(Seq( + Literal("a"), 'a, + Literal("c"), 'c + )).as("mergedValue")) + val analyzedMergedSubquery = mergedSubquery.analyze + val correctAnswer = WithCTE( + testRelation + .select( + extractorExpression(0, analyzedMergedSubquery.output, 0), + extractorExpression(0, analyzedMergedSubquery.output, 1)), + Seq(definitionNode(analyzedMergedSubquery, 0))) + + comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze) + } + + test("Do not merge subqueries with different join types") { + val subquery1 = ScalarSubquery(testRelation.as("t1") + .join( + testRelation.as("t2"), + Inner, + Some($"t1.b" === $"t2.b")) + .select($"t1.a")) + val subquery2 = ScalarSubquery(testRelation.as("t1") + .join( + testRelation.as("t2"), + LeftOuter, + Some($"t1.b" === $"t2.b")) + .select($"t1.a")) + val originalQuery = testRelation.select( + subquery1, + subquery2) + + comparePlans(Optimize.execute(originalQuery.analyze), originalQuery.analyze) + } + + test("Do not merge subqueries with different join conditions") { + val subquery1 = ScalarSubquery(testRelation.as("t1") + .join( + testRelation.as("t2"), + Inner, + Some($"t1.b" < $"t2.b")) + .select($"t1.a")) + val subquery2 = ScalarSubquery(testRelation.as("t1") + .join( + testRelation.as("t2"), + Inner, + Some($"t1.b" > $"t2.b")) + .select($"t1.a")) + val originalQuery = testRelation.select( + subquery1, + subquery2) + + comparePlans(Optimize.execute(originalQuery.analyze), originalQuery.analyze) + } + + test("Do not merge subqueries with nondeterministic elements") { + val subquery1 = ScalarSubquery(testRelation.select(('a + rand(0)).as("rand_a"))) + val subquery2 = ScalarSubquery(testRelation.select(('b + rand(0)).as("rand_b"))) + val originalQuery = testRelation + .select( + subquery1, + subquery2) + + comparePlans(Optimize.execute(originalQuery.analyze), originalQuery.analyze) + + val subquery3 = ScalarSubquery(testRelation.where('a > rand(0)).select('a)) + val subquery4 = ScalarSubquery(testRelation.where('a > rand(0)).select('b)) + val originalQuery2 = testRelation + .select( + subquery3, + subquery4) + + comparePlans(Optimize.execute(originalQuery2.analyze), originalQuery2.analyze) + + val subquery5 = ScalarSubquery(testRelation.groupBy()((max('a) + rand(0)).as("max_a"))) + val subquery6 = ScalarSubquery(testRelation.groupBy()((max('b) + rand(0)).as("max_b"))) + val originalQuery3 = testRelation + .select( + subquery5, + subquery6) + + comparePlans(Optimize.execute(originalQuery3.analyze), originalQuery3.analyze) + } + + test("Do not merge different aggregate implementations") { + // supports HashAggregate + val subquery1 = ScalarSubquery(testRelation.groupBy('b)(max('a).as("max_a"))) + val subquery2 = ScalarSubquery(testRelation.groupBy('b)(min('a).as("min_a"))) + + // supports ObjectHashAggregate + val subquery3 = ScalarSubquery(testRelation + .groupBy('b)(CollectList('a).toAggregateExpression(isDistinct = false).as("collectlist_a"))) + val subquery4 = ScalarSubquery(testRelation + .groupBy('b)(CollectSet('a).toAggregateExpression(isDistinct = false).as("collectset_a"))) + + // supports SortAggregate + val subquery5 = ScalarSubquery(testRelation.groupBy('b)(max('c).as("max_c"))) + val subquery6 = ScalarSubquery(testRelation.groupBy('b)(min('c).as("min_c"))) + + val originalQuery = testRelation + .select( + subquery1, + subquery2, + subquery3, + subquery4, + subquery5, + subquery6) + + val hashAggregates = testRelation + .groupBy('b)( + max('a).as("max_a"), + min('a).as("min_a")) + .select(CreateNamedStruct(Seq( + Literal("max_a"), 'max_a, + Literal("min_a"), 'min_a + )).as("mergedValue")) + val analyzedHashAggregates = hashAggregates.analyze + val objectHashAggregates = testRelation + .groupBy('b)( + CollectList('a).toAggregateExpression(isDistinct = false).as("collectlist_a"), + CollectSet('a).toAggregateExpression(isDistinct = false).as("collectset_a")) + .select(CreateNamedStruct(Seq( + Literal("collectlist_a"), 'collectlist_a, + Literal("collectset_a"), 'collectset_a + )).as("mergedValue")) + val analyzedObjectHashAggregates = objectHashAggregates.analyze + val sortAggregates = testRelation + .groupBy('b)( + max('c).as("max_c"), + min('c).as("min_c")) + .select(CreateNamedStruct(Seq( + Literal("max_c"), 'max_c, + Literal("min_c"), 'min_c + )).as("mergedValue")) + val analyzedSortAggregates = sortAggregates.analyze + val correctAnswer = WithCTE( + testRelation + .select( + extractorExpression(0, analyzedHashAggregates.output, 0), + extractorExpression(0, analyzedHashAggregates.output, 1), + extractorExpression(1, analyzedObjectHashAggregates.output, 0), + extractorExpression(1, analyzedObjectHashAggregates.output, 1), + extractorExpression(2, analyzedSortAggregates.output, 0), + extractorExpression(2, analyzedSortAggregates.output, 1)), + Seq( + definitionNode(analyzedHashAggregates, 0), + definitionNode(analyzedObjectHashAggregates, 1), + definitionNode(analyzedSortAggregates, 2))) + + comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze) + } + + test("Do not merge subqueries with different aggregate grouping orders") { + // supports HashAggregate + val subquery1 = ScalarSubquery(testRelation.groupBy('b, 'c)(max('a).as("max_a"))) + val subquery2 = ScalarSubquery(testRelation.groupBy('c, 'b)(min('a).as("min_a"))) + + val originalQuery = testRelation + .select( + subquery1, + subquery2) + + comparePlans(Optimize.execute(originalQuery.analyze), originalQuery.analyze) + } + + test("Merging subqueries from different places") { + val subquery1 = ScalarSubquery(testRelation.select(('a + 1).as("a_plus1"))) + val subquery2 = ScalarSubquery(testRelation.select(('a + 2).as("a_plus2"))) + val subquery3 = ScalarSubquery(testRelation.select('b)) + val subquery4 = ScalarSubquery(testRelation.select(('a + 1).as("a_plus1_2"))) + val subquery5 = ScalarSubquery(testRelation.select(('a + 2).as("a_plus2_2"))) + val subquery6 = ScalarSubquery(testRelation.select('b.as("b_2"))) + val originalQuery = testRelation + .select( + subquery1, + subquery2, + subquery3) + .where( + subquery4 + + subquery5 + + subquery6 === 0) + + val mergedSubquery = testRelation + .select( + ('a + 1).as("a_plus1"), + ('a + 2).as("a_plus2"), + 'b) + .select( + CreateNamedStruct(Seq( + Literal("a_plus1"), 'a_plus1, + Literal("a_plus2"), 'a_plus2, + Literal("b"), 'b + )).as("mergedValue")) + val analyzedMergedSubquery = mergedSubquery.analyze + val correctAnswer = WithCTE( + testRelation + .select( + extractorExpression(0, analyzedMergedSubquery.output, 0), + extractorExpression(0, analyzedMergedSubquery.output, 1), + extractorExpression(0, analyzedMergedSubquery.output, 2)) + .where( + extractorExpression(0, analyzedMergedSubquery.output, 0) + + extractorExpression(0, analyzedMergedSubquery.output, 1) + + extractorExpression(0, analyzedMergedSubquery.output, 2) === 0), + Seq(definitionNode(analyzedMergedSubquery, 0))) + + comparePlans(Optimize.execute(originalQuery.analyze), correctAnswer.analyze) + } +} diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java b/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java index 31e10af38a42b..8587d9290078b 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.expressions.UnsafeProjection; import org.apache.spark.sql.catalyst.expressions.UnsafeRow; -import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.catalyst.plans.logical.Aggregate$; import org.apache.spark.sql.types.StructType; import org.apache.spark.unsafe.KVIterator; import org.apache.spark.unsafe.Platform; @@ -68,12 +68,7 @@ public final class UnsafeFixedWidthAggregationMap { * schema, false otherwise. */ public static boolean supportsAggregationBufferSchema(StructType schema) { - for (StructField field: schema.fields()) { - if (!UnsafeRow.isMutable(field.dataType())) { - return false; - } - } - return true; + return Aggregate$.MODULE$.isAggregateBufferMutable(schema); } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index d9457a20d91c9..84e5975189b8f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -54,6 +54,8 @@ class SparkOptimizer( Batch("InjectRuntimeFilter", FixedPoint(1), InjectRuntimeFilter, RewritePredicateSubquery) :+ + Batch("MergeScalarSubqueries", Once, + MergeScalarSubqueries) :+ Batch("Pushdown Filters from PartitionPruning", fixedPoint, PushDownPredicates) :+ Batch("Cleanup filters that cannot be pushed down", Once, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala index 26161acae30b2..3e5846bcdfd77 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/AggUtils.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.aggregate import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate._ +import org.apache.spark.sql.catalyst.plans.logical.Aggregate import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.streaming._ import org.apache.spark.sql.internal.SQLConf @@ -73,7 +74,7 @@ object AggUtils { initialInputBufferOffset: Int = 0, resultExpressions: Seq[NamedExpression] = Nil, child: SparkPlan): SparkPlan = { - val useHash = HashAggregateExec.supportsAggregate( + val useHash = Aggregate.supportsHashAggregate( aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes)) val forceSortAggregate = forceApplySortAggregate(child.conf) @@ -90,7 +91,7 @@ object AggUtils { child = child) } else { val objectHashEnabled = child.conf.useObjectHashAggregation - val useObjectHash = ObjectHashAggregateExec.supportsAggregate(aggregateExpressions) + val useObjectHash = Aggregate.supportsObjectHashAggregate(aggregateExpressions) if (objectHashEnabled && useObjectHash && !forceSortAggregate) { ObjectHashAggregateExec( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala index 8be3a018cee58..6c83ba5546d2a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala @@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.BindReferences.bindReferences import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ +import org.apache.spark.sql.catalyst.plans.logical.Aggregate import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_MILLIS import org.apache.spark.sql.catalyst.util.truncatedString import org.apache.spark.sql.execution._ @@ -55,7 +56,7 @@ case class HashAggregateExec( child: SparkPlan) extends AggregateCodegenSupport { - require(HashAggregateExec.supportsAggregate(aggregateBufferAttributes)) + require(Aggregate.supportsHashAggregate(aggregateBufferAttributes)) override lazy val allAttributes: AttributeSeq = child.output ++ aggregateBufferAttributes ++ aggregateAttributes ++ @@ -885,10 +886,3 @@ case class HashAggregateExec( override protected def withNewChildInternal(newChild: SparkPlan): HashAggregateExec = copy(child = newChild) } - -object HashAggregateExec { - def supportsAggregate(aggregateBufferAttributes: Seq[Attribute]): Boolean = { - val aggregationBufferSchema = StructType.fromAttributes(aggregateBufferAttributes) - UnsafeFixedWidthAggregationMap.supportsAggregationBufferSchema(aggregationBufferSchema) - } -} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala index 9da0ca93c1819..e6530e94701f9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/ObjectHashAggregateExec.scala @@ -143,12 +143,3 @@ case class ObjectHashAggregateExec( override protected def withNewChildInternal(newChild: SparkPlan): ObjectHashAggregateExec = copy(child = newChild) } - -object ObjectHashAggregateExec { - def supportsAggregate(aggregateExpressions: Seq[AggregateExpression]): Boolean = { - aggregateExpressions.map(_.aggregateFunction).exists { - case _: TypedImperativeAggregate[_] => true - case _ => false - } - } -} diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/explain.txt index 8736c9861a5ce..8bf63794f25e4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/explain.txt @@ -20,699 +20,284 @@ Input [1]: [r_reason_sk#1] Condition : (isnotnull(r_reason_sk#1) AND (r_reason_sk#1 = 1)) (4) Project [codegen id : 1] -Output [5]: [CASE WHEN (Subquery scalar-subquery#2, [id=#3] > 62316685) THEN Subquery scalar-subquery#4, [id=#5] ELSE Subquery scalar-subquery#6, [id=#7] END AS bucket1#8, CASE WHEN (Subquery scalar-subquery#9, [id=#10] > 19045798) THEN Subquery scalar-subquery#11, [id=#12] ELSE Subquery scalar-subquery#13, [id=#14] END AS bucket2#15, CASE WHEN (Subquery scalar-subquery#16, [id=#17] > 365541424) THEN Subquery scalar-subquery#18, [id=#19] ELSE Subquery scalar-subquery#20, [id=#21] END AS bucket3#22, CASE WHEN (Subquery scalar-subquery#23, [id=#24] > 216357808) THEN Subquery scalar-subquery#25, [id=#26] ELSE Subquery scalar-subquery#27, [id=#28] END AS bucket4#29, CASE WHEN (Subquery scalar-subquery#30, [id=#31] > 184483884) THEN Subquery scalar-subquery#32, [id=#33] ELSE Subquery scalar-subquery#34, [id=#35] END AS bucket5#36] +Output [5]: [CASE WHEN (Subquery scalar-subquery#2, [id=#3].count(1) > 62316685) THEN ReusedSubquery Subquery scalar-subquery#2, [id=#3].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#2, [id=#3].avg(ss_net_paid) END AS bucket1#4, CASE WHEN (Subquery scalar-subquery#5, [id=#6].count(1) > 19045798) THEN ReusedSubquery Subquery scalar-subquery#5, [id=#6].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#5, [id=#6].avg(ss_net_paid) END AS bucket2#7, CASE WHEN (Subquery scalar-subquery#8, [id=#9].count(1) > 365541424) THEN ReusedSubquery Subquery scalar-subquery#8, [id=#9].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#8, [id=#9].avg(ss_net_paid) END AS bucket3#10, CASE WHEN (Subquery scalar-subquery#11, [id=#12].count(1) > 216357808) THEN ReusedSubquery Subquery scalar-subquery#11, [id=#12].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#11, [id=#12].avg(ss_net_paid) END AS bucket4#13, CASE WHEN (Subquery scalar-subquery#14, [id=#15].count(1) > 184483884) THEN ReusedSubquery Subquery scalar-subquery#14, [id=#15].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#14, [id=#15].avg(ss_net_paid) END AS bucket5#16] Input [1]: [r_reason_sk#1] ===== Subqueries ===== Subquery:1 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#2, [id=#3] -* HashAggregate (11) -+- Exchange (10) - +- * HashAggregate (9) - +- * Project (8) - +- * Filter (7) - +- * ColumnarToRow (6) - +- Scan parquet default.store_sales (5) +* Project (12) ++- * HashAggregate (11) + +- Exchange (10) + +- * HashAggregate (9) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet default.store_sales (5) (5) Scan parquet default.store_sales -Output [2]: [ss_quantity#37, ss_sold_date_sk#38] +Output [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] -ReadSchema: struct +ReadSchema: struct (6) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#37, ss_sold_date_sk#38] +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] (7) Filter [codegen id : 1] -Input [2]: [ss_quantity#37, ss_sold_date_sk#38] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 1)) AND (ss_quantity#37 <= 20)) +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Condition : ((isnotnull(ss_quantity#17) AND (ss_quantity#17 >= 1)) AND (ss_quantity#17 <= 20)) (8) Project [codegen id : 1] -Output: [] -Input [2]: [ss_quantity#37, ss_sold_date_sk#38] +Output [2]: [ss_ext_discount_amt#18, ss_net_paid#19] +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] (9) HashAggregate [codegen id : 1] -Input: [] +Input [2]: [ss_ext_discount_amt#18, ss_net_paid#19] Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#39] -Results [1]: [count#40] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#18)), partial_avg(UnscaledValue(ss_net_paid#19))] +Aggregate Attributes [5]: [count#21, sum#22, count#23, sum#24, count#25] +Results [5]: [count#26, sum#27, count#28, sum#29, count#30] (10) Exchange -Input [1]: [count#40] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#41] +Input [5]: [count#26, sum#27, count#28, sum#29, count#30] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#31] (11) HashAggregate [codegen id : 2] -Input [1]: [count#40] +Input [5]: [count#26, sum#27, count#28, sum#29, count#30] Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#42] -Results [1]: [count(1)#42 AS count(1)#43] - -Subquery:2 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#4, [id=#5] -* HashAggregate (18) -+- Exchange (17) - +- * HashAggregate (16) - +- * Project (15) - +- * Filter (14) - +- * ColumnarToRow (13) - +- Scan parquet default.store_sales (12) - - -(12) Scan parquet default.store_sales -Output [3]: [ss_quantity#44, ss_ext_discount_amt#45, ss_sold_date_sk#46] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] -ReadSchema: struct - -(13) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#44, ss_ext_discount_amt#45, ss_sold_date_sk#46] - -(14) Filter [codegen id : 1] -Input [3]: [ss_quantity#44, ss_ext_discount_amt#45, ss_sold_date_sk#46] -Condition : ((isnotnull(ss_quantity#44) AND (ss_quantity#44 >= 1)) AND (ss_quantity#44 <= 20)) - -(15) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#45] -Input [3]: [ss_quantity#44, ss_ext_discount_amt#45, ss_sold_date_sk#46] - -(16) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#45] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#45))] -Aggregate Attributes [2]: [sum#47, count#48] -Results [2]: [sum#49, count#50] - -(17) Exchange -Input [2]: [sum#49, count#50] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#51] - -(18) HashAggregate [codegen id : 2] -Input [2]: [sum#49, count#50] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#45))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#45))#52] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#45))#52 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#53] - -Subquery:3 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#6, [id=#7] -* HashAggregate (25) -+- Exchange (24) - +- * HashAggregate (23) - +- * Project (22) - +- * Filter (21) - +- * ColumnarToRow (20) - +- Scan parquet default.store_sales (19) - - -(19) Scan parquet default.store_sales -Output [3]: [ss_quantity#54, ss_net_paid#55, ss_sold_date_sk#56] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] -ReadSchema: struct - -(20) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#54, ss_net_paid#55, ss_sold_date_sk#56] - -(21) Filter [codegen id : 1] -Input [3]: [ss_quantity#54, ss_net_paid#55, ss_sold_date_sk#56] -Condition : ((isnotnull(ss_quantity#54) AND (ss_quantity#54 >= 1)) AND (ss_quantity#54 <= 20)) - -(22) Project [codegen id : 1] -Output [1]: [ss_net_paid#55] -Input [3]: [ss_quantity#54, ss_net_paid#55, ss_sold_date_sk#56] - -(23) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#55] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#55))] -Aggregate Attributes [2]: [sum#57, count#58] -Results [2]: [sum#59, count#60] - -(24) Exchange -Input [2]: [sum#59, count#60] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#61] - -(25) HashAggregate [codegen id : 2] -Input [2]: [sum#59, count#60] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#55))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#55))#62] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#55))#62 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#63] - -Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#9, [id=#10] -* HashAggregate (32) -+- Exchange (31) - +- * HashAggregate (30) - +- * Project (29) - +- * Filter (28) - +- * ColumnarToRow (27) - +- Scan parquet default.store_sales (26) - - -(26) Scan parquet default.store_sales -Output [2]: [ss_quantity#64, ss_sold_date_sk#65] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] -ReadSchema: struct +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#18)), avg(UnscaledValue(ss_net_paid#19))] +Aggregate Attributes [3]: [count(1)#32, avg(UnscaledValue(ss_ext_discount_amt#18))#33, avg(UnscaledValue(ss_net_paid#19))#34] +Results [3]: [count(1)#32 AS count(1)#35, cast((avg(UnscaledValue(ss_ext_discount_amt#18))#33 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#36, cast((avg(UnscaledValue(ss_net_paid#19))#34 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#37] -(27) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#64, ss_sold_date_sk#65] +(12) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#35, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#36, avg(ss_net_paid), avg(ss_net_paid)#37) AS mergedValue#38] +Input [3]: [count(1)#35, avg(ss_ext_discount_amt)#36, avg(ss_net_paid)#37] -(28) Filter [codegen id : 1] -Input [2]: [ss_quantity#64, ss_sold_date_sk#65] -Condition : ((isnotnull(ss_quantity#64) AND (ss_quantity#64 >= 21)) AND (ss_quantity#64 <= 40)) +Subquery:2 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] -(29) Project [codegen id : 1] -Output: [] -Input [2]: [ss_quantity#64, ss_sold_date_sk#65] +Subquery:3 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] -(30) HashAggregate [codegen id : 1] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#66] -Results [1]: [count#67] +Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#5, [id=#6] +* Project (20) ++- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * Filter (15) + +- * ColumnarToRow (14) + +- Scan parquet default.store_sales (13) -(31) Exchange -Input [1]: [count#67] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#68] -(32) HashAggregate [codegen id : 2] -Input [1]: [count#67] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#69] -Results [1]: [count(1)#69 AS count(1)#70] - -Subquery:5 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#11, [id=#12] -* HashAggregate (39) -+- Exchange (38) - +- * HashAggregate (37) - +- * Project (36) - +- * Filter (35) - +- * ColumnarToRow (34) - +- Scan parquet default.store_sales (33) - - -(33) Scan parquet default.store_sales -Output [3]: [ss_quantity#71, ss_ext_discount_amt#72, ss_sold_date_sk#73] +(13) Scan parquet default.store_sales +Output [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] -ReadSchema: struct +ReadSchema: struct -(34) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#71, ss_ext_discount_amt#72, ss_sold_date_sk#73] +(14) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] -(35) Filter [codegen id : 1] -Input [3]: [ss_quantity#71, ss_ext_discount_amt#72, ss_sold_date_sk#73] -Condition : ((isnotnull(ss_quantity#71) AND (ss_quantity#71 >= 21)) AND (ss_quantity#71 <= 40)) +(15) Filter [codegen id : 1] +Input [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] +Condition : ((isnotnull(ss_quantity#39) AND (ss_quantity#39 >= 21)) AND (ss_quantity#39 <= 40)) -(36) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#72] -Input [3]: [ss_quantity#71, ss_ext_discount_amt#72, ss_sold_date_sk#73] +(16) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#40, ss_net_paid#41] +Input [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] -(37) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#72] +(17) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#40, ss_net_paid#41] Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#72))] -Aggregate Attributes [2]: [sum#74, count#75] -Results [2]: [sum#76, count#77] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#40)), partial_avg(UnscaledValue(ss_net_paid#41))] +Aggregate Attributes [5]: [count#43, sum#44, count#45, sum#46, count#47] +Results [5]: [count#48, sum#49, count#50, sum#51, count#52] -(38) Exchange -Input [2]: [sum#76, count#77] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#78] +(18) Exchange +Input [5]: [count#48, sum#49, count#50, sum#51, count#52] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#53] -(39) HashAggregate [codegen id : 2] -Input [2]: [sum#76, count#77] +(19) HashAggregate [codegen id : 2] +Input [5]: [count#48, sum#49, count#50, sum#51, count#52] Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#72))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#72))#79] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#72))#79 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#80] - -Subquery:6 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#13, [id=#14] -* HashAggregate (46) -+- Exchange (45) - +- * HashAggregate (44) - +- * Project (43) - +- * Filter (42) - +- * ColumnarToRow (41) - +- Scan parquet default.store_sales (40) - - -(40) Scan parquet default.store_sales -Output [3]: [ss_quantity#81, ss_net_paid#82, ss_sold_date_sk#83] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] -ReadSchema: struct +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#40)), avg(UnscaledValue(ss_net_paid#41))] +Aggregate Attributes [3]: [count(1)#54, avg(UnscaledValue(ss_ext_discount_amt#40))#55, avg(UnscaledValue(ss_net_paid#41))#56] +Results [3]: [count(1)#54 AS count(1)#57, cast((avg(UnscaledValue(ss_ext_discount_amt#40))#55 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#58, cast((avg(UnscaledValue(ss_net_paid#41))#56 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#59] -(41) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#81, ss_net_paid#82, ss_sold_date_sk#83] +(20) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#57, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#58, avg(ss_net_paid), avg(ss_net_paid)#59) AS mergedValue#60] +Input [3]: [count(1)#57, avg(ss_ext_discount_amt)#58, avg(ss_net_paid)#59] -(42) Filter [codegen id : 1] -Input [3]: [ss_quantity#81, ss_net_paid#82, ss_sold_date_sk#83] -Condition : ((isnotnull(ss_quantity#81) AND (ss_quantity#81 >= 21)) AND (ss_quantity#81 <= 40)) +Subquery:5 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] -(43) Project [codegen id : 1] -Output [1]: [ss_net_paid#82] -Input [3]: [ss_quantity#81, ss_net_paid#82, ss_sold_date_sk#83] +Subquery:6 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] -(44) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#82] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#82))] -Aggregate Attributes [2]: [sum#84, count#85] -Results [2]: [sum#86, count#87] +Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#8, [id=#9] +* Project (28) ++- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Project (24) + +- * Filter (23) + +- * ColumnarToRow (22) + +- Scan parquet default.store_sales (21) -(45) Exchange -Input [2]: [sum#86, count#87] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#88] -(46) HashAggregate [codegen id : 2] -Input [2]: [sum#86, count#87] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#82))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#82))#89] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#82))#89 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#90] - -Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#16, [id=#17] -* HashAggregate (53) -+- Exchange (52) - +- * HashAggregate (51) - +- * Project (50) - +- * Filter (49) - +- * ColumnarToRow (48) - +- Scan parquet default.store_sales (47) - - -(47) Scan parquet default.store_sales -Output [2]: [ss_quantity#91, ss_sold_date_sk#92] +(21) Scan parquet default.store_sales +Output [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] -ReadSchema: struct +ReadSchema: struct -(48) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#91, ss_sold_date_sk#92] +(22) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] -(49) Filter [codegen id : 1] -Input [2]: [ss_quantity#91, ss_sold_date_sk#92] -Condition : ((isnotnull(ss_quantity#91) AND (ss_quantity#91 >= 41)) AND (ss_quantity#91 <= 60)) +(23) Filter [codegen id : 1] +Input [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] +Condition : ((isnotnull(ss_quantity#61) AND (ss_quantity#61 >= 41)) AND (ss_quantity#61 <= 60)) -(50) Project [codegen id : 1] -Output: [] -Input [2]: [ss_quantity#91, ss_sold_date_sk#92] +(24) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#62, ss_net_paid#63] +Input [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] -(51) HashAggregate [codegen id : 1] -Input: [] +(25) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#62, ss_net_paid#63] Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#93] -Results [1]: [count#94] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#62)), partial_avg(UnscaledValue(ss_net_paid#63))] +Aggregate Attributes [5]: [count#65, sum#66, count#67, sum#68, count#69] +Results [5]: [count#70, sum#71, count#72, sum#73, count#74] -(52) Exchange -Input [1]: [count#94] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#95] +(26) Exchange +Input [5]: [count#70, sum#71, count#72, sum#73, count#74] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#75] -(53) HashAggregate [codegen id : 2] -Input [1]: [count#94] +(27) HashAggregate [codegen id : 2] +Input [5]: [count#70, sum#71, count#72, sum#73, count#74] Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#96] -Results [1]: [count(1)#96 AS count(1)#97] - -Subquery:8 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#18, [id=#19] -* HashAggregate (60) -+- Exchange (59) - +- * HashAggregate (58) - +- * Project (57) - +- * Filter (56) - +- * ColumnarToRow (55) - +- Scan parquet default.store_sales (54) - - -(54) Scan parquet default.store_sales -Output [3]: [ss_quantity#98, ss_ext_discount_amt#99, ss_sold_date_sk#100] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] -ReadSchema: struct +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#62)), avg(UnscaledValue(ss_net_paid#63))] +Aggregate Attributes [3]: [count(1)#76, avg(UnscaledValue(ss_ext_discount_amt#62))#77, avg(UnscaledValue(ss_net_paid#63))#78] +Results [3]: [count(1)#76 AS count(1)#79, cast((avg(UnscaledValue(ss_ext_discount_amt#62))#77 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#80, cast((avg(UnscaledValue(ss_net_paid#63))#78 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#81] -(55) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#98, ss_ext_discount_amt#99, ss_sold_date_sk#100] +(28) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#79, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#80, avg(ss_net_paid), avg(ss_net_paid)#81) AS mergedValue#82] +Input [3]: [count(1)#79, avg(ss_ext_discount_amt)#80, avg(ss_net_paid)#81] -(56) Filter [codegen id : 1] -Input [3]: [ss_quantity#98, ss_ext_discount_amt#99, ss_sold_date_sk#100] -Condition : ((isnotnull(ss_quantity#98) AND (ss_quantity#98 >= 41)) AND (ss_quantity#98 <= 60)) +Subquery:8 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] -(57) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#99] -Input [3]: [ss_quantity#98, ss_ext_discount_amt#99, ss_sold_date_sk#100] +Subquery:9 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] -(58) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#99] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#99))] -Aggregate Attributes [2]: [sum#101, count#102] -Results [2]: [sum#103, count#104] +Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#11, [id=#12] +* Project (36) ++- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.store_sales (29) -(59) Exchange -Input [2]: [sum#103, count#104] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#105] -(60) HashAggregate [codegen id : 2] -Input [2]: [sum#103, count#104] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#99))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#99))#106] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#99))#106 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#107] - -Subquery:9 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#20, [id=#21] -* HashAggregate (67) -+- Exchange (66) - +- * HashAggregate (65) - +- * Project (64) - +- * Filter (63) - +- * ColumnarToRow (62) - +- Scan parquet default.store_sales (61) - - -(61) Scan parquet default.store_sales -Output [3]: [ss_quantity#108, ss_net_paid#109, ss_sold_date_sk#110] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] -ReadSchema: struct - -(62) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#108, ss_net_paid#109, ss_sold_date_sk#110] - -(63) Filter [codegen id : 1] -Input [3]: [ss_quantity#108, ss_net_paid#109, ss_sold_date_sk#110] -Condition : ((isnotnull(ss_quantity#108) AND (ss_quantity#108 >= 41)) AND (ss_quantity#108 <= 60)) - -(64) Project [codegen id : 1] -Output [1]: [ss_net_paid#109] -Input [3]: [ss_quantity#108, ss_net_paid#109, ss_sold_date_sk#110] - -(65) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#109] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#109))] -Aggregate Attributes [2]: [sum#111, count#112] -Results [2]: [sum#113, count#114] - -(66) Exchange -Input [2]: [sum#113, count#114] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#115] - -(67) HashAggregate [codegen id : 2] -Input [2]: [sum#113, count#114] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#109))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#109))#116] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#109))#116 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#117] - -Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#23, [id=#24] -* HashAggregate (74) -+- Exchange (73) - +- * HashAggregate (72) - +- * Project (71) - +- * Filter (70) - +- * ColumnarToRow (69) - +- Scan parquet default.store_sales (68) - - -(68) Scan parquet default.store_sales -Output [2]: [ss_quantity#118, ss_sold_date_sk#119] +(29) Scan parquet default.store_sales +Output [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] -ReadSchema: struct +ReadSchema: struct -(69) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#118, ss_sold_date_sk#119] +(30) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] -(70) Filter [codegen id : 1] -Input [2]: [ss_quantity#118, ss_sold_date_sk#119] -Condition : ((isnotnull(ss_quantity#118) AND (ss_quantity#118 >= 61)) AND (ss_quantity#118 <= 80)) +(31) Filter [codegen id : 1] +Input [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] +Condition : ((isnotnull(ss_quantity#83) AND (ss_quantity#83 >= 61)) AND (ss_quantity#83 <= 80)) -(71) Project [codegen id : 1] -Output: [] -Input [2]: [ss_quantity#118, ss_sold_date_sk#119] +(32) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#84, ss_net_paid#85] +Input [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] -(72) HashAggregate [codegen id : 1] -Input: [] +(33) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#84, ss_net_paid#85] Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#120] -Results [1]: [count#121] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#84)), partial_avg(UnscaledValue(ss_net_paid#85))] +Aggregate Attributes [5]: [count#87, sum#88, count#89, sum#90, count#91] +Results [5]: [count#92, sum#93, count#94, sum#95, count#96] -(73) Exchange -Input [1]: [count#121] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#122] +(34) Exchange +Input [5]: [count#92, sum#93, count#94, sum#95, count#96] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#97] -(74) HashAggregate [codegen id : 2] -Input [1]: [count#121] +(35) HashAggregate [codegen id : 2] +Input [5]: [count#92, sum#93, count#94, sum#95, count#96] Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#123] -Results [1]: [count(1)#123 AS count(1)#124] - -Subquery:11 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#25, [id=#26] -* HashAggregate (81) -+- Exchange (80) - +- * HashAggregate (79) - +- * Project (78) - +- * Filter (77) - +- * ColumnarToRow (76) - +- Scan parquet default.store_sales (75) - - -(75) Scan parquet default.store_sales -Output [3]: [ss_quantity#125, ss_ext_discount_amt#126, ss_sold_date_sk#127] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] -ReadSchema: struct +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#84)), avg(UnscaledValue(ss_net_paid#85))] +Aggregate Attributes [3]: [count(1)#98, avg(UnscaledValue(ss_ext_discount_amt#84))#99, avg(UnscaledValue(ss_net_paid#85))#100] +Results [3]: [count(1)#98 AS count(1)#101, cast((avg(UnscaledValue(ss_ext_discount_amt#84))#99 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#102, cast((avg(UnscaledValue(ss_net_paid#85))#100 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#103] -(76) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#125, ss_ext_discount_amt#126, ss_sold_date_sk#127] +(36) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#101, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#102, avg(ss_net_paid), avg(ss_net_paid)#103) AS mergedValue#104] +Input [3]: [count(1)#101, avg(ss_ext_discount_amt)#102, avg(ss_net_paid)#103] -(77) Filter [codegen id : 1] -Input [3]: [ss_quantity#125, ss_ext_discount_amt#126, ss_sold_date_sk#127] -Condition : ((isnotnull(ss_quantity#125) AND (ss_quantity#125 >= 61)) AND (ss_quantity#125 <= 80)) +Subquery:11 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] -(78) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#126] -Input [3]: [ss_quantity#125, ss_ext_discount_amt#126, ss_sold_date_sk#127] +Subquery:12 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] -(79) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#126] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#126))] -Aggregate Attributes [2]: [sum#128, count#129] -Results [2]: [sum#130, count#131] +Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#14, [id=#15] +* Project (44) ++- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.store_sales (37) -(80) Exchange -Input [2]: [sum#130, count#131] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#132] -(81) HashAggregate [codegen id : 2] -Input [2]: [sum#130, count#131] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#126))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#126))#133] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#126))#133 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#134] - -Subquery:12 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#27, [id=#28] -* HashAggregate (88) -+- Exchange (87) - +- * HashAggregate (86) - +- * Project (85) - +- * Filter (84) - +- * ColumnarToRow (83) - +- Scan parquet default.store_sales (82) - - -(82) Scan parquet default.store_sales -Output [3]: [ss_quantity#135, ss_net_paid#136, ss_sold_date_sk#137] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] -ReadSchema: struct - -(83) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#135, ss_net_paid#136, ss_sold_date_sk#137] - -(84) Filter [codegen id : 1] -Input [3]: [ss_quantity#135, ss_net_paid#136, ss_sold_date_sk#137] -Condition : ((isnotnull(ss_quantity#135) AND (ss_quantity#135 >= 61)) AND (ss_quantity#135 <= 80)) - -(85) Project [codegen id : 1] -Output [1]: [ss_net_paid#136] -Input [3]: [ss_quantity#135, ss_net_paid#136, ss_sold_date_sk#137] - -(86) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#136] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#136))] -Aggregate Attributes [2]: [sum#138, count#139] -Results [2]: [sum#140, count#141] - -(87) Exchange -Input [2]: [sum#140, count#141] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#142] - -(88) HashAggregate [codegen id : 2] -Input [2]: [sum#140, count#141] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#136))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#136))#143] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#136))#143 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#144] - -Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#30, [id=#31] -* HashAggregate (95) -+- Exchange (94) - +- * HashAggregate (93) - +- * Project (92) - +- * Filter (91) - +- * ColumnarToRow (90) - +- Scan parquet default.store_sales (89) - - -(89) Scan parquet default.store_sales -Output [2]: [ss_quantity#145, ss_sold_date_sk#146] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] -ReadSchema: struct - -(90) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#145, ss_sold_date_sk#146] - -(91) Filter [codegen id : 1] -Input [2]: [ss_quantity#145, ss_sold_date_sk#146] -Condition : ((isnotnull(ss_quantity#145) AND (ss_quantity#145 >= 81)) AND (ss_quantity#145 <= 100)) - -(92) Project [codegen id : 1] -Output: [] -Input [2]: [ss_quantity#145, ss_sold_date_sk#146] - -(93) HashAggregate [codegen id : 1] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#147] -Results [1]: [count#148] - -(94) Exchange -Input [1]: [count#148] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#149] - -(95) HashAggregate [codegen id : 2] -Input [1]: [count#148] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#150] -Results [1]: [count(1)#150 AS count(1)#151] - -Subquery:14 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#32, [id=#33] -* HashAggregate (102) -+- Exchange (101) - +- * HashAggregate (100) - +- * Project (99) - +- * Filter (98) - +- * ColumnarToRow (97) - +- Scan parquet default.store_sales (96) - - -(96) Scan parquet default.store_sales -Output [3]: [ss_quantity#152, ss_ext_discount_amt#153, ss_sold_date_sk#154] +(37) Scan parquet default.store_sales +Output [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] -ReadSchema: struct +ReadSchema: struct -(97) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#152, ss_ext_discount_amt#153, ss_sold_date_sk#154] +(38) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] -(98) Filter [codegen id : 1] -Input [3]: [ss_quantity#152, ss_ext_discount_amt#153, ss_sold_date_sk#154] -Condition : ((isnotnull(ss_quantity#152) AND (ss_quantity#152 >= 81)) AND (ss_quantity#152 <= 100)) +(39) Filter [codegen id : 1] +Input [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] +Condition : ((isnotnull(ss_quantity#105) AND (ss_quantity#105 >= 81)) AND (ss_quantity#105 <= 100)) -(99) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#153] -Input [3]: [ss_quantity#152, ss_ext_discount_amt#153, ss_sold_date_sk#154] +(40) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#106, ss_net_paid#107] +Input [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] -(100) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#153] +(41) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#106, ss_net_paid#107] Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#153))] -Aggregate Attributes [2]: [sum#155, count#156] -Results [2]: [sum#157, count#158] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#106)), partial_avg(UnscaledValue(ss_net_paid#107))] +Aggregate Attributes [5]: [count#109, sum#110, count#111, sum#112, count#113] +Results [5]: [count#114, sum#115, count#116, sum#117, count#118] -(101) Exchange -Input [2]: [sum#157, count#158] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#159] +(42) Exchange +Input [5]: [count#114, sum#115, count#116, sum#117, count#118] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#119] -(102) HashAggregate [codegen id : 2] -Input [2]: [sum#157, count#158] +(43) HashAggregate [codegen id : 2] +Input [5]: [count#114, sum#115, count#116, sum#117, count#118] Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#153))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#153))#160] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#153))#160 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#161] - -Subquery:15 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#34, [id=#35] -* HashAggregate (109) -+- Exchange (108) - +- * HashAggregate (107) - +- * Project (106) - +- * Filter (105) - +- * ColumnarToRow (104) - +- Scan parquet default.store_sales (103) - - -(103) Scan parquet default.store_sales -Output [3]: [ss_quantity#162, ss_net_paid#163, ss_sold_date_sk#164] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] -ReadSchema: struct +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#106)), avg(UnscaledValue(ss_net_paid#107))] +Aggregate Attributes [3]: [count(1)#120, avg(UnscaledValue(ss_ext_discount_amt#106))#121, avg(UnscaledValue(ss_net_paid#107))#122] +Results [3]: [count(1)#120 AS count(1)#123, cast((avg(UnscaledValue(ss_ext_discount_amt#106))#121 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#124, cast((avg(UnscaledValue(ss_net_paid#107))#122 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#125] -(104) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#162, ss_net_paid#163, ss_sold_date_sk#164] +(44) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#123, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#124, avg(ss_net_paid), avg(ss_net_paid)#125) AS mergedValue#126] +Input [3]: [count(1)#123, avg(ss_ext_discount_amt)#124, avg(ss_net_paid)#125] -(105) Filter [codegen id : 1] -Input [3]: [ss_quantity#162, ss_net_paid#163, ss_sold_date_sk#164] -Condition : ((isnotnull(ss_quantity#162) AND (ss_quantity#162 >= 81)) AND (ss_quantity#162 <= 100)) +Subquery:14 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] -(106) Project [codegen id : 1] -Output [1]: [ss_net_paid#163] -Input [3]: [ss_quantity#162, ss_net_paid#163, ss_sold_date_sk#164] - -(107) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#163] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#163))] -Aggregate Attributes [2]: [sum#165, count#166] -Results [2]: [sum#167, count#168] - -(108) Exchange -Input [2]: [sum#167, count#168] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#169] - -(109) HashAggregate [codegen id : 2] -Input [2]: [sum#167, count#168] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#163))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#163))#170] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#163))#170 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#171] +Subquery:15 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/simplified.txt index 1c42d8f2638c6..66ba481fd2045 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/simplified.txt @@ -2,184 +2,79 @@ WholeStageCodegen (1) Project Subquery #1 WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #1 - WholeStageCodegen (1) - HashAggregate [count,count] - Project - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_sold_date_sk] + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #1 + ReusedSubquery [mergedValue] #1 Subquery #2 WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] - InputAdapter - Exchange #2 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_sold_date_sk] + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #2 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #2 + ReusedSubquery [mergedValue] #2 Subquery #3 WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] - InputAdapter - Exchange #3 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] - Project [ss_net_paid] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid,ss_sold_date_sk] + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #3 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #3 + ReusedSubquery [mergedValue] #3 Subquery #4 WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #4 - WholeStageCodegen (1) - HashAggregate [count,count] - Project - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_sold_date_sk] + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #4 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #4 + ReusedSubquery [mergedValue] #4 Subquery #5 WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] - InputAdapter - Exchange #5 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_sold_date_sk] - Subquery #6 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] - InputAdapter - Exchange #6 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] - Project [ss_net_paid] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid,ss_sold_date_sk] - Subquery #7 - WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #7 - WholeStageCodegen (1) - HashAggregate [count,count] - Project - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_sold_date_sk] - Subquery #8 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] - InputAdapter - Exchange #8 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_sold_date_sk] - Subquery #9 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] - InputAdapter - Exchange #9 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] - Project [ss_net_paid] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid,ss_sold_date_sk] - Subquery #10 - WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #10 - WholeStageCodegen (1) - HashAggregate [count,count] - Project - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_sold_date_sk] - Subquery #11 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] - InputAdapter - Exchange #11 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_sold_date_sk] - Subquery #12 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] - InputAdapter - Exchange #12 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] - Project [ss_net_paid] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid,ss_sold_date_sk] - Subquery #13 - WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #13 - WholeStageCodegen (1) - HashAggregate [count,count] - Project - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_sold_date_sk] - Subquery #14 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] - InputAdapter - Exchange #14 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_sold_date_sk] - Subquery #15 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] - InputAdapter - Exchange #15 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] - Project [ss_net_paid] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid,ss_sold_date_sk] + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #5 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #5 + ReusedSubquery [mergedValue] #5 Filter [r_reason_sk] ColumnarToRow InputAdapter diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt index 8736c9861a5ce..8bf63794f25e4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt @@ -20,699 +20,284 @@ Input [1]: [r_reason_sk#1] Condition : (isnotnull(r_reason_sk#1) AND (r_reason_sk#1 = 1)) (4) Project [codegen id : 1] -Output [5]: [CASE WHEN (Subquery scalar-subquery#2, [id=#3] > 62316685) THEN Subquery scalar-subquery#4, [id=#5] ELSE Subquery scalar-subquery#6, [id=#7] END AS bucket1#8, CASE WHEN (Subquery scalar-subquery#9, [id=#10] > 19045798) THEN Subquery scalar-subquery#11, [id=#12] ELSE Subquery scalar-subquery#13, [id=#14] END AS bucket2#15, CASE WHEN (Subquery scalar-subquery#16, [id=#17] > 365541424) THEN Subquery scalar-subquery#18, [id=#19] ELSE Subquery scalar-subquery#20, [id=#21] END AS bucket3#22, CASE WHEN (Subquery scalar-subquery#23, [id=#24] > 216357808) THEN Subquery scalar-subquery#25, [id=#26] ELSE Subquery scalar-subquery#27, [id=#28] END AS bucket4#29, CASE WHEN (Subquery scalar-subquery#30, [id=#31] > 184483884) THEN Subquery scalar-subquery#32, [id=#33] ELSE Subquery scalar-subquery#34, [id=#35] END AS bucket5#36] +Output [5]: [CASE WHEN (Subquery scalar-subquery#2, [id=#3].count(1) > 62316685) THEN ReusedSubquery Subquery scalar-subquery#2, [id=#3].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#2, [id=#3].avg(ss_net_paid) END AS bucket1#4, CASE WHEN (Subquery scalar-subquery#5, [id=#6].count(1) > 19045798) THEN ReusedSubquery Subquery scalar-subquery#5, [id=#6].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#5, [id=#6].avg(ss_net_paid) END AS bucket2#7, CASE WHEN (Subquery scalar-subquery#8, [id=#9].count(1) > 365541424) THEN ReusedSubquery Subquery scalar-subquery#8, [id=#9].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#8, [id=#9].avg(ss_net_paid) END AS bucket3#10, CASE WHEN (Subquery scalar-subquery#11, [id=#12].count(1) > 216357808) THEN ReusedSubquery Subquery scalar-subquery#11, [id=#12].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#11, [id=#12].avg(ss_net_paid) END AS bucket4#13, CASE WHEN (Subquery scalar-subquery#14, [id=#15].count(1) > 184483884) THEN ReusedSubquery Subquery scalar-subquery#14, [id=#15].avg(ss_ext_discount_amt) ELSE ReusedSubquery Subquery scalar-subquery#14, [id=#15].avg(ss_net_paid) END AS bucket5#16] Input [1]: [r_reason_sk#1] ===== Subqueries ===== Subquery:1 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#2, [id=#3] -* HashAggregate (11) -+- Exchange (10) - +- * HashAggregate (9) - +- * Project (8) - +- * Filter (7) - +- * ColumnarToRow (6) - +- Scan parquet default.store_sales (5) +* Project (12) ++- * HashAggregate (11) + +- Exchange (10) + +- * HashAggregate (9) + +- * Project (8) + +- * Filter (7) + +- * ColumnarToRow (6) + +- Scan parquet default.store_sales (5) (5) Scan parquet default.store_sales -Output [2]: [ss_quantity#37, ss_sold_date_sk#38] +Output [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] -ReadSchema: struct +ReadSchema: struct (6) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#37, ss_sold_date_sk#38] +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] (7) Filter [codegen id : 1] -Input [2]: [ss_quantity#37, ss_sold_date_sk#38] -Condition : ((isnotnull(ss_quantity#37) AND (ss_quantity#37 >= 1)) AND (ss_quantity#37 <= 20)) +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] +Condition : ((isnotnull(ss_quantity#17) AND (ss_quantity#17 >= 1)) AND (ss_quantity#17 <= 20)) (8) Project [codegen id : 1] -Output: [] -Input [2]: [ss_quantity#37, ss_sold_date_sk#38] +Output [2]: [ss_ext_discount_amt#18, ss_net_paid#19] +Input [4]: [ss_quantity#17, ss_ext_discount_amt#18, ss_net_paid#19, ss_sold_date_sk#20] (9) HashAggregate [codegen id : 1] -Input: [] +Input [2]: [ss_ext_discount_amt#18, ss_net_paid#19] Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#39] -Results [1]: [count#40] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#18)), partial_avg(UnscaledValue(ss_net_paid#19))] +Aggregate Attributes [5]: [count#21, sum#22, count#23, sum#24, count#25] +Results [5]: [count#26, sum#27, count#28, sum#29, count#30] (10) Exchange -Input [1]: [count#40] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#41] +Input [5]: [count#26, sum#27, count#28, sum#29, count#30] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#31] (11) HashAggregate [codegen id : 2] -Input [1]: [count#40] +Input [5]: [count#26, sum#27, count#28, sum#29, count#30] Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#42] -Results [1]: [count(1)#42 AS count(1)#43] - -Subquery:2 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#4, [id=#5] -* HashAggregate (18) -+- Exchange (17) - +- * HashAggregate (16) - +- * Project (15) - +- * Filter (14) - +- * ColumnarToRow (13) - +- Scan parquet default.store_sales (12) - - -(12) Scan parquet default.store_sales -Output [3]: [ss_quantity#44, ss_ext_discount_amt#45, ss_sold_date_sk#46] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] -ReadSchema: struct - -(13) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#44, ss_ext_discount_amt#45, ss_sold_date_sk#46] - -(14) Filter [codegen id : 1] -Input [3]: [ss_quantity#44, ss_ext_discount_amt#45, ss_sold_date_sk#46] -Condition : ((isnotnull(ss_quantity#44) AND (ss_quantity#44 >= 1)) AND (ss_quantity#44 <= 20)) - -(15) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#45] -Input [3]: [ss_quantity#44, ss_ext_discount_amt#45, ss_sold_date_sk#46] - -(16) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#45] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#45))] -Aggregate Attributes [2]: [sum#47, count#48] -Results [2]: [sum#49, count#50] - -(17) Exchange -Input [2]: [sum#49, count#50] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#51] - -(18) HashAggregate [codegen id : 2] -Input [2]: [sum#49, count#50] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#45))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#45))#52] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#45))#52 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#53] - -Subquery:3 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#6, [id=#7] -* HashAggregate (25) -+- Exchange (24) - +- * HashAggregate (23) - +- * Project (22) - +- * Filter (21) - +- * ColumnarToRow (20) - +- Scan parquet default.store_sales (19) - - -(19) Scan parquet default.store_sales -Output [3]: [ss_quantity#54, ss_net_paid#55, ss_sold_date_sk#56] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,1), LessThanOrEqual(ss_quantity,20)] -ReadSchema: struct - -(20) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#54, ss_net_paid#55, ss_sold_date_sk#56] - -(21) Filter [codegen id : 1] -Input [3]: [ss_quantity#54, ss_net_paid#55, ss_sold_date_sk#56] -Condition : ((isnotnull(ss_quantity#54) AND (ss_quantity#54 >= 1)) AND (ss_quantity#54 <= 20)) - -(22) Project [codegen id : 1] -Output [1]: [ss_net_paid#55] -Input [3]: [ss_quantity#54, ss_net_paid#55, ss_sold_date_sk#56] - -(23) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#55] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#55))] -Aggregate Attributes [2]: [sum#57, count#58] -Results [2]: [sum#59, count#60] - -(24) Exchange -Input [2]: [sum#59, count#60] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#61] - -(25) HashAggregate [codegen id : 2] -Input [2]: [sum#59, count#60] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#55))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#55))#62] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#55))#62 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#63] - -Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#9, [id=#10] -* HashAggregate (32) -+- Exchange (31) - +- * HashAggregate (30) - +- * Project (29) - +- * Filter (28) - +- * ColumnarToRow (27) - +- Scan parquet default.store_sales (26) - - -(26) Scan parquet default.store_sales -Output [2]: [ss_quantity#64, ss_sold_date_sk#65] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] -ReadSchema: struct +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#18)), avg(UnscaledValue(ss_net_paid#19))] +Aggregate Attributes [3]: [count(1)#32, avg(UnscaledValue(ss_ext_discount_amt#18))#33, avg(UnscaledValue(ss_net_paid#19))#34] +Results [3]: [count(1)#32 AS count(1)#35, cast((avg(UnscaledValue(ss_ext_discount_amt#18))#33 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#36, cast((avg(UnscaledValue(ss_net_paid#19))#34 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#37] -(27) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#64, ss_sold_date_sk#65] +(12) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#35, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#36, avg(ss_net_paid), avg(ss_net_paid)#37) AS mergedValue#38] +Input [3]: [count(1)#35, avg(ss_ext_discount_amt)#36, avg(ss_net_paid)#37] -(28) Filter [codegen id : 1] -Input [2]: [ss_quantity#64, ss_sold_date_sk#65] -Condition : ((isnotnull(ss_quantity#64) AND (ss_quantity#64 >= 21)) AND (ss_quantity#64 <= 40)) +Subquery:2 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] -(29) Project [codegen id : 1] -Output: [] -Input [2]: [ss_quantity#64, ss_sold_date_sk#65] +Subquery:3 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] -(30) HashAggregate [codegen id : 1] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#66] -Results [1]: [count#67] +Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#5, [id=#6] +* Project (20) ++- * HashAggregate (19) + +- Exchange (18) + +- * HashAggregate (17) + +- * Project (16) + +- * Filter (15) + +- * ColumnarToRow (14) + +- Scan parquet default.store_sales (13) -(31) Exchange -Input [1]: [count#67] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#68] -(32) HashAggregate [codegen id : 2] -Input [1]: [count#67] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#69] -Results [1]: [count(1)#69 AS count(1)#70] - -Subquery:5 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#11, [id=#12] -* HashAggregate (39) -+- Exchange (38) - +- * HashAggregate (37) - +- * Project (36) - +- * Filter (35) - +- * ColumnarToRow (34) - +- Scan parquet default.store_sales (33) - - -(33) Scan parquet default.store_sales -Output [3]: [ss_quantity#71, ss_ext_discount_amt#72, ss_sold_date_sk#73] +(13) Scan parquet default.store_sales +Output [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] -ReadSchema: struct +ReadSchema: struct -(34) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#71, ss_ext_discount_amt#72, ss_sold_date_sk#73] +(14) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] -(35) Filter [codegen id : 1] -Input [3]: [ss_quantity#71, ss_ext_discount_amt#72, ss_sold_date_sk#73] -Condition : ((isnotnull(ss_quantity#71) AND (ss_quantity#71 >= 21)) AND (ss_quantity#71 <= 40)) +(15) Filter [codegen id : 1] +Input [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] +Condition : ((isnotnull(ss_quantity#39) AND (ss_quantity#39 >= 21)) AND (ss_quantity#39 <= 40)) -(36) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#72] -Input [3]: [ss_quantity#71, ss_ext_discount_amt#72, ss_sold_date_sk#73] +(16) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#40, ss_net_paid#41] +Input [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] -(37) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#72] +(17) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#40, ss_net_paid#41] Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#72))] -Aggregate Attributes [2]: [sum#74, count#75] -Results [2]: [sum#76, count#77] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#40)), partial_avg(UnscaledValue(ss_net_paid#41))] +Aggregate Attributes [5]: [count#43, sum#44, count#45, sum#46, count#47] +Results [5]: [count#48, sum#49, count#50, sum#51, count#52] -(38) Exchange -Input [2]: [sum#76, count#77] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#78] +(18) Exchange +Input [5]: [count#48, sum#49, count#50, sum#51, count#52] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#53] -(39) HashAggregate [codegen id : 2] -Input [2]: [sum#76, count#77] +(19) HashAggregate [codegen id : 2] +Input [5]: [count#48, sum#49, count#50, sum#51, count#52] Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#72))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#72))#79] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#72))#79 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#80] - -Subquery:6 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#13, [id=#14] -* HashAggregate (46) -+- Exchange (45) - +- * HashAggregate (44) - +- * Project (43) - +- * Filter (42) - +- * ColumnarToRow (41) - +- Scan parquet default.store_sales (40) - - -(40) Scan parquet default.store_sales -Output [3]: [ss_quantity#81, ss_net_paid#82, ss_sold_date_sk#83] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] -ReadSchema: struct +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#40)), avg(UnscaledValue(ss_net_paid#41))] +Aggregate Attributes [3]: [count(1)#54, avg(UnscaledValue(ss_ext_discount_amt#40))#55, avg(UnscaledValue(ss_net_paid#41))#56] +Results [3]: [count(1)#54 AS count(1)#57, cast((avg(UnscaledValue(ss_ext_discount_amt#40))#55 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#58, cast((avg(UnscaledValue(ss_net_paid#41))#56 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#59] -(41) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#81, ss_net_paid#82, ss_sold_date_sk#83] +(20) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#57, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#58, avg(ss_net_paid), avg(ss_net_paid)#59) AS mergedValue#60] +Input [3]: [count(1)#57, avg(ss_ext_discount_amt)#58, avg(ss_net_paid)#59] -(42) Filter [codegen id : 1] -Input [3]: [ss_quantity#81, ss_net_paid#82, ss_sold_date_sk#83] -Condition : ((isnotnull(ss_quantity#81) AND (ss_quantity#81 >= 21)) AND (ss_quantity#81 <= 40)) +Subquery:5 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] -(43) Project [codegen id : 1] -Output [1]: [ss_net_paid#82] -Input [3]: [ss_quantity#81, ss_net_paid#82, ss_sold_date_sk#83] +Subquery:6 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] -(44) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#82] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#82))] -Aggregate Attributes [2]: [sum#84, count#85] -Results [2]: [sum#86, count#87] +Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#8, [id=#9] +* Project (28) ++- * HashAggregate (27) + +- Exchange (26) + +- * HashAggregate (25) + +- * Project (24) + +- * Filter (23) + +- * ColumnarToRow (22) + +- Scan parquet default.store_sales (21) -(45) Exchange -Input [2]: [sum#86, count#87] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#88] -(46) HashAggregate [codegen id : 2] -Input [2]: [sum#86, count#87] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#82))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#82))#89] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#82))#89 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#90] - -Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#16, [id=#17] -* HashAggregate (53) -+- Exchange (52) - +- * HashAggregate (51) - +- * Project (50) - +- * Filter (49) - +- * ColumnarToRow (48) - +- Scan parquet default.store_sales (47) - - -(47) Scan parquet default.store_sales -Output [2]: [ss_quantity#91, ss_sold_date_sk#92] +(21) Scan parquet default.store_sales +Output [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] -ReadSchema: struct +ReadSchema: struct -(48) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#91, ss_sold_date_sk#92] +(22) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] -(49) Filter [codegen id : 1] -Input [2]: [ss_quantity#91, ss_sold_date_sk#92] -Condition : ((isnotnull(ss_quantity#91) AND (ss_quantity#91 >= 41)) AND (ss_quantity#91 <= 60)) +(23) Filter [codegen id : 1] +Input [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] +Condition : ((isnotnull(ss_quantity#61) AND (ss_quantity#61 >= 41)) AND (ss_quantity#61 <= 60)) -(50) Project [codegen id : 1] -Output: [] -Input [2]: [ss_quantity#91, ss_sold_date_sk#92] +(24) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#62, ss_net_paid#63] +Input [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] -(51) HashAggregate [codegen id : 1] -Input: [] +(25) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#62, ss_net_paid#63] Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#93] -Results [1]: [count#94] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#62)), partial_avg(UnscaledValue(ss_net_paid#63))] +Aggregate Attributes [5]: [count#65, sum#66, count#67, sum#68, count#69] +Results [5]: [count#70, sum#71, count#72, sum#73, count#74] -(52) Exchange -Input [1]: [count#94] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#95] +(26) Exchange +Input [5]: [count#70, sum#71, count#72, sum#73, count#74] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#75] -(53) HashAggregate [codegen id : 2] -Input [1]: [count#94] +(27) HashAggregate [codegen id : 2] +Input [5]: [count#70, sum#71, count#72, sum#73, count#74] Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#96] -Results [1]: [count(1)#96 AS count(1)#97] - -Subquery:8 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#18, [id=#19] -* HashAggregate (60) -+- Exchange (59) - +- * HashAggregate (58) - +- * Project (57) - +- * Filter (56) - +- * ColumnarToRow (55) - +- Scan parquet default.store_sales (54) - - -(54) Scan parquet default.store_sales -Output [3]: [ss_quantity#98, ss_ext_discount_amt#99, ss_sold_date_sk#100] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] -ReadSchema: struct +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#62)), avg(UnscaledValue(ss_net_paid#63))] +Aggregate Attributes [3]: [count(1)#76, avg(UnscaledValue(ss_ext_discount_amt#62))#77, avg(UnscaledValue(ss_net_paid#63))#78] +Results [3]: [count(1)#76 AS count(1)#79, cast((avg(UnscaledValue(ss_ext_discount_amt#62))#77 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#80, cast((avg(UnscaledValue(ss_net_paid#63))#78 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#81] -(55) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#98, ss_ext_discount_amt#99, ss_sold_date_sk#100] +(28) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#79, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#80, avg(ss_net_paid), avg(ss_net_paid)#81) AS mergedValue#82] +Input [3]: [count(1)#79, avg(ss_ext_discount_amt)#80, avg(ss_net_paid)#81] -(56) Filter [codegen id : 1] -Input [3]: [ss_quantity#98, ss_ext_discount_amt#99, ss_sold_date_sk#100] -Condition : ((isnotnull(ss_quantity#98) AND (ss_quantity#98 >= 41)) AND (ss_quantity#98 <= 60)) +Subquery:8 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] -(57) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#99] -Input [3]: [ss_quantity#98, ss_ext_discount_amt#99, ss_sold_date_sk#100] +Subquery:9 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] -(58) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#99] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#99))] -Aggregate Attributes [2]: [sum#101, count#102] -Results [2]: [sum#103, count#104] +Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#11, [id=#12] +* Project (36) ++- * HashAggregate (35) + +- Exchange (34) + +- * HashAggregate (33) + +- * Project (32) + +- * Filter (31) + +- * ColumnarToRow (30) + +- Scan parquet default.store_sales (29) -(59) Exchange -Input [2]: [sum#103, count#104] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#105] -(60) HashAggregate [codegen id : 2] -Input [2]: [sum#103, count#104] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#99))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#99))#106] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#99))#106 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#107] - -Subquery:9 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#20, [id=#21] -* HashAggregate (67) -+- Exchange (66) - +- * HashAggregate (65) - +- * Project (64) - +- * Filter (63) - +- * ColumnarToRow (62) - +- Scan parquet default.store_sales (61) - - -(61) Scan parquet default.store_sales -Output [3]: [ss_quantity#108, ss_net_paid#109, ss_sold_date_sk#110] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] -ReadSchema: struct - -(62) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#108, ss_net_paid#109, ss_sold_date_sk#110] - -(63) Filter [codegen id : 1] -Input [3]: [ss_quantity#108, ss_net_paid#109, ss_sold_date_sk#110] -Condition : ((isnotnull(ss_quantity#108) AND (ss_quantity#108 >= 41)) AND (ss_quantity#108 <= 60)) - -(64) Project [codegen id : 1] -Output [1]: [ss_net_paid#109] -Input [3]: [ss_quantity#108, ss_net_paid#109, ss_sold_date_sk#110] - -(65) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#109] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#109))] -Aggregate Attributes [2]: [sum#111, count#112] -Results [2]: [sum#113, count#114] - -(66) Exchange -Input [2]: [sum#113, count#114] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#115] - -(67) HashAggregate [codegen id : 2] -Input [2]: [sum#113, count#114] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#109))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#109))#116] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#109))#116 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#117] - -Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#23, [id=#24] -* HashAggregate (74) -+- Exchange (73) - +- * HashAggregate (72) - +- * Project (71) - +- * Filter (70) - +- * ColumnarToRow (69) - +- Scan parquet default.store_sales (68) - - -(68) Scan parquet default.store_sales -Output [2]: [ss_quantity#118, ss_sold_date_sk#119] +(29) Scan parquet default.store_sales +Output [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] -ReadSchema: struct +ReadSchema: struct -(69) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#118, ss_sold_date_sk#119] +(30) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] -(70) Filter [codegen id : 1] -Input [2]: [ss_quantity#118, ss_sold_date_sk#119] -Condition : ((isnotnull(ss_quantity#118) AND (ss_quantity#118 >= 61)) AND (ss_quantity#118 <= 80)) +(31) Filter [codegen id : 1] +Input [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] +Condition : ((isnotnull(ss_quantity#83) AND (ss_quantity#83 >= 61)) AND (ss_quantity#83 <= 80)) -(71) Project [codegen id : 1] -Output: [] -Input [2]: [ss_quantity#118, ss_sold_date_sk#119] +(32) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#84, ss_net_paid#85] +Input [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] -(72) HashAggregate [codegen id : 1] -Input: [] +(33) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#84, ss_net_paid#85] Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#120] -Results [1]: [count#121] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#84)), partial_avg(UnscaledValue(ss_net_paid#85))] +Aggregate Attributes [5]: [count#87, sum#88, count#89, sum#90, count#91] +Results [5]: [count#92, sum#93, count#94, sum#95, count#96] -(73) Exchange -Input [1]: [count#121] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#122] +(34) Exchange +Input [5]: [count#92, sum#93, count#94, sum#95, count#96] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#97] -(74) HashAggregate [codegen id : 2] -Input [1]: [count#121] +(35) HashAggregate [codegen id : 2] +Input [5]: [count#92, sum#93, count#94, sum#95, count#96] Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#123] -Results [1]: [count(1)#123 AS count(1)#124] - -Subquery:11 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#25, [id=#26] -* HashAggregate (81) -+- Exchange (80) - +- * HashAggregate (79) - +- * Project (78) - +- * Filter (77) - +- * ColumnarToRow (76) - +- Scan parquet default.store_sales (75) - - -(75) Scan parquet default.store_sales -Output [3]: [ss_quantity#125, ss_ext_discount_amt#126, ss_sold_date_sk#127] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] -ReadSchema: struct +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#84)), avg(UnscaledValue(ss_net_paid#85))] +Aggregate Attributes [3]: [count(1)#98, avg(UnscaledValue(ss_ext_discount_amt#84))#99, avg(UnscaledValue(ss_net_paid#85))#100] +Results [3]: [count(1)#98 AS count(1)#101, cast((avg(UnscaledValue(ss_ext_discount_amt#84))#99 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#102, cast((avg(UnscaledValue(ss_net_paid#85))#100 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#103] -(76) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#125, ss_ext_discount_amt#126, ss_sold_date_sk#127] +(36) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#101, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#102, avg(ss_net_paid), avg(ss_net_paid)#103) AS mergedValue#104] +Input [3]: [count(1)#101, avg(ss_ext_discount_amt)#102, avg(ss_net_paid)#103] -(77) Filter [codegen id : 1] -Input [3]: [ss_quantity#125, ss_ext_discount_amt#126, ss_sold_date_sk#127] -Condition : ((isnotnull(ss_quantity#125) AND (ss_quantity#125 >= 61)) AND (ss_quantity#125 <= 80)) +Subquery:11 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] -(78) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#126] -Input [3]: [ss_quantity#125, ss_ext_discount_amt#126, ss_sold_date_sk#127] +Subquery:12 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] -(79) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#126] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#126))] -Aggregate Attributes [2]: [sum#128, count#129] -Results [2]: [sum#130, count#131] +Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#14, [id=#15] +* Project (44) ++- * HashAggregate (43) + +- Exchange (42) + +- * HashAggregate (41) + +- * Project (40) + +- * Filter (39) + +- * ColumnarToRow (38) + +- Scan parquet default.store_sales (37) -(80) Exchange -Input [2]: [sum#130, count#131] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#132] -(81) HashAggregate [codegen id : 2] -Input [2]: [sum#130, count#131] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#126))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#126))#133] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#126))#133 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#134] - -Subquery:12 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#27, [id=#28] -* HashAggregate (88) -+- Exchange (87) - +- * HashAggregate (86) - +- * Project (85) - +- * Filter (84) - +- * ColumnarToRow (83) - +- Scan parquet default.store_sales (82) - - -(82) Scan parquet default.store_sales -Output [3]: [ss_quantity#135, ss_net_paid#136, ss_sold_date_sk#137] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] -ReadSchema: struct - -(83) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#135, ss_net_paid#136, ss_sold_date_sk#137] - -(84) Filter [codegen id : 1] -Input [3]: [ss_quantity#135, ss_net_paid#136, ss_sold_date_sk#137] -Condition : ((isnotnull(ss_quantity#135) AND (ss_quantity#135 >= 61)) AND (ss_quantity#135 <= 80)) - -(85) Project [codegen id : 1] -Output [1]: [ss_net_paid#136] -Input [3]: [ss_quantity#135, ss_net_paid#136, ss_sold_date_sk#137] - -(86) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#136] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#136))] -Aggregate Attributes [2]: [sum#138, count#139] -Results [2]: [sum#140, count#141] - -(87) Exchange -Input [2]: [sum#140, count#141] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#142] - -(88) HashAggregate [codegen id : 2] -Input [2]: [sum#140, count#141] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#136))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#136))#143] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#136))#143 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#144] - -Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#30, [id=#31] -* HashAggregate (95) -+- Exchange (94) - +- * HashAggregate (93) - +- * Project (92) - +- * Filter (91) - +- * ColumnarToRow (90) - +- Scan parquet default.store_sales (89) - - -(89) Scan parquet default.store_sales -Output [2]: [ss_quantity#145, ss_sold_date_sk#146] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] -ReadSchema: struct - -(90) ColumnarToRow [codegen id : 1] -Input [2]: [ss_quantity#145, ss_sold_date_sk#146] - -(91) Filter [codegen id : 1] -Input [2]: [ss_quantity#145, ss_sold_date_sk#146] -Condition : ((isnotnull(ss_quantity#145) AND (ss_quantity#145 >= 81)) AND (ss_quantity#145 <= 100)) - -(92) Project [codegen id : 1] -Output: [] -Input [2]: [ss_quantity#145, ss_sold_date_sk#146] - -(93) HashAggregate [codegen id : 1] -Input: [] -Keys: [] -Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#147] -Results [1]: [count#148] - -(94) Exchange -Input [1]: [count#148] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#149] - -(95) HashAggregate [codegen id : 2] -Input [1]: [count#148] -Keys: [] -Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#150] -Results [1]: [count(1)#150 AS count(1)#151] - -Subquery:14 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#32, [id=#33] -* HashAggregate (102) -+- Exchange (101) - +- * HashAggregate (100) - +- * Project (99) - +- * Filter (98) - +- * ColumnarToRow (97) - +- Scan parquet default.store_sales (96) - - -(96) Scan parquet default.store_sales -Output [3]: [ss_quantity#152, ss_ext_discount_amt#153, ss_sold_date_sk#154] +(37) Scan parquet default.store_sales +Output [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] -ReadSchema: struct +ReadSchema: struct -(97) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#152, ss_ext_discount_amt#153, ss_sold_date_sk#154] +(38) ColumnarToRow [codegen id : 1] +Input [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] -(98) Filter [codegen id : 1] -Input [3]: [ss_quantity#152, ss_ext_discount_amt#153, ss_sold_date_sk#154] -Condition : ((isnotnull(ss_quantity#152) AND (ss_quantity#152 >= 81)) AND (ss_quantity#152 <= 100)) +(39) Filter [codegen id : 1] +Input [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] +Condition : ((isnotnull(ss_quantity#105) AND (ss_quantity#105 >= 81)) AND (ss_quantity#105 <= 100)) -(99) Project [codegen id : 1] -Output [1]: [ss_ext_discount_amt#153] -Input [3]: [ss_quantity#152, ss_ext_discount_amt#153, ss_sold_date_sk#154] +(40) Project [codegen id : 1] +Output [2]: [ss_ext_discount_amt#106, ss_net_paid#107] +Input [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] -(100) HashAggregate [codegen id : 1] -Input [1]: [ss_ext_discount_amt#153] +(41) HashAggregate [codegen id : 1] +Input [2]: [ss_ext_discount_amt#106, ss_net_paid#107] Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_ext_discount_amt#153))] -Aggregate Attributes [2]: [sum#155, count#156] -Results [2]: [sum#157, count#158] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#106)), partial_avg(UnscaledValue(ss_net_paid#107))] +Aggregate Attributes [5]: [count#109, sum#110, count#111, sum#112, count#113] +Results [5]: [count#114, sum#115, count#116, sum#117, count#118] -(101) Exchange -Input [2]: [sum#157, count#158] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#159] +(42) Exchange +Input [5]: [count#114, sum#115, count#116, sum#117, count#118] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#119] -(102) HashAggregate [codegen id : 2] -Input [2]: [sum#157, count#158] +(43) HashAggregate [codegen id : 2] +Input [5]: [count#114, sum#115, count#116, sum#117, count#118] Keys: [] -Functions [1]: [avg(UnscaledValue(ss_ext_discount_amt#153))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_ext_discount_amt#153))#160] -Results [1]: [cast((avg(UnscaledValue(ss_ext_discount_amt#153))#160 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#161] - -Subquery:15 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery#34, [id=#35] -* HashAggregate (109) -+- Exchange (108) - +- * HashAggregate (107) - +- * Project (106) - +- * Filter (105) - +- * ColumnarToRow (104) - +- Scan parquet default.store_sales (103) - - -(103) Scan parquet default.store_sales -Output [3]: [ss_quantity#162, ss_net_paid#163, ss_sold_date_sk#164] -Batched: true -Location [not included in comparison]/{warehouse_dir}/store_sales] -PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] -ReadSchema: struct +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#106)), avg(UnscaledValue(ss_net_paid#107))] +Aggregate Attributes [3]: [count(1)#120, avg(UnscaledValue(ss_ext_discount_amt#106))#121, avg(UnscaledValue(ss_net_paid#107))#122] +Results [3]: [count(1)#120 AS count(1)#123, cast((avg(UnscaledValue(ss_ext_discount_amt#106))#121 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#124, cast((avg(UnscaledValue(ss_net_paid#107))#122 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#125] -(104) ColumnarToRow [codegen id : 1] -Input [3]: [ss_quantity#162, ss_net_paid#163, ss_sold_date_sk#164] +(44) Project [codegen id : 2] +Output [1]: [named_struct(count(1), count(1)#123, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#124, avg(ss_net_paid), avg(ss_net_paid)#125) AS mergedValue#126] +Input [3]: [count(1)#123, avg(ss_ext_discount_amt)#124, avg(ss_net_paid)#125] -(105) Filter [codegen id : 1] -Input [3]: [ss_quantity#162, ss_net_paid#163, ss_sold_date_sk#164] -Condition : ((isnotnull(ss_quantity#162) AND (ss_quantity#162 >= 81)) AND (ss_quantity#162 <= 100)) +Subquery:14 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] -(106) Project [codegen id : 1] -Output [1]: [ss_net_paid#163] -Input [3]: [ss_quantity#162, ss_net_paid#163, ss_sold_date_sk#164] - -(107) HashAggregate [codegen id : 1] -Input [1]: [ss_net_paid#163] -Keys: [] -Functions [1]: [partial_avg(UnscaledValue(ss_net_paid#163))] -Aggregate Attributes [2]: [sum#165, count#166] -Results [2]: [sum#167, count#168] - -(108) Exchange -Input [2]: [sum#167, count#168] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#169] - -(109) HashAggregate [codegen id : 2] -Input [2]: [sum#167, count#168] -Keys: [] -Functions [1]: [avg(UnscaledValue(ss_net_paid#163))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_paid#163))#170] -Results [1]: [cast((avg(UnscaledValue(ss_net_paid#163))#170 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#171] +Subquery:15 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt index 1c42d8f2638c6..66ba481fd2045 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/simplified.txt @@ -2,184 +2,79 @@ WholeStageCodegen (1) Project Subquery #1 WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #1 - WholeStageCodegen (1) - HashAggregate [count,count] - Project - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_sold_date_sk] + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #1 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #1 + ReusedSubquery [mergedValue] #1 Subquery #2 WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] - InputAdapter - Exchange #2 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_sold_date_sk] + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #2 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #2 + ReusedSubquery [mergedValue] #2 Subquery #3 WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] - InputAdapter - Exchange #3 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] - Project [ss_net_paid] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid,ss_sold_date_sk] + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #3 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #3 + ReusedSubquery [mergedValue] #3 Subquery #4 WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #4 - WholeStageCodegen (1) - HashAggregate [count,count] - Project - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_sold_date_sk] + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #4 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #4 + ReusedSubquery [mergedValue] #4 Subquery #5 WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] - InputAdapter - Exchange #5 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_sold_date_sk] - Subquery #6 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] - InputAdapter - Exchange #6 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] - Project [ss_net_paid] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid,ss_sold_date_sk] - Subquery #7 - WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #7 - WholeStageCodegen (1) - HashAggregate [count,count] - Project - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_sold_date_sk] - Subquery #8 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] - InputAdapter - Exchange #8 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_sold_date_sk] - Subquery #9 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] - InputAdapter - Exchange #9 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] - Project [ss_net_paid] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid,ss_sold_date_sk] - Subquery #10 - WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #10 - WholeStageCodegen (1) - HashAggregate [count,count] - Project - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_sold_date_sk] - Subquery #11 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] - InputAdapter - Exchange #11 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_sold_date_sk] - Subquery #12 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] - InputAdapter - Exchange #12 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] - Project [ss_net_paid] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid,ss_sold_date_sk] - Subquery #13 - WholeStageCodegen (2) - HashAggregate [count] [count(1),count(1),count] - InputAdapter - Exchange #13 - WholeStageCodegen (1) - HashAggregate [count,count] - Project - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_sold_date_sk] - Subquery #14 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_ext_discount_amt)),avg(ss_ext_discount_amt),sum,count] - InputAdapter - Exchange #14 - WholeStageCodegen (1) - HashAggregate [ss_ext_discount_amt] [sum,count,sum,count] - Project [ss_ext_discount_amt] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_sold_date_sk] - Subquery #15 - WholeStageCodegen (2) - HashAggregate [sum,count] [avg(UnscaledValue(ss_net_paid)),avg(ss_net_paid),sum,count] - InputAdapter - Exchange #15 - WholeStageCodegen (1) - HashAggregate [ss_net_paid] [sum,count,sum,count] - Project [ss_net_paid] - Filter [ss_quantity] - ColumnarToRow - InputAdapter - Scan parquet default.store_sales [ss_quantity,ss_net_paid,ss_sold_date_sk] + Project [count(1),avg(ss_ext_discount_amt),avg(ss_net_paid)] + HashAggregate [count,sum,count,sum,count] [count(1),avg(UnscaledValue(ss_ext_discount_amt)),avg(UnscaledValue(ss_net_paid)),count(1),avg(ss_ext_discount_amt),avg(ss_net_paid),count,sum,count,sum,count] + InputAdapter + Exchange #5 + WholeStageCodegen (1) + HashAggregate [ss_ext_discount_amt,ss_net_paid] [count,sum,count,sum,count,count,sum,count,sum,count] + Project [ss_ext_discount_amt,ss_net_paid] + Filter [ss_quantity] + ColumnarToRow + InputAdapter + Scan parquet default.store_sales [ss_quantity,ss_ext_discount_amt,ss_net_paid,ss_sold_date_sk] + ReusedSubquery [mergedValue] #5 + ReusedSubquery [mergedValue] #5 Filter [r_reason_sk] ColumnarToRow InputAdapter diff --git a/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala index 726fa341b5c71..43a860d76b470 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/InjectRuntimeFilterSuite.scala @@ -264,28 +264,25 @@ class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSp } } - def getNumBloomFilters(plan: LogicalPlan): Integer = { - val numBloomFilterAggs = plan.collect { - case Filter(condition, _) => condition.collect { - case subquery: org.apache.spark.sql.catalyst.expressions.ScalarSubquery - => subquery.plan.collect { - case Aggregate(_, aggregateExpressions, _) => - aggregateExpressions.map { - case Alias(AggregateExpression(bfAgg : BloomFilterAggregate, _, _, _, _), - _) => - assert(bfAgg.estimatedNumItemsExpression.isInstanceOf[Literal]) - assert(bfAgg.numBitsExpression.isInstanceOf[Literal]) - 1 - }.sum + // `MergeScalarSubqueries` can duplicate subqueries in the optimized plan, but the subqueries will + // be reused in the physical plan. + def getNumBloomFilters(plan: LogicalPlan, scalarSubqueryCTEMultiplicator: Int = 1): Integer = { + print(plan) + val numBloomFilterAggs = plan.collectWithSubqueries { + case Aggregate(_, aggregateExpressions, _) => + aggregateExpressions.collect { + case Alias(AggregateExpression(bfAgg: BloomFilterAggregate, _, _, _, _), _) => + assert(bfAgg.estimatedNumItemsExpression.isInstanceOf[Literal]) + assert(bfAgg.numBitsExpression.isInstanceOf[Literal]) + 1 }.sum - }.sum }.sum val numMightContains = plan.collect { case Filter(condition, _) => condition.collect { case BloomFilterMightContain(_, _) => 1 }.sum }.sum - assert(numBloomFilterAggs == numMightContains) + assert(numBloomFilterAggs == numMightContains * scalarSubqueryCTEMultiplicator) numMightContains } @@ -389,7 +386,7 @@ class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSp planEnabled = sql(query).queryExecution.optimizedPlan checkAnswer(sql(query), expectedAnswer) } - assert(getNumBloomFilters(planEnabled) == getNumBloomFilters(planDisabled) + 2) + assert(getNumBloomFilters(planEnabled, 2) == getNumBloomFilters(planDisabled) + 2) } } @@ -417,10 +414,10 @@ class InjectRuntimeFilterSuite extends QueryTest with SQLTestUtils with SharedSp checkAnswer(sql(query), expectedAnswer) } if (numFilterThreshold < 3) { - assert(getNumBloomFilters(planEnabled) == getNumBloomFilters(planDisabled) - + numFilterThreshold) + assert(getNumBloomFilters(planEnabled, numFilterThreshold) == + getNumBloomFilters(planDisabled) + numFilterThreshold) } else { - assert(getNumBloomFilters(planEnabled) == getNumBloomFilters(planDisabled) + 2) + assert(getNumBloomFilters(planEnabled, 2) == getNumBloomFilters(planDisabled) + 2) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 92c373a33fb24..221663c61e18d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -2018,4 +2018,171 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark }.getMessage.contains("Correlated column is not allowed in predicate")) } } + + test("Merge non-correlated scalar subqueries") { + Seq(false, true).foreach { enableAQE => + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> enableAQE.toString) { + val df = sql( + """ + |SELECT + | (SELECT avg(key) FROM testData), + | (SELECT sum(key) FROM testData), + | (SELECT count(distinct key) FROM testData) + """.stripMargin) + + checkAnswer(df, Row(50.5, 5050, 100) :: Nil) + + val plan = df.queryExecution.executedPlan + val subqueryIds = collectWithSubqueries(plan) { case s: SubqueryExec => s.id } + val reusedSubqueryIds = collectWithSubqueries(plan) { + case rs: ReusedSubqueryExec => rs.child.id + } + + assert(subqueryIds.size == 1, "Missing or unexpected SubqueryExec in the plan") + assert(reusedSubqueryIds.size == 2, + "Missing or unexpected reused ReusedSubqueryExec in the plan") + } + } + } + + test("Merge non-correlated scalar subqueries in a subquery") { + Seq(false, true).foreach { enableAQE => + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> enableAQE.toString) { + val df = sql( + """ + |SELECT ( + | SELECT + | SUM( + | (SELECT avg(key) FROM testData) + + | (SELECT sum(key) FROM testData) + + | (SELECT count(distinct key) FROM testData)) + | FROM testData + |) + """.stripMargin) + + checkAnswer(df, Row(520050.0) :: Nil) + + val plan = df.queryExecution.executedPlan + val subqueryIds = collectWithSubqueries(plan) { case s: SubqueryExec => s.id } + val reusedSubqueryIds = collectWithSubqueries(plan) { + case rs: ReusedSubqueryExec => rs.child.id + } + + if (enableAQE) { + assert(subqueryIds.size == 3, "Missing or unexpected SubqueryExec in the plan") + assert(reusedSubqueryIds.size == 4, + "Missing or unexpected reused ReusedSubqueryExec in the plan") + } else { + assert(subqueryIds.size == 2, "Missing or unexpected SubqueryExec in the plan") + assert(reusedSubqueryIds.size == 5, + "Missing or unexpected reused ReusedSubqueryExec in the plan") + } + } + } + } + + test("Merge non-correlated scalar subqueries from different levels") { + Seq(false, true).foreach { enableAQE => + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> enableAQE.toString) { + val df = sql( + """ + |SELECT + | (SELECT avg(key) FROM testData), + | ( + | SELECT + | SUM( + | (SELECT sum(key) FROM testData) + | ) + | FROM testData + | ) + """.stripMargin) + + checkAnswer(df, Row(50.5, 505000) :: Nil) + + val plan = df.queryExecution.executedPlan + val subqueryIds = collectWithSubqueries(plan) { case s: SubqueryExec => s.id } + val reusedSubqueryIds = collectWithSubqueries(plan) { + case rs: ReusedSubqueryExec => rs.child.id + } + + assert(subqueryIds.size == 2, "Missing or unexpected SubqueryExec in the plan") + assert(reusedSubqueryIds.size == 2, + "Missing or unexpected reused ReusedSubqueryExec in the plan") + } + } + } + + test("Merge non-correlated scalar subqueries from different parent plans") { + Seq(false, true).foreach { enableAQE => + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> enableAQE.toString) { + val df = sql( + """ + |SELECT + | ( + | SELECT + | SUM( + | (SELECT avg(key) FROM testData) + | ) + | FROM testData + | ), + | ( + | SELECT + | SUM( + | (SELECT sum(key) FROM testData) + | ) + | FROM testData + | ) + """.stripMargin) + + checkAnswer(df, Row(5050.0, 505000) :: Nil) + + val plan = df.queryExecution.executedPlan + val subqueryIds = collectWithSubqueries(plan) { case s: SubqueryExec => s.id } + val reusedSubqueryIds = collectWithSubqueries(plan) { + case rs: ReusedSubqueryExec => rs.child.id + } + + if (enableAQE) { + assert(subqueryIds.size == 3, "Missing or unexpected SubqueryExec in the plan") + assert(reusedSubqueryIds.size == 3, + "Missing or unexpected reused ReusedSubqueryExec in the plan") + } else { + assert(subqueryIds.size == 2, "Missing or unexpected SubqueryExec in the plan") + assert(reusedSubqueryIds.size == 4, + "Missing or unexpected reused ReusedSubqueryExec in the plan") + } + } + } + } + + test("Merge non-correlated scalar subqueries with conflicting names") { + Seq(false, true).foreach { enableAQE => + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> enableAQE.toString) { + val df = sql( + """ + |SELECT + | (SELECT avg(key) AS key FROM testData), + | (SELECT sum(key) AS key FROM testData), + | (SELECT count(distinct key) AS key FROM testData) + """.stripMargin) + + checkAnswer(df, Row(50.5, 5050, 100) :: Nil) + + val plan = df.queryExecution.executedPlan + val subqueryIds = collectWithSubqueries(plan) { case s: SubqueryExec => s.id } + val reusedSubqueryIds = collectWithSubqueries(plan) { + case rs: ReusedSubqueryExec => rs.child.id + } + + assert(subqueryIds.size == 1, "Missing or unexpected SubqueryExec in the plan") + assert(reusedSubqueryIds.size == 2, + "Missing or unexpected reused ReusedSubqueryExec in the plan") + } + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala index 66f01a0709110..743ec41dbe7cd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/LogicalPlanTagInSparkPlanSuite.scala @@ -21,7 +21,7 @@ import scala.reflect.ClassTag import org.apache.spark.sql.TPCDSQuerySuite import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Final} -import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Generate, Join, LocalRelation, LogicalPlan, Range, Sample, Union, Window} +import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Generate, Join, LocalRelation, LogicalPlan, Range, Sample, Union, Window, WithCTE} import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.execution.columnar.{InMemoryRelation, InMemoryTableScanExec} @@ -108,7 +108,11 @@ class LogicalPlanTagInSparkPlanSuite extends TPCDSQuerySuite with DisableAdaptiv // logical = Project(Filter(Scan A)) // physical = ProjectExec(ScanExec A) // we only check that leaf modes match between logical and physical plan. - val logicalLeaves = getLogicalPlan(actualPlan).collectLeaves() + val logicalPlan = getLogicalPlan(actualPlan) match { + case w: WithCTE => w.plan + case o => o + } + val logicalLeaves = logicalPlan.collectLeaves() val physicalLeaves = plan.collectLeaves() assert(logicalLeaves.length == 1) assert(physicalLeaves.length == 1) From 44e90f37ad653d1ad3f8475926c261b9dba78a32 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 20 Apr 2022 21:41:58 +0800 Subject: [PATCH 170/535] [SPARK-38967][SQL] Turn "spark.sql.ansi.strictIndexOperator" into an internal configuration ### What changes were proposed in this pull request? Currently, most the ANSI error message shows the hint "If necessary set spark.sql.ansi.enabled to false to bypass this error." There is only one special case: "Map key not exist" or "array index out of bound" from the `[]` operator. It shows the config spark.sql.ansi.strictIndexOperator instead. This one special case can confuse users. To make it simple: - Turn "spark.sql.ansi.strictIndexOperator" into an internal configuration - Show the configuration `spark.sql.ansi.enabled` in error messages instead - If it is "map key not exist" error, show the hint for using `try_element_at`. Otherwise, we don't show it. For array, `[]` operator is using 0-based index while `try_element_at` is using 1-based index. ### Why are the changes needed? Make the hints in ANSI runtime error message simple and consistent ### Does this PR introduce _any_ user-facing change? No, the new configuration is not released yet. ### How was this patch tested? Existing UT Closes #36282 from gengliangwang/updateErrorMsg. Authored-by: Gengliang Wang Signed-off-by: Wenchen Fan (cherry picked from commit 276bdbafe83a5c0b8425a20eb8101a630be8b752) Signed-off-by: Wenchen Fan --- core/src/main/resources/error/error-classes.json | 3 --- .../expressions/collectionOperations.scala | 2 -- .../expressions/complexTypeExtractors.scala | 7 ++----- .../spark/sql/errors/QueryExecutionErrors.scala | 16 ++++------------ .../org/apache/spark/sql/internal/SQLConf.scala | 1 + .../sql-tests/results/ansi/array.sql.out | 4 ++-- .../resources/sql-tests/results/ansi/map.sql.out | 2 +- 7 files changed, 10 insertions(+), 25 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 09ceca1414fcb..13ec8f022c5fd 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -121,9 +121,6 @@ "sqlState" : "42000" }, "MAP_KEY_DOES_NOT_EXIST" : { - "message" : [ "Key %s does not exist. If necessary set %s to false to bypass this error.%s" ] - }, - "MAP_KEY_DOES_NOT_EXIST_IN_ELEMENT_AT" : { "message" : [ "Key %s does not exist. To return NULL instead, use 'try_element_at'. If necessary set %s to false to bypass this error.%s" ] }, "MISSING_COLUMN" : { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index ca008391d1bde..1b42ea5eb8748 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -2116,8 +2116,6 @@ case class ElementAt( case MapType(_, valueType, _) => valueType } - override val isElementAtFunction: Boolean = true - override def inputTypes: Seq[AbstractDataType] = { (left.dataType, right.dataType) match { case (arr: ArrayType, e2: IntegralType) if (e2 != LongType) => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 3cd404a9c0d5f..e889d37411ccf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -341,8 +341,6 @@ trait GetArrayItemUtil { */ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { - protected val isElementAtFunction: Boolean = false - // todo: current search is O(n), improve it. def getValueEval( value: Any, @@ -367,7 +365,7 @@ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { if (!found) { if (failOnError) { - throw QueryExecutionErrors.mapKeyNotExistError(ordinal, isElementAtFunction, origin.context) + throw QueryExecutionErrors.mapKeyNotExistError(ordinal, origin.context) } else { null } @@ -403,8 +401,7 @@ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { lazy val errorContext = ctx.addReferenceObj("errCtx", origin.context) nullSafeCodeGen(ctx, ev, (eval1, eval2) => { val keyNotFoundBranch = if (failOnError) { - s"throw QueryExecutionErrors.mapKeyNotExistError(" + - s"$eval2, $isElementAtFunction, $errorContext);" + s"throw QueryExecutionErrors.mapKeyNotExistError($eval2, $errorContext);" } else { s"${ev.isNull} = true;" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 3cc8c3a6667f6..cbae61a66d5e1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -152,7 +152,7 @@ object QueryExecutionErrors extends QueryErrorsBase { } def invalidArrayIndexError(index: Int, numElements: Int): ArrayIndexOutOfBoundsException = { - invalidArrayIndexErrorInternal(index, numElements, SQLConf.ANSI_STRICT_INDEX_OPERATOR.key) + invalidArrayIndexErrorInternal(index, numElements, SQLConf.ANSI_ENABLED.key) } def invalidInputIndexError(index: Int, numElements: Int): ArrayIndexOutOfBoundsException = { @@ -176,17 +176,9 @@ object QueryExecutionErrors extends QueryErrorsBase { Array(toSQLValue(index), toSQLValue(numElements), SQLConf.ANSI_ENABLED.key)) } - def mapKeyNotExistError( - key: Any, - isElementAtFunction: Boolean, - context: String): NoSuchElementException = { - if (isElementAtFunction) { - new SparkNoSuchElementException(errorClass = "MAP_KEY_DOES_NOT_EXIST_IN_ELEMENT_AT", - messageParameters = Array(toSQLValue(key), SQLConf.ANSI_ENABLED.key, context)) - } else { - new SparkNoSuchElementException(errorClass = "MAP_KEY_DOES_NOT_EXIST", - messageParameters = Array(toSQLValue(key), SQLConf.ANSI_STRICT_INDEX_OPERATOR.key, context)) - } + def mapKeyNotExistError(key: Any, context: String): NoSuchElementException = { + new SparkNoSuchElementException(errorClass = "MAP_KEY_DOES_NOT_EXIST", + messageParameters = Array(toSQLValue(key), SQLConf.ANSI_ENABLED.key, context)) } def inputTypeUnsupportedError(dataType: DataType): Throwable = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index e8d99a2d44d72..f97b7f8f0048d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2855,6 +2855,7 @@ object SQLConf { .createWithDefault(false) val ANSI_STRICT_INDEX_OPERATOR = buildConf("spark.sql.ansi.strictIndexOperator") + .internal() .doc(s"When true and '${ANSI_ENABLED.key}' is true, accessing complex SQL types via [] " + "operator will throw an exception if array index is out of bound, or map key does not " + "exist. Otherwise, Spark will return a null result when accessing an invalid index.") diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out index 00ac2eeba7ffd..187630d78d2f0 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out @@ -254,7 +254,7 @@ select array(1, 2, 3)[5] struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -Invalid index: 5, numElements: 3. If necessary set spark.sql.ansi.strictIndexOperator to false to bypass this error. +Invalid index: 5, numElements: 3. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query @@ -263,7 +263,7 @@ select array(1, 2, 3)[-1] struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -Invalid index: -1, numElements: 3. If necessary set spark.sql.ansi.strictIndexOperator to false to bypass this error. +Invalid index: -1, numElements: 3. If necessary set spark.sql.ansi.enabled to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out index 5ba37278fbcb5..fa41865dd7bdc 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out @@ -20,7 +20,7 @@ select map(1, 'a', 2, 'b')[5] struct<> -- !query output org.apache.spark.SparkNoSuchElementException -Key 5 does not exist. If necessary set spark.sql.ansi.strictIndexOperator to false to bypass this error. +Key 5 does not exist. To return NULL instead, use 'try_element_at'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select map(1, 'a', 2, 'b')[5] ^^^^^^^^^^^^^^^^^^^^^^ From 2e102b8bd233441bb2dd74e1870de5b8218d5331 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 20 Apr 2022 22:52:04 +0300 Subject: [PATCH 171/535] [SPARK-38949][SQL][3.3] Wrap SQL statements by double quotes in error messages ### What changes were proposed in this pull request? In the PR, I propose to wrap any SQL statement in error messages by double quotes "", and apply new implementation of `QueryErrorsBase.toSQLStmt()` to all exceptions in `Query.*Errors` w/ error classes. Also this PR modifies all affected tests, see the list in the section "How was this patch tested?". ### Why are the changes needed? To improve user experience with Spark SQL by highlighting SQL statements in error massage and make them more visible to users. ### Does this PR introduce _any_ user-facing change? Yes. The changes might influence on error messages that are visible to users. Before: ```sql The operation DESC PARTITION is not allowed ``` After: ```sql The operation "DESC PARTITION" is not allowed ``` ### How was this patch tested? By running affected test suites: ``` $ build/sbt "sql/testOnly *QueryExecutionErrorsSuite" $ build/sbt "sql/testOnly *QueryParsingErrorsSuite" $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite" $ build/sbt "test:testOnly *QueryCompilationErrorsDSv2Suite" $ build/sbt "test:testOnly *ExtractPythonUDFFromJoinConditionSuite" $ build/sbt "testOnly *PlanParserSuite" $ build/sbt "sql/testOnly *SQLQueryTestSuite -- -z transform.sql" $ build/sbt "sql/testOnly *SQLQueryTestSuite -- -z join-lateral.sql" $ build/sbt "sql/testOnly *SQLQueryTestSuite -- -z describe.sql" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 5aba2b38beae6e1baf6f0c6f9eb3b65cf607fe77) Signed-off-by: Max Gekk Closes #36286 from MaxGekk/error-class-apply-toSQLStmt-3.3. Authored-by: Max Gekk Signed-off-by: Max Gekk --- python/pyspark/sql/tests/test_udf.py | 13 +++--- .../sql/catalyst/parser/AstBuilder.scala | 2 +- .../sql/errors/QueryCompilationErrors.scala | 7 ++- .../spark/sql/errors/QueryErrorsBase.scala | 7 +++ .../sql/errors/QueryExecutionErrors.scala | 4 +- .../spark/sql/errors/QueryParsingErrors.scala | 44 ++++++++++++++----- ...tractPythonUDFFromJoinConditionSuite.scala | 4 +- .../sql/catalyst/parser/DDLParserSuite.scala | 7 ++- .../sql/catalyst/parser/PlanParserSuite.scala | 2 +- .../sql-tests/results/join-lateral.sql.out | 4 +- .../sql-tests/results/transform.sql.out | 4 +- .../QueryCompilationErrorsDSv2Suite.scala | 2 +- .../errors/QueryCompilationErrorsSuite.scala | 2 +- .../errors/QueryExecutionErrorsSuite.scala | 4 +- .../sql/errors/QueryParsingErrorsSuite.scala | 31 ++++++------- .../SparkScriptTransformationSuite.scala | 2 +- 16 files changed, 83 insertions(+), 56 deletions(-) diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index 805d5a8dfec9a..e40c3ba0d6479 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -258,15 +258,16 @@ def test_udf_not_supported_in_join_condition(self): def runWithJoinType(join_type, type_string): with self.assertRaisesRegex( AnalysisException, - "Using PythonUDF in join condition of join type %s is not supported" % type_string, + """Using PythonUDF in join condition of join type "%s" is not supported""" + % type_string, ): left.join(right, [f("a", "b"), left.a1 == right.b1], join_type).collect() - runWithJoinType("full", "FullOuter") - runWithJoinType("left", "LeftOuter") - runWithJoinType("right", "RightOuter") - runWithJoinType("leftanti", "LeftAnti") - runWithJoinType("leftsemi", "LeftSemi") + runWithJoinType("full", "FULL OUTER") + runWithJoinType("left", "LEFT OUTER") + runWithJoinType("right", "RIGHT OUTER") + runWithJoinType("leftanti", "LEFT ANTI") + runWithJoinType("leftsemi", "LEFT SEMI") def test_udf_as_join_condition(self): left = self.spark.createDataFrame([Row(a=1, a1=1, a2=1), Row(a=2, a1=2, a2=2)]) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index e788368604fa1..60e691ba4acbb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1161,7 +1161,7 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit } if (join.LATERAL != null) { if (!Seq(Inner, Cross, LeftOuter).contains(joinType)) { - throw QueryParsingErrors.unsupportedLateralJoinTypeError(ctx, joinType.toString) + throw QueryParsingErrors.unsupportedLateralJoinTypeError(ctx, joinType.sql) } LateralJoin(left, LateralSubquery(plan(join.right)), joinType, condition) } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 65b59655be07c..3a8cd68966656 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -94,7 +94,9 @@ object QueryCompilationErrors extends QueryErrorsBase { def unsupportedIfNotExistsError(tableName: String): Throwable = { new AnalysisException( errorClass = "UNSUPPORTED_FEATURE", - messageParameters = Array(s"IF NOT EXISTS for the table '$tableName' by INSERT INTO.")) + messageParameters = Array( + s"${toSQLStmt("IF NOT EXISTS")} for the table '$tableName' " + + s"by ${toSQLStmt("INSERT INTO")}.")) } def nonPartitionColError(partitionName: String): Throwable = { @@ -1576,7 +1578,8 @@ object QueryCompilationErrors extends QueryErrorsBase { new AnalysisException( errorClass = "UNSUPPORTED_FEATURE", messageParameters = Array( - s"Using PythonUDF in join condition of join type $joinType is not supported")) + "Using PythonUDF in join condition of join type " + + s"${toSQLStmt(joinType.sql)} is not supported.")) } def conflictingAttributesInJoinConditionError( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index 7002f19f9fc84..b115891f370ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.errors +import java.util.Locale + import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.types.{DataType, DoubleType, FloatType} @@ -45,6 +47,11 @@ trait QueryErrorsBase { litToErrorValue(Literal.create(v, t)) } + // Quote sql statements in error messages. + def toSQLStmt(text: String): String = { + "\"" + text.toUpperCase(Locale.ROOT) + "\"" + } + def toSQLType(t: DataType): String = { t.sql } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index cbae61a66d5e1..86dcf4cfc3ce7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1927,13 +1927,13 @@ object QueryExecutionErrors extends QueryErrorsBase { def repeatedPivotsUnsupportedError(): Throwable = { new SparkUnsupportedOperationException( errorClass = "UNSUPPORTED_FEATURE", - messageParameters = Array("Repeated pivots.")) + messageParameters = Array(s"Repeated ${toSQLStmt("pivot")}s.")) } def pivotNotAfterGroupByUnsupportedError(): Throwable = { new SparkUnsupportedOperationException( errorClass = "UNSUPPORTED_FEATURE", - messageParameters = Array("Pivot not after a groupBy.")) + messageParameters = Array(s"${toSQLStmt("pivot")} not after a ${toSQLStmt("group by")}.")) } def invalidAesKeyLengthError(actualLength: Int): RuntimeException = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index ad0973ccbb44d..39c1944bbba9a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -91,13 +91,19 @@ object QueryParsingErrors extends QueryErrorsBase { } def transformNotSupportQuantifierError(ctx: ParserRuleContext): Throwable = { - new ParseException("UNSUPPORTED_FEATURE", - Array("TRANSFORM does not support DISTINCT/ALL in inputs"), ctx) + new ParseException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array(s"${toSQLStmt("TRANSFORM")} does not support" + + s" ${toSQLStmt("DISTINCT")}/${toSQLStmt("ALL")} in inputs"), + ctx) } def transformWithSerdeUnsupportedError(ctx: ParserRuleContext): Throwable = { - new ParseException("UNSUPPORTED_FEATURE", - Array("TRANSFORM with serde is only supported in hive mode"), ctx) + new ParseException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array( + s"${toSQLStmt("TRANSFORM")} with serde is only supported in hive mode"), + ctx) } def lateralWithPivotInFromClauseNotAllowedError(ctx: FromClauseContext): Throwable = { @@ -105,19 +111,31 @@ object QueryParsingErrors extends QueryErrorsBase { } def lateralJoinWithNaturalJoinUnsupportedError(ctx: ParserRuleContext): Throwable = { - new ParseException("UNSUPPORTED_FEATURE", Array("LATERAL join with NATURAL join."), ctx) + new ParseException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array(s"${toSQLStmt("LATERAL")} join with ${toSQLStmt("NATURAL")} join."), + ctx) } def lateralJoinWithUsingJoinUnsupportedError(ctx: ParserRuleContext): Throwable = { - new ParseException("UNSUPPORTED_FEATURE", Array("LATERAL join with USING join."), ctx) + new ParseException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array(s"${toSQLStmt("LATERAL")} join with ${toSQLStmt("USING")} join."), + ctx) } def unsupportedLateralJoinTypeError(ctx: ParserRuleContext, joinType: String): Throwable = { - new ParseException("UNSUPPORTED_FEATURE", Array(s"LATERAL join type '$joinType'."), ctx) + new ParseException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array(s"${toSQLStmt("LATERAL")} join type ${toSQLStmt(joinType)}."), + ctx) } def invalidLateralJoinRelationError(ctx: RelationPrimaryContext): Throwable = { - new ParseException("INVALID_SQL_SYNTAX", Array("LATERAL can only be used with subquery."), ctx) + new ParseException( + errorClass = "INVALID_SQL_SYNTAX", + messageParameters = Array(s"${toSQLStmt("LATERAL")} can only be used with subquery."), + ctx) } def repetitiveWindowDefinitionError(name: String, ctx: WindowClauseContext): Throwable = { @@ -136,7 +154,7 @@ object QueryParsingErrors extends QueryErrorsBase { } def naturalCrossJoinUnsupportedError(ctx: RelationContext): Throwable = { - new ParseException("UNSUPPORTED_FEATURE", Array("NATURAL CROSS JOIN."), ctx) + new ParseException("UNSUPPORTED_FEATURE", Array(toSQLStmt("NATURAL CROSS JOIN") + "."), ctx) } def emptyInputForTableSampleError(ctx: ParserRuleContext): Throwable = { @@ -298,14 +316,18 @@ object QueryParsingErrors extends QueryErrorsBase { } def showFunctionsUnsupportedError(identifier: String, ctx: IdentifierContext): Throwable = { - new ParseException(s"SHOW $identifier FUNCTIONS not supported", ctx) + new ParseException( + errorClass = "INVALID_SQL_SYNTAX", + messageParameters = Array( + s"${toSQLStmt("SHOW")} $identifier ${toSQLStmt("FUNCTIONS")} not supported"), + ctx) } def showFunctionsInvalidPatternError(pattern: String, ctx: ParserRuleContext): Throwable = { new ParseException( errorClass = "INVALID_SQL_SYNTAX", messageParameters = Array( - s"Invalid pattern in SHOW FUNCTIONS: $pattern. " + + s"Invalid pattern in ${toSQLStmt("SHOW FUNCTIONS")}: $pattern. " + s"It must be a ${toSQLType(StringType)} literal."), ctx) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ExtractPythonUDFFromJoinConditionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ExtractPythonUDFFromJoinConditionSuite.scala index 65c8f5d300c62..1e58f5c94b073 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ExtractPythonUDFFromJoinConditionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ExtractPythonUDFFromJoinConditionSuite.scala @@ -187,9 +187,9 @@ class ExtractPythonUDFFromJoinConditionSuite extends PlanTest { condition = Some(unevaluableJoinCond)) Optimize.execute(query.analyze) } - assert(e.message.contentEquals( + assert(e.message == "The feature is not supported: " + - s"Using PythonUDF in join condition of join type $joinType is not supported")) + s"""Using PythonUDF in join condition of join type "${joinType.sql}" is not supported.""") val query2 = testRelationLeft.join( testRelationRight, diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 472506fa9070b..bc5380e27f536 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2048,12 +2048,11 @@ class DDLParserSuite extends AnalysisTest { comparePlans( parsePlan("SHOW FUNCTIONS IN db LIKE 'funct*'"), ShowFunctions(UnresolvedNamespace(Seq("db")), true, true, Some("funct*"))) - val sql = "SHOW other FUNCTIONS" - intercept(sql, s"$sql not supported") + intercept("SHOW other FUNCTIONS", "\"SHOW\" other \"FUNCTIONS\" not supported") intercept("SHOW FUNCTIONS IN db f1", - "Invalid pattern in SHOW FUNCTIONS: f1") + "Invalid pattern in \"SHOW FUNCTIONS\": f1") intercept("SHOW FUNCTIONS IN db LIKE f1", - "Invalid pattern in SHOW FUNCTIONS: f1") + "Invalid pattern in \"SHOW FUNCTIONS\": f1") // The legacy syntax. comparePlans( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 688c0d1237320..fb9fdfb859826 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -1254,7 +1254,7 @@ class PlanParserSuite extends AnalysisTest { | "escapeChar" = "\\") |FROM testData """.stripMargin, - "TRANSFORM with serde is only supported in hive mode") + "\"TRANSFORM\" with serde is only supported in hive mode") } diff --git a/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out b/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out index cc1619813dd55..6e47579a9b011 100644 --- a/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join-lateral.sql.out @@ -153,7 +153,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -The feature is not supported: LATERAL join with NATURAL join.(line 1, pos 14) +The feature is not supported: "LATERAL" join with "NATURAL" join.(line 1, pos 14) == SQL == SELECT * FROM t1 NATURAL JOIN LATERAL (SELECT c1 + c2 AS c2) @@ -167,7 +167,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -The feature is not supported: LATERAL join with USING join.(line 1, pos 14) +The feature is not supported: "LATERAL" join with "USING" join.(line 1, pos 14) == SQL == SELECT * FROM t1 JOIN LATERAL (SELECT c1 + c2 AS c2) USING (c2) diff --git a/sql/core/src/test/resources/sql-tests/results/transform.sql.out b/sql/core/src/test/resources/sql-tests/results/transform.sql.out index be57390761ba3..69fe58e1343d1 100644 --- a/sql/core/src/test/resources/sql-tests/results/transform.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/transform.sql.out @@ -719,7 +719,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -The feature is not supported: TRANSFORM does not support DISTINCT/ALL in inputs(line 1, pos 17) +The feature is not supported: "TRANSFORM" does not support "DISTINCT"/"ALL" in inputs(line 1, pos 17) == SQL == SELECT TRANSFORM(DISTINCT b, a, c) @@ -739,7 +739,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -The feature is not supported: TRANSFORM does not support DISTINCT/ALL in inputs(line 1, pos 17) +The feature is not supported: "TRANSFORM" does not support "DISTINCT"/"ALL" in inputs(line 1, pos 17) == SQL == SELECT TRANSFORM(ALL b, a, c) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsDSv2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsDSv2Suite.scala index bfea3f535dd94..be8e65249202b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsDSv2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsDSv2Suite.scala @@ -43,7 +43,7 @@ class QueryCompilationErrorsDSv2Suite checkAnswer(spark.table(tbl), spark.emptyDataFrame) assert(e.getMessage === "The feature is not supported: " + - s"IF NOT EXISTS for the table '$tbl' by INSERT INTO.") + s""""IF NOT EXISTS" for the table '$tbl' by "INSERT INTO".""") assert(e.getErrorClass === "UNSUPPORTED_FEATURE") assert(e.getSqlState === "0A000") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index cac1ef67fac40..6a7da405fcc03 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -149,7 +149,7 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { assert(e.getSqlState === "0A000") assert(e.message === "The feature is not supported: " + - "Using PythonUDF in join condition of join type LeftOuter is not supported") + "Using PythonUDF in join condition of join type \"LEFT OUTER\" is not supported.") } test("UNSUPPORTED_FEATURE: Using pandas UDF aggregate expression with pivot") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index f73d1e1c3c5b1..9ff57859acb90 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -156,7 +156,7 @@ class QueryExecutionErrorsSuite extends QueryTest } assert(e1.getErrorClass === "UNSUPPORTED_FEATURE") assert(e1.getSqlState === "0A000") - assert(e1.getMessage === "The feature is not supported: Repeated pivots.") + assert(e1.getMessage === """The feature is not supported: Repeated "PIVOT"s.""") val e2 = intercept[SparkUnsupportedOperationException] { trainingSales @@ -167,7 +167,7 @@ class QueryExecutionErrorsSuite extends QueryTest } assert(e2.getErrorClass === "UNSUPPORTED_FEATURE") assert(e2.getSqlState === "0A000") - assert(e2.getMessage === "The feature is not supported: Pivot not after a groupBy.") + assert(e2.getMessage === """The feature is not supported: "PIVOT" not after a "GROUP BY".""") } test("INCONSISTENT_BEHAVIOR_CROSS_VERSION: " + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala index 5610c4d000bfa..5a47ce5ae73e4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala @@ -21,6 +21,8 @@ import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.test.SharedSparkSession +// Turn of the length check because most of the tests check entire error messages +// scalastyle:off line.size.limit class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { def validateParsingError( sqlText: String, @@ -42,7 +44,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { sqlState = "0A000", message = """ - |The feature is not supported: LATERAL join with NATURAL join.(line 1, pos 14) + |The feature is not supported: "LATERAL" join with "NATURAL" join.(line 1, pos 14) | |== SQL == |SELECT * FROM t1 NATURAL JOIN LATERAL (SELECT c1 + c2 AS c2) @@ -57,7 +59,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { sqlState = "0A000", message = """ - |The feature is not supported: LATERAL join with USING join.(line 1, pos 14) + |The feature is not supported: "LATERAL" join with "USING" join.(line 1, pos 14) | |== SQL == |SELECT * FROM t1 JOIN LATERAL (SELECT c1 + c2 AS c2) USING (c2) @@ -66,21 +68,17 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { } test("UNSUPPORTED_FEATURE: Unsupported LATERAL join type") { - Seq( - ("RIGHT OUTER", "RightOuter"), - ("FULL OUTER", "FullOuter"), - ("LEFT SEMI", "LeftSemi"), - ("LEFT ANTI", "LeftAnti")).foreach { pair => + Seq("RIGHT OUTER", "FULL OUTER", "LEFT SEMI", "LEFT ANTI").foreach { joinType => validateParsingError( - sqlText = s"SELECT * FROM t1 ${pair._1} JOIN LATERAL (SELECT c1 + c2 AS c3) ON c2 = c3", + sqlText = s"SELECT * FROM t1 $joinType JOIN LATERAL (SELECT c1 + c2 AS c3) ON c2 = c3", errorClass = "UNSUPPORTED_FEATURE", sqlState = "0A000", message = s""" - |The feature is not supported: LATERAL join type '${pair._2}'.(line 1, pos 14) + |The feature is not supported: "LATERAL" join type "$joinType".(line 1, pos 14) | |== SQL == - |SELECT * FROM t1 ${pair._1} JOIN LATERAL (SELECT c1 + c2 AS c3) ON c2 = c3 + |SELECT * FROM t1 $joinType JOIN LATERAL (SELECT c1 + c2 AS c3) ON c2 = c3 |--------------^^^ |""".stripMargin) } @@ -101,7 +99,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { sqlState = "42000", message = s""" - |Invalid SQL syntax: LATERAL can only be used with subquery.(line 1, pos $pos) + |Invalid SQL syntax: "LATERAL" can only be used with subquery.(line 1, pos $pos) | |== SQL == |$sqlText @@ -117,7 +115,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { sqlState = "0A000", message = """ - |The feature is not supported: NATURAL CROSS JOIN.(line 1, pos 14) + |The feature is not supported: "NATURAL CROSS JOIN".(line 1, pos 14) | |== SQL == |SELECT * FROM a NATURAL CROSS JOIN b @@ -177,8 +175,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { sqlState = "0A000", message = """ - |The feature is not supported: """.stripMargin + - """TRANSFORM does not support DISTINCT/ALL in inputs(line 1, pos 17) + |The feature is not supported: "TRANSFORM" does not support "DISTINCT"/"ALL" in inputs(line 1, pos 17) | |== SQL == |SELECT TRANSFORM(DISTINCT a) USING 'a' FROM t @@ -194,12 +191,10 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { sqlState = "0A000", message = """ - |The feature is not supported: """.stripMargin + - """TRANSFORM with serde is only supported in hive mode(line 1, pos 0) + |The feature is not supported: "TRANSFORM" with serde is only supported in hive mode(line 1, pos 0) | |== SQL == - |SELECT TRANSFORM(a) ROW FORMAT SERDE """.stripMargin + - """'org.apache.hadoop.hive.serde2.OpenCSVSerde' USING 'a' FROM t + |SELECT TRANSFORM(a) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde' USING 'a' FROM t |^^^ |""".stripMargin) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkScriptTransformationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkScriptTransformationSuite.scala index 5638743b7633d..1f431e173b3c7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkScriptTransformationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkScriptTransformationSuite.scala @@ -56,7 +56,7 @@ class SparkScriptTransformationSuite extends BaseScriptTransformationSuite with |FROM v """.stripMargin) }.getMessage - assert(e.contains("TRANSFORM with serde is only supported in hive mode")) + assert(e.contains("\"TRANSFORM\" with serde is only supported in hive mode")) } } } From 57078110d0a60eff9e4ecda4252c14549bb05ed0 Mon Sep 17 00:00:00 2001 From: sychen Date: Thu, 21 Apr 2022 11:34:39 +0900 Subject: [PATCH 172/535] [SPARK-38936][SQL] Script transform feed thread should have name ### What changes were proposed in this pull request? re-add thread name(`Thread-ScriptTransformation-Feed`). ### Why are the changes needed? Lost feed thread name after [SPARK-32105](https://issues.apache.org/jira/browse/SPARK-32105) refactoring. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? exist UT Closes #36245 from cxzl25/SPARK-38936. Authored-by: sychen Signed-off-by: Hyukjin Kwon (cherry picked from commit 4dc12eb54544a12ff7ddf078ca8bcec9471212c3) Signed-off-by: Hyukjin Kwon --- .../spark/sql/execution/BaseScriptTransformationExec.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index 60400506f9fb9..bfc2bc7cd11d5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -273,6 +273,7 @@ abstract class BaseScriptTransformationWriterThread extends Thread with Logging def taskContext: TaskContext def conf: Configuration + setName(s"Thread-${this.getClass.getSimpleName}-Feed") setDaemon(true) @volatile protected var _exception: Throwable = null @@ -328,7 +329,7 @@ abstract class BaseScriptTransformationWriterThread extends Thread with Logging // Javadoc this call will not throw an exception: _exception = t proc.destroy() - logError("Thread-ScriptTransformation-Feed exit cause by: ", t) + logError(s"Thread-${this.getClass.getSimpleName}-Feed exit cause by: ", t) } finally { try { Utils.tryLogNonFatalError(outputStream.close()) From c2fa3b80e6807d4f66d23795d54c2ee59478358b Mon Sep 17 00:00:00 2001 From: huaxingao Date: Thu, 21 Apr 2022 16:16:47 +0800 Subject: [PATCH 173/535] [SPARK-38432][SQL][FOLLOWUP] Fix problems in And/Or/Not to V2 Filter ### What changes were proposed in this pull request? Instead of having ``` override def toV2: Predicate = new Predicate("AND", Seq(left, right).map(_.toV2).toArray) ``` I think we should construct a V2 `And` directly. ``` override def toV2: Predicate = new org.apache.spark.sql.connector.expressions.filter.And(left.toV2, right.toV2) ``` same for `Or` and `Not`. ### Why are the changes needed? bug fixing ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New tests Closes #36290 from huaxingao/toV1. Authored-by: huaxingao Signed-off-by: Wenchen Fan (cherry picked from commit 36fc8bd185da99b64954ca0dd393b452fb788226) Signed-off-by: Wenchen Fan --- .../apache/spark/sql/sources/filters.scala | 8 +- .../datasources/v2/V2PredicateSuite.scala | 93 +++++++++++++++++++ 2 files changed, 97 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala index 9954821e7cebc..66ec4a6c7b951 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala @@ -21,7 +21,7 @@ import org.apache.spark.annotation.{Evolving, Stable} import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.parseColumnPath import org.apache.spark.sql.connector.expressions.{FieldReference, LiteralValue} -import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse => V2AlwaysFalse, AlwaysTrue => V2AlwaysTrue, Predicate} +import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse => V2AlwaysFalse, AlwaysTrue => V2AlwaysTrue, And => V2And, Not => V2Not, Or => V2Or, Predicate} import org.apache.spark.sql.types.StringType import org.apache.spark.unsafe.types.UTF8String @@ -270,7 +270,7 @@ case class IsNotNull(attribute: String) extends Filter { @Stable case class And(left: Filter, right: Filter) extends Filter { override def references: Array[String] = left.references ++ right.references - override def toV2: Predicate = new Predicate("AND", Seq(left, right).map(_.toV2).toArray) + override def toV2: Predicate = new V2And(left.toV2, right.toV2) } /** @@ -281,7 +281,7 @@ case class And(left: Filter, right: Filter) extends Filter { @Stable case class Or(left: Filter, right: Filter) extends Filter { override def references: Array[String] = left.references ++ right.references - override def toV2: Predicate = new Predicate("OR", Seq(left, right).map(_.toV2).toArray) + override def toV2: Predicate = new V2Or(left.toV2, right.toV2) } /** @@ -292,7 +292,7 @@ case class Or(left: Filter, right: Filter) extends Filter { @Stable case class Not(child: Filter) extends Filter { override def references: Array[String] = child.references - override def toV2: Predicate = new Predicate("NOT", Array(child.toV2)) + override def toV2: Predicate = new V2Not(child.toV2) } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2PredicateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2PredicateSuite.scala index 2d6e6fcf16174..2df8b8e56c44b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2PredicateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2PredicateSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.SparkFunSuite import org.apache.spark.sql.connector.expressions.{Expression, FieldReference, Literal, LiteralValue} import org.apache.spark.sql.connector.expressions.filter._ import org.apache.spark.sql.execution.datasources.v2.V2PredicateSuite.ref +import org.apache.spark.sql.sources.{AlwaysFalse => V1AlwaysFalse, AlwaysTrue => V1AlwaysTrue, And => V1And, EqualNullSafe, EqualTo, GreaterThan, GreaterThanOrEqual, In, IsNotNull, IsNull, LessThan, LessThanOrEqual, Not => V1Not, Or => V1Or, StringContains, StringEndsWith, StringStartsWith} import org.apache.spark.sql.types.{IntegerType, StringType} import org.apache.spark.unsafe.types.UTF8String @@ -31,16 +32,22 @@ class V2PredicateSuite extends SparkFunSuite { new Predicate("=", Array[Expression](ref("a", "B"), LiteralValue(1, IntegerType))) assert(predicate1.references.map(_.describe()).toSeq == Seq("a.B")) assert(predicate1.describe.equals("a.B = 1")) + val v1Filter1 = EqualTo(ref("a", "B").describe(), 1) + assert(v1Filter1.toV2 == predicate1) val predicate2 = new Predicate("=", Array[Expression](ref("a", "b.c"), LiteralValue(1, IntegerType))) assert(predicate2.references.map(_.describe()).toSeq == Seq("a.`b.c`")) assert(predicate2.describe.equals("a.`b.c` = 1")) + val v1Filter2 = EqualTo(ref("a", "b.c").describe(), 1) + assert(v1Filter2.toV2 == predicate2) val predicate3 = new Predicate("=", Array[Expression](ref("`a`.b", "c"), LiteralValue(1, IntegerType))) assert(predicate3.references.map(_.describe()).toSeq == Seq("```a``.b`.c")) assert(predicate3.describe.equals("```a``.b`.c = 1")) + val v1Filter3 = EqualTo(ref("`a`.b", "c").describe(), 1) + assert(v1Filter3.toV2 == predicate3) } test("AlwaysTrue") { @@ -49,6 +56,9 @@ class V2PredicateSuite extends SparkFunSuite { assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).length == 0) assert(predicate1.describe.equals("TRUE")) + + val v1Filter = V1AlwaysTrue + assert(v1Filter.toV2 == predicate1) } test("AlwaysFalse") { @@ -57,6 +67,9 @@ class V2PredicateSuite extends SparkFunSuite { assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).length == 0) assert(predicate1.describe.equals("FALSE")) + + val v1Filter = V1AlwaysFalse + assert(v1Filter.toV2 == predicate1) } test("EqualTo") { @@ -65,6 +78,9 @@ class V2PredicateSuite extends SparkFunSuite { assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) assert(predicate1.describe.equals("a = 1")) + + val v1Filter = EqualTo("a", 1) + assert(v1Filter.toV2 == predicate1) } test("EqualNullSafe") { @@ -73,6 +89,53 @@ class V2PredicateSuite extends SparkFunSuite { assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) assert(predicate1.describe.equals("(a = 1) OR (a IS NULL AND 1 IS NULL)")) + + val v1Filter = EqualNullSafe("a", 1) + assert(v1Filter.toV2 == predicate1) + } + + test("LessThan") { + val predicate1 = new Predicate("<", Array[Expression](ref("a"), LiteralValue(1, IntegerType))) + val predicate2 = new Predicate("<", Array[Expression](ref("a"), LiteralValue(1, IntegerType))) + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) + assert(predicate1.describe.equals("a < 1")) + + val v1Filter = LessThan("a", 1) + assert(v1Filter.toV2 == predicate1) + } + + test("LessThanOrEqual") { + val predicate1 = new Predicate("<=", Array[Expression](ref("a"), LiteralValue(1, IntegerType))) + val predicate2 = new Predicate("<=", Array[Expression](ref("a"), LiteralValue(1, IntegerType))) + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) + assert(predicate1.describe.equals("a <= 1")) + + val v1Filter = LessThanOrEqual("a", 1) + assert(v1Filter.toV2 == predicate1) + } + + test("GreatThan") { + val predicate1 = new Predicate(">", Array[Expression](ref("a"), LiteralValue(1, IntegerType))) + val predicate2 = new Predicate(">", Array[Expression](ref("a"), LiteralValue(1, IntegerType))) + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) + assert(predicate1.describe.equals("a > 1")) + + val v1Filter = GreaterThan("a", 1) + assert(v1Filter.toV2 == predicate1) + } + + test("GreatThanOrEqual") { + val predicate1 = new Predicate(">=", Array[Expression](ref("a"), LiteralValue(1, IntegerType))) + val predicate2 = new Predicate(">=", Array[Expression](ref("a"), LiteralValue(1, IntegerType))) + assert(predicate1.equals(predicate2)) + assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) + assert(predicate1.describe.equals("a >= 1")) + + val v1Filter = GreaterThanOrEqual("a", 1) + assert(v1Filter.toV2 == predicate1) } test("In") { @@ -95,6 +158,12 @@ class V2PredicateSuite extends SparkFunSuite { expected = expected.dropRight(2) // remove the last ", " expected += ")" assert(predicate3.describe.equals(expected)) + + val v1Filter1 = In("a", Array(1, 2, 3, 4)) + assert(v1Filter1.toV2 == predicate1) + + val v1Filter2 = In("a", values.map(_.value())) + assert(v1Filter2.toV2 == predicate3) } test("IsNull") { @@ -103,6 +172,9 @@ class V2PredicateSuite extends SparkFunSuite { assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) assert(predicate1.describe.equals("a IS NULL")) + + val v1Filter = IsNull("a") + assert(v1Filter.toV2 == predicate1) } test("IsNotNull") { @@ -111,6 +183,9 @@ class V2PredicateSuite extends SparkFunSuite { assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) assert(predicate1.describe.equals("a IS NOT NULL")) + + val v1Filter = IsNotNull("a") + assert(v1Filter.toV2 == predicate1) } test("Not") { @@ -121,6 +196,9 @@ class V2PredicateSuite extends SparkFunSuite { assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) assert(predicate1.describe.equals("NOT (a < 1)")) + + val v1Filter = V1Not(LessThan("a", 1)) + assert(v1Filter.toV2 == predicate1) } test("And") { @@ -133,6 +211,9 @@ class V2PredicateSuite extends SparkFunSuite { assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).toSeq == Seq("a", "b")) assert(predicate1.describe.equals("(a = 1) AND (b = 1)")) + + val v1Filter = V1And(EqualTo("a", 1), EqualTo("b", 1)) + assert(v1Filter.toV2 == predicate1) } test("Or") { @@ -145,6 +226,9 @@ class V2PredicateSuite extends SparkFunSuite { assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).toSeq == Seq("a", "b")) assert(predicate1.describe.equals("(a = 1) OR (b = 1)")) + + val v1Filter = V1Or(EqualTo("a", 1), EqualTo("b", 1)) + assert(v1Filter.toV2.equals(predicate1)) } test("StringStartsWith") { @@ -156,6 +240,9 @@ class V2PredicateSuite extends SparkFunSuite { assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) assert(predicate1.describe.equals("a LIKE 'str%'")) + + val v1Filter = StringStartsWith("a", "str") + assert(v1Filter.toV2.equals(predicate1)) } test("StringEndsWith") { @@ -167,6 +254,9 @@ class V2PredicateSuite extends SparkFunSuite { assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) assert(predicate1.describe.equals("a LIKE '%str'")) + + val v1Filter = StringEndsWith("a", "str") + assert(v1Filter.toV2.equals(predicate1)) } test("StringContains") { @@ -178,6 +268,9 @@ class V2PredicateSuite extends SparkFunSuite { assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) assert(predicate1.describe.equals("a LIKE '%str%'")) + + val v1Filter = StringContains("a", "str") + assert(v1Filter.toV2.equals(predicate1)) } } From 4ad0b2c25eba29694811445f72513629bf90e7e9 Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Thu, 21 Apr 2022 16:19:20 +0800 Subject: [PATCH 174/535] [SPARK-38957][SQL] Use multipartIdentifier for parsing table-valued functions This PR uses multipart identifiers when parsing table-valued functions. To make table-valued functions error messages consistent for 2-part names and n-part names. For example, before this PR: ``` select * from a.b.c org.apache.spark.sql.catalyst.parser.ParseException: Invalid SQL syntax: Unsupported function name `a`.`b`.`c`(line 1, pos 14) == SQL == select * from a.b.c(1) --------------^^^ ``` After this PR: ``` Invalid SQL syntax: table valued function cannot specify database name (line 1, pos 14) == SQL == SELECT * FROM a.b.c(1) --------------^^^ ``` No Unit test. Closes #36272 from allisonwang-db/spark-38957-parse-table-func. Authored-by: allisonwang-db Signed-off-by: Wenchen Fan (cherry picked from commit 8fe5bca1773521d967b82a920c6881f081155bc3) Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/AstBuilder.scala | 19 ++++--------------- .../spark/sql/errors/QueryParsingErrors.scala | 8 ++++++++ .../sql/catalyst/parser/PlanParserSuite.scala | 5 ++++- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 60e691ba4acbb..7b3374e9332c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1296,13 +1296,13 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit } else { Seq.empty } - val name = getFunctionIdentifier(func.functionName) - if (name.database.nonEmpty) { - operationNotAllowed(s"table valued function cannot specify database name: $name", ctx) + val name = getFunctionMultiparts(func.functionName) + if (name.length > 1) { + throw QueryParsingErrors.invalidTableValuedFunctionNameError(name, ctx) } val tvf = UnresolvedTableValuedFunction( - name, func.expression.asScala.map(expression).toSeq, aliases) + name.asFunctionIdentifier, func.expression.asScala.map(expression).toSeq, aliases) tvf.optionalMap(func.tableAlias.strictIdentifier)(aliasPlan) } @@ -1952,17 +1952,6 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit } } - /** - * Get a function identifier consist by database (optional) and name. - */ - protected def getFunctionIdentifier(ctx: FunctionNameContext): FunctionIdentifier = { - if (ctx.qualifiedName != null) { - visitFunctionName(ctx.qualifiedName) - } else { - FunctionIdentifier(ctx.getText, None) - } - } - protected def getFunctionMultiparts(ctx: FunctionNameContext): Seq[String] = { if (ctx.qualifiedName != null) { ctx.qualifiedName().identifier().asScala.map(_.getText).toSeq diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 39c1944bbba9a..38cff081eb5e4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -452,6 +452,14 @@ object QueryParsingErrors extends QueryErrorsBase { s"Specifying a database in CREATE TEMPORARY FUNCTION is not allowed: '$databaseName'", ctx) } + def invalidTableValuedFunctionNameError( + name: Seq[String], + ctx: TableValuedFunctionContext): Throwable = { + new ParseException( + "INVALID_SQL_SYNTAX", + Array("table valued function cannot specify database name ", toSQLId(name)), ctx) + } + def unclosedBracketedCommentError(command: String, position: Origin): Throwable = { new ParseException(Some(command), "Unclosed bracketed comment", position, position) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index fb9fdfb859826..f6e94dc604b87 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -692,7 +692,10 @@ class PlanParserSuite extends AnalysisTest { UnresolvedTableValuedFunction("range", Literal(2) :: Nil, Seq.empty).select(star())) // SPARK-34627 intercept("select * from default.range(2)", - "table valued function cannot specify database name: default.range") + "table valued function cannot specify database name") + // SPARK-38957 + intercept("select * from spark_catalog.default.range(2)", + "table valued function cannot specify database name") } test("SPARK-20311 range(N) as alias") { From 76fe1bf41268bc928b0637510728dde1aa8c5805 Mon Sep 17 00:00:00 2001 From: Maryann Xue Date: Thu, 21 Apr 2022 16:30:54 +0800 Subject: [PATCH 175/535] [SPARK-38916][CORE] Tasks not killed caused by race conditions between killTask() and launchTask() ### What changes were proposed in this pull request? This PR fixes the race conditions between the killTask() call and the launchTask() call that sometimes causes tasks not to be killed properly. If killTask() probes the map of pendingTasksLaunches before launchTask() has had a chance to put the corresponding task into that map, the kill flag will be lost and the subsequent launchTask() call will just proceed and run that task without knowing this task should be killed instead. The fix adds a kill mark during the killTask() call so that subsequent launchTask() can pick up the kill mark and call kill() on the TaskLauncher. If killTask() happens to happen after the task has finished and thus makes the kill mark useless, it will be cleaned up in a background thread. ### Why are the changes needed? Bug fix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added UTs. Closes #36238 from maryannxue/spark-38916. Authored-by: Maryann Xue Signed-off-by: Wenchen Fan (cherry picked from commit bb5092b9af60afdceeccb239d14be660f77ae0ea) Signed-off-by: Wenchen Fan --- .../org/apache/spark/executor/Executor.scala | 51 ++++- .../CoarseGrainedExecutorBackendSuite.scala | 185 +++++++++++++++++- .../apache/spark/executor/ExecutorSuite.scala | 10 +- 3 files changed, 230 insertions(+), 16 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index 79d7190a33e2d..d01de3b9ed086 100644 --- a/core/src/main/scala/org/apache/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -83,7 +83,7 @@ private[spark] class Executor( private val EMPTY_BYTE_BUFFER = ByteBuffer.wrap(new Array[Byte](0)) - private val conf = env.conf + private[executor] val conf = env.conf // No ip or host:port - just hostname Utils.checkHost(executorHostname) @@ -104,7 +104,7 @@ private[spark] class Executor( // Use UninterruptibleThread to run tasks so that we can allow running codes without being // interrupted by `Thread.interrupt()`. Some issues, such as KAFKA-1894, HADOOP-10622, // will hang forever if some methods are interrupted. - private val threadPool = { + private[executor] val threadPool = { val threadFactory = new ThreadFactoryBuilder() .setDaemon(true) .setNameFormat("Executor task launch worker-%d") @@ -174,7 +174,33 @@ private[spark] class Executor( private val maxResultSize = conf.get(MAX_RESULT_SIZE) // Maintains the list of running tasks. - private val runningTasks = new ConcurrentHashMap[Long, TaskRunner] + private[executor] val runningTasks = new ConcurrentHashMap[Long, TaskRunner] + + // Kill mark TTL in milliseconds - 10 seconds. + private val KILL_MARK_TTL_MS = 10000L + + // Kill marks with interruptThread flag, kill reason and timestamp. + // This is to avoid dropping the kill event when killTask() is called before launchTask(). + private[executor] val killMarks = new ConcurrentHashMap[Long, (Boolean, String, Long)] + + private val killMarkCleanupTask = new Runnable { + override def run(): Unit = { + val oldest = System.currentTimeMillis() - KILL_MARK_TTL_MS + val iter = killMarks.entrySet().iterator() + while (iter.hasNext) { + if (iter.next().getValue._3 < oldest) { + iter.remove() + } + } + } + } + + // Kill mark cleanup thread executor. + private val killMarkCleanupService = + ThreadUtils.newDaemonSingleThreadScheduledExecutor("executor-kill-mark-cleanup") + + killMarkCleanupService.scheduleAtFixedRate( + killMarkCleanupTask, KILL_MARK_TTL_MS, KILL_MARK_TTL_MS, TimeUnit.MILLISECONDS) /** * When an executor is unable to send heartbeats to the driver more than `HEARTBEAT_MAX_FAILURES` @@ -264,9 +290,18 @@ private[spark] class Executor( decommissioned = true } + private[executor] def createTaskRunner(context: ExecutorBackend, + taskDescription: TaskDescription) = new TaskRunner(context, taskDescription, plugins) + def launchTask(context: ExecutorBackend, taskDescription: TaskDescription): Unit = { - val tr = new TaskRunner(context, taskDescription, plugins) - runningTasks.put(taskDescription.taskId, tr) + val taskId = taskDescription.taskId + val tr = createTaskRunner(context, taskDescription) + runningTasks.put(taskId, tr) + val killMark = killMarks.get(taskId) + if (killMark != null) { + tr.kill(killMark._1, killMark._2) + killMarks.remove(taskId) + } threadPool.execute(tr) if (decommissioned) { log.error(s"Launching a task while in decommissioned state.") @@ -274,6 +309,7 @@ private[spark] class Executor( } def killTask(taskId: Long, interruptThread: Boolean, reason: String): Unit = { + killMarks.put(taskId, (interruptThread, reason, System.currentTimeMillis())) val taskRunner = runningTasks.get(taskId) if (taskRunner != null) { if (taskReaperEnabled) { @@ -296,6 +332,8 @@ private[spark] class Executor( } else { taskRunner.kill(interruptThread = interruptThread, reason = reason) } + // Safe to remove kill mark as we got a chance with the TaskRunner. + killMarks.remove(taskId) } } @@ -334,6 +372,9 @@ private[spark] class Executor( if (threadPool != null) { threadPool.shutdown() } + if (killMarkCleanupService != null) { + killMarkCleanupService.shutdown() + } if (replClassLoader != null && plugins != null) { // Notify plugins that executor is shutting down so they can terminate cleanly Utils.withContextClassLoader(replClassLoader) { diff --git a/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala b/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala index 9bbfdc76e4f6a..a12b7034a6df4 100644 --- a/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala @@ -20,14 +20,17 @@ package org.apache.spark.executor import java.io.File import java.nio.ByteBuffer import java.util.Properties +import java.util.concurrent.ConcurrentHashMap +import scala.collection.concurrent.TrieMap import scala.collection.mutable import scala.concurrent.duration._ import org.json4s.{DefaultFormats, Extraction} import org.json4s.JsonAST.{JArray, JObject} import org.json4s.JsonDSL._ -import org.mockito.Mockito.when +import org.mockito.ArgumentMatchers.any +import org.mockito.Mockito._ import org.scalatest.concurrent.Eventually.{eventually, timeout} import org.scalatestplus.mockito.MockitoSugar @@ -38,9 +41,9 @@ import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.resource.TestResourceIDs._ import org.apache.spark.rpc.RpcEnv import org.apache.spark.scheduler.TaskDescription -import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.LaunchTask +import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.{KillTask, LaunchTask} import org.apache.spark.serializer.JavaSerializer -import org.apache.spark.util.{SerializableBuffer, Utils} +import org.apache.spark.util.{SerializableBuffer, ThreadUtils, Utils} class CoarseGrainedExecutorBackendSuite extends SparkFunSuite with LocalSparkContext with MockitoSugar { @@ -356,6 +359,182 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite assert(arg.bindAddress == "bindaddress1") } + /** + * This testcase is to verify that [[Executor.killTask()]] will always cancel a task that is + * being executed in [[Executor.TaskRunner]]. + */ + test(s"Tasks launched should always be cancelled.") { + val conf = new SparkConf + val securityMgr = new SecurityManager(conf) + val serializer = new JavaSerializer(conf) + val threadPool = ThreadUtils.newDaemonFixedThreadPool(32, "test-executor") + var backend: CoarseGrainedExecutorBackend = null + + try { + val rpcEnv = RpcEnv.create("1", "localhost", 0, conf, securityMgr) + val env = createMockEnv(conf, serializer, Some(rpcEnv)) + backend = new CoarseGrainedExecutorBackend(env.rpcEnv, rpcEnv.address.hostPort, "1", + "host1", "host1", 4, env, None, + resourceProfile = ResourceProfile.getOrCreateDefaultProfile(conf)) + + backend.rpcEnv.setupEndpoint("Executor 1", backend) + backend.executor = mock[Executor](CALLS_REAL_METHODS) + val executor = backend.executor + // Mock the executor. + when(executor.threadPool).thenReturn(threadPool) + val runningTasks = spy(new ConcurrentHashMap[Long, Executor#TaskRunner]) + when(executor.runningTasks).thenAnswer(_ => runningTasks) + when(executor.conf).thenReturn(conf) + + // We don't really verify the data, just pass it around. + val data = ByteBuffer.wrap(Array[Byte](1, 2, 3, 4)) + + val numTasks = 1000 + val tasksKilled = new TrieMap[Long, Boolean]() + val tasksExecuted = new TrieMap[Long, Boolean]() + + // Fake tasks with different taskIds. + val taskDescriptions = (1 to numTasks).map { + taskId => new TaskDescription(taskId, 2, "1", "TASK ${taskId}", 19, + 1, mutable.Map.empty, mutable.Map.empty, mutable.Map.empty, new Properties, 1, + Map(GPU -> new ResourceInformation(GPU, Array("0", "1"))), data) + } + assert(taskDescriptions.length == numTasks) + + def getFakeTaskRunner(taskDescription: TaskDescription): Executor#TaskRunner = { + new executor.TaskRunner(backend, taskDescription, None) { + override def run(): Unit = { + tasksExecuted.put(taskDescription.taskId, true) + logInfo(s"task ${taskDescription.taskId} runs.") + } + + override def kill(interruptThread: Boolean, reason: String): Unit = { + logInfo(s"task ${taskDescription.taskId} killed.") + tasksKilled.put(taskDescription.taskId, true) + } + } + } + + // Feed the fake task-runners to be executed by the executor. + val firstLaunchTask = getFakeTaskRunner(taskDescriptions(1)) + val otherTasks = taskDescriptions.slice(1, numTasks).map(getFakeTaskRunner(_)).toArray + assert (otherTasks.length == numTasks - 1) + // Workaround for compilation issue around Mockito.doReturn + doReturn(firstLaunchTask, otherTasks: _*).when(executor). + createTaskRunner(any(), any()) + + // Launch tasks and quickly kill them so that TaskRunner.killTask will be triggered. + taskDescriptions.foreach { taskDescription => + val buffer = new SerializableBuffer(TaskDescription.encode(taskDescription)) + backend.self.send(LaunchTask(buffer)) + Thread.sleep(1) + backend.self.send(KillTask(taskDescription.taskId, "exec1", false, "test")) + } + + eventually(timeout(10.seconds)) { + verify(runningTasks, times(numTasks)).put(any(), any()) + } + + assert(tasksExecuted.size == tasksKilled.size, + s"Tasks killed ${tasksKilled.size} != tasks executed ${tasksExecuted.size}") + assert(tasksExecuted.keySet == tasksKilled.keySet) + logInfo(s"Task executed ${tasksExecuted.size}, task killed ${tasksKilled.size}") + } finally { + if (backend != null) { + backend.rpcEnv.shutdown() + } + threadPool.shutdownNow() + } + } + + /** + * This testcase is to verify that [[Executor.killTask()]] will always cancel a task even if + * it has not been launched yet. + */ + test(s"Tasks not launched should always be cancelled.") { + val conf = new SparkConf + val securityMgr = new SecurityManager(conf) + val serializer = new JavaSerializer(conf) + val threadPool = ThreadUtils.newDaemonFixedThreadPool(32, "test-executor") + var backend: CoarseGrainedExecutorBackend = null + + try { + val rpcEnv = RpcEnv.create("1", "localhost", 0, conf, securityMgr) + val env = createMockEnv(conf, serializer, Some(rpcEnv)) + backend = new CoarseGrainedExecutorBackend(env.rpcEnv, rpcEnv.address.hostPort, "1", + "host1", "host1", 4, env, None, + resourceProfile = ResourceProfile.getOrCreateDefaultProfile(conf)) + + backend.rpcEnv.setupEndpoint("Executor 1", backend) + backend.executor = mock[Executor](CALLS_REAL_METHODS) + val executor = backend.executor + // Mock the executor. + when(executor.threadPool).thenReturn(threadPool) + val runningTasks = spy(new ConcurrentHashMap[Long, Executor#TaskRunner]) + when(executor.runningTasks).thenAnswer(_ => runningTasks) + when(executor.conf).thenReturn(conf) + + // We don't really verify the data, just pass it around. + val data = ByteBuffer.wrap(Array[Byte](1, 2, 3, 4)) + + val numTasks = 1000 + val tasksKilled = new TrieMap[Long, Boolean]() + val tasksExecuted = new TrieMap[Long, Boolean]() + + // Fake tasks with different taskIds. + val taskDescriptions = (1 to numTasks).map { + taskId => new TaskDescription(taskId, 2, "1", "TASK ${taskId}", 19, + 1, mutable.Map.empty, mutable.Map.empty, mutable.Map.empty, new Properties, 1, + Map(GPU -> new ResourceInformation(GPU, Array("0", "1"))), data) + } + assert(taskDescriptions.length == numTasks) + + def getFakeTaskRunner(taskDescription: TaskDescription): Executor#TaskRunner = { + new executor.TaskRunner(backend, taskDescription, None) { + override def run(): Unit = { + tasksExecuted.put(taskDescription.taskId, true) + logInfo(s"task ${taskDescription.taskId} runs.") + } + + override def kill(interruptThread: Boolean, reason: String): Unit = { + logInfo(s"task ${taskDescription.taskId} killed.") + tasksKilled.put(taskDescription.taskId, true) + } + } + } + + // Feed the fake task-runners to be executed by the executor. + val firstLaunchTask = getFakeTaskRunner(taskDescriptions(1)) + val otherTasks = taskDescriptions.slice(1, numTasks).map(getFakeTaskRunner(_)).toArray + assert (otherTasks.length == numTasks - 1) + // Workaround for compilation issue around Mockito.doReturn + doReturn(firstLaunchTask, otherTasks: _*).when(executor). + createTaskRunner(any(), any()) + + // The reverse order of events can happen when the scheduler tries to cancel a task right + // after launching it. + taskDescriptions.foreach { taskDescription => + val buffer = new SerializableBuffer(TaskDescription.encode(taskDescription)) + backend.self.send(KillTask(taskDescription.taskId, "exec1", false, "test")) + backend.self.send(LaunchTask(buffer)) + } + + eventually(timeout(10.seconds)) { + verify(runningTasks, times(numTasks)).put(any(), any()) + } + + assert(tasksExecuted.size == tasksKilled.size, + s"Tasks killed ${tasksKilled.size} != tasks executed ${tasksExecuted.size}") + assert(tasksExecuted.keySet == tasksKilled.keySet) + logInfo(s"Task executed ${tasksExecuted.size}, task killed ${tasksKilled.size}") + } finally { + if (backend != null) { + backend.rpcEnv.shutdown() + } + threadPool.shutdownNow() + } + } + private def createMockEnv(conf: SparkConf, serializer: JavaSerializer, rpcEnv: Option[RpcEnv] = None): SparkEnv = { val mockEnv = mock[SparkEnv] diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala index 943f4e115a596..8683e19b7a2d7 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorSuite.scala @@ -22,7 +22,7 @@ import java.lang.Thread.UncaughtExceptionHandler import java.net.URL import java.nio.ByteBuffer import java.util.Properties -import java.util.concurrent.{ConcurrentHashMap, CountDownLatch, TimeUnit} +import java.util.concurrent.{CountDownLatch, TimeUnit} import java.util.concurrent.atomic.AtomicBoolean import scala.collection.immutable @@ -321,13 +321,7 @@ class ExecutorSuite extends SparkFunSuite nonZeroAccumulator.add(1) metrics.registerAccumulator(nonZeroAccumulator) - val executorClass = classOf[Executor] - val tasksMap = { - val field = - executorClass.getDeclaredField("org$apache$spark$executor$Executor$$runningTasks") - field.setAccessible(true) - field.get(executor).asInstanceOf[ConcurrentHashMap[Long, executor.TaskRunner]] - } + val tasksMap = executor.runningTasks val mockTaskRunner = mock[executor.TaskRunner] val mockTask = mock[Task[Any]] when(mockTask.metrics).thenReturn(metrics) From bb1a523a399b8e29b7543e615c0c884d4bf76215 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 21 Apr 2022 11:50:01 +0300 Subject: [PATCH 176/535] [SPARK-38913][SQL][3.3] Output identifiers in error messages in SQL style ### What changes were proposed in this pull request? In the PR, I propose to use backticks to wrap SQL identifiers in error messages. I added new util functions `toSQLId()` to the trait `QueryErrorsBase`, and applied it in `Query.*Errors` (also modified tests in `Query.*ErrorsSuite`). For example: Before: ```sql Invalid SQL syntax: The definition of window win is repetitive. ``` After: ``` Invalid SQL syntax: The definition of window `win` is repetitive. ``` ### Why are the changes needed? To improve user experience with Spark SQL. The changes highlight SQL identifiers in error massages and make them more visible for users. ### Does this PR introduce _any_ user-facing change? No since error classes haven't been released yet. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *QueryParsingErrorsSuite" $ build/sbt "test:testOnly *QueryCompilationErrorsSuite" $ build/sbt "test:testOnly *QueryCompilationErrorsDSv2Suite" $ build/sbt "test:testOnly *QueryExecutionErrorsSuite" $ build/sbt "testOnly *PlanParserSuite" $ build/sbt "testOnly *DDLParserSuite" $ build/sbt -Phive-2.3 "testOnly *HiveSQLInsertTestSuite" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z window.sql" $ build/sbt "testOnly *DSV2SQLInsertTestSuite" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 2ff6914e6bac053231825c083fd508726a11a349) Closes #36288 from MaxGekk/error-class-toSQLId-3.3. Authored-by: Max Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 2 +- .../spark/sql/errors/QueryCompilationErrors.scala | 4 ++-- .../apache/spark/sql/errors/QueryErrorsBase.scala | 9 +++++++++ .../spark/sql/errors/QueryExecutionErrors.scala | 8 +++++--- .../spark/sql/errors/QueryParsingErrors.scala | 15 +++++++++------ .../sql/catalyst/parser/DDLParserSuite.scala | 4 ++-- .../sql/catalyst/parser/PlanParserSuite.scala | 12 ------------ .../resources/sql-tests/results/window.sql.out | 2 +- .../org/apache/spark/sql/SQLInsertTestSuite.scala | 2 +- .../errors/QueryCompilationErrorsDSv2Suite.scala | 2 +- .../sql/errors/QueryExecutionErrorsSuite.scala | 8 ++++---- .../sql/errors/QueryParsingErrorsSuite.scala | 8 ++++---- .../sql/execution/command/DDLParserSuite.scala | 15 --------------- 13 files changed, 39 insertions(+), 52 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 13ec8f022c5fd..e1b3c74eec8b6 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -41,7 +41,7 @@ "sqlState" : "22012" }, "DUPLICATE_KEY" : { - "message" : [ "Found duplicate keys '%s'" ], + "message" : [ "Found duplicate keys %s" ], "sqlState" : "23000" }, "FAILED_EXECUTE_UDF" : { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 3a8cd68966656..07d07fce9edea 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -95,14 +95,14 @@ object QueryCompilationErrors extends QueryErrorsBase { new AnalysisException( errorClass = "UNSUPPORTED_FEATURE", messageParameters = Array( - s"${toSQLStmt("IF NOT EXISTS")} for the table '$tableName' " + + s"${toSQLStmt("IF NOT EXISTS")} for the table ${toSQLId(tableName)} " + s"by ${toSQLStmt("INSERT INTO")}.")) } def nonPartitionColError(partitionName: String): Throwable = { new AnalysisException( errorClass = "NON_PARTITION_COLUMN", - messageParameters = Array(partitionName)) + messageParameters = Array(toSQLId(partitionName))) } def missingStaticPartitionColumn(staticName: String): Throwable = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index b115891f370ad..7daf8ae7325db 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.errors import java.util.Locale import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.types.{DataType, DoubleType, FloatType} trait QueryErrorsBase { @@ -52,6 +53,14 @@ trait QueryErrorsBase { "\"" + text.toUpperCase(Locale.ROOT) + "\"" } + def toSQLId(parts: Seq[String]): String = { + parts.map(quoteIdentifier).mkString(".") + } + + def toSQLId(parts: String): String = { + toSQLId(parts.split("\\.")) + } + def toSQLType(t: DataType): String = { t.sql } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 86dcf4cfc3ce7..0fb8edf873d79 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1936,12 +1936,14 @@ object QueryExecutionErrors extends QueryErrorsBase { messageParameters = Array(s"${toSQLStmt("pivot")} not after a ${toSQLStmt("group by")}.")) } + private val aesFuncName = toSQLId("aes_encrypt") + "/" + toSQLId("aes_decrypt") + def invalidAesKeyLengthError(actualLength: Int): RuntimeException = { new SparkRuntimeException( errorClass = "INVALID_PARAMETER_VALUE", messageParameters = Array( "key", - "the aes_encrypt/aes_decrypt function", + s"the $aesFuncName function", s"expects a binary value with 16, 24 or 32 bytes, but got ${actualLength.toString} bytes.")) } @@ -1949,7 +1951,7 @@ object QueryExecutionErrors extends QueryErrorsBase { new SparkRuntimeException( errorClass = "UNSUPPORTED_FEATURE", messageParameters = Array( - s"AES-$mode with the padding $padding by the aes_encrypt/aes_decrypt function.")) + s"AES-$mode with the padding $padding by the $aesFuncName function.")) } def aesCryptoError(detailMessage: String): RuntimeException = { @@ -1957,7 +1959,7 @@ object QueryExecutionErrors extends QueryErrorsBase { errorClass = "INVALID_PARAMETER_VALUE", messageParameters = Array( "expr, key", - "the aes_encrypt/aes_decrypt function", + s"the $aesFuncName function", s"Detail message: $detailMessage")) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 38cff081eb5e4..32caa3fa7a24a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -140,17 +140,17 @@ object QueryParsingErrors extends QueryErrorsBase { def repetitiveWindowDefinitionError(name: String, ctx: WindowClauseContext): Throwable = { new ParseException("INVALID_SQL_SYNTAX", - Array(s"The definition of window '$name' is repetitive."), ctx) + Array(s"The definition of window ${toSQLId(name)} is repetitive."), ctx) } def invalidWindowReferenceError(name: String, ctx: WindowClauseContext): Throwable = { new ParseException("INVALID_SQL_SYNTAX", - Array(s"Window reference '$name' is not a window specification."), ctx) + Array(s"Window reference ${toSQLId(name)} is not a window specification."), ctx) } def cannotResolveWindowReferenceError(name: String, ctx: WindowClauseContext): Throwable = { new ParseException("INVALID_SQL_SYNTAX", - Array(s"Cannot resolve window reference '$name'."), ctx) + Array(s"Cannot resolve window reference ${toSQLId(name)}."), ctx) } def naturalCrossJoinUnsupportedError(ctx: RelationContext): Throwable = { @@ -249,7 +249,10 @@ object QueryParsingErrors extends QueryErrorsBase { } def tooManyArgumentsForTransformError(name: String, ctx: ApplyTransformContext): Throwable = { - new ParseException("INVALID_SQL_SYNTAX", Array(s"Too many arguments for transform $name"), ctx) + new ParseException( + errorClass = "INVALID_SQL_SYNTAX", + messageParameters = Array(s"Too many arguments for transform ${toSQLId(name)}"), + ctx) } def invalidBucketsNumberError(describe: String, ctx: ApplyTransformContext): Throwable = { @@ -327,7 +330,7 @@ object QueryParsingErrors extends QueryErrorsBase { new ParseException( errorClass = "INVALID_SQL_SYNTAX", messageParameters = Array( - s"Invalid pattern in ${toSQLStmt("SHOW FUNCTIONS")}: $pattern. " + + s"Invalid pattern in ${toSQLStmt("SHOW FUNCTIONS")}: ${toSQLId(pattern)}. " + s"It must be a ${toSQLType(StringType)} literal."), ctx) } @@ -351,7 +354,7 @@ object QueryParsingErrors extends QueryErrorsBase { def duplicateKeysError(key: String, ctx: ParserRuleContext): Throwable = { // Found duplicate keys '$key' - new ParseException(errorClass = "DUPLICATE_KEY", messageParameters = Array(key), ctx) + new ParseException(errorClass = "DUPLICATE_KEY", messageParameters = Array(toSQLId(key)), ctx) } def unexpectedFomatForSetConfigurationError(ctx: ParserRuleContext): Throwable = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index bc5380e27f536..e6ed8046f784c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2050,9 +2050,9 @@ class DDLParserSuite extends AnalysisTest { ShowFunctions(UnresolvedNamespace(Seq("db")), true, true, Some("funct*"))) intercept("SHOW other FUNCTIONS", "\"SHOW\" other \"FUNCTIONS\" not supported") intercept("SHOW FUNCTIONS IN db f1", - "Invalid pattern in \"SHOW FUNCTIONS\": f1") + "Invalid pattern in \"SHOW FUNCTIONS\": `f1`") intercept("SHOW FUNCTIONS IN db LIKE f1", - "Invalid pattern in \"SHOW FUNCTIONS\": f1") + "Invalid pattern in \"SHOW FUNCTIONS\": `f1`") // The legacy syntax. comparePlans( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index f6e94dc604b87..d791496b22756 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -430,18 +430,6 @@ class PlanParserSuite extends AnalysisTest { | w2 as w1, | w3 as w1""".stripMargin, WithWindowDefinition(ws1, plan)) - - // Fail with no reference. - intercept(s"$sql window w2 as w1", "Cannot resolve window reference 'w1'") - - // Fail when resolved reference is not a window spec. - intercept( - s"""$sql - |window w1 as (partition by a, b order by c rows between 1 preceding and 1 following), - | w2 as w1, - | w3 as w2""".stripMargin, - "Window reference 'w2' is not a window specification" - ) } test("lateral view") { diff --git a/sql/core/src/test/resources/sql-tests/results/window.sql.out b/sql/core/src/test/resources/sql-tests/results/window.sql.out index dcce285c30925..7e0fd2772d804 100644 --- a/sql/core/src/test/resources/sql-tests/results/window.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/window.sql.out @@ -898,7 +898,7 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException -Invalid SQL syntax: The definition of window 'w' is repetitive.(line 8, pos 0) +Invalid SQL syntax: The definition of window `w` is repetitive.(line 8, pos 0) == SQL == SELECT diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala index 748c8fefa707e..4023e6cbed072 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala @@ -264,7 +264,7 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils { val e = intercept[AnalysisException] { sql("INSERT OVERWRITE t PARTITION (c='2', C='3') VALUES (1)") } - assert(e.getMessage.contains("Found duplicate keys 'c'")) + assert(e.getMessage.contains("Found duplicate keys `c`")) } // The following code is skipped for Hive because columns stored in Hive Metastore is always // case insensitive and we cannot create such table in Hive Metastore. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsDSv2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsDSv2Suite.scala index be8e65249202b..95850ee7f6e96 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsDSv2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsDSv2Suite.scala @@ -43,7 +43,7 @@ class QueryCompilationErrorsDSv2Suite checkAnswer(spark.table(tbl), spark.emptyDataFrame) assert(e.getMessage === "The feature is not supported: " + - s""""IF NOT EXISTS" for the table '$tbl' by "INSERT INTO".""") + s""""IF NOT EXISTS" for the table `testcat`.`ns1`.`ns2`.`tbl` by "INSERT INTO".""") assert(e.getErrorClass === "UNSUPPORTED_FEATURE") assert(e.getSqlState === "0A000") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 9ff57859acb90..8b5699dd2b1c1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -59,8 +59,8 @@ class QueryExecutionErrorsSuite extends QueryTest assert(e.getErrorClass === "INVALID_PARAMETER_VALUE") assert(e.getSqlState === "22023") assert(e.getMessage.matches( - "The value of parameter\\(s\\) 'key' in the aes_encrypt/aes_decrypt function is invalid: " + - "expects a binary value with 16, 24 or 32 bytes, but got \\d+ bytes.")) + "The value of parameter\\(s\\) 'key' in the `aes_encrypt`/`aes_decrypt` function " + + "is invalid: expects a binary value with 16, 24 or 32 bytes, but got \\d+ bytes.")) } // Encryption failure - invalid key length @@ -93,7 +93,7 @@ class QueryExecutionErrorsSuite extends QueryTest assert(e.getErrorClass === "INVALID_PARAMETER_VALUE") assert(e.getSqlState === "22023") assert(e.getMessage === - "The value of parameter(s) 'expr, key' in the aes_encrypt/aes_decrypt function " + + "The value of parameter(s) 'expr, key' in the `aes_encrypt`/`aes_decrypt` function " + "is invalid: Detail message: " + "Given final block not properly padded. " + "Such issues can arise if a bad key is used during decryption.") @@ -111,7 +111,7 @@ class QueryExecutionErrorsSuite extends QueryTest assert(e.getErrorClass === "UNSUPPORTED_FEATURE") assert(e.getSqlState === "0A000") assert(e.getMessage.matches("""The feature is not supported: AES-\w+ with the padding \w+""" + - " by the aes_encrypt/aes_decrypt function.")) + " by the `aes_encrypt`/`aes_decrypt` function.")) } // Unsupported AES mode and padding in encrypt diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala index 5a47ce5ae73e4..057bccce3ef6b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala @@ -130,7 +130,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { sqlState = "42000", message = """ - |Invalid SQL syntax: The definition of window 'win' is repetitive.(line 1, pos 31) + |Invalid SQL syntax: The definition of window `win` is repetitive.(line 1, pos 31) | |== SQL == |SELECT min(a) OVER win FROM t1 WINDOW win AS win, win AS win2 @@ -145,7 +145,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { sqlState = "42000", message = """ - |Invalid SQL syntax: Window reference 'win' is not a window specification.(line 1, pos 31) + |Invalid SQL syntax: Window reference `win` is not a window specification.(line 1, pos 31) | |== SQL == |SELECT min(a) OVER win FROM t1 WINDOW win AS win @@ -160,7 +160,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { sqlState = "42000", message = """ - |Invalid SQL syntax: Cannot resolve window reference 'win2'.(line 1, pos 31) + |Invalid SQL syntax: Cannot resolve window reference `win2`.(line 1, pos 31) | |== SQL == |SELECT min(a) OVER win FROM t1 WINDOW win AS win2 @@ -206,7 +206,7 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { sqlState = "42000", message = """ - |Invalid SQL syntax: Too many arguments for transform years(line 1, pos 44) + |Invalid SQL syntax: Too many arguments for transform `years`(line 1, pos 44) | |== SQL == |CREATE TABLE table(col int) PARTITIONED BY (years(col,col)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala index 1053cb9f2a772..7bf2b8ff04ab1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLParserSuite.scala @@ -200,21 +200,6 @@ class DDLParserSuite extends AnalysisTest with SharedSparkSession { assert(parsed.isInstanceOf[Project]) } - test("duplicate keys in table properties") { - val e = intercept[ParseException] { - parser.parsePlan("ALTER TABLE dbx.tab1 SET TBLPROPERTIES ('key1' = '1', 'key1' = '2')") - }.getMessage - assert(e.contains("Found duplicate keys 'key1'")) - } - - test("duplicate columns in partition specs") { - val e = intercept[ParseException] { - parser.parsePlan( - "ALTER TABLE dbx.tab1 PARTITION (a='1', a='2') RENAME TO PARTITION (a='100', a='200')") - }.getMessage - assert(e.contains("Found duplicate keys 'a'")) - } - test("unsupported operations") { intercept[ParseException] { parser.parsePlan( From 197c975a6765d693f5fafe7614bac6d832c5aabf Mon Sep 17 00:00:00 2001 From: Serge Rielau Date: Thu, 21 Apr 2022 21:53:28 +0800 Subject: [PATCH 177/535] [SPARK-38972][SQL] Support in error-class messages Use symbolic names for parameters in error messages which are substituted with %s before formatting the string. Increase readability of error message docs (TBD) No SQL Project. Closes #36289 from srielau/symbolic-error-arg-names. Authored-by: Serge Rielau Signed-off-by: Wenchen Fan (cherry picked from commit 43e610333fb78834a09cd82f3da32bad262564f3) Signed-off-by: Wenchen Fan --- .../main/resources/error/error-classes.json | 92 +++++++++---------- .../scala/org/apache/spark/ErrorInfo.scala | 3 +- 2 files changed, 48 insertions(+), 47 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index e1b3c74eec8b6..283c953ff4fb9 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1,18 +1,18 @@ { "AMBIGUOUS_FIELD_NAME" : { - "message" : [ "Field name %s is ambiguous and has %s matching fields in the struct." ], + "message" : [ "Field name is ambiguous and has matching fields in the struct." ], "sqlState" : "42000" }, "ARITHMETIC_OVERFLOW" : { - "message" : [ "%s.%s If necessary set %s to false (except for ANSI interval type) to bypass this error.%s" ], + "message" : [ ". If necessary set to false (except for ANSI interval type) to bypass this error." ], "sqlState" : "22003" }, "CANNOT_CAST_DATATYPE" : { - "message" : [ "Cannot cast %s to %s." ], + "message" : [ "Cannot cast to ." ], "sqlState" : "22005" }, "CANNOT_CHANGE_DECIMAL_PRECISION" : { - "message" : [ "%s cannot be represented as Decimal(%s, %s). If necessary set %s to false to bypass this error.%s" ], + "message" : [ " cannot be represented as Decimal(, ). If necessary set to false to bypass this error.
        " ], "sqlState" : "22005" }, "CANNOT_PARSE_DECIMAL" : { @@ -20,127 +20,127 @@ "sqlState" : "42000" }, "CANNOT_UP_CAST_DATATYPE" : { - "message" : [ "Cannot up cast %s from %s to %s.\n%s" ] + "message" : [ "Cannot up cast from to .\n
        " ] }, "CANNOT_USE_MIXTURE" : { "message" : [ "Cannot use a mixture of aggregate function and group aggregate pandas UDF" ] }, "CAST_CAUSES_OVERFLOW" : { - "message" : [ "Casting %s to %s causes overflow. To return NULL instead, use 'try_cast'. If necessary set %s to false to bypass this error." ], + "message" : [ "Casting to causes overflow. To return NULL instead, use 'try_cast'. If necessary set to false to bypass this error." ], "sqlState" : "22005" }, "CONCURRENT_QUERY" : { "message" : [ "Another instance of this query was just started by a concurrent session." ] }, "DATETIME_OVERFLOW" : { - "message" : [ "Datetime operation overflow: %s." ], + "message" : [ "Datetime operation overflow: ." ], "sqlState" : "22008" }, "DIVIDE_BY_ZERO" : { - "message" : [ "divide by zero. To return NULL instead, use 'try_divide'. If necessary set %s to false (except for ANSI interval type) to bypass this error.%s" ], + "message" : [ "divide by zero. To return NULL instead, use 'try_divide'. If necessary set to false (except for ANSI interval type) to bypass this error.
        " ], "sqlState" : "22012" }, "DUPLICATE_KEY" : { - "message" : [ "Found duplicate keys %s" ], + "message" : [ "Found duplicate keys " ], "sqlState" : "23000" }, "FAILED_EXECUTE_UDF" : { - "message" : [ "Failed to execute user defined function (%s: (%s) => %s)" ] + "message" : [ "Failed to execute user defined function (: () => )" ] }, "FAILED_RENAME_PATH" : { - "message" : [ "Failed to rename %s to %s as destination already exists" ], + "message" : [ "Failed to rename to as destination already exists" ], "sqlState" : "22023" }, "FAILED_SET_ORIGINAL_PERMISSION_BACK" : { - "message" : [ "Failed to set original permission %s back to the created path: %s. Exception: %s" ] + "message" : [ "Failed to set original permission back to the created path: . Exception: " ] }, "GRAPHITE_SINK_INVALID_PROTOCOL" : { - "message" : [ "Invalid Graphite protocol: %s" ] + "message" : [ "Invalid Graphite protocol: " ] }, "GRAPHITE_SINK_PROPERTY_MISSING" : { - "message" : [ "Graphite sink requires '%s' property." ] + "message" : [ "Graphite sink requires '' property." ] }, "GROUPING_COLUMN_MISMATCH" : { - "message" : [ "Column of grouping (%s) can't be found in grouping columns %s" ], + "message" : [ "Column of grouping () can't be found in grouping columns " ], "sqlState" : "42000" }, "GROUPING_ID_COLUMN_MISMATCH" : { - "message" : [ "Columns of grouping_id (%s) does not match grouping columns (%s)" ], + "message" : [ "Columns of grouping_id () does not match grouping columns ()" ], "sqlState" : "42000" }, "GROUPING_SIZE_LIMIT_EXCEEDED" : { - "message" : [ "Grouping sets size cannot be greater than %s" ] + "message" : [ "Grouping sets size cannot be greater than " ] }, "ILLEGAL_SUBSTRING" : { - "message" : [ "%s cannot contain %s." ] + "message" : [ " cannot contain ." ] }, "INCOMPARABLE_PIVOT_COLUMN" : { - "message" : [ "Invalid pivot column '%s'. Pivot columns must be comparable." ], + "message" : [ "Invalid pivot column ''. Pivot columns must be comparable." ], "sqlState" : "42000" }, "INCOMPATIBLE_DATASOURCE_REGISTER" : { - "message" : [ "Detected an incompatible DataSourceRegister. Please remove the incompatible library from classpath or upgrade it. Error: %s" ] + "message" : [ "Detected an incompatible DataSourceRegister. Please remove the incompatible library from classpath or upgrade it. Error: " ] }, "INCONSISTENT_BEHAVIOR_CROSS_VERSION" : { - "message" : [ "You may get a different result due to the upgrading to Spark >= %s: %s" ] + "message" : [ "You may get a different result due to the upgrading to Spark >= :
        " ] }, "INDEX_OUT_OF_BOUNDS" : { - "message" : [ "Index %s must be between 0 and the length of the ArrayData." ], + "message" : [ "Index must be between 0 and the length of the ArrayData." ], "sqlState" : "22023" }, "INTERNAL_ERROR" : { - "message" : [ "%s" ] + "message" : [ "" ] }, "INVALID_ARRAY_INDEX" : { - "message" : [ "Invalid index: %s, numElements: %s. If necessary set %s to false to bypass this error." ] + "message" : [ "Invalid index: , numElements: . If necessary set to false to bypass this error." ] }, "INVALID_ARRAY_INDEX_IN_ELEMENT_AT" : { - "message" : [ "Invalid index: %s, numElements: %s. To return NULL instead, use 'try_element_at'. If necessary set %s to false to bypass this error." ] + "message" : [ "Invalid index: , numElements: . To return NULL instead, use 'try_element_at'. If necessary set to false to bypass this error." ] }, "INVALID_FIELD_NAME" : { - "message" : [ "Field name %s is invalid: %s is not a struct." ], + "message" : [ "Field name is invalid: is not a struct." ], "sqlState" : "42000" }, "INVALID_FRACTION_OF_SECOND" : { - "message" : [ "The fraction of sec must be zero. Valid range is [0, 60]. If necessary set %s to false to bypass this error. " ], + "message" : [ "The fraction of sec must be zero. Valid range is [0, 60]. If necessary set to false to bypass this error. " ], "sqlState" : "22023" }, "INVALID_JSON_SCHEMA_MAPTYPE" : { - "message" : [ "Input schema %s can only contain StringType as a key type for a MapType." ] + "message" : [ "Input schema can only contain StringType as a key type for a MapType." ] }, "INVALID_PARAMETER_VALUE" : { - "message" : [ "The value of parameter(s) '%s' in %s is invalid: %s" ], + "message" : [ "The value of parameter(s) '' in is invalid: " ], "sqlState" : "22023" }, "INVALID_SQL_SYNTAX" : { - "message" : [ "Invalid SQL syntax: %s" ], + "message" : [ "Invalid SQL syntax: " ], "sqlState" : "42000" }, "INVALID_SYNTAX_FOR_CAST" : { - "message" : [ "Invalid input syntax for type %s: %s. To return NULL instead, use 'try_cast'. If necessary set %s to false to bypass this error.%s" ], + "message" : [ "Invalid input syntax for type : . To return NULL instead, use 'try_cast'. If necessary set to false to bypass this error.
        " ], "sqlState" : "42000" }, "MAP_KEY_DOES_NOT_EXIST" : { - "message" : [ "Key %s does not exist. To return NULL instead, use 'try_element_at'. If necessary set %s to false to bypass this error.%s" ] + "message" : [ "Key does not exist. To return NULL instead, use 'try_element_at'. If necessary set to false to bypass this error.
        " ] }, "MISSING_COLUMN" : { - "message" : [ "Column '%s' does not exist. Did you mean one of the following? [%s]" ], + "message" : [ "Column '' does not exist. Did you mean one of the following? []" ], "sqlState" : "42000" }, "MISSING_STATIC_PARTITION_COLUMN" : { - "message" : [ "Unknown static partition column: %s" ], + "message" : [ "Unknown static partition column: " ], "sqlState" : "42000" }, "NON_LITERAL_PIVOT_VALUES" : { - "message" : [ "Literal expressions required for pivot values, found '%s'" ], + "message" : [ "Literal expressions required for pivot values, found ''" ], "sqlState" : "42000" }, "NON_PARTITION_COLUMN" : { - "message" : [ "PARTITION clause cannot contain a non-partition column name: %s" ], + "message" : [ "PARTITION clause cannot contain a non-partition column name: " ], "sqlState" : "42000" }, "PARSE_CHAR_MISSING_LENGTH" : { - "message" : [ "DataType %s requires a length parameter, for example %s(10). Please specify the length." ], + "message" : [ "DataType requires a length parameter, for example (10). Please specify the length." ], "sqlState" : "42000" }, "PARSE_EMPTY_STATEMENT" : { @@ -148,41 +148,41 @@ "sqlState" : "42000" }, "PARSE_SYNTAX_ERROR" : { - "message" : [ "Syntax error at or near %s%s" ], + "message" : [ "Syntax error at or near " ], "sqlState" : "42000" }, "PIVOT_VALUE_DATA_TYPE_MISMATCH" : { - "message" : [ "Invalid pivot value '%s': value data type %s does not match pivot column data type %s" ], + "message" : [ "Invalid pivot value '': value data type does not match pivot column data type " ], "sqlState" : "42000" }, "RENAME_SRC_PATH_NOT_FOUND" : { - "message" : [ "Failed to rename as %s was not found" ], + "message" : [ "Failed to rename as was not found" ], "sqlState" : "22023" }, "SECOND_FUNCTION_ARGUMENT_NOT_INTEGER" : { - "message" : [ "The second argument of '%s' function needs to be an integer." ], + "message" : [ "The second argument of '' function needs to be an integer." ], "sqlState" : "22023" }, "UNABLE_TO_ACQUIRE_MEMORY" : { - "message" : [ "Unable to acquire %s bytes of memory, got %s" ] + "message" : [ "Unable to acquire bytes of memory, got " ] }, "UNRECOGNIZED_SQL_TYPE" : { - "message" : [ "Unrecognized SQL type %s" ], + "message" : [ "Unrecognized SQL type " ], "sqlState" : "42000" }, "UNSUPPORTED_DATATYPE" : { - "message" : [ "Unsupported data type %s" ], + "message" : [ "Unsupported data type " ], "sqlState" : "0A000" }, "UNSUPPORTED_FEATURE" : { - "message" : [ "The feature is not supported: %s" ], + "message" : [ "The feature is not supported: " ], "sqlState" : "0A000" }, "UNSUPPORTED_GROUPING_EXPRESSION" : { "message" : [ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup" ] }, "UNSUPPORTED_OPERATION" : { - "message" : [ "The operation is not supported: %s" ] + "message" : [ "The operation is not supported: " ] }, "WRITING_JOB_ABORTED" : { "message" : [ "Writing job aborted" ], diff --git a/core/src/main/scala/org/apache/spark/ErrorInfo.scala b/core/src/main/scala/org/apache/spark/ErrorInfo.scala index 0917085c01b23..6cb8f4d8ed33d 100644 --- a/core/src/main/scala/org/apache/spark/ErrorInfo.scala +++ b/core/src/main/scala/org/apache/spark/ErrorInfo.scala @@ -58,7 +58,8 @@ private[spark] object SparkThrowableHelper { def getMessage(errorClass: String, messageParameters: Array[String]): String = { val errorInfo = errorClassToInfoMap.getOrElse(errorClass, throw new IllegalArgumentException(s"Cannot find error class '$errorClass'")) - String.format(errorInfo.messageFormat, messageParameters: _*) + String.format(errorInfo.messageFormat.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), + messageParameters: _*) } def getSqlState(errorClass: String): String = { From 24d588c8587f84f7e8c1c9f665d55eb14869b707 Mon Sep 17 00:00:00 2001 From: huaxingao Date: Thu, 21 Apr 2022 23:16:22 +0800 Subject: [PATCH 178/535] [SPARK-38950][SQL] Return Array of Predicate for SupportsPushDownCatalystFilters.pushedFilters ### What changes were proposed in this pull request? in `SupportsPushDownCatalystFilters`, change ``` def pushedFilters: Array[Filter] ``` to ``` def pushedFilters: Array[Predicate] ``` ### Why are the changes needed? use v2Filter in DS V2 ### Does this PR introduce _any_ user-facing change? yes ### How was this patch tested? existing tests Closes #36264 from huaxingao/V2Filter. Authored-by: huaxingao Signed-off-by: Wenchen Fan (cherry picked from commit 7221d754075656ce41edacb0fccc1cf89a62fc77) Signed-off-by: Wenchen Fan --- .../connector/SupportsPushDownCatalystFilters.scala | 6 +++--- .../sql/execution/datasources/v2/FileScanBuilder.scala | 3 ++- .../spark/sql/execution/datasources/v2/PushDownUtils.scala | 2 +- .../datasources/v2/parquet/ParquetScanBuilder.scala | 3 ++- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/SupportsPushDownCatalystFilters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/SupportsPushDownCatalystFilters.scala index 9c2a4ac78a24a..99590480220d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/SupportsPushDownCatalystFilters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/SupportsPushDownCatalystFilters.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.internal.connector import org.apache.spark.sql.catalyst.expressions.Expression -import org.apache.spark.sql.sources.Filter +import org.apache.spark.sql.connector.expressions.filter.Predicate /** * A mix-in interface for {@link FileScanBuilder}. File sources can implement this interface to @@ -35,7 +35,7 @@ trait SupportsPushDownCatalystFilters { /** * Returns the data filters that are pushed to the data source via - * {@link #pushFilters(Expression[])}. + * {@link #pushFilters(Predicate[])}. */ - def pushedFilters: Array[Filter] + def pushedFilters: Array[Predicate] } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala index 2dc4137d6f9a1..ae82eecd313e6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/FileScanBuilder.scala @@ -20,6 +20,7 @@ import scala.collection.mutable import org.apache.spark.sql.{sources, SparkSession} import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.connector.expressions.filter.Predicate import org.apache.spark.sql.connector.read.{ScanBuilder, SupportsPushDownRequiredColumns} import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, DataSourceUtils, PartitioningAwareFileIndex, PartitioningUtils} import org.apache.spark.sql.internal.connector.SupportsPushDownCatalystFilters @@ -84,7 +85,7 @@ abstract class FileScanBuilder( dataFilters } - override def pushedFilters: Array[Filter] = pushedDataFilters + override def pushedFilters: Array[Predicate] = pushedDataFilters.map(_.toV2) /* * Push down data filters to the file source, so the data filters can be evaluated there to diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala index 8ac91e02579c5..0ebfed2fe9eef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala @@ -98,7 +98,7 @@ object PushDownUtils extends PredicateHelper { case f: FileScanBuilder => val postScanFilters = f.pushFilters(filters) - (Left(f.pushedFilters), postScanFilters) + (Right(f.pushedFilters), postScanFilters) case _ => (Left(Nil), filters) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala index 1f2f75aebd7bf..2093f4a16ef49 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetScanBuilder.scala @@ -22,6 +22,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec import org.apache.spark.sql.connector.expressions.aggregate.Aggregation +import org.apache.spark.sql.connector.expressions.filter.Predicate import org.apache.spark.sql.connector.read.{Scan, SupportsPushDownAggregates} import org.apache.spark.sql.execution.datasources.{AggregatePushDownUtils, PartitioningAwareFileIndex} import org.apache.spark.sql.execution.datasources.parquet.{ParquetFilters, SparkToParquetSchemaConverter} @@ -84,7 +85,7 @@ case class ParquetScanBuilder( // Note: for Parquet, the actual filter push down happens in [[ParquetPartitionReaderFactory]]. // It requires the Parquet physical schema to determine whether a filter is convertible. // All filters that can be converted to Parquet are pushed down. - override def pushedFilters(): Array[Filter] = pushedParquetFilters + override def pushedFilters: Array[Predicate] = pushedParquetFilters.map(_.toV2) override def pushAggregation(aggregation: Aggregation): Boolean = { if (!sparkSession.sessionState.conf.parquetAggregatePushDown) { From 17552d5ff90e6421b2699726468c5798a12970b9 Mon Sep 17 00:00:00 2001 From: huaxingao Date: Thu, 21 Apr 2022 11:52:04 -0700 Subject: [PATCH 179/535] [SPARK-38950][SQL][FOLLOWUP] Fix java doc ### What changes were proposed in this pull request? `{link #pushFilters(Predicate[])}` -> `{link #pushFilters(Seq[Expression])}` ### Why are the changes needed? Fixed java doc ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Closes #36302 from huaxingao/fix. Authored-by: huaxingao Signed-off-by: huaxingao (cherry picked from commit 0b543e7480b6e414b23e02e6c805a33abc535c89) Signed-off-by: huaxingao --- .../internal/connector/SupportsPushDownCatalystFilters.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/SupportsPushDownCatalystFilters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/SupportsPushDownCatalystFilters.scala index 99590480220d1..4641a06ba3e17 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/SupportsPushDownCatalystFilters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/connector/SupportsPushDownCatalystFilters.scala @@ -35,7 +35,7 @@ trait SupportsPushDownCatalystFilters { /** * Returns the data filters that are pushed to the data source via - * {@link #pushFilters(Predicate[])}. + * {@link #pushFilters(Seq[Expression])}. */ def pushedFilters: Array[Predicate] } From 176dc61a935185f8fd2099b41467783a2ae8b151 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Thu, 21 Apr 2022 19:26:26 -0700 Subject: [PATCH 180/535] [MINOR][DOCS] Also remove Google Analytics from Spark release docs, per ASF policy ### What changes were proposed in this pull request? Remove Google Analytics from Spark release docs. See also https://github.com/apache/spark-website/pull/384 ### Why are the changes needed? New ASF privacy policy requirement ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #36310 from srowen/PrivacyPolicy. Authored-by: Sean Owen Signed-off-by: Dongjoon Hyun (cherry picked from commit 7a58670e2e68ee4950cf62c2be236e00eb8fc44b) Signed-off-by: Dongjoon Hyun --- docs/_layouts/global.html | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index f10d46763cf76..d44639227665d 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -33,21 +33,6 @@ - {% production %} - - - {% endproduction %} - From a52a245d11c20b0360d463c973388f3ee05768ac Mon Sep 17 00:00:00 2001 From: Kazuyuki Tanimura Date: Tue, 5 Apr 2022 17:49:44 -0700 Subject: [PATCH 234/535] [SPARK-38786][SQL][TEST] Bug in StatisticsSuite 'change stats after add/drop partition command' ### What changes were proposed in this pull request? https://github.com/apache/spark/blob/cbffc12f90e45d33e651e38cf886d7ab4bcf96da/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala#L979 It should be `partDir2` instead of `partDir1`. Looks like it is a copy paste bug. ### Why are the changes needed? Due to this test bug, the drop command was dropping a wrong (`partDir1`) underlying file in the test. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added extra underlying file location check. Closes #36075 from kazuyukitanimura/SPARK-38786. Authored-by: Kazuyuki Tanimura Signed-off-by: Chao Sun (cherry picked from commit a6b04f007c07fe00637aa8be33a56f247a494110) Signed-off-by: Dongjoon Hyun --- .../test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 8afdd50b0c96b..46acc9b2f0a2e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -976,7 +976,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto s""" |ALTER TABLE $table ADD |PARTITION (ds='2008-04-09', hr='11') LOCATION '${partDir1.toURI.toString}' - |PARTITION (ds='2008-04-09', hr='12') LOCATION '${partDir1.toURI.toString}' + |PARTITION (ds='2008-04-09', hr='12') LOCATION '${partDir2.toURI.toString}' """.stripMargin) if (autoUpdate) { val fetched2 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = None) @@ -999,6 +999,7 @@ class StatisticsSuite extends StatisticsCollectionTestBase with TestHiveSingleto sql(s"ALTER TABLE $table DROP PARTITION (ds='2008-04-08'), PARTITION (hr='12')") assert(spark.sessionState.catalog.listPartitions(TableIdentifier(table)) .map(_.spec).toSet == Set(Map("ds" -> "2008-04-09", "hr" -> "11"))) + assert(partDir1.exists()) // only one partition left if (autoUpdate) { val fetched4 = checkTableStats(table, hasSizeInBytes = true, expectedRowCounts = None) From cf13262bc2d7ee1bce8c08292725353b2beccadd Mon Sep 17 00:00:00 2001 From: "Qian.Sun" Date: Mon, 9 May 2022 10:00:05 -0700 Subject: [PATCH 235/535] [SPARK-38939][SQL][FOLLOWUP] Replace named parameter with comment in ReplaceColumns ### What changes were proposed in this pull request? This PR aims to replace named parameter with comment in `ReplaceColumns`. ### Why are the changes needed? #36252 changed signature of deleteColumn#**TableChange.java**, but this PR breaks sbt compilation in k8s integration test. ```shell > build/sbt -Pkubernetes -Pkubernetes-integration-tests -Dtest.exclude.tags=r -Dspark.kubernetes.test.imageRepo=kubespark "kubernetes-integration-tests/test" [error] /Users/IdeaProjects/spark/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala:147:45: not found: value ifExists [error] TableChange.deleteColumn(Array(name), ifExists = false) [error] ^ [error] /Users/IdeaProjects/spark/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala:159:19: value ++ is not a member of Array[Nothing] [error] deleteChanges ++ addChanges [error] ^ [error] two errors found [error] (catalyst / Compile / compileIncremental) Compilation failed ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the GA and k8s integration test. Closes #36487 from dcoliversun/SPARK-38939. Authored-by: Qian.Sun Signed-off-by: huaxingao (cherry picked from commit 16b5124d75dc974c37f2fd87c78d231f8a3bf772) Signed-off-by: huaxingao --- .../spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala index 8cc93c2dd099b..4bd4f58b6a78c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/v2AlterTableCommands.scala @@ -144,7 +144,7 @@ case class ReplaceColumns( require(table.resolved) val deleteChanges = table.schema.fieldNames.map { name => // REPLACE COLUMN should require column to exist - TableChange.deleteColumn(Array(name), ifExists = false) + TableChange.deleteColumn(Array(name), false /* ifExists */) } val addChanges = columnsToAdd.map { col => assert(col.path.isEmpty) From c759151838b7515a3d5fc5abb33d0d93e067cd75 Mon Sep 17 00:00:00 2001 From: Lorenzo Martini Date: Mon, 9 May 2022 19:44:19 -0500 Subject: [PATCH 236/535] [SPARK-39107][SQL] Account for empty string input in regex replace ### What changes were proposed in this pull request? When trying to perform a regex replace, account for the possibility of having empty strings as input. ### Why are the changes needed? https://github.com/apache/spark/pull/29891 was merged to address https://issues.apache.org/jira/browse/SPARK-30796 and introduced a bug that would not allow regex matching on empty strings, as it would account for position within substring but not consider the case where input string has length 0 (empty string) From https://issues.apache.org/jira/browse/SPARK-39107 there is a change in behavior between spark versions. 3.0.2 ``` scala> val df = spark.sql("SELECT '' AS col") df: org.apache.spark.sql.DataFrame = [col: string] scala> df.withColumn("replaced", regexp_replace(col("col"), "^$", "")).show +---+--------+ |col|replaced| +---+--------+ | | | +---+--------+ ``` 3.1.2 ``` scala> val df = spark.sql("SELECT '' AS col") df: org.apache.spark.sql.DataFrame = [col: string] scala> df.withColumn("replaced", regexp_replace(col("col"), "^$", "")).show +---+--------+ |col|replaced| +---+--------+ | | | +---+--------+ ``` The 3.0.2 outcome is the expected and correct one ### Does this PR introduce _any_ user-facing change? Yes compared to spark 3.2.1, as it brings back the correct behavior when trying to regex match empty strings, as shown in the example above. ### How was this patch tested? Added special casing test in `RegexpExpressionsSuite.RegexReplace` with empty string replacement. Closes #36457 from LorenzoMartini/lmartini/fix-empty-string-replace. Authored-by: Lorenzo Martini Signed-off-by: Sean Owen (cherry picked from commit 731aa2cdf8a78835621fbf3de2d3492b27711d1a) Signed-off-by: Sean Owen --- .../spark/sql/catalyst/expressions/regexpExpressions.scala | 4 ++-- .../sql/catalyst/expressions/RegexpExpressionsSuite.scala | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index bfaaba514462f..01763f082d606 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -642,7 +642,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio } val source = s.toString() val position = i.asInstanceOf[Int] - 1 - if (position < source.length) { + if (position == 0 || position < source.length) { val m = pattern.matcher(source) m.region(position, source.length) result.delete(0, result.length()) @@ -696,7 +696,7 @@ case class RegExpReplace(subject: Expression, regexp: Expression, rep: Expressio } String $source = $subject.toString(); int $position = $pos - 1; - if ($position < $source.length()) { + if ($position == 0 || $position < $source.length()) { $classNameStringBuffer $termResult = new $classNameStringBuffer(); java.util.regex.Matcher $matcher = $termPattern.matcher($source); $matcher.region($position, $source.length()); diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 019857580d077..2ca9ede77421a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -293,6 +293,7 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val row4 = create_row(null, "(\\d+)", "###") val row5 = create_row("100-200", null, "###") val row6 = create_row("100-200", "(-)", null) + val row7 = create_row("", "^$", "") val s = 's.string.at(0) val p = 'p.string.at(1) @@ -305,6 +306,7 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(expr, null, row4) checkEvaluation(expr, null, row5) checkEvaluation(expr, null, row6) + checkEvaluation(expr, "", row7) // test position val exprWithPos = RegExpReplace(s, p, r, 4) checkEvaluation(exprWithPos, "100-num", row1) @@ -313,6 +315,7 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(exprWithPos, null, row4) checkEvaluation(exprWithPos, null, row5) checkEvaluation(exprWithPos, null, row6) + checkEvaluation(exprWithPos, "", row7) val exprWithLargePos = RegExpReplace(s, p, r, 7) checkEvaluation(exprWithLargePos, "100-20num", row1) checkEvaluation(exprWithLargePos, "100-20###", row2) From 7838a140e3b380b8b65d21e73d99d00ff2e874d7 Mon Sep 17 00:00:00 2001 From: Yuto Akutsu Date: Mon, 9 May 2022 21:50:44 -0500 Subject: [PATCH 237/535] [MINOR][INFRA][3.3] Add ANTLR generated files to .gitignore ### What changes were proposed in this pull request? Add git ignore entries for files created by ANTLR. This is a backport of #35838. ### Why are the changes needed? To avoid developers from accidentally adding those files when working on parser/lexer. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By making sure those files are ignored by git status when they exist. Closes #36489 from yutoacts/minor_gitignore_3.3. Authored-by: Yuto Akutsu Signed-off-by: Sean Owen --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index b75878189a975..0e2f59f43f83d 100644 --- a/.gitignore +++ b/.gitignore @@ -117,3 +117,8 @@ spark-warehouse/ # For Node.js node_modules + +# For Antlr +sql/catalyst/gen/ +sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.tokens +sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/gen/ From c6584af37a45bd782e92b10e130caba42877c64c Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Tue, 10 May 2022 17:37:23 +0800 Subject: [PATCH 238/535] [SPARK-39135][SQL] DS V2 aggregate partial push-down should supports group by without aggregate functions ### What changes were proposed in this pull request? Currently, the SQL show below not supported by DS V2 aggregate partial push-down. `select key from tab group by key` ### Why are the changes needed? Make DS V2 aggregate partial push-down supports group by without aggregate functions. ### Does this PR introduce _any_ user-facing change? 'No'. New feature. ### How was this patch tested? New tests Closes #36492 from beliefer/SPARK-39135. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit decd393e23406d82b47aa75c4d24db04c7d1efd6) Signed-off-by: Wenchen Fan --- .../v2/V2ScanRelationPushDown.scala | 2 +- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 51 +++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala index 20d508df5683b..60048f83fb183 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanRelationPushDown.scala @@ -294,7 +294,7 @@ object V2ScanRelationPushDown extends Rule[LogicalPlan] with PredicateHelper wit private def supportPartialAggPushDown(agg: Aggregation): Boolean = { // We don't know the agg buffer of `GeneralAggregateFunc`, so can't do partial agg push down. // If `Sum`, `Count`, `Avg` with distinct, can't do partial agg push down. - agg.aggregateExpressions().exists { + agg.aggregateExpressions().isEmpty || agg.aggregateExpressions().exists { case sum: Sum => !sum.isDistinct case count: Count => !count.isDistinct case avg: Avg => !avg.isDistinct diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 30dbc7bd60983..b6f36b912f870 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -671,6 +671,57 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(df, Seq(Row(5))) } + test("scan with aggregate push-down: GROUP BY without aggregate functions") { + val df = sql("select name FROM h2.test.employee GROUP BY name") + checkAggregateRemoved(df) + checkPushedInfo(df, + "PushedAggregates: [], PushedFilters: [], PushedGroupByExpressions: [NAME],") + checkAnswer(df, Seq(Row("alex"), Row("amy"), Row("cathy"), Row("david"), Row("jen"))) + + val df2 = spark.read + .option("partitionColumn", "dept") + .option("lowerBound", "0") + .option("upperBound", "2") + .option("numPartitions", "2") + .table("h2.test.employee") + .groupBy($"name") + .agg(Map.empty[String, String]) + checkAggregateRemoved(df2, false) + checkPushedInfo(df2, + "PushedAggregates: [], PushedFilters: [], PushedGroupByExpressions: [NAME],") + checkAnswer(df2, Seq(Row("alex"), Row("amy"), Row("cathy"), Row("david"), Row("jen"))) + + val df3 = sql("SELECT CASE WHEN SALARY > 8000 AND SALARY < 10000 THEN SALARY ELSE 0 END as" + + " key FROM h2.test.employee GROUP BY key") + checkAggregateRemoved(df3) + checkPushedInfo(df3, + """ + |PushedAggregates: [], + |PushedFilters: [], + |PushedGroupByExpressions: + |[CASE WHEN (SALARY > 8000.00) AND (SALARY < 10000.00) THEN SALARY ELSE 0.00 END], + |""".stripMargin.replaceAll("\n", " ")) + checkAnswer(df3, Seq(Row(0), Row(9000))) + + val df4 = spark.read + .option("partitionColumn", "dept") + .option("lowerBound", "0") + .option("upperBound", "2") + .option("numPartitions", "2") + .table("h2.test.employee") + .groupBy(when(($"SALARY" > 8000).and($"SALARY" < 10000), $"SALARY").otherwise(0).as("key")) + .agg(Map.empty[String, String]) + checkAggregateRemoved(df4, false) + checkPushedInfo(df4, + """ + |PushedAggregates: [], + |PushedFilters: [], + |PushedGroupByExpressions: + |[CASE WHEN (SALARY > 8000.00) AND (SALARY < 10000.00) THEN SALARY ELSE 0.00 END], + |""".stripMargin.replaceAll("\n", " ")) + checkAnswer(df4, Seq(Row(0), Row(9000))) + } + test("scan with aggregate push-down: COUNT(col)") { val df = sql("select COUNT(DEPT) FROM h2.test.employee") checkAggregateRemoved(df) From a4b420cacd4eef93ea661d31d886fe3b60d5fe64 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Tue, 10 May 2022 20:19:36 +0800 Subject: [PATCH 239/535] [SPARK-39106][SQL] Correct conditional expression constant folding - add try-catch when we fold children inside `ConditionalExpression` if it's not foldable - mark `CaseWhen` and `If` as foldable if it's children are foldable For a conditional expression, we should add a try-catch to partially fold the constant inside it's children because some bracnhes may no be evaluated at runtime. For example if c1 or c2 is not null, the last branch should be never hit: ```sql SELECT COALESCE(c1, c2, 1/0); ``` Besides, for CaseWhen and If, we should mark it as foldable if it's children are foldable. It is safe since the both non-codegen and codegen code path have already respected the evaluation order. yes, bug fix add more test in sql file Closes #36468 from ulysses-you/SPARK-39106. Authored-by: ulysses-you Signed-off-by: Wenchen Fan (cherry picked from commit 08a4ade8ba881589da0741b3ffacd3304dc1e9b5) Signed-off-by: Wenchen Fan --- .../sql/catalyst/expressions/Expression.scala | 2 + .../expressions/conditionalExpressions.scala | 1 + .../expressions/nullExpressions.scala | 3 - .../sql/catalyst/optimizer/expressions.scala | 56 ++++++--- .../optimizer/ConstantFoldingSuite.scala | 38 ++++++ .../inputs/ansi/conditional-functions.sql | 19 ++- .../sql-tests/inputs/postgreSQL/case.sql | 1 - .../inputs/udf/postgreSQL/udf-case.sql | 6 +- .../ansi/conditional-functions.sql.out | 115 +++++++++++++++++- .../sql-tests/results/postgreSQL/case.sql.out | 27 ++-- .../results/udf/postgreSQL/udf-case.sql.out | 30 +---- 11 files changed, 224 insertions(+), 74 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index 30b6773ce1cbf..e1d8c2e43e264 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -459,6 +459,8 @@ trait Nondeterministic extends Expression { * All optimization should be careful with the evaluation order. */ trait ConditionalExpression extends Expression { + final override def foldable: Boolean = children.forall(_.foldable) + /** * Return the children expressions which can always be hit at runtime. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala index 5dacabd646d8e..7213440bebe41 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala @@ -48,6 +48,7 @@ case class If(predicate: Expression, trueValue: Expression, falseValue: Expressi override def second: Expression = trueValue override def third: Expression = falseValue override def nullable: Boolean = trueValue.nullable || falseValue.nullable + /** * Only the condition expression will always be evaluated. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala index 8f59ab5b24942..8d171c2c6631d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala @@ -53,9 +53,6 @@ case class Coalesce(children: Seq[Expression]) /** Coalesce is nullable if all of its children are nullable, or if it has no children. */ override def nullable: Boolean = children.forall(_.nullable) - // Coalesce is foldable if all children are foldable. - override def foldable: Boolean = children.forall(_.foldable) - final override val nodePatterns: Seq[TreePattern] = Seq(COALESCE) override def checkInputDataTypes(): TypeCheckResult = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 5aa134a0c1109..158734597f77b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer import scala.collection.immutable.HashSet import scala.collection.mutable.{ArrayBuffer, Stack} +import scala.util.control.NonFatal import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.expressions.{MultiLikeBase, _} @@ -28,7 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.objects.AssertNotNull import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -import org.apache.spark.sql.catalyst.trees.AlwaysProcess +import org.apache.spark.sql.catalyst.trees.{AlwaysProcess, TreeNodeTag} import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -43,6 +44,9 @@ import org.apache.spark.unsafe.types.UTF8String * equivalent [[Literal]] values. */ object ConstantFolding extends Rule[LogicalPlan] { + // This tag is for avoid repeatedly evaluating expression inside conditional expression + // which has already failed to evaluate before. + private[sql] val FAILED_TO_EVALUATE = TreeNodeTag[Unit]("FAILED_TO_EVALUATE") private def hasNoSideEffect(e: Expression): Boolean = e match { case _: Attribute => true @@ -52,22 +56,42 @@ object ConstantFolding extends Rule[LogicalPlan] { case _ => false } + private def constantFolding( + e: Expression, + isConditionalBranch: Boolean = false): Expression = e match { + case c: ConditionalExpression if !c.foldable => + c.mapChildren(constantFolding(_, isConditionalBranch = true)) + + // Skip redundant folding of literals. This rule is technically not necessary. Placing this + // here avoids running the next rule for Literal values, which would create a new Literal + // object and running eval unnecessarily. + case l: Literal => l + + case Size(c: CreateArray, _) if c.children.forall(hasNoSideEffect) => + Literal(c.children.length) + case Size(c: CreateMap, _) if c.children.forall(hasNoSideEffect) => + Literal(c.children.length / 2) + + case e if e.getTagValue(FAILED_TO_EVALUATE).isDefined => e + + // Fold expressions that are foldable. + case e if e.foldable => + try { + Literal.create(e.eval(EmptyRow), e.dataType) + } catch { + case NonFatal(_) if isConditionalBranch => + // When doing constant folding inside conditional expressions, we should not fail + // during expression evaluation, as the branch we are evaluating may not be reached at + // runtime, and we shouldn't fail the query, to match the original behavior. + e.setTagValue(FAILED_TO_EVALUATE, ()) + e + } + + case other => other.mapChildren(constantFolding(_, isConditionalBranch)) + } + def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning(AlwaysProcess.fn, ruleId) { - case q: LogicalPlan => q.transformExpressionsDownWithPruning( - AlwaysProcess.fn, ruleId) { - // Skip redundant folding of literals. This rule is technically not necessary. Placing this - // here avoids running the next rule for Literal values, which would create a new Literal - // object and running eval unnecessarily. - case l: Literal => l - - case Size(c: CreateArray, _) if c.children.forall(hasNoSideEffect) => - Literal(c.children.length) - case Size(c: CreateMap, _) if c.children.forall(hasNoSideEffect) => - Literal(c.children.length / 2) - - // Fold expressions that are foldable. - case e if e.foldable => Literal.create(e.eval(EmptyRow), e.dataType) - } + case q: LogicalPlan => q.mapExpressions(constantFolding(_)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index b06e001e41243..7f534c6e43f86 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.optimizer +import org.apache.spark.SparkArithmeticException +import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.analysis.{EliminateSubqueryAliases, UnresolvedExtractValue} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ @@ -25,6 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, StaticInvoke} import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.RuleExecutor +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.ByteArray @@ -330,4 +333,39 @@ class ConstantFoldingSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("SPARK-39106: Correct conditional expression constant folding") { + val t = LocalRelation.fromExternalRows( + $"c".double :: Nil, + Row(1d) :: Row(null) :: Row(Double.NaN) :: Nil) + + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + // conditional expression is foldable, throw exception during query compilation + Seq( + t.select(CaseWhen((Divide(1, 0) === 1, Add(1, 0)) :: Nil, Subtract(1, 0))), + t.select(If(Divide(1, 0) === 1, Add(1, 0), Add(1, 0))), + t.select(Coalesce(Divide(1, 0) :: Add(1, 0) :: Nil)), + t.select(NaNvl(Divide(1, 0), Add(1, 0))) + ).foreach { query => + intercept[SparkArithmeticException] { + Optimize.execute(query.analyze) + } + } + + // conditional expression is not foldable, suppress the exception during query compilation + Seq( + t.select(CaseWhen(($"c" === 1d, Divide(1, 0)) :: Nil, 1d)), + t.select(If($"c" === 1d, Divide(1, 0), 1d)), + t.select(Coalesce($"c" :: Divide(1, 0) :: Nil)), + t.select(NaNvl($"c", Divide(1, 0))) + ).foreach { query => + val optimized = Optimize.execute(query.analyze) + val failedToEvaluated = optimized.expressions.flatMap(_.collect { + case e: Expression if e.getTagValue(ConstantFolding.FAILED_TO_EVALUATE).isDefined => e + }) + assert(failedToEvaluated.size == 1) + comparePlans(query.analyze, optimized) + } + } + } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/conditional-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/conditional-functions.sql index 5c548b1e9c4fc..ba8f0ffe7f1b8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/ansi/conditional-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/conditional-functions.sql @@ -1,6 +1,21 @@ -- Tests for conditional functions -CREATE TABLE t USING PARQUET AS SELECT c1, c2 FROM VALUES(1, 0),(2, 1) AS t(c1, c2); -SELECT nanvl(c1, c1/c2 + c1/c2) FROM t; +CREATE TABLE t USING PARQUET AS SELECT c1, c2 FROM VALUES(1d, 0),(2d, 1),(null, 1),(CAST('NaN' AS DOUBLE), 0) AS t(c1, c2); + +SELECT nanvl(c2, c1/c2 + c1/c2) FROM t; +SELECT nanvl(c2, 1/0) FROM t; +SELECT nanvl(1-0, 1/0) FROM t; + +SELECT if(c2 >= 0, 1-0, 1/0) from t; +SELECT if(1 == 1, 1, 1/0); +SELECT if(1 != 1, 1/0, 1); + +SELECT coalesce(c2, 1/0) from t; +SELECT coalesce(1, 1/0); +SELECT coalesce(null, 1, 1/0); + +SELECT case when c2 >= 0 then 1 else 1/0 end from t; +SELECT case when 1 < 2 then 1 else 1/0 end; +SELECT case when 1 > 2 then 1/0 else 1 end; DROP TABLE IF EXISTS t; diff --git a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/case.sql b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/case.sql index b39ccb85fb366..e8129b8cfaf66 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/case.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/postgreSQL/case.sql @@ -65,7 +65,6 @@ SELECT '7' AS `None`, CASE WHEN rand() < 0 THEN 1 END AS `NULL on no matches`; --- [SPARK-33008] Spark SQL throws an exception -- Constant-expression folding shouldn't evaluate unreachable subexpressions SELECT CASE WHEN 1=0 THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END; SELECT CASE 1 WHEN 0 THEN 1/0 WHEN 1 THEN 1 ELSE 2/0 END; diff --git a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql index 5322c1b502439..4ee0dba8fa9c0 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/udf/postgreSQL/udf-case.sql @@ -67,14 +67,12 @@ SELECT '7' AS `None`, CASE WHEN rand() < udf(0) THEN 1 END AS `NULL on no matches`; --- [SPARK-33008] Spark SQL throws an exception -- Constant-expression folding shouldn't evaluate unreachable subexpressions SELECT CASE WHEN udf(1=0) THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END; SELECT CASE 1 WHEN 0 THEN 1/udf(0) WHEN 1 THEN 1 ELSE 2/0 END; --- However we do not currently suppress folding of potentially --- reachable subexpressions -SELECT CASE WHEN i > 100 THEN udf(1/0) ELSE udf(0) END FROM case_tbl; +-- SPARK-39122: Python UDF does not follow the conditional expression evaluation order +-- SELECT CASE WHEN i > 100 THEN udf(1/0) ELSE udf(0) END FROM case_tbl; -- Test for cases involving untyped literals in test expression SELECT CASE 'a' WHEN 'a' THEN udf(1) ELSE udf(2) END; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/conditional-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/conditional-functions.sql.out index d3af659fc480f..6a4f694f4d77d 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/conditional-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/conditional-functions.sql.out @@ -1,9 +1,9 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 3 +-- Number of queries: 14 -- !query -CREATE TABLE t USING PARQUET AS SELECT c1, c2 FROM VALUES(1, 0),(2, 1) AS t(c1, c2) +CREATE TABLE t USING PARQUET AS SELECT c1, c2 FROM VALUES(1d, 0),(2d, 1),(null, 1),(CAST('NaN' AS DOUBLE), 0) AS t(c1, c2) -- !query schema struct<> -- !query output @@ -11,12 +11,117 @@ struct<> -- !query -SELECT nanvl(c1, c1/c2 + c1/c2) FROM t +SELECT nanvl(c2, c1/c2 + c1/c2) FROM t -- !query schema -struct +struct +-- !query output +0.0 +0.0 +1.0 +1.0 + + +-- !query +SELECT nanvl(c2, 1/0) FROM t +-- !query schema +struct +-- !query output +0.0 +0.0 +1.0 +1.0 + + +-- !query +SELECT nanvl(1-0, 1/0) FROM t +-- !query schema +struct +-- !query output +1.0 +1.0 +1.0 +1.0 + + +-- !query +SELECT if(c2 >= 0, 1-0, 1/0) from t +-- !query schema +struct<(IF((c2 >= 0), (1 - 0), (1 / 0))):double> +-- !query output +1.0 +1.0 +1.0 +1.0 + + +-- !query +SELECT if(1 == 1, 1, 1/0) +-- !query schema +struct<(IF((1 = 1), 1, (1 / 0))):double> +-- !query output +1.0 + + +-- !query +SELECT if(1 != 1, 1/0, 1) +-- !query schema +struct<(IF((NOT (1 = 1)), (1 / 0), 1)):double> +-- !query output +1.0 + + +-- !query +SELECT coalesce(c2, 1/0) from t +-- !query schema +struct +-- !query output +0.0 +0.0 +1.0 +1.0 + + +-- !query +SELECT coalesce(1, 1/0) +-- !query schema +struct +-- !query output +1.0 + + +-- !query +SELECT coalesce(null, 1, 1/0) +-- !query schema +struct +-- !query output +1.0 + + +-- !query +SELECT case when c2 >= 0 then 1 else 1/0 end from t +-- !query schema +struct= 0) THEN 1 ELSE (1 / 0) END:double> +-- !query output +1.0 +1.0 +1.0 +1.0 + + +-- !query +SELECT case when 1 < 2 then 1 else 1/0 end +-- !query schema +struct +-- !query output +1.0 + + +-- !query +SELECT case when 1 > 2 then 1/0 else 1 end +-- !query schema +struct 2) THEN (1 / 0) ELSE 1 END:double> -- !query output 1.0 -2.0 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out index 6f28df8358a40..603df103846be 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/case.sql.out @@ -176,37 +176,28 @@ struct -- !query SELECT CASE WHEN 1=0 THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END -- !query schema -struct<> +struct -- !query output -org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 26) == -SELECT CASE WHEN 1=0 THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END - ^^^ +1.0 -- !query SELECT CASE 1 WHEN 0 THEN 1/0 WHEN 1 THEN 1 ELSE 2/0 END -- !query schema -struct<> +struct -- !query output -org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 26) == -SELECT CASE 1 WHEN 0 THEN 1/0 WHEN 1 THEN 1 ELSE 2/0 END - ^^^ +1.0 -- !query SELECT CASE WHEN i > 100 THEN 1/0 ELSE 0 END FROM case_tbl -- !query schema -struct<> +struct 100) THEN (1 / 0) ELSE 0 END:double> -- !query output -org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 30) == -SELECT CASE WHEN i > 100 THEN 1/0 ELSE 0 END FROM case_tbl - ^^^ +0.0 +0.0 +0.0 +0.0 -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out index 27a13805199c7..7c0a37ee66743 100755 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-case.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 35 +-- Number of queries: 34 -- !query @@ -176,37 +176,17 @@ struct -- !query SELECT CASE WHEN udf(1=0) THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END -- !query schema -struct<> +struct -- !query output -org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 31) == -SELECT CASE WHEN udf(1=0) THEN 1/0 WHEN 1=1 THEN 1 ELSE 2/0 END - ^^^ +1.0 -- !query SELECT CASE 1 WHEN 0 THEN 1/udf(0) WHEN 1 THEN 1 ELSE 2/0 END -- !query schema -struct<> +struct -- !query output -org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 54) == -...HEN 1/udf(0) WHEN 1 THEN 1 ELSE 2/0 END - ^^^ - - --- !query -SELECT CASE WHEN i > 100 THEN udf(1/0) ELSE udf(0) END FROM case_tbl --- !query schema -struct<> --- !query output -org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 34) == -...LECT CASE WHEN i > 100 THEN udf(1/0) ELSE udf(0) END FROM case_tbl - ^^^ +1.0 -- !query From 1738f48e31b00141d013051df5f8a4e179dc0bcb Mon Sep 17 00:00:00 2001 From: Sachin Tripathi Date: Wed, 11 May 2022 09:13:16 +0900 Subject: [PATCH 240/535] [MINOR][DOCS][PYTHON] Fixes pandas import statement in code example ### What changes were proposed in this pull request? In 'Applying a Function' section, example code import statement `as pd` was added ### Why are the changes needed? In 'Applying a Function' section, example code import statement needs to have a `as pd` because in function definitions we're using `pd`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Documentation change only. Continues to be markdown compliant. Closes #36502 from snifhex/patch-1. Authored-by: Sachin Tripathi Signed-off-by: Hyukjin Kwon (cherry picked from commit 60edc5758e82e76a37ce5a5f98e870fac587b656) Signed-off-by: Hyukjin Kwon --- python/docs/source/getting_started/quickstart_df.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/docs/source/getting_started/quickstart_df.ipynb b/python/docs/source/getting_started/quickstart_df.ipynb index edfc730fb2425..8c237a30a09da 100644 --- a/python/docs/source/getting_started/quickstart_df.ipynb +++ b/python/docs/source/getting_started/quickstart_df.ipynb @@ -709,7 +709,7 @@ } ], "source": [ - "import pandas\n", + "import pandas as pd\n", "from pyspark.sql.functions import pandas_udf\n", "\n", "@pandas_udf('long')\n", From 7d57577037fe082b6b1ded093943669dd1f8dd05 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Wed, 11 May 2022 14:32:05 +0800 Subject: [PATCH 241/535] [SPARK-39112][SQL] UnsupportedOperationException if spark.sql.ui.explainMode is set to cost ### What changes were proposed in this pull request? Add a new leaf like node `LeafNodeWithoutStats` and apply to the list: - ResolvedDBObjectName - ResolvedNamespace - ResolvedTable - ResolvedView - ResolvedNonPersistentFunc - ResolvedPersistentFunc ### Why are the changes needed? We enable v2 command at 3.3.0 branch by default `spark.sql.legacy.useV1Command`. However this is a behavior change between v1 and c2 command. - v1 command: We resolve logical plan to command at analyzer phase by `ResolveSessionCatalog` - v2 commnd: We resolve logical plan to v2 command at physical phase by `DataSourceV2Strategy` Foe cost explain mode, we will call `LogicalPlanStats.stats` using optimized plan so there is a gap between v1 and v2 command. Unfortunately, the logical plan of v2 command contains the `LeafNode` which does not override the `computeStats`. As a result, there is a error running such sql: ```sql set spark.sql.ui.explainMode=cost; show tables; ``` ``` java.lang.UnsupportedOperationException: at org.apache.spark.sql.catalyst.plans.logical.LeafNode.computeStats(LogicalPlan.scala:171) at org.apache.spark.sql.catalyst.plans.logical.LeafNode.computeStats$(LogicalPlan.scala:171) at org.apache.spark.sql.catalyst.analysis.ResolvedNamespace.computeStats(v2ResolutionPlans.scala:155) at org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.default(SizeInBytesOnlyStatsPlanVisitor.scala:55) at org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.default(SizeInBytesOnlyStatsPlanVisitor.scala:27) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit(LogicalPlanVisitor.scala:49) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlanVisitor.visit$(LogicalPlanVisitor.scala:25) at org.apache.spark.sql.catalyst.plans.logical.statsEstimation.SizeInBytesOnlyStatsPlanVisitor$.visit(SizeInBytesOnlyStatsPlanVisitor.scala:27) at org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.$anonfun$stats$1(LogicalPlanStats.scala:37) at scala.Option.getOrElse(Option.scala:189) at org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats(LogicalPlanStats.scala:33) at org.apache.spark.sql.catalyst.plans.logical.statsEstimation.LogicalPlanStats.stats$(LogicalPlanStats.scala:33) at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.stats(LogicalPlan.scala:30) ``` ### Does this PR introduce _any_ user-facing change? yes, bug fix ### How was this patch tested? add test Closes #36488 from ulysses-you/SPARK-39112. Authored-by: ulysses-you Signed-off-by: Wenchen Fan (cherry picked from commit 06fd340daefd67a3e96393539401c9bf4b3cbde9) Signed-off-by: Wenchen Fan --- .../catalyst/analysis/v2ResolutionPlans.scala | 28 ++++++++++++++----- .../org/apache/spark/sql/ExplainSuite.scala | 15 ++++++++++ 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala index 4cffead93b257..a87f9e0082d61 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/v2ResolutionPlans.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec import org.apache.spark.sql.catalyst.expressions.{Attribute, LeafExpression, Unevaluable} -import org.apache.spark.sql.catalyst.plans.logical.LeafNode +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, Statistics} import org.apache.spark.sql.catalyst.trees.TreePattern.{TreePattern, UNRESOLVED_FUNC} import org.apache.spark.sql.catalyst.util.CharVarcharUtils import org.apache.spark.sql.connector.catalog.{CatalogPlugin, FunctionCatalog, Identifier, Table, TableCatalog} @@ -140,11 +140,19 @@ case class UnresolvedDBObjectName(nameParts: Seq[String], isNamespace: Boolean) override def output: Seq[Attribute] = Nil } +/** + * A resolved leaf node whose statistics has no meaning. + */ +trait LeafNodeWithoutStats extends LeafNode { + // Here we just return a dummy statistics to avoid compute statsCache + override def stats: Statistics = Statistics.DUMMY +} + /** * A plan containing resolved namespace. */ case class ResolvedNamespace(catalog: CatalogPlugin, namespace: Seq[String]) - extends LeafNode { + extends LeafNodeWithoutStats { override def output: Seq[Attribute] = Nil } @@ -156,7 +164,7 @@ case class ResolvedTable( identifier: Identifier, table: Table, outputAttributes: Seq[Attribute]) - extends LeafNode { + extends LeafNodeWithoutStats { override def output: Seq[Attribute] = { val qualifier = catalog.name +: identifier.namespace :+ identifier.name outputAttributes.map(_.withQualifier(qualifier)) @@ -191,7 +199,7 @@ case class ResolvedFieldPosition(position: ColumnPosition) extends FieldPosition */ // TODO: create a generic representation for temp view, v1 view and v2 view, after we add view // support to v2 catalog. For now we only need the identifier to fallback to v1 command. -case class ResolvedView(identifier: Identifier, isTemp: Boolean) extends LeafNode { +case class ResolvedView(identifier: Identifier, isTemp: Boolean) extends LeafNodeWithoutStats { override def output: Seq[Attribute] = Nil } @@ -202,20 +210,26 @@ case class ResolvedPersistentFunc( catalog: FunctionCatalog, identifier: Identifier, func: UnboundFunction) - extends LeafNode { + extends LeafNodeWithoutStats { override def output: Seq[Attribute] = Nil } /** * A plan containing resolved non-persistent (temp or built-in) function. */ -case class ResolvedNonPersistentFunc(name: String, func: UnboundFunction) extends LeafNode { +case class ResolvedNonPersistentFunc( + name: String, + func: UnboundFunction) + extends LeafNodeWithoutStats { override def output: Seq[Attribute] = Nil } /** * A plan containing resolved database object name with catalog determined. */ -case class ResolvedDBObjectName(catalog: CatalogPlugin, nameParts: Seq[String]) extends LeafNode { +case class ResolvedDBObjectName( + catalog: CatalogPlugin, + nameParts: Seq[String]) + extends LeafNodeWithoutStats { override def output: Seq[Attribute] = Nil } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala index 073b67e0472bc..d637283446625 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExplainSuite.scala @@ -528,6 +528,21 @@ class ExplainSuite extends ExplainSuiteHelper with DisableAdaptiveExecutionSuite "== Analyzed Logical Plan ==\nCreateViewCommand") } } + + test("SPARK-39112: UnsupportedOperationException if explain cost command using v2 command") { + withTempDir { dir => + sql("EXPLAIN COST CREATE DATABASE tmp") + sql("EXPLAIN COST DESC DATABASE tmp") + sql(s"EXPLAIN COST ALTER DATABASE tmp SET LOCATION '${dir.toURI.toString}'") + sql("EXPLAIN COST USE tmp") + sql("EXPLAIN COST CREATE TABLE t(c1 int) USING PARQUET") + sql("EXPLAIN COST SHOW TABLES") + sql("EXPLAIN COST SHOW CREATE TABLE t") + sql("EXPLAIN COST SHOW TBLPROPERTIES t") + sql("EXPLAIN COST DROP TABLE t") + sql("EXPLAIN COST DROP DATABASE tmp") + } + } } class ExplainSuiteAE extends ExplainSuiteHelper with EnableAdaptiveExecutionSuite { From 8608baad7ab31eef0903b9229789e8112c9c1234 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 11 May 2022 14:49:54 +0800 Subject: [PATCH 242/535] [SPARK-37878][SQL][FOLLOWUP] V1Table should always carry the "location" property ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/35204 . https://github.com/apache/spark/pull/35204 introduced a potential regression: it removes the "location" table property from `V1Table` if the table is not external. The intention was to avoid putting the LOCATION clause for managed tables in `ShowCreateTableExec`. However, if we use the v2 DESCRIBE TABLE command by default in the future, this will bring a behavior change and v2 DESCRIBE TABLE command won't print the table location for managed tables. This PR fixes this regression by using a different idea to fix the SHOW CREATE TABLE issue: 1. introduce a new reserved table property `is_managed_location`, to indicate that the location is managed by the catalog, not user given. 2. `ShowCreateTableExec` only generates the LOCATION clause if the "location" property is present and is not managed. ### Why are the changes needed? avoid a potential regression ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests. We can add a test when we use v2 DESCRIBE TABLE command by default. Closes #36498 from cloud-fan/regression. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit fa2bda5c4eabb23d5f5b3e14ccd055a2453f579f) Signed-off-by: Wenchen Fan --- .../spark/sql/connector/catalog/TableCatalog.java | 6 ++++++ .../spark/sql/catalyst/parser/AstBuilder.scala | 12 ++++++++++-- .../spark/sql/connector/catalog/CatalogV2Util.scala | 3 ++- .../apache/spark/sql/connector/catalog/V1Table.scala | 6 +++--- .../datasources/v2/ShowCreateTableExec.scala | 10 +++++++--- 5 files changed, 28 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java index 9336c2a1cae85..ec773ab90add6 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/TableCatalog.java @@ -47,6 +47,12 @@ public interface TableCatalog extends CatalogPlugin { */ String PROP_LOCATION = "location"; + /** + * A reserved property to indicate that the table location is managed, not user-specified. + * If this property is "true", SHOW CREATE TABLE will not generate the LOCATION clause. + */ + String PROP_IS_MANAGED_LOCATION = "is_managed_location"; + /** * A reserved property to specify a table was created with EXTERNAL. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 195443464477f..ecc5360a4f784 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -41,7 +41,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.CurrentOrigin import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, DateTimeUtils, IntervalUtils} import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, getZoneId, stringToDate, stringToTimestamp, stringToTimestampWithoutTimeZone} -import org.apache.spark.sql.connector.catalog.{SupportsNamespaces, TableCatalog} +import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsNamespaces, TableCatalog} import org.apache.spark.sql.connector.catalog.TableChange.ColumnPosition import org.apache.spark.sql.connector.expressions.{ApplyTransform, BucketTransform, DaysTransform, Expression => V2Expression, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, Transform, YearsTransform} import org.apache.spark.sql.errors.QueryParsingErrors @@ -3215,7 +3215,15 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit throw QueryParsingErrors.cannotCleanReservedTablePropertyError( PROP_EXTERNAL, ctx, "please use CREATE EXTERNAL TABLE") case (PROP_EXTERNAL, _) => false - case _ => true + // It's safe to set whatever table comment, so we don't make it a reserved table property. + case (PROP_COMMENT, _) => true + case (k, _) => + val isReserved = CatalogV2Util.TABLE_RESERVED_PROPERTIES.contains(k) + if (!legacyOn && isReserved) { + throw QueryParsingErrors.cannotCleanReservedTablePropertyError( + k, ctx, "please remove it from the TBLPROPERTIES list.") + } + !isReserved } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala index 2fc13510c54e2..4c174ad7c4f1f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/CatalogV2Util.scala @@ -48,7 +48,8 @@ private[sql] object CatalogV2Util { TableCatalog.PROP_LOCATION, TableCatalog.PROP_PROVIDER, TableCatalog.PROP_OWNER, - TableCatalog.PROP_EXTERNAL) + TableCatalog.PROP_EXTERNAL, + TableCatalog.PROP_IS_MANAGED_LOCATION) /** * The list of reserved namespace properties, which can not be removed or changed directly by diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala index bf92107f6ae2d..da201e816497c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/V1Table.scala @@ -74,15 +74,15 @@ private[sql] case class V1Table(v1Table: CatalogTable) extends Table { private[sql] object V1Table { def addV2TableProperties(v1Table: CatalogTable): Map[String, String] = { val external = v1Table.tableType == CatalogTableType.EXTERNAL + val managed = v1Table.tableType == CatalogTableType.MANAGED v1Table.properties ++ v1Table.storage.properties.map { case (key, value) => TableCatalog.OPTION_PREFIX + key -> value } ++ v1Table.provider.map(TableCatalog.PROP_PROVIDER -> _) ++ v1Table.comment.map(TableCatalog.PROP_COMMENT -> _) ++ - (if (external) { - v1Table.storage.locationUri.map(TableCatalog.PROP_LOCATION -> _.toString) - } else None) ++ + v1Table.storage.locationUri.map(TableCatalog.PROP_LOCATION -> _.toString) ++ + (if (managed) Some(TableCatalog.PROP_IS_MANAGED_LOCATION -> "true") else None) ++ (if (external) Some(TableCatalog.PROP_EXTERNAL -> "true") else None) ++ Some(TableCatalog.PROP_OWNER -> v1Table.owner) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala index 06f5a08ffd9c7..2ad24b845c2cf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowCreateTableExec.scala @@ -114,9 +114,13 @@ case class ShowCreateTableExec( } private def showTableLocation(table: Table, builder: StringBuilder): Unit = { - Option(table.properties.get(TableCatalog.PROP_LOCATION)) - .map("LOCATION '" + escapeSingleQuotedString(_) + "'\n") - .foreach(builder.append) + val isManagedOption = Option(table.properties.get(TableCatalog.PROP_IS_MANAGED_LOCATION)) + // Only generate LOCATION clause if it's not managed. + if (isManagedOption.forall(_.equalsIgnoreCase("false"))) { + Option(table.properties.get(TableCatalog.PROP_LOCATION)) + .map("LOCATION '" + escapeSingleQuotedString(_) + "'\n") + .foreach(builder.append) + } } private def showTableProperties( From f75c00da3cf01e63d93cedbe480198413af41455 Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Thu, 12 May 2022 09:07:11 +0900 Subject: [PATCH 243/535] [SPARK-34827][PYTHON][DOC] Remove outdated statements on distributed-sequence default index Remove outdated statements on distributed-sequence default index. Since distributed-sequence default index is updated to be enforced only while execution, there are stale statements in documents to be removed. No. Doc change only. Manual tests. Closes #36513 from xinrong-databricks/defaultIndexDoc. Authored-by: Xinrong Meng Signed-off-by: Hyukjin Kwon (cherry picked from commit cec1e7b4e68deac321f409d424a3acdcd4cb91be) Signed-off-by: Hyukjin Kwon --- python/docs/source/user_guide/pandas_on_spark/options.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/docs/source/user_guide/pandas_on_spark/options.rst b/python/docs/source/user_guide/pandas_on_spark/options.rst index c0d9b18c085ab..67b8f6841f536 100644 --- a/python/docs/source/user_guide/pandas_on_spark/options.rst +++ b/python/docs/source/user_guide/pandas_on_spark/options.rst @@ -186,9 +186,7 @@ This is conceptually equivalent to the PySpark example as below: **distributed-sequence** (default): It implements a sequence that increases one by one, by group-by and group-map approach in a distributed manner. It still generates the sequential index globally. If the default index must be the sequence in a large dataset, this -index has to be used. -Note that if more data are added to the data source after creating this index, -then it does not guarantee the sequential index. See the example below: +index has to be used. See the example below: .. code-block:: python From e4bb341d37661e93097e56e0087699bca60825fb Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 12 May 2022 09:12:13 +0900 Subject: [PATCH 244/535] Revert "[SPARK-34827][PYTHON][DOC] Remove outdated statements on distributed-sequence default index" This reverts commit f75c00da3cf01e63d93cedbe480198413af41455. --- python/docs/source/user_guide/pandas_on_spark/options.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/docs/source/user_guide/pandas_on_spark/options.rst b/python/docs/source/user_guide/pandas_on_spark/options.rst index 67b8f6841f536..c0d9b18c085ab 100644 --- a/python/docs/source/user_guide/pandas_on_spark/options.rst +++ b/python/docs/source/user_guide/pandas_on_spark/options.rst @@ -186,7 +186,9 @@ This is conceptually equivalent to the PySpark example as below: **distributed-sequence** (default): It implements a sequence that increases one by one, by group-by and group-map approach in a distributed manner. It still generates the sequential index globally. If the default index must be the sequence in a large dataset, this -index has to be used. See the example below: +index has to be used. +Note that if more data are added to the data source after creating this index, +then it does not guarantee the sequential index. See the example below: .. code-block:: python From 65dd72743e40a6276b69938f04fc69655bd8270e Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Thu, 12 May 2022 09:13:27 +0900 Subject: [PATCH 245/535] [SPARK-39154][PYTHON][DOCS] Remove outdated statements on distributed-sequence default index ### What changes were proposed in this pull request? Remove outdated statements on distributed-sequence default index. ### Why are the changes needed? Since distributed-sequence default index is updated to be enforced only while execution, there are stale statements in documents to be removed. ### Does this PR introduce _any_ user-facing change? No. Doc change only. ### How was this patch tested? Manual tests. Closes #36513 from xinrong-databricks/defaultIndexDoc. Authored-by: Xinrong Meng Signed-off-by: Hyukjin Kwon (cherry picked from commit f37150a5549a8f3cb4c1877bcfd2d1459fc73cac) Signed-off-by: Hyukjin Kwon --- python/docs/source/user_guide/pandas_on_spark/options.rst | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/docs/source/user_guide/pandas_on_spark/options.rst b/python/docs/source/user_guide/pandas_on_spark/options.rst index c0d9b18c085ab..67b8f6841f536 100644 --- a/python/docs/source/user_guide/pandas_on_spark/options.rst +++ b/python/docs/source/user_guide/pandas_on_spark/options.rst @@ -186,9 +186,7 @@ This is conceptually equivalent to the PySpark example as below: **distributed-sequence** (default): It implements a sequence that increases one by one, by group-by and group-map approach in a distributed manner. It still generates the sequential index globally. If the default index must be the sequence in a large dataset, this -index has to be used. -Note that if more data are added to the data source after creating this index, -then it does not guarantee the sequential index. See the example below: +index has to be used. See the example below: .. code-block:: python From ebe4252e415e6afdf888e21d0b89ab744fd2dac7 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 12 May 2022 11:18:18 +0800 Subject: [PATCH 246/535] [SPARK-39149][SQL] SHOW DATABASES command should not quote database names under legacy mode ### What changes were proposed in this pull request? This is a bug of the command legacy mode as it does not fully restore to the legacy behavior. The legacy v1 SHOW DATABASES command does not quote the database names. This PR fixes it. ### Why are the changes needed? bug fix ### Does this PR introduce _any_ user-facing change? no change by default, unless people turn on legacy mode, in which case SHOW DATABASES common won't quote the database names. ### How was this patch tested? new tests Closes #36508 from cloud-fan/regression. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit 3094e495095635f6c9e83f4646d3321c2a9311f4) Signed-off-by: Wenchen Fan --- .../datasources/v2/ShowNamespacesExec.scala | 11 ++++++++++- .../command/ShowNamespacesSuiteBase.scala | 17 +++++++++++++++++ .../command/v1/ShowNamespacesSuite.scala | 13 +++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala index 9dafbd79a527e..c55c7b9f98544 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ShowNamespacesExec.scala @@ -42,8 +42,17 @@ case class ShowNamespacesExec( catalog.listNamespaces() } + // Please refer to the rule `KeepLegacyOutputs` for details about legacy command. + // The legacy SHOW DATABASES command does not quote the database names. + val isLegacy = output.head.name == "databaseName" + val namespaceNames = if (isLegacy && namespaces.forall(_.length == 1)) { + namespaces.map(_.head) + } else { + namespaces.map(_.quoted) + } + val rows = new ArrayBuffer[InternalRow]() - namespaces.map(_.quoted).map { ns => + namespaceNames.map { ns => if (pattern.map(StringUtils.filterPattern(Seq(ns), _).nonEmpty).getOrElse(true)) { rows += toCatalystRow(ns) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala index b3693845c3b28..80e545f6e3c2f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/ShowNamespacesSuiteBase.scala @@ -42,6 +42,9 @@ trait ShowNamespacesSuiteBase extends QueryTest with DDLCommandTestUtils { protected def builtinTopNamespaces: Seq[String] = Seq.empty protected def isCasePreserving: Boolean = true + protected def createNamespaceWithSpecialName(ns: String): Unit = { + sql(s"CREATE NAMESPACE $catalog.`$ns`") + } test("default namespace") { withSQLConf(SQLConf.DEFAULT_CATALOG.key -> catalog) { @@ -124,6 +127,20 @@ trait ShowNamespacesSuiteBase extends QueryTest with DDLCommandTestUtils { } } + test("SPARK-39149: keep the legacy no-quote behavior") { + Seq(true, false).foreach { legacy => + withSQLConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA.key -> legacy.toString) { + withNamespace(s"$catalog.`123`") { + createNamespaceWithSpecialName("123") + val res = if (legacy) "123" else "`123`" + checkAnswer( + sql(s"SHOW NAMESPACES IN $catalog"), + (res +: builtinTopNamespaces).map(Row(_))) + } + } + } + } + test("case sensitivity of the pattern string") { Seq(true, false).foreach { caseSensitive => withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala index a1b32e42ae2e5..b65a9acb65612 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/ShowNamespacesSuite.scala @@ -18,7 +18,9 @@ package org.apache.spark.sql.execution.command.v1 import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.catalog.CatalogDatabase import org.apache.spark.sql.execution.command +import org.apache.spark.util.Utils /** * This base suite contains unified tests for the `SHOW NAMESPACES` and `SHOW DATABASES` commands @@ -31,6 +33,17 @@ import org.apache.spark.sql.execution.command trait ShowNamespacesSuiteBase extends command.ShowNamespacesSuiteBase { override protected def builtinTopNamespaces: Seq[String] = Seq("default") + override protected def createNamespaceWithSpecialName(ns: String): Unit = { + // Call `ExternalCatalog` directly to bypass the database name validation in `SessionCatalog`. + spark.sharedState.externalCatalog.createDatabase( + CatalogDatabase( + name = ns, + description = "", + locationUri = Utils.createTempDir().toURI, + properties = Map.empty), + ignoreIfExists = false) + } + test("IN namespace doesn't exist") { val errMsg = intercept[AnalysisException] { sql("SHOW NAMESPACES in dummy") From 70becf290700e88c7be248e4277421dd17f3af4b Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Thu, 12 May 2022 12:22:11 +0900 Subject: [PATCH 247/535] [SPARK-39155][PYTHON] Access to JVM through passed-in GatewayClient during type conversion ### What changes were proposed in this pull request? Access to JVM through passed-in GatewayClient during type conversion. ### Why are the changes needed? In customized type converters, we may utilize the passed-in GatewayClient to access JVM, rather than rely on the `SparkContext._jvm`. That's [how](https://github.com/py4j/py4j/blob/master/py4j-python/src/py4j/java_collections.py#L508) Py4J explicit converters access JVM. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #36504 from xinrong-databricks/gateway_client_jvm. Authored-by: Xinrong Meng Signed-off-by: Hyukjin Kwon (cherry picked from commit 92fcf214c107358c1a70566b644cec2d35c096c0) Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/types.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index 2a41508d634d0..123fd62898043 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -44,7 +44,7 @@ ) from py4j.protocol import register_input_converter -from py4j.java_gateway import JavaClass, JavaGateway, JavaObject +from py4j.java_gateway import GatewayClient, JavaClass, JavaObject from pyspark.serializers import CloudPickleSerializer @@ -1929,7 +1929,7 @@ class DateConverter: def can_convert(self, obj: Any) -> bool: return isinstance(obj, datetime.date) - def convert(self, obj: datetime.date, gateway_client: JavaGateway) -> JavaObject: + def convert(self, obj: datetime.date, gateway_client: GatewayClient) -> JavaObject: Date = JavaClass("java.sql.Date", gateway_client) return Date.valueOf(obj.strftime("%Y-%m-%d")) @@ -1938,7 +1938,7 @@ class DatetimeConverter: def can_convert(self, obj: Any) -> bool: return isinstance(obj, datetime.datetime) - def convert(self, obj: datetime.datetime, gateway_client: JavaGateway) -> JavaObject: + def convert(self, obj: datetime.datetime, gateway_client: GatewayClient) -> JavaObject: Timestamp = JavaClass("java.sql.Timestamp", gateway_client) seconds = ( calendar.timegm(obj.utctimetuple()) if obj.tzinfo else time.mktime(obj.timetuple()) @@ -1958,27 +1958,25 @@ def can_convert(self, obj: Any) -> bool: and is_timestamp_ntz_preferred() ) - def convert(self, obj: datetime.datetime, gateway_client: JavaGateway) -> JavaObject: - from pyspark import SparkContext - + def convert(self, obj: datetime.datetime, gateway_client: GatewayClient) -> JavaObject: seconds = calendar.timegm(obj.utctimetuple()) - jvm = SparkContext._jvm - assert jvm is not None - return jvm.org.apache.spark.sql.catalyst.util.DateTimeUtils.microsToLocalDateTime( - int(seconds) * 1000000 + obj.microsecond + DateTimeUtils = JavaClass( + "org.apache.spark.sql.catalyst.util.DateTimeUtils", + gateway_client, ) + return DateTimeUtils.microsToLocalDateTime(int(seconds) * 1000000 + obj.microsecond) class DayTimeIntervalTypeConverter: def can_convert(self, obj: Any) -> bool: return isinstance(obj, datetime.timedelta) - def convert(self, obj: datetime.timedelta, gateway_client: JavaGateway) -> JavaObject: - from pyspark import SparkContext - - jvm = SparkContext._jvm - assert jvm is not None - return jvm.org.apache.spark.sql.catalyst.util.IntervalUtils.microsToDuration( + def convert(self, obj: datetime.timedelta, gateway_client: GatewayClient) -> JavaObject: + IntervalUtils = JavaClass( + "org.apache.spark.sql.catalyst.util.IntervalUtils", + gateway_client, + ) + return IntervalUtils.microsToDuration( (math.floor(obj.total_seconds()) * 1000000) + obj.microseconds ) From 0baa5c7d2b71f379fad6a8a0b72f427acf70f4e4 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 11 May 2022 21:58:14 -0700 Subject: [PATCH 248/535] [SPARK-36718][SQL][FOLLOWUP] Fix the `isExtractOnly` check in CollapseProject This PR fixes a perf regression in Spark 3.3 caused by https://github.com/apache/spark/pull/33958 In `CollapseProject`, we want to treat `CreateStruct` and its friends as cheap expressions if they are only referenced by `ExtractValue`, but the check is too conservative, which causes a perf regression. This PR fixes this check. Now "extract-only" means: the attribute only appears as a child of `ExtractValue`, but the consumer expression can be in any shape. Fixes perf regression No new tests Closes #36510 from cloud-fan/bug. Lead-authored-by: Wenchen Fan Co-authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun (cherry picked from commit 547f032d04bd2cf06c54b5a4a2f984f5166beb7d) Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/optimizer/Optimizer.scala | 14 ++++++++------ .../optimizer/CollapseProjectSuite.scala | 18 +++++++++++++++--- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 753d81e4003f8..759a7044f159f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -997,12 +997,14 @@ object CollapseProject extends Rule[LogicalPlan] with AliasHelper { } } - @scala.annotation.tailrec - private def isExtractOnly(expr: Expression, ref: Attribute): Boolean = expr match { - case a: Alias => isExtractOnly(a.child, ref) - case e: ExtractValue => isExtractOnly(e.children.head, ref) - case a: Attribute => a.semanticEquals(ref) - case _ => false + private def isExtractOnly(expr: Expression, ref: Attribute): Boolean = { + def hasRefInNonExtractValue(e: Expression): Boolean = e match { + case a: Attribute => a.semanticEquals(ref) + // The first child of `ExtractValue` is the complex type to be extracted. + case e: ExtractValue if e.children.head.semanticEquals(ref) => false + case _ => e.children.exists(hasRefInNonExtractValue) + } + !hasRefInNonExtractValue(expr) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala index c1d13d14b05f7..f6c3209726b7d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala @@ -30,7 +30,8 @@ class CollapseProjectSuite extends PlanTest { object Optimize extends RuleExecutor[LogicalPlan] { val batches = Batch("Subqueries", FixedPoint(10), EliminateSubqueryAliases) :: - Batch("CollapseProject", Once, CollapseProject) :: Nil + Batch("CollapseProject", Once, CollapseProject) :: + Batch("SimplifyExtractValueOps", Once, SimplifyExtractValueOps) :: Nil } val testRelation = LocalRelation('a.int, 'b.int) @@ -123,12 +124,23 @@ class CollapseProjectSuite extends PlanTest { test("SPARK-36718: do not collapse project if non-cheap expressions will be repeated") { val query = testRelation - .select(('a + 1).as('a_plus_1)) - .select(('a_plus_1 + 'a_plus_1).as('a_2_plus_2)) + .select(($"a" + 1).as("a_plus_1")) + .select(($"a_plus_1" + $"a_plus_1").as("a_2_plus_2")) .analyze val optimized = Optimize.execute(query) comparePlans(optimized, query) + + // CreateStruct is an exception if it's only referenced by ExtractValue. + val query2 = testRelation + .select(namedStruct("a", $"a", "a_plus_1", $"a" + 1).as("struct")) + .select(($"struct".getField("a") + $"struct".getField("a_plus_1")).as("add")) + .analyze + val optimized2 = Optimize.execute(query2) + val expected2 = testRelation + .select(($"a" + ($"a" + 1)).as("add")) + .analyze + comparePlans(optimized2, expected2) } test("preserve top-level alias metadata while collapsing projects") { From 3cc47a178a68e957cde70fc1c3f10dbcca9bf84b Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 12 May 2022 10:55:21 -0700 Subject: [PATCH 249/535] Revert "[SPARK-36837][BUILD] Upgrade Kafka to 3.1.0" ### What changes were proposed in this pull request? This PR aims to revert commit 973ea0f06e72ab64574cbf00e095922a3415f864 from `branch-3.3` to exclude it from Apache Spark 3.3 scope. ### Why are the changes needed? SPARK-36837 tried to use Apache Kafka 3.1.0 at Apache Spark 3.3.0 and initially wanted to upgrade to Apache Kafka 3.3.1 before the official release. However, we can use the stable Apache Kafka 2.8.1 at Spark 3.3.0 and wait for more proven versions, Apache Kafka 3.2.x or 3.3.x. Apache Kafka 3.2.0 vote is already passed and will arrive. - https://lists.apache.org/thread/9k5sysvchg98lchv2rvvvq6xhpgk99cc Apache Kafka 3.3.0 release discussion is started too. - https://lists.apache.org/thread/cmol5bcf011s1xl91rt4ylb1dgz2vb1r ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #36517 from dongjoon-hyun/SPARK-36837-REVERT. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .../spark/sql/kafka010/KafkaTestUtils.scala | 7 ++++--- .../streaming/kafka010/KafkaRDDSuite.scala | 20 ++++++------------- .../streaming/kafka010/KafkaTestUtils.scala | 3 +-- pom.xml | 2 +- 4 files changed, 12 insertions(+), 20 deletions(-) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala index c5d2a99d156f8..058563dfa167d 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaTestUtils.scala @@ -44,7 +44,6 @@ import org.apache.kafka.common.network.ListenerName import org.apache.kafka.common.security.auth.SecurityProtocol.{PLAINTEXT, SASL_PLAINTEXT} import org.apache.kafka.common.serialization.StringSerializer import org.apache.kafka.common.utils.SystemTime -import org.apache.zookeeper.client.ZKClientConfig import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer} import org.apache.zookeeper.server.auth.SASLAuthenticationProvider import org.scalatest.Assertions._ @@ -267,7 +266,7 @@ class KafkaTestUtils( // Get the actual zookeeper binding port zkPort = zookeeper.actualPort zkClient = KafkaZkClient(s"$zkHost:$zkPort", isSecure = false, zkSessionTimeout, - zkConnectionTimeout, 1, new SystemTime(), "test", new ZKClientConfig) + zkConnectionTimeout, 1, new SystemTime()) zkReady = true } @@ -489,7 +488,9 @@ class KafkaTestUtils( protected def brokerConfiguration: Properties = { val props = new Properties() props.put("broker.id", "0") - props.put("listeners", s"PLAINTEXT://127.0.0.1:$brokerPort") + props.put("host.name", "127.0.0.1") + props.put("advertised.host.name", "127.0.0.1") + props.put("port", brokerPort.toString) props.put("log.dir", Utils.createTempDir().getAbsolutePath) props.put("zookeeper.connect", zkAddress) props.put("zookeeper.connection.timeout.ms", "60000") diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala index 9c57663b3d8ef..b9ef16fb58cb9 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaRDDSuite.scala @@ -21,17 +21,15 @@ import java.{ util => ju } import java.io.File import scala.collection.JavaConverters._ -import scala.concurrent.duration._ import scala.util.Random -import kafka.log.{CleanerConfig, LogCleaner, LogConfig, UnifiedLog} +import kafka.log.{CleanerConfig, Log, LogCleaner, LogConfig, ProducerStateManager} import kafka.server.{BrokerTopicStats, LogDirFailureChannel} import kafka.utils.Pool import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.record.{CompressionType, MemoryRecords, SimpleRecord} import org.apache.kafka.common.serialization.StringDeserializer import org.scalatest.BeforeAndAfterAll -import org.scalatest.concurrent.Eventually.{eventually, interval, timeout} import org.apache.spark._ import org.apache.spark.scheduler.ExecutorCacheTaskLocation @@ -86,7 +84,7 @@ class KafkaRDDSuite extends SparkFunSuite with BeforeAndAfterAll { private def compactLogs(topic: String, partition: Int, messages: Array[(String, String)]): Unit = { val mockTime = new MockTime() - val logs = new Pool[TopicPartition, UnifiedLog]() + val logs = new Pool[TopicPartition, Log]() val logDir = kafkaTestUtils.brokerLogDir val dir = new File(logDir, topic + "-" + partition) dir.mkdirs() @@ -95,7 +93,7 @@ class KafkaRDDSuite extends SparkFunSuite with BeforeAndAfterAll { logProps.put(LogConfig.MinCleanableDirtyRatioProp, java.lang.Float.valueOf(0.1f)) val logDirFailureChannel = new LogDirFailureChannel(1) val topicPartition = new TopicPartition(topic, partition) - val log = UnifiedLog( + val log = new Log( dir, LogConfig(logProps), 0L, @@ -105,10 +103,9 @@ class KafkaRDDSuite extends SparkFunSuite with BeforeAndAfterAll { mockTime, Int.MaxValue, Int.MaxValue, - logDirFailureChannel, - lastShutdownClean = false, - topicId = None, - keepPartitionMetadataFile = false + topicPartition, + new ProducerStateManager(topicPartition, dir), + logDirFailureChannel ) messages.foreach { case (k, v) => val record = new SimpleRecord(k.getBytes, v.getBytes) @@ -204,11 +201,6 @@ class KafkaRDDSuite extends SparkFunSuite with BeforeAndAfterAll { sc, kafkaParams, offsetRanges, preferredHosts ).map(m => m.key -> m.value) - // To make it sure that the compaction happens - eventually(timeout(20.second), interval(1.seconds)) { - val dir = new File(kafkaTestUtils.brokerLogDir, topic + "-0") - assert(dir.listFiles().exists(_.getName.endsWith(".deleted"))) - } val received = rdd.collect.toSet assert(received === compactedMessages.toSet) diff --git a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala index dd8d66f1fc08f..0783e591def51 100644 --- a/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala +++ b/external/kafka-0-10/src/test/scala/org/apache/spark/streaming/kafka010/KafkaTestUtils.scala @@ -35,7 +35,6 @@ import org.apache.kafka.common.TopicPartition import org.apache.kafka.common.network.ListenerName import org.apache.kafka.common.serialization.StringSerializer import org.apache.kafka.common.utils.{Time => KTime} -import org.apache.zookeeper.client.ZKClientConfig import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer} import org.apache.spark.{SparkConf, SparkException} @@ -107,7 +106,7 @@ private[kafka010] class KafkaTestUtils extends Logging { // Get the actual zookeeper binding port zkPort = zookeeper.actualPort zkClient = KafkaZkClient(s"$zkHost:$zkPort", isSecure = false, zkSessionTimeout, - zkConnectionTimeout, 1, KTime.SYSTEM, "test", new ZKClientConfig) + zkConnectionTimeout, 1, KTime.SYSTEM) admClient = new AdminZkClient(zkClient) zkReady = true } diff --git a/pom.xml b/pom.xml index c91167d8de630..34c8354a3d4a1 100644 --- a/pom.xml +++ b/pom.xml @@ -128,7 +128,7 @@ 2.3 - 3.1.0 + 2.8.1 10.14.2.0 1.12.2 From 63f20c526bed8346fe3399aff6c0b2f7a78b441e Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 13 May 2022 13:50:27 +0800 Subject: [PATCH 250/535] [SPARK-39166][SQL] Provide runtime error query context for binary arithmetic when WSCG is off ### What changes were proposed in this pull request? Currently, for most of the cases, the project https://issues.apache.org/jira/browse/SPARK-38615 is able to show where the runtime errors happen within the original query. However, after trying on production, I found that the following queries won't show where the divide by 0 error happens ``` create table aggTest(i int, j int, k int, d date) using parquet insert into aggTest values(1, 2, 0, date'2022-01-01') select sum(j)/sum(k),percentile(i, 0.9) from aggTest group by d ``` With `percentile` function in the query, the plan can't execute with whole stage codegen. Thus the child plan of `Project` is serialized to executors for execution, from ProjectExec: ``` protected override def doExecute(): RDD[InternalRow] = { child.execute().mapPartitionsWithIndexInternal { (index, iter) => val project = UnsafeProjection.create(projectList, child.output) project.initialize(index) iter.map(project) } } ``` Note that the `TreeNode.origin` is not serialized to executors since `TreeNode` doesn't extend the trait `Serializable`, which results in an empty query context on errors. For more details, please read https://issues.apache.org/jira/browse/SPARK-39140 A dummy fix is to make `TreeNode` extend the trait `Serializable`. However, it can be performance regression if the query text is long (every `TreeNode` carries it for serialization). A better fix is to introduce a new trait `SupportQueryContext` and materialize the truncated query context for special expressions. This PR targets on binary arithmetic expressions only. I will create follow-ups for the remaining expressions which support runtime error query context. ### Why are the changes needed? Improve the error context framework and make sure it works when whole stage codegen is not available. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests Closes #36525 from gengliangwang/serializeContext. Lead-authored-by: Gengliang Wang Co-authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit e336567c8a9704b500efecd276abaf5bd3988679) Signed-off-by: Gengliang Wang --- .../sql/catalyst/expressions/Expression.scala | 22 +++++++++++ .../sql/catalyst/expressions/arithmetic.scala | 38 +++++++++++-------- .../ArithmeticExpressionSuite.scala | 25 ++++++------ .../org/apache/spark/sql/SQLQuerySuite.scala | 20 ++++++++++ 4 files changed, 77 insertions(+), 28 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index e1d8c2e43e264..d620c5d739283 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -588,6 +588,28 @@ abstract class UnaryExpression extends Expression with UnaryLike[Expression] { } } +/** + * An expression with SQL query context. The context string can be serialized from the Driver + * to executors. It will also be kept after rule transforms. + */ +trait SupportQueryContext extends Expression with Serializable { + protected var queryContext: String = initQueryContext() + + def initQueryContext(): String + + // Note: Even though query contexts are serialized to executors, it will be regenerated from an + // empty "Origin" during rule transforms since "Origin"s are not serialized to executors + // for better performance. Thus, we need to copy the original query context during + // transforms. The query context string is considered as a "tag" on the expression here. + override def copyTagsFrom(other: Expression): Unit = { + other match { + case s: SupportQueryContext => + queryContext = s.queryContext + case _ => + } + super.copyTagsFrom(other) + } +} object UnaryExpression { def unapply(e: UnaryExpression): Option[Expression] = Some(e.child) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index c6d66d8e60734..153187f9e309c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -21,8 +21,7 @@ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult, TypeCoercion} import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ -import org.apache.spark.sql.catalyst.trees.TreePattern.{BINARY_ARITHMETIC, TreePattern, - UNARY_POSITIVE} +import org.apache.spark.sql.catalyst.trees.TreePattern.{BINARY_ARITHMETIC, TreePattern, UNARY_POSITIVE} import org.apache.spark.sql.catalyst.util.{IntervalUtils, MathUtils, TypeUtils} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf @@ -209,7 +208,8 @@ case class Abs(child: Expression, failOnError: Boolean = SQLConf.get.ansiEnabled override protected def withNewChildInternal(newChild: Expression): Abs = copy(child = newChild) } -abstract class BinaryArithmetic extends BinaryOperator with NullIntolerant { +abstract class BinaryArithmetic extends BinaryOperator with NullIntolerant + with SupportQueryContext { protected val failOnError: Boolean @@ -219,6 +219,14 @@ abstract class BinaryArithmetic extends BinaryOperator with NullIntolerant { override lazy val resolved: Boolean = childrenResolved && checkInputDataTypes().isSuccess + override def initQueryContext(): String = { + if (failOnError) { + origin.context + } else { + "" + } + } + /** Name of the function for this expression on a [[Decimal]] type. */ def decimalMethod: String = throw QueryExecutionErrors.notOverrideExpectedMethodsError("BinaryArithmetics", @@ -270,7 +278,7 @@ abstract class BinaryArithmetic extends BinaryOperator with NullIntolerant { }) case IntegerType | LongType if failOnError && exactMathMethod.isDefined => nullSafeCodeGen(ctx, ev, (eval1, eval2) => { - val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val errorContext = ctx.addReferenceObj("errCtx", queryContext) val mathUtils = MathUtils.getClass.getCanonicalName.stripSuffix("$") s""" |${ev.value} = $mathUtils.${exactMathMethod.get}($eval1, $eval2, $errorContext); @@ -331,9 +339,9 @@ case class Add( case _: YearMonthIntervalType => MathUtils.addExact(input1.asInstanceOf[Int], input2.asInstanceOf[Int]) case _: IntegerType if failOnError => - MathUtils.addExact(input1.asInstanceOf[Int], input2.asInstanceOf[Int], origin.context) + MathUtils.addExact(input1.asInstanceOf[Int], input2.asInstanceOf[Int], queryContext) case _: LongType if failOnError => - MathUtils.addExact(input1.asInstanceOf[Long], input2.asInstanceOf[Long], origin.context) + MathUtils.addExact(input1.asInstanceOf[Long], input2.asInstanceOf[Long], queryContext) case _ => numeric.plus(input1, input2) } @@ -381,9 +389,9 @@ case class Subtract( case _: YearMonthIntervalType => MathUtils.subtractExact(input1.asInstanceOf[Int], input2.asInstanceOf[Int]) case _: IntegerType if failOnError => - MathUtils.subtractExact(input1.asInstanceOf[Int], input2.asInstanceOf[Int], origin.context) + MathUtils.subtractExact(input1.asInstanceOf[Int], input2.asInstanceOf[Int], queryContext) case _: LongType if failOnError => - MathUtils.subtractExact(input1.asInstanceOf[Long], input2.asInstanceOf[Long], origin.context) + MathUtils.subtractExact(input1.asInstanceOf[Long], input2.asInstanceOf[Long], queryContext) case _ => numeric.minus(input1, input2) } @@ -418,9 +426,9 @@ case class Multiply( protected override def nullSafeEval(input1: Any, input2: Any): Any = dataType match { case _: IntegerType if failOnError => - MathUtils.multiplyExact(input1.asInstanceOf[Int], input2.asInstanceOf[Int], origin.context) + MathUtils.multiplyExact(input1.asInstanceOf[Int], input2.asInstanceOf[Int], queryContext) case _: LongType if failOnError => - MathUtils.multiplyExact(input1.asInstanceOf[Long], input2.asInstanceOf[Long], origin.context) + MathUtils.multiplyExact(input1.asInstanceOf[Long], input2.asInstanceOf[Long], queryContext) case _ => numeric.times(input1, input2) } @@ -457,10 +465,10 @@ trait DivModLike extends BinaryArithmetic { } else { if (isZero(input2)) { // when we reach here, failOnError must be true. - throw QueryExecutionErrors.divideByZeroError(origin.context) + throw QueryExecutionErrors.divideByZeroError(queryContext) } if (checkDivideOverflow && input1 == Long.MinValue && input2 == -1) { - throw QueryExecutionErrors.overflowInIntegralDivideError(origin.context) + throw QueryExecutionErrors.overflowInIntegralDivideError(queryContext) } evalOperation(input1, input2) } @@ -487,7 +495,7 @@ trait DivModLike extends BinaryArithmetic { } else { s"($javaType)(${eval1.value} $symbol ${eval2.value})" } - lazy val errorContext = ctx.addReferenceObj("errCtx", origin.context) + lazy val errorContext = ctx.addReferenceObj("errCtx", queryContext) val checkIntegralDivideOverflow = if (checkDivideOverflow) { s""" |if (${eval1.value} == ${Long.MinValue}L && ${eval2.value} == -1) @@ -743,7 +751,7 @@ case class Pmod( } else { if (isZero(input2)) { // when we reach here, failOnError must bet true. - throw QueryExecutionErrors.divideByZeroError(origin.context) + throw QueryExecutionErrors.divideByZeroError(queryContext) } input1 match { case i: Integer => pmod(i, input2.asInstanceOf[java.lang.Integer]) @@ -768,7 +776,7 @@ case class Pmod( } val remainder = ctx.freshName("remainder") val javaType = CodeGenerator.javaType(dataType) - lazy val errorContext = ctx.addReferenceObj("errCtx", origin.context) + lazy val errorContext = ctx.addReferenceObj("errCtx", queryContext) val result = dataType match { case DecimalType.Fixed(_, _) => val decimalAdd = "$plus" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala index 87777991cb971..e76ff0b439007 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala @@ -359,19 +359,18 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper } test("Remainder/Pmod: exception should contain SQL text context") { - Seq( - Remainder(Literal(1L, LongType), Literal(0L, LongType), failOnError = true), - Pmod(Literal(1L, LongType), Literal(0L, LongType), failOnError = true)).foreach { expr => - val query = s"1L ${expr.symbol} 0L" - val o = Origin( - line = Some(1), - startPosition = Some(7), - startIndex = Some(7), - stopIndex = Some(7 + query.length -1), - sqlText = Some(s"select $query")) - withOrigin(o) { - checkExceptionInExpression[ArithmeticException](expr, EmptyRow, query) - } + Seq(("%", Remainder), ("pmod", Pmod)).foreach { case (symbol, exprBuilder) => + val query = s"1L $symbol 0L" + val o = Origin( + line = Some(1), + startPosition = Some(7), + startIndex = Some(7), + stopIndex = Some(7 + query.length -1), + sqlText = Some(s"select $query")) + withOrigin(o) { + val expression = exprBuilder(Literal(1L, LongType), Literal(0L, LongType), true) + checkExceptionInExpression[ArithmeticException](expression, EmptyRow, query) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index cba677da19adf..f099d3c015cbf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4359,6 +4359,26 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } + test("SPARK-39166: Query context should be serialized to executors when WSCG is off") { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", + SQLConf.ANSI_ENABLED.key -> "true") { + withTable("t") { + sql("create table t(i int, j int) using parquet") + sql("insert into t values(2147483647, 10)") + Seq( + "select i + j from t", + "select -i - j from t", + "select i * j from t", + "select i / (j - 10) from t").foreach { query => + val msg = intercept[SparkException] { + sql(query).collect() + }.getMessage + assert(msg.contains(query)) + } + } + } + } + test("SPARK-38589: try_avg should return null if overflow happens before merging") { val yearMonthDf = Seq(Int.MaxValue, Int.MaxValue, 2) .map(Period.ofMonths) From 27c03e5741af25b7afacac727865e23f60ce61fa Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 13 May 2022 17:46:33 +0800 Subject: [PATCH 251/535] [SPARK-39175][SQL] Provide runtime error query context for Cast when WSCG is off ### What changes were proposed in this pull request? Similar to https://github.com/apache/spark/pull/36525, this PR provides runtime error query context for the Cast expression when WSCG is off. ### Why are the changes needed? Enhance the runtime error query context of Cast expression. After changes, it works when the whole stage codegen is not available. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #36535 from gengliangwang/fixCastContext. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit cdd33e83c3919c4475e2e1ef387acb604bea81b9) Signed-off-by: Gengliang Wang --- .../spark/sql/catalyst/expressions/Cast.scala | 67 +++++++++++-------- .../org/apache/spark/sql/SQLQuerySuite.scala | 27 +++++++- 2 files changed, 64 insertions(+), 30 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 335a34514c2ba..17d571a70f2f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -277,7 +277,10 @@ object Cast { } } -abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression with NullIntolerant { +abstract class CastBase extends UnaryExpression + with TimeZoneAwareExpression + with NullIntolerant + with SupportQueryContext { def child: Expression @@ -307,6 +310,12 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit protected def ansiEnabled: Boolean + override def initQueryContext(): String = if (ansiEnabled) { + origin.context + } else { + "" + } + // When this cast involves TimeZone, it's only resolved if the timeZoneId is set; // Otherwise behave like Expression.resolved. override lazy val resolved: Boolean = @@ -467,7 +476,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit false } else { if (ansiEnabled) { - throw QueryExecutionErrors.invalidInputSyntaxForBooleanError(s, origin.context) + throw QueryExecutionErrors.invalidInputSyntaxForBooleanError(s, queryContext) } else { null } @@ -499,7 +508,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case StringType => buildCast[UTF8String](_, utfs => { if (ansiEnabled) { - DateTimeUtils.stringToTimestampAnsi(utfs, zoneId, origin.context) + DateTimeUtils.stringToTimestampAnsi(utfs, zoneId, queryContext) } else { DateTimeUtils.stringToTimestamp(utfs, zoneId).orNull } @@ -524,14 +533,14 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // TimestampWritable.doubleToTimestamp case DoubleType => if (ansiEnabled) { - buildCast[Double](_, d => doubleToTimestampAnsi(d, origin.context)) + buildCast[Double](_, d => doubleToTimestampAnsi(d, queryContext)) } else { buildCast[Double](_, d => doubleToTimestamp(d)) } // TimestampWritable.floatToTimestamp case FloatType => if (ansiEnabled) { - buildCast[Float](_, f => doubleToTimestampAnsi(f.toDouble, origin.context)) + buildCast[Float](_, f => doubleToTimestampAnsi(f.toDouble, queryContext)) } else { buildCast[Float](_, f => doubleToTimestamp(f.toDouble)) } @@ -541,7 +550,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case StringType => buildCast[UTF8String](_, utfs => { if (ansiEnabled) { - DateTimeUtils.stringToTimestampWithoutTimeZoneAnsi(utfs, origin.context) + DateTimeUtils.stringToTimestampWithoutTimeZoneAnsi(utfs, queryContext) } else { DateTimeUtils.stringToTimestampWithoutTimeZone(utfs).orNull } @@ -574,7 +583,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToDate(from: DataType): Any => Any = from match { case StringType => if (ansiEnabled) { - buildCast[UTF8String](_, s => DateTimeUtils.stringToDateAnsi(s, origin.context)) + buildCast[UTF8String](_, s => DateTimeUtils.stringToDateAnsi(s, queryContext)) } else { buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s).orNull) } @@ -631,7 +640,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // LongConverter private[this] def castToLong(from: DataType): Any => Any = from match { case StringType if ansiEnabled => - buildCast[UTF8String](_, v => UTF8StringUtils.toLongExact(v, origin.context)) + buildCast[UTF8String](_, v => UTF8StringUtils.toLongExact(v, queryContext)) case StringType => val result = new LongWrapper() buildCast[UTF8String](_, s => if (s.toLong(result)) result.value else null) @@ -654,7 +663,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // IntConverter private[this] def castToInt(from: DataType): Any => Any = from match { case StringType if ansiEnabled => - buildCast[UTF8String](_, v => UTF8StringUtils.toIntExact(v, origin.context)) + buildCast[UTF8String](_, v => UTF8StringUtils.toIntExact(v, queryContext)) case StringType => val result = new IntWrapper() buildCast[UTF8String](_, s => if (s.toInt(result)) result.value else null) @@ -686,7 +695,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // ShortConverter private[this] def castToShort(from: DataType): Any => Any = from match { case StringType if ansiEnabled => - buildCast[UTF8String](_, v => UTF8StringUtils.toShortExact(v, origin.context)) + buildCast[UTF8String](_, v => UTF8StringUtils.toShortExact(v, queryContext)) case StringType => val result = new IntWrapper() buildCast[UTF8String](_, s => if (s.toShort(result)) { @@ -733,7 +742,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit // ByteConverter private[this] def castToByte(from: DataType): Any => Any = from match { case StringType if ansiEnabled => - buildCast[UTF8String](_, v => UTF8StringUtils.toByteExact(v, origin.context)) + buildCast[UTF8String](_, v => UTF8StringUtils.toByteExact(v, queryContext)) case StringType => val result = new IntWrapper() buildCast[UTF8String](_, s => if (s.toByte(result)) { @@ -793,7 +802,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit null } else { throw QueryExecutionErrors.cannotChangeDecimalPrecisionError( - value, decimalType.precision, decimalType.scale, origin.context) + value, decimalType.precision, decimalType.scale, queryContext) } } } @@ -816,7 +825,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit }) case StringType if ansiEnabled => buildCast[UTF8String](_, - s => changePrecision(Decimal.fromStringANSI(s, target, origin.context), target)) + s => changePrecision(Decimal.fromStringANSI(s, target, queryContext), target)) case BooleanType => buildCast[Boolean](_, b => toPrecision(if (b) Decimal.ONE else Decimal.ZERO, target)) case DateType => @@ -846,7 +855,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val d = Cast.processFloatingPointSpecialLiterals(doubleStr, false) if(ansiEnabled && d == null) { throw QueryExecutionErrors.invalidInputSyntaxForNumericError( - DoubleType, s, origin.context) + DoubleType, s, queryContext) } else { d } @@ -872,7 +881,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val f = Cast.processFloatingPointSpecialLiterals(floatStr, true) if (ansiEnabled && f == null) { throw QueryExecutionErrors.invalidInputSyntaxForNumericError( - FloatType, s, origin.context) + FloatType, s, queryContext) } else { f } @@ -988,7 +997,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit } def errorContextCode(codegenContext: CodegenContext): String = { - codegenContext.addReferenceObj("errCtx", origin.context) + codegenContext.addReferenceObj("errCtx", queryContext) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -1298,7 +1307,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val intOpt = ctx.freshVariable("intOpt", classOf[Option[Integer]]) (c, evPrim, evNull) => if (ansiEnabled) { - val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val errorContext = ctx.addReferenceObj("errCtx", queryContext) code""" $evPrim = $dateTimeUtilsCls.stringToDateAnsi($c, $errorContext); """ @@ -1377,7 +1386,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit } """ case StringType if ansiEnabled => - val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val errorContext = ctx.addReferenceObj("errCtx", queryContext) val toType = ctx.addReferenceObj("toType", target) (c, evPrim, evNull) => code""" @@ -1438,7 +1447,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]]) (c, evPrim, evNull) => if (ansiEnabled) { - val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val errorContext = ctx.addReferenceObj("errCtx", queryContext) code""" $evPrim = $dateTimeUtilsCls.stringToTimestampAnsi($c, $zid, $errorContext); """ @@ -1477,7 +1486,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case DoubleType => (c, evPrim, evNull) => if (ansiEnabled) { - val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val errorContext = ctx.addReferenceObj("errCtx", queryContext) code"$evPrim = $dateTimeUtilsCls.doubleToTimestampAnsi($c, $errorContext);" } else { code""" @@ -1491,7 +1500,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit case FloatType => (c, evPrim, evNull) => if (ansiEnabled) { - val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val errorContext = ctx.addReferenceObj("errCtx", queryContext) code"$evPrim = $dateTimeUtilsCls.doubleToTimestampAnsi((double)$c, $errorContext);" } else { code""" @@ -1511,7 +1520,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val longOpt = ctx.freshVariable("longOpt", classOf[Option[Long]]) (c, evPrim, evNull) => if (ansiEnabled) { - val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val errorContext = ctx.addReferenceObj("errCtx", queryContext) code""" $evPrim = $dateTimeUtilsCls.stringToTimestampWithoutTimeZoneAnsi($c, $errorContext); """ @@ -1628,7 +1637,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val stringUtils = inline"${StringUtils.getClass.getName.stripSuffix("$")}" (c, evPrim, evNull) => val castFailureCode = if (ansiEnabled) { - val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val errorContext = ctx.addReferenceObj("errCtx", queryContext) s"throw QueryExecutionErrors.invalidInputSyntaxForBooleanError($c, $errorContext);" } else { s"$evNull = true;" @@ -1763,7 +1772,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToByteCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType if ansiEnabled => val stringUtils = UTF8StringUtils.getClass.getCanonicalName.stripSuffix("$") - val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val errorContext = ctx.addReferenceObj("errCtx", queryContext) (c, evPrim, evNull) => code"$evPrim = $stringUtils.toByteExact($c, $errorContext);" case StringType => val wrapper = ctx.freshVariable("intWrapper", classOf[UTF8String.IntWrapper]) @@ -1800,7 +1809,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit ctx: CodegenContext): CastFunction = from match { case StringType if ansiEnabled => val stringUtils = UTF8StringUtils.getClass.getCanonicalName.stripSuffix("$") - val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val errorContext = ctx.addReferenceObj("errCtx", queryContext) (c, evPrim, evNull) => code"$evPrim = $stringUtils.toShortExact($c, $errorContext);" case StringType => val wrapper = ctx.freshVariable("intWrapper", classOf[UTF8String.IntWrapper]) @@ -1835,7 +1844,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToIntCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType if ansiEnabled => val stringUtils = UTF8StringUtils.getClass.getCanonicalName.stripSuffix("$") - val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val errorContext = ctx.addReferenceObj("errCtx", queryContext) (c, evPrim, evNull) => code"$evPrim = $stringUtils.toIntExact($c, $errorContext);" case StringType => val wrapper = ctx.freshVariable("intWrapper", classOf[UTF8String.IntWrapper]) @@ -1870,7 +1879,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit private[this] def castToLongCode(from: DataType, ctx: CodegenContext): CastFunction = from match { case StringType if ansiEnabled => val stringUtils = UTF8StringUtils.getClass.getCanonicalName.stripSuffix("$") - val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val errorContext = ctx.addReferenceObj("errCtx", queryContext) (c, evPrim, evNull) => code"$evPrim = $stringUtils.toLongExact($c, $errorContext);" case StringType => val wrapper = ctx.freshVariable("longWrapper", classOf[UTF8String.LongWrapper]) @@ -1907,7 +1916,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val floatStr = ctx.freshVariable("floatStr", StringType) (c, evPrim, evNull) => val handleNull = if (ansiEnabled) { - val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val errorContext = ctx.addReferenceObj("errCtx", queryContext) s"throw QueryExecutionErrors.invalidInputSyntaxForNumericError(" + s"org.apache.spark.sql.types.FloatType$$.MODULE$$,$c, $errorContext);" } else { @@ -1945,7 +1954,7 @@ abstract class CastBase extends UnaryExpression with TimeZoneAwareExpression wit val doubleStr = ctx.freshVariable("doubleStr", StringType) (c, evPrim, evNull) => val handleNull = if (ansiEnabled) { - val errorContext = ctx.addReferenceObj("errCtx", origin.context) + val errorContext = ctx.addReferenceObj("errCtx", queryContext) s"throw QueryExecutionErrors.invalidInputSyntaxForNumericError(" + s"org.apache.spark.sql.types.DoubleType$$.MODULE$$, $c, $errorContext);" } else { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index f099d3c015cbf..68db57ea3649f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4359,7 +4359,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } - test("SPARK-39166: Query context should be serialized to executors when WSCG is off") { + test("SPARK-39166: Query context of binary arithmetic should be serialized to executors" + + " when WSCG is off") { withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", SQLConf.ANSI_ENABLED.key -> "true") { withTable("t") { @@ -4379,6 +4380,30 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } + test("SPARK-39175: Query context of Cast should be serialized to executors" + + " when WSCG is off") { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", + SQLConf.ANSI_ENABLED.key -> "true") { + withTable("t") { + sql("create table t(s string) using parquet") + sql("insert into t values('a')") + Seq( + "select cast(s as int) from t", + "select cast(s as long) from t", + "select cast(s as double) from t", + "select cast(s as decimal(10, 2)) from t", + "select cast(s as date) from t", + "select cast(s as timestamp) from t", + "select cast(s as boolean) from t").foreach { query => + val msg = intercept[SparkException] { + sql(query).collect() + }.getMessage + assert(msg.contains(query)) + } + } + } + } + test("SPARK-38589: try_avg should return null if overflow happens before merging") { val yearMonthDf = Seq(Int.MaxValue, Int.MaxValue, 2) .map(Period.ofMonths) From c2bd7bac76a5cf7ffc5ef61a1df2b8bb5a72f131 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 13 May 2022 12:47:53 +0300 Subject: [PATCH 252/535] [SPARK-39165][SQL][3.3] Replace `sys.error` by `IllegalStateException` ### What changes were proposed in this pull request? Replace all invokes of `sys.error()` by throwing of `IllegalStateException` in the `sql` namespace. This is a backport of https://github.com/apache/spark/pull/36524. ### Why are the changes needed? In the context of wrapping all internal errors like asserts/illegal state exceptions (see https://github.com/apache/spark/pull/36500), it is impossible to distinguish `RuntimeException` of `sys.error()` from Spark's exceptions like `SparkRuntimeException`. The last one can be propagated to the user space but `sys.error` exceptions shouldn't be visible to users in regular cases. ### Does this PR introduce _any_ user-facing change? No, shouldn't. sys.error shouldn't propagate exception to user space in regular cases. ### How was this patch tested? By running the existing test suites. Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 95c7efd7571464d8adfb76fb22e47a5816cf73fb) Signed-off-by: Max Gekk Closes #36532 from MaxGekk/sys_error-internal-3.3. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../org/apache/spark/sql/execution/SparkStrategies.scala | 4 ++-- .../spark/sql/execution/datasources/DataSource.scala | 8 ++++---- .../datasources/parquet/ParquetWriteSupport.scala | 3 +-- .../sql/execution/exchange/ShuffleExchangeExec.scala | 4 ++-- .../spark/sql/execution/python/ExtractPythonUDFs.scala | 5 +++-- .../org/apache/spark/sql/execution/streaming/memory.scala | 3 ++- .../streaming/sources/TextSocketMicroBatchStream.scala | 3 ++- .../scala/org/apache/spark/sql/execution/subquery.scala | 3 ++- .../spark/sql/execution/window/AggregateProcessor.scala | 2 +- .../spark/sql/execution/window/WindowExecBase.scala | 8 ++++---- .../scala/org/apache/spark/sql/hive/HiveInspectors.scala | 3 ++- .../org/apache/spark/sql/hive/client/HiveClientImpl.scala | 2 +- 12 files changed, 26 insertions(+), 22 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 3b8a70ffe94c3..17f3cfbda89b3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -503,8 +503,8 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { _.aggregateFunction.children.filterNot(_.foldable).toSet).distinct.length > 1) { // This is a sanity check. We should not reach here when we have multiple distinct // column sets. Our `RewriteDistinctAggregates` should take care this case. - sys.error("You hit a query analyzer bug. Please report your query to " + - "Spark user mailing list.") + throw new IllegalStateException( + "You hit a query analyzer bug. Please report your query to Spark user mailing list.") } // Ideally this should be done in `NormalizeFloatingNumbers`, but we do it here because diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 2bb3d48c1458c..143fb4cf9603a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -539,8 +539,8 @@ case class DataSource( DataWritingCommand.propogateMetrics(sparkSession.sparkContext, resolved, metrics) // Replace the schema with that of the DataFrame we just wrote out to avoid re-inferring copy(userSpecifiedSchema = Some(outputColumns.toStructType.asNullable)).resolveRelation() - case _ => - sys.error(s"${providingClass.getCanonicalName} does not allow create table as select.") + case _ => throw new IllegalStateException( + s"${providingClass.getCanonicalName} does not allow create table as select.") } } @@ -556,8 +556,8 @@ case class DataSource( disallowWritingIntervals(data.schema.map(_.dataType), forbidAnsiIntervals = false) DataSource.validateSchema(data.schema) planForWritingFileFormat(format, mode, data) - case _ => - sys.error(s"${providingClass.getCanonicalName} does not allow create table as select.") + case _ => throw new IllegalStateException( + s"${providingClass.getCanonicalName} does not allow create table as select.") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala index e71863657dd25..a4122fe0bdfb4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetWriteSupport.scala @@ -254,8 +254,7 @@ class ParquetWriteSupport extends WriteSupport[InternalRow] with Logging { case t: UserDefinedType[_] => makeWriter(t.sqlType) - // TODO Adds IntervalType support - case _ => sys.error(s"Unsupported data type $dataType.") + case _ => throw new IllegalStateException(s"Unsupported data type $dataType.") } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala index c033aedc7786d..f3eb5636bb997 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala @@ -300,7 +300,7 @@ object ShuffleExchangeExec { override def numPartitions: Int = 1 override def getPartition(key: Any): Int = 0 } - case _ => sys.error(s"Exchange not implemented for $newPartitioning") + case _ => throw new IllegalStateException(s"Exchange not implemented for $newPartitioning") // TODO: Handle BroadcastPartitioning. } def getPartitionKeyExtractor(): InternalRow => Any = newPartitioning match { @@ -319,7 +319,7 @@ object ShuffleExchangeExec { val projection = UnsafeProjection.create(sortingExpressions.map(_.child), outputAttributes) row => projection(row) case SinglePartition => identity - case _ => sys.error(s"Exchange not implemented for $newPartitioning") + case _ => throw new IllegalStateException(s"Exchange not implemented for $newPartitioning") } val isRoundRobin = newPartitioning.isInstanceOf[RoundRobinPartitioning] && diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala index a809ea07d0ec6..a6a5423b1f7a6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala @@ -284,8 +284,9 @@ object ExtractPythonUDFs extends Rule[LogicalPlan] with PredicateHelper { } // Other cases are disallowed as they are ambiguous or would require a cartesian // product. - udfs.map(canonicalizeDeterministic).filterNot(attributeMap.contains).foreach { - udf => sys.error(s"Invalid PythonUDF $udf, requires attributes from more than one child.") + udfs.map(canonicalizeDeterministic).filterNot(attributeMap.contains).foreach { udf => + throw new IllegalStateException( + s"Invalid PythonUDF $udf, requires attributes from more than one child.") } val rewritten = plan.withNewChildren(newChildren).transformExpressions { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala index dd09a38c8b340..1d3773502533d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala @@ -257,7 +257,8 @@ case class MemoryStream[A : Encoder]( val offsetDiff = (newOffset.offset - lastOffsetCommitted.offset).toInt if (offsetDiff < 0) { - sys.error(s"Offsets committed out of order: $lastOffsetCommitted followed by $end") + throw new IllegalStateException( + s"Offsets committed out of order: $lastOffsetCommitted followed by $end") } batches.trimStart(offsetDiff) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketMicroBatchStream.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketMicroBatchStream.scala index 04431f3d381a7..580f7066e448e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketMicroBatchStream.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/sources/TextSocketMicroBatchStream.scala @@ -155,7 +155,8 @@ class TextSocketMicroBatchStream(host: String, port: Int, numPartitions: Int) val offsetDiff = (newOffset.offset - lastOffsetCommitted.offset).toInt if (offsetDiff < 0) { - sys.error(s"Offsets committed out of order: $lastOffsetCommitted followed by $end") + throw new IllegalStateException( + s"Offsets committed out of order: $lastOffsetCommitted followed by $end") } batches.trimStart(offsetDiff) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index afd0aba00680e..4bbfc3467d4de 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -79,7 +79,8 @@ case class ScalarSubquery( def updateResult(): Unit = { val rows = plan.executeCollect() if (rows.length > 1) { - sys.error(s"more than one row returned by a subquery used as an expression:\n$plan") + throw new IllegalStateException( + s"more than one row returned by a subquery used as an expression:\n$plan") } if (rows.length == 1) { assert(rows(0).numFields == 1, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala index 1ebbd5f40646e..e40373917c525 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/AggregateProcessor.scala @@ -90,7 +90,7 @@ private[window] object AggregateProcessor { updateExpressions ++= noOps evaluateExpressions += imperative case other => - sys.error(s"Unsupported aggregate function: $other") + throw new IllegalStateException(s"Unsupported aggregate function: $other") } // Create the projections. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala index 5f1758d12fd5d..31b7df1abd012 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExecBase.scala @@ -97,7 +97,7 @@ trait WindowExecBase extends UnaryExecNode { RowBoundOrdering(offset) case (RowFrame, _) => - sys.error(s"Unhandled bound in windows expressions: $bound") + throw new IllegalStateException(s"Unhandled bound in windows expressions: $bound") case (RangeFrame, CurrentRow) => val ordering = RowOrdering.create(orderSpec, child.output) @@ -139,7 +139,7 @@ trait WindowExecBase extends UnaryExecNode { RangeBoundOrdering(ordering, current, bound) case (RangeFrame, _) => - sys.error("Non-Zero range offsets are not supported for windows " + + throw new IllegalStateException("Non-Zero range offsets are not supported for windows " + "with multiple order expressions.") } } @@ -189,7 +189,7 @@ trait WindowExecBase extends UnaryExecNode { } case f: AggregateWindowFunction => collect("AGGREGATE", frame, e, f) case f: PythonUDF => collect("AGGREGATE", frame, e, f) - case f => sys.error(s"Unsupported window function: $f") + case f => throw new IllegalStateException(s"Unsupported window function: $f") } case _ => } @@ -296,7 +296,7 @@ trait WindowExecBase extends UnaryExecNode { } case _ => - sys.error(s"Unsupported factory: $key") + throw new IllegalStateException(s"Unsupported factory: $key") } // Keep track of the number of expressions. This is a side-effect in a map... diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index f49018b0c850f..455735a187909 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -922,7 +922,8 @@ private[hive] trait HiveInspectors { case Literal(_, dt: UserDefinedType[_]) => toInspector(dt.sqlType) // We will enumerate all of the possible constant expressions, throw exception if we missed - case Literal(_, dt) => sys.error(s"Hive doesn't support the constant type [$dt].") + case Literal(_, dt) => + throw new IllegalStateException(s"Hive doesn't support the constant type [$dt].") // ideally, we don't test the foldable here(but in optimizer), however, some of the // Hive UDF / UDAF requires its argument to be constant objectinspector, we do it eagerly. case _ if expr.foldable => toInspector(Literal.create(expr.eval(), expr.dataType)) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 3dddca844750d..d70ac781c0395 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -801,7 +801,7 @@ private[hive] class HiveClientImpl( val maxResults = 100000 val results = runHive(sql, maxResults) // It is very confusing when you only get back some of the results... - if (results.size == maxResults) sys.error("RESULTS POSSIBLY TRUNCATED") + if (results.size == maxResults) throw new IllegalStateException("RESULTS POSSIBLY TRUNCATED") results } From 1372f312052dd0361e371e2ed63436f3e299c617 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 13 May 2022 16:43:53 +0300 Subject: [PATCH 253/535] [SPARK-39164][SQL][3.3] Wrap asserts/illegal state exceptions by the INTERNAL_ERROR exception in actions ### What changes were proposed in this pull request? In the PR, I propose to catch `java.lang.IllegalStateException` and `java.lang.AssertionError` (raised by asserts), and wrap them by Spark's exception w/ the `INTERNAL_ERROR` error class. The modification affects only actions so far. This PR affects the case of missing bucket file. After the changes, Spark throws `SparkException` w/ `INTERNAL_ERROR` instead of `IllegalStateException`. Since this is not Spark's illegal state, the exception should be replaced by another runtime exception. Created the ticket SPARK-39163 to fix this. This is a backport of https://github.com/apache/spark/pull/36500. ### Why are the changes needed? To improve user experience with Spark SQL and unify representation of internal errors by using error classes like for other errors. Usually, users shouldn't observe asserts and illegal states, but even if such situation happens, they should see errors in the same way as other errors (w/ error class `INTERNAL_ERROR`). ### Does this PR introduce _any_ user-facing change? Yes. At least, in one particular case, see the modified test suites and SPARK-39163. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *.BucketedReadWithoutHiveSupportSuite" $ build/sbt "test:testOnly *.AdaptiveQueryExecSuite" $ build/sbt "test:testOnly *.WholeStageCodegenSuite" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit f5c3f0c228fef7808d1f927e134595ddd4d31723) Signed-off-by: Max Gekk Closes #36533 from MaxGekk/class-internal-error-3.3. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../scala/org/apache/spark/sql/Dataset.scala | 21 ++++++++++++++----- .../sql/execution/DataSourceScanExec.scala | 1 + .../apache/spark/sql/execution/subquery.scala | 1 + .../org/apache/spark/sql/SubquerySuite.scala | 10 +++++---- .../execution/WholeStageCodegenSuite.scala | 14 +++++++------ .../adaptive/AdaptiveQueryExecSuite.scala | 9 +++++--- .../spark/sql/sources/BucketedReadSuite.scala | 8 ++++--- 7 files changed, 43 insertions(+), 21 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 7d16a2f5eee14..56f0e8978ecda 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.commons.lang3.StringUtils -import org.apache.spark.TaskContext +import org.apache.spark.{SparkException, SparkThrowable, TaskContext} import org.apache.spark.annotation.{DeveloperApi, Stable, Unstable} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.function._ @@ -3848,12 +3848,23 @@ class Dataset[T] private[sql]( /** * Wrap a Dataset action to track the QueryExecution and time cost, then report to the - * user-registered callback functions. + * user-registered callback functions, and also to convert asserts/illegal states to + * the internal error exception. */ private def withAction[U](name: String, qe: QueryExecution)(action: SparkPlan => U) = { - SQLExecution.withNewExecutionId(qe, Some(name)) { - qe.executedPlan.resetMetrics() - action(qe.executedPlan) + try { + SQLExecution.withNewExecutionId(qe, Some(name)) { + qe.executedPlan.resetMetrics() + action(qe.executedPlan) + } + } catch { + case e: SparkThrowable => throw e + case e @ (_: java.lang.IllegalStateException | _: java.lang.AssertionError) => + throw new SparkException( + errorClass = "INTERNAL_ERROR", + messageParameters = Array(s"""The "$name" action failed."""), + cause = e) + case e: Throwable => throw e } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index ac0f3af57254d..1ec93a614b779 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -592,6 +592,7 @@ case class FileSourceScanExec( }.groupBy { f => BucketingUtils .getBucketId(new Path(f.filePath).getName) + // TODO(SPARK-39163): Throw an exception w/ error class for an invalid bucket file .getOrElse(throw new IllegalStateException(s"Invalid bucket file ${f.filePath}")) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 4bbfc3467d4de..209b0f79243e5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -79,6 +79,7 @@ case class ScalarSubquery( def updateResult(): Unit = { val rows = plan.executeCollect() if (rows.length > 1) { + // TODO(SPARK-39167): Throw an exception w/ an error class for multiple rows from a subquery throw new IllegalStateException( s"more than one row returned by a subquery used as an expression:\n$plan") } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 221663c61e18d..396fca47634ac 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import scala.collection.mutable.ArrayBuffer +import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan, Sort} import org.apache.spark.sql.execution.{ColumnarToRowExec, ExecSubqueryExpression, FileSourceScanExec, InputAdapter, ReusedSubqueryExec, ScalarSubquery, SubqueryExec, WholeStageCodegenExec} @@ -146,12 +147,13 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } test("runtime error when the number of rows is greater than 1") { - val error2 = intercept[RuntimeException] { + val e = intercept[SparkException] { sql("select (select a from (select 1 as a union all select 2 as a) t) as b").collect() } - assert(error2.getMessage.contains( - "more than one row returned by a subquery used as an expression") - ) + // TODO(SPARK-39167): Throw an exception w/ an error class for multiple rows from a subquery + assert(e.getErrorClass === "INTERNAL_ERROR") + assert(e.getCause.getMessage.contains( + "more than one row returned by a subquery used as an expression")) } test("uncorrelated scalar subquery on a DataFrame generated query") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index ba511354f7a40..27689bb4d45b4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution +import org.apache.spark.SparkException import org.apache.spark.sql.{Dataset, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.expressions.codegen.{ByteCodeStats, CodeAndComment, CodeGenerator} import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite @@ -762,10 +763,11 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession "SELECT AVG(v) FROM VALUES(1) t(v)", // Tet case with keys "SELECT k, AVG(v) FROM VALUES((1, 1)) t(k, v) GROUP BY k").foreach { query => - val errMsg = intercept[IllegalStateException] { + val e = intercept[SparkException] { sql(query).collect - }.getMessage - assert(errMsg.contains(expectedErrMsg)) + } + assert(e.getErrorClass === "INTERNAL_ERROR") + assert(e.getCause.getMessage.contains(expectedErrMsg)) } } } @@ -784,11 +786,11 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession // Tet case with keys "SELECT k, AVG(a + b), SUM(a + b + c) FROM VALUES((1, 1, 1, 1)) t(k, a, b, c) " + "GROUP BY k").foreach { query => - val e = intercept[Exception] { + val e = intercept[SparkException] { sql(query).collect } - assert(e.isInstanceOf[IllegalStateException]) - assert(e.getMessage.contains(expectedErrMsg)) + assert(e.getErrorClass === "INTERNAL_ERROR") + assert(e.getCause.getMessage.contains(expectedErrMsg)) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 76741dc4d08e0..90aff26b7fe31 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -23,6 +23,7 @@ import java.net.URI import org.apache.logging.log4j.Level import org.scalatest.PrivateMethodTester +import org.apache.spark.SparkException import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerJobStart} import org.apache.spark.sql.{Dataset, QueryTest, Row, SparkSession, Strategy} import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} @@ -856,11 +857,13 @@ class AdaptiveQueryExecSuite df1.write.parquet(tableDir.getAbsolutePath) val aggregated = spark.table("bucketed_table").groupBy("i").count() - val error = intercept[Exception] { + val error = intercept[SparkException] { aggregated.count() } - assert(error.toString contains "Invalid bucket file") - assert(error.getSuppressed.size === 0) + // TODO(SPARK-39163): Throw an exception w/ error class for an invalid bucket file + assert(error.getErrorClass === "INTERNAL_ERROR") + assert(error.getCause.toString contains "Invalid bucket file") + assert(error.getCause.getSuppressed.size === 0) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index 18039db2ca744..c3250f8d9fc0c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -22,6 +22,7 @@ import java.net.URI import scala.util.Random +import org.apache.spark.SparkException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions @@ -841,11 +842,12 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti df1.write.parquet(tableDir.getAbsolutePath) val aggregated = spark.table("bucketed_table").groupBy("i").count() - val error = intercept[Exception] { + val e = intercept[SparkException] { aggregated.count() } - - assert(error.toString contains "Invalid bucket file") + // TODO(SPARK-39163): Throw an exception w/ error class for an invalid bucket file + assert(e.getErrorClass === "INTERNAL_ERROR") + assert(e.getCause.toString contains "Invalid bucket file") } } From 1a49de67e3fa0d25e84540313688cde82d6001df Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 13 May 2022 21:45:06 +0800 Subject: [PATCH 254/535] [SPARK-39177][SQL] Provide query context on map key not exists error when WSCG is off ### What changes were proposed in this pull request? Similar to https://github.com/apache/spark/pull/36525, this PR provides query context for "map key not exists" runtime error when WSCG is off. ### Why are the changes needed? Enhance the runtime error query context for "map key not exists" runtime error. After changes, it works when the whole stage codegen is not available. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #36538 from gengliangwang/fixMapKeyContext. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 1afddf407436c3b315ec601fab5a4a1b2028e672) Signed-off-by: Gengliang Wang --- .../expressions/collectionOperations.scala | 6 ++++++ .../expressions/complexTypeExtractors.scala | 13 ++++++++++--- .../org/apache/spark/sql/SQLQuerySuite.scala | 19 +++++++++++++++++++ 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 1b42ea5eb8748..1bd934214f5c7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -2261,6 +2261,12 @@ case class ElementAt( override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): ElementAt = copy(left = newLeft, right = newRight) + + override def initQueryContext(): String = if (failOnError) { + origin.context + } else { + "" + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 45661c00c5193..b84050c1837df 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -339,7 +339,8 @@ trait GetArrayItemUtil { /** * Common trait for [[GetMapValue]] and [[ElementAt]]. */ -trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { +trait GetMapValueUtil + extends BinaryExpression with ImplicitCastInputTypes with SupportQueryContext { // todo: current search is O(n), improve it. def getValueEval( @@ -365,7 +366,7 @@ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { if (!found) { if (failOnError) { - throw QueryExecutionErrors.mapKeyNotExistError(ordinal, keyType, origin.context) + throw QueryExecutionErrors.mapKeyNotExistError(ordinal, keyType, queryContext) } else { null } @@ -398,7 +399,7 @@ trait GetMapValueUtil extends BinaryExpression with ImplicitCastInputTypes { } val keyJavaType = CodeGenerator.javaType(keyType) - lazy val errorContext = ctx.addReferenceObj("errCtx", origin.context) + lazy val errorContext = ctx.addReferenceObj("errCtx", queryContext) val keyDt = ctx.addReferenceObj("keyType", keyType, keyType.getClass.getName) nullSafeCodeGen(ctx, ev, (eval1, eval2) => { val keyNotFoundBranch = if (failOnError) { @@ -488,4 +489,10 @@ case class GetMapValue( override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): GetMapValue = copy(child = newLeft, key = newRight) + + override def initQueryContext(): String = if (failOnError) { + origin.context + } else { + "" + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 68db57ea3649f..21ce009a9076a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4404,6 +4404,25 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } + test("SPARK-39177: Query context of getting map value should be serialized to executors" + + " when WSCG is off") { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", + SQLConf.ANSI_ENABLED.key -> "true") { + withTable("t") { + sql("create table t(m map) using parquet") + sql("insert into t values map('a', 'b')") + Seq( + "select m['foo'] from t", + "select element_at(m, 'foo') from t").foreach { query => + val msg = intercept[SparkException] { + sql(query).collect() + }.getMessage + assert(msg.contains(query)) + } + } + } + } + test("SPARK-38589: try_avg should return null if overflow happens before merging") { val yearMonthDf = Seq(Int.MaxValue, Int.MaxValue, 2) .map(Period.ofMonths) From e743e68ce62e18ced6c49a22f5d101c72b7bfbe2 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 13 May 2022 16:47:11 +0300 Subject: [PATCH 255/535] [SPARK-39178][CORE] SparkFatalException should show root cause when print error stack ### What changes were proposed in this pull request? Our user meet an case when running broadcast, throw `SparkFatalException`, but in error stack, it don't show the error case. ### Why are the changes needed? Make exception more clear ### Does this PR introduce _any_ user-facing change? User can got root cause when application throw `SparkFatalException`. ### How was this patch tested? For ut ``` test("xxxx") { throw new SparkFatalException( new OutOfMemoryError("Not enough memory to build and broadcast the table to all " + "worker nodes. As a workaround, you can either disable broadcast by setting " + s"driver memory by setting ${SparkLauncher.DRIVER_MEMORY} to a higher value.") .initCause(null)) } ``` Before this pr: ``` [info] org.apache.spark.util.SparkFatalException: [info] at org.apache.spark.SparkContextSuite.$anonfun$new$1(SparkContextSuite.scala:59) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:190) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:203) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:188) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:200) [info] at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:200) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:182) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:64) [info] at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234) [info] at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227) [info] at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:64) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:233) [info] at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413) [info] at scala.collection.immutable.List.foreach(List.scala:431) [info] at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) [info] at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396) [info] at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:233) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:232) [info] at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1563) [info] at org.scalatest.Suite.run(Suite.scala:1112) ``` After this pr: ``` [info] org.apache.spark.util.SparkFatalException: java.lang.OutOfMemoryError: Not enough memory to build and broadcast the table to all worker nodes. As a workaround, you can either disable broadcast by setting driver memory by setting spark.driver.memory to a higher value. [info] at org.apache.spark.SparkContextSuite.$anonfun$new$1(SparkContextSuite.scala:59) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:190) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:203) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:188) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:200) [info] at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:200) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:182) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:64) [info] at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234) [info] at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227) [info] at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:64) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:233) [info] at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413) [info] at scala.collection.immutable.List.foreach(List.scala:431) [info] at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) [info] at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396) [info] at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:233) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:232) [info] at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1563) [info] at org.scalatest.Suite.run(Suite.scala:1112) [info] at org.scalatest.Suite.run$(Suite.scala:1094) [info] at org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1563) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:237) [info] at org.scalatest.SuperEngine.runImpl(Engine.scala:535) [info] at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:237) [info] at org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:236) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:64) [info] at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213) [info] at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210) [info] at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208) [info] at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:64) [info] at org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:318) [info] at org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:513) [info] at sbt.ForkMain$Run.lambda$runTest$1(ForkMain.java:413) [info] at java.util.concurrent.FutureTask.run(FutureTask.java:266) [info] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [info] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [info] at java.lang.Thread.run(Thread.java:748) [info] Cause: java.lang.OutOfMemoryError: Not enough memory to build and broadcast the table to all worker nodes. As a workaround, you can either disable broadcast by setting driver memory by setting spark.driver.memory to a higher value. [info] at org.apache.spark.SparkContextSuite.$anonfun$new$1(SparkContextSuite.scala:58) ``` Closes #36539 from AngersZhuuuu/SPARK-39178. Authored-by: Angerszhuuuu Signed-off-by: Max Gekk (cherry picked from commit d7317b03e975f8dc1a8c276dd0a931e00c478717) Signed-off-by: Max Gekk --- .../main/scala/org/apache/spark/util/SparkFatalException.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/util/SparkFatalException.scala b/core/src/main/scala/org/apache/spark/util/SparkFatalException.scala index 1aa2009fa9b5b..d15d300ae3ce4 100644 --- a/core/src/main/scala/org/apache/spark/util/SparkFatalException.scala +++ b/core/src/main/scala/org/apache/spark/util/SparkFatalException.scala @@ -24,4 +24,5 @@ package org.apache.spark.util * which is run by using ThreadUtils.awaitResult. ThreadUtils.awaitResult will catch * it and re-throw the original exception/error. */ -private[spark] final class SparkFatalException(val throwable: Throwable) extends Exception +private[spark] final class SparkFatalException(val throwable: Throwable) + extends Exception(throwable) From 30834b847e7577cf694558d43fb618fc0b1eb09e Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Fri, 13 May 2022 22:01:04 +0800 Subject: [PATCH 256/535] [SPARK-39157][SQL] H2Dialect should override getJDBCType so as make the data type is correct ### What changes were proposed in this pull request? Currently, `H2Dialect` not implement `getJDBCType` of `JdbcDialect`, so the DS V2 push-down will throw exception show below: ``` Job aborted due to stage failure: Task 0 in stage 13.0 failed 1 times, most recent failure: Lost task 0.0 in stage 13.0 (TID 13) (jiaan-gengdembp executor driver): org.h2.jdbc.JdbcSQLNonTransientException: Unknown data type: "STRING"; SQL statement: SELECT "DEPT","NAME","SALARY","BONUS","IS_MANAGER" FROM "test"."employee" WHERE ("BONUS" IS NOT NULL) AND ("DEPT" IS NOT NULL) AND (CAST("BONUS" AS string) LIKE '%30%') AND (CAST("DEPT" AS byte) > 1) AND (CAST("DEPT" AS short) > 1) AND (CAST("BONUS" AS decimal(20,2)) > 1200.00) [50004-210] ``` H2Dialect should implement `getJDBCType` of `JdbcDialect`. ### Why are the changes needed? make the H2 data type is correct. ### Does this PR introduce _any_ user-facing change? 'Yes'. Fix a bug for `H2Dialect`. ### How was this patch tested? New tests. Closes #36516 from beliefer/SPARK-39157. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit fa3f096e02d408fbeab5f69af451ef8bc8f5b3db) Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/jdbc/H2Dialect.scala | 13 ++++++++++++- .../org/apache/spark/sql/jdbc/JDBCV2Suite.scala | 17 +++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala index 0aa971c0d3ab1..56cadbe8e2c07 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.jdbc -import java.sql.SQLException +import java.sql.{SQLException, Types} import java.util.Locale import scala.util.control.NonFatal @@ -27,6 +27,8 @@ import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchT import org.apache.spark.sql.connector.expressions.Expression import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, GeneralAggregateFunc} import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils +import org.apache.spark.sql.types.{BooleanType, ByteType, DataType, DecimalType, ShortType, StringType} private object H2Dialect extends JdbcDialect { override def canHandle(url: String): Boolean = @@ -90,6 +92,15 @@ private object H2Dialect extends JdbcDialect { ) } + override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { + case StringType => Option(JdbcType("CLOB", Types.CLOB)) + case BooleanType => Some(JdbcType("BOOLEAN", Types.BOOLEAN)) + case ShortType | ByteType => Some(JdbcType("SMALLINT", Types.SMALLINT)) + case t: DecimalType => Some( + JdbcType(s"NUMERIC(${t.precision},${t.scale})", Types.NUMERIC)) + case _ => JdbcUtils.getCommonJDBCType(dt) + } + override def classifyException(message: String, e: Throwable): AnalysisException = { e match { case exception: SQLException => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index b6f36b912f870..91526cef50785 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -466,6 +466,23 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkFiltersRemoved(df7, false) checkPushedInfo(df7, "PushedFilters: [DEPT IS NOT NULL]") checkAnswer(df7, Seq(Row(6, "jen", 12000, 1200, true))) + + val df8 = sql( + """ + |SELECT * FROM h2.test.employee + |WHERE cast(bonus as string) like '%30%' + |AND cast(dept as byte) > 1 + |AND cast(dept as short) > 1 + |AND cast(bonus as decimal(20, 2)) > 1200""".stripMargin) + checkFiltersRemoved(df8, ansiMode) + val expectedPlanFragment8 = if (ansiMode) { + "PushedFilters: [BONUS IS NOT NULL, DEPT IS NOT NULL, " + + "CAST(BONUS AS string) LIKE '%30%', CAST(DEPT AS byte) > 1, ...," + } else { + "PushedFilters: [BONUS IS NOT NULL, DEPT IS NOT NULL]," + } + checkPushedInfo(df8, expectedPlanFragment8) + checkAnswer(df8, Seq(Row(2, "david", 10000, 1300, true))) } } } From 2db9d78d71662de276dd65e582674b6088eff119 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sat, 14 May 2022 19:45:24 +0900 Subject: [PATCH 257/535] [SPARK-39174][SQL] Catalogs loading swallows missing classname for ClassNotFoundException ### What changes were proposed in this pull request? this PR captures the actual missing classname when catalog loading meets ClassNotFoundException ### Why are the changes needed? ClassNotFoundException can occur when missing dependencies, we shall not always report the catalog class is missing ### Does this PR introduce _any_ user-facing change? yes, when loading catalogs and ClassNotFoundException occurs, it shows the correct missing class. ### How was this patch tested? new test added Closes #36534 from yaooqinn/SPARK-39174. Authored-by: Kent Yao Signed-off-by: Hyukjin Kwon (cherry picked from commit 1b37f19876298e995596a30edc322c856ea1bbb4) Signed-off-by: Hyukjin Kwon --- .../sql/connector/catalog/Catalogs.scala | 5 ++-- .../sql/errors/QueryExecutionErrors.scala | 5 ++-- .../catalog/CatalogLoadingSuite.java | 28 +++++++++++++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala index 9949f45d48335..71b1042ab3064 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/connector/catalog/Catalogs.scala @@ -60,8 +60,9 @@ private[sql] object Catalogs { plugin.initialize(name, catalogOptions(name, conf)) plugin } catch { - case _: ClassNotFoundException => - throw QueryExecutionErrors.catalogPluginClassNotFoundForCatalogError(name, pluginClassName) + case e: ClassNotFoundException => + throw QueryExecutionErrors.catalogPluginClassNotFoundForCatalogError( + name, pluginClassName, e) case e: NoSuchMethodException => throw QueryExecutionErrors.catalogFailToFindPublicNoArgConstructorError( name, pluginClassName, e) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 7825e9a94dc36..df5959283eb62 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1586,8 +1586,9 @@ object QueryExecutionErrors extends QueryErrorsBase { def catalogPluginClassNotFoundForCatalogError( name: String, - pluginClassName: String): Throwable = { - new SparkException(s"Cannot find catalog plugin class for catalog '$name': $pluginClassName") + pluginClassName: String, + e: Exception): Throwable = { + new SparkException(s"Cannot find catalog plugin class for catalog '$name': $pluginClassName", e) } def catalogFailToFindPublicNoArgConstructorError( diff --git a/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java b/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java index 37f60511cd60a..81870508b70a1 100644 --- a/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java +++ b/sql/catalyst/src/test/java/org/apache/spark/sql/connector/catalog/CatalogLoadingSuite.java @@ -20,6 +20,8 @@ import org.apache.spark.SparkException; import org.apache.spark.sql.internal.SQLConf; import org.apache.spark.sql.util.CaseInsensitiveStringMap; +import org.apache.spark.util.Utils; + import org.junit.Assert; import org.junit.Test; @@ -91,6 +93,19 @@ public void testLoadMissingClass() { exc.getMessage().contains("com.example.NoSuchCatalogPlugin")); } + @Test + public void testLoadMissingDependentClasses() { + SQLConf conf = new SQLConf(); + String catalogClass = ClassFoundCatalogPlugin.class.getCanonicalName(); + conf.setConfString("spark.sql.catalog.missing", catalogClass); + + SparkException exc = + Assert.assertThrows(SparkException.class, () -> Catalogs.load("missing", conf)); + + Assert.assertTrue(exc.getCause() instanceof ClassNotFoundException); + Assert.assertTrue(exc.getCause().getMessage().contains(catalogClass + "Dep")); + } + @Test public void testLoadNonCatalogPlugin() { SQLConf conf = new SQLConf(); @@ -209,3 +224,16 @@ class InvalidCatalogPlugin { // doesn't implement CatalogPlugin public void initialize(CaseInsensitiveStringMap options) { } } + +class ClassFoundCatalogPlugin implements CatalogPlugin { + + @Override + public void initialize(String name, CaseInsensitiveStringMap options) { + Utils.classForName(this.getClass().getCanonicalName() + "Dep", true, true); + } + + @Override + public String name() { + return null; + } +} From 2672624931dd4784fad6cdd912e3669c83741060 Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Sun, 15 May 2022 09:25:02 +0900 Subject: [PATCH 258/535] [SPARK-38953][PYTHON][DOC] Document PySpark common exceptions / errors ### What changes were proposed in this pull request? Document PySpark(SQL, pandas API on Spark, and Py4J) common exceptions/errors and respective solutions. ### Why are the changes needed? Make PySpark debugging easier. There are common exceptions/errors in PySpark SQL, pandas API on Spark, and Py4J. Documenting exceptions and respective solutions help users debug PySpark. ### Does this PR introduce _any_ user-facing change? No. Document change only. ### How was this patch tested? Manual test. image Please refer to https://github.com/apache/spark/blob/7a1c7599a21cbbe2778707b72643cf98ac601ab1/python/docs/source/development/debugging.rst#common-exceptions--errors for the whole rendered page. Closes #36267 from xinrong-databricks/common_err. Authored-by: Xinrong Meng Signed-off-by: Hyukjin Kwon (cherry picked from commit f940d7adfd6d071bc3bdcc406e01263a7f03e955) Signed-off-by: Hyukjin Kwon --- python/docs/source/development/debugging.rst | 280 +++++++++++++++++++ 1 file changed, 280 insertions(+) diff --git a/python/docs/source/development/debugging.rst b/python/docs/source/development/debugging.rst index 1e6571da0289e..05c47ae4bf7fc 100644 --- a/python/docs/source/development/debugging.rst +++ b/python/docs/source/development/debugging.rst @@ -332,3 +332,283 @@ The UDF IDs can be seen in the query plan, for example, ``add1(...)#2L`` in ``Ar This feature is not supported with registered UDFs. + +Common Exceptions / Errors +-------------------------- + +PySpark SQL +~~~~~~~~~~~ + +**AnalysisException** + +``AnalysisException`` is raised when failing to analyze a SQL query plan. + +Example: + +.. code-block:: python + + >>> df = spark.range(1) + >>> df['bad_key'] + Traceback (most recent call last): + ... + pyspark.sql.utils.AnalysisException: Cannot resolve column name "bad_key" among (id) + +Solution: + +.. code-block:: python + + >>> df['id'] + Column<'id'> + +**ParseException** + +``ParseException`` is raised when failing to parse a SQL command. + +Example: + +.. code-block:: python + + >>> spark.sql("select * 1") + Traceback (most recent call last): + ... + pyspark.sql.utils.ParseException: + Syntax error at or near '1': extra input '1'(line 1, pos 9) + == SQL == + select * 1 + ---------^^^ + +Solution: + +.. code-block:: python + + >>> spark.sql("select *") + DataFrame[] + +**IllegalArgumentException** + +``IllegalArgumentException`` is raised when passing an illegal or inappropriate argument. + +Example: + +.. code-block:: python + + >>> spark.range(1).sample(-1.0) + Traceback (most recent call last): + ... + pyspark.sql.utils.IllegalArgumentException: requirement failed: Sampling fraction (-1.0) must be on interval [0, 1] without replacement + +Solution: + +.. code-block:: python + + >>> spark.range(1).sample(1.0) + DataFrame[id: bigint] + +**PythonException** + +``PythonException`` is thrown from Python workers. + +You can see the type of exception that was thrown from the Python worker and its stack trace, as ``TypeError`` below. + +Example: + +.. code-block:: python + + >>> from pyspark.sql.functions import udf + >>> def f(x): + ... return F.abs(x) + ... + >>> spark.range(-1, 1).withColumn("abs", udf(f)("id")).collect() + 22/04/12 14:52:31 ERROR Executor: Exception in task 7.0 in stage 37.0 (TID 232) + org.apache.spark.api.python.PythonException: Traceback (most recent call last): + ... + TypeError: Invalid argument, not a string or column: -1 of type . For column literals, use 'lit', 'array', 'struct' or 'create_map' function. + +Solution: + +.. code-block:: python + + >>> def f(x): + ... return abs(x) + ... + >>> spark.range(-1, 1).withColumn("abs", udf(f)("id")).collect() + [Row(id=-1, abs='1'), Row(id=0, abs='0')] + +**StreamingQueryException** + +``StreamingQueryException`` is raised when failing a StreamingQuery. Most often, it is thrown from Python workers, that wrap it as a ``PythonException``. + +Example: + +.. code-block:: python + + >>> sdf = spark.readStream.format("text").load("python/test_support/sql/streaming") + >>> from pyspark.sql.functions import col, udf + >>> bad_udf = udf(lambda x: 1 / 0) + >>> (sdf.select(bad_udf(col("value"))).writeStream.format("memory").queryName("q1").start()).processAllAvailable() + Traceback (most recent call last): + ... + org.apache.spark.api.python.PythonException: Traceback (most recent call last): + File "", line 1, in + ZeroDivisionError: division by zero + ... + pyspark.sql.utils.StreamingQueryException: Query q1 [id = ced5797c-74e2-4079-825b-f3316b327c7d, runId = 65bacaf3-9d51-476a-80ce-0ac388d4906a] terminated with exception: Writing job aborted + +Solution: + +Fix the StreamingQuery and re-execute the workflow. + +**SparkUpgradeException** + +``SparkUpgradeException`` is thrown because of Spark upgrade. + +Example: + +.. code-block:: python + + >>> from pyspark.sql.functions import to_date, unix_timestamp, from_unixtime + >>> df = spark.createDataFrame([("2014-31-12",)], ["date_str"]) + >>> df2 = df.select("date_str", to_date(from_unixtime(unix_timestamp("date_str", "yyyy-dd-aa")))) + >>> df2.collect() + Traceback (most recent call last): + ... + pyspark.sql.utils.SparkUpgradeException: You may get a different result due to the upgrading to Spark >= 3.0: Fail to recognize 'yyyy-dd-aa' pattern in the DateTimeFormatter. 1) You can set spark.sql.legacy.timeParserPolicy to LEGACY to restore the behavior before Spark 3.0. 2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html + +Solution: + +.. code-block:: python + + >>> spark.conf.set("spark.sql.legacy.timeParserPolicy", "LEGACY") + >>> df2 = df.select("date_str", to_date(from_unixtime(unix_timestamp("date_str", "yyyy-dd-aa")))) + >>> df2.collect() + [Row(date_str='2014-31-12', to_date(from_unixtime(unix_timestamp(date_str, yyyy-dd-aa), yyyy-MM-dd HH:mm:ss))=None)] + +pandas API on Spark +~~~~~~~~~~~~~~~~~~~ + +There are specific common exceptions / errors in pandas API on Spark. + +**ValueError: Cannot combine the series or dataframe because it comes from a different dataframe** + +Operations involving more than one series or dataframes raises a ``ValueError`` if ``compute.ops_on_diff_frames`` is disabled (disabled by default). Such operations may be expensive due to joining of underlying Spark frames. So users should be aware of the cost and enable that flag only when necessary. + +Exception: + +.. code-block:: python + + >>> ps.Series([1, 2]) + ps.Series([3, 4]) + Traceback (most recent call last): + ... + ValueError: Cannot combine the series or dataframe because it comes from a different dataframe. In order to allow this operation, enable 'compute.ops_on_diff_frames' option. + + +Solution: + +.. code-block:: python + + >>> with ps.option_context('compute.ops_on_diff_frames', True): + ... ps.Series([1, 2]) + ps.Series([3, 4]) + ... + 0 4 + 1 6 + dtype: int64 + +**RuntimeError: Result vector from pandas_udf was not the required length** + +Exception: + +.. code-block:: python + + >>> def f(x) -> ps.Series[np.int32]: + ... return x[:-1] + ... + >>> ps.DataFrame({"x":[1, 2], "y":[3, 4]}).transform(f) + 22/04/12 13:46:39 ERROR Executor: Exception in task 2.0 in stage 16.0 (TID 88) + org.apache.spark.api.python.PythonException: Traceback (most recent call last): + ... + RuntimeError: Result vector from pandas_udf was not the required length: expected 1, got 0 + +Solution: + +.. code-block:: python + + >>> def f(x) -> ps.Series[np.int32]: + ... return x + ... + >>> ps.DataFrame({"x":[1, 2], "y":[3, 4]}).transform(f) + x y + 0 1 3 + 1 2 4 + +Py4j +~~~~ + +**Py4JJavaError** + +``Py4JJavaError`` is raised when an exception occurs in the Java client code. +You can see the type of exception that was thrown on the Java side and its stack trace, as ``java.lang.NullPointerException`` below. + +Example: + +.. code-block:: python + + >>> spark.sparkContext._jvm.java.lang.String(None) + Traceback (most recent call last): + ... + py4j.protocol.Py4JJavaError: An error occurred while calling None.java.lang.String. + : java.lang.NullPointerException + .. + +Solution: + +.. code-block:: python + + >>> spark.sparkContext._jvm.java.lang.String("x") + 'x' + +**Py4JError** + +``Py4JError`` is raised when any other error occurs such as when the Python client program tries to access an object that no longer exists on the Java side. + +Example: + +.. code-block:: python + + >>> from pyspark.ml.linalg import Vectors + >>> from pyspark.ml.regression import LinearRegression + >>> df = spark.createDataFrame( + ... [(1.0, 2.0, Vectors.dense(1.0)), (0.0, 2.0, Vectors.sparse(1, [], []))], + ... ["label", "weight", "features"], + ... ) + >>> lr = LinearRegression( + ... maxIter=1, regParam=0.0, solver="normal", weightCol="weight", fitIntercept=False + ... ) + >>> model = lr.fit(df) + >>> model + LinearRegressionModel: uid=LinearRegression_eb7bc1d4bf25, numFeatures=1 + >>> model.__del__() + >>> model + Traceback (most recent call last): + ... + py4j.protocol.Py4JError: An error occurred while calling o531.toString. Trace: + py4j.Py4JException: Target Object ID does not exist for this gateway :o531 + ... + +Solution: + +Access an object that exists on the Java side. + +**Py4JNetworkError** + +``Py4JNetworkError`` is raised when a problem occurs during network transfer (e.g., connection lost). In this case, we shall debug the network and rebuild the connection. + +Stack Traces +------------ + +There are Spark configurations to control stack traces: + +- ``spark.sql.execution.pyspark.udf.simplifiedTraceback.enabled`` is true by default to simplify traceback from Python UDFs. + +- ``spark.sql.pyspark.jvmStacktrace.enabled`` is false by default to hide JVM stacktrace and to show a Python-friendly exception only. + +Spark configurations above are independent from log level settings. Control log levels through :meth:`pyspark.SparkContext.setLogLevel`. From ab1d986b631f531ae0f756d4f3a536d30c858604 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Sun, 15 May 2022 09:26:19 +0900 Subject: [PATCH 259/535] [SPARK-37544][SQL] Correct date arithmetic in sequences ### What changes were proposed in this pull request? Change `InternalSequenceBase` to pass a time-zone aware value to `DateTimeUtils#timestampAddInterval`, rather than a time-zone agnostic value, when performing `Date` arithmetic. ### Why are the changes needed? The following query gets the wrong answer if run in the America/Los_Angeles time zone: ``` spark-sql> select sequence(date '2021-01-01', date '2022-01-01', interval '3' month) x; [2021-01-01,2021-03-31,2021-06-30,2021-09-30,2022-01-01] Time taken: 0.664 seconds, Fetched 1 row(s) spark-sql> ``` The answer should be ``` [2021-01-01,2021-04-01,2021-07-01,2021-10-01,2022-01-01] ``` `InternalSequenceBase` converts the date to micros by multiplying days by micros per day. This converts the date into a time-zone agnostic timestamp. However, `InternalSequenceBase` uses `DateTimeUtils#timestampAddInterval` to perform the arithmetic, and that function assumes a _time-zone aware_ timestamp. One simple fix would be to call `DateTimeUtils#timestampNTZAddInterval` instead for date arithmetic. However, Spark date arithmetic is typically time-zone aware (see the comment in the test added by this PR), so this PR converts the date to a time-zone aware value before calling `DateTimeUtils#timestampAddInterval`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New unit test. Closes #36546 from bersprockets/date_sequence_issue. Authored-by: Bruce Robbins Signed-off-by: Hyukjin Kwon (cherry picked from commit 14ee0d8f04f218ad61688196a0b984f024151468) Signed-off-by: Hyukjin Kwon --- .../expressions/collectionOperations.scala | 58 +++++++++++++++---- .../CollectionExpressionsSuite.scala | 46 ++++++++++++++- 2 files changed, 93 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 1bd934214f5c7..f38beb480e680 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -3012,6 +3012,22 @@ object Sequence { case TimestampNTZType => timestampNTZAddInterval } + private def toMicros(value: Long, scale: Long): Long = { + if (scale == MICROS_PER_DAY) { + daysToMicros(value.toInt, zoneId) + } else { + value * scale + } + } + + private def fromMicros(value: Long, scale: Long): Long = { + if (scale == MICROS_PER_DAY) { + microsToDays(value, zoneId).toLong + } else { + value / scale + } + } + override def eval(input1: Any, input2: Any, input3: Any): Array[T] = { val start = input1.asInstanceOf[T] val stop = input2.asInstanceOf[T] @@ -3035,8 +3051,9 @@ object Sequence { // about a month length in days and a day length in microseconds val intervalStepInMicros = stepMicros + stepMonths * microsPerMonth + stepDays * MICROS_PER_DAY - val startMicros: Long = num.toLong(start) * scale - val stopMicros: Long = num.toLong(stop) * scale + + val startMicros: Long = toMicros(num.toLong(start), scale) + val stopMicros: Long = toMicros(num.toLong(stop), scale) val maxEstimatedArrayLength = getSequenceLength(startMicros, stopMicros, input3, intervalStepInMicros) @@ -3048,7 +3065,8 @@ object Sequence { var i = 0 while (t < exclusiveItem ^ stepSign < 0) { - arr(i) = fromLong(t / scale) + val result = fromMicros(t, scale) + arr(i) = fromLong(result) i += 1 t = addInterval(startMicros, i * stepMonths, i * stepDays, i * stepMicros, zoneId) } @@ -3061,13 +3079,16 @@ object Sequence { protected def stepSplitCode( stepMonths: String, stepDays: String, stepMicros: String, step: String): String + private val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") + private val addIntervalCode = outerDataType match { - case TimestampType | DateType => - "org.apache.spark.sql.catalyst.util.DateTimeUtils.timestampAddInterval" - case TimestampNTZType => - "org.apache.spark.sql.catalyst.util.DateTimeUtils.timestampNTZAddInterval" + case TimestampType | DateType => s"$dtu.timestampAddInterval" + case TimestampNTZType => s"$dtu.timestampNTZAddInterval" } + private val daysToMicrosCode = s"$dtu.daysToMicros" + private val microsToDaysCode = s"$dtu.microsToDays" + override def genCode( ctx: CodegenContext, start: String, @@ -3111,6 +3132,24 @@ object Sequence { val stepSplits = stepSplitCode(stepMonths, stepDays, stepMicros, step) + val toMicrosCode = if (scale == MICROS_PER_DAY) { + s""" + | final long $startMicros = $daysToMicrosCode((int) $start, $zid); + | final long $stopMicros = $daysToMicrosCode((int) $stop, $zid); + |""".stripMargin + } else { + s""" + | final long $startMicros = $start * ${scale}L; + | final long $stopMicros = $stop * ${scale}L; + |""".stripMargin + } + + val fromMicrosCode = if (scale == MICROS_PER_DAY) { + s"($elemType) $microsToDaysCode($t, $zid)" + } else { + s"($elemType) ($t / ${scale}L)" + } + s""" |$stepSplits | @@ -3122,8 +3161,7 @@ object Sequence { |} else if ($stepMonths == 0 && $stepDays == 0 && ${scale}L == 1) { | ${backedSequenceImpl.genCode(ctx, start, stop, stepMicros, arr, elemType)}; |} else { - | final long $startMicros = $start * ${scale}L; - | final long $stopMicros = $stop * ${scale}L; + | $toMicrosCode | | $sequenceLengthCode | @@ -3135,7 +3173,7 @@ object Sequence { | int $i = 0; | | while ($t < $exclusiveItem ^ $stepSign < 0) { - | $arr[$i] = ($elemType) ($t / ${scale}L); + | $arr[$i] = $fromMicrosCode; | $i += 1; | $t = $addIntervalCode( | $startMicros, $i * $stepMonths, $i * $stepDays, $i * $stepMicros, $zid); diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index fb4bf43ba83f1..a8c4b16c7a05d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.util.{DateTimeTestUtils, DateTimeUtils} -import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.UTC +import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{outstandingZoneIds, LA, UTC} import org.apache.spark.sql.catalyst.util.IntervalUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -964,6 +964,50 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper } } + test("SPARK-37544: Time zone should not affect date sequence with month interval") { + outstandingZoneIds.foreach { zid => + DateTimeTestUtils.withDefaultTimeZone(zid) { + checkEvaluation(new Sequence( + Literal(Date.valueOf("2021-01-01")), + Literal(Date.valueOf("2022-01-01")), + Literal(stringToInterval("interval 3 month"))), + Seq( + Date.valueOf("2021-01-01"), + Date.valueOf("2021-04-01"), + Date.valueOf("2021-07-01"), + Date.valueOf("2021-10-01"), + Date.valueOf("2022-01-01"))) + } + } + + // However, time zone should still affect sequences generated using hours interval, + // especially if the sequence's start-stop includes a "spring forward". + // Take, for example, the following Spark date arithmetic: + // select cast(date'2022-03-09' + interval '4' days '23' hour as date) as x; + // In the America/Los_Angeles time zone, it returns 2022-03-14. + // In the UTC time zone, it instead returns 2022-03-13. + // The sequence function should be consistent with the date arithmetic. + DateTimeTestUtils.withDefaultTimeZone(LA) { + checkEvaluation(new Sequence( + Literal(Date.valueOf("2022-03-09")), + Literal(Date.valueOf("2022-03-15")), + Literal(stringToInterval("interval 4 days 23 hours"))), + Seq( + Date.valueOf("2022-03-09"), + Date.valueOf("2022-03-14"))) + } + + DateTimeTestUtils.withDefaultTimeZone(UTC) { + checkEvaluation(new Sequence( + Literal(Date.valueOf("2022-03-09")), + Literal(Date.valueOf("2022-03-15")), + Literal(stringToInterval("interval 4 days 23 hours"))), + Seq( + Date.valueOf("2022-03-09"), + Date.valueOf("2022-03-13"))) // this is different from LA time zone above + } + } + test("SPARK-35088: Accept ANSI intervals by the Sequence expression") { checkEvaluation(new Sequence( Literal(Timestamp.valueOf("2018-01-01 00:00:00")), From 386c75693b5b9dd5e3b2147d49f0284badaa7d6d Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Sun, 15 May 2022 09:30:55 +0900 Subject: [PATCH 260/535] [SPARK-39186][PYTHON] Make pandas-on-Spark's skew consistent with pandas ### What changes were proposed in this pull request? the logics of computing skewness are different between spark sql and pandas: spark sql: [`sqrt(n) * m3 / sqrt(m2 * m2 * m2))`](https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala#L304) pandas: [`(count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2**1.5)`](https://github.com/pandas-dev/pandas/blob/main/pandas/core/nanops.py#L1221) ### Why are the changes needed? to make skew consistent with pandas ### Does this PR introduce _any_ user-facing change? yes, the logic to compute skew was changed ### How was this patch tested? added UT Closes #36549 from zhengruifeng/adjust_pandas_skew. Authored-by: Ruifeng Zheng Signed-off-by: Hyukjin Kwon (cherry picked from commit 7e4519c9a8ba35958ef6d408be3ca4e97917c965) Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/generic.py | 11 ++++++++++- python/pyspark/pandas/tests/test_stats.py | 6 ++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py index 7750126868437..c8a8289c56a6f 100644 --- a/python/pyspark/pandas/generic.py +++ b/python/pyspark/pandas/generic.py @@ -1468,7 +1468,16 @@ def skew(psser: "Series") -> Column: spark_type_to_pandas_dtype(spark_type), spark_type.simpleString() ) ) - return F.skewness(spark_column) + + count_scol = F.count(F.when(~spark_column.isNull(), 1).otherwise(None)) + # refer to the Pandas implementation 'nanskew' + # https://github.com/pandas-dev/pandas/blob/main/pandas/core/nanops.py#L1152 + return F.when( + count_scol > 2, + F.skewness(spark_column) + * F.sqrt(1 - 1 / count_scol) + * (count_scol / (count_scol - 2)), + ).otherwise(None) return self._reduce_for_stat_function( skew, name="skew", axis=axis, numeric_only=numeric_only diff --git a/python/pyspark/pandas/tests/test_stats.py b/python/pyspark/pandas/tests/test_stats.py index eef1616aa2883..89f5f755e125f 100644 --- a/python/pyspark/pandas/tests/test_stats.py +++ b/python/pyspark/pandas/tests/test_stats.py @@ -181,6 +181,7 @@ def test_axis_on_dataframe(self): self.assert_eq(psdf.sum(axis=1), pdf.sum(axis=1)) self.assert_eq(psdf.product(axis=1), pdf.product(axis=1)) self.assert_eq(psdf.kurtosis(axis=1), pdf.kurtosis(axis=1)) + self.assert_eq(psdf.skew(axis=0), pdf.skew(axis=0), almost=True) self.assert_eq(psdf.skew(axis=1), pdf.skew(axis=1)) self.assert_eq(psdf.mean(axis=1), pdf.mean(axis=1)) self.assert_eq(psdf.sem(axis=1), pdf.sem(axis=1)) @@ -218,6 +219,11 @@ def test_axis_on_dataframe(self): self.assert_eq( psdf.kurtosis(axis=1, numeric_only=True), pdf.kurtosis(axis=1, numeric_only=True) ) + self.assert_eq( + psdf.skew(axis=0, numeric_only=True), + pdf.skew(axis=0, numeric_only=True), + almost=True, + ) self.assert_eq( psdf.skew(axis=1, numeric_only=True), pdf.skew(axis=1, numeric_only=True) ) From c8c657b922ac8fd8dcf9553113e11a80079db059 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 16 May 2022 05:42:28 +0000 Subject: [PATCH 261/535] Preparing Spark release v3.3.0-rc2 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf6d..9479bb3bf87df 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index d12f2ad73fabd..2e9c4d9960b14 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 842d63f5d3811..2a9acfa335e71 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f7d187bf9527d..7b17e625d7599 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 53f38df885102..c5c920e774782 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 845f6659407bd..697b5a3928e58 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8e1590891933b..ad2db11370ae7 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1987c13328559..1a7bdee70f3bc 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index c7e7be1e3bbf1..66dc93de0599e 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index ac644130a61e2..219ceca6648d8 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 9a6fe2d313fde..4966db6b4a8af 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.1-SNAPSHOT -SPARK_VERSION_SHORT: 3.3.1 +SPARK_VERSION: 3.3.0 +SPARK_VERSION_SHORT: 3.3.0 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.1"] + 'facetFilters': ["version:3.3.0"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index e97f3b40cb2bd..42e58f2726df1 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 578854e3eaa9a..5aaa91cfdf20d 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 95e1ce74ca172..36309bb417362 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 0b803c5d3864a..072cedaa594c8 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 95726829bcbbd..b9063b543f512 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 25e7e25ae25b6..6f6a51a972c73 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 3ba16b7b838a2..95fd080383995 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 6cee275e6adc7..33cf30ff803e7 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index ad23da0d7f249..79b2e8f2a5a47 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 6de1f9eee532c..647d0c3f87552 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 95c8c312eb0e2..562ddc8dcc23c 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 4d87bd2730e3b..08bcae6e0f53f 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 889f0b5a92e08..beceaecd31a1c 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 9b7b0370d3b4d..584a5df0a4a35 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 04a68a47a4f45..42bab72668c00 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/pom.xml b/pom.xml index 34c8354a3d4a1..28c29814cf969 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index 2e5f8bf5395a3..980b64c4dca8c 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index d1d6a449bd5dc..f3ec959370807 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 40e578f9a7eba..66ae5adfbd19f 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index f4ac384409174..1472bd0fcb1a2 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 61d5adec0e7cc..77811f35692d8 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 970d42ba4590e..ceba171e41134 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 0cfb5f616cd24..34137add48553 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 7024e0dcfab75..e1b725929a8fc 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index cc8d8796da601..8f1e9d2f3ccb1 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index c4977726a3cac..52273e7fa76e1 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 9bbcb7f322798..dadc9324f95a2 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml From af38fce62da393ff0b56662be050b46de115a89f Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Mon, 16 May 2022 05:42:35 +0000 Subject: [PATCH 262/535] Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87df..0e449e841cf6d 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b14..d12f2ad73fabd 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e71..842d63f5d3811 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d7599..f7d187bf9527d 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e774782..53f38df885102 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e58..845f6659407bd 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370ae7..8e1590891933b 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3bc..1987c13328559 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 66dc93de0599e..c7e7be1e3bbf1 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 219ceca6648d8..ac644130a61e2 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 4966db6b4a8af..9a6fe2d313fde 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.0 -SPARK_VERSION_SHORT: 3.3.0 +SPARK_VERSION: 3.3.1-SNAPSHOT +SPARK_VERSION_SHORT: 3.3.1 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.0"] + 'facetFilters': ["version:3.3.1"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 42e58f2726df1..e97f3b40cb2bd 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 5aaa91cfdf20d..578854e3eaa9a 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 36309bb417362..95e1ce74ca172 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 072cedaa594c8..0b803c5d3864a 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index b9063b543f512..95726829bcbbd 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 6f6a51a972c73..25e7e25ae25b6 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 95fd080383995..3ba16b7b838a2 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 33cf30ff803e7..6cee275e6adc7 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 79b2e8f2a5a47..ad23da0d7f249 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 647d0c3f87552..6de1f9eee532c 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 562ddc8dcc23c..95c8c312eb0e2 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 08bcae6e0f53f..4d87bd2730e3b 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index beceaecd31a1c..889f0b5a92e08 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 584a5df0a4a35..9b7b0370d3b4d 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 42bab72668c00..04a68a47a4f45 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 28c29814cf969..34c8354a3d4a1 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT pom Spark Project Parent POM https://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index 980b64c4dca8c..2e5f8bf5395a3 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index f3ec959370807..d1d6a449bd5dc 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 66ae5adfbd19f..40e578f9a7eba 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 1472bd0fcb1a2..f4ac384409174 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 77811f35692d8..61d5adec0e7cc 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index ceba171e41134..970d42ba4590e 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 34137add48553..0cfb5f616cd24 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index e1b725929a8fc..7024e0dcfab75 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 8f1e9d2f3ccb1..cc8d8796da601 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 52273e7fa76e1..c4977726a3cac 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index dadc9324f95a2..9bbcb7f322798 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml From 1853eb117e24bcc0509d275c4caca6c033bf0ab9 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Mon, 16 May 2022 11:39:37 +0300 Subject: [PATCH 263/535] [SPARK-39187][SQL][3.3] Remove `SparkIllegalStateException` ### What changes were proposed in this pull request? Remove `SparkIllegalStateException` and replace it by `IllegalStateException` where it was used. This is a backport of https://github.com/apache/spark/pull/36550. ### Why are the changes needed? To improve code maintenance and be consistent to other places where `IllegalStateException` is used in illegal states (for instance, see https://github.com/apache/spark/pull/36524). After the PR https://github.com/apache/spark/pull/36500, the exception is substituted by `SparkException` w/ the `INTERNAL_ERROR` error class. ### Does this PR introduce _any_ user-facing change? No. Users shouldn't face to the exception in regular cases. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "sql/test:testOnly *QueryExecutionErrorsSuite*" $ build/sbt "test:testOnly *ArrowUtilsSuite" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 1a90512f605c490255f7b38215c207e64621475b) Signed-off-by: Max Gekk Closes #36558 from MaxGekk/remove-SparkIllegalStateException-3.3. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../scala/org/apache/spark/SparkException.scala | 12 ------------ .../sql/catalyst/analysis/CheckAnalysis.scala | 6 +++--- .../spark/sql/errors/QueryExecutionErrors.scala | 11 +++-------- .../org/apache/spark/sql/util/ArrowUtils.scala | 9 +++------ .../apache/spark/sql/util/ArrowUtilsSuite.scala | 4 ++-- .../sql/errors/QueryExecutionErrorsSuite.scala | 14 -------------- 6 files changed, 11 insertions(+), 45 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala index 8442c8eb8d35d..ed6e811a4cc26 100644 --- a/core/src/main/scala/org/apache/spark/SparkException.scala +++ b/core/src/main/scala/org/apache/spark/SparkException.scala @@ -158,18 +158,6 @@ private[spark] class SparkFileAlreadyExistsException( override def getErrorClass: String = errorClass } -/** - * Illegal state exception thrown from Spark with an error class. - */ -private[spark] class SparkIllegalStateException( - errorClass: String, - messageParameters: Array[String]) - extends IllegalStateException( - SparkThrowableHelper.getMessage(errorClass, messageParameters)) with SparkThrowable { - - override def getErrorClass: String = errorClass -} - /** * File not found exception thrown from Spark with an error class. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index ff40272682e62..f89fbe59af62a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils, TypeUtils} import org.apache.spark.sql.connector.catalog.{LookupCatalog, SupportsPartitionManagement} -import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils @@ -571,8 +571,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { |in operator ${operator.simpleString(SQLConf.get.maxToStringFields)} """.stripMargin) - case _: UnresolvedHint => - throw QueryExecutionErrors.logicalHintOperatorNotRemovedDuringAnalysisError + case _: UnresolvedHint => throw new IllegalStateException( + "Logical hint operator should be removed during analysis.") case f @ Filter(condition, _) if PlanHelper.specialExpressionsInUnsupportedOperator(f).nonEmpty => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index df5959283eb62..cf87094ad27ba 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -34,7 +34,7 @@ import org.apache.hadoop.fs.permission.FsPermission import org.codehaus.commons.compiler.CompileException import org.codehaus.janino.InternalCompilerException -import org.apache.spark.{Partition, SparkArithmeticException, SparkArrayIndexOutOfBoundsException, SparkClassNotFoundException, SparkConcurrentModificationException, SparkDateTimeException, SparkException, SparkFileAlreadyExistsException, SparkFileNotFoundException, SparkIllegalArgumentException, SparkIllegalStateException, SparkIndexOutOfBoundsException, SparkNoSuchElementException, SparkNoSuchMethodException, SparkNumberFormatException, SparkRuntimeException, SparkSecurityException, SparkSQLException, SparkSQLFeatureNotSupportedException, SparkUnsupportedOperationException, SparkUpgradeException} +import org.apache.spark.{Partition, SparkArithmeticException, SparkArrayIndexOutOfBoundsException, SparkClassNotFoundException, SparkConcurrentModificationException, SparkDateTimeException, SparkException, SparkFileAlreadyExistsException, SparkFileNotFoundException, SparkIllegalArgumentException, SparkIndexOutOfBoundsException, SparkNoSuchElementException, SparkNoSuchMethodException, SparkNumberFormatException, SparkRuntimeException, SparkSecurityException, SparkSQLException, SparkSQLFeatureNotSupportedException, SparkUnsupportedOperationException, SparkUpgradeException} import org.apache.spark.executor.CommitDeniedException import org.apache.spark.launcher.SparkLauncher import org.apache.spark.memory.SparkOutOfMemoryError @@ -68,12 +68,6 @@ import org.apache.spark.util.CircularBuffer */ object QueryExecutionErrors extends QueryErrorsBase { - def logicalHintOperatorNotRemovedDuringAnalysisError(): Throwable = { - new SparkIllegalStateException(errorClass = "INTERNAL_ERROR", - messageParameters = Array( - "Internal error: logical hint operator should have been removed during analysis")) - } - def cannotEvaluateExpressionError(expression: Expression): Throwable = { new SparkUnsupportedOperationException(errorClass = "INTERNAL_ERROR", messageParameters = Array(s"Cannot evaluate expression: $expression")) @@ -137,7 +131,8 @@ object QueryExecutionErrors extends QueryErrorsBase { } def cannotParseDecimalError(): Throwable = { - new SparkIllegalStateException(errorClass = "CANNOT_PARSE_DECIMAL", + new SparkRuntimeException( + errorClass = "CANNOT_PARSE_DECIMAL", messageParameters = Array.empty) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala index 4254c045ca6c0..b8f77c3646cad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala @@ -47,12 +47,9 @@ private[sql] object ArrowUtils { case BinaryType => ArrowType.Binary.INSTANCE case DecimalType.Fixed(precision, scale) => new ArrowType.Decimal(precision, scale) case DateType => new ArrowType.Date(DateUnit.DAY) - case TimestampType => - if (timeZoneId == null) { - throw QueryExecutionErrors.timeZoneIdNotSpecifiedForTimestampTypeError() - } else { - new ArrowType.Timestamp(TimeUnit.MICROSECOND, timeZoneId) - } + case TimestampType if timeZoneId == null => + throw new IllegalStateException("Missing timezoneId where it is mandatory.") + case TimestampType => new ArrowType.Timestamp(TimeUnit.MICROSECOND, timeZoneId) case TimestampNTZType => new ArrowType.Timestamp(TimeUnit.MICROSECOND, null) case NullType => ArrowType.Null.INSTANCE diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala index 642b387b88e75..6dd02afe19b24 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala @@ -50,10 +50,10 @@ class ArrowUtilsSuite extends SparkFunSuite { roundtrip(DateType) roundtrip(YearMonthIntervalType()) roundtrip(DayTimeIntervalType()) - val tsExMsg = intercept[UnsupportedOperationException] { + val tsExMsg = intercept[IllegalStateException] { roundtrip(TimestampType) } - assert(tsExMsg.getMessage.contains("timeZoneId")) + assert(tsExMsg.getMessage.contains("timezoneId")) } test("timestamp") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index af6402e4fe2ea..96a29f6dab6f5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -25,8 +25,6 @@ import org.apache.spark.sql.functions.{lit, lower, struct, sum} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy.EXCEPTION import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.{StructType, TimestampType} -import org.apache.spark.sql.util.ArrowUtils class QueryExecutionErrorsSuite extends QueryTest with ParquetTest with OrcTest with SharedSparkSession { @@ -228,18 +226,6 @@ class QueryExecutionErrorsSuite extends QueryTest } } - test("UNSUPPORTED_OPERATION: timeZoneId not specified while converting TimestampType to Arrow") { - val schema = new StructType().add("value", TimestampType) - val e = intercept[SparkUnsupportedOperationException] { - ArrowUtils.toArrowSchema(schema, null) - } - - assert(e.getErrorClass === "UNSUPPORTED_OPERATION") - assert(e.getMessage === "The operation is not supported: " + - "\"TIMESTAMP\" must supply timeZoneId parameter " + - "while converting to the arrow timestamp type.") - } - test("UNSUPPORTED_OPERATION - SPARK-36346: can't read Timestamp as TimestampNTZ") { withTempPath { file => sql("select timestamp_ltz'2019-03-21 00:02:03'").write.orc(file.getCanonicalPath) From e2ce0885b6666babe536707008ed0afaa09dca99 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 16 May 2022 17:44:53 +0800 Subject: [PATCH 264/535] [SPARK-39190][SQL] Provide query context for decimal precision overflow error when WSCG is off ### What changes were proposed in this pull request? Similar to https://github.com/apache/spark/pull/36525, this PR provides query context for decimal precision overflow error when WSCG is off ### Why are the changes needed? Enhance the runtime error query context of checking decimal overflow. After changes, it works when the whole stage codegen is not available. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? UT Closes #36557 from gengliangwang/decimalContextWSCG. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 17b85ff97569a43d7fd33863d17bfdaf62d539e0) Signed-off-by: Gengliang Wang --- .../expressions/decimalExpressions.scala | 12 +++++++++--- .../org/apache/spark/sql/SQLQuerySuite.scala | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala index 4a4b8e0fc0dfd..2cdd784ea4da4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala @@ -128,7 +128,7 @@ case class PromotePrecision(child: Expression) extends UnaryExpression { case class CheckOverflow( child: Expression, dataType: DecimalType, - nullOnOverflow: Boolean) extends UnaryExpression { + nullOnOverflow: Boolean) extends UnaryExpression with SupportQueryContext { override def nullable: Boolean = true @@ -138,11 +138,11 @@ case class CheckOverflow( dataType.scale, Decimal.ROUND_HALF_UP, nullOnOverflow, - origin.context) + queryContext) override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val errorContextCode = if (nullOnOverflow) { - ctx.addReferenceObj("errCtx", origin.context) + ctx.addReferenceObj("errCtx", queryContext) } else { "\"\"" } @@ -163,6 +163,12 @@ case class CheckOverflow( override protected def withNewChildInternal(newChild: Expression): CheckOverflow = copy(child = newChild) + + override def initQueryContext(): String = if (nullOnOverflow) { + "" + } else { + origin.context + } } // A variant `CheckOverflow`, which treats null as overflow. This is necessary in `Sum`. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 21ce009a9076a..f6998fe5c1c55 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4423,6 +4423,22 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } + test("SPARK-39190: Query context of decimal overflow error should be serialized to executors" + + " when WSCG is off") { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", + SQLConf.ANSI_ENABLED.key -> "true") { + withTable("t") { + sql("create table t(d decimal(38, 0)) using parquet") + sql("insert into t values (2e37BD)") + val query = "select d / 0.1 from t" + val msg = intercept[SparkException] { + sql(query).collect() + }.getMessage + assert(msg.contains(query)) + } + } + } + test("SPARK-38589: try_avg should return null if overflow happens before merging") { val yearMonthDf = Seq(Int.MaxValue, Int.MaxValue, 2) .map(Period.ofMonths) From 30bb19e23d28f454e35c96d20db70db5650bd160 Mon Sep 17 00:00:00 2001 From: bjornjorgensen Date: Mon, 16 May 2022 18:10:08 -0500 Subject: [PATCH 265/535] [SPARK-39183][BUILD] Upgrade Apache Xerces Java to 2.12.2 ### What changes were proposed in this pull request? Upgrade Apache Xerces Java to 2.12.2 [Release notes](https://xerces.apache.org/xerces2-j/releases.html) ### Why are the changes needed? [Infinite Loop in Apache Xerces Java](https://github.com/advisories/GHSA-h65f-jvqw-m9fj) There's a vulnerability within the Apache Xerces Java (XercesJ) XML parser when handling specially crafted XML document payloads. This causes, the XercesJ XML parser to wait in an infinite loop, which may sometimes consume system resources for prolonged duration. This vulnerability is present within XercesJ version 2.12.1 and the previous versions. References https://nvd.nist.gov/vuln/detail/CVE-2022-23437 https://lists.apache.org/thread/6pjwm10bb69kq955fzr1n0nflnjd27dl http://www.openwall.com/lists/oss-security/2022/01/24/3 https://www.oracle.com/security-alerts/cpuapr2022.html ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Closes #36544 from bjornjorgensen/Upgrade-xerces-to-2.12.2. Authored-by: bjornjorgensen Signed-off-by: Sean Owen (cherry picked from commit 181436bd990d3bdf178a33fa6489ad416f3e7f94) Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 7499a9b94c05d..ab00ad568cbd3 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -260,7 +260,7 @@ transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar velocity/1.5//velocity-1.5.jar xbean-asm9-shaded/4.20//xbean-asm9-shaded-4.20.jar -xercesImpl/2.12.0//xercesImpl-2.12.0.jar +xercesImpl/2.12.2//xercesImpl-2.12.2.jar xml-apis/1.4.01//xml-apis-1.4.01.jar xmlenc/0.52//xmlenc-0.52.jar xz/1.8//xz-1.8.jar diff --git a/pom.xml b/pom.xml index 34c8354a3d4a1..0d296febbd821 100644 --- a/pom.xml +++ b/pom.xml @@ -1389,7 +1389,7 @@ xerces xercesImpl - 2.12.0 + 2.12.2 org.apache.avro From c25624b4d0c2d77f0a6db7e70ecf750e9a1143f2 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 17 May 2022 15:56:47 +0800 Subject: [PATCH 266/535] [SPARK-36718][SQL][FOLLOWUP] Improve the extract-only check in CollapseProject ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/36510 , to fix a corner case: if the `CreateStruct` is only referenced once in non-extract expressions, we should still allow collapsing the projects. ### Why are the changes needed? completely fix the perf regression ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? a new test Closes #36572 from cloud-fan/regression. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit 98fad57221d4dffc6f1fe28d9aca1093172ecf72) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 16 +++++++++------- .../optimizer/CollapseProjectSuite.scala | 11 +++++++++++ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 759a7044f159f..94e9d3cdd14f3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -991,20 +991,22 @@ object CollapseProject extends Rule[LogicalPlan] with AliasHelper { val producer = producerMap.getOrElse(reference, reference) producer.deterministic && (count == 1 || alwaysInline || { val relatedConsumers = consumers.filter(_.references.contains(reference)) - val extractOnly = relatedConsumers.forall(isExtractOnly(_, reference)) + // It's still exactly-only if there is only one reference in non-extract expressions, + // as we won't duplicate the expensive CreateStruct-like expressions. + val extractOnly = relatedConsumers.map(refCountInNonExtract(_, reference)).sum <= 1 shouldInline(producer, extractOnly) }) } } - private def isExtractOnly(expr: Expression, ref: Attribute): Boolean = { - def hasRefInNonExtractValue(e: Expression): Boolean = e match { - case a: Attribute => a.semanticEquals(ref) + private def refCountInNonExtract(expr: Expression, ref: Attribute): Int = { + def refCount(e: Expression): Int = e match { + case a: Attribute if a.semanticEquals(ref) => 1 // The first child of `ExtractValue` is the complex type to be extracted. - case e: ExtractValue if e.children.head.semanticEquals(ref) => false - case _ => e.children.exists(hasRefInNonExtractValue) + case e: ExtractValue if e.children.head.semanticEquals(ref) => 0 + case _ => e.children.map(refCount).sum } - !hasRefInNonExtractValue(expr) + refCount(expr) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala index f6c3209726b7d..ba5c5572e242e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/CollapseProjectSuite.scala @@ -141,6 +141,17 @@ class CollapseProjectSuite extends PlanTest { .select(($"a" + ($"a" + 1)).as("add")) .analyze comparePlans(optimized2, expected2) + + // referencing `CreateStruct` only once in non-extract expression is OK. + val query3 = testRelation + .select(namedStruct("a", $"a", "a_plus_1", $"a" + 1).as("struct")) + .select($"struct", $"struct".getField("a")) + .analyze + val optimized3 = Optimize.execute(query3) + val expected3 = testRelation + .select(namedStruct("a", $"a", "a_plus_1", $"a" + 1).as("struct"), $"a".as("struct.a")) + .analyze + comparePlans(optimized3, expected3) } test("preserve top-level alias metadata while collapsing projects") { From c07f65c51681107e869d2ebb46aa546ac3871e3a Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 17 May 2022 23:05:48 +0900 Subject: [PATCH 267/535] [SPARK-32268][SQL][TESTS][FOLLOW-UP] Use function registry in the SparkSession ### What changes were proposed in this pull request? This PR proposes: 1. Use the function registry in the Spark Session being used 2. Move function registration into `beforeAll` ### Why are the changes needed? Registration of the function without `beforeAll` at `builtin` can affect other tests. See also https://lists.apache.org/thread/jp0ccqv10ht716g9xldm2ohdv3mpmmz1. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Unittests fixed. Closes #36576 from HyukjinKwon/SPARK-32268-followup. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit c5351f85dec628a5c806893aa66777cbd77a4d65) Signed-off-by: Hyukjin Kwon --- .../sql/BloomFilterAggregateQuerySuite.scala | 34 ++++++++++--------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala index 7fc89ecc88ba3..05513cddccb86 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/BloomFilterAggregateQuerySuite.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql import org.apache.spark.sql.catalyst.FunctionIdentifier -import org.apache.spark.sql.catalyst.analysis.FunctionRegistry import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.aggregate.BloomFilterAggregate import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec @@ -35,23 +34,26 @@ class BloomFilterAggregateQuerySuite extends QueryTest with SharedSparkSession { val funcId_bloom_filter_agg = new FunctionIdentifier("bloom_filter_agg") val funcId_might_contain = new FunctionIdentifier("might_contain") - // Register 'bloom_filter_agg' to builtin. - FunctionRegistry.builtin.registerFunction(funcId_bloom_filter_agg, - new ExpressionInfo(classOf[BloomFilterAggregate].getName, "bloom_filter_agg"), - (children: Seq[Expression]) => children.size match { - case 1 => new BloomFilterAggregate(children.head) - case 2 => new BloomFilterAggregate(children.head, children(1)) - case 3 => new BloomFilterAggregate(children.head, children(1), children(2)) - }) - - // Register 'might_contain' to builtin. - FunctionRegistry.builtin.registerFunction(funcId_might_contain, - new ExpressionInfo(classOf[BloomFilterMightContain].getName, "might_contain"), - (children: Seq[Expression]) => BloomFilterMightContain(children.head, children(1))) + override def beforeAll(): Unit = { + super.beforeAll() + // Register 'bloom_filter_agg' to builtin. + spark.sessionState.functionRegistry.registerFunction(funcId_bloom_filter_agg, + new ExpressionInfo(classOf[BloomFilterAggregate].getName, "bloom_filter_agg"), + (children: Seq[Expression]) => children.size match { + case 1 => new BloomFilterAggregate(children.head) + case 2 => new BloomFilterAggregate(children.head, children(1)) + case 3 => new BloomFilterAggregate(children.head, children(1), children(2)) + }) + + // Register 'might_contain' to builtin. + spark.sessionState.functionRegistry.registerFunction(funcId_might_contain, + new ExpressionInfo(classOf[BloomFilterMightContain].getName, "might_contain"), + (children: Seq[Expression]) => BloomFilterMightContain(children.head, children(1))) + } override def afterAll(): Unit = { - FunctionRegistry.builtin.dropFunction(funcId_bloom_filter_agg) - FunctionRegistry.builtin.dropFunction(funcId_might_contain) + spark.sessionState.functionRegistry.dropFunction(funcId_bloom_filter_agg) + spark.sessionState.functionRegistry.dropFunction(funcId_might_contain) super.afterAll() } From 4fb7fe2a40623526ed22311eac16c937450031e5 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 17 May 2022 22:31:30 +0800 Subject: [PATCH 268/535] [SPARK-39208][SQL] Fix query context bugs in decimal overflow under codegen mode ### What changes were proposed in this pull request? 1. Fix logical bugs in adding query contexts as references under codegen mode. https://github.com/apache/spark/pull/36040/files#diff-4a70d2f3a4b99f58796b87192143f9838f4c4cf469f3313eb30af79c4e07153aR145 The code ``` val errorContextCode = if (nullOnOverflow) { ctx.addReferenceObj("errCtx", queryContext) } else { "\"\"" } ``` should be ``` val errorContextCode = if (nullOnOverflow) { "\"\"" } else { ctx.addReferenceObj("errCtx", queryContext) } ``` 2. Similar to https://github.com/apache/spark/pull/36557, make `CheckOverflowInSum` support query context when WSCG is not available. ### Why are the changes needed? Bugfix and enhancement in the query context of decimal expressions. ### Does this PR introduce _any_ user-facing change? No, the query context is not released yet. ### How was this patch tested? New UT Closes #36577 from gengliangwang/fixDecimalSumOverflow. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 191e535b975e5813719d3143797c9fcf86321368) Signed-off-by: Gengliang Wang --- .../catalyst/expressions/aggregate/Sum.scala | 21 ++++++++++++------- .../expressions/decimalExpressions.scala | 15 ++++++------- .../expressions/DecimalExpressionSuite.scala | 19 +++++++++++++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 19 ++++++++++------- 4 files changed, 52 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala index f2c6925b837e9..fa43565d80726 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala @@ -143,10 +143,11 @@ abstract class SumBase(child: Expression) extends DeclarativeAggregate * So now, if ansi is enabled, then throw exception, if not then return null. * If sum is not null, then return the sum. */ - protected def getEvaluateExpression: Expression = resultType match { + protected def getEvaluateExpression(queryContext: String): Expression = resultType match { case d: DecimalType => - If(isEmpty, Literal.create(null, resultType), - CheckOverflowInSum(sum, d, !useAnsiAdd)) + val checkOverflowInSum = + CheckOverflowInSum(sum, d, !useAnsiAdd, queryContext) + If(isEmpty, Literal.create(null, resultType), checkOverflowInSum) case _ if shouldTrackIsEmpty => If(isEmpty, Literal.create(null, resultType), sum) case _ => sum @@ -172,7 +173,7 @@ abstract class SumBase(child: Expression) extends DeclarativeAggregate case class Sum( child: Expression, useAnsiAdd: Boolean = SQLConf.get.ansiEnabled) - extends SumBase(child) { + extends SumBase(child) with SupportQueryContext { def this(child: Expression) = this(child, useAnsiAdd = SQLConf.get.ansiEnabled) override def shouldTrackIsEmpty: Boolean = resultType match { @@ -186,7 +187,13 @@ case class Sum( override lazy val mergeExpressions: Seq[Expression] = getMergeExpressions - override lazy val evaluateExpression: Expression = getEvaluateExpression + override lazy val evaluateExpression: Expression = getEvaluateExpression(queryContext) + + override def initQueryContext(): String = if (useAnsiAdd) { + origin.context + } else { + "" + } } // scalastyle:off line.size.limit @@ -243,9 +250,9 @@ case class TrySum(child: Expression) extends SumBase(child) { override lazy val evaluateExpression: Expression = if (useAnsiAdd) { - TryEval(getEvaluateExpression) + TryEval(getEvaluateExpression("")) } else { - getEvaluateExpression + getEvaluateExpression("") } override protected def withNewChildInternal(newChild: Expression): Expression = diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala index 2cdd784ea4da4..7d25df5ae9cb7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala @@ -142,9 +142,9 @@ case class CheckOverflow( override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val errorContextCode = if (nullOnOverflow) { - ctx.addReferenceObj("errCtx", queryContext) - } else { "\"\"" + } else { + ctx.addReferenceObj("errCtx", queryContext) } nullSafeCodeGen(ctx, ev, eval => { // scalastyle:off line.size.limit @@ -175,7 +175,8 @@ case class CheckOverflow( case class CheckOverflowInSum( child: Expression, dataType: DecimalType, - nullOnOverflow: Boolean) extends UnaryExpression { + nullOnOverflow: Boolean, + queryContext: String = "") extends UnaryExpression { override def nullable: Boolean = true @@ -183,23 +184,23 @@ case class CheckOverflowInSum( val value = child.eval(input) if (value == null) { if (nullOnOverflow) null - else throw QueryExecutionErrors.overflowInSumOfDecimalError(origin.context) + else throw QueryExecutionErrors.overflowInSumOfDecimalError(queryContext) } else { value.asInstanceOf[Decimal].toPrecision( dataType.precision, dataType.scale, Decimal.ROUND_HALF_UP, nullOnOverflow, - origin.context) + queryContext) } } override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val childGen = child.genCode(ctx) val errorContextCode = if (nullOnOverflow) { - ctx.addReferenceObj("errCtx", origin.context) - } else { "\"\"" + } else { + ctx.addReferenceObj("errCtx", queryContext) } val nullHandling = if (nullOnOverflow) { "" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DecimalExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DecimalExpressionSuite.scala index 36bc3db580400..1a8cd63aed097 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DecimalExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DecimalExpressionSuite.scala @@ -18,6 +18,8 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin +import org.apache.spark.sql.catalyst.trees.Origin import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{Decimal, DecimalType, LongType} @@ -83,4 +85,21 @@ class DecimalExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(CheckOverflow( Literal.create(null, DecimalType(2, 1)), DecimalType(3, 2), false), null) } + + test("SPARK-39208: CheckOverflow & CheckOverflowInSum support query context in runtime errors") { + val d = Decimal(101, 3, 1) + val query = "select cast(d as decimal(4, 3)) from t" + val origin = Origin( + startIndex = Some(7), + stopIndex = Some(30), + sqlText = Some(query)) + + val expr1 = withOrigin(origin) { + CheckOverflow(Literal(d), DecimalType(4, 3), false) + } + checkExceptionInExpression[ArithmeticException](expr1, query) + + val expr2 = CheckOverflowInSum(Literal(d), DecimalType(4, 3), false, queryContext = query) + checkExceptionInExpression[ArithmeticException](expr2, query) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index f6998fe5c1c55..422ba7c2a9e2d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4423,18 +4423,21 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } - test("SPARK-39190: Query context of decimal overflow error should be serialized to executors" + - " when WSCG is off") { + test("SPARK-39190, SPARK-39208: Query context of decimal overflow error should be serialized " + + "to executors when WSCG is off") { withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", SQLConf.ANSI_ENABLED.key -> "true") { withTable("t") { sql("create table t(d decimal(38, 0)) using parquet") - sql("insert into t values (2e37BD)") - val query = "select d / 0.1 from t" - val msg = intercept[SparkException] { - sql(query).collect() - }.getMessage - assert(msg.contains(query)) + sql("insert into t values (6e37BD),(6e37BD)") + Seq( + "select d / 0.1 from t", + "select sum(d) from t").foreach { query => + val msg = intercept[SparkException] { + sql(query).collect() + }.getMessage + assert(msg.contains(query)) + } } } } From e52b0487583314ae159dab3496be3c28df3e56b7 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Tue, 17 May 2022 18:26:55 -0500 Subject: [PATCH 269/535] [SPARK-39104][SQL] InMemoryRelation#isCachedColumnBuffersLoaded should be thread-safe ### What changes were proposed in this pull request? Add `synchronized` on method `isCachedColumnBuffersLoaded` ### Why are the changes needed? `isCachedColumnBuffersLoaded` should has `synchronized` wrapped, otherwise may cause NPE when modify `_cachedColumnBuffers` concurrently. ``` def isCachedColumnBuffersLoaded: Boolean = { _cachedColumnBuffers != null && isCachedRDDLoaded } def isCachedRDDLoaded: Boolean = { _cachedColumnBuffersAreLoaded || { val bmMaster = SparkEnv.get.blockManager.master val rddLoaded = _cachedColumnBuffers.partitions.forall { partition => bmMaster.getBlockStatus(RDDBlockId(_cachedColumnBuffers.id, partition.index), false) .exists { case(_, blockStatus) => blockStatus.isCached } } if (rddLoaded) { _cachedColumnBuffersAreLoaded = rddLoaded } rddLoaded } } ``` ``` java.lang.NullPointerException at org.apache.spark.sql.execution.columnar.CachedRDDBuilder.isCachedRDDLoaded(InMemoryRelation.scala:247) at org.apache.spark.sql.execution.columnar.CachedRDDBuilder.isCachedColumnBuffersLoaded(InMemoryRelation.scala:241) at org.apache.spark.sql.execution.CacheManager.$anonfun$uncacheQuery$8(CacheManager.scala:189) at org.apache.spark.sql.execution.CacheManager.$anonfun$uncacheQuery$8$adapted(CacheManager.scala:176) at scala.collection.TraversableLike.$anonfun$filterImpl$1(TraversableLike.scala:304) at scala.collection.Iterator.foreach(Iterator.scala:943) at scala.collection.Iterator.foreach$(Iterator.scala:943) at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) at scala.collection.IterableLike.foreach(IterableLike.scala:74) at scala.collection.IterableLike.foreach$(IterableLike.scala:73) at scala.collection.AbstractIterable.foreach(Iterable.scala:56) at scala.collection.TraversableLike.filterImpl(TraversableLike.scala:303) at scala.collection.TraversableLike.filterImpl$(TraversableLike.scala:297) at scala.collection.AbstractTraversable.filterImpl(Traversable.scala:108) at scala.collection.TraversableLike.filter(TraversableLike.scala:395) at scala.collection.TraversableLike.filter$(TraversableLike.scala:395) at scala.collection.AbstractTraversable.filter(Traversable.scala:108) at org.apache.spark.sql.execution.CacheManager.recacheByCondition(CacheManager.scala:219) at org.apache.spark.sql.execution.CacheManager.uncacheQuery(CacheManager.scala:176) at org.apache.spark.sql.Dataset.unpersist(Dataset.scala:3220) at org.apache.spark.sql.Dataset.unpersist(Dataset.scala:3231) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing UT. Closes #36496 from pan3793/SPARK-39104. Authored-by: Cheng Pan Signed-off-by: Sean Owen (cherry picked from commit 3c8d8d7a864281fbe080316ad8de9b8eac80fa71) Signed-off-by: Sean Owen --- .../execution/columnar/InMemoryRelation.scala | 9 +++- .../columnar/InMemoryColumnarQuerySuite.scala | 53 +++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala index 89323e7d1a429..0ace24777b7cd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala @@ -238,10 +238,15 @@ case class CachedRDDBuilder( } def isCachedColumnBuffersLoaded: Boolean = { - _cachedColumnBuffers != null && isCachedRDDLoaded + if (_cachedColumnBuffers != null) { + synchronized { + return _cachedColumnBuffers != null && isCachedRDDLoaded + } + } + false } - def isCachedRDDLoaded: Boolean = { + private def isCachedRDDLoaded: Boolean = { _cachedColumnBuffersAreLoaded || { val bmMaster = SparkEnv.get.blockManager.master val rddLoaded = _cachedColumnBuffers.partitions.forall { partition => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala index 120ddf469f4a0..779aa49a34431 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarQuerySuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.execution.columnar import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} +import java.util.concurrent.atomic.AtomicInteger import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, QueryTest, Row} @@ -563,4 +564,56 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSparkSession { } } } + + test("SPARK-39104: InMemoryRelation#isCachedColumnBuffersLoaded should be thread-safe") { + val plan = spark.range(1).queryExecution.executedPlan + val serializer = new TestCachedBatchSerializer(true, 1) + val cachedRDDBuilder = CachedRDDBuilder(serializer, MEMORY_ONLY, plan, None) + + @volatile var isCachedColumnBuffersLoaded = false + @volatile var stopped = false + + val th1 = new Thread { + override def run(): Unit = { + while (!isCachedColumnBuffersLoaded && !stopped) { + cachedRDDBuilder.cachedColumnBuffers + cachedRDDBuilder.clearCache() + } + } + } + + val th2 = new Thread { + override def run(): Unit = { + while (!isCachedColumnBuffersLoaded && !stopped) { + isCachedColumnBuffersLoaded = cachedRDDBuilder.isCachedColumnBuffersLoaded + } + } + } + + val th3 = new Thread { + override def run(): Unit = { + Thread.sleep(3000L) + stopped = true + } + } + + val exceptionCnt = new AtomicInteger + val exceptionHandler: Thread.UncaughtExceptionHandler = (_: Thread, cause: Throwable) => { + exceptionCnt.incrementAndGet + fail(cause) + } + + th1.setUncaughtExceptionHandler(exceptionHandler) + th2.setUncaughtExceptionHandler(exceptionHandler) + th1.start() + th2.start() + th3.start() + th1.join() + th2.join() + th3.join() + + cachedRDDBuilder.clearCache() + + assert(exceptionCnt.get == 0) + } } From 0e998d31234f08be956c5bd2dec0b086952c2e18 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 18 May 2022 10:59:52 +0800 Subject: [PATCH 270/535] [SPARK-39193][SQL] Fasten Timestamp type inference of JSON/CSV data sources ### What changes were proposed in this pull request? When reading JSON/CSV files with inferring timestamp types (`.option("inferTimestamp", true)`), the Timestamp conversion will throw and catch exceptions. As we are putting decent error messages in the exception: ``` def cannotCastToDateTimeError( value: Any, from: DataType, to: DataType, errorContext: String): Throwable = { val valueString = toSQLValue(value, from) new SparkDateTimeException("INVALID_SYNTAX_FOR_CAST", Array(toSQLType(to), valueString, SQLConf.ANSI_ENABLED.key, errorContext)) } ``` Throwing and catching the timestamp parsing exceptions is actually not cheap. It consumes more than 90% of the type inference time. This PR improves the default timestamp parsing by returning optional results instead of throwing/catching the exceptions. With this PR, the schema inference time is reduced by 90% in a local benchmark. Note this PR is for the default timestamp parser. It doesn't cover the scenarios of * users provide a customized timestamp format via option * users enable legacy timestamp formatter We can have follow-ups for it. ### Why are the changes needed? Performance improvement ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Also manual test the runtime to inferring a JSON file of 624MB with inferring timestamp enabled: ``` spark.read.option("inferTimestamp", true).json(file) ``` Before the change, it takes 166 seconds After the change, it only 16 seconds. Closes #36562 from gengliangwang/improveInferTS. Lead-authored-by: Gengliang Wang Co-authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 888bf1b2ef44a27c3d4be716a72175bbaa8c6453) Signed-off-by: Gengliang Wang --- .../sql/catalyst/csv/CSVInferSchema.scala | 4 +- .../sql/catalyst/json/JsonInferSchema.scala | 4 +- .../catalyst/util/TimestampFormatter.scala | 51 +++++++++++++++++++ .../util/TimestampFormatterSuite.scala | 15 ++++++ 4 files changed, 70 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala index f30fa8a0b5f95..8b0c6c49b8551 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala @@ -178,7 +178,7 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { // We can only parse the value as TimestampNTZType if it does not have zone-offset or // time-zone component and can be parsed with the timestamp formatter. // Otherwise, it is likely to be a timestamp with timezone. - if ((allCatch opt timestampNTZFormatter.parseWithoutTimeZone(field, false)).isDefined) { + if (timestampNTZFormatter.parseWithoutTimeZoneOptional(field, false).isDefined) { SQLConf.get.timestampType } else { tryParseTimestamp(field) @@ -187,7 +187,7 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { private def tryParseTimestamp(field: String): DataType = { // This case infers a custom `dataFormat` is set. - if ((allCatch opt timestampParser.parse(field)).isDefined) { + if (timestampParser.parseOptional(field).isDefined) { TimestampType } else { tryParseBoolean(field) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index d08773d846960..f6064bd7195b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -151,10 +151,10 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { if (options.prefersDecimal && decimalTry.isDefined) { decimalTry.get } else if (options.inferTimestamp && - (allCatch opt timestampNTZFormatter.parseWithoutTimeZone(field, false)).isDefined) { + timestampNTZFormatter.parseWithoutTimeZoneOptional(field, false).isDefined) { SQLConf.get.timestampType } else if (options.inferTimestamp && - (allCatch opt timestampFormatter.parse(field)).isDefined) { + timestampFormatter.parseOptional(field).isDefined) { TimestampType } else { StringType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 7502e0a463bbd..8ebe77978b57c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -52,6 +52,25 @@ sealed trait TimestampFormatter extends Serializable { @throws(classOf[DateTimeException]) def parse(s: String): Long + /** + * Parses a timestamp in a string and converts it to an optional number of microseconds. + * + * @param s - string with timestamp to parse + * @return An optional number of microseconds since epoch. The result is None on invalid input. + * @throws ParseException can be thrown by legacy parser + * @throws DateTimeParseException can be thrown by new parser + * @throws DateTimeException unable to obtain local date or time + */ + @throws(classOf[ParseException]) + @throws(classOf[DateTimeParseException]) + @throws(classOf[DateTimeException]) + def parseOptional(s: String): Option[Long] = + try { + Some(parse(s)) + } catch { + case _: Exception => None + } + /** * Parses a timestamp in a string and converts it to microseconds since Unix Epoch in local time. * @@ -73,6 +92,30 @@ sealed trait TimestampFormatter extends Serializable { s"The method `parseWithoutTimeZone(s: String, allowTimeZone: Boolean)` should be " + "implemented in the formatter of timestamp without time zone") + /** + * Parses a timestamp in a string and converts it to an optional number of microseconds since + * Unix Epoch in local time. + * + * @param s - string with timestamp to parse + * @param allowTimeZone - indicates strict parsing of timezone + * @return An optional number of microseconds since epoch. The result is None on invalid input. + * @throws ParseException can be thrown by legacy parser + * @throws DateTimeParseException can be thrown by new parser + * @throws DateTimeException unable to obtain local date or time + * @throws IllegalStateException The formatter for timestamp without time zone should always + * implement this method. The exception should never be hit. + */ + @throws(classOf[ParseException]) + @throws(classOf[DateTimeParseException]) + @throws(classOf[DateTimeException]) + @throws(classOf[IllegalStateException]) + def parseWithoutTimeZoneOptional(s: String, allowTimeZone: Boolean): Option[Long] = + try { + Some(parseWithoutTimeZone(s, allowTimeZone)) + } catch { + case _: Exception => None + } + /** * Parses a timestamp in a string and converts it to microseconds since Unix Epoch in local time. * Zone-id and zone-offset components are ignored. @@ -204,6 +247,9 @@ class DefaultTimestampFormatter( } catch checkParsedDiff(s, legacyFormatter.parse) } + override def parseOptional(s: String): Option[Long] = + DateTimeUtils.stringToTimestamp(UTF8String.fromString(s), zoneId) + override def parseWithoutTimeZone(s: String, allowTimeZone: Boolean): Long = { try { val utf8Value = UTF8String.fromString(s) @@ -213,6 +259,11 @@ class DefaultTimestampFormatter( } } catch checkParsedDiff(s, legacyFormatter.parse) } + + override def parseWithoutTimeZoneOptional(s: String, allowTimeZone: Boolean): Option[Long] = { + val utf8Value = UTF8String.fromString(s) + DateTimeUtils.stringToTimestampWithoutTimeZone(utf8Value, allowTimeZone) + } } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index c812f8b9b73a0..e3d7c972baf2c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -456,4 +456,19 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { assert(errMsg.contains("""Invalid input syntax for type "TIMESTAMP": 'x123'""")) } } + + test("SPARK-39193: support returning optional parse results in the default formatter") { + val formatter = new DefaultTimestampFormatter( + DateTimeTestUtils.LA, + locale = DateFormatter.defaultLocale, + legacyFormat = LegacyDateFormats.SIMPLE_DATE_FORMAT, + isParsing = true) + assert(formatter.parseOptional("2021-01-01T00:00:00").contains(1609488000000000L)) + assert( + formatter.parseWithoutTimeZoneOptional("2021-01-01T00:00:00", false) + .contains(1609459200000000L)) + assert(formatter.parseOptional("abc").isEmpty) + assert( + formatter.parseWithoutTimeZoneOptional("abc", false).isEmpty) + } } From 72eb58ae224efb0f5bd3912073ff133116c0d05e Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 18 May 2022 18:21:09 +0900 Subject: [PATCH 271/535] [SPARK-39215][PYTHON] Reduce Py4J calls in pyspark.sql.utils.is_timestamp_ntz_preferred ### What changes were proposed in this pull request? This PR proposes to reduce the number of Py4J calls at `pyspark.sql.utils.is_timestamp_ntz_preferred` by having a single method to check. ### Why are the changes needed? For better performance, and simplicity in the code. ### Does this PR introduce _any_ user-facing change? Yes, the number of Py4J calls will be reduced, and the driver side access will become faster. ### How was this patch tested? Existing tests should cover. Closes #36587 from HyukjinKwon/SPARK-39215. Lead-authored-by: Hyukjin Kwon Co-authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 28e7764bbe6949b2a68ef1466e210ca6418a3018) Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/utils.py | 9 +-------- .../org/apache/spark/sql/api/python/PythonSQLUtils.scala | 3 +++ 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/python/pyspark/sql/utils.py b/python/pyspark/sql/utils.py index b3219b8b9be4e..e4a0299164e26 100644 --- a/python/pyspark/sql/utils.py +++ b/python/pyspark/sql/utils.py @@ -293,11 +293,4 @@ def is_timestamp_ntz_preferred() -> bool: Return a bool if TimestampNTZType is preferred according to the SQL configuration set. """ jvm = SparkContext._jvm - return ( - jvm is not None - and getattr(getattr(jvm.org.apache.spark.sql.internal, "SQLConf$"), "MODULE$") - .get() - .timestampType() - .typeName() - == "timestamp_ntz" - ) + return jvm is not None and jvm.PythonSQLUtils.isTimestampNTZPreferred() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala index ab43aa49944c8..ce295655badae 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala @@ -55,6 +55,9 @@ private[sql] object PythonSQLUtils extends Logging { listAllSQLConfigs().filter(p => SQLConf.isStaticConfigKey(p._1)).toArray } + def isTimestampNTZPreferred: Boolean = + SQLConf.get.timestampType == org.apache.spark.sql.types.TimestampNTZType + /** * Python callable function to read a file in Arrow stream format and create a [[RDD]] * using each serialized ArrowRecordBatch as a partition. From ec6fc7419571114cfda94bfa15d4a40712b53fea Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 18 May 2022 18:52:15 +0800 Subject: [PATCH 272/535] [SPARK-39210][SQL] Provide query context of Decimal overflow in AVG when WSCG is off ### What changes were proposed in this pull request? Similar to https://github.com/apache/spark/pull/36525, this PR provides runtime error query context for the Average expression when WSCG is off. ### Why are the changes needed? Enhance the runtime error query context of Average function. After changes, it works when the whole stage codegen is not available. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New UT Closes #36582 from gengliangwang/fixAvgContext. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 8b5b3e95f8761af97255cbcba35c3d836a419dba) Signed-off-by: Gengliang Wang --- .../catalyst/expressions/aggregate/Average.scala | 16 +++++++++++----- .../org/apache/spark/sql/SQLQuerySuite.scala | 7 ++++--- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala index 14914576091be..343e27d863bb8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala @@ -81,11 +81,11 @@ abstract class AverageBase // If all input are nulls, count will be 0 and we will get null after the division. // We can't directly use `/` as it throws an exception under ansi mode. - protected def getEvaluateExpression = child.dataType match { + protected def getEvaluateExpression(queryContext: String) = child.dataType match { case _: DecimalType => DecimalPrecision.decimalAndDecimal()( Divide( - CheckOverflowInSum(sum, sumDataType.asInstanceOf[DecimalType], !useAnsiAdd), + CheckOverflowInSum(sum, sumDataType.asInstanceOf[DecimalType], !useAnsiAdd, queryContext), count.cast(DecimalType.LongDecimal), failOnError = false)).cast(resultType) case _: YearMonthIntervalType => If(EqualTo(count, Literal(0L)), @@ -123,7 +123,7 @@ abstract class AverageBase since = "1.0.0") case class Average( child: Expression, - useAnsiAdd: Boolean = SQLConf.get.ansiEnabled) extends AverageBase { + useAnsiAdd: Boolean = SQLConf.get.ansiEnabled) extends AverageBase with SupportQueryContext { def this(child: Expression) = this(child, useAnsiAdd = SQLConf.get.ansiEnabled) override protected def withNewChildInternal(newChild: Expression): Average = @@ -133,7 +133,13 @@ case class Average( override lazy val mergeExpressions: Seq[Expression] = getMergeExpressions - override lazy val evaluateExpression: Expression = getEvaluateExpression + override lazy val evaluateExpression: Expression = getEvaluateExpression(queryContext) + + override def initQueryContext(): String = if (useAnsiAdd) { + origin.context + } else { + "" + } } // scalastyle:off line.size.limit @@ -192,7 +198,7 @@ case class TryAverage(child: Expression) extends AverageBase { } override lazy val evaluateExpression: Expression = { - addTryEvalIfNeeded(getEvaluateExpression) + addTryEvalIfNeeded(getEvaluateExpression("")) } override protected def withNewChildInternal(newChild: Expression): Expression = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 422ba7c2a9e2d..919fe88ec4b32 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4423,8 +4423,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } - test("SPARK-39190, SPARK-39208: Query context of decimal overflow error should be serialized " + - "to executors when WSCG is off") { + test("SPARK-39190,SPARK-39208,SPARK-39210: Query context of decimal overflow error should " + + "be serialized to executors when WSCG is off") { withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false", SQLConf.ANSI_ENABLED.key -> "true") { withTable("t") { @@ -4432,7 +4432,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark sql("insert into t values (6e37BD),(6e37BD)") Seq( "select d / 0.1 from t", - "select sum(d) from t").foreach { query => + "select sum(d) from t", + "select avg(d) from t").foreach { query => val msg = intercept[SparkException] { sql(query).collect() }.getMessage From b5ce32f41f9e4aecb02cc383184ac0a6dfabc4dd Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Wed, 18 May 2022 20:29:01 +0800 Subject: [PATCH 273/535] [SPARK-39162][SQL][3.3] Jdbc dialect should decide which function could be pushed down ### What changes were proposed in this pull request? This PR used to back port https://github.com/apache/spark/pull/36521 to 3.3 ### Why are the changes needed? Let function push-down more flexible. ### Does this PR introduce _any_ user-facing change? 'No'. New feature. ### How was this patch tested? Exists tests. Closes #36556 from beliefer/SPARK-39162_3.3. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan --- .../sql/errors/QueryCompilationErrors.scala | 4 --- .../org/apache/spark/sql/jdbc/H2Dialect.scala | 28 +++---------------- .../apache/spark/sql/jdbc/JdbcDialects.scala | 19 +++++++++++++ 3 files changed, 23 insertions(+), 28 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 8925d4f5317ee..2d2dba63e3a59 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -2411,8 +2411,4 @@ object QueryCompilationErrors extends QueryErrorsBase { new AnalysisException( "Sinks cannot request distribution and ordering in continuous execution mode") } - - def noSuchFunctionError(database: String, funcInfo: String): Throwable = { - new AnalysisException(s"$database does not support function: $funcInfo") - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala index 56cadbe8e2c07..4a88203ec59c9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala @@ -20,13 +20,9 @@ package org.apache.spark.sql.jdbc import java.sql.{SQLException, Types} import java.util.Locale -import scala.util.control.NonFatal - import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, NoSuchTableException, TableAlreadyExistsException} -import org.apache.spark.sql.connector.expressions.Expression import org.apache.spark.sql.connector.expressions.aggregate.{AggregateFunc, GeneralAggregateFunc} -import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils import org.apache.spark.sql.types.{BooleanType, ByteType, DataType, DecimalType, ShortType, StringType} @@ -34,27 +30,11 @@ private object H2Dialect extends JdbcDialect { override def canHandle(url: String): Boolean = url.toLowerCase(Locale.ROOT).startsWith("jdbc:h2") - class H2SQLBuilder extends JDBCSQLBuilder { - override def visitSQLFunction(funcName: String, inputs: Array[String]): String = { - funcName match { - case "WIDTH_BUCKET" => - val functionInfo = super.visitSQLFunction(funcName, inputs) - throw QueryCompilationErrors.noSuchFunctionError("H2", functionInfo) - case _ => super.visitSQLFunction(funcName, inputs) - } - } - } + private val supportedFunctions = + Set("ABS", "COALESCE", "LN", "EXP", "POWER", "SQRT", "FLOOR", "CEIL") - override def compileExpression(expr: Expression): Option[String] = { - val h2SQLBuilder = new H2SQLBuilder() - try { - Some(h2SQLBuilder.build(expr)) - } catch { - case NonFatal(e) => - logWarning("Error occurs while compiling V2 expression", e) - None - } - } + override def isSupportedFunction(funcName: String): Boolean = + supportedFunctions.contains(funcName) override def compileAggregate(aggFunction: AggregateFunc): Option[String] = { super.compileAggregate(aggFunction).orElse( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index 0ef23073a27a5..e1883e4e7f4b8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -240,8 +240,27 @@ abstract class JdbcDialect extends Serializable with Logging{ getJDBCType(dataType).map(_.databaseTypeDefinition).getOrElse(dataType.typeName) s"CAST($l AS $databaseTypeDefinition)" } + + override def visitSQLFunction(funcName: String, inputs: Array[String]): String = { + if (isSupportedFunction(funcName)) { + s"""$funcName(${inputs.mkString(", ")})""" + } else { + // The framework will catch the error and give up the push-down. + // Please see `JdbcDialect.compileExpression(expr: Expression)` for more details. + throw new UnsupportedOperationException( + s"${this.getClass.getSimpleName} does not support function: $funcName") + } + } } + /** + * Returns whether the database supports function. + * @param funcName Upper-cased function name + * @return True if the database supports function. + */ + @Since("3.3.0") + def isSupportedFunction(funcName: String): Boolean = false + /** * Converts V2 expression to String representing a SQL expression. * @param expr The V2 expression to be converted. From 47c47b6e8641876f1336d3d5fbac01e47d931d43 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 18 May 2022 21:39:53 +0300 Subject: [PATCH 274/535] [SPARK-39214][SQL][3.3] Improve errors related to CAST ### What changes were proposed in this pull request? In the PR, I propose to rename the error classes: 1. INVALID_SYNTAX_FOR_CAST -> CAST_INVALID_INPUT 2. CAST_CAUSES_OVERFLOW -> CAST_OVERFLOW and change error messages: CAST_INVALID_INPUT: `The value of the type cannot be cast to because it is malformed. ...` CAST_OVERFLOW: `The value of the type cannot be cast to due to an overflow....` Also quote the SQL config `"spark.sql.ansi.enabled"` and a function name. This is a backport of https://github.com/apache/spark/pull/36553. ### Why are the changes needed? To improve user experience with Spark SQL by making errors/error classes related to CAST more clear and **unified**. ### Does this PR introduce _any_ user-facing change? Yes, the PR changes user-facing error messages. ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "testOnly *CastSuite" $ build/sbt "test:testOnly *DateFormatterSuite" $ build/sbt "test:testOnly *TimestampFormatterSuite" $ build/sbt "testOnly *DSV2SQLInsertTestSuite" $ build/sbt "test:testOnly *InsertSuite" $ build/sbt "testOnly *AnsiCastSuiteWithAnsiModeOff" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 66648e96e4283223003c80a1a997325bbd27f940) Signed-off-by: Max Gekk Closes #36591 from MaxGekk/error-class-improve-msg-2-3.3. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 12 +- .../spark/sql/catalyst/expressions/Cast.scala | 8 +- .../sql/catalyst/util/DateTimeUtils.scala | 8 +- .../sql/catalyst/util/UTF8StringUtils.scala | 2 +- .../sql/errors/QueryExecutionErrors.scala | 62 +++++++---- .../org/apache/spark/sql/types/Decimal.scala | 2 +- .../expressions/AnsiCastSuiteBase.scala | 104 +++++++++++------- .../sql/catalyst/expressions/CastSuite.scala | 70 ++++++------ .../catalyst/util/DateFormatterSuite.scala | 2 +- .../util/TimestampFormatterSuite.scala | 3 +- .../apache/spark/sql/types/DecimalSuite.scala | 4 +- .../sql-tests/results/ansi/cast.sql.out | 82 +++++++------- .../sql-tests/results/ansi/date.sql.out | 8 +- .../ansi/datetime-parsing-invalid.sql.out | 8 +- .../sql-tests/results/ansi/interval.sql.out | 28 ++--- .../results/ansi/string-functions.sql.out | 8 +- .../results/postgreSQL/boolean.sql.out | 62 +++++------ .../results/postgreSQL/float4.sql.out | 14 +-- .../results/postgreSQL/float8.sql.out | 10 +- .../sql-tests/results/postgreSQL/int8.sql.out | 8 +- .../sql-tests/results/postgreSQL/text.sql.out | 4 +- .../results/postgreSQL/window_part2.sql.out | 2 +- .../results/postgreSQL/window_part3.sql.out | 2 +- .../results/postgreSQL/window_part4.sql.out | 2 +- .../timestampNTZ/timestamp-ansi.sql.out | 4 +- .../apache/spark/sql/SQLInsertTestSuite.scala | 3 +- .../spark/sql/sources/InsertSuite.scala | 12 +- 27 files changed, 294 insertions(+), 240 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 78934667ac060..7fef9e563c247 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -25,8 +25,12 @@ "CANNOT_USE_MIXTURE" : { "message" : [ "Cannot use a mixture of aggregate function and group aggregate pandas UDF" ] }, - "CAST_CAUSES_OVERFLOW" : { - "message" : [ "Casting to causes overflow. To return NULL instead, use 'try_cast'. If necessary set to false to bypass this error." ], + "CAST_INVALID_INPUT" : { + "message" : [ "The value of the type cannot be cast to because it is malformed. To return NULL instead, use `try_cast`. If necessary set to false to bypass this error.
        " ], + "sqlState" : "42000" + }, + "CAST_OVERFLOW" : { + "message" : [ "The value of the type cannot be cast to due to an overflow. To return NULL instead, use `try_cast`. If necessary set to false to bypass this error." ], "sqlState" : "22005" }, "CONCURRENT_QUERY" : { @@ -113,10 +117,6 @@ "message" : [ "Invalid SQL syntax: " ], "sqlState" : "42000" }, - "INVALID_SYNTAX_FOR_CAST" : { - "message" : [ "Invalid input syntax for type : . To return NULL instead, use 'try_cast'. If necessary set to false to bypass this error.
        " ], - "sqlState" : "42000" - }, "MAP_KEY_DOES_NOT_EXIST" : { "message" : [ "Key does not exist. To return NULL instead, use 'try_element_at'. If necessary set to false to bypass this error.
        " ] }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 17d571a70f2f2..4b7c7b479d4cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -854,7 +854,7 @@ abstract class CastBase extends UnaryExpression case _: NumberFormatException => val d = Cast.processFloatingPointSpecialLiterals(doubleStr, false) if(ansiEnabled && d == null) { - throw QueryExecutionErrors.invalidInputSyntaxForNumericError( + throw QueryExecutionErrors.invalidInputInCastToNumberError( DoubleType, s, queryContext) } else { d @@ -880,7 +880,7 @@ abstract class CastBase extends UnaryExpression case _: NumberFormatException => val f = Cast.processFloatingPointSpecialLiterals(floatStr, true) if (ansiEnabled && f == null) { - throw QueryExecutionErrors.invalidInputSyntaxForNumericError( + throw QueryExecutionErrors.invalidInputInCastToNumberError( FloatType, s, queryContext) } else { f @@ -1917,7 +1917,7 @@ abstract class CastBase extends UnaryExpression (c, evPrim, evNull) => val handleNull = if (ansiEnabled) { val errorContext = ctx.addReferenceObj("errCtx", queryContext) - s"throw QueryExecutionErrors.invalidInputSyntaxForNumericError(" + + s"throw QueryExecutionErrors.invalidInputInCastToNumberError(" + s"org.apache.spark.sql.types.FloatType$$.MODULE$$,$c, $errorContext);" } else { s"$evNull = true;" @@ -1955,7 +1955,7 @@ abstract class CastBase extends UnaryExpression (c, evPrim, evNull) => val handleNull = if (ansiEnabled) { val errorContext = ctx.addReferenceObj("errCtx", queryContext) - s"throw QueryExecutionErrors.invalidInputSyntaxForNumericError(" + + s"throw QueryExecutionErrors.invalidInputInCastToNumberError(" + s"org.apache.spark.sql.types.DoubleType$$.MODULE$$, $c, $errorContext);" } else { s"$evNull = true;" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 3a844437fd337..cc61491dc95d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -450,14 +450,14 @@ object DateTimeUtils { def stringToTimestampAnsi(s: UTF8String, timeZoneId: ZoneId, errorContext: String = ""): Long = { stringToTimestamp(s, timeZoneId).getOrElse { - throw QueryExecutionErrors.cannotCastToDateTimeError( + throw QueryExecutionErrors.invalidInputInCastToDatetimeError( s, StringType, TimestampType, errorContext) } } def doubleToTimestampAnsi(d: Double, errorContext: String): Long = { if (d.isNaN || d.isInfinite) { - throw QueryExecutionErrors.cannotCastToDateTimeError( + throw QueryExecutionErrors.invalidInputInCastToDatetimeError( d, DoubleType, TimestampType, errorContext) } else { DoubleExactNumeric.toLong(d * MICROS_PER_SECOND) @@ -507,7 +507,7 @@ object DateTimeUtils { def stringToTimestampWithoutTimeZoneAnsi(s: UTF8String, errorContext: String): Long = { stringToTimestampWithoutTimeZone(s, true).getOrElse { - throw QueryExecutionErrors.cannotCastToDateTimeError( + throw QueryExecutionErrors.invalidInputInCastToDatetimeError( s, StringType, TimestampNTZType, errorContext) } } @@ -626,7 +626,7 @@ object DateTimeUtils { def stringToDateAnsi(s: UTF8String, errorContext: String = ""): Int = { stringToDate(s).getOrElse { - throw QueryExecutionErrors.cannotCastToDateTimeError( + throw QueryExecutionErrors.invalidInputInCastToDatetimeError( s, StringType, DateType, errorContext) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala index c01fcbe6ca2bb..d4aac3e88dfd1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UTF8StringUtils.scala @@ -43,7 +43,7 @@ object UTF8StringUtils { f } catch { case e: NumberFormatException => - throw QueryExecutionErrors.invalidInputSyntaxForNumericError(to, s, errorContext) + throw QueryExecutionErrors.invalidInputInCastToNumberError(to, s, errorContext) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index cf87094ad27ba..c350a6b28ba5e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -84,9 +84,13 @@ object QueryExecutionErrors extends QueryErrorsBase { } def castingCauseOverflowError(t: Any, from: DataType, to: DataType): ArithmeticException = { - new SparkArithmeticException(errorClass = "CAST_CAUSES_OVERFLOW", + new SparkArithmeticException( + errorClass = "CAST_OVERFLOW", messageParameters = Array( - toSQLValue(t, from), toSQLType(to), toSQLConf(SQLConf.ANSI_ENABLED.key))) + toSQLValue(t, from), + toSQLType(from), + toSQLType(to), + toSQLConf(SQLConf.ANSI_ENABLED.key))) } def cannotChangeDecimalPrecisionError( @@ -104,20 +108,46 @@ object QueryExecutionErrors extends QueryErrorsBase { context)) } - def invalidInputSyntaxForNumericError( - e: NumberFormatException, - errorContext: String): NumberFormatException = { - new NumberFormatException(s"${e.getMessage}. To return NULL instead, use 'try_cast'. " + - s"If necessary set ${SQLConf.ANSI_ENABLED.key} to false to bypass this error." + errorContext) + def invalidInputInCastToDatetimeError( + value: Any, + from: DataType, + to: DataType, + errorContext: String): Throwable = { + new SparkDateTimeException( + errorClass = "CAST_INVALID_INPUT", + messageParameters = Array( + toSQLValue(value, from), + toSQLType(from), + toSQLType(to), + toSQLConf(SQLConf.ANSI_ENABLED.key), + errorContext)) } - def invalidInputSyntaxForNumericError( + def invalidInputSyntaxForBooleanError( + s: UTF8String, + errorContext: String): SparkRuntimeException = { + new SparkRuntimeException( + errorClass = "CAST_INVALID_INPUT", + messageParameters = Array( + toSQLValue(s, StringType), + toSQLType(StringType), + toSQLType(BooleanType), + toSQLConf(SQLConf.ANSI_ENABLED.key), + errorContext)) + } + + def invalidInputInCastToNumberError( to: DataType, s: UTF8String, - errorContext: String): NumberFormatException = { - new SparkNumberFormatException(errorClass = "INVALID_SYNTAX_FOR_CAST", - messageParameters = Array(toSQLType(to), toSQLValue(s, StringType), - SQLConf.ANSI_ENABLED.key, errorContext)) + errorContext: String): SparkNumberFormatException = { + new SparkNumberFormatException( + errorClass = "CAST_INVALID_INPUT", + messageParameters = Array( + toSQLValue(s, StringType), + toSQLType(StringType), + toSQLType(to), + toSQLConf(SQLConf.ANSI_ENABLED.key), + errorContext)) } def cannotCastFromNullTypeError(to: DataType): Throwable = { @@ -1190,14 +1220,6 @@ object QueryExecutionErrors extends QueryErrorsBase { "SQLUserDefinedType nor registered with UDTRegistration.}") } - def invalidInputSyntaxForBooleanError( - s: UTF8String, - errorContext: String): UnsupportedOperationException = { - new UnsupportedOperationException(s"invalid input syntax for type boolean: $s. " + - s"To return NULL instead, use 'try_cast'. If necessary set ${SQLConf.ANSI_ENABLED.key} " + - "to false to bypass this error." + errorContext) - } - def unsupportedOperandTypeForSizeFunctionError(dataType: DataType): Throwable = { new UnsupportedOperationException( s"The size function doesn't support the operand type ${dataType.getClass.getCanonicalName}") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 1eeaa46736e9d..7a43d01eb2f19 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -636,7 +636,7 @@ object Decimal { } } catch { case _: NumberFormatException => - throw QueryExecutionErrors.invalidInputSyntaxForNumericError(to, str, errorContext) + throw QueryExecutionErrors.invalidInputInCastToNumberError(to, str, errorContext) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala index bf563a84bed07..c851f37d73c08 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AnsiCastSuiteBase.scala @@ -20,13 +20,13 @@ package org.apache.spark.sql.catalyst.expressions import java.sql.Timestamp import java.time.DateTimeException -import org.apache.spark.SparkArithmeticException +import org.apache.spark.{SparkArithmeticException, SparkRuntimeException} import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.util.DateTimeConstants.MILLIS_PER_SECOND import org.apache.spark.sql.catalyst.util.DateTimeTestUtils import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{withDefaultTimeZone, UTC} -import org.apache.spark.sql.errors.QueryExecutionErrors.toSQLValue +import org.apache.spark.sql.errors.QueryErrorsBase import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -39,7 +39,7 @@ import org.apache.spark.unsafe.types.UTF8String * Note: for new test cases that work for [[Cast]], [[AnsiCast]] and [[TryCast]], please add them * in `CastSuiteBase` instead of this file to ensure the test coverage. */ -abstract class AnsiCastSuiteBase extends CastSuiteBase { +abstract class AnsiCastSuiteBase extends CastSuiteBase with QueryErrorsBase { private def testIntMaxAndMin(dt: DataType): Unit = { assert(Seq(IntegerType, ShortType, ByteType).contains(dt)) @@ -172,33 +172,44 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { assert(cast(booleanLiteral, DateType).checkInputDataTypes().isFailure) } + private def castErrMsg(v: Any, to: DataType, from: DataType = StringType): String = { + s"The value ${toSQLValue(v, from)} of the type ${toSQLType(from)} " + + s"cannot be cast to ${toSQLType(to)} because it is malformed." + } + + private def castErrMsg(l: Literal, to: DataType, from: DataType): String = { + s"The value ${toSQLValue(l.eval(), from)} of the type ${toSQLType(from)} " + + s"cannot be cast to ${toSQLType(to)} because it is malformed." + } + + private def castErrMsg(l: Literal, to: DataType): String = { + castErrMsg(l, to, l.dataType) + } + test("cast from invalid string to numeric should throw NumberFormatException") { + def check(value: String, dataType: DataType): Unit = { + checkExceptionInExpression[NumberFormatException](cast(value, dataType), + castErrMsg(value, dataType)) + } // cast to IntegerType Seq(IntegerType, ShortType, ByteType, LongType).foreach { dataType => - checkExceptionInExpression[NumberFormatException](cast("string", dataType), - s"""Invalid input syntax for type "${dataType.sql}": 'string'""") - checkExceptionInExpression[NumberFormatException](cast("123-string", dataType), - s"""Invalid input syntax for type "${dataType.sql}": '123-string'""") - checkExceptionInExpression[NumberFormatException](cast("2020-07-19", dataType), - s"""Invalid input syntax for type "${dataType.sql}": '2020-07-19'""") - checkExceptionInExpression[NumberFormatException](cast("1.23", dataType), - s"""Invalid input syntax for type "${dataType.sql}": '1.23'""") + check("string", dataType) + check("123-string", dataType) + check("2020-07-19", dataType) + check("1.23", dataType) } Seq(DoubleType, FloatType, DecimalType.USER_DEFAULT).foreach { dataType => - checkExceptionInExpression[NumberFormatException](cast("string", dataType), - s"""Invalid input syntax for type "${dataType.sql}": 'string'""") - checkExceptionInExpression[NumberFormatException](cast("123.000.00", dataType), - s"""Invalid input syntax for type "${dataType.sql}": '123.000.00'""") - checkExceptionInExpression[NumberFormatException](cast("abc.com", dataType), - s"""Invalid input syntax for type "${dataType.sql}": 'abc.com'""") + check("string", dataType) + check("123.000.00", dataType) + check("abc.com", dataType) } } protected def checkCastToNumericError(l: Literal, to: DataType, expectedDataTypeInErrorMsg: DataType, tryCastResult: Any): Unit = { checkExceptionInExpression[NumberFormatException]( - cast(l, to), s"""Invalid input syntax for type "${expectedDataTypeInErrorMsg.sql}": 'true'""") + cast(l, to), castErrMsg("true", expectedDataTypeInErrorMsg)) } test("cast from invalid string array to numeric array should throw NumberFormatException") { @@ -245,12 +256,12 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { checkExceptionInExpression[NumberFormatException]( cast("abcd", DecimalType(38, 1)), - s"""Invalid input syntax for type "${DecimalType(38, 1).sql}": 'abcd'""") + castErrMsg("abcd", DecimalType(38, 1))) } protected def checkCastToBooleanError(l: Literal, to: DataType, tryCastResult: Any): Unit = { - checkExceptionInExpression[UnsupportedOperationException]( - cast(l, to), s"invalid input syntax for type boolean") + checkExceptionInExpression[SparkRuntimeException]( + cast(l, to), """cannot be cast to "BOOLEAN"""") } test("ANSI mode: cast string to boolean with parse error") { @@ -258,13 +269,12 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { checkCastToBooleanError(Literal(""), BooleanType, null) } - protected def checkCastToTimestampError(l: Literal, to: DataType): Unit = { - checkExceptionInExpression[DateTimeException]( - cast(l, to), - s"""Invalid input syntax for type "TIMESTAMP": ${toSQLValue(l.eval(), l.dataType)}""") - } - test("cast from timestamp II") { + def checkCastToTimestampError(l: Literal, to: DataType): Unit = { + checkExceptionInExpression[DateTimeException]( + cast(l, to), + """cannot be cast to "TIMESTAMP" because it is malformed""") + } checkCastToTimestampError(Literal(Double.NaN), TimestampType) checkCastToTimestampError(Literal(1.0 / 0.0), TimestampType) checkCastToTimestampError(Literal(Float.NaN), TimestampType) @@ -276,13 +286,19 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { } } + private def castOverflowErrMsg(v: Any, from: DataType, to: DataType): String = { + s"The value ${toSQLValue(v, from)} of the type ${toSQLType(from)} cannot be " + + s"cast to ${toSQLType(to)} due to an overflow." + } + test("cast a timestamp before the epoch 1970-01-01 00:00:00Z II") { withDefaultTimeZone(UTC) { val negativeTs = Timestamp.valueOf("1900-05-05 18:34:56.1") assert(negativeTs.getTime < 0) Seq(ByteType, ShortType, IntegerType).foreach { dt => checkExceptionInExpression[SparkArithmeticException]( - cast(negativeTs, dt), s"""to "${dt.sql}" causes overflow""") + cast(negativeTs, dt), + castOverflowErrMsg(negativeTs, TimestampType, dt)) } } } @@ -293,7 +309,8 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { assert(negativeTs.getTime < 0) Seq(ByteType, ShortType, IntegerType).foreach { dt => checkExceptionInExpression[SparkArithmeticException]( - cast(negativeTs, dt), s"""to "${dt.sql}" causes overflow""") + cast(negativeTs, dt), + castOverflowErrMsg(negativeTs, TimestampType, dt)) } val expectedSecs = Math.floorDiv(negativeTs.getTime, MILLIS_PER_SECOND) checkEvaluation(cast(negativeTs, LongType), expectedSecs) @@ -324,8 +341,8 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { val ret = cast(array_notNull, ArrayType(BooleanType, containsNull = false)) assert(ret.resolved == !isTryCast) if (!isTryCast) { - checkExceptionInExpression[UnsupportedOperationException]( - ret, "invalid input syntax for type boolean") + checkExceptionInExpression[SparkRuntimeException]( + ret, """cannot be cast to "BOOLEAN"""") } } } @@ -372,7 +389,8 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { assert(ret.resolved == !isTryCast) if (!isTryCast) { checkExceptionInExpression[NumberFormatException]( - ret, s"""Invalid input syntax for type "${IntegerType.sql}"""") + ret, + castErrMsg("a", IntegerType)) } } @@ -380,8 +398,9 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { val ret = cast(map_notNull, MapType(StringType, BooleanType, valueContainsNull = false)) assert(ret.resolved == !isTryCast) if (!isTryCast) { - checkExceptionInExpression[UnsupportedOperationException]( - ret, "invalid input syntax for type boolean") + checkExceptionInExpression[SparkRuntimeException]( + ret, + castErrMsg("123", BooleanType)) } } @@ -390,7 +409,8 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { assert(ret.resolved == !isTryCast) if (!isTryCast) { checkExceptionInExpression[NumberFormatException]( - ret, s"""Invalid input syntax for type "${IntegerType.sql}"""") + ret, + castErrMsg("a", IntegerType)) } } } @@ -469,8 +489,9 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { StructField("c", BooleanType, nullable = false)))) assert(ret.resolved == !isTryCast) if (!isTryCast) { - checkExceptionInExpression[UnsupportedOperationException]( - ret, "invalid input syntax for type boolean") + checkExceptionInExpression[SparkRuntimeException]( + ret, + castErrMsg("123", BooleanType)) } } } @@ -515,7 +536,8 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { assert(ret.resolved === !isTryCast) if (!isTryCast) { checkExceptionInExpression[NumberFormatException]( - ret, s"""Invalid input syntax for type "${IntegerType.sql}"""") + ret, + castErrMsg("true", IntegerType)) } } @@ -524,7 +546,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { def checkCastWithParseError(str: String): Unit = { checkExceptionInExpression[DateTimeException]( cast(Literal(str), TimestampType, Option(zid.getId)), - s"""Invalid input syntax for type "TIMESTAMP": '$str'""") + castErrMsg(str, TimestampType)) } checkCastWithParseError("123") @@ -545,7 +567,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { def checkCastWithParseError(str: String): Unit = { checkExceptionInExpression[DateTimeException]( cast(Literal(str), DateType, Option(zid.getId)), - s"""Invalid input syntax for type "DATE": '$str'""") + castErrMsg(str, DateType)) } checkCastWithParseError("2015-13-18") @@ -573,7 +595,7 @@ abstract class AnsiCastSuiteBase extends CastSuiteBase { "2021-06-17 00:00:00ABC").foreach { invalidInput => checkExceptionInExpression[DateTimeException]( cast(invalidInput, TimestampNTZType), - s"""Invalid input syntax for type "TIMESTAMP_NTZ": '$invalidInput'""") + castErrMsg(invalidInput, TimestampNTZType)) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index f78a1d30d7279..630c45adba1b3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -592,15 +592,15 @@ class CastSuite extends CastSuiteBase { val e1 = intercept[ArithmeticException] { Cast(Literal(Byte.MaxValue + 1), ByteType).eval() }.getMessage - assert(e1.contains("Casting 128 to \"TINYINT\" causes overflow")) + assert(e1.contains("The value 128 of the type \"INT\" cannot be cast to \"TINYINT\"")) val e2 = intercept[ArithmeticException] { Cast(Literal(Short.MaxValue + 1), ShortType).eval() }.getMessage - assert(e2.contains("Casting 32768 to \"SMALLINT\" causes overflow")) + assert(e2.contains("The value 32768 of the type \"INT\" cannot be cast to \"SMALLINT\"")) val e3 = intercept[ArithmeticException] { Cast(Literal(Int.MaxValue + 1L), IntegerType).eval() }.getMessage - assert(e3.contains("Casting 2147483648L to \"INT\" causes overflow")) + assert(e3.contains("The value 2147483648L of the type \"BIGINT\" cannot be cast to \"INT\"")) } } @@ -619,6 +619,10 @@ class CastSuite extends CastSuiteBase { checkEvaluation(cast(Literal("2015-03-18T"), TimestampType), null) } + private def castOverflowErrMsg(targetType: DataType): String = { + s"""cannot be cast to "${targetType.sql}" due to an overflow.""" + } + test("SPARK-36924: Cast DayTimeIntervalType to IntegralType") { DataTypeTestUtils.dayTimeIntervalTypes.foreach { dt => val v1 = Literal.create(Duration.ZERO, dt) @@ -642,15 +646,15 @@ class CastSuite extends CastSuiteBase { checkEvaluation(cast(v2, LongType), 25L) case MINUTE => checkExceptionInExpression[ArithmeticException](cast(v2, ByteType), - s"""Casting $v2 to "TINYINT" causes overflow""") + castOverflowErrMsg(ByteType)) checkEvaluation(cast(v2, ShortType), (MINUTES_PER_HOUR * 25 + 1).toShort) checkEvaluation(cast(v2, IntegerType), (MINUTES_PER_HOUR * 25 + 1).toInt) checkEvaluation(cast(v2, LongType), MINUTES_PER_HOUR * 25 + 1) case SECOND => checkExceptionInExpression[ArithmeticException](cast(v2, ByteType), - s"""Casting $v2 to "TINYINT" causes overflow""") + castOverflowErrMsg(ByteType)) checkExceptionInExpression[ArithmeticException](cast(v2, ShortType), - s"""Casting $v2 to "SMALLINT" causes overflow""") + castOverflowErrMsg(ShortType)) checkEvaluation(cast(v2, IntegerType), num.toInt) checkEvaluation(cast(v2, LongType), num) } @@ -659,34 +663,34 @@ class CastSuite extends CastSuiteBase { dt.endField match { case DAY => checkExceptionInExpression[ArithmeticException](cast(v3, ByteType), - s"""Casting $v3 to "TINYINT" causes overflow""") + castOverflowErrMsg(ByteType)) checkExceptionInExpression[ArithmeticException](cast(v3, ShortType), - s"""Casting $v3 to "SMALLINT" causes overflow""") + castOverflowErrMsg(ShortType)) checkEvaluation(cast(v3, IntegerType), (Long.MaxValue / MICROS_PER_DAY).toInt) checkEvaluation(cast(v3, LongType), Long.MaxValue / MICROS_PER_DAY) case HOUR => checkExceptionInExpression[ArithmeticException](cast(v3, ByteType), - s"""Casting $v3 to "TINYINT" causes overflow""") + castOverflowErrMsg(ByteType)) checkExceptionInExpression[ArithmeticException](cast(v3, ShortType), - s"""Casting $v3 to "SMALLINT" causes overflow""") + castOverflowErrMsg(ShortType)) checkExceptionInExpression[ArithmeticException](cast(v3, IntegerType), - s"""Casting $v3 to "INT" causes overflow""") + castOverflowErrMsg(IntegerType)) checkEvaluation(cast(v3, LongType), Long.MaxValue / MICROS_PER_HOUR) case MINUTE => checkExceptionInExpression[ArithmeticException](cast(v3, ByteType), - s"""Casting $v3 to "TINYINT" causes overflow""") + castOverflowErrMsg(ByteType)) checkExceptionInExpression[ArithmeticException](cast(v3, ShortType), - s"""Casting $v3 to "SMALLINT" causes overflow""") + castOverflowErrMsg(ShortType)) checkExceptionInExpression[ArithmeticException](cast(v3, IntegerType), - s"""Casting $v3 to "INT" causes overflow""") + castOverflowErrMsg(IntegerType)) checkEvaluation(cast(v3, LongType), Long.MaxValue / MICROS_PER_MINUTE) case SECOND => checkExceptionInExpression[ArithmeticException](cast(v3, ByteType), - s"""Casting $v3 to "TINYINT" causes overflow""") + castOverflowErrMsg(ByteType)) checkExceptionInExpression[ArithmeticException](cast(v3, ShortType), - s"""Casting $v3 to "SMALLINT" causes overflow""") + castOverflowErrMsg(ShortType)) checkExceptionInExpression[ArithmeticException](cast(v3, IntegerType), - s"""Casting $v3 to "INT" causes overflow""") + castOverflowErrMsg(IntegerType)) checkEvaluation(cast(v3, LongType), Long.MaxValue / MICROS_PER_SECOND) } @@ -694,34 +698,34 @@ class CastSuite extends CastSuiteBase { dt.endField match { case DAY => checkExceptionInExpression[ArithmeticException](cast(v4, ByteType), - s"""Casting $v4 to "TINYINT" causes overflow""") + castOverflowErrMsg(ByteType)) checkExceptionInExpression[ArithmeticException](cast(v4, ShortType), - s"""Casting $v4 to "SMALLINT" causes overflow""") + castOverflowErrMsg(ShortType)) checkEvaluation(cast(v4, IntegerType), (Long.MinValue / MICROS_PER_DAY).toInt) checkEvaluation(cast(v4, LongType), Long.MinValue / MICROS_PER_DAY) case HOUR => checkExceptionInExpression[ArithmeticException](cast(v4, ByteType), - s"""Casting $v4 to "TINYINT" causes overflow""") + castOverflowErrMsg(ByteType)) checkExceptionInExpression[ArithmeticException](cast(v4, ShortType), - s"""Casting $v4 to "SMALLINT" causes overflow""") + castOverflowErrMsg(ShortType)) checkExceptionInExpression[ArithmeticException](cast(v4, IntegerType), - s"""Casting $v4 to "INT" causes overflow""") + castOverflowErrMsg(IntegerType)) checkEvaluation(cast(v4, LongType), Long.MinValue / MICROS_PER_HOUR) case MINUTE => checkExceptionInExpression[ArithmeticException](cast(v4, ByteType), - s"""Casting $v4 to "TINYINT" causes overflow""") + castOverflowErrMsg(ByteType)) checkExceptionInExpression[ArithmeticException](cast(v4, ShortType), - s"""Casting $v4 to "SMALLINT" causes overflow""") + castOverflowErrMsg(ShortType)) checkExceptionInExpression[ArithmeticException](cast(v4, IntegerType), - s"""Casting $v4 to "INT" causes overflow""") + castOverflowErrMsg(IntegerType)) checkEvaluation(cast(v4, LongType), Long.MinValue / MICROS_PER_MINUTE) case SECOND => checkExceptionInExpression[ArithmeticException](cast(v4, ByteType), - s"""Casting $v4 to "TINYINT" causes overflow""") + castOverflowErrMsg(ByteType)) checkExceptionInExpression[ArithmeticException](cast(v4, ShortType), - s"""Casting $v4 to "SMALLINT" causes overflow""") + castOverflowErrMsg(ShortType)) checkExceptionInExpression[ArithmeticException](cast(v4, IntegerType), - s"""Casting $v4 to "INT" causes overflow""") + castOverflowErrMsg(IntegerType)) checkEvaluation(cast(v4, LongType), Long.MinValue / MICROS_PER_SECOND) } } @@ -777,7 +781,7 @@ class CastSuite extends CastSuiteBase { ).foreach { case (v, toType) => checkExceptionInExpression[ArithmeticException](cast(v, toType), - s"""Casting $v to "${toType.sql}" causes overflow""") + castOverflowErrMsg(toType)) } Seq( @@ -792,7 +796,7 @@ class CastSuite extends CastSuiteBase { ).foreach { case (v, toType) => checkExceptionInExpression[ArithmeticException](cast(v, toType), - s"""Casting ${v}L to "${toType.sql}" causes overflow""") + castOverflowErrMsg(toType)) } } @@ -829,7 +833,7 @@ class CastSuite extends CastSuiteBase { case (v, dt, toType) => val value = Literal.create(v, dt) checkExceptionInExpression[ArithmeticException](cast(value, toType), - s"""Casting $value to "${toType.sql}" causes overflow""") + castOverflowErrMsg(toType)) } Seq( @@ -887,7 +891,7 @@ class CastSuite extends CastSuiteBase { ).foreach { case (v, toType) => checkExceptionInExpression[ArithmeticException](cast(v, toType), - s"""Casting $v to "${toType.sql}" causes overflow""") + castOverflowErrMsg(toType)) } Seq( @@ -898,7 +902,7 @@ class CastSuite extends CastSuiteBase { ).foreach { case (v, toType) => checkExceptionInExpression[ArithmeticException](cast(v, toType), - s"""Casting ${v}L to "${toType.sql}" causes overflow""") + castOverflowErrMsg(toType)) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateFormatterSuite.scala index cae89b64e06e0..4b7032854342c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateFormatterSuite.scala @@ -208,6 +208,6 @@ class DateFormatterSuite extends DatetimeFormatterSuite { val errMsg = intercept[DateTimeException] { formatter.parse("x123") }.getMessage - assert(errMsg.contains("""Invalid input syntax for type "DATE": 'x123'""")) + assert(errMsg.contains("""The value 'x123' of the type "STRING" cannot be cast to "DATE"""")) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index e3d7c972baf2c..56e73ead53de2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -453,7 +453,8 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { val errMsg = intercept[DateTimeException] { formatter.parse("x123") }.getMessage - assert(errMsg.contains("""Invalid input syntax for type "TIMESTAMP": 'x123'""")) + assert(errMsg.contains( + """The value 'x123' of the type "STRING" cannot be cast to "TIMESTAMP"""")) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala index 0e9222a0196c4..6f70dc51b950d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DecimalSuite.scala @@ -284,8 +284,8 @@ class DecimalSuite extends SparkFunSuite with PrivateMethodTester with SQLHelper assert(Decimal.fromString(UTF8String.fromString("str")) === null) val e = intercept[NumberFormatException](Decimal.fromStringANSI(UTF8String.fromString("str"))) - assert(e.getMessage.contains("Invalid input syntax for type " + - s""""${DecimalType.USER_DEFAULT.sql}": 'str'""")) + assert(e.getMessage.contains( + """The value 'str' of the type "STRING" cannot be cast to "DECIMAL(10,0)"""")) } test("SPARK-35841: Casting string to decimal type doesn't work " + diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out index 1f228d0cd229f..1ca6c64b4e0d1 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -8,7 +8,7 @@ SELECT CAST('1.23' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": '1.23'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '1.23' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('1.23' AS int) ^^^^^^^^^^^^^^^^^^^ @@ -20,7 +20,7 @@ SELECT CAST('1.23' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "BIGINT": '1.23'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '1.23' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('1.23' AS long) ^^^^^^^^^^^^^^^^^^^^ @@ -32,7 +32,7 @@ SELECT CAST('-4.56' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": '-4.56'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '-4.56' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-4.56' AS int) ^^^^^^^^^^^^^^^^^^^^ @@ -44,7 +44,7 @@ SELECT CAST('-4.56' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "BIGINT": '-4.56'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '-4.56' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-4.56' AS long) ^^^^^^^^^^^^^^^^^^^^^ @@ -56,7 +56,7 @@ SELECT CAST('abc' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": 'abc'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('abc' AS int) ^^^^^^^^^^^^^^^^^^ @@ -68,7 +68,7 @@ SELECT CAST('abc' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "BIGINT": 'abc'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('abc' AS long) ^^^^^^^^^^^^^^^^^^^ @@ -80,7 +80,7 @@ SELECT CAST('abc' AS float) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "FLOAT": 'abc'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('abc' AS float) ^^^^^^^^^^^^^^^^^^^^ @@ -92,7 +92,7 @@ SELECT CAST('abc' AS double) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DOUBLE": 'abc'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('abc' AS double) ^^^^^^^^^^^^^^^^^^^^^ @@ -104,7 +104,7 @@ SELECT CAST('1234567890123' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": '1234567890123'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '1234567890123' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('1234567890123' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -116,7 +116,7 @@ SELECT CAST('12345678901234567890123' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "BIGINT": '12345678901234567890123'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '12345678901234567890123' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('12345678901234567890123' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -128,7 +128,7 @@ SELECT CAST('' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": ''. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('' AS int) ^^^^^^^^^^^^^^^ @@ -140,7 +140,7 @@ SELECT CAST('' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "BIGINT": ''. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('' AS long) ^^^^^^^^^^^^^^^^ @@ -152,7 +152,7 @@ SELECT CAST('' AS float) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "FLOAT": ''. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('' AS float) ^^^^^^^^^^^^^^^^^ @@ -164,7 +164,7 @@ SELECT CAST('' AS double) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DOUBLE": ''. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('' AS double) ^^^^^^^^^^^^^^^^^^ @@ -192,7 +192,7 @@ SELECT CAST('123.a' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": '123.a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('123.a' AS int) ^^^^^^^^^^^^^^^^^^^^ @@ -204,7 +204,7 @@ SELECT CAST('123.a' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "BIGINT": '123.a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('123.a' AS long) ^^^^^^^^^^^^^^^^^^^^^ @@ -216,7 +216,7 @@ SELECT CAST('123.a' AS float) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "FLOAT": '123.a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('123.a' AS float) ^^^^^^^^^^^^^^^^^^^^^^ @@ -228,7 +228,7 @@ SELECT CAST('123.a' AS double) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DOUBLE": '123.a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('123.a' AS double) ^^^^^^^^^^^^^^^^^^^^^^^ @@ -248,7 +248,7 @@ SELECT CAST('-2147483649' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": '-2147483649'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '-2147483649' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-2147483649' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -268,7 +268,7 @@ SELECT CAST('2147483648' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": '2147483648'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '2147483648' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('2147483648' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -288,7 +288,7 @@ SELECT CAST('-9223372036854775809' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "BIGINT": '-9223372036854775809'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '-9223372036854775809' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-9223372036854775809' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -308,7 +308,7 @@ SELECT CAST('9223372036854775808' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "BIGINT": '9223372036854775808'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '9223372036854775808' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT CAST('9223372036854775808' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -567,7 +567,7 @@ select cast('1中文' as tinyint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "TINYINT": '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "TINYINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as tinyint) ^^^^^^^^^^^^^^^^^^^^^^ @@ -579,7 +579,7 @@ select cast('1中文' as smallint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "SMALLINT": '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "SMALLINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as smallint) ^^^^^^^^^^^^^^^^^^^^^^^ @@ -591,7 +591,7 @@ select cast('1中文' as INT) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as INT) ^^^^^^^^^^^^^^^^^^ @@ -603,7 +603,7 @@ select cast('中文1' as bigint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "BIGINT": '中文1'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '中文1' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast('中文1' as bigint) ^^^^^^^^^^^^^^^^^^^^^ @@ -615,7 +615,7 @@ select cast('1中文' as bigint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "BIGINT": '1中文'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as bigint) ^^^^^^^^^^^^^^^^^^^^^ @@ -644,9 +644,9 @@ select cast('\t\n xyz \t\r' as boolean) -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: - xyz . To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value ' + xyz ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast('\t\n xyz \t\r' as boolean) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -678,7 +678,7 @@ select cast('xyz' as decimal(4, 2)) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DECIMAL(4,2)": 'xyz'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'xyz' of the type "STRING" cannot be cast to "DECIMAL(4,2)" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast('xyz' as decimal(4, 2)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -697,8 +697,8 @@ select cast('a' as date) -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "DATE": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +The value 'a' of the type "STRING" cannot be cast to "DATE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast('a' as date) ^^^^^^^^^^^^^^^^^ @@ -717,8 +717,8 @@ select cast('a' as timestamp) -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "TIMESTAMP": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast('a' as timestamp) ^^^^^^^^^^^^^^^^^^^^^^ @@ -737,8 +737,8 @@ select cast('a' as timestamp_ntz) -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "TIMESTAMP_NTZ": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast('a' as timestamp_ntz) ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -749,8 +749,8 @@ select cast(cast('inf' as double) as timestamp) -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "TIMESTAMP": Infinity. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast(cast('inf' as double) as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -761,8 +761,8 @@ select cast(cast('inf' as float) as timestamp) -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "TIMESTAMP": Infinity. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast(cast('inf' as float) as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index d13585c7fd1c3..844ecacc9aa06 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -231,8 +231,8 @@ select next_day("xx", "Mon") -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "DATE": 'xx'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +The value 'xx' of the type "STRING" cannot be cast to "DATE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select next_day("xx", "Mon") ^^^^^^^^^^^^^^^^^^^^^ @@ -327,7 +327,7 @@ select date_add('2011-11-11', '1.2') struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": '1.2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select date_add('2011-11-11', '1.2') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -438,7 +438,7 @@ select date_sub(date'2011-11-11', '1.2') struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": '1.2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select date_sub(date'2011-11-11', '1.2') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out index d19b94df3fe2e..1632ea0a239f2 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out @@ -241,8 +241,8 @@ select cast("Unparseable" as timestamp) -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "TIMESTAMP": 'Unparseable'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +The value 'Unparseable' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast("Unparseable" as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -253,8 +253,8 @@ select cast("Unparseable" as date) -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "DATE": 'Unparseable'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +The value 'Unparseable' of the type "STRING" cannot be cast to "DATE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast("Unparseable" as date) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index ee6f591f29e59..ddb829f6b5f56 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -122,7 +122,7 @@ select interval 2 second * 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DOUBLE": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select interval 2 second * 'a' ^^^^^^^^^^^^^^^^^^^^^^^ @@ -134,7 +134,7 @@ select interval 2 second / 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DOUBLE": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select interval 2 second / 'a' ^^^^^^^^^^^^^^^^^^^^^^^ @@ -146,7 +146,7 @@ select interval 2 year * 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DOUBLE": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select interval 2 year * 'a' ^^^^^^^^^^^^^^^^^^^^^ @@ -158,7 +158,7 @@ select interval 2 year / 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DOUBLE": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select interval 2 year / 'a' ^^^^^^^^^^^^^^^^^^^^^ @@ -186,7 +186,7 @@ select 'a' * interval 2 second struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DOUBLE": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select 'a' * interval 2 second ^^^^^^^^^^^^^^^^^^^^^^^ @@ -198,7 +198,7 @@ select 'a' * interval 2 year struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DOUBLE": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select 'a' * interval 2 year ^^^^^^^^^^^^^^^^^^^^^ @@ -1515,8 +1515,8 @@ select '4 11:11' - interval '4 22:12' day to minute -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "TIMESTAMP": '4 11:11'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +The value '4 11:11' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select '4 11:11' - interval '4 22:12' day to minute ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1527,8 +1527,8 @@ select '4 12:12:12' + interval '4 22:12' day to minute -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "TIMESTAMP": '4 12:12:12'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +The value '4 12:12:12' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select '4 12:12:12' + interval '4 22:12' day to minute ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1565,8 +1565,8 @@ select str - interval '4 22:12' day to minute from interval_view -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "TIMESTAMP": '1'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select str - interval '4 22:12' day to minute from interval_view ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1577,8 +1577,8 @@ select str + interval '4 22:12' day to minute from interval_view -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "TIMESTAMP": '1'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select str + interval '4 22:12' day to minute from interval_view ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 814655ba68a51..409ef51edd570 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -82,7 +82,7 @@ select left("abcd", -2), left("abcd", 0), left("abcd", 'a') struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 42) == ...t("abcd", -2), left("abcd", 0), left("abcd", 'a') ^^^^^^^^^^^^^^^^^ @@ -110,7 +110,7 @@ select right("abcd", -2), right("abcd", 0), right("abcd", 'a') struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 44) == ...("abcd", -2), right("abcd", 0), right("abcd", 'a') ^^^^^^^^^^^^^^^^^^ @@ -419,7 +419,7 @@ SELECT lpad('hi', 'invalid_length') struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": 'invalid_length'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT lpad('hi', 'invalid_length') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -431,7 +431,7 @@ SELECT rpad('hi', 'invalid_length') struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": 'invalid_length'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT rpad('hi', 'invalid_length') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out index 166bea4a722a7..45bbaa955b2b3 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out @@ -55,8 +55,8 @@ SELECT boolean('test') AS error -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: test. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value 'test' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean('test') AS error ^^^^^^^^^^^^^^^ @@ -75,8 +75,8 @@ SELECT boolean('foo') AS error -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: foo. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value 'foo' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean('foo') AS error ^^^^^^^^^^^^^^ @@ -103,8 +103,8 @@ SELECT boolean('yeah') AS error -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: yeah. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value 'yeah' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean('yeah') AS error ^^^^^^^^^^^^^^^ @@ -131,8 +131,8 @@ SELECT boolean('nay') AS error -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: nay. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value 'nay' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean('nay') AS error ^^^^^^^^^^^^^^ @@ -143,8 +143,8 @@ SELECT boolean('on') AS true -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: on. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value 'on' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean('on') AS true ^^^^^^^^^^^^^ @@ -155,8 +155,8 @@ SELECT boolean('off') AS `false` -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: off. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value 'off' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean('off') AS `false` ^^^^^^^^^^^^^^ @@ -167,8 +167,8 @@ SELECT boolean('of') AS `false` -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: of. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value 'of' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean('of') AS `false` ^^^^^^^^^^^^^ @@ -179,8 +179,8 @@ SELECT boolean('o') AS error -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: o. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value 'o' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean('o') AS error ^^^^^^^^^^^^ @@ -191,8 +191,8 @@ SELECT boolean('on_') AS error -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: on_. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value 'on_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean('on_') AS error ^^^^^^^^^^^^^^ @@ -203,8 +203,8 @@ SELECT boolean('off_') AS error -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: off_. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value 'off_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean('off_') AS error ^^^^^^^^^^^^^^^ @@ -223,8 +223,8 @@ SELECT boolean('11') AS error -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: 11. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value '11' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean('11') AS error ^^^^^^^^^^^^^ @@ -243,8 +243,8 @@ SELECT boolean('000') AS error -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: 000. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value '000' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean('000') AS error ^^^^^^^^^^^^^^ @@ -255,8 +255,8 @@ SELECT boolean('') AS error -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: . To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean('') AS error ^^^^^^^^^^^ @@ -364,8 +364,8 @@ SELECT boolean(string(' tru e ')) AS invalid -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: tru e . To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value ' tru e ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean(string(' tru e ')) AS invalid ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -376,8 +376,8 @@ SELECT boolean(string('')) AS invalid -- !query schema struct<> -- !query output -java.lang.UnsupportedOperationException -invalid input syntax for type boolean: . To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkRuntimeException +The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT boolean(string('')) AS invalid ^^^^^^^^^^^^^^^^^^^ @@ -524,7 +524,7 @@ INSERT INTO BOOLTBL2 struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('XXX' AS BOOLEAN): invalid input syntax for type boolean: XXX. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +failed to evaluate expression CAST('XXX' AS BOOLEAN): The value 'XXX' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 2, position 11) == VALUES (boolean('XXX')) ^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out index 04c8837baac03..fa8736d089b35 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out @@ -96,7 +96,7 @@ SELECT float('N A N') struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "FLOAT": 'N A N'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'N A N' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT float('N A N') ^^^^^^^^^^^^^^ @@ -108,7 +108,7 @@ SELECT float('NaN x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "FLOAT": 'NaN x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'NaN x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT float('NaN x') ^^^^^^^^^^^^^^ @@ -120,7 +120,7 @@ SELECT float(' INFINITY x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "FLOAT": ' INFINITY x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value ' INFINITY x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT float(' INFINITY x') ^^^^^^^^^^^^^^^^^^^^^^^ @@ -156,7 +156,7 @@ SELECT float(decimal('nan')) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DECIMAL(10,0)": 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 13) == SELECT float(decimal('nan')) ^^^^^^^^^^^^^^ @@ -340,7 +340,7 @@ SELECT int(float('2147483647')) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting 2.14748365E9 to "INT" causes overflow. To return NULL instead, use 'try_cast'. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 2.14748365E9 of the type "FLOAT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. -- !query @@ -357,7 +357,7 @@ SELECT int(float('-2147483900')) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting -2.1474839E9 to "INT" causes overflow. To return NULL instead, use 'try_cast'. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value -2.1474839E9 of the type "FLOAT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. -- !query @@ -390,7 +390,7 @@ SELECT bigint(float('-9223380000000000000')) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting -9.22338E18 to "BIGINT" causes overflow. To return NULL instead, use 'try_cast'. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value -9.22338E18 of the type "FLOAT" cannot be cast to "BIGINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out index 75943e4010bea..f90d7c663e0f3 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out @@ -128,7 +128,7 @@ SELECT double('N A N') struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DOUBLE": 'N A N'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'N A N' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT double('N A N') ^^^^^^^^^^^^^^^ @@ -140,7 +140,7 @@ SELECT double('NaN x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DOUBLE": 'NaN x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'NaN x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT double('NaN x') ^^^^^^^^^^^^^^^ @@ -152,7 +152,7 @@ SELECT double(' INFINITY x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DOUBLE": ' INFINITY x'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value ' INFINITY x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == SELECT double(' INFINITY x') ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -188,7 +188,7 @@ SELECT double(decimal('nan')) struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "DECIMAL(10,0)": 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 14) == SELECT double(decimal('nan')) ^^^^^^^^^^^^^^ @@ -845,7 +845,7 @@ SELECT bigint(double('-9223372036854780000')) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting -9.22337203685478E18D to "BIGINT" causes overflow. To return NULL instead, use 'try_cast'. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value -9.22337203685478E18D of the type "DOUBLE" cannot be cast to "BIGINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out index 6c5673f9ce6df..bbdee119a51bf 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out @@ -619,7 +619,7 @@ SELECT CAST(q1 AS int) FROM int8_tbl WHERE q2 <> 456 struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting 4567890123456789L to "INT" causes overflow. To return NULL instead, use 'try_cast'. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 4567890123456789L of the type "BIGINT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. -- !query @@ -636,7 +636,7 @@ SELECT CAST(q1 AS smallint) FROM int8_tbl WHERE q2 <> 456 struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting 4567890123456789L to "SMALLINT" causes overflow. To return NULL instead, use 'try_cast'. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 4567890123456789L of the type "BIGINT" cannot be cast to "SMALLINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. -- !query @@ -673,7 +673,7 @@ SELECT CAST(double('922337203685477580700.0') AS bigint) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting 9.223372036854776E20D to "BIGINT" causes overflow. To return NULL instead, use 'try_cast'. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 9.223372036854776E20D of the type "DOUBLE" cannot be cast to "BIGINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. -- !query @@ -745,7 +745,7 @@ SELECT string(int(shiftleft(bigint(-1), 63))+1) struct<> -- !query output org.apache.spark.SparkArithmeticException -Casting -9223372036854775808L to "INT" causes overflow. To return NULL instead, use 'try_cast'. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value -9223372036854775808L of the type "BIGINT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out index 03220ff68e907..39e2603fd265b 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out @@ -65,7 +65,7 @@ select string('four: ') || 2+2 struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "BIGINT": 'four: 2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select string('four: ') || 2+2 ^^^^^^^^^^^^^^^^^^^^^^^ @@ -77,7 +77,7 @@ select 'four: ' || 2+2 struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "BIGINT": 'four: 2'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select 'four: ' || 2+2 ^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out index ab3b9b5dc0991..2c335898d65bc 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out @@ -462,7 +462,7 @@ window w as (order by f_numeric range between struct<> -- !query output org.apache.spark.SparkNumberFormatException -Invalid input syntax for type "INT": 'NaN'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +The value 'NaN' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 3, position 12) == window w as (order by f_numeric range between ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index 1d44bb22a828b..418c997361183 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -72,7 +72,7 @@ insert into datetimes values struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): Invalid input syntax for type "TIMESTAMP": '11:00 BST'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): The value '11:00 BST' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 2, position 23) == (1, timestamp '11:00', cast ('11:00 BST' as timestamp), cast ('1 year' as timestamp), ... ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out index 2c4aee689ed3a..3e30ad2694100 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out @@ -501,7 +501,7 @@ FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('nan' AS INT): Invalid input syntax for type "INT": 'nan'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +failed to evaluate expression CAST('nan' AS INT): The value 'nan' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 3, position 28) == FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) ^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index 68060936c9449..e6f62b679c638 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out @@ -331,8 +331,8 @@ select to_timestamp(1) -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "TIMESTAMP_NTZ": '1'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +The value '1' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala index 0f7cd0c558118..f30465203d751 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLInsertTestSuite.scala @@ -302,7 +302,8 @@ trait SQLInsertTestSuite extends QueryTest with SQLTestUtils { val errorMsg = intercept[NumberFormatException] { sql("insert into t partition(a='ansi') values('ansi')") }.getMessage - assert(errorMsg.contains("""Invalid input syntax for type "INT": 'ansi'""")) + assert(errorMsg.contains( + """The value 'ansi' of the type "STRING" cannot be cast to "INT"""")) } else { sql("insert into t partition(a='ansi') values('ansi')") checkAnswer(sql("select * from t"), Row("ansi", null) :: Nil) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 9e0f63cd8ef59..b293307d4ea95 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -713,13 +713,15 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { var msg = intercept[SparkException] { sql(s"insert into t values($outOfRangeValue1)") }.getCause.getMessage - assert(msg.contains(s"""Casting ${outOfRangeValue1}L to "INT" causes overflow""")) + assert(msg.contains( + s"""The value ${outOfRangeValue1}L of the type "BIGINT" cannot be cast to "INT"""")) val outOfRangeValue2 = (Int.MinValue - 1L).toString msg = intercept[SparkException] { sql(s"insert into t values($outOfRangeValue2)") }.getCause.getMessage - assert(msg.contains(s"""Casting ${outOfRangeValue2}L to "INT" causes overflow""")) + assert(msg.contains( + s"""The value ${outOfRangeValue2}L of the type "BIGINT" cannot be cast to "INT"""")) } } } @@ -733,13 +735,15 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { var msg = intercept[SparkException] { sql(s"insert into t values(${outOfRangeValue1}D)") }.getCause.getMessage - assert(msg.contains(s"""Casting ${outOfRangeValue1}D to "BIGINT" causes overflow""")) + assert(msg.contains( + s"""The value ${outOfRangeValue1}D of the type "DOUBLE" cannot be cast to "BIGINT"""")) val outOfRangeValue2 = Math.nextDown(Long.MinValue) msg = intercept[SparkException] { sql(s"insert into t values(${outOfRangeValue2}D)") }.getCause.getMessage - assert(msg.contains(s"""Casting ${outOfRangeValue2}D to "BIGINT" causes overflow""")) + assert(msg.contains( + s"""The value ${outOfRangeValue2}D of the type "DOUBLE" cannot be cast to "BIGINT"""")) } } } From 69b7e1a11597446ea4607cdb578d5404750b0cea Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Thu, 19 May 2022 11:50:05 +0900 Subject: [PATCH 275/535] [SPARK-39219][DOC] Promote Structured Streaming over DStream ### What changes were proposed in this pull request? This PR proposes to add NOTE section for DStream guide doc to promote Structured Streaming. Screenshot: screenshot-spark-streaming-programming-guide-change ### Why are the changes needed? We see efforts of community are more focused on Structured Streaming (based on Spark SQL) than Spark Streaming (DStream). We would like to encourage end users to use Structured Streaming than Spark Streaming whenever possible for their workloads. ### Does this PR introduce _any_ user-facing change? Yes, doc change. ### How was this patch tested? N/A Closes #36590 from HeartSaVioR/SPARK-39219. Authored-by: Jungtaek Lim Signed-off-by: Jungtaek Lim (cherry picked from commit 7d153392db2f61104da0af1cb175f4ee7c7fbc38) Signed-off-by: Jungtaek Lim --- docs/streaming-programming-guide.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index b13bf8dd2e7c2..4a104238a6d20 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -23,6 +23,14 @@ license: | * This will become a table of contents (this text will be scraped). {:toc} +# Note + +Spark Streaming is the previous generation of Spark’s streaming engine. There are no longer +updates to Spark Streaming and it’s a legacy project. There is a newer and easier to use +streaming engine in Spark called Structured Streaming. You should use Spark Structured Streaming +for your streaming applications and pipelines. See +[Structured Streaming Programming Guide](structured-streaming-programming-guide.html). + # Overview Spark Streaming is an extension of the core Spark API that enables scalable, high-throughput, fault-tolerant stream processing of live data streams. Data can be ingested from many sources From 77b131396b773748faceb20b5efa6c2d2f9a01ae Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 19 May 2022 13:40:16 +0800 Subject: [PATCH 276/535] [SPARK-39226][SQL] Fix the precision of the return type of round-like functions ### What changes were proposed in this pull request? Currently, the precision of the return type of round-like functions (round, bround, ceil, floor) with decimal inputs has a few problems: 1. It does not reserve one more digit in the integral part, in the case of rounding. As a result, `CEIL(CAST(99 AS DECIMAL(2, 0)), -1)` fails. 2. It should return more accurate precision, to count for the scale loose. For example, `CEIL(1.23456, 1)` does not need to keep the precision as 7 in the result. 3. `round`/`bround` with negative scale fails if the input is decimal type. This is not a bug but a little weird. This PR fixes these issues by correcting the formula of calculating the returned decimal type. ### Why are the changes needed? Fix bugs ### Does this PR introduce _any_ user-facing change? Yes, the new functions in 3.3:`ceil`/`floor` with scale parameter, can report a more accurate precision in the result type, and can run some certain queries which failed before. The old functions: `round` and `bround`, can support negative scale parameter with decimal inputs. ### How was this patch tested? new tests Closes #36598 from cloud-fan/follow. Authored-by: Wenchen Fan Signed-off-by: Gengliang Wang (cherry picked from commit ee0aecca05af9b0cb256fd81a78430958a09d19f) Signed-off-by: Gengliang Wang --- .../expressions/mathExpressions.scala | 54 ++++++++----------- .../expressions/MathExpressionsSuite.scala | 2 + .../inputs/ceil-floor-with-scale-param.sql | 4 ++ .../ceil-floor-with-scale-param.sql.out | 46 +++++++++++++--- .../results/postgreSQL/numeric.sql.out | 2 +- .../tpcds-query-results/v1_4/q2.sql.out | 2 +- .../apache/spark/sql/MathFunctionsSuite.scala | 4 +- 7 files changed, 72 insertions(+), 42 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala index d8a20f1a6c8b7..228b2a974e2aa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala @@ -317,21 +317,10 @@ object CeilExpressionBuilder extends CeilFloorExpressionBuilderBase { } case class RoundCeil(child: Expression, scale: Expression) - extends RoundBase(child, scale, BigDecimal.RoundingMode.CEILING, "ROUND_CEILING") - with ImplicitCastInputTypes { + extends RoundBase(child, scale, BigDecimal.RoundingMode.CEILING, "ROUND_CEILING") { override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType, IntegerType) - override lazy val dataType: DataType = child.dataType match { - case DecimalType.Fixed(p, s) => - if (_scale < 0) { - DecimalType(math.max(p, 1 - _scale), 0) - } else { - DecimalType(p, math.min(s, _scale)) - } - case t => t - } - override def nodeName: String = "ceil" override protected def withNewChildrenInternal( @@ -563,21 +552,10 @@ object FloorExpressionBuilder extends CeilFloorExpressionBuilderBase { } case class RoundFloor(child: Expression, scale: Expression) - extends RoundBase(child, scale, BigDecimal.RoundingMode.FLOOR, "ROUND_FLOOR") - with ImplicitCastInputTypes { + extends RoundBase(child, scale, BigDecimal.RoundingMode.FLOOR, "ROUND_FLOOR") { override def inputTypes: Seq[AbstractDataType] = Seq(DecimalType, IntegerType) - override lazy val dataType: DataType = child.dataType match { - case DecimalType.Fixed(p, s) => - if (_scale < 0) { - DecimalType(math.max(p, 1 - _scale), 0) - } else { - DecimalType(p, math.min(s, _scale)) - } - case t => t - } - override def nodeName: String = "floor" override protected def withNewChildrenInternal( @@ -1447,9 +1425,21 @@ abstract class RoundBase(child: Expression, scale: Expression, override def foldable: Boolean = child.foldable override lazy val dataType: DataType = child.dataType match { - // if the new scale is bigger which means we are scaling up, - // keep the original scale as `Decimal` does - case DecimalType.Fixed(p, s) => DecimalType(p, if (_scale > s) s else _scale) + case DecimalType.Fixed(p, s) => + // After rounding we may need one more digit in the integral part, + // e.g. `ceil(9.9, 0)` -> `10`, `ceil(99, -1)` -> `100`. + val integralLeastNumDigits = p - s + 1 + if (_scale < 0) { + // negative scale means we need to adjust `-scale` number of digits before the decimal + // point, which means we need at lease `-scale + 1` digits (after rounding). + val newPrecision = math.max(integralLeastNumDigits, -_scale + 1) + // We have to accept the risk of overflow as we can't exceed the max precision. + DecimalType(math.min(newPrecision, DecimalType.MAX_PRECISION), 0) + } else { + val newScale = math.min(s, _scale) + // We have to accept the risk of overflow as we can't exceed the max precision. + DecimalType(math.min(integralLeastNumDigits + newScale, 38), newScale) + } case t => t } @@ -1616,13 +1606,14 @@ abstract class RoundBase(child: Expression, scale: Expression, Examples: > SELECT _FUNC_(2.5, 0); 3 + > SELECT _FUNC_(25, -1); + 30 """, since = "1.5.0", group = "math_funcs") // scalastyle:on line.size.limit case class Round(child: Expression, scale: Expression) - extends RoundBase(child, scale, BigDecimal.RoundingMode.HALF_UP, "ROUND_HALF_UP") - with Serializable with ImplicitCastInputTypes { + extends RoundBase(child, scale, BigDecimal.RoundingMode.HALF_UP, "ROUND_HALF_UP") { def this(child: Expression) = this(child, Literal(0)) override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): Round = copy(child = newLeft, scale = newRight) @@ -1640,13 +1631,14 @@ case class Round(child: Expression, scale: Expression) Examples: > SELECT _FUNC_(2.5, 0); 2 + > SELECT _FUNC_(25, -1); + 20 """, since = "2.0.0", group = "math_funcs") // scalastyle:on line.size.limit case class BRound(child: Expression, scale: Expression) - extends RoundBase(child, scale, BigDecimal.RoundingMode.HALF_EVEN, "ROUND_HALF_EVEN") - with Serializable with ImplicitCastInputTypes { + extends RoundBase(child, scale, BigDecimal.RoundingMode.HALF_EVEN, "ROUND_HALF_EVEN") { def this(child: Expression) = this(child, Literal(0)) override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): BRound = copy(child = newLeft, scale = newRight) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala index 5281643b7b107..947250593a969 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala @@ -806,12 +806,14 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Round(-3.5, 0), -4.0) checkEvaluation(Round(-0.35, 1), -0.4) checkEvaluation(Round(-35, -1), -40) + checkEvaluation(Round(BigDecimal("45.00"), -1), BigDecimal(50)) checkEvaluation(BRound(2.5, 0), 2.0) checkEvaluation(BRound(3.5, 0), 4.0) checkEvaluation(BRound(-2.5, 0), -2.0) checkEvaluation(BRound(-3.5, 0), -4.0) checkEvaluation(BRound(-0.35, 1), -0.4) checkEvaluation(BRound(-35, -1), -40) + checkEvaluation(BRound(BigDecimal("45.00"), -1), BigDecimal(40)) checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(2.5), Literal(0))), Decimal(2)) checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(3.5), Literal(0))), Decimal(3)) checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(-2.5), Literal(0))), Decimal(-3L)) diff --git a/sql/core/src/test/resources/sql-tests/inputs/ceil-floor-with-scale-param.sql b/sql/core/src/test/resources/sql-tests/inputs/ceil-floor-with-scale-param.sql index 1baee30a8cf9a..c05429b3ef772 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/ceil-floor-with-scale-param.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/ceil-floor-with-scale-param.sql @@ -8,6 +8,8 @@ SELECT CEIL(-35, -1); SELECT CEIL(-0.1, 0); SELECT CEIL(5, 0); SELECT CEIL(3.14115, -3); +SELECT CEIL(9.9, 0); +SELECT CEIL(CAST(99 AS DECIMAL(2, 0)), -1); SELECT CEIL(2.5, null); SELECT CEIL(2.5, 'a'); SELECT CEIL(2.5, 0, 0); @@ -22,6 +24,8 @@ SELECT FLOOR(-35, -1); SELECT FLOOR(-0.1, 0); SELECT FLOOR(5, 0); SELECT FLOOR(3.14115, -3); +SELECT FLOOR(-9.9, 0); +SELECT FLOOR(CAST(-99 AS DECIMAL(2, 0)), -1); SELECT FLOOR(2.5, null); SELECT FLOOR(2.5, 'a'); SELECT FLOOR(2.5, 0, 0); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/ceil-floor-with-scale-param.sql.out b/sql/core/src/test/resources/sql-tests/results/ceil-floor-with-scale-param.sql.out index 132bd96350fb1..84eb503c07b77 100644 --- a/sql/core/src/test/resources/sql-tests/results/ceil-floor-with-scale-param.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ceil-floor-with-scale-param.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 24 +-- Number of queries: 28 -- !query @@ -45,7 +45,7 @@ struct -- !query SELECT CEIL(-35, -1) -- !query schema -struct +struct -- !query output -30 @@ -61,7 +61,7 @@ struct -- !query SELECT CEIL(5, 0) -- !query schema -struct +struct -- !query output 5 @@ -69,11 +69,27 @@ struct -- !query SELECT CEIL(3.14115, -3) -- !query schema -struct +struct -- !query output 1000 +-- !query +SELECT CEIL(9.9, 0) +-- !query schema +struct +-- !query output +10 + + +-- !query +SELECT CEIL(CAST(99 AS DECIMAL(2, 0)), -1) +-- !query schema +struct +-- !query output +100 + + -- !query SELECT CEIL(2.5, null) -- !query schema @@ -144,7 +160,7 @@ struct -- !query SELECT FLOOR(-35, -1) -- !query schema -struct +struct -- !query output -40 @@ -160,7 +176,7 @@ struct -- !query SELECT FLOOR(5, 0) -- !query schema -struct +struct -- !query output 5 @@ -168,11 +184,27 @@ struct -- !query SELECT FLOOR(3.14115, -3) -- !query schema -struct +struct -- !query output 0 +-- !query +SELECT FLOOR(-9.9, 0) +-- !query schema +struct +-- !query output +-10 + + +-- !query +SELECT FLOOR(CAST(-99 AS DECIMAL(2, 0)), -1) +-- !query schema +struct +-- !query output +-100 + + -- !query SELECT FLOOR(2.5, null) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out index 9a6cc7eac027b..955b38db944cc 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out @@ -4404,7 +4404,7 @@ struct<> -- !query SELECT a, ceil(a), ceiling(a), floor(a), round(a) FROM ceil_floor_round -- !query schema -struct +struct -- !query output -0.000001000000000000 0 0 -1 0 -5.499999000000000000 -5 -5 -6 -5 diff --git a/sql/core/src/test/resources/tpcds-query-results/v1_4/q2.sql.out b/sql/core/src/test/resources/tpcds-query-results/v1_4/q2.sql.out index 44e1f7bfef7fa..6590eda178569 100644 --- a/sql/core/src/test/resources/tpcds-query-results/v1_4/q2.sql.out +++ b/sql/core/src/test/resources/tpcds-query-results/v1_4/q2.sql.out @@ -1,7 +1,7 @@ -- Automatically generated by TPCDSQueryTestSuite -- !query schema -struct +struct -- !query output 5270 3.18 1.63 2.25 1.64 3.41 3.62 3.72 5270 3.18 1.63 2.25 1.64 3.41 3.62 3.72 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index 1a00491ccb1b1..88071da293ae1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -208,7 +208,7 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { types.StructType(Seq(types.StructField("a", types.LongType)))) assert( spark.range(1).select(ceil(col("id"), lit(0)).alias("a")).schema == - types.StructType(Seq(types.StructField("a", types.DecimalType(20, 0))))) + types.StructType(Seq(types.StructField("a", types.DecimalType(21, 0))))) checkAnswer( sql("SELECT ceiling(0), ceiling(1), ceiling(1.5)"), Row(0L, 1L, 2L)) @@ -263,7 +263,7 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { types.StructType(Seq(types.StructField("a", types.LongType)))) assert( spark.range(1).select(floor(col("id"), lit(0)).alias("a")).schema == - types.StructType(Seq(types.StructField("a", types.DecimalType(20, 0))))) + types.StructType(Seq(types.StructField("a", types.DecimalType(21, 0))))) } test("factorial") { From 669fc1b2c1cce7049a9f10e386ed1af050de3909 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 18 May 2022 23:37:25 -0700 Subject: [PATCH 277/535] [SPARK-39216][SQL] Do not collapse projects in CombineUnions if it hasCorrelatedSubquery ### What changes were proposed in this pull request? Makes `CombineUnions` do not collapse projects if it hasCorrelatedSubquery. For example: ```sql SELECT (SELECT IF(x, 1, 0)) AS a FROM (SELECT true) t(x) UNION SELECT 1 AS a ``` It will throw exception: ``` java.lang.IllegalStateException: Couldn't find x#4 in [] ``` ### Why are the changes needed? Fix bug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #36595 from wangyum/SPARK-39216. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun (cherry picked from commit 85bb7bf008d0346feaedc2aab55857d8f1b19908) Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/optimizer/Optimizer.scala | 4 ++- .../org/apache/spark/sql/SQLQuerySuite.scala | 25 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 94e9d3cdd14f3..02f9a9eb01c07 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1340,7 +1340,9 @@ object CombineUnions extends Rule[LogicalPlan] { while (stack.nonEmpty) { stack.pop() match { case p1 @ Project(_, p2: Project) - if canCollapseExpressions(p1.projectList, p2.projectList, alwaysInline = false) => + if canCollapseExpressions(p1.projectList, p2.projectList, alwaysInline = false) && + !p1.projectList.exists(SubqueryExpression.hasCorrelatedSubquery) && + !p2.projectList.exists(SubqueryExpression.hasCorrelatedSubquery) => val newProjectList = buildCleanedProjectList(p1.projectList, p2.projectList) stack.pushAll(Seq(p2.copy(projectList = newProjectList))) case Distinct(Union(children, byName, allowMissingCol)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 919fe88ec4b32..0761f8e274999 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4478,6 +4478,31 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark )) } } + + test("SPARK-39216: Don't collapse projects in CombineUnions if it hasCorrelatedSubquery") { + checkAnswer( + sql( + """ + |SELECT (SELECT IF(x, 1, 0)) AS a + |FROM (SELECT true) t(x) + |UNION + |SELECT 1 AS a + """.stripMargin), + Seq(Row(1))) + + checkAnswer( + sql( + """ + |SELECT x + 1 + |FROM (SELECT id + | + (SELECT Max(id) + | FROM range(2)) AS x + | FROM range(1)) t + |UNION + |SELECT 1 AS a + """.stripMargin), + Seq(Row(2), Row(1))) + } } case class Foo(bar: Option[String]) From e088c820e1ee5736e130f5d7d1030990b0059141 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 19 May 2022 16:51:20 +0800 Subject: [PATCH 278/535] [SPARK-39212][SQL][3.3] Use double quotes for values of SQL configs/DS options in error messages ### What changes were proposed in this pull request? Wrap values of SQL configs and datasource options in error messages by double quotes. Added the `toDSOption()` method to `QueryErrorsBase` to quote DS options. This is a backport of https://github.com/apache/spark/pull/36579. ### Why are the changes needed? 1. To highlight SQL config/DS option values and make them more visible for users. 2. To be able to easily parse values from error text. 3. To be consistent to other outputs of identifiers, sql statement and etc. where Spark uses quotes or ticks. ### Does this PR introduce _any_ user-facing change? Yes, it changes user-facing error messages. ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "testOnly *QueryCompilationErrorsSuite" $ build/sbt "testOnly *QueryExecutionAnsiErrorsSuite" $ build/sbt "testOnly *QueryExecutionErrorsSuite" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 96f4b7dbc1facd1a38be296263606aa312861c95) Signed-off-by: Max Gekk Closes #36600 from MaxGekk/move-ise-from-query-errors-3.3-2. Authored-by: Max Gekk Signed-off-by: Gengliang Wang --- .../main/resources/error/error-classes.json | 18 ++--- .../apache/spark/SparkThrowableSuite.scala | 4 +- .../spark/sql/errors/QueryErrorsBase.scala | 4 ++ .../sql/errors/QueryExecutionErrors.scala | 20 ++++-- .../sql-tests/results/ansi/array.sql.out | 24 +++---- .../sql-tests/results/ansi/cast.sql.out | 70 +++++++++---------- .../sql-tests/results/ansi/date.sql.out | 6 +- .../ansi/datetime-parsing-invalid.sql.out | 4 +- .../ansi/decimalArithmeticOperations.sql.out | 8 +-- .../sql-tests/results/ansi/interval.sql.out | 40 +++++------ .../sql-tests/results/ansi/map.sql.out | 8 +-- .../results/ansi/string-functions.sql.out | 8 +-- .../sql-tests/results/ansi/timestamp.sql.out | 2 +- .../sql-tests/results/interval.sql.out | 18 ++--- .../results/postgreSQL/boolean.sql.out | 32 ++++----- .../results/postgreSQL/float4.sql.out | 14 ++-- .../results/postgreSQL/float8.sql.out | 10 +-- .../sql-tests/results/postgreSQL/int4.sql.out | 12 ++-- .../sql-tests/results/postgreSQL/int8.sql.out | 22 +++--- .../results/postgreSQL/select_having.sql.out | 2 +- .../sql-tests/results/postgreSQL/text.sql.out | 4 +- .../results/postgreSQL/window_part2.sql.out | 6 +- .../results/postgreSQL/window_part3.sql.out | 2 +- .../results/postgreSQL/window_part4.sql.out | 2 +- .../timestampNTZ/timestamp-ansi.sql.out | 4 +- .../udf/postgreSQL/udf-select_having.sql.out | 2 +- .../errors/QueryExecutionErrorsSuite.scala | 12 ++-- 27 files changed, 184 insertions(+), 174 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 7fef9e563c247..e4ab3a7a353d3 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -4,7 +4,7 @@ "sqlState" : "42000" }, "ARITHMETIC_OVERFLOW" : { - "message" : [ ". If necessary set to false (except for ANSI interval type) to bypass this error." ], + "message" : [ ". If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], "sqlState" : "22003" }, "CANNOT_CAST_DATATYPE" : { @@ -12,7 +12,7 @@ "sqlState" : "22005" }, "CANNOT_CHANGE_DECIMAL_PRECISION" : { - "message" : [ " cannot be represented as Decimal(, ). If necessary set to false to bypass this error.
        " ], + "message" : [ " cannot be represented as Decimal(, ). If necessary set to \"false\" to bypass this error.
        " ], "sqlState" : "22005" }, "CANNOT_PARSE_DECIMAL" : { @@ -26,11 +26,11 @@ "message" : [ "Cannot use a mixture of aggregate function and group aggregate pandas UDF" ] }, "CAST_INVALID_INPUT" : { - "message" : [ "The value of the type cannot be cast to because it is malformed. To return NULL instead, use `try_cast`. If necessary set to false to bypass this error.
        " ], + "message" : [ "The value of the type cannot be cast to because it is malformed. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error.
        " ], "sqlState" : "42000" }, "CAST_OVERFLOW" : { - "message" : [ "The value of the type cannot be cast to due to an overflow. To return NULL instead, use `try_cast`. If necessary set to false to bypass this error." ], + "message" : [ "The value of the type cannot be cast to due to an overflow. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." ], "sqlState" : "22005" }, "CONCURRENT_QUERY" : { @@ -41,7 +41,7 @@ "sqlState" : "22008" }, "DIVIDE_BY_ZERO" : { - "message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set to false (except for ANSI interval type) to bypass this error.
        " ], + "message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set to \"false\" (except for ANSI interval type) to bypass this error.
        " ], "sqlState" : "22012" }, "DUPLICATE_KEY" : { @@ -93,17 +93,17 @@ "message" : [ "" ] }, "INVALID_ARRAY_INDEX" : { - "message" : [ "The index is out of bounds. The array has elements. If necessary set to false to bypass this error." ] + "message" : [ "The index is out of bounds. The array has elements. If necessary set to \"false\" to bypass this error." ] }, "INVALID_ARRAY_INDEX_IN_ELEMENT_AT" : { - "message" : [ "The index is out of bounds. The array has elements. To return NULL instead, use `try_element_at`. If necessary set to false to bypass this error." ] + "message" : [ "The index is out of bounds. The array has elements. To return NULL instead, use `try_element_at`. If necessary set to \"false\" to bypass this error." ] }, "INVALID_FIELD_NAME" : { "message" : [ "Field name is invalid: is not a struct." ], "sqlState" : "42000" }, "INVALID_FRACTION_OF_SECOND" : { - "message" : [ "The fraction of sec must be zero. Valid range is [0, 60]. If necessary set to false to bypass this error. " ], + "message" : [ "The fraction of sec must be zero. Valid range is [0, 60]. If necessary set to \"false\" to bypass this error. " ], "sqlState" : "22023" }, "INVALID_JSON_SCHEMA_MAP_TYPE" : { @@ -118,7 +118,7 @@ "sqlState" : "42000" }, "MAP_KEY_DOES_NOT_EXIST" : { - "message" : [ "Key does not exist. To return NULL instead, use 'try_element_at'. If necessary set to false to bypass this error.
        " ] + "message" : [ "Key does not exist. To return NULL instead, use `try_element_at`. If necessary set to \"false\" to bypass this error.
        " ] }, "MISSING_COLUMN" : { "message" : [ "Column '' does not exist. Did you mean one of the following? []" ], diff --git a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala index acfe721914b48..208e278e2a178 100644 --- a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala @@ -125,8 +125,8 @@ class SparkThrowableSuite extends SparkFunSuite { // Does not fail with too many args (expects 0 args) assert(getMessage("DIVIDE_BY_ZERO", Array("foo", "bar", "baz")) == - "Division by zero. To return NULL instead, use `try_divide`. If necessary set foo to false " + - "(except for ANSI interval type) to bypass this error.bar") + "Division by zero. To return NULL instead, use `try_divide`. If necessary set foo " + + "to \"false\" (except for ANSI interval type) to bypass this error.bar") } test("Error message is formatted") { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index d51ee13acef9a..89bc1039e7340 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -63,4 +63,8 @@ trait QueryErrorsBase { def toSQLConf(conf: String): String = { quoteByDefault(conf) } + + def toDSOption(option: String): String = { + quoteByDefault(option) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index c350a6b28ba5e..9f07dd1ce1925 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -214,8 +214,12 @@ object QueryExecutionErrors extends QueryErrorsBase { } def mapKeyNotExistError(key: Any, dataType: DataType, context: String): NoSuchElementException = { - new SparkNoSuchElementException(errorClass = "MAP_KEY_DOES_NOT_EXIST", - messageParameters = Array(toSQLValue(key, dataType), SQLConf.ANSI_ENABLED.key, context)) + new SparkNoSuchElementException( + errorClass = "MAP_KEY_DOES_NOT_EXIST", + messageParameters = Array( + toSQLValue(key, dataType), + toSQLConf(SQLConf.ANSI_ENABLED.key), + context)) } def inputTypeUnsupportedError(dataType: DataType): Throwable = { @@ -578,6 +582,7 @@ object QueryExecutionErrors extends QueryErrorsBase { new IllegalStateException(s"unrecognized format $format") } + // scalastyle:off line.size.limit def sparkUpgradeInReadingDatesError( format: String, config: String, option: String): SparkUpgradeException = { new SparkUpgradeException( @@ -590,14 +595,15 @@ object QueryExecutionErrors extends QueryErrorsBase { |Spark 2.x or legacy versions of Hive, which uses a legacy hybrid calendar |that is different from Spark 3.0+'s Proleptic Gregorian calendar. |See more details in SPARK-31404. You can set the SQL config ${toSQLConf(config)} or - |the datasource option '$option' to 'LEGACY' to rebase the datetime values + |the datasource option ${toDSOption(option)} to "LEGACY" to rebase the datetime values |w.r.t. the calendar difference during reading. To read the datetime values - |as it is, set the SQL config ${toSQLConf(config)} or the datasource option '$option' - |to 'CORRECTED'. + |as it is, set the SQL config ${toSQLConf(config)} or the datasource option ${toDSOption(option)} + |to "CORRECTED". |""".stripMargin), cause = null ) } + // scalastyle:on line.size.limit def sparkUpgradeInWritingDatesError(format: String, config: String): SparkUpgradeException = { new SparkUpgradeException( @@ -609,9 +615,9 @@ object QueryExecutionErrors extends QueryErrorsBase { |into $format files can be dangerous, as the files may be read by Spark 2.x |or legacy versions of Hive later, which uses a legacy hybrid calendar that |is different from Spark 3.0+'s Proleptic Gregorian calendar. See more - |details in SPARK-31404. You can set ${toSQLConf(config)} to 'LEGACY' to rebase the + |details in SPARK-31404. You can set ${toSQLConf(config)} to "LEGACY" to rebase the |datetime values w.r.t. the calendar difference during writing, to get maximum - |interoperability. Or set ${toSQLConf(config)} to 'CORRECTED' to write the datetime + |interoperability. Or set ${toSQLConf(config)} to "CORRECTED" to write the datetime |values as it is, if you are 100% sure that the written files will only be read by |Spark 3.0+ or other systems that use Proleptic Gregorian calendar. |""".stripMargin), diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out index 64a7cc68b9c4b..25d2704c2c826 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out @@ -168,7 +168,7 @@ select element_at(array(1, 2, 3), 5) struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index 5 is out of bounds. The array has 3 elements. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The index 5 is out of bounds. The array has 3 elements. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -177,7 +177,7 @@ select element_at(array(1, 2, 3), -5) struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index -5 is out of bounds. The array has 3 elements. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The index -5 is out of bounds. The array has 3 elements. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -195,7 +195,7 @@ select elt(4, '123', '456') struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index 4 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The index 4 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -204,7 +204,7 @@ select elt(0, '123', '456') struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index 0 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The index 0 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -213,7 +213,7 @@ select elt(-1, '123', '456') struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index -1 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The index -1 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -254,7 +254,7 @@ select array(1, 2, 3)[5] struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index 5 is out of bounds. The array has 3 elements. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The index 5 is out of bounds. The array has 3 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -263,7 +263,7 @@ select array(1, 2, 3)[-1] struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index -1 is out of bounds. The array has 3 elements. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The index -1 is out of bounds. The array has 3 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -337,7 +337,7 @@ select element_at(array(1, 2, 3), 5) struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index 5 is out of bounds. The array has 3 elements. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The index 5 is out of bounds. The array has 3 elements. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -346,7 +346,7 @@ select element_at(array(1, 2, 3), -5) struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index -5 is out of bounds. The array has 3 elements. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The index -5 is out of bounds. The array has 3 elements. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -364,7 +364,7 @@ select elt(4, '123', '456') struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index 4 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The index 4 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -373,7 +373,7 @@ select elt(0, '123', '456') struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index 0 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The index 0 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -382,4 +382,4 @@ select elt(-1, '123', '456') struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index -1 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The index -1 is out of bounds. The array has 2 elements. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out index 1ca6c64b4e0d1..654433c0ca561 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -8,7 +8,7 @@ SELECT CAST('1.23' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1.23' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '1.23' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('1.23' AS int) ^^^^^^^^^^^^^^^^^^^ @@ -20,7 +20,7 @@ SELECT CAST('1.23' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1.23' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '1.23' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('1.23' AS long) ^^^^^^^^^^^^^^^^^^^^ @@ -32,7 +32,7 @@ SELECT CAST('-4.56' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '-4.56' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '-4.56' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-4.56' AS int) ^^^^^^^^^^^^^^^^^^^^ @@ -44,7 +44,7 @@ SELECT CAST('-4.56' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '-4.56' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '-4.56' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-4.56' AS long) ^^^^^^^^^^^^^^^^^^^^^ @@ -56,7 +56,7 @@ SELECT CAST('abc' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'abc' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('abc' AS int) ^^^^^^^^^^^^^^^^^^ @@ -68,7 +68,7 @@ SELECT CAST('abc' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'abc' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('abc' AS long) ^^^^^^^^^^^^^^^^^^^ @@ -80,7 +80,7 @@ SELECT CAST('abc' AS float) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'abc' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('abc' AS float) ^^^^^^^^^^^^^^^^^^^^ @@ -92,7 +92,7 @@ SELECT CAST('abc' AS double) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'abc' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('abc' AS double) ^^^^^^^^^^^^^^^^^^^^^ @@ -104,7 +104,7 @@ SELECT CAST('1234567890123' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1234567890123' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '1234567890123' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('1234567890123' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -116,7 +116,7 @@ SELECT CAST('12345678901234567890123' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '12345678901234567890123' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '12345678901234567890123' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('12345678901234567890123' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -128,7 +128,7 @@ SELECT CAST('' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('' AS int) ^^^^^^^^^^^^^^^ @@ -140,7 +140,7 @@ SELECT CAST('' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('' AS long) ^^^^^^^^^^^^^^^^ @@ -152,7 +152,7 @@ SELECT CAST('' AS float) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('' AS float) ^^^^^^^^^^^^^^^^^ @@ -164,7 +164,7 @@ SELECT CAST('' AS double) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('' AS double) ^^^^^^^^^^^^^^^^^^ @@ -192,7 +192,7 @@ SELECT CAST('123.a' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '123.a' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('123.a' AS int) ^^^^^^^^^^^^^^^^^^^^ @@ -204,7 +204,7 @@ SELECT CAST('123.a' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '123.a' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('123.a' AS long) ^^^^^^^^^^^^^^^^^^^^^ @@ -216,7 +216,7 @@ SELECT CAST('123.a' AS float) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '123.a' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('123.a' AS float) ^^^^^^^^^^^^^^^^^^^^^^ @@ -228,7 +228,7 @@ SELECT CAST('123.a' AS double) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '123.a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('123.a' AS double) ^^^^^^^^^^^^^^^^^^^^^^^ @@ -248,7 +248,7 @@ SELECT CAST('-2147483649' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '-2147483649' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '-2147483649' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-2147483649' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -268,7 +268,7 @@ SELECT CAST('2147483648' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '2147483648' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '2147483648' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('2147483648' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -288,7 +288,7 @@ SELECT CAST('-9223372036854775809' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '-9223372036854775809' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '-9223372036854775809' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-9223372036854775809' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -308,7 +308,7 @@ SELECT CAST('9223372036854775808' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '9223372036854775808' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '9223372036854775808' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('9223372036854775808' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -567,7 +567,7 @@ select cast('1中文' as tinyint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1中文' of the type "STRING" cannot be cast to "TINYINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "TINYINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as tinyint) ^^^^^^^^^^^^^^^^^^^^^^ @@ -579,7 +579,7 @@ select cast('1中文' as smallint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1中文' of the type "STRING" cannot be cast to "SMALLINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "SMALLINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as smallint) ^^^^^^^^^^^^^^^^^^^^^^^ @@ -591,7 +591,7 @@ select cast('1中文' as INT) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1中文' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as INT) ^^^^^^^^^^^^^^^^^^ @@ -603,7 +603,7 @@ select cast('中文1' as bigint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '中文1' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '中文1' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('中文1' as bigint) ^^^^^^^^^^^^^^^^^^^^^ @@ -615,7 +615,7 @@ select cast('1中文' as bigint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1中文' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as bigint) ^^^^^^^^^^^^^^^^^^^^^ @@ -646,7 +646,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value ' - xyz ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. + xyz ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('\t\n xyz \t\r' as boolean) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -666,7 +666,7 @@ select cast('123.45' as decimal(4, 2)) struct<> -- !query output org.apache.spark.SparkArithmeticException -Decimal(expanded, 123.45, 5, 2) cannot be represented as Decimal(4, 2). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +Decimal(expanded, 123.45, 5, 2) cannot be represented as Decimal(4, 2). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('123.45' as decimal(4, 2)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -678,7 +678,7 @@ select cast('xyz' as decimal(4, 2)) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'xyz' of the type "STRING" cannot be cast to "DECIMAL(4,2)" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'xyz' of the type "STRING" cannot be cast to "DECIMAL(4,2)" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('xyz' as decimal(4, 2)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -698,7 +698,7 @@ select cast('a' as date) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'a' of the type "STRING" cannot be cast to "DATE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DATE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('a' as date) ^^^^^^^^^^^^^^^^^ @@ -718,7 +718,7 @@ select cast('a' as timestamp) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('a' as timestamp) ^^^^^^^^^^^^^^^^^^^^^^ @@ -738,7 +738,7 @@ select cast('a' as timestamp_ntz) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('a' as timestamp_ntz) ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -750,7 +750,7 @@ select cast(cast('inf' as double) as timestamp) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast(cast('inf' as double) as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -762,7 +762,7 @@ select cast(cast('inf' as float) as timestamp) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast(cast('inf' as float) as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index 844ecacc9aa06..2cf50284d6639 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -232,7 +232,7 @@ select next_day("xx", "Mon") struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'xx' of the type "STRING" cannot be cast to "DATE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'xx' of the type "STRING" cannot be cast to "DATE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select next_day("xx", "Mon") ^^^^^^^^^^^^^^^^^^^^^ @@ -327,7 +327,7 @@ select date_add('2011-11-11', '1.2') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select date_add('2011-11-11', '1.2') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -438,7 +438,7 @@ select date_sub(date'2011-11-11', '1.2') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select date_sub(date'2011-11-11', '1.2') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out index 1632ea0a239f2..d1eb604d4fcd2 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out @@ -242,7 +242,7 @@ select cast("Unparseable" as timestamp) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'Unparseable' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'Unparseable' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast("Unparseable" as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -254,7 +254,7 @@ select cast("Unparseable" as date) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'Unparseable' of the type "STRING" cannot be cast to "DATE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'Unparseable' of the type "STRING" cannot be cast to "DATE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast("Unparseable" as date) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out index 5a2f964a31f6b..5db875ff10ad9 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out @@ -76,7 +76,7 @@ select (5e36BD + 0.1) + 5e36BD struct<> -- !query output org.apache.spark.SparkArithmeticException -Decimal(expanded, 10000000000000000000000000000000000000.1, 39, 1) cannot be represented as Decimal(38, 1). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +Decimal(expanded, 10000000000000000000000000000000000000.1, 39, 1) cannot be represented as Decimal(38, 1). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select (5e36BD + 0.1) + 5e36BD ^^^^^^^^^^^^^^^^^^^^^^^ @@ -88,7 +88,7 @@ select (-4e36BD - 0.1) - 7e36BD struct<> -- !query output org.apache.spark.SparkArithmeticException -Decimal(expanded, -11000000000000000000000000000000000000.1, 39, 1) cannot be represented as Decimal(38, 1). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +Decimal(expanded, -11000000000000000000000000000000000000.1, 39, 1) cannot be represented as Decimal(38, 1). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select (-4e36BD - 0.1) - 7e36BD ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -100,7 +100,7 @@ select 12345678901234567890.0 * 12345678901234567890.0 struct<> -- !query output org.apache.spark.SparkArithmeticException -Decimal(expanded, 152415787532388367501905199875019052100, 39, 0) cannot be represented as Decimal(38, 2). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +Decimal(expanded, 152415787532388367501905199875019052100, 39, 0) cannot be represented as Decimal(38, 2). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select 12345678901234567890.0 * 12345678901234567890.0 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -112,7 +112,7 @@ select 1e35BD / 0.1 struct<> -- !query output org.apache.spark.SparkArithmeticException -Decimal(expanded, 1000000000000000000000000000000000000, 37, 0) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +Decimal(expanded, 1000000000000000000000000000000000000, 37, 0) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select 1e35BD / 0.1 ^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index ddb829f6b5f56..5d2ead16511f1 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -122,7 +122,7 @@ select interval 2 second * 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select interval 2 second * 'a' ^^^^^^^^^^^^^^^^^^^^^^^ @@ -134,7 +134,7 @@ select interval 2 second / 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select interval 2 second / 'a' ^^^^^^^^^^^^^^^^^^^^^^^ @@ -146,7 +146,7 @@ select interval 2 year * 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select interval 2 year * 'a' ^^^^^^^^^^^^^^^^^^^^^ @@ -158,7 +158,7 @@ select interval 2 year / 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select interval 2 year / 'a' ^^^^^^^^^^^^^^^^^^^^^ @@ -186,7 +186,7 @@ select 'a' * interval 2 second struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select 'a' * interval 2 second ^^^^^^^^^^^^^^^^^^^^^^^ @@ -198,7 +198,7 @@ select 'a' * interval 2 year struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select 'a' * interval 2 year ^^^^^^^^^^^^^^^^^^^^^ @@ -228,7 +228,7 @@ select interval '2 seconds' / 0 struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. +Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == select interval '2 seconds' / 0 ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -264,7 +264,7 @@ select interval '2' year / 0 struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. +Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == select interval '2' year / 0 ^^^^^^^^^^^^^^^^^^^^^ @@ -664,7 +664,7 @@ select make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789) struct<> -- !query output org.apache.spark.SparkArithmeticException -Decimal(expanded, 1234567890123456789, 20, 0) cannot be represented as Decimal(18, 6). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +Decimal(expanded, 1234567890123456789, 20, 0) cannot be represented as Decimal(18, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1516,7 +1516,7 @@ select '4 11:11' - interval '4 22:12' day to minute struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '4 11:11' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '4 11:11' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select '4 11:11' - interval '4 22:12' day to minute ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1528,7 +1528,7 @@ select '4 12:12:12' + interval '4 22:12' day to minute struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '4 12:12:12' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '4 12:12:12' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select '4 12:12:12' + interval '4 22:12' day to minute ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1566,7 +1566,7 @@ select str - interval '4 22:12' day to minute from interval_view struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select str - interval '4 22:12' day to minute from interval_view ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1578,7 +1578,7 @@ select str + interval '4 22:12' day to minute from interval_view struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select str + interval '4 22:12' day to minute from interval_view ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1789,7 +1789,7 @@ select -(a) from values (interval '-2147483648 months', interval '2147483647 mon struct<> -- !query output org.apache.spark.SparkArithmeticException -integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -- !query @@ -1798,7 +1798,7 @@ select a - b from values (interval '-2147483648 months', interval '2147483647 mo struct<> -- !query output org.apache.spark.SparkArithmeticException -integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -- !query @@ -1807,7 +1807,7 @@ select b + interval '1 month' from values (interval '-2147483648 months', interv struct<> -- !query output org.apache.spark.SparkArithmeticException -integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -- !query @@ -2036,7 +2036,7 @@ SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1 struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2048,7 +2048,7 @@ SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1L struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1L ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2094,7 +2094,7 @@ SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1 struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2106,7 +2106,7 @@ SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1L struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1L ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out index eb2f305e4b01f..b54cc6d48bf38 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out @@ -8,7 +8,7 @@ select element_at(map(1, 'a', 2, 'b'), 5) struct<> -- !query output org.apache.spark.SparkNoSuchElementException -Key 5 does not exist. To return NULL instead, use 'try_element_at'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Key 5 does not exist. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select element_at(map(1, 'a', 2, 'b'), 5) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -20,7 +20,7 @@ select map(1, 'a', 2, 'b')[5] struct<> -- !query output org.apache.spark.SparkNoSuchElementException -Key 5 does not exist. To return NULL instead, use 'try_element_at'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Key 5 does not exist. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select map(1, 'a', 2, 'b')[5] ^^^^^^^^^^^^^^^^^^^^^^ @@ -114,7 +114,7 @@ select element_at(map(1, 'a', 2, 'b'), 5) struct<> -- !query output org.apache.spark.SparkNoSuchElementException -Key 5 does not exist. To return NULL instead, use 'try_element_at'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Key 5 does not exist. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select element_at(map(1, 'a', 2, 'b'), 5) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -126,7 +126,7 @@ select element_at(map('a', 1, 'b', 2), 'c') struct<> -- !query output org.apache.spark.SparkNoSuchElementException -Key 'c' does not exist. To return NULL instead, use 'try_element_at'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +Key 'c' does not exist. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select element_at(map('a', 1, 'b', 2), 'c') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 409ef51edd570..ad388e211f588 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -82,7 +82,7 @@ select left("abcd", -2), left("abcd", 0), left("abcd", 'a') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 42) == ...t("abcd", -2), left("abcd", 0), left("abcd", 'a') ^^^^^^^^^^^^^^^^^ @@ -110,7 +110,7 @@ select right("abcd", -2), right("abcd", 0), right("abcd", 'a') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 44) == ...("abcd", -2), right("abcd", 0), right("abcd", 'a') ^^^^^^^^^^^^^^^^^^ @@ -419,7 +419,7 @@ SELECT lpad('hi', 'invalid_length') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT lpad('hi', 'invalid_length') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -431,7 +431,7 @@ SELECT rpad('hi', 'invalid_length') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT rpad('hi', 'invalid_length') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out index 7a76e36846076..368cab2eaeac3 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out @@ -98,7 +98,7 @@ SELECT make_timestamp(2021, 07, 11, 6, 30, 60.007) struct<> -- !query output org.apache.spark.SparkDateTimeException -The fraction of sec must be zero. Valid range is [0, 60]. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The fraction of sec must be zero. Valid range is [0, 60]. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 61b4f20e5fdbe..01cc7efb492a0 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -204,7 +204,7 @@ select interval '2 seconds' / 0 struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. +Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == select interval '2 seconds' / 0 ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -240,7 +240,7 @@ select interval '2' year / 0 struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. +Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == select interval '2' year / 0 ^^^^^^^^^^^^^^^^^^^^^ @@ -1745,7 +1745,7 @@ select -(a) from values (interval '-2147483648 months', interval '2147483647 mon struct<> -- !query output org.apache.spark.SparkArithmeticException -integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -- !query @@ -1754,7 +1754,7 @@ select a - b from values (interval '-2147483648 months', interval '2147483647 mo struct<> -- !query output org.apache.spark.SparkArithmeticException -integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -- !query @@ -1763,7 +1763,7 @@ select b + interval '1 month' from values (interval '-2147483648 months', interv struct<> -- !query output org.apache.spark.SparkArithmeticException -integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -- !query @@ -1992,7 +1992,7 @@ SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1 struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2004,7 +2004,7 @@ SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1L struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1L ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2050,7 +2050,7 @@ SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1 struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2062,7 +2062,7 @@ SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1L struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1L ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out index 45bbaa955b2b3..fe23273c4d9a9 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out @@ -56,7 +56,7 @@ SELECT boolean('test') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'test' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'test' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('test') AS error ^^^^^^^^^^^^^^^ @@ -76,7 +76,7 @@ SELECT boolean('foo') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'foo' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'foo' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('foo') AS error ^^^^^^^^^^^^^^ @@ -104,7 +104,7 @@ SELECT boolean('yeah') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'yeah' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'yeah' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('yeah') AS error ^^^^^^^^^^^^^^^ @@ -132,7 +132,7 @@ SELECT boolean('nay') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'nay' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'nay' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('nay') AS error ^^^^^^^^^^^^^^ @@ -144,7 +144,7 @@ SELECT boolean('on') AS true struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'on' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'on' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('on') AS true ^^^^^^^^^^^^^ @@ -156,7 +156,7 @@ SELECT boolean('off') AS `false` struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'off' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'off' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('off') AS `false` ^^^^^^^^^^^^^^ @@ -168,7 +168,7 @@ SELECT boolean('of') AS `false` struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'of' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'of' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('of') AS `false` ^^^^^^^^^^^^^ @@ -180,7 +180,7 @@ SELECT boolean('o') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'o' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'o' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('o') AS error ^^^^^^^^^^^^ @@ -192,7 +192,7 @@ SELECT boolean('on_') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'on_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'on_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('on_') AS error ^^^^^^^^^^^^^^ @@ -204,7 +204,7 @@ SELECT boolean('off_') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'off_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'off_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('off_') AS error ^^^^^^^^^^^^^^^ @@ -224,7 +224,7 @@ SELECT boolean('11') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value '11' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '11' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('11') AS error ^^^^^^^^^^^^^ @@ -244,7 +244,7 @@ SELECT boolean('000') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value '000' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '000' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('000') AS error ^^^^^^^^^^^^^^ @@ -256,7 +256,7 @@ SELECT boolean('') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('') AS error ^^^^^^^^^^^ @@ -365,7 +365,7 @@ SELECT boolean(string(' tru e ')) AS invalid struct<> -- !query output org.apache.spark.SparkRuntimeException -The value ' tru e ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value ' tru e ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean(string(' tru e ')) AS invalid ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -377,7 +377,7 @@ SELECT boolean(string('')) AS invalid struct<> -- !query output org.apache.spark.SparkRuntimeException -The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean(string('')) AS invalid ^^^^^^^^^^^^^^^^^^^ @@ -524,7 +524,7 @@ INSERT INTO BOOLTBL2 struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('XXX' AS BOOLEAN): The value 'XXX' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +failed to evaluate expression CAST('XXX' AS BOOLEAN): The value 'XXX' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 2, position 11) == VALUES (boolean('XXX')) ^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out index fa8736d089b35..a1399062419c9 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out @@ -96,7 +96,7 @@ SELECT float('N A N') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'N A N' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'N A N' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT float('N A N') ^^^^^^^^^^^^^^ @@ -108,7 +108,7 @@ SELECT float('NaN x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'NaN x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'NaN x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT float('NaN x') ^^^^^^^^^^^^^^ @@ -120,7 +120,7 @@ SELECT float(' INFINITY x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value ' INFINITY x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value ' INFINITY x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT float(' INFINITY x') ^^^^^^^^^^^^^^^^^^^^^^^ @@ -156,7 +156,7 @@ SELECT float(decimal('nan')) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 13) == SELECT float(decimal('nan')) ^^^^^^^^^^^^^^ @@ -340,7 +340,7 @@ SELECT int(float('2147483647')) struct<> -- !query output org.apache.spark.SparkArithmeticException -The value 2.14748365E9 of the type "FLOAT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 2.14748365E9 of the type "FLOAT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -357,7 +357,7 @@ SELECT int(float('-2147483900')) struct<> -- !query output org.apache.spark.SparkArithmeticException -The value -2.1474839E9 of the type "FLOAT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value -2.1474839E9 of the type "FLOAT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -390,7 +390,7 @@ SELECT bigint(float('-9223380000000000000')) struct<> -- !query output org.apache.spark.SparkArithmeticException -The value -9.22338E18 of the type "FLOAT" cannot be cast to "BIGINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value -9.22338E18 of the type "FLOAT" cannot be cast to "BIGINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out index f90d7c663e0f3..270332cd19664 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out @@ -128,7 +128,7 @@ SELECT double('N A N') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'N A N' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'N A N' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT double('N A N') ^^^^^^^^^^^^^^^ @@ -140,7 +140,7 @@ SELECT double('NaN x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'NaN x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'NaN x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT double('NaN x') ^^^^^^^^^^^^^^^ @@ -152,7 +152,7 @@ SELECT double(' INFINITY x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value ' INFINITY x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value ' INFINITY x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT double(' INFINITY x') ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -188,7 +188,7 @@ SELECT double(decimal('nan')) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 14) == SELECT double(decimal('nan')) ^^^^^^^^^^^^^^ @@ -845,7 +845,7 @@ SELECT bigint(double('-9223372036854780000')) struct<> -- !query output org.apache.spark.SparkArithmeticException -The value -9.22337203685478E18D of the type "DOUBLE" cannot be cast to "BIGINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value -9.22337203685478E18D of the type "DOUBLE" cannot be cast to "BIGINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out index 144a01511f271..265aaa7ce2af9 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out @@ -200,7 +200,7 @@ SELECT '' AS five, i.f1, i.f1 * smallint('2') AS x FROM INT4_TBL i struct<> -- !query output org.apache.spark.SparkArithmeticException -integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 25) == SELECT '' AS five, i.f1, i.f1 * smallint('2') AS x FROM INT4_TBL i ^^^^^^^^^^^^^^^^^^^^ @@ -223,7 +223,7 @@ SELECT '' AS five, i.f1, i.f1 * int('2') AS x FROM INT4_TBL i struct<> -- !query output org.apache.spark.SparkArithmeticException -integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 25) == SELECT '' AS five, i.f1, i.f1 * int('2') AS x FROM INT4_TBL i ^^^^^^^^^^^^^^^ @@ -246,7 +246,7 @@ SELECT '' AS five, i.f1, i.f1 + smallint('2') AS x FROM INT4_TBL i struct<> -- !query output org.apache.spark.SparkArithmeticException -integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 25) == SELECT '' AS five, i.f1, i.f1 + smallint('2') AS x FROM INT4_TBL i ^^^^^^^^^^^^^^^^^^^^ @@ -270,7 +270,7 @@ SELECT '' AS five, i.f1, i.f1 + int('2') AS x FROM INT4_TBL i struct<> -- !query output org.apache.spark.SparkArithmeticException -integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 25) == SELECT '' AS five, i.f1, i.f1 + int('2') AS x FROM INT4_TBL i ^^^^^^^^^^^^^^^ @@ -294,7 +294,7 @@ SELECT '' AS five, i.f1, i.f1 - smallint('2') AS x FROM INT4_TBL i struct<> -- !query output org.apache.spark.SparkArithmeticException -integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 25) == SELECT '' AS five, i.f1, i.f1 - smallint('2') AS x FROM INT4_TBL i ^^^^^^^^^^^^^^^^^^^^ @@ -318,7 +318,7 @@ SELECT '' AS five, i.f1, i.f1 - int('2') AS x FROM INT4_TBL i struct<> -- !query output org.apache.spark.SparkArithmeticException -integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 25) == SELECT '' AS five, i.f1, i.f1 - int('2') AS x FROM INT4_TBL i ^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out index bbdee119a51bf..7761127d7b5ad 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out @@ -392,7 +392,7 @@ SELECT '' AS three, q1, q2, q1 * q2 AS multiply FROM INT8_TBL struct<> -- !query output org.apache.spark.SparkArithmeticException -long overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +long overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 28) == SELECT '' AS three, q1, q2, q1 * q2 AS multiply FROM INT8_TBL ^^^^^^^ @@ -575,7 +575,7 @@ select bigint('9223372036854775800') / bigint('0') struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. +Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == select bigint('9223372036854775800') / bigint('0') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -587,7 +587,7 @@ select bigint('-9223372036854775808') / smallint('0') struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. +Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == select bigint('-9223372036854775808') / smallint('0') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -599,7 +599,7 @@ select smallint('100') / bigint('0') struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. +Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == select smallint('100') / bigint('0') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -619,7 +619,7 @@ SELECT CAST(q1 AS int) FROM int8_tbl WHERE q2 <> 456 struct<> -- !query output org.apache.spark.SparkArithmeticException -The value 4567890123456789L of the type "BIGINT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 4567890123456789L of the type "BIGINT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -636,7 +636,7 @@ SELECT CAST(q1 AS smallint) FROM int8_tbl WHERE q2 <> 456 struct<> -- !query output org.apache.spark.SparkArithmeticException -The value 4567890123456789L of the type "BIGINT" cannot be cast to "SMALLINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 4567890123456789L of the type "BIGINT" cannot be cast to "SMALLINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -673,7 +673,7 @@ SELECT CAST(double('922337203685477580700.0') AS bigint) struct<> -- !query output org.apache.spark.SparkArithmeticException -The value 9.223372036854776E20D of the type "DOUBLE" cannot be cast to "BIGINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 9.223372036854776E20D of the type "DOUBLE" cannot be cast to "BIGINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -745,7 +745,7 @@ SELECT string(int(shiftleft(bigint(-1), 63))+1) struct<> -- !query output org.apache.spark.SparkArithmeticException -The value -9223372036854775808L of the type "BIGINT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value -9223372036854775808L of the type "BIGINT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -754,7 +754,7 @@ SELECT bigint((-9223372036854775808)) * bigint((-1)) struct<> -- !query output org.apache.spark.SparkArithmeticException -long overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +long overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT bigint((-9223372036854775808)) * bigint((-1)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -782,7 +782,7 @@ SELECT bigint((-9223372036854775808)) * int((-1)) struct<> -- !query output org.apache.spark.SparkArithmeticException -long overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +long overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT bigint((-9223372036854775808)) * int((-1)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -810,7 +810,7 @@ SELECT bigint((-9223372036854775808)) * smallint((-1)) struct<> -- !query output org.apache.spark.SparkArithmeticException -long overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +long overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 7) == SELECT bigint((-9223372036854775808)) * smallint((-1)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out index d91adc7ed244f..7a9f4ae055cc2 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out @@ -177,7 +177,7 @@ SELECT 1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2 struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. +Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 39) == ...1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2 ^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out index 39e2603fd265b..ed218c1a52c3c 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out @@ -65,7 +65,7 @@ select string('four: ') || 2+2 struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select string('four: ') || 2+2 ^^^^^^^^^^^^^^^^^^^^^^^ @@ -77,7 +77,7 @@ select 'four: ' || 2+2 struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select 'four: ' || 2+2 ^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out index 2c335898d65bc..58633790cf793 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out @@ -225,7 +225,7 @@ from range(9223372036854775804, 9223372036854775807) x struct<> -- !query output org.apache.spark.SparkArithmeticException -long overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +long overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -- !query @@ -235,7 +235,7 @@ from range(-9223372036854775806, -9223372036854775805) x struct<> -- !query output org.apache.spark.SparkArithmeticException -long overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +long overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -- !query @@ -462,7 +462,7 @@ window w as (order by f_numeric range between struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'NaN' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value 'NaN' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 3, position 12) == window w as (order by f_numeric range between ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index 418c997361183..68f9d532a1cd5 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -72,7 +72,7 @@ insert into datetimes values struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): The value '11:00 BST' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): The value '11:00 BST' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 2, position 23) == (1, timestamp '11:00', cast ('11:00 BST' as timestamp), cast ('1 year' as timestamp), ... ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out index 3e30ad2694100..f3f4a448df69c 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out @@ -501,7 +501,7 @@ FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('nan' AS INT): The value 'nan' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +failed to evaluate expression CAST('nan' AS INT): The value 'nan' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 3, position 28) == FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) ^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index e6f62b679c638..e374f92c74e93 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out @@ -98,7 +98,7 @@ SELECT make_timestamp(2021, 07, 11, 6, 30, 60.007) struct<> -- !query output org.apache.spark.SparkDateTimeException -The fraction of sec must be zero. Valid range is [0, 60]. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The fraction of sec must be zero. Valid range is [0, 60]. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -332,7 +332,7 @@ select to_timestamp(1) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '1' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +The value '1' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out index dfb287ff023f5..f16a680cfbcc0 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out @@ -177,7 +177,7 @@ SELECT 1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2 struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to false (except for ANSI interval type) to bypass this error. +Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 39) == ...1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2 ^^^^^^^^ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 96a29f6dab6f5..e6ce1d7008039 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -181,7 +181,7 @@ class QueryExecutionErrorsSuite extends QueryTest val format = "Parquet" val config = "\"" + SQLConf.PARQUET_REBASE_MODE_IN_READ.key + "\"" - val option = "datetimeRebaseMode" + val option = "\"" + "datetimeRebaseMode" + "\"" assert(e.getErrorClass === "INCONSISTENT_BEHAVIOR_CROSS_VERSION") assert(e.getMessage === "You may get a different result due to the upgrading to Spark >= 3.0: " + @@ -191,10 +191,10 @@ class QueryExecutionErrorsSuite extends QueryTest |Spark 2.x or legacy versions of Hive, which uses a legacy hybrid calendar |that is different from Spark 3.0+'s Proleptic Gregorian calendar. |See more details in SPARK-31404. You can set the SQL config $config or - |the datasource option '$option' to 'LEGACY' to rebase the datetime values + |the datasource option $option to "LEGACY" to rebase the datetime values |w.r.t. the calendar difference during reading. To read the datetime values - |as it is, set the SQL config $config or the datasource option '$option' - |to 'CORRECTED'. + |as it is, set the SQL config $config or the datasource option $option + |to "CORRECTED". |""".stripMargin) } @@ -216,9 +216,9 @@ class QueryExecutionErrorsSuite extends QueryTest |into $format files can be dangerous, as the files may be read by Spark 2.x |or legacy versions of Hive later, which uses a legacy hybrid calendar that |is different from Spark 3.0+'s Proleptic Gregorian calendar. See more - |details in SPARK-31404. You can set $config to 'LEGACY' to rebase the + |details in SPARK-31404. You can set $config to "LEGACY" to rebase the |datetime values w.r.t. the calendar difference during writing, to get maximum - |interoperability. Or set $config to 'CORRECTED' to write the datetime + |interoperability. Or set $config to "CORRECTED" to write the datetime |values as it is, if you are 100% sure that the written files will only be read by |Spark 3.0+ or other systems that use Proleptic Gregorian calendar. |""".stripMargin) From 88c076de9d042d1e57b324634a913ab7dfdc1db3 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 19 May 2022 20:02:44 +0800 Subject: [PATCH 279/535] [SPARK-39229][SQL][3.3] Separate query contexts from error-classes.json ### What changes were proposed in this pull request? Separate query contexts for runtime errors from error-classes.json. ### Why are the changes needed? The message is JSON should only contain parameters explicitly thrown. It is more elegant to separate query contexts from error-classes.json. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #36607 from gengliangwang/SPARK-39229-3.3. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../spark/memory/SparkOutOfMemoryError.java | 2 +- .../main/resources/error/error-classes.json | 10 +++--- .../scala/org/apache/spark/ErrorInfo.scala | 7 ++-- .../org/apache/spark/SparkException.scala | 34 +++++++++++++------ .../apache/spark/SparkThrowableSuite.scala | 2 +- .../sql/errors/QueryExecutionErrors.scala | 29 +++++++++------- 6 files changed, 52 insertions(+), 32 deletions(-) diff --git a/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java b/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java index 88eada34e3c18..7c992c80f4641 100644 --- a/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java +++ b/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java @@ -39,7 +39,7 @@ public SparkOutOfMemoryError(OutOfMemoryError e) { } public SparkOutOfMemoryError(String errorClass, String[] messageParameters) { - super(SparkThrowableHelper.getMessage(errorClass, messageParameters)); + super(SparkThrowableHelper.getMessage(errorClass, messageParameters, "")); this.errorClass = errorClass; this.messageParameters = messageParameters; } diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index e4ab3a7a353d3..60a432163b5a9 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -4,7 +4,7 @@ "sqlState" : "42000" }, "ARITHMETIC_OVERFLOW" : { - "message" : [ ". If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], + "message" : [ ". If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], "sqlState" : "22003" }, "CANNOT_CAST_DATATYPE" : { @@ -12,7 +12,7 @@ "sqlState" : "22005" }, "CANNOT_CHANGE_DECIMAL_PRECISION" : { - "message" : [ " cannot be represented as Decimal(, ). If necessary set to \"false\" to bypass this error.
        " ], + "message" : [ " cannot be represented as Decimal(, ). If necessary set to \"false\" to bypass this error." ], "sqlState" : "22005" }, "CANNOT_PARSE_DECIMAL" : { @@ -26,7 +26,7 @@ "message" : [ "Cannot use a mixture of aggregate function and group aggregate pandas UDF" ] }, "CAST_INVALID_INPUT" : { - "message" : [ "The value of the type cannot be cast to because it is malformed. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error.
        " ], + "message" : [ "The value of the type cannot be cast to because it is malformed. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." ], "sqlState" : "42000" }, "CAST_OVERFLOW" : { @@ -41,7 +41,7 @@ "sqlState" : "22008" }, "DIVIDE_BY_ZERO" : { - "message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set to \"false\" (except for ANSI interval type) to bypass this error.
        " ], + "message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], "sqlState" : "22012" }, "DUPLICATE_KEY" : { @@ -118,7 +118,7 @@ "sqlState" : "42000" }, "MAP_KEY_DOES_NOT_EXIST" : { - "message" : [ "Key does not exist. To return NULL instead, use `try_element_at`. If necessary set to \"false\" to bypass this error.
        " ] + "message" : [ "Key does not exist. To return NULL instead, use `try_element_at`. If necessary set to \"false\" to bypass this error." ] }, "MISSING_COLUMN" : { "message" : [ "Column '' does not exist. Did you mean one of the following? []" ], diff --git a/core/src/main/scala/org/apache/spark/ErrorInfo.scala b/core/src/main/scala/org/apache/spark/ErrorInfo.scala index 6cb8f4d8ed33d..41c0e83f917b4 100644 --- a/core/src/main/scala/org/apache/spark/ErrorInfo.scala +++ b/core/src/main/scala/org/apache/spark/ErrorInfo.scala @@ -55,11 +55,14 @@ private[spark] object SparkThrowableHelper { mapper.readValue(errorClassesUrl, new TypeReference[SortedMap[String, ErrorInfo]]() {}) } - def getMessage(errorClass: String, messageParameters: Array[String]): String = { + def getMessage( + errorClass: String, + messageParameters: Array[String], + queryContext: String = ""): String = { val errorInfo = errorClassToInfoMap.getOrElse(errorClass, throw new IllegalArgumentException(s"Cannot find error class '$errorClass'")) String.format(errorInfo.messageFormat.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), - messageParameters: _*) + messageParameters: _*) + queryContext } def getSqlState(errorClass: String): String = { diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala index ed6e811a4cc26..2483015f4a968 100644 --- a/core/src/main/scala/org/apache/spark/SparkException.scala +++ b/core/src/main/scala/org/apache/spark/SparkException.scala @@ -91,8 +91,12 @@ private[spark] class SparkUpgradeException( /** * Arithmetic exception thrown from Spark with an error class. */ -private[spark] class SparkArithmeticException(errorClass: String, messageParameters: Array[String]) - extends ArithmeticException(SparkThrowableHelper.getMessage(errorClass, messageParameters)) +private[spark] class SparkArithmeticException( + errorClass: String, + messageParameters: Array[String], + queryContext: String = "") + extends ArithmeticException( + SparkThrowableHelper.getMessage(errorClass, messageParameters, queryContext)) with SparkThrowable { override def getErrorClass: String = errorClass @@ -139,9 +143,13 @@ private[spark] class SparkConcurrentModificationException( /** * Datetime exception thrown from Spark with an error class. */ -private[spark] class SparkDateTimeException(errorClass: String, messageParameters: Array[String]) +private[spark] class SparkDateTimeException( + errorClass: String, + messageParameters: Array[String], + queryContext: String = "") extends DateTimeException( - SparkThrowableHelper.getMessage(errorClass, messageParameters)) with SparkThrowable { + SparkThrowableHelper.getMessage(errorClass, messageParameters, queryContext)) + with SparkThrowable { override def getErrorClass: String = errorClass } @@ -175,9 +183,11 @@ private[spark] class SparkFileNotFoundException( */ private[spark] class SparkNumberFormatException( errorClass: String, - messageParameters: Array[String]) + messageParameters: Array[String], + queryContext: String) extends NumberFormatException( - SparkThrowableHelper.getMessage(errorClass, messageParameters)) with SparkThrowable { + SparkThrowableHelper.getMessage(errorClass, messageParameters, queryContext)) + with SparkThrowable { override def getErrorClass: String = errorClass } @@ -233,9 +243,11 @@ private[spark] class SparkIOException( private[spark] class SparkRuntimeException( errorClass: String, messageParameters: Array[String], - cause: Throwable = null) + cause: Throwable = null, + queryContext: String = "") extends RuntimeException( - SparkThrowableHelper.getMessage(errorClass, messageParameters), cause) with SparkThrowable { + SparkThrowableHelper.getMessage(errorClass, messageParameters, queryContext), cause) + with SparkThrowable { override def getErrorClass: String = errorClass } @@ -281,9 +293,11 @@ private[spark] class SparkSQLException( */ private[spark] class SparkNoSuchElementException( errorClass: String, - messageParameters: Array[String]) + messageParameters: Array[String], + queryContext: String) extends NoSuchElementException( - SparkThrowableHelper.getMessage(errorClass, messageParameters)) with SparkThrowable { + SparkThrowableHelper.getMessage(errorClass, messageParameters, queryContext)) + with SparkThrowable { override def getErrorClass: String = errorClass } diff --git a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala index 208e278e2a178..73135d0f1c790 100644 --- a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala @@ -126,7 +126,7 @@ class SparkThrowableSuite extends SparkFunSuite { // Does not fail with too many args (expects 0 args) assert(getMessage("DIVIDE_BY_ZERO", Array("foo", "bar", "baz")) == "Division by zero. To return NULL instead, use `try_divide`. If necessary set foo " + - "to \"false\" (except for ANSI interval type) to bypass this error.bar") + "to \"false\" (except for ANSI interval type) to bypass this error.") } test("Error message is formatted") { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 9f07dd1ce1925..487be632f62db 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -104,8 +104,8 @@ object QueryExecutionErrors extends QueryErrorsBase { value.toDebugString, decimalPrecision.toString, decimalScale.toString, - toSQLConf(SQLConf.ANSI_ENABLED.key), - context)) + toSQLConf(SQLConf.ANSI_ENABLED.key)), + queryContext = context) } def invalidInputInCastToDatetimeError( @@ -119,8 +119,8 @@ object QueryExecutionErrors extends QueryErrorsBase { toSQLValue(value, from), toSQLType(from), toSQLType(to), - toSQLConf(SQLConf.ANSI_ENABLED.key), - errorContext)) + toSQLConf(SQLConf.ANSI_ENABLED.key)), + queryContext = errorContext) } def invalidInputSyntaxForBooleanError( @@ -132,8 +132,8 @@ object QueryExecutionErrors extends QueryErrorsBase { toSQLValue(s, StringType), toSQLType(StringType), toSQLType(BooleanType), - toSQLConf(SQLConf.ANSI_ENABLED.key), - errorContext)) + toSQLConf(SQLConf.ANSI_ENABLED.key)), + queryContext = errorContext) } def invalidInputInCastToNumberError( @@ -146,8 +146,8 @@ object QueryExecutionErrors extends QueryErrorsBase { toSQLValue(s, StringType), toSQLType(StringType), toSQLType(to), - toSQLConf(SQLConf.ANSI_ENABLED.key), - errorContext)) + toSQLConf(SQLConf.ANSI_ENABLED.key)), + queryContext = errorContext) } def cannotCastFromNullTypeError(to: DataType): Throwable = { @@ -180,7 +180,8 @@ object QueryExecutionErrors extends QueryErrorsBase { def divideByZeroError(context: String): ArithmeticException = { new SparkArithmeticException( errorClass = "DIVIDE_BY_ZERO", - messageParameters = Array(toSQLConf(SQLConf.ANSI_ENABLED.key), context)) + messageParameters = Array(toSQLConf(SQLConf.ANSI_ENABLED.key)), + queryContext = context) } def invalidArrayIndexError(index: Int, numElements: Int): ArrayIndexOutOfBoundsException = { @@ -218,8 +219,8 @@ object QueryExecutionErrors extends QueryErrorsBase { errorClass = "MAP_KEY_DOES_NOT_EXIST", messageParameters = Array( toSQLValue(key, dataType), - toSQLConf(SQLConf.ANSI_ENABLED.key), - context)) + toSQLConf(SQLConf.ANSI_ENABLED.key)), + queryContext = context) } def inputTypeUnsupportedError(dataType: DataType): Throwable = { @@ -493,8 +494,10 @@ object QueryExecutionErrors extends QueryErrorsBase { hint: String = "", errorContext: String = ""): ArithmeticException = { val alternative = if (hint.nonEmpty) s" To return NULL instead, use '$hint'." else "" - new SparkArithmeticException("ARITHMETIC_OVERFLOW", - Array(message, alternative, SQLConf.ANSI_ENABLED.key, errorContext)) + new SparkArithmeticException( + errorClass = "ARITHMETIC_OVERFLOW", + messageParameters = Array(message, alternative, SQLConf.ANSI_ENABLED.key), + queryContext = errorContext) } def unaryMinusCauseOverflowError(originValue: Int): ArithmeticException = { From 24a3fa95a384159004d45cf2c01a699f1c2e55f7 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 19 May 2022 20:34:50 +0800 Subject: [PATCH 280/535] [SPARK-39233][SQL] Remove the check for TimestampNTZ output in Analyzer ### What changes were proposed in this pull request? In [#36094](https://github.com/apache/spark/pull/36094), a check for failing TimestampNTZ type output(since we are disabling TimestampNTZ in 3.3) is added: ``` case operator: LogicalPlan if !Utils.isTesting && operator.output.exists(attr => attr.resolved && attr.dataType.isInstanceOf[TimestampNTZType]) => operator.failAnalysis("TimestampNTZ type is not supported in Spark 3.3.") ``` However, the check can cause misleading error messages. In 3.3: ``` > sql( "select date '2018-11-17' > 1").show() org.apache.spark.sql.AnalysisException: Invalid call to toAttribute on unresolved object; 'Project [unresolvedalias((2018-11-17 > 1), None)] +- OneRowRelation at org.apache.spark.sql.catalyst.analysis.UnresolvedAlias.toAttribute(unresolved.scala:510) at org.apache.spark.sql.catalyst.plans.logical.Project.$anonfun$output$1(basicLogicalOperators.scala:70) ``` In master or 3.2 ``` > sql( "select date '2018-11-17' > 1").show() org.apache.spark.sql.AnalysisException: cannot resolve '(DATE '2018-11-17' > 1)' due to data type mismatch: differing types in '(DATE '2018-11-17' > 1)' (date and int).; line 1 pos 7; 'Project [unresolvedalias((2018-11-17 > 1), None)] +- OneRowRelation at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42) ``` We should just remove the check to avoid such regression. It's not necessary for disabling TimestampNTZ anyway. ### Why are the changes needed? Fix regression in the error output of analysis check. ### Does this PR introduce _any_ user-facing change? No, it is not released yet. ### How was this patch tested? Build and try on `spark-shell` Closes #36609 from gengliangwang/fixRegression. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index f89fbe59af62a..b9f3b3b824bf8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -32,7 +32,6 @@ import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils -import org.apache.spark.util.Utils /** * Throws user facing errors when passed invalid queries that fail to analyze. @@ -160,11 +159,6 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { case _: ShowTableExtended => throw QueryCompilationErrors.commandUnsupportedInV2TableError("SHOW TABLE EXTENDED") - case operator: LogicalPlan - if !Utils.isTesting && operator.output.exists(attr => - attr.resolved && attr.dataType.isInstanceOf[TimestampNTZType]) => - operator.failAnalysis("TimestampNTZ type is not supported in Spark 3.3.") - case operator: LogicalPlan => // Check argument data types of higher-order functions downwards first. // If the arguments of the higher-order functions are resolved but the type check fails, From 2977791ca7974aaacdf02f9c7b4f7bd83a8c2628 Mon Sep 17 00:00:00 2001 From: minyyy Date: Thu, 19 May 2022 22:52:11 +0800 Subject: [PATCH 281/535] [SPARK-38529][SQL] Prevent GeneratorNestedColumnAliasing to be applied to non-Explode generators ### What changes were proposed in this pull request? 1. Explicitly return in GeneratorNestedColumnAliasing when the generator is not Explode. 2. Add extensive comment to GeneratorNestedColumnAliasing. 3. An off-hand code refactor to make the code clearer. ### Why are the changes needed? GeneratorNestedColumnAliasing does not handle other generators correctly. We only try to rewrite the generator for Explode but try to rewrite all ExtractValue expressions. This can cause inconsistency for non-Explode generators. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests. Closes #35850 from minyyy/gnca_non_explode. Authored-by: minyyy Signed-off-by: Wenchen Fan (cherry picked from commit 026102489b8edce827a05a1dba3b0ef8687f134f) Signed-off-by: Wenchen Fan --- .../optimizer/NestedColumnAliasing.scala | 36 +++++++++++++++++++ .../optimizer/NestedColumnAliasingSuite.scala | 12 +++++++ 2 files changed, 48 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala index 45f84c21b7d66..6ba7907fdab4f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala @@ -342,6 +342,38 @@ object GeneratorNestedColumnAliasing { // need to prune nested columns through Project and under Generate. The difference is // when `nestedSchemaPruningEnabled` is on, nested columns will be pruned further at // file format readers if it is supported. + + // There are [[ExtractValue]] expressions on or not on the output of the generator. Generator + // can also have different types: + // 1. For [[ExtractValue]]s not on the output of the generator, theoretically speaking, there + // lots of expressions that we can push down, including non ExtractValues and GetArrayItem + // and GetMapValue. But to be safe, we only handle GetStructField and GetArrayStructFields. + // 2. For [[ExtractValue]]s on the output of the generator, the situation depends on the type + // of the generator expression. *For now, we only support Explode*. + // 2.1 Inline + // Inline takes an input of ARRAY>, and returns an output of + // STRUCT, the output field can be directly accessed by name "field1". + // In this case, we should not try to push down the ExtractValue expressions to the + // input of the Inline. For example: + // Project[field1.x AS x] + // - Generate[ARRAY, field2:int>>, ..., field1, field2] + // It is incorrect to push down the .x to the input of the Inline. + // A valid field pruning would be to extract all the fields that are accessed by the + // Project, and manually reconstruct an expression using those fields. + // 2.2 Explode + // Explode takes an input of ARRAY and returns an output of + // STRUCT. The default field name "col" can be overwritten. + // If the input is MAP, it returns STRUCT. + // For the array case, it is only valid to push down GetStructField. After push down, + // the GetStructField becomes a GetArrayStructFields. Note that we cannot push down + // GetArrayStructFields, since the pushed down expression will operate on an array of + // array which is invalid. + // 2.3 Stack + // Stack takes a sequence of expressions, and returns an output of + // STRUCT + // The push down is doable but more complicated in this case as the expression that + // operates on the col_i of the output needs to pushed down to every (kn+i)-th input + // expression where n is the total number of columns (or struct fields) of the output. case Project(projectList, g: Generate) if (SQLConf.get.nestedPruningOnExpressions || SQLConf.get.nestedSchemaPruningEnabled) && canPruneGenerator(g.generator) => // On top on `Generate`, a `Project` that might have nested column accessors. @@ -366,6 +398,10 @@ object GeneratorNestedColumnAliasing { case _ => } + if (!g.generator.isInstanceOf[ExplodeBase]) { + return Some(pushedThrough) + } + // This function collects all GetStructField*(attribute) from the passed in expression. // GetStructField* means arbitrary levels of nesting. def collectNestedGetStructFields(e: Expression): Seq[Expression] = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala index 42323aac7e869..0c153baa54de0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasingSuite.scala @@ -848,6 +848,18 @@ class NestedColumnAliasingSuite extends SchemaPruningTest { comparePlans(optimized, expected) } + + test("SPARK-38529: GeneratorNestedColumnAliasing does not pushdown for non-Explode") { + val employer = StructType.fromDDL("id int, company struct") + val input = LocalRelation( + 'col1.int, + 'col2.array(ArrayType(StructType.fromDDL("field1 struct, field2 int"))) + ) + val plan = input.generate(Inline('col2)).select('field1.getField("col1")).analyze + val optimized = GeneratorNestedColumnAliasing.unapply(plan) + // The plan is expected to be unchanged. + comparePlans(plan, RemoveNoopOperators.apply(optimized.get)) + } } object NestedColumnAliasingSuite { From c3a171d9875b517b9cb9286db2249cc60c96ade4 Mon Sep 17 00:00:00 2001 From: Emil Ejbyfeldt Date: Thu, 19 May 2022 19:12:38 -0500 Subject: [PATCH 282/535] [SPARK-38681][SQL] Support nested generic case classes ### What changes were proposed in this pull request? Master and branch-3.3 will fail to derive schema for case classes with generic parameters if the parameter was not used directly as a field, but instead pass on as a generic parameter to another type. e.g. ``` case class NestedGeneric[T]( generic: GenericData[T]) ``` This is a regression from the latest release of 3.2.1 where this works as expected. ### Why are the changes needed? Support more general case classes that user might have. ### Does this PR introduce _any_ user-facing change? Better support for generic case classes. ### How was this patch tested? New specs in ScalaReflectionSuite and ExpressionEncoderSuite. All the new test cases that does not use value classes pass if added to the 3.2 branch Closes #36004 from eejbyfeldt/SPARK-38681-nested-generic. Authored-by: Emil Ejbyfeldt Signed-off-by: Sean Owen (cherry picked from commit 49c68020e702f9258f3c693f446669bea66b12f4) Signed-off-by: Sean Owen --- .../spark/sql/catalyst/ScalaReflection.scala | 14 ++----- .../sql/catalyst/ScalaReflectionSuite.scala | 41 +++++++++++++++++++ .../encoders/ExpressionEncoderSuite.scala | 17 ++++++++ 3 files changed, 61 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index e2b624f8e13a3..ff0488b670612 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -964,11 +964,7 @@ trait ScalaReflection extends Logging { } private def isValueClass(tpe: Type): Boolean = { - tpe.typeSymbol.asClass.isDerivedValueClass - } - - private def isTypeParameter(tpe: Type): Boolean = { - tpe.typeSymbol.isParameter + tpe.typeSymbol.isClass && tpe.typeSymbol.asClass.isDerivedValueClass } /** Returns the name and type of the underlying parameter of value class `tpe`. */ @@ -989,15 +985,11 @@ trait ScalaReflection extends Logging { val params = constructParams(dealiasedTpe) params.map { p => val paramTpe = p.typeSignature - if (isTypeParameter(paramTpe)) { - // if there are type variables to fill in, do the substitution - // (SomeClass[T] -> SomeClass[Int]) - p.name.decodedName.toString -> paramTpe.substituteTypes(formalTypeArgs, actualTypeArgs) - } else if (isValueClass(paramTpe)) { + if (isValueClass(paramTpe)) { // Replace value class with underlying type p.name.decodedName.toString -> getUnderlyingTypeOfValueClass(paramTpe) } else { - p.name.decodedName.toString -> paramTpe + p.name.decodedName.toString -> paramTpe.substituteTypes(formalTypeArgs, actualTypeArgs) } } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala index d63fbd8785a96..2c0cb7f640b2c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala @@ -81,6 +81,13 @@ object GenericData { type IntData = GenericData[Int] } +case class NestedGeneric[T]( + generic: GenericData[T]) + +case class SeqNestedGeneric[T]( + generic: Seq[T]) + + case class MultipleConstructorsData(a: Int, b: String, c: Double) { def this(b: String, a: Int) = this(a, b, c = 1.0) } @@ -295,6 +302,40 @@ class ScalaReflectionSuite extends SparkFunSuite { nullable = true)) } + test("SPARK-38681: Nested generic data") { + val schema = schemaFor[NestedGeneric[Int]] + assert(schema === Schema( + StructType(Seq( + StructField( + "generic", + StructType(Seq( + StructField("genericField", IntegerType, nullable = false))), + nullable = true))), + nullable = true)) + } + + test("SPARK-38681: List nested generic") { + val schema = schemaFor[SeqNestedGeneric[Int]] + assert(schema === Schema( + StructType(Seq( + StructField( + "generic", + ArrayType(IntegerType, false), + nullable = true))), + nullable = true)) + } + + test("SPARK-38681: List nested generic with value class") { + val schema = schemaFor[SeqNestedGeneric[IntWrapper]] + assert(schema === Schema( + StructType(Seq( + StructField( + "generic", + ArrayType(StructType(Seq(StructField("i", IntegerType, false))), true), + nullable = true))), + nullable = true)) + } + test("tuple data") { val schema = schemaFor[(Int, String)] assert(schema === Schema( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala index bb4b58dc28a8b..e2eafb7370d18 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala @@ -131,6 +131,11 @@ case class MapOfValueClassKey(m: Map[IntWrapper, String]) case class MapOfValueClassValue(m: Map[String, StringWrapper]) case class OptionOfValueClassValue(o: Option[StringWrapper]) case class CaseClassWithGeneric[T](generic: T, value: IntWrapper) +case class NestedGeneric[T](generic: CaseClassWithGeneric[T]) +case class SeqNestedGeneric[T](list: Seq[T]) +case class OptionNestedGeneric[T](list: Option[T]) +case class MapNestedGenericKey[T](list: Map[T, Int]) +case class MapNestedGenericValue[T](list: Map[Int, T]) class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTest { OuterScopes.addOuterScope(this) @@ -454,6 +459,18 @@ class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTes "nested tuple._2 of class value") encodeDecodeTest(CaseClassWithGeneric(IntWrapper(1), IntWrapper(2)), "case class with value class in generic parameter") + encodeDecodeTest(NestedGeneric(CaseClassWithGeneric(IntWrapper(1), IntWrapper(2))), + "case class with nested generic parameter") + encodeDecodeTest(SeqNestedGeneric(List(2)), + "case class with nested generic parameter seq") + encodeDecodeTest(SeqNestedGeneric(List(IntWrapper(2))), + "case class with value class and nested generic parameter seq") + encodeDecodeTest(OptionNestedGeneric(Some(2)), + "case class with nested generic option") + encodeDecodeTest(MapNestedGenericKey(Map(1 -> 2)), + "case class with nested generic map key ") + encodeDecodeTest(MapNestedGenericValue(Map(1 -> 2)), + "case class with nested generic map value") encodeDecodeTest(Option(31), "option of int") encodeDecodeTest(Option.empty[Int], "empty option of int") From e8e330fbbca5452e9af0a78e5f2cfae0cc6be134 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 20 May 2022 13:02:17 +0900 Subject: [PATCH 283/535] [SPARK-39218][SS][PYTHON] Make foreachBatch streaming query stop gracefully ### What changes were proposed in this pull request? This PR proposes to make the `foreachBatch` streaming query stop gracefully by handling the interrupted exceptions at `StreamExecution.isInterruptionException`. Because there is no straightforward way to access to the original JVM exception, here we rely on string pattern match for now (see also "Why are the changes needed?" below). There is only one place from Py4J https://github.com/py4j/py4j/blob/master/py4j-python/src/py4j/protocol.py#L326-L328 so the approach would work at least. ### Why are the changes needed? In `foreachBatch`, the Python user-defined function in the microbatch runs till the end even when `StreamingQuery.stop` is invoked. However, when any Py4J access is attempted within the user-defined function: - With the pinned thread mode disabled, the interrupt exception is not blocked, and the Python function is executed till the end in a different thread. - With the pinned thread mode enabled, the interrupt exception is raised in the same thread, and the Python thread raises a Py4J exception in the same thread. The latter case is a problem because the interrupt exception is first thrown from JVM side (`java.lang. InterruptedException`) -> Python callback server (`py4j.protocol.Py4JJavaError`) -> JVM (`py4j.Py4JException`), and `py4j.Py4JException` is not listed in `StreamExecution.isInterruptionException` which doesn't gracefully stop the query. Therefore, we should handle this exception at `StreamExecution.isInterruptionException`. ### Does this PR introduce _any_ user-facing change? Yes, it will make the query gracefully stop. ### How was this patch tested? Manually tested with: ```python import time def func(batch_df, batch_id): time.sleep(10) print(batch_df.count()) q = spark.readStream.format("rate").load().writeStream.foreachBatch(func).start() time.sleep(5) q.stop() ``` Closes #36589 from HyukjinKwon/SPARK-39218. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 499de87b77944157828a6d905d9b9df37b7c9a67) Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/tests/test_streaming.py | 10 ++++++++++ .../sql/execution/streaming/StreamExecution.scala | 11 +++++++++++ .../apache/spark/sql/streaming/StreamSuite.scala | 14 ++++++++++++-- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/tests/test_streaming.py b/python/pyspark/sql/tests/test_streaming.py index 4920423be228b..809294d34c32f 100644 --- a/python/pyspark/sql/tests/test_streaming.py +++ b/python/pyspark/sql/tests/test_streaming.py @@ -592,6 +592,16 @@ def collectBatch(df, id): if q: q.stop() + def test_streaming_foreachBatch_graceful_stop(self): + # SPARK-39218: Make foreachBatch streaming query stop gracefully + def func(batch_df, _): + batch_df.sparkSession._jvm.java.lang.Thread.sleep(10000) + + q = self.spark.readStream.format("rate").load().writeStream.foreachBatch(func).start() + time.sleep(3) # 'rowsPerSecond' defaults to 1. Waits 3 secs out for the input. + q.stop() + self.assertIsNone(q.exception(), "No exception has to be propagated.") + def test_streaming_read_from_table(self): with self.table("input_table", "this_query"): self.spark.sql("CREATE TABLE input_table (value string) USING parquet") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index f9ae65cdc47d5..c7ce9f52e0653 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -618,6 +618,13 @@ abstract class StreamExecution( object StreamExecution { val QUERY_ID_KEY = "sql.streaming.queryId" val IS_CONTINUOUS_PROCESSING = "__is_continuous_processing" + val IO_EXCEPTION_NAMES = Seq( + classOf[InterruptedException].getName, + classOf[InterruptedIOException].getName, + classOf[ClosedByInterruptException].getName) + val PROXY_ERROR = ( + "py4j.protocol.Py4JJavaError: An error occurred while calling" + + s".+(\\r\\n|\\r|\\n): (${IO_EXCEPTION_NAMES.mkString("|")})").r @scala.annotation.tailrec def isInterruptionException(e: Throwable, sc: SparkContext): Boolean = e match { @@ -647,6 +654,10 @@ object StreamExecution { } else { false } + // py4j.Py4JException - with pinned thread mode on, the exception can be interrupted by Py4J + // access, for example, in `DataFrameWriter.foreachBatch`. See also + // SPARK-39218. + case e: py4j.Py4JException => PROXY_ERROR.findFirstIn(e.getMessage).isDefined case _ => false } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala index 71e8ae74fe207..f2031b94231b7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamSuite.scala @@ -1175,8 +1175,18 @@ class StreamSuite extends StreamTest { new ClosedByInterruptException, new UncheckedIOException("test", new ClosedByInterruptException), new ExecutionException("test", new InterruptedException), - new UncheckedExecutionException("test", new InterruptedException))) { - test(s"view ${e.getClass.getSimpleName} as a normal query stop") { + new UncheckedExecutionException("test", new InterruptedException)) ++ + Seq( + classOf[InterruptedException].getName, + classOf[InterruptedIOException].getName, + classOf[ClosedByInterruptException].getName).map { s => + new py4j.Py4JException( + s""" + |py4j.protocol.Py4JJavaError: An error occurred while calling o44.count. + |: $s + |""".stripMargin) + }) { + test(s"view ${e.getClass.getSimpleName} [${e.getMessage}] as a normal query stop") { ThrowingExceptionInCreateSource.createSourceLatch = new CountDownLatch(1) ThrowingExceptionInCreateSource.exception = e val query = spark From ab057c72509006cbd8b501b6be4eb26793dc1e71 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 20 May 2022 16:58:22 +0800 Subject: [PATCH 284/535] [SPARK-39237][DOCS] Update the ANSI SQL mode documentation ### What changes were proposed in this pull request? 1. Remove the Experimental notation in ANSI SQL compliance doc 2. Update the description of `spark.sql.ansi.enabled`, since the ANSI reversed keyword is disabled by default now ### Why are the changes needed? 1. The ANSI SQL dialect is GAed in Spark 3.2 release: https://spark.apache.org/releases/spark-release-3-2-0.html We should not mark it as "Experimental" in the doc. 2. The ANSI reversed keyword is disabled by default now ### Does this PR introduce _any_ user-facing change? No, just doc change ### How was this patch tested? Doc preview: image image Closes #36614 from gengliangwang/updateAnsiDoc. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 86a351c13d62644d596cc5249fc1c45d318a0bbf) Signed-off-by: Gengliang Wang --- docs/sql-ref-ansi-compliance.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 94ef94a5e7bac..c4572c71f4a6e 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -19,7 +19,7 @@ license: | limitations under the License. --- -Since Spark 3.0, Spark SQL introduces two experimental options to comply with the SQL standard: `spark.sql.ansi.enabled` and `spark.sql.storeAssignmentPolicy` (See a table below for details). +In Spark SQL, there are two options to comply with the SQL standard: `spark.sql.ansi.enabled` and `spark.sql.storeAssignmentPolicy` (See a table below for details). When `spark.sql.ansi.enabled` is set to `true`, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. For example, Spark will throw an exception at runtime instead of returning null results if the inputs to a SQL operator/function are invalid. Some ANSI dialect features may be not from the ANSI SQL standard directly, but their behaviors align with ANSI SQL's style. @@ -28,10 +28,10 @@ The casting behaviours are defined as store assignment rules in the standard. When `spark.sql.storeAssignmentPolicy` is set to `ANSI`, Spark SQL complies with the ANSI store assignment rules. This is a separate configuration because its default value is `ANSI`, while the configuration `spark.sql.ansi.enabled` is disabled by default. -|Property Name|Default|Meaning|Since Version| -|-------------|-------|-------|-------------| -|`spark.sql.ansi.enabled`|false|(Experimental) When true, Spark tries to conform to the ANSI SQL specification:
        1. Spark will throw a runtime exception if an overflow occurs in any operation on integral/decimal field.
        2. Spark will forbid using the reserved keywords of ANSI SQL as identifiers in the SQL parser.|3.0.0| -|`spark.sql.storeAssignmentPolicy`|ANSI|(Experimental) When inserting a value into a column with different data type, Spark will perform type conversion. Currently, we support 3 policies for the type coercion rules: ANSI, legacy and strict. With ANSI policy, Spark performs the type coercion as per ANSI SQL. In practice, the behavior is mostly the same as PostgreSQL. It disallows certain unreasonable type conversions such as converting string to int or double to boolean. With legacy policy, Spark allows the type coercion as long as it is a valid Cast, which is very loose. e.g. converting string to int or double to boolean is allowed. It is also the only behavior in Spark 2.x and it is compatible with Hive. With strict policy, Spark doesn't allow any possible precision loss or data truncation in type coercion, e.g. converting double to int or decimal to double is not allowed.|3.0.0| +|Property Name|Default| Meaning |Since Version| +|-------------|-------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------| +|`spark.sql.ansi.enabled`|false| When true, Spark tries to conform to the ANSI SQL specification:
        1. Spark SQL will throw runtime exceptions on invalid operations, including integer overflow errors, string parsing errors, etc.
        2. Spark will use different type coercion rules for resolving conflicts among data types. The rules are consistently based on data type precedence. |3.0.0| +|`spark.sql.storeAssignmentPolicy`|ANSI| When inserting a value into a column with different data type, Spark will perform type conversion. Currently, we support 3 policies for the type coercion rules: ANSI, legacy and strict.
        1. With ANSI policy, Spark performs the type coercion as per ANSI SQL. In practice, the behavior is mostly the same as PostgreSQL. It disallows certain unreasonable type conversions such as converting string to int or double to boolean. On inserting a numeric type column, an overflow error will be thrown if the value is out of the target data type's range.
        2. With legacy policy, Spark allows the type coercion as long as it is a valid Cast, which is very loose. e.g. converting string to int or double to boolean is allowed. It is also the only behavior in Spark 2.x and it is compatible with Hive.
        3. With strict policy, Spark doesn't allow any possible precision loss or data truncation in type coercion, e.g. converting double to int or decimal to double is not allowed. |3.0.0| The following subsections present behaviour changes in arithmetic operations, type conversions, and SQL parsing when the ANSI mode enabled. For type conversions in Spark SQL, there are three kinds of them and this article will introduce them one by one: cast, store assignment and type coercion. From 3f77be288ffee792ef6bb49c65132f48b269e142 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 20 May 2022 10:54:53 -0500 Subject: [PATCH 285/535] [SPARK-39240][INFRA][BUILD] Source and binary releases using different tool to generate hashes for integrity ### What changes were proposed in this pull request? unify the hash generator for release files. ### Why are the changes needed? Currently, we use `shasum` for source but `gpg` for binary, since https://github.com/apache/spark/pull/30123 this confuses me when validating the integrities of spark 3.3.0 RC https://dist.apache.org/repos/dist/dev/spark/v3.3.0-rc2-bin/ ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? test script manually Closes #36619 from yaooqinn/SPARK-39240. Authored-by: Kent Yao Signed-off-by: Sean Owen (cherry picked from commit 3e783375097d14f1c28eb9b0e08075f1f8daa4a2) Signed-off-by: Sean Owen --- dev/create-release/release-build.sh | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index a65d02289c0f0..78fd06ba2be26 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -283,9 +283,7 @@ if [[ "$1" == "package" ]]; then echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \ --output $R_DIST_NAME.asc \ --detach-sig $R_DIST_NAME - echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \ - SHA512 $R_DIST_NAME > \ - $R_DIST_NAME.sha512 + shasum -a 512 $R_DIST_NAME > $R_DIST_NAME.sha512 fi if [[ -n $PIP_FLAG ]]; then @@ -296,9 +294,7 @@ if [[ "$1" == "package" ]]; then echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \ --output $PYTHON_DIST_NAME.asc \ --detach-sig $PYTHON_DIST_NAME - echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \ - SHA512 $PYTHON_DIST_NAME > \ - $PYTHON_DIST_NAME.sha512 + shasum -a 512 $PYTHON_DIST_NAME > $PYTHON_DIST_NAME.sha512 fi echo "Copying and signing regular binary distribution" @@ -306,9 +302,7 @@ if [[ "$1" == "package" ]]; then echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --armour \ --output spark-$SPARK_VERSION-bin-$NAME.tgz.asc \ --detach-sig spark-$SPARK_VERSION-bin-$NAME.tgz - echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --print-md \ - SHA512 spark-$SPARK_VERSION-bin-$NAME.tgz > \ - spark-$SPARK_VERSION-bin-$NAME.tgz.sha512 + shasum -a 512 spark-$SPARK_VERSION-bin-$NAME.tgz > spark-$SPARK_VERSION-bin-$NAME.tgz.sha512 } # List of binary packages built. Populates two associative arrays, where the key is the "name" of From fa400c666c41cf864103ba8705116a24092b3687 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sun, 22 May 2022 18:58:25 +0300 Subject: [PATCH 286/535] [SPARK-39243][SQL][DOCS] Rules of quoting elements in error messages ### What changes were proposed in this pull request? In the PR, I propose to describe the rules of quoting elements in error messages introduced by the PRs: - https://github.com/apache/spark/pull/36210 - https://github.com/apache/spark/pull/36233 - https://github.com/apache/spark/pull/36259 - https://github.com/apache/spark/pull/36324 - https://github.com/apache/spark/pull/36335 - https://github.com/apache/spark/pull/36359 - https://github.com/apache/spark/pull/36579 ### Why are the changes needed? To improve code maintenance, and the process of code review. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By existing GAs. Closes #36621 from MaxGekk/update-error-class-guide. Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 2a4d8a4ea709339175257027e31a75bdeed5daec) Signed-off-by: Max Gekk --- .../spark/sql/errors/QueryErrorsBase.scala | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index 89bc1039e7340..52ffa6d32fd9b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -23,6 +23,23 @@ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.types.{DataType, DoubleType, FloatType} +/** + * The trait exposes util methods for preparing error messages such as quoting of error elements. + * All classes that extent `QueryErrorsBase` shall follow the rules: + * 1. Any values shall be outputted in the SQL standard style by using `toSQLValue()`. + * For example: 'a string value', 1, NULL. + * 2. SQL types shall be double quoted and outputted in the upper case using `toSQLType()`. + * For example: "INT", "DECIMAL(10,0)". + * 3. Elements of identifiers shall be wrapped by backticks by using `toSQLId()`. + * For example: `namespaceA`.`funcB`, `tableC`. + * 4. SQL statements shall be in the upper case prepared by using `toSQLStmt`. + * For example: DESC PARTITION, DROP TEMPORARY FUNCTION. + * 5. SQL configs and datasource options shall be wrapped by double quotes by using + * `toSQLConf()`/`toDSOption()`. + * For example: "spark.sql.ansi.enabled". + * 6. Any values of datasource options or SQL configs shall be double quoted. + * For example: "true", "CORRECTED". + */ trait QueryErrorsBase { // Converts an error class parameter to its SQL representation def toSQLValue(v: Any, t: DataType): String = Literal.create(v, t) match { From 047c108378facf4a6bdddac058ba13fad1aca014 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 22 May 2022 14:13:19 -0700 Subject: [PATCH 287/535] [SPARK-39250][BUILD] Upgrade Jackson to 2.13.3 ### What changes were proposed in this pull request? This PR aims to upgrade Jackson to 2.13.3. ### Why are the changes needed? Although Spark is not affected, Jackson 2.13.0~2.13.2 has the following regression which affects the user apps. - https://github.com/FasterXML/jackson-databind/issues/3446 Here is a full release note. - https://github.com/FasterXML/jackson/wiki/Jackson-Release-2.13.3 ### Does this PR introduce _any_ user-facing change? No. The previous version is not released yet. ### How was this patch tested? Pass the CIs. Closes #36627 from dongjoon-hyun/SPARK-39250. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 73438c048fc646f944415ba2e99cb08cc57d856b) Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 14 +++++++------- dev/deps/spark-deps-hadoop-3-hive-2.3 | 14 +++++++------- pom.xml | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index ab00ad568cbd3..092531a320ab5 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -112,16 +112,16 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar -jackson-annotations/2.13.2//jackson-annotations-2.13.2.jar +jackson-annotations/2.13.3//jackson-annotations-2.13.3.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.13.2//jackson-core-2.13.2.jar -jackson-databind/2.13.2.1//jackson-databind-2.13.2.1.jar -jackson-dataformat-cbor/2.13.2//jackson-dataformat-cbor-2.13.2.jar -jackson-dataformat-yaml/2.13.2//jackson-dataformat-yaml-2.13.2.jar -jackson-datatype-jsr310/2.13.2//jackson-datatype-jsr310-2.13.2.jar +jackson-core/2.13.3//jackson-core-2.13.3.jar +jackson-databind/2.13.3//jackson-databind-2.13.3.jar +jackson-dataformat-cbor/2.13.3//jackson-dataformat-cbor-2.13.3.jar +jackson-dataformat-yaml/2.13.3//jackson-dataformat-yaml-2.13.3.jar +jackson-datatype-jsr310/2.13.3//jackson-datatype-jsr310-2.13.3.jar jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-scala_2.12/2.13.2//jackson-module-scala_2.12-2.13.2.jar +jackson-module-scala_2.12/2.13.3//jackson-module-scala_2.12-2.13.3.jar jackson-xc/1.9.13//jackson-xc-1.9.13.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 94cd002122397..c4baa6bb1fc77 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -102,15 +102,15 @@ httpcore/4.4.14//httpcore-4.4.14.jar ini4j/0.5.4//ini4j-0.5.4.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar -jackson-annotations/2.13.2//jackson-annotations-2.13.2.jar +jackson-annotations/2.13.3//jackson-annotations-2.13.3.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.13.2//jackson-core-2.13.2.jar -jackson-databind/2.13.2.1//jackson-databind-2.13.2.1.jar -jackson-dataformat-cbor/2.13.2//jackson-dataformat-cbor-2.13.2.jar -jackson-dataformat-yaml/2.13.2//jackson-dataformat-yaml-2.13.2.jar -jackson-datatype-jsr310/2.13.2//jackson-datatype-jsr310-2.13.2.jar +jackson-core/2.13.3//jackson-core-2.13.3.jar +jackson-databind/2.13.3//jackson-databind-2.13.3.jar +jackson-dataformat-cbor/2.13.3//jackson-dataformat-cbor-2.13.3.jar +jackson-dataformat-yaml/2.13.3//jackson-dataformat-yaml-2.13.3.jar +jackson-datatype-jsr310/2.13.3//jackson-datatype-jsr310-2.13.3.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-scala_2.12/2.13.2//jackson-module-scala_2.12-2.13.2.jar +jackson-module-scala_2.12/2.13.3//jackson-module-scala_2.12-2.13.3.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar diff --git a/pom.xml b/pom.xml index 0d296febbd821..879d565bf9804 100644 --- a/pom.xml +++ b/pom.xml @@ -171,8 +171,8 @@ true 1.9.13 - 2.13.2 - 2.13.2.1 + 2.13.3 + 2.13.3 1.1.8.4 1.1.2 2.2.1 From 0f13606bb55087da657b87d0c2f5a5583ed75e6c Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Mon, 23 May 2022 16:58:36 +0800 Subject: [PATCH 288/535] [SPARK-35378][SQL][FOLLOW-UP] Fix incorrect return type in CommandResultExec.executeCollect() ### What changes were proposed in this pull request? This PR is a follow-up for https://github.com/apache/spark/pull/32513 and fixes an issue introduced by that patch. CommandResultExec is supposed to return `UnsafeRow` records in all of the `executeXYZ` methods but `executeCollect` was left out which causes issues like this one: ``` Error in SQL statement: ClassCastException: org.apache.spark.sql.catalyst.expressions.GenericInternalRow cannot be cast to org.apache.spark.sql.catalyst.expressions.UnsafeRow ``` We need to return `unsafeRows` instead of `rows` in `executeCollect` similar to other methods in the class. ### Why are the changes needed? Fixes a bug in CommandResultExec. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I added a unit test to check the return type of all commands. Closes #36632 from sadikovi/fix-command-exec. Authored-by: Ivan Sadikov Signed-off-by: Wenchen Fan (cherry picked from commit a0decfc7db68c464e3ba2c2fb0b79a8b0c464684) Signed-off-by: Wenchen Fan --- .../apache/spark/sql/execution/CommandResultExec.scala | 4 ++-- .../apache/spark/sql/execution/QueryExecutionSuite.scala | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CommandResultExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CommandResultExec.scala index 37c8de983f406..21d1c97db989d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CommandResultExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CommandResultExec.scala @@ -76,8 +76,8 @@ case class CommandResultExec( } override def executeCollect(): Array[InternalRow] = { - longMetric("numOutputRows").add(rows.size) - rows.toArray + longMetric("numOutputRows").add(unsafeRows.size) + unsafeRows } override def executeTake(limit: Int): Array[InternalRow] = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala index 2c58b53969bcd..41a1cd9b294a1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/QueryExecutionSuite.scala @@ -20,6 +20,7 @@ import scala.io.Source import org.apache.spark.sql.{AnalysisException, FastOperator} import org.apache.spark.sql.catalyst.analysis.UnresolvedNamespace +import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.{CommandResult, LogicalPlan, OneRowRelation, Project, ShowTables, SubqueryAlias} import org.apache.spark.sql.catalyst.trees.TreeNodeTag @@ -262,6 +263,14 @@ class QueryExecutionSuite extends SharedSparkSession { assert(cmdResultExec.commandPhysicalPlan.isInstanceOf[ShowTablesExec]) } + test("SPARK-35378: Return UnsafeRow in CommandResultExecCheck execute methods") { + val plan = spark.sql("SHOW FUNCTIONS").queryExecution.executedPlan + assert(plan.isInstanceOf[CommandResultExec]) + plan.executeCollect().foreach { row => assert(row.isInstanceOf[UnsafeRow]) } + plan.executeTake(10).foreach { row => assert(row.isInstanceOf[UnsafeRow]) } + plan.executeTail(10).foreach { row => assert(row.isInstanceOf[UnsafeRow]) } + } + test("SPARK-38198: check specify maxFields when call toFile method") { withTempDir { dir => val path = dir.getCanonicalPath + "/plans.txt" From 2a31bf572bf386bbae2a8c6941ea43722068e0c6 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Mon, 23 May 2022 07:45:50 -0700 Subject: [PATCH 289/535] [MINOR][ML][DOCS] Fix sql data types link in the ml-pipeline page ### What changes were proposed in this pull request? image [Spark SQL datatype reference](https://spark.apache.org/docs/latest/sql-reference.html#data-types) - `https://spark.apache.org/docs/latest/sql-reference.html#data-types` is invalid and it shall be [Spark SQL datatype reference](https://spark.apache.org/docs/latest/sql-ref-datatypes.html) - `https://spark.apache.org/docs/latest/sql-ref-datatypes.html` https://spark.apache.org/docs/latest/ml-pipeline.html#dataframe ### Why are the changes needed? doc fix ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? `bundle exec jekyll serve` Closes #36633 from yaooqinn/minor. Authored-by: Kent Yao Signed-off-by: huaxingao (cherry picked from commit de73753bb2e5fd947f237e731ff05aa9f2711677) Signed-off-by: huaxingao --- docs/ml-pipeline.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ml-pipeline.md b/docs/ml-pipeline.md index 105b1273311c8..5f9c94781ba84 100644 --- a/docs/ml-pipeline.md +++ b/docs/ml-pipeline.md @@ -72,7 +72,7 @@ E.g., a learning algorithm is an `Estimator` which trains on a `DataFrame` and p Machine learning can be applied to a wide variety of data types, such as vectors, text, images, and structured data. This API adopts the `DataFrame` from Spark SQL in order to support a variety of data types. -`DataFrame` supports many basic and structured types; see the [Spark SQL datatype reference](sql-reference.html#data-types) for a list of supported types. +`DataFrame` supports many basic and structured types; see the [Spark SQL datatype reference](sql-ref-datatypes.html) for a list of supported types. In addition to the types listed in the Spark SQL guide, `DataFrame` can use ML [`Vector`](mllib-data-types.html#local-vector) types. A `DataFrame` can be created either implicitly or explicitly from a regular `RDD`. See the code examples below and the [Spark SQL programming guide](sql-programming-guide.html) for examples. From 505248df16b9785e56a206db62129f6eff945483 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 23 May 2022 14:28:06 -0700 Subject: [PATCH 290/535] [SPARK-39258][TESTS] Fix `Hide credentials in show create table` ### What changes were proposed in this pull request? [SPARK-35378-FOLLOWUP](https://github.com/apache/spark/pull/36632) changes the return value of `CommandResultExec.executeCollect()` from `InternalRow` to `UnsafeRow`, this change causes the result of `r.tostring` in the following code: https://github.com/apache/spark/blob/de73753bb2e5fd947f237e731ff05aa9f2711677/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala#L1143-L1148 change from ``` [CREATE TABLE tab1 ( NAME STRING, THEID INT) USING org.apache.spark.sql.jdbc OPTIONS ( 'dbtable' = 'TEST.PEOPLE', 'password' = '*********(redacted)', 'url' = '*********(redacted)', 'user' = 'testUser') ] ``` to ``` [0,10000000d5,5420455441455243,62617420454c4241,414e20200a282031,4e4952545320454d,45485420200a2c47,a29544e49204449,726f20474e495355,6568636170612e67,732e6b726170732e,a6362646a2e6c71,20534e4f4954504f,7462642720200a28,203d2027656c6261,45502e5453455427,200a2c27454c504f,6f77737361702720,2a27203d20276472,2a2a2a2a2a2a2a2a,6574636164657228,2720200a2c272964,27203d20276c7275,2a2a2a2a2a2a2a2a,746361646572282a,20200a2c27296465,3d20277265737527,7355747365742720,a29277265] ``` and the UT `JDBCSuite$Hide credentials in show create table` failed in master branch. This pr is change to use `executeCollectPublic()` instead of `executeCollect()` to fix this UT. ### Why are the changes needed? Fix UT failed in mater branch after [SPARK-35378-FOLLOWUP](https://github.com/apache/spark/pull/36632) ### Does this PR introduce _any_ user-facing change? NO. ### How was this patch tested? - GitHub Action pass - Manual test Run `mvn clean install -DskipTests -pl sql/core -am -Dtest=none -DwildcardSuites=org.apache.spark.sql.jdbc.JDBCSuite` **Before** ``` - Hide credentials in show create table *** FAILED *** "[0,10000000d5,5420455441455243,62617420454c4241,414e20200a282031,4e4952545320454d,45485420200a2c47,a29544e49204449,726f20474e495355,6568636170612e67,732e6b726170732e,a6362646a2e6c71,20534e4f4954504f,7462642720200a28,203d2027656c6261,45502e5453455427,200a2c27454c504f,6f77737361702720,2a27203d20276472,2a2a2a2a2a2a2a2a,6574636164657228,2720200a2c272964,27203d20276c7275,2a2a2a2a2a2a2a2a,746361646572282a,20200a2c27296465,3d20277265737527,7355747365742720,a29277265]" did not contain "TEST.PEOPLE" (JDBCSuite.scala:1146) ``` **After** ``` Run completed in 24 seconds, 868 milliseconds. Total number of tests run: 93 Suites: completed 2, aborted 0 Tests: succeeded 93, failed 0, canceled 0, ignored 0, pending 0 All tests passed. ``` Closes #36637 from LuciferYang/SPARK-39258. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun (cherry picked from commit 6eb15d12ae6bd77412dbfbf46eb8dbeec1eab466) Signed-off-by: Dongjoon Hyun --- .../src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index b1f5fd00868db..a222391c06fb6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -1141,7 +1141,7 @@ class JDBCSuite extends QueryTest """.stripMargin) val show = ShowCreateTableCommand(TableIdentifier(tableName), ShowCreateTable.getoutputAttrs) - spark.sessionState.executePlan(show).executedPlan.executeCollect().foreach { r => + spark.sessionState.executePlan(show).executedPlan.executeCollectPublic().foreach { r => assert(!r.toString.contains(password)) assert(r.toString.contains(dbTable)) assert(r.toString.contains(userName)) @@ -1154,7 +1154,7 @@ class JDBCSuite extends QueryTest } withSQLConf(SQLConf.SQL_OPTIONS_REDACTION_PATTERN.key -> "(?i)dbtable|user") { - spark.sessionState.executePlan(show).executedPlan.executeCollect().foreach { r => + spark.sessionState.executePlan(show).executedPlan.executeCollectPublic().foreach { r => assert(!r.toString.contains(password)) assert(!r.toString.contains(dbTable)) assert(!r.toString.contains(userName)) From 459c4b0c94a39efe9ea8b5ef1da3f6e379417c40 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Tue, 24 May 2022 13:05:29 +0800 Subject: [PATCH 291/535] [SPARK-39144][SQL] Nested subquery expressions deduplicate relations should be done bottom up ### What changes were proposed in this pull request? When we have nested subquery expressions, there is a chance that deduplicate relations could replace an attributes with a wrong one. This is because the attributes replacement is done by top down than bottom up. This could happen if the subplan gets deduplicate relations first (thus two same relation with different attributes id), then a more complex plan built on top of the subplan (e.g. a UNION of queries with nested subquery expressions) can trigger this wrong attribute replacement error. For concrete example please see the added unit test. ### Why are the changes needed? This is bug that we can fix. Without this PR, we could hit that outer attribute reference does not exist in the outer relation at certain scenario. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? UT Closes #36503 from amaliujia/testnestedsubqueryexpression. Authored-by: Rui Wang Signed-off-by: Wenchen Fan (cherry picked from commit d9fd36eb76fcfec95763cc4dc594eb7856b0fad2) Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/CheckAnalysis.scala | 18 +++++++++ .../analysis/DeduplicateRelations.scala | 26 ++++++------- .../sql/catalyst/analysis/AnalysisSuite.scala | 38 +++++++++++++++++++ 3 files changed, 69 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index b9f3b3b824bf8..9c72b9974c472 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -728,9 +728,27 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { expressions.exists(_.exists(_.semanticEquals(expr))) } + def checkOuterReference(p: LogicalPlan, expr: SubqueryExpression): Unit = p match { + case f: Filter => + if (hasOuterReferences(expr.plan)) { + expr.plan.expressions.foreach(_.foreachUp { + case o: OuterReference => + p.children.foreach(e => + if (!e.output.exists(_.exprId == o.exprId)) { + failAnalysis("outer attribute not found") + }) + case _ => + }) + } + case _ => + } + // Validate the subquery plan. checkAnalysis(expr.plan) + // Check if there is outer attribute that cannot be found from the plan. + checkOuterReference(plan, expr) + expr match { case ScalarSubquery(query, outerAttrs, _, _) => // Scalar subquery must return one column as output. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala index 4c351e3237df2..aed19f2499fad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala @@ -125,9 +125,18 @@ object DeduplicateRelations extends Rule[LogicalPlan] { } } + val planWithNewSubquery = plan.transformExpressions { + case subquery: SubqueryExpression => + val (renewed, collected, changed) = renewDuplicatedRelations( + existingRelations ++ relations, subquery.plan) + relations ++= collected + if (changed) planChanged = true + subquery.withNewPlan(renewed) + } + if (planChanged) { - if (plan.childrenResolved) { - val planWithNewChildren = plan.withNewChildren(newChildren.toSeq) + if (planWithNewSubquery.childrenResolved) { + val planWithNewChildren = planWithNewSubquery.withNewChildren(newChildren.toSeq) val attrMap = AttributeMap( plan .children @@ -140,7 +149,7 @@ object DeduplicateRelations extends Rule[LogicalPlan] { planWithNewChildren.rewriteAttrs(attrMap) } } else { - plan.withNewChildren(newChildren.toSeq) + planWithNewSubquery.withNewChildren(newChildren.toSeq) } } else { plan @@ -148,16 +157,7 @@ object DeduplicateRelations extends Rule[LogicalPlan] { } else { plan } - - val planWithNewSubquery = newPlan.transformExpressions { - case subquery: SubqueryExpression => - val (renewed, collected, changed) = renewDuplicatedRelations( - existingRelations ++ relations, subquery.plan) - relations ++= collected - if (changed) planChanged = true - subquery.withNewPlan(renewed) - } - (planWithNewSubquery, relations, planChanged) + (newPlan, relations, planChanged) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index fff25b59eff98..1f82aa7e35511 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -1176,4 +1176,42 @@ class AnalysisSuite extends AnalysisTest with Matchers { false) } } + + test("SPARK-39144: nested subquery expressions deduplicate relations should be done bottom up") { + val innerRelation = SubqueryAlias("src1", testRelation) + val outerRelation = SubqueryAlias("src2", testRelation) + val ref1 = testRelation.output.head + + val subPlan = getAnalyzer.execute( + Project( + Seq(UnresolvedStar(None)), + Filter.apply( + Exists( + Filter.apply( + EqualTo( + OuterReference(ref1), + ref1), + innerRelation + ) + ), + outerRelation + ))) + + val finalPlan = { + Union.apply( + Project( + Seq(UnresolvedStar(None)), + subPlan + ), + Filter.apply( + Exists( + subPlan + ), + subPlan + ) + ) + } + + assertAnalysisSuccess(finalPlan) + } } From a7259279d07b302a51456adb13dc1e41a6fd06ed Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 24 May 2022 10:15:29 +0000 Subject: [PATCH 292/535] Preparing Spark release v3.3.0-rc3 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf6d..9479bb3bf87df 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index d12f2ad73fabd..2e9c4d9960b14 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 842d63f5d3811..2a9acfa335e71 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f7d187bf9527d..7b17e625d7599 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 53f38df885102..c5c920e774782 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 845f6659407bd..697b5a3928e58 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8e1590891933b..ad2db11370ae7 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1987c13328559..1a7bdee70f3bc 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index c7e7be1e3bbf1..66dc93de0599e 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index ac644130a61e2..219ceca6648d8 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 9a6fe2d313fde..4966db6b4a8af 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.1-SNAPSHOT -SPARK_VERSION_SHORT: 3.3.1 +SPARK_VERSION: 3.3.0 +SPARK_VERSION_SHORT: 3.3.0 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.1"] + 'facetFilters': ["version:3.3.0"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index e97f3b40cb2bd..42e58f2726df1 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 578854e3eaa9a..5aaa91cfdf20d 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 95e1ce74ca172..36309bb417362 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 0b803c5d3864a..072cedaa594c8 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 95726829bcbbd..b9063b543f512 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 25e7e25ae25b6..6f6a51a972c73 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 3ba16b7b838a2..95fd080383995 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 6cee275e6adc7..33cf30ff803e7 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index ad23da0d7f249..79b2e8f2a5a47 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 6de1f9eee532c..647d0c3f87552 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 95c8c312eb0e2..562ddc8dcc23c 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 4d87bd2730e3b..08bcae6e0f53f 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 889f0b5a92e08..beceaecd31a1c 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 9b7b0370d3b4d..584a5df0a4a35 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 04a68a47a4f45..42bab72668c00 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/pom.xml b/pom.xml index 879d565bf9804..944eebd959852 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index 2e5f8bf5395a3..980b64c4dca8c 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index d1d6a449bd5dc..f3ec959370807 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 40e578f9a7eba..66ae5adfbd19f 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index f4ac384409174..1472bd0fcb1a2 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 61d5adec0e7cc..77811f35692d8 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 970d42ba4590e..ceba171e41134 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 0cfb5f616cd24..34137add48553 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 7024e0dcfab75..e1b725929a8fc 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index cc8d8796da601..8f1e9d2f3ccb1 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index c4977726a3cac..52273e7fa76e1 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 9bbcb7f322798..dadc9324f95a2 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml From d491e390adaba04dc238868b7adc33251d880095 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Tue, 24 May 2022 10:15:35 +0000 Subject: [PATCH 293/535] Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87df..0e449e841cf6d 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b14..d12f2ad73fabd 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e71..842d63f5d3811 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d7599..f7d187bf9527d 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e774782..53f38df885102 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e58..845f6659407bd 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370ae7..8e1590891933b 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3bc..1987c13328559 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 66dc93de0599e..c7e7be1e3bbf1 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 219ceca6648d8..ac644130a61e2 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 4966db6b4a8af..9a6fe2d313fde 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.0 -SPARK_VERSION_SHORT: 3.3.0 +SPARK_VERSION: 3.3.1-SNAPSHOT +SPARK_VERSION_SHORT: 3.3.1 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.0"] + 'facetFilters': ["version:3.3.1"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 42e58f2726df1..e97f3b40cb2bd 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 5aaa91cfdf20d..578854e3eaa9a 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 36309bb417362..95e1ce74ca172 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 072cedaa594c8..0b803c5d3864a 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index b9063b543f512..95726829bcbbd 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 6f6a51a972c73..25e7e25ae25b6 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 95fd080383995..3ba16b7b838a2 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 33cf30ff803e7..6cee275e6adc7 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 79b2e8f2a5a47..ad23da0d7f249 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 647d0c3f87552..6de1f9eee532c 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 562ddc8dcc23c..95c8c312eb0e2 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 08bcae6e0f53f..4d87bd2730e3b 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index beceaecd31a1c..889f0b5a92e08 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 584a5df0a4a35..9b7b0370d3b4d 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 42bab72668c00..04a68a47a4f45 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 944eebd959852..879d565bf9804 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT pom Spark Project Parent POM https://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index 980b64c4dca8c..2e5f8bf5395a3 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index f3ec959370807..d1d6a449bd5dc 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 66ae5adfbd19f..40e578f9a7eba 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 1472bd0fcb1a2..f4ac384409174 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 77811f35692d8..61d5adec0e7cc 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index ceba171e41134..970d42ba4590e 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 34137add48553..0cfb5f616cd24 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index e1b725929a8fc..7024e0dcfab75 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 8f1e9d2f3ccb1..cc8d8796da601 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 52273e7fa76e1..c4977726a3cac 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index dadc9324f95a2..9bbcb7f322798 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml From a33d697e1e7c2854d77cc2302015ef54bf0c32ab Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 25 May 2022 09:56:30 +0900 Subject: [PATCH 294/535] [SPARK-39273][PS][TESTS] Make PandasOnSparkTestCase inherit ReusedSQLTestCase ### What changes were proposed in this pull request? This PR proposes to make `PandasOnSparkTestCase` inherit `ReusedSQLTestCase`. ### Why are the changes needed? We don't need this: ```python classmethod def tearDownClass(cls): # We don't stop Spark session to reuse across all tests. # The Spark session will be started and stopped at PyTest session level. # Please see pyspark/pandas/conftest.py. pass ``` anymore in Apache Spark. This has existed to speed up the tests when the codes are in Koalas repository where the tests run sequentially in single process. In Apache Spark, we run in multiple processes, and we don't need this anymore. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Existing CI should test it out. Closes #36652 from HyukjinKwon/SPARK-39273. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit a6dd6076d708713d11585bf7f3401d522ea48822) Signed-off-by: Hyukjin Kwon --- python/pyspark/testing/pandasutils.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/python/pyspark/testing/pandasutils.py b/python/pyspark/testing/pandasutils.py index 9b07a23ae1b56..baa43e5b9d5c2 100644 --- a/python/pyspark/testing/pandasutils.py +++ b/python/pyspark/testing/pandasutils.py @@ -18,7 +18,6 @@ import functools import shutil import tempfile -import unittest import warnings from contextlib import contextmanager from distutils.version import LooseVersion @@ -32,9 +31,8 @@ from pyspark.pandas.frame import DataFrame from pyspark.pandas.indexes import Index from pyspark.pandas.series import Series -from pyspark.pandas.utils import default_session, SPARK_CONF_ARROW_ENABLED -from pyspark.testing.sqlutils import SQLTestUtils - +from pyspark.pandas.utils import SPARK_CONF_ARROW_ENABLED +from pyspark.testing.sqlutils import ReusedSQLTestCase tabulate_requirement_message = None try: @@ -61,19 +59,12 @@ have_plotly = plotly_requirement_message is None -class PandasOnSparkTestCase(unittest.TestCase, SQLTestUtils): +class PandasOnSparkTestCase(ReusedSQLTestCase): @classmethod def setUpClass(cls): - cls.spark = default_session() + super(PandasOnSparkTestCase, cls).setUpClass() cls.spark.conf.set(SPARK_CONF_ARROW_ENABLED, True) - @classmethod - def tearDownClass(cls): - # We don't stop Spark session to reuse across all tests. - # The Spark session will be started and stopped at PyTest session level. - # Please see pyspark/pandas/conftest.py. - pass - def assertPandasEqual(self, left, right, check_exact=True): if isinstance(left, pd.DataFrame) and isinstance(right, pd.DataFrame): try: From 37a2416ca4c37eebeabfefc3be812594804f5ff5 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Wed, 25 May 2022 11:39:54 +0900 Subject: [PATCH 295/535] [SPARK-39252][PYSPARK][TESTS] Remove flaky test_df_is_empty ### What changes were proposed in this pull request? ### Why are the changes needed? This PR removes flaky `test_df_is_empty` as reported in https://issues.apache.org/jira/browse/SPARK-39252. I will open a follow-up PR to reintroduce the test and fix the flakiness (or see if it was a regression). ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #36656 from sadikovi/SPARK-39252. Authored-by: Ivan Sadikov Signed-off-by: Hyukjin Kwon (cherry picked from commit 9823bb385cd6dca7c4fb5a6315721420ad42f80a) Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/tests/test_dataframe.py | 36 ---------------------- 1 file changed, 36 deletions(-) diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index fd54c25c70511..be5e1d9a6e5dc 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -22,7 +22,6 @@ import tempfile import time import unittest -import uuid from typing import cast from pyspark.sql import SparkSession, Row @@ -1142,41 +1141,6 @@ def test_df_show(self): with self.assertRaisesRegex(TypeError, "Parameter 'truncate=foo'"): df.show(truncate="foo") - def test_df_is_empty(self): - # SPARK-39084: Fix df.rdd.isEmpty() resulting in JVM crash. - - # This particular example of DataFrame reproduces an issue in isEmpty call - # which could result in JVM crash. - data = [] - for t in range(0, 10000): - id = str(uuid.uuid4()) - if t == 0: - for i in range(0, 99): - data.append((id,)) - elif t < 10: - for i in range(0, 75): - data.append((id,)) - elif t < 100: - for i in range(0, 50): - data.append((id,)) - elif t < 1000: - for i in range(0, 25): - data.append((id,)) - else: - for i in range(0, 10): - data.append((id,)) - - tmpPath = tempfile.mkdtemp() - shutil.rmtree(tmpPath) - try: - df = self.spark.createDataFrame(data, ["col"]) - df.coalesce(1).write.parquet(tmpPath) - - res = self.spark.read.parquet(tmpPath).groupBy("col").count() - self.assertFalse(res.rdd.isEmpty()) - finally: - shutil.rmtree(tmpPath) - @unittest.skipIf( not have_pandas or not have_pyarrow, cast(str, pandas_requirement_message or pyarrow_requirement_message), From 6c4e07dbe38ade7be9051cc16667f2e75cac6b3e Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 25 May 2022 19:36:55 +0300 Subject: [PATCH 296/535] [SPARK-39255][SQL][3.3] Improve error messages ### What changes were proposed in this pull request? In the PR, I propose to improve errors of the following error classes: 1. NON_PARTITION_COLUMN - `a non-partition column name` -> `the non-partition column` 2. UNSUPPORTED_SAVE_MODE - `a not existent path` -> `a non existent path`. 3. INVALID_FIELD_NAME. Quote ids to follow the rules https://github.com/apache/spark/pull/36621. 4. FAILED_SET_ORIGINAL_PERMISSION_BACK. It is renamed to RESET_PERMISSION_TO_ORIGINAL. 5. NON_LITERAL_PIVOT_VALUES - Wrap error's expression by double quotes. The PR adds new helper method `toSQLExpr()` for that. 6. CAST_INVALID_INPUT - Add the recommendation: `... Correct the syntax for the value before casting it, or change the type to one appropriate for the value.` This is a backport of https://github.com/apache/spark/pull/36635. ### Why are the changes needed? To improve user experience with Spark SQL by making error message more clear. ### Does this PR introduce _any_ user-facing change? Yes, it changes user-facing error messages. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" $ build/sbt "sql/testOnly *QueryCompilationErrorsDSv2Suite" $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite" $ build/sbt "sql/testOnly *QueryExecutionAnsiErrorsSuite" $ build/sbt "sql/testOnly *QueryExecutionErrorsSuite" $ build/sbt "sql/testOnly *QueryParsingErrorsSuite*" ``` Lead-authored-by: Max Gekk Co-authored-by: Maxim Gekk Signed-off-by: Max Gekk (cherry picked from commit 625afb4e1aefda59191d79b31f8c94941aedde1e) Signed-off-by: Max Gekk Closes #36655 from MaxGekk/error-class-improve-msg-3-3.3. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 12 ++-- .../sql/errors/QueryCompilationErrors.scala | 4 +- .../spark/sql/errors/QueryErrorsBase.scala | 10 ++- .../sql/errors/QueryExecutionErrors.scala | 2 +- .../spark/sql/types/StructTypeSuite.scala | 22 +++--- .../sql-tests/results/ansi/cast.sql.out | 68 +++++++++---------- .../sql-tests/results/ansi/date.sql.out | 6 +- .../ansi/datetime-parsing-invalid.sql.out | 4 +- .../sql-tests/results/ansi/interval.sql.out | 20 +++--- .../results/ansi/string-functions.sql.out | 8 +-- .../resources/sql-tests/results/pivot.sql.out | 2 +- .../results/postgreSQL/boolean.sql.out | 32 ++++----- .../results/postgreSQL/float4.sql.out | 8 +-- .../results/postgreSQL/float8.sql.out | 8 +-- .../sql-tests/results/postgreSQL/text.sql.out | 4 +- .../results/postgreSQL/window_part2.sql.out | 2 +- .../results/postgreSQL/window_part3.sql.out | 2 +- .../results/postgreSQL/window_part4.sql.out | 2 +- .../timestampNTZ/timestamp-ansi.sql.out | 2 +- .../sql-tests/results/udf/udf-pivot.sql.out | 2 +- .../spark/sql/connector/InsertIntoTests.scala | 4 +- 21 files changed, 117 insertions(+), 107 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 60a432163b5a9..463bf798e49c3 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -26,7 +26,7 @@ "message" : [ "Cannot use a mixture of aggregate function and group aggregate pandas UDF" ] }, "CAST_INVALID_INPUT" : { - "message" : [ "The value of the type cannot be cast to because it is malformed. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." ], + "message" : [ "The value of the type cannot be cast to because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." ], "sqlState" : "42000" }, "CAST_OVERFLOW" : { @@ -55,9 +55,6 @@ "message" : [ "Failed to rename to as destination already exists" ], "sqlState" : "22023" }, - "FAILED_SET_ORIGINAL_PERMISSION_BACK" : { - "message" : [ "Failed to set original permission back to the created path: . Exception: " ] - }, "GRAPHITE_SINK_INVALID_PROTOCOL" : { "message" : [ "Invalid Graphite protocol: " ] }, @@ -129,11 +126,11 @@ "sqlState" : "42000" }, "NON_LITERAL_PIVOT_VALUES" : { - "message" : [ "Literal expressions required for pivot values, found ''" ], + "message" : [ "Literal expressions required for pivot values, found ." ], "sqlState" : "42000" }, "NON_PARTITION_COLUMN" : { - "message" : [ "PARTITION clause cannot contain a non-partition column name: " ], + "message" : [ "PARTITION clause cannot contain the non-partition column: ." ], "sqlState" : "42000" }, "PARSE_CHAR_MISSING_LENGTH" : { @@ -156,6 +153,9 @@ "message" : [ "Failed to rename as was not found" ], "sqlState" : "22023" }, + "RESET_PERMISSION_TO_ORIGINAL" : { + "message" : [ "Failed to set original permission back to the created path: . Exception: " ] + }, "SECOND_FUNCTION_ARGUMENT_NOT_INTEGER" : { "message" : [ "The second argument of '' function needs to be an integer." ], "sqlState" : "22023" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 2d2dba63e3a59..70ef344fda59b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -82,7 +82,7 @@ object QueryCompilationErrors extends QueryErrorsBase { def nonLiteralPivotValError(pivotVal: Expression): Throwable = { new AnalysisException( errorClass = "NON_LITERAL_PIVOT_VALUES", - messageParameters = Array(pivotVal.toString)) + messageParameters = Array(toSQLExpr(pivotVal))) } def pivotValDataTypeMismatchError(pivotVal: Expression, pivotCol: Expression): Throwable = { @@ -2371,7 +2371,7 @@ object QueryCompilationErrors extends QueryErrorsBase { def invalidFieldName(fieldName: Seq[String], path: Seq[String], context: Origin): Throwable = { new AnalysisException( errorClass = "INVALID_FIELD_NAME", - messageParameters = Array(fieldName.quoted, path.quoted), + messageParameters = Array(toSQLId(fieldName), toSQLId(path)), origin = context) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index 52ffa6d32fd9b..758a0d34b2689 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -19,8 +19,8 @@ package org.apache.spark.sql.errors import java.util.Locale -import org.apache.spark.sql.catalyst.expressions.Literal -import org.apache.spark.sql.catalyst.util.quoteIdentifier +import org.apache.spark.sql.catalyst.expressions.{Expression, Literal} +import org.apache.spark.sql.catalyst.util.{quoteIdentifier, toPrettySQL} import org.apache.spark.sql.types.{DataType, DoubleType, FloatType} /** @@ -39,6 +39,8 @@ import org.apache.spark.sql.types.{DataType, DoubleType, FloatType} * For example: "spark.sql.ansi.enabled". * 6. Any values of datasource options or SQL configs shall be double quoted. * For example: "true", "CORRECTED". + * 7. SQL expressions shall be wrapped by double quotes. + * For example: "earnings + 1". */ trait QueryErrorsBase { // Converts an error class parameter to its SQL representation @@ -84,4 +86,8 @@ trait QueryErrorsBase { def toDSOption(option: String): String = { quoteByDefault(option) } + + def toSQLExpr(e: Expression): String = { + quoteByDefault(toPrettySQL(e)) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 487be632f62db..22dc100a43476 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1720,7 +1720,7 @@ object QueryExecutionErrors extends QueryErrorsBase { permission: FsPermission, path: Path, e: Throwable): Throwable = { - new SparkSecurityException(errorClass = "FAILED_SET_ORIGINAL_PERMISSION_BACK", + new SparkSecurityException(errorClass = "RESET_PERMISSION_TO_ORIGINAL", Array(permission.toString, path.toString, e.getMessage)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala index 16f122334f370..0352943086d93 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/StructTypeSuite.scala @@ -319,7 +319,8 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { var e = intercept[AnalysisException] { check(Seq("S1", "S12", "S123"), None) } - assert(e.getMessage.contains("Field name S1.S12.S123 is invalid: s1.s12 is not a struct")) + assert(e.getMessage.contains( + "Field name `S1`.`S12`.`S123` is invalid: `s1`.`s12` is not a struct")) // ambiguous name e = intercept[AnalysisException] { @@ -333,17 +334,19 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { e = intercept[AnalysisException] { check(Seq("m1", "key"), None) } - assert(e.getMessage.contains("Field name m1.key is invalid: m1 is not a struct")) + assert(e.getMessage.contains("Field name `m1`.`key` is invalid: `m1` is not a struct")) checkCollection(Seq("m1", "key"), Some(Seq("m1") -> StructField("key", IntegerType, false))) checkCollection(Seq("M1", "value"), Some(Seq("m1") -> StructField("value", IntegerType))) e = intercept[AnalysisException] { checkCollection(Seq("M1", "key", "name"), None) } - assert(e.getMessage.contains("Field name M1.key.name is invalid: m1.key is not a struct")) + assert(e.getMessage.contains( + "Field name `M1`.`key`.`name` is invalid: `m1`.`key` is not a struct")) e = intercept[AnalysisException] { checkCollection(Seq("M1", "value", "name"), None) } - assert(e.getMessage.contains("Field name M1.value.name is invalid: m1.value is not a struct")) + assert(e.getMessage.contains( + "Field name `M1`.`value`.`name` is invalid: `m1`.`value` is not a struct")) // map of struct checkCollection(Seq("M2", "key", "A"), @@ -355,24 +358,25 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { e = intercept[AnalysisException] { checkCollection(Seq("m2", "key", "A", "name"), None) } - assert(e.getMessage.contains("Field name m2.key.A.name is invalid: m2.key.a is not a struct")) + assert(e.getMessage.contains( + "Field name `m2`.`key`.`A`.`name` is invalid: `m2`.`key`.`a` is not a struct")) e = intercept[AnalysisException] { checkCollection(Seq("M2", "value", "b", "name"), None) } assert(e.getMessage.contains( - "Field name M2.value.b.name is invalid: m2.value.b is not a struct")) + "Field name `M2`.`value`.`b`.`name` is invalid: `m2`.`value`.`b` is not a struct")) // simple array type e = intercept[AnalysisException] { check(Seq("A1", "element"), None) } - assert(e.getMessage.contains("Field name A1.element is invalid: a1 is not a struct")) + assert(e.getMessage.contains("Field name `A1`.`element` is invalid: `a1` is not a struct")) checkCollection(Seq("A1", "element"), Some(Seq("a1") -> StructField("element", IntegerType))) e = intercept[AnalysisException] { checkCollection(Seq("A1", "element", "name"), None) } assert(e.getMessage.contains( - "Field name A1.element.name is invalid: a1.element is not a struct")) + "Field name `A1`.`element`.`name` is invalid: `a1`.`element` is not a struct")) // array of struct checkCollection(Seq("A2", "element", "C"), @@ -382,7 +386,7 @@ class StructTypeSuite extends SparkFunSuite with SQLHelper { checkCollection(Seq("a2", "element", "C", "name"), None) } assert(e.getMessage.contains( - "Field name a2.element.C.name is invalid: a2.element.c is not a struct")) + "Field name `a2`.`element`.`C`.`name` is invalid: `a2`.`element`.`c` is not a struct")) } test("SPARK-36807: Merge ANSI interval types to a tightest common type") { diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out index 654433c0ca561..6286afecbef80 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -8,7 +8,7 @@ SELECT CAST('1.23' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1.23' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1.23' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('1.23' AS int) ^^^^^^^^^^^^^^^^^^^ @@ -20,7 +20,7 @@ SELECT CAST('1.23' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1.23' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1.23' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('1.23' AS long) ^^^^^^^^^^^^^^^^^^^^ @@ -32,7 +32,7 @@ SELECT CAST('-4.56' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '-4.56' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '-4.56' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-4.56' AS int) ^^^^^^^^^^^^^^^^^^^^ @@ -44,7 +44,7 @@ SELECT CAST('-4.56' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '-4.56' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '-4.56' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-4.56' AS long) ^^^^^^^^^^^^^^^^^^^^^ @@ -56,7 +56,7 @@ SELECT CAST('abc' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'abc' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('abc' AS int) ^^^^^^^^^^^^^^^^^^ @@ -68,7 +68,7 @@ SELECT CAST('abc' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'abc' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('abc' AS long) ^^^^^^^^^^^^^^^^^^^ @@ -80,7 +80,7 @@ SELECT CAST('abc' AS float) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'abc' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('abc' AS float) ^^^^^^^^^^^^^^^^^^^^ @@ -92,7 +92,7 @@ SELECT CAST('abc' AS double) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'abc' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('abc' AS double) ^^^^^^^^^^^^^^^^^^^^^ @@ -104,7 +104,7 @@ SELECT CAST('1234567890123' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1234567890123' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1234567890123' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('1234567890123' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -116,7 +116,7 @@ SELECT CAST('12345678901234567890123' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '12345678901234567890123' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '12345678901234567890123' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('12345678901234567890123' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -128,7 +128,7 @@ SELECT CAST('' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('' AS int) ^^^^^^^^^^^^^^^ @@ -140,7 +140,7 @@ SELECT CAST('' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('' AS long) ^^^^^^^^^^^^^^^^ @@ -152,7 +152,7 @@ SELECT CAST('' AS float) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('' AS float) ^^^^^^^^^^^^^^^^^ @@ -164,7 +164,7 @@ SELECT CAST('' AS double) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('' AS double) ^^^^^^^^^^^^^^^^^^ @@ -192,7 +192,7 @@ SELECT CAST('123.a' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '123.a' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('123.a' AS int) ^^^^^^^^^^^^^^^^^^^^ @@ -204,7 +204,7 @@ SELECT CAST('123.a' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '123.a' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('123.a' AS long) ^^^^^^^^^^^^^^^^^^^^^ @@ -216,7 +216,7 @@ SELECT CAST('123.a' AS float) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '123.a' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('123.a' AS float) ^^^^^^^^^^^^^^^^^^^^^^ @@ -228,7 +228,7 @@ SELECT CAST('123.a' AS double) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '123.a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('123.a' AS double) ^^^^^^^^^^^^^^^^^^^^^^^ @@ -248,7 +248,7 @@ SELECT CAST('-2147483649' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '-2147483649' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '-2147483649' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-2147483649' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -268,7 +268,7 @@ SELECT CAST('2147483648' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '2147483648' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '2147483648' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('2147483648' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -288,7 +288,7 @@ SELECT CAST('-9223372036854775809' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '-9223372036854775809' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '-9223372036854775809' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('-9223372036854775809' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -308,7 +308,7 @@ SELECT CAST('9223372036854775808' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '9223372036854775808' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '9223372036854775808' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT CAST('9223372036854775808' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -567,7 +567,7 @@ select cast('1中文' as tinyint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1中文' of the type "STRING" cannot be cast to "TINYINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "TINYINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as tinyint) ^^^^^^^^^^^^^^^^^^^^^^ @@ -579,7 +579,7 @@ select cast('1中文' as smallint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1中文' of the type "STRING" cannot be cast to "SMALLINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "SMALLINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as smallint) ^^^^^^^^^^^^^^^^^^^^^^^ @@ -591,7 +591,7 @@ select cast('1中文' as INT) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1中文' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as INT) ^^^^^^^^^^^^^^^^^^ @@ -603,7 +603,7 @@ select cast('中文1' as bigint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '中文1' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '中文1' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('中文1' as bigint) ^^^^^^^^^^^^^^^^^^^^^ @@ -615,7 +615,7 @@ select cast('1中文' as bigint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1中文' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('1中文' as bigint) ^^^^^^^^^^^^^^^^^^^^^ @@ -646,7 +646,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value ' - xyz ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. + xyz ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('\t\n xyz \t\r' as boolean) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -678,7 +678,7 @@ select cast('xyz' as decimal(4, 2)) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'xyz' of the type "STRING" cannot be cast to "DECIMAL(4,2)" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'xyz' of the type "STRING" cannot be cast to "DECIMAL(4,2)" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('xyz' as decimal(4, 2)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -698,7 +698,7 @@ select cast('a' as date) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'a' of the type "STRING" cannot be cast to "DATE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DATE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('a' as date) ^^^^^^^^^^^^^^^^^ @@ -718,7 +718,7 @@ select cast('a' as timestamp) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('a' as timestamp) ^^^^^^^^^^^^^^^^^^^^^^ @@ -738,7 +738,7 @@ select cast('a' as timestamp_ntz) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast('a' as timestamp_ntz) ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -750,7 +750,7 @@ select cast(cast('inf' as double) as timestamp) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast(cast('inf' as double) as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -762,7 +762,7 @@ select cast(cast('inf' as float) as timestamp) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast(cast('inf' as float) as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index 2cf50284d6639..0bb5de24831fc 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -232,7 +232,7 @@ select next_day("xx", "Mon") struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'xx' of the type "STRING" cannot be cast to "DATE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'xx' of the type "STRING" cannot be cast to "DATE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select next_day("xx", "Mon") ^^^^^^^^^^^^^^^^^^^^^ @@ -327,7 +327,7 @@ select date_add('2011-11-11', '1.2') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select date_add('2011-11-11', '1.2') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -438,7 +438,7 @@ select date_sub(date'2011-11-11', '1.2') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select date_sub(date'2011-11-11', '1.2') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out index d1eb604d4fcd2..c823ca55f3b0d 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out @@ -242,7 +242,7 @@ select cast("Unparseable" as timestamp) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'Unparseable' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'Unparseable' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast("Unparseable" as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -254,7 +254,7 @@ select cast("Unparseable" as date) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'Unparseable' of the type "STRING" cannot be cast to "DATE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'Unparseable' of the type "STRING" cannot be cast to "DATE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select cast("Unparseable" as date) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 5d2ead16511f1..cefa7cf20ac87 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -122,7 +122,7 @@ select interval 2 second * 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select interval 2 second * 'a' ^^^^^^^^^^^^^^^^^^^^^^^ @@ -134,7 +134,7 @@ select interval 2 second / 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select interval 2 second / 'a' ^^^^^^^^^^^^^^^^^^^^^^^ @@ -146,7 +146,7 @@ select interval 2 year * 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select interval 2 year * 'a' ^^^^^^^^^^^^^^^^^^^^^ @@ -158,7 +158,7 @@ select interval 2 year / 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select interval 2 year / 'a' ^^^^^^^^^^^^^^^^^^^^^ @@ -186,7 +186,7 @@ select 'a' * interval 2 second struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select 'a' * interval 2 second ^^^^^^^^^^^^^^^^^^^^^^^ @@ -198,7 +198,7 @@ select 'a' * interval 2 year struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select 'a' * interval 2 year ^^^^^^^^^^^^^^^^^^^^^ @@ -1516,7 +1516,7 @@ select '4 11:11' - interval '4 22:12' day to minute struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '4 11:11' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '4 11:11' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select '4 11:11' - interval '4 22:12' day to minute ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1528,7 +1528,7 @@ select '4 12:12:12' + interval '4 22:12' day to minute struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '4 12:12:12' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '4 12:12:12' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select '4 12:12:12' + interval '4 22:12' day to minute ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1566,7 +1566,7 @@ select str - interval '4 22:12' day to minute from interval_view struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select str - interval '4 22:12' day to minute from interval_view ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1578,7 +1578,7 @@ select str + interval '4 22:12' day to minute from interval_view struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select str + interval '4 22:12' day to minute from interval_view ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index ad388e211f588..5621759421019 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -82,7 +82,7 @@ select left("abcd", -2), left("abcd", 0), left("abcd", 'a') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 42) == ...t("abcd", -2), left("abcd", 0), left("abcd", 'a') ^^^^^^^^^^^^^^^^^ @@ -110,7 +110,7 @@ select right("abcd", -2), right("abcd", 0), right("abcd", 'a') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 44) == ...("abcd", -2), right("abcd", 0), right("abcd", 'a') ^^^^^^^^^^^^^^^^^^ @@ -419,7 +419,7 @@ SELECT lpad('hi', 'invalid_length') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT lpad('hi', 'invalid_length') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -431,7 +431,7 @@ SELECT rpad('hi', 'invalid_length') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT rpad('hi', 'invalid_length') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/pivot.sql.out b/sql/core/src/test/resources/sql-tests/results/pivot.sql.out index 54086bcc54e6c..0a42750d24571 100644 --- a/sql/core/src/test/resources/sql-tests/results/pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/pivot.sql.out @@ -339,7 +339,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Literal expressions required for pivot values, found 'course#x' +Literal expressions required for pivot values, found "course". -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out index fe23273c4d9a9..a2d0ba73e5d30 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out @@ -56,7 +56,7 @@ SELECT boolean('test') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'test' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'test' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('test') AS error ^^^^^^^^^^^^^^^ @@ -76,7 +76,7 @@ SELECT boolean('foo') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'foo' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'foo' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('foo') AS error ^^^^^^^^^^^^^^ @@ -104,7 +104,7 @@ SELECT boolean('yeah') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'yeah' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'yeah' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('yeah') AS error ^^^^^^^^^^^^^^^ @@ -132,7 +132,7 @@ SELECT boolean('nay') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'nay' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'nay' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('nay') AS error ^^^^^^^^^^^^^^ @@ -144,7 +144,7 @@ SELECT boolean('on') AS true struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'on' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'on' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('on') AS true ^^^^^^^^^^^^^ @@ -156,7 +156,7 @@ SELECT boolean('off') AS `false` struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'off' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'off' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('off') AS `false` ^^^^^^^^^^^^^^ @@ -168,7 +168,7 @@ SELECT boolean('of') AS `false` struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'of' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'of' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('of') AS `false` ^^^^^^^^^^^^^ @@ -180,7 +180,7 @@ SELECT boolean('o') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'o' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'o' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('o') AS error ^^^^^^^^^^^^ @@ -192,7 +192,7 @@ SELECT boolean('on_') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'on_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'on_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('on_') AS error ^^^^^^^^^^^^^^ @@ -204,7 +204,7 @@ SELECT boolean('off_') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'off_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'off_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('off_') AS error ^^^^^^^^^^^^^^^ @@ -224,7 +224,7 @@ SELECT boolean('11') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value '11' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '11' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('11') AS error ^^^^^^^^^^^^^ @@ -244,7 +244,7 @@ SELECT boolean('000') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value '000' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '000' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('000') AS error ^^^^^^^^^^^^^^ @@ -256,7 +256,7 @@ SELECT boolean('') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean('') AS error ^^^^^^^^^^^ @@ -365,7 +365,7 @@ SELECT boolean(string(' tru e ')) AS invalid struct<> -- !query output org.apache.spark.SparkRuntimeException -The value ' tru e ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value ' tru e ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean(string(' tru e ')) AS invalid ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -377,7 +377,7 @@ SELECT boolean(string('')) AS invalid struct<> -- !query output org.apache.spark.SparkRuntimeException -The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT boolean(string('')) AS invalid ^^^^^^^^^^^^^^^^^^^ @@ -524,7 +524,7 @@ INSERT INTO BOOLTBL2 struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('XXX' AS BOOLEAN): The value 'XXX' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +failed to evaluate expression CAST('XXX' AS BOOLEAN): The value 'XXX' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 2, position 11) == VALUES (boolean('XXX')) ^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out index a1399062419c9..34ab90a26f1a4 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out @@ -96,7 +96,7 @@ SELECT float('N A N') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'N A N' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'N A N' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT float('N A N') ^^^^^^^^^^^^^^ @@ -108,7 +108,7 @@ SELECT float('NaN x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'NaN x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'NaN x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT float('NaN x') ^^^^^^^^^^^^^^ @@ -120,7 +120,7 @@ SELECT float(' INFINITY x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value ' INFINITY x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value ' INFINITY x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT float(' INFINITY x') ^^^^^^^^^^^^^^^^^^^^^^^ @@ -156,7 +156,7 @@ SELECT float(decimal('nan')) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 13) == SELECT float(decimal('nan')) ^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out index 270332cd19664..33aec5bfaf100 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out @@ -128,7 +128,7 @@ SELECT double('N A N') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'N A N' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'N A N' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT double('N A N') ^^^^^^^^^^^^^^^ @@ -140,7 +140,7 @@ SELECT double('NaN x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'NaN x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'NaN x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT double('NaN x') ^^^^^^^^^^^^^^^ @@ -152,7 +152,7 @@ SELECT double(' INFINITY x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value ' INFINITY x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value ' INFINITY x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == SELECT double(' INFINITY x') ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -188,7 +188,7 @@ SELECT double(decimal('nan')) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 14) == SELECT double(decimal('nan')) ^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out index ed218c1a52c3c..a3f149211966a 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out @@ -65,7 +65,7 @@ select string('four: ') || 2+2 struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select string('four: ') || 2+2 ^^^^^^^^^^^^^^^^^^^^^^^ @@ -77,7 +77,7 @@ select 'four: ' || 2+2 struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 7) == select 'four: ' || 2+2 ^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out index 58633790cf793..4da230c2e5a55 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out @@ -462,7 +462,7 @@ window w as (order by f_numeric range between struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'NaN' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'NaN' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 3, position 12) == window w as (order by f_numeric range between ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index 68f9d532a1cd5..25125281a74c8 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -72,7 +72,7 @@ insert into datetimes values struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): The value '11:00 BST' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): The value '11:00 BST' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 2, position 23) == (1, timestamp '11:00', cast ('11:00 BST' as timestamp), cast ('1 year' as timestamp), ... ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out index f3f4a448df69c..f341f475fcdf0 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out @@ -501,7 +501,7 @@ FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('nan' AS INT): The value 'nan' of the type "STRING" cannot be cast to "INT" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +failed to evaluate expression CAST('nan' AS INT): The value 'nan' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 3, position 28) == FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) ^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index e374f92c74e93..531f89003bdc1 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out @@ -332,7 +332,7 @@ select to_timestamp(1) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '1' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out index 7b986a25be089..0dccf39d435f9 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-pivot.sql.out @@ -339,7 +339,7 @@ PIVOT ( struct<> -- !query output org.apache.spark.sql.AnalysisException -Literal expressions required for pivot values, found 'course#x' +Literal expressions required for pivot values, found "course". -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala index fc98cfd5138e1..85904bbf12373 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/InsertIntoTests.scala @@ -259,7 +259,7 @@ trait InsertIntoSQLOnlyTests verifyTable(t1, spark.emptyDataFrame) assert(exc.getMessage.contains( - "PARTITION clause cannot contain a non-partition column name")) + "PARTITION clause cannot contain the non-partition column")) assert(exc.getMessage.contains("id")) assert(exc.getErrorClass == "NON_PARTITION_COLUMN") } @@ -276,7 +276,7 @@ trait InsertIntoSQLOnlyTests verifyTable(t1, spark.emptyDataFrame) assert(exc.getMessage.contains( - "PARTITION clause cannot contain a non-partition column name")) + "PARTITION clause cannot contain the non-partition column")) assert(exc.getMessage.contains("data")) assert(exc.getErrorClass == "NON_PARTITION_COLUMN") } From 92e82fdf8e2faec5add61e2448f11272dfb19c6e Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Thu, 26 May 2022 10:36:03 +0900 Subject: [PATCH 297/535] [SPARK-39293][SQL] Fix the accumulator of ArrayAggregate to handle complex types properly ### What changes were proposed in this pull request? Fix the accumulator of `ArrayAggregate` to handle complex types properly. The accumulator of `ArrayAggregate` should copy the intermediate result if string, struct, array, or map. ### Why are the changes needed? If the intermediate data of `ArrayAggregate` holds reusable data, the result will be duplicated. ```scala import org.apache.spark.sql.functions._ val reverse = udf((s: String) => s.reverse) val df = Seq(Array("abc", "def")).toDF("array") val testArray = df.withColumn( "agg", aggregate( col("array"), array().cast("array"), (acc, s) => concat(acc, array(reverse(s))))) aggArray.show(truncate=false) ``` should be: ``` +----------+----------+ |array |agg | +----------+----------+ |[abc, def]|[cba, fed]| +----------+----------+ ``` but: ``` +----------+----------+ |array |agg | +----------+----------+ |[abc, def]|[fed, fed]| +----------+----------+ ``` ### Does this PR introduce _any_ user-facing change? Yes, this fixes the correctness issue. ### How was this patch tested? Added a test. Closes #36674 from ueshin/issues/SPARK-39293/array_aggregate. Authored-by: Takuya UESHIN Signed-off-by: Hyukjin Kwon (cherry picked from commit d6a11cb4b411c8136eb241aac167bc96990f5421) Signed-off-by: Hyukjin Kwon --- .../expressions/higherOrderFunctions.scala | 2 +- .../org/apache/spark/sql/DataFrameSuite.scala | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index f9b2ade9a6029..fa444a670f283 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -826,7 +826,7 @@ case class ArrayAggregate( var i = 0 while (i < arr.numElements()) { elementVar.value.set(arr.get(i, elementVar.dataType)) - accForMergeVar.value.set(mergeForEval.eval(input)) + accForMergeVar.value.set(InternalRow.copyValue(mergeForEval.eval(input))) i += 1 } accForFinishVar.value.set(accForMergeVar.value.get) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index d16416d600d19..728ba3d645625 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -2933,6 +2933,25 @@ class DataFrameSuite extends QueryTest checkAnswer(test10, Row(Array(Row("cbaihg"), Row("fedlkj"))) :: Nil) } + test("SPARK-39293: The accumulator of ArrayAggregate to handle complex types properly") { + val reverse = udf((s: String) => s.reverse) + + val df = Seq(Array("abc", "def")).toDF("array") + val testArray = df.select( + aggregate( + col("array"), + array().cast("array"), + (acc, s) => concat(acc, array(reverse(s))))) + checkAnswer(testArray, Row(Array("cba", "fed")) :: Nil) + + val testMap = df.select( + aggregate( + col("array"), + map().cast("map"), + (acc, s) => map_concat(acc, map(s, reverse(s))))) + checkAnswer(testMap, Row(Map("abc" -> "cba", "def" -> "fed")) :: Nil) + } + test("SPARK-34882: Aggregate with multiple distinct null sensitive aggregators") { withUserDefinedFunction(("countNulls", true)) { spark.udf.register("countNulls", udaf(new Aggregator[JLong, JLong, JLong] { From 997e7f0af506410d76f1232acc00ad9518e9a804 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 26 May 2022 15:25:40 +0800 Subject: [PATCH 298/535] [SPARK-39234][SQL][3.3] Code clean up in SparkThrowableHelper.getMessage ### What changes were proposed in this pull request? 1. Remove the starting "\n" in `Origin.context`. The "\n" will be append in the method `SparkThrowableHelper.getMessage` instead. 2. Code clean up the method SparkThrowableHelper.getMessage to eliminate redundant code. ### Why are the changes needed? Code clean up to eliminate redundant code. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #36669 from gengliangwang/portSPARK-39234. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- core/src/main/scala/org/apache/spark/ErrorInfo.scala | 7 ++++++- .../org/apache/spark/sql/catalyst/trees/TreeNode.scala | 2 +- .../apache/spark/sql/catalyst/trees/TreeNodeSuite.scala | 3 +-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ErrorInfo.scala b/core/src/main/scala/org/apache/spark/ErrorInfo.scala index 41c0e83f917b4..99e081ebb45ea 100644 --- a/core/src/main/scala/org/apache/spark/ErrorInfo.scala +++ b/core/src/main/scala/org/apache/spark/ErrorInfo.scala @@ -61,8 +61,13 @@ private[spark] object SparkThrowableHelper { queryContext: String = ""): String = { val errorInfo = errorClassToInfoMap.getOrElse(errorClass, throw new IllegalArgumentException(s"Cannot find error class '$errorClass'")) + val displayQueryContext = if (queryContext.isEmpty) { + "" + } else { + s"\n$queryContext" + } String.format(errorInfo.messageFormat.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), - messageParameters: _*) + queryContext + messageParameters: _*) + displayQueryContext } def getSqlState(errorClass: String): String = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 079abd3f2e03c..09d24c5f81181 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -89,7 +89,7 @@ case class Origin( "" } val builder = new StringBuilder - builder ++= s"\n== SQL$objectContext$positionContext ==\n" + builder ++= s"== SQL$objectContext$positionContext ==\n" val text = sqlText.get val start = math.max(startIndex.get, 0) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index ffbc5d89bdb61..899a740bdae86 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -876,8 +876,7 @@ class TreeNodeSuite extends SparkFunSuite with SQLHelper { objectType = Some("VIEW"), objectName = Some("some_view")) val expected = - """ - |== SQL of VIEW some_view(line 3, position 38) == + """== SQL of VIEW some_view(line 3, position 38) == |...7890 + 1234567890 + 1234567890, cast('a' | ^^^^^^^^ |as /* comment */ From e24859be7407123018a07a23ec0a78e386bb7398 Mon Sep 17 00:00:00 2001 From: itholic Date: Thu, 26 May 2022 19:35:35 +0900 Subject: [PATCH 299/535] [SPARK-39253][DOCS][PYTHON][3.3] Improve PySpark API reference to be more readable ### What changes were proposed in this pull request? Hotfix https://github.com/apache/spark/pull/36647 for branch-3.3. ### Why are the changes needed? The improvement of document readability will also improve the usability for PySpark. ### Does this PR introduce _any_ user-facing change? Yes, now the documentation is categorized by its class or their own purpose more clearly as below: Screen Shot 2022-05-24 at 1 50 23 PM ### How was this patch tested? The existing test should cover. Closes #36685 from itholic/SPARK-39253-3.3. Authored-by: itholic Signed-off-by: Hyukjin Kwon --- python/docs/source/reference/index.rst | 2 +- python/docs/source/reference/pyspark.sql.rst | 663 ------------------ .../source/reference/pyspark.sql/avro.rst | 28 + .../source/reference/pyspark.sql/catalog.rst | 48 ++ .../source/reference/pyspark.sql/column.rst | 58 ++ .../reference/pyspark.sql/configuration.rst | 27 + .../reference/pyspark.sql/core_classes.rst | 39 ++ .../reference/pyspark.sql/data_types.rst | 46 ++ .../reference/pyspark.sql/dataframe.rst | 133 ++++ .../reference/pyspark.sql/functions.rst | 343 +++++++++ .../source/reference/pyspark.sql/grouping.rst | 38 + .../source/reference/pyspark.sql/index.rst | 41 ++ .../docs/source/reference/pyspark.sql/io.rst | 54 ++ .../reference/pyspark.sql/observation.rst | 27 + .../docs/source/reference/pyspark.sql/row.rst | 27 + .../reference/pyspark.sql/spark_session.rst | 53 ++ .../source/reference/pyspark.sql/window.rst | 38 + 17 files changed, 1001 insertions(+), 664 deletions(-) delete mode 100644 python/docs/source/reference/pyspark.sql.rst create mode 100644 python/docs/source/reference/pyspark.sql/avro.rst create mode 100644 python/docs/source/reference/pyspark.sql/catalog.rst create mode 100644 python/docs/source/reference/pyspark.sql/column.rst create mode 100644 python/docs/source/reference/pyspark.sql/configuration.rst create mode 100644 python/docs/source/reference/pyspark.sql/core_classes.rst create mode 100644 python/docs/source/reference/pyspark.sql/data_types.rst create mode 100644 python/docs/source/reference/pyspark.sql/dataframe.rst create mode 100644 python/docs/source/reference/pyspark.sql/functions.rst create mode 100644 python/docs/source/reference/pyspark.sql/grouping.rst create mode 100644 python/docs/source/reference/pyspark.sql/index.rst create mode 100644 python/docs/source/reference/pyspark.sql/io.rst create mode 100644 python/docs/source/reference/pyspark.sql/observation.rst create mode 100644 python/docs/source/reference/pyspark.sql/row.rst create mode 100644 python/docs/source/reference/pyspark.sql/spark_session.rst create mode 100644 python/docs/source/reference/pyspark.sql/window.rst diff --git a/python/docs/source/reference/index.rst b/python/docs/source/reference/index.rst index f023b5a8c9947..1d2db3f4a156e 100644 --- a/python/docs/source/reference/index.rst +++ b/python/docs/source/reference/index.rst @@ -27,7 +27,7 @@ Pandas API on Spark follows the API specifications of pandas 1.3. .. toctree:: :maxdepth: 2 - pyspark.sql + pyspark.sql/index pyspark.pandas/index pyspark.ss pyspark.ml diff --git a/python/docs/source/reference/pyspark.sql.rst b/python/docs/source/reference/pyspark.sql.rst deleted file mode 100644 index adc1958822ed4..0000000000000 --- a/python/docs/source/reference/pyspark.sql.rst +++ /dev/null @@ -1,663 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - -========= -Spark SQL -========= - -Core Classes ------------- - -.. currentmodule:: pyspark.sql - -.. autosummary:: - :toctree: api/ - - SparkSession - Catalog - DataFrame - Column - Observation - Row - GroupedData - PandasCogroupedOps - DataFrameNaFunctions - DataFrameStatFunctions - Window - - -Spark Session APIs ------------------- - -.. currentmodule:: pyspark.sql - -The entry point to programming Spark with the Dataset and DataFrame API. -To create a Spark session, you should use ``SparkSession.builder`` attribute. -See also :class:`SparkSession`. - -.. autosummary:: - :toctree: api/ - - SparkSession.builder.appName - SparkSession.builder.config - SparkSession.builder.enableHiveSupport - SparkSession.builder.getOrCreate - SparkSession.builder.master - SparkSession.catalog - SparkSession.conf - SparkSession.createDataFrame - SparkSession.getActiveSession - SparkSession.newSession - SparkSession.range - SparkSession.read - SparkSession.readStream - SparkSession.sparkContext - SparkSession.sql - SparkSession.stop - SparkSession.streams - SparkSession.table - SparkSession.udf - SparkSession.version - - -Configuration -------------- - -.. currentmodule:: pyspark.sql.conf - -.. autosummary:: - :toctree: api/ - - RuntimeConfig - - -Input and Output ----------------- - -.. currentmodule:: pyspark.sql - -.. autosummary:: - :toctree: api/ - - DataFrameReader.csv - DataFrameReader.format - DataFrameReader.jdbc - DataFrameReader.json - DataFrameReader.load - DataFrameReader.option - DataFrameReader.options - DataFrameReader.orc - DataFrameReader.parquet - DataFrameReader.schema - DataFrameReader.table - DataFrameWriter.bucketBy - DataFrameWriter.csv - DataFrameWriter.format - DataFrameWriter.insertInto - DataFrameWriter.jdbc - DataFrameWriter.json - DataFrameWriter.mode - DataFrameWriter.option - DataFrameWriter.options - DataFrameWriter.orc - DataFrameWriter.parquet - DataFrameWriter.partitionBy - DataFrameWriter.save - DataFrameWriter.saveAsTable - DataFrameWriter.sortBy - DataFrameWriter.text - - -DataFrame APIs --------------- - -.. currentmodule:: pyspark.sql - -.. autosummary:: - :toctree: api/ - - DataFrame.agg - DataFrame.alias - DataFrame.approxQuantile - DataFrame.cache - DataFrame.checkpoint - DataFrame.coalesce - DataFrame.colRegex - DataFrame.collect - DataFrame.columns - DataFrame.corr - DataFrame.count - DataFrame.cov - DataFrame.createGlobalTempView - DataFrame.createOrReplaceGlobalTempView - DataFrame.createOrReplaceTempView - DataFrame.createTempView - DataFrame.crossJoin - DataFrame.crosstab - DataFrame.cube - DataFrame.describe - DataFrame.distinct - DataFrame.drop - DataFrame.dropDuplicates - DataFrame.drop_duplicates - DataFrame.dropna - DataFrame.dtypes - DataFrame.exceptAll - DataFrame.explain - DataFrame.fillna - DataFrame.filter - DataFrame.first - DataFrame.foreach - DataFrame.foreachPartition - DataFrame.freqItems - DataFrame.groupBy - DataFrame.head - DataFrame.hint - DataFrame.inputFiles - DataFrame.intersect - DataFrame.intersectAll - DataFrame.isEmpty - DataFrame.isLocal - DataFrame.isStreaming - DataFrame.join - DataFrame.limit - DataFrame.localCheckpoint - DataFrame.mapInPandas - DataFrame.mapInArrow - DataFrame.na - DataFrame.observe - DataFrame.orderBy - DataFrame.persist - DataFrame.printSchema - DataFrame.randomSplit - DataFrame.rdd - DataFrame.registerTempTable - DataFrame.repartition - DataFrame.repartitionByRange - DataFrame.replace - DataFrame.rollup - DataFrame.sameSemantics - DataFrame.sample - DataFrame.sampleBy - DataFrame.schema - DataFrame.select - DataFrame.selectExpr - DataFrame.semanticHash - DataFrame.show - DataFrame.sort - DataFrame.sortWithinPartitions - DataFrame.sparkSession - DataFrame.stat - DataFrame.storageLevel - DataFrame.subtract - DataFrame.summary - DataFrame.tail - DataFrame.take - DataFrame.toDF - DataFrame.toJSON - DataFrame.toLocalIterator - DataFrame.toPandas - DataFrame.transform - DataFrame.union - DataFrame.unionAll - DataFrame.unionByName - DataFrame.unpersist - DataFrame.where - DataFrame.withColumn - DataFrame.withColumnRenamed - DataFrame.withWatermark - DataFrame.write - DataFrame.writeStream - DataFrame.writeTo - DataFrame.pandas_api - DataFrameNaFunctions.drop - DataFrameNaFunctions.fill - DataFrameNaFunctions.replace - DataFrameStatFunctions.approxQuantile - DataFrameStatFunctions.corr - DataFrameStatFunctions.cov - DataFrameStatFunctions.crosstab - DataFrameStatFunctions.freqItems - DataFrameStatFunctions.sampleBy - -Column APIs ------------ - -.. currentmodule:: pyspark.sql - -.. autosummary:: - :toctree: api/ - - Column.alias - Column.asc - Column.asc_nulls_first - Column.asc_nulls_last - Column.astype - Column.between - Column.bitwiseAND - Column.bitwiseOR - Column.bitwiseXOR - Column.cast - Column.contains - Column.desc - Column.desc_nulls_first - Column.desc_nulls_last - Column.dropFields - Column.endswith - Column.eqNullSafe - Column.getField - Column.getItem - Column.ilike - Column.isNotNull - Column.isNull - Column.isin - Column.like - Column.name - Column.otherwise - Column.over - Column.rlike - Column.startswith - Column.substr - Column.when - Column.withField - -Data Types ----------- - -.. currentmodule:: pyspark.sql.types - -.. autosummary:: - :template: autosummary/class_with_docs.rst - :toctree: api/ - - ArrayType - BinaryType - BooleanType - ByteType - DataType - DateType - DecimalType - DoubleType - FloatType - IntegerType - LongType - MapType - NullType - ShortType - StringType - StructField - StructType - TimestampType - DayTimeIntervalType - - -Observation ------------ - -.. currentmodule:: pyspark.sql - -.. autosummary:: - :toctree: api/ - - Observation.get - - -Row ---- - -.. currentmodule:: pyspark.sql - -.. autosummary:: - :toctree: api/ - - Row.asDict - - -Functions ---------- - -.. currentmodule:: pyspark.sql.functions - -.. autosummary:: - :toctree: api/ - - abs - acos - acosh - add_months - aggregate - approxCountDistinct - approx_count_distinct - array - array_contains - array_distinct - array_except - array_intersect - array_join - array_max - array_min - array_position - array_remove - array_repeat - array_sort - array_union - arrays_overlap - arrays_zip - asc - asc_nulls_first - asc_nulls_last - ascii - asin - asinh - assert_true - atan - atanh - atan2 - avg - base64 - bin - bit_length - bitwise_not - bitwiseNOT - broadcast - bround - bucket - cbrt - ceil - coalesce - col - collect_list - collect_set - column - concat - concat_ws - conv - corr - cos - cosh - cot - count - count_distinct - countDistinct - covar_pop - covar_samp - crc32 - create_map - csc - cume_dist - current_date - current_timestamp - date_add - date_format - date_sub - date_trunc - datediff - dayofmonth - dayofweek - dayofyear - days - decode - degrees - dense_rank - desc - desc_nulls_first - desc_nulls_last - element_at - encode - exists - exp - explode - explode_outer - expm1 - expr - factorial - filter - first - flatten - floor - forall - format_number - format_string - from_csv - from_json - from_unixtime - from_utc_timestamp - get_json_object - greatest - grouping - grouping_id - hash - hex - hour - hours - hypot - initcap - input_file_name - instr - isnan - isnull - json_tuple - kurtosis - lag - last - last_day - lead - least - length - levenshtein - lit - locate - log - log10 - log1p - log2 - lower - lpad - ltrim - make_date - map_concat - map_entries - map_filter - map_from_arrays - map_from_entries - map_keys - map_values - map_zip_with - max - max_by - md5 - mean - min - min_by - minute - monotonically_increasing_id - month - months - months_between - nanvl - next_day - nth_value - ntile - octet_length - overlay - pandas_udf - percent_rank - percentile_approx - posexplode - posexplode_outer - pow - product - quarter - radians - raise_error - rand - randn - rank - regexp_extract - regexp_replace - repeat - reverse - rint - round - row_number - rpad - rtrim - schema_of_csv - schema_of_json - sec - second - sentences - sequence - session_window - sha1 - sha2 - shiftleft - shiftright - shiftrightunsigned - shuffle - signum - sin - sinh - size - skewness - slice - sort_array - soundex - spark_partition_id - split - sqrt - stddev - stddev_pop - stddev_samp - struct - substring - substring_index - sum - sum_distinct - sumDistinct - tan - tanh - timestamp_seconds - toDegrees - toRadians - to_csv - to_date - to_json - to_timestamp - to_utc_timestamp - transform - transform_keys - transform_values - translate - trim - trunc - udf - unbase64 - unhex - unix_timestamp - upper - var_pop - var_samp - variance - weekofyear - when - window - xxhash64 - year - years - zip_with - - -.. currentmodule:: pyspark.sql.avro.functions - -.. autosummary:: - :toctree: api/ - - from_avro - to_avro - -Window ------- - -.. currentmodule:: pyspark.sql - -.. autosummary:: - :toctree: api/ - - Window.currentRow - Window.orderBy - Window.partitionBy - Window.rangeBetween - Window.rowsBetween - Window.unboundedFollowing - Window.unboundedPreceding - WindowSpec.orderBy - WindowSpec.partitionBy - WindowSpec.rangeBetween - WindowSpec.rowsBetween - -Grouping --------- - -.. currentmodule:: pyspark.sql - -.. autosummary:: - :toctree: api/ - - GroupedData.agg - GroupedData.apply - GroupedData.applyInPandas - GroupedData.avg - GroupedData.cogroup - GroupedData.count - GroupedData.max - GroupedData.mean - GroupedData.min - GroupedData.pivot - GroupedData.sum - PandasCogroupedOps.applyInPandas - -Catalog APIs ------------- - -.. currentmodule:: pyspark.sql - -.. autosummary:: - :toctree: api/ - - Catalog.cacheTable - Catalog.clearCache - Catalog.createExternalTable - Catalog.createTable - Catalog.currentDatabase - Catalog.databaseExists - Catalog.dropGlobalTempView - Catalog.dropTempView - Catalog.functionExists - Catalog.isCached - Catalog.listColumns - Catalog.listDatabases - Catalog.listFunctions - Catalog.listTables - Catalog.recoverPartitions - Catalog.refreshByPath - Catalog.refreshTable - Catalog.registerFunction - Catalog.setCurrentDatabase - Catalog.tableExists - Catalog.uncacheTable diff --git a/python/docs/source/reference/pyspark.sql/avro.rst b/python/docs/source/reference/pyspark.sql/avro.rst new file mode 100644 index 0000000000000..b6de88deef1fb --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/avro.rst @@ -0,0 +1,28 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +==== +Avro +==== +.. currentmodule:: pyspark.sql.avro.functions + +.. autosummary:: + :toctree: api/ + + from_avro + to_avro diff --git a/python/docs/source/reference/pyspark.sql/catalog.rst b/python/docs/source/reference/pyspark.sql/catalog.rst new file mode 100644 index 0000000000000..8267e06410e0d --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/catalog.rst @@ -0,0 +1,48 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +======= +Catalog +======= + +.. currentmodule:: pyspark.sql + +.. autosummary:: + :toctree: api/ + + Catalog.cacheTable + Catalog.clearCache + Catalog.createExternalTable + Catalog.createTable + Catalog.currentDatabase + Catalog.databaseExists + Catalog.dropGlobalTempView + Catalog.dropTempView + Catalog.functionExists + Catalog.isCached + Catalog.listColumns + Catalog.listDatabases + Catalog.listFunctions + Catalog.listTables + Catalog.recoverPartitions + Catalog.refreshByPath + Catalog.refreshTable + Catalog.registerFunction + Catalog.setCurrentDatabase + Catalog.tableExists + Catalog.uncacheTable diff --git a/python/docs/source/reference/pyspark.sql/column.rst b/python/docs/source/reference/pyspark.sql/column.rst new file mode 100644 index 0000000000000..b5f39d299c12d --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/column.rst @@ -0,0 +1,58 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +====== +Column +====== +.. currentmodule:: pyspark.sql + +.. autosummary:: + :toctree: api/ + + Column.alias + Column.asc + Column.asc_nulls_first + Column.asc_nulls_last + Column.astype + Column.between + Column.bitwiseAND + Column.bitwiseOR + Column.bitwiseXOR + Column.cast + Column.contains + Column.desc + Column.desc_nulls_first + Column.desc_nulls_last + Column.dropFields + Column.endswith + Column.eqNullSafe + Column.getField + Column.getItem + Column.ilike + Column.isNotNull + Column.isNull + Column.isin + Column.like + Column.name + Column.otherwise + Column.over + Column.rlike + Column.startswith + Column.substr + Column.when + Column.withField diff --git a/python/docs/source/reference/pyspark.sql/configuration.rst b/python/docs/source/reference/pyspark.sql/configuration.rst new file mode 100644 index 0000000000000..7a5c10400de92 --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/configuration.rst @@ -0,0 +1,27 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +============= +Configuration +============= +.. currentmodule:: pyspark.sql.conf + +.. autosummary:: + :toctree: api/ + + RuntimeConfig diff --git a/python/docs/source/reference/pyspark.sql/core_classes.rst b/python/docs/source/reference/pyspark.sql/core_classes.rst new file mode 100644 index 0000000000000..72f9ca122a943 --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/core_classes.rst @@ -0,0 +1,39 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +============ +Core Classes +============ +.. currentmodule:: pyspark.sql + +.. autosummary:: + :toctree: api/ + + SparkSession + Catalog + DataFrame + Column + Observation + Row + GroupedData + PandasCogroupedOps + DataFrameNaFunctions + DataFrameStatFunctions + Window + DataFrameReader + DataFrameWriter diff --git a/python/docs/source/reference/pyspark.sql/data_types.rst b/python/docs/source/reference/pyspark.sql/data_types.rst new file mode 100644 index 0000000000000..d146c640477d6 --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/data_types.rst @@ -0,0 +1,46 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +========== +Data Types +========== +.. currentmodule:: pyspark.sql.types + +.. autosummary:: + :template: autosummary/class_with_docs.rst + :toctree: api/ + + ArrayType + BinaryType + BooleanType + ByteType + DataType + DateType + DecimalType + DoubleType + FloatType + IntegerType + LongType + MapType + NullType + ShortType + StringType + StructField + StructType + TimestampType + DayTimeIntervalType diff --git a/python/docs/source/reference/pyspark.sql/dataframe.rst b/python/docs/source/reference/pyspark.sql/dataframe.rst new file mode 100644 index 0000000000000..5b6e704ba4829 --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/dataframe.rst @@ -0,0 +1,133 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +========= +DataFrame +========= + +.. currentmodule:: pyspark.sql + +.. autosummary:: + :toctree: api/ + + DataFrame.agg + DataFrame.alias + DataFrame.approxQuantile + DataFrame.cache + DataFrame.checkpoint + DataFrame.coalesce + DataFrame.colRegex + DataFrame.collect + DataFrame.columns + DataFrame.corr + DataFrame.count + DataFrame.cov + DataFrame.createGlobalTempView + DataFrame.createOrReplaceGlobalTempView + DataFrame.createOrReplaceTempView + DataFrame.createTempView + DataFrame.crossJoin + DataFrame.crosstab + DataFrame.cube + DataFrame.describe + DataFrame.distinct + DataFrame.drop + DataFrame.dropDuplicates + DataFrame.drop_duplicates + DataFrame.dropna + DataFrame.dtypes + DataFrame.exceptAll + DataFrame.explain + DataFrame.fillna + DataFrame.filter + DataFrame.first + DataFrame.foreach + DataFrame.foreachPartition + DataFrame.freqItems + DataFrame.groupBy + DataFrame.head + DataFrame.hint + DataFrame.inputFiles + DataFrame.intersect + DataFrame.intersectAll + DataFrame.isEmpty + DataFrame.isLocal + DataFrame.isStreaming + DataFrame.join + DataFrame.limit + DataFrame.localCheckpoint + DataFrame.mapInPandas + DataFrame.mapInArrow + DataFrame.na + DataFrame.observe + DataFrame.orderBy + DataFrame.persist + DataFrame.printSchema + DataFrame.randomSplit + DataFrame.rdd + DataFrame.registerTempTable + DataFrame.repartition + DataFrame.repartitionByRange + DataFrame.replace + DataFrame.rollup + DataFrame.sameSemantics + DataFrame.sample + DataFrame.sampleBy + DataFrame.schema + DataFrame.select + DataFrame.selectExpr + DataFrame.semanticHash + DataFrame.show + DataFrame.sort + DataFrame.sortWithinPartitions + DataFrame.sparkSession + DataFrame.stat + DataFrame.storageLevel + DataFrame.subtract + DataFrame.summary + DataFrame.tail + DataFrame.take + DataFrame.toDF + DataFrame.toJSON + DataFrame.toLocalIterator + DataFrame.toPandas + DataFrame.to_pandas_on_spark + DataFrame.transform + DataFrame.union + DataFrame.unionAll + DataFrame.unionByName + DataFrame.unpersist + DataFrame.where + DataFrame.withColumn + DataFrame.withColumns + DataFrame.withColumnRenamed + DataFrame.withMetadata + DataFrame.withWatermark + DataFrame.write + DataFrame.writeStream + DataFrame.writeTo + DataFrame.pandas_api + DataFrameNaFunctions.drop + DataFrameNaFunctions.fill + DataFrameNaFunctions.replace + DataFrameStatFunctions.approxQuantile + DataFrameStatFunctions.corr + DataFrameStatFunctions.cov + DataFrameStatFunctions.crosstab + DataFrameStatFunctions.freqItems + DataFrameStatFunctions.sampleBy diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst new file mode 100644 index 0000000000000..390d7d768ca86 --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/functions.rst @@ -0,0 +1,343 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +========= +Functions +========= +.. currentmodule:: pyspark.sql.functions + +Normal Functions +---------------- +.. autosummary:: + :toctree: api/ + + col + column + create_map + lit + array + map_from_arrays + broadcast + coalesce + input_file_name + isnan + isnull + monotonically_increasing_id + nanvl + rand + randn + spark_partition_id + struct + when + bitwise_not + bitwiseNOT + expr + greatest + least + + +Math Functions +-------------- +.. autosummary:: + :toctree: api/ + + sqrt + abs + acos + acosh + asin + asinh + atan + atanh + atan2 + bin + cbrt + ceil + conv + cos + cosh + cot + csc + exp + expm1 + factorial + floor + hex + unhex + hypot + log + log10 + log1p + log2 + pow + rint + round + bround + sec + shiftleft + shiftright + shiftrightunsigned + signum + sin + sinh + tan + tanh + toDegrees + degrees + toRadians + radians + + +Datetime Functions +------------------ +.. autosummary:: + :toctree: api/ + + add_months + current_date + current_timestamp + date_add + date_format + date_sub + date_trunc + datediff + dayofmonth + dayofweek + dayofyear + second + weekofyear + year + quarter + month + last_day + minute + months_between + next_day + hour + make_date + from_unixtime + unix_timestamp + to_timestamp + to_date + trunc + from_utc_timestamp + to_utc_timestamp + window + session_window + timestamp_seconds + + +Collection Functions +-------------------- +.. autosummary:: + :toctree: api/ + + array_contains + arrays_overlap + slice + array_join + concat + array_position + element_at + array_sort + array_remove + array_distinct + array_intersect + array_union + array_except + transform + exists + forall + filter + aggregate + zip_with + transform_keys + transform_values + map_filter + map_zip_with + explode + explode_outer + posexplode + posexplode_outer + get_json_object + json_tuple + from_json + schema_of_json + to_json + size + sort_array + array_max + array_min + shuffle + reverse + flatten + sequence + array_repeat + map_keys + map_values + map_entries + map_from_entries + arrays_zip + map_concat + from_csv + schema_of_csv + to_csv + + +Partition Transformation Functions +---------------------------------- +.. autosummary:: + :toctree: api/ + + years + months + days + hours + bucket + + +Aggregate Functions +------------------- +.. autosummary:: + :toctree: api/ + + approxCountDistinct + approx_count_distinct + avg + collect_list + collect_set + corr + count + count_distinct + countDistinct + covar_pop + covar_samp + first + grouping + grouping_id + kurtosis + last + max + max_by + mean + min + min_by + percentile_approx + product + skewness + stddev + stddev_pop + stddev_samp + sum + sum_distinct + sumDistinct + var_pop + var_samp + variance + + +Window Functions +---------------- +.. autosummary:: + :toctree: api/ + + cume_dist + dense_rank + lag + lead + nth_value + ntile + percent_rank + rank + row_number + + +Sort Functions +-------------- +.. autosummary:: + :toctree: api/ + + asc + asc_nulls_first + asc_nulls_last + desc + desc_nulls_first + desc_nulls_last + + +String Functions +---------------- +.. autosummary:: + :toctree: api/ + + ascii + base64 + bit_length + concat_ws + decode + encode + format_number + format_string + initcap + instr + length + lower + levenshtein + locate + lpad + ltrim + octet_length + regexp_extract + regexp_replace + unbase64 + rpad + repeat + rtrim + soundex + split + substring + substring_index + overlay + sentences + translate + trim + upper + + +UDF +--- +.. autosummary:: + :toctree: api/ + + pandas_udf + udf + +Misc Functions +-------------- +.. autosummary:: + :toctree: api/ + + md5 + sha1 + sha2 + crc32 + hash + xxhash64 + assert_true + raise_error + diff --git a/python/docs/source/reference/pyspark.sql/grouping.rst b/python/docs/source/reference/pyspark.sql/grouping.rst new file mode 100644 index 0000000000000..459ef57275647 --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/grouping.rst @@ -0,0 +1,38 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +======== +Grouping +======== +.. currentmodule:: pyspark.sql + +.. autosummary:: + :toctree: api/ + + GroupedData.agg + GroupedData.apply + GroupedData.applyInPandas + GroupedData.avg + GroupedData.cogroup + GroupedData.count + GroupedData.max + GroupedData.mean + GroupedData.min + GroupedData.pivot + GroupedData.sum + PandasCogroupedOps.applyInPandas diff --git a/python/docs/source/reference/pyspark.sql/index.rst b/python/docs/source/reference/pyspark.sql/index.rst new file mode 100644 index 0000000000000..a8b52f4a1b5ee --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/index.rst @@ -0,0 +1,41 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +========= +Spark SQL +========= + +This page gives an overview of all public Spark SQL API. + +.. toctree:: + :maxdepth: 2 + + core_classes + spark_session + configuration + io + dataframe + column + data_types + row + functions + window + grouping + catalog + observation + avro diff --git a/python/docs/source/reference/pyspark.sql/io.rst b/python/docs/source/reference/pyspark.sql/io.rst new file mode 100644 index 0000000000000..52e4593eeadf3 --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/io.rst @@ -0,0 +1,54 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +============ +Input/Output +============ +.. currentmodule:: pyspark.sql + +.. autosummary:: + :toctree: api/ + + DataFrameReader.csv + DataFrameReader.format + DataFrameReader.jdbc + DataFrameReader.json + DataFrameReader.load + DataFrameReader.option + DataFrameReader.options + DataFrameReader.orc + DataFrameReader.parquet + DataFrameReader.schema + DataFrameReader.table + DataFrameReader.text + DataFrameWriter.bucketBy + DataFrameWriter.csv + DataFrameWriter.format + DataFrameWriter.insertInto + DataFrameWriter.jdbc + DataFrameWriter.json + DataFrameWriter.mode + DataFrameWriter.option + DataFrameWriter.options + DataFrameWriter.orc + DataFrameWriter.parquet + DataFrameWriter.partitionBy + DataFrameWriter.save + DataFrameWriter.saveAsTable + DataFrameWriter.sortBy + DataFrameWriter.text diff --git a/python/docs/source/reference/pyspark.sql/observation.rst b/python/docs/source/reference/pyspark.sql/observation.rst new file mode 100644 index 0000000000000..52867eda109f9 --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/observation.rst @@ -0,0 +1,27 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +=========== +Observation +=========== +.. currentmodule:: pyspark.sql + +.. autosummary:: + :toctree: api/ + + Observation.get diff --git a/python/docs/source/reference/pyspark.sql/row.rst b/python/docs/source/reference/pyspark.sql/row.rst new file mode 100644 index 0000000000000..1234b8d92ae6c --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/row.rst @@ -0,0 +1,27 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +=== +Row +=== +.. currentmodule:: pyspark.sql + +.. autosummary:: + :toctree: api/ + + Row.asDict diff --git a/python/docs/source/reference/pyspark.sql/spark_session.rst b/python/docs/source/reference/pyspark.sql/spark_session.rst new file mode 100644 index 0000000000000..d4fb7270a77ba --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/spark_session.rst @@ -0,0 +1,53 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +============= +Spark Session +============= +.. currentmodule:: pyspark.sql + +.. autosummary:: + :toctree: api/ + +The entry point to programming Spark with the Dataset and DataFrame API. +To create a Spark session, you should use ``SparkSession.builder`` attribute. +See also :class:`SparkSession`. + +.. autosummary:: + :toctree: api/ + + SparkSession.builder.appName + SparkSession.builder.config + SparkSession.builder.enableHiveSupport + SparkSession.builder.getOrCreate + SparkSession.builder.master + SparkSession.catalog + SparkSession.conf + SparkSession.createDataFrame + SparkSession.getActiveSession + SparkSession.newSession + SparkSession.range + SparkSession.read + SparkSession.readStream + SparkSession.sparkContext + SparkSession.sql + SparkSession.stop + SparkSession.streams + SparkSession.table + SparkSession.udf + SparkSession.version diff --git a/python/docs/source/reference/pyspark.sql/window.rst b/python/docs/source/reference/pyspark.sql/window.rst new file mode 100644 index 0000000000000..3625164d0a07f --- /dev/null +++ b/python/docs/source/reference/pyspark.sql/window.rst @@ -0,0 +1,38 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +====== +Window +====== + +.. currentmodule:: pyspark.sql + +.. autosummary:: + :toctree: api/ + + Window.currentRow + Window.orderBy + Window.partitionBy + Window.rangeBetween + Window.rowsBetween + Window.unboundedFollowing + Window.unboundedPreceding + WindowSpec.orderBy + WindowSpec.partitionBy + WindowSpec.rangeBetween + WindowSpec.rowsBetween From 2faaf8a95609b44bfd0bcfd59629cd27a9abb024 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 26 May 2022 21:05:29 +0800 Subject: [PATCH 300/535] [SPARK-39272][SQL][3.3] Increase the start position of query context by 1 ### What changes were proposed in this pull request? Increase the start position of query context by 1 ### Why are the changes needed? Currently, the line number starts from 1, while the start position starts from 0. Thus it's better to increase the start position by 1 for consistency. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #36684 from gengliangwang/portSPARK-39234. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../spark/sql/catalyst/trees/TreeNode.scala | 4 +- .../sql/catalyst/trees/TreeNodeSuite.scala | 2 +- .../sql-tests/results/ansi/cast.sql.out | 70 +++++++++---------- .../sql-tests/results/ansi/date.sql.out | 6 +- .../ansi/datetime-parsing-invalid.sql.out | 4 +- .../ansi/decimalArithmeticOperations.sql.out | 8 +-- .../sql-tests/results/ansi/interval.sql.out | 34 ++++----- .../sql-tests/results/ansi/map.sql.out | 8 +-- .../results/ansi/string-functions.sql.out | 8 +-- .../sql-tests/results/interval.sql.out | 12 ++-- .../results/postgreSQL/boolean.sql.out | 32 ++++----- .../results/postgreSQL/float4.sql.out | 8 +-- .../results/postgreSQL/float8.sql.out | 8 +-- .../sql-tests/results/postgreSQL/int4.sql.out | 12 ++-- .../sql-tests/results/postgreSQL/int8.sql.out | 14 ++-- .../results/postgreSQL/select_having.sql.out | 2 +- .../sql-tests/results/postgreSQL/text.sql.out | 4 +- .../results/postgreSQL/window_part2.sql.out | 2 +- .../results/postgreSQL/window_part3.sql.out | 2 +- .../results/postgreSQL/window_part4.sql.out | 2 +- .../udf/postgreSQL/udf-select_having.sql.out | 2 +- 21 files changed, 123 insertions(+), 121 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 09d24c5f81181..85616f118e607 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -79,7 +79,9 @@ case class Origin( "" } else { val positionContext = if (line.isDefined && startPosition.isDefined) { - s"(line ${line.get}, position ${startPosition.get})" + // Note that the line number starts from 1, while the start position starts from 0. + // Here we increase the start position by 1 for consistency. + s"(line ${line.get}, position ${startPosition.get + 1})" } else { "" } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index 899a740bdae86..1e1206c0e1ee3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -876,7 +876,7 @@ class TreeNodeSuite extends SparkFunSuite with SQLHelper { objectType = Some("VIEW"), objectName = Some("some_view")) val expected = - """== SQL of VIEW some_view(line 3, position 38) == + """== SQL of VIEW some_view(line 3, position 39) == |...7890 + 1234567890 + 1234567890, cast('a' | ^^^^^^^^ |as /* comment */ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out index 6286afecbef80..aff07bd3946ff 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -9,7 +9,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '1.23' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('1.23' AS int) ^^^^^^^^^^^^^^^^^^^ @@ -21,7 +21,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '1.23' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('1.23' AS long) ^^^^^^^^^^^^^^^^^^^^ @@ -33,7 +33,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '-4.56' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('-4.56' AS int) ^^^^^^^^^^^^^^^^^^^^ @@ -45,7 +45,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '-4.56' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('-4.56' AS long) ^^^^^^^^^^^^^^^^^^^^^ @@ -57,7 +57,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'abc' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('abc' AS int) ^^^^^^^^^^^^^^^^^^ @@ -69,7 +69,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'abc' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('abc' AS long) ^^^^^^^^^^^^^^^^^^^ @@ -81,7 +81,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'abc' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('abc' AS float) ^^^^^^^^^^^^^^^^^^^^ @@ -93,7 +93,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'abc' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('abc' AS double) ^^^^^^^^^^^^^^^^^^^^^ @@ -105,7 +105,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '1234567890123' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('1234567890123' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -117,7 +117,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '12345678901234567890123' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('12345678901234567890123' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -129,7 +129,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('' AS int) ^^^^^^^^^^^^^^^ @@ -141,7 +141,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('' AS long) ^^^^^^^^^^^^^^^^ @@ -153,7 +153,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('' AS float) ^^^^^^^^^^^^^^^^^ @@ -165,7 +165,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('' AS double) ^^^^^^^^^^^^^^^^^^ @@ -193,7 +193,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '123.a' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('123.a' AS int) ^^^^^^^^^^^^^^^^^^^^ @@ -205,7 +205,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '123.a' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('123.a' AS long) ^^^^^^^^^^^^^^^^^^^^^ @@ -217,7 +217,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '123.a' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('123.a' AS float) ^^^^^^^^^^^^^^^^^^^^^^ @@ -229,7 +229,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '123.a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('123.a' AS double) ^^^^^^^^^^^^^^^^^^^^^^^ @@ -249,7 +249,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '-2147483649' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('-2147483649' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -269,7 +269,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '2147483648' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('2147483648' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -289,7 +289,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '-9223372036854775809' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('-9223372036854775809' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -309,7 +309,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '9223372036854775808' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT CAST('9223372036854775808' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -568,7 +568,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '1中文' of the type "STRING" cannot be cast to "TINYINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast('1中文' as tinyint) ^^^^^^^^^^^^^^^^^^^^^^ @@ -580,7 +580,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '1中文' of the type "STRING" cannot be cast to "SMALLINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast('1中文' as smallint) ^^^^^^^^^^^^^^^^^^^^^^^ @@ -592,7 +592,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '1中文' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast('1中文' as INT) ^^^^^^^^^^^^^^^^^^ @@ -604,7 +604,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '中文1' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast('中文1' as bigint) ^^^^^^^^^^^^^^^^^^^^^ @@ -616,7 +616,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '1中文' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast('1中文' as bigint) ^^^^^^^^^^^^^^^^^^^^^ @@ -647,7 +647,7 @@ struct<> org.apache.spark.SparkRuntimeException The value ' xyz ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast('\t\n xyz \t\r' as boolean) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -667,7 +667,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Decimal(expanded, 123.45, 5, 2) cannot be represented as Decimal(4, 2). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast('123.45' as decimal(4, 2)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -679,7 +679,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'xyz' of the type "STRING" cannot be cast to "DECIMAL(4,2)" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast('xyz' as decimal(4, 2)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -699,7 +699,7 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException The value 'a' of the type "STRING" cannot be cast to "DATE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast('a' as date) ^^^^^^^^^^^^^^^^^ @@ -719,7 +719,7 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast('a' as timestamp) ^^^^^^^^^^^^^^^^^^^^^^ @@ -739,7 +739,7 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast('a' as timestamp_ntz) ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -751,7 +751,7 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast(cast('inf' as double) as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -763,6 +763,6 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast(cast('inf' as float) as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index 0bb5de24831fc..88855b16c6785 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -233,7 +233,7 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException The value 'xx' of the type "STRING" cannot be cast to "DATE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select next_day("xx", "Mon") ^^^^^^^^^^^^^^^^^^^^^ @@ -328,7 +328,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select date_add('2011-11-11', '1.2') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -439,7 +439,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select date_sub(date'2011-11-11', '1.2') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out index c823ca55f3b0d..7d4b3c25c6efa 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out @@ -243,7 +243,7 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException The value 'Unparseable' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast("Unparseable" as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -255,6 +255,6 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException The value 'Unparseable' of the type "STRING" cannot be cast to "DATE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select cast("Unparseable" as date) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out index 5db875ff10ad9..9025e91064888 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out @@ -77,7 +77,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Decimal(expanded, 10000000000000000000000000000000000000.1, 39, 1) cannot be represented as Decimal(38, 1). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select (5e36BD + 0.1) + 5e36BD ^^^^^^^^^^^^^^^^^^^^^^^ @@ -89,7 +89,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Decimal(expanded, -11000000000000000000000000000000000000.1, 39, 1) cannot be represented as Decimal(38, 1). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select (-4e36BD - 0.1) - 7e36BD ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -101,7 +101,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Decimal(expanded, 152415787532388367501905199875019052100, 39, 0) cannot be represented as Decimal(38, 2). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select 12345678901234567890.0 * 12345678901234567890.0 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -113,7 +113,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Decimal(expanded, 1000000000000000000000000000000000000, 37, 0) cannot be represented as Decimal(38, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select 1e35BD / 0.1 ^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index cefa7cf20ac87..1364309d01016 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -123,7 +123,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select interval 2 second * 'a' ^^^^^^^^^^^^^^^^^^^^^^^ @@ -135,7 +135,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select interval 2 second / 'a' ^^^^^^^^^^^^^^^^^^^^^^^ @@ -147,7 +147,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select interval 2 year * 'a' ^^^^^^^^^^^^^^^^^^^^^ @@ -159,7 +159,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select interval 2 year / 'a' ^^^^^^^^^^^^^^^^^^^^^ @@ -187,7 +187,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select 'a' * interval 2 second ^^^^^^^^^^^^^^^^^^^^^^^ @@ -199,7 +199,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select 'a' * interval 2 year ^^^^^^^^^^^^^^^^^^^^^ @@ -229,7 +229,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select interval '2 seconds' / 0 ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -265,7 +265,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select interval '2' year / 0 ^^^^^^^^^^^^^^^^^^^^^ @@ -665,7 +665,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Decimal(expanded, 1234567890123456789, 20, 0) cannot be represented as Decimal(18, 6). If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select make_interval(0, 0, 0, 0, 0, 0, 1234567890123456789) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1517,7 +1517,7 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException The value '4 11:11' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select '4 11:11' - interval '4 22:12' day to minute ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1529,7 +1529,7 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException The value '4 12:12:12' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select '4 12:12:12' + interval '4 22:12' day to minute ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1567,7 +1567,7 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select str - interval '4 22:12' day to minute from interval_view ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1579,7 +1579,7 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select str + interval '4 22:12' day to minute from interval_view ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2037,7 +2037,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2049,7 +2049,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1L ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2095,7 +2095,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2107,7 +2107,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1L ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out index b54cc6d48bf38..20a6c013a2d0f 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out @@ -9,7 +9,7 @@ struct<> -- !query output org.apache.spark.SparkNoSuchElementException Key 5 does not exist. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select element_at(map(1, 'a', 2, 'b'), 5) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -21,7 +21,7 @@ struct<> -- !query output org.apache.spark.SparkNoSuchElementException Key 5 does not exist. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select map(1, 'a', 2, 'b')[5] ^^^^^^^^^^^^^^^^^^^^^^ @@ -115,7 +115,7 @@ struct<> -- !query output org.apache.spark.SparkNoSuchElementException Key 5 does not exist. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select element_at(map(1, 'a', 2, 'b'), 5) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -127,6 +127,6 @@ struct<> -- !query output org.apache.spark.SparkNoSuchElementException Key 'c' does not exist. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select element_at(map('a', 1, 'b', 2), 'c') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 5621759421019..1729daab8fbcd 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -83,7 +83,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 42) == +== SQL(line 1, position 43) == ...t("abcd", -2), left("abcd", 0), left("abcd", 'a') ^^^^^^^^^^^^^^^^^ @@ -111,7 +111,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 44) == +== SQL(line 1, position 45) == ...("abcd", -2), right("abcd", 0), right("abcd", 'a') ^^^^^^^^^^^^^^^^^^ @@ -420,7 +420,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT lpad('hi', 'invalid_length') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -432,7 +432,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT rpad('hi', 'invalid_length') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 01cc7efb492a0..08efee4d96a44 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -205,7 +205,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select interval '2 seconds' / 0 ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -241,7 +241,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select interval '2' year / 0 ^^^^^^^^^^^^^^^^^^^^^ @@ -1993,7 +1993,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2005,7 +2005,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1L ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2051,7 +2051,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2063,7 +2063,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1L ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out index a2d0ba73e5d30..26c2dea4a994b 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out @@ -57,7 +57,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value 'test' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean('test') AS error ^^^^^^^^^^^^^^^ @@ -77,7 +77,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value 'foo' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean('foo') AS error ^^^^^^^^^^^^^^ @@ -105,7 +105,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value 'yeah' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean('yeah') AS error ^^^^^^^^^^^^^^^ @@ -133,7 +133,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value 'nay' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean('nay') AS error ^^^^^^^^^^^^^^ @@ -145,7 +145,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value 'on' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean('on') AS true ^^^^^^^^^^^^^ @@ -157,7 +157,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value 'off' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean('off') AS `false` ^^^^^^^^^^^^^^ @@ -169,7 +169,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value 'of' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean('of') AS `false` ^^^^^^^^^^^^^ @@ -181,7 +181,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value 'o' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean('o') AS error ^^^^^^^^^^^^ @@ -193,7 +193,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value 'on_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean('on_') AS error ^^^^^^^^^^^^^^ @@ -205,7 +205,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value 'off_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean('off_') AS error ^^^^^^^^^^^^^^^ @@ -225,7 +225,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value '11' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean('11') AS error ^^^^^^^^^^^^^ @@ -245,7 +245,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value '000' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean('000') AS error ^^^^^^^^^^^^^^ @@ -257,7 +257,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean('') AS error ^^^^^^^^^^^ @@ -366,7 +366,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value ' tru e ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean(string(' tru e ')) AS invalid ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -378,7 +378,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT boolean(string('')) AS invalid ^^^^^^^^^^^^^^^^^^^ @@ -525,7 +525,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException failed to evaluate expression CAST('XXX' AS BOOLEAN): The value 'XXX' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 2, position 11) == +== SQL(line 2, position 12) == VALUES (boolean('XXX')) ^^^^^^^^^^^^^^ ; line 2 pos 3 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out index 34ab90a26f1a4..59fd447f6fea3 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out @@ -97,7 +97,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'N A N' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT float('N A N') ^^^^^^^^^^^^^^ @@ -109,7 +109,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'NaN x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT float('NaN x') ^^^^^^^^^^^^^^ @@ -121,7 +121,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value ' INFINITY x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT float(' INFINITY x') ^^^^^^^^^^^^^^^^^^^^^^^ @@ -157,7 +157,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 13) == +== SQL(line 1, position 14) == SELECT float(decimal('nan')) ^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out index 33aec5bfaf100..6c5ae56debfac 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out @@ -129,7 +129,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'N A N' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT double('N A N') ^^^^^^^^^^^^^^^ @@ -141,7 +141,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'NaN x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT double('NaN x') ^^^^^^^^^^^^^^^ @@ -153,7 +153,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value ' INFINITY x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT double(' INFINITY x') ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -189,7 +189,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 14) == +== SQL(line 1, position 15) == SELECT double(decimal('nan')) ^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out index 265aaa7ce2af9..d8351aa6251ac 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out @@ -201,7 +201,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 25) == +== SQL(line 1, position 26) == SELECT '' AS five, i.f1, i.f1 * smallint('2') AS x FROM INT4_TBL i ^^^^^^^^^^^^^^^^^^^^ @@ -224,7 +224,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 25) == +== SQL(line 1, position 26) == SELECT '' AS five, i.f1, i.f1 * int('2') AS x FROM INT4_TBL i ^^^^^^^^^^^^^^^ @@ -247,7 +247,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 25) == +== SQL(line 1, position 26) == SELECT '' AS five, i.f1, i.f1 + smallint('2') AS x FROM INT4_TBL i ^^^^^^^^^^^^^^^^^^^^ @@ -271,7 +271,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 25) == +== SQL(line 1, position 26) == SELECT '' AS five, i.f1, i.f1 + int('2') AS x FROM INT4_TBL i ^^^^^^^^^^^^^^^ @@ -295,7 +295,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 25) == +== SQL(line 1, position 26) == SELECT '' AS five, i.f1, i.f1 - smallint('2') AS x FROM INT4_TBL i ^^^^^^^^^^^^^^^^^^^^ @@ -319,7 +319,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException integer overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 25) == +== SQL(line 1, position 26) == SELECT '' AS five, i.f1, i.f1 - int('2') AS x FROM INT4_TBL i ^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out index 7761127d7b5ad..10f7606595ec7 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out @@ -393,7 +393,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException long overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 28) == +== SQL(line 1, position 29) == SELECT '' AS three, q1, q2, q1 * q2 AS multiply FROM INT8_TBL ^^^^^^^ @@ -576,7 +576,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select bigint('9223372036854775800') / bigint('0') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -588,7 +588,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select bigint('-9223372036854775808') / smallint('0') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -600,7 +600,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select smallint('100') / bigint('0') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -755,7 +755,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException long overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT bigint((-9223372036854775808)) * bigint((-1)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -783,7 +783,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException long overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT bigint((-9223372036854775808)) * int((-1)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -811,7 +811,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException long overflow. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == SELECT bigint((-9223372036854775808)) * smallint((-1)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out index 7a9f4ae055cc2..e9fd01ef92965 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out @@ -178,7 +178,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 39) == +== SQL(line 1, position 40) == ...1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2 ^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out index a3f149211966a..5828a51c14c2a 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out @@ -66,7 +66,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select string('four: ') || 2+2 ^^^^^^^^^^^^^^^^^^^^^^^ @@ -78,7 +78,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 1, position 7) == +== SQL(line 1, position 8) == select 'four: ' || 2+2 ^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out index 4da230c2e5a55..b1536a2a3e2f7 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out @@ -463,7 +463,7 @@ struct<> -- !query output org.apache.spark.SparkNumberFormatException The value 'NaN' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 3, position 12) == +== SQL(line 3, position 13) == window w as (order by f_numeric range between ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 1.1 preceding and 'NaN' following) diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index 25125281a74c8..c510d31a06e82 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -73,7 +73,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): The value '11:00 BST' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 2, position 23) == +== SQL(line 2, position 24) == (1, timestamp '11:00', cast ('11:00 BST' as timestamp), cast ('1 year' as timestamp), ... ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ; line 1 pos 22 diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out index f341f475fcdf0..4662cb6493f83 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out @@ -502,7 +502,7 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException failed to evaluate expression CAST('nan' AS INT): The value 'nan' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -== SQL(line 3, position 28) == +== SQL(line 3, position 29) == FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) ^^^^^^^^^^^^^^^^^^ ; line 3 pos 6 diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out index f16a680cfbcc0..cde9b1c9df162 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out @@ -178,7 +178,7 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. -== SQL(line 1, position 39) == +== SQL(line 1, position 40) == ...1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2 ^^^^^^^^ From 4a43b4d7ddea96873095ddedae517268cbbe1663 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Thu, 26 May 2022 10:35:05 -0700 Subject: [PATCH 301/535] [SPARK-36681][CORE][TESTS][FOLLOW-UP] Handle LinkageError when Snappy native library is not available in low Hadoop versions ### What changes were proposed in this pull request? This is a follow-up to https://github.com/apache/spark/pull/36136 to fix `LinkageError` handling in `FileSuite` to avoid test suite abort when Snappy native library is not available in low Hadoop versions: ``` 23:16:22 FileSuite: 23:16:22 org.apache.spark.FileSuite *** ABORTED *** 23:16:22 java.lang.RuntimeException: Unable to load a Suite class that was discovered in the runpath: org.apache.spark.FileSuite 23:16:22 at org.scalatest.tools.DiscoverySuite$.getSuiteInstance(DiscoverySuite.scala:81) 23:16:22 at org.scalatest.tools.DiscoverySuite.$anonfun$nestedSuites$1(DiscoverySuite.scala:38) 23:16:22 at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238) 23:16:22 at scala.collection.Iterator.foreach(Iterator.scala:941) 23:16:22 at scala.collection.Iterator.foreach$(Iterator.scala:941) 23:16:22 at scala.collection.AbstractIterator.foreach(Iterator.scala:1429) 23:16:22 at scala.collection.IterableLike.foreach(IterableLike.scala:74) 23:16:22 at scala.collection.IterableLike.foreach$(IterableLike.scala:73) 23:16:22 at scala.collection.AbstractIterable.foreach(Iterable.scala:56) 23:16:22 at scala.collection.TraversableLike.map(TraversableLike.scala:238) 23:16:22 ... 23:16:22 Cause: java.lang.UnsatisfiedLinkError: org.apache.hadoop.util.NativeCodeLoader.buildSupportsSnappy()Z 23:16:22 at org.apache.hadoop.util.NativeCodeLoader.buildSupportsSnappy(Native Method) 23:16:22 at org.apache.hadoop.io.compress.SnappyCodec.checkNativeCodeLoaded(SnappyCodec.java:63) 23:16:22 at org.apache.hadoop.io.compress.SnappyCodec.getCompressorType(SnappyCodec.java:136) 23:16:22 at org.apache.spark.FileSuite.$anonfun$new$12(FileSuite.scala:145) 23:16:22 at scala.util.Try$.apply(Try.scala:213) 23:16:22 at org.apache.spark.FileSuite.(FileSuite.scala:141) 23:16:22 at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) 23:16:22 at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) 23:16:22 at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) 23:16:22 at java.lang.reflect.Constructor.newInstance(Constructor.java:423) ``` Scala's `Try` can handle only `NonFatal` throwables. ### Why are the changes needed? To make the tests robust. ### Does this PR introduce _any_ user-facing change? Nope, this is test-only. ### How was this patch tested? Manual test. Closes #36687 from peter-toth/SPARK-36681-handle-linkageerror. Authored-by: Peter Toth Signed-off-by: Dongjoon Hyun (cherry picked from commit dbde77856d2e51ff502a7fc1dba7f10316c2211b) Signed-off-by: Dongjoon Hyun --- core/src/test/scala/org/apache/spark/FileSuite.scala | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala index 97795c55c82ff..9c22ee09d0be1 100644 --- a/core/src/test/scala/org/apache/spark/FileSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileSuite.scala @@ -23,6 +23,7 @@ import java.nio.charset.StandardCharsets import java.util.zip.GZIPOutputStream import scala.io.Source +import scala.util.control.NonFatal import com.google.common.io.Files import org.apache.hadoop.conf.Configuration @@ -137,13 +138,16 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { // Hadoop "gzip" and "zstd" codecs require native library installed for sequence files private val codecs = Seq((new DefaultCodec(), "default"), (new BZip2Codec(), "bzip2")) ++ { - scala.util.Try { + try { // See HADOOP-17125. Hadoop lower than 3.3.1 can throw an exception when its native // library for Snappy is unavailable. Here it calls `SnappyCodec.getCompressorType` // to indirectly test if the Snappy native library is available in lower Hadoop versions. new SnappyCodec().getCompressorType - (new SnappyCodec(), "snappy") - }.toOption + Some(new SnappyCodec(), "snappy") + } catch { + case _: LinkageError => None + case NonFatal(_) => None + } } ++ { if (VersionUtils.isHadoop3) Seq((new Lz4Codec(), "lz4")) else Seq.empty } From ad8c8676b1e55ca43bee538d304ae8ae874c2b92 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 27 May 2022 17:58:33 -0700 Subject: [PATCH 302/535] [SPARK-39322][DOCS] Remove `Experimental` from `spark.dynamicAllocation.shuffleTracking.enabled` ### What changes were proposed in this pull request? This PR aims to remove `Experimental` from `spark.dynamicAllocation.shuffleTracking.enabled`. ### Why are the changes needed? `spark.dynamicAllocation.shuffleTracking.enabled` was added at Apache Spark 3.0.0 and has been used with K8s resource manager. ### Does this PR introduce _any_ user-facing change? No, this is a documentation only change. ### How was this patch tested? Manual. Closes #36705 from dongjoon-hyun/SPARK-39322. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit fe85d7912f86c3e337aa93b23bfa7e7e01c0a32e) Signed-off-by: Dongjoon Hyun --- docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index 4fa37792a335f..7952b4b1a2a69 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2726,7 +2726,7 @@ Apart from these, the following properties are also available, and may be useful
        From 109904e8fd1a7543e23c1cdb6c86af490bc75a4e Mon Sep 17 00:00:00 2001 From: William Hyun Date: Sun, 29 May 2022 00:27:34 -0700 Subject: [PATCH 303/535] [SPARK-39327][K8S] ExecutorRollPolicy.ID should consider ID as a numerical value This PR aims to make `ExecutorRollPolicy.ID` should consider ID as a numerical value. Currently, the ExecutorRollPolicy chooses the smallest ID from string sorting. No, 3.3.0 is not released yet. Pass the CIs. Closes #36715 from williamhyun/SPARK-39327. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 97f4b0cc1b20ca641d0e968e0b0fb45557085115) Signed-off-by: Dongjoon Hyun --- .../scheduler/cluster/k8s/ExecutorRollPlugin.scala | 3 ++- .../cluster/k8s/ExecutorRollPluginSuite.scala | 12 +++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala index 5da4510d2cc86..ac048d68adfde 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPlugin.scala @@ -106,7 +106,8 @@ class ExecutorRollDriverPlugin extends DriverPlugin with Logging { .filter(_.totalTasks >= minTasks) val sortedList = policy match { case ExecutorRollPolicy.ID => - listWithoutDriver.sortBy(_.id) + // We can convert to integer because EXECUTOR_ID_COUNTER uses AtomicInteger. + listWithoutDriver.sortBy(_.id.toInt) case ExecutorRollPolicy.ADD_TIME => listWithoutDriver.sortBy(_.addTime) case ExecutorRollPolicy.TOTAL_GC_TIME => diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPluginSuite.scala index 886abc033893d..495b2e3ac1b02 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPluginSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorRollPluginSuite.scala @@ -110,9 +110,17 @@ class ExecutorRollPluginSuite extends SparkFunSuite with PrivateMethodTester { Option.empty, Option.empty, Map(), Option.empty, Set(), Option.empty, Map(), Map(), 1, false, Set()) + val execWithTwoDigitID = new ExecutorSummary("10", "host:port", true, 1, + 10, 10, 1, 1, 1, + 4, 0, 2, 280, + 30, 100, 100, + 10, false, 20, new Date(1639300001000L), + Option.empty, Option.empty, Map(), Option.empty, Set(), Option.empty, Map(), Map(), 1, + false, Set()) + val list = Seq(driverSummary, execWithSmallestID, execWithSmallestAddTime, execWithBiggestTotalGCTime, execWithBiggestTotalDuration, execWithBiggestFailedTasks, - execWithBiggestAverageDuration, execWithoutTasks, execNormal) + execWithBiggestAverageDuration, execWithoutTasks, execNormal, execWithTwoDigitID) override def beforeEach(): Unit = { super.beforeEach() @@ -148,6 +156,8 @@ class ExecutorRollPluginSuite extends SparkFunSuite with PrivateMethodTester { test("Policy: ID") { assertEquals(Some("1"), plugin.invokePrivate(_choose(list, ExecutorRollPolicy.ID))) + assertEquals(Some("2"), plugin.invokePrivate(_choose(list.filter(_.id != "1"), + ExecutorRollPolicy.ID))) } test("Policy: ADD_TIME") { From bb032925f1a20efe1e6a3ad5dd8e94022ee8dd61 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 30 May 2022 01:39:22 -0700 Subject: [PATCH 304/535] [SPARK-39334][BUILD] Exclude `slf4j-reload4j` from `hadoop-minikdc` test dependency ### What changes were proposed in this pull request? [HADOOP-18088 Replace log4j 1.x with reload4j](https://issues.apache.org/jira/browse/HADOOP-18088) , this pr adds the exclusion of `slf4j-reload4j` for `hadoop-minikdc` to clean up waring message about `Class path contains multiple SLF4J bindings` when run UTs. ### Why are the changes needed? Cleanup `Class path contains multiple SLF4J bindings` waring when run UTs. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GA - Manual test for example, run `mvn clean test -pl core` **Before** ``` [INFO] Running test.org.apache.spark.Java8RDDAPISuite SLF4J: Class path contains multiple SLF4J bindings. SLF4J: Found binding in [jar:file:/Users/xxx/.m2/repository/org/apache/logging/log4j/log4j-slf4j-impl/2.17.2/log4j-slf4j-impl-2.17.2.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: Found binding in [jar:file:/Users/xxx/.m2/repository/org/slf4j/slf4j-reload4j/1.7.36/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory] ``` **After** no above warnings Closes #36721 from LuciferYang/SPARK-39334. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun (cherry picked from commit 93b8cc05d582fe4be1a3cd9452708f18e728f0bb) Signed-off-by: Dongjoon Hyun --- pom.xml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pom.xml b/pom.xml index 879d565bf9804..32bd40ee83461 100644 --- a/pom.xml +++ b/pom.xml @@ -1371,6 +1371,10 @@ org.slf4j slf4j-log4j12 + + org.slf4j + slf4j-reload4j + From 16788ee8905b94e51e00ccd8a48894f8824077b0 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 30 May 2022 03:54:37 -0700 Subject: [PATCH 305/535] [SPARK-39322][CORE][FOLLOWUP] Revise log messages for dynamic allocation and shuffle decommission ### What changes were proposed in this pull request? This PR is a follow-up for #36705 to revise the missed log message change. ### Why are the changes needed? Like the documentation, this PR updates the log message correspondingly. - Lower log level to `INFO` from `WARN` - Provide a specific message according to the configurations. ### Does this PR introduce _any_ user-facing change? No. This is a log-message-only change. ### How was this patch tested? Pass the CIs. Closes #36725 from dongjoon-hyun/SPARK-39322-2. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit be2c8c0115861e6975b658a7b0455bae828b7553) Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/ExecutorAllocationManager.scala | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index c4b619300b583..b9bc3c63ff4ae 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -204,13 +204,11 @@ private[spark] class ExecutorAllocationManager( s"s${DYN_ALLOCATION_SUSTAINED_SCHEDULER_BACKLOG_TIMEOUT.key} must be > 0!") } if (!conf.get(config.SHUFFLE_SERVICE_ENABLED)) { - // If dynamic allocation shuffle tracking or worker decommissioning along with - // storage shuffle decommissioning is enabled we have *experimental* support for - // decommissioning without a shuffle service. - if (conf.get(config.DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED) || - (decommissionEnabled && - conf.get(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED))) { - logWarning("Dynamic allocation without a shuffle service is an experimental feature.") + if (conf.get(config.DYN_ALLOCATION_SHUFFLE_TRACKING_ENABLED)) { + logInfo("Dynamic allocation is enabled without a shuffle service.") + } else if (decommissionEnabled && + conf.get(config.STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED)) { + logInfo("Shuffle data decommission is enabled without a shuffle service.") } else if (!testing) { throw new SparkException("Dynamic allocation of executors requires the external " + "shuffle service. You may enable this through spark.shuffle.service.enabled.") From b8904c39c0426ac55ceff8b2716c24e3ef7cdfbb Mon Sep 17 00:00:00 2001 From: William Hyun Date: Mon, 30 May 2022 13:07:12 -0700 Subject: [PATCH 306/535] [SPARK-39341][K8S] KubernetesExecutorBackend should allow IPv6 pod IP ### What changes were proposed in this pull request? This PR aims to make KubernetesExecutorBackend allow IPv6 pod IP. ### Why are the changes needed? The `hostname` comes from `SPARK_EXECUTOR_POD_IP`. ``` resource-managers/kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh: --hostname $SPARK_EXECUTOR_POD_IP ``` `SPARK_EXECUTOR_POD_IP` comes from `status.podIP` where it does not have `[]` in case of IPv6. https://github.com/apache/spark/blob/1a54a2bd69e35ab5f0cbd83df673c6f1452df418/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala#L140-L145 - https://kubernetes.io/docs/concepts/services-networking/dual-stack/ - https://en.wikipedia.org/wiki/IPv6_address ### Does this PR introduce _any_ user-facing change? No, this PR removes only the `[]` constraint from `checkHost`. ### How was this patch tested? Pass the CIs. Closes #36728 from williamhyun/IPv6. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 7bb009888eec416eef36587546d4c0ab0077bcf5) Signed-off-by: Dongjoon Hyun --- .../scheduler/cluster/k8s/KubernetesExecutorBackend.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBackend.scala index dd06688da349b..fbf485cfa2f29 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesExecutorBackend.scala @@ -66,7 +66,8 @@ private[spark] object KubernetesExecutorBackend extends Logging { SparkHadoopUtil.get.runAsSparkUser { () => // Debug code - Utils.checkHost(arguments.hostname) + assert(arguments.hostname != null && + (arguments.hostname.indexOf(':') == -1 || arguments.hostname.split(":").length > 2)) // Bootstrap to fetch the driver's Spark properties. val executorConf = new SparkConf From 1ad1c18fc283acea6d18bc4c8753d3b6e50408ed Mon Sep 17 00:00:00 2001 From: sandeepvinayak Date: Tue, 31 May 2022 15:28:07 -0700 Subject: [PATCH 307/535] [SPARK-39283][CORE] Fix deadlock between TaskMemoryManager and UnsafeExternalSorter.SpillableIterator ### What changes were proposed in this pull request? This PR fixes a deadlock between TaskMemoryManager and UnsafeExternalSorter.SpillableIterator. ### Why are the changes needed? We are facing the deadlock issue b/w TaskMemoryManager and UnsafeExternalSorter.SpillableIterator during the join. It turns out that in UnsafeExternalSorter.SpillableIterator#spill() function, it tries to get lock on UnsafeExternalSorter`SpillableIterator` and UnsafeExternalSorter and call `freePage` to free all allocated pages except the last one which takes the lock on TaskMemoryManager. At the same time, there can be another `MemoryConsumer` using `UnsafeExternalSorter` as part of sorting can try to allocatePage needs to get lock on `TaskMemoryManager` which can cause spill to happen which requires lock on `UnsafeExternalSorter` again causing deadlock. There is a similar fix here as well: https://issues.apache.org/jira/browse/SPARK-27338 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #36680 from sandeepvinayak/SPARK-39283. Authored-by: sandeepvinayak Signed-off-by: Josh Rosen (cherry picked from commit 8d0c035f102b005c2e85f03253f1c0c24f0a539f) Signed-off-by: Josh Rosen --- .../unsafe/sort/UnsafeExternalSorter.java | 160 ++++++++++++------ 1 file changed, 104 insertions(+), 56 deletions(-) diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index c38327cae8ce3..ac8170c9d97a0 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.IOException; import java.util.LinkedList; +import java.util.List; import java.util.Queue; import java.util.function.Supplier; @@ -298,16 +299,30 @@ public int getNumberOfAllocatedPages() { * @return the number of bytes freed. */ private long freeMemory() { - updatePeakMemoryUsed(); + List pagesToFree = clearAndGetAllocatedPagesToFree(); long memoryFreed = 0; - for (MemoryBlock block : allocatedPages) { + for (MemoryBlock block : pagesToFree) { memoryFreed += block.size(); freePage(block); } + return memoryFreed; + } + + /** + * Clear the allocated pages and return the list of allocated pages to let + * the caller free the page. This is to prevent the deadlock by nested locks + * if the caller locks the UnsafeExternalSorter and call freePage which locks the + * TaskMemoryManager and cause nested locks. + * + * @return list of allocated pages to free + */ + private List clearAndGetAllocatedPagesToFree() { + updatePeakMemoryUsed(); + List pagesToFree = new LinkedList<>(allocatedPages); allocatedPages.clear(); currentPage = null; pageCursor = 0; - return memoryFreed; + return pagesToFree; } /** @@ -328,12 +343,27 @@ private void deleteSpillFiles() { * Frees this sorter's in-memory data structures and cleans up its spill files. */ public void cleanupResources() { - synchronized (this) { - deleteSpillFiles(); - freeMemory(); - if (inMemSorter != null) { - inMemSorter.freeMemory(); - inMemSorter = null; + // To avoid deadlocks, we can't call methods that lock the TaskMemoryManager + // (such as various free() methods) while synchronizing on the UnsafeExternalSorter. + // Instead, we will manipulate UnsafeExternalSorter state inside the synchronized + // lock and perform the actual free() calls outside it. + UnsafeInMemorySorter inMemSorterToFree = null; + List pagesToFree = null; + try { + synchronized (this) { + deleteSpillFiles(); + pagesToFree = clearAndGetAllocatedPagesToFree(); + if (inMemSorter != null) { + inMemSorterToFree = inMemSorter; + inMemSorter = null; + } + } + } finally { + for (MemoryBlock pageToFree : pagesToFree) { + freePage(pageToFree); + } + if (inMemSorterToFree != null) { + inMemSorterToFree.freeMemory(); } } } @@ -576,58 +606,76 @@ public long getCurrentPageNumber() { } public long spill() throws IOException { - synchronized (this) { - if (inMemSorter == null) { - return 0L; - } - - long currentPageNumber = upstream.getCurrentPageNumber(); + UnsafeInMemorySorter inMemSorterToFree = null; + List pagesToFree = new LinkedList<>(); + try { + synchronized (this) { + if (inMemSorter == null) { + return 0L; + } - ShuffleWriteMetrics writeMetrics = new ShuffleWriteMetrics(); - if (numRecords > 0) { - // Iterate over the records that have not been returned and spill them. - final UnsafeSorterSpillWriter spillWriter = new UnsafeSorterSpillWriter( - blockManager, fileBufferSizeBytes, writeMetrics, numRecords); - spillIterator(upstream, spillWriter); - spillWriters.add(spillWriter); - upstream = spillWriter.getReader(serializerManager); - } else { - // Nothing to spill as all records have been read already, but do not return yet, as the - // memory still has to be freed. - upstream = null; - } + long currentPageNumber = upstream.getCurrentPageNumber(); + + ShuffleWriteMetrics writeMetrics = new ShuffleWriteMetrics(); + if (numRecords > 0) { + // Iterate over the records that have not been returned and spill them. + final UnsafeSorterSpillWriter spillWriter = new UnsafeSorterSpillWriter( + blockManager, fileBufferSizeBytes, writeMetrics, numRecords); + spillIterator(upstream, spillWriter); + spillWriters.add(spillWriter); + upstream = spillWriter.getReader(serializerManager); + } else { + // Nothing to spill as all records have been read already, but do not return yet, as the + // memory still has to be freed. + upstream = null; + } - long released = 0L; - synchronized (UnsafeExternalSorter.this) { - // release the pages except the one that is used. There can still be a caller that - // is accessing the current record. We free this page in that caller's next loadNext() - // call. - for (MemoryBlock page : allocatedPages) { - if (!loaded || page.pageNumber != currentPageNumber) { - released += page.size(); - freePage(page); - } else { - lastPage = page; + long released = 0L; + synchronized (UnsafeExternalSorter.this) { + // release the pages except the one that is used. There can still be a caller that + // is accessing the current record. We free this page in that caller's next loadNext() + // call. + for (MemoryBlock page : allocatedPages) { + if (!loaded || page.pageNumber != currentPageNumber) { + released += page.size(); + // Do not free the page, while we are locking `SpillableIterator`. The `freePage` + // method locks the `TaskMemoryManager`, and it's not a good idea to lock 2 objects + // in sequence. We may hit dead lock if another thread locks `TaskMemoryManager` + // and `SpillableIterator` in sequence, which may happen in + // `TaskMemoryManager.acquireExecutionMemory`. + pagesToFree.add(page); + } else { + lastPage = page; + } + } + allocatedPages.clear(); + if (lastPage != null) { + // Add the last page back to the list of allocated pages to make sure it gets freed in + // case loadNext() never gets called again. + allocatedPages.add(lastPage); } } - allocatedPages.clear(); - if (lastPage != null) { - // Add the last page back to the list of allocated pages to make sure it gets freed in - // case loadNext() never gets called again. - allocatedPages.add(lastPage); - } - } - // in-memory sorter will not be used after spilling - assert(inMemSorter != null); - released += inMemSorter.getMemoryUsage(); - totalSortTimeNanos += inMemSorter.getSortTimeNanos(); - inMemSorter.freeMemory(); - inMemSorter = null; - taskContext.taskMetrics().incMemoryBytesSpilled(released); - taskContext.taskMetrics().incDiskBytesSpilled(writeMetrics.bytesWritten()); - totalSpillBytes += released; - return released; + // in-memory sorter will not be used after spilling + assert (inMemSorter != null); + released += inMemSorter.getMemoryUsage(); + totalSortTimeNanos += inMemSorter.getSortTimeNanos(); + // Do not free the sorter while we are locking `SpillableIterator`, + // as this can cause a deadlock. + inMemSorterToFree = inMemSorter; + inMemSorter = null; + taskContext.taskMetrics().incMemoryBytesSpilled(released); + taskContext.taskMetrics().incDiskBytesSpilled(writeMetrics.bytesWritten()); + totalSpillBytes += released; + return released; + } + } finally { + for (MemoryBlock pageToFree : pagesToFree) { + freePage(pageToFree); + } + if (inMemSorterToFree != null) { + inMemSorterToFree.freeMemory(); + } } } From 000270a4ead61bb9d7333d05c55b02a2ec477a04 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Wed, 1 Jun 2022 09:49:45 -0700 Subject: [PATCH 308/535] [SPARK-39313][SQL] `toCatalystOrdering` should fail if V2Expression can not be translated After reading code changes in #35657, I guess the original intention of changing the return type of `V2ExpressionUtils.toCatalyst` from `Expression` to `Option[Expression]` is, for reading, spark can ignore unrecognized distribution and ordering, but for writing, it should always be strict. Specifically, `V2ExpressionUtils.toCatalystOrdering` should fail if V2Expression can not be translated instead of returning empty Seq. `V2ExpressionUtils.toCatalystOrdering` is used by `DistributionAndOrderingUtils`, the current behavior will break the semantics of `RequiresDistributionAndOrdering#requiredOrdering` in some cases(see UT). No. New UT. Closes #36697 from pan3793/SPARK-39313. Authored-by: Cheng Pan Signed-off-by: Chao Sun --- .../expressions/V2ExpressionUtils.scala | 23 +++-- .../expressions/V2ExpressionUtilsSuite.scala | 40 ++++++++ .../sql/connector/catalog/InMemoryTable.scala | 11 ++- .../v2/DistributionAndOrderingUtils.scala | 5 +- .../datasources/v2/V2ScanPartitioning.scala | 4 +- .../KeyGroupedPartitioningSuite.scala | 92 +++++-------------- 6 files changed, 85 insertions(+), 90 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtilsSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala index 596d5d8b565df..c252ea5ccfe03 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtils.scala @@ -27,7 +27,6 @@ import org.apache.spark.sql.connector.catalog.functions._ import org.apache.spark.sql.connector.expressions.{BucketTransform, Expression => V2Expression, FieldReference, IdentityTransform, NamedReference, NamedTransform, NullOrdering => V2NullOrdering, SortDirection => V2SortDirection, SortOrder => V2SortOrder, SortValue, Transform} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.types._ -import org.apache.spark.util.collection.Utils.sequenceToOption /** * A utility class that converts public connector expressions into Catalyst expressions. @@ -54,19 +53,25 @@ object V2ExpressionUtils extends SQLConfHelper with Logging { * Converts the array of input V2 [[V2SortOrder]] into their counterparts in catalyst. */ def toCatalystOrdering(ordering: Array[V2SortOrder], query: LogicalPlan): Seq[SortOrder] = { - sequenceToOption(ordering.map(toCatalyst(_, query))).asInstanceOf[Option[Seq[SortOrder]]] - .getOrElse(Seq.empty) + ordering.map(toCatalyst(_, query).asInstanceOf[SortOrder]) } def toCatalyst( + expr: V2Expression, + query: LogicalPlan, + funCatalogOpt: Option[FunctionCatalog] = None): Expression = + toCatalystOpt(expr, query, funCatalogOpt) + .getOrElse(throw new AnalysisException(s"$expr is not currently supported")) + + def toCatalystOpt( expr: V2Expression, query: LogicalPlan, funCatalogOpt: Option[FunctionCatalog] = None): Option[Expression] = { expr match { case t: Transform => - toCatalystTransform(t, query, funCatalogOpt) + toCatalystTransformOpt(t, query, funCatalogOpt) case SortValue(child, direction, nullOrdering) => - toCatalyst(child, query, funCatalogOpt).map { catalystChild => + toCatalystOpt(child, query, funCatalogOpt).map { catalystChild => SortOrder(catalystChild, toCatalyst(direction), toCatalyst(nullOrdering), Seq.empty) } case ref: FieldReference => @@ -76,7 +81,7 @@ object V2ExpressionUtils extends SQLConfHelper with Logging { } } - def toCatalystTransform( + def toCatalystTransformOpt( trans: Transform, query: LogicalPlan, funCatalogOpt: Option[FunctionCatalog] = None): Option[Expression] = trans match { @@ -89,7 +94,7 @@ object V2ExpressionUtils extends SQLConfHelper with Logging { // look up the V2 function. val numBucketsRef = AttributeReference("numBuckets", IntegerType, nullable = false)() funCatalogOpt.flatMap { catalog => - loadV2Function(catalog, "bucket", Seq(numBucketsRef) ++ resolvedRefs).map { bound => + loadV2FunctionOpt(catalog, "bucket", Seq(numBucketsRef) ++ resolvedRefs).map { bound => TransformExpression(bound, resolvedRefs, Some(numBuckets)) } } @@ -99,7 +104,7 @@ object V2ExpressionUtils extends SQLConfHelper with Logging { resolveRef[NamedExpression](r, query) } funCatalogOpt.flatMap { catalog => - loadV2Function(catalog, name, resolvedRefs).map { bound => + loadV2FunctionOpt(catalog, name, resolvedRefs).map { bound => TransformExpression(bound, resolvedRefs) } } @@ -107,7 +112,7 @@ object V2ExpressionUtils extends SQLConfHelper with Logging { throw new AnalysisException(s"Transform $trans is not currently supported") } - private def loadV2Function( + private def loadV2FunctionOpt( catalog: FunctionCatalog, name: String, args: Seq[Expression]): Option[BoundFunction] = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtilsSuite.scala new file mode 100644 index 0000000000000..d1c23d68555af --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/V2ExpressionUtilsSuite.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.plans.logical.LocalRelation +import org.apache.spark.sql.connector.expressions._ +import org.apache.spark.sql.types.StringType + +class V2ExpressionUtilsSuite extends SparkFunSuite { + + test("SPARK-39313: toCatalystOrdering should fail if V2Expression can not be translated") { + val supportedV2Sort = SortValue( + FieldReference("a"), SortDirection.ASCENDING, NullOrdering.NULLS_FIRST) + val unsupportedV2Sort = supportedV2Sort.copy( + expression = ApplyTransform("v2Fun", FieldReference("a") :: Nil)) + val exc = intercept[AnalysisException] { + V2ExpressionUtils.toCatalystOrdering( + Array(supportedV2Sort, unsupportedV2Sort), + LocalRelation.apply(AttributeReference("a", StringType)())) + } + assert(exc.message.contains("v2Fun(a) ASC NULLS FIRST is not currently supported")) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala index beed9111a308a..7cc97bdf29735 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryTable.scala @@ -29,7 +29,7 @@ import org.scalatest.Assertions._ import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, JoinedRow} import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, DateTimeUtils} -import org.apache.spark.sql.connector.distributions.{ClusteredDistribution, Distribution, Distributions} +import org.apache.spark.sql.connector.distributions.{Distribution, Distributions} import org.apache.spark.sql.connector.expressions._ import org.apache.spark.sql.connector.metric.{CustomMetric, CustomTaskMetric} import org.apache.spark.sql.connector.read._ @@ -291,9 +291,12 @@ class InMemoryTable( } override def outputPartitioning(): Partitioning = { - InMemoryTable.this.distribution match { - case cd: ClusteredDistribution => new KeyGroupedPartitioning(cd.clustering(), data.size) - case _ => new UnknownPartitioning(data.size) + if (InMemoryTable.this.partitioning.nonEmpty) { + new KeyGroupedPartitioning( + InMemoryTable.this.partitioning.map(_.asInstanceOf[Expression]), + data.size) + } else { + new UnknownPartitioning(data.size) } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DistributionAndOrderingUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DistributionAndOrderingUtils.scala index 275255c9a3d39..0c0b5db14ace3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DistributionAndOrderingUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DistributionAndOrderingUtils.scala @@ -24,7 +24,6 @@ import org.apache.spark.sql.connector.distributions._ import org.apache.spark.sql.connector.write.{RequiresDistributionAndOrdering, Write} import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.util.collection.Utils.sequenceToOption object DistributionAndOrderingUtils { @@ -34,9 +33,7 @@ object DistributionAndOrderingUtils { val distribution = write.requiredDistribution match { case d: OrderedDistribution => toCatalystOrdering(d.ordering(), query) - case d: ClusteredDistribution => - sequenceToOption(d.clustering.map(e => toCatalyst(e, query))) - .getOrElse(Seq.empty[Expression]) + case d: ClusteredDistribution => d.clustering.map(e => toCatalyst(e, query)).toSeq case _: UnspecifiedDistribution => Seq.empty[Expression] } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala index 8d2b3a8880cd3..9a5a7e6aab63a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala @@ -32,7 +32,7 @@ import org.apache.spark.util.collection.Utils.sequenceToOption */ object V2ScanPartitioning extends Rule[LogicalPlan] with SQLConfHelper { override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { - case d @ DataSourceV2ScanRelation(relation, scan: SupportsReportPartitioning, _, _) => + case d @ DataSourceV2ScanRelation(relation, scan: SupportsReportPartitioning, _, None) => val funCatalogOpt = relation.catalog.flatMap { case c: FunctionCatalog => Some(c) case _ => None @@ -40,7 +40,7 @@ object V2ScanPartitioning extends Rule[LogicalPlan] with SQLConfHelper { val catalystPartitioning = scan.outputPartitioning() match { case kgp: KeyGroupedPartitioning => sequenceToOption(kgp.keys().map( - V2ExpressionUtils.toCatalyst(_, relation, funCatalogOpt))) + V2ExpressionUtils.toCatalystOpt(_, relation, funCatalogOpt))) case _: UnknownPartitioning => None case p => throw new IllegalArgumentException("Unsupported data source V2 partitioning " + "type: " + p.getClass.getSimpleName) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala index 834faedd1ceef..bdbf309214fdf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala @@ -20,12 +20,11 @@ import java.util.Collections import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Ascending, SortOrder => catalystSortOrder, TransformExpression} +import org.apache.spark.sql.catalyst.expressions.TransformExpression import org.apache.spark.sql.catalyst.plans.physical import org.apache.spark.sql.connector.catalog.Identifier import org.apache.spark.sql.connector.catalog.InMemoryTableCatalog import org.apache.spark.sql.connector.catalog.functions._ -import org.apache.spark.sql.connector.distributions.Distribution import org.apache.spark.sql.connector.distributions.Distributions import org.apache.spark.sql.connector.expressions._ import org.apache.spark.sql.connector.expressions.Expressions._ @@ -83,8 +82,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { val partitions: Array[Transform] = Array(Expressions.years("ts")) // create a table with 3 partitions, partitioned by `years` transform - createTable(table, schema, partitions, - Distributions.clustered(partitions.map(_.asInstanceOf[Expression]))) + createTable(table, schema, partitions) sql(s"INSERT INTO testcat.ns.$table VALUES " + s"(0, 'aaa', CAST('2022-01-01' AS timestamp)), " + s"(1, 'bbb', CAST('2021-01-01' AS timestamp)), " + @@ -104,28 +102,9 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { physical.KeyGroupedPartitioning(catalystDistribution.clustering, partitionValues)) } - test("non-clustered distribution: fallback to super.partitioning") { - val partitions: Array[Transform] = Array(years("ts")) - val ordering: Array[SortOrder] = Array(sort(FieldReference("ts"), - SortDirection.ASCENDING, NullOrdering.NULLS_FIRST)) - - createTable(table, schema, partitions, Distributions.ordered(ordering), ordering) - sql(s"INSERT INTO testcat.ns.$table VALUES " + - s"(0, 'aaa', CAST('2022-01-01' AS timestamp)), " + - s"(1, 'bbb', CAST('2021-01-01' AS timestamp)), " + - s"(2, 'ccc', CAST('2020-01-01' AS timestamp))") - - val df = sql(s"SELECT * FROM testcat.ns.$table") - val catalystOrdering = Seq(catalystSortOrder(attr("ts"), Ascending)) - val catalystDistribution = physical.OrderedDistribution(catalystOrdering) - - checkQueryPlan(df, catalystDistribution, physical.UnknownPartitioning(0)) - } - test("non-clustered distribution: no partition") { val partitions: Array[Transform] = Array(bucket(32, "ts")) - createTable(table, schema, partitions, - Distributions.clustered(partitions.map(_.asInstanceOf[Expression]))) + createTable(table, schema, partitions) val df = sql(s"SELECT * FROM testcat.ns.$table") val distribution = physical.ClusteredDistribution( @@ -136,8 +115,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { test("non-clustered distribution: single partition") { val partitions: Array[Transform] = Array(bucket(32, "ts")) - createTable(table, schema, partitions, - Distributions.clustered(partitions.map(_.asInstanceOf[Expression]))) + createTable(table, schema, partitions) sql(s"INSERT INTO testcat.ns.$table VALUES (0, 'aaa', CAST('2020-01-01' AS timestamp))") val df = sql(s"SELECT * FROM testcat.ns.$table") @@ -152,9 +130,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { val nonFunctionCatalog = spark.sessionState.catalogManager.catalog("testcat2") .asInstanceOf[InMemoryTableCatalog] val partitions: Array[Transform] = Array(bucket(32, "ts")) - createTable(table, schema, partitions, - Distributions.clustered(partitions.map(_.asInstanceOf[Expression])), - catalog = nonFunctionCatalog) + createTable(table, schema, partitions, catalog = nonFunctionCatalog) sql(s"INSERT INTO testcat2.ns.$table VALUES " + s"(0, 'aaa', CAST('2022-01-01' AS timestamp)), " + s"(1, 'bbb', CAST('2021-01-01' AS timestamp)), " + @@ -174,8 +150,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { catalog.clearFunctions() val partitions: Array[Transform] = Array(bucket(32, "ts")) - createTable(table, schema, partitions, - Distributions.clustered(partitions.map(_.asInstanceOf[Expression]))) + createTable(table, schema, partitions) sql(s"INSERT INTO testcat.ns.$table VALUES " + s"(0, 'aaa', CAST('2022-01-01' AS timestamp)), " + s"(1, 'bbb', CAST('2021-01-01' AS timestamp)), " + @@ -190,8 +165,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { test("non-clustered distribution: V2 bucketing disabled") { withSQLConf(SQLConf.V2_BUCKETING_ENABLED.key -> "false") { val partitions: Array[Transform] = Array(bucket(32, "ts")) - createTable(table, schema, partitions, - Distributions.clustered(partitions.map(_.asInstanceOf[Expression]))) + createTable(table, schema, partitions) sql(s"INSERT INTO testcat.ns.$table VALUES " + s"(0, 'aaa', CAST('2022-01-01' AS timestamp)), " + s"(1, 'bbb', CAST('2021-01-01' AS timestamp)), " + @@ -239,11 +213,9 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { table: String, schema: StructType, partitions: Array[Transform], - distribution: Distribution = Distributions.unspecified(), - ordering: Array[expressions.SortOrder] = Array.empty, catalog: InMemoryTableCatalog = catalog): Unit = { catalog.createTable(Identifier.of(Array("ns"), table), - schema, partitions, emptyProps, distribution, ordering, None) + schema, partitions, emptyProps, Distributions.unspecified(), Array.empty, None) } private val customers: String = "customers" @@ -259,15 +231,13 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { private def testWithCustomersAndOrders( customers_partitions: Array[Transform], - customers_distribution: Distribution, orders_partitions: Array[Transform], - orders_distribution: Distribution, expectedNumOfShuffleExecs: Int): Unit = { - createTable(customers, customers_schema, customers_partitions, customers_distribution) + createTable(customers, customers_schema, customers_partitions) sql(s"INSERT INTO testcat.ns.$customers VALUES " + s"('aaa', 10, 1), ('bbb', 20, 2), ('ccc', 30, 3)") - createTable(orders, orders_schema, orders_partitions, orders_distribution) + createTable(orders, orders_schema, orders_partitions) sql(s"INSERT INTO testcat.ns.$orders VALUES " + s"(100.0, 1), (200.0, 1), (150.0, 2), (250.0, 2), (350.0, 2), (400.50, 3)") @@ -297,11 +267,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { val customers_partitions = Array(bucket(4, "customer_id")) val orders_partitions = Array(bucket(4, "customer_id")) - testWithCustomersAndOrders(customers_partitions, - Distributions.clustered(customers_partitions.toArray), - orders_partitions, - Distributions.clustered(orders_partitions.toArray), - 0) + testWithCustomersAndOrders(customers_partitions, orders_partitions, 0) } test("partitioned join: number of buckets mismatch should trigger shuffle") { @@ -309,22 +275,14 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { val orders_partitions = Array(bucket(2, "customer_id")) // should shuffle both sides when number of buckets are not the same - testWithCustomersAndOrders(customers_partitions, - Distributions.clustered(customers_partitions.toArray), - orders_partitions, - Distributions.clustered(orders_partitions.toArray), - 2) + testWithCustomersAndOrders(customers_partitions, orders_partitions, 2) } test("partitioned join: only one side reports partitioning") { val customers_partitions = Array(bucket(4, "customer_id")) val orders_partitions = Array(bucket(2, "customer_id")) - testWithCustomersAndOrders(customers_partitions, - Distributions.clustered(customers_partitions.toArray), - orders_partitions, - Distributions.unspecified(), - 2) + testWithCustomersAndOrders(customers_partitions, orders_partitions, 2) } private val items: String = "items" @@ -342,8 +300,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { test("partitioned join: join with two partition keys and matching & sorted partitions") { val items_partitions = Array(bucket(8, "id"), days("arrive_time")) - createTable(items, items_schema, items_partitions, - Distributions.clustered(items_partitions.toArray)) + createTable(items, items_schema, items_partitions) sql(s"INSERT INTO testcat.ns.$items VALUES " + s"(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " + s"(1, 'aa', 41.0, cast('2020-01-15' as timestamp)), " + @@ -352,8 +309,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { s"(3, 'cc', 15.5, cast('2020-02-01' as timestamp))") val purchases_partitions = Array(bucket(8, "item_id"), days("time")) - createTable(purchases, purchases_schema, purchases_partitions, - Distributions.clustered(purchases_partitions.toArray)) + createTable(purchases, purchases_schema, purchases_partitions) sql(s"INSERT INTO testcat.ns.$purchases VALUES " + s"(1, 42.0, cast('2020-01-01' as timestamp)), " + s"(1, 44.0, cast('2020-01-15' as timestamp)), " + @@ -375,8 +331,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { test("partitioned join: join with two partition keys and unsorted partitions") { val items_partitions = Array(bucket(8, "id"), days("arrive_time")) - createTable(items, items_schema, items_partitions, - Distributions.clustered(items_partitions.toArray)) + createTable(items, items_schema, items_partitions) sql(s"INSERT INTO testcat.ns.$items VALUES " + s"(3, 'cc', 15.5, cast('2020-02-01' as timestamp)), " + s"(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " + @@ -385,8 +340,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { s"(2, 'bb', 10.5, cast('2020-01-01' as timestamp))") val purchases_partitions = Array(bucket(8, "item_id"), days("time")) - createTable(purchases, purchases_schema, purchases_partitions, - Distributions.clustered(purchases_partitions.toArray)) + createTable(purchases, purchases_schema, purchases_partitions) sql(s"INSERT INTO testcat.ns.$purchases VALUES " + s"(2, 11.0, cast('2020-01-01' as timestamp)), " + s"(1, 42.0, cast('2020-01-01' as timestamp)), " + @@ -408,8 +362,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { test("partitioned join: join with two partition keys and different # of partition keys") { val items_partitions = Array(bucket(8, "id"), days("arrive_time")) - createTable(items, items_schema, items_partitions, - Distributions.clustered(items_partitions.toArray)) + createTable(items, items_schema, items_partitions) sql(s"INSERT INTO testcat.ns.$items VALUES " + s"(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " + @@ -417,8 +370,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { s"(3, 'cc', 15.5, cast('2020-02-01' as timestamp))") val purchases_partitions = Array(bucket(8, "item_id"), days("time")) - createTable(purchases, purchases_schema, purchases_partitions, - Distributions.clustered(purchases_partitions.toArray)) + createTable(purchases, purchases_schema, purchases_partitions) sql(s"INSERT INTO testcat.ns.$purchases VALUES " + s"(1, 42.0, cast('2020-01-01' as timestamp)), " + s"(2, 11.0, cast('2020-01-01' as timestamp))") @@ -439,8 +391,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "10") { val items_partitions = Array(identity("id")) - createTable(items, items_schema, items_partitions, - Distributions.clustered(items_partitions.toArray)) + createTable(items, items_schema, items_partitions) sql(s"INSERT INTO testcat.ns.$items VALUES " + s"(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " + s"(1, 'aa', 41.0, cast('2020-01-15' as timestamp)), " + @@ -449,8 +400,7 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { s"(3, 'cc', 15.5, cast('2020-02-01' as timestamp))") val purchases_partitions = Array(identity("item_id")) - createTable(purchases, purchases_schema, purchases_partitions, - Distributions.clustered(purchases_partitions.toArray)) + createTable(purchases, purchases_schema, purchases_partitions) sql(s"INSERT INTO testcat.ns.$purchases VALUES " + s"(1, 42.0, cast('2020-01-01' as timestamp)), " + s"(1, 44.0, cast('2020-01-15' as timestamp)), " + From 37aa0793ae1b4018eb331c1ccd4de9bd5aef9905 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 2 Jun 2022 10:31:53 +0900 Subject: [PATCH 309/535] [SPARK-39040][SQL][FOLLOWUP] Use a unique table name in conditional-functions.sql ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/36376, to use a unique table name in the test. `t` is a quite common table name and may make test environment unstable. ### Why are the changes needed? make tests more stable ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? N/A Closes #36739 from cloud-fan/test. Authored-by: Wenchen Fan Signed-off-by: Hyukjin Kwon (cherry picked from commit 4f672db5719549c522a24cffe7b4d0c1e0cb859b) Signed-off-by: Hyukjin Kwon --- .../inputs/ansi/conditional-functions.sql | 16 ++++++++-------- .../results/ansi/conditional-functions.sql.out | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/ansi/conditional-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/ansi/conditional-functions.sql index ba8f0ffe7f1b8..e7835619f583a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/ansi/conditional-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/ansi/conditional-functions.sql @@ -1,21 +1,21 @@ -- Tests for conditional functions -CREATE TABLE t USING PARQUET AS SELECT c1, c2 FROM VALUES(1d, 0),(2d, 1),(null, 1),(CAST('NaN' AS DOUBLE), 0) AS t(c1, c2); +CREATE TABLE conditional_t USING PARQUET AS SELECT c1, c2 FROM VALUES(1d, 0),(2d, 1),(null, 1),(CAST('NaN' AS DOUBLE), 0) AS t(c1, c2); -SELECT nanvl(c2, c1/c2 + c1/c2) FROM t; -SELECT nanvl(c2, 1/0) FROM t; -SELECT nanvl(1-0, 1/0) FROM t; +SELECT nanvl(c2, c1/c2 + c1/c2) FROM conditional_t; +SELECT nanvl(c2, 1/0) FROM conditional_t; +SELECT nanvl(1-0, 1/0) FROM conditional_t; -SELECT if(c2 >= 0, 1-0, 1/0) from t; +SELECT if(c2 >= 0, 1-0, 1/0) from conditional_t; SELECT if(1 == 1, 1, 1/0); SELECT if(1 != 1, 1/0, 1); -SELECT coalesce(c2, 1/0) from t; +SELECT coalesce(c2, 1/0) from conditional_t; SELECT coalesce(1, 1/0); SELECT coalesce(null, 1, 1/0); -SELECT case when c2 >= 0 then 1 else 1/0 end from t; +SELECT case when c2 >= 0 then 1 else 1/0 end from conditional_t; SELECT case when 1 < 2 then 1 else 1/0 end; SELECT case when 1 > 2 then 1/0 else 1 end; -DROP TABLE IF EXISTS t; +DROP TABLE conditional_t; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/conditional-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/conditional-functions.sql.out index 6a4f694f4d77d..e62654c3e23a3 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/conditional-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/conditional-functions.sql.out @@ -3,7 +3,7 @@ -- !query -CREATE TABLE t USING PARQUET AS SELECT c1, c2 FROM VALUES(1d, 0),(2d, 1),(null, 1),(CAST('NaN' AS DOUBLE), 0) AS t(c1, c2) +CREATE TABLE conditional_t USING PARQUET AS SELECT c1, c2 FROM VALUES(1d, 0),(2d, 1),(null, 1),(CAST('NaN' AS DOUBLE), 0) AS t(c1, c2) -- !query schema struct<> -- !query output @@ -11,7 +11,7 @@ struct<> -- !query -SELECT nanvl(c2, c1/c2 + c1/c2) FROM t +SELECT nanvl(c2, c1/c2 + c1/c2) FROM conditional_t -- !query schema struct -- !query output @@ -22,7 +22,7 @@ struct -- !query -SELECT nanvl(c2, 1/0) FROM t +SELECT nanvl(c2, 1/0) FROM conditional_t -- !query schema struct -- !query output @@ -33,7 +33,7 @@ struct -- !query -SELECT nanvl(1-0, 1/0) FROM t +SELECT nanvl(1-0, 1/0) FROM conditional_t -- !query schema struct -- !query output @@ -44,7 +44,7 @@ struct -- !query -SELECT if(c2 >= 0, 1-0, 1/0) from t +SELECT if(c2 >= 0, 1-0, 1/0) from conditional_t -- !query schema struct<(IF((c2 >= 0), (1 - 0), (1 / 0))):double> -- !query output @@ -71,7 +71,7 @@ struct<(IF((NOT (1 = 1)), (1 / 0), 1)):double> -- !query -SELECT coalesce(c2, 1/0) from t +SELECT coalesce(c2, 1/0) from conditional_t -- !query schema struct -- !query output @@ -98,7 +98,7 @@ struct -- !query -SELECT case when c2 >= 0 then 1 else 1/0 end from t +SELECT case when c2 >= 0 then 1 else 1/0 end from conditional_t -- !query schema struct= 0) THEN 1 ELSE (1 / 0) END:double> -- !query output @@ -125,7 +125,7 @@ struct 2) THEN (1 / 0) ELSE 1 END:double> -- !query -DROP TABLE IF EXISTS t +DROP TABLE conditional_t -- !query schema struct<> -- !query output From 2268665663684dd381adf266feb74ac97a53900d Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 1 Jun 2022 20:10:01 -0700 Subject: [PATCH 310/535] [SPARK-39360][K8S] Remove deprecation of `spark.kubernetes.memoryOverheadFactor` and recover doc ### What changes were proposed in this pull request? This PR aims to avoid the deprecation of `spark.kubernetes.memoryOverheadFactor` from Apache Spark 3.3. In addition, also recovers the documentation which is removed mistakenly at the `deprecation`. `Deprecation` is not a removal. ### Why are the changes needed? - Apache Spark 3.3.0 RC complains always about `spark.kubernetes.memoryOverheadFactor` because the configuration has the default value (which is not given by the users). There is no way to remove the warnings which means the directional message is not helpful and makes the users confused in a wrong way. In other words, we still get warnings even we use only new configurations or no configuration. ``` 22/06/01 23:53:49 WARN SparkConf: The configuration key 'spark.kubernetes.memoryOverheadFactor' has been deprecated as of Spark 3.3.0 and may be removed in the future. Please use spark.driver.memoryOverheadFactor and spark.executor.memoryOverheadFactor 22/06/01 23:53:49 WARN SparkConf: The configuration key 'spark.kubernetes.memoryOverheadFactor' has been deprecated as of Spark 3.3.0 and may be removed in the future. Please use spark.driver.memoryOverheadFactor and spark.executor.memoryOverheadFactor 22/06/01 23:53:50 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable 22/06/01 23:53:50 WARN SparkConf: The configuration key 'spark.kubernetes.memoryOverheadFactor' has been deprecated as of Spark 3.3.0 and may be removed in the future. Please use spark.driver.memoryOverheadFactor and spark.executor.memoryOverheadFactor ``` - The minimum constraint is slightly different because `spark.kubernetes.memoryOverheadFactor` allowed 0 since Apache Spark 2.4 while new configurations disallow `0`. - This documentation removal might be too early because the deprecation is not the removal of configuration. This PR recoveres the removed doc and added the following. ``` This will be overridden by the value set by spark.driver.memoryOverheadFactor and spark.executor.memoryOverheadFactor explicitly. ``` ### Does this PR introduce _any_ user-facing change? No. This is a consistent with the existing behavior. ### How was this patch tested? Pass the CIs. Closes #36744 from dongjoon-hyun/SPARK-39360. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 6d43556089a21b26d1a7590fbe1e25bd1ca7cedd) Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/SparkConf.scala | 4 +--- docs/running-on-kubernetes.md | 10 ++++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index cf121749b7348..5f37a1abb1909 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -636,9 +636,7 @@ private[spark] object SparkConf extends Logging { DeprecatedConfig("spark.blacklist.killBlacklistedExecutors", "3.1.0", "Please use spark.excludeOnFailure.killExcludedExecutors"), DeprecatedConfig("spark.yarn.blacklist.executor.launch.blacklisting.enabled", "3.1.0", - "Please use spark.yarn.executor.launch.excludeOnFailure.enabled"), - DeprecatedConfig("spark.kubernetes.memoryOverheadFactor", "3.3.0", - "Please use spark.driver.memoryOverheadFactor and spark.executor.memoryOverheadFactor") + "Please use spark.yarn.executor.launch.excludeOnFailure.enabled") ) Map(configs.map { cfg => (cfg.key -> cfg) } : _*) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index ee77e37beb33f..9659a6ebe2f8b 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1137,6 +1137,16 @@ See the [configuration page](configuration.html) for information on Spark config + + + + + + From 4bbaf3777e9cd90151ec526a05dd67aab22da403 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Thu, 2 Jun 2022 16:48:11 +0800 Subject: [PATCH 311/535] [SPARK-38675][CORE] Fix race during unlock in BlockInfoManager ### What changes were proposed in this pull request? This PR fixes a race in the `BlockInfoManager` between `unlock` and `releaseAllLocksForTask`, resulting in a negative reader count for a block (which trips an assert). This happens when the following events take place: 1. [THREAD 1] calls `releaseAllLocksForTask`. This starts by collecting all the blocks to be unlocked for this task. 2. [THREAD 2] calls `unlock` for a read lock for the same task (this means the block is also in the list collected in step 1). It then proceeds to unlock the block by decrementing the reader count. 3. [THREAD 1] now starts to release the collected locks, it does this by decrementing the readers counts for blocks by the number of acquired read locks. The problem is that step 2 made the lock counts for blocks incorrect, and we decrement by one (or a few) too many. This triggers a negative reader count assert. We fix this by adding a check to `unlock` that makes sure we are not in the process of unlocking. We do this by checking if there is a multiset associated with the task that contains the read locks. ### Why are the changes needed? It is a bug. Not fixing this can cause negative reader counts for blocks, and this causes task failures. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a regression test in BlockInfoManager suite. Closes #35991 from hvanhovell/SPARK-38675. Authored-by: Herman van Hovell Signed-off-by: Wenchen Fan (cherry picked from commit 078b505d2f0a0a4958dec7da816a7d672820b637) Signed-off-by: Wenchen Fan --- .../spark/storage/BlockInfoManager.scala | 15 ++++++--- .../spark/storage/BlockInfoManagerSuite.scala | 31 +++++++++++++++++++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockInfoManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockInfoManager.scala index 5392c20eefb72..9eb1418fd16ef 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockInfoManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockInfoManager.scala @@ -360,12 +360,17 @@ private[storage] class BlockInfoManager extends Logging { info.writerTask = BlockInfo.NO_WRITER writeLocksByTask.get(taskAttemptId).remove(blockId) } else { - assert(info.readerCount > 0, s"Block $blockId is not locked for reading") - info.readerCount -= 1 + // There can be a race between unlock and releaseAllLocksForTask which causes negative + // reader counts. We need to check if the readLocksByTask per tasks are present, if they + // are not then we know releaseAllLocksForTask has already cleaned up the read lock. val countsForTask = readLocksByTask.get(taskAttemptId) - val newPinCountForTask: Int = countsForTask.remove(blockId, 1) - 1 - assert(newPinCountForTask >= 0, - s"Task $taskAttemptId release lock on block $blockId more times than it acquired it") + if (countsForTask != null) { + assert(info.readerCount > 0, s"Block $blockId is not locked for reading") + info.readerCount -= 1 + val newPinCountForTask: Int = countsForTask.remove(blockId, 1) - 1 + assert(newPinCountForTask >= 0, + s"Task $taskAttemptId release lock on block $blockId more times than it acquired it") + } } condition.signalAll() } diff --git a/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala index 8ffc6798526b4..887644a826452 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala @@ -360,4 +360,35 @@ class BlockInfoManagerSuite extends SparkFunSuite with BeforeAndAfterEach { blockInfoManager.releaseAllLocksForTask(0) assert(blockInfoManager.getNumberOfMapEntries === initialNumMapEntries - 1) } + + test("SPARK-38675 - concurrent unlock and releaseAllLocksForTask calls should not fail") { + // Create block + val blockId = TestBlockId("block") + assert(blockInfoManager.lockNewBlockForWriting(blockId, newBlockInfo())) + blockInfoManager.unlock(blockId) + + // Without the fix the block below fails in 50% of the time. By executing it + // 10 times we increase the chance of failing to ~99.9%. + (0 to 10).foreach { task => + withTaskId(task) { + blockInfoManager.registerTask(task) + + // Acquire read locks + (0 to 50).foreach { _ => + assert(blockInfoManager.lockForReading(blockId).isDefined) + } + + // Asynchronously release read locks. + val futures = (0 to 50).map { _ => + Future(blockInfoManager.unlock(blockId, Option(0L))) + } + + // Remove all lock and hopefully don't hit an assertion error + blockInfoManager.releaseAllLocksForTask(task) + + // Wait until all futures complete for the next iteration + futures.foreach(ThreadUtils.awaitReady(_, 100.millis)) + } + } + } } From fef569507bcd23b3b515bcbf489e2a9151ac51be Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 2 Jun 2022 12:24:51 +0300 Subject: [PATCH 312/535] [SPARK-39346][SQL][3.3] Convert asserts/illegal state exception to internal errors on each phase ### What changes were proposed in this pull request? In the PR, I propose to catch asserts/illegal state exception on each phase of query execution: ANALYSIS, OPTIMIZATION, PLANNING, and convert them to a SparkException w/ the `INTERNAL_ERROR` error class. This is a backport of https://github.com/apache/spark/pull/36704. ### Why are the changes needed? To improve user experience with Spark SQL and unify representation of user-facing errors. ### Does this PR introduce _any_ user-facing change? No. The changes might affect users in corner cases only. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *KafkaMicroBatchV1SourceSuite" $ build/sbt "test:testOnly *KafkaMicroBatchV2SourceSuite" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 8894e785edae42a642351ad91e539324c39da8e4) Signed-off-by: Max Gekk Closes #36742 from MaxGekk/wrapby-INTERNAL_ERROR-every-phase-3.3. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../kafka010/KafkaMicroBatchSourceSuite.scala | 11 ++++--- .../scala/org/apache/spark/sql/Dataset.scala | 14 ++------- .../spark/sql/execution/QueryExecution.scala | 31 ++++++++++++++++++- .../execution/streaming/StreamExecution.scala | 4 ++- .../streaming/MicroBatchExecutionSuite.scala | 6 ++-- .../continuous/ContinuousSuite.scala | 7 +++-- 6 files changed, 51 insertions(+), 22 deletions(-) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index db71f0fd9184a..41277a535f58c 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -34,6 +34,7 @@ import org.apache.kafka.common.TopicPartition import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.scalatest.time.SpanSugar._ +import org.apache.spark.{SparkException, SparkThrowable} import org.apache.spark.sql.{Dataset, ForeachWriter, Row, SparkSession} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.connector.read.streaming.SparkDataStream @@ -666,9 +667,10 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { testUtils.sendMessages(topic2, Array("6")) }, StartStream(), - ExpectFailure[IllegalStateException](e => { + ExpectFailure[SparkException](e => { + assert(e.asInstanceOf[SparkThrowable].getErrorClass === "INTERNAL_ERROR") // The offset of `topic2` should be changed from 2 to 1 - assert(e.getMessage.contains("was changed from 2 to 1")) + assert(e.getCause.getMessage.contains("was changed from 2 to 1")) }) ) } @@ -764,12 +766,13 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { testStream(df)( StartStream(checkpointLocation = metadataPath.getAbsolutePath), - ExpectFailure[IllegalStateException](e => { + ExpectFailure[SparkException](e => { + assert(e.asInstanceOf[SparkThrowable].getErrorClass === "INTERNAL_ERROR") Seq( s"maximum supported log version is v1, but encountered v99999", "produced by a newer version of Spark and cannot be read by this version" ).foreach { message => - assert(e.toString.contains(message)) + assert(e.getCause.toString.contains(message)) } })) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 56f0e8978ecda..a4a40cc0e6924 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.commons.lang3.StringUtils -import org.apache.spark.{SparkException, SparkThrowable, TaskContext} +import org.apache.spark.TaskContext import org.apache.spark.annotation.{DeveloperApi, Stable, Unstable} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.function._ @@ -3852,19 +3852,11 @@ class Dataset[T] private[sql]( * the internal error exception. */ private def withAction[U](name: String, qe: QueryExecution)(action: SparkPlan => U) = { - try { - SQLExecution.withNewExecutionId(qe, Some(name)) { + SQLExecution.withNewExecutionId(qe, Some(name)) { + QueryExecution.withInternalError(s"""The "$name" action failed.""") { qe.executedPlan.resetMetrics() action(qe.executedPlan) } - } catch { - case e: SparkThrowable => throw e - case e @ (_: java.lang.IllegalStateException | _: java.lang.AssertionError) => - throw new SparkException( - errorClass = "INTERNAL_ERROR", - messageParameters = Array(s"""The "$name" action failed."""), - cause = e) - case e: Throwable => throw e } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index 5dcdebfbe0ea7..ab9b9861c036e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -23,6 +23,7 @@ import java.util.concurrent.atomic.AtomicLong import org.apache.hadoop.fs.Path +import org.apache.spark.SparkException import org.apache.spark.internal.Logging import org.apache.spark.rdd.RDD import org.apache.spark.sql.{AnalysisException, Row, SparkSession} @@ -180,7 +181,9 @@ class QueryExecution( } protected def executePhase[T](phase: String)(block: => T): T = sparkSession.withActive { - tracker.measurePhase(phase)(block) + QueryExecution.withInternalError(s"The Spark SQL phase $phase failed with an internal error.") { + tracker.measurePhase(phase)(block) + } } def simpleString: String = { @@ -484,4 +487,30 @@ object QueryExecution { val preparationRules = preparations(session, Option(InsertAdaptiveSparkPlan(context)), true) prepareForExecution(preparationRules, sparkPlan.clone()) } + + /** + * Converts asserts, null pointer, illegal state exceptions to internal errors. + */ + private[sql] def toInternalError(msg: String, e: Throwable): Throwable = e match { + case e @ (_: java.lang.IllegalStateException | _: java.lang.NullPointerException | + _: java.lang.AssertionError) => + new SparkException( + errorClass = "INTERNAL_ERROR", + messageParameters = Array(msg + + " Please, fill a bug report in, and provide the full stack trace."), + cause = e) + case e: Throwable => + e + } + + /** + * Catches asserts, null pointer, illegal state exceptions, and converts them to internal errors. + */ + private[sql] def withInternalError[T](msg: String)(block: => T): T = { + try { + block + } catch { + case e: Throwable => throw toInternalError(msg, e) + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index c7ce9f52e0653..587f5af60acc8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -39,6 +39,7 @@ import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.connector.catalog.{SupportsWrite, Table} import org.apache.spark.sql.connector.read.streaming.{Offset => OffsetV2, ReadLimit, SparkDataStream} import org.apache.spark.sql.connector.write.{LogicalWriteInfoImpl, SupportsTruncate, Write} +import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.command.StreamingExplainCommand import org.apache.spark.sql.execution.datasources.v2.StreamWriterCommitProgress import org.apache.spark.sql.internal.SQLConf @@ -319,7 +320,8 @@ abstract class StreamExecution( // This is a workaround for HADOOP-12074: `Shell.runCommand` converts `InterruptedException` // to `new IOException(ie.toString())` before Hadoop 2.8. updateStatusMessage("Stopped") - case e: Throwable => + case t: Throwable => + val e = QueryExecution.toInternalError(msg = s"Execution of the stream $name failed.", t) streamDeathCause = new StreamingQueryException( toDebugString(includeLogicalPlan = isInitialized), s"Query $prettyIdString terminated with exception: ${e.getMessage}", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala index f06e62b33b1a0..9d731248ad404 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala @@ -22,6 +22,7 @@ import java.io.File import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfter +import org.apache.spark.{SparkException, SparkThrowable} import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} import org.apache.spark.sql.catalyst.plans.logical.Range import org.apache.spark.sql.connector.read.streaming @@ -93,8 +94,9 @@ class MicroBatchExecutionSuite extends StreamTest with BeforeAndAfter { testStream(streamEvent) ( AddData(inputData, 1, 2, 3, 4, 5, 6), StartStream(Trigger.Once, checkpointLocation = checkpointDir.getAbsolutePath), - ExpectFailure[IllegalStateException] { e => - assert(e.getMessage.contains("batch 3 doesn't exist")) + ExpectFailure[SparkException] { e => + assert(e.asInstanceOf[SparkThrowable].getErrorClass === "INTERNAL_ERROR") + assert(e.getCause.getMessage.contains("batch 3 doesn't exist")) } ) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index 5893c3da09812..a28d44caab065 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.streaming.continuous import java.sql.Timestamp -import org.apache.spark.{SparkContext, SparkException} +import org.apache.spark.{SparkContext, SparkException, SparkThrowable} import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskStart} import org.apache.spark.sql._ import org.apache.spark.sql.execution.streaming._ @@ -440,8 +440,9 @@ class ContinuousEpochBacklogSuite extends ContinuousSuiteBase { testStream(df)( StartStream(Trigger.Continuous(1)), - ExpectFailure[IllegalStateException] { e => - e.getMessage.contains("queue has exceeded its maximum") + ExpectFailure[SparkException] { e => + assert(e.asInstanceOf[SparkThrowable].getErrorClass === "INTERNAL_ERROR") + e.getCause.getMessage.contains("queue has exceeded its maximum") } ) } From ef521d30a3b023213bbc3076911a93c0c0c425dc Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 2 Jun 2022 13:06:14 +0300 Subject: [PATCH 313/535] [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time ### What changes were proposed in this pull request? After SPARK-38118, `dataTypeMismatchError` related to `Filter` will be checked and throw in `RemoveTempResolvedColumn`, this will cause compatibility issue with exception message presentation. For example, the following case: ``` spark.sql("create table t1(user_id int, auct_end_dt date) using parquet;") spark.sql("select * from t1 join t2 on t1.user_id = t2.user_id where t1.auct_end_dt >= Date_sub('2020-12-27', 90)").show ``` The expected message is ``` Table or view not found: t2 ``` But the actual message is ``` org.apache.spark.sql.AnalysisException: cannot resolve 'date_sub('2020-12-27', 90)' due to data type mismatch: argument 1 requires date type, however, ''2020-12-27'' is of string type.; line 1 pos 76 ``` For forward compatibility, this pr change to only records `DATA_TYPE_MISMATCH_ERROR_MESSAGE` in the `RemoveTempResolvedColumn` check process , and move `failAnalysis` to `CheckAnalysis#checkAnalysis` ### Why are the changes needed? Fix analysis exception message compatibility. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass Github Actions and add a new test case Closes #36746 from LuciferYang/SPARK-39354. Authored-by: yangjie01 Signed-off-by: Max Gekk (cherry picked from commit 89fdb8a6fb6a669c458891b3abeba236e64b1e89) Signed-off-by: Max Gekk --- .../spark/sql/catalyst/analysis/Analyzer.scala | 7 ++----- .../sql/catalyst/analysis/CheckAnalysis.scala | 17 ++++++++++++++++- .../sql/catalyst/analysis/AnalysisSuite.scala | 16 ++++++++++++++-- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ba492e58f6e83..51c1d1f768f73 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -28,7 +28,7 @@ import scala.util.{Failure, Random, Success, Try} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst._ -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.{extraHintForAnsiTypeCoercionExpression, DATA_TYPE_MISMATCH_ERROR} +import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.DATA_TYPE_MISMATCH_ERROR_MESSAGE import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.encoders.OuterScopes import org.apache.spark.sql.catalyst.expressions.{Expression, FrameLessOffsetWindowFunction, _} @@ -4328,10 +4328,7 @@ object RemoveTempResolvedColumn extends Rule[LogicalPlan] { case e: Expression if e.childrenResolved && e.checkInputDataTypes().isFailure => e.checkInputDataTypes() match { case TypeCheckResult.TypeCheckFailure(message) => - e.setTagValue(DATA_TYPE_MISMATCH_ERROR, true) - e.failAnalysis( - s"cannot resolve '${e.sql}' due to data type mismatch: $message" + - extraHintForAnsiTypeCoercionExpression(plan)) + e.setTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE, message) } case _ => }) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 9c72b9974c472..b0d1d6c2a30c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -50,6 +50,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { val DATA_TYPE_MISMATCH_ERROR = TreeNodeTag[Boolean]("dataTypeMismatchError") + val DATA_TYPE_MISMATCH_ERROR_MESSAGE = TreeNodeTag[String]("dataTypeMismatchError") + protected def failAnalysis(msg: String): Nothing = { throw new AnalysisException(msg) } @@ -174,7 +176,20 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { } } - getAllExpressions(operator).foreach(_.foreachUp { + val expressions = getAllExpressions(operator) + + expressions.foreach(_.foreachUp { + case e: Expression => + e.getTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE) match { + case Some(message) => + e.failAnalysis(s"cannot resolve '${e.sql}' due to data type mismatch: $message" + + extraHintForAnsiTypeCoercionExpression(operator)) + case _ => + } + case _ => + }) + + expressions.foreach(_.foreachUp { case a: Attribute if !a.resolved => val missingCol = a.sql val candidates = operator.inputSet.toSeq.map(_.qualifiedName) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 1f82aa7e35511..e72cdbe487b98 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -1170,13 +1170,25 @@ class AnalysisSuite extends AnalysisTest with Matchers { |WITH t as (SELECT true c, false d) |SELECT (t.c AND t.d) c |FROM t - |GROUP BY t.c + |GROUP BY t.c, t.d |HAVING ${func}(c) > 0d""".stripMargin), - Seq(s"cannot resolve '$func(t.c)' due to data type mismatch"), + Seq(s"cannot resolve '$func(c)' due to data type mismatch"), false) } } + test("SPARK-39354: should be `Table or view not found`") { + assertAnalysisError(parsePlan( + s""" + |WITH t1 as (SELECT 1 user_id, CAST("2022-06-02" AS DATE) dt) + |SELECT * + |FROM t1 + |JOIN t2 ON t1.user_id = t2.user_id + |WHERE t1.dt >= DATE_SUB('2020-12-27', 90)""".stripMargin), + Seq(s"Table or view not found: t2"), + false) + } + test("SPARK-39144: nested subquery expressions deduplicate relations should be done bottom up") { val innerRelation = SubqueryAlias("src1", testRelation) val outerRelation = SubqueryAlias("src2", testRelation) From 7ed30443a09dde842424165283d45c0c54d86a81 Mon Sep 17 00:00:00 2001 From: Ming Li <1104056452@qq.com> Date: Thu, 2 Jun 2022 07:44:17 -0500 Subject: [PATCH 314/535] [SPARK-38807][CORE] Fix the startup error of spark shell on Windows ### What changes were proposed in this pull request? The File.getCanonicalPath method will return the drive letter in the windows system. The RpcEnvFileServer.validateDirectoryUri method uses the File.getCanonicalPath method to process the baseuri, which will cause the baseuri not to comply with the URI verification rules. For example, the / classes is processed into F: \ classes.This causes the sparkcontext to fail to start on windows. This PR modifies the RpcEnvFileServer.validateDirectoryUri method and replaces `new File(baseUri).getCanonicalPath` with `new URI(baseUri).normalize().getPath`. This method can work normally in windows. ### Why are the changes needed? Fix the startup error of spark shell on Windows system [[SPARK-35691](https://issues.apache.org/jira/browse/SPARK-35691)] introduced this regression. ### Does this PR introduce any user-facing change? No ### How was this patch tested? CI Closes #36447 from 1104056452/master. Lead-authored-by: Ming Li <1104056452@qq.com> Co-authored-by: ming li <1104056452@qq.com> Signed-off-by: Sean Owen (cherry picked from commit a760975083ea0696e8fd834ecfe3fb877b7f7449) Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala b/core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala index bf19190c021df..82d3a28894b60 100644 --- a/core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala +++ b/core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala @@ -18,6 +18,7 @@ package org.apache.spark.rpc import java.io.File +import java.net.URI import java.nio.channels.ReadableByteChannel import scala.concurrent.Future @@ -187,7 +188,7 @@ private[spark] trait RpcEnvFileServer { /** Validates and normalizes the base URI for directories. */ protected def validateDirectoryUri(baseUri: String): String = { - val baseCanonicalUri = new File(baseUri).getCanonicalPath + val baseCanonicalUri = new URI(baseUri).normalize().getPath val fixedBaseUri = "/" + baseCanonicalUri.stripPrefix("/").stripSuffix("/") require(fixedBaseUri != "/files" && fixedBaseUri != "/jars", "Directory URI cannot be /files nor /jars.") From 4da8f3a76b196383e00664e4d1c863f5fe927474 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 2 Jun 2022 09:28:34 -0700 Subject: [PATCH 315/535] [SPARK-39361] Don't use Log4J2's extended throwable conversion pattern in default logging configurations ### What changes were proposed in this pull request? This PR addresses a performance problem in Log4J 2 related to exception logging: in certain scenarios I observed that Log4J2's default exception stacktrace logging can be ~10x slower than Log4J 1. The problem stems from a new log pattern format in Log4J2 called ["extended exception"](https://logging.apache.org/log4j/2.x/manual/layouts.html#PatternExtendedException), which enriches the regular stacktrace string with information on the name of the JAR files that contained the classes in each stack frame. Log4J queries the classloader to determine the source JAR for each class. This isn't cheap, but this information is cached and reused in future exception logging calls. In certain scenarios involving runtime-generated classes, this lookup will fail and the failed lookup result will _not_ be cached. As a result, expensive classloading operations will be performed every time such an exception is logged. In addition to being very slow, these operations take out a lock on the classloader and thus can cause severe lock contention if multiple threads are logging errors. This issue is described in more detail in [a comment on a Log4J2 JIRA](https://issues.apache.org/jira/browse/LOG4J2-2391?focusedCommentId=16667140&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#comment-16667140) and in a linked blogpost. Spark frequently uses generated classes and lambdas and thus Spark executor logs will almost always trigger this edge-case and suffer from poor performance. By default, if you do not specify an explicit exception format in your logging pattern then Log4J2 will add this "extended exception" pattern (see PatternLayout's alwaysWriteExceptions flag in Log4J's documentation, plus [the code implementing that flag](https://github.com/apache/logging-log4j2/blob/d6c8ab0863c551cdf0f8a5b1966ab45e3cddf572/log4j-core/src/main/java/org/apache/logging/log4j/core/pattern/PatternParser.java#L206-L209) in Log4J2). In this PR, I have updated Spark's default Log4J2 configurations so that each pattern layout includes an explicit %ex so that it uses the normal (non-extended) exception logging format. This is the workaround that is currently recommended on the Log4J JIRA. ### Why are the changes needed? Avoid performance regressions in Spark programs which use Spark's default Log4J 2 configuration and log many exceptions. Although it's true that any program logging exceptions at a high rate should probably just fix the source of the exceptions, I think it's still a good idea for us to try to fix this out-of-the-box performance difference so that users' existing workloads do not regress when upgrading to 3.3.0. ### Does this PR introduce _any_ user-facing change? Yes: it changes the default exception logging format so that it matches Log4J 1's default rather than Log4J 2's. The new format is consistent with behavior in previous Spark versions, but is different than the behavior in the current Spark 3.3.0-rc3. ### How was this patch tested? Existing tests. Closes #36747 from JoshRosen/disable-log4j2-extended-exception-pattern. Authored-by: Josh Rosen Signed-off-by: Josh Rosen (cherry picked from commit fd45c3656be6add7cf483ddfb7016b12f77d7c8e) Signed-off-by: Josh Rosen --- R/log4j2.properties | 2 +- common/kvstore/src/test/resources/log4j2.properties | 2 +- .../network-common/src/test/resources/log4j2.properties | 2 +- .../network-shuffle/src/test/resources/log4j2.properties | 2 +- conf/log4j2.properties.template | 8 +++++++- .../resources/org/apache/spark/log4j2-defaults.properties | 2 +- core/src/main/scala/org/apache/spark/TestUtils.scala | 2 +- .../org/apache/spark/util/logging/DriverLogger.scala | 2 +- core/src/test/resources/log4j2.properties | 4 ++-- docs/configuration.md | 2 +- external/avro/src/test/resources/log4j2.properties | 2 +- .../src/test/resources/log4j2.properties | 4 ++-- .../kafka-0-10-sql/src/test/resources/log4j2.properties | 2 +- .../src/test/resources/log4j2.properties | 2 +- external/kafka-0-10/src/test/resources/log4j2.properties | 2 +- external/kinesis-asl/src/main/resources/log4j2.properties | 4 ++-- external/kinesis-asl/src/test/resources/log4j2.properties | 2 +- graphx/src/test/resources/log4j2.properties | 2 +- hadoop-cloud/src/test/resources/log4j2.properties | 4 ++-- launcher/src/test/resources/log4j2.properties | 4 ++-- mllib/src/test/resources/log4j2.properties | 2 +- repl/src/test/resources/log4j2.properties | 2 +- repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala | 2 +- .../kubernetes/core/src/test/resources/log4j2.properties | 2 +- .../src/test/resources/log-config-test-log4j.properties | 2 +- .../src/test/resources/log4j2.properties | 2 +- .../deploy/k8s/integrationtest/DecommissionSuite.scala | 2 +- .../mesos/src/test/resources/log4j2.properties | 2 +- .../yarn/src/test/resources/log4j2.properties | 2 +- .../apache/spark/deploy/yarn/BaseYarnClusterSuite.scala | 2 +- sql/catalyst/src/test/resources/log4j2.properties | 2 +- sql/core/src/test/resources/log4j2.properties | 4 ++-- .../java/org/apache/hive/service/cli/CLIServiceUtils.java | 4 ++-- .../src/test/resources/log4j2.properties | 4 ++-- .../sql/hive/thriftserver/HiveThriftServer2Suites.scala | 2 +- .../spark/sql/hive/thriftserver/UISeleniumSuite.scala | 2 +- sql/hive/src/test/resources/log4j2.properties | 4 ++-- streaming/src/test/resources/log4j2.properties | 2 +- 38 files changed, 53 insertions(+), 47 deletions(-) diff --git a/R/log4j2.properties b/R/log4j2.properties index 8ed7b9f6c8c01..689518e42807c 100644 --- a/R/log4j2.properties +++ b/R/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.eclipse.jetty diff --git a/common/kvstore/src/test/resources/log4j2.properties b/common/kvstore/src/test/resources/log4j2.properties index 9a0fd7cdc6f23..551abd8413ccd 100644 --- a/common/kvstore/src/test/resources/log4j2.properties +++ b/common/kvstore/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Silence verbose logs from 3rd-party libraries. logger.netty.name = io.netty diff --git a/common/network-common/src/test/resources/log4j2.properties b/common/network-common/src/test/resources/log4j2.properties index 9a0fd7cdc6f23..551abd8413ccd 100644 --- a/common/network-common/src/test/resources/log4j2.properties +++ b/common/network-common/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Silence verbose logs from 3rd-party libraries. logger.netty.name = io.netty diff --git a/common/network-shuffle/src/test/resources/log4j2.properties b/common/network-shuffle/src/test/resources/log4j2.properties index 4fc8e41b6f007..fbdac2fb17074 100644 --- a/common/network-shuffle/src/test/resources/log4j2.properties +++ b/common/network-shuffle/src/test/resources/log4j2.properties @@ -24,4 +24,4 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex diff --git a/conf/log4j2.properties.template b/conf/log4j2.properties.template index 99f68a8a9e98c..ab96e03baed20 100644 --- a/conf/log4j2.properties.template +++ b/conf/log4j2.properties.template @@ -19,11 +19,17 @@ rootLogger.level = info rootLogger.appenderRef.stdout.ref = console +# In the pattern layout configuration below, we specify an explicit `%ex` conversion +# pattern for logging Throwables. If this was omitted, then (by default) Log4J would +# implicitly add an `%xEx` conversion pattern which logs stacktraces with additional +# class packaging information. That extra information can sometimes add a substantial +# performance overhead, so we disable it in our default logging config. +# For more information, see SPARK-39361. appender.console.type = Console appender.console.name = console appender.console.target = SYSTEM_ERR appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex # Set the default spark-shell/spark-sql log level to WARN. When running the # spark-shell/spark-sql, the log level for these classes is used to overwrite diff --git a/core/src/main/resources/org/apache/spark/log4j2-defaults.properties b/core/src/main/resources/org/apache/spark/log4j2-defaults.properties index cb3c70e2b8791..62eab7f3ef950 100644 --- a/core/src/main/resources/org/apache/spark/log4j2-defaults.properties +++ b/core/src/main/resources/org/apache/spark/log4j2-defaults.properties @@ -23,7 +23,7 @@ appender.console.type = Console appender.console.name = STDOUT appender.console.target = SYSTEM_OUT appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex # Settings to quiet third party logs that are too verbose logger.jetty.name = org.sparkproject.jetty diff --git a/core/src/main/scala/org/apache/spark/TestUtils.scala b/core/src/main/scala/org/apache/spark/TestUtils.scala index 104e98b8ae0a4..880f8856fe35f 100644 --- a/core/src/main/scala/org/apache/spark/TestUtils.scala +++ b/core/src/main/scala/org/apache/spark/TestUtils.scala @@ -431,7 +431,7 @@ private[spark] object TestUtils { val appenderBuilder = builder.newAppender("console", "CONSOLE") .addAttribute("target", ConsoleAppender.Target.SYSTEM_ERR) appenderBuilder.add(builder.newLayout("PatternLayout") - .addAttribute("pattern", "%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n")) + .addAttribute("pattern", "%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex")) builder.add(appenderBuilder) builder.add(builder.newRootLogger(level).add(builder.newAppenderRef("console"))) val configuration = builder.build() diff --git a/core/src/main/scala/org/apache/spark/util/logging/DriverLogger.scala b/core/src/main/scala/org/apache/spark/util/logging/DriverLogger.scala index 4854a84eb5680..c826cef213f53 100644 --- a/core/src/main/scala/org/apache/spark/util/logging/DriverLogger.scala +++ b/core/src/main/scala/org/apache/spark/util/logging/DriverLogger.scala @@ -42,7 +42,7 @@ private[spark] class DriverLogger(conf: SparkConf) extends Logging { private val UPLOAD_CHUNK_SIZE = 1024 * 1024 private val UPLOAD_INTERVAL_IN_SECS = 5 - private val DEFAULT_LAYOUT = "%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n" + private val DEFAULT_LAYOUT = "%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex" private val LOG_FILE_PERMISSIONS = new FsPermission(Integer.parseInt("770", 8).toShort) private val localLogFile: String = FileUtils.getFile( diff --git a/core/src/test/resources/log4j2.properties b/core/src/test/resources/log4j2.properties index c6cd10d639e69..ab02104c69697 100644 --- a/core/src/test/resources/log4j2.properties +++ b/core/src/test/resources/log4j2.properties @@ -23,7 +23,7 @@ appender.file.type = File appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Tests that launch java subprocesses can set the "test.appender" system property to # "console" to avoid having the child process's logs overwrite the unit test's @@ -32,7 +32,7 @@ appender.console.type = Console appender.console.name = console appender.console.target = SYSTEM_ERR appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %t: %m%n +appender.console.layout.pattern = %t: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.sparkproject.jetty diff --git a/docs/configuration.md b/docs/configuration.md index 7952b4b1a2a69..89097bf839ecf 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -449,7 +449,7 @@ of the most common options to set are: - + ") result.append(" " % sig) From cf72e52c3df77231cce829111c96a13a79c9b529 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 27 Jun 2022 12:52:11 -0700 Subject: [PATCH 374/535] [SPARK-39621][PYTHON][TESTS] Make `run-tests.py` robust by avoiding `rmtree` on MacOS ### What changes were proposed in this pull request? This PR aims to make `run-tests.py` robust by avoiding `rmtree` on MacOS. ### Why are the changes needed? There exists a race condition in Python and it causes flakiness in MacOS - https://bugs.python.org/issue29699 - https://github.com/python/cpython/issues/73885 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? After passing CIs, this should be tested on MacOS. Closes #37010 from dongjoon-hyun/SPARK-39621. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 432945db743965f1beb59e3a001605335ca2df83) Signed-off-by: Dongjoon Hyun --- python/run-tests.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/run-tests.py b/python/run-tests.py index 6fce3f9a1ce99..1e3c1e8544df8 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -20,6 +20,7 @@ import logging from argparse import ArgumentParser import os +import platform import re import shutil import subprocess @@ -113,7 +114,12 @@ def run_individual_python_test(target_dir, test_name, pyspark_python): retcode = subprocess.Popen( [os.path.join(SPARK_HOME, "bin/pyspark")] + test_name.split(), stderr=per_test_output, stdout=per_test_output, env=env).wait() - shutil.rmtree(tmp_dir, ignore_errors=True) + # There exists a race condition in Python and it causes flakiness in MacOS + # https://github.com/python/cpython/issues/73885 + if platform.system() == "Darwin": + os.system("rm -rf " + tmp_dir) + else: + shutil.rmtree(tmp_dir, ignore_errors=True) except BaseException: LOGGER.exception("Got exception while running %s with %s", test_name, pyspark_python) # Here, we use os._exit() instead of sys.exit() in order to force Python to exit even if From 1f6b142e6966cbbda08f1a568974734d2d4f6208 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Tue, 28 Jun 2022 22:40:41 +0800 Subject: [PATCH 375/535] [SPARK-39570][SQL] Inline table should allow expressions with alias ### What changes were proposed in this pull request? `ResolveInlineTables` requires the column expressions to be foldable, however, `Alias` is not foldable. Inline-table does not use the names in the column expressions, and we should trim aliases before checking foldable. We did something similar in `ResolvePivot`. ### Why are the changes needed? To make inline-table handle more cases, and also fixed a regression caused by https://github.com/apache/spark/pull/31844 . After https://github.com/apache/spark/pull/31844 , we always add an alias for function literals like `current_timestamp`, which breaks inline table. ### Does this PR introduce _any_ user-facing change? yea, some failed queries can be run after this PR. ### How was this patch tested? new tests Closes #36967 from cloud-fan/bug. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit 1df992f03fd935ac215424576530ab57d1ab939b) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/analysis/Analyzer.scala | 7 ++----- .../catalyst/analysis/ResolveInlineTables.scala | 5 +++-- .../analysis/ResolveInlineTablesSuite.scala | 6 +++++- .../resources/sql-tests/inputs/inline-table.sql | 6 ++++++ .../sql-tests/results/inline-table.sql.out | 16 ++++++++++++++++ 5 files changed, 32 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 03f021350a269..37024e15377ef 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -739,7 +739,7 @@ class Analyzer(override val catalogManager: CatalogManager) } } - object ResolvePivot extends Rule[LogicalPlan] { + object ResolvePivot extends Rule[LogicalPlan] with AliasHelper { def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsWithPruning( _.containsPattern(PIVOT), ruleId) { case p: Pivot if !p.childrenResolved || !p.aggregates.forall(_.resolved) @@ -753,10 +753,7 @@ class Analyzer(override val catalogManager: CatalogManager) aggregates.foreach(checkValidAggregateExpression) // Check all pivot values are literal and match pivot column data type. val evalPivotValues = pivotValues.map { value => - val foldable = value match { - case Alias(v, _) => v.foldable - case _ => value.foldable - } + val foldable = trimAliases(value).foldable if (!foldable) { throw QueryCompilationErrors.nonLiteralPivotValError(value) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala index 91d724dc0135a..b91745a0cca3b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import scala.util.control.NonFatal import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.AliasHelper import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.AlwaysProcess @@ -28,7 +29,7 @@ import org.apache.spark.sql.types.{StructField, StructType} /** * An analyzer rule that replaces [[UnresolvedInlineTable]] with [[LocalRelation]]. */ -object ResolveInlineTables extends Rule[LogicalPlan] with CastSupport { +object ResolveInlineTables extends Rule[LogicalPlan] with CastSupport with AliasHelper { override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsWithPruning( AlwaysProcess.fn, ruleId) { case table: UnresolvedInlineTable if table.expressionsResolved => @@ -65,7 +66,7 @@ object ResolveInlineTables extends Rule[LogicalPlan] with CastSupport { table.rows.foreach { row => row.foreach { e => // Note that nondeterministic expressions are not supported since they are not foldable. - if (!e.resolved || !e.foldable) { + if (!e.resolved || !trimAliases(e).foldable) { e.failAnalysis(s"cannot evaluate expression ${e.sql} in inline table definition") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala index 16d23153c1c53..2e6c6e4eaf4c3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.analysis import org.scalatest.BeforeAndAfter import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.expressions.{Cast, Literal, Rand} +import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, Literal, Rand} import org.apache.spark.sql.catalyst.expressions.aggregate.Count import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.types.{LongType, NullType, TimestampType} @@ -38,6 +38,10 @@ class ResolveInlineTablesSuite extends AnalysisTest with BeforeAndAfter { ResolveInlineTables.validateInputEvaluable( UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(lit(1))))) + // Alias is OK + ResolveInlineTables.validateInputEvaluable( + UnresolvedInlineTable(Seq("c1", "c2"), Seq(Seq(Alias(lit(1), "a")())))) + // nondeterministic (rand) should not work intercept[AnalysisException] { ResolveInlineTables.validateInputEvaluable( diff --git a/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql b/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql index b3ec956cd178e..fd8bb2d837d97 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql @@ -23,6 +23,12 @@ select * from values ("one", 1), ("two", 2L) as data(a, b); -- foldable expressions select * from values ("one", 1 + 0), ("two", 1 + 3L) as data(a, b); +-- expressions with alias +select * from values ("one", 1 as one) as data(a, b); + +-- literal functions +select a from values ("one", current_timestamp) as data(a, b); + -- complex types select * from values ("one", array(0, 1)), ("two", array(2, 3)) as data(a, b); diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out index 401d684a55b42..d9aa34da6f154 100644 --- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out @@ -73,6 +73,22 @@ one 1 two 4 +-- !query +select * from values ("one", 1 as one) as data(a, b) +-- !query schema +struct +-- !query output +one 1 + + +-- !query +select a from values ("one", current_timestamp) as data(a, b) +-- !query schema +struct +-- !query output +one + + -- !query select * from values ("one", array(0, 1)), ("two", array(2, 3)) as data(a, b) -- !query schema From 7adb6e2102232447fde0cea010c2b68a602613b5 Mon Sep 17 00:00:00 2001 From: mcdull-zhang Date: Tue, 28 Jun 2022 11:55:51 -0500 Subject: [PATCH 376/535] [SPARK-37753][FOLLOWUP][SQL] Add comments to unit test ### What changes were proposed in this pull request? add comments to unit test. ### Why are the changes needed? code can be hard to understand without comments ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing test Closes #37018 from mcdull-zhang/add_reason. Authored-by: mcdull-zhang Signed-off-by: Sean Owen (cherry picked from commit 9fd010be24fcd6d81e05bd08133fd80ba81b97ac) Signed-off-by: Sean Owen --- .../spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index dd727855ce2fe..d5c933fbc8b56 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -711,6 +711,10 @@ class AdaptiveQueryExecSuite test("SPARK-37753: Inhibit broadcast in left outer join when there are many empty" + " partitions on outer/left side") { + // if the right side is completed first and the left side is still being executed, + // the right side does not know whether there are many empty partitions on the left side, + // so there is no demote, and then the right side is broadcast in the planning stage. + // so retry several times here to avoid unit test failure. eventually(timeout(15.seconds), interval(500.milliseconds)) { withSQLConf( SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", From d3f7f42b2780416b2cf5cb50e522909bb68e8c56 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 29 Jun 2022 18:09:10 -0500 Subject: [PATCH 377/535] [SPARK-39553][CORE] Multi-thread unregister shuffle shouldn't throw NPE when using Scala 2.13 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This pr add a `shuffleStatus != null` condition to `o.a.s.MapOutputTrackerMaster#unregisterShuffle` method to avoid throwing NPE when using Scala 2.13. ### Why are the changes needed? Ensure that no NPE is thrown when `o.a.s.MapOutputTrackerMaster#unregisterShuffle` is called by multiple threads, this pr is only for Scala 2.13. `o.a.s.MapOutputTrackerMaster#unregisterShuffle` method will be called concurrently by the following two paths: - BlockManagerStorageEndpoint: https://github.com/apache/spark/blob/6f1046afa40096f477b29beecca5ca6286dfa7f3/core/src/main/scala/org/apache/spark/storage/BlockManagerStorageEndpoint.scala#L56-L62 - ContextCleaner: https://github.com/apache/spark/blob/6f1046afa40096f477b29beecca5ca6286dfa7f3/core/src/main/scala/org/apache/spark/ContextCleaner.scala#L234-L241 When test with Scala 2.13, for example `sql/core` module, there are many log as follows,although these did not cause UTs failure: ``` 17:44:09.957 WARN org.apache.spark.storage.BlockManagerMaster: Failed to remove shuffle 87 - null java.lang.NullPointerException at org.apache.spark.MapOutputTrackerMaster.$anonfun$unregisterShuffle$1(MapOutputTracker.scala:882) at org.apache.spark.MapOutputTrackerMaster.$anonfun$unregisterShuffle$1$adapted(MapOutputTracker.scala:881) at scala.Option.foreach(Option.scala:437) at org.apache.spark.MapOutputTrackerMaster.unregisterShuffle(MapOutputTracker.scala:881) at org.apache.spark.storage.BlockManagerStorageEndpoint$$anonfun$receiveAndReply$1.$anonfun$applyOrElse$3(BlockManagerStorageEndpoint.scala:59) at scala.runtime.java8.JFunction0$mcZ$sp.apply(JFunction0$mcZ$sp.scala:17) at org.apache.spark.storage.BlockManagerStorageEndpoint.$anonfun$doAsync$1(BlockManagerStorageEndpoint.scala:89) at scala.concurrent.Future$.$anonfun$apply$1(Future.scala:678) at scala.concurrent.impl.Promise$Transformation.run(Promise.scala:467) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) 17:44:09.958 ERROR org.apache.spark.ContextCleaner: Error cleaning shuffle 94 java.lang.NullPointerException at org.apache.spark.MapOutputTrackerMaster.$anonfun$unregisterShuffle$1(MapOutputTracker.scala:882) at org.apache.spark.MapOutputTrackerMaster.$anonfun$unregisterShuffle$1$adapted(MapOutputTracker.scala:881) at scala.Option.foreach(Option.scala:437) at org.apache.spark.MapOutputTrackerMaster.unregisterShuffle(MapOutputTracker.scala:881) at org.apache.spark.ContextCleaner.doCleanupShuffle(ContextCleaner.scala:241) at org.apache.spark.ContextCleaner.$anonfun$keepCleaning$3(ContextCleaner.scala:202) at org.apache.spark.ContextCleaner.$anonfun$keepCleaning$3$adapted(ContextCleaner.scala:195) at scala.Option.foreach(Option.scala:437) at org.apache.spark.ContextCleaner.$anonfun$keepCleaning$1(ContextCleaner.scala:195) at org.apache.spark.util.Utils$.tryOrStopSparkContext(Utils.scala:1432) at org.apache.spark.ContextCleaner.org$apache$spark$ContextCleaner$$keepCleaning(ContextCleaner.scala:189) at org.apache.spark.ContextCleaner$$anon$1.run(ContextCleaner.scala:79) ``` I think this is a bug of Scala 2.13.8 and already submit an issue to https://github.com/scala/bug/issues/12613, this PR is only for protection, we should remove this protection after Scala 2.13(maybe https://github.com/scala/scala/pull/9957) fixes this issue. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GA - Add new test `SPARK-39553: Multi-thread unregister shuffle shouldn't throw NPE` to `MapOutputTrackerSuite`, we can test manually as follows: ``` dev/change-scala-version.sh 2.13 mvn clean install -DskipTests -pl core -am -Pscala-2.13 mvn clean test -pl core -Pscala-2.13 -Dtest=none -DwildcardSuites=org.apache.spark.MapOutputTrackerSuite ``` **Before** ``` - SPARK-39553: Multi-thread unregister shuffle shouldn't throw NPE *** FAILED *** 3 did not equal 0 (MapOutputTrackerSuite.scala:971) Run completed in 17 seconds, 505 milliseconds. Total number of tests run: 25 Suites: completed 2, aborted 0 Tests: succeeded 24, failed 1, canceled 0, ignored 1, pending 0 *** 1 TEST FAILED *** ``` **After** ``` - SPARK-39553: Multi-thread unregister shuffle shouldn't throw NPE Run completed in 17 seconds, 996 milliseconds. Total number of tests run: 25 Suites: completed 2, aborted 0 Tests: succeeded 25, failed 0, canceled 0, ignored 1, pending 0 All tests passed. ``` Closes #37024 from LuciferYang/SPARK-39553. Authored-by: yangjie01 Signed-off-by: Sean Owen (cherry picked from commit 29258964cae45cea43617ade971fb4ea9fe2902a) Signed-off-by: Sean Owen --- .../org/apache/spark/MapOutputTracker.scala | 8 +++-- .../apache/spark/MapOutputTrackerSuite.scala | 34 +++++++++++++++++++ 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index e6ed469250b47..b1974948430a1 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -879,8 +879,12 @@ private[spark] class MapOutputTrackerMaster( /** Unregister shuffle data */ def unregisterShuffle(shuffleId: Int): Unit = { shuffleStatuses.remove(shuffleId).foreach { shuffleStatus => - shuffleStatus.invalidateSerializedMapOutputStatusCache() - shuffleStatus.invalidateSerializedMergeOutputStatusCache() + // SPARK-39553: Add protection for Scala 2.13 due to https://github.com/scala/bug/issues/12613 + // We should revert this if Scala 2.13 solves this issue. + if (shuffleStatus != null) { + shuffleStatus.invalidateSerializedMapOutputStatusCache() + shuffleStatus.invalidateSerializedMergeOutputStatusCache() + } } } diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala index 5e502eb568759..a13527f4b74c2 100644 --- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark +import java.util.concurrent.atomic.LongAdder + import scala.collection.mutable.ArrayBuffer import org.mockito.ArgumentMatchers.any @@ -938,4 +940,36 @@ class MapOutputTrackerSuite extends SparkFunSuite with LocalSparkContext { assert(worker.shufflePushMergerLocations.isEmpty) } } + + test("SPARK-39553: Multi-thread unregister shuffle shouldn't throw NPE") { + val rpcEnv = createRpcEnv("test") + val tracker = newTrackerMaster() + tracker.trackerEndpoint = rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, + new MapOutputTrackerMasterEndpoint(rpcEnv, tracker, conf)) + val shuffleIdRange = 0 until 100 + shuffleIdRange.foreach { shuffleId => + tracker.registerShuffle(shuffleId, 2, MergeStatus.SHUFFLE_PUSH_DUMMY_NUM_REDUCES) + } + val npeCounter = new LongAdder() + // More threads will help to reproduce the problem + val threads = new Array[Thread](5) + threads.indices.foreach { i => + threads(i) = new Thread() { + override def run(): Unit = { + shuffleIdRange.foreach { shuffleId => + try { + tracker.unregisterShuffle(shuffleId) + } catch { + case _: NullPointerException => npeCounter.increment() + } + } + } + } + } + threads.foreach(_.start()) + threads.foreach(_.join()) + tracker.stop() + rpcEnv.shutdown() + assert(npeCounter.intValue() == 0) + } } From 18000fd0e20787b44b930296556483f3fb419a8f Mon Sep 17 00:00:00 2001 From: Prashant Singh Date: Thu, 30 Jun 2022 17:16:32 -0700 Subject: [PATCH 378/535] [SPARK-39633][SQL] Support timestamp in seconds for TimeTravel using Dataframe options ### What changes were proposed in this pull request? Support timestamp in seconds for TimeTravel using Dataframe options ### Why are the changes needed? To have a parity in doing TimeTravel via SQL and Dataframe option. SPARK-SQL supports queries like : ```sql SELECT * from {table} TIMESTAMP AS OF 1548751078 ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added new UTs for testing the behaviour. Closes #37025 from singhpk234/fix/timetravel_df_options. Authored-by: Prashant Singh Signed-off-by: huaxingao (cherry picked from commit 44e2657f3d511c25135c95dc3d584c540d227b5b) Signed-off-by: huaxingao --- .../execution/datasources/v2/DataSourceV2Utils.scala | 12 ++++++++++-- .../spark/sql/connector/DataSourceV2SQLSuite.scala | 11 +++++++++++ .../sql/connector/SupportsCatalogOptionsSuite.scala | 7 +++++++ 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala index f69a2a4588602..7fd61c44fd160 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SessionConfigSuppo import org.apache.spark.sql.connector.catalog.TableCapability.BATCH_READ import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{LongType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap private[sql] object DataSourceV2Utils extends Logging { @@ -124,7 +124,15 @@ private[sql] object DataSourceV2Utils extends Logging { val timestamp = hasCatalog.extractTimeTravelTimestamp(dsOptions) val timeTravelVersion = if (version.isPresent) Some(version.get) else None - val timeTravelTimestamp = if (timestamp.isPresent) Some(Literal(timestamp.get)) else None + val timeTravelTimestamp = if (timestamp.isPresent) { + if (timestamp.get.forall(_.isDigit)) { + Some(Literal(timestamp.get.toLong, LongType)) + } else { + Some(Literal(timestamp.get)) + } + } else { + None + } val timeTravel = TimeTravelSpec.create(timeTravelTimestamp, timeTravelVersion, conf) (CatalogV2Util.loadTable(catalog, ident, timeTravel).get, Some(catalog), Some(ident)) case _ => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index b64ed080d8bf1..675dd2807ca2b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -21,6 +21,7 @@ import java.sql.Timestamp import java.time.{Duration, LocalDate, Period} import scala.collection.JavaConverters._ +import scala.concurrent.duration.MICROSECONDS import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow @@ -2691,6 +2692,8 @@ class DataSourceV2SQLSuite val ts2 = DateTimeUtils.stringToTimestampAnsi( UTF8String.fromString("2021-01-29 00:00:00"), DateTimeUtils.getZoneId(SQLConf.get.sessionLocalTimeZone)) + val ts1InSeconds = MICROSECONDS.toSeconds(ts1).toString + val ts2InSeconds = MICROSECONDS.toSeconds(ts2).toString val t3 = s"testcat.t$ts1" val t4 = s"testcat.t$ts2" @@ -2707,6 +2710,14 @@ class DataSourceV2SQLSuite === Array(Row(5), Row(6))) assert(sql("SELECT * FROM t TIMESTAMP AS OF '2021-01-29 00:00:00'").collect === Array(Row(7), Row(8))) + assert(sql(s"SELECT * FROM t TIMESTAMP AS OF $ts1InSeconds").collect + === Array(Row(5), Row(6))) + assert(sql(s"SELECT * FROM t TIMESTAMP AS OF $ts2InSeconds").collect + === Array(Row(7), Row(8))) + assert(sql(s"SELECT * FROM t FOR SYSTEM_TIME AS OF $ts1InSeconds").collect + === Array(Row(5), Row(6))) + assert(sql(s"SELECT * FROM t FOR SYSTEM_TIME AS OF $ts2InSeconds").collect + === Array(Row(7), Row(8))) assert(sql("SELECT * FROM t TIMESTAMP AS OF make_date(2021, 1, 29)").collect === Array(Row(7), Row(8))) assert(sql("SELECT * FROM t TIMESTAMP AS OF to_timestamp('2021-01-29 00:00:00')").collect diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala index 473f679b4b99d..8d771b0736772 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/SupportsCatalogOptionsSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.connector import java.util.Optional +import scala.concurrent.duration.MICROSECONDS import scala.language.implicitConversions import scala.util.Try @@ -322,6 +323,12 @@ class SupportsCatalogOptionsSuite extends QueryTest with SharedSparkSession with timestamp = Some("2019-01-29 00:37:58")), df3.toDF()) checkAnswer(load("t", Some(catalogName), version = None, timestamp = Some("2021-01-29 00:37:58")), df4.toDF()) + + // load with timestamp in number format + checkAnswer(load("t", Some(catalogName), version = None, + timestamp = Some(MICROSECONDS.toSeconds(ts1).toString)), df3.toDF()) + checkAnswer(load("t", Some(catalogName), version = None, + timestamp = Some(MICROSECONDS.toSeconds(ts2).toString)), df4.toDF()) } val e = intercept[AnalysisException] { From 7e2a1827757a8c0e356ab795387f094e81f5f37e Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 30 Jun 2022 23:18:06 -0700 Subject: [PATCH 379/535] [SPARK-37205][FOLLOWUP] Should call non-static setTokensConf method ### What changes were proposed in this pull request? This fixes a bug in the original SPARK-37205 PR, where we treat the method `setTokensConf` as a static method, but it should be non-static instead. ### Why are the changes needed? The method `setTokensConf` is non-static so the current code will fail: ``` 06/29/2022 - 17:28:16 - Exception in thread "main" java.lang.IllegalArgumentException: object is not an instance of declaring class 06/29/2022 - 17:28:16 - at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 06/29/2022 - 17:28:16 - at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 06/29/2022 - 17:28:16 - at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 06/29/2022 - 17:28:16 - at java.base/java.lang.reflect.Method.invoke(Method.java:566) ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually tested this change internally and it now works. Closes #37037 from sunchao/SPARK-37205-fix. Authored-by: Chao Sun Signed-off-by: Chao Sun --- .../src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index f364b79216098..5402c503908ce 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -391,7 +391,7 @@ private[spark] class Client( throw new SparkException(s"Cannot find setTokensConf method in ${amContainer.getClass}." + s" Please check YARN version and make sure it is 2.9+ or 3.x") } - setTokensConfMethod.invoke(ByteBuffer.wrap(dob.getData)) + setTokensConfMethod.invoke(amContainer, ByteBuffer.wrap(dob.getData)) } } From f96b96d7971a03740a15c029bb76e87fb99306ad Mon Sep 17 00:00:00 2001 From: Daniel Tenedorio Date: Fri, 1 Jul 2022 16:49:50 +0800 Subject: [PATCH 380/535] [SPARK-38796][SQL] Update documentation for number format strings with the {try_}to_number functions ### What changes were proposed in this pull request? Update documentation for number format strings with the `{try_}to_number` functions. ### Why are the changes needed? The existing documentation is incomplete. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Built the doc site locally to provide the following initial screenshot: image Closes #36950 from dtenedor/number-docs. Lead-authored-by: Daniel Tenedorio Co-authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit 09d1bae95be2af01da65573d57867346f3833907) Signed-off-by: Wenchen Fan --- docs/sql-ref-number-pattern.md | 166 ++++++++++++++++++++++++++++++++- 1 file changed, 165 insertions(+), 1 deletion(-) diff --git a/docs/sql-ref-number-pattern.md b/docs/sql-ref-number-pattern.md index dc7d696e32fb1..dd4997a6aec5e 100644 --- a/docs/sql-ref-number-pattern.md +++ b/docs/sql-ref-number-pattern.md @@ -19,4 +19,168 @@ license: | limitations under the License. --- -TODO: Add the content of Number Patterns for Formatting and Parsing +### Description + +Functions such as `to_number` and `to_char` support converting between values of string and +Decimal type. Such functions accept format strings indicating how to map between these types. + +### Syntax + +Number format strings support the following syntax: + +``` + { ' [ MI | S ] [ $ ] + [ 0 | 9 | G | , ] [...] + [ . | D ] + [ 0 | 9 ] [...] + [ $ ] [ PR | MI | S ] ' } +``` + +### Elements + +Each number format string can contain the following elements (case insensitive): + +- **`0`** or **`9`** + + Specifies an expected digit between `0` and `9`. + + A sequence of 0 or 9 in the format string matches a sequence of digits with the same or smaller + size. If the 0/9 sequence starts with 0 and is before the decimal point, it requires matching the + number of digits: when parsing, it matches only a digit sequence of the same size; when + formatting, the result string adds left-padding with zeros to the digit sequence to reach the + same size. Otherwise, the 0/9 sequence matches any digit sequence with the same or smaller size + when parsing, and pads the digit sequence with spaces in the result string when formatting. Note + that the digit sequence will become a '#' sequence if the size is larger than the 0/9 sequence. + +- **`.`** or **`D`** + + Specifies the position of the decimal point. This character may only be specified once. + + When parsing, the input string does not need to include a decimal point. + +- **`,`** or **`G`** + + Specifies the position of the `,` grouping (thousands) separator. + + There must be a `0` or `9` to the left and right of each grouping separator. When parsing, + the input string must match the grouping separator relevant for the size of the number. + +- **`$`** + + Specifies the location of the `$` currency sign. This character may only be specified once. + +- **`S`** + + Specifies the position of an optional '+' or '-' sign. This character may only be specified once. + +- **`MI`** + + Specifies the position of an optional '-' sign (no '+'). This character may only be specified once. + + When formatting, it prints a space for positive values. + +- **`PR`** + + Maps negative input values to wrapping angle brackets (`<1>`) in the corresponding string. + + Positive input values do not receive wrapping angle brackets. + +### Function types and error handling + +* The `to_number` function accepts an input string and a format string argument. It requires that +the input string matches the provided format and raises an error otherwise. The function then +returns the corresponding Decimal value. +* The `try_to_number` function accepts an input string and a format string argument. It works the +same as the `to_number` function except that it returns NULL instead of raising an error if the +input string does not match the given number format. +* The `to_char` function accepts an input decimal and a format string argument. It requires that +the input decimal matches the provided format and raises an error otherwise. The function then +returns the corresponding string value. +* All functions will fail if the given format string is invalid. + +### Examples + +The following examples use the `to_number`, `try_to_number`, `to_char`, and `try_to_char` SQL +functions. + +Note that the format string used in most of these examples expects: +* an optional sign at the beginning, +* followed by a dollar sign, +* followed by a number between 3 and 6 digits long, +* thousands separators, +* up to two digits beyond the decimal point. + +#### The `to_number` function + +```sql +-- The negative number with currency symbol maps to characters in the format string. +> SELECT to_number('-$12,345.67', 'S$999,099.99'); + -12345.67 + +-- The '$' sign is not optional. +> SELECT to_number('5', '$9'); + Error: the input string does not match the given number format + +-- The plus sign is optional, and so are fractional digits. +> SELECT to_number('$345', 'S$999,099.99'); + 345.00 + +-- The format requires at least three digits. +> SELECT to_number('$45', 'S$999,099.99'); + Error: the input string does not match the given number format + +-- The format requires at least three digits. +> SELECT to_number('$045', 'S$999,099.99'); + 45.00 + +-- MI indicates an optional minus sign at the beginning or end of the input string. +> SELECT to_number('1234-', '999999MI'); + -1234 + +-- PR indicates optional wrapping angel brakets. +> SELECT to_number('9', '999PR') + 9 +``` + +#### The `try_to_number` function: + +```sql +-- The '$' sign is not optional. +> SELECT try_to_number('5', '$9'); + NULL + +-- The format requires at least three digits. +> SELECT try_to_number('$45', 'S$999,099.99'); + NULL +``` + +#### The `to_char` function: + +```sql +> SELECT to_char(decimal(454), '999'); + "454" + +-- '99' can format digit sequence with a smaller size. +> SELECT to_char(decimal(1), '99.9'); + " 1.0" + +-- '000' left-pads 0 for digit sequence with a smaller size. +> SELECT to_char(decimal(45.00), '000.00'); + "045.00" + +> SELECT to_char(decimal(12454), '99,999'); + "12,454" + +-- digit sequence with a larger size leads to '#' sequence. +> SELECT to_char(decimal(78.12), '$9.99'); + "$#.##" + +-- 'S' can be at the end. +> SELECT try_to_char(decimal(-12454.8), '99,999.9S'); + "12,454.8-" + +> SELECT try_to_char(decimal(12454.8), 'L99,999.9'); + Error: cannot resolve 'try_to_char(Decimal(12454.8), 'L99,999.9')' due to data type mismatch: + Unexpected character 'L' found in the format string 'L99,999.9'; the structure of the format + string must match: [MI|S] [$] [0|9|G|,]* [.|D] [0|9]* [$] [PR|MI|S]; line 1 pos 25 +``` From 2707a5ab5225cd24f5ca2bfee024d04f44904f80 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 1 Jul 2022 09:45:28 -0700 Subject: [PATCH 381/535] Revert "[SPARK-37205][FOLLOWUP] Should call non-static setTokensConf method" This reverts commit 7e2a1827757a8c0e356ab795387f094e81f5f37e. --- .../src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 5402c503908ce..f364b79216098 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -391,7 +391,7 @@ private[spark] class Client( throw new SparkException(s"Cannot find setTokensConf method in ${amContainer.getClass}." + s" Please check YARN version and make sure it is 2.9+ or 3.x") } - setTokensConfMethod.invoke(amContainer, ByteBuffer.wrap(dob.getData)) + setTokensConfMethod.invoke(ByteBuffer.wrap(dob.getData)) } } From 27f78e6672e4ca5449539f88793fd6f76c8e1df2 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 1 Jul 2022 13:46:55 -0700 Subject: [PATCH 382/535] [SPARK-39657][YARN] YARN AM client should call the non-static setTokensConf method ### What changes were proposed in this pull request? This fixes a bug in the original SPARK-37205 PR, where we treat the method `setTokensConf` as a static method, but it should be non-static instead. ### Why are the changes needed? The method `setTokensConf` is non-static so the current code will fail: ``` 06/29/2022 - 17:28:16 - Exception in thread "main" java.lang.IllegalArgumentException: object is not an instance of declaring class 06/29/2022 - 17:28:16 - at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 06/29/2022 - 17:28:16 - at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 06/29/2022 - 17:28:16 - at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 06/29/2022 - 17:28:16 - at java.base/java.lang.reflect.Method.invoke(Method.java:566) ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually tested this change internally and it now works. Closes #37050 from sunchao/SPARK-39657. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun (cherry picked from commit 6624d91c9644526f1cb6fdfb4677604b40aa786f) Signed-off-by: Dongjoon Hyun --- .../src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index f364b79216098..5402c503908ce 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -391,7 +391,7 @@ private[spark] class Client( throw new SparkException(s"Cannot find setTokensConf method in ${amContainer.getClass}." + s" Please check YARN version and make sure it is 2.9+ or 3.x") } - setTokensConfMethod.invoke(ByteBuffer.wrap(dob.getData)) + setTokensConfMethod.invoke(amContainer, ByteBuffer.wrap(dob.getData)) } } From 463a24d9afdaefabfa9f1129350b38e69ccd062d Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Sat, 2 Jul 2022 22:46:03 +0900 Subject: [PATCH 383/535] [SPARK-39650][SS] Fix incorrect value schema in streaming deduplication with backward compatibility ### What changes were proposed in this pull request? This PR proposes to fix the incorrect value schema in streaming deduplication. It stores the empty row having a single column with null (using NullType), but the value schema is specified as all columns, which leads incorrect behavior from state store schema compatibility checker. This PR proposes to set the schema of value as `StructType(Array(StructField("__dummy__", NullType)))` to fit with the empty row. With this change, the streaming queries creating the checkpoint after this fix would work smoothly. To not break the existing streaming queries having incorrect value schema, this PR proposes to disable the check for value schema on streaming deduplication. Disabling the value check was there for the format validation (we have two different checkers for state store), but it has been missing for state store schema compatibility check. To avoid adding more config, this PR leverages the existing config "format validation" is using. ### Why are the changes needed? This is a bug fix. Suppose the streaming query below: ``` # df has the columns `a`, `b`, `c` val df = spark.readStream.format("...").load() val query = df.dropDuplicate("a").writeStream.format("...").start() ``` while the query is running, df can produce a different set of columns (e.g. `a`, `b`, `c`, `d`) from the same source due to schema evolution. Since we only deduplicate the rows with column `a`, the change of schema should not matter for streaming deduplication, but state store schema checker throws error saying "value schema is not compatible" before this fix. ### Does this PR introduce _any_ user-facing change? No, this is basically a bug fix which end users wouldn't notice unless they encountered a bug. ### How was this patch tested? New tests. Closes #37041 from HeartSaVioR/SPARK-39650. Authored-by: Jungtaek Lim Signed-off-by: Jungtaek Lim (cherry picked from commit fe536033bdd00d921b3c86af329246ca55a4f46a) Signed-off-by: Jungtaek Lim --- .../StateSchemaCompatibilityChecker.scala | 26 +++++-- .../streaming/state/StateStore.scala | 7 +- .../streaming/state/StateStoreConf.scala | 7 +- .../streaming/statefulOperators.scala | 4 +- .../commits/.0.crc | Bin 0 -> 12 bytes .../commits/.1.crc | Bin 0 -> 12 bytes .../commits/0 | 2 + .../commits/1 | 2 + .../metadata | 1 + .../offsets/.0.crc | Bin 0 -> 16 bytes .../offsets/.1.crc | Bin 0 -> 16 bytes .../offsets/0 | 3 + .../offsets/1 | 3 + .../state/0/0/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/0/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/0/1.delta | Bin 0 -> 77 bytes .../state/0/0/2.delta | Bin 0 -> 46 bytes .../state/0/0/_metadata/.schema.crc | Bin 0 -> 12 bytes .../state/0/0/_metadata/schema | Bin 0 -> 254 bytes .../state/0/1/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/1/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/1/1.delta | Bin 0 -> 46 bytes .../state/0/1/2.delta | Bin 0 -> 77 bytes .../state/0/2/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/2/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/2/1.delta | Bin 0 -> 46 bytes .../state/0/2/2.delta | Bin 0 -> 46 bytes .../state/0/3/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/3/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/3/1.delta | Bin 0 -> 46 bytes .../state/0/3/2.delta | Bin 0 -> 46 bytes .../state/0/4/.1.delta.crc | Bin 0 -> 12 bytes .../state/0/4/.2.delta.crc | Bin 0 -> 12 bytes .../state/0/4/1.delta | Bin 0 -> 46 bytes .../state/0/4/2.delta | Bin 0 -> 46 bytes ...StateSchemaCompatibilityCheckerSuite.scala | 49 ++++++++---- .../StreamingDeduplicationSuite.scala | 70 ++++++++++++++++++ 37 files changed, 152 insertions(+), 22 deletions(-) create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/.0.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/.1.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/1 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/metadata create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/offsets/.0.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/offsets/.1.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/offsets/0 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/offsets/1 create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/_metadata/.schema.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/_metadata/schema create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/1/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/1/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/1/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/1/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/2/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/2/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/2/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/2/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/3/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/3/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/3/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/3/2.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/4/.1.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/4/.2.delta.crc create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/4/1.delta create mode 100644 sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/4/2.delta diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala index 0c8cabb75ed65..80384f8cb3b93 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityChecker.scala @@ -41,20 +41,34 @@ class StateSchemaCompatibilityChecker( fm.mkdirs(schemaFileLocation.getParent) def check(keySchema: StructType, valueSchema: StructType): Unit = { + check(keySchema, valueSchema, ignoreValueSchema = false) + } + + def check(keySchema: StructType, valueSchema: StructType, ignoreValueSchema: Boolean): Unit = { if (fm.exists(schemaFileLocation)) { logDebug(s"Schema file for provider $providerId exists. Comparing with provided schema.") val (storedKeySchema, storedValueSchema) = readSchemaFile() - if (storedKeySchema.equals(keySchema) && storedValueSchema.equals(valueSchema)) { + if (storedKeySchema.equals(keySchema) && + (ignoreValueSchema || storedValueSchema.equals(valueSchema))) { // schema is exactly same } else if (!schemasCompatible(storedKeySchema, keySchema) || - !schemasCompatible(storedValueSchema, valueSchema)) { + (!ignoreValueSchema && !schemasCompatible(storedValueSchema, valueSchema))) { + val errorMsgForKeySchema = s"- Provided key schema: $keySchema\n" + + s"- Existing key schema: $storedKeySchema\n" + + // If it is requested to skip checking the value schema, we also don't expose the value + // schema information to the error message. + val errorMsgForValueSchema = if (!ignoreValueSchema) { + s"- Provided value schema: $valueSchema\n" + + s"- Existing value schema: $storedValueSchema\n" + } else { + "" + } val errorMsg = "Provided schema doesn't match to the schema for existing state! " + "Please note that Spark allow difference of field name: check count of fields " + "and data type of each field.\n" + - s"- Provided key schema: $keySchema\n" + - s"- Provided value schema: $valueSchema\n" + - s"- Existing key schema: $storedKeySchema\n" + - s"- Existing value schema: $storedValueSchema\n" + + errorMsgForKeySchema + + errorMsgForValueSchema + s"If you want to force running query without schema validation, please set " + s"${SQLConf.STATE_SCHEMA_CHECK_ENABLED.key} to false.\n" + "Please note running query with incompatible schema could cause indeterministic" + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala index 5020638abc425..5d65c8e9f20b4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStore.scala @@ -511,7 +511,12 @@ object StateStore extends Logging { val checker = new StateSchemaCompatibilityChecker(storeProviderId, hadoopConf) // regardless of configuration, we check compatibility to at least write schema file // if necessary - val ret = Try(checker.check(keySchema, valueSchema)).toEither.fold(Some(_), _ => None) + // if the format validation for value schema is disabled, we also disable the schema + // compatibility checker for value schema as well. + val ret = Try( + checker.check(keySchema, valueSchema, + ignoreValueSchema = !storeConf.formatValidationCheckValue) + ).toEither.fold(Some(_), _ => None) if (storeConf.stateSchemaCheckEnabled) { ret } else { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala index 529db2609cd45..66bb37d7a57bd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/StateStoreConf.scala @@ -48,7 +48,12 @@ class StateStoreConf( /** Whether validate the underlying format or not. */ val formatValidationEnabled: Boolean = sqlConf.stateStoreFormatValidationEnabled - /** Whether validate the value format when the format invalidation enabled. */ + /** + * Whether to validate the value side. This config is applied to both validators as below: + * + * - whether to validate the value format when the format validation is enabled. + * - whether to validate the value schema when the state schema check is enabled. + */ val formatValidationCheckValue: Boolean = extraOptions.getOrElse(StateStoreConf.FORMAT_VALIDATION_CHECK_VALUE_CONFIG, "true") == "true" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala index e2a0598644258..2b8fc6515618d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala @@ -783,13 +783,15 @@ case class StreamingDeduplicateExec( keyExpressions, getStateInfo, conf) :: Nil } + private val schemaForEmptyRow: StructType = StructType(Array(StructField("__dummy__", NullType))) + override protected def doExecute(): RDD[InternalRow] = { metrics // force lazy init at driver child.execute().mapPartitionsWithStateStore( getStateInfo, keyExpressions.toStructType, - child.output.toStructType, + schemaForEmptyRow, numColsPrefixKey = 0, session.sessionState, Some(session.streams.stateStoreCoordinator), diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/.0.crc new file mode 100644 index 0000000000000000000000000000000000000000..1aee7033161ecac53eda98ef9b64746c31483c89 GIT binary patch literal 12 TcmYc;N@ieSU}E^Jwf`;v6eR=n literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/.1.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/.1.crc new file mode 100644 index 0000000000000000000000000000000000000000..1aee7033161ecac53eda98ef9b64746c31483c89 GIT binary patch literal 12 TcmYc;N@ieSU}E^Jwf`;v6eR=n literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/0 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/0 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/1 new file mode 100644 index 0000000000000..9c1e3021c3ead --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/commits/1 @@ -0,0 +1,2 @@ +v1 +{"nextBatchWatermarkMs":0} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/metadata b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/metadata new file mode 100644 index 0000000000000..78bd74a789fcc --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/metadata @@ -0,0 +1 @@ +{"id":"33e8de33-00b8-4b60-8246-df2f433257ff"} \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/offsets/.0.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/offsets/.0.crc new file mode 100644 index 0000000000000000000000000000000000000000..726c678bc6a292057ba9ba7f414c1237c614317d GIT binary patch literal 16 XcmYc;N@ieSU}D&{nl;z&)w+BDBM%^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/offsets/0 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/offsets/0 new file mode 100644 index 0000000000000..443c682435801 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/offsets/0 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1656644489789,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5"}} +0 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/offsets/1 b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/offsets/1 new file mode 100644 index 0000000000000..67b4217556378 --- /dev/null +++ b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/offsets/1 @@ -0,0 +1,3 @@ +v1 +{"batchWatermarkMs":0,"batchTimestampMs":1656644492462,"conf":{"spark.sql.streaming.stateStore.providerClass":"org.apache.spark.sql.execution.streaming.state.HDFSBackedStateStoreProvider","spark.sql.streaming.join.stateFormatVersion":"2","spark.sql.streaming.stateStore.compression.codec":"lz4","spark.sql.streaming.stateStore.rocksdb.formatVersion":"5","spark.sql.streaming.statefulOperator.useStrictDistribution":"true","spark.sql.streaming.flatMapGroupsWithState.stateFormatVersion":"2","spark.sql.streaming.multipleWatermarkPolicy":"min","spark.sql.streaming.aggregation.stateFormatVersion":"2","spark.sql.shuffle.partitions":"5"}} +1 \ No newline at end of file diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..1992982c58ff232b862a5e00e92235b8895264db GIT binary patch literal 12 TcmYc;N@ieSU}BKGxWp0w5!eF) literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..fec40e83a5471a5624119611a69d7bfdfc01a875 GIT binary patch literal 77 zcmeZ?GI7euPtI0VW?*120pe+5Z`p%^v;+eq!(>JvLjZ^qfdoGTg9rmV6GH&Qe<1LI K>JtUZ!T|s+-3_$> literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/_metadata/.schema.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/_metadata/.schema.crc new file mode 100644 index 0000000000000000000000000000000000000000..022717c6b5016bdd850f6cd11ba7005aa18b2472 GIT binary patch literal 12 TcmYc;N@ieSU}9Ky+`0?^6IlaU literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/_metadata/schema b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/0/_metadata/schema new file mode 100644 index 0000000000000000000000000000000000000000..f132f9601b73a14dc3ce0a5f641c2c91bdd29dd2 GIT binary patch literal 254 zcmZQzDl=kWU|?j3s8%YeEJ#(dQYtPfDorj?(osswOwCCtRBVo9QsRdsD`Y%SRIX|yq22jMa!gv&DXN>bBP Oi%^`FmY7qF={5i=UQx&Z literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/1/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/1/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/1/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/1/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..d18b77b93aff2c88bcfd28423b9f0322d1925578 GIT binary patch literal 12 TcmYc;N@ieSU}9kUlr$Fr5x)a2 literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/1/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/1/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/1/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/1/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..fcbf8df80f5f9699414708afa88c5515e92a5b12 GIT binary patch literal 77 zcmeZ?GI7euPtI0VW?*120b;l7-}r)ov;+eq!(>JvLjZ`AfCN7Sg9rmV6GH&Qe<1LI K>JtUZ!T|tFSPnM; literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/2/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/2/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/2/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/2/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/2/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/2/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/2/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/2/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/3/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/3/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/3/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/3/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/3/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/3/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/3/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/3/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/4/.1.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/4/.1.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/4/.2.delta.crc b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/4/.2.delta.crc new file mode 100644 index 0000000000000000000000000000000000000000..cf1d68e2acee3bca2b92320c4bafc702a6539ea0 GIT binary patch literal 12 TcmYc;N@ieSU}A7peP;>)5flQ* literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/4/1.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/4/1.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/4/2.delta b/sql/core/src/test/resources/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/state/0/4/2.delta new file mode 100644 index 0000000000000000000000000000000000000000..6352978051846970ca41a0ca97fd79952105726d GIT binary patch literal 46 icmeZ?GI7euPtF!)VPIeY;oA+q9RGp92POd&g989JFAHe^ literal 0 HcmV?d00001 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala index 1539341359337..7ba18a8140443 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateSchemaCompatibilityCheckerSuite.scala @@ -231,6 +231,16 @@ class StateSchemaCompatibilityCheckerSuite extends SharedSparkSession { assert((resultKeySchema, resultValueSchema) === (keySchema, valueSchema)) } + test("SPARK-39650: ignore value schema on compatibility check") { + val typeChangedValueSchema = StructType(valueSchema.map(_.copy(dataType = TimestampType))) + verifySuccess(keySchema, valueSchema, keySchema, typeChangedValueSchema, + ignoreValueSchema = true) + + val typeChangedKeySchema = StructType(keySchema.map(_.copy(dataType = TimestampType))) + verifyException(keySchema, valueSchema, typeChangedKeySchema, valueSchema, + ignoreValueSchema = true) + } + private def applyNewSchemaToNestedFieldInKey(newNestedSchema: StructType): StructType = { applyNewSchemaToNestedField(keySchema, newNestedSchema, "key3") } @@ -257,44 +267,57 @@ class StateSchemaCompatibilityCheckerSuite extends SharedSparkSession { dir: String, queryId: UUID, newKeySchema: StructType, - newValueSchema: StructType): Unit = { + newValueSchema: StructType, + ignoreValueSchema: Boolean): Unit = { // in fact, Spark doesn't support online state schema change, so need to check // schema only once for each running of JVM val providerId = StateStoreProviderId( StateStoreId(dir, opId, partitionId), queryId) new StateSchemaCompatibilityChecker(providerId, hadoopConf) - .check(newKeySchema, newValueSchema) + .check(newKeySchema, newValueSchema, ignoreValueSchema = ignoreValueSchema) } private def verifyException( oldKeySchema: StructType, oldValueSchema: StructType, newKeySchema: StructType, - newValueSchema: StructType): Unit = { + newValueSchema: StructType, + ignoreValueSchema: Boolean = false): Unit = { val dir = newDir() val queryId = UUID.randomUUID() - runSchemaChecker(dir, queryId, oldKeySchema, oldValueSchema) + runSchemaChecker(dir, queryId, oldKeySchema, oldValueSchema, + ignoreValueSchema = ignoreValueSchema) val e = intercept[StateSchemaNotCompatible] { - runSchemaChecker(dir, queryId, newKeySchema, newValueSchema) + runSchemaChecker(dir, queryId, newKeySchema, newValueSchema, + ignoreValueSchema = ignoreValueSchema) } - e.getMessage.contains("Provided schema doesn't match to the schema for existing state!") - e.getMessage.contains(newKeySchema.json) - e.getMessage.contains(newValueSchema.json) - e.getMessage.contains(oldKeySchema.json) - e.getMessage.contains(oldValueSchema.json) + assert(e.getMessage.contains("Provided schema doesn't match to the schema for existing state!")) + assert(e.getMessage.contains(newKeySchema.toString())) + assert(e.getMessage.contains(oldKeySchema.toString())) + + if (ignoreValueSchema) { + assert(!e.getMessage.contains(newValueSchema.toString())) + assert(!e.getMessage.contains(oldValueSchema.toString())) + } else { + assert(e.getMessage.contains(newValueSchema.toString())) + assert(e.getMessage.contains(oldValueSchema.toString())) + } } private def verifySuccess( oldKeySchema: StructType, oldValueSchema: StructType, newKeySchema: StructType, - newValueSchema: StructType): Unit = { + newValueSchema: StructType, + ignoreValueSchema: Boolean = false): Unit = { val dir = newDir() val queryId = UUID.randomUUID() - runSchemaChecker(dir, queryId, oldKeySchema, oldValueSchema) - runSchemaChecker(dir, queryId, newKeySchema, newValueSchema) + runSchemaChecker(dir, queryId, oldKeySchema, oldValueSchema, + ignoreValueSchema = ignoreValueSchema) + runSchemaChecker(dir, queryId, newKeySchema, newValueSchema, + ignoreValueSchema = ignoreValueSchema) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala index c1908d95f39e3..0315e03d18784 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationSuite.scala @@ -17,11 +17,16 @@ package org.apache.spark.sql.streaming +import java.io.File + +import org.apache.commons.io.FileUtils + import org.apache.spark.sql.DataFrame import org.apache.spark.sql.catalyst.streaming.InternalOutputModes._ import org.apache.spark.sql.execution.streaming.MemoryStream import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.util.Utils class StreamingDeduplicationSuite extends StateStoreMetricsTest { @@ -413,4 +418,69 @@ class StreamingDeduplicationSuite extends StateStoreMetricsTest { assertStateOperatorCustomMetric("numDroppedDuplicateRows", expected = 1) ) } + + test("SPARK-39650: duplicate with specific keys should allow input to change schema") { + withTempDir { checkpoint => + val dedupeInputData = MemoryStream[(String, Int)] + val dedupe = dedupeInputData.toDS().dropDuplicates("_1") + + testStream(dedupe, Append)( + StartStream(checkpointLocation = checkpoint.getCanonicalPath), + + AddData(dedupeInputData, "a" -> 1), + CheckLastBatch("a" -> 1), + + AddData(dedupeInputData, "a" -> 2, "b" -> 3), + CheckLastBatch("b" -> 3) + ) + + val dedupeInputData2 = MemoryStream[(String, Int, String)] + val dedupe2 = dedupeInputData2.toDS().dropDuplicates("_1") + + // initialize new memory stream with previously executed batches + dedupeInputData2.addData(("a", 1, "dummy")) + dedupeInputData2.addData(Seq(("a", 2, "dummy"), ("b", 3, "dummy"))) + + testStream(dedupe2, Append)( + StartStream(checkpointLocation = checkpoint.getCanonicalPath), + + AddData(dedupeInputData2, ("a", 5, "a"), ("b", 2, "b"), ("c", 9, "c")), + CheckLastBatch(("c", 9, "c")) + ) + } + } + + test("SPARK-39650: recovery from checkpoint having all columns as value schema") { + // NOTE: We are also changing the schema of input compared to the checkpoint. In the checkpoint + // we define the input schema as (String, Int). + val inputData = MemoryStream[(String, Int, String)] + val dedupe = inputData.toDS().dropDuplicates("_1") + + // The fix will land after Spark 3.3.0, hence we can check backward compatibility with + // checkpoint being built from Spark 3.3.0. + val resourceUri = this.getClass.getResource( + "/structured-streaming/checkpoint-version-3.3.0-streaming-deduplication/").toURI + + val checkpointDir = Utils.createTempDir().getCanonicalFile + // Copy the checkpoint to a temp dir to prevent changes to the original. + // Not doing this will lead to the test passing on the first run, but fail subsequent runs. + FileUtils.copyDirectory(new File(resourceUri), checkpointDir) + + inputData.addData(("a", 1, "dummy")) + inputData.addData(("a", 2, "dummy"), ("b", 3, "dummy")) + + testStream(dedupe, Append)( + StartStream(checkpointLocation = checkpointDir.getAbsolutePath), + /* + Note: The checkpoint was generated using the following input in Spark version 3.3.0 + AddData(inputData, ("a", 1)), + CheckLastBatch(("a", 1)), + AddData(inputData, ("a", 2), ("b", 3)), + CheckLastBatch(("b", 3)) + */ + + AddData(inputData, ("a", 5, "a"), ("b", 2, "b"), ("c", 9, "c")), + CheckLastBatch(("c", 9, "c")) + ) + } } From 3e28f338fad66393b6d2f7a2da6ce5eee60a626e Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Tue, 5 Jul 2022 11:31:02 +0800 Subject: [PATCH 384/535] [SPARK-39447][SQL] Avoid AssertionError in AdaptiveSparkPlanExec.doExecuteBroadcast ### What changes were proposed in this pull request? Change `currentPhysicalPlan` to `inputPlan ` when we restore the broadcast exchange for DPP. ### Why are the changes needed? The currentPhysicalPlan can be wrapped with broadcast query stage so it is not safe to match it. For example: The broadcast exchange which is added by DPP is running before than the normal broadcast exchange(e.g. introduced by join). ### Does this PR introduce _any_ user-facing change? yes bug fix ### How was this patch tested? add test Closes #36974 from ulysses-you/inputplan. Authored-by: ulysses-you Signed-off-by: Wenchen Fan (cherry picked from commit c320a5d51b2c8427fc5d6648984bfd266891b451) Signed-off-by: Wenchen Fan --- .../adaptive/AdaptiveSparkPlanExec.scala | 2 +- .../sql/DynamicPartitionPruningSuite.scala | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 40d2e1a3a8f46..6c9c0e1cda4e2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -659,7 +659,7 @@ case class AdaptiveSparkPlanExec( // node to prevent the loss of the `BroadcastExchangeExec` node in DPP subquery. // Here, we also need to avoid to insert the `BroadcastExchangeExec` node when the newPlan is // already the `BroadcastExchangeExec` plan after apply the `LogicalQueryStageStrategy` rule. - val finalPlan = currentPhysicalPlan match { + val finalPlan = inputPlan match { case b: BroadcastExchangeLike if (!newPlan.isInstanceOf[BroadcastExchangeLike]) => b.withNewChildren(Seq(newPlan)) case _ => newPlan diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala index cfdd2e08a79ea..d5498c469c541 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala @@ -1694,6 +1694,25 @@ class DynamicPartitionPruningV1SuiteAEOff extends DynamicPartitionPruningV1Suite class DynamicPartitionPruningV1SuiteAEOn extends DynamicPartitionPruningV1Suite with EnableAdaptiveExecutionSuite { + test("SPARK-39447: Avoid AssertionError in AdaptiveSparkPlanExec.doExecuteBroadcast") { + val df = sql( + """ + |WITH empty_result AS ( + | SELECT * FROM fact_stats WHERE product_id < 0 + |) + |SELECT * + |FROM (SELECT /*+ SHUFFLE_MERGE(fact_sk) */ empty_result.store_id + | FROM fact_sk + | JOIN empty_result + | ON fact_sk.product_id = empty_result.product_id) t2 + | JOIN empty_result + | ON t2.store_id = empty_result.store_id + """.stripMargin) + + checkPartitionPruningPredicate(df, false, false) + checkAnswer(df, Nil) + } + test("SPARK-37995: PlanAdaptiveDynamicPruningFilters should use prepareExecutedPlan " + "rather than createSparkPlan to re-plan subquery") { withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", From 3f969ada5fecddab272f2abbc849d2591f30f44c Mon Sep 17 00:00:00 2001 From: "Qian.Sun" Date: Tue, 5 Jul 2022 15:40:44 +0900 Subject: [PATCH 385/535] [SPARK-39676][CORE][TESTS] Add task partition id for TaskInfo assertEquals method in JsonProtocolSuite ### What changes were proposed in this pull request? In https://github.com/apache/spark/pull/35185 , task partition id was added in taskInfo. And, JsonProtocolSuite#assertEquals about TaskInfo doesn't have partitionId. ### Why are the changes needed? Should assert partitionId equals or not. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? No need to add unit test. Closes #37081 from dcoliversun/SPARK-39676. Authored-by: Qian.Sun Signed-off-by: Hyukjin Kwon --- .../test/scala/org/apache/spark/util/JsonProtocolSuite.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala index 36b61f67e3b87..3b7929b278ebc 100644 --- a/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/JsonProtocolSuite.scala @@ -790,6 +790,8 @@ private[spark] object JsonProtocolSuite extends Assertions { assert(info1.taskId === info2.taskId) assert(info1.index === info2.index) assert(info1.attemptNumber === info2.attemptNumber) + // The "Partition ID" field was added in Spark 3.3.0 + assert(info1.partitionId === info2.partitionId) assert(info1.launchTime === info2.launchTime) assert(info1.executorId === info2.executorId) assert(info1.host === info2.host) From 4512e0943036d30587ab19a95efb0e66b47dd746 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 5 Jul 2022 18:02:37 +0900 Subject: [PATCH 386/535] Revert "[SPARK-38531][SQL] Fix the condition of "Prune unrequired child index" branch of ColumnPruning" This reverts commit 17c56fc03b8e7269b293d6957c542eab9d723d52. --- .../optimizer/NestedColumnAliasing.scala | 19 ----------- .../sql/catalyst/optimizer/Optimizer.scala | 15 +++++---- .../optimizer/ColumnPruningSuite.scala | 32 ------------------- 3 files changed, 8 insertions(+), 58 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala index 6ba7907fdab4f..977e9b1ab1329 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/NestedColumnAliasing.scala @@ -314,25 +314,6 @@ object NestedColumnAliasing { } } -object GeneratorUnrequiredChildrenPruning { - def unapply(plan: LogicalPlan): Option[LogicalPlan] = plan match { - case p @ Project(_, g: Generate) => - val requiredAttrs = p.references ++ g.generator.references - val newChild = ColumnPruning.prunedChild(g.child, requiredAttrs) - val unrequired = g.generator.references -- p.references - val unrequiredIndices = newChild.output.zipWithIndex.filter(t => unrequired.contains(t._1)) - .map(_._2) - if (!newChild.fastEquals(g.child) || - unrequiredIndices.toSet != g.unrequiredChildIndex.toSet) { - Some(p.copy(child = g.copy(child = newChild, unrequiredChildIndex = unrequiredIndices))) - } else { - None - } - case _ => None - } -} - - /** * This prunes unnecessary nested columns from [[Generate]], or [[Project]] -> [[Generate]] */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 02f9a9eb01c07..21903976656b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -842,12 +842,13 @@ object ColumnPruning extends Rule[LogicalPlan] { e.copy(child = prunedChild(child, e.references)) // prune unrequired references - // There are 2 types of pruning here: - // 1. For attributes in g.child.outputSet that is not used by the generator nor the project, - // we directly remove it from the output list of g.child. - // 2. For attributes that is not used by the project but it is used by the generator, we put - // it in g.unrequiredChildIndex to save memory usage. - case GeneratorUnrequiredChildrenPruning(rewrittenPlan) => rewrittenPlan + case p @ Project(_, g: Generate) if p.references != g.outputSet => + val requiredAttrs = p.references -- g.producedAttributes ++ g.generator.references + val newChild = prunedChild(g.child, requiredAttrs) + val unrequired = g.generator.references -- p.references + val unrequiredIndices = newChild.output.zipWithIndex.filter(t => unrequired.contains(t._1)) + .map(_._2) + p.copy(child = g.copy(child = newChild, unrequiredChildIndex = unrequiredIndices)) // prune unrequired nested fields from `Generate`. case GeneratorNestedColumnAliasing(rewrittenPlan) => rewrittenPlan @@ -907,7 +908,7 @@ object ColumnPruning extends Rule[LogicalPlan] { }) /** Applies a projection only when the child is producing unnecessary attributes */ - def prunedChild(c: LogicalPlan, allReferences: AttributeSet): LogicalPlan = + private def prunedChild(c: LogicalPlan, allReferences: AttributeSet) = if (!c.outputSet.subsetOf(allReferences)) { Project(c.output.filter(allReferences.contains), c) } else { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala index 0101c855152d6..0655acbcb1bab 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala @@ -24,7 +24,6 @@ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.optimizer.NestedColumnAliasingSuite.collectGeneratedAliases import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -460,35 +459,4 @@ class ColumnPruningSuite extends PlanTest { val correctAnswer1 = Project(Seq('a), input).analyze comparePlans(Optimize.execute(plan1.analyze), correctAnswer1) } - - test("SPARK-38531: Nested field pruning for Project and PosExplode") { - val name = StructType.fromDDL("first string, middle string, last string") - val employer = StructType.fromDDL("id int, company struct") - val contact = LocalRelation( - 'id.int, - 'name.struct(name), - 'address.string, - 'friends.array(name), - 'relatives.map(StringType, name), - 'employer.struct(employer)) - - val query = contact - .select('id, 'friends) - .generate(PosExplode('friends)) - .select('col.getField("middle")) - .analyze - val optimized = Optimize.execute(query) - - val aliases = collectGeneratedAliases(optimized) - - val expected = contact - // GetStructField is pushed down, unused id column is pruned. - .select( - 'friends.getField("middle").as(aliases(0))) - .generate(PosExplode($"${aliases(0)}"), - unrequiredChildIndex = Seq(0)) // unrequiredChildIndex is added. - .select('col.as("col.middle")) - .analyze - comparePlans(optimized, expected) - } } From 2069fd03fd30faaabd1d73ca0416a76ab5908937 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 5 Jul 2022 13:37:41 +0300 Subject: [PATCH 387/535] [SPARK-39677][SQL][DOCS] Fix args formatting of the regexp and like functions ### What changes were proposed in this pull request? In the PR, I propose to fix args formatting of some regexp functions by adding explicit new lines. That fixes the following items in arg lists. Before: Screenshot 2022-07-05 at 09 48 28 After: Screenshot 2022-07-05 at 11 06 13 ### Why are the changes needed? To improve readability of Spark SQL docs. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By building docs and checking manually: ``` $ SKIP_SCALADOC=1 SKIP_PYTHONDOC=1 SKIP_RDOC=1 bundle exec jekyll build ``` Closes #37082 from MaxGekk/fix-regexp-docs. Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 4e42f8b12e8dc57a15998f22d508a19cf3c856aa) Signed-off-by: Max Gekk --- .../expressions/regexpExpressions.scala | 46 +++++++------------ 1 file changed, 16 insertions(+), 30 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index 01763f082d606..e3eea6f46e234 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -84,16 +84,12 @@ abstract class StringRegexExpression extends BinaryExpression Arguments: * str - a string expression * pattern - a string expression. The pattern is a string which is matched literally, with - exception to the following special symbols: - - _ matches any one character in the input (similar to . in posix regular expressions) - + exception to the following special symbols:

        + _ matches any one character in the input (similar to . in posix regular expressions)\ % matches zero or more characters in the input (similar to .* in posix regular - expressions) - + expressions)

        Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order - to match "\abc", the pattern should be "\\abc". - + to match "\abc", the pattern should be "\\abc".

        When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it falls back to Spark 1.6 behavior regarding string literal parsing. For example, if the config is enabled, the pattern to match "\abc" should be "\abc". @@ -189,7 +185,7 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) copy(left = newLeft, right = newRight) } -// scalastyle:off line.contains.tab +// scalastyle:off line.contains.tab line.size.limit /** * Simple RegEx case-insensitive pattern matching function */ @@ -200,16 +196,12 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) Arguments: * str - a string expression * pattern - a string expression. The pattern is a string which is matched literally and - case-insensitively, with exception to the following special symbols: - - _ matches any one character in the input (similar to . in posix regular expressions) - + case-insensitively, with exception to the following special symbols:

        + _ matches any one character in the input (similar to . in posix regular expressions)

        % matches zero or more characters in the input (similar to .* in posix regular - expressions) - + expressions)

        Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order - to match "\abc", the pattern should be "\\abc". - + to match "\abc", the pattern should be "\\abc".

        When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it falls back to Spark 1.6 behavior regarding string literal parsing. For example, if the config is enabled, the pattern to match "\abc" should be "\abc". @@ -237,7 +229,7 @@ case class Like(left: Expression, right: Expression, escapeChar: Char) """, since = "3.3.0", group = "predicate_funcs") -// scalastyle:on line.contains.tab +// scalastyle:on line.contains.tab line.size.limit case class ILike( left: Expression, right: Expression, @@ -574,12 +566,10 @@ case class StringSplit(str: Expression, regex: Expression, limit: Expression) Arguments: * str - a string expression to search for a regular expression pattern match. * regexp - a string representing a regular expression. The regex string should be a - Java regular expression. - + Java regular expression.

        Since Spark 2.0, string literals (including regex patterns) are unescaped in our SQL parser. For example, to match "\abc", a regular expression for `regexp` can be - "^\\abc$". - + "^\\abc$".

        There is a SQL config 'spark.sql.parser.escapedStringLiterals' that can be used to fallback to the Spark 1.6 behavior regarding string literal parsing. For example, if the config is enabled, the `regexp` that can match "\abc" is "^\abc$". @@ -783,12 +773,10 @@ abstract class RegExpExtractBase Arguments: * str - a string expression. * regexp - a string representing a regular expression. The regex string should be a - Java regular expression. - + Java regular expression.

        Since Spark 2.0, string literals (including regex patterns) are unescaped in our SQL parser. For example, to match "\abc", a regular expression for `regexp` can be - "^\\abc$". - + "^\\abc$".

        There is a SQL config 'spark.sql.parser.escapedStringLiterals' that can be used to fallback to the Spark 1.6 behavior regarding string literal parsing. For example, if the config is enabled, the `regexp` that can match "\abc" is "^\abc$". @@ -888,12 +876,10 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio Arguments: * str - a string expression. * regexp - a string representing a regular expression. The regex string should be a - Java regular expression. - + Java regular expression.

        Since Spark 2.0, string literals (including regex patterns) are unescaped in our SQL parser. For example, to match "\abc", a regular expression for `regexp` can be - "^\\abc$". - + "^\\abc$".

        There is a SQL config 'spark.sql.parser.escapedStringLiterals' that can be used to fallback to the Spark 1.6 behavior regarding string literal parsing. For example, if the config is enabled, the `regexp` that can match "\abc" is "^\abc$". From 364a4f52610fdacdefc2d16af984900c55f8e31b Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 5 Jul 2022 20:44:43 +0900 Subject: [PATCH 388/535] [SPARK-39612][SQL][TESTS] DataFrame.exceptAll followed by count should work ### What changes were proposed in this pull request? This PR adds a test case broken by https://github.com/apache/spark/commit/4b9343593eca780ca30ffda45244a71413577884 which was reverted in https://github.com/apache/spark/commit/161c596cafea9c235b5c918d8999c085401d73a9. ### Why are the changes needed? To prevent a regression in the future. This was a regression in Apache Spark 3.3 that used to work in Apache Spark 3.2. ### Does this PR introduce _any_ user-facing change? Yes, it makes `DataFrame.exceptAll` followed by `count` working. ### How was this patch tested? The unit test was added. Closes #37084 from HyukjinKwon/SPARK-39612. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 947e271402f749f6f58b79fecd59279eaf86db57) Signed-off-by: Hyukjin Kwon --- .../src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 728ba3d645625..a4651c913c6c1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -3215,6 +3215,11 @@ class DataFrameSuite extends QueryTest } } } + + test("SPARK-39612: exceptAll with following count should work") { + val d1 = Seq("a").toDF + assert(d1.exceptAll(d1).count() === 0) + } } case class GroupByKey(a: Int, b: Int) From 2edd344392a5ddb44f97449b8ad3c6292eb334e3 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Tue, 5 Jul 2022 20:52:36 +0900 Subject: [PATCH 389/535] [SPARK-39611][PYTHON][PS] Fix wrong aliases in __array_ufunc__ ### What changes were proposed in this pull request? This PR fix the wrong aliases in `__array_ufunc__` ### Why are the changes needed? When running test with numpy 1.23.0 (current latest), hit a bug: `NotImplementedError: pandas-on-Spark objects currently do not support .` In `__array_ufunc__` we first call `maybe_dispatch_ufunc_to_dunder_op` to try dunder methods first, and then we try pyspark API. `maybe_dispatch_ufunc_to_dunder_op` is from pandas code. pandas fix a bug https://github.com/pandas-dev/pandas/pull/44822#issuecomment-991166419 https://github.com/pandas-dev/pandas/pull/44822/commits/206b2496bc6f6aa025cb26cb42f52abeec227741 when upgrade to numpy 1.23.0, we need to also sync this. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Current CI passed - The exsiting UT `test_series_datetime` already cover this, I also test it in my local env with 1.23.0 ```shell pip install "numpy==1.23.0" python/run-tests --testnames 'pyspark.pandas.tests.test_series_datetime SeriesDateTimeTest.test_arithmetic_op_exceptions' ``` Closes #37078 from Yikun/SPARK-39611. Authored-by: Yikun Jiang Signed-off-by: Hyukjin Kwon (cherry picked from commit fb48a14a67940b9270390b8ce74c19ae58e2880e) Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/numpy_compat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/pandas/numpy_compat.py b/python/pyspark/pandas/numpy_compat.py index ea72fa658e458..f9b7bd67a9b74 100644 --- a/python/pyspark/pandas/numpy_compat.py +++ b/python/pyspark/pandas/numpy_compat.py @@ -166,7 +166,7 @@ def maybe_dispatch_ufunc_to_dunder_op( "true_divide": "truediv", "power": "pow", "remainder": "mod", - "divide": "div", + "divide": "truediv", "equal": "eq", "not_equal": "ne", "less": "lt", From f9e3668dbb1cafdac0d7c46fc65035a1f9262af1 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Tue, 5 Jul 2022 12:43:27 -0700 Subject: [PATCH 390/535] [SPARK-39656][SQL][3.3] Fix wrong namespace in DescribeNamespaceExec backport https://github.com/apache/spark/pull/37049 for branch-3.3 ### What changes were proposed in this pull request? DescribeNamespaceExec change ns.last to ns.quoted ### Why are the changes needed? DescribeNamespaceExec should show the whole namespace rather than last ### Does this PR introduce _any_ user-facing change? yes, a small bug fix ### How was this patch tested? fix test Closes #37071 from ulysses-you/desc-namespace-3.3. Authored-by: ulysses-you Signed-off-by: huaxingao --- .../sql/execution/datasources/v2/DescribeNamespaceExec.scala | 3 ++- .../sql/execution/command/v2/DescribeNamespaceSuite.scala | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala index a965355ec724e..3c2d22e1252c6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeNamespaceExec.scala @@ -23,6 +23,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsNamespaces} +import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ /** * Physical plan node for describing a namespace. @@ -37,7 +38,7 @@ case class DescribeNamespaceExec( val ns = namespace.toArray val metadata = catalog.loadNamespaceMetadata(ns) - rows += toCatalystRow("Namespace Name", ns.last) + rows += toCatalystRow("Namespace Name", ns.quoted) CatalogV2Util.NAMESPACE_RESERVED_PROPERTIES.foreach { p => rows ++= Option(metadata.get(p)).map(toCatalystRow(p.capitalize, _)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeNamespaceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeNamespaceSuite.scala index 7d6835f09b1dd..259eeec6442fa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeNamespaceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeNamespaceSuite.scala @@ -41,7 +41,7 @@ class DescribeNamespaceSuite extends command.DescribeNamespaceSuiteBase with Com )) val description = descriptionDf.collect() assert(description === Seq( - Row("Namespace Name", "ns2"), + Row("Namespace Name", "ns1.ns2"), Row(SupportsNamespaces.PROP_COMMENT.capitalize, "test namespace"), Row(SupportsNamespaces.PROP_LOCATION.capitalize, "file:/tmp/ns_test"), Row(SupportsNamespaces.PROP_OWNER.capitalize, Utils.getCurrentUserName())) From 016dfeb760dbe1109e3c81c39bcd1bf3316a3e20 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Thu, 7 Jul 2022 09:55:45 +0800 Subject: [PATCH 391/535] [SPARK-37527][SQL][FOLLOWUP] Cannot compile COVAR_POP, COVAR_SAMP and CORR in `H2Dialect` if them with `DISTINCT` https://github.com/apache/spark/pull/35145 compile COVAR_POP, COVAR_SAMP and CORR in H2Dialect. Because H2 does't support COVAR_POP, COVAR_SAMP and CORR works with DISTINCT. So https://github.com/apache/spark/pull/35145 introduces a bug that compile COVAR_POP, COVAR_SAMP and CORR if these aggregate functions with DISTINCT. Fix bug that compile COVAR_POP, COVAR_SAMP and CORR if these aggregate functions with DISTINCT. 'Yes'. Bug will be fix. New test cases. Closes #37090 from beliefer/SPARK-37527_followup2. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit 14f2bae208c093dea58e3f947fb660e8345fb256) Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/jdbc/H2Dialect.scala | 15 +++----- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 38 +++++++++++++------ 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala index 4a88203ec59c9..967df112af22b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala @@ -55,18 +55,15 @@ private object H2Dialect extends JdbcDialect { assert(f.children().length == 1) val distinct = if (f.isDistinct) "DISTINCT " else "" Some(s"STDDEV_SAMP($distinct${f.children().head})") - case f: GeneralAggregateFunc if f.name() == "COVAR_POP" => + case f: GeneralAggregateFunc if f.name() == "COVAR_POP" && !f.isDistinct => assert(f.children().length == 2) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"COVAR_POP($distinct${f.children().head}, ${f.children().last})") - case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" => + Some(s"COVAR_POP(${f.children().head}, ${f.children().last})") + case f: GeneralAggregateFunc if f.name() == "COVAR_SAMP" && !f.isDistinct => assert(f.children().length == 2) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"COVAR_SAMP($distinct${f.children().head}, ${f.children().last})") - case f: GeneralAggregateFunc if f.name() == "CORR" => + Some(s"COVAR_SAMP(${f.children().head}, ${f.children().last})") + case f: GeneralAggregateFunc if f.name() == "CORR" && !f.isDistinct => assert(f.children().length == 2) - val distinct = if (f.isDistinct) "DISTINCT " else "" - Some(s"CORR($distinct${f.children().head}, ${f.children().last})") + Some(s"CORR(${f.children().head}, ${f.children().last})") case _ => None } ) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 2f94f9ef31e83..293334084af27 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -1028,23 +1028,37 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel } test("scan with aggregate push-down: COVAR_POP COVAR_SAMP with filter and group by") { - val df = sql("select COVAR_POP(bonus, bonus), COVAR_SAMP(bonus, bonus)" + - " FROM h2.test.employee where dept > 0 group by DePt") - checkFiltersRemoved(df) - checkAggregateRemoved(df) - checkPushedInfo(df, "PushedAggregates: [COVAR_POP(BONUS, BONUS), COVAR_SAMP(BONUS, BONUS)], " + + val df1 = sql("SELECT COVAR_POP(bonus, bonus), COVAR_SAMP(bonus, bonus)" + + " FROM h2.test.employee WHERE dept > 0 GROUP BY DePt") + checkFiltersRemoved(df1) + checkAggregateRemoved(df1) + checkPushedInfo(df1, "PushedAggregates: [COVAR_POP(BONUS, BONUS), COVAR_SAMP(BONUS, BONUS)], " + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], PushedGroupByExpressions: [DEPT]") - checkAnswer(df, Seq(Row(10000d, 20000d), Row(2500d, 5000d), Row(0d, null))) + checkAnswer(df1, Seq(Row(10000d, 20000d), Row(2500d, 5000d), Row(0d, null))) + + val df2 = sql("SELECT COVAR_POP(DISTINCT bonus, bonus), COVAR_SAMP(DISTINCT bonus, bonus)" + + " FROM h2.test.employee WHERE dept > 0 GROUP BY DePt") + checkFiltersRemoved(df2) + checkAggregateRemoved(df2, false) + checkPushedInfo(df2, "PushedFilters: [DEPT IS NOT NULL, DEPT > 0]") + checkAnswer(df2, Seq(Row(10000d, 20000d), Row(2500d, 5000d), Row(0d, null))) } test("scan with aggregate push-down: CORR with filter and group by") { - val df = sql("select CORR(bonus, bonus) FROM h2.test.employee where dept > 0" + - " group by DePt") - checkFiltersRemoved(df) - checkAggregateRemoved(df) - checkPushedInfo(df, "PushedAggregates: [CORR(BONUS, BONUS)], " + + val df1 = sql("SELECT CORR(bonus, bonus) FROM h2.test.employee WHERE dept > 0" + + " GROUP BY DePt") + checkFiltersRemoved(df1) + checkAggregateRemoved(df1) + checkPushedInfo(df1, "PushedAggregates: [CORR(BONUS, BONUS)], " + "PushedFilters: [DEPT IS NOT NULL, DEPT > 0], PushedGroupByExpressions: [DEPT]") - checkAnswer(df, Seq(Row(1d), Row(1d), Row(null))) + checkAnswer(df1, Seq(Row(1d), Row(1d), Row(null))) + + val df2 = sql("SELECT CORR(DISTINCT bonus, bonus) FROM h2.test.employee WHERE dept > 0" + + " GROUP BY DePt") + checkFiltersRemoved(df2) + checkAggregateRemoved(df2, false) + checkPushedInfo(df2, "PushedFilters: [DEPT IS NOT NULL, DEPT > 0]") + checkAnswer(df2, Seq(Row(1d), Row(1d), Row(null))) } test("scan with aggregate push-down: aggregate over alias push down") { From acf8f66650af53718b08f3778c2a2a3a5d10a88f Mon Sep 17 00:00:00 2001 From: Chandni Singh Date: Tue, 12 Jul 2022 00:20:43 -0500 Subject: [PATCH 392/535] [SPARK-39647][CORE] Register the executor with ESS before registering the BlockManager ### What changes were proposed in this pull request? Currently the executors register with the ESS after the `BlockManager` registration with the `BlockManagerMaster`. This order creates a problem with the push-based shuffle. A registered BlockManager node is picked up by the driver as a merger but the shuffle service on that node is not yet ready to merge the data which causes block pushes to fail until the local executor registers with it. This fix is to reverse the order, that is, register with the ESS before registering the `BlockManager` ### Why are the changes needed? They are needed to fix the issue which causes block pushes to fail. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a UT. Closes #37052 from otterc/SPARK-39647. Authored-by: Chandni Singh Signed-off-by: Mridul Muralidharan gmail.com> (cherry picked from commit 79ba2890f51c5f676b9cd6e3a6682c7969462999) Signed-off-by: Mridul Muralidharan --- .../apache/spark/storage/BlockManager.scala | 30 ++++++++++------ .../spark/storage/BlockManagerSuite.scala | 36 +++++++++++++++++++ 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index d5901888d1abf..53d2d05412145 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -516,9 +516,27 @@ private[spark] class BlockManager( ret } + // Register Executors' configuration with the local shuffle service, if one should exist. + // Registration with the ESS should happen before registering the block manager with the + // BlockManagerMaster. In push-based shuffle, the registered BM is selected by the driver + // as a merger. However, for the ESS on this host to be able to merge blocks successfully, + // it needs the merge directories metadata which is provided by the local executor during + // the registration with the ESS. Therefore, this registration should be prior to + // the BlockManager registration. See SPARK-39647. + if (externalShuffleServiceEnabled) { + logInfo(s"external shuffle service port = $externalShuffleServicePort") + shuffleServerId = BlockManagerId(executorId, blockTransferService.hostName, + externalShuffleServicePort) + if (!isDriver) { + registerWithExternalShuffleServer() + } + } + val id = BlockManagerId(executorId, blockTransferService.hostName, blockTransferService.port, None) + // The idFromMaster has just additional topology information. Otherwise, it has the same + // executor id/host/port of idWithoutTopologyInfo which is not expected to be changed. val idFromMaster = master.registerBlockManager( id, diskBlockManager.localDirsString, @@ -528,16 +546,8 @@ private[spark] class BlockManager( blockManagerId = if (idFromMaster != null) idFromMaster else id - shuffleServerId = if (externalShuffleServiceEnabled) { - logInfo(s"external shuffle service port = $externalShuffleServicePort") - BlockManagerId(executorId, blockTransferService.hostName, externalShuffleServicePort) - } else { - blockManagerId - } - - // Register Executors' configuration with the local shuffle service, if one should exist. - if (externalShuffleServiceEnabled && !blockManagerId.isDriver) { - registerWithExternalShuffleServer() + if (!externalShuffleServiceEnabled) { + shuffleServerId = blockManagerId } hostLocalDirManager = { diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala index 45e05b2cc2da1..874b2b4f00521 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala @@ -2175,6 +2175,42 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE assert(kryoException.getMessage === "java.io.IOException: Input/output error") } + test("SPARK-39647: Failure to register with ESS should prevent registering the BM") { + val handler = new NoOpRpcHandler { + override def receive( + client: TransportClient, + message: ByteBuffer, + callback: RpcResponseCallback): Unit = { + val msgObj = BlockTransferMessage.Decoder.fromByteBuffer(message) + msgObj match { + case _: RegisterExecutor => () // No reply to generate client-side timeout + } + } + } + val transConf = SparkTransportConf.fromSparkConf(conf, "shuffle") + Utils.tryWithResource(new TransportContext(transConf, handler, true)) { transCtx => + def newShuffleServer(port: Int): (TransportServer, Int) = { + (transCtx.createServer(port, Seq.empty[TransportServerBootstrap].asJava), port) + } + + val candidatePort = RandomUtils.nextInt(1024, 65536) + val (server, shufflePort) = Utils.startServiceOnPort(candidatePort, + newShuffleServer, conf, "ShuffleServer") + + conf.set(SHUFFLE_SERVICE_ENABLED.key, "true") + conf.set(SHUFFLE_SERVICE_PORT.key, shufflePort.toString) + conf.set(SHUFFLE_REGISTRATION_TIMEOUT.key, "40") + conf.set(SHUFFLE_REGISTRATION_MAX_ATTEMPTS.key, "1") + val e = intercept[SparkException] { + makeBlockManager(8000, "timeoutExec") + }.getMessage + assert(e.contains("TimeoutException")) + verify(master, times(0)) + .registerBlockManager(mc.any(), mc.any(), mc.any(), mc.any(), mc.any()) + server.close() + } + } + private def createKryoSerializerWithDiskCorruptedInputStream(): KryoSerializer = { class TestDiskCorruptedInputStream extends InputStream { override def read(): Int = throw new IOException("Input/output error") From 2fe16015cdd701f395693b4e6bfa72cd101a8b8c Mon Sep 17 00:00:00 2001 From: tianlzhang Date: Thu, 14 Jul 2022 12:49:57 +0800 Subject: [PATCH 393/535] [SPARK-39672][SQL][3.1] Fix removing project before filter with correlated subquery Add more checks to`removeProjectBeforeFilter` in `ColumnPruning` and only remove the project if 1. the filter condition contains correlated subquery 2. same attribute exists in both output of child of Project and subquery This is a legitimate self-join query and should not throw exception when de-duplicating attributes in subquery and outer values. ```sql select * from ( select v1.a, v1.b, v2.c from v1 inner join v2 on v1.a=v2.a) t3 where not exists ( select 1 from v2 where t3.a=v2.a and t3.b=v2.b and t3.c=v2.c ) ``` Here's what happens with the current code. The above query is analyzed into following `LogicalPlan` before `ColumnPruning`. ``` Project [a#250, b#251, c#268] +- Filter NOT exists#272 [(a#250 = a#266) && (b#251 = b#267) && (c#268 = c#268#277)] : +- Project [1 AS 1#273, _1#259 AS a#266, _2#260 AS b#267, _3#261 AS c#268#277] : +- LocalRelation [_1#259, _2#260, _3#261] +- Project [a#250, b#251, c#268] +- Join Inner, (a#250 = a#266) :- Project [a#250, b#251] : +- Project [_1#243 AS a#250, _2#244 AS b#251] : +- LocalRelation [_1#243, _2#244, _3#245] +- Project [a#266, c#268] +- Project [_1#259 AS a#266, _3#261 AS c#268] +- LocalRelation [_1#259, _2#260, _3#261] ``` Then in `ColumnPruning`, the Project before Filter (between Filter and Join) is removed. This changes the `outputSet` of the child of Filter among which the same attribute also exists in the subquery. Later, when `RewritePredicateSubquery` de-duplicates conflicting attributes, it would complain `Found conflicting attributes a#266 in the condition joining outer plan`. No. Add UT. Closes #37074 from manuzhang/spark-39672. Lead-authored-by: tianlzhang Co-authored-by: Manu Zhang Signed-off-by: Wenchen Fan (cherry picked from commit 36fc73e7c42b84e05b15b2caecc0f804610dce20) Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 16 ++++- .../org/apache/spark/sql/SubquerySuite.scala | 58 ++++++++++++++++++- 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 21903976656b6..d3a6065f72643 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -921,12 +921,24 @@ object ColumnPruning extends Rule[LogicalPlan] { * order, otherwise lower Projects can be missed. */ private def removeProjectBeforeFilter(plan: LogicalPlan): LogicalPlan = plan transformUp { - case p1 @ Project(_, f @ Filter(_, p2 @ Project(_, child))) + case p1 @ Project(_, f @ Filter(e, p2 @ Project(_, child))) if p2.outputSet.subsetOf(child.outputSet) && // We only remove attribute-only project. - p2.projectList.forall(_.isInstanceOf[AttributeReference]) => + p2.projectList.forall(_.isInstanceOf[AttributeReference]) && + // We can't remove project when the child has conflicting attributes + // with the subquery in filter predicate + !hasConflictingAttrsWithSubquery(e, child) => p1.copy(child = f.copy(child = child)) } + + private def hasConflictingAttrsWithSubquery( + predicate: Expression, + child: LogicalPlan): Boolean = { + predicate.find { + case s: SubqueryExpression if s.plan.outputSet.intersect(child.outputSet).nonEmpty => true + case _ => false + }.isDefined + } } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index e8ddf93afc334..0975772fb9052 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.expressions.SubqueryExpression -import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan, Sort} +import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan, Project, Sort} import org.apache.spark.sql.execution.{ColumnarToRowExec, ExecSubqueryExpression, FileSourceScanExec, InputAdapter, ReusedSubqueryExec, ScalarSubquery, SubqueryExec, WholeStageCodegenExec} import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, DisableAdaptiveExecution} import org.apache.spark.sql.execution.datasources.FileScanRDD @@ -2213,4 +2213,60 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark |""".stripMargin), Row("2022-06-01")) } + + test("SPARK-39672: Fix removing project before filter with correlated subquery") { + withTempView("v1", "v2") { + Seq((1, 2, 3), (4, 5, 6)).toDF("a", "b", "c").createTempView("v1") + Seq((1, 3, 5), (4, 5, 6)).toDF("a", "b", "c").createTempView("v2") + + def findProject(df: DataFrame): Seq[Project] = { + df.queryExecution.optimizedPlan.collect { + case p: Project => p + } + } + + // project before filter cannot be removed since subquery has conflicting attributes + // with outer reference + val df1 = sql( + """ + |select * from + |( + |select + |v1.a, + |v1.b, + |v2.c + |from v1 + |inner join v2 + |on v1.a=v2.a) t3 + |where not exists ( + | select 1 + | from v2 + | where t3.a=v2.a and t3.b=v2.b and t3.c=v2.c + |) + |""".stripMargin) + checkAnswer(df1, Row(1, 2, 5)) + assert(findProject(df1).size == 4) + + // project before filter can be removed when there are no conflicting attributes + val df2 = sql( + """ + |select * from + |( + |select + |v1.b, + |v2.c + |from v1 + |inner join v2 + |on v1.b=v2.c) t3 + |where not exists ( + | select 1 + | from v2 + | where t3.b=v2.b and t3.c=v2.c + |) + |""".stripMargin) + + checkAnswer(df2, Row(5, 5)) + assert(findProject(df2).size == 3) + } + } } From 0e2758c9955c2ae102e37e0b49aa9446bbe6fecf Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 14 Jul 2022 17:45:39 +0300 Subject: [PATCH 394/535] [SPARK-39758][SQL][3.3] Fix NPE from the regexp functions on invalid patterns ### What changes were proposed in this pull request? In the PR, I propose to catch `PatternSyntaxException` while compiling the regexp pattern by the `regexp_extract`, `regexp_extract_all` and `regexp_instr`, and substitute the exception by Spark's exception w/ the error class `INVALID_PARAMETER_VALUE`. In this way, Spark SQL will output the error in the form: ```sql org.apache.spark.SparkRuntimeException [INVALID_PARAMETER_VALUE] The value of parameter(s) 'regexp' in `regexp_instr` is invalid: ) ? ``` instead of (on Spark 3.3.0): ```java java.lang.NullPointerException: null ``` Also I propose to set `lastRegex` only after the compilation of the regexp pattern completes successfully. This is a backport of https://github.com/apache/spark/pull/37171. ### Why are the changes needed? The changes fix NPE portrayed by the code on Spark 3.3.0: ```sql spark-sql> SELECT regexp_extract('1a 2b 14m', '(?l)'); 22/07/12 19:07:21 ERROR SparkSQLDriver: Failed in [SELECT regexp_extract('1a 2b 14m', '(?l)')] java.lang.NullPointerException: null at org.apache.spark.sql.catalyst.expressions.RegExpExtractBase.getLastMatcher(regexpExpressions.scala:768) ~[spark-catalyst_2.12-3.3.0.jar:3.3.0] ``` This should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No. In regular cases, the behavior is the same but users will observe different exceptions (error messages) after the changes. ### How was this patch tested? By running new tests: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z regexp-functions.sql" $ build/sbt "test:testOnly *.RegexpExpressionsSuite" $ build/sbt "sql/test:testOnly org.apache.spark.sql.expressions.ExpressionInfoSuite" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 5b96bd5cf8f44eee7a16cd027d37dec552ed5a6a) Signed-off-by: Max Gekk Closes #37181 from MaxGekk/pattern-syntax-exception-3.3. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../expressions/regexpExpressions.scala | 62 +++++++++++-------- .../sql/errors/QueryExecutionErrors.scala | 9 +++ .../expressions/RegexpExpressionsSuite.scala | 17 ++++- .../sql-tests/inputs/regexp-functions.sql | 2 + .../results/regexp-functions.sql.out | 20 +++++- 5 files changed, 82 insertions(+), 28 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index e3eea6f46e234..f6857a68c8cf6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import java.util.Locale -import java.util.regex.{Matcher, MatchResult, Pattern} +import java.util.regex.{Matcher, MatchResult, Pattern, PatternSyntaxException} import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer @@ -752,11 +752,42 @@ abstract class RegExpExtractBase protected def getLastMatcher(s: Any, p: Any): Matcher = { if (p != lastRegex) { // regex value changed - lastRegex = p.asInstanceOf[UTF8String].clone() - pattern = Pattern.compile(lastRegex.toString) + try { + val r = p.asInstanceOf[UTF8String].clone() + pattern = Pattern.compile(r.toString) + lastRegex = r + } catch { + case e: PatternSyntaxException => + throw QueryExecutionErrors.invalidPatternError(prettyName, e.getPattern) + + } } pattern.matcher(s.toString) } + + protected def initLastMatcherCode( + ctx: CodegenContext, + subject: String, + regexp: String, + matcher: String): String = { + val classNamePattern = classOf[Pattern].getCanonicalName + val termLastRegex = ctx.addMutableState("UTF8String", "lastRegex") + val termPattern = ctx.addMutableState(classNamePattern, "pattern") + + s""" + |if (!$regexp.equals($termLastRegex)) { + | // regex value changed + | try { + | UTF8String r = $regexp.clone(); + | $termPattern = $classNamePattern.compile(r.toString()); + | $termLastRegex = r; + | } catch (java.util.regex.PatternSyntaxException e) { + | throw QueryExecutionErrors.invalidPatternError("$prettyName", e.getPattern()); + | } + |} + |java.util.regex.Matcher $matcher = $termPattern.matcher($subject.toString()); + |""".stripMargin + } } /** @@ -818,14 +849,9 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio override def prettyName: String = "regexp_extract" override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val classNamePattern = classOf[Pattern].getCanonicalName val classNameRegExpExtractBase = classOf[RegExpExtractBase].getCanonicalName val matcher = ctx.freshName("matcher") val matchResult = ctx.freshName("matchResult") - - val termLastRegex = ctx.addMutableState("UTF8String", "lastRegex") - val termPattern = ctx.addMutableState(classNamePattern, "pattern") - val setEvNotNull = if (nullable) { s"${ev.isNull} = false;" } else { @@ -834,13 +860,7 @@ case class RegExpExtract(subject: Expression, regexp: Expression, idx: Expressio nullSafeCodeGen(ctx, ev, (subject, regexp, idx) => { s""" - if (!$regexp.equals($termLastRegex)) { - // regex value changed - $termLastRegex = $regexp.clone(); - $termPattern = $classNamePattern.compile($termLastRegex.toString()); - } - java.util.regex.Matcher $matcher = - $termPattern.matcher($subject.toString()); + ${initLastMatcherCode(ctx, subject, regexp, matcher)} if ($matcher.find()) { java.util.regex.MatchResult $matchResult = $matcher.toMatchResult(); $classNameRegExpExtractBase.checkGroupIndex($matchResult.groupCount(), $idx); @@ -922,16 +942,11 @@ case class RegExpExtractAll(subject: Expression, regexp: Expression, idx: Expres override def prettyName: String = "regexp_extract_all" override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - val classNamePattern = classOf[Pattern].getCanonicalName val classNameRegExpExtractBase = classOf[RegExpExtractBase].getCanonicalName val arrayClass = classOf[GenericArrayData].getName val matcher = ctx.freshName("matcher") val matchResult = ctx.freshName("matchResult") val matchResults = ctx.freshName("matchResults") - - val termLastRegex = ctx.addMutableState("UTF8String", "lastRegex") - val termPattern = ctx.addMutableState(classNamePattern, "pattern") - val setEvNotNull = if (nullable) { s"${ev.isNull} = false;" } else { @@ -939,12 +954,7 @@ case class RegExpExtractAll(subject: Expression, regexp: Expression, idx: Expres } nullSafeCodeGen(ctx, ev, (subject, regexp, idx) => { s""" - | if (!$regexp.equals($termLastRegex)) { - | // regex value changed - | $termLastRegex = $regexp.clone(); - | $termPattern = $classNamePattern.compile($termLastRegex.toString()); - | } - | java.util.regex.Matcher $matcher = $termPattern.matcher($subject.toString()); + | ${initLastMatcherCode(ctx, subject, regexp, matcher)} | java.util.ArrayList $matchResults = new java.util.ArrayList(); | while ($matcher.find()) { | java.util.regex.MatchResult $matchResult = $matcher.toMatchResult(); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 161bfd3c03d84..b1378275b8556 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2080,4 +2080,13 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { new SparkException(errorClass = "INVALID_BUCKET_FILE", messageParameters = Array(path), cause = null) } + + def invalidPatternError(funcName: String, pattern: String): RuntimeException = { + new SparkRuntimeException( + errorClass = "INVALID_PARAMETER_VALUE", + messageParameters = Array( + "regexp", + toSQLId(funcName), + toSQLValue(pattern, StringType))) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala index 2ca9ede77421a..a42113842266b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkFunSuite, SparkRuntimeException} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen.CodegenContext @@ -483,4 +483,19 @@ class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { .likeAll("%foo%", Literal.create(null, StringType)), null) } } + + test("SPARK-39758: invalid regexp pattern") { + val s = $"s".string.at(0) + val p = $"p".string.at(1) + val r = $"r".int.at(2) + val prefix = "The value of parameter(s) 'regexp' in" + checkExceptionInExpression[SparkRuntimeException]( + RegExpExtract(s, p, r), + create_row("1a 2b 14m", "(?l)", 0), + s"$prefix `regexp_extract` is invalid: '(?l)'") + checkExceptionInExpression[SparkRuntimeException]( + RegExpExtractAll(s, p, r), + create_row("abc", "] [", 0), + s"$prefix `regexp_extract_all` is invalid: '] ['") + } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql index efe5c278730a6..b11d2c7ce0d72 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/regexp-functions.sql @@ -14,6 +14,7 @@ SELECT regexp_extract('1a 2b 14m', '(\\d+)([a-z]+)', 3); SELECT regexp_extract('1a 2b 14m', '(\\d+)([a-z]+)', -1); SELECT regexp_extract('1a 2b 14m', '(\\d+)?([a-z]+)', 1); SELECT regexp_extract('a b m', '(\\d+)?([a-z]+)', 1); +SELECT regexp_extract('1a 2b 14m', '(?l)'); -- regexp_extract_all SELECT regexp_extract_all('1a 2b 14m', '\\d+'); @@ -31,6 +32,7 @@ SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)', 3); SELECT regexp_extract_all('1a 2b 14m', '(\\d+)([a-z]+)', -1); SELECT regexp_extract_all('1a 2b 14m', '(\\d+)?([a-z]+)', 1); SELECT regexp_extract_all('a 2b 14m', '(\\d+)?([a-z]+)', 1); +SELECT regexp_extract_all('abc', col0, 1) FROM VALUES('], [') AS t(col0); -- regexp_replace SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something'); diff --git a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out index f0a6fa064d05b..20d1273f34858 100644 --- a/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/regexp-functions.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 46 +-- Number of queries: 48 -- !query @@ -128,6 +128,15 @@ struct +-- !query +SELECT regexp_extract('1a 2b 14m', '(?l)') +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkRuntimeException +The value of parameter(s) 'regexp' in `regexp_extract` is invalid: '(?l)' + + -- !query SELECT regexp_extract_all('1a 2b 14m', '\\d+') -- !query schema @@ -254,6 +263,15 @@ struct> ["","2","14"] +-- !query +SELECT regexp_extract_all('abc', col0, 1) FROM VALUES('], [') AS t(col0) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkRuntimeException +The value of parameter(s) 'regexp' in `regexp_extract_all` is invalid: '], [' + + -- !query SELECT regexp_replace('healthy, wealthy, and wise', '\\w+thy', 'something') -- !query schema From 7765b8a3110d52698852af981d46b93bccd944c8 Mon Sep 17 00:00:00 2001 From: Cheng Su Date: Fri, 15 Jul 2022 09:56:11 +0900 Subject: [PATCH 395/535] [SPARK-39777][DOCS] Remove Hive bucketing incompatiblity documentation ### What changes were proposed in this pull request? We support Hive bucketing (with Hive hash function - https://issues.apache.org/jira/browse/SPARK-32709 and https://issues.apache.org/jira/browse/SPARK-32712) started from Spark 3.3.0, we should also update the documentation to reflect the fact, that we are no longer incompatible with Hive bucketing. ### Why are the changes needed? Update user-facing documentation to avoid confusion. ### Does this PR introduce _any_ user-facing change? Yes, the doc itself. ### How was this patch tested? Manually checked the doc file locally. Closes #37189 from c21/doc. Authored-by: Cheng Su Signed-off-by: Hyukjin Kwon (cherry picked from commit 528b9ebd165cb226e4365b5b17ceae49a3a7aa6f) Signed-off-by: Hyukjin Kwon --- docs/sql-migration-guide.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 7601355f6d51d..4214c2b9aee63 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -1097,12 +1097,6 @@ Spark SQL supports the vast majority of Hive features, such as: Below is a list of Hive features that we don't support yet. Most of these features are rarely used in Hive deployments. -**Major Hive Features** - -* Tables with buckets: bucket is the hash partitioning within a Hive table partition. Spark SQL - doesn't support buckets yet. - - **Esoteric Hive Features** * `UNION` type From aeafb175875c00519e03e0ea5b5f22f765dc3607 Mon Sep 17 00:00:00 2001 From: Ala Luszczak Date: Tue, 19 Jul 2022 09:04:03 +0800 Subject: [PATCH 396/535] [SPARK-39806][SQL] Accessing `_metadata` on partitioned table can crash a query This changes alters the projection used in `FileScanRDD` to attach file metadata to a row produced by the reader. This projection used to remove the partitioning columns from the produced row. The produced row had different schema than expected by the consumers, and was missing part of the data, which resulted in query failure. This is a bug. `FileScanRDD` should produce rows matching expected schema, and containing all the requested data. Queries should not crash due to internal errors. No. Adds a new test in `FileMetadataStructSuite.scala` that reproduces the issue. Closes #37214 from ala/metadata-partition-by. Authored-by: Ala Luszczak Signed-off-by: Wenchen Fan (cherry picked from commit 385f1c8e4037928afafbf6664e30dc268510c05e) Signed-off-by: Wenchen Fan --- .../sql/execution/DataSourceScanExec.scala | 4 +-- .../execution/datasources/FileScanRDD.scala | 4 +-- .../datasources/FileMetadataStructSuite.scala | 26 +++++++++++++++++++ 3 files changed, 30 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 9e8ae9a714d5f..40d29af28f908 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -621,7 +621,7 @@ case class FileSourceScanExec( } new FileScanRDD(fsRelation.sparkSession, readFile, filePartitions, - requiredSchema, metadataColumns) + new StructType(requiredSchema.fields ++ fsRelation.partitionSchema.fields), metadataColumns) } /** @@ -678,7 +678,7 @@ case class FileSourceScanExec( FilePartition.getFilePartitions(relation.sparkSession, splitFiles, maxSplitBytes) new FileScanRDD(fsRelation.sparkSession, readFile, partitions, - requiredSchema, metadataColumns) + new StructType(requiredSchema.fields ++ fsRelation.partitionSchema.fields), metadataColumns) } // Filters unused DynamicPruningExpression expressions - one which has been replaced diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala index 20c393a5c0e60..b65b36ef3937d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala @@ -68,7 +68,7 @@ class FileScanRDD( @transient private val sparkSession: SparkSession, readFunction: (PartitionedFile) => Iterator[InternalRow], @transient val filePartitions: Seq[FilePartition], - val readDataSchema: StructType, + val readSchema: StructType, val metadataColumns: Seq[AttributeReference] = Seq.empty) extends RDD[InternalRow](sparkSession.sparkContext, Nil) { @@ -126,7 +126,7 @@ class FileScanRDD( // an unsafe projection to convert a joined internal row to an unsafe row private lazy val projection = { val joinedExpressions = - readDataSchema.fields.map(_.dataType) ++ metadataColumns.map(_.dataType) + readSchema.fields.map(_.dataType) ++ metadataColumns.map(_.dataType) UnsafeProjection.create(joinedExpressions) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala index 410fc985dd3bd..6afea42ee83d8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala @@ -21,6 +21,7 @@ import java.io.File import java.sql.Timestamp import java.text.SimpleDateFormat +import org.apache.spark.TestUtils import org.apache.spark.sql.{AnalysisException, Column, DataFrame, QueryTest, Row} import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.functions._ @@ -30,6 +31,8 @@ import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructFiel class FileMetadataStructSuite extends QueryTest with SharedSparkSession { + import testImplicits._ + val data0: Seq[Row] = Seq(Row("jack", 24, Row(12345L, "uom"))) val data1: Seq[Row] = Seq(Row("lily", 31, Row(54321L, "ucb"))) @@ -564,4 +567,27 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession { ) } } + + Seq(true, false).foreach { useVectorizedReader => + val label = if (useVectorizedReader) "reading batches" else "reading rows" + test(s"SPARK-39806: metadata for a partitioned table ($label)") { + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> useVectorizedReader.toString) { + withTempPath { dir => + // Store dynamically partitioned data. + Seq(1 -> 1).toDF("a", "b").write.format("parquet").partitionBy("b") + .save(dir.getAbsolutePath) + + // Identify the data file and its metadata. + val file = TestUtils.recursiveList(dir) + .filter(_.getName.endsWith(".parquet")).head + val expectedDf = Seq(1 -> 1).toDF("a", "b") + .withColumn(FileFormat.FILE_NAME, lit(file.getName)) + .withColumn(FileFormat.FILE_SIZE, lit(file.length())) + + checkAnswer(spark.read.parquet(dir.getAbsolutePath) + .select("*", METADATA_FILE_NAME, METADATA_FILE_SIZE), expectedDf) + } + } + } + } } From dcaa6e0eb6d5b4c90df64b5396ec0d31e7c9f99a Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 19 Jul 2022 17:52:32 +0900 Subject: [PATCH 397/535] [MINOR][PYTHON][DOCS] Fix broken Example section in col/column functions This PR fixes a bug in the documentation. Trailing `'` breaks Example section in Python reference documentation. This PR removes it. To render the documentation as intended in NumPy documentation style. Yes, the documentation is updated. **Before** Screen Shot 2022-07-19 at 12 20 55 PM **After** Screen Shot 2022-07-19 at 12 48 04 PM Manually built the documentation and tested. Closes #37223 from HyukjinKwon/minor-doc-fx. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 2bdb5bfa48d1fc44358c49f7e379c2afc4a1a32f) Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/functions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 019f64b5171f5..ed3b0789b4731 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -140,7 +140,8 @@ def lit(col: Any) -> Column: @since(1.3) def col(col: str) -> Column: """ - Returns a :class:`~pyspark.sql.Column` based on the given column name.' + Returns a :class:`~pyspark.sql.Column` based on the given column name. + Examples -------- >>> col('x') From 25fdf9322f75f9ef58d4e3cf0c9da42d4e39cc84 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 21 Jul 2022 22:29:18 +0900 Subject: [PATCH 398/535] [SPARK-39831][BUILD] Fix R dependencies installation failure ### What changes were proposed in this pull request? move `libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev` from `Install dependencies for documentation generation` to `Install R linter dependencies and SparkR` ### Why are the changes needed? to make CI happy `Install R linter dependencies and SparkR` started to fail after `devtools_2.4.4` was released. ``` --------------------------- [ANTICONF] -------------------------------- Configuration failed to find the fontconfig freetype2 library. Try installing: * deb: libfontconfig1-dev (Debian, Ubuntu, etc) * rpm: fontconfig-devel (Fedora, EPEL) * csw: fontconfig_dev (Solaris) * brew: freetype (OSX) ``` it seems that `libfontconfig1-dev` is needed now. also refer to https://github.com/r-lib/systemfonts/issues/35#issuecomment-633560151 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? existing tests Closes #37243 from zhengruifeng/ci_add_dep. Authored-by: Ruifeng Zheng Signed-off-by: Hyukjin Kwon (cherry picked from commit 67efa318ec8cababdb5683ac262a8ebc3b3beefb) Signed-off-by: Hyukjin Kwon --- .github/workflows/build_and_test.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 5f57003cbd83a..2bdfcbd837a9d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -506,7 +506,9 @@ jobs: python3.9 -m pip install 'pandas-stubs==1.2.0.53' - name: Install R linter dependencies and SparkR run: | - apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev + apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev \ + libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev \ + libtiff5-dev libjpeg-dev Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" Rscript -e "devtools::install_version('lintr', version='2.0.1', repos='https://cloud.r-project.org')" ./R/install-dev.sh @@ -517,8 +519,7 @@ jobs: - name: Install dependencies for documentation generation run: | # pandoc is required to generate PySpark APIs as well in nbsphinx. - apt-get install -y libcurl4-openssl-dev pandoc libfontconfig1-dev libharfbuzz-dev \ - libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev + apt-get install -y libcurl4-openssl-dev pandoc # TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes. # See also https://github.com/sphinx-doc/sphinx/issues/7551. # Jinja2 3.0.0+ causes error when building with Sphinx. From b54d985223e07963db4b62a00dd29ebd012382ad Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 22 Jul 2022 09:05:09 +0900 Subject: [PATCH 399/535] Revert "[SPARK-39831][BUILD] Fix R dependencies installation failure" This reverts commit 29290306749f75eb96f51fc5b61114e9b8a3bf53. --- .github/workflows/build_and_test.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 2bdfcbd837a9d..9da442bf6c806 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -506,9 +506,7 @@ jobs: python3.9 -m pip install 'pandas-stubs==1.2.0.53' - name: Install R linter dependencies and SparkR run: | - apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev \ - libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev \ - libtiff5-dev libjpeg-dev + apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" Rscript -e "devtools::install_version('lintr', version='2.0.1', repos='https://cloud.r-project.org')" ./R/install-dev.sh From e24e7bffde2636e29e090597a4cbd23a174b48fb Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Fri, 22 Jul 2022 12:11:57 +0900 Subject: [PATCH 400/535] [SPARK-39831][BUILD] Fix R dependencies installation failure ### What changes were proposed in this pull request? move `libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev` from Install dependencies for documentation generation to Install R linter dependencies and SparkR Update after https://github.com/apache/spark/pull/37243: **add `apt update` before installation.** ### Why are the changes needed? to make CI happy Install R linter dependencies and SparkR started to fail after devtools_2.4.4 was released. ``` --------------------------- [ANTICONF] -------------------------------- Configuration failed to find the fontconfig freetype2 library. Try installing: * deb: libfontconfig1-dev (Debian, Ubuntu, etc) * rpm: fontconfig-devel (Fedora, EPEL) * csw: fontconfig_dev (Solaris) * brew: freetype (OSX) it seems that libfontconfig1-dev is needed now. ``` also refer to https://github.com/r-lib/systemfonts/issues/35#issuecomment-633560151 ### Does this PR introduce any user-facing change? No ### How was this patch tested? CI passed Closes #37247 from Yikun/patch-25. Lead-authored-by: Ruifeng Zheng Co-authored-by: Yikun Jiang Signed-off-by: Hyukjin Kwon (cherry picked from commit ffa82c219029a7f6f3caf613dd1d0ab56d0c599e) Signed-off-by: Hyukjin Kwon --- .github/workflows/build_and_test.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 9da442bf6c806..55c0b12d4bcbc 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -506,7 +506,10 @@ jobs: python3.9 -m pip install 'pandas-stubs==1.2.0.53' - name: Install R linter dependencies and SparkR run: | - apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev + apt update + apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev \ + libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev \ + libtiff5-dev libjpeg-dev Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" Rscript -e "devtools::install_version('lintr', version='2.0.1', repos='https://cloud.r-project.org')" ./R/install-dev.sh From 0a6ed8acd7423fba3cb499c7c89a662e3818d66a Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 23 Jul 2022 15:45:58 -0700 Subject: [PATCH 401/535] [SPARK-39847][SS] Fix race condition in RocksDBLoader.loadLibrary() if caller thread is interrupted ### What changes were proposed in this pull request? This PR fixes a race condition in `RocksDBLoader.loadLibrary()`, which can occur if the thread which calls that method is interrupted. One of our jobs experienced a failure in `RocksDBLoader`: ``` Caused by: java.lang.IllegalThreadStateException at java.lang.Thread.start(Thread.java:708) at org.apache.spark.sql.execution.streaming.state.RocksDBLoader$.loadLibrary(RocksDBLoader.scala:51) ``` After investigation, we determined that this was due to task cancellation/interruption: if the task which starts the RocksDB library loading is interrupted, another thread may begin a load and crash with the thread state exception: - Although the `loadLibraryThread` child thread is is uninterruptible, the task thread which calls loadLibrary is still interruptible. - Let's say we have two tasks, A and B, both of which will call `RocksDBLoader.loadLibrary()` - Say that Task A wins the race to perform the load and enters the `synchronized` block in `loadLibrary()`, starts the `loadLibraryThread`, then blocks in the `loadLibraryThread.join()` call. - If Task A is interrupted, an `InterruptedException` will be thrown and it will exit the loadLibrary synchronized block. - At this point, Task B enters the synchronized block of its `loadLibrary() call and sees that `exception == null` because the `loadLibraryThread` started by the other task is still running, so Task B calls `loadLibraryThread.start()` and hits the thread state error because it tries to start an already-started thread. This PR fixes this issue by adding code to check `loadLibraryThread`'s state before calling `start()`: if the thread has already been started then we will skip the `start()` call and proceed directly to the `join()`. I also modified the logging so that we can detect when this case occurs. ### Why are the changes needed? Fix a bug that can lead to task or job failures. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? I reproduced the original race condition by adding a `Thread.sleep(10000)` to `loadLibraryThread.run()` (so it wouldn't complete instantly), then ran ```scala test("multi-threaded RocksDBLoader calls with interruption") { val taskThread = new Thread("interruptible Task Thread 1") { override def run(): Unit = { RocksDBLoader.loadLibrary() } } taskThread.start() // Give the thread time to enter the `loadLibrary()` call: Thread.sleep(1000) taskThread.interrupt() // Check that the load hasn't finished: assert(RocksDBLoader.exception == null) assert(RocksDBLoader.loadLibraryThread.getState != Thread.State.NEW) // Simulate the second task thread starting the load: RocksDBLoader.loadLibrary() // The load should finish successfully: RocksDBLoader.exception.isEmpty } ``` This test failed prior to my changes and succeeds afterwards. I don't want to actually commit this test because I'm concerned about flakiness and false-negatives: in order to ensure that the test would have failed before my change, we need to carefully control the thread interleaving. This code rarely changes and is relatively simple, so I think the ROI on spending time to write and commit a reliable test is low. Closes #37260 from JoshRosen/rocksdbloader-fix. Authored-by: Josh Rosen Signed-off-by: Dongjoon Hyun (cherry picked from commit 9cee1bb2527a496943ffedbd935dc737246a2d89) Signed-off-by: Dongjoon Hyun --- .../sql/execution/streaming/state/RocksDBLoader.scala | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBLoader.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBLoader.scala index cc5181924377b..02c98c14f86bb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBLoader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBLoader.scala @@ -48,7 +48,16 @@ object RocksDBLoader extends Logging { def loadLibrary(): Unit = synchronized { if (exception == null) { - loadLibraryThread.start() + // SPARK-39847: if a task thread is interrupted while blocking in this loadLibrary() + // call then a second task thread might start a loadLibrary() call while the first + // call's loadLibraryThread is still running. Checking loadLibraryThread's state here + // ensures that the second loadLibrary() call will wait for the original call's + // loadLibraryThread to complete. If we didn't have this call then the second + // loadLibraryCall() would call start() on an already-started thread, causing a + // java.lang.IllegalThreadStateException error. + if (loadLibraryThread.getState == Thread.State.NEW) { + loadLibraryThread.start() + } logInfo("RocksDB library loading thread started") loadLibraryThread.join() exception.foreach(throw _) From c7e2604b098d153b98825db6e049e3e1a515a148 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 25 Jul 2022 12:44:54 +0900 Subject: [PATCH 402/535] [SPARK-39856][SQL][TESTS] Increase the number of partitions in TPC-DS build to avoid out-of-memory ### What changes were proposed in this pull request? This PR proposes to avoid out-of-memory in TPC-DS build at GitHub Actions CI by: - Increasing the number of partitions being used in shuffle. - Truncating precisions after 10th in floats. The number of partitions was previously set to 1 because of different results in precisions that generally we can just ignore. - Sort the results regardless of join type since Apache Spark does not guarantee the order of results ### Why are the changes needed? One of the reasons for the large memory usage seems to be single partition that's being used in the shuffle. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? GitHub Actions in this CI will test it out. Closes #37270 from HyukjinKwon/deflake-tpcds. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 7358253755762f9bfe6cedc1a50ec14616cfeace) Signed-off-by: Hyukjin Kwon --- .../spark/sql/TPCDSQueryTestSuite.scala | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala index 8019fc98a52f2..92cf574781f12 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala @@ -62,7 +62,7 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase with SQLQueryTestHelp // To make output results deterministic override protected def sparkConf: SparkConf = super.sparkConf - .set(SQLConf.SHUFFLE_PARTITIONS.key, "1") + .set(SQLConf.SHUFFLE_PARTITIONS.key, 4.toString) protected override def createSparkSession: TestSparkSession = { new TestSparkSession(new SparkContext("local[1]", this.getClass.getSimpleName, sparkConf)) @@ -105,7 +105,6 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase with SQLQueryTestHelp query: String, goldenFile: File, conf: Map[String, String]): Unit = { - val shouldSortResults = sortMergeJoinConf != conf // Sort for other joins withSQLConf(conf.toSeq: _*) { try { val (schema, output) = handleExceptions(getNormalizedResult(spark, query)) @@ -143,17 +142,15 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase with SQLQueryTestHelp assertResult(expectedSchema, s"Schema did not match\n$queryString") { schema } - if (shouldSortResults) { - val expectSorted = expectedOutput.split("\n").sorted.map(_.trim) - .mkString("\n").replaceAll("\\s+$", "") - val outputSorted = output.sorted.map(_.trim).mkString("\n").replaceAll("\\s+$", "") - assertResult(expectSorted, s"Result did not match\n$queryString") { - outputSorted - } - } else { - assertResult(expectedOutput, s"Result did not match\n$queryString") { - outputString - } + // Truncate precisions because they can be vary per how the shuffle is performed. + val expectSorted = expectedOutput.split("\n").sorted.map(_.trim) + .mkString("\n").replaceAll("\\s+$", "") + .replaceAll("""([0-9]+.[0-9]{10})([0-9]*)""", "$1") + val outputSorted = output.sorted.map(_.trim).mkString("\n") + .replaceAll("\\s+$", "") + .replaceAll("""([0-9]+.[0-9]{10})([0-9]*)""", "$1") + assertResult(expectSorted, s"Result did not match\n$queryString") { + outputSorted } } catch { case e: Throwable => From 7603f8d0aeb72e1989643fc9911edca0744087ad Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Mon, 25 Jul 2022 18:35:03 +0800 Subject: [PATCH 403/535] [SPARK-39835][SQL] Fix EliminateSorts remove global sort below the local sort ### What changes were proposed in this pull request? Correct the `EliminateSorts` follows: - If the upper sort is global then we can remove the global or local sort recursively. - If the upper sort is local then we can only remove the local sort recursively. ### Why are the changes needed? If a global sort below locol sort, we should not remove the global sort becuase the output partitioning can be affected. This issue is going to worse since we pull out the V1 Write sort to logcial side. ### Does this PR introduce _any_ user-facing change? yes, bug fix ### How was this patch tested? add test Closes #37250 from ulysses-you/remove-sort. Authored-by: ulysses-you Signed-off-by: Wenchen Fan (cherry picked from commit 5dca26d514a150bda58f7c4919624c9638498fec) Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 26 ++++++++++++++----- .../optimizer/EliminateSortsSuite.scala | 16 ++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index d3a6065f72643..827df04443e52 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1445,21 +1445,31 @@ object EliminateSorts extends Rule[LogicalPlan] { } case Sort(orders, false, child) if SortOrder.orderingSatisfies(child.outputOrdering, orders) => applyLocally.lift(child).getOrElse(child) - case s @ Sort(_, _, child) => s.copy(child = recursiveRemoveSort(child)) + case s @ Sort(_, global, child) => s.copy(child = recursiveRemoveSort(child, global)) case j @ Join(originLeft, originRight, _, cond, _) if cond.forall(_.deterministic) => - j.copy(left = recursiveRemoveSort(originLeft), right = recursiveRemoveSort(originRight)) + j.copy(left = recursiveRemoveSort(originLeft, true), + right = recursiveRemoveSort(originRight, true)) case g @ Aggregate(_, aggs, originChild) if isOrderIrrelevantAggs(aggs) => - g.copy(child = recursiveRemoveSort(originChild)) + g.copy(child = recursiveRemoveSort(originChild, true)) } - private def recursiveRemoveSort(plan: LogicalPlan): LogicalPlan = { + /** + * If the upper sort is global then we can remove the global or local sort recursively. + * If the upper sort is local then we can only remove the local sort recursively. + */ + private def recursiveRemoveSort( + plan: LogicalPlan, + canRemoveGlobalSort: Boolean): LogicalPlan = { if (!plan.containsPattern(SORT)) { return plan } plan match { - case Sort(_, _, child) => recursiveRemoveSort(child) + case Sort(_, global, child) if canRemoveGlobalSort || !global => + recursiveRemoveSort(child, canRemoveGlobalSort) case other if canEliminateSort(other) => - other.withNewChildren(other.children.map(recursiveRemoveSort)) + other.withNewChildren(other.children.map(c => recursiveRemoveSort(c, canRemoveGlobalSort))) + case other if canEliminateGlobalSort(other) => + other.withNewChildren(other.children.map(c => recursiveRemoveSort(c, true))) case _ => plan } } @@ -1467,6 +1477,10 @@ object EliminateSorts extends Rule[LogicalPlan] { private def canEliminateSort(plan: LogicalPlan): Boolean = plan match { case p: Project => p.projectList.forall(_.deterministic) case f: Filter => f.condition.deterministic + case _ => false + } + + private def canEliminateGlobalSort(plan: LogicalPlan): Boolean = plan match { case r: RepartitionByExpression => r.partitionExpressions.forall(_.deterministic) case r: RebalancePartitions => r.partitionExpressions.forall(_.deterministic) case _: Repartition => true diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index 01ecbd808c251..053bc1c21373e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -422,4 +422,20 @@ class EliminateSortsSuite extends AnalysisTest { comparePlans(optimized, correctAnswer) } } + + test("SPARK-39835: Fix EliminateSorts remove global sort below the local sort") { + // global -> local + val plan = testRelation.orderBy($"a".asc).sortBy($"c".asc).analyze + comparePlans(Optimize.execute(plan), plan) + + // global -> global -> local + val plan2 = testRelation.orderBy($"a".asc).orderBy($"b".asc).sortBy($"c".asc).analyze + val expected2 = testRelation.orderBy($"b".asc).sortBy($"c".asc).analyze + comparePlans(Optimize.execute(plan2), expected2) + + // local -> global -> local + val plan3 = testRelation.sortBy($"a".asc).orderBy($"b".asc).sortBy($"c".asc).analyze + val expected3 = testRelation.orderBy($"b".asc).sortBy($"c".asc).analyze + comparePlans(Optimize.execute(plan3), expected3) + } } From aa53fcad4fc1622c18b14d22fc909f7b349f7931 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Mon, 25 Jul 2022 22:08:11 +0900 Subject: [PATCH 404/535] [SPARK-39856][SQL][TESTS][FOLLOW-UP] Increase the number of partitions in TPC-DS build to avoid out-of-memory ### What changes were proposed in this pull request? This PR increases the number of partitions further more (see also https://github.com/apache/spark/pull/37270) ### Why are the changes needed? To make the build pass. ### Does this PR introduce _any_ user-facing change? No, test and dev-only. ### How was this patch tested? It's tested in https://github.com/LuciferYang/spark/runs/7497163716?check_suite_focus=true Closes #37273 from LuciferYang/SPARK-39856-FOLLOWUP. Authored-by: yangjie01 Signed-off-by: Hyukjin Kwon --- .../test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala index 92cf574781f12..f3eaa898e59ac 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala @@ -62,7 +62,7 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase with SQLQueryTestHelp // To make output results deterministic override protected def sparkConf: SparkConf = super.sparkConf - .set(SQLConf.SHUFFLE_PARTITIONS.key, 4.toString) + .set(SQLConf.SHUFFLE_PARTITIONS.key, 16.toString) protected override def createSparkSession: TestSparkSession = { new TestSparkSession(new SparkContext("local[1]", this.getClass.getSimpleName, sparkConf)) From 84ed6142ef5f4caa4ef94685a8fc5e0105231aea Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 26 Jul 2022 15:03:17 +0900 Subject: [PATCH 405/535] [SPARK-39856][SQL][TESTS][FOLLOW-UP] Increase the number of partitions in TPC-DS build to avoid out-of-memory ### What changes were proposed in this pull request? This PR increases the number of partitions further more (see also https://github.com/apache/spark/pull/37270) ### Why are the changes needed? To make the build pass. At least, two builds (https://github.com/apache/spark/runs/7500542538?check_suite_focus=true and https://github.com/apache/spark/runs/7511748355?check_suite_focus=true) passed after https://github.com/apache/spark/pull/37273. I assume that the number of partitions helps, and this PR increases some more. ### Does this PR introduce _any_ user-facing change? No, test and dev-only. ### How was this patch tested? It's tested in https://github.com/LuciferYang/spark/runs/7497163716?check_suite_focus=true Closes #37286 from HyukjinKwon/SPARK-39856-follwup. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 72b55ccf8327c00e173ab6130fdb428ad0d5aacc) Signed-off-by: Hyukjin Kwon --- .../test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala index f3eaa898e59ac..9affe827bc1a6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala @@ -62,7 +62,7 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase with SQLQueryTestHelp // To make output results deterministic override protected def sparkConf: SparkConf = super.sparkConf - .set(SQLConf.SHUFFLE_PARTITIONS.key, 16.toString) + .set(SQLConf.SHUFFLE_PARTITIONS.key, 32.toString) protected override def createSparkSession: TestSparkSession = { new TestSparkSession(new SparkContext("local[1]", this.getClass.getSimpleName, sparkConf)) From c9d56758a8c28a44161f63eb5c8763ab92616a56 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 26 Jul 2022 18:25:50 +0900 Subject: [PATCH 406/535] [SPARK-39856][TESTS][INFRA] Skip q72 at TPC-DS build at GitHub Actions ### What changes were proposed in this pull request? This PR reverts https://github.com/apache/spark/commit/7358253755762f9bfe6cedc1a50ec14616cfeace, https://github.com/apache/spark/commit/ae1f6a26ed39b297ace8d6c9420b72a3c01a3291 and https://github.com/apache/spark/commit/72b55ccf8327c00e173ab6130fdb428ad0d5aacc because they do not help fixing the TPC-DS build. In addition, this PR skips the problematic query in GitHub Actions to avoid OOM. ### Why are the changes needed? To make the build pass. ### Does this PR introduce _any_ user-facing change? No, dev and test-only. ### How was this patch tested? CI in this PR should test it out. Closes #37289 from HyukjinKwon/SPARK-39856-followup. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit de9a4b0747a4127e320f80f5e1bf431429da70a9) Signed-off-by: Hyukjin Kwon --- .../apache/spark/sql/TPCDSQuerySuite.scala | 6 +++-- .../spark/sql/TPCDSQueryTestSuite.scala | 23 +++++++++++-------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala index 22e1b838f3f3f..8c4d25a7eb988 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQuerySuite.scala @@ -29,7 +29,8 @@ import org.apache.spark.tags.ExtendedSQLTest @ExtendedSQLTest class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSBase { - tpcdsQueries.foreach { name => + // q72 is skipped due to GitHub Actions' memory limit. + tpcdsQueries.filterNot(sys.env.contains("GITHUB_ACTIONS") && _ == "q72").foreach { name => val queryString = resourceToString(s"tpcds/$name.sql", classLoader = Thread.currentThread().getContextClassLoader) test(name) { @@ -39,7 +40,8 @@ class TPCDSQuerySuite extends BenchmarkQueryTest with TPCDSBase { } } - tpcdsQueriesV2_7_0.foreach { name => + // q72 is skipped due to GitHub Actions' memory limit. + tpcdsQueriesV2_7_0.filterNot(sys.env.contains("GITHUB_ACTIONS") && _ == "q72").foreach { name => val queryString = resourceToString(s"tpcds-v2.7.0/$name.sql", classLoader = Thread.currentThread().getContextClassLoader) test(s"$name-v2.7") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala index 9affe827bc1a6..8019fc98a52f2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala @@ -62,7 +62,7 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase with SQLQueryTestHelp // To make output results deterministic override protected def sparkConf: SparkConf = super.sparkConf - .set(SQLConf.SHUFFLE_PARTITIONS.key, 32.toString) + .set(SQLConf.SHUFFLE_PARTITIONS.key, "1") protected override def createSparkSession: TestSparkSession = { new TestSparkSession(new SparkContext("local[1]", this.getClass.getSimpleName, sparkConf)) @@ -105,6 +105,7 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase with SQLQueryTestHelp query: String, goldenFile: File, conf: Map[String, String]): Unit = { + val shouldSortResults = sortMergeJoinConf != conf // Sort for other joins withSQLConf(conf.toSeq: _*) { try { val (schema, output) = handleExceptions(getNormalizedResult(spark, query)) @@ -142,15 +143,17 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase with SQLQueryTestHelp assertResult(expectedSchema, s"Schema did not match\n$queryString") { schema } - // Truncate precisions because they can be vary per how the shuffle is performed. - val expectSorted = expectedOutput.split("\n").sorted.map(_.trim) - .mkString("\n").replaceAll("\\s+$", "") - .replaceAll("""([0-9]+.[0-9]{10})([0-9]*)""", "$1") - val outputSorted = output.sorted.map(_.trim).mkString("\n") - .replaceAll("\\s+$", "") - .replaceAll("""([0-9]+.[0-9]{10})([0-9]*)""", "$1") - assertResult(expectSorted, s"Result did not match\n$queryString") { - outputSorted + if (shouldSortResults) { + val expectSorted = expectedOutput.split("\n").sorted.map(_.trim) + .mkString("\n").replaceAll("\\s+$", "") + val outputSorted = output.sorted.map(_.trim).mkString("\n").replaceAll("\\s+$", "") + assertResult(expectSorted, s"Result did not match\n$queryString") { + outputSorted + } + } else { + assertResult(expectedOutput, s"Result did not match\n$queryString") { + outputString + } } } catch { case e: Throwable => From 9fdd097aa6c05e7ecfd33dccad876a00d96b6ddf Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 27 Jul 2022 09:26:47 +0900 Subject: [PATCH 407/535] [SPARK-39879][SQL][TESTS] Reduce local-cluster maximum memory size in `BroadcastJoinSuite*` and `HiveSparkSubmitSuite` ### What changes were proposed in this pull request? This pr change `local-cluster[2, 1, 1024]` in `BroadcastJoinSuite*` and `HiveSparkSubmitSuite` to `local-cluster[2, 1, 512]` to reduce test maximum memory usage. ### Why are the changes needed? Reduce the maximum memory usage of test cases. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Should monitor CI Closes #37298 from LuciferYang/reduce-local-cluster-memory. Authored-by: yangjie01 Signed-off-by: Hyukjin Kwon (cherry picked from commit 01d41e7de418d0a40db7b16ddd0d8546f0794d17) Signed-off-by: Hyukjin Kwon --- .../execution/joins/BroadcastJoinSuite.scala | 4 +- .../spark/sql/hive/HiveSparkSubmitSuite.scala | 43 +++++++++++++------ 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala index 256e942620272..2d553d2b84f61 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/BroadcastJoinSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.joins import scala.reflect.ClassTag import org.apache.spark.AccumulatorSuite +import org.apache.spark.internal.config.EXECUTOR_MEMORY import org.apache.spark.sql.{Dataset, QueryTest, Row, SparkSession} import org.apache.spark.sql.catalyst.expressions.{AttributeReference, BitwiseAnd, BitwiseOr, Cast, Expression, Literal, ShiftLeft} import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} @@ -56,7 +57,8 @@ abstract class BroadcastJoinSuiteBase extends QueryTest with SQLTestUtils override def beforeAll(): Unit = { super.beforeAll() spark = SparkSession.builder() - .master("local-cluster[2,1,1024]") + .master("local-cluster[2,1,512]") + .config(EXECUTOR_MEMORY.key, "512m") .appName("testing") .getOrCreate() } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 170cf4898f314..fc8d6e61a0d0e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -33,6 +33,7 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark._ import org.apache.spark.deploy.SparkSubmitTestUtils import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.EXECUTOR_MEMORY import org.apache.spark.internal.config.UI.UI_ENABLED import org.apache.spark.sql.{QueryTest, Row, SparkSession} import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} @@ -73,7 +74,8 @@ class HiveSparkSubmitSuite val args = Seq( "--class", TemporaryHiveUDFTest.getClass.getName.stripSuffix("$"), "--name", "TemporaryHiveUDFTest", - "--master", "local-cluster[2,1,1024]", + "--master", "local-cluster[2,1,512]", + "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", "--driver-java-options", "-Dderby.system.durability=test", @@ -90,7 +92,8 @@ class HiveSparkSubmitSuite val args = Seq( "--class", PermanentHiveUDFTest1.getClass.getName.stripSuffix("$"), "--name", "PermanentHiveUDFTest1", - "--master", "local-cluster[2,1,1024]", + "--master", "local-cluster[2,1,512]", + "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", "--driver-java-options", "-Dderby.system.durability=test", @@ -107,7 +110,8 @@ class HiveSparkSubmitSuite val args = Seq( "--class", PermanentHiveUDFTest2.getClass.getName.stripSuffix("$"), "--name", "PermanentHiveUDFTest2", - "--master", "local-cluster[2,1,1024]", + "--master", "local-cluster[2,1,512]", + "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", "--driver-java-options", "-Dderby.system.durability=test", @@ -126,7 +130,8 @@ class HiveSparkSubmitSuite val args = Seq( "--class", SparkSubmitClassLoaderTest.getClass.getName.stripSuffix("$"), "--name", "SparkSubmitClassLoaderTest", - "--master", "local-cluster[2,1,1024]", + "--master", "local-cluster[2,1,512]", + "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", "--driver-java-options", "-Dderby.system.durability=test", @@ -141,7 +146,8 @@ class HiveSparkSubmitSuite val args = Seq( "--class", SparkSQLConfTest.getClass.getName.stripSuffix("$"), "--name", "SparkSQLConfTest", - "--master", "local-cluster[2,1,1024]", + "--master", "local-cluster[2,1,512]", + "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", "--conf", "spark.sql.hive.metastore.version=0.12", @@ -179,7 +185,8 @@ class HiveSparkSubmitSuite val args = Seq( "--class", SPARK_9757.getClass.getName.stripSuffix("$"), "--name", "SparkSQLConfTest", - "--master", "local-cluster[2,1,1024]", + "--master", "local-cluster[2,1,512]", + "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", "--driver-java-options", "-Dderby.system.durability=test", @@ -192,7 +199,8 @@ class HiveSparkSubmitSuite val args = Seq( "--class", SPARK_11009.getClass.getName.stripSuffix("$"), "--name", "SparkSQLConfTest", - "--master", "local-cluster[2,1,1024]", + "--master", "local-cluster[2,1,512]", + "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", "--driver-java-options", "-Dderby.system.durability=test", @@ -205,7 +213,8 @@ class HiveSparkSubmitSuite val args = Seq( "--class", SPARK_14244.getClass.getName.stripSuffix("$"), "--name", "SparkSQLConfTest", - "--master", "local-cluster[2,1,1024]", + "--master", "local-cluster[2,1,512]", + "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", "--driver-java-options", "-Dderby.system.durability=test", @@ -218,7 +227,8 @@ class HiveSparkSubmitSuite val args = Seq( "--class", SetWarehouseLocationTest.getClass.getName.stripSuffix("$"), "--name", "SetSparkWarehouseLocationTest", - "--master", "local-cluster[2,1,1024]", + "--master", "local-cluster[2,1,512]", + "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", "--driver-java-options", "-Dderby.system.durability=test", @@ -255,7 +265,8 @@ class HiveSparkSubmitSuite val args = Seq( "--class", SetWarehouseLocationTest.getClass.getName.stripSuffix("$"), "--name", "SetHiveWarehouseLocationTest", - "--master", "local-cluster[2,1,1024]", + "--master", "local-cluster[2,1,512]", + "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", "--conf", s"spark.sql.test.expectedWarehouseDir=$hiveWarehouseLocation", @@ -313,7 +324,8 @@ class HiveSparkSubmitSuite val args = Seq( "--class", SPARK_18360.getClass.getName.stripSuffix("$"), "--name", "SPARK-18360", - "--master", "local-cluster[2,1,1024]", + "--master", "local-cluster[2,1,512]", + "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", "--driver-java-options", "-Dderby.system.durability=test", @@ -327,7 +339,8 @@ class HiveSparkSubmitSuite val argsForCreateTable = Seq( "--class", SPARK_18989_CREATE_TABLE.getClass.getName.stripSuffix("$"), "--name", "SPARK-18947", - "--master", "local-cluster[2,1,1024]", + "--master", "local-cluster[2,1,512]", + "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", "--jars", HiveTestJars.getHiveContribJar().getCanonicalPath, @@ -337,7 +350,8 @@ class HiveSparkSubmitSuite val argsForShowTables = Seq( "--class", SPARK_18989_DESC_TABLE.getClass.getName.stripSuffix("$"), "--name", "SPARK-18947", - "--master", "local-cluster[2,1,1024]", + "--master", "local-cluster[2,1,512]", + "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", unusedJar.toString) @@ -358,7 +372,8 @@ class HiveSparkSubmitSuite val args = Seq( "--class", SPARK_34772.getClass.getName.stripSuffix("$"), "--name", "SPARK-34772", - "--master", "local-cluster[2,1,1024]", + "--master", "local-cluster[2,1,512]", + "--conf", s"${EXECUTOR_MEMORY.key}=512m", "--conf", s"${LEGACY_TIME_PARSER_POLICY.key}=LEGACY", "--conf", s"${HiveUtils.HIVE_METASTORE_VERSION.key}=1.2.1", "--conf", s"${HiveUtils.HIVE_METASTORE_JARS.key}=maven", From ee8cafbd0ff36116a212ac99fdf65b24c486cae8 Mon Sep 17 00:00:00 2001 From: Kris Mok Date: Thu, 28 Jul 2022 08:49:33 +0900 Subject: [PATCH 408/535] [SPARK-39839][SQL] Handle special case of null variable-length Decimal with non-zero offsetAndSize in UnsafeRow structural integrity check ### What changes were proposed in this pull request? Update the `UnsafeRow` structural integrity check in `UnsafeRowUtils.validateStructuralIntegrity` to handle a special case with null variable-length DecimalType value. ### Why are the changes needed? The check should follow the format that `UnsafeRowWriter` produces. In general, `UnsafeRowWriter` clears out a field with zero when the field is set to be null, c.f. `UnsafeRowWriter.setNullAt(ordinal)` and `UnsafeRow.setNullAt(ordinal)`. But there's a special case for `DecimalType` values: this is the only type that is both: - can be fixed-length or variable-length, depending on the precision, and - is mutable in `UnsafeRow`. To support a variable-length `DecimalType` to be mutable in `UnsafeRow`, the `UnsafeRowWriter` always leaves a 16-byte space in the variable-length section of the `UnsafeRow` (tail end of the row), regardless of whether the `Decimal` value being written is null or not. In the fixed-length part of the field, it would be an "OffsetAndSize", and the `offset` part always points to the start offset of the variable-length part of the field, while the `size` part will either be `0` for the null value, or `1` to at most `16` for non-null values. When `setNullAt(ordinal)` is called instead of passing a null value to `write(int, Decimal, int, int)`, however, the `offset` part gets zero'd out and this field stops being mutable. There's a comment on `UnsafeRow.setDecimal` that mentions to keep this field able to support updates, `setNullAt(ordinal)` cannot be called, but there's no code enforcement of that. So we need to recognize that in the structural integrity check and allow variable-length `DecimalType` to have non-zero field even for null. Note that for non-null values, the existing check does conform to the format from `UnsafeRowWriter`. It's only null value of variable-length `DecimalType` that'd trigger a bug, which can affect Structured Streaming's checkpoint file read where this check is applied. ### Does this PR introduce _any_ user-facing change? Yes, previously the `UnsafeRow` structural integrity validation will return false positive for correct data, when there's a null value in a variable-length `DecimalType` field. The fix will no longer return false positive. Because the Structured Streaming checkpoint file validation uses this check, previously a good checkpoint file may be rejected by the check, and the only workaround is to disable the check; with the fix, the correct checkpoint file will be allowed to load. ### How was this patch tested? Added new test case in `UnsafeRowUtilsSuite` Closes #37252 from rednaxelafx/fix-unsaferow-validation. Authored-by: Kris Mok Signed-off-by: Jungtaek Lim (cherry picked from commit c608ae2fc6a3a50f2e67f2a3dad8d4e4be1aaf9f) Signed-off-by: Jungtaek Lim --- .../sql/catalyst/util/UnsafeRowUtils.scala | 44 ++++++++++++++++--- .../catalyst/util/UnsafeRowUtilsSuite.scala | 31 ++++++++++++- 2 files changed, 67 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtils.scala index 37a34fac66364..48db0c7d971c5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtils.scala @@ -27,8 +27,15 @@ object UnsafeRowUtils { * - schema.fields.length == row.numFields should always be true * - UnsafeRow.calculateBitSetWidthInBytes(row.numFields) < row.getSizeInBytes should always be * true if the expectedSchema contains at least one field. - * - For variable-length fields: if null bit says it's null then don't do anything, else extract - * offset and size: + * - For variable-length fields: + * - if null bit says it's null, then + * - in general the offset-and-size should be zero + * - special case: variable-length DecimalType is considered mutable in UnsafeRow, and to + * support that, the offset is set to point to the variable-length part like a non-null + * value, while the size is set to zero to signal that it's a null value. The offset + * may also be set to zero, in which case this variable-length Decimal no longer supports + * being mutable in the UnsafeRow. + * - otherwise the field is not null, then extract offset and size: * 1) 0 <= size < row.getSizeInBytes should always be true. We can be even more precise than * this, where the upper bound of size can only be as big as the variable length part of * the row. @@ -52,9 +59,7 @@ object UnsafeRowUtils { var varLenFieldsSizeInBytes = 0 expectedSchema.fields.zipWithIndex.foreach { case (field, index) if !UnsafeRow.isFixedLength(field.dataType) && !row.isNullAt(index) => - val offsetAndSize = row.getLong(index) - val offset = (offsetAndSize >> 32).toInt - val size = offsetAndSize.toInt + val (offset, size) = getOffsetAndSize(row, index) if (size < 0 || offset < bitSetWidthInBytes + 8 * row.numFields || offset + size > rowSizeInBytes) { return false @@ -74,8 +79,26 @@ object UnsafeRowUtils { if ((row.getLong(index) >> 32) != 0L) return false case _ => } - case (_, index) if row.isNullAt(index) => - if (row.getLong(index) != 0L) return false + case (field, index) if row.isNullAt(index) => + field.dataType match { + case dt: DecimalType if !UnsafeRow.isFixedLength(dt) => + // See special case in UnsafeRowWriter.write(int, Decimal, int, int) and + // UnsafeRow.setDecimal(int, Decimal, int). + // A variable-length Decimal may be marked as null while having non-zero offset and + // zero length. This allows the field to be updated (i.e. mutable variable-length data) + + // Check the integrity of null value of variable-length DecimalType in UnsafeRow: + // 1. size must be zero + // 2. offset may be zero, in which case this variable-length field is no longer mutable + // 3. otherwise offset is non-zero, range check it the same way as a non-null value + val (offset, size) = getOffsetAndSize(row, index) + if (size != 0 || offset != 0 && + (offset < bitSetWidthInBytes + 8 * row.numFields || offset > rowSizeInBytes)) { + return false + } + case _ => + if (row.getLong(index) != 0L) return false + } case _ => } if (bitSetWidthInBytes + 8 * row.numFields + varLenFieldsSizeInBytes > rowSizeInBytes) { @@ -83,4 +106,11 @@ object UnsafeRowUtils { } true } + + def getOffsetAndSize(row: UnsafeRow, index: Int): (Int, Int) = { + val offsetAndSize = row.getLong(index) + val offset = (offsetAndSize >> 32).toInt + val size = offsetAndSize.toInt + (offset, size) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtilsSuite.scala index 4b6a3cfafd894..518d68ce1d285 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/UnsafeRowUtilsSuite.scala @@ -17,9 +17,11 @@ package org.apache.spark.sql.catalyst.util +import java.math.{BigDecimal => JavaBigDecimal} + import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeProjection, UnsafeRow} -import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} +import org.apache.spark.sql.types.{Decimal, DecimalType, IntegerType, StringType, StructField, StructType} class UnsafeRowUtilsSuite extends SparkFunSuite { @@ -52,4 +54,31 @@ class UnsafeRowUtilsSuite extends SparkFunSuite { StructField("value2", IntegerType, false))) assert(!UnsafeRowUtils.validateStructuralIntegrity(testRow, invalidSchema)) } + + test("Handle special case for null variable-length Decimal") { + val schema = StructType(StructField("d", DecimalType(19, 0), nullable = true) :: Nil) + val unsafeRowProjection = UnsafeProjection.create(schema) + val row = unsafeRowProjection(new SpecificInternalRow(schema)) + + // row is empty at this point + assert(row.isNullAt(0) && UnsafeRowUtils.getOffsetAndSize(row, 0) == (16, 0)) + assert(UnsafeRowUtils.validateStructuralIntegrity(row, schema)) + + // set Decimal field to precision-overflowed value + val bigDecimalVal = Decimal(new JavaBigDecimal("12345678901234567890")) // precision=20, scale=0 + row.setDecimal(0, bigDecimalVal, 19) // should overflow and become null + assert(row.isNullAt(0) && UnsafeRowUtils.getOffsetAndSize(row, 0) == (16, 0)) + assert(UnsafeRowUtils.validateStructuralIntegrity(row, schema)) + + // set Decimal field to valid non-null value + val bigDecimalVal2 = Decimal(new JavaBigDecimal("1234567890123456789")) // precision=19, scale=0 + row.setDecimal(0, bigDecimalVal2, 19) // should succeed + assert(!row.isNullAt(0) && UnsafeRowUtils.getOffsetAndSize(row, 0) == (16, 8)) + assert(UnsafeRowUtils.validateStructuralIntegrity(row, schema)) + + // set Decimal field to null explicitly, after which this field no longer supports updating + row.setNullAt(0) + assert(row.isNullAt(0) && UnsafeRowUtils.getOffsetAndSize(row, 0) == (0, 0)) + assert(UnsafeRowUtils.validateStructuralIntegrity(row, schema)) + } } From 609efe1515fc03d04ebcfc4b7f25872c2cfbe185 Mon Sep 17 00:00:00 2001 From: huaxingao Date: Wed, 27 Jul 2022 18:07:35 -0700 Subject: [PATCH 409/535] [SPARK-39857][SQL][3.3] V2ExpressionBuilder uses the wrong LiteralValue data type for In predicate ### What changes were proposed in this pull request? When building V2 In `Predicate` in `V2ExpressionBuilder`, `InSet.dataType` (which is BooleanType) is used to build the `LiteralValue`, `InSet.child.dataType `should be used instead. back port https://github.com/apache/spark/pull/37271 to 3.3 ### Why are the changes needed? bug fix ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new test Closes #37324 from huaxingao/backport. Authored-by: huaxingao Signed-off-by: huaxingao --- .../catalyst/util/V2ExpressionBuilder.scala | 4 +- .../v2/DataSourceV2StrategySuite.scala | 282 +++++++++++++++++- 2 files changed, 280 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala index 23560ae1d098e..1edbb6d230f86 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -52,10 +52,10 @@ class V2ExpressionBuilder(e: Expression, isPredicate: Boolean = false) { } else { Some(ref) } - case in @ InSet(child, hset) => + case InSet(child, hset) => generateExpression(child).map { v => val children = - (v +: hset.toSeq.map(elem => LiteralValue(elem, in.dataType))).toArray[V2Expression] + (v +: hset.toSeq.map(elem => LiteralValue(elem, child.dataType))).toArray[V2Expression] new V2Predicate("IN", children) } // Because we only convert In to InSet in Optimizer when there are more than certain diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala index 1a5a382afdc6b..6d355f7761546 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala @@ -18,16 +18,290 @@ package org.apache.spark.sql.execution.datasources.v2 import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.connector.expressions.{FieldReference, LiteralValue} -import org.apache.spark.sql.connector.expressions.filter.Predicate +import org.apache.spark.sql.connector.expressions.filter.{And => V2And, Not => V2Not, Or => V2Or, Predicate} import org.apache.spark.sql.test.SharedSparkSession -import org.apache.spark.sql.types.BooleanType +import org.apache.spark.sql.types.{BooleanType, IntegerType, StringType, StructField, StructType} +import org.apache.spark.unsafe.types.UTF8String class DataSourceV2StrategySuite extends PlanTest with SharedSparkSession { + val attrInts = Seq( + $"cint".int, + $"c.int".int, + GetStructField($"a".struct(StructType( + StructField("cstr", StringType, nullable = true) :: + StructField("cint", IntegerType, nullable = true) :: Nil)), 1, None), + GetStructField($"a".struct(StructType( + StructField("c.int", IntegerType, nullable = true) :: + StructField("cstr", StringType, nullable = true) :: Nil)), 0, None), + GetStructField($"a.b".struct(StructType( + StructField("cstr1", StringType, nullable = true) :: + StructField("cstr2", StringType, nullable = true) :: + StructField("cint", IntegerType, nullable = true) :: Nil)), 2, None), + GetStructField($"a.b".struct(StructType( + StructField("c.int", IntegerType, nullable = true) :: Nil)), 0, None), + GetStructField(GetStructField($"a".struct(StructType( + StructField("cstr1", StringType, nullable = true) :: + StructField("b", StructType(StructField("cint", IntegerType, nullable = true) :: + StructField("cstr2", StringType, nullable = true) :: Nil)) :: Nil)), 1, None), 0, None) + ).zip(Seq( + "cint", + "`c.int`", // single level field that contains `dot` in name + "a.cint", // two level nested field + "a.`c.int`", // two level nested field, and nested level contains `dot` + "`a.b`.cint", // two level nested field, and top level contains `dot` + "`a.b`.`c.int`", // two level nested field, and both levels contain `dot` + "a.b.cint" // three level nested field + )) + + val attrStrs = Seq( + $"cstr".string, + $"c.str".string, + GetStructField($"a".struct(StructType( + StructField("cint", IntegerType, nullable = true) :: + StructField("cstr", StringType, nullable = true) :: Nil)), 1, None), + GetStructField($"a".struct(StructType( + StructField("c.str", StringType, nullable = true) :: + StructField("cint", IntegerType, nullable = true) :: Nil)), 0, None), + GetStructField($"a.b".struct(StructType( + StructField("cint1", IntegerType, nullable = true) :: + StructField("cint2", IntegerType, nullable = true) :: + StructField("cstr", StringType, nullable = true) :: Nil)), 2, None), + GetStructField($"a.b".struct(StructType( + StructField("c.str", StringType, nullable = true) :: Nil)), 0, None), + GetStructField(GetStructField($"a".struct(StructType( + StructField("cint1", IntegerType, nullable = true) :: + StructField("b", StructType(StructField("cstr", StringType, nullable = true) :: + StructField("cint2", IntegerType, nullable = true) :: Nil)) :: Nil)), 1, None), 0, None) + ).zip(Seq( + "cstr", + "`c.str`", // single level field that contains `dot` in name + "a.cstr", // two level nested field + "a.`c.str`", // two level nested field, and nested level contains `dot` + "`a.b`.cstr", // two level nested field, and top level contains `dot` + "`a.b`.`c.str`", // two level nested field, and both levels contain `dot` + "a.b.cstr" // three level nested field + )) + + test("translate simple expression") { attrInts.zip(attrStrs) + .foreach { case ((attrInt, intColName), (attrStr, strColName)) => + testTranslateFilter(EqualTo(attrInt, 1), + Some(new Predicate("=", Array(FieldReference(intColName), LiteralValue(1, IntegerType))))) + + testTranslateFilter(EqualNullSafe(attrInt, 1), + Some(new Predicate("<=>", Array(FieldReference(intColName), LiteralValue(1, IntegerType))))) + + testTranslateFilter(GreaterThan(attrInt, 1), + Some(new Predicate(">", Array(FieldReference(intColName), LiteralValue(1, IntegerType))))) + + testTranslateFilter(LessThan(attrInt, 1), + Some(new Predicate("<", Array(FieldReference(intColName), LiteralValue(1, IntegerType))))) + + testTranslateFilter(GreaterThanOrEqual(attrInt, 1), + Some(new Predicate(">=", Array(FieldReference(intColName), LiteralValue(1, IntegerType))))) + + testTranslateFilter(LessThanOrEqual(attrInt, 1), + Some(new Predicate("<=", Array(FieldReference(intColName), LiteralValue(1, IntegerType))))) + + testTranslateFilter(IsNull(attrInt), + Some(new Predicate("IS_NULL", Array(FieldReference(intColName))))) + testTranslateFilter(IsNotNull(attrInt), + Some(new Predicate("IS_NOT_NULL", Array(FieldReference(intColName))))) + + testTranslateFilter(InSet(attrInt, Set(1, 2, 3)), + Some(new Predicate("IN", Array(FieldReference(intColName), + LiteralValue(1, IntegerType), LiteralValue(2, IntegerType), + LiteralValue(3, IntegerType))))) + + testTranslateFilter(In(attrInt, Seq(1, 2, 3)), + Some(new Predicate("IN", Array(FieldReference(intColName), + LiteralValue(1, IntegerType), LiteralValue(2, IntegerType), + LiteralValue(3, IntegerType))))) + + // cint > 1 AND cint < 10 + testTranslateFilter(And( + GreaterThan(attrInt, 1), + LessThan(attrInt, 10)), + Some(new V2And( + new Predicate(">", Array(FieldReference(intColName), LiteralValue(1, IntegerType))), + new Predicate("<", Array(FieldReference(intColName), LiteralValue(10, IntegerType)))))) + + // cint >= 8 OR cint <= 2 + testTranslateFilter(Or( + GreaterThanOrEqual(attrInt, 8), + LessThanOrEqual(attrInt, 2)), + Some(new V2Or( + new Predicate(">=", Array(FieldReference(intColName), LiteralValue(8, IntegerType))), + new Predicate("<=", Array(FieldReference(intColName), LiteralValue(2, IntegerType)))))) + + testTranslateFilter(Not(GreaterThanOrEqual(attrInt, 8)), + Some(new V2Not(new Predicate(">=", Array(FieldReference(intColName), + LiteralValue(8, IntegerType)))))) + + testTranslateFilter(StartsWith(attrStr, "a"), + Some(new Predicate("STARTS_WITH", Array(FieldReference(strColName), + LiteralValue(UTF8String.fromString("a"), StringType))))) + + testTranslateFilter(EndsWith(attrStr, "a"), + Some(new Predicate("ENDS_WITH", Array(FieldReference(strColName), + LiteralValue(UTF8String.fromString("a"), StringType))))) + + testTranslateFilter(Contains(attrStr, "a"), + Some(new Predicate("CONTAINS", Array(FieldReference(strColName), + LiteralValue(UTF8String.fromString("a"), StringType))))) + } + } + + test("translate complex expression") { + attrInts.foreach { case (attrInt, intColName) => + + // ABS(cint) - 2 <= 1 + testTranslateFilter(LessThanOrEqual( + // Expressions are not supported + // Functions such as 'Abs' are not supported + Subtract(Abs(attrInt), 2), 1), None) + + // (cin1 > 1 AND cint < 10) OR (cint > 50 AND cint > 100) + testTranslateFilter(Or( + And( + GreaterThan(attrInt, 1), + LessThan(attrInt, 10) + ), + And( + GreaterThan(attrInt, 50), + LessThan(attrInt, 100))), + Some(new V2Or( + new V2And( + new Predicate(">", Array(FieldReference(intColName), LiteralValue(1, IntegerType))), + new Predicate("<", Array(FieldReference(intColName), LiteralValue(10, IntegerType)))), + new V2And( + new Predicate(">", Array(FieldReference(intColName), LiteralValue(50, IntegerType))), + new Predicate("<", Array(FieldReference(intColName), + LiteralValue(100, IntegerType))))) + ) + ) + + // (cint > 1 AND ABS(cint) < 10) OR (cint < 50 AND cint > 100) + testTranslateFilter(Or( + And( + GreaterThan(attrInt, 1), + // Functions such as 'Abs' are not supported + LessThan(Abs(attrInt), 10) + ), + And( + GreaterThan(attrInt, 50), + LessThan(attrInt, 100))), None) + + // NOT ((cint <= 1 OR ABS(cint) >= 10) AND (cint <= 50 OR cint >= 100)) + testTranslateFilter(Not(And( + Or( + LessThanOrEqual(attrInt, 1), + // Functions such as 'Abs' are not supported + GreaterThanOrEqual(Abs(attrInt), 10) + ), + Or( + LessThanOrEqual(attrInt, 50), + GreaterThanOrEqual(attrInt, 100)))), None) + + // (cint = 1 OR cint = 10) OR (cint > 0 OR cint < -10) + testTranslateFilter(Or( + Or( + EqualTo(attrInt, 1), + EqualTo(attrInt, 10) + ), + Or( + GreaterThan(attrInt, 0), + LessThan(attrInt, -10))), + Some(new V2Or( + new V2Or( + new Predicate("=", Array(FieldReference(intColName), LiteralValue(1, IntegerType))), + new Predicate("=", Array(FieldReference(intColName), LiteralValue(10, IntegerType)))), + new V2Or( + new Predicate(">", Array(FieldReference(intColName), LiteralValue(0, IntegerType))), + new Predicate("<", Array(FieldReference(intColName), LiteralValue(-10, IntegerType))))) + ) + ) + + // (cint = 1 OR ABS(cint) = 10) OR (cint > 0 OR cint < -10) + testTranslateFilter(Or( + Or( + EqualTo(attrInt, 1), + // Functions such as 'Abs' are not supported + EqualTo(Abs(attrInt), 10) + ), + Or( + GreaterThan(attrInt, 0), + LessThan(attrInt, -10))), None) + + // In end-to-end testing, conjunctive predicate should has been split + // before reaching DataSourceStrategy.translateFilter. + // This is for UT purpose to test each [[case]]. + // (cint > 1 AND cint < 10) AND (cint = 6 AND cint IS NOT NULL) + testTranslateFilter(And( + And( + GreaterThan(attrInt, 1), + LessThan(attrInt, 10) + ), + And( + EqualTo(attrInt, 6), + IsNotNull(attrInt))), + Some(new V2And( + new V2And( + new Predicate(">", Array(FieldReference(intColName), LiteralValue(1, IntegerType))), + new Predicate("<", Array(FieldReference(intColName), LiteralValue(10, IntegerType)))), + new V2And( + new Predicate("=", Array(FieldReference(intColName), LiteralValue(6, IntegerType))), + new Predicate("IS_NOT_NULL", Array(FieldReference(intColName))))) + ) + ) + + // (cint > 1 AND cint < 10) AND (ABS(cint) = 6 AND cint IS NOT NULL) + testTranslateFilter(And( + And( + GreaterThan(attrInt, 1), + LessThan(attrInt, 10) + ), + And( + // Functions such as 'Abs' are not supported + EqualTo(Abs(attrInt), 6), + IsNotNull(attrInt))), None) + + // (cint > 1 OR cint < 10) AND (cint = 6 OR cint IS NOT NULL) + testTranslateFilter(And( + Or( + GreaterThan(attrInt, 1), + LessThan(attrInt, 10) + ), + Or( + EqualTo(attrInt, 6), + IsNotNull(attrInt))), + Some(new V2And( + new V2Or( + new Predicate(">", Array(FieldReference(intColName), LiteralValue(1, IntegerType))), + new Predicate("<", Array(FieldReference(intColName), LiteralValue(10, IntegerType)))), + new V2Or( + new Predicate("=", Array(FieldReference(intColName), LiteralValue(6, IntegerType))), + new Predicate("IS_NOT_NULL", Array(FieldReference(intColName))))) + ) + ) + + // (cint > 1 OR cint < 10) AND (cint = 6 OR cint IS NOT NULL) + testTranslateFilter(And( + Or( + GreaterThan(attrInt, 1), + LessThan(attrInt, 10) + ), + Or( + // Functions such as 'Abs' are not supported + EqualTo(Abs(attrInt), 6), + IsNotNull(attrInt))), None) + } + } + test("SPARK-36644: Push down boolean column filter") { - testTranslateFilter(Symbol("col").boolean, + testTranslateFilter($"col".boolean, Some(new Predicate("=", Array(FieldReference("col"), LiteralValue(true, BooleanType))))) } From 19991047d5b5316412d8b1763807c5945a705bff Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 28 Jul 2022 11:26:34 -0700 Subject: [PATCH 410/535] [SPARK-39865][SQL][3.3] Show proper error messages on the overflow errors of table insert ### What changes were proposed in this pull request? In Spark 3.3, the error message of ANSI CAST is improved. However, the table insertion is using the same CAST expression: ``` > create table tiny(i tinyint); > insert into tiny values (1000); org.apache.spark.SparkArithmeticException[CAST_OVERFLOW]: The value 1000 of the type "INT" cannot be cast to "TINYINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. ``` Showing the hint of `If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error` doesn't help at all. This PR is to fix the error message. After changes, the error message of this example will become: ``` org.apache.spark.SparkArithmeticException: [CAST_OVERFLOW_IN_TABLE_INSERT] Fail to insert a value of "INT" type into the "TINYINT" type column `i` due to an overflow. Use `try_cast` on the input value to tolerate overflow and return NULL instead. ``` ### Why are the changes needed? Show proper error messages on the overflow errors of table insert. The current message is super confusing. ### Does this PR introduce _any_ user-facing change? Yes, after changes it show proper error messages on the overflow errors of table insert. ### How was this patch tested? Unit test Closes #37311 from gengliangwang/PR_TOOL_PICK_PR_37283_BRANCH-3.3. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../main/resources/error/error-classes.json | 4 ++ .../analysis/TableOutputResolver.scala | 25 ++++++++++- .../spark/sql/catalyst/expressions/Cast.scala | 44 +++++++++++++++++++ .../sql/errors/QueryExecutionErrors.scala | 13 ++++++ .../spark/sql/sources/InsertSuite.scala | 20 +++++---- 5 files changed, 95 insertions(+), 11 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 31ec5aaa05e33..89a9d5af587d7 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -33,6 +33,10 @@ "message" : [ "The value of the type cannot be cast to due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set to \"false\" to bypass this error." ], "sqlState" : "22005" }, + "CAST_OVERFLOW_IN_TABLE_INSERT" : { + "message" : [ "Fail to insert a value of type into the type column due to an overflow. Use `try_cast` on the input value to tolerate overflow and return NULL instead." ], + "sqlState" : "22005" + }, "CONCURRENT_QUERY" : { "message" : [ "Another instance of this query was just started by a concurrent session." ] }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala index 2cd069e5858da..c723a018a6c53 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala @@ -26,7 +26,7 @@ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.StoreAssignmentPolicy -import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType} +import org.apache.spark.sql.types.{ArrayType, DataType, DecimalType, IntegralType, MapType, StructType} object TableOutputResolver { def resolveOutputColumns( @@ -220,6 +220,21 @@ object TableOutputResolver { } } + private def containsIntegralOrDecimalType(dt: DataType): Boolean = dt match { + case _: IntegralType | _: DecimalType => true + case a: ArrayType => containsIntegralOrDecimalType(a.elementType) + case m: MapType => + containsIntegralOrDecimalType(m.keyType) || containsIntegralOrDecimalType(m.valueType) + case s: StructType => + s.fields.exists(sf => containsIntegralOrDecimalType(sf.dataType)) + case _ => false + } + + private def canCauseCastOverflow(cast: AnsiCast): Boolean = { + containsIntegralOrDecimalType(cast.dataType) && + !Cast.canUpCast(cast.child.dataType, cast.dataType) + } + private def checkField( tableAttr: Attribute, queryExpr: NamedExpression, @@ -235,7 +250,13 @@ object TableOutputResolver { } else { val casted = storeAssignmentPolicy match { case StoreAssignmentPolicy.ANSI => - AnsiCast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone)) + val cast = AnsiCast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone)) + if (canCauseCastOverflow(cast)) { + CheckOverflowInTableInsert(cast, tableAttr.name) + } else { + cast + } + case StoreAssignmentPolicy.LEGACY => Cast(queryExpr, tableAttr.dataType, Option(conf.sessionLocalTimeZone), ansiEnabled = false) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 4b7c7b479d4cc..014c2be731983 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -21,6 +21,7 @@ import java.time.{ZoneId, ZoneOffset} import java.util.Locale import java.util.concurrent.TimeUnit._ +import org.apache.spark.SparkArithmeticException import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion} import org.apache.spark.sql.catalyst.expressions.Cast.resolvableNullability @@ -2351,3 +2352,46 @@ case class UpCast(child: Expression, target: AbstractDataType, walkedTypePath: S override protected def withNewChildInternal(newChild: Expression): UpCast = copy(child = newChild) } + +/** + * Casting a numeric value as another numeric type in store assignment. It can capture the + * arithmetic errors and show proper error messages to users. + */ +case class CheckOverflowInTableInsert(child: AnsiCast, columnName: String) extends UnaryExpression { + override protected def withNewChildInternal(newChild: Expression): Expression = + copy(child = newChild.asInstanceOf[AnsiCast]) + + override def eval(input: InternalRow): Any = try { + child.eval(input) + } catch { + case e: SparkArithmeticException => + QueryExecutionErrors.castingCauseOverflowErrorInTableInsert( + child.child.dataType, + child.dataType, + columnName) + } + + override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { + val childGen = child.genCode(ctx) + val exceptionClass = classOf[SparkArithmeticException].getCanonicalName + val fromDt = + ctx.addReferenceObj("from", child.child.dataType, child.child.dataType.getClass.getName) + val toDt = ctx.addReferenceObj("to", child.dataType, child.dataType.getClass.getName) + val col = ctx.addReferenceObj("colName", columnName, "java.lang.String") + // scalastyle:off line.size.limit + ev.copy(code = code""" + boolean ${ev.isNull} = true; + ${CodeGenerator.javaType(dataType)} ${ev.value} = ${CodeGenerator.defaultValue(dataType)}; + try { + ${childGen.code} + ${ev.isNull} = ${childGen.isNull}; + ${ev.value} = ${childGen.value}; + } catch ($exceptionClass e) { + throw QueryExecutionErrors.castingCauseOverflowErrorInTableInsert($fromDt, $toDt, $col); + }""" + ) + // scalastyle:on line.size.limit + } + + override def dataType: DataType = child.dataType +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index b1378275b8556..1db38d854a9e1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -93,6 +93,19 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { toSQLConf(SQLConf.ANSI_ENABLED.key))) } + def castingCauseOverflowErrorInTableInsert( + from: DataType, + to: DataType, + columnName: String): ArithmeticException = { + new SparkArithmeticException( + errorClass = "CAST_OVERFLOW_IN_TABLE_INSERT", + messageParameters = Array( + toSQLType(from), + toSQLType(to), + toSQLId(columnName)) + ) + } + def cannotChangeDecimalPrecisionError( value: Decimal, decimalPrecision: Int, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index b293307d4ea95..679a5eb2661ff 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -710,18 +710,18 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { withTable("t") { sql("create table t(b int) using parquet") val outOfRangeValue1 = (Int.MaxValue + 1L).toString + val expectedMsg = "Fail to insert a value of \"BIGINT\" type into the \"INT\" type column" + + " `b` due to an overflow." var msg = intercept[SparkException] { sql(s"insert into t values($outOfRangeValue1)") }.getCause.getMessage - assert(msg.contains( - s"""The value ${outOfRangeValue1}L of the type "BIGINT" cannot be cast to "INT"""")) + assert(msg.contains(expectedMsg)) val outOfRangeValue2 = (Int.MinValue - 1L).toString msg = intercept[SparkException] { sql(s"insert into t values($outOfRangeValue2)") }.getCause.getMessage - assert(msg.contains( - s"""The value ${outOfRangeValue2}L of the type "BIGINT" cannot be cast to "INT"""")) + assert(msg.contains(expectedMsg)) } } } @@ -732,18 +732,18 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { withTable("t") { sql("create table t(b long) using parquet") val outOfRangeValue1 = Math.nextUp(Long.MaxValue) + val expectedMsg = "Fail to insert a value of \"DOUBLE\" type into the \"BIGINT\" type " + + "column `b` due to an overflow." var msg = intercept[SparkException] { sql(s"insert into t values(${outOfRangeValue1}D)") }.getCause.getMessage - assert(msg.contains( - s"""The value ${outOfRangeValue1}D of the type "DOUBLE" cannot be cast to "BIGINT"""")) + assert(msg.contains(expectedMsg)) val outOfRangeValue2 = Math.nextDown(Long.MinValue) msg = intercept[SparkException] { sql(s"insert into t values(${outOfRangeValue2}D)") }.getCause.getMessage - assert(msg.contains( - s"""The value ${outOfRangeValue2}D of the type "DOUBLE" cannot be cast to "BIGINT"""")) + assert(msg.contains(expectedMsg)) } } } @@ -754,10 +754,12 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { withTable("t") { sql("create table t(b decimal(3,2)) using parquet") val outOfRangeValue = "123.45" + val expectedMsg = "Fail to insert a value of \"DECIMAL(5,2)\" type into the " + + "\"DECIMAL(3,2)\" type column `b` due to an overflow." val msg = intercept[SparkException] { sql(s"insert into t values(${outOfRangeValue})") }.getCause.getMessage - assert(msg.contains("cannot be represented as Decimal(3, 2)")) + assert(msg.contains(expectedMsg)) } } } From 301d6e39e22297b3771ef502c3204c5fd84f2f9f Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 1 Aug 2022 09:12:40 +0900 Subject: [PATCH 411/535] [SPARK-39857][SQL][TESTS][FOLLOW-UP] Make "translate complex expression" pass with ANSI mode on ### What changes were proposed in this pull request? This PR fixes `translate complex expression` to pass with ANSI mode on. We do push `Abs` with ANSI mode on (https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala#L93): ``` [info] - translate complex expression *** FAILED *** (22 milliseconds) [info] Expected None, but got Some((ABS(cint) - 2) <= 1) (DataSourceV2StrategySuite.scala:325) [info] org.scalatest.exceptions.TestFailedException: [info] at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:472) [info] at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:471) [info] at org.scalatest.funsuite.AnyFunSuite.newAssertionFailedException(AnyFunSuite.scala:1563) [info] at org.scalatest.Assertions.assertResult(Assertions.scala:867) [info] at org.scalatest.Assertions.assertResult$(Assertions.scala:863) [info] at org.scalatest.funsuite.AnyFunSuite.assertResult(AnyFunSuite.scala:1563) [info] at org.apache.spark.sql.execution.datasources.v2.DataSourceV2StrategySuite.testTranslateFilter(DataSourceV2StrategySuite.scala:325) [info] at org.apache.spark.sql.execution.datasources.v2.DataSourceV2StrategySuite.$anonfun$new$4(DataSourceV2StrategySuite.scala:176) [info] at org.apache.spark.sql.execution.datasources.v2.DataSourceV2StrategySuite.$anonfun$new$4$adapted(DataSourceV2StrategySuite.scala:170) [info] at scala.collection.immutable.List.foreach(List.scala:431) [info] at org.apache.spark.sql.execution.datasources.v2.DataSourceV2StrategySuite.$anonfun$new$3(DataSourceV2StrategySuite.scala:170) [info] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:190) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:204) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:188) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:200) [info] at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:200) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:182) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:65) [info] at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234) [info] at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227) [info] at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:65) ``` https://github.com/apache/spark/runs/7595362617?check_suite_focus=true ### Why are the changes needed? To make the build pass with ANSI mode on. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Manually ran the unittest with ANSI mode on. Closes #37349 from HyukjinKwon/SPARK-39857-followup. Lead-authored-by: Hyukjin Kwon Co-authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit c211abe970d9e88fd25cd859ea729e630d9491a7) Signed-off-by: Hyukjin Kwon --- .../v2/DataSourceV2StrategySuite.scala | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala index 6d355f7761546..c6aa13d900987 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala @@ -160,8 +160,8 @@ class DataSourceV2StrategySuite extends PlanTest with SharedSparkSession { // ABS(cint) - 2 <= 1 testTranslateFilter(LessThanOrEqual( // Expressions are not supported - // Functions such as 'Abs' are not supported - Subtract(Abs(attrInt), 2), 1), None) + // Functions such as 'Abs' are not pushed down with ANSI mode off + Subtract(Abs(attrInt, failOnError = false), 2), 1), None) // (cin1 > 1 AND cint < 10) OR (cint > 50 AND cint > 100) testTranslateFilter(Or( @@ -187,8 +187,8 @@ class DataSourceV2StrategySuite extends PlanTest with SharedSparkSession { testTranslateFilter(Or( And( GreaterThan(attrInt, 1), - // Functions such as 'Abs' are not supported - LessThan(Abs(attrInt), 10) + // Functions such as 'Abs' are not pushed down with ANSI mode off + LessThan(Abs(attrInt, failOnError = false), 10) ), And( GreaterThan(attrInt, 50), @@ -198,8 +198,8 @@ class DataSourceV2StrategySuite extends PlanTest with SharedSparkSession { testTranslateFilter(Not(And( Or( LessThanOrEqual(attrInt, 1), - // Functions such as 'Abs' are not supported - GreaterThanOrEqual(Abs(attrInt), 10) + // Functions such as 'Abs' are not pushed down with ANSI mode off + GreaterThanOrEqual(Abs(attrInt, failOnError = false), 10) ), Or( LessThanOrEqual(attrInt, 50), @@ -228,8 +228,8 @@ class DataSourceV2StrategySuite extends PlanTest with SharedSparkSession { testTranslateFilter(Or( Or( EqualTo(attrInt, 1), - // Functions such as 'Abs' are not supported - EqualTo(Abs(attrInt), 10) + // Functions such as 'Abs' are not pushed down with ANSI mode off + EqualTo(Abs(attrInt, failOnError = false), 10) ), Or( GreaterThan(attrInt, 0), @@ -264,8 +264,8 @@ class DataSourceV2StrategySuite extends PlanTest with SharedSparkSession { LessThan(attrInt, 10) ), And( - // Functions such as 'Abs' are not supported - EqualTo(Abs(attrInt), 6), + // Functions such as 'Abs' are not pushed down with ANSI mode off + EqualTo(Abs(attrInt, failOnError = false), 6), IsNotNull(attrInt))), None) // (cint > 1 OR cint < 10) AND (cint = 6 OR cint IS NOT NULL) @@ -294,8 +294,8 @@ class DataSourceV2StrategySuite extends PlanTest with SharedSparkSession { LessThan(attrInt, 10) ), Or( - // Functions such as 'Abs' are not supported - EqualTo(Abs(attrInt), 6), + // Functions such as 'Abs' are not pushed down with ANSI mode off + EqualTo(Abs(attrInt, failOnError = false), 6), IsNotNull(attrInt))), None) } } From fb9f85ed3b2391fae3349a34cbda951eee224fd1 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Tue, 2 Aug 2022 18:05:48 +0900 Subject: [PATCH 412/535] [SPARK-39932][SQL] WindowExec should clear the final partition buffer ### What changes were proposed in this pull request? Explicitly clear final partition buffer if can not find next in `WindowExec`. The same fix in `WindowInPandasExec` ### Why are the changes needed? We do a repartition after a window, then we need do a local sort after window due to RoundRobinPartitioning shuffle. The error stack: ```java ExternalAppendOnlyUnsafeRowArray INFO - Reached spill threshold of 4096 rows, switching to org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter org.apache.spark.memory.SparkOutOfMemoryError: Unable to acquire 65536 bytes of memory, got 0 at org.apache.spark.memory.MemoryConsumer.throwOom(MemoryConsumer.java:157) at org.apache.spark.memory.MemoryConsumer.allocateArray(MemoryConsumer.java:97) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.growPointerArrayIfNecessary(UnsafeExternalSorter.java:352) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.allocateMemoryForRecordIfNecessary(UnsafeExternalSorter.java:435) at org.apache.spark.util.collection.unsafe.sort.UnsafeExternalSorter.insertRecord(UnsafeExternalSorter.java:455) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.insertRow(UnsafeExternalRowSorter.java:138) at org.apache.spark.sql.execution.UnsafeExternalRowSorter.sort(UnsafeExternalRowSorter.java:226) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$.$anonfun$prepareShuffleDependency$10(ShuffleExchangeExec.scala:355) ``` `WindowExec` only clear buffer in `fetchNextPartition` so the final partition buffer miss to clear. It is not a big problem since we have task completion listener. ```scala taskContext.addTaskCompletionListener(context -> { cleanupResources(); }); ``` This bug only affects if the window is not the last operator for this task and the follow operator like sort. ### Does this PR introduce _any_ user-facing change? yes, bug fix ### How was this patch tested? N/A Closes #37358 from ulysses-you/window. Authored-by: ulysses-you Signed-off-by: Hyukjin Kwon (cherry picked from commit 1fac870126c289a7ec75f45b6b61c93b9a4965d4) Signed-off-by: Hyukjin Kwon --- .../sql/execution/python/WindowInPandasExec.scala | 10 ++++++++-- .../apache/spark/sql/execution/window/WindowExec.scala | 10 ++++++++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala index e73da99786ceb..ccb1ed92525d1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/WindowInPandasExec.scala @@ -332,8 +332,14 @@ case class WindowInPandasExec( // Iteration var rowIndex = 0 - override final def hasNext: Boolean = - (bufferIterator != null && bufferIterator.hasNext) || nextRowAvailable + override final def hasNext: Boolean = { + val found = (bufferIterator != null && bufferIterator.hasNext) || nextRowAvailable + if (!found) { + // clear final partition + buffer.clear() + } + found + } override final def next(): Iterator[UnsafeRow] = { // Load the next partition if we need to. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala index 33c37e871e385..dc85585b13d0c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowExec.scala @@ -158,8 +158,14 @@ case class WindowExec( // Iteration var rowIndex = 0 - override final def hasNext: Boolean = - (bufferIterator != null && bufferIterator.hasNext) || nextRowAvailable + override final def hasNext: Boolean = { + val found = (bufferIterator != null && bufferIterator.hasNext) || nextRowAvailable + if (!found) { + // clear final partition + buffer.clear() + } + found + } val join = new JoinedRow override final def next(): InternalRow = { From a0242eabaeef39ec4d74d2bdd0bcac78c71a63e6 Mon Sep 17 00:00:00 2001 From: Adam Binford Date: Tue, 2 Aug 2022 16:50:05 -0700 Subject: [PATCH 413/535] [SPARK-39951][SQL] Update Parquet V2 columnar check for nested fields ### What changes were proposed in this pull request? Update the `supportsColumnarReads` check for Parquet V2 to take into account support for nested fields. Also fixed a typo I saw in one of the tests. ### Why are the changes needed? Match Parquet V1 in returning columnar batches if nested field vectorization is enabled. ### Does this PR introduce _any_ user-facing change? Parquet V2 scans will return columnar batches with nested fields if the config is enabled. ### How was this patch tested? Added new UTs checking both V1 and V2 return columnar batches for nested fields when the config is enabled. Closes #37379 from Kimahriman/parquet-v2-columnar. Authored-by: Adam Binford Signed-off-by: Chao Sun --- .../parquet/ParquetFileFormat.scala | 5 +-- .../ParquetPartitionReaderFactory.scala | 9 ++-- .../parquet/ParquetQuerySuite.scala | 43 +++++++++++++++++++ .../parquet/ParquetSchemaPruningSuite.scala | 2 +- 4 files changed, 51 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 44dc145d36e68..9765e7c780193 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -173,9 +173,8 @@ class ParquetFileFormat */ override def supportBatch(sparkSession: SparkSession, schema: StructType): Boolean = { val conf = sparkSession.sessionState.conf - conf.parquetVectorizedReaderEnabled && conf.wholeStageEnabled && - ParquetUtils.isBatchReadSupportedForSchema(conf, schema) && - !WholeStageCodegenExec.isTooManyFields(conf, schema) + ParquetUtils.isBatchReadSupportedForSchema(conf, schema) && conf.wholeStageEnabled && + !WholeStageCodegenExec.isTooManyFields(conf, schema) } override def vectorTypes( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala index ea4f5e0d287ab..c16b762510f76 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/parquet/ParquetPartitionReaderFactory.scala @@ -37,12 +37,13 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec import org.apache.spark.sql.connector.expressions.aggregate.Aggregation import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader} +import org.apache.spark.sql.execution.WholeStageCodegenExec import org.apache.spark.sql.execution.datasources.{AggregatePushDownUtils, DataSourceUtils, PartitionedFile, RecordReaderIterator} import org.apache.spark.sql.execution.datasources.parquet._ import org.apache.spark.sql.execution.datasources.v2._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.Filter -import org.apache.spark.sql.types.{AtomicType, StructType} +import org.apache.spark.sql.types.StructType import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.util.SerializableConfiguration @@ -72,6 +73,8 @@ case class ParquetPartitionReaderFactory( private val enableOffHeapColumnVector = sqlConf.offHeapColumnVectorEnabled private val enableVectorizedReader: Boolean = ParquetUtils.isBatchReadSupportedForSchema(sqlConf, resultSchema) + private val supportsColumnar = enableVectorizedReader && sqlConf.wholeStageEnabled && + !WholeStageCodegenExec.isTooManyFields(sqlConf, resultSchema) private val enableRecordFilter: Boolean = sqlConf.parquetRecordFilterEnabled private val timestampConversion: Boolean = sqlConf.isParquetINT96TimestampConversion private val capacity = sqlConf.parquetVectorizedReaderBatchSize @@ -104,9 +107,7 @@ case class ParquetPartitionReaderFactory( } override def supportColumnarReads(partition: InputPartition): Boolean = { - sqlConf.parquetVectorizedReaderEnabled && sqlConf.wholeStageEnabled && - resultSchema.length <= sqlConf.wholeStageMaxNumFields && - resultSchema.forall(_.dataType.isInstanceOf[AtomicType]) + supportsColumnar } override def buildReader(file: PartitionedFile): PartitionReader[InternalRow] = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 654ab7fe36200..33656c84c88f0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.execution.datasources.{SchemaColumnConvertNotSupport import org.apache.spark.sql.execution.datasources.parquet.TestingUDT.{NestedStruct, NestedStructUDT, SingleElement} import org.apache.spark.sql.execution.datasources.v2.BatchScanExec import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan +import org.apache.spark.sql.functions.struct import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -1042,6 +1043,25 @@ class ParquetV1QuerySuite extends ParquetQuerySuite { val fileScan3 = df3.queryExecution.sparkPlan.find(_.isInstanceOf[FileSourceScanExec]).get assert(fileScan3.asInstanceOf[FileSourceScanExec].supportsColumnar) checkAnswer(df3, df.selectExpr(columns : _*)) + + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true") { + val df4 = spark.range(10).select(struct( + Seq.tabulate(11) {i => ($"id" + i).as(s"c$i")} : _*).as("nested")) + df4.write.mode(SaveMode.Overwrite).parquet(path) + + // do not return batch - whole stage codegen is disabled for wide table (>200 columns) + val df5 = spark.read.parquet(path) + val fileScan5 = df5.queryExecution.sparkPlan.find(_.isInstanceOf[FileSourceScanExec]).get + assert(!fileScan5.asInstanceOf[FileSourceScanExec].supportsColumnar) + checkAnswer(df5, df4) + + // return batch + val columns2 = Seq.tabulate(9) {i => s"nested.c$i"} + val df6 = df5.selectExpr(columns2 : _*) + val fileScan6 = df6.queryExecution.sparkPlan.find(_.isInstanceOf[FileSourceScanExec]).get + assert(fileScan6.asInstanceOf[FileSourceScanExec].supportsColumnar) + checkAnswer(df6, df4.selectExpr(columns2 : _*)) + } } } } @@ -1079,6 +1099,29 @@ class ParquetV2QuerySuite extends ParquetQuerySuite { val parquetScan3 = fileScan3.asInstanceOf[BatchScanExec].scan.asInstanceOf[ParquetScan] assert(parquetScan3.createReaderFactory().supportColumnarReads(null)) checkAnswer(df3, df.selectExpr(columns : _*)) + + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key -> "true") { + val df4 = spark.range(10).select(struct( + Seq.tabulate(11) {i => ($"id" + i).as(s"c$i")} : _*).as("nested")) + df4.write.mode(SaveMode.Overwrite).parquet(path) + + // do not return batch - whole stage codegen is disabled for wide table (>200 columns) + val df5 = spark.read.parquet(path) + val fileScan5 = df5.queryExecution.sparkPlan.find(_.isInstanceOf[BatchScanExec]).get + val parquetScan5 = fileScan5.asInstanceOf[BatchScanExec].scan.asInstanceOf[ParquetScan] + // The method `supportColumnarReads` in Parquet doesn't depends on the input partition. + // Here we can pass null input partition to the method for testing propose. + assert(!parquetScan5.createReaderFactory().supportColumnarReads(null)) + checkAnswer(df5, df4) + + // return batch + val columns2 = Seq.tabulate(9) {i => s"nested.c$i"} + val df6 = df5.selectExpr(columns2 : _*) + val fileScan6 = df6.queryExecution.sparkPlan.find(_.isInstanceOf[BatchScanExec]).get + val parquetScan6 = fileScan6.asInstanceOf[BatchScanExec].scan.asInstanceOf[ParquetScan] + assert(parquetScan6.createReaderFactory().supportColumnarReads(null)) + checkAnswer(df6, df4.selectExpr(columns2 : _*)) + } } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala index 6a93b72472c73..5c0b7def039a1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaPruningSuite.scala @@ -32,7 +32,7 @@ abstract class ParquetSchemaPruningSuite extends SchemaPruningSuite with Adaptiv override protected val vectorizedReaderEnabledKey: String = SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key override protected val vectorizedReaderNestedEnabledKey: String = - SQLConf.ORC_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key + SQLConf.PARQUET_VECTORIZED_READER_NESTED_COLUMN_ENABLED.key } From 41779ea26122de6a2f0e70a0398f82841a3f909b Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Tue, 2 Aug 2022 21:18:45 -0500 Subject: [PATCH 414/535] [MINOR][DOCS] Remove generated statement about Scala version in docs homepage as Spark supports multiple versions ### What changes were proposed in this pull request? Remove this statement from the docs homepage: "For the Scala API, Spark 3.3.0 uses Scala 2.12. You will need to use a compatible Scala version (2.12.x)." ### Why are the changes needed? It's misleading, as Spark supports 2.12 and 2.13. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #37381 from srowen/RemoveScalaStatement. Authored-by: Sean Owen Signed-off-by: Sean Owen (cherry picked from commit 73ef5432547e3e8e9b0cce0913200a94402aeb4c) Signed-off-by: Sean Owen --- docs/index.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/index.md b/docs/index.md index c6caf31d5603a..0c3c02737576d 100644 --- a/docs/index.md +++ b/docs/index.md @@ -41,9 +41,8 @@ Spark runs on both Windows and UNIX-like systems (e.g. Linux, Mac OS), and it sh Spark runs on Java 8/11/17, Scala 2.12/2.13, Python 3.7+ and R 3.5+. Java 8 prior to version 8u201 support is deprecated as of Spark 3.2.0. -For the Scala API, Spark {{site.SPARK_VERSION}} -uses Scala {{site.SCALA_BINARY_VERSION}}. You will need to use a compatible Scala version -({{site.SCALA_BINARY_VERSION}}.x). +When using the Scala API, it is necessary for applications to use the same version of Scala that Spark was compiled for. +For example, when using Scala 2.13, use Spark compiled for 2.13, and compile code/applications for Scala 2.13 as well. For Python 3.9, Arrow optimization and pandas UDFs might not work due to the supported Python versions in Apache Arrow. Please refer to the latest [Python Compatibility](https://arrow.apache.org/docs/python/install.html#python-compatibility) page. For Java 11, `-Dio.netty.tryReflectionSetAccessible=true` is required additionally for Apache Arrow library. This prevents `java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.(long, int) not available` when Apache Arrow uses Netty internally. From ea6d57715fd7a0ac5294b895b3ad607b3f4e983b Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Wed, 3 Aug 2022 11:11:22 +0800 Subject: [PATCH 415/535] [SPARK-39911][SQL][3.3] Optimize global Sort to RepartitionByExpression this is for backport https://github.com/apache/spark/pull/37330 into branch-3.3 ### What changes were proposed in this pull request? Optimize Global sort to RepartitionByExpression, for example: ``` Sort local Sort local Sort global => RepartitionByExpression ``` ### Why are the changes needed? If a global sort below a local sort, the only meaningful thing is it's distribution. So this pr optimizes that global sort to RepartitionByExpression to save a local sort. ### Does this PR introduce _any_ user-facing change? we fix a bug in https://github.com/apache/spark/pull/37250 and that pr backport into branch-3.3. However, that fix may introduce performance regression. This pr itself is only to improve performance but in order to avoid the regression, we also backport this pr. see the details https://github.com/apache/spark/pull/37330#issuecomment-1201979396 ### How was this patch tested? add test Closes #37330 from ulysses-you/optimize-sort. Authored-by: ulysses-you Signed-off-by: Wenchen Fan Closes #37373 from ulysses-you/SPARK-39911-3.3. Authored-by: ulysses-you Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 6 ++++++ .../sql/catalyst/optimizer/EliminateSortsSuite.scala | 10 +++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 827df04443e52..5d670ecdf1d1a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1466,6 +1466,12 @@ object EliminateSorts extends Rule[LogicalPlan] { plan match { case Sort(_, global, child) if canRemoveGlobalSort || !global => recursiveRemoveSort(child, canRemoveGlobalSort) + case Sort(sortOrder, true, child) => + // For this case, the upper sort is local so the ordering of present sort is unnecessary, + // so here we only preserve its output partitioning using `RepartitionByExpression`. + // We should use `None` as the optNumPartitions so AQE can coalesce shuffle partitions. + // This behavior is same with original global sort. + RepartitionByExpression(sortOrder, recursiveRemoveSort(child, true), None) case other if canEliminateSort(other) => other.withNewChildren(other.children.map(c => recursiveRemoveSort(c, canRemoveGlobalSort))) case other if canEliminateGlobalSort(other) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index 053bc1c21373e..7ceac3b3000c6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -426,16 +426,20 @@ class EliminateSortsSuite extends AnalysisTest { test("SPARK-39835: Fix EliminateSorts remove global sort below the local sort") { // global -> local val plan = testRelation.orderBy($"a".asc).sortBy($"c".asc).analyze - comparePlans(Optimize.execute(plan), plan) + val expect = RepartitionByExpression($"a".asc :: Nil, testRelation, None) + .sortBy($"c".asc).analyze + comparePlans(Optimize.execute(plan), expect) // global -> global -> local val plan2 = testRelation.orderBy($"a".asc).orderBy($"b".asc).sortBy($"c".asc).analyze - val expected2 = testRelation.orderBy($"b".asc).sortBy($"c".asc).analyze + val expected2 = RepartitionByExpression($"b".asc :: Nil, testRelation, None) + .sortBy($"c".asc).analyze comparePlans(Optimize.execute(plan2), expected2) // local -> global -> local val plan3 = testRelation.sortBy($"a".asc).orderBy($"b".asc).sortBy($"c".asc).analyze - val expected3 = testRelation.orderBy($"b".asc).sortBy($"c".asc).analyze + val expected3 = RepartitionByExpression($"b".asc :: Nil, testRelation, None) + .sortBy($"c".asc).analyze comparePlans(Optimize.execute(plan3), expected3) } } From 2254240dba4a71d9a68a22ca9a83080351fa3343 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Wed, 3 Aug 2022 11:59:22 +0800 Subject: [PATCH 416/535] [SPARK-39867][SQL] Global limit should not inherit OrderPreservingUnaryNode Make GlobalLimit inherit UnaryNode rather than OrderPreservingUnaryNode Global limit can not promise the output ordering is same with child, it actually depend on the certain physical plan. For all physical plan with gobal limits: - CollectLimitExec: it does not promise output ordering - GlobalLimitExec: it required all tuples so it can assume the child is shuffle or child is single partition. Then it can use output ordering of child - TakeOrderedAndProjectExec: it do sort inside it's implementation This bug get worse since we pull out v1 write require ordering. yes, bug fix fix test and add test Closes #37284 from ulysses-you/sort. Authored-by: ulysses-you Signed-off-by: Wenchen Fan (cherry picked from commit e9cc1024df4d587a0f456842d495db91984ed9db) Signed-off-by: Wenchen Fan --- .../plans/logical/basicLogicalOperators.scala | 7 ++++++- .../catalyst/optimizer/EliminateSortsSuite.scala | 15 ++++++++++----- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 774f6956162e3..e12a5918ee0b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1248,8 +1248,13 @@ object Limit { * A global (coordinated) limit. This operator can emit at most `limitExpr` number in total. * * See [[Limit]] for more information. + * + * Note that, we can not make it inherit [[OrderPreservingUnaryNode]] due to the different strategy + * of physical plan. The output ordering of child will be broken if a shuffle exchange comes in + * between the child and global limit, due to the fact that shuffle reader fetches blocks in random + * order. */ -case class GlobalLimit(limitExpr: Expression, child: LogicalPlan) extends OrderPreservingUnaryNode { +case class GlobalLimit(limitExpr: Expression, child: LogicalPlan) extends UnaryNode { override def output: Seq[Attribute] = child.output override def maxRows: Option[Long] = { limitExpr match { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index 7ceac3b3000c6..b97dc455dad99 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -115,9 +115,9 @@ class EliminateSortsSuite extends AnalysisTest { test("SPARK-33183: remove redundant sort by") { val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc_nullsFirst) - val unnecessaryReordered = orderedPlan.limit(2).select('a).sortBy('a.asc, 'b.desc_nullsFirst) + val unnecessaryReordered = LocalLimit(2, orderedPlan).select('a).sortBy('a.asc, 'b.desc_nullsFirst) val optimized = Optimize.execute(unnecessaryReordered.analyze) - val correctAnswer = orderedPlan.limit(2).select('a).analyze + val correctAnswer = LocalLimit(2, orderedPlan).select('a).analyze comparePlans(optimized, correctAnswer) } @@ -161,11 +161,11 @@ class EliminateSortsSuite extends AnalysisTest { comparePlans(optimized, correctAnswer) } - test("SPARK-33183: limits should not affect order for local sort") { + test("SPARK-33183: local limits should not affect order for local sort") { val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc) - val filteredAndReordered = orderedPlan.limit(Literal(10)).sortBy('a.asc, 'b.desc) + val filteredAndReordered = LocalLimit(10, orderedPlan).sortBy('a.asc, 'b.desc) val optimized = Optimize.execute(filteredAndReordered.analyze) - val correctAnswer = orderedPlan.limit(Literal(10)).analyze + val correctAnswer = LocalLimit(10, orderedPlan).analyze comparePlans(optimized, correctAnswer) } @@ -442,4 +442,9 @@ class EliminateSortsSuite extends AnalysisTest { .sortBy($"c".asc).analyze comparePlans(Optimize.execute(plan3), expected3) } + + test("SPARK-39867: Global limit should not inherit OrderPreservingUnaryNode") { + val plan = testRelation.sortBy($"a".asc).limit(2).sortBy($"a".asc).analyze + comparePlans(Optimize.execute(plan), plan) + } } From bd3f36f6626f0fb71ab0ceb9bbe7fa4d05c628f5 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 3 Aug 2022 16:11:20 +0900 Subject: [PATCH 417/535] [SPARK-39962][PYTHON][SQL] Apply projection when group attributes are empty ### What changes were proposed in this pull request? This PR proposes to apply the projection to respect the reordered columns in its child when group attributes are empty. ### Why are the changes needed? To respect the column order in the child. ### Does this PR introduce _any_ user-facing change? Yes, it fixes a bug as below: ```python import pandas as pd from pyspark.sql import functions as f f.pandas_udf("double") def AVG(x: pd.Series) -> float: return x.mean() abc = spark.createDataFrame([(1.0, 5.0, 17.0)], schema=["a", "b", "c"]) abc.agg(AVG("a"), AVG("c")).show() abc.select("c", "a").agg(AVG("a"), AVG("c")).show() ``` **Before** ``` +------+------+ |AVG(a)|AVG(c)| +------+------+ | 17.0| 1.0| +------+------+ ``` **After** ``` +------+------+ |AVG(a)|AVG(c)| +------+------+ | 1.0| 17.0| +------+------+ ``` ### How was this patch tested? Manually tested, and added an unittest. Closes #37390 from HyukjinKwon/SPARK-39962. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 5335c784ae76c9cc0aaa7a4b57b3cd6b3891ad9a) Signed-off-by: Hyukjin Kwon --- .../execution/python/AggregateInPandasExec.scala | 5 +++-- .../spark/sql/execution/python/PythonUDFSuite.scala | 13 +++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AggregateInPandasExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AggregateInPandasExec.scala index a7f63aafc9f1d..2f85149ee8e13 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AggregateInPandasExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/AggregateInPandasExec.scala @@ -131,12 +131,13 @@ case class AggregateInPandasExec( val newIter: Iterator[InternalRow] = mayAppendUpdatingSessionIterator(iter) val prunedProj = UnsafeProjection.create(allInputs.toSeq, child.output) - val grouped = if (groupingExpressions.isEmpty) { + val groupedItr = if (groupingExpressions.isEmpty) { // Use an empty unsafe row as a place holder for the grouping key Iterator((new UnsafeRow(), newIter)) } else { GroupedIterator(newIter, groupingExpressions, child.output) - }.map { case (key, rows) => + } + val grouped = groupedItr.map { case (key, rows) => (key, rows.map(prunedProj)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala index 45b57207c5782..4ad7f90105373 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/python/PythonUDFSuite.scala @@ -71,4 +71,17 @@ class PythonUDFSuite extends QueryTest with SharedSparkSession { pythonTestUDF(count(pythonTestUDF(base("a") + 1)))) checkAnswer(df1, df2) } + + test("SPARK-39962: Global aggregation of Pandas UDF should respect the column order") { + assume(shouldTestGroupedAggPandasUDFs) + val df = Seq[(java.lang.Integer, java.lang.Integer)]((1, null)).toDF("a", "b") + + val pandasTestUDF = TestGroupedAggPandasUDF(name = "pandas_udf") + val reorderedDf = df.select("b", "a") + val actual = reorderedDf.agg( + pandasTestUDF(reorderedDf("a")), pandasTestUDF(reorderedDf("b"))) + val expected = df.agg(pandasTestUDF(df("a")), pandasTestUDF(df("b"))) + + checkAnswer(actual, expected) + } } From 3b023516f11fa7bb8f92ede6ef4463a4f35f1f6b Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Wed, 3 Aug 2022 17:48:12 +0900 Subject: [PATCH 418/535] [SPARK-39867][SQL][3.3] Fix scala style ### What changes were proposed in this pull request? fix scala style ### Why are the changes needed? fix failed test ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? pass CI Closes #37394 from ulysses-you/style. Authored-by: ulysses-you Signed-off-by: Hyukjin Kwon --- .../spark/sql/catalyst/optimizer/EliminateSortsSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala index b97dc455dad99..376bbbed7efc9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/EliminateSortsSuite.scala @@ -115,7 +115,8 @@ class EliminateSortsSuite extends AnalysisTest { test("SPARK-33183: remove redundant sort by") { val orderedPlan = testRelation.select('a, 'b).orderBy('a.asc, 'b.desc_nullsFirst) - val unnecessaryReordered = LocalLimit(2, orderedPlan).select('a).sortBy('a.asc, 'b.desc_nullsFirst) + val unnecessaryReordered = LocalLimit(2, orderedPlan).select('a) + .sortBy('a.asc, 'b.desc_nullsFirst) val optimized = Optimize.execute(unnecessaryReordered.analyze) val correctAnswer = LocalLimit(2, orderedPlan).select('a).analyze comparePlans(optimized, correctAnswer) From 630dc7e34f3da642451d9f7904f75370f0fbc84a Mon Sep 17 00:00:00 2001 From: zzzzming95 <505306252@qq.com> Date: Wed, 3 Aug 2022 21:22:55 +0900 Subject: [PATCH 419/535] [SPARK-39900][SQL] Address partial or negated condition in binary format's predicate pushdown ### What changes were proposed in this pull request? fix `BinaryFileFormat` filter push down bug. Before modification, when Filter tree is: ```` -Not - - IsNotNull ```` Since `IsNotNull` cannot be matched, `IsNotNull` will return a result that is always true (that is, `case _ => (_ => true)`), that is, no filter pushdown is performed. But because there is still a `Not`, after negation, it will return a result that is always False, that is, no result can be returned. ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? test suit in `BinaryFileFormatSuite` ``` testCreateFilterFunction( Seq(Not(IsNull(LENGTH))), Seq((t1, true), (t2, true), (t3, true))) ``` Closes #37350 from zzzzming95/SPARK-39900. Lead-authored-by: zzzzming95 <505306252@qq.com> Co-authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit a0dc7d9117b66426aaa2257c8d448a2f96882ecd) Signed-off-by: Hyukjin Kwon --- .../binaryfile/BinaryFileFormat.scala | 54 +++++++++---------- .../binaryfile/BinaryFileFormatSuite.scala | 5 +- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala index 4b500aa9637bc..3874d70981bbb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormat.scala @@ -97,7 +97,7 @@ class BinaryFileFormat extends FileFormat with DataSourceRegister { val broadcastedHadoopConf = sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) - val filterFuncs = filters.map(filter => createFilterFunction(filter)) + val filterFuncs = filters.flatMap(filter => createFilterFunction(filter)) val maxLength = sparkSession.conf.get(SOURCES_BINARY_FILE_MAX_LENGTH) file: PartitionedFile => { @@ -158,38 +158,38 @@ object BinaryFileFormat { StructField(LENGTH, LongType, false) :: StructField(CONTENT, BinaryType, true) :: Nil) - private[binaryfile] def createFilterFunction(filter: Filter): FileStatus => Boolean = { + private[binaryfile] def createFilterFunction(filter: Filter): Option[FileStatus => Boolean] = { filter match { - case And(left, right) => - s => createFilterFunction(left)(s) && createFilterFunction(right)(s) - case Or(left, right) => - s => createFilterFunction(left)(s) || createFilterFunction(right)(s) - case Not(child) => - s => !createFilterFunction(child)(s) - - case LessThan(LENGTH, value: Long) => - _.getLen < value - case LessThanOrEqual(LENGTH, value: Long) => - _.getLen <= value - case GreaterThan(LENGTH, value: Long) => - _.getLen > value - case GreaterThanOrEqual(LENGTH, value: Long) => - _.getLen >= value - case EqualTo(LENGTH, value: Long) => - _.getLen == value - + case And(left, right) => (createFilterFunction(left), createFilterFunction(right)) match { + case (Some(leftPred), Some(rightPred)) => Some(s => leftPred(s) && rightPred(s)) + case (Some(leftPred), None) => Some(leftPred) + case (None, Some(rightPred)) => Some(rightPred) + case (None, None) => Some(_ => true) + } + case Or(left, right) => (createFilterFunction(left), createFilterFunction(right)) match { + case (Some(leftPred), Some(rightPred)) => Some(s => leftPred(s) || rightPred(s)) + case _ => Some(_ => true) + } + case Not(child) => createFilterFunction(child) match { + case Some(pred) => Some(s => !pred(s)) + case _ => Some(_ => true) + } + case LessThan(LENGTH, value: Long) => Some(_.getLen < value) + case LessThanOrEqual(LENGTH, value: Long) => Some(_.getLen <= value) + case GreaterThan(LENGTH, value: Long) => Some(_.getLen > value) + case GreaterThanOrEqual(LENGTH, value: Long) => Some(_.getLen >= value) + case EqualTo(LENGTH, value: Long) => Some(_.getLen == value) case LessThan(MODIFICATION_TIME, value: Timestamp) => - _.getModificationTime < value.getTime + Some(_.getModificationTime < value.getTime) case LessThanOrEqual(MODIFICATION_TIME, value: Timestamp) => - _.getModificationTime <= value.getTime + Some(_.getModificationTime <= value.getTime) case GreaterThan(MODIFICATION_TIME, value: Timestamp) => - _.getModificationTime > value.getTime + Some(_.getModificationTime > value.getTime) case GreaterThanOrEqual(MODIFICATION_TIME, value: Timestamp) => - _.getModificationTime >= value.getTime + Some(_.getModificationTime >= value.getTime) case EqualTo(MODIFICATION_TIME, value: Timestamp) => - _.getModificationTime == value.getTime - - case _ => (_ => true) + Some(_.getModificationTime == value.getTime) + case _ => None } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala index 86ff026d7b1e9..9a374d5c3021d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala @@ -183,7 +183,7 @@ class BinaryFileFormatSuite extends QueryTest with SharedSparkSession { def testCreateFilterFunction( filters: Seq[Filter], testCases: Seq[(FileStatus, Boolean)]): Unit = { - val funcs = filters.map(BinaryFileFormat.createFilterFunction) + val funcs = filters.flatMap(BinaryFileFormat.createFilterFunction) testCases.foreach { case (status, expected) => assert(funcs.forall(f => f(status)) === expected, s"$filters applied to $status should be $expected.") @@ -250,6 +250,9 @@ class BinaryFileFormatSuite extends QueryTest with SharedSparkSession { Seq(Or(LessThanOrEqual(MODIFICATION_TIME, new Timestamp(1L)), GreaterThanOrEqual(MODIFICATION_TIME, new Timestamp(3L)))), Seq((t1, true), (t2, false), (t3, true))) + testCreateFilterFunction( + Seq(Not(IsNull(LENGTH))), + Seq((t1, true), (t2, true), (t3, true))) // test filters applied on both columns testCreateFilterFunction( From 6e9a58fb5f51612702608ec690dc33035fe1ca21 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 3 Aug 2022 08:28:08 -0500 Subject: [PATCH 420/535] [SPARK-39947][BUILD] Upgrade Jersey to 2.36 ### What changes were proposed in this pull request? This pr upgrade Jersey from 2.35 to 2.36. ### Why are the changes needed? This version adapts to Jack 2.13.3, which is also used by Spark currently - [Adopt Jackson 2.13](https://github.com/eclipse-ee4j/jersey/pull/4928) - [Update Jackson to 2.13.3](https://github.com/eclipse-ee4j/jersey/pull/5076) The release notes as follows: - https://github.com/eclipse-ee4j/jersey/releases/tag/2.36 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #37375 from LuciferYang/jersey-236. Authored-by: yangjie01 Signed-off-by: Sean Owen (cherry picked from commit d1c145b0b0b892fcbf1e1adda7b8ecff75c56f6d) Signed-off-by: Sean Owen # Conflicts: # dev/deps/spark-deps-hadoop-2-hive-2.3 # dev/deps/spark-deps-hadoop-3-hive-2.3 # pom.xml --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 12 ++++++------ dev/deps/spark-deps-hadoop-3-hive-2.3 | 12 ++++++------ pom.xml | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index e426b8f030643..0eaac80ab3b77 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -138,12 +138,12 @@ jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.32//jcl-over-slf4j-1.7.32.jar jdo-api/3.0.1//jdo-api-3.0.1.jar -jersey-client/2.34//jersey-client-2.34.jar -jersey-common/2.34//jersey-common-2.34.jar -jersey-container-servlet-core/2.34//jersey-container-servlet-core-2.34.jar -jersey-container-servlet/2.34//jersey-container-servlet-2.34.jar -jersey-hk2/2.34//jersey-hk2-2.34.jar -jersey-server/2.34//jersey-server-2.34.jar +jersey-client/2.36//jersey-client-2.36.jar +jersey-common/2.36//jersey-common-2.36.jar +jersey-container-servlet-core/2.36//jersey-container-servlet-core-2.36.jar +jersey-container-servlet/2.36//jersey-container-servlet-2.36.jar +jersey-hk2/2.36//jersey-hk2-2.36.jar +jersey-server/2.36//jersey-server-2.36.jar jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar jetty-util/6.1.26//jetty-util-6.1.26.jar jetty-util/9.4.46.v20220331//jetty-util-9.4.46.v20220331.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 365ede5ca10f7..76b69f016af69 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -126,12 +126,12 @@ jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar jcl-over-slf4j/1.7.32//jcl-over-slf4j-1.7.32.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jdom2/2.0.6//jdom2-2.0.6.jar -jersey-client/2.34//jersey-client-2.34.jar -jersey-common/2.34//jersey-common-2.34.jar -jersey-container-servlet-core/2.34//jersey-container-servlet-core-2.34.jar -jersey-container-servlet/2.34//jersey-container-servlet-2.34.jar -jersey-hk2/2.34//jersey-hk2-2.34.jar -jersey-server/2.34//jersey-server-2.34.jar +jersey-client/2.36//jersey-client-2.36.jar +jersey-common/2.36//jersey-common-2.36.jar +jersey-container-servlet-core/2.36//jersey-container-servlet-core-2.36.jar +jersey-container-servlet/2.36//jersey-container-servlet-2.36.jar +jersey-hk2/2.36//jersey-hk2-2.36.jar +jersey-server/2.36//jersey-server-2.36.jar jettison/1.1//jettison-1.1.jar jetty-util-ajax/9.4.46.v20220331//jetty-util-ajax-9.4.46.v20220331.jar jetty-util/9.4.46.v20220331//jetty-util-9.4.46.v20220331.jar diff --git a/pom.xml b/pom.xml index 3ac52a7494451..206cad9eb981a 100644 --- a/pom.xml +++ b/pom.xml @@ -188,7 +188,7 @@ 4.1.17 14.0.1 3.0.16 - 2.34 + 2.36 2.10.13 3.5.2 3.0.0 From 15ebd56de6ae37587d750bb1e106c5dcb3e22958 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Thu, 4 Aug 2022 01:03:45 +0800 Subject: [PATCH 421/535] [SPARK-39952][SQL] SaveIntoDataSourceCommand should recache result relation ### What changes were proposed in this pull request? recacheByPlan the result relation inside `SaveIntoDataSourceCommand` ### Why are the changes needed? The behavior of `SaveIntoDataSourceCommand` is similar with `InsertIntoDataSourceCommand` which supports append or overwirte data. In order to keep data consistent, we should always do recacheByPlan the relation on post hoc. ### Does this PR introduce _any_ user-facing change? yes, bug fix ### How was this patch tested? add test Closes #37380 from ulysses-you/refresh. Authored-by: ulysses-you Signed-off-by: Wenchen Fan (cherry picked from commit 5fe0b245f7891a05bc4e1e641fd0aa9130118ea4) Signed-off-by: Wenchen Fan --- .../SaveIntoDataSourceCommand.scala | 12 +++- .../SaveIntoDataSourceCommandSuite.scala | 61 ++++++++++++++++++- 2 files changed, 70 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala index 486f73cab44f7..ef74036b23bef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommand.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.execution.datasources +import scala.util.control.NonFatal + import org.apache.spark.sql.{Dataset, Row, SaveMode, SparkSession} import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -41,9 +43,17 @@ case class SaveIntoDataSourceCommand( override def innerChildren: Seq[QueryPlan[_]] = Seq(query) override def run(sparkSession: SparkSession): Seq[Row] = { - dataSource.createRelation( + val relation = dataSource.createRelation( sparkSession.sqlContext, mode, options, Dataset.ofRows(sparkSession, query)) + try { + val logicalRelation = LogicalRelation(relation, relation.schema.toAttributes, None, false) + sparkSession.sharedState.cacheManager.recacheByPlan(sparkSession, logicalRelation) + } catch { + case NonFatal(_) => + // some data source can not support return a valid relation, e.g. `KafkaSourceProvider` + } + Seq.empty[Row] } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala index e843d1d328425..e68d6561fb8fa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/SaveIntoDataSourceCommandSuite.scala @@ -17,10 +17,13 @@ package org.apache.spark.sql.execution.datasources -import org.apache.spark.sql.SaveMode +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{DataFrame, QueryTest, Row, SaveMode, SparkSession, SQLContext} +import org.apache.spark.sql.sources.{BaseRelation, CreatableRelationProvider, RelationProvider, TableScan} import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.{LongType, StructField, StructType} -class SaveIntoDataSourceCommandSuite extends SharedSparkSession { +class SaveIntoDataSourceCommandSuite extends QueryTest with SharedSparkSession { test("simpleString is redacted") { val URL = "connection.url" @@ -41,4 +44,58 @@ class SaveIntoDataSourceCommandSuite extends SharedSparkSession { assert(!logicalPlanString.contains(PASS)) assert(logicalPlanString.contains(DRIVER)) } + + test("SPARK-39952: SaveIntoDataSourceCommand should recache result relation") { + val provider = classOf[FakeV1DataSource].getName + + def saveIntoDataSource(data: Int): Unit = { + spark.range(data) + .write + .mode("append") + .format(provider) + .save() + } + + def loadData: DataFrame = { + spark.read + .format(provider) + .load() + } + + saveIntoDataSource(1) + val cached = loadData.cache() + checkAnswer(cached, Row(0)) + + saveIntoDataSource(2) + checkAnswer(loadData, Row(0) :: Row(1) :: Nil) + + FakeV1DataSource.data = null + } +} + +object FakeV1DataSource { + var data: RDD[Row] = _ +} + +class FakeV1DataSource extends RelationProvider with CreatableRelationProvider { + override def createRelation( + sqlContext: SQLContext, + parameters: Map[String, String]): BaseRelation = { + FakeRelation() + } + + override def createRelation( + sqlContext: SQLContext, + mode: SaveMode, + parameters: Map[String, String], + data: DataFrame): BaseRelation = { + FakeV1DataSource.data = data.rdd + FakeRelation() + } +} + +case class FakeRelation() extends BaseRelation with TableScan { + override def sqlContext: SQLContext = SparkSession.getActiveSession.get.sqlContext + override def schema: StructType = StructType(Seq(StructField("id", LongType))) + override def buildScan(): RDD[Row] = FakeV1DataSource.data } From c358ee6761539b4a4d12dbe36a4dd1a632a0efeb Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 5 Aug 2022 11:25:51 +0800 Subject: [PATCH 422/535] [SPARK-39775][CORE][AVRO] Disable validate default values when parsing Avro schemas ### What changes were proposed in this pull request? This PR disables validate default values when parsing Avro schemas. ### Why are the changes needed? Spark will throw exception if upgrade to Spark 3.2. We have fixed the Hive serde tables before: SPARK-34512. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #37191 from wangyum/SPARK-39775. Authored-by: Yuming Wang Signed-off-by: Wenchen Fan (cherry picked from commit 5c1b99f441ec5e178290637a9a9e7902aaa116e1) Signed-off-by: Wenchen Fan --- .../serializer/GenericAvroSerializer.scala | 4 +-- .../GenericAvroSerializerSuite.scala | 16 ++++++++++ .../spark/sql/avro/AvroDataToCatalyst.scala | 3 +- .../apache/spark/sql/avro/AvroOptions.scala | 4 +-- .../spark/sql/avro/CatalystDataToAvro.scala | 2 +- .../spark/sql/avro/AvroFunctionsSuite.scala | 32 +++++++++++++++++++ 6 files changed, 55 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/serializer/GenericAvroSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/GenericAvroSerializer.scala index c1ef3ee769af2..7d2923fdf3752 100644 --- a/core/src/main/scala/org/apache/spark/serializer/GenericAvroSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/GenericAvroSerializer.scala @@ -97,7 +97,7 @@ private[serializer] class GenericAvroSerializer[D <: GenericContainer] } { in.close() } - new Schema.Parser().parse(new String(bytes, StandardCharsets.UTF_8)) + new Schema.Parser().setValidateDefaults(false).parse(new String(bytes, StandardCharsets.UTF_8)) }) /** @@ -137,7 +137,7 @@ private[serializer] class GenericAvroSerializer[D <: GenericContainer] val fingerprint = input.readLong() schemaCache.getOrElseUpdate(fingerprint, { schemas.get(fingerprint) match { - case Some(s) => new Schema.Parser().parse(s) + case Some(s) => new Schema.Parser().setValidateDefaults(false).parse(s) case None => throw new SparkException( "Error reading attempting to read avro data -- encountered an unknown " + diff --git a/core/src/test/scala/org/apache/spark/serializer/GenericAvroSerializerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/GenericAvroSerializerSuite.scala index 54e4aebe54430..98493c12f59a3 100644 --- a/core/src/test/scala/org/apache/spark/serializer/GenericAvroSerializerSuite.scala +++ b/core/src/test/scala/org/apache/spark/serializer/GenericAvroSerializerSuite.scala @@ -110,4 +110,20 @@ class GenericAvroSerializerSuite extends SparkFunSuite with SharedSparkContext { assert(rdd.collect() sameElements Array.fill(10)(datum)) } } + + test("SPARK-39775: Disable validate default values when parsing Avro schemas") { + val avroTypeStruct = s""" + |{ + | "type": "record", + | "name": "struct", + | "fields": [ + | {"name": "id", "type": "long", "default": null} + | ] + |} + """.stripMargin + val schema = new Schema.Parser().setValidateDefaults(false).parse(avroTypeStruct) + + val genericSer = new GenericAvroSerializer(conf.getAvroSchema) + assert(schema === genericSer.decompress(ByteBuffer.wrap(genericSer.compress(schema)))) + } } diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala index b4965003ba33d..c4a4b16b05228 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala @@ -53,7 +53,8 @@ private[avro] case class AvroDataToCatalyst( private lazy val avroOptions = AvroOptions(options) - @transient private lazy val actualSchema = new Schema.Parser().parse(jsonFormatSchema) + @transient private lazy val actualSchema = + new Schema.Parser().setValidateDefaults(false).parse(jsonFormatSchema) @transient private lazy val expectedSchema = avroOptions.schema.getOrElse(actualSchema) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala index 48b2c3481a6ee..fec2b77773ddc 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala @@ -52,13 +52,13 @@ private[sql] class AvroOptions( * instead of "string" type in the default converted schema. */ val schema: Option[Schema] = { - parameters.get("avroSchema").map(new Schema.Parser().parse).orElse({ + parameters.get("avroSchema").map(new Schema.Parser().setValidateDefaults(false).parse).orElse({ val avroUrlSchema = parameters.get("avroSchemaUrl").map(url => { log.debug("loading avro schema from url: " + url) val fs = FileSystem.get(new URI(url), conf) val in = fs.open(new Path(url)) try { - new Schema.Parser().parse(in) + new Schema.Parser().setValidateDefaults(false).parse(in) } finally { in.close() } diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/CatalystDataToAvro.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/CatalystDataToAvro.scala index 5d79c44ad422e..1e7e8600977e6 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/CatalystDataToAvro.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/CatalystDataToAvro.scala @@ -35,7 +35,7 @@ private[avro] case class CatalystDataToAvro( @transient private lazy val avroType = jsonFormatSchema - .map(new Schema.Parser().parse) + .map(new Schema.Parser().setValidateDefaults(false).parse) .getOrElse(SchemaConverters.toAvroType(child.dataType, child.nullable)) @transient private lazy val serializer = diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala index c9e0d4344691a..69cda3efb52bb 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.execution.LocalTableScanExec import org.apache.spark.sql.functions.{col, lit, struct} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StructType class AvroFunctionsSuite extends QueryTest with SharedSparkSession { import testImplicits._ @@ -238,4 +239,35 @@ class AvroFunctionsSuite extends QueryTest with SharedSparkSession { assert(message.contains("Only UNION of a null type and a non-null type is supported")) } } + + test("SPARK-39775: Disable validate default values when parsing Avro schemas") { + val avroTypeStruct = s""" + |{ + | "type": "record", + | "name": "struct", + | "fields": [ + | {"name": "id", "type": "long", "default": null} + | ] + |} + """.stripMargin + val avroSchema = AvroOptions(Map("avroSchema" -> avroTypeStruct)).schema.get + val sparkSchema = SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType] + + val df = spark.range(5).select($"id") + val structDf = df.select(struct($"id").as("struct")) + val avroStructDF = structDf.select(functions.to_avro('struct, avroTypeStruct).as("avro")) + checkAnswer(avroStructDF.select(functions.from_avro('avro, avroTypeStruct)), structDf) + + withTempPath { dir => + df.write.format("avro").save(dir.getCanonicalPath) + checkAnswer(spark.read.schema(sparkSchema).format("avro").load(dir.getCanonicalPath), df) + + val msg = intercept[SparkException] { + spark.read.option("avroSchema", avroTypeStruct).format("avro") + .load(dir.getCanonicalPath) + .collect() + }.getCause.getMessage + assert(msg.contains("Invalid default for field id: null not a \"long\"")) + } + } } From 26f0d501595d4bddc7420e5c0505ccb1a9a991c4 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 5 Aug 2022 18:31:46 +0900 Subject: [PATCH 423/535] [SPARK-39981][SQL] Throw the exception QueryExecutionErrors.castingCauseOverflowErrorInTableInsert in Cast This PR is a followup of https://github.com/apache/spark/pull/37283. It missed `throw` keyword in the interpreted path. To throw an exception as intended instead of returning an exception itself. Yes, it will throw an exception as expected in the interpreted path. Haven't tested because it's too much straightforward. Closes #37414 from HyukjinKwon/SPARK-39981. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit e6b9c6166a08ad4dca2550bbbb151fa575b730a8) Signed-off-by: Hyukjin Kwon --- .../scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 014c2be731983..f5dbeda514a55 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -2365,7 +2365,7 @@ case class CheckOverflowInTableInsert(child: AnsiCast, columnName: String) exten child.eval(input) } catch { case e: SparkArithmeticException => - QueryExecutionErrors.castingCauseOverflowErrorInTableInsert( + throw QueryExecutionErrors.castingCauseOverflowErrorInTableInsert( child.child.dataType, child.dataType, columnName) From 369b01404c25f02458316fd99307a3f94a13cec5 Mon Sep 17 00:00:00 2001 From: xzhou <15210830305@163.com> Date: Fri, 5 Aug 2022 18:42:57 +0800 Subject: [PATCH 424/535] [SPARK-38034][SQL] Optimize TransposeWindow rule MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Optimize the TransposeWindow rule to extend applicable cases and optimize time complexity. TransposeWindow rule will try to eliminate unnecessary shuffle: but the function compatiblePartitions will only take the first n elements of the window2 partition sequence, for some cases, this will not take effect, like the case below:  val df = spark.range(10).selectExpr("id AS a", "id AS b", "id AS c", "id AS d") df.selectExpr( "sum(`d`) OVER(PARTITION BY `b`,`a`) as e", "sum(`c`) OVER(PARTITION BY `a`) as f" ).explain Current plan == Physical Plan == *(5) Project [e#10L, f#11L] +- Window [sum(c#4L) windowspecdefinition(a#2L, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS f#11L], [a#2L]    +- *(4) Sort [a#2L ASC NULLS FIRST], false, 0       +- Exchange hashpartitioning(a#2L, 200), true, [id=#41]          +- *(3) Project [a#2L, c#4L, e#10L]             +- Window [sum(d#5L) windowspecdefinition(b#3L, a#2L, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS e#10L], [b#3L, a#2L]                +- *(2) Sort [b#3L ASC NULLS FIRST, a#2L ASC NULLS FIRST], false, 0                   +- Exchange hashpartitioning(b#3L, a#2L, 200), true, [id=#33]                      +- *(1) Project [id#0L AS d#5L, id#0L AS b#3L, id#0L AS a#2L, id#0L AS c#4L]                         +- *(1) Range (0, 10, step=1, splits=10) Expected plan: == Physical Plan == *(4) Project [e#924L, f#925L] +- Window [sum(d#43L) windowspecdefinition(b#41L, a#40L, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS e#924L], [b#41L, a#40L]  +- *(3) Sort [b#41L ASC NULLS FIRST, a#40L ASC NULLS FIRST], false, 0       +- *(3) Project [d#43L, b#41L, a#40L, f#925L]          +- Window [sum(c#42L) windowspecdefinition(a#40L, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS f#925L], [a#40L]             +- *(2) Sort [a#40L ASC NULLS FIRST], false, 0                +- Exchange hashpartitioning(a#40L, 200), true, [id=#282]                   +- *(1) Project [id#38L AS d#43L, id#38L AS b#41L, id#38L AS a#40L, id#38L AS c#42L]                      +- *(1) Range (0, 10, step=1, splits=10) Also the permutations method has a O(n!) time complexity, which is very expensive when there are many partition columns, we could try to optimize it. ### Why are the changes needed? We could apply the rule for more cases, which could improve the execution performance by eliminate unnecessary shuffle, and by reducing the time complexity from O(n!) to O(n2), the performance for the rule itself could improve ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? UT Closes #35334 from constzhou/SPARK-38034_optimize_transpose_window_rule. Authored-by: xzhou <15210830305@163.com> Signed-off-by: Wenchen Fan (cherry picked from commit 0cc331dc7e51e53000063052b0c8ace417eb281b) Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 6 +++--- .../optimizer/TransposeWindowSuite.scala | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 5d670ecdf1d1a..db2e64c558f84 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1176,9 +1176,9 @@ object CollapseWindow extends Rule[LogicalPlan] { */ object TransposeWindow extends Rule[LogicalPlan] { private def compatiblePartitions(ps1 : Seq[Expression], ps2: Seq[Expression]): Boolean = { - ps1.length < ps2.length && ps2.take(ps1.length).permutations.exists(ps1.zip(_).forall { - case (l, r) => l.semanticEquals(r) - }) + ps1.length < ps2.length && ps1.forall { expr1 => + ps2.exists(expr1.semanticEquals) + } } private def windowsCompatible(w1: Window, w2: Window): Boolean = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala index a53e04da19d41..8efdcf0d6c6af 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/TransposeWindowSuite.scala @@ -142,4 +142,21 @@ class TransposeWindowSuite extends PlanTest { comparePlans(optimized, analyzed) } + test("SPARK-38034: transpose two adjacent windows with compatible partitions " + + "which is not a prefix") { + val query = testRelation + .window(Seq(sum(c).as('sum_a_2)), partitionSpec4, orderSpec2) + .window(Seq(sum(c).as('sum_a_1)), partitionSpec3, orderSpec1) + + val analyzed = query.analyze + val optimized = Optimize.execute(analyzed) + + val correctAnswer = testRelation + .window(Seq(sum(c).as('sum_a_1)), partitionSpec3, orderSpec1) + .window(Seq(sum(c).as('sum_a_2)), partitionSpec4, orderSpec2) + .select('a, 'b, 'c, 'd, 'sum_a_2, 'sum_a_1) + + comparePlans(optimized, correctAnswer.analyze) + } + } From 66faaa58ea5f69efc0fdbb5e3aa3a2d9703377b5 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 8 Aug 2022 09:58:14 -0700 Subject: [PATCH 425/535] [SPARK-39965][K8S] Skip PVC cleanup when driver doesn't own PVCs ### What changes were proposed in this pull request? This PR aims to skip PVC cleanup logic when `spark.kubernetes.driver.ownPersistentVolumeClaim=false`. ### Why are the changes needed? To simplify Spark termination log by removing unnecessary log containing Exception message when Spark jobs have no PVC permission and at the same time `spark.kubernetes.driver.ownPersistentVolumeClaim` is `false`. ### Does this PR introduce _any_ user-facing change? Only in the termination logs of Spark jobs that has no PVC permission. ### How was this patch tested? Manually. Closes #37433 from dongjoon-hyun/SPARK-39965. Lead-authored-by: Dongjoon Hyun Co-authored-by: pralabhkumar Signed-off-by: Dongjoon Hyun (cherry picked from commit 87b312a9c9273535e22168c3da73834c22e1fbbb) Signed-off-by: Dongjoon Hyun --- .../k8s/KubernetesClusterSchedulerBackend.scala | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala index 43c6597362e41..985b8b7bef051 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/KubernetesClusterSchedulerBackend.scala @@ -141,11 +141,13 @@ private[spark] class KubernetesClusterSchedulerBackend( } } - Utils.tryLogNonFatalError { - kubernetesClient - .persistentVolumeClaims() - .withLabel(SPARK_APP_ID_LABEL, applicationId()) - .delete() + if (conf.get(KUBERNETES_DRIVER_OWN_PVC)) { + Utils.tryLogNonFatalError { + kubernetesClient + .persistentVolumeClaims() + .withLabel(SPARK_APP_ID_LABEL, applicationId()) + .delete() + } } if (shouldDeleteExecutors) { From 9353437ce11f646373d557f2c24aeb381b2aeec5 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Tue, 9 Aug 2022 11:39:58 +0900 Subject: [PATCH 426/535] [SPARK-40002][SQL] Don't push down limit through window using ntile ### What changes were proposed in this pull request? Change `LimitPushDownThroughWindow` so that it no longer supports pushing down a limit through a window using ntile. ### Why are the changes needed? In an unpartitioned window, the ntile function is currently applied to the result of the limit. This behavior produces results that conflict with Spark 3.1.3, Hive 2.3.9 and Prestodb 0.268 #### Example Assume this data: ``` create table t1 stored as parquet as select * from range(101); ``` Also assume this query: ``` select id, ntile(10) over (order by id) as nt from t1 limit 10; ``` With Spark 3.2.2, Spark 3.3.0, and master, the limit is applied before the ntile function: ``` +---+---+ |id |nt | +---+---+ |0 |1 | |1 |2 | |2 |3 | |3 |4 | |4 |5 | |5 |6 | |6 |7 | |7 |8 | |8 |9 | |9 |10 | +---+---+ ``` With Spark 3.1.3, and Hive 2.3.9, and Prestodb 0.268, the limit is applied _after_ ntile. Spark 3.1.3: ``` +---+---+ |id |nt | +---+---+ |0 |1 | |1 |1 | |2 |1 | |3 |1 | |4 |1 | |5 |1 | |6 |1 | |7 |1 | |8 |1 | |9 |1 | +---+---+ ``` Hive 2.3.9: ``` +-----+-----+ | id | nt | +-----+-----+ | 0 | 1 | | 1 | 1 | | 2 | 1 | | 3 | 1 | | 4 | 1 | | 5 | 1 | | 6 | 1 | | 7 | 1 | | 8 | 1 | | 9 | 1 | +-----+-----+ 10 rows selected (1.72 seconds) ``` Prestodb 0.268: ``` id | nt ----+---- 0 | 1 1 | 1 2 | 1 3 | 1 4 | 1 5 | 1 6 | 1 7 | 1 8 | 1 9 | 1 (10 rows) ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Two new unit tests. Closes #37443 from bersprockets/pushdown_ntile. Authored-by: Bruce Robbins Signed-off-by: Hyukjin Kwon (cherry picked from commit c9156e5a3b9cb290c7cdda8db298c9875e67aa5e) Signed-off-by: Hyukjin Kwon --- .../optimizer/LimitPushDownThroughWindow.scala | 5 ++--- .../optimizer/LimitPushdownThroughWindowSuite.scala | 13 ++++++++++++- .../spark/sql/DataFrameWindowFunctionsSuite.scala | 13 +++++++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushDownThroughWindow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushDownThroughWindow.scala index 635434741b944..88f92262dcc20 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushDownThroughWindow.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushDownThroughWindow.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentRow, DenseRank, IntegerLiteral, NamedExpression, NTile, Rank, RowFrame, RowNumber, SpecifiedWindowFrame, UnboundedPreceding, WindowExpression, WindowSpecDefinition} +import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentRow, DenseRank, IntegerLiteral, NamedExpression, Rank, RowFrame, RowNumber, SpecifiedWindowFrame, UnboundedPreceding, WindowExpression, WindowSpecDefinition} import org.apache.spark.sql.catalyst.plans.logical.{Limit, LocalLimit, LogicalPlan, Project, Sort, Window} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern.{LIMIT, WINDOW} @@ -33,8 +33,7 @@ object LimitPushDownThroughWindow extends Rule[LogicalPlan] { // The window frame of RankLike and RowNumberLike can only be UNBOUNDED PRECEDING to CURRENT ROW. private def supportsPushdownThroughWindow( windowExpressions: Seq[NamedExpression]): Boolean = windowExpressions.forall { - case Alias(WindowExpression(_: Rank | _: DenseRank | _: NTile | _: RowNumber, - WindowSpecDefinition(Nil, _, + case Alias(WindowExpression(_: Rank | _: DenseRank | _: RowNumber, WindowSpecDefinition(Nil, _, SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow))), _) => true case _ => false } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownThroughWindowSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownThroughWindowSuite.scala index b09d10b260174..99812d20bf55f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownThroughWindowSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownThroughWindowSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{CurrentRow, PercentRank, Rank, RowFrame, RowNumber, SpecifiedWindowFrame, UnboundedPreceding} +import org.apache.spark.sql.catalyst.expressions.{CurrentRow, NTile, PercentRank, Rank, RowFrame, RowNumber, SpecifiedWindowFrame, UnboundedPreceding} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ @@ -198,4 +198,15 @@ class LimitPushdownThroughWindowSuite extends PlanTest { Optimize.execute(originalQuery.analyze), WithoutOptimize.execute(originalQuery.analyze)) } + + test("SPARK-40002: Should not push through ntile window function") { + val originalQuery = testRelation + .select(a, b, c, + windowExpr(new NTile(), windowSpec(Nil, c.desc :: Nil, windowFrame)).as("nt")) + .limit(2) + + comparePlans( + Optimize.execute(originalQuery.analyze), + WithoutOptimize.execute(originalQuery.analyze)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 557b278f9c45c..e57650ff62950 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -1203,4 +1203,17 @@ class DataFrameWindowFunctionsSuite extends QueryTest ) ) } + + test("SPARK-40002: ntile should apply before limit") { + val df = Seq.tabulate(101)(identity).toDF("id") + val w = Window.orderBy("id") + checkAnswer( + df.select($"id", ntile(10).over(w)).limit(3), + Seq( + Row(0, 1), + Row(1, 1), + Row(2, 1) + ) + ) + } } From 42b30ee9a65ed2a50c13364309eb0608d75b7999 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 10 Aug 2022 09:59:35 +0900 Subject: [PATCH 427/535] [SPARK-40022][YARN][TESTS] Ignore pyspark suites in `YarnClusterSuite` when python3 is unavailable ### What changes were proposed in this pull request? This pr adds `assume(isPythonAvailable)` to `testPySpark` method in `YarnClusterSuite` to make `YarnClusterSuite` test succeeded in an environment without Python 3 configured. ### Why are the changes needed? `YarnClusterSuite` should not `ABORTED` when `python3` is not configured. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Pass GitHub Actions - Manual test Run ``` mvn clean test -pl resource-managers/yarn -am -Pyarn -DwildcardSuites=org.apache.spark.deploy.yarn.YarnClusterSuite -Dtest=none ``` in an environment without Python 3 configured: **Before** ``` YarnClusterSuite: org.apache.spark.deploy.yarn.YarnClusterSuite *** ABORTED *** java.lang.RuntimeException: Unable to load a Suite class that was discovered in the runpath: org.apache.spark.deploy.yarn.YarnClusterSuite at org.scalatest.tools.DiscoverySuite$.getSuiteInstance(DiscoverySuite.scala:81) at org.scalatest.tools.DiscoverySuite.$anonfun$nestedSuites$1(DiscoverySuite.scala:38) at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:286) at scala.collection.Iterator.foreach(Iterator.scala:943) at scala.collection.Iterator.foreach$(Iterator.scala:943) at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) at scala.collection.IterableLike.foreach(IterableLike.scala:74) at scala.collection.IterableLike.foreach$(IterableLike.scala:73) at scala.collection.AbstractIterable.foreach(Iterable.scala:56) at scala.collection.TraversableLike.map(TraversableLike.scala:286) ... Run completed in 833 milliseconds. Total number of tests run: 0 Suites: completed 1, aborted 1 Tests: succeeded 0, failed 0, canceled 0, ignored 0, pending 0 *** 1 SUITE ABORTED *** ``` **After** ``` YarnClusterSuite: - run Spark in yarn-client mode - run Spark in yarn-cluster mode - run Spark in yarn-client mode with unmanaged am - run Spark in yarn-client mode with different configurations, ensuring redaction - run Spark in yarn-cluster mode with different configurations, ensuring redaction - yarn-cluster should respect conf overrides in SparkHadoopUtil (SPARK-16414, SPARK-23630) - SPARK-35672: run Spark in yarn-client mode with additional jar using URI scheme 'local' - SPARK-35672: run Spark in yarn-cluster mode with additional jar using URI scheme 'local' - SPARK-35672: run Spark in yarn-client mode with additional jar using URI scheme 'local' and gateway-replacement path - SPARK-35672: run Spark in yarn-cluster mode with additional jar using URI scheme 'local' and gateway-replacement path - SPARK-35672: run Spark in yarn-cluster mode with additional jar using URI scheme 'local' and gateway-replacement path containing an environment variable - SPARK-35672: run Spark in yarn-client mode with additional jar using URI scheme 'file' - SPARK-35672: run Spark in yarn-cluster mode with additional jar using URI scheme 'file' - run Spark in yarn-cluster mode unsuccessfully - run Spark in yarn-cluster mode failure after sc initialized - run Python application in yarn-client mode !!! CANCELED !!! YarnClusterSuite.this.isPythonAvailable was false (YarnClusterSuite.scala:376) - run Python application in yarn-cluster mode !!! CANCELED !!! YarnClusterSuite.this.isPythonAvailable was false (YarnClusterSuite.scala:376) - run Python application in yarn-cluster mode using spark.yarn.appMasterEnv to override local envvar !!! CANCELED !!! YarnClusterSuite.this.isPythonAvailable was false (YarnClusterSuite.scala:376) - user class path first in client mode - user class path first in cluster mode - monitor app using launcher library - running Spark in yarn-cluster mode displays driver log links - timeout to get SparkContext in cluster mode triggers failure - executor env overwrite AM env in client mode - executor env overwrite AM env in cluster mode - SPARK-34472: ivySettings file with no scheme or file:// scheme should be localized on driver in cluster mode - SPARK-34472: ivySettings file with no scheme or file:// scheme should retain user provided path in client mode - SPARK-34472: ivySettings file with non-file:// schemes should throw an error Run completed in 7 minutes, 2 seconds. Total number of tests run: 25 Suites: completed 2, aborted 0 Tests: succeeded 25, failed 0, canceled 3, ignored 0, pending 0 All tests passed. ``` Closes #37454 from LuciferYang/yarnclustersuite. Authored-by: yangjie01 Signed-off-by: Hyukjin Kwon (cherry picked from commit 8e472443081342a0e0dc37aa154e30a0a6df39b7) Signed-off-by: Hyukjin Kwon --- .../apache/spark/deploy/yarn/YarnClusterSuite.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala index 1c20723ff7ade..7a48a43959f31 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnClusterSuite.scala @@ -55,11 +55,12 @@ import org.apache.spark.util.{Utils, YarnContainerInfoHelper} @ExtendedYarnTest class YarnClusterSuite extends BaseYarnClusterSuite { - private val pythonExecutablePath = { + private val (isPythonAvailable, pythonExecutablePath) = { // To make sure to use the same Python executable. - val maybePath = TestUtils.getAbsolutePathFromExecutable("python3") - assert(maybePath.isDefined) - maybePath.get + TestUtils.getAbsolutePathFromExecutable("python3") match { + case Some(path) => (true, path) + case _ => (false, "") + } } override def newYarnConfig(): YarnConfiguration = new YarnConfiguration() @@ -371,6 +372,7 @@ class YarnClusterSuite extends BaseYarnClusterSuite { clientMode: Boolean, extraConf: Map[String, String] = Map(), extraEnv: Map[String, String] = Map()): Unit = { + assume(isPythonAvailable) val primaryPyFile = new File(tempDir, "test.py") Files.write(TEST_PYFILE, primaryPyFile, StandardCharsets.UTF_8) From 248e8b49b114d725e7a94bc8193f371b89270af7 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 11 Aug 2022 15:01:05 +0900 Subject: [PATCH 428/535] [SPARK-40043][PYTHON][SS][DOCS] Document DataStreamWriter.toTable and DataStreamReader.table ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/30835 that adds `DataStreamWriter.toTable` and `DataStreamReader.table` into PySpark documentation. ### Why are the changes needed? To document both features. ### Does this PR introduce _any_ user-facing change? Yes, both API will be shown in PySpark reference documentation. ### How was this patch tested? Manually built the documentation and checked. Closes #37477 from HyukjinKwon/SPARK-40043. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 447003324d2cf9f2bfa799ef3a1e744a5bc9277d) Signed-off-by: Hyukjin Kwon --- python/docs/source/reference/pyspark.ss/io.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/docs/source/reference/pyspark.ss/io.rst b/python/docs/source/reference/pyspark.ss/io.rst index da476fb6fac51..7a20777fdc7c8 100644 --- a/python/docs/source/reference/pyspark.ss/io.rst +++ b/python/docs/source/reference/pyspark.ss/io.rst @@ -34,6 +34,7 @@ Input/Output DataStreamReader.orc DataStreamReader.parquet DataStreamReader.schema + DataStreamReader.table DataStreamReader.text DataStreamWriter.foreach DataStreamWriter.foreachBatch @@ -44,4 +45,5 @@ Input/Output DataStreamWriter.partitionBy DataStreamWriter.queryName DataStreamWriter.start + DataStreamWriter.toTable DataStreamWriter.trigger From 221fee8973ce438b089fae769dd054c47f6774ed Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 11 Aug 2022 15:10:42 -0700 Subject: [PATCH 429/535] [SPARK-40047][TEST] Exclude unused `xalan` transitive dependency from `htmlunit` ### What changes were proposed in this pull request? This pr exclude `xalan` from `htmlunit` to clean warning of CVE-2022-34169: ``` Provides transitive vulnerable dependency xalan:xalan:2.7.2 CVE-2022-34169 7.5 Integer Coercion Error vulnerability with medium severity found Results powered by Checkmarx(c) ``` `xalan:xalan:2.7.2` is the latest version, the code base has not been updated for 5 years, so can't solve by upgrading `xalan`. ### Why are the changes needed? The vulnerability is described is [CVE-2022-34169](https://github.com/advisories/GHSA-9339-86wc-4qgf), better to exclude it although it's just test dependency for Spark. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Pass GitHub Actions - Manual test: run `mvn dependency:tree -Phadoop-3 -Phadoop-cloud -Pmesos -Pyarn -Pkinesis-asl -Phive-thriftserver -Pspark-ganglia-lgpl -Pkubernetes -Phive | grep xalan` to check that `xalan` is not matched after this pr Closes #37481 from LuciferYang/exclude-xalan. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun (cherry picked from commit 7f3baa77acbf7747963a95d0f24e3b8868c7b16a) Signed-off-by: Dongjoon Hyun --- pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pom.xml b/pom.xml index 206cad9eb981a..f639e5e54447d 100644 --- a/pom.xml +++ b/pom.xml @@ -709,6 +709,12 @@ net.sourceforge.htmlunit htmlunit ${htmlunit.version} + + + xalan + xalan + + test From 21d9db39b7ac0aae07b83fb9dba2639a4daffadc Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Fri, 12 Aug 2022 10:41:37 +0800 Subject: [PATCH 430/535] [SPARK-39887][SQL][3.3] RemoveRedundantAliases should keep aliases that make the output of projection nodes unique ### What changes were proposed in this pull request? Keep the output attributes of a `Union` node's first child in the `RemoveRedundantAliases` rule to avoid correctness issues. ### Why are the changes needed? To fix the result of the following query: ``` SELECT a, b AS a FROM ( SELECT a, a AS b FROM (SELECT a FROM VALUES (1) AS t(a)) UNION ALL SELECT a, b FROM (SELECT a, b FROM VALUES (1, 2) AS t(a, b)) ) ``` Before this PR the query returns the incorrect result: ``` +---+---+ | a| a| +---+---+ | 1| 1| | 2| 2| +---+---+ ``` After this PR it returns the expected result: ``` +---+---+ | a| a| +---+---+ | 1| 1| | 1| 2| +---+---+ ``` ### Does this PR introduce _any_ user-facing change? Yes, fixes a correctness issue. ### How was this patch tested? Added new UTs. Closes #37472 from peter-toth/SPARK-39887-keep-attributes-of-unions-first-child-3.3. Authored-by: Peter Toth Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 26 +- .../RemoveRedundantAliasAndProjectSuite.scala | 2 +- .../q14a.sf100/explain.txt | 270 +++++++++--------- .../approved-plans-v1_4/q14a/explain.txt | 254 ++++++++-------- .../org/apache/spark/sql/DataFrameSuite.scala | 61 ++++ .../execution/metric/SQLMetricsSuite.scala | 5 +- 6 files changed, 347 insertions(+), 271 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index db2e64c558f84..558a67ff5ca66 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -513,9 +513,11 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { } /** - * Remove redundant alias expression from a LogicalPlan and its subtree. A set of excludes is used - * to prevent the removal of seemingly redundant aliases used to deduplicate the input for a - * (self) join or to prevent the removal of top-level subquery attributes. + * Remove redundant alias expression from a LogicalPlan and its subtree. + * A set of excludes is used to prevent the removal of: + * - seemingly redundant aliases used to deduplicate the input for a (self) join, + * - top-level subquery attributes and + * - attributes of a Union's first child */ private def removeRedundantAliases(plan: LogicalPlan, excluded: AttributeSet): LogicalPlan = { if (!plan.containsPattern(ALIAS)) { @@ -542,6 +544,21 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { }) Join(newLeft, newRight, joinType, newCondition, hint) + case _: Union => + var first = true + plan.mapChildren { child => + if (first) { + first = false + // `Union` inherits its first child's outputs. We don't remove those aliases from the + // first child's tree that prevent aliased attributes to appear multiple times in the + // `Union`'s output. A parent projection node on the top of an `Union` with non-unique + // output attributes could return incorrect result. + removeRedundantAliases(child, excluded ++ child.outputSet) + } else { + removeRedundantAliases(child, excluded) + } + } + case _ => // Remove redundant aliases in the subtree(s). val currentNextAttrPairs = mutable.Buffer.empty[(Attribute, Attribute)] @@ -551,9 +568,6 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { newChild } - // Create the attribute mapping. Note that the currentNextAttrPairs can contain duplicate - // keys in case of Union (this is caused by the PushProjectionThroughUnion rule); in this - // case we use the first mapping (which should be provided by the first child). val mapping = AttributeMap(currentNextAttrPairs.toSeq) // Create a an expression cleaning function for nodes that can actually produce redundant diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala index 4b02a847880f7..29359b1f0cb60 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala @@ -97,7 +97,7 @@ class RemoveRedundantAliasAndProjectSuite extends PlanTest with PredicateHelper val r2 = LocalRelation('b.int) val query = r1.select('a as 'a).union(r2.select('b as 'b)).select('a).analyze val optimized = Optimize.execute(query) - val expected = r1.union(r2) + val expected = r1.select($"a" as "a").union(r2).analyze comparePlans(optimized, expected) } diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt index f5b3161c7e777..e93babc018cad 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt @@ -476,31 +476,31 @@ Input [5]: [i_brand_id#38, i_class_id#39, i_category_id#40, sales#49, number_sal Condition : (isnotnull(sales#49) AND (cast(sales#49 as decimal(32,6)) > cast(Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) (79) Project [codegen id : 44] -Output [6]: [sales#49, number_sales#50, store AS channel#53, i_brand_id#38, i_class_id#39, i_category_id#40] +Output [6]: [sales#49, number_sales#50, store AS channel#53, i_brand_id#38 AS i_brand_id#54, i_class_id#39 AS i_class_id#55, i_category_id#40 AS i_category_id#56] Input [5]: [i_brand_id#38, i_class_id#39, i_category_id#40, sales#49, number_sales#50] (80) Scan parquet default.catalog_sales -Output [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] +Output [4]: [cs_item_sk#57, cs_quantity#58, cs_list_price#59, cs_sold_date_sk#60] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#57), dynamicpruningexpression(cs_sold_date_sk#57 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#60), dynamicpruningexpression(cs_sold_date_sk#60 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (81) ColumnarToRow [codegen id : 45] -Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] +Input [4]: [cs_item_sk#57, cs_quantity#58, cs_list_price#59, cs_sold_date_sk#60] (82) Filter [codegen id : 45] -Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] -Condition : isnotnull(cs_item_sk#54) +Input [4]: [cs_item_sk#57, cs_quantity#58, cs_list_price#59, cs_sold_date_sk#60] +Condition : isnotnull(cs_item_sk#57) (83) Exchange -Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] -Arguments: hashpartitioning(cs_item_sk#54, 5), ENSURE_REQUIREMENTS, [plan_id=14] +Input [4]: [cs_item_sk#57, cs_quantity#58, cs_list_price#59, cs_sold_date_sk#60] +Arguments: hashpartitioning(cs_item_sk#57, 5), ENSURE_REQUIREMENTS, [plan_id=14] (84) Sort [codegen id : 46] -Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] -Arguments: [cs_item_sk#54 ASC NULLS FIRST], false, 0 +Input [4]: [cs_item_sk#57, cs_quantity#58, cs_list_price#59, cs_sold_date_sk#60] +Arguments: [cs_item_sk#57 ASC NULLS FIRST], false, 0 (85) ReusedExchange [Reuses operator id: 58] Output [1]: [ss_item_sk#35] @@ -510,82 +510,82 @@ Input [1]: [ss_item_sk#35] Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (87) SortMergeJoin [codegen id : 87] -Left keys [1]: [cs_item_sk#54] +Left keys [1]: [cs_item_sk#57] Right keys [1]: [ss_item_sk#35] Join condition: None (88) ReusedExchange [Reuses operator id: 147] -Output [1]: [d_date_sk#58] +Output [1]: [d_date_sk#61] (89) BroadcastHashJoin [codegen id : 87] -Left keys [1]: [cs_sold_date_sk#57] -Right keys [1]: [d_date_sk#58] +Left keys [1]: [cs_sold_date_sk#60] +Right keys [1]: [d_date_sk#61] Join condition: None (90) Project [codegen id : 87] -Output [3]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56] -Input [5]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57, d_date_sk#58] +Output [3]: [cs_item_sk#57, cs_quantity#58, cs_list_price#59] +Input [5]: [cs_item_sk#57, cs_quantity#58, cs_list_price#59, cs_sold_date_sk#60, d_date_sk#61] (91) ReusedExchange [Reuses operator id: 72] -Output [4]: [i_item_sk#59, i_brand_id#60, i_class_id#61, i_category_id#62] +Output [4]: [i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65] (92) BroadcastHashJoin [codegen id : 87] -Left keys [1]: [cs_item_sk#54] -Right keys [1]: [i_item_sk#59] +Left keys [1]: [cs_item_sk#57] +Right keys [1]: [i_item_sk#62] Join condition: None (93) Project [codegen id : 87] -Output [5]: [cs_quantity#55, cs_list_price#56, i_brand_id#60, i_class_id#61, i_category_id#62] -Input [7]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, i_item_sk#59, i_brand_id#60, i_class_id#61, i_category_id#62] +Output [5]: [cs_quantity#58, cs_list_price#59, i_brand_id#63, i_class_id#64, i_category_id#65] +Input [7]: [cs_item_sk#57, cs_quantity#58, cs_list_price#59, i_item_sk#62, i_brand_id#63, i_class_id#64, i_category_id#65] (94) HashAggregate [codegen id : 87] -Input [5]: [cs_quantity#55, cs_list_price#56, i_brand_id#60, i_class_id#61, i_category_id#62] -Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#63, isEmpty#64, count#65] -Results [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#66, isEmpty#67, count#68] +Input [5]: [cs_quantity#58, cs_list_price#59, i_brand_id#63, i_class_id#64, i_category_id#65] +Keys [3]: [i_brand_id#63, i_class_id#64, i_category_id#65] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#58 as decimal(12,2))) * promote_precision(cast(cs_list_price#59 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#66, isEmpty#67, count#68] +Results [6]: [i_brand_id#63, i_class_id#64, i_category_id#65, sum#69, isEmpty#70, count#71] (95) Exchange -Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#66, isEmpty#67, count#68] -Arguments: hashpartitioning(i_brand_id#60, i_class_id#61, i_category_id#62, 5), ENSURE_REQUIREMENTS, [plan_id=15] +Input [6]: [i_brand_id#63, i_class_id#64, i_category_id#65, sum#69, isEmpty#70, count#71] +Arguments: hashpartitioning(i_brand_id#63, i_class_id#64, i_category_id#65, 5), ENSURE_REQUIREMENTS, [plan_id=15] (96) HashAggregate [codegen id : 88] -Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#66, isEmpty#67, count#68] -Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#69, count(1)#70] -Results [5]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#69 AS sales#71, count(1)#70 AS number_sales#72] +Input [6]: [i_brand_id#63, i_class_id#64, i_category_id#65, sum#69, isEmpty#70, count#71] +Keys [3]: [i_brand_id#63, i_class_id#64, i_category_id#65] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#58 as decimal(12,2))) * promote_precision(cast(cs_list_price#59 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#58 as decimal(12,2))) * promote_precision(cast(cs_list_price#59 as decimal(12,2)))), DecimalType(18,2)))#72, count(1)#73] +Results [5]: [i_brand_id#63, i_class_id#64, i_category_id#65, sum(CheckOverflow((promote_precision(cast(cs_quantity#58 as decimal(12,2))) * promote_precision(cast(cs_list_price#59 as decimal(12,2)))), DecimalType(18,2)))#72 AS sales#74, count(1)#73 AS number_sales#75] (97) Filter [codegen id : 88] -Input [5]: [i_brand_id#60, i_class_id#61, i_category_id#62, sales#71, number_sales#72] -Condition : (isnotnull(sales#71) AND (cast(sales#71 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) +Input [5]: [i_brand_id#63, i_class_id#64, i_category_id#65, sales#74, number_sales#75] +Condition : (isnotnull(sales#74) AND (cast(sales#74 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) (98) Project [codegen id : 88] -Output [6]: [sales#71, number_sales#72, catalog AS channel#73, i_brand_id#60, i_class_id#61, i_category_id#62] -Input [5]: [i_brand_id#60, i_class_id#61, i_category_id#62, sales#71, number_sales#72] +Output [6]: [sales#74, number_sales#75, catalog AS channel#76, i_brand_id#63, i_class_id#64, i_category_id#65] +Input [5]: [i_brand_id#63, i_class_id#64, i_category_id#65, sales#74, number_sales#75] (99) Scan parquet default.web_sales -Output [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] +Output [4]: [ws_item_sk#77, ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#77), dynamicpruningexpression(ws_sold_date_sk#77 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#80), dynamicpruningexpression(ws_sold_date_sk#80 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (100) ColumnarToRow [codegen id : 89] -Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] +Input [4]: [ws_item_sk#77, ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80] (101) Filter [codegen id : 89] -Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] -Condition : isnotnull(ws_item_sk#74) +Input [4]: [ws_item_sk#77, ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80] +Condition : isnotnull(ws_item_sk#77) (102) Exchange -Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] -Arguments: hashpartitioning(ws_item_sk#74, 5), ENSURE_REQUIREMENTS, [plan_id=16] +Input [4]: [ws_item_sk#77, ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80] +Arguments: hashpartitioning(ws_item_sk#77, 5), ENSURE_REQUIREMENTS, [plan_id=16] (103) Sort [codegen id : 90] -Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] -Arguments: [ws_item_sk#74 ASC NULLS FIRST], false, 0 +Input [4]: [ws_item_sk#77, ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80] +Arguments: [ws_item_sk#77 ASC NULLS FIRST], false, 0 (104) ReusedExchange [Reuses operator id: 58] Output [1]: [ss_item_sk#35] @@ -595,87 +595,87 @@ Input [1]: [ss_item_sk#35] Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (106) SortMergeJoin [codegen id : 131] -Left keys [1]: [ws_item_sk#74] +Left keys [1]: [ws_item_sk#77] Right keys [1]: [ss_item_sk#35] Join condition: None (107) ReusedExchange [Reuses operator id: 147] -Output [1]: [d_date_sk#78] +Output [1]: [d_date_sk#81] (108) BroadcastHashJoin [codegen id : 131] -Left keys [1]: [ws_sold_date_sk#77] -Right keys [1]: [d_date_sk#78] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#81] Join condition: None (109) Project [codegen id : 131] -Output [3]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76] -Input [5]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77, d_date_sk#78] +Output [3]: [ws_item_sk#77, ws_quantity#78, ws_list_price#79] +Input [5]: [ws_item_sk#77, ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80, d_date_sk#81] (110) ReusedExchange [Reuses operator id: 72] -Output [4]: [i_item_sk#79, i_brand_id#80, i_class_id#81, i_category_id#82] +Output [4]: [i_item_sk#82, i_brand_id#83, i_class_id#84, i_category_id#85] (111) BroadcastHashJoin [codegen id : 131] -Left keys [1]: [ws_item_sk#74] -Right keys [1]: [i_item_sk#79] +Left keys [1]: [ws_item_sk#77] +Right keys [1]: [i_item_sk#82] Join condition: None (112) Project [codegen id : 131] -Output [5]: [ws_quantity#75, ws_list_price#76, i_brand_id#80, i_class_id#81, i_category_id#82] -Input [7]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, i_item_sk#79, i_brand_id#80, i_class_id#81, i_category_id#82] +Output [5]: [ws_quantity#78, ws_list_price#79, i_brand_id#83, i_class_id#84, i_category_id#85] +Input [7]: [ws_item_sk#77, ws_quantity#78, ws_list_price#79, i_item_sk#82, i_brand_id#83, i_class_id#84, i_category_id#85] (113) HashAggregate [codegen id : 131] -Input [5]: [ws_quantity#75, ws_list_price#76, i_brand_id#80, i_class_id#81, i_category_id#82] -Keys [3]: [i_brand_id#80, i_class_id#81, i_category_id#82] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#83, isEmpty#84, count#85] -Results [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#86, isEmpty#87, count#88] +Input [5]: [ws_quantity#78, ws_list_price#79, i_brand_id#83, i_class_id#84, i_category_id#85] +Keys [3]: [i_brand_id#83, i_class_id#84, i_category_id#85] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#78 as decimal(12,2))) * promote_precision(cast(ws_list_price#79 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#86, isEmpty#87, count#88] +Results [6]: [i_brand_id#83, i_class_id#84, i_category_id#85, sum#89, isEmpty#90, count#91] (114) Exchange -Input [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#86, isEmpty#87, count#88] -Arguments: hashpartitioning(i_brand_id#80, i_class_id#81, i_category_id#82, 5), ENSURE_REQUIREMENTS, [plan_id=17] +Input [6]: [i_brand_id#83, i_class_id#84, i_category_id#85, sum#89, isEmpty#90, count#91] +Arguments: hashpartitioning(i_brand_id#83, i_class_id#84, i_category_id#85, 5), ENSURE_REQUIREMENTS, [plan_id=17] (115) HashAggregate [codegen id : 132] -Input [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#86, isEmpty#87, count#88] -Keys [3]: [i_brand_id#80, i_class_id#81, i_category_id#82] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2)))#89, count(1)#90] -Results [5]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2)))#89 AS sales#91, count(1)#90 AS number_sales#92] +Input [6]: [i_brand_id#83, i_class_id#84, i_category_id#85, sum#89, isEmpty#90, count#91] +Keys [3]: [i_brand_id#83, i_class_id#84, i_category_id#85] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#78 as decimal(12,2))) * promote_precision(cast(ws_list_price#79 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#78 as decimal(12,2))) * promote_precision(cast(ws_list_price#79 as decimal(12,2)))), DecimalType(18,2)))#92, count(1)#93] +Results [5]: [i_brand_id#83, i_class_id#84, i_category_id#85, sum(CheckOverflow((promote_precision(cast(ws_quantity#78 as decimal(12,2))) * promote_precision(cast(ws_list_price#79 as decimal(12,2)))), DecimalType(18,2)))#92 AS sales#94, count(1)#93 AS number_sales#95] (116) Filter [codegen id : 132] -Input [5]: [i_brand_id#80, i_class_id#81, i_category_id#82, sales#91, number_sales#92] -Condition : (isnotnull(sales#91) AND (cast(sales#91 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) +Input [5]: [i_brand_id#83, i_class_id#84, i_category_id#85, sales#94, number_sales#95] +Condition : (isnotnull(sales#94) AND (cast(sales#94 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) (117) Project [codegen id : 132] -Output [6]: [sales#91, number_sales#92, web AS channel#93, i_brand_id#80, i_class_id#81, i_category_id#82] -Input [5]: [i_brand_id#80, i_class_id#81, i_category_id#82, sales#91, number_sales#92] +Output [6]: [sales#94, number_sales#95, web AS channel#96, i_brand_id#83, i_class_id#84, i_category_id#85] +Input [5]: [i_brand_id#83, i_class_id#84, i_category_id#85, sales#94, number_sales#95] (118) Union (119) Expand [codegen id : 133] -Input [6]: [sales#49, number_sales#50, channel#53, i_brand_id#38, i_class_id#39, i_category_id#40] -Arguments: [[sales#49, number_sales#50, channel#53, i_brand_id#38, i_class_id#39, i_category_id#40, 0], [sales#49, number_sales#50, channel#53, i_brand_id#38, i_class_id#39, null, 1], [sales#49, number_sales#50, channel#53, i_brand_id#38, null, null, 3], [sales#49, number_sales#50, channel#53, null, null, null, 7], [sales#49, number_sales#50, null, null, null, null, 15]], [sales#49, number_sales#50, channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] +Input [6]: [sales#49, number_sales#50, channel#53, i_brand_id#54, i_class_id#55, i_category_id#56] +Arguments: [[sales#49, number_sales#50, channel#53, i_brand_id#54, i_class_id#55, i_category_id#56, 0], [sales#49, number_sales#50, channel#53, i_brand_id#54, i_class_id#55, null, 1], [sales#49, number_sales#50, channel#53, i_brand_id#54, null, null, 3], [sales#49, number_sales#50, channel#53, null, null, null, 7], [sales#49, number_sales#50, null, null, null, null, 15]], [sales#49, number_sales#50, channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101] (120) HashAggregate [codegen id : 133] -Input [7]: [sales#49, number_sales#50, channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] -Keys [5]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] +Input [7]: [sales#49, number_sales#50, channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101] +Keys [5]: [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101] Functions [2]: [partial_sum(sales#49), partial_sum(number_sales#50)] -Aggregate Attributes [3]: [sum#99, isEmpty#100, sum#101] -Results [8]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, sum#102, isEmpty#103, sum#104] +Aggregate Attributes [3]: [sum#102, isEmpty#103, sum#104] +Results [8]: [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101, sum#105, isEmpty#106, sum#107] (121) Exchange -Input [8]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, sum#102, isEmpty#103, sum#104] -Arguments: hashpartitioning(channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, 5), ENSURE_REQUIREMENTS, [plan_id=18] +Input [8]: [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101, sum#105, isEmpty#106, sum#107] +Arguments: hashpartitioning(channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101, 5), ENSURE_REQUIREMENTS, [plan_id=18] (122) HashAggregate [codegen id : 134] -Input [8]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, sum#102, isEmpty#103, sum#104] -Keys [5]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] +Input [8]: [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101, sum#105, isEmpty#106, sum#107] +Keys [5]: [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101] Functions [2]: [sum(sales#49), sum(number_sales#50)] -Aggregate Attributes [2]: [sum(sales#49)#105, sum(number_sales#50)#106] -Results [6]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, sum(sales#49)#105 AS sum(sales)#107, sum(number_sales#50)#106 AS sum(number_sales)#108] +Aggregate Attributes [2]: [sum(sales#49)#108, sum(number_sales#50)#109] +Results [6]: [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, sum(sales#49)#108 AS sum(sales)#110, sum(number_sales#50)#109 AS sum(number_sales)#111] (123) TakeOrderedAndProject -Input [6]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, sum(sales)#107, sum(number_sales)#108] -Arguments: 100, [channel#94 ASC NULLS FIRST, i_brand_id#95 ASC NULLS FIRST, i_class_id#96 ASC NULLS FIRST, i_category_id#97 ASC NULLS FIRST], [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, sum(sales)#107, sum(number_sales)#108] +Input [6]: [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, sum(sales)#110, sum(number_sales)#111] +Arguments: 100, [channel#97 ASC NULLS FIRST, i_brand_id#98 ASC NULLS FIRST, i_class_id#99 ASC NULLS FIRST, i_category_id#100 ASC NULLS FIRST], [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, sum(sales)#110, sum(number_sales)#111] ===== Subqueries ===== @@ -702,96 +702,96 @@ Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquer (124) Scan parquet default.store_sales -Output [3]: [ss_quantity#109, ss_list_price#110, ss_sold_date_sk#111] +Output [3]: [ss_quantity#112, ss_list_price#113, ss_sold_date_sk#114] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#111), dynamicpruningexpression(ss_sold_date_sk#111 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ss_sold_date_sk#114), dynamicpruningexpression(ss_sold_date_sk#114 IN dynamicpruning#12)] ReadSchema: struct (125) ColumnarToRow [codegen id : 2] -Input [3]: [ss_quantity#109, ss_list_price#110, ss_sold_date_sk#111] +Input [3]: [ss_quantity#112, ss_list_price#113, ss_sold_date_sk#114] (126) ReusedExchange [Reuses operator id: 152] -Output [1]: [d_date_sk#112] +Output [1]: [d_date_sk#115] (127) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#111] -Right keys [1]: [d_date_sk#112] +Left keys [1]: [ss_sold_date_sk#114] +Right keys [1]: [d_date_sk#115] Join condition: None (128) Project [codegen id : 2] -Output [2]: [ss_quantity#109 AS quantity#113, ss_list_price#110 AS list_price#114] -Input [4]: [ss_quantity#109, ss_list_price#110, ss_sold_date_sk#111, d_date_sk#112] +Output [2]: [ss_quantity#112 AS quantity#116, ss_list_price#113 AS list_price#117] +Input [4]: [ss_quantity#112, ss_list_price#113, ss_sold_date_sk#114, d_date_sk#115] (129) Scan parquet default.catalog_sales -Output [3]: [cs_quantity#115, cs_list_price#116, cs_sold_date_sk#117] +Output [3]: [cs_quantity#118, cs_list_price#119, cs_sold_date_sk#120] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#117), dynamicpruningexpression(cs_sold_date_sk#117 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(cs_sold_date_sk#120), dynamicpruningexpression(cs_sold_date_sk#120 IN dynamicpruning#12)] ReadSchema: struct (130) ColumnarToRow [codegen id : 4] -Input [3]: [cs_quantity#115, cs_list_price#116, cs_sold_date_sk#117] +Input [3]: [cs_quantity#118, cs_list_price#119, cs_sold_date_sk#120] (131) ReusedExchange [Reuses operator id: 152] -Output [1]: [d_date_sk#118] +Output [1]: [d_date_sk#121] (132) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#117] -Right keys [1]: [d_date_sk#118] +Left keys [1]: [cs_sold_date_sk#120] +Right keys [1]: [d_date_sk#121] Join condition: None (133) Project [codegen id : 4] -Output [2]: [cs_quantity#115 AS quantity#119, cs_list_price#116 AS list_price#120] -Input [4]: [cs_quantity#115, cs_list_price#116, cs_sold_date_sk#117, d_date_sk#118] +Output [2]: [cs_quantity#118 AS quantity#122, cs_list_price#119 AS list_price#123] +Input [4]: [cs_quantity#118, cs_list_price#119, cs_sold_date_sk#120, d_date_sk#121] (134) Scan parquet default.web_sales -Output [3]: [ws_quantity#121, ws_list_price#122, ws_sold_date_sk#123] +Output [3]: [ws_quantity#124, ws_list_price#125, ws_sold_date_sk#126] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#123), dynamicpruningexpression(ws_sold_date_sk#123 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ws_sold_date_sk#126), dynamicpruningexpression(ws_sold_date_sk#126 IN dynamicpruning#12)] ReadSchema: struct (135) ColumnarToRow [codegen id : 6] -Input [3]: [ws_quantity#121, ws_list_price#122, ws_sold_date_sk#123] +Input [3]: [ws_quantity#124, ws_list_price#125, ws_sold_date_sk#126] (136) ReusedExchange [Reuses operator id: 152] -Output [1]: [d_date_sk#124] +Output [1]: [d_date_sk#127] (137) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#123] -Right keys [1]: [d_date_sk#124] +Left keys [1]: [ws_sold_date_sk#126] +Right keys [1]: [d_date_sk#127] Join condition: None (138) Project [codegen id : 6] -Output [2]: [ws_quantity#121 AS quantity#125, ws_list_price#122 AS list_price#126] -Input [4]: [ws_quantity#121, ws_list_price#122, ws_sold_date_sk#123, d_date_sk#124] +Output [2]: [ws_quantity#124 AS quantity#128, ws_list_price#125 AS list_price#129] +Input [4]: [ws_quantity#124, ws_list_price#125, ws_sold_date_sk#126, d_date_sk#127] (139) Union (140) HashAggregate [codegen id : 7] -Input [2]: [quantity#113, list_price#114] +Input [2]: [quantity#116, list_price#117] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#127, count#128] -Results [2]: [sum#129, count#130] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#116 as decimal(12,2))) * promote_precision(cast(list_price#117 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#130, count#131] +Results [2]: [sum#132, count#133] (141) Exchange -Input [2]: [sum#129, count#130] +Input [2]: [sum#132, count#133] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=19] (142) HashAggregate [codegen id : 8] -Input [2]: [sum#129, count#130] +Input [2]: [sum#132, count#133] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))#131] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))#131 AS average_sales#132] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#116 as decimal(12,2))) * promote_precision(cast(list_price#117 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#116 as decimal(12,2))) * promote_precision(cast(list_price#117 as decimal(12,2)))), DecimalType(18,2)))#134] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#116 as decimal(12,2))) * promote_precision(cast(list_price#117 as decimal(12,2)))), DecimalType(18,2)))#134 AS average_sales#135] -Subquery:2 Hosting operator id = 124 Hosting Expression = ss_sold_date_sk#111 IN dynamicpruning#12 +Subquery:2 Hosting operator id = 124 Hosting Expression = ss_sold_date_sk#114 IN dynamicpruning#12 -Subquery:3 Hosting operator id = 129 Hosting Expression = cs_sold_date_sk#117 IN dynamicpruning#12 +Subquery:3 Hosting operator id = 129 Hosting Expression = cs_sold_date_sk#120 IN dynamicpruning#12 -Subquery:4 Hosting operator id = 134 Hosting Expression = ws_sold_date_sk#123 IN dynamicpruning#12 +Subquery:4 Hosting operator id = 134 Hosting Expression = ws_sold_date_sk#126 IN dynamicpruning#12 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 BroadcastExchange (147) @@ -802,22 +802,22 @@ BroadcastExchange (147) (143) Scan parquet default.date_dim -Output [3]: [d_date_sk#36, d_year#133, d_moy#134] +Output [3]: [d_date_sk#36, d_year#136, d_moy#137] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct (144) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#36, d_year#133, d_moy#134] +Input [3]: [d_date_sk#36, d_year#136, d_moy#137] (145) Filter [codegen id : 1] -Input [3]: [d_date_sk#36, d_year#133, d_moy#134] -Condition : ((((isnotnull(d_year#133) AND isnotnull(d_moy#134)) AND (d_year#133 = 2001)) AND (d_moy#134 = 11)) AND isnotnull(d_date_sk#36)) +Input [3]: [d_date_sk#36, d_year#136, d_moy#137] +Condition : ((((isnotnull(d_year#136) AND isnotnull(d_moy#137)) AND (d_year#136 = 2001)) AND (d_moy#137 = 11)) AND isnotnull(d_date_sk#36)) (146) Project [codegen id : 1] Output [1]: [d_date_sk#36] -Input [3]: [d_date_sk#36, d_year#133, d_moy#134] +Input [3]: [d_date_sk#36, d_year#136, d_moy#137] (147) BroadcastExchange Input [1]: [d_date_sk#36] @@ -832,22 +832,22 @@ BroadcastExchange (152) (148) Scan parquet default.date_dim -Output [2]: [d_date_sk#13, d_year#135] +Output [2]: [d_date_sk#13, d_year#138] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (149) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#13, d_year#135] +Input [2]: [d_date_sk#13, d_year#138] (150) Filter [codegen id : 1] -Input [2]: [d_date_sk#13, d_year#135] -Condition : (((isnotnull(d_year#135) AND (d_year#135 >= 1999)) AND (d_year#135 <= 2001)) AND isnotnull(d_date_sk#13)) +Input [2]: [d_date_sk#13, d_year#138] +Condition : (((isnotnull(d_year#138) AND (d_year#138 >= 1999)) AND (d_year#138 <= 2001)) AND isnotnull(d_date_sk#13)) (151) Project [codegen id : 1] Output [1]: [d_date_sk#13] -Input [2]: [d_date_sk#13, d_year#135] +Input [2]: [d_date_sk#13, d_year#138] (152) BroadcastExchange Input [1]: [d_date_sk#13] @@ -859,10 +859,10 @@ Subquery:8 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#29 IN d Subquery:9 Hosting operator id = 97 Hosting Expression = ReusedSubquery Subquery scalar-subquery#51, [id=#52] -Subquery:10 Hosting operator id = 80 Hosting Expression = cs_sold_date_sk#57 IN dynamicpruning#5 +Subquery:10 Hosting operator id = 80 Hosting Expression = cs_sold_date_sk#60 IN dynamicpruning#5 Subquery:11 Hosting operator id = 116 Hosting Expression = ReusedSubquery Subquery scalar-subquery#51, [id=#52] -Subquery:12 Hosting operator id = 99 Hosting Expression = ws_sold_date_sk#77 IN dynamicpruning#5 +Subquery:12 Hosting operator id = 99 Hosting Expression = ws_sold_date_sk#80 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt index fd17c5d762e21..5eafc668066bf 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt @@ -410,182 +410,182 @@ Input [5]: [i_brand_id#37, i_class_id#38, i_category_id#39, sales#49, number_sal Condition : (isnotnull(sales#49) AND (cast(sales#49 as decimal(32,6)) > cast(Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) (67) Project [codegen id : 26] -Output [6]: [sales#49, number_sales#50, store AS channel#53, i_brand_id#37, i_class_id#38, i_category_id#39] +Output [6]: [sales#49, number_sales#50, store AS channel#53, i_brand_id#37 AS i_brand_id#54, i_class_id#38 AS i_class_id#55, i_category_id#39 AS i_category_id#56] Input [5]: [i_brand_id#37, i_class_id#38, i_category_id#39, sales#49, number_sales#50] (68) Scan parquet default.catalog_sales -Output [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] +Output [4]: [cs_item_sk#57, cs_quantity#58, cs_list_price#59, cs_sold_date_sk#60] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#57), dynamicpruningexpression(cs_sold_date_sk#57 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#60), dynamicpruningexpression(cs_sold_date_sk#60 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (69) ColumnarToRow [codegen id : 51] -Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] +Input [4]: [cs_item_sk#57, cs_quantity#58, cs_list_price#59, cs_sold_date_sk#60] (70) Filter [codegen id : 51] -Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] -Condition : isnotnull(cs_item_sk#54) +Input [4]: [cs_item_sk#57, cs_quantity#58, cs_list_price#59, cs_sold_date_sk#60] +Condition : isnotnull(cs_item_sk#57) (71) ReusedExchange [Reuses operator id: 50] Output [1]: [ss_item_sk#35] (72) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [cs_item_sk#54] +Left keys [1]: [cs_item_sk#57] Right keys [1]: [ss_item_sk#35] Join condition: None (73) ReusedExchange [Reuses operator id: 57] -Output [4]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61] +Output [4]: [i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64] (74) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [cs_item_sk#54] -Right keys [1]: [i_item_sk#58] +Left keys [1]: [cs_item_sk#57] +Right keys [1]: [i_item_sk#61] Join condition: None (75) Project [codegen id : 51] -Output [6]: [cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57, i_brand_id#59, i_class_id#60, i_category_id#61] -Input [8]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61] +Output [6]: [cs_quantity#58, cs_list_price#59, cs_sold_date_sk#60, i_brand_id#62, i_class_id#63, i_category_id#64] +Input [8]: [cs_item_sk#57, cs_quantity#58, cs_list_price#59, cs_sold_date_sk#60, i_item_sk#61, i_brand_id#62, i_class_id#63, i_category_id#64] (76) ReusedExchange [Reuses operator id: 129] -Output [1]: [d_date_sk#62] +Output [1]: [d_date_sk#65] (77) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [cs_sold_date_sk#57] -Right keys [1]: [d_date_sk#62] +Left keys [1]: [cs_sold_date_sk#60] +Right keys [1]: [d_date_sk#65] Join condition: None (78) Project [codegen id : 51] -Output [5]: [cs_quantity#55, cs_list_price#56, i_brand_id#59, i_class_id#60, i_category_id#61] -Input [7]: [cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57, i_brand_id#59, i_class_id#60, i_category_id#61, d_date_sk#62] +Output [5]: [cs_quantity#58, cs_list_price#59, i_brand_id#62, i_class_id#63, i_category_id#64] +Input [7]: [cs_quantity#58, cs_list_price#59, cs_sold_date_sk#60, i_brand_id#62, i_class_id#63, i_category_id#64, d_date_sk#65] (79) HashAggregate [codegen id : 51] -Input [5]: [cs_quantity#55, cs_list_price#56, i_brand_id#59, i_class_id#60, i_category_id#61] -Keys [3]: [i_brand_id#59, i_class_id#60, i_category_id#61] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#63, isEmpty#64, count#65] -Results [6]: [i_brand_id#59, i_class_id#60, i_category_id#61, sum#66, isEmpty#67, count#68] +Input [5]: [cs_quantity#58, cs_list_price#59, i_brand_id#62, i_class_id#63, i_category_id#64] +Keys [3]: [i_brand_id#62, i_class_id#63, i_category_id#64] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#58 as decimal(12,2))) * promote_precision(cast(cs_list_price#59 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#66, isEmpty#67, count#68] +Results [6]: [i_brand_id#62, i_class_id#63, i_category_id#64, sum#69, isEmpty#70, count#71] (80) Exchange -Input [6]: [i_brand_id#59, i_class_id#60, i_category_id#61, sum#66, isEmpty#67, count#68] -Arguments: hashpartitioning(i_brand_id#59, i_class_id#60, i_category_id#61, 5), ENSURE_REQUIREMENTS, [plan_id=10] +Input [6]: [i_brand_id#62, i_class_id#63, i_category_id#64, sum#69, isEmpty#70, count#71] +Arguments: hashpartitioning(i_brand_id#62, i_class_id#63, i_category_id#64, 5), ENSURE_REQUIREMENTS, [plan_id=10] (81) HashAggregate [codegen id : 52] -Input [6]: [i_brand_id#59, i_class_id#60, i_category_id#61, sum#66, isEmpty#67, count#68] -Keys [3]: [i_brand_id#59, i_class_id#60, i_category_id#61] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#69, count(1)#70] -Results [5]: [i_brand_id#59, i_class_id#60, i_category_id#61, sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#69 AS sales#71, count(1)#70 AS number_sales#72] +Input [6]: [i_brand_id#62, i_class_id#63, i_category_id#64, sum#69, isEmpty#70, count#71] +Keys [3]: [i_brand_id#62, i_class_id#63, i_category_id#64] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#58 as decimal(12,2))) * promote_precision(cast(cs_list_price#59 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#58 as decimal(12,2))) * promote_precision(cast(cs_list_price#59 as decimal(12,2)))), DecimalType(18,2)))#72, count(1)#73] +Results [5]: [i_brand_id#62, i_class_id#63, i_category_id#64, sum(CheckOverflow((promote_precision(cast(cs_quantity#58 as decimal(12,2))) * promote_precision(cast(cs_list_price#59 as decimal(12,2)))), DecimalType(18,2)))#72 AS sales#74, count(1)#73 AS number_sales#75] (82) Filter [codegen id : 52] -Input [5]: [i_brand_id#59, i_class_id#60, i_category_id#61, sales#71, number_sales#72] -Condition : (isnotnull(sales#71) AND (cast(sales#71 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) +Input [5]: [i_brand_id#62, i_class_id#63, i_category_id#64, sales#74, number_sales#75] +Condition : (isnotnull(sales#74) AND (cast(sales#74 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) (83) Project [codegen id : 52] -Output [6]: [sales#71, number_sales#72, catalog AS channel#73, i_brand_id#59, i_class_id#60, i_category_id#61] -Input [5]: [i_brand_id#59, i_class_id#60, i_category_id#61, sales#71, number_sales#72] +Output [6]: [sales#74, number_sales#75, catalog AS channel#76, i_brand_id#62, i_class_id#63, i_category_id#64] +Input [5]: [i_brand_id#62, i_class_id#63, i_category_id#64, sales#74, number_sales#75] (84) Scan parquet default.web_sales -Output [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] +Output [4]: [ws_item_sk#77, ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#77), dynamicpruningexpression(ws_sold_date_sk#77 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#80), dynamicpruningexpression(ws_sold_date_sk#80 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (85) ColumnarToRow [codegen id : 77] -Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] +Input [4]: [ws_item_sk#77, ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80] (86) Filter [codegen id : 77] -Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] -Condition : isnotnull(ws_item_sk#74) +Input [4]: [ws_item_sk#77, ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80] +Condition : isnotnull(ws_item_sk#77) (87) ReusedExchange [Reuses operator id: 50] Output [1]: [ss_item_sk#35] (88) BroadcastHashJoin [codegen id : 77] -Left keys [1]: [ws_item_sk#74] +Left keys [1]: [ws_item_sk#77] Right keys [1]: [ss_item_sk#35] Join condition: None (89) ReusedExchange [Reuses operator id: 57] -Output [4]: [i_item_sk#78, i_brand_id#79, i_class_id#80, i_category_id#81] +Output [4]: [i_item_sk#81, i_brand_id#82, i_class_id#83, i_category_id#84] (90) BroadcastHashJoin [codegen id : 77] -Left keys [1]: [ws_item_sk#74] -Right keys [1]: [i_item_sk#78] +Left keys [1]: [ws_item_sk#77] +Right keys [1]: [i_item_sk#81] Join condition: None (91) Project [codegen id : 77] -Output [6]: [ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77, i_brand_id#79, i_class_id#80, i_category_id#81] -Input [8]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77, i_item_sk#78, i_brand_id#79, i_class_id#80, i_category_id#81] +Output [6]: [ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80, i_brand_id#82, i_class_id#83, i_category_id#84] +Input [8]: [ws_item_sk#77, ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80, i_item_sk#81, i_brand_id#82, i_class_id#83, i_category_id#84] (92) ReusedExchange [Reuses operator id: 129] -Output [1]: [d_date_sk#82] +Output [1]: [d_date_sk#85] (93) BroadcastHashJoin [codegen id : 77] -Left keys [1]: [ws_sold_date_sk#77] -Right keys [1]: [d_date_sk#82] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#85] Join condition: None (94) Project [codegen id : 77] -Output [5]: [ws_quantity#75, ws_list_price#76, i_brand_id#79, i_class_id#80, i_category_id#81] -Input [7]: [ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77, i_brand_id#79, i_class_id#80, i_category_id#81, d_date_sk#82] +Output [5]: [ws_quantity#78, ws_list_price#79, i_brand_id#82, i_class_id#83, i_category_id#84] +Input [7]: [ws_quantity#78, ws_list_price#79, ws_sold_date_sk#80, i_brand_id#82, i_class_id#83, i_category_id#84, d_date_sk#85] (95) HashAggregate [codegen id : 77] -Input [5]: [ws_quantity#75, ws_list_price#76, i_brand_id#79, i_class_id#80, i_category_id#81] -Keys [3]: [i_brand_id#79, i_class_id#80, i_category_id#81] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#83, isEmpty#84, count#85] -Results [6]: [i_brand_id#79, i_class_id#80, i_category_id#81, sum#86, isEmpty#87, count#88] +Input [5]: [ws_quantity#78, ws_list_price#79, i_brand_id#82, i_class_id#83, i_category_id#84] +Keys [3]: [i_brand_id#82, i_class_id#83, i_category_id#84] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#78 as decimal(12,2))) * promote_precision(cast(ws_list_price#79 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#86, isEmpty#87, count#88] +Results [6]: [i_brand_id#82, i_class_id#83, i_category_id#84, sum#89, isEmpty#90, count#91] (96) Exchange -Input [6]: [i_brand_id#79, i_class_id#80, i_category_id#81, sum#86, isEmpty#87, count#88] -Arguments: hashpartitioning(i_brand_id#79, i_class_id#80, i_category_id#81, 5), ENSURE_REQUIREMENTS, [plan_id=11] +Input [6]: [i_brand_id#82, i_class_id#83, i_category_id#84, sum#89, isEmpty#90, count#91] +Arguments: hashpartitioning(i_brand_id#82, i_class_id#83, i_category_id#84, 5), ENSURE_REQUIREMENTS, [plan_id=11] (97) HashAggregate [codegen id : 78] -Input [6]: [i_brand_id#79, i_class_id#80, i_category_id#81, sum#86, isEmpty#87, count#88] -Keys [3]: [i_brand_id#79, i_class_id#80, i_category_id#81] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2)))#89, count(1)#90] -Results [5]: [i_brand_id#79, i_class_id#80, i_category_id#81, sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2)))#89 AS sales#91, count(1)#90 AS number_sales#92] +Input [6]: [i_brand_id#82, i_class_id#83, i_category_id#84, sum#89, isEmpty#90, count#91] +Keys [3]: [i_brand_id#82, i_class_id#83, i_category_id#84] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#78 as decimal(12,2))) * promote_precision(cast(ws_list_price#79 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#78 as decimal(12,2))) * promote_precision(cast(ws_list_price#79 as decimal(12,2)))), DecimalType(18,2)))#92, count(1)#93] +Results [5]: [i_brand_id#82, i_class_id#83, i_category_id#84, sum(CheckOverflow((promote_precision(cast(ws_quantity#78 as decimal(12,2))) * promote_precision(cast(ws_list_price#79 as decimal(12,2)))), DecimalType(18,2)))#92 AS sales#94, count(1)#93 AS number_sales#95] (98) Filter [codegen id : 78] -Input [5]: [i_brand_id#79, i_class_id#80, i_category_id#81, sales#91, number_sales#92] -Condition : (isnotnull(sales#91) AND (cast(sales#91 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) +Input [5]: [i_brand_id#82, i_class_id#83, i_category_id#84, sales#94, number_sales#95] +Condition : (isnotnull(sales#94) AND (cast(sales#94 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) (99) Project [codegen id : 78] -Output [6]: [sales#91, number_sales#92, web AS channel#93, i_brand_id#79, i_class_id#80, i_category_id#81] -Input [5]: [i_brand_id#79, i_class_id#80, i_category_id#81, sales#91, number_sales#92] +Output [6]: [sales#94, number_sales#95, web AS channel#96, i_brand_id#82, i_class_id#83, i_category_id#84] +Input [5]: [i_brand_id#82, i_class_id#83, i_category_id#84, sales#94, number_sales#95] (100) Union (101) Expand [codegen id : 79] -Input [6]: [sales#49, number_sales#50, channel#53, i_brand_id#37, i_class_id#38, i_category_id#39] -Arguments: [[sales#49, number_sales#50, channel#53, i_brand_id#37, i_class_id#38, i_category_id#39, 0], [sales#49, number_sales#50, channel#53, i_brand_id#37, i_class_id#38, null, 1], [sales#49, number_sales#50, channel#53, i_brand_id#37, null, null, 3], [sales#49, number_sales#50, channel#53, null, null, null, 7], [sales#49, number_sales#50, null, null, null, null, 15]], [sales#49, number_sales#50, channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] +Input [6]: [sales#49, number_sales#50, channel#53, i_brand_id#54, i_class_id#55, i_category_id#56] +Arguments: [[sales#49, number_sales#50, channel#53, i_brand_id#54, i_class_id#55, i_category_id#56, 0], [sales#49, number_sales#50, channel#53, i_brand_id#54, i_class_id#55, null, 1], [sales#49, number_sales#50, channel#53, i_brand_id#54, null, null, 3], [sales#49, number_sales#50, channel#53, null, null, null, 7], [sales#49, number_sales#50, null, null, null, null, 15]], [sales#49, number_sales#50, channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101] (102) HashAggregate [codegen id : 79] -Input [7]: [sales#49, number_sales#50, channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] -Keys [5]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] +Input [7]: [sales#49, number_sales#50, channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101] +Keys [5]: [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101] Functions [2]: [partial_sum(sales#49), partial_sum(number_sales#50)] -Aggregate Attributes [3]: [sum#99, isEmpty#100, sum#101] -Results [8]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, sum#102, isEmpty#103, sum#104] +Aggregate Attributes [3]: [sum#102, isEmpty#103, sum#104] +Results [8]: [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101, sum#105, isEmpty#106, sum#107] (103) Exchange -Input [8]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, sum#102, isEmpty#103, sum#104] -Arguments: hashpartitioning(channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, 5), ENSURE_REQUIREMENTS, [plan_id=12] +Input [8]: [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101, sum#105, isEmpty#106, sum#107] +Arguments: hashpartitioning(channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101, 5), ENSURE_REQUIREMENTS, [plan_id=12] (104) HashAggregate [codegen id : 80] -Input [8]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, sum#102, isEmpty#103, sum#104] -Keys [5]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] +Input [8]: [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101, sum#105, isEmpty#106, sum#107] +Keys [5]: [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, spark_grouping_id#101] Functions [2]: [sum(sales#49), sum(number_sales#50)] -Aggregate Attributes [2]: [sum(sales#49)#105, sum(number_sales#50)#106] -Results [6]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, sum(sales#49)#105 AS sum(sales)#107, sum(number_sales#50)#106 AS sum(number_sales)#108] +Aggregate Attributes [2]: [sum(sales#49)#108, sum(number_sales#50)#109] +Results [6]: [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, sum(sales#49)#108 AS sum(sales)#110, sum(number_sales#50)#109 AS sum(number_sales)#111] (105) TakeOrderedAndProject -Input [6]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, sum(sales)#107, sum(number_sales)#108] -Arguments: 100, [channel#94 ASC NULLS FIRST, i_brand_id#95 ASC NULLS FIRST, i_class_id#96 ASC NULLS FIRST, i_category_id#97 ASC NULLS FIRST], [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, sum(sales)#107, sum(number_sales)#108] +Input [6]: [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, sum(sales)#110, sum(number_sales)#111] +Arguments: 100, [channel#97 ASC NULLS FIRST, i_brand_id#98 ASC NULLS FIRST, i_class_id#99 ASC NULLS FIRST, i_category_id#100 ASC NULLS FIRST], [channel#97, i_brand_id#98, i_class_id#99, i_category_id#100, sum(sales)#110, sum(number_sales)#111] ===== Subqueries ===== @@ -612,96 +612,96 @@ Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquer (106) Scan parquet default.store_sales -Output [3]: [ss_quantity#109, ss_list_price#110, ss_sold_date_sk#111] +Output [3]: [ss_quantity#112, ss_list_price#113, ss_sold_date_sk#114] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#111), dynamicpruningexpression(ss_sold_date_sk#111 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ss_sold_date_sk#114), dynamicpruningexpression(ss_sold_date_sk#114 IN dynamicpruning#12)] ReadSchema: struct (107) ColumnarToRow [codegen id : 2] -Input [3]: [ss_quantity#109, ss_list_price#110, ss_sold_date_sk#111] +Input [3]: [ss_quantity#112, ss_list_price#113, ss_sold_date_sk#114] (108) ReusedExchange [Reuses operator id: 134] -Output [1]: [d_date_sk#112] +Output [1]: [d_date_sk#115] (109) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#111] -Right keys [1]: [d_date_sk#112] +Left keys [1]: [ss_sold_date_sk#114] +Right keys [1]: [d_date_sk#115] Join condition: None (110) Project [codegen id : 2] -Output [2]: [ss_quantity#109 AS quantity#113, ss_list_price#110 AS list_price#114] -Input [4]: [ss_quantity#109, ss_list_price#110, ss_sold_date_sk#111, d_date_sk#112] +Output [2]: [ss_quantity#112 AS quantity#116, ss_list_price#113 AS list_price#117] +Input [4]: [ss_quantity#112, ss_list_price#113, ss_sold_date_sk#114, d_date_sk#115] (111) Scan parquet default.catalog_sales -Output [3]: [cs_quantity#115, cs_list_price#116, cs_sold_date_sk#117] +Output [3]: [cs_quantity#118, cs_list_price#119, cs_sold_date_sk#120] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#117), dynamicpruningexpression(cs_sold_date_sk#117 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(cs_sold_date_sk#120), dynamicpruningexpression(cs_sold_date_sk#120 IN dynamicpruning#12)] ReadSchema: struct (112) ColumnarToRow [codegen id : 4] -Input [3]: [cs_quantity#115, cs_list_price#116, cs_sold_date_sk#117] +Input [3]: [cs_quantity#118, cs_list_price#119, cs_sold_date_sk#120] (113) ReusedExchange [Reuses operator id: 134] -Output [1]: [d_date_sk#118] +Output [1]: [d_date_sk#121] (114) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#117] -Right keys [1]: [d_date_sk#118] +Left keys [1]: [cs_sold_date_sk#120] +Right keys [1]: [d_date_sk#121] Join condition: None (115) Project [codegen id : 4] -Output [2]: [cs_quantity#115 AS quantity#119, cs_list_price#116 AS list_price#120] -Input [4]: [cs_quantity#115, cs_list_price#116, cs_sold_date_sk#117, d_date_sk#118] +Output [2]: [cs_quantity#118 AS quantity#122, cs_list_price#119 AS list_price#123] +Input [4]: [cs_quantity#118, cs_list_price#119, cs_sold_date_sk#120, d_date_sk#121] (116) Scan parquet default.web_sales -Output [3]: [ws_quantity#121, ws_list_price#122, ws_sold_date_sk#123] +Output [3]: [ws_quantity#124, ws_list_price#125, ws_sold_date_sk#126] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#123), dynamicpruningexpression(ws_sold_date_sk#123 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ws_sold_date_sk#126), dynamicpruningexpression(ws_sold_date_sk#126 IN dynamicpruning#12)] ReadSchema: struct (117) ColumnarToRow [codegen id : 6] -Input [3]: [ws_quantity#121, ws_list_price#122, ws_sold_date_sk#123] +Input [3]: [ws_quantity#124, ws_list_price#125, ws_sold_date_sk#126] (118) ReusedExchange [Reuses operator id: 134] -Output [1]: [d_date_sk#124] +Output [1]: [d_date_sk#127] (119) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#123] -Right keys [1]: [d_date_sk#124] +Left keys [1]: [ws_sold_date_sk#126] +Right keys [1]: [d_date_sk#127] Join condition: None (120) Project [codegen id : 6] -Output [2]: [ws_quantity#121 AS quantity#125, ws_list_price#122 AS list_price#126] -Input [4]: [ws_quantity#121, ws_list_price#122, ws_sold_date_sk#123, d_date_sk#124] +Output [2]: [ws_quantity#124 AS quantity#128, ws_list_price#125 AS list_price#129] +Input [4]: [ws_quantity#124, ws_list_price#125, ws_sold_date_sk#126, d_date_sk#127] (121) Union (122) HashAggregate [codegen id : 7] -Input [2]: [quantity#113, list_price#114] +Input [2]: [quantity#116, list_price#117] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#127, count#128] -Results [2]: [sum#129, count#130] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#116 as decimal(12,2))) * promote_precision(cast(list_price#117 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#130, count#131] +Results [2]: [sum#132, count#133] (123) Exchange -Input [2]: [sum#129, count#130] +Input [2]: [sum#132, count#133] Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=13] (124) HashAggregate [codegen id : 8] -Input [2]: [sum#129, count#130] +Input [2]: [sum#132, count#133] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))#131] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))#131 AS average_sales#132] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#116 as decimal(12,2))) * promote_precision(cast(list_price#117 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#116 as decimal(12,2))) * promote_precision(cast(list_price#117 as decimal(12,2)))), DecimalType(18,2)))#134] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#116 as decimal(12,2))) * promote_precision(cast(list_price#117 as decimal(12,2)))), DecimalType(18,2)))#134 AS average_sales#135] -Subquery:2 Hosting operator id = 106 Hosting Expression = ss_sold_date_sk#111 IN dynamicpruning#12 +Subquery:2 Hosting operator id = 106 Hosting Expression = ss_sold_date_sk#114 IN dynamicpruning#12 -Subquery:3 Hosting operator id = 111 Hosting Expression = cs_sold_date_sk#117 IN dynamicpruning#12 +Subquery:3 Hosting operator id = 111 Hosting Expression = cs_sold_date_sk#120 IN dynamicpruning#12 -Subquery:4 Hosting operator id = 116 Hosting Expression = ws_sold_date_sk#123 IN dynamicpruning#12 +Subquery:4 Hosting operator id = 116 Hosting Expression = ws_sold_date_sk#126 IN dynamicpruning#12 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 BroadcastExchange (129) @@ -712,22 +712,22 @@ BroadcastExchange (129) (125) Scan parquet default.date_dim -Output [3]: [d_date_sk#40, d_year#133, d_moy#134] +Output [3]: [d_date_sk#40, d_year#136, d_moy#137] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct (126) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#40, d_year#133, d_moy#134] +Input [3]: [d_date_sk#40, d_year#136, d_moy#137] (127) Filter [codegen id : 1] -Input [3]: [d_date_sk#40, d_year#133, d_moy#134] -Condition : ((((isnotnull(d_year#133) AND isnotnull(d_moy#134)) AND (d_year#133 = 2001)) AND (d_moy#134 = 11)) AND isnotnull(d_date_sk#40)) +Input [3]: [d_date_sk#40, d_year#136, d_moy#137] +Condition : ((((isnotnull(d_year#136) AND isnotnull(d_moy#137)) AND (d_year#136 = 2001)) AND (d_moy#137 = 11)) AND isnotnull(d_date_sk#40)) (128) Project [codegen id : 1] Output [1]: [d_date_sk#40] -Input [3]: [d_date_sk#40, d_year#133, d_moy#134] +Input [3]: [d_date_sk#40, d_year#136, d_moy#137] (129) BroadcastExchange Input [1]: [d_date_sk#40] @@ -742,22 +742,22 @@ BroadcastExchange (134) (130) Scan parquet default.date_dim -Output [2]: [d_date_sk#24, d_year#135] +Output [2]: [d_date_sk#24, d_year#138] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (131) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#24, d_year#135] +Input [2]: [d_date_sk#24, d_year#138] (132) Filter [codegen id : 1] -Input [2]: [d_date_sk#24, d_year#135] -Condition : (((isnotnull(d_year#135) AND (d_year#135 >= 1999)) AND (d_year#135 <= 2001)) AND isnotnull(d_date_sk#24)) +Input [2]: [d_date_sk#24, d_year#138] +Condition : (((isnotnull(d_year#138) AND (d_year#138 >= 1999)) AND (d_year#138 <= 2001)) AND isnotnull(d_date_sk#24)) (133) Project [codegen id : 1] Output [1]: [d_date_sk#24] -Input [2]: [d_date_sk#24, d_year#135] +Input [2]: [d_date_sk#24, d_year#138] (134) BroadcastExchange Input [1]: [d_date_sk#24] @@ -769,10 +769,10 @@ Subquery:8 Hosting operator id = 36 Hosting Expression = ws_sold_date_sk#29 IN d Subquery:9 Hosting operator id = 82 Hosting Expression = ReusedSubquery Subquery scalar-subquery#51, [id=#52] -Subquery:10 Hosting operator id = 68 Hosting Expression = cs_sold_date_sk#57 IN dynamicpruning#5 +Subquery:10 Hosting operator id = 68 Hosting Expression = cs_sold_date_sk#60 IN dynamicpruning#5 Subquery:11 Hosting operator id = 98 Hosting Expression = ReusedSubquery Subquery scalar-subquery#51, [id=#52] -Subquery:12 Hosting operator id = 84 Hosting Expression = ws_sold_date_sk#77 IN dynamicpruning#5 +Subquery:12 Hosting operator id = 84 Hosting Expression = ws_sold_date_sk#80 IN dynamicpruning#5 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index a4651c913c6c1..43aca31d138f4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -3220,6 +3220,67 @@ class DataFrameSuite extends QueryTest val d1 = Seq("a").toDF assert(d1.exceptAll(d1).count() === 0) } + + test("SPARK-39887: RemoveRedundantAliases should keep attributes of a Union's first child") { + val df = sql( + """ + |SELECT a, b AS a FROM ( + | SELECT a, a AS b FROM (SELECT a FROM VALUES (1) AS t(a)) + | UNION ALL + | SELECT a, b FROM (SELECT a, b FROM VALUES (1, 2) AS t(a, b)) + |) + |""".stripMargin) + val stringCols = df.logicalPlan.output.map(Column(_).cast(StringType)) + val castedDf = df.select(stringCols: _*) + checkAnswer(castedDf, Row("1", "1") :: Row("1", "2") :: Nil) + } + + test("SPARK-39887: RemoveRedundantAliases should keep attributes of a Union's first child 2") { + val df = sql( + """ + |SELECT + | to_date(a) a, + | to_date(b) b + |FROM + | ( + | SELECT + | a, + | a AS b + | FROM + | ( + | SELECT + | to_date(a) a + | FROM + | VALUES + | ('2020-02-01') AS t1(a) + | GROUP BY + | to_date(a) + | ) t3 + | UNION ALL + | SELECT + | a, + | b + | FROM + | ( + | SELECT + | to_date(a) a, + | to_date(b) b + | FROM + | VALUES + | ('2020-01-01', '2020-01-02') AS t1(a, b) + | GROUP BY + | to_date(a), + | to_date(b) + | ) t4 + | ) t5 + |GROUP BY + | to_date(a), + | to_date(b); + |""".stripMargin) + checkAnswer(df, + Row(java.sql.Date.valueOf("2020-02-01"), java.sql.Date.valueOf("2020-02-01")) :: + Row(java.sql.Date.valueOf("2020-01-01"), java.sql.Date.valueOf("2020-01-02")) :: Nil) + } } case class GroupByKey(a: Int, b: Int) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala index aa746370b8fd3..2667bc32d2153 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/metric/SQLMetricsSuite.scala @@ -639,8 +639,9 @@ class SQLMetricsSuite extends SharedSparkSession with SQLMetricsTestUtils val union = view.union(view) testSparkPlanMetrics(union, 1, Map( 0L -> ("Union" -> Map()), - 1L -> ("LocalTableScan" -> Map("number of output rows" -> 2L)), - 2L -> ("LocalTableScan" -> Map("number of output rows" -> 2L)))) + 1L -> ("Project" -> Map()), + 2L -> ("LocalTableScan" -> Map("number of output rows" -> 2L)), + 3L -> ("LocalTableScan" -> Map("number of output rows" -> 2L)))) } } From d7af1d20f06412f80798c53d8588356ee1490afe Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Fri, 12 Aug 2022 10:52:33 +0800 Subject: [PATCH 431/535] [SPARK-39976][SQL] ArrayIntersect should handle null in left expression correctly ### What changes were proposed in this pull request? `ArrayInterscet` miss judge if null contains in right expression's hash set. ``` >>> a = [1, 2, 3] >>> b = [3, None, 5] >>> df = spark.sparkContext.parallelize(data).toDF(["a","b"]) >>> df.show() +---------+------------+ | a| b| +---------+------------+ |[1, 2, 3]|[3, null, 5]| +---------+------------+ >>> df.selectExpr("array_intersect(a,b)").show() +---------------------+ |array_intersect(a, b)| +---------------------+ | [3]| +---------------------+ >>> df.selectExpr("array_intersect(b,a)").show() +---------------------+ |array_intersect(b, a)| +---------------------+ | [3, null]| +---------------------+ ``` In origin code gen's code path, when handle `ArrayIntersect`'s array1, it use the below code ``` def withArray1NullAssignment(body: String) = if (left.dataType.asInstanceOf[ArrayType].containsNull) { if (right.dataType.asInstanceOf[ArrayType].containsNull) { s""" |if ($array1.isNullAt($i)) { | if ($foundNullElement) { | $nullElementIndex = $size; | $foundNullElement = false; | $size++; | $builder.$$plus$$eq($nullValueHolder); | } |} else { | $body |} """.stripMargin } else { s""" |if (!$array1.isNullAt($i)) { | $body |} """.stripMargin } } else { body } ``` We have a flag `foundNullElement` to indicate if array2 really contains a null value. But when implement https://issues.apache.org/jira/browse/SPARK-36829, misunderstand the meaning of `ArrayType.containsNull`, so when implement `SQLOpenHashSet.withNullCheckCode()` ``` def withNullCheckCode( arrayContainsNull: Boolean, setContainsNull: Boolean, array: String, index: String, hashSet: String, handleNotNull: (String, String) => String, handleNull: String): String = { if (arrayContainsNull) { if (setContainsNull) { s""" |if ($array.isNullAt($index)) { | if (!$hashSet.containsNull()) { | $hashSet.addNull(); | $handleNull | } |} else { | ${handleNotNull(array, index)} |} """.stripMargin } else { s""" |if (!$array.isNullAt($index)) { | ${handleNotNull(array, index)} |} """.stripMargin } } else { handleNotNull(array, index) } } ``` The code path of ` if (arrayContainsNull && setContainsNull) ` is misinterpreted that array's openHashSet really have a null value. In this pr we add a new parameter `additionalCondition ` to complements the previous implementation of `foundNullElement`. Also refactor the method's parameter name. ### Why are the changes needed? Fix data correct issue ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added UT Closes #37436 from AngersZhuuuu/SPARK-39776-FOLLOW_UP. Lead-authored-by: Angerszhuuuu Co-authored-by: AngersZhuuuu Signed-off-by: Wenchen Fan (cherry picked from commit dff5c2f2e9ce233e270e0e5cde0a40f682ba9534) Signed-off-by: Wenchen Fan --- .../expressions/collectionOperations.scala | 8 +++-- .../spark/sql/util/SQLOpenHashSet.scala | 8 ++--- .../CollectionExpressionsSuite.scala | 34 +++++++++++++++++++ 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index f38beb480e680..650cfc7bca852 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -4155,9 +4155,11 @@ case class ArrayIntersect(left: Expression, right: Expression) extends ArrayBina right.dataType.asInstanceOf[ArrayType].containsNull, array1, i, hashSetResult, withArray1NaNCheckCodeGenerator, s""" - |$nullElementIndex = $size; - |$size++; - |$builder.$$plus$$eq($nullValueHolder); + |if ($hashSet.containsNull()) { + | $nullElementIndex = $size; + | $size++; + | $builder.$$plus$$eq($nullValueHolder); + |} """.stripMargin) // Only need to track null element index when result array's element is nullable. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SQLOpenHashSet.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SQLOpenHashSet.scala index 5f0366941def4..ee4dd54f28e95 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SQLOpenHashSet.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/SQLOpenHashSet.scala @@ -79,15 +79,15 @@ object SQLOpenHashSet { } def withNullCheckCode( - arrayContainsNull: Boolean, - setContainsNull: Boolean, + array1ElementNullable: Boolean, + array2ElementNullable: Boolean, array: String, index: String, hashSet: String, handleNotNull: (String, String) => String, handleNull: String): String = { - if (arrayContainsNull) { - if (setContainsNull) { + if (array1ElementNullable) { + if (array2ElementNullable) { s""" |if ($array.isNullAt($index)) { | if (!$hashSet.containsNull()) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index a8c4b16c7a05d..0ac1adccb8b7f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -2163,6 +2163,23 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(ArrayExcept(empty, oneNull), Seq.empty) checkEvaluation(ArrayExcept(oneNull, empty), Seq(null)) checkEvaluation(ArrayExcept(twoNulls, empty), Seq(null)) + + checkEvaluation(ArrayExcept( + Literal.create(Seq(1d, 2d, null), ArrayType(DoubleType)), + Literal.create(Seq(1d), ArrayType(DoubleType))), + Seq(2d, null)) + checkEvaluation(ArrayExcept( + Literal.create(Seq(1d, 2d, null), ArrayType(DoubleType)), + Literal.create(Seq(1d), ArrayType(DoubleType, false))), + Seq(2d, null)) + checkEvaluation(ArrayExcept( + Literal.create(Seq(1d, 2d), ArrayType(DoubleType)), + Literal.create(Seq(1d, null), ArrayType(DoubleType))), + Seq(2d)) + checkEvaluation(ArrayExcept( + Literal.create(Seq(1d, 2d), ArrayType(DoubleType, false)), + Literal.create(Seq(1d, null), ArrayType(DoubleType))), + Seq(2d)) } test("Array Intersect") { @@ -2288,6 +2305,23 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(ArrayIntersect(oneNull, twoNulls), Seq(null)) checkEvaluation(ArrayIntersect(empty, oneNull), Seq.empty) checkEvaluation(ArrayIntersect(oneNull, empty), Seq.empty) + + checkEvaluation(ArrayIntersect( + Literal.create(Seq(1d, 2d, null), ArrayType(DoubleType)), + Literal.create(Seq(1d), ArrayType(DoubleType))), + Seq(1d)) + checkEvaluation(ArrayIntersect( + Literal.create(Seq(1d, 2d, null), ArrayType(DoubleType)), + Literal.create(Seq(1d), ArrayType(DoubleType, false))), + Seq(1d)) + checkEvaluation(ArrayIntersect( + Literal.create(Seq(1d, 2d), ArrayType(DoubleType)), + Literal.create(Seq(1d, null), ArrayType(DoubleType))), + Seq(1d)) + checkEvaluation(ArrayIntersect( + Literal.create(Seq(1d, 2d), ArrayType(DoubleType, false)), + Literal.create(Seq(1d, null), ArrayType(DoubleType))), + Seq(1d)) } test("SPARK-31980: Start and end equal in month range") { From a6957d39f5711c0efd38d033c73a93fa6058647f Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sat, 13 Aug 2022 11:42:00 -0700 Subject: [PATCH 432/535] Revert "[SPARK-40047][TEST] Exclude unused `xalan` transitive dependency from `htmlunit`" ### What changes were proposed in this pull request? This pr revert SPARK-40047 due to mvn test still need this dependency. ### Why are the changes needed? mvn test still need `xalan` dependency although GA passed before this pr. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GitHub Actions - Manual test: ``` mvn clean install -DskipTests -pl core -am build/mvn clean test -pl core -Dtest=noen -DwildcardSuites=org.apache.spark.ui.UISeleniumSuite ``` **Before** ``` UISeleniumSuite: *** RUN ABORTED *** java.lang.NoClassDefFoundError: org/apache/xml/utils/PrefixResolver at java.lang.Class.getDeclaredFields0(Native Method) at java.lang.Class.privateGetDeclaredFields(Class.java:2583) at java.lang.Class.getField0(Class.java:2975) at java.lang.Class.getField(Class.java:1701) at com.gargoylesoftware.htmlunit.svg.SvgElementFactory.(SvgElementFactory.java:64) at com.gargoylesoftware.htmlunit.html.parser.neko.HtmlUnitNekoHtmlParser.(HtmlUnitNekoHtmlParser.java:77) at com.gargoylesoftware.htmlunit.DefaultPageCreator.(DefaultPageCreator.java:93) at com.gargoylesoftware.htmlunit.WebClient.(WebClient.java:191) at com.gargoylesoftware.htmlunit.WebClient.(WebClient.java:273) at com.gargoylesoftware.htmlunit.WebClient.(WebClient.java:263) ... Cause: java.lang.ClassNotFoundException: org.apache.xml.utils.PrefixResolver at java.net.URLClassLoader.findClass(URLClassLoader.java:387) at java.lang.ClassLoader.loadClass(ClassLoader.java:419) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:352) at java.lang.ClassLoader.loadClass(ClassLoader.java:352) at java.lang.Class.getDeclaredFields0(Native Method) at java.lang.Class.privateGetDeclaredFields(Class.java:2583) at java.lang.Class.getField0(Class.java:2975) at java.lang.Class.getField(Class.java:1701) at com.gargoylesoftware.htmlunit.svg.SvgElementFactory.(SvgElementFactory.java:64) at com.gargoylesoftware.htmlunit.html.parser.neko.HtmlUnitNekoHtmlParser.(HtmlUnitNekoHtmlParser.java:77) ... ``` **After** ``` UISeleniumSuite: - all jobs page should be rendered even though we configure the scheduling mode to fair - effects of unpersist() / persist() should be reflected - failed stages should not appear to be active - spark.ui.killEnabled should properly control kill button display - jobs page should not display job group name unless some job was submitted in a job group - job progress bars should handle stage / task failures - job details page should display useful information for stages that haven't started - job progress bars / cells reflect skipped stages / tasks - stages that aren't run appear as 'skipped stages' after a job finishes - jobs with stages that are skipped should show correct link descriptions on all jobs page - attaching and detaching a new tab - kill stage POST/GET response is correct - kill job POST/GET response is correct - stage & job retention - live UI json application list - job stages should have expected dotfile under DAG visualization - stages page should show skipped stages - Staleness of Spark UI should not last minutes or hours - description for empty jobs - Support disable event timeline Run completed in 17 seconds, 986 milliseconds. Total number of tests run: 20 Suites: completed 2, aborted 0 Tests: succeeded 20, failed 0, canceled 0, ignored 0, pending 0 All tests passed. ``` Closes #37508 from LuciferYang/revert-40047. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun (cherry picked from commit afd7098c7fb6c95aece39acc32cdad764c984cd2) Signed-off-by: Dongjoon Hyun --- pom.xml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pom.xml b/pom.xml index f639e5e54447d..206cad9eb981a 100644 --- a/pom.xml +++ b/pom.xml @@ -709,12 +709,6 @@ net.sourceforge.htmlunit htmlunit ${htmlunit.version} - - - xalan - xalan - - test From 2ee196dbb0bf9ecfd96a1928cbaf15b7c3856d3d Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Mon, 15 Aug 2022 18:03:08 +0800 Subject: [PATCH 433/535] [SPARK-40079] Add Imputer inputCols validation for empty input case Signed-off-by: Weichen Xu ### What changes were proposed in this pull request? Add Imputer inputCols validation for empty input case ### Why are the changes needed? If Imputer inputCols is empty, the `fit` works fine but when saving model, error will be raised: > AnalysisException: Datasource does not support writing empty or nested empty schemas. Please make sure the data schema has at least one or more column(s). ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #37518 from WeichenXu123/imputer-param-validation. Authored-by: Weichen Xu Signed-off-by: Weichen Xu (cherry picked from commit 87094f89655b7df09cdecb47c653461ae855b0ac) Signed-off-by: Weichen Xu --- .../scala/org/apache/spark/ml/feature/Imputer.scala | 1 + .../org/apache/spark/ml/feature/ImputerSuite.scala | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala index 71403acc91b55..5998887923f8b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala @@ -81,6 +81,7 @@ private[feature] trait ImputerParams extends Params with HasInputCol with HasInp protected def validateAndTransformSchema(schema: StructType): StructType = { ParamValidators.checkSingleVsMultiColumnParams(this, Seq(outputCol), Seq(outputCols)) val (inputColNames, outputColNames) = getInOutCols() + require(inputColNames.length > 0, "inputCols cannot be empty") require(inputColNames.length == inputColNames.distinct.length, s"inputCols contains" + s" duplicates: (${inputColNames.mkString(", ")})") require(outputColNames.length == outputColNames.distinct.length, s"outputCols contains" + diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala index 30887f55638f9..5ef22a282c3a5 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/ImputerSuite.scala @@ -268,6 +268,16 @@ class ImputerSuite extends MLTest with DefaultReadWriteTest { } assert(e.getMessage.contains("outputCols contains duplicates")) } + + withClue("Imputer should fail if inputCols param is empty.") { + val e: IllegalArgumentException = intercept[IllegalArgumentException] { + val imputer = new Imputer().setStrategy(strategy) + .setInputCols(Array[String]()) + .setOutputCols(Array[String]()) + val model = imputer.fit(df) + } + assert(e.getMessage.contains("inputCols cannot be empty")) + } } } From 21acaaea662d003bc49861eee27d6d663618fb19 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Tue, 16 Aug 2022 11:53:39 +0300 Subject: [PATCH 434/535] [SPARK-39184][SQL] Handle undersized result array in date and timestamp sequences ### What changes were proposed in this pull request? Add code to defensively check if the pre-allocated result array is big enough to handle the next element in a date or timestamp sequence. ### Why are the changes needed? `InternalSequenceBase.getSequenceLength` is a fast method for estimating the size of the result array. It uses an estimated step size in micros which is not always entirely accurate for the date/time/time-zone combination. As a result, `getSequenceLength` occasionally overestimates the size of the result array and also occasionally underestimates the size of the result array. `getSequenceLength` sometimes overestimates the size of the result array when the step size is in months (because `InternalSequenceBase` assumes 28 days per month). This case is handled: `InternalSequenceBase` will slice the array, if needed. `getSequenceLength` sometimes underestimates the size of the result array when the sequence crosses a DST "spring forward" without a corresponding "fall backward". This case is not handled (thus, this PR). For example: ``` select sequence( timestamp'2022-03-13 00:00:00', timestamp'2022-03-14 00:00:00', interval 1 day) as x; ``` In the America/Los_Angeles time zone, this results in the following error: ``` java.lang.ArrayIndexOutOfBoundsException: 1 at scala.runtime.ScalaRunTime$.array_update(ScalaRunTime.scala:77) ``` This happens because `InternalSequenceBase` calculates an estimated step size of 24 hours. If you add 24 hours to 2022-03-13 00:00:00 in the America/Los_Angeles time zone, you get 2022-03-14 01:00:00 (because 2022-03-13 has only 23 hours due to "spring forward"). Since 2022-03-14 01:00:00 is later than the specified stop value, `getSequenceLength` assumes the stop value is not included in the result. Therefore, `getSequenceLength` estimates an array size of 1. However, when actually creating the sequence, `InternalSequenceBase` does not use a step of 24 hours, but of 1 day. When you add 1 day to 2022-03-13 00:00:00, you get 2022-03-14 00:00:00. Now the stop value *is* included, and we overrun the end of the result array. The new unit test includes examples of problematic date sequences. This PR adds code to to handle the underestimation case: it checks if we're about to overrun the array, and if so, gets a new array that's larger by 1. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New unit test. Closes #37513 from bersprockets/date_sequence_array_size_issue. Authored-by: Bruce Robbins Signed-off-by: Max Gekk (cherry picked from commit 3a1136aa05dd5e16de81c7ec804416b3498ca967) Signed-off-by: Max Gekk --- .../expressions/collectionOperations.scala | 13 +++++- .../CollectionExpressionsSuite.scala | 40 +++++++++++++++++++ 2 files changed, 51 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 650cfc7bca852..4a5ae5d2e0205 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -3055,17 +3055,23 @@ object Sequence { val startMicros: Long = toMicros(num.toLong(start), scale) val stopMicros: Long = toMicros(num.toLong(stop), scale) - val maxEstimatedArrayLength = + val estimatedArrayLength = getSequenceLength(startMicros, stopMicros, input3, intervalStepInMicros) val stepSign = if (intervalStepInMicros > 0) +1 else -1 val exclusiveItem = stopMicros + stepSign - val arr = new Array[T](maxEstimatedArrayLength) + var arr = new Array[T](estimatedArrayLength) var t = startMicros var i = 0 while (t < exclusiveItem ^ stepSign < 0) { val result = fromMicros(t, scale) + // if we've underestimated the size of the array, due to crossing a DST + // "spring forward" without a corresponding "fall back", make a copy + // that's larger by 1 + if (i == arr.length) { + arr = arr.padTo(estimatedArrayLength + 1, fromLong(0L)) + } arr(i) = fromLong(result) i += 1 t = addInterval(startMicros, i * stepMonths, i * stepDays, i * stepMicros, zoneId) @@ -3173,6 +3179,9 @@ object Sequence { | int $i = 0; | | while ($t < $exclusiveItem ^ $stepSign < 0) { + | if ($i == $arr.length) { + | $arr = java.util.Arrays.copyOf($arr, $i + 1); + | } | $arr[$i] = $fromMicrosCode; | $i += 1; | $t = $addIntervalCode( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 0ac1adccb8b7f..802988038a6ef 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -2492,4 +2492,44 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper Literal.create(Seq(Double.NaN, 1d, 2d, null), ArrayType(DoubleType))), Seq(null, 1d, 2d, Double.NaN)) } + + test("SPARK-39184: Avoid ArrayIndexOutOfBoundsException when crossing DST boundary") { + DateTimeTestUtils.withDefaultTimeZone(LA) { + checkEvaluation(new Sequence( + Literal(Timestamp.valueOf("2016-03-13 00:00:00")), + Literal(Timestamp.valueOf("2016-03-14 00:00:00")), + Literal(stringToInterval("interval 1 day"))), + Seq( + Timestamp.valueOf("2016-03-13 00:00:00"), + Timestamp.valueOf("2016-03-14 00:00:00"))) + + checkEvaluation(new Sequence( + Literal(Timestamp.valueOf("2016-03-14 00:00:00")), + Literal(Timestamp.valueOf("2016-03-13 00:00:00")), + Literal(stringToInterval("interval -1 days"))), + Seq( + Timestamp.valueOf("2016-03-14 00:00:00"), + Timestamp.valueOf("2016-03-13 00:00:00"))) + + checkEvaluation(new Sequence( + Literal(Date.valueOf("2016-03-13")), + Literal(Date.valueOf("2016-03-16")), + Literal(stringToInterval("interval 1 day 12 hour"))), + Seq( + Date.valueOf("2016-03-13"), + Date.valueOf("2016-03-14"), + Date.valueOf("2016-03-16"))) + + checkEvaluation(new Sequence( + Literal(Date.valueOf("2017-04-06")), + Literal(Date.valueOf("2017-02-12")), + Literal(stringToInterval("interval -13 days -6 hours"))), + Seq( + Date.valueOf("2017-04-06"), + Date.valueOf("2017-03-23"), + Date.valueOf("2017-03-10"), + Date.valueOf("2017-02-25"), + Date.valueOf("2017-02-12"))) + } + } } From 0db78424201cd7b2e2bcffb9de3c2a12a0c67b44 Mon Sep 17 00:00:00 2001 From: Wenli Looi Date: Wed, 17 Aug 2022 15:28:55 +0900 Subject: [PATCH 435/535] [SPARK-40117][PYTHON][SQL] Convert condition to java in DataFrameWriterV2.overwrite ### What changes were proposed in this pull request? Fix DataFrameWriterV2.overwrite() fails to convert the condition parameter to java. This prevents the function from being called. It is caused by the following commit that deleted the `_to_java_column` call instead of fixing it: https://github.com/apache/spark/commit/a1e459ed9f6777fb8d5a2d09fda666402f9230b9 ### Why are the changes needed? DataFrameWriterV2.overwrite() cannot be called. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually checked whether the arguments are sent to JVM or not. Closes #37547 from looi/fix-overwrite. Authored-by: Wenli Looi Signed-off-by: Hyukjin Kwon (cherry picked from commit 46379863ab0dd2ee8fcf1e31e76476ff18397f60) Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/readwriter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py index 760e54831c2f0..c4c813e56b17a 100644 --- a/python/pyspark/sql/readwriter.py +++ b/python/pyspark/sql/readwriter.py @@ -1465,6 +1465,7 @@ def overwrite(self, condition: Column) -> None: Overwrite rows matching the given filter condition with the contents of the data frame in the output table. """ + condition = _to_java_column(condition) self._jwriter.overwrite(condition) @since(3.1) From 9601be96a86eced683e6aa2b772c726eeb231de8 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Wed, 17 Aug 2022 14:57:35 +0800 Subject: [PATCH 436/535] [SPARK-39887][SQL][FOLLOW-UP] Do not exclude Union's first child attributes when traversing other children in RemoveRedundantAliases ### What changes were proposed in this pull request? Do not exclude `Union`'s first child attributes when traversing other children in `RemoveRedundantAliases`. ### Why are the changes needed? We don't need to exclude those attributes that `Union` inherits from its first child. See discussion here: https://github.com/apache/spark/pull/37496#discussion_r944509115 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing UTs. Closes #37534 from peter-toth/SPARK-39887-keep-attributes-of-unions-first-child-follow-up. Authored-by: Peter Toth Signed-off-by: Wenchen Fan (cherry picked from commit e732232dac420826af269d8cf5efacb52933f59a) Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 558a67ff5ca66..4807824ee7119 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -544,7 +544,7 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { }) Join(newLeft, newRight, joinType, newCondition, hint) - case _: Union => + case u: Union => var first = true plan.mapChildren { child => if (first) { @@ -555,7 +555,8 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { // output attributes could return incorrect result. removeRedundantAliases(child, excluded ++ child.outputSet) } else { - removeRedundantAliases(child, excluded) + // We don't need to exclude those attributes that `Union` inherits from its first child. + removeRedundantAliases(child, excluded -- u.children.head.outputSet) } } From 1a01a492c051bb861c480f224a3c310e133e4d01 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 18 Aug 2022 12:23:02 +0900 Subject: [PATCH 437/535] [SPARK-40121][PYTHON][SQL] Initialize projection used for Python UDF ### What changes were proposed in this pull request? This PR proposes to initialize the projection so non-deterministic expressions can be evaluated with Python UDFs. ### Why are the changes needed? To make the Python UDF working with non-deterministic expressions. ### Does this PR introduce _any_ user-facing change? Yes. ```python from pyspark.sql.functions import udf, rand spark.range(10).select(udf(lambda x: x, "double")(rand())).show() ``` **Before** ``` java.lang.NullPointerException at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificMutableProjection.apply(Unknown Source) at org.apache.spark.sql.execution.python.EvalPythonExec.$anonfun$doExecute$10(EvalPythonExec.scala:126) at scala.collection.Iterator$$anon$10.next(Iterator.scala:461) at scala.collection.Iterator$$anon$10.next(Iterator.scala:461) at scala.collection.Iterator$GroupedIterator.takeDestructively(Iterator.scala:1161) at scala.collection.Iterator$GroupedIterator.go(Iterator.scala:1176) at scala.collection.Iterator$GroupedIterator.fill(Iterator.scala:1213) ``` **After** ``` +----------------------------------+ |rand(-2507211707257730645)| +----------------------------------+ | 0.7691724424045242| | 0.09602244075319044| | 0.3006471278112862| | 0.4182649571961977| | 0.29349096650900974| | 0.7987097908937618| | 0.5324802583101007| | 0.72460930912789| | 0.1367749768412846| | 0.17277322931919348| +----------------------------------+ ``` ### How was this patch tested? Manually tested, and unittest was added. Closes #37552 from HyukjinKwon/SPARK-40121. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 336c9bc535895530cc3983b24e7507229fa9570d) Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/tests/test_udf.py | 8 +++++++- .../spark/sql/execution/python/EvalPythonExec.scala | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/tests/test_udf.py b/python/pyspark/sql/tests/test_udf.py index 40deac992c4bd..34ac08cb818fb 100644 --- a/python/pyspark/sql/tests/test_udf.py +++ b/python/pyspark/sql/tests/test_udf.py @@ -24,7 +24,7 @@ from pyspark import SparkContext, SQLContext from pyspark.sql import SparkSession, Column, Row -from pyspark.sql.functions import udf, assert_true, lit +from pyspark.sql.functions import udf, assert_true, lit, rand from pyspark.sql.udf import UserDefinedFunction from pyspark.sql.types import ( StringType, @@ -798,6 +798,12 @@ def f(x): finally: shutil.rmtree(path) + def test_udf_with_rand(self): + # SPARK-40121: rand() with Python UDF. + self.assertEqual( + len(self.spark.range(10).select(udf(lambda x: x, DoubleType())(rand())).collect()), 10 + ) + class UDFInitializationTests(unittest.TestCase): def tearDown(self): diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala index c567a70e1d3cd..f117a40856692 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExec.scala @@ -116,6 +116,7 @@ trait EvalPythonExec extends UnaryExecNode { }.toArray }.toArray val projection = MutableProjection.create(allInputs.toSeq, child.output) + projection.initialize(context.partitionId()) val schema = StructType(dataTypes.zipWithIndex.map { case (dt, i) => StructField(s"_$i", dt) }.toSeq) From e1c5f90c700d844aa56c211e53eb75d0aa99b9ad Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Thu, 18 Aug 2022 00:23:52 -0500 Subject: [PATCH 438/535] [SPARK-40132][ML] Restore rawPredictionCol to MultilayerPerceptronClassifier.setParams ### What changes were proposed in this pull request? Restore rawPredictionCol to MultilayerPerceptronClassifier.setParams ### Why are the changes needed? This param was inadvertently removed in the refactoring in https://github.com/apache/spark/commit/40cdb6d51c2befcfeac8fb5cf5faf178d1a5ee7b#r81473316 Without it, using this param in the constructor fails. ### Does this PR introduce _any_ user-facing change? Not aside from the bug fix. ### How was this patch tested? Existing tests. Closes #37561 from srowen/SPARK-40132. Authored-by: Sean Owen Signed-off-by: Sean Owen (cherry picked from commit 6768d9cc38a320f7e1c6781afcd170577c5c7d0f) Signed-off-by: Sean Owen --- python/pyspark/ml/classification.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 40a2a87c5db92..c09a510d76b67 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -3230,6 +3230,7 @@ def setParams( solver: str = "l-bfgs", initialWeights: Optional[Vector] = None, probabilityCol: str = "probability", + rawPredictionCol: str = "rawPrediction", ) -> "MultilayerPerceptronClassifier": """ setParams(self, \\*, featuresCol="features", labelCol="label", predictionCol="prediction", \ From bd79706a25ffe3e937964c56037313c1a7de752d Mon Sep 17 00:00:00 2001 From: William Hyun Date: Thu, 18 Aug 2022 13:00:20 -0700 Subject: [PATCH 439/535] [SPARK-40134][BUILD] Update ORC to 1.7.6 This PR aims to update ORC to 1.7.6. This will bring the latest changes and bug fixes. https://github.com/apache/orc/releases/tag/v1.7.6 - ORC-1204: ORC MapReduce writer to flush when long arrays - ORC-1205: `nextVector` should invoke `ensureSize` when reusing vectors - ORC-1215: Remove a wrong `NotNull` annotation on `value` of `setAttribute` - ORC-1222: Upgrade `tools.hadoop.version` to 2.10.2 - ORC-1227: Use `Constructor.newInstance` instead of `Class.newInstance` - ORC-1228: Fix `setAttribute` to handle null value No. Pass the CIs. Closes #37563 from williamhyun/ORC-176. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit a1a049f01986c15e50a2f76d1fa8538ca3b6307e) Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 6 +++--- dev/deps/spark-deps-hadoop-3-hive-2.3 | 6 +++--- pom.xml | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 0eaac80ab3b77..8a600122b4e55 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -219,9 +219,9 @@ objenesis/3.2//objenesis-3.2.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.7.5//orc-core-1.7.5.jar -orc-mapreduce/1.7.5//orc-mapreduce-1.7.5.jar -orc-shims/1.7.5//orc-shims-1.7.5.jar +orc-core/1.7.6//orc-core-1.7.6.jar +orc-mapreduce/1.7.6//orc-mapreduce-1.7.6.jar +orc-shims/1.7.6//orc-shims-1.7.6.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 76b69f016af69..e36d4f2d91185 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -208,9 +208,9 @@ opencsv/2.3//opencsv-2.3.jar opentracing-api/0.33.0//opentracing-api-0.33.0.jar opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar opentracing-util/0.33.0//opentracing-util-0.33.0.jar -orc-core/1.7.5//orc-core-1.7.5.jar -orc-mapreduce/1.7.5//orc-mapreduce-1.7.5.jar -orc-shims/1.7.5//orc-shims-1.7.5.jar +orc-core/1.7.6//orc-core-1.7.6.jar +orc-mapreduce/1.7.6//orc-mapreduce-1.7.6.jar +orc-shims/1.7.6//orc-shims-1.7.6.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/pom.xml b/pom.xml index 206cad9eb981a..9f4d878e22946 100644 --- a/pom.xml +++ b/pom.xml @@ -132,7 +132,7 @@ 10.14.2.0 1.12.2 - 1.7.5 + 1.7.6 9.4.46.v20220331 4.0.3 0.10.0 From 87f957dea86fe1b8c5979e499b5400866b235e43 Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Fri, 19 Aug 2022 12:26:34 +0800 Subject: [PATCH 440/535] [SPARK-35542][ML] Fix: Bucketizer created for multiple columns with parameters splitsArray, inputCols and outputCols can not be loaded after saving it Signed-off-by: Weichen Xu ### What changes were proposed in this pull request? Fix: Bucketizer created for multiple columns with parameters splitsArray, inputCols and outputCols can not be loaded after saving it ### Why are the changes needed? Bugfix. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test Closes #37568 from WeichenXu123/SPARK-35542. Authored-by: Weichen Xu Signed-off-by: Weichen Xu (cherry picked from commit 876ce6a5df118095de51c3c4789d6db6da95eb23) Signed-off-by: Weichen Xu --- python/pyspark/ml/tests/test_persistence.py | 17 ++++++++++++++++- python/pyspark/ml/wrapper.py | 6 +++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/python/pyspark/ml/tests/test_persistence.py b/python/pyspark/ml/tests/test_persistence.py index 4f09a49dd0408..0b54540f06d76 100644 --- a/python/pyspark/ml/tests/test_persistence.py +++ b/python/pyspark/ml/tests/test_persistence.py @@ -32,7 +32,7 @@ OneVsRestModel, ) from pyspark.ml.clustering import KMeans -from pyspark.ml.feature import Binarizer, HashingTF, PCA +from pyspark.ml.feature import Binarizer, Bucketizer, HashingTF, PCA from pyspark.ml.linalg import Vectors from pyspark.ml.param import Params from pyspark.ml.pipeline import Pipeline, PipelineModel @@ -518,6 +518,21 @@ def test_default_read_write_default_params(self): ) reader.getAndSetParams(lr, loadedMetadata) + # Test for SPARK-35542 fix. + def test_save_and_load_on_nested_list_params(self): + temp_path = tempfile.mkdtemp() + splitsArray = [ + [-float("inf"), 0.5, 1.4, float("inf")], + [-float("inf"), 0.1, 1.2, float("inf")], + ] + bucketizer = Bucketizer( + splitsArray=splitsArray, inputCols=["values", "values"], outputCols=["b1", "b2"] + ) + savePath = temp_path + "/bk" + bucketizer.write().overwrite().save(savePath) + loadedBucketizer = Bucketizer.load(savePath) + assert loadedBucketizer.getSplitsArray() == splitsArray + if __name__ == "__main__": from pyspark.ml.tests.test_persistence import * # noqa: F401 diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index 7853e76624464..32856540d6d0c 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -220,7 +220,11 @@ def _transfer_params_from_java(self) -> None: java_param = self._java_obj.getParam(param.name) # SPARK-14931: Only check set params back to avoid default params mismatch. if self._java_obj.isSet(java_param): - value = _java2py(sc, self._java_obj.getOrDefault(java_param)) + java_value = self._java_obj.getOrDefault(java_param) + if param.typeConverter.__name__.startswith("toList"): + value = [_java2py(sc, x) for x in list(java_value)] + else: + value = _java2py(sc, java_value) self._set(**{param.name: value}) # SPARK-10931: Temporary fix for params that have a default in Java if self._java_obj.hasDefault(java_param) and not self.isDefined(param): From 88f8ac6b55e9a68161aa275dc379bd8167ef29c1 Mon Sep 17 00:00:00 2001 From: Aki Sukegawa Date: Fri, 19 Aug 2022 12:28:48 -0700 Subject: [PATCH 441/535] [SPARK-40065][K8S] Mount ConfigMap on executors with non-default profile as well ### What changes were proposed in this pull request? This fixes a bug where ConfigMap is not mounted on executors if they are under a non-default resource profile. ### Why are the changes needed? When `spark.kubernetes.executor.disableConfigMap` is `false`, expected behavior is that the ConfigMap is mounted regardless of executor's resource profile. However, it is not mounted if the resource profile is non-default. ### Does this PR introduce _any_ user-facing change? Executors with non-default resource profile will have the ConfigMap mounted that was missing before if `spark.kubernetes.executor.disableConfigMap` is `false` or default. If certain users need to keep that behavior for some reason, they would need to explicitly set `spark.kubernetes.executor.disableConfigMap` to `true`. ### How was this patch tested? A new test case is added just below the existing ConfigMap test case. Closes #37504 from nsuke/SPARK-40065. Authored-by: Aki Sukegawa Signed-off-by: Dongjoon Hyun (cherry picked from commit 41ca6299eff4155aa3ac28656fe96501a7573fb0) Signed-off-by: Dongjoon Hyun --- .../features/BasicExecutorFeatureStep.scala | 2 +- .../BasicExecutorFeatureStepSuite.scala | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index 8102ca84affcc..171b368e35dc1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -246,7 +246,7 @@ private[spark] class BasicExecutorFeatureStep( .build() }.getOrElse(executorContainerWithConfVolume) } else { - executorContainer + executorContainerWithConfVolume } val containerWithLifecycle = if (!kubernetesConf.workerDecommissioning) { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala index 84c4f3b8ba352..4d57440c2c62e 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala @@ -372,6 +372,27 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { assert(!SecretVolumeUtils.podHasVolume(podConfigured.pod, SPARK_CONF_VOLUME_EXEC)) } + test("SPARK-40065 Mount configmap on executors with non-default profile as well") { + val baseDriverPod = SparkPod.initialPod() + val rp = new ResourceProfileBuilder().build() + val step = new BasicExecutorFeatureStep(newExecutorConf(), new SecurityManager(baseConf), rp) + val podConfigured = step.configurePod(baseDriverPod) + assert(SecretVolumeUtils.containerHasVolume(podConfigured.container, + SPARK_CONF_VOLUME_EXEC, SPARK_CONF_DIR_INTERNAL)) + assert(SecretVolumeUtils.podHasVolume(podConfigured.pod, SPARK_CONF_VOLUME_EXEC)) + } + + test("SPARK-40065 Disable configmap volume on executor pod's container (non-default profile)") { + baseConf.set(KUBERNETES_EXECUTOR_DISABLE_CONFIGMAP, true) + val baseDriverPod = SparkPod.initialPod() + val rp = new ResourceProfileBuilder().build() + val step = new BasicExecutorFeatureStep(newExecutorConf(), new SecurityManager(baseConf), rp) + val podConfigured = step.configurePod(baseDriverPod) + assert(!SecretVolumeUtils.containerHasVolume(podConfigured.container, + SPARK_CONF_VOLUME_EXEC, SPARK_CONF_DIR_INTERNAL)) + assert(!SecretVolumeUtils.podHasVolume(podConfigured.pod, SPARK_CONF_VOLUME_EXEC)) + } + test("SPARK-35482: user correct block manager port for executor pods") { try { val initPod = SparkPod.initialPod() From 7c69614f067c9eb68d997e8881d9b5845cde00fd Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Sun, 21 Aug 2022 18:59:48 +0900 Subject: [PATCH 442/535] [SPARK-39833][SQL] Disable Parquet column index in DSv1 to fix a correctness issue in the case of overlapping partition and data columns ### What changes were proposed in this pull request? This PR fixes a correctness issue in Parquet DSv1 FileFormat when projection does not contain columns referenced in pushed filters. This typically happens when partition columns and data columns overlap. This could result in empty result when in fact there were records matching predicate as can be seen in the provided fields. The problem is especially visible with `count()` and `show()` reporting different results, for example, show() would return 1+ records where the count() would return 0. In Parquet, when the predicate is provided and column index is enabled, we would try to filter row ranges to figure out what the count should be. Unfortunately, there is an issue that if the projection is empty or is not in the set of filter columns, any checks on columns would fail and 0 rows are returned (`RowRanges.EMPTY`) even though there is data matching the filter. Note that this is rather a mitigation, a quick fix. The actual fix needs to go into Parquet-MR: https://issues.apache.org/jira/browse/PARQUET-2170. The fix is not required in DSv2 where the overlapping columns are removed in `FileScanBuilder::readDataSchema()`. ### Why are the changes needed? Fixes a correctness issue when projection columns are not referenced by columns in pushed down filters or the schema is empty in Parquet DSv1. Downsides: Parquet column filter would be disabled if it had not been explicitly enabled which could affect performance. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I added a unit test that reproduces this behaviour. The test fails without the fix and passes with the fix. Closes #37419 from sadikovi/SPARK-39833. Authored-by: Ivan Sadikov Signed-off-by: Hyukjin Kwon (cherry picked from commit cde71aaf173aadd14dd6393b09e9851b5caad903) Signed-off-by: Hyukjin Kwon --- .../parquet/ParquetFileFormat.scala | 5 +++++ .../parquet/ParquetQuerySuite.scala | 22 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 9765e7c780193..2fa0854c98308 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -230,6 +230,11 @@ class ParquetFileFormat SQLConf.PARQUET_INT96_AS_TIMESTAMP.key, sparkSession.sessionState.conf.isParquetINT96AsTimestamp) + // See PARQUET-2170. + // Disable column index optimisation when required schema does not have columns that appear in + // pushed filters to avoid getting incorrect results. + hadoopConf.setBooleanIfUnset(ParquetInputFormat.COLUMN_INDEX_FILTERING_ENABLED, false) + val broadcastedHadoopConf = sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 33656c84c88f0..d0a9a93b00fef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -1065,6 +1065,28 @@ class ParquetV1QuerySuite extends ParquetQuerySuite { } } } + + test("SPARK-39833: pushed filters with count()") { + withTempPath { path => + val p = s"${path.getCanonicalPath}${File.separator}col=0${File.separator}" + Seq(0).toDF("COL").coalesce(1).write.save(p) + val df = spark.read.parquet(path.getCanonicalPath) + checkAnswer(df.filter("col = 0"), Seq(Row(0))) + assert(df.filter("col = 0").count() == 1, "col") + assert(df.filter("COL = 0").count() == 1, "COL") + } + } + + test("SPARK-39833: pushed filters with project without filter columns") { + withTempPath { path => + val p = s"${path.getCanonicalPath}${File.separator}col=0${File.separator}" + Seq((0, 1)).toDF("COL", "a").coalesce(1).write.save(p) + val df = spark.read.parquet(path.getCanonicalPath) + checkAnswer(df.filter("col = 0"), Seq(Row(0, 1))) + assert(df.filter("col = 0").select("a").collect().toSeq == Row(1) :: Nil) + assert(df.filter("col = 0 and a = 1").select("a").collect().toSeq == Row(1) :: Nil) + } + } } class ParquetV2QuerySuite extends ParquetQuerySuite { From 233a54d0ab39944ec815bd86d2fc6200c03ca79a Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sun, 21 Aug 2022 14:30:05 -0500 Subject: [PATCH 443/535] [SPARK-40152][SQL] Fix split_part codegen compilation issue ### What changes were proposed in this pull request? Fix `split_part` codegen compilation issue: ```sql SELECT split_part(str, delimiter, partNum) FROM VALUES ('11.12.13', '.', 3) AS v1(str, delimiter, partNum); ``` ``` org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 42, Column 1: failed to compile: org.codehaus.commons.compiler.CompileException: File 'generated.java', Line 42, Column 1: Expression "project_isNull_0 = false" is not a type ``` ### Why are the changes needed? Fix bug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #37589 from wangyum/SPARK-40152. Authored-by: Yuming Wang Signed-off-by: Sean Owen (cherry picked from commit cf1a80eeae8bf815270fb39568b1846c2bd8d437) Signed-off-by: Sean Owen --- .../sql/catalyst/expressions/collectionOperations.scala | 6 +++--- .../test/resources/sql-tests/inputs/string-functions.sql | 1 + .../sql-tests/results/ansi/string-functions.sql.out | 8 ++++++++ .../resources/sql-tests/results/string-functions.sql.out | 8 ++++++++ 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 4a5ae5d2e0205..3786c1a33bc4f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -2225,9 +2225,9 @@ case class ElementAt( case Some(value) => val defaultValueEval = value.genCode(ctx) s""" - ${defaultValueEval.code} - ${ev.isNull} = ${defaultValueEval.isNull} - ${ev.value} = ${defaultValueEval.value} + ${defaultValueEval.code}; + ${ev.isNull} = ${defaultValueEval.isNull}; + ${ev.value} = ${defaultValueEval.value}; """.stripMargin case None => s"${ev.isNull} = true;" } diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index e1c97b468f27a..058ea89179786 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -38,6 +38,7 @@ SELECT split_part('11.12.13', '.', 4); SELECT split_part('11.12.13', '.', 5); SELECT split_part('11.12.13', '.', -5); SELECT split_part(null, '.', 1); +SELECT split_part(str, delimiter, partNum) FROM VALUES ('11.12.13', '.', 3) AS v1(str, delimiter, partNum); -- substring function SELECT substr('Spark SQL', 5); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 35ec3a9756602..c7fda3f68bce2 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -213,6 +213,14 @@ struct NULL +-- !query +SELECT split_part(str, delimiter, partNum) FROM VALUES ('11.12.13', '.', 3) AS v1(str, delimiter, partNum) +-- !query schema +struct +-- !query output +13 + + -- !query SELECT substr('Spark SQL', 5) -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index dc72dfe137d7e..b1d49ae2876a0 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -205,6 +205,14 @@ struct NULL +-- !query +SELECT split_part(str, delimiter, partNum) FROM VALUES ('11.12.13', '.', 3) AS v1(str, delimiter, partNum) +-- !query schema +struct +-- !query output +13 + + -- !query SELECT substr('Spark SQL', 5) -- !query schema From e16467c04cd1a69eedb26b73f9fa4088bb583b90 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Mon, 22 Aug 2022 16:33:37 +0800 Subject: [PATCH 444/535] [SPARK-40089][SQL] Fix sorting for some Decimal types ### What changes were proposed in this pull request? This fixes https://issues.apache.org/jira/browse/SPARK-40089 where the prefix can overflow in some cases and the code assumes that the overflow is always on the negative side, not the positive side. ### Why are the changes needed? This adds a check when the overflow does happen to know what is the proper prefix to return. ### Does this PR introduce _any_ user-facing change? No, unless you consider getting the sort order correct a user facing change. ### How was this patch tested? I tested manually with the file in the JIRA and I added a small unit test. Closes #37540 from revans2/fix_dec_sort. Authored-by: Robert (Bobby) Evans Signed-off-by: Wenchen Fan (cherry picked from commit 8dfd3dfc115d6e249f00a9a434b866d28e2eae45) Signed-off-by: Wenchen Fan --- .../sql/catalyst/expressions/SortOrder.scala | 23 +++++++++++-------- .../spark/sql/execution/SortSuite.scala | 19 +++++++++++++++ 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala index 974d4b5f86889..98a4e396bd60e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SortOrder.scala @@ -173,7 +173,13 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression { val s = p - (dt.precision - dt.scale) (raw) => { val value = raw.asInstanceOf[Decimal] - if (value.changePrecision(p, s)) value.toUnscaledLong else Long.MinValue + if (value.changePrecision(p, s)) { + value.toUnscaledLong + } else if (value.toBigDecimal.signum < 0) { + Long.MinValue + } else { + Long.MaxValue + } } case dt: DecimalType => (raw) => DoublePrefixComparator.computePrefix(raw.asInstanceOf[Decimal].toDouble) @@ -206,15 +212,14 @@ case class SortPrefix(child: SortOrder) extends UnaryExpression { s"$DoublePrefixCmp.computePrefix((double)$input)" case StringType => s"$StringPrefixCmp.computePrefix($input)" case BinaryType => s"$BinaryPrefixCmp.computePrefix($input)" + case dt: DecimalType if dt.precision < Decimal.MAX_LONG_DIGITS => + s"$input.toUnscaledLong()" case dt: DecimalType if dt.precision - dt.scale <= Decimal.MAX_LONG_DIGITS => - if (dt.precision <= Decimal.MAX_LONG_DIGITS) { - s"$input.toUnscaledLong()" - } else { - // reduce the scale to fit in a long - val p = Decimal.MAX_LONG_DIGITS - val s = p - (dt.precision - dt.scale) - s"$input.changePrecision($p, $s) ? $input.toUnscaledLong() : ${Long.MinValue}L" - } + // reduce the scale to fit in a long + val p = Decimal.MAX_LONG_DIGITS + val s = p - (dt.precision - dt.scale) + s"$input.changePrecision($p, $s) ? $input.toUnscaledLong() : " + + s"$input.toBigDecimal().signum() < 0 ? ${Long.MinValue}L : ${Long.MaxValue}L" case dt: DecimalType => s"$DoublePrefixCmp.computePrefix($input.toDouble())" case _ => "0L" diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala index 5fa7a4d0c71cc..7c74423af67e3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SortSuite.scala @@ -114,6 +114,25 @@ class SortSuite extends SparkPlanTest with SharedSparkSession { sortAnswers = false) } + test("SPARK-40089: decimal values sort correctly") { + val input = Seq( + BigDecimal("999999999999999999.50"), + BigDecimal("1.11"), + BigDecimal("999999999999999999.49") + ) + // The range partitioner does the right thing. If there are too many + // shuffle partitions the error might not always show up. + withSQLConf("spark.sql.shuffle.partitions" -> "1") { + val inputDf = spark.createDataFrame(sparkContext.parallelize(input.map(v => Row(v)), 1), + StructType(StructField("a", DecimalType(20, 2)) :: Nil)) + checkAnswer( + inputDf, + (child: SparkPlan) => SortExec('a.asc :: Nil, global = true, child = child), + input.sorted.map(Row(_)), + sortAnswers = false) + } + } + // Test sorting on different data types for ( dataType <- DataTypeTestUtils.atomicTypes ++ Set(NullType); From db121c34dbe22e809c42ebd72618c1597932ac29 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Mon, 22 Aug 2022 19:17:26 +0300 Subject: [PATCH 445/535] [SPARK-39184][SQL][FOLLOWUP] Make interpreted and codegen paths for date/timestamp sequences the same ### What changes were proposed in this pull request? Change how the length of the new result array is calculated in `InternalSequenceBase.eval` to match how the same is calculated in the generated code. ### Why are the changes needed? This change brings the interpreted mode code in line with the generated code. Although I am not aware of any case where the current interpreted mode code fails, the generated code is more correct (it handles the case where the result array must grow more than once, whereas the current interpreted mode code does not). ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing unit tests. Closes #37542 from bersprockets/date_sequence_array_size_issue_follow_up. Authored-by: Bruce Robbins Signed-off-by: Max Gekk (cherry picked from commit d718867a16754c62cb8c30a750485f4856481efc) Signed-off-by: Max Gekk --- .../spark/sql/catalyst/expressions/collectionOperations.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 3786c1a33bc4f..8186d006296f0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -3070,7 +3070,7 @@ object Sequence { // "spring forward" without a corresponding "fall back", make a copy // that's larger by 1 if (i == arr.length) { - arr = arr.padTo(estimatedArrayLength + 1, fromLong(0L)) + arr = arr.padTo(i + 1, fromLong(0L)) } arr(i) = fromLong(result) i += 1 From 008b3a347595cc47ff30853d7141b17bf7be4f13 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 23 Aug 2022 08:55:27 -0500 Subject: [PATCH 446/535] [SPARK-40152][SQL][TESTS] Add tests for SplitPart ### What changes were proposed in this pull request? Add tests for `SplitPart`. ### Why are the changes needed? Improve test coverage. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A. Closes #37626 from wangyum/SPARK-40152-2. Authored-by: Yuming Wang Signed-off-by: Sean Owen (cherry picked from commit 4f525eed7d5d461498aee68c4d3e57941f9aae2c) Signed-off-by: Sean Owen --- .../expressions/collectionOperations.scala | 2 +- .../CollectionExpressionsSuite.scala | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 8186d006296f0..53bda0cbdc773 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -2225,7 +2225,7 @@ case class ElementAt( case Some(value) => val defaultValueEval = value.genCode(ctx) s""" - ${defaultValueEval.code}; + ${defaultValueEval.code} ${ev.isNull} = ${defaultValueEval.isNull}; ${ev.value} = ${defaultValueEval.value}; """.stripMargin diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 802988038a6ef..8fb04cd1ac7a0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -2532,4 +2532,24 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper Date.valueOf("2017-02-12"))) } } + + test("SplitPart") { + val delimiter = Literal.create(".", StringType) + val str = StringSplitSQL(Literal.create("11.12.13", StringType), delimiter) + val outOfBoundValue = Some(Literal.create("", StringType)) + + checkEvaluation(ElementAt(str, Literal(3), outOfBoundValue), UTF8String.fromString("13")) + checkEvaluation(ElementAt(str, Literal(1), outOfBoundValue), UTF8String.fromString("11")) + checkEvaluation(ElementAt(str, Literal(10), outOfBoundValue), UTF8String.fromString("")) + checkEvaluation(ElementAt(str, Literal(-10), outOfBoundValue), UTF8String.fromString("")) + + checkEvaluation(ElementAt(StringSplitSQL(Literal.create(null, StringType), delimiter), + Literal(1), outOfBoundValue), null) + checkEvaluation(ElementAt(StringSplitSQL(Literal.create("11.12.13", StringType), + Literal.create(null, StringType)), Literal(1), outOfBoundValue), null) + + intercept[Exception] { + checkEvaluation(ElementAt(str, Literal(0), outOfBoundValue), null) + }.getMessage.contains("The index 0 is invalid") + } } From 6572c66d01e3db00858f0b4743670a1243d3c44f Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Mon, 22 Aug 2022 16:16:03 +0900 Subject: [PATCH 447/535] [SPARK-40172][ML][TESTS] Temporarily disable flaky test cases in ImageFileFormatSuite ### What changes were proposed in this pull request? 3 test cases in ImageFileFormatSuite become flaky in the GitHub action tests: https://github.com/apache/spark/runs/7941765326?check_suite_focus=true https://github.com/gengliangwang/spark/runs/7928658069 Before they are fixed(https://issues.apache.org/jira/browse/SPARK-40171), I suggest disabling them in OSS. ### Why are the changes needed? Disable flaky tests before they are fixed. The test cases keep failing from time to time, while they always pass on local env. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing CI Closes #37605 from gengliangwang/disableFlakyTest. Authored-by: Gengliang Wang Signed-off-by: Hyukjin Kwon (cherry picked from commit 50f2f506327b7d51af9fb0ae1316135905d2f87d) Signed-off-by: Dongjoon Hyun --- .../spark/ml/source/image/ImageFileFormatSuite.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala index 10b9bbb0bfe24..7981296e2101e 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/source/image/ImageFileFormatSuite.scala @@ -49,7 +49,8 @@ class ImageFileFormatSuite extends SparkFunSuite with MLlibTestSparkContext { assert(df.schema("image").dataType == columnSchema, "data do not fit ImageSchema") } - test("image datasource count test") { + // TODO(SPARK-40171): Re-enable the following flaky test case after being fixed. + ignore("image datasource count test") { val df1 = spark.read.format("image").load(imagePath) assert(df1.count === 9) @@ -87,7 +88,8 @@ class ImageFileFormatSuite extends SparkFunSuite with MLlibTestSparkContext { assert(result === invalidImageRow(resultOrigin)) } - test("image datasource partition test") { + // TODO(SPARK-40171): Re-enable the following flaky test case after being fixed. + ignore("image datasource partition test") { val result = spark.read.format("image") .option("dropInvalid", true).load(imagePath) .select(substring_index(col("image.origin"), "/", -1).as("origin"), col("cls"), col("date")) @@ -105,8 +107,9 @@ class ImageFileFormatSuite extends SparkFunSuite with MLlibTestSparkContext { )) } + // TODO(SPARK-40171): Re-enable the following flaky test case after being fixed. // Images with the different number of channels - test("readImages pixel values test") { + ignore("readImages pixel values test") { val images = spark.read.format("image").option("dropInvalid", true) .load(imagePath + "/cls=multichannel/").collect() From d725d9c20e33e3c68d9c7ec84b74fea2952814b6 Mon Sep 17 00:00:00 2001 From: Kapil Kumar Singh Date: Tue, 23 Aug 2022 17:02:36 -0700 Subject: [PATCH 448/535] [SPARK-40124][SQL][TEST][3.3] Update TPCDS v1.4 q32 for Plan Stability tests ### What changes were proposed in this pull request? This is port of SPARK-40124 to Spark 3.3. Fix query 32 for TPCDS v1.4 ### Why are the changes needed? Current q32.sql seems to be wrong. It is just selection `1`. Reference for query template: https://github.com/databricks/tpcds-kit/blob/eff5de2c30337b71cc0dc1976147742d2c65d378/query_templates/query32.tpl#L41 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Test change only Closes #37615 from mskapilks/change-q32-3.3. Authored-by: Kapil Kumar Singh Signed-off-by: Dongjoon Hyun --- .../approved-plans-v1_4/q32.sf100/explain.txt | 120 ++++++++++-------- .../q32.sf100/simplified.txt | 94 +++++++------- .../approved-plans-v1_4/q32/explain.txt | 120 ++++++++++-------- .../approved-plans-v1_4/q32/simplified.txt | 92 +++++++------- .../tpcds-query-results/v1_4/q32.sql.out | 4 +- sql/core/src/test/resources/tpcds/q32.sql | 2 +- 6 files changed, 236 insertions(+), 196 deletions(-) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt index e7ae6145b4332..0af12591bdafa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt @@ -1,31 +1,33 @@ == Physical Plan == -CollectLimit (27) -+- * Project (26) - +- * BroadcastHashJoin Inner BuildRight (25) - :- * Project (23) - : +- * BroadcastHashJoin Inner BuildLeft (22) - : :- BroadcastExchange (18) - : : +- * Project (17) - : : +- * BroadcastHashJoin Inner BuildLeft (16) - : : :- BroadcastExchange (5) - : : : +- * Project (4) - : : : +- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.item (1) - : : +- * Filter (15) - : : +- * HashAggregate (14) - : : +- Exchange (13) - : : +- * HashAggregate (12) - : : +- * Project (11) - : : +- * BroadcastHashJoin Inner BuildRight (10) - : : :- * Filter (8) - : : : +- * ColumnarToRow (7) - : : : +- Scan parquet default.catalog_sales (6) - : : +- ReusedExchange (9) - : +- * Filter (21) - : +- * ColumnarToRow (20) - : +- Scan parquet default.catalog_sales (19) - +- ReusedExchange (24) +* HashAggregate (29) ++- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- * BroadcastHashJoin Inner BuildRight (25) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildLeft (22) + : :- BroadcastExchange (18) + : : +- * Project (17) + : : +- * BroadcastHashJoin Inner BuildLeft (16) + : : :- BroadcastExchange (5) + : : : +- * Project (4) + : : : +- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.item (1) + : : +- * Filter (15) + : : +- * HashAggregate (14) + : : +- Exchange (13) + : : +- * HashAggregate (12) + : : +- * Project (11) + : : +- * BroadcastHashJoin Inner BuildRight (10) + : : :- * Filter (8) + : : : +- * ColumnarToRow (7) + : : : +- Scan parquet default.catalog_sales (6) + : : +- ReusedExchange (9) + : +- * Filter (21) + : +- * ColumnarToRow (20) + : +- Scan parquet default.catalog_sales (19) + +- ReusedExchange (24) (1) Scan parquet default.item @@ -65,7 +67,7 @@ Input [3]: [cs_item_sk#3, cs_ext_discount_amt#4, cs_sold_date_sk#5] Input [3]: [cs_item_sk#3, cs_ext_discount_amt#4, cs_sold_date_sk#5] Condition : isnotnull(cs_item_sk#3) -(9) ReusedExchange [Reuses operator id: 32] +(9) ReusedExchange [Reuses operator id: 34] Output [1]: [d_date_sk#7] (10) BroadcastHashJoin [codegen id : 3] @@ -133,10 +135,10 @@ Right keys [1]: [cs_item_sk#14] Join condition: (cast(cs_ext_discount_amt#15 as decimal(14,7)) > (1.3 * avg(cs_ext_discount_amt))#13) (23) Project [codegen id : 6] -Output [1]: [cs_sold_date_sk#16] +Output [2]: [cs_ext_discount_amt#15, cs_sold_date_sk#16] Input [5]: [i_item_sk#1, (1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#14, cs_ext_discount_amt#15, cs_sold_date_sk#16] -(24) ReusedExchange [Reuses operator id: 32] +(24) ReusedExchange [Reuses operator id: 34] Output [1]: [d_date_sk#17] (25) BroadcastHashJoin [codegen id : 6] @@ -145,44 +147,58 @@ Right keys [1]: [d_date_sk#17] Join condition: None (26) Project [codegen id : 6] -Output [1]: [1 AS excess discount amount #18] -Input [2]: [cs_sold_date_sk#16, d_date_sk#17] - -(27) CollectLimit -Input [1]: [excess discount amount #18] -Arguments: 100 +Output [1]: [cs_ext_discount_amt#15] +Input [3]: [cs_ext_discount_amt#15, cs_sold_date_sk#16, d_date_sk#17] + +(27) HashAggregate [codegen id : 6] +Input [1]: [cs_ext_discount_amt#15] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_discount_amt#15))] +Aggregate Attributes [1]: [sum#18] +Results [1]: [sum#19] + +(28) Exchange +Input [1]: [sum#19] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(29) HashAggregate [codegen id : 7] +Input [1]: [sum#19] +Keys: [] +Functions [1]: [sum(UnscaledValue(cs_ext_discount_amt#15))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_discount_amt#15))#20] +Results [1]: [MakeDecimal(sum(UnscaledValue(cs_ext_discount_amt#15))#20,17,2) AS excess discount amount#21] ===== Subqueries ===== Subquery:1 Hosting operator id = 6 Hosting Expression = cs_sold_date_sk#5 IN dynamicpruning#6 -BroadcastExchange (32) -+- * Project (31) - +- * Filter (30) - +- * ColumnarToRow (29) - +- Scan parquet default.date_dim (28) +BroadcastExchange (34) ++- * Project (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.date_dim (30) -(28) Scan parquet default.date_dim -Output [2]: [d_date_sk#7, d_date#19] +(30) Scan parquet default.date_dim +Output [2]: [d_date_sk#7, d_date#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] ReadSchema: struct -(29) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#7, d_date#19] +(31) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#7, d_date#22] -(30) Filter [codegen id : 1] -Input [2]: [d_date_sk#7, d_date#19] -Condition : (((isnotnull(d_date#19) AND (d_date#19 >= 2000-01-27)) AND (d_date#19 <= 2000-04-26)) AND isnotnull(d_date_sk#7)) +(32) Filter [codegen id : 1] +Input [2]: [d_date_sk#7, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 2000-01-27)) AND (d_date#22 <= 2000-04-26)) AND isnotnull(d_date_sk#7)) -(31) Project [codegen id : 1] +(33) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [2]: [d_date_sk#7, d_date#19] +Input [2]: [d_date_sk#7, d_date#22] -(32) BroadcastExchange +(34) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] Subquery:2 Hosting operator id = 19 Hosting Expression = cs_sold_date_sk#16 IN dynamicpruning#6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/simplified.txt index 8ca9bf49029f8..27e630265a396 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/simplified.txt @@ -1,48 +1,52 @@ -CollectLimit - WholeStageCodegen (6) - Project - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk] - BroadcastHashJoin [i_item_sk,cs_item_sk,cs_ext_discount_amt,(1.3 * avg(cs_ext_discount_amt))] - InputAdapter - BroadcastExchange #1 - WholeStageCodegen (4) - Project [i_item_sk,(1.3 * avg(cs_ext_discount_amt))] - BroadcastHashJoin [i_item_sk,cs_item_sk] - InputAdapter - BroadcastExchange #2 - WholeStageCodegen (1) - Project [i_item_sk] - Filter [i_manufact_id,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_manufact_id] - Filter [(1.3 * avg(cs_ext_discount_amt))] - HashAggregate [cs_item_sk,sum,count] [avg(UnscaledValue(cs_ext_discount_amt)),(1.3 * avg(cs_ext_discount_amt)),sum,count] - InputAdapter - Exchange [cs_item_sk] #3 - WholeStageCodegen (3) - HashAggregate [cs_item_sk,cs_ext_discount_amt] [sum,count,sum,count] - Project [cs_item_sk,cs_ext_discount_amt] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_item_sk] +WholeStageCodegen (7) + HashAggregate [sum] [sum(UnscaledValue(cs_ext_discount_amt)),excess discount amount,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (6) + HashAggregate [cs_ext_discount_amt] [sum,sum] + Project [cs_ext_discount_amt] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_discount_amt,cs_sold_date_sk] + BroadcastHashJoin [i_item_sk,cs_item_sk,cs_ext_discount_amt,(1.3 * avg(cs_ext_discount_amt))] + InputAdapter + BroadcastExchange #2 + WholeStageCodegen (4) + Project [i_item_sk,(1.3 * avg(cs_ext_discount_amt))] + BroadcastHashJoin [i_item_sk,cs_item_sk] + InputAdapter + BroadcastExchange #3 + WholeStageCodegen (1) + Project [i_item_sk] + Filter [i_manufact_id,i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.catalog_sales [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #4 - WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_date,d_date_sk] - ColumnarToRow - InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] - InputAdapter - ReusedExchange [d_date_sk] #4 - Filter [cs_item_sk,cs_ext_discount_amt] - ColumnarToRow + Scan parquet default.item [i_item_sk,i_manufact_id] + Filter [(1.3 * avg(cs_ext_discount_amt))] + HashAggregate [cs_item_sk,sum,count] [avg(UnscaledValue(cs_ext_discount_amt)),(1.3 * avg(cs_ext_discount_amt)),sum,count] + InputAdapter + Exchange [cs_item_sk] #4 + WholeStageCodegen (3) + HashAggregate [cs_item_sk,cs_ext_discount_amt] [sum,count,sum,count] + Project [cs_item_sk,cs_ext_discount_amt] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #5 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + ReusedExchange [d_date_sk] #5 + Filter [cs_item_sk,cs_ext_discount_amt] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + ReusedSubquery [d_date_sk] #1 InputAdapter - Scan parquet default.catalog_sales [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] - ReusedSubquery [d_date_sk] #1 - InputAdapter - ReusedExchange [d_date_sk] #4 + ReusedExchange [d_date_sk] #5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt index abbb43c8c75d7..09e3a4025948d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt @@ -1,31 +1,33 @@ == Physical Plan == -CollectLimit (27) -+- * Project (26) - +- * BroadcastHashJoin Inner BuildRight (25) - :- * Project (23) - : +- * BroadcastHashJoin Inner BuildRight (22) - : :- * Project (10) - : : +- * BroadcastHashJoin Inner BuildRight (9) - : : :- * Filter (3) - : : : +- * ColumnarToRow (2) - : : : +- Scan parquet default.catalog_sales (1) - : : +- BroadcastExchange (8) - : : +- * Project (7) - : : +- * Filter (6) - : : +- * ColumnarToRow (5) - : : +- Scan parquet default.item (4) - : +- BroadcastExchange (21) - : +- * Filter (20) - : +- * HashAggregate (19) - : +- Exchange (18) - : +- * HashAggregate (17) - : +- * Project (16) - : +- * BroadcastHashJoin Inner BuildRight (15) - : :- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.catalog_sales (11) - : +- ReusedExchange (14) - +- ReusedExchange (24) +* HashAggregate (29) ++- Exchange (28) + +- * HashAggregate (27) + +- * Project (26) + +- * BroadcastHashJoin Inner BuildRight (25) + :- * Project (23) + : +- * BroadcastHashJoin Inner BuildRight (22) + : :- * Project (10) + : : +- * BroadcastHashJoin Inner BuildRight (9) + : : :- * Filter (3) + : : : +- * ColumnarToRow (2) + : : : +- Scan parquet default.catalog_sales (1) + : : +- BroadcastExchange (8) + : : +- * Project (7) + : : +- * Filter (6) + : : +- * ColumnarToRow (5) + : : +- Scan parquet default.item (4) + : +- BroadcastExchange (21) + : +- * Filter (20) + : +- * HashAggregate (19) + : +- Exchange (18) + : +- * HashAggregate (17) + : +- * Project (16) + : +- * BroadcastHashJoin Inner BuildRight (15) + : :- * Filter (13) + : : +- * ColumnarToRow (12) + : : +- Scan parquet default.catalog_sales (11) + : +- ReusedExchange (14) + +- ReusedExchange (24) (1) Scan parquet default.catalog_sales @@ -89,7 +91,7 @@ Input [3]: [cs_item_sk#7, cs_ext_discount_amt#8, cs_sold_date_sk#9] Input [3]: [cs_item_sk#7, cs_ext_discount_amt#8, cs_sold_date_sk#9] Condition : isnotnull(cs_item_sk#7) -(14) ReusedExchange [Reuses operator id: 32] +(14) ReusedExchange [Reuses operator id: 34] Output [1]: [d_date_sk#10] (15) BroadcastHashJoin [codegen id : 3] @@ -133,10 +135,10 @@ Right keys [1]: [cs_item_sk#7] Join condition: (cast(cs_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(cs_ext_discount_amt))#16) (23) Project [codegen id : 6] -Output [1]: [cs_sold_date_sk#3] +Output [2]: [cs_ext_discount_amt#2, cs_sold_date_sk#3] Input [5]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#5, (1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#7] -(24) ReusedExchange [Reuses operator id: 32] +(24) ReusedExchange [Reuses operator id: 34] Output [1]: [d_date_sk#17] (25) BroadcastHashJoin [codegen id : 6] @@ -145,44 +147,58 @@ Right keys [1]: [d_date_sk#17] Join condition: None (26) Project [codegen id : 6] -Output [1]: [1 AS excess discount amount #18] -Input [2]: [cs_sold_date_sk#3, d_date_sk#17] - -(27) CollectLimit -Input [1]: [excess discount amount #18] -Arguments: 100 +Output [1]: [cs_ext_discount_amt#2] +Input [3]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, d_date_sk#17] + +(27) HashAggregate [codegen id : 6] +Input [1]: [cs_ext_discount_amt#2] +Keys: [] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_discount_amt#2))] +Aggregate Attributes [1]: [sum#18] +Results [1]: [sum#19] + +(28) Exchange +Input [1]: [sum#19] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] + +(29) HashAggregate [codegen id : 7] +Input [1]: [sum#19] +Keys: [] +Functions [1]: [sum(UnscaledValue(cs_ext_discount_amt#2))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_discount_amt#2))#20] +Results [1]: [MakeDecimal(sum(UnscaledValue(cs_ext_discount_amt#2))#20,17,2) AS excess discount amount#21] ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = cs_sold_date_sk#3 IN dynamicpruning#4 -BroadcastExchange (32) -+- * Project (31) - +- * Filter (30) - +- * ColumnarToRow (29) - +- Scan parquet default.date_dim (28) +BroadcastExchange (34) ++- * Project (33) + +- * Filter (32) + +- * ColumnarToRow (31) + +- Scan parquet default.date_dim (30) -(28) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_date#19] +(30) Scan parquet default.date_dim +Output [2]: [d_date_sk#17, d_date#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] ReadSchema: struct -(29) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#17, d_date#19] +(31) ColumnarToRow [codegen id : 1] +Input [2]: [d_date_sk#17, d_date#22] -(30) Filter [codegen id : 1] -Input [2]: [d_date_sk#17, d_date#19] -Condition : (((isnotnull(d_date#19) AND (d_date#19 >= 2000-01-27)) AND (d_date#19 <= 2000-04-26)) AND isnotnull(d_date_sk#17)) +(32) Filter [codegen id : 1] +Input [2]: [d_date_sk#17, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 2000-01-27)) AND (d_date#22 <= 2000-04-26)) AND isnotnull(d_date_sk#17)) -(31) Project [codegen id : 1] +(33) Project [codegen id : 1] Output [1]: [d_date_sk#17] -Input [2]: [d_date_sk#17, d_date#19] +Input [2]: [d_date_sk#17, d_date#22] -(32) BroadcastExchange +(34) BroadcastExchange Input [1]: [d_date_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] Subquery:2 Hosting operator id = 11 Hosting Expression = cs_sold_date_sk#9 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt index aea77be43cf05..0b2410699cf89 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/simplified.txt @@ -1,48 +1,52 @@ -CollectLimit - WholeStageCodegen (6) - Project - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Project [cs_sold_date_sk] - BroadcastHashJoin [i_item_sk,cs_item_sk,cs_ext_discount_amt,(1.3 * avg(cs_ext_discount_amt))] - Project [cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] - BroadcastHashJoin [cs_item_sk,i_item_sk] - Filter [cs_item_sk,cs_ext_discount_amt] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] - SubqueryBroadcast [d_date_sk] #1 - BroadcastExchange #1 +WholeStageCodegen (7) + HashAggregate [sum] [sum(UnscaledValue(cs_ext_discount_amt)),excess discount amount,sum] + InputAdapter + Exchange #1 + WholeStageCodegen (6) + HashAggregate [cs_ext_discount_amt] [sum,sum] + Project [cs_ext_discount_amt] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Project [cs_ext_discount_amt,cs_sold_date_sk] + BroadcastHashJoin [i_item_sk,cs_item_sk,cs_ext_discount_amt,(1.3 * avg(cs_ext_discount_amt))] + Project [cs_ext_discount_amt,cs_sold_date_sk,i_item_sk] + BroadcastHashJoin [cs_item_sk,i_item_sk] + Filter [cs_item_sk,cs_ext_discount_amt] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + SubqueryBroadcast [d_date_sk] #1 + BroadcastExchange #2 + WholeStageCodegen (1) + Project [d_date_sk] + Filter [d_date,d_date_sk] + ColumnarToRow + InputAdapter + Scan parquet default.date_dim [d_date_sk,d_date] + InputAdapter + BroadcastExchange #3 WholeStageCodegen (1) - Project [d_date_sk] - Filter [d_date,d_date_sk] + Project [i_item_sk] + Filter [i_manufact_id,i_item_sk] ColumnarToRow InputAdapter - Scan parquet default.date_dim [d_date_sk,d_date] + Scan parquet default.item [i_item_sk,i_manufact_id] + InputAdapter + BroadcastExchange #4 + WholeStageCodegen (4) + Filter [(1.3 * avg(cs_ext_discount_amt))] + HashAggregate [cs_item_sk,sum,count] [avg(UnscaledValue(cs_ext_discount_amt)),(1.3 * avg(cs_ext_discount_amt)),sum,count] + InputAdapter + Exchange [cs_item_sk] #5 + WholeStageCodegen (3) + HashAggregate [cs_item_sk,cs_ext_discount_amt] [sum,count,sum,count] + Project [cs_item_sk,cs_ext_discount_amt] + BroadcastHashJoin [cs_sold_date_sk,d_date_sk] + Filter [cs_item_sk] + ColumnarToRow + InputAdapter + Scan parquet default.catalog_sales [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] + ReusedSubquery [d_date_sk] #1 + InputAdapter + ReusedExchange [d_date_sk] #2 InputAdapter - BroadcastExchange #2 - WholeStageCodegen (1) - Project [i_item_sk] - Filter [i_manufact_id,i_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.item [i_item_sk,i_manufact_id] - InputAdapter - BroadcastExchange #3 - WholeStageCodegen (4) - Filter [(1.3 * avg(cs_ext_discount_amt))] - HashAggregate [cs_item_sk,sum,count] [avg(UnscaledValue(cs_ext_discount_amt)),(1.3 * avg(cs_ext_discount_amt)),sum,count] - InputAdapter - Exchange [cs_item_sk] #4 - WholeStageCodegen (3) - HashAggregate [cs_item_sk,cs_ext_discount_amt] [sum,count,sum,count] - Project [cs_item_sk,cs_ext_discount_amt] - BroadcastHashJoin [cs_sold_date_sk,d_date_sk] - Filter [cs_item_sk] - ColumnarToRow - InputAdapter - Scan parquet default.catalog_sales [cs_item_sk,cs_ext_discount_amt,cs_sold_date_sk] - ReusedSubquery [d_date_sk] #1 - InputAdapter - ReusedExchange [d_date_sk] #1 - InputAdapter - ReusedExchange [d_date_sk] #1 + ReusedExchange [d_date_sk] #2 diff --git a/sql/core/src/test/resources/tpcds-query-results/v1_4/q32.sql.out b/sql/core/src/test/resources/tpcds-query-results/v1_4/q32.sql.out index de81b93b81cc1..fb73f26cf397b 100644 --- a/sql/core/src/test/resources/tpcds-query-results/v1_4/q32.sql.out +++ b/sql/core/src/test/resources/tpcds-query-results/v1_4/q32.sql.out @@ -1,6 +1,6 @@ -- Automatically generated by TPCDSQueryTestSuite -- !query schema -struct +struct -- !query output -1 +9089.28 diff --git a/sql/core/src/test/resources/tpcds/q32.sql b/sql/core/src/test/resources/tpcds/q32.sql index a6f59ecb87366..1d856ca523045 100755 --- a/sql/core/src/test/resources/tpcds/q32.sql +++ b/sql/core/src/test/resources/tpcds/q32.sql @@ -1,4 +1,4 @@ -SELECT 1 AS `excess discount amount ` +SELECT sum(cs_ext_discount_amt) AS `excess discount amount` FROM catalog_sales, item, date_dim WHERE From d9dc28075bcf6e4c6756418ae872fc8db36867f2 Mon Sep 17 00:00:00 2001 From: Linhong Liu Date: Thu, 25 Aug 2022 13:18:45 +0900 Subject: [PATCH 449/535] [SPARK-40213][SQL] Support ASCII value conversion for Latin-1 characters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR proposes to support ASCII value conversion for Latin-1 Supplement characters. ### Why are the changes needed? `ascii()` should be the inverse of `chr()`. But for latin-1 char, we get incorrect ascii value. For example: ```sql select ascii('§') -- output: -62, expect: 167 select chr(167) -- output: '§' ``` ### Does this PR introduce _any_ user-facing change? Yes, fixes the incorrect ASCII conversion for Latin-1 Supplement characters ### How was this patch tested? UT Closes #37651 from linhongliu-db/SPARK-40213. Authored-by: Linhong Liu Signed-off-by: Hyukjin Kwon (cherry picked from commit c07852380471f02955d6d17cddb3150231daa71f) Signed-off-by: Hyukjin Kwon --- .../catalyst/expressions/stringExpressions.scala | 15 ++++++++------- .../expressions/StringExpressionsSuite.scala | 11 +++++++++++ .../resources/sql-tests/inputs/charvarchar.sql | 4 ++++ .../sql-tests/results/charvarchar.sql.out | 16 ++++++++++++++++ 4 files changed, 39 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index c56a1dc47ae21..00bd98a93e55a 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -2335,9 +2335,10 @@ case class Ascii(child: Expression) override def inputTypes: Seq[DataType] = Seq(StringType) protected override def nullSafeEval(string: Any): Any = { - val bytes = string.asInstanceOf[UTF8String].getBytes - if (bytes.length > 0) { - bytes(0).asInstanceOf[Int] + // only pick the first character to reduce the `toString` cost + val firstCharStr = string.asInstanceOf[UTF8String].substring(0, 1) + if (firstCharStr.numChars > 0) { + firstCharStr.toString.codePointAt(0) } else { 0 } @@ -2345,11 +2346,11 @@ case class Ascii(child: Expression) override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { nullSafeCodeGen(ctx, ev, (child) => { - val bytes = ctx.freshName("bytes") + val firstCharStr = ctx.freshName("firstCharStr") s""" - byte[] $bytes = $child.getBytes(); - if ($bytes.length > 0) { - ${ev.value} = (int) $bytes[0]; + UTF8String $firstCharStr = $child.substring(0, 1); + if ($firstCharStr.numChars() > 0) { + ${ev.value} = $firstCharStr.toString().codePointAt(0); } else { ${ev.value} = 0; } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 655e9b744bf15..459079178701e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -307,6 +307,17 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { SubstringIndex(Literal("www||apache||org"), Literal( "||"), Literal(2)), "www||apache") } + test("SPARK-40213: ascii for Latin-1 Supplement characters") { + // scalastyle:off + checkEvaluation(Ascii(Literal("¥")), 165, create_row("¥")) + checkEvaluation(Ascii(Literal("®")), 174, create_row("®")) + checkEvaluation(Ascii(Literal("©")), 169, create_row("©")) + // scalastyle:on + (128 until 256).foreach { c => + checkEvaluation(Ascii(Chr(Literal(c.toLong))), c, create_row(c.toLong)) + } + } + test("ascii for string") { val a = 'a.string.at(0) checkEvaluation(Ascii(Literal("efg")), 101, create_row("abdef")) diff --git a/sql/core/src/test/resources/sql-tests/inputs/charvarchar.sql b/sql/core/src/test/resources/sql-tests/inputs/charvarchar.sql index 098d09d98218d..b62cbf6432329 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/charvarchar.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/charvarchar.sql @@ -113,3 +113,7 @@ drop table char_tbl1; drop table char_tbl2; drop table char_tbl3; drop table char_tbl4; + +-- ascii value for Latin-1 Supplement characters +select ascii('§'), ascii('÷'), ascii('×10'); +select chr(167), chr(247), chr(215); diff --git a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out index 6345702e00ea2..a1eb20da9f68e 100644 --- a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out @@ -1154,3 +1154,19 @@ drop table char_tbl4 struct<> -- !query output + + +-- !query +select ascii('§'), ascii('÷'), ascii('×10') +-- !query schema +struct +-- !query output +167 247 215 + + +-- !query +select chr(167), chr(247), chr(215) +-- !query schema +struct +-- !query output +§ ÷ × From 784cb0f68af84ac85e66b67439c8c7289f1c8b6e Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 26 Aug 2022 15:24:01 +0800 Subject: [PATCH 450/535] [SPARK-40218][SQL] GROUPING SETS should preserve the grouping columns ### What changes were proposed in this pull request? This PR fixes a bug caused by https://github.com/apache/spark/pull/32022 . Although we deprecate `GROUP BY ... GROUPING SETS ...`, it should still work if it worked before. https://github.com/apache/spark/pull/32022 made a mistake that it didn't preserve the order of user-specified group by columns. Usually it's not a problem, as `GROUP BY a, b` is no different from `GROUP BY b, a`. However, the `grouping_id(...)` function requires the input to be exactly the same with the group by columns. This PR fixes the problem by preserve the order of user-specified group by columns. ### Why are the changes needed? bug fix ### Does this PR introduce _any_ user-facing change? Yes, now a query that worked before 3.2 can work again. ### How was this patch tested? new test Closes #37655 from cloud-fan/grouping. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit 1ed592ef28abdb14aa1d8c8a129d6ac3084ffb0c) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/expressions/grouping.scala | 9 +++++++-- .../test/resources/sql-tests/inputs/grouping_set.sql | 3 +++ .../resources/sql-tests/results/grouping_set.sql.out | 11 +++++++++++ 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala index 8ce0e57b69159..22e25b31f2e1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala @@ -151,10 +151,15 @@ case class GroupingSets( override def selectedGroupByExprs: Seq[Seq[Expression]] = groupingSets // Includes the `userGivenGroupByExprs` in the children, which will be included in the final // GROUP BY expressions, so that `SELECT c ... GROUP BY (a, b, c) GROUPING SETS (a, b)` works. - override def children: Seq[Expression] = flatGroupingSets ++ userGivenGroupByExprs + // Note that, we must put `userGivenGroupByExprs` at the beginning, to preserve the order of + // grouping columns specified by users. For example, GROUP BY (a, b) GROUPING SETS (b, a), the + // final grouping columns should be (a, b). + override def children: Seq[Expression] = userGivenGroupByExprs ++ flatGroupingSets override protected def withNewChildrenInternal( newChildren: IndexedSeq[Expression]): GroupingSets = - super.legacyWithNewChildren(newChildren).asInstanceOf[GroupingSets] + copy( + userGivenGroupByExprs = newChildren.take(userGivenGroupByExprs.length), + flatGroupingSets = newChildren.drop(userGivenGroupByExprs.length)) } object GroupingSets { diff --git a/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql b/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql index d30914fdd92df..4d516bdda7b1b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/grouping_set.sql @@ -57,3 +57,6 @@ SELECT k1, k2, avg(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUP SELECT grouping__id, k1, k2, avg(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1)); SELECT grouping(k1), k1, k2, avg(v) FROM (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY GROUPING SETS ((k1),(k1,k2),(k2,k1)); + +-- grouping_id function +SELECT grouping_id(k1, k2), avg(v) from (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY k1, k2 GROUPING SETS ((k2, k1), k1); diff --git a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out index 21c13af560dac..d89050ab6d8df 100644 --- a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out @@ -205,3 +205,14 @@ struct 0 2 2 2.0 0 2 2 2.0 0 2 NULL 2.0 + + +-- !query +SELECT grouping_id(k1, k2), avg(v) from (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY k1, k2 GROUPING SETS ((k2, k1), k1) +-- !query schema +struct +-- !query output +0 1.0 +0 2.0 +1 1.0 +1 2.0 From 167f3ff4d752c6f51b71a38378deb47c97f745f0 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 24 Aug 2022 13:33:26 +0900 Subject: [PATCH 451/535] [SPARK-40152][SQL][TESTS] Move tests from SplitPart to elementAt Move tests from SplitPart to elementAt in CollectionExpressionsSuite. Simplify test. No. N/A. Closes #37637 from wangyum/SPARK-40152-3. Authored-by: Yuming Wang Signed-off-by: Hyukjin Kwon (cherry picked from commit 06997d6eb73f271aede5b159d86d1db80a73b89f) Signed-off-by: Hyukjin Kwon --- .../CollectionExpressionsSuite.scala | 38 +++++++++---------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 8fb04cd1ac7a0..27187a15f43c2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -1535,6 +1535,24 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper } checkEvaluation(ElementAt(mb0, Literal(Array[Byte](2, 1), BinaryType)), "2") checkEvaluation(ElementAt(mb0, Literal(Array[Byte](3, 4))), null) + + // test defaultValueOutOfBound + val delimiter = Literal.create(".", StringType) + val str = StringSplitSQL(Literal.create("11.12.13", StringType), delimiter) + val outOfBoundValue = Some(Literal.create("", StringType)) + + checkEvaluation(ElementAt(str, Literal(3), outOfBoundValue), UTF8String.fromString("13")) + checkEvaluation(ElementAt(str, Literal(1), outOfBoundValue), UTF8String.fromString("11")) + checkEvaluation(ElementAt(str, Literal(10), outOfBoundValue), UTF8String.fromString("")) + checkEvaluation(ElementAt(str, Literal(-10), outOfBoundValue), UTF8String.fromString("")) + + checkEvaluation(ElementAt(StringSplitSQL(Literal.create(null, StringType), delimiter), + Literal(1), outOfBoundValue), null) + checkEvaluation(ElementAt(StringSplitSQL(Literal.create("11.12.13", StringType), + Literal.create(null, StringType)), Literal(1), outOfBoundValue), null) + + checkExceptionInExpression[Exception]( + ElementAt(str, Literal(0), outOfBoundValue), "The index 0 is invalid") } test("correctly handles ElementAt nullability for arrays") { @@ -2532,24 +2550,4 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper Date.valueOf("2017-02-12"))) } } - - test("SplitPart") { - val delimiter = Literal.create(".", StringType) - val str = StringSplitSQL(Literal.create("11.12.13", StringType), delimiter) - val outOfBoundValue = Some(Literal.create("", StringType)) - - checkEvaluation(ElementAt(str, Literal(3), outOfBoundValue), UTF8String.fromString("13")) - checkEvaluation(ElementAt(str, Literal(1), outOfBoundValue), UTF8String.fromString("11")) - checkEvaluation(ElementAt(str, Literal(10), outOfBoundValue), UTF8String.fromString("")) - checkEvaluation(ElementAt(str, Literal(-10), outOfBoundValue), UTF8String.fromString("")) - - checkEvaluation(ElementAt(StringSplitSQL(Literal.create(null, StringType), delimiter), - Literal(1), outOfBoundValue), null) - checkEvaluation(ElementAt(StringSplitSQL(Literal.create("11.12.13", StringType), - Literal.create(null, StringType)), Literal(1), outOfBoundValue), null) - - intercept[Exception] { - checkEvaluation(ElementAt(str, Literal(0), outOfBoundValue), null) - }.getMessage.contains("The index 0 is invalid") - } } From 2de364a4134670b46f606acabd59f204bcbd7dc2 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Sat, 27 Aug 2022 14:58:56 +0900 Subject: [PATCH 452/535] [SPARK-40152][SQL][TESTS][FOLLOW-UP] Disable ANSI for out of bound test at ElementAt This PR proposes to fix the test to pass with ANSI mode on. Currently `elementAt` test fails when ANSI mode is on: ``` [info] - elementAt *** FAILED *** (309 milliseconds) [info] Exception evaluating element_at(stringsplitsql(11.12.13, .), 10, Some(), true) (ExpressionEvalHelper.scala:205) [info] org.scalatest.exceptions.TestFailedException: [info] at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:472) [info] at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:471) [info] at org.scalatest.funsuite.AnyFunSuite.newAssertionFailedException(AnyFunSuite.scala:1563) [info] at org.scalatest.Assertions.fail(Assertions.scala:949) [info] at org.scalatest.Assertions.fail$(Assertions.scala:945) [info] at org.scalatest.funsuite.AnyFunSuite.fail(AnyFunSuite.scala:1563) [info] at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper.checkEvaluationWithoutCodegen(ExpressionEvalHelper.scala:205) [info] at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper.checkEvaluationWithoutCodegen$(ExpressionEvalHelper.scala:199) [info] at org.apache.spark.sql.catalyst.expressions.CollectionExpressionsSuite.checkEvaluationWithoutCodegen(CollectionExpressionsSuite.scala:39) [info] at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper.checkEvaluation(ExpressionEvalHelper.scala:87) [info] at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper.checkEvaluation$(ExpressionEvalHelper.scala:82) [info] at org.apache.spark.sql.catalyst.expressions.CollectionExpressionsSuite.checkEvaluation(CollectionExpressionsSuite.scala:39) [info] at org.apache.spark.sql.catalyst.expressions.CollectionExpressionsSuite.$anonfun$new$333(CollectionExpressionsSuite.scala:1546) ``` https://github.com/apache/spark/runs/8046961366?check_suite_focus=true To recover the build with ANSI mode. No, test-only. Unittest fixed. Closes #37684 from HyukjinKwon/SPARK-40152. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 4b0c3bab1ab082565a051990bf45774f15962deb) Signed-off-by: Hyukjin Kwon --- .../CollectionExpressionsSuite.scala | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 27187a15f43c2..e121dab4c3398 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -1483,7 +1483,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(ElementAt(a0, Literal(0)), null) }.getMessage.contains("SQL array indices start at 1") intercept[Exception] { checkEvaluation(ElementAt(a0, Literal(1.1)), null) } - withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) { checkEvaluation(ElementAt(a0, Literal(4)), null) checkEvaluation(ElementAt(a0, Literal(-4)), null) } @@ -1512,7 +1512,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper assert(ElementAt(m0, Literal(1.0)).checkInputDataTypes().isFailure) - withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) { checkEvaluation(ElementAt(m0, Literal("d")), null) checkEvaluation(ElementAt(m1, Literal("a")), null) } @@ -1529,7 +1529,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper MapType(BinaryType, StringType)) val mb1 = Literal.create(Map[Array[Byte], String](), MapType(BinaryType, StringType)) - withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) { checkEvaluation(ElementAt(mb0, Literal(Array[Byte](1, 2, 3))), null) checkEvaluation(ElementAt(mb1, Literal(Array[Byte](1, 2))), null) } @@ -1537,22 +1537,24 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkEvaluation(ElementAt(mb0, Literal(Array[Byte](3, 4))), null) // test defaultValueOutOfBound - val delimiter = Literal.create(".", StringType) - val str = StringSplitSQL(Literal.create("11.12.13", StringType), delimiter) - val outOfBoundValue = Some(Literal.create("", StringType)) - - checkEvaluation(ElementAt(str, Literal(3), outOfBoundValue), UTF8String.fromString("13")) - checkEvaluation(ElementAt(str, Literal(1), outOfBoundValue), UTF8String.fromString("11")) - checkEvaluation(ElementAt(str, Literal(10), outOfBoundValue), UTF8String.fromString("")) - checkEvaluation(ElementAt(str, Literal(-10), outOfBoundValue), UTF8String.fromString("")) - - checkEvaluation(ElementAt(StringSplitSQL(Literal.create(null, StringType), delimiter), - Literal(1), outOfBoundValue), null) - checkEvaluation(ElementAt(StringSplitSQL(Literal.create("11.12.13", StringType), - Literal.create(null, StringType)), Literal(1), outOfBoundValue), null) - - checkExceptionInExpression[Exception]( - ElementAt(str, Literal(0), outOfBoundValue), "The index 0 is invalid") + withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) { + val delimiter = Literal.create(".", StringType) + val str = StringSplitSQL(Literal.create("11.12.13", StringType), delimiter) + val outOfBoundValue = Some(Literal.create("", StringType)) + + checkEvaluation(ElementAt(str, Literal(3), outOfBoundValue), UTF8String.fromString("13")) + checkEvaluation(ElementAt(str, Literal(1), outOfBoundValue), UTF8String.fromString("11")) + checkEvaluation(ElementAt(str, Literal(10), outOfBoundValue), UTF8String.fromString("")) + checkEvaluation(ElementAt(str, Literal(-10), outOfBoundValue), UTF8String.fromString("")) + + checkEvaluation(ElementAt(StringSplitSQL(Literal.create(null, StringType), delimiter), + Literal(1), outOfBoundValue), null) + checkEvaluation(ElementAt(StringSplitSQL(Literal.create("11.12.13", StringType), + Literal.create(null, StringType)), Literal(1), outOfBoundValue), null) + + checkExceptionInExpression[Exception]( + ElementAt(str, Literal(0), outOfBoundValue), "The index 0 is invalid") + } } test("correctly handles ElementAt nullability for arrays") { From 694e4e7479be009d15e9faca411cd4e6303d22d5 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Sat, 27 Aug 2022 18:39:17 +0800 Subject: [PATCH 453/535] [SPARK-40241][DOCS] Correct the link of GenericUDTF ### What changes were proposed in this pull request? Correct the link ### Why are the changes needed? existing link was wrong ### Does this PR introduce _any_ user-facing change? yes, a link was updated ### How was this patch tested? Manually check Closes #37685 from zhengruifeng/doc_fix_udtf. Authored-by: Ruifeng Zheng Signed-off-by: Yuming Wang (cherry picked from commit 8ffcecb68fafd0466e839281588aab50cd046b49) Signed-off-by: Yuming Wang --- docs/sql-ref-functions-udf-hive.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-ref-functions-udf-hive.md b/docs/sql-ref-functions-udf-hive.md index 819c446c411d2..5b78dbf97098a 100644 --- a/docs/sql-ref-functions-udf-hive.md +++ b/docs/sql-ref-functions-udf-hive.md @@ -55,7 +55,7 @@ SELECT testUDF(value) FROM t; ``` -An example below uses [GenericUDTFExplode](https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java) derived from [GenericUDTF](https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java). +An example below uses [GenericUDTFExplode](https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFExplode.java) derived from [GenericUDTF](https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTF.java). ```sql -- Register `GenericUDTFExplode` and use it in Spark SQL From e3f6b6d1e15378860b5e30fb4c40168215b16eea Mon Sep 17 00:00:00 2001 From: Brennan Stein Date: Mon, 29 Aug 2022 10:55:30 +0900 Subject: [PATCH 454/535] [SPARK-40212][SQL] SparkSQL castPartValue does not properly handle byte, short, or float The `castPartValueToDesiredType` function now returns byte for ByteType and short for ShortType, rather than ints; also floats for FloatType rather than double. Previously, attempting to read back in a file partitioned on one of these column types would result in a ClassCastException at runtime (for Byte, `java.lang.ClassCastException: java.lang.Integer cannot be cast to java.lang.Byte`). I can't think this is anything but a bug, as returning the correct data type prevents the crash. Yes: it changes the observed behavior when reading in a byte/short/float-partitioned file. Added unit test. Without the `castPartValueToDesiredType` updates, the test fails with the stated exception. === I'll note that I'm not familiar enough with the spark repo to know if this will have ripple effects elsewhere, but tests pass on my fork and since the very similar https://github.com/apache/spark/pull/36344/files only needed to touch these two files I expect this change is self-contained as well. Closes #37659 from BrennanStein/spark40212. Authored-by: Brennan Stein Signed-off-by: Hyukjin Kwon (cherry picked from commit 146f187342140635b83bfe775b6c327755edfbe1) Signed-off-by: Hyukjin Kwon --- .../datasources/PartitioningUtils.scala | 7 +++++-- .../ParquetPartitionDiscoverySuite.scala | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index e856bb5b9c2f9..2b9c6e724b6c0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -530,9 +530,12 @@ object PartitioningUtils extends SQLConfHelper{ case _ if value == DEFAULT_PARTITION_NAME => null case NullType => null case StringType => UTF8String.fromString(unescapePathName(value)) - case ByteType | ShortType | IntegerType => Integer.parseInt(value) + case ByteType => Integer.parseInt(value).toByte + case ShortType => Integer.parseInt(value).toShort + case IntegerType => Integer.parseInt(value) case LongType => JLong.parseLong(value) - case FloatType | DoubleType => JDouble.parseDouble(value) + case FloatType => JDouble.parseDouble(value).toFloat + case DoubleType => JDouble.parseDouble(value) case _: DecimalType => Literal(new JBigDecimal(value)).value case DateType => Cast(Literal(value), DateType, Some(zoneId.getId)).eval() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index bd908a36401f2..d87e0841dfe4c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -1095,6 +1095,23 @@ abstract class ParquetPartitionDiscoverySuite checkAnswer(readback, Row(0, "AA") :: Row(1, "-0") :: Nil) } } + + test("SPARK-40212: SparkSQL castPartValue does not properly handle byte, short, float") { + withTempDir { dir => + val data = Seq[(Int, Byte, Short, Float)]( + (1, 2, 3, 4.0f) + ) + data.toDF("a", "b", "c", "d") + .write + .mode("overwrite") + .partitionBy("b", "c", "d") + .parquet(dir.getCanonicalPath) + val res = spark.read + .schema("a INT, b BYTE, c SHORT, d FLOAT") + .parquet(dir.getCanonicalPath) + checkAnswer(res, Seq(Row(1, 2, 3, 4.0f))) + } + } } class ParquetV1PartitionDiscoverySuite extends ParquetPartitionDiscoverySuite { From 60bd91f257f601985de144fde84a019327cf23f2 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Mon, 29 Aug 2022 15:25:39 +0800 Subject: [PATCH 455/535] [SPARK-40247][SQL] Fix BitSet equality check ### What changes were proposed in this pull request? Spark's `BitSet` doesn't implement `equals()` and `hashCode()` but it is used in `FileSourceScanExec` for bucket pruning. ### Why are the changes needed? Without proper equality check reuse issues can occur. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added new UT. Closes #37696 from peter-toth/SPARK-40247-fix-bitset-equals. Authored-by: Peter Toth Signed-off-by: Wenchen Fan (cherry picked from commit 527ddece8fdbe703dcd239401c97ddb2c6122182) Signed-off-by: Wenchen Fan --- .../apache/spark/util/collection/BitSet.scala | 9 +++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 25 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala index 61386114997f6..6bb5058f5ed14 100644 --- a/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala +++ b/core/src/main/scala/org/apache/spark/util/collection/BitSet.scala @@ -250,4 +250,13 @@ class BitSet(numBits: Int) extends Serializable { /** Return the number of longs it would take to hold numBits. */ private def bit2words(numBits: Int) = ((numBits - 1) >> 6) + 1 + + override def equals(other: Any): Boolean = other match { + case otherSet: BitSet => Arrays.equals(words, otherSet.words) + case _ => false + } + + override def hashCode(): Int = { + Arrays.hashCode(words) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b0f2421d897ba..6ed6a85b8d1fd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4085,6 +4085,31 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } + test("SPARK-40247: Fix BitSet equals") { + withTable("td") { + testData + .withColumn("bucket", $"key" % 3) + .write + .mode(SaveMode.Overwrite) + .bucketBy(2, "bucket") + .format("parquet") + .saveAsTable("td") + val df = sql( + """ + |SELECT t1.key, t2.key, t3.key + |FROM td AS t1 + |JOIN td AS t2 ON t2.key = t1.key + |JOIN td AS t3 ON t3.key = t2.key + |WHERE t1.bucket = 1 AND t2.bucket = 1 AND t3.bucket = 1 + |""".stripMargin) + df.collect() + val reusedExchanges = collect(df.queryExecution.executedPlan) { + case r: ReusedExchangeExec => r + } + assert(reusedExchanges.size == 1) + } + } + test("SPARK-35331: Fix resolving original expression in RepartitionByExpression after aliased") { Seq("CLUSTER", "DISTRIBUTE").foreach { keyword => Seq("a", "substr(a, 0, 3)").foreach { expr => From c9710c50290be58ab5a044afe76c73c81f84b0a7 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 29 Aug 2022 22:39:29 +0800 Subject: [PATCH 456/535] [SPARK-40152][SQL][TESTS][FOLLOW-UP][3.3] Capture a different error message in 3.3 ### What changes were proposed in this pull request? This PR fixes the error message in branch-3.3. Different error message is thrown at the test added in https://github.com/apache/spark/commit/4b0c3bab1ab082565a051990bf45774f15962deb. ### Why are the changes needed? `branch-3.3` is broken because of the different error message being thrown (https://github.com/apache/spark/runs/8065373173?check_suite_focus=true). ``` [info] - elementAt *** FAILED *** (996 milliseconds) [info] (non-codegen mode) Expected error message is `The index 0 is invalid`, but `SQL array indices start at 1` found (ExpressionEvalHelper.scala:176) [info] org.scalatest.exceptions.TestFailedException: [info] at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:472) [info] at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:471) [info] at org.scalatest.funsuite.AnyFunSuite.newAssertionFailedException(AnyFunSuite.scala:1563) [info] at org.scalatest.Assertions.fail(Assertions.scala:933) [info] at org.scalatest.Assertions.fail$(Assertions.scala:929) [info] at org.scalatest.funsuite.AnyFunSuite.fail(AnyFunSuite.scala:1563) [info] at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper.$anonfun$checkExceptionInExpression$1(ExpressionEvalHelper.scala:176) [info] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18) [info] at org.scalatest.Assertions.withClue(Assertions.scala:1065) [info] at org.scalatest.Assertions.withClue$(Assertions.scala:1052) [info] at org.scalatest.funsuite.AnyFunSuite.withClue(AnyFunSuite.scala:1563) [info] at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper.checkException$1(ExpressionEvalHelper.scala:163) [info] at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper.checkExceptionInExpression(ExpressionEvalHelper.scala:183) [info] at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper.checkExceptionInExpression$(ExpressionEvalHelper.scala:156) [info] at org.apache.spark.sql.catalyst.expressions.CollectionExpressionsSuite.checkExceptionInExpression(CollectionExpressionsSuite.scala:39) [info] at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper.checkExceptionInExpression(ExpressionEvalHelper.scala:153) [info] at org.apache.spark.sql.catalyst.expressions.ExpressionEvalHelper.checkExceptionInExpression$(ExpressionEvalHelper.scala:150) [info] at org.apache.spark.sql.catalyst.expressions.CollectionExpressionsSuite.checkExceptionInExpression(CollectionExpressionsSuite.scala:39) [info] at org.apache.spark.sql.catalyst.expressions.CollectionExpressionsSuite.$anonfun$new$365(CollectionExpressionsSuite.scala:1555) [info] at org.apache.spark.sql.catalyst.plans.SQLHelper.withSQLConf(SQLHelper.scala:54) [info] at org.apache.spark.sql.catalyst.plans.SQLHelper.withSQLConf$(SQLHelper.scala:38) ``` ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? CI in this PR should test it out. Closes #37708 from HyukjinKwon/SPARK-40152-3.3. Authored-by: Hyukjin Kwon Signed-off-by: Yuming Wang --- .../sql/catalyst/expressions/CollectionExpressionsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index e121dab4c3398..c5f5425978430 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -1553,7 +1553,7 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper Literal.create(null, StringType)), Literal(1), outOfBoundValue), null) checkExceptionInExpression[Exception]( - ElementAt(str, Literal(0), outOfBoundValue), "The index 0 is invalid") + ElementAt(str, Literal(0), outOfBoundValue), "SQL array indices start at 1") } } From e46d2e2d476e85024f1c53fdaf446fdd2e293d28 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 30 Aug 2022 16:25:26 +0900 Subject: [PATCH 457/535] [SPARK-40270][PS] Make 'compute.max_rows' as None working in DataFrame.style This PR make `compute.max_rows` option as `None` working in `DataFrame.style`, as expected instead of throwing an exception., by collecting it all to a pandas DataFrame. To make the configuration working as expected. Yes. ```python import pyspark.pandas as ps ps.set_option("compute.max_rows", None) ps.get_option("compute.max_rows") ps.range(1).style ``` **Before:** ``` Traceback (most recent call last): File "", line 1, in File "/.../spark/python/pyspark/pandas/frame.py", line 3656, in style pdf = self.head(max_results + 1)._to_internal_pandas() TypeError: unsupported operand type(s) for +: 'NoneType' and 'int' ``` **After:** ``` ``` Manually tested, and unittest was added. Closes #37718 from HyukjinKwon/SPARK-40270. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 0f0e8cc26b6c80cc179368e3009d4d6c88103a64) Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/frame.py | 16 +++++++------ python/pyspark/pandas/tests/test_dataframe.py | 23 +++++++++++++++++++ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py index 6e8f69ad6e7ac..e9c5cbb9c1e12 100644 --- a/python/pyspark/pandas/frame.py +++ b/python/pyspark/pandas/frame.py @@ -3459,19 +3459,21 @@ def style(self) -> "Styler": Property returning a Styler object containing methods for building a styled HTML representation for the DataFrame. - .. note:: currently it collects top 1000 rows and return its - pandas `pandas.io.formats.style.Styler` instance. - Examples -------- >>> ps.range(1001).style # doctest: +SKIP """ max_results = get_option("compute.max_rows") - pdf = self.head(max_results + 1)._to_internal_pandas() - if len(pdf) > max_results: - warnings.warn("'style' property will only use top %s rows." % max_results, UserWarning) - return pdf.head(max_results).style + if max_results is not None: + pdf = self.head(max_results + 1)._to_internal_pandas() + if len(pdf) > max_results: + warnings.warn( + "'style' property will only use top %s rows." % max_results, UserWarning + ) + return pdf.head(max_results).style + else: + return self._to_internal_pandas().style def set_index( self, diff --git a/python/pyspark/pandas/tests/test_dataframe.py b/python/pyspark/pandas/tests/test_dataframe.py index 1cc03bf06f8ca..0a7eda77564c3 100644 --- a/python/pyspark/pandas/tests/test_dataframe.py +++ b/python/pyspark/pandas/tests/test_dataframe.py @@ -6375,6 +6375,29 @@ def test_cov(self): psdf = ps.from_pandas(pdf) self.assert_eq(pdf.cov(), psdf.cov()) + def test_style(self): + # Currently, the `style` function returns a pandas object `Styler` as it is, + # processing only the number of rows declared in `compute.max_rows`. + # So it's a bit vague to test, but we are doing minimal tests instead of not testing at all. + pdf = pd.DataFrame(np.random.randn(10, 4), columns=["A", "B", "C", "D"]) + psdf = ps.from_pandas(pdf) + + def style_negative(v, props=""): + return props if v < 0 else None + + def check_style(): + # If the value is negative, the text color will be displayed as red. + pdf_style = pdf.style.applymap(style_negative, props="color:red;") + psdf_style = psdf.style.applymap(style_negative, props="color:red;") + + # Test whether the same shape as pandas table is created including the color. + self.assert_eq(pdf_style.to_latex(), psdf_style.to_latex()) + + check_style() + + with ps.option_context("compute.max_rows", None): + check_style() + if __name__ == "__main__": from pyspark.pandas.tests.test_dataframe import * # noqa: F401 From c04aa36b7713a7ebaf368fc2ad4065478e264d85 Mon Sep 17 00:00:00 2001 From: Fu Chen Date: Wed, 31 Aug 2022 13:32:17 +0800 Subject: [PATCH 458/535] [SPARK-39896][SQL] UnwrapCastInBinaryComparison should work when the literal of In/InSet downcast failed ### Why are the changes needed? This PR aims to fix the case ```scala sql("create table t1(a decimal(3, 0)) using parquet") sql("insert into t1 values(100), (10), (1)") sql("select * from t1 where a in(100000, 1.00)").show ``` ``` java.lang.RuntimeException: After applying rule org.apache.spark.sql.catalyst.optimizer.UnwrapCastInBinaryComparison in batch Operator Optimization before Inferring Filters, the structural integrity of the plan is broken. at org.apache.spark.sql.errors.QueryExecutionErrors$.structuralIntegrityIsBrokenAfterApplyingRuleError(QueryExecutionErrors.scala:1325) ``` 1. the rule `UnwrapCastInBinaryComparison` transforms the expression `In` to Equals ``` CAST(a as decimal(12,2)) IN (100000.00,1.00) OR( CAST(a as decimal(12,2)) = 100000.00, CAST(a as decimal(12,2)) = 1.00 ) ``` 2. using `UnwrapCastInBinaryComparison.unwrapCast()` to optimize each `EqualTo` ``` // Expression1 CAST(a as decimal(12,2)) = 100000.00 => CAST(a as decimal(12,2)) = 100000.00 // Expression2 CAST(a as decimal(12,2)) = 1.00 => a = 1 ``` 3. return the new unwrapped cast expression `In` ``` a IN (100000.00, 1.00) ``` Before this PR: the method `UnwrapCastInBinaryComparison.unwrapCast()` returns the original expression when downcasting to a decimal type fails (the `Expression1`),returns the original expression if the downcast to the decimal type succeeds (the `Expression2`), the two expressions have different data type which would break the structural integrity ``` a IN (100000.00, 1.00) | | decimal(12, 2) | decimal(3, 0) ``` After this PR: the PR transform the downcasting failed expression to `falseIfNotNull(fromExp)` ``` ((isnull(a) AND null) OR a IN (1.00) ``` ### Does this PR introduce _any_ user-facing change? No, only bug fix. ### How was this patch tested? Unit test. Closes #37439 from cfmcgrady/SPARK-39896. Authored-by: Fu Chen Signed-off-by: Wenchen Fan (cherry picked from commit 6e62b93f3d1ef7e2d6be0a3bb729ab9b2d55a36d) Signed-off-by: Wenchen Fan --- .../UnwrapCastInBinaryComparison.scala | 131 +++++++++--------- .../UnwrapCastInBinaryComparisonSuite.scala | 68 ++++++--- 2 files changed, 113 insertions(+), 86 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala index 94e27379b7465..f4a92760d225c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.optimizer -import scala.collection.immutable.HashSet import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.expressions._ @@ -145,80 +144,28 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { case in @ In(Cast(fromExp, toType: NumericType, _, _), list @ Seq(firstLit, _*)) if canImplicitlyCast(fromExp, toType, firstLit.dataType) && in.inSetConvertible => - // There are 3 kinds of literals in the list: - // 1. null literals - // 2. The literals that can cast to fromExp.dataType - // 3. The literals that cannot cast to fromExp.dataType - // null literals is special as we can cast null literals to any data type. - val (nullList, canCastList, cannotCastList) = - (ArrayBuffer[Literal](), ArrayBuffer[Literal](), ArrayBuffer[Expression]()) - list.foreach { - case lit @ Literal(null, _) => nullList += lit - case lit @ NonNullLiteral(_, _) => - unwrapCast(EqualTo(in.value, lit)) match { - case EqualTo(_, unwrapLit: Literal) => canCastList += unwrapLit - case e @ And(IsNull(_), Literal(null, BooleanType)) => cannotCastList += e - case _ => throw new IllegalStateException("Illegal unwrap cast result found.") - } - case _ => throw new IllegalStateException("Illegal value found in in.list.") - } - - // return original expression when in.list contains only null values. - if (canCastList.isEmpty && cannotCastList.isEmpty) { - exp - } else { - // cast null value to fromExp.dataType, to make sure the new return list is in the same data - // type. - val newList = nullList.map(lit => Cast(lit, fromExp.dataType)) ++ canCastList - val unwrapIn = In(fromExp, newList.toSeq) - cannotCastList.headOption match { - case None => unwrapIn - // since `cannotCastList` are all the same, - // convert to a single value `And(IsNull(_), Literal(null, BooleanType))`. - case Some(falseIfNotNull @ And(IsNull(_), Literal(null, BooleanType))) - if cannotCastList.map(_.canonicalized).distinct.length == 1 => - Or(falseIfNotNull, unwrapIn) - case _ => exp - } + val buildIn = { + (nullList: ArrayBuffer[Literal], canCastList: ArrayBuffer[Literal]) => + // cast null value to fromExp.dataType, to make sure the new return list is in the same + // data type. + val newList = nullList.map(lit => Cast(lit, fromExp.dataType)) ++ canCastList + In(fromExp, newList.toSeq) } + simplifyIn(fromExp, toType, list, buildIn).getOrElse(exp) // The same with `In` expression, the analyzer makes sure that the hset of InSet is already of // the same data type, so simply check `fromExp.dataType` can implicitly cast to `toType` and // both `fromExp.dataType` and `toType` is numeric type or not. - case inSet @ InSet(Cast(fromExp, toType: NumericType, _, _), hset) + case InSet(Cast(fromExp, toType: NumericType, _, _), hset) if hset.nonEmpty && canImplicitlyCast(fromExp, toType, toType) => - - // The same with `In`, there are 3 kinds of literals in the hset: - // 1. null literals - // 2. The literals that can cast to fromExp.dataType - // 3. The literals that cannot cast to fromExp.dataType - var (nullSet, canCastSet, cannotCastSet) = - (HashSet[Any](), HashSet[Any](), HashSet[Expression]()) - hset.map(value => Literal.create(value, toType)) - .foreach { - case lit @ Literal(null, _) => nullSet += lit.value - case lit @ NonNullLiteral(_, _) => - unwrapCast(EqualTo(inSet.child, lit)) match { - case EqualTo(_, unwrapLit: Literal) => canCastSet += unwrapLit.value - case e @ And(IsNull(_), Literal(null, BooleanType)) => cannotCastSet += e - case _ => throw new IllegalStateException("Illegal unwrap cast result found.") - } - case _ => throw new IllegalStateException("Illegal value found in hset.") - } - - if (canCastSet.isEmpty && cannotCastSet.isEmpty) { - exp - } else { - val unwrapInSet = InSet(fromExp, nullSet ++ canCastSet) - cannotCastSet.headOption match { - case None => unwrapInSet - // since `cannotCastList` are all the same, - // convert to a single value `And(IsNull(_), Literal(null, BooleanType))`. - case Some(falseIfNotNull @ And(IsNull(_), Literal(null, BooleanType))) - if cannotCastSet.map(_.canonicalized).size == 1 => Or(falseIfNotNull, unwrapInSet) - case _ => exp - } - } + val buildInSet = + (nullList: ArrayBuffer[Literal], canCastList: ArrayBuffer[Literal]) => + InSet(fromExp, (nullList ++ canCastList).map(_.value).toSet) + simplifyIn( + fromExp, + toType, + hset.map(v => Literal.create(v, toType)).toSeq, + buildInSet).getOrElse(exp) case _ => exp } @@ -346,6 +293,52 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { } } + private def simplifyIn[IN <: Expression]( + fromExp: Expression, + toType: NumericType, + list: Seq[Expression], + buildExpr: (ArrayBuffer[Literal], ArrayBuffer[Literal]) => IN): Option[Expression] = { + + // There are 3 kinds of literals in the list: + // 1. null literals + // 2. The literals that can cast to fromExp.dataType + // 3. The literals that cannot cast to fromExp.dataType + // Note that: + // - null literals are special as we can cast null literals to any data type + // - for 3, we have three cases + // 1). the literal cannot cast to fromExp.dataType, and there is no min/max for the fromType, + // for instance: + // `cast(input[2, decimal(5,2), true] as decimal(10,4)) = 123456.1234` + // 2). the literal value is out of fromType range, for instance: + // `cast(input[0, smallint, true] as bigint) = 2147483647` + // 3). the literal value is rounded up/down after casting to `fromType`, for instance: + // `cast(input[1, float, true] as double) = 3.14` + // note that 3.14 will be rounded to 3.14000010... after casting to float + + val (nullList, canCastList) = (ArrayBuffer[Literal](), ArrayBuffer[Literal]()) + val fromType = fromExp.dataType + val ordering = toType.ordering.asInstanceOf[Ordering[Any]] + + list.foreach { + case lit @ Literal(null, _) => nullList += lit + case NonNullLiteral(value, _) => + val newValue = Cast(Literal(value), fromType, ansiEnabled = false).eval() + val valueRoundTrip = Cast(Literal(newValue, fromType), toType).eval() + if (newValue != null && ordering.compare(value, valueRoundTrip) == 0) { + canCastList += Literal(newValue, fromType) + } + } + + if (nullList.isEmpty && canCastList.isEmpty) { + // only have cannot cast to fromExp.dataType literals + Option(falseIfNotNull(fromExp)) + } else { + val unwrapExpr = buildExpr(nullList, canCastList) + Option(unwrapExpr) + } + } + + /** * Check if the input `fromExp` can be safely cast to `toType` without any loss of precision, * i.e., the conversion is injective. Note this only handles the case when both sides are of diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala index a51be57db6fa7..5e9325d7c6c81 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparisonSuite.scala @@ -247,18 +247,6 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp val intLit = Literal.create(null, IntegerType) val shortLit = Literal.create(null, ShortType) - def checkInAndInSet(in: In, expected: Expression): Unit = { - assertEquivalent(in, expected) - val toInSet = (in: In) => InSet(in.value, HashSet() ++ in.list.map(_.eval())) - val expectedInSet = expected match { - case expectedIn: In => - toInSet(expectedIn) - case Or(falseIfNotNull: And, expectedIn: In) => - Or(falseIfNotNull, toInSet(expectedIn)) - } - assertEquivalent(toInSet(in), expectedInSet) - } - checkInAndInSet( In(Cast(f, LongType), Seq(1.toLong, 2.toLong, 3.toLong)), f.in(1.toShort, 2.toShort, 3.toShort)) @@ -266,12 +254,12 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp // in.list contains the value which out of `fromType` range checkInAndInSet( In(Cast(f, LongType), Seq(1.toLong, Int.MaxValue.toLong, Long.MaxValue)), - Or(falseIfNotNull(f), f.in(1.toShort))) + f.in(1.toShort)) // in.list only contains the value which out of `fromType` range checkInAndInSet( In(Cast(f, LongType), Seq(Int.MaxValue.toLong, Long.MaxValue)), - Or(falseIfNotNull(f), f.in())) + falseIfNotNull(f)) // in.list is empty checkInAndInSet( @@ -279,17 +267,51 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp // in.list contains null value checkInAndInSet( - In(Cast(f, IntegerType), Seq(intLit)), In(Cast(f, IntegerType), Seq(intLit))) + In(Cast(f, IntegerType), Seq(intLit)), f.in(shortLit)) checkInAndInSet( - In(Cast(f, IntegerType), Seq(intLit, intLit)), In(Cast(f, IntegerType), Seq(intLit, intLit))) + In(Cast(f, IntegerType), Seq(intLit, intLit)), f.in(shortLit, shortLit)) checkInAndInSet( In(Cast(f, IntegerType), Seq(intLit, 1)), f.in(shortLit, 1.toShort)) checkInAndInSet( In(Cast(f, LongType), Seq(longLit, 1.toLong, Long.MaxValue)), - Or(falseIfNotNull(f), f.in(shortLit, 1.toShort)) + f.in(shortLit, 1.toShort) + ) + checkInAndInSet( + In(Cast(f, LongType), Seq(longLit, Long.MaxValue)), + f.in(shortLit) ) } + test("SPARK-39896: unwrap cast when the literal of In/InSet downcast failed") { + val decimalValue = decimal2(123456.1234) + val decimalValue2 = decimal2(100.20) + checkInAndInSet( + In(castDecimal2(f3), Seq(decimalValue, decimalValue2)), + f3.in(decimal(decimalValue2))) + } + + test("SPARK-39896: unwrap cast when the literal of In/Inset has round up or down") { + + val doubleValue = 1.0 + val doubleValue1 = 100.6 + checkInAndInSet( + In(castDouble(f), Seq(doubleValue1, doubleValue)), + f.in(doubleValue.toShort)) + + // Cases for rounding up: 3.14 will be rounded to 3.14000010... after casting to float + val doubleValue2 = 3.14 + checkInAndInSet( + In(castDouble(f2), Seq(doubleValue2, doubleValue)), + f2.in(doubleValue.toFloat)) + + // Another case: 400.5678 is rounded up to 400.57 + val decimalValue1 = decimal2(400.5678) + val decimalValue2 = decimal2(1.0) + checkInAndInSet( + In(castDecimal2(f3), Seq(decimalValue1, decimalValue2)), + f3.in(decimal(decimalValue2))) + } + test("SPARK-36130: unwrap In should skip when in.list contains an expression that " + "is not literal") { val add = Cast(f2, DoubleType) + 1.0d @@ -374,4 +396,16 @@ class UnwrapCastInBinaryComparisonSuite extends PlanTest with ExpressionEvalHelp }) } } + + private def checkInAndInSet(in: In, expected: Expression): Unit = { + assertEquivalent(in, expected) + val toInSet = (in: In) => InSet(in.value, HashSet() ++ in.list.map(_.eval())) + val expectedInSet = expected match { + case expectedIn: In => + toInSet(expectedIn) + case falseIfNotNull: And => + falseIfNotNull + } + assertEquivalent(toInSet(in), expectedInSet) + } } From 03556ca5057b44add18109b96a8c5c25f28cb6d7 Mon Sep 17 00:00:00 2001 From: Weiwei Yang Date: Wed, 31 Aug 2022 22:26:43 -0700 Subject: [PATCH 459/535] [SPARK-40187][DOCS] Add `Apache YuniKorn` scheduler docs ### What changes were proposed in this pull request? Add a section under [customized-kubernetes-schedulers-for-spark-on-kubernetes](https://spark.apache.org/docs/latest/running-on-kubernetes.html#customized-kubernetes-schedulers-for-spark-on-kubernetes) to explain how to run Spark with Apache YuniKorn. This is based on the review comments from #35663. ### Why are the changes needed? Explain how to run Spark with Apache YuniKorn ### Does this PR introduce _any_ user-facing change? No Closes #37622 from yangwwei/SPARK-40187. Authored-by: Weiwei Yang Signed-off-by: Dongjoon Hyun (cherry picked from commit 4b1877398410fb23a285ed0d2c6b34711f52fc43) Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 36 +++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 9659a6ebe2f8b..abc362c94c5db 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1809,6 +1809,42 @@ spec: queue: default ``` +#### Using Apache YuniKorn as Customized Scheduler for Spark on Kubernetes + +[Apache YuniKorn](https://yunikorn.apache.org/) is a resource scheduler for Kubernetes that provides advanced batch scheduling +capabilities, such as job queuing, resource fairness, min/max queue capacity and flexible job ordering policies. +For available Apache YuniKorn features, please refer to [core features](https://yunikorn.apache.org/docs/get_started/core_features). + +##### Prerequisites + +Install Apache YuniKorn: + +```bash +helm repo add yunikorn https://apache.github.io/yunikorn-release +helm repo update +kubectl create namespace yunikorn +helm install yunikorn yunikorn/yunikorn --namespace yunikorn --version 1.0.0 +``` + +The above steps will install YuniKorn v1.0.0 on an existing Kubernetes cluster. + +##### Get started + +Submit Spark jobs with the following extra options: + +```bash +--conf spark.kubernetes.scheduler.name=yunikorn +--conf spark.kubernetes.driver.annotation.yunikorn.apache.org/app-id={{APP_ID}} +--conf spark.kubernetes.executor.annotation.yunikorn.apache.org/app-id={{APP_ID}} +``` + +Note that `{{APP_ID}}` is the built-in variable that will be substituted with Spark job ID automatically. +With the above configuration, the job will be scheduled by YuniKorn scheduler instead of the default Kubernetes scheduler. + +##### Limitations + +- Apache YuniKorn currently only supports x86 Linux, running Spark on ARM64 (or other platform) with Apache YuniKorn is not supported at present. + ### Stage Level Scheduling Overview Stage level scheduling is supported on Kubernetes when dynamic allocation is enabled. This also requires spark.dynamicAllocation.shuffleTracking.enabled to be enabled since Kubernetes doesn't support an external shuffle service at this time. The order in which containers for different profiles is requested from Kubernetes is not guaranteed. Note that since dynamic allocation on Kubernetes requires the shuffle tracking feature, this means that executors from previous stages that used a different ResourceProfile may not idle timeout due to having shuffle data on them. This could result in using more cluster resources and in the worst case if there are no remaining resources on the Kubernetes cluster then Spark could potentially hang. You may consider looking at config spark.dynamicAllocation.shuffleTracking.timeout to set a timeout, but that could result in data having to be recomputed if the shuffle data is really needed. From 7c19df6fb2f684a80ea3366fd365a6bbc13421b3 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 1 Sep 2022 09:26:45 -0700 Subject: [PATCH 460/535] [SPARK-40302][K8S][TESTS] Add `YuniKornSuite` This PR aims the followings. 1. Add `YuniKornSuite` integration test suite which extends `KubernetesSuite` on Apache YuniKorn scheduler. 2. Support `--default-exclude-tags` command to override `test.default.exclude.tags`. To improve test coverage. No. This is a test suite addition. Since this requires `Apache YuniKorn` installation, the test suite is disabled by default. So, CI K8s integration test should pass without running this suite. In order to run the tests, we need to override `test.default.exclude.tags` like the following. **SBT** ``` $ build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests \ -Dspark.kubernetes.test.deployMode=docker-desktop "kubernetes-integration-tests/test" \ -Dtest.exclude.tags=minikube,local \ -Dtest.default.exclude.tags= ``` **MAVEN** ``` $ dev/dev-run-integration-tests.sh --deploy-mode docker-desktop \ --exclude-tag minikube,local \ --default-exclude-tags '' ``` Closes #37753 from dongjoon-hyun/SPARK-40302. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit b2e38e16bfc547a62957e0a67085985b3c65d525) Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 3 +- .../dev/dev-run-integration-tests.sh | 10 +++++++ .../kubernetes/integration-tests/pom.xml | 5 ++-- .../k8s/integrationtest/YuniKornTag.java | 27 +++++++++++++++++ .../k8s/integrationtest/YuniKornSuite.scala | 29 +++++++++++++++++++ 5 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 resource-managers/kubernetes/integration-tests/src/test/java/org/apache/spark/deploy/k8s/integrationtest/YuniKornTag.java create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 934fa4a1fddd9..f830e64edfce7 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1135,7 +1135,8 @@ object CopyDependencies { object TestSettings { import BuildCommons._ - private val defaultExcludedTags = Seq("org.apache.spark.tags.ChromeUITest") + private val defaultExcludedTags = Seq("org.apache.spark.tags.ChromeUITest", + "org.apache.spark.deploy.k8s.integrationtest.YuniKornTag") lazy val settings = Seq ( // Fork new JVMs for tests and set Java options for those diff --git a/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh b/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh index 5f94203c0e2d4..f5f93adeddf6f 100755 --- a/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh +++ b/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh @@ -37,6 +37,7 @@ SERVICE_ACCOUNT= CONTEXT= INCLUDE_TAGS="k8s" EXCLUDE_TAGS= +DEFAULT_EXCLUDE_TAGS="N/A" JAVA_VERSION="8" BUILD_DEPENDENCIES_MVN_FLAG="-am" HADOOP_PROFILE="hadoop-3" @@ -101,6 +102,10 @@ while (( "$#" )); do EXCLUDE_TAGS="$2" shift ;; + --default-exclude-tags) + DEFAULT_EXCLUDE_TAGS="$2" + shift + ;; --base-image-name) BASE_IMAGE_NAME="$2" shift @@ -180,6 +185,11 @@ then properties=( ${properties[@]} -Dtest.exclude.tags=$EXCLUDE_TAGS ) fi +if [ "$DEFAULT_EXCLUDE_TAGS" != "N/A" ]; +then + properties=( ${properties[@]} -Dtest.default.exclude.tags=$DEFAULT_EXCLUDE_TAGS ) +fi + BASE_IMAGE_NAME=${BASE_IMAGE_NAME:-spark} JVM_IMAGE_NAME=${JVM_IMAGE_NAME:-${BASE_IMAGE_NAME}} PYTHON_IMAGE_NAME=${PYTHON_IMAGE_NAME:-${BASE_IMAGE_NAME}-py} diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 40e578f9a7eba..516c92b1df6b0 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -46,6 +46,7 @@ Dockerfile.java17 + org.apache.spark.deploy.k8s.integrationtest.YuniKornTag **/*Volcano*.scala @@ -137,7 +138,7 @@ ${spark.kubernetes.test.dockerFile} --test-exclude-tags - "${test.exclude.tags}" + "${test.exclude.tags},${test.default.exclude.tags}" @@ -179,7 +180,7 @@ ${spark.kubernetes.test.pythonImage} ${spark.kubernetes.test.rImage} - ${test.exclude.tags} + ${test.exclude.tags},${test.default.exclude.tags} ${test.include.tags} diff --git a/resource-managers/kubernetes/integration-tests/src/test/java/org/apache/spark/deploy/k8s/integrationtest/YuniKornTag.java b/resource-managers/kubernetes/integration-tests/src/test/java/org/apache/spark/deploy/k8s/integrationtest/YuniKornTag.java new file mode 100644 index 0000000000000..cc21cad7aad13 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/java/org/apache/spark/deploy/k8s/integrationtest/YuniKornTag.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.integrationtest; + +import java.lang.annotation.Retention; +import java.lang.annotation.Target; +import java.lang.annotation.RetentionPolicy; +import java.lang.annotation.ElementType; + +@org.scalatest.TagAnnotation +@Retention(RetentionPolicy.RUNTIME) +@Target({ElementType.METHOD, ElementType.TYPE}) +public @interface YuniKornTag {} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala new file mode 100644 index 0000000000000..5a3c063efa14b --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.k8s.integrationtest + +@YuniKornTag +class YuniKornSuite extends KubernetesSuite { + + override protected def setUpTest(): Unit = { + super.setUpTest() + sparkAppConf + .set("spark.kubernetes.scheduler.name", "yunikorn") + .set("spark.kubernetes.driver.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}") + .set("spark.kubernetes.executor.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}") + } +} From 399c397e7035665c928b1d439a860f9e7b1ce3b3 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 1 Sep 2022 09:34:55 -0700 Subject: [PATCH 461/535] [SPARK-40304][K8S][TESTS] Add `decomTestTag` to K8s Integration Test ### What changes were proposed in this pull request? This PR aims to add a new test tag, `decomTestTag`, to K8s Integration Test. ### Why are the changes needed? Decommission-related tests took over 6 minutes (`363s`). It would be helpful we can run them selectively. ``` [info] - Test basic decommissioning (44 seconds, 51 milliseconds) [info] - Test basic decommissioning with shuffle cleanup (44 seconds, 450 milliseconds) [info] - Test decommissioning with dynamic allocation & shuffle cleanups (2 minutes, 43 seconds) [info] - Test decommissioning timeouts (44 seconds, 389 milliseconds) [info] - SPARK-37576: Rolling decommissioning (1 minute, 8 seconds) ``` ### Does this PR introduce _any_ user-facing change? No, this is a test-only change. ### How was this patch tested? Pass the CIs and test manually. ``` $ build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests \ -Dspark.kubernetes.test.deployMode=docker-desktop "kubernetes-integration-tests/test" \ -Dtest.exclude.tags=minikube,local,decom ... [info] KubernetesSuite: [info] - Run SparkPi with no resources (12 seconds, 441 milliseconds) [info] - Run SparkPi with no resources & statefulset allocation (11 seconds, 949 milliseconds) [info] - Run SparkPi with a very long application name. (11 seconds, 999 milliseconds) [info] - Use SparkLauncher.NO_RESOURCE (11 seconds, 846 milliseconds) [info] - Run SparkPi with a master URL without a scheme. (11 seconds, 176 milliseconds) [info] - Run SparkPi with an argument. (11 seconds, 868 milliseconds) [info] - Run SparkPi with custom labels, annotations, and environment variables. (11 seconds, 858 milliseconds) [info] - All pods have the same service account by default (11 seconds, 5 milliseconds) [info] - Run extraJVMOptions check on driver (5 seconds, 757 milliseconds) [info] - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties (12 seconds, 467 milliseconds) [info] - Run SparkPi with env and mount secrets. (21 seconds, 119 milliseconds) [info] - Run PySpark on simple pi.py example (13 seconds, 129 milliseconds) [info] - Run PySpark to test a pyfiles example (14 seconds, 937 milliseconds) [info] - Run PySpark with memory customization (12 seconds, 195 milliseconds) [info] - Run in client mode. (11 seconds, 343 milliseconds) [info] - Start pod creation from template (11 seconds, 975 milliseconds) [info] - SPARK-38398: Schedule pod creation from template (11 seconds, 901 milliseconds) [info] - Run SparkR on simple dataframe.R example (14 seconds, 305 milliseconds) ... ``` Closes #37755 from dongjoon-hyun/SPARK-40304. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit fd0498f81df72c196f19a5b26053660f6f3f4d70) Signed-off-by: Dongjoon Hyun --- .../k8s/integrationtest/DecommissionSuite.scala | 13 +++++++------ .../k8s/integrationtest/KubernetesSuite.scala | 1 + 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala index 5d1a57fb46ef2..81f4660afe9ea 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.internal.config.PLUGINS private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => import DecommissionSuite._ - import KubernetesSuite.k8sTestTag + import KubernetesSuite.{decomTestTag, k8sTestTag} def runDecommissionTest(f: () => Unit): Unit = { val logConfFilePath = s"${sparkHomeDir.toFile}/conf/log4j2.properties" @@ -61,7 +61,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => } } - test("Test basic decommissioning", k8sTestTag) { + test("Test basic decommissioning", k8sTestTag, decomTestTag) { runDecommissionTest(() => { sparkAppConf .set(config.DECOMMISSION_ENABLED.key, "true") @@ -91,7 +91,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => }) } - test("Test basic decommissioning with shuffle cleanup", k8sTestTag) { + test("Test basic decommissioning with shuffle cleanup", k8sTestTag, decomTestTag) { runDecommissionTest(() => { sparkAppConf .set(config.DECOMMISSION_ENABLED.key, "true") @@ -122,7 +122,8 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => }) } - test("Test decommissioning with dynamic allocation & shuffle cleanups", k8sTestTag) { + test("Test decommissioning with dynamic allocation & shuffle cleanups", + k8sTestTag, decomTestTag) { runDecommissionTest(() => { sparkAppConf .set(config.DECOMMISSION_ENABLED.key, "true") @@ -183,7 +184,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => }) } - test("Test decommissioning timeouts", k8sTestTag) { + test("Test decommissioning timeouts", k8sTestTag, decomTestTag) { runDecommissionTest(() => { sparkAppConf .set(config.DECOMMISSION_ENABLED.key, "true") @@ -216,7 +217,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => }) } - test("SPARK-37576: Rolling decommissioning", k8sTestTag) { + test("SPARK-37576: Rolling decommissioning", k8sTestTag, decomTestTag) { runDecommissionTest(() => { sparkAppConf .set("spark.kubernetes.container.image", pyImage) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 3db51b2860023..3d7a9313031b0 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -612,6 +612,7 @@ private[spark] object KubernetesSuite { val k8sTestTag = Tag("k8s") val localTestTag = Tag("local") val schedulingTestTag = Tag("schedule") + val decomTestTag = Tag("decom") val rTestTag = Tag("r") val MinikubeTag = Tag("minikube") val SPARK_PI_MAIN_CLASS: String = "org.apache.spark.examples.SparkPi" From 284954a12576076965b3322656e08d09d76094f3 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 3 Sep 2022 16:14:43 +0800 Subject: [PATCH 462/535] Revert "[SPARK-33861][SQL] Simplify conditional in predicate" This reverts commit 32d4a2b and 3aa4e11. Closes #37729 from wangyum/SPARK-33861. Authored-by: Yuming Wang Signed-off-by: Yuming Wang (cherry picked from commit 43cbdc6ec9dbcf9ebe0b48e14852cec4af18b4ec) Signed-off-by: Yuming Wang --- .../sql/catalyst/optimizer/Optimizer.scala | 1 - .../SimplifyConditionalsInPredicate.scala | 80 ------ .../sql/catalyst/rules/RuleIdCollection.scala | 1 - ...SimplifyConditionalsInPredicateSuite.scala | 249 ----------------- .../q34.sf100/explain.txt | 4 +- .../q34.sf100/simplified.txt | 2 +- .../approved-plans-modified/q34/explain.txt | 4 +- .../q34/simplified.txt | 2 +- .../q53.sf100/explain.txt | 2 +- .../approved-plans-modified/q53/explain.txt | 2 +- .../q63.sf100/explain.txt | 2 +- .../approved-plans-modified/q63/explain.txt | 2 +- .../q73.sf100/explain.txt | 4 +- .../q73.sf100/simplified.txt | 2 +- .../approved-plans-modified/q73/explain.txt | 4 +- .../q73/simplified.txt | 2 +- .../q89.sf100/explain.txt | 2 +- .../approved-plans-modified/q89/explain.txt | 2 +- .../approved-plans-v1_4/q21.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q21/explain.txt | 2 +- .../q34.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q34/explain.txt | 264 +++++++++--------- .../approved-plans-v1_4/q34/simplified.txt | 2 +- .../q39a.sf100/explain.txt | 4 +- .../q39a.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q39a/explain.txt | 4 +- .../approved-plans-v1_4/q39a/simplified.txt | 4 +- .../q39b.sf100/explain.txt | 4 +- .../q39b.sf100/simplified.txt | 4 +- .../approved-plans-v1_4/q39b/explain.txt | 4 +- .../approved-plans-v1_4/q39b/simplified.txt | 4 +- .../approved-plans-v1_4/q47.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q47/explain.txt | 2 +- .../approved-plans-v1_4/q53.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q53/explain.txt | 2 +- .../approved-plans-v1_4/q57.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q57/explain.txt | 2 +- .../approved-plans-v1_4/q63.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q63/explain.txt | 2 +- .../approved-plans-v1_4/q73.sf100/explain.txt | 4 +- .../q73.sf100/simplified.txt | 2 +- .../approved-plans-v1_4/q73/explain.txt | 4 +- .../approved-plans-v1_4/q73/simplified.txt | 2 +- .../approved-plans-v1_4/q89.sf100/explain.txt | 2 +- .../approved-plans-v1_4/q89/explain.txt | 2 +- .../approved-plans-v2_7/q34.sf100/explain.txt | 4 +- .../q34.sf100/simplified.txt | 2 +- .../approved-plans-v2_7/q34/explain.txt | 4 +- .../approved-plans-v2_7/q34/simplified.txt | 2 +- .../approved-plans-v2_7/q47.sf100/explain.txt | 2 +- .../approved-plans-v2_7/q47/explain.txt | 2 +- .../approved-plans-v2_7/q57.sf100/explain.txt | 2 +- .../approved-plans-v2_7/q57/explain.txt | 2 +- 53 files changed, 198 insertions(+), 525 deletions(-) delete mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala delete mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 4807824ee7119..3f756ea459cd2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -115,7 +115,6 @@ abstract class Optimizer(catalogManager: CatalogManager) RemoveDispensableExpressions, SimplifyBinaryComparison, ReplaceNullWithFalseInPredicate, - SimplifyConditionalsInPredicate, PruneFilters, SimplifyCasts, SimplifyCaseConversionExpressions, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala deleted file mode 100644 index 34773b24cacbe..0000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicate.scala +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.optimizer - -import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Coalesce, Expression, If, Literal, Not, Or} -import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} -import org.apache.spark.sql.catalyst.plans.logical._ -import org.apache.spark.sql.catalyst.rules.Rule -import org.apache.spark.sql.catalyst.trees.TreePattern.{CASE_WHEN, IF} -import org.apache.spark.sql.types.BooleanType - -/** - * A rule that converts conditional expressions to predicate expressions, if possible, in the - * search condition of the WHERE/HAVING/ON(JOIN) clauses, which contain an implicit Boolean operator - * "(search condition) = TRUE". After this converting, we can potentially push the filter down to - * the data source. This rule is null-safe. - * - * Supported cases are: - * - IF(cond, trueVal, false) => AND(cond, trueVal) - * - IF(cond, trueVal, true) => OR(NOT(cond), trueVal) - * - IF(cond, false, falseVal) => AND(NOT(cond), falseVal) - * - IF(cond, true, falseVal) => OR(cond, falseVal) - * - CASE WHEN cond THEN trueVal ELSE false END => AND(cond, trueVal) - * - CASE WHEN cond THEN trueVal END => AND(cond, trueVal) - * - CASE WHEN cond THEN trueVal ELSE null END => AND(cond, trueVal) - * - CASE WHEN cond THEN trueVal ELSE true END => OR(NOT(cond), trueVal) - * - CASE WHEN cond THEN false ELSE elseVal END => AND(NOT(cond), elseVal) - * - CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal) - */ -object SimplifyConditionalsInPredicate extends Rule[LogicalPlan] { - - def apply(plan: LogicalPlan): LogicalPlan = plan.transformWithPruning( - _.containsAnyPattern(CASE_WHEN, IF), ruleId) { - case f @ Filter(cond, _) => f.copy(condition = simplifyConditional(cond)) - case j @ Join(_, _, _, Some(cond), _) => j.copy(condition = Some(simplifyConditional(cond))) - case rd @ ReplaceData(_, cond, _, _, _) => rd.copy(condition = simplifyConditional(cond)) - case d @ DeleteFromTable(_, cond) => d.copy(condition = simplifyConditional(cond)) - case u @ UpdateTable(_, _, Some(cond)) => u.copy(condition = Some(simplifyConditional(cond))) - } - - private def simplifyConditional(e: Expression): Expression = e match { - case And(left, right) => And(simplifyConditional(left), simplifyConditional(right)) - case Or(left, right) => Or(simplifyConditional(left), simplifyConditional(right)) - case If(cond, trueValue, FalseLiteral) => And(cond, trueValue) - case If(cond, trueValue, TrueLiteral) => Or(Not(Coalesce(Seq(cond, FalseLiteral))), trueValue) - case If(cond, FalseLiteral, falseValue) => - And(Not(Coalesce(Seq(cond, FalseLiteral))), falseValue) - case If(cond, TrueLiteral, falseValue) => Or(cond, falseValue) - case CaseWhen(Seq((cond, trueValue)), - Some(FalseLiteral) | Some(Literal(null, BooleanType)) | None) => - And(cond, trueValue) - case CaseWhen(Seq((cond, trueValue)), Some(TrueLiteral)) => - Or(Not(Coalesce(Seq(cond, FalseLiteral))), trueValue) - case CaseWhen(Seq((cond, FalseLiteral)), Some(elseValue)) => - And(Not(Coalesce(Seq(cond, FalseLiteral))), elseValue) - case CaseWhen(Seq((cond, TrueLiteral)), Some(elseValue)) => - Or(cond, elseValue) - case e if e.dataType == BooleanType => e - case e => - assert(e.dataType != BooleanType, - "Expected a Boolean type expression in SimplifyConditionalsInPredicate, " + - s"but got the type `${e.dataType.catalogString}` in `${e.sql}`.") - e - } -} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala index e36a76b0b26cb..5b710e6e137b2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala @@ -153,7 +153,6 @@ object RuleIdCollection { "org.apache.spark.sql.catalyst.optimizer.SimplifyCaseConversionExpressions" :: "org.apache.spark.sql.catalyst.optimizer.SimplifyCasts" :: "org.apache.spark.sql.catalyst.optimizer.SimplifyConditionals" :: - "org.apache.spark.sql.catalyst.optimizer.SimplifyConditionalsInPredicate" :: "org.apache.spark.sql.catalyst.optimizer.SimplifyExtractValueOps" :: "org.apache.spark.sql.catalyst.optimizer.TransposeWindow" :: "org.apache.spark.sql.catalyst.optimizer.UnwrapCastInBinaryComparison" :: Nil diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala deleted file mode 100644 index bb6ca5499d133..0000000000000 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/SimplifyConditionalsInPredicateSuite.scala +++ /dev/null @@ -1,249 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.optimizer - -import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute -import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{And, CaseWhen, Coalesce, Expression, If, IsNotNull, Literal, Not, Or, Rand} -import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} -import org.apache.spark.sql.catalyst.plans.{Inner, PlanTest} -import org.apache.spark.sql.catalyst.plans.logical.{DeleteFromTable, LocalRelation, LogicalPlan, UpdateTable} -import org.apache.spark.sql.catalyst.rules.RuleExecutor -import org.apache.spark.sql.types.{BooleanType, IntegerType} - -class SimplifyConditionalsInPredicateSuite extends PlanTest { - - object Optimize extends RuleExecutor[LogicalPlan] { - val batches = - Batch("SimplifyConditionalsInPredicate", FixedPoint(10), - NullPropagation, - ConstantFolding, - BooleanSimplification, - SimplifyConditionals, - SimplifyConditionalsInPredicate) :: Nil - } - - private val testRelation = - LocalRelation('i.int, 'b.boolean, 'a.array(IntegerType), 'm.map(IntegerType, IntegerType)) - private val anotherTestRelation = LocalRelation('d.int) - - test("IF(cond, trueVal, false) => AND(cond, trueVal)") { - val originalCond = If( - UnresolvedAttribute("i") > Literal(10), - UnresolvedAttribute("b"), - FalseLiteral) - val expectedCond = And( - UnresolvedAttribute("i") > Literal(10), - UnresolvedAttribute("b")) - testFilter(originalCond, expectedCond = expectedCond) - testJoin(originalCond, expectedCond = expectedCond) - testDelete(originalCond, expectedCond = expectedCond) - testUpdate(originalCond, expectedCond = expectedCond) - testProjection(originalCond, expectedExpr = originalCond) - } - - test("IF(cond, trueVal, true) => OR(NOT(cond), trueVal)") { - val originalCond = If( - UnresolvedAttribute("i") > Literal(10), - UnresolvedAttribute("b"), - TrueLiteral) - val expectedCond = Or( - Not(Coalesce(Seq(UnresolvedAttribute("i") > Literal(10), FalseLiteral))), - UnresolvedAttribute("b")) - testFilter(originalCond, expectedCond = expectedCond) - testJoin(originalCond, expectedCond = expectedCond) - testDelete(originalCond, expectedCond = expectedCond) - testUpdate(originalCond, expectedCond = expectedCond) - testProjection(originalCond, expectedExpr = originalCond) - } - - test("IF(cond, false, falseVal) => AND(NOT(cond), elseVal)") { - val originalCond = If( - UnresolvedAttribute("i") > Literal(10), - FalseLiteral, - UnresolvedAttribute("b")) - val expectedCond = And( - Not(Coalesce(Seq(UnresolvedAttribute("i") > Literal(10), FalseLiteral))), - UnresolvedAttribute("b")) - testFilter(originalCond, expectedCond = expectedCond) - testJoin(originalCond, expectedCond = expectedCond) - testDelete(originalCond, expectedCond = expectedCond) - testUpdate(originalCond, expectedCond = expectedCond) - testProjection(originalCond, expectedExpr = originalCond) - } - - test("IF(cond, true, falseVal) => OR(cond, elseVal)") { - val originalCond = If( - UnresolvedAttribute("i") > Literal(10), - TrueLiteral, - UnresolvedAttribute("b")) - val expectedCond = Or( - UnresolvedAttribute("i") > Literal(10), - UnresolvedAttribute("b")) - testFilter(originalCond, expectedCond = expectedCond) - testJoin(originalCond, expectedCond = expectedCond) - testDelete(originalCond, expectedCond = expectedCond) - testUpdate(originalCond, expectedCond = expectedCond) - testProjection(originalCond, expectedExpr = originalCond) - } - - test("CASE WHEN cond THEN trueVal ELSE false END => AND(cond, trueVal)") { - Seq(Some(FalseLiteral), None, Some(Literal(null, BooleanType))).foreach { elseExp => - val originalCond = CaseWhen( - Seq((UnresolvedAttribute("i") > Literal(10), UnresolvedAttribute("b"))), - elseExp) - val expectedCond = And( - UnresolvedAttribute("i") > Literal(10), - UnresolvedAttribute("b")) - testFilter(originalCond, expectedCond = expectedCond) - testJoin(originalCond, expectedCond = expectedCond) - testDelete(originalCond, expectedCond = expectedCond) - testUpdate(originalCond, expectedCond = expectedCond) - testProjection(originalCond, - expectedExpr = CaseWhen( - Seq((UnresolvedAttribute("i") > Literal(10), UnresolvedAttribute("b"))), - elseExp.filterNot(_.semanticEquals(Literal(null, BooleanType))))) - } - } - - test("CASE WHEN cond THEN trueVal ELSE true END => OR(NOT(cond), trueVal)") { - val originalCond = CaseWhen( - Seq((UnresolvedAttribute("i") > Literal(10), UnresolvedAttribute("b"))), - TrueLiteral) - val expectedCond = Or( - Not(Coalesce(Seq(UnresolvedAttribute("i") > Literal(10), FalseLiteral))), - UnresolvedAttribute("b")) - testFilter(originalCond, expectedCond = expectedCond) - testJoin(originalCond, expectedCond = expectedCond) - testDelete(originalCond, expectedCond = expectedCond) - testUpdate(originalCond, expectedCond = expectedCond) - testProjection(originalCond, expectedExpr = originalCond) - } - - test("CASE WHEN cond THEN false ELSE elseVal END => AND(NOT(cond), elseVal)") { - val originalCond = CaseWhen( - Seq((UnresolvedAttribute("i") > Literal(10), FalseLiteral)), - UnresolvedAttribute("b")) - val expectedCond = And( - Not(Coalesce(Seq(UnresolvedAttribute("i") > Literal(10), FalseLiteral))), - UnresolvedAttribute("b")) - testFilter(originalCond, expectedCond = expectedCond) - testJoin(originalCond, expectedCond = expectedCond) - testDelete(originalCond, expectedCond = expectedCond) - testUpdate(originalCond, expectedCond = expectedCond) - testProjection(originalCond, expectedExpr = originalCond) - } - - test("CASE WHEN cond THEN false END => false") { - val originalCond = CaseWhen( - Seq((UnresolvedAttribute("i") > Literal(10), FalseLiteral))) - testFilter(originalCond, expectedCond = FalseLiteral) - testJoin(originalCond, expectedCond = FalseLiteral) - testDelete(originalCond, expectedCond = FalseLiteral) - testUpdate(originalCond, expectedCond = FalseLiteral) - testProjection(originalCond, expectedExpr = originalCond) - } - - test("CASE WHEN non-deterministic-cond THEN false END") { - val originalCond = - CaseWhen(Seq((UnresolvedAttribute("i") > Rand(0), FalseLiteral))) - val expectedCond = And(UnresolvedAttribute("i") > Rand(0), FalseLiteral) - // nondeterministic expressions are only allowed in Project, Filter, Aggregate or Window, - testFilter(originalCond, expectedCond = FalseLiteral) - testProjection(originalCond, expectedExpr = originalCond) - } - - test("CASE WHEN cond THEN true ELSE elseVal END => OR(cond, elseVal)") { - val originalCond = CaseWhen( - Seq((UnresolvedAttribute("i") > Literal(10), TrueLiteral)), - UnresolvedAttribute("b")) - val expectedCond = Or( - UnresolvedAttribute("i") > Literal(10), - UnresolvedAttribute("b")) - testFilter(originalCond, expectedCond = expectedCond) - testJoin(originalCond, expectedCond = expectedCond) - testDelete(originalCond, expectedCond = expectedCond) - testUpdate(originalCond, expectedCond = expectedCond) - testProjection(originalCond, expectedExpr = originalCond) - } - - test("CASE WHEN cond THEN true END => cond") { - val originalCond = CaseWhen( - Seq((UnresolvedAttribute("i") > Literal(10), TrueLiteral))) - val expectedCond = UnresolvedAttribute("i") > Literal(10) - testFilter(originalCond, expectedCond = expectedCond) - testJoin(originalCond, expectedCond = expectedCond) - testDelete(originalCond, expectedCond = expectedCond) - testUpdate(originalCond, expectedCond = expectedCond) - testProjection(originalCond, expectedExpr = originalCond) - } - - test("Simplify conditional in conditions of CaseWhen inside another CaseWhen") { - val nestedCaseWhen = CaseWhen( - Seq((UnresolvedAttribute("i") > Literal(10)) -> UnresolvedAttribute("b")), - FalseLiteral) - val originalCond = CaseWhen(Seq(IsNotNull(nestedCaseWhen) -> FalseLiteral)) - val expectedCond = FalseLiteral - - testFilter(originalCond, expectedCond = expectedCond) - testJoin(originalCond, expectedCond = expectedCond) - testDelete(originalCond, expectedCond = expectedCond) - testUpdate(originalCond, expectedCond = expectedCond) - testProjection(originalCond, expectedExpr = originalCond) - } - - test("Not expected type - SimplifyConditionalsInPredicate") { - val e = intercept[AnalysisException] { - testFilter(originalCond = Literal(null, IntegerType), expectedCond = FalseLiteral) - }.getMessage - assert(e.contains("'CAST(NULL AS INT)' of type int is not a boolean")) - } - - private def testFilter(originalCond: Expression, expectedCond: Expression): Unit = { - test((rel, exp) => rel.where(exp), originalCond, expectedCond) - } - - private def testJoin(originalCond: Expression, expectedCond: Expression): Unit = { - test((rel, exp) => rel.join(anotherTestRelation, Inner, Some(exp)), originalCond, expectedCond) - } - - private def testProjection(originalExpr: Expression, expectedExpr: Expression): Unit = { - test((rel, exp) => rel.select(exp), originalExpr.as("out"), expectedExpr.as("out")) - } - - private def testDelete(originalCond: Expression, expectedCond: Expression): Unit = { - test((rel, expr) => DeleteFromTable(rel, expr), originalCond, expectedCond) - } - - private def testUpdate(originalCond: Expression, expectedCond: Expression): Unit = { - test((rel, expr) => UpdateTable(rel, Seq.empty, Some(expr)), originalCond, expectedCond) - } - - private def test( - func: (LogicalPlan, Expression) => LogicalPlan, - originalExpr: Expression, - expectedExpr: Expression): Unit = { - - val originalPlan = func(testRelation, originalExpr).analyze - val optimizedPlan = Optimize.execute(originalPlan) - val expectedPlan = func(testRelation, expectedExpr).analyze - comparePlans(optimizedPlan, expectedPlan) - } -} diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt index bdd5dda489c61..8d77e0bb3e347 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt @@ -98,7 +98,7 @@ Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,Unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,Unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] @@ -106,7 +106,7 @@ Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_coun (16) Filter [codegen id : 3] Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] -Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = Unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.2)) AND isnotnull(hd_demo_sk#10)) +Condition : ((((isnotnull(hd_vehicle_count#13) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = Unknown ))) AND (hd_vehicle_count#13 > 0)) AND CASE WHEN (hd_vehicle_count#13 > 0) THEN ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.2) END) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] Output [1]: [hd_demo_sk#10] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt index 1f4b2ff4eadd2..cb70bd42c1249 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/simplified.txt @@ -49,7 +49,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt index dddd98f235cd1..6b79cf53e5dc1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt @@ -95,7 +95,7 @@ Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,Unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,Unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] @@ -103,7 +103,7 @@ Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_coun (16) Filter [codegen id : 3] Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] -Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = Unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.2)) AND isnotnull(hd_demo_sk#10)) +Condition : ((((isnotnull(hd_vehicle_count#13) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = Unknown ))) AND (hd_vehicle_count#13 > 0)) AND CASE WHEN (hd_vehicle_count#13 > 0) THEN ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.2) END) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] Output [1]: [hd_demo_sk#10] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt index 0e98ff2125b6b..1e88590181c6a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/simplified.txt @@ -43,7 +43,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt index 321d0ec4277f6..1ab31f4c5a84b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_manufact_id#5, specifiedwindowfra (26) Filter [codegen id : 7] Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] -Condition : (isnotnull(avg_quarterly_sales#23) AND ((avg_quarterly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Condition : CASE WHEN (avg_quarterly_sales#23 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) ELSE false END (27) Project [codegen id : 7] Output [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt index c91dd8a11b5f7..cc73f64098224 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_manufact_id#5, specifiedwindowfra (26) Filter [codegen id : 7] Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] -Condition : (isnotnull(avg_quarterly_sales#23) AND ((avg_quarterly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Condition : CASE WHEN (avg_quarterly_sales#23 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) ELSE false END (27) Project [codegen id : 7] Output [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt index 8a225052b4ff7..e0c0dcebeef53 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_manager_id#5, specifiedwindowfram (26) Filter [codegen id : 7] Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] -Condition : (isnotnull(avg_monthly_sales#23) AND ((avg_monthly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Condition : CASE WHEN (avg_monthly_sales#23 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) ELSE false END (27) Project [codegen id : 7] Output [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt index bdb9612bfa2f4..82961fee124e3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_manager_id#5, specifiedwindowfram (26) Filter [codegen id : 7] Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] -Condition : (isnotnull(avg_monthly_sales#23) AND ((avg_monthly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Condition : CASE WHEN (avg_monthly_sales#23 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) ELSE false END (27) Project [codegen id : 7] Output [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt index 4cd58b1442653..fd5b51a120f22 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt @@ -95,7 +95,7 @@ Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,Unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,Unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] @@ -103,7 +103,7 @@ Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_coun (16) Filter [codegen id : 3] Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] -Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = Unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.0)) AND isnotnull(hd_demo_sk#10)) +Condition : ((((isnotnull(hd_vehicle_count#13) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = Unknown ))) AND (hd_vehicle_count#13 > 0)) AND CASE WHEN (hd_vehicle_count#13 > 0) THEN ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.0) END) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] Output [1]: [hd_demo_sk#10] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt index 025e26182c30f..72653182711cf 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/simplified.txt @@ -46,7 +46,7 @@ WholeStageCodegen (7) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt index de22608640529..355612814c8d2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt @@ -95,7 +95,7 @@ Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,Unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,Unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] @@ -103,7 +103,7 @@ Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_coun (16) Filter [codegen id : 3] Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] -Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = Unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.0)) AND isnotnull(hd_demo_sk#10)) +Condition : ((((isnotnull(hd_vehicle_count#13) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = Unknown ))) AND (hd_vehicle_count#13 > 0)) AND CASE WHEN (hd_vehicle_count#13 > 0) THEN ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.0) END) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] Output [1]: [hd_demo_sk#10] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt index 311b101daa4a6..667bc0b2f4e93 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/simplified.txt @@ -43,7 +43,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt index a2cfd6b66801d..e6cbc705b06be 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt @@ -141,7 +141,7 @@ Arguments: [avg(_w0#19) windowspecdefinition(i_category#14, i_brand#12, s_store_ (25) Filter [codegen id : 7] Input [9]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#18, _w0#19, avg_monthly_sales#20] -Condition : (isnotnull(avg_monthly_sales#20) AND (NOT (avg_monthly_sales#20 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Condition : CASE WHEN NOT (avg_monthly_sales#20 = 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) END (26) Project [codegen id : 7] Output [8]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#18, avg_monthly_sales#20] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt index 4ee1a5b7c2937..428ec486a46d7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt @@ -141,7 +141,7 @@ Arguments: [avg(_w0#19) windowspecdefinition(i_category#4, i_brand#2, s_store_na (25) Filter [codegen id : 7] Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] -Condition : (isnotnull(avg_monthly_sales#20) AND (NOT (avg_monthly_sales#20 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Condition : CASE WHEN NOT (avg_monthly_sales#20 = 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) END (26) Project [codegen id : 7] Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt index abc5b2a95fa69..df074a5db136c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt @@ -130,7 +130,7 @@ Results [4]: [w_warehouse_name#12, i_item_id#7, sum(CASE WHEN (d_date#10 < 2000- (23) Filter [codegen id : 5] Input [4]: [w_warehouse_name#12, i_item_id#7, inv_before#19, inv_after#20] -Condition : ((isnotnull(inv_before#19) AND isnotnull(inv_after#20)) AND (((inv_before#19 > 0) AND ((cast(inv_after#20 as double) / cast(inv_before#19 as double)) >= 0.666667)) AND ((cast(inv_after#20 as double) / cast(inv_before#19 as double)) <= 1.5))) +Condition : (CASE WHEN (inv_before#19 > 0) THEN ((cast(inv_after#20 as double) / cast(inv_before#19 as double)) >= 0.666667) END AND CASE WHEN (inv_before#19 > 0) THEN ((cast(inv_after#20 as double) / cast(inv_before#19 as double)) <= 1.5) END) (24) TakeOrderedAndProject Input [4]: [w_warehouse_name#12, i_item_id#7, inv_before#19, inv_after#20] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt index 491d823d17a8a..b71195a424d7c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt @@ -130,7 +130,7 @@ Results [4]: [w_warehouse_name#7, i_item_id#9, sum(CASE WHEN (d_date#12 < 2000-0 (23) Filter [codegen id : 5] Input [4]: [w_warehouse_name#7, i_item_id#9, inv_before#19, inv_after#20] -Condition : ((isnotnull(inv_before#19) AND isnotnull(inv_after#20)) AND (((inv_before#19 > 0) AND ((cast(inv_after#20 as double) / cast(inv_before#19 as double)) >= 0.666667)) AND ((cast(inv_after#20 as double) / cast(inv_before#19 as double)) <= 1.5))) +Condition : (CASE WHEN (inv_before#19 > 0) THEN ((cast(inv_after#20 as double) / cast(inv_before#19 as double)) >= 0.666667) END AND CASE WHEN (inv_before#19 > 0) THEN ((cast(inv_after#20 as double) / cast(inv_before#19 as double)) <= 1.5) END) (24) TakeOrderedAndProject Input [4]: [w_warehouse_name#7, i_item_id#9, inv_before#19, inv_after#20] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt index 24c5c1c256c26..4e7e2b03c92c3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34.sf100/simplified.txt @@ -49,7 +49,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt index df5b93da51771..c1945a13a2154 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/explain.txt @@ -1,43 +1,39 @@ == Physical Plan == -* Sort (36) -+- Exchange (35) - +- * Project (34) - +- * BroadcastHashJoin Inner BuildRight (33) - :- * Filter (28) - : +- * HashAggregate (27) - : +- Exchange (26) - : +- * HashAggregate (25) - : +- * Project (24) - : +- * BroadcastHashJoin Inner BuildRight (23) - : :- * Project (17) - : : +- * BroadcastHashJoin Inner BuildRight (16) - : : :- * Project (10) - : : : +- * BroadcastHashJoin Inner BuildRight (9) +* Sort (32) ++- Exchange (31) + +- * Project (30) + +- * BroadcastHashJoin Inner BuildRight (29) + :- * Filter (24) + : +- * HashAggregate (23) + : +- Exchange (22) + : +- * HashAggregate (21) + : +- * Project (20) + : +- * BroadcastHashJoin Inner BuildRight (19) + : :- * Project (13) + : : +- * BroadcastHashJoin Inner BuildRight (12) + : : :- * Project (6) + : : : +- * BroadcastHashJoin Inner BuildRight (5) : : : :- * Filter (3) : : : : +- * ColumnarToRow (2) - : : : : +- Scan parquet default.store_sales (1) - : : : +- BroadcastExchange (8) - : : : +- * Project (7) - : : : +- * Filter (6) - : : : +- * ColumnarToRow (5) - : : : +- Scan parquet default.date_dim (4) - : : +- BroadcastExchange (15) - : : +- * Project (14) - : : +- * Filter (13) - : : +- * ColumnarToRow (12) - : : +- Scan parquet default.store (11) - : +- BroadcastExchange (22) - : +- * Project (21) - : +- * Filter (20) - : +- * ColumnarToRow (19) - : +- Scan parquet default.household_demographics (18) - +- BroadcastExchange (32) - +- * Filter (31) - +- * ColumnarToRow (30) - +- Scan parquet default.customer (29) - - -(1) Scan parquet default.store_sales + : : : : +- Scan parquet spark_catalog.default.store_sales (1) + : : : +- ReusedExchange (4) + : : +- BroadcastExchange (11) + : : +- * Project (10) + : : +- * Filter (9) + : : +- * ColumnarToRow (8) + : : +- Scan parquet spark_catalog.default.store (7) + : +- BroadcastExchange (18) + : +- * Project (17) + : +- * Filter (16) + : +- * ColumnarToRow (15) + : +- Scan parquet spark_catalog.default.household_demographics (14) + +- BroadcastExchange (28) + +- * Filter (27) + +- * ColumnarToRow (26) + +- Scan parquet spark_catalog.default.customer (25) + + +(1) Scan parquet spark_catalog.default.store_sales Output [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] Batched: true Location: InMemoryFileIndex [] @@ -52,163 +48,171 @@ Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5] Condition : ((isnotnull(ss_store_sk#3) AND isnotnull(ss_hdemo_sk#2)) AND isnotnull(ss_customer_sk#1)) -(4) Scan parquet default.date_dim -Output [3]: [d_date_sk#7, d_year#8, d_dom#9] -Batched: true -Location [not included in comparison]/{warehouse_dir}/date_dim] -PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] -ReadSchema: struct - -(5) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#8, d_dom#9] - -(6) Filter [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#8, d_dom#9] -Condition : (((((d_dom#9 >= 1) AND (d_dom#9 <= 3)) OR ((d_dom#9 >= 25) AND (d_dom#9 <= 28))) AND d_year#8 IN (1999,2000,2001)) AND isnotnull(d_date_sk#7)) - -(7) Project [codegen id : 1] +(4) ReusedExchange [Reuses operator id: 37] Output [1]: [d_date_sk#7] -Input [3]: [d_date_sk#7, d_year#8, d_dom#9] - -(8) BroadcastExchange -Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] -(9) BroadcastHashJoin [codegen id : 4] +(5) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_date_sk#5] Right keys [1]: [d_date_sk#7] +Join type: Inner Join condition: None -(10) Project [codegen id : 4] +(6) Project [codegen id : 4] Output [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4] Input [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, d_date_sk#7] -(11) Scan parquet default.store -Output [2]: [s_store_sk#11, s_county#12] +(7) Scan parquet spark_catalog.default.store +Output [2]: [s_store_sk#8, s_county#9] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_county), EqualTo(s_county,Williamson County), IsNotNull(s_store_sk)] ReadSchema: struct -(12) ColumnarToRow [codegen id : 2] -Input [2]: [s_store_sk#11, s_county#12] +(8) ColumnarToRow [codegen id : 2] +Input [2]: [s_store_sk#8, s_county#9] -(13) Filter [codegen id : 2] -Input [2]: [s_store_sk#11, s_county#12] -Condition : ((isnotnull(s_county#12) AND (s_county#12 = Williamson County)) AND isnotnull(s_store_sk#11)) +(9) Filter [codegen id : 2] +Input [2]: [s_store_sk#8, s_county#9] +Condition : ((isnotnull(s_county#9) AND (s_county#9 = Williamson County)) AND isnotnull(s_store_sk#8)) -(14) Project [codegen id : 2] -Output [1]: [s_store_sk#11] -Input [2]: [s_store_sk#11, s_county#12] +(10) Project [codegen id : 2] +Output [1]: [s_store_sk#8] +Input [2]: [s_store_sk#8, s_county#9] -(15) BroadcastExchange -Input [1]: [s_store_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +(11) BroadcastExchange +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] -(16) BroadcastHashJoin [codegen id : 4] +(12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#11] +Right keys [1]: [s_store_sk#8] +Join type: Inner Join condition: None -(17) Project [codegen id : 4] +(13) Project [codegen id : 4] Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] -Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#11] +Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#8] -(18) Scan parquet default.household_demographics -Output [4]: [hd_demo_sk#14, hd_buy_potential#15, hd_dep_count#16, hd_vehicle_count#17] +(14) Scan parquet spark_catalog.default.household_demographics +Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct -(19) ColumnarToRow [codegen id : 3] -Input [4]: [hd_demo_sk#14, hd_buy_potential#15, hd_dep_count#16, hd_vehicle_count#17] +(15) ColumnarToRow [codegen id : 3] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] -(20) Filter [codegen id : 3] -Input [4]: [hd_demo_sk#14, hd_buy_potential#15, hd_dep_count#16, hd_vehicle_count#17] -Condition : (((((isnotnull(hd_vehicle_count#17) AND isnotnull(hd_dep_count#16)) AND ((hd_buy_potential#15 = >10000 ) OR (hd_buy_potential#15 = unknown ))) AND (hd_vehicle_count#17 > 0)) AND ((cast(hd_dep_count#16 as double) / cast(hd_vehicle_count#17 as double)) > 1.2)) AND isnotnull(hd_demo_sk#14)) +(16) Filter [codegen id : 3] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] +Condition : ((((isnotnull(hd_vehicle_count#13) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = unknown ))) AND (hd_vehicle_count#13 > 0)) AND CASE WHEN (hd_vehicle_count#13 > 0) THEN ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.2) END) AND isnotnull(hd_demo_sk#10)) -(21) Project [codegen id : 3] -Output [1]: [hd_demo_sk#14] -Input [4]: [hd_demo_sk#14, hd_buy_potential#15, hd_dep_count#16, hd_vehicle_count#17] +(17) Project [codegen id : 3] +Output [1]: [hd_demo_sk#10] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] -(22) BroadcastExchange -Input [1]: [hd_demo_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +(18) BroadcastExchange +Input [1]: [hd_demo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] -(23) BroadcastHashJoin [codegen id : 4] +(19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#14] +Right keys [1]: [hd_demo_sk#10] +Join type: Inner Join condition: None -(24) Project [codegen id : 4] +(20) Project [codegen id : 4] Output [2]: [ss_customer_sk#1, ss_ticket_number#4] -Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#14] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#10] -(25) HashAggregate [codegen id : 4] +(21) HashAggregate [codegen id : 4] Input [2]: [ss_customer_sk#1, ss_ticket_number#4] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#19] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#20] +Aggregate Attributes [1]: [count#14] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] -(26) Exchange -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#20] -Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#21] +(22) Exchange +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] -(27) HashAggregate [codegen id : 6] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#20] +(23) HashAggregate [codegen id : 6] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#22] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#22 AS cnt#23] +Aggregate Attributes [1]: [count(1)#16] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#16 AS cnt#17] -(28) Filter [codegen id : 6] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#23] -Condition : ((cnt#23 >= 15) AND (cnt#23 <= 20)) +(24) Filter [codegen id : 6] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] +Condition : ((cnt#17 >= 15) AND (cnt#17 <= 20)) -(29) Scan parquet default.customer -Output [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +(25) Scan parquet spark_catalog.default.customer +Output [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct -(30) ColumnarToRow [codegen id : 5] -Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +(26) ColumnarToRow [codegen id : 5] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] -(31) Filter [codegen id : 5] -Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] -Condition : isnotnull(c_customer_sk#24) +(27) Filter [codegen id : 5] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Condition : isnotnull(c_customer_sk#18) -(32) BroadcastExchange -Input [5]: [c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#29] +(28) BroadcastExchange +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] -(33) BroadcastHashJoin [codegen id : 6] +(29) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#24] +Right keys [1]: [c_customer_sk#18] +Join type: Inner Join condition: None -(34) Project [codegen id : 6] -Output [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#4, cnt#23] -Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#23, c_customer_sk#24, c_salutation#25, c_first_name#26, c_last_name#27, c_preferred_cust_flag#28] +(30) Project [codegen id : 6] +Output [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17, c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] -(35) Exchange -Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#4, cnt#23] -Arguments: rangepartitioning(c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#30] +(31) Exchange +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: rangepartitioning(c_last_name#21 ASC NULLS FIRST, c_first_name#20 ASC NULLS FIRST, c_salutation#19 ASC NULLS FIRST, c_preferred_cust_flag#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=5] -(36) Sort [codegen id : 7] -Input [6]: [c_last_name#27, c_first_name#26, c_salutation#25, c_preferred_cust_flag#28, ss_ticket_number#4, cnt#23] -Arguments: [c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, c_salutation#25 ASC NULLS FIRST, c_preferred_cust_flag#28 DESC NULLS LAST], true, 0 +(32) Sort [codegen id : 7] +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: [c_last_name#21 ASC NULLS FIRST, c_first_name#20 ASC NULLS FIRST, c_salutation#19 ASC NULLS FIRST, c_preferred_cust_flag#22 DESC NULLS LAST], true, 0 ===== Subqueries ===== Subquery:1 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#5 IN dynamicpruning#6 -ReusedExchange (37) +BroadcastExchange (37) ++- * Project (36) + +- * Filter (35) + +- * ColumnarToRow (34) + +- Scan parquet spark_catalog.default.date_dim (33) -(37) ReusedExchange [Reuses operator id: 8] +(33) Scan parquet spark_catalog.default.date_dim +Output [3]: [d_date_sk#7, d_year#23, d_dom#24] +Batched: true +Location [not included in comparison]/{warehouse_dir}/date_dim] +PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] +ReadSchema: struct + +(34) ColumnarToRow [codegen id : 1] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] + +(35) Filter [codegen id : 1] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] +Condition : (((((d_dom#24 >= 1) AND (d_dom#24 <= 3)) OR ((d_dom#24 >= 25) AND (d_dom#24 <= 28))) AND d_year#23 IN (1999,2000,2001)) AND isnotnull(d_date_sk#7)) + +(36) Project [codegen id : 1] Output [1]: [d_date_sk#7] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] + +(37) BroadcastExchange +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt index e4f89ab27658b..e47f447e46fe5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q34/simplified.txt @@ -43,7 +43,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/explain.txt index 64b986d1a6fe4..8a1d86400e890 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/explain.txt @@ -149,7 +149,7 @@ Results [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, stddev_samp(cast(inv_quant (22) Filter [codegen id : 5] Input [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, stdev#23, mean#24] -Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND (NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0))) +Condition : CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END (23) Project [codegen id : 5] Output [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, mean#24, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#25] @@ -234,7 +234,7 @@ Results [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, stddev_samp(cast(inv_qu (41) Filter [codegen id : 11] Input [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, stdev#23, mean#24] -Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND (NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0))) +Condition : CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END (42) Project [codegen id : 11] Output [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, mean#24 AS mean#46, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#47] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/simplified.txt index 0c84e462edb97..7e4ffc89e4690 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/simplified.txt @@ -11,7 +11,7 @@ WholeStageCodegen (14) Exchange [i_item_sk,w_warehouse_sk] #2 WholeStageCodegen (5) Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] - Filter [stdev,mean] + Filter [mean,stdev] HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #3 @@ -58,7 +58,7 @@ WholeStageCodegen (14) Exchange [i_item_sk,w_warehouse_sk] #7 WholeStageCodegen (11) Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] - Filter [stdev,mean] + Filter [mean,stdev] HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt index bbf9b5185e776..f58bc30f2a49b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt @@ -146,7 +146,7 @@ Results [5]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, stddev_samp(cast(inv_quan (22) Filter [codegen id : 10] Input [5]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, stdev#23, mean#24] -Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND (NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0))) +Condition : CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END (23) Project [codegen id : 10] Output [5]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, mean#24, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#25] @@ -223,7 +223,7 @@ Results [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, stddev_samp(cast(inv_qu (39) Filter [codegen id : 9] Input [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, stdev#23, mean#24] -Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND (NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0))) +Condition : CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END (40) Project [codegen id : 9] Output [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, mean#24 AS mean#46, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#47] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt index 2c97e17e7e444..2cf9d5ea033af 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/simplified.txt @@ -5,7 +5,7 @@ WholeStageCodegen (11) WholeStageCodegen (10) BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] - Filter [stdev,mean] + Filter [mean,stdev] HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 @@ -49,7 +49,7 @@ WholeStageCodegen (11) BroadcastExchange #6 WholeStageCodegen (9) Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] - Filter [stdev,mean] + Filter [mean,stdev] HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/explain.txt index c9208985327a3..3fd1431555933 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/explain.txt @@ -149,7 +149,7 @@ Results [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, stddev_samp(cast(inv_quant (22) Filter [codegen id : 5] Input [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, stdev#23, mean#24] -Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND ((NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0)) AND ((stdev#23 / mean#24) > 1.5))) +Condition : (CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END AND CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.5) END) (23) Project [codegen id : 5] Output [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, mean#24, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#25] @@ -234,7 +234,7 @@ Results [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, stddev_samp(cast(inv_qu (41) Filter [codegen id : 11] Input [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, stdev#23, mean#24] -Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND (NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0))) +Condition : CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END (42) Project [codegen id : 11] Output [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, mean#24 AS mean#46, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#47] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/simplified.txt index 0c84e462edb97..7e4ffc89e4690 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/simplified.txt @@ -11,7 +11,7 @@ WholeStageCodegen (14) Exchange [i_item_sk,w_warehouse_sk] #2 WholeStageCodegen (5) Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] - Filter [stdev,mean] + Filter [mean,stdev] HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #3 @@ -58,7 +58,7 @@ WholeStageCodegen (14) Exchange [i_item_sk,w_warehouse_sk] #7 WholeStageCodegen (11) Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] - Filter [stdev,mean] + Filter [mean,stdev] HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt index 3a8329c86102e..ae556d15fd1c0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt @@ -146,7 +146,7 @@ Results [5]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, stddev_samp(cast(inv_quan (22) Filter [codegen id : 10] Input [5]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, stdev#23, mean#24] -Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND ((NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0)) AND ((stdev#23 / mean#24) > 1.5))) +Condition : (CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END AND CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.5) END) (23) Project [codegen id : 10] Output [5]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, mean#24, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#25] @@ -223,7 +223,7 @@ Results [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, stddev_samp(cast(inv_qu (39) Filter [codegen id : 9] Input [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, stdev#23, mean#24] -Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND (NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0))) +Condition : CASE WHEN (mean#24 = 0.0) THEN false ELSE ((stdev#23 / mean#24) > 1.0) END (40) Project [codegen id : 9] Output [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, mean#24 AS mean#46, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#47] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt index 2c97e17e7e444..2cf9d5ea033af 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/simplified.txt @@ -5,7 +5,7 @@ WholeStageCodegen (11) WholeStageCodegen (10) BroadcastHashJoin [i_item_sk,w_warehouse_sk,i_item_sk,w_warehouse_sk] Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] - Filter [stdev,mean] + Filter [mean,stdev] HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #2 @@ -49,7 +49,7 @@ WholeStageCodegen (11) BroadcastExchange #6 WholeStageCodegen (9) Project [w_warehouse_sk,i_item_sk,d_moy,mean,stdev] - Filter [stdev,mean] + Filter [mean,stdev] HashAggregate [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy,n,avg,m2,sum,count] [stddev_samp(cast(inv_quantity_on_hand as double)),avg(inv_quantity_on_hand),stdev,mean,n,avg,m2,sum,count] InputAdapter Exchange [w_warehouse_name,w_warehouse_sk,i_item_sk,d_moy] #7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt index 0dad98fba1969..53c293bea74bb 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt @@ -186,7 +186,7 @@ Arguments: [avg(_w0#19) windowspecdefinition(i_category#14, i_brand#13, s_store_ (30) Filter [codegen id : 11] Input [10]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19, rn#20, avg_monthly_sales#21] -Condition : ((isnotnull(avg_monthly_sales#21) AND (avg_monthly_sales#21 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#21) AND (avg_monthly_sales#21 > 0.000000)) AND CASE WHEN (avg_monthly_sales#21 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) END) (31) Project [codegen id : 11] Output [9]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, avg_monthly_sales#21, rn#20] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt index e55defd7ff65c..3b43012198d6c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt @@ -167,7 +167,7 @@ Arguments: [avg(_w0#19) windowspecdefinition(i_category#3, i_brand#2, s_store_na (27) Filter [codegen id : 22] Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19, rn#20, avg_monthly_sales#21] -Condition : ((isnotnull(avg_monthly_sales#21) AND (avg_monthly_sales#21 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#21) AND (avg_monthly_sales#21 > 0.000000)) AND CASE WHEN (avg_monthly_sales#21 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) END) (28) Project [codegen id : 22] Output [9]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, avg_monthly_sales#21, rn#20] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt index 507650dfadc19..b314785702379 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_manufact_id#5, specifiedwindowfra (26) Filter [codegen id : 7] Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] -Condition : (isnotnull(avg_quarterly_sales#23) AND ((avg_quarterly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Condition : CASE WHEN (avg_quarterly_sales#23 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) ELSE false END (27) Project [codegen id : 7] Output [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt index 9e09bfb0dfb3f..2c2eaddf5b4b2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_manufact_id#5, specifiedwindowfra (26) Filter [codegen id : 7] Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] -Condition : (isnotnull(avg_quarterly_sales#23) AND ((avg_quarterly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Condition : CASE WHEN (avg_quarterly_sales#23 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) ELSE false END (27) Project [codegen id : 7] Output [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt index cd93eea6a1c8e..675995eeb6979 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt @@ -186,7 +186,7 @@ Arguments: [avg(_w0#18) windowspecdefinition(i_category#13, i_brand#12, cc_name# (30) Filter [codegen id : 11] Input [9]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] -Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND CASE WHEN (avg_monthly_sales#20 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) END) (31) Project [codegen id : 11] Output [8]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, avg_monthly_sales#20, rn#19] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt index 7abc61f31e616..61f4188c878ac 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt @@ -167,7 +167,7 @@ Arguments: [avg(_w0#18) windowspecdefinition(i_category#3, i_brand#2, cc_name#13 (27) Filter [codegen id : 22] Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] -Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND CASE WHEN (avg_monthly_sales#20 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) END) (28) Project [codegen id : 22] Output [8]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, avg_monthly_sales#20, rn#19] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt index fdc937b6de91e..6ae8a7dabe8cd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_manager_id#5, specifiedwindowfram (26) Filter [codegen id : 7] Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] -Condition : (isnotnull(avg_monthly_sales#23) AND ((avg_monthly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Condition : CASE WHEN (avg_monthly_sales#23 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) ELSE false END (27) Project [codegen id : 7] Output [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt index 51d52f254de03..60bae96e73bfe 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt @@ -146,7 +146,7 @@ Arguments: [avg(_w0#22) windowspecdefinition(i_manager_id#5, specifiedwindowfram (26) Filter [codegen id : 7] Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] -Condition : (isnotnull(avg_monthly_sales#23) AND ((avg_monthly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Condition : CASE WHEN (avg_monthly_sales#23 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) ELSE false END (27) Project [codegen id : 7] Output [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt index e538e2ce20a8c..2c9e15e5c0bb4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt @@ -98,7 +98,7 @@ Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] @@ -106,7 +106,7 @@ Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_coun (16) Filter [codegen id : 3] Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] -Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.0)) AND isnotnull(hd_demo_sk#10)) +Condition : ((((isnotnull(hd_vehicle_count#13) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = unknown ))) AND (hd_vehicle_count#13 > 0)) AND CASE WHEN (hd_vehicle_count#13 > 0) THEN ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.0) END) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] Output [1]: [hd_demo_sk#10] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt index 33299bf87e5ab..2f3dc4ebef30f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/simplified.txt @@ -49,7 +49,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt index 70e0397d07a0f..9c15e3f4d4343 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt @@ -95,7 +95,7 @@ Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] @@ -103,7 +103,7 @@ Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_coun (16) Filter [codegen id : 3] Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] -Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.0)) AND isnotnull(hd_demo_sk#10)) +Condition : ((((isnotnull(hd_vehicle_count#13) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = unknown ))) AND (hd_vehicle_count#13 > 0)) AND CASE WHEN (hd_vehicle_count#13 > 0) THEN ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.0) END) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] Output [1]: [hd_demo_sk#10] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt index 311b101daa4a6..667bc0b2f4e93 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/simplified.txt @@ -43,7 +43,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt index 6dcaa4bcf93f2..f48b7c096831d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt @@ -141,7 +141,7 @@ Arguments: [avg(_w0#19) windowspecdefinition(i_category#4, i_brand#2, s_store_na (25) Filter [codegen id : 7] Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] -Condition : (isnotnull(avg_monthly_sales#20) AND (NOT (avg_monthly_sales#20 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Condition : CASE WHEN NOT (avg_monthly_sales#20 = 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) END (26) Project [codegen id : 7] Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt index fc949bd963e6f..0c89c4ddcec02 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt @@ -141,7 +141,7 @@ Arguments: [avg(_w0#19) windowspecdefinition(i_category#4, i_brand#2, s_store_na (25) Filter [codegen id : 7] Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] -Condition : (isnotnull(avg_monthly_sales#20) AND (NOT (avg_monthly_sales#20 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Condition : CASE WHEN NOT (avg_monthly_sales#20 = 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) END (26) Project [codegen id : 7] Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt index 503bfdb0233cd..8e9ad05d3aeb1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt @@ -98,7 +98,7 @@ Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] @@ -106,7 +106,7 @@ Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_coun (16) Filter [codegen id : 3] Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] -Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.2)) AND isnotnull(hd_demo_sk#10)) +Condition : ((((isnotnull(hd_vehicle_count#13) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = unknown ))) AND (hd_vehicle_count#13 > 0)) AND CASE WHEN (hd_vehicle_count#13 > 0) THEN ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.2) END) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] Output [1]: [hd_demo_sk#10] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt index 24279b0e0b3d5..0ce260795d12f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/simplified.txt @@ -49,7 +49,7 @@ WholeStageCodegen (10) BroadcastExchange #6 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt index 41141558a807a..90497aab731d3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt @@ -95,7 +95,7 @@ Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] -PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] +PushedFilters: [IsNotNull(hd_vehicle_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] @@ -103,7 +103,7 @@ Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_coun (16) Filter [codegen id : 3] Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] -Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.2)) AND isnotnull(hd_demo_sk#10)) +Condition : ((((isnotnull(hd_vehicle_count#13) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = unknown ))) AND (hd_vehicle_count#13 > 0)) AND CASE WHEN (hd_vehicle_count#13 > 0) THEN ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.2) END) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] Output [1]: [hd_demo_sk#10] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt index 4dd6079a7ecae..e9e68105865be 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/simplified.txt @@ -43,7 +43,7 @@ WholeStageCodegen (7) BroadcastExchange #5 WholeStageCodegen (3) Project [hd_demo_sk] - Filter [hd_vehicle_count,hd_dep_count,hd_buy_potential,hd_demo_sk] + Filter [hd_vehicle_count,hd_buy_potential,hd_dep_count,hd_demo_sk] ColumnarToRow InputAdapter Scan parquet default.household_demographics [hd_demo_sk,hd_buy_potential,hd_dep_count,hd_vehicle_count] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt index 6d94222679ec3..e8a68d621765f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt @@ -186,7 +186,7 @@ Arguments: [avg(_w0#19) windowspecdefinition(i_category#14, i_brand#13, s_store_ (30) Filter [codegen id : 11] Input [10]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19, rn#20, avg_monthly_sales#21] -Condition : ((isnotnull(avg_monthly_sales#21) AND (avg_monthly_sales#21 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#21) AND (avg_monthly_sales#21 > 0.000000)) AND CASE WHEN (avg_monthly_sales#21 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) END) (31) Project [codegen id : 11] Output [9]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, avg_monthly_sales#21, rn#20] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt index 5cd32d2922d3c..ab9b77186a546 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt @@ -167,7 +167,7 @@ Arguments: [avg(_w0#19) windowspecdefinition(i_category#3, i_brand#2, s_store_na (27) Filter [codegen id : 22] Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19, rn#20, avg_monthly_sales#21] -Condition : ((isnotnull(avg_monthly_sales#21) AND (avg_monthly_sales#21 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#21) AND (avg_monthly_sales#21 > 0.000000)) AND CASE WHEN (avg_monthly_sales#21 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) END) (28) Project [codegen id : 22] Output [9]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, avg_monthly_sales#21, rn#20] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt index 857e754bf67d7..73d88eab9927a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt @@ -186,7 +186,7 @@ Arguments: [avg(_w0#18) windowspecdefinition(i_category#13, i_brand#12, cc_name# (30) Filter [codegen id : 11] Input [9]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] -Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND CASE WHEN (avg_monthly_sales#20 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) END) (31) Project [codegen id : 11] Output [8]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, avg_monthly_sales#20, rn#19] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt index 0fa6debb223ab..8ff83aef0cb05 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt @@ -167,7 +167,7 @@ Arguments: [avg(_w0#18) windowspecdefinition(i_category#3, i_brand#2, cc_name#13 (27) Filter [codegen id : 22] Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] -Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND CASE WHEN (avg_monthly_sales#20 > 0.000000) THEN (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000) END) (28) Project [codegen id : 22] Output [8]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, avg_monthly_sales#20, rn#19] From b066561cba01ef1ddc2dc8c5e21ef54cc22bfe08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn?= Date: Mon, 5 Sep 2022 19:51:40 -0500 Subject: [PATCH 463/535] [SPARK-40326][BUILD] Upgrade `fasterxml.jackson.version` to 2.13.4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit upgrade `com.fasterxml.jackson.dataformat:jackson-dataformat-yaml` and `fasterxml.jackson.databind.version` from 2.13.3 to 2.13.4 [CVE-2022-25857](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2022-25857) [SNYK-JAVA-ORGYAML](https://security.snyk.io/vuln/SNYK-JAVA-ORGYAML-2806360) No. Pass GA Closes #37796 from bjornjorgensen/upgrade-fasterxml.jackson-to-2.13.4. Authored-by: Bjørn Signed-off-by: Sean Owen (cherry picked from commit a82a006df80ac3aa6900d8688eb5bf77b804785d) Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 16 ++++++++-------- dev/deps/spark-deps-hadoop-3-hive-2.3 | 16 ++++++++-------- pom.xml | 4 ++-- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 8a600122b4e55..8208f90efe6c1 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -112,16 +112,16 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar -jackson-annotations/2.13.3//jackson-annotations-2.13.3.jar +jackson-annotations/2.13.4//jackson-annotations-2.13.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.13.3//jackson-core-2.13.3.jar -jackson-databind/2.13.3//jackson-databind-2.13.3.jar -jackson-dataformat-cbor/2.13.3//jackson-dataformat-cbor-2.13.3.jar -jackson-dataformat-yaml/2.13.3//jackson-dataformat-yaml-2.13.3.jar -jackson-datatype-jsr310/2.13.3//jackson-datatype-jsr310-2.13.3.jar +jackson-core/2.13.4//jackson-core-2.13.4.jar +jackson-databind/2.13.4//jackson-databind-2.13.4.jar +jackson-dataformat-cbor/2.13.4//jackson-dataformat-cbor-2.13.4.jar +jackson-dataformat-yaml/2.13.4//jackson-dataformat-yaml-2.13.4.jar +jackson-datatype-jsr310/2.13.4//jackson-datatype-jsr310-2.13.4.jar jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-scala_2.12/2.13.3//jackson-module-scala_2.12-2.13.3.jar +jackson-module-scala_2.12/2.13.4//jackson-module-scala_2.12-2.13.4.jar jackson-xc/1.9.13//jackson-xc-1.9.13.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar @@ -245,7 +245,7 @@ scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar shapeless_2.12/2.3.7//shapeless_2.12-2.3.7.jar shims/0.9.25//shims-0.9.25.jar slf4j-api/1.7.32//slf4j-api-1.7.32.jar -snakeyaml/1.30//snakeyaml-1.30.jar +snakeyaml/1.31//snakeyaml-1.31.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index e36d4f2d91185..04be0c1d7d647 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -102,15 +102,15 @@ httpcore/4.4.14//httpcore-4.4.14.jar ini4j/0.5.4//ini4j-0.5.4.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.0//ivy-2.5.0.jar -jackson-annotations/2.13.3//jackson-annotations-2.13.3.jar +jackson-annotations/2.13.4//jackson-annotations-2.13.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.13.3//jackson-core-2.13.3.jar -jackson-databind/2.13.3//jackson-databind-2.13.3.jar -jackson-dataformat-cbor/2.13.3//jackson-dataformat-cbor-2.13.3.jar -jackson-dataformat-yaml/2.13.3//jackson-dataformat-yaml-2.13.3.jar -jackson-datatype-jsr310/2.13.3//jackson-datatype-jsr310-2.13.3.jar +jackson-core/2.13.4//jackson-core-2.13.4.jar +jackson-databind/2.13.4//jackson-databind-2.13.4.jar +jackson-dataformat-cbor/2.13.4//jackson-dataformat-cbor-2.13.4.jar +jackson-dataformat-yaml/2.13.4//jackson-dataformat-yaml-2.13.4.jar +jackson-datatype-jsr310/2.13.4//jackson-datatype-jsr310-2.13.4.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-scala_2.12/2.13.3//jackson-module-scala_2.12-2.13.3.jar +jackson-module-scala_2.12/2.13.4//jackson-module-scala_2.12-2.13.4.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar @@ -234,7 +234,7 @@ scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar shapeless_2.12/2.3.7//shapeless_2.12-2.3.7.jar shims/0.9.25//shims-0.9.25.jar slf4j-api/1.7.32//slf4j-api-1.7.32.jar -snakeyaml/1.30//snakeyaml-1.30.jar +snakeyaml/1.31//snakeyaml-1.31.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar diff --git a/pom.xml b/pom.xml index 9f4d878e22946..d594fcca8c077 100644 --- a/pom.xml +++ b/pom.xml @@ -171,8 +171,8 @@ true 1.9.13 - 2.13.3 - 2.13.3 + 2.13.4 + 2.13.4 1.1.8.4 1.1.2 2.2.1 From 9473840bac8da0e92587a8d0edb5ac86757637c7 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Tue, 6 Sep 2022 12:34:39 +0800 Subject: [PATCH 464/535] [SPARK-38404][SQL][3.3] Improve CTE resolution when a nested CTE references an outer CTE ### What changes were proposed in this pull request? Please note that the bug in the [SPARK-38404](https://issues.apache.org/jira/browse/SPARK-38404) is fixed already with https://github.com/apache/spark/pull/34929. This PR is a minor improvement to the current implementation by collecting already resolved outer CTEs to avoid re-substituting already collected CTE definitions. ### Why are the changes needed? Small improvement + additional tests. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added new test case. Closes #37760 from peter-toth/SPARK-38404-nested-cte-references-outer-cte-3.3. Authored-by: Peter Toth Signed-off-by: Wenchen Fan --- .../catalyst/analysis/CTESubstitution.scala | 71 +++++++++---------- .../resources/sql-tests/inputs/cte-nested.sql | 13 +++- .../sql-tests/results/cte-legacy.sql.out | 19 ++++- .../sql-tests/results/cte-nested.sql.out | 18 ++++- .../sql-tests/results/cte-nonlegacy.sql.out | 18 ++++- 5 files changed, 96 insertions(+), 43 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala index 976a5d385d874..62ebfa8343181 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.analysis -import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{Command, CTERelationDef, CTERelationRef, InsertIntoDir, LogicalPlan, ParsedStatement, SubqueryAlias, UnresolvedWith, WithCTE} @@ -55,27 +55,27 @@ object CTESubstitution extends Rule[LogicalPlan] { case _: Command | _: ParsedStatement | _: InsertIntoDir => true case _ => false } - val cteDefs = mutable.ArrayBuffer.empty[CTERelationDef] + val cteDefs = ArrayBuffer.empty[CTERelationDef] val (substituted, lastSubstituted) = LegacyBehaviorPolicy.withName(conf.getConf(LEGACY_CTE_PRECEDENCE_POLICY)) match { case LegacyBehaviorPolicy.EXCEPTION => assertNoNameConflictsInCTE(plan) - traverseAndSubstituteCTE(plan, isCommand, cteDefs) + traverseAndSubstituteCTE(plan, isCommand, Seq.empty, cteDefs) case LegacyBehaviorPolicy.LEGACY => (legacyTraverseAndSubstituteCTE(plan, cteDefs), None) case LegacyBehaviorPolicy.CORRECTED => - traverseAndSubstituteCTE(plan, isCommand, cteDefs) + traverseAndSubstituteCTE(plan, isCommand, Seq.empty, cteDefs) } if (cteDefs.isEmpty) { substituted } else if (substituted eq lastSubstituted.get) { - WithCTE(substituted, cteDefs.sortBy(_.id).toSeq) + WithCTE(substituted, cteDefs.toSeq) } else { var done = false substituted.resolveOperatorsWithPruning(_ => !done) { case p if p eq lastSubstituted.get => done = true - WithCTE(p, cteDefs.sortBy(_.id).toSeq) + WithCTE(p, cteDefs.toSeq) } } } @@ -98,7 +98,7 @@ object CTESubstitution extends Rule[LogicalPlan] { val resolver = conf.resolver plan match { case UnresolvedWith(child, relations) => - val newNames = mutable.ArrayBuffer.empty[String] + val newNames = ArrayBuffer.empty[String] newNames ++= outerCTERelationNames relations.foreach { case (name, relation) => @@ -121,11 +121,11 @@ object CTESubstitution extends Rule[LogicalPlan] { private def legacyTraverseAndSubstituteCTE( plan: LogicalPlan, - cteDefs: mutable.ArrayBuffer[CTERelationDef]): LogicalPlan = { + cteDefs: ArrayBuffer[CTERelationDef]): LogicalPlan = { plan.resolveOperatorsUp { case UnresolvedWith(child, relations) => val resolvedCTERelations = - resolveCTERelations(relations, isLegacy = true, isCommand = false, cteDefs) + resolveCTERelations(relations, isLegacy = true, isCommand = false, Seq.empty, cteDefs) substituteCTE(child, alwaysInline = true, resolvedCTERelations) } } @@ -170,21 +170,23 @@ object CTESubstitution extends Rule[LogicalPlan] { * SELECT * FROM t * ) * @param plan the plan to be traversed - * @return the plan where CTE substitution is applied + * @param isCommand if this is a command + * @param outerCTEDefs already resolved outer CTE definitions with names + * @param cteDefs all accumulated CTE definitions + * @return the plan where CTE substitution is applied and optionally the last substituted `With` + * where CTE definitions will be gathered to */ private def traverseAndSubstituteCTE( plan: LogicalPlan, isCommand: Boolean, - cteDefs: mutable.ArrayBuffer[CTERelationDef]): (LogicalPlan, Option[LogicalPlan]) = { + outerCTEDefs: Seq[(String, CTERelationDef)], + cteDefs: ArrayBuffer[CTERelationDef]): (LogicalPlan, Option[LogicalPlan]) = { var lastSubstituted: Option[LogicalPlan] = None val newPlan = plan.resolveOperatorsUpWithPruning( _.containsAnyPattern(UNRESOLVED_WITH, PLAN_EXPRESSION)) { case UnresolvedWith(child: LogicalPlan, relations) => val resolvedCTERelations = - resolveCTERelations(relations, isLegacy = false, isCommand, cteDefs) - if (!isCommand) { - cteDefs ++= resolvedCTERelations.map(_._2) - } + resolveCTERelations(relations, isLegacy = false, isCommand, outerCTEDefs, cteDefs) lastSubstituted = Some(substituteCTE(child, isCommand, resolvedCTERelations)) lastSubstituted.get @@ -200,10 +202,14 @@ object CTESubstitution extends Rule[LogicalPlan] { relations: Seq[(String, SubqueryAlias)], isLegacy: Boolean, isCommand: Boolean, - cteDefs: mutable.ArrayBuffer[CTERelationDef]): Seq[(String, CTERelationDef)] = { - val resolvedCTERelations = new mutable.ArrayBuffer[(String, CTERelationDef)](relations.size) + outerCTEDefs: Seq[(String, CTERelationDef)], + cteDefs: ArrayBuffer[CTERelationDef]): Seq[(String, CTERelationDef)] = { + var resolvedCTERelations = if (isLegacy || isCommand) { + Seq.empty + } else { + outerCTEDefs + } for ((name, relation) <- relations) { - val lastCTEDefCount = cteDefs.length val innerCTEResolved = if (isLegacy) { // In legacy mode, outer CTE relations take precedence. Here we don't resolve the inner // `With` nodes, later we will substitute `UnresolvedRelation`s with outer CTE relations. @@ -221,31 +227,18 @@ object CTESubstitution extends Rule[LogicalPlan] { // WITH t3 AS (SELECT * FROM t1) // ) // t3 should resolve the t1 to `SELECT 2` instead of `SELECT 1`. - traverseAndSubstituteCTE(relation, isCommand, cteDefs)._1 - } - - if (cteDefs.length > lastCTEDefCount) { - // We have added more CTE relations to the `cteDefs` from the inner CTE, and these relations - // should also be substituted with `resolvedCTERelations` as inner CTE relation can refer to - // outer CTE relation. For example: - // WITH t1 AS (SELECT 1) - // t2 AS ( - // WITH t3 AS (SELECT * FROM t1) - // ) - for (i <- lastCTEDefCount until cteDefs.length) { - val substituted = - substituteCTE(cteDefs(i).child, isLegacy || isCommand, resolvedCTERelations.toSeq) - cteDefs(i) = cteDefs(i).copy(child = substituted) - } + traverseAndSubstituteCTE(relation, isCommand, resolvedCTERelations, cteDefs)._1 } - // CTE definition can reference a previous one - val substituted = - substituteCTE(innerCTEResolved, isLegacy || isCommand, resolvedCTERelations.toSeq) + val substituted = substituteCTE(innerCTEResolved, isLegacy || isCommand, resolvedCTERelations) val cteRelation = CTERelationDef(substituted) - resolvedCTERelations += (name -> cteRelation) + if (!(isLegacy || isCommand)) { + cteDefs += cteRelation + } + // Prepending new CTEs makes sure that those have higher priority over outer ones. + resolvedCTERelations +:= (name -> cteRelation) } - resolvedCTERelations.toSeq + resolvedCTERelations } private def substituteCTE( diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql b/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql index 3b64b5daa82db..b5d7fa5687bc4 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql @@ -135,4 +135,15 @@ WITH abc AS (SELECT 1) SELECT ( WITH aBc AS (SELECT 2) SELECT * FROM aBC -); \ No newline at end of file +); + +-- SPARK-38404: CTE in CTE definition references outer +WITH + t1 AS (SELECT 1), + t2 AS ( + WITH t3 AS ( + SELECT * FROM t1 + ) + SELECT * FROM t3 + ) +SELECT * FROM t2; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out index 4d0e5ea829d3f..db7d420a745cc 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 16 +-- Number of queries: 17 -- !query @@ -219,3 +219,20 @@ SELECT ( struct -- !query output 1 + + +-- !query +WITH + t1 AS (SELECT 1), + t2 AS ( + WITH t3 AS ( + SELECT * FROM t1 + ) + SELECT * FROM t3 + ) +SELECT * FROM t2 +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Table or view not found: t1; line 5 pos 20 diff --git a/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out index a8db4599dafcc..f714a11d1df3c 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 16 +-- Number of queries: 17 -- !query @@ -227,3 +227,19 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException Name aBc is ambiguous in nested CTE. Please set spark.sql.legacy.ctePrecedencePolicy to CORRECTED so that name defined in inner CTE takes precedence. If set it to LEGACY, outer CTE definitions will take precedence. See more details in SPARK-28228. + + +-- !query +WITH + t1 AS (SELECT 1), + t2 AS ( + WITH t3 AS ( + SELECT * FROM t1 + ) + SELECT * FROM t3 + ) +SELECT * FROM t2 +-- !query schema +struct<1:int> +-- !query output +1 diff --git a/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out index 74394ee3ffc89..2ab13003d04dd 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 16 +-- Number of queries: 17 -- !query @@ -219,3 +219,19 @@ SELECT ( struct -- !query output 2 + + +-- !query +WITH + t1 AS (SELECT 1), + t2 AS ( + WITH t3 AS ( + SELECT * FROM t1 + ) + SELECT * FROM t3 + ) +SELECT * FROM t2 +-- !query schema +struct<1:int> +-- !query output +1 From 1324f7d16802e684c04b3773c84be57667452a0b Mon Sep 17 00:00:00 2001 From: Maryann Xue Date: Thu, 1 Sep 2022 22:03:58 +0800 Subject: [PATCH 465/535] [SPARK-40297][SQL] CTE outer reference nested in CTE main body cannot be resolved This PR fixes a bug where a CTE reference cannot be resolved if this reference occurs in an inner CTE definition nested in the outer CTE's main body FROM clause. E.g., ``` WITH cte_outer AS ( SELECT 1 ) SELECT * FROM ( WITH cte_inner AS ( SELECT * FROM cte_outer ) SELECT * FROM cte_inner ) ``` This fix is to change the `CTESubstitution`'s traverse order from `resolveOperatorsUpWithPruning` to `resolveOperatorsDownWithPruning` and also to recursively call `traverseAndSubstituteCTE` for CTE main body. Bug fix. Without the fix an `AnalysisException` would be thrown for CTE queries mentioned above. No. Added UTs. Closes #37751 from maryannxue/spark-40297. Authored-by: Maryann Xue Signed-off-by: Wenchen Fan --- .../catalyst/analysis/CTESubstitution.scala | 30 +++- .../resources/sql-tests/inputs/cte-nested.sql | 59 ++++++- .../sql-tests/results/cte-legacy.sql.out | 80 +++++++++ .../sql-tests/results/cte-nested.sql.out | 79 +++++++++ .../sql-tests/results/cte-nonlegacy.sql.out | 79 +++++++++ .../org/apache/spark/sql/CTEInlineSuite.scala | 160 +++++++++++++++++- 6 files changed, 476 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala index 62ebfa8343181..6a4562450b99c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala @@ -56,7 +56,7 @@ object CTESubstitution extends Rule[LogicalPlan] { case _ => false } val cteDefs = ArrayBuffer.empty[CTERelationDef] - val (substituted, lastSubstituted) = + val (substituted, firstSubstituted) = LegacyBehaviorPolicy.withName(conf.getConf(LEGACY_CTE_PRECEDENCE_POLICY)) match { case LegacyBehaviorPolicy.EXCEPTION => assertNoNameConflictsInCTE(plan) @@ -68,12 +68,17 @@ object CTESubstitution extends Rule[LogicalPlan] { } if (cteDefs.isEmpty) { substituted - } else if (substituted eq lastSubstituted.get) { + } else if (substituted eq firstSubstituted.get) { WithCTE(substituted, cteDefs.toSeq) } else { var done = false substituted.resolveOperatorsWithPruning(_ => !done) { - case p if p eq lastSubstituted.get => + case p if p eq firstSubstituted.get => + // `firstSubstituted` is the parent of all other CTEs (if any). + done = true + WithCTE(p, cteDefs.toSeq) + case p if p.children.count(_.containsPattern(CTE)) > 1 => + // This is the first common parent of all CTEs. done = true WithCTE(p, cteDefs.toSeq) } @@ -181,21 +186,28 @@ object CTESubstitution extends Rule[LogicalPlan] { isCommand: Boolean, outerCTEDefs: Seq[(String, CTERelationDef)], cteDefs: ArrayBuffer[CTERelationDef]): (LogicalPlan, Option[LogicalPlan]) = { - var lastSubstituted: Option[LogicalPlan] = None - val newPlan = plan.resolveOperatorsUpWithPruning( + var firstSubstituted: Option[LogicalPlan] = None + val newPlan = plan.resolveOperatorsDownWithPruning( _.containsAnyPattern(UNRESOLVED_WITH, PLAN_EXPRESSION)) { case UnresolvedWith(child: LogicalPlan, relations) => val resolvedCTERelations = - resolveCTERelations(relations, isLegacy = false, isCommand, outerCTEDefs, cteDefs) - lastSubstituted = Some(substituteCTE(child, isCommand, resolvedCTERelations)) - lastSubstituted.get + resolveCTERelations(relations, isLegacy = false, isCommand, outerCTEDefs, cteDefs) ++ + outerCTEDefs + val substituted = substituteCTE( + traverseAndSubstituteCTE(child, isCommand, resolvedCTERelations, cteDefs)._1, + isCommand, + resolvedCTERelations) + if (firstSubstituted.isEmpty) { + firstSubstituted = Some(substituted) + } + substituted case other => other.transformExpressionsWithPruning(_.containsPattern(PLAN_EXPRESSION)) { case e: SubqueryExpression => e.withNewPlan(apply(e.plan)) } } - (newPlan, lastSubstituted) + (newPlan, firstSubstituted) } private def resolveCTERelations( diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql b/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql index b5d7fa5687bc4..5f12388b9cba2 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cte-nested.sql @@ -146,4 +146,61 @@ WITH ) SELECT * FROM t3 ) -SELECT * FROM t2; \ No newline at end of file +SELECT * FROM t2; + +-- CTE nested in CTE main body FROM clause references outer CTE def +WITH cte_outer AS ( + SELECT 1 +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM cte_outer + ) + SELECT * FROM cte_inner +); + +-- CTE double nested in CTE main body FROM clause references outer CTE def +WITH cte_outer AS ( + SELECT 1 +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM ( + WITH cte_inner_inner AS ( + SELECT * FROM cte_outer + ) + SELECT * FROM cte_inner_inner + ) + ) + SELECT * FROM cte_inner +); + +-- Invalid reference to invisible CTE def nested CTE def +WITH cte_outer AS ( + WITH cte_invisible_inner AS ( + SELECT 1 + ) + SELECT * FROM cte_invisible_inner +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM cte_invisible_inner + ) + SELECT * FROM cte_inner +); + +-- Invalid reference to invisible CTE def nested CTE def (in FROM) +WITH cte_outer AS ( + SELECT * FROM ( + WITH cte_invisible_inner AS ( + SELECT 1 + ) + SELECT * FROM cte_invisible_inner + ) +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM cte_invisible_inner + ) + SELECT * FROM cte_inner +); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out index db7d420a745cc..264b64ffe96aa 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-legacy.sql.out @@ -236,3 +236,83 @@ struct<> -- !query output org.apache.spark.sql.AnalysisException Table or view not found: t1; line 5 pos 20 + + +-- !query +WITH cte_outer AS ( + SELECT 1 +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM cte_outer + ) + SELECT * FROM cte_inner +) +-- !query schema +struct<1:int> +-- !query output +1 + + +-- !query +WITH cte_outer AS ( + SELECT 1 +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM ( + WITH cte_inner_inner AS ( + SELECT * FROM cte_outer + ) + SELECT * FROM cte_inner_inner + ) + ) + SELECT * FROM cte_inner +) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Table or view not found: cte_outer; line 8 pos 22 + + +-- !query +WITH cte_outer AS ( + WITH cte_invisible_inner AS ( + SELECT 1 + ) + SELECT * FROM cte_invisible_inner +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM cte_invisible_inner + ) + SELECT * FROM cte_inner +) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Table or view not found: cte_invisible_inner; line 9 pos 18 + + +-- !query +WITH cte_outer AS ( + SELECT * FROM ( + WITH cte_invisible_inner AS ( + SELECT 1 + ) + SELECT * FROM cte_invisible_inner + ) +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM cte_invisible_inner + ) + SELECT * FROM cte_inner +) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Table or view not found: cte_invisible_inner; line 11 pos 18 diff --git a/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out index f714a11d1df3c..2c622de3f36d6 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-nested.sql.out @@ -243,3 +243,82 @@ SELECT * FROM t2 struct<1:int> -- !query output 1 + + +-- !query +WITH cte_outer AS ( + SELECT 1 +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM cte_outer + ) + SELECT * FROM cte_inner +) +-- !query schema +struct<1:int> +-- !query output +1 + + +-- !query +WITH cte_outer AS ( + SELECT 1 +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM ( + WITH cte_inner_inner AS ( + SELECT * FROM cte_outer + ) + SELECT * FROM cte_inner_inner + ) + ) + SELECT * FROM cte_inner +) +-- !query schema +struct<1:int> +-- !query output +1 + + +-- !query +WITH cte_outer AS ( + WITH cte_invisible_inner AS ( + SELECT 1 + ) + SELECT * FROM cte_invisible_inner +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM cte_invisible_inner + ) + SELECT * FROM cte_inner +) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Table or view not found: cte_invisible_inner; line 9 pos 18 + + +-- !query +WITH cte_outer AS ( + SELECT * FROM ( + WITH cte_invisible_inner AS ( + SELECT 1 + ) + SELECT * FROM cte_invisible_inner + ) +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM cte_invisible_inner + ) + SELECT * FROM cte_inner +) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Table or view not found: cte_invisible_inner; line 11 pos 18 diff --git a/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out b/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out index 2ab13003d04dd..283f5a54a422f 100644 --- a/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cte-nonlegacy.sql.out @@ -235,3 +235,82 @@ SELECT * FROM t2 struct<1:int> -- !query output 1 + + +-- !query +WITH cte_outer AS ( + SELECT 1 +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM cte_outer + ) + SELECT * FROM cte_inner +) +-- !query schema +struct<1:int> +-- !query output +1 + + +-- !query +WITH cte_outer AS ( + SELECT 1 +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM ( + WITH cte_inner_inner AS ( + SELECT * FROM cte_outer + ) + SELECT * FROM cte_inner_inner + ) + ) + SELECT * FROM cte_inner +) +-- !query schema +struct<1:int> +-- !query output +1 + + +-- !query +WITH cte_outer AS ( + WITH cte_invisible_inner AS ( + SELECT 1 + ) + SELECT * FROM cte_invisible_inner +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM cte_invisible_inner + ) + SELECT * FROM cte_inner +) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Table or view not found: cte_invisible_inner; line 9 pos 18 + + +-- !query +WITH cte_outer AS ( + SELECT * FROM ( + WITH cte_invisible_inner AS ( + SELECT 1 + ) + SELECT * FROM cte_invisible_inner + ) +) +SELECT * FROM ( + WITH cte_inner AS ( + SELECT * FROM cte_invisible_inner + ) + SELECT * FROM cte_inner +) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +Table or view not found: cte_invisible_inner; line 11 pos 18 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala index 7d45102ac83d3..e758c6f8df593 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql import org.apache.spark.sql.catalyst.expressions.{And, GreaterThan, LessThan, Literal, Or} -import org.apache.spark.sql.catalyst.plans.logical.{Filter, Project, RepartitionOperation, WithCTE} +import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.execution.adaptive._ import org.apache.spark.sql.execution.exchange.ReusedExchangeExec import org.apache.spark.sql.internal.SQLConf @@ -481,6 +481,164 @@ abstract class CTEInlineSuiteBase } } } + + test("Make sure CTESubstitution places WithCTE back in the plan correctly.") { + withView("t") { + Seq((0, 1), (1, 2)).toDF("c1", "c2").createOrReplaceTempView("t") + + // CTE on both sides of join - WithCTE placed over first common parent, i.e., the join. + val df1 = sql( + s""" + |select count(v1.c3), count(v2.c3) from ( + | with + | v1 as ( + | select c1, c2, rand() c3 from t + | ) + | select * from v1 + |) v1 join ( + | with + | v2 as ( + | select c1, c2, rand() c3 from t + | ) + | select * from v2 + |) v2 on v1.c1 = v2.c1 + """.stripMargin) + checkAnswer(df1, Row(2, 2) :: Nil) + df1.queryExecution.analyzed match { + case Aggregate(_, _, WithCTE(_, cteDefs)) => assert(cteDefs.length == 2) + case other => fail(s"Expect pattern Aggregate(WithCTE(_)) but got $other") + } + + // CTE on one side of join - WithCTE placed back where it was. + val df2 = sql( + s""" + |select count(v1.c3), count(v2.c3) from ( + | select c1, c2, rand() c3 from t + |) v1 join ( + | with + | v2 as ( + | select c1, c2, rand() c3 from t + | ) + | select * from v2 + |) v2 on v1.c1 = v2.c1 + """.stripMargin) + checkAnswer(df2, Row(2, 2) :: Nil) + df2.queryExecution.analyzed match { + case Aggregate(_, _, Join(_, SubqueryAlias(_, WithCTE(_, cteDefs)), _, _, _)) => + assert(cteDefs.length == 1) + case other => fail(s"Expect pattern Aggregate(Join(_, WithCTE(_))) but got $other") + } + + // CTE on one side of join and both sides of union - WithCTE placed on first common parent. + val df3 = sql( + s""" + |select count(v1.c3), count(v2.c3) from ( + | select c1, c2, rand() c3 from t + |) v1 join ( + | select * from ( + | with + | v1 as ( + | select c1, c2, rand() c3 from t + | ) + | select * from v1 + | ) + | union all + | select * from ( + | with + | v2 as ( + | select c1, c2, rand() c3 from t + | ) + | select * from v2 + | ) + |) v2 on v1.c1 = v2.c1 + """.stripMargin) + checkAnswer(df3, Row(4, 4) :: Nil) + df3.queryExecution.analyzed match { + case Aggregate(_, _, Join(_, SubqueryAlias(_, WithCTE(_: Union, cteDefs)), _, _, _)) => + assert(cteDefs.length == 2) + case other => fail( + s"Expect pattern Aggregate(Join(_, (WithCTE(Union(_, _))))) but got $other") + } + + // CTE on one side of join and one side of union - WithCTE placed back where it was. + val df4 = sql( + s""" + |select count(v1.c3), count(v2.c3) from ( + | select c1, c2, rand() c3 from t + |) v1 join ( + | select * from ( + | with + | v1 as ( + | select c1, c2, rand() c3 from t + | ) + | select * from v1 + | ) + | union all + | select c1, c2, rand() c3 from t + |) v2 on v1.c1 = v2.c1 + """.stripMargin) + checkAnswer(df4, Row(4, 4) :: Nil) + df4.queryExecution.analyzed match { + case Aggregate(_, _, Join(_, SubqueryAlias(_, Union(children, _, _)), _, _, _)) + if children.head.find(_.isInstanceOf[WithCTE]).isDefined => + assert( + children.head.collect { + case w: WithCTE => w + }.head.cteDefs.length == 1) + case other => fail( + s"Expect pattern Aggregate(Join(_, (WithCTE(Union(_, _))))) but got $other") + } + + // CTE on both sides of join and one side of union - WithCTE placed on first common parent. + val df5 = sql( + s""" + |select count(v1.c3), count(v2.c3) from ( + | with + | v1 as ( + | select c1, c2, rand() c3 from t + | ) + | select * from v1 + |) v1 join ( + | select c1, c2, rand() c3 from t + | union all + | select * from ( + | with + | v2 as ( + | select c1, c2, rand() c3 from t + | ) + | select * from v2 + | ) + |) v2 on v1.c1 = v2.c1 + """.stripMargin) + checkAnswer(df5, Row(4, 4) :: Nil) + df5.queryExecution.analyzed match { + case Aggregate(_, _, WithCTE(_, cteDefs)) => assert(cteDefs.length == 2) + case other => fail(s"Expect pattern Aggregate(WithCTE(_)) but got $other") + } + + // CTE as root node - WithCTE placed back where it was. + val df6 = sql( + s""" + |with + |v1 as ( + | select c1, c2, rand() c3 from t + |) + |select count(v1.c3), count(v2.c3) from + |v1 join ( + | with + | v2 as ( + | select c1, c2, rand() c3 from t + | ) + | select * from v2 + |) v2 on v1.c1 = v2.c1 + """.stripMargin) + checkAnswer(df6, Row(2, 2) :: Nil) + df6.queryExecution.analyzed match { + case WithCTE(_, cteDefs) => assert(cteDefs.length == 2) + case other => fail(s"Expect pattern WithCTE(_) but got $other") + } + } + } } class CTEInlineSuiteAEOff extends CTEInlineSuiteBase with DisableAdaptiveExecutionSuite From 84c091845d46375b54014538035864744d7d4399 Mon Sep 17 00:00:00 2001 From: Carmen Kwan Date: Tue, 6 Sep 2022 21:11:24 +0800 Subject: [PATCH 466/535] [SPARK-40315][SQL] Add hashCode() for Literal of ArrayBasedMapData ### What changes were proposed in this pull request? There is no explicit `hashCode()` function override for `ArrayBasedMapData`. As a result, there is a non-deterministic error where the `hashCode()` computed for `Literal`s of `ArrayBasedMapData` can be different for two equal objects (`Literal`s of `ArrayBasedMapData` with equal keys and values). In this PR, we add a `hashCode` function so that it works exactly as we expect. ### Why are the changes needed? This is a bug fix for a non-deterministic error. It is also more consistent with the rest of Spark if we implement the `hashCode` method instead of relying on defaults. We can't add the `hashCode` directly to `ArrayBasedMapData` because of SPARK-9415. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? A simple unit test was added. Closes #37807 from c27kwan/SPARK-40315-lit. Authored-by: Carmen Kwan Signed-off-by: Wenchen Fan (cherry picked from commit e85a4ffbdfa063c8da91b23dfbde77e2f9ed62e9) Signed-off-by: Wenchen Fan --- .../sql/catalyst/expressions/literals.scala | 3 +++ .../expressions/ComplexTypeSuite.scala | 26 +++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala index 6262bdef7f799..3195d7667377e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala @@ -374,6 +374,9 @@ case class Literal (value: Any, dataType: DataType) extends LeafExpression { val valueHashCode = value match { case null => 0 case binary: Array[Byte] => util.Arrays.hashCode(binary) + // SPARK-40315: Literals of ArrayBasedMapData should have deterministic hashCode. + case arrayBasedMapData: ArrayBasedMapData => + arrayBasedMapData.keyArray.hashCode() * 37 + arrayBasedMapData.valueArray.hashCode() case other => other.hashCode() } 31 * Objects.hashCode(dataType) + valueHashCode diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index 3ab8afcac1e8c..a2d62e9ded4fa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -526,4 +526,30 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { assert(m1.semanticEquals(m2)) } + + test("SPARK-40315: Literals of ArrayBasedMapData should have deterministic hashCode.") { + val keys = new Array[UTF8String](1) + val values1 = new Array[UTF8String](1) + val values2 = new Array[UTF8String](1) + + keys(0) = UTF8String.fromString("key") + values1(0) = UTF8String.fromString("value1") + values2(0) = UTF8String.fromString("value2") + + val d1 = new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values1)) + val d2 = new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values1)) + val d3 = new ArrayBasedMapData(new GenericArrayData(keys), new GenericArrayData(values2)) + val m1 = Literal.create(d1, MapType(StringType, StringType)) + val m2 = Literal.create(d2, MapType(StringType, StringType)) + val m3 = Literal.create(d3, MapType(StringType, StringType)) + + // If two Literals of ArrayBasedMapData have the same elements, we expect them to be equal and + // to have the same hashCode(). + assert(m1 == m2) + assert(m1.hashCode() == m2.hashCode()) + // If two Literals of ArrayBasedMapData have different elements, we expect them not to be equal + // and to have different hashCode(). + assert(m1 != m3) + assert(m1.hashCode() != m3.hashCode()) + } } From 433469f284ee24150f6cff4005d39a70e91cc4d9 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 7 Sep 2022 18:45:20 +0800 Subject: [PATCH 467/535] [SPARK-40149][SQL] Propagate metadata columns through Project This PR fixes a regression caused by https://github.com/apache/spark/pull/32017 . In https://github.com/apache/spark/pull/32017 , we tried to be more conservative and decided to not propagate metadata columns in certain operators, including `Project`. However, the decision was made only considering SQL API, not DataFrame API. In fact, it's very common to chain `Project` operators in DataFrame, e.g. `df.withColumn(...).withColumn(...)...`, and it's very inconvenient if metadata columns are not propagated through `Project`. This PR makes 2 changes: 1. Project should propagate metadata columns 2. SubqueryAlias should only propagate metadata columns if the child is a leaf node or also a SubqueryAlias The second change is needed to still forbid weird queries like `SELECT m from (SELECT a from t)`, which is the main motivation of https://github.com/apache/spark/pull/32017 . After propagating metadata columns, a problem from https://github.com/apache/spark/pull/31666 is exposed: the natural join metadata columns may confuse the analyzer and lead to wrong analyzed plan. For example, `SELECT t1.value FROM t1 LEFT JOIN t2 USING (key) ORDER BY key`, how shall we resolve `ORDER BY key`? It should be resolved to `t1.key` via the rule `ResolveMissingReferences`, which is in the output of the left join. However, if `Project` can propagate metadata columns, `ORDER BY key` will be resolved to `t2.key`. To solve this problem, this PR only allows qualified access for metadata columns of natural join. This has no breaking change, as people can only do qualified access for natural join metadata columns before, in the `Project` right after `Join`. This actually enables more use cases, as people can now access natural join metadata columns in ORDER BY. I've added a test for it. fix a regression For SQL API, there is no change, as a `SubqueryAlias` always comes with a `Project` or `Aggregate`, so we still don't propagate metadata columns through a SELECT group. For DataFrame API, the behavior becomes more lenient. The only breaking case is an operator that can propagate metadata columns then follows a `SubqueryAlias`, e.g. `df.filter(...).as("t").select("t.metadata_col")`. But this is a weird use case and I don't think we should support it at the first place. new tests Closes #37758 from cloud-fan/metadata. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit 99ae1d9a897909990881f14c5ea70a0d1a0bf456) Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 8 +- .../sql/catalyst/analysis/unresolved.scala | 2 +- .../sql/catalyst/expressions/package.scala | 13 +- .../plans/logical/basicLogicalOperators.scala | 13 +- .../spark/sql/catalyst/util/package.scala | 15 +- .../resources/sql-tests/inputs/using-join.sql | 2 + .../sql-tests/results/using-join.sql.out | 11 + .../sql/connector/DataSourceV2SQLSuite.scala | 242 ------------------ .../sql/connector/MetadataColumnSuite.scala | 219 ++++++++++++++++ 9 files changed, 263 insertions(+), 262 deletions(-) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 37024e15377ef..3a3997ff9c722 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -967,9 +967,11 @@ class Analyzer(override val catalogManager: CatalogManager) private def addMetadataCol(plan: LogicalPlan): LogicalPlan = plan match { case s: ExposesMetadataColumns => s.withMetadataColumns() case p: Project => - p.copy( + val newProj = p.copy( projectList = p.metadataOutput ++ p.projectList, child = addMetadataCol(p.child)) + newProj.copyTagsFrom(p) + newProj case _ => plan.withNewChildren(plan.children.map(addMetadataCol)) } } @@ -3475,8 +3477,8 @@ class Analyzer(override val catalogManager: CatalogManager) val project = Project(projectList, Join(left, right, joinType, newCondition, hint)) project.setTagValue( Project.hiddenOutputTag, - hiddenList.map(_.markAsSupportsQualifiedStar()) ++ - project.child.metadataOutput.filter(_.supportsQualifiedStar)) + hiddenList.map(_.markAsQualifiedAccessOnly()) ++ + project.child.metadataOutput.filter(_.qualifiedAccessOnly)) project } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 9d24ae4a15950..d3e754ba670e7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -363,7 +363,7 @@ case class UnresolvedStar(target: Option[Seq[String]]) extends Star with Unevalu if (target.isEmpty) return input.output // If there is a table specified, use hidden input attributes as well - val hiddenOutput = input.metadataOutput.filter(_.supportsQualifiedStar) + val hiddenOutput = input.metadataOutput.filter(_.qualifiedAccessOnly) val expandedAttributes = (hiddenOutput ++ input.output).filter( matchedQualifier(_, target.get, resolver)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala index 6a4fb099c8b78..7913f396120f4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala @@ -23,6 +23,7 @@ import com.google.common.collect.Maps import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{Resolver, UnresolvedAttribute} +import org.apache.spark.sql.catalyst.util.MetadataColumnHelper import org.apache.spark.sql.types.{StructField, StructType} /** @@ -265,7 +266,7 @@ package object expressions { case (Seq(), _) => val name = nameParts.head val attributes = collectMatches(name, direct.get(name.toLowerCase(Locale.ROOT))) - (attributes, nameParts.tail) + (attributes.filterNot(_.qualifiedAccessOnly), nameParts.tail) case _ => matches } } @@ -314,10 +315,12 @@ package object expressions { var i = nameParts.length - 1 while (i >= 0 && candidates.isEmpty) { val name = nameParts(i) - candidates = collectMatches( - name, - nameParts.take(i), - direct.get(name.toLowerCase(Locale.ROOT))) + val attrsToLookup = if (i == 0) { + direct.get(name.toLowerCase(Locale.ROOT)).map(_.filterNot(_.qualifiedAccessOnly)) + } else { + direct.get(name.toLowerCase(Locale.ROOT)) + } + candidates = collectMatches(name, nameParts.take(i), attrsToLookup) if (candidates.nonEmpty) { nestedFields = nameParts.takeRight(nameParts.length - i - 1) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index e12a5918ee0b4..b52ce468390e9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -88,7 +88,7 @@ case class Project(projectList: Seq[NamedExpression], child: LogicalPlan) getAllValidConstraints(projectList) override def metadataOutput: Seq[Attribute] = - getTagValue(Project.hiddenOutputTag).getOrElse(Nil) + getTagValue(Project.hiddenOutputTag).getOrElse(child.metadataOutput) override protected def withNewChildInternal(newChild: LogicalPlan): Project = copy(child = newChild) @@ -1332,9 +1332,14 @@ case class SubqueryAlias( } override def metadataOutput: Seq[Attribute] = { - val qualifierList = identifier.qualifier :+ alias - val nonHiddenMetadataOutput = child.metadataOutput.filter(!_.supportsQualifiedStar) - nonHiddenMetadataOutput.map(_.withQualifier(qualifierList)) + // Propagate metadata columns from leaf nodes through a chain of `SubqueryAlias`. + if (child.isInstanceOf[LeafNode] || child.isInstanceOf[SubqueryAlias]) { + val qualifierList = identifier.qualifier :+ alias + val nonHiddenMetadataOutput = child.metadataOutput.filter(!_.qualifiedAccessOnly) + nonHiddenMetadataOutput.map(_.withQualifier(qualifierList)) + } else { + Nil + } } override def maxRows: Option[Long] = child.maxRows diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala index e06072cbed282..257749ed6d05f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/package.scala @@ -189,22 +189,23 @@ package object util extends Logging { implicit class MetadataColumnHelper(attr: Attribute) { /** - * If set, this metadata column is a candidate during qualified star expansions. + * If set, this metadata column can only be accessed with qualifiers, e.g. `qualifiers.col` or + * `qualifiers.*`. If not set, metadata columns cannot be accessed via star. */ - val SUPPORTS_QUALIFIED_STAR = "__supports_qualified_star" + val QUALIFIED_ACCESS_ONLY = "__qualified_access_only" def isMetadataCol: Boolean = attr.metadata.contains(METADATA_COL_ATTR_KEY) && attr.metadata.getBoolean(METADATA_COL_ATTR_KEY) - def supportsQualifiedStar: Boolean = attr.isMetadataCol && - attr.metadata.contains(SUPPORTS_QUALIFIED_STAR) && - attr.metadata.getBoolean(SUPPORTS_QUALIFIED_STAR) + def qualifiedAccessOnly: Boolean = attr.isMetadataCol && + attr.metadata.contains(QUALIFIED_ACCESS_ONLY) && + attr.metadata.getBoolean(QUALIFIED_ACCESS_ONLY) - def markAsSupportsQualifiedStar(): Attribute = attr.withMetadata( + def markAsQualifiedAccessOnly(): Attribute = attr.withMetadata( new MetadataBuilder() .withMetadata(attr.metadata) .putBoolean(METADATA_COL_ATTR_KEY, true) - .putBoolean(SUPPORTS_QUALIFIED_STAR, true) + .putBoolean(QUALIFIED_ACCESS_ONLY, true) .build() ) } diff --git a/sql/core/src/test/resources/sql-tests/inputs/using-join.sql b/sql/core/src/test/resources/sql-tests/inputs/using-join.sql index 336d19f0f2a3d..87390b388764f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/using-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/using-join.sql @@ -19,6 +19,8 @@ SELECT nt1.*, nt2.* FROM nt1 left outer join nt2 using (k); SELECT nt1.k, nt2.k FROM nt1 left outer join nt2 using (k); +SELECT nt1.k, nt2.k FROM nt1 left outer join nt2 using (k) ORDER BY nt2.k; + SELECT k, nt1.k FROM nt1 left outer join nt2 using (k); SELECT k, nt2.k FROM nt1 left outer join nt2 using (k); diff --git a/sql/core/src/test/resources/sql-tests/results/using-join.sql.out b/sql/core/src/test/resources/sql-tests/results/using-join.sql.out index 1d2ae9d96ecad..db9ac1f10bb00 100644 --- a/sql/core/src/test/resources/sql-tests/results/using-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/using-join.sql.out @@ -71,6 +71,17 @@ three NULL two two +-- !query +SELECT nt1.k, nt2.k FROM nt1 left outer join nt2 using (k) ORDER BY nt2.k +-- !query schema +struct +-- !query output +three NULL +one one +one one +two two + + -- !query SELECT k, nt1.k FROM nt1 left outer join nt2 using (k) -- !query schema diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 675dd2807ca2b..304c77fd00315 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2204,100 +2204,6 @@ class DataSourceV2SQLSuite } } - test("SPARK-31255: Project a metadata column") { - val t1 = s"${catalogAndNamespace}table" - withTable(t1) { - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format " + - "PARTITIONED BY (bucket(4, id), id)") - sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b'), (3, 'c')") - - val sqlQuery = spark.sql(s"SELECT id, data, index, _partition FROM $t1") - val dfQuery = spark.table(t1).select("id", "data", "index", "_partition") - - Seq(sqlQuery, dfQuery).foreach { query => - checkAnswer(query, Seq(Row(1, "a", 0, "3/1"), Row(2, "b", 0, "0/2"), Row(3, "c", 0, "1/3"))) - } - } - } - - test("SPARK-31255: Projects data column when metadata column has the same name") { - val t1 = s"${catalogAndNamespace}table" - withTable(t1) { - sql(s"CREATE TABLE $t1 (index bigint, data string) USING $v2Format " + - "PARTITIONED BY (bucket(4, index), index)") - sql(s"INSERT INTO $t1 VALUES (3, 'c'), (2, 'b'), (1, 'a')") - - val sqlQuery = spark.sql(s"SELECT index, data, _partition FROM $t1") - val dfQuery = spark.table(t1).select("index", "data", "_partition") - - Seq(sqlQuery, dfQuery).foreach { query => - checkAnswer(query, Seq(Row(3, "c", "1/3"), Row(2, "b", "0/2"), Row(1, "a", "3/1"))) - } - } - } - - test("SPARK-31255: * expansion does not include metadata columns") { - val t1 = s"${catalogAndNamespace}table" - withTable(t1) { - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format " + - "PARTITIONED BY (bucket(4, id), id)") - sql(s"INSERT INTO $t1 VALUES (3, 'c'), (2, 'b'), (1, 'a')") - - val sqlQuery = spark.sql(s"SELECT * FROM $t1") - val dfQuery = spark.table(t1) - - Seq(sqlQuery, dfQuery).foreach { query => - checkAnswer(query, Seq(Row(3, "c"), Row(2, "b"), Row(1, "a"))) - } - } - } - - test("SPARK-31255: metadata column should only be produced when necessary") { - val t1 = s"${catalogAndNamespace}table" - withTable(t1) { - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format " + - "PARTITIONED BY (bucket(4, id), id)") - - val sqlQuery = spark.sql(s"SELECT * FROM $t1 WHERE index = 0") - val dfQuery = spark.table(t1).filter("index = 0") - - Seq(sqlQuery, dfQuery).foreach { query => - assert(query.schema.fieldNames.toSeq == Seq("id", "data")) - } - } - } - - test("SPARK-34547: metadata columns are resolved last") { - val t1 = s"${catalogAndNamespace}tableOne" - val t2 = "t2" - withTable(t1) { - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format " + - "PARTITIONED BY (bucket(4, id), id)") - sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b'), (3, 'c')") - withTempView(t2) { - sql(s"CREATE TEMPORARY VIEW $t2 AS SELECT * FROM " + - s"VALUES (1, -1), (2, -2), (3, -3) AS $t2(id, index)") - - val sqlQuery = spark.sql(s"SELECT $t1.id, $t2.id, data, index, $t1.index, $t2.index FROM " + - s"$t1 JOIN $t2 WHERE $t1.id = $t2.id") - val t1Table = spark.table(t1) - val t2Table = spark.table(t2) - val dfQuery = t1Table.join(t2Table, t1Table.col("id") === t2Table.col("id")) - .select(s"$t1.id", s"$t2.id", "data", "index", s"$t1.index", s"$t2.index") - - Seq(sqlQuery, dfQuery).foreach { query => - checkAnswer(query, - Seq( - Row(1, 1, "a", -1, 0, -1), - Row(2, 2, "b", -2, 0, -2), - Row(3, 3, "c", -3, 0, -3) - ) - ) - } - } - } - } - test("SPARK-33505: insert into partitioned table") { val t = "testpart.ns1.ns2.tbl" withTable(t) { @@ -2382,51 +2288,6 @@ class DataSourceV2SQLSuite } } - test("SPARK-34555: Resolve DataFrame metadata column") { - val tbl = s"${catalogAndNamespace}table" - withTable(tbl) { - sql(s"CREATE TABLE $tbl (id bigint, data string) USING $v2Format " + - "PARTITIONED BY (bucket(4, id), id)") - sql(s"INSERT INTO $tbl VALUES (1, 'a'), (2, 'b'), (3, 'c')") - val table = spark.table(tbl) - val dfQuery = table.select( - table.col("id"), - table.col("data"), - table.col("index"), - table.col("_partition") - ) - - checkAnswer( - dfQuery, - Seq(Row(1, "a", 0, "3/1"), Row(2, "b", 0, "0/2"), Row(3, "c", 0, "1/3")) - ) - } - } - - test("SPARK-34561: drop/add columns to a dataset of `DESCRIBE TABLE`") { - val tbl = s"${catalogAndNamespace}tbl" - withTable(tbl) { - sql(s"CREATE TABLE $tbl (c0 INT) USING $v2Format") - val description = sql(s"DESCRIBE TABLE $tbl") - val noCommentDataset = description.drop("comment") - val expectedSchema = new StructType() - .add( - name = "col_name", - dataType = StringType, - nullable = false, - metadata = new MetadataBuilder().putString("comment", "name of the column").build()) - .add( - name = "data_type", - dataType = StringType, - nullable = false, - metadata = new MetadataBuilder().putString("comment", "data type of the column").build()) - assert(noCommentDataset.schema === expectedSchema) - val isNullDataset = noCommentDataset - .withColumn("is_null", noCommentDataset("col_name").isNull) - assert(isNullDataset.schema === expectedSchema.add("is_null", BooleanType, false)) - } - } - test("SPARK-34576: drop/add columns to a dataset of `DESCRIBE COLUMN`") { val tbl = s"${catalogAndNamespace}tbl" withTable(tbl) { @@ -2446,109 +2307,6 @@ class DataSourceV2SQLSuite } } - test("SPARK-34923: do not propagate metadata columns through Project") { - val t1 = s"${catalogAndNamespace}table" - withTable(t1) { - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format " + - "PARTITIONED BY (bucket(4, id), id)") - sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b'), (3, 'c')") - - assertThrows[AnalysisException] { - sql(s"SELECT index, _partition from (SELECT id, data FROM $t1)") - } - assertThrows[AnalysisException] { - spark.table(t1).select("id", "data").select("index", "_partition") - } - } - } - - test("SPARK-34923: do not propagate metadata columns through View") { - val t1 = s"${catalogAndNamespace}table" - val view = "view" - - withTable(t1) { - withTempView(view) { - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format " + - "PARTITIONED BY (bucket(4, id), id)") - sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b'), (3, 'c')") - sql(s"CACHE TABLE $view AS SELECT * FROM $t1") - assertThrows[AnalysisException] { - sql(s"SELECT index, _partition FROM $view") - } - } - } - } - - test("SPARK-34923: propagate metadata columns through Filter") { - val t1 = s"${catalogAndNamespace}table" - withTable(t1) { - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format " + - "PARTITIONED BY (bucket(4, id), id)") - sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b'), (3, 'c')") - - val sqlQuery = spark.sql(s"SELECT id, data, index, _partition FROM $t1 WHERE id > 1") - val dfQuery = spark.table(t1).where("id > 1").select("id", "data", "index", "_partition") - - Seq(sqlQuery, dfQuery).foreach { query => - checkAnswer(query, Seq(Row(2, "b", 0, "0/2"), Row(3, "c", 0, "1/3"))) - } - } - } - - test("SPARK-34923: propagate metadata columns through Sort") { - val t1 = s"${catalogAndNamespace}table" - withTable(t1) { - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format " + - "PARTITIONED BY (bucket(4, id), id)") - sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b'), (3, 'c')") - - val sqlQuery = spark.sql(s"SELECT id, data, index, _partition FROM $t1 ORDER BY id") - val dfQuery = spark.table(t1).orderBy("id").select("id", "data", "index", "_partition") - - Seq(sqlQuery, dfQuery).foreach { query => - checkAnswer(query, Seq(Row(1, "a", 0, "3/1"), Row(2, "b", 0, "0/2"), Row(3, "c", 0, "1/3"))) - } - } - } - - test("SPARK-34923: propagate metadata columns through RepartitionBy") { - val t1 = s"${catalogAndNamespace}table" - withTable(t1) { - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format " + - "PARTITIONED BY (bucket(4, id), id)") - sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b'), (3, 'c')") - - val sqlQuery = spark.sql( - s"SELECT /*+ REPARTITION_BY_RANGE(3, id) */ id, data, index, _partition FROM $t1") - val tbl = spark.table(t1) - val dfQuery = tbl.repartitionByRange(3, tbl.col("id")) - .select("id", "data", "index", "_partition") - - Seq(sqlQuery, dfQuery).foreach { query => - checkAnswer(query, Seq(Row(1, "a", 0, "3/1"), Row(2, "b", 0, "0/2"), Row(3, "c", 0, "1/3"))) - } - } - } - - test("SPARK-34923: propagate metadata columns through SubqueryAlias") { - val t1 = s"${catalogAndNamespace}table" - val sbq = "sbq" - withTable(t1) { - sql(s"CREATE TABLE $t1 (id bigint, data string) USING $v2Format " + - "PARTITIONED BY (bucket(4, id), id)") - sql(s"INSERT INTO $t1 VALUES (1, 'a'), (2, 'b'), (3, 'c')") - - val sqlQuery = spark.sql( - s"SELECT $sbq.id, $sbq.data, $sbq.index, $sbq._partition FROM $t1 as $sbq") - val dfQuery = spark.table(t1).as(sbq).select( - s"$sbq.id", s"$sbq.data", s"$sbq.index", s"$sbq._partition") - - Seq(sqlQuery, dfQuery).foreach { query => - checkAnswer(query, Seq(Row(1, "a", 0, "3/1"), Row(2, "b", 0, "0/2"), Row(3, "c", 0, "1/3"))) - } - } - } - test("SPARK-36481: Test for SET CATALOG statement") { val catalogManager = spark.sessionState.catalogManager assert(catalogManager.currentCatalog.name() == SESSION_CATALOG_NAME) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala new file mode 100644 index 0000000000000..95b9c4f72356a --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala @@ -0,0 +1,219 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.connector + +import org.apache.spark.sql.{AnalysisException, Row} +import org.apache.spark.sql.functions.struct + +class MetadataColumnSuite extends DatasourceV2SQLBase { + import testImplicits._ + + private val tbl = "testcat.t" + + private def prepareTable(): Unit = { + sql(s"CREATE TABLE $tbl (id bigint, data string) PARTITIONED BY (bucket(4, id), id)") + sql(s"INSERT INTO $tbl VALUES (1, 'a'), (2, 'b'), (3, 'c')") + } + + test("SPARK-31255: Project a metadata column") { + withTable(tbl) { + prepareTable() + val sqlQuery = sql(s"SELECT id, data, index, _partition FROM $tbl") + val dfQuery = spark.table(tbl).select("id", "data", "index", "_partition") + + Seq(sqlQuery, dfQuery).foreach { query => + checkAnswer(query, Seq(Row(1, "a", 0, "3/1"), Row(2, "b", 0, "0/2"), Row(3, "c", 0, "1/3"))) + } + } + } + + test("SPARK-31255: Projects data column when metadata column has the same name") { + withTable(tbl) { + sql(s"CREATE TABLE $tbl (index bigint, data string) PARTITIONED BY (bucket(4, index), index)") + sql(s"INSERT INTO $tbl VALUES (3, 'c'), (2, 'b'), (1, 'a')") + + val sqlQuery = sql(s"SELECT index, data, _partition FROM $tbl") + val dfQuery = spark.table(tbl).select("index", "data", "_partition") + + Seq(sqlQuery, dfQuery).foreach { query => + checkAnswer(query, Seq(Row(3, "c", "1/3"), Row(2, "b", "0/2"), Row(1, "a", "3/1"))) + } + } + } + + test("SPARK-31255: * expansion does not include metadata columns") { + withTable(tbl) { + prepareTable() + val sqlQuery = sql(s"SELECT * FROM $tbl") + val dfQuery = spark.table(tbl) + + Seq(sqlQuery, dfQuery).foreach { query => + checkAnswer(query, Seq(Row(1, "a"), Row(2, "b"), Row(3, "c"))) + } + } + } + + test("SPARK-31255: metadata column should only be produced when necessary") { + withTable(tbl) { + prepareTable() + val sqlQuery = sql(s"SELECT * FROM $tbl WHERE index = 0") + val dfQuery = spark.table(tbl).filter("index = 0") + + Seq(sqlQuery, dfQuery).foreach { query => + assert(query.schema.fieldNames.toSeq == Seq("id", "data")) + } + } + } + + test("SPARK-34547: metadata columns are resolved last") { + withTable(tbl) { + prepareTable() + withTempView("v") { + sql(s"CREATE TEMPORARY VIEW v AS SELECT * FROM " + + s"VALUES (1, -1), (2, -2), (3, -3) AS v(id, index)") + + val sqlQuery = sql(s"SELECT $tbl.id, v.id, data, index, $tbl.index, v.index " + + s"FROM $tbl JOIN v WHERE $tbl.id = v.id") + val tableDf = spark.table(tbl) + val viewDf = spark.table("v") + val dfQuery = tableDf.join(viewDf, tableDf.col("id") === viewDf.col("id")) + .select(s"$tbl.id", "v.id", "data", "index", s"$tbl.index", "v.index") + + Seq(sqlQuery, dfQuery).foreach { query => + checkAnswer(query, + Seq( + Row(1, 1, "a", -1, 0, -1), + Row(2, 2, "b", -2, 0, -2), + Row(3, 3, "c", -3, 0, -3) + ) + ) + } + } + } + } + + test("SPARK-34555: Resolve DataFrame metadata column") { + withTable(tbl) { + prepareTable() + val table = spark.table(tbl) + val dfQuery = table.select( + table.col("id"), + table.col("data"), + table.col("index"), + table.col("_partition") + ) + + checkAnswer( + dfQuery, + Seq(Row(1, "a", 0, "3/1"), Row(2, "b", 0, "0/2"), Row(3, "c", 0, "1/3")) + ) + } + } + + test("SPARK-34923: propagate metadata columns through Project") { + withTable(tbl) { + prepareTable() + checkAnswer( + spark.table(tbl).select("id", "data").select("index", "_partition"), + Seq(Row(0, "3/1"), Row(0, "0/2"), Row(0, "1/3")) + ) + } + } + + test("SPARK-34923: do not propagate metadata columns through View") { + val view = "view" + withTable(tbl) { + withTempView(view) { + prepareTable() + sql(s"CACHE TABLE $view AS SELECT * FROM $tbl") + assertThrows[AnalysisException] { + sql(s"SELECT index, _partition FROM $view") + } + } + } + } + + test("SPARK-34923: propagate metadata columns through Filter") { + withTable(tbl) { + prepareTable() + val sqlQuery = sql(s"SELECT id, data, index, _partition FROM $tbl WHERE id > 1") + val dfQuery = spark.table(tbl).where("id > 1").select("id", "data", "index", "_partition") + + Seq(sqlQuery, dfQuery).foreach { query => + checkAnswer(query, Seq(Row(2, "b", 0, "0/2"), Row(3, "c", 0, "1/3"))) + } + } + } + + test("SPARK-34923: propagate metadata columns through Sort") { + withTable(tbl) { + prepareTable() + val sqlQuery = sql(s"SELECT id, data, index, _partition FROM $tbl ORDER BY id") + val dfQuery = spark.table(tbl).orderBy("id").select("id", "data", "index", "_partition") + + Seq(sqlQuery, dfQuery).foreach { query => + checkAnswer(query, Seq(Row(1, "a", 0, "3/1"), Row(2, "b", 0, "0/2"), Row(3, "c", 0, "1/3"))) + } + } + } + + test("SPARK-34923: propagate metadata columns through RepartitionBy") { + withTable(tbl) { + prepareTable() + val sqlQuery = sql( + s"SELECT /*+ REPARTITION_BY_RANGE(3, id) */ id, data, index, _partition FROM $tbl") + val dfQuery = spark.table(tbl).repartitionByRange(3, $"id") + .select("id", "data", "index", "_partition") + + Seq(sqlQuery, dfQuery).foreach { query => + checkAnswer(query, Seq(Row(1, "a", 0, "3/1"), Row(2, "b", 0, "0/2"), Row(3, "c", 0, "1/3"))) + } + } + } + + test("SPARK-34923: propagate metadata columns through SubqueryAlias if child is leaf node") { + val sbq = "sbq" + withTable(tbl) { + prepareTable() + val sqlQuery = sql( + s"SELECT $sbq.id, $sbq.data, $sbq.index, $sbq._partition FROM $tbl $sbq") + val dfQuery = spark.table(tbl).as(sbq).select( + s"$sbq.id", s"$sbq.data", s"$sbq.index", s"$sbq._partition") + + Seq(sqlQuery, dfQuery).foreach { query => + checkAnswer(query, Seq(Row(1, "a", 0, "3/1"), Row(2, "b", 0, "0/2"), Row(3, "c", 0, "1/3"))) + } + + assertThrows[AnalysisException] { + sql(s"SELECT $sbq.index FROM (SELECT id FROM $tbl) $sbq") + } + assertThrows[AnalysisException] { + spark.table(tbl).select($"id").as(sbq).select(s"$sbq.index") + } + } + } + + test("SPARK-40149: select outer join metadata columns with DataFrame API") { + val df1 = Seq(1 -> "a").toDF("k", "v").as("left") + val df2 = Seq(1 -> "b").toDF("k", "v").as("right") + val dfQuery = df1.join(df2, Seq("k"), "outer") + .withColumn("left_all", struct($"left.*")) + .withColumn("right_all", struct($"right.*")) + checkAnswer(dfQuery, Row(1, "a", "b", Row(1, "a"), Row(1, "b"))) + } +} From 81cb08b7b3ae6a4ccfa9787ec39a6041fae8143f Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 7 Sep 2022 19:29:39 +0800 Subject: [PATCH 468/535] add back a mistakenly removed test case --- .../sql/connector/DataSourceV2SQLSuite.scala | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 304c77fd00315..44f97f55713fd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -2288,6 +2288,30 @@ class DataSourceV2SQLSuite } } + test("SPARK-34561: drop/add columns to a dataset of `DESCRIBE TABLE`") { + val tbl = s"${catalogAndNamespace}tbl" + withTable(tbl) { + sql(s"CREATE TABLE $tbl (c0 INT) USING $v2Format") + val description = sql(s"DESCRIBE TABLE $tbl") + val noCommentDataset = description.drop("comment") + val expectedSchema = new StructType() + .add( + name = "col_name", + dataType = StringType, + nullable = false, + metadata = new MetadataBuilder().putString("comment", "name of the column").build()) + .add( + name = "data_type", + dataType = StringType, + nullable = false, + metadata = new MetadataBuilder().putString("comment", "data type of the column").build()) + assert(noCommentDataset.schema === expectedSchema) + val isNullDataset = noCommentDataset + .withColumn("is_null", noCommentDataset("col_name").isNull) + assert(isNullDataset.schema === expectedSchema.add("is_null", BooleanType, false)) + } + } + test("SPARK-34576: drop/add columns to a dataset of `DESCRIBE COLUMN`") { val tbl = s"${catalogAndNamespace}tbl" withTable(tbl) { From 10cd3ac16bc5b90efc8a2a729c4bfaf2e1cee034 Mon Sep 17 00:00:00 2001 From: Kris Mok Date: Thu, 8 Sep 2022 21:20:46 +0800 Subject: [PATCH 469/535] [SPARK-40380][SQL] Fix constant-folding of InvokeLike to avoid non-serializable literal embedded in the plan ### What changes were proposed in this pull request? Block `InvokeLike` expressions with `ObjectType` result from constant-folding, to ensure constant-folded results are trusted to be serializable. This is a conservative fix for ease of backport to Spark 3.3. A separate future change can relax the restriction and support constant-folding to serializable `ObjectType` as well. ### Why are the changes needed? This fixes a regression introduced by https://github.com/apache/spark/pull/35207 . It enabled the constant-folding logic to aggressively fold `InvokeLike` expressions (e.g. `Invoke`, `StaticInvoke`), when all arguments are foldable and the expression itself is deterministic. But it could go overly aggressive and constant-fold to non-serializable results, which would be problematic when that result needs to be serialized and sent over the wire. In the wild, users of sparksql-scalapb have hit this issue. The constant folding logic would fold a chain of `Invoke` / `StaticInvoke` expressions from only holding onto a serializable literal to holding onto a non-serializable literal: ``` Literal(com.example.protos.demo.Person$...).scalaDescriptor.findFieldByNumber.get ``` this expression works fine before constant-folding because the literal that gets sent to the executors is serializable, but when aggressive constant-folding kicks in it ends up as a `Literal(scalapb.descriptors.FieldDescriptor...)` which isn't serializable. The following minimal repro demonstrates this issue: ``` import org.apache.spark.sql.Column import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.expressions.objects.{Invoke, StaticInvoke} import org.apache.spark.sql.types.{LongType, ObjectType} class NotSerializableBoxedLong(longVal: Long) { def add(other: Long): Long = longVal + other } case class SerializableBoxedLong(longVal: Long) { def toNotSerializable(): NotSerializableBoxedLong = new NotSerializableBoxedLong(longVal) } val litExpr = Literal.fromObject(SerializableBoxedLong(42L), ObjectType(classOf[SerializableBoxedLong])) val toNotSerializableExpr = Invoke(litExpr, "toNotSerializable", ObjectType(classOf[NotSerializableBoxedLong])) val addExpr = Invoke(toNotSerializableExpr, "add", LongType, Seq(UnresolvedAttribute.quotedString("id"))) val df = spark.range(2).select(new Column(addExpr)) df.collect ``` would result in an error if aggressive constant-folding kicked in: ``` ... Caused by: java.io.NotSerializableException: NotSerializableBoxedLong Serialization stack: - object not serializable (class: NotSerializableBoxedLong, value: NotSerializableBoxedLong71231636) - element of array (index: 1) - array (class [Ljava.lang.Object;, size 2) - element of array (index: 1) - array (class [Ljava.lang.Object;, size 3) - field (class: java.lang.invoke.SerializedLambda, name: capturedArgs, type: class [Ljava.lang.Object;) - object (class java.lang.invoke.SerializedLambda, SerializedLambda[capturingClass=class org.apache.spark.sql.execution.WholeStageCodegenExec, functionalInterfaceMethod=scala/Function2.apply:(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;, implementation=invokeStatic org/apache/spark/sql/execution/WholeStageCodegenExec.$anonfun$doExecute$4$adapted:(Lorg/apache/spark/sql/catalyst/expressions/codegen/CodeAndComment;[Ljava/lang/Object;Lorg/apache/spark/sql/execution/metric/SQLMetric;Ljava/lang/Object;Lscala/collection/Iterator;)Lscala/collection/Iterator;, instantiatedMethodType=(Ljava/lang/Object;Lscala/collection/Iterator;)Lscala/collection/Iterator;, numCaptured=3]) - writeReplace data (class: java.lang.invoke.SerializedLambda) - object (class org.apache.spark.sql.execution.WholeStageCodegenExec$$Lambda$3123/1641694389, org.apache.spark.sql.execution.WholeStageCodegenExec$$Lambda$3123/1641694389185db22c) at org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:41) at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:49) at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:115) at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:441) ``` ### Does this PR introduce _any_ user-facing change? Yes, a regression in ObjectType expression starting from Spark 3.3.0 is fixed. ### How was this patch tested? The existing test cases in `ConstantFoldingSuite` continues to pass; added a new test case to demonstrate the regression issue. Closes #37823 from rednaxelafx/fix-invokelike-constantfold. Authored-by: Kris Mok Signed-off-by: Wenchen Fan (cherry picked from commit 5b96e82ad6a4f5d5e4034d9d7112077159cf5044) Signed-off-by: Wenchen Fan --- .../expressions/objects/objects.scala | 11 +++- .../optimizer/ConstantFoldingSuite.scala | 54 +++++++++++++++++++ 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala index fe982b238296f..70f4f95f0e987 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala @@ -50,7 +50,8 @@ trait InvokeLike extends Expression with NonSQLExpression with ImplicitCastInput def propagateNull: Boolean - override def foldable: Boolean = children.forall(_.foldable) && deterministic + override def foldable: Boolean = + children.forall(_.foldable) && deterministic && trustedSerializable(dataType) protected lazy val needNullCheck: Boolean = needNullCheckForIndex.contains(true) protected lazy val needNullCheckForIndex: Array[Boolean] = arguments.map(a => a.nullable && (propagateNull || @@ -62,6 +63,14 @@ trait InvokeLike extends Expression with NonSQLExpression with ImplicitCastInput .map(cls => v => cls.cast(v)) .getOrElse(identity) + // Returns true if we can trust all values of the given DataType can be serialized. + private def trustedSerializable(dt: DataType): Boolean = { + // Right now we conservatively block all ObjectType (Java objects) regardless of + // serializability, because the type-level info with java.io.Serializable and + // java.io.Externalizable marker interfaces are not strong guarantees. + // This restriction can be relaxed in the future to expose more optimizations. + !dt.existsRecursively(_.isInstanceOf[ObjectType]) + } /** * Prepares codes for arguments. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index 7f534c6e43f86..079931c2ff055 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -334,6 +334,51 @@ class ConstantFoldingSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + test("SPARK-40380: InvokeLike should only constant-fold to serializable types") { + val serializableObjType = ObjectType(classOf[SerializableBoxedInt]) + val notSerializableObjType = ObjectType(classOf[NotSerializableBoxedInt]) + + val originalQuery = + testRelation + .select( + // SerializableBoxedInt(42).add(1).toNotSerializable().addAsInt($"a") + Invoke( + Invoke( + Invoke( + Literal.fromObject(SerializableBoxedInt(42), serializableObjType), + "add", + serializableObjType, + Literal(1) :: Nil + ), + "toNotSerializable", + notSerializableObjType), + "addAsInt", + IntegerType, + $"a" :: Nil).as("c1")) + + val optimized = Optimize.execute(originalQuery.analyze) + + val correctAnswer = originalQuery.analyze + + // If serializable ObjectType is allowed to be constant-folded in the future, this chain can + // be optimized into: + // val correctAnswer = + // testRelation + // .select( + // // SerializableBoxedInt(43).toNotSerializable().addAsInt($"a") + // Invoke( + // Invoke( + // Literal.fromObject(SerializableBoxedInt(43), serializableObjType), + // "toNotSerializable", + // notSerializableObjType), + // "addAsInt", + // IntegerType, + // $"a" :: Nil).as("c1")) + // .analyze + + comparePlans(optimized, correctAnswer) + } + test("SPARK-39106: Correct conditional expression constant folding") { val t = LocalRelation.fromExternalRows( $"c".double :: Nil, @@ -369,3 +414,12 @@ class ConstantFoldingSuite extends PlanTest { } } } + +case class SerializableBoxedInt(intVal: Int) { + def add(other: Int): SerializableBoxedInt = SerializableBoxedInt(intVal + other) + def toNotSerializable(): NotSerializableBoxedInt = new NotSerializableBoxedInt(intVal) +} + +class NotSerializableBoxedInt(intVal: Int) { + def addAsInt(other: Int): Int = intVal + other +} From 0cdb081670b55d9181d8ffb125911333e8ab339b Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Thu, 8 Sep 2022 08:54:28 -0500 Subject: [PATCH 470/535] [SPARK-40280][SQL] Add support for parquet push down for annotated int and long ### What changes were proposed in this pull request? This fixes SPARK-40280 by normalizing a parquet int/long that has optional metadata with it to look like the expected version that does not have the extra metadata. ## Why are the changes needed? This allows predicate push down in parquet to work when reading files that are complaint with the parquet specification, but different from what Spark writes. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? I added unit tests that cover this use case. I also did some manual testing on some queries to verify that less data is actually read after this change. Closes #37747 from revans2/normalize_int_long_parquet_push. Authored-by: Robert (Bobby) Evans Signed-off-by: Thomas Graves (cherry picked from commit 24b3baf0177fc1446bf59bb34987296aefd4b318) Signed-off-by: Thomas Graves --- .../datasources/parquet/ParquetFilters.scala | 16 +++- .../resources/test-data/tagged_int.parquet | Bin 0 -> 305 bytes .../resources/test-data/tagged_long.parquet | Bin 0 -> 313 bytes .../parquet/ParquetFilterSuite.scala | 82 ++++++++++++++++++ 4 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 sql/core/src/test/resources/test-data/tagged_int.parquet create mode 100644 sql/core/src/test/resources/test-data/tagged_long.parquet diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala index 9502ec0316ca3..e04019fa9a012 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala @@ -30,7 +30,7 @@ import org.apache.parquet.filter2.predicate._ import org.apache.parquet.filter2.predicate.SparkFilterApi._ import org.apache.parquet.io.api.Binary import org.apache.parquet.schema.{GroupType, LogicalTypeAnnotation, MessageType, PrimitiveComparator, PrimitiveType, Type} -import org.apache.parquet.schema.LogicalTypeAnnotation.{DecimalLogicalTypeAnnotation, TimeUnit} +import org.apache.parquet.schema.LogicalTypeAnnotation.{DecimalLogicalTypeAnnotation, IntLogicalTypeAnnotation, TimeUnit} import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._ import org.apache.parquet.schema.Type.Repetition @@ -59,6 +59,18 @@ class ParquetFilters( // nested columns. If any part of the names contains `dots`, it is quoted to avoid confusion. // See `org.apache.spark.sql.connector.catalog.quote` for implementation details. private val nameToParquetField : Map[String, ParquetPrimitiveField] = { + def getNormalizedLogicalType(p: PrimitiveType): LogicalTypeAnnotation = { + // SPARK-40280: Signed 64 bits on an INT64 and signed 32 bits on an INT32 are optional, but + // the rest of the code here assumes they are not set, so normalize them to not being set. + (p.getPrimitiveTypeName, p.getLogicalTypeAnnotation) match { + case (INT32, intType: IntLogicalTypeAnnotation) + if intType.getBitWidth() == 32 && intType.isSigned() => null + case (INT64, intType: IntLogicalTypeAnnotation) + if intType.getBitWidth() == 64 && intType.isSigned() => null + case (_, otherType) => otherType + } + } + // Recursively traverse the parquet schema to get primitive fields that can be pushed-down. // `parentFieldNames` is used to keep track of the current nested level when traversing. def getPrimitiveFields( @@ -70,7 +82,7 @@ class ParquetFilters( // repeated columns (https://issues.apache.org/jira/browse/PARQUET-34) case p: PrimitiveType if p.getRepetition != Repetition.REPEATED => Some(ParquetPrimitiveField(fieldNames = parentFieldNames :+ p.getName, - fieldType = ParquetSchemaType(p.getLogicalTypeAnnotation, + fieldType = ParquetSchemaType(getNormalizedLogicalType(p), p.getPrimitiveTypeName, p.getTypeLength))) // Note that when g is a `Struct`, `g.getOriginalType` is `null`. // When g is a `Map`, `g.getOriginalType` is `MAP`. diff --git a/sql/core/src/test/resources/test-data/tagged_int.parquet b/sql/core/src/test/resources/test-data/tagged_int.parquet new file mode 100644 index 0000000000000000000000000000000000000000..840f5dbdb93af04f98f11b3128c0f59dd4b7e42e GIT binary patch literal 305 zcmZXQPfNrw5XHxCE&T#13G}c8y0{R6Mc6~xi+l38comVcn_Y|PpG{IlmVS4qq9+3n zW`6T{JhH!kEE%vRTPl{)kgx>c3IHO~f)tZjB-BIV>o?OPi)bx?FJ9`~LPC~`T;war z$x}cgemEnX?Xuk}-EQC}o+EfXhteaSf>3ed82 zU_kzKk>P7FvXVZyu?|wnV`J>W%Zg4Nw)2VV5dZZxT`(u(2y~;h_-e^8VCjbBd literal 0 HcmV?d00001 diff --git a/sql/core/src/test/resources/test-data/tagged_long.parquet b/sql/core/src/test/resources/test-data/tagged_long.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3ff9450d834e8c93c06df11bcd809684c17f156f GIT binary patch literal 313 zcmZWlO-sZu6nyQ*nmz0>A%Pww*y2J77QsXDu-Q^VVL#(Lr#FKNSYDsK|~^e3lzXSfN*|}V79=37MO%QDROQy2ugs>UaITl z5-8>|k5h;V;t53jhbJM$bVJJ&-`&C$M({ff2PfngEhz?`v1QxurRtP7m$p;=o3uwG zJva{9$-e!NwRUQ&oMMloaG#wPg|Mz4Dkn0rJ7}}DqIeobqv~(T$MXE?&$P|nU(H~Q s+88hIl(9M!f0m*cr|EeD#{Q*v*S$To4LiG$xy)f1$3C?01=hZ_FV{szxc~qF literal 0 HcmV?d00001 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 92798d4f4ca41..f6b5bbf4dadb3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -365,6 +365,47 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } + test("SPARK-40280: filter pushdown - int with annotation") { + implicit val df = readResourceParquetFile("test-data/tagged_int.parquet") + + val intAttr = df("_c0").expr + assert(intAttr.dataType === IntegerType) + + checkFilterPredicate(intAttr.isNull, classOf[Eq[_]], Seq.empty[Row]) + checkFilterPredicate(intAttr.isNotNull, classOf[NotEq[_]], + (1 to 4).map(i => Row.apply(i))) + + checkFilterPredicate(intAttr === 1, classOf[Eq[_]], 1) + checkFilterPredicate(intAttr <=> 1, classOf[Eq[_]], 1) + checkFilterPredicate(intAttr =!= 1, classOf[NotEq[_]], + (2 to 4).map(i => Row.apply(i))) + + checkFilterPredicate(intAttr < 2, classOf[Lt[_]], 1) + checkFilterPredicate(intAttr > 3, classOf[Gt[_]], 4) + checkFilterPredicate(intAttr <= 1, classOf[LtEq[_]], 1) + checkFilterPredicate(intAttr >= 4, classOf[GtEq[_]], 4) + + checkFilterPredicate(Literal(1) === intAttr, classOf[Eq[_]], 1) + checkFilterPredicate(Literal(1) <=> intAttr, classOf[Eq[_]], 1) + checkFilterPredicate(Literal(2) > intAttr, classOf[Lt[_]], 1) + checkFilterPredicate(Literal(3) < intAttr, classOf[Gt[_]], 4) + checkFilterPredicate(Literal(1) >= intAttr, classOf[LtEq[_]], 1) + checkFilterPredicate(Literal(4) <= intAttr, classOf[GtEq[_]], 4) + + checkFilterPredicate(!(intAttr < 4), classOf[GtEq[_]], 4) + checkFilterPredicate(intAttr < 2 || intAttr > 3, classOf[Operators.Or], + Seq(Row(1), Row(4))) + + Seq(3, 20).foreach { threshold => + withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_INFILTERTHRESHOLD.key -> s"$threshold") { + checkFilterPredicate( + In(intAttr, Array(2, 3, 4, 5, 6, 7).map(Literal.apply)), + if (threshold == 3) classOf[FilterIn[_]] else classOf[Operators.Or], + Seq(Row(2), Row(3), Row(4))) + } + } + } + test("filter pushdown - long") { val data = (1 to 4).map(i => Tuple1(Option(i.toLong))) withNestedParquetDataFrame(data) { case (inputDF, colName, resultFun) => @@ -409,6 +450,47 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } + test("SPARK-40280: filter pushdown - long with annotation") { + implicit val df = readResourceParquetFile("test-data/tagged_long.parquet") + + val longAttr = df("_c0").expr + assert(longAttr.dataType === LongType) + + checkFilterPredicate(longAttr.isNull, classOf[Eq[_]], Seq.empty[Row]) + checkFilterPredicate(longAttr.isNotNull, classOf[NotEq[_]], + (1 to 4).map(i => Row.apply(i))) + + checkFilterPredicate(longAttr === 1, classOf[Eq[_]], 1) + checkFilterPredicate(longAttr <=> 1, classOf[Eq[_]], 1) + checkFilterPredicate(longAttr =!= 1, classOf[NotEq[_]], + (2 to 4).map(i => Row.apply(i))) + + checkFilterPredicate(longAttr < 2, classOf[Lt[_]], 1) + checkFilterPredicate(longAttr > 3, classOf[Gt[_]], 4) + checkFilterPredicate(longAttr <= 1, classOf[LtEq[_]], 1) + checkFilterPredicate(longAttr >= 4, classOf[GtEq[_]], 4) + + checkFilterPredicate(Literal(1) === longAttr, classOf[Eq[_]], 1) + checkFilterPredicate(Literal(1) <=> longAttr, classOf[Eq[_]], 1) + checkFilterPredicate(Literal(2) > longAttr, classOf[Lt[_]], 1) + checkFilterPredicate(Literal(3) < longAttr, classOf[Gt[_]], 4) + checkFilterPredicate(Literal(1) >= longAttr, classOf[LtEq[_]], 1) + checkFilterPredicate(Literal(4) <= longAttr, classOf[GtEq[_]], 4) + + checkFilterPredicate(!(longAttr < 4), classOf[GtEq[_]], 4) + checkFilterPredicate(longAttr < 2 || longAttr > 3, classOf[Operators.Or], + Seq(Row(1), Row(4))) + + Seq(3, 20).foreach { threshold => + withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_INFILTERTHRESHOLD.key -> s"$threshold") { + checkFilterPredicate( + In(longAttr, Array(2L, 3L, 4L, 5L, 6L, 7L).map(Literal.apply)), + if (threshold == 3) classOf[FilterIn[_]] else classOf[Operators.Or], + Seq(Row(2L), Row(3L), Row(4L))) + } + } + } + test("filter pushdown - float") { val data = (1 to 4).map(i => Tuple1(Option(i.toFloat))) withNestedParquetDataFrame(data) { case (inputDF, colName, resultFun) => From cd9f5642060cea344b6c84dde19de3e96836da19 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 8 Sep 2022 13:23:06 -0700 Subject: [PATCH 471/535] [SPARK-40389][SQL] Decimals can't upcast as integral types if the cast can overflow ### What changes were proposed in this pull request? In Spark SQL, the method `canUpCast` returns true iff we can safely up-cast the `from` type to `to` type without truncating or precision loss or possible runtime failures. Meanwhile, DecimalType(10, 0) is considered as `canUpCast` to Integer type. This is wrong since casting `9000000000BD` as Integer type will overflow. As a result: * The optimizer rule `SimplifyCasts` replies on the method `canUpCast` and it will mistakenly convert `cast(cast(9000000000BD as int) as long)` as `cast(9000000000BD as long)` * The STRICT store assignment policy relies on this method too. With the policy enabled, inserting `9000000000BD` into integer columns will pass compiling time check and insert an unexpected value `410065408`. * etc... ### Why are the changes needed? Bug fix on the method `Cast.canUpCast` ### Does this PR introduce _any_ user-facing change? Yes, fix a bug on the checking condition of whether a decimal can safely cast as integral types. ### How was this patch tested? New UT Closes #37832 from gengliangwang/SPARK-40389. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 17982519a749bd4ca2aa7eca12fba00ccc1520aa) Signed-off-by: Gengliang Wang --- .../org/apache/spark/sql/types/DecimalType.scala | 11 ++++++----- .../sql/catalyst/expressions/CastSuiteBase.scala | 9 +++++++++ .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 12 ++++++++++++ 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala index 08ddd12ef7d29..19e7d898d22b5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala @@ -92,14 +92,15 @@ case class DecimalType(precision: Int, scale: Int) extends FractionalType { * Returns whether this DecimalType is tighter than `other`. If yes, it means `this` * can be casted into `other` safely without losing any precision or range. */ - private[sql] def isTighterThan(other: DataType): Boolean = isTighterThanInternal(other) - - @tailrec - private def isTighterThanInternal(other: DataType): Boolean = other match { + private[sql] def isTighterThan(other: DataType): Boolean = other match { case dt: DecimalType => (precision - scale) <= (dt.precision - dt.scale) && scale <= dt.scale case dt: IntegralType => - isTighterThanInternal(DecimalType.forType(dt)) + val integerAsDecimal = DecimalType.forType(dt) + assert(integerAsDecimal.scale == 0) + // If the precision equals `integerAsDecimal.precision`, there can be integer overflow + // during casting. + precision < integerAsDecimal.precision && scale == 0 case _ => false } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala index ba8ab708046d1..8c0467aedd1ca 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuiteBase.scala @@ -671,6 +671,15 @@ abstract class CastSuiteBase extends SparkFunSuite with ExpressionEvalHelper { } } + test("SPARK-40389: canUpCast: return false if casting decimal to integral types can cause" + + " overflow") { + Seq(ByteType, ShortType, IntegerType, LongType).foreach { integralType => + val decimalType = DecimalType.forType(integralType) + assert(!Cast.canUpCast(decimalType, integralType)) + assert(Cast.canUpCast(integralType, decimalType)) + } + } + test("SPARK-27671: cast from nested null type in struct") { import DataTypeTestUtils._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 6ed6a85b8d1fd..b23da9282b036 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4551,6 +4551,18 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark sql("select * from test_temp_view"), Row(1, 2, 3, 1, 2, 3, 1, 1)) } + + test("SPARK-40389: Don't eliminate a cast which can cause overflow") { + withSQLConf(SQLConf.ANSI_ENABLED.key -> "true") { + withTable("dt") { + sql("create table dt using parquet as select 9000000000BD as d") + val msg = intercept[SparkException] { + sql("select cast(cast(d as int) as long) from dt").collect() + }.getCause.getMessage + assert(msg.contains("[CAST_OVERFLOW]")) + } + } + } } case class Foo(bar: Option[String]) From b18d582c7a07a43ce2d25708bb8116ffc98cf8b2 Mon Sep 17 00:00:00 2001 From: Zhichao Zhang Date: Fri, 9 Sep 2022 11:31:16 -0700 Subject: [PATCH 472/535] [SPARK-40280][SQL][FOLLOWUP][3.3] Fix 'ParquetFilterSuite' issue ### What changes were proposed in this pull request? ### Why are the changes needed? Fix 'ParquetFilterSuite' issue after merging #37747 : The `org.apache.parquet.filter2.predicate.Operators.In` was added in the parquet 1.12.3, but spark branch-3.3 uses the parquet 1.12.2. Use `Operators.And` instead of `Operators.In`. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Closes #37847 from zzcclp/SPARK-40280-hotfix-3.3. Authored-by: Zhichao Zhang Signed-off-by: huaxingao --- .../execution/datasources/parquet/ParquetFilterSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index f6b5bbf4dadb3..f291e1e71f6ce 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -400,7 +400,7 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_INFILTERTHRESHOLD.key -> s"$threshold") { checkFilterPredicate( In(intAttr, Array(2, 3, 4, 5, 6, 7).map(Literal.apply)), - if (threshold == 3) classOf[FilterIn[_]] else classOf[Operators.Or], + if (threshold == 3) classOf[Operators.And] else classOf[Operators.Or], Seq(Row(2), Row(3), Row(4))) } } @@ -485,7 +485,7 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_INFILTERTHRESHOLD.key -> s"$threshold") { checkFilterPredicate( In(longAttr, Array(2L, 3L, 4L, 5L, 6L, 7L).map(Literal.apply)), - if (threshold == 3) classOf[FilterIn[_]] else classOf[Operators.Or], + if (threshold == 3) classOf[Operators.And] else classOf[Operators.Or], Seq(Row(2L), Row(3L), Row(4L))) } } From aaa82928bcc763fee9ea5b0b43f984b862d65467 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 9 Sep 2022 11:57:00 -0700 Subject: [PATCH 473/535] [SPARK-40389][SQL][FOLLOWUP][3.3] Fix a test failure in SQLQuerySuite ### What changes were proposed in this pull request? Fix a test failure in SQLQuerySuite on branch-3.3. It's from the backport of https://github.com/apache/spark/pull/37832 since there is no error class "CAST_OVERFLOW" on branch-3.3 ### Why are the changes needed? Fix test failure ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? GA Closes #37848 from gengliangwang/fixTest. Authored-by: Gengliang Wang Signed-off-by: Dongjoon Hyun --- .../src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b23da9282b036..0a3107cdff610 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4559,7 +4559,8 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark val msg = intercept[SparkException] { sql("select cast(cast(d as int) as long) from dt").collect() }.getCause.getMessage - assert(msg.contains("[CAST_OVERFLOW]")) + assert(msg.contains("The value 9000000000BD of the type \"DECIMAL(10,0)\" " + + "cannot be cast to \"INT\" due to an overflow")) } } } From 4f69c98ae95681cf972fa6701c94dbbb28e40d80 Mon Sep 17 00:00:00 2001 From: sychen Date: Fri, 9 Sep 2022 14:36:39 -0700 Subject: [PATCH 474/535] [SPARK-39830][SQL][TESTS][3.3] Add a test case to read ORC table that requires type promotion ### What changes were proposed in this pull request? Increase ORC test coverage. [ORC-1205](https://issues.apache.org/jira/browse/ORC-1205) Size of batches in some ConvertTreeReaders should be ensured before using ### Why are the changes needed? When spark reads an orc with type promotion, an `ArrayIndexOutOfBoundsException` may be thrown, which has been fixed in version 1.7.6 and 1.8.0. ```java java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.orc.impl.TreeReaderFactory$TreeReader.nextVector(TreeReaderFactory.java:387) at org.apache.orc.impl.TreeReaderFactory$LongTreeReader.nextVector(TreeReaderFactory.java:740) at org.apache.orc.impl.ConvertTreeReaderFactory$StringGroupFromAnyIntegerTreeReader.nextVector(ConvertTreeReaderFactory.java:1069) at org.apache.orc.impl.reader.tree.StructBatchReader.readBatchColumn(StructBatchReader.java:65) ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? add UT Closes #37808 from cxzl25/SPARK-39830-3.3. Authored-by: sychen Signed-off-by: Dongjoon Hyun --- .../datasources/orc/OrcQuerySuite.scala | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index e44fb63c75726..3c051e4f66a09 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -832,6 +832,29 @@ abstract class OrcQuerySuite extends OrcQueryTest with SharedSparkSession { } } } + + test("SPARK-39830: Reading ORC table that requires type promotion may throw AIOOBE") { + withSQLConf("orc.stripe.size" -> "20480", + "orc.rows.between.memory.checks" -> "1") { + withTempPath { dir => + val path = dir.getCanonicalPath + val df = spark.range(1, 3072, 1, 1).map { i => + if (i < 1024) { + (i, Array.fill[Byte](1024)('X')) + } else { + (i, Array.fill[Byte](1)('X')) + } + }.toDF("c1", "c2") + df.write.format("orc").save(path) + withSQLConf(SQLConf.ORC_VECTORIZED_READER_BATCH_SIZE.key -> "1025") { + withTable("t1") { + spark.sql(s"create table t1 (c1 string,c2 binary) using orc location '$path'") + spark.sql("select * from t1").collect() + } + } + } + } + } } class OrcV1QuerySuite extends OrcQuerySuite { From 18fc8e8e023868f6e7fab3422c5ce57e690d7834 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Fri, 9 Sep 2022 14:43:19 -0700 Subject: [PATCH 475/535] [SPARK-39915][SQL][3.3] Dataset.repartition(N) may not create N partitions Non-AQE part ### What changes were proposed in this pull request? backport https://github.com/apache/spark/pull/37706 for branch-3.3 Skip optimize the root user-specified repartition in `PropagateEmptyRelation`. ### Why are the changes needed? Spark should preserve the final repatition which can affect the final output partition which is user-specified. For example: ```scala spark.sql("select * from values(1) where 1 < rand()").repartition(1) // before: == Optimized Logical Plan == LocalTableScan , [col1#0] // after: == Optimized Logical Plan == Repartition 1, true +- LocalRelation , [col1#0] ``` ### Does this PR introduce _any_ user-facing change? yes, the empty plan may change ### How was this patch tested? add test Closes #37730 from ulysses-you/empty-3.3. Authored-by: ulysses-you Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/dsl/package.scala | 3 ++ .../optimizer/PropagateEmptyRelation.scala | 42 +++++++++++++++++-- .../PropagateEmptyRelationSuite.scala | 38 +++++++++++++++++ .../adaptive/AQEPropagateEmptyRelation.scala | 2 +- .../org/apache/spark/sql/DataFrameSuite.scala | 7 ++++ 5 files changed, 88 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index d47e34b110dc8..000622187f406 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -491,6 +491,9 @@ package object dsl { def repartition(num: Integer): LogicalPlan = Repartition(num, shuffle = true, logicalPlan) + def repartition(): LogicalPlan = + RepartitionByExpression(Seq.empty, logicalPlan, None) + def distribute(exprs: Expression*)(n: Int): LogicalPlan = RepartitionByExpression(exprs, logicalPlan, numPartitions = n) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala index 2c964fa6da3db..f8e2096e44326 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.expressions.Literal.FalseLiteral import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.catalyst.trees.TreePattern.{LOCAL_RELATION, TRUE_OR_FALSE_LITERAL} /** @@ -44,6 +45,9 @@ import org.apache.spark.sql.catalyst.trees.TreePattern.{LOCAL_RELATION, TRUE_OR_ * - Generate(Explode) with all empty children. Others like Hive UDTF may return results. */ abstract class PropagateEmptyRelationBase extends Rule[LogicalPlan] with CastSupport { + // This tag is used to mark a repartition as a root repartition which is user-specified + private[sql] val ROOT_REPARTITION = TreeNodeTag[Unit]("ROOT_REPARTITION") + protected def isEmpty(plan: LogicalPlan): Boolean = plan match { case p: LocalRelation => p.data.isEmpty case _ => false @@ -136,8 +140,13 @@ abstract class PropagateEmptyRelationBase extends Rule[LogicalPlan] with CastSup case _: Sort => empty(p) case _: GlobalLimit if !p.isStreaming => empty(p) case _: LocalLimit if !p.isStreaming => empty(p) - case _: Repartition => empty(p) - case _: RepartitionByExpression => empty(p) + case _: RepartitionOperation => + if (p.getTagValue(ROOT_REPARTITION).isEmpty) { + empty(p) + } else { + p.unsetTagValue(ROOT_REPARTITION) + p + } case _: RebalancePartitions => empty(p) // An aggregate with non-empty group expression will return one output row per group when the // input to the aggregate is not empty. If the input to the aggregate is empty then all groups @@ -160,13 +169,40 @@ abstract class PropagateEmptyRelationBase extends Rule[LogicalPlan] with CastSup case _ => p } } + + protected def userSpecifiedRepartition(p: LogicalPlan): Boolean = p match { + case _: Repartition => true + case r: RepartitionByExpression + if r.optNumPartitions.isDefined || r.partitionExpressions.nonEmpty => true + case _ => false + } + + protected def applyInternal(plan: LogicalPlan): LogicalPlan + + /** + * Add a [[ROOT_REPARTITION]] tag for the root user-specified repartition so this rule can + * skip optimize it. + */ + private def addTagForRootRepartition(plan: LogicalPlan): LogicalPlan = plan match { + case p: Project => p.mapChildren(addTagForRootRepartition) + case f: Filter => f.mapChildren(addTagForRootRepartition) + case r if userSpecifiedRepartition(r) => + r.setTagValue(ROOT_REPARTITION, ()) + r + case _ => plan + } + + override def apply(plan: LogicalPlan): LogicalPlan = { + val planWithTag = addTagForRootRepartition(plan) + applyInternal(planWithTag) + } } /** * This rule runs in the normal optimizer */ object PropagateEmptyRelation extends PropagateEmptyRelationBase { - override def apply(plan: LogicalPlan): LogicalPlan = plan.transformUpWithPruning( + override protected def applyInternal(p: LogicalPlan): LogicalPlan = p.transformUpWithPruning( _.containsAnyPattern(LOCAL_RELATION, TRUE_OR_FALSE_LITERAL), ruleId) { commonApplyFunc } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala index 8277e44458bb1..72ef8fdd91b60 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelationSuite.scala @@ -309,4 +309,42 @@ class PropagateEmptyRelationSuite extends PlanTest { val optimized2 = Optimize.execute(plan2) comparePlans(optimized2, expected) } + + test("Propagate empty relation with repartition") { + val emptyRelation = LocalRelation($"a".int, $"b".int) + comparePlans(Optimize.execute( + emptyRelation.repartition(1).sortBy($"a".asc).analyze + ), emptyRelation.analyze) + + comparePlans(Optimize.execute( + emptyRelation.distribute($"a")(1).sortBy($"a".asc).analyze + ), emptyRelation.analyze) + + comparePlans(Optimize.execute( + emptyRelation.repartition().analyze + ), emptyRelation.analyze) + + comparePlans(Optimize.execute( + emptyRelation.repartition(1).sortBy($"a".asc).repartition().analyze + ), emptyRelation.analyze) + } + + test("SPARK-39915: Dataset.repartition(N) may not create N partitions") { + val emptyRelation = LocalRelation($"a".int, $"b".int) + val p1 = emptyRelation.repartition(1).analyze + comparePlans(Optimize.execute(p1), p1) + + val p2 = emptyRelation.repartition(1).select($"a").analyze + comparePlans(Optimize.execute(p2), p2) + + val p3 = emptyRelation.repartition(1).where($"a" > rand(1)).analyze + comparePlans(Optimize.execute(p3), p3) + + val p4 = emptyRelation.repartition(1).where($"a" > rand(1)).select($"a").analyze + comparePlans(Optimize.execute(p4), p4) + + val p5 = emptyRelation.sortBy("$a".asc).repartition().limit(1).repartition(1).analyze + val expected5 = emptyRelation.repartition(1).analyze + comparePlans(Optimize.execute(p5), expected5) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEPropagateEmptyRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEPropagateEmptyRelation.scala index bab77515f79a2..132c919c29112 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEPropagateEmptyRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEPropagateEmptyRelation.scala @@ -69,7 +69,7 @@ object AQEPropagateEmptyRelation extends PropagateEmptyRelationBase { empty(j) } - def apply(plan: LogicalPlan): LogicalPlan = plan.transformUpWithPruning( + override protected def applyInternal(p: LogicalPlan): LogicalPlan = p.transformUpWithPruning( // LOCAL_RELATION and TRUE_OR_FALSE_LITERAL pattern are matched at // `PropagateEmptyRelationBase.commonApplyFunc` // LOGICAL_QUERY_STAGE pattern is matched at `PropagateEmptyRelationBase.commonApplyFunc` diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 43aca31d138f4..b05d320ca07f8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -3281,6 +3281,13 @@ class DataFrameSuite extends QueryTest Row(java.sql.Date.valueOf("2020-02-01"), java.sql.Date.valueOf("2020-02-01")) :: Row(java.sql.Date.valueOf("2020-01-01"), java.sql.Date.valueOf("2020-01-02")) :: Nil) } + + test("SPARK-39915: Dataset.repartition(N) may not create N partitions") { + withSQLConf(SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false") { + val df = spark.sql("select * from values(1) where 1 < rand()").repartition(2) + assert(df.queryExecution.executedPlan.execute().getNumPartitions == 2) + } + } } case class GroupByKey(a: Int, b: Int) From 052d60c28a8fd0e4e33051aa0682d3df4d979ae8 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 9 Sep 2022 16:48:34 -0700 Subject: [PATCH 476/535] [SPARK-40228][SQL][3.3] Do not simplify multiLike if child is not a cheap expression This PR backport https://github.com/apache/spark/pull/37672 to branch-3.3. The original PR's description: ### What changes were proposed in this pull request? Do not simplify multiLike if child is not a cheap expression. ### Why are the changes needed? 1. Simplifying multiLike in this cases can not benefit the query because it cannot be pushed down. 2. Reduce the number of evaluations for these expressions. For example: ```sql select * from t1 where substr(name, 1, 5) like any('%a', 'b%', '%c%'); ``` ``` == Physical Plan == *(1) Filter ((EndsWith(substr(name#0, 1, 5), a) OR StartsWith(substr(name#0, 1, 5), b)) OR Contains(substr(name#0, 1, 5), c)) +- *(1) ColumnarToRow +- FileScan parquet default.t1[name#0] Batched: true, DataFilters: [((EndsWith(substr(name#0, 1, 5), a) OR StartsWith(substr(name#0, 1, 5), b)) OR Contains(substr(n..., Format: Parquet, PartitionFilters: [], PushedFilters: [], ReadSchema: struct ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #37813 from wangyum/SPARK-40228-branch-3.3. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 13 +++++++++++++ .../spark/sql/catalyst/optimizer/expressions.scala | 12 ++++++++---- .../optimizer/LikeSimplificationSuite.scala | 13 +++++++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 3f756ea459cd2..9794a310b6df9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -1075,6 +1075,19 @@ object CollapseProject extends Rule[LogicalPlan] with AliasHelper { case _ => false } + /** + * Check if the given expression is cheap that we can inline it. + */ + def isCheap(e: Expression): Boolean = e match { + case _: Attribute | _: OuterReference => true + case _ if e.foldable => true + // PythonUDF is handled by the rule ExtractPythonUDFs + case _: PythonUDF => true + // Alias and ExtractValue are very cheap. + case _: Alias | _: ExtractValue => e.children.forall(isCheap) + case _ => false + } + /** * Return all the references of the given expression without deduplication, which is different * from `Expression.references`. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 158734597f77b..a3d826aff5177 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -773,10 +773,14 @@ object LikeSimplification extends Rule[LogicalPlan] { } else { simplifyLike(input, pattern.toString, escapeChar).getOrElse(l) } - case l @ LikeAll(child, patterns) => simplifyMultiLike(child, patterns, l) - case l @ NotLikeAll(child, patterns) => simplifyMultiLike(child, patterns, l) - case l @ LikeAny(child, patterns) => simplifyMultiLike(child, patterns, l) - case l @ NotLikeAny(child, patterns) => simplifyMultiLike(child, patterns, l) + case l @ LikeAll(child, patterns) if CollapseProject.isCheap(child) => + simplifyMultiLike(child, patterns, l) + case l @ NotLikeAll(child, patterns) if CollapseProject.isCheap(child) => + simplifyMultiLike(child, patterns, l) + case l @ LikeAny(child, patterns) if CollapseProject.isCheap(child) => + simplifyMultiLike(child, patterns, l) + case l @ NotLikeAny(child, patterns) if CollapseProject.isCheap(child) => + simplifyMultiLike(child, patterns, l) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala index c06c92f9c1511..2d3be86fa286c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.types.{BooleanType, StringType} +import org.apache.spark.unsafe.types.UTF8String class LikeSimplificationSuite extends PlanTest { @@ -232,4 +233,16 @@ class LikeSimplificationSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("SPARK-40228: Simplify multiLike if child is foldable expression") { + comparePlans(Optimize.execute(testRelation.where("a" likeAny("abc%", "", "ab")).analyze), + testRelation.where(StartsWith("a", "abc") || EqualTo("a", "") || EqualTo("a", "ab") || + LikeAny("a", Seq.empty[UTF8String])).analyze) + } + + test("SPARK-40228: Do not simplify multiLike if child is not a cheap expression") { + val originalQuery = testRelation.where($"a".substring(1, 5) likeAny("abc%", "", "ab")).analyze + + comparePlans(Optimize.execute(originalQuery), originalQuery) + } } From 0a180c0637e352e6a00a9b67d4f5d261f851ea5f Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Mon, 12 Sep 2022 07:33:36 +0300 Subject: [PATCH 477/535] [SPARK-40292][SQL] Fix column names in "arrays_zip" function when arrays are referenced from nested structs ### What changes were proposed in this pull request? This PR fixes an issue in `arrays_zip` function where a field index was used as a column name in the resulting schema which was a regression from Spark 3.1. With this change, the original behaviour is restored: a corresponding struct field name will be used instead of a field index. Example: ```sql with q as ( select named_struct( 'my_array', array(1, 2, 3), 'my_array2', array(4, 5, 6) ) as my_struct ) select arrays_zip(my_struct.my_array, my_struct.my_array2) from q ``` would return schema: ``` root |-- arrays_zip(my_struct.my_array, my_struct.my_array2): array (nullable = false) | |-- element: struct (containsNull = false) | | |-- 0: integer (nullable = true) | | |-- 1: integer (nullable = true) ``` which is somewhat inaccurate. PR adds handling of `GetStructField` expression to return the struct field names like this: ``` root |-- arrays_zip(my_struct.my_array, my_struct.my_array2): array (nullable = false) | |-- element: struct (containsNull = false) | | |-- my_array: integer (nullable = true) | | |-- my_array2: integer (nullable = true) ``` ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? Yes, `arrays_zip` function returns struct field names now as in Spark 3.1 instead of field indices. Some users might have worked around this issue so this patch would affect them by bringing back the original behaviour. ### How was this patch tested? Existing unit tests. I also added a test case that reproduces the problem. Closes #37833 from sadikovi/SPARK-40292. Authored-by: Ivan Sadikov Signed-off-by: Max Gekk (cherry picked from commit 443eea97578c41870c343cdb88cf69bfdf27033a) Signed-off-by: Max Gekk --- .../expressions/collectionOperations.scala | 1 + .../spark/sql/DataFrameFunctionsSuite.scala | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 53bda0cbdc773..05a273763b9ff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -267,6 +267,7 @@ case class ArraysZip(children: Seq[Expression], names: Seq[Expression]) case (u: UnresolvedAttribute, _) => Literal(u.nameParts.last) case (e: NamedExpression, _) if e.resolved => Literal(e.name) case (e: NamedExpression, _) => NamePlaceholder + case (e: GetStructField, _) => Literal(e.extractFieldName) case (_, idx) => Literal(idx.toString) }) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index 4d82d110a4c51..a9c1704581270 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -721,6 +721,25 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { } } + test("SPARK-40292: arrays_zip should retain field names in nested structs") { + val df = spark.sql(""" + select + named_struct( + 'arr_1', array(named_struct('a', 1, 'b', 2)), + 'arr_2', array(named_struct('p', 1, 'q', 2)), + 'field', named_struct( + 'arr_3', array(named_struct('x', 1, 'y', 2)) + ) + ) as obj + """) + + val res = df.selectExpr("arrays_zip(obj.arr_1, obj.arr_2, obj.field.arr_3) as arr") + + val fieldNames = res.schema.head.dataType.asInstanceOf[ArrayType] + .elementType.asInstanceOf[StructType].fieldNames + assert(fieldNames.toSeq === Seq("arr_1", "arr_2", "arr_3")) + } + def testSizeOfMap(sizeOfNull: Any): Unit = { val df = Seq( (Map[Int, Int](1 -> 1, 2 -> 2), "x"), From 1741aabb5b27709ecd0043f4f671dadf3fa6dee5 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Tue, 13 Sep 2022 12:38:02 -0700 Subject: [PATCH 478/535] [SPARK-40362][SQL][3.3] Fix BinaryComparison canonicalization ### What changes were proposed in this pull request? Change canonicalization to a one pass process and move logic from `Canonicalize.reorderCommutativeOperators` to the respective commutative operators' `canonicalize`. ### Why are the changes needed? https://github.com/apache/spark/pull/34883 improved expression canonicalization performance but introduced regression when a commutative operator is under a `BinaryComparison`. This is because children reorder by their hashcode can't happen in `preCanonicalized` phase when children are not yet "final". ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added new UT. Closes #37866 from peter-toth/SPARK-40362-fix-binarycomparison-canonicalization-3.3. Lead-authored-by: Peter Toth Co-authored-by: Peter Toth Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/unresolved.scala | 2 +- .../catalyst/expressions/Canonicalize.scala | 70 ------------------- .../spark/sql/catalyst/expressions/Cast.scala | 4 +- .../catalyst/expressions/DynamicPruning.scala | 6 +- .../sql/catalyst/expressions/Expression.scala | 65 +++++++++-------- .../sql/catalyst/expressions/PythonUDF.scala | 4 +- .../sql/catalyst/expressions/ScalaUDF.scala | 4 +- .../expressions/aggregate/interfaces.scala | 6 +- .../sql/catalyst/expressions/arithmetic.scala | 30 ++++++-- .../expressions/bitwiseExpressions.scala | 21 +++++- .../expressions/complexTypeExtractors.scala | 4 +- .../expressions/decimalExpressions.scala | 2 +- .../expressions/higherOrderFunctions.scala | 4 +- .../expressions/namedExpressions.scala | 2 +- .../sql/catalyst/expressions/predicates.scala | 26 ++++--- .../sql/catalyst/expressions/subquery.scala | 26 +++---- .../expressions/CanonicalizeSuite.scala | 7 ++ .../apache/spark/sql/execution/subquery.scala | 6 +- 18 files changed, 138 insertions(+), 151 deletions(-) delete mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index d3e754ba670e7..677e9844cac66 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -641,7 +641,7 @@ case object UnresolvedSeed extends LeafExpression with Unevaluable { */ case class TempResolvedColumn(child: Expression, nameParts: Seq[String]) extends UnaryExpression with Unevaluable { - override lazy val preCanonicalized = child.preCanonicalized + override lazy val canonicalized = child.canonicalized override def dataType: DataType = child.dataType override protected def withNewChildInternal(newChild: Expression): Expression = copy(child = newChild) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala deleted file mode 100644 index 329ed3f20b727..0000000000000 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.catalyst.expressions - -/** - * Reorders adjacent commutative operators such as [[And]] in the expression tree, according to - * the `hashCode` of non-commutative nodes, to remove cosmetic variations. Caller side should only - * call it on the root node of an expression tree that needs to be canonicalized. - */ -object Canonicalize { - /** Collects adjacent commutative operations. */ - private def gatherCommutative( - e: Expression, - f: PartialFunction[Expression, Seq[Expression]]): Seq[Expression] = e match { - case c if f.isDefinedAt(c) => f(c).flatMap(gatherCommutative(_, f)) - case other => reorderCommutativeOperators(other) :: Nil - } - - /** Orders a set of commutative operations by their hash code. */ - private def orderCommutative( - e: Expression, - f: PartialFunction[Expression, Seq[Expression]]): Seq[Expression] = - gatherCommutative(e, f).sortBy(_.hashCode()) - - def reorderCommutativeOperators(e: Expression): Expression = e match { - // TODO: do not reorder consecutive `Add`s or `Multiply`s with different `failOnError` flags - case a @ Add(_, _, f) => - orderCommutative(a, { case Add(l, r, _) => Seq(l, r) }).reduce(Add(_, _, f)) - case m @ Multiply(_, _, f) => - orderCommutative(m, { case Multiply(l, r, _) => Seq(l, r) }).reduce(Multiply(_, _, f)) - - case o @ Or(l, r) if l.deterministic && r.deterministic => - orderCommutative(o, { case Or(l, r) if l.deterministic && r.deterministic => Seq(l, r) }) - .reduce(Or) - case a @ And(l, r) if l.deterministic && r.deterministic => - orderCommutative(a, { case And(l, r) if l.deterministic && r.deterministic => Seq(l, r)}) - .reduce(And) - - case o: BitwiseOr => - orderCommutative(o, { case BitwiseOr(l, r) => Seq(l, r) }).reduce(BitwiseOr) - case a: BitwiseAnd => - orderCommutative(a, { case BitwiseAnd(l, r) => Seq(l, r) }).reduce(BitwiseAnd) - case x: BitwiseXor => - orderCommutative(x, { case BitwiseXor(l, r) => Seq(l, r) }).reduce(BitwiseXor) - - case g: Greatest => - val newChildren = orderCommutative(g, { case Greatest(children) => children }) - Greatest(newChildren) - case l: Least => - val newChildren = orderCommutative(l, { case Least(children) => children }) - Least(newChildren) - - case _ => e.withNewChildren(e.children.map(reorderCommutativeOperators)) - } -} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index f5dbeda514a55..ee95ea7f9f63d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -322,8 +322,8 @@ abstract class CastBase extends UnaryExpression override lazy val resolved: Boolean = childrenResolved && checkInputDataTypes().isSuccess && (!needsTimeZone || timeZoneId.isDefined) - override lazy val preCanonicalized: Expression = { - val basic = withNewChildren(Seq(child.preCanonicalized)).asInstanceOf[CastBase] + override lazy val canonicalized: Expression = { + val basic = withNewChildren(Seq(child.canonicalized)).asInstanceOf[CastBase] if (timeZoneId.isDefined && !needsTimeZone) { basic.withTimeZone(null) } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/DynamicPruning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/DynamicPruning.scala index b268818568056..dd9e9307e74e1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/DynamicPruning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/DynamicPruning.scala @@ -74,11 +74,11 @@ case class DynamicPruningSubquery( override def toString: String = s"dynamicpruning#${exprId.id} $conditionString" - override lazy val preCanonicalized: DynamicPruning = { + override lazy val canonicalized: DynamicPruning = { copy( - pruningKey = pruningKey.preCanonicalized, + pruningKey = pruningKey.canonicalized, buildQuery = buildQuery.canonicalized, - buildKeys = buildKeys.map(_.preCanonicalized), + buildKeys = buildKeys.map(_.canonicalized), exprId = ExprId(0)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index d620c5d739283..2c208c0c66553 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -222,43 +222,28 @@ abstract class Expression extends TreeNode[Expression] { */ def childrenResolved: Boolean = children.forall(_.resolved) - // Expression canonicalization is done in 2 phases: - // 1. Recursively canonicalize each node in the expression tree. This does not change the tree - // structure and is more like "node-local" canonicalization. - // 2. Find adjacent commutative operators in the expression tree, reorder them to get a - // static order and remove cosmetic variations. This may change the tree structure - // dramatically and is more like a "global" canonicalization. - // - // The first phase is done by `preCanonicalized`. It's a `lazy val` which recursively calls - // `preCanonicalized` on the children. This means that almost every node in the expression tree - // will instantiate the `preCanonicalized` variable, which is good for performance as you can - // reuse the canonicalization result of the children when you construct a new expression node. - // - // The second phase is done by `canonicalized`, which simply calls `Canonicalize` and is kind of - // the actual "user-facing API" of expression canonicalization. Only the root node of the - // expression tree will instantiate the `canonicalized` variable. This is different from - // `preCanonicalized`, because `canonicalized` does "global" canonicalization and most of the time - // you cannot reuse the canonicalization result of the children. - - /** - * An internal lazy val to implement expression canonicalization. It should only be called in - * `canonicalized`, or in subclass's `preCanonicalized` when the subclass overrides this lazy val - * to provide custom canonicalization logic. - */ - lazy val preCanonicalized: Expression = { - val canonicalizedChildren = children.map(_.preCanonicalized) - withNewChildren(canonicalizedChildren) - } - /** * Returns an expression where a best effort attempt has been made to transform `this` in a way * that preserves the result but removes cosmetic variations (case sensitivity, ordering for - * commutative operations, etc.) See [[Canonicalize]] for more details. + * commutative operations, etc.). * * `deterministic` expressions where `this.canonicalized == other.canonicalized` will always * evaluate to the same result. + * + * The process of canonicalization is a one pass, bottum-up expression tree computation based on + * canonicalizing children before canonicalizing the current node. There is one exception though, + * as adjacent, same class [[CommutativeExpression]]s canonicalazion happens in a way that calling + * `canonicalized` on the root: + * 1. Gathers and canonicalizes the non-commutative (or commutative but not same class) child + * expressions of the adjacent expressions. + * 2. Reorder the canonicalized child expressions by their hashcode. + * This means that the lazy `cannonicalized` is called and computed only on the root of the + * adjacent expressions. */ - lazy val canonicalized: Expression = Canonicalize.reorderCommutativeOperators(preCanonicalized) + lazy val canonicalized: Expression = { + val canonicalizedChildren = children.map(_.canonicalized) + withNewChildren(canonicalizedChildren) + } /** * Returns true when two expressions will always compute the same result, even if they differ @@ -362,7 +347,7 @@ trait RuntimeReplaceable extends Expression { // As this expression gets replaced at optimization with its `child" expression, // two `RuntimeReplaceable` are considered to be semantically equal if their "child" expressions // are semantically equal. - override lazy val preCanonicalized: Expression = replacement.preCanonicalized + override lazy val canonicalized: Expression = replacement.canonicalized final override def eval(input: InternalRow = null): Any = throw QueryExecutionErrors.cannotEvaluateExpressionError(this) @@ -1156,3 +1141,21 @@ trait ComplexTypeMergingExpression extends Expression { trait UserDefinedExpression { def name: String } + +trait CommutativeExpression extends Expression { + /** Collects adjacent commutative operations. */ + private def gatherCommutative( + e: Expression, + f: PartialFunction[CommutativeExpression, Seq[Expression]]): Seq[Expression] = e match { + case c: CommutativeExpression if f.isDefinedAt(c) => f(c).flatMap(gatherCommutative(_, f)) + case other => other.canonicalized :: Nil + } + + /** + * Reorders adjacent commutative operators such as [[And]] in the expression tree, according to + * the `hashCode` of non-commutative nodes, to remove cosmetic variations. + */ + protected def orderCommutative( + f: PartialFunction[CommutativeExpression, Seq[Expression]]): Seq[Expression] = + gatherCommutative(this, f).sortBy(_.hashCode()) +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/PythonUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/PythonUDF.scala index b84810c3f4bb4..6b9017a01db36 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/PythonUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/PythonUDF.scala @@ -71,8 +71,8 @@ case class PythonUDF( override def nullable: Boolean = true - override lazy val preCanonicalized: Expression = { - val canonicalizedChildren = children.map(_.preCanonicalized) + override lazy val canonicalized: Expression = { + val canonicalizedChildren = children.map(_.canonicalized) // `resultId` can be seen as cosmetic variation in PythonUDF, as it doesn't affect the result. this.copy(resultId = ExprId(-1)).withNewChildren(canonicalizedChildren) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index 832b2656f0c96..f8ff5f583f602 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -64,10 +64,10 @@ case class ScalaUDF( override def name: String = udfName.getOrElse("UDF") - override lazy val preCanonicalized: Expression = { + override lazy val canonicalized: Expression = { // SPARK-32307: `ExpressionEncoder` can't be canonicalized, and technically we don't // need it to identify a `ScalaUDF`. - copy(children = children.map(_.preCanonicalized), inputEncoders = Nil, outputEncoder = None) + copy(children = children.map(_.canonicalized), inputEncoders = Nil, outputEncoder = None) } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala index e60c07b0d8268..e757584b04c62 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala @@ -123,7 +123,7 @@ case class AggregateExpression( def filterAttributes: AttributeSet = filter.map(_.references).getOrElse(AttributeSet.empty) // We compute the same thing regardless of our final result. - override lazy val preCanonicalized: Expression = { + override lazy val canonicalized: Expression = { val normalizedAggFunc = mode match { // For PartialMerge or Final mode, the input to the `aggregateFunction` is aggregate buffers, // and the actual children of `aggregateFunction` is not used, here we normalize the expr id. @@ -134,10 +134,10 @@ case class AggregateExpression( } AggregateExpression( - normalizedAggFunc.preCanonicalized.asInstanceOf[AggregateFunction], + normalizedAggFunc.canonicalized.asInstanceOf[AggregateFunction], mode, isDistinct, - filter.map(_.preCanonicalized), + filter.map(_.canonicalized), ExprId(0)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 153187f9e309c..db29a731f12ff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -313,7 +313,8 @@ object BinaryArithmetic { case class Add( left: Expression, right: Expression, - failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryArithmetic { + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryArithmetic + with CommutativeExpression { def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) @@ -349,6 +350,11 @@ case class Add( override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): Add = copy(left = newLeft, right = newRight) + + override lazy val canonicalized: Expression = { + // TODO: do not reorder consecutive `Add`s with different `failOnError` + orderCommutative({ case Add(l, r, _) => Seq(l, r) }).reduce(Add(_, _, failOnError)) + } } @ExpressionDescription( @@ -413,7 +419,8 @@ case class Subtract( case class Multiply( left: Expression, right: Expression, - failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryArithmetic { + failOnError: Boolean = SQLConf.get.ansiEnabled) extends BinaryArithmetic + with CommutativeExpression { def this(left: Expression, right: Expression) = this(left, right, SQLConf.get.ansiEnabled) @@ -436,6 +443,11 @@ case class Multiply( override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): Multiply = copy(left = newLeft, right = newRight) + + override lazy val canonicalized: Expression = { + // TODO: do not reorder consecutive `Multiply`s with different `failOnError` + orderCommutative({ case Multiply(l, r, _) => Seq(l, r) }).reduce(Multiply(_, _, failOnError)) + } } // Common base trait for Divide and Remainder, since these two classes are almost identical @@ -905,7 +917,8 @@ case class Pmod( """, since = "1.5.0", group = "math_funcs") -case class Least(children: Seq[Expression]) extends ComplexTypeMergingExpression { +case class Least(children: Seq[Expression]) extends ComplexTypeMergingExpression + with CommutativeExpression { override def nullable: Boolean = children.forall(_.nullable) override def foldable: Boolean = children.forall(_.foldable) @@ -968,6 +981,10 @@ case class Least(children: Seq[Expression]) extends ComplexTypeMergingExpression override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Least = copy(children = newChildren) + + override lazy val canonicalized: Expression = { + Least(orderCommutative({ case Least(children) => children })) + } } /** @@ -983,7 +1000,8 @@ case class Least(children: Seq[Expression]) extends ComplexTypeMergingExpression """, since = "1.5.0", group = "math_funcs") -case class Greatest(children: Seq[Expression]) extends ComplexTypeMergingExpression { +case class Greatest(children: Seq[Expression]) extends ComplexTypeMergingExpression + with CommutativeExpression { override def nullable: Boolean = children.forall(_.nullable) override def foldable: Boolean = children.forall(_.foldable) @@ -1046,4 +1064,8 @@ case class Greatest(children: Seq[Expression]) extends ComplexTypeMergingExpress override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Greatest = copy(children = newChildren) + + override lazy val canonicalized: Expression = { + Greatest(orderCommutative({ case Greatest(children) => children })) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala index 57ab9e2773e31..ad0b63a556061 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala @@ -36,7 +36,8 @@ import org.apache.spark.sql.types._ """, since = "1.4.0", group = "bitwise_funcs") -case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithmetic { +case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithmetic + with CommutativeExpression { protected override val failOnError: Boolean = false @@ -59,6 +60,10 @@ case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithme override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): BitwiseAnd = copy(left = newLeft, right = newRight) + + override lazy val canonicalized: Expression = { + orderCommutative({ case BitwiseAnd(l, r) => Seq(l, r) }).reduce(BitwiseAnd) + } } /** @@ -75,7 +80,8 @@ case class BitwiseAnd(left: Expression, right: Expression) extends BinaryArithme """, since = "1.4.0", group = "bitwise_funcs") -case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmetic { +case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmetic + with CommutativeExpression { protected override val failOnError: Boolean = false @@ -98,6 +104,10 @@ case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmet override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): BitwiseOr = copy(left = newLeft, right = newRight) + + override lazy val canonicalized: Expression = { + orderCommutative({ case BitwiseOr(l, r) => Seq(l, r) }).reduce(BitwiseOr) + } } /** @@ -114,7 +124,8 @@ case class BitwiseOr(left: Expression, right: Expression) extends BinaryArithmet """, since = "1.4.0", group = "bitwise_funcs") -case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithmetic { +case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithmetic + with CommutativeExpression { protected override val failOnError: Boolean = false @@ -137,6 +148,10 @@ case class BitwiseXor(left: Expression, right: Expression) extends BinaryArithme override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): BitwiseXor = copy(left = newLeft, right = newRight) + + override lazy val canonicalized: Expression = { + orderCommutative({ case BitwiseXor(l, r) => Seq(l, r) }).reduce(BitwiseXor) + } } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index b84050c1837df..2ae146b9ff6ff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -106,8 +106,8 @@ case class GetStructField(child: Expression, ordinal: Int, name: Option[String] lazy val childSchema = child.dataType.asInstanceOf[StructType] - override lazy val preCanonicalized: Expression = { - copy(child = child.preCanonicalized, name = None) + override lazy val canonicalized: Expression = { + copy(child = child.canonicalized, name = None) } override def dataType: DataType = childSchema(ordinal).dataType diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala index 7d25df5ae9cb7..bfe86499de2be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/decimalExpressions.scala @@ -114,7 +114,7 @@ case class PromotePrecision(child: Expression) extends UnaryExpression { child.genCode(ctx) override def prettyName: String = "promote_precision" override def sql: String = child.sql - override lazy val preCanonicalized: Expression = child.preCanonicalized + override lazy val canonicalized: Expression = child.canonicalized override protected def withNewChildInternal(newChild: Expression): Expression = copy(child = newChild) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index d56e761bd2f37..9f7ac716e550a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -208,7 +208,7 @@ trait HigherOrderFunction extends Expression with ExpectsInputTypes { } } - override lazy val preCanonicalized: Expression = { + override lazy val canonicalized: Expression = { var currExprId = -1 val argumentMap = functions.flatMap(_.collect { case l: NamedLambdaVariable => @@ -221,7 +221,7 @@ trait HigherOrderFunction extends Expression with ExpectsInputTypes { val newExprId = argumentMap(l.exprId) NamedLambdaVariable("none", l.dataType, l.nullable, exprId = ExprId(newExprId), null) } - val canonicalizedChildren = cleaned.children.map(_.preCanonicalized) + val canonicalizedChildren = cleaned.children.map(_.canonicalized) withNewChildren(canonicalizedChildren) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 145f371301f37..1efda20efcac8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -296,7 +296,7 @@ case class AttributeReference( h } - override lazy val preCanonicalized: Expression = { + override lazy val canonicalized: Expression = { AttributeReference("none", dataType)(exprId) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 949ce97411652..0584a13e61e51 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -328,8 +328,8 @@ case class Not(child: Expression) final override val nodePatterns: Seq[TreePattern] = Seq(NOT) - override lazy val preCanonicalized: Expression = { - withNewChildren(Seq(child.preCanonicalized)) match { + override lazy val canonicalized: Expression = { + withNewChildren(Seq(child.canonicalized)) match { case Not(GreaterThan(l, r)) => LessThanOrEqual(l, r) case Not(LessThan(l, r)) => GreaterThanOrEqual(l, r) case Not(GreaterThanOrEqual(l, r)) => LessThan(l, r) @@ -466,8 +466,8 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate { final override val nodePatterns: Seq[TreePattern] = Seq(IN) - override lazy val preCanonicalized: Expression = { - val basic = withNewChildren(children.map(_.preCanonicalized)).asInstanceOf[In] + override lazy val canonicalized: Expression = { + val basic = withNewChildren(children.map(_.canonicalized)).asInstanceOf[In] if (list.size > 1) { basic.copy(list = basic.list.sortBy(_.hashCode())) } else { @@ -736,7 +736,8 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with """, since = "1.0.0", group = "predicate_funcs") -case class And(left: Expression, right: Expression) extends BinaryOperator with Predicate { +case class And(left: Expression, right: Expression) extends BinaryOperator with Predicate + with CommutativeExpression { override def inputType: AbstractDataType = BooleanType @@ -807,6 +808,10 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): And = copy(left = newLeft, right = newRight) + + override lazy val canonicalized: Expression = { + orderCommutative({ case And(l, r) => Seq(l, r) }).reduce(And) + } } @ExpressionDescription( @@ -824,7 +829,8 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with """, since = "1.0.0", group = "predicate_funcs") -case class Or(left: Expression, right: Expression) extends BinaryOperator with Predicate { +case class Or(left: Expression, right: Expression) extends BinaryOperator with Predicate + with CommutativeExpression { override def inputType: AbstractDataType = BooleanType @@ -896,6 +902,10 @@ case class Or(left: Expression, right: Expression) extends BinaryOperator with P override protected def withNewChildrenInternal(newLeft: Expression, newRight: Expression): Or = copy(left = newLeft, right = newRight) + + override lazy val canonicalized: Expression = { + orderCommutative({ case Or(l, r) => Seq(l, r) }).reduce(Or) + } } @@ -907,8 +917,8 @@ abstract class BinaryComparison extends BinaryOperator with Predicate { final override val nodePatterns: Seq[TreePattern] = Seq(BINARY_COMPARISON) - override lazy val preCanonicalized: Expression = { - withNewChildren(children.map(_.preCanonicalized)) match { + override lazy val canonicalized: Expression = { + withNewChildren(children.map(_.canonicalized)) match { case EqualTo(l, r) if l.hashCode() > r.hashCode() => EqualTo(r, l) case EqualNullSafe(l, r) if l.hashCode() > r.hashCode() => EqualNullSafe(r, l) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala index 71b36fa8ef9ba..0e091cdbe0239 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala @@ -263,12 +263,12 @@ case class ScalarSubquery( override def nullable: Boolean = true override def withNewPlan(plan: LogicalPlan): ScalarSubquery = copy(plan = plan) override def toString: String = s"scalar-subquery#${exprId.id} $conditionString" - override lazy val preCanonicalized: Expression = { + override lazy val canonicalized: Expression = { ScalarSubquery( plan.canonicalized, - outerAttrs.map(_.preCanonicalized), + outerAttrs.map(_.canonicalized), ExprId(0), - joinCond.map(_.preCanonicalized)) + joinCond.map(_.canonicalized)) } override protected def withNewChildrenInternal( @@ -305,12 +305,12 @@ case class LateralSubquery( override def nullable: Boolean = true override def withNewPlan(plan: LogicalPlan): LateralSubquery = copy(plan = plan) override def toString: String = s"lateral-subquery#${exprId.id} $conditionString" - override lazy val preCanonicalized: Expression = { + override lazy val canonicalized: Expression = { LateralSubquery( plan.canonicalized, - outerAttrs.map(_.preCanonicalized), + outerAttrs.map(_.canonicalized), ExprId(0), - joinCond.map(_.preCanonicalized)) + joinCond.map(_.canonicalized)) } override protected def withNewChildrenInternal( @@ -350,13 +350,13 @@ case class ListQuery( override def nullable: Boolean = false override def withNewPlan(plan: LogicalPlan): ListQuery = copy(plan = plan) override def toString: String = s"list#${exprId.id} $conditionString" - override lazy val preCanonicalized: Expression = { + override lazy val canonicalized: Expression = { ListQuery( plan.canonicalized, - outerAttrs.map(_.preCanonicalized), + outerAttrs.map(_.canonicalized), ExprId(0), - childOutputs.map(_.preCanonicalized.asInstanceOf[Attribute]), - joinCond.map(_.preCanonicalized)) + childOutputs.map(_.canonicalized.asInstanceOf[Attribute]), + joinCond.map(_.canonicalized)) } override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): ListQuery = @@ -402,12 +402,12 @@ case class Exists( override def nullable: Boolean = false override def withNewPlan(plan: LogicalPlan): Exists = copy(plan = plan) override def toString: String = s"exists#${exprId.id} $conditionString" - override lazy val preCanonicalized: Expression = { + override lazy val canonicalized: Expression = { Exists( plan.canonicalized, - outerAttrs.map(_.preCanonicalized), + outerAttrs.map(_.canonicalized), ExprId(0), - joinCond.map(_.preCanonicalized)) + joinCond.map(_.canonicalized)) } override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Exists = diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala index 83307c9022dd2..90c0424f1d86b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala @@ -190,4 +190,11 @@ class CanonicalizeSuite extends SparkFunSuite { // canonicalization should not converted resolved cast to unresolved assert(cast.canonicalized.resolved) } + + test("SPARK-40362: Commutative operator under BinaryComparison") { + Seq(EqualTo, EqualNullSafe, GreaterThan, LessThan, GreaterThanOrEqual, LessThanOrEqual) + .foreach { bc => + assert(bc(Add($"a", $"b"), Literal(10)).semanticEquals(bc(Add($"b", $"a"), Literal(10)))) + } + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 209b0f79243e5..aef9a4dc7acee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -68,7 +68,7 @@ case class ScalarSubquery( override def toString: String = plan.simpleString(SQLConf.get.maxToStringFields) override def withNewPlan(query: BaseSubqueryExec): ScalarSubquery = copy(plan = query) - override lazy val preCanonicalized: Expression = { + override lazy val canonicalized: Expression = { ScalarSubquery(plan.canonicalized.asInstanceOf[BaseSubqueryExec], ExprId(0)) } @@ -158,9 +158,9 @@ case class InSubqueryExec( inSet.doGenCode(ctx, ev) } - override lazy val preCanonicalized: InSubqueryExec = { + override lazy val canonicalized: InSubqueryExec = { copy( - child = child.preCanonicalized, + child = child.canonicalized, plan = plan.canonicalized.asInstanceOf[BaseSubqueryExec], exprId = ExprId(0), resultBroadcast = null, From d70f15621bf7ce4317dd74d84690efe02e94ae4e Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 13 Sep 2022 12:52:26 -0700 Subject: [PATCH 479/535] [SPARK-40417][K8S][DOCS] Use YuniKorn v1.1+ ### What changes were proposed in this pull request? This PR aims to update K8s document to declare the support of YuniKorn v1.1.+ ### Why are the changes needed? YuniKorn 1.1.0 has 87 JIRAs and is the first version to support multi-arch officially. - https://yunikorn.apache.org/release-announce/1.1.0 ``` $ docker inspect apache/yunikorn:scheduler-1.0.0 | grep Architecture "Architecture": "amd64", $ docker inspect apache/yunikorn:scheduler-1.1.0 | grep Architecture "Architecture": "arm64", ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually tested with Apache YuniKorn v1.1.0+. ``` $ build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests \ -Dspark.kubernetes.test.deployMode=docker-desktop "kubernetes-integration-tests/test" \ -Dtest.exclude.tags=minikube,local,decom \ -Dtest.default.exclude.tags= ... [info] KubernetesSuite: [info] - Run SparkPi with no resources (11 seconds, 238 milliseconds) [info] - Run SparkPi with no resources & statefulset allocation (11 seconds, 58 milliseconds) [info] - Run SparkPi with a very long application name. (9 seconds, 948 milliseconds) [info] - Use SparkLauncher.NO_RESOURCE (9 seconds, 884 milliseconds) [info] - Run SparkPi with a master URL without a scheme. (9 seconds, 834 milliseconds) [info] - Run SparkPi with an argument. (9 seconds, 870 milliseconds) [info] - Run SparkPi with custom labels, annotations, and environment variables. (9 seconds, 887 milliseconds) [info] - All pods have the same service account by default (9 seconds, 891 milliseconds) [info] - Run extraJVMOptions check on driver (5 seconds, 888 milliseconds) [info] - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties (10 seconds, 261 milliseconds) [info] - Run SparkPi with env and mount secrets. (18 seconds, 702 milliseconds) [info] - Run PySpark on simple pi.py example (10 seconds, 944 milliseconds) [info] - Run PySpark to test a pyfiles example (13 seconds, 934 milliseconds) [info] - Run PySpark with memory customization (10 seconds, 853 milliseconds) [info] - Run in client mode. (11 seconds, 301 milliseconds) [info] - Start pod creation from template (9 seconds, 853 milliseconds) [info] - SPARK-38398: Schedule pod creation from template (9 seconds, 923 milliseconds) [info] - Run SparkR on simple dataframe.R example (13 seconds, 929 milliseconds) [info] YuniKornSuite: [info] - Run SparkPi with no resources (9 seconds, 769 milliseconds) [info] - Run SparkPi with no resources & statefulset allocation (9 seconds, 776 milliseconds) [info] - Run SparkPi with a very long application name. (9 seconds, 856 milliseconds) [info] - Use SparkLauncher.NO_RESOURCE (9 seconds, 803 milliseconds) [info] - Run SparkPi with a master URL without a scheme. (10 seconds, 783 milliseconds) [info] - Run SparkPi with an argument. (10 seconds, 771 milliseconds) [info] - Run SparkPi with custom labels, annotations, and environment variables. (9 seconds, 868 milliseconds) [info] - All pods have the same service account by default (10 seconds, 811 milliseconds) [info] - Run extraJVMOptions check on driver (6 seconds, 858 milliseconds) [info] - Verify logging configuration is picked from the provided SPARK_CONF_DIR/log4j2.properties (11 seconds, 171 milliseconds) [info] - Run SparkPi with env and mount secrets. (18 seconds, 221 milliseconds) [info] - Run PySpark on simple pi.py example (11 seconds, 970 milliseconds) [info] - Run PySpark to test a pyfiles example (13 seconds, 990 milliseconds) [info] - Run PySpark with memory customization (11 seconds, 992 milliseconds) [info] - Run in client mode. (11 seconds, 294 milliseconds) [info] - Start pod creation from template (11 seconds, 10 milliseconds) [info] - SPARK-38398: Schedule pod creation from template (9 seconds, 956 milliseconds) [info] - Run SparkR on simple dataframe.R example (12 seconds, 992 milliseconds) [info] Run completed in 10 minutes, 15 seconds. [info] Total number of tests run: 36 [info] Suites: completed 2, aborted 0 [info] Tests: succeeded 36, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. [success] Total time: 751 s (12:31), completed Sep 13, 2022, 11:47:24 AM ``` Closes #37872 from dongjoon-hyun/SPARK-40417. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit a934cabd24afa5c8f6e8e1d2341829166129a5c8) Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index abc362c94c5db..b57175f358c32 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1823,10 +1823,10 @@ Install Apache YuniKorn: helm repo add yunikorn https://apache.github.io/yunikorn-release helm repo update kubectl create namespace yunikorn -helm install yunikorn yunikorn/yunikorn --namespace yunikorn --version 1.0.0 +helm install yunikorn yunikorn/yunikorn --namespace yunikorn --version 1.1.0 ``` -The above steps will install YuniKorn v1.0.0 on an existing Kubernetes cluster. +The above steps will install YuniKorn v1.1.0 on an existing Kubernetes cluster. ##### Get started From ea1a426a889626f1ee1933e3befaa975a2f0a072 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 14 Sep 2022 00:20:48 +0000 Subject: [PATCH 480/535] Preparing Spark release v3.3.1-rc1 --- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 2 +- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 38 insertions(+), 38 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index d12f2ad73fabd..32126a5e13820 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 842d63f5d3811..21bf56094503b 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f7d187bf9527d..43740354d84d1 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 53f38df885102..46c875dcb0a06 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 845f6659407bd..d6d28fe4ec687 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8e1590891933b..a37bc21ca6e54 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1987c13328559..817a30e5deea0 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index c7e7be1e3bbf1..99b641a3658c0 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index ac644130a61e2..0711ecc3e0744 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 9a6fe2d313fde..15eea016135a4 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,7 +19,7 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.1-SNAPSHOT +SPARK_VERSION: 3.3.1 SPARK_VERSION_SHORT: 3.3.1 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" diff --git a/examples/pom.xml b/examples/pom.xml index e97f3b40cb2bd..18b30b092b273 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 578854e3eaa9a..e932501b8b834 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 95e1ce74ca172..72940cb743386 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 0b803c5d3864a..f079671b8998a 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 95726829bcbbd..1b79350397482 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 25e7e25ae25b6..83097460edc9d 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 3ba16b7b838a2..91e111ee38d10 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 6cee275e6adc7..e622369eb7250 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index ad23da0d7f249..a208e03e8bbf3 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 6de1f9eee532c..e464dfacc4c7e 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 95c8c312eb0e2..ed0c627abb943 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 4d87bd2730e3b..606b6cb8c5cd7 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 889f0b5a92e08..cb5c693068114 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 9b7b0370d3b4d..3fc9ece3d0e05 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 04a68a47a4f45..d4d0fc3b6f9e6 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/pom.xml b/pom.xml index d594fcca8c077..99d4e265332f9 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index 83c6d7125c367..49fe5caabc028 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__: str = "3.3.1.dev0" +__version__: str = "3.3.1" diff --git a/repl/pom.xml b/repl/pom.xml index 2e5f8bf5395a3..d5abd10e610c7 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index d1d6a449bd5dc..253a5aeffb521 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 516c92b1df6b0..13b0046c47b4d 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index f4ac384409174..1c91ae916bc4b 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 61d5adec0e7cc..eeb0ae37fa109 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 970d42ba4590e..5c6188add47cf 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 0cfb5f616cd24..c6754cf57f9dd 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 7024e0dcfab75..15ecd5597fcab 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index cc8d8796da601..944fd8f58dbed 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index c4977726a3cac..91ab784016069 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 9bbcb7f322798..0ea392b136b98 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.1 ../pom.xml From e7c9d1a8c8e3604347aa969b66b52fac6f58be97 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Wed, 14 Sep 2022 00:21:03 +0000 Subject: [PATCH 481/535] Preparing development version 3.3.2-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 39 files changed, 41 insertions(+), 41 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf6d..c1e490df26f4a 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.2 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 32126a5e13820..eff5e3419be64 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 21bf56094503b..8834464f7f6ac 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 43740354d84d1..bfadba306c5ec 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 46c875dcb0a06..287355ac07d96 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index d6d28fe4ec687..14d41802a8b74 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index a37bc21ca6e54..f6f26a262fd25 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 817a30e5deea0..b3b7da8919fc5 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 99b641a3658c0..9c13be8a1f017 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 0711ecc3e0744..d5267f3b32d27 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 15eea016135a4..667b574c867bc 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.1 -SPARK_VERSION_SHORT: 3.3.1 +SPARK_VERSION: 3.3.2-SNAPSHOT +SPARK_VERSION_SHORT: 3.3.2 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.1"] + 'facetFilters': ["version:3.3.2"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 18b30b092b273..f3934614cb810 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index e932501b8b834..fbca1101eae44 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 72940cb743386..537e4c97b1f9d 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index f079671b8998a..1ce0b53014aa7 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 1b79350397482..e851b0a8b2c79 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 83097460edc9d..695154d8ceb3f 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 91e111ee38d10..2fcd0e4c2b75d 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index e622369eb7250..af53c827711c4 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index a208e03e8bbf3..2f8755241b3c2 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index e464dfacc4c7e..c3a1b68c82657 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index ed0c627abb943..9c0f78231df9d 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 606b6cb8c5cd7..3668043c4e316 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index cb5c693068114..a97e35dae4ce1 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 3fc9ece3d0e05..092c1c7d83da6 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index d4d0fc3b6f9e6..24370ce56e883 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 99d4e265332f9..045d299277769 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index 49fe5caabc028..3e5963da87f31 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__: str = "3.3.1" +__version__: str = "3.3.2.dev0" diff --git a/repl/pom.xml b/repl/pom.xml index d5abd10e610c7..68148f637ac0a 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 253a5aeffb521..be3c81fbf949f 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 13b0046c47b4d..fa9fc6473d330 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 1c91ae916bc4b..9354ffda8e46c 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index eeb0ae37fa109..a5c123e47ac0b 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 5c6188add47cf..e1aaf4afa59c0 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index c6754cf57f9dd..9ce6e61a7abc6 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 15ecd5597fcab..6653a4f61dbfe 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 944fd8f58dbed..79219abecf6a8 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 91ab784016069..13eb55d55ebf5 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 0ea392b136b98..1195252ba95c3 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml From 82351bee9bba75b631dc3a93aa5bc4cd9f46724c Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Wed, 14 Sep 2022 08:40:08 +0800 Subject: [PATCH 482/535] [SPARK-39915][SQL] Ensure the output partitioning is user-specified in AQE ### What changes were proposed in this pull request? - Support get user-specified root repartition through `DeserializeToObjectExec` - Skip optimize empty for the root repartition which is user-specified - Add a new rule `AdjustShuffleExchangePosition` to adjust the shuffle we add back, so that we can restore shuffle safely. ### Why are the changes needed? AQE can not completely respect the user-specified repartition. The main reasons are: 1. the AQE optimzier will convert empty to local relation which does not reserve the partitioning info 2. the machine of AQE `requiredDistribution` only restore the repartition which does not support through `DeserializeToObjectExec` After the fix: The partition number of `spark.range(0).repartition(5).rdd.getNumPartitions` should be 5. ### Does this PR introduce _any_ user-facing change? yes, ensure the user-specified distribution. ### How was this patch tested? add tests Closes #37612 from ulysses-you/output-partition. Lead-authored-by: ulysses-you Co-authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit 801ca252f43b20cdd629c01d734ca9049e6eccf4) Signed-off-by: Wenchen Fan --- .../optimizer/PropagateEmptyRelation.scala | 23 ++++++---- .../adaptive/AQEPropagateEmptyRelation.scala | 15 ++++++- .../sql/execution/adaptive/AQEUtils.scala | 3 +- .../adaptive/AdaptiveSparkPlanExec.scala | 1 + .../AdjustShuffleExchangePosition.scala | 43 +++++++++++++++++++ .../adaptive/LogicalQueryStage.scala | 14 ++++-- .../org/apache/spark/sql/DataFrameSuite.scala | 2 +- .../adaptive/AdaptiveQueryExecSuite.scala | 43 +++++++++++++++++++ 8 files changed, 130 insertions(+), 14 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdjustShuffleExchangePosition.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala index f8e2096e44326..f3606566cb105 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PropagateEmptyRelation.scala @@ -24,7 +24,7 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.trees.TreeNodeTag -import org.apache.spark.sql.catalyst.trees.TreePattern.{LOCAL_RELATION, TRUE_OR_FALSE_LITERAL} +import org.apache.spark.sql.catalyst.trees.TreePattern.{LOCAL_RELATION, REPARTITION_OPERATION, TRUE_OR_FALSE_LITERAL} /** * The base class of two rules in the normal and AQE Optimizer. It simplifies query plans with @@ -183,13 +183,20 @@ abstract class PropagateEmptyRelationBase extends Rule[LogicalPlan] with CastSup * Add a [[ROOT_REPARTITION]] tag for the root user-specified repartition so this rule can * skip optimize it. */ - private def addTagForRootRepartition(plan: LogicalPlan): LogicalPlan = plan match { - case p: Project => p.mapChildren(addTagForRootRepartition) - case f: Filter => f.mapChildren(addTagForRootRepartition) - case r if userSpecifiedRepartition(r) => - r.setTagValue(ROOT_REPARTITION, ()) - r - case _ => plan + private def addTagForRootRepartition(plan: LogicalPlan): LogicalPlan = { + if (!plan.containsPattern(REPARTITION_OPERATION)) { + return plan + } + + plan match { + case p: Project => p.mapChildren(addTagForRootRepartition) + case f: Filter => f.mapChildren(addTagForRootRepartition) + case d: DeserializeToObject => d.mapChildren(addTagForRootRepartition) + case r if userSpecifiedRepartition(r) => + r.setTagValue(ROOT_REPARTITION, ()) + r + case _ => plan + } } override def apply(plan: LogicalPlan): LogicalPlan = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEPropagateEmptyRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEPropagateEmptyRelation.scala index 132c919c29112..7951a6f36b9bd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEPropagateEmptyRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEPropagateEmptyRelation.scala @@ -22,6 +22,7 @@ import org.apache.spark.sql.catalyst.planning.ExtractSingleColumnNullAwareAntiJo import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.trees.TreePattern.{LOCAL_RELATION, LOGICAL_QUERY_STAGE, TRUE_OR_FALSE_LITERAL} import org.apache.spark.sql.execution.aggregate.BaseAggregateExec +import org.apache.spark.sql.execution.exchange.{REPARTITION_BY_COL, REPARTITION_BY_NUM, ShuffleExchangeLike} import org.apache.spark.sql.execution.joins.HashedRelationWithAllNullKeys /** @@ -33,11 +34,16 @@ import org.apache.spark.sql.execution.joins.HashedRelationWithAllNullKeys */ object AQEPropagateEmptyRelation extends PropagateEmptyRelationBase { override protected def isEmpty(plan: LogicalPlan): Boolean = - super.isEmpty(plan) || getEstimatedRowCount(plan).contains(0) + super.isEmpty(plan) || (!isRootRepartition(plan) && getEstimatedRowCount(plan).contains(0)) override protected def nonEmpty(plan: LogicalPlan): Boolean = super.nonEmpty(plan) || getEstimatedRowCount(plan).exists(_ > 0) + private def isRootRepartition(plan: LogicalPlan): Boolean = plan match { + case l: LogicalQueryStage if l.getTagValue(ROOT_REPARTITION).isDefined => true + case _ => false + } + // The returned value follows: // - 0 means the plan must produce 0 row // - positive value means an estimated row count which can be over-estimated @@ -69,6 +75,13 @@ object AQEPropagateEmptyRelation extends PropagateEmptyRelationBase { empty(j) } + override protected def userSpecifiedRepartition(p: LogicalPlan): Boolean = p match { + case LogicalQueryStage(_, ShuffleQueryStageExec(_, shuffle: ShuffleExchangeLike, _)) + if shuffle.shuffleOrigin == REPARTITION_BY_COL || + shuffle.shuffleOrigin == REPARTITION_BY_NUM => true + case _ => false + } + override protected def applyInternal(p: LogicalPlan): LogicalPlan = p.transformUpWithPruning( // LOCAL_RELATION and TRUE_OR_FALSE_LITERAL pattern are matched at // `PropagateEmptyRelationBase.commonApplyFunc` diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEUtils.scala index 51833012a128e..1a0836ed752bf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEUtils.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Distribution, HashPartitioning, UnspecifiedDistribution} -import org.apache.spark.sql.execution.{CollectMetricsExec, FilterExec, ProjectExec, SortExec, SparkPlan} +import org.apache.spark.sql.execution.{CollectMetricsExec, DeserializeToObjectExec, FilterExec, ProjectExec, SortExec, SparkPlan} import org.apache.spark.sql.execution.exchange.{REPARTITION_BY_COL, REPARTITION_BY_NUM, ShuffleExchangeExec} object AQEUtils { @@ -41,6 +41,7 @@ object AQEUtils { case f: FilterExec => getRequiredDistribution(f.child) case s: SortExec if !s.global => getRequiredDistribution(s.child) case c: CollectMetricsExec => getRequiredDistribution(c.child) + case d: DeserializeToObjectExec => getRequiredDistribution(d.child) case p: ProjectExec => getRequiredDistribution(p.child).flatMap { case h: ClusteredDistribution => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 6c9c0e1cda4e2..9b6c98fa0e561 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -116,6 +116,7 @@ case class AdaptiveSparkPlanExec( Seq( RemoveRedundantProjects, ensureRequirements, + AdjustShuffleExchangePosition, ValidateSparkPlan, ReplaceHashWithSortAgg, RemoveRedundantSorts, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdjustShuffleExchangePosition.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdjustShuffleExchangePosition.scala new file mode 100644 index 0000000000000..f211b6cc8a069 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdjustShuffleExchangePosition.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.adaptive + +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.{DeserializeToObjectExec, SparkPlan} +import org.apache.spark.sql.execution.exchange.ShuffleExchangeLike + +/** + * This rule is used to adjust the shuffle exchange with special SparkPlan who + * does not allow a shuffle on top of it. + */ +object AdjustShuffleExchangePosition extends Rule[SparkPlan] { + private def shouldAdjust(plan: SparkPlan): Boolean = plan match { + // `DeserializeToObjectExec` is used by Spark internally e.g. `Dataset.rdd`. It produces + // safe rows and must be root node because SQL operators only accept unsafe rows as input. + // This conflicts with AQE framework since we may add shuffle back during re-optimize + // to preserve the user-specified repartition, so here we adjust the position with shuffle. + case _: DeserializeToObjectExec => true + case _ => false + } + + override def apply(plan: SparkPlan): SparkPlan = plan match { + case shuffle: ShuffleExchangeLike if shouldAdjust(shuffle.child) => + shuffle.child.withNewChildren(shuffle.withNewChildren(shuffle.child.children) :: Nil) + case _ => plan + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala index f8b786778a798..5e6f1b5a88408 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/LogicalQueryStage.scala @@ -18,8 +18,8 @@ package org.apache.spark.sql.execution.adaptive import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder} -import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, Statistics} -import org.apache.spark.sql.catalyst.trees.TreePattern.{LOGICAL_QUERY_STAGE, TreePattern} +import org.apache.spark.sql.catalyst.plans.logical.{LeafNode, LogicalPlan, RepartitionOperation, Statistics} +import org.apache.spark.sql.catalyst.trees.TreePattern.{LOGICAL_QUERY_STAGE, REPARTITION_OPERATION, TreePattern} import org.apache.spark.sql.execution.SparkPlan /** @@ -40,7 +40,15 @@ case class LogicalQueryStage( override def output: Seq[Attribute] = logicalPlan.output override val isStreaming: Boolean = logicalPlan.isStreaming override val outputOrdering: Seq[SortOrder] = physicalPlan.outputOrdering - override protected val nodePatterns: Seq[TreePattern] = Seq(LOGICAL_QUERY_STAGE) + override protected val nodePatterns: Seq[TreePattern] = { + // Repartition is a special node that it represents a shuffle exchange, + // then in AQE the repartition will be always wrapped into `LogicalQueryStage` + val repartitionPattern = logicalPlan match { + case _: RepartitionOperation => Some(REPARTITION_OPERATION) + case _ => None + } + Seq(LOGICAL_QUERY_STAGE) ++ repartitionPattern + } override def computeStats(): Statistics = { // TODO this is not accurate when there is other physical nodes above QueryStageExec. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index b05d320ca07f8..a696c3fd4995d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -482,7 +482,7 @@ class DataFrameSuite extends QueryTest testData.select("key").coalesce(1).select("key"), testData.select("key").collect().toSeq) - assert(spark.emptyDataFrame.coalesce(1).rdd.partitions.size === 0) + assert(spark.emptyDataFrame.coalesce(1).rdd.partitions.size === 1) } test("convert $\"attribute name\" into unresolved attribute") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index d5c933fbc8b56..55f092e2d601b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -2606,6 +2606,49 @@ class AdaptiveQueryExecSuite assert(findTopLevelBroadcastNestedLoopJoin(adaptivePlan).size == 1) } } + + test("SPARK-39915: Dataset.repartition(N) may not create N partitions") { + withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "6") { + // partitioning: HashPartitioning + // shuffleOrigin: REPARTITION_BY_NUM + assert(spark.range(0).repartition(5, $"id").rdd.getNumPartitions == 5) + // shuffleOrigin: REPARTITION_BY_COL + // The minimum partition number after AQE coalesce is 1 + assert(spark.range(0).repartition($"id").rdd.getNumPartitions == 1) + // through project + assert(spark.range(0).selectExpr("id % 3 as c1", "id % 7 as c2") + .repartition(5, $"c1").select($"c2").rdd.getNumPartitions == 5) + + // partitioning: RangePartitioning + // shuffleOrigin: REPARTITION_BY_NUM + // The minimum partition number of RangePartitioner is 1 + assert(spark.range(0).repartitionByRange(5, $"id").rdd.getNumPartitions == 1) + // shuffleOrigin: REPARTITION_BY_COL + assert(spark.range(0).repartitionByRange($"id").rdd.getNumPartitions == 1) + + // partitioning: RoundRobinPartitioning + // shuffleOrigin: REPARTITION_BY_NUM + assert(spark.range(0).repartition(5).rdd.getNumPartitions == 5) + // shuffleOrigin: REBALANCE_PARTITIONS_BY_NONE + assert(spark.range(0).repartition().rdd.getNumPartitions == 0) + // through project + assert(spark.range(0).selectExpr("id % 3 as c1", "id % 7 as c2") + .repartition(5).select($"c2").rdd.getNumPartitions == 5) + + // partitioning: SinglePartition + assert(spark.range(0).repartition(1).rdd.getNumPartitions == 1) + } + } + + test("SPARK-39915: Ensure the output partitioning is user-specified") { + withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "3", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + val df1 = spark.range(1).selectExpr("id as c1") + val df2 = spark.range(1).selectExpr("id as c2") + val df = df1.join(df2, col("c1") === col("c2")).repartition(3, col("c1")) + assert(df.rdd.getNumPartitions == 3) + } + } } /** From ec40006aa3bda9f6fd03bb9c0bda561c139ed5ce Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 14 Sep 2022 09:28:04 -0700 Subject: [PATCH 483/535] [SPARK-40423][K8S][TESTS] Add explicit YuniKorn queue submission test coverage ### What changes were proposed in this pull request? This PR aims to add explicit Yunikorn queue submission test coverage instead of implicit assignment by admission controller. ### Why are the changes needed? - To provide a proper test coverage. - To prevent the side effect of YuniKorn admission controller which overrides all Spark's scheduler settings by default (if we do not edit the rule explicitly). This breaks Apache Spark's default scheduler K8s IT test coverage. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually run the CI and check the YuniKorn queue UI. ``` $ build/sbt -Psparkr -Pkubernetes -Pkubernetes-integration-tests -Dspark.kubernetes.test.deployMode=docker-desktop "kubernetes-integration-tests/test" -Dtest.exclude.tags=minikube,local,decom -Dtest.default.exclude.tags= ``` Screen Shot 2022-09-14 at 2 07 38 AM Closes #37877 from dongjoon-hyun/SPARK-40423. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 12e48527846d993a78b159fbba3e900a4feb7b55) Signed-off-by: Dongjoon Hyun --- docs/running-on-kubernetes.md | 5 +++-- .../spark/deploy/k8s/integrationtest/YuniKornSuite.scala | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index b57175f358c32..f7f7ec539b85e 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1822,8 +1822,7 @@ Install Apache YuniKorn: ```bash helm repo add yunikorn https://apache.github.io/yunikorn-release helm repo update -kubectl create namespace yunikorn -helm install yunikorn yunikorn/yunikorn --namespace yunikorn --version 1.1.0 +helm install yunikorn yunikorn/yunikorn --namespace yunikorn --version 1.1.0 --create-namespace --set embedAdmissionController=false ``` The above steps will install YuniKorn v1.1.0 on an existing Kubernetes cluster. @@ -1834,6 +1833,8 @@ Submit Spark jobs with the following extra options: ```bash --conf spark.kubernetes.scheduler.name=yunikorn +--conf spark.kubernetes.driver.label.queue=root.default +--conf spark.kubernetes.executor.label.queue=root.default --conf spark.kubernetes.driver.annotation.yunikorn.apache.org/app-id={{APP_ID}} --conf spark.kubernetes.executor.annotation.yunikorn.apache.org/app-id={{APP_ID}} ``` diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala index 5a3c063efa14b..0dfb88b259e21 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/YuniKornSuite.scala @@ -21,8 +21,11 @@ class YuniKornSuite extends KubernetesSuite { override protected def setUpTest(): Unit = { super.setUpTest() + val namespace = sparkAppConf.get("spark.kubernetes.namespace") sparkAppConf .set("spark.kubernetes.scheduler.name", "yunikorn") + .set("spark.kubernetes.driver.label.queue", "root." + namespace) + .set("spark.kubernetes.executor.label.queue", "root." + namespace) .set("spark.kubernetes.driver.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}") .set("spark.kubernetes.executor.annotation.yunikorn.apache.org/app-id", "{{APP_ID}}") } From d8e157d0347f51c54e334fabe76072fc95332671 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 15 Sep 2022 09:28:06 +0900 Subject: [PATCH 484/535] [SPARK-38017][FOLLOWUP][3.3] Hide TimestampNTZ in the doc ### What changes were proposed in this pull request? This PR removes `TimestampNTZ` from the doc about `TimeWindow` and `SessionWIndow`. ### Why are the changes needed? As we discussed, it's better to hide `TimestampNTZ` from the doc. https://github.com/apache/spark/pull/35313#issuecomment-1185192162 ### Does this PR introduce _any_ user-facing change? The document will be changed, but there is no compatibility problem. ### How was this patch tested? Built the doc with `SKIP_RDOC=1 SKIP_SQLDOC=1 bundle exec jekyll build` at `doc` directory. Then, confirmed the generated HTML. Closes #37882 from sarutak/fix-window-doc-3.3. Authored-by: Kousuke Saruta Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/functions.py | 4 ++-- .../main/scala/org/apache/spark/sql/functions.scala | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index ed3b0789b4731..c8d7f9cdcb5e6 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2557,7 +2557,7 @@ def window( ---------- timeColumn : :class:`~pyspark.sql.Column` The column or the expression to use as the timestamp for windowing by time. - The time column must be of TimestampType or TimestampNTZType. + The time column must be of TimestampType. windowDuration : str A string specifying the width of the window, e.g. `10 minutes`, `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for @@ -2632,7 +2632,7 @@ def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str]) ---------- timeColumn : :class:`~pyspark.sql.Column` or str The column name or column to use as the timestamp for windowing by time. - The time column must be of TimestampType or TimestampNTZType. + The time column must be of TimestampType. gapDuration : :class:`~pyspark.sql.Column` or str A Python string literal or column specifying the timeout of the session. It could be static value, e.g. `10 minutes`, `1 second`, or an expression/UDF that specifies gap diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index f6c3bc7e3cece..6dbbca6733804 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3645,7 +3645,7 @@ object functions { * processing time. * * @param timeColumn The column or the expression to use as the timestamp for windowing by time. - * The time column must be of TimestampType or TimestampNTZType. + * The time column must be of TimestampType. * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`, * `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for * valid duration identifiers. Note that the duration is a fixed length of @@ -3701,7 +3701,7 @@ object functions { * processing time. * * @param timeColumn The column or the expression to use as the timestamp for windowing by time. - * The time column must be of TimestampType or TimestampNTZType. + * The time column must be of TimestampType. * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`, * `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for * valid duration identifiers. Note that the duration is a fixed length of @@ -3746,7 +3746,7 @@ object functions { * processing time. * * @param timeColumn The column or the expression to use as the timestamp for windowing by time. - * The time column must be of TimestampType or TimestampNTZType. + * The time column must be of TimestampType. * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`, * `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for * valid duration identifiers. @@ -3774,7 +3774,7 @@ object functions { * processing time. * * @param timeColumn The column or the expression to use as the timestamp for windowing by time. - * The time column must be of TimestampType or TimestampNTZType. + * The time column must be of TimestampType. * @param gapDuration A string specifying the timeout of the session, e.g. `10 minutes`, * `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for * valid duration identifiers. @@ -3811,7 +3811,7 @@ object functions { * processing time. * * @param timeColumn The column or the expression to use as the timestamp for windowing by time. - * The time column must be of TimestampType or TimestampNTZType. + * The time column must be of TimestampType. * @param gapDuration A column specifying the timeout of the session. It could be static value, * e.g. `10 minutes`, `1 second`, or an expression/UDF that specifies gap * duration dynamically based on the input row. From d7483b51f562d361be3dc995dfefcc4ac8d2e45f Mon Sep 17 00:00:00 2001 From: huaxingao Date: Thu, 15 Sep 2022 11:28:20 -0700 Subject: [PATCH 485/535] [SPARK-40429][SQL][3.3] Only set KeyGroupedPartitioning when the referenced column is in the output ### What changes were proposed in this pull request? back porting [PR](https://github.com/apache/spark/pull/37886) to 3.3. Only set `KeyGroupedPartitioning` when the referenced column is in the output ### Why are the changes needed? bug fixing ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New test Closes #37901 from huaxingao/3.3. Authored-by: huaxingao Signed-off-by: Dongjoon Hyun --- .../datasources/v2/V2ScanPartitioning.scala | 14 ++++++++++++-- .../sql/connector/MetadataColumnSuite.scala | 16 ++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala index 9a5a7e6aab63a..64e80081018a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala @@ -39,8 +39,18 @@ object V2ScanPartitioning extends Rule[LogicalPlan] with SQLConfHelper { } val catalystPartitioning = scan.outputPartitioning() match { - case kgp: KeyGroupedPartitioning => sequenceToOption(kgp.keys().map( - V2ExpressionUtils.toCatalystOpt(_, relation, funCatalogOpt))) + case kgp: KeyGroupedPartitioning => + val partitioning = sequenceToOption(kgp.keys().map( + V2ExpressionUtils.toCatalystOpt(_, relation, funCatalogOpt))) + if (partitioning.isEmpty) { + None + } else { + if (partitioning.get.forall(p => p.references.subsetOf(d.outputSet))) { + partitioning + } else { + None + } + } case _: UnknownPartitioning => None case p => throw new IllegalArgumentException("Unsupported data source V2 partitioning " + "type: " + p.getClass.getSimpleName) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala index 95b9c4f72356a..7f0e74f6bc7ef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/MetadataColumnSuite.scala @@ -216,4 +216,20 @@ class MetadataColumnSuite extends DatasourceV2SQLBase { .withColumn("right_all", struct($"right.*")) checkAnswer(dfQuery, Row(1, "a", "b", Row(1, "a"), Row(1, "b"))) } + + test("SPARK-40429: Only set KeyGroupedPartitioning when the referenced column is in the output") { + withTable(tbl) { + sql(s"CREATE TABLE $tbl (id bigint, data string) PARTITIONED BY (id)") + sql(s"INSERT INTO $tbl VALUES (1, 'a'), (2, 'b'), (3, 'c')") + checkAnswer( + spark.table(tbl).select("index", "_partition"), + Seq(Row(0, "3"), Row(0, "2"), Row(0, "1")) + ) + + checkAnswer( + spark.table(tbl).select("id", "index", "_partition"), + Seq(Row(3, 0, "3"), Row(2, 0, "2"), Row(1, 0, "1")) + ) + } + } } From c0acd3f7398c695b9889378b26d4a775b00df452 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 15 Sep 2022 18:03:54 -0700 Subject: [PATCH 486/535] [SPARK-40459][K8S] `recoverDiskStore` should not stop by existing recomputed files ### What changes were proposed in this pull request? This PR aims to ignore `FileExistsException` during `recoverDiskStore` processing. ### Why are the changes needed? Although `recoverDiskStore` is already wrapped by `tryLogNonFatalError`, a single file recovery exception should not block the whole `recoverDiskStore` . https://github.com/apache/spark/blob/5938e84e72b81663ccacf0b36c2f8271455de292/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/shuffle/KubernetesLocalDiskShuffleExecutorComponents.scala#L45-L47 ``` org.apache.commons.io.FileExistsException: ... at org.apache.commons.io.FileUtils.requireAbsent(FileUtils.java:2587) at org.apache.commons.io.FileUtils.moveFile(FileUtils.java:2305) at org.apache.commons.io.FileUtils.moveFile(FileUtils.java:2283) at org.apache.spark.storage.DiskStore.moveFileToBlock(DiskStore.scala:150) at org.apache.spark.storage.BlockManager$TempFileBasedBlockStoreUpdater.saveToDiskStore(BlockManager.scala:487) at org.apache.spark.storage.BlockManager$BlockStoreUpdater.$anonfun$save$1(BlockManager.scala:407) at org.apache.spark.storage.BlockManager.org$apache$spark$storage$BlockManager$$doPut(BlockManager.scala:1445) at org.apache.spark.storage.BlockManager$BlockStoreUpdater.save(BlockManager.scala:380) at org.apache.spark.storage.BlockManager$TempFileBasedBlockStoreUpdater.save(BlockManager.scala:490) at org.apache.spark.shuffle.KubernetesLocalDiskShuffleExecutorComponents$.$anonfun$recoverDiskStore$14(KubernetesLocalDiskShuffleExecutorComponents.scala:95) at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36) at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33) at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198) at org.apache.spark.shuffle.KubernetesLocalDiskShuffleExecutorComponents$.recoverDiskStore(KubernetesLocalDiskShuffleExecutorComponents.scala:91) ``` ### Does this PR introduce _any_ user-facing change? No, this will improve the recover rate. ### How was this patch tested? Pass the CIs. Closes #37903 from dongjoon-hyun/SPARK-40459. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit f24bb430122eaa311070cfdefbc82d34b0341701) Signed-off-by: Dongjoon Hyun --- .../KubernetesLocalDiskShuffleExecutorComponents.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/shuffle/KubernetesLocalDiskShuffleExecutorComponents.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/shuffle/KubernetesLocalDiskShuffleExecutorComponents.scala index 01aba6d061721..3d6379b871388 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/shuffle/KubernetesLocalDiskShuffleExecutorComponents.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/shuffle/KubernetesLocalDiskShuffleExecutorComponents.scala @@ -22,6 +22,8 @@ import java.util.Optional import scala.reflect.ClassTag +import org.apache.commons.io.FileExistsException + import org.apache.spark.{SparkConf, SparkEnv} import org.apache.spark.internal.Logging import org.apache.spark.shuffle.api.{ShuffleExecutorComponents, ShuffleMapOutputWriter, SingleSpillShuffleMapOutputWriter} @@ -95,6 +97,8 @@ object KubernetesLocalDiskShuffleExecutorComponents extends Logging { bm.TempFileBasedBlockStoreUpdater(id, level, classTag, f, decryptedSize).save() } catch { case _: UnrecognizedBlockId => + case _: FileExistsException => + // This may happen due to recompute, but we continue to recover next files } } } From 507708d9e5ff3527dc4f871be9697128921efda8 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 16 Sep 2022 10:08:59 +0900 Subject: [PATCH 487/535] [SPARK-40461][INFRA] Set upperbound for pyzmq 24.0.0 for Python linter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR sets the upperbound for `pyzmq` as `<24.0.0` in our CI Python linter job. The new release seems having a problem (https://github.com/zeromq/pyzmq/commit/2d3327d2e50c2510d45db2fc51488578a737b79b). ### Why are the changes needed? To fix the linter build failure. See https://github.com/apache/spark/actions/runs/3063515551/jobs/4947782771 ``` /tmp/timer_created_0ftep6.c: In function ‘main’: /tmp/timer_created_0ftep6.c:2:5: warning: implicit declaration of function ‘timer_create’ [-Wimplicit-function-declaration] 2 | timer_create(); | ^~~~~~~~~~~~ x86_64-linux-gnu-gcc -pthread tmp/timer_created_0ftep6.o -L/usr/lib/x86_64-linux-gnu -o a.out /usr/bin/ld: tmp/timer_created_0ftep6.o: in function `main': /tmp/timer_created_0ftep6.c:2: undefined reference to `timer_create' collect2: error: ld returned 1 exit status no timer_create, linking librt ************************************************ building 'zmq.libzmq' extension creating build/temp.linux-x86_64-cpython-39/buildutils creating build/temp.linux-x86_64-cpython-39/bundled creating build/temp.linux-x86_64-cpython-39/bundled/zeromq creating build/temp.linux-x86_64-cpython-39/bundled/zeromq/src x86_64-linux-gnu-g++ -pthread -std=c++11 -pthread -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O2 -Wall -g -fstack-protector-strong -Wformat -Werror=format-security -g -fwrapv -O2 -fPIC -DZMQ_HAVE_CURVE=1 -DZMQ_USE_TWEETNACL=1 -DZMQ_USE_EPOLL=1 -DZMQ_IOTHREADS_USE_EPOLL=1 -DZMQ_POLL_BASED_ON_POLL=1 -Ibundled/zeromq/include -Ibundled -I/usr/include/python3.9 -c buildutils/initlibzmq.cpp -o build/temp.linux-x86_64-cpython-39/buildutils/initlibzmq.o buildutils/initlibzmq.cpp:10:10: fatal error: Python.h: No such file or directory 10 | #include "Python.h" | ^~~~~~~~~~ compilation terminated. error: command '/usr/bin/x86_64-linux-gnu-g++' failed with exit code 1 [end of output] note: This error originates from a subprocess, and is likely not a problem with pip. ERROR: Failed building wheel for pyzmq ERROR: Could not build wheels for pyzmq, which is required to install pyproject.toml-based projects ``` ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? CI in this PRs should validate it. Closes #37904 from HyukjinKwon/fix-linter. Lead-authored-by: Hyukjin Kwon Co-authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 254bd80278843b3bc13584ca2f04391a770a78c7) Signed-off-by: Hyukjin Kwon --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 55c0b12d4bcbc..a392f940df99d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -527,7 +527,7 @@ jobs: # See also https://issues.apache.org/jira/browse/SPARK-35375. # Pin the MarkupSafe to 2.0.1 to resolve the CI error. # See also https://issues.apache.org/jira/browse/SPARK-38279. - python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme ipython nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' + python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme ipython nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0' python3.9 -m pip install ipython_genutils # See SPARK-38517 python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421 From b9a514ea0519e2da21efe2201c7f888be2640458 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Fri, 16 Sep 2022 22:05:03 +0900 Subject: [PATCH 488/535] [SPARK-40470][SQL] Handle GetArrayStructFields and GetMapValue in "arrays_zip" function ### What changes were proposed in this pull request? This is a follow-up for https://github.com/apache/spark/pull/37833. The PR fixes column names in `arrays_zip` function for the cases when `GetArrayStructFields` and `GetMapValue` expressions are used (see unit tests for more details). Before the patch, the column names would be indexes or an AnalysisException would be thrown in the case of `GetArrayStructFields` example. ### Why are the changes needed? Fixes an inconsistency issue in Spark 3.2 and onwards where the fields would be labeled as indexes instead of column names. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? I added unit tests that reproduce the issue and confirmed that the patch fixes them. Closes #37911 from sadikovi/SPARK-40470. Authored-by: Ivan Sadikov Signed-off-by: Hyukjin Kwon (cherry picked from commit 9b0f979141ba2c4124d96bc5da69ea5cac51df0d) Signed-off-by: Hyukjin Kwon --- .../expressions/collectionOperations.scala | 4 +- .../spark/sql/DataFrameFunctionsSuite.scala | 45 +++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index 05a273763b9ff..c4bf65bb8abcb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -267,7 +267,9 @@ case class ArraysZip(children: Seq[Expression], names: Seq[Expression]) case (u: UnresolvedAttribute, _) => Literal(u.nameParts.last) case (e: NamedExpression, _) if e.resolved => Literal(e.name) case (e: NamedExpression, _) => NamePlaceholder - case (e: GetStructField, _) => Literal(e.extractFieldName) + case (g: GetStructField, _) => Literal(g.extractFieldName) + case (g: GetArrayStructFields, _) => Literal(g.field.name) + case (g: GetMapValue, _) => Literal(g.key) case (_, idx) => Literal(idx.toString) }) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index a9c1704581270..697cce9b50d65 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -740,6 +740,51 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { assert(fieldNames.toSeq === Seq("arr_1", "arr_2", "arr_3")) } + test("SPARK-40470: array_zip should return field names in GetArrayStructFields") { + val df = spark.read.json(Seq( + """ + { + "arr": [ + { + "obj": { + "nested": { + "field1": [1], + "field2": [2] + } + } + } + ] + } + """).toDS()) + + val res = df + .selectExpr("arrays_zip(arr.obj.nested.field1, arr.obj.nested.field2) as arr") + .select(col("arr.field1"), col("arr.field2")) + + val fieldNames = res.schema.fieldNames + assert(fieldNames.toSeq === Seq("field1", "field2")) + + checkAnswer(res, Row(Seq(Seq(1)), Seq(Seq(2))) :: Nil) + } + + test("SPARK-40470: arrays_zip should return field names in GetMapValue") { + val df = spark.sql(""" + select + map( + 'arr_1', array(1, 2), + 'arr_2', array(3, 4) + ) as map_obj + """) + + val res = df.selectExpr("arrays_zip(map_obj.arr_1, map_obj.arr_2) as arr") + + val fieldNames = res.schema.head.dataType.asInstanceOf[ArrayType] + .elementType.asInstanceOf[StructType].fieldNames + assert(fieldNames.toSeq === Seq("arr_1", "arr_2")) + + checkAnswer(res, Row(Seq(Row(1, 3), Row(2, 4)))) + } + def testSizeOfMap(sizeOfNull: Any): Unit = { val df = Seq( (Map[Int, Int](1 -> 1, 2 -> 2), "x"), From ba6d17288c3287e8dc1f7cb95db0233a45732dc0 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Fri, 16 Sep 2022 10:46:36 -0700 Subject: [PATCH 489/535] [SPARK-40169][SQL] Don't pushdown Parquet filters with no reference to data schema ### What changes were proposed in this pull request? Currently in Parquet V1 read path, Spark will pushdown data filters even if they have no reference in the Parquet read schema. This can cause correctness issues as described in [SPARK-39833](https://issues.apache.org/jira/browse/SPARK-39833). The root cause, it seems, is because in the V1 path, we first use `AttributeReference` equality to filter out data columns without partition columns, and then use `AttributeSet` equality to filter out filters with only references to data columns. There's inconsistency in the two steps, when case sensitive check is false. Take the following scenario as example: - data column: `[COL, a]` - partition column: `[col]` - filter: `col > 10` With `AttributeReference` equality, `COL` is not considered equal to `col` (because their names are different), and thus the filtered out data column set is still `[COL, a]`. However, when calculating filters with only reference to data columns, `COL` is **considered equal** to `col`. Consequently, the filter `col > 10`, when checking with `[COL, a]`, is considered to have reference to data columns, and thus will be pushed down to Parquet as data filter. On the Parquet side, since `col` doesn't exist in the file schema (it only has `COL`), when column index enabled, it will incorrectly return wrong number of rows. See [PARQUET-2170](https://issues.apache.org/jira/browse/PARQUET-2170) for more detail. In general, where data columns overlap with partition columns and case sensitivity is false, partition filters will not be filter out before we calculate filters with only reference to data columns, which is incorrect. ### Why are the changes needed? This fixes the correctness bug described in [SPARK-39833](https://issues.apache.org/jira/browse/SPARK-39833). ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? There are existing test cases for this issue from [SPARK-39833](https://issues.apache.org/jira/browse/SPARK-39833). This also modified them to test the scenarios when case sensitivity is on or off. Closes #37881 from sunchao/SPARK-40169. Authored-by: Chao Sun Signed-off-by: Chao Sun --- .../datasources/FileSourceStrategy.scala | 2 +- .../parquet/ParquetFileFormat.scala | 5 --- .../parquet/ParquetQuerySuite.scala | 38 ++++++++++++------- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala index 9356e46a69187..37a0447777497 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileSourceStrategy.scala @@ -184,7 +184,7 @@ object FileSourceStrategy extends Strategy with PredicateHelper with Logging { // Partition keys are not available in the statistics of the files. // `dataColumns` might have partition columns, we need to filter them out. - val dataColumnsWithoutPartitionCols = dataColumns.filterNot(partitionColumns.contains) + val dataColumnsWithoutPartitionCols = dataColumns.filterNot(partitionSet.contains) val dataFilters = normalizedFiltersWithoutSubqueries.flatMap { f => if (f.references.intersect(partitionSet).nonEmpty) { extractPredicatesWithinOutputSet(f, AttributeSet(dataColumnsWithoutPartitionCols)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 2fa0854c98308..9765e7c780193 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -230,11 +230,6 @@ class ParquetFileFormat SQLConf.PARQUET_INT96_AS_TIMESTAMP.key, sparkSession.sessionState.conf.isParquetINT96AsTimestamp) - // See PARQUET-2170. - // Disable column index optimisation when required schema does not have columns that appear in - // pushed filters to avoid getting incorrect results. - hadoopConf.setBooleanIfUnset(ParquetInputFormat.COLUMN_INDEX_FILTERING_ENABLED, false) - val broadcastedHadoopConf = sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index d0a9a93b00fef..4e236ad786595 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -1067,24 +1067,34 @@ class ParquetV1QuerySuite extends ParquetQuerySuite { } test("SPARK-39833: pushed filters with count()") { - withTempPath { path => - val p = s"${path.getCanonicalPath}${File.separator}col=0${File.separator}" - Seq(0).toDF("COL").coalesce(1).write.save(p) - val df = spark.read.parquet(path.getCanonicalPath) - checkAnswer(df.filter("col = 0"), Seq(Row(0))) - assert(df.filter("col = 0").count() == 1, "col") - assert(df.filter("COL = 0").count() == 1, "COL") + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + withTempPath { path => + val p = s"${path.getCanonicalPath}${File.separator}col=0${File.separator}" + Seq(0).toDF("COL").coalesce(1).write.save(p) + val df = spark.read.parquet(path.getCanonicalPath) + val expected = if (caseSensitive) Seq(Row(0, 0)) else Seq(Row(0)) + checkAnswer(df.filter("col = 0"), expected) + assert(df.filter("col = 0").count() == 1, "col") + assert(df.filter("COL = 0").count() == 1, "COL") + } + } } } test("SPARK-39833: pushed filters with project without filter columns") { - withTempPath { path => - val p = s"${path.getCanonicalPath}${File.separator}col=0${File.separator}" - Seq((0, 1)).toDF("COL", "a").coalesce(1).write.save(p) - val df = spark.read.parquet(path.getCanonicalPath) - checkAnswer(df.filter("col = 0"), Seq(Row(0, 1))) - assert(df.filter("col = 0").select("a").collect().toSeq == Row(1) :: Nil) - assert(df.filter("col = 0 and a = 1").select("a").collect().toSeq == Row(1) :: Nil) + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + withTempPath { path => + val p = s"${path.getCanonicalPath}${File.separator}col=0${File.separator}" + Seq((0, 1)).toDF("COL", "a").coalesce(1).write.save(p) + val df = spark.read.parquet(path.getCanonicalPath) + val expected = if (caseSensitive) Seq(Row(0, 1, 0)) else Seq(Row(0, 1)) + checkAnswer(df.filter("col = 0"), expected) + assert(df.filter("col = 0").select("a").collect().toSeq == Row(1) :: Nil) + assert(df.filter("col = 0 and a = 1").select("a").collect().toSeq == Row(1) :: Nil) + } + } } } } From d616da74a4ed7202b3480ebc234eb109dcc86fb9 Mon Sep 17 00:00:00 2001 From: Ivan Sadikov Date: Sat, 17 Sep 2022 10:59:32 +0300 Subject: [PATCH 490/535] [SPARK-40468][SQL] Fix column pruning in CSV when _corrupt_record is selected ### What changes were proposed in this pull request? The PR fixes an issue when depending on the name of the `_corrupt_record` field, column pruning would behave differently for a record that has no parsing errors. For example, with a CSV file like this (c1 and c2 columns): ``` 1,a ``` Before the patch, the following query would return: ```scala val df = spark.read .schema("c1 int, c2 string, x string, _corrupt_record string") .csv("file:/tmp/file.csv") .withColumn("x", lit("A")) Result: +---+---+---+---------------+ |c1 |c2 |x |_corrupt_record| +---+---+---+---------------+ |1 |a |A |1,a | +---+---+---+---------------+ ``` However, if you rename the corrupt record column, the result is different (the original, arguably correct, behaviour before https://github.com/apache/spark/commit/959694271e30879c944d7fd5de2740571012460a): ```scala val df = spark.read .option("columnNameCorruptRecord", "corrupt_record") .schema("c1 int, c2 string, x string, corrupt_record string") .csv("file:/tmp/file.csv") .withColumn("x", lit("A")) +---+---+---+--------------+ |c1 |c2 |x |corrupt_record| +---+---+---+--------------+ |1 |a |A |null | +---+---+---+--------------+ ``` This patch fixes the former so both results would return `null` for corrupt record as there are no parsing issues. Note that https://issues.apache.org/jira/browse/SPARK-38523 is still fixed and works correctly. ### Why are the changes needed? Fixes a bug where corrupt record would be non-null even though the record has no parsing errors. ### Does this PR introduce _any_ user-facing change? Yes, fixes the output of corrupt record with additional columns provided by user. Everything should be unchanged outside of that scenario. ### How was this patch tested? I added a unit test that reproduces the issue. Closes #37909 from sadikovi/SPARK-40468. Authored-by: Ivan Sadikov Signed-off-by: Max Gekk (cherry picked from commit 0776f9e7bcb10612eb977ed4884e9848aea86c33) Signed-off-by: Max Gekk --- .../datasources/csv/CSVFileFormat.scala | 4 +-- .../datasources/v2/csv/CSVScan.scala | 3 +-- .../execution/datasources/csv/CSVSuite.scala | 26 +++++++++++++++++++ 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala index 8d9525078402e..93679516a8cca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala @@ -100,8 +100,7 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister { hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow] = { val broadcastedHadoopConf = sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) - val columnPruning = sparkSession.sessionState.conf.csvColumnPruning && - !requiredSchema.exists(_.name == sparkSession.sessionState.conf.columnNameOfCorruptRecord) + val columnPruning = sparkSession.sessionState.conf.csvColumnPruning val parsedOptions = new CSVOptions( options, columnPruning, @@ -154,4 +153,3 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister { } } - diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala index 5c33a1047a12f..d81223b48a53f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVScan.scala @@ -44,8 +44,7 @@ case class CSVScan( dataFilters: Seq[Expression] = Seq.empty) extends TextBasedFileScan(sparkSession, options) { - val columnPruning = sparkSession.sessionState.conf.csvColumnPruning && - !readDataSchema.exists(_.name == sparkSession.sessionState.conf.columnNameOfCorruptRecord) + val columnPruning = sparkSession.sessionState.conf.csvColumnPruning private lazy val parsedOptions: CSVOptions = new CSVOptions( options.asScala.toMap, columnPruning = columnPruning, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index dd42f48d71662..5f4f8f84cf84c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -1655,6 +1655,32 @@ abstract class CSVSuite Row(1, Date.valueOf("1983-08-04"), null) :: Nil) } + test("SPARK-40468: column pruning with the corrupt record column") { + withTempPath { path => + Seq("1,a").toDF() + .repartition(1) + .write.text(path.getAbsolutePath) + + // Corrupt record column with the default name should return null instead of "1,a" + val corruptRecordCol = spark.sessionState.conf.columnNameOfCorruptRecord + var df = spark.read + .schema(s"c1 int, c2 string, x string, ${corruptRecordCol} string") + .csv(path.getAbsolutePath) + .selectExpr("c1", "c2", "'A' as x", corruptRecordCol) + + checkAnswer(df, Seq(Row(1, "a", "A", null))) + + // Corrupt record column with the user-provided name should return null instead of "1,a" + df = spark.read + .schema(s"c1 int, c2 string, x string, _invalid string") + .option("columnNameCorruptRecord", "_invalid") + .csv(path.getAbsolutePath) + .selectExpr("c1", "c2", "'A' as x", "_invalid") + + checkAnswer(df, Seq(Row(1, "a", "A", null))) + } + } + test("SPARK-23846: schema inferring touches less data if samplingRatio < 1.0") { // Set default values for the DataSource parameters to make sure // that whole test file is mapped to only one partition. This will guarantee From fca6ab996d6a9e013093d49625bf7e6c15d1c0d2 Mon Sep 17 00:00:00 2001 From: yaohua Date: Tue, 20 Sep 2022 12:46:58 +0900 Subject: [PATCH 491/535] [SPARK-40460][SS][3.3] Fix streaming metrics when selecting _metadata ### What changes were proposed in this pull request? Cherry-picked from #37905 Streaming metrics report all 0 (`processedRowsPerSecond`, etc) when selecting `_metadata` column. Because the logical plan from the batch and the actual planned logical plan are mismatched. So, [here](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/ProgressReporter.scala#L348) we cannot find the plan and collect metrics correctly. This PR fixes this by replacing the initial `LogicalPlan` with the `LogicalPlan` containing the metadata column ### Why are the changes needed? Bug fix. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing + New UTs Closes #37932 from Yaohua628/spark-40460-3-3. Authored-by: yaohua Signed-off-by: Hyukjin Kwon --- .../streaming/MicroBatchExecution.scala | 14 +++++-- .../datasources/FileMetadataStructSuite.scala | 38 +++++++++++++++++-- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 3b409fa2f6a72..d8806f03443fb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution.streaming import scala.collection.mutable.{Map => MutableMap} +import scala.collection.mutable import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.catalyst.encoders.RowEncoder @@ -540,7 +541,7 @@ class MicroBatchExecution( logDebug(s"Running batch $currentBatchId") // Request unprocessed data from all sources. - newData = reportTimeTaken("getBatch") { + val mutableNewData = mutable.Map.empty ++ reportTimeTaken("getBatch") { availableOffsets.flatMap { case (source: Source, available: Offset) if committedOffsets.get(source).map(_ != available).getOrElse(true) => @@ -577,7 +578,7 @@ class MicroBatchExecution( val newBatchesPlan = logicalPlan transform { // For v1 sources. case StreamingExecutionRelation(source, output) => - newData.get(source).map { dataPlan => + mutableNewData.get(source).map { dataPlan => val hasFileMetadata = output.exists { case FileSourceMetadataAttribute(_) => true case _ => false @@ -586,6 +587,11 @@ class MicroBatchExecution( case l: LogicalRelation if hasFileMetadata => l.withMetadataColumns() case _ => dataPlan } + // SPARK-40460: overwrite the entry with the new logicalPlan + // because it might contain the _metadata column. It is a necessary change, + // in the ProgressReporter, we use the following mapping to get correct streaming metrics: + // streaming logical plan (with sources) <==> trigger's logical plan <==> executed plan + mutableNewData.put(source, finalDataPlan) val maxFields = SQLConf.get.maxToStringFields assert(output.size == finalDataPlan.output.size, s"Invalid batch: ${truncatedString(output, ",", maxFields)} != " + @@ -601,14 +607,14 @@ class MicroBatchExecution( // For v2 sources. case r: StreamingDataSourceV2Relation => - newData.get(r.stream).map { + mutableNewData.get(r.stream).map { case OffsetHolder(start, end) => r.copy(startOffset = Some(start), endOffset = Some(end)) }.getOrElse { LocalRelation(r.output, isStreaming = true) } } - + newData = mutableNewData.toMap // Rewire the plan to use the new attributes that were returned by the source. val newAttributePlan = newBatchesPlan.transformAllExpressionsWithPruning( _.containsPattern(CURRENT_LIKE)) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala index 6afea42ee83d8..ad75f634050d4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileMetadataStructSuite.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.{AnalysisException, Column, DataFrame, QueryTest, Ro import org.apache.spark.sql.execution.FileSourceScanExec import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.streaming.Trigger import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructField, StructType} @@ -518,16 +519,19 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession { withTempDir { dir => df.coalesce(1).write.format("json").save(dir.getCanonicalPath + "/source/new-streaming-data") - val stream = spark.readStream.format("json") + val streamDf = spark.readStream.format("json") .schema(schema) .load(dir.getCanonicalPath + "/source/new-streaming-data") .select("*", "_metadata") + + val streamQuery0 = streamDf .writeStream.format("json") .option("checkpointLocation", dir.getCanonicalPath + "/target/checkpoint") + .trigger(Trigger.AvailableNow()) .start(dir.getCanonicalPath + "/target/new-streaming-data") - stream.processAllAvailable() - stream.stop() + streamQuery0.awaitTermination() + assert(streamQuery0.lastProgress.numInputRows == 2L) val newDF = spark.read.format("json") .load(dir.getCanonicalPath + "/target/new-streaming-data") @@ -565,6 +569,34 @@ class FileMetadataStructSuite extends QueryTest with SharedSparkSession { sourceFileMetadata(METADATA_FILE_MODIFICATION_TIME)) ) ) + + // Verify self-union + val streamQuery1 = streamDf.union(streamDf) + .writeStream.format("json") + .option("checkpointLocation", dir.getCanonicalPath + "/target/checkpoint_union") + .trigger(Trigger.AvailableNow()) + .start(dir.getCanonicalPath + "/target/new-streaming-data-union") + streamQuery1.awaitTermination() + val df1 = spark.read.format("json") + .load(dir.getCanonicalPath + "/target/new-streaming-data-union") + // Verify self-union results + assert(streamQuery1.lastProgress.numInputRows == 4L) + assert(df1.count() == 4L) + assert(df1.select("*").columns.toSet == Set("name", "age", "info", "_metadata")) + + // Verify self-join + val streamQuery2 = streamDf.join(streamDf, Seq("name", "age", "info", "_metadata")) + .writeStream.format("json") + .option("checkpointLocation", dir.getCanonicalPath + "/target/checkpoint_join") + .trigger(Trigger.AvailableNow()) + .start(dir.getCanonicalPath + "/target/new-streaming-data-join") + streamQuery2.awaitTermination() + val df2 = spark.read.format("json") + .load(dir.getCanonicalPath + "/target/new-streaming-data-join") + // Verify self-join results + assert(streamQuery2.lastProgress.numInputRows == 4L) + assert(df2.count() == 2L) + assert(df2.select("*").columns.toSet == Set("name", "age", "info", "_metadata")) } } From 883a481e44a1f91ef3fc3aea2838a598cbd6cf0f Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Thu, 7 Apr 2022 15:31:17 -0700 Subject: [PATCH 492/535] [SPARK-38802][K8S][TESTS] Add Support for `spark.kubernetes.test.(driver|executor)RequestCores` ### What changes were proposed in this pull request? This patch adds support for `spark.kubernetes.test.(driver|executor)RequestCores`, this help devs set specific cores info for (driver|executor)RequestCores. ### Why are the changes needed? In some cases (such as resource limited case), we want to set specifc `RequestCores`. See also: https://github.com/apache/spark/pull/35830#pullrequestreview-929597027 ### Does this PR introduce _any_ user-facing change? No, test only ### How was this patch tested? IT passed Closes #36087 from Yikun/SPARK-38802. Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun (cherry picked from commit 83963828b54bffe99527a004057272bc584cbc26) Signed-off-by: Dongjoon Hyun --- .../kubernetes/integration-tests/README.md | 16 ++++++++++++++++ .../k8s/integrationtest/KubernetesSuite.scala | 6 ++++++ .../k8s/integrationtest/TestConstants.scala | 2 ++ 3 files changed, 24 insertions(+) diff --git a/resource-managers/kubernetes/integration-tests/README.md b/resource-managers/kubernetes/integration-tests/README.md index 748664cf41b74..af0b1ec3dc76c 100644 --- a/resource-managers/kubernetes/integration-tests/README.md +++ b/resource-managers/kubernetes/integration-tests/README.md @@ -268,6 +268,22 @@ to the wrapper scripts and using the wrapper scripts will simply set these appro
        + + + + + + + + + +
        Avro logical typeAvro typeSpark SQL type
        pushDownAggregate false - The option to enable or disable aggregate push-down in V2 JDBC data source. The default value is false, in which case Spark will not push down aggregates to the JDBC data source. Otherwise, if sets to true, aggregates will be pushed down to the JDBC data source. Aggregate push-down is usually turned off when the aggregate is performed faster by Spark than by the JDBC data source. Please note that aggregates can be pushed down if and only if all the aggregate functions and the related filters can be pushed down. Spark assumes that the data source can't fully complete the aggregate and does a final aggregate over the data source output. + The option to enable or disable aggregate push-down in V2 JDBC data source. The default value is false, in which case Spark will not push down aggregates to the JDBC data source. Otherwise, if sets to true, aggregates will be pushed down to the JDBC data source. Aggregate push-down is usually turned off when the aggregate is performed faster by Spark than by the JDBC data source. Please note that aggregates can be pushed down if and only if all the aggregate functions and the related filters can be pushed down. If numPartitions equals to 1 or the group by key is the same as partitionColumn, Spark will push down aggregate to data source completely and not apply a final aggregate over the data source output. Otherwise, Spark will apply a final aggregate over the data source output. read
        pushDownLimit false - The option to enable or disable LIMIT push-down into V2 JDBC data source. The default value is false, in which case Spark does not push down LIMIT to the JDBC data source. Otherwise, if value sets to true, LIMIT is pushed down to the JDBC data source. SPARK still applies LIMIT on the result from data source even if LIMIT is pushed down. + The option to enable or disable LIMIT push-down into V2 JDBC data source. The LIMIT push-down also includes LIMIT + SORT , a.k.a. the Top N operator. The default value is false, in which case Spark does not push down LIMIT or LIMIT with SORT to the JDBC data source. Otherwise, if sets to true, LIMIT or LIMIT with SORT is pushed down to the JDBC data source. If numPartitions is greater than 1, SPARK still applies LIMIT or LIMIT with SORT on the result from data source even if LIMIT or LIMIT with SORT is pushed down. Otherwise, if LIMIT or LIMIT with SORT is pushed down and numPartitions equals to 1, SPARK will not apply LIMIT or LIMIT with SORT on the result from data source. read
        ",{valign:"top",colSpan:W(a),"class":a.oClasses.sRowEmpty}).html(c))[0];A(a,"aoHeaderCallback","header",[f(a.nTHead).children("tr")[0], -Oa(a),g,n,l]);A(a,"aoFooterCallback","footer",[f(a.nTFoot).children("tr")[0],Oa(a),g,n,l]);d=f(a.nTBody);d.children().detach();d.append(f(b));A(a,"aoDrawCallback","draw",[a]);a.bSorted=!1;a.bFiltered=!1;a.bDrawing=!1}}function V(a,b){var c=a.oFeatures,d=c.bFilter;c.bSort&&rb(a);d?ia(a,a.oPreviousSearch):a.aiDisplay=a.aiDisplayMaster.slice();!0!==b&&(a._iDisplayStart=0);a._drawHold=b;S(a);a._drawHold=!1}function sb(a){var b=a.oClasses,c=f(a.nTable);c=f("
        ").insertBefore(c);var d=a.oFeatures,e= -f("
        ",{id:a.sTableId+"_wrapper","class":b.sWrapper+(a.nTFoot?"":" "+b.sNoFooter)});a.nHolding=c[0];a.nTableWrapper=e[0];a.nTableReinsertBefore=a.nTable.nextSibling;for(var h=a.sDom.split(""),g,k,l,n,m,p,u=0;u")[0];n=h[u+1];if("'"==n||'"'==n){m="";for(p=2;h[u+p]!=n;)m+=h[u+p],p++;"H"==m?m=b.sJUIHeader:"F"==m&&(m=b.sJUIFooter);-1!=m.indexOf(".")?(n=m.split("."),l.id=n[0].substr(1,n[0].length-1),l.className=n[1]):"#"==m.charAt(0)?l.id=m.substr(1, -m.length-1):l.className=m;u+=p}e.append(l);e=f(l)}else if(">"==k)e=e.parent();else if("l"==k&&d.bPaginate&&d.bLengthChange)g=tb(a);else if("f"==k&&d.bFilter)g=ub(a);else if("r"==k&&d.bProcessing)g=vb(a);else if("t"==k)g=wb(a);else if("i"==k&&d.bInfo)g=xb(a);else if("p"==k&&d.bPaginate)g=yb(a);else if(0!==q.ext.feature.length)for(l=q.ext.feature,p=0,n=l.length;p',k=d.sSearch;k=k.match(/_INPUT_/)?k.replace("_INPUT_",g):k+g;b=f("
        ",{id:h.f?null:c+"_filter","class":b.sFilter}).append(f("
        ").addClass(b.sLength);a.aanFeatures.l||(l[0].id=c+"_length");l.children().append(a.oLanguage.sLengthMenu.replace("_MENU_", -e[0].outerHTML));f("select",l).val(a._iDisplayLength).on("change.DT",function(b){Va(a,f(this).val());S(a)});f(a.nTable).on("length.dt.DT",function(b,c,d){a===c&&f("select",l).val(d)});return l[0]}function yb(a){var b=a.sPaginationType,c=q.ext.pager[b],d="function"===typeof c,e=function(a){S(a)};b=f("
        ").addClass(a.oClasses.sPaging+b)[0];var h=a.aanFeatures;d||c.fnInit(a,b,e);h.p||(b.id=a.sTableId+"_paginate",a.aoDrawCallback.push({fn:function(a){if(d){var b=a._iDisplayStart,g=a._iDisplayLength, -f=a.fnRecordsDisplay(),m=-1===g;b=m?0:Math.ceil(b/g);g=m?1:Math.ceil(f/g);f=c(b,g);var p;m=0;for(p=h.p.length;mh&&(d=0)):"first"==b?d=0:"previous"==b?(d=0<=e?d-e:0,0>d&&(d=0)):"next"==b?d+e",{id:a.aanFeatures.r?null:a.sTableId+"_processing","class":a.oClasses.sProcessing}).html(a.oLanguage.sProcessing).insertBefore(a.nTable)[0]}function K(a,b){a.oFeatures.bProcessing&&f(a.aanFeatures.r).css("display",b?"block":"none");A(a,null,"processing",[a,b])}function wb(a){var b=f(a.nTable);b.attr("role","grid");var c=a.oScroll;if(""===c.sX&&""===c.sY)return a.nTable;var d=c.sX,e=c.sY, -h=a.oClasses,g=b.children("caption"),k=g.length?g[0]._captionSide:null,l=f(b[0].cloneNode(!1)),n=f(b[0].cloneNode(!1)),m=b.children("tfoot");m.length||(m=null);l=f("
        ",{"class":h.sScrollWrapper}).append(f("
        ",{"class":h.sScrollHead}).css({overflow:"hidden",position:"relative",border:0,width:d?d?B(d):null:"100%"}).append(f("
        ",{"class":h.sScrollHeadInner}).css({"box-sizing":"content-box",width:c.sXInner||"100%"}).append(l.removeAttr("id").css("margin-left",0).append("top"===k?g:null).append(b.children("thead"))))).append(f("
        ", -{"class":h.sScrollBody}).css({position:"relative",overflow:"auto",width:d?B(d):null}).append(b));m&&l.append(f("
        ",{"class":h.sScrollFoot}).css({overflow:"hidden",border:0,width:d?d?B(d):null:"100%"}).append(f("
        ",{"class":h.sScrollFootInner}).append(n.removeAttr("id").css("margin-left",0).append("bottom"===k?g:null).append(b.children("tfoot")))));b=l.children();var p=b[0];h=b[1];var u=m?b[2]:null;if(d)f(h).on("scroll.DT",function(a){a=this.scrollLeft;p.scrollLeft=a;m&&(u.scrollLeft=a)}); -f(h).css(e&&c.bCollapse?"max-height":"height",e);a.nScrollHead=p;a.nScrollBody=h;a.nScrollFoot=u;a.aoDrawCallback.push({fn:na,sName:"scrolling"});return l[0]}function na(a){var b=a.oScroll,c=b.sX,d=b.sXInner,e=b.sY;b=b.iBarWidth;var h=f(a.nScrollHead),g=h[0].style,k=h.children("div"),l=k[0].style,n=k.children("table");k=a.nScrollBody;var m=f(k),w=k.style,u=f(a.nScrollFoot).children("div"),q=u.children("table"),t=f(a.nTHead),r=f(a.nTable),v=r[0],za=v.style,T=a.nTFoot?f(a.nTFoot):null,A=a.oBrowser, -x=A.bScrollOversize,ac=J(a.aoColumns,"nTh"),Ya=[],y=[],z=[],C=[],G,H=function(a){a=a.style;a.paddingTop="0";a.paddingBottom="0";a.borderTopWidth="0";a.borderBottomWidth="0";a.height=0};var D=k.scrollHeight>k.clientHeight;if(a.scrollBarVis!==D&&a.scrollBarVis!==p)a.scrollBarVis=D,aa(a);else{a.scrollBarVis=D;r.children("thead, tfoot").remove();if(T){var E=T.clone().prependTo(r);var F=T.find("tr");E=E.find("tr")}var I=t.clone().prependTo(r);t=t.find("tr");D=I.find("tr");I.find("th, td").removeAttr("tabindex"); -c||(w.width="100%",h[0].style.width="100%");f.each(ua(a,I),function(b,c){G=ba(a,b);c.style.width=a.aoColumns[G].sWidth});T&&N(function(a){a.style.width=""},E);h=r.outerWidth();""===c?(za.width="100%",x&&(r.find("tbody").height()>k.offsetHeight||"scroll"==m.css("overflow-y"))&&(za.width=B(r.outerWidth()-b)),h=r.outerWidth()):""!==d&&(za.width=B(d),h=r.outerWidth());N(H,D);N(function(a){z.push(a.innerHTML);Ya.push(B(f(a).css("width")))},D);N(function(a,b){-1!==f.inArray(a,ac)&&(a.style.width=Ya[b])}, -t);f(D).height(0);T&&(N(H,E),N(function(a){C.push(a.innerHTML);y.push(B(f(a).css("width")))},E),N(function(a,b){a.style.width=y[b]},F),f(E).height(0));N(function(a,b){a.innerHTML='
        '+z[b]+"
        ";a.childNodes[0].style.height="0";a.childNodes[0].style.overflow="hidden";a.style.width=Ya[b]},D);T&&N(function(a,b){a.innerHTML='
        '+C[b]+"
        ";a.childNodes[0].style.height="0";a.childNodes[0].style.overflow="hidden";a.style.width=y[b]},E);r.outerWidth()< -h?(F=k.scrollHeight>k.offsetHeight||"scroll"==m.css("overflow-y")?h+b:h,x&&(k.scrollHeight>k.offsetHeight||"scroll"==m.css("overflow-y"))&&(za.width=B(F-b)),""!==c&&""===d||O(a,1,"Possible column misalignment",6)):F="100%";w.width=B(F);g.width=B(F);T&&(a.nScrollFoot.style.width=B(F));!e&&x&&(w.height=B(v.offsetHeight+b));c=r.outerWidth();n[0].style.width=B(c);l.width=B(c);d=r.height()>k.clientHeight||"scroll"==m.css("overflow-y");e="padding"+(A.bScrollbarLeft?"Left":"Right");l[e]=d?b+"px":"0px";T&& -(q[0].style.width=B(c),u[0].style.width=B(c),u[0].style[e]=d?b+"px":"0px");r.children("colgroup").insertBefore(r.children("thead"));m.trigger("scroll");!a.bSorted&&!a.bFiltered||a._drawHold||(k.scrollTop=0)}}function N(a,b,c){for(var d=0,e=0,h=b.length,g,k;e").appendTo(k.find("tbody"));k.find("thead, tfoot").remove(); -k.append(f(a.nTHead).clone()).append(f(a.nTFoot).clone());k.find("tfoot th, tfoot td").css("width","");n=ua(a,k.find("thead")[0]);for(q=0;q").css({width:r.sWidthOrig,margin:0,padding:0,border:0,height:1}));if(a.aoData.length)for(q=0;q").css(h|| -e?{position:"absolute",top:0,left:0,height:1,right:0,overflow:"hidden"}:{}).append(k).appendTo(p);h&&g?k.width(g):h?(k.css("width","auto"),k.removeAttr("width"),k.width()").css("width",B(a)).appendTo(b||y.body);b=a[0].offsetWidth;a.remove();return b}function Kb(a,b){var c=Lb(a,b);if(0>c)return null;var d=a.aoData[c];return d.nTr?d.anCells[b]:f("
        ").html(I(a,c,b,"display"))[0]}function Lb(a,b){for(var c,d=-1,e=-1,h=0,g=a.aoData.length;hd&&(d=c.length,e=h);return e} -function B(a){return null===a?"0px":"number"==typeof a?0>a?"0px":a+"px":a.match(/\d$/)?a+"px":a}function Y(a){var b=[],c=a.aoColumns;var d=a.aaSortingFixed;var e=f.isPlainObject(d);var h=[];var g=function(a){a.length&&!f.isArray(a[0])?h.push(a):f.merge(h,a)};f.isArray(d)&&g(d);e&&d.pre&&g(d.pre);g(a.aaSorting);e&&d.post&&g(d.post);for(a=0;an?1:0; -if(0!==m)return"asc"===l.dir?m:-m}m=c[a];n=c[b];return mn?1:0}):g.sort(function(a,b){var h,g=k.length,f=e[a]._aSortData,l=e[b]._aSortData;for(h=0;hp?1:0})}a.bSorted=!0}function Nb(a){var b=a.aoColumns,c=Y(a);a=a.oLanguage.oAria;for(var d=0,e=b.length;d/g,"");var f=h.nTh;f.removeAttribute("aria-sort"); -h.bSortable&&(0e?e+1:3))}e=0;for(h=d.length;ee?e+1:3))}a.aLastSort=d}function Mb(a,b){var c=a.aoColumns[b],d=q.ext.order[c.sSortDataType],e;d&&(e=d.call(a.oInstance,a,b,ca(a,b)));for(var h,g=q.ext.type.order[c.sType+"-pre"],k=0,f=a.aoData.length;k=h.length?[0,c[1]]:c)}));b.search!==p&&f.extend(a.oPreviousSearch, -Gb(b.search));if(b.columns)for(d=0,e=b.columns.length;d=c&&(b=c-d);b-=b%d;if(-1===d||0>b)b=0;a._iDisplayStart=b}function Ra(a,b){a=a.renderer;var c=q.ext.renderer[b];return f.isPlainObject(a)&&a[b]?c[a[b]]||c._:"string"===typeof a?c[a]||c._:c._}function D(a){return a.oFeatures.bServerSide?"ssp":a.ajax||a.sAjaxSource?"ajax":"dom"}function ka(a,b){var c=Pb.numbers_length,d=Math.floor(c/2);b<=c?a=Z(0,b):a<=d?(a=Z(0,c-2),a.push("ellipsis"),a.push(b-1)):(a>=b-1-d?a=Z(b-(c-2),b):(a=Z(a-d+2,a+d-1),a.push("ellipsis"), -a.push(b-1)),a.splice(0,0,"ellipsis"),a.splice(0,0,0));a.DT_el="span";return a}function Ha(a){f.each({num:function(b){return Da(b,a)},"num-fmt":function(b){return Da(b,a,bb)},"html-num":function(b){return Da(b,a,Ea)},"html-num-fmt":function(b){return Da(b,a,Ea,bb)}},function(b,c){C.type.order[b+a+"-pre"]=c;b.match(/^html\-/)&&(C.type.search[b+a]=C.type.search.html)})}function Qb(a){return function(){var b=[Ca(this[q.ext.iApiIndex])].concat(Array.prototype.slice.call(arguments));return q.ext.internal[a].apply(this, -b)}}var q=function(a){this.$=function(a,b){return this.api(!0).$(a,b)};this._=function(a,b){return this.api(!0).rows(a,b).data()};this.api=function(a){return a?new v(Ca(this[C.iApiIndex])):new v(this)};this.fnAddData=function(a,b){var c=this.api(!0);a=f.isArray(a)&&(f.isArray(a[0])||f.isPlainObject(a[0]))?c.rows.add(a):c.row.add(a);(b===p||b)&&c.draw();return a.flatten().toArray()};this.fnAdjustColumnSizing=function(a){var b=this.api(!0).columns.adjust(),c=b.settings()[0],d=c.oScroll;a===p||a?b.draw(!1): -(""!==d.sX||""!==d.sY)&&na(c)};this.fnClearTable=function(a){var b=this.api(!0).clear();(a===p||a)&&b.draw()};this.fnClose=function(a){this.api(!0).row(a).child.hide()};this.fnDeleteRow=function(a,b,c){var d=this.api(!0);a=d.rows(a);var e=a.settings()[0],h=e.aoData[a[0][0]];a.remove();b&&b.call(this,e,h);(c===p||c)&&d.draw();return h};this.fnDestroy=function(a){this.api(!0).destroy(a)};this.fnDraw=function(a){this.api(!0).draw(a)};this.fnFilter=function(a,b,c,d,e,f){e=this.api(!0);null===b||b===p? -e.search(a,c,d,f):e.column(b).search(a,c,d,f);e.draw()};this.fnGetData=function(a,b){var c=this.api(!0);if(a!==p){var d=a.nodeName?a.nodeName.toLowerCase():"";return b!==p||"td"==d||"th"==d?c.cell(a,b).data():c.row(a).data()||null}return c.data().toArray()};this.fnGetNodes=function(a){var b=this.api(!0);return a!==p?b.row(a).node():b.rows().nodes().flatten().toArray()};this.fnGetPosition=function(a){var b=this.api(!0),c=a.nodeName.toUpperCase();return"TR"==c?b.row(a).index():"TD"==c||"TH"==c?(a=b.cell(a).index(), -[a.row,a.columnVisible,a.column]):null};this.fnIsOpen=function(a){return this.api(!0).row(a).child.isShown()};this.fnOpen=function(a,b,c){return this.api(!0).row(a).child(b,c).show().child()[0]};this.fnPageChange=function(a,b){a=this.api(!0).page(a);(b===p||b)&&a.draw(!1)};this.fnSetColumnVis=function(a,b,c){a=this.api(!0).column(a).visible(b);(c===p||c)&&a.columns.adjust().draw()};this.fnSettings=function(){return Ca(this[C.iApiIndex])};this.fnSort=function(a){this.api(!0).order(a).draw()};this.fnSortListener= -function(a,b,c){this.api(!0).order.listener(a,b,c)};this.fnUpdate=function(a,b,c,d,e){var h=this.api(!0);c===p||null===c?h.row(b).data(a):h.cell(b,c).data(a);(e===p||e)&&h.columns.adjust();(d===p||d)&&h.draw();return 0};this.fnVersionCheck=C.fnVersionCheck;var b=this,c=a===p,d=this.length;c&&(a={});this.oApi=this.internal=C.internal;for(var e in q.ext.internal)e&&(this[e]=Qb(e));this.each(function(){var e={},g=1").appendTo(w));r.nTHead=b[0];b=w.children("tbody");0===b.length&&(b=f("
        ",{valign:"top",colSpan:na(a),"class":a.oClasses.sRowEmpty}).html(d))[0];H(a,"aoHeaderCallback","header",[k(a.nTHead).children("tr")[0], +cb(a),h,m,n]);H(a,"aoFooterCallback","footer",[k(a.nTFoot).children("tr")[0],cb(a),h,m,n]);e=k(a.nTBody);e.children().detach();e.append(k(c));H(a,"aoDrawCallback","draw",[a]);a.bSorted=!1;a.bFiltered=!1;a.bDrawing=!1}}function ja(a,b){var c=a.oFeatures,d=c.bFilter;c.bSort&&Hb(a);d?ya(a,a.oPreviousSearch):a.aiDisplay=a.aiDisplayMaster.slice();!0!==b&&(a._iDisplayStart=0);a._drawHold=b;fa(a);a._drawHold=!1}function Ib(a){var b=a.oClasses,c=k(a.nTable);c=k("
        ").insertBefore(c);var d=a.oFeatures, +e=k("
        ",{id:a.sTableId+"_wrapper","class":b.sWrapper+(a.nTFoot?"":" "+b.sNoFooter)});a.nHolding=c[0];a.nTableWrapper=e[0];a.nTableReinsertBefore=a.nTable.nextSibling;for(var f=a.sDom.split(""),g,h,l,n,m,p,t=0;t")[0];n=f[t+1];if("'"==n||'"'==n){m="";for(p=2;f[t+p]!=n;)m+=f[t+p],p++;"H"==m?m=b.sJUIHeader:"F"==m&&(m=b.sJUIFooter);-1!=m.indexOf(".")?(n=m.split("."),l.id=n[0].substr(1,n[0].length-1),l.className=n[1]):"#"==m.charAt(0)?l.id=m.substr(1, +m.length-1):l.className=m;t+=p}e.append(l);e=k(l)}else if(">"==h)e=e.parent();else if("l"==h&&d.bPaginate&&d.bLengthChange)g=Jb(a);else if("f"==h&&d.bFilter)g=Kb(a);else if("r"==h&&d.bProcessing)g=Lb(a);else if("t"==h)g=Mb(a);else if("i"==h&&d.bInfo)g=Nb(a);else if("p"==h&&d.bPaginate)g=Ob(a);else if(0!==u.ext.feature.length)for(l=u.ext.feature,p=0,n=l.length;p',h=d.sSearch;h=h.match(/_INPUT_/)?h.replace("_INPUT_",g):h+g;b=k("
        ",{id:f.f?null:c+"_filter","class":b.sFilter}).append(k("
        ").addClass(b.sLength);a.aanFeatures.l||(l[0].id=c+"_length");l.children().append(a.oLanguage.sLengthMenu.replace("_MENU_", +e[0].outerHTML));k("select",l).val(a._iDisplayLength).on("change.DT",function(n){jb(a,k(this).val());fa(a)});k(a.nTable).on("length.dt.DT",function(n,m,p){a===m&&k("select",l).val(p)});return l[0]}function Ob(a){var b=a.sPaginationType,c=u.ext.pager[b],d="function"===typeof c,e=function(g){fa(g)};b=k("
        ").addClass(a.oClasses.sPaging+b)[0];var f=a.aanFeatures;d||c.fnInit(a,b,e);f.p||(b.id=a.sTableId+"_paginate",a.aoDrawCallback.push({fn:function(g){if(d){var h=g._iDisplayStart,l=g._iDisplayLength, +n=g.fnRecordsDisplay(),m=-1===l;h=m?0:Math.ceil(h/l);l=m?1:Math.ceil(n/l);n=c(h,l);var p;m=0;for(p=f.p.length;mf&&(d=0)):"first"==b?d=0:"previous"==b?(d=0<=e?d-e:0,0>d&&(d=0)):"next"==b?d+e",{id:a.aanFeatures.r?null:a.sTableId+"_processing","class":a.oClasses.sProcessing}).html(a.oLanguage.sProcessing).insertBefore(a.nTable)[0]}function U(a,b){a.oFeatures.bProcessing&&k(a.aanFeatures.r).css("display",b?"block":"none");H(a,null,"processing",[a,b])}function Mb(a){var b=k(a.nTable);b.attr("role","grid");var c=a.oScroll;if(""===c.sX&&""===c.sY)return a.nTable;var d=c.sX, +e=c.sY,f=a.oClasses,g=b.children("caption"),h=g.length?g[0]._captionSide:null,l=k(b[0].cloneNode(!1)),n=k(b[0].cloneNode(!1)),m=b.children("tfoot");m.length||(m=null);l=k("
        ",{"class":f.sScrollWrapper}).append(k("
        ",{"class":f.sScrollHead}).css({overflow:"hidden",position:"relative",border:0,width:d?d?K(d):null:"100%"}).append(k("
        ",{"class":f.sScrollHeadInner}).css({"box-sizing":"content-box",width:c.sXInner||"100%"}).append(l.removeAttr("id").css("margin-left",0).append("top"===h? +g:null).append(b.children("thead"))))).append(k("
        ",{"class":f.sScrollBody}).css({position:"relative",overflow:"auto",width:d?K(d):null}).append(b));m&&l.append(k("
        ",{"class":f.sScrollFoot}).css({overflow:"hidden",border:0,width:d?d?K(d):null:"100%"}).append(k("
        ",{"class":f.sScrollFootInner}).append(n.removeAttr("id").css("margin-left",0).append("bottom"===h?g:null).append(b.children("tfoot")))));b=l.children();var p=b[0];f=b[1];var t=m?b[2]:null;if(d)k(f).on("scroll.DT",function(v){v= +this.scrollLeft;p.scrollLeft=v;m&&(t.scrollLeft=v)});k(f).css("max-height",e);c.bCollapse||k(f).css("height",e);a.nScrollHead=p;a.nScrollBody=f;a.nScrollFoot=t;a.aoDrawCallback.push({fn:Fa,sName:"scrolling"});return l[0]}function Fa(a){var b=a.oScroll,c=b.sX,d=b.sXInner,e=b.sY;b=b.iBarWidth;var f=k(a.nScrollHead),g=f[0].style,h=f.children("div"),l=h[0].style,n=h.children("table");h=a.nScrollBody;var m=k(h),p=h.style,t=k(a.nScrollFoot).children("div"),v=t.children("table"),x=k(a.nTHead),r=k(a.nTable), +A=r[0],D=A.style,I=a.nTFoot?k(a.nTFoot):null,W=a.oBrowser,M=W.bScrollOversize,B=T(a.aoColumns,"nTh"),E=[],aa=[],X=[],Aa=[],mb,Ba=function(F){F=F.style;F.paddingTop="0";F.paddingBottom="0";F.borderTopWidth="0";F.borderBottomWidth="0";F.height=0};var ha=h.scrollHeight>h.clientHeight;if(a.scrollBarVis!==ha&&a.scrollBarVis!==q)a.scrollBarVis=ha,ra(a);else{a.scrollBarVis=ha;r.children("thead, tfoot").remove();if(I){var ka=I.clone().prependTo(r);var la=I.find("tr");ka=ka.find("tr")}var nb=x.clone().prependTo(r); +x=x.find("tr");ha=nb.find("tr");nb.find("th, td").removeAttr("tabindex");c||(p.width="100%",f[0].style.width="100%");k.each(La(a,nb),function(F,Y){mb=sa(a,F);Y.style.width=a.aoColumns[mb].sWidth});I&&ba(function(F){F.style.width=""},ka);f=r.outerWidth();""===c?(D.width="100%",M&&(r.find("tbody").height()>h.offsetHeight||"scroll"==m.css("overflow-y"))&&(D.width=K(r.outerWidth()-b)),f=r.outerWidth()):""!==d&&(D.width=K(d),f=r.outerWidth());ba(Ba,ha);ba(function(F){X.push(F.innerHTML);E.push(K(k(F).css("width")))}, +ha);ba(function(F,Y){-1!==k.inArray(F,B)&&(F.style.width=E[Y])},x);k(ha).height(0);I&&(ba(Ba,ka),ba(function(F){Aa.push(F.innerHTML);aa.push(K(k(F).css("width")))},ka),ba(function(F,Y){F.style.width=aa[Y]},la),k(ka).height(0));ba(function(F,Y){F.innerHTML='
        '+X[Y]+"
        ";F.childNodes[0].style.height="0";F.childNodes[0].style.overflow="hidden";F.style.width=E[Y]},ha);I&&ba(function(F,Y){F.innerHTML='
        '+Aa[Y]+"
        ";F.childNodes[0].style.height= +"0";F.childNodes[0].style.overflow="hidden";F.style.width=aa[Y]},ka);r.outerWidth()h.offsetHeight||"scroll"==m.css("overflow-y")?f+b:f,M&&(h.scrollHeight>h.offsetHeight||"scroll"==m.css("overflow-y"))&&(D.width=K(la-b)),""!==c&&""===d||ca(a,1,"Possible column misalignment",6)):la="100%";p.width=K(la);g.width=K(la);I&&(a.nScrollFoot.style.width=K(la));!e&&M&&(p.height=K(A.offsetHeight+b));c=r.outerWidth();n[0].style.width=K(c);l.width=K(c);d=r.height()>h.clientHeight||"scroll"== +m.css("overflow-y");e="padding"+(W.bScrollbarLeft?"Left":"Right");l[e]=d?b+"px":"0px";I&&(v[0].style.width=K(c),t[0].style.width=K(c),t[0].style[e]=d?b+"px":"0px");r.children("colgroup").insertBefore(r.children("thead"));m.trigger("scroll");!a.bSorted&&!a.bFiltered||a._drawHold||(h.scrollTop=0)}}function ba(a,b,c){for(var d=0,e=0,f=b.length,g,h;e").appendTo(h.find("tbody"));h.find("thead, tfoot").remove();h.append(k(a.nTHead).clone()).append(k(a.nTFoot).clone());h.find("tfoot th, tfoot td").css("width","");n=La(a,h.find("thead")[0]);for(v=0;v").css({width:r.sWidthOrig,margin:0,padding:0,border:0,height:1}));if(a.aoData.length)for(v=0;v").css(f||e?{position:"absolute",top:0,left:0,height:1,right:0,overflow:"hidden"}:{}).append(h).appendTo(p);f&&g?h.width(g):f?(h.css("width","auto"),h.removeAttr("width"),h.width()").css("width",K(a)).appendTo(b||z.body);b=a[0].offsetWidth;a.remove();return b}function $b(a,b){var c=ac(a,b);if(0>c)return null;var d=a.aoData[c];return d.nTr?d.anCells[b]:k("
        ").html(S(a,c,b,"display"))[0]}function ac(a,b){for(var c,d=-1,e=-1,f=0,g=a.aoData.length;fd&&(d=c.length,e=f);return e}function K(a){return null===a?"0px":"number"==typeof a?0>a?"0px":a+"px":a.match(/\d$/)?a+"px":a}function pa(a){var b=[],c=a.aoColumns;var d=a.aaSortingFixed;var e=k.isPlainObject(d);var f=[];var g=function(m){m.length&&!Array.isArray(m[0])?f.push(m):k.merge(f,m)};Array.isArray(d)&&g(d);e&&d.pre&&g(d.pre);g(a.aaSorting);e&&d.post&&g(d.post);for(a=0;aI?1:0;if(0!==D)return"asc"===A.dir?D:-D}D=c[m];I=c[p];return DI?1:0}):g.sort(function(m,p){var t,v=h.length,x=e[m]._aSortData,r=e[p]._aSortData;for(t=0;tI?1:0})}a.bSorted=!0}function cc(a){var b=a.aoColumns,c=pa(a);a=a.oLanguage.oAria;for(var d=0,e=b.length;d/g,"");var l=f.nTh;l.removeAttribute("aria-sort");f.bSortable&&(0e?e+1:3))}e=0;for(f=d.length;ee?e+1:3))}a.aLastSort=d}function bc(a,b){var c=a.aoColumns[b],d=u.ext.order[c.sSortDataType],e;d&&(e=d.call(a.oInstance,a,b,ta(a,b)));for(var f,g=u.ext.type.order[c.sType+ +"-pre"],h=0,l=a.aoData.length;h= +f.length?[0,m[1]]:m)}));h.search!==q&&k.extend(a.oPreviousSearch,Wb(h.search));if(h.columns)for(d=0,e=h.columns.length;d=c&&(b=c-d);b-=b%d;if(-1===d||0>b)b=0;a._iDisplayStart=b}function fb(a,b){a=a.renderer;var c=u.ext.renderer[b];return k.isPlainObject(a)&&a[b]?c[a[b]]||c._:"string"===typeof a?c[a]||c._:c._}function P(a){return a.oFeatures.bServerSide?"ssp":a.ajax||a.sAjaxSource?"ajax":"dom"}function Ca(a,b){var c=ec.numbers_length,d=Math.floor(c/2); +b<=c?a=qa(0,b):a<=d?(a=qa(0,c-2),a.push("ellipsis"),a.push(b-1)):(a>=b-1-d?a=qa(b-(c-2),b):(a=qa(a-d+2,a+d-1),a.push("ellipsis"),a.push(b-1)),a.splice(0,0,"ellipsis"),a.splice(0,0,0));a.DT_el="span";return a}function Wa(a){k.each({num:function(b){return Ta(b,a)},"num-fmt":function(b){return Ta(b,a,rb)},"html-num":function(b){return Ta(b,a,Ua)},"html-num-fmt":function(b){return Ta(b,a,Ua,rb)}},function(b,c){L.type.order[b+a+"-pre"]=c;b.match(/^html\-/)&&(L.type.search[b+a]=L.type.search.html)})}function fc(a){return function(){var b= +[Sa(this[u.ext.iApiIndex])].concat(Array.prototype.slice.call(arguments));return u.ext.internal[a].apply(this,b)}}var u=function(a){this.$=function(f,g){return this.api(!0).$(f,g)};this._=function(f,g){return this.api(!0).rows(f,g).data()};this.api=function(f){return f?new C(Sa(this[L.iApiIndex])):new C(this)};this.fnAddData=function(f,g){var h=this.api(!0);f=Array.isArray(f)&&(Array.isArray(f[0])||k.isPlainObject(f[0]))?h.rows.add(f):h.row.add(f);(g===q||g)&&h.draw();return f.flatten().toArray()}; +this.fnAdjustColumnSizing=function(f){var g=this.api(!0).columns.adjust(),h=g.settings()[0],l=h.oScroll;f===q||f?g.draw(!1):(""!==l.sX||""!==l.sY)&&Fa(h)};this.fnClearTable=function(f){var g=this.api(!0).clear();(f===q||f)&&g.draw()};this.fnClose=function(f){this.api(!0).row(f).child.hide()};this.fnDeleteRow=function(f,g,h){var l=this.api(!0);f=l.rows(f);var n=f.settings()[0],m=n.aoData[f[0][0]];f.remove();g&&g.call(this,n,m);(h===q||h)&&l.draw();return m};this.fnDestroy=function(f){this.api(!0).destroy(f)}; +this.fnDraw=function(f){this.api(!0).draw(f)};this.fnFilter=function(f,g,h,l,n,m){n=this.api(!0);null===g||g===q?n.search(f,h,l,m):n.column(g).search(f,h,l,m);n.draw()};this.fnGetData=function(f,g){var h=this.api(!0);if(f!==q){var l=f.nodeName?f.nodeName.toLowerCase():"";return g!==q||"td"==l||"th"==l?h.cell(f,g).data():h.row(f).data()||null}return h.data().toArray()};this.fnGetNodes=function(f){var g=this.api(!0);return f!==q?g.row(f).node():g.rows().nodes().flatten().toArray()};this.fnGetPosition= +function(f){var g=this.api(!0),h=f.nodeName.toUpperCase();return"TR"==h?g.row(f).index():"TD"==h||"TH"==h?(f=g.cell(f).index(),[f.row,f.columnVisible,f.column]):null};this.fnIsOpen=function(f){return this.api(!0).row(f).child.isShown()};this.fnOpen=function(f,g,h){return this.api(!0).row(f).child(g,h).show().child()[0]};this.fnPageChange=function(f,g){f=this.api(!0).page(f);(g===q||g)&&f.draw(!1)};this.fnSetColumnVis=function(f,g,h){f=this.api(!0).column(f).visible(g);(h===q||h)&&f.columns.adjust().draw()}; +this.fnSettings=function(){return Sa(this[L.iApiIndex])};this.fnSort=function(f){this.api(!0).order(f).draw()};this.fnSortListener=function(f,g,h){this.api(!0).order.listener(f,g,h)};this.fnUpdate=function(f,g,h,l,n){var m=this.api(!0);h===q||null===h?m.row(g).data(f):m.cell(g,h).data(f);(n===q||n)&&m.columns.adjust();(l===q||l)&&m.draw();return 0};this.fnVersionCheck=L.fnVersionCheck;var b=this,c=a===q,d=this.length;c&&(a={});this.oApi=this.internal=L.internal;for(var e in u.ext.internal)e&&(this[e]= +fc(e));this.each(function(){var f={},g=1").appendTo(p));r.nTHead=E[0];var aa=p.children("tbody");0===aa.length&&(aa=k("
        spark.dynamicAllocation.shuffleTracking.enabled false - Experimental. Enables shuffle file tracking for executors, which allows dynamic allocation + Enables shuffle file tracking for executors, which allows dynamic allocation without the need for an external shuffle service. This option will try to keep alive executors that are storing shuffle data for active jobs. 3.0.0
        spark.kubernetes.memoryOverheadFactor0.1 + This sets the Memory Overhead Factor that will allocate memory to non-JVM memory, which includes off-heap memory allocations, non-JVM tasks, various systems processes, and tmpfs-based local directories when spark.kubernetes.local.dirs.tmpfs is true. For JVM-based jobs this value will default to 0.10 and 0.40 for non-JVM jobs. + This is done as non-JVM tasks need more non-JVM heap space and such tasks commonly fail with "Memory Overhead Exceeded" errors. This preempts this error with a higher default. + This will be overridden by the value set by spark.driver.memoryOverheadFactor and spark.executor.memoryOverheadFactor explicitly. + 2.4.0
        spark.kubernetes.pyspark.pythonVersion "3"
        spark.driver.log.layout%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex The layout for the driver logs that are synced to spark.driver.log.dfsDir. If this is not configured, it uses the layout for the first appender defined in log4j2.properties. If that is also not configured, driver logs diff --git a/external/avro/src/test/resources/log4j2.properties b/external/avro/src/test/resources/log4j2.properties index 31a235c5d8297..02746f58f4687 100644 --- a/external/avro/src/test/resources/log4j2.properties +++ b/external/avro/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.spark-project.jetty diff --git a/external/docker-integration-tests/src/test/resources/log4j2.properties b/external/docker-integration-tests/src/test/resources/log4j2.properties index a6db7b73f6f31..bc2edf5a7714b 100644 --- a/external/docker-integration-tests/src/test/resources/log4j2.properties +++ b/external/docker-integration-tests/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Tests that launch java subprocesses can set the "test.appender" system property to # "console" to avoid having the child process's logs overwrite the unit test's @@ -33,7 +33,7 @@ appender.console.type = Console appender.console.name = console appender.console.target = SYSTEM_ERR appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.sparkproject.jetty diff --git a/external/kafka-0-10-sql/src/test/resources/log4j2.properties b/external/kafka-0-10-sql/src/test/resources/log4j2.properties index 4c2d2d0b053e3..cb454ef0498f0 100644 --- a/external/kafka-0-10-sql/src/test/resources/log4j2.properties +++ b/external/kafka-0-10-sql/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.spark-project.jetty diff --git a/external/kafka-0-10-token-provider/src/test/resources/log4j2.properties b/external/kafka-0-10-token-provider/src/test/resources/log4j2.properties index 31a235c5d8297..02746f58f4687 100644 --- a/external/kafka-0-10-token-provider/src/test/resources/log4j2.properties +++ b/external/kafka-0-10-token-provider/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.spark-project.jetty diff --git a/external/kafka-0-10/src/test/resources/log4j2.properties b/external/kafka-0-10/src/test/resources/log4j2.properties index 31a235c5d8297..02746f58f4687 100644 --- a/external/kafka-0-10/src/test/resources/log4j2.properties +++ b/external/kafka-0-10/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.spark-project.jetty diff --git a/external/kinesis-asl/src/main/resources/log4j2.properties b/external/kinesis-asl/src/main/resources/log4j2.properties index 0c0904b088b97..9538957d5ddda 100644 --- a/external/kinesis-asl/src/main/resources/log4j2.properties +++ b/external/kinesis-asl/src/main/resources/log4j2.properties @@ -23,14 +23,14 @@ appender.file.type = File appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n%ex # Console appender appender.console.type = Console appender.console.name = STDOUT appender.console.target = SYSTEM_OUT appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex # Settings to quiet third party logs that are too verbose logger.jetty1.name = org.sparkproject.jetty diff --git a/external/kinesis-asl/src/test/resources/log4j2.properties b/external/kinesis-asl/src/test/resources/log4j2.properties index 08f43461b96ae..5f89859463a20 100644 --- a/external/kinesis-asl/src/test/resources/log4j2.properties +++ b/external/kinesis-asl/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.sparkproject.jetty diff --git a/graphx/src/test/resources/log4j2.properties b/graphx/src/test/resources/log4j2.properties index 08f43461b96ae..5f89859463a20 100644 --- a/graphx/src/test/resources/log4j2.properties +++ b/graphx/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.sparkproject.jetty diff --git a/hadoop-cloud/src/test/resources/log4j2.properties b/hadoop-cloud/src/test/resources/log4j2.properties index 640ec2e630536..01a9cafafa85e 100644 --- a/hadoop-cloud/src/test/resources/log4j2.properties +++ b/hadoop-cloud/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Tests that launch java subprocesses can set the "test.appender" system property to # "console" to avoid having the child process's logs overwrite the unit test's @@ -33,7 +33,7 @@ appender.console.type = Console appender.console.name = STDERR appender.console.target = SYSTEM_ERR appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %t: %m%n +appender.console.layout.pattern = %t: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.spark_project.jetty diff --git a/launcher/src/test/resources/log4j2.properties b/launcher/src/test/resources/log4j2.properties index f7f2d7cfe4c49..62d38fbf8f734 100644 --- a/launcher/src/test/resources/log4j2.properties +++ b/launcher/src/test/resources/log4j2.properties @@ -23,13 +23,13 @@ appender.file.type = File appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex appender.childproc.type = Console appender.childproc.name = childproc appender.childproc.target = SYSTEM_ERR appender.childproc.layout.type = PatternLayout -appender.childproc.layout.pattern = %t: %m%n +appender.childproc.layout.pattern = %t: %m%n%ex appender.outputredirtest.type = LogAppender appender.outputredirtest.name = outputredirtest diff --git a/mllib/src/test/resources/log4j2.properties b/mllib/src/test/resources/log4j2.properties index 101a732ea970e..4c0b6e675279a 100644 --- a/mllib/src/test/resources/log4j2.properties +++ b/mllib/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.sparkproject.jetty diff --git a/repl/src/test/resources/log4j2.properties b/repl/src/test/resources/log4j2.properties index 2654a3983a64f..b644dd5f7b9b2 100644 --- a/repl/src/test/resources/log4j2.properties +++ b/repl/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = file appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.sparkproject.jetty diff --git a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 2ff2120625bdc..69e1273f5fa98 100644 --- a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -285,7 +285,7 @@ class ReplSuite extends SparkFunSuite with BeforeAndAfterAll { |appender.console.target = SYSTEM_ERR |appender.console.follow = true |appender.console.layout.type = PatternLayout - |appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + |appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex | |# Set the log level for this class to ERROR same as the default setting. |logger.repl.name = org.apache.spark.repl.Main diff --git a/resource-managers/kubernetes/core/src/test/resources/log4j2.properties b/resource-managers/kubernetes/core/src/test/resources/log4j2.properties index 712e6d6c30cb6..210405374f099 100644 --- a/resource-managers/kubernetes/core/src/test/resources/log4j2.properties +++ b/resource-managers/kubernetes/core/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from a few verbose libraries. logger.jersey.name = com.sun.jersey diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/log-config-test-log4j.properties b/resource-managers/kubernetes/integration-tests/src/test/resources/log-config-test-log4j.properties index 17b8d598ac6f8..4aeea88625190 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/resources/log-config-test-log4j.properties +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/log-config-test-log4j.properties @@ -23,4 +23,4 @@ appender.console.type = Console appender.console.name = console appender.console.target = SYSTEM_ERR appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c: %m%n +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c: %m%n%ex diff --git a/resource-managers/kubernetes/integration-tests/src/test/resources/log4j2.properties b/resource-managers/kubernetes/integration-tests/src/test/resources/log4j2.properties index 75ac84ac6e1be..e7680d87bdf1d 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/resources/log4j2.properties +++ b/resource-managers/kubernetes/integration-tests/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/integration-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from a few verbose libraries. logger.jersey.name = com.sun.jersey diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala index 51ea1307236c8..5d1a57fb46ef2 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DecommissionSuite.scala @@ -47,7 +47,7 @@ private[spark] trait DecommissionSuite { k8sSuite: KubernetesSuite => |appender.console.name = console |appender.console.target = SYSTEM_OUT |appender.console.layout.type = PatternLayout - |appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + |appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex | |logger.spark.name = org.apache.spark |logger.spark.level = debug diff --git a/resource-managers/mesos/src/test/resources/log4j2.properties b/resource-managers/mesos/src/test/resources/log4j2.properties index a0d309ccb1d40..88847106120cc 100644 --- a/resource-managers/mesos/src/test/resources/log4j2.properties +++ b/resource-managers/mesos/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.sparkproject.jetty diff --git a/resource-managers/yarn/src/test/resources/log4j2.properties b/resource-managers/yarn/src/test/resources/log4j2.properties index 96107fb3a3aef..aa93bc14552b2 100644 --- a/resource-managers/yarn/src/test/resources/log4j2.properties +++ b/resource-managers/yarn/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from a few verbose libraries. logger.jersey.name = com.sun.jersey diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala index f92bcdd677540..87ea44255ccdb 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/BaseYarnClusterSuite.scala @@ -53,7 +53,7 @@ abstract class BaseYarnClusterSuite |appender.console.name = console |appender.console.target = SYSTEM_ERR |appender.console.layout.type = PatternLayout - |appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + |appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex |logger.jetty.name = org.sparkproject.jetty |logger.jetty.level = warn |logger.eclipse.name = org.eclipse.jetty diff --git a/sql/catalyst/src/test/resources/log4j2.properties b/sql/catalyst/src/test/resources/log4j2.properties index 08f43461b96ae..5f89859463a20 100644 --- a/sql/catalyst/src/test/resources/log4j2.properties +++ b/sql/catalyst/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.sparkproject.jetty diff --git a/sql/core/src/test/resources/log4j2.properties b/sql/core/src/test/resources/log4j2.properties index 2ab43f896fc31..b1f3a726a92a4 100644 --- a/sql/core/src/test/resources/log4j2.properties +++ b/sql/core/src/test/resources/log4j2.properties @@ -25,7 +25,7 @@ appender.console.type = Console appender.console.name = STDOUT appender.console.target = SYSTEM_OUT appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{HH:mm:ss.SSS} %p %c: %m%n +appender.console.layout.pattern = %d{HH:mm:ss.SSS} %p %c: %m%n%ex appender.console.filter.threshold.type = ThresholdFilter appender.console.filter.threshold.level = warn @@ -34,7 +34,7 @@ appender.file.type = File appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Set the logger level of File Appender to WARN appender.file.filter.threshold.type = ThresholdFilter diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIServiceUtils.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIServiceUtils.java index a371b1371703b..97ea1ca0d4c9c 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIServiceUtils.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/CLIServiceUtils.java @@ -29,9 +29,9 @@ public class CLIServiceUtils { private static final char SEARCH_STRING_ESCAPE = '\\'; public static final StringLayout verboseLayout = PatternLayout.newBuilder().withPattern( - "%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n").build(); + "%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n%ex").build(); public static final StringLayout nonVerboseLayout = PatternLayout.newBuilder().withPattern( - "%-5p : %m%n").build(); + "%-5p : %m%n%ex").build(); /** * Convert a SQL search pattern into an equivalent Java Regex. diff --git a/sql/hive-thriftserver/src/test/resources/log4j2.properties b/sql/hive-thriftserver/src/test/resources/log4j2.properties index 939335bf3ac8d..5a3681a2a7ec8 100644 --- a/sql/hive-thriftserver/src/test/resources/log4j2.properties +++ b/sql/hive-thriftserver/src/test/resources/log4j2.properties @@ -25,7 +25,7 @@ appender.console.type = Console appender.console.name = STDOUT appender.console.target = SYSTEM_OUT appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{HH:mm:ss.SSS} %p %c: %m%n +appender.console.layout.pattern = %d{HH:mm:ss.SSS} %p %c: %m%n%ex appender.console.filter.1.type = Filters @@ -43,7 +43,7 @@ appender.file.type = File appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex appender.file.filter.1.type = Filters diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index bcb8ef0cdfb61..15cc04f5bd594 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -1226,7 +1226,7 @@ abstract class HiveThriftServer2TestBase extends SparkFunSuite with BeforeAndAft |appender.console.name = console |appender.console.target = SYSTEM_ERR |appender.console.layout.type = PatternLayout - |appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + |appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex """.stripMargin, new File(s"$tempLog4jConf/log4j2.properties"), StandardCharsets.UTF_8) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala index 5d94be38de0c5..cd5bb1f6283f5 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/UISeleniumSuite.scala @@ -80,7 +80,7 @@ class UISeleniumSuite |appender.console.name = console |appender.console.target = SYSTEM_ERR |appender.console.layout.type = PatternLayout - |appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n + |appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex """.stripMargin, new File(s"$tempLog4jConf/log4j2.properties"), StandardCharsets.UTF_8) diff --git a/sql/hive/src/test/resources/log4j2.properties b/sql/hive/src/test/resources/log4j2.properties index 16b8924789130..cf9be6c68a508 100644 --- a/sql/hive/src/test/resources/log4j2.properties +++ b/sql/hive/src/test/resources/log4j2.properties @@ -25,7 +25,7 @@ appender.console.type = Console appender.console.name = STDOUT appender.console.target = SYSTEM_OUT appender.console.layout.type = PatternLayout -appender.console.layout.pattern = %d{HH:mm:ss.SSS} %p %c: %m%n +appender.console.layout.pattern = %d{HH:mm:ss.SSS} %p %c: %m%n%ex appender.console.filter.threshold.type = ThresholdFilter appender.console.filter.threshold.level = warn @@ -34,7 +34,7 @@ appender.file.type = File appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Set the logger level of File Appender to WARN appender.file.filter.threshold.type = ThresholdFilter diff --git a/streaming/src/test/resources/log4j2.properties b/streaming/src/test/resources/log4j2.properties index 08f43461b96ae..5f89859463a20 100644 --- a/streaming/src/test/resources/log4j2.properties +++ b/streaming/src/test/resources/log4j2.properties @@ -24,7 +24,7 @@ appender.file.name = File appender.file.fileName = target/unit-tests.log appender.file.append = true appender.file.layout.type = PatternLayout -appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n +appender.file.layout.pattern = %d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n%ex # Ignore messages below warning level from Jetty, because it's a bit verbose logger.jetty.name = org.sparkproject.jetty From bc4aab5c26d7f1f82697e23a898cce57642ea38b Mon Sep 17 00:00:00 2001 From: beobest2 Date: Thu, 2 Jun 2022 21:48:54 +0300 Subject: [PATCH 316/535] [SPARK-39295][DOCS][PYTHON][3.3] Improve documentation of pandas API supported list ### What changes were proposed in this pull request? The description provided in the supported pandas API list document or the code comment needs improvement. ### Why are the changes needed? To improve document readability for users. ### Does this PR introduce _any_ user-facing change? Yes, the "Supported pandas APIs" page has changed as below. Screen Shot 2022-06-02 at 5 10 39 AM ### How was this patch tested? Manually check the links in the documents & the existing doc build should be passed. Closes #36749 from beobest2/SPARK-39295_backport. Authored-by: beobest2 Signed-off-by: Max Gekk --- .../pandas_on_spark/supported_pandas_api.rst | 62 +++++++++---------- 1 file changed, 30 insertions(+), 32 deletions(-) diff --git a/python/docs/source/user_guide/pandas_on_spark/supported_pandas_api.rst b/python/docs/source/user_guide/pandas_on_spark/supported_pandas_api.rst index 4e89fc417d12a..c9a620a883342 100644 --- a/python/docs/source/user_guide/pandas_on_spark/supported_pandas_api.rst +++ b/python/docs/source/user_guide/pandas_on_spark/supported_pandas_api.rst @@ -16,36 +16,34 @@ under the License. -===================== -Supported pandas APIs -===================== +==================== +Supported pandas API +==================== .. currentmodule:: pyspark.pandas The following table shows the pandas APIs that implemented or non-implemented from pandas API on -Spark. +Spark. Some pandas API do not implement full parameters, so the third column shows missing +parameters for each API. -Some pandas APIs do not implement full parameters, so the third column shows missing parameters for -each API. +* 'Y' in the second column means it's implemented including its whole parameter. +* 'N' means it's not implemented yet. +* 'P' means it's partially implemented with the missing of some parameters. -'Y' in the second column means it's implemented including its whole parameter. -'N' means it's not implemented yet. -'P' means it's partially implemented with the missing of some parameters. +All API in the list below computes the data with distributed execution except the ones that require +the local execution by design. For example, `DataFrame.to_numpy() `__ +requires to collect the data to the driver side. If there is non-implemented pandas API or parameter you want, you can create an `Apache Spark -JIRA `__ to request or to contribute by your -own. +JIRA `__ to request or to contribute by +your own. -The API list is updated based on the `pandas 1.3 official API -reference `__. +The API list is updated based on the `pandas 1.3 official API reference +`__. -All implemented APIs listed here are distributed except the ones that requires the local -computation by design. For example, `DataFrame.to_numpy() `__ requires to collect the data to the driver side. - -Supported DataFrame APIs ------------------------- +DataFrame API +------------- .. currentmodule:: pyspark.pandas.DataFrame @@ -528,8 +526,8 @@ Supported DataFrame APIs | :func:`xs` | P | ``drop_level`` | +--------------------------------------------+-------------+--------------------------------------+ -Supported I/O APIs ------------------- +I/O API +------- .. currentmodule:: pyspark.pandas @@ -631,8 +629,8 @@ Supported I/O APIs | DataFrame.to_stata | N | | +--------------------------------+--------------------+-------------------------------------------+ -Supported General Function APIs -------------------------------- +General Function API +-------------------- +-----------------------------------------+--------------+----------------------------------------+ | API | Implemented | Missing parameters | @@ -694,8 +692,8 @@ Supported General Function APIs | eval | N | | +-----------------------------------------+--------------+----------------------------------------+ -Supported Series APIs ---------------------- +Series API +---------- .. currentmodule:: pyspark.pandas.Series @@ -1162,8 +1160,8 @@ Supported Series APIs | :func:`xs` | P | ``axis``, ``drop_level`` | +---------------------------------+-------------------+-------------------------------------------+ -Supported Index APIs --------------------- +Index API +--------- .. currentmodule:: pyspark.pandas.Index @@ -1381,8 +1379,8 @@ Supported Index APIs | where | N | | +-----------------------------------------+-------------+-----------------------------------------+ -Supported Window APIs ---------------------- +Window API +---------- .. currentmodule:: pyspark.pandas.window @@ -1510,8 +1508,8 @@ Supported Window APIs | Expanding.window | N | | +--------------------------------------------------------------+-------------+--------------------+ -Supported GroupBy APIs ----------------------- +GroupBy API +----------- .. currentmodule:: pyspark.pandas.groupby From 8f599bae52e7578341f03ec6ec2ae8f08c5ef477 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Fri, 3 Jun 2022 09:42:44 +0900 Subject: [PATCH 317/535] [SPARK-39367][DOCS][SQL] Review and fix issues in Scala/Java API docs of SQL module ### What changes were proposed in this pull request? Compare the 3.3.0 API doc with the latest release version 3.2.1. Fix the following issues: * Add missing Since annotation for new APIs * Remove the leaking class/object in API doc ### Why are the changes needed? Improve API docs ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #36754 from gengliangwang/apiDoc. Authored-by: Gengliang Wang Signed-off-by: Hyukjin Kwon (cherry picked from commit 4c7888dd9159dc203628b0d84f0ee2f90ab4bf13) Signed-off-by: Hyukjin Kwon --- .../src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala | 2 +- .../spark/sql/connector/util/V2ExpressionSQLBuilder.java | 2 ++ .../apache/spark/sql/connector/write/RowLevelOperation.java | 2 ++ .../main/java/org/apache/spark/sql/util/NumericHistogram.java | 4 ++++ .../org/apache/spark/sql/vectorized/ColumnarBatchRow.java | 2 ++ .../org/apache/spark/sql/errors/QueryCompilationErrors.scala | 2 +- .../scala/org/apache/spark/sql/errors/QueryErrorsBase.scala | 2 +- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 2 +- .../org/apache/spark/sql/errors/QueryParsingErrors.scala | 2 +- .../main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala | 2 +- 10 files changed, 16 insertions(+), 6 deletions(-) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index ef9d22f35d048..de3626b1f3147 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -206,7 +206,7 @@ private[sql] object AvroUtils extends Logging { } /** Wrapper for a pair of matched fields, one Catalyst and one corresponding Avro field. */ - case class AvroMatchedField( + private[sql] case class AvroMatchedField( catalystField: StructField, catalystPosition: Int, avroField: Schema.Field) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index c9dfa2003e3c1..3fa6480028797 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -30,6 +30,8 @@ /** * The builder to generate SQL from V2 expressions. + * + * @since 3.3.0 */ public class V2ExpressionSQLBuilder { diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperation.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperation.java index 04bbab11e10d7..7acd27759a1ba 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperation.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/write/RowLevelOperation.java @@ -34,6 +34,8 @@ public interface RowLevelOperation { /** * A row-level SQL command. + * + * @since 3.3.0 */ enum Command { DELETE, UPDATE, MERGE diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/util/NumericHistogram.java b/sql/catalyst/src/main/java/org/apache/spark/sql/util/NumericHistogram.java index 987c18e4129c8..007db1c483bed 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/util/NumericHistogram.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/util/NumericHistogram.java @@ -44,10 +44,14 @@ * 4. In Hive's code, the method [[merge()] pass a serialized histogram, * in Spark, this method pass a deserialized histogram. * Here we change the code about merge bins. + * + * @since 3.3.0 */ public class NumericHistogram { /** * The Coord class defines a histogram bin, which is just an (x,y) pair. + * + * @since 3.3.0 */ public static class Coord implements Comparable { public double x; diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java index 7f841266008f8..32f6e71f77aac 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/vectorized/ColumnarBatchRow.java @@ -25,6 +25,8 @@ /** * This class wraps an array of {@link ColumnVector} and provides a row view. + * + * @since 3.3.0 */ @DeveloperApi public final class ColumnarBatchRow extends InternalRow { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 70ef344fda59b..ebf40b4b5d0bb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -46,7 +46,7 @@ import org.apache.spark.sql.types._ * As commands are executed eagerly, this also includes errors thrown during the execution of * commands, which users can see immediately. */ -object QueryCompilationErrors extends QueryErrorsBase { +private[sql] object QueryCompilationErrors extends QueryErrorsBase { def groupingIDMismatchError(groupingID: GroupingID, groupByExprs: Seq[Expression]): Throwable = { new AnalysisException( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index 758a0d34b2689..343a1561dcea3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -42,7 +42,7 @@ import org.apache.spark.sql.types.{DataType, DoubleType, FloatType} * 7. SQL expressions shall be wrapped by double quotes. * For example: "earnings + 1". */ -trait QueryErrorsBase { +private[sql] trait QueryErrorsBase { // Converts an error class parameter to its SQL representation def toSQLValue(v: Any, t: DataType): String = Literal.create(v, t) match { case Literal(null, _) => "NULL" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 22dc100a43476..ce5e65a736d2b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -66,7 +66,7 @@ import org.apache.spark.util.CircularBuffer * This does not include exceptions thrown during the eager execution of commands, which are * grouped into [[QueryCompilationErrors]]. */ -object QueryExecutionErrors extends QueryErrorsBase { +private[sql] object QueryExecutionErrors extends QueryErrorsBase { def cannotEvaluateExpressionError(expression: Expression): Throwable = { new SparkUnsupportedOperationException(errorClass = "INTERNAL_ERROR", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 32caa3fa7a24a..6e8124c89e2d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -29,7 +29,7 @@ import org.apache.spark.sql.types.StringType * Object for grouping all error messages of the query parsing. * Currently it includes all ParseException. */ -object QueryParsingErrors extends QueryErrorsBase { +private[sql] object QueryParsingErrors extends QueryErrorsBase { def invalidInsertIntoError(ctx: InsertIntoContext): Throwable = { new ParseException("Invalid InsertIntoContext", ctx) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala index e1883e4e7f4b8..1e65542946af7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala @@ -221,7 +221,7 @@ abstract class JdbcDialect extends Serializable with Logging{ case _ => value } - class JDBCSQLBuilder extends V2ExpressionSQLBuilder { + private[jdbc] class JDBCSQLBuilder extends V2ExpressionSQLBuilder { override def visitLiteral(literal: Literal[_]): String = { compileValue( CatalystTypeConverters.convertToScala(literal.value(), literal.dataType())).toString From 4a0f0ff6c22b85cb0fc1eef842da8dbe4c90543a Mon Sep 17 00:00:00 2001 From: Ole Sasse Date: Fri, 3 Jun 2022 09:12:26 +0300 Subject: [PATCH 318/535] [SPARK-39259][SQL][3.3] Evaluate timestamps consistently in subqueries ### What changes were proposed in this pull request? Apply the optimizer rule ComputeCurrentTime consistently across subqueries. This is a backport of https://github.com/apache/spark/pull/36654. ### Why are the changes needed? At the moment timestamp functions like now() can return different values within a query if subqueries are involved ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? A new unit test was added Closes #36752 from olaky/SPARK-39259-spark_3_3. Authored-by: Ole Sasse Signed-off-by: Max Gekk --- .../catalyst/optimizer/finishAnalysis.scala | 41 +++++---- .../spark/sql/catalyst/plans/QueryPlan.scala | 11 ++- .../optimizer/ComputeCurrentTimeSuite.scala | 89 ++++++++++++++----- 3 files changed, 95 insertions(+), 46 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index ef9c4b9af40d3..242c799dd226e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -17,14 +17,16 @@ package org.apache.spark.sql.catalyst.optimizer -import scala.collection.mutable +import java.time.{Instant, LocalDateTime} import org.apache.spark.sql.catalyst.CurrentUserContext.CURRENT_USER import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.trees.TreePattern._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ} +import org.apache.spark.sql.catalyst.trees.TreePatternBits +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, instantToMicros, localDateTimeToMicros} import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -73,29 +75,30 @@ object RewriteNonCorrelatedExists extends Rule[LogicalPlan] { */ object ComputeCurrentTime extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { - val currentDates = mutable.Map.empty[String, Literal] - val timeExpr = CurrentTimestamp() - val timestamp = timeExpr.eval(EmptyRow).asInstanceOf[Long] - val currentTime = Literal.create(timestamp, timeExpr.dataType) + val instant = Instant.now() + val currentTimestampMicros = instantToMicros(instant) + val currentTime = Literal.create(currentTimestampMicros, TimestampType) val timezone = Literal.create(conf.sessionLocalTimeZone, StringType) - val localTimestamps = mutable.Map.empty[String, Literal] - plan.transformAllExpressionsWithPruning(_.containsPattern(CURRENT_LIKE)) { - case currentDate @ CurrentDate(Some(timeZoneId)) => - currentDates.getOrElseUpdate(timeZoneId, { - Literal.create(currentDate.eval().asInstanceOf[Int], DateType) - }) - case CurrentTimestamp() | Now() => currentTime - case CurrentTimeZone() => timezone - case localTimestamp @ LocalTimestamp(Some(timeZoneId)) => - localTimestamps.getOrElseUpdate(timeZoneId, { - Literal.create(localTimestamp.eval().asInstanceOf[Long], TimestampNTZType) - }) + def transformCondition(treePatternbits: TreePatternBits): Boolean = { + treePatternbits.containsPattern(CURRENT_LIKE) + } + + plan.transformDownWithSubqueries(transformCondition) { + case subQuery => + subQuery.transformAllExpressionsWithPruning(transformCondition) { + case cd: CurrentDate => + Literal.create(DateTimeUtils.microsToDays(currentTimestampMicros, cd.zoneId), DateType) + case CurrentTimestamp() | Now() => currentTime + case CurrentTimeZone() => timezone + case localTimestamp: LocalTimestamp => + val asDateTime = LocalDateTime.ofInstant(instant, localTimestamp.zoneId) + Literal.create(localDateTimeToMicros(asDateTime), TimestampNTZType) + } } } } - /** * Replaces the expression of CurrentDatabase with the current database name. * Replaces the expression of CurrentCatalog with the current catalog name. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 0f8df5df3764a..d0283f4d36720 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -454,7 +454,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] * to rewrite the whole plan, include its subqueries, in one go. */ def transformWithSubqueries(f: PartialFunction[PlanType, PlanType]): PlanType = - transformDownWithSubqueries(f) + transformDownWithSubqueries(AlwaysProcess.fn, UnknownRuleId)(f) /** * Returns a copy of this node where the given partial function has been recursively applied @@ -479,7 +479,10 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] * first to this node, then this node's subqueries and finally this node's children. * When the partial function does not apply to a given node, it is left unchanged. */ - def transformDownWithSubqueries(f: PartialFunction[PlanType, PlanType]): PlanType = { + def transformDownWithSubqueries( + cond: TreePatternBits => Boolean = AlwaysProcess.fn, ruleId: RuleId = UnknownRuleId) + (f: PartialFunction[PlanType, PlanType]) +: PlanType = { val g: PartialFunction[PlanType, PlanType] = new PartialFunction[PlanType, PlanType] { override def isDefinedAt(x: PlanType): Boolean = true @@ -487,13 +490,13 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] val transformed = f.applyOrElse[PlanType, PlanType](plan, identity) transformed transformExpressionsDown { case planExpression: PlanExpression[PlanType] => - val newPlan = planExpression.plan.transformDownWithSubqueries(f) + val newPlan = planExpression.plan.transformDownWithSubqueries(cond, ruleId)(f) planExpression.withNewPlan(newPlan) } } } - transformDown(g) + transformDownWithPruning(cond, ruleId)(g) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala index 9b04dcddfb2ce..c034906c09bb6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala @@ -19,10 +19,13 @@ package org.apache.spark.sql.catalyst.optimizer import java.time.{LocalDateTime, ZoneId} +import scala.collection.JavaConverters.mapAsScalaMap +import scala.concurrent.duration._ + import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentDate, CurrentTimestamp, CurrentTimeZone, Literal, LocalTimestamp} +import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentDate, CurrentTimestamp, CurrentTimeZone, InSubquery, ListQuery, Literal, LocalTimestamp, Now} import org.apache.spark.sql.catalyst.plans.PlanTest -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LocalRelation, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.internal.SQLConf @@ -41,11 +44,7 @@ class ComputeCurrentTimeSuite extends PlanTest { val plan = Optimize.execute(in.analyze).asInstanceOf[Project] val max = (System.currentTimeMillis() + 1) * 1000 - val lits = new scala.collection.mutable.ArrayBuffer[Long] - plan.transformAllExpressions { case e: Literal => - lits += e.value.asInstanceOf[Long] - e - } + val lits = literals[Long](plan) assert(lits.size == 2) assert(lits(0) >= min && lits(0) <= max) assert(lits(1) >= min && lits(1) <= max) @@ -59,11 +58,7 @@ class ComputeCurrentTimeSuite extends PlanTest { val plan = Optimize.execute(in.analyze).asInstanceOf[Project] val max = DateTimeUtils.currentDate(ZoneId.systemDefault()) - val lits = new scala.collection.mutable.ArrayBuffer[Int] - plan.transformAllExpressions { case e: Literal => - lits += e.value.asInstanceOf[Int] - e - } + val lits = literals[Int](plan) assert(lits.size == 2) assert(lits(0) >= min && lits(0) <= max) assert(lits(1) >= min && lits(1) <= max) @@ -73,13 +68,9 @@ class ComputeCurrentTimeSuite extends PlanTest { test("SPARK-33469: Add current_timezone function") { val in = Project(Seq(Alias(CurrentTimeZone(), "c")()), LocalRelation()) val plan = Optimize.execute(in.analyze).asInstanceOf[Project] - val lits = new scala.collection.mutable.ArrayBuffer[String] - plan.transformAllExpressions { case e: Literal => - lits += e.value.asInstanceOf[UTF8String].toString - e - } + val lits = literals[UTF8String](plan) assert(lits.size == 1) - assert(lits.head == SQLConf.get.sessionLocalTimeZone) + assert(lits.head == UTF8String.fromString(SQLConf.get.sessionLocalTimeZone)) } test("analyzer should replace localtimestamp with literals") { @@ -92,14 +83,66 @@ class ComputeCurrentTimeSuite extends PlanTest { val plan = Optimize.execute(in.analyze).asInstanceOf[Project] val max = DateTimeUtils.localDateTimeToMicros(LocalDateTime.now(zoneId)) - val lits = new scala.collection.mutable.ArrayBuffer[Long] - plan.transformAllExpressions { case e: Literal => - lits += e.value.asInstanceOf[Long] - e - } + val lits = literals[Long](plan) assert(lits.size == 2) assert(lits(0) >= min && lits(0) <= max) assert(lits(1) >= min && lits(1) <= max) assert(lits(0) == lits(1)) } + + test("analyzer should use equal timestamps across subqueries") { + val timestampInSubQuery = Project(Seq(Alias(LocalTimestamp(), "timestamp1")()), LocalRelation()) + val listSubQuery = ListQuery(timestampInSubQuery) + val valueSearchedInSubQuery = Seq(Alias(LocalTimestamp(), "timestamp2")()) + val inFilterWithSubQuery = InSubquery(valueSearchedInSubQuery, listSubQuery) + val input = Project(Nil, Filter(inFilterWithSubQuery, LocalRelation())) + + val plan = Optimize.execute(input.analyze).asInstanceOf[Project] + + val lits = literals[Long](plan) + assert(lits.size == 3) // transformDownWithSubqueries covers the inner timestamp twice + assert(lits.toSet.size == 1) + } + + test("analyzer should use consistent timestamps for different timezones") { + val localTimestamps = mapAsScalaMap(ZoneId.SHORT_IDS) + .map { case (zoneId, _) => Alias(LocalTimestamp(Some(zoneId)), zoneId)() }.toSeq + val input = Project(localTimestamps, LocalRelation()) + + val plan = Optimize.execute(input).asInstanceOf[Project] + + val lits = literals[Long](plan) + assert(lits.size === localTimestamps.size) + // there are timezones with a 30 or 45 minute offset + val offsetsFromQuarterHour = lits.map( _ % Duration(15, MINUTES).toMicros).toSet + assert(offsetsFromQuarterHour.size == 1) + } + + test("analyzer should use consistent timestamps for different timestamp functions") { + val differentTimestamps = Seq( + Alias(CurrentTimestamp(), "currentTimestamp")(), + Alias(Now(), "now")(), + Alias(LocalTimestamp(Some("PLT")), "localTimestampWithTimezone")() + ) + val input = Project(differentTimestamps, LocalRelation()) + + val plan = Optimize.execute(input).asInstanceOf[Project] + + val lits = literals[Long](plan) + assert(lits.size === differentTimestamps.size) + // there are timezones with a 30 or 45 minute offset + val offsetsFromQuarterHour = lits.map( _ % Duration(15, MINUTES).toMicros).toSet + assert(offsetsFromQuarterHour.size == 1) + } + + private def literals[T](plan: LogicalPlan): Seq[T] = { + val literals = new scala.collection.mutable.ArrayBuffer[T] + plan.transformWithSubqueries { case subQuery => + subQuery.transformAllExpressions { case expression: Literal => + literals += expression.value.asInstanceOf[T] + expression + } + } + literals.asInstanceOf[Seq[T]] + } } From 61d22b6f313c20de1b65a595e88b6f5bd9595299 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Fri, 3 Jun 2022 17:49:01 +0900 Subject: [PATCH 319/535] [SPARK-39371][DOCS][CORE] Review and fix issues in Scala/Java API docs of Core module Compare the 3.3.0 API doc with the latest release version 3.2.1. Fix the following issues: * Add missing Since annotation for new APIs * Remove the leaking class/object in API doc Improve API docs No Existing UT Closes #36757 from xuanyuanking/doc. Authored-by: Yuanjian Li Signed-off-by: Hyukjin Kwon (cherry picked from commit 1fbb1d46feb992c3441f2a4f2c5d5179da465d4b) Signed-off-by: Hyukjin Kwon --- .../main/scala/org/apache/spark/errors/SparkCoreErrors.scala | 2 +- .../BlockSavedOnDecommissionedBlockManagerException.scala | 2 +- .../main/java/org/apache/spark/launcher/AbstractLauncher.java | 2 +- .../main/java/org/apache/spark/launcher/InProcessLauncher.java | 2 +- .../main/java/org/apache/spark/launcher/JavaModuleOptions.java | 2 ++ 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala b/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala index aecef8ed2d63d..1da0288446233 100644 --- a/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala +++ b/core/src/main/scala/org/apache/spark/errors/SparkCoreErrors.scala @@ -30,7 +30,7 @@ import org.apache.spark.storage.{BlockId, BlockManagerId, BlockNotFoundException /** * Object for grouping error messages from (most) exceptions thrown during query execution. */ -object SparkCoreErrors { +private[spark] object SparkCoreErrors { def unexpectedPy4JServerError(other: Object): Throwable = { new RuntimeException(s"Unexpected Py4J server ${other.getClass}") } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockSavedOnDecommissionedBlockManagerException.scala b/core/src/main/scala/org/apache/spark/storage/BlockSavedOnDecommissionedBlockManagerException.scala index 4684d9c67754d..21a022864bb34 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockSavedOnDecommissionedBlockManagerException.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockSavedOnDecommissionedBlockManagerException.scala @@ -17,5 +17,5 @@ package org.apache.spark.storage -class BlockSavedOnDecommissionedBlockManagerException(blockId: BlockId) +private[spark] class BlockSavedOnDecommissionedBlockManagerException(blockId: BlockId) extends Exception(s"Block $blockId cannot be saved on decommissioned executor") diff --git a/launcher/src/main/java/org/apache/spark/launcher/AbstractLauncher.java b/launcher/src/main/java/org/apache/spark/launcher/AbstractLauncher.java index 8a1256f73416e..80b71e53075f3 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/AbstractLauncher.java +++ b/launcher/src/main/java/org/apache/spark/launcher/AbstractLauncher.java @@ -26,7 +26,7 @@ /** * Base class for launcher implementations. * - * @since Spark 2.3.0 + * @since 2.3.0 */ public abstract class AbstractLauncher> { diff --git a/launcher/src/main/java/org/apache/spark/launcher/InProcessLauncher.java b/launcher/src/main/java/org/apache/spark/launcher/InProcessLauncher.java index 688e1f763c205..6867518b3212d 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/InProcessLauncher.java +++ b/launcher/src/main/java/org/apache/spark/launcher/InProcessLauncher.java @@ -37,7 +37,7 @@ * driver memory or configs which modify the driver's class path) do not take effect. Logging * configuration is also inherited from the parent application. * - * @since Spark 2.3.0 + * @since 2.3.0 */ public class InProcessLauncher extends AbstractLauncher { diff --git a/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java b/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java index c7d3df99c6e3d..978466cd77ccd 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java +++ b/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java @@ -21,6 +21,8 @@ * This helper class is used to place the all `--add-opens` options * required by Spark when using Java 17. `DEFAULT_MODULE_OPTIONS` has added * `-XX:+IgnoreUnrecognizedVMOptions` to be compatible with Java 8 and Java 11. + * + * @since 3.3.0 */ public class JavaModuleOptions { private static final String[] DEFAULT_MODULE_OPTIONS = { From 4e3599bc11a1cb0ea9fc819e7f752d2228e54baf Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 3 Jun 2022 09:20:31 +0000 Subject: [PATCH 320/535] Preparing Spark release v3.3.0-rc4 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf6d..9479bb3bf87df 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index d12f2ad73fabd..2e9c4d9960b14 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 842d63f5d3811..2a9acfa335e71 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f7d187bf9527d..7b17e625d7599 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 53f38df885102..c5c920e774782 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 845f6659407bd..697b5a3928e58 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8e1590891933b..ad2db11370ae7 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1987c13328559..1a7bdee70f3bc 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index c7e7be1e3bbf1..66dc93de0599e 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index ac644130a61e2..219ceca6648d8 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 9a6fe2d313fde..4966db6b4a8af 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.1-SNAPSHOT -SPARK_VERSION_SHORT: 3.3.1 +SPARK_VERSION: 3.3.0 +SPARK_VERSION_SHORT: 3.3.0 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.1"] + 'facetFilters': ["version:3.3.0"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index e97f3b40cb2bd..42e58f2726df1 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 578854e3eaa9a..5aaa91cfdf20d 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 95e1ce74ca172..36309bb417362 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 0b803c5d3864a..072cedaa594c8 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 95726829bcbbd..b9063b543f512 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 25e7e25ae25b6..6f6a51a972c73 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 3ba16b7b838a2..95fd080383995 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 6cee275e6adc7..33cf30ff803e7 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index ad23da0d7f249..79b2e8f2a5a47 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 6de1f9eee532c..647d0c3f87552 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 95c8c312eb0e2..562ddc8dcc23c 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 4d87bd2730e3b..08bcae6e0f53f 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 889f0b5a92e08..beceaecd31a1c 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 9b7b0370d3b4d..584a5df0a4a35 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 04a68a47a4f45..42bab72668c00 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/pom.xml b/pom.xml index 32bd40ee83461..5f7b1b0b9dc46 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index 2e5f8bf5395a3..980b64c4dca8c 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index d1d6a449bd5dc..f3ec959370807 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 40e578f9a7eba..66ae5adfbd19f 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index f4ac384409174..1472bd0fcb1a2 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 61d5adec0e7cc..77811f35692d8 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 970d42ba4590e..ceba171e41134 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 0cfb5f616cd24..34137add48553 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 7024e0dcfab75..e1b725929a8fc 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index cc8d8796da601..8f1e9d2f3ccb1 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index c4977726a3cac..52273e7fa76e1 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 9bbcb7f322798..dadc9324f95a2 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml From 03012f432ac24049291c71415a32677f612a7afd Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 3 Jun 2022 09:20:38 +0000 Subject: [PATCH 321/535] Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87df..0e449e841cf6d 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b14..d12f2ad73fabd 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e71..842d63f5d3811 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d7599..f7d187bf9527d 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e774782..53f38df885102 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e58..845f6659407bd 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370ae7..8e1590891933b 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3bc..1987c13328559 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 66dc93de0599e..c7e7be1e3bbf1 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 219ceca6648d8..ac644130a61e2 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 4966db6b4a8af..9a6fe2d313fde 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.0 -SPARK_VERSION_SHORT: 3.3.0 +SPARK_VERSION: 3.3.1-SNAPSHOT +SPARK_VERSION_SHORT: 3.3.1 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.0"] + 'facetFilters': ["version:3.3.1"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 42e58f2726df1..e97f3b40cb2bd 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 5aaa91cfdf20d..578854e3eaa9a 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 36309bb417362..95e1ce74ca172 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 072cedaa594c8..0b803c5d3864a 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index b9063b543f512..95726829bcbbd 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 6f6a51a972c73..25e7e25ae25b6 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 95fd080383995..3ba16b7b838a2 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 33cf30ff803e7..6cee275e6adc7 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 79b2e8f2a5a47..ad23da0d7f249 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 647d0c3f87552..6de1f9eee532c 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 562ddc8dcc23c..95c8c312eb0e2 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 08bcae6e0f53f..4d87bd2730e3b 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index beceaecd31a1c..889f0b5a92e08 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 584a5df0a4a35..9b7b0370d3b4d 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 42bab72668c00..04a68a47a4f45 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 5f7b1b0b9dc46..32bd40ee83461 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT pom Spark Project Parent POM https://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index 980b64c4dca8c..2e5f8bf5395a3 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index f3ec959370807..d1d6a449bd5dc 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 66ae5adfbd19f..40e578f9a7eba 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 1472bd0fcb1a2..f4ac384409174 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 77811f35692d8..61d5adec0e7cc 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index ceba171e41134..970d42ba4590e 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 34137add48553..0cfb5f616cd24 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index e1b725929a8fc..7024e0dcfab75 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 8f1e9d2f3ccb1..cc8d8796da601 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 52273e7fa76e1..c4977726a3cac 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index dadc9324f95a2..9bbcb7f322798 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml From b2046c282a5be8ade421db61b583a6738f0e9ed6 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 3 Jun 2022 12:52:16 -0700 Subject: [PATCH 322/535] [SPARK-39259][SQL][TEST][FOLLOWUP] Fix Scala 2.13 `ClassCastException` in `ComputeCurrentTimeSuite` ### What changes were proposed in this pull request? Unfortunately, #36654 causes seven Scala 2.13 test failures in master/3.3 and Apache Spark 3.3 RC4. This PR aims to fix Scala 2.13 ClassCastException in the test code. ### Why are the changes needed? ``` $ dev/change-scala-version.sh 2.13 $ build/sbt "catalyst/testOnly *.ComputeCurrentTimeSuite" -Pscala-2.13 ... [info] ComputeCurrentTimeSuite: [info] - analyzer should replace current_timestamp with literals *** FAILED *** (1 second, 189 milliseconds) [info] java.lang.ClassCastException: scala.collection.mutable.ArrayBuffer cannot be cast to scala.collection.immutable.Seq [info] at org.apache.spark.sql.catalyst.optimizer.ComputeCurrentTimeSuite.literals(ComputeCurrentTimeSuite.scala:146) [info] at org.apache.spark.sql.catalyst.optimizer.ComputeCurrentTimeSuite.$anonfun$new$1(ComputeCurrentTimeSuite.scala:47) ... [info] *** 7 TESTS FAILED *** [error] Failed tests: [error] org.apache.spark.sql.catalyst.optimizer.ComputeCurrentTimeSuite [error] (catalyst / Test / testOnly) sbt.TestsFailedException: Tests unsuccessful [error] Total time: 189 s (03:09), completed Jun 3, 2022 10:29:39 AM ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs and manually tests with Scala 2.13. ``` $ dev/change-scala-version.sh 2.13 $ build/sbt "catalyst/testOnly *.ComputeCurrentTimeSuite" -Pscala-2.13 ... [info] ComputeCurrentTimeSuite: [info] - analyzer should replace current_timestamp with literals (545 milliseconds) [info] - analyzer should replace current_date with literals (11 milliseconds) [info] - SPARK-33469: Add current_timezone function (3 milliseconds) [info] - analyzer should replace localtimestamp with literals (4 milliseconds) [info] - analyzer should use equal timestamps across subqueries (182 milliseconds) [info] - analyzer should use consistent timestamps for different timezones (13 milliseconds) [info] - analyzer should use consistent timestamps for different timestamp functions (2 milliseconds) [info] Run completed in 1 second, 579 milliseconds. [info] Total number of tests run: 7 [info] Suites: completed 1, aborted 0 [info] Tests: succeeded 7, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. [success] Total time: 12 s, completed Jun 3, 2022, 10:54:03 AM ``` Closes #36762 from dongjoon-hyun/SPARK-39259. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit d79aa36b12d9d6816679ba6348705fdd3bd0061e) Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala index c034906c09bb6..86461522f7469 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ComputeCurrentTimeSuite.scala @@ -135,7 +135,7 @@ class ComputeCurrentTimeSuite extends PlanTest { assert(offsetsFromQuarterHour.size == 1) } - private def literals[T](plan: LogicalPlan): Seq[T] = { + private def literals[T](plan: LogicalPlan): scala.collection.mutable.ArrayBuffer[T] = { val literals = new scala.collection.mutable.ArrayBuffer[T] plan.transformWithSubqueries { case subQuery => subQuery.transformAllExpressions { case expression: Literal => @@ -143,6 +143,6 @@ class ComputeCurrentTimeSuite extends PlanTest { expression } } - literals.asInstanceOf[Seq[T]] + literals } } From b7e95bad882482168b7dd301fcfa3daf80477a7a Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Sat, 4 Jun 2022 09:12:42 +0300 Subject: [PATCH 323/535] [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries ### What changes were proposed in this pull request? This is a followup to #36654. That PR modified the existing `QueryPlan.transformDownWithSubqueries` to add additional arguments for tree pattern pruning. In this PR, I roll back the change to that method's signature and instead add a new `transformDownWithSubqueriesAndPruning` method. ### Why are the changes needed? The original change breaks binary and source compatibility in Catalyst. Technically speaking, Catalyst APIs are considered internal to Spark and are subject to change between minor releases (see [source](https://github.com/apache/spark/blob/bb51add5c79558df863d37965603387d40cc4387/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/package.scala#L20-L24)), but I think it's nice to try to avoid API breakage when possible. While trying to compile some custom Catalyst code, I ran into issues when trying to call the `transformDownWithSubqueries` method without supplying a tree pattern filter condition. If I do `transformDownWithSubqueries() { f} ` then I get a compilation error. I think this is due to the first parameter group containing all default parameters. My PR's solution of adding a new `transformDownWithSubqueriesAndPruning` method solves this problem. It's also more consistent with the naming convention used for other pruning-enabled tree transformation methods. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #36765 from JoshRosen/SPARK-39259-binary-compatibility-followup. Authored-by: Josh Rosen Signed-off-by: Max Gekk (cherry picked from commit eda6c4b9987f0515cb0aae4686c8a0ae0a3987d4) Signed-off-by: Max Gekk --- .../catalyst/optimizer/finishAnalysis.scala | 2 +- .../spark/sql/catalyst/plans/QueryPlan.scala | 22 ++++++++++++++----- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 242c799dd226e..a33069051d9d6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -84,7 +84,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] { treePatternbits.containsPattern(CURRENT_LIKE) } - plan.transformDownWithSubqueries(transformCondition) { + plan.transformDownWithSubqueriesAndPruning(transformCondition) { case subQuery => subQuery.transformAllExpressionsWithPruning(transformCondition) { case cd: CurrentDate => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index d0283f4d36720..cc62c81b101b7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -454,7 +454,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] * to rewrite the whole plan, include its subqueries, in one go. */ def transformWithSubqueries(f: PartialFunction[PlanType, PlanType]): PlanType = - transformDownWithSubqueries(AlwaysProcess.fn, UnknownRuleId)(f) + transformDownWithSubqueries(f) /** * Returns a copy of this node where the given partial function has been recursively applied @@ -479,10 +479,20 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] * first to this node, then this node's subqueries and finally this node's children. * When the partial function does not apply to a given node, it is left unchanged. */ - def transformDownWithSubqueries( - cond: TreePatternBits => Boolean = AlwaysProcess.fn, ruleId: RuleId = UnknownRuleId) - (f: PartialFunction[PlanType, PlanType]) -: PlanType = { + def transformDownWithSubqueries(f: PartialFunction[PlanType, PlanType]): PlanType = { + transformDownWithSubqueriesAndPruning(AlwaysProcess.fn, UnknownRuleId)(f) + } + + /** + * This method is the top-down (pre-order) counterpart of transformUpWithSubqueries. + * Returns a copy of this node where the given partial function has been recursively applied + * first to this node, then this node's subqueries and finally this node's children. + * When the partial function does not apply to a given node, it is left unchanged. + */ + def transformDownWithSubqueriesAndPruning( + cond: TreePatternBits => Boolean, + ruleId: RuleId = UnknownRuleId) + (f: PartialFunction[PlanType, PlanType]): PlanType = { val g: PartialFunction[PlanType, PlanType] = new PartialFunction[PlanType, PlanType] { override def isDefinedAt(x: PlanType): Boolean = true @@ -490,7 +500,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] val transformed = f.applyOrElse[PlanType, PlanType](plan, identity) transformed transformExpressionsDown { case planExpression: PlanExpression[PlanType] => - val newPlan = planExpression.plan.transformDownWithSubqueries(cond, ruleId)(f) + val newPlan = planExpression.plan.transformDownWithSubqueriesAndPruning(cond, ruleId)(f) planExpression.withNewPlan(newPlan) } } From 7cf29705272ab8e8c70e8885a3664ad8ae3cd5e9 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 4 Jun 2022 06:43:05 +0000 Subject: [PATCH 324/535] Preparing Spark release v3.3.0-rc5 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf6d..9479bb3bf87df 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index d12f2ad73fabd..2e9c4d9960b14 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 842d63f5d3811..2a9acfa335e71 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f7d187bf9527d..7b17e625d7599 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 53f38df885102..c5c920e774782 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 845f6659407bd..697b5a3928e58 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8e1590891933b..ad2db11370ae7 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1987c13328559..1a7bdee70f3bc 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index c7e7be1e3bbf1..66dc93de0599e 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index ac644130a61e2..219ceca6648d8 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 9a6fe2d313fde..4966db6b4a8af 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.1-SNAPSHOT -SPARK_VERSION_SHORT: 3.3.1 +SPARK_VERSION: 3.3.0 +SPARK_VERSION_SHORT: 3.3.0 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.1"] + 'facetFilters': ["version:3.3.0"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index e97f3b40cb2bd..42e58f2726df1 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 578854e3eaa9a..5aaa91cfdf20d 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 95e1ce74ca172..36309bb417362 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 0b803c5d3864a..072cedaa594c8 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 95726829bcbbd..b9063b543f512 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 25e7e25ae25b6..6f6a51a972c73 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 3ba16b7b838a2..95fd080383995 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 6cee275e6adc7..33cf30ff803e7 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index ad23da0d7f249..79b2e8f2a5a47 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 6de1f9eee532c..647d0c3f87552 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 95c8c312eb0e2..562ddc8dcc23c 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 4d87bd2730e3b..08bcae6e0f53f 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 889f0b5a92e08..beceaecd31a1c 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 9b7b0370d3b4d..584a5df0a4a35 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 04a68a47a4f45..42bab72668c00 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/pom.xml b/pom.xml index 32bd40ee83461..5f7b1b0b9dc46 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index 2e5f8bf5395a3..980b64c4dca8c 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index d1d6a449bd5dc..f3ec959370807 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 40e578f9a7eba..66ae5adfbd19f 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index f4ac384409174..1472bd0fcb1a2 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 61d5adec0e7cc..77811f35692d8 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 970d42ba4590e..ceba171e41134 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 0cfb5f616cd24..34137add48553 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 7024e0dcfab75..e1b725929a8fc 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index cc8d8796da601..8f1e9d2f3ccb1 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index c4977726a3cac..52273e7fa76e1 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 9bbcb7f322798..dadc9324f95a2 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml From bf3c472ff87ab7ec17f55e4730d6c6c9a7f299ad Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Sat, 4 Jun 2022 06:43:12 +0000 Subject: [PATCH 325/535] Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87df..0e449e841cf6d 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b14..d12f2ad73fabd 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e71..842d63f5d3811 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d7599..f7d187bf9527d 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e774782..53f38df885102 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e58..845f6659407bd 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370ae7..8e1590891933b 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3bc..1987c13328559 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 66dc93de0599e..c7e7be1e3bbf1 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 219ceca6648d8..ac644130a61e2 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 4966db6b4a8af..9a6fe2d313fde 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.0 -SPARK_VERSION_SHORT: 3.3.0 +SPARK_VERSION: 3.3.1-SNAPSHOT +SPARK_VERSION_SHORT: 3.3.1 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.0"] + 'facetFilters': ["version:3.3.1"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 42e58f2726df1..e97f3b40cb2bd 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 5aaa91cfdf20d..578854e3eaa9a 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 36309bb417362..95e1ce74ca172 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 072cedaa594c8..0b803c5d3864a 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index b9063b543f512..95726829bcbbd 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 6f6a51a972c73..25e7e25ae25b6 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 95fd080383995..3ba16b7b838a2 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 33cf30ff803e7..6cee275e6adc7 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 79b2e8f2a5a47..ad23da0d7f249 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 647d0c3f87552..6de1f9eee532c 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 562ddc8dcc23c..95c8c312eb0e2 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 08bcae6e0f53f..4d87bd2730e3b 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index beceaecd31a1c..889f0b5a92e08 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 584a5df0a4a35..9b7b0370d3b4d 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 42bab72668c00..04a68a47a4f45 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 5f7b1b0b9dc46..32bd40ee83461 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT pom Spark Project Parent POM https://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index 980b64c4dca8c..2e5f8bf5395a3 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index f3ec959370807..d1d6a449bd5dc 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 66ae5adfbd19f..40e578f9a7eba 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 1472bd0fcb1a2..f4ac384409174 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 77811f35692d8..61d5adec0e7cc 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index ceba171e41134..970d42ba4590e 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 34137add48553..0cfb5f616cd24 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index e1b725929a8fc..7024e0dcfab75 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 8f1e9d2f3ccb1..cc8d8796da601 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 52273e7fa76e1..c4977726a3cac 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index dadc9324f95a2..9bbcb7f322798 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml From 3b549f4309497ecbe9f0b7a20d22a9a4417abb8b Mon Sep 17 00:00:00 2001 From: Karen Feng Date: Mon, 6 Jun 2022 20:58:23 +0800 Subject: [PATCH 326/535] [SPARK-39376][SQL] Hide duplicated columns in star expansion of subquery alias from NATURAL/USING JOIN ### What changes were proposed in this pull request? Follows up from https://github.com/apache/spark/pull/31666. This PR introduced a bug where the qualified star expansion of a subquery alias containing a NATURAL/USING output duplicated columns. ### Why are the changes needed? Duplicated, hidden columns should not be output from a star expansion. ### Does this PR introduce _any_ user-facing change? The query ``` val df1 = Seq((3, 8)).toDF("a", "b") val df2 = Seq((8, 7)).toDF("b", "d") val joinDF = df1.join(df2, "b") joinDF.alias("r").select("r.*") ``` Now outputs a single column `b`, instead of two (duplicate) columns for `b`. ### How was this patch tested? UTs Closes #36763 from karenfeng/SPARK-39376. Authored-by: Karen Feng Signed-off-by: Wenchen Fan (cherry picked from commit 18ca369f01905b421a658144e23b5a4e60702655) Signed-off-by: Wenchen Fan --- .../plans/logical/basicLogicalOperators.scala | 3 ++- .../apache/spark/sql/DataFrameJoinSuite.scala | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 692601be75d10..774f6956162e3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1328,7 +1328,8 @@ case class SubqueryAlias( override def metadataOutput: Seq[Attribute] = { val qualifierList = identifier.qualifier :+ alias - child.metadataOutput.map(_.withQualifier(qualifierList)) + val nonHiddenMetadataOutput = child.metadataOutput.filter(!_.supportsQualifiedStar) + nonHiddenMetadataOutput.map(_.withQualifier(qualifierList)) } override def maxRows: Option[Long] = child.maxRows diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index a803fa88ed313..1fda13f996a47 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -499,4 +499,26 @@ class DataFrameJoinSuite extends QueryTest ) } } + + test("SPARK-39376: Hide duplicated columns in star expansion of subquery alias from USING JOIN") { + val joinDf = testData2.as("testData2").join( + testData3.as("testData3"), usingColumns = Seq("a"), joinType = "fullouter") + val equivalentQueries = Seq( + joinDf.select($"*"), + joinDf.as("r").select($"*"), + joinDf.as("r").select($"r.*") + ) + equivalentQueries.foreach { query => + checkAnswer(query, + Seq( + Row(1, 1, null), + Row(1, 2, null), + Row(2, 1, 2), + Row(2, 2, 2), + Row(3, 1, null), + Row(3, 2, null) + ) + ) + } + } } From be63826d423c3f5b15bb0ad39a58a564cf1d2b96 Mon Sep 17 00:00:00 2001 From: Luca Canali Date: Tue, 7 Jun 2022 16:07:27 +0800 Subject: [PATCH 327/535] [SPARK-39286][DOC] Update documentation for the decode function ### What changes were proposed in this pull request? The documentation for the decode function introduced in [SPARK-33527](https://issues.apache.org/jira/browse/SPARK-33527) refers erroneously to Oracle. It appears that the documentation string has been in large parts copied from https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/DECODE.html#GUID-39341D91-3442-4730-BD34-D3CF5D4701CE This proposes to update the documentation of the decode function to fix the issue. ### Why are the changes needed? Documentation fix. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? NA Closes #36662 from LucaCanali/fixDecodeDoc. Authored-by: Luca Canali Signed-off-by: Wenchen Fan (cherry picked from commit f4c34aa642320defb81c71f5755672603f866b49) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/expressions/stringExpressions.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 9089ff4663767..c56a1dc47ae21 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -2504,9 +2504,10 @@ object Decode { usage = """ _FUNC_(bin, charset) - Decodes the first argument using the second argument character set. - _FUNC_(expr, search, result [, search, result ] ... [, default]) - Decode compares expr - to each search value one by one. If expr is equal to a search, returns the corresponding result. - If no match is found, then Oracle returns default. If default is omitted, returns null. + _FUNC_(expr, search, result [, search, result ] ... [, default]) - Compares expr + to each search value in order. If expr is equal to a search value, _FUNC_ returns + the corresponding result. If no match is found, then it returns default. If default + is omitted, it returns null. """, examples = """ Examples: From 86f1b6bfe397646697c299ab5af2157cb1dd2e1e Mon Sep 17 00:00:00 2001 From: itholic Date: Wed, 8 Jun 2022 14:34:25 +0900 Subject: [PATCH 328/535] [SPARK-39394][DOCS][SS][3.3] Improve PySpark Structured Streaming page more readable ### What changes were proposed in this pull request? Hotfix https://github.com/apache/spark/pull/36782 for branch-3.3. ### Why are the changes needed? The improvement of document readability will also improve the usability for PySpark Structured Streaming. ### Does this PR introduce _any_ user-facing change? Yes, now the documentation is categorized by its class or their own purpose more clearly as below: ![Screen Shot 2022-06-07 at 12 30 01 PM](https://user-images.githubusercontent.com/44108233/172289737-bd6ebf0e-601c-4a80-a16a-cf885302e7b6.png) ### How was this patch tested? The existing doc build in CI should cover. Closes #36797 from itholic/SPARK-39394-3.3. Authored-by: itholic Signed-off-by: Hyukjin Kwon --- python/docs/source/reference/index.rst | 2 +- .../reference/pyspark.ss/core_classes.rst | 31 ++++++++++++ .../source/reference/pyspark.ss/index.rst | 30 ++++++++++++ .../{pyspark.ss.rst => pyspark.ss/io.rst} | 47 ++----------------- .../reference/pyspark.ss/query_management.rst | 43 +++++++++++++++++ 5 files changed, 108 insertions(+), 45 deletions(-) create mode 100644 python/docs/source/reference/pyspark.ss/core_classes.rst create mode 100644 python/docs/source/reference/pyspark.ss/index.rst rename python/docs/source/reference/{pyspark.ss.rst => pyspark.ss/io.rst} (61%) create mode 100644 python/docs/source/reference/pyspark.ss/query_management.rst diff --git a/python/docs/source/reference/index.rst b/python/docs/source/reference/index.rst index 1d2db3f4a156e..127889afb0761 100644 --- a/python/docs/source/reference/index.rst +++ b/python/docs/source/reference/index.rst @@ -29,7 +29,7 @@ Pandas API on Spark follows the API specifications of pandas 1.3. pyspark.sql/index pyspark.pandas/index - pyspark.ss + pyspark.ss/index pyspark.ml pyspark.streaming pyspark.mllib diff --git a/python/docs/source/reference/pyspark.ss/core_classes.rst b/python/docs/source/reference/pyspark.ss/core_classes.rst new file mode 100644 index 0000000000000..4160008881c8d --- /dev/null +++ b/python/docs/source/reference/pyspark.ss/core_classes.rst @@ -0,0 +1,31 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +============ +Core Classes +============ + +.. currentmodule:: pyspark.sql.streaming + +.. autosummary:: + :toctree: api/ + + DataStreamReader + DataStreamWriter + StreamingQuery + StreamingQueryManager diff --git a/python/docs/source/reference/pyspark.ss/index.rst b/python/docs/source/reference/pyspark.ss/index.rst new file mode 100644 index 0000000000000..2cb0b1216eff9 --- /dev/null +++ b/python/docs/source/reference/pyspark.ss/index.rst @@ -0,0 +1,30 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +==================== +Structured Streaming +==================== + +This page gives an overview of all public Structed Streaming API. + +.. toctree:: + :maxdepth: 2 + + core_classes + io + query_management diff --git a/python/docs/source/reference/pyspark.ss.rst b/python/docs/source/reference/pyspark.ss/io.rst similarity index 61% rename from python/docs/source/reference/pyspark.ss.rst rename to python/docs/source/reference/pyspark.ss/io.rst index cace2d5a6bbb3..da476fb6fac51 100644 --- a/python/docs/source/reference/pyspark.ss.rst +++ b/python/docs/source/reference/pyspark.ss/io.rst @@ -16,25 +16,9 @@ under the License. -==================== -Structured Streaming -==================== - -Core Classes ------------- - -.. currentmodule:: pyspark.sql.streaming - -.. autosummary:: - :toctree: api/ - - DataStreamReader - DataStreamWriter - StreamingQuery - StreamingQueryManager - -Input and Output ----------------- +============ +Input/Output +============ .. currentmodule:: pyspark.sql.streaming @@ -61,28 +45,3 @@ Input and Output DataStreamWriter.queryName DataStreamWriter.start DataStreamWriter.trigger - -Query Management ----------------- - -.. currentmodule:: pyspark.sql.streaming - -.. autosummary:: - :toctree: api/ - - StreamingQuery.awaitTermination - StreamingQuery.exception - StreamingQuery.explain - StreamingQuery.id - StreamingQuery.isActive - StreamingQuery.lastProgress - StreamingQuery.name - StreamingQuery.processAllAvailable - StreamingQuery.recentProgress - StreamingQuery.runId - StreamingQuery.status - StreamingQuery.stop - StreamingQueryManager.active - StreamingQueryManager.awaitAnyTermination - StreamingQueryManager.get - StreamingQueryManager.resetTerminated diff --git a/python/docs/source/reference/pyspark.ss/query_management.rst b/python/docs/source/reference/pyspark.ss/query_management.rst new file mode 100644 index 0000000000000..b580015baa3fb --- /dev/null +++ b/python/docs/source/reference/pyspark.ss/query_management.rst @@ -0,0 +1,43 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +================ +Query Management +================ + +.. currentmodule:: pyspark.sql.streaming + +.. autosummary:: + :toctree: api/ + + StreamingQuery.awaitTermination + StreamingQuery.exception + StreamingQuery.explain + StreamingQuery.id + StreamingQuery.isActive + StreamingQuery.lastProgress + StreamingQuery.name + StreamingQuery.processAllAvailable + StreamingQuery.recentProgress + StreamingQuery.runId + StreamingQuery.status + StreamingQuery.stop + StreamingQueryManager.active + StreamingQueryManager.awaitAnyTermination + StreamingQueryManager.get + StreamingQueryManager.resetTerminated From 3a952933c348ea8a1b52e7ce5e7a4349d9318ec1 Mon Sep 17 00:00:00 2001 From: Vitalii Li Date: Wed, 8 Jun 2022 09:37:39 +0300 Subject: [PATCH 329/535] [SPARK-39392][SQL][3.3] Refine ANSI error messages for try_* function hints ### What changes were proposed in this pull request? Refine ANSI error messages and remove 'To return NULL instead'. This PR is a backport of https://github.com/apache/spark/pull/36780 from `master` ### Why are the changes needed? Improve error messaging for ANSI mode since the user may not even aware that query was returning NULLs. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit tests Closes #36792 from vli-databricks/SPARK-39392-3.3. Authored-by: Vitalii Li Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 10 +-- .../apache/spark/SparkThrowableSuite.scala | 3 +- .../sql/errors/QueryExecutionErrors.scala | 7 +- .../sql-tests/results/ansi/array.sql.out | 8 +-- .../sql-tests/results/ansi/cast.sql.out | 68 +++++++++---------- .../sql-tests/results/ansi/date.sql.out | 6 +- .../ansi/datetime-parsing-invalid.sql.out | 4 +- .../sql-tests/results/ansi/interval.sql.out | 32 ++++----- .../sql-tests/results/ansi/map.sql.out | 8 +-- .../results/ansi/string-functions.sql.out | 8 +-- .../sql-tests/results/interval.sql.out | 12 ++-- .../results/postgreSQL/boolean.sql.out | 32 ++++----- .../results/postgreSQL/float4.sql.out | 14 ++-- .../results/postgreSQL/float8.sql.out | 10 +-- .../sql-tests/results/postgreSQL/int8.sql.out | 14 ++-- .../results/postgreSQL/select_having.sql.out | 2 +- .../sql-tests/results/postgreSQL/text.sql.out | 4 +- .../results/postgreSQL/window_part2.sql.out | 2 +- .../results/postgreSQL/window_part3.sql.out | 2 +- .../results/postgreSQL/window_part4.sql.out | 2 +- .../timestampNTZ/timestamp-ansi.sql.out | 2 +- .../udf/postgreSQL/udf-select_having.sql.out | 2 +- 22 files changed, 128 insertions(+), 124 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 463bf798e49c3..34588fae5a45c 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -26,11 +26,11 @@ "message" : [ "Cannot use a mixture of aggregate function and group aggregate pandas UDF" ] }, "CAST_INVALID_INPUT" : { - "message" : [ "The value of the type cannot be cast to because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." ], + "message" : [ "The value of the type cannot be cast to because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set to \"false\" to bypass this error." ], "sqlState" : "42000" }, "CAST_OVERFLOW" : { - "message" : [ "The value of the type cannot be cast to due to an overflow. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." ], + "message" : [ "The value of the type cannot be cast to due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set to \"false\" to bypass this error." ], "sqlState" : "22005" }, "CONCURRENT_QUERY" : { @@ -41,7 +41,7 @@ "sqlState" : "22008" }, "DIVIDE_BY_ZERO" : { - "message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], + "message" : [ "Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], "sqlState" : "22012" }, "DUPLICATE_KEY" : { @@ -93,7 +93,7 @@ "message" : [ "The index is out of bounds. The array has elements. If necessary set to \"false\" to bypass this error." ] }, "INVALID_ARRAY_INDEX_IN_ELEMENT_AT" : { - "message" : [ "The index is out of bounds. The array has elements. To return NULL instead, use `try_element_at`. If necessary set to \"false\" to bypass this error." ] + "message" : [ "The index is out of bounds. The array has elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead. If necessary set to \"false\" to bypass this error." ] }, "INVALID_FIELD_NAME" : { "message" : [ "Field name is invalid: is not a struct." ], @@ -115,7 +115,7 @@ "sqlState" : "42000" }, "MAP_KEY_DOES_NOT_EXIST" : { - "message" : [ "Key does not exist. To return NULL instead, use `try_element_at`. If necessary set to \"false\" to bypass this error." ] + "message" : [ "Key does not exist. Use `try_element_at` to tolerate non-existent key and return NULL instead. If necessary set to \"false\" to bypass this error." ] }, "MISSING_COLUMN" : { "message" : [ "Column '' does not exist. Did you mean one of the following? []" ], diff --git a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala index 73135d0f1c790..8b43f07675c14 100644 --- a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala @@ -125,7 +125,8 @@ class SparkThrowableSuite extends SparkFunSuite { // Does not fail with too many args (expects 0 args) assert(getMessage("DIVIDE_BY_ZERO", Array("foo", "bar", "baz")) == - "Division by zero. To return NULL instead, use `try_divide`. If necessary set foo " + + "Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. " + + "If necessary set foo " + "to \"false\" (except for ANSI interval type) to bypass this error.") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index ce5e65a736d2b..21fe0b9267014 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -493,7 +493,9 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { message: String, hint: String = "", errorContext: String = ""): ArithmeticException = { - val alternative = if (hint.nonEmpty) s" To return NULL instead, use '$hint'." else "" + val alternative = if (hint.nonEmpty) { + s" Use '$hint' to tolerate overflow and return NULL instead." + } else "" new SparkArithmeticException( errorClass = "ARITHMETIC_OVERFLOW", messageParameters = Array(message, alternative, SQLConf.ANSI_ENABLED.key), @@ -1093,7 +1095,8 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { value: Any, from: DataType, to: DataType, errorContext: String): Throwable = { val valueString = toSQLValue(value, from) new DateTimeException(s"Invalid input syntax for type ${toSQLType(to)}: $valueString. " + - s"To return NULL instead, use 'try_cast'. If necessary set ${SQLConf.ANSI_ENABLED.key} " + + s"Use `try_cast` to tolerate malformed input and return NULL instead. " + + s"If necessary set ${SQLConf.ANSI_ENABLED.key} " + s"to false to bypass this error." + errorContext) } diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out index 25d2704c2c826..cdc225b191366 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out @@ -168,7 +168,7 @@ select element_at(array(1, 2, 3), 5) struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index 5 is out of bounds. The array has 3 elements. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The index 5 is out of bounds. The array has 3 elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -177,7 +177,7 @@ select element_at(array(1, 2, 3), -5) struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index -5 is out of bounds. The array has 3 elements. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The index -5 is out of bounds. The array has 3 elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -337,7 +337,7 @@ select element_at(array(1, 2, 3), 5) struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index 5 is out of bounds. The array has 3 elements. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The index 5 is out of bounds. The array has 3 elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -346,7 +346,7 @@ select element_at(array(1, 2, 3), -5) struct<> -- !query output org.apache.spark.SparkArrayIndexOutOfBoundsException -The index -5 is out of bounds. The array has 3 elements. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The index -5 is out of bounds. The array has 3 elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out index aff07bd3946ff..1bba7e0bb4ee9 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -8,7 +8,7 @@ SELECT CAST('1.23' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1.23' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1.23' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('1.23' AS int) ^^^^^^^^^^^^^^^^^^^ @@ -20,7 +20,7 @@ SELECT CAST('1.23' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1.23' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1.23' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('1.23' AS long) ^^^^^^^^^^^^^^^^^^^^ @@ -32,7 +32,7 @@ SELECT CAST('-4.56' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '-4.56' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '-4.56' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('-4.56' AS int) ^^^^^^^^^^^^^^^^^^^^ @@ -44,7 +44,7 @@ SELECT CAST('-4.56' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '-4.56' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '-4.56' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('-4.56' AS long) ^^^^^^^^^^^^^^^^^^^^^ @@ -56,7 +56,7 @@ SELECT CAST('abc' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'abc' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('abc' AS int) ^^^^^^^^^^^^^^^^^^ @@ -68,7 +68,7 @@ SELECT CAST('abc' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'abc' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('abc' AS long) ^^^^^^^^^^^^^^^^^^^ @@ -80,7 +80,7 @@ SELECT CAST('abc' AS float) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'abc' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('abc' AS float) ^^^^^^^^^^^^^^^^^^^^ @@ -92,7 +92,7 @@ SELECT CAST('abc' AS double) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'abc' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'abc' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('abc' AS double) ^^^^^^^^^^^^^^^^^^^^^ @@ -104,7 +104,7 @@ SELECT CAST('1234567890123' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1234567890123' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1234567890123' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('1234567890123' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -116,7 +116,7 @@ SELECT CAST('12345678901234567890123' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '12345678901234567890123' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '12345678901234567890123' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('12345678901234567890123' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -128,7 +128,7 @@ SELECT CAST('' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('' AS int) ^^^^^^^^^^^^^^^ @@ -140,7 +140,7 @@ SELECT CAST('' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('' AS long) ^^^^^^^^^^^^^^^^ @@ -152,7 +152,7 @@ SELECT CAST('' AS float) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('' AS float) ^^^^^^^^^^^^^^^^^ @@ -164,7 +164,7 @@ SELECT CAST('' AS double) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('' AS double) ^^^^^^^^^^^^^^^^^^ @@ -192,7 +192,7 @@ SELECT CAST('123.a' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '123.a' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('123.a' AS int) ^^^^^^^^^^^^^^^^^^^^ @@ -204,7 +204,7 @@ SELECT CAST('123.a' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '123.a' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('123.a' AS long) ^^^^^^^^^^^^^^^^^^^^^ @@ -216,7 +216,7 @@ SELECT CAST('123.a' AS float) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '123.a' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('123.a' AS float) ^^^^^^^^^^^^^^^^^^^^^^ @@ -228,7 +228,7 @@ SELECT CAST('123.a' AS double) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '123.a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '123.a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('123.a' AS double) ^^^^^^^^^^^^^^^^^^^^^^^ @@ -248,7 +248,7 @@ SELECT CAST('-2147483649' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '-2147483649' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '-2147483649' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('-2147483649' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -268,7 +268,7 @@ SELECT CAST('2147483648' AS int) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '2147483648' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '2147483648' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('2147483648' AS int) ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -288,7 +288,7 @@ SELECT CAST('-9223372036854775809' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '-9223372036854775809' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '-9223372036854775809' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('-9223372036854775809' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -308,7 +308,7 @@ SELECT CAST('9223372036854775808' AS long) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '9223372036854775808' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '9223372036854775808' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT CAST('9223372036854775808' AS long) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -567,7 +567,7 @@ select cast('1中文' as tinyint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1中文' of the type "STRING" cannot be cast to "TINYINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "TINYINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select cast('1中文' as tinyint) ^^^^^^^^^^^^^^^^^^^^^^ @@ -579,7 +579,7 @@ select cast('1中文' as smallint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1中文' of the type "STRING" cannot be cast to "SMALLINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "SMALLINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select cast('1中文' as smallint) ^^^^^^^^^^^^^^^^^^^^^^^ @@ -591,7 +591,7 @@ select cast('1中文' as INT) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1中文' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select cast('1中文' as INT) ^^^^^^^^^^^^^^^^^^ @@ -603,7 +603,7 @@ select cast('中文1' as bigint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '中文1' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '中文1' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select cast('中文1' as bigint) ^^^^^^^^^^^^^^^^^^^^^ @@ -615,7 +615,7 @@ select cast('1中文' as bigint) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1中文' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1中文' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select cast('1中文' as bigint) ^^^^^^^^^^^^^^^^^^^^^ @@ -646,7 +646,7 @@ struct<> -- !query output org.apache.spark.SparkRuntimeException The value ' - xyz ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. + xyz ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select cast('\t\n xyz \t\r' as boolean) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -678,7 +678,7 @@ select cast('xyz' as decimal(4, 2)) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'xyz' of the type "STRING" cannot be cast to "DECIMAL(4,2)" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'xyz' of the type "STRING" cannot be cast to "DECIMAL(4,2)" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select cast('xyz' as decimal(4, 2)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -698,7 +698,7 @@ select cast('a' as date) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'a' of the type "STRING" cannot be cast to "DATE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DATE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select cast('a' as date) ^^^^^^^^^^^^^^^^^ @@ -718,7 +718,7 @@ select cast('a' as timestamp) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select cast('a' as timestamp) ^^^^^^^^^^^^^^^^^^^^^^ @@ -738,7 +738,7 @@ select cast('a' as timestamp_ntz) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select cast('a' as timestamp_ntz) ^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -750,7 +750,7 @@ select cast(cast('inf' as double) as timestamp) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select cast(cast('inf' as double) as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -762,7 +762,7 @@ select cast(cast('inf' as float) as timestamp) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value Infinity of the type "DOUBLE" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select cast(cast('inf' as float) as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index 88855b16c6785..0b3f408164fba 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -232,7 +232,7 @@ select next_day("xx", "Mon") struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'xx' of the type "STRING" cannot be cast to "DATE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'xx' of the type "STRING" cannot be cast to "DATE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select next_day("xx", "Mon") ^^^^^^^^^^^^^^^^^^^^^ @@ -327,7 +327,7 @@ select date_add('2011-11-11', '1.2') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select date_add('2011-11-11', '1.2') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -438,7 +438,7 @@ select date_sub(date'2011-11-11', '1.2') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1.2' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select date_sub(date'2011-11-11', '1.2') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out index 7d4b3c25c6efa..d69477dd327e8 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/datetime-parsing-invalid.sql.out @@ -242,7 +242,7 @@ select cast("Unparseable" as timestamp) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'Unparseable' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'Unparseable' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select cast("Unparseable" as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -254,7 +254,7 @@ select cast("Unparseable" as date) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value 'Unparseable' of the type "STRING" cannot be cast to "DATE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'Unparseable' of the type "STRING" cannot be cast to "DATE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select cast("Unparseable" as date) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 1364309d01016..f4ec0afb0cc2c 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -122,7 +122,7 @@ select interval 2 second * 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select interval 2 second * 'a' ^^^^^^^^^^^^^^^^^^^^^^^ @@ -134,7 +134,7 @@ select interval 2 second / 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select interval 2 second / 'a' ^^^^^^^^^^^^^^^^^^^^^^^ @@ -146,7 +146,7 @@ select interval 2 year * 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select interval 2 year * 'a' ^^^^^^^^^^^^^^^^^^^^^ @@ -158,7 +158,7 @@ select interval 2 year / 'a' struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select interval 2 year / 'a' ^^^^^^^^^^^^^^^^^^^^^ @@ -186,7 +186,7 @@ select 'a' * interval 2 second struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select 'a' * interval 2 second ^^^^^^^^^^^^^^^^^^^^^^^ @@ -198,7 +198,7 @@ select 'a' * interval 2 year struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select 'a' * interval 2 year ^^^^^^^^^^^^^^^^^^^^^ @@ -228,7 +228,7 @@ select interval '2 seconds' / 0 struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == select interval '2 seconds' / 0 ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -264,7 +264,7 @@ select interval '2' year / 0 struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == select interval '2' year / 0 ^^^^^^^^^^^^^^^^^^^^^ @@ -1516,7 +1516,7 @@ select '4 11:11' - interval '4 22:12' day to minute struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '4 11:11' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '4 11:11' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select '4 11:11' - interval '4 22:12' day to minute ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1528,7 +1528,7 @@ select '4 12:12:12' + interval '4 22:12' day to minute struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '4 12:12:12' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '4 12:12:12' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select '4 12:12:12' + interval '4 22:12' day to minute ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1566,7 +1566,7 @@ select str - interval '4 22:12' day to minute from interval_view struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select str - interval '4 22:12' day to minute from interval_view ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1578,7 +1578,7 @@ select str + interval '4 22:12' day to minute from interval_view struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select str + interval '4 22:12' day to minute from interval_view ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2036,7 +2036,7 @@ SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1 struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. +Overflow in integral divide. Use 'try_divide' to tolerate overflow and return NULL instead. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2048,7 +2048,7 @@ SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1L struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. +Overflow in integral divide. Use 'try_divide' to tolerate overflow and return NULL instead. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1L ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2094,7 +2094,7 @@ SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1 struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. +Overflow in integral divide. Use 'try_divide' to tolerate overflow and return NULL instead. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2106,7 +2106,7 @@ SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1L struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. +Overflow in integral divide. Use 'try_divide' to tolerate overflow and return NULL instead. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1L ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out index 20a6c013a2d0f..9e37402dd470f 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/map.sql.out @@ -8,7 +8,7 @@ select element_at(map(1, 'a', 2, 'b'), 5) struct<> -- !query output org.apache.spark.SparkNoSuchElementException -Key 5 does not exist. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +Key 5 does not exist. Use `try_element_at` to tolerate non-existent key and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select element_at(map(1, 'a', 2, 'b'), 5) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -20,7 +20,7 @@ select map(1, 'a', 2, 'b')[5] struct<> -- !query output org.apache.spark.SparkNoSuchElementException -Key 5 does not exist. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +Key 5 does not exist. Use `try_element_at` to tolerate non-existent key and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select map(1, 'a', 2, 'b')[5] ^^^^^^^^^^^^^^^^^^^^^^ @@ -114,7 +114,7 @@ select element_at(map(1, 'a', 2, 'b'), 5) struct<> -- !query output org.apache.spark.SparkNoSuchElementException -Key 5 does not exist. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +Key 5 does not exist. Use `try_element_at` to tolerate non-existent key and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select element_at(map(1, 'a', 2, 'b'), 5) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -126,7 +126,7 @@ select element_at(map('a', 1, 'b', 2), 'c') struct<> -- !query output org.apache.spark.SparkNoSuchElementException -Key 'c' does not exist. To return NULL instead, use `try_element_at`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +Key 'c' does not exist. Use `try_element_at` to tolerate non-existent key and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select element_at(map('a', 1, 'b', 2), 'c') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index 1729daab8fbcd..35ec3a9756602 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -82,7 +82,7 @@ select left("abcd", -2), left("abcd", 0), left("abcd", 'a') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 43) == ...t("abcd", -2), left("abcd", 0), left("abcd", 'a') ^^^^^^^^^^^^^^^^^ @@ -110,7 +110,7 @@ select right("abcd", -2), right("abcd", 0), right("abcd", 'a') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'a' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 45) == ...("abcd", -2), right("abcd", 0), right("abcd", 'a') ^^^^^^^^^^^^^^^^^^ @@ -419,7 +419,7 @@ SELECT lpad('hi', 'invalid_length') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT lpad('hi', 'invalid_length') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -431,7 +431,7 @@ SELECT rpad('hi', 'invalid_length') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'invalid_length' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT rpad('hi', 'invalid_length') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 08efee4d96a44..71fb0c0845d39 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -204,7 +204,7 @@ select interval '2 seconds' / 0 struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == select interval '2 seconds' / 0 ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -240,7 +240,7 @@ select interval '2' year / 0 struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == select interval '2' year / 0 ^^^^^^^^^^^^^^^^^^^^^ @@ -1992,7 +1992,7 @@ SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1 struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. +Overflow in integral divide. Use 'try_divide' to tolerate overflow and return NULL instead. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2004,7 +2004,7 @@ SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1L struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. +Overflow in integral divide. Use 'try_divide' to tolerate overflow and return NULL instead. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == SELECT (INTERVAL '-178956970-8' YEAR TO MONTH) / -1L ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2050,7 +2050,7 @@ SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1 struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. +Overflow in integral divide. Use 'try_divide' to tolerate overflow and return NULL instead. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -2062,7 +2062,7 @@ SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1L struct<> -- !query output org.apache.spark.SparkArithmeticException -Overflow in integral divide. To return NULL instead, use 'try_divide'. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. +Overflow in integral divide. Use 'try_divide' to tolerate overflow and return NULL instead. If necessary set spark.sql.ansi.enabled to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == SELECT (INTERVAL '-106751991 04:00:54.775808' DAY TO SECOND) / -1L ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out index 26c2dea4a994b..f68d92baee209 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/boolean.sql.out @@ -56,7 +56,7 @@ SELECT boolean('test') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'test' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'test' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean('test') AS error ^^^^^^^^^^^^^^^ @@ -76,7 +76,7 @@ SELECT boolean('foo') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'foo' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'foo' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean('foo') AS error ^^^^^^^^^^^^^^ @@ -104,7 +104,7 @@ SELECT boolean('yeah') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'yeah' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'yeah' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean('yeah') AS error ^^^^^^^^^^^^^^^ @@ -132,7 +132,7 @@ SELECT boolean('nay') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'nay' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'nay' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean('nay') AS error ^^^^^^^^^^^^^^ @@ -144,7 +144,7 @@ SELECT boolean('on') AS true struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'on' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'on' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean('on') AS true ^^^^^^^^^^^^^ @@ -156,7 +156,7 @@ SELECT boolean('off') AS `false` struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'off' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'off' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean('off') AS `false` ^^^^^^^^^^^^^^ @@ -168,7 +168,7 @@ SELECT boolean('of') AS `false` struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'of' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'of' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean('of') AS `false` ^^^^^^^^^^^^^ @@ -180,7 +180,7 @@ SELECT boolean('o') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'o' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'o' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean('o') AS error ^^^^^^^^^^^^ @@ -192,7 +192,7 @@ SELECT boolean('on_') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'on_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'on_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean('on_') AS error ^^^^^^^^^^^^^^ @@ -204,7 +204,7 @@ SELECT boolean('off_') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value 'off_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'off_' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean('off_') AS error ^^^^^^^^^^^^^^^ @@ -224,7 +224,7 @@ SELECT boolean('11') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value '11' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '11' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean('11') AS error ^^^^^^^^^^^^^ @@ -244,7 +244,7 @@ SELECT boolean('000') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value '000' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '000' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean('000') AS error ^^^^^^^^^^^^^^ @@ -256,7 +256,7 @@ SELECT boolean('') AS error struct<> -- !query output org.apache.spark.SparkRuntimeException -The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean('') AS error ^^^^^^^^^^^ @@ -365,7 +365,7 @@ SELECT boolean(string(' tru e ')) AS invalid struct<> -- !query output org.apache.spark.SparkRuntimeException -The value ' tru e ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value ' tru e ' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean(string(' tru e ')) AS invalid ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -377,7 +377,7 @@ SELECT boolean(string('')) AS invalid struct<> -- !query output org.apache.spark.SparkRuntimeException -The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT boolean(string('')) AS invalid ^^^^^^^^^^^^^^^^^^^ @@ -524,7 +524,7 @@ INSERT INTO BOOLTBL2 struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('XXX' AS BOOLEAN): The value 'XXX' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +failed to evaluate expression CAST('XXX' AS BOOLEAN): The value 'XXX' of the type "STRING" cannot be cast to "BOOLEAN" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 2, position 12) == VALUES (boolean('XXX')) ^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out index 59fd447f6fea3..d411c7bc4699f 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float4.sql.out @@ -96,7 +96,7 @@ SELECT float('N A N') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'N A N' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'N A N' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT float('N A N') ^^^^^^^^^^^^^^ @@ -108,7 +108,7 @@ SELECT float('NaN x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'NaN x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'NaN x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT float('NaN x') ^^^^^^^^^^^^^^ @@ -120,7 +120,7 @@ SELECT float(' INFINITY x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value ' INFINITY x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value ' INFINITY x' of the type "STRING" cannot be cast to "FLOAT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT float(' INFINITY x') ^^^^^^^^^^^^^^^^^^^^^^^ @@ -156,7 +156,7 @@ SELECT float(decimal('nan')) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 14) == SELECT float(decimal('nan')) ^^^^^^^^^^^^^^ @@ -340,7 +340,7 @@ SELECT int(float('2147483647')) struct<> -- !query output org.apache.spark.SparkArithmeticException -The value 2.14748365E9 of the type "FLOAT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 2.14748365E9 of the type "FLOAT" cannot be cast to "INT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -357,7 +357,7 @@ SELECT int(float('-2147483900')) struct<> -- !query output org.apache.spark.SparkArithmeticException -The value -2.1474839E9 of the type "FLOAT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value -2.1474839E9 of the type "FLOAT" cannot be cast to "INT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -390,7 +390,7 @@ SELECT bigint(float('-9223380000000000000')) struct<> -- !query output org.apache.spark.SparkArithmeticException -The value -9.22338E18 of the type "FLOAT" cannot be cast to "BIGINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value -9.22338E18 of the type "FLOAT" cannot be cast to "BIGINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out index 6c5ae56debfac..b00a0d094636b 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/float8.sql.out @@ -128,7 +128,7 @@ SELECT double('N A N') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'N A N' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'N A N' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT double('N A N') ^^^^^^^^^^^^^^^ @@ -140,7 +140,7 @@ SELECT double('NaN x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'NaN x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'NaN x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT double('NaN x') ^^^^^^^^^^^^^^^ @@ -152,7 +152,7 @@ SELECT double(' INFINITY x') struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value ' INFINITY x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value ' INFINITY x' of the type "STRING" cannot be cast to "DOUBLE" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == SELECT double(' INFINITY x') ^^^^^^^^^^^^^^^^^^^^^^^^ @@ -188,7 +188,7 @@ SELECT double(decimal('nan')) struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'nan' of the type "STRING" cannot be cast to "DECIMAL(10,0)" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 15) == SELECT double(decimal('nan')) ^^^^^^^^^^^^^^ @@ -845,7 +845,7 @@ SELECT bigint(double('-9223372036854780000')) struct<> -- !query output org.apache.spark.SparkArithmeticException -The value -9.22337203685478E18D of the type "DOUBLE" cannot be cast to "BIGINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value -9.22337203685478E18D of the type "DOUBLE" cannot be cast to "BIGINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out index 10f7606595ec7..1b52cd1580b57 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int8.sql.out @@ -575,7 +575,7 @@ select bigint('9223372036854775800') / bigint('0') struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == select bigint('9223372036854775800') / bigint('0') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -587,7 +587,7 @@ select bigint('-9223372036854775808') / smallint('0') struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == select bigint('-9223372036854775808') / smallint('0') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -599,7 +599,7 @@ select smallint('100') / bigint('0') struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 8) == select smallint('100') / bigint('0') ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -619,7 +619,7 @@ SELECT CAST(q1 AS int) FROM int8_tbl WHERE q2 <> 456 struct<> -- !query output org.apache.spark.SparkArithmeticException -The value 4567890123456789L of the type "BIGINT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 4567890123456789L of the type "BIGINT" cannot be cast to "INT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -636,7 +636,7 @@ SELECT CAST(q1 AS smallint) FROM int8_tbl WHERE q2 <> 456 struct<> -- !query output org.apache.spark.SparkArithmeticException -The value 4567890123456789L of the type "BIGINT" cannot be cast to "SMALLINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 4567890123456789L of the type "BIGINT" cannot be cast to "SMALLINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -673,7 +673,7 @@ SELECT CAST(double('922337203685477580700.0') AS bigint) struct<> -- !query output org.apache.spark.SparkArithmeticException -The value 9.223372036854776E20D of the type "DOUBLE" cannot be cast to "BIGINT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 9.223372036854776E20D of the type "DOUBLE" cannot be cast to "BIGINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query @@ -745,7 +745,7 @@ SELECT string(int(shiftleft(bigint(-1), 63))+1) struct<> -- !query output org.apache.spark.SparkArithmeticException -The value -9223372036854775808L of the type "BIGINT" cannot be cast to "INT" due to an overflow. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value -9223372036854775808L of the type "BIGINT" cannot be cast to "INT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out index e9fd01ef92965..3cd3087501bc5 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_having.sql.out @@ -177,7 +177,7 @@ SELECT 1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2 struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 40) == ...1 AS one FROM test_having WHERE 1/a = 1 HAVING 1 < 2 ^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out index 5828a51c14c2a..2b4f91c50b0dc 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/text.sql.out @@ -65,7 +65,7 @@ select string('four: ') || 2+2 struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select string('four: ') || 2+2 ^^^^^^^^^^^^^^^^^^^^^^^ @@ -77,7 +77,7 @@ select 'four: ' || 2+2 struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'four: 2' of the type "STRING" cannot be cast to "BIGINT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 1, position 8) == select 'four: ' || 2+2 ^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out index b1536a2a3e2f7..91540cfbe36db 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part2.sql.out @@ -462,7 +462,7 @@ window w as (order by f_numeric range between struct<> -- !query output org.apache.spark.SparkNumberFormatException -The value 'NaN' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value 'NaN' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 3, position 13) == window w as (order by f_numeric range between ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out index c510d31a06e82..85be166adc449 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out @@ -72,7 +72,7 @@ insert into datetimes values struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): The value '11:00 BST' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +failed to evaluate expression CAST('11:00 BST' AS TIMESTAMP): The value '11:00 BST' of the type "STRING" cannot be cast to "TIMESTAMP" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 2, position 24) == (1, timestamp '11:00', cast ('11:00 BST' as timestamp), cast ('1 year' as timestamp), ... ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out index 4662cb6493f83..a685214ec792b 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part4.sql.out @@ -501,7 +501,7 @@ FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) struct<> -- !query output org.apache.spark.sql.AnalysisException -failed to evaluate expression CAST('nan' AS INT): The value 'nan' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +failed to evaluate expression CAST('nan' AS INT): The value 'nan' of the type "STRING" cannot be cast to "INT" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. == SQL(line 3, position 29) == FROM (VALUES(1,1),(2,2),(3,(cast('nan' as int))),(4,3),(5,4)) t(a,b) ^^^^^^^^^^^^^^^^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index 531f89003bdc1..9ba57ad8de314 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out @@ -332,7 +332,7 @@ select to_timestamp(1) struct<> -- !query output org.apache.spark.SparkDateTimeException -The value '1' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. +The value '1' of the type "STRING" cannot be cast to "TIMESTAMP_NTZ" because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out index cde9b1c9df162..bcd9bda90c316 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_having.sql.out @@ -177,7 +177,7 @@ SELECT 1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2 struct<> -- !query output org.apache.spark.SparkArithmeticException -Division by zero. To return NULL instead, use `try_divide`. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. +Division by zero. Use `try_divide` to tolerate divisor being 0 and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" (except for ANSI interval type) to bypass this error. == SQL(line 1, position 40) == ...1 AS one FROM test_having WHERE 1/udf(a) = 1 HAVING 1 < 2 ^^^^^^^^ From 376c14ac8cfb6d51c29755b5ee951e5e41981a1a Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 8 Jun 2022 17:14:29 +0900 Subject: [PATCH 330/535] [SPARK-39411][BUILD] Fix release script to address type hint in pyspark/version.py This PR proposes to address type hints `__version__: str` correctly in each release. The type hint was added from Spark 3.3.0 at https://github.com/apache/spark/commit/f59e1d548e2e7c97195697910c40c5383a76ca48. For PySpark to have the correct version in releases. No, dev-only. Manually tested by setting environment variables and running the changed shall commands locally. Closes #36803 from HyukjinKwon/SPARK-39411. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 87b0a41cfb46ba9389c6f5abb9628415a72c4f93) Signed-off-by: Hyukjin Kwon --- dev/create-release/release-build.sh | 7 ++++++- dev/create-release/release-tag.sh | 13 ++++++++++--- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 78fd06ba2be26..ddeb4d322ce3d 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -265,7 +265,12 @@ if [[ "$1" == "package" ]]; then # Write out the VERSION to PySpark version info we rewrite the - into a . and SNAPSHOT # to dev0 to be closer to PEP440. PYSPARK_VERSION=`echo "$SPARK_VERSION" | sed -e "s/-/./" -e "s/SNAPSHOT/dev0/" -e "s/preview/dev/"` - echo "__version__='$PYSPARK_VERSION'" > python/pyspark/version.py + + if [[ $SPARK_VERSION == 3.0* ]] || [[ $SPARK_VERSION == 3.1* ]] || [[ $SPARK_VERSION == 3.2* ]]; then + echo "__version__ = '$PYSPARK_VERSION'" > python/pyspark/version.py + else + echo "__version__: str = '$PYSPARK_VERSION'" > python/pyspark/version.py + fi # Get maven home set by MVN MVN_HOME=`$MVN -version 2>&1 | grep 'Maven home' | awk '{print $NF}'` diff --git a/dev/create-release/release-tag.sh b/dev/create-release/release-tag.sh index 55aa2e569fc87..255bda37ad8fc 100755 --- a/dev/create-release/release-tag.sh +++ b/dev/create-release/release-tag.sh @@ -85,7 +85,11 @@ fi sed -i".tmp1" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$RELEASE_VERSION"'/g' docs/_config.yml sed -i".tmp2" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$RELEASE_VERSION"'/g' docs/_config.yml sed -i".tmp3" "s/'facetFilters':.*$/'facetFilters': [\"version:$RELEASE_VERSION\"]/g" docs/_config.yml -sed -i".tmp4" 's/__version__ = .*$/__version__ = "'"$RELEASE_VERSION"'"/' python/pyspark/version.py +if [[ $RELEASE_VERSION == 3.0* ]] || [[ $RELEASE_VERSION == 3.1* ]] || [[ $RELEASE_VERSION == 3.2* ]]; then + sed -i".tmp4" 's/__version__ = .*$/__version__ = "'"$RELEASE_VERSION"'"/' python/pyspark/version.py +else + sed -i".tmp4" 's/__version__: str = .*$/__version__: str = "'"$RELEASE_VERSION"'"/' python/pyspark/version.py +fi git commit -a -m "Preparing Spark release $RELEASE_TAG" echo "Creating tag $RELEASE_TAG at the head of $GIT_BRANCH" @@ -98,8 +102,11 @@ R_NEXT_VERSION=`echo $NEXT_VERSION | sed 's/-SNAPSHOT//g'` sed -i".tmp5" 's/Version.*$/Version: '"$R_NEXT_VERSION"'/g' R/pkg/DESCRIPTION # Write out the R_NEXT_VERSION to PySpark version info we use dev0 instead of SNAPSHOT to be closer # to PEP440. -sed -i".tmp6" 's/__version__ = .*$/__version__ = "'"$R_NEXT_VERSION.dev0"'"/' python/pyspark/version.py - +if [[ $RELEASE_VERSION == 3.0* ]] || [[ $RELEASE_VERSION == 3.1* ]] || [[ $RELEASE_VERSION == 3.2* ]]; then + sed -i".tmp6" 's/__version__ = .*$/__version__ = "'"$R_NEXT_VERSION.dev0"'"/' python/pyspark/version.py +else + sed -i".tmp6" 's/__version__: str = .*$/__version__: str = "'"$R_NEXT_VERSION.dev0"'"/' python/pyspark/version.py +fi # Update docs with next version sed -i".tmp7" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$NEXT_VERSION"'/g' docs/_config.yml From 94f3e4113ef6fbf0940578bcb279f233e43c27f1 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 8 Jun 2022 21:20:55 +0300 Subject: [PATCH 331/535] [SPARK-39412][SQL] Exclude IllegalStateException from Spark's internal errors ### What changes were proposed in this pull request? In the PR, I propose to exclude `IllegalStateException` from the list of exceptions that are wrapped by `SparkException` with the `INTERNAL_ERROR` error class. ### Why are the changes needed? See explanation in SPARK-39412. ### Does this PR introduce _any_ user-facing change? No, the reverted changes haven't released yet. ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "test:testOnly *ContinuousSuite" $ build/sbt "test:testOnly *MicroBatchExecutionSuite" $ build/sbt "test:testOnly *KafkaMicroBatchV1SourceSuite" $ build/sbt "test:testOnly *KafkaMicroBatchV2SourceSuite" $ build/sbt "test:testOnly *.WholeStageCodegenSuite" ``` Closes #36804 from MaxGekk/exclude-IllegalStateException. Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 19afe1341d277bc2d7dd47175d142a8c71141138) Signed-off-by: Max Gekk --- .../sql/kafka010/KafkaMicroBatchSourceSuite.scala | 11 ++++------- .../src/main/scala/org/apache/spark/sql/Dataset.scala | 2 +- .../apache/spark/sql/execution/QueryExecution.scala | 7 +++---- .../spark/sql/execution/WholeStageCodegenSuite.scala | 11 ++++------- .../streaming/MicroBatchExecutionSuite.scala | 6 ++---- .../sql/streaming/continuous/ContinuousSuite.scala | 7 +++---- 6 files changed, 17 insertions(+), 27 deletions(-) diff --git a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 41277a535f58c..db71f0fd9184a 100644 --- a/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/external/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -34,7 +34,6 @@ import org.apache.kafka.common.TopicPartition import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.scalatest.time.SpanSugar._ -import org.apache.spark.{SparkException, SparkThrowable} import org.apache.spark.sql.{Dataset, ForeachWriter, Row, SparkSession} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.connector.read.streaming.SparkDataStream @@ -667,10 +666,9 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { testUtils.sendMessages(topic2, Array("6")) }, StartStream(), - ExpectFailure[SparkException](e => { - assert(e.asInstanceOf[SparkThrowable].getErrorClass === "INTERNAL_ERROR") + ExpectFailure[IllegalStateException](e => { // The offset of `topic2` should be changed from 2 to 1 - assert(e.getCause.getMessage.contains("was changed from 2 to 1")) + assert(e.getMessage.contains("was changed from 2 to 1")) }) ) } @@ -766,13 +764,12 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { testStream(df)( StartStream(checkpointLocation = metadataPath.getAbsolutePath), - ExpectFailure[SparkException](e => { - assert(e.asInstanceOf[SparkThrowable].getErrorClass === "INTERNAL_ERROR") + ExpectFailure[IllegalStateException](e => { Seq( s"maximum supported log version is v1, but encountered v99999", "produced by a newer version of Spark and cannot be read by this version" ).foreach { message => - assert(e.getCause.toString.contains(message)) + assert(e.toString.contains(message)) } })) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index a4a40cc0e6924..6ef9bc2a7032d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -3848,7 +3848,7 @@ class Dataset[T] private[sql]( /** * Wrap a Dataset action to track the QueryExecution and time cost, then report to the - * user-registered callback functions, and also to convert asserts/illegal states to + * user-registered callback functions, and also to convert asserts/NPE to * the internal error exception. */ private def withAction[U](name: String, qe: QueryExecution)(action: SparkPlan => U) = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala index ab9b9861c036e..840bd4362665b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/QueryExecution.scala @@ -489,11 +489,10 @@ object QueryExecution { } /** - * Converts asserts, null pointer, illegal state exceptions to internal errors. + * Converts asserts, null pointer exceptions to internal errors. */ private[sql] def toInternalError(msg: String, e: Throwable): Throwable = e match { - case e @ (_: java.lang.IllegalStateException | _: java.lang.NullPointerException | - _: java.lang.AssertionError) => + case e @ (_: java.lang.NullPointerException | _: java.lang.AssertionError) => new SparkException( errorClass = "INTERNAL_ERROR", messageParameters = Array(msg + @@ -504,7 +503,7 @@ object QueryExecution { } /** - * Catches asserts, null pointer, illegal state exceptions, and converts them to internal errors. + * Catches asserts, null pointer exceptions, and converts them to internal errors. */ private[sql] def withInternalError[T](msg: String)(block: => T): T = { try { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index 27689bb4d45b4..2be915f000247 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.execution -import org.apache.spark.SparkException import org.apache.spark.sql.{Dataset, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.expressions.codegen.{ByteCodeStats, CodeAndComment, CodeGenerator} import org.apache.spark.sql.execution.adaptive.DisableAdaptiveExecutionSuite @@ -763,11 +762,10 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession "SELECT AVG(v) FROM VALUES(1) t(v)", // Tet case with keys "SELECT k, AVG(v) FROM VALUES((1, 1)) t(k, v) GROUP BY k").foreach { query => - val e = intercept[SparkException] { + val e = intercept[IllegalStateException] { sql(query).collect } - assert(e.getErrorClass === "INTERNAL_ERROR") - assert(e.getCause.getMessage.contains(expectedErrMsg)) + assert(e.getMessage.contains(expectedErrMsg)) } } } @@ -786,11 +784,10 @@ class WholeStageCodegenSuite extends QueryTest with SharedSparkSession // Tet case with keys "SELECT k, AVG(a + b), SUM(a + b + c) FROM VALUES((1, 1, 1, 1)) t(k, a, b, c) " + "GROUP BY k").foreach { query => - val e = intercept[SparkException] { + val e = intercept[IllegalStateException] { sql(query).collect } - assert(e.getErrorClass === "INTERNAL_ERROR") - assert(e.getCause.getMessage.contains(expectedErrMsg)) + assert(e.getMessage.contains(expectedErrMsg)) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala index 9d731248ad404..f06e62b33b1a0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecutionSuite.scala @@ -22,7 +22,6 @@ import java.io.File import org.apache.commons.io.FileUtils import org.scalatest.BeforeAndAfter -import org.apache.spark.{SparkException, SparkThrowable} import org.apache.spark.sql.{DataFrame, Dataset, SparkSession} import org.apache.spark.sql.catalyst.plans.logical.Range import org.apache.spark.sql.connector.read.streaming @@ -94,9 +93,8 @@ class MicroBatchExecutionSuite extends StreamTest with BeforeAndAfter { testStream(streamEvent) ( AddData(inputData, 1, 2, 3, 4, 5, 6), StartStream(Trigger.Once, checkpointLocation = checkpointDir.getAbsolutePath), - ExpectFailure[SparkException] { e => - assert(e.asInstanceOf[SparkThrowable].getErrorClass === "INTERNAL_ERROR") - assert(e.getCause.getMessage.contains("batch 3 doesn't exist")) + ExpectFailure[IllegalStateException] { e => + assert(e.getMessage.contains("batch 3 doesn't exist")) } ) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala index a28d44caab065..5893c3da09812 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/continuous/ContinuousSuite.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.streaming.continuous import java.sql.Timestamp -import org.apache.spark.{SparkContext, SparkException, SparkThrowable} +import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskStart} import org.apache.spark.sql._ import org.apache.spark.sql.execution.streaming._ @@ -440,9 +440,8 @@ class ContinuousEpochBacklogSuite extends ContinuousSuiteBase { testStream(df)( StartStream(Trigger.Continuous(1)), - ExpectFailure[SparkException] { e => - assert(e.asInstanceOf[SparkThrowable].getErrorClass === "INTERNAL_ERROR") - e.getCause.getMessage.contains("queue has exceeded its maximum") + ExpectFailure[IllegalStateException] { e => + e.getMessage.contains("queue has exceeded its maximum") } ) } From 5847014fc3fe08b8a59c107a99c1540fbb2c2208 Mon Sep 17 00:00:00 2001 From: Amin Borjian Date: Wed, 8 Jun 2022 13:30:44 -0700 Subject: [PATCH 332/535] [SPARK-39393][SQL] Parquet data source only supports push-down predicate filters for non-repeated primitive types ### What changes were proposed in this pull request? In Spark version 3.1.0 and newer, Spark creates extra filter predicate conditions for repeated parquet columns. These fields do not have the ability to have a filter predicate, according to the [PARQUET-34](https://issues.apache.org/jira/browse/PARQUET-34) issue in the parquet library. This PR solves this problem until the appropriate functionality is provided by the parquet. Before this PR: Assume follow Protocol buffer schema: ``` message Model { string name = 1; repeated string keywords = 2; } ``` Suppose a parquet file is created from a set of records in the above format with the help of the parquet-protobuf library. Using Spark version 3.1.0 or newer, we get following exception when run the following query using spark-shell: ``` val data = spark.read.parquet("/path/to/parquet") data.registerTempTable("models") spark.sql("select * from models where array_contains(keywords, 'X')").show(false) ``` ``` Caused by: java.lang.IllegalArgumentException: FilterPredicates do not currently support repeated columns. Column keywords is repeated. at org.apache.parquet.filter2.predicate.SchemaCompatibilityValidator.validateColumn(SchemaCompatibilityValidator.java:176) at org.apache.parquet.filter2.predicate.SchemaCompatibilityValidator.validateColumnFilterPredicate(SchemaCompatibilityValidator.java:149) at org.apache.parquet.filter2.predicate.SchemaCompatibilityValidator.visit(SchemaCompatibilityValidator.java:89) at org.apache.parquet.filter2.predicate.SchemaCompatibilityValidator.visit(SchemaCompatibilityValidator.java:56) at org.apache.parquet.filter2.predicate.Operators$NotEq.accept(Operators.java:192) at org.apache.parquet.filter2.predicate.SchemaCompatibilityValidator.validate(SchemaCompatibilityValidator.java:61) at org.apache.parquet.filter2.compat.RowGroupFilter.visit(RowGroupFilter.java:95) at org.apache.parquet.filter2.compat.RowGroupFilter.visit(RowGroupFilter.java:45) at org.apache.parquet.filter2.compat.FilterCompat$FilterPredicateCompat.accept(FilterCompat.java:149) at org.apache.parquet.filter2.compat.RowGroupFilter.filterRowGroups(RowGroupFilter.java:72) at org.apache.parquet.hadoop.ParquetFileReader.filterRowGroups(ParquetFileReader.java:870) at org.apache.parquet.hadoop.ParquetFileReader.(ParquetFileReader.java:789) at org.apache.parquet.hadoop.ParquetFileReader.open(ParquetFileReader.java:657) at org.apache.parquet.hadoop.ParquetRecordReader.initializeInternalReader(ParquetRecordReader.java:162) at org.apache.parquet.hadoop.ParquetRecordReader.initialize(ParquetRecordReader.java:140) at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.$anonfun$buildReaderWithPartitionValues$2(ParquetFileFormat.scala:373) at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:127) ... ``` The cause of the problem is due to a change in the data filtering conditions: ``` spark.sql("select * from log where array_contains(keywords, 'X')").explain(true); // Spark 3.0.2 and older == Physical Plan == ... +- FileScan parquet [link#0,keywords#1] DataFilters: [array_contains(keywords#1, Google)] PushedFilters: [] ... // Spark 3.1.0 and newer == Physical Plan == ... +- FileScan parquet [link#0,keywords#1] DataFilters: [isnotnull(keywords#1), array_contains(keywords#1, Google)] PushedFilters: [IsNotNull(keywords)] ... ``` Pushing filters down for repeated columns of parquet is not necessary because it is not supported by parquet library for now. So we can exclude them from pushed predicate filters and solve issue. ### Why are the changes needed? Predicate filters that are pushed down to parquet should not be created on repeated-type fields. ### Does this PR introduce any user-facing change? No, It's only fixed a bug and before this, due to the limitations of the parquet library, no more work was possible. ### How was this patch tested? Add an extra test to ensure problem solved. Closes #36781 from Borjianamin98/master. Authored-by: Amin Borjian Signed-off-by: huaxingao (cherry picked from commit ac2881a8c3cfb196722a5680a62ebd6bb9fba728) Signed-off-by: huaxingao --- .../datasources/parquet/ParquetFilters.scala | 6 +++- .../parquet/ParquetFilterSuite.scala | 29 +++++++++++++++++++ 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala index 75060cfca24e6..9502ec0316ca3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala @@ -33,6 +33,7 @@ import org.apache.parquet.schema.{GroupType, LogicalTypeAnnotation, MessageType, import org.apache.parquet.schema.LogicalTypeAnnotation.{DecimalLogicalTypeAnnotation, TimeUnit} import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._ +import org.apache.parquet.schema.Type.Repetition import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, DateTimeUtils, IntervalUtils} import org.apache.spark.sql.catalyst.util.RebaseDateTime.{rebaseGregorianToJulianDays, rebaseGregorianToJulianMicros, RebaseSpec} @@ -64,7 +65,10 @@ class ParquetFilters( fields: Seq[Type], parentFieldNames: Array[String] = Array.empty): Seq[ParquetPrimitiveField] = { fields.flatMap { - case p: PrimitiveType => + // Parquet only supports predicate push-down for non-repeated primitive types. + // TODO(SPARK-39393): Remove extra condition when parquet added filter predicate support for + // repeated columns (https://issues.apache.org/jira/browse/PARQUET-34) + case p: PrimitiveType if p.getRepetition != Repetition.REPEATED => Some(ParquetPrimitiveField(fieldNames = parentFieldNames :+ p.getName, fieldType = ParquetSchemaType(p.getLogicalTypeAnnotation, p.getPrimitiveTypeName, p.getTypeLength))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 7a09011f27c7e..92798d4f4ca41 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.datasources.parquet +import java.io.File import java.math.{BigDecimal => JBigDecimal} import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} @@ -1297,6 +1298,34 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } + test("SPARK-39393: Do not push down predicate filters for repeated primitive fields") { + import ParquetCompatibilityTest._ + withTempDir { dir => + val protobufParquetFilePath = new File(dir, "protobuf-parquet").getCanonicalPath + + val protobufSchema = + """message protobuf_style { + | repeated int32 f; + |} + """.stripMargin + + writeDirect(protobufParquetFilePath, protobufSchema, { rc => + rc.message { + rc.field("f", 0) { + rc.addInteger(1) + rc.addInteger(2) + } + } + }) + + // If the "isnotnull(f)" filter gets pushed down, this query will throw an exception + // since column "f" is repeated primitive column in the Parquet file. + checkAnswer( + spark.read.parquet(dir.getCanonicalPath).filter("isnotnull(f)"), + Seq(Row(Seq(1, 2)))) + } + } + test("Filters should be pushed down for vectorized Parquet reader at row group level") { import testImplicits._ From 66826567fa12e57119acc97f9971e36fe834df21 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 9 Jun 2022 14:26:45 +0900 Subject: [PATCH 333/535] [SPARK-39421][PYTHON][DOCS] Pin the docutils version <0.18 in documentation build ### What changes were proposed in this pull request? This PR fixes the Sphinx build failure below (see https://github.com/singhpk234/spark/runs/6799026458?check_suite_focus=true): ``` Moving to python/docs directory and building sphinx. Running Sphinx v3.0.4 WARNING:root:'PYARROW_IGNORE_TIMEZONE' environment variable was not set. It is required to set this environment variable to '1' in both driver and executor sides if you use pyarrow>=2.0.0. pandas-on-Spark will set it for you but it does not work if there is a Spark context already launched. /__w/spark/spark/python/pyspark/pandas/supported_api_gen.py:101: UserWarning: Warning: Latest version of pandas(>=1.4.0) is required to generate the documentation; however, your version was 1.3.5 warnings.warn( Warning, treated as error: node class 'meta' is already registered, its visitors will be overridden make: *** [Makefile:35: html] Error 2 ------------------------------------------------ Jekyll 4.2.1 Please append `--trace` to the `build` command for any additional information or backtrace. ------------------------------------------------ ``` Sphinx build fails apparently with the latest docutils (see also https://issues.apache.org/jira/browse/FLINK-24662). we should pin the version. ### Why are the changes needed? To recover the CI. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? CI in this PR should test it out. Closes #36813 from HyukjinKwon/SPARK-39421. Lead-authored-by: Hyukjin Kwon Co-authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit c196ff4dfa1d9f1a8e20b884ee5b4a4e6e65a6e3) Signed-off-by: Hyukjin Kwon --- .github/workflows/build_and_test.yml | 1 + dev/requirements.txt | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 1f5df70cde936..e0e9f70556c5f 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -528,6 +528,7 @@ jobs: python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme ipython nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' python3.9 -m pip install ipython_genutils # See SPARK-38517 python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' + python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421 apt-get update -y apt-get install -y ruby ruby-dev Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'markdown', 'e1071', 'roxygen2'), repos='https://cloud.r-project.org/')" diff --git a/dev/requirements.txt b/dev/requirements.txt index 22e72d555434b..e7e0a4b427450 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -35,6 +35,7 @@ numpydoc jinja2<3.0.0 sphinx<3.1.0 sphinx-plotly-directive +docutils<0.18.0 # Development scripts jira From 4e5ada90cfb89caa25addd8991cec2af843e24a9 Mon Sep 17 00:00:00 2001 From: Prashant Singh Date: Wed, 8 Jun 2022 23:08:44 -0700 Subject: [PATCH 334/535] [SPARK-39417][SQL] Handle Null partition values in PartitioningUtils ### What changes were proposed in this pull request? We should not try casting everything returned by `removeLeadingZerosFromNumberTypePartition` to string, as it returns null value for the cases when partition has null value and is already replaced by `DEFAULT_PARTITION_NAME` ### Why are the changes needed? for null partitions where `removeLeadingZerosFromNumberTypePartition` is called it would throw a NPE and hence the query would fail. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added a UT, which would fail with an NPE otherwise. Closes #36810 from singhpk234/psinghvk/fix-npe. Authored-by: Prashant Singh Signed-off-by: huaxingao (cherry picked from commit dcfd9f01289f26c1a25e97432710a13772b3ad4c) Signed-off-by: huaxingao --- .../sql/execution/datasources/PartitioningUtils.scala | 2 +- .../parquet/ParquetPartitionDiscoverySuite.scala | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 166fc85289991..e856bb5b9c2f9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -359,7 +359,7 @@ object PartitioningUtils extends SQLConfHelper{ def removeLeadingZerosFromNumberTypePartition(value: String, dataType: DataType): String = dataType match { case ByteType | ShortType | IntegerType | LongType | FloatType | DoubleType => - castPartValueToDesiredType(dataType, value, null).toString + Option(castPartValueToDesiredType(dataType, value, null)).map(_.toString).orNull case _ => value } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala index ee905fba74527..bd908a36401f2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetPartitionDiscoverySuite.scala @@ -1259,6 +1259,14 @@ class ParquetV2PartitionDiscoverySuite extends ParquetPartitionDiscoverySuite { assert("p_int=10/p_float=1.0" === path) } + test("SPARK-39417: Null partition value") { + // null partition value is replaced by DEFAULT_PARTITION_NAME before hitting getPathFragment. + val spec = Map("p_int"-> ExternalCatalogUtils.DEFAULT_PARTITION_NAME) + val schema = new StructType().add("p_int", "int") + val path = PartitioningUtils.getPathFragment(spec, schema) + assert(s"p_int=${ExternalCatalogUtils.DEFAULT_PARTITION_NAME}" === path) + } + test("read partitioned table - partition key included in Parquet file") { withTempDir { base => for { From ea0571e001e6ce4ac415f20142c39eedc18250e1 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Thu, 9 Jun 2022 14:26:18 +0800 Subject: [PATCH 335/535] [SPARK-38997][SPARK-39037][SQL][FOLLOWUP] PushableColumnWithoutNestedColumn` need be translated to predicate too ### What changes were proposed in this pull request? https://github.com/apache/spark/pull/35768 assume the expression in `And`, `Or` and `Not` must be predicate. https://github.com/apache/spark/pull/36370 and https://github.com/apache/spark/pull/36325 supported push down expressions in `GROUP BY` and `ORDER BY`. But the children of `And`, `Or` and `Not` can be `FieldReference.column(name)`. `FieldReference.column(name)` is not a predicate, so the assert may fail. ### Why are the changes needed? This PR fix the bug for `PushableColumnWithoutNestedColumn`. ### Does this PR introduce _any_ user-facing change? 'Yes'. Let the push-down framework more correctly. ### How was this patch tested? New tests Closes #36776 from beliefer/SPARK-38997_SPARK-39037_followup. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit 125555cf2c1388b28fcc34beae09f971c5fadcb7) Signed-off-by: Wenchen Fan --- .../catalyst/util/V2ExpressionBuilder.scala | 13 +++--- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 42 +++++++++++++++++++ 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala index 487b809d48a01..c77a040bc64e5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -49,14 +49,17 @@ class V2ExpressionBuilder( case Literal(true, BooleanType) => Some(new AlwaysTrue()) case Literal(false, BooleanType) => Some(new AlwaysFalse()) case Literal(value, dataType) => Some(LiteralValue(value, dataType)) - case col @ pushableColumn(name) if nestedPredicatePushdownEnabled => + case col @ pushableColumn(name) => + val ref = if (nestedPredicatePushdownEnabled) { + FieldReference(name) + } else { + FieldReference.column(name) + } if (isPredicate && col.dataType.isInstanceOf[BooleanType]) { - Some(new V2Predicate("=", Array(FieldReference(name), LiteralValue(true, BooleanType)))) + Some(new V2Predicate("=", Array(ref, LiteralValue(true, BooleanType)))) } else { - Some(FieldReference(name)) + Some(ref) } - case pushableColumn(name) if !nestedPredicatePushdownEnabled => - Some(FieldReference.column(name)) case in @ InSet(child, hset) => generateExpression(child).map { v => val children = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 91526cef50785..858aeaa13653b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -851,6 +851,48 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel |[DEPT, CASE WHEN (SALARY > 8000.00) AND (SALARY < 10000.00) THEN SALARY ELSE 0.00 END], |""".stripMargin.replaceAll("\n", " ")) checkAnswer(df5, Seq(Row(1, 0, 10000), Row(1, 9000, 9000), Row(2, 0, 22000), Row(6, 0, 12000))) + + val df6 = sql( + """ + |SELECT CASE WHEN SALARY > 8000 AND is_manager <> false THEN SALARY ELSE 0 END as key, + | SUM(SALARY) FROM h2.test.employee GROUP BY key""".stripMargin) + checkAggregateRemoved(df6) + checkPushedInfo(df6, + """ + |PushedAggregates: [SUM(SALARY)], + |PushedFilters: [], + |PushedGroupByExpressions: + |[CASE WHEN (SALARY > 8000.00) AND (IS_MANAGER = true) THEN SALARY ELSE 0.00 END], + |""".stripMargin.replaceAll("\n", " ")) + checkAnswer(df6, Seq(Row(0, 21000), Row(10000, 20000), Row(12000, 12000))) + + val df7 = sql( + """ + |SELECT CASE WHEN SALARY > 8000 OR is_manager <> false THEN SALARY ELSE 0 END as key, + | SUM(SALARY) FROM h2.test.employee GROUP BY key""".stripMargin) + checkAggregateRemoved(df7) + checkPushedInfo(df7, + """ + |PushedAggregates: [SUM(SALARY)], + |PushedFilters: [], + |PushedGroupByExpressions: + |[CASE WHEN (SALARY > 8000.00) OR (IS_MANAGER = true) THEN SALARY ELSE 0.00 END], + |""".stripMargin.replaceAll("\n", " ")) + checkAnswer(df7, Seq(Row(10000, 20000), Row(12000, 24000), Row(9000, 9000))) + + val df8 = sql( + """ + |SELECT CASE WHEN NOT(is_manager <> false) THEN SALARY ELSE 0 END as key, + | SUM(SALARY) FROM h2.test.employee GROUP BY key""".stripMargin) + checkAggregateRemoved(df8) + checkPushedInfo(df8, + """ + |PushedAggregates: [SUM(SALARY)], + |PushedFilters: [], + |PushedGroupByExpressions: + |[CASE WHEN NOT (IS_MANAGER = true) THEN SALARY ELSE 0.00 END], + |""".stripMargin.replaceAll("\n", " ")) + checkAnswer(df8, Seq(Row(0, 32000), Row(12000, 12000), Row(9000, 9000))) } test("scan with aggregate push-down: DISTINCT SUM with group by") { From d622f3133c1acd3f00aba0108838fdba826cef9b Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 9 Jun 2022 15:24:11 +0300 Subject: [PATCH 336/535] [SPARK-39427][SQL] Disable ANSI intervals in the percentile functions In the PR, I propose to don't support ANSI intervals by the percentile functions, and remove the YearMonthIntervalType and DayTimeIntervalType types from the list of input types. I propose to properly support ANSI intervals and enable them back after that. To don't confuse users by results of the percentile functions when inputs are ANSI intervals. At the moment, the functions return DOUBLE (or ARRAY OF DAUBLE) type independently from inputs. In the case of ANSI intervals, the functions should return ANSI interval too. No, since the functions haven't released yet. By running affected test suites: ``` $ build/sbt "sql/test:testOnly org.apache.spark.sql.expressions.ExpressionInfoSuite" $ build/sbt "sql/testOnly *ExpressionsSchemaSuite" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" $ build/sbt "test:testOnly *PercentileSuite" $ build/sbt "test:testOnly *PercentileQuerySuite" ``` and checked manually that ANSI intervals are not supported as input types: ```sql spark-sql> SELECT percentile(col, 0.5) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '10' MONTH) AS tab(col); Error in query: cannot resolve 'percentile(tab.col, CAST(0.5BD AS DOUBLE), 1L)' due to data type mismatch: argument 1 requires numeric type, however, 'tab.col' is of interval month type.; line 1 pos 7; ``` Closes #36817 from MaxGekk/percentile-disable-ansi-interval. Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit ee24847ad100139628a9bffe45f711bdebaa0170) Signed-off-by: Max Gekk --- .../expressions/aggregate/percentiles.scala | 11 +++-------- .../expressions/aggregate/PercentileSuite.scala | 8 ++++---- .../apache/spark/sql/PercentileQuerySuite.scala | 16 ++++++++-------- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala index e861fb370ca1e..bd62c0aef7580 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/percentiles.scala @@ -67,8 +67,7 @@ abstract class PercentileBase extends TypedImperativeAggregate[OpenHashMap[AnyRe case _: ArrayType => ArrayType(DoubleType, false) case _ => DoubleType } - Seq(TypeCollection(NumericType, YearMonthIntervalType, DayTimeIntervalType), - percentageExpType, IntegralType) + Seq(NumericType, percentageExpType, IntegralType) } // Check the inputTypes are valid, and the percentageExpression satisfies: @@ -288,7 +287,7 @@ abstract class PercentileBase extends TypedImperativeAggregate[OpenHashMap[AnyRe usage = """ _FUNC_(col, percentage [, frequency]) - Returns the exact percentile value of numeric - or ansi interval column `col` at the given percentage. The value of percentage must be + column `col` at the given percentage. The value of percentage must be between 0.0 and 1.0. The value of frequency should be positive integral _FUNC_(col, array(percentage1 [, percentage2]...) [, frequency]) - Returns the exact percentile value array of numeric column `col` at the given percentage(s). Each value @@ -301,10 +300,6 @@ abstract class PercentileBase extends TypedImperativeAggregate[OpenHashMap[AnyRe 3.0 > SELECT _FUNC_(col, array(0.25, 0.75)) FROM VALUES (0), (10) AS tab(col); [2.5,7.5] - > SELECT _FUNC_(col, 0.5) FROM VALUES (INTERVAL '0' MONTH), (INTERVAL '10' MONTH) AS tab(col); - 5.0 - > SELECT _FUNC_(col, array(0.2, 0.5)) FROM VALUES (INTERVAL '0' SECOND), (INTERVAL '10' SECOND) AS tab(col); - [2000000.0,5000000.0] """, group = "agg_funcs", since = "2.1.0") @@ -359,7 +354,7 @@ case class Percentile( /** * Return a percentile value based on a continuous distribution of - * numeric or ansi interval column at the given percentage (specified in ORDER BY clause). + * numeric column at the given percentage (specified in ORDER BY clause). * The value of percentage must be between 0.0 and 1.0. */ case class PercentileCont(left: Expression, right: Expression, reverse: Boolean = false) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala index 7b85be05e3b3d..6cc01f7bab9ad 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PercentileSuite.scala @@ -170,8 +170,8 @@ class PercentileSuite extends SparkFunSuite { val child = AttributeReference("a", dataType)() val percentile = new Percentile(child, percentage) assertEqual(percentile.checkInputDataTypes(), - TypeCheckFailure(s"argument 1 requires (numeric or interval year to month or " + - s"interval day to second) type, however, 'a' is of ${dataType.simpleString} type.")) + TypeCheckFailure(s"argument 1 requires numeric type," + + s" however, 'a' is of ${dataType.simpleString} type.")) } val invalidFrequencyDataTypes = Seq(FloatType, DoubleType, BooleanType, @@ -184,8 +184,8 @@ class PercentileSuite extends SparkFunSuite { val frq = AttributeReference("frq", frequencyType)() val percentile = new Percentile(child, percentage, frq) assertEqual(percentile.checkInputDataTypes(), - TypeCheckFailure(s"argument 1 requires (numeric or interval year to month or " + - s"interval day to second) type, however, 'a' is of ${dataType.simpleString} type.")) + TypeCheckFailure(s"argument 1 requires numeric type," + + s" however, 'a' is of ${dataType.simpleString} type.")) } for(dataType <- validDataTypes; diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PercentileQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PercentileQuerySuite.scala index f39f0c1802483..823c1375de032 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PercentileQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/PercentileQuerySuite.scala @@ -29,21 +29,21 @@ class PercentileQuerySuite extends QueryTest with SharedSparkSession { private val table = "percentile_test" - test("SPARK-37138: Support Ansi Interval type in Percentile") { + test("SPARK-37138, SPARK-39427: Disable Ansi Interval type in Percentile") { withTempView(table) { Seq((Period.ofMonths(100), Duration.ofSeconds(100L)), (Period.ofMonths(200), Duration.ofSeconds(200L)), (Period.ofMonths(300), Duration.ofSeconds(300L))) .toDF("col1", "col2").createOrReplaceTempView(table) - checkAnswer( + val e = intercept[AnalysisException] { spark.sql( s"""SELECT - | CAST(percentile(col1, 0.5) AS STRING), - | SUM(null), - | CAST(percentile(col2, 0.5) AS STRING) - |FROM $table - """.stripMargin), - Row("200.0", null, "2.0E8")) + | CAST(percentile(col1, 0.5) AS STRING), + | SUM(null), + | CAST(percentile(col2, 0.5) AS STRING) + |FROM $table""".stripMargin).collect() + } + assert(e.getMessage.contains("data type mismatch")) } } } From e26db0153ac25ae1b1759eb780da70dbd598b0d4 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 9 Jun 2022 16:43:31 +0300 Subject: [PATCH 337/535] [SPARK-39226][DOCS][FOLLOWUP] Update the migration guide after fixing the precision of the return type of round-like functions ### What changes were proposed in this pull request? Update the migration guide after fixing the precision of the return type of round-like functions. How to reproduce this issue: ```sql -- Spark 3.2 CREATE TABLE t1(CURNCY_AMT DECIMAL(18,6)) using parquet; CREATE VIEW v1 AS SELECT BROUND(CURNCY_AMT, 6) AS CURNCY_AMT FROM t1; ``` ```sql -- Spark 3.3 SELECT * FROM v1; org.apache.spark.sql.AnalysisException: [CANNOT_UP_CAST_DATATYPE] Cannot up cast CURNCY_AMT from "DECIMAL(19,6)" to "DECIMAL(18,6)". ``` ### Why are the changes needed? Update the migration guide. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #36821 from wangyum/SPARK-39226. Authored-by: Yuming Wang Signed-off-by: Max Gekk (cherry picked from commit 105379406a371624569ac820e30d45fee3f017fc) Signed-off-by: Max Gekk --- docs/sql-migration-guide.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index a7757d6c9a027..7601355f6d51d 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -65,6 +65,8 @@ license: | - Since Spark 3.3, when reading values from a JSON attribute defined as `FloatType` or `DoubleType`, the strings `"+Infinity"`, `"+INF"`, and `"-INF"` are now parsed to the appropriate values, in addition to the already supported `"Infinity"` and `"-Infinity"` variations. This change was made to improve consistency with Jackson's parsing of the unquoted versions of these values. Also, the `allowNonNumericNumbers` option is now respected so these strings will now be considered invalid if this option is disabled. - Since Spark 3.3, Spark will try to use built-in data source writer instead of Hive serde in `INSERT OVERWRITE DIRECTORY`. This behavior is effective only if `spark.sql.hive.convertMetastoreParquet` or `spark.sql.hive.convertMetastoreOrc` is enabled respectively for Parquet and ORC formats. To restore the behavior before Spark 3.3, you can set `spark.sql.hive.convertMetastoreInsertDir` to `false`. + + - Since Spark 3.3, the precision of the return type of round-like functions has been fixed. This may cause Spark throw `AnalysisException` of the `CANNOT_UP_CAST_DATATYPE` error class when using views created by prior versions. In such cases, you need to recreate the views using ALTER VIEW AS or CREATE OR REPLACE VIEW AS with newer Spark versions. ## Upgrading from Spark SQL 3.1 to 3.2 From eea586d0c3df2e4b479c2c19f51f115927efeec1 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Thu, 9 Jun 2022 08:09:36 -0700 Subject: [PATCH 338/535] [SPARK-39412][SQL][FOLLOWUP][TESTS][3.3] Check `IllegalStateException` instead of Spark's internal errors ### What changes were proposed in this pull request? In the PR, I propose to correctly check `IllegalStateException` instead of `SparkException` w/ the `INTERNAL_ERROR` error class. The issues were introduced by https://github.com/apache/spark/pull/36804 merged to master and 3.3. ### Why are the changes needed? To fix test failures in GAs. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "test:testOnly *BucketedReadWithoutHiveSupportSuite" $ build/sbt "test:testOnly *.AdaptiveQueryExecSuite" $ build/sbt "test:testOnly *.SubquerySuite" ``` Closes #36824 from MaxGekk/fix-IllegalStateException-3.3. Authored-by: Max Gekk Signed-off-by: Dongjoon Hyun --- .../test/scala/org/apache/spark/sql/SubquerySuite.scala | 6 ++---- .../sql/execution/adaptive/AdaptiveQueryExecSuite.scala | 8 +++----- .../org/apache/spark/sql/sources/BucketedReadSuite.scala | 6 ++---- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 396fca47634ac..5a1ea6ea29e56 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql import scala.collection.mutable.ArrayBuffer -import org.apache.spark.SparkException import org.apache.spark.sql.catalyst.expressions.SubqueryExpression import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan, Sort} import org.apache.spark.sql.execution.{ColumnarToRowExec, ExecSubqueryExpression, FileSourceScanExec, InputAdapter, ReusedSubqueryExec, ScalarSubquery, SubqueryExec, WholeStageCodegenExec} @@ -147,12 +146,11 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } test("runtime error when the number of rows is greater than 1") { - val e = intercept[SparkException] { + val e = intercept[IllegalStateException] { sql("select (select a from (select 1 as a union all select 2 as a) t) as b").collect() } // TODO(SPARK-39167): Throw an exception w/ an error class for multiple rows from a subquery - assert(e.getErrorClass === "INTERNAL_ERROR") - assert(e.getCause.getMessage.contains( + assert(e.getMessage.contains( "more than one row returned by a subquery used as an expression")) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 90aff26b7fe31..f068ab8a4e2b8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -23,7 +23,6 @@ import java.net.URI import org.apache.logging.log4j.Level import org.scalatest.PrivateMethodTester -import org.apache.spark.SparkException import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerJobStart} import org.apache.spark.sql.{Dataset, QueryTest, Row, SparkSession, Strategy} import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} @@ -857,13 +856,12 @@ class AdaptiveQueryExecSuite df1.write.parquet(tableDir.getAbsolutePath) val aggregated = spark.table("bucketed_table").groupBy("i").count() - val error = intercept[SparkException] { + val error = intercept[IllegalStateException] { aggregated.count() } // TODO(SPARK-39163): Throw an exception w/ error class for an invalid bucket file - assert(error.getErrorClass === "INTERNAL_ERROR") - assert(error.getCause.toString contains "Invalid bucket file") - assert(error.getCause.getSuppressed.size === 0) + assert(error.toString contains "Invalid bucket file") + assert(error.getSuppressed.size === 0) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index c3250f8d9fc0c..8d593a55a7ef5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -22,7 +22,6 @@ import java.net.URI import scala.util.Random -import org.apache.spark.SparkException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.expressions @@ -842,12 +841,11 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti df1.write.parquet(tableDir.getAbsolutePath) val aggregated = spark.table("bucketed_table").groupBy("i").count() - val e = intercept[SparkException] { + val e = intercept[IllegalStateException] { aggregated.count() } // TODO(SPARK-39163): Throw an exception w/ error class for an invalid bucket file - assert(e.getErrorClass === "INTERNAL_ERROR") - assert(e.getCause.toString contains "Invalid bucket file") + assert(e.toString contains "Invalid bucket file") } } From f74867bddfbcdd4d08076db36851e88b15e66556 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 9 Jun 2022 17:55:37 +0000 Subject: [PATCH 339/535] Preparing Spark release v3.3.0-rc6 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 39 files changed, 41 insertions(+), 41 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf6d..9479bb3bf87df 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index d12f2ad73fabd..2e9c4d9960b14 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 842d63f5d3811..2a9acfa335e71 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f7d187bf9527d..7b17e625d7599 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 53f38df885102..c5c920e774782 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 845f6659407bd..697b5a3928e58 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8e1590891933b..ad2db11370ae7 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1987c13328559..1a7bdee70f3bc 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index c7e7be1e3bbf1..66dc93de0599e 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index ac644130a61e2..219ceca6648d8 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 9a6fe2d313fde..4966db6b4a8af 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.1-SNAPSHOT -SPARK_VERSION_SHORT: 3.3.1 +SPARK_VERSION: 3.3.0 +SPARK_VERSION_SHORT: 3.3.0 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.1"] + 'facetFilters': ["version:3.3.0"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index e97f3b40cb2bd..42e58f2726df1 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 578854e3eaa9a..5aaa91cfdf20d 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 95e1ce74ca172..36309bb417362 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 0b803c5d3864a..072cedaa594c8 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 95726829bcbbd..b9063b543f512 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 25e7e25ae25b6..6f6a51a972c73 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 3ba16b7b838a2..95fd080383995 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 6cee275e6adc7..33cf30ff803e7 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index ad23da0d7f249..79b2e8f2a5a47 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 6de1f9eee532c..647d0c3f87552 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 95c8c312eb0e2..562ddc8dcc23c 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 4d87bd2730e3b..08bcae6e0f53f 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 889f0b5a92e08..beceaecd31a1c 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 9b7b0370d3b4d..584a5df0a4a35 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 04a68a47a4f45..42bab72668c00 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/pom.xml b/pom.xml index 32bd40ee83461..5f7b1b0b9dc46 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index 99105d061cf74..36215e178c5c7 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__: str = "3.3.0.dev0" +__version__: str = "3.3.0" diff --git a/repl/pom.xml b/repl/pom.xml index 2e5f8bf5395a3..980b64c4dca8c 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index d1d6a449bd5dc..f3ec959370807 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 40e578f9a7eba..66ae5adfbd19f 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index f4ac384409174..1472bd0fcb1a2 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 61d5adec0e7cc..77811f35692d8 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 970d42ba4590e..ceba171e41134 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 0cfb5f616cd24..34137add48553 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 7024e0dcfab75..e1b725929a8fc 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index cc8d8796da601..8f1e9d2f3ccb1 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index c4977726a3cac..52273e7fa76e1 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 9bbcb7f322798..dadc9324f95a2 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1-SNAPSHOT + 3.3.0 ../pom.xml From 36c01df145072e0fee966b9b2fc6782f7636d862 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 9 Jun 2022 17:55:44 +0000 Subject: [PATCH 340/535] Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 39 files changed, 41 insertions(+), 41 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87df..0e449e841cf6d 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b14..d12f2ad73fabd 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e71..842d63f5d3811 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d7599..f7d187bf9527d 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e774782..53f38df885102 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e58..845f6659407bd 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370ae7..8e1590891933b 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3bc..1987c13328559 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 66dc93de0599e..c7e7be1e3bbf1 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 219ceca6648d8..ac644130a61e2 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 4966db6b4a8af..9a6fe2d313fde 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.0 -SPARK_VERSION_SHORT: 3.3.0 +SPARK_VERSION: 3.3.1-SNAPSHOT +SPARK_VERSION_SHORT: 3.3.1 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.0"] + 'facetFilters': ["version:3.3.1"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 42e58f2726df1..e97f3b40cb2bd 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index 5aaa91cfdf20d..578854e3eaa9a 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 36309bb417362..95e1ce74ca172 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 072cedaa594c8..0b803c5d3864a 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index b9063b543f512..95726829bcbbd 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 6f6a51a972c73..25e7e25ae25b6 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 95fd080383995..3ba16b7b838a2 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 33cf30ff803e7..6cee275e6adc7 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 79b2e8f2a5a47..ad23da0d7f249 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 647d0c3f87552..6de1f9eee532c 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 562ddc8dcc23c..95c8c312eb0e2 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 08bcae6e0f53f..4d87bd2730e3b 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index beceaecd31a1c..889f0b5a92e08 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 584a5df0a4a35..9b7b0370d3b4d 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 42bab72668c00..04a68a47a4f45 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 5f7b1b0b9dc46..32bd40ee83461 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index 36215e178c5c7..83c6d7125c367 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__: str = "3.3.0" +__version__: str = "3.3.1.dev0" diff --git a/repl/pom.xml b/repl/pom.xml index 980b64c4dca8c..2e5f8bf5395a3 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index f3ec959370807..d1d6a449bd5dc 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 66ae5adfbd19f..40e578f9a7eba 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 1472bd0fcb1a2..f4ac384409174 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 77811f35692d8..61d5adec0e7cc 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index ceba171e41134..970d42ba4590e 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 34137add48553..0cfb5f616cd24 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index e1b725929a8fc..7024e0dcfab75 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 8f1e9d2f3ccb1..cc8d8796da601 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 52273e7fa76e1..c4977726a3cac 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index dadc9324f95a2..9bbcb7f322798 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.0 + 3.3.1-SNAPSHOT ../pom.xml From ff048f1b69e5520c1fedbfd9869717f0b8919c0f Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Thu, 9 Jun 2022 12:34:27 -0700 Subject: [PATCH 341/535] [SPARK-39422][SQL] Improve error message for 'SHOW CREATE TABLE' with unsupported serdes ### What changes were proposed in this pull request? This PR improves the error message that is thrown when trying to run `SHOW CREATE TABLE` on a Hive table with an unsupported serde. Currently this results in an error like ``` org.apache.spark.sql.AnalysisException: Failed to execute SHOW CREATE TABLE against table rcFileTable, which is created by Hive and uses the following unsupported serde configuration SERDE: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe INPUTFORMAT: org.apache.hadoop.hive.ql.io.RCFileInputFormat OUTPUTFORMAT: org.apache.hadoop.hive.ql.io.RCFileOutputFormat ``` This patch improves this error message by adding a suggestion to use `SHOW CREATE TABLE ... AS SERDE`: ``` org.apache.spark.sql.AnalysisException: Failed to execute SHOW CREATE TABLE against table rcFileTable, which is created by Hive and uses the following unsupported serde configuration SERDE: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe INPUTFORMAT: org.apache.hadoop.hive.ql.io.RCFileInputFormat OUTPUTFORMAT: org.apache.hadoop.hive.ql.io.RCFileOutputFormat Please use `SHOW CREATE TABLE rcFileTable AS SERDE` to show Hive DDL instead. ``` The suggestion's wording is consistent with other error messages thrown by SHOW CREATE TABLE. ### Why are the changes needed? The existing error message is confusing. ### Does this PR introduce _any_ user-facing change? Yes, it improves a user-facing error message. ### How was this patch tested? Manually tested with ``` CREATE TABLE rcFileTable(i INT) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.RCFileOutputFormat' SHOW CREATE TABLE rcFileTable ``` to trigger the error. Confirmed that the `AS SERDE` suggestion actually works. Closes #36814 from JoshRosen/suggest-show-create-table-as-serde-in-error-message. Authored-by: Josh Rosen Signed-off-by: Josh Rosen (cherry picked from commit 8765eea1c08bc58a0cfc22b7cfbc0b5645cc81f9) Signed-off-by: Josh Rosen --- .../org/apache/spark/sql/errors/QueryCompilationErrors.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index ebf40b4b5d0bb..d877bb5b2a861 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1989,7 +1989,8 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { new AnalysisException("Failed to execute SHOW CREATE TABLE against table " + s"${table.identifier}, which is created by Hive and uses the " + "following unsupported serde configuration\n" + - builder.toString() + builder.toString() + "\n" + + s"Please use `SHOW CREATE TABLE ${table.identifier} AS SERDE` to show Hive DDL instead." ) } From 0361ee8cd2f65eb99e690804c6d415c46c12dfc3 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 9 Jun 2022 15:18:58 -0700 Subject: [PATCH 342/535] [SPARK-39431][DOCS][PYTHON] Update PySpark dependencies in Installation doc ### What changes were proposed in this pull request? This PR aims to update `PySpark dependencies` section in Installation document. - https://dist.apache.org/repos/dist/dev/spark/v3.3.0-rc5-docs/_site/api/python/getting_started/install.html#dependencies ### Why are the changes needed? Apache Spark 3.3 requires `numpy` 1.15. https://github.com/apache/spark/blob/8765eea1c08bc58a0cfc22b7cfbc0b5645cc81f9/python/setup.py#L270-L274 https://github.com/apache/spark/blob/8765eea1c08bc58a0cfc22b7cfbc0b5645cc81f9/python/setup.py#L264-L265 So, - We need to update `numpy` to 1.15 from 1.14 accordingly in documentation. - We had better remove the duplicated NumPy packages (with two versions) because both `MLlib` and `pandas API on Spark` requires the same version. - We should use package names consistently. ### Does this PR introduce _any_ user-facing change? This is a doc-only change. ### How was this patch tested? Manual review. Closes #36825 from dongjoon-hyun/SPARK-39431. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit b5c7f34b576d25aec292c65e7565360d67142227) Signed-off-by: Dongjoon Hyun --- python/docs/source/getting_started/install.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 479f8bf7cc88d..e5c1455da7af0 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -155,12 +155,11 @@ Dependencies Package Minimum supported version Note ============= ========================= ====================================== `pandas` 1.0.5 Optional for Spark SQL -`NumPy` 1.7 Required for MLlib DataFrame-based API `pyarrow` 1.0.0 Optional for Spark SQL -`Py4J` 0.10.9.5 Required +`py4j` 0.10.9.5 Required `pandas` 1.0.5 Required for pandas API on Spark `pyarrow` 1.0.0 Required for pandas API on Spark -`Numpy` 1.14 Required for pandas API on Spark +`numpy` 1.15 Required for pandas API on Spark and MLLib DataFrame-based API ============= ========================= ====================================== Note that PySpark requires Java 8 or later with ``JAVA_HOME`` properly set. From aba523c66086a8990d491cf5e9f27aadf39379a0 Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 10 Jun 2022 16:49:27 -0700 Subject: [PATCH 343/535] [SPARK-39419][SQL][3.3] Fix ArraySort to throw an exception when the comparator returns null MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Backport of #36812. Fixes `ArraySort` to throw an exception when the comparator returns `null`. Also updates the doc to follow the corrected behavior. ### Why are the changes needed? When the comparator of `ArraySort` returns `null`, currently it handles it as `0` (equal). According to the doc, ``` It returns -1, 0, or 1 as the first element is less than, equal to, or greater than the second element. If the comparator function returns other values (including null), the function will fail and raise an error. ``` It's fine to return non -1, 0, 1 integers to follow the Java convention (still need to update the doc, though), but it should throw an exception for `null` result. ### Does this PR introduce _any_ user-facing change? Yes, if a user uses a comparator that returns `null`, it will throw an error after this PR. The legacy flag `spark.sql.legacy.allowNullComparisonResultInArraySort` can be used to restore the legacy behavior that handles `null` as `0` (equal). ### How was this patch tested? Added some tests. Closes #36834 from ueshin/issues/SPARK-39419/3.3/array_sort. Authored-by: Takuya UESHIN Signed-off-by: Dongjoon Hyun --- .../main/resources/error/error-classes.json | 3 +++ .../expressions/higherOrderFunctions.scala | 26 +++++++++++++++---- .../sql/errors/QueryExecutionErrors.scala | 5 ++++ .../apache/spark/sql/internal/SQLConf.scala | 10 +++++++ .../HigherOrderFunctionsSuite.scala | 22 +++++++++++++++- 5 files changed, 60 insertions(+), 6 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 34588fae5a45c..2e32482328a7e 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -133,6 +133,9 @@ "message" : [ "PARTITION clause cannot contain the non-partition column: ." ], "sqlState" : "42000" }, + "NULL_COMPARISON_RESULT" : { + "message" : [ "The comparison result is null. If you want to handle null as 0 (equal), you can set \"spark.sql.legacy.allowNullComparisonResultInArraySort\" to \"true\"." ] + }, "PARSE_CHAR_MISSING_LENGTH" : { "message" : [ "DataType requires a length parameter, for example (10). Please specify the length." ], "sqlState" : "42000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala index fa444a670f283..d56e761bd2f37 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala @@ -357,9 +357,9 @@ case class ArrayTransform( Since 3.0.0 this function also sorts and returns the array based on the given comparator function. The comparator will take two arguments representing two elements of the array. - It returns -1, 0, or 1 as the first element is less than, equal to, or greater - than the second element. If the comparator function returns other - values (including null), the function will fail and raise an error. + It returns a negative integer, 0, or a positive integer as the first element is less than, + equal to, or greater than the second element. If the comparator function returns null, + the function will fail and raise an error. """, examples = """ Examples: @@ -375,9 +375,17 @@ case class ArrayTransform( // scalastyle:on line.size.limit case class ArraySort( argument: Expression, - function: Expression) + function: Expression, + allowNullComparisonResult: Boolean) extends ArrayBasedSimpleHigherOrderFunction with CodegenFallback { + def this(argument: Expression, function: Expression) = { + this( + argument, + function, + SQLConf.get.getConf(SQLConf.LEGACY_ALLOW_NULL_COMPARISON_RESULT_IN_ARRAY_SORT)) + } + def this(argument: Expression) = this(argument, ArraySort.defaultComparator) @transient lazy val elementType: DataType = @@ -416,7 +424,11 @@ case class ArraySort( (o1: Any, o2: Any) => { firstElemVar.value.set(o1) secondElemVar.value.set(o2) - f.eval(inputRow).asInstanceOf[Int] + val cmp = f.eval(inputRow) + if (!allowNullComparisonResult && cmp == null) { + throw QueryExecutionErrors.nullComparisonResultError() + } + cmp.asInstanceOf[Int] } } @@ -437,6 +449,10 @@ case class ArraySort( object ArraySort { + def apply(argument: Expression, function: Expression): ArraySort = { + new ArraySort(argument, function) + } + def comparator(left: Expression, right: Expression): Expression = { val lit0 = Literal(0) val lit1 = Literal(1) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 21fe0b9267014..9e29acf04d2ef 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2065,4 +2065,9 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { s"add ${toSQLValue(amount, IntegerType)} $unit to " + s"${toSQLValue(DateTimeUtils.microsToInstant(micros), TimestampType)}")) } + + def nullComparisonResultError(): Throwable = { + new SparkException(errorClass = "NULL_COMPARISON_RESULT", + messageParameters = Array(), cause = null) + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index b6230f7138384..7f41e463d89a2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3748,6 +3748,16 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_ALLOW_NULL_COMPARISON_RESULT_IN_ARRAY_SORT = + buildConf("spark.sql.legacy.allowNullComparisonResultInArraySort") + .internal() + .doc("When set to false, `array_sort` function throws an error " + + "if the comparator function returns null. " + + "If set to true, it restores the legacy behavior that handles null as zero (equal).") + .version("3.2.2") + .booleanConf + .createWithDefault(false) + /** * Holds information about keys that have been deprecated. * diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala index c0db6d8dc29fa..b1c4c4414274c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/HigherOrderFunctionsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @@ -838,4 +838,24 @@ class HigherOrderFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper Literal.create(Seq(Double.NaN, 1d, 2d, null), ArrayType(DoubleType))), Seq(1d, 2d, Double.NaN, null)) } + + test("SPARK-39419: ArraySort should throw an exception when the comparator returns null") { + val comparator = { + val comp = ArraySort.comparator _ + (left: Expression, right: Expression) => + If(comp(left, right) === 0, Literal.create(null, IntegerType), comp(left, right)) + } + + withSQLConf( + SQLConf.LEGACY_ALLOW_NULL_COMPARISON_RESULT_IN_ARRAY_SORT.key -> "false") { + checkExceptionInExpression[SparkException]( + arraySort(Literal.create(Seq(3, 1, 1, 2)), comparator), "The comparison result is null") + } + + withSQLConf( + SQLConf.LEGACY_ALLOW_NULL_COMPARISON_RESULT_IN_ARRAY_SORT.key -> "true") { + checkEvaluation(arraySort(Literal.create(Seq(3, 1, 1, 2)), comparator), + Seq(1, 1, 2, 3)) + } + } } From bcc646b6f3da194d44db36e68d57f0f0621f10fe Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 10 Jun 2022 19:50:13 -0700 Subject: [PATCH 344/535] [SPARK-39442][SQL][TESTS] Update `PlanStabilitySuite` comments with `SPARK_ANSI_SQL_MODE` ### What changes were proposed in this pull request? This PR aims to update `PlanStabilitySuite` direction to prevent future mistakes. 1. Add `SPARK_ANSI_SQL_MODE=true` explicitly because Apache Spark 3.3+ test coverage has ANSI and non-ANSI modes. We need to make it sure that both results are synced at the same time. ``` - SPARK_GENERATE_GOLDEN_FILES=1 build/sbt ... + SPARK_GENERATE_GOLDEN_FILES=1 build/sbt ... + SPARK_GENERATE_GOLDEN_FILES=1 SPARK_ANSI_SQL_MODE=true ... ``` 2. The existing commands are human-readable but is not working. So, we had better have more simple command which is *copy-and-pasteable*. ``` - build/sbt "sql/testOnly *PlanStability[WithStats]Suite" + build/sbt "sql/testOnly *PlanStability*Suite" ``` ### Why are the changes needed? This will help us update the test results more easily by preventing mistakes. ### Does this PR introduce _any_ user-facing change? No. This is a dev-only doc. ### How was this patch tested? Manual review. Closes #36839 from dongjoon-hyun/SPARK-39442. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit d426c10e94be162547fb8990434cc87bdff28380) Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/PlanStabilitySuite.scala | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala index a0207e9b01920..d8caf80c9a961 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala @@ -48,22 +48,24 @@ import org.apache.spark.tags.ExtendedSQLTest * * To run the entire test suite: * {{{ - * build/sbt "sql/testOnly *PlanStability[WithStats]Suite" + * build/sbt "sql/testOnly *PlanStability*Suite" * }}} * * To run a single test file upon change: * {{{ - * build/sbt "sql/testOnly *PlanStability[WithStats]Suite -- -z (tpcds-v1.4/q49)" + * build/sbt "sql/testOnly *PlanStability*Suite -- -z (tpcds-v1.4/q49)" * }}} * * To re-generate golden files for entire suite, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStability[WithStats]Suite" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStability*Suite" + * SPARK_GENERATE_GOLDEN_FILES=1 SPARK_ANSI_SQL_MODE=true build/sbt "sql/testOnly *PlanStability*Suite" * }}} * * To re-generate golden file for a single test, run: * {{{ - * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStability[WithStats]Suite -- -z (tpcds-v1.4/q49)" + * SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/testOnly *PlanStability*Suite -- -z (tpcds-v1.4/q49)" + * SPARK_GENERATE_GOLDEN_FILES=1 SPARK_ANSI_SQL_MODE=true build/sbt "sql/testOnly *PlanStability*Suite -- -z (tpcds-v1.4/q49)" * }}} */ // scalastyle:on line.size.limit From 9a5eaa56f58efc91f86c3929b48e2baeaf6b4a3c Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 13 Jun 2022 10:31:10 -0700 Subject: [PATCH 345/535] [SPARK-39437][SQL][TEST][3.3] Normalize plan id separately in PlanStabilitySuite ### What changes were proposed in this pull request? In `PlanStabilitySuite`, we normalize expression IDs by matching `#\d+` in the explain string. However, this regex can match plan id in `Exchange` node as well, which will mess up the normalization if expression IDs and plan IDs overlap. This PR normalizes plan id separately in `PlanStabilitySuite`. ### Why are the changes needed? Make the plan golden file more stable. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? N/A backport https://github.com/apache/spark/pull/36827 Closes #36854 from cloud-fan/test2. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../sql/execution/exchange/Exchange.scala | 2 +- .../sql-tests/results/explain-aqe.sql.out | 42 +- .../sql-tests/results/explain-cbo.sql.out | 2 +- .../sql-tests/results/explain.sql.out | 38 +- .../q10.sf100/explain.txt | 164 ++-- .../approved-plans-modified/q10/explain.txt | 104 +-- .../q19.sf100/explain.txt | 134 ++-- .../approved-plans-modified/q19/explain.txt | 106 +-- .../q27.sf100/explain.txt | 174 ++-- .../approved-plans-modified/q27/explain.txt | 196 ++--- .../q3.sf100/explain.txt | 50 +- .../approved-plans-modified/q3/explain.txt | 48 +- .../q34.sf100/explain.txt | 88 +- .../approved-plans-modified/q34/explain.txt | 78 +- .../q42.sf100/explain.txt | 72 +- .../approved-plans-modified/q42/explain.txt | 48 +- .../q43.sf100/explain.txt | 72 +- .../approved-plans-modified/q43/explain.txt | 48 +- .../q46.sf100/explain.txt | 130 +-- .../approved-plans-modified/q46/explain.txt | 106 +-- .../q52.sf100/explain.txt | 72 +- .../approved-plans-modified/q52/explain.txt | 48 +- .../q53.sf100/explain.txt | 112 +-- .../approved-plans-modified/q53/explain.txt | 86 +- .../q55.sf100/explain.txt | 72 +- .../approved-plans-modified/q55/explain.txt | 48 +- .../q59.sf100/explain.txt | 126 +-- .../approved-plans-modified/q59/explain.txt | 126 +-- .../q63.sf100/explain.txt | 112 +-- .../approved-plans-modified/q63/explain.txt | 86 +- .../q65.sf100/explain.txt | 144 ++-- .../approved-plans-modified/q65/explain.txt | 134 ++-- .../q68.sf100/explain.txt | 126 +-- .../approved-plans-modified/q68/explain.txt | 106 +-- .../q7.sf100/explain.txt | 76 +- .../approved-plans-modified/q7/explain.txt | 88 +- .../q73.sf100/explain.txt | 78 +- .../approved-plans-modified/q73/explain.txt | 78 +- .../q79.sf100/explain.txt | 88 +- .../approved-plans-modified/q79/explain.txt | 70 +- .../q89.sf100/explain.txt | 76 +- .../approved-plans-modified/q89/explain.txt | 88 +- .../q98.sf100/explain.txt | 84 +- .../approved-plans-modified/q98/explain.txt | 64 +- .../ss_max.sf100/explain.txt | 16 +- .../ss_max/explain.txt | 16 +- .../approved-plans-v1_4/q1.sf100/explain.txt | 138 ++-- .../approved-plans-v1_4/q1/explain.txt | 126 +-- .../approved-plans-v1_4/q10.sf100/explain.txt | 170 ++-- .../approved-plans-v1_4/q10/explain.txt | 124 +-- .../approved-plans-v1_4/q11.sf100/explain.txt | 332 ++++---- .../approved-plans-v1_4/q11/explain.txt | 302 +++---- .../approved-plans-v1_4/q12.sf100/explain.txt | 88 +- .../approved-plans-v1_4/q12/explain.txt | 56 +- .../approved-plans-v1_4/q13.sf100/explain.txt | 94 +-- .../approved-plans-v1_4/q13/explain.txt | 98 +-- .../q14a.sf100/explain.txt | 582 +++++++------- .../approved-plans-v1_4/q14a/explain.txt | 436 +++++----- .../q14b.sf100/explain.txt | 514 ++++++------ .../approved-plans-v1_4/q14b/explain.txt | 380 ++++----- .../approved-plans-v1_4/q15.sf100/explain.txt | 94 +-- .../approved-plans-v1_4/q15/explain.txt | 70 +- .../approved-plans-v1_4/q16.sf100/explain.txt | 126 +-- .../approved-plans-v1_4/q16/explain.txt | 126 +-- .../approved-plans-v1_4/q17.sf100/explain.txt | 170 ++-- .../approved-plans-v1_4/q17/explain.txt | 146 ++-- .../approved-plans-v1_4/q18.sf100/explain.txt | 158 ++-- .../approved-plans-v1_4/q18/explain.txt | 138 ++-- .../approved-plans-v1_4/q19.sf100/explain.txt | 120 +-- .../approved-plans-v1_4/q19/explain.txt | 106 +-- .../approved-plans-v1_4/q2.sf100/explain.txt | 88 +- .../approved-plans-v1_4/q2/explain.txt | 88 +- .../approved-plans-v1_4/q20.sf100/explain.txt | 88 +- .../approved-plans-v1_4/q20/explain.txt | 56 +- .../approved-plans-v1_4/q21.sf100/explain.txt | 72 +- .../approved-plans-v1_4/q21/explain.txt | 76 +- .../approved-plans-v1_4/q22.sf100/explain.txt | 76 +- .../approved-plans-v1_4/q22/explain.txt | 56 +- .../q23a.sf100/explain.txt | 480 +++++------ .../approved-plans-v1_4/q23a/explain.txt | 330 ++++---- .../q23b.sf100/explain.txt | 658 +++++++-------- .../approved-plans-v1_4/q23b/explain.txt | 424 +++++----- .../q24a.sf100/explain.txt | 294 +++---- .../approved-plans-v1_4/q24a/explain.txt | 248 +++--- .../q24b.sf100/explain.txt | 294 +++---- .../approved-plans-v1_4/q24b/explain.txt | 248 +++--- .../approved-plans-v1_4/q25.sf100/explain.txt | 170 ++-- .../approved-plans-v1_4/q25/explain.txt | 146 ++-- .../approved-plans-v1_4/q26.sf100/explain.txt | 86 +- .../approved-plans-v1_4/q26/explain.txt | 88 +- .../approved-plans-v1_4/q27.sf100/explain.txt | 88 +- .../approved-plans-v1_4/q27/explain.txt | 88 +- .../approved-plans-v1_4/q28.sf100/explain.txt | 316 ++++---- .../approved-plans-v1_4/q28/explain.txt | 316 ++++---- .../approved-plans-v1_4/q29.sf100/explain.txt | 186 ++--- .../approved-plans-v1_4/q29/explain.txt | 162 ++-- .../approved-plans-v1_4/q3.sf100/explain.txt | 50 +- .../approved-plans-v1_4/q3/explain.txt | 48 +- .../approved-plans-v1_4/q30.sf100/explain.txt | 222 ++--- .../approved-plans-v1_4/q30/explain.txt | 136 ++-- .../approved-plans-v1_4/q31.sf100/explain.txt | 480 +++++------ .../approved-plans-v1_4/q31/explain.txt | 380 ++++----- .../approved-plans-v1_4/q32.sf100/explain.txt | 110 +-- .../approved-plans-v1_4/q32/explain.txt | 88 +- .../approved-plans-v1_4/q33.sf100/explain.txt | 206 ++--- .../approved-plans-v1_4/q33/explain.txt | 232 +++--- .../approved-plans-v1_4/q35.sf100/explain.txt | 182 ++--- .../approved-plans-v1_4/q35/explain.txt | 126 +-- .../approved-plans-v1_4/q36.sf100/explain.txt | 76 +- .../approved-plans-v1_4/q36/explain.txt | 76 +- .../approved-plans-v1_4/q37.sf100/explain.txt | 76 +- .../approved-plans-v1_4/q37/explain.txt | 44 +- .../approved-plans-v1_4/q38.sf100/explain.txt | 220 ++--- .../approved-plans-v1_4/q38/explain.txt | 138 ++-- .../q39a.sf100/explain.txt | 178 ++--- .../approved-plans-v1_4/q39a/explain.txt | 178 ++--- .../q39b.sf100/explain.txt | 178 ++--- .../approved-plans-v1_4/q39b/explain.txt | 178 ++--- .../approved-plans-v1_4/q4.sf100/explain.txt | 502 ++++++------ .../approved-plans-v1_4/q4/explain.txt | 460 +++++------ .../approved-plans-v1_4/q40.sf100/explain.txt | 116 +-- .../approved-plans-v1_4/q40/explain.txt | 116 +-- .../approved-plans-v1_4/q41.sf100/explain.txt | 16 +- .../approved-plans-v1_4/q41/explain.txt | 16 +- .../approved-plans-v1_4/q42.sf100/explain.txt | 50 +- .../approved-plans-v1_4/q42/explain.txt | 48 +- .../approved-plans-v1_4/q43.sf100/explain.txt | 72 +- .../approved-plans-v1_4/q43/explain.txt | 48 +- .../approved-plans-v1_4/q44.sf100/explain.txt | 132 +-- .../approved-plans-v1_4/q44/explain.txt | 136 ++-- .../approved-plans-v1_4/q45.sf100/explain.txt | 120 +-- .../approved-plans-v1_4/q45/explain.txt | 114 +-- .../approved-plans-v1_4/q46.sf100/explain.txt | 178 ++--- .../approved-plans-v1_4/q46/explain.txt | 106 +-- .../approved-plans-v1_4/q47.sf100/explain.txt | 160 ++-- .../approved-plans-v1_4/q47/explain.txt | 158 ++-- .../approved-plans-v1_4/q48.sf100/explain.txt | 78 +- .../approved-plans-v1_4/q48/explain.txt | 78 +- .../approved-plans-v1_4/q49.sf100/explain.txt | 336 ++++---- .../approved-plans-v1_4/q49/explain.txt | 316 ++++---- .../approved-plans-v1_4/q5.sf100/explain.txt | 270 +++---- .../approved-plans-v1_4/q5/explain.txt | 248 +++--- .../approved-plans-v1_4/q50.sf100/explain.txt | 106 +-- .../approved-plans-v1_4/q50/explain.txt | 86 +- .../approved-plans-v1_4/q51.sf100/explain.txt | 138 ++-- .../approved-plans-v1_4/q51/explain.txt | 138 ++-- .../approved-plans-v1_4/q52.sf100/explain.txt | 50 +- .../approved-plans-v1_4/q52/explain.txt | 48 +- .../approved-plans-v1_4/q53.sf100/explain.txt | 112 +-- .../approved-plans-v1_4/q53/explain.txt | 86 +- .../approved-plans-v1_4/q54.sf100/explain.txt | 298 +++---- .../approved-plans-v1_4/q54/explain.txt | 256 +++--- .../approved-plans-v1_4/q55.sf100/explain.txt | 42 +- .../approved-plans-v1_4/q55/explain.txt | 48 +- .../approved-plans-v1_4/q56.sf100/explain.txt | 232 +++--- .../approved-plans-v1_4/q56/explain.txt | 232 +++--- .../approved-plans-v1_4/q57.sf100/explain.txt | 160 ++-- .../approved-plans-v1_4/q57/explain.txt | 158 ++-- .../approved-plans-v1_4/q58.sf100/explain.txt | 216 ++--- .../approved-plans-v1_4/q58/explain.txt | 228 +++--- .../approved-plans-v1_4/q59.sf100/explain.txt | 120 +-- .../approved-plans-v1_4/q59/explain.txt | 120 +-- .../approved-plans-v1_4/q60.sf100/explain.txt | 232 +++--- .../approved-plans-v1_4/q60/explain.txt | 232 +++--- .../approved-plans-v1_4/q61.sf100/explain.txt | 188 ++--- .../approved-plans-v1_4/q61/explain.txt | 208 ++--- .../approved-plans-v1_4/q62.sf100/explain.txt | 80 +- .../approved-plans-v1_4/q62/explain.txt | 84 +- .../approved-plans-v1_4/q63.sf100/explain.txt | 112 +-- .../approved-plans-v1_4/q63/explain.txt | 86 +- .../approved-plans-v1_4/q65.sf100/explain.txt | 154 ++-- .../approved-plans-v1_4/q65/explain.txt | 134 ++-- .../approved-plans-v1_4/q66.sf100/explain.txt | 188 ++--- .../approved-plans-v1_4/q66/explain.txt | 194 ++--- .../approved-plans-v1_4/q67.sf100/explain.txt | 82 +- .../approved-plans-v1_4/q67/explain.txt | 76 +- .../approved-plans-v1_4/q68.sf100/explain.txt | 178 ++--- .../approved-plans-v1_4/q68/explain.txt | 106 +-- .../approved-plans-v1_4/q69.sf100/explain.txt | 162 ++-- .../approved-plans-v1_4/q69/explain.txt | 124 +-- .../approved-plans-v1_4/q7.sf100/explain.txt | 86 +- .../approved-plans-v1_4/q7/explain.txt | 88 +- .../approved-plans-v1_4/q70.sf100/explain.txt | 88 +- .../approved-plans-v1_4/q70/explain.txt | 94 +-- .../approved-plans-v1_4/q71.sf100/explain.txt | 146 ++-- .../approved-plans-v1_4/q71/explain.txt | 146 ++-- .../approved-plans-v1_4/q72.sf100/explain.txt | 244 +++--- .../approved-plans-v1_4/q72/explain.txt | 220 ++--- .../approved-plans-v1_4/q73.sf100/explain.txt | 88 +- .../approved-plans-v1_4/q73/explain.txt | 78 +- .../approved-plans-v1_4/q76.sf100/explain.txt | 128 +-- .../approved-plans-v1_4/q76/explain.txt | 108 +-- .../approved-plans-v1_4/q77.sf100/explain.txt | 348 ++++---- .../approved-plans-v1_4/q77/explain.txt | 348 ++++---- .../approved-plans-v1_4/q79.sf100/explain.txt | 88 +- .../approved-plans-v1_4/q79/explain.txt | 70 +- .../approved-plans-v1_4/q8.sf100/explain.txt | 154 ++-- .../approved-plans-v1_4/q8/explain.txt | 136 ++-- .../approved-plans-v1_4/q80.sf100/explain.txt | 414 +++++----- .../approved-plans-v1_4/q80/explain.txt | 414 +++++----- .../approved-plans-v1_4/q81.sf100/explain.txt | 230 +++--- .../approved-plans-v1_4/q81/explain.txt | 132 +-- .../approved-plans-v1_4/q82.sf100/explain.txt | 76 +- .../approved-plans-v1_4/q82/explain.txt | 44 +- .../approved-plans-v1_4/q83.ansi/explain.txt | 214 ++--- .../q83.sf100.ansi/explain.txt | 202 ++--- .../approved-plans-v1_4/q83.sf100/explain.txt | 202 ++--- .../approved-plans-v1_4/q83/explain.txt | 214 ++--- .../approved-plans-v1_4/q84.sf100/explain.txt | 86 +- .../approved-plans-v1_4/q84/explain.txt | 90 +-- .../approved-plans-v1_4/q85.sf100/explain.txt | 182 ++--- .../approved-plans-v1_4/q85/explain.txt | 178 ++--- .../approved-plans-v1_4/q86.sf100/explain.txt | 56 +- .../approved-plans-v1_4/q86/explain.txt | 56 +- .../approved-plans-v1_4/q87.sf100/explain.txt | 220 ++--- .../approved-plans-v1_4/q87/explain.txt | 138 ++-- .../approved-plans-v1_4/q88.sf100/explain.txt | 560 ++++++------- .../approved-plans-v1_4/q88/explain.txt | 560 ++++++------- .../approved-plans-v1_4/q89.sf100/explain.txt | 114 +-- .../approved-plans-v1_4/q89/explain.txt | 88 +- .../approved-plans-v1_4/q9.sf100/explain.txt | 154 ++-- .../approved-plans-v1_4/q9/explain.txt | 154 ++-- .../approved-plans-v1_4/q90.sf100/explain.txt | 132 +-- .../approved-plans-v1_4/q90/explain.txt | 132 +-- .../approved-plans-v1_4/q91.sf100/explain.txt | 142 ++-- .../approved-plans-v1_4/q91/explain.txt | 138 ++-- .../approved-plans-v1_4/q92.sf100/explain.txt | 128 +-- .../approved-plans-v1_4/q92/explain.txt | 98 +-- .../approved-plans-v1_4/q93.sf100/explain.txt | 54 +- .../approved-plans-v1_4/q93/explain.txt | 76 +- .../approved-plans-v1_4/q94.sf100/explain.txt | 126 +-- .../approved-plans-v1_4/q94/explain.txt | 126 +-- .../approved-plans-v1_4/q95.sf100/explain.txt | 174 ++-- .../approved-plans-v1_4/q95/explain.txt | 178 ++--- .../approved-plans-v1_4/q96.sf100/explain.txt | 56 +- .../approved-plans-v1_4/q96/explain.txt | 56 +- .../approved-plans-v1_4/q97.sf100/explain.txt | 86 +- .../approved-plans-v1_4/q97/explain.txt | 86 +- .../approved-plans-v1_4/q98.sf100/explain.txt | 96 +-- .../approved-plans-v1_4/q98/explain.txt | 64 +- .../approved-plans-v1_4/q99.sf100/explain.txt | 80 +- .../approved-plans-v1_4/q99/explain.txt | 84 +- .../q10a.sf100/explain.txt | 152 ++-- .../approved-plans-v2_7/q10a/explain.txt | 118 +-- .../approved-plans-v2_7/q11.sf100/explain.txt | 328 ++++---- .../approved-plans-v2_7/q11/explain.txt | 298 +++---- .../approved-plans-v2_7/q12.sf100/explain.txt | 88 +- .../approved-plans-v2_7/q12/explain.txt | 56 +- .../approved-plans-v2_7/q14.sf100/explain.txt | 514 ++++++------ .../approved-plans-v2_7/q14/explain.txt | 380 ++++----- .../q14a.sf100/explain.txt | 738 ++++++++--------- .../approved-plans-v2_7/q14a/explain.txt | 592 +++++++------- .../q18a.sf100/explain.txt | 504 ++++++------ .../approved-plans-v2_7/q18a/explain.txt | 450 +++++------ .../approved-plans-v2_7/q20.sf100/explain.txt | 88 +- .../approved-plans-v2_7/q20/explain.txt | 56 +- .../approved-plans-v2_7/q22.sf100/explain.txt | 70 +- .../approved-plans-v2_7/q22/explain.txt | 42 +- .../q22a.sf100/explain.txt | 224 +++--- .../approved-plans-v2_7/q22a/explain.txt | 176 ++-- .../approved-plans-v2_7/q24.sf100/explain.txt | 298 +++---- .../approved-plans-v2_7/q24/explain.txt | 256 +++--- .../q27a.sf100/explain.txt | 196 ++--- .../approved-plans-v2_7/q27a/explain.txt | 196 ++--- .../approved-plans-v2_7/q34.sf100/explain.txt | 88 +- .../approved-plans-v2_7/q34/explain.txt | 78 +- .../approved-plans-v2_7/q35.sf100/explain.txt | 182 ++--- .../approved-plans-v2_7/q35/explain.txt | 126 +-- .../q35a.sf100/explain.txt | 172 ++-- .../approved-plans-v2_7/q35a/explain.txt | 120 +-- .../q36a.sf100/explain.txt | 152 ++-- .../approved-plans-v2_7/q36a/explain.txt | 140 ++-- .../approved-plans-v2_7/q47.sf100/explain.txt | 160 ++-- .../approved-plans-v2_7/q47/explain.txt | 158 ++-- .../approved-plans-v2_7/q49.sf100/explain.txt | 336 ++++---- .../approved-plans-v2_7/q49/explain.txt | 316 ++++---- .../q51a.sf100/explain.txt | 306 +++---- .../approved-plans-v2_7/q51a/explain.txt | 294 +++---- .../approved-plans-v2_7/q57.sf100/explain.txt | 160 ++-- .../approved-plans-v2_7/q57/explain.txt | 158 ++-- .../approved-plans-v2_7/q5a.sf100/explain.txt | 350 ++++---- .../approved-plans-v2_7/q5a/explain.txt | 328 ++++---- .../approved-plans-v2_7/q6.sf100/explain.txt | 154 ++-- .../approved-plans-v2_7/q6/explain.txt | 168 ++-- .../approved-plans-v2_7/q64.sf100/explain.txt | 756 +++++++++--------- .../approved-plans-v2_7/q64/explain.txt | 660 +++++++-------- .../q67a.sf100/explain.txt | 346 ++++---- .../approved-plans-v2_7/q67a/explain.txt | 340 ++++---- .../q70a.sf100/explain.txt | 152 ++-- .../approved-plans-v2_7/q70a/explain.txt | 158 ++-- .../approved-plans-v2_7/q72.sf100/explain.txt | 244 +++--- .../approved-plans-v2_7/q72/explain.txt | 220 ++--- .../approved-plans-v2_7/q74.sf100/explain.txt | 328 ++++---- .../approved-plans-v2_7/q74/explain.txt | 298 +++---- .../approved-plans-v2_7/q75.sf100/explain.txt | 470 +++++------ .../approved-plans-v2_7/q75/explain.txt | 470 +++++------ .../q77a.sf100/explain.txt | 428 +++++----- .../approved-plans-v2_7/q77a/explain.txt | 428 +++++----- .../approved-plans-v2_7/q78.sf100/explain.txt | 282 +++---- .../approved-plans-v2_7/q78/explain.txt | 282 +++---- .../q80a.sf100/explain.txt | 494 ++++++------ .../approved-plans-v2_7/q80a/explain.txt | 494 ++++++------ .../q86a.sf100/explain.txt | 120 +-- .../approved-plans-v2_7/q86a/explain.txt | 120 +-- .../approved-plans-v2_7/q98.sf100/explain.txt | 92 +-- .../approved-plans-v2_7/q98/explain.txt | 60 +- .../tpch-plan-stability/q1/explain.txt | 12 +- .../tpch-plan-stability/q10/explain.txt | 70 +- .../tpch-plan-stability/q11/explain.txt | 98 +-- .../tpch-plan-stability/q12/explain.txt | 22 +- .../tpch-plan-stability/q13/explain.txt | 44 +- .../tpch-plan-stability/q14/explain.txt | 16 +- .../tpch-plan-stability/q15/explain.txt | 62 +- .../tpch-plan-stability/q16/explain.txt | 66 +- .../tpch-plan-stability/q17/explain.txt | 62 +- .../tpch-plan-stability/q18/explain.txt | 60 +- .../tpch-plan-stability/q19/explain.txt | 16 +- .../tpch-plan-stability/q2/explain.txt | 162 ++-- .../tpch-plan-stability/q20/explain.txt | 78 +- .../tpch-plan-stability/q21/explain.txt | 82 +- .../tpch-plan-stability/q22/explain.txt | 64 +- .../tpch-plan-stability/q3/explain.txt | 52 +- .../tpch-plan-stability/q4/explain.txt | 22 +- .../tpch-plan-stability/q5/explain.txt | 116 +-- .../tpch-plan-stability/q6/explain.txt | 6 +- .../tpch-plan-stability/q7/explain.txt | 102 +-- .../tpch-plan-stability/q8/explain.txt | 154 ++-- .../tpch-plan-stability/q9/explain.txt | 108 +-- .../apache/spark/sql/PlanStabilitySuite.scala | 11 +- .../apache/spark/sql/SQLQueryTestHelper.scala | 1 + .../sql/execution/debug/DebuggingSuite.scala | 4 +- 331 files changed, 27432 insertions(+), 27422 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala index 8ceaf88e875e7..c02beea4f879c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/Exchange.scala @@ -37,7 +37,7 @@ abstract class Exchange extends UnaryExecNode { override def output: Seq[Attribute] = child.output final override val nodePatterns: Seq[TreePattern] = Seq(EXCHANGE) - override def stringArgs: Iterator[Any] = super.stringArgs ++ Iterator(s"[id=#$id]") + override def stringArgs: Iterator[Any] = super.stringArgs ++ Iterator(s"[plan_id=$id]") } /** diff --git a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out index f5e5b46d29ce6..5e9e9d6c7d6e4 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-aqe.sql.out @@ -67,10 +67,10 @@ Aggregate [sum(distinct val#x) AS sum(DISTINCT val)#xL] == Physical Plan == AdaptiveSparkPlan isFinalPlan=false +- HashAggregate(keys=[], functions=[sum(distinct val#x)], output=[sum(DISTINCT val)#xL]) - +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#x] + +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] +- HashAggregate(keys=[], functions=[partial_sum(distinct val#x)], output=[sum#xL]) +- HashAggregate(keys=[val#x], functions=[], output=[val#x]) - +- Exchange hashpartitioning(val#x, 4), ENSURE_REQUIREMENTS, [id=#x] + +- Exchange hashpartitioning(val#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] +- HashAggregate(keys=[val#x], functions=[], output=[val#x]) +- FileScan parquet default.explain_temp1[val#x] Batched: true, DataFilters: [], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/explain_temp1], PartitionFilters: [], PushedFilters: [], ReadSchema: struct @@ -116,7 +116,7 @@ Results [2]: [key#x, max#x] (4) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] (5) HashAggregate Input [2]: [key#x, max#x] @@ -127,7 +127,7 @@ Results [2]: [key#x, max(val#x)#x AS max(val)#x] (6) Exchange Input [2]: [key#x, max(val)#x] -Arguments: rangepartitioning(key#x ASC NULLS FIRST, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: rangepartitioning(key#x ASC NULLS FIRST, 4), ENSURE_REQUIREMENTS, [plan_id=x] (7) Sort Input [2]: [key#x, max(val)#x] @@ -178,7 +178,7 @@ Results [2]: [key#x, max#x] (4) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] (5) HashAggregate Input [2]: [key#x, max#x] @@ -249,7 +249,7 @@ Results [2]: [key#x, val#x] (7) Exchange Input [2]: [key#x, val#x] -Arguments: hashpartitioning(key#x, val#x, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: hashpartitioning(key#x, val#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] (8) HashAggregate Input [2]: [key#x, val#x] @@ -306,7 +306,7 @@ Condition : isnotnull(key#x) (5) BroadcastExchange Input [2]: [key#x, val#x] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#x] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=x] (6) BroadcastHashJoin Left keys [1]: [key#x] @@ -355,7 +355,7 @@ Condition : isnotnull(key#x) (4) BroadcastExchange Input [2]: [key#x, val#x] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#x] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=x] (5) BroadcastHashJoin Left keys [1]: [key#x] @@ -438,7 +438,7 @@ Results [1]: [max#x] (8) Exchange Input [1]: [max#x] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] (9) HashAggregate Input [1]: [max#x] @@ -485,7 +485,7 @@ Results [1]: [max#x] (15) Exchange Input [1]: [max#x] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] (16) HashAggregate Input [1]: [max#x] @@ -569,7 +569,7 @@ Results [1]: [max#x] (8) Exchange Input [1]: [max#x] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] (9) HashAggregate Input [1]: [max#x] @@ -616,7 +616,7 @@ Results [2]: [sum#x, count#xL] (15) Exchange Input [2]: [sum#x, count#xL] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] (16) HashAggregate Input [2]: [sum#x, count#xL] @@ -682,7 +682,7 @@ Results [2]: [sum#x, count#xL] (6) Exchange Input [2]: [sum#x, count#xL] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] (7) HashAggregate Input [2]: [sum#x, count#xL] @@ -718,7 +718,7 @@ Results [2]: [sum#x, count#xL] (11) Exchange Input [2]: [sum#x, count#xL] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] (12) HashAggregate Input [2]: [sum#x, count#xL] @@ -777,7 +777,7 @@ Condition : (isnotnull(key#x) AND (key#x > 10)) (5) BroadcastExchange Input [2]: [key#x, val#x] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#x] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=x] (6) BroadcastHashJoin Left keys [1]: [key#x] @@ -837,7 +837,7 @@ Results [2]: [key#x, max#x] (4) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] (5) HashAggregate Input [2]: [key#x, max#x] @@ -866,7 +866,7 @@ Results [2]: [key#x, max#x] (9) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] (10) HashAggregate Input [2]: [key#x, max#x] @@ -877,7 +877,7 @@ Results [2]: [key#x, max(val#x)#x AS max(val)#x] (11) BroadcastExchange Input [2]: [key#x, max(val)#x] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#x] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=x] (12) BroadcastHashJoin Left keys [1]: [key#x] @@ -964,7 +964,7 @@ Results [3]: [count#xL, sum#xL, count#xL] (3) Exchange Input [3]: [count#xL, sum#xL, count#xL] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] (4) HashAggregate Input [3]: [count#xL, sum#xL, count#xL] @@ -1009,7 +1009,7 @@ Results [2]: [key#x, buf#x] (3) Exchange Input [2]: [key#x, buf#x] -Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] (4) ObjectHashAggregate Input [2]: [key#x, buf#x] @@ -1060,7 +1060,7 @@ Results [2]: [key#x, min#x] (4) Exchange Input [2]: [key#x, min#x] -Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] (5) Sort Input [2]: [key#x, min#x] diff --git a/sql/core/src/test/resources/sql-tests/results/explain-cbo.sql.out b/sql/core/src/test/resources/sql-tests/results/explain-cbo.sql.out index 086b4d2e0b194..902af430b55ad 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain-cbo.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain-cbo.sql.out @@ -72,7 +72,7 @@ AdaptiveSparkPlan isFinalPlan=false : +- HashAggregate(keys=[], functions=[max(csales#xL)], output=[tpcds_cmax#xL]) : +- HashAggregate(keys=[], functions=[partial_max(csales#xL)], output=[max#xL]) : +- HashAggregate(keys=[], functions=[sum(b#x)], output=[csales#xL]) - : +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#x] + : +- Exchange SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] : +- HashAggregate(keys=[], functions=[partial_sum(b#x)], output=[sum#xL]) : +- Project [b#x] : +- Filter (isnotnull(a#x) AND (a#x < 100)) diff --git a/sql/core/src/test/resources/sql-tests/results/explain.sql.out b/sql/core/src/test/resources/sql-tests/results/explain.sql.out index 4e552d51a3953..71237166dc1b3 100644 --- a/sql/core/src/test/resources/sql-tests/results/explain.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/explain.sql.out @@ -66,10 +66,10 @@ Aggregate [sum(distinct val#x) AS sum(DISTINCT val)#xL] == Physical Plan == *HashAggregate(keys=[], functions=[sum(distinct val#x)], output=[sum(DISTINCT val)#xL]) -+- Exchange SinglePartition, ENSURE_REQUIREMENTS, [id=#x] ++- Exchange SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] +- *HashAggregate(keys=[], functions=[partial_sum(distinct val#x)], output=[sum#xL]) +- *HashAggregate(keys=[val#x], functions=[], output=[val#x]) - +- Exchange hashpartitioning(val#x, 4), ENSURE_REQUIREMENTS, [id=#x] + +- Exchange hashpartitioning(val#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] +- *HashAggregate(keys=[val#x], functions=[], output=[val#x]) +- *ColumnarToRow +- FileScan parquet default.explain_temp1[val#x] Batched: true, DataFilters: [], Format: Parquet, Location [not included in comparison]/{warehouse_dir}/explain_temp1], PartitionFilters: [], PushedFilters: [], ReadSchema: struct @@ -119,7 +119,7 @@ Results [2]: [key#x, max#x] (5) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] (6) HashAggregate [codegen id : 2] Input [2]: [key#x, max#x] @@ -130,7 +130,7 @@ Results [2]: [key#x, max(val#x)#x AS max(val)#x] (7) Exchange Input [2]: [key#x, max(val)#x] -Arguments: rangepartitioning(key#x ASC NULLS FIRST, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: rangepartitioning(key#x ASC NULLS FIRST, 4), ENSURE_REQUIREMENTS, [plan_id=x] (8) Sort [codegen id : 3] Input [2]: [key#x, max(val)#x] @@ -180,7 +180,7 @@ Results [2]: [key#x, max#x] (5) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] (6) HashAggregate [codegen id : 2] Input [2]: [key#x, max#x] @@ -254,7 +254,7 @@ Results [2]: [key#x, val#x] (9) Exchange Input [2]: [key#x, val#x] -Arguments: hashpartitioning(key#x, val#x, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: hashpartitioning(key#x, val#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] (10) HashAggregate [codegen id : 4] Input [2]: [key#x, val#x] @@ -314,7 +314,7 @@ Condition : isnotnull(key#x) (7) BroadcastExchange Input [2]: [key#x, val#x] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#x] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=x] (8) BroadcastHashJoin [codegen id : 2] Left keys [1]: [key#x] @@ -366,7 +366,7 @@ Condition : isnotnull(key#x) (6) BroadcastExchange Input [2]: [key#x, val#x] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#x] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=x] (7) BroadcastHashJoin [codegen id : 2] Left keys [1]: [key#x] @@ -447,7 +447,7 @@ Results [1]: [max#x] (9) Exchange Input [1]: [max#x] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] (10) HashAggregate [codegen id : 2] Input [1]: [max#x] @@ -493,7 +493,7 @@ Results [1]: [max#x] (16) Exchange Input [1]: [max#x] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] (17) HashAggregate [codegen id : 2] Input [1]: [max#x] @@ -575,7 +575,7 @@ Results [1]: [max#x] (9) Exchange Input [1]: [max#x] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] (10) HashAggregate [codegen id : 2] Input [1]: [max#x] @@ -621,7 +621,7 @@ Results [2]: [sum#x, count#xL] (16) Exchange Input [2]: [sum#x, count#xL] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] (17) HashAggregate [codegen id : 2] Input [2]: [sum#x, count#xL] @@ -685,7 +685,7 @@ Results [2]: [sum#x, count#xL] (7) Exchange Input [2]: [sum#x, count#xL] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] (8) HashAggregate [codegen id : 2] Input [2]: [sum#x, count#xL] @@ -749,7 +749,7 @@ Condition : (isnotnull(key#x) AND (key#x > 10)) (7) BroadcastExchange Input [2]: [key#x, val#x] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#x] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=x] (8) BroadcastHashJoin [codegen id : 2] Left keys [1]: [key#x] @@ -805,7 +805,7 @@ Results [2]: [key#x, max#x] (5) Exchange Input [2]: [key#x, max#x] -Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] (6) HashAggregate [codegen id : 4] Input [2]: [key#x, max#x] @@ -826,7 +826,7 @@ Results [2]: [key#x, max(val#x)#x AS max(val)#x] (9) BroadcastExchange Input [2]: [key#x, max(val)#x] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#x] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=x] (10) BroadcastHashJoin [codegen id : 4] Left keys [1]: [key#x] @@ -912,7 +912,7 @@ Results [3]: [count#xL, sum#xL, count#xL] (4) Exchange Input [3]: [count#xL, sum#xL, count#xL] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#x] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=x] (5) HashAggregate [codegen id : 2] Input [3]: [count#xL, sum#xL, count#xL] @@ -956,7 +956,7 @@ Results [2]: [key#x, buf#x] (4) Exchange Input [2]: [key#x, buf#x] -Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] (5) ObjectHashAggregate Input [2]: [key#x, buf#x] @@ -1006,7 +1006,7 @@ Results [2]: [key#x, min#x] (5) Exchange Input [2]: [key#x, min#x] -Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [id=#x] +Arguments: hashpartitioning(key#x, 4), ENSURE_REQUIREMENTS, [plan_id=x] (6) Sort [codegen id : 2] Input [2]: [key#x, min#x] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/explain.txt index 02ff6fcce531d..6c0040711fe53 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10.sf100/explain.txt @@ -65,119 +65,119 @@ Condition : ((isnotnull(c_customer_sk#1) AND isnotnull(c_current_addr_sk#3)) AND (4) Exchange Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] -Arguments: hashpartitioning(c_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#4] +Arguments: hashpartitioning(c_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#5, ws_sold_date_sk#6] +Output [2]: [ws_bill_customer_sk#4, ws_sold_date_sk#5] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#6), dynamicpruningexpression(ws_sold_date_sk#6 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ws_sold_date_sk#5), dynamicpruningexpression(ws_sold_date_sk#5 IN dynamicpruning#6)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 4] -Input [2]: [ws_bill_customer_sk#5, ws_sold_date_sk#6] +Input [2]: [ws_bill_customer_sk#4, ws_sold_date_sk#5] (8) Filter [codegen id : 4] -Input [2]: [ws_bill_customer_sk#5, ws_sold_date_sk#6] -Condition : isnotnull(ws_bill_customer_sk#5) +Input [2]: [ws_bill_customer_sk#4, ws_sold_date_sk#5] +Condition : isnotnull(ws_bill_customer_sk#4) (9) ReusedExchange [Reuses operator id: 53] -Output [1]: [d_date_sk#8] +Output [1]: [d_date_sk#7] (10) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ws_sold_date_sk#6] -Right keys [1]: [d_date_sk#8] +Left keys [1]: [ws_sold_date_sk#5] +Right keys [1]: [d_date_sk#7] Join condition: None (11) Project [codegen id : 4] -Output [1]: [ws_bill_customer_sk#5 AS customer_sk#9] -Input [3]: [ws_bill_customer_sk#5, ws_sold_date_sk#6, d_date_sk#8] +Output [1]: [ws_bill_customer_sk#4 AS customer_sk#8] +Input [3]: [ws_bill_customer_sk#4, ws_sold_date_sk#5, d_date_sk#7] (12) Scan parquet default.catalog_sales -Output [2]: [cs_ship_customer_sk#10, cs_sold_date_sk#11] +Output [2]: [cs_ship_customer_sk#9, cs_sold_date_sk#10] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#11), dynamicpruningexpression(cs_sold_date_sk#11 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(cs_sold_date_sk#10), dynamicpruningexpression(cs_sold_date_sk#10 IN dynamicpruning#6)] PushedFilters: [IsNotNull(cs_ship_customer_sk)] ReadSchema: struct (13) ColumnarToRow [codegen id : 6] -Input [2]: [cs_ship_customer_sk#10, cs_sold_date_sk#11] +Input [2]: [cs_ship_customer_sk#9, cs_sold_date_sk#10] (14) Filter [codegen id : 6] -Input [2]: [cs_ship_customer_sk#10, cs_sold_date_sk#11] -Condition : isnotnull(cs_ship_customer_sk#10) +Input [2]: [cs_ship_customer_sk#9, cs_sold_date_sk#10] +Condition : isnotnull(cs_ship_customer_sk#9) (15) ReusedExchange [Reuses operator id: 53] -Output [1]: [d_date_sk#12] +Output [1]: [d_date_sk#11] (16) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_sold_date_sk#11] -Right keys [1]: [d_date_sk#12] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] Join condition: None (17) Project [codegen id : 6] -Output [1]: [cs_ship_customer_sk#10 AS customer_sk#13] -Input [3]: [cs_ship_customer_sk#10, cs_sold_date_sk#11, d_date_sk#12] +Output [1]: [cs_ship_customer_sk#9 AS customer_sk#12] +Input [3]: [cs_ship_customer_sk#9, cs_sold_date_sk#10, d_date_sk#11] (18) Union (19) Exchange -Input [1]: [customer_sk#9] -Arguments: hashpartitioning(customer_sk#9, 5), ENSURE_REQUIREMENTS, [id=#14] +Input [1]: [customer_sk#8] +Arguments: hashpartitioning(customer_sk#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (20) Sort [codegen id : 7] -Input [1]: [customer_sk#9] -Arguments: [customer_sk#9 ASC NULLS FIRST], false, 0 +Input [1]: [customer_sk#8] +Arguments: [customer_sk#8 ASC NULLS FIRST], false, 0 (21) SortMergeJoin [codegen id : 8] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [customer_sk#9] +Right keys [1]: [customer_sk#8] Join condition: None (22) Scan parquet default.store_sales -Output [2]: [ss_customer_sk#15, ss_sold_date_sk#16] +Output [2]: [ss_customer_sk#13, ss_sold_date_sk#14] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#16), dynamicpruningexpression(ss_sold_date_sk#16 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ss_sold_date_sk#14), dynamicpruningexpression(ss_sold_date_sk#14 IN dynamicpruning#6)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 10] -Input [2]: [ss_customer_sk#15, ss_sold_date_sk#16] +Input [2]: [ss_customer_sk#13, ss_sold_date_sk#14] (24) Filter [codegen id : 10] -Input [2]: [ss_customer_sk#15, ss_sold_date_sk#16] -Condition : isnotnull(ss_customer_sk#15) +Input [2]: [ss_customer_sk#13, ss_sold_date_sk#14] +Condition : isnotnull(ss_customer_sk#13) (25) ReusedExchange [Reuses operator id: 53] -Output [1]: [d_date_sk#17] +Output [1]: [d_date_sk#15] (26) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_sold_date_sk#16] -Right keys [1]: [d_date_sk#17] +Left keys [1]: [ss_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] Join condition: None (27) Project [codegen id : 10] -Output [1]: [ss_customer_sk#15 AS customer_sk#18] -Input [3]: [ss_customer_sk#15, ss_sold_date_sk#16, d_date_sk#17] +Output [1]: [ss_customer_sk#13 AS customer_sk#16] +Input [3]: [ss_customer_sk#13, ss_sold_date_sk#14, d_date_sk#15] (28) Exchange -Input [1]: [customer_sk#18] -Arguments: hashpartitioning(customer_sk#18, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [1]: [customer_sk#16] +Arguments: hashpartitioning(customer_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=3] (29) Sort [codegen id : 11] -Input [1]: [customer_sk#18] -Arguments: [customer_sk#18 ASC NULLS FIRST], false, 0 +Input [1]: [customer_sk#16] +Arguments: [customer_sk#16 ASC NULLS FIRST], false, 0 (30) SortMergeJoin [codegen id : 13] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [customer_sk#18] +Right keys [1]: [customer_sk#16] Join condition: None (31) Project [codegen id : 13] @@ -185,88 +185,88 @@ Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] (32) Scan parquet default.customer_address -Output [2]: [ca_address_sk#20, ca_county#21] +Output [2]: [ca_address_sk#17, ca_county#18] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_county, [Dona Ana County,Douglas County,Gaines County,Richland County,Walker County]), IsNotNull(ca_address_sk)] ReadSchema: struct (33) ColumnarToRow [codegen id : 12] -Input [2]: [ca_address_sk#20, ca_county#21] +Input [2]: [ca_address_sk#17, ca_county#18] (34) Filter [codegen id : 12] -Input [2]: [ca_address_sk#20, ca_county#21] -Condition : (ca_county#21 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#20)) +Input [2]: [ca_address_sk#17, ca_county#18] +Condition : (ca_county#18 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#17)) (35) Project [codegen id : 12] -Output [1]: [ca_address_sk#20] -Input [2]: [ca_address_sk#20, ca_county#21] +Output [1]: [ca_address_sk#17] +Input [2]: [ca_address_sk#17, ca_county#18] (36) BroadcastExchange -Input [1]: [ca_address_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [ca_address_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (37) BroadcastHashJoin [codegen id : 13] Left keys [1]: [c_current_addr_sk#3] -Right keys [1]: [ca_address_sk#20] +Right keys [1]: [ca_address_sk#17] Join condition: None (38) Project [codegen id : 13] Output [1]: [c_current_cdemo_sk#2] -Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#20] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#17] (39) BroadcastExchange Input [1]: [c_current_cdemo_sk#2] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (40) Scan parquet default.customer_demographics -Output [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Output [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (41) ColumnarToRow -Input [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] (42) Filter -Input [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] -Condition : isnotnull(cd_demo_sk#24) +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Condition : isnotnull(cd_demo_sk#19) (43) BroadcastHashJoin [codegen id : 14] Left keys [1]: [c_current_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#24] +Right keys [1]: [cd_demo_sk#19] Join condition: None (44) Project [codegen id : 14] -Output [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] -Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Output [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] (45) HashAggregate [codegen id : 14] -Input [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] -Keys [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Input [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#33] -Results [9]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, count#34] +Aggregate Attributes [1]: [count#28] +Results [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] (46) Exchange -Input [9]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, count#34] -Arguments: hashpartitioning(cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, 5), ENSURE_REQUIREMENTS, [id=#35] +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] +Arguments: hashpartitioning(cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), ENSURE_REQUIREMENTS, [plan_id=6] (47) HashAggregate [codegen id : 15] -Input [9]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, count#34] -Keys [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#36] -Results [14]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, count(1)#36 AS cnt1#37, cd_purchase_estimate#28, count(1)#36 AS cnt2#38, cd_credit_rating#29, count(1)#36 AS cnt3#39, cd_dep_count#30, count(1)#36 AS cnt4#40, cd_dep_employed_count#31, count(1)#36 AS cnt5#41, cd_dep_college_count#32, count(1)#36 AS cnt6#42] +Aggregate Attributes [1]: [count(1)#30] +Results [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, count(1)#30 AS cnt1#31, cd_purchase_estimate#23, count(1)#30 AS cnt2#32, cd_credit_rating#24, count(1)#30 AS cnt3#33, cd_dep_count#25, count(1)#30 AS cnt4#34, cd_dep_employed_count#26, count(1)#30 AS cnt5#35, cd_dep_college_count#27, count(1)#30 AS cnt6#36] (48) TakeOrderedAndProject -Input [14]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cnt1#37, cd_purchase_estimate#28, cnt2#38, cd_credit_rating#29, cnt3#39, cd_dep_count#30, cnt4#40, cd_dep_employed_count#31, cnt5#41, cd_dep_college_count#32, cnt6#42] -Arguments: 100, [cd_gender#25 ASC NULLS FIRST, cd_marital_status#26 ASC NULLS FIRST, cd_education_status#27 ASC NULLS FIRST, cd_purchase_estimate#28 ASC NULLS FIRST, cd_credit_rating#29 ASC NULLS FIRST, cd_dep_count#30 ASC NULLS FIRST, cd_dep_employed_count#31 ASC NULLS FIRST, cd_dep_college_count#32 ASC NULLS FIRST], [cd_gender#25, cd_marital_status#26, cd_education_status#27, cnt1#37, cd_purchase_estimate#28, cnt2#38, cd_credit_rating#29, cnt3#39, cd_dep_count#30, cnt4#40, cd_dep_employed_count#31, cnt5#41, cd_dep_college_count#32, cnt6#42] +Input [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#31, cd_purchase_estimate#23, cnt2#32, cd_credit_rating#24, cnt3#33, cd_dep_count#25, cnt4#34, cd_dep_employed_count#26, cnt5#35, cd_dep_college_count#27, cnt6#36] +Arguments: 100, [cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_education_status#22 ASC NULLS FIRST, cd_purchase_estimate#23 ASC NULLS FIRST, cd_credit_rating#24 ASC NULLS FIRST, cd_dep_count#25 ASC NULLS FIRST, cd_dep_employed_count#26 ASC NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#31, cd_purchase_estimate#23, cnt2#32, cd_credit_rating#24, cnt3#33, cd_dep_count#25, cnt4#34, cd_dep_employed_count#26, cnt5#35, cd_dep_college_count#27, cnt6#36] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ws_sold_date_sk#6 IN dynamicpruning#7 +Subquery:1 Hosting operator id = 6 Hosting Expression = ws_sold_date_sk#5 IN dynamicpruning#6 BroadcastExchange (53) +- * Project (52) +- * Filter (51) @@ -275,29 +275,29 @@ BroadcastExchange (53) (49) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#43, d_moy#44] +Output [3]: [d_date_sk#7, d_year#37, d_moy#38] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,7), IsNotNull(d_date_sk)] ReadSchema: struct (50) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#43, d_moy#44] +Input [3]: [d_date_sk#7, d_year#37, d_moy#38] (51) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#43, d_moy#44] -Condition : (((((isnotnull(d_year#43) AND isnotnull(d_moy#44)) AND (d_year#43 = 2002)) AND (d_moy#44 >= 4)) AND (d_moy#44 <= 7)) AND isnotnull(d_date_sk#8)) +Input [3]: [d_date_sk#7, d_year#37, d_moy#38] +Condition : (((((isnotnull(d_year#37) AND isnotnull(d_moy#38)) AND (d_year#37 = 2002)) AND (d_moy#38 >= 4)) AND (d_moy#38 <= 7)) AND isnotnull(d_date_sk#7)) (52) Project [codegen id : 1] -Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#43, d_moy#44] +Output [1]: [d_date_sk#7] +Input [3]: [d_date_sk#7, d_year#37, d_moy#38] (53) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#45] +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:2 Hosting operator id = 12 Hosting Expression = cs_sold_date_sk#11 IN dynamicpruning#7 +Subquery:2 Hosting operator id = 12 Hosting Expression = cs_sold_date_sk#10 IN dynamicpruning#6 -Subquery:3 Hosting operator id = 22 Hosting Expression = ss_sold_date_sk#16 IN dynamicpruning#7 +Subquery:3 Hosting operator id = 22 Hosting Expression = ss_sold_date_sk#14 IN dynamicpruning#6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10/explain.txt index a565ac5010026..3494945bb469f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q10/explain.txt @@ -117,7 +117,7 @@ Input [3]: [cs_ship_customer_sk#9, cs_sold_date_sk#10, d_date_sk#11] (17) BroadcastExchange Input [1]: [customer_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (18) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#1] @@ -125,39 +125,39 @@ Right keys [1]: [customer_sk#8] Join condition: None (19) Scan parquet default.store_sales -Output [2]: [ss_customer_sk#14, ss_sold_date_sk#15] +Output [2]: [ss_customer_sk#13, ss_sold_date_sk#14] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#15), dynamicpruningexpression(ss_sold_date_sk#15 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(ss_sold_date_sk#14), dynamicpruningexpression(ss_sold_date_sk#14 IN dynamicpruning#6)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (20) ColumnarToRow [codegen id : 6] -Input [2]: [ss_customer_sk#14, ss_sold_date_sk#15] +Input [2]: [ss_customer_sk#13, ss_sold_date_sk#14] (21) Filter [codegen id : 6] -Input [2]: [ss_customer_sk#14, ss_sold_date_sk#15] -Condition : isnotnull(ss_customer_sk#14) +Input [2]: [ss_customer_sk#13, ss_sold_date_sk#14] +Condition : isnotnull(ss_customer_sk#13) (22) ReusedExchange [Reuses operator id: 49] -Output [1]: [d_date_sk#16] +Output [1]: [d_date_sk#15] (23) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#15] -Right keys [1]: [d_date_sk#16] +Left keys [1]: [ss_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] Join condition: None (24) Project [codegen id : 6] -Output [1]: [ss_customer_sk#14 AS customer_sk#17] -Input [3]: [ss_customer_sk#14, ss_sold_date_sk#15, d_date_sk#16] +Output [1]: [ss_customer_sk#13 AS customer_sk#16] +Input [3]: [ss_customer_sk#13, ss_sold_date_sk#14, d_date_sk#15] (25) BroadcastExchange -Input [1]: [customer_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [customer_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (26) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [customer_sk#17] +Right keys [1]: [customer_sk#16] Join condition: None (27) Project [codegen id : 9] @@ -165,84 +165,84 @@ Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] (28) Scan parquet default.customer_address -Output [2]: [ca_address_sk#19, ca_county#20] +Output [2]: [ca_address_sk#17, ca_county#18] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_county, [Dona Ana County,Douglas County,Gaines County,Richland County,Walker County]), IsNotNull(ca_address_sk)] ReadSchema: struct (29) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#19, ca_county#20] +Input [2]: [ca_address_sk#17, ca_county#18] (30) Filter [codegen id : 7] -Input [2]: [ca_address_sk#19, ca_county#20] -Condition : (ca_county#20 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#19)) +Input [2]: [ca_address_sk#17, ca_county#18] +Condition : (ca_county#18 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#17)) (31) Project [codegen id : 7] -Output [1]: [ca_address_sk#19] -Input [2]: [ca_address_sk#19, ca_county#20] +Output [1]: [ca_address_sk#17] +Input [2]: [ca_address_sk#17, ca_county#18] (32) BroadcastExchange -Input [1]: [ca_address_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] +Input [1]: [ca_address_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (33) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_current_addr_sk#3] -Right keys [1]: [ca_address_sk#19] +Right keys [1]: [ca_address_sk#17] Join condition: None (34) Project [codegen id : 9] Output [1]: [c_current_cdemo_sk#2] -Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#19] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#17] (35) Scan parquet default.customer_demographics -Output [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Output [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (36) ColumnarToRow [codegen id : 8] -Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] (37) Filter [codegen id : 8] -Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Condition : isnotnull(cd_demo_sk#22) +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Condition : isnotnull(cd_demo_sk#19) (38) BroadcastExchange -Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] +Input [9]: [cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (39) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_current_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#22] +Right keys [1]: [cd_demo_sk#19] Join condition: None (40) Project [codegen id : 9] -Output [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Output [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#19, cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] (41) HashAggregate [codegen id : 9] -Input [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Keys [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Input [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#32] -Results [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] +Aggregate Attributes [1]: [count#28] +Results [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] (42) Exchange -Input [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] -Arguments: hashpartitioning(cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] +Arguments: hashpartitioning(cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), ENSURE_REQUIREMENTS, [plan_id=5] (43) HashAggregate [codegen id : 10] -Input [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] -Keys [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Input [9]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#29] +Keys [8]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cd_purchase_estimate#23, cd_credit_rating#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#35] -Results [14]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, count(1)#35 AS cnt1#36, cd_purchase_estimate#26, count(1)#35 AS cnt2#37, cd_credit_rating#27, count(1)#35 AS cnt3#38, cd_dep_count#28, count(1)#35 AS cnt4#39, cd_dep_employed_count#29, count(1)#35 AS cnt5#40, cd_dep_college_count#30, count(1)#35 AS cnt6#41] +Aggregate Attributes [1]: [count(1)#30] +Results [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, count(1)#30 AS cnt1#31, cd_purchase_estimate#23, count(1)#30 AS cnt2#32, cd_credit_rating#24, count(1)#30 AS cnt3#33, cd_dep_count#25, count(1)#30 AS cnt4#34, cd_dep_employed_count#26, count(1)#30 AS cnt5#35, cd_dep_college_count#27, count(1)#30 AS cnt6#36] (44) TakeOrderedAndProject -Input [14]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#36, cd_purchase_estimate#26, cnt2#37, cd_credit_rating#27, cnt3#38, cd_dep_count#28, cnt4#39, cd_dep_employed_count#29, cnt5#40, cd_dep_college_count#30, cnt6#41] -Arguments: 100, [cd_gender#23 ASC NULLS FIRST, cd_marital_status#24 ASC NULLS FIRST, cd_education_status#25 ASC NULLS FIRST, cd_purchase_estimate#26 ASC NULLS FIRST, cd_credit_rating#27 ASC NULLS FIRST, cd_dep_count#28 ASC NULLS FIRST, cd_dep_employed_count#29 ASC NULLS FIRST, cd_dep_college_count#30 ASC NULLS FIRST], [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#36, cd_purchase_estimate#26, cnt2#37, cd_credit_rating#27, cnt3#38, cd_dep_count#28, cnt4#39, cd_dep_employed_count#29, cnt5#40, cd_dep_college_count#30, cnt6#41] +Input [14]: [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#31, cd_purchase_estimate#23, cnt2#32, cd_credit_rating#24, cnt3#33, cd_dep_count#25, cnt4#34, cd_dep_employed_count#26, cnt5#35, cd_dep_college_count#27, cnt6#36] +Arguments: 100, [cd_gender#20 ASC NULLS FIRST, cd_marital_status#21 ASC NULLS FIRST, cd_education_status#22 ASC NULLS FIRST, cd_purchase_estimate#23 ASC NULLS FIRST, cd_credit_rating#24 ASC NULLS FIRST, cd_dep_count#25 ASC NULLS FIRST, cd_dep_employed_count#26 ASC NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [cd_gender#20, cd_marital_status#21, cd_education_status#22, cnt1#31, cd_purchase_estimate#23, cnt2#32, cd_credit_rating#24, cnt3#33, cd_dep_count#25, cnt4#34, cd_dep_employed_count#26, cnt5#35, cd_dep_college_count#27, cnt6#36] ===== Subqueries ===== @@ -255,29 +255,29 @@ BroadcastExchange (49) (45) Scan parquet default.date_dim -Output [3]: [d_date_sk#7, d_year#42, d_moy#43] +Output [3]: [d_date_sk#7, d_year#37, d_moy#38] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,7), IsNotNull(d_date_sk)] ReadSchema: struct (46) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#42, d_moy#43] +Input [3]: [d_date_sk#7, d_year#37, d_moy#38] (47) Filter [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#42, d_moy#43] -Condition : (((((isnotnull(d_year#42) AND isnotnull(d_moy#43)) AND (d_year#42 = 2002)) AND (d_moy#43 >= 4)) AND (d_moy#43 <= 7)) AND isnotnull(d_date_sk#7)) +Input [3]: [d_date_sk#7, d_year#37, d_moy#38] +Condition : (((((isnotnull(d_year#37) AND isnotnull(d_moy#38)) AND (d_year#37 = 2002)) AND (d_moy#38 >= 4)) AND (d_moy#38 <= 7)) AND isnotnull(d_date_sk#7)) (48) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [3]: [d_date_sk#7, d_year#42, d_moy#43] +Input [3]: [d_date_sk#7, d_year#37, d_moy#38] (49) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] Subquery:2 Hosting operator id = 10 Hosting Expression = cs_sold_date_sk#10 IN dynamicpruning#6 -Subquery:3 Hosting operator id = 19 Hosting Expression = ss_sold_date_sk#15 IN dynamicpruning#6 +Subquery:3 Hosting operator id = 19 Hosting Expression = ss_sold_date_sk#14 IN dynamicpruning#6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19.sf100/explain.txt index 6b4fce45b44f2..27ebf292c53d1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19.sf100/explain.txt @@ -60,169 +60,169 @@ Input [3]: [d_date_sk#1, d_year#2, d_moy#3] (5) BroadcastExchange Input [1]: [d_date_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.store_sales -Output [5]: [ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, ss_sold_date_sk#9] +Output [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#9), (ss_sold_date_sk#9 >= 2451484), (ss_sold_date_sk#9 <= 2451513), dynamicpruningexpression(ss_sold_date_sk#9 IN dynamicpruning#10)] +PartitionFilters: [isnotnull(ss_sold_date_sk#8), (ss_sold_date_sk#8 >= 2451484), (ss_sold_date_sk#8 <= 2451513), dynamicpruningexpression(ss_sold_date_sk#8 IN dynamicpruning#9)] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (7) ColumnarToRow -Input [5]: [ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, ss_sold_date_sk#9] +Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] (8) Filter -Input [5]: [ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, ss_sold_date_sk#9] -Condition : ((isnotnull(ss_item_sk#5) AND isnotnull(ss_customer_sk#6)) AND isnotnull(ss_store_sk#7)) +Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] +Condition : ((isnotnull(ss_item_sk#4) AND isnotnull(ss_customer_sk#5)) AND isnotnull(ss_store_sk#6)) (9) BroadcastHashJoin [codegen id : 2] Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#9] +Right keys [1]: [ss_sold_date_sk#8] Join condition: None (10) Project [codegen id : 2] -Output [4]: [ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] -Input [6]: [d_date_sk#1, ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, ss_sold_date_sk#9] +Output [4]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7] +Input [6]: [d_date_sk#1, ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] (11) BroadcastExchange -Input [4]: [ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#11] +Input [4]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=2] (12) Scan parquet default.customer -Output [2]: [c_customer_sk#12, c_current_addr_sk#13] +Output [2]: [c_customer_sk#10, c_current_addr_sk#11] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (13) ColumnarToRow -Input [2]: [c_customer_sk#12, c_current_addr_sk#13] +Input [2]: [c_customer_sk#10, c_current_addr_sk#11] (14) Filter -Input [2]: [c_customer_sk#12, c_current_addr_sk#13] -Condition : (isnotnull(c_customer_sk#12) AND isnotnull(c_current_addr_sk#13)) +Input [2]: [c_customer_sk#10, c_current_addr_sk#11] +Condition : (isnotnull(c_customer_sk#10) AND isnotnull(c_current_addr_sk#11)) (15) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_customer_sk#6] -Right keys [1]: [c_customer_sk#12] +Left keys [1]: [ss_customer_sk#5] +Right keys [1]: [c_customer_sk#10] Join condition: None (16) Project [codegen id : 4] -Output [4]: [ss_item_sk#5, ss_store_sk#7, ss_ext_sales_price#8, c_current_addr_sk#13] -Input [6]: [ss_item_sk#5, ss_customer_sk#6, ss_store_sk#7, ss_ext_sales_price#8, c_customer_sk#12, c_current_addr_sk#13] +Output [4]: [ss_item_sk#4, ss_store_sk#6, ss_ext_sales_price#7, c_current_addr_sk#11] +Input [6]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, c_customer_sk#10, c_current_addr_sk#11] (17) Scan parquet default.store -Output [2]: [s_store_sk#14, s_zip#15] +Output [2]: [s_store_sk#12, s_zip#13] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#14, s_zip#15] +Input [2]: [s_store_sk#12, s_zip#13] (19) Filter [codegen id : 3] -Input [2]: [s_store_sk#14, s_zip#15] -Condition : (isnotnull(s_zip#15) AND isnotnull(s_store_sk#14)) +Input [2]: [s_store_sk#12, s_zip#13] +Condition : (isnotnull(s_zip#13) AND isnotnull(s_store_sk#12)) (20) BroadcastExchange -Input [2]: [s_store_sk#14, s_zip#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] +Input [2]: [s_store_sk#12, s_zip#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#7] -Right keys [1]: [s_store_sk#14] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#12] Join condition: None (22) Project [codegen id : 4] -Output [4]: [ss_item_sk#5, ss_ext_sales_price#8, c_current_addr_sk#13, s_zip#15] -Input [6]: [ss_item_sk#5, ss_store_sk#7, ss_ext_sales_price#8, c_current_addr_sk#13, s_store_sk#14, s_zip#15] +Output [4]: [ss_item_sk#4, ss_ext_sales_price#7, c_current_addr_sk#11, s_zip#13] +Input [6]: [ss_item_sk#4, ss_store_sk#6, ss_ext_sales_price#7, c_current_addr_sk#11, s_store_sk#12, s_zip#13] (23) BroadcastExchange -Input [4]: [ss_item_sk#5, ss_ext_sales_price#8, c_current_addr_sk#13, s_zip#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#17] +Input [4]: [ss_item_sk#4, ss_ext_sales_price#7, c_current_addr_sk#11, s_zip#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=4] (24) Scan parquet default.customer_address -Output [2]: [ca_address_sk#18, ca_zip#19] +Output [2]: [ca_address_sk#14, ca_zip#15] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] ReadSchema: struct (25) ColumnarToRow -Input [2]: [ca_address_sk#18, ca_zip#19] +Input [2]: [ca_address_sk#14, ca_zip#15] (26) Filter -Input [2]: [ca_address_sk#18, ca_zip#19] -Condition : (isnotnull(ca_address_sk#18) AND isnotnull(ca_zip#19)) +Input [2]: [ca_address_sk#14, ca_zip#15] +Condition : (isnotnull(ca_address_sk#14) AND isnotnull(ca_zip#15)) (27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_current_addr_sk#13] -Right keys [1]: [ca_address_sk#18] -Join condition: NOT (substr(ca_zip#19, 1, 5) = substr(s_zip#15, 1, 5)) +Left keys [1]: [c_current_addr_sk#11] +Right keys [1]: [ca_address_sk#14] +Join condition: NOT (substr(ca_zip#15, 1, 5) = substr(s_zip#13, 1, 5)) (28) Project [codegen id : 6] -Output [2]: [ss_item_sk#5, ss_ext_sales_price#8] -Input [6]: [ss_item_sk#5, ss_ext_sales_price#8, c_current_addr_sk#13, s_zip#15, ca_address_sk#18, ca_zip#19] +Output [2]: [ss_item_sk#4, ss_ext_sales_price#7] +Input [6]: [ss_item_sk#4, ss_ext_sales_price#7, c_current_addr_sk#11, s_zip#13, ca_address_sk#14, ca_zip#15] (29) Scan parquet default.item -Output [6]: [i_item_sk#20, i_brand_id#21, i_brand#22, i_manufact_id#23, i_manufact#24, i_manager_id#25] +Output [6]: [i_item_sk#16, i_brand_id#17, i_brand#18, i_manufact_id#19, i_manufact#20, i_manager_id#21] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,7), IsNotNull(i_item_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 5] -Input [6]: [i_item_sk#20, i_brand_id#21, i_brand#22, i_manufact_id#23, i_manufact#24, i_manager_id#25] +Input [6]: [i_item_sk#16, i_brand_id#17, i_brand#18, i_manufact_id#19, i_manufact#20, i_manager_id#21] (31) Filter [codegen id : 5] -Input [6]: [i_item_sk#20, i_brand_id#21, i_brand#22, i_manufact_id#23, i_manufact#24, i_manager_id#25] -Condition : ((isnotnull(i_manager_id#25) AND (i_manager_id#25 = 7)) AND isnotnull(i_item_sk#20)) +Input [6]: [i_item_sk#16, i_brand_id#17, i_brand#18, i_manufact_id#19, i_manufact#20, i_manager_id#21] +Condition : ((isnotnull(i_manager_id#21) AND (i_manager_id#21 = 7)) AND isnotnull(i_item_sk#16)) (32) Project [codegen id : 5] -Output [5]: [i_item_sk#20, i_brand_id#21, i_brand#22, i_manufact_id#23, i_manufact#24] -Input [6]: [i_item_sk#20, i_brand_id#21, i_brand#22, i_manufact_id#23, i_manufact#24, i_manager_id#25] +Output [5]: [i_item_sk#16, i_brand_id#17, i_brand#18, i_manufact_id#19, i_manufact#20] +Input [6]: [i_item_sk#16, i_brand_id#17, i_brand#18, i_manufact_id#19, i_manufact#20, i_manager_id#21] (33) BroadcastExchange -Input [5]: [i_item_sk#20, i_brand_id#21, i_brand#22, i_manufact_id#23, i_manufact#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [5]: [i_item_sk#16, i_brand_id#17, i_brand#18, i_manufact_id#19, i_manufact#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_item_sk#5] -Right keys [1]: [i_item_sk#20] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#16] Join condition: None (35) Project [codegen id : 6] -Output [5]: [ss_ext_sales_price#8, i_brand_id#21, i_brand#22, i_manufact_id#23, i_manufact#24] -Input [7]: [ss_item_sk#5, ss_ext_sales_price#8, i_item_sk#20, i_brand_id#21, i_brand#22, i_manufact_id#23, i_manufact#24] +Output [5]: [ss_ext_sales_price#7, i_brand_id#17, i_brand#18, i_manufact_id#19, i_manufact#20] +Input [7]: [ss_item_sk#4, ss_ext_sales_price#7, i_item_sk#16, i_brand_id#17, i_brand#18, i_manufact_id#19, i_manufact#20] (36) HashAggregate [codegen id : 6] -Input [5]: [ss_ext_sales_price#8, i_brand_id#21, i_brand#22, i_manufact_id#23, i_manufact#24] -Keys [4]: [i_brand#22, i_brand_id#21, i_manufact_id#23, i_manufact#24] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#8))] -Aggregate Attributes [1]: [sum#27] -Results [5]: [i_brand#22, i_brand_id#21, i_manufact_id#23, i_manufact#24, sum#28] +Input [5]: [ss_ext_sales_price#7, i_brand_id#17, i_brand#18, i_manufact_id#19, i_manufact#20] +Keys [4]: [i_brand#18, i_brand_id#17, i_manufact_id#19, i_manufact#20] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum#22] +Results [5]: [i_brand#18, i_brand_id#17, i_manufact_id#19, i_manufact#20, sum#23] (37) Exchange -Input [5]: [i_brand#22, i_brand_id#21, i_manufact_id#23, i_manufact#24, sum#28] -Arguments: hashpartitioning(i_brand#22, i_brand_id#21, i_manufact_id#23, i_manufact#24, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [5]: [i_brand#18, i_brand_id#17, i_manufact_id#19, i_manufact#20, sum#23] +Arguments: hashpartitioning(i_brand#18, i_brand_id#17, i_manufact_id#19, i_manufact#20, 5), ENSURE_REQUIREMENTS, [plan_id=6] (38) HashAggregate [codegen id : 7] -Input [5]: [i_brand#22, i_brand_id#21, i_manufact_id#23, i_manufact#24, sum#28] -Keys [4]: [i_brand#22, i_brand_id#21, i_manufact_id#23, i_manufact#24] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#8))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#8))#30] -Results [5]: [i_brand_id#21 AS brand_id#31, i_brand#22 AS brand#32, i_manufact_id#23, i_manufact#24, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#8))#30,17,2) AS ext_price#33] +Input [5]: [i_brand#18, i_brand_id#17, i_manufact_id#19, i_manufact#20, sum#23] +Keys [4]: [i_brand#18, i_brand_id#17, i_manufact_id#19, i_manufact#20] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#24] +Results [5]: [i_brand_id#17 AS brand_id#25, i_brand#18 AS brand#26, i_manufact_id#19, i_manufact#20, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#24,17,2) AS ext_price#27] (39) TakeOrderedAndProject -Input [5]: [brand_id#31, brand#32, i_manufact_id#23, i_manufact#24, ext_price#33] -Arguments: 100, [ext_price#33 DESC NULLS LAST, brand#32 ASC NULLS FIRST, brand_id#31 ASC NULLS FIRST, i_manufact_id#23 ASC NULLS FIRST, i_manufact#24 ASC NULLS FIRST], [brand_id#31, brand#32, i_manufact_id#23, i_manufact#24, ext_price#33] +Input [5]: [brand_id#25, brand#26, i_manufact_id#19, i_manufact#20, ext_price#27] +Arguments: 100, [ext_price#27 DESC NULLS LAST, brand#26 ASC NULLS FIRST, brand_id#25 ASC NULLS FIRST, i_manufact_id#19 ASC NULLS FIRST, i_manufact#20 ASC NULLS FIRST], [brand_id#25, brand#26, i_manufact_id#19, i_manufact#20, ext_price#27] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#9 IN dynamicpruning#10 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 ReusedExchange (40) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19/explain.txt index e58fb8a1e1364..a13166ff0ceb3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q19/explain.txt @@ -75,7 +75,7 @@ Condition : ((isnotnull(ss_item_sk#4) AND isnotnull(ss_customer_sk#5)) AND isnot (8) BroadcastExchange Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[4, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[4, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 6] Left keys [1]: [d_date_sk#1] @@ -87,136 +87,136 @@ Output [4]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7 Input [6]: [d_date_sk#1, ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] (11) Scan parquet default.item -Output [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] +Output [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,7), IsNotNull(i_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] (13) Filter [codegen id : 2] -Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] -Condition : ((isnotnull(i_manager_id#15) AND (i_manager_id#15 = 7)) AND isnotnull(i_item_sk#10)) +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Condition : ((isnotnull(i_manager_id#14) AND (i_manager_id#14 = 7)) AND isnotnull(i_item_sk#9)) (14) Project [codegen id : 2] -Output [5]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] +Output [5]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] (15) BroadcastExchange -Input [5]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] +Input [5]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_item_sk#4] -Right keys [1]: [i_item_sk#10] +Right keys [1]: [i_item_sk#9] Join condition: None (17) Project [codegen id : 6] -Output [7]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Input [9]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Output [7]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Input [9]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] (18) Scan parquet default.customer -Output [2]: [c_customer_sk#17, c_current_addr_sk#18] +Output [2]: [c_customer_sk#15, c_current_addr_sk#16] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] -Input [2]: [c_customer_sk#17, c_current_addr_sk#18] +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] (20) Filter [codegen id : 3] -Input [2]: [c_customer_sk#17, c_current_addr_sk#18] -Condition : (isnotnull(c_customer_sk#17) AND isnotnull(c_current_addr_sk#18)) +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_current_addr_sk#16)) (21) BroadcastExchange -Input [2]: [c_customer_sk#17, c_current_addr_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (22) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#5] -Right keys [1]: [c_customer_sk#17] +Right keys [1]: [c_customer_sk#15] Join condition: None (23) Project [codegen id : 6] -Output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_current_addr_sk#18] -Input [9]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_customer_sk#17, c_current_addr_sk#18] +Output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16] +Input [9]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_customer_sk#15, c_current_addr_sk#16] (24) Scan parquet default.customer_address -Output [2]: [ca_address_sk#20, ca_zip#21] +Output [2]: [ca_address_sk#17, ca_zip#18] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] ReadSchema: struct (25) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#20, ca_zip#21] +Input [2]: [ca_address_sk#17, ca_zip#18] (26) Filter [codegen id : 4] -Input [2]: [ca_address_sk#20, ca_zip#21] -Condition : (isnotnull(ca_address_sk#20) AND isnotnull(ca_zip#21)) +Input [2]: [ca_address_sk#17, ca_zip#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_zip#18)) (27) BroadcastExchange -Input [2]: [ca_address_sk#20, ca_zip#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] +Input [2]: [ca_address_sk#17, ca_zip#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (28) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_current_addr_sk#18] -Right keys [1]: [ca_address_sk#20] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#17] Join condition: None (29) Project [codegen id : 6] -Output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, ca_zip#21] -Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_current_addr_sk#18, ca_address_sk#20, ca_zip#21] +Output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18] +Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16, ca_address_sk#17, ca_zip#18] (30) Scan parquet default.store -Output [2]: [s_store_sk#23, s_zip#24] +Output [2]: [s_store_sk#19, s_zip#20] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 5] -Input [2]: [s_store_sk#23, s_zip#24] +Input [2]: [s_store_sk#19, s_zip#20] (32) Filter [codegen id : 5] -Input [2]: [s_store_sk#23, s_zip#24] -Condition : (isnotnull(s_zip#24) AND isnotnull(s_store_sk#23)) +Input [2]: [s_store_sk#19, s_zip#20] +Condition : (isnotnull(s_zip#20) AND isnotnull(s_store_sk#19)) (33) BroadcastExchange -Input [2]: [s_store_sk#23, s_zip#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] +Input [2]: [s_store_sk#19, s_zip#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_store_sk#6] -Right keys [1]: [s_store_sk#23] -Join condition: NOT (substr(ca_zip#21, 1, 5) = substr(s_zip#24, 1, 5)) +Right keys [1]: [s_store_sk#19] +Join condition: NOT (substr(ca_zip#18, 1, 5) = substr(s_zip#20, 1, 5)) (35) Project [codegen id : 6] -Output [5]: [ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, ca_zip#21, s_store_sk#23, s_zip#24] +Output [5]: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18, s_store_sk#19, s_zip#20] (36) HashAggregate [codegen id : 6] -Input [5]: [ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Keys [4]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14] +Input [5]: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Keys [4]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#7))] -Aggregate Attributes [1]: [sum#26] -Results [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] +Aggregate Attributes [1]: [sum#21] +Results [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] (37) Exchange -Input [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] -Arguments: hashpartitioning(i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] +Arguments: hashpartitioning(i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, 5), ENSURE_REQUIREMENTS, [plan_id=6] (38) HashAggregate [codegen id : 7] -Input [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] -Keys [4]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14] +Input [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] +Keys [4]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#29] -Results [5]: [i_brand_id#11 AS brand_id#30, i_brand#12 AS brand#31, i_manufact_id#13, i_manufact#14, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#29,17,2) AS ext_price#32] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#23] +Results [5]: [i_brand_id#10 AS brand_id#24, i_brand#11 AS brand#25, i_manufact_id#12, i_manufact#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#23,17,2) AS ext_price#26] (39) TakeOrderedAndProject -Input [5]: [brand_id#30, brand#31, i_manufact_id#13, i_manufact#14, ext_price#32] -Arguments: 100, [ext_price#32 DESC NULLS LAST, brand#31 ASC NULLS FIRST, brand_id#30 ASC NULLS FIRST, i_manufact_id#13 ASC NULLS FIRST, i_manufact#14 ASC NULLS FIRST], [brand_id#30, brand#31, i_manufact_id#13, i_manufact#14, ext_price#32] +Input [5]: [brand_id#24, brand#25, i_manufact_id#12, i_manufact#13, ext_price#26] +Arguments: 100, [ext_price#26 DESC NULLS LAST, brand#25 ASC NULLS FIRST, brand_id#24 ASC NULLS FIRST, i_manufact_id#12 ASC NULLS FIRST, i_manufact#13 ASC NULLS FIRST], [brand_id#24, brand#25, i_manufact_id#12, i_manufact#13, ext_price#26] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt index 65572cfd770d0..ae00dddad017c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27.sf100/explain.txt @@ -121,7 +121,7 @@ Input [4]: [cd_demo_sk#11, cd_gender#12, cd_marital_status#13, cd_education_stat (11) BroadcastExchange Input [1]: [cd_demo_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_cdemo_sk#2] @@ -133,76 +133,76 @@ Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, cd_demo_sk#11] (14) Scan parquet default.store -Output [2]: [s_store_sk#16, s_state#17] +Output [2]: [s_store_sk#15, s_state#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [In(s_state, [AL,SD,TN]), IsNotNull(s_store_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#16, s_state#17] +Input [2]: [s_store_sk#15, s_state#16] (16) Filter [codegen id : 3] -Input [2]: [s_store_sk#16, s_state#17] -Condition : (s_state#17 IN (TN,AL,SD) AND isnotnull(s_store_sk#16)) +Input [2]: [s_store_sk#15, s_state#16] +Condition : (s_state#16 IN (TN,AL,SD) AND isnotnull(s_store_sk#15)) (17) BroadcastExchange -Input [2]: [s_store_sk#16, s_state#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] +Input [2]: [s_store_sk#15, s_state#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] +Right keys [1]: [s_store_sk#15] Join condition: None (19) Project [codegen id : 5] -Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#17] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#16, s_state#17] +Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] (20) Scan parquet default.item -Output [2]: [i_item_sk#19, i_item_id#20] +Output [2]: [i_item_sk#17, i_item_id#18] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#19, i_item_id#20] +Input [2]: [i_item_sk#17, i_item_id#18] (22) Filter [codegen id : 4] -Input [2]: [i_item_sk#19, i_item_id#20] -Condition : isnotnull(i_item_sk#19) +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) (23) BroadcastExchange -Input [2]: [i_item_sk#19, i_item_id#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#19] +Right keys [1]: [i_item_sk#17] Join condition: None (25) Project [codegen id : 5] -Output [6]: [i_item_id#20, s_state#17, ss_quantity#4 AS agg1#22, ss_list_price#5 AS agg2#23, ss_coupon_amt#7 AS agg3#24, ss_sales_price#6 AS agg4#25] -Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#17, i_item_sk#19, i_item_id#20] +Output [6]: [i_item_id#18, s_state#16, ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] (26) HashAggregate [codegen id : 5] -Input [6]: [i_item_id#20, s_state#17, agg1#22, agg2#23, agg3#24, agg4#25] -Keys [2]: [i_item_id#20, s_state#17] -Functions [4]: [partial_avg(agg1#22), partial_avg(UnscaledValue(agg2#23)), partial_avg(UnscaledValue(agg3#24)), partial_avg(UnscaledValue(agg4#25))] -Aggregate Attributes [8]: [sum#26, count#27, sum#28, count#29, sum#30, count#31, sum#32, count#33] -Results [10]: [i_item_id#20, s_state#17, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] +Input [6]: [i_item_id#18, s_state#16, agg1#19, agg2#20, agg3#21, agg4#22] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] +Results [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] (27) Exchange -Input [10]: [i_item_id#20, s_state#17, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] -Arguments: hashpartitioning(i_item_id#20, s_state#17, 5), ENSURE_REQUIREMENTS, [id=#42] +Input [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Arguments: hashpartitioning(i_item_id#18, s_state#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 6] -Input [10]: [i_item_id#20, s_state#17, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] -Keys [2]: [i_item_id#20, s_state#17] -Functions [4]: [avg(agg1#22), avg(UnscaledValue(agg2#23)), avg(UnscaledValue(agg3#24)), avg(UnscaledValue(agg4#25))] -Aggregate Attributes [4]: [avg(agg1#22)#43, avg(UnscaledValue(agg2#23))#44, avg(UnscaledValue(agg3#24))#45, avg(UnscaledValue(agg4#25))#46] -Results [7]: [i_item_id#20, s_state#17, 0 AS g_state#47, avg(agg1#22)#43 AS agg1#48, cast((avg(UnscaledValue(agg2#23))#44 / 100.0) as decimal(11,6)) AS agg2#49, cast((avg(UnscaledValue(agg3#24))#45 / 100.0) as decimal(11,6)) AS agg3#50, cast((avg(UnscaledValue(agg4#25))#46 / 100.0) as decimal(11,6)) AS agg4#51] +Input [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#39, avg(UnscaledValue(agg2#20))#40, avg(UnscaledValue(agg3#21))#41, avg(UnscaledValue(agg4#22))#42] +Results [7]: [i_item_id#18, s_state#16, 0 AS g_state#43, avg(agg1#19)#39 AS agg1#44, cast((avg(UnscaledValue(agg2#20))#40 / 100.0) as decimal(11,6)) AS agg2#45, cast((avg(UnscaledValue(agg3#21))#41 / 100.0) as decimal(11,6)) AS agg3#46, cast((avg(UnscaledValue(agg4#22))#42 / 100.0) as decimal(11,6)) AS agg4#47] (29) Scan parquet default.store_sales Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] @@ -232,35 +232,35 @@ Output [7]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_ Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#10] (35) Scan parquet default.store -Output [2]: [s_store_sk#16, s_state#17] +Output [2]: [s_store_sk#15, s_state#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [In(s_state, [AL,SD,TN]), IsNotNull(s_store_sk)] ReadSchema: struct (36) ColumnarToRow [codegen id : 8] -Input [2]: [s_store_sk#16, s_state#17] +Input [2]: [s_store_sk#15, s_state#16] (37) Filter [codegen id : 8] -Input [2]: [s_store_sk#16, s_state#17] -Condition : (s_state#17 IN (TN,AL,SD) AND isnotnull(s_store_sk#16)) +Input [2]: [s_store_sk#15, s_state#16] +Condition : (s_state#16 IN (TN,AL,SD) AND isnotnull(s_store_sk#15)) (38) Project [codegen id : 8] -Output [1]: [s_store_sk#16] -Input [2]: [s_store_sk#16, s_state#17] +Output [1]: [s_store_sk#15] +Input [2]: [s_store_sk#15, s_state#16] (39) BroadcastExchange -Input [1]: [s_store_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (40) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] +Right keys [1]: [s_store_sk#15] Join condition: None (41) Project [codegen id : 11] Output [6]: [ss_item_sk#1, ss_cdemo_sk#2, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#16] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15] (42) ReusedExchange [Reuses operator id: 11] Output [1]: [cd_demo_sk#11] @@ -275,34 +275,34 @@ Output [5]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_ Input [7]: [ss_item_sk#1, ss_cdemo_sk#2, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, cd_demo_sk#11] (45) ReusedExchange [Reuses operator id: 23] -Output [2]: [i_item_sk#19, i_item_id#20] +Output [2]: [i_item_sk#17, i_item_id#18] (46) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#19] +Right keys [1]: [i_item_sk#17] Join condition: None (47) Project [codegen id : 11] -Output [5]: [i_item_id#20, ss_quantity#4 AS agg1#22, ss_list_price#5 AS agg2#23, ss_coupon_amt#7 AS agg3#24, ss_sales_price#6 AS agg4#25] -Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#19, i_item_id#20] +Output [5]: [i_item_id#18, ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#17, i_item_id#18] (48) HashAggregate [codegen id : 11] -Input [5]: [i_item_id#20, agg1#22, agg2#23, agg3#24, agg4#25] -Keys [1]: [i_item_id#20] -Functions [4]: [partial_avg(agg1#22), partial_avg(UnscaledValue(agg2#23)), partial_avg(UnscaledValue(agg3#24)), partial_avg(UnscaledValue(agg4#25))] -Aggregate Attributes [8]: [sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] -Results [9]: [i_item_id#20, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] +Input [5]: [i_item_id#18, agg1#19, agg2#20, agg3#21, agg4#22] +Keys [1]: [i_item_id#18] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#48, count#49, sum#50, count#51, sum#52, count#53, sum#54, count#55] +Results [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] (49) Exchange -Input [9]: [i_item_id#20, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] -Arguments: hashpartitioning(i_item_id#20, 5), ENSURE_REQUIREMENTS, [id=#69] +Input [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] +Arguments: hashpartitioning(i_item_id#18, 5), ENSURE_REQUIREMENTS, [plan_id=6] (50) HashAggregate [codegen id : 12] -Input [9]: [i_item_id#20, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] -Keys [1]: [i_item_id#20] -Functions [4]: [avg(agg1#22), avg(UnscaledValue(agg2#23)), avg(UnscaledValue(agg3#24)), avg(UnscaledValue(agg4#25))] -Aggregate Attributes [4]: [avg(agg1#22)#70, avg(UnscaledValue(agg2#23))#71, avg(UnscaledValue(agg3#24))#72, avg(UnscaledValue(agg4#25))#73] -Results [7]: [i_item_id#20, null AS s_state#74, 1 AS g_state#75, avg(agg1#22)#70 AS agg1#76, cast((avg(UnscaledValue(agg2#23))#71 / 100.0) as decimal(11,6)) AS agg2#77, cast((avg(UnscaledValue(agg3#24))#72 / 100.0) as decimal(11,6)) AS agg3#78, cast((avg(UnscaledValue(agg4#25))#73 / 100.0) as decimal(11,6)) AS agg4#79] +Input [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] +Keys [1]: [i_item_id#18] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#64, avg(UnscaledValue(agg2#20))#65, avg(UnscaledValue(agg3#21))#66, avg(UnscaledValue(agg4#22))#67] +Results [7]: [i_item_id#18, null AS s_state#68, 1 AS g_state#69, avg(agg1#19)#64 AS agg1#70, cast((avg(UnscaledValue(agg2#20))#65 / 100.0) as decimal(11,6)) AS agg2#71, cast((avg(UnscaledValue(agg3#21))#66 / 100.0) as decimal(11,6)) AS agg3#72, cast((avg(UnscaledValue(agg4#22))#67 / 100.0) as decimal(11,6)) AS agg4#73] (51) Scan parquet default.store_sales Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] @@ -332,16 +332,16 @@ Output [7]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_ Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#10] (57) ReusedExchange [Reuses operator id: 39] -Output [1]: [s_store_sk#16] +Output [1]: [s_store_sk#15] (58) BroadcastHashJoin [codegen id : 17] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] +Right keys [1]: [s_store_sk#15] Join condition: None (59) Project [codegen id : 17] Output [6]: [ss_item_sk#1, ss_cdemo_sk#2, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#16] +Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15] (60) ReusedExchange [Reuses operator id: 11] Output [1]: [cd_demo_sk#11] @@ -356,55 +356,55 @@ Output [5]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_ Input [7]: [ss_item_sk#1, ss_cdemo_sk#2, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, cd_demo_sk#11] (63) Scan parquet default.item -Output [1]: [i_item_sk#19] +Output [1]: [i_item_sk#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (64) ColumnarToRow [codegen id : 16] -Input [1]: [i_item_sk#19] +Input [1]: [i_item_sk#17] (65) Filter [codegen id : 16] -Input [1]: [i_item_sk#19] -Condition : isnotnull(i_item_sk#19) +Input [1]: [i_item_sk#17] +Condition : isnotnull(i_item_sk#17) (66) BroadcastExchange -Input [1]: [i_item_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#80] +Input [1]: [i_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] (67) BroadcastHashJoin [codegen id : 17] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#19] +Right keys [1]: [i_item_sk#17] Join condition: None (68) Project [codegen id : 17] -Output [4]: [ss_quantity#4 AS agg1#22, ss_list_price#5 AS agg2#23, ss_coupon_amt#7 AS agg3#24, ss_sales_price#6 AS agg4#25] -Input [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#19] +Output [4]: [ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#17] (69) HashAggregate [codegen id : 17] -Input [4]: [agg1#22, agg2#23, agg3#24, agg4#25] +Input [4]: [agg1#19, agg2#20, agg3#21, agg4#22] Keys: [] -Functions [4]: [partial_avg(agg1#22), partial_avg(UnscaledValue(agg2#23)), partial_avg(UnscaledValue(agg3#24)), partial_avg(UnscaledValue(agg4#25))] -Aggregate Attributes [8]: [sum#81, count#82, sum#83, count#84, sum#85, count#86, sum#87, count#88] -Results [8]: [sum#89, count#90, sum#91, count#92, sum#93, count#94, sum#95, count#96] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#74, count#75, sum#76, count#77, sum#78, count#79, sum#80, count#81] +Results [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] (70) Exchange -Input [8]: [sum#89, count#90, sum#91, count#92, sum#93, count#94, sum#95, count#96] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#97] +Input [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] (71) HashAggregate [codegen id : 18] -Input [8]: [sum#89, count#90, sum#91, count#92, sum#93, count#94, sum#95, count#96] +Input [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] Keys: [] -Functions [4]: [avg(agg1#22), avg(UnscaledValue(agg2#23)), avg(UnscaledValue(agg3#24)), avg(UnscaledValue(agg4#25))] -Aggregate Attributes [4]: [avg(agg1#22)#98, avg(UnscaledValue(agg2#23))#99, avg(UnscaledValue(agg3#24))#100, avg(UnscaledValue(agg4#25))#101] -Results [7]: [null AS i_item_id#102, null AS s_state#103, 1 AS g_state#104, avg(agg1#22)#98 AS agg1#105, cast((avg(UnscaledValue(agg2#23))#99 / 100.0) as decimal(11,6)) AS agg2#106, cast((avg(UnscaledValue(agg3#24))#100 / 100.0) as decimal(11,6)) AS agg3#107, cast((avg(UnscaledValue(agg4#25))#101 / 100.0) as decimal(11,6)) AS agg4#108] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#90, avg(UnscaledValue(agg2#20))#91, avg(UnscaledValue(agg3#21))#92, avg(UnscaledValue(agg4#22))#93] +Results [7]: [null AS i_item_id#94, null AS s_state#95, 1 AS g_state#96, avg(agg1#19)#90 AS agg1#97, cast((avg(UnscaledValue(agg2#20))#91 / 100.0) as decimal(11,6)) AS agg2#98, cast((avg(UnscaledValue(agg3#21))#92 / 100.0) as decimal(11,6)) AS agg3#99, cast((avg(UnscaledValue(agg4#22))#93 / 100.0) as decimal(11,6)) AS agg4#100] (72) Union (73) TakeOrderedAndProject -Input [7]: [i_item_id#20, s_state#17, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] -Arguments: 100, [i_item_id#20 ASC NULLS FIRST, s_state#17 ASC NULLS FIRST], [i_item_id#20, s_state#17, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] +Input [7]: [i_item_id#18, s_state#16, g_state#43, agg1#44, agg2#45, agg3#46, agg4#47] +Arguments: 100, [i_item_id#18 ASC NULLS FIRST, s_state#16 ASC NULLS FIRST], [i_item_id#18, s_state#16, g_state#43, agg1#44, agg2#45, agg3#46, agg4#47] ===== Subqueries ===== @@ -417,26 +417,26 @@ BroadcastExchange (78) (74) Scan parquet default.date_dim -Output [2]: [d_date_sk#10, d_year#109] +Output [2]: [d_date_sk#10, d_year#101] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), GreaterThanOrEqual(d_date_sk,2451545), LessThanOrEqual(d_date_sk,2451910), IsNotNull(d_date_sk)] ReadSchema: struct (75) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#10, d_year#109] +Input [2]: [d_date_sk#10, d_year#101] (76) Filter [codegen id : 1] -Input [2]: [d_date_sk#10, d_year#109] -Condition : ((((isnotnull(d_year#109) AND (d_year#109 = 2000)) AND (d_date_sk#10 >= 2451545)) AND (d_date_sk#10 <= 2451910)) AND isnotnull(d_date_sk#10)) +Input [2]: [d_date_sk#10, d_year#101] +Condition : ((((isnotnull(d_year#101) AND (d_year#101 = 2000)) AND (d_date_sk#10 >= 2451545)) AND (d_date_sk#10 <= 2451910)) AND isnotnull(d_date_sk#10)) (77) Project [codegen id : 1] Output [1]: [d_date_sk#10] -Input [2]: [d_date_sk#10, d_year#109] +Input [2]: [d_date_sk#10, d_year#101] (78) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#110] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] Subquery:2 Hosting operator id = 29 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27/explain.txt index 0fe719015aaa3..c181d2097b5f7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q27/explain.txt @@ -109,7 +109,7 @@ Input [4]: [cd_demo_sk#10, cd_gender#11, cd_marital_status#12, cd_education_stat (8) BroadcastExchange Input [1]: [cd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_cdemo_sk#2] @@ -121,88 +121,88 @@ Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#10] (11) ReusedExchange [Reuses operator id: 78] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (13) Project [codegen id : 5] Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#14] (14) Scan parquet default.store -Output [2]: [s_store_sk#16, s_state#17] +Output [2]: [s_store_sk#15, s_state#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [In(s_state, [AL,SD,TN]), IsNotNull(s_store_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#16, s_state#17] +Input [2]: [s_store_sk#15, s_state#16] (16) Filter [codegen id : 3] -Input [2]: [s_store_sk#16, s_state#17] -Condition : (s_state#17 IN (TN,AL,SD) AND isnotnull(s_store_sk#16)) +Input [2]: [s_store_sk#15, s_state#16] +Condition : (s_state#16 IN (TN,AL,SD) AND isnotnull(s_store_sk#15)) (17) BroadcastExchange -Input [2]: [s_store_sk#16, s_state#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] +Input [2]: [s_store_sk#15, s_state#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] +Right keys [1]: [s_store_sk#15] Join condition: None (19) Project [codegen id : 5] -Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#17] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#16, s_state#17] +Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] (20) Scan parquet default.item -Output [2]: [i_item_sk#19, i_item_id#20] +Output [2]: [i_item_sk#17, i_item_id#18] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#19, i_item_id#20] +Input [2]: [i_item_sk#17, i_item_id#18] (22) Filter [codegen id : 4] -Input [2]: [i_item_sk#19, i_item_id#20] -Condition : isnotnull(i_item_sk#19) +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) (23) BroadcastExchange -Input [2]: [i_item_sk#19, i_item_id#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#19] +Right keys [1]: [i_item_sk#17] Join condition: None (25) Project [codegen id : 5] -Output [6]: [i_item_id#20, s_state#17, ss_quantity#4 AS agg1#22, ss_list_price#5 AS agg2#23, ss_coupon_amt#7 AS agg3#24, ss_sales_price#6 AS agg4#25] -Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#17, i_item_sk#19, i_item_id#20] +Output [6]: [i_item_id#18, s_state#16, ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] (26) HashAggregate [codegen id : 5] -Input [6]: [i_item_id#20, s_state#17, agg1#22, agg2#23, agg3#24, agg4#25] -Keys [2]: [i_item_id#20, s_state#17] -Functions [4]: [partial_avg(agg1#22), partial_avg(UnscaledValue(agg2#23)), partial_avg(UnscaledValue(agg3#24)), partial_avg(UnscaledValue(agg4#25))] -Aggregate Attributes [8]: [sum#26, count#27, sum#28, count#29, sum#30, count#31, sum#32, count#33] -Results [10]: [i_item_id#20, s_state#17, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] +Input [6]: [i_item_id#18, s_state#16, agg1#19, agg2#20, agg3#21, agg4#22] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] +Results [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] (27) Exchange -Input [10]: [i_item_id#20, s_state#17, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] -Arguments: hashpartitioning(i_item_id#20, s_state#17, 5), ENSURE_REQUIREMENTS, [id=#42] +Input [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Arguments: hashpartitioning(i_item_id#18, s_state#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 6] -Input [10]: [i_item_id#20, s_state#17, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] -Keys [2]: [i_item_id#20, s_state#17] -Functions [4]: [avg(agg1#22), avg(UnscaledValue(agg2#23)), avg(UnscaledValue(agg3#24)), avg(UnscaledValue(agg4#25))] -Aggregate Attributes [4]: [avg(agg1#22)#43, avg(UnscaledValue(agg2#23))#44, avg(UnscaledValue(agg3#24))#45, avg(UnscaledValue(agg4#25))#46] -Results [7]: [i_item_id#20, s_state#17, 0 AS g_state#47, avg(agg1#22)#43 AS agg1#48, cast((avg(UnscaledValue(agg2#23))#44 / 100.0) as decimal(11,6)) AS agg2#49, cast((avg(UnscaledValue(agg3#24))#45 / 100.0) as decimal(11,6)) AS agg3#50, cast((avg(UnscaledValue(agg4#25))#46 / 100.0) as decimal(11,6)) AS agg4#51] +Input [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#39, avg(UnscaledValue(agg2#20))#40, avg(UnscaledValue(agg3#21))#41, avg(UnscaledValue(agg4#22))#42] +Results [7]: [i_item_id#18, s_state#16, 0 AS g_state#43, avg(agg1#19)#39 AS agg1#44, cast((avg(UnscaledValue(agg2#20))#40 / 100.0) as decimal(11,6)) AS agg2#45, cast((avg(UnscaledValue(agg3#21))#41 / 100.0) as decimal(11,6)) AS agg3#46, cast((avg(UnscaledValue(agg4#22))#42 / 100.0) as decimal(11,6)) AS agg4#47] (29) Scan parquet default.store_sales Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] @@ -232,77 +232,77 @@ Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#10] (35) ReusedExchange [Reuses operator id: 78] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (36) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (37) Project [codegen id : 11] Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#14] (38) Scan parquet default.store -Output [2]: [s_store_sk#16, s_state#17] +Output [2]: [s_store_sk#15, s_state#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [In(s_state, [AL,SD,TN]), IsNotNull(s_store_sk)] ReadSchema: struct (39) ColumnarToRow [codegen id : 9] -Input [2]: [s_store_sk#16, s_state#17] +Input [2]: [s_store_sk#15, s_state#16] (40) Filter [codegen id : 9] -Input [2]: [s_store_sk#16, s_state#17] -Condition : (s_state#17 IN (TN,AL,SD) AND isnotnull(s_store_sk#16)) +Input [2]: [s_store_sk#15, s_state#16] +Condition : (s_state#16 IN (TN,AL,SD) AND isnotnull(s_store_sk#15)) (41) Project [codegen id : 9] -Output [1]: [s_store_sk#16] -Input [2]: [s_store_sk#16, s_state#17] +Output [1]: [s_store_sk#15] +Input [2]: [s_store_sk#15, s_state#16] (42) BroadcastExchange -Input [1]: [s_store_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (43) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] +Right keys [1]: [s_store_sk#15] Join condition: None (44) Project [codegen id : 11] Output [5]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#16] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15] (45) ReusedExchange [Reuses operator id: 23] -Output [2]: [i_item_sk#19, i_item_id#20] +Output [2]: [i_item_sk#17, i_item_id#18] (46) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#19] +Right keys [1]: [i_item_sk#17] Join condition: None (47) Project [codegen id : 11] -Output [5]: [i_item_id#20, ss_quantity#4 AS agg1#22, ss_list_price#5 AS agg2#23, ss_coupon_amt#7 AS agg3#24, ss_sales_price#6 AS agg4#25] -Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#19, i_item_id#20] +Output [5]: [i_item_id#18, ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#17, i_item_id#18] (48) HashAggregate [codegen id : 11] -Input [5]: [i_item_id#20, agg1#22, agg2#23, agg3#24, agg4#25] -Keys [1]: [i_item_id#20] -Functions [4]: [partial_avg(agg1#22), partial_avg(UnscaledValue(agg2#23)), partial_avg(UnscaledValue(agg3#24)), partial_avg(UnscaledValue(agg4#25))] -Aggregate Attributes [8]: [sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] -Results [9]: [i_item_id#20, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] +Input [5]: [i_item_id#18, agg1#19, agg2#20, agg3#21, agg4#22] +Keys [1]: [i_item_id#18] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#48, count#49, sum#50, count#51, sum#52, count#53, sum#54, count#55] +Results [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] (49) Exchange -Input [9]: [i_item_id#20, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] -Arguments: hashpartitioning(i_item_id#20, 5), ENSURE_REQUIREMENTS, [id=#69] +Input [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] +Arguments: hashpartitioning(i_item_id#18, 5), ENSURE_REQUIREMENTS, [plan_id=6] (50) HashAggregate [codegen id : 12] -Input [9]: [i_item_id#20, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] -Keys [1]: [i_item_id#20] -Functions [4]: [avg(agg1#22), avg(UnscaledValue(agg2#23)), avg(UnscaledValue(agg3#24)), avg(UnscaledValue(agg4#25))] -Aggregate Attributes [4]: [avg(agg1#22)#70, avg(UnscaledValue(agg2#23))#71, avg(UnscaledValue(agg3#24))#72, avg(UnscaledValue(agg4#25))#73] -Results [7]: [i_item_id#20, null AS s_state#74, 1 AS g_state#75, avg(agg1#22)#70 AS agg1#76, cast((avg(UnscaledValue(agg2#23))#71 / 100.0) as decimal(11,6)) AS agg2#77, cast((avg(UnscaledValue(agg3#24))#72 / 100.0) as decimal(11,6)) AS agg3#78, cast((avg(UnscaledValue(agg4#25))#73 / 100.0) as decimal(11,6)) AS agg4#79] +Input [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] +Keys [1]: [i_item_id#18] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#64, avg(UnscaledValue(agg2#20))#65, avg(UnscaledValue(agg3#21))#66, avg(UnscaledValue(agg4#22))#67] +Results [7]: [i_item_id#18, null AS s_state#68, 1 AS g_state#69, avg(agg1#19)#64 AS agg1#70, cast((avg(UnscaledValue(agg2#20))#65 / 100.0) as decimal(11,6)) AS agg2#71, cast((avg(UnscaledValue(agg3#21))#66 / 100.0) as decimal(11,6)) AS agg3#72, cast((avg(UnscaledValue(agg4#22))#67 / 100.0) as decimal(11,6)) AS agg4#73] (51) Scan parquet default.store_sales Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] @@ -332,79 +332,79 @@ Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#10] (57) ReusedExchange [Reuses operator id: 78] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (58) BroadcastHashJoin [codegen id : 17] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (59) Project [codegen id : 17] Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#14] (60) ReusedExchange [Reuses operator id: 42] -Output [1]: [s_store_sk#16] +Output [1]: [s_store_sk#15] (61) BroadcastHashJoin [codegen id : 17] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] +Right keys [1]: [s_store_sk#15] Join condition: None (62) Project [codegen id : 17] Output [5]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#16] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15] (63) Scan parquet default.item -Output [1]: [i_item_sk#19] +Output [1]: [i_item_sk#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (64) ColumnarToRow [codegen id : 16] -Input [1]: [i_item_sk#19] +Input [1]: [i_item_sk#17] (65) Filter [codegen id : 16] -Input [1]: [i_item_sk#19] -Condition : isnotnull(i_item_sk#19) +Input [1]: [i_item_sk#17] +Condition : isnotnull(i_item_sk#17) (66) BroadcastExchange -Input [1]: [i_item_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#80] +Input [1]: [i_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] (67) BroadcastHashJoin [codegen id : 17] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#19] +Right keys [1]: [i_item_sk#17] Join condition: None (68) Project [codegen id : 17] -Output [4]: [ss_quantity#4 AS agg1#22, ss_list_price#5 AS agg2#23, ss_coupon_amt#7 AS agg3#24, ss_sales_price#6 AS agg4#25] -Input [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#19] +Output [4]: [ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#17] (69) HashAggregate [codegen id : 17] -Input [4]: [agg1#22, agg2#23, agg3#24, agg4#25] +Input [4]: [agg1#19, agg2#20, agg3#21, agg4#22] Keys: [] -Functions [4]: [partial_avg(agg1#22), partial_avg(UnscaledValue(agg2#23)), partial_avg(UnscaledValue(agg3#24)), partial_avg(UnscaledValue(agg4#25))] -Aggregate Attributes [8]: [sum#81, count#82, sum#83, count#84, sum#85, count#86, sum#87, count#88] -Results [8]: [sum#89, count#90, sum#91, count#92, sum#93, count#94, sum#95, count#96] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#74, count#75, sum#76, count#77, sum#78, count#79, sum#80, count#81] +Results [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] (70) Exchange -Input [8]: [sum#89, count#90, sum#91, count#92, sum#93, count#94, sum#95, count#96] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#97] +Input [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] (71) HashAggregate [codegen id : 18] -Input [8]: [sum#89, count#90, sum#91, count#92, sum#93, count#94, sum#95, count#96] +Input [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] Keys: [] -Functions [4]: [avg(agg1#22), avg(UnscaledValue(agg2#23)), avg(UnscaledValue(agg3#24)), avg(UnscaledValue(agg4#25))] -Aggregate Attributes [4]: [avg(agg1#22)#98, avg(UnscaledValue(agg2#23))#99, avg(UnscaledValue(agg3#24))#100, avg(UnscaledValue(agg4#25))#101] -Results [7]: [null AS i_item_id#102, null AS s_state#103, 1 AS g_state#104, avg(agg1#22)#98 AS agg1#105, cast((avg(UnscaledValue(agg2#23))#99 / 100.0) as decimal(11,6)) AS agg2#106, cast((avg(UnscaledValue(agg3#24))#100 / 100.0) as decimal(11,6)) AS agg3#107, cast((avg(UnscaledValue(agg4#25))#101 / 100.0) as decimal(11,6)) AS agg4#108] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#90, avg(UnscaledValue(agg2#20))#91, avg(UnscaledValue(agg3#21))#92, avg(UnscaledValue(agg4#22))#93] +Results [7]: [null AS i_item_id#94, null AS s_state#95, 1 AS g_state#96, avg(agg1#19)#90 AS agg1#97, cast((avg(UnscaledValue(agg2#20))#91 / 100.0) as decimal(11,6)) AS agg2#98, cast((avg(UnscaledValue(agg3#21))#92 / 100.0) as decimal(11,6)) AS agg3#99, cast((avg(UnscaledValue(agg4#22))#93 / 100.0) as decimal(11,6)) AS agg4#100] (72) Union (73) TakeOrderedAndProject -Input [7]: [i_item_id#20, s_state#17, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] -Arguments: 100, [i_item_id#20 ASC NULLS FIRST, s_state#17 ASC NULLS FIRST], [i_item_id#20, s_state#17, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] +Input [7]: [i_item_id#18, s_state#16, g_state#43, agg1#44, agg2#45, agg3#46, agg4#47] +Arguments: 100, [i_item_id#18 ASC NULLS FIRST, s_state#16 ASC NULLS FIRST], [i_item_id#18, s_state#16, g_state#43, agg1#44, agg2#45, agg3#46, agg4#47] ===== Subqueries ===== @@ -417,26 +417,26 @@ BroadcastExchange (78) (74) Scan parquet default.date_dim -Output [2]: [d_date_sk#15, d_year#109] +Output [2]: [d_date_sk#14, d_year#101] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), GreaterThanOrEqual(d_date_sk,2451545), LessThanOrEqual(d_date_sk,2451910), IsNotNull(d_date_sk)] ReadSchema: struct (75) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#109] +Input [2]: [d_date_sk#14, d_year#101] (76) Filter [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#109] -Condition : ((((isnotnull(d_year#109) AND (d_year#109 = 2000)) AND (d_date_sk#15 >= 2451545)) AND (d_date_sk#15 <= 2451910)) AND isnotnull(d_date_sk#15)) +Input [2]: [d_date_sk#14, d_year#101] +Condition : ((((isnotnull(d_year#101) AND (d_year#101 = 2000)) AND (d_date_sk#14 >= 2451545)) AND (d_date_sk#14 <= 2451910)) AND isnotnull(d_date_sk#14)) (77) Project [codegen id : 1] -Output [1]: [d_date_sk#15] -Input [2]: [d_date_sk#15, d_year#109] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#101] (78) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#110] +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] Subquery:2 Hosting operator id = 29 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3.sf100/explain.txt index 36258b96121f4..2878785c1740e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3.sf100/explain.txt @@ -53,7 +53,7 @@ Input [4]: [i_item_sk#5, i_brand_id#6, i_brand#7, i_manufact_id#8] (8) BroadcastExchange Input [3]: [i_item_sk#5, i_brand_id#6, i_brand#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#1] @@ -65,38 +65,38 @@ Output [4]: [ss_net_profit#2, ss_sold_date_sk#3, i_brand_id#6, i_brand#7] Input [6]: [ss_item_sk#1, ss_net_profit#2, ss_sold_date_sk#3, i_item_sk#5, i_brand_id#6, i_brand#7] (11) ReusedExchange [Reuses operator id: 22] -Output [2]: [d_date_sk#10, d_year#11] +Output [2]: [d_date_sk#9, d_year#10] (12) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#3] -Right keys [1]: [d_date_sk#10] +Right keys [1]: [d_date_sk#9] Join condition: None (13) Project [codegen id : 3] -Output [4]: [d_year#11, ss_net_profit#2, i_brand_id#6, i_brand#7] -Input [6]: [ss_net_profit#2, ss_sold_date_sk#3, i_brand_id#6, i_brand#7, d_date_sk#10, d_year#11] +Output [4]: [d_year#10, ss_net_profit#2, i_brand_id#6, i_brand#7] +Input [6]: [ss_net_profit#2, ss_sold_date_sk#3, i_brand_id#6, i_brand#7, d_date_sk#9, d_year#10] (14) HashAggregate [codegen id : 3] -Input [4]: [d_year#11, ss_net_profit#2, i_brand_id#6, i_brand#7] -Keys [3]: [d_year#11, i_brand#7, i_brand_id#6] +Input [4]: [d_year#10, ss_net_profit#2, i_brand_id#6, i_brand#7] +Keys [3]: [d_year#10, i_brand#7, i_brand_id#6] Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum#12] -Results [4]: [d_year#11, i_brand#7, i_brand_id#6, sum#13] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#10, i_brand#7, i_brand_id#6, sum#12] (15) Exchange -Input [4]: [d_year#11, i_brand#7, i_brand_id#6, sum#13] -Arguments: hashpartitioning(d_year#11, i_brand#7, i_brand_id#6, 5), ENSURE_REQUIREMENTS, [id=#14] +Input [4]: [d_year#10, i_brand#7, i_brand_id#6, sum#12] +Arguments: hashpartitioning(d_year#10, i_brand#7, i_brand_id#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (16) HashAggregate [codegen id : 4] -Input [4]: [d_year#11, i_brand#7, i_brand_id#6, sum#13] -Keys [3]: [d_year#11, i_brand#7, i_brand_id#6] +Input [4]: [d_year#10, i_brand#7, i_brand_id#6, sum#12] +Keys [3]: [d_year#10, i_brand#7, i_brand_id#6] Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#15] -Results [4]: [d_year#11, i_brand_id#6 AS brand_id#16, i_brand#7 AS brand#17, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#15,17,2) AS sum_agg#18] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#13] +Results [4]: [d_year#10, i_brand_id#6 AS brand_id#14, i_brand#7 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#13,17,2) AS sum_agg#16] (17) TakeOrderedAndProject -Input [4]: [d_year#11, brand_id#16, brand#17, sum_agg#18] -Arguments: 100, [d_year#11 ASC NULLS FIRST, sum_agg#18 DESC NULLS LAST, brand_id#16 ASC NULLS FIRST], [d_year#11, brand_id#16, brand#17, sum_agg#18] +Input [4]: [d_year#10, brand_id#14, brand#15, sum_agg#16] +Arguments: 100, [d_year#10 ASC NULLS FIRST, sum_agg#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [d_year#10, brand_id#14, brand#15, sum_agg#16] ===== Subqueries ===== @@ -109,25 +109,25 @@ BroadcastExchange (22) (18) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_moy#19] +Output [3]: [d_date_sk#9, d_year#10, d_moy#17] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,12), Or(Or(Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2415355),LessThanOrEqual(d_date_sk,2415385)),And(GreaterThanOrEqual(d_date_sk,2415720),LessThanOrEqual(d_date_sk,2415750))),Or(And(GreaterThanOrEqual(d_date_sk,2416085),LessThanOrEqual(d_date_sk,2416115)),And(GreaterThanOrEqual(d_date_sk,2416450),LessThanOrEqual(d_date_sk,2416480)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2416816),LessThanOrEqual(d_date_sk,2416846)),And(GreaterThanOrEqual(d_date_sk,2417181),LessThanOrEqual(d_date_sk,2417211))),And(GreaterThanOrEqual(d_date_sk,2417546),LessThanOrEqual(d_date_sk,2417576)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2417911),LessThanOrEqual(d_date_sk,2417941)),And(GreaterThanOrEqual(d_date_sk,2418277),LessThanOrEqual(d_date_sk,2418307))),And(GreaterThanOrEqual(d_date_sk,2418642),LessThanOrEqual(d_date_sk,2418672))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2419007),LessThanOrEqual(d_date_sk,2419037)),And(GreaterThanOrEqual(d_date_sk,2419372),LessThanOrEqual(d_date_sk,2419402))),And(GreaterThanOrEqual(d_date_sk,2419738),LessThanOrEqual(d_date_sk,2419768))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2420103),LessThanOrEqual(d_date_sk,2420133)),And(GreaterThanOrEqual(d_date_sk,2420468),LessThanOrEqual(d_date_sk,2420498))),And(GreaterThanOrEqual(d_date_sk,2420833),LessThanOrEqual(d_date_sk,2420863))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2421199),LessThanOrEqual(d_date_sk,2421229)),And(GreaterThanOrEqual(d_date_sk,2421564),LessThanOrEqual(d_date_sk,2421594))),And(GreaterThanOrEqual(d_date_sk,2421929),LessThanOrEqual(d_date_sk,2421959)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2422294),LessThanOrEqual(d_date_sk,2422324)),And(GreaterThanOrEqual(d_date_sk,2422660),LessThanOrEqual(d_date_sk,2422690))),And(GreaterThanOrEqual(d_date_sk,2423025),LessThanOrEqual(d_date_sk,2423055))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2423390),LessThanOrEqual(d_date_sk,2423420)),And(GreaterThanOrEqual(d_date_sk,2423755),LessThanOrEqual(d_date_sk,2423785))),And(GreaterThanOrEqual(d_date_sk,2424121),LessThanOrEqual(d_date_sk,2424151)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2424486),LessThanOrEqual(d_date_sk,2424516)),And(GreaterThanOrEqual(d_date_sk,2424851),LessThanOrEqual(d_date_sk,2424881))),Or(And(GreaterThanOrEqual(d_date_sk,2425216),LessThanOrEqual(d_date_sk,2425246)),And(GreaterThanOrEqual(d_date_sk,2425582),LessThanOrEqual(d_date_sk,2425612)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2425947),LessThanOrEqual(d_date_sk,2425977)),And(GreaterThanOrEqual(d_date_sk,2426312),LessThanOrEqual(d_date_sk,2426342))),And(GreaterThanOrEqual(d_date_sk,2426677),LessThanOrEqual(d_date_sk,2426707)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2427043),LessThanOrEqual(d_date_sk,2427073)),And(GreaterThanOrEqual(d_date_sk,2427408),LessThanOrEqual(d_date_sk,2427438))),And(GreaterThanOrEqual(d_date_sk,2427773),LessThanOrEqual(d_date_sk,2427803))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2428138),LessThanOrEqual(d_date_sk,2428168)),And(GreaterThanOrEqual(d_date_sk,2428504),LessThanOrEqual(d_date_sk,2428534))),And(GreaterThanOrEqual(d_date_sk,2428869),LessThanOrEqual(d_date_sk,2428899))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2429234),LessThanOrEqual(d_date_sk,2429264)),And(GreaterThanOrEqual(d_date_sk,2429599),LessThanOrEqual(d_date_sk,2429629))),And(GreaterThanOrEqual(d_date_sk,2429965),LessThanOrEqual(d_date_sk,2429995))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2430330),LessThanOrEqual(d_date_sk,2430360)),And(GreaterThanOrEqual(d_date_sk,2430695),LessThanOrEqual(d_date_sk,2430725))),And(GreaterThanOrEqual(d_date_sk,2431060),LessThanOrEqual(d_date_sk,2431090)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2431426),LessThanOrEqual(d_date_sk,2431456)),And(GreaterThanOrEqual(d_date_sk,2431791),LessThanOrEqual(d_date_sk,2431821))),And(GreaterThanOrEqual(d_date_sk,2432156),LessThanOrEqual(d_date_sk,2432186))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2432521),LessThanOrEqual(d_date_sk,2432551)),And(GreaterThanOrEqual(d_date_sk,2432887),LessThanOrEqual(d_date_sk,2432917))),And(GreaterThanOrEqual(d_date_sk,2433252),LessThanOrEqual(d_date_sk,2433282))))))),Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2433617),LessThanOrEqual(d_date_sk,2433647)),And(GreaterThanOrEqual(d_date_sk,2433982),LessThanOrEqual(d_date_sk,2434012))),Or(And(GreaterThanOrEqual(d_date_sk,2434348),LessThanOrEqual(d_date_sk,2434378)),And(GreaterThanOrEqual(d_date_sk,2434713),LessThanOrEqual(d_date_sk,2434743)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2435078),LessThanOrEqual(d_date_sk,2435108)),And(GreaterThanOrEqual(d_date_sk,2435443),LessThanOrEqual(d_date_sk,2435473))),And(GreaterThanOrEqual(d_date_sk,2435809),LessThanOrEqual(d_date_sk,2435839)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2436174),LessThanOrEqual(d_date_sk,2436204)),And(GreaterThanOrEqual(d_date_sk,2436539),LessThanOrEqual(d_date_sk,2436569))),And(GreaterThanOrEqual(d_date_sk,2436904),LessThanOrEqual(d_date_sk,2436934))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2437270),LessThanOrEqual(d_date_sk,2437300)),And(GreaterThanOrEqual(d_date_sk,2437635),LessThanOrEqual(d_date_sk,2437665))),And(GreaterThanOrEqual(d_date_sk,2438000),LessThanOrEqual(d_date_sk,2438030))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2438365),LessThanOrEqual(d_date_sk,2438395)),And(GreaterThanOrEqual(d_date_sk,2438731),LessThanOrEqual(d_date_sk,2438761))),And(GreaterThanOrEqual(d_date_sk,2439096),LessThanOrEqual(d_date_sk,2439126))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2439461),LessThanOrEqual(d_date_sk,2439491)),And(GreaterThanOrEqual(d_date_sk,2439826),LessThanOrEqual(d_date_sk,2439856))),And(GreaterThanOrEqual(d_date_sk,2440192),LessThanOrEqual(d_date_sk,2440222)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2440557),LessThanOrEqual(d_date_sk,2440587)),And(GreaterThanOrEqual(d_date_sk,2440922),LessThanOrEqual(d_date_sk,2440952))),And(GreaterThanOrEqual(d_date_sk,2441287),LessThanOrEqual(d_date_sk,2441317))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2441653),LessThanOrEqual(d_date_sk,2441683)),And(GreaterThanOrEqual(d_date_sk,2442018),LessThanOrEqual(d_date_sk,2442048))),And(GreaterThanOrEqual(d_date_sk,2442383),LessThanOrEqual(d_date_sk,2442413)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2442748),LessThanOrEqual(d_date_sk,2442778)),And(GreaterThanOrEqual(d_date_sk,2443114),LessThanOrEqual(d_date_sk,2443144))),Or(And(GreaterThanOrEqual(d_date_sk,2443479),LessThanOrEqual(d_date_sk,2443509)),And(GreaterThanOrEqual(d_date_sk,2443844),LessThanOrEqual(d_date_sk,2443874)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2444209),LessThanOrEqual(d_date_sk,2444239)),And(GreaterThanOrEqual(d_date_sk,2444575),LessThanOrEqual(d_date_sk,2444605))),And(GreaterThanOrEqual(d_date_sk,2444940),LessThanOrEqual(d_date_sk,2444970)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2445305),LessThanOrEqual(d_date_sk,2445335)),And(GreaterThanOrEqual(d_date_sk,2445670),LessThanOrEqual(d_date_sk,2445700))),And(GreaterThanOrEqual(d_date_sk,2446036),LessThanOrEqual(d_date_sk,2446066))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2446401),LessThanOrEqual(d_date_sk,2446431)),And(GreaterThanOrEqual(d_date_sk,2446766),LessThanOrEqual(d_date_sk,2446796))),And(GreaterThanOrEqual(d_date_sk,2447131),LessThanOrEqual(d_date_sk,2447161))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2447497),LessThanOrEqual(d_date_sk,2447527)),And(GreaterThanOrEqual(d_date_sk,2447862),LessThanOrEqual(d_date_sk,2447892))),And(GreaterThanOrEqual(d_date_sk,2448227),LessThanOrEqual(d_date_sk,2448257))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2448592),LessThanOrEqual(d_date_sk,2448622)),And(GreaterThanOrEqual(d_date_sk,2448958),LessThanOrEqual(d_date_sk,2448988))),And(GreaterThanOrEqual(d_date_sk,2449323),LessThanOrEqual(d_date_sk,2449353)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2449688),LessThanOrEqual(d_date_sk,2449718)),And(GreaterThanOrEqual(d_date_sk,2450053),LessThanOrEqual(d_date_sk,2450083))),And(GreaterThanOrEqual(d_date_sk,2450419),LessThanOrEqual(d_date_sk,2450449))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2450784),LessThanOrEqual(d_date_sk,2450814)),And(GreaterThanOrEqual(d_date_sk,2451149),LessThanOrEqual(d_date_sk,2451179))),And(GreaterThanOrEqual(d_date_sk,2451514),LessThanOrEqual(d_date_sk,2451544)))))))),Or(Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2451880),LessThanOrEqual(d_date_sk,2451910)),And(GreaterThanOrEqual(d_date_sk,2452245),LessThanOrEqual(d_date_sk,2452275))),Or(And(GreaterThanOrEqual(d_date_sk,2452610),LessThanOrEqual(d_date_sk,2452640)),And(GreaterThanOrEqual(d_date_sk,2452975),LessThanOrEqual(d_date_sk,2453005)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2453341),LessThanOrEqual(d_date_sk,2453371)),And(GreaterThanOrEqual(d_date_sk,2453706),LessThanOrEqual(d_date_sk,2453736))),And(GreaterThanOrEqual(d_date_sk,2454071),LessThanOrEqual(d_date_sk,2454101)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2454436),LessThanOrEqual(d_date_sk,2454466)),And(GreaterThanOrEqual(d_date_sk,2454802),LessThanOrEqual(d_date_sk,2454832))),And(GreaterThanOrEqual(d_date_sk,2455167),LessThanOrEqual(d_date_sk,2455197))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2455532),LessThanOrEqual(d_date_sk,2455562)),And(GreaterThanOrEqual(d_date_sk,2455897),LessThanOrEqual(d_date_sk,2455927))),And(GreaterThanOrEqual(d_date_sk,2456263),LessThanOrEqual(d_date_sk,2456293))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2456628),LessThanOrEqual(d_date_sk,2456658)),And(GreaterThanOrEqual(d_date_sk,2456993),LessThanOrEqual(d_date_sk,2457023))),And(GreaterThanOrEqual(d_date_sk,2457358),LessThanOrEqual(d_date_sk,2457388))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2457724),LessThanOrEqual(d_date_sk,2457754)),And(GreaterThanOrEqual(d_date_sk,2458089),LessThanOrEqual(d_date_sk,2458119))),And(GreaterThanOrEqual(d_date_sk,2458454),LessThanOrEqual(d_date_sk,2458484)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2458819),LessThanOrEqual(d_date_sk,2458849)),And(GreaterThanOrEqual(d_date_sk,2459185),LessThanOrEqual(d_date_sk,2459215))),And(GreaterThanOrEqual(d_date_sk,2459550),LessThanOrEqual(d_date_sk,2459580))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2459915),LessThanOrEqual(d_date_sk,2459945)),And(GreaterThanOrEqual(d_date_sk,2460280),LessThanOrEqual(d_date_sk,2460310))),And(GreaterThanOrEqual(d_date_sk,2460646),LessThanOrEqual(d_date_sk,2460676)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2461011),LessThanOrEqual(d_date_sk,2461041)),And(GreaterThanOrEqual(d_date_sk,2461376),LessThanOrEqual(d_date_sk,2461406))),Or(And(GreaterThanOrEqual(d_date_sk,2461741),LessThanOrEqual(d_date_sk,2461771)),And(GreaterThanOrEqual(d_date_sk,2462107),LessThanOrEqual(d_date_sk,2462137)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2462472),LessThanOrEqual(d_date_sk,2462502)),And(GreaterThanOrEqual(d_date_sk,2462837),LessThanOrEqual(d_date_sk,2462867))),And(GreaterThanOrEqual(d_date_sk,2463202),LessThanOrEqual(d_date_sk,2463232)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2463568),LessThanOrEqual(d_date_sk,2463598)),And(GreaterThanOrEqual(d_date_sk,2463933),LessThanOrEqual(d_date_sk,2463963))),And(GreaterThanOrEqual(d_date_sk,2464298),LessThanOrEqual(d_date_sk,2464328))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2464663),LessThanOrEqual(d_date_sk,2464693)),And(GreaterThanOrEqual(d_date_sk,2465029),LessThanOrEqual(d_date_sk,2465059))),And(GreaterThanOrEqual(d_date_sk,2465394),LessThanOrEqual(d_date_sk,2465424))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2465759),LessThanOrEqual(d_date_sk,2465789)),And(GreaterThanOrEqual(d_date_sk,2466124),LessThanOrEqual(d_date_sk,2466154))),And(GreaterThanOrEqual(d_date_sk,2466490),LessThanOrEqual(d_date_sk,2466520))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2466855),LessThanOrEqual(d_date_sk,2466885)),And(GreaterThanOrEqual(d_date_sk,2467220),LessThanOrEqual(d_date_sk,2467250))),And(GreaterThanOrEqual(d_date_sk,2467585),LessThanOrEqual(d_date_sk,2467615)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2467951),LessThanOrEqual(d_date_sk,2467981)),And(GreaterThanOrEqual(d_date_sk,2468316),LessThanOrEqual(d_date_sk,2468346))),And(GreaterThanOrEqual(d_date_sk,2468681),LessThanOrEqual(d_date_sk,2468711))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2469046),LessThanOrEqual(d_date_sk,2469076)),And(GreaterThanOrEqual(d_date_sk,2469412),LessThanOrEqual(d_date_sk,2469442))),And(GreaterThanOrEqual(d_date_sk,2469777),LessThanOrEqual(d_date_sk,2469807))))))),Or(Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2470142),LessThanOrEqual(d_date_sk,2470172)),And(GreaterThanOrEqual(d_date_sk,2470507),LessThanOrEqual(d_date_sk,2470537))),Or(And(GreaterThanOrEqual(d_date_sk,2470873),LessThanOrEqual(d_date_sk,2470903)),And(GreaterThanOrEqual(d_date_sk,2471238),LessThanOrEqual(d_date_sk,2471268)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2471603),LessThanOrEqual(d_date_sk,2471633)),And(GreaterThanOrEqual(d_date_sk,2471968),LessThanOrEqual(d_date_sk,2471998))),And(GreaterThanOrEqual(d_date_sk,2472334),LessThanOrEqual(d_date_sk,2472364)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2472699),LessThanOrEqual(d_date_sk,2472729)),And(GreaterThanOrEqual(d_date_sk,2473064),LessThanOrEqual(d_date_sk,2473094))),And(GreaterThanOrEqual(d_date_sk,2473429),LessThanOrEqual(d_date_sk,2473459))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2473795),LessThanOrEqual(d_date_sk,2473825)),And(GreaterThanOrEqual(d_date_sk,2474160),LessThanOrEqual(d_date_sk,2474190))),And(GreaterThanOrEqual(d_date_sk,2474525),LessThanOrEqual(d_date_sk,2474555))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2474890),LessThanOrEqual(d_date_sk,2474920)),And(GreaterThanOrEqual(d_date_sk,2475256),LessThanOrEqual(d_date_sk,2475286))),And(GreaterThanOrEqual(d_date_sk,2475621),LessThanOrEqual(d_date_sk,2475651))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2475986),LessThanOrEqual(d_date_sk,2476016)),And(GreaterThanOrEqual(d_date_sk,2476351),LessThanOrEqual(d_date_sk,2476381))),And(GreaterThanOrEqual(d_date_sk,2476717),LessThanOrEqual(d_date_sk,2476747)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2477082),LessThanOrEqual(d_date_sk,2477112)),And(GreaterThanOrEqual(d_date_sk,2477447),LessThanOrEqual(d_date_sk,2477477))),And(GreaterThanOrEqual(d_date_sk,2477812),LessThanOrEqual(d_date_sk,2477842))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2478178),LessThanOrEqual(d_date_sk,2478208)),And(GreaterThanOrEqual(d_date_sk,2478543),LessThanOrEqual(d_date_sk,2478573))),And(GreaterThanOrEqual(d_date_sk,2478908),LessThanOrEqual(d_date_sk,2478938)))))),Or(Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2479273),LessThanOrEqual(d_date_sk,2479303)),And(GreaterThanOrEqual(d_date_sk,2479639),LessThanOrEqual(d_date_sk,2479669))),Or(And(GreaterThanOrEqual(d_date_sk,2480004),LessThanOrEqual(d_date_sk,2480034)),And(GreaterThanOrEqual(d_date_sk,2480369),LessThanOrEqual(d_date_sk,2480399)))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2480734),LessThanOrEqual(d_date_sk,2480764)),And(GreaterThanOrEqual(d_date_sk,2481100),LessThanOrEqual(d_date_sk,2481130))),And(GreaterThanOrEqual(d_date_sk,2481465),LessThanOrEqual(d_date_sk,2481495)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2481830),LessThanOrEqual(d_date_sk,2481860)),And(GreaterThanOrEqual(d_date_sk,2482195),LessThanOrEqual(d_date_sk,2482225))),And(GreaterThanOrEqual(d_date_sk,2482561),LessThanOrEqual(d_date_sk,2482591))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2482926),LessThanOrEqual(d_date_sk,2482956)),And(GreaterThanOrEqual(d_date_sk,2483291),LessThanOrEqual(d_date_sk,2483321))),And(GreaterThanOrEqual(d_date_sk,2483656),LessThanOrEqual(d_date_sk,2483686))))),Or(Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2484022),LessThanOrEqual(d_date_sk,2484052)),And(GreaterThanOrEqual(d_date_sk,2484387),LessThanOrEqual(d_date_sk,2484417))),And(GreaterThanOrEqual(d_date_sk,2484752),LessThanOrEqual(d_date_sk,2484782))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2485117),LessThanOrEqual(d_date_sk,2485147)),And(GreaterThanOrEqual(d_date_sk,2485483),LessThanOrEqual(d_date_sk,2485513))),And(GreaterThanOrEqual(d_date_sk,2485848),LessThanOrEqual(d_date_sk,2485878)))),Or(Or(Or(And(GreaterThanOrEqual(d_date_sk,2486213),LessThanOrEqual(d_date_sk,2486243)),And(GreaterThanOrEqual(d_date_sk,2486578),LessThanOrEqual(d_date_sk,2486608))),And(GreaterThanOrEqual(d_date_sk,2486944),LessThanOrEqual(d_date_sk,2486974))),Or(Or(And(GreaterThanOrEqual(d_date_sk,2487309),LessThanOrEqual(d_date_sk,2487339)),And(GreaterThanOrEqual(d_date_sk,2487674),LessThanOrEqual(d_date_sk,2487704))),And(GreaterThanOrEqual(d_date_sk,2488039),LessThanOrEqual(d_date_sk,2488069))))))))), IsNotNull(d_date_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#19] +Input [3]: [d_date_sk#9, d_year#10, d_moy#17] (20) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#19] -Condition : (((isnotnull(d_moy#19) AND (d_moy#19 = 12)) AND ((((((((((d_date_sk#10 >= 2415355) AND (d_date_sk#10 <= 2415385)) OR ((d_date_sk#10 >= 2415720) AND (d_date_sk#10 <= 2415750))) OR (((d_date_sk#10 >= 2416085) AND (d_date_sk#10 <= 2416115)) OR ((d_date_sk#10 >= 2416450) AND (d_date_sk#10 <= 2416480)))) OR ((((d_date_sk#10 >= 2416816) AND (d_date_sk#10 <= 2416846)) OR ((d_date_sk#10 >= 2417181) AND (d_date_sk#10 <= 2417211))) OR ((d_date_sk#10 >= 2417546) AND (d_date_sk#10 <= 2417576)))) OR (((((d_date_sk#10 >= 2417911) AND (d_date_sk#10 <= 2417941)) OR ((d_date_sk#10 >= 2418277) AND (d_date_sk#10 <= 2418307))) OR ((d_date_sk#10 >= 2418642) AND (d_date_sk#10 <= 2418672))) OR ((((d_date_sk#10 >= 2419007) AND (d_date_sk#10 <= 2419037)) OR ((d_date_sk#10 >= 2419372) AND (d_date_sk#10 <= 2419402))) OR ((d_date_sk#10 >= 2419738) AND (d_date_sk#10 <= 2419768))))) OR ((((((d_date_sk#10 >= 2420103) AND (d_date_sk#10 <= 2420133)) OR ((d_date_sk#10 >= 2420468) AND (d_date_sk#10 <= 2420498))) OR ((d_date_sk#10 >= 2420833) AND (d_date_sk#10 <= 2420863))) OR ((((d_date_sk#10 >= 2421199) AND (d_date_sk#10 <= 2421229)) OR ((d_date_sk#10 >= 2421564) AND (d_date_sk#10 <= 2421594))) OR ((d_date_sk#10 >= 2421929) AND (d_date_sk#10 <= 2421959)))) OR (((((d_date_sk#10 >= 2422294) AND (d_date_sk#10 <= 2422324)) OR ((d_date_sk#10 >= 2422660) AND (d_date_sk#10 <= 2422690))) OR ((d_date_sk#10 >= 2423025) AND (d_date_sk#10 <= 2423055))) OR ((((d_date_sk#10 >= 2423390) AND (d_date_sk#10 <= 2423420)) OR ((d_date_sk#10 >= 2423755) AND (d_date_sk#10 <= 2423785))) OR ((d_date_sk#10 >= 2424121) AND (d_date_sk#10 <= 2424151)))))) OR (((((((d_date_sk#10 >= 2424486) AND (d_date_sk#10 <= 2424516)) OR ((d_date_sk#10 >= 2424851) AND (d_date_sk#10 <= 2424881))) OR (((d_date_sk#10 >= 2425216) AND (d_date_sk#10 <= 2425246)) OR ((d_date_sk#10 >= 2425582) AND (d_date_sk#10 <= 2425612)))) OR ((((d_date_sk#10 >= 2425947) AND (d_date_sk#10 <= 2425977)) OR ((d_date_sk#10 >= 2426312) AND (d_date_sk#10 <= 2426342))) OR ((d_date_sk#10 >= 2426677) AND (d_date_sk#10 <= 2426707)))) OR (((((d_date_sk#10 >= 2427043) AND (d_date_sk#10 <= 2427073)) OR ((d_date_sk#10 >= 2427408) AND (d_date_sk#10 <= 2427438))) OR ((d_date_sk#10 >= 2427773) AND (d_date_sk#10 <= 2427803))) OR ((((d_date_sk#10 >= 2428138) AND (d_date_sk#10 <= 2428168)) OR ((d_date_sk#10 >= 2428504) AND (d_date_sk#10 <= 2428534))) OR ((d_date_sk#10 >= 2428869) AND (d_date_sk#10 <= 2428899))))) OR ((((((d_date_sk#10 >= 2429234) AND (d_date_sk#10 <= 2429264)) OR ((d_date_sk#10 >= 2429599) AND (d_date_sk#10 <= 2429629))) OR ((d_date_sk#10 >= 2429965) AND (d_date_sk#10 <= 2429995))) OR ((((d_date_sk#10 >= 2430330) AND (d_date_sk#10 <= 2430360)) OR ((d_date_sk#10 >= 2430695) AND (d_date_sk#10 <= 2430725))) OR ((d_date_sk#10 >= 2431060) AND (d_date_sk#10 <= 2431090)))) OR (((((d_date_sk#10 >= 2431426) AND (d_date_sk#10 <= 2431456)) OR ((d_date_sk#10 >= 2431791) AND (d_date_sk#10 <= 2431821))) OR ((d_date_sk#10 >= 2432156) AND (d_date_sk#10 <= 2432186))) OR ((((d_date_sk#10 >= 2432521) AND (d_date_sk#10 <= 2432551)) OR ((d_date_sk#10 >= 2432887) AND (d_date_sk#10 <= 2432917))) OR ((d_date_sk#10 >= 2433252) AND (d_date_sk#10 <= 2433282))))))) OR ((((((((d_date_sk#10 >= 2433617) AND (d_date_sk#10 <= 2433647)) OR ((d_date_sk#10 >= 2433982) AND (d_date_sk#10 <= 2434012))) OR (((d_date_sk#10 >= 2434348) AND (d_date_sk#10 <= 2434378)) OR ((d_date_sk#10 >= 2434713) AND (d_date_sk#10 <= 2434743)))) OR ((((d_date_sk#10 >= 2435078) AND (d_date_sk#10 <= 2435108)) OR ((d_date_sk#10 >= 2435443) AND (d_date_sk#10 <= 2435473))) OR ((d_date_sk#10 >= 2435809) AND (d_date_sk#10 <= 2435839)))) OR (((((d_date_sk#10 >= 2436174) AND (d_date_sk#10 <= 2436204)) OR ((d_date_sk#10 >= 2436539) AND (d_date_sk#10 <= 2436569))) OR ((d_date_sk#10 >= 2436904) AND (d_date_sk#10 <= 2436934))) OR ((((d_date_sk#10 >= 2437270) AND (d_date_sk#10 <= 2437300)) OR ((d_date_sk#10 >= 2437635) AND (d_date_sk#10 <= 2437665))) OR ((d_date_sk#10 >= 2438000) AND (d_date_sk#10 <= 2438030))))) OR ((((((d_date_sk#10 >= 2438365) AND (d_date_sk#10 <= 2438395)) OR ((d_date_sk#10 >= 2438731) AND (d_date_sk#10 <= 2438761))) OR ((d_date_sk#10 >= 2439096) AND (d_date_sk#10 <= 2439126))) OR ((((d_date_sk#10 >= 2439461) AND (d_date_sk#10 <= 2439491)) OR ((d_date_sk#10 >= 2439826) AND (d_date_sk#10 <= 2439856))) OR ((d_date_sk#10 >= 2440192) AND (d_date_sk#10 <= 2440222)))) OR (((((d_date_sk#10 >= 2440557) AND (d_date_sk#10 <= 2440587)) OR ((d_date_sk#10 >= 2440922) AND (d_date_sk#10 <= 2440952))) OR ((d_date_sk#10 >= 2441287) AND (d_date_sk#10 <= 2441317))) OR ((((d_date_sk#10 >= 2441653) AND (d_date_sk#10 <= 2441683)) OR ((d_date_sk#10 >= 2442018) AND (d_date_sk#10 <= 2442048))) OR ((d_date_sk#10 >= 2442383) AND (d_date_sk#10 <= 2442413)))))) OR (((((((d_date_sk#10 >= 2442748) AND (d_date_sk#10 <= 2442778)) OR ((d_date_sk#10 >= 2443114) AND (d_date_sk#10 <= 2443144))) OR (((d_date_sk#10 >= 2443479) AND (d_date_sk#10 <= 2443509)) OR ((d_date_sk#10 >= 2443844) AND (d_date_sk#10 <= 2443874)))) OR ((((d_date_sk#10 >= 2444209) AND (d_date_sk#10 <= 2444239)) OR ((d_date_sk#10 >= 2444575) AND (d_date_sk#10 <= 2444605))) OR ((d_date_sk#10 >= 2444940) AND (d_date_sk#10 <= 2444970)))) OR (((((d_date_sk#10 >= 2445305) AND (d_date_sk#10 <= 2445335)) OR ((d_date_sk#10 >= 2445670) AND (d_date_sk#10 <= 2445700))) OR ((d_date_sk#10 >= 2446036) AND (d_date_sk#10 <= 2446066))) OR ((((d_date_sk#10 >= 2446401) AND (d_date_sk#10 <= 2446431)) OR ((d_date_sk#10 >= 2446766) AND (d_date_sk#10 <= 2446796))) OR ((d_date_sk#10 >= 2447131) AND (d_date_sk#10 <= 2447161))))) OR ((((((d_date_sk#10 >= 2447497) AND (d_date_sk#10 <= 2447527)) OR ((d_date_sk#10 >= 2447862) AND (d_date_sk#10 <= 2447892))) OR ((d_date_sk#10 >= 2448227) AND (d_date_sk#10 <= 2448257))) OR ((((d_date_sk#10 >= 2448592) AND (d_date_sk#10 <= 2448622)) OR ((d_date_sk#10 >= 2448958) AND (d_date_sk#10 <= 2448988))) OR ((d_date_sk#10 >= 2449323) AND (d_date_sk#10 <= 2449353)))) OR (((((d_date_sk#10 >= 2449688) AND (d_date_sk#10 <= 2449718)) OR ((d_date_sk#10 >= 2450053) AND (d_date_sk#10 <= 2450083))) OR ((d_date_sk#10 >= 2450419) AND (d_date_sk#10 <= 2450449))) OR ((((d_date_sk#10 >= 2450784) AND (d_date_sk#10 <= 2450814)) OR ((d_date_sk#10 >= 2451149) AND (d_date_sk#10 <= 2451179))) OR ((d_date_sk#10 >= 2451514) AND (d_date_sk#10 <= 2451544)))))))) OR (((((((((d_date_sk#10 >= 2451880) AND (d_date_sk#10 <= 2451910)) OR ((d_date_sk#10 >= 2452245) AND (d_date_sk#10 <= 2452275))) OR (((d_date_sk#10 >= 2452610) AND (d_date_sk#10 <= 2452640)) OR ((d_date_sk#10 >= 2452975) AND (d_date_sk#10 <= 2453005)))) OR ((((d_date_sk#10 >= 2453341) AND (d_date_sk#10 <= 2453371)) OR ((d_date_sk#10 >= 2453706) AND (d_date_sk#10 <= 2453736))) OR ((d_date_sk#10 >= 2454071) AND (d_date_sk#10 <= 2454101)))) OR (((((d_date_sk#10 >= 2454436) AND (d_date_sk#10 <= 2454466)) OR ((d_date_sk#10 >= 2454802) AND (d_date_sk#10 <= 2454832))) OR ((d_date_sk#10 >= 2455167) AND (d_date_sk#10 <= 2455197))) OR ((((d_date_sk#10 >= 2455532) AND (d_date_sk#10 <= 2455562)) OR ((d_date_sk#10 >= 2455897) AND (d_date_sk#10 <= 2455927))) OR ((d_date_sk#10 >= 2456263) AND (d_date_sk#10 <= 2456293))))) OR ((((((d_date_sk#10 >= 2456628) AND (d_date_sk#10 <= 2456658)) OR ((d_date_sk#10 >= 2456993) AND (d_date_sk#10 <= 2457023))) OR ((d_date_sk#10 >= 2457358) AND (d_date_sk#10 <= 2457388))) OR ((((d_date_sk#10 >= 2457724) AND (d_date_sk#10 <= 2457754)) OR ((d_date_sk#10 >= 2458089) AND (d_date_sk#10 <= 2458119))) OR ((d_date_sk#10 >= 2458454) AND (d_date_sk#10 <= 2458484)))) OR (((((d_date_sk#10 >= 2458819) AND (d_date_sk#10 <= 2458849)) OR ((d_date_sk#10 >= 2459185) AND (d_date_sk#10 <= 2459215))) OR ((d_date_sk#10 >= 2459550) AND (d_date_sk#10 <= 2459580))) OR ((((d_date_sk#10 >= 2459915) AND (d_date_sk#10 <= 2459945)) OR ((d_date_sk#10 >= 2460280) AND (d_date_sk#10 <= 2460310))) OR ((d_date_sk#10 >= 2460646) AND (d_date_sk#10 <= 2460676)))))) OR (((((((d_date_sk#10 >= 2461011) AND (d_date_sk#10 <= 2461041)) OR ((d_date_sk#10 >= 2461376) AND (d_date_sk#10 <= 2461406))) OR (((d_date_sk#10 >= 2461741) AND (d_date_sk#10 <= 2461771)) OR ((d_date_sk#10 >= 2462107) AND (d_date_sk#10 <= 2462137)))) OR ((((d_date_sk#10 >= 2462472) AND (d_date_sk#10 <= 2462502)) OR ((d_date_sk#10 >= 2462837) AND (d_date_sk#10 <= 2462867))) OR ((d_date_sk#10 >= 2463202) AND (d_date_sk#10 <= 2463232)))) OR (((((d_date_sk#10 >= 2463568) AND (d_date_sk#10 <= 2463598)) OR ((d_date_sk#10 >= 2463933) AND (d_date_sk#10 <= 2463963))) OR ((d_date_sk#10 >= 2464298) AND (d_date_sk#10 <= 2464328))) OR ((((d_date_sk#10 >= 2464663) AND (d_date_sk#10 <= 2464693)) OR ((d_date_sk#10 >= 2465029) AND (d_date_sk#10 <= 2465059))) OR ((d_date_sk#10 >= 2465394) AND (d_date_sk#10 <= 2465424))))) OR ((((((d_date_sk#10 >= 2465759) AND (d_date_sk#10 <= 2465789)) OR ((d_date_sk#10 >= 2466124) AND (d_date_sk#10 <= 2466154))) OR ((d_date_sk#10 >= 2466490) AND (d_date_sk#10 <= 2466520))) OR ((((d_date_sk#10 >= 2466855) AND (d_date_sk#10 <= 2466885)) OR ((d_date_sk#10 >= 2467220) AND (d_date_sk#10 <= 2467250))) OR ((d_date_sk#10 >= 2467585) AND (d_date_sk#10 <= 2467615)))) OR (((((d_date_sk#10 >= 2467951) AND (d_date_sk#10 <= 2467981)) OR ((d_date_sk#10 >= 2468316) AND (d_date_sk#10 <= 2468346))) OR ((d_date_sk#10 >= 2468681) AND (d_date_sk#10 <= 2468711))) OR ((((d_date_sk#10 >= 2469046) AND (d_date_sk#10 <= 2469076)) OR ((d_date_sk#10 >= 2469412) AND (d_date_sk#10 <= 2469442))) OR ((d_date_sk#10 >= 2469777) AND (d_date_sk#10 <= 2469807))))))) OR ((((((((d_date_sk#10 >= 2470142) AND (d_date_sk#10 <= 2470172)) OR ((d_date_sk#10 >= 2470507) AND (d_date_sk#10 <= 2470537))) OR (((d_date_sk#10 >= 2470873) AND (d_date_sk#10 <= 2470903)) OR ((d_date_sk#10 >= 2471238) AND (d_date_sk#10 <= 2471268)))) OR ((((d_date_sk#10 >= 2471603) AND (d_date_sk#10 <= 2471633)) OR ((d_date_sk#10 >= 2471968) AND (d_date_sk#10 <= 2471998))) OR ((d_date_sk#10 >= 2472334) AND (d_date_sk#10 <= 2472364)))) OR (((((d_date_sk#10 >= 2472699) AND (d_date_sk#10 <= 2472729)) OR ((d_date_sk#10 >= 2473064) AND (d_date_sk#10 <= 2473094))) OR ((d_date_sk#10 >= 2473429) AND (d_date_sk#10 <= 2473459))) OR ((((d_date_sk#10 >= 2473795) AND (d_date_sk#10 <= 2473825)) OR ((d_date_sk#10 >= 2474160) AND (d_date_sk#10 <= 2474190))) OR ((d_date_sk#10 >= 2474525) AND (d_date_sk#10 <= 2474555))))) OR ((((((d_date_sk#10 >= 2474890) AND (d_date_sk#10 <= 2474920)) OR ((d_date_sk#10 >= 2475256) AND (d_date_sk#10 <= 2475286))) OR ((d_date_sk#10 >= 2475621) AND (d_date_sk#10 <= 2475651))) OR ((((d_date_sk#10 >= 2475986) AND (d_date_sk#10 <= 2476016)) OR ((d_date_sk#10 >= 2476351) AND (d_date_sk#10 <= 2476381))) OR ((d_date_sk#10 >= 2476717) AND (d_date_sk#10 <= 2476747)))) OR (((((d_date_sk#10 >= 2477082) AND (d_date_sk#10 <= 2477112)) OR ((d_date_sk#10 >= 2477447) AND (d_date_sk#10 <= 2477477))) OR ((d_date_sk#10 >= 2477812) AND (d_date_sk#10 <= 2477842))) OR ((((d_date_sk#10 >= 2478178) AND (d_date_sk#10 <= 2478208)) OR ((d_date_sk#10 >= 2478543) AND (d_date_sk#10 <= 2478573))) OR ((d_date_sk#10 >= 2478908) AND (d_date_sk#10 <= 2478938)))))) OR (((((((d_date_sk#10 >= 2479273) AND (d_date_sk#10 <= 2479303)) OR ((d_date_sk#10 >= 2479639) AND (d_date_sk#10 <= 2479669))) OR (((d_date_sk#10 >= 2480004) AND (d_date_sk#10 <= 2480034)) OR ((d_date_sk#10 >= 2480369) AND (d_date_sk#10 <= 2480399)))) OR ((((d_date_sk#10 >= 2480734) AND (d_date_sk#10 <= 2480764)) OR ((d_date_sk#10 >= 2481100) AND (d_date_sk#10 <= 2481130))) OR ((d_date_sk#10 >= 2481465) AND (d_date_sk#10 <= 2481495)))) OR (((((d_date_sk#10 >= 2481830) AND (d_date_sk#10 <= 2481860)) OR ((d_date_sk#10 >= 2482195) AND (d_date_sk#10 <= 2482225))) OR ((d_date_sk#10 >= 2482561) AND (d_date_sk#10 <= 2482591))) OR ((((d_date_sk#10 >= 2482926) AND (d_date_sk#10 <= 2482956)) OR ((d_date_sk#10 >= 2483291) AND (d_date_sk#10 <= 2483321))) OR ((d_date_sk#10 >= 2483656) AND (d_date_sk#10 <= 2483686))))) OR ((((((d_date_sk#10 >= 2484022) AND (d_date_sk#10 <= 2484052)) OR ((d_date_sk#10 >= 2484387) AND (d_date_sk#10 <= 2484417))) OR ((d_date_sk#10 >= 2484752) AND (d_date_sk#10 <= 2484782))) OR ((((d_date_sk#10 >= 2485117) AND (d_date_sk#10 <= 2485147)) OR ((d_date_sk#10 >= 2485483) AND (d_date_sk#10 <= 2485513))) OR ((d_date_sk#10 >= 2485848) AND (d_date_sk#10 <= 2485878)))) OR (((((d_date_sk#10 >= 2486213) AND (d_date_sk#10 <= 2486243)) OR ((d_date_sk#10 >= 2486578) AND (d_date_sk#10 <= 2486608))) OR ((d_date_sk#10 >= 2486944) AND (d_date_sk#10 <= 2486974))) OR ((((d_date_sk#10 >= 2487309) AND (d_date_sk#10 <= 2487339)) OR ((d_date_sk#10 >= 2487674) AND (d_date_sk#10 <= 2487704))) OR ((d_date_sk#10 >= 2488039) AND (d_date_sk#10 <= 2488069)))))))))) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#9, d_year#10, d_moy#17] +Condition : (((isnotnull(d_moy#17) AND (d_moy#17 = 12)) AND ((((((((((d_date_sk#9 >= 2415355) AND (d_date_sk#9 <= 2415385)) OR ((d_date_sk#9 >= 2415720) AND (d_date_sk#9 <= 2415750))) OR (((d_date_sk#9 >= 2416085) AND (d_date_sk#9 <= 2416115)) OR ((d_date_sk#9 >= 2416450) AND (d_date_sk#9 <= 2416480)))) OR ((((d_date_sk#9 >= 2416816) AND (d_date_sk#9 <= 2416846)) OR ((d_date_sk#9 >= 2417181) AND (d_date_sk#9 <= 2417211))) OR ((d_date_sk#9 >= 2417546) AND (d_date_sk#9 <= 2417576)))) OR (((((d_date_sk#9 >= 2417911) AND (d_date_sk#9 <= 2417941)) OR ((d_date_sk#9 >= 2418277) AND (d_date_sk#9 <= 2418307))) OR ((d_date_sk#9 >= 2418642) AND (d_date_sk#9 <= 2418672))) OR ((((d_date_sk#9 >= 2419007) AND (d_date_sk#9 <= 2419037)) OR ((d_date_sk#9 >= 2419372) AND (d_date_sk#9 <= 2419402))) OR ((d_date_sk#9 >= 2419738) AND (d_date_sk#9 <= 2419768))))) OR ((((((d_date_sk#9 >= 2420103) AND (d_date_sk#9 <= 2420133)) OR ((d_date_sk#9 >= 2420468) AND (d_date_sk#9 <= 2420498))) OR ((d_date_sk#9 >= 2420833) AND (d_date_sk#9 <= 2420863))) OR ((((d_date_sk#9 >= 2421199) AND (d_date_sk#9 <= 2421229)) OR ((d_date_sk#9 >= 2421564) AND (d_date_sk#9 <= 2421594))) OR ((d_date_sk#9 >= 2421929) AND (d_date_sk#9 <= 2421959)))) OR (((((d_date_sk#9 >= 2422294) AND (d_date_sk#9 <= 2422324)) OR ((d_date_sk#9 >= 2422660) AND (d_date_sk#9 <= 2422690))) OR ((d_date_sk#9 >= 2423025) AND (d_date_sk#9 <= 2423055))) OR ((((d_date_sk#9 >= 2423390) AND (d_date_sk#9 <= 2423420)) OR ((d_date_sk#9 >= 2423755) AND (d_date_sk#9 <= 2423785))) OR ((d_date_sk#9 >= 2424121) AND (d_date_sk#9 <= 2424151)))))) OR (((((((d_date_sk#9 >= 2424486) AND (d_date_sk#9 <= 2424516)) OR ((d_date_sk#9 >= 2424851) AND (d_date_sk#9 <= 2424881))) OR (((d_date_sk#9 >= 2425216) AND (d_date_sk#9 <= 2425246)) OR ((d_date_sk#9 >= 2425582) AND (d_date_sk#9 <= 2425612)))) OR ((((d_date_sk#9 >= 2425947) AND (d_date_sk#9 <= 2425977)) OR ((d_date_sk#9 >= 2426312) AND (d_date_sk#9 <= 2426342))) OR ((d_date_sk#9 >= 2426677) AND (d_date_sk#9 <= 2426707)))) OR (((((d_date_sk#9 >= 2427043) AND (d_date_sk#9 <= 2427073)) OR ((d_date_sk#9 >= 2427408) AND (d_date_sk#9 <= 2427438))) OR ((d_date_sk#9 >= 2427773) AND (d_date_sk#9 <= 2427803))) OR ((((d_date_sk#9 >= 2428138) AND (d_date_sk#9 <= 2428168)) OR ((d_date_sk#9 >= 2428504) AND (d_date_sk#9 <= 2428534))) OR ((d_date_sk#9 >= 2428869) AND (d_date_sk#9 <= 2428899))))) OR ((((((d_date_sk#9 >= 2429234) AND (d_date_sk#9 <= 2429264)) OR ((d_date_sk#9 >= 2429599) AND (d_date_sk#9 <= 2429629))) OR ((d_date_sk#9 >= 2429965) AND (d_date_sk#9 <= 2429995))) OR ((((d_date_sk#9 >= 2430330) AND (d_date_sk#9 <= 2430360)) OR ((d_date_sk#9 >= 2430695) AND (d_date_sk#9 <= 2430725))) OR ((d_date_sk#9 >= 2431060) AND (d_date_sk#9 <= 2431090)))) OR (((((d_date_sk#9 >= 2431426) AND (d_date_sk#9 <= 2431456)) OR ((d_date_sk#9 >= 2431791) AND (d_date_sk#9 <= 2431821))) OR ((d_date_sk#9 >= 2432156) AND (d_date_sk#9 <= 2432186))) OR ((((d_date_sk#9 >= 2432521) AND (d_date_sk#9 <= 2432551)) OR ((d_date_sk#9 >= 2432887) AND (d_date_sk#9 <= 2432917))) OR ((d_date_sk#9 >= 2433252) AND (d_date_sk#9 <= 2433282))))))) OR ((((((((d_date_sk#9 >= 2433617) AND (d_date_sk#9 <= 2433647)) OR ((d_date_sk#9 >= 2433982) AND (d_date_sk#9 <= 2434012))) OR (((d_date_sk#9 >= 2434348) AND (d_date_sk#9 <= 2434378)) OR ((d_date_sk#9 >= 2434713) AND (d_date_sk#9 <= 2434743)))) OR ((((d_date_sk#9 >= 2435078) AND (d_date_sk#9 <= 2435108)) OR ((d_date_sk#9 >= 2435443) AND (d_date_sk#9 <= 2435473))) OR ((d_date_sk#9 >= 2435809) AND (d_date_sk#9 <= 2435839)))) OR (((((d_date_sk#9 >= 2436174) AND (d_date_sk#9 <= 2436204)) OR ((d_date_sk#9 >= 2436539) AND (d_date_sk#9 <= 2436569))) OR ((d_date_sk#9 >= 2436904) AND (d_date_sk#9 <= 2436934))) OR ((((d_date_sk#9 >= 2437270) AND (d_date_sk#9 <= 2437300)) OR ((d_date_sk#9 >= 2437635) AND (d_date_sk#9 <= 2437665))) OR ((d_date_sk#9 >= 2438000) AND (d_date_sk#9 <= 2438030))))) OR ((((((d_date_sk#9 >= 2438365) AND (d_date_sk#9 <= 2438395)) OR ((d_date_sk#9 >= 2438731) AND (d_date_sk#9 <= 2438761))) OR ((d_date_sk#9 >= 2439096) AND (d_date_sk#9 <= 2439126))) OR ((((d_date_sk#9 >= 2439461) AND (d_date_sk#9 <= 2439491)) OR ((d_date_sk#9 >= 2439826) AND (d_date_sk#9 <= 2439856))) OR ((d_date_sk#9 >= 2440192) AND (d_date_sk#9 <= 2440222)))) OR (((((d_date_sk#9 >= 2440557) AND (d_date_sk#9 <= 2440587)) OR ((d_date_sk#9 >= 2440922) AND (d_date_sk#9 <= 2440952))) OR ((d_date_sk#9 >= 2441287) AND (d_date_sk#9 <= 2441317))) OR ((((d_date_sk#9 >= 2441653) AND (d_date_sk#9 <= 2441683)) OR ((d_date_sk#9 >= 2442018) AND (d_date_sk#9 <= 2442048))) OR ((d_date_sk#9 >= 2442383) AND (d_date_sk#9 <= 2442413)))))) OR (((((((d_date_sk#9 >= 2442748) AND (d_date_sk#9 <= 2442778)) OR ((d_date_sk#9 >= 2443114) AND (d_date_sk#9 <= 2443144))) OR (((d_date_sk#9 >= 2443479) AND (d_date_sk#9 <= 2443509)) OR ((d_date_sk#9 >= 2443844) AND (d_date_sk#9 <= 2443874)))) OR ((((d_date_sk#9 >= 2444209) AND (d_date_sk#9 <= 2444239)) OR ((d_date_sk#9 >= 2444575) AND (d_date_sk#9 <= 2444605))) OR ((d_date_sk#9 >= 2444940) AND (d_date_sk#9 <= 2444970)))) OR (((((d_date_sk#9 >= 2445305) AND (d_date_sk#9 <= 2445335)) OR ((d_date_sk#9 >= 2445670) AND (d_date_sk#9 <= 2445700))) OR ((d_date_sk#9 >= 2446036) AND (d_date_sk#9 <= 2446066))) OR ((((d_date_sk#9 >= 2446401) AND (d_date_sk#9 <= 2446431)) OR ((d_date_sk#9 >= 2446766) AND (d_date_sk#9 <= 2446796))) OR ((d_date_sk#9 >= 2447131) AND (d_date_sk#9 <= 2447161))))) OR ((((((d_date_sk#9 >= 2447497) AND (d_date_sk#9 <= 2447527)) OR ((d_date_sk#9 >= 2447862) AND (d_date_sk#9 <= 2447892))) OR ((d_date_sk#9 >= 2448227) AND (d_date_sk#9 <= 2448257))) OR ((((d_date_sk#9 >= 2448592) AND (d_date_sk#9 <= 2448622)) OR ((d_date_sk#9 >= 2448958) AND (d_date_sk#9 <= 2448988))) OR ((d_date_sk#9 >= 2449323) AND (d_date_sk#9 <= 2449353)))) OR (((((d_date_sk#9 >= 2449688) AND (d_date_sk#9 <= 2449718)) OR ((d_date_sk#9 >= 2450053) AND (d_date_sk#9 <= 2450083))) OR ((d_date_sk#9 >= 2450419) AND (d_date_sk#9 <= 2450449))) OR ((((d_date_sk#9 >= 2450784) AND (d_date_sk#9 <= 2450814)) OR ((d_date_sk#9 >= 2451149) AND (d_date_sk#9 <= 2451179))) OR ((d_date_sk#9 >= 2451514) AND (d_date_sk#9 <= 2451544)))))))) OR (((((((((d_date_sk#9 >= 2451880) AND (d_date_sk#9 <= 2451910)) OR ((d_date_sk#9 >= 2452245) AND (d_date_sk#9 <= 2452275))) OR (((d_date_sk#9 >= 2452610) AND (d_date_sk#9 <= 2452640)) OR ((d_date_sk#9 >= 2452975) AND (d_date_sk#9 <= 2453005)))) OR ((((d_date_sk#9 >= 2453341) AND (d_date_sk#9 <= 2453371)) OR ((d_date_sk#9 >= 2453706) AND (d_date_sk#9 <= 2453736))) OR ((d_date_sk#9 >= 2454071) AND (d_date_sk#9 <= 2454101)))) OR (((((d_date_sk#9 >= 2454436) AND (d_date_sk#9 <= 2454466)) OR ((d_date_sk#9 >= 2454802) AND (d_date_sk#9 <= 2454832))) OR ((d_date_sk#9 >= 2455167) AND (d_date_sk#9 <= 2455197))) OR ((((d_date_sk#9 >= 2455532) AND (d_date_sk#9 <= 2455562)) OR ((d_date_sk#9 >= 2455897) AND (d_date_sk#9 <= 2455927))) OR ((d_date_sk#9 >= 2456263) AND (d_date_sk#9 <= 2456293))))) OR ((((((d_date_sk#9 >= 2456628) AND (d_date_sk#9 <= 2456658)) OR ((d_date_sk#9 >= 2456993) AND (d_date_sk#9 <= 2457023))) OR ((d_date_sk#9 >= 2457358) AND (d_date_sk#9 <= 2457388))) OR ((((d_date_sk#9 >= 2457724) AND (d_date_sk#9 <= 2457754)) OR ((d_date_sk#9 >= 2458089) AND (d_date_sk#9 <= 2458119))) OR ((d_date_sk#9 >= 2458454) AND (d_date_sk#9 <= 2458484)))) OR (((((d_date_sk#9 >= 2458819) AND (d_date_sk#9 <= 2458849)) OR ((d_date_sk#9 >= 2459185) AND (d_date_sk#9 <= 2459215))) OR ((d_date_sk#9 >= 2459550) AND (d_date_sk#9 <= 2459580))) OR ((((d_date_sk#9 >= 2459915) AND (d_date_sk#9 <= 2459945)) OR ((d_date_sk#9 >= 2460280) AND (d_date_sk#9 <= 2460310))) OR ((d_date_sk#9 >= 2460646) AND (d_date_sk#9 <= 2460676)))))) OR (((((((d_date_sk#9 >= 2461011) AND (d_date_sk#9 <= 2461041)) OR ((d_date_sk#9 >= 2461376) AND (d_date_sk#9 <= 2461406))) OR (((d_date_sk#9 >= 2461741) AND (d_date_sk#9 <= 2461771)) OR ((d_date_sk#9 >= 2462107) AND (d_date_sk#9 <= 2462137)))) OR ((((d_date_sk#9 >= 2462472) AND (d_date_sk#9 <= 2462502)) OR ((d_date_sk#9 >= 2462837) AND (d_date_sk#9 <= 2462867))) OR ((d_date_sk#9 >= 2463202) AND (d_date_sk#9 <= 2463232)))) OR (((((d_date_sk#9 >= 2463568) AND (d_date_sk#9 <= 2463598)) OR ((d_date_sk#9 >= 2463933) AND (d_date_sk#9 <= 2463963))) OR ((d_date_sk#9 >= 2464298) AND (d_date_sk#9 <= 2464328))) OR ((((d_date_sk#9 >= 2464663) AND (d_date_sk#9 <= 2464693)) OR ((d_date_sk#9 >= 2465029) AND (d_date_sk#9 <= 2465059))) OR ((d_date_sk#9 >= 2465394) AND (d_date_sk#9 <= 2465424))))) OR ((((((d_date_sk#9 >= 2465759) AND (d_date_sk#9 <= 2465789)) OR ((d_date_sk#9 >= 2466124) AND (d_date_sk#9 <= 2466154))) OR ((d_date_sk#9 >= 2466490) AND (d_date_sk#9 <= 2466520))) OR ((((d_date_sk#9 >= 2466855) AND (d_date_sk#9 <= 2466885)) OR ((d_date_sk#9 >= 2467220) AND (d_date_sk#9 <= 2467250))) OR ((d_date_sk#9 >= 2467585) AND (d_date_sk#9 <= 2467615)))) OR (((((d_date_sk#9 >= 2467951) AND (d_date_sk#9 <= 2467981)) OR ((d_date_sk#9 >= 2468316) AND (d_date_sk#9 <= 2468346))) OR ((d_date_sk#9 >= 2468681) AND (d_date_sk#9 <= 2468711))) OR ((((d_date_sk#9 >= 2469046) AND (d_date_sk#9 <= 2469076)) OR ((d_date_sk#9 >= 2469412) AND (d_date_sk#9 <= 2469442))) OR ((d_date_sk#9 >= 2469777) AND (d_date_sk#9 <= 2469807))))))) OR ((((((((d_date_sk#9 >= 2470142) AND (d_date_sk#9 <= 2470172)) OR ((d_date_sk#9 >= 2470507) AND (d_date_sk#9 <= 2470537))) OR (((d_date_sk#9 >= 2470873) AND (d_date_sk#9 <= 2470903)) OR ((d_date_sk#9 >= 2471238) AND (d_date_sk#9 <= 2471268)))) OR ((((d_date_sk#9 >= 2471603) AND (d_date_sk#9 <= 2471633)) OR ((d_date_sk#9 >= 2471968) AND (d_date_sk#9 <= 2471998))) OR ((d_date_sk#9 >= 2472334) AND (d_date_sk#9 <= 2472364)))) OR (((((d_date_sk#9 >= 2472699) AND (d_date_sk#9 <= 2472729)) OR ((d_date_sk#9 >= 2473064) AND (d_date_sk#9 <= 2473094))) OR ((d_date_sk#9 >= 2473429) AND (d_date_sk#9 <= 2473459))) OR ((((d_date_sk#9 >= 2473795) AND (d_date_sk#9 <= 2473825)) OR ((d_date_sk#9 >= 2474160) AND (d_date_sk#9 <= 2474190))) OR ((d_date_sk#9 >= 2474525) AND (d_date_sk#9 <= 2474555))))) OR ((((((d_date_sk#9 >= 2474890) AND (d_date_sk#9 <= 2474920)) OR ((d_date_sk#9 >= 2475256) AND (d_date_sk#9 <= 2475286))) OR ((d_date_sk#9 >= 2475621) AND (d_date_sk#9 <= 2475651))) OR ((((d_date_sk#9 >= 2475986) AND (d_date_sk#9 <= 2476016)) OR ((d_date_sk#9 >= 2476351) AND (d_date_sk#9 <= 2476381))) OR ((d_date_sk#9 >= 2476717) AND (d_date_sk#9 <= 2476747)))) OR (((((d_date_sk#9 >= 2477082) AND (d_date_sk#9 <= 2477112)) OR ((d_date_sk#9 >= 2477447) AND (d_date_sk#9 <= 2477477))) OR ((d_date_sk#9 >= 2477812) AND (d_date_sk#9 <= 2477842))) OR ((((d_date_sk#9 >= 2478178) AND (d_date_sk#9 <= 2478208)) OR ((d_date_sk#9 >= 2478543) AND (d_date_sk#9 <= 2478573))) OR ((d_date_sk#9 >= 2478908) AND (d_date_sk#9 <= 2478938)))))) OR (((((((d_date_sk#9 >= 2479273) AND (d_date_sk#9 <= 2479303)) OR ((d_date_sk#9 >= 2479639) AND (d_date_sk#9 <= 2479669))) OR (((d_date_sk#9 >= 2480004) AND (d_date_sk#9 <= 2480034)) OR ((d_date_sk#9 >= 2480369) AND (d_date_sk#9 <= 2480399)))) OR ((((d_date_sk#9 >= 2480734) AND (d_date_sk#9 <= 2480764)) OR ((d_date_sk#9 >= 2481100) AND (d_date_sk#9 <= 2481130))) OR ((d_date_sk#9 >= 2481465) AND (d_date_sk#9 <= 2481495)))) OR (((((d_date_sk#9 >= 2481830) AND (d_date_sk#9 <= 2481860)) OR ((d_date_sk#9 >= 2482195) AND (d_date_sk#9 <= 2482225))) OR ((d_date_sk#9 >= 2482561) AND (d_date_sk#9 <= 2482591))) OR ((((d_date_sk#9 >= 2482926) AND (d_date_sk#9 <= 2482956)) OR ((d_date_sk#9 >= 2483291) AND (d_date_sk#9 <= 2483321))) OR ((d_date_sk#9 >= 2483656) AND (d_date_sk#9 <= 2483686))))) OR ((((((d_date_sk#9 >= 2484022) AND (d_date_sk#9 <= 2484052)) OR ((d_date_sk#9 >= 2484387) AND (d_date_sk#9 <= 2484417))) OR ((d_date_sk#9 >= 2484752) AND (d_date_sk#9 <= 2484782))) OR ((((d_date_sk#9 >= 2485117) AND (d_date_sk#9 <= 2485147)) OR ((d_date_sk#9 >= 2485483) AND (d_date_sk#9 <= 2485513))) OR ((d_date_sk#9 >= 2485848) AND (d_date_sk#9 <= 2485878)))) OR (((((d_date_sk#9 >= 2486213) AND (d_date_sk#9 <= 2486243)) OR ((d_date_sk#9 >= 2486578) AND (d_date_sk#9 <= 2486608))) OR ((d_date_sk#9 >= 2486944) AND (d_date_sk#9 <= 2486974))) OR ((((d_date_sk#9 >= 2487309) AND (d_date_sk#9 <= 2487339)) OR ((d_date_sk#9 >= 2487674) AND (d_date_sk#9 <= 2487704))) OR ((d_date_sk#9 >= 2488039) AND (d_date_sk#9 <= 2488069)))))))))) AND isnotnull(d_date_sk#9)) (21) Project [codegen id : 1] -Output [2]: [d_date_sk#10, d_year#11] -Input [3]: [d_date_sk#10, d_year#11, d_moy#19] +Output [2]: [d_date_sk#9, d_year#10] +Input [3]: [d_date_sk#9, d_year#10, d_moy#17] (22) BroadcastExchange -Input [2]: [d_date_sk#10, d_year#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3/explain.txt index a60caf19f23df..c6c1e51e9360e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q3/explain.txt @@ -57,7 +57,7 @@ Condition : isnotnull(ss_item_sk#4) (8) BroadcastExchange Input [3]: [ss_item_sk#4, ss_net_profit#5, ss_sold_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] @@ -69,55 +69,55 @@ Output [3]: [d_year#2, ss_item_sk#4, ss_net_profit#5] Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_net_profit#5, ss_sold_date_sk#6] (11) Scan parquet default.item -Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,436), IsNotNull(i_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] (13) Filter [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] -Condition : ((isnotnull(i_manufact_id#11) AND (i_manufact_id#11 = 436)) AND isnotnull(i_item_sk#8)) +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Condition : ((isnotnull(i_manufact_id#10) AND (i_manufact_id#10 = 436)) AND isnotnull(i_item_sk#7)) (14) Project [codegen id : 2] -Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] +Output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] (15) BroadcastExchange -Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#4] -Right keys [1]: [i_item_sk#8] +Right keys [1]: [i_item_sk#7] Join condition: None (17) Project [codegen id : 3] -Output [4]: [d_year#2, ss_net_profit#5, i_brand_id#9, i_brand#10] -Input [6]: [d_year#2, ss_item_sk#4, ss_net_profit#5, i_item_sk#8, i_brand_id#9, i_brand#10] +Output [4]: [d_year#2, ss_net_profit#5, i_brand_id#8, i_brand#9] +Input [6]: [d_year#2, ss_item_sk#4, ss_net_profit#5, i_item_sk#7, i_brand_id#8, i_brand#9] (18) HashAggregate [codegen id : 3] -Input [4]: [d_year#2, ss_net_profit#5, i_brand_id#9, i_brand#10] -Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Input [4]: [d_year#2, ss_net_profit#5, i_brand_id#8, i_brand#9] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#5))] -Aggregate Attributes [1]: [sum#13] -Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] (19) Exchange -Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] -Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(d_year#2, i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] -Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] Functions [1]: [sum(UnscaledValue(ss_net_profit#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#5))#16] -Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#16,17,2) AS sum_agg#19] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#5))#13] +Results [4]: [d_year#2, i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#13,17,2) AS sum_agg#16] (21) TakeOrderedAndProject -Input [4]: [d_year#2, brand_id#17, brand#18, sum_agg#19] -Arguments: 100, [d_year#2 ASC NULLS FIRST, sum_agg#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, sum_agg#19] +Input [4]: [d_year#2, brand_id#14, brand#15, sum_agg#16] +Arguments: 100, [d_year#2 ASC NULLS FIRST, sum_agg#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [d_year#2, brand_id#14, brand#15, sum_agg#16] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt index bdc8690e8d3ba..bdd5dda489c61 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34.sf100/explain.txt @@ -83,7 +83,7 @@ Input [2]: [s_store_sk#8, s_county#9] (11) BroadcastExchange Input [1]: [s_store_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#3] @@ -95,104 +95,104 @@ Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#8] (14) Scan parquet default.household_demographics -Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,Unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (16) Filter [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] -Condition : (((((isnotnull(hd_vehicle_count#14) AND isnotnull(hd_dep_count#13)) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = Unknown ))) AND (hd_vehicle_count#14 > 0)) AND ((cast(hd_dep_count#13 as double) / cast(hd_vehicle_count#14 as double)) > 1.2)) AND isnotnull(hd_demo_sk#11)) +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] +Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = Unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.2)) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#11] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [1]: [hd_demo_sk#10] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (18) BroadcastExchange -Input [1]: [hd_demo_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [hd_demo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#11] +Right keys [1]: [hd_demo_sk#10] Join condition: None (20) Project [codegen id : 4] Output [2]: [ss_customer_sk#1, ss_ticket_number#4] -Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#10] (21) HashAggregate [codegen id : 4] Input [2]: [ss_customer_sk#1, ss_ticket_number#4] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Aggregate Attributes [1]: [count#14] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] (22) Exchange -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] -Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 5] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#19 AS cnt#20] +Aggregate Attributes [1]: [count(1)#16] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#16 AS cnt#17] (24) Filter [codegen id : 5] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] -Condition : ((cnt#20 >= 15) AND (cnt#20 <= 20)) +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] +Condition : ((cnt#17 >= 15) AND (cnt#17 <= 20)) (25) Exchange -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] -Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] +Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] (26) Sort [codegen id : 6] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] Arguments: [ss_customer_sk#1 ASC NULLS FIRST], false, 0 (27) Scan parquet default.customer -Output [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] +Output [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 7] -Input [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (29) Filter [codegen id : 7] -Input [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] -Condition : isnotnull(c_customer_sk#22) +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Condition : isnotnull(c_customer_sk#18) (30) Exchange -Input [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] -Arguments: hashpartitioning(c_customer_sk#22, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Arguments: hashpartitioning(c_customer_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=5] (31) Sort [codegen id : 8] -Input [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] -Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Arguments: [c_customer_sk#18 ASC NULLS FIRST], false, 0 (32) SortMergeJoin [codegen id : 9] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#22] +Right keys [1]: [c_customer_sk#18] Join condition: None (33) Project [codegen id : 9] -Output [6]: [c_last_name#25, c_first_name#24, c_salutation#23, c_preferred_cust_flag#26, ss_ticket_number#4, cnt#20] -Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20, c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] +Output [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17, c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (34) Exchange -Input [6]: [c_last_name#25, c_first_name#24, c_salutation#23, c_preferred_cust_flag#26, ss_ticket_number#4, cnt#20] -Arguments: rangepartitioning(c_last_name#25 ASC NULLS FIRST, c_first_name#24 ASC NULLS FIRST, c_salutation#23 ASC NULLS FIRST, c_preferred_cust_flag#26 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: rangepartitioning(c_last_name#21 ASC NULLS FIRST, c_first_name#20 ASC NULLS FIRST, c_salutation#19 ASC NULLS FIRST, c_preferred_cust_flag#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=6] (35) Sort [codegen id : 10] -Input [6]: [c_last_name#25, c_first_name#24, c_salutation#23, c_preferred_cust_flag#26, ss_ticket_number#4, cnt#20] -Arguments: [c_last_name#25 ASC NULLS FIRST, c_first_name#24 ASC NULLS FIRST, c_salutation#23 ASC NULLS FIRST, c_preferred_cust_flag#26 DESC NULLS LAST], true, 0 +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: [c_last_name#21 ASC NULLS FIRST, c_first_name#20 ASC NULLS FIRST, c_salutation#19 ASC NULLS FIRST, c_preferred_cust_flag#22 DESC NULLS LAST], true, 0 ===== Subqueries ===== @@ -205,25 +205,25 @@ BroadcastExchange (40) (36) Scan parquet default.date_dim -Output [3]: [d_date_sk#7, d_year#29, d_dom#30] +Output [3]: [d_date_sk#7, d_year#23, d_dom#24] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1998,1999,2000]), GreaterThanOrEqual(d_date_sk,2450816), LessThanOrEqual(d_date_sk,2451910), IsNotNull(d_date_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#29, d_dom#30] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (38) Filter [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#29, d_dom#30] -Condition : (((((((d_dom#30 >= 1) AND (d_dom#30 <= 3)) OR ((d_dom#30 >= 25) AND (d_dom#30 <= 28))) AND d_year#29 IN (1998,1999,2000)) AND (d_date_sk#7 >= 2450816)) AND (d_date_sk#7 <= 2451910)) AND isnotnull(d_date_sk#7)) +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] +Condition : (((((((d_dom#24 >= 1) AND (d_dom#24 <= 3)) OR ((d_dom#24 >= 25) AND (d_dom#24 <= 28))) AND d_year#23 IN (1998,1999,2000)) AND (d_date_sk#7 >= 2450816)) AND (d_date_sk#7 <= 2451910)) AND isnotnull(d_date_sk#7)) (39) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [3]: [d_date_sk#7, d_year#29, d_dom#30] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (40) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt index b2b1d9c78229f..dddd98f235cd1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q34/explain.txt @@ -80,7 +80,7 @@ Input [2]: [s_store_sk#8, s_county#9] (11) BroadcastExchange Input [1]: [s_store_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#3] @@ -92,92 +92,92 @@ Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#8] (14) Scan parquet default.household_demographics -Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,Unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (16) Filter [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] -Condition : (((((isnotnull(hd_vehicle_count#14) AND isnotnull(hd_dep_count#13)) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = Unknown ))) AND (hd_vehicle_count#14 > 0)) AND ((cast(hd_dep_count#13 as double) / cast(hd_vehicle_count#14 as double)) > 1.2)) AND isnotnull(hd_demo_sk#11)) +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] +Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = Unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.2)) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#11] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [1]: [hd_demo_sk#10] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (18) BroadcastExchange -Input [1]: [hd_demo_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [hd_demo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#11] +Right keys [1]: [hd_demo_sk#10] Join condition: None (20) Project [codegen id : 4] Output [2]: [ss_customer_sk#1, ss_ticket_number#4] -Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#10] (21) HashAggregate [codegen id : 4] Input [2]: [ss_customer_sk#1, ss_ticket_number#4] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Aggregate Attributes [1]: [count#14] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] (22) Exchange -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] -Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 6] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#19 AS cnt#20] +Aggregate Attributes [1]: [count(1)#16] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#16 AS cnt#17] (24) Filter [codegen id : 6] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] -Condition : ((cnt#20 >= 15) AND (cnt#20 <= 20)) +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] +Condition : ((cnt#17 >= 15) AND (cnt#17 <= 20)) (25) Scan parquet default.customer -Output [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] +Output [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 5] -Input [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (27) Filter [codegen id : 5] -Input [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] -Condition : isnotnull(c_customer_sk#21) +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Condition : isnotnull(c_customer_sk#18) (28) BroadcastExchange -Input [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#21] +Right keys [1]: [c_customer_sk#18] Join condition: None (30) Project [codegen id : 6] -Output [6]: [c_last_name#24, c_first_name#23, c_salutation#22, c_preferred_cust_flag#25, ss_ticket_number#4, cnt#20] -Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20, c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] +Output [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17, c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (31) Exchange -Input [6]: [c_last_name#24, c_first_name#23, c_salutation#22, c_preferred_cust_flag#25, ss_ticket_number#4, cnt#20] -Arguments: rangepartitioning(c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, c_salutation#22 ASC NULLS FIRST, c_preferred_cust_flag#25 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: rangepartitioning(c_last_name#21 ASC NULLS FIRST, c_first_name#20 ASC NULLS FIRST, c_salutation#19 ASC NULLS FIRST, c_preferred_cust_flag#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=5] (32) Sort [codegen id : 7] -Input [6]: [c_last_name#24, c_first_name#23, c_salutation#22, c_preferred_cust_flag#25, ss_ticket_number#4, cnt#20] -Arguments: [c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, c_salutation#22 ASC NULLS FIRST, c_preferred_cust_flag#25 DESC NULLS LAST], true, 0 +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: [c_last_name#21 ASC NULLS FIRST, c_first_name#20 ASC NULLS FIRST, c_salutation#19 ASC NULLS FIRST, c_preferred_cust_flag#22 DESC NULLS LAST], true, 0 ===== Subqueries ===== @@ -190,25 +190,25 @@ BroadcastExchange (37) (33) Scan parquet default.date_dim -Output [3]: [d_date_sk#7, d_year#28, d_dom#29] +Output [3]: [d_date_sk#7, d_year#23, d_dom#24] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1998,1999,2000]), GreaterThanOrEqual(d_date_sk,2450816), LessThanOrEqual(d_date_sk,2451910), IsNotNull(d_date_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (35) Filter [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] -Condition : (((((((d_dom#29 >= 1) AND (d_dom#29 <= 3)) OR ((d_dom#29 >= 25) AND (d_dom#29 <= 28))) AND d_year#28 IN (1998,1999,2000)) AND (d_date_sk#7 >= 2450816)) AND (d_date_sk#7 <= 2451910)) AND isnotnull(d_date_sk#7)) +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] +Condition : (((((((d_dom#24 >= 1) AND (d_dom#24 <= 3)) OR ((d_dom#24 >= 25) AND (d_dom#24 <= 28))) AND d_year#23 IN (1998,1999,2000)) AND (d_date_sk#7 >= 2450816)) AND (d_date_sk#7 <= 2451910)) AND isnotnull(d_date_sk#7)) (36) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (37) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42.sf100/explain.txt index 5e83e995a3766..a7ad5b0917504 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42.sf100/explain.txt @@ -42,88 +42,88 @@ Input [3]: [d_date_sk#1, d_year#2, d_moy#3] (5) BroadcastExchange Input [2]: [d_date_sk#1, d_year#2] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.store_sales -Output [3]: [ss_item_sk#5, ss_ext_sales_price#6, ss_sold_date_sk#7] +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#7), (ss_sold_date_sk#7 >= 2451149), (ss_sold_date_sk#7 <= 2451179), dynamicpruningexpression(ss_sold_date_sk#7 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ss_sold_date_sk#6), (ss_sold_date_sk#6 >= 2451149), (ss_sold_date_sk#6 <= 2451179), dynamicpruningexpression(ss_sold_date_sk#6 IN dynamicpruning#7)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (7) ColumnarToRow -Input [3]: [ss_item_sk#5, ss_ext_sales_price#6, ss_sold_date_sk#7] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] (8) Filter -Input [3]: [ss_item_sk#5, ss_ext_sales_price#6, ss_sold_date_sk#7] -Condition : isnotnull(ss_item_sk#5) +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [ss_sold_date_sk#6] Join condition: None (10) Project [codegen id : 3] -Output [3]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6] -Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#5, ss_ext_sales_price#6, ss_sold_date_sk#7] +Output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] (11) Scan parquet default.item -Output [4]: [i_item_sk#9, i_category_id#10, i_category#11, i_manager_id#12] +Output [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#9, i_category_id#10, i_category#11, i_manager_id#12] +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] (13) Filter [codegen id : 2] -Input [4]: [i_item_sk#9, i_category_id#10, i_category#11, i_manager_id#12] -Condition : ((isnotnull(i_manager_id#12) AND (i_manager_id#12 = 1)) AND isnotnull(i_item_sk#9)) +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) (14) Project [codegen id : 2] -Output [3]: [i_item_sk#9, i_category_id#10, i_category#11] -Input [4]: [i_item_sk#9, i_category_id#10, i_category#11, i_manager_id#12] +Output [3]: [i_item_sk#8, i_category_id#9, i_category#10] +Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] (15) BroadcastExchange -Input [3]: [i_item_sk#9, i_category_id#10, i_category#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +Input [3]: [i_item_sk#8, i_category_id#9, i_category#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#5] -Right keys [1]: [i_item_sk#9] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#8] Join condition: None (17) Project [codegen id : 3] -Output [4]: [d_year#2, ss_ext_sales_price#6, i_category_id#10, i_category#11] -Input [6]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#9, i_category_id#10, i_category#11] +Output [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#9, i_category#10] +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#8, i_category_id#9, i_category#10] (18) HashAggregate [codegen id : 3] -Input [4]: [d_year#2, ss_ext_sales_price#6, i_category_id#10, i_category#11] -Keys [3]: [d_year#2, i_category_id#10, i_category#11] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum#14] -Results [4]: [d_year#2, i_category_id#10, i_category#11, sum#15] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#9, i_category#10] +Keys [3]: [d_year#2, i_category_id#9, i_category#10] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#12] +Results [4]: [d_year#2, i_category_id#9, i_category#10, sum#13] (19) Exchange -Input [4]: [d_year#2, i_category_id#10, i_category#11, sum#15] -Arguments: hashpartitioning(d_year#2, i_category_id#10, i_category#11, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#13] +Arguments: hashpartitioning(d_year#2, i_category_id#9, i_category#10, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [4]: [d_year#2, i_category_id#10, i_category#11, sum#15] -Keys [3]: [d_year#2, i_category_id#10, i_category#11] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#17] -Results [4]: [d_year#2, i_category_id#10, i_category#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#17,17,2) AS sum(ss_ext_sales_price)#18] +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#13] +Keys [3]: [d_year#2, i_category_id#9, i_category#10] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#14] +Results [4]: [d_year#2, i_category_id#9, i_category#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#14,17,2) AS sum(ss_ext_sales_price)#15] (21) TakeOrderedAndProject -Input [4]: [d_year#2, i_category_id#10, i_category#11, sum(ss_ext_sales_price)#18] -Arguments: 100, [sum(ss_ext_sales_price)#18 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#10 ASC NULLS FIRST, i_category#11 ASC NULLS FIRST], [d_year#2, i_category_id#10, i_category#11, sum(ss_ext_sales_price)#18] +Input [4]: [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#15] +Arguments: 100, [sum(ss_ext_sales_price)#15 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#15] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7 ReusedExchange (22) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42/explain.txt index 255f39414f908..a77e0cac14259 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q42/explain.txt @@ -57,7 +57,7 @@ Condition : isnotnull(ss_item_sk#4) (8) BroadcastExchange Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] @@ -69,55 +69,55 @@ Output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] (11) Scan parquet default.item -Output [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Output [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] (13) Filter [codegen id : 2] -Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] -Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 1)) AND isnotnull(i_item_sk#7)) (14) Project [codegen id : 2] -Output [3]: [i_item_sk#8, i_category_id#9, i_category#10] -Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Output [3]: [i_item_sk#7, i_category_id#8, i_category#9] +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] (15) BroadcastExchange -Input [3]: [i_item_sk#8, i_category_id#9, i_category#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Input [3]: [i_item_sk#7, i_category_id#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#4] -Right keys [1]: [i_item_sk#8] +Right keys [1]: [i_item_sk#7] Join condition: None (17) Project [codegen id : 3] -Output [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#9, i_category#10] -Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#8, i_category_id#9, i_category#10] +Output [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_category_id#8, i_category#9] (18) HashAggregate [codegen id : 3] -Input [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#9, i_category#10] -Keys [3]: [d_year#2, i_category_id#9, i_category#10] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] +Keys [3]: [d_year#2, i_category_id#8, i_category#9] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum#13] -Results [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] (19) Exchange -Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] -Arguments: hashpartitioning(d_year#2, i_category_id#9, i_category#10, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] +Arguments: hashpartitioning(d_year#2, i_category_id#8, i_category#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] -Keys [3]: [d_year#2, i_category_id#9, i_category#10] +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] +Keys [3]: [d_year#2, i_category_id#8, i_category#9] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#16] -Results [4]: [d_year#2, i_category_id#9, i_category#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#16,17,2) AS sum(ss_ext_sales_price)#17] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [4]: [d_year#2, i_category_id#8, i_category#9, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS sum(ss_ext_sales_price)#14] (21) TakeOrderedAndProject -Input [4]: [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#17] -Arguments: 100, [sum(ss_ext_sales_price)#17 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#17] +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#14] +Arguments: 100, [sum(ss_ext_sales_price)#14 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST, i_category#9 ASC NULLS FIRST], [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#14] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43.sf100/explain.txt index 72454cf27c3ee..bd95073599fb2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43.sf100/explain.txt @@ -42,88 +42,88 @@ Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] (5) BroadcastExchange Input [2]: [d_date_sk#1, d_day_name#3] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.store_sales -Output [3]: [ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Output [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#7), (ss_sold_date_sk#7 >= 2450816), (ss_sold_date_sk#7 <= 2451179), dynamicpruningexpression(ss_sold_date_sk#7 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ss_sold_date_sk#6), (ss_sold_date_sk#6 >= 2450816), (ss_sold_date_sk#6 <= 2451179), dynamicpruningexpression(ss_sold_date_sk#6 IN dynamicpruning#7)] PushedFilters: [IsNotNull(ss_store_sk)] ReadSchema: struct (7) ColumnarToRow -Input [3]: [ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] (8) Filter -Input [3]: [ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] -Condition : isnotnull(ss_store_sk#5) +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_store_sk#4) (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [ss_sold_date_sk#6] Join condition: None (10) Project [codegen id : 3] -Output [3]: [d_day_name#3, ss_store_sk#5, ss_sales_price#6] -Input [5]: [d_date_sk#1, d_day_name#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Output [3]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5] +Input [5]: [d_date_sk#1, d_day_name#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] (11) Scan parquet default.store -Output [4]: [s_store_sk#9, s_store_id#10, s_store_name#11, s_gmt_offset#12] +Output [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [s_store_sk#9, s_store_id#10, s_store_name#11, s_gmt_offset#12] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] (13) Filter [codegen id : 2] -Input [4]: [s_store_sk#9, s_store_id#10, s_store_name#11, s_gmt_offset#12] -Condition : ((isnotnull(s_gmt_offset#12) AND (s_gmt_offset#12 = -5.00)) AND isnotnull(s_store_sk#9)) +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Condition : ((isnotnull(s_gmt_offset#11) AND (s_gmt_offset#11 = -5.00)) AND isnotnull(s_store_sk#8)) (14) Project [codegen id : 2] -Output [3]: [s_store_sk#9, s_store_id#10, s_store_name#11] -Input [4]: [s_store_sk#9, s_store_id#10, s_store_name#11, s_gmt_offset#12] +Output [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] (15) BroadcastExchange -Input [3]: [s_store_sk#9, s_store_id#10, s_store_name#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +Input [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_store_sk#5] -Right keys [1]: [s_store_sk#9] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#8] Join condition: None (17) Project [codegen id : 3] -Output [4]: [d_day_name#3, ss_sales_price#6, s_store_id#10, s_store_name#11] -Input [6]: [d_day_name#3, ss_store_sk#5, ss_sales_price#6, s_store_sk#9, s_store_id#10, s_store_name#11] +Output [4]: [d_day_name#3, ss_sales_price#5, s_store_id#9, s_store_name#10] +Input [6]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5, s_store_sk#8, s_store_id#9, s_store_name#10] (18) HashAggregate [codegen id : 3] -Input [4]: [d_day_name#3, ss_sales_price#6, s_store_id#10, s_store_name#11] -Keys [2]: [s_store_name#11, s_store_id#10] -Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#6 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#6 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#6 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#6 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#6 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#6 END))] -Aggregate Attributes [7]: [sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] -Results [9]: [s_store_name#11, s_store_id#10, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26, sum#27] +Input [4]: [d_day_name#3, ss_sales_price#5, s_store_id#9, s_store_name#10] +Keys [2]: [s_store_name#10, s_store_id#9] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] +Aggregate Attributes [7]: [sum#12, sum#13, sum#14, sum#15, sum#16, sum#17, sum#18] +Results [9]: [s_store_name#10, s_store_id#9, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] (19) Exchange -Input [9]: [s_store_name#11, s_store_id#10, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26, sum#27] -Arguments: hashpartitioning(s_store_name#11, s_store_id#10, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [9]: [s_store_name#10, s_store_id#9, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] +Arguments: hashpartitioning(s_store_name#10, s_store_id#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [9]: [s_store_name#11, s_store_id#10, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26, sum#27] -Keys [2]: [s_store_name#11, s_store_id#10] -Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#6 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#6 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#6 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#6 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#6 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#6 END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#6 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#6 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#6 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#6 END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#6 END))#34, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#6 END))#35] -Results [9]: [s_store_name#11, s_store_id#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#6 END))#29,17,2) AS sun_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#6 END))#30,17,2) AS mon_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#6 END))#31,17,2) AS tue_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 END))#32,17,2) AS wed_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#6 END))#33,17,2) AS thu_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#6 END))#34,17,2) AS fri_sales#41, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#6 END))#35,17,2) AS sat_sales#42] +Input [9]: [s_store_name#10, s_store_id#9, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] +Keys [2]: [s_store_name#10, s_store_id#9] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#32] +Results [9]: [s_store_name#10, s_store_id#9, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#26,17,2) AS sun_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#27,17,2) AS mon_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#28,17,2) AS tue_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#29,17,2) AS wed_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#30,17,2) AS thu_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#31,17,2) AS fri_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#32,17,2) AS sat_sales#39] (21) TakeOrderedAndProject -Input [9]: [s_store_name#11, s_store_id#10, sun_sales#36, mon_sales#37, tue_sales#38, wed_sales#39, thu_sales#40, fri_sales#41, sat_sales#42] -Arguments: 100, [s_store_name#11 ASC NULLS FIRST, s_store_id#10 ASC NULLS FIRST, sun_sales#36 ASC NULLS FIRST, mon_sales#37 ASC NULLS FIRST, tue_sales#38 ASC NULLS FIRST, wed_sales#39 ASC NULLS FIRST, thu_sales#40 ASC NULLS FIRST, fri_sales#41 ASC NULLS FIRST, sat_sales#42 ASC NULLS FIRST], [s_store_name#11, s_store_id#10, sun_sales#36, mon_sales#37, tue_sales#38, wed_sales#39, thu_sales#40, fri_sales#41, sat_sales#42] +Input [9]: [s_store_name#10, s_store_id#9, sun_sales#33, mon_sales#34, tue_sales#35, wed_sales#36, thu_sales#37, fri_sales#38, sat_sales#39] +Arguments: 100, [s_store_name#10 ASC NULLS FIRST, s_store_id#9 ASC NULLS FIRST, sun_sales#33 ASC NULLS FIRST, mon_sales#34 ASC NULLS FIRST, tue_sales#35 ASC NULLS FIRST, wed_sales#36 ASC NULLS FIRST, thu_sales#37 ASC NULLS FIRST, fri_sales#38 ASC NULLS FIRST, sat_sales#39 ASC NULLS FIRST], [s_store_name#10, s_store_id#9, sun_sales#33, mon_sales#34, tue_sales#35, wed_sales#36, thu_sales#37, fri_sales#38, sat_sales#39] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7 ReusedExchange (22) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43/explain.txt index 74911c78e91fe..18475905e6d84 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q43/explain.txt @@ -57,7 +57,7 @@ Condition : isnotnull(ss_store_sk#4) (8) BroadcastExchange Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] @@ -69,55 +69,55 @@ Output [3]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5] Input [5]: [d_date_sk#1, d_day_name#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] (11) Scan parquet default.store -Output [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Output [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] (13) Filter [codegen id : 2] -Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] -Condition : ((isnotnull(s_gmt_offset#11) AND (s_gmt_offset#11 = -5.00)) AND isnotnull(s_store_sk#8)) +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Condition : ((isnotnull(s_gmt_offset#10) AND (s_gmt_offset#10 = -5.00)) AND isnotnull(s_store_sk#7)) (14) Project [codegen id : 2] -Output [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] -Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Output [3]: [s_store_sk#7, s_store_id#8, s_store_name#9] +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] (15) BroadcastExchange -Input [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Input [3]: [s_store_sk#7, s_store_id#8, s_store_name#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#8] +Right keys [1]: [s_store_sk#7] Join condition: None (17) Project [codegen id : 3] -Output [4]: [d_day_name#3, ss_sales_price#5, s_store_id#9, s_store_name#10] -Input [6]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5, s_store_sk#8, s_store_id#9, s_store_name#10] +Output [4]: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] +Input [6]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5, s_store_sk#7, s_store_id#8, s_store_name#9] (18) HashAggregate [codegen id : 3] -Input [4]: [d_day_name#3, ss_sales_price#5, s_store_id#9, s_store_name#10] -Keys [2]: [s_store_name#10, s_store_id#9] +Input [4]: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] +Keys [2]: [s_store_name#9, s_store_id#8] Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] -Aggregate Attributes [7]: [sum#13, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19] -Results [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Aggregate Attributes [7]: [sum#11, sum#12, sum#13, sum#14, sum#15, sum#16, sum#17] +Results [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] (19) Exchange -Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] -Arguments: hashpartitioning(s_store_name#10, s_store_id#9, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(s_store_name#9, s_store_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] -Keys [2]: [s_store_name#10, s_store_id#9] +Input [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [2]: [s_store_name#9, s_store_id#8] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#34] -Results [9]: [s_store_name#10, s_store_id#9, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#34,17,2) AS sat_sales#41] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#31] +Results [9]: [s_store_name#9, s_store_id#8, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#25,17,2) AS sun_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#26,17,2) AS mon_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#27,17,2) AS tue_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#28,17,2) AS wed_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#29,17,2) AS thu_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#30,17,2) AS fri_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#31,17,2) AS sat_sales#38] (21) TakeOrderedAndProject -Input [9]: [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] -Arguments: 100, [s_store_name#10 ASC NULLS FIRST, s_store_id#9 ASC NULLS FIRST, sun_sales#35 ASC NULLS FIRST, mon_sales#36 ASC NULLS FIRST, tue_sales#37 ASC NULLS FIRST, wed_sales#38 ASC NULLS FIRST, thu_sales#39 ASC NULLS FIRST, fri_sales#40 ASC NULLS FIRST, sat_sales#41 ASC NULLS FIRST], [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] +Input [9]: [s_store_name#9, s_store_id#8, sun_sales#32, mon_sales#33, tue_sales#34, wed_sales#35, thu_sales#36, fri_sales#37, sat_sales#38] +Arguments: 100, [s_store_name#9 ASC NULLS FIRST, s_store_id#8 ASC NULLS FIRST, sun_sales#32 ASC NULLS FIRST, mon_sales#33 ASC NULLS FIRST, tue_sales#34 ASC NULLS FIRST, wed_sales#35 ASC NULLS FIRST, thu_sales#36 ASC NULLS FIRST, fri_sales#37 ASC NULLS FIRST, sat_sales#38 ASC NULLS FIRST], [s_store_name#9, s_store_id#8, sun_sales#32, mon_sales#33, tue_sales#34, wed_sales#35, thu_sales#36, fri_sales#37, sat_sales#38] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46.sf100/explain.txt index 4ea916a38b26b..48a3708fa4de4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46.sf100/explain.txt @@ -95,7 +95,7 @@ Input [2]: [s_store_sk#11, s_city#12] (11) BroadcastExchange Input [1]: [s_store_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#4] @@ -107,155 +107,155 @@ Output [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#11] (14) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Output [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [Or(EqualTo(hd_dep_count,5),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Input [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] (16) Filter [codegen id : 3] -Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : (((hd_dep_count#15 = 5) OR (hd_vehicle_count#16 = 3)) AND isnotnull(hd_demo_sk#14)) +Input [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] +Condition : (((hd_dep_count#14 = 5) OR (hd_vehicle_count#15 = 3)) AND isnotnull(hd_demo_sk#13)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#14] -Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Output [1]: [hd_demo_sk#13] +Input [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] (18) BroadcastExchange -Input [1]: [hd_demo_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#14] +Right keys [1]: [hd_demo_sk#13] Join condition: None (20) Project [codegen id : 4] Output [5]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] -Input [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, hd_demo_sk#14] +Input [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, hd_demo_sk#13] (21) Exchange Input [5]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] -Arguments: hashpartitioning(ss_addr_sk#3, 5), ENSURE_REQUIREMENTS, [id=#18] +Arguments: hashpartitioning(ss_addr_sk#3, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) Sort [codegen id : 5] Input [5]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] Arguments: [ss_addr_sk#3 ASC NULLS FIRST], false, 0 (23) Scan parquet default.customer_address -Output [2]: [ca_address_sk#19, ca_city#20] +Output [2]: [ca_address_sk#16, ca_city#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] ReadSchema: struct (24) ColumnarToRow [codegen id : 6] -Input [2]: [ca_address_sk#19, ca_city#20] +Input [2]: [ca_address_sk#16, ca_city#17] (25) Filter [codegen id : 6] -Input [2]: [ca_address_sk#19, ca_city#20] -Condition : (isnotnull(ca_address_sk#19) AND isnotnull(ca_city#20)) +Input [2]: [ca_address_sk#16, ca_city#17] +Condition : (isnotnull(ca_address_sk#16) AND isnotnull(ca_city#17)) (26) Exchange -Input [2]: [ca_address_sk#19, ca_city#20] -Arguments: hashpartitioning(ca_address_sk#19, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [2]: [ca_address_sk#16, ca_city#17] +Arguments: hashpartitioning(ca_address_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] (27) Sort [codegen id : 7] -Input [2]: [ca_address_sk#19, ca_city#20] -Arguments: [ca_address_sk#19 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#16, ca_city#17] +Arguments: [ca_address_sk#16 ASC NULLS FIRST], false, 0 (28) SortMergeJoin [codegen id : 8] Left keys [1]: [ss_addr_sk#3] -Right keys [1]: [ca_address_sk#19] +Right keys [1]: [ca_address_sk#16] Join condition: None (29) Project [codegen id : 8] -Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#20] -Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_address_sk#19, ca_city#20] +Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#17] +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_address_sk#16, ca_city#17] (30) HashAggregate [codegen id : 8] -Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#20] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20] +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#17] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17] Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] -Aggregate Attributes [2]: [sum#22, sum#23] -Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20, sum#24, sum#25] +Aggregate Attributes [2]: [sum#18, sum#19] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17, sum#20, sum#21] (31) HashAggregate [codegen id : 8] -Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20, sum#24, sum#25] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17, sum#20, sum#21] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17] Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#26, sum(UnscaledValue(ss_net_profit#7))#27] -Results [5]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#20 AS bought_city#28, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#26,17,2) AS amt#29, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#27,17,2) AS profit#30] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#22, sum(UnscaledValue(ss_net_profit#7))#23] +Results [5]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#17 AS bought_city#24, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#22,17,2) AS amt#25, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#23,17,2) AS profit#26] (32) Exchange -Input [5]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#28, amt#29, profit#30] -Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [5]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#24, amt#25, profit#26] +Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=5] (33) Sort [codegen id : 9] -Input [5]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#28, amt#29, profit#30] +Input [5]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#24, amt#25, profit#26] Arguments: [ss_customer_sk#1 ASC NULLS FIRST], false, 0 (34) Scan parquet default.customer -Output [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] +Output [4]: [c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 10] -Input [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] +Input [4]: [c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] (36) Filter [codegen id : 10] -Input [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] -Condition : (isnotnull(c_customer_sk#32) AND isnotnull(c_current_addr_sk#33)) +Input [4]: [c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] +Condition : (isnotnull(c_customer_sk#27) AND isnotnull(c_current_addr_sk#28)) (37) Exchange -Input [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] -Arguments: hashpartitioning(c_customer_sk#32, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [4]: [c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] +Arguments: hashpartitioning(c_customer_sk#27, 5), ENSURE_REQUIREMENTS, [plan_id=6] (38) Sort [codegen id : 11] -Input [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] -Arguments: [c_customer_sk#32 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] +Arguments: [c_customer_sk#27 ASC NULLS FIRST], false, 0 (39) SortMergeJoin [codegen id : 12] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#32] +Right keys [1]: [c_customer_sk#27] Join condition: None (40) Project [codegen id : 12] -Output [7]: [ss_ticket_number#5, bought_city#28, amt#29, profit#30, c_current_addr_sk#33, c_first_name#34, c_last_name#35] -Input [9]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#28, amt#29, profit#30, c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] +Output [7]: [ss_ticket_number#5, bought_city#24, amt#25, profit#26, c_current_addr_sk#28, c_first_name#29, c_last_name#30] +Input [9]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#24, amt#25, profit#26, c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] (41) Exchange -Input [7]: [ss_ticket_number#5, bought_city#28, amt#29, profit#30, c_current_addr_sk#33, c_first_name#34, c_last_name#35] -Arguments: hashpartitioning(c_current_addr_sk#33, 5), ENSURE_REQUIREMENTS, [id=#37] +Input [7]: [ss_ticket_number#5, bought_city#24, amt#25, profit#26, c_current_addr_sk#28, c_first_name#29, c_last_name#30] +Arguments: hashpartitioning(c_current_addr_sk#28, 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) Sort [codegen id : 13] -Input [7]: [ss_ticket_number#5, bought_city#28, amt#29, profit#30, c_current_addr_sk#33, c_first_name#34, c_last_name#35] -Arguments: [c_current_addr_sk#33 ASC NULLS FIRST], false, 0 +Input [7]: [ss_ticket_number#5, bought_city#24, amt#25, profit#26, c_current_addr_sk#28, c_first_name#29, c_last_name#30] +Arguments: [c_current_addr_sk#28 ASC NULLS FIRST], false, 0 (43) ReusedExchange [Reuses operator id: 26] -Output [2]: [ca_address_sk#38, ca_city#39] +Output [2]: [ca_address_sk#31, ca_city#32] (44) Sort [codegen id : 15] -Input [2]: [ca_address_sk#38, ca_city#39] -Arguments: [ca_address_sk#38 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#31, ca_city#32] +Arguments: [ca_address_sk#31 ASC NULLS FIRST], false, 0 (45) SortMergeJoin [codegen id : 16] -Left keys [1]: [c_current_addr_sk#33] -Right keys [1]: [ca_address_sk#38] -Join condition: NOT (ca_city#39 = bought_city#28) +Left keys [1]: [c_current_addr_sk#28] +Right keys [1]: [ca_address_sk#31] +Join condition: NOT (ca_city#32 = bought_city#24) (46) Project [codegen id : 16] -Output [7]: [c_last_name#35, c_first_name#34, ca_city#39, bought_city#28, ss_ticket_number#5, amt#29, profit#30] -Input [9]: [ss_ticket_number#5, bought_city#28, amt#29, profit#30, c_current_addr_sk#33, c_first_name#34, c_last_name#35, ca_address_sk#38, ca_city#39] +Output [7]: [c_last_name#30, c_first_name#29, ca_city#32, bought_city#24, ss_ticket_number#5, amt#25, profit#26] +Input [9]: [ss_ticket_number#5, bought_city#24, amt#25, profit#26, c_current_addr_sk#28, c_first_name#29, c_last_name#30, ca_address_sk#31, ca_city#32] (47) TakeOrderedAndProject -Input [7]: [c_last_name#35, c_first_name#34, ca_city#39, bought_city#28, ss_ticket_number#5, amt#29, profit#30] -Arguments: 100, [c_last_name#35 ASC NULLS FIRST, c_first_name#34 ASC NULLS FIRST, ca_city#39 ASC NULLS FIRST, bought_city#28 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#35, c_first_name#34, ca_city#39, bought_city#28, ss_ticket_number#5, amt#29, profit#30] +Input [7]: [c_last_name#30, c_first_name#29, ca_city#32, bought_city#24, ss_ticket_number#5, amt#25, profit#26] +Arguments: 100, [c_last_name#30 ASC NULLS FIRST, c_first_name#29 ASC NULLS FIRST, ca_city#32 ASC NULLS FIRST, bought_city#24 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#30, c_first_name#29, ca_city#32, bought_city#24, ss_ticket_number#5, amt#25, profit#26] ===== Subqueries ===== @@ -268,25 +268,25 @@ BroadcastExchange (52) (48) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#40, d_dow#41] +Output [3]: [d_date_sk#10, d_year#33, d_dow#34] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_dow, [0,6]), In(d_year, [1999,2000,2001]), In(d_date_sk, [2451181,2451182,2451188,2451189,2451195,2451196,2451202,2451203,2451209,2451210,2451216,2451217,2451223,2451224,2451230,2451231,2451237,2451238,2451244,2451245,2451251,2451252,2451258,2451259,2451265,2451266,2451272,2451273,2451279,2451280,2451286,2451287,2451293,2451294,2451300,2451301,2451307,2451308,2451314,2451315,2451321,2451322,2451328,2451329,2451335,2451336,2451342,2451343,2451349,2451350,2451356,2451357,2451363,2451364,2451370,2451371,2451377,2451378,2451384,2451385,2451391,2451392,2451398,2451399,2451405,2451406,2451412,2451413,2451419,2451420,2451426,2451427,2451433,2451434,2451440,2451441,2451447,2451448,2451454,2451455,2451461,2451462,2451468,2451469,2451475,2451476,2451482,2451483,2451489,2451490,2451496,2451497,2451503,2451504,2451510,2451511,2451517,2451518,2451524,2451525,2451531,2451532,2451538,2451539,2451545,2451546,2451552,2451553,2451559,2451560,2451566,2451567,2451573,2451574,2451580,2451581,2451587,2451588,2451594,2451595,2451601,2451602,2451608,2451609,2451615,2451616,2451622,2451623,2451629,2451630,2451636,2451637,2451643,2451644,2451650,2451651,2451657,2451658,2451664,2451665,2451671,2451672,2451678,2451679,2451685,2451686,2451692,2451693,2451699,2451700,2451706,2451707,2451713,2451714,2451720,2451721,2451727,2451728,2451734,2451735,2451741,2451742,2451748,2451749,2451755,2451756,2451762,2451763,2451769,2451770,2451776,2451777,2451783,2451784,2451790,2451791,2451797,2451798,2451804,2451805,2451811,2451812,2451818,2451819,2451825,2451826,2451832,2451833,2451839,2451840,2451846,2451847,2451853,2451854,2451860,2451861,2451867,2451868,2451874,2451875,2451881,2451882,2451888,2451889,2451895,2451896,2451902,2451903,2451909,2451910,2451916,2451917,2451923,2451924,2451930,2451931,2451937,2451938,2451944,2451945,2451951,2451952,2451958,2451959,2451965,2451966,2451972,2451973,2451979,2451980,2451986,2451987,2451993,2451994,2452000,2452001,2452007,2452008,2452014,2452015,2452021,2452022,2452028,2452029,2452035,2452036,2452042,2452043,2452049,2452050,2452056,2452057,2452063,2452064,2452070,2452071,2452077,2452078,2452084,2452085,2452091,2452092,2452098,2452099,2452105,2452106,2452112,2452113,2452119,2452120,2452126,2452127,2452133,2452134,2452140,2452141,2452147,2452148,2452154,2452155,2452161,2452162,2452168,2452169,2452175,2452176,2452182,2452183,2452189,2452190,2452196,2452197,2452203,2452204,2452210,2452211,2452217,2452218,2452224,2452225,2452231,2452232,2452238,2452239,2452245,2452246,2452252,2452253,2452259,2452260,2452266,2452267,2452273,2452274]), IsNotNull(d_date_sk)] ReadSchema: struct (49) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#40, d_dow#41] +Input [3]: [d_date_sk#10, d_year#33, d_dow#34] (50) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#40, d_dow#41] -Condition : (((d_dow#41 IN (6,0) AND d_year#40 IN (1999,2000,2001)) AND d_date_sk#10 INSET 2451181, 2451182, 2451188, 2451189, 2451195, 2451196, 2451202, 2451203, 2451209, 2451210, 2451216, 2451217, 2451223, 2451224, 2451230, 2451231, 2451237, 2451238, 2451244, 2451245, 2451251, 2451252, 2451258, 2451259, 2451265, 2451266, 2451272, 2451273, 2451279, 2451280, 2451286, 2451287, 2451293, 2451294, 2451300, 2451301, 2451307, 2451308, 2451314, 2451315, 2451321, 2451322, 2451328, 2451329, 2451335, 2451336, 2451342, 2451343, 2451349, 2451350, 2451356, 2451357, 2451363, 2451364, 2451370, 2451371, 2451377, 2451378, 2451384, 2451385, 2451391, 2451392, 2451398, 2451399, 2451405, 2451406, 2451412, 2451413, 2451419, 2451420, 2451426, 2451427, 2451433, 2451434, 2451440, 2451441, 2451447, 2451448, 2451454, 2451455, 2451461, 2451462, 2451468, 2451469, 2451475, 2451476, 2451482, 2451483, 2451489, 2451490, 2451496, 2451497, 2451503, 2451504, 2451510, 2451511, 2451517, 2451518, 2451524, 2451525, 2451531, 2451532, 2451538, 2451539, 2451545, 2451546, 2451552, 2451553, 2451559, 2451560, 2451566, 2451567, 2451573, 2451574, 2451580, 2451581, 2451587, 2451588, 2451594, 2451595, 2451601, 2451602, 2451608, 2451609, 2451615, 2451616, 2451622, 2451623, 2451629, 2451630, 2451636, 2451637, 2451643, 2451644, 2451650, 2451651, 2451657, 2451658, 2451664, 2451665, 2451671, 2451672, 2451678, 2451679, 2451685, 2451686, 2451692, 2451693, 2451699, 2451700, 2451706, 2451707, 2451713, 2451714, 2451720, 2451721, 2451727, 2451728, 2451734, 2451735, 2451741, 2451742, 2451748, 2451749, 2451755, 2451756, 2451762, 2451763, 2451769, 2451770, 2451776, 2451777, 2451783, 2451784, 2451790, 2451791, 2451797, 2451798, 2451804, 2451805, 2451811, 2451812, 2451818, 2451819, 2451825, 2451826, 2451832, 2451833, 2451839, 2451840, 2451846, 2451847, 2451853, 2451854, 2451860, 2451861, 2451867, 2451868, 2451874, 2451875, 2451881, 2451882, 2451888, 2451889, 2451895, 2451896, 2451902, 2451903, 2451909, 2451910, 2451916, 2451917, 2451923, 2451924, 2451930, 2451931, 2451937, 2451938, 2451944, 2451945, 2451951, 2451952, 2451958, 2451959, 2451965, 2451966, 2451972, 2451973, 2451979, 2451980, 2451986, 2451987, 2451993, 2451994, 2452000, 2452001, 2452007, 2452008, 2452014, 2452015, 2452021, 2452022, 2452028, 2452029, 2452035, 2452036, 2452042, 2452043, 2452049, 2452050, 2452056, 2452057, 2452063, 2452064, 2452070, 2452071, 2452077, 2452078, 2452084, 2452085, 2452091, 2452092, 2452098, 2452099, 2452105, 2452106, 2452112, 2452113, 2452119, 2452120, 2452126, 2452127, 2452133, 2452134, 2452140, 2452141, 2452147, 2452148, 2452154, 2452155, 2452161, 2452162, 2452168, 2452169, 2452175, 2452176, 2452182, 2452183, 2452189, 2452190, 2452196, 2452197, 2452203, 2452204, 2452210, 2452211, 2452217, 2452218, 2452224, 2452225, 2452231, 2452232, 2452238, 2452239, 2452245, 2452246, 2452252, 2452253, 2452259, 2452260, 2452266, 2452267, 2452273, 2452274) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#10, d_year#33, d_dow#34] +Condition : (((d_dow#34 IN (6,0) AND d_year#33 IN (1999,2000,2001)) AND d_date_sk#10 INSET 2451181, 2451182, 2451188, 2451189, 2451195, 2451196, 2451202, 2451203, 2451209, 2451210, 2451216, 2451217, 2451223, 2451224, 2451230, 2451231, 2451237, 2451238, 2451244, 2451245, 2451251, 2451252, 2451258, 2451259, 2451265, 2451266, 2451272, 2451273, 2451279, 2451280, 2451286, 2451287, 2451293, 2451294, 2451300, 2451301, 2451307, 2451308, 2451314, 2451315, 2451321, 2451322, 2451328, 2451329, 2451335, 2451336, 2451342, 2451343, 2451349, 2451350, 2451356, 2451357, 2451363, 2451364, 2451370, 2451371, 2451377, 2451378, 2451384, 2451385, 2451391, 2451392, 2451398, 2451399, 2451405, 2451406, 2451412, 2451413, 2451419, 2451420, 2451426, 2451427, 2451433, 2451434, 2451440, 2451441, 2451447, 2451448, 2451454, 2451455, 2451461, 2451462, 2451468, 2451469, 2451475, 2451476, 2451482, 2451483, 2451489, 2451490, 2451496, 2451497, 2451503, 2451504, 2451510, 2451511, 2451517, 2451518, 2451524, 2451525, 2451531, 2451532, 2451538, 2451539, 2451545, 2451546, 2451552, 2451553, 2451559, 2451560, 2451566, 2451567, 2451573, 2451574, 2451580, 2451581, 2451587, 2451588, 2451594, 2451595, 2451601, 2451602, 2451608, 2451609, 2451615, 2451616, 2451622, 2451623, 2451629, 2451630, 2451636, 2451637, 2451643, 2451644, 2451650, 2451651, 2451657, 2451658, 2451664, 2451665, 2451671, 2451672, 2451678, 2451679, 2451685, 2451686, 2451692, 2451693, 2451699, 2451700, 2451706, 2451707, 2451713, 2451714, 2451720, 2451721, 2451727, 2451728, 2451734, 2451735, 2451741, 2451742, 2451748, 2451749, 2451755, 2451756, 2451762, 2451763, 2451769, 2451770, 2451776, 2451777, 2451783, 2451784, 2451790, 2451791, 2451797, 2451798, 2451804, 2451805, 2451811, 2451812, 2451818, 2451819, 2451825, 2451826, 2451832, 2451833, 2451839, 2451840, 2451846, 2451847, 2451853, 2451854, 2451860, 2451861, 2451867, 2451868, 2451874, 2451875, 2451881, 2451882, 2451888, 2451889, 2451895, 2451896, 2451902, 2451903, 2451909, 2451910, 2451916, 2451917, 2451923, 2451924, 2451930, 2451931, 2451937, 2451938, 2451944, 2451945, 2451951, 2451952, 2451958, 2451959, 2451965, 2451966, 2451972, 2451973, 2451979, 2451980, 2451986, 2451987, 2451993, 2451994, 2452000, 2452001, 2452007, 2452008, 2452014, 2452015, 2452021, 2452022, 2452028, 2452029, 2452035, 2452036, 2452042, 2452043, 2452049, 2452050, 2452056, 2452057, 2452063, 2452064, 2452070, 2452071, 2452077, 2452078, 2452084, 2452085, 2452091, 2452092, 2452098, 2452099, 2452105, 2452106, 2452112, 2452113, 2452119, 2452120, 2452126, 2452127, 2452133, 2452134, 2452140, 2452141, 2452147, 2452148, 2452154, 2452155, 2452161, 2452162, 2452168, 2452169, 2452175, 2452176, 2452182, 2452183, 2452189, 2452190, 2452196, 2452197, 2452203, 2452204, 2452210, 2452211, 2452217, 2452218, 2452224, 2452225, 2452231, 2452232, 2452238, 2452239, 2452245, 2452246, 2452252, 2452253, 2452259, 2452260, 2452266, 2452267, 2452273, 2452274) AND isnotnull(d_date_sk#10)) (51) Project [codegen id : 1] Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#40, d_dow#41] +Input [3]: [d_date_sk#10, d_year#33, d_dow#34] (52) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46/explain.txt index 5d36c1d7cd2f5..618ab37aa7c98 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q46/explain.txt @@ -87,7 +87,7 @@ Input [2]: [s_store_sk#11, s_city#12] (11) BroadcastExchange Input [1]: [s_store_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_store_sk#4] @@ -99,123 +99,123 @@ Output [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#11] (14) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Output [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [Or(EqualTo(hd_dep_count,5),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Input [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] (16) Filter [codegen id : 3] -Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : (((hd_dep_count#15 = 5) OR (hd_vehicle_count#16 = 3)) AND isnotnull(hd_demo_sk#14)) +Input [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] +Condition : (((hd_dep_count#14 = 5) OR (hd_vehicle_count#15 = 3)) AND isnotnull(hd_demo_sk#13)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#14] -Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Output [1]: [hd_demo_sk#13] +Input [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] (18) BroadcastExchange -Input [1]: [hd_demo_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#14] +Right keys [1]: [hd_demo_sk#13] Join condition: None (20) Project [codegen id : 5] Output [5]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] -Input [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, hd_demo_sk#14] +Input [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, hd_demo_sk#13] (21) Scan parquet default.customer_address -Output [2]: [ca_address_sk#18, ca_city#19] +Output [2]: [ca_address_sk#16, ca_city#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] ReadSchema: struct (22) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#18, ca_city#19] +Input [2]: [ca_address_sk#16, ca_city#17] (23) Filter [codegen id : 4] -Input [2]: [ca_address_sk#18, ca_city#19] -Condition : (isnotnull(ca_address_sk#18) AND isnotnull(ca_city#19)) +Input [2]: [ca_address_sk#16, ca_city#17] +Condition : (isnotnull(ca_address_sk#16) AND isnotnull(ca_city#17)) (24) BroadcastExchange -Input [2]: [ca_address_sk#18, ca_city#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] +Input [2]: [ca_address_sk#16, ca_city#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (25) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_addr_sk#3] -Right keys [1]: [ca_address_sk#18] +Right keys [1]: [ca_address_sk#16] Join condition: None (26) Project [codegen id : 5] -Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#19] -Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_address_sk#18, ca_city#19] +Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#17] +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_address_sk#16, ca_city#17] (27) HashAggregate [codegen id : 5] -Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#19] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19] +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#17] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17] Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] -Aggregate Attributes [2]: [sum#21, sum#22] -Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24] +Aggregate Attributes [2]: [sum#18, sum#19] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17, sum#20, sum#21] (28) Exchange -Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, 5), ENSURE_REQUIREMENTS, [id=#25] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17, sum#20, sum#21] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 8] -Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17, sum#20, sum#21] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17] Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#26, sum(UnscaledValue(ss_net_profit#7))#27] -Results [5]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#19 AS bought_city#28, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#26,17,2) AS amt#29, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#27,17,2) AS profit#30] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#22, sum(UnscaledValue(ss_net_profit#7))#23] +Results [5]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#17 AS bought_city#24, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#22,17,2) AS amt#25, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#23,17,2) AS profit#26] (30) Scan parquet default.customer -Output [4]: [c_customer_sk#31, c_current_addr_sk#32, c_first_name#33, c_last_name#34] +Output [4]: [c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 6] -Input [4]: [c_customer_sk#31, c_current_addr_sk#32, c_first_name#33, c_last_name#34] +Input [4]: [c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] (32) Filter [codegen id : 6] -Input [4]: [c_customer_sk#31, c_current_addr_sk#32, c_first_name#33, c_last_name#34] -Condition : (isnotnull(c_customer_sk#31) AND isnotnull(c_current_addr_sk#32)) +Input [4]: [c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] +Condition : (isnotnull(c_customer_sk#27) AND isnotnull(c_current_addr_sk#28)) (33) BroadcastExchange -Input [4]: [c_customer_sk#31, c_current_addr_sk#32, c_first_name#33, c_last_name#34] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#35] +Input [4]: [c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#31] +Right keys [1]: [c_customer_sk#27] Join condition: None (35) Project [codegen id : 8] -Output [7]: [ss_ticket_number#5, bought_city#28, amt#29, profit#30, c_current_addr_sk#32, c_first_name#33, c_last_name#34] -Input [9]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#28, amt#29, profit#30, c_customer_sk#31, c_current_addr_sk#32, c_first_name#33, c_last_name#34] +Output [7]: [ss_ticket_number#5, bought_city#24, amt#25, profit#26, c_current_addr_sk#28, c_first_name#29, c_last_name#30] +Input [9]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#24, amt#25, profit#26, c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] (36) ReusedExchange [Reuses operator id: 24] -Output [2]: [ca_address_sk#36, ca_city#37] +Output [2]: [ca_address_sk#31, ca_city#32] (37) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [c_current_addr_sk#32] -Right keys [1]: [ca_address_sk#36] -Join condition: NOT (ca_city#37 = bought_city#28) +Left keys [1]: [c_current_addr_sk#28] +Right keys [1]: [ca_address_sk#31] +Join condition: NOT (ca_city#32 = bought_city#24) (38) Project [codegen id : 8] -Output [7]: [c_last_name#34, c_first_name#33, ca_city#37, bought_city#28, ss_ticket_number#5, amt#29, profit#30] -Input [9]: [ss_ticket_number#5, bought_city#28, amt#29, profit#30, c_current_addr_sk#32, c_first_name#33, c_last_name#34, ca_address_sk#36, ca_city#37] +Output [7]: [c_last_name#30, c_first_name#29, ca_city#32, bought_city#24, ss_ticket_number#5, amt#25, profit#26] +Input [9]: [ss_ticket_number#5, bought_city#24, amt#25, profit#26, c_current_addr_sk#28, c_first_name#29, c_last_name#30, ca_address_sk#31, ca_city#32] (39) TakeOrderedAndProject -Input [7]: [c_last_name#34, c_first_name#33, ca_city#37, bought_city#28, ss_ticket_number#5, amt#29, profit#30] -Arguments: 100, [c_last_name#34 ASC NULLS FIRST, c_first_name#33 ASC NULLS FIRST, ca_city#37 ASC NULLS FIRST, bought_city#28 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#34, c_first_name#33, ca_city#37, bought_city#28, ss_ticket_number#5, amt#29, profit#30] +Input [7]: [c_last_name#30, c_first_name#29, ca_city#32, bought_city#24, ss_ticket_number#5, amt#25, profit#26] +Arguments: 100, [c_last_name#30 ASC NULLS FIRST, c_first_name#29 ASC NULLS FIRST, ca_city#32 ASC NULLS FIRST, bought_city#24 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#30, c_first_name#29, ca_city#32, bought_city#24, ss_ticket_number#5, amt#25, profit#26] ===== Subqueries ===== @@ -228,25 +228,25 @@ BroadcastExchange (44) (40) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#38, d_dow#39] +Output [3]: [d_date_sk#10, d_year#33, d_dow#34] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_dow, [0,6]), In(d_year, [1999,2000,2001]), In(d_date_sk, [2451181,2451182,2451188,2451189,2451195,2451196,2451202,2451203,2451209,2451210,2451216,2451217,2451223,2451224,2451230,2451231,2451237,2451238,2451244,2451245,2451251,2451252,2451258,2451259,2451265,2451266,2451272,2451273,2451279,2451280,2451286,2451287,2451293,2451294,2451300,2451301,2451307,2451308,2451314,2451315,2451321,2451322,2451328,2451329,2451335,2451336,2451342,2451343,2451349,2451350,2451356,2451357,2451363,2451364,2451370,2451371,2451377,2451378,2451384,2451385,2451391,2451392,2451398,2451399,2451405,2451406,2451412,2451413,2451419,2451420,2451426,2451427,2451433,2451434,2451440,2451441,2451447,2451448,2451454,2451455,2451461,2451462,2451468,2451469,2451475,2451476,2451482,2451483,2451489,2451490,2451496,2451497,2451503,2451504,2451510,2451511,2451517,2451518,2451524,2451525,2451531,2451532,2451538,2451539,2451545,2451546,2451552,2451553,2451559,2451560,2451566,2451567,2451573,2451574,2451580,2451581,2451587,2451588,2451594,2451595,2451601,2451602,2451608,2451609,2451615,2451616,2451622,2451623,2451629,2451630,2451636,2451637,2451643,2451644,2451650,2451651,2451657,2451658,2451664,2451665,2451671,2451672,2451678,2451679,2451685,2451686,2451692,2451693,2451699,2451700,2451706,2451707,2451713,2451714,2451720,2451721,2451727,2451728,2451734,2451735,2451741,2451742,2451748,2451749,2451755,2451756,2451762,2451763,2451769,2451770,2451776,2451777,2451783,2451784,2451790,2451791,2451797,2451798,2451804,2451805,2451811,2451812,2451818,2451819,2451825,2451826,2451832,2451833,2451839,2451840,2451846,2451847,2451853,2451854,2451860,2451861,2451867,2451868,2451874,2451875,2451881,2451882,2451888,2451889,2451895,2451896,2451902,2451903,2451909,2451910,2451916,2451917,2451923,2451924,2451930,2451931,2451937,2451938,2451944,2451945,2451951,2451952,2451958,2451959,2451965,2451966,2451972,2451973,2451979,2451980,2451986,2451987,2451993,2451994,2452000,2452001,2452007,2452008,2452014,2452015,2452021,2452022,2452028,2452029,2452035,2452036,2452042,2452043,2452049,2452050,2452056,2452057,2452063,2452064,2452070,2452071,2452077,2452078,2452084,2452085,2452091,2452092,2452098,2452099,2452105,2452106,2452112,2452113,2452119,2452120,2452126,2452127,2452133,2452134,2452140,2452141,2452147,2452148,2452154,2452155,2452161,2452162,2452168,2452169,2452175,2452176,2452182,2452183,2452189,2452190,2452196,2452197,2452203,2452204,2452210,2452211,2452217,2452218,2452224,2452225,2452231,2452232,2452238,2452239,2452245,2452246,2452252,2452253,2452259,2452260,2452266,2452267,2452273,2452274]), IsNotNull(d_date_sk)] ReadSchema: struct (41) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#38, d_dow#39] +Input [3]: [d_date_sk#10, d_year#33, d_dow#34] (42) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#38, d_dow#39] -Condition : (((d_dow#39 IN (6,0) AND d_year#38 IN (1999,2000,2001)) AND d_date_sk#10 INSET 2451181, 2451182, 2451188, 2451189, 2451195, 2451196, 2451202, 2451203, 2451209, 2451210, 2451216, 2451217, 2451223, 2451224, 2451230, 2451231, 2451237, 2451238, 2451244, 2451245, 2451251, 2451252, 2451258, 2451259, 2451265, 2451266, 2451272, 2451273, 2451279, 2451280, 2451286, 2451287, 2451293, 2451294, 2451300, 2451301, 2451307, 2451308, 2451314, 2451315, 2451321, 2451322, 2451328, 2451329, 2451335, 2451336, 2451342, 2451343, 2451349, 2451350, 2451356, 2451357, 2451363, 2451364, 2451370, 2451371, 2451377, 2451378, 2451384, 2451385, 2451391, 2451392, 2451398, 2451399, 2451405, 2451406, 2451412, 2451413, 2451419, 2451420, 2451426, 2451427, 2451433, 2451434, 2451440, 2451441, 2451447, 2451448, 2451454, 2451455, 2451461, 2451462, 2451468, 2451469, 2451475, 2451476, 2451482, 2451483, 2451489, 2451490, 2451496, 2451497, 2451503, 2451504, 2451510, 2451511, 2451517, 2451518, 2451524, 2451525, 2451531, 2451532, 2451538, 2451539, 2451545, 2451546, 2451552, 2451553, 2451559, 2451560, 2451566, 2451567, 2451573, 2451574, 2451580, 2451581, 2451587, 2451588, 2451594, 2451595, 2451601, 2451602, 2451608, 2451609, 2451615, 2451616, 2451622, 2451623, 2451629, 2451630, 2451636, 2451637, 2451643, 2451644, 2451650, 2451651, 2451657, 2451658, 2451664, 2451665, 2451671, 2451672, 2451678, 2451679, 2451685, 2451686, 2451692, 2451693, 2451699, 2451700, 2451706, 2451707, 2451713, 2451714, 2451720, 2451721, 2451727, 2451728, 2451734, 2451735, 2451741, 2451742, 2451748, 2451749, 2451755, 2451756, 2451762, 2451763, 2451769, 2451770, 2451776, 2451777, 2451783, 2451784, 2451790, 2451791, 2451797, 2451798, 2451804, 2451805, 2451811, 2451812, 2451818, 2451819, 2451825, 2451826, 2451832, 2451833, 2451839, 2451840, 2451846, 2451847, 2451853, 2451854, 2451860, 2451861, 2451867, 2451868, 2451874, 2451875, 2451881, 2451882, 2451888, 2451889, 2451895, 2451896, 2451902, 2451903, 2451909, 2451910, 2451916, 2451917, 2451923, 2451924, 2451930, 2451931, 2451937, 2451938, 2451944, 2451945, 2451951, 2451952, 2451958, 2451959, 2451965, 2451966, 2451972, 2451973, 2451979, 2451980, 2451986, 2451987, 2451993, 2451994, 2452000, 2452001, 2452007, 2452008, 2452014, 2452015, 2452021, 2452022, 2452028, 2452029, 2452035, 2452036, 2452042, 2452043, 2452049, 2452050, 2452056, 2452057, 2452063, 2452064, 2452070, 2452071, 2452077, 2452078, 2452084, 2452085, 2452091, 2452092, 2452098, 2452099, 2452105, 2452106, 2452112, 2452113, 2452119, 2452120, 2452126, 2452127, 2452133, 2452134, 2452140, 2452141, 2452147, 2452148, 2452154, 2452155, 2452161, 2452162, 2452168, 2452169, 2452175, 2452176, 2452182, 2452183, 2452189, 2452190, 2452196, 2452197, 2452203, 2452204, 2452210, 2452211, 2452217, 2452218, 2452224, 2452225, 2452231, 2452232, 2452238, 2452239, 2452245, 2452246, 2452252, 2452253, 2452259, 2452260, 2452266, 2452267, 2452273, 2452274) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#10, d_year#33, d_dow#34] +Condition : (((d_dow#34 IN (6,0) AND d_year#33 IN (1999,2000,2001)) AND d_date_sk#10 INSET 2451181, 2451182, 2451188, 2451189, 2451195, 2451196, 2451202, 2451203, 2451209, 2451210, 2451216, 2451217, 2451223, 2451224, 2451230, 2451231, 2451237, 2451238, 2451244, 2451245, 2451251, 2451252, 2451258, 2451259, 2451265, 2451266, 2451272, 2451273, 2451279, 2451280, 2451286, 2451287, 2451293, 2451294, 2451300, 2451301, 2451307, 2451308, 2451314, 2451315, 2451321, 2451322, 2451328, 2451329, 2451335, 2451336, 2451342, 2451343, 2451349, 2451350, 2451356, 2451357, 2451363, 2451364, 2451370, 2451371, 2451377, 2451378, 2451384, 2451385, 2451391, 2451392, 2451398, 2451399, 2451405, 2451406, 2451412, 2451413, 2451419, 2451420, 2451426, 2451427, 2451433, 2451434, 2451440, 2451441, 2451447, 2451448, 2451454, 2451455, 2451461, 2451462, 2451468, 2451469, 2451475, 2451476, 2451482, 2451483, 2451489, 2451490, 2451496, 2451497, 2451503, 2451504, 2451510, 2451511, 2451517, 2451518, 2451524, 2451525, 2451531, 2451532, 2451538, 2451539, 2451545, 2451546, 2451552, 2451553, 2451559, 2451560, 2451566, 2451567, 2451573, 2451574, 2451580, 2451581, 2451587, 2451588, 2451594, 2451595, 2451601, 2451602, 2451608, 2451609, 2451615, 2451616, 2451622, 2451623, 2451629, 2451630, 2451636, 2451637, 2451643, 2451644, 2451650, 2451651, 2451657, 2451658, 2451664, 2451665, 2451671, 2451672, 2451678, 2451679, 2451685, 2451686, 2451692, 2451693, 2451699, 2451700, 2451706, 2451707, 2451713, 2451714, 2451720, 2451721, 2451727, 2451728, 2451734, 2451735, 2451741, 2451742, 2451748, 2451749, 2451755, 2451756, 2451762, 2451763, 2451769, 2451770, 2451776, 2451777, 2451783, 2451784, 2451790, 2451791, 2451797, 2451798, 2451804, 2451805, 2451811, 2451812, 2451818, 2451819, 2451825, 2451826, 2451832, 2451833, 2451839, 2451840, 2451846, 2451847, 2451853, 2451854, 2451860, 2451861, 2451867, 2451868, 2451874, 2451875, 2451881, 2451882, 2451888, 2451889, 2451895, 2451896, 2451902, 2451903, 2451909, 2451910, 2451916, 2451917, 2451923, 2451924, 2451930, 2451931, 2451937, 2451938, 2451944, 2451945, 2451951, 2451952, 2451958, 2451959, 2451965, 2451966, 2451972, 2451973, 2451979, 2451980, 2451986, 2451987, 2451993, 2451994, 2452000, 2452001, 2452007, 2452008, 2452014, 2452015, 2452021, 2452022, 2452028, 2452029, 2452035, 2452036, 2452042, 2452043, 2452049, 2452050, 2452056, 2452057, 2452063, 2452064, 2452070, 2452071, 2452077, 2452078, 2452084, 2452085, 2452091, 2452092, 2452098, 2452099, 2452105, 2452106, 2452112, 2452113, 2452119, 2452120, 2452126, 2452127, 2452133, 2452134, 2452140, 2452141, 2452147, 2452148, 2452154, 2452155, 2452161, 2452162, 2452168, 2452169, 2452175, 2452176, 2452182, 2452183, 2452189, 2452190, 2452196, 2452197, 2452203, 2452204, 2452210, 2452211, 2452217, 2452218, 2452224, 2452225, 2452231, 2452232, 2452238, 2452239, 2452245, 2452246, 2452252, 2452253, 2452259, 2452260, 2452266, 2452267, 2452273, 2452274) AND isnotnull(d_date_sk#10)) (43) Project [codegen id : 1] Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#38, d_dow#39] +Input [3]: [d_date_sk#10, d_year#33, d_dow#34] (44) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52.sf100/explain.txt index 02f680fc0dd1a..eb1ec98eb6304 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52.sf100/explain.txt @@ -42,88 +42,88 @@ Input [3]: [d_date_sk#1, d_year#2, d_moy#3] (5) BroadcastExchange Input [2]: [d_date_sk#1, d_year#2] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.store_sales -Output [3]: [ss_item_sk#5, ss_ext_sales_price#6, ss_sold_date_sk#7] +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#7), (ss_sold_date_sk#7 >= 2451149), (ss_sold_date_sk#7 <= 2451179), dynamicpruningexpression(ss_sold_date_sk#7 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ss_sold_date_sk#6), (ss_sold_date_sk#6 >= 2451149), (ss_sold_date_sk#6 <= 2451179), dynamicpruningexpression(ss_sold_date_sk#6 IN dynamicpruning#7)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (7) ColumnarToRow -Input [3]: [ss_item_sk#5, ss_ext_sales_price#6, ss_sold_date_sk#7] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] (8) Filter -Input [3]: [ss_item_sk#5, ss_ext_sales_price#6, ss_sold_date_sk#7] -Condition : isnotnull(ss_item_sk#5) +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [ss_sold_date_sk#6] Join condition: None (10) Project [codegen id : 3] -Output [3]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6] -Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#5, ss_ext_sales_price#6, ss_sold_date_sk#7] +Output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] +Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] (11) Scan parquet default.item -Output [4]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manager_id#12] +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manager_id#12] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] (13) Filter [codegen id : 2] -Input [4]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manager_id#12] -Condition : ((isnotnull(i_manager_id#12) AND (i_manager_id#12 = 1)) AND isnotnull(i_item_sk#9)) +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) (14) Project [codegen id : 2] -Output [3]: [i_item_sk#9, i_brand_id#10, i_brand#11] -Input [4]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manager_id#12] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] (15) BroadcastExchange -Input [3]: [i_item_sk#9, i_brand_id#10, i_brand#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#5] -Right keys [1]: [i_item_sk#9] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#8] Join condition: None (17) Project [codegen id : 3] -Output [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#10, i_brand#11] -Input [6]: [d_year#2, ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#9, i_brand_id#10, i_brand#11] +Output [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#9, i_brand#10] +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#8, i_brand_id#9, i_brand#10] (18) HashAggregate [codegen id : 3] -Input [4]: [d_year#2, ss_ext_sales_price#6, i_brand_id#10, i_brand#11] -Keys [3]: [d_year#2, i_brand#11, i_brand_id#10] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum#14] -Results [4]: [d_year#2, i_brand#11, i_brand_id#10, sum#15] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#9, i_brand#10] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#12] +Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#13] (19) Exchange -Input [4]: [d_year#2, i_brand#11, i_brand_id#10, sum#15] -Arguments: hashpartitioning(d_year#2, i_brand#11, i_brand_id#10, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#13] +Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [4]: [d_year#2, i_brand#11, i_brand_id#10, sum#15] -Keys [3]: [d_year#2, i_brand#11, i_brand_id#10] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#17] -Results [4]: [d_year#2, i_brand_id#10 AS brand_id#18, i_brand#11 AS brand#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#17,17,2) AS ext_price#20] +Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#13] +Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#14] +Results [4]: [d_year#2, i_brand_id#9 AS brand_id#15, i_brand#10 AS brand#16, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#14,17,2) AS ext_price#17] (21) TakeOrderedAndProject -Input [4]: [d_year#2, brand_id#18, brand#19, ext_price#20] -Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#20 DESC NULLS LAST, brand_id#18 ASC NULLS FIRST], [d_year#2, brand_id#18, brand#19, ext_price#20] +Input [4]: [d_year#2, brand_id#15, brand#16, ext_price#17] +Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#17 DESC NULLS LAST, brand_id#15 ASC NULLS FIRST], [d_year#2, brand_id#15, brand#16, ext_price#17] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7 ReusedExchange (22) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52/explain.txt index 8d081d60f9e85..012a1cc0d40b5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q52/explain.txt @@ -57,7 +57,7 @@ Condition : isnotnull(ss_item_sk#4) (8) BroadcastExchange Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] @@ -69,55 +69,55 @@ Output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] (11) Scan parquet default.item -Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] (13) Filter [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] -Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 1)) AND isnotnull(i_item_sk#7)) (14) Project [codegen id : 2] -Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] (15) BroadcastExchange -Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#4] -Right keys [1]: [i_item_sk#8] +Right keys [1]: [i_item_sk#7] Join condition: None (17) Project [codegen id : 3] -Output [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#9, i_brand#10] -Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#8, i_brand_id#9, i_brand#10] +Output [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] (18) HashAggregate [codegen id : 3] -Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#9, i_brand#10] -Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum#13] -Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] (19) Exchange -Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] -Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(d_year#2, i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] -Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#16] -Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#16,17,2) AS ext_price#19] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [4]: [d_year#2, i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS ext_price#16] (21) TakeOrderedAndProject -Input [4]: [d_year#2, brand_id#17, brand#18, ext_price#19] -Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, ext_price#19] +Input [4]: [d_year#2, brand_id#14, brand#15, ext_price#16] +Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [d_year#2, brand_id#14, brand#15, ext_price#16] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt index 42b83c9c7d830..321d0ec4277f6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53.sf100/explain.txt @@ -49,116 +49,116 @@ Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] (5) BroadcastExchange Input [2]: [i_item_sk#1, i_manufact_id#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.store_sales -Output [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] +Output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#14), (ss_sold_date_sk#14 >= 2451911), (ss_sold_date_sk#14 <= 2452275), dynamicpruningexpression(ss_sold_date_sk#14 IN dynamicpruning#15)] +PartitionFilters: [isnotnull(ss_sold_date_sk#13), (ss_sold_date_sk#13 >= 2451911), (ss_sold_date_sk#13 <= 2452275), dynamicpruningexpression(ss_sold_date_sk#13 IN dynamicpruning#14)] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (7) ColumnarToRow -Input [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] (8) Filter -Input [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] -Condition : (isnotnull(ss_item_sk#11) AND isnotnull(ss_store_sk#12)) +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] -Right keys [1]: [ss_item_sk#11] +Right keys [1]: [ss_item_sk#10] Join condition: None (10) Project [codegen id : 4] -Output [4]: [i_manufact_id#5, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] -Input [6]: [i_item_sk#1, i_manufact_id#5, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] +Output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Input [6]: [i_item_sk#1, i_manufact_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] (11) Scan parquet default.store -Output [1]: [s_store_sk#16] +Output [1]: [s_store_sk#15] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [1]: [s_store_sk#16] +Input [1]: [s_store_sk#15] (13) Filter [codegen id : 2] -Input [1]: [s_store_sk#16] -Condition : isnotnull(s_store_sk#16) +Input [1]: [s_store_sk#15] +Condition : isnotnull(s_store_sk#15) (14) BroadcastExchange -Input [1]: [s_store_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#12] -Right keys [1]: [s_store_sk#16] +Left keys [1]: [ss_store_sk#11] +Right keys [1]: [s_store_sk#15] Join condition: None (16) Project [codegen id : 4] -Output [3]: [i_manufact_id#5, ss_sales_price#13, ss_sold_date_sk#14] -Input [5]: [i_manufact_id#5, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14, s_store_sk#16] +Output [3]: [i_manufact_id#5, ss_sales_price#12, ss_sold_date_sk#13] +Input [5]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, s_store_sk#15] (17) ReusedExchange [Reuses operator id: 33] -Output [2]: [d_date_sk#18, d_qoy#19] +Output [2]: [d_date_sk#16, d_qoy#17] (18) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#14] -Right keys [1]: [d_date_sk#18] +Left keys [1]: [ss_sold_date_sk#13] +Right keys [1]: [d_date_sk#16] Join condition: None (19) Project [codegen id : 4] -Output [3]: [i_manufact_id#5, ss_sales_price#13, d_qoy#19] -Input [5]: [i_manufact_id#5, ss_sales_price#13, ss_sold_date_sk#14, d_date_sk#18, d_qoy#19] +Output [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#17] +Input [5]: [i_manufact_id#5, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#16, d_qoy#17] (20) HashAggregate [codegen id : 4] -Input [3]: [i_manufact_id#5, ss_sales_price#13, d_qoy#19] -Keys [2]: [i_manufact_id#5, d_qoy#19] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#13))] -Aggregate Attributes [1]: [sum#20] -Results [3]: [i_manufact_id#5, d_qoy#19, sum#21] +Input [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#17] +Keys [2]: [i_manufact_id#5, d_qoy#17] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [i_manufact_id#5, d_qoy#17, sum#19] (21) Exchange -Input [3]: [i_manufact_id#5, d_qoy#19, sum#21] -Arguments: hashpartitioning(i_manufact_id#5, d_qoy#19, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [3]: [i_manufact_id#5, d_qoy#17, sum#19] +Arguments: hashpartitioning(i_manufact_id#5, d_qoy#17, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 5] -Input [3]: [i_manufact_id#5, d_qoy#19, sum#21] -Keys [2]: [i_manufact_id#5, d_qoy#19] -Functions [1]: [sum(UnscaledValue(ss_sales_price#13))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#13))#23] -Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#23,17,2) AS sum_sales#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#23,17,2) AS _w0#25] +Input [3]: [i_manufact_id#5, d_qoy#17, sum#19] +Keys [2]: [i_manufact_id#5, d_qoy#17] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#20] +Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS _w0#22] (23) Exchange -Input [3]: [i_manufact_id#5, sum_sales#24, _w0#25] -Arguments: hashpartitioning(i_manufact_id#5, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_manufact_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 6] -Input [3]: [i_manufact_id#5, sum_sales#24, _w0#25] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] Arguments: [i_manufact_id#5 ASC NULLS FIRST], false, 0 (25) Window -Input [3]: [i_manufact_id#5, sum_sales#24, _w0#25] -Arguments: [avg(_w0#25) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#27], [i_manufact_id#5] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#23], [i_manufact_id#5] (26) Filter [codegen id : 7] -Input [4]: [i_manufact_id#5, sum_sales#24, _w0#25, avg_quarterly_sales#27] -Condition : (isnotnull(avg_quarterly_sales#27) AND ((avg_quarterly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] +Condition : (isnotnull(avg_quarterly_sales#23) AND ((avg_quarterly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] -Output [3]: [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] -Input [4]: [i_manufact_id#5, sum_sales#24, _w0#25, avg_quarterly_sales#27] +Output [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] +Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] (28) TakeOrderedAndProject -Input [3]: [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] -Arguments: 100, [avg_quarterly_sales#27 ASC NULLS FIRST, sum_sales#24 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] +Input [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] +Arguments: 100, [avg_quarterly_sales#23 ASC NULLS FIRST, sum_sales#21 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#14 IN dynamicpruning#15 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#13 IN dynamicpruning#14 BroadcastExchange (33) +- * Project (32) +- * Filter (31) @@ -167,25 +167,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [3]: [d_date_sk#18, d_month_seq#28, d_qoy#19] +Output [3]: [d_date_sk#16, d_month_seq#24, d_qoy#17] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_month_seq, [1212,1213,1214,1215,1216,1217,1218,1219,1220,1221,1222,1223]), GreaterThanOrEqual(d_date_sk,2451911), LessThanOrEqual(d_date_sk,2452275), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#18, d_month_seq#28, d_qoy#19] +Input [3]: [d_date_sk#16, d_month_seq#24, d_qoy#17] (31) Filter [codegen id : 1] -Input [3]: [d_date_sk#18, d_month_seq#28, d_qoy#19] -Condition : (((d_month_seq#28 INSET 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223 AND (d_date_sk#18 >= 2451911)) AND (d_date_sk#18 <= 2452275)) AND isnotnull(d_date_sk#18)) +Input [3]: [d_date_sk#16, d_month_seq#24, d_qoy#17] +Condition : (((d_month_seq#24 INSET 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223 AND (d_date_sk#16 >= 2451911)) AND (d_date_sk#16 <= 2452275)) AND isnotnull(d_date_sk#16)) (32) Project [codegen id : 1] -Output [2]: [d_date_sk#18, d_qoy#19] -Input [3]: [d_date_sk#18, d_month_seq#28, d_qoy#19] +Output [2]: [d_date_sk#16, d_qoy#17] +Input [3]: [d_date_sk#16, d_month_seq#24, d_qoy#17] (33) BroadcastExchange -Input [2]: [d_date_sk#18, d_qoy#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] +Input [2]: [d_date_sk#16, d_qoy#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt index e7ae5ce6dcfb7..c91dd8a11b5f7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q53/explain.txt @@ -64,7 +64,7 @@ Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) (8) BroadcastExchange Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] @@ -76,85 +76,85 @@ Output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk Input [6]: [i_item_sk#1, i_manufact_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] (11) ReusedExchange [Reuses operator id: 33] -Output [2]: [d_date_sk#16, d_qoy#17] +Output [2]: [d_date_sk#15, d_qoy#16] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_date_sk#13] -Right keys [1]: [d_date_sk#16] +Right keys [1]: [d_date_sk#15] Join condition: None (13) Project [codegen id : 4] -Output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#17] -Input [6]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#16, d_qoy#17] +Output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16] +Input [6]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#15, d_qoy#16] (14) Scan parquet default.store -Output [1]: [s_store_sk#18] +Output [1]: [s_store_sk#17] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [1]: [s_store_sk#18] +Input [1]: [s_store_sk#17] (16) Filter [codegen id : 3] -Input [1]: [s_store_sk#18] -Condition : isnotnull(s_store_sk#18) +Input [1]: [s_store_sk#17] +Condition : isnotnull(s_store_sk#17) (17) BroadcastExchange -Input [1]: [s_store_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +Input [1]: [s_store_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#11] -Right keys [1]: [s_store_sk#18] +Right keys [1]: [s_store_sk#17] Join condition: None (19) Project [codegen id : 4] -Output [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#17] -Input [5]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#17, s_store_sk#18] +Output [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#16] +Input [5]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16, s_store_sk#17] (20) HashAggregate [codegen id : 4] -Input [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#17] -Keys [2]: [i_manufact_id#5, d_qoy#17] +Input [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#16] +Keys [2]: [i_manufact_id#5, d_qoy#16] Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] -Aggregate Attributes [1]: [sum#20] -Results [3]: [i_manufact_id#5, d_qoy#17, sum#21] +Aggregate Attributes [1]: [sum#18] +Results [3]: [i_manufact_id#5, d_qoy#16, sum#19] (21) Exchange -Input [3]: [i_manufact_id#5, d_qoy#17, sum#21] -Arguments: hashpartitioning(i_manufact_id#5, d_qoy#17, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [3]: [i_manufact_id#5, d_qoy#16, sum#19] +Arguments: hashpartitioning(i_manufact_id#5, d_qoy#16, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 5] -Input [3]: [i_manufact_id#5, d_qoy#17, sum#21] -Keys [2]: [i_manufact_id#5, d_qoy#17] +Input [3]: [i_manufact_id#5, d_qoy#16, sum#19] +Keys [2]: [i_manufact_id#5, d_qoy#16] Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#23] -Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#23,17,2) AS sum_sales#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#23,17,2) AS _w0#25] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#20] +Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS _w0#22] (23) Exchange -Input [3]: [i_manufact_id#5, sum_sales#24, _w0#25] -Arguments: hashpartitioning(i_manufact_id#5, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_manufact_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 6] -Input [3]: [i_manufact_id#5, sum_sales#24, _w0#25] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] Arguments: [i_manufact_id#5 ASC NULLS FIRST], false, 0 (25) Window -Input [3]: [i_manufact_id#5, sum_sales#24, _w0#25] -Arguments: [avg(_w0#25) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#27], [i_manufact_id#5] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#23], [i_manufact_id#5] (26) Filter [codegen id : 7] -Input [4]: [i_manufact_id#5, sum_sales#24, _w0#25, avg_quarterly_sales#27] -Condition : (isnotnull(avg_quarterly_sales#27) AND ((avg_quarterly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] +Condition : (isnotnull(avg_quarterly_sales#23) AND ((avg_quarterly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] -Output [3]: [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] -Input [4]: [i_manufact_id#5, sum_sales#24, _w0#25, avg_quarterly_sales#27] +Output [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] +Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] (28) TakeOrderedAndProject -Input [3]: [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] -Arguments: 100, [avg_quarterly_sales#27 ASC NULLS FIRST, sum_sales#24 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] +Input [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] +Arguments: 100, [avg_quarterly_sales#23 ASC NULLS FIRST, sum_sales#21 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] ===== Subqueries ===== @@ -167,25 +167,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [3]: [d_date_sk#16, d_month_seq#28, d_qoy#17] +Output [3]: [d_date_sk#15, d_month_seq#24, d_qoy#16] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_month_seq, [1212,1213,1214,1215,1216,1217,1218,1219,1220,1221,1222,1223]), GreaterThanOrEqual(d_date_sk,2451911), LessThanOrEqual(d_date_sk,2452275), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#16, d_month_seq#28, d_qoy#17] +Input [3]: [d_date_sk#15, d_month_seq#24, d_qoy#16] (31) Filter [codegen id : 1] -Input [3]: [d_date_sk#16, d_month_seq#28, d_qoy#17] -Condition : (((d_month_seq#28 INSET 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223 AND (d_date_sk#16 >= 2451911)) AND (d_date_sk#16 <= 2452275)) AND isnotnull(d_date_sk#16)) +Input [3]: [d_date_sk#15, d_month_seq#24, d_qoy#16] +Condition : (((d_month_seq#24 INSET 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223 AND (d_date_sk#15 >= 2451911)) AND (d_date_sk#15 <= 2452275)) AND isnotnull(d_date_sk#15)) (32) Project [codegen id : 1] -Output [2]: [d_date_sk#16, d_qoy#17] -Input [3]: [d_date_sk#16, d_month_seq#28, d_qoy#17] +Output [2]: [d_date_sk#15, d_qoy#16] +Input [3]: [d_date_sk#15, d_month_seq#24, d_qoy#16] (33) BroadcastExchange -Input [2]: [d_date_sk#16, d_qoy#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] +Input [2]: [d_date_sk#15, d_qoy#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55.sf100/explain.txt index df4d94f40bc16..1bed4285522f5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55.sf100/explain.txt @@ -42,88 +42,88 @@ Input [3]: [d_date_sk#1, d_year#2, d_moy#3] (5) BroadcastExchange Input [1]: [d_date_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.store_sales -Output [3]: [ss_item_sk#5, ss_ext_sales_price#6, ss_sold_date_sk#7] +Output [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#7), (ss_sold_date_sk#7 >= 2452215), (ss_sold_date_sk#7 <= 2452244), dynamicpruningexpression(ss_sold_date_sk#7 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ss_sold_date_sk#6), (ss_sold_date_sk#6 >= 2452215), (ss_sold_date_sk#6 <= 2452244), dynamicpruningexpression(ss_sold_date_sk#6 IN dynamicpruning#7)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (7) ColumnarToRow -Input [3]: [ss_item_sk#5, ss_ext_sales_price#6, ss_sold_date_sk#7] +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] (8) Filter -Input [3]: [ss_item_sk#5, ss_ext_sales_price#6, ss_sold_date_sk#7] -Condition : isnotnull(ss_item_sk#5) +Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_item_sk#4) (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [ss_sold_date_sk#6] Join condition: None (10) Project [codegen id : 3] -Output [2]: [ss_item_sk#5, ss_ext_sales_price#6] -Input [4]: [d_date_sk#1, ss_item_sk#5, ss_ext_sales_price#6, ss_sold_date_sk#7] +Output [2]: [ss_item_sk#4, ss_ext_sales_price#5] +Input [4]: [d_date_sk#1, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] (11) Scan parquet default.item -Output [4]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manager_id#12] +Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,48), IsNotNull(i_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manager_id#12] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] (13) Filter [codegen id : 2] -Input [4]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manager_id#12] -Condition : ((isnotnull(i_manager_id#12) AND (i_manager_id#12 = 48)) AND isnotnull(i_item_sk#9)) +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 48)) AND isnotnull(i_item_sk#8)) (14) Project [codegen id : 2] -Output [3]: [i_item_sk#9, i_brand_id#10, i_brand#11] -Input [4]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manager_id#12] +Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] (15) BroadcastExchange -Input [3]: [i_item_sk#9, i_brand_id#10, i_brand#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_item_sk#5] -Right keys [1]: [i_item_sk#9] +Left keys [1]: [ss_item_sk#4] +Right keys [1]: [i_item_sk#8] Join condition: None (17) Project [codegen id : 3] -Output [3]: [ss_ext_sales_price#6, i_brand_id#10, i_brand#11] -Input [5]: [ss_item_sk#5, ss_ext_sales_price#6, i_item_sk#9, i_brand_id#10, i_brand#11] +Output [3]: [ss_ext_sales_price#5, i_brand_id#9, i_brand#10] +Input [5]: [ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#8, i_brand_id#9, i_brand#10] (18) HashAggregate [codegen id : 3] -Input [3]: [ss_ext_sales_price#6, i_brand_id#10, i_brand#11] -Keys [2]: [i_brand#11, i_brand_id#10] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum#14] -Results [3]: [i_brand#11, i_brand_id#10, sum#15] +Input [3]: [ss_ext_sales_price#5, i_brand_id#9, i_brand#10] +Keys [2]: [i_brand#10, i_brand_id#9] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum#12] +Results [3]: [i_brand#10, i_brand_id#9, sum#13] (19) Exchange -Input [3]: [i_brand#11, i_brand_id#10, sum#15] -Arguments: hashpartitioning(i_brand#11, i_brand_id#10, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [3]: [i_brand#10, i_brand_id#9, sum#13] +Arguments: hashpartitioning(i_brand#10, i_brand_id#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [3]: [i_brand#11, i_brand_id#10, sum#15] -Keys [2]: [i_brand#11, i_brand_id#10] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#6))#17] -Results [3]: [i_brand_id#10 AS brand_id#18, i_brand#11 AS brand#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#17,17,2) AS ext_price#20] +Input [3]: [i_brand#10, i_brand_id#9, sum#13] +Keys [2]: [i_brand#10, i_brand_id#9] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#14] +Results [3]: [i_brand_id#9 AS brand_id#15, i_brand#10 AS brand#16, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#14,17,2) AS ext_price#17] (21) TakeOrderedAndProject -Input [3]: [brand_id#18, brand#19, ext_price#20] -Arguments: 100, [ext_price#20 DESC NULLS LAST, brand_id#18 ASC NULLS FIRST], [brand_id#18, brand#19, ext_price#20] +Input [3]: [brand_id#15, brand#16, ext_price#17] +Arguments: 100, [ext_price#17 DESC NULLS LAST, brand_id#15 ASC NULLS FIRST], [brand_id#15, brand#16, ext_price#17] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7 ReusedExchange (22) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55/explain.txt index a10fc3256db33..c9a41a6596900 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q55/explain.txt @@ -57,7 +57,7 @@ Condition : isnotnull(ss_item_sk#4) (8) BroadcastExchange Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] @@ -69,55 +69,55 @@ Output [2]: [ss_item_sk#4, ss_ext_sales_price#5] Input [4]: [d_date_sk#1, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] (11) Scan parquet default.item -Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,48), IsNotNull(i_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] (13) Filter [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] -Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 48)) AND isnotnull(i_item_sk#8)) +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 48)) AND isnotnull(i_item_sk#7)) (14) Project [codegen id : 2] -Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] (15) BroadcastExchange -Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#4] -Right keys [1]: [i_item_sk#8] +Right keys [1]: [i_item_sk#7] Join condition: None (17) Project [codegen id : 3] -Output [3]: [ss_ext_sales_price#5, i_brand_id#9, i_brand#10] -Input [5]: [ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#8, i_brand_id#9, i_brand#10] +Output [3]: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Input [5]: [ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] (18) HashAggregate [codegen id : 3] -Input [3]: [ss_ext_sales_price#5, i_brand_id#9, i_brand#10] -Keys [2]: [i_brand#10, i_brand_id#9] +Input [3]: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [2]: [i_brand#9, i_brand_id#8] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum#13] -Results [3]: [i_brand#10, i_brand_id#9, sum#14] +Aggregate Attributes [1]: [sum#11] +Results [3]: [i_brand#9, i_brand_id#8, sum#12] (19) Exchange -Input [3]: [i_brand#10, i_brand_id#9, sum#14] -Arguments: hashpartitioning(i_brand#10, i_brand_id#9, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [3]: [i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [3]: [i_brand#10, i_brand_id#9, sum#14] -Keys [2]: [i_brand#10, i_brand_id#9] +Input [3]: [i_brand#9, i_brand_id#8, sum#12] +Keys [2]: [i_brand#9, i_brand_id#8] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#16] -Results [3]: [i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#16,17,2) AS ext_price#19] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [3]: [i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS ext_price#16] (21) TakeOrderedAndProject -Input [3]: [brand_id#17, brand#18, ext_price#19] -Arguments: 100, [ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [brand_id#17, brand#18, ext_price#19] +Input [3]: [brand_id#14, brand#15, ext_price#16] +Arguments: 100, [ext_price#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [brand_id#14, brand#15, ext_price#16] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt index f260becf18e26..c2e49a39aa6bc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59.sf100/explain.txt @@ -83,7 +83,7 @@ Condition : (isnotnull(d_date_sk#4) AND isnotnull(d_week_seq#5)) (7) BroadcastExchange Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#3] @@ -98,77 +98,77 @@ Input [6]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_w Input [4]: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] Keys [2]: [d_week_seq#5, ss_store_sk#1] Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] -Aggregate Attributes [7]: [sum#8, sum#9, sum#10, sum#11, sum#12, sum#13, sum#14] -Results [9]: [d_week_seq#5, ss_store_sk#1, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Aggregate Attributes [7]: [sum#7, sum#8, sum#9, sum#10, sum#11, sum#12, sum#13] +Results [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] (11) Exchange -Input [9]: [d_week_seq#5, ss_store_sk#1, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] -Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) HashAggregate [codegen id : 10] -Input [9]: [d_week_seq#5, ss_store_sk#1, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] Keys [2]: [d_week_seq#5, ss_store_sk#1] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29] -Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#25,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29,17,2) AS sat_sales#36] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27] +Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21,17,2) AS sun_sales#28, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22,17,2) AS mon_sales#29, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23,17,2) AS tue_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24,17,2) AS wed_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25,17,2) AS thu_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26,17,2) AS fri_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27,17,2) AS sat_sales#34] (13) Scan parquet default.store -Output [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Output [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] (15) Filter [codegen id : 3] -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] -Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Condition : (isnotnull(s_store_sk#35) AND isnotnull(s_store_id#36)) (16) BroadcastExchange -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (17) BroadcastHashJoin [codegen id : 10] Left keys [1]: [ss_store_sk#1] -Right keys [1]: [s_store_sk#37] +Right keys [1]: [s_store_sk#35] Join condition: None (18) Project [codegen id : 10] -Output [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39] -Input [12]: [d_week_seq#5, ss_store_sk#1, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38, s_store_name#39] +Output [10]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#36, s_store_name#37] +Input [12]: [d_week_seq#5, ss_store_sk#1, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_sk#35, s_store_id#36, s_store_name#37] (19) Scan parquet default.date_dim -Output [2]: [d_month_seq#41, d_week_seq#42] +Output [2]: [d_month_seq#38, d_week_seq#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1185), LessThanOrEqual(d_month_seq,1196), IsNotNull(d_week_seq)] ReadSchema: struct (20) ColumnarToRow [codegen id : 4] -Input [2]: [d_month_seq#41, d_week_seq#42] +Input [2]: [d_month_seq#38, d_week_seq#39] (21) Filter [codegen id : 4] -Input [2]: [d_month_seq#41, d_week_seq#42] -Condition : (((isnotnull(d_month_seq#41) AND (d_month_seq#41 >= 1185)) AND (d_month_seq#41 <= 1196)) AND isnotnull(d_week_seq#42)) +Input [2]: [d_month_seq#38, d_week_seq#39] +Condition : (((isnotnull(d_month_seq#38) AND (d_month_seq#38 >= 1185)) AND (d_month_seq#38 <= 1196)) AND isnotnull(d_week_seq#39)) (22) Project [codegen id : 4] -Output [1]: [d_week_seq#42] -Input [2]: [d_month_seq#41, d_week_seq#42] +Output [1]: [d_week_seq#39] +Input [2]: [d_month_seq#38, d_week_seq#39] (23) BroadcastExchange -Input [1]: [d_week_seq#42] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#43] +Input [1]: [d_week_seq#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (24) BroadcastHashJoin [codegen id : 10] Left keys [1]: [d_week_seq#5] -Right keys [1]: [d_week_seq#42] +Right keys [1]: [d_week_seq#39] Join condition: None (25) Project [codegen id : 10] -Output [10]: [s_store_name#39 AS s_store_name1#44, d_week_seq#5 AS d_week_seq1#45, s_store_id#38 AS s_store_id1#46, sun_sales#30 AS sun_sales1#47, mon_sales#31 AS mon_sales1#48, tue_sales#32 AS tue_sales1#49, wed_sales#33 AS wed_sales1#50, thu_sales#34 AS thu_sales1#51, fri_sales#35 AS fri_sales1#52, sat_sales#36 AS sat_sales1#53] -Input [11]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39, d_week_seq#42] +Output [10]: [s_store_name#37 AS s_store_name1#40, d_week_seq#5 AS d_week_seq1#41, s_store_id#36 AS s_store_id1#42, sun_sales#28 AS sun_sales1#43, mon_sales#29 AS mon_sales1#44, tue_sales#30 AS tue_sales1#45, wed_sales#31 AS wed_sales1#46, thu_sales#32 AS thu_sales1#47, fri_sales#33 AS fri_sales1#48, sat_sales#34 AS sat_sales1#49] +Input [11]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#36, s_store_name#37, d_week_seq#39] (26) Scan parquet default.store_sales Output [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] @@ -201,92 +201,92 @@ Input [6]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_w Input [4]: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] Keys [2]: [d_week_seq#5, ss_store_sk#1] Functions [6]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] -Aggregate Attributes [6]: [sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] -Results [8]: [d_week_seq#5, ss_store_sk#1, sum#60, sum#61, sum#62, sum#63, sum#64, sum#65] +Aggregate Attributes [6]: [sum#50, sum#51, sum#52, sum#53, sum#54, sum#55] +Results [8]: [d_week_seq#5, ss_store_sk#1, sum#56, sum#57, sum#58, sum#59, sum#60, sum#61] (33) Exchange -Input [8]: [d_week_seq#5, ss_store_sk#1, sum#60, sum#61, sum#62, sum#63, sum#64, sum#65] -Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [id=#66] +Input [8]: [d_week_seq#5, ss_store_sk#1, sum#56, sum#57, sum#58, sum#59, sum#60, sum#61] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=5] (34) HashAggregate [codegen id : 9] -Input [8]: [d_week_seq#5, ss_store_sk#1, sum#60, sum#61, sum#62, sum#63, sum#64, sum#65] +Input [8]: [d_week_seq#5, ss_store_sk#1, sum#56, sum#57, sum#58, sum#59, sum#60, sum#61] Keys [2]: [d_week_seq#5, ss_store_sk#1] Functions [6]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] -Aggregate Attributes [6]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29] -Results [8]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29,17,2) AS sat_sales#36] +Aggregate Attributes [6]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27] +Results [8]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21,17,2) AS sun_sales#28, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22,17,2) AS mon_sales#29, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24,17,2) AS wed_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25,17,2) AS thu_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26,17,2) AS fri_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27,17,2) AS sat_sales#34] (35) Scan parquet default.store -Output [2]: [s_store_sk#67, s_store_id#68] +Output [2]: [s_store_sk#62, s_store_id#63] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] ReadSchema: struct (36) ColumnarToRow [codegen id : 7] -Input [2]: [s_store_sk#67, s_store_id#68] +Input [2]: [s_store_sk#62, s_store_id#63] (37) Filter [codegen id : 7] -Input [2]: [s_store_sk#67, s_store_id#68] -Condition : (isnotnull(s_store_sk#67) AND isnotnull(s_store_id#68)) +Input [2]: [s_store_sk#62, s_store_id#63] +Condition : (isnotnull(s_store_sk#62) AND isnotnull(s_store_id#63)) (38) BroadcastExchange -Input [2]: [s_store_sk#67, s_store_id#68] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#69] +Input [2]: [s_store_sk#62, s_store_id#63] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] (39) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#1] -Right keys [1]: [s_store_sk#67] +Right keys [1]: [s_store_sk#62] Join condition: None (40) Project [codegen id : 9] -Output [8]: [d_week_seq#5, sun_sales#30, mon_sales#31, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#68] -Input [10]: [d_week_seq#5, ss_store_sk#1, sun_sales#30, mon_sales#31, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#67, s_store_id#68] +Output [8]: [d_week_seq#5, sun_sales#28, mon_sales#29, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#63] +Input [10]: [d_week_seq#5, ss_store_sk#1, sun_sales#28, mon_sales#29, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_sk#62, s_store_id#63] (41) Scan parquet default.date_dim -Output [2]: [d_month_seq#70, d_week_seq#71] +Output [2]: [d_month_seq#64, d_week_seq#65] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1197), LessThanOrEqual(d_month_seq,1208), IsNotNull(d_week_seq)] ReadSchema: struct (42) ColumnarToRow [codegen id : 8] -Input [2]: [d_month_seq#70, d_week_seq#71] +Input [2]: [d_month_seq#64, d_week_seq#65] (43) Filter [codegen id : 8] -Input [2]: [d_month_seq#70, d_week_seq#71] -Condition : (((isnotnull(d_month_seq#70) AND (d_month_seq#70 >= 1197)) AND (d_month_seq#70 <= 1208)) AND isnotnull(d_week_seq#71)) +Input [2]: [d_month_seq#64, d_week_seq#65] +Condition : (((isnotnull(d_month_seq#64) AND (d_month_seq#64 >= 1197)) AND (d_month_seq#64 <= 1208)) AND isnotnull(d_week_seq#65)) (44) Project [codegen id : 8] -Output [1]: [d_week_seq#71] -Input [2]: [d_month_seq#70, d_week_seq#71] +Output [1]: [d_week_seq#65] +Input [2]: [d_month_seq#64, d_week_seq#65] (45) BroadcastExchange -Input [1]: [d_week_seq#71] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#72] +Input [1]: [d_week_seq#65] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] (46) BroadcastHashJoin [codegen id : 9] Left keys [1]: [d_week_seq#5] -Right keys [1]: [d_week_seq#71] +Right keys [1]: [d_week_seq#65] Join condition: None (47) Project [codegen id : 9] -Output [8]: [d_week_seq#5 AS d_week_seq2#73, s_store_id#68 AS s_store_id2#74, sun_sales#30 AS sun_sales2#75, mon_sales#31 AS mon_sales2#76, wed_sales#33 AS wed_sales2#77, thu_sales#34 AS thu_sales2#78, fri_sales#35 AS fri_sales2#79, sat_sales#36 AS sat_sales2#80] -Input [9]: [d_week_seq#5, sun_sales#30, mon_sales#31, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#68, d_week_seq#71] +Output [8]: [d_week_seq#5 AS d_week_seq2#66, s_store_id#63 AS s_store_id2#67, sun_sales#28 AS sun_sales2#68, mon_sales#29 AS mon_sales2#69, wed_sales#31 AS wed_sales2#70, thu_sales#32 AS thu_sales2#71, fri_sales#33 AS fri_sales2#72, sat_sales#34 AS sat_sales2#73] +Input [9]: [d_week_seq#5, sun_sales#28, mon_sales#29, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#63, d_week_seq#65] (48) BroadcastExchange -Input [8]: [d_week_seq2#73, s_store_id2#74, sun_sales2#75, mon_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [id=#81] +Input [8]: [d_week_seq2#66, s_store_id2#67, sun_sales2#68, mon_sales2#69, wed_sales2#70, thu_sales2#71, fri_sales2#72, sat_sales2#73] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [plan_id=8] (49) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [s_store_id1#46, d_week_seq1#45] -Right keys [2]: [s_store_id2#74, (d_week_seq2#73 - 52)] +Left keys [2]: [s_store_id1#42, d_week_seq1#41] +Right keys [2]: [s_store_id2#67, (d_week_seq2#66 - 52)] Join condition: None (50) Project [codegen id : 10] -Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#75)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#82, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#76)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#83, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales1#49)), DecimalType(37,20)) AS (tue_sales1 / tue_sales1)#84, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#77)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#85, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#78)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#86, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#79)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#87, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#80)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#88] -Input [18]: [s_store_name1#44, d_week_seq1#45, s_store_id1#46, sun_sales1#47, mon_sales1#48, tue_sales1#49, wed_sales1#50, thu_sales1#51, fri_sales1#52, sat_sales1#53, d_week_seq2#73, s_store_id2#74, sun_sales2#75, mon_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] +Output [10]: [s_store_name1#40, s_store_id1#42, d_week_seq1#41, CheckOverflow((promote_precision(sun_sales1#43) / promote_precision(sun_sales2#68)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#74, CheckOverflow((promote_precision(mon_sales1#44) / promote_precision(mon_sales2#69)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#75, CheckOverflow((promote_precision(tue_sales1#45) / promote_precision(tue_sales1#45)), DecimalType(37,20)) AS (tue_sales1 / tue_sales1)#76, CheckOverflow((promote_precision(wed_sales1#46) / promote_precision(wed_sales2#70)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#77, CheckOverflow((promote_precision(thu_sales1#47) / promote_precision(thu_sales2#71)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#78, CheckOverflow((promote_precision(fri_sales1#48) / promote_precision(fri_sales2#72)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#79, CheckOverflow((promote_precision(sat_sales1#49) / promote_precision(sat_sales2#73)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#80] +Input [18]: [s_store_name1#40, d_week_seq1#41, s_store_id1#42, sun_sales1#43, mon_sales1#44, tue_sales1#45, wed_sales1#46, thu_sales1#47, fri_sales1#48, sat_sales1#49, d_week_seq2#66, s_store_id2#67, sun_sales2#68, mon_sales2#69, wed_sales2#70, thu_sales2#71, fri_sales2#72, sat_sales2#73] (51) TakeOrderedAndProject -Input [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#82, (mon_sales1 / mon_sales2)#83, (tue_sales1 / tue_sales1)#84, (wed_sales1 / wed_sales2)#85, (thu_sales1 / thu_sales2)#86, (fri_sales1 / fri_sales2)#87, (sat_sales1 / sat_sales2)#88] -Arguments: 100, [s_store_name1#44 ASC NULLS FIRST, s_store_id1#46 ASC NULLS FIRST, d_week_seq1#45 ASC NULLS FIRST], [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#82, (mon_sales1 / mon_sales2)#83, (tue_sales1 / tue_sales1)#84, (wed_sales1 / wed_sales2)#85, (thu_sales1 / thu_sales2)#86, (fri_sales1 / fri_sales2)#87, (sat_sales1 / sat_sales2)#88] +Input [10]: [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1 / sun_sales2)#74, (mon_sales1 / mon_sales2)#75, (tue_sales1 / tue_sales1)#76, (wed_sales1 / wed_sales2)#77, (thu_sales1 / thu_sales2)#78, (fri_sales1 / fri_sales2)#79, (sat_sales1 / sat_sales2)#80] +Arguments: 100, [s_store_name1#40 ASC NULLS FIRST, s_store_id1#42 ASC NULLS FIRST, d_week_seq1#41 ASC NULLS FIRST], [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1 / sun_sales2)#74, (mon_sales1 / mon_sales2)#75, (tue_sales1 / tue_sales1)#76, (wed_sales1 / wed_sales2)#77, (thu_sales1 / thu_sales2)#78, (fri_sales1 / fri_sales2)#79, (sat_sales1 / sat_sales2)#80] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59/explain.txt index f260becf18e26..c2e49a39aa6bc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q59/explain.txt @@ -83,7 +83,7 @@ Condition : (isnotnull(d_date_sk#4) AND isnotnull(d_week_seq#5)) (7) BroadcastExchange Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#3] @@ -98,77 +98,77 @@ Input [6]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_w Input [4]: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] Keys [2]: [d_week_seq#5, ss_store_sk#1] Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] -Aggregate Attributes [7]: [sum#8, sum#9, sum#10, sum#11, sum#12, sum#13, sum#14] -Results [9]: [d_week_seq#5, ss_store_sk#1, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Aggregate Attributes [7]: [sum#7, sum#8, sum#9, sum#10, sum#11, sum#12, sum#13] +Results [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] (11) Exchange -Input [9]: [d_week_seq#5, ss_store_sk#1, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] -Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) HashAggregate [codegen id : 10] -Input [9]: [d_week_seq#5, ss_store_sk#1, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] Keys [2]: [d_week_seq#5, ss_store_sk#1] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29] -Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#25,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29,17,2) AS sat_sales#36] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27] +Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21,17,2) AS sun_sales#28, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22,17,2) AS mon_sales#29, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23,17,2) AS tue_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24,17,2) AS wed_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25,17,2) AS thu_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26,17,2) AS fri_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27,17,2) AS sat_sales#34] (13) Scan parquet default.store -Output [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Output [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] (15) Filter [codegen id : 3] -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] -Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Condition : (isnotnull(s_store_sk#35) AND isnotnull(s_store_id#36)) (16) BroadcastExchange -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (17) BroadcastHashJoin [codegen id : 10] Left keys [1]: [ss_store_sk#1] -Right keys [1]: [s_store_sk#37] +Right keys [1]: [s_store_sk#35] Join condition: None (18) Project [codegen id : 10] -Output [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39] -Input [12]: [d_week_seq#5, ss_store_sk#1, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38, s_store_name#39] +Output [10]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#36, s_store_name#37] +Input [12]: [d_week_seq#5, ss_store_sk#1, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_sk#35, s_store_id#36, s_store_name#37] (19) Scan parquet default.date_dim -Output [2]: [d_month_seq#41, d_week_seq#42] +Output [2]: [d_month_seq#38, d_week_seq#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1185), LessThanOrEqual(d_month_seq,1196), IsNotNull(d_week_seq)] ReadSchema: struct (20) ColumnarToRow [codegen id : 4] -Input [2]: [d_month_seq#41, d_week_seq#42] +Input [2]: [d_month_seq#38, d_week_seq#39] (21) Filter [codegen id : 4] -Input [2]: [d_month_seq#41, d_week_seq#42] -Condition : (((isnotnull(d_month_seq#41) AND (d_month_seq#41 >= 1185)) AND (d_month_seq#41 <= 1196)) AND isnotnull(d_week_seq#42)) +Input [2]: [d_month_seq#38, d_week_seq#39] +Condition : (((isnotnull(d_month_seq#38) AND (d_month_seq#38 >= 1185)) AND (d_month_seq#38 <= 1196)) AND isnotnull(d_week_seq#39)) (22) Project [codegen id : 4] -Output [1]: [d_week_seq#42] -Input [2]: [d_month_seq#41, d_week_seq#42] +Output [1]: [d_week_seq#39] +Input [2]: [d_month_seq#38, d_week_seq#39] (23) BroadcastExchange -Input [1]: [d_week_seq#42] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#43] +Input [1]: [d_week_seq#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (24) BroadcastHashJoin [codegen id : 10] Left keys [1]: [d_week_seq#5] -Right keys [1]: [d_week_seq#42] +Right keys [1]: [d_week_seq#39] Join condition: None (25) Project [codegen id : 10] -Output [10]: [s_store_name#39 AS s_store_name1#44, d_week_seq#5 AS d_week_seq1#45, s_store_id#38 AS s_store_id1#46, sun_sales#30 AS sun_sales1#47, mon_sales#31 AS mon_sales1#48, tue_sales#32 AS tue_sales1#49, wed_sales#33 AS wed_sales1#50, thu_sales#34 AS thu_sales1#51, fri_sales#35 AS fri_sales1#52, sat_sales#36 AS sat_sales1#53] -Input [11]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39, d_week_seq#42] +Output [10]: [s_store_name#37 AS s_store_name1#40, d_week_seq#5 AS d_week_seq1#41, s_store_id#36 AS s_store_id1#42, sun_sales#28 AS sun_sales1#43, mon_sales#29 AS mon_sales1#44, tue_sales#30 AS tue_sales1#45, wed_sales#31 AS wed_sales1#46, thu_sales#32 AS thu_sales1#47, fri_sales#33 AS fri_sales1#48, sat_sales#34 AS sat_sales1#49] +Input [11]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#36, s_store_name#37, d_week_seq#39] (26) Scan parquet default.store_sales Output [3]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3] @@ -201,92 +201,92 @@ Input [6]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_w Input [4]: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] Keys [2]: [d_week_seq#5, ss_store_sk#1] Functions [6]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] -Aggregate Attributes [6]: [sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] -Results [8]: [d_week_seq#5, ss_store_sk#1, sum#60, sum#61, sum#62, sum#63, sum#64, sum#65] +Aggregate Attributes [6]: [sum#50, sum#51, sum#52, sum#53, sum#54, sum#55] +Results [8]: [d_week_seq#5, ss_store_sk#1, sum#56, sum#57, sum#58, sum#59, sum#60, sum#61] (33) Exchange -Input [8]: [d_week_seq#5, ss_store_sk#1, sum#60, sum#61, sum#62, sum#63, sum#64, sum#65] -Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [id=#66] +Input [8]: [d_week_seq#5, ss_store_sk#1, sum#56, sum#57, sum#58, sum#59, sum#60, sum#61] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=5] (34) HashAggregate [codegen id : 9] -Input [8]: [d_week_seq#5, ss_store_sk#1, sum#60, sum#61, sum#62, sum#63, sum#64, sum#65] +Input [8]: [d_week_seq#5, ss_store_sk#1, sum#56, sum#57, sum#58, sum#59, sum#60, sum#61] Keys [2]: [d_week_seq#5, ss_store_sk#1] Functions [6]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] -Aggregate Attributes [6]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29] -Results [8]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29,17,2) AS sat_sales#36] +Aggregate Attributes [6]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27] +Results [8]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21,17,2) AS sun_sales#28, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22,17,2) AS mon_sales#29, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24,17,2) AS wed_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25,17,2) AS thu_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26,17,2) AS fri_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27,17,2) AS sat_sales#34] (35) Scan parquet default.store -Output [2]: [s_store_sk#67, s_store_id#68] +Output [2]: [s_store_sk#62, s_store_id#63] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] ReadSchema: struct (36) ColumnarToRow [codegen id : 7] -Input [2]: [s_store_sk#67, s_store_id#68] +Input [2]: [s_store_sk#62, s_store_id#63] (37) Filter [codegen id : 7] -Input [2]: [s_store_sk#67, s_store_id#68] -Condition : (isnotnull(s_store_sk#67) AND isnotnull(s_store_id#68)) +Input [2]: [s_store_sk#62, s_store_id#63] +Condition : (isnotnull(s_store_sk#62) AND isnotnull(s_store_id#63)) (38) BroadcastExchange -Input [2]: [s_store_sk#67, s_store_id#68] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#69] +Input [2]: [s_store_sk#62, s_store_id#63] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] (39) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#1] -Right keys [1]: [s_store_sk#67] +Right keys [1]: [s_store_sk#62] Join condition: None (40) Project [codegen id : 9] -Output [8]: [d_week_seq#5, sun_sales#30, mon_sales#31, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#68] -Input [10]: [d_week_seq#5, ss_store_sk#1, sun_sales#30, mon_sales#31, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#67, s_store_id#68] +Output [8]: [d_week_seq#5, sun_sales#28, mon_sales#29, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#63] +Input [10]: [d_week_seq#5, ss_store_sk#1, sun_sales#28, mon_sales#29, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_sk#62, s_store_id#63] (41) Scan parquet default.date_dim -Output [2]: [d_month_seq#70, d_week_seq#71] +Output [2]: [d_month_seq#64, d_week_seq#65] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1197), LessThanOrEqual(d_month_seq,1208), IsNotNull(d_week_seq)] ReadSchema: struct (42) ColumnarToRow [codegen id : 8] -Input [2]: [d_month_seq#70, d_week_seq#71] +Input [2]: [d_month_seq#64, d_week_seq#65] (43) Filter [codegen id : 8] -Input [2]: [d_month_seq#70, d_week_seq#71] -Condition : (((isnotnull(d_month_seq#70) AND (d_month_seq#70 >= 1197)) AND (d_month_seq#70 <= 1208)) AND isnotnull(d_week_seq#71)) +Input [2]: [d_month_seq#64, d_week_seq#65] +Condition : (((isnotnull(d_month_seq#64) AND (d_month_seq#64 >= 1197)) AND (d_month_seq#64 <= 1208)) AND isnotnull(d_week_seq#65)) (44) Project [codegen id : 8] -Output [1]: [d_week_seq#71] -Input [2]: [d_month_seq#70, d_week_seq#71] +Output [1]: [d_week_seq#65] +Input [2]: [d_month_seq#64, d_week_seq#65] (45) BroadcastExchange -Input [1]: [d_week_seq#71] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#72] +Input [1]: [d_week_seq#65] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] (46) BroadcastHashJoin [codegen id : 9] Left keys [1]: [d_week_seq#5] -Right keys [1]: [d_week_seq#71] +Right keys [1]: [d_week_seq#65] Join condition: None (47) Project [codegen id : 9] -Output [8]: [d_week_seq#5 AS d_week_seq2#73, s_store_id#68 AS s_store_id2#74, sun_sales#30 AS sun_sales2#75, mon_sales#31 AS mon_sales2#76, wed_sales#33 AS wed_sales2#77, thu_sales#34 AS thu_sales2#78, fri_sales#35 AS fri_sales2#79, sat_sales#36 AS sat_sales2#80] -Input [9]: [d_week_seq#5, sun_sales#30, mon_sales#31, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#68, d_week_seq#71] +Output [8]: [d_week_seq#5 AS d_week_seq2#66, s_store_id#63 AS s_store_id2#67, sun_sales#28 AS sun_sales2#68, mon_sales#29 AS mon_sales2#69, wed_sales#31 AS wed_sales2#70, thu_sales#32 AS thu_sales2#71, fri_sales#33 AS fri_sales2#72, sat_sales#34 AS sat_sales2#73] +Input [9]: [d_week_seq#5, sun_sales#28, mon_sales#29, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#63, d_week_seq#65] (48) BroadcastExchange -Input [8]: [d_week_seq2#73, s_store_id2#74, sun_sales2#75, mon_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [id=#81] +Input [8]: [d_week_seq2#66, s_store_id2#67, sun_sales2#68, mon_sales2#69, wed_sales2#70, thu_sales2#71, fri_sales2#72, sat_sales2#73] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [plan_id=8] (49) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [s_store_id1#46, d_week_seq1#45] -Right keys [2]: [s_store_id2#74, (d_week_seq2#73 - 52)] +Left keys [2]: [s_store_id1#42, d_week_seq1#41] +Right keys [2]: [s_store_id2#67, (d_week_seq2#66 - 52)] Join condition: None (50) Project [codegen id : 10] -Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#75)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#82, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#76)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#83, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales1#49)), DecimalType(37,20)) AS (tue_sales1 / tue_sales1)#84, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#77)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#85, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#78)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#86, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#79)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#87, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#80)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#88] -Input [18]: [s_store_name1#44, d_week_seq1#45, s_store_id1#46, sun_sales1#47, mon_sales1#48, tue_sales1#49, wed_sales1#50, thu_sales1#51, fri_sales1#52, sat_sales1#53, d_week_seq2#73, s_store_id2#74, sun_sales2#75, mon_sales2#76, wed_sales2#77, thu_sales2#78, fri_sales2#79, sat_sales2#80] +Output [10]: [s_store_name1#40, s_store_id1#42, d_week_seq1#41, CheckOverflow((promote_precision(sun_sales1#43) / promote_precision(sun_sales2#68)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#74, CheckOverflow((promote_precision(mon_sales1#44) / promote_precision(mon_sales2#69)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#75, CheckOverflow((promote_precision(tue_sales1#45) / promote_precision(tue_sales1#45)), DecimalType(37,20)) AS (tue_sales1 / tue_sales1)#76, CheckOverflow((promote_precision(wed_sales1#46) / promote_precision(wed_sales2#70)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#77, CheckOverflow((promote_precision(thu_sales1#47) / promote_precision(thu_sales2#71)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#78, CheckOverflow((promote_precision(fri_sales1#48) / promote_precision(fri_sales2#72)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#79, CheckOverflow((promote_precision(sat_sales1#49) / promote_precision(sat_sales2#73)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#80] +Input [18]: [s_store_name1#40, d_week_seq1#41, s_store_id1#42, sun_sales1#43, mon_sales1#44, tue_sales1#45, wed_sales1#46, thu_sales1#47, fri_sales1#48, sat_sales1#49, d_week_seq2#66, s_store_id2#67, sun_sales2#68, mon_sales2#69, wed_sales2#70, thu_sales2#71, fri_sales2#72, sat_sales2#73] (51) TakeOrderedAndProject -Input [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#82, (mon_sales1 / mon_sales2)#83, (tue_sales1 / tue_sales1)#84, (wed_sales1 / wed_sales2)#85, (thu_sales1 / thu_sales2)#86, (fri_sales1 / fri_sales2)#87, (sat_sales1 / sat_sales2)#88] -Arguments: 100, [s_store_name1#44 ASC NULLS FIRST, s_store_id1#46 ASC NULLS FIRST, d_week_seq1#45 ASC NULLS FIRST], [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#82, (mon_sales1 / mon_sales2)#83, (tue_sales1 / tue_sales1)#84, (wed_sales1 / wed_sales2)#85, (thu_sales1 / thu_sales2)#86, (fri_sales1 / fri_sales2)#87, (sat_sales1 / sat_sales2)#88] +Input [10]: [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1 / sun_sales2)#74, (mon_sales1 / mon_sales2)#75, (tue_sales1 / tue_sales1)#76, (wed_sales1 / wed_sales2)#77, (thu_sales1 / thu_sales2)#78, (fri_sales1 / fri_sales2)#79, (sat_sales1 / sat_sales2)#80] +Arguments: 100, [s_store_name1#40 ASC NULLS FIRST, s_store_id1#42 ASC NULLS FIRST, d_week_seq1#41 ASC NULLS FIRST], [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1 / sun_sales2)#74, (mon_sales1 / mon_sales2)#75, (tue_sales1 / tue_sales1)#76, (wed_sales1 / wed_sales2)#77, (thu_sales1 / thu_sales2)#78, (fri_sales1 / fri_sales2)#79, (sat_sales1 / sat_sales2)#80] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt index 698d6f41f8871..8a225052b4ff7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63.sf100/explain.txt @@ -49,116 +49,116 @@ Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] (5) BroadcastExchange Input [2]: [i_item_sk#1, i_manager_id#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.store_sales -Output [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] +Output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#14), (ss_sold_date_sk#14 >= 2452123), (ss_sold_date_sk#14 <= 2452487), dynamicpruningexpression(ss_sold_date_sk#14 IN dynamicpruning#15)] +PartitionFilters: [isnotnull(ss_sold_date_sk#13), (ss_sold_date_sk#13 >= 2452123), (ss_sold_date_sk#13 <= 2452487), dynamicpruningexpression(ss_sold_date_sk#13 IN dynamicpruning#14)] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (7) ColumnarToRow -Input [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] (8) Filter -Input [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] -Condition : (isnotnull(ss_item_sk#11) AND isnotnull(ss_store_sk#12)) +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] -Right keys [1]: [ss_item_sk#11] +Right keys [1]: [ss_item_sk#10] Join condition: None (10) Project [codegen id : 4] -Output [4]: [i_manager_id#5, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] -Input [6]: [i_item_sk#1, i_manager_id#5, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] +Output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Input [6]: [i_item_sk#1, i_manager_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] (11) Scan parquet default.store -Output [1]: [s_store_sk#16] +Output [1]: [s_store_sk#15] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [1]: [s_store_sk#16] +Input [1]: [s_store_sk#15] (13) Filter [codegen id : 2] -Input [1]: [s_store_sk#16] -Condition : isnotnull(s_store_sk#16) +Input [1]: [s_store_sk#15] +Condition : isnotnull(s_store_sk#15) (14) BroadcastExchange -Input [1]: [s_store_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#12] -Right keys [1]: [s_store_sk#16] +Left keys [1]: [ss_store_sk#11] +Right keys [1]: [s_store_sk#15] Join condition: None (16) Project [codegen id : 4] -Output [3]: [i_manager_id#5, ss_sales_price#13, ss_sold_date_sk#14] -Input [5]: [i_manager_id#5, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14, s_store_sk#16] +Output [3]: [i_manager_id#5, ss_sales_price#12, ss_sold_date_sk#13] +Input [5]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, s_store_sk#15] (17) ReusedExchange [Reuses operator id: 33] -Output [2]: [d_date_sk#18, d_moy#19] +Output [2]: [d_date_sk#16, d_moy#17] (18) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#14] -Right keys [1]: [d_date_sk#18] +Left keys [1]: [ss_sold_date_sk#13] +Right keys [1]: [d_date_sk#16] Join condition: None (19) Project [codegen id : 4] -Output [3]: [i_manager_id#5, ss_sales_price#13, d_moy#19] -Input [5]: [i_manager_id#5, ss_sales_price#13, ss_sold_date_sk#14, d_date_sk#18, d_moy#19] +Output [3]: [i_manager_id#5, ss_sales_price#12, d_moy#17] +Input [5]: [i_manager_id#5, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#16, d_moy#17] (20) HashAggregate [codegen id : 4] -Input [3]: [i_manager_id#5, ss_sales_price#13, d_moy#19] -Keys [2]: [i_manager_id#5, d_moy#19] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#13))] -Aggregate Attributes [1]: [sum#20] -Results [3]: [i_manager_id#5, d_moy#19, sum#21] +Input [3]: [i_manager_id#5, ss_sales_price#12, d_moy#17] +Keys [2]: [i_manager_id#5, d_moy#17] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [i_manager_id#5, d_moy#17, sum#19] (21) Exchange -Input [3]: [i_manager_id#5, d_moy#19, sum#21] -Arguments: hashpartitioning(i_manager_id#5, d_moy#19, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [3]: [i_manager_id#5, d_moy#17, sum#19] +Arguments: hashpartitioning(i_manager_id#5, d_moy#17, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 5] -Input [3]: [i_manager_id#5, d_moy#19, sum#21] -Keys [2]: [i_manager_id#5, d_moy#19] -Functions [1]: [sum(UnscaledValue(ss_sales_price#13))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#13))#23] -Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#23,17,2) AS sum_sales#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#23,17,2) AS _w0#25] +Input [3]: [i_manager_id#5, d_moy#17, sum#19] +Keys [2]: [i_manager_id#5, d_moy#17] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#20] +Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS _w0#22] (23) Exchange -Input [3]: [i_manager_id#5, sum_sales#24, _w0#25] -Arguments: hashpartitioning(i_manager_id#5, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_manager_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 6] -Input [3]: [i_manager_id#5, sum_sales#24, _w0#25] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] Arguments: [i_manager_id#5 ASC NULLS FIRST], false, 0 (25) Window -Input [3]: [i_manager_id#5, sum_sales#24, _w0#25] -Arguments: [avg(_w0#25) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#27], [i_manager_id#5] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#23], [i_manager_id#5] (26) Filter [codegen id : 7] -Input [4]: [i_manager_id#5, sum_sales#24, _w0#25, avg_monthly_sales#27] -Condition : (isnotnull(avg_monthly_sales#27) AND ((avg_monthly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] +Condition : (isnotnull(avg_monthly_sales#23) AND ((avg_monthly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] -Output [3]: [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] -Input [4]: [i_manager_id#5, sum_sales#24, _w0#25, avg_monthly_sales#27] +Output [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] +Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] (28) TakeOrderedAndProject -Input [3]: [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] -Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#27 ASC NULLS FIRST, sum_sales#24 ASC NULLS FIRST], [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] +Input [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] +Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#23 ASC NULLS FIRST, sum_sales#21 ASC NULLS FIRST], [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#14 IN dynamicpruning#15 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#13 IN dynamicpruning#14 BroadcastExchange (33) +- * Project (32) +- * Filter (31) @@ -167,25 +167,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [3]: [d_date_sk#18, d_month_seq#28, d_moy#19] +Output [3]: [d_date_sk#16, d_month_seq#24, d_moy#17] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_month_seq, [1219,1220,1221,1222,1223,1224,1225,1226,1227,1228,1229,1230]), GreaterThanOrEqual(d_date_sk,2452123), LessThanOrEqual(d_date_sk,2452487), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#18, d_month_seq#28, d_moy#19] +Input [3]: [d_date_sk#16, d_month_seq#24, d_moy#17] (31) Filter [codegen id : 1] -Input [3]: [d_date_sk#18, d_month_seq#28, d_moy#19] -Condition : (((d_month_seq#28 INSET 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230 AND (d_date_sk#18 >= 2452123)) AND (d_date_sk#18 <= 2452487)) AND isnotnull(d_date_sk#18)) +Input [3]: [d_date_sk#16, d_month_seq#24, d_moy#17] +Condition : (((d_month_seq#24 INSET 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230 AND (d_date_sk#16 >= 2452123)) AND (d_date_sk#16 <= 2452487)) AND isnotnull(d_date_sk#16)) (32) Project [codegen id : 1] -Output [2]: [d_date_sk#18, d_moy#19] -Input [3]: [d_date_sk#18, d_month_seq#28, d_moy#19] +Output [2]: [d_date_sk#16, d_moy#17] +Input [3]: [d_date_sk#16, d_month_seq#24, d_moy#17] (33) BroadcastExchange -Input [2]: [d_date_sk#18, d_moy#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] +Input [2]: [d_date_sk#16, d_moy#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt index 99146cf1d2829..bdb9612bfa2f4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q63/explain.txt @@ -64,7 +64,7 @@ Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) (8) BroadcastExchange Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] @@ -76,85 +76,85 @@ Output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk# Input [6]: [i_item_sk#1, i_manager_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] (11) ReusedExchange [Reuses operator id: 33] -Output [2]: [d_date_sk#16, d_moy#17] +Output [2]: [d_date_sk#15, d_moy#16] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_date_sk#13] -Right keys [1]: [d_date_sk#16] +Right keys [1]: [d_date_sk#15] Join condition: None (13) Project [codegen id : 4] -Output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#17] -Input [6]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#16, d_moy#17] +Output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16] +Input [6]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#15, d_moy#16] (14) Scan parquet default.store -Output [1]: [s_store_sk#18] +Output [1]: [s_store_sk#17] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [1]: [s_store_sk#18] +Input [1]: [s_store_sk#17] (16) Filter [codegen id : 3] -Input [1]: [s_store_sk#18] -Condition : isnotnull(s_store_sk#18) +Input [1]: [s_store_sk#17] +Condition : isnotnull(s_store_sk#17) (17) BroadcastExchange -Input [1]: [s_store_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +Input [1]: [s_store_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#11] -Right keys [1]: [s_store_sk#18] +Right keys [1]: [s_store_sk#17] Join condition: None (19) Project [codegen id : 4] -Output [3]: [i_manager_id#5, ss_sales_price#12, d_moy#17] -Input [5]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#17, s_store_sk#18] +Output [3]: [i_manager_id#5, ss_sales_price#12, d_moy#16] +Input [5]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16, s_store_sk#17] (20) HashAggregate [codegen id : 4] -Input [3]: [i_manager_id#5, ss_sales_price#12, d_moy#17] -Keys [2]: [i_manager_id#5, d_moy#17] +Input [3]: [i_manager_id#5, ss_sales_price#12, d_moy#16] +Keys [2]: [i_manager_id#5, d_moy#16] Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] -Aggregate Attributes [1]: [sum#20] -Results [3]: [i_manager_id#5, d_moy#17, sum#21] +Aggregate Attributes [1]: [sum#18] +Results [3]: [i_manager_id#5, d_moy#16, sum#19] (21) Exchange -Input [3]: [i_manager_id#5, d_moy#17, sum#21] -Arguments: hashpartitioning(i_manager_id#5, d_moy#17, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [3]: [i_manager_id#5, d_moy#16, sum#19] +Arguments: hashpartitioning(i_manager_id#5, d_moy#16, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 5] -Input [3]: [i_manager_id#5, d_moy#17, sum#21] -Keys [2]: [i_manager_id#5, d_moy#17] +Input [3]: [i_manager_id#5, d_moy#16, sum#19] +Keys [2]: [i_manager_id#5, d_moy#16] Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#23] -Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#23,17,2) AS sum_sales#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#23,17,2) AS _w0#25] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#20] +Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS _w0#22] (23) Exchange -Input [3]: [i_manager_id#5, sum_sales#24, _w0#25] -Arguments: hashpartitioning(i_manager_id#5, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_manager_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 6] -Input [3]: [i_manager_id#5, sum_sales#24, _w0#25] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] Arguments: [i_manager_id#5 ASC NULLS FIRST], false, 0 (25) Window -Input [3]: [i_manager_id#5, sum_sales#24, _w0#25] -Arguments: [avg(_w0#25) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#27], [i_manager_id#5] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#23], [i_manager_id#5] (26) Filter [codegen id : 7] -Input [4]: [i_manager_id#5, sum_sales#24, _w0#25, avg_monthly_sales#27] -Condition : (isnotnull(avg_monthly_sales#27) AND ((avg_monthly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] +Condition : (isnotnull(avg_monthly_sales#23) AND ((avg_monthly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] -Output [3]: [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] -Input [4]: [i_manager_id#5, sum_sales#24, _w0#25, avg_monthly_sales#27] +Output [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] +Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] (28) TakeOrderedAndProject -Input [3]: [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] -Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#27 ASC NULLS FIRST, sum_sales#24 ASC NULLS FIRST], [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] +Input [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] +Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#23 ASC NULLS FIRST, sum_sales#21 ASC NULLS FIRST], [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] ===== Subqueries ===== @@ -167,25 +167,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [3]: [d_date_sk#16, d_month_seq#28, d_moy#17] +Output [3]: [d_date_sk#15, d_month_seq#24, d_moy#16] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_month_seq, [1219,1220,1221,1222,1223,1224,1225,1226,1227,1228,1229,1230]), GreaterThanOrEqual(d_date_sk,2452123), LessThanOrEqual(d_date_sk,2452487), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#16, d_month_seq#28, d_moy#17] +Input [3]: [d_date_sk#15, d_month_seq#24, d_moy#16] (31) Filter [codegen id : 1] -Input [3]: [d_date_sk#16, d_month_seq#28, d_moy#17] -Condition : (((d_month_seq#28 INSET 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230 AND (d_date_sk#16 >= 2452123)) AND (d_date_sk#16 <= 2452487)) AND isnotnull(d_date_sk#16)) +Input [3]: [d_date_sk#15, d_month_seq#24, d_moy#16] +Condition : (((d_month_seq#24 INSET 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230 AND (d_date_sk#15 >= 2452123)) AND (d_date_sk#15 <= 2452487)) AND isnotnull(d_date_sk#15)) (32) Project [codegen id : 1] -Output [2]: [d_date_sk#16, d_moy#17] -Input [3]: [d_date_sk#16, d_month_seq#28, d_moy#17] +Output [2]: [d_date_sk#15, d_moy#16] +Input [3]: [d_date_sk#15, d_month_seq#24, d_moy#16] (33) BroadcastExchange -Input [2]: [d_date_sk#16, d_moy#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] +Input [2]: [d_date_sk#15, d_moy#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65.sf100/explain.txt index aabb4fe67f387..1be6b422f019d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65.sf100/explain.txt @@ -75,152 +75,152 @@ Results [3]: [ss_store_sk#2, ss_item_sk#1, sum#8] (8) Exchange Input [3]: [ss_store_sk#2, ss_item_sk#1, sum#8] -Arguments: hashpartitioning(ss_store_sk#2, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ss_store_sk#2, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (9) HashAggregate [codegen id : 7] Input [3]: [ss_store_sk#2, ss_item_sk#1, sum#8] Keys [2]: [ss_store_sk#2, ss_item_sk#1] Functions [1]: [sum(UnscaledValue(ss_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#3))#10] -Results [3]: [ss_store_sk#2, ss_item_sk#1, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#10,17,2) AS revenue#11] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#3))#9] +Results [3]: [ss_store_sk#2, ss_item_sk#1, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#9,17,2) AS revenue#10] (10) Filter [codegen id : 7] -Input [3]: [ss_store_sk#2, ss_item_sk#1, revenue#11] -Condition : isnotnull(revenue#11) +Input [3]: [ss_store_sk#2, ss_item_sk#1, revenue#10] +Condition : isnotnull(revenue#10) (11) Scan parquet default.store_sales -Output [4]: [ss_item_sk#12, ss_store_sk#13, ss_sales_price#14, ss_sold_date_sk#15] +Output [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#15), (ss_sold_date_sk#15 >= 2451911), (ss_sold_date_sk#15 <= 2452275), dynamicpruningexpression(ss_sold_date_sk#15 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ss_sold_date_sk#14), (ss_sold_date_sk#14 >= 2451911), (ss_sold_date_sk#14 <= 2452275), dynamicpruningexpression(ss_sold_date_sk#14 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ss_store_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 4] -Input [4]: [ss_item_sk#12, ss_store_sk#13, ss_sales_price#14, ss_sold_date_sk#15] +Input [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] (13) Filter [codegen id : 4] -Input [4]: [ss_item_sk#12, ss_store_sk#13, ss_sales_price#14, ss_sold_date_sk#15] -Condition : isnotnull(ss_store_sk#13) +Input [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] +Condition : isnotnull(ss_store_sk#12) (14) ReusedExchange [Reuses operator id: 43] -Output [1]: [d_date_sk#16] +Output [1]: [d_date_sk#15] (15) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#15] -Right keys [1]: [d_date_sk#16] +Left keys [1]: [ss_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] Join condition: None (16) Project [codegen id : 4] -Output [3]: [ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] -Input [5]: [ss_item_sk#12, ss_store_sk#13, ss_sales_price#14, ss_sold_date_sk#15, d_date_sk#16] +Output [3]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Input [5]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14, d_date_sk#15] (17) HashAggregate [codegen id : 4] -Input [3]: [ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] -Keys [2]: [ss_store_sk#13, ss_item_sk#12] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#14))] -Aggregate Attributes [1]: [sum#17] -Results [3]: [ss_store_sk#13, ss_item_sk#12, sum#18] +Input [3]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Keys [2]: [ss_store_sk#12, ss_item_sk#11] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum#16] +Results [3]: [ss_store_sk#12, ss_item_sk#11, sum#17] (18) Exchange -Input [3]: [ss_store_sk#13, ss_item_sk#12, sum#18] -Arguments: hashpartitioning(ss_store_sk#13, ss_item_sk#12, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [3]: [ss_store_sk#12, ss_item_sk#11, sum#17] +Arguments: hashpartitioning(ss_store_sk#12, ss_item_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=2] (19) HashAggregate [codegen id : 5] -Input [3]: [ss_store_sk#13, ss_item_sk#12, sum#18] -Keys [2]: [ss_store_sk#13, ss_item_sk#12] -Functions [1]: [sum(UnscaledValue(ss_sales_price#14))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#14))#20] -Results [2]: [ss_store_sk#13, MakeDecimal(sum(UnscaledValue(ss_sales_price#14))#20,17,2) AS revenue#21] +Input [3]: [ss_store_sk#12, ss_item_sk#11, sum#17] +Keys [2]: [ss_store_sk#12, ss_item_sk#11] +Functions [1]: [sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#13))#18] +Results [2]: [ss_store_sk#12, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#18,17,2) AS revenue#19] (20) HashAggregate [codegen id : 5] -Input [2]: [ss_store_sk#13, revenue#21] -Keys [1]: [ss_store_sk#13] -Functions [1]: [partial_avg(revenue#21)] -Aggregate Attributes [2]: [sum#22, count#23] -Results [3]: [ss_store_sk#13, sum#24, count#25] +Input [2]: [ss_store_sk#12, revenue#19] +Keys [1]: [ss_store_sk#12] +Functions [1]: [partial_avg(revenue#19)] +Aggregate Attributes [2]: [sum#20, count#21] +Results [3]: [ss_store_sk#12, sum#22, count#23] (21) Exchange -Input [3]: [ss_store_sk#13, sum#24, count#25] -Arguments: hashpartitioning(ss_store_sk#13, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [3]: [ss_store_sk#12, sum#22, count#23] +Arguments: hashpartitioning(ss_store_sk#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 6] -Input [3]: [ss_store_sk#13, sum#24, count#25] -Keys [1]: [ss_store_sk#13] -Functions [1]: [avg(revenue#21)] -Aggregate Attributes [1]: [avg(revenue#21)#27] -Results [2]: [ss_store_sk#13, avg(revenue#21)#27 AS ave#28] +Input [3]: [ss_store_sk#12, sum#22, count#23] +Keys [1]: [ss_store_sk#12] +Functions [1]: [avg(revenue#19)] +Aggregate Attributes [1]: [avg(revenue#19)#24] +Results [2]: [ss_store_sk#12, avg(revenue#19)#24 AS ave#25] (23) BroadcastExchange -Input [2]: [ss_store_sk#13, ave#28] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] +Input [2]: [ss_store_sk#12, ave#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (24) BroadcastHashJoin [codegen id : 7] Left keys [1]: [ss_store_sk#2] -Right keys [1]: [ss_store_sk#13] -Join condition: (cast(revenue#11 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#28)), DecimalType(23,7))) +Right keys [1]: [ss_store_sk#12] +Join condition: (cast(revenue#10 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#25)), DecimalType(23,7))) (25) Project [codegen id : 7] -Output [3]: [ss_store_sk#2, ss_item_sk#1, revenue#11] -Input [5]: [ss_store_sk#2, ss_item_sk#1, revenue#11, ss_store_sk#13, ave#28] +Output [3]: [ss_store_sk#2, ss_item_sk#1, revenue#10] +Input [5]: [ss_store_sk#2, ss_item_sk#1, revenue#10, ss_store_sk#12, ave#25] (26) BroadcastExchange -Input [3]: [ss_store_sk#2, ss_item_sk#1, revenue#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] +Input [3]: [ss_store_sk#2, ss_item_sk#1, revenue#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (27) Scan parquet default.store -Output [2]: [s_store_sk#31, s_store_name#32] +Output [2]: [s_store_sk#26, s_store_name#27] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (28) ColumnarToRow -Input [2]: [s_store_sk#31, s_store_name#32] +Input [2]: [s_store_sk#26, s_store_name#27] (29) Filter -Input [2]: [s_store_sk#31, s_store_name#32] -Condition : isnotnull(s_store_sk#31) +Input [2]: [s_store_sk#26, s_store_name#27] +Condition : isnotnull(s_store_sk#26) (30) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_store_sk#2] -Right keys [1]: [s_store_sk#31] +Right keys [1]: [s_store_sk#26] Join condition: None (31) Project [codegen id : 8] -Output [3]: [ss_item_sk#1, revenue#11, s_store_name#32] -Input [5]: [ss_store_sk#2, ss_item_sk#1, revenue#11, s_store_sk#31, s_store_name#32] +Output [3]: [ss_item_sk#1, revenue#10, s_store_name#27] +Input [5]: [ss_store_sk#2, ss_item_sk#1, revenue#10, s_store_sk#26, s_store_name#27] (32) BroadcastExchange -Input [3]: [ss_item_sk#1, revenue#11, s_store_name#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] +Input [3]: [ss_item_sk#1, revenue#10, s_store_name#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] (33) Scan parquet default.item -Output [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Output [5]: [i_item_sk#28, i_item_desc#29, i_current_price#30, i_wholesale_cost#31, i_brand#32] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (34) ColumnarToRow -Input [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Input [5]: [i_item_sk#28, i_item_desc#29, i_current_price#30, i_wholesale_cost#31, i_brand#32] (35) Filter -Input [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] -Condition : isnotnull(i_item_sk#34) +Input [5]: [i_item_sk#28, i_item_desc#29, i_current_price#30, i_wholesale_cost#31, i_brand#32] +Condition : isnotnull(i_item_sk#28) (36) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#34] +Right keys [1]: [i_item_sk#28] Join condition: None (37) Project [codegen id : 9] -Output [6]: [s_store_name#32, i_item_desc#35, revenue#11, i_current_price#36, i_wholesale_cost#37, i_brand#38] -Input [8]: [ss_item_sk#1, revenue#11, s_store_name#32, i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Output [6]: [s_store_name#27, i_item_desc#29, revenue#10, i_current_price#30, i_wholesale_cost#31, i_brand#32] +Input [8]: [ss_item_sk#1, revenue#10, s_store_name#27, i_item_sk#28, i_item_desc#29, i_current_price#30, i_wholesale_cost#31, i_brand#32] (38) TakeOrderedAndProject -Input [6]: [s_store_name#32, i_item_desc#35, revenue#11, i_current_price#36, i_wholesale_cost#37, i_brand#38] -Arguments: 100, [s_store_name#32 ASC NULLS FIRST, i_item_desc#35 ASC NULLS FIRST], [s_store_name#32, i_item_desc#35, revenue#11, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Input [6]: [s_store_name#27, i_item_desc#29, revenue#10, i_current_price#30, i_wholesale_cost#31, i_brand#32] +Arguments: 100, [s_store_name#27 ASC NULLS FIRST, i_item_desc#29 ASC NULLS FIRST], [s_store_name#27, i_item_desc#29, revenue#10, i_current_price#30, i_wholesale_cost#31, i_brand#32] ===== Subqueries ===== @@ -233,27 +233,27 @@ BroadcastExchange (43) (39) Scan parquet default.date_dim -Output [2]: [d_date_sk#6, d_month_seq#39] +Output [2]: [d_date_sk#6, d_month_seq#33] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), GreaterThanOrEqual(d_date_sk,2451911), LessThanOrEqual(d_date_sk,2452275), IsNotNull(d_date_sk)] ReadSchema: struct (40) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#6, d_month_seq#39] +Input [2]: [d_date_sk#6, d_month_seq#33] (41) Filter [codegen id : 1] -Input [2]: [d_date_sk#6, d_month_seq#39] -Condition : (((((isnotnull(d_month_seq#39) AND (d_month_seq#39 >= 1212)) AND (d_month_seq#39 <= 1223)) AND (d_date_sk#6 >= 2451911)) AND (d_date_sk#6 <= 2452275)) AND isnotnull(d_date_sk#6)) +Input [2]: [d_date_sk#6, d_month_seq#33] +Condition : (((((isnotnull(d_month_seq#33) AND (d_month_seq#33 >= 1212)) AND (d_month_seq#33 <= 1223)) AND (d_date_sk#6 >= 2451911)) AND (d_date_sk#6 <= 2452275)) AND isnotnull(d_date_sk#6)) (42) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [2]: [d_date_sk#6, d_month_seq#39] +Input [2]: [d_date_sk#6, d_month_seq#33] (43) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:2 Hosting operator id = 11 Hosting Expression = ss_sold_date_sk#15 IN dynamicpruning#5 +Subquery:2 Hosting operator id = 11 Hosting Expression = ss_sold_date_sk#14 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65/explain.txt index 019f4fa4c7076..f6dfdf7342ff9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q65/explain.txt @@ -89,22 +89,22 @@ Results [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] (11) Exchange Input [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] -Arguments: hashpartitioning(ss_store_sk#4, ss_item_sk#3, 5), ENSURE_REQUIREMENTS, [id=#11] +Arguments: hashpartitioning(ss_store_sk#4, ss_item_sk#3, 5), ENSURE_REQUIREMENTS, [plan_id=1] (12) HashAggregate [codegen id : 3] Input [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] Keys [2]: [ss_store_sk#4, ss_item_sk#3] Functions [1]: [sum(UnscaledValue(ss_sales_price#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#5))#12] -Results [3]: [ss_store_sk#4, ss_item_sk#3, MakeDecimal(sum(UnscaledValue(ss_sales_price#5))#12,17,2) AS revenue#13] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#5))#11] +Results [3]: [ss_store_sk#4, ss_item_sk#3, MakeDecimal(sum(UnscaledValue(ss_sales_price#5))#11,17,2) AS revenue#12] (13) Filter [codegen id : 3] -Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#13] -Condition : isnotnull(revenue#13) +Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#12] +Condition : isnotnull(revenue#12) (14) BroadcastExchange -Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 9] Left keys [1]: [s_store_sk#1] @@ -112,115 +112,115 @@ Right keys [1]: [ss_store_sk#4] Join condition: None (16) Project [codegen id : 9] -Output [4]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#13] -Input [5]: [s_store_sk#1, s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#13] +Output [4]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12] +Input [5]: [s_store_sk#1, s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12] (17) Scan parquet default.item -Output [5]: [i_item_sk#15, i_item_desc#16, i_current_price#17, i_wholesale_cost#18, i_brand#19] +Output [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (18) ColumnarToRow [codegen id : 4] -Input [5]: [i_item_sk#15, i_item_desc#16, i_current_price#17, i_wholesale_cost#18, i_brand#19] +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] (19) Filter [codegen id : 4] -Input [5]: [i_item_sk#15, i_item_desc#16, i_current_price#17, i_wholesale_cost#18, i_brand#19] -Condition : isnotnull(i_item_sk#15) +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Condition : isnotnull(i_item_sk#13) (20) BroadcastExchange -Input [5]: [i_item_sk#15, i_item_desc#16, i_current_price#17, i_wholesale_cost#18, i_brand#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_item_sk#3] -Right keys [1]: [i_item_sk#15] +Right keys [1]: [i_item_sk#13] Join condition: None (22) Project [codegen id : 9] -Output [7]: [s_store_name#2, ss_store_sk#4, revenue#13, i_item_desc#16, i_current_price#17, i_wholesale_cost#18, i_brand#19] -Input [9]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#13, i_item_sk#15, i_item_desc#16, i_current_price#17, i_wholesale_cost#18, i_brand#19] +Output [7]: [s_store_name#2, ss_store_sk#4, revenue#12, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Input [9]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12, i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] (23) Scan parquet default.store_sales -Output [4]: [ss_item_sk#21, ss_store_sk#22, ss_sales_price#23, ss_sold_date_sk#24] +Output [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#24), (ss_sold_date_sk#24 >= 2451911), (ss_sold_date_sk#24 <= 2452275), dynamicpruningexpression(ss_sold_date_sk#24 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ss_sold_date_sk#21), (ss_sold_date_sk#21 >= 2451911), (ss_sold_date_sk#21 <= 2452275), dynamicpruningexpression(ss_sold_date_sk#21 IN dynamicpruning#7)] PushedFilters: [IsNotNull(ss_store_sk)] ReadSchema: struct (24) ColumnarToRow [codegen id : 6] -Input [4]: [ss_item_sk#21, ss_store_sk#22, ss_sales_price#23, ss_sold_date_sk#24] +Input [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] (25) Filter [codegen id : 6] -Input [4]: [ss_item_sk#21, ss_store_sk#22, ss_sales_price#23, ss_sold_date_sk#24] -Condition : isnotnull(ss_store_sk#22) +Input [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] +Condition : isnotnull(ss_store_sk#19) (26) ReusedExchange [Reuses operator id: 43] -Output [1]: [d_date_sk#25] +Output [1]: [d_date_sk#22] (27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#24] -Right keys [1]: [d_date_sk#25] +Left keys [1]: [ss_sold_date_sk#21] +Right keys [1]: [d_date_sk#22] Join condition: None (28) Project [codegen id : 6] -Output [3]: [ss_item_sk#21, ss_store_sk#22, ss_sales_price#23] -Input [5]: [ss_item_sk#21, ss_store_sk#22, ss_sales_price#23, ss_sold_date_sk#24, d_date_sk#25] +Output [3]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20] +Input [5]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21, d_date_sk#22] (29) HashAggregate [codegen id : 6] -Input [3]: [ss_item_sk#21, ss_store_sk#22, ss_sales_price#23] -Keys [2]: [ss_store_sk#22, ss_item_sk#21] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#23))] -Aggregate Attributes [1]: [sum#26] -Results [3]: [ss_store_sk#22, ss_item_sk#21, sum#27] +Input [3]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20] +Keys [2]: [ss_store_sk#19, ss_item_sk#18] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#20))] +Aggregate Attributes [1]: [sum#23] +Results [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] (30) Exchange -Input [3]: [ss_store_sk#22, ss_item_sk#21, sum#27] -Arguments: hashpartitioning(ss_store_sk#22, ss_item_sk#21, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] +Arguments: hashpartitioning(ss_store_sk#19, ss_item_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=4] (31) HashAggregate [codegen id : 7] -Input [3]: [ss_store_sk#22, ss_item_sk#21, sum#27] -Keys [2]: [ss_store_sk#22, ss_item_sk#21] -Functions [1]: [sum(UnscaledValue(ss_sales_price#23))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#23))#29] -Results [2]: [ss_store_sk#22, MakeDecimal(sum(UnscaledValue(ss_sales_price#23))#29,17,2) AS revenue#30] +Input [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] +Keys [2]: [ss_store_sk#19, ss_item_sk#18] +Functions [1]: [sum(UnscaledValue(ss_sales_price#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#20))#25] +Results [2]: [ss_store_sk#19, MakeDecimal(sum(UnscaledValue(ss_sales_price#20))#25,17,2) AS revenue#26] (32) HashAggregate [codegen id : 7] -Input [2]: [ss_store_sk#22, revenue#30] -Keys [1]: [ss_store_sk#22] -Functions [1]: [partial_avg(revenue#30)] -Aggregate Attributes [2]: [sum#31, count#32] -Results [3]: [ss_store_sk#22, sum#33, count#34] +Input [2]: [ss_store_sk#19, revenue#26] +Keys [1]: [ss_store_sk#19] +Functions [1]: [partial_avg(revenue#26)] +Aggregate Attributes [2]: [sum#27, count#28] +Results [3]: [ss_store_sk#19, sum#29, count#30] (33) Exchange -Input [3]: [ss_store_sk#22, sum#33, count#34] -Arguments: hashpartitioning(ss_store_sk#22, 5), ENSURE_REQUIREMENTS, [id=#35] +Input [3]: [ss_store_sk#19, sum#29, count#30] +Arguments: hashpartitioning(ss_store_sk#19, 5), ENSURE_REQUIREMENTS, [plan_id=5] (34) HashAggregate [codegen id : 8] -Input [3]: [ss_store_sk#22, sum#33, count#34] -Keys [1]: [ss_store_sk#22] -Functions [1]: [avg(revenue#30)] -Aggregate Attributes [1]: [avg(revenue#30)#36] -Results [2]: [ss_store_sk#22, avg(revenue#30)#36 AS ave#37] +Input [3]: [ss_store_sk#19, sum#29, count#30] +Keys [1]: [ss_store_sk#19] +Functions [1]: [avg(revenue#26)] +Aggregate Attributes [1]: [avg(revenue#26)#31] +Results [2]: [ss_store_sk#19, avg(revenue#26)#31 AS ave#32] (35) BroadcastExchange -Input [2]: [ss_store_sk#22, ave#37] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#38] +Input [2]: [ss_store_sk#19, ave#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] (36) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#4] -Right keys [1]: [ss_store_sk#22] -Join condition: (cast(revenue#13 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#37)), DecimalType(23,7))) +Right keys [1]: [ss_store_sk#19] +Join condition: (cast(revenue#12 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#32)), DecimalType(23,7))) (37) Project [codegen id : 9] -Output [6]: [s_store_name#2, i_item_desc#16, revenue#13, i_current_price#17, i_wholesale_cost#18, i_brand#19] -Input [9]: [s_store_name#2, ss_store_sk#4, revenue#13, i_item_desc#16, i_current_price#17, i_wholesale_cost#18, i_brand#19, ss_store_sk#22, ave#37] +Output [6]: [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Input [9]: [s_store_name#2, ss_store_sk#4, revenue#12, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17, ss_store_sk#19, ave#32] (38) TakeOrderedAndProject -Input [6]: [s_store_name#2, i_item_desc#16, revenue#13, i_current_price#17, i_wholesale_cost#18, i_brand#19] -Arguments: 100, [s_store_name#2 ASC NULLS FIRST, i_item_desc#16 ASC NULLS FIRST], [s_store_name#2, i_item_desc#16, revenue#13, i_current_price#17, i_wholesale_cost#18, i_brand#19] +Input [6]: [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Arguments: 100, [s_store_name#2 ASC NULLS FIRST, i_item_desc#14 ASC NULLS FIRST], [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] ===== Subqueries ===== @@ -233,27 +233,27 @@ BroadcastExchange (43) (39) Scan parquet default.date_dim -Output [2]: [d_date_sk#8, d_month_seq#39] +Output [2]: [d_date_sk#8, d_month_seq#33] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), GreaterThanOrEqual(d_date_sk,2451911), LessThanOrEqual(d_date_sk,2452275), IsNotNull(d_date_sk)] ReadSchema: struct (40) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#8, d_month_seq#39] +Input [2]: [d_date_sk#8, d_month_seq#33] (41) Filter [codegen id : 1] -Input [2]: [d_date_sk#8, d_month_seq#39] -Condition : (((((isnotnull(d_month_seq#39) AND (d_month_seq#39 >= 1212)) AND (d_month_seq#39 <= 1223)) AND (d_date_sk#8 >= 2451911)) AND (d_date_sk#8 <= 2452275)) AND isnotnull(d_date_sk#8)) +Input [2]: [d_date_sk#8, d_month_seq#33] +Condition : (((((isnotnull(d_month_seq#33) AND (d_month_seq#33 >= 1212)) AND (d_month_seq#33 <= 1223)) AND (d_date_sk#8 >= 2451911)) AND (d_date_sk#8 <= 2452275)) AND isnotnull(d_date_sk#8)) (42) Project [codegen id : 1] Output [1]: [d_date_sk#8] -Input [2]: [d_date_sk#8, d_month_seq#39] +Input [2]: [d_date_sk#8, d_month_seq#33] (43) BroadcastExchange Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:2 Hosting operator id = 23 Hosting Expression = ss_sold_date_sk#24 IN dynamicpruning#7 +Subquery:2 Hosting operator id = 23 Hosting Expression = ss_sold_date_sk#21 IN dynamicpruning#7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68.sf100/explain.txt index 020f7344233b1..48a9282c67f20 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68.sf100/explain.txt @@ -93,7 +93,7 @@ Input [2]: [s_store_sk#12, s_city#13] (11) BroadcastExchange Input [1]: [s_store_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#4] @@ -105,150 +105,150 @@ Output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, s_store_sk#12] (14) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Output [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [Or(EqualTo(hd_dep_count,5),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] (16) Filter [codegen id : 3] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] -Condition : (((hd_dep_count#16 = 5) OR (hd_vehicle_count#17 = 3)) AND isnotnull(hd_demo_sk#15)) +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : (((hd_dep_count#15 = 5) OR (hd_vehicle_count#16 = 3)) AND isnotnull(hd_demo_sk#14)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#15] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Output [1]: [hd_demo_sk#14] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] (18) BroadcastExchange -Input [1]: [hd_demo_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [hd_demo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#15] +Right keys [1]: [hd_demo_sk#14] Join condition: None (20) Project [codegen id : 4] Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] -Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, hd_demo_sk#15] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, hd_demo_sk#14] (21) BroadcastExchange Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=3] (22) Scan parquet default.customer_address -Output [2]: [ca_address_sk#20, ca_city#21] +Output [2]: [ca_address_sk#17, ca_city#18] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] ReadSchema: struct (23) ColumnarToRow -Input [2]: [ca_address_sk#20, ca_city#21] +Input [2]: [ca_address_sk#17, ca_city#18] (24) Filter -Input [2]: [ca_address_sk#20, ca_city#21] -Condition : (isnotnull(ca_address_sk#20) AND isnotnull(ca_city#21)) +Input [2]: [ca_address_sk#17, ca_city#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_city#18)) (25) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_addr_sk#3] -Right keys [1]: [ca_address_sk#20] +Right keys [1]: [ca_address_sk#17] Join condition: None (26) Project [codegen id : 5] -Output [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#21] -Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_address_sk#20, ca_city#21] +Output [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#18] +Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_address_sk#17, ca_city#18] (27) HashAggregate [codegen id : 5] -Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#21] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#21] +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#18] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#6)), partial_sum(UnscaledValue(ss_ext_list_price#7)), partial_sum(UnscaledValue(ss_ext_tax#8))] -Aggregate Attributes [3]: [sum#22, sum#23, sum#24] -Results [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#21, sum#25, sum#26, sum#27] +Aggregate Attributes [3]: [sum#19, sum#20, sum#21] +Results [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#22, sum#23, sum#24] (28) Exchange -Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#21, sum#25, sum#26, sum#27] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#21, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 6] -Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#21, sum#25, sum#26, sum#27] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#21] +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#22, sum#23, sum#24] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#6)), sum(UnscaledValue(ss_ext_list_price#7)), sum(UnscaledValue(ss_ext_tax#8))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#29, sum(UnscaledValue(ss_ext_list_price#7))#30, sum(UnscaledValue(ss_ext_tax#8))#31] -Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#21 AS bought_city#32, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#29,17,2) AS extended_price#33, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#7))#30,17,2) AS list_price#34, MakeDecimal(sum(UnscaledValue(ss_ext_tax#8))#31,17,2) AS extended_tax#35] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#25, sum(UnscaledValue(ss_ext_list_price#7))#26, sum(UnscaledValue(ss_ext_tax#8))#27] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#18 AS bought_city#28, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#25,17,2) AS extended_price#29, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#7))#26,17,2) AS list_price#30, MakeDecimal(sum(UnscaledValue(ss_ext_tax#8))#27,17,2) AS extended_tax#31] (30) BroadcastExchange -Input [6]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#32, extended_price#33, list_price#34, extended_tax#35] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#36] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#28, extended_price#29, list_price#30, extended_tax#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=5] (31) Scan parquet default.customer -Output [4]: [c_customer_sk#37, c_current_addr_sk#38, c_first_name#39, c_last_name#40] +Output [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (32) ColumnarToRow -Input [4]: [c_customer_sk#37, c_current_addr_sk#38, c_first_name#39, c_last_name#40] +Input [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] (33) Filter -Input [4]: [c_customer_sk#37, c_current_addr_sk#38, c_first_name#39, c_last_name#40] -Condition : (isnotnull(c_customer_sk#37) AND isnotnull(c_current_addr_sk#38)) +Input [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] +Condition : (isnotnull(c_customer_sk#32) AND isnotnull(c_current_addr_sk#33)) (34) BroadcastHashJoin [codegen id : 7] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#37] +Right keys [1]: [c_customer_sk#32] Join condition: None (35) Project [codegen id : 7] -Output [8]: [ss_ticket_number#5, bought_city#32, extended_price#33, list_price#34, extended_tax#35, c_current_addr_sk#38, c_first_name#39, c_last_name#40] -Input [10]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#32, extended_price#33, list_price#34, extended_tax#35, c_customer_sk#37, c_current_addr_sk#38, c_first_name#39, c_last_name#40] +Output [8]: [ss_ticket_number#5, bought_city#28, extended_price#29, list_price#30, extended_tax#31, c_current_addr_sk#33, c_first_name#34, c_last_name#35] +Input [10]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#28, extended_price#29, list_price#30, extended_tax#31, c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] (36) Exchange -Input [8]: [ss_ticket_number#5, bought_city#32, extended_price#33, list_price#34, extended_tax#35, c_current_addr_sk#38, c_first_name#39, c_last_name#40] -Arguments: hashpartitioning(c_current_addr_sk#38, 5), ENSURE_REQUIREMENTS, [id=#41] +Input [8]: [ss_ticket_number#5, bought_city#28, extended_price#29, list_price#30, extended_tax#31, c_current_addr_sk#33, c_first_name#34, c_last_name#35] +Arguments: hashpartitioning(c_current_addr_sk#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] (37) Sort [codegen id : 8] -Input [8]: [ss_ticket_number#5, bought_city#32, extended_price#33, list_price#34, extended_tax#35, c_current_addr_sk#38, c_first_name#39, c_last_name#40] -Arguments: [c_current_addr_sk#38 ASC NULLS FIRST], false, 0 +Input [8]: [ss_ticket_number#5, bought_city#28, extended_price#29, list_price#30, extended_tax#31, c_current_addr_sk#33, c_first_name#34, c_last_name#35] +Arguments: [c_current_addr_sk#33 ASC NULLS FIRST], false, 0 (38) Scan parquet default.customer_address -Output [2]: [ca_address_sk#42, ca_city#43] +Output [2]: [ca_address_sk#36, ca_city#37] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] ReadSchema: struct (39) ColumnarToRow [codegen id : 9] -Input [2]: [ca_address_sk#42, ca_city#43] +Input [2]: [ca_address_sk#36, ca_city#37] (40) Filter [codegen id : 9] -Input [2]: [ca_address_sk#42, ca_city#43] -Condition : (isnotnull(ca_address_sk#42) AND isnotnull(ca_city#43)) +Input [2]: [ca_address_sk#36, ca_city#37] +Condition : (isnotnull(ca_address_sk#36) AND isnotnull(ca_city#37)) (41) Exchange -Input [2]: [ca_address_sk#42, ca_city#43] -Arguments: hashpartitioning(ca_address_sk#42, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [2]: [ca_address_sk#36, ca_city#37] +Arguments: hashpartitioning(ca_address_sk#36, 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) Sort [codegen id : 10] -Input [2]: [ca_address_sk#42, ca_city#43] -Arguments: [ca_address_sk#42 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#36, ca_city#37] +Arguments: [ca_address_sk#36 ASC NULLS FIRST], false, 0 (43) SortMergeJoin [codegen id : 11] -Left keys [1]: [c_current_addr_sk#38] -Right keys [1]: [ca_address_sk#42] -Join condition: NOT (ca_city#43 = bought_city#32) +Left keys [1]: [c_current_addr_sk#33] +Right keys [1]: [ca_address_sk#36] +Join condition: NOT (ca_city#37 = bought_city#28) (44) Project [codegen id : 11] -Output [8]: [c_last_name#40, c_first_name#39, ca_city#43, bought_city#32, ss_ticket_number#5, extended_price#33, extended_tax#35, list_price#34] -Input [10]: [ss_ticket_number#5, bought_city#32, extended_price#33, list_price#34, extended_tax#35, c_current_addr_sk#38, c_first_name#39, c_last_name#40, ca_address_sk#42, ca_city#43] +Output [8]: [c_last_name#35, c_first_name#34, ca_city#37, bought_city#28, ss_ticket_number#5, extended_price#29, extended_tax#31, list_price#30] +Input [10]: [ss_ticket_number#5, bought_city#28, extended_price#29, list_price#30, extended_tax#31, c_current_addr_sk#33, c_first_name#34, c_last_name#35, ca_address_sk#36, ca_city#37] (45) TakeOrderedAndProject -Input [8]: [c_last_name#40, c_first_name#39, ca_city#43, bought_city#32, ss_ticket_number#5, extended_price#33, extended_tax#35, list_price#34] -Arguments: 100, [c_last_name#40 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#40, c_first_name#39, ca_city#43, bought_city#32, ss_ticket_number#5, extended_price#33, extended_tax#35, list_price#34] +Input [8]: [c_last_name#35, c_first_name#34, ca_city#37, bought_city#28, ss_ticket_number#5, extended_price#29, extended_tax#31, list_price#30] +Arguments: 100, [c_last_name#35 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#35, c_first_name#34, ca_city#37, bought_city#28, ss_ticket_number#5, extended_price#29, extended_tax#31, list_price#30] ===== Subqueries ===== @@ -261,25 +261,25 @@ BroadcastExchange (50) (46) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_year#45, d_dom#46] +Output [3]: [d_date_sk#11, d_year#38, d_dom#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), In(d_date_sk, [2451180,2451181,2451211,2451212,2451239,2451240,2451270,2451271,2451300,2451301,2451331,2451332,2451361,2451362,2451392,2451393,2451423,2451424,2451453,2451454,2451484,2451485,2451514,2451515,2451545,2451546,2451576,2451577,2451605,2451606,2451636,2451637,2451666,2451667,2451697,2451698,2451727,2451728,2451758,2451759,2451789,2451790,2451819,2451820,2451850,2451851,2451880,2451881,2451911,2451912,2451942,2451943,2451970,2451971,2452001,2452002,2452031,2452032,2452062,2452063,2452092,2452093,2452123,2452124,2452154,2452155,2452184,2452185,2452215,2452216,2452245,2452246]), IsNotNull(d_date_sk)] ReadSchema: struct (47) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#45, d_dom#46] +Input [3]: [d_date_sk#11, d_year#38, d_dom#39] (48) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#45, d_dom#46] -Condition : (((((isnotnull(d_dom#46) AND (d_dom#46 >= 1)) AND (d_dom#46 <= 2)) AND d_year#45 IN (1999,2000,2001)) AND d_date_sk#11 INSET 2451180, 2451181, 2451211, 2451212, 2451239, 2451240, 2451270, 2451271, 2451300, 2451301, 2451331, 2451332, 2451361, 2451362, 2451392, 2451393, 2451423, 2451424, 2451453, 2451454, 2451484, 2451485, 2451514, 2451515, 2451545, 2451546, 2451576, 2451577, 2451605, 2451606, 2451636, 2451637, 2451666, 2451667, 2451697, 2451698, 2451727, 2451728, 2451758, 2451759, 2451789, 2451790, 2451819, 2451820, 2451850, 2451851, 2451880, 2451881, 2451911, 2451912, 2451942, 2451943, 2451970, 2451971, 2452001, 2452002, 2452031, 2452032, 2452062, 2452063, 2452092, 2452093, 2452123, 2452124, 2452154, 2452155, 2452184, 2452185, 2452215, 2452216, 2452245, 2452246) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#11, d_year#38, d_dom#39] +Condition : (((((isnotnull(d_dom#39) AND (d_dom#39 >= 1)) AND (d_dom#39 <= 2)) AND d_year#38 IN (1999,2000,2001)) AND d_date_sk#11 INSET 2451180, 2451181, 2451211, 2451212, 2451239, 2451240, 2451270, 2451271, 2451300, 2451301, 2451331, 2451332, 2451361, 2451362, 2451392, 2451393, 2451423, 2451424, 2451453, 2451454, 2451484, 2451485, 2451514, 2451515, 2451545, 2451546, 2451576, 2451577, 2451605, 2451606, 2451636, 2451637, 2451666, 2451667, 2451697, 2451698, 2451727, 2451728, 2451758, 2451759, 2451789, 2451790, 2451819, 2451820, 2451850, 2451851, 2451880, 2451881, 2451911, 2451912, 2451942, 2451943, 2451970, 2451971, 2452001, 2452002, 2452031, 2452032, 2452062, 2452063, 2452092, 2452093, 2452123, 2452124, 2452154, 2452155, 2452184, 2452185, 2452215, 2452216, 2452245, 2452246) AND isnotnull(d_date_sk#11)) (49) Project [codegen id : 1] Output [1]: [d_date_sk#11] -Input [3]: [d_date_sk#11, d_year#45, d_dom#46] +Input [3]: [d_date_sk#11, d_year#38, d_dom#39] (50) BroadcastExchange Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#47] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68/explain.txt index 63f007cf1205e..d0cf7f029917d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q68/explain.txt @@ -87,7 +87,7 @@ Input [2]: [s_store_sk#12, s_city#13] (11) BroadcastExchange Input [1]: [s_store_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_store_sk#4] @@ -99,123 +99,123 @@ Output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, s_store_sk#12] (14) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Output [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [Or(EqualTo(hd_dep_count,5),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] (16) Filter [codegen id : 3] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] -Condition : (((hd_dep_count#16 = 5) OR (hd_vehicle_count#17 = 3)) AND isnotnull(hd_demo_sk#15)) +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : (((hd_dep_count#15 = 5) OR (hd_vehicle_count#16 = 3)) AND isnotnull(hd_demo_sk#14)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#15] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Output [1]: [hd_demo_sk#14] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] (18) BroadcastExchange -Input [1]: [hd_demo_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [hd_demo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#15] +Right keys [1]: [hd_demo_sk#14] Join condition: None (20) Project [codegen id : 5] Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] -Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, hd_demo_sk#15] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, hd_demo_sk#14] (21) Scan parquet default.customer_address -Output [2]: [ca_address_sk#19, ca_city#20] +Output [2]: [ca_address_sk#17, ca_city#18] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] ReadSchema: struct (22) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#19, ca_city#20] +Input [2]: [ca_address_sk#17, ca_city#18] (23) Filter [codegen id : 4] -Input [2]: [ca_address_sk#19, ca_city#20] -Condition : (isnotnull(ca_address_sk#19) AND isnotnull(ca_city#20)) +Input [2]: [ca_address_sk#17, ca_city#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_city#18)) (24) BroadcastExchange -Input [2]: [ca_address_sk#19, ca_city#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] +Input [2]: [ca_address_sk#17, ca_city#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (25) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_addr_sk#3] -Right keys [1]: [ca_address_sk#19] +Right keys [1]: [ca_address_sk#17] Join condition: None (26) Project [codegen id : 5] -Output [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#20] -Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_address_sk#19, ca_city#20] +Output [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#18] +Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_address_sk#17, ca_city#18] (27) HashAggregate [codegen id : 5] -Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#20] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20] +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#18] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#6)), partial_sum(UnscaledValue(ss_ext_list_price#7)), partial_sum(UnscaledValue(ss_ext_tax#8))] -Aggregate Attributes [3]: [sum#22, sum#23, sum#24] -Results [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20, sum#25, sum#26, sum#27] +Aggregate Attributes [3]: [sum#19, sum#20, sum#21] +Results [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#22, sum#23, sum#24] (28) Exchange -Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20, sum#25, sum#26, sum#27] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 8] -Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20, sum#25, sum#26, sum#27] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20] +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#22, sum#23, sum#24] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#6)), sum(UnscaledValue(ss_ext_list_price#7)), sum(UnscaledValue(ss_ext_tax#8))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#29, sum(UnscaledValue(ss_ext_list_price#7))#30, sum(UnscaledValue(ss_ext_tax#8))#31] -Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#20 AS bought_city#32, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#29,17,2) AS extended_price#33, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#7))#30,17,2) AS list_price#34, MakeDecimal(sum(UnscaledValue(ss_ext_tax#8))#31,17,2) AS extended_tax#35] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#25, sum(UnscaledValue(ss_ext_list_price#7))#26, sum(UnscaledValue(ss_ext_tax#8))#27] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#18 AS bought_city#28, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#25,17,2) AS extended_price#29, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#7))#26,17,2) AS list_price#30, MakeDecimal(sum(UnscaledValue(ss_ext_tax#8))#27,17,2) AS extended_tax#31] (30) Scan parquet default.customer -Output [4]: [c_customer_sk#36, c_current_addr_sk#37, c_first_name#38, c_last_name#39] +Output [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 6] -Input [4]: [c_customer_sk#36, c_current_addr_sk#37, c_first_name#38, c_last_name#39] +Input [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] (32) Filter [codegen id : 6] -Input [4]: [c_customer_sk#36, c_current_addr_sk#37, c_first_name#38, c_last_name#39] -Condition : (isnotnull(c_customer_sk#36) AND isnotnull(c_current_addr_sk#37)) +Input [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] +Condition : (isnotnull(c_customer_sk#32) AND isnotnull(c_current_addr_sk#33)) (33) BroadcastExchange -Input [4]: [c_customer_sk#36, c_current_addr_sk#37, c_first_name#38, c_last_name#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] +Input [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#36] +Right keys [1]: [c_customer_sk#32] Join condition: None (35) Project [codegen id : 8] -Output [8]: [ss_ticket_number#5, bought_city#32, extended_price#33, list_price#34, extended_tax#35, c_current_addr_sk#37, c_first_name#38, c_last_name#39] -Input [10]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#32, extended_price#33, list_price#34, extended_tax#35, c_customer_sk#36, c_current_addr_sk#37, c_first_name#38, c_last_name#39] +Output [8]: [ss_ticket_number#5, bought_city#28, extended_price#29, list_price#30, extended_tax#31, c_current_addr_sk#33, c_first_name#34, c_last_name#35] +Input [10]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#28, extended_price#29, list_price#30, extended_tax#31, c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] (36) ReusedExchange [Reuses operator id: 24] -Output [2]: [ca_address_sk#41, ca_city#42] +Output [2]: [ca_address_sk#36, ca_city#37] (37) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [c_current_addr_sk#37] -Right keys [1]: [ca_address_sk#41] -Join condition: NOT (ca_city#42 = bought_city#32) +Left keys [1]: [c_current_addr_sk#33] +Right keys [1]: [ca_address_sk#36] +Join condition: NOT (ca_city#37 = bought_city#28) (38) Project [codegen id : 8] -Output [8]: [c_last_name#39, c_first_name#38, ca_city#42, bought_city#32, ss_ticket_number#5, extended_price#33, extended_tax#35, list_price#34] -Input [10]: [ss_ticket_number#5, bought_city#32, extended_price#33, list_price#34, extended_tax#35, c_current_addr_sk#37, c_first_name#38, c_last_name#39, ca_address_sk#41, ca_city#42] +Output [8]: [c_last_name#35, c_first_name#34, ca_city#37, bought_city#28, ss_ticket_number#5, extended_price#29, extended_tax#31, list_price#30] +Input [10]: [ss_ticket_number#5, bought_city#28, extended_price#29, list_price#30, extended_tax#31, c_current_addr_sk#33, c_first_name#34, c_last_name#35, ca_address_sk#36, ca_city#37] (39) TakeOrderedAndProject -Input [8]: [c_last_name#39, c_first_name#38, ca_city#42, bought_city#32, ss_ticket_number#5, extended_price#33, extended_tax#35, list_price#34] -Arguments: 100, [c_last_name#39 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#39, c_first_name#38, ca_city#42, bought_city#32, ss_ticket_number#5, extended_price#33, extended_tax#35, list_price#34] +Input [8]: [c_last_name#35, c_first_name#34, ca_city#37, bought_city#28, ss_ticket_number#5, extended_price#29, extended_tax#31, list_price#30] +Arguments: 100, [c_last_name#35 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#35, c_first_name#34, ca_city#37, bought_city#28, ss_ticket_number#5, extended_price#29, extended_tax#31, list_price#30] ===== Subqueries ===== @@ -228,25 +228,25 @@ BroadcastExchange (44) (40) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_year#43, d_dom#44] +Output [3]: [d_date_sk#11, d_year#38, d_dom#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), In(d_date_sk, [2451180,2451181,2451211,2451212,2451239,2451240,2451270,2451271,2451300,2451301,2451331,2451332,2451361,2451362,2451392,2451393,2451423,2451424,2451453,2451454,2451484,2451485,2451514,2451515,2451545,2451546,2451576,2451577,2451605,2451606,2451636,2451637,2451666,2451667,2451697,2451698,2451727,2451728,2451758,2451759,2451789,2451790,2451819,2451820,2451850,2451851,2451880,2451881,2451911,2451912,2451942,2451943,2451970,2451971,2452001,2452002,2452031,2452032,2452062,2452063,2452092,2452093,2452123,2452124,2452154,2452155,2452184,2452185,2452215,2452216,2452245,2452246]), IsNotNull(d_date_sk)] ReadSchema: struct (41) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#43, d_dom#44] +Input [3]: [d_date_sk#11, d_year#38, d_dom#39] (42) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#43, d_dom#44] -Condition : (((((isnotnull(d_dom#44) AND (d_dom#44 >= 1)) AND (d_dom#44 <= 2)) AND d_year#43 IN (1999,2000,2001)) AND d_date_sk#11 INSET 2451180, 2451181, 2451211, 2451212, 2451239, 2451240, 2451270, 2451271, 2451300, 2451301, 2451331, 2451332, 2451361, 2451362, 2451392, 2451393, 2451423, 2451424, 2451453, 2451454, 2451484, 2451485, 2451514, 2451515, 2451545, 2451546, 2451576, 2451577, 2451605, 2451606, 2451636, 2451637, 2451666, 2451667, 2451697, 2451698, 2451727, 2451728, 2451758, 2451759, 2451789, 2451790, 2451819, 2451820, 2451850, 2451851, 2451880, 2451881, 2451911, 2451912, 2451942, 2451943, 2451970, 2451971, 2452001, 2452002, 2452031, 2452032, 2452062, 2452063, 2452092, 2452093, 2452123, 2452124, 2452154, 2452155, 2452184, 2452185, 2452215, 2452216, 2452245, 2452246) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#11, d_year#38, d_dom#39] +Condition : (((((isnotnull(d_dom#39) AND (d_dom#39 >= 1)) AND (d_dom#39 <= 2)) AND d_year#38 IN (1999,2000,2001)) AND d_date_sk#11 INSET 2451180, 2451181, 2451211, 2451212, 2451239, 2451240, 2451270, 2451271, 2451300, 2451301, 2451331, 2451332, 2451361, 2451362, 2451392, 2451393, 2451423, 2451424, 2451453, 2451454, 2451484, 2451485, 2451514, 2451515, 2451545, 2451546, 2451576, 2451577, 2451605, 2451606, 2451636, 2451637, 2451666, 2451667, 2451697, 2451698, 2451727, 2451728, 2451758, 2451759, 2451789, 2451790, 2451819, 2451820, 2451850, 2451851, 2451880, 2451881, 2451911, 2451912, 2451942, 2451943, 2451970, 2451971, 2452001, 2452002, 2452031, 2452032, 2452062, 2452063, 2452092, 2452093, 2452123, 2452124, 2452154, 2452155, 2452184, 2452185, 2452215, 2452216, 2452245, 2452246) AND isnotnull(d_date_sk#11)) (43) Project [codegen id : 1] Output [1]: [d_date_sk#11] -Input [3]: [d_date_sk#11, d_year#43, d_dom#44] +Input [3]: [d_date_sk#11, d_year#38, d_dom#39] (44) BroadcastExchange Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#45] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt index adbc64c43ff2f..7e79f01f2ed3c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7.sf100/explain.txt @@ -78,7 +78,7 @@ Input [3]: [p_promo_sk#11, p_channel_email#12, p_channel_event#13] (11) BroadcastExchange Input [1]: [p_promo_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_promo_sk#3] @@ -90,84 +90,84 @@ Output [6]: [ss_item_sk#1, ss_cdemo_sk#2, ss_quantity#4, ss_list_price#5, ss_sal Input [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, p_promo_sk#11] (14) Scan parquet default.customer_demographics -Output [4]: [cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18] +Output [4]: [cd_demo_sk#14, cd_gender#15, cd_marital_status#16, cd_education_status#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_gender), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), EqualTo(cd_gender,F), EqualTo(cd_marital_status,W), EqualTo(cd_education_status,Primary ), IsNotNull(cd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [4]: [cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18] +Input [4]: [cd_demo_sk#14, cd_gender#15, cd_marital_status#16, cd_education_status#17] (16) Filter [codegen id : 3] -Input [4]: [cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18] -Condition : ((((((isnotnull(cd_gender#16) AND isnotnull(cd_marital_status#17)) AND isnotnull(cd_education_status#18)) AND (cd_gender#16 = F)) AND (cd_marital_status#17 = W)) AND (cd_education_status#18 = Primary )) AND isnotnull(cd_demo_sk#15)) +Input [4]: [cd_demo_sk#14, cd_gender#15, cd_marital_status#16, cd_education_status#17] +Condition : ((((((isnotnull(cd_gender#15) AND isnotnull(cd_marital_status#16)) AND isnotnull(cd_education_status#17)) AND (cd_gender#15 = F)) AND (cd_marital_status#16 = W)) AND (cd_education_status#17 = Primary )) AND isnotnull(cd_demo_sk#14)) (17) Project [codegen id : 3] -Output [1]: [cd_demo_sk#15] -Input [4]: [cd_demo_sk#15, cd_gender#16, cd_marital_status#17, cd_education_status#18] +Output [1]: [cd_demo_sk#14] +Input [4]: [cd_demo_sk#14, cd_gender#15, cd_marital_status#16, cd_education_status#17] (18) BroadcastExchange -Input [1]: [cd_demo_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] +Input [1]: [cd_demo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#15] +Right keys [1]: [cd_demo_sk#14] Join condition: None (20) Project [codegen id : 5] Output [5]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [7]: [ss_item_sk#1, ss_cdemo_sk#2, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, cd_demo_sk#15] +Input [7]: [ss_item_sk#1, ss_cdemo_sk#2, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, cd_demo_sk#14] (21) Scan parquet default.item -Output [2]: [i_item_sk#20, i_item_id#21] +Output [2]: [i_item_sk#18, i_item_id#19] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#20, i_item_id#21] +Input [2]: [i_item_sk#18, i_item_id#19] (23) Filter [codegen id : 4] -Input [2]: [i_item_sk#20, i_item_id#21] -Condition : isnotnull(i_item_sk#20) +Input [2]: [i_item_sk#18, i_item_id#19] +Condition : isnotnull(i_item_sk#18) (24) BroadcastExchange -Input [2]: [i_item_sk#20, i_item_id#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] +Input [2]: [i_item_sk#18, i_item_id#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (25) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#20] +Right keys [1]: [i_item_sk#18] Join condition: None (26) Project [codegen id : 5] -Output [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#21] -Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#20, i_item_id#21] +Output [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19] +Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#18, i_item_id#19] (27) HashAggregate [codegen id : 5] -Input [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#21] -Keys [1]: [i_item_id#21] +Input [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19] +Keys [1]: [i_item_id#19] Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [8]: [sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] -Results [9]: [i_item_id#21, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Aggregate Attributes [8]: [sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Results [9]: [i_item_id#19, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] (28) Exchange -Input [9]: [i_item_id#21, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] -Arguments: hashpartitioning(i_item_id#21, 5), ENSURE_REQUIREMENTS, [id=#39] +Input [9]: [i_item_id#19, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Arguments: hashpartitioning(i_item_id#19, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 6] -Input [9]: [i_item_id#21, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] -Keys [1]: [i_item_id#21] +Input [9]: [i_item_id#19, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Keys [1]: [i_item_id#19] Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [4]: [avg(ss_quantity#4)#40, avg(UnscaledValue(ss_list_price#5))#41, avg(UnscaledValue(ss_coupon_amt#7))#42, avg(UnscaledValue(ss_sales_price#6))#43] -Results [5]: [i_item_id#21, avg(ss_quantity#4)#40 AS agg1#44, cast((avg(UnscaledValue(ss_list_price#5))#41 / 100.0) as decimal(11,6)) AS agg2#45, cast((avg(UnscaledValue(ss_coupon_amt#7))#42 / 100.0) as decimal(11,6)) AS agg3#46, cast((avg(UnscaledValue(ss_sales_price#6))#43 / 100.0) as decimal(11,6)) AS agg4#47] +Aggregate Attributes [4]: [avg(ss_quantity#4)#36, avg(UnscaledValue(ss_list_price#5))#37, avg(UnscaledValue(ss_coupon_amt#7))#38, avg(UnscaledValue(ss_sales_price#6))#39] +Results [5]: [i_item_id#19, avg(ss_quantity#4)#36 AS agg1#40, cast((avg(UnscaledValue(ss_list_price#5))#37 / 100.0) as decimal(11,6)) AS agg2#41, cast((avg(UnscaledValue(ss_coupon_amt#7))#38 / 100.0) as decimal(11,6)) AS agg3#42, cast((avg(UnscaledValue(ss_sales_price#6))#39 / 100.0) as decimal(11,6)) AS agg4#43] (30) TakeOrderedAndProject -Input [5]: [i_item_id#21, agg1#44, agg2#45, agg3#46, agg4#47] -Arguments: 100, [i_item_id#21 ASC NULLS FIRST], [i_item_id#21, agg1#44, agg2#45, agg3#46, agg4#47] +Input [5]: [i_item_id#19, agg1#40, agg2#41, agg3#42, agg4#43] +Arguments: 100, [i_item_id#19 ASC NULLS FIRST], [i_item_id#19, agg1#40, agg2#41, agg3#42, agg4#43] ===== Subqueries ===== @@ -180,25 +180,25 @@ BroadcastExchange (35) (31) Scan parquet default.date_dim -Output [2]: [d_date_sk#10, d_year#48] +Output [2]: [d_date_sk#10, d_year#44] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), GreaterThanOrEqual(d_date_sk,2450815), LessThanOrEqual(d_date_sk,2451179), IsNotNull(d_date_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#10, d_year#48] +Input [2]: [d_date_sk#10, d_year#44] (33) Filter [codegen id : 1] -Input [2]: [d_date_sk#10, d_year#48] -Condition : ((((isnotnull(d_year#48) AND (d_year#48 = 1998)) AND (d_date_sk#10 >= 2450815)) AND (d_date_sk#10 <= 2451179)) AND isnotnull(d_date_sk#10)) +Input [2]: [d_date_sk#10, d_year#44] +Condition : ((((isnotnull(d_year#44) AND (d_year#44 = 1998)) AND (d_date_sk#10 >= 2450815)) AND (d_date_sk#10 <= 2451179)) AND isnotnull(d_date_sk#10)) (34) Project [codegen id : 1] Output [1]: [d_date_sk#10] -Input [2]: [d_date_sk#10, d_year#48] +Input [2]: [d_date_sk#10, d_year#44] (35) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7/explain.txt index afcfd4816dcad..54fb333a149fb 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q7/explain.txt @@ -66,7 +66,7 @@ Input [4]: [cd_demo_sk#10, cd_gender#11, cd_marital_status#12, cd_education_stat (8) BroadcastExchange Input [1]: [cd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_cdemo_sk#2] @@ -78,96 +78,96 @@ Output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#10] (11) ReusedExchange [Reuses operator id: 35] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (13) Project [codegen id : 5] Output [6]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#15] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#14] (14) Scan parquet default.item -Output [2]: [i_item_sk#16, i_item_id#17] +Output [2]: [i_item_sk#15, i_item_id#16] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_sk#16, i_item_id#17] +Input [2]: [i_item_sk#15, i_item_id#16] (16) Filter [codegen id : 3] -Input [2]: [i_item_sk#16, i_item_id#17] -Condition : isnotnull(i_item_sk#16) +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : isnotnull(i_item_sk#15) (17) BroadcastExchange -Input [2]: [i_item_sk#16, i_item_id#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] +Input [2]: [i_item_sk#15, i_item_id#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#16] +Right keys [1]: [i_item_sk#15] Join condition: None (19) Project [codegen id : 5] -Output [6]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#17] -Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#16, i_item_id#17] +Output [6]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#15, i_item_id#16] (20) Scan parquet default.promotion -Output [3]: [p_promo_sk#19, p_channel_email#20, p_channel_event#21] +Output [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [3]: [p_promo_sk#19, p_channel_email#20, p_channel_event#21] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] (22) Filter [codegen id : 4] -Input [3]: [p_promo_sk#19, p_channel_email#20, p_channel_event#21] -Condition : (((p_channel_email#20 = N) OR (p_channel_event#21 = N)) AND isnotnull(p_promo_sk#19)) +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Condition : (((p_channel_email#18 = N) OR (p_channel_event#19 = N)) AND isnotnull(p_promo_sk#17)) (23) Project [codegen id : 4] -Output [1]: [p_promo_sk#19] -Input [3]: [p_promo_sk#19, p_channel_email#20, p_channel_event#21] +Output [1]: [p_promo_sk#17] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] (24) BroadcastExchange -Input [1]: [p_promo_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [p_promo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (25) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_promo_sk#3] -Right keys [1]: [p_promo_sk#19] +Right keys [1]: [p_promo_sk#17] Join condition: None (26) Project [codegen id : 5] -Output [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#17] -Input [7]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#17, p_promo_sk#19] +Output [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Input [7]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16, p_promo_sk#17] (27) HashAggregate [codegen id : 5] -Input [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#17] -Keys [1]: [i_item_id#17] +Input [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Keys [1]: [i_item_id#16] Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [8]: [sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] -Results [9]: [i_item_id#17, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Aggregate Attributes [8]: [sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Results [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] (28) Exchange -Input [9]: [i_item_id#17, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] -Arguments: hashpartitioning(i_item_id#17, 5), ENSURE_REQUIREMENTS, [id=#39] +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Arguments: hashpartitioning(i_item_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 6] -Input [9]: [i_item_id#17, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] -Keys [1]: [i_item_id#17] +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Keys [1]: [i_item_id#16] Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [4]: [avg(ss_quantity#4)#40, avg(UnscaledValue(ss_list_price#5))#41, avg(UnscaledValue(ss_coupon_amt#7))#42, avg(UnscaledValue(ss_sales_price#6))#43] -Results [5]: [i_item_id#17, avg(ss_quantity#4)#40 AS agg1#44, cast((avg(UnscaledValue(ss_list_price#5))#41 / 100.0) as decimal(11,6)) AS agg2#45, cast((avg(UnscaledValue(ss_coupon_amt#7))#42 / 100.0) as decimal(11,6)) AS agg3#46, cast((avg(UnscaledValue(ss_sales_price#6))#43 / 100.0) as decimal(11,6)) AS agg4#47] +Aggregate Attributes [4]: [avg(ss_quantity#4)#36, avg(UnscaledValue(ss_list_price#5))#37, avg(UnscaledValue(ss_coupon_amt#7))#38, avg(UnscaledValue(ss_sales_price#6))#39] +Results [5]: [i_item_id#16, avg(ss_quantity#4)#36 AS agg1#40, cast((avg(UnscaledValue(ss_list_price#5))#37 / 100.0) as decimal(11,6)) AS agg2#41, cast((avg(UnscaledValue(ss_coupon_amt#7))#38 / 100.0) as decimal(11,6)) AS agg3#42, cast((avg(UnscaledValue(ss_sales_price#6))#39 / 100.0) as decimal(11,6)) AS agg4#43] (30) TakeOrderedAndProject -Input [5]: [i_item_id#17, agg1#44, agg2#45, agg3#46, agg4#47] -Arguments: 100, [i_item_id#17 ASC NULLS FIRST], [i_item_id#17, agg1#44, agg2#45, agg3#46, agg4#47] +Input [5]: [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] +Arguments: 100, [i_item_id#16 ASC NULLS FIRST], [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] ===== Subqueries ===== @@ -180,25 +180,25 @@ BroadcastExchange (35) (31) Scan parquet default.date_dim -Output [2]: [d_date_sk#15, d_year#48] +Output [2]: [d_date_sk#14, d_year#44] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), GreaterThanOrEqual(d_date_sk,2450815), LessThanOrEqual(d_date_sk,2451179), IsNotNull(d_date_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#48] +Input [2]: [d_date_sk#14, d_year#44] (33) Filter [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#48] -Condition : ((((isnotnull(d_year#48) AND (d_year#48 = 1998)) AND (d_date_sk#15 >= 2450815)) AND (d_date_sk#15 <= 2451179)) AND isnotnull(d_date_sk#15)) +Input [2]: [d_date_sk#14, d_year#44] +Condition : ((((isnotnull(d_year#44) AND (d_year#44 = 1998)) AND (d_date_sk#14 >= 2450815)) AND (d_date_sk#14 <= 2451179)) AND isnotnull(d_date_sk#14)) (34) Project [codegen id : 1] -Output [1]: [d_date_sk#15] -Input [2]: [d_date_sk#15, d_year#48] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#44] (35) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt index 1419499d54d5e..4cd58b1442653 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73.sf100/explain.txt @@ -80,7 +80,7 @@ Input [2]: [s_store_sk#8, s_county#9] (11) BroadcastExchange Input [1]: [s_store_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#3] @@ -92,92 +92,92 @@ Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#8] (14) Scan parquet default.household_demographics -Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,Unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (16) Filter [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] -Condition : (((((isnotnull(hd_vehicle_count#14) AND isnotnull(hd_dep_count#13)) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = Unknown ))) AND (hd_vehicle_count#14 > 0)) AND ((cast(hd_dep_count#13 as double) / cast(hd_vehicle_count#14 as double)) > 1.0)) AND isnotnull(hd_demo_sk#11)) +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] +Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = Unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.0)) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#11] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [1]: [hd_demo_sk#10] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (18) BroadcastExchange -Input [1]: [hd_demo_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [hd_demo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#11] +Right keys [1]: [hd_demo_sk#10] Join condition: None (20) Project [codegen id : 4] Output [2]: [ss_customer_sk#1, ss_ticket_number#4] -Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#10] (21) HashAggregate [codegen id : 4] Input [2]: [ss_customer_sk#1, ss_ticket_number#4] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Aggregate Attributes [1]: [count#14] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] (22) Exchange -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] -Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 5] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#19 AS cnt#20] +Aggregate Attributes [1]: [count(1)#16] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#16 AS cnt#17] (24) Filter [codegen id : 5] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] -Condition : ((cnt#20 >= 1) AND (cnt#20 <= 5)) +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] +Condition : ((cnt#17 >= 1) AND (cnt#17 <= 5)) (25) BroadcastExchange -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#21] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=4] (26) Scan parquet default.customer -Output [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] +Output [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (27) ColumnarToRow -Input [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (28) Filter -Input [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] -Condition : isnotnull(c_customer_sk#22) +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Condition : isnotnull(c_customer_sk#18) (29) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#22] +Right keys [1]: [c_customer_sk#18] Join condition: None (30) Project [codegen id : 6] -Output [6]: [c_last_name#25, c_first_name#24, c_salutation#23, c_preferred_cust_flag#26, ss_ticket_number#4, cnt#20] -Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20, c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] +Output [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17, c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (31) Exchange -Input [6]: [c_last_name#25, c_first_name#24, c_salutation#23, c_preferred_cust_flag#26, ss_ticket_number#4, cnt#20] -Arguments: rangepartitioning(cnt#20 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: rangepartitioning(cnt#17 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=5] (32) Sort [codegen id : 7] -Input [6]: [c_last_name#25, c_first_name#24, c_salutation#23, c_preferred_cust_flag#26, ss_ticket_number#4, cnt#20] -Arguments: [cnt#20 DESC NULLS LAST], true, 0 +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: [cnt#17 DESC NULLS LAST], true, 0 ===== Subqueries ===== @@ -190,25 +190,25 @@ BroadcastExchange (37) (33) Scan parquet default.date_dim -Output [3]: [d_date_sk#7, d_year#28, d_dom#29] +Output [3]: [d_date_sk#7, d_year#23, d_dom#24] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1998,1999,2000]), In(d_date_sk, [2450815,2450816,2450846,2450847,2450874,2450875,2450905,2450906,2450935,2450936,2450966,2450967,2450996,2450997,2451027,2451028,2451058,2451059,2451088,2451089,2451119,2451120,2451149,2451150,2451180,2451181,2451211,2451212,2451239,2451240,2451270,2451271,2451300,2451301,2451331,2451332,2451361,2451362,2451392,2451393,2451423,2451424,2451453,2451454,2451484,2451485,2451514,2451515,2451545,2451546,2451576,2451577,2451605,2451606,2451636,2451637,2451666,2451667,2451697,2451698,2451727,2451728,2451758,2451759,2451789,2451790,2451819,2451820,2451850,2451851,2451880,2451881]), IsNotNull(d_date_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (35) Filter [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] -Condition : (((((isnotnull(d_dom#29) AND (d_dom#29 >= 1)) AND (d_dom#29 <= 2)) AND d_year#28 IN (1998,1999,2000)) AND d_date_sk#7 INSET 2450815, 2450816, 2450846, 2450847, 2450874, 2450875, 2450905, 2450906, 2450935, 2450936, 2450966, 2450967, 2450996, 2450997, 2451027, 2451028, 2451058, 2451059, 2451088, 2451089, 2451119, 2451120, 2451149, 2451150, 2451180, 2451181, 2451211, 2451212, 2451239, 2451240, 2451270, 2451271, 2451300, 2451301, 2451331, 2451332, 2451361, 2451362, 2451392, 2451393, 2451423, 2451424, 2451453, 2451454, 2451484, 2451485, 2451514, 2451515, 2451545, 2451546, 2451576, 2451577, 2451605, 2451606, 2451636, 2451637, 2451666, 2451667, 2451697, 2451698, 2451727, 2451728, 2451758, 2451759, 2451789, 2451790, 2451819, 2451820, 2451850, 2451851, 2451880, 2451881) AND isnotnull(d_date_sk#7)) +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] +Condition : (((((isnotnull(d_dom#24) AND (d_dom#24 >= 1)) AND (d_dom#24 <= 2)) AND d_year#23 IN (1998,1999,2000)) AND d_date_sk#7 INSET 2450815, 2450816, 2450846, 2450847, 2450874, 2450875, 2450905, 2450906, 2450935, 2450936, 2450966, 2450967, 2450996, 2450997, 2451027, 2451028, 2451058, 2451059, 2451088, 2451089, 2451119, 2451120, 2451149, 2451150, 2451180, 2451181, 2451211, 2451212, 2451239, 2451240, 2451270, 2451271, 2451300, 2451301, 2451331, 2451332, 2451361, 2451362, 2451392, 2451393, 2451423, 2451424, 2451453, 2451454, 2451484, 2451485, 2451514, 2451515, 2451545, 2451546, 2451576, 2451577, 2451605, 2451606, 2451636, 2451637, 2451666, 2451667, 2451697, 2451698, 2451727, 2451728, 2451758, 2451759, 2451789, 2451790, 2451819, 2451820, 2451850, 2451851, 2451880, 2451881) AND isnotnull(d_date_sk#7)) (36) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (37) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt index e86ea3648e7a2..de22608640529 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q73/explain.txt @@ -80,7 +80,7 @@ Input [2]: [s_store_sk#8, s_county#9] (11) BroadcastExchange Input [1]: [s_store_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#3] @@ -92,92 +92,92 @@ Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#8] (14) Scan parquet default.household_demographics -Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,Unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (16) Filter [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] -Condition : (((((isnotnull(hd_vehicle_count#14) AND isnotnull(hd_dep_count#13)) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = Unknown ))) AND (hd_vehicle_count#14 > 0)) AND ((cast(hd_dep_count#13 as double) / cast(hd_vehicle_count#14 as double)) > 1.0)) AND isnotnull(hd_demo_sk#11)) +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] +Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = Unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.0)) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#11] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [1]: [hd_demo_sk#10] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (18) BroadcastExchange -Input [1]: [hd_demo_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [hd_demo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#11] +Right keys [1]: [hd_demo_sk#10] Join condition: None (20) Project [codegen id : 4] Output [2]: [ss_customer_sk#1, ss_ticket_number#4] -Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#10] (21) HashAggregate [codegen id : 4] Input [2]: [ss_customer_sk#1, ss_ticket_number#4] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Aggregate Attributes [1]: [count#14] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] (22) Exchange -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] -Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 6] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#19 AS cnt#20] +Aggregate Attributes [1]: [count(1)#16] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#16 AS cnt#17] (24) Filter [codegen id : 6] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] -Condition : ((cnt#20 >= 1) AND (cnt#20 <= 5)) +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] +Condition : ((cnt#17 >= 1) AND (cnt#17 <= 5)) (25) Scan parquet default.customer -Output [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] +Output [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 5] -Input [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (27) Filter [codegen id : 5] -Input [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] -Condition : isnotnull(c_customer_sk#21) +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Condition : isnotnull(c_customer_sk#18) (28) BroadcastExchange -Input [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#21] +Right keys [1]: [c_customer_sk#18] Join condition: None (30) Project [codegen id : 6] -Output [6]: [c_last_name#24, c_first_name#23, c_salutation#22, c_preferred_cust_flag#25, ss_ticket_number#4, cnt#20] -Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20, c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] +Output [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17, c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (31) Exchange -Input [6]: [c_last_name#24, c_first_name#23, c_salutation#22, c_preferred_cust_flag#25, ss_ticket_number#4, cnt#20] -Arguments: rangepartitioning(cnt#20 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: rangepartitioning(cnt#17 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=5] (32) Sort [codegen id : 7] -Input [6]: [c_last_name#24, c_first_name#23, c_salutation#22, c_preferred_cust_flag#25, ss_ticket_number#4, cnt#20] -Arguments: [cnt#20 DESC NULLS LAST], true, 0 +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: [cnt#17 DESC NULLS LAST], true, 0 ===== Subqueries ===== @@ -190,25 +190,25 @@ BroadcastExchange (37) (33) Scan parquet default.date_dim -Output [3]: [d_date_sk#7, d_year#28, d_dom#29] +Output [3]: [d_date_sk#7, d_year#23, d_dom#24] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1998,1999,2000]), In(d_date_sk, [2450815,2450816,2450846,2450847,2450874,2450875,2450905,2450906,2450935,2450936,2450966,2450967,2450996,2450997,2451027,2451028,2451058,2451059,2451088,2451089,2451119,2451120,2451149,2451150,2451180,2451181,2451211,2451212,2451239,2451240,2451270,2451271,2451300,2451301,2451331,2451332,2451361,2451362,2451392,2451393,2451423,2451424,2451453,2451454,2451484,2451485,2451514,2451515,2451545,2451546,2451576,2451577,2451605,2451606,2451636,2451637,2451666,2451667,2451697,2451698,2451727,2451728,2451758,2451759,2451789,2451790,2451819,2451820,2451850,2451851,2451880,2451881]), IsNotNull(d_date_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (35) Filter [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] -Condition : (((((isnotnull(d_dom#29) AND (d_dom#29 >= 1)) AND (d_dom#29 <= 2)) AND d_year#28 IN (1998,1999,2000)) AND d_date_sk#7 INSET 2450815, 2450816, 2450846, 2450847, 2450874, 2450875, 2450905, 2450906, 2450935, 2450936, 2450966, 2450967, 2450996, 2450997, 2451027, 2451028, 2451058, 2451059, 2451088, 2451089, 2451119, 2451120, 2451149, 2451150, 2451180, 2451181, 2451211, 2451212, 2451239, 2451240, 2451270, 2451271, 2451300, 2451301, 2451331, 2451332, 2451361, 2451362, 2451392, 2451393, 2451423, 2451424, 2451453, 2451454, 2451484, 2451485, 2451514, 2451515, 2451545, 2451546, 2451576, 2451577, 2451605, 2451606, 2451636, 2451637, 2451666, 2451667, 2451697, 2451698, 2451727, 2451728, 2451758, 2451759, 2451789, 2451790, 2451819, 2451820, 2451850, 2451851, 2451880, 2451881) AND isnotnull(d_date_sk#7)) +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] +Condition : (((((isnotnull(d_dom#24) AND (d_dom#24 >= 1)) AND (d_dom#24 <= 2)) AND d_year#23 IN (1998,1999,2000)) AND d_date_sk#7 INSET 2450815, 2450816, 2450846, 2450847, 2450874, 2450875, 2450905, 2450906, 2450935, 2450936, 2450966, 2450967, 2450996, 2450997, 2451027, 2451028, 2451058, 2451059, 2451088, 2451089, 2451119, 2451120, 2451149, 2451150, 2451180, 2451181, 2451211, 2451212, 2451239, 2451240, 2451270, 2451271, 2451300, 2451301, 2451331, 2451332, 2451361, 2451362, 2451392, 2451393, 2451423, 2451424, 2451453, 2451454, 2451484, 2451485, 2451514, 2451515, 2451545, 2451546, 2451576, 2451577, 2451605, 2451606, 2451636, 2451637, 2451666, 2451667, 2451697, 2451698, 2451727, 2451728, 2451758, 2451759, 2451789, 2451790, 2451819, 2451820, 2451850, 2451851, 2451880, 2451881) AND isnotnull(d_date_sk#7)) (36) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (37) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79.sf100/explain.txt index 739c29798ad6e..4943e951d7223 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79.sf100/explain.txt @@ -81,7 +81,7 @@ Input [3]: [hd_demo_sk#11, hd_dep_count#12, hd_vehicle_count#13] (11) BroadcastExchange Input [1]: [hd_demo_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] @@ -93,96 +93,96 @@ Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, hd_demo_sk#11] (14) Scan parquet default.store -Output [3]: [s_store_sk#15, s_number_employees#16, s_city#17] +Output [3]: [s_store_sk#14, s_number_employees#15, s_city#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_number_employees,295), IsNotNull(s_store_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [3]: [s_store_sk#15, s_number_employees#16, s_city#17] +Input [3]: [s_store_sk#14, s_number_employees#15, s_city#16] (16) Filter [codegen id : 3] -Input [3]: [s_store_sk#15, s_number_employees#16, s_city#17] -Condition : (((isnotnull(s_number_employees#16) AND (s_number_employees#16 >= 200)) AND (s_number_employees#16 <= 295)) AND isnotnull(s_store_sk#15)) +Input [3]: [s_store_sk#14, s_number_employees#15, s_city#16] +Condition : (((isnotnull(s_number_employees#15) AND (s_number_employees#15 >= 200)) AND (s_number_employees#15 <= 295)) AND isnotnull(s_store_sk#14)) (17) Project [codegen id : 3] -Output [2]: [s_store_sk#15, s_city#17] -Input [3]: [s_store_sk#15, s_number_employees#16, s_city#17] +Output [2]: [s_store_sk#14, s_city#16] +Input [3]: [s_store_sk#14, s_number_employees#15, s_city#16] (18) BroadcastExchange -Input [2]: [s_store_sk#15, s_city#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [2]: [s_store_sk#14, s_city#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#15] +Right keys [1]: [s_store_sk#14] Join condition: None (20) Project [codegen id : 4] -Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#17] -Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#15, s_city#17] +Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#16] +Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#14, s_city#16] (21) HashAggregate [codegen id : 4] -Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#17] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#17] +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#16] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#16] Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] -Aggregate Attributes [2]: [sum#19, sum#20] -Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#17, sum#21, sum#22] +Aggregate Attributes [2]: [sum#17, sum#18] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#16, sum#19, sum#20] (22) Exchange -Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#17, sum#21, sum#22] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#17, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#16, sum#19, sum#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#16, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 5] -Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#17, sum#21, sum#22] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#17] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#16, sum#19, sum#20] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#16] Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#24, sum(UnscaledValue(ss_net_profit#7))#25] -Results [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#17, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#24,17,2) AS amt#26, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#25,17,2) AS profit#27] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#21, sum(UnscaledValue(ss_net_profit#7))#22] +Results [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#16, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#21,17,2) AS amt#23, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#22,17,2) AS profit#24] (24) Exchange -Input [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#17, amt#26, profit#27] -Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#16, amt#23, profit#24] +Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] (25) Sort [codegen id : 6] -Input [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#17, amt#26, profit#27] +Input [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#16, amt#23, profit#24] Arguments: [ss_customer_sk#1 ASC NULLS FIRST], false, 0 (26) Scan parquet default.customer -Output [3]: [c_customer_sk#29, c_first_name#30, c_last_name#31] +Output [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 7] -Input [3]: [c_customer_sk#29, c_first_name#30, c_last_name#31] +Input [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] (28) Filter [codegen id : 7] -Input [3]: [c_customer_sk#29, c_first_name#30, c_last_name#31] -Condition : isnotnull(c_customer_sk#29) +Input [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] +Condition : isnotnull(c_customer_sk#25) (29) Exchange -Input [3]: [c_customer_sk#29, c_first_name#30, c_last_name#31] -Arguments: hashpartitioning(c_customer_sk#29, 5), ENSURE_REQUIREMENTS, [id=#32] +Input [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] +Arguments: hashpartitioning(c_customer_sk#25, 5), ENSURE_REQUIREMENTS, [plan_id=5] (30) Sort [codegen id : 8] -Input [3]: [c_customer_sk#29, c_first_name#30, c_last_name#31] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] +Arguments: [c_customer_sk#25 ASC NULLS FIRST], false, 0 (31) SortMergeJoin [codegen id : 9] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#29] +Right keys [1]: [c_customer_sk#25] Join condition: None (32) Project [codegen id : 9] -Output [7]: [c_last_name#31, c_first_name#30, substr(s_city#17, 1, 30) AS substr(s_city, 1, 30)#33, ss_ticket_number#5, amt#26, profit#27, s_city#17] -Input [8]: [ss_ticket_number#5, ss_customer_sk#1, s_city#17, amt#26, profit#27, c_customer_sk#29, c_first_name#30, c_last_name#31] +Output [7]: [c_last_name#27, c_first_name#26, substr(s_city#16, 1, 30) AS substr(s_city, 1, 30)#28, ss_ticket_number#5, amt#23, profit#24, s_city#16] +Input [8]: [ss_ticket_number#5, ss_customer_sk#1, s_city#16, amt#23, profit#24, c_customer_sk#25, c_first_name#26, c_last_name#27] (33) TakeOrderedAndProject -Input [7]: [c_last_name#31, c_first_name#30, substr(s_city, 1, 30)#33, ss_ticket_number#5, amt#26, profit#27, s_city#17] -Arguments: 100, [c_last_name#31 ASC NULLS FIRST, c_first_name#30 ASC NULLS FIRST, substr(s_city#17, 1, 30) ASC NULLS FIRST, profit#27 ASC NULLS FIRST], [c_last_name#31, c_first_name#30, substr(s_city, 1, 30)#33, ss_ticket_number#5, amt#26, profit#27] +Input [7]: [c_last_name#27, c_first_name#26, substr(s_city, 1, 30)#28, ss_ticket_number#5, amt#23, profit#24, s_city#16] +Arguments: 100, [c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, substr(s_city#16, 1, 30) ASC NULLS FIRST, profit#24 ASC NULLS FIRST], [c_last_name#27, c_first_name#26, substr(s_city, 1, 30)#28, ss_ticket_number#5, amt#23, profit#24] ===== Subqueries ===== @@ -195,25 +195,25 @@ BroadcastExchange (38) (34) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#34, d_dow#35] +Output [3]: [d_date_sk#10, d_year#29, d_dow#30] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_dow), EqualTo(d_dow,1), In(d_year, [1998,1999,2000]), GreaterThanOrEqual(d_date_sk,2450819), LessThanOrEqual(d_date_sk,2451904), IsNotNull(d_date_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#34, d_dow#35] +Input [3]: [d_date_sk#10, d_year#29, d_dow#30] (36) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#34, d_dow#35] -Condition : (((((isnotnull(d_dow#35) AND (d_dow#35 = 1)) AND d_year#34 IN (1998,1999,2000)) AND (d_date_sk#10 >= 2450819)) AND (d_date_sk#10 <= 2451904)) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#10, d_year#29, d_dow#30] +Condition : (((((isnotnull(d_dow#30) AND (d_dow#30 = 1)) AND d_year#29 IN (1998,1999,2000)) AND (d_date_sk#10 >= 2450819)) AND (d_date_sk#10 <= 2451904)) AND isnotnull(d_date_sk#10)) (37) Project [codegen id : 1] Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#34, d_dow#35] +Input [3]: [d_date_sk#10, d_year#29, d_dow#30] (38) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79/explain.txt index b00a5500d7c9a..eedd3e68fc2b6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q79/explain.txt @@ -78,7 +78,7 @@ Input [3]: [s_store_sk#11, s_number_employees#12, s_city#13] (11) BroadcastExchange Input [2]: [s_store_sk#11, s_city#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#4] @@ -90,84 +90,84 @@ Output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#11, s_city#13] (14) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Output [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [Or(EqualTo(hd_dep_count,8),GreaterThan(hd_vehicle_count,0)), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] (16) Filter [codegen id : 3] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] -Condition : (((hd_dep_count#16 = 8) OR (hd_vehicle_count#17 > 0)) AND isnotnull(hd_demo_sk#15)) +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : (((hd_dep_count#15 = 8) OR (hd_vehicle_count#16 > 0)) AND isnotnull(hd_demo_sk#14)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#15] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Output [1]: [hd_demo_sk#14] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] (18) BroadcastExchange -Input [1]: [hd_demo_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [hd_demo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#15] +Right keys [1]: [hd_demo_sk#14] Join condition: None (20) Project [codegen id : 4] Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#13] -Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#13, hd_demo_sk#15] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#13, hd_demo_sk#14] (21) HashAggregate [codegen id : 4] Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#13] Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13] Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] -Aggregate Attributes [2]: [sum#19, sum#20] -Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, sum#21, sum#22] +Aggregate Attributes [2]: [sum#17, sum#18] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, sum#19, sum#20] (22) Exchange -Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, sum#21, sum#22] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, sum#19, sum#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 6] -Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, sum#21, sum#22] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, sum#19, sum#20] Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13] Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#24, sum(UnscaledValue(ss_net_profit#7))#25] -Results [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#13, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#24,17,2) AS amt#26, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#25,17,2) AS profit#27] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#21, sum(UnscaledValue(ss_net_profit#7))#22] +Results [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#13, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#21,17,2) AS amt#23, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#22,17,2) AS profit#24] (24) Scan parquet default.customer -Output [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] +Output [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 5] -Input [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] +Input [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] (26) Filter [codegen id : 5] -Input [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] -Condition : isnotnull(c_customer_sk#28) +Input [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] +Condition : isnotnull(c_customer_sk#25) (27) BroadcastExchange -Input [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] +Input [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (28) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#28] +Right keys [1]: [c_customer_sk#25] Join condition: None (29) Project [codegen id : 6] -Output [7]: [c_last_name#30, c_first_name#29, substr(s_city#13, 1, 30) AS substr(s_city, 1, 30)#32, ss_ticket_number#5, amt#26, profit#27, s_city#13] -Input [8]: [ss_ticket_number#5, ss_customer_sk#1, s_city#13, amt#26, profit#27, c_customer_sk#28, c_first_name#29, c_last_name#30] +Output [7]: [c_last_name#27, c_first_name#26, substr(s_city#13, 1, 30) AS substr(s_city, 1, 30)#28, ss_ticket_number#5, amt#23, profit#24, s_city#13] +Input [8]: [ss_ticket_number#5, ss_customer_sk#1, s_city#13, amt#23, profit#24, c_customer_sk#25, c_first_name#26, c_last_name#27] (30) TakeOrderedAndProject -Input [7]: [c_last_name#30, c_first_name#29, substr(s_city, 1, 30)#32, ss_ticket_number#5, amt#26, profit#27, s_city#13] -Arguments: 100, [c_last_name#30 ASC NULLS FIRST, c_first_name#29 ASC NULLS FIRST, substr(s_city#13, 1, 30) ASC NULLS FIRST, profit#27 ASC NULLS FIRST], [c_last_name#30, c_first_name#29, substr(s_city, 1, 30)#32, ss_ticket_number#5, amt#26, profit#27] +Input [7]: [c_last_name#27, c_first_name#26, substr(s_city, 1, 30)#28, ss_ticket_number#5, amt#23, profit#24, s_city#13] +Arguments: 100, [c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, substr(s_city#13, 1, 30) ASC NULLS FIRST, profit#24 ASC NULLS FIRST], [c_last_name#27, c_first_name#26, substr(s_city, 1, 30)#28, ss_ticket_number#5, amt#23, profit#24] ===== Subqueries ===== @@ -180,25 +180,25 @@ BroadcastExchange (35) (31) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#33, d_dow#34] +Output [3]: [d_date_sk#10, d_year#29, d_dow#30] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_dow), EqualTo(d_dow,1), In(d_year, [1998,1999,2000]), GreaterThanOrEqual(d_date_sk,2450819), LessThanOrEqual(d_date_sk,2451904), IsNotNull(d_date_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#33, d_dow#34] +Input [3]: [d_date_sk#10, d_year#29, d_dow#30] (33) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#33, d_dow#34] -Condition : (((((isnotnull(d_dow#34) AND (d_dow#34 = 1)) AND d_year#33 IN (1998,1999,2000)) AND (d_date_sk#10 >= 2450819)) AND (d_date_sk#10 <= 2451904)) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#10, d_year#29, d_dow#30] +Condition : (((((isnotnull(d_dow#30) AND (d_dow#30 = 1)) AND d_year#29 IN (1998,1999,2000)) AND (d_date_sk#10 >= 2450819)) AND (d_date_sk#10 <= 2451904)) AND isnotnull(d_date_sk#10)) (34) Project [codegen id : 1] Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#33, d_dow#34] +Input [3]: [d_date_sk#10, d_year#29, d_dow#30] (35) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt index 8b19320021538..a2cfd6b66801d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89.sf100/explain.txt @@ -71,7 +71,7 @@ Condition : isnotnull(s_store_sk#8) (10) BroadcastExchange Input [3]: [s_store_sk#8, s_store_name#9, s_company_name#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#2] @@ -83,73 +83,73 @@ Output [5]: [ss_item_sk#1, ss_sales_price#3, d_moy#7, s_store_name#9, s_company_ Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_sales_price#3, d_moy#7, s_store_sk#8, s_store_name#9, s_company_name#10] (13) Scan parquet default.item -Output [4]: [i_item_sk#12, i_brand#13, i_class#14, i_category#15] +Output [4]: [i_item_sk#11, i_brand#12, i_class#13, i_category#14] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [Or(And(In(i_category, [Books ,Electronics ,Home ]),In(i_class, [musical ,parenting ,wallpaper ])),And(In(i_category, [Jewelry ,Men ,Shoes ]),In(i_class, [birdal ,pants ,womens ]))), IsNotNull(i_item_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [4]: [i_item_sk#12, i_brand#13, i_class#14, i_category#15] +Input [4]: [i_item_sk#11, i_brand#12, i_class#13, i_category#14] (15) Filter [codegen id : 3] -Input [4]: [i_item_sk#12, i_brand#13, i_class#14, i_category#15] -Condition : (((i_category#15 IN (Home ,Books ,Electronics ) AND i_class#14 IN (wallpaper ,parenting ,musical )) OR (i_category#15 IN (Shoes ,Jewelry ,Men ) AND i_class#14 IN (womens ,birdal ,pants ))) AND isnotnull(i_item_sk#12)) +Input [4]: [i_item_sk#11, i_brand#12, i_class#13, i_category#14] +Condition : (((i_category#14 IN (Home ,Books ,Electronics ) AND i_class#13 IN (wallpaper ,parenting ,musical )) OR (i_category#14 IN (Shoes ,Jewelry ,Men ) AND i_class#13 IN (womens ,birdal ,pants ))) AND isnotnull(i_item_sk#11)) (16) BroadcastExchange -Input [4]: [i_item_sk#12, i_brand#13, i_class#14, i_category#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] +Input [4]: [i_item_sk#11, i_brand#12, i_class#13, i_category#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#12] +Right keys [1]: [i_item_sk#11] Join condition: None (18) Project [codegen id : 4] -Output [7]: [i_brand#13, i_class#14, i_category#15, ss_sales_price#3, d_moy#7, s_store_name#9, s_company_name#10] -Input [9]: [ss_item_sk#1, ss_sales_price#3, d_moy#7, s_store_name#9, s_company_name#10, i_item_sk#12, i_brand#13, i_class#14, i_category#15] +Output [7]: [i_brand#12, i_class#13, i_category#14, ss_sales_price#3, d_moy#7, s_store_name#9, s_company_name#10] +Input [9]: [ss_item_sk#1, ss_sales_price#3, d_moy#7, s_store_name#9, s_company_name#10, i_item_sk#11, i_brand#12, i_class#13, i_category#14] (19) HashAggregate [codegen id : 4] -Input [7]: [i_brand#13, i_class#14, i_category#15, ss_sales_price#3, d_moy#7, s_store_name#9, s_company_name#10] -Keys [6]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7] +Input [7]: [i_brand#12, i_class#13, i_category#14, ss_sales_price#3, d_moy#7, s_store_name#9, s_company_name#10] +Keys [6]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7] Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#3))] -Aggregate Attributes [1]: [sum#17] -Results [7]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum#18] +Aggregate Attributes [1]: [sum#15] +Results [7]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, sum#16] (20) Exchange -Input [7]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum#18] -Arguments: hashpartitioning(i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [7]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, sum#16] +Arguments: hashpartitioning(i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 5] -Input [7]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum#18] -Keys [6]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7] +Input [7]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, sum#16] +Keys [6]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7] Functions [1]: [sum(UnscaledValue(ss_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#3))#20] -Results [8]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#20,17,2) AS _w0#22] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#3))#17] +Results [8]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#17,17,2) AS sum_sales#18, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#17,17,2) AS _w0#19] (22) Exchange -Input [8]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#21, _w0#22] -Arguments: hashpartitioning(i_category#15, i_brand#13, s_store_name#9, s_company_name#10, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [8]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#18, _w0#19] +Arguments: hashpartitioning(i_category#14, i_brand#12, s_store_name#9, s_company_name#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) Sort [codegen id : 6] -Input [8]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#21, _w0#22] -Arguments: [i_category#15 ASC NULLS FIRST, i_brand#13 ASC NULLS FIRST, s_store_name#9 ASC NULLS FIRST, s_company_name#10 ASC NULLS FIRST], false, 0 +Input [8]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#18, _w0#19] +Arguments: [i_category#14 ASC NULLS FIRST, i_brand#12 ASC NULLS FIRST, s_store_name#9 ASC NULLS FIRST, s_company_name#10 ASC NULLS FIRST], false, 0 (24) Window -Input [8]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#21, _w0#22] -Arguments: [avg(_w0#22) windowspecdefinition(i_category#15, i_brand#13, s_store_name#9, s_company_name#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#24], [i_category#15, i_brand#13, s_store_name#9, s_company_name#10] +Input [8]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#18, _w0#19] +Arguments: [avg(_w0#19) windowspecdefinition(i_category#14, i_brand#12, s_store_name#9, s_company_name#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#14, i_brand#12, s_store_name#9, s_company_name#10] (25) Filter [codegen id : 7] -Input [9]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#21, _w0#22, avg_monthly_sales#24] -Condition : (isnotnull(avg_monthly_sales#24) AND (NOT (avg_monthly_sales#24 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Input [9]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#18, _w0#19, avg_monthly_sales#20] +Condition : (isnotnull(avg_monthly_sales#20) AND (NOT (avg_monthly_sales#20 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (26) Project [codegen id : 7] -Output [8]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#21, avg_monthly_sales#24] -Input [9]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#21, _w0#22, avg_monthly_sales#24] +Output [8]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#18, avg_monthly_sales#20] +Input [9]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#18, _w0#19, avg_monthly_sales#20] (27) TakeOrderedAndProject -Input [8]: [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#21, avg_monthly_sales#24] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#9 ASC NULLS FIRST], [i_category#15, i_class#14, i_brand#13, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#21, avg_monthly_sales#24] +Input [8]: [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#18, avg_monthly_sales#20] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#9 ASC NULLS FIRST], [i_category#14, i_class#13, i_brand#12, s_store_name#9, s_company_name#10, d_moy#7, sum_sales#18, avg_monthly_sales#20] ===== Subqueries ===== @@ -162,25 +162,25 @@ BroadcastExchange (32) (28) Scan parquet default.date_dim -Output [3]: [d_date_sk#6, d_year#25, d_moy#7] +Output [3]: [d_date_sk#6, d_year#21, d_moy#7] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), GreaterThanOrEqual(d_date_sk,2451545), LessThanOrEqual(d_date_sk,2451910), IsNotNull(d_date_sk)] ReadSchema: struct (29) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#25, d_moy#7] +Input [3]: [d_date_sk#6, d_year#21, d_moy#7] (30) Filter [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#25, d_moy#7] -Condition : ((((isnotnull(d_year#25) AND (d_year#25 = 2000)) AND (d_date_sk#6 >= 2451545)) AND (d_date_sk#6 <= 2451910)) AND isnotnull(d_date_sk#6)) +Input [3]: [d_date_sk#6, d_year#21, d_moy#7] +Condition : ((((isnotnull(d_year#21) AND (d_year#21 = 2000)) AND (d_date_sk#6 >= 2451545)) AND (d_date_sk#6 <= 2451910)) AND isnotnull(d_date_sk#6)) (31) Project [codegen id : 1] Output [2]: [d_date_sk#6, d_moy#7] -Input [3]: [d_date_sk#6, d_year#25, d_moy#7] +Input [3]: [d_date_sk#6, d_year#21, d_moy#7] (32) BroadcastExchange Input [2]: [d_date_sk#6, d_moy#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt index 5d3ea6d0cb7be..4ee1a5b7c2937 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q89/explain.txt @@ -59,7 +59,7 @@ Condition : (isnotnull(ss_item_sk#5) AND isnotnull(ss_store_sk#6)) (7) BroadcastExchange Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] @@ -71,85 +71,85 @@ Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7 Input [8]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] (10) ReusedExchange [Reuses operator id: 32] -Output [2]: [d_date_sk#11, d_moy#12] +Output [2]: [d_date_sk#10, d_moy#11] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#11] +Right keys [1]: [d_date_sk#10] Join condition: None (12) Project [codegen id : 4] -Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#12] -Input [8]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8, d_date_sk#11, d_moy#12] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11] +Input [8]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8, d_date_sk#10, d_moy#11] (13) Scan parquet default.store -Output [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Output [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] (15) Filter [codegen id : 3] -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] -Condition : isnotnull(s_store_sk#13) +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Condition : isnotnull(s_store_sk#12) (16) BroadcastExchange -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#6] -Right keys [1]: [s_store_sk#13] +Right keys [1]: [s_store_sk#12] Join condition: None (18) Project [codegen id : 4] -Output [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#12, s_store_name#14, s_company_name#15] -Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#12, s_store_sk#13, s_store_name#14, s_company_name#15] +Output [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] +Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11, s_store_sk#12, s_store_name#13, s_company_name#14] (19) HashAggregate [codegen id : 4] -Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#12, s_store_name#14, s_company_name#15] -Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12] +Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#7))] -Aggregate Attributes [1]: [sum#17] -Results [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum#18] +Aggregate Attributes [1]: [sum#15] +Results [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] (20) Exchange -Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum#18] -Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] +Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 5] -Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum#18] -Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12] +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#20] -Results [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#20,17,2) AS _w0#22] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#17] +Results [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#17,17,2) AS sum_sales#18, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#17,17,2) AS _w0#19] (22) Exchange -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22] -Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#14, s_company_name#15, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) Sort [codegen id : 6] -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22] -Arguments: [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST, s_company_name#15 ASC NULLS FIRST], false, 0 +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST, s_company_name#14 ASC NULLS FIRST], false, 0 (24) Window -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22] -Arguments: [avg(_w0#22) windowspecdefinition(i_category#4, i_brand#2, s_store_name#14, s_company_name#15, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#24], [i_category#4, i_brand#2, s_store_name#14, s_company_name#15] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: [avg(_w0#19) windowspecdefinition(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#4, i_brand#2, s_store_name#13, s_company_name#14] (25) Filter [codegen id : 7] -Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22, avg_monthly_sales#24] -Condition : (isnotnull(avg_monthly_sales#24) AND (NOT (avg_monthly_sales#24 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] +Condition : (isnotnull(avg_monthly_sales#20) AND (NOT (avg_monthly_sales#20 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (26) Project [codegen id : 7] -Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] -Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22, avg_monthly_sales#24] +Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] (27) TakeOrderedAndProject -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] ===== Subqueries ===== @@ -162,25 +162,25 @@ BroadcastExchange (32) (28) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_year#25, d_moy#12] +Output [3]: [d_date_sk#10, d_year#21, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), GreaterThanOrEqual(d_date_sk,2451545), LessThanOrEqual(d_date_sk,2451910), IsNotNull(d_date_sk)] ReadSchema: struct (29) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#25, d_moy#12] +Input [3]: [d_date_sk#10, d_year#21, d_moy#11] (30) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#25, d_moy#12] -Condition : ((((isnotnull(d_year#25) AND (d_year#25 = 2000)) AND (d_date_sk#11 >= 2451545)) AND (d_date_sk#11 <= 2451910)) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#10, d_year#21, d_moy#11] +Condition : ((((isnotnull(d_year#21) AND (d_year#21 = 2000)) AND (d_date_sk#10 >= 2451545)) AND (d_date_sk#10 <= 2451910)) AND isnotnull(d_date_sk#10)) (31) Project [codegen id : 1] -Output [2]: [d_date_sk#11, d_moy#12] -Input [3]: [d_date_sk#11, d_year#25, d_moy#12] +Output [2]: [d_date_sk#10, d_moy#11] +Input [3]: [d_date_sk#10, d_year#21, d_moy#11] (32) BroadcastExchange -Input [2]: [d_date_sk#11, d_moy#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [2]: [d_date_sk#10, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98.sf100/explain.txt index e630982cc606b..a0080476fc022 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98.sf100/explain.txt @@ -55,88 +55,88 @@ Input [4]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, d_date_sk#5] (7) Exchange Input [2]: [ss_item_sk#1, ss_ext_sales_price#2] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#6] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (8) Sort [codegen id : 3] Input [2]: [ss_item_sk#1, ss_ext_sales_price#2] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (9) Scan parquet default.item -Output [6]: [i_item_sk#7, i_item_id#8, i_item_desc#9, i_current_price#10, i_class#11, i_category#12] +Output [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [In(i_category, [Books ,Jewelry ,Sports ]), IsNotNull(i_item_sk)] ReadSchema: struct (10) ColumnarToRow [codegen id : 4] -Input [6]: [i_item_sk#7, i_item_id#8, i_item_desc#9, i_current_price#10, i_class#11, i_category#12] +Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] (11) Filter [codegen id : 4] -Input [6]: [i_item_sk#7, i_item_id#8, i_item_desc#9, i_current_price#10, i_class#11, i_category#12] -Condition : (i_category#12 IN (Jewelry ,Sports ,Books ) AND isnotnull(i_item_sk#7)) +Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Condition : (i_category#11 IN (Jewelry ,Sports ,Books ) AND isnotnull(i_item_sk#6)) (12) Exchange -Input [6]: [i_item_sk#7, i_item_id#8, i_item_desc#9, i_current_price#10, i_class#11, i_category#12] -Arguments: hashpartitioning(i_item_sk#7, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Arguments: hashpartitioning(i_item_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (13) Sort [codegen id : 5] -Input [6]: [i_item_sk#7, i_item_id#8, i_item_desc#9, i_current_price#10, i_class#11, i_category#12] -Arguments: [i_item_sk#7 ASC NULLS FIRST], false, 0 +Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 (14) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#7] +Right keys [1]: [i_item_sk#6] Join condition: None (15) Project [codegen id : 6] -Output [6]: [ss_ext_sales_price#2, i_item_id#8, i_item_desc#9, i_current_price#10, i_class#11, i_category#12] -Input [8]: [ss_item_sk#1, ss_ext_sales_price#2, i_item_sk#7, i_item_id#8, i_item_desc#9, i_current_price#10, i_class#11, i_category#12] +Output [6]: [ss_ext_sales_price#2, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Input [8]: [ss_item_sk#1, ss_ext_sales_price#2, i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] (16) HashAggregate [codegen id : 6] -Input [6]: [ss_ext_sales_price#2, i_item_id#8, i_item_desc#9, i_current_price#10, i_class#11, i_category#12] -Keys [5]: [i_item_id#8, i_item_desc#9, i_category#12, i_class#11, i_current_price#10] +Input [6]: [ss_ext_sales_price#2, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Keys [5]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#14] -Results [6]: [i_item_id#8, i_item_desc#9, i_category#12, i_class#11, i_current_price#10, sum#15] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#13] (17) Exchange -Input [6]: [i_item_id#8, i_item_desc#9, i_category#12, i_class#11, i_current_price#10, sum#15] -Arguments: hashpartitioning(i_item_id#8, i_item_desc#9, i_category#12, i_class#11, i_current_price#10, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#13] +Arguments: hashpartitioning(i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 7] -Input [6]: [i_item_id#8, i_item_desc#9, i_category#12, i_class#11, i_current_price#10, sum#15] -Keys [5]: [i_item_id#8, i_item_desc#9, i_category#12, i_class#11, i_current_price#10] +Input [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#13] +Keys [5]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#17] -Results [8]: [i_item_desc#9, i_category#12, i_class#11, i_current_price#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#17,17,2) AS _w1#20, i_item_id#8] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] +Results [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w0#16, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w1#17, i_item_id#7] (19) Exchange -Input [8]: [i_item_desc#9, i_category#12, i_class#11, i_current_price#10, itemrevenue#18, _w0#19, _w1#20, i_item_id#8] -Arguments: hashpartitioning(i_class#11, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#15, _w0#16, _w1#17, i_item_id#7] +Arguments: hashpartitioning(i_class#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] (20) Sort [codegen id : 8] -Input [8]: [i_item_desc#9, i_category#12, i_class#11, i_current_price#10, itemrevenue#18, _w0#19, _w1#20, i_item_id#8] -Arguments: [i_class#11 ASC NULLS FIRST], false, 0 +Input [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#15, _w0#16, _w1#17, i_item_id#7] +Arguments: [i_class#10 ASC NULLS FIRST], false, 0 (21) Window -Input [8]: [i_item_desc#9, i_category#12, i_class#11, i_current_price#10, itemrevenue#18, _w0#19, _w1#20, i_item_id#8] -Arguments: [sum(_w1#20) windowspecdefinition(i_class#11, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#11] +Input [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#15, _w0#16, _w1#17, i_item_id#7] +Arguments: [sum(_w1#17) windowspecdefinition(i_class#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#18], [i_class#10] (22) Project [codegen id : 9] -Output [7]: [i_item_desc#9, i_category#12, i_class#11, i_current_price#10, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17)) AS revenueratio#23, i_item_id#8] -Input [9]: [i_item_desc#9, i_category#12, i_class#11, i_current_price#10, itemrevenue#18, _w0#19, _w1#20, i_item_id#8, _we0#22] +Output [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#15, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#16) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#18)), DecimalType(38,17)) AS revenueratio#19, i_item_id#7] +Input [9]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#15, _w0#16, _w1#17, i_item_id#7, _we0#18] (23) Exchange -Input [7]: [i_item_desc#9, i_category#12, i_class#11, i_current_price#10, itemrevenue#18, revenueratio#23, i_item_id#8] -Arguments: rangepartitioning(i_category#12 ASC NULLS FIRST, i_class#11 ASC NULLS FIRST, i_item_id#8 ASC NULLS FIRST, i_item_desc#9 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#15, revenueratio#19, i_item_id#7] +Arguments: rangepartitioning(i_category#11 ASC NULLS FIRST, i_class#10 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#8 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=5] (24) Sort [codegen id : 10] -Input [7]: [i_item_desc#9, i_category#12, i_class#11, i_current_price#10, itemrevenue#18, revenueratio#23, i_item_id#8] -Arguments: [i_category#12 ASC NULLS FIRST, i_class#11 ASC NULLS FIRST, i_item_id#8 ASC NULLS FIRST, i_item_desc#9 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], true, 0 +Input [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#15, revenueratio#19, i_item_id#7] +Arguments: [i_category#11 ASC NULLS FIRST, i_class#10 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#8 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST], true, 0 (25) Project [codegen id : 10] -Output [6]: [i_item_desc#9, i_category#12, i_class#11, i_current_price#10, itemrevenue#18, revenueratio#23] -Input [7]: [i_item_desc#9, i_category#12, i_class#11, i_current_price#10, itemrevenue#18, revenueratio#23, i_item_id#8] +Output [6]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#15, revenueratio#19] +Input [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#15, revenueratio#19, i_item_id#7] ===== Subqueries ===== @@ -149,25 +149,25 @@ BroadcastExchange (30) (26) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_date#25] +Output [2]: [d_date_sk#5, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2001-01-01), LessThanOrEqual(d_date,2001-01-31), GreaterThanOrEqual(d_date_sk,2451911), LessThanOrEqual(d_date_sk,2451941), IsNotNull(d_date_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_date#25] +Input [2]: [d_date_sk#5, d_date#20] (28) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_date#25] -Condition : (((((isnotnull(d_date#25) AND (d_date#25 >= 2001-01-01)) AND (d_date#25 <= 2001-01-31)) AND (d_date_sk#5 >= 2451911)) AND (d_date_sk#5 <= 2451941)) AND isnotnull(d_date_sk#5)) +Input [2]: [d_date_sk#5, d_date#20] +Condition : (((((isnotnull(d_date#20) AND (d_date#20 >= 2001-01-01)) AND (d_date#20 <= 2001-01-31)) AND (d_date_sk#5 >= 2451911)) AND (d_date_sk#5 <= 2451941)) AND isnotnull(d_date_sk#5)) (29) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_date#25] +Input [2]: [d_date_sk#5, d_date#20] (30) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98/explain.txt index fc2390f392247..a87e71a75e1ac 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/q98/explain.txt @@ -54,7 +54,7 @@ Condition : (i_category#10 IN (Jewelry (7) BroadcastExchange Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#1] @@ -66,62 +66,62 @@ Output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, i_item_desc#7 Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (10) ReusedExchange [Reuses operator id: 27] -Output [1]: [d_date_sk#12] +Output [1]: [d_date_sk#11] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#3] -Right keys [1]: [d_date_sk#12] +Right keys [1]: [d_date_sk#11] Join condition: None (12) Project [codegen id : 3] Output [6]: [ss_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] -Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#12] +Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#11] (13) HashAggregate [codegen id : 3] Input [6]: [ss_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#13] -Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] (14) Exchange -Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] -Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 4] -Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#16] -Results [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#16,17,2) AS itemrevenue#17, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#16,17,2) AS _w0#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#16,17,2) AS _w1#19, i_item_id#6] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] +Results [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w0#16, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w1#17, i_item_id#6] (16) Exchange -Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6] -Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (17) Sort [codegen id : 5] -Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] Arguments: [i_class#9 ASC NULLS FIRST], false, 0 (18) Window -Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6] -Arguments: [sum(_w1#19) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#21], [i_class#9] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: [sum(_w1#17) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#18], [i_class#9] (19) Project [codegen id : 6] -Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17)) AS revenueratio#22, i_item_id#6] -Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6, _we0#21] +Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#16) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#18)), DecimalType(38,17)) AS revenueratio#19, i_item_id#6] +Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6, _we0#18] (20) Exchange -Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22, i_item_id#6] -Arguments: rangepartitioning(i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#22 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19, i_item_id#6] +Arguments: rangepartitioning(i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=4] (21) Sort [codegen id : 7] -Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22, i_item_id#6] -Arguments: [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#22 ASC NULLS FIRST], true, 0 +Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19, i_item_id#6] +Arguments: [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST], true, 0 (22) Project [codegen id : 7] -Output [6]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22] -Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22, i_item_id#6] +Output [6]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] +Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19, i_item_id#6] ===== Subqueries ===== @@ -134,25 +134,25 @@ BroadcastExchange (27) (23) Scan parquet default.date_dim -Output [2]: [d_date_sk#12, d_date#24] +Output [2]: [d_date_sk#11, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2001-01-01), LessThanOrEqual(d_date,2001-01-31), GreaterThanOrEqual(d_date_sk,2451911), LessThanOrEqual(d_date_sk,2451941), IsNotNull(d_date_sk)] ReadSchema: struct (24) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#12, d_date#24] +Input [2]: [d_date_sk#11, d_date#20] (25) Filter [codegen id : 1] -Input [2]: [d_date_sk#12, d_date#24] -Condition : (((((isnotnull(d_date#24) AND (d_date#24 >= 2001-01-01)) AND (d_date#24 <= 2001-01-31)) AND (d_date_sk#12 >= 2451911)) AND (d_date_sk#12 <= 2451941)) AND isnotnull(d_date_sk#12)) +Input [2]: [d_date_sk#11, d_date#20] +Condition : (((((isnotnull(d_date#20) AND (d_date#20 >= 2001-01-01)) AND (d_date#20 <= 2001-01-31)) AND (d_date_sk#11 >= 2451911)) AND (d_date_sk#11 <= 2451941)) AND isnotnull(d_date_sk#11)) (26) Project [codegen id : 1] -Output [1]: [d_date_sk#12] -Input [2]: [d_date_sk#12, d_date#24] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#20] (27) BroadcastExchange -Input [1]: [d_date_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max.sf100/explain.txt index 1ebc9a69a3865..073a29fd22304 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max.sf100/explain.txt @@ -27,7 +27,7 @@ Results [12]: [ss_sold_date_sk#9, count#21, count#22, max#23, max#24, max#25, ma (4) Exchange Input [12]: [ss_sold_date_sk#9, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] -Arguments: hashpartitioning(ss_sold_date_sk#9, 5), ENSURE_REQUIREMENTS, [id=#32] +Arguments: hashpartitioning(ss_sold_date_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) HashAggregate [codegen id : 2] Input [12]: [ss_sold_date_sk#9, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] @@ -40,17 +40,17 @@ Results [12]: [ss_sold_date_sk#9, count#21, count#22, max#23, max#24, max#25, ma Input [12]: [ss_sold_date_sk#9, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] Keys: [] Functions [12]: [merge_count(1), merge_count(ss_sold_date_sk#9), merge_max(ss_sold_date_sk#9), merge_max(ss_sold_time_sk#1), merge_max(ss_item_sk#2), merge_max(ss_customer_sk#3), merge_max(ss_cdemo_sk#4), merge_max(ss_hdemo_sk#5), merge_max(ss_addr_sk#6), merge_max(ss_store_sk#7), merge_max(ss_promo_sk#8), partial_count(distinct ss_sold_date_sk#9)] -Aggregate Attributes [12]: [count(1)#10, count(ss_sold_date_sk#9)#11, max(ss_sold_date_sk#9)#12, max(ss_sold_time_sk#1)#13, max(ss_item_sk#2)#14, max(ss_customer_sk#3)#15, max(ss_cdemo_sk#4)#16, max(ss_hdemo_sk#5)#17, max(ss_addr_sk#6)#18, max(ss_store_sk#7)#19, max(ss_promo_sk#8)#20, count(ss_sold_date_sk#9)#33] -Results [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#34] +Aggregate Attributes [12]: [count(1)#10, count(ss_sold_date_sk#9)#11, max(ss_sold_date_sk#9)#12, max(ss_sold_time_sk#1)#13, max(ss_item_sk#2)#14, max(ss_customer_sk#3)#15, max(ss_cdemo_sk#4)#16, max(ss_hdemo_sk#5)#17, max(ss_addr_sk#6)#18, max(ss_store_sk#7)#19, max(ss_promo_sk#8)#20, count(ss_sold_date_sk#9)#32] +Results [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#33] (7) Exchange -Input [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#34] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#35] +Input [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#33] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] (8) HashAggregate [codegen id : 3] -Input [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#34] +Input [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#33] Keys: [] Functions [12]: [count(1), count(ss_sold_date_sk#9), max(ss_sold_date_sk#9), max(ss_sold_time_sk#1), max(ss_item_sk#2), max(ss_customer_sk#3), max(ss_cdemo_sk#4), max(ss_hdemo_sk#5), max(ss_addr_sk#6), max(ss_store_sk#7), max(ss_promo_sk#8), count(distinct ss_sold_date_sk#9)] -Aggregate Attributes [12]: [count(1)#10, count(ss_sold_date_sk#9)#11, max(ss_sold_date_sk#9)#12, max(ss_sold_time_sk#1)#13, max(ss_item_sk#2)#14, max(ss_customer_sk#3)#15, max(ss_cdemo_sk#4)#16, max(ss_hdemo_sk#5)#17, max(ss_addr_sk#6)#18, max(ss_store_sk#7)#19, max(ss_promo_sk#8)#20, count(ss_sold_date_sk#9)#33] -Results [12]: [count(1)#10 AS total#36, count(ss_sold_date_sk#9)#11 AS not_null_total#37, count(ss_sold_date_sk#9)#33 AS unique_days#38, max(ss_sold_date_sk#9)#12 AS max_ss_sold_date_sk#39, max(ss_sold_time_sk#1)#13 AS max_ss_sold_time_sk#40, max(ss_item_sk#2)#14 AS max_ss_item_sk#41, max(ss_customer_sk#3)#15 AS max_ss_customer_sk#42, max(ss_cdemo_sk#4)#16 AS max_ss_cdemo_sk#43, max(ss_hdemo_sk#5)#17 AS max_ss_hdemo_sk#44, max(ss_addr_sk#6)#18 AS max_ss_addr_sk#45, max(ss_store_sk#7)#19 AS max_ss_store_sk#46, max(ss_promo_sk#8)#20 AS max_ss_promo_sk#47] +Aggregate Attributes [12]: [count(1)#10, count(ss_sold_date_sk#9)#11, max(ss_sold_date_sk#9)#12, max(ss_sold_time_sk#1)#13, max(ss_item_sk#2)#14, max(ss_customer_sk#3)#15, max(ss_cdemo_sk#4)#16, max(ss_hdemo_sk#5)#17, max(ss_addr_sk#6)#18, max(ss_store_sk#7)#19, max(ss_promo_sk#8)#20, count(ss_sold_date_sk#9)#32] +Results [12]: [count(1)#10 AS total#34, count(ss_sold_date_sk#9)#11 AS not_null_total#35, count(ss_sold_date_sk#9)#32 AS unique_days#36, max(ss_sold_date_sk#9)#12 AS max_ss_sold_date_sk#37, max(ss_sold_time_sk#1)#13 AS max_ss_sold_time_sk#38, max(ss_item_sk#2)#14 AS max_ss_item_sk#39, max(ss_customer_sk#3)#15 AS max_ss_customer_sk#40, max(ss_cdemo_sk#4)#16 AS max_ss_cdemo_sk#41, max(ss_hdemo_sk#5)#17 AS max_ss_hdemo_sk#42, max(ss_addr_sk#6)#18 AS max_ss_addr_sk#43, max(ss_store_sk#7)#19 AS max_ss_store_sk#44, max(ss_promo_sk#8)#20 AS max_ss_promo_sk#45] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max/explain.txt index 1ebc9a69a3865..073a29fd22304 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-modified/ss_max/explain.txt @@ -27,7 +27,7 @@ Results [12]: [ss_sold_date_sk#9, count#21, count#22, max#23, max#24, max#25, ma (4) Exchange Input [12]: [ss_sold_date_sk#9, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] -Arguments: hashpartitioning(ss_sold_date_sk#9, 5), ENSURE_REQUIREMENTS, [id=#32] +Arguments: hashpartitioning(ss_sold_date_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) HashAggregate [codegen id : 2] Input [12]: [ss_sold_date_sk#9, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] @@ -40,17 +40,17 @@ Results [12]: [ss_sold_date_sk#9, count#21, count#22, max#23, max#24, max#25, ma Input [12]: [ss_sold_date_sk#9, count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31] Keys: [] Functions [12]: [merge_count(1), merge_count(ss_sold_date_sk#9), merge_max(ss_sold_date_sk#9), merge_max(ss_sold_time_sk#1), merge_max(ss_item_sk#2), merge_max(ss_customer_sk#3), merge_max(ss_cdemo_sk#4), merge_max(ss_hdemo_sk#5), merge_max(ss_addr_sk#6), merge_max(ss_store_sk#7), merge_max(ss_promo_sk#8), partial_count(distinct ss_sold_date_sk#9)] -Aggregate Attributes [12]: [count(1)#10, count(ss_sold_date_sk#9)#11, max(ss_sold_date_sk#9)#12, max(ss_sold_time_sk#1)#13, max(ss_item_sk#2)#14, max(ss_customer_sk#3)#15, max(ss_cdemo_sk#4)#16, max(ss_hdemo_sk#5)#17, max(ss_addr_sk#6)#18, max(ss_store_sk#7)#19, max(ss_promo_sk#8)#20, count(ss_sold_date_sk#9)#33] -Results [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#34] +Aggregate Attributes [12]: [count(1)#10, count(ss_sold_date_sk#9)#11, max(ss_sold_date_sk#9)#12, max(ss_sold_time_sk#1)#13, max(ss_item_sk#2)#14, max(ss_customer_sk#3)#15, max(ss_cdemo_sk#4)#16, max(ss_hdemo_sk#5)#17, max(ss_addr_sk#6)#18, max(ss_store_sk#7)#19, max(ss_promo_sk#8)#20, count(ss_sold_date_sk#9)#32] +Results [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#33] (7) Exchange -Input [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#34] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#35] +Input [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#33] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] (8) HashAggregate [codegen id : 3] -Input [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#34] +Input [12]: [count#21, count#22, max#23, max#24, max#25, max#26, max#27, max#28, max#29, max#30, max#31, count#33] Keys: [] Functions [12]: [count(1), count(ss_sold_date_sk#9), max(ss_sold_date_sk#9), max(ss_sold_time_sk#1), max(ss_item_sk#2), max(ss_customer_sk#3), max(ss_cdemo_sk#4), max(ss_hdemo_sk#5), max(ss_addr_sk#6), max(ss_store_sk#7), max(ss_promo_sk#8), count(distinct ss_sold_date_sk#9)] -Aggregate Attributes [12]: [count(1)#10, count(ss_sold_date_sk#9)#11, max(ss_sold_date_sk#9)#12, max(ss_sold_time_sk#1)#13, max(ss_item_sk#2)#14, max(ss_customer_sk#3)#15, max(ss_cdemo_sk#4)#16, max(ss_hdemo_sk#5)#17, max(ss_addr_sk#6)#18, max(ss_store_sk#7)#19, max(ss_promo_sk#8)#20, count(ss_sold_date_sk#9)#33] -Results [12]: [count(1)#10 AS total#36, count(ss_sold_date_sk#9)#11 AS not_null_total#37, count(ss_sold_date_sk#9)#33 AS unique_days#38, max(ss_sold_date_sk#9)#12 AS max_ss_sold_date_sk#39, max(ss_sold_time_sk#1)#13 AS max_ss_sold_time_sk#40, max(ss_item_sk#2)#14 AS max_ss_item_sk#41, max(ss_customer_sk#3)#15 AS max_ss_customer_sk#42, max(ss_cdemo_sk#4)#16 AS max_ss_cdemo_sk#43, max(ss_hdemo_sk#5)#17 AS max_ss_hdemo_sk#44, max(ss_addr_sk#6)#18 AS max_ss_addr_sk#45, max(ss_store_sk#7)#19 AS max_ss_store_sk#46, max(ss_promo_sk#8)#20 AS max_ss_promo_sk#47] +Aggregate Attributes [12]: [count(1)#10, count(ss_sold_date_sk#9)#11, max(ss_sold_date_sk#9)#12, max(ss_sold_time_sk#1)#13, max(ss_item_sk#2)#14, max(ss_customer_sk#3)#15, max(ss_cdemo_sk#4)#16, max(ss_hdemo_sk#5)#17, max(ss_addr_sk#6)#18, max(ss_store_sk#7)#19, max(ss_promo_sk#8)#20, count(ss_sold_date_sk#9)#32] +Results [12]: [count(1)#10 AS total#34, count(ss_sold_date_sk#9)#11 AS not_null_total#35, count(ss_sold_date_sk#9)#32 AS unique_days#36, max(ss_sold_date_sk#9)#12 AS max_ss_sold_date_sk#37, max(ss_sold_time_sk#1)#13 AS max_ss_sold_time_sk#38, max(ss_item_sk#2)#14 AS max_ss_item_sk#39, max(ss_customer_sk#3)#15 AS max_ss_customer_sk#40, max(ss_cdemo_sk#4)#16 AS max_ss_cdemo_sk#41, max(ss_hdemo_sk#5)#17 AS max_ss_hdemo_sk#42, max(ss_addr_sk#6)#18 AS max_ss_addr_sk#43, max(ss_store_sk#7)#19 AS max_ss_store_sk#44, max(ss_promo_sk#8)#20 AS max_ss_promo_sk#45] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.sf100/explain.txt index 0ac812675e8f5..abc24d2519c68 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1.sf100/explain.txt @@ -80,18 +80,18 @@ Results [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] (8) Exchange Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] -Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=1] (9) HashAggregate [codegen id : 8] Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] Keys [2]: [sr_customer_sk#1, sr_store_sk#2] Functions [1]: [sum(UnscaledValue(sr_return_amt#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#3))#10] -Results [3]: [sr_customer_sk#1 AS ctr_customer_sk#11, sr_store_sk#2 AS ctr_store_sk#12, MakeDecimal(sum(UnscaledValue(sr_return_amt#3))#10,17,2) AS ctr_total_return#13] +Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#3))#9] +Results [3]: [sr_customer_sk#1 AS ctr_customer_sk#10, sr_store_sk#2 AS ctr_store_sk#11, MakeDecimal(sum(UnscaledValue(sr_return_amt#3))#9,17,2) AS ctr_total_return#12] (10) Filter [codegen id : 8] -Input [3]: [ctr_customer_sk#11, ctr_store_sk#12, ctr_total_return#13] -Condition : isnotnull(ctr_total_return#13) +Input [3]: [ctr_customer_sk#10, ctr_store_sk#11, ctr_total_return#12] +Condition : isnotnull(ctr_total_return#12) (11) Scan parquet default.store_returns Output [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] @@ -124,128 +124,128 @@ Input [5]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_s Input [3]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] Keys [2]: [sr_customer_sk#1, sr_store_sk#2] Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#3))] -Aggregate Attributes [1]: [sum#14] -Results [3]: [sr_customer_sk#1, sr_store_sk#2, sum#15] +Aggregate Attributes [1]: [sum#13] +Results [3]: [sr_customer_sk#1, sr_store_sk#2, sum#14] (18) Exchange -Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#15] -Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#14] +Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=2] (19) HashAggregate [codegen id : 5] -Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#15] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#14] Keys [2]: [sr_customer_sk#1, sr_store_sk#2] Functions [1]: [sum(UnscaledValue(sr_return_amt#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#3))#10] -Results [2]: [sr_store_sk#2 AS ctr_store_sk#12, MakeDecimal(sum(UnscaledValue(sr_return_amt#3))#10,17,2) AS ctr_total_return#13] +Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#3))#9] +Results [2]: [sr_store_sk#2 AS ctr_store_sk#11, MakeDecimal(sum(UnscaledValue(sr_return_amt#3))#9,17,2) AS ctr_total_return#12] (20) HashAggregate [codegen id : 5] -Input [2]: [ctr_store_sk#12, ctr_total_return#13] -Keys [1]: [ctr_store_sk#12] -Functions [1]: [partial_avg(ctr_total_return#13)] -Aggregate Attributes [2]: [sum#17, count#18] -Results [3]: [ctr_store_sk#12, sum#19, count#20] +Input [2]: [ctr_store_sk#11, ctr_total_return#12] +Keys [1]: [ctr_store_sk#11] +Functions [1]: [partial_avg(ctr_total_return#12)] +Aggregate Attributes [2]: [sum#15, count#16] +Results [3]: [ctr_store_sk#11, sum#17, count#18] (21) Exchange -Input [3]: [ctr_store_sk#12, sum#19, count#20] -Arguments: hashpartitioning(ctr_store_sk#12, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [3]: [ctr_store_sk#11, sum#17, count#18] +Arguments: hashpartitioning(ctr_store_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 6] -Input [3]: [ctr_store_sk#12, sum#19, count#20] -Keys [1]: [ctr_store_sk#12] -Functions [1]: [avg(ctr_total_return#13)] -Aggregate Attributes [1]: [avg(ctr_total_return#13)#22] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#13)#22) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#23, ctr_store_sk#12 AS ctr_store_sk#12#24] +Input [3]: [ctr_store_sk#11, sum#17, count#18] +Keys [1]: [ctr_store_sk#11] +Functions [1]: [avg(ctr_total_return#12)] +Aggregate Attributes [1]: [avg(ctr_total_return#12)#19] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#12)#19) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11 AS ctr_store_sk#11#21] (23) Filter [codegen id : 6] -Input [2]: [(avg(ctr_total_return) * 1.2)#23, ctr_store_sk#12#24] -Condition : isnotnull((avg(ctr_total_return) * 1.2)#23) +Input [2]: [(avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11#21] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#20) (24) BroadcastExchange -Input [2]: [(avg(ctr_total_return) * 1.2)#23, ctr_store_sk#12#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#25] +Input [2]: [(avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=4] (25) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ctr_store_sk#12] -Right keys [1]: [ctr_store_sk#12#24] -Join condition: (cast(ctr_total_return#13 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#23) +Left keys [1]: [ctr_store_sk#11] +Right keys [1]: [ctr_store_sk#11#21] +Join condition: (cast(ctr_total_return#12 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#20) (26) Project [codegen id : 8] -Output [2]: [ctr_customer_sk#11, ctr_store_sk#12] -Input [5]: [ctr_customer_sk#11, ctr_store_sk#12, ctr_total_return#13, (avg(ctr_total_return) * 1.2)#23, ctr_store_sk#12#24] +Output [2]: [ctr_customer_sk#10, ctr_store_sk#11] +Input [5]: [ctr_customer_sk#10, ctr_store_sk#11, ctr_total_return#12, (avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11#21] (27) Scan parquet default.store -Output [2]: [s_store_sk#26, s_state#27] +Output [2]: [s_store_sk#22, s_state#23] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 7] -Input [2]: [s_store_sk#26, s_state#27] +Input [2]: [s_store_sk#22, s_state#23] (29) Filter [codegen id : 7] -Input [2]: [s_store_sk#26, s_state#27] -Condition : ((isnotnull(s_state#27) AND (s_state#27 = TN)) AND isnotnull(s_store_sk#26)) +Input [2]: [s_store_sk#22, s_state#23] +Condition : ((isnotnull(s_state#23) AND (s_state#23 = TN)) AND isnotnull(s_store_sk#22)) (30) Project [codegen id : 7] -Output [1]: [s_store_sk#26] -Input [2]: [s_store_sk#26, s_state#27] +Output [1]: [s_store_sk#22] +Input [2]: [s_store_sk#22, s_state#23] (31) BroadcastExchange -Input [1]: [s_store_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] +Input [1]: [s_store_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (32) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ctr_store_sk#12] -Right keys [1]: [s_store_sk#26] +Left keys [1]: [ctr_store_sk#11] +Right keys [1]: [s_store_sk#22] Join condition: None (33) Project [codegen id : 8] -Output [1]: [ctr_customer_sk#11] -Input [3]: [ctr_customer_sk#11, ctr_store_sk#12, s_store_sk#26] +Output [1]: [ctr_customer_sk#10] +Input [3]: [ctr_customer_sk#10, ctr_store_sk#11, s_store_sk#22] (34) Exchange -Input [1]: [ctr_customer_sk#11] -Arguments: hashpartitioning(ctr_customer_sk#11, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [1]: [ctr_customer_sk#10] +Arguments: hashpartitioning(ctr_customer_sk#10, 5), ENSURE_REQUIREMENTS, [plan_id=6] (35) Sort [codegen id : 9] -Input [1]: [ctr_customer_sk#11] -Arguments: [ctr_customer_sk#11 ASC NULLS FIRST], false, 0 +Input [1]: [ctr_customer_sk#10] +Arguments: [ctr_customer_sk#10 ASC NULLS FIRST], false, 0 (36) Scan parquet default.customer -Output [2]: [c_customer_sk#30, c_customer_id#31] +Output [2]: [c_customer_sk#24, c_customer_id#25] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 10] -Input [2]: [c_customer_sk#30, c_customer_id#31] +Input [2]: [c_customer_sk#24, c_customer_id#25] (38) Filter [codegen id : 10] -Input [2]: [c_customer_sk#30, c_customer_id#31] -Condition : isnotnull(c_customer_sk#30) +Input [2]: [c_customer_sk#24, c_customer_id#25] +Condition : isnotnull(c_customer_sk#24) (39) Exchange -Input [2]: [c_customer_sk#30, c_customer_id#31] -Arguments: hashpartitioning(c_customer_sk#30, 5), ENSURE_REQUIREMENTS, [id=#32] +Input [2]: [c_customer_sk#24, c_customer_id#25] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [plan_id=7] (40) Sort [codegen id : 11] -Input [2]: [c_customer_sk#30, c_customer_id#31] -Arguments: [c_customer_sk#30 ASC NULLS FIRST], false, 0 +Input [2]: [c_customer_sk#24, c_customer_id#25] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (41) SortMergeJoin [codegen id : 12] -Left keys [1]: [ctr_customer_sk#11] -Right keys [1]: [c_customer_sk#30] +Left keys [1]: [ctr_customer_sk#10] +Right keys [1]: [c_customer_sk#24] Join condition: None (42) Project [codegen id : 12] -Output [1]: [c_customer_id#31] -Input [3]: [ctr_customer_sk#11, c_customer_sk#30, c_customer_id#31] +Output [1]: [c_customer_id#25] +Input [3]: [ctr_customer_sk#10, c_customer_sk#24, c_customer_id#25] (43) TakeOrderedAndProject -Input [1]: [c_customer_id#31] -Arguments: 100, [c_customer_id#31 ASC NULLS FIRST], [c_customer_id#31] +Input [1]: [c_customer_id#25] +Arguments: 100, [c_customer_id#25 ASC NULLS FIRST], [c_customer_id#25] ===== Subqueries ===== @@ -258,26 +258,26 @@ BroadcastExchange (48) (44) Scan parquet default.date_dim -Output [2]: [d_date_sk#6, d_year#33] +Output [2]: [d_date_sk#6, d_year#26] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#6, d_year#33] +Input [2]: [d_date_sk#6, d_year#26] (46) Filter [codegen id : 1] -Input [2]: [d_date_sk#6, d_year#33] -Condition : ((isnotnull(d_year#33) AND (d_year#33 = 2000)) AND isnotnull(d_date_sk#6)) +Input [2]: [d_date_sk#6, d_year#26] +Condition : ((isnotnull(d_year#26) AND (d_year#26 = 2000)) AND isnotnull(d_date_sk#6)) (47) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [2]: [d_date_sk#6, d_year#33] +Input [2]: [d_date_sk#6, d_year#26] (48) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#34] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] Subquery:2 Hosting operator id = 11 Hosting Expression = sr_returned_date_sk#4 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt index bfdc1e926597b..4d620e81d9998 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q1/explain.txt @@ -77,18 +77,18 @@ Results [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] (8) Exchange Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] -Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=1] (9) HashAggregate [codegen id : 9] Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#8] Keys [2]: [sr_customer_sk#1, sr_store_sk#2] Functions [1]: [sum(UnscaledValue(sr_return_amt#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#3))#10] -Results [3]: [sr_customer_sk#1 AS ctr_customer_sk#11, sr_store_sk#2 AS ctr_store_sk#12, MakeDecimal(sum(UnscaledValue(sr_return_amt#3))#10,17,2) AS ctr_total_return#13] +Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#3))#9] +Results [3]: [sr_customer_sk#1 AS ctr_customer_sk#10, sr_store_sk#2 AS ctr_store_sk#11, MakeDecimal(sum(UnscaledValue(sr_return_amt#3))#9,17,2) AS ctr_total_return#12] (10) Filter [codegen id : 9] -Input [3]: [ctr_customer_sk#11, ctr_store_sk#12, ctr_total_return#13] -Condition : isnotnull(ctr_total_return#13) +Input [3]: [ctr_customer_sk#10, ctr_store_sk#11, ctr_total_return#12] +Condition : isnotnull(ctr_total_return#12) (11) Scan parquet default.store_returns Output [4]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_sk#4] @@ -121,116 +121,116 @@ Input [5]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3, sr_returned_date_s Input [3]: [sr_customer_sk#1, sr_store_sk#2, sr_return_amt#3] Keys [2]: [sr_customer_sk#1, sr_store_sk#2] Functions [1]: [partial_sum(UnscaledValue(sr_return_amt#3))] -Aggregate Attributes [1]: [sum#14] -Results [3]: [sr_customer_sk#1, sr_store_sk#2, sum#15] +Aggregate Attributes [1]: [sum#13] +Results [3]: [sr_customer_sk#1, sr_store_sk#2, sum#14] (18) Exchange -Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#15] -Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#14] +Arguments: hashpartitioning(sr_customer_sk#1, sr_store_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=2] (19) HashAggregate [codegen id : 5] -Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#15] +Input [3]: [sr_customer_sk#1, sr_store_sk#2, sum#14] Keys [2]: [sr_customer_sk#1, sr_store_sk#2] Functions [1]: [sum(UnscaledValue(sr_return_amt#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#3))#10] -Results [2]: [sr_store_sk#2 AS ctr_store_sk#12, MakeDecimal(sum(UnscaledValue(sr_return_amt#3))#10,17,2) AS ctr_total_return#13] +Aggregate Attributes [1]: [sum(UnscaledValue(sr_return_amt#3))#9] +Results [2]: [sr_store_sk#2 AS ctr_store_sk#11, MakeDecimal(sum(UnscaledValue(sr_return_amt#3))#9,17,2) AS ctr_total_return#12] (20) HashAggregate [codegen id : 5] -Input [2]: [ctr_store_sk#12, ctr_total_return#13] -Keys [1]: [ctr_store_sk#12] -Functions [1]: [partial_avg(ctr_total_return#13)] -Aggregate Attributes [2]: [sum#17, count#18] -Results [3]: [ctr_store_sk#12, sum#19, count#20] +Input [2]: [ctr_store_sk#11, ctr_total_return#12] +Keys [1]: [ctr_store_sk#11] +Functions [1]: [partial_avg(ctr_total_return#12)] +Aggregate Attributes [2]: [sum#15, count#16] +Results [3]: [ctr_store_sk#11, sum#17, count#18] (21) Exchange -Input [3]: [ctr_store_sk#12, sum#19, count#20] -Arguments: hashpartitioning(ctr_store_sk#12, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [3]: [ctr_store_sk#11, sum#17, count#18] +Arguments: hashpartitioning(ctr_store_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 6] -Input [3]: [ctr_store_sk#12, sum#19, count#20] -Keys [1]: [ctr_store_sk#12] -Functions [1]: [avg(ctr_total_return#13)] -Aggregate Attributes [1]: [avg(ctr_total_return#13)#22] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#13)#22) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#23, ctr_store_sk#12 AS ctr_store_sk#12#24] +Input [3]: [ctr_store_sk#11, sum#17, count#18] +Keys [1]: [ctr_store_sk#11] +Functions [1]: [avg(ctr_total_return#12)] +Aggregate Attributes [1]: [avg(ctr_total_return#12)#19] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#12)#19) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11 AS ctr_store_sk#11#21] (23) Filter [codegen id : 6] -Input [2]: [(avg(ctr_total_return) * 1.2)#23, ctr_store_sk#12#24] -Condition : isnotnull((avg(ctr_total_return) * 1.2)#23) +Input [2]: [(avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11#21] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#20) (24) BroadcastExchange -Input [2]: [(avg(ctr_total_return) * 1.2)#23, ctr_store_sk#12#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#25] +Input [2]: [(avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=4] (25) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ctr_store_sk#12] -Right keys [1]: [ctr_store_sk#12#24] -Join condition: (cast(ctr_total_return#13 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#23) +Left keys [1]: [ctr_store_sk#11] +Right keys [1]: [ctr_store_sk#11#21] +Join condition: (cast(ctr_total_return#12 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#20) (26) Project [codegen id : 9] -Output [2]: [ctr_customer_sk#11, ctr_store_sk#12] -Input [5]: [ctr_customer_sk#11, ctr_store_sk#12, ctr_total_return#13, (avg(ctr_total_return) * 1.2)#23, ctr_store_sk#12#24] +Output [2]: [ctr_customer_sk#10, ctr_store_sk#11] +Input [5]: [ctr_customer_sk#10, ctr_store_sk#11, ctr_total_return#12, (avg(ctr_total_return) * 1.2)#20, ctr_store_sk#11#21] (27) Scan parquet default.store -Output [2]: [s_store_sk#26, s_state#27] +Output [2]: [s_store_sk#22, s_state#23] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 7] -Input [2]: [s_store_sk#26, s_state#27] +Input [2]: [s_store_sk#22, s_state#23] (29) Filter [codegen id : 7] -Input [2]: [s_store_sk#26, s_state#27] -Condition : ((isnotnull(s_state#27) AND (s_state#27 = TN)) AND isnotnull(s_store_sk#26)) +Input [2]: [s_store_sk#22, s_state#23] +Condition : ((isnotnull(s_state#23) AND (s_state#23 = TN)) AND isnotnull(s_store_sk#22)) (30) Project [codegen id : 7] -Output [1]: [s_store_sk#26] -Input [2]: [s_store_sk#26, s_state#27] +Output [1]: [s_store_sk#22] +Input [2]: [s_store_sk#22, s_state#23] (31) BroadcastExchange -Input [1]: [s_store_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] +Input [1]: [s_store_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (32) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ctr_store_sk#12] -Right keys [1]: [s_store_sk#26] +Left keys [1]: [ctr_store_sk#11] +Right keys [1]: [s_store_sk#22] Join condition: None (33) Project [codegen id : 9] -Output [1]: [ctr_customer_sk#11] -Input [3]: [ctr_customer_sk#11, ctr_store_sk#12, s_store_sk#26] +Output [1]: [ctr_customer_sk#10] +Input [3]: [ctr_customer_sk#10, ctr_store_sk#11, s_store_sk#22] (34) Scan parquet default.customer -Output [2]: [c_customer_sk#29, c_customer_id#30] +Output [2]: [c_customer_sk#24, c_customer_id#25] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 8] -Input [2]: [c_customer_sk#29, c_customer_id#30] +Input [2]: [c_customer_sk#24, c_customer_id#25] (36) Filter [codegen id : 8] -Input [2]: [c_customer_sk#29, c_customer_id#30] -Condition : isnotnull(c_customer_sk#29) +Input [2]: [c_customer_sk#24, c_customer_id#25] +Condition : isnotnull(c_customer_sk#24) (37) BroadcastExchange -Input [2]: [c_customer_sk#29, c_customer_id#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] +Input [2]: [c_customer_sk#24, c_customer_id#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] (38) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ctr_customer_sk#11] -Right keys [1]: [c_customer_sk#29] +Left keys [1]: [ctr_customer_sk#10] +Right keys [1]: [c_customer_sk#24] Join condition: None (39) Project [codegen id : 9] -Output [1]: [c_customer_id#30] -Input [3]: [ctr_customer_sk#11, c_customer_sk#29, c_customer_id#30] +Output [1]: [c_customer_id#25] +Input [3]: [ctr_customer_sk#10, c_customer_sk#24, c_customer_id#25] (40) TakeOrderedAndProject -Input [1]: [c_customer_id#30] -Arguments: 100, [c_customer_id#30 ASC NULLS FIRST], [c_customer_id#30] +Input [1]: [c_customer_id#25] +Arguments: 100, [c_customer_id#25 ASC NULLS FIRST], [c_customer_id#25] ===== Subqueries ===== @@ -243,26 +243,26 @@ BroadcastExchange (45) (41) Scan parquet default.date_dim -Output [2]: [d_date_sk#6, d_year#32] +Output [2]: [d_date_sk#6, d_year#26] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (42) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#6, d_year#32] +Input [2]: [d_date_sk#6, d_year#26] (43) Filter [codegen id : 1] -Input [2]: [d_date_sk#6, d_year#32] -Condition : ((isnotnull(d_year#32) AND (d_year#32 = 2000)) AND isnotnull(d_date_sk#6)) +Input [2]: [d_date_sk#6, d_year#26] +Condition : ((isnotnull(d_year#26) AND (d_year#26 = 2000)) AND isnotnull(d_date_sk#6)) (44) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [2]: [d_date_sk#6, d_year#32] +Input [2]: [d_date_sk#6, d_year#26] (45) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] Subquery:2 Hosting operator id = 11 Hosting Expression = sr_returned_date_sk#4 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/explain.txt index ef732e7c1dd40..a442e8069e647 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10.sf100/explain.txt @@ -68,115 +68,115 @@ Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) (4) Exchange Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] -Arguments: hashpartitioning(c_customer_sk#3, 5), ENSURE_REQUIREMENTS, [id=#6] +Arguments: hashpartitioning(c_customer_sk#3, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] Arguments: [c_customer_sk#3 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_sales -Output [2]: [ss_customer_sk#7, ss_sold_date_sk#8] +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#8), dynamicpruningexpression(ss_sold_date_sk#8 IN dynamicpruning#9)] +PartitionFilters: [isnotnull(ss_sold_date_sk#7), dynamicpruningexpression(ss_sold_date_sk#7 IN dynamicpruning#8)] ReadSchema: struct (7) ColumnarToRow [codegen id : 4] -Input [2]: [ss_customer_sk#7, ss_sold_date_sk#8] +Input [2]: [ss_customer_sk#6, ss_sold_date_sk#7] (8) ReusedExchange [Reuses operator id: 56] -Output [1]: [d_date_sk#10] +Output [1]: [d_date_sk#9] (9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#10] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#9] Join condition: None (10) Project [codegen id : 4] -Output [1]: [ss_customer_sk#7] -Input [3]: [ss_customer_sk#7, ss_sold_date_sk#8, d_date_sk#10] +Output [1]: [ss_customer_sk#6] +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#9] (11) Exchange -Input [1]: [ss_customer_sk#7] -Arguments: hashpartitioning(ss_customer_sk#7, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [1]: [ss_customer_sk#6] +Arguments: hashpartitioning(ss_customer_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 5] -Input [1]: [ss_customer_sk#7] -Arguments: [ss_customer_sk#7 ASC NULLS FIRST], false, 0 +Input [1]: [ss_customer_sk#6] +Arguments: [ss_customer_sk#6 ASC NULLS FIRST], false, 0 (13) SortMergeJoin [codegen id : 6] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ss_customer_sk#7] +Right keys [1]: [ss_customer_sk#6] Join condition: None (14) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#12, ws_sold_date_sk#13] +Output [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#13), dynamicpruningexpression(ws_sold_date_sk#13 IN dynamicpruning#9)] +PartitionFilters: [isnotnull(ws_sold_date_sk#11), dynamicpruningexpression(ws_sold_date_sk#11 IN dynamicpruning#8)] ReadSchema: struct (15) ColumnarToRow [codegen id : 8] -Input [2]: [ws_bill_customer_sk#12, ws_sold_date_sk#13] +Input [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] (16) ReusedExchange [Reuses operator id: 56] -Output [1]: [d_date_sk#14] +Output [1]: [d_date_sk#12] (17) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_sold_date_sk#13] -Right keys [1]: [d_date_sk#14] +Left keys [1]: [ws_sold_date_sk#11] +Right keys [1]: [d_date_sk#12] Join condition: None (18) Project [codegen id : 8] -Output [1]: [ws_bill_customer_sk#12] -Input [3]: [ws_bill_customer_sk#12, ws_sold_date_sk#13, d_date_sk#14] +Output [1]: [ws_bill_customer_sk#10] +Input [3]: [ws_bill_customer_sk#10, ws_sold_date_sk#11, d_date_sk#12] (19) Exchange -Input [1]: [ws_bill_customer_sk#12] -Arguments: hashpartitioning(ws_bill_customer_sk#12, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [1]: [ws_bill_customer_sk#10] +Arguments: hashpartitioning(ws_bill_customer_sk#10, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) Sort [codegen id : 9] -Input [1]: [ws_bill_customer_sk#12] -Arguments: [ws_bill_customer_sk#12 ASC NULLS FIRST], false, 0 +Input [1]: [ws_bill_customer_sk#10] +Arguments: [ws_bill_customer_sk#10 ASC NULLS FIRST], false, 0 (21) SortMergeJoin [codegen id : 10] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ws_bill_customer_sk#12] +Right keys [1]: [ws_bill_customer_sk#10] Join condition: None (22) Scan parquet default.catalog_sales -Output [2]: [cs_ship_customer_sk#16, cs_sold_date_sk#17] +Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#17), dynamicpruningexpression(cs_sold_date_sk#17 IN dynamicpruning#9)] +PartitionFilters: [isnotnull(cs_sold_date_sk#14), dynamicpruningexpression(cs_sold_date_sk#14 IN dynamicpruning#8)] ReadSchema: struct (23) ColumnarToRow [codegen id : 12] -Input [2]: [cs_ship_customer_sk#16, cs_sold_date_sk#17] +Input [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] (24) ReusedExchange [Reuses operator id: 56] -Output [1]: [d_date_sk#18] +Output [1]: [d_date_sk#15] (25) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [cs_sold_date_sk#17] -Right keys [1]: [d_date_sk#18] +Left keys [1]: [cs_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] Join condition: None (26) Project [codegen id : 12] -Output [1]: [cs_ship_customer_sk#16] -Input [3]: [cs_ship_customer_sk#16, cs_sold_date_sk#17, d_date_sk#18] +Output [1]: [cs_ship_customer_sk#13] +Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] (27) Exchange -Input [1]: [cs_ship_customer_sk#16] -Arguments: hashpartitioning(cs_ship_customer_sk#16, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [1]: [cs_ship_customer_sk#13] +Arguments: hashpartitioning(cs_ship_customer_sk#13, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) Sort [codegen id : 13] -Input [1]: [cs_ship_customer_sk#16] -Arguments: [cs_ship_customer_sk#16 ASC NULLS FIRST], false, 0 +Input [1]: [cs_ship_customer_sk#13] +Arguments: [cs_ship_customer_sk#13 ASC NULLS FIRST], false, 0 (29) SortMergeJoin [codegen id : 15] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [cs_ship_customer_sk#16] +Right keys [1]: [cs_ship_customer_sk#13] Join condition: None (30) Filter [codegen id : 15] @@ -188,100 +188,100 @@ Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] (32) Scan parquet default.customer_address -Output [2]: [ca_address_sk#20, ca_county#21] +Output [2]: [ca_address_sk#16, ca_county#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_county, [Dona Ana County,Jefferson County,La Porte County,Rush County,Toole County]), IsNotNull(ca_address_sk)] ReadSchema: struct (33) ColumnarToRow [codegen id : 14] -Input [2]: [ca_address_sk#20, ca_county#21] +Input [2]: [ca_address_sk#16, ca_county#17] (34) Filter [codegen id : 14] -Input [2]: [ca_address_sk#20, ca_county#21] -Condition : (ca_county#21 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) AND isnotnull(ca_address_sk#20)) +Input [2]: [ca_address_sk#16, ca_county#17] +Condition : (ca_county#17 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) AND isnotnull(ca_address_sk#16)) (35) Project [codegen id : 14] -Output [1]: [ca_address_sk#20] -Input [2]: [ca_address_sk#20, ca_county#21] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_county#17] (36) BroadcastExchange -Input [1]: [ca_address_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (37) BroadcastHashJoin [codegen id : 15] Left keys [1]: [c_current_addr_sk#5] -Right keys [1]: [ca_address_sk#20] +Right keys [1]: [ca_address_sk#16] Join condition: None (38) Project [codegen id : 15] Output [1]: [c_current_cdemo_sk#4] -Input [3]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#20] +Input [3]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#16] (39) Exchange Input [1]: [c_current_cdemo_sk#4] -Arguments: hashpartitioning(c_current_cdemo_sk#4, 5), ENSURE_REQUIREMENTS, [id=#23] +Arguments: hashpartitioning(c_current_cdemo_sk#4, 5), ENSURE_REQUIREMENTS, [plan_id=6] (40) Sort [codegen id : 16] Input [1]: [c_current_cdemo_sk#4] Arguments: [c_current_cdemo_sk#4 ASC NULLS FIRST], false, 0 (41) Scan parquet default.customer_demographics -Output [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Output [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (42) ColumnarToRow [codegen id : 17] -Input [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Input [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] (43) Filter [codegen id : 17] -Input [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] -Condition : isnotnull(cd_demo_sk#24) +Input [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Condition : isnotnull(cd_demo_sk#18) (44) Exchange -Input [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] -Arguments: hashpartitioning(cd_demo_sk#24, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Arguments: hashpartitioning(cd_demo_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=7] (45) Sort [codegen id : 18] -Input [9]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] -Arguments: [cd_demo_sk#24 ASC NULLS FIRST], false, 0 +Input [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Arguments: [cd_demo_sk#18 ASC NULLS FIRST], false, 0 (46) SortMergeJoin [codegen id : 19] Left keys [1]: [c_current_cdemo_sk#4] -Right keys [1]: [cd_demo_sk#24] +Right keys [1]: [cd_demo_sk#18] Join condition: None (47) Project [codegen id : 19] -Output [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] -Input [10]: [c_current_cdemo_sk#4, cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Output [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Input [10]: [c_current_cdemo_sk#4, cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] (48) HashAggregate [codegen id : 19] -Input [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] -Keys [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Input [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Keys [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#34] -Results [9]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, count#35] +Aggregate Attributes [1]: [count#27] +Results [9]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#28] (49) Exchange -Input [9]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, count#35] -Arguments: hashpartitioning(cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [9]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#28] +Arguments: hashpartitioning(cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, 5), ENSURE_REQUIREMENTS, [plan_id=8] (50) HashAggregate [codegen id : 20] -Input [9]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32, count#35] -Keys [8]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cd_purchase_estimate#28, cd_credit_rating#29, cd_dep_count#30, cd_dep_employed_count#31, cd_dep_college_count#32] +Input [9]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#28] +Keys [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#37] -Results [14]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, count(1)#37 AS cnt1#38, cd_purchase_estimate#28, count(1)#37 AS cnt2#39, cd_credit_rating#29, count(1)#37 AS cnt3#40, cd_dep_count#30, count(1)#37 AS cnt4#41, cd_dep_employed_count#31, count(1)#37 AS cnt5#42, cd_dep_college_count#32, count(1)#37 AS cnt6#43] +Aggregate Attributes [1]: [count(1)#29] +Results [14]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, count(1)#29 AS cnt1#30, cd_purchase_estimate#22, count(1)#29 AS cnt2#31, cd_credit_rating#23, count(1)#29 AS cnt3#32, cd_dep_count#24, count(1)#29 AS cnt4#33, cd_dep_employed_count#25, count(1)#29 AS cnt5#34, cd_dep_college_count#26, count(1)#29 AS cnt6#35] (51) TakeOrderedAndProject -Input [14]: [cd_gender#25, cd_marital_status#26, cd_education_status#27, cnt1#38, cd_purchase_estimate#28, cnt2#39, cd_credit_rating#29, cnt3#40, cd_dep_count#30, cnt4#41, cd_dep_employed_count#31, cnt5#42, cd_dep_college_count#32, cnt6#43] -Arguments: 100, [cd_gender#25 ASC NULLS FIRST, cd_marital_status#26 ASC NULLS FIRST, cd_education_status#27 ASC NULLS FIRST, cd_purchase_estimate#28 ASC NULLS FIRST, cd_credit_rating#29 ASC NULLS FIRST, cd_dep_count#30 ASC NULLS FIRST, cd_dep_employed_count#31 ASC NULLS FIRST, cd_dep_college_count#32 ASC NULLS FIRST], [cd_gender#25, cd_marital_status#26, cd_education_status#27, cnt1#38, cd_purchase_estimate#28, cnt2#39, cd_credit_rating#29, cnt3#40, cd_dep_count#30, cnt4#41, cd_dep_employed_count#31, cnt5#42, cd_dep_college_count#32, cnt6#43] +Input [14]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cnt1#30, cd_purchase_estimate#22, cnt2#31, cd_credit_rating#23, cnt3#32, cd_dep_count#24, cnt4#33, cd_dep_employed_count#25, cnt5#34, cd_dep_college_count#26, cnt6#35] +Arguments: 100, [cd_gender#19 ASC NULLS FIRST, cd_marital_status#20 ASC NULLS FIRST, cd_education_status#21 ASC NULLS FIRST, cd_purchase_estimate#22 ASC NULLS FIRST, cd_credit_rating#23 ASC NULLS FIRST, cd_dep_count#24 ASC NULLS FIRST, cd_dep_employed_count#25 ASC NULLS FIRST, cd_dep_college_count#26 ASC NULLS FIRST], [cd_gender#19, cd_marital_status#20, cd_education_status#21, cnt1#30, cd_purchase_estimate#22, cnt2#31, cd_credit_rating#23, cnt3#32, cd_dep_count#24, cnt4#33, cd_dep_employed_count#25, cnt5#34, cd_dep_college_count#26, cnt6#35] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 BroadcastExchange (56) +- * Project (55) +- * Filter (54) @@ -290,29 +290,29 @@ BroadcastExchange (56) (52) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#44, d_moy#45] +Output [3]: [d_date_sk#9, d_year#36, d_moy#37] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThanOrEqual(d_moy,4), IsNotNull(d_date_sk)] ReadSchema: struct (53) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#44, d_moy#45] +Input [3]: [d_date_sk#9, d_year#36, d_moy#37] (54) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#44, d_moy#45] -Condition : (((((isnotnull(d_year#44) AND isnotnull(d_moy#45)) AND (d_year#44 = 2002)) AND (d_moy#45 >= 1)) AND (d_moy#45 <= 4)) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#9, d_year#36, d_moy#37] +Condition : (((((isnotnull(d_year#36) AND isnotnull(d_moy#37)) AND (d_year#36 = 2002)) AND (d_moy#37 >= 1)) AND (d_moy#37 <= 4)) AND isnotnull(d_date_sk#9)) (55) Project [codegen id : 1] -Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#44, d_moy#45] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#36, d_moy#37] (56) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#46] +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] -Subquery:2 Hosting operator id = 14 Hosting Expression = ws_sold_date_sk#13 IN dynamicpruning#9 +Subquery:2 Hosting operator id = 14 Hosting Expression = ws_sold_date_sk#11 IN dynamicpruning#8 -Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#17 IN dynamicpruning#9 +Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#14 IN dynamicpruning#8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt index 80c23b3f113a0..2496ee87e6c25 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q10/explain.txt @@ -82,7 +82,7 @@ Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#9] (9) BroadcastExchange Input [1]: [ss_customer_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#3] @@ -90,65 +90,65 @@ Right keys [1]: [ss_customer_sk#6] Join condition: None (11) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Output [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#12), dynamicpruningexpression(ws_sold_date_sk#12 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ws_sold_date_sk#11), dynamicpruningexpression(ws_sold_date_sk#11 IN dynamicpruning#8)] ReadSchema: struct (12) ColumnarToRow [codegen id : 4] -Input [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Input [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] (13) ReusedExchange [Reuses operator id: 48] -Output [1]: [d_date_sk#13] +Output [1]: [d_date_sk#12] (14) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ws_sold_date_sk#12] -Right keys [1]: [d_date_sk#13] +Left keys [1]: [ws_sold_date_sk#11] +Right keys [1]: [d_date_sk#12] Join condition: None (15) Project [codegen id : 4] -Output [1]: [ws_bill_customer_sk#11] -Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] +Output [1]: [ws_bill_customer_sk#10] +Input [3]: [ws_bill_customer_sk#10, ws_sold_date_sk#11, d_date_sk#12] (16) BroadcastExchange -Input [1]: [ws_bill_customer_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Input [1]: [ws_bill_customer_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ws_bill_customer_sk#11] +Right keys [1]: [ws_bill_customer_sk#10] Join condition: None (18) Scan parquet default.catalog_sales -Output [2]: [cs_ship_customer_sk#15, cs_sold_date_sk#16] +Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#16), dynamicpruningexpression(cs_sold_date_sk#16 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(cs_sold_date_sk#14), dynamicpruningexpression(cs_sold_date_sk#14 IN dynamicpruning#8)] ReadSchema: struct (19) ColumnarToRow [codegen id : 6] -Input [2]: [cs_ship_customer_sk#15, cs_sold_date_sk#16] +Input [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] (20) ReusedExchange [Reuses operator id: 48] -Output [1]: [d_date_sk#17] +Output [1]: [d_date_sk#15] (21) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_sold_date_sk#16] -Right keys [1]: [d_date_sk#17] +Left keys [1]: [cs_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] Join condition: None (22) Project [codegen id : 6] -Output [1]: [cs_ship_customer_sk#15] -Input [3]: [cs_ship_customer_sk#15, cs_sold_date_sk#16, d_date_sk#17] +Output [1]: [cs_ship_customer_sk#13] +Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] (23) BroadcastExchange -Input [1]: [cs_ship_customer_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [cs_ship_customer_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [cs_ship_customer_sk#15] +Right keys [1]: [cs_ship_customer_sk#13] Join condition: None (25) Filter [codegen id : 9] @@ -160,84 +160,84 @@ Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] (27) Scan parquet default.customer_address -Output [2]: [ca_address_sk#19, ca_county#20] +Output [2]: [ca_address_sk#16, ca_county#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_county, [Dona Ana County,Jefferson County,La Porte County,Rush County,Toole County]), IsNotNull(ca_address_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#19, ca_county#20] +Input [2]: [ca_address_sk#16, ca_county#17] (29) Filter [codegen id : 7] -Input [2]: [ca_address_sk#19, ca_county#20] -Condition : (ca_county#20 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) AND isnotnull(ca_address_sk#19)) +Input [2]: [ca_address_sk#16, ca_county#17] +Condition : (ca_county#17 IN (Rush County,Toole County,Jefferson County,Dona Ana County,La Porte County) AND isnotnull(ca_address_sk#16)) (30) Project [codegen id : 7] -Output [1]: [ca_address_sk#19] -Input [2]: [ca_address_sk#19, ca_county#20] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_county#17] (31) BroadcastExchange -Input [1]: [ca_address_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (32) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_current_addr_sk#5] -Right keys [1]: [ca_address_sk#19] +Right keys [1]: [ca_address_sk#16] Join condition: None (33) Project [codegen id : 9] Output [1]: [c_current_cdemo_sk#4] -Input [3]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#19] +Input [3]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#16] (34) Scan parquet default.customer_demographics -Output [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Output [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 8] -Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Input [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] (36) Filter [codegen id : 8] -Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Condition : isnotnull(cd_demo_sk#22) +Input [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Condition : isnotnull(cd_demo_sk#18) (37) BroadcastExchange -Input [9]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] +Input [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (38) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_current_cdemo_sk#4] -Right keys [1]: [cd_demo_sk#22] +Right keys [1]: [cd_demo_sk#18] Join condition: None (39) Project [codegen id : 9] -Output [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Input [10]: [c_current_cdemo_sk#4, cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Output [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Input [10]: [c_current_cdemo_sk#4, cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] (40) HashAggregate [codegen id : 9] -Input [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Keys [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Input [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Keys [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#32] -Results [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] +Aggregate Attributes [1]: [count#27] +Results [9]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#28] (41) Exchange -Input [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] -Arguments: hashpartitioning(cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [9]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#28] +Arguments: hashpartitioning(cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, 5), ENSURE_REQUIREMENTS, [plan_id=6] (42) HashAggregate [codegen id : 10] -Input [9]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#33] -Keys [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Input [9]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#28] +Keys [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#35] -Results [14]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, count(1)#35 AS cnt1#36, cd_purchase_estimate#26, count(1)#35 AS cnt2#37, cd_credit_rating#27, count(1)#35 AS cnt3#38, cd_dep_count#28, count(1)#35 AS cnt4#39, cd_dep_employed_count#29, count(1)#35 AS cnt5#40, cd_dep_college_count#30, count(1)#35 AS cnt6#41] +Aggregate Attributes [1]: [count(1)#29] +Results [14]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, count(1)#29 AS cnt1#30, cd_purchase_estimate#22, count(1)#29 AS cnt2#31, cd_credit_rating#23, count(1)#29 AS cnt3#32, cd_dep_count#24, count(1)#29 AS cnt4#33, cd_dep_employed_count#25, count(1)#29 AS cnt5#34, cd_dep_college_count#26, count(1)#29 AS cnt6#35] (43) TakeOrderedAndProject -Input [14]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#36, cd_purchase_estimate#26, cnt2#37, cd_credit_rating#27, cnt3#38, cd_dep_count#28, cnt4#39, cd_dep_employed_count#29, cnt5#40, cd_dep_college_count#30, cnt6#41] -Arguments: 100, [cd_gender#23 ASC NULLS FIRST, cd_marital_status#24 ASC NULLS FIRST, cd_education_status#25 ASC NULLS FIRST, cd_purchase_estimate#26 ASC NULLS FIRST, cd_credit_rating#27 ASC NULLS FIRST, cd_dep_count#28 ASC NULLS FIRST, cd_dep_employed_count#29 ASC NULLS FIRST, cd_dep_college_count#30 ASC NULLS FIRST], [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#36, cd_purchase_estimate#26, cnt2#37, cd_credit_rating#27, cnt3#38, cd_dep_count#28, cnt4#39, cd_dep_employed_count#29, cnt5#40, cd_dep_college_count#30, cnt6#41] +Input [14]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cnt1#30, cd_purchase_estimate#22, cnt2#31, cd_credit_rating#23, cnt3#32, cd_dep_count#24, cnt4#33, cd_dep_employed_count#25, cnt5#34, cd_dep_college_count#26, cnt6#35] +Arguments: 100, [cd_gender#19 ASC NULLS FIRST, cd_marital_status#20 ASC NULLS FIRST, cd_education_status#21 ASC NULLS FIRST, cd_purchase_estimate#22 ASC NULLS FIRST, cd_credit_rating#23 ASC NULLS FIRST, cd_dep_count#24 ASC NULLS FIRST, cd_dep_employed_count#25 ASC NULLS FIRST, cd_dep_college_count#26 ASC NULLS FIRST], [cd_gender#19, cd_marital_status#20, cd_education_status#21, cnt1#30, cd_purchase_estimate#22, cnt2#31, cd_credit_rating#23, cnt3#32, cd_dep_count#24, cnt4#33, cd_dep_employed_count#25, cnt5#34, cd_dep_college_count#26, cnt6#35] ===== Subqueries ===== @@ -250,29 +250,29 @@ BroadcastExchange (48) (44) Scan parquet default.date_dim -Output [3]: [d_date_sk#9, d_year#42, d_moy#43] +Output [3]: [d_date_sk#9, d_year#36, d_moy#37] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,1), LessThanOrEqual(d_moy,4), IsNotNull(d_date_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#9, d_year#42, d_moy#43] +Input [3]: [d_date_sk#9, d_year#36, d_moy#37] (46) Filter [codegen id : 1] -Input [3]: [d_date_sk#9, d_year#42, d_moy#43] -Condition : (((((isnotnull(d_year#42) AND isnotnull(d_moy#43)) AND (d_year#42 = 2002)) AND (d_moy#43 >= 1)) AND (d_moy#43 <= 4)) AND isnotnull(d_date_sk#9)) +Input [3]: [d_date_sk#9, d_year#36, d_moy#37] +Condition : (((((isnotnull(d_year#36) AND isnotnull(d_moy#37)) AND (d_year#36 = 2002)) AND (d_moy#37 >= 1)) AND (d_moy#37 <= 4)) AND isnotnull(d_date_sk#9)) (47) Project [codegen id : 1] Output [1]: [d_date_sk#9] -Input [3]: [d_date_sk#9, d_year#42, d_moy#43] +Input [3]: [d_date_sk#9, d_year#36, d_moy#37] (48) BroadcastExchange Input [1]: [d_date_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:2 Hosting operator id = 11 Hosting Expression = ws_sold_date_sk#12 IN dynamicpruning#8 +Subquery:2 Hosting operator id = 11 Hosting Expression = ws_sold_date_sk#11 IN dynamicpruning#8 -Subquery:3 Hosting operator id = 18 Hosting Expression = cs_sold_date_sk#16 IN dynamicpruning#8 +Subquery:3 Hosting operator id = 18 Hosting Expression = cs_sold_date_sk#14 IN dynamicpruning#8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/explain.txt index 4d8179a75c6ea..796c9fc63a0a3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11.sf100/explain.txt @@ -110,338 +110,338 @@ Input [6]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_list_price#3, ss_sol (7) Exchange Input [4]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7] -Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#8] +Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (8) Sort [codegen id : 3] Input [4]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7] Arguments: [ss_customer_sk#1 ASC NULLS FIRST], false, 0 (9) Scan parquet default.customer -Output [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Output [8]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (10) ColumnarToRow [codegen id : 4] -Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Input [8]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] (11) Filter [codegen id : 4] -Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] -Condition : (isnotnull(c_customer_sk#9) AND isnotnull(c_customer_id#10)) +Input [8]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] +Condition : (isnotnull(c_customer_sk#8) AND isnotnull(c_customer_id#9)) (12) Exchange -Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] -Arguments: hashpartitioning(c_customer_sk#9, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [8]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] +Arguments: hashpartitioning(c_customer_sk#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (13) Sort [codegen id : 5] -Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] -Arguments: [c_customer_sk#9 ASC NULLS FIRST], false, 0 +Input [8]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] +Arguments: [c_customer_sk#8 ASC NULLS FIRST], false, 0 (14) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#9] +Right keys [1]: [c_customer_sk#8] Join condition: None (15) Project [codegen id : 6] -Output [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7] -Input [12]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7, c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Output [10]: [c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7] +Input [12]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7, c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] (16) HashAggregate [codegen id : 6] -Input [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7] -Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Input [10]: [c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7] +Keys [8]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#7, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#18] -Results [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] +Aggregate Attributes [1]: [sum#16] +Results [9]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#7, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15, sum#17] (17) Exchange -Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] -Arguments: hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [9]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#7, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15, sum#17] +Arguments: hashpartitioning(c_customer_id#9, c_first_name#10, c_last_name#11, d_year#7, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 7] -Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] -Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Input [9]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#7, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15, sum#17] +Keys [8]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#7, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))#21] -Results [2]: [c_customer_id#10 AS customer_id#22, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))#21,18,2) AS year_total#23] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))#18] +Results [2]: [c_customer_id#9 AS customer_id#19, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))#18,18,2) AS year_total#20] (19) Filter [codegen id : 7] -Input [2]: [customer_id#22, year_total#23] -Condition : (isnotnull(year_total#23) AND (year_total#23 > 0.00)) +Input [2]: [customer_id#19, year_total#20] +Condition : (isnotnull(year_total#20) AND (year_total#20 > 0.00)) (20) Exchange -Input [2]: [customer_id#22, year_total#23] -Arguments: hashpartitioning(customer_id#22, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [2]: [customer_id#19, year_total#20] +Arguments: hashpartitioning(customer_id#19, 5), ENSURE_REQUIREMENTS, [plan_id=4] (21) Sort [codegen id : 8] -Input [2]: [customer_id#22, year_total#23] -Arguments: [customer_id#22 ASC NULLS FIRST], false, 0 +Input [2]: [customer_id#19, year_total#20] +Arguments: [customer_id#19 ASC NULLS FIRST], false, 0 (22) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, ss_sold_date_sk#28] +Output [4]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, ss_sold_date_sk#24] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#28), dynamicpruningexpression(ss_sold_date_sk#28 IN dynamicpruning#29)] +PartitionFilters: [isnotnull(ss_sold_date_sk#24), dynamicpruningexpression(ss_sold_date_sk#24 IN dynamicpruning#25)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 10] -Input [4]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, ss_sold_date_sk#28] +Input [4]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, ss_sold_date_sk#24] (24) Filter [codegen id : 10] -Input [4]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, ss_sold_date_sk#28] -Condition : isnotnull(ss_customer_sk#25) +Input [4]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, ss_sold_date_sk#24] +Condition : isnotnull(ss_customer_sk#21) (25) ReusedExchange [Reuses operator id: 88] -Output [2]: [d_date_sk#30, d_year#31] +Output [2]: [d_date_sk#26, d_year#27] (26) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_sold_date_sk#28] -Right keys [1]: [d_date_sk#30] +Left keys [1]: [ss_sold_date_sk#24] +Right keys [1]: [d_date_sk#26] Join condition: None (27) Project [codegen id : 10] -Output [4]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, d_year#31] -Input [6]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, ss_sold_date_sk#28, d_date_sk#30, d_year#31] +Output [4]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, d_year#27] +Input [6]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, ss_sold_date_sk#24, d_date_sk#26, d_year#27] (28) Exchange -Input [4]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, d_year#31] -Arguments: hashpartitioning(ss_customer_sk#25, 5), ENSURE_REQUIREMENTS, [id=#32] +Input [4]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, d_year#27] +Arguments: hashpartitioning(ss_customer_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=5] (29) Sort [codegen id : 11] -Input [4]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, d_year#31] -Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 +Input [4]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, d_year#27] +Arguments: [ss_customer_sk#21 ASC NULLS FIRST], false, 0 (30) ReusedExchange [Reuses operator id: 12] -Output [8]: [c_customer_sk#33, c_customer_id#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40] +Output [8]: [c_customer_sk#28, c_customer_id#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35] (31) Sort [codegen id : 13] -Input [8]: [c_customer_sk#33, c_customer_id#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40] -Arguments: [c_customer_sk#33 ASC NULLS FIRST], false, 0 +Input [8]: [c_customer_sk#28, c_customer_id#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 (32) SortMergeJoin [codegen id : 14] -Left keys [1]: [ss_customer_sk#25] -Right keys [1]: [c_customer_sk#33] +Left keys [1]: [ss_customer_sk#21] +Right keys [1]: [c_customer_sk#28] Join condition: None (33) Project [codegen id : 14] -Output [10]: [c_customer_id#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, ss_ext_discount_amt#26, ss_ext_list_price#27, d_year#31] -Input [12]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, d_year#31, c_customer_sk#33, c_customer_id#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40] +Output [10]: [c_customer_id#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35, ss_ext_discount_amt#22, ss_ext_list_price#23, d_year#27] +Input [12]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, d_year#27, c_customer_sk#28, c_customer_id#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35] (34) HashAggregate [codegen id : 14] -Input [10]: [c_customer_id#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, ss_ext_discount_amt#26, ss_ext_list_price#27, d_year#31] -Keys [8]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#41] -Results [9]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, sum#42] +Input [10]: [c_customer_id#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35, ss_ext_discount_amt#22, ss_ext_list_price#23, d_year#27] +Keys [8]: [c_customer_id#29, c_first_name#30, c_last_name#31, d_year#27, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#23 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#22 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum#36] +Results [9]: [c_customer_id#29, c_first_name#30, c_last_name#31, d_year#27, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35, sum#37] (35) Exchange -Input [9]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, sum#42] -Arguments: hashpartitioning(c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, 5), ENSURE_REQUIREMENTS, [id=#43] +Input [9]: [c_customer_id#29, c_first_name#30, c_last_name#31, d_year#27, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35, sum#37] +Arguments: hashpartitioning(c_customer_id#29, c_first_name#30, c_last_name#31, d_year#27, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35, 5), ENSURE_REQUIREMENTS, [plan_id=6] (36) HashAggregate [codegen id : 15] -Input [9]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, sum#42] -Keys [8]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))#21] -Results [3]: [c_customer_id#34 AS customer_id#44, c_preferred_cust_flag#37 AS customer_preferred_cust_flag#45, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))#21,18,2) AS year_total#46] +Input [9]: [c_customer_id#29, c_first_name#30, c_last_name#31, d_year#27, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35, sum#37] +Keys [8]: [c_customer_id#29, c_first_name#30, c_last_name#31, d_year#27, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#23 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#22 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#23 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#22 as decimal(8,2)))), DecimalType(8,2))))#18] +Results [3]: [c_customer_id#29 AS customer_id#38, c_preferred_cust_flag#32 AS customer_preferred_cust_flag#39, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#23 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#22 as decimal(8,2)))), DecimalType(8,2))))#18,18,2) AS year_total#40] (37) Exchange -Input [3]: [customer_id#44, customer_preferred_cust_flag#45, year_total#46] -Arguments: hashpartitioning(customer_id#44, 5), ENSURE_REQUIREMENTS, [id=#47] +Input [3]: [customer_id#38, customer_preferred_cust_flag#39, year_total#40] +Arguments: hashpartitioning(customer_id#38, 5), ENSURE_REQUIREMENTS, [plan_id=7] (38) Sort [codegen id : 16] -Input [3]: [customer_id#44, customer_preferred_cust_flag#45, year_total#46] -Arguments: [customer_id#44 ASC NULLS FIRST], false, 0 +Input [3]: [customer_id#38, customer_preferred_cust_flag#39, year_total#40] +Arguments: [customer_id#38 ASC NULLS FIRST], false, 0 (39) SortMergeJoin [codegen id : 17] -Left keys [1]: [customer_id#22] -Right keys [1]: [customer_id#44] +Left keys [1]: [customer_id#19] +Right keys [1]: [customer_id#38] Join condition: None (40) Project [codegen id : 17] -Output [4]: [customer_id#22, year_total#23, customer_preferred_cust_flag#45, year_total#46] -Input [5]: [customer_id#22, year_total#23, customer_id#44, customer_preferred_cust_flag#45, year_total#46] +Output [4]: [customer_id#19, year_total#20, customer_preferred_cust_flag#39, year_total#40] +Input [5]: [customer_id#19, year_total#20, customer_id#38, customer_preferred_cust_flag#39, year_total#40] (41) Scan parquet default.web_sales -Output [4]: [ws_bill_customer_sk#48, ws_ext_discount_amt#49, ws_ext_list_price#50, ws_sold_date_sk#51] +Output [4]: [ws_bill_customer_sk#41, ws_ext_discount_amt#42, ws_ext_list_price#43, ws_sold_date_sk#44] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#51), dynamicpruningexpression(ws_sold_date_sk#51 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#44), dynamicpruningexpression(ws_sold_date_sk#44 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (42) ColumnarToRow [codegen id : 19] -Input [4]: [ws_bill_customer_sk#48, ws_ext_discount_amt#49, ws_ext_list_price#50, ws_sold_date_sk#51] +Input [4]: [ws_bill_customer_sk#41, ws_ext_discount_amt#42, ws_ext_list_price#43, ws_sold_date_sk#44] (43) Filter [codegen id : 19] -Input [4]: [ws_bill_customer_sk#48, ws_ext_discount_amt#49, ws_ext_list_price#50, ws_sold_date_sk#51] -Condition : isnotnull(ws_bill_customer_sk#48) +Input [4]: [ws_bill_customer_sk#41, ws_ext_discount_amt#42, ws_ext_list_price#43, ws_sold_date_sk#44] +Condition : isnotnull(ws_bill_customer_sk#41) (44) ReusedExchange [Reuses operator id: 84] -Output [2]: [d_date_sk#52, d_year#53] +Output [2]: [d_date_sk#45, d_year#46] (45) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ws_sold_date_sk#51] -Right keys [1]: [d_date_sk#52] +Left keys [1]: [ws_sold_date_sk#44] +Right keys [1]: [d_date_sk#45] Join condition: None (46) Project [codegen id : 19] -Output [4]: [ws_bill_customer_sk#48, ws_ext_discount_amt#49, ws_ext_list_price#50, d_year#53] -Input [6]: [ws_bill_customer_sk#48, ws_ext_discount_amt#49, ws_ext_list_price#50, ws_sold_date_sk#51, d_date_sk#52, d_year#53] +Output [4]: [ws_bill_customer_sk#41, ws_ext_discount_amt#42, ws_ext_list_price#43, d_year#46] +Input [6]: [ws_bill_customer_sk#41, ws_ext_discount_amt#42, ws_ext_list_price#43, ws_sold_date_sk#44, d_date_sk#45, d_year#46] (47) Exchange -Input [4]: [ws_bill_customer_sk#48, ws_ext_discount_amt#49, ws_ext_list_price#50, d_year#53] -Arguments: hashpartitioning(ws_bill_customer_sk#48, 5), ENSURE_REQUIREMENTS, [id=#54] +Input [4]: [ws_bill_customer_sk#41, ws_ext_discount_amt#42, ws_ext_list_price#43, d_year#46] +Arguments: hashpartitioning(ws_bill_customer_sk#41, 5), ENSURE_REQUIREMENTS, [plan_id=8] (48) Sort [codegen id : 20] -Input [4]: [ws_bill_customer_sk#48, ws_ext_discount_amt#49, ws_ext_list_price#50, d_year#53] -Arguments: [ws_bill_customer_sk#48 ASC NULLS FIRST], false, 0 +Input [4]: [ws_bill_customer_sk#41, ws_ext_discount_amt#42, ws_ext_list_price#43, d_year#46] +Arguments: [ws_bill_customer_sk#41 ASC NULLS FIRST], false, 0 (49) ReusedExchange [Reuses operator id: 12] -Output [8]: [c_customer_sk#55, c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62] +Output [8]: [c_customer_sk#47, c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54] (50) Sort [codegen id : 22] -Input [8]: [c_customer_sk#55, c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62] -Arguments: [c_customer_sk#55 ASC NULLS FIRST], false, 0 +Input [8]: [c_customer_sk#47, c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54] +Arguments: [c_customer_sk#47 ASC NULLS FIRST], false, 0 (51) SortMergeJoin [codegen id : 23] -Left keys [1]: [ws_bill_customer_sk#48] -Right keys [1]: [c_customer_sk#55] +Left keys [1]: [ws_bill_customer_sk#41] +Right keys [1]: [c_customer_sk#47] Join condition: None (52) Project [codegen id : 23] -Output [10]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#49, ws_ext_list_price#50, d_year#53] -Input [12]: [ws_bill_customer_sk#48, ws_ext_discount_amt#49, ws_ext_list_price#50, d_year#53, c_customer_sk#55, c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62] +Output [10]: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, ws_ext_discount_amt#42, ws_ext_list_price#43, d_year#46] +Input [12]: [ws_bill_customer_sk#41, ws_ext_discount_amt#42, ws_ext_list_price#43, d_year#46, c_customer_sk#47, c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54] (53) HashAggregate [codegen id : 23] -Input [10]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, ws_ext_discount_amt#49, ws_ext_list_price#50, d_year#53] -Keys [8]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#53] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#50 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#49 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#63] -Results [9]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#53, sum#64] +Input [10]: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, ws_ext_discount_amt#42, ws_ext_list_price#43, d_year#46] +Keys [8]: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, d_year#46] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#43 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#42 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum#55] +Results [9]: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, d_year#46, sum#56] (54) Exchange -Input [9]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#53, sum#64] -Arguments: hashpartitioning(c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#53, 5), ENSURE_REQUIREMENTS, [id=#65] +Input [9]: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, d_year#46, sum#56] +Arguments: hashpartitioning(c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, d_year#46, 5), ENSURE_REQUIREMENTS, [plan_id=9] (55) HashAggregate [codegen id : 24] -Input [9]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#53, sum#64] -Keys [8]: [c_customer_id#56, c_first_name#57, c_last_name#58, c_preferred_cust_flag#59, c_birth_country#60, c_login#61, c_email_address#62, d_year#53] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#50 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#49 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#50 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#49 as decimal(8,2)))), DecimalType(8,2))))#66] -Results [2]: [c_customer_id#56 AS customer_id#67, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#50 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#49 as decimal(8,2)))), DecimalType(8,2))))#66,18,2) AS year_total#68] +Input [9]: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, d_year#46, sum#56] +Keys [8]: [c_customer_id#48, c_first_name#49, c_last_name#50, c_preferred_cust_flag#51, c_birth_country#52, c_login#53, c_email_address#54, d_year#46] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#43 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#42 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#43 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#42 as decimal(8,2)))), DecimalType(8,2))))#57] +Results [2]: [c_customer_id#48 AS customer_id#58, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#43 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#42 as decimal(8,2)))), DecimalType(8,2))))#57,18,2) AS year_total#59] (56) Filter [codegen id : 24] -Input [2]: [customer_id#67, year_total#68] -Condition : (isnotnull(year_total#68) AND (year_total#68 > 0.00)) +Input [2]: [customer_id#58, year_total#59] +Condition : (isnotnull(year_total#59) AND (year_total#59 > 0.00)) (57) Exchange -Input [2]: [customer_id#67, year_total#68] -Arguments: hashpartitioning(customer_id#67, 5), ENSURE_REQUIREMENTS, [id=#69] +Input [2]: [customer_id#58, year_total#59] +Arguments: hashpartitioning(customer_id#58, 5), ENSURE_REQUIREMENTS, [plan_id=10] (58) Sort [codegen id : 25] -Input [2]: [customer_id#67, year_total#68] -Arguments: [customer_id#67 ASC NULLS FIRST], false, 0 +Input [2]: [customer_id#58, year_total#59] +Arguments: [customer_id#58 ASC NULLS FIRST], false, 0 (59) SortMergeJoin [codegen id : 26] -Left keys [1]: [customer_id#22] -Right keys [1]: [customer_id#67] +Left keys [1]: [customer_id#19] +Right keys [1]: [customer_id#58] Join condition: None (60) Project [codegen id : 26] -Output [5]: [customer_id#22, year_total#23, customer_preferred_cust_flag#45, year_total#46, year_total#68] -Input [6]: [customer_id#22, year_total#23, customer_preferred_cust_flag#45, year_total#46, customer_id#67, year_total#68] +Output [5]: [customer_id#19, year_total#20, customer_preferred_cust_flag#39, year_total#40, year_total#59] +Input [6]: [customer_id#19, year_total#20, customer_preferred_cust_flag#39, year_total#40, customer_id#58, year_total#59] (61) Scan parquet default.web_sales -Output [4]: [ws_bill_customer_sk#70, ws_ext_discount_amt#71, ws_ext_list_price#72, ws_sold_date_sk#73] +Output [4]: [ws_bill_customer_sk#60, ws_ext_discount_amt#61, ws_ext_list_price#62, ws_sold_date_sk#63] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#73), dynamicpruningexpression(ws_sold_date_sk#73 IN dynamicpruning#29)] +PartitionFilters: [isnotnull(ws_sold_date_sk#63), dynamicpruningexpression(ws_sold_date_sk#63 IN dynamicpruning#25)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (62) ColumnarToRow [codegen id : 28] -Input [4]: [ws_bill_customer_sk#70, ws_ext_discount_amt#71, ws_ext_list_price#72, ws_sold_date_sk#73] +Input [4]: [ws_bill_customer_sk#60, ws_ext_discount_amt#61, ws_ext_list_price#62, ws_sold_date_sk#63] (63) Filter [codegen id : 28] -Input [4]: [ws_bill_customer_sk#70, ws_ext_discount_amt#71, ws_ext_list_price#72, ws_sold_date_sk#73] -Condition : isnotnull(ws_bill_customer_sk#70) +Input [4]: [ws_bill_customer_sk#60, ws_ext_discount_amt#61, ws_ext_list_price#62, ws_sold_date_sk#63] +Condition : isnotnull(ws_bill_customer_sk#60) (64) ReusedExchange [Reuses operator id: 88] -Output [2]: [d_date_sk#74, d_year#75] +Output [2]: [d_date_sk#64, d_year#65] (65) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [ws_sold_date_sk#73] -Right keys [1]: [d_date_sk#74] +Left keys [1]: [ws_sold_date_sk#63] +Right keys [1]: [d_date_sk#64] Join condition: None (66) Project [codegen id : 28] -Output [4]: [ws_bill_customer_sk#70, ws_ext_discount_amt#71, ws_ext_list_price#72, d_year#75] -Input [6]: [ws_bill_customer_sk#70, ws_ext_discount_amt#71, ws_ext_list_price#72, ws_sold_date_sk#73, d_date_sk#74, d_year#75] +Output [4]: [ws_bill_customer_sk#60, ws_ext_discount_amt#61, ws_ext_list_price#62, d_year#65] +Input [6]: [ws_bill_customer_sk#60, ws_ext_discount_amt#61, ws_ext_list_price#62, ws_sold_date_sk#63, d_date_sk#64, d_year#65] (67) Exchange -Input [4]: [ws_bill_customer_sk#70, ws_ext_discount_amt#71, ws_ext_list_price#72, d_year#75] -Arguments: hashpartitioning(ws_bill_customer_sk#70, 5), ENSURE_REQUIREMENTS, [id=#76] +Input [4]: [ws_bill_customer_sk#60, ws_ext_discount_amt#61, ws_ext_list_price#62, d_year#65] +Arguments: hashpartitioning(ws_bill_customer_sk#60, 5), ENSURE_REQUIREMENTS, [plan_id=11] (68) Sort [codegen id : 29] -Input [4]: [ws_bill_customer_sk#70, ws_ext_discount_amt#71, ws_ext_list_price#72, d_year#75] -Arguments: [ws_bill_customer_sk#70 ASC NULLS FIRST], false, 0 +Input [4]: [ws_bill_customer_sk#60, ws_ext_discount_amt#61, ws_ext_list_price#62, d_year#65] +Arguments: [ws_bill_customer_sk#60 ASC NULLS FIRST], false, 0 (69) ReusedExchange [Reuses operator id: 12] -Output [8]: [c_customer_sk#77, c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84] +Output [8]: [c_customer_sk#66, c_customer_id#67, c_first_name#68, c_last_name#69, c_preferred_cust_flag#70, c_birth_country#71, c_login#72, c_email_address#73] (70) Sort [codegen id : 31] -Input [8]: [c_customer_sk#77, c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84] -Arguments: [c_customer_sk#77 ASC NULLS FIRST], false, 0 +Input [8]: [c_customer_sk#66, c_customer_id#67, c_first_name#68, c_last_name#69, c_preferred_cust_flag#70, c_birth_country#71, c_login#72, c_email_address#73] +Arguments: [c_customer_sk#66 ASC NULLS FIRST], false, 0 (71) SortMergeJoin [codegen id : 32] -Left keys [1]: [ws_bill_customer_sk#70] -Right keys [1]: [c_customer_sk#77] +Left keys [1]: [ws_bill_customer_sk#60] +Right keys [1]: [c_customer_sk#66] Join condition: None (72) Project [codegen id : 32] -Output [10]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, ws_ext_discount_amt#71, ws_ext_list_price#72, d_year#75] -Input [12]: [ws_bill_customer_sk#70, ws_ext_discount_amt#71, ws_ext_list_price#72, d_year#75, c_customer_sk#77, c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84] +Output [10]: [c_customer_id#67, c_first_name#68, c_last_name#69, c_preferred_cust_flag#70, c_birth_country#71, c_login#72, c_email_address#73, ws_ext_discount_amt#61, ws_ext_list_price#62, d_year#65] +Input [12]: [ws_bill_customer_sk#60, ws_ext_discount_amt#61, ws_ext_list_price#62, d_year#65, c_customer_sk#66, c_customer_id#67, c_first_name#68, c_last_name#69, c_preferred_cust_flag#70, c_birth_country#71, c_login#72, c_email_address#73] (73) HashAggregate [codegen id : 32] -Input [10]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, ws_ext_discount_amt#71, ws_ext_list_price#72, d_year#75] -Keys [8]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#75] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#85] -Results [9]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#75, sum#86] +Input [10]: [c_customer_id#67, c_first_name#68, c_last_name#69, c_preferred_cust_flag#70, c_birth_country#71, c_login#72, c_email_address#73, ws_ext_discount_amt#61, ws_ext_list_price#62, d_year#65] +Keys [8]: [c_customer_id#67, c_first_name#68, c_last_name#69, c_preferred_cust_flag#70, c_birth_country#71, c_login#72, c_email_address#73, d_year#65] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#62 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#61 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum#74] +Results [9]: [c_customer_id#67, c_first_name#68, c_last_name#69, c_preferred_cust_flag#70, c_birth_country#71, c_login#72, c_email_address#73, d_year#65, sum#75] (74) Exchange -Input [9]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#75, sum#86] -Arguments: hashpartitioning(c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#75, 5), ENSURE_REQUIREMENTS, [id=#87] +Input [9]: [c_customer_id#67, c_first_name#68, c_last_name#69, c_preferred_cust_flag#70, c_birth_country#71, c_login#72, c_email_address#73, d_year#65, sum#75] +Arguments: hashpartitioning(c_customer_id#67, c_first_name#68, c_last_name#69, c_preferred_cust_flag#70, c_birth_country#71, c_login#72, c_email_address#73, d_year#65, 5), ENSURE_REQUIREMENTS, [plan_id=12] (75) HashAggregate [codegen id : 33] -Input [9]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#75, sum#86] -Keys [8]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#75] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2))))#66] -Results [2]: [c_customer_id#78 AS customer_id#88, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2))))#66,18,2) AS year_total#89] +Input [9]: [c_customer_id#67, c_first_name#68, c_last_name#69, c_preferred_cust_flag#70, c_birth_country#71, c_login#72, c_email_address#73, d_year#65, sum#75] +Keys [8]: [c_customer_id#67, c_first_name#68, c_last_name#69, c_preferred_cust_flag#70, c_birth_country#71, c_login#72, c_email_address#73, d_year#65] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#62 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#61 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#62 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#61 as decimal(8,2)))), DecimalType(8,2))))#57] +Results [2]: [c_customer_id#67 AS customer_id#76, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#62 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#61 as decimal(8,2)))), DecimalType(8,2))))#57,18,2) AS year_total#77] (76) Exchange -Input [2]: [customer_id#88, year_total#89] -Arguments: hashpartitioning(customer_id#88, 5), ENSURE_REQUIREMENTS, [id=#90] +Input [2]: [customer_id#76, year_total#77] +Arguments: hashpartitioning(customer_id#76, 5), ENSURE_REQUIREMENTS, [plan_id=13] (77) Sort [codegen id : 34] -Input [2]: [customer_id#88, year_total#89] -Arguments: [customer_id#88 ASC NULLS FIRST], false, 0 +Input [2]: [customer_id#76, year_total#77] +Arguments: [customer_id#76 ASC NULLS FIRST], false, 0 (78) SortMergeJoin [codegen id : 35] -Left keys [1]: [customer_id#22] -Right keys [1]: [customer_id#88] -Join condition: (CASE WHEN (year_total#68 > 0.00) THEN CheckOverflow((promote_precision(year_total#89) / promote_precision(year_total#68)), DecimalType(38,20)) END > CASE WHEN (year_total#23 > 0.00) THEN CheckOverflow((promote_precision(year_total#46) / promote_precision(year_total#23)), DecimalType(38,20)) END) +Left keys [1]: [customer_id#19] +Right keys [1]: [customer_id#76] +Join condition: (CASE WHEN (year_total#59 > 0.00) THEN CheckOverflow((promote_precision(year_total#77) / promote_precision(year_total#59)), DecimalType(38,20)) END > CASE WHEN (year_total#20 > 0.00) THEN CheckOverflow((promote_precision(year_total#40) / promote_precision(year_total#20)), DecimalType(38,20)) END) (79) Project [codegen id : 35] -Output [1]: [customer_preferred_cust_flag#45] -Input [7]: [customer_id#22, year_total#23, customer_preferred_cust_flag#45, year_total#46, year_total#68, customer_id#88, year_total#89] +Output [1]: [customer_preferred_cust_flag#39] +Input [7]: [customer_id#19, year_total#20, customer_preferred_cust_flag#39, year_total#40, year_total#59, customer_id#76, year_total#77] (80) TakeOrderedAndProject -Input [1]: [customer_preferred_cust_flag#45] -Arguments: 100, [customer_preferred_cust_flag#45 ASC NULLS FIRST], [customer_preferred_cust_flag#45] +Input [1]: [customer_preferred_cust_flag#39] +Arguments: 100, [customer_preferred_cust_flag#39 ASC NULLS FIRST], [customer_preferred_cust_flag#39] ===== Subqueries ===== @@ -468,9 +468,9 @@ Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk (84) BroadcastExchange Input [2]: [d_date_sk#6, d_year#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#91] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] -Subquery:2 Hosting operator id = 22 Hosting Expression = ss_sold_date_sk#28 IN dynamicpruning#29 +Subquery:2 Hosting operator id = 22 Hosting Expression = ss_sold_date_sk#24 IN dynamicpruning#25 BroadcastExchange (88) +- * Filter (87) +- * ColumnarToRow (86) @@ -478,25 +478,25 @@ BroadcastExchange (88) (85) Scan parquet default.date_dim -Output [2]: [d_date_sk#30, d_year#31] +Output [2]: [d_date_sk#26, d_year#27] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] ReadSchema: struct (86) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#30, d_year#31] +Input [2]: [d_date_sk#26, d_year#27] (87) Filter [codegen id : 1] -Input [2]: [d_date_sk#30, d_year#31] -Condition : ((isnotnull(d_year#31) AND (d_year#31 = 2002)) AND isnotnull(d_date_sk#30)) +Input [2]: [d_date_sk#26, d_year#27] +Condition : ((isnotnull(d_year#27) AND (d_year#27 = 2002)) AND isnotnull(d_date_sk#26)) (88) BroadcastExchange -Input [2]: [d_date_sk#30, d_year#31] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#92] +Input [2]: [d_date_sk#26, d_year#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=15] -Subquery:3 Hosting operator id = 41 Hosting Expression = ws_sold_date_sk#51 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 41 Hosting Expression = ws_sold_date_sk#44 IN dynamicpruning#5 -Subquery:4 Hosting operator id = 61 Hosting Expression = ws_sold_date_sk#73 IN dynamicpruning#29 +Subquery:4 Hosting operator id = 61 Hosting Expression = ws_sold_date_sk#63 IN dynamicpruning#25 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt index 8cb7c021fb3ea..9fc721d003998 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q11/explain.txt @@ -104,7 +104,7 @@ Condition : isnotnull(ss_customer_sk#9) (7) BroadcastExchange Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [c_customer_sk#1] @@ -116,301 +116,301 @@ Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_f Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] (10) ReusedExchange [Reuses operator id: 76] -Output [2]: [d_date_sk#15, d_year#16] +Output [2]: [d_date_sk#14, d_year#15] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#12] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (12) Project [codegen id : 3] -Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#16] -Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12, d_date_sk#15, d_year#16] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#15] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12, d_date_sk#14, d_year#15] (13) HashAggregate [codegen id : 3] -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#16] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#17] -Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] +Aggregate Attributes [1]: [sum#16] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#17] (14) Exchange -Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#17] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 16] -Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))#20] -Results [2]: [c_customer_id#2 AS customer_id#21, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))#20,18,2) AS year_total#22] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))#18] +Results [2]: [c_customer_id#2 AS customer_id#19, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))#18,18,2) AS year_total#20] (16) Filter [codegen id : 16] -Input [2]: [customer_id#21, year_total#22] -Condition : (isnotnull(year_total#22) AND (year_total#22 > 0.00)) +Input [2]: [customer_id#19, year_total#20] +Condition : (isnotnull(year_total#20) AND (year_total#20 > 0.00)) (17) Scan parquet default.customer -Output [8]: [c_customer_sk#23, c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30] +Output [8]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (18) ColumnarToRow [codegen id : 6] -Input [8]: [c_customer_sk#23, c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30] +Input [8]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] (19) Filter [codegen id : 6] -Input [8]: [c_customer_sk#23, c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30] -Condition : (isnotnull(c_customer_sk#23) AND isnotnull(c_customer_id#24)) +Input [8]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_customer_id#22)) (20) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#31, ss_ext_discount_amt#32, ss_ext_list_price#33, ss_sold_date_sk#34] +Output [4]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_list_price#31, ss_sold_date_sk#32] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#34), dynamicpruningexpression(ss_sold_date_sk#34 IN dynamicpruning#35)] +PartitionFilters: [isnotnull(ss_sold_date_sk#32), dynamicpruningexpression(ss_sold_date_sk#32 IN dynamicpruning#33)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [4]: [ss_customer_sk#31, ss_ext_discount_amt#32, ss_ext_list_price#33, ss_sold_date_sk#34] +Input [4]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_list_price#31, ss_sold_date_sk#32] (22) Filter [codegen id : 4] -Input [4]: [ss_customer_sk#31, ss_ext_discount_amt#32, ss_ext_list_price#33, ss_sold_date_sk#34] -Condition : isnotnull(ss_customer_sk#31) +Input [4]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_list_price#31, ss_sold_date_sk#32] +Condition : isnotnull(ss_customer_sk#29) (23) BroadcastExchange -Input [4]: [ss_customer_sk#31, ss_ext_discount_amt#32, ss_ext_list_price#33, ss_sold_date_sk#34] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] +Input [4]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_list_price#31, ss_sold_date_sk#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_customer_sk#23] -Right keys [1]: [ss_customer_sk#31] +Left keys [1]: [c_customer_sk#21] +Right keys [1]: [ss_customer_sk#29] Join condition: None (25) Project [codegen id : 6] -Output [10]: [c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, ss_ext_discount_amt#32, ss_ext_list_price#33, ss_sold_date_sk#34] -Input [12]: [c_customer_sk#23, c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, ss_customer_sk#31, ss_ext_discount_amt#32, ss_ext_list_price#33, ss_sold_date_sk#34] +Output [10]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_list_price#31, ss_sold_date_sk#32] +Input [12]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_list_price#31, ss_sold_date_sk#32] (26) ReusedExchange [Reuses operator id: 80] -Output [2]: [d_date_sk#37, d_year#38] +Output [2]: [d_date_sk#34, d_year#35] (27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#34] -Right keys [1]: [d_date_sk#37] +Left keys [1]: [ss_sold_date_sk#32] +Right keys [1]: [d_date_sk#34] Join condition: None (28) Project [codegen id : 6] -Output [10]: [c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, ss_ext_discount_amt#32, ss_ext_list_price#33, d_year#38] -Input [12]: [c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, ss_ext_discount_amt#32, ss_ext_list_price#33, ss_sold_date_sk#34, d_date_sk#37, d_year#38] +Output [10]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_list_price#31, d_year#35] +Input [12]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_list_price#31, ss_sold_date_sk#32, d_date_sk#34, d_year#35] (29) HashAggregate [codegen id : 6] -Input [10]: [c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, ss_ext_discount_amt#32, ss_ext_list_price#33, d_year#38] -Keys [8]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#39] -Results [9]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, sum#40] +Input [10]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_list_price#31, d_year#35] +Keys [8]: [c_customer_id#22, c_first_name#23, c_last_name#24, d_year#35, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#31 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum#36] +Results [9]: [c_customer_id#22, c_first_name#23, c_last_name#24, d_year#35, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, sum#37] (30) Exchange -Input [9]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, sum#40] -Arguments: hashpartitioning(c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, 5), ENSURE_REQUIREMENTS, [id=#41] +Input [9]: [c_customer_id#22, c_first_name#23, c_last_name#24, d_year#35, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, sum#37] +Arguments: hashpartitioning(c_customer_id#22, c_first_name#23, c_last_name#24, d_year#35, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, 5), ENSURE_REQUIREMENTS, [plan_id=4] (31) HashAggregate [codegen id : 7] -Input [9]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, sum#40] -Keys [8]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))#20] -Results [3]: [c_customer_id#24 AS customer_id#42, c_preferred_cust_flag#27 AS customer_preferred_cust_flag#43, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))#20,18,2) AS year_total#44] +Input [9]: [c_customer_id#22, c_first_name#23, c_last_name#24, d_year#35, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, sum#37] +Keys [8]: [c_customer_id#22, c_first_name#23, c_last_name#24, d_year#35, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#31 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#31 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(8,2)))), DecimalType(8,2))))#18] +Results [3]: [c_customer_id#22 AS customer_id#38, c_preferred_cust_flag#25 AS customer_preferred_cust_flag#39, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#31 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(8,2)))), DecimalType(8,2))))#18,18,2) AS year_total#40] (32) BroadcastExchange -Input [3]: [customer_id#42, customer_preferred_cust_flag#43, year_total#44] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#45] +Input [3]: [customer_id#38, customer_preferred_cust_flag#39, year_total#40] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=5] (33) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#21] -Right keys [1]: [customer_id#42] +Left keys [1]: [customer_id#19] +Right keys [1]: [customer_id#38] Join condition: None (34) Project [codegen id : 16] -Output [4]: [customer_id#21, year_total#22, customer_preferred_cust_flag#43, year_total#44] -Input [5]: [customer_id#21, year_total#22, customer_id#42, customer_preferred_cust_flag#43, year_total#44] +Output [4]: [customer_id#19, year_total#20, customer_preferred_cust_flag#39, year_total#40] +Input [5]: [customer_id#19, year_total#20, customer_id#38, customer_preferred_cust_flag#39, year_total#40] (35) Scan parquet default.customer -Output [8]: [c_customer_sk#46, c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53] +Output [8]: [c_customer_sk#41, c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (36) ColumnarToRow [codegen id : 10] -Input [8]: [c_customer_sk#46, c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53] +Input [8]: [c_customer_sk#41, c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48] (37) Filter [codegen id : 10] -Input [8]: [c_customer_sk#46, c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53] -Condition : (isnotnull(c_customer_sk#46) AND isnotnull(c_customer_id#47)) +Input [8]: [c_customer_sk#41, c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48] +Condition : (isnotnull(c_customer_sk#41) AND isnotnull(c_customer_id#42)) (38) Scan parquet default.web_sales -Output [4]: [ws_bill_customer_sk#54, ws_ext_discount_amt#55, ws_ext_list_price#56, ws_sold_date_sk#57] +Output [4]: [ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#57), dynamicpruningexpression(ws_sold_date_sk#57 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ws_sold_date_sk#52), dynamicpruningexpression(ws_sold_date_sk#52 IN dynamicpruning#13)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (39) ColumnarToRow [codegen id : 8] -Input [4]: [ws_bill_customer_sk#54, ws_ext_discount_amt#55, ws_ext_list_price#56, ws_sold_date_sk#57] +Input [4]: [ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] (40) Filter [codegen id : 8] -Input [4]: [ws_bill_customer_sk#54, ws_ext_discount_amt#55, ws_ext_list_price#56, ws_sold_date_sk#57] -Condition : isnotnull(ws_bill_customer_sk#54) +Input [4]: [ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] +Condition : isnotnull(ws_bill_customer_sk#49) (41) BroadcastExchange -Input [4]: [ws_bill_customer_sk#54, ws_ext_discount_amt#55, ws_ext_list_price#56, ws_sold_date_sk#57] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#58] +Input [4]: [ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] (42) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [c_customer_sk#46] -Right keys [1]: [ws_bill_customer_sk#54] +Left keys [1]: [c_customer_sk#41] +Right keys [1]: [ws_bill_customer_sk#49] Join condition: None (43) Project [codegen id : 10] -Output [10]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, ws_ext_discount_amt#55, ws_ext_list_price#56, ws_sold_date_sk#57] -Input [12]: [c_customer_sk#46, c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, ws_bill_customer_sk#54, ws_ext_discount_amt#55, ws_ext_list_price#56, ws_sold_date_sk#57] +Output [10]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] +Input [12]: [c_customer_sk#41, c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_bill_customer_sk#49, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52] (44) ReusedExchange [Reuses operator id: 76] -Output [2]: [d_date_sk#59, d_year#60] +Output [2]: [d_date_sk#53, d_year#54] (45) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ws_sold_date_sk#57] -Right keys [1]: [d_date_sk#59] +Left keys [1]: [ws_sold_date_sk#52] +Right keys [1]: [d_date_sk#53] Join condition: None (46) Project [codegen id : 10] -Output [10]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, ws_ext_discount_amt#55, ws_ext_list_price#56, d_year#60] -Input [12]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, ws_ext_discount_amt#55, ws_ext_list_price#56, ws_sold_date_sk#57, d_date_sk#59, d_year#60] +Output [10]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_ext_discount_amt#50, ws_ext_list_price#51, d_year#54] +Input [12]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_ext_discount_amt#50, ws_ext_list_price#51, ws_sold_date_sk#52, d_date_sk#53, d_year#54] (47) HashAggregate [codegen id : 10] -Input [10]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, ws_ext_discount_amt#55, ws_ext_list_price#56, d_year#60] -Keys [8]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, d_year#60] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#56 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#55 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#61] -Results [9]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, d_year#60, sum#62] +Input [10]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, ws_ext_discount_amt#50, ws_ext_list_price#51, d_year#54] +Keys [8]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#50 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum#55] +Results [9]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54, sum#56] (48) Exchange -Input [9]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, d_year#60, sum#62] -Arguments: hashpartitioning(c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, d_year#60, 5), ENSURE_REQUIREMENTS, [id=#63] +Input [9]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54, sum#56] +Arguments: hashpartitioning(c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54, 5), ENSURE_REQUIREMENTS, [plan_id=7] (49) HashAggregate [codegen id : 11] -Input [9]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, d_year#60, sum#62] -Keys [8]: [c_customer_id#47, c_first_name#48, c_last_name#49, c_preferred_cust_flag#50, c_birth_country#51, c_login#52, c_email_address#53, d_year#60] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#56 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#55 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#56 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#55 as decimal(8,2)))), DecimalType(8,2))))#64] -Results [2]: [c_customer_id#47 AS customer_id#65, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#56 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#55 as decimal(8,2)))), DecimalType(8,2))))#64,18,2) AS year_total#66] +Input [9]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54, sum#56] +Keys [8]: [c_customer_id#42, c_first_name#43, c_last_name#44, c_preferred_cust_flag#45, c_birth_country#46, c_login#47, c_email_address#48, d_year#54] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#50 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#50 as decimal(8,2)))), DecimalType(8,2))))#57] +Results [2]: [c_customer_id#42 AS customer_id#58, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#51 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#50 as decimal(8,2)))), DecimalType(8,2))))#57,18,2) AS year_total#59] (50) Filter [codegen id : 11] -Input [2]: [customer_id#65, year_total#66] -Condition : (isnotnull(year_total#66) AND (year_total#66 > 0.00)) +Input [2]: [customer_id#58, year_total#59] +Condition : (isnotnull(year_total#59) AND (year_total#59 > 0.00)) (51) BroadcastExchange -Input [2]: [customer_id#65, year_total#66] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#67] +Input [2]: [customer_id#58, year_total#59] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=8] (52) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#21] -Right keys [1]: [customer_id#65] +Left keys [1]: [customer_id#19] +Right keys [1]: [customer_id#58] Join condition: None (53) Project [codegen id : 16] -Output [5]: [customer_id#21, year_total#22, customer_preferred_cust_flag#43, year_total#44, year_total#66] -Input [6]: [customer_id#21, year_total#22, customer_preferred_cust_flag#43, year_total#44, customer_id#65, year_total#66] +Output [5]: [customer_id#19, year_total#20, customer_preferred_cust_flag#39, year_total#40, year_total#59] +Input [6]: [customer_id#19, year_total#20, customer_preferred_cust_flag#39, year_total#40, customer_id#58, year_total#59] (54) Scan parquet default.customer -Output [8]: [c_customer_sk#68, c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75] +Output [8]: [c_customer_sk#60, c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (55) ColumnarToRow [codegen id : 14] -Input [8]: [c_customer_sk#68, c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75] +Input [8]: [c_customer_sk#60, c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67] (56) Filter [codegen id : 14] -Input [8]: [c_customer_sk#68, c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75] -Condition : (isnotnull(c_customer_sk#68) AND isnotnull(c_customer_id#69)) +Input [8]: [c_customer_sk#60, c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67] +Condition : (isnotnull(c_customer_sk#60) AND isnotnull(c_customer_id#61)) (57) Scan parquet default.web_sales -Output [4]: [ws_bill_customer_sk#76, ws_ext_discount_amt#77, ws_ext_list_price#78, ws_sold_date_sk#79] +Output [4]: [ws_bill_customer_sk#68, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#79), dynamicpruningexpression(ws_sold_date_sk#79 IN dynamicpruning#35)] +PartitionFilters: [isnotnull(ws_sold_date_sk#71), dynamicpruningexpression(ws_sold_date_sk#71 IN dynamicpruning#33)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (58) ColumnarToRow [codegen id : 12] -Input [4]: [ws_bill_customer_sk#76, ws_ext_discount_amt#77, ws_ext_list_price#78, ws_sold_date_sk#79] +Input [4]: [ws_bill_customer_sk#68, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71] (59) Filter [codegen id : 12] -Input [4]: [ws_bill_customer_sk#76, ws_ext_discount_amt#77, ws_ext_list_price#78, ws_sold_date_sk#79] -Condition : isnotnull(ws_bill_customer_sk#76) +Input [4]: [ws_bill_customer_sk#68, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71] +Condition : isnotnull(ws_bill_customer_sk#68) (60) BroadcastExchange -Input [4]: [ws_bill_customer_sk#76, ws_ext_discount_amt#77, ws_ext_list_price#78, ws_sold_date_sk#79] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#80] +Input [4]: [ws_bill_customer_sk#68, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] (61) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [c_customer_sk#68] -Right keys [1]: [ws_bill_customer_sk#76] +Left keys [1]: [c_customer_sk#60] +Right keys [1]: [ws_bill_customer_sk#68] Join condition: None (62) Project [codegen id : 14] -Output [10]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, ws_ext_discount_amt#77, ws_ext_list_price#78, ws_sold_date_sk#79] -Input [12]: [c_customer_sk#68, c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, ws_bill_customer_sk#76, ws_ext_discount_amt#77, ws_ext_list_price#78, ws_sold_date_sk#79] +Output [10]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71] +Input [12]: [c_customer_sk#60, c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_bill_customer_sk#68, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71] (63) ReusedExchange [Reuses operator id: 80] -Output [2]: [d_date_sk#81, d_year#82] +Output [2]: [d_date_sk#72, d_year#73] (64) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_sold_date_sk#79] -Right keys [1]: [d_date_sk#81] +Left keys [1]: [ws_sold_date_sk#71] +Right keys [1]: [d_date_sk#72] Join condition: None (65) Project [codegen id : 14] -Output [10]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, ws_ext_discount_amt#77, ws_ext_list_price#78, d_year#82] -Input [12]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, ws_ext_discount_amt#77, ws_ext_list_price#78, ws_sold_date_sk#79, d_date_sk#81, d_year#82] +Output [10]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_ext_discount_amt#69, ws_ext_list_price#70, d_year#73] +Input [12]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_ext_discount_amt#69, ws_ext_list_price#70, ws_sold_date_sk#71, d_date_sk#72, d_year#73] (66) HashAggregate [codegen id : 14] -Input [10]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, ws_ext_discount_amt#77, ws_ext_list_price#78, d_year#82] -Keys [8]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#82] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#78 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#77 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#83] -Results [9]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#82, sum#84] +Input [10]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, ws_ext_discount_amt#69, ws_ext_list_price#70, d_year#73] +Keys [8]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#70 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#69 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum#74] +Results [9]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73, sum#75] (67) Exchange -Input [9]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#82, sum#84] -Arguments: hashpartitioning(c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#82, 5), ENSURE_REQUIREMENTS, [id=#85] +Input [9]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73, sum#75] +Arguments: hashpartitioning(c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73, 5), ENSURE_REQUIREMENTS, [plan_id=10] (68) HashAggregate [codegen id : 15] -Input [9]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#82, sum#84] -Keys [8]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#82] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#78 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#77 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#78 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#77 as decimal(8,2)))), DecimalType(8,2))))#64] -Results [2]: [c_customer_id#69 AS customer_id#86, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#78 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#77 as decimal(8,2)))), DecimalType(8,2))))#64,18,2) AS year_total#87] +Input [9]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73, sum#75] +Keys [8]: [c_customer_id#61, c_first_name#62, c_last_name#63, c_preferred_cust_flag#64, c_birth_country#65, c_login#66, c_email_address#67, d_year#73] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#70 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#69 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#70 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#69 as decimal(8,2)))), DecimalType(8,2))))#57] +Results [2]: [c_customer_id#61 AS customer_id#76, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#70 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#69 as decimal(8,2)))), DecimalType(8,2))))#57,18,2) AS year_total#77] (69) BroadcastExchange -Input [2]: [customer_id#86, year_total#87] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#88] +Input [2]: [customer_id#76, year_total#77] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] (70) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#21] -Right keys [1]: [customer_id#86] -Join condition: (CASE WHEN (year_total#66 > 0.00) THEN CheckOverflow((promote_precision(year_total#87) / promote_precision(year_total#66)), DecimalType(38,20)) END > CASE WHEN (year_total#22 > 0.00) THEN CheckOverflow((promote_precision(year_total#44) / promote_precision(year_total#22)), DecimalType(38,20)) END) +Left keys [1]: [customer_id#19] +Right keys [1]: [customer_id#76] +Join condition: (CASE WHEN (year_total#59 > 0.00) THEN CheckOverflow((promote_precision(year_total#77) / promote_precision(year_total#59)), DecimalType(38,20)) END > CASE WHEN (year_total#20 > 0.00) THEN CheckOverflow((promote_precision(year_total#40) / promote_precision(year_total#20)), DecimalType(38,20)) END) (71) Project [codegen id : 16] -Output [1]: [customer_preferred_cust_flag#43] -Input [7]: [customer_id#21, year_total#22, customer_preferred_cust_flag#43, year_total#44, year_total#66, customer_id#86, year_total#87] +Output [1]: [customer_preferred_cust_flag#39] +Input [7]: [customer_id#19, year_total#20, customer_preferred_cust_flag#39, year_total#40, year_total#59, customer_id#76, year_total#77] (72) TakeOrderedAndProject -Input [1]: [customer_preferred_cust_flag#43] -Arguments: 100, [customer_preferred_cust_flag#43 ASC NULLS FIRST], [customer_preferred_cust_flag#43] +Input [1]: [customer_preferred_cust_flag#39] +Arguments: 100, [customer_preferred_cust_flag#39 ASC NULLS FIRST], [customer_preferred_cust_flag#39] ===== Subqueries ===== @@ -422,24 +422,24 @@ BroadcastExchange (76) (73) Scan parquet default.date_dim -Output [2]: [d_date_sk#15, d_year#16] +Output [2]: [d_date_sk#14, d_year#15] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (74) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#16] +Input [2]: [d_date_sk#14, d_year#15] (75) Filter [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#16] -Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2001)) AND isnotnull(d_date_sk#14)) (76) BroadcastExchange -Input [2]: [d_date_sk#15, d_year#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#89] +Input [2]: [d_date_sk#14, d_year#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] -Subquery:2 Hosting operator id = 20 Hosting Expression = ss_sold_date_sk#34 IN dynamicpruning#35 +Subquery:2 Hosting operator id = 20 Hosting Expression = ss_sold_date_sk#32 IN dynamicpruning#33 BroadcastExchange (80) +- * Filter (79) +- * ColumnarToRow (78) @@ -447,25 +447,25 @@ BroadcastExchange (80) (77) Scan parquet default.date_dim -Output [2]: [d_date_sk#37, d_year#38] +Output [2]: [d_date_sk#34, d_year#35] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] ReadSchema: struct (78) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#37, d_year#38] +Input [2]: [d_date_sk#34, d_year#35] (79) Filter [codegen id : 1] -Input [2]: [d_date_sk#37, d_year#38] -Condition : ((isnotnull(d_year#38) AND (d_year#38 = 2002)) AND isnotnull(d_date_sk#37)) +Input [2]: [d_date_sk#34, d_year#35] +Condition : ((isnotnull(d_year#35) AND (d_year#35 = 2002)) AND isnotnull(d_date_sk#34)) (80) BroadcastExchange -Input [2]: [d_date_sk#37, d_year#38] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#90] +Input [2]: [d_date_sk#34, d_year#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=13] -Subquery:3 Hosting operator id = 38 Hosting Expression = ws_sold_date_sk#57 IN dynamicpruning#13 +Subquery:3 Hosting operator id = 38 Hosting Expression = ws_sold_date_sk#52 IN dynamicpruning#13 -Subquery:4 Hosting operator id = 57 Hosting Expression = ws_sold_date_sk#79 IN dynamicpruning#35 +Subquery:4 Hosting operator id = 57 Hosting Expression = ws_sold_date_sk#71 IN dynamicpruning#33 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.sf100/explain.txt index 0f0b678bb7074..71275fad79347 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12.sf100/explain.txt @@ -41,92 +41,92 @@ Condition : isnotnull(ws_item_sk#1) (4) Exchange Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] -Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#5] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] Arguments: [ws_item_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.item -Output [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Output [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (8) Filter [codegen id : 3] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Condition : (i_category#11 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#6)) +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Condition : (i_category#10 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#5)) (9) Exchange -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Arguments: hashpartitioning(i_item_sk#6, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Arguments: hashpartitioning(i_item_sk#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] (10) Sort [codegen id : 4] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Arguments: [i_item_sk#5 ASC NULLS FIRST], false, 0 (11) SortMergeJoin [codegen id : 6] Left keys [1]: [ws_item_sk#1] -Right keys [1]: [i_item_sk#6] +Right keys [1]: [i_item_sk#5] Join condition: None (12) Project [codegen id : 6] -Output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (13) ReusedExchange [Reuses operator id: 28] -Output [1]: [d_date_sk#13] +Output [1]: [d_date_sk#11] (14) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_sold_date_sk#3] -Right keys [1]: [d_date_sk#13] +Right keys [1]: [d_date_sk#11] Join condition: None (15) Project [codegen id : 6] -Output [6]: [ws_ext_sales_price#2, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11, d_date_sk#13] +Output [6]: [ws_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#11] (16) HashAggregate [codegen id : 6] -Input [6]: [ws_ext_sales_price#2, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Keys [5]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9] +Input [6]: [ws_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#14] -Results [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] (17) Exchange -Input [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] -Arguments: hashpartitioning(i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 7] -Input [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] -Keys [5]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#2))#17] -Results [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#17,17,2) AS _w1#20, i_item_id#7] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#2))#14] +Results [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS _w0#16, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS _w1#17, i_item_id#6] (19) Exchange -Input [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7] -Arguments: hashpartitioning(i_class#10, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] (20) Sort [codegen id : 8] -Input [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7] -Arguments: [i_class#10 ASC NULLS FIRST], false, 0 +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: [i_class#9 ASC NULLS FIRST], false, 0 (21) Window -Input [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7] -Arguments: [sum(_w1#20) windowspecdefinition(i_class#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#10] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: [sum(_w1#17) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#18], [i_class#9] (22) Project [codegen id : 9] -Output [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17)) AS revenueratio#23, i_item_id#7] -Input [9]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7, _we0#22] +Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#16) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#18)), DecimalType(38,17)) AS revenueratio#19, i_item_id#6] +Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6, _we0#18] (23) TakeOrderedAndProject -Input [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, revenueratio#23, i_item_id#7] -Arguments: 100, [i_category#11 ASC NULLS FIRST, i_class#10 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#8 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, revenueratio#23] +Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19, i_item_id#6] +Arguments: 100, [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST], [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] ===== Subqueries ===== @@ -139,25 +139,25 @@ BroadcastExchange (28) (24) Scan parquet default.date_dim -Output [2]: [d_date_sk#13, d_date#24] +Output [2]: [d_date_sk#11, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#13, d_date#24] +Input [2]: [d_date_sk#11, d_date#20] (26) Filter [codegen id : 1] -Input [2]: [d_date_sk#13, d_date#24] -Condition : (((isnotnull(d_date#24) AND (d_date#24 >= 1999-02-22)) AND (d_date#24 <= 1999-03-24)) AND isnotnull(d_date_sk#13)) +Input [2]: [d_date_sk#11, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-22)) AND (d_date#20 <= 1999-03-24)) AND isnotnull(d_date_sk#11)) (27) Project [codegen id : 1] -Output [1]: [d_date_sk#13] -Input [2]: [d_date_sk#13, d_date#24] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#20] (28) BroadcastExchange -Input [1]: [d_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt index 0b4dfea762918..a82f017df7c11 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q12/explain.txt @@ -52,7 +52,7 @@ Condition : (i_category#10 IN (Sports (7) BroadcastExchange Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ws_item_sk#1] @@ -64,54 +64,54 @@ Output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#6, i_item_desc#7 Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (10) ReusedExchange [Reuses operator id: 25] -Output [1]: [d_date_sk#12] +Output [1]: [d_date_sk#11] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ws_sold_date_sk#3] -Right keys [1]: [d_date_sk#12] +Right keys [1]: [d_date_sk#11] Join condition: None (12) Project [codegen id : 3] Output [6]: [ws_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] -Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#12] +Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#11] (13) HashAggregate [codegen id : 3] Input [6]: [ws_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#13] -Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] (14) Exchange -Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] -Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 4] -Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#2))#16] -Results [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#16,17,2) AS itemrevenue#17, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#16,17,2) AS _w0#18, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#16,17,2) AS _w1#19, i_item_id#6] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#2))#14] +Results [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS _w0#16, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS _w1#17, i_item_id#6] (16) Exchange -Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6] -Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (17) Sort [codegen id : 5] -Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] Arguments: [i_class#9 ASC NULLS FIRST], false, 0 (18) Window -Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6] -Arguments: [sum(_w1#19) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#21], [i_class#9] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: [sum(_w1#17) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#18], [i_class#9] (19) Project [codegen id : 6] -Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17)) AS revenueratio#22, i_item_id#6] -Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6, _we0#21] +Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#16) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#18)), DecimalType(38,17)) AS revenueratio#19, i_item_id#6] +Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6, _we0#18] (20) TakeOrderedAndProject -Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22, i_item_id#6] -Arguments: 100, [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#22 ASC NULLS FIRST], [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22] +Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19, i_item_id#6] +Arguments: 100, [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST], [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] ===== Subqueries ===== @@ -124,25 +124,25 @@ BroadcastExchange (25) (21) Scan parquet default.date_dim -Output [2]: [d_date_sk#12, d_date#23] +Output [2]: [d_date_sk#11, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#12, d_date#23] +Input [2]: [d_date_sk#11, d_date#20] (23) Filter [codegen id : 1] -Input [2]: [d_date_sk#12, d_date#23] -Condition : (((isnotnull(d_date#23) AND (d_date#23 >= 1999-02-22)) AND (d_date#23 <= 1999-03-24)) AND isnotnull(d_date_sk#12)) +Input [2]: [d_date_sk#11, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-22)) AND (d_date#20 <= 1999-03-24)) AND isnotnull(d_date_sk#11)) (24) Project [codegen id : 1] -Output [1]: [d_date_sk#12] -Input [2]: [d_date_sk#12, d_date#23] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#20] (25) BroadcastExchange -Input [1]: [d_date_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt index 9d6b17e613ef1..8c4cdad53f666 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13.sf100/explain.txt @@ -66,7 +66,7 @@ Condition : (isnotnull(cd_demo_sk#12) AND ((((cd_marital_status#13 = M) AND (cd_ (7) BroadcastExchange Input [3]: [cd_demo_sk#12, cd_marital_status#13, cd_education_status#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_cdemo_sk#1] @@ -78,119 +78,119 @@ Output [11]: [ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sale Input [13]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, cd_demo_sk#12, cd_marital_status#13, cd_education_status#14] (10) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#16, hd_dep_count#17] +Output [2]: [hd_demo_sk#15, hd_dep_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_demo_sk), Or(EqualTo(hd_dep_count,3),EqualTo(hd_dep_count,1))] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [2]: [hd_demo_sk#16, hd_dep_count#17] +Input [2]: [hd_demo_sk#15, hd_dep_count#16] (12) Filter [codegen id : 2] -Input [2]: [hd_demo_sk#16, hd_dep_count#17] -Condition : (isnotnull(hd_demo_sk#16) AND ((hd_dep_count#17 = 3) OR (hd_dep_count#17 = 1))) +Input [2]: [hd_demo_sk#15, hd_dep_count#16] +Condition : (isnotnull(hd_demo_sk#15) AND ((hd_dep_count#16 = 3) OR (hd_dep_count#16 = 1))) (13) BroadcastExchange -Input [2]: [hd_demo_sk#16, hd_dep_count#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] +Input [2]: [hd_demo_sk#15, hd_dep_count#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#16] -Join condition: (((((((cd_marital_status#13 = M) AND (cd_education_status#14 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) AND (hd_dep_count#17 = 3)) OR (((((cd_marital_status#13 = S) AND (cd_education_status#14 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00)) AND (hd_dep_count#17 = 1))) OR (((((cd_marital_status#13 = W) AND (cd_education_status#14 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00)) AND (hd_dep_count#17 = 1))) +Right keys [1]: [hd_demo_sk#15] +Join condition: (((((((cd_marital_status#13 = M) AND (cd_education_status#14 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) AND (hd_dep_count#16 = 3)) OR (((((cd_marital_status#13 = S) AND (cd_education_status#14 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00)) AND (hd_dep_count#16 = 1))) OR (((((cd_marital_status#13 = W) AND (cd_education_status#14 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00)) AND (hd_dep_count#16 = 1))) (15) Project [codegen id : 6] Output [7]: [ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10] -Input [13]: [ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, cd_marital_status#13, cd_education_status#14, hd_demo_sk#16, hd_dep_count#17] +Input [13]: [ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, cd_marital_status#13, cd_education_status#14, hd_demo_sk#15, hd_dep_count#16] (16) ReusedExchange [Reuses operator id: 39] -Output [1]: [d_date_sk#19] +Output [1]: [d_date_sk#17] (17) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_sold_date_sk#10] -Right keys [1]: [d_date_sk#19] +Right keys [1]: [d_date_sk#17] Join condition: None (18) Project [codegen id : 6] Output [6]: [ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9] -Input [8]: [ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, d_date_sk#19] +Input [8]: [ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, d_date_sk#17] (19) Scan parquet default.store -Output [1]: [s_store_sk#20] +Output [1]: [s_store_sk#18] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (20) ColumnarToRow [codegen id : 4] -Input [1]: [s_store_sk#20] +Input [1]: [s_store_sk#18] (21) Filter [codegen id : 4] -Input [1]: [s_store_sk#20] -Condition : isnotnull(s_store_sk#20) +Input [1]: [s_store_sk#18] +Condition : isnotnull(s_store_sk#18) (22) BroadcastExchange -Input [1]: [s_store_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] +Input [1]: [s_store_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (23) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#20] +Right keys [1]: [s_store_sk#18] Join condition: None (24) Project [codegen id : 6] Output [5]: [ss_addr_sk#3, ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9] -Input [7]: [ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, s_store_sk#20] +Input [7]: [ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, s_store_sk#18] (25) Scan parquet default.customer_address -Output [3]: [ca_address_sk#22, ca_state#23, ca_country#24] +Output [3]: [ca_address_sk#19, ca_state#20, ca_country#21] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [OH,TX]),In(ca_state, [KY,NM,OR])),In(ca_state, [MS,TX,VA]))] ReadSchema: struct (26) ColumnarToRow [codegen id : 5] -Input [3]: [ca_address_sk#22, ca_state#23, ca_country#24] +Input [3]: [ca_address_sk#19, ca_state#20, ca_country#21] (27) Filter [codegen id : 5] -Input [3]: [ca_address_sk#22, ca_state#23, ca_country#24] -Condition : (((isnotnull(ca_country#24) AND (ca_country#24 = United States)) AND isnotnull(ca_address_sk#22)) AND ((ca_state#23 IN (TX,OH) OR ca_state#23 IN (OR,NM,KY)) OR ca_state#23 IN (VA,TX,MS))) +Input [3]: [ca_address_sk#19, ca_state#20, ca_country#21] +Condition : (((isnotnull(ca_country#21) AND (ca_country#21 = United States)) AND isnotnull(ca_address_sk#19)) AND ((ca_state#20 IN (TX,OH) OR ca_state#20 IN (OR,NM,KY)) OR ca_state#20 IN (VA,TX,MS))) (28) Project [codegen id : 5] -Output [2]: [ca_address_sk#22, ca_state#23] -Input [3]: [ca_address_sk#22, ca_state#23, ca_country#24] +Output [2]: [ca_address_sk#19, ca_state#20] +Input [3]: [ca_address_sk#19, ca_state#20, ca_country#21] (29) BroadcastExchange -Input [2]: [ca_address_sk#22, ca_state#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [2]: [ca_address_sk#19, ca_state#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (30) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_addr_sk#3] -Right keys [1]: [ca_address_sk#22] -Join condition: ((((ca_state#23 IN (TX,OH) AND (ss_net_profit#9 >= 100.00)) AND (ss_net_profit#9 <= 200.00)) OR ((ca_state#23 IN (OR,NM,KY) AND (ss_net_profit#9 >= 150.00)) AND (ss_net_profit#9 <= 300.00))) OR ((ca_state#23 IN (VA,TX,MS) AND (ss_net_profit#9 >= 50.00)) AND (ss_net_profit#9 <= 250.00))) +Right keys [1]: [ca_address_sk#19] +Join condition: ((((ca_state#20 IN (TX,OH) AND (ss_net_profit#9 >= 100.00)) AND (ss_net_profit#9 <= 200.00)) OR ((ca_state#20 IN (OR,NM,KY) AND (ss_net_profit#9 >= 150.00)) AND (ss_net_profit#9 <= 300.00))) OR ((ca_state#20 IN (VA,TX,MS) AND (ss_net_profit#9 >= 50.00)) AND (ss_net_profit#9 <= 250.00))) (31) Project [codegen id : 6] Output [3]: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] -Input [7]: [ss_addr_sk#3, ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ca_address_sk#22, ca_state#23] +Input [7]: [ss_addr_sk#3, ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ca_address_sk#19, ca_state#20] (32) HashAggregate [codegen id : 6] Input [3]: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] Keys: [] Functions [4]: [partial_avg(ss_quantity#5), partial_avg(UnscaledValue(ss_ext_sales_price#7)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#8)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#8))] -Aggregate Attributes [7]: [sum#26, count#27, sum#28, count#29, sum#30, count#31, sum#32] -Results [7]: [sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39] +Aggregate Attributes [7]: [sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28] +Results [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] (33) Exchange -Input [7]: [sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#40] +Input [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] (34) HashAggregate [codegen id : 7] -Input [7]: [sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39] +Input [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] Keys: [] Functions [4]: [avg(ss_quantity#5), avg(UnscaledValue(ss_ext_sales_price#7)), avg(UnscaledValue(ss_ext_wholesale_cost#8)), sum(UnscaledValue(ss_ext_wholesale_cost#8))] -Aggregate Attributes [4]: [avg(ss_quantity#5)#41, avg(UnscaledValue(ss_ext_sales_price#7))#42, avg(UnscaledValue(ss_ext_wholesale_cost#8))#43, sum(UnscaledValue(ss_ext_wholesale_cost#8))#44] -Results [4]: [avg(ss_quantity#5)#41 AS avg(ss_quantity)#45, cast((avg(UnscaledValue(ss_ext_sales_price#7))#42 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#46, cast((avg(UnscaledValue(ss_ext_wholesale_cost#8))#43 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#47, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#8))#44,17,2) AS sum(ss_ext_wholesale_cost)#48] +Aggregate Attributes [4]: [avg(ss_quantity#5)#36, avg(UnscaledValue(ss_ext_sales_price#7))#37, avg(UnscaledValue(ss_ext_wholesale_cost#8))#38, sum(UnscaledValue(ss_ext_wholesale_cost#8))#39] +Results [4]: [avg(ss_quantity#5)#36 AS avg(ss_quantity)#40, cast((avg(UnscaledValue(ss_ext_sales_price#7))#37 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#41, cast((avg(UnscaledValue(ss_ext_wholesale_cost#8))#38 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#42, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#8))#39,17,2) AS sum(ss_ext_wholesale_cost)#43] ===== Subqueries ===== @@ -203,25 +203,25 @@ BroadcastExchange (39) (35) Scan parquet default.date_dim -Output [2]: [d_date_sk#19, d_year#49] +Output [2]: [d_date_sk#17, d_year#44] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (36) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#19, d_year#49] +Input [2]: [d_date_sk#17, d_year#44] (37) Filter [codegen id : 1] -Input [2]: [d_date_sk#19, d_year#49] -Condition : ((isnotnull(d_year#49) AND (d_year#49 = 2001)) AND isnotnull(d_date_sk#19)) +Input [2]: [d_date_sk#17, d_year#44] +Condition : ((isnotnull(d_year#44) AND (d_year#44 = 2001)) AND isnotnull(d_date_sk#17)) (38) Project [codegen id : 1] -Output [1]: [d_date_sk#19] -Input [2]: [d_date_sk#19, d_year#49] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_year#44] (39) BroadcastExchange -Input [1]: [d_date_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt index 59e8cf7c4d063..2fe09e98424a7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q13/explain.txt @@ -66,7 +66,7 @@ Condition : isnotnull(s_store_sk#12) (7) BroadcastExchange Input [1]: [s_store_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_store_sk#4] @@ -78,119 +78,119 @@ Output [9]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales Input [11]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, s_store_sk#12] (10) Scan parquet default.customer_address -Output [3]: [ca_address_sk#14, ca_state#15, ca_country#16] +Output [3]: [ca_address_sk#13, ca_state#14, ca_country#15] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [OH,TX]),In(ca_state, [KY,NM,OR])),In(ca_state, [MS,TX,VA]))] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] (12) Filter [codegen id : 2] -Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] -Condition : (((isnotnull(ca_country#16) AND (ca_country#16 = United States)) AND isnotnull(ca_address_sk#14)) AND ((ca_state#15 IN (TX,OH) OR ca_state#15 IN (OR,NM,KY)) OR ca_state#15 IN (VA,TX,MS))) +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] +Condition : (((isnotnull(ca_country#15) AND (ca_country#15 = United States)) AND isnotnull(ca_address_sk#13)) AND ((ca_state#14 IN (TX,OH) OR ca_state#14 IN (OR,NM,KY)) OR ca_state#14 IN (VA,TX,MS))) (13) Project [codegen id : 2] -Output [2]: [ca_address_sk#14, ca_state#15] -Input [3]: [ca_address_sk#14, ca_state#15, ca_country#16] +Output [2]: [ca_address_sk#13, ca_state#14] +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] (14) BroadcastExchange -Input [2]: [ca_address_sk#14, ca_state#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [2]: [ca_address_sk#13, ca_state#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_addr_sk#3] -Right keys [1]: [ca_address_sk#14] -Join condition: ((((ca_state#15 IN (TX,OH) AND (ss_net_profit#9 >= 100.00)) AND (ss_net_profit#9 <= 200.00)) OR ((ca_state#15 IN (OR,NM,KY) AND (ss_net_profit#9 >= 150.00)) AND (ss_net_profit#9 <= 300.00))) OR ((ca_state#15 IN (VA,TX,MS) AND (ss_net_profit#9 >= 50.00)) AND (ss_net_profit#9 <= 250.00))) +Right keys [1]: [ca_address_sk#13] +Join condition: ((((ca_state#14 IN (TX,OH) AND (ss_net_profit#9 >= 100.00)) AND (ss_net_profit#9 <= 200.00)) OR ((ca_state#14 IN (OR,NM,KY) AND (ss_net_profit#9 >= 150.00)) AND (ss_net_profit#9 <= 300.00))) OR ((ca_state#14 IN (VA,TX,MS) AND (ss_net_profit#9 >= 50.00)) AND (ss_net_profit#9 <= 250.00))) (16) Project [codegen id : 6] Output [7]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10] -Input [11]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, ca_address_sk#14, ca_state#15] +Input [11]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_net_profit#9, ss_sold_date_sk#10, ca_address_sk#13, ca_state#14] (17) ReusedExchange [Reuses operator id: 39] -Output [1]: [d_date_sk#18] +Output [1]: [d_date_sk#16] (18) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_sold_date_sk#10] -Right keys [1]: [d_date_sk#18] +Right keys [1]: [d_date_sk#16] Join condition: None (19) Project [codegen id : 6] Output [6]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] -Input [8]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10, d_date_sk#18] +Input [8]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, ss_sold_date_sk#10, d_date_sk#16] (20) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] +Output [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree )),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College ))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree )))] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] (22) Filter [codegen id : 4] -Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] -Condition : (isnotnull(cd_demo_sk#19) AND ((((cd_marital_status#20 = M) AND (cd_education_status#21 = Advanced Degree )) OR ((cd_marital_status#20 = S) AND (cd_education_status#21 = College ))) OR ((cd_marital_status#20 = W) AND (cd_education_status#21 = 2 yr Degree )))) +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Condition : (isnotnull(cd_demo_sk#17) AND ((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) OR ((cd_marital_status#18 = S) AND (cd_education_status#19 = College ))) OR ((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )))) (23) BroadcastExchange -Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_cdemo_sk#1] -Right keys [1]: [cd_demo_sk#19] -Join condition: ((((((cd_marital_status#20 = M) AND (cd_education_status#21 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) OR ((((cd_marital_status#20 = S) AND (cd_education_status#21 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00))) OR ((((cd_marital_status#20 = W) AND (cd_education_status#21 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00))) +Right keys [1]: [cd_demo_sk#17] +Join condition: ((((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) OR ((((cd_marital_status#18 = S) AND (cd_education_status#19 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00))) OR ((((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00))) (25) Project [codegen id : 6] -Output [7]: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#20, cd_education_status#21] -Input [9]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] +Output [7]: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19] +Input [9]: [ss_cdemo_sk#1, ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] (26) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#23, hd_dep_count#24] +Output [2]: [hd_demo_sk#20, hd_dep_count#21] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_demo_sk), Or(EqualTo(hd_dep_count,3),EqualTo(hd_dep_count,1))] ReadSchema: struct (27) ColumnarToRow [codegen id : 5] -Input [2]: [hd_demo_sk#23, hd_dep_count#24] +Input [2]: [hd_demo_sk#20, hd_dep_count#21] (28) Filter [codegen id : 5] -Input [2]: [hd_demo_sk#23, hd_dep_count#24] -Condition : (isnotnull(hd_demo_sk#23) AND ((hd_dep_count#24 = 3) OR (hd_dep_count#24 = 1))) +Input [2]: [hd_demo_sk#20, hd_dep_count#21] +Condition : (isnotnull(hd_demo_sk#20) AND ((hd_dep_count#21 = 3) OR (hd_dep_count#21 = 1))) (29) BroadcastExchange -Input [2]: [hd_demo_sk#23, hd_dep_count#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] +Input [2]: [hd_demo_sk#20, hd_dep_count#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (30) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#23] -Join condition: (((((((cd_marital_status#20 = M) AND (cd_education_status#21 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) AND (hd_dep_count#24 = 3)) OR (((((cd_marital_status#20 = S) AND (cd_education_status#21 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00)) AND (hd_dep_count#24 = 1))) OR (((((cd_marital_status#20 = W) AND (cd_education_status#21 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00)) AND (hd_dep_count#24 = 1))) +Right keys [1]: [hd_demo_sk#20] +Join condition: (((((((cd_marital_status#18 = M) AND (cd_education_status#19 = Advanced Degree )) AND (ss_sales_price#6 >= 100.00)) AND (ss_sales_price#6 <= 150.00)) AND (hd_dep_count#21 = 3)) OR (((((cd_marital_status#18 = S) AND (cd_education_status#19 = College )) AND (ss_sales_price#6 >= 50.00)) AND (ss_sales_price#6 <= 100.00)) AND (hd_dep_count#21 = 1))) OR (((((cd_marital_status#18 = W) AND (cd_education_status#19 = 2 yr Degree )) AND (ss_sales_price#6 >= 150.00)) AND (ss_sales_price#6 <= 200.00)) AND (hd_dep_count#21 = 1))) (31) Project [codegen id : 6] Output [3]: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] -Input [9]: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#20, cd_education_status#21, hd_demo_sk#23, hd_dep_count#24] +Input [9]: [ss_hdemo_sk#2, ss_quantity#5, ss_sales_price#6, ss_ext_sales_price#7, ss_ext_wholesale_cost#8, cd_marital_status#18, cd_education_status#19, hd_demo_sk#20, hd_dep_count#21] (32) HashAggregate [codegen id : 6] Input [3]: [ss_quantity#5, ss_ext_sales_price#7, ss_ext_wholesale_cost#8] Keys: [] Functions [4]: [partial_avg(ss_quantity#5), partial_avg(UnscaledValue(ss_ext_sales_price#7)), partial_avg(UnscaledValue(ss_ext_wholesale_cost#8)), partial_sum(UnscaledValue(ss_ext_wholesale_cost#8))] -Aggregate Attributes [7]: [sum#26, count#27, sum#28, count#29, sum#30, count#31, sum#32] -Results [7]: [sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39] +Aggregate Attributes [7]: [sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28] +Results [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] (33) Exchange -Input [7]: [sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#40] +Input [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] (34) HashAggregate [codegen id : 7] -Input [7]: [sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39] +Input [7]: [sum#29, count#30, sum#31, count#32, sum#33, count#34, sum#35] Keys: [] Functions [4]: [avg(ss_quantity#5), avg(UnscaledValue(ss_ext_sales_price#7)), avg(UnscaledValue(ss_ext_wholesale_cost#8)), sum(UnscaledValue(ss_ext_wholesale_cost#8))] -Aggregate Attributes [4]: [avg(ss_quantity#5)#41, avg(UnscaledValue(ss_ext_sales_price#7))#42, avg(UnscaledValue(ss_ext_wholesale_cost#8))#43, sum(UnscaledValue(ss_ext_wholesale_cost#8))#44] -Results [4]: [avg(ss_quantity#5)#41 AS avg(ss_quantity)#45, cast((avg(UnscaledValue(ss_ext_sales_price#7))#42 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#46, cast((avg(UnscaledValue(ss_ext_wholesale_cost#8))#43 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#47, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#8))#44,17,2) AS sum(ss_ext_wholesale_cost)#48] +Aggregate Attributes [4]: [avg(ss_quantity#5)#36, avg(UnscaledValue(ss_ext_sales_price#7))#37, avg(UnscaledValue(ss_ext_wholesale_cost#8))#38, sum(UnscaledValue(ss_ext_wholesale_cost#8))#39] +Results [4]: [avg(ss_quantity#5)#36 AS avg(ss_quantity)#40, cast((avg(UnscaledValue(ss_ext_sales_price#7))#37 / 100.0) as decimal(11,6)) AS avg(ss_ext_sales_price)#41, cast((avg(UnscaledValue(ss_ext_wholesale_cost#8))#38 / 100.0) as decimal(11,6)) AS avg(ss_ext_wholesale_cost)#42, MakeDecimal(sum(UnscaledValue(ss_ext_wholesale_cost#8))#39,17,2) AS sum(ss_ext_wholesale_cost)#43] ===== Subqueries ===== @@ -203,25 +203,25 @@ BroadcastExchange (39) (35) Scan parquet default.date_dim -Output [2]: [d_date_sk#18, d_year#49] +Output [2]: [d_date_sk#16, d_year#44] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (36) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#18, d_year#49] +Input [2]: [d_date_sk#16, d_year#44] (37) Filter [codegen id : 1] -Input [2]: [d_date_sk#18, d_year#49] -Condition : ((isnotnull(d_year#49) AND (d_year#49 = 2001)) AND isnotnull(d_date_sk#18)) +Input [2]: [d_date_sk#16, d_year#44] +Condition : ((isnotnull(d_year#44) AND (d_year#44 = 2001)) AND isnotnull(d_date_sk#16)) (38) Project [codegen id : 1] -Output [1]: [d_date_sk#18] -Input [2]: [d_date_sk#18, d_year#49] +Output [1]: [d_date_sk#16] +Input [2]: [d_date_sk#16, d_year#44] (39) BroadcastExchange -Input [1]: [d_date_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] +Input [1]: [d_date_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt index 4105a94131dda..f5b3161c7e777 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a.sf100/explain.txt @@ -141,545 +141,545 @@ Condition : isnotnull(ss_item_sk#1) (4) Exchange Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#6] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.item -Output [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct (7) ColumnarToRow [codegen id : 19] -Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] (8) Filter [codegen id : 19] -Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] -Condition : ((isnotnull(i_brand_id#8) AND isnotnull(i_class_id#9)) AND isnotnull(i_category_id#10)) +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : ((isnotnull(i_brand_id#7) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) (9) Scan parquet default.store_sales -Output [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Output [2]: [ss_item_sk#10, ss_sold_date_sk#11] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#12), dynamicpruningexpression(ss_sold_date_sk#12 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ss_sold_date_sk#11), dynamicpruningexpression(ss_sold_date_sk#11 IN dynamicpruning#12)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (10) ColumnarToRow [codegen id : 11] -Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Input [2]: [ss_item_sk#10, ss_sold_date_sk#11] (11) Filter [codegen id : 11] -Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] -Condition : isnotnull(ss_item_sk#11) +Input [2]: [ss_item_sk#10, ss_sold_date_sk#11] +Condition : isnotnull(ss_item_sk#10) (12) ReusedExchange [Reuses operator id: 152] -Output [1]: [d_date_sk#14] +Output [1]: [d_date_sk#13] (13) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_sold_date_sk#12] -Right keys [1]: [d_date_sk#14] +Left keys [1]: [ss_sold_date_sk#11] +Right keys [1]: [d_date_sk#13] Join condition: None (14) Project [codegen id : 11] -Output [1]: [ss_item_sk#11] -Input [3]: [ss_item_sk#11, ss_sold_date_sk#12, d_date_sk#14] +Output [1]: [ss_item_sk#10] +Input [3]: [ss_item_sk#10, ss_sold_date_sk#11, d_date_sk#13] (15) Scan parquet default.item -Output [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] +Output [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct (16) ColumnarToRow [codegen id : 4] -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] (17) Filter [codegen id : 4] -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Condition : (((isnotnull(i_item_sk#15) AND isnotnull(i_brand_id#16)) AND isnotnull(i_class_id#17)) AND isnotnull(i_category_id#18)) +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Condition : (((isnotnull(i_item_sk#14) AND isnotnull(i_brand_id#15)) AND isnotnull(i_class_id#16)) AND isnotnull(i_category_id#17)) (18) Exchange -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Arguments: hashpartitioning(coalesce(i_brand_id#16, 0), isnull(i_brand_id#16), coalesce(i_class_id#17, 0), isnull(i_class_id#17), coalesce(i_category_id#18, 0), isnull(i_category_id#18), 5), ENSURE_REQUIREMENTS, [id=#19] +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Arguments: hashpartitioning(coalesce(i_brand_id#15, 0), isnull(i_brand_id#15), coalesce(i_class_id#16, 0), isnull(i_class_id#16), coalesce(i_category_id#17, 0), isnull(i_category_id#17), 5), ENSURE_REQUIREMENTS, [plan_id=2] (19) Sort [codegen id : 5] -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Arguments: [coalesce(i_brand_id#16, 0) ASC NULLS FIRST, isnull(i_brand_id#16) ASC NULLS FIRST, coalesce(i_class_id#17, 0) ASC NULLS FIRST, isnull(i_class_id#17) ASC NULLS FIRST, coalesce(i_category_id#18, 0) ASC NULLS FIRST, isnull(i_category_id#18) ASC NULLS FIRST], false, 0 +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Arguments: [coalesce(i_brand_id#15, 0) ASC NULLS FIRST, isnull(i_brand_id#15) ASC NULLS FIRST, coalesce(i_class_id#16, 0) ASC NULLS FIRST, isnull(i_class_id#16) ASC NULLS FIRST, coalesce(i_category_id#17, 0) ASC NULLS FIRST, isnull(i_category_id#17) ASC NULLS FIRST], false, 0 (20) Scan parquet default.catalog_sales -Output [2]: [cs_item_sk#20, cs_sold_date_sk#21] +Output [2]: [cs_item_sk#18, cs_sold_date_sk#19] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#21), dynamicpruningexpression(cs_sold_date_sk#21 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(cs_sold_date_sk#19), dynamicpruningexpression(cs_sold_date_sk#19 IN dynamicpruning#12)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 8] -Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] +Input [2]: [cs_item_sk#18, cs_sold_date_sk#19] (22) Filter [codegen id : 8] -Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] -Condition : isnotnull(cs_item_sk#20) +Input [2]: [cs_item_sk#18, cs_sold_date_sk#19] +Condition : isnotnull(cs_item_sk#18) (23) ReusedExchange [Reuses operator id: 152] -Output [1]: [d_date_sk#22] +Output [1]: [d_date_sk#20] (24) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_sold_date_sk#21] -Right keys [1]: [d_date_sk#22] +Left keys [1]: [cs_sold_date_sk#19] +Right keys [1]: [d_date_sk#20] Join condition: None (25) Project [codegen id : 8] -Output [1]: [cs_item_sk#20] -Input [3]: [cs_item_sk#20, cs_sold_date_sk#21, d_date_sk#22] +Output [1]: [cs_item_sk#18] +Input [3]: [cs_item_sk#18, cs_sold_date_sk#19, d_date_sk#20] (26) Scan parquet default.item -Output [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] +Output [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 7] -Input [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] +Input [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] (28) Filter [codegen id : 7] -Input [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] -Condition : isnotnull(i_item_sk#23) +Input [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] +Condition : isnotnull(i_item_sk#21) (29) BroadcastExchange -Input [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] +Input [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (30) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_item_sk#20] -Right keys [1]: [i_item_sk#23] +Left keys [1]: [cs_item_sk#18] +Right keys [1]: [i_item_sk#21] Join condition: None (31) Project [codegen id : 8] -Output [3]: [i_brand_id#24, i_class_id#25, i_category_id#26] -Input [5]: [cs_item_sk#20, i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] +Output [3]: [i_brand_id#22, i_class_id#23, i_category_id#24] +Input [5]: [cs_item_sk#18, i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] (32) Exchange -Input [3]: [i_brand_id#24, i_class_id#25, i_category_id#26] -Arguments: hashpartitioning(coalesce(i_brand_id#24, 0), isnull(i_brand_id#24), coalesce(i_class_id#25, 0), isnull(i_class_id#25), coalesce(i_category_id#26, 0), isnull(i_category_id#26), 5), ENSURE_REQUIREMENTS, [id=#28] +Input [3]: [i_brand_id#22, i_class_id#23, i_category_id#24] +Arguments: hashpartitioning(coalesce(i_brand_id#22, 0), isnull(i_brand_id#22), coalesce(i_class_id#23, 0), isnull(i_class_id#23), coalesce(i_category_id#24, 0), isnull(i_category_id#24), 5), ENSURE_REQUIREMENTS, [plan_id=4] (33) Sort [codegen id : 9] -Input [3]: [i_brand_id#24, i_class_id#25, i_category_id#26] -Arguments: [coalesce(i_brand_id#24, 0) ASC NULLS FIRST, isnull(i_brand_id#24) ASC NULLS FIRST, coalesce(i_class_id#25, 0) ASC NULLS FIRST, isnull(i_class_id#25) ASC NULLS FIRST, coalesce(i_category_id#26, 0) ASC NULLS FIRST, isnull(i_category_id#26) ASC NULLS FIRST], false, 0 +Input [3]: [i_brand_id#22, i_class_id#23, i_category_id#24] +Arguments: [coalesce(i_brand_id#22, 0) ASC NULLS FIRST, isnull(i_brand_id#22) ASC NULLS FIRST, coalesce(i_class_id#23, 0) ASC NULLS FIRST, isnull(i_class_id#23) ASC NULLS FIRST, coalesce(i_category_id#24, 0) ASC NULLS FIRST, isnull(i_category_id#24) ASC NULLS FIRST], false, 0 (34) SortMergeJoin [codegen id : 10] -Left keys [6]: [coalesce(i_brand_id#16, 0), isnull(i_brand_id#16), coalesce(i_class_id#17, 0), isnull(i_class_id#17), coalesce(i_category_id#18, 0), isnull(i_category_id#18)] -Right keys [6]: [coalesce(i_brand_id#24, 0), isnull(i_brand_id#24), coalesce(i_class_id#25, 0), isnull(i_class_id#25), coalesce(i_category_id#26, 0), isnull(i_category_id#26)] +Left keys [6]: [coalesce(i_brand_id#15, 0), isnull(i_brand_id#15), coalesce(i_class_id#16, 0), isnull(i_class_id#16), coalesce(i_category_id#17, 0), isnull(i_category_id#17)] +Right keys [6]: [coalesce(i_brand_id#22, 0), isnull(i_brand_id#22), coalesce(i_class_id#23, 0), isnull(i_class_id#23), coalesce(i_category_id#24, 0), isnull(i_category_id#24)] Join condition: None (35) BroadcastExchange -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#29] +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_item_sk#11] -Right keys [1]: [i_item_sk#15] +Left keys [1]: [ss_item_sk#10] +Right keys [1]: [i_item_sk#14] Join condition: None (37) Project [codegen id : 11] -Output [3]: [i_brand_id#16 AS brand_id#30, i_class_id#17 AS class_id#31, i_category_id#18 AS category_id#32] -Input [5]: [ss_item_sk#11, i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] +Output [3]: [i_brand_id#15 AS brand_id#25, i_class_id#16 AS class_id#26, i_category_id#17 AS category_id#27] +Input [5]: [ss_item_sk#10, i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] (38) HashAggregate [codegen id : 11] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] +Results [3]: [brand_id#25, class_id#26, category_id#27] (39) Exchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: hashpartitioning(brand_id#30, class_id#31, category_id#32, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: hashpartitioning(brand_id#25, class_id#26, category_id#27, 5), ENSURE_REQUIREMENTS, [plan_id=6] (40) HashAggregate [codegen id : 12] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] +Results [3]: [brand_id#25, class_id#26, category_id#27] (41) Exchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: hashpartitioning(coalesce(brand_id#30, 0), isnull(brand_id#30), coalesce(class_id#31, 0), isnull(class_id#31), coalesce(category_id#32, 0), isnull(category_id#32), 5), ENSURE_REQUIREMENTS, [id=#34] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: hashpartitioning(coalesce(brand_id#25, 0), isnull(brand_id#25), coalesce(class_id#26, 0), isnull(class_id#26), coalesce(category_id#27, 0), isnull(category_id#27), 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) Sort [codegen id : 13] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: [coalesce(brand_id#30, 0) ASC NULLS FIRST, isnull(brand_id#30) ASC NULLS FIRST, coalesce(class_id#31, 0) ASC NULLS FIRST, isnull(class_id#31) ASC NULLS FIRST, coalesce(category_id#32, 0) ASC NULLS FIRST, isnull(category_id#32) ASC NULLS FIRST], false, 0 +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: [coalesce(brand_id#25, 0) ASC NULLS FIRST, isnull(brand_id#25) ASC NULLS FIRST, coalesce(class_id#26, 0) ASC NULLS FIRST, isnull(class_id#26) ASC NULLS FIRST, coalesce(category_id#27, 0) ASC NULLS FIRST, isnull(category_id#27) ASC NULLS FIRST], false, 0 (43) Scan parquet default.web_sales -Output [2]: [ws_item_sk#35, ws_sold_date_sk#36] +Output [2]: [ws_item_sk#28, ws_sold_date_sk#29] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#36), dynamicpruningexpression(ws_sold_date_sk#36 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ws_sold_date_sk#29), dynamicpruningexpression(ws_sold_date_sk#29 IN dynamicpruning#12)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 16] -Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] (45) Filter [codegen id : 16] -Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] -Condition : isnotnull(ws_item_sk#35) +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] +Condition : isnotnull(ws_item_sk#28) (46) ReusedExchange [Reuses operator id: 152] -Output [1]: [d_date_sk#37] +Output [1]: [d_date_sk#30] (47) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [ws_sold_date_sk#36] -Right keys [1]: [d_date_sk#37] +Left keys [1]: [ws_sold_date_sk#29] +Right keys [1]: [d_date_sk#30] Join condition: None (48) Project [codegen id : 16] -Output [1]: [ws_item_sk#35] -Input [3]: [ws_item_sk#35, ws_sold_date_sk#36, d_date_sk#37] +Output [1]: [ws_item_sk#28] +Input [3]: [ws_item_sk#28, ws_sold_date_sk#29, d_date_sk#30] (49) ReusedExchange [Reuses operator id: 29] -Output [4]: [i_item_sk#38, i_brand_id#39, i_class_id#40, i_category_id#41] +Output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] (50) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [ws_item_sk#35] -Right keys [1]: [i_item_sk#38] +Left keys [1]: [ws_item_sk#28] +Right keys [1]: [i_item_sk#31] Join condition: None (51) Project [codegen id : 16] -Output [3]: [i_brand_id#39, i_class_id#40, i_category_id#41] -Input [5]: [ws_item_sk#35, i_item_sk#38, i_brand_id#39, i_class_id#40, i_category_id#41] +Output [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Input [5]: [ws_item_sk#28, i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] (52) Exchange -Input [3]: [i_brand_id#39, i_class_id#40, i_category_id#41] -Arguments: hashpartitioning(coalesce(i_brand_id#39, 0), isnull(i_brand_id#39), coalesce(i_class_id#40, 0), isnull(i_class_id#40), coalesce(i_category_id#41, 0), isnull(i_category_id#41), 5), ENSURE_REQUIREMENTS, [id=#42] +Input [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: hashpartitioning(coalesce(i_brand_id#32, 0), isnull(i_brand_id#32), coalesce(i_class_id#33, 0), isnull(i_class_id#33), coalesce(i_category_id#34, 0), isnull(i_category_id#34), 5), ENSURE_REQUIREMENTS, [plan_id=8] (53) Sort [codegen id : 17] -Input [3]: [i_brand_id#39, i_class_id#40, i_category_id#41] -Arguments: [coalesce(i_brand_id#39, 0) ASC NULLS FIRST, isnull(i_brand_id#39) ASC NULLS FIRST, coalesce(i_class_id#40, 0) ASC NULLS FIRST, isnull(i_class_id#40) ASC NULLS FIRST, coalesce(i_category_id#41, 0) ASC NULLS FIRST, isnull(i_category_id#41) ASC NULLS FIRST], false, 0 +Input [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [coalesce(i_brand_id#32, 0) ASC NULLS FIRST, isnull(i_brand_id#32) ASC NULLS FIRST, coalesce(i_class_id#33, 0) ASC NULLS FIRST, isnull(i_class_id#33) ASC NULLS FIRST, coalesce(i_category_id#34, 0) ASC NULLS FIRST, isnull(i_category_id#34) ASC NULLS FIRST], false, 0 (54) SortMergeJoin [codegen id : 18] -Left keys [6]: [coalesce(brand_id#30, 0), isnull(brand_id#30), coalesce(class_id#31, 0), isnull(class_id#31), coalesce(category_id#32, 0), isnull(category_id#32)] -Right keys [6]: [coalesce(i_brand_id#39, 0), isnull(i_brand_id#39), coalesce(i_class_id#40, 0), isnull(i_class_id#40), coalesce(i_category_id#41, 0), isnull(i_category_id#41)] +Left keys [6]: [coalesce(brand_id#25, 0), isnull(brand_id#25), coalesce(class_id#26, 0), isnull(class_id#26), coalesce(category_id#27, 0), isnull(category_id#27)] +Right keys [6]: [coalesce(i_brand_id#32, 0), isnull(i_brand_id#32), coalesce(i_class_id#33, 0), isnull(i_class_id#33), coalesce(i_category_id#34, 0), isnull(i_category_id#34)] Join condition: None (55) BroadcastExchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#43] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=9] (56) BroadcastHashJoin [codegen id : 19] -Left keys [3]: [i_brand_id#8, i_class_id#9, i_category_id#10] -Right keys [3]: [brand_id#30, class_id#31, category_id#32] +Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Right keys [3]: [brand_id#25, class_id#26, category_id#27] Join condition: None (57) Project [codegen id : 19] -Output [1]: [i_item_sk#7 AS ss_item_sk#44] -Input [7]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10, brand_id#30, class_id#31, category_id#32] +Output [1]: [i_item_sk#6 AS ss_item_sk#35] +Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#25, class_id#26, category_id#27] (58) Exchange -Input [1]: [ss_item_sk#44] -Arguments: hashpartitioning(ss_item_sk#44, 5), ENSURE_REQUIREMENTS, [id=#45] +Input [1]: [ss_item_sk#35] +Arguments: hashpartitioning(ss_item_sk#35, 5), ENSURE_REQUIREMENTS, [plan_id=10] (59) Sort [codegen id : 20] -Input [1]: [ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#35] +Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (60) SortMergeJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [ss_item_sk#44] +Right keys [1]: [ss_item_sk#35] Join condition: None (61) ReusedExchange [Reuses operator id: 147] -Output [1]: [d_date_sk#46] +Output [1]: [d_date_sk#36] (62) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#46] +Right keys [1]: [d_date_sk#36] Join condition: None (63) Project [codegen id : 43] Output [3]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3] -Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#46] +Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#36] (64) Scan parquet default.item -Output [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Output [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (65) ColumnarToRow [codegen id : 22] -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] (66) Filter [codegen id : 22] -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Condition : isnotnull(i_item_sk#47) +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Condition : isnotnull(i_item_sk#37) (67) Exchange -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Arguments: hashpartitioning(i_item_sk#47, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Arguments: hashpartitioning(i_item_sk#37, 5), ENSURE_REQUIREMENTS, [plan_id=11] (68) Sort [codegen id : 23] -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Arguments: [i_item_sk#47 ASC NULLS FIRST], false, 0 +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Arguments: [i_item_sk#37 ASC NULLS FIRST], false, 0 (69) ReusedExchange [Reuses operator id: 58] -Output [1]: [ss_item_sk#44] +Output [1]: [ss_item_sk#35] (70) Sort [codegen id : 41] -Input [1]: [ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#35] +Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (71) SortMergeJoin [codegen id : 42] -Left keys [1]: [i_item_sk#47] -Right keys [1]: [ss_item_sk#44] +Left keys [1]: [i_item_sk#37] +Right keys [1]: [ss_item_sk#35] Join condition: None (72) BroadcastExchange -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#52] +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] (73) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#47] +Right keys [1]: [i_item_sk#37] Join condition: None (74) Project [codegen id : 43] -Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] -Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#38, i_class_id#39, i_category_id#40] +Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] (75) HashAggregate [codegen id : 43] -Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] -Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#38, i_class_id#39, i_category_id#40] +Keys [3]: [i_brand_id#38, i_class_id#39, i_category_id#40] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#53, isEmpty#54, count#55] -Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] +Aggregate Attributes [3]: [sum#41, isEmpty#42, count#43] +Results [6]: [i_brand_id#38, i_class_id#39, i_category_id#40, sum#44, isEmpty#45, count#46] (76) Exchange -Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] -Arguments: hashpartitioning(i_brand_id#48, i_class_id#49, i_category_id#50, 5), ENSURE_REQUIREMENTS, [id=#59] +Input [6]: [i_brand_id#38, i_class_id#39, i_category_id#40, sum#44, isEmpty#45, count#46] +Arguments: hashpartitioning(i_brand_id#38, i_class_id#39, i_category_id#40, 5), ENSURE_REQUIREMENTS, [plan_id=13] (77) HashAggregate [codegen id : 44] -Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] -Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Input [6]: [i_brand_id#38, i_class_id#39, i_category_id#40, sum#44, isEmpty#45, count#46] +Keys [3]: [i_brand_id#38, i_class_id#39, i_category_id#40] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60, count(1)#61] -Results [5]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60 AS sales#62, count(1)#61 AS number_sales#63] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47, count(1)#48] +Results [5]: [i_brand_id#38, i_class_id#39, i_category_id#40, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47 AS sales#49, count(1)#48 AS number_sales#50] (78) Filter [codegen id : 44] -Input [5]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#62, number_sales#63] -Condition : (isnotnull(sales#62) AND (cast(sales#62 as decimal(32,6)) > cast(Subquery scalar-subquery#64, [id=#65] as decimal(32,6)))) +Input [5]: [i_brand_id#38, i_class_id#39, i_category_id#40, sales#49, number_sales#50] +Condition : (isnotnull(sales#49) AND (cast(sales#49 as decimal(32,6)) > cast(Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) (79) Project [codegen id : 44] -Output [6]: [sales#62, number_sales#63, store AS channel#66, i_brand_id#48, i_class_id#49, i_category_id#50] -Input [5]: [i_brand_id#48, i_class_id#49, i_category_id#50, sales#62, number_sales#63] +Output [6]: [sales#49, number_sales#50, store AS channel#53, i_brand_id#38, i_class_id#39, i_category_id#40] +Input [5]: [i_brand_id#38, i_class_id#39, i_category_id#40, sales#49, number_sales#50] (80) Scan parquet default.catalog_sales -Output [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] +Output [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#70), dynamicpruningexpression(cs_sold_date_sk#70 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#57), dynamicpruningexpression(cs_sold_date_sk#57 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (81) ColumnarToRow [codegen id : 45] -Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] +Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] (82) Filter [codegen id : 45] -Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] -Condition : isnotnull(cs_item_sk#67) +Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] +Condition : isnotnull(cs_item_sk#54) (83) Exchange -Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] -Arguments: hashpartitioning(cs_item_sk#67, 5), ENSURE_REQUIREMENTS, [id=#71] +Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] +Arguments: hashpartitioning(cs_item_sk#54, 5), ENSURE_REQUIREMENTS, [plan_id=14] (84) Sort [codegen id : 46] -Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] -Arguments: [cs_item_sk#67 ASC NULLS FIRST], false, 0 +Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] +Arguments: [cs_item_sk#54 ASC NULLS FIRST], false, 0 (85) ReusedExchange [Reuses operator id: 58] -Output [1]: [ss_item_sk#44] +Output [1]: [ss_item_sk#35] (86) Sort [codegen id : 64] -Input [1]: [ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#35] +Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (87) SortMergeJoin [codegen id : 87] -Left keys [1]: [cs_item_sk#67] -Right keys [1]: [ss_item_sk#44] +Left keys [1]: [cs_item_sk#54] +Right keys [1]: [ss_item_sk#35] Join condition: None (88) ReusedExchange [Reuses operator id: 147] -Output [1]: [d_date_sk#72] +Output [1]: [d_date_sk#58] (89) BroadcastHashJoin [codegen id : 87] -Left keys [1]: [cs_sold_date_sk#70] -Right keys [1]: [d_date_sk#72] +Left keys [1]: [cs_sold_date_sk#57] +Right keys [1]: [d_date_sk#58] Join condition: None (90) Project [codegen id : 87] -Output [3]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69] -Input [5]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70, d_date_sk#72] +Output [3]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56] +Input [5]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57, d_date_sk#58] (91) ReusedExchange [Reuses operator id: 72] -Output [4]: [i_item_sk#73, i_brand_id#74, i_class_id#75, i_category_id#76] +Output [4]: [i_item_sk#59, i_brand_id#60, i_class_id#61, i_category_id#62] (92) BroadcastHashJoin [codegen id : 87] -Left keys [1]: [cs_item_sk#67] -Right keys [1]: [i_item_sk#73] +Left keys [1]: [cs_item_sk#54] +Right keys [1]: [i_item_sk#59] Join condition: None (93) Project [codegen id : 87] -Output [5]: [cs_quantity#68, cs_list_price#69, i_brand_id#74, i_class_id#75, i_category_id#76] -Input [7]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, i_item_sk#73, i_brand_id#74, i_class_id#75, i_category_id#76] +Output [5]: [cs_quantity#55, cs_list_price#56, i_brand_id#60, i_class_id#61, i_category_id#62] +Input [7]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, i_item_sk#59, i_brand_id#60, i_class_id#61, i_category_id#62] (94) HashAggregate [codegen id : 87] -Input [5]: [cs_quantity#68, cs_list_price#69, i_brand_id#74, i_class_id#75, i_category_id#76] -Keys [3]: [i_brand_id#74, i_class_id#75, i_category_id#76] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#77, isEmpty#78, count#79] -Results [6]: [i_brand_id#74, i_class_id#75, i_category_id#76, sum#80, isEmpty#81, count#82] +Input [5]: [cs_quantity#55, cs_list_price#56, i_brand_id#60, i_class_id#61, i_category_id#62] +Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#63, isEmpty#64, count#65] +Results [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#66, isEmpty#67, count#68] (95) Exchange -Input [6]: [i_brand_id#74, i_class_id#75, i_category_id#76, sum#80, isEmpty#81, count#82] -Arguments: hashpartitioning(i_brand_id#74, i_class_id#75, i_category_id#76, 5), ENSURE_REQUIREMENTS, [id=#83] +Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#66, isEmpty#67, count#68] +Arguments: hashpartitioning(i_brand_id#60, i_class_id#61, i_category_id#62, 5), ENSURE_REQUIREMENTS, [plan_id=15] (96) HashAggregate [codegen id : 88] -Input [6]: [i_brand_id#74, i_class_id#75, i_category_id#76, sum#80, isEmpty#81, count#82] -Keys [3]: [i_brand_id#74, i_class_id#75, i_category_id#76] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#84, count(1)#85] -Results [5]: [i_brand_id#74, i_class_id#75, i_category_id#76, sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#84 AS sales#86, count(1)#85 AS number_sales#87] +Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#66, isEmpty#67, count#68] +Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#69, count(1)#70] +Results [5]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#69 AS sales#71, count(1)#70 AS number_sales#72] (97) Filter [codegen id : 88] -Input [5]: [i_brand_id#74, i_class_id#75, i_category_id#76, sales#86, number_sales#87] -Condition : (isnotnull(sales#86) AND (cast(sales#86 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#64, [id=#65] as decimal(32,6)))) +Input [5]: [i_brand_id#60, i_class_id#61, i_category_id#62, sales#71, number_sales#72] +Condition : (isnotnull(sales#71) AND (cast(sales#71 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) (98) Project [codegen id : 88] -Output [6]: [sales#86, number_sales#87, catalog AS channel#88, i_brand_id#74, i_class_id#75, i_category_id#76] -Input [5]: [i_brand_id#74, i_class_id#75, i_category_id#76, sales#86, number_sales#87] +Output [6]: [sales#71, number_sales#72, catalog AS channel#73, i_brand_id#60, i_class_id#61, i_category_id#62] +Input [5]: [i_brand_id#60, i_class_id#61, i_category_id#62, sales#71, number_sales#72] (99) Scan parquet default.web_sales -Output [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] +Output [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#92), dynamicpruningexpression(ws_sold_date_sk#92 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#77), dynamicpruningexpression(ws_sold_date_sk#77 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (100) ColumnarToRow [codegen id : 89] -Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] +Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] (101) Filter [codegen id : 89] -Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] -Condition : isnotnull(ws_item_sk#89) +Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] +Condition : isnotnull(ws_item_sk#74) (102) Exchange -Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] -Arguments: hashpartitioning(ws_item_sk#89, 5), ENSURE_REQUIREMENTS, [id=#93] +Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] +Arguments: hashpartitioning(ws_item_sk#74, 5), ENSURE_REQUIREMENTS, [plan_id=16] (103) Sort [codegen id : 90] -Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] -Arguments: [ws_item_sk#89 ASC NULLS FIRST], false, 0 +Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] +Arguments: [ws_item_sk#74 ASC NULLS FIRST], false, 0 (104) ReusedExchange [Reuses operator id: 58] -Output [1]: [ss_item_sk#44] +Output [1]: [ss_item_sk#35] (105) Sort [codegen id : 108] -Input [1]: [ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#35] +Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (106) SortMergeJoin [codegen id : 131] -Left keys [1]: [ws_item_sk#89] -Right keys [1]: [ss_item_sk#44] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [ss_item_sk#35] Join condition: None (107) ReusedExchange [Reuses operator id: 147] -Output [1]: [d_date_sk#94] +Output [1]: [d_date_sk#78] (108) BroadcastHashJoin [codegen id : 131] -Left keys [1]: [ws_sold_date_sk#92] -Right keys [1]: [d_date_sk#94] +Left keys [1]: [ws_sold_date_sk#77] +Right keys [1]: [d_date_sk#78] Join condition: None (109) Project [codegen id : 131] -Output [3]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91] -Input [5]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92, d_date_sk#94] +Output [3]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76] +Input [5]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77, d_date_sk#78] (110) ReusedExchange [Reuses operator id: 72] -Output [4]: [i_item_sk#95, i_brand_id#96, i_class_id#97, i_category_id#98] +Output [4]: [i_item_sk#79, i_brand_id#80, i_class_id#81, i_category_id#82] (111) BroadcastHashJoin [codegen id : 131] -Left keys [1]: [ws_item_sk#89] -Right keys [1]: [i_item_sk#95] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [i_item_sk#79] Join condition: None (112) Project [codegen id : 131] -Output [5]: [ws_quantity#90, ws_list_price#91, i_brand_id#96, i_class_id#97, i_category_id#98] -Input [7]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, i_item_sk#95, i_brand_id#96, i_class_id#97, i_category_id#98] +Output [5]: [ws_quantity#75, ws_list_price#76, i_brand_id#80, i_class_id#81, i_category_id#82] +Input [7]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, i_item_sk#79, i_brand_id#80, i_class_id#81, i_category_id#82] (113) HashAggregate [codegen id : 131] -Input [5]: [ws_quantity#90, ws_list_price#91, i_brand_id#96, i_class_id#97, i_category_id#98] -Keys [3]: [i_brand_id#96, i_class_id#97, i_category_id#98] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#99, isEmpty#100, count#101] -Results [6]: [i_brand_id#96, i_class_id#97, i_category_id#98, sum#102, isEmpty#103, count#104] +Input [5]: [ws_quantity#75, ws_list_price#76, i_brand_id#80, i_class_id#81, i_category_id#82] +Keys [3]: [i_brand_id#80, i_class_id#81, i_category_id#82] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#83, isEmpty#84, count#85] +Results [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#86, isEmpty#87, count#88] (114) Exchange -Input [6]: [i_brand_id#96, i_class_id#97, i_category_id#98, sum#102, isEmpty#103, count#104] -Arguments: hashpartitioning(i_brand_id#96, i_class_id#97, i_category_id#98, 5), ENSURE_REQUIREMENTS, [id=#105] +Input [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#86, isEmpty#87, count#88] +Arguments: hashpartitioning(i_brand_id#80, i_class_id#81, i_category_id#82, 5), ENSURE_REQUIREMENTS, [plan_id=17] (115) HashAggregate [codegen id : 132] -Input [6]: [i_brand_id#96, i_class_id#97, i_category_id#98, sum#102, isEmpty#103, count#104] -Keys [3]: [i_brand_id#96, i_class_id#97, i_category_id#98] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2)))#106, count(1)#107] -Results [5]: [i_brand_id#96, i_class_id#97, i_category_id#98, sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2)))#106 AS sales#108, count(1)#107 AS number_sales#109] +Input [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#86, isEmpty#87, count#88] +Keys [3]: [i_brand_id#80, i_class_id#81, i_category_id#82] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2)))#89, count(1)#90] +Results [5]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2)))#89 AS sales#91, count(1)#90 AS number_sales#92] (116) Filter [codegen id : 132] -Input [5]: [i_brand_id#96, i_class_id#97, i_category_id#98, sales#108, number_sales#109] -Condition : (isnotnull(sales#108) AND (cast(sales#108 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#64, [id=#65] as decimal(32,6)))) +Input [5]: [i_brand_id#80, i_class_id#81, i_category_id#82, sales#91, number_sales#92] +Condition : (isnotnull(sales#91) AND (cast(sales#91 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) (117) Project [codegen id : 132] -Output [6]: [sales#108, number_sales#109, web AS channel#110, i_brand_id#96, i_class_id#97, i_category_id#98] -Input [5]: [i_brand_id#96, i_class_id#97, i_category_id#98, sales#108, number_sales#109] +Output [6]: [sales#91, number_sales#92, web AS channel#93, i_brand_id#80, i_class_id#81, i_category_id#82] +Input [5]: [i_brand_id#80, i_class_id#81, i_category_id#82, sales#91, number_sales#92] (118) Union (119) Expand [codegen id : 133] -Input [6]: [sales#62, number_sales#63, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50] -Arguments: [[sales#62, number_sales#63, channel#66, i_brand_id#48, i_class_id#49, i_category_id#50, 0], [sales#62, number_sales#63, channel#66, i_brand_id#48, i_class_id#49, null, 1], [sales#62, number_sales#63, channel#66, i_brand_id#48, null, null, 3], [sales#62, number_sales#63, channel#66, null, null, null, 7], [sales#62, number_sales#63, null, null, null, null, 15]], [sales#62, number_sales#63, channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115] +Input [6]: [sales#49, number_sales#50, channel#53, i_brand_id#38, i_class_id#39, i_category_id#40] +Arguments: [[sales#49, number_sales#50, channel#53, i_brand_id#38, i_class_id#39, i_category_id#40, 0], [sales#49, number_sales#50, channel#53, i_brand_id#38, i_class_id#39, null, 1], [sales#49, number_sales#50, channel#53, i_brand_id#38, null, null, 3], [sales#49, number_sales#50, channel#53, null, null, null, 7], [sales#49, number_sales#50, null, null, null, null, 15]], [sales#49, number_sales#50, channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] (120) HashAggregate [codegen id : 133] -Input [7]: [sales#62, number_sales#63, channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115] -Keys [5]: [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115] -Functions [2]: [partial_sum(sales#62), partial_sum(number_sales#63)] -Aggregate Attributes [3]: [sum#116, isEmpty#117, sum#118] -Results [8]: [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115, sum#119, isEmpty#120, sum#121] +Input [7]: [sales#49, number_sales#50, channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] +Keys [5]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] +Functions [2]: [partial_sum(sales#49), partial_sum(number_sales#50)] +Aggregate Attributes [3]: [sum#99, isEmpty#100, sum#101] +Results [8]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, sum#102, isEmpty#103, sum#104] (121) Exchange -Input [8]: [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115, sum#119, isEmpty#120, sum#121] -Arguments: hashpartitioning(channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115, 5), ENSURE_REQUIREMENTS, [id=#122] +Input [8]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, sum#102, isEmpty#103, sum#104] +Arguments: hashpartitioning(channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, 5), ENSURE_REQUIREMENTS, [plan_id=18] (122) HashAggregate [codegen id : 134] -Input [8]: [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115, sum#119, isEmpty#120, sum#121] -Keys [5]: [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, spark_grouping_id#115] -Functions [2]: [sum(sales#62), sum(number_sales#63)] -Aggregate Attributes [2]: [sum(sales#62)#123, sum(number_sales#63)#124] -Results [6]: [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, sum(sales#62)#123 AS sum(sales)#125, sum(number_sales#63)#124 AS sum(number_sales)#126] +Input [8]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, sum#102, isEmpty#103, sum#104] +Keys [5]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] +Functions [2]: [sum(sales#49), sum(number_sales#50)] +Aggregate Attributes [2]: [sum(sales#49)#105, sum(number_sales#50)#106] +Results [6]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, sum(sales#49)#105 AS sum(sales)#107, sum(number_sales#50)#106 AS sum(number_sales)#108] (123) TakeOrderedAndProject -Input [6]: [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, sum(sales)#125, sum(number_sales)#126] -Arguments: 100, [channel#111 ASC NULLS FIRST, i_brand_id#112 ASC NULLS FIRST, i_class_id#113 ASC NULLS FIRST, i_category_id#114 ASC NULLS FIRST], [channel#111, i_brand_id#112, i_class_id#113, i_category_id#114, sum(sales)#125, sum(number_sales)#126] +Input [6]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, sum(sales)#107, sum(number_sales)#108] +Arguments: 100, [channel#94 ASC NULLS FIRST, i_brand_id#95 ASC NULLS FIRST, i_class_id#96 ASC NULLS FIRST, i_category_id#97 ASC NULLS FIRST], [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, sum(sales)#107, sum(number_sales)#108] ===== Subqueries ===== -Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquery#64, [id=#65] +Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquery#51, [id=#52] * HashAggregate (142) +- Exchange (141) +- * HashAggregate (140) @@ -702,96 +702,96 @@ Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquer (124) Scan parquet default.store_sales -Output [3]: [ss_quantity#127, ss_list_price#128, ss_sold_date_sk#129] +Output [3]: [ss_quantity#109, ss_list_price#110, ss_sold_date_sk#111] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#129), dynamicpruningexpression(ss_sold_date_sk#129 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ss_sold_date_sk#111), dynamicpruningexpression(ss_sold_date_sk#111 IN dynamicpruning#12)] ReadSchema: struct (125) ColumnarToRow [codegen id : 2] -Input [3]: [ss_quantity#127, ss_list_price#128, ss_sold_date_sk#129] +Input [3]: [ss_quantity#109, ss_list_price#110, ss_sold_date_sk#111] (126) ReusedExchange [Reuses operator id: 152] -Output [1]: [d_date_sk#130] +Output [1]: [d_date_sk#112] (127) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#129] -Right keys [1]: [d_date_sk#130] +Left keys [1]: [ss_sold_date_sk#111] +Right keys [1]: [d_date_sk#112] Join condition: None (128) Project [codegen id : 2] -Output [2]: [ss_quantity#127 AS quantity#131, ss_list_price#128 AS list_price#132] -Input [4]: [ss_quantity#127, ss_list_price#128, ss_sold_date_sk#129, d_date_sk#130] +Output [2]: [ss_quantity#109 AS quantity#113, ss_list_price#110 AS list_price#114] +Input [4]: [ss_quantity#109, ss_list_price#110, ss_sold_date_sk#111, d_date_sk#112] (129) Scan parquet default.catalog_sales -Output [3]: [cs_quantity#133, cs_list_price#134, cs_sold_date_sk#135] +Output [3]: [cs_quantity#115, cs_list_price#116, cs_sold_date_sk#117] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#135), dynamicpruningexpression(cs_sold_date_sk#135 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(cs_sold_date_sk#117), dynamicpruningexpression(cs_sold_date_sk#117 IN dynamicpruning#12)] ReadSchema: struct (130) ColumnarToRow [codegen id : 4] -Input [3]: [cs_quantity#133, cs_list_price#134, cs_sold_date_sk#135] +Input [3]: [cs_quantity#115, cs_list_price#116, cs_sold_date_sk#117] (131) ReusedExchange [Reuses operator id: 152] -Output [1]: [d_date_sk#136] +Output [1]: [d_date_sk#118] (132) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#135] -Right keys [1]: [d_date_sk#136] +Left keys [1]: [cs_sold_date_sk#117] +Right keys [1]: [d_date_sk#118] Join condition: None (133) Project [codegen id : 4] -Output [2]: [cs_quantity#133 AS quantity#137, cs_list_price#134 AS list_price#138] -Input [4]: [cs_quantity#133, cs_list_price#134, cs_sold_date_sk#135, d_date_sk#136] +Output [2]: [cs_quantity#115 AS quantity#119, cs_list_price#116 AS list_price#120] +Input [4]: [cs_quantity#115, cs_list_price#116, cs_sold_date_sk#117, d_date_sk#118] (134) Scan parquet default.web_sales -Output [3]: [ws_quantity#139, ws_list_price#140, ws_sold_date_sk#141] +Output [3]: [ws_quantity#121, ws_list_price#122, ws_sold_date_sk#123] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#141), dynamicpruningexpression(ws_sold_date_sk#141 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ws_sold_date_sk#123), dynamicpruningexpression(ws_sold_date_sk#123 IN dynamicpruning#12)] ReadSchema: struct (135) ColumnarToRow [codegen id : 6] -Input [3]: [ws_quantity#139, ws_list_price#140, ws_sold_date_sk#141] +Input [3]: [ws_quantity#121, ws_list_price#122, ws_sold_date_sk#123] (136) ReusedExchange [Reuses operator id: 152] -Output [1]: [d_date_sk#142] +Output [1]: [d_date_sk#124] (137) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#141] -Right keys [1]: [d_date_sk#142] +Left keys [1]: [ws_sold_date_sk#123] +Right keys [1]: [d_date_sk#124] Join condition: None (138) Project [codegen id : 6] -Output [2]: [ws_quantity#139 AS quantity#143, ws_list_price#140 AS list_price#144] -Input [4]: [ws_quantity#139, ws_list_price#140, ws_sold_date_sk#141, d_date_sk#142] +Output [2]: [ws_quantity#121 AS quantity#125, ws_list_price#122 AS list_price#126] +Input [4]: [ws_quantity#121, ws_list_price#122, ws_sold_date_sk#123, d_date_sk#124] (139) Union (140) HashAggregate [codegen id : 7] -Input [2]: [quantity#131, list_price#132] +Input [2]: [quantity#113, list_price#114] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#131 as decimal(12,2))) * promote_precision(cast(list_price#132 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#145, count#146] -Results [2]: [sum#147, count#148] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#127, count#128] +Results [2]: [sum#129, count#130] (141) Exchange -Input [2]: [sum#147, count#148] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#149] +Input [2]: [sum#129, count#130] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=19] (142) HashAggregate [codegen id : 8] -Input [2]: [sum#147, count#148] +Input [2]: [sum#129, count#130] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#131 as decimal(12,2))) * promote_precision(cast(list_price#132 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#131 as decimal(12,2))) * promote_precision(cast(list_price#132 as decimal(12,2)))), DecimalType(18,2)))#150] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#131 as decimal(12,2))) * promote_precision(cast(list_price#132 as decimal(12,2)))), DecimalType(18,2)))#150 AS average_sales#151] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))#131] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))#131 AS average_sales#132] -Subquery:2 Hosting operator id = 124 Hosting Expression = ss_sold_date_sk#129 IN dynamicpruning#13 +Subquery:2 Hosting operator id = 124 Hosting Expression = ss_sold_date_sk#111 IN dynamicpruning#12 -Subquery:3 Hosting operator id = 129 Hosting Expression = cs_sold_date_sk#135 IN dynamicpruning#13 +Subquery:3 Hosting operator id = 129 Hosting Expression = cs_sold_date_sk#117 IN dynamicpruning#12 -Subquery:4 Hosting operator id = 134 Hosting Expression = ws_sold_date_sk#141 IN dynamicpruning#13 +Subquery:4 Hosting operator id = 134 Hosting Expression = ws_sold_date_sk#123 IN dynamicpruning#12 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 BroadcastExchange (147) @@ -802,28 +802,28 @@ BroadcastExchange (147) (143) Scan parquet default.date_dim -Output [3]: [d_date_sk#46, d_year#152, d_moy#153] +Output [3]: [d_date_sk#36, d_year#133, d_moy#134] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct (144) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#46, d_year#152, d_moy#153] +Input [3]: [d_date_sk#36, d_year#133, d_moy#134] (145) Filter [codegen id : 1] -Input [3]: [d_date_sk#46, d_year#152, d_moy#153] -Condition : ((((isnotnull(d_year#152) AND isnotnull(d_moy#153)) AND (d_year#152 = 2001)) AND (d_moy#153 = 11)) AND isnotnull(d_date_sk#46)) +Input [3]: [d_date_sk#36, d_year#133, d_moy#134] +Condition : ((((isnotnull(d_year#133) AND isnotnull(d_moy#134)) AND (d_year#133 = 2001)) AND (d_moy#134 = 11)) AND isnotnull(d_date_sk#36)) (146) Project [codegen id : 1] -Output [1]: [d_date_sk#46] -Input [3]: [d_date_sk#46, d_year#152, d_moy#153] +Output [1]: [d_date_sk#36] +Input [3]: [d_date_sk#36, d_year#133, d_moy#134] (147) BroadcastExchange -Input [1]: [d_date_sk#46] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#154] +Input [1]: [d_date_sk#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=20] -Subquery:6 Hosting operator id = 9 Hosting Expression = ss_sold_date_sk#12 IN dynamicpruning#13 +Subquery:6 Hosting operator id = 9 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#12 BroadcastExchange (152) +- * Project (151) +- * Filter (150) @@ -832,37 +832,37 @@ BroadcastExchange (152) (148) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#155] +Output [2]: [d_date_sk#13, d_year#135] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (149) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#155] +Input [2]: [d_date_sk#13, d_year#135] (150) Filter [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#155] -Condition : (((isnotnull(d_year#155) AND (d_year#155 >= 1999)) AND (d_year#155 <= 2001)) AND isnotnull(d_date_sk#14)) +Input [2]: [d_date_sk#13, d_year#135] +Condition : (((isnotnull(d_year#135) AND (d_year#135 >= 1999)) AND (d_year#135 <= 2001)) AND isnotnull(d_date_sk#13)) (151) Project [codegen id : 1] -Output [1]: [d_date_sk#14] -Input [2]: [d_date_sk#14, d_year#155] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_year#135] (152) BroadcastExchange -Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#156] +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=21] -Subquery:7 Hosting operator id = 20 Hosting Expression = cs_sold_date_sk#21 IN dynamicpruning#13 +Subquery:7 Hosting operator id = 20 Hosting Expression = cs_sold_date_sk#19 IN dynamicpruning#12 -Subquery:8 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#36 IN dynamicpruning#13 +Subquery:8 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#29 IN dynamicpruning#12 -Subquery:9 Hosting operator id = 97 Hosting Expression = ReusedSubquery Subquery scalar-subquery#64, [id=#65] +Subquery:9 Hosting operator id = 97 Hosting Expression = ReusedSubquery Subquery scalar-subquery#51, [id=#52] -Subquery:10 Hosting operator id = 80 Hosting Expression = cs_sold_date_sk#70 IN dynamicpruning#5 +Subquery:10 Hosting operator id = 80 Hosting Expression = cs_sold_date_sk#57 IN dynamicpruning#5 -Subquery:11 Hosting operator id = 116 Hosting Expression = ReusedSubquery Subquery scalar-subquery#64, [id=#65] +Subquery:11 Hosting operator id = 116 Hosting Expression = ReusedSubquery Subquery scalar-subquery#51, [id=#52] -Subquery:12 Hosting operator id = 99 Hosting Expression = ws_sold_date_sk#92 IN dynamicpruning#5 +Subquery:12 Hosting operator id = 99 Hosting Expression = ws_sold_date_sk#77 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt index 300cfd7ccbb21..fd17c5d762e21 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14a/explain.txt @@ -195,7 +195,7 @@ Condition : isnotnull(i_item_sk#19) (19) BroadcastExchange Input [4]: [i_item_sk#19, i_brand_id#20, i_class_id#21, i_category_id#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (20) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_item_sk#17] @@ -207,20 +207,20 @@ Output [4]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22] Input [6]: [cs_item_sk#17, cs_sold_date_sk#18, i_item_sk#19, i_brand_id#20, i_class_id#21, i_category_id#22] (22) ReusedExchange [Reuses operator id: 134] -Output [1]: [d_date_sk#24] +Output [1]: [d_date_sk#23] (23) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_sold_date_sk#18] -Right keys [1]: [d_date_sk#24] +Right keys [1]: [d_date_sk#23] Join condition: None (24) Project [codegen id : 3] Output [3]: [i_brand_id#20, i_class_id#21, i_category_id#22] -Input [5]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22, d_date_sk#24] +Input [5]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22, d_date_sk#23] (25) BroadcastExchange Input [3]: [i_brand_id#20, i_class_id#21, i_category_id#22] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#25] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=2] (26) BroadcastHashJoin [codegen id : 4] Left keys [6]: [coalesce(i_brand_id#14, 0), isnull(i_brand_id#14), coalesce(i_class_id#15, 0), isnull(i_class_id#15), coalesce(i_category_id#16, 0), isnull(i_category_id#16)] @@ -229,7 +229,7 @@ Join condition: None (27) BroadcastExchange Input [4]: [i_item_sk#13, i_brand_id#14, i_class_id#15, i_category_id#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (28) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_item_sk#10] @@ -241,355 +241,355 @@ Output [4]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16] Input [6]: [ss_item_sk#10, ss_sold_date_sk#11, i_item_sk#13, i_brand_id#14, i_class_id#15, i_category_id#16] (30) ReusedExchange [Reuses operator id: 134] -Output [1]: [d_date_sk#27] +Output [1]: [d_date_sk#24] (31) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_sold_date_sk#11] -Right keys [1]: [d_date_sk#27] +Right keys [1]: [d_date_sk#24] Join condition: None (32) Project [codegen id : 6] -Output [3]: [i_brand_id#14 AS brand_id#28, i_class_id#15 AS class_id#29, i_category_id#16 AS category_id#30] -Input [5]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16, d_date_sk#27] +Output [3]: [i_brand_id#14 AS brand_id#25, i_class_id#15 AS class_id#26, i_category_id#16 AS category_id#27] +Input [5]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16, d_date_sk#24] (33) HashAggregate [codegen id : 6] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, category_id#30] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] +Results [3]: [brand_id#25, class_id#26, category_id#27] (34) Exchange -Input [3]: [brand_id#28, class_id#29, category_id#30] -Arguments: hashpartitioning(brand_id#28, class_id#29, category_id#30, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: hashpartitioning(brand_id#25, class_id#26, category_id#27, 5), ENSURE_REQUIREMENTS, [plan_id=4] (35) HashAggregate [codegen id : 10] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, category_id#30] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] +Results [3]: [brand_id#25, class_id#26, category_id#27] (36) Scan parquet default.web_sales -Output [2]: [ws_item_sk#32, ws_sold_date_sk#33] +Output [2]: [ws_item_sk#28, ws_sold_date_sk#29] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#33), dynamicpruningexpression(ws_sold_date_sk#33 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ws_sold_date_sk#29), dynamicpruningexpression(ws_sold_date_sk#29 IN dynamicpruning#12)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 9] -Input [2]: [ws_item_sk#32, ws_sold_date_sk#33] +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] (38) Filter [codegen id : 9] -Input [2]: [ws_item_sk#32, ws_sold_date_sk#33] -Condition : isnotnull(ws_item_sk#32) +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] +Condition : isnotnull(ws_item_sk#28) (39) ReusedExchange [Reuses operator id: 19] -Output [4]: [i_item_sk#34, i_brand_id#35, i_class_id#36, i_category_id#37] +Output [4]: [i_item_sk#30, i_brand_id#31, i_class_id#32, i_category_id#33] (40) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_item_sk#32] -Right keys [1]: [i_item_sk#34] +Left keys [1]: [ws_item_sk#28] +Right keys [1]: [i_item_sk#30] Join condition: None (41) Project [codegen id : 9] -Output [4]: [ws_sold_date_sk#33, i_brand_id#35, i_class_id#36, i_category_id#37] -Input [6]: [ws_item_sk#32, ws_sold_date_sk#33, i_item_sk#34, i_brand_id#35, i_class_id#36, i_category_id#37] +Output [4]: [ws_sold_date_sk#29, i_brand_id#31, i_class_id#32, i_category_id#33] +Input [6]: [ws_item_sk#28, ws_sold_date_sk#29, i_item_sk#30, i_brand_id#31, i_class_id#32, i_category_id#33] (42) ReusedExchange [Reuses operator id: 134] -Output [1]: [d_date_sk#38] +Output [1]: [d_date_sk#34] (43) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_sold_date_sk#33] -Right keys [1]: [d_date_sk#38] +Left keys [1]: [ws_sold_date_sk#29] +Right keys [1]: [d_date_sk#34] Join condition: None (44) Project [codegen id : 9] -Output [3]: [i_brand_id#35, i_class_id#36, i_category_id#37] -Input [5]: [ws_sold_date_sk#33, i_brand_id#35, i_class_id#36, i_category_id#37, d_date_sk#38] +Output [3]: [i_brand_id#31, i_class_id#32, i_category_id#33] +Input [5]: [ws_sold_date_sk#29, i_brand_id#31, i_class_id#32, i_category_id#33, d_date_sk#34] (45) BroadcastExchange -Input [3]: [i_brand_id#35, i_class_id#36, i_category_id#37] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#39] +Input [3]: [i_brand_id#31, i_class_id#32, i_category_id#33] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=5] (46) BroadcastHashJoin [codegen id : 10] -Left keys [6]: [coalesce(brand_id#28, 0), isnull(brand_id#28), coalesce(class_id#29, 0), isnull(class_id#29), coalesce(category_id#30, 0), isnull(category_id#30)] -Right keys [6]: [coalesce(i_brand_id#35, 0), isnull(i_brand_id#35), coalesce(i_class_id#36, 0), isnull(i_class_id#36), coalesce(i_category_id#37, 0), isnull(i_category_id#37)] +Left keys [6]: [coalesce(brand_id#25, 0), isnull(brand_id#25), coalesce(class_id#26, 0), isnull(class_id#26), coalesce(category_id#27, 0), isnull(category_id#27)] +Right keys [6]: [coalesce(i_brand_id#31, 0), isnull(i_brand_id#31), coalesce(i_class_id#32, 0), isnull(i_class_id#32), coalesce(i_category_id#33, 0), isnull(i_category_id#33)] Join condition: None (47) BroadcastExchange -Input [3]: [brand_id#28, class_id#29, category_id#30] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#40] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=6] (48) BroadcastHashJoin [codegen id : 11] Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] -Right keys [3]: [brand_id#28, class_id#29, category_id#30] +Right keys [3]: [brand_id#25, class_id#26, category_id#27] Join condition: None (49) Project [codegen id : 11] -Output [1]: [i_item_sk#6 AS ss_item_sk#41] -Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#28, class_id#29, category_id#30] +Output [1]: [i_item_sk#6 AS ss_item_sk#35] +Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#25, class_id#26, category_id#27] (50) BroadcastExchange -Input [1]: [ss_item_sk#41] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] +Input [1]: [ss_item_sk#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] (51) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [ss_item_sk#41] +Right keys [1]: [ss_item_sk#35] Join condition: None (52) Scan parquet default.item -Output [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] +Output [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (53) ColumnarToRow [codegen id : 23] -Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] +Input [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] (54) Filter [codegen id : 23] -Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -Condition : isnotnull(i_item_sk#43) +Input [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] +Condition : isnotnull(i_item_sk#36) (55) ReusedExchange [Reuses operator id: 50] -Output [1]: [ss_item_sk#41] +Output [1]: [ss_item_sk#35] (56) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [i_item_sk#43] -Right keys [1]: [ss_item_sk#41] +Left keys [1]: [i_item_sk#36] +Right keys [1]: [ss_item_sk#35] Join condition: None (57) BroadcastExchange -Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#47] +Input [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] (58) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#43] +Right keys [1]: [i_item_sk#36] Join condition: None (59) Project [codegen id : 25] -Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46] -Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] +Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#37, i_class_id#38, i_category_id#39] +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] (60) ReusedExchange [Reuses operator id: 129] -Output [1]: [d_date_sk#48] +Output [1]: [d_date_sk#40] (61) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#48] +Right keys [1]: [d_date_sk#40] Join condition: None (62) Project [codegen id : 25] -Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] -Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46, d_date_sk#48] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#37, i_class_id#38, i_category_id#39] +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#37, i_class_id#38, i_category_id#39, d_date_sk#40] (63) HashAggregate [codegen id : 25] -Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] -Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#37, i_class_id#38, i_category_id#39] +Keys [3]: [i_brand_id#37, i_class_id#38, i_category_id#39] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] -Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] +Aggregate Attributes [3]: [sum#41, isEmpty#42, count#43] +Results [6]: [i_brand_id#37, i_class_id#38, i_category_id#39, sum#44, isEmpty#45, count#46] (64) Exchange -Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] -Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), ENSURE_REQUIREMENTS, [id=#55] +Input [6]: [i_brand_id#37, i_class_id#38, i_category_id#39, sum#44, isEmpty#45, count#46] +Arguments: hashpartitioning(i_brand_id#37, i_class_id#38, i_category_id#39, 5), ENSURE_REQUIREMENTS, [plan_id=9] (65) HashAggregate [codegen id : 26] -Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] -Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] +Input [6]: [i_brand_id#37, i_class_id#38, i_category_id#39, sum#44, isEmpty#45, count#46] +Keys [3]: [i_brand_id#37, i_class_id#38, i_category_id#39] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56, count(1)#57] -Results [5]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56 AS sales#58, count(1)#57 AS number_sales#59] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47, count(1)#48] +Results [5]: [i_brand_id#37, i_class_id#38, i_category_id#39, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47 AS sales#49, count(1)#48 AS number_sales#50] (66) Filter [codegen id : 26] -Input [5]: [i_brand_id#44, i_class_id#45, i_category_id#46, sales#58, number_sales#59] -Condition : (isnotnull(sales#58) AND (cast(sales#58 as decimal(32,6)) > cast(Subquery scalar-subquery#60, [id=#61] as decimal(32,6)))) +Input [5]: [i_brand_id#37, i_class_id#38, i_category_id#39, sales#49, number_sales#50] +Condition : (isnotnull(sales#49) AND (cast(sales#49 as decimal(32,6)) > cast(Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) (67) Project [codegen id : 26] -Output [6]: [sales#58, number_sales#59, store AS channel#62, i_brand_id#44, i_class_id#45, i_category_id#46] -Input [5]: [i_brand_id#44, i_class_id#45, i_category_id#46, sales#58, number_sales#59] +Output [6]: [sales#49, number_sales#50, store AS channel#53, i_brand_id#37, i_class_id#38, i_category_id#39] +Input [5]: [i_brand_id#37, i_class_id#38, i_category_id#39, sales#49, number_sales#50] (68) Scan parquet default.catalog_sales -Output [4]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66] +Output [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#66), dynamicpruningexpression(cs_sold_date_sk#66 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#57), dynamicpruningexpression(cs_sold_date_sk#57 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (69) ColumnarToRow [codegen id : 51] -Input [4]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66] +Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] (70) Filter [codegen id : 51] -Input [4]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66] -Condition : isnotnull(cs_item_sk#63) +Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] +Condition : isnotnull(cs_item_sk#54) (71) ReusedExchange [Reuses operator id: 50] -Output [1]: [ss_item_sk#41] +Output [1]: [ss_item_sk#35] (72) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [cs_item_sk#63] -Right keys [1]: [ss_item_sk#41] +Left keys [1]: [cs_item_sk#54] +Right keys [1]: [ss_item_sk#35] Join condition: None (73) ReusedExchange [Reuses operator id: 57] -Output [4]: [i_item_sk#67, i_brand_id#68, i_class_id#69, i_category_id#70] +Output [4]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61] (74) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [cs_item_sk#63] -Right keys [1]: [i_item_sk#67] +Left keys [1]: [cs_item_sk#54] +Right keys [1]: [i_item_sk#58] Join condition: None (75) Project [codegen id : 51] -Output [6]: [cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_brand_id#68, i_class_id#69, i_category_id#70] -Input [8]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_item_sk#67, i_brand_id#68, i_class_id#69, i_category_id#70] +Output [6]: [cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57, i_brand_id#59, i_class_id#60, i_category_id#61] +Input [8]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61] (76) ReusedExchange [Reuses operator id: 129] -Output [1]: [d_date_sk#71] +Output [1]: [d_date_sk#62] (77) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [cs_sold_date_sk#66] -Right keys [1]: [d_date_sk#71] +Left keys [1]: [cs_sold_date_sk#57] +Right keys [1]: [d_date_sk#62] Join condition: None (78) Project [codegen id : 51] -Output [5]: [cs_quantity#64, cs_list_price#65, i_brand_id#68, i_class_id#69, i_category_id#70] -Input [7]: [cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_brand_id#68, i_class_id#69, i_category_id#70, d_date_sk#71] +Output [5]: [cs_quantity#55, cs_list_price#56, i_brand_id#59, i_class_id#60, i_category_id#61] +Input [7]: [cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57, i_brand_id#59, i_class_id#60, i_category_id#61, d_date_sk#62] (79) HashAggregate [codegen id : 51] -Input [5]: [cs_quantity#64, cs_list_price#65, i_brand_id#68, i_class_id#69, i_category_id#70] -Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#72, isEmpty#73, count#74] -Results [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] +Input [5]: [cs_quantity#55, cs_list_price#56, i_brand_id#59, i_class_id#60, i_category_id#61] +Keys [3]: [i_brand_id#59, i_class_id#60, i_category_id#61] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#63, isEmpty#64, count#65] +Results [6]: [i_brand_id#59, i_class_id#60, i_category_id#61, sum#66, isEmpty#67, count#68] (80) Exchange -Input [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] -Arguments: hashpartitioning(i_brand_id#68, i_class_id#69, i_category_id#70, 5), ENSURE_REQUIREMENTS, [id=#78] +Input [6]: [i_brand_id#59, i_class_id#60, i_category_id#61, sum#66, isEmpty#67, count#68] +Arguments: hashpartitioning(i_brand_id#59, i_class_id#60, i_category_id#61, 5), ENSURE_REQUIREMENTS, [plan_id=10] (81) HashAggregate [codegen id : 52] -Input [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] -Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#79, count(1)#80] -Results [5]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#79 AS sales#81, count(1)#80 AS number_sales#82] +Input [6]: [i_brand_id#59, i_class_id#60, i_category_id#61, sum#66, isEmpty#67, count#68] +Keys [3]: [i_brand_id#59, i_class_id#60, i_category_id#61] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#69, count(1)#70] +Results [5]: [i_brand_id#59, i_class_id#60, i_category_id#61, sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#69 AS sales#71, count(1)#70 AS number_sales#72] (82) Filter [codegen id : 52] -Input [5]: [i_brand_id#68, i_class_id#69, i_category_id#70, sales#81, number_sales#82] -Condition : (isnotnull(sales#81) AND (cast(sales#81 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#60, [id=#61] as decimal(32,6)))) +Input [5]: [i_brand_id#59, i_class_id#60, i_category_id#61, sales#71, number_sales#72] +Condition : (isnotnull(sales#71) AND (cast(sales#71 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) (83) Project [codegen id : 52] -Output [6]: [sales#81, number_sales#82, catalog AS channel#83, i_brand_id#68, i_class_id#69, i_category_id#70] -Input [5]: [i_brand_id#68, i_class_id#69, i_category_id#70, sales#81, number_sales#82] +Output [6]: [sales#71, number_sales#72, catalog AS channel#73, i_brand_id#59, i_class_id#60, i_category_id#61] +Input [5]: [i_brand_id#59, i_class_id#60, i_category_id#61, sales#71, number_sales#72] (84) Scan parquet default.web_sales -Output [4]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87] +Output [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#87), dynamicpruningexpression(ws_sold_date_sk#87 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#77), dynamicpruningexpression(ws_sold_date_sk#77 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (85) ColumnarToRow [codegen id : 77] -Input [4]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87] +Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] (86) Filter [codegen id : 77] -Input [4]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87] -Condition : isnotnull(ws_item_sk#84) +Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] +Condition : isnotnull(ws_item_sk#74) (87) ReusedExchange [Reuses operator id: 50] -Output [1]: [ss_item_sk#41] +Output [1]: [ss_item_sk#35] (88) BroadcastHashJoin [codegen id : 77] -Left keys [1]: [ws_item_sk#84] -Right keys [1]: [ss_item_sk#41] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [ss_item_sk#35] Join condition: None (89) ReusedExchange [Reuses operator id: 57] -Output [4]: [i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91] +Output [4]: [i_item_sk#78, i_brand_id#79, i_class_id#80, i_category_id#81] (90) BroadcastHashJoin [codegen id : 77] -Left keys [1]: [ws_item_sk#84] -Right keys [1]: [i_item_sk#88] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [i_item_sk#78] Join condition: None (91) Project [codegen id : 77] -Output [6]: [ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91] -Input [8]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91] +Output [6]: [ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77, i_brand_id#79, i_class_id#80, i_category_id#81] +Input [8]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77, i_item_sk#78, i_brand_id#79, i_class_id#80, i_category_id#81] (92) ReusedExchange [Reuses operator id: 129] -Output [1]: [d_date_sk#92] +Output [1]: [d_date_sk#82] (93) BroadcastHashJoin [codegen id : 77] -Left keys [1]: [ws_sold_date_sk#87] -Right keys [1]: [d_date_sk#92] +Left keys [1]: [ws_sold_date_sk#77] +Right keys [1]: [d_date_sk#82] Join condition: None (94) Project [codegen id : 77] -Output [5]: [ws_quantity#85, ws_list_price#86, i_brand_id#89, i_class_id#90, i_category_id#91] -Input [7]: [ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91, d_date_sk#92] +Output [5]: [ws_quantity#75, ws_list_price#76, i_brand_id#79, i_class_id#80, i_category_id#81] +Input [7]: [ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77, i_brand_id#79, i_class_id#80, i_category_id#81, d_date_sk#82] (95) HashAggregate [codegen id : 77] -Input [5]: [ws_quantity#85, ws_list_price#86, i_brand_id#89, i_class_id#90, i_category_id#91] -Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#93, isEmpty#94, count#95] -Results [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] +Input [5]: [ws_quantity#75, ws_list_price#76, i_brand_id#79, i_class_id#80, i_category_id#81] +Keys [3]: [i_brand_id#79, i_class_id#80, i_category_id#81] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#83, isEmpty#84, count#85] +Results [6]: [i_brand_id#79, i_class_id#80, i_category_id#81, sum#86, isEmpty#87, count#88] (96) Exchange -Input [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] -Arguments: hashpartitioning(i_brand_id#89, i_class_id#90, i_category_id#91, 5), ENSURE_REQUIREMENTS, [id=#99] +Input [6]: [i_brand_id#79, i_class_id#80, i_category_id#81, sum#86, isEmpty#87, count#88] +Arguments: hashpartitioning(i_brand_id#79, i_class_id#80, i_category_id#81, 5), ENSURE_REQUIREMENTS, [plan_id=11] (97) HashAggregate [codegen id : 78] -Input [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] -Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2)))#100, count(1)#101] -Results [5]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2)))#100 AS sales#102, count(1)#101 AS number_sales#103] +Input [6]: [i_brand_id#79, i_class_id#80, i_category_id#81, sum#86, isEmpty#87, count#88] +Keys [3]: [i_brand_id#79, i_class_id#80, i_category_id#81] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2)))#89, count(1)#90] +Results [5]: [i_brand_id#79, i_class_id#80, i_category_id#81, sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2)))#89 AS sales#91, count(1)#90 AS number_sales#92] (98) Filter [codegen id : 78] -Input [5]: [i_brand_id#89, i_class_id#90, i_category_id#91, sales#102, number_sales#103] -Condition : (isnotnull(sales#102) AND (cast(sales#102 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#60, [id=#61] as decimal(32,6)))) +Input [5]: [i_brand_id#79, i_class_id#80, i_category_id#81, sales#91, number_sales#92] +Condition : (isnotnull(sales#91) AND (cast(sales#91 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#51, [id=#52] as decimal(32,6)))) (99) Project [codegen id : 78] -Output [6]: [sales#102, number_sales#103, web AS channel#104, i_brand_id#89, i_class_id#90, i_category_id#91] -Input [5]: [i_brand_id#89, i_class_id#90, i_category_id#91, sales#102, number_sales#103] +Output [6]: [sales#91, number_sales#92, web AS channel#93, i_brand_id#79, i_class_id#80, i_category_id#81] +Input [5]: [i_brand_id#79, i_class_id#80, i_category_id#81, sales#91, number_sales#92] (100) Union (101) Expand [codegen id : 79] -Input [6]: [sales#58, number_sales#59, channel#62, i_brand_id#44, i_class_id#45, i_category_id#46] -Arguments: [[sales#58, number_sales#59, channel#62, i_brand_id#44, i_class_id#45, i_category_id#46, 0], [sales#58, number_sales#59, channel#62, i_brand_id#44, i_class_id#45, null, 1], [sales#58, number_sales#59, channel#62, i_brand_id#44, null, null, 3], [sales#58, number_sales#59, channel#62, null, null, null, 7], [sales#58, number_sales#59, null, null, null, null, 15]], [sales#58, number_sales#59, channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109] +Input [6]: [sales#49, number_sales#50, channel#53, i_brand_id#37, i_class_id#38, i_category_id#39] +Arguments: [[sales#49, number_sales#50, channel#53, i_brand_id#37, i_class_id#38, i_category_id#39, 0], [sales#49, number_sales#50, channel#53, i_brand_id#37, i_class_id#38, null, 1], [sales#49, number_sales#50, channel#53, i_brand_id#37, null, null, 3], [sales#49, number_sales#50, channel#53, null, null, null, 7], [sales#49, number_sales#50, null, null, null, null, 15]], [sales#49, number_sales#50, channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] (102) HashAggregate [codegen id : 79] -Input [7]: [sales#58, number_sales#59, channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109] -Keys [5]: [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109] -Functions [2]: [partial_sum(sales#58), partial_sum(number_sales#59)] -Aggregate Attributes [3]: [sum#110, isEmpty#111, sum#112] -Results [8]: [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109, sum#113, isEmpty#114, sum#115] +Input [7]: [sales#49, number_sales#50, channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] +Keys [5]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] +Functions [2]: [partial_sum(sales#49), partial_sum(number_sales#50)] +Aggregate Attributes [3]: [sum#99, isEmpty#100, sum#101] +Results [8]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, sum#102, isEmpty#103, sum#104] (103) Exchange -Input [8]: [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109, sum#113, isEmpty#114, sum#115] -Arguments: hashpartitioning(channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109, 5), ENSURE_REQUIREMENTS, [id=#116] +Input [8]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, sum#102, isEmpty#103, sum#104] +Arguments: hashpartitioning(channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, 5), ENSURE_REQUIREMENTS, [plan_id=12] (104) HashAggregate [codegen id : 80] -Input [8]: [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109, sum#113, isEmpty#114, sum#115] -Keys [5]: [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, spark_grouping_id#109] -Functions [2]: [sum(sales#58), sum(number_sales#59)] -Aggregate Attributes [2]: [sum(sales#58)#117, sum(number_sales#59)#118] -Results [6]: [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, sum(sales#58)#117 AS sum(sales)#119, sum(number_sales#59)#118 AS sum(number_sales)#120] +Input [8]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98, sum#102, isEmpty#103, sum#104] +Keys [5]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, spark_grouping_id#98] +Functions [2]: [sum(sales#49), sum(number_sales#50)] +Aggregate Attributes [2]: [sum(sales#49)#105, sum(number_sales#50)#106] +Results [6]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, sum(sales#49)#105 AS sum(sales)#107, sum(number_sales#50)#106 AS sum(number_sales)#108] (105) TakeOrderedAndProject -Input [6]: [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, sum(sales)#119, sum(number_sales)#120] -Arguments: 100, [channel#105 ASC NULLS FIRST, i_brand_id#106 ASC NULLS FIRST, i_class_id#107 ASC NULLS FIRST, i_category_id#108 ASC NULLS FIRST], [channel#105, i_brand_id#106, i_class_id#107, i_category_id#108, sum(sales)#119, sum(number_sales)#120] +Input [6]: [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, sum(sales)#107, sum(number_sales)#108] +Arguments: 100, [channel#94 ASC NULLS FIRST, i_brand_id#95 ASC NULLS FIRST, i_class_id#96 ASC NULLS FIRST, i_category_id#97 ASC NULLS FIRST], [channel#94, i_brand_id#95, i_class_id#96, i_category_id#97, sum(sales)#107, sum(number_sales)#108] ===== Subqueries ===== -Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#60, [id=#61] +Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#51, [id=#52] * HashAggregate (124) +- Exchange (123) +- * HashAggregate (122) @@ -612,96 +612,96 @@ Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquer (106) Scan parquet default.store_sales -Output [3]: [ss_quantity#121, ss_list_price#122, ss_sold_date_sk#123] +Output [3]: [ss_quantity#109, ss_list_price#110, ss_sold_date_sk#111] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#123), dynamicpruningexpression(ss_sold_date_sk#123 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ss_sold_date_sk#111), dynamicpruningexpression(ss_sold_date_sk#111 IN dynamicpruning#12)] ReadSchema: struct (107) ColumnarToRow [codegen id : 2] -Input [3]: [ss_quantity#121, ss_list_price#122, ss_sold_date_sk#123] +Input [3]: [ss_quantity#109, ss_list_price#110, ss_sold_date_sk#111] (108) ReusedExchange [Reuses operator id: 134] -Output [1]: [d_date_sk#124] +Output [1]: [d_date_sk#112] (109) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#123] -Right keys [1]: [d_date_sk#124] +Left keys [1]: [ss_sold_date_sk#111] +Right keys [1]: [d_date_sk#112] Join condition: None (110) Project [codegen id : 2] -Output [2]: [ss_quantity#121 AS quantity#125, ss_list_price#122 AS list_price#126] -Input [4]: [ss_quantity#121, ss_list_price#122, ss_sold_date_sk#123, d_date_sk#124] +Output [2]: [ss_quantity#109 AS quantity#113, ss_list_price#110 AS list_price#114] +Input [4]: [ss_quantity#109, ss_list_price#110, ss_sold_date_sk#111, d_date_sk#112] (111) Scan parquet default.catalog_sales -Output [3]: [cs_quantity#127, cs_list_price#128, cs_sold_date_sk#129] +Output [3]: [cs_quantity#115, cs_list_price#116, cs_sold_date_sk#117] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#129), dynamicpruningexpression(cs_sold_date_sk#129 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(cs_sold_date_sk#117), dynamicpruningexpression(cs_sold_date_sk#117 IN dynamicpruning#12)] ReadSchema: struct (112) ColumnarToRow [codegen id : 4] -Input [3]: [cs_quantity#127, cs_list_price#128, cs_sold_date_sk#129] +Input [3]: [cs_quantity#115, cs_list_price#116, cs_sold_date_sk#117] (113) ReusedExchange [Reuses operator id: 134] -Output [1]: [d_date_sk#130] +Output [1]: [d_date_sk#118] (114) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#129] -Right keys [1]: [d_date_sk#130] +Left keys [1]: [cs_sold_date_sk#117] +Right keys [1]: [d_date_sk#118] Join condition: None (115) Project [codegen id : 4] -Output [2]: [cs_quantity#127 AS quantity#131, cs_list_price#128 AS list_price#132] -Input [4]: [cs_quantity#127, cs_list_price#128, cs_sold_date_sk#129, d_date_sk#130] +Output [2]: [cs_quantity#115 AS quantity#119, cs_list_price#116 AS list_price#120] +Input [4]: [cs_quantity#115, cs_list_price#116, cs_sold_date_sk#117, d_date_sk#118] (116) Scan parquet default.web_sales -Output [3]: [ws_quantity#133, ws_list_price#134, ws_sold_date_sk#135] +Output [3]: [ws_quantity#121, ws_list_price#122, ws_sold_date_sk#123] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#135), dynamicpruningexpression(ws_sold_date_sk#135 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ws_sold_date_sk#123), dynamicpruningexpression(ws_sold_date_sk#123 IN dynamicpruning#12)] ReadSchema: struct (117) ColumnarToRow [codegen id : 6] -Input [3]: [ws_quantity#133, ws_list_price#134, ws_sold_date_sk#135] +Input [3]: [ws_quantity#121, ws_list_price#122, ws_sold_date_sk#123] (118) ReusedExchange [Reuses operator id: 134] -Output [1]: [d_date_sk#136] +Output [1]: [d_date_sk#124] (119) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#135] -Right keys [1]: [d_date_sk#136] +Left keys [1]: [ws_sold_date_sk#123] +Right keys [1]: [d_date_sk#124] Join condition: None (120) Project [codegen id : 6] -Output [2]: [ws_quantity#133 AS quantity#137, ws_list_price#134 AS list_price#138] -Input [4]: [ws_quantity#133, ws_list_price#134, ws_sold_date_sk#135, d_date_sk#136] +Output [2]: [ws_quantity#121 AS quantity#125, ws_list_price#122 AS list_price#126] +Input [4]: [ws_quantity#121, ws_list_price#122, ws_sold_date_sk#123, d_date_sk#124] (121) Union (122) HashAggregate [codegen id : 7] -Input [2]: [quantity#125, list_price#126] +Input [2]: [quantity#113, list_price#114] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#139, count#140] -Results [2]: [sum#141, count#142] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#127, count#128] +Results [2]: [sum#129, count#130] (123) Exchange -Input [2]: [sum#141, count#142] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#143] +Input [2]: [sum#129, count#130] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=13] (124) HashAggregate [codegen id : 8] -Input [2]: [sum#141, count#142] +Input [2]: [sum#129, count#130] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2)))#144] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#125 as decimal(12,2))) * promote_precision(cast(list_price#126 as decimal(12,2)))), DecimalType(18,2)))#144 AS average_sales#145] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))#131] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#113 as decimal(12,2))) * promote_precision(cast(list_price#114 as decimal(12,2)))), DecimalType(18,2)))#131 AS average_sales#132] -Subquery:2 Hosting operator id = 106 Hosting Expression = ss_sold_date_sk#123 IN dynamicpruning#12 +Subquery:2 Hosting operator id = 106 Hosting Expression = ss_sold_date_sk#111 IN dynamicpruning#12 -Subquery:3 Hosting operator id = 111 Hosting Expression = cs_sold_date_sk#129 IN dynamicpruning#12 +Subquery:3 Hosting operator id = 111 Hosting Expression = cs_sold_date_sk#117 IN dynamicpruning#12 -Subquery:4 Hosting operator id = 116 Hosting Expression = ws_sold_date_sk#135 IN dynamicpruning#12 +Subquery:4 Hosting operator id = 116 Hosting Expression = ws_sold_date_sk#123 IN dynamicpruning#12 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 BroadcastExchange (129) @@ -712,26 +712,26 @@ BroadcastExchange (129) (125) Scan parquet default.date_dim -Output [3]: [d_date_sk#48, d_year#146, d_moy#147] +Output [3]: [d_date_sk#40, d_year#133, d_moy#134] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct (126) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#48, d_year#146, d_moy#147] +Input [3]: [d_date_sk#40, d_year#133, d_moy#134] (127) Filter [codegen id : 1] -Input [3]: [d_date_sk#48, d_year#146, d_moy#147] -Condition : ((((isnotnull(d_year#146) AND isnotnull(d_moy#147)) AND (d_year#146 = 2001)) AND (d_moy#147 = 11)) AND isnotnull(d_date_sk#48)) +Input [3]: [d_date_sk#40, d_year#133, d_moy#134] +Condition : ((((isnotnull(d_year#133) AND isnotnull(d_moy#134)) AND (d_year#133 = 2001)) AND (d_moy#134 = 11)) AND isnotnull(d_date_sk#40)) (128) Project [codegen id : 1] -Output [1]: [d_date_sk#48] -Input [3]: [d_date_sk#48, d_year#146, d_moy#147] +Output [1]: [d_date_sk#40] +Input [3]: [d_date_sk#40, d_year#133, d_moy#134] (129) BroadcastExchange -Input [1]: [d_date_sk#48] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#148] +Input [1]: [d_date_sk#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] Subquery:6 Hosting operator id = 7 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#12 BroadcastExchange (134) @@ -742,37 +742,37 @@ BroadcastExchange (134) (130) Scan parquet default.date_dim -Output [2]: [d_date_sk#27, d_year#149] +Output [2]: [d_date_sk#24, d_year#135] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (131) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#27, d_year#149] +Input [2]: [d_date_sk#24, d_year#135] (132) Filter [codegen id : 1] -Input [2]: [d_date_sk#27, d_year#149] -Condition : (((isnotnull(d_year#149) AND (d_year#149 >= 1999)) AND (d_year#149 <= 2001)) AND isnotnull(d_date_sk#27)) +Input [2]: [d_date_sk#24, d_year#135] +Condition : (((isnotnull(d_year#135) AND (d_year#135 >= 1999)) AND (d_year#135 <= 2001)) AND isnotnull(d_date_sk#24)) (133) Project [codegen id : 1] -Output [1]: [d_date_sk#27] -Input [2]: [d_date_sk#27, d_year#149] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_year#135] (134) BroadcastExchange -Input [1]: [d_date_sk#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#150] +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=15] Subquery:7 Hosting operator id = 13 Hosting Expression = cs_sold_date_sk#18 IN dynamicpruning#12 -Subquery:8 Hosting operator id = 36 Hosting Expression = ws_sold_date_sk#33 IN dynamicpruning#12 +Subquery:8 Hosting operator id = 36 Hosting Expression = ws_sold_date_sk#29 IN dynamicpruning#12 -Subquery:9 Hosting operator id = 82 Hosting Expression = ReusedSubquery Subquery scalar-subquery#60, [id=#61] +Subquery:9 Hosting operator id = 82 Hosting Expression = ReusedSubquery Subquery scalar-subquery#51, [id=#52] -Subquery:10 Hosting operator id = 68 Hosting Expression = cs_sold_date_sk#66 IN dynamicpruning#5 +Subquery:10 Hosting operator id = 68 Hosting Expression = cs_sold_date_sk#57 IN dynamicpruning#5 -Subquery:11 Hosting operator id = 98 Hosting Expression = ReusedSubquery Subquery scalar-subquery#60, [id=#61] +Subquery:11 Hosting operator id = 98 Hosting Expression = ReusedSubquery Subquery scalar-subquery#51, [id=#52] -Subquery:12 Hosting operator id = 84 Hosting Expression = ws_sold_date_sk#87 IN dynamicpruning#5 +Subquery:12 Hosting operator id = 84 Hosting Expression = ws_sold_date_sk#77 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt index 3f0acc0ea73be..20e5edb303893 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b.sf100/explain.txt @@ -117,437 +117,437 @@ Condition : isnotnull(ss_item_sk#1) (4) Exchange Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#6] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.item -Output [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct (7) ColumnarToRow [codegen id : 19] -Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] (8) Filter [codegen id : 19] -Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] -Condition : ((isnotnull(i_brand_id#8) AND isnotnull(i_class_id#9)) AND isnotnull(i_category_id#10)) +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : ((isnotnull(i_brand_id#7) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) (9) Scan parquet default.store_sales -Output [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Output [2]: [ss_item_sk#10, ss_sold_date_sk#11] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#12), dynamicpruningexpression(ss_sold_date_sk#12 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ss_sold_date_sk#11), dynamicpruningexpression(ss_sold_date_sk#11 IN dynamicpruning#12)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (10) ColumnarToRow [codegen id : 11] -Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Input [2]: [ss_item_sk#10, ss_sold_date_sk#11] (11) Filter [codegen id : 11] -Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] -Condition : isnotnull(ss_item_sk#11) +Input [2]: [ss_item_sk#10, ss_sold_date_sk#11] +Condition : isnotnull(ss_item_sk#10) (12) ReusedExchange [Reuses operator id: 132] -Output [1]: [d_date_sk#14] +Output [1]: [d_date_sk#13] (13) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_sold_date_sk#12] -Right keys [1]: [d_date_sk#14] +Left keys [1]: [ss_sold_date_sk#11] +Right keys [1]: [d_date_sk#13] Join condition: None (14) Project [codegen id : 11] -Output [1]: [ss_item_sk#11] -Input [3]: [ss_item_sk#11, ss_sold_date_sk#12, d_date_sk#14] +Output [1]: [ss_item_sk#10] +Input [3]: [ss_item_sk#10, ss_sold_date_sk#11, d_date_sk#13] (15) Scan parquet default.item -Output [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] +Output [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct (16) ColumnarToRow [codegen id : 4] -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] (17) Filter [codegen id : 4] -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Condition : (((isnotnull(i_item_sk#15) AND isnotnull(i_brand_id#16)) AND isnotnull(i_class_id#17)) AND isnotnull(i_category_id#18)) +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Condition : (((isnotnull(i_item_sk#14) AND isnotnull(i_brand_id#15)) AND isnotnull(i_class_id#16)) AND isnotnull(i_category_id#17)) (18) Exchange -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Arguments: hashpartitioning(coalesce(i_brand_id#16, 0), isnull(i_brand_id#16), coalesce(i_class_id#17, 0), isnull(i_class_id#17), coalesce(i_category_id#18, 0), isnull(i_category_id#18), 5), ENSURE_REQUIREMENTS, [id=#19] +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Arguments: hashpartitioning(coalesce(i_brand_id#15, 0), isnull(i_brand_id#15), coalesce(i_class_id#16, 0), isnull(i_class_id#16), coalesce(i_category_id#17, 0), isnull(i_category_id#17), 5), ENSURE_REQUIREMENTS, [plan_id=2] (19) Sort [codegen id : 5] -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Arguments: [coalesce(i_brand_id#16, 0) ASC NULLS FIRST, isnull(i_brand_id#16) ASC NULLS FIRST, coalesce(i_class_id#17, 0) ASC NULLS FIRST, isnull(i_class_id#17) ASC NULLS FIRST, coalesce(i_category_id#18, 0) ASC NULLS FIRST, isnull(i_category_id#18) ASC NULLS FIRST], false, 0 +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Arguments: [coalesce(i_brand_id#15, 0) ASC NULLS FIRST, isnull(i_brand_id#15) ASC NULLS FIRST, coalesce(i_class_id#16, 0) ASC NULLS FIRST, isnull(i_class_id#16) ASC NULLS FIRST, coalesce(i_category_id#17, 0) ASC NULLS FIRST, isnull(i_category_id#17) ASC NULLS FIRST], false, 0 (20) Scan parquet default.catalog_sales -Output [2]: [cs_item_sk#20, cs_sold_date_sk#21] +Output [2]: [cs_item_sk#18, cs_sold_date_sk#19] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#21), dynamicpruningexpression(cs_sold_date_sk#21 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(cs_sold_date_sk#19), dynamicpruningexpression(cs_sold_date_sk#19 IN dynamicpruning#12)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 8] -Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] +Input [2]: [cs_item_sk#18, cs_sold_date_sk#19] (22) Filter [codegen id : 8] -Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] -Condition : isnotnull(cs_item_sk#20) +Input [2]: [cs_item_sk#18, cs_sold_date_sk#19] +Condition : isnotnull(cs_item_sk#18) (23) ReusedExchange [Reuses operator id: 132] -Output [1]: [d_date_sk#22] +Output [1]: [d_date_sk#20] (24) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_sold_date_sk#21] -Right keys [1]: [d_date_sk#22] +Left keys [1]: [cs_sold_date_sk#19] +Right keys [1]: [d_date_sk#20] Join condition: None (25) Project [codegen id : 8] -Output [1]: [cs_item_sk#20] -Input [3]: [cs_item_sk#20, cs_sold_date_sk#21, d_date_sk#22] +Output [1]: [cs_item_sk#18] +Input [3]: [cs_item_sk#18, cs_sold_date_sk#19, d_date_sk#20] (26) Scan parquet default.item -Output [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] +Output [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 7] -Input [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] +Input [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] (28) Filter [codegen id : 7] -Input [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] -Condition : isnotnull(i_item_sk#23) +Input [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] +Condition : isnotnull(i_item_sk#21) (29) BroadcastExchange -Input [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] +Input [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (30) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_item_sk#20] -Right keys [1]: [i_item_sk#23] +Left keys [1]: [cs_item_sk#18] +Right keys [1]: [i_item_sk#21] Join condition: None (31) Project [codegen id : 8] -Output [3]: [i_brand_id#24, i_class_id#25, i_category_id#26] -Input [5]: [cs_item_sk#20, i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] +Output [3]: [i_brand_id#22, i_class_id#23, i_category_id#24] +Input [5]: [cs_item_sk#18, i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] (32) Exchange -Input [3]: [i_brand_id#24, i_class_id#25, i_category_id#26] -Arguments: hashpartitioning(coalesce(i_brand_id#24, 0), isnull(i_brand_id#24), coalesce(i_class_id#25, 0), isnull(i_class_id#25), coalesce(i_category_id#26, 0), isnull(i_category_id#26), 5), ENSURE_REQUIREMENTS, [id=#28] +Input [3]: [i_brand_id#22, i_class_id#23, i_category_id#24] +Arguments: hashpartitioning(coalesce(i_brand_id#22, 0), isnull(i_brand_id#22), coalesce(i_class_id#23, 0), isnull(i_class_id#23), coalesce(i_category_id#24, 0), isnull(i_category_id#24), 5), ENSURE_REQUIREMENTS, [plan_id=4] (33) Sort [codegen id : 9] -Input [3]: [i_brand_id#24, i_class_id#25, i_category_id#26] -Arguments: [coalesce(i_brand_id#24, 0) ASC NULLS FIRST, isnull(i_brand_id#24) ASC NULLS FIRST, coalesce(i_class_id#25, 0) ASC NULLS FIRST, isnull(i_class_id#25) ASC NULLS FIRST, coalesce(i_category_id#26, 0) ASC NULLS FIRST, isnull(i_category_id#26) ASC NULLS FIRST], false, 0 +Input [3]: [i_brand_id#22, i_class_id#23, i_category_id#24] +Arguments: [coalesce(i_brand_id#22, 0) ASC NULLS FIRST, isnull(i_brand_id#22) ASC NULLS FIRST, coalesce(i_class_id#23, 0) ASC NULLS FIRST, isnull(i_class_id#23) ASC NULLS FIRST, coalesce(i_category_id#24, 0) ASC NULLS FIRST, isnull(i_category_id#24) ASC NULLS FIRST], false, 0 (34) SortMergeJoin [codegen id : 10] -Left keys [6]: [coalesce(i_brand_id#16, 0), isnull(i_brand_id#16), coalesce(i_class_id#17, 0), isnull(i_class_id#17), coalesce(i_category_id#18, 0), isnull(i_category_id#18)] -Right keys [6]: [coalesce(i_brand_id#24, 0), isnull(i_brand_id#24), coalesce(i_class_id#25, 0), isnull(i_class_id#25), coalesce(i_category_id#26, 0), isnull(i_category_id#26)] +Left keys [6]: [coalesce(i_brand_id#15, 0), isnull(i_brand_id#15), coalesce(i_class_id#16, 0), isnull(i_class_id#16), coalesce(i_category_id#17, 0), isnull(i_category_id#17)] +Right keys [6]: [coalesce(i_brand_id#22, 0), isnull(i_brand_id#22), coalesce(i_class_id#23, 0), isnull(i_class_id#23), coalesce(i_category_id#24, 0), isnull(i_category_id#24)] Join condition: None (35) BroadcastExchange -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#29] +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_item_sk#11] -Right keys [1]: [i_item_sk#15] +Left keys [1]: [ss_item_sk#10] +Right keys [1]: [i_item_sk#14] Join condition: None (37) Project [codegen id : 11] -Output [3]: [i_brand_id#16 AS brand_id#30, i_class_id#17 AS class_id#31, i_category_id#18 AS category_id#32] -Input [5]: [ss_item_sk#11, i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] +Output [3]: [i_brand_id#15 AS brand_id#25, i_class_id#16 AS class_id#26, i_category_id#17 AS category_id#27] +Input [5]: [ss_item_sk#10, i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] (38) HashAggregate [codegen id : 11] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] +Results [3]: [brand_id#25, class_id#26, category_id#27] (39) Exchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: hashpartitioning(brand_id#30, class_id#31, category_id#32, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: hashpartitioning(brand_id#25, class_id#26, category_id#27, 5), ENSURE_REQUIREMENTS, [plan_id=6] (40) HashAggregate [codegen id : 12] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] +Results [3]: [brand_id#25, class_id#26, category_id#27] (41) Exchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: hashpartitioning(coalesce(brand_id#30, 0), isnull(brand_id#30), coalesce(class_id#31, 0), isnull(class_id#31), coalesce(category_id#32, 0), isnull(category_id#32), 5), ENSURE_REQUIREMENTS, [id=#34] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: hashpartitioning(coalesce(brand_id#25, 0), isnull(brand_id#25), coalesce(class_id#26, 0), isnull(class_id#26), coalesce(category_id#27, 0), isnull(category_id#27), 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) Sort [codegen id : 13] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: [coalesce(brand_id#30, 0) ASC NULLS FIRST, isnull(brand_id#30) ASC NULLS FIRST, coalesce(class_id#31, 0) ASC NULLS FIRST, isnull(class_id#31) ASC NULLS FIRST, coalesce(category_id#32, 0) ASC NULLS FIRST, isnull(category_id#32) ASC NULLS FIRST], false, 0 +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: [coalesce(brand_id#25, 0) ASC NULLS FIRST, isnull(brand_id#25) ASC NULLS FIRST, coalesce(class_id#26, 0) ASC NULLS FIRST, isnull(class_id#26) ASC NULLS FIRST, coalesce(category_id#27, 0) ASC NULLS FIRST, isnull(category_id#27) ASC NULLS FIRST], false, 0 (43) Scan parquet default.web_sales -Output [2]: [ws_item_sk#35, ws_sold_date_sk#36] +Output [2]: [ws_item_sk#28, ws_sold_date_sk#29] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#36), dynamicpruningexpression(ws_sold_date_sk#36 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ws_sold_date_sk#29), dynamicpruningexpression(ws_sold_date_sk#29 IN dynamicpruning#12)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 16] -Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] (45) Filter [codegen id : 16] -Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] -Condition : isnotnull(ws_item_sk#35) +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] +Condition : isnotnull(ws_item_sk#28) (46) ReusedExchange [Reuses operator id: 132] -Output [1]: [d_date_sk#37] +Output [1]: [d_date_sk#30] (47) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [ws_sold_date_sk#36] -Right keys [1]: [d_date_sk#37] +Left keys [1]: [ws_sold_date_sk#29] +Right keys [1]: [d_date_sk#30] Join condition: None (48) Project [codegen id : 16] -Output [1]: [ws_item_sk#35] -Input [3]: [ws_item_sk#35, ws_sold_date_sk#36, d_date_sk#37] +Output [1]: [ws_item_sk#28] +Input [3]: [ws_item_sk#28, ws_sold_date_sk#29, d_date_sk#30] (49) ReusedExchange [Reuses operator id: 29] -Output [4]: [i_item_sk#38, i_brand_id#39, i_class_id#40, i_category_id#41] +Output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] (50) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [ws_item_sk#35] -Right keys [1]: [i_item_sk#38] +Left keys [1]: [ws_item_sk#28] +Right keys [1]: [i_item_sk#31] Join condition: None (51) Project [codegen id : 16] -Output [3]: [i_brand_id#39, i_class_id#40, i_category_id#41] -Input [5]: [ws_item_sk#35, i_item_sk#38, i_brand_id#39, i_class_id#40, i_category_id#41] +Output [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Input [5]: [ws_item_sk#28, i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] (52) Exchange -Input [3]: [i_brand_id#39, i_class_id#40, i_category_id#41] -Arguments: hashpartitioning(coalesce(i_brand_id#39, 0), isnull(i_brand_id#39), coalesce(i_class_id#40, 0), isnull(i_class_id#40), coalesce(i_category_id#41, 0), isnull(i_category_id#41), 5), ENSURE_REQUIREMENTS, [id=#42] +Input [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: hashpartitioning(coalesce(i_brand_id#32, 0), isnull(i_brand_id#32), coalesce(i_class_id#33, 0), isnull(i_class_id#33), coalesce(i_category_id#34, 0), isnull(i_category_id#34), 5), ENSURE_REQUIREMENTS, [plan_id=8] (53) Sort [codegen id : 17] -Input [3]: [i_brand_id#39, i_class_id#40, i_category_id#41] -Arguments: [coalesce(i_brand_id#39, 0) ASC NULLS FIRST, isnull(i_brand_id#39) ASC NULLS FIRST, coalesce(i_class_id#40, 0) ASC NULLS FIRST, isnull(i_class_id#40) ASC NULLS FIRST, coalesce(i_category_id#41, 0) ASC NULLS FIRST, isnull(i_category_id#41) ASC NULLS FIRST], false, 0 +Input [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [coalesce(i_brand_id#32, 0) ASC NULLS FIRST, isnull(i_brand_id#32) ASC NULLS FIRST, coalesce(i_class_id#33, 0) ASC NULLS FIRST, isnull(i_class_id#33) ASC NULLS FIRST, coalesce(i_category_id#34, 0) ASC NULLS FIRST, isnull(i_category_id#34) ASC NULLS FIRST], false, 0 (54) SortMergeJoin [codegen id : 18] -Left keys [6]: [coalesce(brand_id#30, 0), isnull(brand_id#30), coalesce(class_id#31, 0), isnull(class_id#31), coalesce(category_id#32, 0), isnull(category_id#32)] -Right keys [6]: [coalesce(i_brand_id#39, 0), isnull(i_brand_id#39), coalesce(i_class_id#40, 0), isnull(i_class_id#40), coalesce(i_category_id#41, 0), isnull(i_category_id#41)] +Left keys [6]: [coalesce(brand_id#25, 0), isnull(brand_id#25), coalesce(class_id#26, 0), isnull(class_id#26), coalesce(category_id#27, 0), isnull(category_id#27)] +Right keys [6]: [coalesce(i_brand_id#32, 0), isnull(i_brand_id#32), coalesce(i_class_id#33, 0), isnull(i_class_id#33), coalesce(i_category_id#34, 0), isnull(i_category_id#34)] Join condition: None (55) BroadcastExchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#43] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=9] (56) BroadcastHashJoin [codegen id : 19] -Left keys [3]: [i_brand_id#8, i_class_id#9, i_category_id#10] -Right keys [3]: [brand_id#30, class_id#31, category_id#32] +Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Right keys [3]: [brand_id#25, class_id#26, category_id#27] Join condition: None (57) Project [codegen id : 19] -Output [1]: [i_item_sk#7 AS ss_item_sk#44] -Input [7]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10, brand_id#30, class_id#31, category_id#32] +Output [1]: [i_item_sk#6 AS ss_item_sk#35] +Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#25, class_id#26, category_id#27] (58) Exchange -Input [1]: [ss_item_sk#44] -Arguments: hashpartitioning(ss_item_sk#44, 5), ENSURE_REQUIREMENTS, [id=#45] +Input [1]: [ss_item_sk#35] +Arguments: hashpartitioning(ss_item_sk#35, 5), ENSURE_REQUIREMENTS, [plan_id=10] (59) Sort [codegen id : 20] -Input [1]: [ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#35] +Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (60) SortMergeJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [ss_item_sk#44] +Right keys [1]: [ss_item_sk#35] Join condition: None (61) ReusedExchange [Reuses operator id: 123] -Output [1]: [d_date_sk#46] +Output [1]: [d_date_sk#36] (62) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#46] +Right keys [1]: [d_date_sk#36] Join condition: None (63) Project [codegen id : 43] Output [3]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3] -Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#46] +Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#36] (64) Scan parquet default.item -Output [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Output [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct (65) ColumnarToRow [codegen id : 22] -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] (66) Filter [codegen id : 22] -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Condition : (((isnotnull(i_item_sk#47) AND isnotnull(i_brand_id#48)) AND isnotnull(i_class_id#49)) AND isnotnull(i_category_id#50)) +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Condition : (((isnotnull(i_item_sk#37) AND isnotnull(i_brand_id#38)) AND isnotnull(i_class_id#39)) AND isnotnull(i_category_id#40)) (67) Exchange -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Arguments: hashpartitioning(i_item_sk#47, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Arguments: hashpartitioning(i_item_sk#37, 5), ENSURE_REQUIREMENTS, [plan_id=11] (68) Sort [codegen id : 23] -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Arguments: [i_item_sk#47 ASC NULLS FIRST], false, 0 +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Arguments: [i_item_sk#37 ASC NULLS FIRST], false, 0 (69) ReusedExchange [Reuses operator id: 58] -Output [1]: [ss_item_sk#44] +Output [1]: [ss_item_sk#35] (70) Sort [codegen id : 41] -Input [1]: [ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#35] +Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (71) SortMergeJoin [codegen id : 42] -Left keys [1]: [i_item_sk#47] -Right keys [1]: [ss_item_sk#44] +Left keys [1]: [i_item_sk#37] +Right keys [1]: [ss_item_sk#35] Join condition: None (72) BroadcastExchange -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#52] +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] (73) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#47] +Right keys [1]: [i_item_sk#37] Join condition: None (74) Project [codegen id : 43] -Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] -Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#38, i_class_id#39, i_category_id#40] +Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] (75) HashAggregate [codegen id : 43] -Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] -Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#38, i_class_id#39, i_category_id#40] +Keys [3]: [i_brand_id#38, i_class_id#39, i_category_id#40] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#53, isEmpty#54, count#55] -Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] +Aggregate Attributes [3]: [sum#41, isEmpty#42, count#43] +Results [6]: [i_brand_id#38, i_class_id#39, i_category_id#40, sum#44, isEmpty#45, count#46] (76) Exchange -Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] -Arguments: hashpartitioning(i_brand_id#48, i_class_id#49, i_category_id#50, 5), ENSURE_REQUIREMENTS, [id=#59] +Input [6]: [i_brand_id#38, i_class_id#39, i_category_id#40, sum#44, isEmpty#45, count#46] +Arguments: hashpartitioning(i_brand_id#38, i_class_id#39, i_category_id#40, 5), ENSURE_REQUIREMENTS, [plan_id=13] (77) HashAggregate [codegen id : 88] -Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] -Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Input [6]: [i_brand_id#38, i_class_id#39, i_category_id#40, sum#44, isEmpty#45, count#46] +Keys [3]: [i_brand_id#38, i_class_id#39, i_category_id#40] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60, count(1)#61] -Results [6]: [store AS channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60 AS sales#63, count(1)#61 AS number_sales#64] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47, count(1)#48] +Results [6]: [store AS channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47 AS sales#50, count(1)#48 AS number_sales#51] (78) Filter [codegen id : 88] -Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] -Condition : (isnotnull(sales#63) AND (cast(sales#63 as decimal(32,6)) > cast(Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) +Input [6]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sales#50, number_sales#51] +Condition : (isnotnull(sales#50) AND (cast(sales#50 as decimal(32,6)) > cast(Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) (79) Scan parquet default.store_sales -Output [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] +Output [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#70), dynamicpruningexpression(ss_sold_date_sk#70 IN dynamicpruning#71)] +PartitionFilters: [isnotnull(ss_sold_date_sk#57), dynamicpruningexpression(ss_sold_date_sk#57 IN dynamicpruning#58)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (80) ColumnarToRow [codegen id : 44] -Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] +Input [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] (81) Filter [codegen id : 44] -Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] -Condition : isnotnull(ss_item_sk#67) +Input [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] +Condition : isnotnull(ss_item_sk#54) (82) Exchange -Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] -Arguments: hashpartitioning(ss_item_sk#67, 5), ENSURE_REQUIREMENTS, [id=#72] +Input [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] +Arguments: hashpartitioning(ss_item_sk#54, 5), ENSURE_REQUIREMENTS, [plan_id=14] (83) Sort [codegen id : 45] -Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] -Arguments: [ss_item_sk#67 ASC NULLS FIRST], false, 0 +Input [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] +Arguments: [ss_item_sk#54 ASC NULLS FIRST], false, 0 (84) ReusedExchange [Reuses operator id: 58] -Output [1]: [ss_item_sk#44] +Output [1]: [ss_item_sk#35] (85) Sort [codegen id : 63] -Input [1]: [ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#35] +Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (86) SortMergeJoin [codegen id : 86] -Left keys [1]: [ss_item_sk#67] -Right keys [1]: [ss_item_sk#44] +Left keys [1]: [ss_item_sk#54] +Right keys [1]: [ss_item_sk#35] Join condition: None (87) ReusedExchange [Reuses operator id: 137] -Output [1]: [d_date_sk#73] +Output [1]: [d_date_sk#59] (88) BroadcastHashJoin [codegen id : 86] -Left keys [1]: [ss_sold_date_sk#70] -Right keys [1]: [d_date_sk#73] +Left keys [1]: [ss_sold_date_sk#57] +Right keys [1]: [d_date_sk#59] Join condition: None (89) Project [codegen id : 86] -Output [3]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69] -Input [5]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70, d_date_sk#73] +Output [3]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56] +Input [5]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57, d_date_sk#59] (90) ReusedExchange [Reuses operator id: 72] -Output [4]: [i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] +Output [4]: [i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63] (91) BroadcastHashJoin [codegen id : 86] -Left keys [1]: [ss_item_sk#67] -Right keys [1]: [i_item_sk#74] +Left keys [1]: [ss_item_sk#54] +Right keys [1]: [i_item_sk#60] Join condition: None (92) Project [codegen id : 86] -Output [5]: [ss_quantity#68, ss_list_price#69, i_brand_id#75, i_class_id#76, i_category_id#77] -Input [7]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] +Output [5]: [ss_quantity#55, ss_list_price#56, i_brand_id#61, i_class_id#62, i_category_id#63] +Input [7]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63] (93) HashAggregate [codegen id : 86] -Input [5]: [ss_quantity#68, ss_list_price#69, i_brand_id#75, i_class_id#76, i_category_id#77] -Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#78, isEmpty#79, count#80] -Results [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] +Input [5]: [ss_quantity#55, ss_list_price#56, i_brand_id#61, i_class_id#62, i_category_id#63] +Keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#64, isEmpty#65, count#66] +Results [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#67, isEmpty#68, count#69] (94) Exchange -Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] -Arguments: hashpartitioning(i_brand_id#75, i_class_id#76, i_category_id#77, 5), ENSURE_REQUIREMENTS, [id=#84] +Input [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#67, isEmpty#68, count#69] +Arguments: hashpartitioning(i_brand_id#61, i_class_id#62, i_category_id#63, 5), ENSURE_REQUIREMENTS, [plan_id=15] (95) HashAggregate [codegen id : 87] -Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] -Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#85, count(1)#86] -Results [6]: [store AS channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#85 AS sales#88, count(1)#86 AS number_sales#89] +Input [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#67, isEmpty#68, count#69] +Keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#70, count(1)#71] +Results [6]: [store AS channel#72, i_brand_id#61, i_class_id#62, i_category_id#63, sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#70 AS sales#73, count(1)#71 AS number_sales#74] (96) Filter [codegen id : 87] -Input [6]: [channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] -Condition : (isnotnull(sales#88) AND (cast(sales#88 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) +Input [6]: [channel#72, i_brand_id#61, i_class_id#62, i_category_id#63, sales#73, number_sales#74] +Condition : (isnotnull(sales#73) AND (cast(sales#73 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) (97) BroadcastExchange -Input [6]: [channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] -Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#90] +Input [6]: [channel#72, i_brand_id#61, i_class_id#62, i_category_id#63, sales#73, number_sales#74] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [plan_id=16] (98) BroadcastHashJoin [codegen id : 88] -Left keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] -Right keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] +Left keys [3]: [i_brand_id#38, i_class_id#39, i_category_id#40] +Right keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] Join condition: None (99) TakeOrderedAndProject -Input [12]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] -Arguments: 100, [i_brand_id#48 ASC NULLS FIRST, i_class_id#49 ASC NULLS FIRST, i_category_id#50 ASC NULLS FIRST], [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] +Input [12]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sales#50, number_sales#51, channel#72, i_brand_id#61, i_class_id#62, i_category_id#63, sales#73, number_sales#74] +Arguments: 100, [i_brand_id#38 ASC NULLS FIRST, i_class_id#39 ASC NULLS FIRST, i_category_id#40 ASC NULLS FIRST], [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sales#50, number_sales#51, channel#72, i_brand_id#61, i_class_id#62, i_category_id#63, sales#73, number_sales#74] ===== Subqueries ===== -Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquery#65, [id=#66] +Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquery#52, [id=#53] * HashAggregate (118) +- Exchange (117) +- * HashAggregate (116) @@ -570,96 +570,96 @@ Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquer (100) Scan parquet default.store_sales -Output [3]: [ss_quantity#91, ss_list_price#92, ss_sold_date_sk#93] +Output [3]: [ss_quantity#75, ss_list_price#76, ss_sold_date_sk#77] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#93), dynamicpruningexpression(ss_sold_date_sk#93 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ss_sold_date_sk#77), dynamicpruningexpression(ss_sold_date_sk#77 IN dynamicpruning#12)] ReadSchema: struct (101) ColumnarToRow [codegen id : 2] -Input [3]: [ss_quantity#91, ss_list_price#92, ss_sold_date_sk#93] +Input [3]: [ss_quantity#75, ss_list_price#76, ss_sold_date_sk#77] (102) ReusedExchange [Reuses operator id: 132] -Output [1]: [d_date_sk#94] +Output [1]: [d_date_sk#78] (103) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#93] -Right keys [1]: [d_date_sk#94] +Left keys [1]: [ss_sold_date_sk#77] +Right keys [1]: [d_date_sk#78] Join condition: None (104) Project [codegen id : 2] -Output [2]: [ss_quantity#91 AS quantity#95, ss_list_price#92 AS list_price#96] -Input [4]: [ss_quantity#91, ss_list_price#92, ss_sold_date_sk#93, d_date_sk#94] +Output [2]: [ss_quantity#75 AS quantity#79, ss_list_price#76 AS list_price#80] +Input [4]: [ss_quantity#75, ss_list_price#76, ss_sold_date_sk#77, d_date_sk#78] (105) Scan parquet default.catalog_sales -Output [3]: [cs_quantity#97, cs_list_price#98, cs_sold_date_sk#99] +Output [3]: [cs_quantity#81, cs_list_price#82, cs_sold_date_sk#83] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#99), dynamicpruningexpression(cs_sold_date_sk#99 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(cs_sold_date_sk#83), dynamicpruningexpression(cs_sold_date_sk#83 IN dynamicpruning#12)] ReadSchema: struct (106) ColumnarToRow [codegen id : 4] -Input [3]: [cs_quantity#97, cs_list_price#98, cs_sold_date_sk#99] +Input [3]: [cs_quantity#81, cs_list_price#82, cs_sold_date_sk#83] (107) ReusedExchange [Reuses operator id: 132] -Output [1]: [d_date_sk#100] +Output [1]: [d_date_sk#84] (108) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#99] -Right keys [1]: [d_date_sk#100] +Left keys [1]: [cs_sold_date_sk#83] +Right keys [1]: [d_date_sk#84] Join condition: None (109) Project [codegen id : 4] -Output [2]: [cs_quantity#97 AS quantity#101, cs_list_price#98 AS list_price#102] -Input [4]: [cs_quantity#97, cs_list_price#98, cs_sold_date_sk#99, d_date_sk#100] +Output [2]: [cs_quantity#81 AS quantity#85, cs_list_price#82 AS list_price#86] +Input [4]: [cs_quantity#81, cs_list_price#82, cs_sold_date_sk#83, d_date_sk#84] (110) Scan parquet default.web_sales -Output [3]: [ws_quantity#103, ws_list_price#104, ws_sold_date_sk#105] +Output [3]: [ws_quantity#87, ws_list_price#88, ws_sold_date_sk#89] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#105), dynamicpruningexpression(ws_sold_date_sk#105 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ws_sold_date_sk#89), dynamicpruningexpression(ws_sold_date_sk#89 IN dynamicpruning#12)] ReadSchema: struct (111) ColumnarToRow [codegen id : 6] -Input [3]: [ws_quantity#103, ws_list_price#104, ws_sold_date_sk#105] +Input [3]: [ws_quantity#87, ws_list_price#88, ws_sold_date_sk#89] (112) ReusedExchange [Reuses operator id: 132] -Output [1]: [d_date_sk#106] +Output [1]: [d_date_sk#90] (113) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#105] -Right keys [1]: [d_date_sk#106] +Left keys [1]: [ws_sold_date_sk#89] +Right keys [1]: [d_date_sk#90] Join condition: None (114) Project [codegen id : 6] -Output [2]: [ws_quantity#103 AS quantity#107, ws_list_price#104 AS list_price#108] -Input [4]: [ws_quantity#103, ws_list_price#104, ws_sold_date_sk#105, d_date_sk#106] +Output [2]: [ws_quantity#87 AS quantity#91, ws_list_price#88 AS list_price#92] +Input [4]: [ws_quantity#87, ws_list_price#88, ws_sold_date_sk#89, d_date_sk#90] (115) Union (116) HashAggregate [codegen id : 7] -Input [2]: [quantity#95, list_price#96] +Input [2]: [quantity#79, list_price#80] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#109, count#110] -Results [2]: [sum#111, count#112] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#93, count#94] +Results [2]: [sum#95, count#96] (117) Exchange -Input [2]: [sum#111, count#112] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#113] +Input [2]: [sum#95, count#96] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=17] (118) HashAggregate [codegen id : 8] -Input [2]: [sum#111, count#112] +Input [2]: [sum#95, count#96] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))#114] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))#114 AS average_sales#115] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))#97] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))#97 AS average_sales#98] -Subquery:2 Hosting operator id = 100 Hosting Expression = ss_sold_date_sk#93 IN dynamicpruning#13 +Subquery:2 Hosting operator id = 100 Hosting Expression = ss_sold_date_sk#77 IN dynamicpruning#12 -Subquery:3 Hosting operator id = 105 Hosting Expression = cs_sold_date_sk#99 IN dynamicpruning#13 +Subquery:3 Hosting operator id = 105 Hosting Expression = cs_sold_date_sk#83 IN dynamicpruning#12 -Subquery:4 Hosting operator id = 110 Hosting Expression = ws_sold_date_sk#105 IN dynamicpruning#13 +Subquery:4 Hosting operator id = 110 Hosting Expression = ws_sold_date_sk#89 IN dynamicpruning#12 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 BroadcastExchange (123) @@ -670,28 +670,28 @@ BroadcastExchange (123) (119) Scan parquet default.date_dim -Output [2]: [d_date_sk#46, d_week_seq#116] +Output [2]: [d_date_sk#36, d_week_seq#99] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct (120) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#46, d_week_seq#116] +Input [2]: [d_date_sk#36, d_week_seq#99] (121) Filter [codegen id : 1] -Input [2]: [d_date_sk#46, d_week_seq#116] -Condition : ((isnotnull(d_week_seq#116) AND (d_week_seq#116 = Subquery scalar-subquery#117, [id=#118])) AND isnotnull(d_date_sk#46)) +Input [2]: [d_date_sk#36, d_week_seq#99] +Condition : ((isnotnull(d_week_seq#99) AND (d_week_seq#99 = Subquery scalar-subquery#100, [id=#101])) AND isnotnull(d_date_sk#36)) (122) Project [codegen id : 1] -Output [1]: [d_date_sk#46] -Input [2]: [d_date_sk#46, d_week_seq#116] +Output [1]: [d_date_sk#36] +Input [2]: [d_date_sk#36, d_week_seq#99] (123) BroadcastExchange -Input [1]: [d_date_sk#46] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#119] +Input [1]: [d_date_sk#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=18] -Subquery:6 Hosting operator id = 121 Hosting Expression = Subquery scalar-subquery#117, [id=#118] +Subquery:6 Hosting operator id = 121 Hosting Expression = Subquery scalar-subquery#100, [id=#101] * Project (127) +- * Filter (126) +- * ColumnarToRow (125) @@ -699,24 +699,24 @@ Subquery:6 Hosting operator id = 121 Hosting Expression = Subquery scalar-subque (124) Scan parquet default.date_dim -Output [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] +Output [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,2000), EqualTo(d_moy,12), EqualTo(d_dom,11)] ReadSchema: struct (125) ColumnarToRow [codegen id : 1] -Input [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] +Input [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] (126) Filter [codegen id : 1] -Input [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] -Condition : (((((isnotnull(d_year#121) AND isnotnull(d_moy#122)) AND isnotnull(d_dom#123)) AND (d_year#121 = 2000)) AND (d_moy#122 = 12)) AND (d_dom#123 = 11)) +Input [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] +Condition : (((((isnotnull(d_year#103) AND isnotnull(d_moy#104)) AND isnotnull(d_dom#105)) AND (d_year#103 = 2000)) AND (d_moy#104 = 12)) AND (d_dom#105 = 11)) (127) Project [codegen id : 1] -Output [1]: [d_week_seq#120] -Input [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] +Output [1]: [d_week_seq#102] +Input [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] -Subquery:7 Hosting operator id = 9 Hosting Expression = ss_sold_date_sk#12 IN dynamicpruning#13 +Subquery:7 Hosting operator id = 9 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#12 BroadcastExchange (132) +- * Project (131) +- * Filter (130) @@ -725,34 +725,34 @@ BroadcastExchange (132) (128) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#124] +Output [2]: [d_date_sk#13, d_year#106] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (129) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#124] +Input [2]: [d_date_sk#13, d_year#106] (130) Filter [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#124] -Condition : (((isnotnull(d_year#124) AND (d_year#124 >= 1999)) AND (d_year#124 <= 2001)) AND isnotnull(d_date_sk#14)) +Input [2]: [d_date_sk#13, d_year#106] +Condition : (((isnotnull(d_year#106) AND (d_year#106 >= 1999)) AND (d_year#106 <= 2001)) AND isnotnull(d_date_sk#13)) (131) Project [codegen id : 1] -Output [1]: [d_date_sk#14] -Input [2]: [d_date_sk#14, d_year#124] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_year#106] (132) BroadcastExchange -Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#125] +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=19] -Subquery:8 Hosting operator id = 20 Hosting Expression = cs_sold_date_sk#21 IN dynamicpruning#13 +Subquery:8 Hosting operator id = 20 Hosting Expression = cs_sold_date_sk#19 IN dynamicpruning#12 -Subquery:9 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#36 IN dynamicpruning#13 +Subquery:9 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#29 IN dynamicpruning#12 -Subquery:10 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#65, [id=#66] +Subquery:10 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#52, [id=#53] -Subquery:11 Hosting operator id = 79 Hosting Expression = ss_sold_date_sk#70 IN dynamicpruning#71 +Subquery:11 Hosting operator id = 79 Hosting Expression = ss_sold_date_sk#57 IN dynamicpruning#58 BroadcastExchange (137) +- * Project (136) +- * Filter (135) @@ -761,28 +761,28 @@ BroadcastExchange (137) (133) Scan parquet default.date_dim -Output [2]: [d_date_sk#73, d_week_seq#126] +Output [2]: [d_date_sk#59, d_week_seq#107] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct (134) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#73, d_week_seq#126] +Input [2]: [d_date_sk#59, d_week_seq#107] (135) Filter [codegen id : 1] -Input [2]: [d_date_sk#73, d_week_seq#126] -Condition : ((isnotnull(d_week_seq#126) AND (d_week_seq#126 = Subquery scalar-subquery#127, [id=#128])) AND isnotnull(d_date_sk#73)) +Input [2]: [d_date_sk#59, d_week_seq#107] +Condition : ((isnotnull(d_week_seq#107) AND (d_week_seq#107 = Subquery scalar-subquery#108, [id=#109])) AND isnotnull(d_date_sk#59)) (136) Project [codegen id : 1] -Output [1]: [d_date_sk#73] -Input [2]: [d_date_sk#73, d_week_seq#126] +Output [1]: [d_date_sk#59] +Input [2]: [d_date_sk#59, d_week_seq#107] (137) BroadcastExchange -Input [1]: [d_date_sk#73] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#129] +Input [1]: [d_date_sk#59] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=20] -Subquery:12 Hosting operator id = 135 Hosting Expression = Subquery scalar-subquery#127, [id=#128] +Subquery:12 Hosting operator id = 135 Hosting Expression = Subquery scalar-subquery#108, [id=#109] * Project (141) +- * Filter (140) +- * ColumnarToRow (139) @@ -790,21 +790,21 @@ Subquery:12 Hosting operator id = 135 Hosting Expression = Subquery scalar-subqu (138) Scan parquet default.date_dim -Output [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] +Output [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,11)] ReadSchema: struct (139) ColumnarToRow [codegen id : 1] -Input [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] +Input [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] (140) Filter [codegen id : 1] -Input [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] -Condition : (((((isnotnull(d_year#131) AND isnotnull(d_moy#132)) AND isnotnull(d_dom#133)) AND (d_year#131 = 1999)) AND (d_moy#132 = 12)) AND (d_dom#133 = 11)) +Input [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] +Condition : (((((isnotnull(d_year#111) AND isnotnull(d_moy#112)) AND isnotnull(d_dom#113)) AND (d_year#111 = 1999)) AND (d_moy#112 = 12)) AND (d_dom#113 = 11)) (141) Project [codegen id : 1] -Output [1]: [d_week_seq#130] -Input [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] +Output [1]: [d_week_seq#110] +Input [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt index 69be776d2ac28..1972d6840d1ee 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q14b/explain.txt @@ -174,7 +174,7 @@ Condition : isnotnull(i_item_sk#19) (19) BroadcastExchange Input [4]: [i_item_sk#19, i_brand_id#20, i_class_id#21, i_category_id#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (20) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_item_sk#17] @@ -186,20 +186,20 @@ Output [4]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22] Input [6]: [cs_item_sk#17, cs_sold_date_sk#18, i_item_sk#19, i_brand_id#20, i_class_id#21, i_category_id#22] (22) ReusedExchange [Reuses operator id: 117] -Output [1]: [d_date_sk#24] +Output [1]: [d_date_sk#23] (23) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_sold_date_sk#18] -Right keys [1]: [d_date_sk#24] +Right keys [1]: [d_date_sk#23] Join condition: None (24) Project [codegen id : 3] Output [3]: [i_brand_id#20, i_class_id#21, i_category_id#22] -Input [5]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22, d_date_sk#24] +Input [5]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22, d_date_sk#23] (25) BroadcastExchange Input [3]: [i_brand_id#20, i_class_id#21, i_category_id#22] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#25] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=2] (26) BroadcastHashJoin [codegen id : 4] Left keys [6]: [coalesce(i_brand_id#14, 0), isnull(i_brand_id#14), coalesce(i_class_id#15, 0), isnull(i_class_id#15), coalesce(i_category_id#16, 0), isnull(i_category_id#16)] @@ -208,7 +208,7 @@ Join condition: None (27) BroadcastExchange Input [4]: [i_item_sk#13, i_brand_id#14, i_class_id#15, i_category_id#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (28) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_item_sk#10] @@ -220,259 +220,259 @@ Output [4]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16] Input [6]: [ss_item_sk#10, ss_sold_date_sk#11, i_item_sk#13, i_brand_id#14, i_class_id#15, i_category_id#16] (30) ReusedExchange [Reuses operator id: 117] -Output [1]: [d_date_sk#27] +Output [1]: [d_date_sk#24] (31) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_sold_date_sk#11] -Right keys [1]: [d_date_sk#27] +Right keys [1]: [d_date_sk#24] Join condition: None (32) Project [codegen id : 6] -Output [3]: [i_brand_id#14 AS brand_id#28, i_class_id#15 AS class_id#29, i_category_id#16 AS category_id#30] -Input [5]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16, d_date_sk#27] +Output [3]: [i_brand_id#14 AS brand_id#25, i_class_id#15 AS class_id#26, i_category_id#16 AS category_id#27] +Input [5]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16, d_date_sk#24] (33) HashAggregate [codegen id : 6] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, category_id#30] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] +Results [3]: [brand_id#25, class_id#26, category_id#27] (34) Exchange -Input [3]: [brand_id#28, class_id#29, category_id#30] -Arguments: hashpartitioning(brand_id#28, class_id#29, category_id#30, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: hashpartitioning(brand_id#25, class_id#26, category_id#27, 5), ENSURE_REQUIREMENTS, [plan_id=4] (35) HashAggregate [codegen id : 10] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, category_id#30] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] +Results [3]: [brand_id#25, class_id#26, category_id#27] (36) Scan parquet default.web_sales -Output [2]: [ws_item_sk#32, ws_sold_date_sk#33] +Output [2]: [ws_item_sk#28, ws_sold_date_sk#29] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#33), dynamicpruningexpression(ws_sold_date_sk#33 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ws_sold_date_sk#29), dynamicpruningexpression(ws_sold_date_sk#29 IN dynamicpruning#12)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 9] -Input [2]: [ws_item_sk#32, ws_sold_date_sk#33] +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] (38) Filter [codegen id : 9] -Input [2]: [ws_item_sk#32, ws_sold_date_sk#33] -Condition : isnotnull(ws_item_sk#32) +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] +Condition : isnotnull(ws_item_sk#28) (39) ReusedExchange [Reuses operator id: 19] -Output [4]: [i_item_sk#34, i_brand_id#35, i_class_id#36, i_category_id#37] +Output [4]: [i_item_sk#30, i_brand_id#31, i_class_id#32, i_category_id#33] (40) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_item_sk#32] -Right keys [1]: [i_item_sk#34] +Left keys [1]: [ws_item_sk#28] +Right keys [1]: [i_item_sk#30] Join condition: None (41) Project [codegen id : 9] -Output [4]: [ws_sold_date_sk#33, i_brand_id#35, i_class_id#36, i_category_id#37] -Input [6]: [ws_item_sk#32, ws_sold_date_sk#33, i_item_sk#34, i_brand_id#35, i_class_id#36, i_category_id#37] +Output [4]: [ws_sold_date_sk#29, i_brand_id#31, i_class_id#32, i_category_id#33] +Input [6]: [ws_item_sk#28, ws_sold_date_sk#29, i_item_sk#30, i_brand_id#31, i_class_id#32, i_category_id#33] (42) ReusedExchange [Reuses operator id: 117] -Output [1]: [d_date_sk#38] +Output [1]: [d_date_sk#34] (43) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_sold_date_sk#33] -Right keys [1]: [d_date_sk#38] +Left keys [1]: [ws_sold_date_sk#29] +Right keys [1]: [d_date_sk#34] Join condition: None (44) Project [codegen id : 9] -Output [3]: [i_brand_id#35, i_class_id#36, i_category_id#37] -Input [5]: [ws_sold_date_sk#33, i_brand_id#35, i_class_id#36, i_category_id#37, d_date_sk#38] +Output [3]: [i_brand_id#31, i_class_id#32, i_category_id#33] +Input [5]: [ws_sold_date_sk#29, i_brand_id#31, i_class_id#32, i_category_id#33, d_date_sk#34] (45) BroadcastExchange -Input [3]: [i_brand_id#35, i_class_id#36, i_category_id#37] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#39] +Input [3]: [i_brand_id#31, i_class_id#32, i_category_id#33] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=5] (46) BroadcastHashJoin [codegen id : 10] -Left keys [6]: [coalesce(brand_id#28, 0), isnull(brand_id#28), coalesce(class_id#29, 0), isnull(class_id#29), coalesce(category_id#30, 0), isnull(category_id#30)] -Right keys [6]: [coalesce(i_brand_id#35, 0), isnull(i_brand_id#35), coalesce(i_class_id#36, 0), isnull(i_class_id#36), coalesce(i_category_id#37, 0), isnull(i_category_id#37)] +Left keys [6]: [coalesce(brand_id#25, 0), isnull(brand_id#25), coalesce(class_id#26, 0), isnull(class_id#26), coalesce(category_id#27, 0), isnull(category_id#27)] +Right keys [6]: [coalesce(i_brand_id#31, 0), isnull(i_brand_id#31), coalesce(i_class_id#32, 0), isnull(i_class_id#32), coalesce(i_category_id#33, 0), isnull(i_category_id#33)] Join condition: None (47) BroadcastExchange -Input [3]: [brand_id#28, class_id#29, category_id#30] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#40] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=6] (48) BroadcastHashJoin [codegen id : 11] Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] -Right keys [3]: [brand_id#28, class_id#29, category_id#30] +Right keys [3]: [brand_id#25, class_id#26, category_id#27] Join condition: None (49) Project [codegen id : 11] -Output [1]: [i_item_sk#6 AS ss_item_sk#41] -Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#28, class_id#29, category_id#30] +Output [1]: [i_item_sk#6 AS ss_item_sk#35] +Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#25, class_id#26, category_id#27] (50) BroadcastExchange -Input [1]: [ss_item_sk#41] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] +Input [1]: [ss_item_sk#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] (51) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [ss_item_sk#41] +Right keys [1]: [ss_item_sk#35] Join condition: None (52) Scan parquet default.item -Output [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] +Output [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct (53) ColumnarToRow [codegen id : 23] -Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] +Input [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] (54) Filter [codegen id : 23] -Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -Condition : (((isnotnull(i_item_sk#43) AND isnotnull(i_brand_id#44)) AND isnotnull(i_class_id#45)) AND isnotnull(i_category_id#46)) +Input [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] +Condition : (((isnotnull(i_item_sk#36) AND isnotnull(i_brand_id#37)) AND isnotnull(i_class_id#38)) AND isnotnull(i_category_id#39)) (55) ReusedExchange [Reuses operator id: 50] -Output [1]: [ss_item_sk#41] +Output [1]: [ss_item_sk#35] (56) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [i_item_sk#43] -Right keys [1]: [ss_item_sk#41] +Left keys [1]: [i_item_sk#36] +Right keys [1]: [ss_item_sk#35] Join condition: None (57) BroadcastExchange -Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#47] +Input [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] (58) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#43] +Right keys [1]: [i_item_sk#36] Join condition: None (59) Project [codegen id : 25] -Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46] -Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] +Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#37, i_class_id#38, i_category_id#39] +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] (60) ReusedExchange [Reuses operator id: 108] -Output [1]: [d_date_sk#48] +Output [1]: [d_date_sk#40] (61) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#48] +Right keys [1]: [d_date_sk#40] Join condition: None (62) Project [codegen id : 25] -Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] -Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46, d_date_sk#48] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#37, i_class_id#38, i_category_id#39] +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#37, i_class_id#38, i_category_id#39, d_date_sk#40] (63) HashAggregate [codegen id : 25] -Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] -Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#37, i_class_id#38, i_category_id#39] +Keys [3]: [i_brand_id#37, i_class_id#38, i_category_id#39] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] -Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] +Aggregate Attributes [3]: [sum#41, isEmpty#42, count#43] +Results [6]: [i_brand_id#37, i_class_id#38, i_category_id#39, sum#44, isEmpty#45, count#46] (64) Exchange -Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] -Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), ENSURE_REQUIREMENTS, [id=#55] +Input [6]: [i_brand_id#37, i_class_id#38, i_category_id#39, sum#44, isEmpty#45, count#46] +Arguments: hashpartitioning(i_brand_id#37, i_class_id#38, i_category_id#39, 5), ENSURE_REQUIREMENTS, [plan_id=9] (65) HashAggregate [codegen id : 52] -Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] -Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] +Input [6]: [i_brand_id#37, i_class_id#38, i_category_id#39, sum#44, isEmpty#45, count#46] +Keys [3]: [i_brand_id#37, i_class_id#38, i_category_id#39] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56, count(1)#57] -Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56 AS sales#59, count(1)#57 AS number_sales#60] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47, count(1)#48] +Results [6]: [store AS channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47 AS sales#50, count(1)#48 AS number_sales#51] (66) Filter [codegen id : 52] -Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60] -Condition : (isnotnull(sales#59) AND (cast(sales#59 as decimal(32,6)) > cast(Subquery scalar-subquery#61, [id=#62] as decimal(32,6)))) +Input [6]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sales#50, number_sales#51] +Condition : (isnotnull(sales#50) AND (cast(sales#50 as decimal(32,6)) > cast(Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) (67) Scan parquet default.store_sales -Output [4]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66] +Output [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#66), dynamicpruningexpression(ss_sold_date_sk#66 IN dynamicpruning#67)] +PartitionFilters: [isnotnull(ss_sold_date_sk#57), dynamicpruningexpression(ss_sold_date_sk#57 IN dynamicpruning#58)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (68) ColumnarToRow [codegen id : 50] -Input [4]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66] +Input [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] (69) Filter [codegen id : 50] -Input [4]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66] -Condition : isnotnull(ss_item_sk#63) +Input [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] +Condition : isnotnull(ss_item_sk#54) (70) ReusedExchange [Reuses operator id: 50] -Output [1]: [ss_item_sk#41] +Output [1]: [ss_item_sk#35] (71) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_item_sk#63] -Right keys [1]: [ss_item_sk#41] +Left keys [1]: [ss_item_sk#54] +Right keys [1]: [ss_item_sk#35] Join condition: None (72) ReusedExchange [Reuses operator id: 57] -Output [4]: [i_item_sk#68, i_brand_id#69, i_class_id#70, i_category_id#71] +Output [4]: [i_item_sk#59, i_brand_id#60, i_class_id#61, i_category_id#62] (73) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_item_sk#63] -Right keys [1]: [i_item_sk#68] +Left keys [1]: [ss_item_sk#54] +Right keys [1]: [i_item_sk#59] Join condition: None (74) Project [codegen id : 50] -Output [6]: [ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_brand_id#69, i_class_id#70, i_category_id#71] -Input [8]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_item_sk#68, i_brand_id#69, i_class_id#70, i_category_id#71] +Output [6]: [ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57, i_brand_id#60, i_class_id#61, i_category_id#62] +Input [8]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57, i_item_sk#59, i_brand_id#60, i_class_id#61, i_category_id#62] (75) ReusedExchange [Reuses operator id: 122] -Output [1]: [d_date_sk#72] +Output [1]: [d_date_sk#63] (76) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_sold_date_sk#66] -Right keys [1]: [d_date_sk#72] +Left keys [1]: [ss_sold_date_sk#57] +Right keys [1]: [d_date_sk#63] Join condition: None (77) Project [codegen id : 50] -Output [5]: [ss_quantity#64, ss_list_price#65, i_brand_id#69, i_class_id#70, i_category_id#71] -Input [7]: [ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_brand_id#69, i_class_id#70, i_category_id#71, d_date_sk#72] +Output [5]: [ss_quantity#55, ss_list_price#56, i_brand_id#60, i_class_id#61, i_category_id#62] +Input [7]: [ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57, i_brand_id#60, i_class_id#61, i_category_id#62, d_date_sk#63] (78) HashAggregate [codegen id : 50] -Input [5]: [ss_quantity#64, ss_list_price#65, i_brand_id#69, i_class_id#70, i_category_id#71] -Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#73, isEmpty#74, count#75] -Results [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] +Input [5]: [ss_quantity#55, ss_list_price#56, i_brand_id#60, i_class_id#61, i_category_id#62] +Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#64, isEmpty#65, count#66] +Results [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#67, isEmpty#68, count#69] (79) Exchange -Input [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] -Arguments: hashpartitioning(i_brand_id#69, i_class_id#70, i_category_id#71, 5), ENSURE_REQUIREMENTS, [id=#79] +Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#67, isEmpty#68, count#69] +Arguments: hashpartitioning(i_brand_id#60, i_class_id#61, i_category_id#62, 5), ENSURE_REQUIREMENTS, [plan_id=10] (80) HashAggregate [codegen id : 51] -Input [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] -Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#80, count(1)#81] -Results [6]: [store AS channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#80 AS sales#83, count(1)#81 AS number_sales#84] +Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#67, isEmpty#68, count#69] +Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#70, count(1)#71] +Results [6]: [store AS channel#72, i_brand_id#60, i_class_id#61, i_category_id#62, sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#70 AS sales#73, count(1)#71 AS number_sales#74] (81) Filter [codegen id : 51] -Input [6]: [channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] -Condition : (isnotnull(sales#83) AND (cast(sales#83 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#61, [id=#62] as decimal(32,6)))) +Input [6]: [channel#72, i_brand_id#60, i_class_id#61, i_category_id#62, sales#73, number_sales#74] +Condition : (isnotnull(sales#73) AND (cast(sales#73 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) (82) BroadcastExchange -Input [6]: [channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] -Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#85] +Input [6]: [channel#72, i_brand_id#60, i_class_id#61, i_category_id#62, sales#73, number_sales#74] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [plan_id=11] (83) BroadcastHashJoin [codegen id : 52] -Left keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Right keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] +Left keys [3]: [i_brand_id#37, i_class_id#38, i_category_id#39] +Right keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] Join condition: None (84) TakeOrderedAndProject -Input [12]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60, channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] -Arguments: 100, [i_brand_id#44 ASC NULLS FIRST, i_class_id#45 ASC NULLS FIRST, i_category_id#46 ASC NULLS FIRST], [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60, channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] +Input [12]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sales#50, number_sales#51, channel#72, i_brand_id#60, i_class_id#61, i_category_id#62, sales#73, number_sales#74] +Arguments: 100, [i_brand_id#37 ASC NULLS FIRST, i_class_id#38 ASC NULLS FIRST, i_category_id#39 ASC NULLS FIRST], [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sales#50, number_sales#51, channel#72, i_brand_id#60, i_class_id#61, i_category_id#62, sales#73, number_sales#74] ===== Subqueries ===== -Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#61, [id=#62] +Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#52, [id=#53] * HashAggregate (103) +- Exchange (102) +- * HashAggregate (101) @@ -495,96 +495,96 @@ Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquer (85) Scan parquet default.store_sales -Output [3]: [ss_quantity#86, ss_list_price#87, ss_sold_date_sk#88] +Output [3]: [ss_quantity#75, ss_list_price#76, ss_sold_date_sk#77] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#88), dynamicpruningexpression(ss_sold_date_sk#88 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ss_sold_date_sk#77), dynamicpruningexpression(ss_sold_date_sk#77 IN dynamicpruning#12)] ReadSchema: struct (86) ColumnarToRow [codegen id : 2] -Input [3]: [ss_quantity#86, ss_list_price#87, ss_sold_date_sk#88] +Input [3]: [ss_quantity#75, ss_list_price#76, ss_sold_date_sk#77] (87) ReusedExchange [Reuses operator id: 117] -Output [1]: [d_date_sk#89] +Output [1]: [d_date_sk#78] (88) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#88] -Right keys [1]: [d_date_sk#89] +Left keys [1]: [ss_sold_date_sk#77] +Right keys [1]: [d_date_sk#78] Join condition: None (89) Project [codegen id : 2] -Output [2]: [ss_quantity#86 AS quantity#90, ss_list_price#87 AS list_price#91] -Input [4]: [ss_quantity#86, ss_list_price#87, ss_sold_date_sk#88, d_date_sk#89] +Output [2]: [ss_quantity#75 AS quantity#79, ss_list_price#76 AS list_price#80] +Input [4]: [ss_quantity#75, ss_list_price#76, ss_sold_date_sk#77, d_date_sk#78] (90) Scan parquet default.catalog_sales -Output [3]: [cs_quantity#92, cs_list_price#93, cs_sold_date_sk#94] +Output [3]: [cs_quantity#81, cs_list_price#82, cs_sold_date_sk#83] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#94), dynamicpruningexpression(cs_sold_date_sk#94 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(cs_sold_date_sk#83), dynamicpruningexpression(cs_sold_date_sk#83 IN dynamicpruning#12)] ReadSchema: struct (91) ColumnarToRow [codegen id : 4] -Input [3]: [cs_quantity#92, cs_list_price#93, cs_sold_date_sk#94] +Input [3]: [cs_quantity#81, cs_list_price#82, cs_sold_date_sk#83] (92) ReusedExchange [Reuses operator id: 117] -Output [1]: [d_date_sk#95] +Output [1]: [d_date_sk#84] (93) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#94] -Right keys [1]: [d_date_sk#95] +Left keys [1]: [cs_sold_date_sk#83] +Right keys [1]: [d_date_sk#84] Join condition: None (94) Project [codegen id : 4] -Output [2]: [cs_quantity#92 AS quantity#96, cs_list_price#93 AS list_price#97] -Input [4]: [cs_quantity#92, cs_list_price#93, cs_sold_date_sk#94, d_date_sk#95] +Output [2]: [cs_quantity#81 AS quantity#85, cs_list_price#82 AS list_price#86] +Input [4]: [cs_quantity#81, cs_list_price#82, cs_sold_date_sk#83, d_date_sk#84] (95) Scan parquet default.web_sales -Output [3]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100] +Output [3]: [ws_quantity#87, ws_list_price#88, ws_sold_date_sk#89] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#100), dynamicpruningexpression(ws_sold_date_sk#100 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ws_sold_date_sk#89), dynamicpruningexpression(ws_sold_date_sk#89 IN dynamicpruning#12)] ReadSchema: struct (96) ColumnarToRow [codegen id : 6] -Input [3]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100] +Input [3]: [ws_quantity#87, ws_list_price#88, ws_sold_date_sk#89] (97) ReusedExchange [Reuses operator id: 117] -Output [1]: [d_date_sk#101] +Output [1]: [d_date_sk#90] (98) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#100] -Right keys [1]: [d_date_sk#101] +Left keys [1]: [ws_sold_date_sk#89] +Right keys [1]: [d_date_sk#90] Join condition: None (99) Project [codegen id : 6] -Output [2]: [ws_quantity#98 AS quantity#102, ws_list_price#99 AS list_price#103] -Input [4]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100, d_date_sk#101] +Output [2]: [ws_quantity#87 AS quantity#91, ws_list_price#88 AS list_price#92] +Input [4]: [ws_quantity#87, ws_list_price#88, ws_sold_date_sk#89, d_date_sk#90] (100) Union (101) HashAggregate [codegen id : 7] -Input [2]: [quantity#90, list_price#91] +Input [2]: [quantity#79, list_price#80] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#104, count#105] -Results [2]: [sum#106, count#107] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#93, count#94] +Results [2]: [sum#95, count#96] (102) Exchange -Input [2]: [sum#106, count#107] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#108] +Input [2]: [sum#95, count#96] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] (103) HashAggregate [codegen id : 8] -Input [2]: [sum#106, count#107] +Input [2]: [sum#95, count#96] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))#109] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))#109 AS average_sales#110] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))#97] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))#97 AS average_sales#98] -Subquery:2 Hosting operator id = 85 Hosting Expression = ss_sold_date_sk#88 IN dynamicpruning#12 +Subquery:2 Hosting operator id = 85 Hosting Expression = ss_sold_date_sk#77 IN dynamicpruning#12 -Subquery:3 Hosting operator id = 90 Hosting Expression = cs_sold_date_sk#94 IN dynamicpruning#12 +Subquery:3 Hosting operator id = 90 Hosting Expression = cs_sold_date_sk#83 IN dynamicpruning#12 -Subquery:4 Hosting operator id = 95 Hosting Expression = ws_sold_date_sk#100 IN dynamicpruning#12 +Subquery:4 Hosting operator id = 95 Hosting Expression = ws_sold_date_sk#89 IN dynamicpruning#12 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 BroadcastExchange (108) @@ -595,28 +595,28 @@ BroadcastExchange (108) (104) Scan parquet default.date_dim -Output [2]: [d_date_sk#48, d_week_seq#111] +Output [2]: [d_date_sk#40, d_week_seq#99] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct (105) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#48, d_week_seq#111] +Input [2]: [d_date_sk#40, d_week_seq#99] (106) Filter [codegen id : 1] -Input [2]: [d_date_sk#48, d_week_seq#111] -Condition : ((isnotnull(d_week_seq#111) AND (d_week_seq#111 = Subquery scalar-subquery#112, [id=#113])) AND isnotnull(d_date_sk#48)) +Input [2]: [d_date_sk#40, d_week_seq#99] +Condition : ((isnotnull(d_week_seq#99) AND (d_week_seq#99 = Subquery scalar-subquery#100, [id=#101])) AND isnotnull(d_date_sk#40)) (107) Project [codegen id : 1] -Output [1]: [d_date_sk#48] -Input [2]: [d_date_sk#48, d_week_seq#111] +Output [1]: [d_date_sk#40] +Input [2]: [d_date_sk#40, d_week_seq#99] (108) BroadcastExchange -Input [1]: [d_date_sk#48] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#114] +Input [1]: [d_date_sk#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=13] -Subquery:6 Hosting operator id = 106 Hosting Expression = Subquery scalar-subquery#112, [id=#113] +Subquery:6 Hosting operator id = 106 Hosting Expression = Subquery scalar-subquery#100, [id=#101] * Project (112) +- * Filter (111) +- * ColumnarToRow (110) @@ -624,22 +624,22 @@ Subquery:6 Hosting operator id = 106 Hosting Expression = Subquery scalar-subque (109) Scan parquet default.date_dim -Output [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] +Output [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,2000), EqualTo(d_moy,12), EqualTo(d_dom,11)] ReadSchema: struct (110) ColumnarToRow [codegen id : 1] -Input [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] +Input [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] (111) Filter [codegen id : 1] -Input [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] -Condition : (((((isnotnull(d_year#116) AND isnotnull(d_moy#117)) AND isnotnull(d_dom#118)) AND (d_year#116 = 2000)) AND (d_moy#117 = 12)) AND (d_dom#118 = 11)) +Input [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] +Condition : (((((isnotnull(d_year#103) AND isnotnull(d_moy#104)) AND isnotnull(d_dom#105)) AND (d_year#103 = 2000)) AND (d_moy#104 = 12)) AND (d_dom#105 = 11)) (112) Project [codegen id : 1] -Output [1]: [d_week_seq#115] -Input [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] +Output [1]: [d_week_seq#102] +Input [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] Subquery:7 Hosting operator id = 7 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#12 BroadcastExchange (117) @@ -650,34 +650,34 @@ BroadcastExchange (117) (113) Scan parquet default.date_dim -Output [2]: [d_date_sk#27, d_year#119] +Output [2]: [d_date_sk#24, d_year#106] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (114) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#27, d_year#119] +Input [2]: [d_date_sk#24, d_year#106] (115) Filter [codegen id : 1] -Input [2]: [d_date_sk#27, d_year#119] -Condition : (((isnotnull(d_year#119) AND (d_year#119 >= 1999)) AND (d_year#119 <= 2001)) AND isnotnull(d_date_sk#27)) +Input [2]: [d_date_sk#24, d_year#106] +Condition : (((isnotnull(d_year#106) AND (d_year#106 >= 1999)) AND (d_year#106 <= 2001)) AND isnotnull(d_date_sk#24)) (116) Project [codegen id : 1] -Output [1]: [d_date_sk#27] -Input [2]: [d_date_sk#27, d_year#119] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_year#106] (117) BroadcastExchange -Input [1]: [d_date_sk#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#120] +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] Subquery:8 Hosting operator id = 13 Hosting Expression = cs_sold_date_sk#18 IN dynamicpruning#12 -Subquery:9 Hosting operator id = 36 Hosting Expression = ws_sold_date_sk#33 IN dynamicpruning#12 +Subquery:9 Hosting operator id = 36 Hosting Expression = ws_sold_date_sk#29 IN dynamicpruning#12 -Subquery:10 Hosting operator id = 81 Hosting Expression = ReusedSubquery Subquery scalar-subquery#61, [id=#62] +Subquery:10 Hosting operator id = 81 Hosting Expression = ReusedSubquery Subquery scalar-subquery#52, [id=#53] -Subquery:11 Hosting operator id = 67 Hosting Expression = ss_sold_date_sk#66 IN dynamicpruning#67 +Subquery:11 Hosting operator id = 67 Hosting Expression = ss_sold_date_sk#57 IN dynamicpruning#58 BroadcastExchange (122) +- * Project (121) +- * Filter (120) @@ -686,28 +686,28 @@ BroadcastExchange (122) (118) Scan parquet default.date_dim -Output [2]: [d_date_sk#72, d_week_seq#121] +Output [2]: [d_date_sk#63, d_week_seq#107] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct (119) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#72, d_week_seq#121] +Input [2]: [d_date_sk#63, d_week_seq#107] (120) Filter [codegen id : 1] -Input [2]: [d_date_sk#72, d_week_seq#121] -Condition : ((isnotnull(d_week_seq#121) AND (d_week_seq#121 = Subquery scalar-subquery#122, [id=#123])) AND isnotnull(d_date_sk#72)) +Input [2]: [d_date_sk#63, d_week_seq#107] +Condition : ((isnotnull(d_week_seq#107) AND (d_week_seq#107 = Subquery scalar-subquery#108, [id=#109])) AND isnotnull(d_date_sk#63)) (121) Project [codegen id : 1] -Output [1]: [d_date_sk#72] -Input [2]: [d_date_sk#72, d_week_seq#121] +Output [1]: [d_date_sk#63] +Input [2]: [d_date_sk#63, d_week_seq#107] (122) BroadcastExchange -Input [1]: [d_date_sk#72] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#124] +Input [1]: [d_date_sk#63] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=15] -Subquery:12 Hosting operator id = 120 Hosting Expression = Subquery scalar-subquery#122, [id=#123] +Subquery:12 Hosting operator id = 120 Hosting Expression = Subquery scalar-subquery#108, [id=#109] * Project (126) +- * Filter (125) +- * ColumnarToRow (124) @@ -715,21 +715,21 @@ Subquery:12 Hosting operator id = 120 Hosting Expression = Subquery scalar-subqu (123) Scan parquet default.date_dim -Output [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] +Output [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,11)] ReadSchema: struct (124) ColumnarToRow [codegen id : 1] -Input [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] +Input [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] (125) Filter [codegen id : 1] -Input [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] -Condition : (((((isnotnull(d_year#126) AND isnotnull(d_moy#127)) AND isnotnull(d_dom#128)) AND (d_year#126 = 1999)) AND (d_moy#127 = 12)) AND (d_dom#128 = 11)) +Input [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] +Condition : (((((isnotnull(d_year#111) AND isnotnull(d_moy#112)) AND isnotnull(d_dom#113)) AND (d_year#111 = 1999)) AND (d_moy#112 = 12)) AND (d_dom#113 = 11)) (126) Project [codegen id : 1] -Output [1]: [d_week_seq#125] -Input [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] +Output [1]: [d_week_seq#110] +Input [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.sf100/explain.txt index 55c39ed2e3288..b87297409526a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15.sf100/explain.txt @@ -58,103 +58,103 @@ Input [4]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3, d_date_s (7) Exchange Input [2]: [cs_bill_customer_sk#1, cs_sales_price#2] -Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#6] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (8) Sort [codegen id : 3] Input [2]: [cs_bill_customer_sk#1, cs_sales_price#2] Arguments: [cs_bill_customer_sk#1 ASC NULLS FIRST], false, 0 (9) Scan parquet default.customer -Output [2]: [c_customer_sk#7, c_current_addr_sk#8] +Output [2]: [c_customer_sk#6, c_current_addr_sk#7] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (10) ColumnarToRow [codegen id : 4] -Input [2]: [c_customer_sk#7, c_current_addr_sk#8] +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] (11) Filter [codegen id : 4] -Input [2]: [c_customer_sk#7, c_current_addr_sk#8] -Condition : (isnotnull(c_customer_sk#7) AND isnotnull(c_current_addr_sk#8)) +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Condition : (isnotnull(c_customer_sk#6) AND isnotnull(c_current_addr_sk#7)) (12) Exchange -Input [2]: [c_customer_sk#7, c_current_addr_sk#8] -Arguments: hashpartitioning(c_current_addr_sk#8, 5), ENSURE_REQUIREMENTS, [id=#9] +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Arguments: hashpartitioning(c_current_addr_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (13) Sort [codegen id : 5] -Input [2]: [c_customer_sk#7, c_current_addr_sk#8] -Arguments: [c_current_addr_sk#8 ASC NULLS FIRST], false, 0 +Input [2]: [c_customer_sk#6, c_current_addr_sk#7] +Arguments: [c_current_addr_sk#7 ASC NULLS FIRST], false, 0 (14) Scan parquet default.customer_address -Output [3]: [ca_address_sk#10, ca_state#11, ca_zip#12] +Output [3]: [ca_address_sk#8, ca_state#9, ca_zip#10] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 6] -Input [3]: [ca_address_sk#10, ca_state#11, ca_zip#12] +Input [3]: [ca_address_sk#8, ca_state#9, ca_zip#10] (16) Filter [codegen id : 6] -Input [3]: [ca_address_sk#10, ca_state#11, ca_zip#12] -Condition : isnotnull(ca_address_sk#10) +Input [3]: [ca_address_sk#8, ca_state#9, ca_zip#10] +Condition : isnotnull(ca_address_sk#8) (17) Exchange -Input [3]: [ca_address_sk#10, ca_state#11, ca_zip#12] -Arguments: hashpartitioning(ca_address_sk#10, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [3]: [ca_address_sk#8, ca_state#9, ca_zip#10] +Arguments: hashpartitioning(ca_address_sk#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) Sort [codegen id : 7] -Input [3]: [ca_address_sk#10, ca_state#11, ca_zip#12] -Arguments: [ca_address_sk#10 ASC NULLS FIRST], false, 0 +Input [3]: [ca_address_sk#8, ca_state#9, ca_zip#10] +Arguments: [ca_address_sk#8 ASC NULLS FIRST], false, 0 (19) SortMergeJoin [codegen id : 8] -Left keys [1]: [c_current_addr_sk#8] -Right keys [1]: [ca_address_sk#10] +Left keys [1]: [c_current_addr_sk#7] +Right keys [1]: [ca_address_sk#8] Join condition: None (20) Project [codegen id : 8] -Output [3]: [c_customer_sk#7, ca_state#11, ca_zip#12] -Input [5]: [c_customer_sk#7, c_current_addr_sk#8, ca_address_sk#10, ca_state#11, ca_zip#12] +Output [3]: [c_customer_sk#6, ca_state#9, ca_zip#10] +Input [5]: [c_customer_sk#6, c_current_addr_sk#7, ca_address_sk#8, ca_state#9, ca_zip#10] (21) Exchange -Input [3]: [c_customer_sk#7, ca_state#11, ca_zip#12] -Arguments: hashpartitioning(c_customer_sk#7, 5), ENSURE_REQUIREMENTS, [id=#14] +Input [3]: [c_customer_sk#6, ca_state#9, ca_zip#10] +Arguments: hashpartitioning(c_customer_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=4] (22) Sort [codegen id : 9] -Input [3]: [c_customer_sk#7, ca_state#11, ca_zip#12] -Arguments: [c_customer_sk#7 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#6, ca_state#9, ca_zip#10] +Arguments: [c_customer_sk#6 ASC NULLS FIRST], false, 0 (23) SortMergeJoin [codegen id : 10] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#7] -Join condition: ((substr(ca_zip#12, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR ca_state#11 IN (CA,WA,GA)) OR (cs_sales_price#2 > 500.00)) +Right keys [1]: [c_customer_sk#6] +Join condition: ((substr(ca_zip#10, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR ca_state#9 IN (CA,WA,GA)) OR (cs_sales_price#2 > 500.00)) (24) Project [codegen id : 10] -Output [2]: [cs_sales_price#2, ca_zip#12] -Input [5]: [cs_bill_customer_sk#1, cs_sales_price#2, c_customer_sk#7, ca_state#11, ca_zip#12] +Output [2]: [cs_sales_price#2, ca_zip#10] +Input [5]: [cs_bill_customer_sk#1, cs_sales_price#2, c_customer_sk#6, ca_state#9, ca_zip#10] (25) HashAggregate [codegen id : 10] -Input [2]: [cs_sales_price#2, ca_zip#12] -Keys [1]: [ca_zip#12] +Input [2]: [cs_sales_price#2, ca_zip#10] +Keys [1]: [ca_zip#10] Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#2))] -Aggregate Attributes [1]: [sum#15] -Results [2]: [ca_zip#12, sum#16] +Aggregate Attributes [1]: [sum#11] +Results [2]: [ca_zip#10, sum#12] (26) Exchange -Input [2]: [ca_zip#12, sum#16] -Arguments: hashpartitioning(ca_zip#12, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [2]: [ca_zip#10, sum#12] +Arguments: hashpartitioning(ca_zip#10, 5), ENSURE_REQUIREMENTS, [plan_id=5] (27) HashAggregate [codegen id : 11] -Input [2]: [ca_zip#12, sum#16] -Keys [1]: [ca_zip#12] +Input [2]: [ca_zip#10, sum#12] +Keys [1]: [ca_zip#10] Functions [1]: [sum(UnscaledValue(cs_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#2))#18] -Results [2]: [ca_zip#12, MakeDecimal(sum(UnscaledValue(cs_sales_price#2))#18,17,2) AS sum(cs_sales_price)#19] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#2))#13] +Results [2]: [ca_zip#10, MakeDecimal(sum(UnscaledValue(cs_sales_price#2))#13,17,2) AS sum(cs_sales_price)#14] (28) TakeOrderedAndProject -Input [2]: [ca_zip#12, sum(cs_sales_price)#19] -Arguments: 100, [ca_zip#12 ASC NULLS FIRST], [ca_zip#12, sum(cs_sales_price)#19] +Input [2]: [ca_zip#10, sum(cs_sales_price)#14] +Arguments: 100, [ca_zip#10 ASC NULLS FIRST], [ca_zip#10, sum(cs_sales_price)#14] ===== Subqueries ===== @@ -167,25 +167,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [3]: [d_date_sk#5, d_year#20, d_qoy#21] +Output [3]: [d_date_sk#5, d_year#15, d_qoy#16] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#5, d_year#20, d_qoy#21] +Input [3]: [d_date_sk#5, d_year#15, d_qoy#16] (31) Filter [codegen id : 1] -Input [3]: [d_date_sk#5, d_year#20, d_qoy#21] -Condition : ((((isnotnull(d_qoy#21) AND isnotnull(d_year#20)) AND (d_qoy#21 = 2)) AND (d_year#20 = 2001)) AND isnotnull(d_date_sk#5)) +Input [3]: [d_date_sk#5, d_year#15, d_qoy#16] +Condition : ((((isnotnull(d_qoy#16) AND isnotnull(d_year#15)) AND (d_qoy#16 = 2)) AND (d_year#15 = 2001)) AND isnotnull(d_date_sk#5)) (32) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [3]: [d_date_sk#5, d_year#20, d_qoy#21] +Input [3]: [d_date_sk#5, d_year#15, d_qoy#16] (33) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt index fabc11130f172..47e84bc6e570f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q15/explain.txt @@ -54,7 +54,7 @@ Condition : (isnotnull(c_customer_sk#5) AND isnotnull(c_current_addr_sk#6)) (7) BroadcastExchange Input [2]: [c_customer_sk#5, c_current_addr_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_bill_customer_sk#1] @@ -66,65 +66,65 @@ Output [3]: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#6] Input [5]: [cs_bill_customer_sk#1, cs_sales_price#2, cs_sold_date_sk#3, c_customer_sk#5, c_current_addr_sk#6] (10) Scan parquet default.customer_address -Output [3]: [ca_address_sk#8, ca_state#9, ca_zip#10] +Output [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [3]: [ca_address_sk#8, ca_state#9, ca_zip#10] +Input [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] (12) Filter [codegen id : 2] -Input [3]: [ca_address_sk#8, ca_state#9, ca_zip#10] -Condition : isnotnull(ca_address_sk#8) +Input [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] +Condition : isnotnull(ca_address_sk#7) (13) BroadcastExchange -Input [3]: [ca_address_sk#8, ca_state#9, ca_zip#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Input [3]: [ca_address_sk#7, ca_state#8, ca_zip#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 4] Left keys [1]: [c_current_addr_sk#6] -Right keys [1]: [ca_address_sk#8] -Join condition: ((substr(ca_zip#10, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR ca_state#9 IN (CA,WA,GA)) OR (cs_sales_price#2 > 500.00)) +Right keys [1]: [ca_address_sk#7] +Join condition: ((substr(ca_zip#9, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR ca_state#8 IN (CA,WA,GA)) OR (cs_sales_price#2 > 500.00)) (15) Project [codegen id : 4] -Output [3]: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#10] -Input [6]: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#6, ca_address_sk#8, ca_state#9, ca_zip#10] +Output [3]: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#9] +Input [6]: [cs_sales_price#2, cs_sold_date_sk#3, c_current_addr_sk#6, ca_address_sk#7, ca_state#8, ca_zip#9] (16) ReusedExchange [Reuses operator id: 27] -Output [1]: [d_date_sk#12] +Output [1]: [d_date_sk#10] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#3] -Right keys [1]: [d_date_sk#12] +Right keys [1]: [d_date_sk#10] Join condition: None (18) Project [codegen id : 4] -Output [2]: [cs_sales_price#2, ca_zip#10] -Input [4]: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#10, d_date_sk#12] +Output [2]: [cs_sales_price#2, ca_zip#9] +Input [4]: [cs_sales_price#2, cs_sold_date_sk#3, ca_zip#9, d_date_sk#10] (19) HashAggregate [codegen id : 4] -Input [2]: [cs_sales_price#2, ca_zip#10] -Keys [1]: [ca_zip#10] +Input [2]: [cs_sales_price#2, ca_zip#9] +Keys [1]: [ca_zip#9] Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#2))] -Aggregate Attributes [1]: [sum#13] -Results [2]: [ca_zip#10, sum#14] +Aggregate Attributes [1]: [sum#11] +Results [2]: [ca_zip#9, sum#12] (20) Exchange -Input [2]: [ca_zip#10, sum#14] -Arguments: hashpartitioning(ca_zip#10, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [2]: [ca_zip#9, sum#12] +Arguments: hashpartitioning(ca_zip#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 5] -Input [2]: [ca_zip#10, sum#14] -Keys [1]: [ca_zip#10] +Input [2]: [ca_zip#9, sum#12] +Keys [1]: [ca_zip#9] Functions [1]: [sum(UnscaledValue(cs_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#2))#16] -Results [2]: [ca_zip#10, MakeDecimal(sum(UnscaledValue(cs_sales_price#2))#16,17,2) AS sum(cs_sales_price)#17] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#2))#13] +Results [2]: [ca_zip#9, MakeDecimal(sum(UnscaledValue(cs_sales_price#2))#13,17,2) AS sum(cs_sales_price)#14] (22) TakeOrderedAndProject -Input [2]: [ca_zip#10, sum(cs_sales_price)#17] -Arguments: 100, [ca_zip#10 ASC NULLS FIRST], [ca_zip#10, sum(cs_sales_price)#17] +Input [2]: [ca_zip#9, sum(cs_sales_price)#14] +Arguments: 100, [ca_zip#9 ASC NULLS FIRST], [ca_zip#9, sum(cs_sales_price)#14] ===== Subqueries ===== @@ -137,25 +137,25 @@ BroadcastExchange (27) (23) Scan parquet default.date_dim -Output [3]: [d_date_sk#12, d_year#18, d_qoy#19] +Output [3]: [d_date_sk#10, d_year#15, d_qoy#16] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (24) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#12, d_year#18, d_qoy#19] +Input [3]: [d_date_sk#10, d_year#15, d_qoy#16] (25) Filter [codegen id : 1] -Input [3]: [d_date_sk#12, d_year#18, d_qoy#19] -Condition : ((((isnotnull(d_qoy#19) AND isnotnull(d_year#18)) AND (d_qoy#19 = 2)) AND (d_year#18 = 2001)) AND isnotnull(d_date_sk#12)) +Input [3]: [d_date_sk#10, d_year#15, d_qoy#16] +Condition : ((((isnotnull(d_qoy#16) AND isnotnull(d_year#15)) AND (d_qoy#16 = 2)) AND (d_year#15 = 2001)) AND isnotnull(d_date_sk#10)) (26) Project [codegen id : 1] -Output [1]: [d_date_sk#12] -Input [3]: [d_date_sk#12, d_year#18, d_qoy#19] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#15, d_qoy#16] (27) BroadcastExchange -Input [1]: [d_date_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt index cd26a5c2495dd..ed76e1ab09444 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16.sf100/explain.txt @@ -66,190 +66,190 @@ Input [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_wareho (5) Exchange Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Arguments: hashpartitioning(cs_order_number#5, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(cs_order_number#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] (6) Sort [codegen id : 2] Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] Arguments: [cs_order_number#5 ASC NULLS FIRST], false, 0 (7) Scan parquet default.catalog_sales -Output [3]: [cs_warehouse_sk#10, cs_order_number#11, cs_sold_date_sk#12] +Output [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] ReadSchema: struct (8) ColumnarToRow [codegen id : 3] -Input [3]: [cs_warehouse_sk#10, cs_order_number#11, cs_sold_date_sk#12] +Input [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] (9) Project [codegen id : 3] -Output [2]: [cs_warehouse_sk#10, cs_order_number#11] -Input [3]: [cs_warehouse_sk#10, cs_order_number#11, cs_sold_date_sk#12] +Output [2]: [cs_warehouse_sk#9, cs_order_number#10] +Input [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] (10) Exchange -Input [2]: [cs_warehouse_sk#10, cs_order_number#11] -Arguments: hashpartitioning(cs_order_number#11, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: hashpartitioning(cs_order_number#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [2]: [cs_warehouse_sk#10, cs_order_number#11] -Arguments: [cs_order_number#11 ASC NULLS FIRST], false, 0 +Input [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: [cs_order_number#10 ASC NULLS FIRST], false, 0 (12) SortMergeJoin [codegen id : 5] Left keys [1]: [cs_order_number#5] -Right keys [1]: [cs_order_number#11] -Join condition: NOT (cs_warehouse_sk#4 = cs_warehouse_sk#10) +Right keys [1]: [cs_order_number#10] +Join condition: NOT (cs_warehouse_sk#4 = cs_warehouse_sk#9) (13) Project [codegen id : 5] Output [6]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] (14) Scan parquet default.catalog_returns -Output [2]: [cr_order_number#14, cr_returned_date_sk#15] +Output [2]: [cr_order_number#12, cr_returned_date_sk#13] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] ReadSchema: struct (15) ColumnarToRow [codegen id : 6] -Input [2]: [cr_order_number#14, cr_returned_date_sk#15] +Input [2]: [cr_order_number#12, cr_returned_date_sk#13] (16) Project [codegen id : 6] -Output [1]: [cr_order_number#14] -Input [2]: [cr_order_number#14, cr_returned_date_sk#15] +Output [1]: [cr_order_number#12] +Input [2]: [cr_order_number#12, cr_returned_date_sk#13] (17) Exchange -Input [1]: [cr_order_number#14] -Arguments: hashpartitioning(cr_order_number#14, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [1]: [cr_order_number#12] +Arguments: hashpartitioning(cr_order_number#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) Sort [codegen id : 7] -Input [1]: [cr_order_number#14] -Arguments: [cr_order_number#14 ASC NULLS FIRST], false, 0 +Input [1]: [cr_order_number#12] +Arguments: [cr_order_number#12 ASC NULLS FIRST], false, 0 (19) SortMergeJoin [codegen id : 11] Left keys [1]: [cs_order_number#5] -Right keys [1]: [cr_order_number#14] +Right keys [1]: [cr_order_number#12] Join condition: None (20) Scan parquet default.customer_address -Output [2]: [ca_address_sk#17, ca_state#18] +Output [2]: [ca_address_sk#14, ca_state#15] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 8] -Input [2]: [ca_address_sk#17, ca_state#18] +Input [2]: [ca_address_sk#14, ca_state#15] (22) Filter [codegen id : 8] -Input [2]: [ca_address_sk#17, ca_state#18] -Condition : ((isnotnull(ca_state#18) AND (ca_state#18 = GA)) AND isnotnull(ca_address_sk#17)) +Input [2]: [ca_address_sk#14, ca_state#15] +Condition : ((isnotnull(ca_state#15) AND (ca_state#15 = GA)) AND isnotnull(ca_address_sk#14)) (23) Project [codegen id : 8] -Output [1]: [ca_address_sk#17] -Input [2]: [ca_address_sk#17, ca_state#18] +Output [1]: [ca_address_sk#14] +Input [2]: [ca_address_sk#14, ca_state#15] (24) BroadcastExchange -Input [1]: [ca_address_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] +Input [1]: [ca_address_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (25) BroadcastHashJoin [codegen id : 11] Left keys [1]: [cs_ship_addr_sk#2] -Right keys [1]: [ca_address_sk#17] +Right keys [1]: [ca_address_sk#14] Join condition: None (26) Project [codegen id : 11] Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, ca_address_sk#17] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, ca_address_sk#14] (27) Scan parquet default.call_center -Output [2]: [cc_call_center_sk#20, cc_county#21] +Output [2]: [cc_call_center_sk#16, cc_county#17] Batched: true Location [not included in comparison]/{warehouse_dir}/call_center] PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 9] -Input [2]: [cc_call_center_sk#20, cc_county#21] +Input [2]: [cc_call_center_sk#16, cc_county#17] (29) Filter [codegen id : 9] -Input [2]: [cc_call_center_sk#20, cc_county#21] -Condition : ((isnotnull(cc_county#21) AND (cc_county#21 = Williamson County)) AND isnotnull(cc_call_center_sk#20)) +Input [2]: [cc_call_center_sk#16, cc_county#17] +Condition : ((isnotnull(cc_county#17) AND (cc_county#17 = Williamson County)) AND isnotnull(cc_call_center_sk#16)) (30) Project [codegen id : 9] -Output [1]: [cc_call_center_sk#20] -Input [2]: [cc_call_center_sk#20, cc_county#21] +Output [1]: [cc_call_center_sk#16] +Input [2]: [cc_call_center_sk#16, cc_county#17] (31) BroadcastExchange -Input [1]: [cc_call_center_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [cc_call_center_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (32) BroadcastHashJoin [codegen id : 11] Left keys [1]: [cs_call_center_sk#3] -Right keys [1]: [cc_call_center_sk#20] +Right keys [1]: [cc_call_center_sk#16] Join condition: None (33) Project [codegen id : 11] Output [4]: [cs_ship_date_sk#1, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Input [6]: [cs_ship_date_sk#1, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cc_call_center_sk#20] +Input [6]: [cs_ship_date_sk#1, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cc_call_center_sk#16] (34) Scan parquet default.date_dim -Output [2]: [d_date_sk#23, d_date#24] +Output [2]: [d_date_sk#18, d_date#19] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2002-02-01), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 10] -Input [2]: [d_date_sk#23, d_date#24] +Input [2]: [d_date_sk#18, d_date#19] (36) Filter [codegen id : 10] -Input [2]: [d_date_sk#23, d_date#24] -Condition : (((isnotnull(d_date#24) AND (d_date#24 >= 2002-02-01)) AND (d_date#24 <= 2002-04-02)) AND isnotnull(d_date_sk#23)) +Input [2]: [d_date_sk#18, d_date#19] +Condition : (((isnotnull(d_date#19) AND (d_date#19 >= 2002-02-01)) AND (d_date#19 <= 2002-04-02)) AND isnotnull(d_date_sk#18)) (37) Project [codegen id : 10] -Output [1]: [d_date_sk#23] -Input [2]: [d_date_sk#23, d_date#24] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_date#19] (38) BroadcastExchange -Input [1]: [d_date_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] (39) BroadcastHashJoin [codegen id : 11] Left keys [1]: [cs_ship_date_sk#1] -Right keys [1]: [d_date_sk#23] +Right keys [1]: [d_date_sk#18] Join condition: None (40) Project [codegen id : 11] Output [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Input [5]: [cs_ship_date_sk#1, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, d_date_sk#23] +Input [5]: [cs_ship_date_sk#1, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, d_date_sk#18] (41) HashAggregate [codegen id : 11] Input [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] Keys [1]: [cs_order_number#5] Functions [2]: [partial_sum(UnscaledValue(cs_ext_ship_cost#6)), partial_sum(UnscaledValue(cs_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#26, sum(UnscaledValue(cs_net_profit#7))#27] -Results [3]: [cs_order_number#5, sum#28, sum#29] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21] +Results [3]: [cs_order_number#5, sum#22, sum#23] (42) HashAggregate [codegen id : 11] -Input [3]: [cs_order_number#5, sum#28, sum#29] +Input [3]: [cs_order_number#5, sum#22, sum#23] Keys [1]: [cs_order_number#5] Functions [2]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#26, sum(UnscaledValue(cs_net_profit#7))#27] -Results [3]: [cs_order_number#5, sum#28, sum#29] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21] +Results [3]: [cs_order_number#5, sum#22, sum#23] (43) HashAggregate [codegen id : 11] -Input [3]: [cs_order_number#5, sum#28, sum#29] +Input [3]: [cs_order_number#5, sum#22, sum#23] Keys: [] Functions [3]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7)), partial_count(distinct cs_order_number#5)] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#26, sum(UnscaledValue(cs_net_profit#7))#27, count(cs_order_number#5)#30] -Results [3]: [sum#28, sum#29, count#31] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21, count(cs_order_number#5)#24] +Results [3]: [sum#22, sum#23, count#25] (44) Exchange -Input [3]: [sum#28, sum#29, count#31] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#32] +Input [3]: [sum#22, sum#23, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (45) HashAggregate [codegen id : 12] -Input [3]: [sum#28, sum#29, count#31] +Input [3]: [sum#22, sum#23, count#25] Keys: [] Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net_profit#7)), count(distinct cs_order_number#5)] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#26, sum(UnscaledValue(cs_net_profit#7))#27, count(cs_order_number#5)#30] -Results [3]: [count(cs_order_number#5)#30 AS order count #33, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#26,17,2) AS total shipping cost #34, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#27,17,2) AS total net profit #35] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21, count(cs_order_number#5)#24] +Results [3]: [count(cs_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#20,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#21,17,2) AS total net profit #28] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt index 210be116962bb..a88526686772c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q16/explain.txt @@ -66,190 +66,190 @@ Input [8]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_wareho (5) Exchange Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Arguments: hashpartitioning(cs_order_number#5, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(cs_order_number#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] (6) Sort [codegen id : 2] Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] Arguments: [cs_order_number#5 ASC NULLS FIRST], false, 0 (7) Scan parquet default.catalog_sales -Output [3]: [cs_warehouse_sk#10, cs_order_number#11, cs_sold_date_sk#12] +Output [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] ReadSchema: struct (8) ColumnarToRow [codegen id : 3] -Input [3]: [cs_warehouse_sk#10, cs_order_number#11, cs_sold_date_sk#12] +Input [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] (9) Project [codegen id : 3] -Output [2]: [cs_warehouse_sk#10, cs_order_number#11] -Input [3]: [cs_warehouse_sk#10, cs_order_number#11, cs_sold_date_sk#12] +Output [2]: [cs_warehouse_sk#9, cs_order_number#10] +Input [3]: [cs_warehouse_sk#9, cs_order_number#10, cs_sold_date_sk#11] (10) Exchange -Input [2]: [cs_warehouse_sk#10, cs_order_number#11] -Arguments: hashpartitioning(cs_order_number#11, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: hashpartitioning(cs_order_number#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [2]: [cs_warehouse_sk#10, cs_order_number#11] -Arguments: [cs_order_number#11 ASC NULLS FIRST], false, 0 +Input [2]: [cs_warehouse_sk#9, cs_order_number#10] +Arguments: [cs_order_number#10 ASC NULLS FIRST], false, 0 (12) SortMergeJoin [codegen id : 5] Left keys [1]: [cs_order_number#5] -Right keys [1]: [cs_order_number#11] -Join condition: NOT (cs_warehouse_sk#4 = cs_warehouse_sk#10) +Right keys [1]: [cs_order_number#10] +Join condition: NOT (cs_warehouse_sk#4 = cs_warehouse_sk#9) (13) Project [codegen id : 5] Output [6]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_warehouse_sk#4, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] (14) Scan parquet default.catalog_returns -Output [2]: [cr_order_number#14, cr_returned_date_sk#15] +Output [2]: [cr_order_number#12, cr_returned_date_sk#13] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] ReadSchema: struct (15) ColumnarToRow [codegen id : 6] -Input [2]: [cr_order_number#14, cr_returned_date_sk#15] +Input [2]: [cr_order_number#12, cr_returned_date_sk#13] (16) Project [codegen id : 6] -Output [1]: [cr_order_number#14] -Input [2]: [cr_order_number#14, cr_returned_date_sk#15] +Output [1]: [cr_order_number#12] +Input [2]: [cr_order_number#12, cr_returned_date_sk#13] (17) Exchange -Input [1]: [cr_order_number#14] -Arguments: hashpartitioning(cr_order_number#14, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [1]: [cr_order_number#12] +Arguments: hashpartitioning(cr_order_number#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) Sort [codegen id : 7] -Input [1]: [cr_order_number#14] -Arguments: [cr_order_number#14 ASC NULLS FIRST], false, 0 +Input [1]: [cr_order_number#12] +Arguments: [cr_order_number#12 ASC NULLS FIRST], false, 0 (19) SortMergeJoin [codegen id : 11] Left keys [1]: [cs_order_number#5] -Right keys [1]: [cr_order_number#14] +Right keys [1]: [cr_order_number#12] Join condition: None (20) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_date#18] +Output [2]: [d_date_sk#14, d_date#15] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2002-02-01), LessThanOrEqual(d_date,2002-04-02), IsNotNull(d_date_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 8] -Input [2]: [d_date_sk#17, d_date#18] +Input [2]: [d_date_sk#14, d_date#15] (22) Filter [codegen id : 8] -Input [2]: [d_date_sk#17, d_date#18] -Condition : (((isnotnull(d_date#18) AND (d_date#18 >= 2002-02-01)) AND (d_date#18 <= 2002-04-02)) AND isnotnull(d_date_sk#17)) +Input [2]: [d_date_sk#14, d_date#15] +Condition : (((isnotnull(d_date#15) AND (d_date#15 >= 2002-02-01)) AND (d_date#15 <= 2002-04-02)) AND isnotnull(d_date_sk#14)) (23) Project [codegen id : 8] -Output [1]: [d_date_sk#17] -Input [2]: [d_date_sk#17, d_date#18] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_date#15] (24) BroadcastExchange -Input [1]: [d_date_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (25) BroadcastHashJoin [codegen id : 11] Left keys [1]: [cs_ship_date_sk#1] -Right keys [1]: [d_date_sk#17] +Right keys [1]: [d_date_sk#14] Join condition: None (26) Project [codegen id : 11] Output [5]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, d_date_sk#17] +Input [7]: [cs_ship_date_sk#1, cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, d_date_sk#14] (27) Scan parquet default.customer_address -Output [2]: [ca_address_sk#20, ca_state#21] +Output [2]: [ca_address_sk#16, ca_state#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 9] -Input [2]: [ca_address_sk#20, ca_state#21] +Input [2]: [ca_address_sk#16, ca_state#17] (29) Filter [codegen id : 9] -Input [2]: [ca_address_sk#20, ca_state#21] -Condition : ((isnotnull(ca_state#21) AND (ca_state#21 = GA)) AND isnotnull(ca_address_sk#20)) +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = GA)) AND isnotnull(ca_address_sk#16)) (30) Project [codegen id : 9] -Output [1]: [ca_address_sk#20] -Input [2]: [ca_address_sk#20, ca_state#21] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_state#17] (31) BroadcastExchange -Input [1]: [ca_address_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (32) BroadcastHashJoin [codegen id : 11] Left keys [1]: [cs_ship_addr_sk#2] -Right keys [1]: [ca_address_sk#20] +Right keys [1]: [ca_address_sk#16] Join condition: None (33) Project [codegen id : 11] Output [4]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Input [6]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, ca_address_sk#20] +Input [6]: [cs_ship_addr_sk#2, cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, ca_address_sk#16] (34) Scan parquet default.call_center -Output [2]: [cc_call_center_sk#23, cc_county#24] +Output [2]: [cc_call_center_sk#18, cc_county#19] Batched: true Location [not included in comparison]/{warehouse_dir}/call_center] PushedFilters: [IsNotNull(cc_county), EqualTo(cc_county,Williamson County), IsNotNull(cc_call_center_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 10] -Input [2]: [cc_call_center_sk#23, cc_county#24] +Input [2]: [cc_call_center_sk#18, cc_county#19] (36) Filter [codegen id : 10] -Input [2]: [cc_call_center_sk#23, cc_county#24] -Condition : ((isnotnull(cc_county#24) AND (cc_county#24 = Williamson County)) AND isnotnull(cc_call_center_sk#23)) +Input [2]: [cc_call_center_sk#18, cc_county#19] +Condition : ((isnotnull(cc_county#19) AND (cc_county#19 = Williamson County)) AND isnotnull(cc_call_center_sk#18)) (37) Project [codegen id : 10] -Output [1]: [cc_call_center_sk#23] -Input [2]: [cc_call_center_sk#23, cc_county#24] +Output [1]: [cc_call_center_sk#18] +Input [2]: [cc_call_center_sk#18, cc_county#19] (38) BroadcastExchange -Input [1]: [cc_call_center_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [cc_call_center_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] (39) BroadcastHashJoin [codegen id : 11] Left keys [1]: [cs_call_center_sk#3] -Right keys [1]: [cc_call_center_sk#23] +Right keys [1]: [cc_call_center_sk#18] Join condition: None (40) Project [codegen id : 11] Output [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] -Input [5]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cc_call_center_sk#23] +Input [5]: [cs_call_center_sk#3, cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7, cc_call_center_sk#18] (41) HashAggregate [codegen id : 11] Input [3]: [cs_order_number#5, cs_ext_ship_cost#6, cs_net_profit#7] Keys [1]: [cs_order_number#5] Functions [2]: [partial_sum(UnscaledValue(cs_ext_ship_cost#6)), partial_sum(UnscaledValue(cs_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#26, sum(UnscaledValue(cs_net_profit#7))#27] -Results [3]: [cs_order_number#5, sum#28, sum#29] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21] +Results [3]: [cs_order_number#5, sum#22, sum#23] (42) HashAggregate [codegen id : 11] -Input [3]: [cs_order_number#5, sum#28, sum#29] +Input [3]: [cs_order_number#5, sum#22, sum#23] Keys [1]: [cs_order_number#5] Functions [2]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#26, sum(UnscaledValue(cs_net_profit#7))#27] -Results [3]: [cs_order_number#5, sum#28, sum#29] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21] +Results [3]: [cs_order_number#5, sum#22, sum#23] (43) HashAggregate [codegen id : 11] -Input [3]: [cs_order_number#5, sum#28, sum#29] +Input [3]: [cs_order_number#5, sum#22, sum#23] Keys: [] Functions [3]: [merge_sum(UnscaledValue(cs_ext_ship_cost#6)), merge_sum(UnscaledValue(cs_net_profit#7)), partial_count(distinct cs_order_number#5)] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#26, sum(UnscaledValue(cs_net_profit#7))#27, count(cs_order_number#5)#30] -Results [3]: [sum#28, sum#29, count#31] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21, count(cs_order_number#5)#24] +Results [3]: [sum#22, sum#23, count#25] (44) Exchange -Input [3]: [sum#28, sum#29, count#31] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#32] +Input [3]: [sum#22, sum#23, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (45) HashAggregate [codegen id : 12] -Input [3]: [sum#28, sum#29, count#31] +Input [3]: [sum#22, sum#23, count#25] Keys: [] Functions [3]: [sum(UnscaledValue(cs_ext_ship_cost#6)), sum(UnscaledValue(cs_net_profit#7)), count(distinct cs_order_number#5)] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#26, sum(UnscaledValue(cs_net_profit#7))#27, count(cs_order_number#5)#30] -Results [3]: [count(cs_order_number#5)#30 AS order count #33, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#26,17,2) AS total shipping cost #34, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#27,17,2) AS total net profit #35] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_ship_cost#6))#20, sum(UnscaledValue(cs_net_profit#7))#21, count(cs_order_number#5)#24] +Results [3]: [count(cs_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(cs_ext_ship_cost#6))#20,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(cs_net_profit#7))#21,17,2) AS total net profit #28] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt index d61798f6ad06e..53df5af321707 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17.sf100/explain.txt @@ -93,7 +93,7 @@ Condition : isnotnull(s_store_sk#9) (10) BroadcastExchange Input [2]: [s_store_sk#9, s_state#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_store_sk#3] @@ -106,168 +106,168 @@ Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s (13) Exchange Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#12] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) Sort [codegen id : 4] Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (15) Scan parquet default.item -Output [3]: [i_item_sk#13, i_item_id#14, i_item_desc#15] +Output [3]: [i_item_sk#11, i_item_id#12, i_item_desc#13] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [3]: [i_item_sk#13, i_item_id#14, i_item_desc#15] +Input [3]: [i_item_sk#11, i_item_id#12, i_item_desc#13] (17) Filter [codegen id : 5] -Input [3]: [i_item_sk#13, i_item_id#14, i_item_desc#15] -Condition : isnotnull(i_item_sk#13) +Input [3]: [i_item_sk#11, i_item_id#12, i_item_desc#13] +Condition : isnotnull(i_item_sk#11) (18) Exchange -Input [3]: [i_item_sk#13, i_item_id#14, i_item_desc#15] -Arguments: hashpartitioning(i_item_sk#13, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [3]: [i_item_sk#11, i_item_id#12, i_item_desc#13] +Arguments: hashpartitioning(i_item_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 6] -Input [3]: [i_item_sk#13, i_item_id#14, i_item_desc#15] -Arguments: [i_item_sk#13 ASC NULLS FIRST], false, 0 +Input [3]: [i_item_sk#11, i_item_id#12, i_item_desc#13] +Arguments: [i_item_sk#11 ASC NULLS FIRST], false, 0 (20) SortMergeJoin [codegen id : 7] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#13] +Right keys [1]: [i_item_sk#11] Join condition: None (21) Project [codegen id : 7] -Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_sk#13, i_item_id#14, i_item_desc#15] +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_id#12, i_item_desc#13] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_sk#11, i_item_id#12, i_item_desc#13] (22) Exchange -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15] -Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_id#12, i_item_desc#13] +Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) Sort [codegen id : 8] -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_id#12, i_item_desc#13] Arguments: [ss_customer_sk#2 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST], false, 0 (24) Scan parquet default.store_returns -Output [5]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21, sr_returned_date_sk#22] +Output [5]: [sr_item_sk#14, sr_customer_sk#15, sr_ticket_number#16, sr_return_quantity#17, sr_returned_date_sk#18] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(sr_returned_date_sk#22), dynamicpruningexpression(sr_returned_date_sk#22 IN dynamicpruning#23)] +PartitionFilters: [isnotnull(sr_returned_date_sk#18), dynamicpruningexpression(sr_returned_date_sk#18 IN dynamicpruning#19)] PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] ReadSchema: struct (25) ColumnarToRow [codegen id : 10] -Input [5]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21, sr_returned_date_sk#22] +Input [5]: [sr_item_sk#14, sr_customer_sk#15, sr_ticket_number#16, sr_return_quantity#17, sr_returned_date_sk#18] (26) Filter [codegen id : 10] -Input [5]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21, sr_returned_date_sk#22] -Condition : ((isnotnull(sr_customer_sk#19) AND isnotnull(sr_item_sk#18)) AND isnotnull(sr_ticket_number#20)) +Input [5]: [sr_item_sk#14, sr_customer_sk#15, sr_ticket_number#16, sr_return_quantity#17, sr_returned_date_sk#18] +Condition : ((isnotnull(sr_customer_sk#15) AND isnotnull(sr_item_sk#14)) AND isnotnull(sr_ticket_number#16)) (27) ReusedExchange [Reuses operator id: 59] -Output [1]: [d_date_sk#24] +Output [1]: [d_date_sk#20] (28) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [sr_returned_date_sk#22] -Right keys [1]: [d_date_sk#24] +Left keys [1]: [sr_returned_date_sk#18] +Right keys [1]: [d_date_sk#20] Join condition: None (29) Project [codegen id : 10] -Output [4]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21] -Input [6]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21, sr_returned_date_sk#22, d_date_sk#24] +Output [4]: [sr_item_sk#14, sr_customer_sk#15, sr_ticket_number#16, sr_return_quantity#17] +Input [6]: [sr_item_sk#14, sr_customer_sk#15, sr_ticket_number#16, sr_return_quantity#17, sr_returned_date_sk#18, d_date_sk#20] (30) Exchange -Input [4]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21] -Arguments: hashpartitioning(sr_customer_sk#19, sr_item_sk#18, sr_ticket_number#20, 5), ENSURE_REQUIREMENTS, [id=#25] +Input [4]: [sr_item_sk#14, sr_customer_sk#15, sr_ticket_number#16, sr_return_quantity#17] +Arguments: hashpartitioning(sr_customer_sk#15, sr_item_sk#14, sr_ticket_number#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] (31) Sort [codegen id : 11] -Input [4]: [sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21] -Arguments: [sr_customer_sk#19 ASC NULLS FIRST, sr_item_sk#18 ASC NULLS FIRST, sr_ticket_number#20 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#14, sr_customer_sk#15, sr_ticket_number#16, sr_return_quantity#17] +Arguments: [sr_customer_sk#15 ASC NULLS FIRST, sr_item_sk#14 ASC NULLS FIRST, sr_ticket_number#16 ASC NULLS FIRST], false, 0 (32) SortMergeJoin [codegen id : 12] Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] -Right keys [3]: [sr_customer_sk#19, sr_item_sk#18, sr_ticket_number#20] +Right keys [3]: [sr_customer_sk#15, sr_item_sk#14, sr_ticket_number#16] Join condition: None (33) Project [codegen id : 12] -Output [7]: [ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15, sr_item_sk#18, sr_customer_sk#19, sr_return_quantity#21] -Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15, sr_item_sk#18, sr_customer_sk#19, sr_ticket_number#20, sr_return_quantity#21] +Output [7]: [ss_quantity#5, s_state#10, i_item_id#12, i_item_desc#13, sr_item_sk#14, sr_customer_sk#15, sr_return_quantity#17] +Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_state#10, i_item_id#12, i_item_desc#13, sr_item_sk#14, sr_customer_sk#15, sr_ticket_number#16, sr_return_quantity#17] (34) Exchange -Input [7]: [ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15, sr_item_sk#18, sr_customer_sk#19, sr_return_quantity#21] -Arguments: hashpartitioning(sr_customer_sk#19, sr_item_sk#18, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [7]: [ss_quantity#5, s_state#10, i_item_id#12, i_item_desc#13, sr_item_sk#14, sr_customer_sk#15, sr_return_quantity#17] +Arguments: hashpartitioning(sr_customer_sk#15, sr_item_sk#14, 5), ENSURE_REQUIREMENTS, [plan_id=6] (35) Sort [codegen id : 13] -Input [7]: [ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15, sr_item_sk#18, sr_customer_sk#19, sr_return_quantity#21] -Arguments: [sr_customer_sk#19 ASC NULLS FIRST, sr_item_sk#18 ASC NULLS FIRST], false, 0 +Input [7]: [ss_quantity#5, s_state#10, i_item_id#12, i_item_desc#13, sr_item_sk#14, sr_customer_sk#15, sr_return_quantity#17] +Arguments: [sr_customer_sk#15 ASC NULLS FIRST, sr_item_sk#14 ASC NULLS FIRST], false, 0 (36) Scan parquet default.catalog_sales -Output [4]: [cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29, cs_sold_date_sk#30] +Output [4]: [cs_bill_customer_sk#21, cs_item_sk#22, cs_quantity#23, cs_sold_date_sk#24] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#30), dynamicpruningexpression(cs_sold_date_sk#30 IN dynamicpruning#23)] +PartitionFilters: [isnotnull(cs_sold_date_sk#24), dynamicpruningexpression(cs_sold_date_sk#24 IN dynamicpruning#19)] PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 15] -Input [4]: [cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29, cs_sold_date_sk#30] +Input [4]: [cs_bill_customer_sk#21, cs_item_sk#22, cs_quantity#23, cs_sold_date_sk#24] (38) Filter [codegen id : 15] -Input [4]: [cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29, cs_sold_date_sk#30] -Condition : (isnotnull(cs_bill_customer_sk#27) AND isnotnull(cs_item_sk#28)) +Input [4]: [cs_bill_customer_sk#21, cs_item_sk#22, cs_quantity#23, cs_sold_date_sk#24] +Condition : (isnotnull(cs_bill_customer_sk#21) AND isnotnull(cs_item_sk#22)) (39) ReusedExchange [Reuses operator id: 59] -Output [1]: [d_date_sk#31] +Output [1]: [d_date_sk#25] (40) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [cs_sold_date_sk#30] -Right keys [1]: [d_date_sk#31] +Left keys [1]: [cs_sold_date_sk#24] +Right keys [1]: [d_date_sk#25] Join condition: None (41) Project [codegen id : 15] -Output [3]: [cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29] -Input [5]: [cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29, cs_sold_date_sk#30, d_date_sk#31] +Output [3]: [cs_bill_customer_sk#21, cs_item_sk#22, cs_quantity#23] +Input [5]: [cs_bill_customer_sk#21, cs_item_sk#22, cs_quantity#23, cs_sold_date_sk#24, d_date_sk#25] (42) Exchange -Input [3]: [cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29] -Arguments: hashpartitioning(cs_bill_customer_sk#27, cs_item_sk#28, 5), ENSURE_REQUIREMENTS, [id=#32] +Input [3]: [cs_bill_customer_sk#21, cs_item_sk#22, cs_quantity#23] +Arguments: hashpartitioning(cs_bill_customer_sk#21, cs_item_sk#22, 5), ENSURE_REQUIREMENTS, [plan_id=7] (43) Sort [codegen id : 16] -Input [3]: [cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29] -Arguments: [cs_bill_customer_sk#27 ASC NULLS FIRST, cs_item_sk#28 ASC NULLS FIRST], false, 0 +Input [3]: [cs_bill_customer_sk#21, cs_item_sk#22, cs_quantity#23] +Arguments: [cs_bill_customer_sk#21 ASC NULLS FIRST, cs_item_sk#22 ASC NULLS FIRST], false, 0 (44) SortMergeJoin [codegen id : 17] -Left keys [2]: [sr_customer_sk#19, sr_item_sk#18] -Right keys [2]: [cs_bill_customer_sk#27, cs_item_sk#28] +Left keys [2]: [sr_customer_sk#15, sr_item_sk#14] +Right keys [2]: [cs_bill_customer_sk#21, cs_item_sk#22] Join condition: None (45) Project [codegen id : 17] -Output [6]: [ss_quantity#5, sr_return_quantity#21, cs_quantity#29, s_state#10, i_item_id#14, i_item_desc#15] -Input [10]: [ss_quantity#5, s_state#10, i_item_id#14, i_item_desc#15, sr_item_sk#18, sr_customer_sk#19, sr_return_quantity#21, cs_bill_customer_sk#27, cs_item_sk#28, cs_quantity#29] +Output [6]: [ss_quantity#5, sr_return_quantity#17, cs_quantity#23, s_state#10, i_item_id#12, i_item_desc#13] +Input [10]: [ss_quantity#5, s_state#10, i_item_id#12, i_item_desc#13, sr_item_sk#14, sr_customer_sk#15, sr_return_quantity#17, cs_bill_customer_sk#21, cs_item_sk#22, cs_quantity#23] (46) HashAggregate [codegen id : 17] -Input [6]: [ss_quantity#5, sr_return_quantity#21, cs_quantity#29, s_state#10, i_item_id#14, i_item_desc#15] -Keys [3]: [i_item_id#14, i_item_desc#15, s_state#10] -Functions [9]: [partial_count(ss_quantity#5), partial_avg(ss_quantity#5), partial_stddev_samp(cast(ss_quantity#5 as double)), partial_count(sr_return_quantity#21), partial_avg(sr_return_quantity#21), partial_stddev_samp(cast(sr_return_quantity#21 as double)), partial_count(cs_quantity#29), partial_avg(cs_quantity#29), partial_stddev_samp(cast(cs_quantity#29 as double))] -Aggregate Attributes [18]: [count#33, sum#34, count#35, n#36, avg#37, m2#38, count#39, sum#40, count#41, n#42, avg#43, m2#44, count#45, sum#46, count#47, n#48, avg#49, m2#50] -Results [21]: [i_item_id#14, i_item_desc#15, s_state#10, count#51, sum#52, count#53, n#54, avg#55, m2#56, count#57, sum#58, count#59, n#60, avg#61, m2#62, count#63, sum#64, count#65, n#66, avg#67, m2#68] +Input [6]: [ss_quantity#5, sr_return_quantity#17, cs_quantity#23, s_state#10, i_item_id#12, i_item_desc#13] +Keys [3]: [i_item_id#12, i_item_desc#13, s_state#10] +Functions [9]: [partial_count(ss_quantity#5), partial_avg(ss_quantity#5), partial_stddev_samp(cast(ss_quantity#5 as double)), partial_count(sr_return_quantity#17), partial_avg(sr_return_quantity#17), partial_stddev_samp(cast(sr_return_quantity#17 as double)), partial_count(cs_quantity#23), partial_avg(cs_quantity#23), partial_stddev_samp(cast(cs_quantity#23 as double))] +Aggregate Attributes [18]: [count#26, sum#27, count#28, n#29, avg#30, m2#31, count#32, sum#33, count#34, n#35, avg#36, m2#37, count#38, sum#39, count#40, n#41, avg#42, m2#43] +Results [21]: [i_item_id#12, i_item_desc#13, s_state#10, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] (47) Exchange -Input [21]: [i_item_id#14, i_item_desc#15, s_state#10, count#51, sum#52, count#53, n#54, avg#55, m2#56, count#57, sum#58, count#59, n#60, avg#61, m2#62, count#63, sum#64, count#65, n#66, avg#67, m2#68] -Arguments: hashpartitioning(i_item_id#14, i_item_desc#15, s_state#10, 5), ENSURE_REQUIREMENTS, [id=#69] +Input [21]: [i_item_id#12, i_item_desc#13, s_state#10, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] +Arguments: hashpartitioning(i_item_id#12, i_item_desc#13, s_state#10, 5), ENSURE_REQUIREMENTS, [plan_id=8] (48) HashAggregate [codegen id : 18] -Input [21]: [i_item_id#14, i_item_desc#15, s_state#10, count#51, sum#52, count#53, n#54, avg#55, m2#56, count#57, sum#58, count#59, n#60, avg#61, m2#62, count#63, sum#64, count#65, n#66, avg#67, m2#68] -Keys [3]: [i_item_id#14, i_item_desc#15, s_state#10] -Functions [9]: [count(ss_quantity#5), avg(ss_quantity#5), stddev_samp(cast(ss_quantity#5 as double)), count(sr_return_quantity#21), avg(sr_return_quantity#21), stddev_samp(cast(sr_return_quantity#21 as double)), count(cs_quantity#29), avg(cs_quantity#29), stddev_samp(cast(cs_quantity#29 as double))] -Aggregate Attributes [9]: [count(ss_quantity#5)#70, avg(ss_quantity#5)#71, stddev_samp(cast(ss_quantity#5 as double))#72, count(sr_return_quantity#21)#73, avg(sr_return_quantity#21)#74, stddev_samp(cast(sr_return_quantity#21 as double))#75, count(cs_quantity#29)#76, avg(cs_quantity#29)#77, stddev_samp(cast(cs_quantity#29 as double))#78] -Results [15]: [i_item_id#14, i_item_desc#15, s_state#10, count(ss_quantity#5)#70 AS store_sales_quantitycount#79, avg(ss_quantity#5)#71 AS store_sales_quantityave#80, stddev_samp(cast(ss_quantity#5 as double))#72 AS store_sales_quantitystdev#81, (stddev_samp(cast(ss_quantity#5 as double))#72 / avg(ss_quantity#5)#71) AS store_sales_quantitycov#82, count(sr_return_quantity#21)#73 AS as_store_returns_quantitycount#83, avg(sr_return_quantity#21)#74 AS as_store_returns_quantityave#84, stddev_samp(cast(sr_return_quantity#21 as double))#75 AS as_store_returns_quantitystdev#85, (stddev_samp(cast(sr_return_quantity#21 as double))#75 / avg(sr_return_quantity#21)#74) AS store_returns_quantitycov#86, count(cs_quantity#29)#76 AS catalog_sales_quantitycount#87, avg(cs_quantity#29)#77 AS catalog_sales_quantityave#88, (stddev_samp(cast(cs_quantity#29 as double))#78 / avg(cs_quantity#29)#77) AS catalog_sales_quantitystdev#89, (stddev_samp(cast(cs_quantity#29 as double))#78 / avg(cs_quantity#29)#77) AS catalog_sales_quantitycov#90] +Input [21]: [i_item_id#12, i_item_desc#13, s_state#10, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] +Keys [3]: [i_item_id#12, i_item_desc#13, s_state#10] +Functions [9]: [count(ss_quantity#5), avg(ss_quantity#5), stddev_samp(cast(ss_quantity#5 as double)), count(sr_return_quantity#17), avg(sr_return_quantity#17), stddev_samp(cast(sr_return_quantity#17 as double)), count(cs_quantity#23), avg(cs_quantity#23), stddev_samp(cast(cs_quantity#23 as double))] +Aggregate Attributes [9]: [count(ss_quantity#5)#62, avg(ss_quantity#5)#63, stddev_samp(cast(ss_quantity#5 as double))#64, count(sr_return_quantity#17)#65, avg(sr_return_quantity#17)#66, stddev_samp(cast(sr_return_quantity#17 as double))#67, count(cs_quantity#23)#68, avg(cs_quantity#23)#69, stddev_samp(cast(cs_quantity#23 as double))#70] +Results [15]: [i_item_id#12, i_item_desc#13, s_state#10, count(ss_quantity#5)#62 AS store_sales_quantitycount#71, avg(ss_quantity#5)#63 AS store_sales_quantityave#72, stddev_samp(cast(ss_quantity#5 as double))#64 AS store_sales_quantitystdev#73, (stddev_samp(cast(ss_quantity#5 as double))#64 / avg(ss_quantity#5)#63) AS store_sales_quantitycov#74, count(sr_return_quantity#17)#65 AS as_store_returns_quantitycount#75, avg(sr_return_quantity#17)#66 AS as_store_returns_quantityave#76, stddev_samp(cast(sr_return_quantity#17 as double))#67 AS as_store_returns_quantitystdev#77, (stddev_samp(cast(sr_return_quantity#17 as double))#67 / avg(sr_return_quantity#17)#66) AS store_returns_quantitycov#78, count(cs_quantity#23)#68 AS catalog_sales_quantitycount#79, avg(cs_quantity#23)#69 AS catalog_sales_quantityave#80, (stddev_samp(cast(cs_quantity#23 as double))#70 / avg(cs_quantity#23)#69) AS catalog_sales_quantitystdev#81, (stddev_samp(cast(cs_quantity#23 as double))#70 / avg(cs_quantity#23)#69) AS catalog_sales_quantitycov#82] (49) TakeOrderedAndProject -Input [15]: [i_item_id#14, i_item_desc#15, s_state#10, store_sales_quantitycount#79, store_sales_quantityave#80, store_sales_quantitystdev#81, store_sales_quantitycov#82, as_store_returns_quantitycount#83, as_store_returns_quantityave#84, as_store_returns_quantitystdev#85, store_returns_quantitycov#86, catalog_sales_quantitycount#87, catalog_sales_quantityave#88, catalog_sales_quantitystdev#89, catalog_sales_quantitycov#90] -Arguments: 100, [i_item_id#14 ASC NULLS FIRST, i_item_desc#15 ASC NULLS FIRST, s_state#10 ASC NULLS FIRST], [i_item_id#14, i_item_desc#15, s_state#10, store_sales_quantitycount#79, store_sales_quantityave#80, store_sales_quantitystdev#81, store_sales_quantitycov#82, as_store_returns_quantitycount#83, as_store_returns_quantityave#84, as_store_returns_quantitystdev#85, store_returns_quantitycov#86, catalog_sales_quantitycount#87, catalog_sales_quantityave#88, catalog_sales_quantitystdev#89, catalog_sales_quantitycov#90] +Input [15]: [i_item_id#12, i_item_desc#13, s_state#10, store_sales_quantitycount#71, store_sales_quantityave#72, store_sales_quantitystdev#73, store_sales_quantitycov#74, as_store_returns_quantitycount#75, as_store_returns_quantityave#76, as_store_returns_quantitystdev#77, store_returns_quantitycov#78, catalog_sales_quantitycount#79, catalog_sales_quantityave#80, catalog_sales_quantitystdev#81, catalog_sales_quantitycov#82] +Arguments: 100, [i_item_id#12 ASC NULLS FIRST, i_item_desc#13 ASC NULLS FIRST, s_state#10 ASC NULLS FIRST], [i_item_id#12, i_item_desc#13, s_state#10, store_sales_quantitycount#71, store_sales_quantityave#72, store_sales_quantitystdev#73, store_sales_quantitycov#74, as_store_returns_quantitycount#75, as_store_returns_quantityave#76, as_store_returns_quantitystdev#77, store_returns_quantitycov#78, catalog_sales_quantitycount#79, catalog_sales_quantityave#80, catalog_sales_quantitystdev#81, catalog_sales_quantitycov#82] ===== Subqueries ===== @@ -280,28 +280,28 @@ BroadcastExchange (54) (50) Scan parquet default.date_dim -Output [2]: [d_date_sk#8, d_quarter_name#91] +Output [2]: [d_date_sk#8, d_quarter_name#83] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#8, d_quarter_name#91] +Input [2]: [d_date_sk#8, d_quarter_name#83] (52) Filter [codegen id : 1] -Input [2]: [d_date_sk#8, d_quarter_name#91] -Condition : ((isnotnull(d_quarter_name#91) AND (d_quarter_name#91 = 2001Q1)) AND isnotnull(d_date_sk#8)) +Input [2]: [d_date_sk#8, d_quarter_name#83] +Condition : ((isnotnull(d_quarter_name#83) AND (d_quarter_name#83 = 2001Q1)) AND isnotnull(d_date_sk#8)) (53) Project [codegen id : 1] Output [1]: [d_date_sk#8] -Input [2]: [d_date_sk#8, d_quarter_name#91] +Input [2]: [d_date_sk#8, d_quarter_name#83] (54) BroadcastExchange Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#92] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] -Subquery:2 Hosting operator id = 24 Hosting Expression = sr_returned_date_sk#22 IN dynamicpruning#23 +Subquery:2 Hosting operator id = 24 Hosting Expression = sr_returned_date_sk#18 IN dynamicpruning#19 BroadcastExchange (59) +- * Project (58) +- * Filter (57) @@ -310,27 +310,27 @@ BroadcastExchange (59) (55) Scan parquet default.date_dim -Output [2]: [d_date_sk#24, d_quarter_name#93] +Output [2]: [d_date_sk#20, d_quarter_name#84] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)] ReadSchema: struct (56) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#24, d_quarter_name#93] +Input [2]: [d_date_sk#20, d_quarter_name#84] (57) Filter [codegen id : 1] -Input [2]: [d_date_sk#24, d_quarter_name#93] -Condition : (d_quarter_name#93 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#24)) +Input [2]: [d_date_sk#20, d_quarter_name#84] +Condition : (d_quarter_name#84 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#20)) (58) Project [codegen id : 1] -Output [1]: [d_date_sk#24] -Input [2]: [d_date_sk#24, d_quarter_name#93] +Output [1]: [d_date_sk#20] +Input [2]: [d_date_sk#20, d_quarter_name#84] (59) BroadcastExchange -Input [1]: [d_date_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#94] +Input [1]: [d_date_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] -Subquery:3 Hosting operator id = 36 Hosting Expression = cs_sold_date_sk#30 IN dynamicpruning#23 +Subquery:3 Hosting operator id = 36 Hosting Expression = cs_sold_date_sk#24 IN dynamicpruning#19 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt index d63f34438d5da..ba615c26f227d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q17/explain.txt @@ -73,7 +73,7 @@ Condition : ((isnotnull(sr_customer_sk#9) AND isnotnull(sr_item_sk#8)) AND isnot (7) BroadcastExchange Input [5]: [sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11, sr_returned_date_sk#12] -Arguments: HashedRelationBroadcastMode(List(input[1, int, false], input[0, int, false], input[2, int, false]),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(input[1, int, false], input[0, int, false], input[2, int, false]),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 8] Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] @@ -85,144 +85,144 @@ Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_i Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11, sr_returned_date_sk#12] (10) Scan parquet default.catalog_sales -Output [4]: [cs_bill_customer_sk#15, cs_item_sk#16, cs_quantity#17, cs_sold_date_sk#18] +Output [4]: [cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16, cs_sold_date_sk#17] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#18), dynamicpruningexpression(cs_sold_date_sk#18 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(cs_sold_date_sk#17), dynamicpruningexpression(cs_sold_date_sk#17 IN dynamicpruning#13)] PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [4]: [cs_bill_customer_sk#15, cs_item_sk#16, cs_quantity#17, cs_sold_date_sk#18] +Input [4]: [cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16, cs_sold_date_sk#17] (12) Filter [codegen id : 2] -Input [4]: [cs_bill_customer_sk#15, cs_item_sk#16, cs_quantity#17, cs_sold_date_sk#18] -Condition : (isnotnull(cs_bill_customer_sk#15) AND isnotnull(cs_item_sk#16)) +Input [4]: [cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16, cs_sold_date_sk#17] +Condition : (isnotnull(cs_bill_customer_sk#14) AND isnotnull(cs_item_sk#15)) (13) BroadcastExchange -Input [4]: [cs_bill_customer_sk#15, cs_item_sk#16, cs_quantity#17, cs_sold_date_sk#18] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#19] +Input [4]: [cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16, cs_sold_date_sk#17] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 8] Left keys [2]: [sr_customer_sk#9, sr_item_sk#8] -Right keys [2]: [cs_bill_customer_sk#15, cs_item_sk#16] +Right keys [2]: [cs_bill_customer_sk#14, cs_item_sk#15] Join condition: None (15) Project [codegen id : 8] -Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#17, cs_sold_date_sk#18] -Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11, sr_returned_date_sk#12, cs_bill_customer_sk#15, cs_item_sk#16, cs_quantity#17, cs_sold_date_sk#18] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#16, cs_sold_date_sk#17] +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11, sr_returned_date_sk#12, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16, cs_sold_date_sk#17] (16) ReusedExchange [Reuses operator id: 45] -Output [1]: [d_date_sk#20] +Output [1]: [d_date_sk#18] (17) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_sold_date_sk#6] -Right keys [1]: [d_date_sk#20] +Right keys [1]: [d_date_sk#18] Join condition: None (18) Project [codegen id : 8] -Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#17, cs_sold_date_sk#18] -Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#17, cs_sold_date_sk#18, d_date_sk#20] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#16, cs_sold_date_sk#17] +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#16, cs_sold_date_sk#17, d_date_sk#18] (19) ReusedExchange [Reuses operator id: 50] -Output [1]: [d_date_sk#21] +Output [1]: [d_date_sk#19] (20) BroadcastHashJoin [codegen id : 8] Left keys [1]: [sr_returned_date_sk#12] -Right keys [1]: [d_date_sk#21] +Right keys [1]: [d_date_sk#19] Join condition: None (21) Project [codegen id : 8] -Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#17, cs_sold_date_sk#18] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#17, cs_sold_date_sk#18, d_date_sk#21] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#16, cs_sold_date_sk#17] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#16, cs_sold_date_sk#17, d_date_sk#19] (22) ReusedExchange [Reuses operator id: 50] -Output [1]: [d_date_sk#22] +Output [1]: [d_date_sk#20] (23) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_sold_date_sk#18] -Right keys [1]: [d_date_sk#22] +Left keys [1]: [cs_sold_date_sk#17] +Right keys [1]: [d_date_sk#20] Join condition: None (24) Project [codegen id : 8] -Output [5]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#17] -Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#17, cs_sold_date_sk#18, d_date_sk#22] +Output [5]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#16] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#16, cs_sold_date_sk#17, d_date_sk#20] (25) Scan parquet default.store -Output [2]: [s_store_sk#23, s_state#24] +Output [2]: [s_store_sk#21, s_state#22] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 6] -Input [2]: [s_store_sk#23, s_state#24] +Input [2]: [s_store_sk#21, s_state#22] (27) Filter [codegen id : 6] -Input [2]: [s_store_sk#23, s_state#24] -Condition : isnotnull(s_store_sk#23) +Input [2]: [s_store_sk#21, s_state#22] +Condition : isnotnull(s_store_sk#21) (28) BroadcastExchange -Input [2]: [s_store_sk#23, s_state#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] +Input [2]: [s_store_sk#21, s_state#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (29) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#23] +Right keys [1]: [s_store_sk#21] Join condition: None (30) Project [codegen id : 8] -Output [5]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#11, cs_quantity#17, s_state#24] -Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#17, s_store_sk#23, s_state#24] +Output [5]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#11, cs_quantity#16, s_state#22] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#16, s_store_sk#21, s_state#22] (31) Scan parquet default.item -Output [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] +Output [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 7] -Input [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] (33) Filter [codegen id : 7] -Input [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] -Condition : isnotnull(i_item_sk#26) +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Condition : isnotnull(i_item_sk#23) (34) BroadcastExchange -Input [3]: [i_item_sk#26, i_item_id#27, i_item_desc#28] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#29] +Input [3]: [i_item_sk#23, i_item_id#24, i_item_desc#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (35) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#26] +Right keys [1]: [i_item_sk#23] Join condition: None (36) Project [codegen id : 8] -Output [6]: [ss_quantity#5, sr_return_quantity#11, cs_quantity#17, s_state#24, i_item_id#27, i_item_desc#28] -Input [8]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#11, cs_quantity#17, s_state#24, i_item_sk#26, i_item_id#27, i_item_desc#28] +Output [6]: [ss_quantity#5, sr_return_quantity#11, cs_quantity#16, s_state#22, i_item_id#24, i_item_desc#25] +Input [8]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#11, cs_quantity#16, s_state#22, i_item_sk#23, i_item_id#24, i_item_desc#25] (37) HashAggregate [codegen id : 8] -Input [6]: [ss_quantity#5, sr_return_quantity#11, cs_quantity#17, s_state#24, i_item_id#27, i_item_desc#28] -Keys [3]: [i_item_id#27, i_item_desc#28, s_state#24] -Functions [9]: [partial_count(ss_quantity#5), partial_avg(ss_quantity#5), partial_stddev_samp(cast(ss_quantity#5 as double)), partial_count(sr_return_quantity#11), partial_avg(sr_return_quantity#11), partial_stddev_samp(cast(sr_return_quantity#11 as double)), partial_count(cs_quantity#17), partial_avg(cs_quantity#17), partial_stddev_samp(cast(cs_quantity#17 as double))] -Aggregate Attributes [18]: [count#30, sum#31, count#32, n#33, avg#34, m2#35, count#36, sum#37, count#38, n#39, avg#40, m2#41, count#42, sum#43, count#44, n#45, avg#46, m2#47] -Results [21]: [i_item_id#27, i_item_desc#28, s_state#24, count#48, sum#49, count#50, n#51, avg#52, m2#53, count#54, sum#55, count#56, n#57, avg#58, m2#59, count#60, sum#61, count#62, n#63, avg#64, m2#65] +Input [6]: [ss_quantity#5, sr_return_quantity#11, cs_quantity#16, s_state#22, i_item_id#24, i_item_desc#25] +Keys [3]: [i_item_id#24, i_item_desc#25, s_state#22] +Functions [9]: [partial_count(ss_quantity#5), partial_avg(ss_quantity#5), partial_stddev_samp(cast(ss_quantity#5 as double)), partial_count(sr_return_quantity#11), partial_avg(sr_return_quantity#11), partial_stddev_samp(cast(sr_return_quantity#11 as double)), partial_count(cs_quantity#16), partial_avg(cs_quantity#16), partial_stddev_samp(cast(cs_quantity#16 as double))] +Aggregate Attributes [18]: [count#26, sum#27, count#28, n#29, avg#30, m2#31, count#32, sum#33, count#34, n#35, avg#36, m2#37, count#38, sum#39, count#40, n#41, avg#42, m2#43] +Results [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] (38) Exchange -Input [21]: [i_item_id#27, i_item_desc#28, s_state#24, count#48, sum#49, count#50, n#51, avg#52, m2#53, count#54, sum#55, count#56, n#57, avg#58, m2#59, count#60, sum#61, count#62, n#63, avg#64, m2#65] -Arguments: hashpartitioning(i_item_id#27, i_item_desc#28, s_state#24, 5), ENSURE_REQUIREMENTS, [id=#66] +Input [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] +Arguments: hashpartitioning(i_item_id#24, i_item_desc#25, s_state#22, 5), ENSURE_REQUIREMENTS, [plan_id=5] (39) HashAggregate [codegen id : 9] -Input [21]: [i_item_id#27, i_item_desc#28, s_state#24, count#48, sum#49, count#50, n#51, avg#52, m2#53, count#54, sum#55, count#56, n#57, avg#58, m2#59, count#60, sum#61, count#62, n#63, avg#64, m2#65] -Keys [3]: [i_item_id#27, i_item_desc#28, s_state#24] -Functions [9]: [count(ss_quantity#5), avg(ss_quantity#5), stddev_samp(cast(ss_quantity#5 as double)), count(sr_return_quantity#11), avg(sr_return_quantity#11), stddev_samp(cast(sr_return_quantity#11 as double)), count(cs_quantity#17), avg(cs_quantity#17), stddev_samp(cast(cs_quantity#17 as double))] -Aggregate Attributes [9]: [count(ss_quantity#5)#67, avg(ss_quantity#5)#68, stddev_samp(cast(ss_quantity#5 as double))#69, count(sr_return_quantity#11)#70, avg(sr_return_quantity#11)#71, stddev_samp(cast(sr_return_quantity#11 as double))#72, count(cs_quantity#17)#73, avg(cs_quantity#17)#74, stddev_samp(cast(cs_quantity#17 as double))#75] -Results [15]: [i_item_id#27, i_item_desc#28, s_state#24, count(ss_quantity#5)#67 AS store_sales_quantitycount#76, avg(ss_quantity#5)#68 AS store_sales_quantityave#77, stddev_samp(cast(ss_quantity#5 as double))#69 AS store_sales_quantitystdev#78, (stddev_samp(cast(ss_quantity#5 as double))#69 / avg(ss_quantity#5)#68) AS store_sales_quantitycov#79, count(sr_return_quantity#11)#70 AS as_store_returns_quantitycount#80, avg(sr_return_quantity#11)#71 AS as_store_returns_quantityave#81, stddev_samp(cast(sr_return_quantity#11 as double))#72 AS as_store_returns_quantitystdev#82, (stddev_samp(cast(sr_return_quantity#11 as double))#72 / avg(sr_return_quantity#11)#71) AS store_returns_quantitycov#83, count(cs_quantity#17)#73 AS catalog_sales_quantitycount#84, avg(cs_quantity#17)#74 AS catalog_sales_quantityave#85, (stddev_samp(cast(cs_quantity#17 as double))#75 / avg(cs_quantity#17)#74) AS catalog_sales_quantitystdev#86, (stddev_samp(cast(cs_quantity#17 as double))#75 / avg(cs_quantity#17)#74) AS catalog_sales_quantitycov#87] +Input [21]: [i_item_id#24, i_item_desc#25, s_state#22, count#44, sum#45, count#46, n#47, avg#48, m2#49, count#50, sum#51, count#52, n#53, avg#54, m2#55, count#56, sum#57, count#58, n#59, avg#60, m2#61] +Keys [3]: [i_item_id#24, i_item_desc#25, s_state#22] +Functions [9]: [count(ss_quantity#5), avg(ss_quantity#5), stddev_samp(cast(ss_quantity#5 as double)), count(sr_return_quantity#11), avg(sr_return_quantity#11), stddev_samp(cast(sr_return_quantity#11 as double)), count(cs_quantity#16), avg(cs_quantity#16), stddev_samp(cast(cs_quantity#16 as double))] +Aggregate Attributes [9]: [count(ss_quantity#5)#62, avg(ss_quantity#5)#63, stddev_samp(cast(ss_quantity#5 as double))#64, count(sr_return_quantity#11)#65, avg(sr_return_quantity#11)#66, stddev_samp(cast(sr_return_quantity#11 as double))#67, count(cs_quantity#16)#68, avg(cs_quantity#16)#69, stddev_samp(cast(cs_quantity#16 as double))#70] +Results [15]: [i_item_id#24, i_item_desc#25, s_state#22, count(ss_quantity#5)#62 AS store_sales_quantitycount#71, avg(ss_quantity#5)#63 AS store_sales_quantityave#72, stddev_samp(cast(ss_quantity#5 as double))#64 AS store_sales_quantitystdev#73, (stddev_samp(cast(ss_quantity#5 as double))#64 / avg(ss_quantity#5)#63) AS store_sales_quantitycov#74, count(sr_return_quantity#11)#65 AS as_store_returns_quantitycount#75, avg(sr_return_quantity#11)#66 AS as_store_returns_quantityave#76, stddev_samp(cast(sr_return_quantity#11 as double))#67 AS as_store_returns_quantitystdev#77, (stddev_samp(cast(sr_return_quantity#11 as double))#67 / avg(sr_return_quantity#11)#66) AS store_returns_quantitycov#78, count(cs_quantity#16)#68 AS catalog_sales_quantitycount#79, avg(cs_quantity#16)#69 AS catalog_sales_quantityave#80, (stddev_samp(cast(cs_quantity#16 as double))#70 / avg(cs_quantity#16)#69) AS catalog_sales_quantitystdev#81, (stddev_samp(cast(cs_quantity#16 as double))#70 / avg(cs_quantity#16)#69) AS catalog_sales_quantitycov#82] (40) TakeOrderedAndProject -Input [15]: [i_item_id#27, i_item_desc#28, s_state#24, store_sales_quantitycount#76, store_sales_quantityave#77, store_sales_quantitystdev#78, store_sales_quantitycov#79, as_store_returns_quantitycount#80, as_store_returns_quantityave#81, as_store_returns_quantitystdev#82, store_returns_quantitycov#83, catalog_sales_quantitycount#84, catalog_sales_quantityave#85, catalog_sales_quantitystdev#86, catalog_sales_quantitycov#87] -Arguments: 100, [i_item_id#27 ASC NULLS FIRST, i_item_desc#28 ASC NULLS FIRST, s_state#24 ASC NULLS FIRST], [i_item_id#27, i_item_desc#28, s_state#24, store_sales_quantitycount#76, store_sales_quantityave#77, store_sales_quantitystdev#78, store_sales_quantitycov#79, as_store_returns_quantitycount#80, as_store_returns_quantityave#81, as_store_returns_quantitystdev#82, store_returns_quantitycov#83, catalog_sales_quantitycount#84, catalog_sales_quantityave#85, catalog_sales_quantitystdev#86, catalog_sales_quantitycov#87] +Input [15]: [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#71, store_sales_quantityave#72, store_sales_quantitystdev#73, store_sales_quantitycov#74, as_store_returns_quantitycount#75, as_store_returns_quantityave#76, as_store_returns_quantitystdev#77, store_returns_quantitycov#78, catalog_sales_quantitycount#79, catalog_sales_quantityave#80, catalog_sales_quantitystdev#81, catalog_sales_quantitycov#82] +Arguments: 100, [i_item_id#24 ASC NULLS FIRST, i_item_desc#25 ASC NULLS FIRST, s_state#22 ASC NULLS FIRST], [i_item_id#24, i_item_desc#25, s_state#22, store_sales_quantitycount#71, store_sales_quantityave#72, store_sales_quantitystdev#73, store_sales_quantitycov#74, as_store_returns_quantitycount#75, as_store_returns_quantityave#76, as_store_returns_quantitystdev#77, store_returns_quantitycov#78, catalog_sales_quantitycount#79, catalog_sales_quantityave#80, catalog_sales_quantitystdev#81, catalog_sales_quantitycov#82] ===== Subqueries ===== @@ -235,26 +235,26 @@ BroadcastExchange (45) (41) Scan parquet default.date_dim -Output [2]: [d_date_sk#20, d_quarter_name#88] +Output [2]: [d_date_sk#18, d_quarter_name#83] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_quarter_name), EqualTo(d_quarter_name,2001Q1), IsNotNull(d_date_sk)] ReadSchema: struct (42) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#20, d_quarter_name#88] +Input [2]: [d_date_sk#18, d_quarter_name#83] (43) Filter [codegen id : 1] -Input [2]: [d_date_sk#20, d_quarter_name#88] -Condition : ((isnotnull(d_quarter_name#88) AND (d_quarter_name#88 = 2001Q1)) AND isnotnull(d_date_sk#20)) +Input [2]: [d_date_sk#18, d_quarter_name#83] +Condition : ((isnotnull(d_quarter_name#83) AND (d_quarter_name#83 = 2001Q1)) AND isnotnull(d_date_sk#18)) (44) Project [codegen id : 1] -Output [1]: [d_date_sk#20] -Input [2]: [d_date_sk#20, d_quarter_name#88] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_quarter_name#83] (45) BroadcastExchange -Input [1]: [d_date_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#89] +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] Subquery:2 Hosting operator id = 4 Hosting Expression = sr_returned_date_sk#12 IN dynamicpruning#13 BroadcastExchange (50) @@ -265,27 +265,27 @@ BroadcastExchange (50) (46) Scan parquet default.date_dim -Output [2]: [d_date_sk#21, d_quarter_name#90] +Output [2]: [d_date_sk#19, d_quarter_name#84] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_quarter_name, [2001Q1,2001Q2,2001Q3]), IsNotNull(d_date_sk)] ReadSchema: struct (47) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#21, d_quarter_name#90] +Input [2]: [d_date_sk#19, d_quarter_name#84] (48) Filter [codegen id : 1] -Input [2]: [d_date_sk#21, d_quarter_name#90] -Condition : (d_quarter_name#90 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#21)) +Input [2]: [d_date_sk#19, d_quarter_name#84] +Condition : (d_quarter_name#84 IN (2001Q1,2001Q2,2001Q3) AND isnotnull(d_date_sk#19)) (49) Project [codegen id : 1] -Output [1]: [d_date_sk#21] -Input [2]: [d_date_sk#21, d_quarter_name#90] +Output [1]: [d_date_sk#19] +Input [2]: [d_date_sk#19, d_quarter_name#84] (50) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#91] +Input [1]: [d_date_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:3 Hosting operator id = 10 Hosting Expression = cs_sold_date_sk#18 IN dynamicpruning#13 +Subquery:3 Hosting operator id = 10 Hosting Expression = cs_sold_date_sk#17 IN dynamicpruning#13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt index 242ef26397ae2..ad8b313ee1daf 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18.sf100/explain.txt @@ -85,7 +85,7 @@ Input [4]: [cd_demo_sk#11, cd_gender#12, cd_education_status#13, cd_dep_count#14 (8) BroadcastExchange Input [2]: [cd_demo_sk#11, cd_dep_count#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_bill_cdemo_sk#2] @@ -97,178 +97,178 @@ Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5 Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#11, cd_dep_count#14] (11) ReusedExchange [Reuses operator id: 54] -Output [1]: [d_date_sk#16] +Output [1]: [d_date_sk#15] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#9] -Right keys [1]: [d_date_sk#16] +Right keys [1]: [d_date_sk#15] Join condition: None (13) Project [codegen id : 4] Output [8]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14] -Input [10]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, d_date_sk#16] +Input [10]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, d_date_sk#15] (14) Scan parquet default.item -Output [2]: [i_item_sk#17, i_item_id#18] +Output [2]: [i_item_sk#16, i_item_id#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_sk#17, i_item_id#18] +Input [2]: [i_item_sk#16, i_item_id#17] (16) Filter [codegen id : 3] -Input [2]: [i_item_sk#17, i_item_id#18] -Condition : isnotnull(i_item_sk#17) +Input [2]: [i_item_sk#16, i_item_id#17] +Condition : isnotnull(i_item_sk#16) (17) BroadcastExchange -Input [2]: [i_item_sk#17, i_item_id#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +Input [2]: [i_item_sk#16, i_item_id#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_item_sk#3] -Right keys [1]: [i_item_sk#17] +Right keys [1]: [i_item_sk#16] Join condition: None (19) Project [codegen id : 4] -Output [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#18] -Input [10]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_sk#17, i_item_id#18] +Output [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#17] +Input [10]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_sk#16, i_item_id#17] (20) Exchange -Input [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#18] -Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#17] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) Sort [codegen id : 5] -Input [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#18] +Input [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#17] Arguments: [cs_bill_customer_sk#1 ASC NULLS FIRST], false, 0 (22) Scan parquet default.customer -Output [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Output [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [In(c_birth_month, [1,12,2,6,8,9]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 7] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] (24) Filter [codegen id : 7] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] -Condition : (((c_birth_month#24 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#21)) AND isnotnull(c_current_cdemo_sk#22)) AND isnotnull(c_current_addr_sk#23)) +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] +Condition : (((c_birth_month#21 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#18)) AND isnotnull(c_current_cdemo_sk#19)) AND isnotnull(c_current_addr_sk#20)) (25) Project [codegen id : 7] -Output [4]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_year#25] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Output [4]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_year#22] +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] (26) Scan parquet default.customer_address -Output [4]: [ca_address_sk#26, ca_county#27, ca_state#28, ca_country#29] +Output [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [IN,MS,ND,NM,OK,VA]), IsNotNull(ca_address_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 6] -Input [4]: [ca_address_sk#26, ca_county#27, ca_state#28, ca_country#29] +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] (28) Filter [codegen id : 6] -Input [4]: [ca_address_sk#26, ca_county#27, ca_state#28, ca_country#29] -Condition : (ca_state#28 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#26)) +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] +Condition : (ca_state#25 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#23)) (29) BroadcastExchange -Input [4]: [ca_address_sk#26, ca_county#27, ca_state#28, ca_country#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#30] +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (30) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#23] -Right keys [1]: [ca_address_sk#26] +Left keys [1]: [c_current_addr_sk#20] +Right keys [1]: [ca_address_sk#23] Join condition: None (31) Project [codegen id : 7] -Output [6]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29] -Input [8]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_year#25, ca_address_sk#26, ca_county#27, ca_state#28, ca_country#29] +Output [6]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26] +Input [8]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_year#22, ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] (32) Exchange -Input [6]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29] -Arguments: hashpartitioning(c_current_cdemo_sk#22, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [6]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26] +Arguments: hashpartitioning(c_current_cdemo_sk#19, 5), ENSURE_REQUIREMENTS, [plan_id=5] (33) Sort [codegen id : 8] -Input [6]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29] -Arguments: [c_current_cdemo_sk#22 ASC NULLS FIRST], false, 0 +Input [6]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26] +Arguments: [c_current_cdemo_sk#19 ASC NULLS FIRST], false, 0 (34) Scan parquet default.customer_demographics -Output [1]: [cd_demo_sk#32] +Output [1]: [cd_demo_sk#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 9] -Input [1]: [cd_demo_sk#32] +Input [1]: [cd_demo_sk#27] (36) Filter [codegen id : 9] -Input [1]: [cd_demo_sk#32] -Condition : isnotnull(cd_demo_sk#32) +Input [1]: [cd_demo_sk#27] +Condition : isnotnull(cd_demo_sk#27) (37) Exchange -Input [1]: [cd_demo_sk#32] -Arguments: hashpartitioning(cd_demo_sk#32, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [1]: [cd_demo_sk#27] +Arguments: hashpartitioning(cd_demo_sk#27, 5), ENSURE_REQUIREMENTS, [plan_id=6] (38) Sort [codegen id : 10] -Input [1]: [cd_demo_sk#32] -Arguments: [cd_demo_sk#32 ASC NULLS FIRST], false, 0 +Input [1]: [cd_demo_sk#27] +Arguments: [cd_demo_sk#27 ASC NULLS FIRST], false, 0 (39) SortMergeJoin [codegen id : 11] -Left keys [1]: [c_current_cdemo_sk#22] -Right keys [1]: [cd_demo_sk#32] +Left keys [1]: [c_current_cdemo_sk#19] +Right keys [1]: [cd_demo_sk#27] Join condition: None (40) Project [codegen id : 11] -Output [5]: [c_customer_sk#21, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29] -Input [7]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29, cd_demo_sk#32] +Output [5]: [c_customer_sk#18, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26] +Input [7]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26, cd_demo_sk#27] (41) Exchange -Input [5]: [c_customer_sk#21, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29] -Arguments: hashpartitioning(c_customer_sk#21, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [5]: [c_customer_sk#18, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26] +Arguments: hashpartitioning(c_customer_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) Sort [codegen id : 12] -Input [5]: [c_customer_sk#21, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29] -Arguments: [c_customer_sk#21 ASC NULLS FIRST], false, 0 +Input [5]: [c_customer_sk#18, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26] +Arguments: [c_customer_sk#18 ASC NULLS FIRST], false, 0 (43) SortMergeJoin [codegen id : 13] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#21] +Right keys [1]: [c_customer_sk#18] Join condition: None (44) Project [codegen id : 13] -Output [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#25, i_item_id#18, ca_country#29, ca_state#28, ca_county#27] -Input [13]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#18, c_customer_sk#21, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29] +Output [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#22, i_item_id#17, ca_country#26, ca_state#25, ca_county#24] +Input [13]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#17, c_customer_sk#18, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26] (45) Expand [codegen id : 13] -Input [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#25, i_item_id#18, ca_country#29, ca_state#28, ca_county#27] -Arguments: [[cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#25, i_item_id#18, ca_country#29, ca_state#28, ca_county#27, 0], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#25, i_item_id#18, ca_country#29, ca_state#28, null, 1], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#25, i_item_id#18, ca_country#29, null, null, 3], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#25, i_item_id#18, null, null, null, 7], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#25, null, null, null, null, 15]], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#25, i_item_id#35, ca_country#36, ca_state#37, ca_county#38, spark_grouping_id#39] +Input [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#22, i_item_id#17, ca_country#26, ca_state#25, ca_county#24] +Arguments: [[cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#22, i_item_id#17, ca_country#26, ca_state#25, ca_county#24, 0], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#22, i_item_id#17, ca_country#26, ca_state#25, null, 1], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#22, i_item_id#17, ca_country#26, null, null, 3], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#22, i_item_id#17, null, null, null, 7], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#22, null, null, null, null, 15]], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#22, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] (46) HashAggregate [codegen id : 13] -Input [12]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#25, i_item_id#35, ca_country#36, ca_state#37, ca_county#38, spark_grouping_id#39] -Keys [5]: [i_item_id#35, ca_country#36, ca_state#37, ca_county#38, spark_grouping_id#39] -Functions [7]: [partial_avg(cast(cs_quantity#4 as decimal(12,2))), partial_avg(cast(cs_list_price#5 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#7 as decimal(12,2))), partial_avg(cast(cs_sales_price#6 as decimal(12,2))), partial_avg(cast(cs_net_profit#8 as decimal(12,2))), partial_avg(cast(c_birth_year#25 as decimal(12,2))), partial_avg(cast(cd_dep_count#14 as decimal(12,2)))] -Aggregate Attributes [14]: [sum#40, count#41, sum#42, count#43, sum#44, count#45, sum#46, count#47, sum#48, count#49, sum#50, count#51, sum#52, count#53] -Results [19]: [i_item_id#35, ca_country#36, ca_state#37, ca_county#38, spark_grouping_id#39, sum#54, count#55, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63, sum#64, count#65, sum#66, count#67] +Input [12]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#22, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Keys [5]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Functions [7]: [partial_avg(cast(cs_quantity#4 as decimal(12,2))), partial_avg(cast(cs_list_price#5 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#7 as decimal(12,2))), partial_avg(cast(cs_sales_price#6 as decimal(12,2))), partial_avg(cast(cs_net_profit#8 as decimal(12,2))), partial_avg(cast(c_birth_year#22 as decimal(12,2))), partial_avg(cast(cd_dep_count#14 as decimal(12,2)))] +Aggregate Attributes [14]: [sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46] +Results [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] (47) Exchange -Input [19]: [i_item_id#35, ca_country#36, ca_state#37, ca_county#38, spark_grouping_id#39, sum#54, count#55, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63, sum#64, count#65, sum#66, count#67] -Arguments: hashpartitioning(i_item_id#35, ca_country#36, ca_state#37, ca_county#38, spark_grouping_id#39, 5), ENSURE_REQUIREMENTS, [id=#68] +Input [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] +Arguments: hashpartitioning(i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, 5), ENSURE_REQUIREMENTS, [plan_id=8] (48) HashAggregate [codegen id : 14] -Input [19]: [i_item_id#35, ca_country#36, ca_state#37, ca_county#38, spark_grouping_id#39, sum#54, count#55, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63, sum#64, count#65, sum#66, count#67] -Keys [5]: [i_item_id#35, ca_country#36, ca_state#37, ca_county#38, spark_grouping_id#39] -Functions [7]: [avg(cast(cs_quantity#4 as decimal(12,2))), avg(cast(cs_list_price#5 as decimal(12,2))), avg(cast(cs_coupon_amt#7 as decimal(12,2))), avg(cast(cs_sales_price#6 as decimal(12,2))), avg(cast(cs_net_profit#8 as decimal(12,2))), avg(cast(c_birth_year#25 as decimal(12,2))), avg(cast(cd_dep_count#14 as decimal(12,2)))] -Aggregate Attributes [7]: [avg(cast(cs_quantity#4 as decimal(12,2)))#69, avg(cast(cs_list_price#5 as decimal(12,2)))#70, avg(cast(cs_coupon_amt#7 as decimal(12,2)))#71, avg(cast(cs_sales_price#6 as decimal(12,2)))#72, avg(cast(cs_net_profit#8 as decimal(12,2)))#73, avg(cast(c_birth_year#25 as decimal(12,2)))#74, avg(cast(cd_dep_count#14 as decimal(12,2)))#75] -Results [11]: [i_item_id#35, ca_country#36, ca_state#37, ca_county#38, avg(cast(cs_quantity#4 as decimal(12,2)))#69 AS agg1#76, avg(cast(cs_list_price#5 as decimal(12,2)))#70 AS agg2#77, avg(cast(cs_coupon_amt#7 as decimal(12,2)))#71 AS agg3#78, avg(cast(cs_sales_price#6 as decimal(12,2)))#72 AS agg4#79, avg(cast(cs_net_profit#8 as decimal(12,2)))#73 AS agg5#80, avg(cast(c_birth_year#25 as decimal(12,2)))#74 AS agg6#81, avg(cast(cd_dep_count#14 as decimal(12,2)))#75 AS agg7#82] +Input [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] +Keys [5]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Functions [7]: [avg(cast(cs_quantity#4 as decimal(12,2))), avg(cast(cs_list_price#5 as decimal(12,2))), avg(cast(cs_coupon_amt#7 as decimal(12,2))), avg(cast(cs_sales_price#6 as decimal(12,2))), avg(cast(cs_net_profit#8 as decimal(12,2))), avg(cast(c_birth_year#22 as decimal(12,2))), avg(cast(cd_dep_count#14 as decimal(12,2)))] +Aggregate Attributes [7]: [avg(cast(cs_quantity#4 as decimal(12,2)))#61, avg(cast(cs_list_price#5 as decimal(12,2)))#62, avg(cast(cs_coupon_amt#7 as decimal(12,2)))#63, avg(cast(cs_sales_price#6 as decimal(12,2)))#64, avg(cast(cs_net_profit#8 as decimal(12,2)))#65, avg(cast(c_birth_year#22 as decimal(12,2)))#66, avg(cast(cd_dep_count#14 as decimal(12,2)))#67] +Results [11]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, avg(cast(cs_quantity#4 as decimal(12,2)))#61 AS agg1#68, avg(cast(cs_list_price#5 as decimal(12,2)))#62 AS agg2#69, avg(cast(cs_coupon_amt#7 as decimal(12,2)))#63 AS agg3#70, avg(cast(cs_sales_price#6 as decimal(12,2)))#64 AS agg4#71, avg(cast(cs_net_profit#8 as decimal(12,2)))#65 AS agg5#72, avg(cast(c_birth_year#22 as decimal(12,2)))#66 AS agg6#73, avg(cast(cd_dep_count#14 as decimal(12,2)))#67 AS agg7#74] (49) TakeOrderedAndProject -Input [11]: [i_item_id#35, ca_country#36, ca_state#37, ca_county#38, agg1#76, agg2#77, agg3#78, agg4#79, agg5#80, agg6#81, agg7#82] -Arguments: 100, [ca_country#36 ASC NULLS FIRST, ca_state#37 ASC NULLS FIRST, ca_county#38 ASC NULLS FIRST, i_item_id#35 ASC NULLS FIRST], [i_item_id#35, ca_country#36, ca_state#37, ca_county#38, agg1#76, agg2#77, agg3#78, agg4#79, agg5#80, agg6#81, agg7#82] +Input [11]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#68, agg2#69, agg3#70, agg4#71, agg5#72, agg6#73, agg7#74] +Arguments: 100, [ca_country#29 ASC NULLS FIRST, ca_state#30 ASC NULLS FIRST, ca_county#31 ASC NULLS FIRST, i_item_id#28 ASC NULLS FIRST], [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#68, agg2#69, agg3#70, agg4#71, agg5#72, agg6#73, agg7#74] ===== Subqueries ===== @@ -281,25 +281,25 @@ BroadcastExchange (54) (50) Scan parquet default.date_dim -Output [2]: [d_date_sk#16, d_year#83] +Output [2]: [d_date_sk#15, d_year#75] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#16, d_year#83] +Input [2]: [d_date_sk#15, d_year#75] (52) Filter [codegen id : 1] -Input [2]: [d_date_sk#16, d_year#83] -Condition : ((isnotnull(d_year#83) AND (d_year#83 = 1998)) AND isnotnull(d_date_sk#16)) +Input [2]: [d_date_sk#15, d_year#75] +Condition : ((isnotnull(d_year#75) AND (d_year#75 = 1998)) AND isnotnull(d_date_sk#15)) (53) Project [codegen id : 1] -Output [1]: [d_date_sk#16] -Input [2]: [d_date_sk#16, d_year#83] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_year#75] (54) BroadcastExchange -Input [1]: [d_date_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#84] +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt index d1b5044290a25..4d7f4d7e976b6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q18/explain.txt @@ -79,7 +79,7 @@ Input [4]: [cd_demo_sk#11, cd_gender#12, cd_education_status#13, cd_dep_count#14 (8) BroadcastExchange Input [2]: [cd_demo_sk#11, cd_dep_count#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 7] Left keys [1]: [cs_bill_cdemo_sk#2] @@ -91,154 +91,154 @@ Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5 Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#11, cd_dep_count#14] (11) Scan parquet default.customer -Output [5]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_month#19, c_birth_year#20] +Output [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [In(c_birth_month, [1,12,2,6,8,9]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [5]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_month#19, c_birth_year#20] +Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] (13) Filter [codegen id : 2] -Input [5]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_month#19, c_birth_year#20] -Condition : (((c_birth_month#19 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#16)) AND isnotnull(c_current_cdemo_sk#17)) AND isnotnull(c_current_addr_sk#18)) +Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] +Condition : (((c_birth_month#18 IN (1,6,8,9,12,2) AND isnotnull(c_customer_sk#15)) AND isnotnull(c_current_cdemo_sk#16)) AND isnotnull(c_current_addr_sk#17)) (14) Project [codegen id : 2] -Output [4]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] -Input [5]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_month#19, c_birth_year#20] +Output [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] (15) BroadcastExchange -Input [4]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] +Input [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 7] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#16] +Right keys [1]: [c_customer_sk#15] Join condition: None (17) Project [codegen id : 7] -Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] -Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] (18) Scan parquet default.customer_demographics -Output [1]: [cd_demo_sk#22] +Output [1]: [cd_demo_sk#20] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] -Input [1]: [cd_demo_sk#22] +Input [1]: [cd_demo_sk#20] (20) Filter [codegen id : 3] -Input [1]: [cd_demo_sk#22] -Condition : isnotnull(cd_demo_sk#22) +Input [1]: [cd_demo_sk#20] +Condition : isnotnull(cd_demo_sk#20) (21) BroadcastExchange -Input [1]: [cd_demo_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] +Input [1]: [cd_demo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (22) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_cdemo_sk#17] -Right keys [1]: [cd_demo_sk#22] +Left keys [1]: [c_current_cdemo_sk#16] +Right keys [1]: [cd_demo_sk#20] Join condition: None (23) Project [codegen id : 7] -Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#18, c_birth_year#20] -Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20, cd_demo_sk#22] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#17, c_birth_year#19] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19, cd_demo_sk#20] (24) Scan parquet default.customer_address -Output [4]: [ca_address_sk#24, ca_county#25, ca_state#26, ca_country#27] +Output [4]: [ca_address_sk#21, ca_county#22, ca_state#23, ca_country#24] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [IN,MS,ND,NM,OK,VA]), IsNotNull(ca_address_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 4] -Input [4]: [ca_address_sk#24, ca_county#25, ca_state#26, ca_country#27] +Input [4]: [ca_address_sk#21, ca_county#22, ca_state#23, ca_country#24] (26) Filter [codegen id : 4] -Input [4]: [ca_address_sk#24, ca_county#25, ca_state#26, ca_country#27] -Condition : (ca_state#26 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#24)) +Input [4]: [ca_address_sk#21, ca_county#22, ca_state#23, ca_country#24] +Condition : (ca_state#23 IN (MS,IN,ND,OK,NM,VA) AND isnotnull(ca_address_sk#21)) (27) BroadcastExchange -Input [4]: [ca_address_sk#24, ca_county#25, ca_state#26, ca_country#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] +Input [4]: [ca_address_sk#21, ca_county#22, ca_state#23, ca_country#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (28) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#18] -Right keys [1]: [ca_address_sk#24] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#21] Join condition: None (29) Project [codegen id : 7] -Output [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#20, ca_county#25, ca_state#26, ca_country#27] -Input [14]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#18, c_birth_year#20, ca_address_sk#24, ca_county#25, ca_state#26, ca_country#27] +Output [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#19, ca_county#22, ca_state#23, ca_country#24] +Input [14]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#17, c_birth_year#19, ca_address_sk#21, ca_county#22, ca_state#23, ca_country#24] (30) ReusedExchange [Reuses operator id: 48] -Output [1]: [d_date_sk#29] +Output [1]: [d_date_sk#25] (31) BroadcastHashJoin [codegen id : 7] Left keys [1]: [cs_sold_date_sk#9] -Right keys [1]: [d_date_sk#29] +Right keys [1]: [d_date_sk#25] Join condition: None (32) Project [codegen id : 7] -Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, ca_county#25, ca_state#26, ca_country#27] -Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#20, ca_county#25, ca_state#26, ca_country#27, d_date_sk#29] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, ca_county#22, ca_state#23, ca_country#24] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#19, ca_county#22, ca_state#23, ca_country#24, d_date_sk#25] (33) Scan parquet default.item -Output [2]: [i_item_sk#30, i_item_id#31] +Output [2]: [i_item_sk#26, i_item_id#27] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 6] -Input [2]: [i_item_sk#30, i_item_id#31] +Input [2]: [i_item_sk#26, i_item_id#27] (35) Filter [codegen id : 6] -Input [2]: [i_item_sk#30, i_item_id#31] -Condition : isnotnull(i_item_sk#30) +Input [2]: [i_item_sk#26, i_item_id#27] +Condition : isnotnull(i_item_sk#26) (36) BroadcastExchange -Input [2]: [i_item_sk#30, i_item_id#31] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#32] +Input [2]: [i_item_sk#26, i_item_id#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (37) BroadcastHashJoin [codegen id : 7] Left keys [1]: [cs_item_sk#3] -Right keys [1]: [i_item_sk#30] +Right keys [1]: [i_item_sk#26] Join condition: None (38) Project [codegen id : 7] -Output [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, i_item_id#31, ca_country#27, ca_state#26, ca_county#25] -Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, ca_county#25, ca_state#26, ca_country#27, i_item_sk#30, i_item_id#31] +Output [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, i_item_id#27, ca_country#24, ca_state#23, ca_county#22] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, ca_county#22, ca_state#23, ca_country#24, i_item_sk#26, i_item_id#27] (39) Expand [codegen id : 7] -Input [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, i_item_id#31, ca_country#27, ca_state#26, ca_county#25] -Arguments: [[cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, i_item_id#31, ca_country#27, ca_state#26, ca_county#25, 0], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, i_item_id#31, ca_country#27, ca_state#26, null, 1], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, i_item_id#31, ca_country#27, null, null, 3], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, i_item_id#31, null, null, null, 7], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, null, null, null, null, 15]], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, i_item_id#33, ca_country#34, ca_state#35, ca_county#36, spark_grouping_id#37] +Input [11]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, i_item_id#27, ca_country#24, ca_state#23, ca_county#22] +Arguments: [[cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, i_item_id#27, ca_country#24, ca_state#23, ca_county#22, 0], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, i_item_id#27, ca_country#24, ca_state#23, null, 1], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, i_item_id#27, ca_country#24, null, null, 3], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, i_item_id#27, null, null, null, 7], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, null, null, null, null, 15]], [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] (40) HashAggregate [codegen id : 7] -Input [12]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, i_item_id#33, ca_country#34, ca_state#35, ca_county#36, spark_grouping_id#37] -Keys [5]: [i_item_id#33, ca_country#34, ca_state#35, ca_county#36, spark_grouping_id#37] -Functions [7]: [partial_avg(cast(cs_quantity#4 as decimal(12,2))), partial_avg(cast(cs_list_price#5 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#7 as decimal(12,2))), partial_avg(cast(cs_sales_price#6 as decimal(12,2))), partial_avg(cast(cs_net_profit#8 as decimal(12,2))), partial_avg(cast(c_birth_year#20 as decimal(12,2))), partial_avg(cast(cd_dep_count#14 as decimal(12,2)))] -Aggregate Attributes [14]: [sum#38, count#39, sum#40, count#41, sum#42, count#43, sum#44, count#45, sum#46, count#47, sum#48, count#49, sum#50, count#51] -Results [19]: [i_item_id#33, ca_country#34, ca_state#35, ca_county#36, spark_grouping_id#37, sum#52, count#53, sum#54, count#55, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63, sum#64, count#65] +Input [12]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Keys [5]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Functions [7]: [partial_avg(cast(cs_quantity#4 as decimal(12,2))), partial_avg(cast(cs_list_price#5 as decimal(12,2))), partial_avg(cast(cs_coupon_amt#7 as decimal(12,2))), partial_avg(cast(cs_sales_price#6 as decimal(12,2))), partial_avg(cast(cs_net_profit#8 as decimal(12,2))), partial_avg(cast(c_birth_year#19 as decimal(12,2))), partial_avg(cast(cd_dep_count#14 as decimal(12,2)))] +Aggregate Attributes [14]: [sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46] +Results [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] (41) Exchange -Input [19]: [i_item_id#33, ca_country#34, ca_state#35, ca_county#36, spark_grouping_id#37, sum#52, count#53, sum#54, count#55, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63, sum#64, count#65] -Arguments: hashpartitioning(i_item_id#33, ca_country#34, ca_state#35, ca_county#36, spark_grouping_id#37, 5), ENSURE_REQUIREMENTS, [id=#66] +Input [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] +Arguments: hashpartitioning(i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, 5), ENSURE_REQUIREMENTS, [plan_id=6] (42) HashAggregate [codegen id : 8] -Input [19]: [i_item_id#33, ca_country#34, ca_state#35, ca_county#36, spark_grouping_id#37, sum#52, count#53, sum#54, count#55, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63, sum#64, count#65] -Keys [5]: [i_item_id#33, ca_country#34, ca_state#35, ca_county#36, spark_grouping_id#37] -Functions [7]: [avg(cast(cs_quantity#4 as decimal(12,2))), avg(cast(cs_list_price#5 as decimal(12,2))), avg(cast(cs_coupon_amt#7 as decimal(12,2))), avg(cast(cs_sales_price#6 as decimal(12,2))), avg(cast(cs_net_profit#8 as decimal(12,2))), avg(cast(c_birth_year#20 as decimal(12,2))), avg(cast(cd_dep_count#14 as decimal(12,2)))] -Aggregate Attributes [7]: [avg(cast(cs_quantity#4 as decimal(12,2)))#67, avg(cast(cs_list_price#5 as decimal(12,2)))#68, avg(cast(cs_coupon_amt#7 as decimal(12,2)))#69, avg(cast(cs_sales_price#6 as decimal(12,2)))#70, avg(cast(cs_net_profit#8 as decimal(12,2)))#71, avg(cast(c_birth_year#20 as decimal(12,2)))#72, avg(cast(cd_dep_count#14 as decimal(12,2)))#73] -Results [11]: [i_item_id#33, ca_country#34, ca_state#35, ca_county#36, avg(cast(cs_quantity#4 as decimal(12,2)))#67 AS agg1#74, avg(cast(cs_list_price#5 as decimal(12,2)))#68 AS agg2#75, avg(cast(cs_coupon_amt#7 as decimal(12,2)))#69 AS agg3#76, avg(cast(cs_sales_price#6 as decimal(12,2)))#70 AS agg4#77, avg(cast(cs_net_profit#8 as decimal(12,2)))#71 AS agg5#78, avg(cast(c_birth_year#20 as decimal(12,2)))#72 AS agg6#79, avg(cast(cd_dep_count#14 as decimal(12,2)))#73 AS agg7#80] +Input [19]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32, sum#47, count#48, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] +Keys [5]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, spark_grouping_id#32] +Functions [7]: [avg(cast(cs_quantity#4 as decimal(12,2))), avg(cast(cs_list_price#5 as decimal(12,2))), avg(cast(cs_coupon_amt#7 as decimal(12,2))), avg(cast(cs_sales_price#6 as decimal(12,2))), avg(cast(cs_net_profit#8 as decimal(12,2))), avg(cast(c_birth_year#19 as decimal(12,2))), avg(cast(cd_dep_count#14 as decimal(12,2)))] +Aggregate Attributes [7]: [avg(cast(cs_quantity#4 as decimal(12,2)))#61, avg(cast(cs_list_price#5 as decimal(12,2)))#62, avg(cast(cs_coupon_amt#7 as decimal(12,2)))#63, avg(cast(cs_sales_price#6 as decimal(12,2)))#64, avg(cast(cs_net_profit#8 as decimal(12,2)))#65, avg(cast(c_birth_year#19 as decimal(12,2)))#66, avg(cast(cd_dep_count#14 as decimal(12,2)))#67] +Results [11]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, avg(cast(cs_quantity#4 as decimal(12,2)))#61 AS agg1#68, avg(cast(cs_list_price#5 as decimal(12,2)))#62 AS agg2#69, avg(cast(cs_coupon_amt#7 as decimal(12,2)))#63 AS agg3#70, avg(cast(cs_sales_price#6 as decimal(12,2)))#64 AS agg4#71, avg(cast(cs_net_profit#8 as decimal(12,2)))#65 AS agg5#72, avg(cast(c_birth_year#19 as decimal(12,2)))#66 AS agg6#73, avg(cast(cd_dep_count#14 as decimal(12,2)))#67 AS agg7#74] (43) TakeOrderedAndProject -Input [11]: [i_item_id#33, ca_country#34, ca_state#35, ca_county#36, agg1#74, agg2#75, agg3#76, agg4#77, agg5#78, agg6#79, agg7#80] -Arguments: 100, [ca_country#34 ASC NULLS FIRST, ca_state#35 ASC NULLS FIRST, ca_county#36 ASC NULLS FIRST, i_item_id#33 ASC NULLS FIRST], [i_item_id#33, ca_country#34, ca_state#35, ca_county#36, agg1#74, agg2#75, agg3#76, agg4#77, agg5#78, agg6#79, agg7#80] +Input [11]: [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#68, agg2#69, agg3#70, agg4#71, agg5#72, agg6#73, agg7#74] +Arguments: 100, [ca_country#29 ASC NULLS FIRST, ca_state#30 ASC NULLS FIRST, ca_county#31 ASC NULLS FIRST, i_item_id#28 ASC NULLS FIRST], [i_item_id#28, ca_country#29, ca_state#30, ca_county#31, agg1#68, agg2#69, agg3#70, agg4#71, agg5#72, agg6#73, agg7#74] ===== Subqueries ===== @@ -251,25 +251,25 @@ BroadcastExchange (48) (44) Scan parquet default.date_dim -Output [2]: [d_date_sk#29, d_year#81] +Output [2]: [d_date_sk#25, d_year#75] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#29, d_year#81] +Input [2]: [d_date_sk#25, d_year#75] (46) Filter [codegen id : 1] -Input [2]: [d_date_sk#29, d_year#81] -Condition : ((isnotnull(d_year#81) AND (d_year#81 = 1998)) AND isnotnull(d_date_sk#29)) +Input [2]: [d_date_sk#25, d_year#75] +Condition : ((isnotnull(d_year#75) AND (d_year#75 = 1998)) AND isnotnull(d_date_sk#25)) (47) Project [codegen id : 1] -Output [1]: [d_date_sk#29] -Input [2]: [d_date_sk#29, d_year#81] +Output [1]: [d_date_sk#25] +Input [2]: [d_date_sk#25, d_year#75] (48) BroadcastExchange -Input [1]: [d_date_sk#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#82] +Input [1]: [d_date_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt index 980ca7ba560c9..560d7c1a9c642 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19.sf100/explain.txt @@ -77,7 +77,7 @@ Input [6]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#1 (8) BroadcastExchange Input [5]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_item_sk#1] @@ -89,143 +89,143 @@ Output [8]: [ss_customer_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_sold_date Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_sold_date_sk#5, i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11] (11) ReusedExchange [Reuses operator id: 46] -Output [1]: [d_date_sk#14] +Output [1]: [d_date_sk#13] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_date_sk#5] -Right keys [1]: [d_date_sk#14] +Right keys [1]: [d_date_sk#13] Join condition: None (13) Project [codegen id : 4] Output [7]: [ss_customer_sk#2, ss_store_sk#3, ss_ext_sales_price#4, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11] -Input [9]: [ss_customer_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_sold_date_sk#5, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11, d_date_sk#14] +Input [9]: [ss_customer_sk#2, ss_store_sk#3, ss_ext_sales_price#4, ss_sold_date_sk#5, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11, d_date_sk#13] (14) Scan parquet default.store -Output [2]: [s_store_sk#15, s_zip#16] +Output [2]: [s_store_sk#14, s_zip#15] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#15, s_zip#16] +Input [2]: [s_store_sk#14, s_zip#15] (16) Filter [codegen id : 3] -Input [2]: [s_store_sk#15, s_zip#16] -Condition : (isnotnull(s_zip#16) AND isnotnull(s_store_sk#15)) +Input [2]: [s_store_sk#14, s_zip#15] +Condition : (isnotnull(s_zip#15) AND isnotnull(s_store_sk#14)) (17) BroadcastExchange -Input [2]: [s_store_sk#15, s_zip#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] +Input [2]: [s_store_sk#14, s_zip#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#15] +Right keys [1]: [s_store_sk#14] Join condition: None (19) Project [codegen id : 4] -Output [7]: [ss_customer_sk#2, ss_ext_sales_price#4, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11, s_zip#16] -Input [9]: [ss_customer_sk#2, ss_store_sk#3, ss_ext_sales_price#4, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11, s_store_sk#15, s_zip#16] +Output [7]: [ss_customer_sk#2, ss_ext_sales_price#4, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11, s_zip#15] +Input [9]: [ss_customer_sk#2, ss_store_sk#3, ss_ext_sales_price#4, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11, s_store_sk#14, s_zip#15] (20) Exchange -Input [7]: [ss_customer_sk#2, ss_ext_sales_price#4, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11, s_zip#16] -Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [7]: [ss_customer_sk#2, ss_ext_sales_price#4, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11, s_zip#15] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) Sort [codegen id : 5] -Input [7]: [ss_customer_sk#2, ss_ext_sales_price#4, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11, s_zip#16] +Input [7]: [ss_customer_sk#2, ss_ext_sales_price#4, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11, s_zip#15] Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 (22) Scan parquet default.customer -Output [2]: [c_customer_sk#19, c_current_addr_sk#20] +Output [2]: [c_customer_sk#16, c_current_addr_sk#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 6] -Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] (24) Filter [codegen id : 6] -Input [2]: [c_customer_sk#19, c_current_addr_sk#20] -Condition : (isnotnull(c_customer_sk#19) AND isnotnull(c_current_addr_sk#20)) +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] +Condition : (isnotnull(c_customer_sk#16) AND isnotnull(c_current_addr_sk#17)) (25) Exchange -Input [2]: [c_customer_sk#19, c_current_addr_sk#20] -Arguments: hashpartitioning(c_current_addr_sk#20, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] +Arguments: hashpartitioning(c_current_addr_sk#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] (26) Sort [codegen id : 7] -Input [2]: [c_customer_sk#19, c_current_addr_sk#20] -Arguments: [c_current_addr_sk#20 ASC NULLS FIRST], false, 0 +Input [2]: [c_customer_sk#16, c_current_addr_sk#17] +Arguments: [c_current_addr_sk#17 ASC NULLS FIRST], false, 0 (27) Scan parquet default.customer_address -Output [2]: [ca_address_sk#22, ca_zip#23] +Output [2]: [ca_address_sk#18, ca_zip#19] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] ReadSchema: struct (28) ColumnarToRow [codegen id : 8] -Input [2]: [ca_address_sk#22, ca_zip#23] +Input [2]: [ca_address_sk#18, ca_zip#19] (29) Filter [codegen id : 8] -Input [2]: [ca_address_sk#22, ca_zip#23] -Condition : (isnotnull(ca_address_sk#22) AND isnotnull(ca_zip#23)) +Input [2]: [ca_address_sk#18, ca_zip#19] +Condition : (isnotnull(ca_address_sk#18) AND isnotnull(ca_zip#19)) (30) Exchange -Input [2]: [ca_address_sk#22, ca_zip#23] -Arguments: hashpartitioning(ca_address_sk#22, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [2]: [ca_address_sk#18, ca_zip#19] +Arguments: hashpartitioning(ca_address_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=5] (31) Sort [codegen id : 9] -Input [2]: [ca_address_sk#22, ca_zip#23] -Arguments: [ca_address_sk#22 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#18, ca_zip#19] +Arguments: [ca_address_sk#18 ASC NULLS FIRST], false, 0 (32) SortMergeJoin [codegen id : 10] -Left keys [1]: [c_current_addr_sk#20] -Right keys [1]: [ca_address_sk#22] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#18] Join condition: None (33) Project [codegen id : 10] -Output [2]: [c_customer_sk#19, ca_zip#23] -Input [4]: [c_customer_sk#19, c_current_addr_sk#20, ca_address_sk#22, ca_zip#23] +Output [2]: [c_customer_sk#16, ca_zip#19] +Input [4]: [c_customer_sk#16, c_current_addr_sk#17, ca_address_sk#18, ca_zip#19] (34) Exchange -Input [2]: [c_customer_sk#19, ca_zip#23] -Arguments: hashpartitioning(c_customer_sk#19, 5), ENSURE_REQUIREMENTS, [id=#25] +Input [2]: [c_customer_sk#16, ca_zip#19] +Arguments: hashpartitioning(c_customer_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=6] (35) Sort [codegen id : 11] -Input [2]: [c_customer_sk#19, ca_zip#23] -Arguments: [c_customer_sk#19 ASC NULLS FIRST], false, 0 +Input [2]: [c_customer_sk#16, ca_zip#19] +Arguments: [c_customer_sk#16 ASC NULLS FIRST], false, 0 (36) SortMergeJoin [codegen id : 12] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#19] -Join condition: NOT (substr(ca_zip#23, 1, 5) = substr(s_zip#16, 1, 5)) +Right keys [1]: [c_customer_sk#16] +Join condition: NOT (substr(ca_zip#19, 1, 5) = substr(s_zip#15, 1, 5)) (37) Project [codegen id : 12] Output [5]: [ss_ext_sales_price#4, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11] -Input [9]: [ss_customer_sk#2, ss_ext_sales_price#4, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11, s_zip#16, c_customer_sk#19, ca_zip#23] +Input [9]: [ss_customer_sk#2, ss_ext_sales_price#4, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11, s_zip#15, c_customer_sk#16, ca_zip#19] (38) HashAggregate [codegen id : 12] Input [5]: [ss_ext_sales_price#4, i_brand_id#8, i_brand#9, i_manufact_id#10, i_manufact#11] Keys [4]: [i_brand#9, i_brand_id#8, i_manufact_id#10, i_manufact#11] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#4))] -Aggregate Attributes [1]: [sum#26] -Results [5]: [i_brand#9, i_brand_id#8, i_manufact_id#10, i_manufact#11, sum#27] +Aggregate Attributes [1]: [sum#20] +Results [5]: [i_brand#9, i_brand_id#8, i_manufact_id#10, i_manufact#11, sum#21] (39) Exchange -Input [5]: [i_brand#9, i_brand_id#8, i_manufact_id#10, i_manufact#11, sum#27] -Arguments: hashpartitioning(i_brand#9, i_brand_id#8, i_manufact_id#10, i_manufact#11, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [5]: [i_brand#9, i_brand_id#8, i_manufact_id#10, i_manufact#11, sum#21] +Arguments: hashpartitioning(i_brand#9, i_brand_id#8, i_manufact_id#10, i_manufact#11, 5), ENSURE_REQUIREMENTS, [plan_id=7] (40) HashAggregate [codegen id : 13] -Input [5]: [i_brand#9, i_brand_id#8, i_manufact_id#10, i_manufact#11, sum#27] +Input [5]: [i_brand#9, i_brand_id#8, i_manufact_id#10, i_manufact#11, sum#21] Keys [4]: [i_brand#9, i_brand_id#8, i_manufact_id#10, i_manufact#11] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#4))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#29] -Results [5]: [i_brand_id#8 AS brand_id#30, i_brand#9 AS brand#31, i_manufact_id#10, i_manufact#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#29,17,2) AS ext_price#32] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#4))#22] +Results [5]: [i_brand_id#8 AS brand_id#23, i_brand#9 AS brand#24, i_manufact_id#10, i_manufact#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#4))#22,17,2) AS ext_price#25] (41) TakeOrderedAndProject -Input [5]: [brand_id#30, brand#31, i_manufact_id#10, i_manufact#11, ext_price#32] -Arguments: 100, [ext_price#32 DESC NULLS LAST, brand#31 ASC NULLS FIRST, brand_id#30 ASC NULLS FIRST, i_manufact_id#10 ASC NULLS FIRST, i_manufact#11 ASC NULLS FIRST], [brand_id#30, brand#31, i_manufact_id#10, i_manufact#11, ext_price#32] +Input [5]: [brand_id#23, brand#24, i_manufact_id#10, i_manufact#11, ext_price#25] +Arguments: 100, [ext_price#25 DESC NULLS LAST, brand#24 ASC NULLS FIRST, brand_id#23 ASC NULLS FIRST, i_manufact_id#10 ASC NULLS FIRST, i_manufact#11 ASC NULLS FIRST], [brand_id#23, brand#24, i_manufact_id#10, i_manufact#11, ext_price#25] ===== Subqueries ===== @@ -238,25 +238,25 @@ BroadcastExchange (46) (42) Scan parquet default.date_dim -Output [3]: [d_date_sk#14, d_year#33, d_moy#34] +Output [3]: [d_date_sk#13, d_year#26, d_moy#27] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1998), IsNotNull(d_date_sk)] ReadSchema: struct (43) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#14, d_year#33, d_moy#34] +Input [3]: [d_date_sk#13, d_year#26, d_moy#27] (44) Filter [codegen id : 1] -Input [3]: [d_date_sk#14, d_year#33, d_moy#34] -Condition : ((((isnotnull(d_moy#34) AND isnotnull(d_year#33)) AND (d_moy#34 = 11)) AND (d_year#33 = 1998)) AND isnotnull(d_date_sk#14)) +Input [3]: [d_date_sk#13, d_year#26, d_moy#27] +Condition : ((((isnotnull(d_moy#27) AND isnotnull(d_year#26)) AND (d_moy#27 = 11)) AND (d_year#26 = 1998)) AND isnotnull(d_date_sk#13)) (45) Project [codegen id : 1] -Output [1]: [d_date_sk#14] -Input [3]: [d_date_sk#14, d_year#33, d_moy#34] +Output [1]: [d_date_sk#13] +Input [3]: [d_date_sk#13, d_year#26, d_moy#27] (46) BroadcastExchange -Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt index e321ddc156860..34a1f715bda37 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q19/explain.txt @@ -75,7 +75,7 @@ Condition : ((isnotnull(ss_item_sk#4) AND isnotnull(ss_customer_sk#5)) AND isnot (8) BroadcastExchange Input [5]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[4, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[4, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 6] Left keys [1]: [d_date_sk#1] @@ -87,136 +87,136 @@ Output [4]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7 Input [6]: [d_date_sk#1, ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, ss_sold_date_sk#8] (11) Scan parquet default.item -Output [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] +Output [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,8), IsNotNull(i_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] (13) Filter [codegen id : 2] -Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] -Condition : ((isnotnull(i_manager_id#15) AND (i_manager_id#15 = 8)) AND isnotnull(i_item_sk#10)) +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] +Condition : ((isnotnull(i_manager_id#14) AND (i_manager_id#14 = 8)) AND isnotnull(i_item_sk#9)) (14) Project [codegen id : 2] -Output [5]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Input [6]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, i_manager_id#15] +Output [5]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Input [6]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, i_manager_id#14] (15) BroadcastExchange -Input [5]: [i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] +Input [5]: [i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_item_sk#4] -Right keys [1]: [i_item_sk#10] +Right keys [1]: [i_item_sk#9] Join condition: None (17) Project [codegen id : 6] -Output [7]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Input [9]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_item_sk#10, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] +Output [7]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Input [9]: [ss_item_sk#4, ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_item_sk#9, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] (18) Scan parquet default.customer -Output [2]: [c_customer_sk#17, c_current_addr_sk#18] +Output [2]: [c_customer_sk#15, c_current_addr_sk#16] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] -Input [2]: [c_customer_sk#17, c_current_addr_sk#18] +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] (20) Filter [codegen id : 3] -Input [2]: [c_customer_sk#17, c_current_addr_sk#18] -Condition : (isnotnull(c_customer_sk#17) AND isnotnull(c_current_addr_sk#18)) +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_current_addr_sk#16)) (21) BroadcastExchange -Input [2]: [c_customer_sk#17, c_current_addr_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (22) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#5] -Right keys [1]: [c_customer_sk#17] +Right keys [1]: [c_customer_sk#15] Join condition: None (23) Project [codegen id : 6] -Output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_current_addr_sk#18] -Input [9]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_customer_sk#17, c_current_addr_sk#18] +Output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16] +Input [9]: [ss_customer_sk#5, ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_customer_sk#15, c_current_addr_sk#16] (24) Scan parquet default.customer_address -Output [2]: [ca_address_sk#20, ca_zip#21] +Output [2]: [ca_address_sk#17, ca_zip#18] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_zip)] ReadSchema: struct (25) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#20, ca_zip#21] +Input [2]: [ca_address_sk#17, ca_zip#18] (26) Filter [codegen id : 4] -Input [2]: [ca_address_sk#20, ca_zip#21] -Condition : (isnotnull(ca_address_sk#20) AND isnotnull(ca_zip#21)) +Input [2]: [ca_address_sk#17, ca_zip#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_zip#18)) (27) BroadcastExchange -Input [2]: [ca_address_sk#20, ca_zip#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] +Input [2]: [ca_address_sk#17, ca_zip#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (28) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_current_addr_sk#18] -Right keys [1]: [ca_address_sk#20] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#17] Join condition: None (29) Project [codegen id : 6] -Output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, ca_zip#21] -Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, c_current_addr_sk#18, ca_address_sk#20, ca_zip#21] +Output [7]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18] +Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, c_current_addr_sk#16, ca_address_sk#17, ca_zip#18] (30) Scan parquet default.store -Output [2]: [s_store_sk#23, s_zip#24] +Output [2]: [s_store_sk#19, s_zip#20] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_zip), IsNotNull(s_store_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 5] -Input [2]: [s_store_sk#23, s_zip#24] +Input [2]: [s_store_sk#19, s_zip#20] (32) Filter [codegen id : 5] -Input [2]: [s_store_sk#23, s_zip#24] -Condition : (isnotnull(s_zip#24) AND isnotnull(s_store_sk#23)) +Input [2]: [s_store_sk#19, s_zip#20] +Condition : (isnotnull(s_zip#20) AND isnotnull(s_store_sk#19)) (33) BroadcastExchange -Input [2]: [s_store_sk#23, s_zip#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] +Input [2]: [s_store_sk#19, s_zip#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_store_sk#6] -Right keys [1]: [s_store_sk#23] -Join condition: NOT (substr(ca_zip#21, 1, 5) = substr(s_zip#24, 1, 5)) +Right keys [1]: [s_store_sk#19] +Join condition: NOT (substr(ca_zip#18, 1, 5) = substr(s_zip#20, 1, 5)) (35) Project [codegen id : 6] -Output [5]: [ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14, ca_zip#21, s_store_sk#23, s_zip#24] +Output [5]: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Input [9]: [ss_store_sk#6, ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13, ca_zip#18, s_store_sk#19, s_zip#20] (36) HashAggregate [codegen id : 6] -Input [5]: [ss_ext_sales_price#7, i_brand_id#11, i_brand#12, i_manufact_id#13, i_manufact#14] -Keys [4]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14] +Input [5]: [ss_ext_sales_price#7, i_brand_id#10, i_brand#11, i_manufact_id#12, i_manufact#13] +Keys [4]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#7))] -Aggregate Attributes [1]: [sum#26] -Results [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] +Aggregate Attributes [1]: [sum#21] +Results [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] (37) Exchange -Input [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] -Arguments: hashpartitioning(i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] +Arguments: hashpartitioning(i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, 5), ENSURE_REQUIREMENTS, [plan_id=6] (38) HashAggregate [codegen id : 7] -Input [5]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14, sum#27] -Keys [4]: [i_brand#12, i_brand_id#11, i_manufact_id#13, i_manufact#14] +Input [5]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13, sum#22] +Keys [4]: [i_brand#11, i_brand_id#10, i_manufact_id#12, i_manufact#13] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#7))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#29] -Results [5]: [i_brand_id#11 AS brand_id#30, i_brand#12 AS brand#31, i_manufact_id#13, i_manufact#14, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#29,17,2) AS ext_price#32] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#7))#23] +Results [5]: [i_brand_id#10 AS brand_id#24, i_brand#11 AS brand#25, i_manufact_id#12, i_manufact#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#7))#23,17,2) AS ext_price#26] (39) TakeOrderedAndProject -Input [5]: [brand_id#30, brand#31, i_manufact_id#13, i_manufact#14, ext_price#32] -Arguments: 100, [ext_price#32 DESC NULLS LAST, brand#31 ASC NULLS FIRST, brand_id#30 ASC NULLS FIRST, i_manufact_id#13 ASC NULLS FIRST, i_manufact#14 ASC NULLS FIRST], [brand_id#30, brand#31, i_manufact_id#13, i_manufact#14, ext_price#32] +Input [5]: [brand_id#24, brand#25, i_manufact_id#12, i_manufact#13, ext_price#26] +Arguments: 100, [ext_price#26 DESC NULLS LAST, brand#25 ASC NULLS FIRST, brand_id#24 ASC NULLS FIRST, i_manufact_id#12 ASC NULLS FIRST, i_manufact#13 ASC NULLS FIRST], [brand_id#24, brand#25, i_manufact_id#12, i_manufact#13, ext_price#26] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt index 8f188db553004..031dc924069e6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2.sf100/explain.txt @@ -84,7 +84,7 @@ Condition : (isnotnull(d_date_sk#9) AND isnotnull(d_week_seq#10)) (11) BroadcastExchange Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [sold_date_sk#3] @@ -99,110 +99,110 @@ Input [5]: [sold_date_sk#3, sales_price#4, d_date_sk#9, d_week_seq#10, d_day_nam Input [3]: [sales_price#4, d_week_seq#10, d_day_name#11] Keys [1]: [d_week_seq#10] Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] -Aggregate Attributes [7]: [sum#13, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19] -Results [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Aggregate Attributes [7]: [sum#12, sum#13, sum#14, sum#15, sum#16, sum#17, sum#18] +Results [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] (15) Exchange -Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] -Arguments: hashpartitioning(d_week_seq#10, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] +Arguments: hashpartitioning(d_week_seq#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] (16) HashAggregate [codegen id : 12] -Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Input [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] Keys [1]: [d_week_seq#10] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#34] -Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#34,17,2) AS sat_sales#41] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26,17,2) AS sun_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27,17,2) AS mon_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28,17,2) AS tue_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29,17,2) AS wed_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30,17,2) AS thu_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31,17,2) AS fri_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32,17,2) AS sat_sales#39] (17) Scan parquet default.date_dim -Output [2]: [d_week_seq#42, d_year#43] +Output [2]: [d_week_seq#40, d_year#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)] ReadSchema: struct (18) ColumnarToRow [codegen id : 5] -Input [2]: [d_week_seq#42, d_year#43] +Input [2]: [d_week_seq#40, d_year#41] (19) Filter [codegen id : 5] -Input [2]: [d_week_seq#42, d_year#43] -Condition : ((isnotnull(d_year#43) AND (d_year#43 = 2001)) AND isnotnull(d_week_seq#42)) +Input [2]: [d_week_seq#40, d_year#41] +Condition : ((isnotnull(d_year#41) AND (d_year#41 = 2001)) AND isnotnull(d_week_seq#40)) (20) Project [codegen id : 5] -Output [1]: [d_week_seq#42] -Input [2]: [d_week_seq#42, d_year#43] +Output [1]: [d_week_seq#40] +Input [2]: [d_week_seq#40, d_year#41] (21) BroadcastExchange -Input [1]: [d_week_seq#42] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] +Input [1]: [d_week_seq#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (22) BroadcastHashJoin [codegen id : 12] Left keys [1]: [d_week_seq#10] -Right keys [1]: [d_week_seq#42] +Right keys [1]: [d_week_seq#40] Join condition: None (23) Project [codegen id : 12] -Output [8]: [d_week_seq#10 AS d_week_seq1#45, sun_sales#35 AS sun_sales1#46, mon_sales#36 AS mon_sales1#47, tue_sales#37 AS tue_sales1#48, wed_sales#38 AS wed_sales1#49, thu_sales#39 AS thu_sales1#50, fri_sales#40 AS fri_sales1#51, sat_sales#41 AS sat_sales1#52] -Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#42] +Output [8]: [d_week_seq#10 AS d_week_seq1#42, sun_sales#33 AS sun_sales1#43, mon_sales#34 AS mon_sales1#44, tue_sales#35 AS tue_sales1#45, wed_sales#36 AS wed_sales1#46, thu_sales#37 AS thu_sales1#47, fri_sales#38 AS fri_sales1#48, sat_sales#39 AS sat_sales1#49] +Input [9]: [d_week_seq#10, sun_sales#33, mon_sales#34, tue_sales#35, wed_sales#36, thu_sales#37, fri_sales#38, sat_sales#39, d_week_seq#40] (24) ReusedExchange [Reuses operator id: 15] -Output [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] +Output [8]: [d_week_seq#10, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] (25) HashAggregate [codegen id : 11] -Input [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] +Input [8]: [d_week_seq#10, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] Keys [1]: [d_week_seq#10] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#34] -Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#34,17,2) AS sat_sales#41] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26,17,2) AS sun_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27,17,2) AS mon_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28,17,2) AS tue_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29,17,2) AS wed_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30,17,2) AS thu_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31,17,2) AS fri_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32,17,2) AS sat_sales#39] (26) Scan parquet default.date_dim -Output [2]: [d_week_seq#60, d_year#61] +Output [2]: [d_week_seq#57, d_year#58] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)] ReadSchema: struct (27) ColumnarToRow [codegen id : 10] -Input [2]: [d_week_seq#60, d_year#61] +Input [2]: [d_week_seq#57, d_year#58] (28) Filter [codegen id : 10] -Input [2]: [d_week_seq#60, d_year#61] -Condition : ((isnotnull(d_year#61) AND (d_year#61 = 2002)) AND isnotnull(d_week_seq#60)) +Input [2]: [d_week_seq#57, d_year#58] +Condition : ((isnotnull(d_year#58) AND (d_year#58 = 2002)) AND isnotnull(d_week_seq#57)) (29) Project [codegen id : 10] -Output [1]: [d_week_seq#60] -Input [2]: [d_week_seq#60, d_year#61] +Output [1]: [d_week_seq#57] +Input [2]: [d_week_seq#57, d_year#58] (30) BroadcastExchange -Input [1]: [d_week_seq#60] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#62] +Input [1]: [d_week_seq#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (31) BroadcastHashJoin [codegen id : 11] Left keys [1]: [d_week_seq#10] -Right keys [1]: [d_week_seq#60] +Right keys [1]: [d_week_seq#57] Join condition: None (32) Project [codegen id : 11] -Output [8]: [d_week_seq#10 AS d_week_seq2#63, sun_sales#35 AS sun_sales2#64, mon_sales#36 AS mon_sales2#65, tue_sales#37 AS tue_sales2#66, wed_sales#38 AS wed_sales2#67, thu_sales#39 AS thu_sales2#68, fri_sales#40 AS fri_sales2#69, sat_sales#41 AS sat_sales2#70] -Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#60] +Output [8]: [d_week_seq#10 AS d_week_seq2#59, sun_sales#33 AS sun_sales2#60, mon_sales#34 AS mon_sales2#61, tue_sales#35 AS tue_sales2#62, wed_sales#36 AS wed_sales2#63, thu_sales#37 AS thu_sales2#64, fri_sales#38 AS fri_sales2#65, sat_sales#39 AS sat_sales2#66] +Input [9]: [d_week_seq#10, sun_sales#33, mon_sales#34, tue_sales#35, wed_sales#36, thu_sales#37, fri_sales#38, sat_sales#39, d_week_seq#57] (33) BroadcastExchange -Input [8]: [d_week_seq2#63, sun_sales2#64, mon_sales2#65, tue_sales2#66, wed_sales2#67, thu_sales2#68, fri_sales2#69, sat_sales2#70] -Arguments: HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint)),false), [id=#71] +Input [8]: [d_week_seq2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] +Arguments: HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint)),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [d_week_seq1#45] -Right keys [1]: [(d_week_seq2#63 - 53)] +Left keys [1]: [d_week_seq1#42] +Right keys [1]: [(d_week_seq2#59 - 53)] Join condition: None (35) Project [codegen id : 12] -Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#64)), DecimalType(37,20)), 2) AS round((sun_sales1 / sun_sales2), 2)#72, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#65)), DecimalType(37,20)), 2) AS round((mon_sales1 / mon_sales2), 2)#73, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#66)), DecimalType(37,20)), 2) AS round((tue_sales1 / tue_sales2), 2)#74, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#67)), DecimalType(37,20)), 2) AS round((wed_sales1 / wed_sales2), 2)#75, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#68)), DecimalType(37,20)), 2) AS round((thu_sales1 / thu_sales2), 2)#76, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#69)), DecimalType(37,20)), 2) AS round((fri_sales1 / fri_sales2), 2)#77, round(CheckOverflow((promote_precision(sat_sales1#52) / promote_precision(sat_sales2#70)), DecimalType(37,20)), 2) AS round((sat_sales1 / sat_sales2), 2)#78] -Input [16]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52, d_week_seq2#63, sun_sales2#64, mon_sales2#65, tue_sales2#66, wed_sales2#67, thu_sales2#68, fri_sales2#69, sat_sales2#70] +Output [8]: [d_week_seq1#42, round(CheckOverflow((promote_precision(sun_sales1#43) / promote_precision(sun_sales2#60)), DecimalType(37,20)), 2) AS round((sun_sales1 / sun_sales2), 2)#67, round(CheckOverflow((promote_precision(mon_sales1#44) / promote_precision(mon_sales2#61)), DecimalType(37,20)), 2) AS round((mon_sales1 / mon_sales2), 2)#68, round(CheckOverflow((promote_precision(tue_sales1#45) / promote_precision(tue_sales2#62)), DecimalType(37,20)), 2) AS round((tue_sales1 / tue_sales2), 2)#69, round(CheckOverflow((promote_precision(wed_sales1#46) / promote_precision(wed_sales2#63)), DecimalType(37,20)), 2) AS round((wed_sales1 / wed_sales2), 2)#70, round(CheckOverflow((promote_precision(thu_sales1#47) / promote_precision(thu_sales2#64)), DecimalType(37,20)), 2) AS round((thu_sales1 / thu_sales2), 2)#71, round(CheckOverflow((promote_precision(fri_sales1#48) / promote_precision(fri_sales2#65)), DecimalType(37,20)), 2) AS round((fri_sales1 / fri_sales2), 2)#72, round(CheckOverflow((promote_precision(sat_sales1#49) / promote_precision(sat_sales2#66)), DecimalType(37,20)), 2) AS round((sat_sales1 / sat_sales2), 2)#73] +Input [16]: [d_week_seq1#42, sun_sales1#43, mon_sales1#44, tue_sales1#45, wed_sales1#46, thu_sales1#47, fri_sales1#48, sat_sales1#49, d_week_seq2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] (36) Exchange -Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#72, round((mon_sales1 / mon_sales2), 2)#73, round((tue_sales1 / tue_sales2), 2)#74, round((wed_sales1 / wed_sales2), 2)#75, round((thu_sales1 / thu_sales2), 2)#76, round((fri_sales1 / fri_sales2), 2)#77, round((sat_sales1 / sat_sales2), 2)#78] -Arguments: rangepartitioning(d_week_seq1#45 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#79] +Input [8]: [d_week_seq1#42, round((sun_sales1 / sun_sales2), 2)#67, round((mon_sales1 / mon_sales2), 2)#68, round((tue_sales1 / tue_sales2), 2)#69, round((wed_sales1 / wed_sales2), 2)#70, round((thu_sales1 / thu_sales2), 2)#71, round((fri_sales1 / fri_sales2), 2)#72, round((sat_sales1 / sat_sales2), 2)#73] +Arguments: rangepartitioning(d_week_seq1#42 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=6] (37) Sort [codegen id : 13] -Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#72, round((mon_sales1 / mon_sales2), 2)#73, round((tue_sales1 / tue_sales2), 2)#74, round((wed_sales1 / wed_sales2), 2)#75, round((thu_sales1 / thu_sales2), 2)#76, round((fri_sales1 / fri_sales2), 2)#77, round((sat_sales1 / sat_sales2), 2)#78] -Arguments: [d_week_seq1#45 ASC NULLS FIRST], true, 0 +Input [8]: [d_week_seq1#42, round((sun_sales1 / sun_sales2), 2)#67, round((mon_sales1 / mon_sales2), 2)#68, round((tue_sales1 / tue_sales2), 2)#69, round((wed_sales1 / wed_sales2), 2)#70, round((thu_sales1 / thu_sales2), 2)#71, round((fri_sales1 / fri_sales2), 2)#72, round((sat_sales1 / sat_sales2), 2)#73] +Arguments: [d_week_seq1#42 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt index 8f188db553004..031dc924069e6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q2/explain.txt @@ -84,7 +84,7 @@ Condition : (isnotnull(d_date_sk#9) AND isnotnull(d_week_seq#10)) (11) BroadcastExchange Input [3]: [d_date_sk#9, d_week_seq#10, d_day_name#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [sold_date_sk#3] @@ -99,110 +99,110 @@ Input [5]: [sold_date_sk#3, sales_price#4, d_date_sk#9, d_week_seq#10, d_day_nam Input [3]: [sales_price#4, d_week_seq#10, d_day_name#11] Keys [1]: [d_week_seq#10] Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] -Aggregate Attributes [7]: [sum#13, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19] -Results [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Aggregate Attributes [7]: [sum#12, sum#13, sum#14, sum#15, sum#16, sum#17, sum#18] +Results [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] (15) Exchange -Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] -Arguments: hashpartitioning(d_week_seq#10, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] +Arguments: hashpartitioning(d_week_seq#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] (16) HashAggregate [codegen id : 12] -Input [8]: [d_week_seq#10, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Input [8]: [d_week_seq#10, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] Keys [1]: [d_week_seq#10] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#34] -Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#34,17,2) AS sat_sales#41] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26,17,2) AS sun_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27,17,2) AS mon_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28,17,2) AS tue_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29,17,2) AS wed_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30,17,2) AS thu_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31,17,2) AS fri_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32,17,2) AS sat_sales#39] (17) Scan parquet default.date_dim -Output [2]: [d_week_seq#42, d_year#43] +Output [2]: [d_week_seq#40, d_year#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_week_seq)] ReadSchema: struct (18) ColumnarToRow [codegen id : 5] -Input [2]: [d_week_seq#42, d_year#43] +Input [2]: [d_week_seq#40, d_year#41] (19) Filter [codegen id : 5] -Input [2]: [d_week_seq#42, d_year#43] -Condition : ((isnotnull(d_year#43) AND (d_year#43 = 2001)) AND isnotnull(d_week_seq#42)) +Input [2]: [d_week_seq#40, d_year#41] +Condition : ((isnotnull(d_year#41) AND (d_year#41 = 2001)) AND isnotnull(d_week_seq#40)) (20) Project [codegen id : 5] -Output [1]: [d_week_seq#42] -Input [2]: [d_week_seq#42, d_year#43] +Output [1]: [d_week_seq#40] +Input [2]: [d_week_seq#40, d_year#41] (21) BroadcastExchange -Input [1]: [d_week_seq#42] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] +Input [1]: [d_week_seq#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (22) BroadcastHashJoin [codegen id : 12] Left keys [1]: [d_week_seq#10] -Right keys [1]: [d_week_seq#42] +Right keys [1]: [d_week_seq#40] Join condition: None (23) Project [codegen id : 12] -Output [8]: [d_week_seq#10 AS d_week_seq1#45, sun_sales#35 AS sun_sales1#46, mon_sales#36 AS mon_sales1#47, tue_sales#37 AS tue_sales1#48, wed_sales#38 AS wed_sales1#49, thu_sales#39 AS thu_sales1#50, fri_sales#40 AS fri_sales1#51, sat_sales#41 AS sat_sales1#52] -Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#42] +Output [8]: [d_week_seq#10 AS d_week_seq1#42, sun_sales#33 AS sun_sales1#43, mon_sales#34 AS mon_sales1#44, tue_sales#35 AS tue_sales1#45, wed_sales#36 AS wed_sales1#46, thu_sales#37 AS thu_sales1#47, fri_sales#38 AS fri_sales1#48, sat_sales#39 AS sat_sales1#49] +Input [9]: [d_week_seq#10, sun_sales#33, mon_sales#34, tue_sales#35, wed_sales#36, thu_sales#37, fri_sales#38, sat_sales#39, d_week_seq#40] (24) ReusedExchange [Reuses operator id: 15] -Output [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] +Output [8]: [d_week_seq#10, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] (25) HashAggregate [codegen id : 11] -Input [8]: [d_week_seq#10, sum#53, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59] +Input [8]: [d_week_seq#10, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] Keys [1]: [d_week_seq#10] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END)), sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#34] -Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#34,17,2) AS sat_sales#41] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32] +Results [8]: [d_week_seq#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Sunday ) THEN sales_price#4 END))#26,17,2) AS sun_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Monday ) THEN sales_price#4 END))#27,17,2) AS mon_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Tuesday ) THEN sales_price#4 END))#28,17,2) AS tue_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Wednesday) THEN sales_price#4 END))#29,17,2) AS wed_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Thursday ) THEN sales_price#4 END))#30,17,2) AS thu_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Friday ) THEN sales_price#4 END))#31,17,2) AS fri_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#11 = Saturday ) THEN sales_price#4 END))#32,17,2) AS sat_sales#39] (26) Scan parquet default.date_dim -Output [2]: [d_week_seq#60, d_year#61] +Output [2]: [d_week_seq#57, d_year#58] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_week_seq)] ReadSchema: struct (27) ColumnarToRow [codegen id : 10] -Input [2]: [d_week_seq#60, d_year#61] +Input [2]: [d_week_seq#57, d_year#58] (28) Filter [codegen id : 10] -Input [2]: [d_week_seq#60, d_year#61] -Condition : ((isnotnull(d_year#61) AND (d_year#61 = 2002)) AND isnotnull(d_week_seq#60)) +Input [2]: [d_week_seq#57, d_year#58] +Condition : ((isnotnull(d_year#58) AND (d_year#58 = 2002)) AND isnotnull(d_week_seq#57)) (29) Project [codegen id : 10] -Output [1]: [d_week_seq#60] -Input [2]: [d_week_seq#60, d_year#61] +Output [1]: [d_week_seq#57] +Input [2]: [d_week_seq#57, d_year#58] (30) BroadcastExchange -Input [1]: [d_week_seq#60] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#62] +Input [1]: [d_week_seq#57] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (31) BroadcastHashJoin [codegen id : 11] Left keys [1]: [d_week_seq#10] -Right keys [1]: [d_week_seq#60] +Right keys [1]: [d_week_seq#57] Join condition: None (32) Project [codegen id : 11] -Output [8]: [d_week_seq#10 AS d_week_seq2#63, sun_sales#35 AS sun_sales2#64, mon_sales#36 AS mon_sales2#65, tue_sales#37 AS tue_sales2#66, wed_sales#38 AS wed_sales2#67, thu_sales#39 AS thu_sales2#68, fri_sales#40 AS fri_sales2#69, sat_sales#41 AS sat_sales2#70] -Input [9]: [d_week_seq#10, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41, d_week_seq#60] +Output [8]: [d_week_seq#10 AS d_week_seq2#59, sun_sales#33 AS sun_sales2#60, mon_sales#34 AS mon_sales2#61, tue_sales#35 AS tue_sales2#62, wed_sales#36 AS wed_sales2#63, thu_sales#37 AS thu_sales2#64, fri_sales#38 AS fri_sales2#65, sat_sales#39 AS sat_sales2#66] +Input [9]: [d_week_seq#10, sun_sales#33, mon_sales#34, tue_sales#35, wed_sales#36, thu_sales#37, fri_sales#38, sat_sales#39, d_week_seq#57] (33) BroadcastExchange -Input [8]: [d_week_seq2#63, sun_sales2#64, mon_sales2#65, tue_sales2#66, wed_sales2#67, thu_sales2#68, fri_sales2#69, sat_sales2#70] -Arguments: HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint)),false), [id=#71] +Input [8]: [d_week_seq2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] +Arguments: HashedRelationBroadcastMode(List(cast((input[0, int, true] - 53) as bigint)),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [d_week_seq1#45] -Right keys [1]: [(d_week_seq2#63 - 53)] +Left keys [1]: [d_week_seq1#42] +Right keys [1]: [(d_week_seq2#59 - 53)] Join condition: None (35) Project [codegen id : 12] -Output [8]: [d_week_seq1#45, round(CheckOverflow((promote_precision(sun_sales1#46) / promote_precision(sun_sales2#64)), DecimalType(37,20)), 2) AS round((sun_sales1 / sun_sales2), 2)#72, round(CheckOverflow((promote_precision(mon_sales1#47) / promote_precision(mon_sales2#65)), DecimalType(37,20)), 2) AS round((mon_sales1 / mon_sales2), 2)#73, round(CheckOverflow((promote_precision(tue_sales1#48) / promote_precision(tue_sales2#66)), DecimalType(37,20)), 2) AS round((tue_sales1 / tue_sales2), 2)#74, round(CheckOverflow((promote_precision(wed_sales1#49) / promote_precision(wed_sales2#67)), DecimalType(37,20)), 2) AS round((wed_sales1 / wed_sales2), 2)#75, round(CheckOverflow((promote_precision(thu_sales1#50) / promote_precision(thu_sales2#68)), DecimalType(37,20)), 2) AS round((thu_sales1 / thu_sales2), 2)#76, round(CheckOverflow((promote_precision(fri_sales1#51) / promote_precision(fri_sales2#69)), DecimalType(37,20)), 2) AS round((fri_sales1 / fri_sales2), 2)#77, round(CheckOverflow((promote_precision(sat_sales1#52) / promote_precision(sat_sales2#70)), DecimalType(37,20)), 2) AS round((sat_sales1 / sat_sales2), 2)#78] -Input [16]: [d_week_seq1#45, sun_sales1#46, mon_sales1#47, tue_sales1#48, wed_sales1#49, thu_sales1#50, fri_sales1#51, sat_sales1#52, d_week_seq2#63, sun_sales2#64, mon_sales2#65, tue_sales2#66, wed_sales2#67, thu_sales2#68, fri_sales2#69, sat_sales2#70] +Output [8]: [d_week_seq1#42, round(CheckOverflow((promote_precision(sun_sales1#43) / promote_precision(sun_sales2#60)), DecimalType(37,20)), 2) AS round((sun_sales1 / sun_sales2), 2)#67, round(CheckOverflow((promote_precision(mon_sales1#44) / promote_precision(mon_sales2#61)), DecimalType(37,20)), 2) AS round((mon_sales1 / mon_sales2), 2)#68, round(CheckOverflow((promote_precision(tue_sales1#45) / promote_precision(tue_sales2#62)), DecimalType(37,20)), 2) AS round((tue_sales1 / tue_sales2), 2)#69, round(CheckOverflow((promote_precision(wed_sales1#46) / promote_precision(wed_sales2#63)), DecimalType(37,20)), 2) AS round((wed_sales1 / wed_sales2), 2)#70, round(CheckOverflow((promote_precision(thu_sales1#47) / promote_precision(thu_sales2#64)), DecimalType(37,20)), 2) AS round((thu_sales1 / thu_sales2), 2)#71, round(CheckOverflow((promote_precision(fri_sales1#48) / promote_precision(fri_sales2#65)), DecimalType(37,20)), 2) AS round((fri_sales1 / fri_sales2), 2)#72, round(CheckOverflow((promote_precision(sat_sales1#49) / promote_precision(sat_sales2#66)), DecimalType(37,20)), 2) AS round((sat_sales1 / sat_sales2), 2)#73] +Input [16]: [d_week_seq1#42, sun_sales1#43, mon_sales1#44, tue_sales1#45, wed_sales1#46, thu_sales1#47, fri_sales1#48, sat_sales1#49, d_week_seq2#59, sun_sales2#60, mon_sales2#61, tue_sales2#62, wed_sales2#63, thu_sales2#64, fri_sales2#65, sat_sales2#66] (36) Exchange -Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#72, round((mon_sales1 / mon_sales2), 2)#73, round((tue_sales1 / tue_sales2), 2)#74, round((wed_sales1 / wed_sales2), 2)#75, round((thu_sales1 / thu_sales2), 2)#76, round((fri_sales1 / fri_sales2), 2)#77, round((sat_sales1 / sat_sales2), 2)#78] -Arguments: rangepartitioning(d_week_seq1#45 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#79] +Input [8]: [d_week_seq1#42, round((sun_sales1 / sun_sales2), 2)#67, round((mon_sales1 / mon_sales2), 2)#68, round((tue_sales1 / tue_sales2), 2)#69, round((wed_sales1 / wed_sales2), 2)#70, round((thu_sales1 / thu_sales2), 2)#71, round((fri_sales1 / fri_sales2), 2)#72, round((sat_sales1 / sat_sales2), 2)#73] +Arguments: rangepartitioning(d_week_seq1#42 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=6] (37) Sort [codegen id : 13] -Input [8]: [d_week_seq1#45, round((sun_sales1 / sun_sales2), 2)#72, round((mon_sales1 / mon_sales2), 2)#73, round((tue_sales1 / tue_sales2), 2)#74, round((wed_sales1 / wed_sales2), 2)#75, round((thu_sales1 / thu_sales2), 2)#76, round((fri_sales1 / fri_sales2), 2)#77, round((sat_sales1 / sat_sales2), 2)#78] -Arguments: [d_week_seq1#45 ASC NULLS FIRST], true, 0 +Input [8]: [d_week_seq1#42, round((sun_sales1 / sun_sales2), 2)#67, round((mon_sales1 / mon_sales2), 2)#68, round((tue_sales1 / tue_sales2), 2)#69, round((wed_sales1 / wed_sales2), 2)#70, round((thu_sales1 / thu_sales2), 2)#71, round((fri_sales1 / fri_sales2), 2)#72, round((sat_sales1 / sat_sales2), 2)#73] +Arguments: [d_week_seq1#42 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.sf100/explain.txt index 09e4cd2a57054..99d27cdf9cb6f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20.sf100/explain.txt @@ -41,92 +41,92 @@ Condition : isnotnull(cs_item_sk#1) (4) Exchange Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] -Arguments: hashpartitioning(cs_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#5] +Arguments: hashpartitioning(cs_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] Arguments: [cs_item_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.item -Output [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Output [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (8) Filter [codegen id : 3] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Condition : (i_category#11 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#6)) +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Condition : (i_category#10 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#5)) (9) Exchange -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Arguments: hashpartitioning(i_item_sk#6, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Arguments: hashpartitioning(i_item_sk#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] (10) Sort [codegen id : 4] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Arguments: [i_item_sk#5 ASC NULLS FIRST], false, 0 (11) SortMergeJoin [codegen id : 6] Left keys [1]: [cs_item_sk#1] -Right keys [1]: [i_item_sk#6] +Right keys [1]: [i_item_sk#5] Join condition: None (12) Project [codegen id : 6] -Output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (13) ReusedExchange [Reuses operator id: 28] -Output [1]: [d_date_sk#13] +Output [1]: [d_date_sk#11] (14) BroadcastHashJoin [codegen id : 6] Left keys [1]: [cs_sold_date_sk#3] -Right keys [1]: [d_date_sk#13] +Right keys [1]: [d_date_sk#11] Join condition: None (15) Project [codegen id : 6] -Output [6]: [cs_ext_sales_price#2, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11, d_date_sk#13] +Output [6]: [cs_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#11] (16) HashAggregate [codegen id : 6] -Input [6]: [cs_ext_sales_price#2, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Keys [5]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9] +Input [6]: [cs_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#14] -Results [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] (17) Exchange -Input [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] -Arguments: hashpartitioning(i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 7] -Input [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] -Keys [5]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#2))#17] -Results [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#17,17,2) AS _w1#20, i_item_id#7] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#2))#14] +Results [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS _w0#16, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS _w1#17, i_item_id#6] (19) Exchange -Input [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7] -Arguments: hashpartitioning(i_class#10, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] (20) Sort [codegen id : 8] -Input [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7] -Arguments: [i_class#10 ASC NULLS FIRST], false, 0 +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: [i_class#9 ASC NULLS FIRST], false, 0 (21) Window -Input [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7] -Arguments: [sum(_w1#20) windowspecdefinition(i_class#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#10] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: [sum(_w1#17) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#18], [i_class#9] (22) Project [codegen id : 9] -Output [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17)) AS revenueratio#23, i_item_id#7] -Input [9]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7, _we0#22] +Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#16) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#18)), DecimalType(38,17)) AS revenueratio#19, i_item_id#6] +Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6, _we0#18] (23) TakeOrderedAndProject -Input [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, revenueratio#23, i_item_id#7] -Arguments: 100, [i_category#11 ASC NULLS FIRST, i_class#10 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#8 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, revenueratio#23] +Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19, i_item_id#6] +Arguments: 100, [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST], [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] ===== Subqueries ===== @@ -139,25 +139,25 @@ BroadcastExchange (28) (24) Scan parquet default.date_dim -Output [2]: [d_date_sk#13, d_date#24] +Output [2]: [d_date_sk#11, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#13, d_date#24] +Input [2]: [d_date_sk#11, d_date#20] (26) Filter [codegen id : 1] -Input [2]: [d_date_sk#13, d_date#24] -Condition : (((isnotnull(d_date#24) AND (d_date#24 >= 1999-02-22)) AND (d_date#24 <= 1999-03-24)) AND isnotnull(d_date_sk#13)) +Input [2]: [d_date_sk#11, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-22)) AND (d_date#20 <= 1999-03-24)) AND isnotnull(d_date_sk#11)) (27) Project [codegen id : 1] -Output [1]: [d_date_sk#13] -Input [2]: [d_date_sk#13, d_date#24] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#20] (28) BroadcastExchange -Input [1]: [d_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt index 8b9d47316f293..775fba161d8f6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q20/explain.txt @@ -52,7 +52,7 @@ Condition : (i_category#10 IN (Sports (7) BroadcastExchange Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_item_sk#1] @@ -64,54 +64,54 @@ Output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#6, i_item_desc#7 Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (10) ReusedExchange [Reuses operator id: 25] -Output [1]: [d_date_sk#12] +Output [1]: [d_date_sk#11] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_sold_date_sk#3] -Right keys [1]: [d_date_sk#12] +Right keys [1]: [d_date_sk#11] Join condition: None (12) Project [codegen id : 3] Output [6]: [cs_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] -Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#12] +Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#11] (13) HashAggregate [codegen id : 3] Input [6]: [cs_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#13] -Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] (14) Exchange -Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] -Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 4] -Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#2))#16] -Results [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#16,17,2) AS itemrevenue#17, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#16,17,2) AS _w0#18, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#16,17,2) AS _w1#19, i_item_id#6] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#2))#14] +Results [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS _w0#16, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS _w1#17, i_item_id#6] (16) Exchange -Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6] -Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (17) Sort [codegen id : 5] -Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] Arguments: [i_class#9 ASC NULLS FIRST], false, 0 (18) Window -Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6] -Arguments: [sum(_w1#19) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#21], [i_class#9] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: [sum(_w1#17) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#18], [i_class#9] (19) Project [codegen id : 6] -Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17)) AS revenueratio#22, i_item_id#6] -Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6, _we0#21] +Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#16) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#18)), DecimalType(38,17)) AS revenueratio#19, i_item_id#6] +Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6, _we0#18] (20) TakeOrderedAndProject -Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22, i_item_id#6] -Arguments: 100, [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#22 ASC NULLS FIRST], [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22] +Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19, i_item_id#6] +Arguments: 100, [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST], [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] ===== Subqueries ===== @@ -124,25 +124,25 @@ BroadcastExchange (25) (21) Scan parquet default.date_dim -Output [2]: [d_date_sk#12, d_date#23] +Output [2]: [d_date_sk#11, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#12, d_date#23] +Input [2]: [d_date_sk#11, d_date#20] (23) Filter [codegen id : 1] -Input [2]: [d_date_sk#12, d_date#23] -Condition : (((isnotnull(d_date#23) AND (d_date#23 >= 1999-02-22)) AND (d_date#23 <= 1999-03-24)) AND isnotnull(d_date_sk#12)) +Input [2]: [d_date_sk#11, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-22)) AND (d_date#20 <= 1999-03-24)) AND isnotnull(d_date_sk#11)) (24) Project [codegen id : 1] -Output [1]: [d_date_sk#12] -Input [2]: [d_date_sk#12, d_date#23] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#20] (25) BroadcastExchange -Input [1]: [d_date_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt index 96e1bacaa55ae..abc5b2a95fa69 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21.sf100/explain.txt @@ -60,7 +60,7 @@ Input [3]: [i_item_sk#6, i_item_id#7, i_current_price#8] (8) BroadcastExchange Input [2]: [i_item_sk#6, i_item_id#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_item_sk#1] @@ -72,69 +72,69 @@ Output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_i Input [6]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6, i_item_id#7] (11) ReusedExchange [Reuses operator id: 28] -Output [2]: [d_date_sk#10, d_date#11] +Output [2]: [d_date_sk#9, d_date#10] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_date_sk#4] -Right keys [1]: [d_date_sk#10] +Right keys [1]: [d_date_sk#9] Join condition: None (13) Project [codegen id : 4] -Output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_id#7, d_date#11] -Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_id#7, d_date_sk#10, d_date#11] +Output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_id#7, d_date#10] +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_id#7, d_date_sk#9, d_date#10] (14) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#12, w_warehouse_name#13] +Output [2]: [w_warehouse_sk#11, w_warehouse_name#12] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [2]: [w_warehouse_sk#12, w_warehouse_name#13] +Input [2]: [w_warehouse_sk#11, w_warehouse_name#12] (16) Filter [codegen id : 3] -Input [2]: [w_warehouse_sk#12, w_warehouse_name#13] -Condition : isnotnull(w_warehouse_sk#12) +Input [2]: [w_warehouse_sk#11, w_warehouse_name#12] +Condition : isnotnull(w_warehouse_sk#11) (17) BroadcastExchange -Input [2]: [w_warehouse_sk#12, w_warehouse_name#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Input [2]: [w_warehouse_sk#11, w_warehouse_name#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_warehouse_sk#2] -Right keys [1]: [w_warehouse_sk#12] +Right keys [1]: [w_warehouse_sk#11] Join condition: None (19) Project [codegen id : 4] -Output [4]: [inv_quantity_on_hand#3, w_warehouse_name#13, i_item_id#7, d_date#11] -Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_id#7, d_date#11, w_warehouse_sk#12, w_warehouse_name#13] +Output [4]: [inv_quantity_on_hand#3, w_warehouse_name#12, i_item_id#7, d_date#10] +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_id#7, d_date#10, w_warehouse_sk#11, w_warehouse_name#12] (20) HashAggregate [codegen id : 4] -Input [4]: [inv_quantity_on_hand#3, w_warehouse_name#13, i_item_id#7, d_date#11] -Keys [2]: [w_warehouse_name#13, i_item_id#7] -Functions [2]: [partial_sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), partial_sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] -Aggregate Attributes [2]: [sum#15, sum#16] -Results [4]: [w_warehouse_name#13, i_item_id#7, sum#17, sum#18] +Input [4]: [inv_quantity_on_hand#3, w_warehouse_name#12, i_item_id#7, d_date#10] +Keys [2]: [w_warehouse_name#12, i_item_id#7] +Functions [2]: [partial_sum(CASE WHEN (d_date#10 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), partial_sum(CASE WHEN (d_date#10 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] +Aggregate Attributes [2]: [sum#13, sum#14] +Results [4]: [w_warehouse_name#12, i_item_id#7, sum#15, sum#16] (21) Exchange -Input [4]: [w_warehouse_name#13, i_item_id#7, sum#17, sum#18] -Arguments: hashpartitioning(w_warehouse_name#13, i_item_id#7, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [4]: [w_warehouse_name#12, i_item_id#7, sum#15, sum#16] +Arguments: hashpartitioning(w_warehouse_name#12, i_item_id#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 5] -Input [4]: [w_warehouse_name#13, i_item_id#7, sum#17, sum#18] -Keys [2]: [w_warehouse_name#13, i_item_id#7] -Functions [2]: [sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] -Aggregate Attributes [2]: [sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#20, sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#21] -Results [4]: [w_warehouse_name#13, i_item_id#7, sum(CASE WHEN (d_date#11 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#20 AS inv_before#22, sum(CASE WHEN (d_date#11 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#21 AS inv_after#23] +Input [4]: [w_warehouse_name#12, i_item_id#7, sum#15, sum#16] +Keys [2]: [w_warehouse_name#12, i_item_id#7] +Functions [2]: [sum(CASE WHEN (d_date#10 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), sum(CASE WHEN (d_date#10 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#10 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#17, sum(CASE WHEN (d_date#10 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#18] +Results [4]: [w_warehouse_name#12, i_item_id#7, sum(CASE WHEN (d_date#10 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#17 AS inv_before#19, sum(CASE WHEN (d_date#10 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#18 AS inv_after#20] (23) Filter [codegen id : 5] -Input [4]: [w_warehouse_name#13, i_item_id#7, inv_before#22, inv_after#23] -Condition : ((isnotnull(inv_before#22) AND isnotnull(inv_after#23)) AND (((inv_before#22 > 0) AND ((cast(inv_after#23 as double) / cast(inv_before#22 as double)) >= 0.666667)) AND ((cast(inv_after#23 as double) / cast(inv_before#22 as double)) <= 1.5))) +Input [4]: [w_warehouse_name#12, i_item_id#7, inv_before#19, inv_after#20] +Condition : ((isnotnull(inv_before#19) AND isnotnull(inv_after#20)) AND (((inv_before#19 > 0) AND ((cast(inv_after#20 as double) / cast(inv_before#19 as double)) >= 0.666667)) AND ((cast(inv_after#20 as double) / cast(inv_before#19 as double)) <= 1.5))) (24) TakeOrderedAndProject -Input [4]: [w_warehouse_name#13, i_item_id#7, inv_before#22, inv_after#23] -Arguments: 100, [w_warehouse_name#13 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST], [w_warehouse_name#13, i_item_id#7, inv_before#22, inv_after#23] +Input [4]: [w_warehouse_name#12, i_item_id#7, inv_before#19, inv_after#20] +Arguments: 100, [w_warehouse_name#12 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST], [w_warehouse_name#12, i_item_id#7, inv_before#19, inv_after#20] ===== Subqueries ===== @@ -146,21 +146,21 @@ BroadcastExchange (28) (25) Scan parquet default.date_dim -Output [2]: [d_date_sk#10, d_date#11] +Output [2]: [d_date_sk#9, d_date#10] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#10, d_date#11] +Input [2]: [d_date_sk#9, d_date#10] (27) Filter [codegen id : 1] -Input [2]: [d_date_sk#10, d_date#11] -Condition : (((isnotnull(d_date#11) AND (d_date#11 >= 2000-02-10)) AND (d_date#11 <= 2000-04-10)) AND isnotnull(d_date_sk#10)) +Input [2]: [d_date_sk#9, d_date#10] +Condition : (((isnotnull(d_date#10) AND (d_date#10 >= 2000-02-10)) AND (d_date#10 <= 2000-04-10)) AND isnotnull(d_date_sk#9)) (28) BroadcastExchange -Input [2]: [d_date_sk#10, d_date#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#24] +Input [2]: [d_date_sk#9, d_date#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt index 01bba5ee5f966..491d823d17a8a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q21/explain.txt @@ -56,7 +56,7 @@ Condition : isnotnull(w_warehouse_sk#6) (7) BroadcastExchange Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_warehouse_sk#2] @@ -68,73 +68,73 @@ Output [4]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_n Input [6]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_sk#6, w_warehouse_name#7] (10) Scan parquet default.item -Output [3]: [i_item_sk#9, i_item_id#10, i_current_price#11] +Output [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [3]: [i_item_sk#9, i_item_id#10, i_current_price#11] +Input [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] (12) Filter [codegen id : 2] -Input [3]: [i_item_sk#9, i_item_id#10, i_current_price#11] -Condition : (((isnotnull(i_current_price#11) AND (i_current_price#11 >= 0.99)) AND (i_current_price#11 <= 1.49)) AND isnotnull(i_item_sk#9)) +Input [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] +Condition : (((isnotnull(i_current_price#10) AND (i_current_price#10 >= 0.99)) AND (i_current_price#10 <= 1.49)) AND isnotnull(i_item_sk#8)) (13) Project [codegen id : 2] -Output [2]: [i_item_sk#9, i_item_id#10] -Input [3]: [i_item_sk#9, i_item_id#10, i_current_price#11] +Output [2]: [i_item_sk#8, i_item_id#9] +Input [3]: [i_item_sk#8, i_item_id#9, i_current_price#10] (14) BroadcastExchange -Input [2]: [i_item_sk#9, i_item_id#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Input [2]: [i_item_sk#8, i_item_id#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_item_sk#1] -Right keys [1]: [i_item_sk#9] +Right keys [1]: [i_item_sk#8] Join condition: None (16) Project [codegen id : 4] -Output [4]: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#7, i_item_id#10] -Input [6]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#7, i_item_sk#9, i_item_id#10] +Output [4]: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#7, i_item_id#9] +Input [6]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#7, i_item_sk#8, i_item_id#9] (17) ReusedExchange [Reuses operator id: 28] -Output [2]: [d_date_sk#13, d_date#14] +Output [2]: [d_date_sk#11, d_date#12] (18) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_date_sk#4] -Right keys [1]: [d_date_sk#13] +Right keys [1]: [d_date_sk#11] Join condition: None (19) Project [codegen id : 4] -Output [4]: [inv_quantity_on_hand#3, w_warehouse_name#7, i_item_id#10, d_date#14] -Input [6]: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#7, i_item_id#10, d_date_sk#13, d_date#14] +Output [4]: [inv_quantity_on_hand#3, w_warehouse_name#7, i_item_id#9, d_date#12] +Input [6]: [inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_name#7, i_item_id#9, d_date_sk#11, d_date#12] (20) HashAggregate [codegen id : 4] -Input [4]: [inv_quantity_on_hand#3, w_warehouse_name#7, i_item_id#10, d_date#14] -Keys [2]: [w_warehouse_name#7, i_item_id#10] -Functions [2]: [partial_sum(CASE WHEN (d_date#14 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), partial_sum(CASE WHEN (d_date#14 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] -Aggregate Attributes [2]: [sum#15, sum#16] -Results [4]: [w_warehouse_name#7, i_item_id#10, sum#17, sum#18] +Input [4]: [inv_quantity_on_hand#3, w_warehouse_name#7, i_item_id#9, d_date#12] +Keys [2]: [w_warehouse_name#7, i_item_id#9] +Functions [2]: [partial_sum(CASE WHEN (d_date#12 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), partial_sum(CASE WHEN (d_date#12 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] +Aggregate Attributes [2]: [sum#13, sum#14] +Results [4]: [w_warehouse_name#7, i_item_id#9, sum#15, sum#16] (21) Exchange -Input [4]: [w_warehouse_name#7, i_item_id#10, sum#17, sum#18] -Arguments: hashpartitioning(w_warehouse_name#7, i_item_id#10, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [4]: [w_warehouse_name#7, i_item_id#9, sum#15, sum#16] +Arguments: hashpartitioning(w_warehouse_name#7, i_item_id#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 5] -Input [4]: [w_warehouse_name#7, i_item_id#10, sum#17, sum#18] -Keys [2]: [w_warehouse_name#7, i_item_id#10] -Functions [2]: [sum(CASE WHEN (d_date#14 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), sum(CASE WHEN (d_date#14 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] -Aggregate Attributes [2]: [sum(CASE WHEN (d_date#14 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#20, sum(CASE WHEN (d_date#14 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#21] -Results [4]: [w_warehouse_name#7, i_item_id#10, sum(CASE WHEN (d_date#14 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#20 AS inv_before#22, sum(CASE WHEN (d_date#14 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#21 AS inv_after#23] +Input [4]: [w_warehouse_name#7, i_item_id#9, sum#15, sum#16] +Keys [2]: [w_warehouse_name#7, i_item_id#9] +Functions [2]: [sum(CASE WHEN (d_date#12 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END), sum(CASE WHEN (d_date#12 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#12 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#17, sum(CASE WHEN (d_date#12 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#18] +Results [4]: [w_warehouse_name#7, i_item_id#9, sum(CASE WHEN (d_date#12 < 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#17 AS inv_before#19, sum(CASE WHEN (d_date#12 >= 2000-03-11) THEN inv_quantity_on_hand#3 ELSE 0 END)#18 AS inv_after#20] (23) Filter [codegen id : 5] -Input [4]: [w_warehouse_name#7, i_item_id#10, inv_before#22, inv_after#23] -Condition : ((isnotnull(inv_before#22) AND isnotnull(inv_after#23)) AND (((inv_before#22 > 0) AND ((cast(inv_after#23 as double) / cast(inv_before#22 as double)) >= 0.666667)) AND ((cast(inv_after#23 as double) / cast(inv_before#22 as double)) <= 1.5))) +Input [4]: [w_warehouse_name#7, i_item_id#9, inv_before#19, inv_after#20] +Condition : ((isnotnull(inv_before#19) AND isnotnull(inv_after#20)) AND (((inv_before#19 > 0) AND ((cast(inv_after#20 as double) / cast(inv_before#19 as double)) >= 0.666667)) AND ((cast(inv_after#20 as double) / cast(inv_before#19 as double)) <= 1.5))) (24) TakeOrderedAndProject -Input [4]: [w_warehouse_name#7, i_item_id#10, inv_before#22, inv_after#23] -Arguments: 100, [w_warehouse_name#7 ASC NULLS FIRST, i_item_id#10 ASC NULLS FIRST], [w_warehouse_name#7, i_item_id#10, inv_before#22, inv_after#23] +Input [4]: [w_warehouse_name#7, i_item_id#9, inv_before#19, inv_after#20] +Arguments: 100, [w_warehouse_name#7 ASC NULLS FIRST, i_item_id#9 ASC NULLS FIRST], [w_warehouse_name#7, i_item_id#9, inv_before#19, inv_after#20] ===== Subqueries ===== @@ -146,21 +146,21 @@ BroadcastExchange (28) (25) Scan parquet default.date_dim -Output [2]: [d_date_sk#13, d_date#14] +Output [2]: [d_date_sk#11, d_date#12] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#13, d_date#14] +Input [2]: [d_date_sk#11, d_date#12] (27) Filter [codegen id : 1] -Input [2]: [d_date_sk#13, d_date#14] -Condition : (((isnotnull(d_date#14) AND (d_date#14 >= 2000-02-10)) AND (d_date#14 <= 2000-04-10)) AND isnotnull(d_date_sk#13)) +Input [2]: [d_date_sk#11, d_date#12] +Condition : (((isnotnull(d_date#12) AND (d_date#12 >= 2000-02-10)) AND (d_date#12 <= 2000-04-10)) AND isnotnull(d_date_sk#11)) (28) BroadcastExchange -Input [2]: [d_date_sk#13, d_date#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#24] +Input [2]: [d_date_sk#11, d_date#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.sf100/explain.txt index 4258a9c1a71c1..e8f1913db5c5d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22.sf100/explain.txt @@ -58,7 +58,7 @@ Condition : isnotnull(w_warehouse_sk#6) (7) BroadcastExchange Input [1]: [w_warehouse_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [inv_warehouse_sk#2] @@ -70,81 +70,81 @@ Output [3]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4] Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_sk#6] (10) ReusedExchange [Reuses operator id: 31] -Output [1]: [d_date_sk#8] +Output [1]: [d_date_sk#7] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [inv_date_sk#4] -Right keys [1]: [d_date_sk#8] +Right keys [1]: [d_date_sk#7] Join condition: None (12) Project [codegen id : 3] Output [2]: [inv_item_sk#1, inv_quantity_on_hand#3] -Input [4]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, d_date_sk#8] +Input [4]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, d_date_sk#7] (13) Exchange Input [2]: [inv_item_sk#1, inv_quantity_on_hand#3] -Arguments: hashpartitioning(inv_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(inv_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) Sort [codegen id : 4] Input [2]: [inv_item_sk#1, inv_quantity_on_hand#3] Arguments: [inv_item_sk#1 ASC NULLS FIRST], false, 0 (15) Scan parquet default.item -Output [5]: [i_item_sk#10, i_brand#11, i_class#12, i_category#13, i_product_name#14] +Output [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [5]: [i_item_sk#10, i_brand#11, i_class#12, i_category#13, i_product_name#14] +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] (17) Filter [codegen id : 5] -Input [5]: [i_item_sk#10, i_brand#11, i_class#12, i_category#13, i_product_name#14] -Condition : isnotnull(i_item_sk#10) +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Condition : isnotnull(i_item_sk#8) (18) Exchange -Input [5]: [i_item_sk#10, i_brand#11, i_class#12, i_category#13, i_product_name#14] -Arguments: hashpartitioning(i_item_sk#10, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Arguments: hashpartitioning(i_item_sk#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 6] -Input [5]: [i_item_sk#10, i_brand#11, i_class#12, i_category#13, i_product_name#14] -Arguments: [i_item_sk#10 ASC NULLS FIRST], false, 0 +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Arguments: [i_item_sk#8 ASC NULLS FIRST], false, 0 (20) SortMergeJoin [codegen id : 7] Left keys [1]: [inv_item_sk#1] -Right keys [1]: [i_item_sk#10] +Right keys [1]: [i_item_sk#8] Join condition: None (21) Project [codegen id : 7] -Output [5]: [inv_quantity_on_hand#3, i_product_name#14, i_brand#11, i_class#12, i_category#13] -Input [7]: [inv_item_sk#1, inv_quantity_on_hand#3, i_item_sk#10, i_brand#11, i_class#12, i_category#13, i_product_name#14] +Output [5]: [inv_quantity_on_hand#3, i_product_name#12, i_brand#9, i_class#10, i_category#11] +Input [7]: [inv_item_sk#1, inv_quantity_on_hand#3, i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] (22) Expand [codegen id : 7] -Input [5]: [inv_quantity_on_hand#3, i_product_name#14, i_brand#11, i_class#12, i_category#13] -Arguments: [[inv_quantity_on_hand#3, i_product_name#14, i_brand#11, i_class#12, i_category#13, 0], [inv_quantity_on_hand#3, i_product_name#14, i_brand#11, i_class#12, null, 1], [inv_quantity_on_hand#3, i_product_name#14, i_brand#11, null, null, 3], [inv_quantity_on_hand#3, i_product_name#14, null, null, null, 7], [inv_quantity_on_hand#3, null, null, null, null, 15]], [inv_quantity_on_hand#3, i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] +Input [5]: [inv_quantity_on_hand#3, i_product_name#12, i_brand#9, i_class#10, i_category#11] +Arguments: [[inv_quantity_on_hand#3, i_product_name#12, i_brand#9, i_class#10, i_category#11, 0], [inv_quantity_on_hand#3, i_product_name#12, i_brand#9, i_class#10, null, 1], [inv_quantity_on_hand#3, i_product_name#12, i_brand#9, null, null, 3], [inv_quantity_on_hand#3, i_product_name#12, null, null, null, 7], [inv_quantity_on_hand#3, null, null, null, null, 15]], [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] (23) HashAggregate [codegen id : 7] -Input [6]: [inv_quantity_on_hand#3, i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] -Keys [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] +Input [6]: [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] Functions [1]: [partial_avg(inv_quantity_on_hand#3)] -Aggregate Attributes [2]: [sum#21, count#22] -Results [7]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, sum#23, count#24] +Aggregate Attributes [2]: [sum#18, count#19] +Results [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] (24) Exchange -Input [7]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, sum#23, count#24] -Arguments: hashpartitioning(i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, 5), ENSURE_REQUIREMENTS, [id=#25] +Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] +Arguments: hashpartitioning(i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] (25) HashAggregate [codegen id : 8] -Input [7]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20, sum#23, count#24] -Keys [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, spark_grouping_id#20] +Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] +Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] Functions [1]: [avg(inv_quantity_on_hand#3)] -Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#26] -Results [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, avg(inv_quantity_on_hand#3)#26 AS qoh#27] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#22] +Results [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, avg(inv_quantity_on_hand#3)#22 AS qoh#23] (26) TakeOrderedAndProject -Input [5]: [i_product_name#16, i_brand#17, i_class#18, i_category#19, qoh#27] -Arguments: 100, [qoh#27 ASC NULLS FIRST, i_product_name#16 ASC NULLS FIRST, i_brand#17 ASC NULLS FIRST, i_class#18 ASC NULLS FIRST, i_category#19 ASC NULLS FIRST], [i_product_name#16, i_brand#17, i_class#18, i_category#19, qoh#27] +Input [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#23] +Arguments: 100, [qoh#23 ASC NULLS FIRST, i_product_name#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_category#16 ASC NULLS FIRST], [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#23] ===== Subqueries ===== @@ -157,25 +157,25 @@ BroadcastExchange (31) (27) Scan parquet default.date_dim -Output [2]: [d_date_sk#8, d_month_seq#28] +Output [2]: [d_date_sk#7, d_month_seq#24] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#8, d_month_seq#28] +Input [2]: [d_date_sk#7, d_month_seq#24] (29) Filter [codegen id : 1] -Input [2]: [d_date_sk#8, d_month_seq#28] -Condition : (((isnotnull(d_month_seq#28) AND (d_month_seq#28 >= 1200)) AND (d_month_seq#28 <= 1211)) AND isnotnull(d_date_sk#8)) +Input [2]: [d_date_sk#7, d_month_seq#24] +Condition : (((isnotnull(d_month_seq#24) AND (d_month_seq#24 >= 1200)) AND (d_month_seq#24 <= 1211)) AND isnotnull(d_date_sk#7)) (30) Project [codegen id : 1] -Output [1]: [d_date_sk#8] -Input [2]: [d_date_sk#8, d_month_seq#28] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_month_seq#24] (31) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt index c1b0b42178114..9700943b1264d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q22/explain.txt @@ -67,7 +67,7 @@ Condition : isnotnull(i_item_sk#7) (10) BroadcastExchange Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_item_sk#1] @@ -79,57 +79,57 @@ Output [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i Input [8]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] (13) Scan parquet default.warehouse -Output [1]: [w_warehouse_sk#13] +Output [1]: [w_warehouse_sk#12] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [1]: [w_warehouse_sk#13] +Input [1]: [w_warehouse_sk#12] (15) Filter [codegen id : 3] -Input [1]: [w_warehouse_sk#13] -Condition : isnotnull(w_warehouse_sk#13) +Input [1]: [w_warehouse_sk#12] +Condition : isnotnull(w_warehouse_sk#12) (16) BroadcastExchange -Input [1]: [w_warehouse_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Input [1]: [w_warehouse_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_warehouse_sk#2] -Right keys [1]: [w_warehouse_sk#13] +Right keys [1]: [w_warehouse_sk#12] Join condition: None (18) Project [codegen id : 4] Output [5]: [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10] -Input [7]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11, w_warehouse_sk#13] +Input [7]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11, w_warehouse_sk#12] (19) Expand [codegen id : 4] Input [5]: [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10] -Arguments: [[inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10, 0], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, null, 1], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, null, null, 3], [inv_quantity_on_hand#3, i_product_name#11, null, null, null, 7], [inv_quantity_on_hand#3, null, null, null, null, 15]], [inv_quantity_on_hand#3, i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19] +Arguments: [[inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, i_category#10, 0], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, i_class#9, null, 1], [inv_quantity_on_hand#3, i_product_name#11, i_brand#8, null, null, 3], [inv_quantity_on_hand#3, i_product_name#11, null, null, null, 7], [inv_quantity_on_hand#3, null, null, null, null, 15]], [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] (20) HashAggregate [codegen id : 4] -Input [6]: [inv_quantity_on_hand#3, i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19] -Keys [5]: [i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19] +Input [6]: [inv_quantity_on_hand#3, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] Functions [1]: [partial_avg(inv_quantity_on_hand#3)] -Aggregate Attributes [2]: [sum#20, count#21] -Results [7]: [i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19, sum#22, count#23] +Aggregate Attributes [2]: [sum#18, count#19] +Results [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] (21) Exchange -Input [7]: [i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19, sum#22, count#23] -Arguments: hashpartitioning(i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] +Arguments: hashpartitioning(i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 5] -Input [7]: [i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19, sum#22, count#23] -Keys [5]: [i_product_name#15, i_brand#16, i_class#17, i_category#18, spark_grouping_id#19] +Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] +Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] Functions [1]: [avg(inv_quantity_on_hand#3)] -Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#25] -Results [5]: [i_product_name#15, i_brand#16, i_class#17, i_category#18, avg(inv_quantity_on_hand#3)#25 AS qoh#26] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#22] +Results [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, avg(inv_quantity_on_hand#3)#22 AS qoh#23] (23) TakeOrderedAndProject -Input [5]: [i_product_name#15, i_brand#16, i_class#17, i_category#18, qoh#26] -Arguments: 100, [qoh#26 ASC NULLS FIRST, i_product_name#15 ASC NULLS FIRST, i_brand#16 ASC NULLS FIRST, i_class#17 ASC NULLS FIRST, i_category#18 ASC NULLS FIRST], [i_product_name#15, i_brand#16, i_class#17, i_category#18, qoh#26] +Input [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#23] +Arguments: 100, [qoh#23 ASC NULLS FIRST, i_product_name#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_category#16 ASC NULLS FIRST], [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#23] ===== Subqueries ===== @@ -142,25 +142,25 @@ BroadcastExchange (28) (24) Scan parquet default.date_dim -Output [2]: [d_date_sk#6, d_month_seq#27] +Output [2]: [d_date_sk#6, d_month_seq#24] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#6, d_month_seq#27] +Input [2]: [d_date_sk#6, d_month_seq#24] (26) Filter [codegen id : 1] -Input [2]: [d_date_sk#6, d_month_seq#27] -Condition : (((isnotnull(d_month_seq#27) AND (d_month_seq#27 >= 1200)) AND (d_month_seq#27 <= 1211)) AND isnotnull(d_date_sk#6)) +Input [2]: [d_date_sk#6, d_month_seq#24] +Condition : (((isnotnull(d_month_seq#24) AND (d_month_seq#24 >= 1200)) AND (d_month_seq#24 <= 1211)) AND isnotnull(d_date_sk#6)) (27) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [2]: [d_date_sk#6, d_month_seq#27] +Input [2]: [d_date_sk#6, d_month_seq#24] (28) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt index 7f419ce3eaf6d..84fdda1e313c6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a.sf100/explain.txt @@ -103,107 +103,107 @@ Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, (3) Exchange Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] -Arguments: hashpartitioning(cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#7] +Arguments: hashpartitioning(cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=1] (4) Sort [codegen id : 2] Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] Arguments: [cs_item_sk#2 ASC NULLS FIRST], false, 0 (5) Scan parquet default.store_sales -Output [2]: [ss_item_sk#8, ss_sold_date_sk#9] +Output [2]: [ss_item_sk#7, ss_sold_date_sk#8] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#9), dynamicpruningexpression(ss_sold_date_sk#9 IN dynamicpruning#10)] +PartitionFilters: [isnotnull(ss_sold_date_sk#8), dynamicpruningexpression(ss_sold_date_sk#8 IN dynamicpruning#9)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (6) ColumnarToRow [codegen id : 4] -Input [2]: [ss_item_sk#8, ss_sold_date_sk#9] +Input [2]: [ss_item_sk#7, ss_sold_date_sk#8] (7) Filter [codegen id : 4] -Input [2]: [ss_item_sk#8, ss_sold_date_sk#9] -Condition : isnotnull(ss_item_sk#8) +Input [2]: [ss_item_sk#7, ss_sold_date_sk#8] +Condition : isnotnull(ss_item_sk#7) (8) ReusedExchange [Reuses operator id: 100] -Output [2]: [d_date_sk#11, d_date#12] +Output [2]: [d_date_sk#10, d_date#11] (9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#9] -Right keys [1]: [d_date_sk#11] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#10] Join condition: None (10) Project [codegen id : 4] -Output [2]: [ss_item_sk#8, d_date#12] -Input [4]: [ss_item_sk#8, ss_sold_date_sk#9, d_date_sk#11, d_date#12] +Output [2]: [ss_item_sk#7, d_date#11] +Input [4]: [ss_item_sk#7, ss_sold_date_sk#8, d_date_sk#10, d_date#11] (11) Exchange -Input [2]: [ss_item_sk#8, d_date#12] -Arguments: hashpartitioning(ss_item_sk#8, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [2]: [ss_item_sk#7, d_date#11] +Arguments: hashpartitioning(ss_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 5] -Input [2]: [ss_item_sk#8, d_date#12] -Arguments: [ss_item_sk#8 ASC NULLS FIRST], false, 0 +Input [2]: [ss_item_sk#7, d_date#11] +Arguments: [ss_item_sk#7 ASC NULLS FIRST], false, 0 (13) Scan parquet default.item -Output [2]: [i_item_sk#14, i_item_desc#15] +Output [2]: [i_item_sk#12, i_item_desc#13] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 6] -Input [2]: [i_item_sk#14, i_item_desc#15] +Input [2]: [i_item_sk#12, i_item_desc#13] (15) Filter [codegen id : 6] -Input [2]: [i_item_sk#14, i_item_desc#15] -Condition : isnotnull(i_item_sk#14) +Input [2]: [i_item_sk#12, i_item_desc#13] +Condition : isnotnull(i_item_sk#12) (16) Exchange -Input [2]: [i_item_sk#14, i_item_desc#15] -Arguments: hashpartitioning(i_item_sk#14, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [2]: [i_item_sk#12, i_item_desc#13] +Arguments: hashpartitioning(i_item_sk#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (17) Sort [codegen id : 7] -Input [2]: [i_item_sk#14, i_item_desc#15] -Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 +Input [2]: [i_item_sk#12, i_item_desc#13] +Arguments: [i_item_sk#12 ASC NULLS FIRST], false, 0 (18) SortMergeJoin [codegen id : 8] -Left keys [1]: [ss_item_sk#8] -Right keys [1]: [i_item_sk#14] +Left keys [1]: [ss_item_sk#7] +Right keys [1]: [i_item_sk#12] Join condition: None (19) Project [codegen id : 8] -Output [3]: [d_date#12, i_item_sk#14, substr(i_item_desc#15, 1, 30) AS _groupingexpression#17] -Input [4]: [ss_item_sk#8, d_date#12, i_item_sk#14, i_item_desc#15] +Output [3]: [d_date#11, i_item_sk#12, substr(i_item_desc#13, 1, 30) AS _groupingexpression#14] +Input [4]: [ss_item_sk#7, d_date#11, i_item_sk#12, i_item_desc#13] (20) HashAggregate [codegen id : 8] -Input [3]: [d_date#12, i_item_sk#14, _groupingexpression#17] -Keys [3]: [_groupingexpression#17, i_item_sk#14, d_date#12] +Input [3]: [d_date#11, i_item_sk#12, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, i_item_sk#12, d_date#11] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#18] -Results [4]: [_groupingexpression#17, i_item_sk#14, d_date#12, count#19] +Aggregate Attributes [1]: [count#15] +Results [4]: [_groupingexpression#14, i_item_sk#12, d_date#11, count#16] (21) HashAggregate [codegen id : 8] -Input [4]: [_groupingexpression#17, i_item_sk#14, d_date#12, count#19] -Keys [3]: [_groupingexpression#17, i_item_sk#14, d_date#12] +Input [4]: [_groupingexpression#14, i_item_sk#12, d_date#11, count#16] +Keys [3]: [_groupingexpression#14, i_item_sk#12, d_date#11] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#20] -Results [2]: [i_item_sk#14 AS item_sk#21, count(1)#20 AS cnt#22] +Aggregate Attributes [1]: [count(1)#17] +Results [2]: [i_item_sk#12 AS item_sk#18, count(1)#17 AS cnt#19] (22) Filter [codegen id : 8] -Input [2]: [item_sk#21, cnt#22] -Condition : (cnt#22 > 4) +Input [2]: [item_sk#18, cnt#19] +Condition : (cnt#19 > 4) (23) Project [codegen id : 8] -Output [1]: [item_sk#21] -Input [2]: [item_sk#21, cnt#22] +Output [1]: [item_sk#18] +Input [2]: [item_sk#18, cnt#19] (24) Sort [codegen id : 8] -Input [1]: [item_sk#21] -Arguments: [item_sk#21 ASC NULLS FIRST], false, 0 +Input [1]: [item_sk#18] +Arguments: [item_sk#18 ASC NULLS FIRST], false, 0 (25) SortMergeJoin [codegen id : 9] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [item_sk#21] +Right keys [1]: [item_sk#18] Join condition: None (26) Project [codegen id : 9] @@ -212,98 +212,98 @@ Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, (27) Exchange Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] -Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#23] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) Sort [codegen id : 10] Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] Arguments: [cs_bill_customer_sk#1 ASC NULLS FIRST], false, 0 (29) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] +Output [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 11] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] (31) Filter [codegen id : 11] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] -Condition : isnotnull(ss_customer_sk#24) +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] +Condition : isnotnull(ss_customer_sk#20) (32) Project [codegen id : 11] -Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] +Output [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] (33) Exchange -Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -Arguments: hashpartitioning(ss_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] +Arguments: hashpartitioning(ss_customer_sk#20, 5), ENSURE_REQUIREMENTS, [plan_id=5] (34) Sort [codegen id : 12] -Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 +Input [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] +Arguments: [ss_customer_sk#20 ASC NULLS FIRST], false, 0 (35) Scan parquet default.customer -Output [1]: [c_customer_sk#29] +Output [1]: [c_customer_sk#24] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (36) ColumnarToRow [codegen id : 13] -Input [1]: [c_customer_sk#29] +Input [1]: [c_customer_sk#24] (37) Filter [codegen id : 13] -Input [1]: [c_customer_sk#29] -Condition : isnotnull(c_customer_sk#29) +Input [1]: [c_customer_sk#24] +Condition : isnotnull(c_customer_sk#24) (38) Exchange -Input [1]: [c_customer_sk#29] -Arguments: hashpartitioning(c_customer_sk#29, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [1]: [c_customer_sk#24] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [plan_id=6] (39) Sort [codegen id : 14] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (40) SortMergeJoin [codegen id : 15] -Left keys [1]: [ss_customer_sk#24] -Right keys [1]: [c_customer_sk#29] +Left keys [1]: [ss_customer_sk#20] +Right keys [1]: [c_customer_sk#24] Join condition: None (41) Project [codegen id : 15] -Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#29] +Output [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, c_customer_sk#24] (42) HashAggregate [codegen id : 15] -Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] -Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#31, isEmpty#32] -Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] +Input [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Keys [1]: [c_customer_sk#24] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#25, isEmpty#26] +Results [3]: [c_customer_sk#24, sum#27, isEmpty#28] (43) HashAggregate [codegen id : 15] -Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] -Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] +Input [3]: [c_customer_sk#24, sum#27, isEmpty#28] +Keys [1]: [c_customer_sk#24] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29] +Results [2]: [c_customer_sk#24, sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29 AS ssales#30] (44) Filter [codegen id : 15] -Input [2]: [c_customer_sk#29, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) +Input [2]: [c_customer_sk#24, ssales#30] +Condition : (isnotnull(ssales#30) AND (cast(ssales#30 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#31, [id=#32] as decimal(32,6)))), DecimalType(38,8)))) (45) Project [codegen id : 15] -Output [1]: [c_customer_sk#29] -Input [2]: [c_customer_sk#29, ssales#36] +Output [1]: [c_customer_sk#24] +Input [2]: [c_customer_sk#24, ssales#30] (46) Sort [codegen id : 15] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (47) SortMergeJoin [codegen id : 17] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#29] +Right keys [1]: [c_customer_sk#24] Join condition: None (48) Project [codegen id : 17] @@ -311,190 +311,190 @@ Output [3]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] (49) ReusedExchange [Reuses operator id: 95] -Output [1]: [d_date_sk#39] +Output [1]: [d_date_sk#33] (50) BroadcastHashJoin [codegen id : 17] Left keys [1]: [cs_sold_date_sk#5] -Right keys [1]: [d_date_sk#39] +Right keys [1]: [d_date_sk#33] Join condition: None (51) Project [codegen id : 17] -Output [1]: [CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)) AS sales#40] -Input [4]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#39] +Output [1]: [CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)) AS sales#34] +Input [4]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#33] (52) Scan parquet default.web_sales -Output [5]: [ws_item_sk#41, ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] +Output [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#45), dynamicpruningexpression(ws_sold_date_sk#45 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(ws_sold_date_sk#39), dynamicpruningexpression(ws_sold_date_sk#39 IN dynamicpruning#6)] ReadSchema: struct (53) ColumnarToRow [codegen id : 18] -Input [5]: [ws_item_sk#41, ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] +Input [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] (54) Exchange -Input [5]: [ws_item_sk#41, ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] -Arguments: hashpartitioning(ws_item_sk#41, 5), ENSURE_REQUIREMENTS, [id=#46] +Input [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Arguments: hashpartitioning(ws_item_sk#35, 5), ENSURE_REQUIREMENTS, [plan_id=7] (55) Sort [codegen id : 19] -Input [5]: [ws_item_sk#41, ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] -Arguments: [ws_item_sk#41 ASC NULLS FIRST], false, 0 +Input [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Arguments: [ws_item_sk#35 ASC NULLS FIRST], false, 0 (56) ReusedExchange [Reuses operator id: 11] -Output [2]: [ss_item_sk#8, d_date#12] +Output [2]: [ss_item_sk#7, d_date#11] (57) Sort [codegen id : 22] -Input [2]: [ss_item_sk#8, d_date#12] -Arguments: [ss_item_sk#8 ASC NULLS FIRST], false, 0 +Input [2]: [ss_item_sk#7, d_date#11] +Arguments: [ss_item_sk#7 ASC NULLS FIRST], false, 0 (58) ReusedExchange [Reuses operator id: 16] -Output [2]: [i_item_sk#14, i_item_desc#15] +Output [2]: [i_item_sk#12, i_item_desc#13] (59) Sort [codegen id : 24] -Input [2]: [i_item_sk#14, i_item_desc#15] -Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 +Input [2]: [i_item_sk#12, i_item_desc#13] +Arguments: [i_item_sk#12 ASC NULLS FIRST], false, 0 (60) SortMergeJoin [codegen id : 25] -Left keys [1]: [ss_item_sk#8] -Right keys [1]: [i_item_sk#14] +Left keys [1]: [ss_item_sk#7] +Right keys [1]: [i_item_sk#12] Join condition: None (61) Project [codegen id : 25] -Output [3]: [d_date#12, i_item_sk#14, substr(i_item_desc#15, 1, 30) AS _groupingexpression#17] -Input [4]: [ss_item_sk#8, d_date#12, i_item_sk#14, i_item_desc#15] +Output [3]: [d_date#11, i_item_sk#12, substr(i_item_desc#13, 1, 30) AS _groupingexpression#14] +Input [4]: [ss_item_sk#7, d_date#11, i_item_sk#12, i_item_desc#13] (62) HashAggregate [codegen id : 25] -Input [3]: [d_date#12, i_item_sk#14, _groupingexpression#17] -Keys [3]: [_groupingexpression#17, i_item_sk#14, d_date#12] +Input [3]: [d_date#11, i_item_sk#12, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, i_item_sk#12, d_date#11] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#18] -Results [4]: [_groupingexpression#17, i_item_sk#14, d_date#12, count#19] +Aggregate Attributes [1]: [count#15] +Results [4]: [_groupingexpression#14, i_item_sk#12, d_date#11, count#16] (63) HashAggregate [codegen id : 25] -Input [4]: [_groupingexpression#17, i_item_sk#14, d_date#12, count#19] -Keys [3]: [_groupingexpression#17, i_item_sk#14, d_date#12] +Input [4]: [_groupingexpression#14, i_item_sk#12, d_date#11, count#16] +Keys [3]: [_groupingexpression#14, i_item_sk#12, d_date#11] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#20] -Results [2]: [i_item_sk#14 AS item_sk#21, count(1)#20 AS cnt#22] +Aggregate Attributes [1]: [count(1)#17] +Results [2]: [i_item_sk#12 AS item_sk#18, count(1)#17 AS cnt#19] (64) Filter [codegen id : 25] -Input [2]: [item_sk#21, cnt#22] -Condition : (cnt#22 > 4) +Input [2]: [item_sk#18, cnt#19] +Condition : (cnt#19 > 4) (65) Project [codegen id : 25] -Output [1]: [item_sk#21] -Input [2]: [item_sk#21, cnt#22] +Output [1]: [item_sk#18] +Input [2]: [item_sk#18, cnt#19] (66) Sort [codegen id : 25] -Input [1]: [item_sk#21] -Arguments: [item_sk#21 ASC NULLS FIRST], false, 0 +Input [1]: [item_sk#18] +Arguments: [item_sk#18 ASC NULLS FIRST], false, 0 (67) SortMergeJoin [codegen id : 26] -Left keys [1]: [ws_item_sk#41] -Right keys [1]: [item_sk#21] +Left keys [1]: [ws_item_sk#35] +Right keys [1]: [item_sk#18] Join condition: None (68) Project [codegen id : 26] -Output [4]: [ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] -Input [5]: [ws_item_sk#41, ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] +Output [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Input [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] (69) Exchange -Input [4]: [ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] -Arguments: hashpartitioning(ws_bill_customer_sk#42, 5), ENSURE_REQUIREMENTS, [id=#47] +Input [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Arguments: hashpartitioning(ws_bill_customer_sk#36, 5), ENSURE_REQUIREMENTS, [plan_id=8] (70) Sort [codegen id : 27] -Input [4]: [ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] -Arguments: [ws_bill_customer_sk#42 ASC NULLS FIRST], false, 0 +Input [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Arguments: [ws_bill_customer_sk#36 ASC NULLS FIRST], false, 0 (71) ReusedExchange [Reuses operator id: 33] -Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Output [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] (72) Sort [codegen id : 29] -Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 +Input [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] +Arguments: [ss_customer_sk#20 ASC NULLS FIRST], false, 0 (73) ReusedExchange [Reuses operator id: 38] -Output [1]: [c_customer_sk#29] +Output [1]: [c_customer_sk#24] (74) Sort [codegen id : 31] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (75) SortMergeJoin [codegen id : 32] -Left keys [1]: [ss_customer_sk#24] -Right keys [1]: [c_customer_sk#29] +Left keys [1]: [ss_customer_sk#20] +Right keys [1]: [c_customer_sk#24] Join condition: None (76) Project [codegen id : 32] -Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#29] +Output [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, c_customer_sk#24] (77) HashAggregate [codegen id : 32] -Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] -Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#48, isEmpty#49] -Results [3]: [c_customer_sk#29, sum#50, isEmpty#51] +Input [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Keys [1]: [c_customer_sk#24] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#40, isEmpty#41] +Results [3]: [c_customer_sk#24, sum#42, isEmpty#43] (78) HashAggregate [codegen id : 32] -Input [3]: [c_customer_sk#29, sum#50, isEmpty#51] -Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] +Input [3]: [c_customer_sk#24, sum#42, isEmpty#43] +Keys [1]: [c_customer_sk#24] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29] +Results [2]: [c_customer_sk#24, sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29 AS ssales#30] (79) Filter [codegen id : 32] -Input [2]: [c_customer_sk#29, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) +Input [2]: [c_customer_sk#24, ssales#30] +Condition : (isnotnull(ssales#30) AND (cast(ssales#30 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#31, [id=#32] as decimal(32,6)))), DecimalType(38,8)))) (80) Project [codegen id : 32] -Output [1]: [c_customer_sk#29] -Input [2]: [c_customer_sk#29, ssales#36] +Output [1]: [c_customer_sk#24] +Input [2]: [c_customer_sk#24, ssales#30] (81) Sort [codegen id : 32] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (82) SortMergeJoin [codegen id : 34] -Left keys [1]: [ws_bill_customer_sk#42] -Right keys [1]: [c_customer_sk#29] +Left keys [1]: [ws_bill_customer_sk#36] +Right keys [1]: [c_customer_sk#24] Join condition: None (83) Project [codegen id : 34] -Output [3]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] -Input [4]: [ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] +Output [3]: [ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Input [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] (84) ReusedExchange [Reuses operator id: 95] -Output [1]: [d_date_sk#52] +Output [1]: [d_date_sk#44] (85) BroadcastHashJoin [codegen id : 34] -Left keys [1]: [ws_sold_date_sk#45] -Right keys [1]: [d_date_sk#52] +Left keys [1]: [ws_sold_date_sk#39] +Right keys [1]: [d_date_sk#44] Join condition: None (86) Project [codegen id : 34] -Output [1]: [CheckOverflow((promote_precision(cast(ws_quantity#43 as decimal(12,2))) * promote_precision(cast(ws_list_price#44 as decimal(12,2)))), DecimalType(18,2)) AS sales#53] -Input [4]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45, d_date_sk#52] +Output [1]: [CheckOverflow((promote_precision(cast(ws_quantity#37 as decimal(12,2))) * promote_precision(cast(ws_list_price#38 as decimal(12,2)))), DecimalType(18,2)) AS sales#45] +Input [4]: [ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39, d_date_sk#44] (87) Union (88) HashAggregate [codegen id : 35] -Input [1]: [sales#40] +Input [1]: [sales#34] Keys: [] -Functions [1]: [partial_sum(sales#40)] -Aggregate Attributes [2]: [sum#54, isEmpty#55] -Results [2]: [sum#56, isEmpty#57] +Functions [1]: [partial_sum(sales#34)] +Aggregate Attributes [2]: [sum#46, isEmpty#47] +Results [2]: [sum#48, isEmpty#49] (89) Exchange -Input [2]: [sum#56, isEmpty#57] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#58] +Input [2]: [sum#48, isEmpty#49] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=9] (90) HashAggregate [codegen id : 36] -Input [2]: [sum#56, isEmpty#57] +Input [2]: [sum#48, isEmpty#49] Keys: [] -Functions [1]: [sum(sales#40)] -Aggregate Attributes [1]: [sum(sales#40)#59] -Results [1]: [sum(sales#40)#59 AS sum(sales)#60] +Functions [1]: [sum(sales#34)] +Aggregate Attributes [1]: [sum(sales#34)#50] +Results [1]: [sum(sales#34)#50 AS sum(sales)#51] ===== Subqueries ===== @@ -507,28 +507,28 @@ BroadcastExchange (95) (91) Scan parquet default.date_dim -Output [3]: [d_date_sk#39, d_year#61, d_moy#62] +Output [3]: [d_date_sk#33, d_year#52, d_moy#53] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (92) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#61, d_moy#62] +Input [3]: [d_date_sk#33, d_year#52, d_moy#53] (93) Filter [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#61, d_moy#62] -Condition : ((((isnotnull(d_year#61) AND isnotnull(d_moy#62)) AND (d_year#61 = 2000)) AND (d_moy#62 = 2)) AND isnotnull(d_date_sk#39)) +Input [3]: [d_date_sk#33, d_year#52, d_moy#53] +Condition : ((((isnotnull(d_year#52) AND isnotnull(d_moy#53)) AND (d_year#52 = 2000)) AND (d_moy#53 = 2)) AND isnotnull(d_date_sk#33)) (94) Project [codegen id : 1] -Output [1]: [d_date_sk#39] -Input [3]: [d_date_sk#39, d_year#61, d_moy#62] +Output [1]: [d_date_sk#33] +Input [3]: [d_date_sk#33, d_year#52, d_moy#53] (95) BroadcastExchange -Input [1]: [d_date_sk#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#63] +Input [1]: [d_date_sk#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] -Subquery:2 Hosting operator id = 5 Hosting Expression = ss_sold_date_sk#9 IN dynamicpruning#10 +Subquery:2 Hosting operator id = 5 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 BroadcastExchange (100) +- * Project (99) +- * Filter (98) @@ -537,28 +537,28 @@ BroadcastExchange (100) (96) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_date#12, d_year#64] +Output [3]: [d_date_sk#10, d_date#11, d_year#54] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (97) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_date#12, d_year#64] +Input [3]: [d_date_sk#10, d_date#11, d_year#54] (98) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_date#12, d_year#64] -Condition : (d_year#64 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#10, d_date#11, d_year#54] +Condition : (d_year#54 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#10)) (99) Project [codegen id : 1] -Output [2]: [d_date_sk#11, d_date#12] -Input [3]: [d_date_sk#11, d_date#12, d_year#64] +Output [2]: [d_date_sk#10, d_date#11] +Input [3]: [d_date_sk#10, d_date#11, d_year#54] (100) BroadcastExchange -Input [2]: [d_date_sk#11, d_date#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#65] +Input [2]: [d_date_sk#10, d_date#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] -Subquery:3 Hosting operator id = 44 Hosting Expression = Subquery scalar-subquery#37, [id=#38] +Subquery:3 Hosting operator id = 44 Hosting Expression = Subquery scalar-subquery#31, [id=#32] * HashAggregate (117) +- Exchange (116) +- * HashAggregate (115) @@ -579,89 +579,89 @@ Subquery:3 Hosting operator id = 44 Hosting Expression = Subquery scalar-subquer (101) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68, ss_sold_date_sk#69] +Output [4]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#69), dynamicpruningexpression(ss_sold_date_sk#69 IN dynamicpruning#70)] +PartitionFilters: [isnotnull(ss_sold_date_sk#58), dynamicpruningexpression(ss_sold_date_sk#58 IN dynamicpruning#59)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (102) ColumnarToRow [codegen id : 2] -Input [4]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68, ss_sold_date_sk#69] +Input [4]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58] (103) Filter [codegen id : 2] -Input [4]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68, ss_sold_date_sk#69] -Condition : isnotnull(ss_customer_sk#66) +Input [4]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58] +Condition : isnotnull(ss_customer_sk#55) (104) ReusedExchange [Reuses operator id: 122] -Output [1]: [d_date_sk#71] +Output [1]: [d_date_sk#60] (105) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#69] -Right keys [1]: [d_date_sk#71] +Left keys [1]: [ss_sold_date_sk#58] +Right keys [1]: [d_date_sk#60] Join condition: None (106) Project [codegen id : 2] -Output [3]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68] -Input [5]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68, ss_sold_date_sk#69, d_date_sk#71] +Output [3]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57] +Input [5]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57, ss_sold_date_sk#58, d_date_sk#60] (107) Exchange -Input [3]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68] -Arguments: hashpartitioning(ss_customer_sk#66, 5), ENSURE_REQUIREMENTS, [id=#72] +Input [3]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57] +Arguments: hashpartitioning(ss_customer_sk#55, 5), ENSURE_REQUIREMENTS, [plan_id=12] (108) Sort [codegen id : 3] -Input [3]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68] -Arguments: [ss_customer_sk#66 ASC NULLS FIRST], false, 0 +Input [3]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57] +Arguments: [ss_customer_sk#55 ASC NULLS FIRST], false, 0 (109) ReusedExchange [Reuses operator id: 38] -Output [1]: [c_customer_sk#73] +Output [1]: [c_customer_sk#61] (110) Sort [codegen id : 5] -Input [1]: [c_customer_sk#73] -Arguments: [c_customer_sk#73 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#61] +Arguments: [c_customer_sk#61 ASC NULLS FIRST], false, 0 (111) SortMergeJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#66] -Right keys [1]: [c_customer_sk#73] +Left keys [1]: [ss_customer_sk#55] +Right keys [1]: [c_customer_sk#61] Join condition: None (112) Project [codegen id : 6] -Output [3]: [ss_quantity#67, ss_sales_price#68, c_customer_sk#73] -Input [4]: [ss_customer_sk#66, ss_quantity#67, ss_sales_price#68, c_customer_sk#73] +Output [3]: [ss_quantity#56, ss_sales_price#57, c_customer_sk#61] +Input [4]: [ss_customer_sk#55, ss_quantity#56, ss_sales_price#57, c_customer_sk#61] (113) HashAggregate [codegen id : 6] -Input [3]: [ss_quantity#67, ss_sales_price#68, c_customer_sk#73] -Keys [1]: [c_customer_sk#73] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#67 as decimal(12,2))) * promote_precision(cast(ss_sales_price#68 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#74, isEmpty#75] -Results [3]: [c_customer_sk#73, sum#76, isEmpty#77] +Input [3]: [ss_quantity#56, ss_sales_price#57, c_customer_sk#61] +Keys [1]: [c_customer_sk#61] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#56 as decimal(12,2))) * promote_precision(cast(ss_sales_price#57 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#62, isEmpty#63] +Results [3]: [c_customer_sk#61, sum#64, isEmpty#65] (114) HashAggregate [codegen id : 6] -Input [3]: [c_customer_sk#73, sum#76, isEmpty#77] -Keys [1]: [c_customer_sk#73] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#67 as decimal(12,2))) * promote_precision(cast(ss_sales_price#68 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#67 as decimal(12,2))) * promote_precision(cast(ss_sales_price#68 as decimal(12,2)))), DecimalType(18,2)))#78] -Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#67 as decimal(12,2))) * promote_precision(cast(ss_sales_price#68 as decimal(12,2)))), DecimalType(18,2)))#78 AS csales#79] +Input [3]: [c_customer_sk#61, sum#64, isEmpty#65] +Keys [1]: [c_customer_sk#61] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#56 as decimal(12,2))) * promote_precision(cast(ss_sales_price#57 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#56 as decimal(12,2))) * promote_precision(cast(ss_sales_price#57 as decimal(12,2)))), DecimalType(18,2)))#66] +Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#56 as decimal(12,2))) * promote_precision(cast(ss_sales_price#57 as decimal(12,2)))), DecimalType(18,2)))#66 AS csales#67] (115) HashAggregate [codegen id : 6] -Input [1]: [csales#79] +Input [1]: [csales#67] Keys: [] -Functions [1]: [partial_max(csales#79)] -Aggregate Attributes [1]: [max#80] -Results [1]: [max#81] +Functions [1]: [partial_max(csales#67)] +Aggregate Attributes [1]: [max#68] +Results [1]: [max#69] (116) Exchange -Input [1]: [max#81] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#82] +Input [1]: [max#69] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=13] (117) HashAggregate [codegen id : 7] -Input [1]: [max#81] +Input [1]: [max#69] Keys: [] -Functions [1]: [max(csales#79)] -Aggregate Attributes [1]: [max(csales#79)#83] -Results [1]: [max(csales#79)#83 AS tpcds_cmax#84] +Functions [1]: [max(csales#67)] +Aggregate Attributes [1]: [max(csales#67)#70] +Results [1]: [max(csales#67)#70 AS tpcds_cmax#71] -Subquery:4 Hosting operator id = 101 Hosting Expression = ss_sold_date_sk#69 IN dynamicpruning#70 +Subquery:4 Hosting operator id = 101 Hosting Expression = ss_sold_date_sk#58 IN dynamicpruning#59 BroadcastExchange (122) +- * Project (121) +- * Filter (120) @@ -670,29 +670,29 @@ BroadcastExchange (122) (118) Scan parquet default.date_dim -Output [2]: [d_date_sk#71, d_year#85] +Output [2]: [d_date_sk#60, d_year#72] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (119) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#71, d_year#85] +Input [2]: [d_date_sk#60, d_year#72] (120) Filter [codegen id : 1] -Input [2]: [d_date_sk#71, d_year#85] -Condition : (d_year#85 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#71)) +Input [2]: [d_date_sk#60, d_year#72] +Condition : (d_year#72 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#60)) (121) Project [codegen id : 1] -Output [1]: [d_date_sk#71] -Input [2]: [d_date_sk#71, d_year#85] +Output [1]: [d_date_sk#60] +Input [2]: [d_date_sk#60, d_year#72] (122) BroadcastExchange -Input [1]: [d_date_sk#71] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#86] +Input [1]: [d_date_sk#60] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] -Subquery:5 Hosting operator id = 52 Hosting Expression = ws_sold_date_sk#45 IN dynamicpruning#6 +Subquery:5 Hosting operator id = 52 Hosting Expression = ws_sold_date_sk#39 IN dynamicpruning#6 -Subquery:6 Hosting operator id = 79 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] +Subquery:6 Hosting operator id = 79 Hosting Expression = ReusedSubquery Subquery scalar-subquery#31, [id=#32] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt index 58d6c22f3fd05..8e372bc426361 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23a/explain.txt @@ -120,7 +120,7 @@ Condition : isnotnull(i_item_sk#12) (12) BroadcastExchange Input [2]: [i_item_sk#12, i_item_desc#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (13) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#7] @@ -128,42 +128,42 @@ Right keys [1]: [i_item_sk#12] Join condition: None (14) Project [codegen id : 3] -Output [3]: [d_date#11, i_item_sk#12, substr(i_item_desc#13, 1, 30) AS _groupingexpression#15] +Output [3]: [d_date#11, i_item_sk#12, substr(i_item_desc#13, 1, 30) AS _groupingexpression#14] Input [4]: [ss_item_sk#7, d_date#11, i_item_sk#12, i_item_desc#13] (15) HashAggregate [codegen id : 3] -Input [3]: [d_date#11, i_item_sk#12, _groupingexpression#15] -Keys [3]: [_groupingexpression#15, i_item_sk#12, d_date#11] +Input [3]: [d_date#11, i_item_sk#12, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, i_item_sk#12, d_date#11] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [4]: [_groupingexpression#15, i_item_sk#12, d_date#11, count#17] +Aggregate Attributes [1]: [count#15] +Results [4]: [_groupingexpression#14, i_item_sk#12, d_date#11, count#16] (16) Exchange -Input [4]: [_groupingexpression#15, i_item_sk#12, d_date#11, count#17] -Arguments: hashpartitioning(_groupingexpression#15, i_item_sk#12, d_date#11, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [4]: [_groupingexpression#14, i_item_sk#12, d_date#11, count#16] +Arguments: hashpartitioning(_groupingexpression#14, i_item_sk#12, d_date#11, 5), ENSURE_REQUIREMENTS, [plan_id=2] (17) HashAggregate [codegen id : 4] -Input [4]: [_groupingexpression#15, i_item_sk#12, d_date#11, count#17] -Keys [3]: [_groupingexpression#15, i_item_sk#12, d_date#11] +Input [4]: [_groupingexpression#14, i_item_sk#12, d_date#11, count#16] +Keys [3]: [_groupingexpression#14, i_item_sk#12, d_date#11] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [2]: [i_item_sk#12 AS item_sk#20, count(1)#19 AS cnt#21] +Aggregate Attributes [1]: [count(1)#17] +Results [2]: [i_item_sk#12 AS item_sk#18, count(1)#17 AS cnt#19] (18) Filter [codegen id : 4] -Input [2]: [item_sk#20, cnt#21] -Condition : (cnt#21 > 4) +Input [2]: [item_sk#18, cnt#19] +Condition : (cnt#19 > 4) (19) Project [codegen id : 4] -Output [1]: [item_sk#20] -Input [2]: [item_sk#20, cnt#21] +Output [1]: [item_sk#18] +Input [2]: [item_sk#18, cnt#19] (20) BroadcastExchange -Input [1]: [item_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [item_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [item_sk#20] +Right keys [1]: [item_sk#18] Join condition: None (22) Project [codegen id : 5] @@ -172,90 +172,90 @@ Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, (23) Exchange Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] -Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#23] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 6] Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] Arguments: [cs_bill_customer_sk#1 ASC NULLS FIRST], false, 0 (25) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] +Output [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 8] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] (27) Filter [codegen id : 8] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] -Condition : isnotnull(ss_customer_sk#24) +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] +Condition : isnotnull(ss_customer_sk#20) (28) Project [codegen id : 8] -Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] +Output [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] (29) Scan parquet default.customer -Output [1]: [c_customer_sk#28] +Output [1]: [c_customer_sk#24] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 7] -Input [1]: [c_customer_sk#28] +Input [1]: [c_customer_sk#24] (31) Filter [codegen id : 7] -Input [1]: [c_customer_sk#28] -Condition : isnotnull(c_customer_sk#28) +Input [1]: [c_customer_sk#24] +Condition : isnotnull(c_customer_sk#24) (32) BroadcastExchange -Input [1]: [c_customer_sk#28] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#29] +Input [1]: [c_customer_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (33) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_customer_sk#24] -Right keys [1]: [c_customer_sk#28] +Left keys [1]: [ss_customer_sk#20] +Right keys [1]: [c_customer_sk#24] Join condition: None (34) Project [codegen id : 8] -Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Output [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, c_customer_sk#24] (35) HashAggregate [codegen id : 8] -Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] -Keys [1]: [c_customer_sk#28] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#30, isEmpty#31] -Results [3]: [c_customer_sk#28, sum#32, isEmpty#33] +Input [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Keys [1]: [c_customer_sk#24] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#25, isEmpty#26] +Results [3]: [c_customer_sk#24, sum#27, isEmpty#28] (36) Exchange -Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] -Arguments: hashpartitioning(c_customer_sk#28, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [3]: [c_customer_sk#24, sum#27, isEmpty#28] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [plan_id=6] (37) HashAggregate [codegen id : 9] -Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] -Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] +Input [3]: [c_customer_sk#24, sum#27, isEmpty#28] +Keys [1]: [c_customer_sk#24] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29] +Results [2]: [c_customer_sk#24, sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29 AS ssales#30] (38) Filter [codegen id : 9] -Input [2]: [c_customer_sk#28, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) +Input [2]: [c_customer_sk#24, ssales#30] +Condition : (isnotnull(ssales#30) AND (cast(ssales#30 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#31, [id=#32] as decimal(32,6)))), DecimalType(38,8)))) (39) Project [codegen id : 9] -Output [1]: [c_customer_sk#28] -Input [2]: [c_customer_sk#28, ssales#36] +Output [1]: [c_customer_sk#24] +Input [2]: [c_customer_sk#24, ssales#30] (40) Sort [codegen id : 9] -Input [1]: [c_customer_sk#28] -Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (41) SortMergeJoin [codegen id : 11] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#28] +Right keys [1]: [c_customer_sk#24] Join condition: None (42) Project [codegen id : 11] @@ -263,109 +263,109 @@ Output [3]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] (43) ReusedExchange [Reuses operator id: 71] -Output [1]: [d_date_sk#39] +Output [1]: [d_date_sk#33] (44) BroadcastHashJoin [codegen id : 11] Left keys [1]: [cs_sold_date_sk#5] -Right keys [1]: [d_date_sk#39] +Right keys [1]: [d_date_sk#33] Join condition: None (45) Project [codegen id : 11] -Output [1]: [CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)) AS sales#40] -Input [4]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#39] +Output [1]: [CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)) AS sales#34] +Input [4]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#33] (46) Scan parquet default.web_sales -Output [5]: [ws_item_sk#41, ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] +Output [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#45), dynamicpruningexpression(ws_sold_date_sk#45 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(ws_sold_date_sk#39), dynamicpruningexpression(ws_sold_date_sk#39 IN dynamicpruning#6)] ReadSchema: struct (47) ColumnarToRow [codegen id : 16] -Input [5]: [ws_item_sk#41, ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] +Input [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] (48) ReusedExchange [Reuses operator id: 20] -Output [1]: [item_sk#20] +Output [1]: [item_sk#18] (49) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [ws_item_sk#41] -Right keys [1]: [item_sk#20] +Left keys [1]: [ws_item_sk#35] +Right keys [1]: [item_sk#18] Join condition: None (50) Project [codegen id : 16] -Output [4]: [ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] -Input [5]: [ws_item_sk#41, ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] +Output [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Input [5]: [ws_item_sk#35, ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] (51) Exchange -Input [4]: [ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] -Arguments: hashpartitioning(ws_bill_customer_sk#42, 5), ENSURE_REQUIREMENTS, [id=#46] +Input [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Arguments: hashpartitioning(ws_bill_customer_sk#36, 5), ENSURE_REQUIREMENTS, [plan_id=7] (52) Sort [codegen id : 17] -Input [4]: [ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] -Arguments: [ws_bill_customer_sk#42 ASC NULLS FIRST], false, 0 +Input [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Arguments: [ws_bill_customer_sk#36 ASC NULLS FIRST], false, 0 (53) ReusedExchange [Reuses operator id: 36] -Output [3]: [c_customer_sk#28, sum#47, isEmpty#48] +Output [3]: [c_customer_sk#24, sum#40, isEmpty#41] (54) HashAggregate [codegen id : 20] -Input [3]: [c_customer_sk#28, sum#47, isEmpty#48] -Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] +Input [3]: [c_customer_sk#24, sum#40, isEmpty#41] +Keys [1]: [c_customer_sk#24] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29] +Results [2]: [c_customer_sk#24, sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29 AS ssales#30] (55) Filter [codegen id : 20] -Input [2]: [c_customer_sk#28, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) +Input [2]: [c_customer_sk#24, ssales#30] +Condition : (isnotnull(ssales#30) AND (cast(ssales#30 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#31, [id=#32] as decimal(32,6)))), DecimalType(38,8)))) (56) Project [codegen id : 20] -Output [1]: [c_customer_sk#28] -Input [2]: [c_customer_sk#28, ssales#36] +Output [1]: [c_customer_sk#24] +Input [2]: [c_customer_sk#24, ssales#30] (57) Sort [codegen id : 20] -Input [1]: [c_customer_sk#28] -Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (58) SortMergeJoin [codegen id : 22] -Left keys [1]: [ws_bill_customer_sk#42] -Right keys [1]: [c_customer_sk#28] +Left keys [1]: [ws_bill_customer_sk#36] +Right keys [1]: [c_customer_sk#24] Join condition: None (59) Project [codegen id : 22] -Output [3]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] -Input [4]: [ws_bill_customer_sk#42, ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45] +Output [3]: [ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] +Input [4]: [ws_bill_customer_sk#36, ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39] (60) ReusedExchange [Reuses operator id: 71] -Output [1]: [d_date_sk#49] +Output [1]: [d_date_sk#42] (61) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [ws_sold_date_sk#45] -Right keys [1]: [d_date_sk#49] +Left keys [1]: [ws_sold_date_sk#39] +Right keys [1]: [d_date_sk#42] Join condition: None (62) Project [codegen id : 22] -Output [1]: [CheckOverflow((promote_precision(cast(ws_quantity#43 as decimal(12,2))) * promote_precision(cast(ws_list_price#44 as decimal(12,2)))), DecimalType(18,2)) AS sales#50] -Input [4]: [ws_quantity#43, ws_list_price#44, ws_sold_date_sk#45, d_date_sk#49] +Output [1]: [CheckOverflow((promote_precision(cast(ws_quantity#37 as decimal(12,2))) * promote_precision(cast(ws_list_price#38 as decimal(12,2)))), DecimalType(18,2)) AS sales#43] +Input [4]: [ws_quantity#37, ws_list_price#38, ws_sold_date_sk#39, d_date_sk#42] (63) Union (64) HashAggregate [codegen id : 23] -Input [1]: [sales#40] +Input [1]: [sales#34] Keys: [] -Functions [1]: [partial_sum(sales#40)] -Aggregate Attributes [2]: [sum#51, isEmpty#52] -Results [2]: [sum#53, isEmpty#54] +Functions [1]: [partial_sum(sales#34)] +Aggregate Attributes [2]: [sum#44, isEmpty#45] +Results [2]: [sum#46, isEmpty#47] (65) Exchange -Input [2]: [sum#53, isEmpty#54] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#55] +Input [2]: [sum#46, isEmpty#47] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] (66) HashAggregate [codegen id : 24] -Input [2]: [sum#53, isEmpty#54] +Input [2]: [sum#46, isEmpty#47] Keys: [] -Functions [1]: [sum(sales#40)] -Aggregate Attributes [1]: [sum(sales#40)#56] -Results [1]: [sum(sales#40)#56 AS sum(sales)#57] +Functions [1]: [sum(sales#34)] +Aggregate Attributes [1]: [sum(sales#34)#48] +Results [1]: [sum(sales#34)#48 AS sum(sales)#49] ===== Subqueries ===== @@ -378,26 +378,26 @@ BroadcastExchange (71) (67) Scan parquet default.date_dim -Output [3]: [d_date_sk#39, d_year#58, d_moy#59] +Output [3]: [d_date_sk#33, d_year#50, d_moy#51] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (68) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#58, d_moy#59] +Input [3]: [d_date_sk#33, d_year#50, d_moy#51] (69) Filter [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#58, d_moy#59] -Condition : ((((isnotnull(d_year#58) AND isnotnull(d_moy#59)) AND (d_year#58 = 2000)) AND (d_moy#59 = 2)) AND isnotnull(d_date_sk#39)) +Input [3]: [d_date_sk#33, d_year#50, d_moy#51] +Condition : ((((isnotnull(d_year#50) AND isnotnull(d_moy#51)) AND (d_year#50 = 2000)) AND (d_moy#51 = 2)) AND isnotnull(d_date_sk#33)) (70) Project [codegen id : 1] -Output [1]: [d_date_sk#39] -Input [3]: [d_date_sk#39, d_year#58, d_moy#59] +Output [1]: [d_date_sk#33] +Input [3]: [d_date_sk#33, d_year#50, d_moy#51] (71) BroadcastExchange -Input [1]: [d_date_sk#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#60] +Input [1]: [d_date_sk#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] Subquery:2 Hosting operator id = 3 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 BroadcastExchange (76) @@ -408,28 +408,28 @@ BroadcastExchange (76) (72) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_date#11, d_year#61] +Output [3]: [d_date_sk#10, d_date#11, d_year#52] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (73) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_date#11, d_year#61] +Input [3]: [d_date_sk#10, d_date#11, d_year#52] (74) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_date#11, d_year#61] -Condition : (d_year#61 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#10, d_date#11, d_year#52] +Condition : (d_year#52 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#10)) (75) Project [codegen id : 1] Output [2]: [d_date_sk#10, d_date#11] -Input [3]: [d_date_sk#10, d_date#11, d_year#61] +Input [3]: [d_date_sk#10, d_date#11, d_year#52] (76) BroadcastExchange Input [2]: [d_date_sk#10, d_date#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#62] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] -Subquery:3 Hosting operator id = 38 Hosting Expression = Subquery scalar-subquery#37, [id=#38] +Subquery:3 Hosting operator id = 38 Hosting Expression = Subquery scalar-subquery#31, [id=#32] * HashAggregate (91) +- Exchange (90) +- * HashAggregate (89) @@ -448,81 +448,81 @@ Subquery:3 Hosting operator id = 38 Hosting Expression = Subquery scalar-subquer (77) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66] +Output [4]: [ss_customer_sk#53, ss_quantity#54, ss_sales_price#55, ss_sold_date_sk#56] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#66), dynamicpruningexpression(ss_sold_date_sk#66 IN dynamicpruning#67)] +PartitionFilters: [isnotnull(ss_sold_date_sk#56), dynamicpruningexpression(ss_sold_date_sk#56 IN dynamicpruning#57)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (78) ColumnarToRow [codegen id : 3] -Input [4]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66] +Input [4]: [ss_customer_sk#53, ss_quantity#54, ss_sales_price#55, ss_sold_date_sk#56] (79) Filter [codegen id : 3] -Input [4]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66] -Condition : isnotnull(ss_customer_sk#63) +Input [4]: [ss_customer_sk#53, ss_quantity#54, ss_sales_price#55, ss_sold_date_sk#56] +Condition : isnotnull(ss_customer_sk#53) (80) ReusedExchange [Reuses operator id: 32] -Output [1]: [c_customer_sk#68] +Output [1]: [c_customer_sk#58] (81) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_customer_sk#63] -Right keys [1]: [c_customer_sk#68] +Left keys [1]: [ss_customer_sk#53] +Right keys [1]: [c_customer_sk#58] Join condition: None (82) Project [codegen id : 3] -Output [4]: [ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66, c_customer_sk#68] -Input [5]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66, c_customer_sk#68] +Output [4]: [ss_quantity#54, ss_sales_price#55, ss_sold_date_sk#56, c_customer_sk#58] +Input [5]: [ss_customer_sk#53, ss_quantity#54, ss_sales_price#55, ss_sold_date_sk#56, c_customer_sk#58] (83) ReusedExchange [Reuses operator id: 96] -Output [1]: [d_date_sk#69] +Output [1]: [d_date_sk#59] (84) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#66] -Right keys [1]: [d_date_sk#69] +Left keys [1]: [ss_sold_date_sk#56] +Right keys [1]: [d_date_sk#59] Join condition: None (85) Project [codegen id : 3] -Output [3]: [ss_quantity#64, ss_sales_price#65, c_customer_sk#68] -Input [5]: [ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66, c_customer_sk#68, d_date_sk#69] +Output [3]: [ss_quantity#54, ss_sales_price#55, c_customer_sk#58] +Input [5]: [ss_quantity#54, ss_sales_price#55, ss_sold_date_sk#56, c_customer_sk#58, d_date_sk#59] (86) HashAggregate [codegen id : 3] -Input [3]: [ss_quantity#64, ss_sales_price#65, c_customer_sk#68] -Keys [1]: [c_customer_sk#68] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#70, isEmpty#71] -Results [3]: [c_customer_sk#68, sum#72, isEmpty#73] +Input [3]: [ss_quantity#54, ss_sales_price#55, c_customer_sk#58] +Keys [1]: [c_customer_sk#58] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#54 as decimal(12,2))) * promote_precision(cast(ss_sales_price#55 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#60, isEmpty#61] +Results [3]: [c_customer_sk#58, sum#62, isEmpty#63] (87) Exchange -Input [3]: [c_customer_sk#68, sum#72, isEmpty#73] -Arguments: hashpartitioning(c_customer_sk#68, 5), ENSURE_REQUIREMENTS, [id=#74] +Input [3]: [c_customer_sk#58, sum#62, isEmpty#63] +Arguments: hashpartitioning(c_customer_sk#58, 5), ENSURE_REQUIREMENTS, [plan_id=11] (88) HashAggregate [codegen id : 4] -Input [3]: [c_customer_sk#68, sum#72, isEmpty#73] -Keys [1]: [c_customer_sk#68] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2)))#75] -Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2)))#75 AS csales#76] +Input [3]: [c_customer_sk#58, sum#62, isEmpty#63] +Keys [1]: [c_customer_sk#58] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#54 as decimal(12,2))) * promote_precision(cast(ss_sales_price#55 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#54 as decimal(12,2))) * promote_precision(cast(ss_sales_price#55 as decimal(12,2)))), DecimalType(18,2)))#64] +Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#54 as decimal(12,2))) * promote_precision(cast(ss_sales_price#55 as decimal(12,2)))), DecimalType(18,2)))#64 AS csales#65] (89) HashAggregate [codegen id : 4] -Input [1]: [csales#76] +Input [1]: [csales#65] Keys: [] -Functions [1]: [partial_max(csales#76)] -Aggregate Attributes [1]: [max#77] -Results [1]: [max#78] +Functions [1]: [partial_max(csales#65)] +Aggregate Attributes [1]: [max#66] +Results [1]: [max#67] (90) Exchange -Input [1]: [max#78] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#79] +Input [1]: [max#67] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] (91) HashAggregate [codegen id : 5] -Input [1]: [max#78] +Input [1]: [max#67] Keys: [] -Functions [1]: [max(csales#76)] -Aggregate Attributes [1]: [max(csales#76)#80] -Results [1]: [max(csales#76)#80 AS tpcds_cmax#81] +Functions [1]: [max(csales#65)] +Aggregate Attributes [1]: [max(csales#65)#68] +Results [1]: [max(csales#65)#68 AS tpcds_cmax#69] -Subquery:4 Hosting operator id = 77 Hosting Expression = ss_sold_date_sk#66 IN dynamicpruning#67 +Subquery:4 Hosting operator id = 77 Hosting Expression = ss_sold_date_sk#56 IN dynamicpruning#57 BroadcastExchange (96) +- * Project (95) +- * Filter (94) @@ -531,29 +531,29 @@ BroadcastExchange (96) (92) Scan parquet default.date_dim -Output [2]: [d_date_sk#69, d_year#82] +Output [2]: [d_date_sk#59, d_year#70] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (93) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#69, d_year#82] +Input [2]: [d_date_sk#59, d_year#70] (94) Filter [codegen id : 1] -Input [2]: [d_date_sk#69, d_year#82] -Condition : (d_year#82 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#69)) +Input [2]: [d_date_sk#59, d_year#70] +Condition : (d_year#70 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#59)) (95) Project [codegen id : 1] -Output [1]: [d_date_sk#69] -Input [2]: [d_date_sk#69, d_year#82] +Output [1]: [d_date_sk#59] +Input [2]: [d_date_sk#59, d_year#70] (96) BroadcastExchange -Input [1]: [d_date_sk#69] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#83] +Input [1]: [d_date_sk#59] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=13] -Subquery:5 Hosting operator id = 46 Hosting Expression = ws_sold_date_sk#45 IN dynamicpruning#6 +Subquery:5 Hosting operator id = 46 Hosting Expression = ws_sold_date_sk#39 IN dynamicpruning#6 -Subquery:6 Hosting operator id = 55 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] +Subquery:6 Hosting operator id = 55 Hosting Expression = ReusedSubquery Subquery scalar-subquery#31, [id=#32] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt index 4d1109078e346..49fdf20838477 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b.sf100/explain.txt @@ -147,107 +147,107 @@ Condition : isnotnull(cs_bill_customer_sk#1) (4) Exchange Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] -Arguments: hashpartitioning(cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#7] +Arguments: hashpartitioning(cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] Arguments: [cs_item_sk#2 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_sales -Output [2]: [ss_item_sk#8, ss_sold_date_sk#9] +Output [2]: [ss_item_sk#7, ss_sold_date_sk#8] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#9), dynamicpruningexpression(ss_sold_date_sk#9 IN dynamicpruning#10)] +PartitionFilters: [isnotnull(ss_sold_date_sk#8), dynamicpruningexpression(ss_sold_date_sk#8 IN dynamicpruning#9)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 4] -Input [2]: [ss_item_sk#8, ss_sold_date_sk#9] +Input [2]: [ss_item_sk#7, ss_sold_date_sk#8] (8) Filter [codegen id : 4] -Input [2]: [ss_item_sk#8, ss_sold_date_sk#9] -Condition : isnotnull(ss_item_sk#8) +Input [2]: [ss_item_sk#7, ss_sold_date_sk#8] +Condition : isnotnull(ss_item_sk#7) (9) ReusedExchange [Reuses operator id: 139] -Output [2]: [d_date_sk#11, d_date#12] +Output [2]: [d_date_sk#10, d_date#11] (10) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#9] -Right keys [1]: [d_date_sk#11] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#10] Join condition: None (11) Project [codegen id : 4] -Output [2]: [ss_item_sk#8, d_date#12] -Input [4]: [ss_item_sk#8, ss_sold_date_sk#9, d_date_sk#11, d_date#12] +Output [2]: [ss_item_sk#7, d_date#11] +Input [4]: [ss_item_sk#7, ss_sold_date_sk#8, d_date_sk#10, d_date#11] (12) Exchange -Input [2]: [ss_item_sk#8, d_date#12] -Arguments: hashpartitioning(ss_item_sk#8, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [2]: [ss_item_sk#7, d_date#11] +Arguments: hashpartitioning(ss_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (13) Sort [codegen id : 5] -Input [2]: [ss_item_sk#8, d_date#12] -Arguments: [ss_item_sk#8 ASC NULLS FIRST], false, 0 +Input [2]: [ss_item_sk#7, d_date#11] +Arguments: [ss_item_sk#7 ASC NULLS FIRST], false, 0 (14) Scan parquet default.item -Output [2]: [i_item_sk#14, i_item_desc#15] +Output [2]: [i_item_sk#12, i_item_desc#13] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 6] -Input [2]: [i_item_sk#14, i_item_desc#15] +Input [2]: [i_item_sk#12, i_item_desc#13] (16) Filter [codegen id : 6] -Input [2]: [i_item_sk#14, i_item_desc#15] -Condition : isnotnull(i_item_sk#14) +Input [2]: [i_item_sk#12, i_item_desc#13] +Condition : isnotnull(i_item_sk#12) (17) Exchange -Input [2]: [i_item_sk#14, i_item_desc#15] -Arguments: hashpartitioning(i_item_sk#14, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [2]: [i_item_sk#12, i_item_desc#13] +Arguments: hashpartitioning(i_item_sk#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) Sort [codegen id : 7] -Input [2]: [i_item_sk#14, i_item_desc#15] -Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 +Input [2]: [i_item_sk#12, i_item_desc#13] +Arguments: [i_item_sk#12 ASC NULLS FIRST], false, 0 (19) SortMergeJoin [codegen id : 8] -Left keys [1]: [ss_item_sk#8] -Right keys [1]: [i_item_sk#14] +Left keys [1]: [ss_item_sk#7] +Right keys [1]: [i_item_sk#12] Join condition: None (20) Project [codegen id : 8] -Output [3]: [d_date#12, i_item_sk#14, substr(i_item_desc#15, 1, 30) AS _groupingexpression#17] -Input [4]: [ss_item_sk#8, d_date#12, i_item_sk#14, i_item_desc#15] +Output [3]: [d_date#11, i_item_sk#12, substr(i_item_desc#13, 1, 30) AS _groupingexpression#14] +Input [4]: [ss_item_sk#7, d_date#11, i_item_sk#12, i_item_desc#13] (21) HashAggregate [codegen id : 8] -Input [3]: [d_date#12, i_item_sk#14, _groupingexpression#17] -Keys [3]: [_groupingexpression#17, i_item_sk#14, d_date#12] +Input [3]: [d_date#11, i_item_sk#12, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, i_item_sk#12, d_date#11] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#18] -Results [4]: [_groupingexpression#17, i_item_sk#14, d_date#12, count#19] +Aggregate Attributes [1]: [count#15] +Results [4]: [_groupingexpression#14, i_item_sk#12, d_date#11, count#16] (22) HashAggregate [codegen id : 8] -Input [4]: [_groupingexpression#17, i_item_sk#14, d_date#12, count#19] -Keys [3]: [_groupingexpression#17, i_item_sk#14, d_date#12] +Input [4]: [_groupingexpression#14, i_item_sk#12, d_date#11, count#16] +Keys [3]: [_groupingexpression#14, i_item_sk#12, d_date#11] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#20] -Results [2]: [i_item_sk#14 AS item_sk#21, count(1)#20 AS cnt#22] +Aggregate Attributes [1]: [count(1)#17] +Results [2]: [i_item_sk#12 AS item_sk#18, count(1)#17 AS cnt#19] (23) Filter [codegen id : 8] -Input [2]: [item_sk#21, cnt#22] -Condition : (cnt#22 > 4) +Input [2]: [item_sk#18, cnt#19] +Condition : (cnt#19 > 4) (24) Project [codegen id : 8] -Output [1]: [item_sk#21] -Input [2]: [item_sk#21, cnt#22] +Output [1]: [item_sk#18] +Input [2]: [item_sk#18, cnt#19] (25) Sort [codegen id : 8] -Input [1]: [item_sk#21] -Arguments: [item_sk#21 ASC NULLS FIRST], false, 0 +Input [1]: [item_sk#18] +Arguments: [item_sk#18 ASC NULLS FIRST], false, 0 (26) SortMergeJoin [codegen id : 9] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [item_sk#21] +Right keys [1]: [item_sk#18] Join condition: None (27) Project [codegen id : 9] @@ -256,463 +256,463 @@ Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, (28) Exchange Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] -Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#23] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) Sort [codegen id : 10] Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] Arguments: [cs_bill_customer_sk#1 ASC NULLS FIRST], false, 0 (30) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] +Output [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 11] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] (32) Filter [codegen id : 11] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] -Condition : isnotnull(ss_customer_sk#24) +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] +Condition : isnotnull(ss_customer_sk#20) (33) Project [codegen id : 11] -Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] +Output [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] (34) Exchange -Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -Arguments: hashpartitioning(ss_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] +Arguments: hashpartitioning(ss_customer_sk#20, 5), ENSURE_REQUIREMENTS, [plan_id=5] (35) Sort [codegen id : 12] -Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 +Input [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] +Arguments: [ss_customer_sk#20 ASC NULLS FIRST], false, 0 (36) Scan parquet default.customer -Output [1]: [c_customer_sk#29] +Output [1]: [c_customer_sk#24] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 13] -Input [1]: [c_customer_sk#29] +Input [1]: [c_customer_sk#24] (38) Filter [codegen id : 13] -Input [1]: [c_customer_sk#29] -Condition : isnotnull(c_customer_sk#29) +Input [1]: [c_customer_sk#24] +Condition : isnotnull(c_customer_sk#24) (39) Exchange -Input [1]: [c_customer_sk#29] -Arguments: hashpartitioning(c_customer_sk#29, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [1]: [c_customer_sk#24] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [plan_id=6] (40) Sort [codegen id : 14] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (41) SortMergeJoin [codegen id : 15] -Left keys [1]: [ss_customer_sk#24] -Right keys [1]: [c_customer_sk#29] +Left keys [1]: [ss_customer_sk#20] +Right keys [1]: [c_customer_sk#24] Join condition: None (42) Project [codegen id : 15] -Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#29] +Output [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, c_customer_sk#24] (43) HashAggregate [codegen id : 15] -Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] -Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#31, isEmpty#32] -Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] +Input [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Keys [1]: [c_customer_sk#24] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#25, isEmpty#26] +Results [3]: [c_customer_sk#24, sum#27, isEmpty#28] (44) HashAggregate [codegen id : 15] -Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] -Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] +Input [3]: [c_customer_sk#24, sum#27, isEmpty#28] +Keys [1]: [c_customer_sk#24] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29] +Results [2]: [c_customer_sk#24, sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29 AS ssales#30] (45) Filter [codegen id : 15] -Input [2]: [c_customer_sk#29, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) +Input [2]: [c_customer_sk#24, ssales#30] +Condition : (isnotnull(ssales#30) AND (cast(ssales#30 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#31, [id=#32] as decimal(32,6)))), DecimalType(38,8)))) (46) Project [codegen id : 15] -Output [1]: [c_customer_sk#29] -Input [2]: [c_customer_sk#29, ssales#36] +Output [1]: [c_customer_sk#24] +Input [2]: [c_customer_sk#24, ssales#30] (47) Sort [codegen id : 15] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (48) SortMergeJoin [codegen id : 17] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#29] +Right keys [1]: [c_customer_sk#24] Join condition: None (49) ReusedExchange [Reuses operator id: 134] -Output [1]: [d_date_sk#39] +Output [1]: [d_date_sk#33] (50) BroadcastHashJoin [codegen id : 17] Left keys [1]: [cs_sold_date_sk#5] -Right keys [1]: [d_date_sk#39] +Right keys [1]: [d_date_sk#33] Join condition: None (51) Project [codegen id : 17] Output [3]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4] -Input [5]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#39] +Input [5]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, d_date_sk#33] (52) Scan parquet default.customer -Output [3]: [c_customer_sk#40, c_first_name#41, c_last_name#42] +Output [3]: [c_customer_sk#34, c_first_name#35, c_last_name#36] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (53) ColumnarToRow [codegen id : 18] -Input [3]: [c_customer_sk#40, c_first_name#41, c_last_name#42] +Input [3]: [c_customer_sk#34, c_first_name#35, c_last_name#36] (54) Filter [codegen id : 18] -Input [3]: [c_customer_sk#40, c_first_name#41, c_last_name#42] -Condition : isnotnull(c_customer_sk#40) +Input [3]: [c_customer_sk#34, c_first_name#35, c_last_name#36] +Condition : isnotnull(c_customer_sk#34) (55) Exchange -Input [3]: [c_customer_sk#40, c_first_name#41, c_last_name#42] -Arguments: hashpartitioning(c_customer_sk#40, 5), ENSURE_REQUIREMENTS, [id=#43] +Input [3]: [c_customer_sk#34, c_first_name#35, c_last_name#36] +Arguments: hashpartitioning(c_customer_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=7] (56) Sort [codegen id : 19] -Input [3]: [c_customer_sk#40, c_first_name#41, c_last_name#42] -Arguments: [c_customer_sk#40 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#34, c_first_name#35, c_last_name#36] +Arguments: [c_customer_sk#34 ASC NULLS FIRST], false, 0 (57) ReusedExchange [Reuses operator id: 34] -Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Output [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] (58) Sort [codegen id : 21] -Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 +Input [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] +Arguments: [ss_customer_sk#20 ASC NULLS FIRST], false, 0 (59) ReusedExchange [Reuses operator id: 39] -Output [1]: [c_customer_sk#29] +Output [1]: [c_customer_sk#24] (60) Sort [codegen id : 23] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (61) SortMergeJoin [codegen id : 24] -Left keys [1]: [ss_customer_sk#24] -Right keys [1]: [c_customer_sk#29] +Left keys [1]: [ss_customer_sk#20] +Right keys [1]: [c_customer_sk#24] Join condition: None (62) Project [codegen id : 24] -Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#29] +Output [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, c_customer_sk#24] (63) HashAggregate [codegen id : 24] -Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] -Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#31, isEmpty#32] -Results [3]: [c_customer_sk#29, sum#33, isEmpty#34] +Input [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Keys [1]: [c_customer_sk#24] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#25, isEmpty#26] +Results [3]: [c_customer_sk#24, sum#27, isEmpty#28] (64) HashAggregate [codegen id : 24] -Input [3]: [c_customer_sk#29, sum#33, isEmpty#34] -Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] +Input [3]: [c_customer_sk#24, sum#27, isEmpty#28] +Keys [1]: [c_customer_sk#24] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29] +Results [2]: [c_customer_sk#24, sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29 AS ssales#30] (65) Filter [codegen id : 24] -Input [2]: [c_customer_sk#29, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) +Input [2]: [c_customer_sk#24, ssales#30] +Condition : (isnotnull(ssales#30) AND (cast(ssales#30 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#31, [id=#32] as decimal(32,6)))), DecimalType(38,8)))) (66) Project [codegen id : 24] -Output [1]: [c_customer_sk#29] -Input [2]: [c_customer_sk#29, ssales#36] +Output [1]: [c_customer_sk#24] +Input [2]: [c_customer_sk#24, ssales#30] (67) Sort [codegen id : 24] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (68) SortMergeJoin [codegen id : 25] -Left keys [1]: [c_customer_sk#40] -Right keys [1]: [c_customer_sk#29] +Left keys [1]: [c_customer_sk#34] +Right keys [1]: [c_customer_sk#24] Join condition: None (69) SortMergeJoin [codegen id : 26] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#40] +Right keys [1]: [c_customer_sk#34] Join condition: None (70) Project [codegen id : 26] -Output [4]: [cs_quantity#3, cs_list_price#4, c_first_name#41, c_last_name#42] -Input [6]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, c_customer_sk#40, c_first_name#41, c_last_name#42] +Output [4]: [cs_quantity#3, cs_list_price#4, c_first_name#35, c_last_name#36] +Input [6]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, c_customer_sk#34, c_first_name#35, c_last_name#36] (71) HashAggregate [codegen id : 26] -Input [4]: [cs_quantity#3, cs_list_price#4, c_first_name#41, c_last_name#42] -Keys [2]: [c_last_name#42, c_first_name#41] +Input [4]: [cs_quantity#3, cs_list_price#4, c_first_name#35, c_last_name#36] +Keys [2]: [c_last_name#36, c_first_name#35] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#44, isEmpty#45] -Results [4]: [c_last_name#42, c_first_name#41, sum#46, isEmpty#47] +Aggregate Attributes [2]: [sum#37, isEmpty#38] +Results [4]: [c_last_name#36, c_first_name#35, sum#39, isEmpty#40] (72) Exchange -Input [4]: [c_last_name#42, c_first_name#41, sum#46, isEmpty#47] -Arguments: hashpartitioning(c_last_name#42, c_first_name#41, 5), ENSURE_REQUIREMENTS, [id=#48] +Input [4]: [c_last_name#36, c_first_name#35, sum#39, isEmpty#40] +Arguments: hashpartitioning(c_last_name#36, c_first_name#35, 5), ENSURE_REQUIREMENTS, [plan_id=8] (73) HashAggregate [codegen id : 27] -Input [4]: [c_last_name#42, c_first_name#41, sum#46, isEmpty#47] -Keys [2]: [c_last_name#42, c_first_name#41] +Input [4]: [c_last_name#36, c_first_name#35, sum#39, isEmpty#40] +Keys [2]: [c_last_name#36, c_first_name#35] Functions [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))#49] -Results [3]: [c_last_name#42, c_first_name#41, sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))#49 AS sales#50] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))#41] +Results [3]: [c_last_name#36, c_first_name#35, sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))#41 AS sales#42] (74) Scan parquet default.web_sales -Output [5]: [ws_item_sk#51, ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55] +Output [5]: [ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#55), dynamicpruningexpression(ws_sold_date_sk#55 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(ws_sold_date_sk#47), dynamicpruningexpression(ws_sold_date_sk#47 IN dynamicpruning#6)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (75) ColumnarToRow [codegen id : 28] -Input [5]: [ws_item_sk#51, ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55] +Input [5]: [ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] (76) Filter [codegen id : 28] -Input [5]: [ws_item_sk#51, ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55] -Condition : isnotnull(ws_bill_customer_sk#52) +Input [5]: [ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] +Condition : isnotnull(ws_bill_customer_sk#44) (77) Exchange -Input [5]: [ws_item_sk#51, ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55] -Arguments: hashpartitioning(ws_item_sk#51, 5), ENSURE_REQUIREMENTS, [id=#56] +Input [5]: [ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] +Arguments: hashpartitioning(ws_item_sk#43, 5), ENSURE_REQUIREMENTS, [plan_id=9] (78) Sort [codegen id : 29] -Input [5]: [ws_item_sk#51, ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55] -Arguments: [ws_item_sk#51 ASC NULLS FIRST], false, 0 +Input [5]: [ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] +Arguments: [ws_item_sk#43 ASC NULLS FIRST], false, 0 (79) ReusedExchange [Reuses operator id: 12] -Output [2]: [ss_item_sk#8, d_date#12] +Output [2]: [ss_item_sk#7, d_date#11] (80) Sort [codegen id : 32] -Input [2]: [ss_item_sk#8, d_date#12] -Arguments: [ss_item_sk#8 ASC NULLS FIRST], false, 0 +Input [2]: [ss_item_sk#7, d_date#11] +Arguments: [ss_item_sk#7 ASC NULLS FIRST], false, 0 (81) ReusedExchange [Reuses operator id: 17] -Output [2]: [i_item_sk#14, i_item_desc#15] +Output [2]: [i_item_sk#12, i_item_desc#13] (82) Sort [codegen id : 34] -Input [2]: [i_item_sk#14, i_item_desc#15] -Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 +Input [2]: [i_item_sk#12, i_item_desc#13] +Arguments: [i_item_sk#12 ASC NULLS FIRST], false, 0 (83) SortMergeJoin [codegen id : 35] -Left keys [1]: [ss_item_sk#8] -Right keys [1]: [i_item_sk#14] +Left keys [1]: [ss_item_sk#7] +Right keys [1]: [i_item_sk#12] Join condition: None (84) Project [codegen id : 35] -Output [3]: [d_date#12, i_item_sk#14, substr(i_item_desc#15, 1, 30) AS _groupingexpression#17] -Input [4]: [ss_item_sk#8, d_date#12, i_item_sk#14, i_item_desc#15] +Output [3]: [d_date#11, i_item_sk#12, substr(i_item_desc#13, 1, 30) AS _groupingexpression#14] +Input [4]: [ss_item_sk#7, d_date#11, i_item_sk#12, i_item_desc#13] (85) HashAggregate [codegen id : 35] -Input [3]: [d_date#12, i_item_sk#14, _groupingexpression#17] -Keys [3]: [_groupingexpression#17, i_item_sk#14, d_date#12] +Input [3]: [d_date#11, i_item_sk#12, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, i_item_sk#12, d_date#11] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#18] -Results [4]: [_groupingexpression#17, i_item_sk#14, d_date#12, count#19] +Aggregate Attributes [1]: [count#15] +Results [4]: [_groupingexpression#14, i_item_sk#12, d_date#11, count#16] (86) HashAggregate [codegen id : 35] -Input [4]: [_groupingexpression#17, i_item_sk#14, d_date#12, count#19] -Keys [3]: [_groupingexpression#17, i_item_sk#14, d_date#12] +Input [4]: [_groupingexpression#14, i_item_sk#12, d_date#11, count#16] +Keys [3]: [_groupingexpression#14, i_item_sk#12, d_date#11] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#20] -Results [2]: [i_item_sk#14 AS item_sk#21, count(1)#20 AS cnt#22] +Aggregate Attributes [1]: [count(1)#17] +Results [2]: [i_item_sk#12 AS item_sk#18, count(1)#17 AS cnt#19] (87) Filter [codegen id : 35] -Input [2]: [item_sk#21, cnt#22] -Condition : (cnt#22 > 4) +Input [2]: [item_sk#18, cnt#19] +Condition : (cnt#19 > 4) (88) Project [codegen id : 35] -Output [1]: [item_sk#21] -Input [2]: [item_sk#21, cnt#22] +Output [1]: [item_sk#18] +Input [2]: [item_sk#18, cnt#19] (89) Sort [codegen id : 35] -Input [1]: [item_sk#21] -Arguments: [item_sk#21 ASC NULLS FIRST], false, 0 +Input [1]: [item_sk#18] +Arguments: [item_sk#18 ASC NULLS FIRST], false, 0 (90) SortMergeJoin [codegen id : 36] -Left keys [1]: [ws_item_sk#51] -Right keys [1]: [item_sk#21] +Left keys [1]: [ws_item_sk#43] +Right keys [1]: [item_sk#18] Join condition: None (91) Project [codegen id : 36] -Output [4]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55] -Input [5]: [ws_item_sk#51, ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55] +Output [4]: [ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] +Input [5]: [ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] (92) Exchange -Input [4]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55] -Arguments: hashpartitioning(ws_bill_customer_sk#52, 5), ENSURE_REQUIREMENTS, [id=#57] +Input [4]: [ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] +Arguments: hashpartitioning(ws_bill_customer_sk#44, 5), ENSURE_REQUIREMENTS, [plan_id=10] (93) Sort [codegen id : 37] -Input [4]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55] -Arguments: [ws_bill_customer_sk#52 ASC NULLS FIRST], false, 0 +Input [4]: [ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] +Arguments: [ws_bill_customer_sk#44 ASC NULLS FIRST], false, 0 (94) ReusedExchange [Reuses operator id: 34] -Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Output [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] (95) Sort [codegen id : 39] -Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 +Input [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] +Arguments: [ss_customer_sk#20 ASC NULLS FIRST], false, 0 (96) ReusedExchange [Reuses operator id: 39] -Output [1]: [c_customer_sk#29] +Output [1]: [c_customer_sk#24] (97) Sort [codegen id : 41] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (98) SortMergeJoin [codegen id : 42] -Left keys [1]: [ss_customer_sk#24] -Right keys [1]: [c_customer_sk#29] +Left keys [1]: [ss_customer_sk#20] +Right keys [1]: [c_customer_sk#24] Join condition: None (99) Project [codegen id : 42] -Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#29] +Output [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, c_customer_sk#24] (100) HashAggregate [codegen id : 42] -Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] -Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#58, isEmpty#59] -Results [3]: [c_customer_sk#29, sum#60, isEmpty#61] +Input [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Keys [1]: [c_customer_sk#24] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#48, isEmpty#49] +Results [3]: [c_customer_sk#24, sum#50, isEmpty#51] (101) HashAggregate [codegen id : 42] -Input [3]: [c_customer_sk#29, sum#60, isEmpty#61] -Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] +Input [3]: [c_customer_sk#24, sum#50, isEmpty#51] +Keys [1]: [c_customer_sk#24] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29] +Results [2]: [c_customer_sk#24, sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29 AS ssales#30] (102) Filter [codegen id : 42] -Input [2]: [c_customer_sk#29, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) +Input [2]: [c_customer_sk#24, ssales#30] +Condition : (isnotnull(ssales#30) AND (cast(ssales#30 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#31, [id=#32] as decimal(32,6)))), DecimalType(38,8)))) (103) Project [codegen id : 42] -Output [1]: [c_customer_sk#29] -Input [2]: [c_customer_sk#29, ssales#36] +Output [1]: [c_customer_sk#24] +Input [2]: [c_customer_sk#24, ssales#30] (104) Sort [codegen id : 42] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (105) SortMergeJoin [codegen id : 44] -Left keys [1]: [ws_bill_customer_sk#52] -Right keys [1]: [c_customer_sk#29] +Left keys [1]: [ws_bill_customer_sk#44] +Right keys [1]: [c_customer_sk#24] Join condition: None (106) ReusedExchange [Reuses operator id: 134] -Output [1]: [d_date_sk#62] +Output [1]: [d_date_sk#52] (107) BroadcastHashJoin [codegen id : 44] -Left keys [1]: [ws_sold_date_sk#55] -Right keys [1]: [d_date_sk#62] +Left keys [1]: [ws_sold_date_sk#47] +Right keys [1]: [d_date_sk#52] Join condition: None (108) Project [codegen id : 44] -Output [3]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54] -Input [5]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, ws_sold_date_sk#55, d_date_sk#62] +Output [3]: [ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46] +Input [5]: [ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47, d_date_sk#52] (109) ReusedExchange [Reuses operator id: 55] -Output [3]: [c_customer_sk#63, c_first_name#64, c_last_name#65] +Output [3]: [c_customer_sk#53, c_first_name#54, c_last_name#55] (110) Sort [codegen id : 46] -Input [3]: [c_customer_sk#63, c_first_name#64, c_last_name#65] -Arguments: [c_customer_sk#63 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#53, c_first_name#54, c_last_name#55] +Arguments: [c_customer_sk#53 ASC NULLS FIRST], false, 0 (111) ReusedExchange [Reuses operator id: 34] -Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] +Output [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] (112) Sort [codegen id : 48] -Input [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -Arguments: [ss_customer_sk#24 ASC NULLS FIRST], false, 0 +Input [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] +Arguments: [ss_customer_sk#20 ASC NULLS FIRST], false, 0 (113) ReusedExchange [Reuses operator id: 39] -Output [1]: [c_customer_sk#29] +Output [1]: [c_customer_sk#24] (114) Sort [codegen id : 50] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (115) SortMergeJoin [codegen id : 51] -Left keys [1]: [ss_customer_sk#24] -Right keys [1]: [c_customer_sk#29] +Left keys [1]: [ss_customer_sk#20] +Right keys [1]: [c_customer_sk#24] Join condition: None (116) Project [codegen id : 51] -Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#29] +Output [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, c_customer_sk#24] (117) HashAggregate [codegen id : 51] -Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#29] -Keys [1]: [c_customer_sk#29] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#58, isEmpty#59] -Results [3]: [c_customer_sk#29, sum#60, isEmpty#61] +Input [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Keys [1]: [c_customer_sk#24] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#48, isEmpty#49] +Results [3]: [c_customer_sk#24, sum#50, isEmpty#51] (118) HashAggregate [codegen id : 51] -Input [3]: [c_customer_sk#29, sum#60, isEmpty#61] -Keys [1]: [c_customer_sk#29] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] -Results [2]: [c_customer_sk#29, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] +Input [3]: [c_customer_sk#24, sum#50, isEmpty#51] +Keys [1]: [c_customer_sk#24] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29] +Results [2]: [c_customer_sk#24, sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29 AS ssales#30] (119) Filter [codegen id : 51] -Input [2]: [c_customer_sk#29, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) +Input [2]: [c_customer_sk#24, ssales#30] +Condition : (isnotnull(ssales#30) AND (cast(ssales#30 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#31, [id=#32] as decimal(32,6)))), DecimalType(38,8)))) (120) Project [codegen id : 51] -Output [1]: [c_customer_sk#29] -Input [2]: [c_customer_sk#29, ssales#36] +Output [1]: [c_customer_sk#24] +Input [2]: [c_customer_sk#24, ssales#30] (121) Sort [codegen id : 51] -Input [1]: [c_customer_sk#29] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (122) SortMergeJoin [codegen id : 52] -Left keys [1]: [c_customer_sk#63] -Right keys [1]: [c_customer_sk#29] +Left keys [1]: [c_customer_sk#53] +Right keys [1]: [c_customer_sk#24] Join condition: None (123) SortMergeJoin [codegen id : 53] -Left keys [1]: [ws_bill_customer_sk#52] -Right keys [1]: [c_customer_sk#63] +Left keys [1]: [ws_bill_customer_sk#44] +Right keys [1]: [c_customer_sk#53] Join condition: None (124) Project [codegen id : 53] -Output [4]: [ws_quantity#53, ws_list_price#54, c_first_name#64, c_last_name#65] -Input [6]: [ws_bill_customer_sk#52, ws_quantity#53, ws_list_price#54, c_customer_sk#63, c_first_name#64, c_last_name#65] +Output [4]: [ws_quantity#45, ws_list_price#46, c_first_name#54, c_last_name#55] +Input [6]: [ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, c_customer_sk#53, c_first_name#54, c_last_name#55] (125) HashAggregate [codegen id : 53] -Input [4]: [ws_quantity#53, ws_list_price#54, c_first_name#64, c_last_name#65] -Keys [2]: [c_last_name#65, c_first_name#64] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#66, isEmpty#67] -Results [4]: [c_last_name#65, c_first_name#64, sum#68, isEmpty#69] +Input [4]: [ws_quantity#45, ws_list_price#46, c_first_name#54, c_last_name#55] +Keys [2]: [c_last_name#55, c_first_name#54] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#45 as decimal(12,2))) * promote_precision(cast(ws_list_price#46 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#56, isEmpty#57] +Results [4]: [c_last_name#55, c_first_name#54, sum#58, isEmpty#59] (126) Exchange -Input [4]: [c_last_name#65, c_first_name#64, sum#68, isEmpty#69] -Arguments: hashpartitioning(c_last_name#65, c_first_name#64, 5), ENSURE_REQUIREMENTS, [id=#70] +Input [4]: [c_last_name#55, c_first_name#54, sum#58, isEmpty#59] +Arguments: hashpartitioning(c_last_name#55, c_first_name#54, 5), ENSURE_REQUIREMENTS, [plan_id=11] (127) HashAggregate [codegen id : 54] -Input [4]: [c_last_name#65, c_first_name#64, sum#68, isEmpty#69] -Keys [2]: [c_last_name#65, c_first_name#64] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2)))#71] -Results [3]: [c_last_name#65, c_first_name#64, sum(CheckOverflow((promote_precision(cast(ws_quantity#53 as decimal(12,2))) * promote_precision(cast(ws_list_price#54 as decimal(12,2)))), DecimalType(18,2)))#71 AS sales#72] +Input [4]: [c_last_name#55, c_first_name#54, sum#58, isEmpty#59] +Keys [2]: [c_last_name#55, c_first_name#54] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#45 as decimal(12,2))) * promote_precision(cast(ws_list_price#46 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#45 as decimal(12,2))) * promote_precision(cast(ws_list_price#46 as decimal(12,2)))), DecimalType(18,2)))#60] +Results [3]: [c_last_name#55, c_first_name#54, sum(CheckOverflow((promote_precision(cast(ws_quantity#45 as decimal(12,2))) * promote_precision(cast(ws_list_price#46 as decimal(12,2)))), DecimalType(18,2)))#60 AS sales#61] (128) Union (129) TakeOrderedAndProject -Input [3]: [c_last_name#42, c_first_name#41, sales#50] -Arguments: 100, [c_last_name#42 ASC NULLS FIRST, c_first_name#41 ASC NULLS FIRST, sales#50 ASC NULLS FIRST], [c_last_name#42, c_first_name#41, sales#50] +Input [3]: [c_last_name#36, c_first_name#35, sales#42] +Arguments: 100, [c_last_name#36 ASC NULLS FIRST, c_first_name#35 ASC NULLS FIRST, sales#42 ASC NULLS FIRST], [c_last_name#36, c_first_name#35, sales#42] ===== Subqueries ===== @@ -725,28 +725,28 @@ BroadcastExchange (134) (130) Scan parquet default.date_dim -Output [3]: [d_date_sk#39, d_year#73, d_moy#74] +Output [3]: [d_date_sk#33, d_year#62, d_moy#63] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (131) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#73, d_moy#74] +Input [3]: [d_date_sk#33, d_year#62, d_moy#63] (132) Filter [codegen id : 1] -Input [3]: [d_date_sk#39, d_year#73, d_moy#74] -Condition : ((((isnotnull(d_year#73) AND isnotnull(d_moy#74)) AND (d_year#73 = 2000)) AND (d_moy#74 = 2)) AND isnotnull(d_date_sk#39)) +Input [3]: [d_date_sk#33, d_year#62, d_moy#63] +Condition : ((((isnotnull(d_year#62) AND isnotnull(d_moy#63)) AND (d_year#62 = 2000)) AND (d_moy#63 = 2)) AND isnotnull(d_date_sk#33)) (133) Project [codegen id : 1] -Output [1]: [d_date_sk#39] -Input [3]: [d_date_sk#39, d_year#73, d_moy#74] +Output [1]: [d_date_sk#33] +Input [3]: [d_date_sk#33, d_year#62, d_moy#63] (134) BroadcastExchange -Input [1]: [d_date_sk#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#75] +Input [1]: [d_date_sk#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] -Subquery:2 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#9 IN dynamicpruning#10 +Subquery:2 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 BroadcastExchange (139) +- * Project (138) +- * Filter (137) @@ -755,28 +755,28 @@ BroadcastExchange (139) (135) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_date#12, d_year#76] +Output [3]: [d_date_sk#10, d_date#11, d_year#64] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (136) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_date#12, d_year#76] +Input [3]: [d_date_sk#10, d_date#11, d_year#64] (137) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_date#12, d_year#76] -Condition : (d_year#76 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#10, d_date#11, d_year#64] +Condition : (d_year#64 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#10)) (138) Project [codegen id : 1] -Output [2]: [d_date_sk#11, d_date#12] -Input [3]: [d_date_sk#11, d_date#12, d_year#76] +Output [2]: [d_date_sk#10, d_date#11] +Input [3]: [d_date_sk#10, d_date#11, d_year#64] (139) BroadcastExchange -Input [2]: [d_date_sk#11, d_date#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#77] +Input [2]: [d_date_sk#10, d_date#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=13] -Subquery:3 Hosting operator id = 45 Hosting Expression = Subquery scalar-subquery#37, [id=#38] +Subquery:3 Hosting operator id = 45 Hosting Expression = Subquery scalar-subquery#31, [id=#32] * HashAggregate (156) +- Exchange (155) +- * HashAggregate (154) @@ -797,89 +797,89 @@ Subquery:3 Hosting operator id = 45 Hosting Expression = Subquery scalar-subquer (140) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80, ss_sold_date_sk#81] +Output [4]: [ss_customer_sk#65, ss_quantity#66, ss_sales_price#67, ss_sold_date_sk#68] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#81), dynamicpruningexpression(ss_sold_date_sk#81 IN dynamicpruning#82)] +PartitionFilters: [isnotnull(ss_sold_date_sk#68), dynamicpruningexpression(ss_sold_date_sk#68 IN dynamicpruning#69)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (141) ColumnarToRow [codegen id : 2] -Input [4]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80, ss_sold_date_sk#81] +Input [4]: [ss_customer_sk#65, ss_quantity#66, ss_sales_price#67, ss_sold_date_sk#68] (142) Filter [codegen id : 2] -Input [4]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80, ss_sold_date_sk#81] -Condition : isnotnull(ss_customer_sk#78) +Input [4]: [ss_customer_sk#65, ss_quantity#66, ss_sales_price#67, ss_sold_date_sk#68] +Condition : isnotnull(ss_customer_sk#65) (143) ReusedExchange [Reuses operator id: 161] -Output [1]: [d_date_sk#83] +Output [1]: [d_date_sk#70] (144) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#81] -Right keys [1]: [d_date_sk#83] +Left keys [1]: [ss_sold_date_sk#68] +Right keys [1]: [d_date_sk#70] Join condition: None (145) Project [codegen id : 2] -Output [3]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80] -Input [5]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80, ss_sold_date_sk#81, d_date_sk#83] +Output [3]: [ss_customer_sk#65, ss_quantity#66, ss_sales_price#67] +Input [5]: [ss_customer_sk#65, ss_quantity#66, ss_sales_price#67, ss_sold_date_sk#68, d_date_sk#70] (146) Exchange -Input [3]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80] -Arguments: hashpartitioning(ss_customer_sk#78, 5), ENSURE_REQUIREMENTS, [id=#84] +Input [3]: [ss_customer_sk#65, ss_quantity#66, ss_sales_price#67] +Arguments: hashpartitioning(ss_customer_sk#65, 5), ENSURE_REQUIREMENTS, [plan_id=14] (147) Sort [codegen id : 3] -Input [3]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80] -Arguments: [ss_customer_sk#78 ASC NULLS FIRST], false, 0 +Input [3]: [ss_customer_sk#65, ss_quantity#66, ss_sales_price#67] +Arguments: [ss_customer_sk#65 ASC NULLS FIRST], false, 0 (148) ReusedExchange [Reuses operator id: 39] -Output [1]: [c_customer_sk#85] +Output [1]: [c_customer_sk#71] (149) Sort [codegen id : 5] -Input [1]: [c_customer_sk#85] -Arguments: [c_customer_sk#85 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#71] +Arguments: [c_customer_sk#71 ASC NULLS FIRST], false, 0 (150) SortMergeJoin [codegen id : 6] -Left keys [1]: [ss_customer_sk#78] -Right keys [1]: [c_customer_sk#85] +Left keys [1]: [ss_customer_sk#65] +Right keys [1]: [c_customer_sk#71] Join condition: None (151) Project [codegen id : 6] -Output [3]: [ss_quantity#79, ss_sales_price#80, c_customer_sk#85] -Input [4]: [ss_customer_sk#78, ss_quantity#79, ss_sales_price#80, c_customer_sk#85] +Output [3]: [ss_quantity#66, ss_sales_price#67, c_customer_sk#71] +Input [4]: [ss_customer_sk#65, ss_quantity#66, ss_sales_price#67, c_customer_sk#71] (152) HashAggregate [codegen id : 6] -Input [3]: [ss_quantity#79, ss_sales_price#80, c_customer_sk#85] -Keys [1]: [c_customer_sk#85] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#79 as decimal(12,2))) * promote_precision(cast(ss_sales_price#80 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#86, isEmpty#87] -Results [3]: [c_customer_sk#85, sum#88, isEmpty#89] +Input [3]: [ss_quantity#66, ss_sales_price#67, c_customer_sk#71] +Keys [1]: [c_customer_sk#71] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#66 as decimal(12,2))) * promote_precision(cast(ss_sales_price#67 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#72, isEmpty#73] +Results [3]: [c_customer_sk#71, sum#74, isEmpty#75] (153) HashAggregate [codegen id : 6] -Input [3]: [c_customer_sk#85, sum#88, isEmpty#89] -Keys [1]: [c_customer_sk#85] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#79 as decimal(12,2))) * promote_precision(cast(ss_sales_price#80 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#79 as decimal(12,2))) * promote_precision(cast(ss_sales_price#80 as decimal(12,2)))), DecimalType(18,2)))#90] -Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#79 as decimal(12,2))) * promote_precision(cast(ss_sales_price#80 as decimal(12,2)))), DecimalType(18,2)))#90 AS csales#91] +Input [3]: [c_customer_sk#71, sum#74, isEmpty#75] +Keys [1]: [c_customer_sk#71] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#66 as decimal(12,2))) * promote_precision(cast(ss_sales_price#67 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#66 as decimal(12,2))) * promote_precision(cast(ss_sales_price#67 as decimal(12,2)))), DecimalType(18,2)))#76] +Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#66 as decimal(12,2))) * promote_precision(cast(ss_sales_price#67 as decimal(12,2)))), DecimalType(18,2)))#76 AS csales#77] (154) HashAggregate [codegen id : 6] -Input [1]: [csales#91] +Input [1]: [csales#77] Keys: [] -Functions [1]: [partial_max(csales#91)] -Aggregate Attributes [1]: [max#92] -Results [1]: [max#93] +Functions [1]: [partial_max(csales#77)] +Aggregate Attributes [1]: [max#78] +Results [1]: [max#79] (155) Exchange -Input [1]: [max#93] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#94] +Input [1]: [max#79] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] (156) HashAggregate [codegen id : 7] -Input [1]: [max#93] +Input [1]: [max#79] Keys: [] -Functions [1]: [max(csales#91)] -Aggregate Attributes [1]: [max(csales#91)#95] -Results [1]: [max(csales#91)#95 AS tpcds_cmax#96] +Functions [1]: [max(csales#77)] +Aggregate Attributes [1]: [max(csales#77)#80] +Results [1]: [max(csales#77)#80 AS tpcds_cmax#81] -Subquery:4 Hosting operator id = 140 Hosting Expression = ss_sold_date_sk#81 IN dynamicpruning#82 +Subquery:4 Hosting operator id = 140 Hosting Expression = ss_sold_date_sk#68 IN dynamicpruning#69 BroadcastExchange (161) +- * Project (160) +- * Filter (159) @@ -888,33 +888,33 @@ BroadcastExchange (161) (157) Scan parquet default.date_dim -Output [2]: [d_date_sk#83, d_year#97] +Output [2]: [d_date_sk#70, d_year#82] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (158) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#83, d_year#97] +Input [2]: [d_date_sk#70, d_year#82] (159) Filter [codegen id : 1] -Input [2]: [d_date_sk#83, d_year#97] -Condition : (d_year#97 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#83)) +Input [2]: [d_date_sk#70, d_year#82] +Condition : (d_year#82 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#70)) (160) Project [codegen id : 1] -Output [1]: [d_date_sk#83] -Input [2]: [d_date_sk#83, d_year#97] +Output [1]: [d_date_sk#70] +Input [2]: [d_date_sk#70, d_year#82] (161) BroadcastExchange -Input [1]: [d_date_sk#83] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#98] +Input [1]: [d_date_sk#70] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=16] -Subquery:5 Hosting operator id = 65 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] +Subquery:5 Hosting operator id = 65 Hosting Expression = ReusedSubquery Subquery scalar-subquery#31, [id=#32] -Subquery:6 Hosting operator id = 74 Hosting Expression = ws_sold_date_sk#55 IN dynamicpruning#6 +Subquery:6 Hosting operator id = 74 Hosting Expression = ws_sold_date_sk#47 IN dynamicpruning#6 -Subquery:7 Hosting operator id = 102 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] +Subquery:7 Hosting operator id = 102 Hosting Expression = ReusedSubquery Subquery scalar-subquery#31, [id=#32] -Subquery:8 Hosting operator id = 119 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] +Subquery:8 Hosting operator id = 119 Hosting Expression = ReusedSubquery Subquery scalar-subquery#31, [id=#32] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt index bea457e24dca9..a688a3d70f2a0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q23b/explain.txt @@ -146,7 +146,7 @@ Condition : isnotnull(i_item_sk#12) (13) BroadcastExchange Input [2]: [i_item_sk#12, i_item_desc#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (14) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#7] @@ -154,42 +154,42 @@ Right keys [1]: [i_item_sk#12] Join condition: None (15) Project [codegen id : 3] -Output [3]: [d_date#11, i_item_sk#12, substr(i_item_desc#13, 1, 30) AS _groupingexpression#15] +Output [3]: [d_date#11, i_item_sk#12, substr(i_item_desc#13, 1, 30) AS _groupingexpression#14] Input [4]: [ss_item_sk#7, d_date#11, i_item_sk#12, i_item_desc#13] (16) HashAggregate [codegen id : 3] -Input [3]: [d_date#11, i_item_sk#12, _groupingexpression#15] -Keys [3]: [_groupingexpression#15, i_item_sk#12, d_date#11] +Input [3]: [d_date#11, i_item_sk#12, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, i_item_sk#12, d_date#11] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [4]: [_groupingexpression#15, i_item_sk#12, d_date#11, count#17] +Aggregate Attributes [1]: [count#15] +Results [4]: [_groupingexpression#14, i_item_sk#12, d_date#11, count#16] (17) Exchange -Input [4]: [_groupingexpression#15, i_item_sk#12, d_date#11, count#17] -Arguments: hashpartitioning(_groupingexpression#15, i_item_sk#12, d_date#11, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [4]: [_groupingexpression#14, i_item_sk#12, d_date#11, count#16] +Arguments: hashpartitioning(_groupingexpression#14, i_item_sk#12, d_date#11, 5), ENSURE_REQUIREMENTS, [plan_id=2] (18) HashAggregate [codegen id : 4] -Input [4]: [_groupingexpression#15, i_item_sk#12, d_date#11, count#17] -Keys [3]: [_groupingexpression#15, i_item_sk#12, d_date#11] +Input [4]: [_groupingexpression#14, i_item_sk#12, d_date#11, count#16] +Keys [3]: [_groupingexpression#14, i_item_sk#12, d_date#11] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [2]: [i_item_sk#12 AS item_sk#20, count(1)#19 AS cnt#21] +Aggregate Attributes [1]: [count(1)#17] +Results [2]: [i_item_sk#12 AS item_sk#18, count(1)#17 AS cnt#19] (19) Filter [codegen id : 4] -Input [2]: [item_sk#20, cnt#21] -Condition : (cnt#21 > 4) +Input [2]: [item_sk#18, cnt#19] +Condition : (cnt#19 > 4) (20) Project [codegen id : 4] -Output [1]: [item_sk#20] -Input [2]: [item_sk#20, cnt#21] +Output [1]: [item_sk#18] +Input [2]: [item_sk#18, cnt#19] (21) BroadcastExchange -Input [1]: [item_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [item_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (22) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [item_sk#20] +Right keys [1]: [item_sk#18] Join condition: None (23) Project [codegen id : 5] @@ -198,293 +198,293 @@ Input [5]: [cs_bill_customer_sk#1, cs_item_sk#2, cs_quantity#3, cs_list_price#4, (24) Exchange Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] -Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#23] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] (25) Sort [codegen id : 6] Input [4]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5] Arguments: [cs_bill_customer_sk#1 ASC NULLS FIRST], false, 0 (26) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] +Output [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 8] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] (28) Filter [codegen id : 8] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] -Condition : isnotnull(ss_customer_sk#24) +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] +Condition : isnotnull(ss_customer_sk#20) (29) Project [codegen id : 8] -Output [3]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, ss_sold_date_sk#27] +Output [3]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, ss_sold_date_sk#23] (30) Scan parquet default.customer -Output [1]: [c_customer_sk#28] +Output [1]: [c_customer_sk#24] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 7] -Input [1]: [c_customer_sk#28] +Input [1]: [c_customer_sk#24] (32) Filter [codegen id : 7] -Input [1]: [c_customer_sk#28] -Condition : isnotnull(c_customer_sk#28) +Input [1]: [c_customer_sk#24] +Condition : isnotnull(c_customer_sk#24) (33) BroadcastExchange -Input [1]: [c_customer_sk#28] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#29] +Input [1]: [c_customer_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_customer_sk#24] -Right keys [1]: [c_customer_sk#28] +Left keys [1]: [ss_customer_sk#20] +Right keys [1]: [c_customer_sk#24] Join condition: None (35) Project [codegen id : 8] -Output [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] -Input [4]: [ss_customer_sk#24, ss_quantity#25, ss_sales_price#26, c_customer_sk#28] +Output [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Input [4]: [ss_customer_sk#20, ss_quantity#21, ss_sales_price#22, c_customer_sk#24] (36) HashAggregate [codegen id : 8] -Input [3]: [ss_quantity#25, ss_sales_price#26, c_customer_sk#28] -Keys [1]: [c_customer_sk#28] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#30, isEmpty#31] -Results [3]: [c_customer_sk#28, sum#32, isEmpty#33] +Input [3]: [ss_quantity#21, ss_sales_price#22, c_customer_sk#24] +Keys [1]: [c_customer_sk#24] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#25, isEmpty#26] +Results [3]: [c_customer_sk#24, sum#27, isEmpty#28] (37) Exchange -Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] -Arguments: hashpartitioning(c_customer_sk#28, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [3]: [c_customer_sk#24, sum#27, isEmpty#28] +Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [plan_id=6] (38) HashAggregate [codegen id : 9] -Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] -Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] +Input [3]: [c_customer_sk#24, sum#27, isEmpty#28] +Keys [1]: [c_customer_sk#24] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29] +Results [2]: [c_customer_sk#24, sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29 AS ssales#30] (39) Filter [codegen id : 9] -Input [2]: [c_customer_sk#28, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) +Input [2]: [c_customer_sk#24, ssales#30] +Condition : (isnotnull(ssales#30) AND (cast(ssales#30 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(Subquery scalar-subquery#31, [id=#32] as decimal(32,6)))), DecimalType(38,8)))) (40) Project [codegen id : 9] -Output [1]: [c_customer_sk#28] -Input [2]: [c_customer_sk#28, ssales#36] +Output [1]: [c_customer_sk#24] +Input [2]: [c_customer_sk#24, ssales#30] (41) Sort [codegen id : 9] -Input [1]: [c_customer_sk#28] -Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (42) SortMergeJoin [codegen id : 17] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#28] +Right keys [1]: [c_customer_sk#24] Join condition: None (43) Scan parquet default.customer -Output [3]: [c_customer_sk#39, c_first_name#40, c_last_name#41] +Output [3]: [c_customer_sk#33, c_first_name#34, c_last_name#35] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 10] -Input [3]: [c_customer_sk#39, c_first_name#40, c_last_name#41] +Input [3]: [c_customer_sk#33, c_first_name#34, c_last_name#35] (45) Filter [codegen id : 10] -Input [3]: [c_customer_sk#39, c_first_name#40, c_last_name#41] -Condition : isnotnull(c_customer_sk#39) +Input [3]: [c_customer_sk#33, c_first_name#34, c_last_name#35] +Condition : isnotnull(c_customer_sk#33) (46) Exchange -Input [3]: [c_customer_sk#39, c_first_name#40, c_last_name#41] -Arguments: hashpartitioning(c_customer_sk#39, 5), ENSURE_REQUIREMENTS, [id=#42] +Input [3]: [c_customer_sk#33, c_first_name#34, c_last_name#35] +Arguments: hashpartitioning(c_customer_sk#33, 5), ENSURE_REQUIREMENTS, [plan_id=7] (47) Sort [codegen id : 11] -Input [3]: [c_customer_sk#39, c_first_name#40, c_last_name#41] -Arguments: [c_customer_sk#39 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#33, c_first_name#34, c_last_name#35] +Arguments: [c_customer_sk#33 ASC NULLS FIRST], false, 0 (48) ReusedExchange [Reuses operator id: 37] -Output [3]: [c_customer_sk#28, sum#32, isEmpty#33] +Output [3]: [c_customer_sk#24, sum#27, isEmpty#28] (49) HashAggregate [codegen id : 14] -Input [3]: [c_customer_sk#28, sum#32, isEmpty#33] -Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] +Input [3]: [c_customer_sk#24, sum#27, isEmpty#28] +Keys [1]: [c_customer_sk#24] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29] +Results [2]: [c_customer_sk#24, sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29 AS ssales#30] (50) Filter [codegen id : 14] -Input [2]: [c_customer_sk#28, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) +Input [2]: [c_customer_sk#24, ssales#30] +Condition : (isnotnull(ssales#30) AND (cast(ssales#30 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#31, [id=#32] as decimal(32,6)))), DecimalType(38,8)))) (51) Project [codegen id : 14] -Output [1]: [c_customer_sk#28] -Input [2]: [c_customer_sk#28, ssales#36] +Output [1]: [c_customer_sk#24] +Input [2]: [c_customer_sk#24, ssales#30] (52) Sort [codegen id : 14] -Input [1]: [c_customer_sk#28] -Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (53) SortMergeJoin [codegen id : 15] -Left keys [1]: [c_customer_sk#39] -Right keys [1]: [c_customer_sk#28] +Left keys [1]: [c_customer_sk#33] +Right keys [1]: [c_customer_sk#24] Join condition: None (54) BroadcastExchange -Input [3]: [c_customer_sk#39, c_first_name#40, c_last_name#41] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#43] +Input [3]: [c_customer_sk#33, c_first_name#34, c_last_name#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] (55) BroadcastHashJoin [codegen id : 17] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#39] +Right keys [1]: [c_customer_sk#33] Join condition: None (56) Project [codegen id : 17] -Output [5]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#40, c_last_name#41] -Input [7]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_customer_sk#39, c_first_name#40, c_last_name#41] +Output [5]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#34, c_last_name#35] +Input [7]: [cs_bill_customer_sk#1, cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_customer_sk#33, c_first_name#34, c_last_name#35] (57) ReusedExchange [Reuses operator id: 92] -Output [1]: [d_date_sk#44] +Output [1]: [d_date_sk#36] (58) BroadcastHashJoin [codegen id : 17] Left keys [1]: [cs_sold_date_sk#5] -Right keys [1]: [d_date_sk#44] +Right keys [1]: [d_date_sk#36] Join condition: None (59) Project [codegen id : 17] -Output [4]: [cs_quantity#3, cs_list_price#4, c_first_name#40, c_last_name#41] -Input [6]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#40, c_last_name#41, d_date_sk#44] +Output [4]: [cs_quantity#3, cs_list_price#4, c_first_name#34, c_last_name#35] +Input [6]: [cs_quantity#3, cs_list_price#4, cs_sold_date_sk#5, c_first_name#34, c_last_name#35, d_date_sk#36] (60) HashAggregate [codegen id : 17] -Input [4]: [cs_quantity#3, cs_list_price#4, c_first_name#40, c_last_name#41] -Keys [2]: [c_last_name#41, c_first_name#40] +Input [4]: [cs_quantity#3, cs_list_price#4, c_first_name#34, c_last_name#35] +Keys [2]: [c_last_name#35, c_first_name#34] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#45, isEmpty#46] -Results [4]: [c_last_name#41, c_first_name#40, sum#47, isEmpty#48] +Aggregate Attributes [2]: [sum#37, isEmpty#38] +Results [4]: [c_last_name#35, c_first_name#34, sum#39, isEmpty#40] (61) Exchange -Input [4]: [c_last_name#41, c_first_name#40, sum#47, isEmpty#48] -Arguments: hashpartitioning(c_last_name#41, c_first_name#40, 5), ENSURE_REQUIREMENTS, [id=#49] +Input [4]: [c_last_name#35, c_first_name#34, sum#39, isEmpty#40] +Arguments: hashpartitioning(c_last_name#35, c_first_name#34, 5), ENSURE_REQUIREMENTS, [plan_id=9] (62) HashAggregate [codegen id : 18] -Input [4]: [c_last_name#41, c_first_name#40, sum#47, isEmpty#48] -Keys [2]: [c_last_name#41, c_first_name#40] +Input [4]: [c_last_name#35, c_first_name#34, sum#39, isEmpty#40] +Keys [2]: [c_last_name#35, c_first_name#34] Functions [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))#50] -Results [3]: [c_last_name#41, c_first_name#40, sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))#50 AS sales#51] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))#41] +Results [3]: [c_last_name#35, c_first_name#34, sum(CheckOverflow((promote_precision(cast(cs_quantity#3 as decimal(12,2))) * promote_precision(cast(cs_list_price#4 as decimal(12,2)))), DecimalType(18,2)))#41 AS sales#42] (63) Scan parquet default.web_sales -Output [5]: [ws_item_sk#52, ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56] +Output [5]: [ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#56), dynamicpruningexpression(ws_sold_date_sk#56 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(ws_sold_date_sk#47), dynamicpruningexpression(ws_sold_date_sk#47 IN dynamicpruning#6)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (64) ColumnarToRow [codegen id : 23] -Input [5]: [ws_item_sk#52, ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56] +Input [5]: [ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] (65) Filter [codegen id : 23] -Input [5]: [ws_item_sk#52, ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56] -Condition : isnotnull(ws_bill_customer_sk#53) +Input [5]: [ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] +Condition : isnotnull(ws_bill_customer_sk#44) (66) ReusedExchange [Reuses operator id: 21] -Output [1]: [item_sk#20] +Output [1]: [item_sk#18] (67) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [ws_item_sk#52] -Right keys [1]: [item_sk#20] +Left keys [1]: [ws_item_sk#43] +Right keys [1]: [item_sk#18] Join condition: None (68) Project [codegen id : 23] -Output [4]: [ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56] -Input [5]: [ws_item_sk#52, ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56] +Output [4]: [ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] +Input [5]: [ws_item_sk#43, ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] (69) Exchange -Input [4]: [ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56] -Arguments: hashpartitioning(ws_bill_customer_sk#53, 5), ENSURE_REQUIREMENTS, [id=#57] +Input [4]: [ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] +Arguments: hashpartitioning(ws_bill_customer_sk#44, 5), ENSURE_REQUIREMENTS, [plan_id=10] (70) Sort [codegen id : 24] -Input [4]: [ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56] -Arguments: [ws_bill_customer_sk#53 ASC NULLS FIRST], false, 0 +Input [4]: [ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47] +Arguments: [ws_bill_customer_sk#44 ASC NULLS FIRST], false, 0 (71) ReusedExchange [Reuses operator id: 37] -Output [3]: [c_customer_sk#28, sum#58, isEmpty#59] +Output [3]: [c_customer_sk#24, sum#48, isEmpty#49] (72) HashAggregate [codegen id : 27] -Input [3]: [c_customer_sk#28, sum#58, isEmpty#59] -Keys [1]: [c_customer_sk#28] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35] -Results [2]: [c_customer_sk#28, sum(CheckOverflow((promote_precision(cast(ss_quantity#25 as decimal(12,2))) * promote_precision(cast(ss_sales_price#26 as decimal(12,2)))), DecimalType(18,2)))#35 AS ssales#36] +Input [3]: [c_customer_sk#24, sum#48, isEmpty#49] +Keys [1]: [c_customer_sk#24] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29] +Results [2]: [c_customer_sk#24, sum(CheckOverflow((promote_precision(cast(ss_quantity#21 as decimal(12,2))) * promote_precision(cast(ss_sales_price#22 as decimal(12,2)))), DecimalType(18,2)))#29 AS ssales#30] (73) Filter [codegen id : 27] -Input [2]: [c_customer_sk#28, ssales#36] -Condition : (isnotnull(ssales#36) AND (cast(ssales#36 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#37, [id=#38] as decimal(32,6)))), DecimalType(38,8)))) +Input [2]: [c_customer_sk#24, ssales#30] +Condition : (isnotnull(ssales#30) AND (cast(ssales#30 as decimal(38,8)) > CheckOverflow((0.500000 * promote_precision(cast(ReusedSubquery Subquery scalar-subquery#31, [id=#32] as decimal(32,6)))), DecimalType(38,8)))) (74) Project [codegen id : 27] -Output [1]: [c_customer_sk#28] -Input [2]: [c_customer_sk#28, ssales#36] +Output [1]: [c_customer_sk#24] +Input [2]: [c_customer_sk#24, ssales#30] (75) Sort [codegen id : 27] -Input [1]: [c_customer_sk#28] -Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#24] +Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 (76) SortMergeJoin [codegen id : 35] -Left keys [1]: [ws_bill_customer_sk#53] -Right keys [1]: [c_customer_sk#28] +Left keys [1]: [ws_bill_customer_sk#44] +Right keys [1]: [c_customer_sk#24] Join condition: None (77) ReusedExchange [Reuses operator id: 54] -Output [3]: [c_customer_sk#60, c_first_name#61, c_last_name#62] +Output [3]: [c_customer_sk#50, c_first_name#51, c_last_name#52] (78) BroadcastHashJoin [codegen id : 35] -Left keys [1]: [ws_bill_customer_sk#53] -Right keys [1]: [c_customer_sk#60] +Left keys [1]: [ws_bill_customer_sk#44] +Right keys [1]: [c_customer_sk#50] Join condition: None (79) Project [codegen id : 35] -Output [5]: [ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56, c_first_name#61, c_last_name#62] -Input [7]: [ws_bill_customer_sk#53, ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56, c_customer_sk#60, c_first_name#61, c_last_name#62] +Output [5]: [ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47, c_first_name#51, c_last_name#52] +Input [7]: [ws_bill_customer_sk#44, ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47, c_customer_sk#50, c_first_name#51, c_last_name#52] (80) ReusedExchange [Reuses operator id: 92] -Output [1]: [d_date_sk#63] +Output [1]: [d_date_sk#53] (81) BroadcastHashJoin [codegen id : 35] -Left keys [1]: [ws_sold_date_sk#56] -Right keys [1]: [d_date_sk#63] +Left keys [1]: [ws_sold_date_sk#47] +Right keys [1]: [d_date_sk#53] Join condition: None (82) Project [codegen id : 35] -Output [4]: [ws_quantity#54, ws_list_price#55, c_first_name#61, c_last_name#62] -Input [6]: [ws_quantity#54, ws_list_price#55, ws_sold_date_sk#56, c_first_name#61, c_last_name#62, d_date_sk#63] +Output [4]: [ws_quantity#45, ws_list_price#46, c_first_name#51, c_last_name#52] +Input [6]: [ws_quantity#45, ws_list_price#46, ws_sold_date_sk#47, c_first_name#51, c_last_name#52, d_date_sk#53] (83) HashAggregate [codegen id : 35] -Input [4]: [ws_quantity#54, ws_list_price#55, c_first_name#61, c_last_name#62] -Keys [2]: [c_last_name#62, c_first_name#61] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#64, isEmpty#65] -Results [4]: [c_last_name#62, c_first_name#61, sum#66, isEmpty#67] +Input [4]: [ws_quantity#45, ws_list_price#46, c_first_name#51, c_last_name#52] +Keys [2]: [c_last_name#52, c_first_name#51] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#45 as decimal(12,2))) * promote_precision(cast(ws_list_price#46 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#54, isEmpty#55] +Results [4]: [c_last_name#52, c_first_name#51, sum#56, isEmpty#57] (84) Exchange -Input [4]: [c_last_name#62, c_first_name#61, sum#66, isEmpty#67] -Arguments: hashpartitioning(c_last_name#62, c_first_name#61, 5), ENSURE_REQUIREMENTS, [id=#68] +Input [4]: [c_last_name#52, c_first_name#51, sum#56, isEmpty#57] +Arguments: hashpartitioning(c_last_name#52, c_first_name#51, 5), ENSURE_REQUIREMENTS, [plan_id=11] (85) HashAggregate [codegen id : 36] -Input [4]: [c_last_name#62, c_first_name#61, sum#66, isEmpty#67] -Keys [2]: [c_last_name#62, c_first_name#61] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2)))#69] -Results [3]: [c_last_name#62, c_first_name#61, sum(CheckOverflow((promote_precision(cast(ws_quantity#54 as decimal(12,2))) * promote_precision(cast(ws_list_price#55 as decimal(12,2)))), DecimalType(18,2)))#69 AS sales#70] +Input [4]: [c_last_name#52, c_first_name#51, sum#56, isEmpty#57] +Keys [2]: [c_last_name#52, c_first_name#51] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#45 as decimal(12,2))) * promote_precision(cast(ws_list_price#46 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#45 as decimal(12,2))) * promote_precision(cast(ws_list_price#46 as decimal(12,2)))), DecimalType(18,2)))#58] +Results [3]: [c_last_name#52, c_first_name#51, sum(CheckOverflow((promote_precision(cast(ws_quantity#45 as decimal(12,2))) * promote_precision(cast(ws_list_price#46 as decimal(12,2)))), DecimalType(18,2)))#58 AS sales#59] (86) Union (87) TakeOrderedAndProject -Input [3]: [c_last_name#41, c_first_name#40, sales#51] -Arguments: 100, [c_last_name#41 ASC NULLS FIRST, c_first_name#40 ASC NULLS FIRST, sales#51 ASC NULLS FIRST], [c_last_name#41, c_first_name#40, sales#51] +Input [3]: [c_last_name#35, c_first_name#34, sales#42] +Arguments: 100, [c_last_name#35 ASC NULLS FIRST, c_first_name#34 ASC NULLS FIRST, sales#42 ASC NULLS FIRST], [c_last_name#35, c_first_name#34, sales#42] ===== Subqueries ===== @@ -497,26 +497,26 @@ BroadcastExchange (92) (88) Scan parquet default.date_dim -Output [3]: [d_date_sk#44, d_year#71, d_moy#72] +Output [3]: [d_date_sk#36, d_year#60, d_moy#61] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (89) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#44, d_year#71, d_moy#72] +Input [3]: [d_date_sk#36, d_year#60, d_moy#61] (90) Filter [codegen id : 1] -Input [3]: [d_date_sk#44, d_year#71, d_moy#72] -Condition : ((((isnotnull(d_year#71) AND isnotnull(d_moy#72)) AND (d_year#71 = 2000)) AND (d_moy#72 = 2)) AND isnotnull(d_date_sk#44)) +Input [3]: [d_date_sk#36, d_year#60, d_moy#61] +Condition : ((((isnotnull(d_year#60) AND isnotnull(d_moy#61)) AND (d_year#60 = 2000)) AND (d_moy#61 = 2)) AND isnotnull(d_date_sk#36)) (91) Project [codegen id : 1] -Output [1]: [d_date_sk#44] -Input [3]: [d_date_sk#44, d_year#71, d_moy#72] +Output [1]: [d_date_sk#36] +Input [3]: [d_date_sk#36, d_year#60, d_moy#61] (92) BroadcastExchange -Input [1]: [d_date_sk#44] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#73] +Input [1]: [d_date_sk#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] Subquery:2 Hosting operator id = 4 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 BroadcastExchange (97) @@ -527,28 +527,28 @@ BroadcastExchange (97) (93) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_date#11, d_year#74] +Output [3]: [d_date_sk#10, d_date#11, d_year#62] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (94) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_date#11, d_year#74] +Input [3]: [d_date_sk#10, d_date#11, d_year#62] (95) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_date#11, d_year#74] -Condition : (d_year#74 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#10, d_date#11, d_year#62] +Condition : (d_year#62 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#10)) (96) Project [codegen id : 1] Output [2]: [d_date_sk#10, d_date#11] -Input [3]: [d_date_sk#10, d_date#11, d_year#74] +Input [3]: [d_date_sk#10, d_date#11, d_year#62] (97) BroadcastExchange Input [2]: [d_date_sk#10, d_date#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#75] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=13] -Subquery:3 Hosting operator id = 39 Hosting Expression = Subquery scalar-subquery#37, [id=#38] +Subquery:3 Hosting operator id = 39 Hosting Expression = Subquery scalar-subquery#31, [id=#32] * HashAggregate (112) +- Exchange (111) +- * HashAggregate (110) @@ -567,81 +567,81 @@ Subquery:3 Hosting operator id = 39 Hosting Expression = Subquery scalar-subquer (98) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#76, ss_quantity#77, ss_sales_price#78, ss_sold_date_sk#79] +Output [4]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#79), dynamicpruningexpression(ss_sold_date_sk#79 IN dynamicpruning#80)] +PartitionFilters: [isnotnull(ss_sold_date_sk#66), dynamicpruningexpression(ss_sold_date_sk#66 IN dynamicpruning#67)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (99) ColumnarToRow [codegen id : 3] -Input [4]: [ss_customer_sk#76, ss_quantity#77, ss_sales_price#78, ss_sold_date_sk#79] +Input [4]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66] (100) Filter [codegen id : 3] -Input [4]: [ss_customer_sk#76, ss_quantity#77, ss_sales_price#78, ss_sold_date_sk#79] -Condition : isnotnull(ss_customer_sk#76) +Input [4]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66] +Condition : isnotnull(ss_customer_sk#63) (101) ReusedExchange [Reuses operator id: 33] -Output [1]: [c_customer_sk#81] +Output [1]: [c_customer_sk#68] (102) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_customer_sk#76] -Right keys [1]: [c_customer_sk#81] +Left keys [1]: [ss_customer_sk#63] +Right keys [1]: [c_customer_sk#68] Join condition: None (103) Project [codegen id : 3] -Output [4]: [ss_quantity#77, ss_sales_price#78, ss_sold_date_sk#79, c_customer_sk#81] -Input [5]: [ss_customer_sk#76, ss_quantity#77, ss_sales_price#78, ss_sold_date_sk#79, c_customer_sk#81] +Output [4]: [ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66, c_customer_sk#68] +Input [5]: [ss_customer_sk#63, ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66, c_customer_sk#68] (104) ReusedExchange [Reuses operator id: 117] -Output [1]: [d_date_sk#82] +Output [1]: [d_date_sk#69] (105) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_sold_date_sk#79] -Right keys [1]: [d_date_sk#82] +Left keys [1]: [ss_sold_date_sk#66] +Right keys [1]: [d_date_sk#69] Join condition: None (106) Project [codegen id : 3] -Output [3]: [ss_quantity#77, ss_sales_price#78, c_customer_sk#81] -Input [5]: [ss_quantity#77, ss_sales_price#78, ss_sold_date_sk#79, c_customer_sk#81, d_date_sk#82] +Output [3]: [ss_quantity#64, ss_sales_price#65, c_customer_sk#68] +Input [5]: [ss_quantity#64, ss_sales_price#65, ss_sold_date_sk#66, c_customer_sk#68, d_date_sk#69] (107) HashAggregate [codegen id : 3] -Input [3]: [ss_quantity#77, ss_sales_price#78, c_customer_sk#81] -Keys [1]: [c_customer_sk#81] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#83, isEmpty#84] -Results [3]: [c_customer_sk#81, sum#85, isEmpty#86] +Input [3]: [ss_quantity#64, ss_sales_price#65, c_customer_sk#68] +Keys [1]: [c_customer_sk#68] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#70, isEmpty#71] +Results [3]: [c_customer_sk#68, sum#72, isEmpty#73] (108) Exchange -Input [3]: [c_customer_sk#81, sum#85, isEmpty#86] -Arguments: hashpartitioning(c_customer_sk#81, 5), ENSURE_REQUIREMENTS, [id=#87] +Input [3]: [c_customer_sk#68, sum#72, isEmpty#73] +Arguments: hashpartitioning(c_customer_sk#68, 5), ENSURE_REQUIREMENTS, [plan_id=14] (109) HashAggregate [codegen id : 4] -Input [3]: [c_customer_sk#81, sum#85, isEmpty#86] -Keys [1]: [c_customer_sk#81] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2)))#88] -Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#77 as decimal(12,2))) * promote_precision(cast(ss_sales_price#78 as decimal(12,2)))), DecimalType(18,2)))#88 AS csales#89] +Input [3]: [c_customer_sk#68, sum#72, isEmpty#73] +Keys [1]: [c_customer_sk#68] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2)))#74] +Results [1]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_sales_price#65 as decimal(12,2)))), DecimalType(18,2)))#74 AS csales#75] (110) HashAggregate [codegen id : 4] -Input [1]: [csales#89] +Input [1]: [csales#75] Keys: [] -Functions [1]: [partial_max(csales#89)] -Aggregate Attributes [1]: [max#90] -Results [1]: [max#91] +Functions [1]: [partial_max(csales#75)] +Aggregate Attributes [1]: [max#76] +Results [1]: [max#77] (111) Exchange -Input [1]: [max#91] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#92] +Input [1]: [max#77] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] (112) HashAggregate [codegen id : 5] -Input [1]: [max#91] +Input [1]: [max#77] Keys: [] -Functions [1]: [max(csales#89)] -Aggregate Attributes [1]: [max(csales#89)#93] -Results [1]: [max(csales#89)#93 AS tpcds_cmax#94] +Functions [1]: [max(csales#75)] +Aggregate Attributes [1]: [max(csales#75)#78] +Results [1]: [max(csales#75)#78 AS tpcds_cmax#79] -Subquery:4 Hosting operator id = 98 Hosting Expression = ss_sold_date_sk#79 IN dynamicpruning#80 +Subquery:4 Hosting operator id = 98 Hosting Expression = ss_sold_date_sk#66 IN dynamicpruning#67 BroadcastExchange (117) +- * Project (116) +- * Filter (115) @@ -650,31 +650,31 @@ BroadcastExchange (117) (113) Scan parquet default.date_dim -Output [2]: [d_date_sk#82, d_year#95] +Output [2]: [d_date_sk#69, d_year#80] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [2000,2001,2002,2003]), IsNotNull(d_date_sk)] ReadSchema: struct (114) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#82, d_year#95] +Input [2]: [d_date_sk#69, d_year#80] (115) Filter [codegen id : 1] -Input [2]: [d_date_sk#82, d_year#95] -Condition : (d_year#95 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#82)) +Input [2]: [d_date_sk#69, d_year#80] +Condition : (d_year#80 IN (2000,2001,2002,2003) AND isnotnull(d_date_sk#69)) (116) Project [codegen id : 1] -Output [1]: [d_date_sk#82] -Input [2]: [d_date_sk#82, d_year#95] +Output [1]: [d_date_sk#69] +Input [2]: [d_date_sk#69, d_year#80] (117) BroadcastExchange -Input [1]: [d_date_sk#82] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#96] +Input [1]: [d_date_sk#69] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=16] -Subquery:5 Hosting operator id = 50 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] +Subquery:5 Hosting operator id = 50 Hosting Expression = ReusedSubquery Subquery scalar-subquery#31, [id=#32] -Subquery:6 Hosting operator id = 63 Hosting Expression = ws_sold_date_sk#56 IN dynamicpruning#6 +Subquery:6 Hosting operator id = 63 Hosting Expression = ws_sold_date_sk#47 IN dynamicpruning#6 -Subquery:7 Hosting operator id = 73 Hosting Expression = ReusedSubquery Subquery scalar-subquery#37, [id=#38] +Subquery:7 Hosting operator id = 73 Hosting Expression = ReusedSubquery Subquery scalar-subquery#31, [id=#32] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt index 7b82aed515f39..5ee962f66e3b7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a.sf100/explain.txt @@ -84,7 +84,7 @@ Condition : ((isnotnull(i_color#10) AND (i_color#10 = pale )) AND (8) BroadcastExchange Input [6]: [i_item_sk#7, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_item_sk#1] @@ -97,187 +97,187 @@ Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, (11) Exchange Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] -Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#14] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 3] Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 (13) Scan parquet default.customer -Output [4]: [c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] +Output [4]: [c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] ReadSchema: struct (14) ColumnarToRow [codegen id : 4] -Input [4]: [c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] +Input [4]: [c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] (15) Filter [codegen id : 4] -Input [4]: [c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] -Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_birth_country#18)) +Input [4]: [c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Condition : (isnotnull(c_customer_sk#13) AND isnotnull(c_birth_country#16)) (16) Exchange -Input [4]: [c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] -Arguments: hashpartitioning(c_customer_sk#15, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [4]: [c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Arguments: hashpartitioning(c_customer_sk#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] (17) Sort [codegen id : 5] -Input [4]: [c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] -Arguments: [c_customer_sk#15 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Arguments: [c_customer_sk#13 ASC NULLS FIRST], false, 0 (18) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#15] +Right keys [1]: [c_customer_sk#13] Join condition: None (19) Project [codegen id : 6] -Output [12]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] +Output [12]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] (20) Exchange -Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] (21) Sort [codegen id : 7] -Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 (22) Scan parquet default.store_returns -Output [3]: [sr_item_sk#21, sr_ticket_number#22, sr_returned_date_sk#23] +Output [3]: [sr_item_sk#17, sr_ticket_number#18, sr_returned_date_sk#19] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 8] -Input [3]: [sr_item_sk#21, sr_ticket_number#22, sr_returned_date_sk#23] +Input [3]: [sr_item_sk#17, sr_ticket_number#18, sr_returned_date_sk#19] (24) Filter [codegen id : 8] -Input [3]: [sr_item_sk#21, sr_ticket_number#22, sr_returned_date_sk#23] -Condition : (isnotnull(sr_ticket_number#22) AND isnotnull(sr_item_sk#21)) +Input [3]: [sr_item_sk#17, sr_ticket_number#18, sr_returned_date_sk#19] +Condition : (isnotnull(sr_ticket_number#18) AND isnotnull(sr_item_sk#17)) (25) Project [codegen id : 8] -Output [2]: [sr_item_sk#21, sr_ticket_number#22] -Input [3]: [sr_item_sk#21, sr_ticket_number#22, sr_returned_date_sk#23] +Output [2]: [sr_item_sk#17, sr_ticket_number#18] +Input [3]: [sr_item_sk#17, sr_ticket_number#18, sr_returned_date_sk#19] (26) Exchange -Input [2]: [sr_item_sk#21, sr_ticket_number#22] -Arguments: hashpartitioning(sr_ticket_number#22, sr_item_sk#21, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [2]: [sr_item_sk#17, sr_ticket_number#18] +Arguments: hashpartitioning(sr_ticket_number#18, sr_item_sk#17, 5), ENSURE_REQUIREMENTS, [plan_id=5] (27) Sort [codegen id : 9] -Input [2]: [sr_item_sk#21, sr_ticket_number#22] -Arguments: [sr_ticket_number#22 ASC NULLS FIRST, sr_item_sk#21 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#17, sr_ticket_number#18] +Arguments: [sr_ticket_number#18 ASC NULLS FIRST, sr_item_sk#17 ASC NULLS FIRST], false, 0 (28) SortMergeJoin [codegen id : 12] Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] -Right keys [2]: [sr_ticket_number#22, sr_item_sk#21] +Right keys [2]: [sr_ticket_number#18, sr_item_sk#17] Join condition: None (29) Project [codegen id : 12] -Output [10]: [ss_store_sk#3, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Input [14]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18, sr_item_sk#21, sr_ticket_number#22] +Output [10]: [ss_store_sk#3, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Input [14]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16, sr_item_sk#17, sr_ticket_number#18] (30) Scan parquet default.store -Output [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] +Output [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] ReadSchema: struct (31) ColumnarToRow [codegen id : 10] -Input [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] +Input [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] (32) Filter [codegen id : 10] -Input [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] -Condition : (((isnotnull(s_market_id#27) AND (s_market_id#27 = 8)) AND isnotnull(s_store_sk#25)) AND isnotnull(s_zip#29)) +Input [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] +Condition : (((isnotnull(s_market_id#22) AND (s_market_id#22 = 8)) AND isnotnull(s_store_sk#20)) AND isnotnull(s_zip#24)) (33) Project [codegen id : 10] -Output [4]: [s_store_sk#25, s_store_name#26, s_state#28, s_zip#29] -Input [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] +Output [4]: [s_store_sk#20, s_store_name#21, s_state#23, s_zip#24] +Input [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] (34) BroadcastExchange -Input [4]: [s_store_sk#25, s_store_name#26, s_state#28, s_zip#29] -Arguments: HashedRelationBroadcastMode(List(input[3, string, true]),false), [id=#30] +Input [4]: [s_store_sk#20, s_store_name#21, s_state#23, s_zip#24] +Arguments: HashedRelationBroadcastMode(List(input[3, string, true]),false), [plan_id=6] (35) Scan parquet default.customer_address -Output [3]: [ca_state#31, ca_zip#32, ca_country#33] +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] ReadSchema: struct (36) ColumnarToRow -Input [3]: [ca_state#31, ca_zip#32, ca_country#33] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] (37) Filter -Input [3]: [ca_state#31, ca_zip#32, ca_country#33] -Condition : (isnotnull(ca_country#33) AND isnotnull(ca_zip#32)) +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) (38) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [s_zip#29] -Right keys [1]: [ca_zip#32] +Left keys [1]: [s_zip#24] +Right keys [1]: [ca_zip#26] Join condition: None (39) Project [codegen id : 11] -Output [5]: [s_store_sk#25, s_store_name#26, s_state#28, ca_state#31, ca_country#33] -Input [7]: [s_store_sk#25, s_store_name#26, s_state#28, s_zip#29, ca_state#31, ca_zip#32, ca_country#33] +Output [5]: [s_store_sk#20, s_store_name#21, s_state#23, ca_state#25, ca_country#27] +Input [7]: [s_store_sk#20, s_store_name#21, s_state#23, s_zip#24, ca_state#25, ca_zip#26, ca_country#27] (40) BroadcastExchange -Input [5]: [s_store_sk#25, s_store_name#26, s_state#28, ca_state#31, ca_country#33] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], upper(input[4, string, true])),false), [id=#34] +Input [5]: [s_store_sk#20, s_store_name#21, s_state#23, ca_state#25, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], upper(input[4, string, true])),false), [plan_id=7] (41) BroadcastHashJoin [codegen id : 12] -Left keys [2]: [ss_store_sk#3, c_birth_country#18] -Right keys [2]: [s_store_sk#25, upper(ca_country#33)] +Left keys [2]: [ss_store_sk#3, c_birth_country#16] +Right keys [2]: [s_store_sk#20, upper(ca_country#27)] Join condition: None (42) Project [codegen id : 12] -Output [11]: [ss_net_paid#5, s_store_name#26, s_state#28, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, ca_state#31] -Input [15]: [ss_store_sk#3, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18, s_store_sk#25, s_store_name#26, s_state#28, ca_state#31, ca_country#33] +Output [11]: [ss_net_paid#5, s_store_name#21, s_state#23, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, ca_state#25] +Input [15]: [ss_store_sk#3, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16, s_store_sk#20, s_store_name#21, s_state#23, ca_state#25, ca_country#27] (43) HashAggregate [codegen id : 12] -Input [11]: [ss_net_paid#5, s_store_name#26, s_state#28, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, ca_state#31] -Keys [10]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] +Input [11]: [ss_net_paid#5, s_store_name#21, s_state#23, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, ca_state#25] +Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum#35] -Results [11]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#36] +Aggregate Attributes [1]: [sum#28] +Results [11]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#29] (44) Exchange -Input [11]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#36] -Arguments: hashpartitioning(c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, 5), ENSURE_REQUIREMENTS, [id=#37] +Input [11]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#29] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, 5), ENSURE_REQUIREMENTS, [plan_id=8] (45) HashAggregate [codegen id : 13] -Input [11]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#36] -Keys [10]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] +Input [11]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#29] +Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#38] -Results [4]: [c_last_name#17, c_first_name#16, s_store_name#26, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#38,17,2) AS netpaid#39] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [4]: [c_last_name#15, c_first_name#14, s_store_name#21, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] (46) HashAggregate [codegen id : 13] -Input [4]: [c_last_name#17, c_first_name#16, s_store_name#26, netpaid#39] -Keys [3]: [c_last_name#17, c_first_name#16, s_store_name#26] -Functions [1]: [partial_sum(netpaid#39)] -Aggregate Attributes [2]: [sum#40, isEmpty#41] -Results [5]: [c_last_name#17, c_first_name#16, s_store_name#26, sum#42, isEmpty#43] +Input [4]: [c_last_name#15, c_first_name#14, s_store_name#21, netpaid#31] +Keys [3]: [c_last_name#15, c_first_name#14, s_store_name#21] +Functions [1]: [partial_sum(netpaid#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [5]: [c_last_name#15, c_first_name#14, s_store_name#21, sum#34, isEmpty#35] (47) Exchange -Input [5]: [c_last_name#17, c_first_name#16, s_store_name#26, sum#42, isEmpty#43] -Arguments: hashpartitioning(c_last_name#17, c_first_name#16, s_store_name#26, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [5]: [c_last_name#15, c_first_name#14, s_store_name#21, sum#34, isEmpty#35] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, s_store_name#21, 5), ENSURE_REQUIREMENTS, [plan_id=9] (48) HashAggregate [codegen id : 14] -Input [5]: [c_last_name#17, c_first_name#16, s_store_name#26, sum#42, isEmpty#43] -Keys [3]: [c_last_name#17, c_first_name#16, s_store_name#26] -Functions [1]: [sum(netpaid#39)] -Aggregate Attributes [1]: [sum(netpaid#39)#45] -Results [4]: [c_last_name#17, c_first_name#16, s_store_name#26, sum(netpaid#39)#45 AS paid#46] +Input [5]: [c_last_name#15, c_first_name#14, s_store_name#21, sum#34, isEmpty#35] +Keys [3]: [c_last_name#15, c_first_name#14, s_store_name#21] +Functions [1]: [sum(netpaid#31)] +Aggregate Attributes [1]: [sum(netpaid#31)#36] +Results [4]: [c_last_name#15, c_first_name#14, s_store_name#21, sum(netpaid#31)#36 AS paid#37] (49) Filter [codegen id : 14] -Input [4]: [c_last_name#17, c_first_name#16, s_store_name#26, paid#46] -Condition : (isnotnull(paid#46) AND (cast(paid#46 as decimal(33,8)) > cast(Subquery scalar-subquery#47, [id=#48] as decimal(33,8)))) +Input [4]: [c_last_name#15, c_first_name#14, s_store_name#21, paid#37] +Condition : (isnotnull(paid#37) AND (cast(paid#37 as decimal(33,8)) > cast(Subquery scalar-subquery#38, [id=#39] as decimal(33,8)))) ===== Subqueries ===== -Subquery:1 Hosting operator id = 49 Hosting Expression = Subquery scalar-subquery#47, [id=#48] +Subquery:1 Hosting operator id = 49 Hosting Expression = Subquery scalar-subquery#38, [id=#39] * HashAggregate (96) +- Exchange (95) +- * HashAggregate (94) @@ -346,42 +346,42 @@ Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] (54) Scan parquet default.store -Output [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] +Output [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] ReadSchema: struct (55) ColumnarToRow [codegen id : 1] -Input [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] +Input [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] (56) Filter [codegen id : 1] -Input [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] -Condition : (((isnotnull(s_market_id#27) AND (s_market_id#27 = 8)) AND isnotnull(s_store_sk#25)) AND isnotnull(s_zip#29)) +Input [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] +Condition : (((isnotnull(s_market_id#22) AND (s_market_id#22 = 8)) AND isnotnull(s_store_sk#20)) AND isnotnull(s_zip#24)) (57) Project [codegen id : 1] -Output [4]: [s_store_sk#25, s_store_name#26, s_state#28, s_zip#29] -Input [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] +Output [4]: [s_store_sk#20, s_store_name#21, s_state#23, s_zip#24] +Input [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] (58) BroadcastExchange -Input [4]: [s_store_sk#25, s_store_name#26, s_state#28, s_zip#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] +Input [4]: [s_store_sk#20, s_store_name#21, s_state#23, s_zip#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] (59) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#25] +Right keys [1]: [s_store_sk#20] Join condition: None (60) Project [codegen id : 2] -Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29] -Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, s_store_sk#25, s_store_name#26, s_state#28, s_zip#29] +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, s_store_sk#20, s_store_name#21, s_state#23, s_zip#24] (61) Exchange -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#50] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=11] (62) Sort [codegen id : 3] -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (63) Scan parquet default.item @@ -400,7 +400,7 @@ Condition : isnotnull(i_item_sk#7) (66) Exchange Input [6]: [i_item_sk#7, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] -Arguments: hashpartitioning(i_item_sk#7, 5), ENSURE_REQUIREMENTS, [id=#51] +Arguments: hashpartitioning(i_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=12] (67) Sort [codegen id : 5] Input [6]: [i_item_sk#7, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] @@ -412,130 +412,130 @@ Right keys [1]: [i_item_sk#7] Join condition: None (69) Project [codegen id : 6] -Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] -Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_item_sk#7, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_item_sk#7, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] (70) Exchange -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] -Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#52] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=13] (71) Sort [codegen id : 7] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 (72) ReusedExchange [Reuses operator id: 16] -Output [4]: [c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] +Output [4]: [c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] (73) Sort [codegen id : 9] -Input [4]: [c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] -Arguments: [c_customer_sk#15 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Arguments: [c_customer_sk#13 ASC NULLS FIRST], false, 0 (74) SortMergeJoin [codegen id : 10] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#15] +Right keys [1]: [c_customer_sk#13] Join condition: None (75) Project [codegen id : 10] -Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Input [16]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] +Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Input [16]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] (76) Exchange -Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#53] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=14] (77) Sort [codegen id : 11] -Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 (78) ReusedExchange [Reuses operator id: 26] -Output [2]: [sr_item_sk#21, sr_ticket_number#22] +Output [2]: [sr_item_sk#17, sr_ticket_number#18] (79) Sort [codegen id : 13] -Input [2]: [sr_item_sk#21, sr_ticket_number#22] -Arguments: [sr_ticket_number#22 ASC NULLS FIRST, sr_item_sk#21 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#17, sr_ticket_number#18] +Arguments: [sr_ticket_number#18 ASC NULLS FIRST, sr_item_sk#17 ASC NULLS FIRST], false, 0 (80) SortMergeJoin [codegen id : 14] Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] -Right keys [2]: [sr_ticket_number#22, sr_item_sk#21] +Right keys [2]: [sr_ticket_number#18, sr_item_sk#17] Join condition: None (81) Project [codegen id : 14] -Output [12]: [ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Input [16]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18, sr_item_sk#21, sr_ticket_number#22] +Output [12]: [ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Input [16]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16, sr_item_sk#17, sr_ticket_number#18] (82) Exchange -Input [12]: [ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Arguments: hashpartitioning(c_birth_country#18, s_zip#29, 5), ENSURE_REQUIREMENTS, [id=#54] +Input [12]: [ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Arguments: hashpartitioning(c_birth_country#16, s_zip#24, 5), ENSURE_REQUIREMENTS, [plan_id=15] (83) Sort [codegen id : 15] -Input [12]: [ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Arguments: [c_birth_country#18 ASC NULLS FIRST, s_zip#29 ASC NULLS FIRST], false, 0 +Input [12]: [ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Arguments: [c_birth_country#16 ASC NULLS FIRST, s_zip#24 ASC NULLS FIRST], false, 0 (84) Scan parquet default.customer_address -Output [3]: [ca_state#31, ca_zip#32, ca_country#33] +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] ReadSchema: struct (85) ColumnarToRow [codegen id : 16] -Input [3]: [ca_state#31, ca_zip#32, ca_country#33] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] (86) Filter [codegen id : 16] -Input [3]: [ca_state#31, ca_zip#32, ca_country#33] -Condition : (isnotnull(ca_country#33) AND isnotnull(ca_zip#32)) +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) (87) Exchange -Input [3]: [ca_state#31, ca_zip#32, ca_country#33] -Arguments: hashpartitioning(upper(ca_country#33), ca_zip#32, 5), ENSURE_REQUIREMENTS, [id=#55] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: hashpartitioning(upper(ca_country#27), ca_zip#26, 5), ENSURE_REQUIREMENTS, [plan_id=16] (88) Sort [codegen id : 17] -Input [3]: [ca_state#31, ca_zip#32, ca_country#33] -Arguments: [upper(ca_country#33) ASC NULLS FIRST, ca_zip#32 ASC NULLS FIRST], false, 0 +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: [upper(ca_country#27) ASC NULLS FIRST, ca_zip#26 ASC NULLS FIRST], false, 0 (89) SortMergeJoin [codegen id : 18] -Left keys [2]: [c_birth_country#18, s_zip#29] -Right keys [2]: [upper(ca_country#33), ca_zip#32] +Left keys [2]: [c_birth_country#16, s_zip#24] +Right keys [2]: [upper(ca_country#27), ca_zip#26] Join condition: None (90) Project [codegen id : 18] -Output [11]: [ss_net_paid#5, s_store_name#26, s_state#28, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, ca_state#31] -Input [15]: [ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18, ca_state#31, ca_zip#32, ca_country#33] +Output [11]: [ss_net_paid#5, s_store_name#21, s_state#23, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16, ca_state#25, ca_zip#26, ca_country#27] (91) HashAggregate [codegen id : 18] -Input [11]: [ss_net_paid#5, s_store_name#26, s_state#28, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, ca_state#31] -Keys [10]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] +Input [11]: [ss_net_paid#5, s_store_name#21, s_state#23, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, ca_state#25] +Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum#56] -Results [11]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#57] +Aggregate Attributes [1]: [sum#40] +Results [11]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#41] (92) Exchange -Input [11]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#57] -Arguments: hashpartitioning(c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, 5), ENSURE_REQUIREMENTS, [id=#58] +Input [11]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#41] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, 5), ENSURE_REQUIREMENTS, [plan_id=17] (93) HashAggregate [codegen id : 19] -Input [11]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#57] -Keys [10]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] +Input [11]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#41] +Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#38] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#38,17,2) AS netpaid#39] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] (94) HashAggregate [codegen id : 19] -Input [1]: [netpaid#39] +Input [1]: [netpaid#31] Keys: [] -Functions [1]: [partial_avg(netpaid#39)] -Aggregate Attributes [2]: [sum#59, count#60] -Results [2]: [sum#61, count#62] +Functions [1]: [partial_avg(netpaid#31)] +Aggregate Attributes [2]: [sum#42, count#43] +Results [2]: [sum#44, count#45] (95) Exchange -Input [2]: [sum#61, count#62] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#63] +Input [2]: [sum#44, count#45] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=18] (96) HashAggregate [codegen id : 20] -Input [2]: [sum#61, count#62] +Input [2]: [sum#44, count#45] Keys: [] -Functions [1]: [avg(netpaid#39)] -Aggregate Attributes [1]: [avg(netpaid#39)#64] -Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#39)#64)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#65] +Functions [1]: [avg(netpaid#31)] +Aggregate Attributes [1]: [avg(netpaid#31)#46] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#31)#46)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#47] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt index d1fa0bd182199..0ee0f297a3e98 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24a/explain.txt @@ -67,202 +67,202 @@ Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s (5) Exchange Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#7] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (6) Sort [codegen id : 2] Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 (7) Scan parquet default.store_returns -Output [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] ReadSchema: struct (8) ColumnarToRow [codegen id : 3] -Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] (9) Filter [codegen id : 3] -Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] -Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) (10) Project [codegen id : 3] -Output [2]: [sr_item_sk#8, sr_ticket_number#9] -Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] (11) Exchange -Input [2]: [sr_item_sk#8, sr_ticket_number#9] -Arguments: hashpartitioning(sr_ticket_number#9, sr_item_sk#8, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 4] -Input [2]: [sr_item_sk#8, sr_ticket_number#9] -Arguments: [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 (13) SortMergeJoin [codegen id : 9] Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] -Right keys [2]: [sr_ticket_number#9, sr_item_sk#8] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] Join condition: None (14) Project [codegen id : 9] Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#8, sr_ticket_number#9] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] (15) Scan parquet default.store -Output [5]: [s_store_sk#12, s_store_name#13, s_market_id#14, s_state#15, s_zip#16] +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [5]: [s_store_sk#12, s_store_name#13, s_market_id#14, s_state#15, s_zip#16] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] (17) Filter [codegen id : 5] -Input [5]: [s_store_sk#12, s_store_name#13, s_market_id#14, s_state#15, s_zip#16] -Condition : (((isnotnull(s_market_id#14) AND (s_market_id#14 = 8)) AND isnotnull(s_store_sk#12)) AND isnotnull(s_zip#16)) +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) (18) Project [codegen id : 5] -Output [4]: [s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] -Input [5]: [s_store_sk#12, s_store_name#13, s_market_id#14, s_state#15, s_zip#16] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] (19) BroadcastExchange -Input [4]: [s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (20) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] +Right keys [1]: [s_store_sk#10] Join condition: None (21) Project [codegen id : 9] -Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] (22) Scan parquet default.item -Output [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale ), IsNotNull(i_item_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 6] -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] (24) Filter [codegen id : 6] -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Condition : ((isnotnull(i_color#21) AND (i_color#21 = pale )) AND isnotnull(i_item_sk#18)) +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale )) AND isnotnull(i_item_sk#15)) (25) BroadcastExchange -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#24] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (26) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#18] +Right keys [1]: [i_item_sk#15] Join condition: None (27) Project [codegen id : 9] -Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] (28) Scan parquet default.customer -Output [4]: [c_customer_sk#25, c_first_name#26, c_last_name#27, c_birth_country#28] +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] ReadSchema: struct (29) ColumnarToRow [codegen id : 7] -Input [4]: [c_customer_sk#25, c_first_name#26, c_last_name#27, c_birth_country#28] +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] (30) Filter [codegen id : 7] -Input [4]: [c_customer_sk#25, c_first_name#26, c_last_name#27, c_birth_country#28] -Condition : (isnotnull(c_customer_sk#25) AND isnotnull(c_birth_country#28)) +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_birth_country#24)) (31) BroadcastExchange -Input [4]: [c_customer_sk#25, c_first_name#26, c_last_name#27, c_birth_country#28] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#29] +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (32) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#25] +Right keys [1]: [c_customer_sk#21] Join condition: None (33) Project [codegen id : 9] -Output [12]: [ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, c_birth_country#28] -Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_customer_sk#25, c_first_name#26, c_last_name#27, c_birth_country#28] +Output [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] (34) Scan parquet default.customer_address -Output [3]: [ca_state#30, ca_zip#31, ca_country#32] +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] ReadSchema: struct (35) ColumnarToRow [codegen id : 8] -Input [3]: [ca_state#30, ca_zip#31, ca_country#32] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] (36) Filter [codegen id : 8] -Input [3]: [ca_state#30, ca_zip#31, ca_country#32] -Condition : (isnotnull(ca_country#32) AND isnotnull(ca_zip#31)) +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) (37) BroadcastExchange -Input [3]: [ca_state#30, ca_zip#31, ca_country#32] -Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [id=#33] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [plan_id=6] (38) BroadcastHashJoin [codegen id : 9] -Left keys [2]: [c_birth_country#28, s_zip#16] -Right keys [2]: [upper(ca_country#32), ca_zip#31] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] Join condition: None (39) Project [codegen id : 9] -Output [11]: [ss_net_paid#5, s_store_name#13, s_state#15, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, ca_state#30] -Input [15]: [ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, c_birth_country#28, ca_state#30, ca_zip#31, ca_country#32] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] (40) HashAggregate [codegen id : 9] -Input [11]: [ss_net_paid#5, s_store_name#13, s_state#15, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, ca_state#30] -Keys [10]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum#34] -Results [11]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#35] +Aggregate Attributes [1]: [sum#28] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] (41) Exchange -Input [11]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#35] -Arguments: hashpartitioning(c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) HashAggregate [codegen id : 10] -Input [11]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#35] -Keys [10]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#37] -Results [4]: [c_last_name#27, c_first_name#26, s_store_name#13, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#37,17,2) AS netpaid#38] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] (43) HashAggregate [codegen id : 10] -Input [4]: [c_last_name#27, c_first_name#26, s_store_name#13, netpaid#38] -Keys [3]: [c_last_name#27, c_first_name#26, s_store_name#13] -Functions [1]: [partial_sum(netpaid#38)] -Aggregate Attributes [2]: [sum#39, isEmpty#40] -Results [5]: [c_last_name#27, c_first_name#26, s_store_name#13, sum#41, isEmpty#42] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, netpaid#31] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [partial_sum(netpaid#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] (44) Exchange -Input [5]: [c_last_name#27, c_first_name#26, s_store_name#13, sum#41, isEmpty#42] -Arguments: hashpartitioning(c_last_name#27, c_first_name#26, s_store_name#13, 5), ENSURE_REQUIREMENTS, [id=#43] +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] (45) HashAggregate [codegen id : 11] -Input [5]: [c_last_name#27, c_first_name#26, s_store_name#13, sum#41, isEmpty#42] -Keys [3]: [c_last_name#27, c_first_name#26, s_store_name#13] -Functions [1]: [sum(netpaid#38)] -Aggregate Attributes [1]: [sum(netpaid#38)#44] -Results [4]: [c_last_name#27, c_first_name#26, s_store_name#13, sum(netpaid#38)#44 AS paid#45] +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [sum(netpaid#31)] +Aggregate Attributes [1]: [sum(netpaid#31)#36] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, sum(netpaid#31)#36 AS paid#37] (46) Filter [codegen id : 11] -Input [4]: [c_last_name#27, c_first_name#26, s_store_name#13, paid#45] -Condition : (isnotnull(paid#45) AND (cast(paid#45 as decimal(33,8)) > cast(Subquery scalar-subquery#46, [id=#47] as decimal(33,8)))) +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, paid#37] +Condition : (isnotnull(paid#37) AND (cast(paid#37 as decimal(33,8)) > cast(Subquery scalar-subquery#38, [id=#39] as decimal(33,8)))) ===== Subqueries ===== -Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#46, [id=#47] +Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#38, [id=#39] * HashAggregate (73) +- Exchange (72) +- * HashAggregate (71) @@ -300,118 +300,118 @@ Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 (49) ReusedExchange [Reuses operator id: 11] -Output [2]: [sr_item_sk#8, sr_ticket_number#9] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] (50) Sort [codegen id : 4] -Input [2]: [sr_item_sk#8, sr_ticket_number#9] -Arguments: [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 (51) SortMergeJoin [codegen id : 9] Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] -Right keys [2]: [sr_ticket_number#9, sr_item_sk#8] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] Join condition: None (52) Project [codegen id : 9] Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#8, sr_ticket_number#9] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] (53) ReusedExchange [Reuses operator id: 19] -Output [4]: [s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] (54) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] +Right keys [1]: [s_store_sk#10] Join condition: None (55) Project [codegen id : 9] -Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] (56) Scan parquet default.item -Output [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (57) ColumnarToRow [codegen id : 6] -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] (58) Filter [codegen id : 6] -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Condition : isnotnull(i_item_sk#18) +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : isnotnull(i_item_sk#15) (59) BroadcastExchange -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] (60) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#18] +Right keys [1]: [i_item_sk#15] Join condition: None (61) Project [codegen id : 9] -Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] (62) ReusedExchange [Reuses operator id: 31] -Output [4]: [c_customer_sk#25, c_first_name#26, c_last_name#27, c_birth_country#28] +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] (63) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#25] +Right keys [1]: [c_customer_sk#21] Join condition: None (64) Project [codegen id : 9] -Output [12]: [ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, c_birth_country#28] -Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_customer_sk#25, c_first_name#26, c_last_name#27, c_birth_country#28] +Output [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] (65) ReusedExchange [Reuses operator id: 37] -Output [3]: [ca_state#30, ca_zip#31, ca_country#32] +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] (66) BroadcastHashJoin [codegen id : 9] -Left keys [2]: [c_birth_country#28, s_zip#16] -Right keys [2]: [upper(ca_country#32), ca_zip#31] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] Join condition: None (67) Project [codegen id : 9] -Output [11]: [ss_net_paid#5, s_store_name#13, s_state#15, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, ca_state#30] -Input [15]: [ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, c_birth_country#28, ca_state#30, ca_zip#31, ca_country#32] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] (68) HashAggregate [codegen id : 9] -Input [11]: [ss_net_paid#5, s_store_name#13, s_state#15, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, ca_state#30] -Keys [10]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum#49] -Results [11]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#50] +Aggregate Attributes [1]: [sum#40] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] (69) Exchange -Input [11]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#50] -Arguments: hashpartitioning(c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=10] (70) HashAggregate [codegen id : 10] -Input [11]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#50] -Keys [10]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#37] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#37,17,2) AS netpaid#38] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] (71) HashAggregate [codegen id : 10] -Input [1]: [netpaid#38] +Input [1]: [netpaid#31] Keys: [] -Functions [1]: [partial_avg(netpaid#38)] -Aggregate Attributes [2]: [sum#52, count#53] -Results [2]: [sum#54, count#55] +Functions [1]: [partial_avg(netpaid#31)] +Aggregate Attributes [2]: [sum#42, count#43] +Results [2]: [sum#44, count#45] (72) Exchange -Input [2]: [sum#54, count#55] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#56] +Input [2]: [sum#44, count#45] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] (73) HashAggregate [codegen id : 11] -Input [2]: [sum#54, count#55] +Input [2]: [sum#44, count#45] Keys: [] -Functions [1]: [avg(netpaid#38)] -Aggregate Attributes [1]: [avg(netpaid#38)#57] -Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#38)#57)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#58] +Functions [1]: [avg(netpaid#31)] +Aggregate Attributes [1]: [avg(netpaid#31)#46] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#31)#46)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#47] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt index fa921b7f2b622..9511d6c4f8e31 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b.sf100/explain.txt @@ -84,7 +84,7 @@ Condition : ((isnotnull(i_color#10) AND (i_color#10 = chiffon )) AND (8) BroadcastExchange Input [6]: [i_item_sk#7, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_item_sk#1] @@ -97,187 +97,187 @@ Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, (11) Exchange Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] -Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#14] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 3] Input [10]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 (13) Scan parquet default.customer -Output [4]: [c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] +Output [4]: [c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] ReadSchema: struct (14) ColumnarToRow [codegen id : 4] -Input [4]: [c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] +Input [4]: [c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] (15) Filter [codegen id : 4] -Input [4]: [c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] -Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_birth_country#18)) +Input [4]: [c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Condition : (isnotnull(c_customer_sk#13) AND isnotnull(c_birth_country#16)) (16) Exchange -Input [4]: [c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] -Arguments: hashpartitioning(c_customer_sk#15, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [4]: [c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Arguments: hashpartitioning(c_customer_sk#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] (17) Sort [codegen id : 5] -Input [4]: [c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] -Arguments: [c_customer_sk#15 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Arguments: [c_customer_sk#13 ASC NULLS FIRST], false, 0 (18) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#15] +Right keys [1]: [c_customer_sk#13] Join condition: None (19) Project [codegen id : 6] -Output [12]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] +Output [12]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] (20) Exchange -Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] (21) Sort [codegen id : 7] -Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 (22) Scan parquet default.store_returns -Output [3]: [sr_item_sk#21, sr_ticket_number#22, sr_returned_date_sk#23] +Output [3]: [sr_item_sk#17, sr_ticket_number#18, sr_returned_date_sk#19] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 8] -Input [3]: [sr_item_sk#21, sr_ticket_number#22, sr_returned_date_sk#23] +Input [3]: [sr_item_sk#17, sr_ticket_number#18, sr_returned_date_sk#19] (24) Filter [codegen id : 8] -Input [3]: [sr_item_sk#21, sr_ticket_number#22, sr_returned_date_sk#23] -Condition : (isnotnull(sr_ticket_number#22) AND isnotnull(sr_item_sk#21)) +Input [3]: [sr_item_sk#17, sr_ticket_number#18, sr_returned_date_sk#19] +Condition : (isnotnull(sr_ticket_number#18) AND isnotnull(sr_item_sk#17)) (25) Project [codegen id : 8] -Output [2]: [sr_item_sk#21, sr_ticket_number#22] -Input [3]: [sr_item_sk#21, sr_ticket_number#22, sr_returned_date_sk#23] +Output [2]: [sr_item_sk#17, sr_ticket_number#18] +Input [3]: [sr_item_sk#17, sr_ticket_number#18, sr_returned_date_sk#19] (26) Exchange -Input [2]: [sr_item_sk#21, sr_ticket_number#22] -Arguments: hashpartitioning(sr_ticket_number#22, sr_item_sk#21, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [2]: [sr_item_sk#17, sr_ticket_number#18] +Arguments: hashpartitioning(sr_ticket_number#18, sr_item_sk#17, 5), ENSURE_REQUIREMENTS, [plan_id=5] (27) Sort [codegen id : 9] -Input [2]: [sr_item_sk#21, sr_ticket_number#22] -Arguments: [sr_ticket_number#22 ASC NULLS FIRST, sr_item_sk#21 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#17, sr_ticket_number#18] +Arguments: [sr_ticket_number#18 ASC NULLS FIRST, sr_item_sk#17 ASC NULLS FIRST], false, 0 (28) SortMergeJoin [codegen id : 12] Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] -Right keys [2]: [sr_ticket_number#22, sr_item_sk#21] +Right keys [2]: [sr_ticket_number#18, sr_item_sk#17] Join condition: None (29) Project [codegen id : 12] -Output [10]: [ss_store_sk#3, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Input [14]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18, sr_item_sk#21, sr_ticket_number#22] +Output [10]: [ss_store_sk#3, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Input [14]: [ss_item_sk#1, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16, sr_item_sk#17, sr_ticket_number#18] (30) Scan parquet default.store -Output [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] +Output [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] ReadSchema: struct (31) ColumnarToRow [codegen id : 10] -Input [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] +Input [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] (32) Filter [codegen id : 10] -Input [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] -Condition : (((isnotnull(s_market_id#27) AND (s_market_id#27 = 8)) AND isnotnull(s_store_sk#25)) AND isnotnull(s_zip#29)) +Input [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] +Condition : (((isnotnull(s_market_id#22) AND (s_market_id#22 = 8)) AND isnotnull(s_store_sk#20)) AND isnotnull(s_zip#24)) (33) Project [codegen id : 10] -Output [4]: [s_store_sk#25, s_store_name#26, s_state#28, s_zip#29] -Input [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] +Output [4]: [s_store_sk#20, s_store_name#21, s_state#23, s_zip#24] +Input [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] (34) BroadcastExchange -Input [4]: [s_store_sk#25, s_store_name#26, s_state#28, s_zip#29] -Arguments: HashedRelationBroadcastMode(List(input[3, string, true]),false), [id=#30] +Input [4]: [s_store_sk#20, s_store_name#21, s_state#23, s_zip#24] +Arguments: HashedRelationBroadcastMode(List(input[3, string, true]),false), [plan_id=6] (35) Scan parquet default.customer_address -Output [3]: [ca_state#31, ca_zip#32, ca_country#33] +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] ReadSchema: struct (36) ColumnarToRow -Input [3]: [ca_state#31, ca_zip#32, ca_country#33] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] (37) Filter -Input [3]: [ca_state#31, ca_zip#32, ca_country#33] -Condition : (isnotnull(ca_country#33) AND isnotnull(ca_zip#32)) +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) (38) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [s_zip#29] -Right keys [1]: [ca_zip#32] +Left keys [1]: [s_zip#24] +Right keys [1]: [ca_zip#26] Join condition: None (39) Project [codegen id : 11] -Output [5]: [s_store_sk#25, s_store_name#26, s_state#28, ca_state#31, ca_country#33] -Input [7]: [s_store_sk#25, s_store_name#26, s_state#28, s_zip#29, ca_state#31, ca_zip#32, ca_country#33] +Output [5]: [s_store_sk#20, s_store_name#21, s_state#23, ca_state#25, ca_country#27] +Input [7]: [s_store_sk#20, s_store_name#21, s_state#23, s_zip#24, ca_state#25, ca_zip#26, ca_country#27] (40) BroadcastExchange -Input [5]: [s_store_sk#25, s_store_name#26, s_state#28, ca_state#31, ca_country#33] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], upper(input[4, string, true])),false), [id=#34] +Input [5]: [s_store_sk#20, s_store_name#21, s_state#23, ca_state#25, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], upper(input[4, string, true])),false), [plan_id=7] (41) BroadcastHashJoin [codegen id : 12] -Left keys [2]: [ss_store_sk#3, c_birth_country#18] -Right keys [2]: [s_store_sk#25, upper(ca_country#33)] +Left keys [2]: [ss_store_sk#3, c_birth_country#16] +Right keys [2]: [s_store_sk#20, upper(ca_country#27)] Join condition: None (42) Project [codegen id : 12] -Output [11]: [ss_net_paid#5, s_store_name#26, s_state#28, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, ca_state#31] -Input [15]: [ss_store_sk#3, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18, s_store_sk#25, s_store_name#26, s_state#28, ca_state#31, ca_country#33] +Output [11]: [ss_net_paid#5, s_store_name#21, s_state#23, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, ca_state#25] +Input [15]: [ss_store_sk#3, ss_net_paid#5, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16, s_store_sk#20, s_store_name#21, s_state#23, ca_state#25, ca_country#27] (43) HashAggregate [codegen id : 12] -Input [11]: [ss_net_paid#5, s_store_name#26, s_state#28, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, ca_state#31] -Keys [10]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] +Input [11]: [ss_net_paid#5, s_store_name#21, s_state#23, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, ca_state#25] +Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum#35] -Results [11]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#36] +Aggregate Attributes [1]: [sum#28] +Results [11]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#29] (44) Exchange -Input [11]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#36] -Arguments: hashpartitioning(c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, 5), ENSURE_REQUIREMENTS, [id=#37] +Input [11]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#29] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, 5), ENSURE_REQUIREMENTS, [plan_id=8] (45) HashAggregate [codegen id : 13] -Input [11]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#36] -Keys [10]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] +Input [11]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#29] +Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#38] -Results [4]: [c_last_name#17, c_first_name#16, s_store_name#26, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#38,17,2) AS netpaid#39] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [4]: [c_last_name#15, c_first_name#14, s_store_name#21, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] (46) HashAggregate [codegen id : 13] -Input [4]: [c_last_name#17, c_first_name#16, s_store_name#26, netpaid#39] -Keys [3]: [c_last_name#17, c_first_name#16, s_store_name#26] -Functions [1]: [partial_sum(netpaid#39)] -Aggregate Attributes [2]: [sum#40, isEmpty#41] -Results [5]: [c_last_name#17, c_first_name#16, s_store_name#26, sum#42, isEmpty#43] +Input [4]: [c_last_name#15, c_first_name#14, s_store_name#21, netpaid#31] +Keys [3]: [c_last_name#15, c_first_name#14, s_store_name#21] +Functions [1]: [partial_sum(netpaid#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [5]: [c_last_name#15, c_first_name#14, s_store_name#21, sum#34, isEmpty#35] (47) Exchange -Input [5]: [c_last_name#17, c_first_name#16, s_store_name#26, sum#42, isEmpty#43] -Arguments: hashpartitioning(c_last_name#17, c_first_name#16, s_store_name#26, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [5]: [c_last_name#15, c_first_name#14, s_store_name#21, sum#34, isEmpty#35] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, s_store_name#21, 5), ENSURE_REQUIREMENTS, [plan_id=9] (48) HashAggregate [codegen id : 14] -Input [5]: [c_last_name#17, c_first_name#16, s_store_name#26, sum#42, isEmpty#43] -Keys [3]: [c_last_name#17, c_first_name#16, s_store_name#26] -Functions [1]: [sum(netpaid#39)] -Aggregate Attributes [1]: [sum(netpaid#39)#45] -Results [4]: [c_last_name#17, c_first_name#16, s_store_name#26, sum(netpaid#39)#45 AS paid#46] +Input [5]: [c_last_name#15, c_first_name#14, s_store_name#21, sum#34, isEmpty#35] +Keys [3]: [c_last_name#15, c_first_name#14, s_store_name#21] +Functions [1]: [sum(netpaid#31)] +Aggregate Attributes [1]: [sum(netpaid#31)#36] +Results [4]: [c_last_name#15, c_first_name#14, s_store_name#21, sum(netpaid#31)#36 AS paid#37] (49) Filter [codegen id : 14] -Input [4]: [c_last_name#17, c_first_name#16, s_store_name#26, paid#46] -Condition : (isnotnull(paid#46) AND (cast(paid#46 as decimal(33,8)) > cast(Subquery scalar-subquery#47, [id=#48] as decimal(33,8)))) +Input [4]: [c_last_name#15, c_first_name#14, s_store_name#21, paid#37] +Condition : (isnotnull(paid#37) AND (cast(paid#37 as decimal(33,8)) > cast(Subquery scalar-subquery#38, [id=#39] as decimal(33,8)))) ===== Subqueries ===== -Subquery:1 Hosting operator id = 49 Hosting Expression = Subquery scalar-subquery#47, [id=#48] +Subquery:1 Hosting operator id = 49 Hosting Expression = Subquery scalar-subquery#38, [id=#39] * HashAggregate (96) +- Exchange (95) +- * HashAggregate (94) @@ -346,42 +346,42 @@ Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, ss_sold_date_sk#6] (54) Scan parquet default.store -Output [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] +Output [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] ReadSchema: struct (55) ColumnarToRow [codegen id : 1] -Input [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] +Input [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] (56) Filter [codegen id : 1] -Input [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] -Condition : (((isnotnull(s_market_id#27) AND (s_market_id#27 = 8)) AND isnotnull(s_store_sk#25)) AND isnotnull(s_zip#29)) +Input [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] +Condition : (((isnotnull(s_market_id#22) AND (s_market_id#22 = 8)) AND isnotnull(s_store_sk#20)) AND isnotnull(s_zip#24)) (57) Project [codegen id : 1] -Output [4]: [s_store_sk#25, s_store_name#26, s_state#28, s_zip#29] -Input [5]: [s_store_sk#25, s_store_name#26, s_market_id#27, s_state#28, s_zip#29] +Output [4]: [s_store_sk#20, s_store_name#21, s_state#23, s_zip#24] +Input [5]: [s_store_sk#20, s_store_name#21, s_market_id#22, s_state#23, s_zip#24] (58) BroadcastExchange -Input [4]: [s_store_sk#25, s_store_name#26, s_state#28, s_zip#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] +Input [4]: [s_store_sk#20, s_store_name#21, s_state#23, s_zip#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] (59) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#25] +Right keys [1]: [s_store_sk#20] Join condition: None (60) Project [codegen id : 2] -Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29] -Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, s_store_sk#25, s_store_name#26, s_state#28, s_zip#29] +Output [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, s_store_sk#20, s_store_name#21, s_state#23, s_zip#24] (61) Exchange -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#50] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=11] (62) Sort [codegen id : 3] -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (63) Scan parquet default.item @@ -400,7 +400,7 @@ Condition : isnotnull(i_item_sk#7) (66) Exchange Input [6]: [i_item_sk#7, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] -Arguments: hashpartitioning(i_item_sk#7, 5), ENSURE_REQUIREMENTS, [id=#51] +Arguments: hashpartitioning(i_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=12] (67) Sort [codegen id : 5] Input [6]: [i_item_sk#7, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] @@ -412,130 +412,130 @@ Right keys [1]: [i_item_sk#7] Join condition: None (69) Project [codegen id : 6] -Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] -Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_item_sk#7, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_item_sk#7, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] (70) Exchange -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] -Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#52] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=13] (71) Sort [codegen id : 7] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12] Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 (72) ReusedExchange [Reuses operator id: 16] -Output [4]: [c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] +Output [4]: [c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] (73) Sort [codegen id : 9] -Input [4]: [c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] -Arguments: [c_customer_sk#15 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Arguments: [c_customer_sk#13 ASC NULLS FIRST], false, 0 (74) SortMergeJoin [codegen id : 10] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#15] +Right keys [1]: [c_customer_sk#13] Join condition: None (75) Project [codegen id : 10] -Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Input [16]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_customer_sk#15, c_first_name#16, c_last_name#17, c_birth_country#18] +Output [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Input [16]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_customer_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] (76) Exchange -Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#53] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=14] (77) Sort [codegen id : 11] -Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] +Input [14]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 (78) ReusedExchange [Reuses operator id: 26] -Output [2]: [sr_item_sk#21, sr_ticket_number#22] +Output [2]: [sr_item_sk#17, sr_ticket_number#18] (79) Sort [codegen id : 13] -Input [2]: [sr_item_sk#21, sr_ticket_number#22] -Arguments: [sr_ticket_number#22 ASC NULLS FIRST, sr_item_sk#21 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#17, sr_ticket_number#18] +Arguments: [sr_ticket_number#18 ASC NULLS FIRST, sr_item_sk#17 ASC NULLS FIRST], false, 0 (80) SortMergeJoin [codegen id : 14] Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] -Right keys [2]: [sr_ticket_number#22, sr_item_sk#21] +Right keys [2]: [sr_ticket_number#18, sr_item_sk#17] Join condition: None (81) Project [codegen id : 14] -Output [12]: [ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Input [16]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18, sr_item_sk#21, sr_ticket_number#22] +Output [12]: [ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Input [16]: [ss_item_sk#1, ss_ticket_number#4, ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16, sr_item_sk#17, sr_ticket_number#18] (82) Exchange -Input [12]: [ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Arguments: hashpartitioning(c_birth_country#18, s_zip#29, 5), ENSURE_REQUIREMENTS, [id=#54] +Input [12]: [ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Arguments: hashpartitioning(c_birth_country#16, s_zip#24, 5), ENSURE_REQUIREMENTS, [plan_id=15] (83) Sort [codegen id : 15] -Input [12]: [ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18] -Arguments: [c_birth_country#18 ASC NULLS FIRST, s_zip#29 ASC NULLS FIRST], false, 0 +Input [12]: [ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16] +Arguments: [c_birth_country#16 ASC NULLS FIRST, s_zip#24 ASC NULLS FIRST], false, 0 (84) Scan parquet default.customer_address -Output [3]: [ca_state#31, ca_zip#32, ca_country#33] +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] ReadSchema: struct (85) ColumnarToRow [codegen id : 16] -Input [3]: [ca_state#31, ca_zip#32, ca_country#33] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] (86) Filter [codegen id : 16] -Input [3]: [ca_state#31, ca_zip#32, ca_country#33] -Condition : (isnotnull(ca_country#33) AND isnotnull(ca_zip#32)) +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) (87) Exchange -Input [3]: [ca_state#31, ca_zip#32, ca_country#33] -Arguments: hashpartitioning(upper(ca_country#33), ca_zip#32, 5), ENSURE_REQUIREMENTS, [id=#55] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: hashpartitioning(upper(ca_country#27), ca_zip#26, 5), ENSURE_REQUIREMENTS, [plan_id=16] (88) Sort [codegen id : 17] -Input [3]: [ca_state#31, ca_zip#32, ca_country#33] -Arguments: [upper(ca_country#33) ASC NULLS FIRST, ca_zip#32 ASC NULLS FIRST], false, 0 +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: [upper(ca_country#27) ASC NULLS FIRST, ca_zip#26 ASC NULLS FIRST], false, 0 (89) SortMergeJoin [codegen id : 18] -Left keys [2]: [c_birth_country#18, s_zip#29] -Right keys [2]: [upper(ca_country#33), ca_zip#32] +Left keys [2]: [c_birth_country#16, s_zip#24] +Right keys [2]: [upper(ca_country#27), ca_zip#26] Join condition: None (90) Project [codegen id : 18] -Output [11]: [ss_net_paid#5, s_store_name#26, s_state#28, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, ca_state#31] -Input [15]: [ss_net_paid#5, s_store_name#26, s_state#28, s_zip#29, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, c_birth_country#18, ca_state#31, ca_zip#32, ca_country#33] +Output [11]: [ss_net_paid#5, s_store_name#21, s_state#23, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#21, s_state#23, s_zip#24, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, c_birth_country#16, ca_state#25, ca_zip#26, ca_country#27] (91) HashAggregate [codegen id : 18] -Input [11]: [ss_net_paid#5, s_store_name#26, s_state#28, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#16, c_last_name#17, ca_state#31] -Keys [10]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] +Input [11]: [ss_net_paid#5, s_store_name#21, s_state#23, i_current_price#8, i_size#9, i_color#10, i_units#11, i_manager_id#12, c_first_name#14, c_last_name#15, ca_state#25] +Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum#56] -Results [11]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#57] +Aggregate Attributes [1]: [sum#40] +Results [11]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#41] (92) Exchange -Input [11]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#57] -Arguments: hashpartitioning(c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, 5), ENSURE_REQUIREMENTS, [id=#58] +Input [11]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#41] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, 5), ENSURE_REQUIREMENTS, [plan_id=17] (93) HashAggregate [codegen id : 19] -Input [11]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#57] -Keys [10]: [c_last_name#17, c_first_name#16, s_store_name#26, ca_state#31, s_state#28, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] +Input [11]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9, sum#41] +Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#21, ca_state#25, s_state#23, i_color#10, i_current_price#8, i_manager_id#12, i_units#11, i_size#9] Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#38] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#38,17,2) AS netpaid#39] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] (94) HashAggregate [codegen id : 19] -Input [1]: [netpaid#39] +Input [1]: [netpaid#31] Keys: [] -Functions [1]: [partial_avg(netpaid#39)] -Aggregate Attributes [2]: [sum#59, count#60] -Results [2]: [sum#61, count#62] +Functions [1]: [partial_avg(netpaid#31)] +Aggregate Attributes [2]: [sum#42, count#43] +Results [2]: [sum#44, count#45] (95) Exchange -Input [2]: [sum#61, count#62] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#63] +Input [2]: [sum#44, count#45] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=18] (96) HashAggregate [codegen id : 20] -Input [2]: [sum#61, count#62] +Input [2]: [sum#44, count#45] Keys: [] -Functions [1]: [avg(netpaid#39)] -Aggregate Attributes [1]: [avg(netpaid#39)#64] -Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#39)#64)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#65] +Functions [1]: [avg(netpaid#31)] +Aggregate Attributes [1]: [avg(netpaid#31)#46] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#31)#46)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#47] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt index e1a6c33699efd..cb8f5a3ade315 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q24b/explain.txt @@ -67,202 +67,202 @@ Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s (5) Exchange Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#7] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (6) Sort [codegen id : 2] Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 (7) Scan parquet default.store_returns -Output [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] ReadSchema: struct (8) ColumnarToRow [codegen id : 3] -Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] (9) Filter [codegen id : 3] -Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] -Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) (10) Project [codegen id : 3] -Output [2]: [sr_item_sk#8, sr_ticket_number#9] -Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] (11) Exchange -Input [2]: [sr_item_sk#8, sr_ticket_number#9] -Arguments: hashpartitioning(sr_ticket_number#9, sr_item_sk#8, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 4] -Input [2]: [sr_item_sk#8, sr_ticket_number#9] -Arguments: [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 (13) SortMergeJoin [codegen id : 9] Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] -Right keys [2]: [sr_ticket_number#9, sr_item_sk#8] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] Join condition: None (14) Project [codegen id : 9] Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#8, sr_ticket_number#9] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] (15) Scan parquet default.store -Output [5]: [s_store_sk#12, s_store_name#13, s_market_id#14, s_state#15, s_zip#16] +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [5]: [s_store_sk#12, s_store_name#13, s_market_id#14, s_state#15, s_zip#16] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] (17) Filter [codegen id : 5] -Input [5]: [s_store_sk#12, s_store_name#13, s_market_id#14, s_state#15, s_zip#16] -Condition : (((isnotnull(s_market_id#14) AND (s_market_id#14 = 8)) AND isnotnull(s_store_sk#12)) AND isnotnull(s_zip#16)) +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) (18) Project [codegen id : 5] -Output [4]: [s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] -Input [5]: [s_store_sk#12, s_store_name#13, s_market_id#14, s_state#15, s_zip#16] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] (19) BroadcastExchange -Input [4]: [s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (20) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] +Right keys [1]: [s_store_sk#10] Join condition: None (21) Project [codegen id : 9] -Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] (22) Scan parquet default.item -Output [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_color), EqualTo(i_color,chiffon ), IsNotNull(i_item_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 6] -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] (24) Filter [codegen id : 6] -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Condition : ((isnotnull(i_color#21) AND (i_color#21 = chiffon )) AND isnotnull(i_item_sk#18)) +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = chiffon )) AND isnotnull(i_item_sk#15)) (25) BroadcastExchange -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#24] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (26) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#18] +Right keys [1]: [i_item_sk#15] Join condition: None (27) Project [codegen id : 9] -Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] (28) Scan parquet default.customer -Output [4]: [c_customer_sk#25, c_first_name#26, c_last_name#27, c_birth_country#28] +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_birth_country)] ReadSchema: struct (29) ColumnarToRow [codegen id : 7] -Input [4]: [c_customer_sk#25, c_first_name#26, c_last_name#27, c_birth_country#28] +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] (30) Filter [codegen id : 7] -Input [4]: [c_customer_sk#25, c_first_name#26, c_last_name#27, c_birth_country#28] -Condition : (isnotnull(c_customer_sk#25) AND isnotnull(c_birth_country#28)) +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_birth_country#24)) (31) BroadcastExchange -Input [4]: [c_customer_sk#25, c_first_name#26, c_last_name#27, c_birth_country#28] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#29] +Input [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (32) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#25] +Right keys [1]: [c_customer_sk#21] Join condition: None (33) Project [codegen id : 9] -Output [12]: [ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, c_birth_country#28] -Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_customer_sk#25, c_first_name#26, c_last_name#27, c_birth_country#28] +Output [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] (34) Scan parquet default.customer_address -Output [3]: [ca_state#30, ca_zip#31, ca_country#32] +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_country), IsNotNull(ca_zip)] ReadSchema: struct (35) ColumnarToRow [codegen id : 8] -Input [3]: [ca_state#30, ca_zip#31, ca_country#32] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] (36) Filter [codegen id : 8] -Input [3]: [ca_state#30, ca_zip#31, ca_country#32] -Condition : (isnotnull(ca_country#32) AND isnotnull(ca_zip#31)) +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Condition : (isnotnull(ca_country#27) AND isnotnull(ca_zip#26)) (37) BroadcastExchange -Input [3]: [ca_state#30, ca_zip#31, ca_country#32] -Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [id=#33] +Input [3]: [ca_state#25, ca_zip#26, ca_country#27] +Arguments: HashedRelationBroadcastMode(List(upper(input[2, string, false]), input[1, string, false]),false), [plan_id=6] (38) BroadcastHashJoin [codegen id : 9] -Left keys [2]: [c_birth_country#28, s_zip#16] -Right keys [2]: [upper(ca_country#32), ca_zip#31] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] Join condition: None (39) Project [codegen id : 9] -Output [11]: [ss_net_paid#5, s_store_name#13, s_state#15, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, ca_state#30] -Input [15]: [ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, c_birth_country#28, ca_state#30, ca_zip#31, ca_country#32] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] (40) HashAggregate [codegen id : 9] -Input [11]: [ss_net_paid#5, s_store_name#13, s_state#15, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, ca_state#30] -Keys [10]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum#34] -Results [11]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#35] +Aggregate Attributes [1]: [sum#28] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] (41) Exchange -Input [11]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#35] -Arguments: hashpartitioning(c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) HashAggregate [codegen id : 10] -Input [11]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#35] -Keys [10]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#29] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#37] -Results [4]: [c_last_name#27, c_first_name#26, s_store_name#13, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#37,17,2) AS netpaid#38] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] (43) HashAggregate [codegen id : 10] -Input [4]: [c_last_name#27, c_first_name#26, s_store_name#13, netpaid#38] -Keys [3]: [c_last_name#27, c_first_name#26, s_store_name#13] -Functions [1]: [partial_sum(netpaid#38)] -Aggregate Attributes [2]: [sum#39, isEmpty#40] -Results [5]: [c_last_name#27, c_first_name#26, s_store_name#13, sum#41, isEmpty#42] +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, netpaid#31] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [partial_sum(netpaid#31)] +Aggregate Attributes [2]: [sum#32, isEmpty#33] +Results [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] (44) Exchange -Input [5]: [c_last_name#27, c_first_name#26, s_store_name#13, sum#41, isEmpty#42] -Arguments: hashpartitioning(c_last_name#27, c_first_name#26, s_store_name#13, 5), ENSURE_REQUIREMENTS, [id=#43] +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] (45) HashAggregate [codegen id : 11] -Input [5]: [c_last_name#27, c_first_name#26, s_store_name#13, sum#41, isEmpty#42] -Keys [3]: [c_last_name#27, c_first_name#26, s_store_name#13] -Functions [1]: [sum(netpaid#38)] -Aggregate Attributes [1]: [sum(netpaid#38)#44] -Results [4]: [c_last_name#27, c_first_name#26, s_store_name#13, sum(netpaid#38)#44 AS paid#45] +Input [5]: [c_last_name#23, c_first_name#22, s_store_name#11, sum#34, isEmpty#35] +Keys [3]: [c_last_name#23, c_first_name#22, s_store_name#11] +Functions [1]: [sum(netpaid#31)] +Aggregate Attributes [1]: [sum(netpaid#31)#36] +Results [4]: [c_last_name#23, c_first_name#22, s_store_name#11, sum(netpaid#31)#36 AS paid#37] (46) Filter [codegen id : 11] -Input [4]: [c_last_name#27, c_first_name#26, s_store_name#13, paid#45] -Condition : (isnotnull(paid#45) AND (cast(paid#45 as decimal(33,8)) > cast(Subquery scalar-subquery#46, [id=#47] as decimal(33,8)))) +Input [4]: [c_last_name#23, c_first_name#22, s_store_name#11, paid#37] +Condition : (isnotnull(paid#37) AND (cast(paid#37 as decimal(33,8)) > cast(Subquery scalar-subquery#38, [id=#39] as decimal(33,8)))) ===== Subqueries ===== -Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#46, [id=#47] +Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#38, [id=#39] * HashAggregate (73) +- Exchange (72) +- * HashAggregate (71) @@ -300,118 +300,118 @@ Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 (49) ReusedExchange [Reuses operator id: 11] -Output [2]: [sr_item_sk#8, sr_ticket_number#9] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] (50) Sort [codegen id : 4] -Input [2]: [sr_item_sk#8, sr_ticket_number#9] -Arguments: [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 (51) SortMergeJoin [codegen id : 9] Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] -Right keys [2]: [sr_ticket_number#9, sr_item_sk#8] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] Join condition: None (52) Project [codegen id : 9] Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#8, sr_ticket_number#9] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] (53) ReusedExchange [Reuses operator id: 19] -Output [4]: [s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] (54) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] +Right keys [1]: [s_store_sk#10] Join condition: None (55) Project [codegen id : 9] -Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] (56) Scan parquet default.item -Output [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (57) ColumnarToRow [codegen id : 6] -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] (58) Filter [codegen id : 6] -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Condition : isnotnull(i_item_sk#18) +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : isnotnull(i_item_sk#15) (59) BroadcastExchange -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] (60) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#18] +Right keys [1]: [i_item_sk#15] Join condition: None (61) Project [codegen id : 9] -Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] (62) ReusedExchange [Reuses operator id: 31] -Output [4]: [c_customer_sk#25, c_first_name#26, c_last_name#27, c_birth_country#28] +Output [4]: [c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] (63) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#25] +Right keys [1]: [c_customer_sk#21] Join condition: None (64) Project [codegen id : 9] -Output [12]: [ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, c_birth_country#28] -Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_customer_sk#25, c_first_name#26, c_last_name#27, c_birth_country#28] +Output [12]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24] +Input [14]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_first_name#22, c_last_name#23, c_birth_country#24] (65) ReusedExchange [Reuses operator id: 37] -Output [3]: [ca_state#30, ca_zip#31, ca_country#32] +Output [3]: [ca_state#25, ca_zip#26, ca_country#27] (66) BroadcastHashJoin [codegen id : 9] -Left keys [2]: [c_birth_country#28, s_zip#16] -Right keys [2]: [upper(ca_country#32), ca_zip#31] +Left keys [2]: [c_birth_country#24, s_zip#14] +Right keys [2]: [upper(ca_country#27), ca_zip#26] Join condition: None (67) Project [codegen id : 9] -Output [11]: [ss_net_paid#5, s_store_name#13, s_state#15, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, ca_state#30] -Input [15]: [ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, c_birth_country#28, ca_state#30, ca_zip#31, ca_country#32] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Input [15]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, c_birth_country#24, ca_state#25, ca_zip#26, ca_country#27] (68) HashAggregate [codegen id : 9] -Input [11]: [ss_net_paid#5, s_store_name#13, s_state#15, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#26, c_last_name#27, ca_state#30] -Keys [10]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#22, c_last_name#23, ca_state#25] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum#49] -Results [11]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#50] +Aggregate Attributes [1]: [sum#40] +Results [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] (69) Exchange -Input [11]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#50] -Arguments: hashpartitioning(c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] +Arguments: hashpartitioning(c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=10] (70) HashAggregate [codegen id : 10] -Input [11]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#50] -Keys [10]: [c_last_name#27, c_first_name#26, s_store_name#13, ca_state#30, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20] +Input [11]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#41] +Keys [10]: [c_last_name#23, c_first_name#22, s_store_name#11, ca_state#25, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#37] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#37,17,2) AS netpaid#38] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#30] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#30,17,2) AS netpaid#31] (71) HashAggregate [codegen id : 10] -Input [1]: [netpaid#38] +Input [1]: [netpaid#31] Keys: [] -Functions [1]: [partial_avg(netpaid#38)] -Aggregate Attributes [2]: [sum#52, count#53] -Results [2]: [sum#54, count#55] +Functions [1]: [partial_avg(netpaid#31)] +Aggregate Attributes [2]: [sum#42, count#43] +Results [2]: [sum#44, count#45] (72) Exchange -Input [2]: [sum#54, count#55] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#56] +Input [2]: [sum#44, count#45] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] (73) HashAggregate [codegen id : 11] -Input [2]: [sum#54, count#55] +Input [2]: [sum#44, count#45] Keys: [] -Functions [1]: [avg(netpaid#38)] -Aggregate Attributes [1]: [avg(netpaid#38)#57] -Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#38)#57)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#58] +Functions [1]: [avg(netpaid#31)] +Aggregate Attributes [1]: [avg(netpaid#31)#46] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#31)#46)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#47] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt index fc55789fab16a..f30a62f5c1ace 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25.sf100/explain.txt @@ -93,7 +93,7 @@ Condition : isnotnull(s_store_sk#9) (10) BroadcastExchange Input [3]: [s_store_sk#9, s_store_id#10, s_store_name#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_store_sk#3] @@ -106,168 +106,168 @@ Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s (13) Exchange Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#13] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) Sort [codegen id : 4] Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (15) Scan parquet default.item -Output [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] +Output [3]: [i_item_sk#12, i_item_id#13, i_item_desc#14] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] +Input [3]: [i_item_sk#12, i_item_id#13, i_item_desc#14] (17) Filter [codegen id : 5] -Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] -Condition : isnotnull(i_item_sk#14) +Input [3]: [i_item_sk#12, i_item_id#13, i_item_desc#14] +Condition : isnotnull(i_item_sk#12) (18) Exchange -Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] -Arguments: hashpartitioning(i_item_sk#14, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [3]: [i_item_sk#12, i_item_id#13, i_item_desc#14] +Arguments: hashpartitioning(i_item_sk#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 6] -Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] -Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 +Input [3]: [i_item_sk#12, i_item_id#13, i_item_desc#14] +Arguments: [i_item_sk#12 ASC NULLS FIRST], false, 0 (20) SortMergeJoin [codegen id : 7] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#14] +Right keys [1]: [i_item_sk#12] Join condition: None (21) Project [codegen id : 7] -Output [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] -Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_sk#14, i_item_id#15, i_item_desc#16] +Output [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_sk#12, i_item_id#13, i_item_desc#14] (22) Exchange -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] -Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14] +Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) Sort [codegen id : 8] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14] Arguments: [ss_customer_sk#2 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST], false, 0 (24) Scan parquet default.store_returns -Output [5]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22, sr_returned_date_sk#23] +Output [5]: [sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_net_loss#18, sr_returned_date_sk#19] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(sr_returned_date_sk#23), dynamicpruningexpression(sr_returned_date_sk#23 IN dynamicpruning#24)] +PartitionFilters: [isnotnull(sr_returned_date_sk#19), dynamicpruningexpression(sr_returned_date_sk#19 IN dynamicpruning#20)] PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] ReadSchema: struct (25) ColumnarToRow [codegen id : 10] -Input [5]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22, sr_returned_date_sk#23] +Input [5]: [sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_net_loss#18, sr_returned_date_sk#19] (26) Filter [codegen id : 10] -Input [5]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22, sr_returned_date_sk#23] -Condition : ((isnotnull(sr_customer_sk#20) AND isnotnull(sr_item_sk#19)) AND isnotnull(sr_ticket_number#21)) +Input [5]: [sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_net_loss#18, sr_returned_date_sk#19] +Condition : ((isnotnull(sr_customer_sk#16) AND isnotnull(sr_item_sk#15)) AND isnotnull(sr_ticket_number#17)) (27) ReusedExchange [Reuses operator id: 59] -Output [1]: [d_date_sk#25] +Output [1]: [d_date_sk#21] (28) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [sr_returned_date_sk#23] -Right keys [1]: [d_date_sk#25] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [d_date_sk#21] Join condition: None (29) Project [codegen id : 10] -Output [4]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22] -Input [6]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22, sr_returned_date_sk#23, d_date_sk#25] +Output [4]: [sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_net_loss#18] +Input [6]: [sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_net_loss#18, sr_returned_date_sk#19, d_date_sk#21] (30) Exchange -Input [4]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22] -Arguments: hashpartitioning(sr_customer_sk#20, sr_item_sk#19, sr_ticket_number#21, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [4]: [sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_net_loss#18] +Arguments: hashpartitioning(sr_customer_sk#16, sr_item_sk#15, sr_ticket_number#17, 5), ENSURE_REQUIREMENTS, [plan_id=5] (31) Sort [codegen id : 11] -Input [4]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22] -Arguments: [sr_customer_sk#20 ASC NULLS FIRST, sr_item_sk#19 ASC NULLS FIRST, sr_ticket_number#21 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_net_loss#18] +Arguments: [sr_customer_sk#16 ASC NULLS FIRST, sr_item_sk#15 ASC NULLS FIRST, sr_ticket_number#17 ASC NULLS FIRST], false, 0 (32) SortMergeJoin [codegen id : 12] Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] -Right keys [3]: [sr_customer_sk#20, sr_item_sk#19, sr_ticket_number#21] +Right keys [3]: [sr_customer_sk#16, sr_item_sk#15, sr_ticket_number#17] Join condition: None (33) Project [codegen id : 12] -Output [8]: [ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_net_loss#22] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_net_loss#22] +Output [8]: [ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14, sr_item_sk#15, sr_customer_sk#16, sr_net_loss#18] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14, sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_net_loss#18] (34) Exchange -Input [8]: [ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_net_loss#22] -Arguments: hashpartitioning(sr_customer_sk#20, sr_item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [8]: [ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14, sr_item_sk#15, sr_customer_sk#16, sr_net_loss#18] +Arguments: hashpartitioning(sr_customer_sk#16, sr_item_sk#15, 5), ENSURE_REQUIREMENTS, [plan_id=6] (35) Sort [codegen id : 13] -Input [8]: [ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_net_loss#22] -Arguments: [sr_customer_sk#20 ASC NULLS FIRST, sr_item_sk#19 ASC NULLS FIRST], false, 0 +Input [8]: [ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14, sr_item_sk#15, sr_customer_sk#16, sr_net_loss#18] +Arguments: [sr_customer_sk#16 ASC NULLS FIRST, sr_item_sk#15 ASC NULLS FIRST], false, 0 (36) Scan parquet default.catalog_sales -Output [4]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30, cs_sold_date_sk#31] +Output [4]: [cs_bill_customer_sk#22, cs_item_sk#23, cs_net_profit#24, cs_sold_date_sk#25] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#31), dynamicpruningexpression(cs_sold_date_sk#31 IN dynamicpruning#24)] +PartitionFilters: [isnotnull(cs_sold_date_sk#25), dynamicpruningexpression(cs_sold_date_sk#25 IN dynamicpruning#20)] PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 15] -Input [4]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30, cs_sold_date_sk#31] +Input [4]: [cs_bill_customer_sk#22, cs_item_sk#23, cs_net_profit#24, cs_sold_date_sk#25] (38) Filter [codegen id : 15] -Input [4]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30, cs_sold_date_sk#31] -Condition : (isnotnull(cs_bill_customer_sk#28) AND isnotnull(cs_item_sk#29)) +Input [4]: [cs_bill_customer_sk#22, cs_item_sk#23, cs_net_profit#24, cs_sold_date_sk#25] +Condition : (isnotnull(cs_bill_customer_sk#22) AND isnotnull(cs_item_sk#23)) (39) ReusedExchange [Reuses operator id: 59] -Output [1]: [d_date_sk#32] +Output [1]: [d_date_sk#26] (40) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [cs_sold_date_sk#31] -Right keys [1]: [d_date_sk#32] +Left keys [1]: [cs_sold_date_sk#25] +Right keys [1]: [d_date_sk#26] Join condition: None (41) Project [codegen id : 15] -Output [3]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30] -Input [5]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30, cs_sold_date_sk#31, d_date_sk#32] +Output [3]: [cs_bill_customer_sk#22, cs_item_sk#23, cs_net_profit#24] +Input [5]: [cs_bill_customer_sk#22, cs_item_sk#23, cs_net_profit#24, cs_sold_date_sk#25, d_date_sk#26] (42) Exchange -Input [3]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30] -Arguments: hashpartitioning(cs_bill_customer_sk#28, cs_item_sk#29, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [3]: [cs_bill_customer_sk#22, cs_item_sk#23, cs_net_profit#24] +Arguments: hashpartitioning(cs_bill_customer_sk#22, cs_item_sk#23, 5), ENSURE_REQUIREMENTS, [plan_id=7] (43) Sort [codegen id : 16] -Input [3]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30] -Arguments: [cs_bill_customer_sk#28 ASC NULLS FIRST, cs_item_sk#29 ASC NULLS FIRST], false, 0 +Input [3]: [cs_bill_customer_sk#22, cs_item_sk#23, cs_net_profit#24] +Arguments: [cs_bill_customer_sk#22 ASC NULLS FIRST, cs_item_sk#23 ASC NULLS FIRST], false, 0 (44) SortMergeJoin [codegen id : 17] -Left keys [2]: [sr_customer_sk#20, sr_item_sk#19] -Right keys [2]: [cs_bill_customer_sk#28, cs_item_sk#29] +Left keys [2]: [sr_customer_sk#16, sr_item_sk#15] +Right keys [2]: [cs_bill_customer_sk#22, cs_item_sk#23] Join condition: None (45) Project [codegen id : 17] -Output [7]: [ss_net_profit#5, sr_net_loss#22, cs_net_profit#30, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] -Input [11]: [ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_net_loss#22, cs_bill_customer_sk#28, cs_item_sk#29, cs_net_profit#30] +Output [7]: [ss_net_profit#5, sr_net_loss#18, cs_net_profit#24, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14] +Input [11]: [ss_net_profit#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14, sr_item_sk#15, sr_customer_sk#16, sr_net_loss#18, cs_bill_customer_sk#22, cs_item_sk#23, cs_net_profit#24] (46) HashAggregate [codegen id : 17] -Input [7]: [ss_net_profit#5, sr_net_loss#22, cs_net_profit#30, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] -Keys [4]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11] -Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(sr_net_loss#22)), partial_sum(UnscaledValue(cs_net_profit#30))] -Aggregate Attributes [3]: [sum#34, sum#35, sum#36] -Results [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#37, sum#38, sum#39] +Input [7]: [ss_net_profit#5, sr_net_loss#18, cs_net_profit#24, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14] +Keys [4]: [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11] +Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(sr_net_loss#18)), partial_sum(UnscaledValue(cs_net_profit#24))] +Aggregate Attributes [3]: [sum#27, sum#28, sum#29] +Results [7]: [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11, sum#30, sum#31, sum#32] (47) Exchange -Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#37, sum#38, sum#39] -Arguments: hashpartitioning(i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, 5), ENSURE_REQUIREMENTS, [id=#40] +Input [7]: [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11, sum#30, sum#31, sum#32] +Arguments: hashpartitioning(i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] (48) HashAggregate [codegen id : 18] -Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#37, sum#38, sum#39] -Keys [4]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11] -Functions [3]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(sr_net_loss#22)), sum(UnscaledValue(cs_net_profit#30))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#5))#41, sum(UnscaledValue(sr_net_loss#22))#42, sum(UnscaledValue(cs_net_profit#30))#43] -Results [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#41,17,2) AS store_sales_profit#44, MakeDecimal(sum(UnscaledValue(sr_net_loss#22))#42,17,2) AS store_returns_loss#45, MakeDecimal(sum(UnscaledValue(cs_net_profit#30))#43,17,2) AS catalog_sales_profit#46] +Input [7]: [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11, sum#30, sum#31, sum#32] +Keys [4]: [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11] +Functions [3]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(sr_net_loss#18)), sum(UnscaledValue(cs_net_profit#24))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#5))#33, sum(UnscaledValue(sr_net_loss#18))#34, sum(UnscaledValue(cs_net_profit#24))#35] +Results [7]: [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#33,17,2) AS store_sales_profit#36, MakeDecimal(sum(UnscaledValue(sr_net_loss#18))#34,17,2) AS store_returns_loss#37, MakeDecimal(sum(UnscaledValue(cs_net_profit#24))#35,17,2) AS catalog_sales_profit#38] (49) TakeOrderedAndProject -Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, store_sales_profit#44, store_returns_loss#45, catalog_sales_profit#46] -Arguments: 100, [i_item_id#15 ASC NULLS FIRST, i_item_desc#16 ASC NULLS FIRST, s_store_id#10 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST], [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, store_sales_profit#44, store_returns_loss#45, catalog_sales_profit#46] +Input [7]: [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11, store_sales_profit#36, store_returns_loss#37, catalog_sales_profit#38] +Arguments: 100, [i_item_id#13 ASC NULLS FIRST, i_item_desc#14 ASC NULLS FIRST, s_store_id#10 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST], [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11, store_sales_profit#36, store_returns_loss#37, catalog_sales_profit#38] ===== Subqueries ===== @@ -280,28 +280,28 @@ BroadcastExchange (54) (50) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#47, d_moy#48] +Output [3]: [d_date_sk#8, d_year#39, d_moy#40] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#47, d_moy#48] +Input [3]: [d_date_sk#8, d_year#39, d_moy#40] (52) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#47, d_moy#48] -Condition : ((((isnotnull(d_moy#48) AND isnotnull(d_year#47)) AND (d_moy#48 = 4)) AND (d_year#47 = 2001)) AND isnotnull(d_date_sk#8)) +Input [3]: [d_date_sk#8, d_year#39, d_moy#40] +Condition : ((((isnotnull(d_moy#40) AND isnotnull(d_year#39)) AND (d_moy#40 = 4)) AND (d_year#39 = 2001)) AND isnotnull(d_date_sk#8)) (53) Project [codegen id : 1] Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#47, d_moy#48] +Input [3]: [d_date_sk#8, d_year#39, d_moy#40] (54) BroadcastExchange Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] -Subquery:2 Hosting operator id = 24 Hosting Expression = sr_returned_date_sk#23 IN dynamicpruning#24 +Subquery:2 Hosting operator id = 24 Hosting Expression = sr_returned_date_sk#19 IN dynamicpruning#20 BroadcastExchange (59) +- * Project (58) +- * Filter (57) @@ -310,27 +310,27 @@ BroadcastExchange (59) (55) Scan parquet default.date_dim -Output [3]: [d_date_sk#25, d_year#50, d_moy#51] +Output [3]: [d_date_sk#21, d_year#41, d_moy#42] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (56) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#25, d_year#50, d_moy#51] +Input [3]: [d_date_sk#21, d_year#41, d_moy#42] (57) Filter [codegen id : 1] -Input [3]: [d_date_sk#25, d_year#50, d_moy#51] -Condition : (((((isnotnull(d_moy#51) AND isnotnull(d_year#50)) AND (d_moy#51 >= 4)) AND (d_moy#51 <= 10)) AND (d_year#50 = 2001)) AND isnotnull(d_date_sk#25)) +Input [3]: [d_date_sk#21, d_year#41, d_moy#42] +Condition : (((((isnotnull(d_moy#42) AND isnotnull(d_year#41)) AND (d_moy#42 >= 4)) AND (d_moy#42 <= 10)) AND (d_year#41 = 2001)) AND isnotnull(d_date_sk#21)) (58) Project [codegen id : 1] -Output [1]: [d_date_sk#25] -Input [3]: [d_date_sk#25, d_year#50, d_moy#51] +Output [1]: [d_date_sk#21] +Input [3]: [d_date_sk#21, d_year#41, d_moy#42] (59) BroadcastExchange -Input [1]: [d_date_sk#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] -Subquery:3 Hosting operator id = 36 Hosting Expression = cs_sold_date_sk#31 IN dynamicpruning#24 +Subquery:3 Hosting operator id = 36 Hosting Expression = cs_sold_date_sk#25 IN dynamicpruning#20 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt index b9d08511b6dce..c97bce93b3140 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q25/explain.txt @@ -73,7 +73,7 @@ Condition : ((isnotnull(sr_customer_sk#9) AND isnotnull(sr_item_sk#8)) AND isnot (7) BroadcastExchange Input [5]: [sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11, sr_returned_date_sk#12] -Arguments: HashedRelationBroadcastMode(List(input[1, int, false], input[0, int, false], input[2, int, false]),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(input[1, int, false], input[0, int, false], input[2, int, false]),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 8] Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] @@ -85,144 +85,144 @@ Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_net_loss#11, sr_returned_date_sk#12] (10) Scan parquet default.catalog_sales -Output [4]: [cs_bill_customer_sk#15, cs_item_sk#16, cs_net_profit#17, cs_sold_date_sk#18] +Output [4]: [cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16, cs_sold_date_sk#17] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#18), dynamicpruningexpression(cs_sold_date_sk#18 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(cs_sold_date_sk#17), dynamicpruningexpression(cs_sold_date_sk#17 IN dynamicpruning#13)] PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [4]: [cs_bill_customer_sk#15, cs_item_sk#16, cs_net_profit#17, cs_sold_date_sk#18] +Input [4]: [cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16, cs_sold_date_sk#17] (12) Filter [codegen id : 2] -Input [4]: [cs_bill_customer_sk#15, cs_item_sk#16, cs_net_profit#17, cs_sold_date_sk#18] -Condition : (isnotnull(cs_bill_customer_sk#15) AND isnotnull(cs_item_sk#16)) +Input [4]: [cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16, cs_sold_date_sk#17] +Condition : (isnotnull(cs_bill_customer_sk#14) AND isnotnull(cs_item_sk#15)) (13) BroadcastExchange -Input [4]: [cs_bill_customer_sk#15, cs_item_sk#16, cs_net_profit#17, cs_sold_date_sk#18] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#19] +Input [4]: [cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16, cs_sold_date_sk#17] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 8] Left keys [2]: [sr_customer_sk#9, sr_item_sk#8] -Right keys [2]: [cs_bill_customer_sk#15, cs_item_sk#16] +Right keys [2]: [cs_bill_customer_sk#14, cs_item_sk#15] Join condition: None (15) Project [codegen id : 8] -Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#11, sr_returned_date_sk#12, cs_net_profit#17, cs_sold_date_sk#18] -Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#8, sr_customer_sk#9, sr_net_loss#11, sr_returned_date_sk#12, cs_bill_customer_sk#15, cs_item_sk#16, cs_net_profit#17, cs_sold_date_sk#18] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#11, sr_returned_date_sk#12, cs_net_profit#16, cs_sold_date_sk#17] +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_item_sk#8, sr_customer_sk#9, sr_net_loss#11, sr_returned_date_sk#12, cs_bill_customer_sk#14, cs_item_sk#15, cs_net_profit#16, cs_sold_date_sk#17] (16) ReusedExchange [Reuses operator id: 45] -Output [1]: [d_date_sk#20] +Output [1]: [d_date_sk#18] (17) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_sold_date_sk#6] -Right keys [1]: [d_date_sk#20] +Right keys [1]: [d_date_sk#18] Join condition: None (18) Project [codegen id : 8] -Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#11, sr_returned_date_sk#12, cs_net_profit#17, cs_sold_date_sk#18] -Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#11, sr_returned_date_sk#12, cs_net_profit#17, cs_sold_date_sk#18, d_date_sk#20] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#11, sr_returned_date_sk#12, cs_net_profit#16, cs_sold_date_sk#17] +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, ss_sold_date_sk#6, sr_net_loss#11, sr_returned_date_sk#12, cs_net_profit#16, cs_sold_date_sk#17, d_date_sk#18] (19) ReusedExchange [Reuses operator id: 50] -Output [1]: [d_date_sk#21] +Output [1]: [d_date_sk#19] (20) BroadcastHashJoin [codegen id : 8] Left keys [1]: [sr_returned_date_sk#12] -Right keys [1]: [d_date_sk#21] +Right keys [1]: [d_date_sk#19] Join condition: None (21) Project [codegen id : 8] -Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#11, cs_net_profit#17, cs_sold_date_sk#18] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#11, sr_returned_date_sk#12, cs_net_profit#17, cs_sold_date_sk#18, d_date_sk#21] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#11, cs_net_profit#16, cs_sold_date_sk#17] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#11, sr_returned_date_sk#12, cs_net_profit#16, cs_sold_date_sk#17, d_date_sk#19] (22) ReusedExchange [Reuses operator id: 50] -Output [1]: [d_date_sk#22] +Output [1]: [d_date_sk#20] (23) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_sold_date_sk#18] -Right keys [1]: [d_date_sk#22] +Left keys [1]: [cs_sold_date_sk#17] +Right keys [1]: [d_date_sk#20] Join condition: None (24) Project [codegen id : 8] -Output [5]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#11, cs_net_profit#17] -Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#11, cs_net_profit#17, cs_sold_date_sk#18, d_date_sk#22] +Output [5]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#11, cs_net_profit#16] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#11, cs_net_profit#16, cs_sold_date_sk#17, d_date_sk#20] (25) Scan parquet default.store -Output [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Output [3]: [s_store_sk#21, s_store_id#22, s_store_name#23] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 6] -Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] +Input [3]: [s_store_sk#21, s_store_id#22, s_store_name#23] (27) Filter [codegen id : 6] -Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] -Condition : isnotnull(s_store_sk#23) +Input [3]: [s_store_sk#21, s_store_id#22, s_store_name#23] +Condition : isnotnull(s_store_sk#21) (28) BroadcastExchange -Input [3]: [s_store_sk#23, s_store_id#24, s_store_name#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Input [3]: [s_store_sk#21, s_store_id#22, s_store_name#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (29) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#23] +Right keys [1]: [s_store_sk#21] Join condition: None (30) Project [codegen id : 8] -Output [6]: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#11, cs_net_profit#17, s_store_id#24, s_store_name#25] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#11, cs_net_profit#17, s_store_sk#23, s_store_id#24, s_store_name#25] +Output [6]: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#11, cs_net_profit#16, s_store_id#22, s_store_name#23] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_net_profit#5, sr_net_loss#11, cs_net_profit#16, s_store_sk#21, s_store_id#22, s_store_name#23] (31) Scan parquet default.item -Output [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] +Output [3]: [i_item_sk#24, i_item_id#25, i_item_desc#26] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 7] -Input [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] +Input [3]: [i_item_sk#24, i_item_id#25, i_item_desc#26] (33) Filter [codegen id : 7] -Input [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] -Condition : isnotnull(i_item_sk#27) +Input [3]: [i_item_sk#24, i_item_id#25, i_item_desc#26] +Condition : isnotnull(i_item_sk#24) (34) BroadcastExchange -Input [3]: [i_item_sk#27, i_item_id#28, i_item_desc#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#30] +Input [3]: [i_item_sk#24, i_item_id#25, i_item_desc#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (35) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#27] +Right keys [1]: [i_item_sk#24] Join condition: None (36) Project [codegen id : 8] -Output [7]: [ss_net_profit#5, sr_net_loss#11, cs_net_profit#17, s_store_id#24, s_store_name#25, i_item_id#28, i_item_desc#29] -Input [9]: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#11, cs_net_profit#17, s_store_id#24, s_store_name#25, i_item_sk#27, i_item_id#28, i_item_desc#29] +Output [7]: [ss_net_profit#5, sr_net_loss#11, cs_net_profit#16, s_store_id#22, s_store_name#23, i_item_id#25, i_item_desc#26] +Input [9]: [ss_item_sk#1, ss_net_profit#5, sr_net_loss#11, cs_net_profit#16, s_store_id#22, s_store_name#23, i_item_sk#24, i_item_id#25, i_item_desc#26] (37) HashAggregate [codegen id : 8] -Input [7]: [ss_net_profit#5, sr_net_loss#11, cs_net_profit#17, s_store_id#24, s_store_name#25, i_item_id#28, i_item_desc#29] -Keys [4]: [i_item_id#28, i_item_desc#29, s_store_id#24, s_store_name#25] -Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(sr_net_loss#11)), partial_sum(UnscaledValue(cs_net_profit#17))] -Aggregate Attributes [3]: [sum#31, sum#32, sum#33] -Results [7]: [i_item_id#28, i_item_desc#29, s_store_id#24, s_store_name#25, sum#34, sum#35, sum#36] +Input [7]: [ss_net_profit#5, sr_net_loss#11, cs_net_profit#16, s_store_id#22, s_store_name#23, i_item_id#25, i_item_desc#26] +Keys [4]: [i_item_id#25, i_item_desc#26, s_store_id#22, s_store_name#23] +Functions [3]: [partial_sum(UnscaledValue(ss_net_profit#5)), partial_sum(UnscaledValue(sr_net_loss#11)), partial_sum(UnscaledValue(cs_net_profit#16))] +Aggregate Attributes [3]: [sum#27, sum#28, sum#29] +Results [7]: [i_item_id#25, i_item_desc#26, s_store_id#22, s_store_name#23, sum#30, sum#31, sum#32] (38) Exchange -Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#24, s_store_name#25, sum#34, sum#35, sum#36] -Arguments: hashpartitioning(i_item_id#28, i_item_desc#29, s_store_id#24, s_store_name#25, 5), ENSURE_REQUIREMENTS, [id=#37] +Input [7]: [i_item_id#25, i_item_desc#26, s_store_id#22, s_store_name#23, sum#30, sum#31, sum#32] +Arguments: hashpartitioning(i_item_id#25, i_item_desc#26, s_store_id#22, s_store_name#23, 5), ENSURE_REQUIREMENTS, [plan_id=5] (39) HashAggregate [codegen id : 9] -Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#24, s_store_name#25, sum#34, sum#35, sum#36] -Keys [4]: [i_item_id#28, i_item_desc#29, s_store_id#24, s_store_name#25] -Functions [3]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(sr_net_loss#11)), sum(UnscaledValue(cs_net_profit#17))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#5))#38, sum(UnscaledValue(sr_net_loss#11))#39, sum(UnscaledValue(cs_net_profit#17))#40] -Results [7]: [i_item_id#28, i_item_desc#29, s_store_id#24, s_store_name#25, MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#38,17,2) AS store_sales_profit#41, MakeDecimal(sum(UnscaledValue(sr_net_loss#11))#39,17,2) AS store_returns_loss#42, MakeDecimal(sum(UnscaledValue(cs_net_profit#17))#40,17,2) AS catalog_sales_profit#43] +Input [7]: [i_item_id#25, i_item_desc#26, s_store_id#22, s_store_name#23, sum#30, sum#31, sum#32] +Keys [4]: [i_item_id#25, i_item_desc#26, s_store_id#22, s_store_name#23] +Functions [3]: [sum(UnscaledValue(ss_net_profit#5)), sum(UnscaledValue(sr_net_loss#11)), sum(UnscaledValue(cs_net_profit#16))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_net_profit#5))#33, sum(UnscaledValue(sr_net_loss#11))#34, sum(UnscaledValue(cs_net_profit#16))#35] +Results [7]: [i_item_id#25, i_item_desc#26, s_store_id#22, s_store_name#23, MakeDecimal(sum(UnscaledValue(ss_net_profit#5))#33,17,2) AS store_sales_profit#36, MakeDecimal(sum(UnscaledValue(sr_net_loss#11))#34,17,2) AS store_returns_loss#37, MakeDecimal(sum(UnscaledValue(cs_net_profit#16))#35,17,2) AS catalog_sales_profit#38] (40) TakeOrderedAndProject -Input [7]: [i_item_id#28, i_item_desc#29, s_store_id#24, s_store_name#25, store_sales_profit#41, store_returns_loss#42, catalog_sales_profit#43] -Arguments: 100, [i_item_id#28 ASC NULLS FIRST, i_item_desc#29 ASC NULLS FIRST, s_store_id#24 ASC NULLS FIRST, s_store_name#25 ASC NULLS FIRST], [i_item_id#28, i_item_desc#29, s_store_id#24, s_store_name#25, store_sales_profit#41, store_returns_loss#42, catalog_sales_profit#43] +Input [7]: [i_item_id#25, i_item_desc#26, s_store_id#22, s_store_name#23, store_sales_profit#36, store_returns_loss#37, catalog_sales_profit#38] +Arguments: 100, [i_item_id#25 ASC NULLS FIRST, i_item_desc#26 ASC NULLS FIRST, s_store_id#22 ASC NULLS FIRST, s_store_name#23 ASC NULLS FIRST], [i_item_id#25, i_item_desc#26, s_store_id#22, s_store_name#23, store_sales_profit#36, store_returns_loss#37, catalog_sales_profit#38] ===== Subqueries ===== @@ -235,26 +235,26 @@ BroadcastExchange (45) (41) Scan parquet default.date_dim -Output [3]: [d_date_sk#20, d_year#44, d_moy#45] +Output [3]: [d_date_sk#18, d_year#39, d_moy#40] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,4), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (42) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#20, d_year#44, d_moy#45] +Input [3]: [d_date_sk#18, d_year#39, d_moy#40] (43) Filter [codegen id : 1] -Input [3]: [d_date_sk#20, d_year#44, d_moy#45] -Condition : ((((isnotnull(d_moy#45) AND isnotnull(d_year#44)) AND (d_moy#45 = 4)) AND (d_year#44 = 2001)) AND isnotnull(d_date_sk#20)) +Input [3]: [d_date_sk#18, d_year#39, d_moy#40] +Condition : ((((isnotnull(d_moy#40) AND isnotnull(d_year#39)) AND (d_moy#40 = 4)) AND (d_year#39 = 2001)) AND isnotnull(d_date_sk#18)) (44) Project [codegen id : 1] -Output [1]: [d_date_sk#20] -Input [3]: [d_date_sk#20, d_year#44, d_moy#45] +Output [1]: [d_date_sk#18] +Input [3]: [d_date_sk#18, d_year#39, d_moy#40] (45) BroadcastExchange -Input [1]: [d_date_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#46] +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] Subquery:2 Hosting operator id = 4 Hosting Expression = sr_returned_date_sk#12 IN dynamicpruning#13 BroadcastExchange (50) @@ -265,27 +265,27 @@ BroadcastExchange (50) (46) Scan parquet default.date_dim -Output [3]: [d_date_sk#21, d_year#47, d_moy#48] +Output [3]: [d_date_sk#19, d_year#41, d_moy#42] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,10), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (47) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#21, d_year#47, d_moy#48] +Input [3]: [d_date_sk#19, d_year#41, d_moy#42] (48) Filter [codegen id : 1] -Input [3]: [d_date_sk#21, d_year#47, d_moy#48] -Condition : (((((isnotnull(d_moy#48) AND isnotnull(d_year#47)) AND (d_moy#48 >= 4)) AND (d_moy#48 <= 10)) AND (d_year#47 = 2001)) AND isnotnull(d_date_sk#21)) +Input [3]: [d_date_sk#19, d_year#41, d_moy#42] +Condition : (((((isnotnull(d_moy#42) AND isnotnull(d_year#41)) AND (d_moy#42 >= 4)) AND (d_moy#42 <= 10)) AND (d_year#41 = 2001)) AND isnotnull(d_date_sk#19)) (49) Project [codegen id : 1] -Output [1]: [d_date_sk#21] -Input [3]: [d_date_sk#21, d_year#47, d_moy#48] +Output [1]: [d_date_sk#19] +Input [3]: [d_date_sk#19, d_year#41, d_moy#42] (50) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] +Input [1]: [d_date_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:3 Hosting operator id = 10 Hosting Expression = cs_sold_date_sk#18 IN dynamicpruning#13 +Subquery:3 Hosting operator id = 10 Hosting Expression = cs_sold_date_sk#17 IN dynamicpruning#13 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.sf100/explain.txt index 2a9a6d00ba3c8..794c0071290d7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26.sf100/explain.txt @@ -66,7 +66,7 @@ Input [4]: [cd_demo_sk#10, cd_gender#11, cd_marital_status#12, cd_education_stat (8) BroadcastExchange Input [1]: [cd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_bill_cdemo_sk#1] @@ -78,96 +78,96 @@ Output [7]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sal Input [9]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, cd_demo_sk#10] (11) Scan parquet default.promotion -Output [3]: [p_promo_sk#15, p_channel_email#16, p_channel_event#17] +Output [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [3]: [p_promo_sk#15, p_channel_email#16, p_channel_event#17] +Input [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] (13) Filter [codegen id : 2] -Input [3]: [p_promo_sk#15, p_channel_email#16, p_channel_event#17] -Condition : (((p_channel_email#16 = N) OR (p_channel_event#17 = N)) AND isnotnull(p_promo_sk#15)) +Input [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] +Condition : (((p_channel_email#15 = N) OR (p_channel_event#16 = N)) AND isnotnull(p_promo_sk#14)) (14) Project [codegen id : 2] -Output [1]: [p_promo_sk#15] -Input [3]: [p_promo_sk#15, p_channel_email#16, p_channel_event#17] +Output [1]: [p_promo_sk#14] +Input [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] (15) BroadcastExchange -Input [1]: [p_promo_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [p_promo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_promo_sk#3] -Right keys [1]: [p_promo_sk#15] +Right keys [1]: [p_promo_sk#14] Join condition: None (17) Project [codegen id : 5] Output [6]: [cs_item_sk#2, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8] -Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, p_promo_sk#15] +Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, p_promo_sk#14] (18) ReusedExchange [Reuses operator id: 35] -Output [1]: [d_date_sk#19] +Output [1]: [d_date_sk#17] (19) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_sold_date_sk#8] -Right keys [1]: [d_date_sk#19] +Right keys [1]: [d_date_sk#17] Join condition: None (20) Project [codegen id : 5] Output [5]: [cs_item_sk#2, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7] -Input [7]: [cs_item_sk#2, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, d_date_sk#19] +Input [7]: [cs_item_sk#2, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, d_date_sk#17] (21) Scan parquet default.item -Output [2]: [i_item_sk#20, i_item_id#21] +Output [2]: [i_item_sk#18, i_item_id#19] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#20, i_item_id#21] +Input [2]: [i_item_sk#18, i_item_id#19] (23) Filter [codegen id : 4] -Input [2]: [i_item_sk#20, i_item_id#21] -Condition : isnotnull(i_item_sk#20) +Input [2]: [i_item_sk#18, i_item_id#19] +Condition : isnotnull(i_item_sk#18) (24) BroadcastExchange -Input [2]: [i_item_sk#20, i_item_id#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] +Input [2]: [i_item_sk#18, i_item_id#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (25) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#20] +Right keys [1]: [i_item_sk#18] Join condition: None (26) Project [codegen id : 5] -Output [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#21] -Input [7]: [cs_item_sk#2, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_sk#20, i_item_id#21] +Output [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#19] +Input [7]: [cs_item_sk#2, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_sk#18, i_item_id#19] (27) HashAggregate [codegen id : 5] -Input [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#21] -Keys [1]: [i_item_id#21] +Input [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#19] +Keys [1]: [i_item_id#19] Functions [4]: [partial_avg(cs_quantity#4), partial_avg(UnscaledValue(cs_list_price#5)), partial_avg(UnscaledValue(cs_coupon_amt#7)), partial_avg(UnscaledValue(cs_sales_price#6))] -Aggregate Attributes [8]: [sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] -Results [9]: [i_item_id#21, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Aggregate Attributes [8]: [sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Results [9]: [i_item_id#19, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] (28) Exchange -Input [9]: [i_item_id#21, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] -Arguments: hashpartitioning(i_item_id#21, 5), ENSURE_REQUIREMENTS, [id=#39] +Input [9]: [i_item_id#19, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Arguments: hashpartitioning(i_item_id#19, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 6] -Input [9]: [i_item_id#21, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] -Keys [1]: [i_item_id#21] +Input [9]: [i_item_id#19, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Keys [1]: [i_item_id#19] Functions [4]: [avg(cs_quantity#4), avg(UnscaledValue(cs_list_price#5)), avg(UnscaledValue(cs_coupon_amt#7)), avg(UnscaledValue(cs_sales_price#6))] -Aggregate Attributes [4]: [avg(cs_quantity#4)#40, avg(UnscaledValue(cs_list_price#5))#41, avg(UnscaledValue(cs_coupon_amt#7))#42, avg(UnscaledValue(cs_sales_price#6))#43] -Results [5]: [i_item_id#21, avg(cs_quantity#4)#40 AS agg1#44, cast((avg(UnscaledValue(cs_list_price#5))#41 / 100.0) as decimal(11,6)) AS agg2#45, cast((avg(UnscaledValue(cs_coupon_amt#7))#42 / 100.0) as decimal(11,6)) AS agg3#46, cast((avg(UnscaledValue(cs_sales_price#6))#43 / 100.0) as decimal(11,6)) AS agg4#47] +Aggregate Attributes [4]: [avg(cs_quantity#4)#36, avg(UnscaledValue(cs_list_price#5))#37, avg(UnscaledValue(cs_coupon_amt#7))#38, avg(UnscaledValue(cs_sales_price#6))#39] +Results [5]: [i_item_id#19, avg(cs_quantity#4)#36 AS agg1#40, cast((avg(UnscaledValue(cs_list_price#5))#37 / 100.0) as decimal(11,6)) AS agg2#41, cast((avg(UnscaledValue(cs_coupon_amt#7))#38 / 100.0) as decimal(11,6)) AS agg3#42, cast((avg(UnscaledValue(cs_sales_price#6))#39 / 100.0) as decimal(11,6)) AS agg4#43] (30) TakeOrderedAndProject -Input [5]: [i_item_id#21, agg1#44, agg2#45, agg3#46, agg4#47] -Arguments: 100, [i_item_id#21 ASC NULLS FIRST], [i_item_id#21, agg1#44, agg2#45, agg3#46, agg4#47] +Input [5]: [i_item_id#19, agg1#40, agg2#41, agg3#42, agg4#43] +Arguments: 100, [i_item_id#19 ASC NULLS FIRST], [i_item_id#19, agg1#40, agg2#41, agg3#42, agg4#43] ===== Subqueries ===== @@ -180,25 +180,25 @@ BroadcastExchange (35) (31) Scan parquet default.date_dim -Output [2]: [d_date_sk#19, d_year#48] +Output [2]: [d_date_sk#17, d_year#44] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#19, d_year#48] +Input [2]: [d_date_sk#17, d_year#44] (33) Filter [codegen id : 1] -Input [2]: [d_date_sk#19, d_year#48] -Condition : ((isnotnull(d_year#48) AND (d_year#48 = 2000)) AND isnotnull(d_date_sk#19)) +Input [2]: [d_date_sk#17, d_year#44] +Condition : ((isnotnull(d_year#44) AND (d_year#44 = 2000)) AND isnotnull(d_date_sk#17)) (34) Project [codegen id : 1] -Output [1]: [d_date_sk#19] -Input [2]: [d_date_sk#19, d_year#48] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_year#44] (35) BroadcastExchange -Input [1]: [d_date_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt index bde65bfda5cbf..400c7ef4af608 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q26/explain.txt @@ -66,7 +66,7 @@ Input [4]: [cd_demo_sk#10, cd_gender#11, cd_marital_status#12, cd_education_stat (8) BroadcastExchange Input [1]: [cd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_bill_cdemo_sk#1] @@ -78,96 +78,96 @@ Output [7]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sal Input [9]: [cs_bill_cdemo_sk#1, cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, cd_demo_sk#10] (11) ReusedExchange [Reuses operator id: 35] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_sold_date_sk#8] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (13) Project [codegen id : 5] Output [6]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7] -Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, d_date_sk#15] +Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_sold_date_sk#8, d_date_sk#14] (14) Scan parquet default.item -Output [2]: [i_item_sk#16, i_item_id#17] +Output [2]: [i_item_sk#15, i_item_id#16] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_sk#16, i_item_id#17] +Input [2]: [i_item_sk#15, i_item_id#16] (16) Filter [codegen id : 3] -Input [2]: [i_item_sk#16, i_item_id#17] -Condition : isnotnull(i_item_sk#16) +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : isnotnull(i_item_sk#15) (17) BroadcastExchange -Input [2]: [i_item_sk#16, i_item_id#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] +Input [2]: [i_item_sk#15, i_item_id#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#16] +Right keys [1]: [i_item_sk#15] Join condition: None (19) Project [codegen id : 5] -Output [6]: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#17] -Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_sk#16, i_item_id#17] +Output [6]: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Input [8]: [cs_item_sk#2, cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_sk#15, i_item_id#16] (20) Scan parquet default.promotion -Output [3]: [p_promo_sk#19, p_channel_email#20, p_channel_event#21] +Output [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [3]: [p_promo_sk#19, p_channel_email#20, p_channel_event#21] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] (22) Filter [codegen id : 4] -Input [3]: [p_promo_sk#19, p_channel_email#20, p_channel_event#21] -Condition : (((p_channel_email#20 = N) OR (p_channel_event#21 = N)) AND isnotnull(p_promo_sk#19)) +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Condition : (((p_channel_email#18 = N) OR (p_channel_event#19 = N)) AND isnotnull(p_promo_sk#17)) (23) Project [codegen id : 4] -Output [1]: [p_promo_sk#19] -Input [3]: [p_promo_sk#19, p_channel_email#20, p_channel_event#21] +Output [1]: [p_promo_sk#17] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] (24) BroadcastExchange -Input [1]: [p_promo_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [p_promo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (25) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_promo_sk#3] -Right keys [1]: [p_promo_sk#19] +Right keys [1]: [p_promo_sk#17] Join condition: None (26) Project [codegen id : 5] -Output [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#17] -Input [7]: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#17, p_promo_sk#19] +Output [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Input [7]: [cs_promo_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16, p_promo_sk#17] (27) HashAggregate [codegen id : 5] -Input [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#17] -Keys [1]: [i_item_id#17] +Input [5]: [cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, i_item_id#16] +Keys [1]: [i_item_id#16] Functions [4]: [partial_avg(cs_quantity#4), partial_avg(UnscaledValue(cs_list_price#5)), partial_avg(UnscaledValue(cs_coupon_amt#7)), partial_avg(UnscaledValue(cs_sales_price#6))] -Aggregate Attributes [8]: [sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] -Results [9]: [i_item_id#17, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Aggregate Attributes [8]: [sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Results [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] (28) Exchange -Input [9]: [i_item_id#17, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] -Arguments: hashpartitioning(i_item_id#17, 5), ENSURE_REQUIREMENTS, [id=#39] +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Arguments: hashpartitioning(i_item_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 6] -Input [9]: [i_item_id#17, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] -Keys [1]: [i_item_id#17] +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Keys [1]: [i_item_id#16] Functions [4]: [avg(cs_quantity#4), avg(UnscaledValue(cs_list_price#5)), avg(UnscaledValue(cs_coupon_amt#7)), avg(UnscaledValue(cs_sales_price#6))] -Aggregate Attributes [4]: [avg(cs_quantity#4)#40, avg(UnscaledValue(cs_list_price#5))#41, avg(UnscaledValue(cs_coupon_amt#7))#42, avg(UnscaledValue(cs_sales_price#6))#43] -Results [5]: [i_item_id#17, avg(cs_quantity#4)#40 AS agg1#44, cast((avg(UnscaledValue(cs_list_price#5))#41 / 100.0) as decimal(11,6)) AS agg2#45, cast((avg(UnscaledValue(cs_coupon_amt#7))#42 / 100.0) as decimal(11,6)) AS agg3#46, cast((avg(UnscaledValue(cs_sales_price#6))#43 / 100.0) as decimal(11,6)) AS agg4#47] +Aggregate Attributes [4]: [avg(cs_quantity#4)#36, avg(UnscaledValue(cs_list_price#5))#37, avg(UnscaledValue(cs_coupon_amt#7))#38, avg(UnscaledValue(cs_sales_price#6))#39] +Results [5]: [i_item_id#16, avg(cs_quantity#4)#36 AS agg1#40, cast((avg(UnscaledValue(cs_list_price#5))#37 / 100.0) as decimal(11,6)) AS agg2#41, cast((avg(UnscaledValue(cs_coupon_amt#7))#38 / 100.0) as decimal(11,6)) AS agg3#42, cast((avg(UnscaledValue(cs_sales_price#6))#39 / 100.0) as decimal(11,6)) AS agg4#43] (30) TakeOrderedAndProject -Input [5]: [i_item_id#17, agg1#44, agg2#45, agg3#46, agg4#47] -Arguments: 100, [i_item_id#17 ASC NULLS FIRST], [i_item_id#17, agg1#44, agg2#45, agg3#46, agg4#47] +Input [5]: [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] +Arguments: 100, [i_item_id#16 ASC NULLS FIRST], [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] ===== Subqueries ===== @@ -180,25 +180,25 @@ BroadcastExchange (35) (31) Scan parquet default.date_dim -Output [2]: [d_date_sk#15, d_year#48] +Output [2]: [d_date_sk#14, d_year#44] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#48] +Input [2]: [d_date_sk#14, d_year#44] (33) Filter [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#48] -Condition : ((isnotnull(d_year#48) AND (d_year#48 = 2000)) AND isnotnull(d_date_sk#15)) +Input [2]: [d_date_sk#14, d_year#44] +Condition : ((isnotnull(d_year#44) AND (d_year#44 = 2000)) AND isnotnull(d_date_sk#14)) (34) Project [codegen id : 1] -Output [1]: [d_date_sk#15] -Input [2]: [d_date_sk#15, d_year#48] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#44] (35) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.sf100/explain.txt index 7600710283f2a..b75522de2a074 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27.sf100/explain.txt @@ -66,7 +66,7 @@ Input [4]: [cd_demo_sk#10, cd_gender#11, cd_marital_status#12, cd_education_stat (8) BroadcastExchange Input [1]: [cd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_cdemo_sk#2] @@ -78,96 +78,96 @@ Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#10] (11) ReusedExchange [Reuses operator id: 35] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (13) Project [codegen id : 5] Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#14] (14) Scan parquet default.store -Output [2]: [s_store_sk#16, s_state#17] +Output [2]: [s_store_sk#15, s_state#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#16, s_state#17] +Input [2]: [s_store_sk#15, s_state#16] (16) Filter [codegen id : 3] -Input [2]: [s_store_sk#16, s_state#17] -Condition : ((isnotnull(s_state#17) AND (s_state#17 = TN)) AND isnotnull(s_store_sk#16)) +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) (17) BroadcastExchange -Input [2]: [s_store_sk#16, s_state#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] +Input [2]: [s_store_sk#15, s_state#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] +Right keys [1]: [s_store_sk#15] Join condition: None (19) Project [codegen id : 5] -Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#17] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#16, s_state#17] +Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] (20) Scan parquet default.item -Output [2]: [i_item_sk#19, i_item_id#20] +Output [2]: [i_item_sk#17, i_item_id#18] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#19, i_item_id#20] +Input [2]: [i_item_sk#17, i_item_id#18] (22) Filter [codegen id : 4] -Input [2]: [i_item_sk#19, i_item_id#20] -Condition : isnotnull(i_item_sk#19) +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) (23) BroadcastExchange -Input [2]: [i_item_sk#19, i_item_id#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#19] +Right keys [1]: [i_item_sk#17] Join condition: None (25) Project [codegen id : 5] -Output [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#20, s_state#17] -Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#17, i_item_sk#19, i_item_id#20] +Output [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16] +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] (26) Expand [codegen id : 5] -Input [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#20, s_state#17] -Arguments: [[ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#20, s_state#17, 0], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#20, null, 1], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, null, null, 3]], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#22, s_state#23, spark_grouping_id#24] +Input [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16] +Arguments: [[ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16, 0], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, null, 1], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, null, null, 3]], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] (27) HashAggregate [codegen id : 5] -Input [7]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#22, s_state#23, spark_grouping_id#24] -Keys [3]: [i_item_id#22, s_state#23, spark_grouping_id#24] +Input [7]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] +Keys [3]: [i_item_id#19, s_state#20, spark_grouping_id#21] Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [8]: [sum#25, count#26, sum#27, count#28, sum#29, count#30, sum#31, count#32] -Results [11]: [i_item_id#22, s_state#23, spark_grouping_id#24, sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40] +Aggregate Attributes [8]: [sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28, count#29] +Results [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] (28) Exchange -Input [11]: [i_item_id#22, s_state#23, spark_grouping_id#24, sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40] -Arguments: hashpartitioning(i_item_id#22, s_state#23, spark_grouping_id#24, 5), ENSURE_REQUIREMENTS, [id=#41] +Input [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] +Arguments: hashpartitioning(i_item_id#19, s_state#20, spark_grouping_id#21, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 6] -Input [11]: [i_item_id#22, s_state#23, spark_grouping_id#24, sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40] -Keys [3]: [i_item_id#22, s_state#23, spark_grouping_id#24] +Input [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] +Keys [3]: [i_item_id#19, s_state#20, spark_grouping_id#21] Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [4]: [avg(ss_quantity#4)#42, avg(UnscaledValue(ss_list_price#5))#43, avg(UnscaledValue(ss_coupon_amt#7))#44, avg(UnscaledValue(ss_sales_price#6))#45] -Results [7]: [i_item_id#22, s_state#23, cast((shiftright(spark_grouping_id#24, 0) & 1) as tinyint) AS g_state#46, avg(ss_quantity#4)#42 AS agg1#47, cast((avg(UnscaledValue(ss_list_price#5))#43 / 100.0) as decimal(11,6)) AS agg2#48, cast((avg(UnscaledValue(ss_coupon_amt#7))#44 / 100.0) as decimal(11,6)) AS agg3#49, cast((avg(UnscaledValue(ss_sales_price#6))#45 / 100.0) as decimal(11,6)) AS agg4#50] +Aggregate Attributes [4]: [avg(ss_quantity#4)#38, avg(UnscaledValue(ss_list_price#5))#39, avg(UnscaledValue(ss_coupon_amt#7))#40, avg(UnscaledValue(ss_sales_price#6))#41] +Results [7]: [i_item_id#19, s_state#20, cast((shiftright(spark_grouping_id#21, 0) & 1) as tinyint) AS g_state#42, avg(ss_quantity#4)#38 AS agg1#43, cast((avg(UnscaledValue(ss_list_price#5))#39 / 100.0) as decimal(11,6)) AS agg2#44, cast((avg(UnscaledValue(ss_coupon_amt#7))#40 / 100.0) as decimal(11,6)) AS agg3#45, cast((avg(UnscaledValue(ss_sales_price#6))#41 / 100.0) as decimal(11,6)) AS agg4#46] (30) TakeOrderedAndProject -Input [7]: [i_item_id#22, s_state#23, g_state#46, agg1#47, agg2#48, agg3#49, agg4#50] -Arguments: 100, [i_item_id#22 ASC NULLS FIRST, s_state#23 ASC NULLS FIRST], [i_item_id#22, s_state#23, g_state#46, agg1#47, agg2#48, agg3#49, agg4#50] +Input [7]: [i_item_id#19, s_state#20, g_state#42, agg1#43, agg2#44, agg3#45, agg4#46] +Arguments: 100, [i_item_id#19 ASC NULLS FIRST, s_state#20 ASC NULLS FIRST], [i_item_id#19, s_state#20, g_state#42, agg1#43, agg2#44, agg3#45, agg4#46] ===== Subqueries ===== @@ -180,25 +180,25 @@ BroadcastExchange (35) (31) Scan parquet default.date_dim -Output [2]: [d_date_sk#15, d_year#51] +Output [2]: [d_date_sk#14, d_year#47] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#51] +Input [2]: [d_date_sk#14, d_year#47] (33) Filter [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#51] -Condition : ((isnotnull(d_year#51) AND (d_year#51 = 2002)) AND isnotnull(d_date_sk#15)) +Input [2]: [d_date_sk#14, d_year#47] +Condition : ((isnotnull(d_year#47) AND (d_year#47 = 2002)) AND isnotnull(d_date_sk#14)) (34) Project [codegen id : 1] -Output [1]: [d_date_sk#15] -Input [2]: [d_date_sk#15, d_year#51] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#47] (35) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt index 7600710283f2a..b75522de2a074 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q27/explain.txt @@ -66,7 +66,7 @@ Input [4]: [cd_demo_sk#10, cd_gender#11, cd_marital_status#12, cd_education_stat (8) BroadcastExchange Input [1]: [cd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_cdemo_sk#2] @@ -78,96 +78,96 @@ Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#10] (11) ReusedExchange [Reuses operator id: 35] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (13) Project [codegen id : 5] Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#14] (14) Scan parquet default.store -Output [2]: [s_store_sk#16, s_state#17] +Output [2]: [s_store_sk#15, s_state#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#16, s_state#17] +Input [2]: [s_store_sk#15, s_state#16] (16) Filter [codegen id : 3] -Input [2]: [s_store_sk#16, s_state#17] -Condition : ((isnotnull(s_state#17) AND (s_state#17 = TN)) AND isnotnull(s_store_sk#16)) +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) (17) BroadcastExchange -Input [2]: [s_store_sk#16, s_state#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] +Input [2]: [s_store_sk#15, s_state#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] +Right keys [1]: [s_store_sk#15] Join condition: None (19) Project [codegen id : 5] -Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#17] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#16, s_state#17] +Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] (20) Scan parquet default.item -Output [2]: [i_item_sk#19, i_item_id#20] +Output [2]: [i_item_sk#17, i_item_id#18] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#19, i_item_id#20] +Input [2]: [i_item_sk#17, i_item_id#18] (22) Filter [codegen id : 4] -Input [2]: [i_item_sk#19, i_item_id#20] -Condition : isnotnull(i_item_sk#19) +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) (23) BroadcastExchange -Input [2]: [i_item_sk#19, i_item_id#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#19] +Right keys [1]: [i_item_sk#17] Join condition: None (25) Project [codegen id : 5] -Output [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#20, s_state#17] -Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#17, i_item_sk#19, i_item_id#20] +Output [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16] +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] (26) Expand [codegen id : 5] -Input [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#20, s_state#17] -Arguments: [[ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#20, s_state#17, 0], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#20, null, 1], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, null, null, 3]], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#22, s_state#23, spark_grouping_id#24] +Input [6]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16] +Arguments: [[ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, s_state#16, 0], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#18, null, 1], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, null, null, 3]], [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] (27) HashAggregate [codegen id : 5] -Input [7]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#22, s_state#23, spark_grouping_id#24] -Keys [3]: [i_item_id#22, s_state#23, spark_grouping_id#24] +Input [7]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19, s_state#20, spark_grouping_id#21] +Keys [3]: [i_item_id#19, s_state#20, spark_grouping_id#21] Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [8]: [sum#25, count#26, sum#27, count#28, sum#29, count#30, sum#31, count#32] -Results [11]: [i_item_id#22, s_state#23, spark_grouping_id#24, sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40] +Aggregate Attributes [8]: [sum#22, count#23, sum#24, count#25, sum#26, count#27, sum#28, count#29] +Results [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] (28) Exchange -Input [11]: [i_item_id#22, s_state#23, spark_grouping_id#24, sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40] -Arguments: hashpartitioning(i_item_id#22, s_state#23, spark_grouping_id#24, 5), ENSURE_REQUIREMENTS, [id=#41] +Input [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] +Arguments: hashpartitioning(i_item_id#19, s_state#20, spark_grouping_id#21, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 6] -Input [11]: [i_item_id#22, s_state#23, spark_grouping_id#24, sum#33, count#34, sum#35, count#36, sum#37, count#38, sum#39, count#40] -Keys [3]: [i_item_id#22, s_state#23, spark_grouping_id#24] +Input [11]: [i_item_id#19, s_state#20, spark_grouping_id#21, sum#30, count#31, sum#32, count#33, sum#34, count#35, sum#36, count#37] +Keys [3]: [i_item_id#19, s_state#20, spark_grouping_id#21] Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [4]: [avg(ss_quantity#4)#42, avg(UnscaledValue(ss_list_price#5))#43, avg(UnscaledValue(ss_coupon_amt#7))#44, avg(UnscaledValue(ss_sales_price#6))#45] -Results [7]: [i_item_id#22, s_state#23, cast((shiftright(spark_grouping_id#24, 0) & 1) as tinyint) AS g_state#46, avg(ss_quantity#4)#42 AS agg1#47, cast((avg(UnscaledValue(ss_list_price#5))#43 / 100.0) as decimal(11,6)) AS agg2#48, cast((avg(UnscaledValue(ss_coupon_amt#7))#44 / 100.0) as decimal(11,6)) AS agg3#49, cast((avg(UnscaledValue(ss_sales_price#6))#45 / 100.0) as decimal(11,6)) AS agg4#50] +Aggregate Attributes [4]: [avg(ss_quantity#4)#38, avg(UnscaledValue(ss_list_price#5))#39, avg(UnscaledValue(ss_coupon_amt#7))#40, avg(UnscaledValue(ss_sales_price#6))#41] +Results [7]: [i_item_id#19, s_state#20, cast((shiftright(spark_grouping_id#21, 0) & 1) as tinyint) AS g_state#42, avg(ss_quantity#4)#38 AS agg1#43, cast((avg(UnscaledValue(ss_list_price#5))#39 / 100.0) as decimal(11,6)) AS agg2#44, cast((avg(UnscaledValue(ss_coupon_amt#7))#40 / 100.0) as decimal(11,6)) AS agg3#45, cast((avg(UnscaledValue(ss_sales_price#6))#41 / 100.0) as decimal(11,6)) AS agg4#46] (30) TakeOrderedAndProject -Input [7]: [i_item_id#22, s_state#23, g_state#46, agg1#47, agg2#48, agg3#49, agg4#50] -Arguments: 100, [i_item_id#22 ASC NULLS FIRST, s_state#23 ASC NULLS FIRST], [i_item_id#22, s_state#23, g_state#46, agg1#47, agg2#48, agg3#49, agg4#50] +Input [7]: [i_item_id#19, s_state#20, g_state#42, agg1#43, agg2#44, agg3#45, agg4#46] +Arguments: 100, [i_item_id#19 ASC NULLS FIRST, s_state#20 ASC NULLS FIRST], [i_item_id#19, s_state#20, g_state#42, agg1#43, agg2#44, agg3#45, agg4#46] ===== Subqueries ===== @@ -180,25 +180,25 @@ BroadcastExchange (35) (31) Scan parquet default.date_dim -Output [2]: [d_date_sk#15, d_year#51] +Output [2]: [d_date_sk#14, d_year#47] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#51] +Input [2]: [d_date_sk#14, d_year#47] (33) Filter [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#51] -Condition : ((isnotnull(d_year#51) AND (d_year#51 = 2002)) AND isnotnull(d_date_sk#15)) +Input [2]: [d_date_sk#14, d_year#47] +Condition : ((isnotnull(d_year#47) AND (d_year#47 = 2002)) AND isnotnull(d_date_sk#14)) (34) Project [codegen id : 1] -Output [1]: [d_date_sk#15] -Input [2]: [d_date_sk#15, d_year#51] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#47] (35) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt index eec45ea549531..6c140d3d95a9d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28.sf100/explain.txt @@ -98,7 +98,7 @@ Results [4]: [ss_list_price#3, sum#8, count#9, count#10] (6) Exchange Input [4]: [ss_list_price#3, sum#8, count#9, count#10] -Arguments: hashpartitioning(ss_list_price#3, 5), ENSURE_REQUIREMENTS, [id=#11] +Arguments: hashpartitioning(ss_list_price#3, 5), ENSURE_REQUIREMENTS, [plan_id=1] (7) HashAggregate [codegen id : 2] Input [4]: [ss_list_price#3, sum#8, count#9, count#10] @@ -111,321 +111,321 @@ Results [4]: [ss_list_price#3, sum#8, count#9, count#10] Input [4]: [ss_list_price#3, sum#8, count#9, count#10] Keys: [] Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7, count(ss_list_price#3)#12] -Results [4]: [sum#8, count#9, count#10, count#13] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7, count(ss_list_price#3)#11] +Results [4]: [sum#8, count#9, count#10, count#12] (9) Exchange -Input [4]: [sum#8, count#9, count#10, count#13] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#14] +Input [4]: [sum#8, count#9, count#10, count#12] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] (10) HashAggregate [codegen id : 18] -Input [4]: [sum#8, count#9, count#10, count#13] +Input [4]: [sum#8, count#9, count#10, count#12] Keys: [] Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7, count(ss_list_price#3)#12] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#6 / 100.0) as decimal(11,6)) AS B1_LP#15, count(ss_list_price#3)#7 AS B1_CNT#16, count(ss_list_price#3)#12 AS B1_CNTD#17] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7, count(ss_list_price#3)#11] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#6 / 100.0) as decimal(11,6)) AS B1_LP#13, count(ss_list_price#3)#7 AS B1_CNT#14, count(ss_list_price#3)#11 AS B1_CNTD#15] (11) Scan parquet default.store_sales -Output [5]: [ss_quantity#18, ss_wholesale_cost#19, ss_list_price#20, ss_coupon_amt#21, ss_sold_date_sk#22] +Output [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10), Or(Or(And(GreaterThanOrEqual(ss_list_price,90.00),LessThanOrEqual(ss_list_price,100.00)),And(GreaterThanOrEqual(ss_coupon_amt,2323.00),LessThanOrEqual(ss_coupon_amt,3323.00))),And(GreaterThanOrEqual(ss_wholesale_cost,31.00),LessThanOrEqual(ss_wholesale_cost,51.00)))] ReadSchema: struct (12) ColumnarToRow [codegen id : 3] -Input [5]: [ss_quantity#18, ss_wholesale_cost#19, ss_list_price#20, ss_coupon_amt#21, ss_sold_date_sk#22] +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] (13) Filter [codegen id : 3] -Input [5]: [ss_quantity#18, ss_wholesale_cost#19, ss_list_price#20, ss_coupon_amt#21, ss_sold_date_sk#22] -Condition : (((isnotnull(ss_quantity#18) AND (ss_quantity#18 >= 6)) AND (ss_quantity#18 <= 10)) AND ((((ss_list_price#20 >= 90.00) AND (ss_list_price#20 <= 100.00)) OR ((ss_coupon_amt#21 >= 2323.00) AND (ss_coupon_amt#21 <= 3323.00))) OR ((ss_wholesale_cost#19 >= 31.00) AND (ss_wholesale_cost#19 <= 51.00)))) +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Condition : (((isnotnull(ss_quantity#16) AND (ss_quantity#16 >= 6)) AND (ss_quantity#16 <= 10)) AND ((((ss_list_price#18 >= 90.00) AND (ss_list_price#18 <= 100.00)) OR ((ss_coupon_amt#19 >= 2323.00) AND (ss_coupon_amt#19 <= 3323.00))) OR ((ss_wholesale_cost#17 >= 31.00) AND (ss_wholesale_cost#17 <= 51.00)))) (14) Project [codegen id : 3] -Output [1]: [ss_list_price#20] -Input [5]: [ss_quantity#18, ss_wholesale_cost#19, ss_list_price#20, ss_coupon_amt#21, ss_sold_date_sk#22] +Output [1]: [ss_list_price#18] +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] (15) HashAggregate [codegen id : 3] -Input [1]: [ss_list_price#20] -Keys [1]: [ss_list_price#20] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#20)), partial_count(ss_list_price#20)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#20))#23, count(ss_list_price#20)#24] -Results [4]: [ss_list_price#20, sum#25, count#26, count#27] +Input [1]: [ss_list_price#18] +Keys [1]: [ss_list_price#18] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#18)), partial_count(ss_list_price#18)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22] +Results [4]: [ss_list_price#18, sum#23, count#24, count#25] (16) Exchange -Input [4]: [ss_list_price#20, sum#25, count#26, count#27] -Arguments: hashpartitioning(ss_list_price#20, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] +Arguments: hashpartitioning(ss_list_price#18, 5), ENSURE_REQUIREMENTS, [plan_id=3] (17) HashAggregate [codegen id : 4] -Input [4]: [ss_list_price#20, sum#25, count#26, count#27] -Keys [1]: [ss_list_price#20] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#20)), merge_count(ss_list_price#20)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#20))#23, count(ss_list_price#20)#24] -Results [4]: [ss_list_price#20, sum#25, count#26, count#27] +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] +Keys [1]: [ss_list_price#18] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#18)), merge_count(ss_list_price#18)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22] +Results [4]: [ss_list_price#18, sum#23, count#24, count#25] (18) HashAggregate [codegen id : 4] -Input [4]: [ss_list_price#20, sum#25, count#26, count#27] +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#20)), merge_count(ss_list_price#20), partial_count(distinct ss_list_price#20)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#20))#23, count(ss_list_price#20)#24, count(ss_list_price#20)#29] -Results [4]: [sum#25, count#26, count#27, count#30] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#18)), merge_count(ss_list_price#18), partial_count(distinct ss_list_price#18)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22, count(ss_list_price#18)#26] +Results [4]: [sum#23, count#24, count#25, count#27] (19) Exchange -Input [4]: [sum#25, count#26, count#27, count#30] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#31] +Input [4]: [sum#23, count#24, count#25, count#27] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (20) HashAggregate [codegen id : 5] -Input [4]: [sum#25, count#26, count#27, count#30] +Input [4]: [sum#23, count#24, count#25, count#27] Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#20)), count(ss_list_price#20), count(distinct ss_list_price#20)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#20))#23, count(ss_list_price#20)#24, count(ss_list_price#20)#29] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#20))#23 / 100.0) as decimal(11,6)) AS B2_LP#32, count(ss_list_price#20)#24 AS B2_CNT#33, count(ss_list_price#20)#29 AS B2_CNTD#34] +Functions [3]: [avg(UnscaledValue(ss_list_price#18)), count(ss_list_price#18), count(distinct ss_list_price#18)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22, count(ss_list_price#18)#26] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#18))#21 / 100.0) as decimal(11,6)) AS B2_LP#28, count(ss_list_price#18)#22 AS B2_CNT#29, count(ss_list_price#18)#26 AS B2_CNTD#30] (21) BroadcastExchange -Input [3]: [B2_LP#32, B2_CNT#33, B2_CNTD#34] -Arguments: IdentityBroadcastMode, [id=#35] +Input [3]: [B2_LP#28, B2_CNT#29, B2_CNTD#30] +Arguments: IdentityBroadcastMode, [plan_id=5] (22) BroadcastNestedLoopJoin [codegen id : 18] Join condition: None (23) Scan parquet default.store_sales -Output [5]: [ss_quantity#36, ss_wholesale_cost#37, ss_list_price#38, ss_coupon_amt#39, ss_sold_date_sk#40] +Output [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15), Or(Or(And(GreaterThanOrEqual(ss_list_price,142.00),LessThanOrEqual(ss_list_price,152.00)),And(GreaterThanOrEqual(ss_coupon_amt,12214.00),LessThanOrEqual(ss_coupon_amt,13214.00))),And(GreaterThanOrEqual(ss_wholesale_cost,79.00),LessThanOrEqual(ss_wholesale_cost,99.00)))] ReadSchema: struct (24) ColumnarToRow [codegen id : 6] -Input [5]: [ss_quantity#36, ss_wholesale_cost#37, ss_list_price#38, ss_coupon_amt#39, ss_sold_date_sk#40] +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] (25) Filter [codegen id : 6] -Input [5]: [ss_quantity#36, ss_wholesale_cost#37, ss_list_price#38, ss_coupon_amt#39, ss_sold_date_sk#40] -Condition : (((isnotnull(ss_quantity#36) AND (ss_quantity#36 >= 11)) AND (ss_quantity#36 <= 15)) AND ((((ss_list_price#38 >= 142.00) AND (ss_list_price#38 <= 152.00)) OR ((ss_coupon_amt#39 >= 12214.00) AND (ss_coupon_amt#39 <= 13214.00))) OR ((ss_wholesale_cost#37 >= 79.00) AND (ss_wholesale_cost#37 <= 99.00)))) +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Condition : (((isnotnull(ss_quantity#31) AND (ss_quantity#31 >= 11)) AND (ss_quantity#31 <= 15)) AND ((((ss_list_price#33 >= 142.00) AND (ss_list_price#33 <= 152.00)) OR ((ss_coupon_amt#34 >= 12214.00) AND (ss_coupon_amt#34 <= 13214.00))) OR ((ss_wholesale_cost#32 >= 79.00) AND (ss_wholesale_cost#32 <= 99.00)))) (26) Project [codegen id : 6] -Output [1]: [ss_list_price#38] -Input [5]: [ss_quantity#36, ss_wholesale_cost#37, ss_list_price#38, ss_coupon_amt#39, ss_sold_date_sk#40] +Output [1]: [ss_list_price#33] +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] (27) HashAggregate [codegen id : 6] -Input [1]: [ss_list_price#38] -Keys [1]: [ss_list_price#38] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#38)), partial_count(ss_list_price#38)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#38))#41, count(ss_list_price#38)#42] -Results [4]: [ss_list_price#38, sum#43, count#44, count#45] +Input [1]: [ss_list_price#33] +Keys [1]: [ss_list_price#33] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#33)), partial_count(ss_list_price#33)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37] +Results [4]: [ss_list_price#33, sum#38, count#39, count#40] (28) Exchange -Input [4]: [ss_list_price#38, sum#43, count#44, count#45] -Arguments: hashpartitioning(ss_list_price#38, 5), ENSURE_REQUIREMENTS, [id=#46] +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] +Arguments: hashpartitioning(ss_list_price#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] (29) HashAggregate [codegen id : 7] -Input [4]: [ss_list_price#38, sum#43, count#44, count#45] -Keys [1]: [ss_list_price#38] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#38)), merge_count(ss_list_price#38)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#38))#41, count(ss_list_price#38)#42] -Results [4]: [ss_list_price#38, sum#43, count#44, count#45] +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] +Keys [1]: [ss_list_price#33] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#33)), merge_count(ss_list_price#33)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37] +Results [4]: [ss_list_price#33, sum#38, count#39, count#40] (30) HashAggregate [codegen id : 7] -Input [4]: [ss_list_price#38, sum#43, count#44, count#45] +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#38)), merge_count(ss_list_price#38), partial_count(distinct ss_list_price#38)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#38))#41, count(ss_list_price#38)#42, count(ss_list_price#38)#47] -Results [4]: [sum#43, count#44, count#45, count#48] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#33)), merge_count(ss_list_price#33), partial_count(distinct ss_list_price#33)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37, count(ss_list_price#33)#41] +Results [4]: [sum#38, count#39, count#40, count#42] (31) Exchange -Input [4]: [sum#43, count#44, count#45, count#48] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#49] +Input [4]: [sum#38, count#39, count#40, count#42] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (32) HashAggregate [codegen id : 8] -Input [4]: [sum#43, count#44, count#45, count#48] +Input [4]: [sum#38, count#39, count#40, count#42] Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#38)), count(ss_list_price#38), count(distinct ss_list_price#38)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#38))#41, count(ss_list_price#38)#42, count(ss_list_price#38)#47] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#38))#41 / 100.0) as decimal(11,6)) AS B3_LP#50, count(ss_list_price#38)#42 AS B3_CNT#51, count(ss_list_price#38)#47 AS B3_CNTD#52] +Functions [3]: [avg(UnscaledValue(ss_list_price#33)), count(ss_list_price#33), count(distinct ss_list_price#33)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37, count(ss_list_price#33)#41] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#33))#36 / 100.0) as decimal(11,6)) AS B3_LP#43, count(ss_list_price#33)#37 AS B3_CNT#44, count(ss_list_price#33)#41 AS B3_CNTD#45] (33) BroadcastExchange -Input [3]: [B3_LP#50, B3_CNT#51, B3_CNTD#52] -Arguments: IdentityBroadcastMode, [id=#53] +Input [3]: [B3_LP#43, B3_CNT#44, B3_CNTD#45] +Arguments: IdentityBroadcastMode, [plan_id=8] (34) BroadcastNestedLoopJoin [codegen id : 18] Join condition: None (35) Scan parquet default.store_sales -Output [5]: [ss_quantity#54, ss_wholesale_cost#55, ss_list_price#56, ss_coupon_amt#57, ss_sold_date_sk#58] +Output [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20), Or(Or(And(GreaterThanOrEqual(ss_list_price,135.00),LessThanOrEqual(ss_list_price,145.00)),And(GreaterThanOrEqual(ss_coupon_amt,6071.00),LessThanOrEqual(ss_coupon_amt,7071.00))),And(GreaterThanOrEqual(ss_wholesale_cost,38.00),LessThanOrEqual(ss_wholesale_cost,58.00)))] ReadSchema: struct (36) ColumnarToRow [codegen id : 9] -Input [5]: [ss_quantity#54, ss_wholesale_cost#55, ss_list_price#56, ss_coupon_amt#57, ss_sold_date_sk#58] +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] (37) Filter [codegen id : 9] -Input [5]: [ss_quantity#54, ss_wholesale_cost#55, ss_list_price#56, ss_coupon_amt#57, ss_sold_date_sk#58] -Condition : (((isnotnull(ss_quantity#54) AND (ss_quantity#54 >= 16)) AND (ss_quantity#54 <= 20)) AND ((((ss_list_price#56 >= 135.00) AND (ss_list_price#56 <= 145.00)) OR ((ss_coupon_amt#57 >= 6071.00) AND (ss_coupon_amt#57 <= 7071.00))) OR ((ss_wholesale_cost#55 >= 38.00) AND (ss_wholesale_cost#55 <= 58.00)))) +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Condition : (((isnotnull(ss_quantity#46) AND (ss_quantity#46 >= 16)) AND (ss_quantity#46 <= 20)) AND ((((ss_list_price#48 >= 135.00) AND (ss_list_price#48 <= 145.00)) OR ((ss_coupon_amt#49 >= 6071.00) AND (ss_coupon_amt#49 <= 7071.00))) OR ((ss_wholesale_cost#47 >= 38.00) AND (ss_wholesale_cost#47 <= 58.00)))) (38) Project [codegen id : 9] -Output [1]: [ss_list_price#56] -Input [5]: [ss_quantity#54, ss_wholesale_cost#55, ss_list_price#56, ss_coupon_amt#57, ss_sold_date_sk#58] +Output [1]: [ss_list_price#48] +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] (39) HashAggregate [codegen id : 9] -Input [1]: [ss_list_price#56] -Keys [1]: [ss_list_price#56] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#56)), partial_count(ss_list_price#56)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#56))#59, count(ss_list_price#56)#60] -Results [4]: [ss_list_price#56, sum#61, count#62, count#63] +Input [1]: [ss_list_price#48] +Keys [1]: [ss_list_price#48] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#48)), partial_count(ss_list_price#48)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52] +Results [4]: [ss_list_price#48, sum#53, count#54, count#55] (40) Exchange -Input [4]: [ss_list_price#56, sum#61, count#62, count#63] -Arguments: hashpartitioning(ss_list_price#56, 5), ENSURE_REQUIREMENTS, [id=#64] +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] +Arguments: hashpartitioning(ss_list_price#48, 5), ENSURE_REQUIREMENTS, [plan_id=9] (41) HashAggregate [codegen id : 10] -Input [4]: [ss_list_price#56, sum#61, count#62, count#63] -Keys [1]: [ss_list_price#56] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#56)), merge_count(ss_list_price#56)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#56))#59, count(ss_list_price#56)#60] -Results [4]: [ss_list_price#56, sum#61, count#62, count#63] +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] +Keys [1]: [ss_list_price#48] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#48)), merge_count(ss_list_price#48)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52] +Results [4]: [ss_list_price#48, sum#53, count#54, count#55] (42) HashAggregate [codegen id : 10] -Input [4]: [ss_list_price#56, sum#61, count#62, count#63] +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#56)), merge_count(ss_list_price#56), partial_count(distinct ss_list_price#56)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#56))#59, count(ss_list_price#56)#60, count(ss_list_price#56)#65] -Results [4]: [sum#61, count#62, count#63, count#66] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#48)), merge_count(ss_list_price#48), partial_count(distinct ss_list_price#48)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52, count(ss_list_price#48)#56] +Results [4]: [sum#53, count#54, count#55, count#57] (43) Exchange -Input [4]: [sum#61, count#62, count#63, count#66] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#67] +Input [4]: [sum#53, count#54, count#55, count#57] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] (44) HashAggregate [codegen id : 11] -Input [4]: [sum#61, count#62, count#63, count#66] +Input [4]: [sum#53, count#54, count#55, count#57] Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#56)), count(ss_list_price#56), count(distinct ss_list_price#56)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#56))#59, count(ss_list_price#56)#60, count(ss_list_price#56)#65] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#56))#59 / 100.0) as decimal(11,6)) AS B4_LP#68, count(ss_list_price#56)#60 AS B4_CNT#69, count(ss_list_price#56)#65 AS B4_CNTD#70] +Functions [3]: [avg(UnscaledValue(ss_list_price#48)), count(ss_list_price#48), count(distinct ss_list_price#48)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52, count(ss_list_price#48)#56] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#48))#51 / 100.0) as decimal(11,6)) AS B4_LP#58, count(ss_list_price#48)#52 AS B4_CNT#59, count(ss_list_price#48)#56 AS B4_CNTD#60] (45) BroadcastExchange -Input [3]: [B4_LP#68, B4_CNT#69, B4_CNTD#70] -Arguments: IdentityBroadcastMode, [id=#71] +Input [3]: [B4_LP#58, B4_CNT#59, B4_CNTD#60] +Arguments: IdentityBroadcastMode, [plan_id=11] (46) BroadcastNestedLoopJoin [codegen id : 18] Join condition: None (47) Scan parquet default.store_sales -Output [5]: [ss_quantity#72, ss_wholesale_cost#73, ss_list_price#74, ss_coupon_amt#75, ss_sold_date_sk#76] +Output [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25), Or(Or(And(GreaterThanOrEqual(ss_list_price,122.00),LessThanOrEqual(ss_list_price,132.00)),And(GreaterThanOrEqual(ss_coupon_amt,836.00),LessThanOrEqual(ss_coupon_amt,1836.00))),And(GreaterThanOrEqual(ss_wholesale_cost,17.00),LessThanOrEqual(ss_wholesale_cost,37.00)))] ReadSchema: struct (48) ColumnarToRow [codegen id : 12] -Input [5]: [ss_quantity#72, ss_wholesale_cost#73, ss_list_price#74, ss_coupon_amt#75, ss_sold_date_sk#76] +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] (49) Filter [codegen id : 12] -Input [5]: [ss_quantity#72, ss_wholesale_cost#73, ss_list_price#74, ss_coupon_amt#75, ss_sold_date_sk#76] -Condition : (((isnotnull(ss_quantity#72) AND (ss_quantity#72 >= 21)) AND (ss_quantity#72 <= 25)) AND ((((ss_list_price#74 >= 122.00) AND (ss_list_price#74 <= 132.00)) OR ((ss_coupon_amt#75 >= 836.00) AND (ss_coupon_amt#75 <= 1836.00))) OR ((ss_wholesale_cost#73 >= 17.00) AND (ss_wholesale_cost#73 <= 37.00)))) +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Condition : (((isnotnull(ss_quantity#61) AND (ss_quantity#61 >= 21)) AND (ss_quantity#61 <= 25)) AND ((((ss_list_price#63 >= 122.00) AND (ss_list_price#63 <= 132.00)) OR ((ss_coupon_amt#64 >= 836.00) AND (ss_coupon_amt#64 <= 1836.00))) OR ((ss_wholesale_cost#62 >= 17.00) AND (ss_wholesale_cost#62 <= 37.00)))) (50) Project [codegen id : 12] -Output [1]: [ss_list_price#74] -Input [5]: [ss_quantity#72, ss_wholesale_cost#73, ss_list_price#74, ss_coupon_amt#75, ss_sold_date_sk#76] +Output [1]: [ss_list_price#63] +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] (51) HashAggregate [codegen id : 12] -Input [1]: [ss_list_price#74] -Keys [1]: [ss_list_price#74] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#74)), partial_count(ss_list_price#74)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#74))#77, count(ss_list_price#74)#78] -Results [4]: [ss_list_price#74, sum#79, count#80, count#81] +Input [1]: [ss_list_price#63] +Keys [1]: [ss_list_price#63] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#63)), partial_count(ss_list_price#63)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67] +Results [4]: [ss_list_price#63, sum#68, count#69, count#70] (52) Exchange -Input [4]: [ss_list_price#74, sum#79, count#80, count#81] -Arguments: hashpartitioning(ss_list_price#74, 5), ENSURE_REQUIREMENTS, [id=#82] +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] +Arguments: hashpartitioning(ss_list_price#63, 5), ENSURE_REQUIREMENTS, [plan_id=12] (53) HashAggregate [codegen id : 13] -Input [4]: [ss_list_price#74, sum#79, count#80, count#81] -Keys [1]: [ss_list_price#74] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#74)), merge_count(ss_list_price#74)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#74))#77, count(ss_list_price#74)#78] -Results [4]: [ss_list_price#74, sum#79, count#80, count#81] +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] +Keys [1]: [ss_list_price#63] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#63)), merge_count(ss_list_price#63)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67] +Results [4]: [ss_list_price#63, sum#68, count#69, count#70] (54) HashAggregate [codegen id : 13] -Input [4]: [ss_list_price#74, sum#79, count#80, count#81] +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#74)), merge_count(ss_list_price#74), partial_count(distinct ss_list_price#74)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#74))#77, count(ss_list_price#74)#78, count(ss_list_price#74)#83] -Results [4]: [sum#79, count#80, count#81, count#84] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#63)), merge_count(ss_list_price#63), partial_count(distinct ss_list_price#63)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67, count(ss_list_price#63)#71] +Results [4]: [sum#68, count#69, count#70, count#72] (55) Exchange -Input [4]: [sum#79, count#80, count#81, count#84] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#85] +Input [4]: [sum#68, count#69, count#70, count#72] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=13] (56) HashAggregate [codegen id : 14] -Input [4]: [sum#79, count#80, count#81, count#84] +Input [4]: [sum#68, count#69, count#70, count#72] Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#74)), count(ss_list_price#74), count(distinct ss_list_price#74)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#74))#77, count(ss_list_price#74)#78, count(ss_list_price#74)#83] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#74))#77 / 100.0) as decimal(11,6)) AS B5_LP#86, count(ss_list_price#74)#78 AS B5_CNT#87, count(ss_list_price#74)#83 AS B5_CNTD#88] +Functions [3]: [avg(UnscaledValue(ss_list_price#63)), count(ss_list_price#63), count(distinct ss_list_price#63)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67, count(ss_list_price#63)#71] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#63))#66 / 100.0) as decimal(11,6)) AS B5_LP#73, count(ss_list_price#63)#67 AS B5_CNT#74, count(ss_list_price#63)#71 AS B5_CNTD#75] (57) BroadcastExchange -Input [3]: [B5_LP#86, B5_CNT#87, B5_CNTD#88] -Arguments: IdentityBroadcastMode, [id=#89] +Input [3]: [B5_LP#73, B5_CNT#74, B5_CNTD#75] +Arguments: IdentityBroadcastMode, [plan_id=14] (58) BroadcastNestedLoopJoin [codegen id : 18] Join condition: None (59) Scan parquet default.store_sales -Output [5]: [ss_quantity#90, ss_wholesale_cost#91, ss_list_price#92, ss_coupon_amt#93, ss_sold_date_sk#94] +Output [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30), Or(Or(And(GreaterThanOrEqual(ss_list_price,154.00),LessThanOrEqual(ss_list_price,164.00)),And(GreaterThanOrEqual(ss_coupon_amt,7326.00),LessThanOrEqual(ss_coupon_amt,8326.00))),And(GreaterThanOrEqual(ss_wholesale_cost,7.00),LessThanOrEqual(ss_wholesale_cost,27.00)))] ReadSchema: struct (60) ColumnarToRow [codegen id : 15] -Input [5]: [ss_quantity#90, ss_wholesale_cost#91, ss_list_price#92, ss_coupon_amt#93, ss_sold_date_sk#94] +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] (61) Filter [codegen id : 15] -Input [5]: [ss_quantity#90, ss_wholesale_cost#91, ss_list_price#92, ss_coupon_amt#93, ss_sold_date_sk#94] -Condition : (((isnotnull(ss_quantity#90) AND (ss_quantity#90 >= 26)) AND (ss_quantity#90 <= 30)) AND ((((ss_list_price#92 >= 154.00) AND (ss_list_price#92 <= 164.00)) OR ((ss_coupon_amt#93 >= 7326.00) AND (ss_coupon_amt#93 <= 8326.00))) OR ((ss_wholesale_cost#91 >= 7.00) AND (ss_wholesale_cost#91 <= 27.00)))) +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Condition : (((isnotnull(ss_quantity#76) AND (ss_quantity#76 >= 26)) AND (ss_quantity#76 <= 30)) AND ((((ss_list_price#78 >= 154.00) AND (ss_list_price#78 <= 164.00)) OR ((ss_coupon_amt#79 >= 7326.00) AND (ss_coupon_amt#79 <= 8326.00))) OR ((ss_wholesale_cost#77 >= 7.00) AND (ss_wholesale_cost#77 <= 27.00)))) (62) Project [codegen id : 15] -Output [1]: [ss_list_price#92] -Input [5]: [ss_quantity#90, ss_wholesale_cost#91, ss_list_price#92, ss_coupon_amt#93, ss_sold_date_sk#94] +Output [1]: [ss_list_price#78] +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] (63) HashAggregate [codegen id : 15] -Input [1]: [ss_list_price#92] -Keys [1]: [ss_list_price#92] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#92)), partial_count(ss_list_price#92)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#92))#95, count(ss_list_price#92)#96] -Results [4]: [ss_list_price#92, sum#97, count#98, count#99] +Input [1]: [ss_list_price#78] +Keys [1]: [ss_list_price#78] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#78)), partial_count(ss_list_price#78)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82] +Results [4]: [ss_list_price#78, sum#83, count#84, count#85] (64) Exchange -Input [4]: [ss_list_price#92, sum#97, count#98, count#99] -Arguments: hashpartitioning(ss_list_price#92, 5), ENSURE_REQUIREMENTS, [id=#100] +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] +Arguments: hashpartitioning(ss_list_price#78, 5), ENSURE_REQUIREMENTS, [plan_id=15] (65) HashAggregate [codegen id : 16] -Input [4]: [ss_list_price#92, sum#97, count#98, count#99] -Keys [1]: [ss_list_price#92] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#92)), merge_count(ss_list_price#92)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#92))#95, count(ss_list_price#92)#96] -Results [4]: [ss_list_price#92, sum#97, count#98, count#99] +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] +Keys [1]: [ss_list_price#78] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#78)), merge_count(ss_list_price#78)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82] +Results [4]: [ss_list_price#78, sum#83, count#84, count#85] (66) HashAggregate [codegen id : 16] -Input [4]: [ss_list_price#92, sum#97, count#98, count#99] +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#92)), merge_count(ss_list_price#92), partial_count(distinct ss_list_price#92)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#92))#95, count(ss_list_price#92)#96, count(ss_list_price#92)#101] -Results [4]: [sum#97, count#98, count#99, count#102] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#78)), merge_count(ss_list_price#78), partial_count(distinct ss_list_price#78)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82, count(ss_list_price#78)#86] +Results [4]: [sum#83, count#84, count#85, count#87] (67) Exchange -Input [4]: [sum#97, count#98, count#99, count#102] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#103] +Input [4]: [sum#83, count#84, count#85, count#87] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=16] (68) HashAggregate [codegen id : 17] -Input [4]: [sum#97, count#98, count#99, count#102] +Input [4]: [sum#83, count#84, count#85, count#87] Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#92)), count(ss_list_price#92), count(distinct ss_list_price#92)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#92))#95, count(ss_list_price#92)#96, count(ss_list_price#92)#101] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#92))#95 / 100.0) as decimal(11,6)) AS B6_LP#104, count(ss_list_price#92)#96 AS B6_CNT#105, count(ss_list_price#92)#101 AS B6_CNTD#106] +Functions [3]: [avg(UnscaledValue(ss_list_price#78)), count(ss_list_price#78), count(distinct ss_list_price#78)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82, count(ss_list_price#78)#86] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#78))#81 / 100.0) as decimal(11,6)) AS B6_LP#88, count(ss_list_price#78)#82 AS B6_CNT#89, count(ss_list_price#78)#86 AS B6_CNTD#90] (69) BroadcastExchange -Input [3]: [B6_LP#104, B6_CNT#105, B6_CNTD#106] -Arguments: IdentityBroadcastMode, [id=#107] +Input [3]: [B6_LP#88, B6_CNT#89, B6_CNTD#90] +Arguments: IdentityBroadcastMode, [plan_id=17] (70) BroadcastNestedLoopJoin [codegen id : 18] Join condition: None diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt index eec45ea549531..6c140d3d95a9d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q28/explain.txt @@ -98,7 +98,7 @@ Results [4]: [ss_list_price#3, sum#8, count#9, count#10] (6) Exchange Input [4]: [ss_list_price#3, sum#8, count#9, count#10] -Arguments: hashpartitioning(ss_list_price#3, 5), ENSURE_REQUIREMENTS, [id=#11] +Arguments: hashpartitioning(ss_list_price#3, 5), ENSURE_REQUIREMENTS, [plan_id=1] (7) HashAggregate [codegen id : 2] Input [4]: [ss_list_price#3, sum#8, count#9, count#10] @@ -111,321 +111,321 @@ Results [4]: [ss_list_price#3, sum#8, count#9, count#10] Input [4]: [ss_list_price#3, sum#8, count#9, count#10] Keys: [] Functions [3]: [merge_avg(UnscaledValue(ss_list_price#3)), merge_count(ss_list_price#3), partial_count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7, count(ss_list_price#3)#12] -Results [4]: [sum#8, count#9, count#10, count#13] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7, count(ss_list_price#3)#11] +Results [4]: [sum#8, count#9, count#10, count#12] (9) Exchange -Input [4]: [sum#8, count#9, count#10, count#13] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#14] +Input [4]: [sum#8, count#9, count#10, count#12] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] (10) HashAggregate [codegen id : 18] -Input [4]: [sum#8, count#9, count#10, count#13] +Input [4]: [sum#8, count#9, count#10, count#12] Keys: [] Functions [3]: [avg(UnscaledValue(ss_list_price#3)), count(ss_list_price#3), count(distinct ss_list_price#3)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7, count(ss_list_price#3)#12] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#6 / 100.0) as decimal(11,6)) AS B1_LP#15, count(ss_list_price#3)#7 AS B1_CNT#16, count(ss_list_price#3)#12 AS B1_CNTD#17] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#3))#6, count(ss_list_price#3)#7, count(ss_list_price#3)#11] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#3))#6 / 100.0) as decimal(11,6)) AS B1_LP#13, count(ss_list_price#3)#7 AS B1_CNT#14, count(ss_list_price#3)#11 AS B1_CNTD#15] (11) Scan parquet default.store_sales -Output [5]: [ss_quantity#18, ss_wholesale_cost#19, ss_list_price#20, ss_coupon_amt#21, ss_sold_date_sk#22] +Output [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,6), LessThanOrEqual(ss_quantity,10), Or(Or(And(GreaterThanOrEqual(ss_list_price,90.00),LessThanOrEqual(ss_list_price,100.00)),And(GreaterThanOrEqual(ss_coupon_amt,2323.00),LessThanOrEqual(ss_coupon_amt,3323.00))),And(GreaterThanOrEqual(ss_wholesale_cost,31.00),LessThanOrEqual(ss_wholesale_cost,51.00)))] ReadSchema: struct (12) ColumnarToRow [codegen id : 3] -Input [5]: [ss_quantity#18, ss_wholesale_cost#19, ss_list_price#20, ss_coupon_amt#21, ss_sold_date_sk#22] +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] (13) Filter [codegen id : 3] -Input [5]: [ss_quantity#18, ss_wholesale_cost#19, ss_list_price#20, ss_coupon_amt#21, ss_sold_date_sk#22] -Condition : (((isnotnull(ss_quantity#18) AND (ss_quantity#18 >= 6)) AND (ss_quantity#18 <= 10)) AND ((((ss_list_price#20 >= 90.00) AND (ss_list_price#20 <= 100.00)) OR ((ss_coupon_amt#21 >= 2323.00) AND (ss_coupon_amt#21 <= 3323.00))) OR ((ss_wholesale_cost#19 >= 31.00) AND (ss_wholesale_cost#19 <= 51.00)))) +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] +Condition : (((isnotnull(ss_quantity#16) AND (ss_quantity#16 >= 6)) AND (ss_quantity#16 <= 10)) AND ((((ss_list_price#18 >= 90.00) AND (ss_list_price#18 <= 100.00)) OR ((ss_coupon_amt#19 >= 2323.00) AND (ss_coupon_amt#19 <= 3323.00))) OR ((ss_wholesale_cost#17 >= 31.00) AND (ss_wholesale_cost#17 <= 51.00)))) (14) Project [codegen id : 3] -Output [1]: [ss_list_price#20] -Input [5]: [ss_quantity#18, ss_wholesale_cost#19, ss_list_price#20, ss_coupon_amt#21, ss_sold_date_sk#22] +Output [1]: [ss_list_price#18] +Input [5]: [ss_quantity#16, ss_wholesale_cost#17, ss_list_price#18, ss_coupon_amt#19, ss_sold_date_sk#20] (15) HashAggregate [codegen id : 3] -Input [1]: [ss_list_price#20] -Keys [1]: [ss_list_price#20] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#20)), partial_count(ss_list_price#20)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#20))#23, count(ss_list_price#20)#24] -Results [4]: [ss_list_price#20, sum#25, count#26, count#27] +Input [1]: [ss_list_price#18] +Keys [1]: [ss_list_price#18] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#18)), partial_count(ss_list_price#18)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22] +Results [4]: [ss_list_price#18, sum#23, count#24, count#25] (16) Exchange -Input [4]: [ss_list_price#20, sum#25, count#26, count#27] -Arguments: hashpartitioning(ss_list_price#20, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] +Arguments: hashpartitioning(ss_list_price#18, 5), ENSURE_REQUIREMENTS, [plan_id=3] (17) HashAggregate [codegen id : 4] -Input [4]: [ss_list_price#20, sum#25, count#26, count#27] -Keys [1]: [ss_list_price#20] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#20)), merge_count(ss_list_price#20)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#20))#23, count(ss_list_price#20)#24] -Results [4]: [ss_list_price#20, sum#25, count#26, count#27] +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] +Keys [1]: [ss_list_price#18] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#18)), merge_count(ss_list_price#18)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22] +Results [4]: [ss_list_price#18, sum#23, count#24, count#25] (18) HashAggregate [codegen id : 4] -Input [4]: [ss_list_price#20, sum#25, count#26, count#27] +Input [4]: [ss_list_price#18, sum#23, count#24, count#25] Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#20)), merge_count(ss_list_price#20), partial_count(distinct ss_list_price#20)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#20))#23, count(ss_list_price#20)#24, count(ss_list_price#20)#29] -Results [4]: [sum#25, count#26, count#27, count#30] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#18)), merge_count(ss_list_price#18), partial_count(distinct ss_list_price#18)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22, count(ss_list_price#18)#26] +Results [4]: [sum#23, count#24, count#25, count#27] (19) Exchange -Input [4]: [sum#25, count#26, count#27, count#30] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#31] +Input [4]: [sum#23, count#24, count#25, count#27] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (20) HashAggregate [codegen id : 5] -Input [4]: [sum#25, count#26, count#27, count#30] +Input [4]: [sum#23, count#24, count#25, count#27] Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#20)), count(ss_list_price#20), count(distinct ss_list_price#20)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#20))#23, count(ss_list_price#20)#24, count(ss_list_price#20)#29] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#20))#23 / 100.0) as decimal(11,6)) AS B2_LP#32, count(ss_list_price#20)#24 AS B2_CNT#33, count(ss_list_price#20)#29 AS B2_CNTD#34] +Functions [3]: [avg(UnscaledValue(ss_list_price#18)), count(ss_list_price#18), count(distinct ss_list_price#18)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#18))#21, count(ss_list_price#18)#22, count(ss_list_price#18)#26] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#18))#21 / 100.0) as decimal(11,6)) AS B2_LP#28, count(ss_list_price#18)#22 AS B2_CNT#29, count(ss_list_price#18)#26 AS B2_CNTD#30] (21) BroadcastExchange -Input [3]: [B2_LP#32, B2_CNT#33, B2_CNTD#34] -Arguments: IdentityBroadcastMode, [id=#35] +Input [3]: [B2_LP#28, B2_CNT#29, B2_CNTD#30] +Arguments: IdentityBroadcastMode, [plan_id=5] (22) BroadcastNestedLoopJoin [codegen id : 18] Join condition: None (23) Scan parquet default.store_sales -Output [5]: [ss_quantity#36, ss_wholesale_cost#37, ss_list_price#38, ss_coupon_amt#39, ss_sold_date_sk#40] +Output [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,11), LessThanOrEqual(ss_quantity,15), Or(Or(And(GreaterThanOrEqual(ss_list_price,142.00),LessThanOrEqual(ss_list_price,152.00)),And(GreaterThanOrEqual(ss_coupon_amt,12214.00),LessThanOrEqual(ss_coupon_amt,13214.00))),And(GreaterThanOrEqual(ss_wholesale_cost,79.00),LessThanOrEqual(ss_wholesale_cost,99.00)))] ReadSchema: struct (24) ColumnarToRow [codegen id : 6] -Input [5]: [ss_quantity#36, ss_wholesale_cost#37, ss_list_price#38, ss_coupon_amt#39, ss_sold_date_sk#40] +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] (25) Filter [codegen id : 6] -Input [5]: [ss_quantity#36, ss_wholesale_cost#37, ss_list_price#38, ss_coupon_amt#39, ss_sold_date_sk#40] -Condition : (((isnotnull(ss_quantity#36) AND (ss_quantity#36 >= 11)) AND (ss_quantity#36 <= 15)) AND ((((ss_list_price#38 >= 142.00) AND (ss_list_price#38 <= 152.00)) OR ((ss_coupon_amt#39 >= 12214.00) AND (ss_coupon_amt#39 <= 13214.00))) OR ((ss_wholesale_cost#37 >= 79.00) AND (ss_wholesale_cost#37 <= 99.00)))) +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] +Condition : (((isnotnull(ss_quantity#31) AND (ss_quantity#31 >= 11)) AND (ss_quantity#31 <= 15)) AND ((((ss_list_price#33 >= 142.00) AND (ss_list_price#33 <= 152.00)) OR ((ss_coupon_amt#34 >= 12214.00) AND (ss_coupon_amt#34 <= 13214.00))) OR ((ss_wholesale_cost#32 >= 79.00) AND (ss_wholesale_cost#32 <= 99.00)))) (26) Project [codegen id : 6] -Output [1]: [ss_list_price#38] -Input [5]: [ss_quantity#36, ss_wholesale_cost#37, ss_list_price#38, ss_coupon_amt#39, ss_sold_date_sk#40] +Output [1]: [ss_list_price#33] +Input [5]: [ss_quantity#31, ss_wholesale_cost#32, ss_list_price#33, ss_coupon_amt#34, ss_sold_date_sk#35] (27) HashAggregate [codegen id : 6] -Input [1]: [ss_list_price#38] -Keys [1]: [ss_list_price#38] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#38)), partial_count(ss_list_price#38)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#38))#41, count(ss_list_price#38)#42] -Results [4]: [ss_list_price#38, sum#43, count#44, count#45] +Input [1]: [ss_list_price#33] +Keys [1]: [ss_list_price#33] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#33)), partial_count(ss_list_price#33)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37] +Results [4]: [ss_list_price#33, sum#38, count#39, count#40] (28) Exchange -Input [4]: [ss_list_price#38, sum#43, count#44, count#45] -Arguments: hashpartitioning(ss_list_price#38, 5), ENSURE_REQUIREMENTS, [id=#46] +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] +Arguments: hashpartitioning(ss_list_price#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] (29) HashAggregate [codegen id : 7] -Input [4]: [ss_list_price#38, sum#43, count#44, count#45] -Keys [1]: [ss_list_price#38] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#38)), merge_count(ss_list_price#38)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#38))#41, count(ss_list_price#38)#42] -Results [4]: [ss_list_price#38, sum#43, count#44, count#45] +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] +Keys [1]: [ss_list_price#33] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#33)), merge_count(ss_list_price#33)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37] +Results [4]: [ss_list_price#33, sum#38, count#39, count#40] (30) HashAggregate [codegen id : 7] -Input [4]: [ss_list_price#38, sum#43, count#44, count#45] +Input [4]: [ss_list_price#33, sum#38, count#39, count#40] Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#38)), merge_count(ss_list_price#38), partial_count(distinct ss_list_price#38)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#38))#41, count(ss_list_price#38)#42, count(ss_list_price#38)#47] -Results [4]: [sum#43, count#44, count#45, count#48] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#33)), merge_count(ss_list_price#33), partial_count(distinct ss_list_price#33)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37, count(ss_list_price#33)#41] +Results [4]: [sum#38, count#39, count#40, count#42] (31) Exchange -Input [4]: [sum#43, count#44, count#45, count#48] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#49] +Input [4]: [sum#38, count#39, count#40, count#42] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (32) HashAggregate [codegen id : 8] -Input [4]: [sum#43, count#44, count#45, count#48] +Input [4]: [sum#38, count#39, count#40, count#42] Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#38)), count(ss_list_price#38), count(distinct ss_list_price#38)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#38))#41, count(ss_list_price#38)#42, count(ss_list_price#38)#47] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#38))#41 / 100.0) as decimal(11,6)) AS B3_LP#50, count(ss_list_price#38)#42 AS B3_CNT#51, count(ss_list_price#38)#47 AS B3_CNTD#52] +Functions [3]: [avg(UnscaledValue(ss_list_price#33)), count(ss_list_price#33), count(distinct ss_list_price#33)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#33))#36, count(ss_list_price#33)#37, count(ss_list_price#33)#41] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#33))#36 / 100.0) as decimal(11,6)) AS B3_LP#43, count(ss_list_price#33)#37 AS B3_CNT#44, count(ss_list_price#33)#41 AS B3_CNTD#45] (33) BroadcastExchange -Input [3]: [B3_LP#50, B3_CNT#51, B3_CNTD#52] -Arguments: IdentityBroadcastMode, [id=#53] +Input [3]: [B3_LP#43, B3_CNT#44, B3_CNTD#45] +Arguments: IdentityBroadcastMode, [plan_id=8] (34) BroadcastNestedLoopJoin [codegen id : 18] Join condition: None (35) Scan parquet default.store_sales -Output [5]: [ss_quantity#54, ss_wholesale_cost#55, ss_list_price#56, ss_coupon_amt#57, ss_sold_date_sk#58] +Output [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,16), LessThanOrEqual(ss_quantity,20), Or(Or(And(GreaterThanOrEqual(ss_list_price,135.00),LessThanOrEqual(ss_list_price,145.00)),And(GreaterThanOrEqual(ss_coupon_amt,6071.00),LessThanOrEqual(ss_coupon_amt,7071.00))),And(GreaterThanOrEqual(ss_wholesale_cost,38.00),LessThanOrEqual(ss_wholesale_cost,58.00)))] ReadSchema: struct (36) ColumnarToRow [codegen id : 9] -Input [5]: [ss_quantity#54, ss_wholesale_cost#55, ss_list_price#56, ss_coupon_amt#57, ss_sold_date_sk#58] +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] (37) Filter [codegen id : 9] -Input [5]: [ss_quantity#54, ss_wholesale_cost#55, ss_list_price#56, ss_coupon_amt#57, ss_sold_date_sk#58] -Condition : (((isnotnull(ss_quantity#54) AND (ss_quantity#54 >= 16)) AND (ss_quantity#54 <= 20)) AND ((((ss_list_price#56 >= 135.00) AND (ss_list_price#56 <= 145.00)) OR ((ss_coupon_amt#57 >= 6071.00) AND (ss_coupon_amt#57 <= 7071.00))) OR ((ss_wholesale_cost#55 >= 38.00) AND (ss_wholesale_cost#55 <= 58.00)))) +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] +Condition : (((isnotnull(ss_quantity#46) AND (ss_quantity#46 >= 16)) AND (ss_quantity#46 <= 20)) AND ((((ss_list_price#48 >= 135.00) AND (ss_list_price#48 <= 145.00)) OR ((ss_coupon_amt#49 >= 6071.00) AND (ss_coupon_amt#49 <= 7071.00))) OR ((ss_wholesale_cost#47 >= 38.00) AND (ss_wholesale_cost#47 <= 58.00)))) (38) Project [codegen id : 9] -Output [1]: [ss_list_price#56] -Input [5]: [ss_quantity#54, ss_wholesale_cost#55, ss_list_price#56, ss_coupon_amt#57, ss_sold_date_sk#58] +Output [1]: [ss_list_price#48] +Input [5]: [ss_quantity#46, ss_wholesale_cost#47, ss_list_price#48, ss_coupon_amt#49, ss_sold_date_sk#50] (39) HashAggregate [codegen id : 9] -Input [1]: [ss_list_price#56] -Keys [1]: [ss_list_price#56] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#56)), partial_count(ss_list_price#56)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#56))#59, count(ss_list_price#56)#60] -Results [4]: [ss_list_price#56, sum#61, count#62, count#63] +Input [1]: [ss_list_price#48] +Keys [1]: [ss_list_price#48] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#48)), partial_count(ss_list_price#48)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52] +Results [4]: [ss_list_price#48, sum#53, count#54, count#55] (40) Exchange -Input [4]: [ss_list_price#56, sum#61, count#62, count#63] -Arguments: hashpartitioning(ss_list_price#56, 5), ENSURE_REQUIREMENTS, [id=#64] +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] +Arguments: hashpartitioning(ss_list_price#48, 5), ENSURE_REQUIREMENTS, [plan_id=9] (41) HashAggregate [codegen id : 10] -Input [4]: [ss_list_price#56, sum#61, count#62, count#63] -Keys [1]: [ss_list_price#56] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#56)), merge_count(ss_list_price#56)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#56))#59, count(ss_list_price#56)#60] -Results [4]: [ss_list_price#56, sum#61, count#62, count#63] +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] +Keys [1]: [ss_list_price#48] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#48)), merge_count(ss_list_price#48)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52] +Results [4]: [ss_list_price#48, sum#53, count#54, count#55] (42) HashAggregate [codegen id : 10] -Input [4]: [ss_list_price#56, sum#61, count#62, count#63] +Input [4]: [ss_list_price#48, sum#53, count#54, count#55] Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#56)), merge_count(ss_list_price#56), partial_count(distinct ss_list_price#56)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#56))#59, count(ss_list_price#56)#60, count(ss_list_price#56)#65] -Results [4]: [sum#61, count#62, count#63, count#66] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#48)), merge_count(ss_list_price#48), partial_count(distinct ss_list_price#48)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52, count(ss_list_price#48)#56] +Results [4]: [sum#53, count#54, count#55, count#57] (43) Exchange -Input [4]: [sum#61, count#62, count#63, count#66] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#67] +Input [4]: [sum#53, count#54, count#55, count#57] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] (44) HashAggregate [codegen id : 11] -Input [4]: [sum#61, count#62, count#63, count#66] +Input [4]: [sum#53, count#54, count#55, count#57] Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#56)), count(ss_list_price#56), count(distinct ss_list_price#56)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#56))#59, count(ss_list_price#56)#60, count(ss_list_price#56)#65] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#56))#59 / 100.0) as decimal(11,6)) AS B4_LP#68, count(ss_list_price#56)#60 AS B4_CNT#69, count(ss_list_price#56)#65 AS B4_CNTD#70] +Functions [3]: [avg(UnscaledValue(ss_list_price#48)), count(ss_list_price#48), count(distinct ss_list_price#48)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#48))#51, count(ss_list_price#48)#52, count(ss_list_price#48)#56] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#48))#51 / 100.0) as decimal(11,6)) AS B4_LP#58, count(ss_list_price#48)#52 AS B4_CNT#59, count(ss_list_price#48)#56 AS B4_CNTD#60] (45) BroadcastExchange -Input [3]: [B4_LP#68, B4_CNT#69, B4_CNTD#70] -Arguments: IdentityBroadcastMode, [id=#71] +Input [3]: [B4_LP#58, B4_CNT#59, B4_CNTD#60] +Arguments: IdentityBroadcastMode, [plan_id=11] (46) BroadcastNestedLoopJoin [codegen id : 18] Join condition: None (47) Scan parquet default.store_sales -Output [5]: [ss_quantity#72, ss_wholesale_cost#73, ss_list_price#74, ss_coupon_amt#75, ss_sold_date_sk#76] +Output [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,25), Or(Or(And(GreaterThanOrEqual(ss_list_price,122.00),LessThanOrEqual(ss_list_price,132.00)),And(GreaterThanOrEqual(ss_coupon_amt,836.00),LessThanOrEqual(ss_coupon_amt,1836.00))),And(GreaterThanOrEqual(ss_wholesale_cost,17.00),LessThanOrEqual(ss_wholesale_cost,37.00)))] ReadSchema: struct (48) ColumnarToRow [codegen id : 12] -Input [5]: [ss_quantity#72, ss_wholesale_cost#73, ss_list_price#74, ss_coupon_amt#75, ss_sold_date_sk#76] +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] (49) Filter [codegen id : 12] -Input [5]: [ss_quantity#72, ss_wholesale_cost#73, ss_list_price#74, ss_coupon_amt#75, ss_sold_date_sk#76] -Condition : (((isnotnull(ss_quantity#72) AND (ss_quantity#72 >= 21)) AND (ss_quantity#72 <= 25)) AND ((((ss_list_price#74 >= 122.00) AND (ss_list_price#74 <= 132.00)) OR ((ss_coupon_amt#75 >= 836.00) AND (ss_coupon_amt#75 <= 1836.00))) OR ((ss_wholesale_cost#73 >= 17.00) AND (ss_wholesale_cost#73 <= 37.00)))) +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] +Condition : (((isnotnull(ss_quantity#61) AND (ss_quantity#61 >= 21)) AND (ss_quantity#61 <= 25)) AND ((((ss_list_price#63 >= 122.00) AND (ss_list_price#63 <= 132.00)) OR ((ss_coupon_amt#64 >= 836.00) AND (ss_coupon_amt#64 <= 1836.00))) OR ((ss_wholesale_cost#62 >= 17.00) AND (ss_wholesale_cost#62 <= 37.00)))) (50) Project [codegen id : 12] -Output [1]: [ss_list_price#74] -Input [5]: [ss_quantity#72, ss_wholesale_cost#73, ss_list_price#74, ss_coupon_amt#75, ss_sold_date_sk#76] +Output [1]: [ss_list_price#63] +Input [5]: [ss_quantity#61, ss_wholesale_cost#62, ss_list_price#63, ss_coupon_amt#64, ss_sold_date_sk#65] (51) HashAggregate [codegen id : 12] -Input [1]: [ss_list_price#74] -Keys [1]: [ss_list_price#74] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#74)), partial_count(ss_list_price#74)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#74))#77, count(ss_list_price#74)#78] -Results [4]: [ss_list_price#74, sum#79, count#80, count#81] +Input [1]: [ss_list_price#63] +Keys [1]: [ss_list_price#63] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#63)), partial_count(ss_list_price#63)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67] +Results [4]: [ss_list_price#63, sum#68, count#69, count#70] (52) Exchange -Input [4]: [ss_list_price#74, sum#79, count#80, count#81] -Arguments: hashpartitioning(ss_list_price#74, 5), ENSURE_REQUIREMENTS, [id=#82] +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] +Arguments: hashpartitioning(ss_list_price#63, 5), ENSURE_REQUIREMENTS, [plan_id=12] (53) HashAggregate [codegen id : 13] -Input [4]: [ss_list_price#74, sum#79, count#80, count#81] -Keys [1]: [ss_list_price#74] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#74)), merge_count(ss_list_price#74)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#74))#77, count(ss_list_price#74)#78] -Results [4]: [ss_list_price#74, sum#79, count#80, count#81] +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] +Keys [1]: [ss_list_price#63] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#63)), merge_count(ss_list_price#63)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67] +Results [4]: [ss_list_price#63, sum#68, count#69, count#70] (54) HashAggregate [codegen id : 13] -Input [4]: [ss_list_price#74, sum#79, count#80, count#81] +Input [4]: [ss_list_price#63, sum#68, count#69, count#70] Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#74)), merge_count(ss_list_price#74), partial_count(distinct ss_list_price#74)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#74))#77, count(ss_list_price#74)#78, count(ss_list_price#74)#83] -Results [4]: [sum#79, count#80, count#81, count#84] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#63)), merge_count(ss_list_price#63), partial_count(distinct ss_list_price#63)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67, count(ss_list_price#63)#71] +Results [4]: [sum#68, count#69, count#70, count#72] (55) Exchange -Input [4]: [sum#79, count#80, count#81, count#84] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#85] +Input [4]: [sum#68, count#69, count#70, count#72] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=13] (56) HashAggregate [codegen id : 14] -Input [4]: [sum#79, count#80, count#81, count#84] +Input [4]: [sum#68, count#69, count#70, count#72] Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#74)), count(ss_list_price#74), count(distinct ss_list_price#74)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#74))#77, count(ss_list_price#74)#78, count(ss_list_price#74)#83] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#74))#77 / 100.0) as decimal(11,6)) AS B5_LP#86, count(ss_list_price#74)#78 AS B5_CNT#87, count(ss_list_price#74)#83 AS B5_CNTD#88] +Functions [3]: [avg(UnscaledValue(ss_list_price#63)), count(ss_list_price#63), count(distinct ss_list_price#63)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#63))#66, count(ss_list_price#63)#67, count(ss_list_price#63)#71] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#63))#66 / 100.0) as decimal(11,6)) AS B5_LP#73, count(ss_list_price#63)#67 AS B5_CNT#74, count(ss_list_price#63)#71 AS B5_CNTD#75] (57) BroadcastExchange -Input [3]: [B5_LP#86, B5_CNT#87, B5_CNTD#88] -Arguments: IdentityBroadcastMode, [id=#89] +Input [3]: [B5_LP#73, B5_CNT#74, B5_CNTD#75] +Arguments: IdentityBroadcastMode, [plan_id=14] (58) BroadcastNestedLoopJoin [codegen id : 18] Join condition: None (59) Scan parquet default.store_sales -Output [5]: [ss_quantity#90, ss_wholesale_cost#91, ss_list_price#92, ss_coupon_amt#93, ss_sold_date_sk#94] +Output [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,26), LessThanOrEqual(ss_quantity,30), Or(Or(And(GreaterThanOrEqual(ss_list_price,154.00),LessThanOrEqual(ss_list_price,164.00)),And(GreaterThanOrEqual(ss_coupon_amt,7326.00),LessThanOrEqual(ss_coupon_amt,8326.00))),And(GreaterThanOrEqual(ss_wholesale_cost,7.00),LessThanOrEqual(ss_wholesale_cost,27.00)))] ReadSchema: struct (60) ColumnarToRow [codegen id : 15] -Input [5]: [ss_quantity#90, ss_wholesale_cost#91, ss_list_price#92, ss_coupon_amt#93, ss_sold_date_sk#94] +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] (61) Filter [codegen id : 15] -Input [5]: [ss_quantity#90, ss_wholesale_cost#91, ss_list_price#92, ss_coupon_amt#93, ss_sold_date_sk#94] -Condition : (((isnotnull(ss_quantity#90) AND (ss_quantity#90 >= 26)) AND (ss_quantity#90 <= 30)) AND ((((ss_list_price#92 >= 154.00) AND (ss_list_price#92 <= 164.00)) OR ((ss_coupon_amt#93 >= 7326.00) AND (ss_coupon_amt#93 <= 8326.00))) OR ((ss_wholesale_cost#91 >= 7.00) AND (ss_wholesale_cost#91 <= 27.00)))) +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] +Condition : (((isnotnull(ss_quantity#76) AND (ss_quantity#76 >= 26)) AND (ss_quantity#76 <= 30)) AND ((((ss_list_price#78 >= 154.00) AND (ss_list_price#78 <= 164.00)) OR ((ss_coupon_amt#79 >= 7326.00) AND (ss_coupon_amt#79 <= 8326.00))) OR ((ss_wholesale_cost#77 >= 7.00) AND (ss_wholesale_cost#77 <= 27.00)))) (62) Project [codegen id : 15] -Output [1]: [ss_list_price#92] -Input [5]: [ss_quantity#90, ss_wholesale_cost#91, ss_list_price#92, ss_coupon_amt#93, ss_sold_date_sk#94] +Output [1]: [ss_list_price#78] +Input [5]: [ss_quantity#76, ss_wholesale_cost#77, ss_list_price#78, ss_coupon_amt#79, ss_sold_date_sk#80] (63) HashAggregate [codegen id : 15] -Input [1]: [ss_list_price#92] -Keys [1]: [ss_list_price#92] -Functions [2]: [partial_avg(UnscaledValue(ss_list_price#92)), partial_count(ss_list_price#92)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#92))#95, count(ss_list_price#92)#96] -Results [4]: [ss_list_price#92, sum#97, count#98, count#99] +Input [1]: [ss_list_price#78] +Keys [1]: [ss_list_price#78] +Functions [2]: [partial_avg(UnscaledValue(ss_list_price#78)), partial_count(ss_list_price#78)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82] +Results [4]: [ss_list_price#78, sum#83, count#84, count#85] (64) Exchange -Input [4]: [ss_list_price#92, sum#97, count#98, count#99] -Arguments: hashpartitioning(ss_list_price#92, 5), ENSURE_REQUIREMENTS, [id=#100] +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] +Arguments: hashpartitioning(ss_list_price#78, 5), ENSURE_REQUIREMENTS, [plan_id=15] (65) HashAggregate [codegen id : 16] -Input [4]: [ss_list_price#92, sum#97, count#98, count#99] -Keys [1]: [ss_list_price#92] -Functions [2]: [merge_avg(UnscaledValue(ss_list_price#92)), merge_count(ss_list_price#92)] -Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#92))#95, count(ss_list_price#92)#96] -Results [4]: [ss_list_price#92, sum#97, count#98, count#99] +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] +Keys [1]: [ss_list_price#78] +Functions [2]: [merge_avg(UnscaledValue(ss_list_price#78)), merge_count(ss_list_price#78)] +Aggregate Attributes [2]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82] +Results [4]: [ss_list_price#78, sum#83, count#84, count#85] (66) HashAggregate [codegen id : 16] -Input [4]: [ss_list_price#92, sum#97, count#98, count#99] +Input [4]: [ss_list_price#78, sum#83, count#84, count#85] Keys: [] -Functions [3]: [merge_avg(UnscaledValue(ss_list_price#92)), merge_count(ss_list_price#92), partial_count(distinct ss_list_price#92)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#92))#95, count(ss_list_price#92)#96, count(ss_list_price#92)#101] -Results [4]: [sum#97, count#98, count#99, count#102] +Functions [3]: [merge_avg(UnscaledValue(ss_list_price#78)), merge_count(ss_list_price#78), partial_count(distinct ss_list_price#78)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82, count(ss_list_price#78)#86] +Results [4]: [sum#83, count#84, count#85, count#87] (67) Exchange -Input [4]: [sum#97, count#98, count#99, count#102] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#103] +Input [4]: [sum#83, count#84, count#85, count#87] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=16] (68) HashAggregate [codegen id : 17] -Input [4]: [sum#97, count#98, count#99, count#102] +Input [4]: [sum#83, count#84, count#85, count#87] Keys: [] -Functions [3]: [avg(UnscaledValue(ss_list_price#92)), count(ss_list_price#92), count(distinct ss_list_price#92)] -Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#92))#95, count(ss_list_price#92)#96, count(ss_list_price#92)#101] -Results [3]: [cast((avg(UnscaledValue(ss_list_price#92))#95 / 100.0) as decimal(11,6)) AS B6_LP#104, count(ss_list_price#92)#96 AS B6_CNT#105, count(ss_list_price#92)#101 AS B6_CNTD#106] +Functions [3]: [avg(UnscaledValue(ss_list_price#78)), count(ss_list_price#78), count(distinct ss_list_price#78)] +Aggregate Attributes [3]: [avg(UnscaledValue(ss_list_price#78))#81, count(ss_list_price#78)#82, count(ss_list_price#78)#86] +Results [3]: [cast((avg(UnscaledValue(ss_list_price#78))#81 / 100.0) as decimal(11,6)) AS B6_LP#88, count(ss_list_price#78)#82 AS B6_CNT#89, count(ss_list_price#78)#86 AS B6_CNTD#90] (69) BroadcastExchange -Input [3]: [B6_LP#104, B6_CNT#105, B6_CNTD#106] -Arguments: IdentityBroadcastMode, [id=#107] +Input [3]: [B6_LP#88, B6_CNT#89, B6_CNTD#90] +Arguments: IdentityBroadcastMode, [plan_id=17] (70) BroadcastNestedLoopJoin [codegen id : 18] Join condition: None diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt index 221439075d24d..779cf2e924e9d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29.sf100/explain.txt @@ -93,7 +93,7 @@ Condition : isnotnull(s_store_sk#9) (10) BroadcastExchange Input [3]: [s_store_sk#9, s_store_id#10, s_store_name#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_store_sk#3] @@ -106,168 +106,168 @@ Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s (13) Exchange Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#13] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) Sort [codegen id : 4] Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (15) Scan parquet default.item -Output [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] +Output [3]: [i_item_sk#12, i_item_id#13, i_item_desc#14] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] +Input [3]: [i_item_sk#12, i_item_id#13, i_item_desc#14] (17) Filter [codegen id : 5] -Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] -Condition : isnotnull(i_item_sk#14) +Input [3]: [i_item_sk#12, i_item_id#13, i_item_desc#14] +Condition : isnotnull(i_item_sk#12) (18) Exchange -Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] -Arguments: hashpartitioning(i_item_sk#14, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [3]: [i_item_sk#12, i_item_id#13, i_item_desc#14] +Arguments: hashpartitioning(i_item_sk#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 6] -Input [3]: [i_item_sk#14, i_item_id#15, i_item_desc#16] -Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 +Input [3]: [i_item_sk#12, i_item_id#13, i_item_desc#14] +Arguments: [i_item_sk#12 ASC NULLS FIRST], false, 0 (20) SortMergeJoin [codegen id : 7] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#14] +Right keys [1]: [i_item_sk#12] Join condition: None (21) Project [codegen id : 7] -Output [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] -Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_sk#14, i_item_id#15, i_item_desc#16] +Output [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_sk#12, i_item_id#13, i_item_desc#14] (22) Exchange -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] -Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14] +Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) Sort [codegen id : 8] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14] Arguments: [ss_customer_sk#2 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST], false, 0 (24) Scan parquet default.store_returns -Output [5]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22, sr_returned_date_sk#23] +Output [5]: [sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_return_quantity#18, sr_returned_date_sk#19] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(sr_returned_date_sk#23), dynamicpruningexpression(sr_returned_date_sk#23 IN dynamicpruning#24)] +PartitionFilters: [isnotnull(sr_returned_date_sk#19), dynamicpruningexpression(sr_returned_date_sk#19 IN dynamicpruning#20)] PushedFilters: [IsNotNull(sr_customer_sk), IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] ReadSchema: struct (25) ColumnarToRow [codegen id : 10] -Input [5]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22, sr_returned_date_sk#23] +Input [5]: [sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_return_quantity#18, sr_returned_date_sk#19] (26) Filter [codegen id : 10] -Input [5]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22, sr_returned_date_sk#23] -Condition : ((isnotnull(sr_customer_sk#20) AND isnotnull(sr_item_sk#19)) AND isnotnull(sr_ticket_number#21)) +Input [5]: [sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_return_quantity#18, sr_returned_date_sk#19] +Condition : ((isnotnull(sr_customer_sk#16) AND isnotnull(sr_item_sk#15)) AND isnotnull(sr_ticket_number#17)) (27) ReusedExchange [Reuses operator id: 59] -Output [1]: [d_date_sk#25] +Output [1]: [d_date_sk#21] (28) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [sr_returned_date_sk#23] -Right keys [1]: [d_date_sk#25] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [d_date_sk#21] Join condition: None (29) Project [codegen id : 10] -Output [4]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22] -Input [6]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22, sr_returned_date_sk#23, d_date_sk#25] +Output [4]: [sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_return_quantity#18] +Input [6]: [sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_return_quantity#18, sr_returned_date_sk#19, d_date_sk#21] (30) Exchange -Input [4]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22] -Arguments: hashpartitioning(sr_customer_sk#20, sr_item_sk#19, sr_ticket_number#21, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [4]: [sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_return_quantity#18] +Arguments: hashpartitioning(sr_customer_sk#16, sr_item_sk#15, sr_ticket_number#17, 5), ENSURE_REQUIREMENTS, [plan_id=5] (31) Sort [codegen id : 11] -Input [4]: [sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22] -Arguments: [sr_customer_sk#20 ASC NULLS FIRST, sr_item_sk#19 ASC NULLS FIRST, sr_ticket_number#21 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_return_quantity#18] +Arguments: [sr_customer_sk#16 ASC NULLS FIRST, sr_item_sk#15 ASC NULLS FIRST, sr_ticket_number#17 ASC NULLS FIRST], false, 0 (32) SortMergeJoin [codegen id : 12] Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] -Right keys [3]: [sr_customer_sk#20, sr_item_sk#19, sr_ticket_number#21] +Right keys [3]: [sr_customer_sk#16, sr_item_sk#15, sr_ticket_number#17] Join condition: None (33) Project [codegen id : 12] -Output [8]: [ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_return_quantity#22] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_ticket_number#21, sr_return_quantity#22] +Output [8]: [ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14, sr_item_sk#15, sr_customer_sk#16, sr_return_quantity#18] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#4, ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14, sr_item_sk#15, sr_customer_sk#16, sr_ticket_number#17, sr_return_quantity#18] (34) Exchange -Input [8]: [ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_return_quantity#22] -Arguments: hashpartitioning(sr_customer_sk#20, sr_item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [8]: [ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14, sr_item_sk#15, sr_customer_sk#16, sr_return_quantity#18] +Arguments: hashpartitioning(sr_customer_sk#16, sr_item_sk#15, 5), ENSURE_REQUIREMENTS, [plan_id=6] (35) Sort [codegen id : 13] -Input [8]: [ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_return_quantity#22] -Arguments: [sr_customer_sk#20 ASC NULLS FIRST, sr_item_sk#19 ASC NULLS FIRST], false, 0 +Input [8]: [ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14, sr_item_sk#15, sr_customer_sk#16, sr_return_quantity#18] +Arguments: [sr_customer_sk#16 ASC NULLS FIRST, sr_item_sk#15 ASC NULLS FIRST], false, 0 (36) Scan parquet default.catalog_sales -Output [4]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30, cs_sold_date_sk#31] +Output [4]: [cs_bill_customer_sk#22, cs_item_sk#23, cs_quantity#24, cs_sold_date_sk#25] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#31), dynamicpruningexpression(cs_sold_date_sk#31 IN dynamicpruning#32)] +PartitionFilters: [isnotnull(cs_sold_date_sk#25), dynamicpruningexpression(cs_sold_date_sk#25 IN dynamicpruning#26)] PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 15] -Input [4]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30, cs_sold_date_sk#31] +Input [4]: [cs_bill_customer_sk#22, cs_item_sk#23, cs_quantity#24, cs_sold_date_sk#25] (38) Filter [codegen id : 15] -Input [4]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30, cs_sold_date_sk#31] -Condition : (isnotnull(cs_bill_customer_sk#28) AND isnotnull(cs_item_sk#29)) +Input [4]: [cs_bill_customer_sk#22, cs_item_sk#23, cs_quantity#24, cs_sold_date_sk#25] +Condition : (isnotnull(cs_bill_customer_sk#22) AND isnotnull(cs_item_sk#23)) (39) ReusedExchange [Reuses operator id: 64] -Output [1]: [d_date_sk#33] +Output [1]: [d_date_sk#27] (40) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [cs_sold_date_sk#31] -Right keys [1]: [d_date_sk#33] +Left keys [1]: [cs_sold_date_sk#25] +Right keys [1]: [d_date_sk#27] Join condition: None (41) Project [codegen id : 15] -Output [3]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30] -Input [5]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30, cs_sold_date_sk#31, d_date_sk#33] +Output [3]: [cs_bill_customer_sk#22, cs_item_sk#23, cs_quantity#24] +Input [5]: [cs_bill_customer_sk#22, cs_item_sk#23, cs_quantity#24, cs_sold_date_sk#25, d_date_sk#27] (42) Exchange -Input [3]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30] -Arguments: hashpartitioning(cs_bill_customer_sk#28, cs_item_sk#29, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [3]: [cs_bill_customer_sk#22, cs_item_sk#23, cs_quantity#24] +Arguments: hashpartitioning(cs_bill_customer_sk#22, cs_item_sk#23, 5), ENSURE_REQUIREMENTS, [plan_id=7] (43) Sort [codegen id : 16] -Input [3]: [cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30] -Arguments: [cs_bill_customer_sk#28 ASC NULLS FIRST, cs_item_sk#29 ASC NULLS FIRST], false, 0 +Input [3]: [cs_bill_customer_sk#22, cs_item_sk#23, cs_quantity#24] +Arguments: [cs_bill_customer_sk#22 ASC NULLS FIRST, cs_item_sk#23 ASC NULLS FIRST], false, 0 (44) SortMergeJoin [codegen id : 17] -Left keys [2]: [sr_customer_sk#20, sr_item_sk#19] -Right keys [2]: [cs_bill_customer_sk#28, cs_item_sk#29] +Left keys [2]: [sr_customer_sk#16, sr_item_sk#15] +Right keys [2]: [cs_bill_customer_sk#22, cs_item_sk#23] Join condition: None (45) Project [codegen id : 17] -Output [7]: [ss_quantity#5, sr_return_quantity#22, cs_quantity#30, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] -Input [11]: [ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16, sr_item_sk#19, sr_customer_sk#20, sr_return_quantity#22, cs_bill_customer_sk#28, cs_item_sk#29, cs_quantity#30] +Output [7]: [ss_quantity#5, sr_return_quantity#18, cs_quantity#24, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14] +Input [11]: [ss_quantity#5, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14, sr_item_sk#15, sr_customer_sk#16, sr_return_quantity#18, cs_bill_customer_sk#22, cs_item_sk#23, cs_quantity#24] (46) HashAggregate [codegen id : 17] -Input [7]: [ss_quantity#5, sr_return_quantity#22, cs_quantity#30, s_store_id#10, s_store_name#11, i_item_id#15, i_item_desc#16] -Keys [4]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11] -Functions [3]: [partial_sum(ss_quantity#5), partial_sum(sr_return_quantity#22), partial_sum(cs_quantity#30)] -Aggregate Attributes [3]: [sum#35, sum#36, sum#37] -Results [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#38, sum#39, sum#40] +Input [7]: [ss_quantity#5, sr_return_quantity#18, cs_quantity#24, s_store_id#10, s_store_name#11, i_item_id#13, i_item_desc#14] +Keys [4]: [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11] +Functions [3]: [partial_sum(ss_quantity#5), partial_sum(sr_return_quantity#18), partial_sum(cs_quantity#24)] +Aggregate Attributes [3]: [sum#28, sum#29, sum#30] +Results [7]: [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11, sum#31, sum#32, sum#33] (47) Exchange -Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#38, sum#39, sum#40] -Arguments: hashpartitioning(i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, 5), ENSURE_REQUIREMENTS, [id=#41] +Input [7]: [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11, sum#31, sum#32, sum#33] +Arguments: hashpartitioning(i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] (48) HashAggregate [codegen id : 18] -Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum#38, sum#39, sum#40] -Keys [4]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11] -Functions [3]: [sum(ss_quantity#5), sum(sr_return_quantity#22), sum(cs_quantity#30)] -Aggregate Attributes [3]: [sum(ss_quantity#5)#42, sum(sr_return_quantity#22)#43, sum(cs_quantity#30)#44] -Results [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, sum(ss_quantity#5)#42 AS store_sales_quantity#45, sum(sr_return_quantity#22)#43 AS store_returns_quantity#46, sum(cs_quantity#30)#44 AS catalog_sales_quantity#47] +Input [7]: [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11, sum#31, sum#32, sum#33] +Keys [4]: [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11] +Functions [3]: [sum(ss_quantity#5), sum(sr_return_quantity#18), sum(cs_quantity#24)] +Aggregate Attributes [3]: [sum(ss_quantity#5)#34, sum(sr_return_quantity#18)#35, sum(cs_quantity#24)#36] +Results [7]: [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11, sum(ss_quantity#5)#34 AS store_sales_quantity#37, sum(sr_return_quantity#18)#35 AS store_returns_quantity#38, sum(cs_quantity#24)#36 AS catalog_sales_quantity#39] (49) TakeOrderedAndProject -Input [7]: [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, store_sales_quantity#45, store_returns_quantity#46, catalog_sales_quantity#47] -Arguments: 100, [i_item_id#15 ASC NULLS FIRST, i_item_desc#16 ASC NULLS FIRST, s_store_id#10 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST], [i_item_id#15, i_item_desc#16, s_store_id#10, s_store_name#11, store_sales_quantity#45, store_returns_quantity#46, catalog_sales_quantity#47] +Input [7]: [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11, store_sales_quantity#37, store_returns_quantity#38, catalog_sales_quantity#39] +Arguments: 100, [i_item_id#13 ASC NULLS FIRST, i_item_desc#14 ASC NULLS FIRST, s_store_id#10 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST], [i_item_id#13, i_item_desc#14, s_store_id#10, s_store_name#11, store_sales_quantity#37, store_returns_quantity#38, catalog_sales_quantity#39] ===== Subqueries ===== @@ -280,28 +280,28 @@ BroadcastExchange (54) (50) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#48, d_moy#49] +Output [3]: [d_date_sk#8, d_year#40, d_moy#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#48, d_moy#49] +Input [3]: [d_date_sk#8, d_year#40, d_moy#41] (52) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#48, d_moy#49] -Condition : ((((isnotnull(d_moy#49) AND isnotnull(d_year#48)) AND (d_moy#49 = 9)) AND (d_year#48 = 1999)) AND isnotnull(d_date_sk#8)) +Input [3]: [d_date_sk#8, d_year#40, d_moy#41] +Condition : ((((isnotnull(d_moy#41) AND isnotnull(d_year#40)) AND (d_moy#41 = 9)) AND (d_year#40 = 1999)) AND isnotnull(d_date_sk#8)) (53) Project [codegen id : 1] Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#48, d_moy#49] +Input [3]: [d_date_sk#8, d_year#40, d_moy#41] (54) BroadcastExchange Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] -Subquery:2 Hosting operator id = 24 Hosting Expression = sr_returned_date_sk#23 IN dynamicpruning#24 +Subquery:2 Hosting operator id = 24 Hosting Expression = sr_returned_date_sk#19 IN dynamicpruning#20 BroadcastExchange (59) +- * Project (58) +- * Filter (57) @@ -310,28 +310,28 @@ BroadcastExchange (59) (55) Scan parquet default.date_dim -Output [3]: [d_date_sk#25, d_year#51, d_moy#52] +Output [3]: [d_date_sk#21, d_year#42, d_moy#43] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), EqualTo(d_year,1999), IsNotNull(d_date_sk)] ReadSchema: struct (56) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#25, d_year#51, d_moy#52] +Input [3]: [d_date_sk#21, d_year#42, d_moy#43] (57) Filter [codegen id : 1] -Input [3]: [d_date_sk#25, d_year#51, d_moy#52] -Condition : (((((isnotnull(d_moy#52) AND isnotnull(d_year#51)) AND (d_moy#52 >= 9)) AND (d_moy#52 <= 12)) AND (d_year#51 = 1999)) AND isnotnull(d_date_sk#25)) +Input [3]: [d_date_sk#21, d_year#42, d_moy#43] +Condition : (((((isnotnull(d_moy#43) AND isnotnull(d_year#42)) AND (d_moy#43 >= 9)) AND (d_moy#43 <= 12)) AND (d_year#42 = 1999)) AND isnotnull(d_date_sk#21)) (58) Project [codegen id : 1] -Output [1]: [d_date_sk#25] -Input [3]: [d_date_sk#25, d_year#51, d_moy#52] +Output [1]: [d_date_sk#21] +Input [3]: [d_date_sk#21, d_year#42, d_moy#43] (59) BroadcastExchange -Input [1]: [d_date_sk#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] -Subquery:3 Hosting operator id = 36 Hosting Expression = cs_sold_date_sk#31 IN dynamicpruning#32 +Subquery:3 Hosting operator id = 36 Hosting Expression = cs_sold_date_sk#25 IN dynamicpruning#26 BroadcastExchange (64) +- * Project (63) +- * Filter (62) @@ -340,25 +340,25 @@ BroadcastExchange (64) (60) Scan parquet default.date_dim -Output [2]: [d_date_sk#33, d_year#54] +Output [2]: [d_date_sk#27, d_year#44] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] ReadSchema: struct (61) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#33, d_year#54] +Input [2]: [d_date_sk#27, d_year#44] (62) Filter [codegen id : 1] -Input [2]: [d_date_sk#33, d_year#54] -Condition : (d_year#54 IN (1999,2000,2001) AND isnotnull(d_date_sk#33)) +Input [2]: [d_date_sk#27, d_year#44] +Condition : (d_year#44 IN (1999,2000,2001) AND isnotnull(d_date_sk#27)) (63) Project [codegen id : 1] -Output [1]: [d_date_sk#33] -Input [2]: [d_date_sk#33, d_year#54] +Output [1]: [d_date_sk#27] +Input [2]: [d_date_sk#27, d_year#44] (64) BroadcastExchange -Input [1]: [d_date_sk#33] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#55] +Input [1]: [d_date_sk#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt index 86096ab89650c..28e49ac7927dc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q29/explain.txt @@ -73,7 +73,7 @@ Condition : ((isnotnull(sr_customer_sk#9) AND isnotnull(sr_item_sk#8)) AND isnot (7) BroadcastExchange Input [5]: [sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11, sr_returned_date_sk#12] -Arguments: HashedRelationBroadcastMode(List(input[1, int, false], input[0, int, false], input[2, int, false]),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(input[1, int, false], input[0, int, false], input[2, int, false]),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 8] Left keys [3]: [ss_customer_sk#2, ss_item_sk#1, ss_ticket_number#4] @@ -85,144 +85,144 @@ Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_i Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#8, sr_customer_sk#9, sr_ticket_number#10, sr_return_quantity#11, sr_returned_date_sk#12] (10) Scan parquet default.catalog_sales -Output [4]: [cs_bill_customer_sk#15, cs_item_sk#16, cs_quantity#17, cs_sold_date_sk#18] +Output [4]: [cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16, cs_sold_date_sk#17] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#18), dynamicpruningexpression(cs_sold_date_sk#18 IN dynamicpruning#19)] +PartitionFilters: [isnotnull(cs_sold_date_sk#17), dynamicpruningexpression(cs_sold_date_sk#17 IN dynamicpruning#18)] PushedFilters: [IsNotNull(cs_bill_customer_sk), IsNotNull(cs_item_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [4]: [cs_bill_customer_sk#15, cs_item_sk#16, cs_quantity#17, cs_sold_date_sk#18] +Input [4]: [cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16, cs_sold_date_sk#17] (12) Filter [codegen id : 2] -Input [4]: [cs_bill_customer_sk#15, cs_item_sk#16, cs_quantity#17, cs_sold_date_sk#18] -Condition : (isnotnull(cs_bill_customer_sk#15) AND isnotnull(cs_item_sk#16)) +Input [4]: [cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16, cs_sold_date_sk#17] +Condition : (isnotnull(cs_bill_customer_sk#14) AND isnotnull(cs_item_sk#15)) (13) BroadcastExchange -Input [4]: [cs_bill_customer_sk#15, cs_item_sk#16, cs_quantity#17, cs_sold_date_sk#18] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [id=#20] +Input [4]: [cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16, cs_sold_date_sk#17] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[1, int, false] as bigint) & 4294967295))),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 8] Left keys [2]: [sr_customer_sk#9, sr_item_sk#8] -Right keys [2]: [cs_bill_customer_sk#15, cs_item_sk#16] +Right keys [2]: [cs_bill_customer_sk#14, cs_item_sk#15] Join condition: None (15) Project [codegen id : 8] -Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#17, cs_sold_date_sk#18] -Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11, sr_returned_date_sk#12, cs_bill_customer_sk#15, cs_item_sk#16, cs_quantity#17, cs_sold_date_sk#18] +Output [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#16, cs_sold_date_sk#17] +Input [12]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_item_sk#8, sr_customer_sk#9, sr_return_quantity#11, sr_returned_date_sk#12, cs_bill_customer_sk#14, cs_item_sk#15, cs_quantity#16, cs_sold_date_sk#17] (16) ReusedExchange [Reuses operator id: 45] -Output [1]: [d_date_sk#21] +Output [1]: [d_date_sk#19] (17) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_sold_date_sk#6] -Right keys [1]: [d_date_sk#21] +Right keys [1]: [d_date_sk#19] Join condition: None (18) Project [codegen id : 8] -Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#17, cs_sold_date_sk#18] -Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#17, cs_sold_date_sk#18, d_date_sk#21] +Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#16, cs_sold_date_sk#17] +Input [9]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, ss_sold_date_sk#6, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#16, cs_sold_date_sk#17, d_date_sk#19] (19) ReusedExchange [Reuses operator id: 50] -Output [1]: [d_date_sk#22] +Output [1]: [d_date_sk#20] (20) BroadcastHashJoin [codegen id : 8] Left keys [1]: [sr_returned_date_sk#12] -Right keys [1]: [d_date_sk#22] +Right keys [1]: [d_date_sk#20] Join condition: None (21) Project [codegen id : 8] -Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#17, cs_sold_date_sk#18] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#17, cs_sold_date_sk#18, d_date_sk#22] +Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#16, cs_sold_date_sk#17] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, sr_returned_date_sk#12, cs_quantity#16, cs_sold_date_sk#17, d_date_sk#20] (22) ReusedExchange [Reuses operator id: 55] -Output [1]: [d_date_sk#23] +Output [1]: [d_date_sk#21] (23) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_sold_date_sk#18] -Right keys [1]: [d_date_sk#23] +Left keys [1]: [cs_sold_date_sk#17] +Right keys [1]: [d_date_sk#21] Join condition: None (24) Project [codegen id : 8] -Output [5]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#17] -Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#17, cs_sold_date_sk#18, d_date_sk#23] +Output [5]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#16] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#16, cs_sold_date_sk#17, d_date_sk#21] (25) Scan parquet default.store -Output [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Output [3]: [s_store_sk#22, s_store_id#23, s_store_name#24] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 6] -Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] +Input [3]: [s_store_sk#22, s_store_id#23, s_store_name#24] (27) Filter [codegen id : 6] -Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] -Condition : isnotnull(s_store_sk#24) +Input [3]: [s_store_sk#22, s_store_id#23, s_store_name#24] +Condition : isnotnull(s_store_sk#22) (28) BroadcastExchange -Input [3]: [s_store_sk#24, s_store_id#25, s_store_name#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] +Input [3]: [s_store_sk#22, s_store_id#23, s_store_name#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (29) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#24] +Right keys [1]: [s_store_sk#22] Join condition: None (30) Project [codegen id : 8] -Output [6]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#11, cs_quantity#17, s_store_id#25, s_store_name#26] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#17, s_store_sk#24, s_store_id#25, s_store_name#26] +Output [6]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#11, cs_quantity#16, s_store_id#23, s_store_name#24] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#5, sr_return_quantity#11, cs_quantity#16, s_store_sk#22, s_store_id#23, s_store_name#24] (31) Scan parquet default.item -Output [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] +Output [3]: [i_item_sk#25, i_item_id#26, i_item_desc#27] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 7] -Input [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] +Input [3]: [i_item_sk#25, i_item_id#26, i_item_desc#27] (33) Filter [codegen id : 7] -Input [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] -Condition : isnotnull(i_item_sk#28) +Input [3]: [i_item_sk#25, i_item_id#26, i_item_desc#27] +Condition : isnotnull(i_item_sk#25) (34) BroadcastExchange -Input [3]: [i_item_sk#28, i_item_id#29, i_item_desc#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] +Input [3]: [i_item_sk#25, i_item_id#26, i_item_desc#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (35) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#28] +Right keys [1]: [i_item_sk#25] Join condition: None (36) Project [codegen id : 8] -Output [7]: [ss_quantity#5, sr_return_quantity#11, cs_quantity#17, s_store_id#25, s_store_name#26, i_item_id#29, i_item_desc#30] -Input [9]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#11, cs_quantity#17, s_store_id#25, s_store_name#26, i_item_sk#28, i_item_id#29, i_item_desc#30] +Output [7]: [ss_quantity#5, sr_return_quantity#11, cs_quantity#16, s_store_id#23, s_store_name#24, i_item_id#26, i_item_desc#27] +Input [9]: [ss_item_sk#1, ss_quantity#5, sr_return_quantity#11, cs_quantity#16, s_store_id#23, s_store_name#24, i_item_sk#25, i_item_id#26, i_item_desc#27] (37) HashAggregate [codegen id : 8] -Input [7]: [ss_quantity#5, sr_return_quantity#11, cs_quantity#17, s_store_id#25, s_store_name#26, i_item_id#29, i_item_desc#30] -Keys [4]: [i_item_id#29, i_item_desc#30, s_store_id#25, s_store_name#26] -Functions [3]: [partial_sum(ss_quantity#5), partial_sum(sr_return_quantity#11), partial_sum(cs_quantity#17)] -Aggregate Attributes [3]: [sum#32, sum#33, sum#34] -Results [7]: [i_item_id#29, i_item_desc#30, s_store_id#25, s_store_name#26, sum#35, sum#36, sum#37] +Input [7]: [ss_quantity#5, sr_return_quantity#11, cs_quantity#16, s_store_id#23, s_store_name#24, i_item_id#26, i_item_desc#27] +Keys [4]: [i_item_id#26, i_item_desc#27, s_store_id#23, s_store_name#24] +Functions [3]: [partial_sum(ss_quantity#5), partial_sum(sr_return_quantity#11), partial_sum(cs_quantity#16)] +Aggregate Attributes [3]: [sum#28, sum#29, sum#30] +Results [7]: [i_item_id#26, i_item_desc#27, s_store_id#23, s_store_name#24, sum#31, sum#32, sum#33] (38) Exchange -Input [7]: [i_item_id#29, i_item_desc#30, s_store_id#25, s_store_name#26, sum#35, sum#36, sum#37] -Arguments: hashpartitioning(i_item_id#29, i_item_desc#30, s_store_id#25, s_store_name#26, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [7]: [i_item_id#26, i_item_desc#27, s_store_id#23, s_store_name#24, sum#31, sum#32, sum#33] +Arguments: hashpartitioning(i_item_id#26, i_item_desc#27, s_store_id#23, s_store_name#24, 5), ENSURE_REQUIREMENTS, [plan_id=5] (39) HashAggregate [codegen id : 9] -Input [7]: [i_item_id#29, i_item_desc#30, s_store_id#25, s_store_name#26, sum#35, sum#36, sum#37] -Keys [4]: [i_item_id#29, i_item_desc#30, s_store_id#25, s_store_name#26] -Functions [3]: [sum(ss_quantity#5), sum(sr_return_quantity#11), sum(cs_quantity#17)] -Aggregate Attributes [3]: [sum(ss_quantity#5)#39, sum(sr_return_quantity#11)#40, sum(cs_quantity#17)#41] -Results [7]: [i_item_id#29, i_item_desc#30, s_store_id#25, s_store_name#26, sum(ss_quantity#5)#39 AS store_sales_quantity#42, sum(sr_return_quantity#11)#40 AS store_returns_quantity#43, sum(cs_quantity#17)#41 AS catalog_sales_quantity#44] +Input [7]: [i_item_id#26, i_item_desc#27, s_store_id#23, s_store_name#24, sum#31, sum#32, sum#33] +Keys [4]: [i_item_id#26, i_item_desc#27, s_store_id#23, s_store_name#24] +Functions [3]: [sum(ss_quantity#5), sum(sr_return_quantity#11), sum(cs_quantity#16)] +Aggregate Attributes [3]: [sum(ss_quantity#5)#34, sum(sr_return_quantity#11)#35, sum(cs_quantity#16)#36] +Results [7]: [i_item_id#26, i_item_desc#27, s_store_id#23, s_store_name#24, sum(ss_quantity#5)#34 AS store_sales_quantity#37, sum(sr_return_quantity#11)#35 AS store_returns_quantity#38, sum(cs_quantity#16)#36 AS catalog_sales_quantity#39] (40) TakeOrderedAndProject -Input [7]: [i_item_id#29, i_item_desc#30, s_store_id#25, s_store_name#26, store_sales_quantity#42, store_returns_quantity#43, catalog_sales_quantity#44] -Arguments: 100, [i_item_id#29 ASC NULLS FIRST, i_item_desc#30 ASC NULLS FIRST, s_store_id#25 ASC NULLS FIRST, s_store_name#26 ASC NULLS FIRST], [i_item_id#29, i_item_desc#30, s_store_id#25, s_store_name#26, store_sales_quantity#42, store_returns_quantity#43, catalog_sales_quantity#44] +Input [7]: [i_item_id#26, i_item_desc#27, s_store_id#23, s_store_name#24, store_sales_quantity#37, store_returns_quantity#38, catalog_sales_quantity#39] +Arguments: 100, [i_item_id#26 ASC NULLS FIRST, i_item_desc#27 ASC NULLS FIRST, s_store_id#23 ASC NULLS FIRST, s_store_name#24 ASC NULLS FIRST], [i_item_id#26, i_item_desc#27, s_store_id#23, s_store_name#24, store_sales_quantity#37, store_returns_quantity#38, catalog_sales_quantity#39] ===== Subqueries ===== @@ -235,26 +235,26 @@ BroadcastExchange (45) (41) Scan parquet default.date_dim -Output [3]: [d_date_sk#21, d_year#45, d_moy#46] +Output [3]: [d_date_sk#19, d_year#40, d_moy#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,9), EqualTo(d_year,1999), IsNotNull(d_date_sk)] ReadSchema: struct (42) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#21, d_year#45, d_moy#46] +Input [3]: [d_date_sk#19, d_year#40, d_moy#41] (43) Filter [codegen id : 1] -Input [3]: [d_date_sk#21, d_year#45, d_moy#46] -Condition : ((((isnotnull(d_moy#46) AND isnotnull(d_year#45)) AND (d_moy#46 = 9)) AND (d_year#45 = 1999)) AND isnotnull(d_date_sk#21)) +Input [3]: [d_date_sk#19, d_year#40, d_moy#41] +Condition : ((((isnotnull(d_moy#41) AND isnotnull(d_year#40)) AND (d_moy#41 = 9)) AND (d_year#40 = 1999)) AND isnotnull(d_date_sk#19)) (44) Project [codegen id : 1] -Output [1]: [d_date_sk#21] -Input [3]: [d_date_sk#21, d_year#45, d_moy#46] +Output [1]: [d_date_sk#19] +Input [3]: [d_date_sk#19, d_year#40, d_moy#41] (45) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#47] +Input [1]: [d_date_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] Subquery:2 Hosting operator id = 4 Hosting Expression = sr_returned_date_sk#12 IN dynamicpruning#13 BroadcastExchange (50) @@ -265,28 +265,28 @@ BroadcastExchange (50) (46) Scan parquet default.date_dim -Output [3]: [d_date_sk#22, d_year#48, d_moy#49] +Output [3]: [d_date_sk#20, d_year#42, d_moy#43] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), GreaterThanOrEqual(d_moy,9), LessThanOrEqual(d_moy,12), EqualTo(d_year,1999), IsNotNull(d_date_sk)] ReadSchema: struct (47) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#22, d_year#48, d_moy#49] +Input [3]: [d_date_sk#20, d_year#42, d_moy#43] (48) Filter [codegen id : 1] -Input [3]: [d_date_sk#22, d_year#48, d_moy#49] -Condition : (((((isnotnull(d_moy#49) AND isnotnull(d_year#48)) AND (d_moy#49 >= 9)) AND (d_moy#49 <= 12)) AND (d_year#48 = 1999)) AND isnotnull(d_date_sk#22)) +Input [3]: [d_date_sk#20, d_year#42, d_moy#43] +Condition : (((((isnotnull(d_moy#43) AND isnotnull(d_year#42)) AND (d_moy#43 >= 9)) AND (d_moy#43 <= 12)) AND (d_year#42 = 1999)) AND isnotnull(d_date_sk#20)) (49) Project [codegen id : 1] -Output [1]: [d_date_sk#22] -Input [3]: [d_date_sk#22, d_year#48, d_moy#49] +Output [1]: [d_date_sk#20] +Input [3]: [d_date_sk#20, d_year#42, d_moy#43] (50) BroadcastExchange -Input [1]: [d_date_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] +Input [1]: [d_date_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:3 Hosting operator id = 10 Hosting Expression = cs_sold_date_sk#18 IN dynamicpruning#19 +Subquery:3 Hosting operator id = 10 Hosting Expression = cs_sold_date_sk#17 IN dynamicpruning#18 BroadcastExchange (55) +- * Project (54) +- * Filter (53) @@ -295,25 +295,25 @@ BroadcastExchange (55) (51) Scan parquet default.date_dim -Output [2]: [d_date_sk#23, d_year#51] +Output [2]: [d_date_sk#21, d_year#44] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] ReadSchema: struct (52) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#23, d_year#51] +Input [2]: [d_date_sk#21, d_year#44] (53) Filter [codegen id : 1] -Input [2]: [d_date_sk#23, d_year#51] -Condition : (d_year#51 IN (1999,2000,2001) AND isnotnull(d_date_sk#23)) +Input [2]: [d_date_sk#21, d_year#44] +Condition : (d_year#44 IN (1999,2000,2001) AND isnotnull(d_date_sk#21)) (54) Project [codegen id : 1] -Output [1]: [d_date_sk#23] -Input [2]: [d_date_sk#23, d_year#51] +Output [1]: [d_date_sk#21] +Input [2]: [d_date_sk#21, d_year#44] (55) BroadcastExchange -Input [1]: [d_date_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] +Input [1]: [d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.sf100/explain.txt index 8ec42f473ee9e..cd0368c14a253 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3.sf100/explain.txt @@ -53,7 +53,7 @@ Input [4]: [i_item_sk#5, i_brand_id#6, i_brand#7, i_manufact_id#8] (8) BroadcastExchange Input [3]: [i_item_sk#5, i_brand_id#6, i_brand#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#1] @@ -65,38 +65,38 @@ Output [4]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_brand_id#6, i_brand#7] Input [6]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#5, i_brand_id#6, i_brand#7] (11) ReusedExchange [Reuses operator id: 22] -Output [2]: [d_date_sk#10, d_year#11] +Output [2]: [d_date_sk#9, d_year#10] (12) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#3] -Right keys [1]: [d_date_sk#10] +Right keys [1]: [d_date_sk#9] Join condition: None (13) Project [codegen id : 3] -Output [4]: [d_year#11, ss_ext_sales_price#2, i_brand_id#6, i_brand#7] -Input [6]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_brand_id#6, i_brand#7, d_date_sk#10, d_year#11] +Output [4]: [d_year#10, ss_ext_sales_price#2, i_brand_id#6, i_brand#7] +Input [6]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_brand_id#6, i_brand#7, d_date_sk#9, d_year#10] (14) HashAggregate [codegen id : 3] -Input [4]: [d_year#11, ss_ext_sales_price#2, i_brand_id#6, i_brand#7] -Keys [3]: [d_year#11, i_brand#7, i_brand_id#6] +Input [4]: [d_year#10, ss_ext_sales_price#2, i_brand_id#6, i_brand#7] +Keys [3]: [d_year#10, i_brand#7, i_brand_id#6] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#12] -Results [4]: [d_year#11, i_brand#7, i_brand_id#6, sum#13] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#10, i_brand#7, i_brand_id#6, sum#12] (15) Exchange -Input [4]: [d_year#11, i_brand#7, i_brand_id#6, sum#13] -Arguments: hashpartitioning(d_year#11, i_brand#7, i_brand_id#6, 5), ENSURE_REQUIREMENTS, [id=#14] +Input [4]: [d_year#10, i_brand#7, i_brand_id#6, sum#12] +Arguments: hashpartitioning(d_year#10, i_brand#7, i_brand_id#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (16) HashAggregate [codegen id : 4] -Input [4]: [d_year#11, i_brand#7, i_brand_id#6, sum#13] -Keys [3]: [d_year#11, i_brand#7, i_brand_id#6] +Input [4]: [d_year#10, i_brand#7, i_brand_id#6, sum#12] +Keys [3]: [d_year#10, i_brand#7, i_brand_id#6] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#15] -Results [4]: [d_year#11, i_brand_id#6 AS brand_id#16, i_brand#7 AS brand#17, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#15,17,2) AS sum_agg#18] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#13] +Results [4]: [d_year#10, i_brand_id#6 AS brand_id#14, i_brand#7 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#13,17,2) AS sum_agg#16] (17) TakeOrderedAndProject -Input [4]: [d_year#11, brand_id#16, brand#17, sum_agg#18] -Arguments: 100, [d_year#11 ASC NULLS FIRST, sum_agg#18 DESC NULLS LAST, brand_id#16 ASC NULLS FIRST], [d_year#11, brand_id#16, brand#17, sum_agg#18] +Input [4]: [d_year#10, brand_id#14, brand#15, sum_agg#16] +Arguments: 100, [d_year#10 ASC NULLS FIRST, sum_agg#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [d_year#10, brand_id#14, brand#15, sum_agg#16] ===== Subqueries ===== @@ -109,25 +109,25 @@ BroadcastExchange (22) (18) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_moy#19] +Output [3]: [d_date_sk#9, d_year#10, d_moy#17] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#19] +Input [3]: [d_date_sk#9, d_year#10, d_moy#17] (20) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#19] -Condition : ((isnotnull(d_moy#19) AND (d_moy#19 = 11)) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#9, d_year#10, d_moy#17] +Condition : ((isnotnull(d_moy#17) AND (d_moy#17 = 11)) AND isnotnull(d_date_sk#9)) (21) Project [codegen id : 1] -Output [2]: [d_date_sk#10, d_year#11] -Input [3]: [d_date_sk#10, d_year#11, d_moy#19] +Output [2]: [d_date_sk#9, d_year#10] +Input [3]: [d_date_sk#9, d_year#10, d_moy#17] (22) BroadcastExchange -Input [2]: [d_date_sk#10, d_year#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt index 77eb7fefd4d05..6d1938cf12ce9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q3/explain.txt @@ -57,7 +57,7 @@ Condition : isnotnull(ss_item_sk#4) (8) BroadcastExchange Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] @@ -69,55 +69,55 @@ Output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] (11) Scan parquet default.item -Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_manufact_id), EqualTo(i_manufact_id,128), IsNotNull(i_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] (13) Filter [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] -Condition : ((isnotnull(i_manufact_id#11) AND (i_manufact_id#11 = 128)) AND isnotnull(i_item_sk#8)) +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] +Condition : ((isnotnull(i_manufact_id#10) AND (i_manufact_id#10 = 128)) AND isnotnull(i_item_sk#7)) (14) Project [codegen id : 2] -Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manufact_id#11] +Output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manufact_id#10] (15) BroadcastExchange -Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#4] -Right keys [1]: [i_item_sk#8] +Right keys [1]: [i_item_sk#7] Join condition: None (17) Project [codegen id : 3] -Output [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#9, i_brand#10] -Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#8, i_brand_id#9, i_brand#10] +Output [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] (18) HashAggregate [codegen id : 3] -Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#9, i_brand#10] -Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum#13] -Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] (19) Exchange -Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] -Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(d_year#2, i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] -Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#16] -Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#16,17,2) AS sum_agg#19] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [4]: [d_year#2, i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS sum_agg#16] (21) TakeOrderedAndProject -Input [4]: [d_year#2, brand_id#17, brand#18, sum_agg#19] -Arguments: 100, [d_year#2 ASC NULLS FIRST, sum_agg#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, sum_agg#19] +Input [4]: [d_year#2, brand_id#14, brand#15, sum_agg#16] +Arguments: 100, [d_year#2 ASC NULLS FIRST, sum_agg#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [d_year#2, brand_id#14, brand#15, sum_agg#16] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.sf100/explain.txt index b2d52de3cae98..0c596694f83d6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30.sf100/explain.txt @@ -90,7 +90,7 @@ Input [2]: [ca_address_sk#15, ca_state#16] (8) BroadcastExchange Input [1]: [ca_address_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 2] Left keys [1]: [c_current_addr_sk#3] @@ -103,216 +103,216 @@ Input [15]: [c_customer_sk#1, c_customer_id#2, c_current_addr_sk#3, c_salutation (11) BroadcastExchange Input [13]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, c_preferred_cust_flag#7, c_birth_day#8, c_birth_month#9, c_birth_year#10, c_birth_country#11, c_login#12, c_email_address#13, c_last_review_date#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (12) Scan parquet default.web_returns -Output [4]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21, wr_returned_date_sk#22] +Output [4]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19, wr_returned_date_sk#20] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#22), dynamicpruningexpression(wr_returned_date_sk#22 IN dynamicpruning#23)] +PartitionFilters: [isnotnull(wr_returned_date_sk#20), dynamicpruningexpression(wr_returned_date_sk#20 IN dynamicpruning#21)] PushedFilters: [IsNotNull(wr_returning_addr_sk), IsNotNull(wr_returning_customer_sk)] ReadSchema: struct (13) ColumnarToRow [codegen id : 4] -Input [4]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21, wr_returned_date_sk#22] +Input [4]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19, wr_returned_date_sk#20] (14) Filter [codegen id : 4] -Input [4]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21, wr_returned_date_sk#22] -Condition : (isnotnull(wr_returning_addr_sk#20) AND isnotnull(wr_returning_customer_sk#19)) +Input [4]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19, wr_returned_date_sk#20] +Condition : (isnotnull(wr_returning_addr_sk#18) AND isnotnull(wr_returning_customer_sk#17)) (15) ReusedExchange [Reuses operator id: 60] -Output [1]: [d_date_sk#24] +Output [1]: [d_date_sk#22] (16) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [wr_returned_date_sk#22] -Right keys [1]: [d_date_sk#24] +Left keys [1]: [wr_returned_date_sk#20] +Right keys [1]: [d_date_sk#22] Join condition: None (17) Project [codegen id : 4] -Output [3]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21] -Input [5]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21, wr_returned_date_sk#22, d_date_sk#24] +Output [3]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19] +Input [5]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19, wr_returned_date_sk#20, d_date_sk#22] (18) Exchange -Input [3]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21] -Arguments: hashpartitioning(wr_returning_addr_sk#20, 5), ENSURE_REQUIREMENTS, [id=#25] +Input [3]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19] +Arguments: hashpartitioning(wr_returning_addr_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 5] -Input [3]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21] -Arguments: [wr_returning_addr_sk#20 ASC NULLS FIRST], false, 0 +Input [3]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19] +Arguments: [wr_returning_addr_sk#18 ASC NULLS FIRST], false, 0 (20) Scan parquet default.customer_address -Output [2]: [ca_address_sk#26, ca_state#27] +Output [2]: [ca_address_sk#23, ca_state#24] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] ReadSchema: struct (21) ColumnarToRow [codegen id : 6] -Input [2]: [ca_address_sk#26, ca_state#27] +Input [2]: [ca_address_sk#23, ca_state#24] (22) Filter [codegen id : 6] -Input [2]: [ca_address_sk#26, ca_state#27] -Condition : (isnotnull(ca_address_sk#26) AND isnotnull(ca_state#27)) +Input [2]: [ca_address_sk#23, ca_state#24] +Condition : (isnotnull(ca_address_sk#23) AND isnotnull(ca_state#24)) (23) Exchange -Input [2]: [ca_address_sk#26, ca_state#27] -Arguments: hashpartitioning(ca_address_sk#26, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [2]: [ca_address_sk#23, ca_state#24] +Arguments: hashpartitioning(ca_address_sk#23, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 7] -Input [2]: [ca_address_sk#26, ca_state#27] -Arguments: [ca_address_sk#26 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#23, ca_state#24] +Arguments: [ca_address_sk#23 ASC NULLS FIRST], false, 0 (25) SortMergeJoin [codegen id : 8] -Left keys [1]: [wr_returning_addr_sk#20] -Right keys [1]: [ca_address_sk#26] +Left keys [1]: [wr_returning_addr_sk#18] +Right keys [1]: [ca_address_sk#23] Join condition: None (26) Project [codegen id : 8] -Output [3]: [wr_returning_customer_sk#19, wr_return_amt#21, ca_state#27] -Input [5]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21, ca_address_sk#26, ca_state#27] +Output [3]: [wr_returning_customer_sk#17, wr_return_amt#19, ca_state#24] +Input [5]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19, ca_address_sk#23, ca_state#24] (27) HashAggregate [codegen id : 8] -Input [3]: [wr_returning_customer_sk#19, wr_return_amt#21, ca_state#27] -Keys [2]: [wr_returning_customer_sk#19, ca_state#27] -Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#21))] -Aggregate Attributes [1]: [sum#29] -Results [3]: [wr_returning_customer_sk#19, ca_state#27, sum#30] +Input [3]: [wr_returning_customer_sk#17, wr_return_amt#19, ca_state#24] +Keys [2]: [wr_returning_customer_sk#17, ca_state#24] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#19))] +Aggregate Attributes [1]: [sum#25] +Results [3]: [wr_returning_customer_sk#17, ca_state#24, sum#26] (28) Exchange -Input [3]: [wr_returning_customer_sk#19, ca_state#27, sum#30] -Arguments: hashpartitioning(wr_returning_customer_sk#19, ca_state#27, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [3]: [wr_returning_customer_sk#17, ca_state#24, sum#26] +Arguments: hashpartitioning(wr_returning_customer_sk#17, ca_state#24, 5), ENSURE_REQUIREMENTS, [plan_id=5] (29) HashAggregate -Input [3]: [wr_returning_customer_sk#19, ca_state#27, sum#30] -Keys [2]: [wr_returning_customer_sk#19, ca_state#27] -Functions [1]: [sum(UnscaledValue(wr_return_amt#21))] -Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#21))#32] -Results [3]: [wr_returning_customer_sk#19 AS ctr_customer_sk#33, ca_state#27 AS ctr_state#34, MakeDecimal(sum(UnscaledValue(wr_return_amt#21))#32,17,2) AS ctr_total_return#35] +Input [3]: [wr_returning_customer_sk#17, ca_state#24, sum#26] +Keys [2]: [wr_returning_customer_sk#17, ca_state#24] +Functions [1]: [sum(UnscaledValue(wr_return_amt#19))] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#19))#27] +Results [3]: [wr_returning_customer_sk#17 AS ctr_customer_sk#28, ca_state#24 AS ctr_state#29, MakeDecimal(sum(UnscaledValue(wr_return_amt#19))#27,17,2) AS ctr_total_return#30] (30) Filter -Input [3]: [ctr_customer_sk#33, ctr_state#34, ctr_total_return#35] -Condition : isnotnull(ctr_total_return#35) +Input [3]: [ctr_customer_sk#28, ctr_state#29, ctr_total_return#30] +Condition : isnotnull(ctr_total_return#30) (31) BroadcastHashJoin [codegen id : 17] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ctr_customer_sk#33] +Right keys [1]: [ctr_customer_sk#28] Join condition: None (32) Project [codegen id : 17] -Output [14]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, c_preferred_cust_flag#7, c_birth_day#8, c_birth_month#9, c_birth_year#10, c_birth_country#11, c_login#12, c_email_address#13, c_last_review_date#14, ctr_state#34, ctr_total_return#35] -Input [16]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, c_preferred_cust_flag#7, c_birth_day#8, c_birth_month#9, c_birth_year#10, c_birth_country#11, c_login#12, c_email_address#13, c_last_review_date#14, ctr_customer_sk#33, ctr_state#34, ctr_total_return#35] +Output [14]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, c_preferred_cust_flag#7, c_birth_day#8, c_birth_month#9, c_birth_year#10, c_birth_country#11, c_login#12, c_email_address#13, c_last_review_date#14, ctr_state#29, ctr_total_return#30] +Input [16]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, c_preferred_cust_flag#7, c_birth_day#8, c_birth_month#9, c_birth_year#10, c_birth_country#11, c_login#12, c_email_address#13, c_last_review_date#14, ctr_customer_sk#28, ctr_state#29, ctr_total_return#30] (33) Scan parquet default.web_returns -Output [4]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21, wr_returned_date_sk#22] +Output [4]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19, wr_returned_date_sk#20] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#22), dynamicpruningexpression(wr_returned_date_sk#22 IN dynamicpruning#23)] +PartitionFilters: [isnotnull(wr_returned_date_sk#20), dynamicpruningexpression(wr_returned_date_sk#20 IN dynamicpruning#21)] PushedFilters: [IsNotNull(wr_returning_addr_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 10] -Input [4]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21, wr_returned_date_sk#22] +Input [4]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19, wr_returned_date_sk#20] (35) Filter [codegen id : 10] -Input [4]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21, wr_returned_date_sk#22] -Condition : isnotnull(wr_returning_addr_sk#20) +Input [4]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19, wr_returned_date_sk#20] +Condition : isnotnull(wr_returning_addr_sk#18) (36) ReusedExchange [Reuses operator id: 60] -Output [1]: [d_date_sk#24] +Output [1]: [d_date_sk#22] (37) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [wr_returned_date_sk#22] -Right keys [1]: [d_date_sk#24] +Left keys [1]: [wr_returned_date_sk#20] +Right keys [1]: [d_date_sk#22] Join condition: None (38) Project [codegen id : 10] -Output [3]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21] -Input [5]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21, wr_returned_date_sk#22, d_date_sk#24] +Output [3]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19] +Input [5]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19, wr_returned_date_sk#20, d_date_sk#22] (39) Exchange -Input [3]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21] -Arguments: hashpartitioning(wr_returning_addr_sk#20, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [3]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19] +Arguments: hashpartitioning(wr_returning_addr_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=6] (40) Sort [codegen id : 11] -Input [3]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21] -Arguments: [wr_returning_addr_sk#20 ASC NULLS FIRST], false, 0 +Input [3]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19] +Arguments: [wr_returning_addr_sk#18 ASC NULLS FIRST], false, 0 (41) ReusedExchange [Reuses operator id: 23] -Output [2]: [ca_address_sk#26, ca_state#27] +Output [2]: [ca_address_sk#23, ca_state#24] (42) Sort [codegen id : 13] -Input [2]: [ca_address_sk#26, ca_state#27] -Arguments: [ca_address_sk#26 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#23, ca_state#24] +Arguments: [ca_address_sk#23 ASC NULLS FIRST], false, 0 (43) SortMergeJoin [codegen id : 14] -Left keys [1]: [wr_returning_addr_sk#20] -Right keys [1]: [ca_address_sk#26] +Left keys [1]: [wr_returning_addr_sk#18] +Right keys [1]: [ca_address_sk#23] Join condition: None (44) Project [codegen id : 14] -Output [3]: [wr_returning_customer_sk#19, wr_return_amt#21, ca_state#27] -Input [5]: [wr_returning_customer_sk#19, wr_returning_addr_sk#20, wr_return_amt#21, ca_address_sk#26, ca_state#27] +Output [3]: [wr_returning_customer_sk#17, wr_return_amt#19, ca_state#24] +Input [5]: [wr_returning_customer_sk#17, wr_returning_addr_sk#18, wr_return_amt#19, ca_address_sk#23, ca_state#24] (45) HashAggregate [codegen id : 14] -Input [3]: [wr_returning_customer_sk#19, wr_return_amt#21, ca_state#27] -Keys [2]: [wr_returning_customer_sk#19, ca_state#27] -Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#21))] -Aggregate Attributes [1]: [sum#37] -Results [3]: [wr_returning_customer_sk#19, ca_state#27, sum#38] +Input [3]: [wr_returning_customer_sk#17, wr_return_amt#19, ca_state#24] +Keys [2]: [wr_returning_customer_sk#17, ca_state#24] +Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#19))] +Aggregate Attributes [1]: [sum#31] +Results [3]: [wr_returning_customer_sk#17, ca_state#24, sum#32] (46) Exchange -Input [3]: [wr_returning_customer_sk#19, ca_state#27, sum#38] -Arguments: hashpartitioning(wr_returning_customer_sk#19, ca_state#27, 5), ENSURE_REQUIREMENTS, [id=#39] +Input [3]: [wr_returning_customer_sk#17, ca_state#24, sum#32] +Arguments: hashpartitioning(wr_returning_customer_sk#17, ca_state#24, 5), ENSURE_REQUIREMENTS, [plan_id=7] (47) HashAggregate [codegen id : 15] -Input [3]: [wr_returning_customer_sk#19, ca_state#27, sum#38] -Keys [2]: [wr_returning_customer_sk#19, ca_state#27] -Functions [1]: [sum(UnscaledValue(wr_return_amt#21))] -Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#21))#32] -Results [2]: [ca_state#27 AS ctr_state#34, MakeDecimal(sum(UnscaledValue(wr_return_amt#21))#32,17,2) AS ctr_total_return#35] +Input [3]: [wr_returning_customer_sk#17, ca_state#24, sum#32] +Keys [2]: [wr_returning_customer_sk#17, ca_state#24] +Functions [1]: [sum(UnscaledValue(wr_return_amt#19))] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#19))#27] +Results [2]: [ca_state#24 AS ctr_state#29, MakeDecimal(sum(UnscaledValue(wr_return_amt#19))#27,17,2) AS ctr_total_return#30] (48) HashAggregate [codegen id : 15] -Input [2]: [ctr_state#34, ctr_total_return#35] -Keys [1]: [ctr_state#34] -Functions [1]: [partial_avg(ctr_total_return#35)] -Aggregate Attributes [2]: [sum#40, count#41] -Results [3]: [ctr_state#34, sum#42, count#43] +Input [2]: [ctr_state#29, ctr_total_return#30] +Keys [1]: [ctr_state#29] +Functions [1]: [partial_avg(ctr_total_return#30)] +Aggregate Attributes [2]: [sum#33, count#34] +Results [3]: [ctr_state#29, sum#35, count#36] (49) Exchange -Input [3]: [ctr_state#34, sum#42, count#43] -Arguments: hashpartitioning(ctr_state#34, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [3]: [ctr_state#29, sum#35, count#36] +Arguments: hashpartitioning(ctr_state#29, 5), ENSURE_REQUIREMENTS, [plan_id=8] (50) HashAggregate [codegen id : 16] -Input [3]: [ctr_state#34, sum#42, count#43] -Keys [1]: [ctr_state#34] -Functions [1]: [avg(ctr_total_return#35)] -Aggregate Attributes [1]: [avg(ctr_total_return#35)#45] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#35)#45) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#46, ctr_state#34 AS ctr_state#34#47] +Input [3]: [ctr_state#29, sum#35, count#36] +Keys [1]: [ctr_state#29] +Functions [1]: [avg(ctr_total_return#30)] +Aggregate Attributes [1]: [avg(ctr_total_return#30)#37] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#30)#37) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#38, ctr_state#29 AS ctr_state#29#39] (51) Filter [codegen id : 16] -Input [2]: [(avg(ctr_total_return) * 1.2)#46, ctr_state#34#47] -Condition : isnotnull((avg(ctr_total_return) * 1.2)#46) +Input [2]: [(avg(ctr_total_return) * 1.2)#38, ctr_state#29#39] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#38) (52) BroadcastExchange -Input [2]: [(avg(ctr_total_return) * 1.2)#46, ctr_state#34#47] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#48] +Input [2]: [(avg(ctr_total_return) * 1.2)#38, ctr_state#29#39] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=9] (53) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ctr_state#34] -Right keys [1]: [ctr_state#34#47] -Join condition: (cast(ctr_total_return#35 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#46) +Left keys [1]: [ctr_state#29] +Right keys [1]: [ctr_state#29#39] +Join condition: (cast(ctr_total_return#30 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#38) (54) Project [codegen id : 17] -Output [13]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, c_preferred_cust_flag#7, c_birth_day#8, c_birth_month#9, c_birth_year#10, c_birth_country#11, c_login#12, c_email_address#13, c_last_review_date#14, ctr_total_return#35] -Input [16]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, c_preferred_cust_flag#7, c_birth_day#8, c_birth_month#9, c_birth_year#10, c_birth_country#11, c_login#12, c_email_address#13, c_last_review_date#14, ctr_state#34, ctr_total_return#35, (avg(ctr_total_return) * 1.2)#46, ctr_state#34#47] +Output [13]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, c_preferred_cust_flag#7, c_birth_day#8, c_birth_month#9, c_birth_year#10, c_birth_country#11, c_login#12, c_email_address#13, c_last_review_date#14, ctr_total_return#30] +Input [16]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, c_preferred_cust_flag#7, c_birth_day#8, c_birth_month#9, c_birth_year#10, c_birth_country#11, c_login#12, c_email_address#13, c_last_review_date#14, ctr_state#29, ctr_total_return#30, (avg(ctr_total_return) * 1.2)#38, ctr_state#29#39] (55) TakeOrderedAndProject -Input [13]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, c_preferred_cust_flag#7, c_birth_day#8, c_birth_month#9, c_birth_year#10, c_birth_country#11, c_login#12, c_email_address#13, c_last_review_date#14, ctr_total_return#35] -Arguments: 100, [c_customer_id#2 ASC NULLS FIRST, c_salutation#4 ASC NULLS FIRST, c_first_name#5 ASC NULLS FIRST, c_last_name#6 ASC NULLS FIRST, c_preferred_cust_flag#7 ASC NULLS FIRST, c_birth_day#8 ASC NULLS FIRST, c_birth_month#9 ASC NULLS FIRST, c_birth_year#10 ASC NULLS FIRST, c_birth_country#11 ASC NULLS FIRST, c_login#12 ASC NULLS FIRST, c_email_address#13 ASC NULLS FIRST, c_last_review_date#14 ASC NULLS FIRST, ctr_total_return#35 ASC NULLS FIRST], [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, c_preferred_cust_flag#7, c_birth_day#8, c_birth_month#9, c_birth_year#10, c_birth_country#11, c_login#12, c_email_address#13, c_last_review_date#14, ctr_total_return#35] +Input [13]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, c_preferred_cust_flag#7, c_birth_day#8, c_birth_month#9, c_birth_year#10, c_birth_country#11, c_login#12, c_email_address#13, c_last_review_date#14, ctr_total_return#30] +Arguments: 100, [c_customer_id#2 ASC NULLS FIRST, c_salutation#4 ASC NULLS FIRST, c_first_name#5 ASC NULLS FIRST, c_last_name#6 ASC NULLS FIRST, c_preferred_cust_flag#7 ASC NULLS FIRST, c_birth_day#8 ASC NULLS FIRST, c_birth_month#9 ASC NULLS FIRST, c_birth_year#10 ASC NULLS FIRST, c_birth_country#11 ASC NULLS FIRST, c_login#12 ASC NULLS FIRST, c_email_address#13 ASC NULLS FIRST, c_last_review_date#14 ASC NULLS FIRST, ctr_total_return#30 ASC NULLS FIRST], [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, c_preferred_cust_flag#7, c_birth_day#8, c_birth_month#9, c_birth_year#10, c_birth_country#11, c_login#12, c_email_address#13, c_last_review_date#14, ctr_total_return#30] ===== Subqueries ===== -Subquery:1 Hosting operator id = 12 Hosting Expression = wr_returned_date_sk#22 IN dynamicpruning#23 +Subquery:1 Hosting operator id = 12 Hosting Expression = wr_returned_date_sk#20 IN dynamicpruning#21 BroadcastExchange (60) +- * Project (59) +- * Filter (58) @@ -321,27 +321,27 @@ BroadcastExchange (60) (56) Scan parquet default.date_dim -Output [2]: [d_date_sk#24, d_year#49] +Output [2]: [d_date_sk#22, d_year#40] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] ReadSchema: struct (57) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#24, d_year#49] +Input [2]: [d_date_sk#22, d_year#40] (58) Filter [codegen id : 1] -Input [2]: [d_date_sk#24, d_year#49] -Condition : ((isnotnull(d_year#49) AND (d_year#49 = 2002)) AND isnotnull(d_date_sk#24)) +Input [2]: [d_date_sk#22, d_year#40] +Condition : ((isnotnull(d_year#40) AND (d_year#40 = 2002)) AND isnotnull(d_date_sk#22)) (59) Project [codegen id : 1] -Output [1]: [d_date_sk#24] -Input [2]: [d_date_sk#24, d_year#49] +Output [1]: [d_date_sk#22] +Input [2]: [d_date_sk#22, d_year#40] (60) BroadcastExchange -Input [1]: [d_date_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] +Input [1]: [d_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] -Subquery:2 Hosting operator id = 33 Hosting Expression = wr_returned_date_sk#22 IN dynamicpruning#23 +Subquery:2 Hosting operator id = 33 Hosting Expression = wr_returned_date_sk#20 IN dynamicpruning#21 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt index 333930275bbd1..a7256765c8464 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q30/explain.txt @@ -93,7 +93,7 @@ Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#8)) (10) BroadcastExchange Input [2]: [ca_address_sk#7, ca_state#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [wr_returning_addr_sk#2] @@ -108,23 +108,23 @@ Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, Input [3]: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] Keys [2]: [wr_returning_customer_sk#1, ca_state#8] Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#3))] -Aggregate Attributes [1]: [sum#10] -Results [3]: [wr_returning_customer_sk#1, ca_state#8, sum#11] +Aggregate Attributes [1]: [sum#9] +Results [3]: [wr_returning_customer_sk#1, ca_state#8, sum#10] (14) Exchange -Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#11] -Arguments: hashpartitioning(wr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#10] +Arguments: hashpartitioning(wr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 11] -Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#11] +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#10] Keys [2]: [wr_returning_customer_sk#1, ca_state#8] Functions [1]: [sum(UnscaledValue(wr_return_amt#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#3))#13] -Results [3]: [wr_returning_customer_sk#1 AS ctr_customer_sk#14, ca_state#8 AS ctr_state#15, MakeDecimal(sum(UnscaledValue(wr_return_amt#3))#13,17,2) AS ctr_total_return#16] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#3))#11] +Results [3]: [wr_returning_customer_sk#1 AS ctr_customer_sk#12, ca_state#8 AS ctr_state#13, MakeDecimal(sum(UnscaledValue(wr_return_amt#3))#11,17,2) AS ctr_total_return#14] (16) Filter [codegen id : 11] -Input [3]: [ctr_customer_sk#14, ctr_state#15, ctr_total_return#16] -Condition : isnotnull(ctr_total_return#16) +Input [3]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14] +Condition : isnotnull(ctr_total_return#14) (17) Scan parquet default.web_returns Output [4]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, wr_returned_date_sk#4] @@ -169,116 +169,116 @@ Input [5]: [wr_returning_customer_sk#1, wr_returning_addr_sk#2, wr_return_amt#3, Input [3]: [wr_returning_customer_sk#1, wr_return_amt#3, ca_state#8] Keys [2]: [wr_returning_customer_sk#1, ca_state#8] Functions [1]: [partial_sum(UnscaledValue(wr_return_amt#3))] -Aggregate Attributes [1]: [sum#17] -Results [3]: [wr_returning_customer_sk#1, ca_state#8, sum#18] +Aggregate Attributes [1]: [sum#15] +Results [3]: [wr_returning_customer_sk#1, ca_state#8, sum#16] (27) Exchange -Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#18] -Arguments: hashpartitioning(wr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#16] +Arguments: hashpartitioning(wr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (28) HashAggregate [codegen id : 7] -Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#18] +Input [3]: [wr_returning_customer_sk#1, ca_state#8, sum#16] Keys [2]: [wr_returning_customer_sk#1, ca_state#8] Functions [1]: [sum(UnscaledValue(wr_return_amt#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#3))#13] -Results [2]: [ca_state#8 AS ctr_state#15, MakeDecimal(sum(UnscaledValue(wr_return_amt#3))#13,17,2) AS ctr_total_return#16] +Aggregate Attributes [1]: [sum(UnscaledValue(wr_return_amt#3))#11] +Results [2]: [ca_state#8 AS ctr_state#13, MakeDecimal(sum(UnscaledValue(wr_return_amt#3))#11,17,2) AS ctr_total_return#14] (29) HashAggregate [codegen id : 7] -Input [2]: [ctr_state#15, ctr_total_return#16] -Keys [1]: [ctr_state#15] -Functions [1]: [partial_avg(ctr_total_return#16)] -Aggregate Attributes [2]: [sum#20, count#21] -Results [3]: [ctr_state#15, sum#22, count#23] +Input [2]: [ctr_state#13, ctr_total_return#14] +Keys [1]: [ctr_state#13] +Functions [1]: [partial_avg(ctr_total_return#14)] +Aggregate Attributes [2]: [sum#17, count#18] +Results [3]: [ctr_state#13, sum#19, count#20] (30) Exchange -Input [3]: [ctr_state#15, sum#22, count#23] -Arguments: hashpartitioning(ctr_state#15, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [3]: [ctr_state#13, sum#19, count#20] +Arguments: hashpartitioning(ctr_state#13, 5), ENSURE_REQUIREMENTS, [plan_id=4] (31) HashAggregate [codegen id : 8] -Input [3]: [ctr_state#15, sum#22, count#23] -Keys [1]: [ctr_state#15] -Functions [1]: [avg(ctr_total_return#16)] -Aggregate Attributes [1]: [avg(ctr_total_return#16)#25] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#16)#25) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#26, ctr_state#15 AS ctr_state#15#27] +Input [3]: [ctr_state#13, sum#19, count#20] +Keys [1]: [ctr_state#13] +Functions [1]: [avg(ctr_total_return#14)] +Aggregate Attributes [1]: [avg(ctr_total_return#14)#21] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#14)#21) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#22, ctr_state#13 AS ctr_state#13#23] (32) Filter [codegen id : 8] -Input [2]: [(avg(ctr_total_return) * 1.2)#26, ctr_state#15#27] -Condition : isnotnull((avg(ctr_total_return) * 1.2)#26) +Input [2]: [(avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#22) (33) BroadcastExchange -Input [2]: [(avg(ctr_total_return) * 1.2)#26, ctr_state#15#27] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#28] +Input [2]: [(avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ctr_state#15] -Right keys [1]: [ctr_state#15#27] -Join condition: (cast(ctr_total_return#16 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#26) +Left keys [1]: [ctr_state#13] +Right keys [1]: [ctr_state#13#23] +Join condition: (cast(ctr_total_return#14 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#22) (35) Project [codegen id : 11] -Output [2]: [ctr_customer_sk#14, ctr_total_return#16] -Input [5]: [ctr_customer_sk#14, ctr_state#15, ctr_total_return#16, (avg(ctr_total_return) * 1.2)#26, ctr_state#15#27] +Output [2]: [ctr_customer_sk#12, ctr_total_return#14] +Input [5]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14, (avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] (36) Scan parquet default.customer -Output [14]: [c_customer_sk#29, c_customer_id#30, c_current_addr_sk#31, c_salutation#32, c_first_name#33, c_last_name#34, c_preferred_cust_flag#35, c_birth_day#36, c_birth_month#37, c_birth_year#38, c_birth_country#39, c_login#40, c_email_address#41, c_last_review_date#42] +Output [14]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 9] -Input [14]: [c_customer_sk#29, c_customer_id#30, c_current_addr_sk#31, c_salutation#32, c_first_name#33, c_last_name#34, c_preferred_cust_flag#35, c_birth_day#36, c_birth_month#37, c_birth_year#38, c_birth_country#39, c_login#40, c_email_address#41, c_last_review_date#42] +Input [14]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] (38) Filter [codegen id : 9] -Input [14]: [c_customer_sk#29, c_customer_id#30, c_current_addr_sk#31, c_salutation#32, c_first_name#33, c_last_name#34, c_preferred_cust_flag#35, c_birth_day#36, c_birth_month#37, c_birth_year#38, c_birth_country#39, c_login#40, c_email_address#41, c_last_review_date#42] -Condition : (isnotnull(c_customer_sk#29) AND isnotnull(c_current_addr_sk#31)) +Input [14]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] +Condition : (isnotnull(c_customer_sk#24) AND isnotnull(c_current_addr_sk#26)) (39) BroadcastExchange -Input [14]: [c_customer_sk#29, c_customer_id#30, c_current_addr_sk#31, c_salutation#32, c_first_name#33, c_last_name#34, c_preferred_cust_flag#35, c_birth_day#36, c_birth_month#37, c_birth_year#38, c_birth_country#39, c_login#40, c_email_address#41, c_last_review_date#42] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#43] +Input [14]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] (40) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ctr_customer_sk#14] -Right keys [1]: [c_customer_sk#29] +Left keys [1]: [ctr_customer_sk#12] +Right keys [1]: [c_customer_sk#24] Join condition: None (41) Project [codegen id : 11] -Output [14]: [ctr_total_return#16, c_customer_id#30, c_current_addr_sk#31, c_salutation#32, c_first_name#33, c_last_name#34, c_preferred_cust_flag#35, c_birth_day#36, c_birth_month#37, c_birth_year#38, c_birth_country#39, c_login#40, c_email_address#41, c_last_review_date#42] -Input [16]: [ctr_customer_sk#14, ctr_total_return#16, c_customer_sk#29, c_customer_id#30, c_current_addr_sk#31, c_salutation#32, c_first_name#33, c_last_name#34, c_preferred_cust_flag#35, c_birth_day#36, c_birth_month#37, c_birth_year#38, c_birth_country#39, c_login#40, c_email_address#41, c_last_review_date#42] +Output [14]: [ctr_total_return#14, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] +Input [16]: [ctr_customer_sk#12, ctr_total_return#14, c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37] (42) Scan parquet default.customer_address -Output [2]: [ca_address_sk#44, ca_state#45] +Output [2]: [ca_address_sk#38, ca_state#39] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] ReadSchema: struct (43) ColumnarToRow [codegen id : 10] -Input [2]: [ca_address_sk#44, ca_state#45] +Input [2]: [ca_address_sk#38, ca_state#39] (44) Filter [codegen id : 10] -Input [2]: [ca_address_sk#44, ca_state#45] -Condition : ((isnotnull(ca_state#45) AND (ca_state#45 = GA)) AND isnotnull(ca_address_sk#44)) +Input [2]: [ca_address_sk#38, ca_state#39] +Condition : ((isnotnull(ca_state#39) AND (ca_state#39 = GA)) AND isnotnull(ca_address_sk#38)) (45) Project [codegen id : 10] -Output [1]: [ca_address_sk#44] -Input [2]: [ca_address_sk#44, ca_state#45] +Output [1]: [ca_address_sk#38] +Input [2]: [ca_address_sk#38, ca_state#39] (46) BroadcastExchange -Input [1]: [ca_address_sk#44] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#46] +Input [1]: [ca_address_sk#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] (47) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [c_current_addr_sk#31] -Right keys [1]: [ca_address_sk#44] +Left keys [1]: [c_current_addr_sk#26] +Right keys [1]: [ca_address_sk#38] Join condition: None (48) Project [codegen id : 11] -Output [13]: [c_customer_id#30, c_salutation#32, c_first_name#33, c_last_name#34, c_preferred_cust_flag#35, c_birth_day#36, c_birth_month#37, c_birth_year#38, c_birth_country#39, c_login#40, c_email_address#41, c_last_review_date#42, ctr_total_return#16] -Input [15]: [ctr_total_return#16, c_customer_id#30, c_current_addr_sk#31, c_salutation#32, c_first_name#33, c_last_name#34, c_preferred_cust_flag#35, c_birth_day#36, c_birth_month#37, c_birth_year#38, c_birth_country#39, c_login#40, c_email_address#41, c_last_review_date#42, ca_address_sk#44] +Output [13]: [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37, ctr_total_return#14] +Input [15]: [ctr_total_return#14, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37, ca_address_sk#38] (49) TakeOrderedAndProject -Input [13]: [c_customer_id#30, c_salutation#32, c_first_name#33, c_last_name#34, c_preferred_cust_flag#35, c_birth_day#36, c_birth_month#37, c_birth_year#38, c_birth_country#39, c_login#40, c_email_address#41, c_last_review_date#42, ctr_total_return#16] -Arguments: 100, [c_customer_id#30 ASC NULLS FIRST, c_salutation#32 ASC NULLS FIRST, c_first_name#33 ASC NULLS FIRST, c_last_name#34 ASC NULLS FIRST, c_preferred_cust_flag#35 ASC NULLS FIRST, c_birth_day#36 ASC NULLS FIRST, c_birth_month#37 ASC NULLS FIRST, c_birth_year#38 ASC NULLS FIRST, c_birth_country#39 ASC NULLS FIRST, c_login#40 ASC NULLS FIRST, c_email_address#41 ASC NULLS FIRST, c_last_review_date#42 ASC NULLS FIRST, ctr_total_return#16 ASC NULLS FIRST], [c_customer_id#30, c_salutation#32, c_first_name#33, c_last_name#34, c_preferred_cust_flag#35, c_birth_day#36, c_birth_month#37, c_birth_year#38, c_birth_country#39, c_login#40, c_email_address#41, c_last_review_date#42, ctr_total_return#16] +Input [13]: [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37, ctr_total_return#14] +Arguments: 100, [c_customer_id#25 ASC NULLS FIRST, c_salutation#27 ASC NULLS FIRST, c_first_name#28 ASC NULLS FIRST, c_last_name#29 ASC NULLS FIRST, c_preferred_cust_flag#30 ASC NULLS FIRST, c_birth_day#31 ASC NULLS FIRST, c_birth_month#32 ASC NULLS FIRST, c_birth_year#33 ASC NULLS FIRST, c_birth_country#34 ASC NULLS FIRST, c_login#35 ASC NULLS FIRST, c_email_address#36 ASC NULLS FIRST, c_last_review_date#37 ASC NULLS FIRST, ctr_total_return#14 ASC NULLS FIRST], [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, c_preferred_cust_flag#30, c_birth_day#31, c_birth_month#32, c_birth_year#33, c_birth_country#34, c_login#35, c_email_address#36, c_last_review_date#37, ctr_total_return#14] ===== Subqueries ===== @@ -291,26 +291,26 @@ BroadcastExchange (54) (50) Scan parquet default.date_dim -Output [2]: [d_date_sk#6, d_year#47] +Output [2]: [d_date_sk#6, d_year#40] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#6, d_year#47] +Input [2]: [d_date_sk#6, d_year#40] (52) Filter [codegen id : 1] -Input [2]: [d_date_sk#6, d_year#47] -Condition : ((isnotnull(d_year#47) AND (d_year#47 = 2002)) AND isnotnull(d_date_sk#6)) +Input [2]: [d_date_sk#6, d_year#40] +Condition : ((isnotnull(d_year#40) AND (d_year#40 = 2002)) AND isnotnull(d_date_sk#6)) (53) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [2]: [d_date_sk#6, d_year#47] +Input [2]: [d_date_sk#6, d_year#40] (54) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] Subquery:2 Hosting operator id = 17 Hosting Expression = wr_returned_date_sk#4 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt index c1bff1a691dc7..b7d0702c9e9a7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31.sf100/explain.txt @@ -140,478 +140,478 @@ Input [6]: [ss_addr_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, d_date_sk#5, (7) Exchange Input [4]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#6, d_qoy#7] -Arguments: hashpartitioning(ss_addr_sk#1, 5), ENSURE_REQUIREMENTS, [id=#8] +Arguments: hashpartitioning(ss_addr_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (8) Sort [codegen id : 3] Input [4]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#6, d_qoy#7] Arguments: [ss_addr_sk#1 ASC NULLS FIRST], false, 0 (9) Scan parquet default.customer_address -Output [2]: [ca_address_sk#9, ca_county#10] +Output [2]: [ca_address_sk#8, ca_county#9] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county)] ReadSchema: struct (10) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#9, ca_county#10] +Input [2]: [ca_address_sk#8, ca_county#9] (11) Filter [codegen id : 4] -Input [2]: [ca_address_sk#9, ca_county#10] -Condition : (isnotnull(ca_address_sk#9) AND isnotnull(ca_county#10)) +Input [2]: [ca_address_sk#8, ca_county#9] +Condition : (isnotnull(ca_address_sk#8) AND isnotnull(ca_county#9)) (12) Exchange -Input [2]: [ca_address_sk#9, ca_county#10] -Arguments: hashpartitioning(ca_address_sk#9, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [2]: [ca_address_sk#8, ca_county#9] +Arguments: hashpartitioning(ca_address_sk#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (13) Sort [codegen id : 5] -Input [2]: [ca_address_sk#9, ca_county#10] -Arguments: [ca_address_sk#9 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#8, ca_county#9] +Arguments: [ca_address_sk#8 ASC NULLS FIRST], false, 0 (14) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_addr_sk#1] -Right keys [1]: [ca_address_sk#9] +Right keys [1]: [ca_address_sk#8] Join condition: None (15) Project [codegen id : 6] -Output [4]: [ss_ext_sales_price#2, d_year#6, d_qoy#7, ca_county#10] -Input [6]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#6, d_qoy#7, ca_address_sk#9, ca_county#10] +Output [4]: [ss_ext_sales_price#2, d_year#6, d_qoy#7, ca_county#9] +Input [6]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#6, d_qoy#7, ca_address_sk#8, ca_county#9] (16) HashAggregate [codegen id : 6] -Input [4]: [ss_ext_sales_price#2, d_year#6, d_qoy#7, ca_county#10] -Keys [3]: [ca_county#10, d_qoy#7, d_year#6] +Input [4]: [ss_ext_sales_price#2, d_year#6, d_qoy#7, ca_county#9] +Keys [3]: [ca_county#9, d_qoy#7, d_year#6] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#12] -Results [4]: [ca_county#10, d_qoy#7, d_year#6, sum#13] +Aggregate Attributes [1]: [sum#10] +Results [4]: [ca_county#9, d_qoy#7, d_year#6, sum#11] (17) Exchange -Input [4]: [ca_county#10, d_qoy#7, d_year#6, sum#13] -Arguments: hashpartitioning(ca_county#10, d_qoy#7, d_year#6, 5), ENSURE_REQUIREMENTS, [id=#14] +Input [4]: [ca_county#9, d_qoy#7, d_year#6, sum#11] +Arguments: hashpartitioning(ca_county#9, d_qoy#7, d_year#6, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 42] -Input [4]: [ca_county#10, d_qoy#7, d_year#6, sum#13] -Keys [3]: [ca_county#10, d_qoy#7, d_year#6] +Input [4]: [ca_county#9, d_qoy#7, d_year#6, sum#11] +Keys [3]: [ca_county#9, d_qoy#7, d_year#6] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#15] -Results [2]: [ca_county#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#15,17,2) AS store_sales#16] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#12] +Results [2]: [ca_county#9, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#12,17,2) AS store_sales#13] (19) Scan parquet default.store_sales -Output [3]: [ss_addr_sk#17, ss_ext_sales_price#18, ss_sold_date_sk#19] +Output [3]: [ss_addr_sk#14, ss_ext_sales_price#15, ss_sold_date_sk#16] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#19), dynamicpruningexpression(ss_sold_date_sk#19 IN dynamicpruning#20)] +PartitionFilters: [isnotnull(ss_sold_date_sk#16), dynamicpruningexpression(ss_sold_date_sk#16 IN dynamicpruning#17)] PushedFilters: [IsNotNull(ss_addr_sk)] ReadSchema: struct (20) ColumnarToRow [codegen id : 8] -Input [3]: [ss_addr_sk#17, ss_ext_sales_price#18, ss_sold_date_sk#19] +Input [3]: [ss_addr_sk#14, ss_ext_sales_price#15, ss_sold_date_sk#16] (21) Filter [codegen id : 8] -Input [3]: [ss_addr_sk#17, ss_ext_sales_price#18, ss_sold_date_sk#19] -Condition : isnotnull(ss_addr_sk#17) +Input [3]: [ss_addr_sk#14, ss_ext_sales_price#15, ss_sold_date_sk#16] +Condition : isnotnull(ss_addr_sk#14) (22) ReusedExchange [Reuses operator id: 118] -Output [3]: [d_date_sk#21, d_year#22, d_qoy#23] +Output [3]: [d_date_sk#18, d_year#19, d_qoy#20] (23) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_sold_date_sk#19] -Right keys [1]: [d_date_sk#21] +Left keys [1]: [ss_sold_date_sk#16] +Right keys [1]: [d_date_sk#18] Join condition: None (24) Project [codegen id : 8] -Output [4]: [ss_addr_sk#17, ss_ext_sales_price#18, d_year#22, d_qoy#23] -Input [6]: [ss_addr_sk#17, ss_ext_sales_price#18, ss_sold_date_sk#19, d_date_sk#21, d_year#22, d_qoy#23] +Output [4]: [ss_addr_sk#14, ss_ext_sales_price#15, d_year#19, d_qoy#20] +Input [6]: [ss_addr_sk#14, ss_ext_sales_price#15, ss_sold_date_sk#16, d_date_sk#18, d_year#19, d_qoy#20] (25) Exchange -Input [4]: [ss_addr_sk#17, ss_ext_sales_price#18, d_year#22, d_qoy#23] -Arguments: hashpartitioning(ss_addr_sk#17, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [4]: [ss_addr_sk#14, ss_ext_sales_price#15, d_year#19, d_qoy#20] +Arguments: hashpartitioning(ss_addr_sk#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] (26) Sort [codegen id : 9] -Input [4]: [ss_addr_sk#17, ss_ext_sales_price#18, d_year#22, d_qoy#23] -Arguments: [ss_addr_sk#17 ASC NULLS FIRST], false, 0 +Input [4]: [ss_addr_sk#14, ss_ext_sales_price#15, d_year#19, d_qoy#20] +Arguments: [ss_addr_sk#14 ASC NULLS FIRST], false, 0 (27) ReusedExchange [Reuses operator id: 12] -Output [2]: [ca_address_sk#25, ca_county#26] +Output [2]: [ca_address_sk#21, ca_county#22] (28) Sort [codegen id : 11] -Input [2]: [ca_address_sk#25, ca_county#26] -Arguments: [ca_address_sk#25 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#21, ca_county#22] +Arguments: [ca_address_sk#21 ASC NULLS FIRST], false, 0 (29) SortMergeJoin [codegen id : 12] -Left keys [1]: [ss_addr_sk#17] -Right keys [1]: [ca_address_sk#25] +Left keys [1]: [ss_addr_sk#14] +Right keys [1]: [ca_address_sk#21] Join condition: None (30) Project [codegen id : 12] -Output [4]: [ss_ext_sales_price#18, d_year#22, d_qoy#23, ca_county#26] -Input [6]: [ss_addr_sk#17, ss_ext_sales_price#18, d_year#22, d_qoy#23, ca_address_sk#25, ca_county#26] +Output [4]: [ss_ext_sales_price#15, d_year#19, d_qoy#20, ca_county#22] +Input [6]: [ss_addr_sk#14, ss_ext_sales_price#15, d_year#19, d_qoy#20, ca_address_sk#21, ca_county#22] (31) HashAggregate [codegen id : 12] -Input [4]: [ss_ext_sales_price#18, d_year#22, d_qoy#23, ca_county#26] -Keys [3]: [ca_county#26, d_qoy#23, d_year#22] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#18))] -Aggregate Attributes [1]: [sum#27] -Results [4]: [ca_county#26, d_qoy#23, d_year#22, sum#28] +Input [4]: [ss_ext_sales_price#15, d_year#19, d_qoy#20, ca_county#22] +Keys [3]: [ca_county#22, d_qoy#20, d_year#19] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#15))] +Aggregate Attributes [1]: [sum#23] +Results [4]: [ca_county#22, d_qoy#20, d_year#19, sum#24] (32) Exchange -Input [4]: [ca_county#26, d_qoy#23, d_year#22, sum#28] -Arguments: hashpartitioning(ca_county#26, d_qoy#23, d_year#22, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [4]: [ca_county#22, d_qoy#20, d_year#19, sum#24] +Arguments: hashpartitioning(ca_county#22, d_qoy#20, d_year#19, 5), ENSURE_REQUIREMENTS, [plan_id=5] (33) HashAggregate [codegen id : 13] -Input [4]: [ca_county#26, d_qoy#23, d_year#22, sum#28] -Keys [3]: [ca_county#26, d_qoy#23, d_year#22] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#18))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#18))#15] -Results [2]: [ca_county#26, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#18))#15,17,2) AS store_sales#30] +Input [4]: [ca_county#22, d_qoy#20, d_year#19, sum#24] +Keys [3]: [ca_county#22, d_qoy#20, d_year#19] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#15))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#15))#12] +Results [2]: [ca_county#22, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#15))#12,17,2) AS store_sales#25] (34) BroadcastExchange -Input [2]: [ca_county#26, store_sales#30] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#31] +Input [2]: [ca_county#22, store_sales#25] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] (35) BroadcastHashJoin [codegen id : 42] -Left keys [1]: [ca_county#10] -Right keys [1]: [ca_county#26] +Left keys [1]: [ca_county#9] +Right keys [1]: [ca_county#22] Join condition: None (36) Project [codegen id : 42] -Output [3]: [ca_county#10, store_sales#16, store_sales#30] -Input [4]: [ca_county#10, store_sales#16, ca_county#26, store_sales#30] +Output [3]: [ca_county#9, store_sales#13, store_sales#25] +Input [4]: [ca_county#9, store_sales#13, ca_county#22, store_sales#25] (37) Scan parquet default.store_sales -Output [3]: [ss_addr_sk#32, ss_ext_sales_price#33, ss_sold_date_sk#34] +Output [3]: [ss_addr_sk#26, ss_ext_sales_price#27, ss_sold_date_sk#28] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#34), dynamicpruningexpression(ss_sold_date_sk#34 IN dynamicpruning#35)] +PartitionFilters: [isnotnull(ss_sold_date_sk#28), dynamicpruningexpression(ss_sold_date_sk#28 IN dynamicpruning#29)] PushedFilters: [IsNotNull(ss_addr_sk)] ReadSchema: struct (38) ColumnarToRow [codegen id : 15] -Input [3]: [ss_addr_sk#32, ss_ext_sales_price#33, ss_sold_date_sk#34] +Input [3]: [ss_addr_sk#26, ss_ext_sales_price#27, ss_sold_date_sk#28] (39) Filter [codegen id : 15] -Input [3]: [ss_addr_sk#32, ss_ext_sales_price#33, ss_sold_date_sk#34] -Condition : isnotnull(ss_addr_sk#32) +Input [3]: [ss_addr_sk#26, ss_ext_sales_price#27, ss_sold_date_sk#28] +Condition : isnotnull(ss_addr_sk#26) (40) ReusedExchange [Reuses operator id: 122] -Output [3]: [d_date_sk#36, d_year#37, d_qoy#38] +Output [3]: [d_date_sk#30, d_year#31, d_qoy#32] (41) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [ss_sold_date_sk#34] -Right keys [1]: [d_date_sk#36] +Left keys [1]: [ss_sold_date_sk#28] +Right keys [1]: [d_date_sk#30] Join condition: None (42) Project [codegen id : 15] -Output [4]: [ss_addr_sk#32, ss_ext_sales_price#33, d_year#37, d_qoy#38] -Input [6]: [ss_addr_sk#32, ss_ext_sales_price#33, ss_sold_date_sk#34, d_date_sk#36, d_year#37, d_qoy#38] +Output [4]: [ss_addr_sk#26, ss_ext_sales_price#27, d_year#31, d_qoy#32] +Input [6]: [ss_addr_sk#26, ss_ext_sales_price#27, ss_sold_date_sk#28, d_date_sk#30, d_year#31, d_qoy#32] (43) Exchange -Input [4]: [ss_addr_sk#32, ss_ext_sales_price#33, d_year#37, d_qoy#38] -Arguments: hashpartitioning(ss_addr_sk#32, 5), ENSURE_REQUIREMENTS, [id=#39] +Input [4]: [ss_addr_sk#26, ss_ext_sales_price#27, d_year#31, d_qoy#32] +Arguments: hashpartitioning(ss_addr_sk#26, 5), ENSURE_REQUIREMENTS, [plan_id=7] (44) Sort [codegen id : 16] -Input [4]: [ss_addr_sk#32, ss_ext_sales_price#33, d_year#37, d_qoy#38] -Arguments: [ss_addr_sk#32 ASC NULLS FIRST], false, 0 +Input [4]: [ss_addr_sk#26, ss_ext_sales_price#27, d_year#31, d_qoy#32] +Arguments: [ss_addr_sk#26 ASC NULLS FIRST], false, 0 (45) ReusedExchange [Reuses operator id: 12] -Output [2]: [ca_address_sk#40, ca_county#41] +Output [2]: [ca_address_sk#33, ca_county#34] (46) Sort [codegen id : 18] -Input [2]: [ca_address_sk#40, ca_county#41] -Arguments: [ca_address_sk#40 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#33, ca_county#34] +Arguments: [ca_address_sk#33 ASC NULLS FIRST], false, 0 (47) SortMergeJoin [codegen id : 19] -Left keys [1]: [ss_addr_sk#32] -Right keys [1]: [ca_address_sk#40] +Left keys [1]: [ss_addr_sk#26] +Right keys [1]: [ca_address_sk#33] Join condition: None (48) Project [codegen id : 19] -Output [4]: [ss_ext_sales_price#33, d_year#37, d_qoy#38, ca_county#41] -Input [6]: [ss_addr_sk#32, ss_ext_sales_price#33, d_year#37, d_qoy#38, ca_address_sk#40, ca_county#41] +Output [4]: [ss_ext_sales_price#27, d_year#31, d_qoy#32, ca_county#34] +Input [6]: [ss_addr_sk#26, ss_ext_sales_price#27, d_year#31, d_qoy#32, ca_address_sk#33, ca_county#34] (49) HashAggregate [codegen id : 19] -Input [4]: [ss_ext_sales_price#33, d_year#37, d_qoy#38, ca_county#41] -Keys [3]: [ca_county#41, d_qoy#38, d_year#37] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#33))] -Aggregate Attributes [1]: [sum#42] -Results [4]: [ca_county#41, d_qoy#38, d_year#37, sum#43] +Input [4]: [ss_ext_sales_price#27, d_year#31, d_qoy#32, ca_county#34] +Keys [3]: [ca_county#34, d_qoy#32, d_year#31] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#27))] +Aggregate Attributes [1]: [sum#35] +Results [4]: [ca_county#34, d_qoy#32, d_year#31, sum#36] (50) Exchange -Input [4]: [ca_county#41, d_qoy#38, d_year#37, sum#43] -Arguments: hashpartitioning(ca_county#41, d_qoy#38, d_year#37, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [4]: [ca_county#34, d_qoy#32, d_year#31, sum#36] +Arguments: hashpartitioning(ca_county#34, d_qoy#32, d_year#31, 5), ENSURE_REQUIREMENTS, [plan_id=8] (51) HashAggregate [codegen id : 20] -Input [4]: [ca_county#41, d_qoy#38, d_year#37, sum#43] -Keys [3]: [ca_county#41, d_qoy#38, d_year#37] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#33))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#33))#15] -Results [3]: [ca_county#41, d_year#37, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#33))#15,17,2) AS store_sales#45] +Input [4]: [ca_county#34, d_qoy#32, d_year#31, sum#36] +Keys [3]: [ca_county#34, d_qoy#32, d_year#31] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#27))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#27))#12] +Results [3]: [ca_county#34, d_year#31, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#27))#12,17,2) AS store_sales#37] (52) BroadcastExchange -Input [3]: [ca_county#41, d_year#37, store_sales#45] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#46] +Input [3]: [ca_county#34, d_year#31, store_sales#37] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=9] (53) BroadcastHashJoin [codegen id : 42] -Left keys [1]: [ca_county#10] -Right keys [1]: [ca_county#41] +Left keys [1]: [ca_county#9] +Right keys [1]: [ca_county#34] Join condition: None (54) Project [codegen id : 42] -Output [5]: [store_sales#16, store_sales#30, ca_county#41, d_year#37, store_sales#45] -Input [6]: [ca_county#10, store_sales#16, store_sales#30, ca_county#41, d_year#37, store_sales#45] +Output [5]: [store_sales#13, store_sales#25, ca_county#34, d_year#31, store_sales#37] +Input [6]: [ca_county#9, store_sales#13, store_sales#25, ca_county#34, d_year#31, store_sales#37] (55) Scan parquet default.web_sales -Output [3]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, ws_sold_date_sk#49] +Output [3]: [ws_bill_addr_sk#38, ws_ext_sales_price#39, ws_sold_date_sk#40] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#49), dynamicpruningexpression(ws_sold_date_sk#49 IN dynamicpruning#35)] +PartitionFilters: [isnotnull(ws_sold_date_sk#40), dynamicpruningexpression(ws_sold_date_sk#40 IN dynamicpruning#29)] PushedFilters: [IsNotNull(ws_bill_addr_sk)] ReadSchema: struct (56) ColumnarToRow [codegen id : 22] -Input [3]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, ws_sold_date_sk#49] +Input [3]: [ws_bill_addr_sk#38, ws_ext_sales_price#39, ws_sold_date_sk#40] (57) Filter [codegen id : 22] -Input [3]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, ws_sold_date_sk#49] -Condition : isnotnull(ws_bill_addr_sk#47) +Input [3]: [ws_bill_addr_sk#38, ws_ext_sales_price#39, ws_sold_date_sk#40] +Condition : isnotnull(ws_bill_addr_sk#38) (58) ReusedExchange [Reuses operator id: 122] -Output [3]: [d_date_sk#50, d_year#51, d_qoy#52] +Output [3]: [d_date_sk#41, d_year#42, d_qoy#43] (59) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [ws_sold_date_sk#49] -Right keys [1]: [d_date_sk#50] +Left keys [1]: [ws_sold_date_sk#40] +Right keys [1]: [d_date_sk#41] Join condition: None (60) Project [codegen id : 22] -Output [4]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, d_year#51, d_qoy#52] -Input [6]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, ws_sold_date_sk#49, d_date_sk#50, d_year#51, d_qoy#52] +Output [4]: [ws_bill_addr_sk#38, ws_ext_sales_price#39, d_year#42, d_qoy#43] +Input [6]: [ws_bill_addr_sk#38, ws_ext_sales_price#39, ws_sold_date_sk#40, d_date_sk#41, d_year#42, d_qoy#43] (61) Exchange -Input [4]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, d_year#51, d_qoy#52] -Arguments: hashpartitioning(ws_bill_addr_sk#47, 5), ENSURE_REQUIREMENTS, [id=#53] +Input [4]: [ws_bill_addr_sk#38, ws_ext_sales_price#39, d_year#42, d_qoy#43] +Arguments: hashpartitioning(ws_bill_addr_sk#38, 5), ENSURE_REQUIREMENTS, [plan_id=10] (62) Sort [codegen id : 23] -Input [4]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, d_year#51, d_qoy#52] -Arguments: [ws_bill_addr_sk#47 ASC NULLS FIRST], false, 0 +Input [4]: [ws_bill_addr_sk#38, ws_ext_sales_price#39, d_year#42, d_qoy#43] +Arguments: [ws_bill_addr_sk#38 ASC NULLS FIRST], false, 0 (63) ReusedExchange [Reuses operator id: 12] -Output [2]: [ca_address_sk#54, ca_county#55] +Output [2]: [ca_address_sk#44, ca_county#45] (64) Sort [codegen id : 25] -Input [2]: [ca_address_sk#54, ca_county#55] -Arguments: [ca_address_sk#54 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#44, ca_county#45] +Arguments: [ca_address_sk#44 ASC NULLS FIRST], false, 0 (65) SortMergeJoin [codegen id : 26] -Left keys [1]: [ws_bill_addr_sk#47] -Right keys [1]: [ca_address_sk#54] +Left keys [1]: [ws_bill_addr_sk#38] +Right keys [1]: [ca_address_sk#44] Join condition: None (66) Project [codegen id : 26] -Output [4]: [ws_ext_sales_price#48, d_year#51, d_qoy#52, ca_county#55] -Input [6]: [ws_bill_addr_sk#47, ws_ext_sales_price#48, d_year#51, d_qoy#52, ca_address_sk#54, ca_county#55] +Output [4]: [ws_ext_sales_price#39, d_year#42, d_qoy#43, ca_county#45] +Input [6]: [ws_bill_addr_sk#38, ws_ext_sales_price#39, d_year#42, d_qoy#43, ca_address_sk#44, ca_county#45] (67) HashAggregate [codegen id : 26] -Input [4]: [ws_ext_sales_price#48, d_year#51, d_qoy#52, ca_county#55] -Keys [3]: [ca_county#55, d_qoy#52, d_year#51] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#48))] -Aggregate Attributes [1]: [sum#56] -Results [4]: [ca_county#55, d_qoy#52, d_year#51, sum#57] +Input [4]: [ws_ext_sales_price#39, d_year#42, d_qoy#43, ca_county#45] +Keys [3]: [ca_county#45, d_qoy#43, d_year#42] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#39))] +Aggregate Attributes [1]: [sum#46] +Results [4]: [ca_county#45, d_qoy#43, d_year#42, sum#47] (68) Exchange -Input [4]: [ca_county#55, d_qoy#52, d_year#51, sum#57] -Arguments: hashpartitioning(ca_county#55, d_qoy#52, d_year#51, 5), ENSURE_REQUIREMENTS, [id=#58] +Input [4]: [ca_county#45, d_qoy#43, d_year#42, sum#47] +Arguments: hashpartitioning(ca_county#45, d_qoy#43, d_year#42, 5), ENSURE_REQUIREMENTS, [plan_id=11] (69) HashAggregate [codegen id : 41] -Input [4]: [ca_county#55, d_qoy#52, d_year#51, sum#57] -Keys [3]: [ca_county#55, d_qoy#52, d_year#51] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#48))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#48))#59] -Results [2]: [ca_county#55, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#48))#59,17,2) AS web_sales#60] +Input [4]: [ca_county#45, d_qoy#43, d_year#42, sum#47] +Keys [3]: [ca_county#45, d_qoy#43, d_year#42] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#39))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#39))#48] +Results [2]: [ca_county#45, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#39))#48,17,2) AS web_sales#49] (70) Scan parquet default.web_sales -Output [3]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, ws_sold_date_sk#63] +Output [3]: [ws_bill_addr_sk#50, ws_ext_sales_price#51, ws_sold_date_sk#52] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#63), dynamicpruningexpression(ws_sold_date_sk#63 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(ws_sold_date_sk#52), dynamicpruningexpression(ws_sold_date_sk#52 IN dynamicpruning#4)] PushedFilters: [IsNotNull(ws_bill_addr_sk)] ReadSchema: struct (71) ColumnarToRow [codegen id : 28] -Input [3]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, ws_sold_date_sk#63] +Input [3]: [ws_bill_addr_sk#50, ws_ext_sales_price#51, ws_sold_date_sk#52] (72) Filter [codegen id : 28] -Input [3]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, ws_sold_date_sk#63] -Condition : isnotnull(ws_bill_addr_sk#61) +Input [3]: [ws_bill_addr_sk#50, ws_ext_sales_price#51, ws_sold_date_sk#52] +Condition : isnotnull(ws_bill_addr_sk#50) (73) ReusedExchange [Reuses operator id: 114] -Output [3]: [d_date_sk#64, d_year#65, d_qoy#66] +Output [3]: [d_date_sk#53, d_year#54, d_qoy#55] (74) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [ws_sold_date_sk#63] -Right keys [1]: [d_date_sk#64] +Left keys [1]: [ws_sold_date_sk#52] +Right keys [1]: [d_date_sk#53] Join condition: None (75) Project [codegen id : 28] -Output [4]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, d_year#65, d_qoy#66] -Input [6]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, ws_sold_date_sk#63, d_date_sk#64, d_year#65, d_qoy#66] +Output [4]: [ws_bill_addr_sk#50, ws_ext_sales_price#51, d_year#54, d_qoy#55] +Input [6]: [ws_bill_addr_sk#50, ws_ext_sales_price#51, ws_sold_date_sk#52, d_date_sk#53, d_year#54, d_qoy#55] (76) Exchange -Input [4]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, d_year#65, d_qoy#66] -Arguments: hashpartitioning(ws_bill_addr_sk#61, 5), ENSURE_REQUIREMENTS, [id=#67] +Input [4]: [ws_bill_addr_sk#50, ws_ext_sales_price#51, d_year#54, d_qoy#55] +Arguments: hashpartitioning(ws_bill_addr_sk#50, 5), ENSURE_REQUIREMENTS, [plan_id=12] (77) Sort [codegen id : 29] -Input [4]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, d_year#65, d_qoy#66] -Arguments: [ws_bill_addr_sk#61 ASC NULLS FIRST], false, 0 +Input [4]: [ws_bill_addr_sk#50, ws_ext_sales_price#51, d_year#54, d_qoy#55] +Arguments: [ws_bill_addr_sk#50 ASC NULLS FIRST], false, 0 (78) ReusedExchange [Reuses operator id: 12] -Output [2]: [ca_address_sk#68, ca_county#69] +Output [2]: [ca_address_sk#56, ca_county#57] (79) Sort [codegen id : 31] -Input [2]: [ca_address_sk#68, ca_county#69] -Arguments: [ca_address_sk#68 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#56, ca_county#57] +Arguments: [ca_address_sk#56 ASC NULLS FIRST], false, 0 (80) SortMergeJoin [codegen id : 32] -Left keys [1]: [ws_bill_addr_sk#61] -Right keys [1]: [ca_address_sk#68] +Left keys [1]: [ws_bill_addr_sk#50] +Right keys [1]: [ca_address_sk#56] Join condition: None (81) Project [codegen id : 32] -Output [4]: [ws_ext_sales_price#62, d_year#65, d_qoy#66, ca_county#69] -Input [6]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, d_year#65, d_qoy#66, ca_address_sk#68, ca_county#69] +Output [4]: [ws_ext_sales_price#51, d_year#54, d_qoy#55, ca_county#57] +Input [6]: [ws_bill_addr_sk#50, ws_ext_sales_price#51, d_year#54, d_qoy#55, ca_address_sk#56, ca_county#57] (82) HashAggregate [codegen id : 32] -Input [4]: [ws_ext_sales_price#62, d_year#65, d_qoy#66, ca_county#69] -Keys [3]: [ca_county#69, d_qoy#66, d_year#65] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#62))] -Aggregate Attributes [1]: [sum#70] -Results [4]: [ca_county#69, d_qoy#66, d_year#65, sum#71] +Input [4]: [ws_ext_sales_price#51, d_year#54, d_qoy#55, ca_county#57] +Keys [3]: [ca_county#57, d_qoy#55, d_year#54] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#51))] +Aggregate Attributes [1]: [sum#58] +Results [4]: [ca_county#57, d_qoy#55, d_year#54, sum#59] (83) Exchange -Input [4]: [ca_county#69, d_qoy#66, d_year#65, sum#71] -Arguments: hashpartitioning(ca_county#69, d_qoy#66, d_year#65, 5), ENSURE_REQUIREMENTS, [id=#72] +Input [4]: [ca_county#57, d_qoy#55, d_year#54, sum#59] +Arguments: hashpartitioning(ca_county#57, d_qoy#55, d_year#54, 5), ENSURE_REQUIREMENTS, [plan_id=13] (84) HashAggregate [codegen id : 33] -Input [4]: [ca_county#69, d_qoy#66, d_year#65, sum#71] -Keys [3]: [ca_county#69, d_qoy#66, d_year#65] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#62))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#62))#59] -Results [2]: [ca_county#69, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#62))#59,17,2) AS web_sales#73] +Input [4]: [ca_county#57, d_qoy#55, d_year#54, sum#59] +Keys [3]: [ca_county#57, d_qoy#55, d_year#54] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#51))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#51))#48] +Results [2]: [ca_county#57, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#51))#48,17,2) AS web_sales#60] (85) BroadcastExchange -Input [2]: [ca_county#69, web_sales#73] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#74] +Input [2]: [ca_county#57, web_sales#60] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=14] (86) BroadcastHashJoin [codegen id : 41] -Left keys [1]: [ca_county#55] -Right keys [1]: [ca_county#69] +Left keys [1]: [ca_county#45] +Right keys [1]: [ca_county#57] Join condition: None (87) Project [codegen id : 41] -Output [3]: [ca_county#55, web_sales#60, web_sales#73] -Input [4]: [ca_county#55, web_sales#60, ca_county#69, web_sales#73] +Output [3]: [ca_county#45, web_sales#49, web_sales#60] +Input [4]: [ca_county#45, web_sales#49, ca_county#57, web_sales#60] (88) Scan parquet default.web_sales -Output [3]: [ws_bill_addr_sk#75, ws_ext_sales_price#76, ws_sold_date_sk#77] +Output [3]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, ws_sold_date_sk#63] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#77), dynamicpruningexpression(ws_sold_date_sk#77 IN dynamicpruning#20)] +PartitionFilters: [isnotnull(ws_sold_date_sk#63), dynamicpruningexpression(ws_sold_date_sk#63 IN dynamicpruning#17)] PushedFilters: [IsNotNull(ws_bill_addr_sk)] ReadSchema: struct (89) ColumnarToRow [codegen id : 35] -Input [3]: [ws_bill_addr_sk#75, ws_ext_sales_price#76, ws_sold_date_sk#77] +Input [3]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, ws_sold_date_sk#63] (90) Filter [codegen id : 35] -Input [3]: [ws_bill_addr_sk#75, ws_ext_sales_price#76, ws_sold_date_sk#77] -Condition : isnotnull(ws_bill_addr_sk#75) +Input [3]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, ws_sold_date_sk#63] +Condition : isnotnull(ws_bill_addr_sk#61) (91) ReusedExchange [Reuses operator id: 118] -Output [3]: [d_date_sk#78, d_year#79, d_qoy#80] +Output [3]: [d_date_sk#64, d_year#65, d_qoy#66] (92) BroadcastHashJoin [codegen id : 35] -Left keys [1]: [ws_sold_date_sk#77] -Right keys [1]: [d_date_sk#78] +Left keys [1]: [ws_sold_date_sk#63] +Right keys [1]: [d_date_sk#64] Join condition: None (93) Project [codegen id : 35] -Output [4]: [ws_bill_addr_sk#75, ws_ext_sales_price#76, d_year#79, d_qoy#80] -Input [6]: [ws_bill_addr_sk#75, ws_ext_sales_price#76, ws_sold_date_sk#77, d_date_sk#78, d_year#79, d_qoy#80] +Output [4]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, d_year#65, d_qoy#66] +Input [6]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, ws_sold_date_sk#63, d_date_sk#64, d_year#65, d_qoy#66] (94) Exchange -Input [4]: [ws_bill_addr_sk#75, ws_ext_sales_price#76, d_year#79, d_qoy#80] -Arguments: hashpartitioning(ws_bill_addr_sk#75, 5), ENSURE_REQUIREMENTS, [id=#81] +Input [4]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, d_year#65, d_qoy#66] +Arguments: hashpartitioning(ws_bill_addr_sk#61, 5), ENSURE_REQUIREMENTS, [plan_id=15] (95) Sort [codegen id : 36] -Input [4]: [ws_bill_addr_sk#75, ws_ext_sales_price#76, d_year#79, d_qoy#80] -Arguments: [ws_bill_addr_sk#75 ASC NULLS FIRST], false, 0 +Input [4]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, d_year#65, d_qoy#66] +Arguments: [ws_bill_addr_sk#61 ASC NULLS FIRST], false, 0 (96) ReusedExchange [Reuses operator id: 12] -Output [2]: [ca_address_sk#82, ca_county#83] +Output [2]: [ca_address_sk#67, ca_county#68] (97) Sort [codegen id : 38] -Input [2]: [ca_address_sk#82, ca_county#83] -Arguments: [ca_address_sk#82 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#67, ca_county#68] +Arguments: [ca_address_sk#67 ASC NULLS FIRST], false, 0 (98) SortMergeJoin [codegen id : 39] -Left keys [1]: [ws_bill_addr_sk#75] -Right keys [1]: [ca_address_sk#82] +Left keys [1]: [ws_bill_addr_sk#61] +Right keys [1]: [ca_address_sk#67] Join condition: None (99) Project [codegen id : 39] -Output [4]: [ws_ext_sales_price#76, d_year#79, d_qoy#80, ca_county#83] -Input [6]: [ws_bill_addr_sk#75, ws_ext_sales_price#76, d_year#79, d_qoy#80, ca_address_sk#82, ca_county#83] +Output [4]: [ws_ext_sales_price#62, d_year#65, d_qoy#66, ca_county#68] +Input [6]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, d_year#65, d_qoy#66, ca_address_sk#67, ca_county#68] (100) HashAggregate [codegen id : 39] -Input [4]: [ws_ext_sales_price#76, d_year#79, d_qoy#80, ca_county#83] -Keys [3]: [ca_county#83, d_qoy#80, d_year#79] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#76))] -Aggregate Attributes [1]: [sum#84] -Results [4]: [ca_county#83, d_qoy#80, d_year#79, sum#85] +Input [4]: [ws_ext_sales_price#62, d_year#65, d_qoy#66, ca_county#68] +Keys [3]: [ca_county#68, d_qoy#66, d_year#65] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#62))] +Aggregate Attributes [1]: [sum#69] +Results [4]: [ca_county#68, d_qoy#66, d_year#65, sum#70] (101) Exchange -Input [4]: [ca_county#83, d_qoy#80, d_year#79, sum#85] -Arguments: hashpartitioning(ca_county#83, d_qoy#80, d_year#79, 5), ENSURE_REQUIREMENTS, [id=#86] +Input [4]: [ca_county#68, d_qoy#66, d_year#65, sum#70] +Arguments: hashpartitioning(ca_county#68, d_qoy#66, d_year#65, 5), ENSURE_REQUIREMENTS, [plan_id=16] (102) HashAggregate [codegen id : 40] -Input [4]: [ca_county#83, d_qoy#80, d_year#79, sum#85] -Keys [3]: [ca_county#83, d_qoy#80, d_year#79] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#76))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#76))#59] -Results [2]: [ca_county#83, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#76))#59,17,2) AS web_sales#87] +Input [4]: [ca_county#68, d_qoy#66, d_year#65, sum#70] +Keys [3]: [ca_county#68, d_qoy#66, d_year#65] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#62))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#62))#48] +Results [2]: [ca_county#68, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#62))#48,17,2) AS web_sales#71] (103) BroadcastExchange -Input [2]: [ca_county#83, web_sales#87] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#88] +Input [2]: [ca_county#68, web_sales#71] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=17] (104) BroadcastHashJoin [codegen id : 41] -Left keys [1]: [ca_county#55] -Right keys [1]: [ca_county#83] +Left keys [1]: [ca_county#45] +Right keys [1]: [ca_county#68] Join condition: None (105) Project [codegen id : 41] -Output [4]: [ca_county#55, web_sales#60, web_sales#73, web_sales#87] -Input [5]: [ca_county#55, web_sales#60, web_sales#73, ca_county#83, web_sales#87] +Output [4]: [ca_county#45, web_sales#49, web_sales#60, web_sales#71] +Input [5]: [ca_county#45, web_sales#49, web_sales#60, ca_county#68, web_sales#71] (106) BroadcastExchange -Input [4]: [ca_county#55, web_sales#60, web_sales#73, web_sales#87] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#89] +Input [4]: [ca_county#45, web_sales#49, web_sales#60, web_sales#71] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=18] (107) BroadcastHashJoin [codegen id : 42] -Left keys [1]: [ca_county#41] -Right keys [1]: [ca_county#55] -Join condition: ((CASE WHEN (web_sales#60 > 0.00) THEN CheckOverflow((promote_precision(web_sales#73) / promote_precision(web_sales#60)), DecimalType(37,20)) END > CASE WHEN (store_sales#45 > 0.00) THEN CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#45)), DecimalType(37,20)) END) AND (CASE WHEN (web_sales#73 > 0.00) THEN CheckOverflow((promote_precision(web_sales#87) / promote_precision(web_sales#73)), DecimalType(37,20)) END > CASE WHEN (store_sales#16 > 0.00) THEN CheckOverflow((promote_precision(store_sales#30) / promote_precision(store_sales#16)), DecimalType(37,20)) END)) +Left keys [1]: [ca_county#34] +Right keys [1]: [ca_county#45] +Join condition: ((CASE WHEN (web_sales#49 > 0.00) THEN CheckOverflow((promote_precision(web_sales#60) / promote_precision(web_sales#49)), DecimalType(37,20)) END > CASE WHEN (store_sales#37 > 0.00) THEN CheckOverflow((promote_precision(store_sales#13) / promote_precision(store_sales#37)), DecimalType(37,20)) END) AND (CASE WHEN (web_sales#60 > 0.00) THEN CheckOverflow((promote_precision(web_sales#71) / promote_precision(web_sales#60)), DecimalType(37,20)) END > CASE WHEN (store_sales#13 > 0.00) THEN CheckOverflow((promote_precision(store_sales#25) / promote_precision(store_sales#13)), DecimalType(37,20)) END)) (108) Project [codegen id : 42] -Output [6]: [ca_county#41, d_year#37, CheckOverflow((promote_precision(web_sales#73) / promote_precision(web_sales#60)), DecimalType(37,20)) AS web_q1_q2_increase#90, CheckOverflow((promote_precision(store_sales#16) / promote_precision(store_sales#45)), DecimalType(37,20)) AS store_q1_q2_increase#91, CheckOverflow((promote_precision(web_sales#87) / promote_precision(web_sales#73)), DecimalType(37,20)) AS web_q2_q3_increase#92, CheckOverflow((promote_precision(store_sales#30) / promote_precision(store_sales#16)), DecimalType(37,20)) AS store_q2_q3_increase#93] -Input [9]: [store_sales#16, store_sales#30, ca_county#41, d_year#37, store_sales#45, ca_county#55, web_sales#60, web_sales#73, web_sales#87] +Output [6]: [ca_county#34, d_year#31, CheckOverflow((promote_precision(web_sales#60) / promote_precision(web_sales#49)), DecimalType(37,20)) AS web_q1_q2_increase#72, CheckOverflow((promote_precision(store_sales#13) / promote_precision(store_sales#37)), DecimalType(37,20)) AS store_q1_q2_increase#73, CheckOverflow((promote_precision(web_sales#71) / promote_precision(web_sales#60)), DecimalType(37,20)) AS web_q2_q3_increase#74, CheckOverflow((promote_precision(store_sales#25) / promote_precision(store_sales#13)), DecimalType(37,20)) AS store_q2_q3_increase#75] +Input [9]: [store_sales#13, store_sales#25, ca_county#34, d_year#31, store_sales#37, ca_county#45, web_sales#49, web_sales#60, web_sales#71] (109) Exchange -Input [6]: [ca_county#41, d_year#37, web_q1_q2_increase#90, store_q1_q2_increase#91, web_q2_q3_increase#92, store_q2_q3_increase#93] -Arguments: rangepartitioning(ca_county#41 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#94] +Input [6]: [ca_county#34, d_year#31, web_q1_q2_increase#72, store_q1_q2_increase#73, web_q2_q3_increase#74, store_q2_q3_increase#75] +Arguments: rangepartitioning(ca_county#34 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=19] (110) Sort [codegen id : 43] -Input [6]: [ca_county#41, d_year#37, web_q1_q2_increase#90, store_q1_q2_increase#91, web_q2_q3_increase#92, store_q2_q3_increase#93] -Arguments: [ca_county#41 ASC NULLS FIRST], true, 0 +Input [6]: [ca_county#34, d_year#31, web_q1_q2_increase#72, store_q1_q2_increase#73, web_q2_q3_increase#74, store_q2_q3_increase#75] +Arguments: [ca_county#34 ASC NULLS FIRST], true, 0 ===== Subqueries ===== @@ -638,9 +638,9 @@ Condition : ((((isnotnull(d_qoy#7) AND isnotnull(d_year#6)) AND (d_qoy#7 = 2)) A (114) BroadcastExchange Input [3]: [d_date_sk#5, d_year#6, d_qoy#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#95] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=20] -Subquery:2 Hosting operator id = 19 Hosting Expression = ss_sold_date_sk#19 IN dynamicpruning#20 +Subquery:2 Hosting operator id = 19 Hosting Expression = ss_sold_date_sk#16 IN dynamicpruning#17 BroadcastExchange (118) +- * Filter (117) +- * ColumnarToRow (116) @@ -648,24 +648,24 @@ BroadcastExchange (118) (115) Scan parquet default.date_dim -Output [3]: [d_date_sk#21, d_year#22, d_qoy#23] +Output [3]: [d_date_sk#18, d_year#19, d_qoy#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (116) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#21, d_year#22, d_qoy#23] +Input [3]: [d_date_sk#18, d_year#19, d_qoy#20] (117) Filter [codegen id : 1] -Input [3]: [d_date_sk#21, d_year#22, d_qoy#23] -Condition : ((((isnotnull(d_qoy#23) AND isnotnull(d_year#22)) AND (d_qoy#23 = 3)) AND (d_year#22 = 2000)) AND isnotnull(d_date_sk#21)) +Input [3]: [d_date_sk#18, d_year#19, d_qoy#20] +Condition : ((((isnotnull(d_qoy#20) AND isnotnull(d_year#19)) AND (d_qoy#20 = 3)) AND (d_year#19 = 2000)) AND isnotnull(d_date_sk#18)) (118) BroadcastExchange -Input [3]: [d_date_sk#21, d_year#22, d_qoy#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#96] +Input [3]: [d_date_sk#18, d_year#19, d_qoy#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=21] -Subquery:3 Hosting operator id = 37 Hosting Expression = ss_sold_date_sk#34 IN dynamicpruning#35 +Subquery:3 Hosting operator id = 37 Hosting Expression = ss_sold_date_sk#28 IN dynamicpruning#29 BroadcastExchange (122) +- * Filter (121) +- * ColumnarToRow (120) @@ -673,27 +673,27 @@ BroadcastExchange (122) (119) Scan parquet default.date_dim -Output [3]: [d_date_sk#36, d_year#37, d_qoy#38] +Output [3]: [d_date_sk#30, d_year#31, d_qoy#32] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,1), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (120) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#36, d_year#37, d_qoy#38] +Input [3]: [d_date_sk#30, d_year#31, d_qoy#32] (121) Filter [codegen id : 1] -Input [3]: [d_date_sk#36, d_year#37, d_qoy#38] -Condition : ((((isnotnull(d_qoy#38) AND isnotnull(d_year#37)) AND (d_qoy#38 = 1)) AND (d_year#37 = 2000)) AND isnotnull(d_date_sk#36)) +Input [3]: [d_date_sk#30, d_year#31, d_qoy#32] +Condition : ((((isnotnull(d_qoy#32) AND isnotnull(d_year#31)) AND (d_qoy#32 = 1)) AND (d_year#31 = 2000)) AND isnotnull(d_date_sk#30)) (122) BroadcastExchange -Input [3]: [d_date_sk#36, d_year#37, d_qoy#38] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#97] +Input [3]: [d_date_sk#30, d_year#31, d_qoy#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=22] -Subquery:4 Hosting operator id = 55 Hosting Expression = ws_sold_date_sk#49 IN dynamicpruning#35 +Subquery:4 Hosting operator id = 55 Hosting Expression = ws_sold_date_sk#40 IN dynamicpruning#29 -Subquery:5 Hosting operator id = 70 Hosting Expression = ws_sold_date_sk#63 IN dynamicpruning#4 +Subquery:5 Hosting operator id = 70 Hosting Expression = ws_sold_date_sk#52 IN dynamicpruning#4 -Subquery:6 Hosting operator id = 88 Hosting Expression = ws_sold_date_sk#77 IN dynamicpruning#20 +Subquery:6 Hosting operator id = 88 Hosting Expression = ws_sold_date_sk#63 IN dynamicpruning#17 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt index d5c2cc3377a7e..5c1e8c1c5a96a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q31/explain.txt @@ -134,7 +134,7 @@ Condition : (isnotnull(ca_address_sk#8) AND isnotnull(ca_county#9)) (10) BroadcastExchange Input [2]: [ca_address_sk#8, ca_county#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_addr_sk#1] @@ -149,368 +149,368 @@ Input [6]: [ss_addr_sk#1, ss_ext_sales_price#2, d_year#6, d_qoy#7, ca_address_sk Input [4]: [ss_ext_sales_price#2, d_year#6, d_qoy#7, ca_county#9] Keys [3]: [ca_county#9, d_qoy#7, d_year#6] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#11] -Results [4]: [ca_county#9, d_qoy#7, d_year#6, sum#12] +Aggregate Attributes [1]: [sum#10] +Results [4]: [ca_county#9, d_qoy#7, d_year#6, sum#11] (14) Exchange -Input [4]: [ca_county#9, d_qoy#7, d_year#6, sum#12] -Arguments: hashpartitioning(ca_county#9, d_qoy#7, d_year#6, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [4]: [ca_county#9, d_qoy#7, d_year#6, sum#11] +Arguments: hashpartitioning(ca_county#9, d_qoy#7, d_year#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 24] -Input [4]: [ca_county#9, d_qoy#7, d_year#6, sum#12] +Input [4]: [ca_county#9, d_qoy#7, d_year#6, sum#11] Keys [3]: [ca_county#9, d_qoy#7, d_year#6] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] -Results [3]: [ca_county#9, d_year#6, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS store_sales#15] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#12] +Results [3]: [ca_county#9, d_year#6, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#12,17,2) AS store_sales#13] (16) Scan parquet default.store_sales -Output [3]: [ss_addr_sk#16, ss_ext_sales_price#17, ss_sold_date_sk#18] +Output [3]: [ss_addr_sk#14, ss_ext_sales_price#15, ss_sold_date_sk#16] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#18), dynamicpruningexpression(ss_sold_date_sk#18 IN dynamicpruning#19)] +PartitionFilters: [isnotnull(ss_sold_date_sk#16), dynamicpruningexpression(ss_sold_date_sk#16 IN dynamicpruning#17)] PushedFilters: [IsNotNull(ss_addr_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 6] -Input [3]: [ss_addr_sk#16, ss_ext_sales_price#17, ss_sold_date_sk#18] +Input [3]: [ss_addr_sk#14, ss_ext_sales_price#15, ss_sold_date_sk#16] (18) Filter [codegen id : 6] -Input [3]: [ss_addr_sk#16, ss_ext_sales_price#17, ss_sold_date_sk#18] -Condition : isnotnull(ss_addr_sk#16) +Input [3]: [ss_addr_sk#14, ss_ext_sales_price#15, ss_sold_date_sk#16] +Condition : isnotnull(ss_addr_sk#14) (19) ReusedExchange [Reuses operator id: 98] -Output [3]: [d_date_sk#20, d_year#21, d_qoy#22] +Output [3]: [d_date_sk#18, d_year#19, d_qoy#20] (20) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#18] -Right keys [1]: [d_date_sk#20] +Left keys [1]: [ss_sold_date_sk#16] +Right keys [1]: [d_date_sk#18] Join condition: None (21) Project [codegen id : 6] -Output [4]: [ss_addr_sk#16, ss_ext_sales_price#17, d_year#21, d_qoy#22] -Input [6]: [ss_addr_sk#16, ss_ext_sales_price#17, ss_sold_date_sk#18, d_date_sk#20, d_year#21, d_qoy#22] +Output [4]: [ss_addr_sk#14, ss_ext_sales_price#15, d_year#19, d_qoy#20] +Input [6]: [ss_addr_sk#14, ss_ext_sales_price#15, ss_sold_date_sk#16, d_date_sk#18, d_year#19, d_qoy#20] (22) ReusedExchange [Reuses operator id: 10] -Output [2]: [ca_address_sk#23, ca_county#24] +Output [2]: [ca_address_sk#21, ca_county#22] (23) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_addr_sk#16] -Right keys [1]: [ca_address_sk#23] +Left keys [1]: [ss_addr_sk#14] +Right keys [1]: [ca_address_sk#21] Join condition: None (24) Project [codegen id : 6] -Output [4]: [ss_ext_sales_price#17, d_year#21, d_qoy#22, ca_county#24] -Input [6]: [ss_addr_sk#16, ss_ext_sales_price#17, d_year#21, d_qoy#22, ca_address_sk#23, ca_county#24] +Output [4]: [ss_ext_sales_price#15, d_year#19, d_qoy#20, ca_county#22] +Input [6]: [ss_addr_sk#14, ss_ext_sales_price#15, d_year#19, d_qoy#20, ca_address_sk#21, ca_county#22] (25) HashAggregate [codegen id : 6] -Input [4]: [ss_ext_sales_price#17, d_year#21, d_qoy#22, ca_county#24] -Keys [3]: [ca_county#24, d_qoy#22, d_year#21] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#17))] -Aggregate Attributes [1]: [sum#25] -Results [4]: [ca_county#24, d_qoy#22, d_year#21, sum#26] +Input [4]: [ss_ext_sales_price#15, d_year#19, d_qoy#20, ca_county#22] +Keys [3]: [ca_county#22, d_qoy#20, d_year#19] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#15))] +Aggregate Attributes [1]: [sum#23] +Results [4]: [ca_county#22, d_qoy#20, d_year#19, sum#24] (26) Exchange -Input [4]: [ca_county#24, d_qoy#22, d_year#21, sum#26] -Arguments: hashpartitioning(ca_county#24, d_qoy#22, d_year#21, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [4]: [ca_county#22, d_qoy#20, d_year#19, sum#24] +Arguments: hashpartitioning(ca_county#22, d_qoy#20, d_year#19, 5), ENSURE_REQUIREMENTS, [plan_id=3] (27) HashAggregate [codegen id : 7] -Input [4]: [ca_county#24, d_qoy#22, d_year#21, sum#26] -Keys [3]: [ca_county#24, d_qoy#22, d_year#21] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#17))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#17))#14] -Results [2]: [ca_county#24, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#17))#14,17,2) AS store_sales#28] +Input [4]: [ca_county#22, d_qoy#20, d_year#19, sum#24] +Keys [3]: [ca_county#22, d_qoy#20, d_year#19] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#15))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#15))#12] +Results [2]: [ca_county#22, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#15))#12,17,2) AS store_sales#25] (28) BroadcastExchange -Input [2]: [ca_county#24, store_sales#28] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#29] +Input [2]: [ca_county#22, store_sales#25] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 24] Left keys [1]: [ca_county#9] -Right keys [1]: [ca_county#24] +Right keys [1]: [ca_county#22] Join condition: None (30) Scan parquet default.store_sales -Output [3]: [ss_addr_sk#30, ss_ext_sales_price#31, ss_sold_date_sk#32] +Output [3]: [ss_addr_sk#26, ss_ext_sales_price#27, ss_sold_date_sk#28] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#32), dynamicpruningexpression(ss_sold_date_sk#32 IN dynamicpruning#33)] +PartitionFilters: [isnotnull(ss_sold_date_sk#28), dynamicpruningexpression(ss_sold_date_sk#28 IN dynamicpruning#29)] PushedFilters: [IsNotNull(ss_addr_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 10] -Input [3]: [ss_addr_sk#30, ss_ext_sales_price#31, ss_sold_date_sk#32] +Input [3]: [ss_addr_sk#26, ss_ext_sales_price#27, ss_sold_date_sk#28] (32) Filter [codegen id : 10] -Input [3]: [ss_addr_sk#30, ss_ext_sales_price#31, ss_sold_date_sk#32] -Condition : isnotnull(ss_addr_sk#30) +Input [3]: [ss_addr_sk#26, ss_ext_sales_price#27, ss_sold_date_sk#28] +Condition : isnotnull(ss_addr_sk#26) (33) ReusedExchange [Reuses operator id: 102] -Output [3]: [d_date_sk#34, d_year#35, d_qoy#36] +Output [3]: [d_date_sk#30, d_year#31, d_qoy#32] (34) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_sold_date_sk#32] -Right keys [1]: [d_date_sk#34] +Left keys [1]: [ss_sold_date_sk#28] +Right keys [1]: [d_date_sk#30] Join condition: None (35) Project [codegen id : 10] -Output [4]: [ss_addr_sk#30, ss_ext_sales_price#31, d_year#35, d_qoy#36] -Input [6]: [ss_addr_sk#30, ss_ext_sales_price#31, ss_sold_date_sk#32, d_date_sk#34, d_year#35, d_qoy#36] +Output [4]: [ss_addr_sk#26, ss_ext_sales_price#27, d_year#31, d_qoy#32] +Input [6]: [ss_addr_sk#26, ss_ext_sales_price#27, ss_sold_date_sk#28, d_date_sk#30, d_year#31, d_qoy#32] (36) ReusedExchange [Reuses operator id: 10] -Output [2]: [ca_address_sk#37, ca_county#38] +Output [2]: [ca_address_sk#33, ca_county#34] (37) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_addr_sk#30] -Right keys [1]: [ca_address_sk#37] +Left keys [1]: [ss_addr_sk#26] +Right keys [1]: [ca_address_sk#33] Join condition: None (38) Project [codegen id : 10] -Output [4]: [ss_ext_sales_price#31, d_year#35, d_qoy#36, ca_county#38] -Input [6]: [ss_addr_sk#30, ss_ext_sales_price#31, d_year#35, d_qoy#36, ca_address_sk#37, ca_county#38] +Output [4]: [ss_ext_sales_price#27, d_year#31, d_qoy#32, ca_county#34] +Input [6]: [ss_addr_sk#26, ss_ext_sales_price#27, d_year#31, d_qoy#32, ca_address_sk#33, ca_county#34] (39) HashAggregate [codegen id : 10] -Input [4]: [ss_ext_sales_price#31, d_year#35, d_qoy#36, ca_county#38] -Keys [3]: [ca_county#38, d_qoy#36, d_year#35] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#31))] -Aggregate Attributes [1]: [sum#39] -Results [4]: [ca_county#38, d_qoy#36, d_year#35, sum#40] +Input [4]: [ss_ext_sales_price#27, d_year#31, d_qoy#32, ca_county#34] +Keys [3]: [ca_county#34, d_qoy#32, d_year#31] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#27))] +Aggregate Attributes [1]: [sum#35] +Results [4]: [ca_county#34, d_qoy#32, d_year#31, sum#36] (40) Exchange -Input [4]: [ca_county#38, d_qoy#36, d_year#35, sum#40] -Arguments: hashpartitioning(ca_county#38, d_qoy#36, d_year#35, 5), ENSURE_REQUIREMENTS, [id=#41] +Input [4]: [ca_county#34, d_qoy#32, d_year#31, sum#36] +Arguments: hashpartitioning(ca_county#34, d_qoy#32, d_year#31, 5), ENSURE_REQUIREMENTS, [plan_id=5] (41) HashAggregate [codegen id : 11] -Input [4]: [ca_county#38, d_qoy#36, d_year#35, sum#40] -Keys [3]: [ca_county#38, d_qoy#36, d_year#35] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#31))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#31))#14] -Results [2]: [ca_county#38, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#31))#14,17,2) AS store_sales#42] +Input [4]: [ca_county#34, d_qoy#32, d_year#31, sum#36] +Keys [3]: [ca_county#34, d_qoy#32, d_year#31] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#27))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#27))#12] +Results [2]: [ca_county#34, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#27))#12,17,2) AS store_sales#37] (42) BroadcastExchange -Input [2]: [ca_county#38, store_sales#42] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#43] +Input [2]: [ca_county#34, store_sales#37] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] (43) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [ca_county#24] -Right keys [1]: [ca_county#38] +Left keys [1]: [ca_county#22] +Right keys [1]: [ca_county#34] Join condition: None (44) Project [codegen id : 24] -Output [5]: [ca_county#9, d_year#6, store_sales#15, store_sales#28, store_sales#42] -Input [7]: [ca_county#9, d_year#6, store_sales#15, ca_county#24, store_sales#28, ca_county#38, store_sales#42] +Output [5]: [ca_county#9, d_year#6, store_sales#13, store_sales#25, store_sales#37] +Input [7]: [ca_county#9, d_year#6, store_sales#13, ca_county#22, store_sales#25, ca_county#34, store_sales#37] (45) Scan parquet default.web_sales -Output [3]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, ws_sold_date_sk#46] +Output [3]: [ws_bill_addr_sk#38, ws_ext_sales_price#39, ws_sold_date_sk#40] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#46), dynamicpruningexpression(ws_sold_date_sk#46 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(ws_sold_date_sk#40), dynamicpruningexpression(ws_sold_date_sk#40 IN dynamicpruning#4)] PushedFilters: [IsNotNull(ws_bill_addr_sk)] ReadSchema: struct (46) ColumnarToRow [codegen id : 14] -Input [3]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, ws_sold_date_sk#46] +Input [3]: [ws_bill_addr_sk#38, ws_ext_sales_price#39, ws_sold_date_sk#40] (47) Filter [codegen id : 14] -Input [3]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, ws_sold_date_sk#46] -Condition : isnotnull(ws_bill_addr_sk#44) +Input [3]: [ws_bill_addr_sk#38, ws_ext_sales_price#39, ws_sold_date_sk#40] +Condition : isnotnull(ws_bill_addr_sk#38) (48) ReusedExchange [Reuses operator id: 94] -Output [3]: [d_date_sk#47, d_year#48, d_qoy#49] +Output [3]: [d_date_sk#41, d_year#42, d_qoy#43] (49) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_sold_date_sk#46] -Right keys [1]: [d_date_sk#47] +Left keys [1]: [ws_sold_date_sk#40] +Right keys [1]: [d_date_sk#41] Join condition: None (50) Project [codegen id : 14] -Output [4]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, d_year#48, d_qoy#49] -Input [6]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, ws_sold_date_sk#46, d_date_sk#47, d_year#48, d_qoy#49] +Output [4]: [ws_bill_addr_sk#38, ws_ext_sales_price#39, d_year#42, d_qoy#43] +Input [6]: [ws_bill_addr_sk#38, ws_ext_sales_price#39, ws_sold_date_sk#40, d_date_sk#41, d_year#42, d_qoy#43] (51) ReusedExchange [Reuses operator id: 10] -Output [2]: [ca_address_sk#50, ca_county#51] +Output [2]: [ca_address_sk#44, ca_county#45] (52) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_bill_addr_sk#44] -Right keys [1]: [ca_address_sk#50] +Left keys [1]: [ws_bill_addr_sk#38] +Right keys [1]: [ca_address_sk#44] Join condition: None (53) Project [codegen id : 14] -Output [4]: [ws_ext_sales_price#45, d_year#48, d_qoy#49, ca_county#51] -Input [6]: [ws_bill_addr_sk#44, ws_ext_sales_price#45, d_year#48, d_qoy#49, ca_address_sk#50, ca_county#51] +Output [4]: [ws_ext_sales_price#39, d_year#42, d_qoy#43, ca_county#45] +Input [6]: [ws_bill_addr_sk#38, ws_ext_sales_price#39, d_year#42, d_qoy#43, ca_address_sk#44, ca_county#45] (54) HashAggregate [codegen id : 14] -Input [4]: [ws_ext_sales_price#45, d_year#48, d_qoy#49, ca_county#51] -Keys [3]: [ca_county#51, d_qoy#49, d_year#48] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#45))] -Aggregate Attributes [1]: [sum#52] -Results [4]: [ca_county#51, d_qoy#49, d_year#48, sum#53] +Input [4]: [ws_ext_sales_price#39, d_year#42, d_qoy#43, ca_county#45] +Keys [3]: [ca_county#45, d_qoy#43, d_year#42] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#39))] +Aggregate Attributes [1]: [sum#46] +Results [4]: [ca_county#45, d_qoy#43, d_year#42, sum#47] (55) Exchange -Input [4]: [ca_county#51, d_qoy#49, d_year#48, sum#53] -Arguments: hashpartitioning(ca_county#51, d_qoy#49, d_year#48, 5), ENSURE_REQUIREMENTS, [id=#54] +Input [4]: [ca_county#45, d_qoy#43, d_year#42, sum#47] +Arguments: hashpartitioning(ca_county#45, d_qoy#43, d_year#42, 5), ENSURE_REQUIREMENTS, [plan_id=7] (56) HashAggregate [codegen id : 15] -Input [4]: [ca_county#51, d_qoy#49, d_year#48, sum#53] -Keys [3]: [ca_county#51, d_qoy#49, d_year#48] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#45))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#45))#55] -Results [2]: [ca_county#51, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#45))#55,17,2) AS web_sales#56] +Input [4]: [ca_county#45, d_qoy#43, d_year#42, sum#47] +Keys [3]: [ca_county#45, d_qoy#43, d_year#42] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#39))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#39))#48] +Results [2]: [ca_county#45, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#39))#48,17,2) AS web_sales#49] (57) BroadcastExchange -Input [2]: [ca_county#51, web_sales#56] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#57] +Input [2]: [ca_county#45, web_sales#49] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=8] (58) BroadcastHashJoin [codegen id : 24] Left keys [1]: [ca_county#9] -Right keys [1]: [ca_county#51] +Right keys [1]: [ca_county#45] Join condition: None (59) Scan parquet default.web_sales -Output [3]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, ws_sold_date_sk#60] +Output [3]: [ws_bill_addr_sk#50, ws_ext_sales_price#51, ws_sold_date_sk#52] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#60), dynamicpruningexpression(ws_sold_date_sk#60 IN dynamicpruning#19)] +PartitionFilters: [isnotnull(ws_sold_date_sk#52), dynamicpruningexpression(ws_sold_date_sk#52 IN dynamicpruning#17)] PushedFilters: [IsNotNull(ws_bill_addr_sk)] ReadSchema: struct (60) ColumnarToRow [codegen id : 18] -Input [3]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, ws_sold_date_sk#60] +Input [3]: [ws_bill_addr_sk#50, ws_ext_sales_price#51, ws_sold_date_sk#52] (61) Filter [codegen id : 18] -Input [3]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, ws_sold_date_sk#60] -Condition : isnotnull(ws_bill_addr_sk#58) +Input [3]: [ws_bill_addr_sk#50, ws_ext_sales_price#51, ws_sold_date_sk#52] +Condition : isnotnull(ws_bill_addr_sk#50) (62) ReusedExchange [Reuses operator id: 98] -Output [3]: [d_date_sk#61, d_year#62, d_qoy#63] +Output [3]: [d_date_sk#53, d_year#54, d_qoy#55] (63) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ws_sold_date_sk#60] -Right keys [1]: [d_date_sk#61] +Left keys [1]: [ws_sold_date_sk#52] +Right keys [1]: [d_date_sk#53] Join condition: None (64) Project [codegen id : 18] -Output [4]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, d_year#62, d_qoy#63] -Input [6]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, ws_sold_date_sk#60, d_date_sk#61, d_year#62, d_qoy#63] +Output [4]: [ws_bill_addr_sk#50, ws_ext_sales_price#51, d_year#54, d_qoy#55] +Input [6]: [ws_bill_addr_sk#50, ws_ext_sales_price#51, ws_sold_date_sk#52, d_date_sk#53, d_year#54, d_qoy#55] (65) ReusedExchange [Reuses operator id: 10] -Output [2]: [ca_address_sk#64, ca_county#65] +Output [2]: [ca_address_sk#56, ca_county#57] (66) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ws_bill_addr_sk#58] -Right keys [1]: [ca_address_sk#64] +Left keys [1]: [ws_bill_addr_sk#50] +Right keys [1]: [ca_address_sk#56] Join condition: None (67) Project [codegen id : 18] -Output [4]: [ws_ext_sales_price#59, d_year#62, d_qoy#63, ca_county#65] -Input [6]: [ws_bill_addr_sk#58, ws_ext_sales_price#59, d_year#62, d_qoy#63, ca_address_sk#64, ca_county#65] +Output [4]: [ws_ext_sales_price#51, d_year#54, d_qoy#55, ca_county#57] +Input [6]: [ws_bill_addr_sk#50, ws_ext_sales_price#51, d_year#54, d_qoy#55, ca_address_sk#56, ca_county#57] (68) HashAggregate [codegen id : 18] -Input [4]: [ws_ext_sales_price#59, d_year#62, d_qoy#63, ca_county#65] -Keys [3]: [ca_county#65, d_qoy#63, d_year#62] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#59))] -Aggregate Attributes [1]: [sum#66] -Results [4]: [ca_county#65, d_qoy#63, d_year#62, sum#67] +Input [4]: [ws_ext_sales_price#51, d_year#54, d_qoy#55, ca_county#57] +Keys [3]: [ca_county#57, d_qoy#55, d_year#54] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#51))] +Aggregate Attributes [1]: [sum#58] +Results [4]: [ca_county#57, d_qoy#55, d_year#54, sum#59] (69) Exchange -Input [4]: [ca_county#65, d_qoy#63, d_year#62, sum#67] -Arguments: hashpartitioning(ca_county#65, d_qoy#63, d_year#62, 5), ENSURE_REQUIREMENTS, [id=#68] +Input [4]: [ca_county#57, d_qoy#55, d_year#54, sum#59] +Arguments: hashpartitioning(ca_county#57, d_qoy#55, d_year#54, 5), ENSURE_REQUIREMENTS, [plan_id=9] (70) HashAggregate [codegen id : 19] -Input [4]: [ca_county#65, d_qoy#63, d_year#62, sum#67] -Keys [3]: [ca_county#65, d_qoy#63, d_year#62] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#59))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#59))#55] -Results [2]: [ca_county#65, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#59))#55,17,2) AS web_sales#69] +Input [4]: [ca_county#57, d_qoy#55, d_year#54, sum#59] +Keys [3]: [ca_county#57, d_qoy#55, d_year#54] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#51))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#51))#48] +Results [2]: [ca_county#57, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#51))#48,17,2) AS web_sales#60] (71) BroadcastExchange -Input [2]: [ca_county#65, web_sales#69] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#70] +Input [2]: [ca_county#57, web_sales#60] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=10] (72) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [ca_county#51] -Right keys [1]: [ca_county#65] -Join condition: (CASE WHEN (web_sales#56 > 0.00) THEN CheckOverflow((promote_precision(web_sales#69) / promote_precision(web_sales#56)), DecimalType(37,20)) END > CASE WHEN (store_sales#15 > 0.00) THEN CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#15)), DecimalType(37,20)) END) +Left keys [1]: [ca_county#45] +Right keys [1]: [ca_county#57] +Join condition: (CASE WHEN (web_sales#49 > 0.00) THEN CheckOverflow((promote_precision(web_sales#60) / promote_precision(web_sales#49)), DecimalType(37,20)) END > CASE WHEN (store_sales#13 > 0.00) THEN CheckOverflow((promote_precision(store_sales#25) / promote_precision(store_sales#13)), DecimalType(37,20)) END) (73) Project [codegen id : 24] -Output [8]: [ca_county#9, d_year#6, store_sales#15, store_sales#28, store_sales#42, ca_county#51, web_sales#56, web_sales#69] -Input [9]: [ca_county#9, d_year#6, store_sales#15, store_sales#28, store_sales#42, ca_county#51, web_sales#56, ca_county#65, web_sales#69] +Output [8]: [ca_county#9, d_year#6, store_sales#13, store_sales#25, store_sales#37, ca_county#45, web_sales#49, web_sales#60] +Input [9]: [ca_county#9, d_year#6, store_sales#13, store_sales#25, store_sales#37, ca_county#45, web_sales#49, ca_county#57, web_sales#60] (74) Scan parquet default.web_sales -Output [3]: [ws_bill_addr_sk#71, ws_ext_sales_price#72, ws_sold_date_sk#73] +Output [3]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, ws_sold_date_sk#63] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#73), dynamicpruningexpression(ws_sold_date_sk#73 IN dynamicpruning#33)] +PartitionFilters: [isnotnull(ws_sold_date_sk#63), dynamicpruningexpression(ws_sold_date_sk#63 IN dynamicpruning#29)] PushedFilters: [IsNotNull(ws_bill_addr_sk)] ReadSchema: struct (75) ColumnarToRow [codegen id : 22] -Input [3]: [ws_bill_addr_sk#71, ws_ext_sales_price#72, ws_sold_date_sk#73] +Input [3]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, ws_sold_date_sk#63] (76) Filter [codegen id : 22] -Input [3]: [ws_bill_addr_sk#71, ws_ext_sales_price#72, ws_sold_date_sk#73] -Condition : isnotnull(ws_bill_addr_sk#71) +Input [3]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, ws_sold_date_sk#63] +Condition : isnotnull(ws_bill_addr_sk#61) (77) ReusedExchange [Reuses operator id: 102] -Output [3]: [d_date_sk#74, d_year#75, d_qoy#76] +Output [3]: [d_date_sk#64, d_year#65, d_qoy#66] (78) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [ws_sold_date_sk#73] -Right keys [1]: [d_date_sk#74] +Left keys [1]: [ws_sold_date_sk#63] +Right keys [1]: [d_date_sk#64] Join condition: None (79) Project [codegen id : 22] -Output [4]: [ws_bill_addr_sk#71, ws_ext_sales_price#72, d_year#75, d_qoy#76] -Input [6]: [ws_bill_addr_sk#71, ws_ext_sales_price#72, ws_sold_date_sk#73, d_date_sk#74, d_year#75, d_qoy#76] +Output [4]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, d_year#65, d_qoy#66] +Input [6]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, ws_sold_date_sk#63, d_date_sk#64, d_year#65, d_qoy#66] (80) ReusedExchange [Reuses operator id: 10] -Output [2]: [ca_address_sk#77, ca_county#78] +Output [2]: [ca_address_sk#67, ca_county#68] (81) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [ws_bill_addr_sk#71] -Right keys [1]: [ca_address_sk#77] +Left keys [1]: [ws_bill_addr_sk#61] +Right keys [1]: [ca_address_sk#67] Join condition: None (82) Project [codegen id : 22] -Output [4]: [ws_ext_sales_price#72, d_year#75, d_qoy#76, ca_county#78] -Input [6]: [ws_bill_addr_sk#71, ws_ext_sales_price#72, d_year#75, d_qoy#76, ca_address_sk#77, ca_county#78] +Output [4]: [ws_ext_sales_price#62, d_year#65, d_qoy#66, ca_county#68] +Input [6]: [ws_bill_addr_sk#61, ws_ext_sales_price#62, d_year#65, d_qoy#66, ca_address_sk#67, ca_county#68] (83) HashAggregate [codegen id : 22] -Input [4]: [ws_ext_sales_price#72, d_year#75, d_qoy#76, ca_county#78] -Keys [3]: [ca_county#78, d_qoy#76, d_year#75] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#72))] -Aggregate Attributes [1]: [sum#79] -Results [4]: [ca_county#78, d_qoy#76, d_year#75, sum#80] +Input [4]: [ws_ext_sales_price#62, d_year#65, d_qoy#66, ca_county#68] +Keys [3]: [ca_county#68, d_qoy#66, d_year#65] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#62))] +Aggregate Attributes [1]: [sum#69] +Results [4]: [ca_county#68, d_qoy#66, d_year#65, sum#70] (84) Exchange -Input [4]: [ca_county#78, d_qoy#76, d_year#75, sum#80] -Arguments: hashpartitioning(ca_county#78, d_qoy#76, d_year#75, 5), ENSURE_REQUIREMENTS, [id=#81] +Input [4]: [ca_county#68, d_qoy#66, d_year#65, sum#70] +Arguments: hashpartitioning(ca_county#68, d_qoy#66, d_year#65, 5), ENSURE_REQUIREMENTS, [plan_id=11] (85) HashAggregate [codegen id : 23] -Input [4]: [ca_county#78, d_qoy#76, d_year#75, sum#80] -Keys [3]: [ca_county#78, d_qoy#76, d_year#75] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#72))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#72))#55] -Results [2]: [ca_county#78, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#72))#55,17,2) AS web_sales#82] +Input [4]: [ca_county#68, d_qoy#66, d_year#65, sum#70] +Keys [3]: [ca_county#68, d_qoy#66, d_year#65] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#62))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#62))#48] +Results [2]: [ca_county#68, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#62))#48,17,2) AS web_sales#71] (86) BroadcastExchange -Input [2]: [ca_county#78, web_sales#82] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#83] +Input [2]: [ca_county#68, web_sales#71] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=12] (87) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [ca_county#51] -Right keys [1]: [ca_county#78] -Join condition: (CASE WHEN (web_sales#69 > 0.00) THEN CheckOverflow((promote_precision(web_sales#82) / promote_precision(web_sales#69)), DecimalType(37,20)) END > CASE WHEN (store_sales#28 > 0.00) THEN CheckOverflow((promote_precision(store_sales#42) / promote_precision(store_sales#28)), DecimalType(37,20)) END) +Left keys [1]: [ca_county#45] +Right keys [1]: [ca_county#68] +Join condition: (CASE WHEN (web_sales#60 > 0.00) THEN CheckOverflow((promote_precision(web_sales#71) / promote_precision(web_sales#60)), DecimalType(37,20)) END > CASE WHEN (store_sales#25 > 0.00) THEN CheckOverflow((promote_precision(store_sales#37) / promote_precision(store_sales#25)), DecimalType(37,20)) END) (88) Project [codegen id : 24] -Output [6]: [ca_county#9, d_year#6, CheckOverflow((promote_precision(web_sales#69) / promote_precision(web_sales#56)), DecimalType(37,20)) AS web_q1_q2_increase#84, CheckOverflow((promote_precision(store_sales#28) / promote_precision(store_sales#15)), DecimalType(37,20)) AS store_q1_q2_increase#85, CheckOverflow((promote_precision(web_sales#82) / promote_precision(web_sales#69)), DecimalType(37,20)) AS web_q2_q3_increase#86, CheckOverflow((promote_precision(store_sales#42) / promote_precision(store_sales#28)), DecimalType(37,20)) AS store_q2_q3_increase#87] -Input [10]: [ca_county#9, d_year#6, store_sales#15, store_sales#28, store_sales#42, ca_county#51, web_sales#56, web_sales#69, ca_county#78, web_sales#82] +Output [6]: [ca_county#9, d_year#6, CheckOverflow((promote_precision(web_sales#60) / promote_precision(web_sales#49)), DecimalType(37,20)) AS web_q1_q2_increase#72, CheckOverflow((promote_precision(store_sales#25) / promote_precision(store_sales#13)), DecimalType(37,20)) AS store_q1_q2_increase#73, CheckOverflow((promote_precision(web_sales#71) / promote_precision(web_sales#60)), DecimalType(37,20)) AS web_q2_q3_increase#74, CheckOverflow((promote_precision(store_sales#37) / promote_precision(store_sales#25)), DecimalType(37,20)) AS store_q2_q3_increase#75] +Input [10]: [ca_county#9, d_year#6, store_sales#13, store_sales#25, store_sales#37, ca_county#45, web_sales#49, web_sales#60, ca_county#68, web_sales#71] (89) Exchange -Input [6]: [ca_county#9, d_year#6, web_q1_q2_increase#84, store_q1_q2_increase#85, web_q2_q3_increase#86, store_q2_q3_increase#87] -Arguments: rangepartitioning(ca_county#9 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#88] +Input [6]: [ca_county#9, d_year#6, web_q1_q2_increase#72, store_q1_q2_increase#73, web_q2_q3_increase#74, store_q2_q3_increase#75] +Arguments: rangepartitioning(ca_county#9 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=13] (90) Sort [codegen id : 25] -Input [6]: [ca_county#9, d_year#6, web_q1_q2_increase#84, store_q1_q2_increase#85, web_q2_q3_increase#86, store_q2_q3_increase#87] +Input [6]: [ca_county#9, d_year#6, web_q1_q2_increase#72, store_q1_q2_increase#73, web_q2_q3_increase#74, store_q2_q3_increase#75] Arguments: [ca_county#9 ASC NULLS FIRST], true, 0 ===== Subqueries ===== @@ -538,9 +538,9 @@ Condition : ((((isnotnull(d_qoy#7) AND isnotnull(d_year#6)) AND (d_qoy#7 = 1)) A (94) BroadcastExchange Input [3]: [d_date_sk#5, d_year#6, d_qoy#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#89] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] -Subquery:2 Hosting operator id = 16 Hosting Expression = ss_sold_date_sk#18 IN dynamicpruning#19 +Subquery:2 Hosting operator id = 16 Hosting Expression = ss_sold_date_sk#16 IN dynamicpruning#17 BroadcastExchange (98) +- * Filter (97) +- * ColumnarToRow (96) @@ -548,24 +548,24 @@ BroadcastExchange (98) (95) Scan parquet default.date_dim -Output [3]: [d_date_sk#20, d_year#21, d_qoy#22] +Output [3]: [d_date_sk#18, d_year#19, d_qoy#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (96) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#20, d_year#21, d_qoy#22] +Input [3]: [d_date_sk#18, d_year#19, d_qoy#20] (97) Filter [codegen id : 1] -Input [3]: [d_date_sk#20, d_year#21, d_qoy#22] -Condition : ((((isnotnull(d_qoy#22) AND isnotnull(d_year#21)) AND (d_qoy#22 = 2)) AND (d_year#21 = 2000)) AND isnotnull(d_date_sk#20)) +Input [3]: [d_date_sk#18, d_year#19, d_qoy#20] +Condition : ((((isnotnull(d_qoy#20) AND isnotnull(d_year#19)) AND (d_qoy#20 = 2)) AND (d_year#19 = 2000)) AND isnotnull(d_date_sk#18)) (98) BroadcastExchange -Input [3]: [d_date_sk#20, d_year#21, d_qoy#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#90] +Input [3]: [d_date_sk#18, d_year#19, d_qoy#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=15] -Subquery:3 Hosting operator id = 30 Hosting Expression = ss_sold_date_sk#32 IN dynamicpruning#33 +Subquery:3 Hosting operator id = 30 Hosting Expression = ss_sold_date_sk#28 IN dynamicpruning#29 BroadcastExchange (102) +- * Filter (101) +- * ColumnarToRow (100) @@ -573,27 +573,27 @@ BroadcastExchange (102) (99) Scan parquet default.date_dim -Output [3]: [d_date_sk#34, d_year#35, d_qoy#36] +Output [3]: [d_date_sk#30, d_year#31, d_qoy#32] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,3), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (100) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#34, d_year#35, d_qoy#36] +Input [3]: [d_date_sk#30, d_year#31, d_qoy#32] (101) Filter [codegen id : 1] -Input [3]: [d_date_sk#34, d_year#35, d_qoy#36] -Condition : ((((isnotnull(d_qoy#36) AND isnotnull(d_year#35)) AND (d_qoy#36 = 3)) AND (d_year#35 = 2000)) AND isnotnull(d_date_sk#34)) +Input [3]: [d_date_sk#30, d_year#31, d_qoy#32] +Condition : ((((isnotnull(d_qoy#32) AND isnotnull(d_year#31)) AND (d_qoy#32 = 3)) AND (d_year#31 = 2000)) AND isnotnull(d_date_sk#30)) (102) BroadcastExchange -Input [3]: [d_date_sk#34, d_year#35, d_qoy#36] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#91] +Input [3]: [d_date_sk#30, d_year#31, d_qoy#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=16] -Subquery:4 Hosting operator id = 45 Hosting Expression = ws_sold_date_sk#46 IN dynamicpruning#4 +Subquery:4 Hosting operator id = 45 Hosting Expression = ws_sold_date_sk#40 IN dynamicpruning#4 -Subquery:5 Hosting operator id = 59 Hosting Expression = ws_sold_date_sk#60 IN dynamicpruning#19 +Subquery:5 Hosting operator id = 59 Hosting Expression = ws_sold_date_sk#52 IN dynamicpruning#17 -Subquery:6 Hosting operator id = 74 Hosting Expression = ws_sold_date_sk#73 IN dynamicpruning#33 +Subquery:6 Hosting operator id = 74 Hosting Expression = ws_sold_date_sk#63 IN dynamicpruning#29 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt index 92ba279df59fe..e7ae6145b4332 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32.sf100/explain.txt @@ -48,113 +48,113 @@ Input [2]: [i_item_sk#1, i_manufact_id#2] (5) BroadcastExchange Input [1]: [i_item_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.catalog_sales -Output [3]: [cs_item_sk#4, cs_ext_discount_amt#5, cs_sold_date_sk#6] +Output [3]: [cs_item_sk#3, cs_ext_discount_amt#4, cs_sold_date_sk#5] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#6), dynamicpruningexpression(cs_sold_date_sk#6 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(cs_sold_date_sk#5), dynamicpruningexpression(cs_sold_date_sk#5 IN dynamicpruning#6)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [3]: [cs_item_sk#4, cs_ext_discount_amt#5, cs_sold_date_sk#6] +Input [3]: [cs_item_sk#3, cs_ext_discount_amt#4, cs_sold_date_sk#5] (8) Filter [codegen id : 3] -Input [3]: [cs_item_sk#4, cs_ext_discount_amt#5, cs_sold_date_sk#6] -Condition : isnotnull(cs_item_sk#4) +Input [3]: [cs_item_sk#3, cs_ext_discount_amt#4, cs_sold_date_sk#5] +Condition : isnotnull(cs_item_sk#3) (9) ReusedExchange [Reuses operator id: 32] -Output [1]: [d_date_sk#8] +Output [1]: [d_date_sk#7] (10) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [cs_sold_date_sk#6] -Right keys [1]: [d_date_sk#8] +Left keys [1]: [cs_sold_date_sk#5] +Right keys [1]: [d_date_sk#7] Join condition: None (11) Project [codegen id : 3] -Output [2]: [cs_item_sk#4, cs_ext_discount_amt#5] -Input [4]: [cs_item_sk#4, cs_ext_discount_amt#5, cs_sold_date_sk#6, d_date_sk#8] +Output [2]: [cs_item_sk#3, cs_ext_discount_amt#4] +Input [4]: [cs_item_sk#3, cs_ext_discount_amt#4, cs_sold_date_sk#5, d_date_sk#7] (12) HashAggregate [codegen id : 3] -Input [2]: [cs_item_sk#4, cs_ext_discount_amt#5] -Keys [1]: [cs_item_sk#4] -Functions [1]: [partial_avg(UnscaledValue(cs_ext_discount_amt#5))] -Aggregate Attributes [2]: [sum#9, count#10] -Results [3]: [cs_item_sk#4, sum#11, count#12] +Input [2]: [cs_item_sk#3, cs_ext_discount_amt#4] +Keys [1]: [cs_item_sk#3] +Functions [1]: [partial_avg(UnscaledValue(cs_ext_discount_amt#4))] +Aggregate Attributes [2]: [sum#8, count#9] +Results [3]: [cs_item_sk#3, sum#10, count#11] (13) Exchange -Input [3]: [cs_item_sk#4, sum#11, count#12] -Arguments: hashpartitioning(cs_item_sk#4, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [3]: [cs_item_sk#3, sum#10, count#11] +Arguments: hashpartitioning(cs_item_sk#3, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) HashAggregate -Input [3]: [cs_item_sk#4, sum#11, count#12] -Keys [1]: [cs_item_sk#4] -Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#5))] -Aggregate Attributes [1]: [avg(UnscaledValue(cs_ext_discount_amt#5))#14] -Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(cs_ext_discount_amt#5))#14 / 100.0) as decimal(11,6)))), DecimalType(14,7)) AS (1.3 * avg(cs_ext_discount_amt))#15, cs_item_sk#4] +Input [3]: [cs_item_sk#3, sum#10, count#11] +Keys [1]: [cs_item_sk#3] +Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#4))] +Aggregate Attributes [1]: [avg(UnscaledValue(cs_ext_discount_amt#4))#12] +Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(cs_ext_discount_amt#4))#12 / 100.0) as decimal(11,6)))), DecimalType(14,7)) AS (1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#3] (15) Filter -Input [2]: [(1.3 * avg(cs_ext_discount_amt))#15, cs_item_sk#4] -Condition : isnotnull((1.3 * avg(cs_ext_discount_amt))#15) +Input [2]: [(1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#3] +Condition : isnotnull((1.3 * avg(cs_ext_discount_amt))#13) (16) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] -Right keys [1]: [cs_item_sk#4] +Right keys [1]: [cs_item_sk#3] Join condition: None (17) Project [codegen id : 4] -Output [2]: [i_item_sk#1, (1.3 * avg(cs_ext_discount_amt))#15] -Input [3]: [i_item_sk#1, (1.3 * avg(cs_ext_discount_amt))#15, cs_item_sk#4] +Output [2]: [i_item_sk#1, (1.3 * avg(cs_ext_discount_amt))#13] +Input [3]: [i_item_sk#1, (1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#3] (18) BroadcastExchange -Input [2]: [i_item_sk#1, (1.3 * avg(cs_ext_discount_amt))#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] +Input [2]: [i_item_sk#1, (1.3 * avg(cs_ext_discount_amt))#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (19) Scan parquet default.catalog_sales -Output [3]: [cs_item_sk#17, cs_ext_discount_amt#18, cs_sold_date_sk#19] +Output [3]: [cs_item_sk#14, cs_ext_discount_amt#15, cs_sold_date_sk#16] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#19), dynamicpruningexpression(cs_sold_date_sk#19 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(cs_sold_date_sk#16), dynamicpruningexpression(cs_sold_date_sk#16 IN dynamicpruning#6)] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_ext_discount_amt)] ReadSchema: struct (20) ColumnarToRow -Input [3]: [cs_item_sk#17, cs_ext_discount_amt#18, cs_sold_date_sk#19] +Input [3]: [cs_item_sk#14, cs_ext_discount_amt#15, cs_sold_date_sk#16] (21) Filter -Input [3]: [cs_item_sk#17, cs_ext_discount_amt#18, cs_sold_date_sk#19] -Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_ext_discount_amt#18)) +Input [3]: [cs_item_sk#14, cs_ext_discount_amt#15, cs_sold_date_sk#16] +Condition : (isnotnull(cs_item_sk#14) AND isnotnull(cs_ext_discount_amt#15)) (22) BroadcastHashJoin [codegen id : 6] Left keys [1]: [i_item_sk#1] -Right keys [1]: [cs_item_sk#17] -Join condition: (cast(cs_ext_discount_amt#18 as decimal(14,7)) > (1.3 * avg(cs_ext_discount_amt))#15) +Right keys [1]: [cs_item_sk#14] +Join condition: (cast(cs_ext_discount_amt#15 as decimal(14,7)) > (1.3 * avg(cs_ext_discount_amt))#13) (23) Project [codegen id : 6] -Output [1]: [cs_sold_date_sk#19] -Input [5]: [i_item_sk#1, (1.3 * avg(cs_ext_discount_amt))#15, cs_item_sk#17, cs_ext_discount_amt#18, cs_sold_date_sk#19] +Output [1]: [cs_sold_date_sk#16] +Input [5]: [i_item_sk#1, (1.3 * avg(cs_ext_discount_amt))#13, cs_item_sk#14, cs_ext_discount_amt#15, cs_sold_date_sk#16] (24) ReusedExchange [Reuses operator id: 32] -Output [1]: [d_date_sk#20] +Output [1]: [d_date_sk#17] (25) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_sold_date_sk#19] -Right keys [1]: [d_date_sk#20] +Left keys [1]: [cs_sold_date_sk#16] +Right keys [1]: [d_date_sk#17] Join condition: None (26) Project [codegen id : 6] -Output [1]: [1 AS excess discount amount #21] -Input [2]: [cs_sold_date_sk#19, d_date_sk#20] +Output [1]: [1 AS excess discount amount #18] +Input [2]: [cs_sold_date_sk#16, d_date_sk#17] (27) CollectLimit -Input [1]: [excess discount amount #21] +Input [1]: [excess discount amount #18] Arguments: 100 ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = cs_sold_date_sk#6 IN dynamicpruning#7 +Subquery:1 Hosting operator id = 6 Hosting Expression = cs_sold_date_sk#5 IN dynamicpruning#6 BroadcastExchange (32) +- * Project (31) +- * Filter (30) @@ -163,27 +163,27 @@ BroadcastExchange (32) (28) Scan parquet default.date_dim -Output [2]: [d_date_sk#8, d_date#22] +Output [2]: [d_date_sk#7, d_date#19] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] ReadSchema: struct (29) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#8, d_date#22] +Input [2]: [d_date_sk#7, d_date#19] (30) Filter [codegen id : 1] -Input [2]: [d_date_sk#8, d_date#22] -Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 2000-01-27)) AND (d_date#22 <= 2000-04-26)) AND isnotnull(d_date_sk#8)) +Input [2]: [d_date_sk#7, d_date#19] +Condition : (((isnotnull(d_date#19) AND (d_date#19 >= 2000-01-27)) AND (d_date#19 <= 2000-04-26)) AND isnotnull(d_date_sk#7)) (31) Project [codegen id : 1] -Output [1]: [d_date_sk#8] -Input [2]: [d_date_sk#8, d_date#22] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#19] (32) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] -Subquery:2 Hosting operator id = 19 Hosting Expression = cs_sold_date_sk#19 IN dynamicpruning#7 +Subquery:2 Hosting operator id = 19 Hosting Expression = cs_sold_date_sk#16 IN dynamicpruning#6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt index e221defe867c1..abbb43c8c75d7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q32/explain.txt @@ -63,7 +63,7 @@ Input [2]: [i_item_sk#5, i_manufact_id#6] (8) BroadcastExchange Input [1]: [i_item_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 6] Left keys [1]: [cs_item_sk#1] @@ -75,81 +75,81 @@ Output [3]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#5] Input [4]: [cs_item_sk#1, cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#5] (11) Scan parquet default.catalog_sales -Output [3]: [cs_item_sk#8, cs_ext_discount_amt#9, cs_sold_date_sk#10] +Output [3]: [cs_item_sk#7, cs_ext_discount_amt#8, cs_sold_date_sk#9] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#10), dynamicpruningexpression(cs_sold_date_sk#10 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(cs_sold_date_sk#9), dynamicpruningexpression(cs_sold_date_sk#9 IN dynamicpruning#4)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 3] -Input [3]: [cs_item_sk#8, cs_ext_discount_amt#9, cs_sold_date_sk#10] +Input [3]: [cs_item_sk#7, cs_ext_discount_amt#8, cs_sold_date_sk#9] (13) Filter [codegen id : 3] -Input [3]: [cs_item_sk#8, cs_ext_discount_amt#9, cs_sold_date_sk#10] -Condition : isnotnull(cs_item_sk#8) +Input [3]: [cs_item_sk#7, cs_ext_discount_amt#8, cs_sold_date_sk#9] +Condition : isnotnull(cs_item_sk#7) (14) ReusedExchange [Reuses operator id: 32] -Output [1]: [d_date_sk#11] +Output [1]: [d_date_sk#10] (15) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [cs_sold_date_sk#10] -Right keys [1]: [d_date_sk#11] +Left keys [1]: [cs_sold_date_sk#9] +Right keys [1]: [d_date_sk#10] Join condition: None (16) Project [codegen id : 3] -Output [2]: [cs_item_sk#8, cs_ext_discount_amt#9] -Input [4]: [cs_item_sk#8, cs_ext_discount_amt#9, cs_sold_date_sk#10, d_date_sk#11] +Output [2]: [cs_item_sk#7, cs_ext_discount_amt#8] +Input [4]: [cs_item_sk#7, cs_ext_discount_amt#8, cs_sold_date_sk#9, d_date_sk#10] (17) HashAggregate [codegen id : 3] -Input [2]: [cs_item_sk#8, cs_ext_discount_amt#9] -Keys [1]: [cs_item_sk#8] -Functions [1]: [partial_avg(UnscaledValue(cs_ext_discount_amt#9))] -Aggregate Attributes [2]: [sum#12, count#13] -Results [3]: [cs_item_sk#8, sum#14, count#15] +Input [2]: [cs_item_sk#7, cs_ext_discount_amt#8] +Keys [1]: [cs_item_sk#7] +Functions [1]: [partial_avg(UnscaledValue(cs_ext_discount_amt#8))] +Aggregate Attributes [2]: [sum#11, count#12] +Results [3]: [cs_item_sk#7, sum#13, count#14] (18) Exchange -Input [3]: [cs_item_sk#8, sum#14, count#15] -Arguments: hashpartitioning(cs_item_sk#8, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [3]: [cs_item_sk#7, sum#13, count#14] +Arguments: hashpartitioning(cs_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (19) HashAggregate [codegen id : 4] -Input [3]: [cs_item_sk#8, sum#14, count#15] -Keys [1]: [cs_item_sk#8] -Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#9))] -Aggregate Attributes [1]: [avg(UnscaledValue(cs_ext_discount_amt#9))#17] -Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(cs_ext_discount_amt#9))#17 / 100.0) as decimal(11,6)))), DecimalType(14,7)) AS (1.3 * avg(cs_ext_discount_amt))#18, cs_item_sk#8] +Input [3]: [cs_item_sk#7, sum#13, count#14] +Keys [1]: [cs_item_sk#7] +Functions [1]: [avg(UnscaledValue(cs_ext_discount_amt#8))] +Aggregate Attributes [1]: [avg(UnscaledValue(cs_ext_discount_amt#8))#15] +Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(cs_ext_discount_amt#8))#15 / 100.0) as decimal(11,6)))), DecimalType(14,7)) AS (1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#7] (20) Filter [codegen id : 4] -Input [2]: [(1.3 * avg(cs_ext_discount_amt))#18, cs_item_sk#8] -Condition : isnotnull((1.3 * avg(cs_ext_discount_amt))#18) +Input [2]: [(1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#7] +Condition : isnotnull((1.3 * avg(cs_ext_discount_amt))#16) (21) BroadcastExchange -Input [2]: [(1.3 * avg(cs_ext_discount_amt))#18, cs_item_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#19] +Input [2]: [(1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=3] (22) BroadcastHashJoin [codegen id : 6] Left keys [1]: [i_item_sk#5] -Right keys [1]: [cs_item_sk#8] -Join condition: (cast(cs_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(cs_ext_discount_amt))#18) +Right keys [1]: [cs_item_sk#7] +Join condition: (cast(cs_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(cs_ext_discount_amt))#16) (23) Project [codegen id : 6] Output [1]: [cs_sold_date_sk#3] -Input [5]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#5, (1.3 * avg(cs_ext_discount_amt))#18, cs_item_sk#8] +Input [5]: [cs_ext_discount_amt#2, cs_sold_date_sk#3, i_item_sk#5, (1.3 * avg(cs_ext_discount_amt))#16, cs_item_sk#7] (24) ReusedExchange [Reuses operator id: 32] -Output [1]: [d_date_sk#20] +Output [1]: [d_date_sk#17] (25) BroadcastHashJoin [codegen id : 6] Left keys [1]: [cs_sold_date_sk#3] -Right keys [1]: [d_date_sk#20] +Right keys [1]: [d_date_sk#17] Join condition: None (26) Project [codegen id : 6] -Output [1]: [1 AS excess discount amount #21] -Input [2]: [cs_sold_date_sk#3, d_date_sk#20] +Output [1]: [1 AS excess discount amount #18] +Input [2]: [cs_sold_date_sk#3, d_date_sk#17] (27) CollectLimit -Input [1]: [excess discount amount #21] +Input [1]: [excess discount amount #18] Arguments: 100 ===== Subqueries ===== @@ -163,27 +163,27 @@ BroadcastExchange (32) (28) Scan parquet default.date_dim -Output [2]: [d_date_sk#20, d_date#22] +Output [2]: [d_date_sk#17, d_date#19] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] ReadSchema: struct (29) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#20, d_date#22] +Input [2]: [d_date_sk#17, d_date#19] (30) Filter [codegen id : 1] -Input [2]: [d_date_sk#20, d_date#22] -Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 2000-01-27)) AND (d_date#22 <= 2000-04-26)) AND isnotnull(d_date_sk#20)) +Input [2]: [d_date_sk#17, d_date#19] +Condition : (((isnotnull(d_date#19) AND (d_date#19 >= 2000-01-27)) AND (d_date#19 <= 2000-04-26)) AND isnotnull(d_date_sk#17)) (31) Project [codegen id : 1] -Output [1]: [d_date_sk#20] -Input [2]: [d_date_sk#20, d_date#22] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_date#19] (32) BroadcastExchange -Input [1]: [d_date_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] -Subquery:2 Hosting operator id = 11 Hosting Expression = cs_sold_date_sk#10 IN dynamicpruning#4 +Subquery:2 Hosting operator id = 11 Hosting Expression = cs_sold_date_sk#9 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt index b1e601e2093ae..2eb042f1dd8ae 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33.sf100/explain.txt @@ -125,7 +125,7 @@ Input [2]: [i_category#9, i_manufact_id#10] (14) BroadcastExchange Input [1]: [i_manufact_id#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (15) BroadcastHashJoin [codegen id : 3] Left keys [1]: [i_manufact_id#8] @@ -134,7 +134,7 @@ Join condition: None (16) BroadcastExchange Input [2]: [i_item_sk#7, i_manufact_id#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] @@ -146,215 +146,215 @@ Output [3]: [ss_addr_sk#2, ss_ext_sales_price#3, i_manufact_id#8] Input [5]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, i_item_sk#7, i_manufact_id#8] (19) Scan parquet default.customer_address -Output [2]: [ca_address_sk#13, ca_gmt_offset#14] +Output [2]: [ca_address_sk#11, ca_gmt_offset#12] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] ReadSchema: struct (20) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#13, ca_gmt_offset#14] +Input [2]: [ca_address_sk#11, ca_gmt_offset#12] (21) Filter [codegen id : 4] -Input [2]: [ca_address_sk#13, ca_gmt_offset#14] -Condition : ((isnotnull(ca_gmt_offset#14) AND (ca_gmt_offset#14 = -5.00)) AND isnotnull(ca_address_sk#13)) +Input [2]: [ca_address_sk#11, ca_gmt_offset#12] +Condition : ((isnotnull(ca_gmt_offset#12) AND (ca_gmt_offset#12 = -5.00)) AND isnotnull(ca_address_sk#11)) (22) Project [codegen id : 4] -Output [1]: [ca_address_sk#13] -Input [2]: [ca_address_sk#13, ca_gmt_offset#14] +Output [1]: [ca_address_sk#11] +Input [2]: [ca_address_sk#11, ca_gmt_offset#12] (23) BroadcastExchange -Input [1]: [ca_address_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [ca_address_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_addr_sk#2] -Right keys [1]: [ca_address_sk#13] +Right keys [1]: [ca_address_sk#11] Join condition: None (25) Project [codegen id : 5] Output [2]: [ss_ext_sales_price#3, i_manufact_id#8] -Input [4]: [ss_addr_sk#2, ss_ext_sales_price#3, i_manufact_id#8, ca_address_sk#13] +Input [4]: [ss_addr_sk#2, ss_ext_sales_price#3, i_manufact_id#8, ca_address_sk#11] (26) HashAggregate [codegen id : 5] Input [2]: [ss_ext_sales_price#3, i_manufact_id#8] Keys [1]: [i_manufact_id#8] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum#16] -Results [2]: [i_manufact_id#8, sum#17] +Aggregate Attributes [1]: [sum#13] +Results [2]: [i_manufact_id#8, sum#14] (27) Exchange -Input [2]: [i_manufact_id#8, sum#17] -Arguments: hashpartitioning(i_manufact_id#8, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [2]: [i_manufact_id#8, sum#14] +Arguments: hashpartitioning(i_manufact_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 6] -Input [2]: [i_manufact_id#8, sum#17] +Input [2]: [i_manufact_id#8, sum#14] Keys [1]: [i_manufact_id#8] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#19] -Results [2]: [i_manufact_id#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#19,17,2) AS total_sales#20] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#15] +Results [2]: [i_manufact_id#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#15,17,2) AS total_sales#16] (29) Scan parquet default.catalog_sales -Output [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] +Output [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#24), dynamicpruningexpression(cs_sold_date_sk#24 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#20), dynamicpruningexpression(cs_sold_date_sk#20 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 11] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] (31) Filter [codegen id : 11] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] -Condition : (isnotnull(cs_bill_addr_sk#21) AND isnotnull(cs_item_sk#22)) +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] +Condition : (isnotnull(cs_bill_addr_sk#17) AND isnotnull(cs_item_sk#18)) (32) ReusedExchange [Reuses operator id: 68] -Output [1]: [d_date_sk#25] +Output [1]: [d_date_sk#21] (33) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_date_sk#24] -Right keys [1]: [d_date_sk#25] +Left keys [1]: [cs_sold_date_sk#20] +Right keys [1]: [d_date_sk#21] Join condition: None (34) Project [codegen id : 11] -Output [3]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23] -Input [5]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24, d_date_sk#25] +Output [3]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19] +Input [5]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20, d_date_sk#21] (35) ReusedExchange [Reuses operator id: 16] -Output [2]: [i_item_sk#26, i_manufact_id#27] +Output [2]: [i_item_sk#22, i_manufact_id#23] (36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_item_sk#22] -Right keys [1]: [i_item_sk#26] +Left keys [1]: [cs_item_sk#18] +Right keys [1]: [i_item_sk#22] Join condition: None (37) Project [codegen id : 11] -Output [3]: [cs_bill_addr_sk#21, cs_ext_sales_price#23, i_manufact_id#27] -Input [5]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, i_item_sk#26, i_manufact_id#27] +Output [3]: [cs_bill_addr_sk#17, cs_ext_sales_price#19, i_manufact_id#23] +Input [5]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, i_item_sk#22, i_manufact_id#23] (38) ReusedExchange [Reuses operator id: 23] -Output [1]: [ca_address_sk#28] +Output [1]: [ca_address_sk#24] (39) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_bill_addr_sk#21] -Right keys [1]: [ca_address_sk#28] +Left keys [1]: [cs_bill_addr_sk#17] +Right keys [1]: [ca_address_sk#24] Join condition: None (40) Project [codegen id : 11] -Output [2]: [cs_ext_sales_price#23, i_manufact_id#27] -Input [4]: [cs_bill_addr_sk#21, cs_ext_sales_price#23, i_manufact_id#27, ca_address_sk#28] +Output [2]: [cs_ext_sales_price#19, i_manufact_id#23] +Input [4]: [cs_bill_addr_sk#17, cs_ext_sales_price#19, i_manufact_id#23, ca_address_sk#24] (41) HashAggregate [codegen id : 11] -Input [2]: [cs_ext_sales_price#23, i_manufact_id#27] -Keys [1]: [i_manufact_id#27] -Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#23))] -Aggregate Attributes [1]: [sum#29] -Results [2]: [i_manufact_id#27, sum#30] +Input [2]: [cs_ext_sales_price#19, i_manufact_id#23] +Keys [1]: [i_manufact_id#23] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#19))] +Aggregate Attributes [1]: [sum#25] +Results [2]: [i_manufact_id#23, sum#26] (42) Exchange -Input [2]: [i_manufact_id#27, sum#30] -Arguments: hashpartitioning(i_manufact_id#27, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [2]: [i_manufact_id#23, sum#26] +Arguments: hashpartitioning(i_manufact_id#23, 5), ENSURE_REQUIREMENTS, [plan_id=5] (43) HashAggregate [codegen id : 12] -Input [2]: [i_manufact_id#27, sum#30] -Keys [1]: [i_manufact_id#27] -Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#23))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#23))#32] -Results [2]: [i_manufact_id#27, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#23))#32,17,2) AS total_sales#33] +Input [2]: [i_manufact_id#23, sum#26] +Keys [1]: [i_manufact_id#23] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#19))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#19))#27] +Results [2]: [i_manufact_id#23, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#19))#27,17,2) AS total_sales#28] (44) Scan parquet default.web_sales -Output [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Output [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#37), dynamicpruningexpression(ws_sold_date_sk#37 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#32), dynamicpruningexpression(ws_sold_date_sk#32 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 17] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] (46) Filter [codegen id : 17] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] -Condition : (isnotnull(ws_bill_addr_sk#35) AND isnotnull(ws_item_sk#34)) +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] +Condition : (isnotnull(ws_bill_addr_sk#30) AND isnotnull(ws_item_sk#29)) (47) ReusedExchange [Reuses operator id: 68] -Output [1]: [d_date_sk#38] +Output [1]: [d_date_sk#33] (48) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#37] -Right keys [1]: [d_date_sk#38] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#33] Join condition: None (49) Project [codegen id : 17] -Output [3]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36] -Input [5]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37, d_date_sk#38] +Output [3]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31] +Input [5]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32, d_date_sk#33] (50) ReusedExchange [Reuses operator id: 16] -Output [2]: [i_item_sk#39, i_manufact_id#40] +Output [2]: [i_item_sk#34, i_manufact_id#35] (51) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_item_sk#34] -Right keys [1]: [i_item_sk#39] +Left keys [1]: [ws_item_sk#29] +Right keys [1]: [i_item_sk#34] Join condition: None (52) Project [codegen id : 17] -Output [3]: [ws_bill_addr_sk#35, ws_ext_sales_price#36, i_manufact_id#40] -Input [5]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, i_item_sk#39, i_manufact_id#40] +Output [3]: [ws_bill_addr_sk#30, ws_ext_sales_price#31, i_manufact_id#35] +Input [5]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, i_item_sk#34, i_manufact_id#35] (53) ReusedExchange [Reuses operator id: 23] -Output [1]: [ca_address_sk#41] +Output [1]: [ca_address_sk#36] (54) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_bill_addr_sk#35] -Right keys [1]: [ca_address_sk#41] +Left keys [1]: [ws_bill_addr_sk#30] +Right keys [1]: [ca_address_sk#36] Join condition: None (55) Project [codegen id : 17] -Output [2]: [ws_ext_sales_price#36, i_manufact_id#40] -Input [4]: [ws_bill_addr_sk#35, ws_ext_sales_price#36, i_manufact_id#40, ca_address_sk#41] +Output [2]: [ws_ext_sales_price#31, i_manufact_id#35] +Input [4]: [ws_bill_addr_sk#30, ws_ext_sales_price#31, i_manufact_id#35, ca_address_sk#36] (56) HashAggregate [codegen id : 17] -Input [2]: [ws_ext_sales_price#36, i_manufact_id#40] -Keys [1]: [i_manufact_id#40] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#36))] -Aggregate Attributes [1]: [sum#42] -Results [2]: [i_manufact_id#40, sum#43] +Input [2]: [ws_ext_sales_price#31, i_manufact_id#35] +Keys [1]: [i_manufact_id#35] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#31))] +Aggregate Attributes [1]: [sum#37] +Results [2]: [i_manufact_id#35, sum#38] (57) Exchange -Input [2]: [i_manufact_id#40, sum#43] -Arguments: hashpartitioning(i_manufact_id#40, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [2]: [i_manufact_id#35, sum#38] +Arguments: hashpartitioning(i_manufact_id#35, 5), ENSURE_REQUIREMENTS, [plan_id=6] (58) HashAggregate [codegen id : 18] -Input [2]: [i_manufact_id#40, sum#43] -Keys [1]: [i_manufact_id#40] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#36))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#36))#45] -Results [2]: [i_manufact_id#40, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#36))#45,17,2) AS total_sales#46] +Input [2]: [i_manufact_id#35, sum#38] +Keys [1]: [i_manufact_id#35] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#31))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#31))#39] +Results [2]: [i_manufact_id#35, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#31))#39,17,2) AS total_sales#40] (59) Union (60) HashAggregate [codegen id : 19] -Input [2]: [i_manufact_id#8, total_sales#20] +Input [2]: [i_manufact_id#8, total_sales#16] Keys [1]: [i_manufact_id#8] -Functions [1]: [partial_sum(total_sales#20)] -Aggregate Attributes [2]: [sum#47, isEmpty#48] -Results [3]: [i_manufact_id#8, sum#49, isEmpty#50] +Functions [1]: [partial_sum(total_sales#16)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_manufact_id#8, sum#43, isEmpty#44] (61) Exchange -Input [3]: [i_manufact_id#8, sum#49, isEmpty#50] -Arguments: hashpartitioning(i_manufact_id#8, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [3]: [i_manufact_id#8, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_manufact_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=7] (62) HashAggregate [codegen id : 20] -Input [3]: [i_manufact_id#8, sum#49, isEmpty#50] +Input [3]: [i_manufact_id#8, sum#43, isEmpty#44] Keys [1]: [i_manufact_id#8] -Functions [1]: [sum(total_sales#20)] -Aggregate Attributes [1]: [sum(total_sales#20)#52] -Results [2]: [i_manufact_id#8, sum(total_sales#20)#52 AS total_sales#53] +Functions [1]: [sum(total_sales#16)] +Aggregate Attributes [1]: [sum(total_sales#16)#45] +Results [2]: [i_manufact_id#8, sum(total_sales#16)#45 AS total_sales#46] (63) TakeOrderedAndProject -Input [2]: [i_manufact_id#8, total_sales#53] -Arguments: 100, [total_sales#53 ASC NULLS FIRST], [i_manufact_id#8, total_sales#53] +Input [2]: [i_manufact_id#8, total_sales#46] +Arguments: 100, [total_sales#46 ASC NULLS FIRST], [i_manufact_id#8, total_sales#46] ===== Subqueries ===== @@ -367,29 +367,29 @@ BroadcastExchange (68) (64) Scan parquet default.date_dim -Output [3]: [d_date_sk#6, d_year#54, d_moy#55] +Output [3]: [d_date_sk#6, d_year#47, d_moy#48] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)] ReadSchema: struct (65) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] (66) Filter [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] -Condition : ((((isnotnull(d_year#54) AND isnotnull(d_moy#55)) AND (d_year#54 = 1998)) AND (d_moy#55 = 5)) AND isnotnull(d_date_sk#6)) +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] +Condition : ((((isnotnull(d_year#47) AND isnotnull(d_moy#48)) AND (d_year#47 = 1998)) AND (d_moy#48 = 5)) AND isnotnull(d_date_sk#6)) (67) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] (68) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#56] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 29 Hosting Expression = cs_sold_date_sk#24 IN dynamicpruning#5 +Subquery:2 Hosting operator id = 29 Hosting Expression = cs_sold_date_sk#20 IN dynamicpruning#5 -Subquery:3 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#37 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#32 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt index 2dc29dcb3a78e..e7285697fcf83 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q33/explain.txt @@ -111,7 +111,7 @@ Input [2]: [ca_address_sk#7, ca_gmt_offset#8] (11) BroadcastExchange Input [1]: [ca_address_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_addr_sk#2] @@ -123,238 +123,238 @@ Output [2]: [ss_item_sk#1, ss_ext_sales_price#3] Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#7] (14) Scan parquet default.item -Output [2]: [i_item_sk#10, i_manufact_id#11] +Output [2]: [i_item_sk#9, i_manufact_id#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#10, i_manufact_id#11] +Input [2]: [i_item_sk#9, i_manufact_id#10] (16) Filter [codegen id : 4] -Input [2]: [i_item_sk#10, i_manufact_id#11] -Condition : isnotnull(i_item_sk#10) +Input [2]: [i_item_sk#9, i_manufact_id#10] +Condition : isnotnull(i_item_sk#9) (17) Scan parquet default.item -Output [2]: [i_category#12, i_manufact_id#13] +Output [2]: [i_category#11, i_manufact_id#12] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Electronics )] ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [2]: [i_category#12, i_manufact_id#13] +Input [2]: [i_category#11, i_manufact_id#12] (19) Filter [codegen id : 3] -Input [2]: [i_category#12, i_manufact_id#13] -Condition : (isnotnull(i_category#12) AND (i_category#12 = Electronics )) +Input [2]: [i_category#11, i_manufact_id#12] +Condition : (isnotnull(i_category#11) AND (i_category#11 = Electronics )) (20) Project [codegen id : 3] -Output [1]: [i_manufact_id#13] -Input [2]: [i_category#12, i_manufact_id#13] +Output [1]: [i_manufact_id#12] +Input [2]: [i_category#11, i_manufact_id#12] (21) BroadcastExchange -Input [1]: [i_manufact_id#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Input [1]: [i_manufact_id#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_manufact_id#11] -Right keys [1]: [i_manufact_id#13] +Left keys [1]: [i_manufact_id#10] +Right keys [1]: [i_manufact_id#12] Join condition: None (23) BroadcastExchange -Input [2]: [i_item_sk#10, i_manufact_id#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Input [2]: [i_item_sk#9, i_manufact_id#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#10] +Right keys [1]: [i_item_sk#9] Join condition: None (25) Project [codegen id : 5] -Output [2]: [ss_ext_sales_price#3, i_manufact_id#11] -Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_manufact_id#11] +Output [2]: [ss_ext_sales_price#3, i_manufact_id#10] +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#9, i_manufact_id#10] (26) HashAggregate [codegen id : 5] -Input [2]: [ss_ext_sales_price#3, i_manufact_id#11] -Keys [1]: [i_manufact_id#11] +Input [2]: [ss_ext_sales_price#3, i_manufact_id#10] +Keys [1]: [i_manufact_id#10] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum#16] -Results [2]: [i_manufact_id#11, sum#17] +Aggregate Attributes [1]: [sum#13] +Results [2]: [i_manufact_id#10, sum#14] (27) Exchange -Input [2]: [i_manufact_id#11, sum#17] -Arguments: hashpartitioning(i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [2]: [i_manufact_id#10, sum#14] +Arguments: hashpartitioning(i_manufact_id#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 6] -Input [2]: [i_manufact_id#11, sum#17] -Keys [1]: [i_manufact_id#11] +Input [2]: [i_manufact_id#10, sum#14] +Keys [1]: [i_manufact_id#10] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#19] -Results [2]: [i_manufact_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#19,17,2) AS total_sales#20] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#15] +Results [2]: [i_manufact_id#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#15,17,2) AS total_sales#16] (29) Scan parquet default.catalog_sales -Output [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] +Output [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#24), dynamicpruningexpression(cs_sold_date_sk#24 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#20), dynamicpruningexpression(cs_sold_date_sk#20 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 11] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] (31) Filter [codegen id : 11] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] -Condition : (isnotnull(cs_bill_addr_sk#21) AND isnotnull(cs_item_sk#22)) +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] +Condition : (isnotnull(cs_bill_addr_sk#17) AND isnotnull(cs_item_sk#18)) (32) ReusedExchange [Reuses operator id: 68] -Output [1]: [d_date_sk#25] +Output [1]: [d_date_sk#21] (33) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_date_sk#24] -Right keys [1]: [d_date_sk#25] +Left keys [1]: [cs_sold_date_sk#20] +Right keys [1]: [d_date_sk#21] Join condition: None (34) Project [codegen id : 11] -Output [3]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23] -Input [5]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24, d_date_sk#25] +Output [3]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19] +Input [5]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20, d_date_sk#21] (35) ReusedExchange [Reuses operator id: 11] -Output [1]: [ca_address_sk#26] +Output [1]: [ca_address_sk#22] (36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_bill_addr_sk#21] -Right keys [1]: [ca_address_sk#26] +Left keys [1]: [cs_bill_addr_sk#17] +Right keys [1]: [ca_address_sk#22] Join condition: None (37) Project [codegen id : 11] -Output [2]: [cs_item_sk#22, cs_ext_sales_price#23] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, ca_address_sk#26] +Output [2]: [cs_item_sk#18, cs_ext_sales_price#19] +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, ca_address_sk#22] (38) ReusedExchange [Reuses operator id: 23] -Output [2]: [i_item_sk#27, i_manufact_id#28] +Output [2]: [i_item_sk#23, i_manufact_id#24] (39) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_item_sk#22] -Right keys [1]: [i_item_sk#27] +Left keys [1]: [cs_item_sk#18] +Right keys [1]: [i_item_sk#23] Join condition: None (40) Project [codegen id : 11] -Output [2]: [cs_ext_sales_price#23, i_manufact_id#28] -Input [4]: [cs_item_sk#22, cs_ext_sales_price#23, i_item_sk#27, i_manufact_id#28] +Output [2]: [cs_ext_sales_price#19, i_manufact_id#24] +Input [4]: [cs_item_sk#18, cs_ext_sales_price#19, i_item_sk#23, i_manufact_id#24] (41) HashAggregate [codegen id : 11] -Input [2]: [cs_ext_sales_price#23, i_manufact_id#28] -Keys [1]: [i_manufact_id#28] -Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#23))] -Aggregate Attributes [1]: [sum#29] -Results [2]: [i_manufact_id#28, sum#30] +Input [2]: [cs_ext_sales_price#19, i_manufact_id#24] +Keys [1]: [i_manufact_id#24] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#19))] +Aggregate Attributes [1]: [sum#25] +Results [2]: [i_manufact_id#24, sum#26] (42) Exchange -Input [2]: [i_manufact_id#28, sum#30] -Arguments: hashpartitioning(i_manufact_id#28, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [2]: [i_manufact_id#24, sum#26] +Arguments: hashpartitioning(i_manufact_id#24, 5), ENSURE_REQUIREMENTS, [plan_id=5] (43) HashAggregate [codegen id : 12] -Input [2]: [i_manufact_id#28, sum#30] -Keys [1]: [i_manufact_id#28] -Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#23))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#23))#32] -Results [2]: [i_manufact_id#28, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#23))#32,17,2) AS total_sales#33] +Input [2]: [i_manufact_id#24, sum#26] +Keys [1]: [i_manufact_id#24] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#19))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#19))#27] +Results [2]: [i_manufact_id#24, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#19))#27,17,2) AS total_sales#28] (44) Scan parquet default.web_sales -Output [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Output [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#37), dynamicpruningexpression(ws_sold_date_sk#37 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#32), dynamicpruningexpression(ws_sold_date_sk#32 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 17] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] (46) Filter [codegen id : 17] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] -Condition : (isnotnull(ws_bill_addr_sk#35) AND isnotnull(ws_item_sk#34)) +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] +Condition : (isnotnull(ws_bill_addr_sk#30) AND isnotnull(ws_item_sk#29)) (47) ReusedExchange [Reuses operator id: 68] -Output [1]: [d_date_sk#38] +Output [1]: [d_date_sk#33] (48) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#37] -Right keys [1]: [d_date_sk#38] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#33] Join condition: None (49) Project [codegen id : 17] -Output [3]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36] -Input [5]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37, d_date_sk#38] +Output [3]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31] +Input [5]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32, d_date_sk#33] (50) ReusedExchange [Reuses operator id: 11] -Output [1]: [ca_address_sk#39] +Output [1]: [ca_address_sk#34] (51) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_bill_addr_sk#35] -Right keys [1]: [ca_address_sk#39] +Left keys [1]: [ws_bill_addr_sk#30] +Right keys [1]: [ca_address_sk#34] Join condition: None (52) Project [codegen id : 17] -Output [2]: [ws_item_sk#34, ws_ext_sales_price#36] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ca_address_sk#39] +Output [2]: [ws_item_sk#29, ws_ext_sales_price#31] +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ca_address_sk#34] (53) ReusedExchange [Reuses operator id: 23] -Output [2]: [i_item_sk#40, i_manufact_id#41] +Output [2]: [i_item_sk#35, i_manufact_id#36] (54) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_item_sk#34] -Right keys [1]: [i_item_sk#40] +Left keys [1]: [ws_item_sk#29] +Right keys [1]: [i_item_sk#35] Join condition: None (55) Project [codegen id : 17] -Output [2]: [ws_ext_sales_price#36, i_manufact_id#41] -Input [4]: [ws_item_sk#34, ws_ext_sales_price#36, i_item_sk#40, i_manufact_id#41] +Output [2]: [ws_ext_sales_price#31, i_manufact_id#36] +Input [4]: [ws_item_sk#29, ws_ext_sales_price#31, i_item_sk#35, i_manufact_id#36] (56) HashAggregate [codegen id : 17] -Input [2]: [ws_ext_sales_price#36, i_manufact_id#41] -Keys [1]: [i_manufact_id#41] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#36))] -Aggregate Attributes [1]: [sum#42] -Results [2]: [i_manufact_id#41, sum#43] +Input [2]: [ws_ext_sales_price#31, i_manufact_id#36] +Keys [1]: [i_manufact_id#36] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#31))] +Aggregate Attributes [1]: [sum#37] +Results [2]: [i_manufact_id#36, sum#38] (57) Exchange -Input [2]: [i_manufact_id#41, sum#43] -Arguments: hashpartitioning(i_manufact_id#41, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [2]: [i_manufact_id#36, sum#38] +Arguments: hashpartitioning(i_manufact_id#36, 5), ENSURE_REQUIREMENTS, [plan_id=6] (58) HashAggregate [codegen id : 18] -Input [2]: [i_manufact_id#41, sum#43] -Keys [1]: [i_manufact_id#41] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#36))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#36))#45] -Results [2]: [i_manufact_id#41, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#36))#45,17,2) AS total_sales#46] +Input [2]: [i_manufact_id#36, sum#38] +Keys [1]: [i_manufact_id#36] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#31))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#31))#39] +Results [2]: [i_manufact_id#36, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#31))#39,17,2) AS total_sales#40] (59) Union (60) HashAggregate [codegen id : 19] -Input [2]: [i_manufact_id#11, total_sales#20] -Keys [1]: [i_manufact_id#11] -Functions [1]: [partial_sum(total_sales#20)] -Aggregate Attributes [2]: [sum#47, isEmpty#48] -Results [3]: [i_manufact_id#11, sum#49, isEmpty#50] +Input [2]: [i_manufact_id#10, total_sales#16] +Keys [1]: [i_manufact_id#10] +Functions [1]: [partial_sum(total_sales#16)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_manufact_id#10, sum#43, isEmpty#44] (61) Exchange -Input [3]: [i_manufact_id#11, sum#49, isEmpty#50] -Arguments: hashpartitioning(i_manufact_id#11, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [3]: [i_manufact_id#10, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_manufact_id#10, 5), ENSURE_REQUIREMENTS, [plan_id=7] (62) HashAggregate [codegen id : 20] -Input [3]: [i_manufact_id#11, sum#49, isEmpty#50] -Keys [1]: [i_manufact_id#11] -Functions [1]: [sum(total_sales#20)] -Aggregate Attributes [1]: [sum(total_sales#20)#52] -Results [2]: [i_manufact_id#11, sum(total_sales#20)#52 AS total_sales#53] +Input [3]: [i_manufact_id#10, sum#43, isEmpty#44] +Keys [1]: [i_manufact_id#10] +Functions [1]: [sum(total_sales#16)] +Aggregate Attributes [1]: [sum(total_sales#16)#45] +Results [2]: [i_manufact_id#10, sum(total_sales#16)#45 AS total_sales#46] (63) TakeOrderedAndProject -Input [2]: [i_manufact_id#11, total_sales#53] -Arguments: 100, [total_sales#53 ASC NULLS FIRST], [i_manufact_id#11, total_sales#53] +Input [2]: [i_manufact_id#10, total_sales#46] +Arguments: 100, [total_sales#46 ASC NULLS FIRST], [i_manufact_id#10, total_sales#46] ===== Subqueries ===== @@ -367,29 +367,29 @@ BroadcastExchange (68) (64) Scan parquet default.date_dim -Output [3]: [d_date_sk#6, d_year#54, d_moy#55] +Output [3]: [d_date_sk#6, d_year#47, d_moy#48] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,5), IsNotNull(d_date_sk)] ReadSchema: struct (65) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] (66) Filter [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] -Condition : ((((isnotnull(d_year#54) AND isnotnull(d_moy#55)) AND (d_year#54 = 1998)) AND (d_moy#55 = 5)) AND isnotnull(d_date_sk#6)) +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] +Condition : ((((isnotnull(d_year#47) AND isnotnull(d_moy#48)) AND (d_year#47 = 1998)) AND (d_moy#48 = 5)) AND isnotnull(d_date_sk#6)) (67) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] (68) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#56] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 29 Hosting Expression = cs_sold_date_sk#24 IN dynamicpruning#5 +Subquery:2 Hosting operator id = 29 Hosting Expression = cs_sold_date_sk#20 IN dynamicpruning#5 -Subquery:3 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#37 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#32 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.sf100/explain.txt index 796fc705e2651..5e39492758d51 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35.sf100/explain.txt @@ -70,115 +70,115 @@ Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) (4) Exchange Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] -Arguments: hashpartitioning(c_customer_sk#3, 5), ENSURE_REQUIREMENTS, [id=#6] +Arguments: hashpartitioning(c_customer_sk#3, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] Arguments: [c_customer_sk#3 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_sales -Output [2]: [ss_customer_sk#7, ss_sold_date_sk#8] +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#8), dynamicpruningexpression(ss_sold_date_sk#8 IN dynamicpruning#9)] +PartitionFilters: [isnotnull(ss_sold_date_sk#7), dynamicpruningexpression(ss_sold_date_sk#7 IN dynamicpruning#8)] ReadSchema: struct (7) ColumnarToRow [codegen id : 4] -Input [2]: [ss_customer_sk#7, ss_sold_date_sk#8] +Input [2]: [ss_customer_sk#6, ss_sold_date_sk#7] (8) ReusedExchange [Reuses operator id: 58] -Output [1]: [d_date_sk#10] +Output [1]: [d_date_sk#9] (9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#10] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#9] Join condition: None (10) Project [codegen id : 4] -Output [1]: [ss_customer_sk#7] -Input [3]: [ss_customer_sk#7, ss_sold_date_sk#8, d_date_sk#10] +Output [1]: [ss_customer_sk#6] +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#9] (11) Exchange -Input [1]: [ss_customer_sk#7] -Arguments: hashpartitioning(ss_customer_sk#7, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [1]: [ss_customer_sk#6] +Arguments: hashpartitioning(ss_customer_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 5] -Input [1]: [ss_customer_sk#7] -Arguments: [ss_customer_sk#7 ASC NULLS FIRST], false, 0 +Input [1]: [ss_customer_sk#6] +Arguments: [ss_customer_sk#6 ASC NULLS FIRST], false, 0 (13) SortMergeJoin [codegen id : 6] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ss_customer_sk#7] +Right keys [1]: [ss_customer_sk#6] Join condition: None (14) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#12, ws_sold_date_sk#13] +Output [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#13), dynamicpruningexpression(ws_sold_date_sk#13 IN dynamicpruning#9)] +PartitionFilters: [isnotnull(ws_sold_date_sk#11), dynamicpruningexpression(ws_sold_date_sk#11 IN dynamicpruning#8)] ReadSchema: struct (15) ColumnarToRow [codegen id : 8] -Input [2]: [ws_bill_customer_sk#12, ws_sold_date_sk#13] +Input [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] (16) ReusedExchange [Reuses operator id: 58] -Output [1]: [d_date_sk#14] +Output [1]: [d_date_sk#12] (17) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_sold_date_sk#13] -Right keys [1]: [d_date_sk#14] +Left keys [1]: [ws_sold_date_sk#11] +Right keys [1]: [d_date_sk#12] Join condition: None (18) Project [codegen id : 8] -Output [1]: [ws_bill_customer_sk#12] -Input [3]: [ws_bill_customer_sk#12, ws_sold_date_sk#13, d_date_sk#14] +Output [1]: [ws_bill_customer_sk#10] +Input [3]: [ws_bill_customer_sk#10, ws_sold_date_sk#11, d_date_sk#12] (19) Exchange -Input [1]: [ws_bill_customer_sk#12] -Arguments: hashpartitioning(ws_bill_customer_sk#12, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [1]: [ws_bill_customer_sk#10] +Arguments: hashpartitioning(ws_bill_customer_sk#10, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) Sort [codegen id : 9] -Input [1]: [ws_bill_customer_sk#12] -Arguments: [ws_bill_customer_sk#12 ASC NULLS FIRST], false, 0 +Input [1]: [ws_bill_customer_sk#10] +Arguments: [ws_bill_customer_sk#10 ASC NULLS FIRST], false, 0 (21) SortMergeJoin [codegen id : 10] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ws_bill_customer_sk#12] +Right keys [1]: [ws_bill_customer_sk#10] Join condition: None (22) Scan parquet default.catalog_sales -Output [2]: [cs_ship_customer_sk#16, cs_sold_date_sk#17] +Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#17), dynamicpruningexpression(cs_sold_date_sk#17 IN dynamicpruning#9)] +PartitionFilters: [isnotnull(cs_sold_date_sk#14), dynamicpruningexpression(cs_sold_date_sk#14 IN dynamicpruning#8)] ReadSchema: struct (23) ColumnarToRow [codegen id : 12] -Input [2]: [cs_ship_customer_sk#16, cs_sold_date_sk#17] +Input [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] (24) ReusedExchange [Reuses operator id: 58] -Output [1]: [d_date_sk#18] +Output [1]: [d_date_sk#15] (25) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [cs_sold_date_sk#17] -Right keys [1]: [d_date_sk#18] +Left keys [1]: [cs_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] Join condition: None (26) Project [codegen id : 12] -Output [1]: [cs_ship_customer_sk#16] -Input [3]: [cs_ship_customer_sk#16, cs_sold_date_sk#17, d_date_sk#18] +Output [1]: [cs_ship_customer_sk#13] +Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] (27) Exchange -Input [1]: [cs_ship_customer_sk#16] -Arguments: hashpartitioning(cs_ship_customer_sk#16, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [1]: [cs_ship_customer_sk#13] +Arguments: hashpartitioning(cs_ship_customer_sk#13, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) Sort [codegen id : 13] -Input [1]: [cs_ship_customer_sk#16] -Arguments: [cs_ship_customer_sk#16 ASC NULLS FIRST], false, 0 +Input [1]: [cs_ship_customer_sk#13] +Arguments: [cs_ship_customer_sk#13 ASC NULLS FIRST], false, 0 (29) SortMergeJoin [codegen id : 14] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [cs_ship_customer_sk#16] +Right keys [1]: [cs_ship_customer_sk#13] Join condition: None (30) Filter [codegen id : 14] @@ -191,107 +191,107 @@ Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2 (32) Exchange Input [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] -Arguments: hashpartitioning(c_current_addr_sk#5, 5), ENSURE_REQUIREMENTS, [id=#20] +Arguments: hashpartitioning(c_current_addr_sk#5, 5), ENSURE_REQUIREMENTS, [plan_id=5] (33) Sort [codegen id : 15] Input [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] Arguments: [c_current_addr_sk#5 ASC NULLS FIRST], false, 0 (34) Scan parquet default.customer_address -Output [2]: [ca_address_sk#21, ca_state#22] +Output [2]: [ca_address_sk#16, ca_state#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 16] -Input [2]: [ca_address_sk#21, ca_state#22] +Input [2]: [ca_address_sk#16, ca_state#17] (36) Filter [codegen id : 16] -Input [2]: [ca_address_sk#21, ca_state#22] -Condition : isnotnull(ca_address_sk#21) +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : isnotnull(ca_address_sk#16) (37) Exchange -Input [2]: [ca_address_sk#21, ca_state#22] -Arguments: hashpartitioning(ca_address_sk#21, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: hashpartitioning(ca_address_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=6] (38) Sort [codegen id : 17] -Input [2]: [ca_address_sk#21, ca_state#22] -Arguments: [ca_address_sk#21 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: [ca_address_sk#16 ASC NULLS FIRST], false, 0 (39) SortMergeJoin [codegen id : 18] Left keys [1]: [c_current_addr_sk#5] -Right keys [1]: [ca_address_sk#21] +Right keys [1]: [ca_address_sk#16] Join condition: None (40) Project [codegen id : 18] -Output [2]: [c_current_cdemo_sk#4, ca_state#22] -Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#21, ca_state#22] +Output [2]: [c_current_cdemo_sk#4, ca_state#17] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#16, ca_state#17] (41) Exchange -Input [2]: [c_current_cdemo_sk#4, ca_state#22] -Arguments: hashpartitioning(c_current_cdemo_sk#4, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [2]: [c_current_cdemo_sk#4, ca_state#17] +Arguments: hashpartitioning(c_current_cdemo_sk#4, 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) Sort [codegen id : 19] -Input [2]: [c_current_cdemo_sk#4, ca_state#22] +Input [2]: [c_current_cdemo_sk#4, ca_state#17] Arguments: [c_current_cdemo_sk#4 ASC NULLS FIRST], false, 0 (43) Scan parquet default.customer_demographics -Output [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Output [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 20] -Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] (45) Filter [codegen id : 20] -Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Condition : isnotnull(cd_demo_sk#25) +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Condition : isnotnull(cd_demo_sk#18) (46) Exchange -Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Arguments: hashpartitioning(cd_demo_sk#25, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Arguments: hashpartitioning(cd_demo_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=8] (47) Sort [codegen id : 21] -Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Arguments: [cd_demo_sk#25 ASC NULLS FIRST], false, 0 +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Arguments: [cd_demo_sk#18 ASC NULLS FIRST], false, 0 (48) SortMergeJoin [codegen id : 22] Left keys [1]: [c_current_cdemo_sk#4] -Right keys [1]: [cd_demo_sk#25] +Right keys [1]: [cd_demo_sk#18] Join condition: None (49) Project [codegen id : 22] -Output [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Input [8]: [c_current_cdemo_sk#4, ca_state#22, cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Output [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Input [8]: [c_current_cdemo_sk#4, ca_state#17, cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] (50) HashAggregate [codegen id : 22] -Input [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Keys [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Functions [10]: [partial_count(1), partial_min(cd_dep_count#28), partial_max(cd_dep_count#28), partial_avg(cd_dep_count#28), partial_min(cd_dep_employed_count#29), partial_max(cd_dep_employed_count#29), partial_avg(cd_dep_employed_count#29), partial_min(cd_dep_college_count#30), partial_max(cd_dep_college_count#30), partial_avg(cd_dep_college_count#30)] -Aggregate Attributes [13]: [count#32, min#33, max#34, sum#35, count#36, min#37, max#38, sum#39, count#40, min#41, max#42, sum#43, count#44] -Results [19]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#45, min#46, max#47, sum#48, count#49, min#50, max#51, sum#52, count#53, min#54, max#55, sum#56, count#57] +Input [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Keys [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Functions [10]: [partial_count(1), partial_min(cd_dep_count#21), partial_max(cd_dep_count#21), partial_avg(cd_dep_count#21), partial_min(cd_dep_employed_count#22), partial_max(cd_dep_employed_count#22), partial_avg(cd_dep_employed_count#22), partial_min(cd_dep_college_count#23), partial_max(cd_dep_college_count#23), partial_avg(cd_dep_college_count#23)] +Aggregate Attributes [13]: [count#24, min#25, max#26, sum#27, count#28, min#29, max#30, sum#31, count#32, min#33, max#34, sum#35, count#36] +Results [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, min#38, max#39, sum#40, count#41, min#42, max#43, sum#44, count#45, min#46, max#47, sum#48, count#49] (51) Exchange -Input [19]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#45, min#46, max#47, sum#48, count#49, min#50, max#51, sum#52, count#53, min#54, max#55, sum#56, count#57] -Arguments: hashpartitioning(ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, 5), ENSURE_REQUIREMENTS, [id=#58] +Input [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, min#38, max#39, sum#40, count#41, min#42, max#43, sum#44, count#45, min#46, max#47, sum#48, count#49] +Arguments: hashpartitioning(ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, 5), ENSURE_REQUIREMENTS, [plan_id=9] (52) HashAggregate [codegen id : 23] -Input [19]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#45, min#46, max#47, sum#48, count#49, min#50, max#51, sum#52, count#53, min#54, max#55, sum#56, count#57] -Keys [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Functions [10]: [count(1), min(cd_dep_count#28), max(cd_dep_count#28), avg(cd_dep_count#28), min(cd_dep_employed_count#29), max(cd_dep_employed_count#29), avg(cd_dep_employed_count#29), min(cd_dep_college_count#30), max(cd_dep_college_count#30), avg(cd_dep_college_count#30)] -Aggregate Attributes [10]: [count(1)#59, min(cd_dep_count#28)#60, max(cd_dep_count#28)#61, avg(cd_dep_count#28)#62, min(cd_dep_employed_count#29)#63, max(cd_dep_employed_count#29)#64, avg(cd_dep_employed_count#29)#65, min(cd_dep_college_count#30)#66, max(cd_dep_college_count#30)#67, avg(cd_dep_college_count#30)#68] -Results [18]: [ca_state#22, cd_gender#26, cd_marital_status#27, count(1)#59 AS cnt1#69, min(cd_dep_count#28)#60 AS min(cd_dep_count)#70, max(cd_dep_count#28)#61 AS max(cd_dep_count)#71, avg(cd_dep_count#28)#62 AS avg(cd_dep_count)#72, cd_dep_employed_count#29, count(1)#59 AS cnt2#73, min(cd_dep_employed_count#29)#63 AS min(cd_dep_employed_count)#74, max(cd_dep_employed_count#29)#64 AS max(cd_dep_employed_count)#75, avg(cd_dep_employed_count#29)#65 AS avg(cd_dep_employed_count)#76, cd_dep_college_count#30, count(1)#59 AS cnt3#77, min(cd_dep_college_count#30)#66 AS min(cd_dep_college_count)#78, max(cd_dep_college_count#30)#67 AS max(cd_dep_college_count)#79, avg(cd_dep_college_count#30)#68 AS avg(cd_dep_college_count)#80, cd_dep_count#28] +Input [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, min#38, max#39, sum#40, count#41, min#42, max#43, sum#44, count#45, min#46, max#47, sum#48, count#49] +Keys [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Functions [10]: [count(1), min(cd_dep_count#21), max(cd_dep_count#21), avg(cd_dep_count#21), min(cd_dep_employed_count#22), max(cd_dep_employed_count#22), avg(cd_dep_employed_count#22), min(cd_dep_college_count#23), max(cd_dep_college_count#23), avg(cd_dep_college_count#23)] +Aggregate Attributes [10]: [count(1)#50, min(cd_dep_count#21)#51, max(cd_dep_count#21)#52, avg(cd_dep_count#21)#53, min(cd_dep_employed_count#22)#54, max(cd_dep_employed_count#22)#55, avg(cd_dep_employed_count#22)#56, min(cd_dep_college_count#23)#57, max(cd_dep_college_count#23)#58, avg(cd_dep_college_count#23)#59] +Results [18]: [ca_state#17, cd_gender#19, cd_marital_status#20, count(1)#50 AS cnt1#60, min(cd_dep_count#21)#51 AS min(cd_dep_count)#61, max(cd_dep_count#21)#52 AS max(cd_dep_count)#62, avg(cd_dep_count#21)#53 AS avg(cd_dep_count)#63, cd_dep_employed_count#22, count(1)#50 AS cnt2#64, min(cd_dep_employed_count#22)#54 AS min(cd_dep_employed_count)#65, max(cd_dep_employed_count#22)#55 AS max(cd_dep_employed_count)#66, avg(cd_dep_employed_count#22)#56 AS avg(cd_dep_employed_count)#67, cd_dep_college_count#23, count(1)#50 AS cnt3#68, min(cd_dep_college_count#23)#57 AS min(cd_dep_college_count)#69, max(cd_dep_college_count#23)#58 AS max(cd_dep_college_count)#70, avg(cd_dep_college_count#23)#59 AS avg(cd_dep_college_count)#71, cd_dep_count#21] (53) TakeOrderedAndProject -Input [18]: [ca_state#22, cd_gender#26, cd_marital_status#27, cnt1#69, min(cd_dep_count)#70, max(cd_dep_count)#71, avg(cd_dep_count)#72, cd_dep_employed_count#29, cnt2#73, min(cd_dep_employed_count)#74, max(cd_dep_employed_count)#75, avg(cd_dep_employed_count)#76, cd_dep_college_count#30, cnt3#77, min(cd_dep_college_count)#78, max(cd_dep_college_count)#79, avg(cd_dep_college_count)#80, cd_dep_count#28] -Arguments: 100, [ca_state#22 ASC NULLS FIRST, cd_gender#26 ASC NULLS FIRST, cd_marital_status#27 ASC NULLS FIRST, cd_dep_count#28 ASC NULLS FIRST, cd_dep_employed_count#29 ASC NULLS FIRST, cd_dep_college_count#30 ASC NULLS FIRST], [ca_state#22, cd_gender#26, cd_marital_status#27, cnt1#69, min(cd_dep_count)#70, max(cd_dep_count)#71, avg(cd_dep_count)#72, cd_dep_employed_count#29, cnt2#73, min(cd_dep_employed_count)#74, max(cd_dep_employed_count)#75, avg(cd_dep_employed_count)#76, cd_dep_college_count#30, cnt3#77, min(cd_dep_college_count)#78, max(cd_dep_college_count)#79, avg(cd_dep_college_count)#80] +Input [18]: [ca_state#17, cd_gender#19, cd_marital_status#20, cnt1#60, min(cd_dep_count)#61, max(cd_dep_count)#62, avg(cd_dep_count)#63, cd_dep_employed_count#22, cnt2#64, min(cd_dep_employed_count)#65, max(cd_dep_employed_count)#66, avg(cd_dep_employed_count)#67, cd_dep_college_count#23, cnt3#68, min(cd_dep_college_count)#69, max(cd_dep_college_count)#70, avg(cd_dep_college_count)#71, cd_dep_count#21] +Arguments: 100, [ca_state#17 ASC NULLS FIRST, cd_gender#19 ASC NULLS FIRST, cd_marital_status#20 ASC NULLS FIRST, cd_dep_count#21 ASC NULLS FIRST, cd_dep_employed_count#22 ASC NULLS FIRST, cd_dep_college_count#23 ASC NULLS FIRST], [ca_state#17, cd_gender#19, cd_marital_status#20, cnt1#60, min(cd_dep_count)#61, max(cd_dep_count)#62, avg(cd_dep_count)#63, cd_dep_employed_count#22, cnt2#64, min(cd_dep_employed_count)#65, max(cd_dep_employed_count)#66, avg(cd_dep_employed_count)#67, cd_dep_college_count#23, cnt3#68, min(cd_dep_college_count)#69, max(cd_dep_college_count)#70, avg(cd_dep_college_count)#71] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 BroadcastExchange (58) +- * Project (57) +- * Filter (56) @@ -300,29 +300,29 @@ BroadcastExchange (58) (54) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#81, d_qoy#82] +Output [3]: [d_date_sk#9, d_year#72, d_qoy#73] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] ReadSchema: struct (55) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#81, d_qoy#82] +Input [3]: [d_date_sk#9, d_year#72, d_qoy#73] (56) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#81, d_qoy#82] -Condition : ((((isnotnull(d_year#81) AND isnotnull(d_qoy#82)) AND (d_year#81 = 2002)) AND (d_qoy#82 < 4)) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#9, d_year#72, d_qoy#73] +Condition : ((((isnotnull(d_year#72) AND isnotnull(d_qoy#73)) AND (d_year#72 = 2002)) AND (d_qoy#73 < 4)) AND isnotnull(d_date_sk#9)) (57) Project [codegen id : 1] -Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#81, d_qoy#82] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#72, d_qoy#73] (58) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#83] +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] -Subquery:2 Hosting operator id = 14 Hosting Expression = ws_sold_date_sk#13 IN dynamicpruning#9 +Subquery:2 Hosting operator id = 14 Hosting Expression = ws_sold_date_sk#11 IN dynamicpruning#8 -Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#17 IN dynamicpruning#9 +Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#14 IN dynamicpruning#8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt index e5a9030386262..0375a3a65954d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q35/explain.txt @@ -81,7 +81,7 @@ Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#9] (9) BroadcastExchange Input [1]: [ss_customer_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#3] @@ -89,65 +89,65 @@ Right keys [1]: [ss_customer_sk#6] Join condition: None (11) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Output [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#12), dynamicpruningexpression(ws_sold_date_sk#12 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ws_sold_date_sk#11), dynamicpruningexpression(ws_sold_date_sk#11 IN dynamicpruning#8)] ReadSchema: struct (12) ColumnarToRow [codegen id : 4] -Input [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Input [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] (13) ReusedExchange [Reuses operator id: 47] -Output [1]: [d_date_sk#13] +Output [1]: [d_date_sk#12] (14) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ws_sold_date_sk#12] -Right keys [1]: [d_date_sk#13] +Left keys [1]: [ws_sold_date_sk#11] +Right keys [1]: [d_date_sk#12] Join condition: None (15) Project [codegen id : 4] -Output [1]: [ws_bill_customer_sk#11] -Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] +Output [1]: [ws_bill_customer_sk#10] +Input [3]: [ws_bill_customer_sk#10, ws_sold_date_sk#11, d_date_sk#12] (16) BroadcastExchange -Input [1]: [ws_bill_customer_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Input [1]: [ws_bill_customer_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ws_bill_customer_sk#11] +Right keys [1]: [ws_bill_customer_sk#10] Join condition: None (18) Scan parquet default.catalog_sales -Output [2]: [cs_ship_customer_sk#15, cs_sold_date_sk#16] +Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#16), dynamicpruningexpression(cs_sold_date_sk#16 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(cs_sold_date_sk#14), dynamicpruningexpression(cs_sold_date_sk#14 IN dynamicpruning#8)] ReadSchema: struct (19) ColumnarToRow [codegen id : 6] -Input [2]: [cs_ship_customer_sk#15, cs_sold_date_sk#16] +Input [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] (20) ReusedExchange [Reuses operator id: 47] -Output [1]: [d_date_sk#17] +Output [1]: [d_date_sk#15] (21) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_sold_date_sk#16] -Right keys [1]: [d_date_sk#17] +Left keys [1]: [cs_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] Join condition: None (22) Project [codegen id : 6] -Output [1]: [cs_ship_customer_sk#15] -Input [3]: [cs_ship_customer_sk#15, cs_sold_date_sk#16, d_date_sk#17] +Output [1]: [cs_ship_customer_sk#13] +Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] (23) BroadcastExchange -Input [1]: [cs_ship_customer_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [cs_ship_customer_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [cs_ship_customer_sk#15] +Right keys [1]: [cs_ship_customer_sk#13] Join condition: None (25) Filter [codegen id : 9] @@ -159,80 +159,80 @@ Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] (27) Scan parquet default.customer_address -Output [2]: [ca_address_sk#19, ca_state#20] +Output [2]: [ca_address_sk#16, ca_state#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#19, ca_state#20] +Input [2]: [ca_address_sk#16, ca_state#17] (29) Filter [codegen id : 7] -Input [2]: [ca_address_sk#19, ca_state#20] -Condition : isnotnull(ca_address_sk#19) +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : isnotnull(ca_address_sk#16) (30) BroadcastExchange -Input [2]: [ca_address_sk#19, ca_state#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (31) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_current_addr_sk#5] -Right keys [1]: [ca_address_sk#19] +Right keys [1]: [ca_address_sk#16] Join condition: None (32) Project [codegen id : 9] -Output [2]: [c_current_cdemo_sk#4, ca_state#20] -Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#19, ca_state#20] +Output [2]: [c_current_cdemo_sk#4, ca_state#17] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#16, ca_state#17] (33) Scan parquet default.customer_demographics -Output [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Output [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 8] -Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] (35) Filter [codegen id : 8] -Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Condition : isnotnull(cd_demo_sk#22) +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Condition : isnotnull(cd_demo_sk#18) (36) BroadcastExchange -Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (37) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_current_cdemo_sk#4] -Right keys [1]: [cd_demo_sk#22] +Right keys [1]: [cd_demo_sk#18] Join condition: None (38) Project [codegen id : 9] -Output [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Input [8]: [c_current_cdemo_sk#4, ca_state#20, cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Output [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Input [8]: [c_current_cdemo_sk#4, ca_state#17, cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] (39) HashAggregate [codegen id : 9] -Input [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Keys [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Functions [10]: [partial_count(1), partial_min(cd_dep_count#25), partial_max(cd_dep_count#25), partial_avg(cd_dep_count#25), partial_min(cd_dep_employed_count#26), partial_max(cd_dep_employed_count#26), partial_avg(cd_dep_employed_count#26), partial_min(cd_dep_college_count#27), partial_max(cd_dep_college_count#27), partial_avg(cd_dep_college_count#27)] -Aggregate Attributes [13]: [count#29, min#30, max#31, sum#32, count#33, min#34, max#35, sum#36, count#37, min#38, max#39, sum#40, count#41] -Results [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50, min#51, max#52, sum#53, count#54] +Input [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Keys [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Functions [10]: [partial_count(1), partial_min(cd_dep_count#21), partial_max(cd_dep_count#21), partial_avg(cd_dep_count#21), partial_min(cd_dep_employed_count#22), partial_max(cd_dep_employed_count#22), partial_avg(cd_dep_employed_count#22), partial_min(cd_dep_college_count#23), partial_max(cd_dep_college_count#23), partial_avg(cd_dep_college_count#23)] +Aggregate Attributes [13]: [count#24, min#25, max#26, sum#27, count#28, min#29, max#30, sum#31, count#32, min#33, max#34, sum#35, count#36] +Results [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, min#38, max#39, sum#40, count#41, min#42, max#43, sum#44, count#45, min#46, max#47, sum#48, count#49] (40) Exchange -Input [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50, min#51, max#52, sum#53, count#54] -Arguments: hashpartitioning(ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), ENSURE_REQUIREMENTS, [id=#55] +Input [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, min#38, max#39, sum#40, count#41, min#42, max#43, sum#44, count#45, min#46, max#47, sum#48, count#49] +Arguments: hashpartitioning(ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, 5), ENSURE_REQUIREMENTS, [plan_id=6] (41) HashAggregate [codegen id : 10] -Input [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, min#43, max#44, sum#45, count#46, min#47, max#48, sum#49, count#50, min#51, max#52, sum#53, count#54] -Keys [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Functions [10]: [count(1), min(cd_dep_count#25), max(cd_dep_count#25), avg(cd_dep_count#25), min(cd_dep_employed_count#26), max(cd_dep_employed_count#26), avg(cd_dep_employed_count#26), min(cd_dep_college_count#27), max(cd_dep_college_count#27), avg(cd_dep_college_count#27)] -Aggregate Attributes [10]: [count(1)#56, min(cd_dep_count#25)#57, max(cd_dep_count#25)#58, avg(cd_dep_count#25)#59, min(cd_dep_employed_count#26)#60, max(cd_dep_employed_count#26)#61, avg(cd_dep_employed_count#26)#62, min(cd_dep_college_count#27)#63, max(cd_dep_college_count#27)#64, avg(cd_dep_college_count#27)#65] -Results [18]: [ca_state#20, cd_gender#23, cd_marital_status#24, count(1)#56 AS cnt1#66, min(cd_dep_count#25)#57 AS min(cd_dep_count)#67, max(cd_dep_count#25)#58 AS max(cd_dep_count)#68, avg(cd_dep_count#25)#59 AS avg(cd_dep_count)#69, cd_dep_employed_count#26, count(1)#56 AS cnt2#70, min(cd_dep_employed_count#26)#60 AS min(cd_dep_employed_count)#71, max(cd_dep_employed_count#26)#61 AS max(cd_dep_employed_count)#72, avg(cd_dep_employed_count#26)#62 AS avg(cd_dep_employed_count)#73, cd_dep_college_count#27, count(1)#56 AS cnt3#74, min(cd_dep_college_count#27)#63 AS min(cd_dep_college_count)#75, max(cd_dep_college_count#27)#64 AS max(cd_dep_college_count)#76, avg(cd_dep_college_count#27)#65 AS avg(cd_dep_college_count)#77, cd_dep_count#25] +Input [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, min#38, max#39, sum#40, count#41, min#42, max#43, sum#44, count#45, min#46, max#47, sum#48, count#49] +Keys [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Functions [10]: [count(1), min(cd_dep_count#21), max(cd_dep_count#21), avg(cd_dep_count#21), min(cd_dep_employed_count#22), max(cd_dep_employed_count#22), avg(cd_dep_employed_count#22), min(cd_dep_college_count#23), max(cd_dep_college_count#23), avg(cd_dep_college_count#23)] +Aggregate Attributes [10]: [count(1)#50, min(cd_dep_count#21)#51, max(cd_dep_count#21)#52, avg(cd_dep_count#21)#53, min(cd_dep_employed_count#22)#54, max(cd_dep_employed_count#22)#55, avg(cd_dep_employed_count#22)#56, min(cd_dep_college_count#23)#57, max(cd_dep_college_count#23)#58, avg(cd_dep_college_count#23)#59] +Results [18]: [ca_state#17, cd_gender#19, cd_marital_status#20, count(1)#50 AS cnt1#60, min(cd_dep_count#21)#51 AS min(cd_dep_count)#61, max(cd_dep_count#21)#52 AS max(cd_dep_count)#62, avg(cd_dep_count#21)#53 AS avg(cd_dep_count)#63, cd_dep_employed_count#22, count(1)#50 AS cnt2#64, min(cd_dep_employed_count#22)#54 AS min(cd_dep_employed_count)#65, max(cd_dep_employed_count#22)#55 AS max(cd_dep_employed_count)#66, avg(cd_dep_employed_count#22)#56 AS avg(cd_dep_employed_count)#67, cd_dep_college_count#23, count(1)#50 AS cnt3#68, min(cd_dep_college_count#23)#57 AS min(cd_dep_college_count)#69, max(cd_dep_college_count#23)#58 AS max(cd_dep_college_count)#70, avg(cd_dep_college_count#23)#59 AS avg(cd_dep_college_count)#71, cd_dep_count#21] (42) TakeOrderedAndProject -Input [18]: [ca_state#20, cd_gender#23, cd_marital_status#24, cnt1#66, min(cd_dep_count)#67, max(cd_dep_count)#68, avg(cd_dep_count)#69, cd_dep_employed_count#26, cnt2#70, min(cd_dep_employed_count)#71, max(cd_dep_employed_count)#72, avg(cd_dep_employed_count)#73, cd_dep_college_count#27, cnt3#74, min(cd_dep_college_count)#75, max(cd_dep_college_count)#76, avg(cd_dep_college_count)#77, cd_dep_count#25] -Arguments: 100, [ca_state#20 ASC NULLS FIRST, cd_gender#23 ASC NULLS FIRST, cd_marital_status#24 ASC NULLS FIRST, cd_dep_count#25 ASC NULLS FIRST, cd_dep_employed_count#26 ASC NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [ca_state#20, cd_gender#23, cd_marital_status#24, cnt1#66, min(cd_dep_count)#67, max(cd_dep_count)#68, avg(cd_dep_count)#69, cd_dep_employed_count#26, cnt2#70, min(cd_dep_employed_count)#71, max(cd_dep_employed_count)#72, avg(cd_dep_employed_count)#73, cd_dep_college_count#27, cnt3#74, min(cd_dep_college_count)#75, max(cd_dep_college_count)#76, avg(cd_dep_college_count)#77] +Input [18]: [ca_state#17, cd_gender#19, cd_marital_status#20, cnt1#60, min(cd_dep_count)#61, max(cd_dep_count)#62, avg(cd_dep_count)#63, cd_dep_employed_count#22, cnt2#64, min(cd_dep_employed_count)#65, max(cd_dep_employed_count)#66, avg(cd_dep_employed_count)#67, cd_dep_college_count#23, cnt3#68, min(cd_dep_college_count)#69, max(cd_dep_college_count)#70, avg(cd_dep_college_count)#71, cd_dep_count#21] +Arguments: 100, [ca_state#17 ASC NULLS FIRST, cd_gender#19 ASC NULLS FIRST, cd_marital_status#20 ASC NULLS FIRST, cd_dep_count#21 ASC NULLS FIRST, cd_dep_employed_count#22 ASC NULLS FIRST, cd_dep_college_count#23 ASC NULLS FIRST], [ca_state#17, cd_gender#19, cd_marital_status#20, cnt1#60, min(cd_dep_count)#61, max(cd_dep_count)#62, avg(cd_dep_count)#63, cd_dep_employed_count#22, cnt2#64, min(cd_dep_employed_count)#65, max(cd_dep_employed_count)#66, avg(cd_dep_employed_count)#67, cd_dep_college_count#23, cnt3#68, min(cd_dep_college_count)#69, max(cd_dep_college_count)#70, avg(cd_dep_college_count)#71] ===== Subqueries ===== @@ -245,29 +245,29 @@ BroadcastExchange (47) (43) Scan parquet default.date_dim -Output [3]: [d_date_sk#9, d_year#78, d_qoy#79] +Output [3]: [d_date_sk#9, d_year#72, d_qoy#73] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#9, d_year#78, d_qoy#79] +Input [3]: [d_date_sk#9, d_year#72, d_qoy#73] (45) Filter [codegen id : 1] -Input [3]: [d_date_sk#9, d_year#78, d_qoy#79] -Condition : ((((isnotnull(d_year#78) AND isnotnull(d_qoy#79)) AND (d_year#78 = 2002)) AND (d_qoy#79 < 4)) AND isnotnull(d_date_sk#9)) +Input [3]: [d_date_sk#9, d_year#72, d_qoy#73] +Condition : ((((isnotnull(d_year#72) AND isnotnull(d_qoy#73)) AND (d_year#72 = 2002)) AND (d_qoy#73 < 4)) AND isnotnull(d_date_sk#9)) (46) Project [codegen id : 1] Output [1]: [d_date_sk#9] -Input [3]: [d_date_sk#9, d_year#78, d_qoy#79] +Input [3]: [d_date_sk#9, d_year#72, d_qoy#73] (47) BroadcastExchange Input [1]: [d_date_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#80] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:2 Hosting operator id = 11 Hosting Expression = ws_sold_date_sk#12 IN dynamicpruning#8 +Subquery:2 Hosting operator id = 11 Hosting Expression = ws_sold_date_sk#11 IN dynamicpruning#8 -Subquery:3 Hosting operator id = 18 Hosting Expression = cs_sold_date_sk#16 IN dynamicpruning#8 +Subquery:3 Hosting operator id = 18 Hosting Expression = cs_sold_date_sk#14 IN dynamicpruning#8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.sf100/explain.txt index 81050cfbb4475..facdf679490c2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36.sf100/explain.txt @@ -76,7 +76,7 @@ Input [2]: [s_store_sk#8, s_state#9] (11) BroadcastExchange Input [1]: [s_store_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#2] @@ -88,73 +88,73 @@ Output [3]: [ss_item_sk#1, ss_ext_sales_price#3, ss_net_profit#4] Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] (14) Scan parquet default.item -Output [3]: [i_item_sk#11, i_class#12, i_category#13] +Output [3]: [i_item_sk#10, i_class#11, i_category#12] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [3]: [i_item_sk#11, i_class#12, i_category#13] +Input [3]: [i_item_sk#10, i_class#11, i_category#12] (16) Filter [codegen id : 3] -Input [3]: [i_item_sk#11, i_class#12, i_category#13] -Condition : isnotnull(i_item_sk#11) +Input [3]: [i_item_sk#10, i_class#11, i_category#12] +Condition : isnotnull(i_item_sk#10) (17) BroadcastExchange -Input [3]: [i_item_sk#11, i_class#12, i_category#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Input [3]: [i_item_sk#10, i_class#11, i_category#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#11] +Right keys [1]: [i_item_sk#10] Join condition: None (19) Project [codegen id : 4] -Output [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#12] -Input [6]: [ss_item_sk#1, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#11, i_class#12, i_category#13] +Output [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#12, i_class#11] +Input [6]: [ss_item_sk#1, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#10, i_class#11, i_category#12] (20) Expand [codegen id : 4] -Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#12] -Arguments: [[ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#12, 0], [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, null, 1], [ss_ext_sales_price#3, ss_net_profit#4, null, null, 3]], [ss_ext_sales_price#3, ss_net_profit#4, i_category#15, i_class#16, spark_grouping_id#17] +Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#12, i_class#11] +Arguments: [[ss_ext_sales_price#3, ss_net_profit#4, i_category#12, i_class#11, 0], [ss_ext_sales_price#3, ss_net_profit#4, i_category#12, null, 1], [ss_ext_sales_price#3, ss_net_profit#4, null, null, 3]], [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] (21) HashAggregate [codegen id : 4] -Input [5]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#15, i_class#16, spark_grouping_id#17] -Keys [3]: [i_category#15, i_class#16, spark_grouping_id#17] +Input [5]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] +Keys [3]: [i_category#13, i_class#14, spark_grouping_id#15] Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#4)), partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [2]: [sum#18, sum#19] -Results [5]: [i_category#15, i_class#16, spark_grouping_id#17, sum#20, sum#21] +Aggregate Attributes [2]: [sum#16, sum#17] +Results [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] (22) Exchange -Input [5]: [i_category#15, i_class#16, spark_grouping_id#17, sum#20, sum#21] -Arguments: hashpartitioning(i_category#15, i_class#16, spark_grouping_id#17, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] +Arguments: hashpartitioning(i_category#13, i_class#14, spark_grouping_id#15, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 5] -Input [5]: [i_category#15, i_class#16, spark_grouping_id#17, sum#20, sum#21] -Keys [3]: [i_category#15, i_class#16, spark_grouping_id#17] +Input [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] +Keys [3]: [i_category#13, i_class#14, spark_grouping_id#15] Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#23, sum(UnscaledValue(ss_ext_sales_price#3))#24] -Results [7]: [CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#23,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#24,17,2))), DecimalType(37,20)) AS gross_margin#25, i_category#15, i_class#16, (cast((shiftright(spark_grouping_id#17, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint)) AS lochierarchy#26, (cast((shiftright(spark_grouping_id#17, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint)) AS _w1#27, CASE WHEN (cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint) = 0) THEN i_category#15 END AS _w2#28, CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#23,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#24,17,2))), DecimalType(37,20)) AS _w3#29] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#20, sum(UnscaledValue(ss_ext_sales_price#3))#21] +Results [7]: [CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2))), DecimalType(37,20)) AS gross_margin#22, i_category#13, i_class#14, (cast((shiftright(spark_grouping_id#15, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint)) AS lochierarchy#23, (cast((shiftright(spark_grouping_id#15, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint)) AS _w1#24, CASE WHEN (cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint) = 0) THEN i_category#13 END AS _w2#25, CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2))), DecimalType(37,20)) AS _w3#26] (24) Exchange -Input [7]: [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, _w1#27, _w2#28, _w3#29] -Arguments: hashpartitioning(_w1#27, _w2#28, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w1#24, _w2#25, _w3#26] +Arguments: hashpartitioning(_w1#24, _w2#25, 5), ENSURE_REQUIREMENTS, [plan_id=4] (25) Sort [codegen id : 6] -Input [7]: [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, _w1#27, _w2#28, _w3#29] -Arguments: [_w1#27 ASC NULLS FIRST, _w2#28 ASC NULLS FIRST, _w3#29 ASC NULLS FIRST], false, 0 +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w1#24, _w2#25, _w3#26] +Arguments: [_w1#24 ASC NULLS FIRST, _w2#25 ASC NULLS FIRST, _w3#26 ASC NULLS FIRST], false, 0 (26) Window -Input [7]: [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, _w1#27, _w2#28, _w3#29] -Arguments: [rank(_w3#29) windowspecdefinition(_w1#27, _w2#28, _w3#29 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#31], [_w1#27, _w2#28], [_w3#29 ASC NULLS FIRST] +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w1#24, _w2#25, _w3#26] +Arguments: [rank(_w3#26) windowspecdefinition(_w1#24, _w2#25, _w3#26 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#27], [_w1#24, _w2#25], [_w3#26 ASC NULLS FIRST] (27) Project [codegen id : 7] -Output [5]: [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, rank_within_parent#31] -Input [8]: [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, _w1#27, _w2#28, _w3#29, rank_within_parent#31] +Output [5]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] +Input [8]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w1#24, _w2#25, _w3#26, rank_within_parent#27] (28) TakeOrderedAndProject -Input [5]: [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, rank_within_parent#31] -Arguments: 100, [lochierarchy#26 DESC NULLS LAST, CASE WHEN (lochierarchy#26 = 0) THEN i_category#15 END ASC NULLS FIRST, rank_within_parent#31 ASC NULLS FIRST], [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, rank_within_parent#31] +Input [5]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] +Arguments: 100, [lochierarchy#23 DESC NULLS LAST, CASE WHEN (lochierarchy#23 = 0) THEN i_category#13 END ASC NULLS FIRST, rank_within_parent#27 ASC NULLS FIRST], [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] ===== Subqueries ===== @@ -167,25 +167,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [2]: [d_date_sk#7, d_year#32] +Output [2]: [d_date_sk#7, d_year#28] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#7, d_year#32] +Input [2]: [d_date_sk#7, d_year#28] (31) Filter [codegen id : 1] -Input [2]: [d_date_sk#7, d_year#32] -Condition : ((isnotnull(d_year#32) AND (d_year#32 = 2001)) AND isnotnull(d_date_sk#7)) +Input [2]: [d_date_sk#7, d_year#28] +Condition : ((isnotnull(d_year#28) AND (d_year#28 = 2001)) AND isnotnull(d_date_sk#7)) (32) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [2]: [d_date_sk#7, d_year#32] +Input [2]: [d_date_sk#7, d_year#28] (33) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt index 7ef898a59a2c1..afd243d75648d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q36/explain.txt @@ -72,7 +72,7 @@ Condition : isnotnull(i_item_sk#8) (10) BroadcastExchange Input [3]: [i_item_sk#8, i_class#9, i_category#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_item_sk#1] @@ -84,77 +84,77 @@ Output [5]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_ Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#8, i_class#9, i_category#10] (13) Scan parquet default.store -Output [2]: [s_store_sk#12, s_state#13] +Output [2]: [s_store_sk#11, s_state#12] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#12, s_state#13] +Input [2]: [s_store_sk#11, s_state#12] (15) Filter [codegen id : 3] -Input [2]: [s_store_sk#12, s_state#13] -Condition : ((isnotnull(s_state#13) AND (s_state#13 = TN)) AND isnotnull(s_store_sk#12)) +Input [2]: [s_store_sk#11, s_state#12] +Condition : ((isnotnull(s_state#12) AND (s_state#12 = TN)) AND isnotnull(s_store_sk#11)) (16) Project [codegen id : 3] -Output [1]: [s_store_sk#12] -Input [2]: [s_store_sk#12, s_state#13] +Output [1]: [s_store_sk#11] +Input [2]: [s_store_sk#11, s_state#12] (17) BroadcastExchange -Input [1]: [s_store_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#2] -Right keys [1]: [s_store_sk#12] +Right keys [1]: [s_store_sk#11] Join condition: None (19) Project [codegen id : 4] Output [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9] -Input [6]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10, s_store_sk#12] +Input [6]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10, s_store_sk#11] (20) Expand [codegen id : 4] Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9] -Arguments: [[ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9, 0], [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, null, 1], [ss_ext_sales_price#3, ss_net_profit#4, null, null, 3]], [ss_ext_sales_price#3, ss_net_profit#4, i_category#15, i_class#16, spark_grouping_id#17] +Arguments: [[ss_ext_sales_price#3, ss_net_profit#4, i_category#10, i_class#9, 0], [ss_ext_sales_price#3, ss_net_profit#4, i_category#10, null, 1], [ss_ext_sales_price#3, ss_net_profit#4, null, null, 3]], [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] (21) HashAggregate [codegen id : 4] -Input [5]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#15, i_class#16, spark_grouping_id#17] -Keys [3]: [i_category#15, i_class#16, spark_grouping_id#17] +Input [5]: [ss_ext_sales_price#3, ss_net_profit#4, i_category#13, i_class#14, spark_grouping_id#15] +Keys [3]: [i_category#13, i_class#14, spark_grouping_id#15] Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#4)), partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [2]: [sum#18, sum#19] -Results [5]: [i_category#15, i_class#16, spark_grouping_id#17, sum#20, sum#21] +Aggregate Attributes [2]: [sum#16, sum#17] +Results [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] (22) Exchange -Input [5]: [i_category#15, i_class#16, spark_grouping_id#17, sum#20, sum#21] -Arguments: hashpartitioning(i_category#15, i_class#16, spark_grouping_id#17, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] +Arguments: hashpartitioning(i_category#13, i_class#14, spark_grouping_id#15, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 5] -Input [5]: [i_category#15, i_class#16, spark_grouping_id#17, sum#20, sum#21] -Keys [3]: [i_category#15, i_class#16, spark_grouping_id#17] +Input [5]: [i_category#13, i_class#14, spark_grouping_id#15, sum#18, sum#19] +Keys [3]: [i_category#13, i_class#14, spark_grouping_id#15] Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#23, sum(UnscaledValue(ss_ext_sales_price#3))#24] -Results [7]: [CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#23,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#24,17,2))), DecimalType(37,20)) AS gross_margin#25, i_category#15, i_class#16, (cast((shiftright(spark_grouping_id#17, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint)) AS lochierarchy#26, (cast((shiftright(spark_grouping_id#17, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint)) AS _w1#27, CASE WHEN (cast((shiftright(spark_grouping_id#17, 0) & 1) as tinyint) = 0) THEN i_category#15 END AS _w2#28, CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#23,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#24,17,2))), DecimalType(37,20)) AS _w3#29] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#20, sum(UnscaledValue(ss_ext_sales_price#3))#21] +Results [7]: [CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2))), DecimalType(37,20)) AS gross_margin#22, i_category#13, i_class#14, (cast((shiftright(spark_grouping_id#15, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint)) AS lochierarchy#23, (cast((shiftright(spark_grouping_id#15, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint)) AS _w1#24, CASE WHEN (cast((shiftright(spark_grouping_id#15, 0) & 1) as tinyint) = 0) THEN i_category#13 END AS _w2#25, CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2))), DecimalType(37,20)) AS _w3#26] (24) Exchange -Input [7]: [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, _w1#27, _w2#28, _w3#29] -Arguments: hashpartitioning(_w1#27, _w2#28, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w1#24, _w2#25, _w3#26] +Arguments: hashpartitioning(_w1#24, _w2#25, 5), ENSURE_REQUIREMENTS, [plan_id=4] (25) Sort [codegen id : 6] -Input [7]: [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, _w1#27, _w2#28, _w3#29] -Arguments: [_w1#27 ASC NULLS FIRST, _w2#28 ASC NULLS FIRST, _w3#29 ASC NULLS FIRST], false, 0 +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w1#24, _w2#25, _w3#26] +Arguments: [_w1#24 ASC NULLS FIRST, _w2#25 ASC NULLS FIRST, _w3#26 ASC NULLS FIRST], false, 0 (26) Window -Input [7]: [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, _w1#27, _w2#28, _w3#29] -Arguments: [rank(_w3#29) windowspecdefinition(_w1#27, _w2#28, _w3#29 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#31], [_w1#27, _w2#28], [_w3#29 ASC NULLS FIRST] +Input [7]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w1#24, _w2#25, _w3#26] +Arguments: [rank(_w3#26) windowspecdefinition(_w1#24, _w2#25, _w3#26 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#27], [_w1#24, _w2#25], [_w3#26 ASC NULLS FIRST] (27) Project [codegen id : 7] -Output [5]: [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, rank_within_parent#31] -Input [8]: [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, _w1#27, _w2#28, _w3#29, rank_within_parent#31] +Output [5]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] +Input [8]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, _w1#24, _w2#25, _w3#26, rank_within_parent#27] (28) TakeOrderedAndProject -Input [5]: [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, rank_within_parent#31] -Arguments: 100, [lochierarchy#26 DESC NULLS LAST, CASE WHEN (lochierarchy#26 = 0) THEN i_category#15 END ASC NULLS FIRST, rank_within_parent#31 ASC NULLS FIRST], [gross_margin#25, i_category#15, i_class#16, lochierarchy#26, rank_within_parent#31] +Input [5]: [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] +Arguments: 100, [lochierarchy#23 DESC NULLS LAST, CASE WHEN (lochierarchy#23 = 0) THEN i_category#13 END ASC NULLS FIRST, rank_within_parent#27 ASC NULLS FIRST], [gross_margin#22, i_category#13, i_class#14, lochierarchy#23, rank_within_parent#27] ===== Subqueries ===== @@ -167,25 +167,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [2]: [d_date_sk#7, d_year#32] +Output [2]: [d_date_sk#7, d_year#28] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#7, d_year#32] +Input [2]: [d_date_sk#7, d_year#28] (31) Filter [codegen id : 1] -Input [2]: [d_date_sk#7, d_year#32] -Condition : ((isnotnull(d_year#32) AND (d_year#32 = 2001)) AND isnotnull(d_date_sk#7)) +Input [2]: [d_date_sk#7, d_year#28] +Condition : ((isnotnull(d_year#28) AND (d_year#28 = 2001)) AND isnotnull(d_date_sk#7)) (32) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [2]: [d_date_sk#7, d_year#32] +Input [2]: [d_date_sk#7, d_year#28] (33) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.sf100/explain.txt index df9567ae2c5e9..5da89e60d9ca2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37.sf100/explain.txt @@ -49,90 +49,90 @@ Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufa (5) BroadcastExchange Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.inventory -Output [3]: [inv_item_sk#7, inv_quantity_on_hand#8, inv_date_sk#9] +Output [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(inv_date_sk#9), dynamicpruningexpression(inv_date_sk#9 IN dynamicpruning#10)] +PartitionFilters: [isnotnull(inv_date_sk#8), dynamicpruningexpression(inv_date_sk#8 IN dynamicpruning#9)] PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk)] ReadSchema: struct (7) ColumnarToRow -Input [3]: [inv_item_sk#7, inv_quantity_on_hand#8, inv_date_sk#9] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] (8) Filter -Input [3]: [inv_item_sk#7, inv_quantity_on_hand#8, inv_date_sk#9] -Condition : (((isnotnull(inv_quantity_on_hand#8) AND (inv_quantity_on_hand#8 >= 100)) AND (inv_quantity_on_hand#8 <= 500)) AND isnotnull(inv_item_sk#7)) +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Condition : (((isnotnull(inv_quantity_on_hand#7) AND (inv_quantity_on_hand#7 >= 100)) AND (inv_quantity_on_hand#7 <= 500)) AND isnotnull(inv_item_sk#6)) (9) Project -Output [2]: [inv_item_sk#7, inv_date_sk#9] -Input [3]: [inv_item_sk#7, inv_quantity_on_hand#8, inv_date_sk#9] +Output [2]: [inv_item_sk#6, inv_date_sk#8] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] (10) BroadcastHashJoin [codegen id : 3] Left keys [1]: [i_item_sk#1] -Right keys [1]: [inv_item_sk#7] +Right keys [1]: [inv_item_sk#6] Join condition: None (11) Project [codegen id : 3] -Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#9] -Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#7, inv_date_sk#9] +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#6, inv_date_sk#8] (12) ReusedExchange [Reuses operator id: 33] -Output [1]: [d_date_sk#11] +Output [1]: [d_date_sk#10] (13) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [inv_date_sk#9] -Right keys [1]: [d_date_sk#11] +Left keys [1]: [inv_date_sk#8] +Right keys [1]: [d_date_sk#10] Join condition: None (14) Project [codegen id : 3] Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] -Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#9, d_date_sk#11] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#10] (15) Exchange Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] -Arguments: hashpartitioning(i_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#12] +Arguments: hashpartitioning(i_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (16) Sort [codegen id : 4] Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] Arguments: [i_item_sk#1 ASC NULLS FIRST], false, 0 (17) Scan parquet default.catalog_sales -Output [2]: [cs_item_sk#13, cs_sold_date_sk#14] +Output [2]: [cs_item_sk#11, cs_sold_date_sk#12] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (18) ColumnarToRow [codegen id : 5] -Input [2]: [cs_item_sk#13, cs_sold_date_sk#14] +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] (19) Filter [codegen id : 5] -Input [2]: [cs_item_sk#13, cs_sold_date_sk#14] -Condition : isnotnull(cs_item_sk#13) +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Condition : isnotnull(cs_item_sk#11) (20) Project [codegen id : 5] -Output [1]: [cs_item_sk#13] -Input [2]: [cs_item_sk#13, cs_sold_date_sk#14] +Output [1]: [cs_item_sk#11] +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] (21) Exchange -Input [1]: [cs_item_sk#13] -Arguments: hashpartitioning(cs_item_sk#13, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [1]: [cs_item_sk#11] +Arguments: hashpartitioning(cs_item_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) Sort [codegen id : 6] -Input [1]: [cs_item_sk#13] -Arguments: [cs_item_sk#13 ASC NULLS FIRST], false, 0 +Input [1]: [cs_item_sk#11] +Arguments: [cs_item_sk#11 ASC NULLS FIRST], false, 0 (23) SortMergeJoin [codegen id : 7] Left keys [1]: [i_item_sk#1] -Right keys [1]: [cs_item_sk#13] +Right keys [1]: [cs_item_sk#11] Join condition: None (24) Project [codegen id : 7] Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, cs_item_sk#13] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, cs_item_sk#11] (25) HashAggregate [codegen id : 7] Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] @@ -143,7 +143,7 @@ Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] (26) Exchange Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, [id=#16] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, [plan_id=4] (27) HashAggregate [codegen id : 8] Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] @@ -158,7 +158,7 @@ Arguments: 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_cu ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = inv_date_sk#9 IN dynamicpruning#10 +Subquery:1 Hosting operator id = 6 Hosting Expression = inv_date_sk#8 IN dynamicpruning#9 BroadcastExchange (33) +- * Project (32) +- * Filter (31) @@ -167,25 +167,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [2]: [d_date_sk#11, d_date#17] +Output [2]: [d_date_sk#10, d_date#13] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#11, d_date#17] +Input [2]: [d_date_sk#10, d_date#13] (31) Filter [codegen id : 1] -Input [2]: [d_date_sk#11, d_date#17] -Condition : (((isnotnull(d_date#17) AND (d_date#17 >= 2000-02-01)) AND (d_date#17 <= 2000-04-01)) AND isnotnull(d_date_sk#11)) +Input [2]: [d_date_sk#10, d_date#13] +Condition : (((isnotnull(d_date#13) AND (d_date#13 >= 2000-02-01)) AND (d_date#13 <= 2000-04-01)) AND isnotnull(d_date_sk#10)) (32) Project [codegen id : 1] -Output [1]: [d_date_sk#11] -Input [2]: [d_date_sk#11, d_date#17] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#13] (33) BroadcastExchange -Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt index 956b8c98a30c2..408973dcd4536 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q37/explain.txt @@ -65,7 +65,7 @@ Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] (9) BroadcastExchange Input [2]: [inv_item_sk#6, inv_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 3] Left keys [1]: [i_item_sk#1] @@ -77,47 +77,47 @@ Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_dat Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#6, inv_date_sk#8] (12) ReusedExchange [Reuses operator id: 30] -Output [1]: [d_date_sk#11] +Output [1]: [d_date_sk#10] (13) BroadcastHashJoin [codegen id : 3] Left keys [1]: [inv_date_sk#8] -Right keys [1]: [d_date_sk#11] +Right keys [1]: [d_date_sk#10] Join condition: None (14) Project [codegen id : 3] Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] -Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#11] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#10] (15) BroadcastExchange Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) Scan parquet default.catalog_sales -Output [2]: [cs_item_sk#13, cs_sold_date_sk#14] +Output [2]: [cs_item_sk#11, cs_sold_date_sk#12] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (17) ColumnarToRow -Input [2]: [cs_item_sk#13, cs_sold_date_sk#14] +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] (18) Filter -Input [2]: [cs_item_sk#13, cs_sold_date_sk#14] -Condition : isnotnull(cs_item_sk#13) +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] +Condition : isnotnull(cs_item_sk#11) (19) Project -Output [1]: [cs_item_sk#13] -Input [2]: [cs_item_sk#13, cs_sold_date_sk#14] +Output [1]: [cs_item_sk#11] +Input [2]: [cs_item_sk#11, cs_sold_date_sk#12] (20) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] -Right keys [1]: [cs_item_sk#13] +Right keys [1]: [cs_item_sk#11] Join condition: None (21) Project [codegen id : 4] Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, cs_item_sk#13] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, cs_item_sk#11] (22) HashAggregate [codegen id : 4] Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] @@ -128,7 +128,7 @@ Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] (23) Exchange Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, [id=#15] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, [plan_id=3] (24) HashAggregate [codegen id : 5] Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] @@ -152,25 +152,25 @@ BroadcastExchange (30) (26) Scan parquet default.date_dim -Output [2]: [d_date_sk#11, d_date#16] +Output [2]: [d_date_sk#10, d_date#13] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-01), LessThanOrEqual(d_date,2000-04-01), IsNotNull(d_date_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#11, d_date#16] +Input [2]: [d_date_sk#10, d_date#13] (28) Filter [codegen id : 1] -Input [2]: [d_date_sk#11, d_date#16] -Condition : (((isnotnull(d_date#16) AND (d_date#16 >= 2000-02-01)) AND (d_date#16 <= 2000-04-01)) AND isnotnull(d_date_sk#11)) +Input [2]: [d_date_sk#10, d_date#13] +Condition : (((isnotnull(d_date#13) AND (d_date#13 >= 2000-02-01)) AND (d_date#13 <= 2000-04-01)) AND isnotnull(d_date_sk#10)) (29) Project [codegen id : 1] -Output [1]: [d_date_sk#11] -Input [2]: [d_date_sk#11, d_date#16] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#13] (30) BroadcastExchange -Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt index 3d266ee2c01c7..ceab5457e3d2d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38.sf100/explain.txt @@ -90,254 +90,254 @@ Input [4]: [ss_customer_sk#1, ss_sold_date_sk#2, d_date_sk#4, d_date#5] (7) Exchange Input [2]: [ss_customer_sk#1, d_date#5] -Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#6] +Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (8) Sort [codegen id : 3] Input [2]: [ss_customer_sk#1, d_date#5] Arguments: [ss_customer_sk#1 ASC NULLS FIRST], false, 0 (9) Scan parquet default.customer -Output [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (10) ColumnarToRow [codegen id : 4] -Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] (11) Filter [codegen id : 4] -Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] -Condition : isnotnull(c_customer_sk#7) +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Condition : isnotnull(c_customer_sk#6) (12) Exchange -Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] -Arguments: hashpartitioning(c_customer_sk#7, 5), ENSURE_REQUIREMENTS, [id=#10] +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: hashpartitioning(c_customer_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (13) Sort [codegen id : 5] -Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] -Arguments: [c_customer_sk#7 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_customer_sk#6 ASC NULLS FIRST], false, 0 (14) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#7] +Right keys [1]: [c_customer_sk#6] Join condition: None (15) Project [codegen id : 6] -Output [3]: [c_last_name#9, c_first_name#8, d_date#5] -Input [5]: [ss_customer_sk#1, d_date#5, c_customer_sk#7, c_first_name#8, c_last_name#9] +Output [3]: [c_last_name#8, c_first_name#7, d_date#5] +Input [5]: [ss_customer_sk#1, d_date#5, c_customer_sk#6, c_first_name#7, c_last_name#8] (16) HashAggregate [codegen id : 6] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#5] +Input [3]: [c_last_name#8, c_first_name#7, d_date#5] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#5] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#5] +Results [3]: [c_last_name#8, c_first_name#7, d_date#5] (17) Exchange -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Arguments: hashpartitioning(c_last_name#9, c_first_name#8, d_date#5, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [3]: [c_last_name#8, c_first_name#7, d_date#5] +Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 7] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#5] +Input [3]: [c_last_name#8, c_first_name#7, d_date#5] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#5] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#5] +Results [3]: [c_last_name#8, c_first_name#7, d_date#5] (19) Exchange -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Arguments: hashpartitioning(coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#5, 1970-01-01), isnull(d_date#5), 5), ENSURE_REQUIREMENTS, [id=#12] +Input [3]: [c_last_name#8, c_first_name#7, d_date#5] +Arguments: hashpartitioning(coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#5, 1970-01-01), isnull(d_date#5), 5), ENSURE_REQUIREMENTS, [plan_id=4] (20) Sort [codegen id : 8] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Arguments: [coalesce(c_last_name#9, ) ASC NULLS FIRST, isnull(c_last_name#9) ASC NULLS FIRST, coalesce(c_first_name#8, ) ASC NULLS FIRST, isnull(c_first_name#8) ASC NULLS FIRST, coalesce(d_date#5, 1970-01-01) ASC NULLS FIRST, isnull(d_date#5) ASC NULLS FIRST], false, 0 +Input [3]: [c_last_name#8, c_first_name#7, d_date#5] +Arguments: [coalesce(c_last_name#8, ) ASC NULLS FIRST, isnull(c_last_name#8) ASC NULLS FIRST, coalesce(c_first_name#7, ) ASC NULLS FIRST, isnull(c_first_name#7) ASC NULLS FIRST, coalesce(d_date#5, 1970-01-01) ASC NULLS FIRST, isnull(d_date#5) ASC NULLS FIRST], false, 0 (21) Scan parquet default.catalog_sales -Output [2]: [cs_bill_customer_sk#13, cs_sold_date_sk#14] +Output [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#14), dynamicpruningexpression(cs_sold_date_sk#14 IN dynamicpruning#3)] +PartitionFilters: [isnotnull(cs_sold_date_sk#10), dynamicpruningexpression(cs_sold_date_sk#10 IN dynamicpruning#3)] PushedFilters: [IsNotNull(cs_bill_customer_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 10] -Input [2]: [cs_bill_customer_sk#13, cs_sold_date_sk#14] +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] (23) Filter [codegen id : 10] -Input [2]: [cs_bill_customer_sk#13, cs_sold_date_sk#14] -Condition : isnotnull(cs_bill_customer_sk#13) +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Condition : isnotnull(cs_bill_customer_sk#9) (24) ReusedExchange [Reuses operator id: 65] -Output [2]: [d_date_sk#15, d_date#16] +Output [2]: [d_date_sk#11, d_date#12] (25) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#14] -Right keys [1]: [d_date_sk#15] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] Join condition: None (26) Project [codegen id : 10] -Output [2]: [cs_bill_customer_sk#13, d_date#16] -Input [4]: [cs_bill_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15, d_date#16] +Output [2]: [cs_bill_customer_sk#9, d_date#12] +Input [4]: [cs_bill_customer_sk#9, cs_sold_date_sk#10, d_date_sk#11, d_date#12] (27) Exchange -Input [2]: [cs_bill_customer_sk#13, d_date#16] -Arguments: hashpartitioning(cs_bill_customer_sk#13, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [2]: [cs_bill_customer_sk#9, d_date#12] +Arguments: hashpartitioning(cs_bill_customer_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=5] (28) Sort [codegen id : 11] -Input [2]: [cs_bill_customer_sk#13, d_date#16] -Arguments: [cs_bill_customer_sk#13 ASC NULLS FIRST], false, 0 +Input [2]: [cs_bill_customer_sk#9, d_date#12] +Arguments: [cs_bill_customer_sk#9 ASC NULLS FIRST], false, 0 (29) ReusedExchange [Reuses operator id: 12] -Output [3]: [c_customer_sk#18, c_first_name#19, c_last_name#20] +Output [3]: [c_customer_sk#13, c_first_name#14, c_last_name#15] (30) Sort [codegen id : 13] -Input [3]: [c_customer_sk#18, c_first_name#19, c_last_name#20] -Arguments: [c_customer_sk#18 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#13, c_first_name#14, c_last_name#15] +Arguments: [c_customer_sk#13 ASC NULLS FIRST], false, 0 (31) SortMergeJoin [codegen id : 14] -Left keys [1]: [cs_bill_customer_sk#13] -Right keys [1]: [c_customer_sk#18] +Left keys [1]: [cs_bill_customer_sk#9] +Right keys [1]: [c_customer_sk#13] Join condition: None (32) Project [codegen id : 14] -Output [3]: [c_last_name#20, c_first_name#19, d_date#16] -Input [5]: [cs_bill_customer_sk#13, d_date#16, c_customer_sk#18, c_first_name#19, c_last_name#20] +Output [3]: [c_last_name#15, c_first_name#14, d_date#12] +Input [5]: [cs_bill_customer_sk#9, d_date#12, c_customer_sk#13, c_first_name#14, c_last_name#15] (33) HashAggregate [codegen id : 14] -Input [3]: [c_last_name#20, c_first_name#19, d_date#16] -Keys [3]: [c_last_name#20, c_first_name#19, d_date#16] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#20, c_first_name#19, d_date#16] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] (34) Exchange -Input [3]: [c_last_name#20, c_first_name#19, d_date#16] -Arguments: hashpartitioning(c_last_name#20, c_first_name#19, d_date#16, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, d_date#12, 5), ENSURE_REQUIREMENTS, [plan_id=6] (35) HashAggregate [codegen id : 15] -Input [3]: [c_last_name#20, c_first_name#19, d_date#16] -Keys [3]: [c_last_name#20, c_first_name#19, d_date#16] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#20, c_first_name#19, d_date#16] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] (36) Exchange -Input [3]: [c_last_name#20, c_first_name#19, d_date#16] -Arguments: hashpartitioning(coalesce(c_last_name#20, ), isnull(c_last_name#20), coalesce(c_first_name#19, ), isnull(c_first_name#19), coalesce(d_date#16, 1970-01-01), isnull(d_date#16), 5), ENSURE_REQUIREMENTS, [id=#22] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: hashpartitioning(coalesce(c_last_name#15, ), isnull(c_last_name#15), coalesce(c_first_name#14, ), isnull(c_first_name#14), coalesce(d_date#12, 1970-01-01), isnull(d_date#12), 5), ENSURE_REQUIREMENTS, [plan_id=7] (37) Sort [codegen id : 16] -Input [3]: [c_last_name#20, c_first_name#19, d_date#16] -Arguments: [coalesce(c_last_name#20, ) ASC NULLS FIRST, isnull(c_last_name#20) ASC NULLS FIRST, coalesce(c_first_name#19, ) ASC NULLS FIRST, isnull(c_first_name#19) ASC NULLS FIRST, coalesce(d_date#16, 1970-01-01) ASC NULLS FIRST, isnull(d_date#16) ASC NULLS FIRST], false, 0 +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: [coalesce(c_last_name#15, ) ASC NULLS FIRST, isnull(c_last_name#15) ASC NULLS FIRST, coalesce(c_first_name#14, ) ASC NULLS FIRST, isnull(c_first_name#14) ASC NULLS FIRST, coalesce(d_date#12, 1970-01-01) ASC NULLS FIRST, isnull(d_date#12) ASC NULLS FIRST], false, 0 (38) SortMergeJoin [codegen id : 17] -Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] -Right keys [6]: [coalesce(c_last_name#20, ), isnull(c_last_name#20), coalesce(c_first_name#19, ), isnull(c_first_name#19), coalesce(d_date#16, 1970-01-01), isnull(d_date#16)] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] +Right keys [6]: [coalesce(c_last_name#15, ), isnull(c_last_name#15), coalesce(c_first_name#14, ), isnull(c_first_name#14), coalesce(d_date#12, 1970-01-01), isnull(d_date#12)] Join condition: None (39) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#23, ws_sold_date_sk#24] +Output [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#24), dynamicpruningexpression(ws_sold_date_sk#24 IN dynamicpruning#3)] +PartitionFilters: [isnotnull(ws_sold_date_sk#17), dynamicpruningexpression(ws_sold_date_sk#17 IN dynamicpruning#3)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (40) ColumnarToRow [codegen id : 19] -Input [2]: [ws_bill_customer_sk#23, ws_sold_date_sk#24] +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] (41) Filter [codegen id : 19] -Input [2]: [ws_bill_customer_sk#23, ws_sold_date_sk#24] -Condition : isnotnull(ws_bill_customer_sk#23) +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Condition : isnotnull(ws_bill_customer_sk#16) (42) ReusedExchange [Reuses operator id: 65] -Output [2]: [d_date_sk#25, d_date#26] +Output [2]: [d_date_sk#18, d_date#19] (43) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ws_sold_date_sk#24] -Right keys [1]: [d_date_sk#25] +Left keys [1]: [ws_sold_date_sk#17] +Right keys [1]: [d_date_sk#18] Join condition: None (44) Project [codegen id : 19] -Output [2]: [ws_bill_customer_sk#23, d_date#26] -Input [4]: [ws_bill_customer_sk#23, ws_sold_date_sk#24, d_date_sk#25, d_date#26] +Output [2]: [ws_bill_customer_sk#16, d_date#19] +Input [4]: [ws_bill_customer_sk#16, ws_sold_date_sk#17, d_date_sk#18, d_date#19] (45) Exchange -Input [2]: [ws_bill_customer_sk#23, d_date#26] -Arguments: hashpartitioning(ws_bill_customer_sk#23, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [2]: [ws_bill_customer_sk#16, d_date#19] +Arguments: hashpartitioning(ws_bill_customer_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=8] (46) Sort [codegen id : 20] -Input [2]: [ws_bill_customer_sk#23, d_date#26] -Arguments: [ws_bill_customer_sk#23 ASC NULLS FIRST], false, 0 +Input [2]: [ws_bill_customer_sk#16, d_date#19] +Arguments: [ws_bill_customer_sk#16 ASC NULLS FIRST], false, 0 (47) ReusedExchange [Reuses operator id: 12] -Output [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] +Output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] (48) Sort [codegen id : 22] -Input [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] -Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] +Arguments: [c_customer_sk#20 ASC NULLS FIRST], false, 0 (49) SortMergeJoin [codegen id : 23] -Left keys [1]: [ws_bill_customer_sk#23] -Right keys [1]: [c_customer_sk#28] +Left keys [1]: [ws_bill_customer_sk#16] +Right keys [1]: [c_customer_sk#20] Join condition: None (50) Project [codegen id : 23] -Output [3]: [c_last_name#30, c_first_name#29, d_date#26] -Input [5]: [ws_bill_customer_sk#23, d_date#26, c_customer_sk#28, c_first_name#29, c_last_name#30] +Output [3]: [c_last_name#22, c_first_name#21, d_date#19] +Input [5]: [ws_bill_customer_sk#16, d_date#19, c_customer_sk#20, c_first_name#21, c_last_name#22] (51) HashAggregate [codegen id : 23] -Input [3]: [c_last_name#30, c_first_name#29, d_date#26] -Keys [3]: [c_last_name#30, c_first_name#29, d_date#26] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#30, c_first_name#29, d_date#26] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] (52) Exchange -Input [3]: [c_last_name#30, c_first_name#29, d_date#26] -Arguments: hashpartitioning(c_last_name#30, c_first_name#29, d_date#26, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: hashpartitioning(c_last_name#22, c_first_name#21, d_date#19, 5), ENSURE_REQUIREMENTS, [plan_id=9] (53) HashAggregate [codegen id : 24] -Input [3]: [c_last_name#30, c_first_name#29, d_date#26] -Keys [3]: [c_last_name#30, c_first_name#29, d_date#26] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#30, c_first_name#29, d_date#26] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] (54) Exchange -Input [3]: [c_last_name#30, c_first_name#29, d_date#26] -Arguments: hashpartitioning(coalesce(c_last_name#30, ), isnull(c_last_name#30), coalesce(c_first_name#29, ), isnull(c_first_name#29), coalesce(d_date#26, 1970-01-01), isnull(d_date#26), 5), ENSURE_REQUIREMENTS, [id=#32] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: hashpartitioning(coalesce(c_last_name#22, ), isnull(c_last_name#22), coalesce(c_first_name#21, ), isnull(c_first_name#21), coalesce(d_date#19, 1970-01-01), isnull(d_date#19), 5), ENSURE_REQUIREMENTS, [plan_id=10] (55) Sort [codegen id : 25] -Input [3]: [c_last_name#30, c_first_name#29, d_date#26] -Arguments: [coalesce(c_last_name#30, ) ASC NULLS FIRST, isnull(c_last_name#30) ASC NULLS FIRST, coalesce(c_first_name#29, ) ASC NULLS FIRST, isnull(c_first_name#29) ASC NULLS FIRST, coalesce(d_date#26, 1970-01-01) ASC NULLS FIRST, isnull(d_date#26) ASC NULLS FIRST], false, 0 +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: [coalesce(c_last_name#22, ) ASC NULLS FIRST, isnull(c_last_name#22) ASC NULLS FIRST, coalesce(c_first_name#21, ) ASC NULLS FIRST, isnull(c_first_name#21) ASC NULLS FIRST, coalesce(d_date#19, 1970-01-01) ASC NULLS FIRST, isnull(d_date#19) ASC NULLS FIRST], false, 0 (56) SortMergeJoin [codegen id : 26] -Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] -Right keys [6]: [coalesce(c_last_name#30, ), isnull(c_last_name#30), coalesce(c_first_name#29, ), isnull(c_first_name#29), coalesce(d_date#26, 1970-01-01), isnull(d_date#26)] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] +Right keys [6]: [coalesce(c_last_name#22, ), isnull(c_last_name#22), coalesce(c_first_name#21, ), isnull(c_first_name#21), coalesce(d_date#19, 1970-01-01), isnull(d_date#19)] Join condition: None (57) Project [codegen id : 26] Output: [] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] +Input [3]: [c_last_name#8, c_first_name#7, d_date#5] (58) HashAggregate [codegen id : 26] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#33] -Results [1]: [count#34] +Aggregate Attributes [1]: [count#23] +Results [1]: [count#24] (59) Exchange -Input [1]: [count#34] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#35] +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] (60) HashAggregate [codegen id : 27] -Input [1]: [count#34] +Input [1]: [count#24] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#36] -Results [1]: [count(1)#36 AS count(1)#37] +Aggregate Attributes [1]: [count(1)#25] +Results [1]: [count(1)#25 AS count(1)#26] ===== Subqueries ===== @@ -350,29 +350,29 @@ BroadcastExchange (65) (61) Scan parquet default.date_dim -Output [3]: [d_date_sk#4, d_date#5, d_month_seq#38] +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#27] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (62) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#38] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#27] (63) Filter [codegen id : 1] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#38] -Condition : (((isnotnull(d_month_seq#38) AND (d_month_seq#38 >= 1200)) AND (d_month_seq#38 <= 1211)) AND isnotnull(d_date_sk#4)) +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#27] +Condition : (((isnotnull(d_month_seq#27) AND (d_month_seq#27 >= 1200)) AND (d_month_seq#27 <= 1211)) AND isnotnull(d_date_sk#4)) (64) Project [codegen id : 1] Output [2]: [d_date_sk#4, d_date#5] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#38] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#27] (65) BroadcastExchange Input [2]: [d_date_sk#4, d_date#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] -Subquery:2 Hosting operator id = 21 Hosting Expression = cs_sold_date_sk#14 IN dynamicpruning#3 +Subquery:2 Hosting operator id = 21 Hosting Expression = cs_sold_date_sk#10 IN dynamicpruning#3 -Subquery:3 Hosting operator id = 39 Hosting Expression = ws_sold_date_sk#24 IN dynamicpruning#3 +Subquery:3 Hosting operator id = 39 Hosting Expression = ws_sold_date_sk#17 IN dynamicpruning#3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt index 60190c9f39e43..442f3f99716a6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q38/explain.txt @@ -91,7 +91,7 @@ Condition : isnotnull(c_customer_sk#6) (10) BroadcastExchange Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_customer_sk#1] @@ -111,7 +111,7 @@ Results [3]: [c_last_name#8, c_first_name#7, d_date#5] (14) Exchange Input [3]: [c_last_name#8, c_first_name#7, d_date#5] -Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#5, 5), ENSURE_REQUIREMENTS, [id=#10] +Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 12] Input [3]: [c_last_name#8, c_first_name#7, d_date#5] @@ -121,135 +121,135 @@ Aggregate Attributes: [] Results [3]: [c_last_name#8, c_first_name#7, d_date#5] (16) Scan parquet default.catalog_sales -Output [2]: [cs_bill_customer_sk#11, cs_sold_date_sk#12] +Output [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#12), dynamicpruningexpression(cs_sold_date_sk#12 IN dynamicpruning#3)] +PartitionFilters: [isnotnull(cs_sold_date_sk#10), dynamicpruningexpression(cs_sold_date_sk#10 IN dynamicpruning#3)] PushedFilters: [IsNotNull(cs_bill_customer_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 6] -Input [2]: [cs_bill_customer_sk#11, cs_sold_date_sk#12] +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] (18) Filter [codegen id : 6] -Input [2]: [cs_bill_customer_sk#11, cs_sold_date_sk#12] -Condition : isnotnull(cs_bill_customer_sk#11) +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Condition : isnotnull(cs_bill_customer_sk#9) (19) ReusedExchange [Reuses operator id: 52] -Output [2]: [d_date_sk#13, d_date#14] +Output [2]: [d_date_sk#11, d_date#12] (20) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_sold_date_sk#12] -Right keys [1]: [d_date_sk#13] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] Join condition: None (21) Project [codegen id : 6] -Output [2]: [cs_bill_customer_sk#11, d_date#14] -Input [4]: [cs_bill_customer_sk#11, cs_sold_date_sk#12, d_date_sk#13, d_date#14] +Output [2]: [cs_bill_customer_sk#9, d_date#12] +Input [4]: [cs_bill_customer_sk#9, cs_sold_date_sk#10, d_date_sk#11, d_date#12] (22) ReusedExchange [Reuses operator id: 10] -Output [3]: [c_customer_sk#15, c_first_name#16, c_last_name#17] +Output [3]: [c_customer_sk#13, c_first_name#14, c_last_name#15] (23) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_bill_customer_sk#11] -Right keys [1]: [c_customer_sk#15] +Left keys [1]: [cs_bill_customer_sk#9] +Right keys [1]: [c_customer_sk#13] Join condition: None (24) Project [codegen id : 6] -Output [3]: [c_last_name#17, c_first_name#16, d_date#14] -Input [5]: [cs_bill_customer_sk#11, d_date#14, c_customer_sk#15, c_first_name#16, c_last_name#17] +Output [3]: [c_last_name#15, c_first_name#14, d_date#12] +Input [5]: [cs_bill_customer_sk#9, d_date#12, c_customer_sk#13, c_first_name#14, c_last_name#15] (25) HashAggregate [codegen id : 6] -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#17, c_first_name#16, d_date#14] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] (26) Exchange -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Arguments: hashpartitioning(c_last_name#17, c_first_name#16, d_date#14, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, d_date#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (27) HashAggregate [codegen id : 7] -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#17, c_first_name#16, d_date#14] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] (28) BroadcastExchange -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [id=#19] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 12] Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] -Right keys [6]: [coalesce(c_last_name#17, ), isnull(c_last_name#17), coalesce(c_first_name#16, ), isnull(c_first_name#16), coalesce(d_date#14, 1970-01-01), isnull(d_date#14)] +Right keys [6]: [coalesce(c_last_name#15, ), isnull(c_last_name#15), coalesce(c_first_name#14, ), isnull(c_first_name#14), coalesce(d_date#12, 1970-01-01), isnull(d_date#12)] Join condition: None (30) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#20, ws_sold_date_sk#21] +Output [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#21), dynamicpruningexpression(ws_sold_date_sk#21 IN dynamicpruning#3)] +PartitionFilters: [isnotnull(ws_sold_date_sk#17), dynamicpruningexpression(ws_sold_date_sk#17 IN dynamicpruning#3)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 10] -Input [2]: [ws_bill_customer_sk#20, ws_sold_date_sk#21] +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] (32) Filter [codegen id : 10] -Input [2]: [ws_bill_customer_sk#20, ws_sold_date_sk#21] -Condition : isnotnull(ws_bill_customer_sk#20) +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Condition : isnotnull(ws_bill_customer_sk#16) (33) ReusedExchange [Reuses operator id: 52] -Output [2]: [d_date_sk#22, d_date#23] +Output [2]: [d_date_sk#18, d_date#19] (34) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ws_sold_date_sk#21] -Right keys [1]: [d_date_sk#22] +Left keys [1]: [ws_sold_date_sk#17] +Right keys [1]: [d_date_sk#18] Join condition: None (35) Project [codegen id : 10] -Output [2]: [ws_bill_customer_sk#20, d_date#23] -Input [4]: [ws_bill_customer_sk#20, ws_sold_date_sk#21, d_date_sk#22, d_date#23] +Output [2]: [ws_bill_customer_sk#16, d_date#19] +Input [4]: [ws_bill_customer_sk#16, ws_sold_date_sk#17, d_date_sk#18, d_date#19] (36) ReusedExchange [Reuses operator id: 10] -Output [3]: [c_customer_sk#24, c_first_name#25, c_last_name#26] +Output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] (37) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ws_bill_customer_sk#20] -Right keys [1]: [c_customer_sk#24] +Left keys [1]: [ws_bill_customer_sk#16] +Right keys [1]: [c_customer_sk#20] Join condition: None (38) Project [codegen id : 10] -Output [3]: [c_last_name#26, c_first_name#25, d_date#23] -Input [5]: [ws_bill_customer_sk#20, d_date#23, c_customer_sk#24, c_first_name#25, c_last_name#26] +Output [3]: [c_last_name#22, c_first_name#21, d_date#19] +Input [5]: [ws_bill_customer_sk#16, d_date#19, c_customer_sk#20, c_first_name#21, c_last_name#22] (39) HashAggregate [codegen id : 10] -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#26, c_first_name#25, d_date#23] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] (40) Exchange -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Arguments: hashpartitioning(c_last_name#26, c_first_name#25, d_date#23, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: hashpartitioning(c_last_name#22, c_first_name#21, d_date#19, 5), ENSURE_REQUIREMENTS, [plan_id=5] (41) HashAggregate [codegen id : 11] -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#26, c_first_name#25, d_date#23] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] (42) BroadcastExchange -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [id=#28] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=6] (43) BroadcastHashJoin [codegen id : 12] Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] -Right keys [6]: [coalesce(c_last_name#26, ), isnull(c_last_name#26), coalesce(c_first_name#25, ), isnull(c_first_name#25), coalesce(d_date#23, 1970-01-01), isnull(d_date#23)] +Right keys [6]: [coalesce(c_last_name#22, ), isnull(c_last_name#22), coalesce(c_first_name#21, ), isnull(c_first_name#21), coalesce(d_date#19, 1970-01-01), isnull(d_date#19)] Join condition: None (44) Project [codegen id : 12] @@ -260,19 +260,19 @@ Input [3]: [c_last_name#8, c_first_name#7, d_date#5] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#29] -Results [1]: [count#30] +Aggregate Attributes [1]: [count#23] +Results [1]: [count#24] (46) Exchange -Input [1]: [count#30] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#31] +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (47) HashAggregate [codegen id : 13] -Input [1]: [count#30] +Input [1]: [count#24] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#32] -Results [1]: [count(1)#32 AS count(1)#33] +Aggregate Attributes [1]: [count(1)#25] +Results [1]: [count(1)#25 AS count(1)#26] ===== Subqueries ===== @@ -285,29 +285,29 @@ BroadcastExchange (52) (48) Scan parquet default.date_dim -Output [3]: [d_date_sk#4, d_date#5, d_month_seq#34] +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#27] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (49) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#34] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#27] (50) Filter [codegen id : 1] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#34] -Condition : (((isnotnull(d_month_seq#34) AND (d_month_seq#34 >= 1200)) AND (d_month_seq#34 <= 1211)) AND isnotnull(d_date_sk#4)) +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#27] +Condition : (((isnotnull(d_month_seq#27) AND (d_month_seq#27 >= 1200)) AND (d_month_seq#27 <= 1211)) AND isnotnull(d_date_sk#4)) (51) Project [codegen id : 1] Output [2]: [d_date_sk#4, d_date#5] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#34] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#27] (52) BroadcastExchange Input [2]: [d_date_sk#4, d_date#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 16 Hosting Expression = cs_sold_date_sk#12 IN dynamicpruning#3 +Subquery:2 Hosting operator id = 16 Hosting Expression = cs_sold_date_sk#10 IN dynamicpruning#3 -Subquery:3 Hosting operator id = 30 Hosting Expression = ws_sold_date_sk#21 IN dynamicpruning#3 +Subquery:3 Hosting operator id = 30 Hosting Expression = ws_sold_date_sk#17 IN dynamicpruning#3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/explain.txt index ef3f68796cf26..64b986d1a6fe4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a.sf100/explain.txt @@ -91,7 +91,7 @@ Condition : isnotnull(i_item_sk#8) (10) BroadcastExchange Input [1]: [i_item_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_item_sk#1] @@ -103,163 +103,163 @@ Output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, d_moy#7, i_item_sk#8] Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, d_moy#7, i_item_sk#8] (13) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#10, w_warehouse_name#11] +Output [2]: [w_warehouse_sk#9, w_warehouse_name#10] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [2]: [w_warehouse_sk#10, w_warehouse_name#11] +Input [2]: [w_warehouse_sk#9, w_warehouse_name#10] (15) Filter [codegen id : 3] -Input [2]: [w_warehouse_sk#10, w_warehouse_name#11] -Condition : isnotnull(w_warehouse_sk#10) +Input [2]: [w_warehouse_sk#9, w_warehouse_name#10] +Condition : isnotnull(w_warehouse_sk#9) (16) BroadcastExchange -Input [2]: [w_warehouse_sk#10, w_warehouse_name#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] +Input [2]: [w_warehouse_sk#9, w_warehouse_name#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_warehouse_sk#2] -Right keys [1]: [w_warehouse_sk#10] +Right keys [1]: [w_warehouse_sk#9] Join condition: None (18) Project [codegen id : 4] -Output [5]: [inv_quantity_on_hand#3, i_item_sk#8, w_warehouse_sk#10, w_warehouse_name#11, d_moy#7] -Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, d_moy#7, i_item_sk#8, w_warehouse_sk#10, w_warehouse_name#11] +Output [5]: [inv_quantity_on_hand#3, i_item_sk#8, w_warehouse_sk#9, w_warehouse_name#10, d_moy#7] +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, d_moy#7, i_item_sk#8, w_warehouse_sk#9, w_warehouse_name#10] (19) HashAggregate [codegen id : 4] -Input [5]: [inv_quantity_on_hand#3, i_item_sk#8, w_warehouse_sk#10, w_warehouse_name#11, d_moy#7] -Keys [4]: [w_warehouse_name#11, w_warehouse_sk#10, i_item_sk#8, d_moy#7] +Input [5]: [inv_quantity_on_hand#3, i_item_sk#8, w_warehouse_sk#9, w_warehouse_name#10, d_moy#7] +Keys [4]: [w_warehouse_name#10, w_warehouse_sk#9, i_item_sk#8, d_moy#7] Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#3 as double)), partial_avg(inv_quantity_on_hand#3)] -Aggregate Attributes [5]: [n#13, avg#14, m2#15, sum#16, count#17] -Results [9]: [w_warehouse_name#11, w_warehouse_sk#10, i_item_sk#8, d_moy#7, n#18, avg#19, m2#20, sum#21, count#22] +Aggregate Attributes [5]: [n#11, avg#12, m2#13, sum#14, count#15] +Results [9]: [w_warehouse_name#10, w_warehouse_sk#9, i_item_sk#8, d_moy#7, n#16, avg#17, m2#18, sum#19, count#20] (20) Exchange -Input [9]: [w_warehouse_name#11, w_warehouse_sk#10, i_item_sk#8, d_moy#7, n#18, avg#19, m2#20, sum#21, count#22] -Arguments: hashpartitioning(w_warehouse_name#11, w_warehouse_sk#10, i_item_sk#8, d_moy#7, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [9]: [w_warehouse_name#10, w_warehouse_sk#9, i_item_sk#8, d_moy#7, n#16, avg#17, m2#18, sum#19, count#20] +Arguments: hashpartitioning(w_warehouse_name#10, w_warehouse_sk#9, i_item_sk#8, d_moy#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 5] -Input [9]: [w_warehouse_name#11, w_warehouse_sk#10, i_item_sk#8, d_moy#7, n#18, avg#19, m2#20, sum#21, count#22] -Keys [4]: [w_warehouse_name#11, w_warehouse_sk#10, i_item_sk#8, d_moy#7] +Input [9]: [w_warehouse_name#10, w_warehouse_sk#9, i_item_sk#8, d_moy#7, n#16, avg#17, m2#18, sum#19, count#20] +Keys [4]: [w_warehouse_name#10, w_warehouse_sk#9, i_item_sk#8, d_moy#7] Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double)), avg(inv_quantity_on_hand#3)] -Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double))#24, avg(inv_quantity_on_hand#3)#25] -Results [5]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, stddev_samp(cast(inv_quantity_on_hand#3 as double))#24 AS stdev#26, avg(inv_quantity_on_hand#3)#25 AS mean#27] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double))#21, avg(inv_quantity_on_hand#3)#22] +Results [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, stddev_samp(cast(inv_quantity_on_hand#3 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#3)#22 AS mean#24] (22) Filter [codegen id : 5] -Input [5]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, stdev#26, mean#27] -Condition : ((isnotnull(stdev#26) AND isnotnull(mean#27)) AND (NOT coalesce((mean#27 = 0.0), false) AND ((stdev#26 / mean#27) > 1.0))) +Input [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, stdev#23, mean#24] +Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND (NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0))) (23) Project [codegen id : 5] -Output [5]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, mean#27, CASE WHEN (mean#27 = 0.0) THEN null ELSE (stdev#26 / mean#27) END AS cov#28] -Input [5]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, stdev#26, mean#27] +Output [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, mean#24, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#25] +Input [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, stdev#23, mean#24] (24) Exchange -Input [5]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, mean#27, cov#28] -Arguments: hashpartitioning(i_item_sk#8, w_warehouse_sk#10, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, mean#24, cov#25] +Arguments: hashpartitioning(i_item_sk#8, w_warehouse_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] (25) Sort [codegen id : 6] -Input [5]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, mean#27, cov#28] -Arguments: [i_item_sk#8 ASC NULLS FIRST, w_warehouse_sk#10 ASC NULLS FIRST], false, 0 +Input [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, mean#24, cov#25] +Arguments: [i_item_sk#8 ASC NULLS FIRST, w_warehouse_sk#9 ASC NULLS FIRST], false, 0 (26) Scan parquet default.inventory -Output [4]: [inv_item_sk#30, inv_warehouse_sk#31, inv_quantity_on_hand#32, inv_date_sk#33] +Output [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(inv_date_sk#33), dynamicpruningexpression(inv_date_sk#33 IN dynamicpruning#34)] +PartitionFilters: [isnotnull(inv_date_sk#29), dynamicpruningexpression(inv_date_sk#29 IN dynamicpruning#30)] PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 10] -Input [4]: [inv_item_sk#30, inv_warehouse_sk#31, inv_quantity_on_hand#32, inv_date_sk#33] +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] (28) Filter [codegen id : 10] -Input [4]: [inv_item_sk#30, inv_warehouse_sk#31, inv_quantity_on_hand#32, inv_date_sk#33] -Condition : (isnotnull(inv_item_sk#30) AND isnotnull(inv_warehouse_sk#31)) +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Condition : (isnotnull(inv_item_sk#26) AND isnotnull(inv_warehouse_sk#27)) (29) ReusedExchange [Reuses operator id: 57] -Output [2]: [d_date_sk#35, d_moy#36] +Output [2]: [d_date_sk#31, d_moy#32] (30) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [inv_date_sk#33] -Right keys [1]: [d_date_sk#35] +Left keys [1]: [inv_date_sk#29] +Right keys [1]: [d_date_sk#31] Join condition: None (31) Project [codegen id : 10] -Output [4]: [inv_item_sk#30, inv_warehouse_sk#31, inv_quantity_on_hand#32, d_moy#36] -Input [6]: [inv_item_sk#30, inv_warehouse_sk#31, inv_quantity_on_hand#32, inv_date_sk#33, d_date_sk#35, d_moy#36] +Output [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, d_moy#32] +Input [6]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, d_date_sk#31, d_moy#32] (32) ReusedExchange [Reuses operator id: 10] -Output [1]: [i_item_sk#37] +Output [1]: [i_item_sk#33] (33) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [inv_item_sk#30] -Right keys [1]: [i_item_sk#37] +Left keys [1]: [inv_item_sk#26] +Right keys [1]: [i_item_sk#33] Join condition: None (34) Project [codegen id : 10] -Output [4]: [inv_warehouse_sk#31, inv_quantity_on_hand#32, d_moy#36, i_item_sk#37] -Input [5]: [inv_item_sk#30, inv_warehouse_sk#31, inv_quantity_on_hand#32, d_moy#36, i_item_sk#37] +Output [4]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, d_moy#32, i_item_sk#33] +Input [5]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, d_moy#32, i_item_sk#33] (35) ReusedExchange [Reuses operator id: 16] -Output [2]: [w_warehouse_sk#38, w_warehouse_name#39] +Output [2]: [w_warehouse_sk#34, w_warehouse_name#35] (36) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [inv_warehouse_sk#31] -Right keys [1]: [w_warehouse_sk#38] +Left keys [1]: [inv_warehouse_sk#27] +Right keys [1]: [w_warehouse_sk#34] Join condition: None (37) Project [codegen id : 10] -Output [5]: [inv_quantity_on_hand#32, i_item_sk#37, w_warehouse_sk#38, w_warehouse_name#39, d_moy#36] -Input [6]: [inv_warehouse_sk#31, inv_quantity_on_hand#32, d_moy#36, i_item_sk#37, w_warehouse_sk#38, w_warehouse_name#39] +Output [5]: [inv_quantity_on_hand#28, i_item_sk#33, w_warehouse_sk#34, w_warehouse_name#35, d_moy#32] +Input [6]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, d_moy#32, i_item_sk#33, w_warehouse_sk#34, w_warehouse_name#35] (38) HashAggregate [codegen id : 10] -Input [5]: [inv_quantity_on_hand#32, i_item_sk#37, w_warehouse_sk#38, w_warehouse_name#39, d_moy#36] -Keys [4]: [w_warehouse_name#39, w_warehouse_sk#38, i_item_sk#37, d_moy#36] -Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#32 as double)), partial_avg(inv_quantity_on_hand#32)] -Aggregate Attributes [5]: [n#40, avg#41, m2#42, sum#43, count#44] -Results [9]: [w_warehouse_name#39, w_warehouse_sk#38, i_item_sk#37, d_moy#36, n#45, avg#46, m2#47, sum#48, count#49] +Input [5]: [inv_quantity_on_hand#28, i_item_sk#33, w_warehouse_sk#34, w_warehouse_name#35, d_moy#32] +Keys [4]: [w_warehouse_name#35, w_warehouse_sk#34, i_item_sk#33, d_moy#32] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#28 as double)), partial_avg(inv_quantity_on_hand#28)] +Aggregate Attributes [5]: [n#36, avg#37, m2#38, sum#39, count#40] +Results [9]: [w_warehouse_name#35, w_warehouse_sk#34, i_item_sk#33, d_moy#32, n#41, avg#42, m2#43, sum#44, count#45] (39) Exchange -Input [9]: [w_warehouse_name#39, w_warehouse_sk#38, i_item_sk#37, d_moy#36, n#45, avg#46, m2#47, sum#48, count#49] -Arguments: hashpartitioning(w_warehouse_name#39, w_warehouse_sk#38, i_item_sk#37, d_moy#36, 5), ENSURE_REQUIREMENTS, [id=#50] +Input [9]: [w_warehouse_name#35, w_warehouse_sk#34, i_item_sk#33, d_moy#32, n#41, avg#42, m2#43, sum#44, count#45] +Arguments: hashpartitioning(w_warehouse_name#35, w_warehouse_sk#34, i_item_sk#33, d_moy#32, 5), ENSURE_REQUIREMENTS, [plan_id=5] (40) HashAggregate [codegen id : 11] -Input [9]: [w_warehouse_name#39, w_warehouse_sk#38, i_item_sk#37, d_moy#36, n#45, avg#46, m2#47, sum#48, count#49] -Keys [4]: [w_warehouse_name#39, w_warehouse_sk#38, i_item_sk#37, d_moy#36] -Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#32 as double)), avg(inv_quantity_on_hand#32)] -Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#32 as double))#24, avg(inv_quantity_on_hand#32)#25] -Results [5]: [w_warehouse_sk#38, i_item_sk#37, d_moy#36, stddev_samp(cast(inv_quantity_on_hand#32 as double))#24 AS stdev#26, avg(inv_quantity_on_hand#32)#25 AS mean#27] +Input [9]: [w_warehouse_name#35, w_warehouse_sk#34, i_item_sk#33, d_moy#32, n#41, avg#42, m2#43, sum#44, count#45] +Keys [4]: [w_warehouse_name#35, w_warehouse_sk#34, i_item_sk#33, d_moy#32] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double)), avg(inv_quantity_on_hand#28)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double))#21, avg(inv_quantity_on_hand#28)#22] +Results [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, stddev_samp(cast(inv_quantity_on_hand#28 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#28)#22 AS mean#24] (41) Filter [codegen id : 11] -Input [5]: [w_warehouse_sk#38, i_item_sk#37, d_moy#36, stdev#26, mean#27] -Condition : ((isnotnull(stdev#26) AND isnotnull(mean#27)) AND (NOT coalesce((mean#27 = 0.0), false) AND ((stdev#26 / mean#27) > 1.0))) +Input [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, stdev#23, mean#24] +Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND (NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0))) (42) Project [codegen id : 11] -Output [5]: [w_warehouse_sk#38, i_item_sk#37, d_moy#36, mean#27 AS mean#51, CASE WHEN (mean#27 = 0.0) THEN null ELSE (stdev#26 / mean#27) END AS cov#52] -Input [5]: [w_warehouse_sk#38, i_item_sk#37, d_moy#36, stdev#26, mean#27] +Output [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, mean#24 AS mean#46, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#47] +Input [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, stdev#23, mean#24] (43) Exchange -Input [5]: [w_warehouse_sk#38, i_item_sk#37, d_moy#36, mean#51, cov#52] -Arguments: hashpartitioning(i_item_sk#37, w_warehouse_sk#38, 5), ENSURE_REQUIREMENTS, [id=#53] +Input [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, mean#46, cov#47] +Arguments: hashpartitioning(i_item_sk#33, w_warehouse_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=6] (44) Sort [codegen id : 12] -Input [5]: [w_warehouse_sk#38, i_item_sk#37, d_moy#36, mean#51, cov#52] -Arguments: [i_item_sk#37 ASC NULLS FIRST, w_warehouse_sk#38 ASC NULLS FIRST], false, 0 +Input [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, mean#46, cov#47] +Arguments: [i_item_sk#33 ASC NULLS FIRST, w_warehouse_sk#34 ASC NULLS FIRST], false, 0 (45) SortMergeJoin [codegen id : 13] -Left keys [2]: [i_item_sk#8, w_warehouse_sk#10] -Right keys [2]: [i_item_sk#37, w_warehouse_sk#38] +Left keys [2]: [i_item_sk#8, w_warehouse_sk#9] +Right keys [2]: [i_item_sk#33, w_warehouse_sk#34] Join condition: None (46) Exchange -Input [10]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, mean#27, cov#28, w_warehouse_sk#38, i_item_sk#37, d_moy#36, mean#51, cov#52] -Arguments: rangepartitioning(w_warehouse_sk#10 ASC NULLS FIRST, i_item_sk#8 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, mean#27 ASC NULLS FIRST, cov#28 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#54] +Input [10]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, mean#24, cov#25, w_warehouse_sk#34, i_item_sk#33, d_moy#32, mean#46, cov#47] +Arguments: rangepartitioning(w_warehouse_sk#9 ASC NULLS FIRST, i_item_sk#8 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=7] (47) Sort [codegen id : 14] -Input [10]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, mean#27, cov#28, w_warehouse_sk#38, i_item_sk#37, d_moy#36, mean#51, cov#52] -Arguments: [w_warehouse_sk#10 ASC NULLS FIRST, i_item_sk#8 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, mean#27 ASC NULLS FIRST, cov#28 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST], true, 0 +Input [10]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, mean#24, cov#25, w_warehouse_sk#34, i_item_sk#33, d_moy#32, mean#46, cov#47] +Arguments: [w_warehouse_sk#9 ASC NULLS FIRST, i_item_sk#8 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST], true, 0 ===== Subqueries ===== @@ -272,28 +272,28 @@ BroadcastExchange (52) (48) Scan parquet default.date_dim -Output [3]: [d_date_sk#6, d_year#55, d_moy#7] +Output [3]: [d_date_sk#6, d_year#48, d_moy#7] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] ReadSchema: struct (49) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#55, d_moy#7] +Input [3]: [d_date_sk#6, d_year#48, d_moy#7] (50) Filter [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#55, d_moy#7] -Condition : ((((isnotnull(d_year#55) AND isnotnull(d_moy#7)) AND (d_year#55 = 2001)) AND (d_moy#7 = 1)) AND isnotnull(d_date_sk#6)) +Input [3]: [d_date_sk#6, d_year#48, d_moy#7] +Condition : ((((isnotnull(d_year#48) AND isnotnull(d_moy#7)) AND (d_year#48 = 2001)) AND (d_moy#7 = 1)) AND isnotnull(d_date_sk#6)) (51) Project [codegen id : 1] Output [2]: [d_date_sk#6, d_moy#7] -Input [3]: [d_date_sk#6, d_year#55, d_moy#7] +Input [3]: [d_date_sk#6, d_year#48, d_moy#7] (52) BroadcastExchange Input [2]: [d_date_sk#6, d_moy#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#56] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 26 Hosting Expression = inv_date_sk#33 IN dynamicpruning#34 +Subquery:2 Hosting operator id = 26 Hosting Expression = inv_date_sk#29 IN dynamicpruning#30 BroadcastExchange (57) +- * Project (56) +- * Filter (55) @@ -302,25 +302,25 @@ BroadcastExchange (57) (53) Scan parquet default.date_dim -Output [3]: [d_date_sk#35, d_year#57, d_moy#36] +Output [3]: [d_date_sk#31, d_year#49, d_moy#32] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (54) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#35, d_year#57, d_moy#36] +Input [3]: [d_date_sk#31, d_year#49, d_moy#32] (55) Filter [codegen id : 1] -Input [3]: [d_date_sk#35, d_year#57, d_moy#36] -Condition : ((((isnotnull(d_year#57) AND isnotnull(d_moy#36)) AND (d_year#57 = 2001)) AND (d_moy#36 = 2)) AND isnotnull(d_date_sk#35)) +Input [3]: [d_date_sk#31, d_year#49, d_moy#32] +Condition : ((((isnotnull(d_year#49) AND isnotnull(d_moy#32)) AND (d_year#49 = 2001)) AND (d_moy#32 = 2)) AND isnotnull(d_date_sk#31)) (56) Project [codegen id : 1] -Output [2]: [d_date_sk#35, d_moy#36] -Input [3]: [d_date_sk#35, d_year#57, d_moy#36] +Output [2]: [d_date_sk#31, d_moy#32] +Input [3]: [d_date_sk#31, d_year#49, d_moy#32] (57) BroadcastExchange -Input [2]: [d_date_sk#35, d_moy#36] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#58] +Input [2]: [d_date_sk#31, d_moy#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt index 541a5903e362e..bbf9b5185e776 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39a/explain.txt @@ -76,7 +76,7 @@ Condition : isnotnull(i_item_sk#6) (7) BroadcastExchange Input [1]: [i_item_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_item_sk#1] @@ -88,163 +88,163 @@ Output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_s Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6] (10) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#8, w_warehouse_name#9] +Output [2]: [w_warehouse_sk#7, w_warehouse_name#8] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [2]: [w_warehouse_sk#8, w_warehouse_name#9] +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] (12) Filter [codegen id : 2] -Input [2]: [w_warehouse_sk#8, w_warehouse_name#9] -Condition : isnotnull(w_warehouse_sk#8) +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Condition : isnotnull(w_warehouse_sk#7) (13) BroadcastExchange -Input [2]: [w_warehouse_sk#8, w_warehouse_name#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_warehouse_sk#2] -Right keys [1]: [w_warehouse_sk#8] +Right keys [1]: [w_warehouse_sk#7] Join condition: None (15) Project [codegen id : 4] -Output [5]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6, w_warehouse_sk#8, w_warehouse_name#9] -Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6, w_warehouse_sk#8, w_warehouse_name#9] +Output [5]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6, w_warehouse_sk#7, w_warehouse_name#8] +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6, w_warehouse_sk#7, w_warehouse_name#8] (16) ReusedExchange [Reuses operator id: 49] -Output [2]: [d_date_sk#11, d_moy#12] +Output [2]: [d_date_sk#9, d_moy#10] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_date_sk#4] -Right keys [1]: [d_date_sk#11] +Right keys [1]: [d_date_sk#9] Join condition: None (18) Project [codegen id : 4] -Output [5]: [inv_quantity_on_hand#3, i_item_sk#6, w_warehouse_sk#8, w_warehouse_name#9, d_moy#12] -Input [7]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6, w_warehouse_sk#8, w_warehouse_name#9, d_date_sk#11, d_moy#12] +Output [5]: [inv_quantity_on_hand#3, i_item_sk#6, w_warehouse_sk#7, w_warehouse_name#8, d_moy#10] +Input [7]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6, w_warehouse_sk#7, w_warehouse_name#8, d_date_sk#9, d_moy#10] (19) HashAggregate [codegen id : 4] -Input [5]: [inv_quantity_on_hand#3, i_item_sk#6, w_warehouse_sk#8, w_warehouse_name#9, d_moy#12] -Keys [4]: [w_warehouse_name#9, w_warehouse_sk#8, i_item_sk#6, d_moy#12] +Input [5]: [inv_quantity_on_hand#3, i_item_sk#6, w_warehouse_sk#7, w_warehouse_name#8, d_moy#10] +Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#6, d_moy#10] Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#3 as double)), partial_avg(inv_quantity_on_hand#3)] -Aggregate Attributes [5]: [n#13, avg#14, m2#15, sum#16, count#17] -Results [9]: [w_warehouse_name#9, w_warehouse_sk#8, i_item_sk#6, d_moy#12, n#18, avg#19, m2#20, sum#21, count#22] +Aggregate Attributes [5]: [n#11, avg#12, m2#13, sum#14, count#15] +Results [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#6, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] (20) Exchange -Input [9]: [w_warehouse_name#9, w_warehouse_sk#8, i_item_sk#6, d_moy#12, n#18, avg#19, m2#20, sum#21, count#22] -Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sk#8, i_item_sk#6, d_moy#12, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#6, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Arguments: hashpartitioning(w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#6, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 10] -Input [9]: [w_warehouse_name#9, w_warehouse_sk#8, i_item_sk#6, d_moy#12, n#18, avg#19, m2#20, sum#21, count#22] -Keys [4]: [w_warehouse_name#9, w_warehouse_sk#8, i_item_sk#6, d_moy#12] +Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#6, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#6, d_moy#10] Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double)), avg(inv_quantity_on_hand#3)] -Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double))#24, avg(inv_quantity_on_hand#3)#25] -Results [5]: [w_warehouse_sk#8, i_item_sk#6, d_moy#12, stddev_samp(cast(inv_quantity_on_hand#3 as double))#24 AS stdev#26, avg(inv_quantity_on_hand#3)#25 AS mean#27] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double))#21, avg(inv_quantity_on_hand#3)#22] +Results [5]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, stddev_samp(cast(inv_quantity_on_hand#3 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#3)#22 AS mean#24] (22) Filter [codegen id : 10] -Input [5]: [w_warehouse_sk#8, i_item_sk#6, d_moy#12, stdev#26, mean#27] -Condition : ((isnotnull(stdev#26) AND isnotnull(mean#27)) AND (NOT coalesce((mean#27 = 0.0), false) AND ((stdev#26 / mean#27) > 1.0))) +Input [5]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, stdev#23, mean#24] +Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND (NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0))) (23) Project [codegen id : 10] -Output [5]: [w_warehouse_sk#8, i_item_sk#6, d_moy#12, mean#27, CASE WHEN (mean#27 = 0.0) THEN null ELSE (stdev#26 / mean#27) END AS cov#28] -Input [5]: [w_warehouse_sk#8, i_item_sk#6, d_moy#12, stdev#26, mean#27] +Output [5]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, mean#24, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#25] +Input [5]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, stdev#23, mean#24] (24) Scan parquet default.inventory -Output [4]: [inv_item_sk#29, inv_warehouse_sk#30, inv_quantity_on_hand#31, inv_date_sk#32] +Output [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(inv_date_sk#32), dynamicpruningexpression(inv_date_sk#32 IN dynamicpruning#33)] +PartitionFilters: [isnotnull(inv_date_sk#29), dynamicpruningexpression(inv_date_sk#29 IN dynamicpruning#30)] PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 8] -Input [4]: [inv_item_sk#29, inv_warehouse_sk#30, inv_quantity_on_hand#31, inv_date_sk#32] +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] (26) Filter [codegen id : 8] -Input [4]: [inv_item_sk#29, inv_warehouse_sk#30, inv_quantity_on_hand#31, inv_date_sk#32] -Condition : (isnotnull(inv_item_sk#29) AND isnotnull(inv_warehouse_sk#30)) +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Condition : (isnotnull(inv_item_sk#26) AND isnotnull(inv_warehouse_sk#27)) (27) ReusedExchange [Reuses operator id: 7] -Output [1]: [i_item_sk#34] +Output [1]: [i_item_sk#31] (28) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [inv_item_sk#29] -Right keys [1]: [i_item_sk#34] +Left keys [1]: [inv_item_sk#26] +Right keys [1]: [i_item_sk#31] Join condition: None (29) Project [codegen id : 8] -Output [4]: [inv_warehouse_sk#30, inv_quantity_on_hand#31, inv_date_sk#32, i_item_sk#34] -Input [5]: [inv_item_sk#29, inv_warehouse_sk#30, inv_quantity_on_hand#31, inv_date_sk#32, i_item_sk#34] +Output [4]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#31] +Input [5]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#31] (30) ReusedExchange [Reuses operator id: 13] -Output [2]: [w_warehouse_sk#35, w_warehouse_name#36] +Output [2]: [w_warehouse_sk#32, w_warehouse_name#33] (31) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [inv_warehouse_sk#30] -Right keys [1]: [w_warehouse_sk#35] +Left keys [1]: [inv_warehouse_sk#27] +Right keys [1]: [w_warehouse_sk#32] Join condition: None (32) Project [codegen id : 8] -Output [5]: [inv_quantity_on_hand#31, inv_date_sk#32, i_item_sk#34, w_warehouse_sk#35, w_warehouse_name#36] -Input [6]: [inv_warehouse_sk#30, inv_quantity_on_hand#31, inv_date_sk#32, i_item_sk#34, w_warehouse_sk#35, w_warehouse_name#36] +Output [5]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#31, w_warehouse_sk#32, w_warehouse_name#33] +Input [6]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#31, w_warehouse_sk#32, w_warehouse_name#33] (33) ReusedExchange [Reuses operator id: 54] -Output [2]: [d_date_sk#37, d_moy#38] +Output [2]: [d_date_sk#34, d_moy#35] (34) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [inv_date_sk#32] -Right keys [1]: [d_date_sk#37] +Left keys [1]: [inv_date_sk#29] +Right keys [1]: [d_date_sk#34] Join condition: None (35) Project [codegen id : 8] -Output [5]: [inv_quantity_on_hand#31, i_item_sk#34, w_warehouse_sk#35, w_warehouse_name#36, d_moy#38] -Input [7]: [inv_quantity_on_hand#31, inv_date_sk#32, i_item_sk#34, w_warehouse_sk#35, w_warehouse_name#36, d_date_sk#37, d_moy#38] +Output [5]: [inv_quantity_on_hand#28, i_item_sk#31, w_warehouse_sk#32, w_warehouse_name#33, d_moy#35] +Input [7]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#31, w_warehouse_sk#32, w_warehouse_name#33, d_date_sk#34, d_moy#35] (36) HashAggregate [codegen id : 8] -Input [5]: [inv_quantity_on_hand#31, i_item_sk#34, w_warehouse_sk#35, w_warehouse_name#36, d_moy#38] -Keys [4]: [w_warehouse_name#36, w_warehouse_sk#35, i_item_sk#34, d_moy#38] -Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#31 as double)), partial_avg(inv_quantity_on_hand#31)] -Aggregate Attributes [5]: [n#39, avg#40, m2#41, sum#42, count#43] -Results [9]: [w_warehouse_name#36, w_warehouse_sk#35, i_item_sk#34, d_moy#38, n#44, avg#45, m2#46, sum#47, count#48] +Input [5]: [inv_quantity_on_hand#28, i_item_sk#31, w_warehouse_sk#32, w_warehouse_name#33, d_moy#35] +Keys [4]: [w_warehouse_name#33, w_warehouse_sk#32, i_item_sk#31, d_moy#35] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#28 as double)), partial_avg(inv_quantity_on_hand#28)] +Aggregate Attributes [5]: [n#36, avg#37, m2#38, sum#39, count#40] +Results [9]: [w_warehouse_name#33, w_warehouse_sk#32, i_item_sk#31, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] (37) Exchange -Input [9]: [w_warehouse_name#36, w_warehouse_sk#35, i_item_sk#34, d_moy#38, n#44, avg#45, m2#46, sum#47, count#48] -Arguments: hashpartitioning(w_warehouse_name#36, w_warehouse_sk#35, i_item_sk#34, d_moy#38, 5), ENSURE_REQUIREMENTS, [id=#49] +Input [9]: [w_warehouse_name#33, w_warehouse_sk#32, i_item_sk#31, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Arguments: hashpartitioning(w_warehouse_name#33, w_warehouse_sk#32, i_item_sk#31, d_moy#35, 5), ENSURE_REQUIREMENTS, [plan_id=4] (38) HashAggregate [codegen id : 9] -Input [9]: [w_warehouse_name#36, w_warehouse_sk#35, i_item_sk#34, d_moy#38, n#44, avg#45, m2#46, sum#47, count#48] -Keys [4]: [w_warehouse_name#36, w_warehouse_sk#35, i_item_sk#34, d_moy#38] -Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#31 as double)), avg(inv_quantity_on_hand#31)] -Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#31 as double))#24, avg(inv_quantity_on_hand#31)#25] -Results [5]: [w_warehouse_sk#35, i_item_sk#34, d_moy#38, stddev_samp(cast(inv_quantity_on_hand#31 as double))#24 AS stdev#26, avg(inv_quantity_on_hand#31)#25 AS mean#27] +Input [9]: [w_warehouse_name#33, w_warehouse_sk#32, i_item_sk#31, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Keys [4]: [w_warehouse_name#33, w_warehouse_sk#32, i_item_sk#31, d_moy#35] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double)), avg(inv_quantity_on_hand#28)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double))#21, avg(inv_quantity_on_hand#28)#22] +Results [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, stddev_samp(cast(inv_quantity_on_hand#28 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#28)#22 AS mean#24] (39) Filter [codegen id : 9] -Input [5]: [w_warehouse_sk#35, i_item_sk#34, d_moy#38, stdev#26, mean#27] -Condition : ((isnotnull(stdev#26) AND isnotnull(mean#27)) AND (NOT coalesce((mean#27 = 0.0), false) AND ((stdev#26 / mean#27) > 1.0))) +Input [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, stdev#23, mean#24] +Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND (NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0))) (40) Project [codegen id : 9] -Output [5]: [w_warehouse_sk#35, i_item_sk#34, d_moy#38, mean#27 AS mean#50, CASE WHEN (mean#27 = 0.0) THEN null ELSE (stdev#26 / mean#27) END AS cov#51] -Input [5]: [w_warehouse_sk#35, i_item_sk#34, d_moy#38, stdev#26, mean#27] +Output [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, mean#24 AS mean#46, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#47] +Input [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, stdev#23, mean#24] (41) BroadcastExchange -Input [5]: [w_warehouse_sk#35, i_item_sk#34, d_moy#38, mean#50, cov#51] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [id=#52] +Input [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, mean#46, cov#47] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=5] (42) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [i_item_sk#6, w_warehouse_sk#8] -Right keys [2]: [i_item_sk#34, w_warehouse_sk#35] +Left keys [2]: [i_item_sk#6, w_warehouse_sk#7] +Right keys [2]: [i_item_sk#31, w_warehouse_sk#32] Join condition: None (43) Exchange -Input [10]: [w_warehouse_sk#8, i_item_sk#6, d_moy#12, mean#27, cov#28, w_warehouse_sk#35, i_item_sk#34, d_moy#38, mean#50, cov#51] -Arguments: rangepartitioning(w_warehouse_sk#8 ASC NULLS FIRST, i_item_sk#6 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#27 ASC NULLS FIRST, cov#28 ASC NULLS FIRST, d_moy#38 ASC NULLS FIRST, mean#50 ASC NULLS FIRST, cov#51 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#53] +Input [10]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, mean#24, cov#25, w_warehouse_sk#32, i_item_sk#31, d_moy#35, mean#46, cov#47] +Arguments: rangepartitioning(w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#6 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=6] (44) Sort [codegen id : 11] -Input [10]: [w_warehouse_sk#8, i_item_sk#6, d_moy#12, mean#27, cov#28, w_warehouse_sk#35, i_item_sk#34, d_moy#38, mean#50, cov#51] -Arguments: [w_warehouse_sk#8 ASC NULLS FIRST, i_item_sk#6 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#27 ASC NULLS FIRST, cov#28 ASC NULLS FIRST, d_moy#38 ASC NULLS FIRST, mean#50 ASC NULLS FIRST, cov#51 ASC NULLS FIRST], true, 0 +Input [10]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, mean#24, cov#25, w_warehouse_sk#32, i_item_sk#31, d_moy#35, mean#46, cov#47] +Arguments: [w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#6 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST], true, 0 ===== Subqueries ===== @@ -257,28 +257,28 @@ BroadcastExchange (49) (45) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_year#54, d_moy#12] +Output [3]: [d_date_sk#9, d_year#48, d_moy#10] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] ReadSchema: struct (46) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#54, d_moy#12] +Input [3]: [d_date_sk#9, d_year#48, d_moy#10] (47) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#54, d_moy#12] -Condition : ((((isnotnull(d_year#54) AND isnotnull(d_moy#12)) AND (d_year#54 = 2001)) AND (d_moy#12 = 1)) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#9, d_year#48, d_moy#10] +Condition : ((((isnotnull(d_year#48) AND isnotnull(d_moy#10)) AND (d_year#48 = 2001)) AND (d_moy#10 = 1)) AND isnotnull(d_date_sk#9)) (48) Project [codegen id : 1] -Output [2]: [d_date_sk#11, d_moy#12] -Input [3]: [d_date_sk#11, d_year#54, d_moy#12] +Output [2]: [d_date_sk#9, d_moy#10] +Input [3]: [d_date_sk#9, d_year#48, d_moy#10] (49) BroadcastExchange -Input [2]: [d_date_sk#11, d_moy#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#55] +Input [2]: [d_date_sk#9, d_moy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:2 Hosting operator id = 24 Hosting Expression = inv_date_sk#32 IN dynamicpruning#33 +Subquery:2 Hosting operator id = 24 Hosting Expression = inv_date_sk#29 IN dynamicpruning#30 BroadcastExchange (54) +- * Project (53) +- * Filter (52) @@ -287,25 +287,25 @@ BroadcastExchange (54) (50) Scan parquet default.date_dim -Output [3]: [d_date_sk#37, d_year#56, d_moy#38] +Output [3]: [d_date_sk#34, d_year#49, d_moy#35] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#37, d_year#56, d_moy#38] +Input [3]: [d_date_sk#34, d_year#49, d_moy#35] (52) Filter [codegen id : 1] -Input [3]: [d_date_sk#37, d_year#56, d_moy#38] -Condition : ((((isnotnull(d_year#56) AND isnotnull(d_moy#38)) AND (d_year#56 = 2001)) AND (d_moy#38 = 2)) AND isnotnull(d_date_sk#37)) +Input [3]: [d_date_sk#34, d_year#49, d_moy#35] +Condition : ((((isnotnull(d_year#49) AND isnotnull(d_moy#35)) AND (d_year#49 = 2001)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#34)) (53) Project [codegen id : 1] -Output [2]: [d_date_sk#37, d_moy#38] -Input [3]: [d_date_sk#37, d_year#56, d_moy#38] +Output [2]: [d_date_sk#34, d_moy#35] +Input [3]: [d_date_sk#34, d_year#49, d_moy#35] (54) BroadcastExchange -Input [2]: [d_date_sk#37, d_moy#38] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#57] +Input [2]: [d_date_sk#34, d_moy#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/explain.txt index b774c9688384c..c9208985327a3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b.sf100/explain.txt @@ -91,7 +91,7 @@ Condition : isnotnull(i_item_sk#8) (10) BroadcastExchange Input [1]: [i_item_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_item_sk#1] @@ -103,163 +103,163 @@ Output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, d_moy#7, i_item_sk#8] Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, d_moy#7, i_item_sk#8] (13) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#10, w_warehouse_name#11] +Output [2]: [w_warehouse_sk#9, w_warehouse_name#10] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [2]: [w_warehouse_sk#10, w_warehouse_name#11] +Input [2]: [w_warehouse_sk#9, w_warehouse_name#10] (15) Filter [codegen id : 3] -Input [2]: [w_warehouse_sk#10, w_warehouse_name#11] -Condition : isnotnull(w_warehouse_sk#10) +Input [2]: [w_warehouse_sk#9, w_warehouse_name#10] +Condition : isnotnull(w_warehouse_sk#9) (16) BroadcastExchange -Input [2]: [w_warehouse_sk#10, w_warehouse_name#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] +Input [2]: [w_warehouse_sk#9, w_warehouse_name#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_warehouse_sk#2] -Right keys [1]: [w_warehouse_sk#10] +Right keys [1]: [w_warehouse_sk#9] Join condition: None (18) Project [codegen id : 4] -Output [5]: [inv_quantity_on_hand#3, i_item_sk#8, w_warehouse_sk#10, w_warehouse_name#11, d_moy#7] -Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, d_moy#7, i_item_sk#8, w_warehouse_sk#10, w_warehouse_name#11] +Output [5]: [inv_quantity_on_hand#3, i_item_sk#8, w_warehouse_sk#9, w_warehouse_name#10, d_moy#7] +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, d_moy#7, i_item_sk#8, w_warehouse_sk#9, w_warehouse_name#10] (19) HashAggregate [codegen id : 4] -Input [5]: [inv_quantity_on_hand#3, i_item_sk#8, w_warehouse_sk#10, w_warehouse_name#11, d_moy#7] -Keys [4]: [w_warehouse_name#11, w_warehouse_sk#10, i_item_sk#8, d_moy#7] +Input [5]: [inv_quantity_on_hand#3, i_item_sk#8, w_warehouse_sk#9, w_warehouse_name#10, d_moy#7] +Keys [4]: [w_warehouse_name#10, w_warehouse_sk#9, i_item_sk#8, d_moy#7] Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#3 as double)), partial_avg(inv_quantity_on_hand#3)] -Aggregate Attributes [5]: [n#13, avg#14, m2#15, sum#16, count#17] -Results [9]: [w_warehouse_name#11, w_warehouse_sk#10, i_item_sk#8, d_moy#7, n#18, avg#19, m2#20, sum#21, count#22] +Aggregate Attributes [5]: [n#11, avg#12, m2#13, sum#14, count#15] +Results [9]: [w_warehouse_name#10, w_warehouse_sk#9, i_item_sk#8, d_moy#7, n#16, avg#17, m2#18, sum#19, count#20] (20) Exchange -Input [9]: [w_warehouse_name#11, w_warehouse_sk#10, i_item_sk#8, d_moy#7, n#18, avg#19, m2#20, sum#21, count#22] -Arguments: hashpartitioning(w_warehouse_name#11, w_warehouse_sk#10, i_item_sk#8, d_moy#7, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [9]: [w_warehouse_name#10, w_warehouse_sk#9, i_item_sk#8, d_moy#7, n#16, avg#17, m2#18, sum#19, count#20] +Arguments: hashpartitioning(w_warehouse_name#10, w_warehouse_sk#9, i_item_sk#8, d_moy#7, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 5] -Input [9]: [w_warehouse_name#11, w_warehouse_sk#10, i_item_sk#8, d_moy#7, n#18, avg#19, m2#20, sum#21, count#22] -Keys [4]: [w_warehouse_name#11, w_warehouse_sk#10, i_item_sk#8, d_moy#7] +Input [9]: [w_warehouse_name#10, w_warehouse_sk#9, i_item_sk#8, d_moy#7, n#16, avg#17, m2#18, sum#19, count#20] +Keys [4]: [w_warehouse_name#10, w_warehouse_sk#9, i_item_sk#8, d_moy#7] Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double)), avg(inv_quantity_on_hand#3)] -Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double))#24, avg(inv_quantity_on_hand#3)#25] -Results [5]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, stddev_samp(cast(inv_quantity_on_hand#3 as double))#24 AS stdev#26, avg(inv_quantity_on_hand#3)#25 AS mean#27] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double))#21, avg(inv_quantity_on_hand#3)#22] +Results [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, stddev_samp(cast(inv_quantity_on_hand#3 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#3)#22 AS mean#24] (22) Filter [codegen id : 5] -Input [5]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, stdev#26, mean#27] -Condition : ((isnotnull(stdev#26) AND isnotnull(mean#27)) AND ((NOT coalesce((mean#27 = 0.0), false) AND ((stdev#26 / mean#27) > 1.0)) AND ((stdev#26 / mean#27) > 1.5))) +Input [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, stdev#23, mean#24] +Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND ((NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0)) AND ((stdev#23 / mean#24) > 1.5))) (23) Project [codegen id : 5] -Output [5]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, mean#27, CASE WHEN (mean#27 = 0.0) THEN null ELSE (stdev#26 / mean#27) END AS cov#28] -Input [5]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, stdev#26, mean#27] +Output [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, mean#24, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#25] +Input [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, stdev#23, mean#24] (24) Exchange -Input [5]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, mean#27, cov#28] -Arguments: hashpartitioning(i_item_sk#8, w_warehouse_sk#10, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, mean#24, cov#25] +Arguments: hashpartitioning(i_item_sk#8, w_warehouse_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] (25) Sort [codegen id : 6] -Input [5]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, mean#27, cov#28] -Arguments: [i_item_sk#8 ASC NULLS FIRST, w_warehouse_sk#10 ASC NULLS FIRST], false, 0 +Input [5]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, mean#24, cov#25] +Arguments: [i_item_sk#8 ASC NULLS FIRST, w_warehouse_sk#9 ASC NULLS FIRST], false, 0 (26) Scan parquet default.inventory -Output [4]: [inv_item_sk#30, inv_warehouse_sk#31, inv_quantity_on_hand#32, inv_date_sk#33] +Output [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(inv_date_sk#33), dynamicpruningexpression(inv_date_sk#33 IN dynamicpruning#34)] +PartitionFilters: [isnotnull(inv_date_sk#29), dynamicpruningexpression(inv_date_sk#29 IN dynamicpruning#30)] PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 10] -Input [4]: [inv_item_sk#30, inv_warehouse_sk#31, inv_quantity_on_hand#32, inv_date_sk#33] +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] (28) Filter [codegen id : 10] -Input [4]: [inv_item_sk#30, inv_warehouse_sk#31, inv_quantity_on_hand#32, inv_date_sk#33] -Condition : (isnotnull(inv_item_sk#30) AND isnotnull(inv_warehouse_sk#31)) +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Condition : (isnotnull(inv_item_sk#26) AND isnotnull(inv_warehouse_sk#27)) (29) ReusedExchange [Reuses operator id: 57] -Output [2]: [d_date_sk#35, d_moy#36] +Output [2]: [d_date_sk#31, d_moy#32] (30) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [inv_date_sk#33] -Right keys [1]: [d_date_sk#35] +Left keys [1]: [inv_date_sk#29] +Right keys [1]: [d_date_sk#31] Join condition: None (31) Project [codegen id : 10] -Output [4]: [inv_item_sk#30, inv_warehouse_sk#31, inv_quantity_on_hand#32, d_moy#36] -Input [6]: [inv_item_sk#30, inv_warehouse_sk#31, inv_quantity_on_hand#32, inv_date_sk#33, d_date_sk#35, d_moy#36] +Output [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, d_moy#32] +Input [6]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, d_date_sk#31, d_moy#32] (32) ReusedExchange [Reuses operator id: 10] -Output [1]: [i_item_sk#37] +Output [1]: [i_item_sk#33] (33) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [inv_item_sk#30] -Right keys [1]: [i_item_sk#37] +Left keys [1]: [inv_item_sk#26] +Right keys [1]: [i_item_sk#33] Join condition: None (34) Project [codegen id : 10] -Output [4]: [inv_warehouse_sk#31, inv_quantity_on_hand#32, d_moy#36, i_item_sk#37] -Input [5]: [inv_item_sk#30, inv_warehouse_sk#31, inv_quantity_on_hand#32, d_moy#36, i_item_sk#37] +Output [4]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, d_moy#32, i_item_sk#33] +Input [5]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, d_moy#32, i_item_sk#33] (35) ReusedExchange [Reuses operator id: 16] -Output [2]: [w_warehouse_sk#38, w_warehouse_name#39] +Output [2]: [w_warehouse_sk#34, w_warehouse_name#35] (36) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [inv_warehouse_sk#31] -Right keys [1]: [w_warehouse_sk#38] +Left keys [1]: [inv_warehouse_sk#27] +Right keys [1]: [w_warehouse_sk#34] Join condition: None (37) Project [codegen id : 10] -Output [5]: [inv_quantity_on_hand#32, i_item_sk#37, w_warehouse_sk#38, w_warehouse_name#39, d_moy#36] -Input [6]: [inv_warehouse_sk#31, inv_quantity_on_hand#32, d_moy#36, i_item_sk#37, w_warehouse_sk#38, w_warehouse_name#39] +Output [5]: [inv_quantity_on_hand#28, i_item_sk#33, w_warehouse_sk#34, w_warehouse_name#35, d_moy#32] +Input [6]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, d_moy#32, i_item_sk#33, w_warehouse_sk#34, w_warehouse_name#35] (38) HashAggregate [codegen id : 10] -Input [5]: [inv_quantity_on_hand#32, i_item_sk#37, w_warehouse_sk#38, w_warehouse_name#39, d_moy#36] -Keys [4]: [w_warehouse_name#39, w_warehouse_sk#38, i_item_sk#37, d_moy#36] -Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#32 as double)), partial_avg(inv_quantity_on_hand#32)] -Aggregate Attributes [5]: [n#40, avg#41, m2#42, sum#43, count#44] -Results [9]: [w_warehouse_name#39, w_warehouse_sk#38, i_item_sk#37, d_moy#36, n#45, avg#46, m2#47, sum#48, count#49] +Input [5]: [inv_quantity_on_hand#28, i_item_sk#33, w_warehouse_sk#34, w_warehouse_name#35, d_moy#32] +Keys [4]: [w_warehouse_name#35, w_warehouse_sk#34, i_item_sk#33, d_moy#32] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#28 as double)), partial_avg(inv_quantity_on_hand#28)] +Aggregate Attributes [5]: [n#36, avg#37, m2#38, sum#39, count#40] +Results [9]: [w_warehouse_name#35, w_warehouse_sk#34, i_item_sk#33, d_moy#32, n#41, avg#42, m2#43, sum#44, count#45] (39) Exchange -Input [9]: [w_warehouse_name#39, w_warehouse_sk#38, i_item_sk#37, d_moy#36, n#45, avg#46, m2#47, sum#48, count#49] -Arguments: hashpartitioning(w_warehouse_name#39, w_warehouse_sk#38, i_item_sk#37, d_moy#36, 5), ENSURE_REQUIREMENTS, [id=#50] +Input [9]: [w_warehouse_name#35, w_warehouse_sk#34, i_item_sk#33, d_moy#32, n#41, avg#42, m2#43, sum#44, count#45] +Arguments: hashpartitioning(w_warehouse_name#35, w_warehouse_sk#34, i_item_sk#33, d_moy#32, 5), ENSURE_REQUIREMENTS, [plan_id=5] (40) HashAggregate [codegen id : 11] -Input [9]: [w_warehouse_name#39, w_warehouse_sk#38, i_item_sk#37, d_moy#36, n#45, avg#46, m2#47, sum#48, count#49] -Keys [4]: [w_warehouse_name#39, w_warehouse_sk#38, i_item_sk#37, d_moy#36] -Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#32 as double)), avg(inv_quantity_on_hand#32)] -Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#32 as double))#24, avg(inv_quantity_on_hand#32)#25] -Results [5]: [w_warehouse_sk#38, i_item_sk#37, d_moy#36, stddev_samp(cast(inv_quantity_on_hand#32 as double))#24 AS stdev#26, avg(inv_quantity_on_hand#32)#25 AS mean#27] +Input [9]: [w_warehouse_name#35, w_warehouse_sk#34, i_item_sk#33, d_moy#32, n#41, avg#42, m2#43, sum#44, count#45] +Keys [4]: [w_warehouse_name#35, w_warehouse_sk#34, i_item_sk#33, d_moy#32] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double)), avg(inv_quantity_on_hand#28)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double))#21, avg(inv_quantity_on_hand#28)#22] +Results [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, stddev_samp(cast(inv_quantity_on_hand#28 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#28)#22 AS mean#24] (41) Filter [codegen id : 11] -Input [5]: [w_warehouse_sk#38, i_item_sk#37, d_moy#36, stdev#26, mean#27] -Condition : ((isnotnull(stdev#26) AND isnotnull(mean#27)) AND (NOT coalesce((mean#27 = 0.0), false) AND ((stdev#26 / mean#27) > 1.0))) +Input [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, stdev#23, mean#24] +Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND (NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0))) (42) Project [codegen id : 11] -Output [5]: [w_warehouse_sk#38, i_item_sk#37, d_moy#36, mean#27 AS mean#51, CASE WHEN (mean#27 = 0.0) THEN null ELSE (stdev#26 / mean#27) END AS cov#52] -Input [5]: [w_warehouse_sk#38, i_item_sk#37, d_moy#36, stdev#26, mean#27] +Output [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, mean#24 AS mean#46, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#47] +Input [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, stdev#23, mean#24] (43) Exchange -Input [5]: [w_warehouse_sk#38, i_item_sk#37, d_moy#36, mean#51, cov#52] -Arguments: hashpartitioning(i_item_sk#37, w_warehouse_sk#38, 5), ENSURE_REQUIREMENTS, [id=#53] +Input [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, mean#46, cov#47] +Arguments: hashpartitioning(i_item_sk#33, w_warehouse_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=6] (44) Sort [codegen id : 12] -Input [5]: [w_warehouse_sk#38, i_item_sk#37, d_moy#36, mean#51, cov#52] -Arguments: [i_item_sk#37 ASC NULLS FIRST, w_warehouse_sk#38 ASC NULLS FIRST], false, 0 +Input [5]: [w_warehouse_sk#34, i_item_sk#33, d_moy#32, mean#46, cov#47] +Arguments: [i_item_sk#33 ASC NULLS FIRST, w_warehouse_sk#34 ASC NULLS FIRST], false, 0 (45) SortMergeJoin [codegen id : 13] -Left keys [2]: [i_item_sk#8, w_warehouse_sk#10] -Right keys [2]: [i_item_sk#37, w_warehouse_sk#38] +Left keys [2]: [i_item_sk#8, w_warehouse_sk#9] +Right keys [2]: [i_item_sk#33, w_warehouse_sk#34] Join condition: None (46) Exchange -Input [10]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, mean#27, cov#28, w_warehouse_sk#38, i_item_sk#37, d_moy#36, mean#51, cov#52] -Arguments: rangepartitioning(w_warehouse_sk#10 ASC NULLS FIRST, i_item_sk#8 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, mean#27 ASC NULLS FIRST, cov#28 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#54] +Input [10]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, mean#24, cov#25, w_warehouse_sk#34, i_item_sk#33, d_moy#32, mean#46, cov#47] +Arguments: rangepartitioning(w_warehouse_sk#9 ASC NULLS FIRST, i_item_sk#8 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=7] (47) Sort [codegen id : 14] -Input [10]: [w_warehouse_sk#10, i_item_sk#8, d_moy#7, mean#27, cov#28, w_warehouse_sk#38, i_item_sk#37, d_moy#36, mean#51, cov#52] -Arguments: [w_warehouse_sk#10 ASC NULLS FIRST, i_item_sk#8 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, mean#27 ASC NULLS FIRST, cov#28 ASC NULLS FIRST, d_moy#36 ASC NULLS FIRST, mean#51 ASC NULLS FIRST, cov#52 ASC NULLS FIRST], true, 0 +Input [10]: [w_warehouse_sk#9, i_item_sk#8, d_moy#7, mean#24, cov#25, w_warehouse_sk#34, i_item_sk#33, d_moy#32, mean#46, cov#47] +Arguments: [w_warehouse_sk#9 ASC NULLS FIRST, i_item_sk#8 ASC NULLS FIRST, d_moy#7 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST], true, 0 ===== Subqueries ===== @@ -272,28 +272,28 @@ BroadcastExchange (52) (48) Scan parquet default.date_dim -Output [3]: [d_date_sk#6, d_year#55, d_moy#7] +Output [3]: [d_date_sk#6, d_year#48, d_moy#7] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] ReadSchema: struct (49) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#55, d_moy#7] +Input [3]: [d_date_sk#6, d_year#48, d_moy#7] (50) Filter [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#55, d_moy#7] -Condition : ((((isnotnull(d_year#55) AND isnotnull(d_moy#7)) AND (d_year#55 = 2001)) AND (d_moy#7 = 1)) AND isnotnull(d_date_sk#6)) +Input [3]: [d_date_sk#6, d_year#48, d_moy#7] +Condition : ((((isnotnull(d_year#48) AND isnotnull(d_moy#7)) AND (d_year#48 = 2001)) AND (d_moy#7 = 1)) AND isnotnull(d_date_sk#6)) (51) Project [codegen id : 1] Output [2]: [d_date_sk#6, d_moy#7] -Input [3]: [d_date_sk#6, d_year#55, d_moy#7] +Input [3]: [d_date_sk#6, d_year#48, d_moy#7] (52) BroadcastExchange Input [2]: [d_date_sk#6, d_moy#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#56] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 26 Hosting Expression = inv_date_sk#33 IN dynamicpruning#34 +Subquery:2 Hosting operator id = 26 Hosting Expression = inv_date_sk#29 IN dynamicpruning#30 BroadcastExchange (57) +- * Project (56) +- * Filter (55) @@ -302,25 +302,25 @@ BroadcastExchange (57) (53) Scan parquet default.date_dim -Output [3]: [d_date_sk#35, d_year#57, d_moy#36] +Output [3]: [d_date_sk#31, d_year#49, d_moy#32] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (54) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#35, d_year#57, d_moy#36] +Input [3]: [d_date_sk#31, d_year#49, d_moy#32] (55) Filter [codegen id : 1] -Input [3]: [d_date_sk#35, d_year#57, d_moy#36] -Condition : ((((isnotnull(d_year#57) AND isnotnull(d_moy#36)) AND (d_year#57 = 2001)) AND (d_moy#36 = 2)) AND isnotnull(d_date_sk#35)) +Input [3]: [d_date_sk#31, d_year#49, d_moy#32] +Condition : ((((isnotnull(d_year#49) AND isnotnull(d_moy#32)) AND (d_year#49 = 2001)) AND (d_moy#32 = 2)) AND isnotnull(d_date_sk#31)) (56) Project [codegen id : 1] -Output [2]: [d_date_sk#35, d_moy#36] -Input [3]: [d_date_sk#35, d_year#57, d_moy#36] +Output [2]: [d_date_sk#31, d_moy#32] +Input [3]: [d_date_sk#31, d_year#49, d_moy#32] (57) BroadcastExchange -Input [2]: [d_date_sk#35, d_moy#36] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#58] +Input [2]: [d_date_sk#31, d_moy#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt index 4252efd8f6932..3a8329c86102e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q39b/explain.txt @@ -76,7 +76,7 @@ Condition : isnotnull(i_item_sk#6) (7) BroadcastExchange Input [1]: [i_item_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_item_sk#1] @@ -88,163 +88,163 @@ Output [4]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_s Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6] (10) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#8, w_warehouse_name#9] +Output [2]: [w_warehouse_sk#7, w_warehouse_name#8] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [2]: [w_warehouse_sk#8, w_warehouse_name#9] +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] (12) Filter [codegen id : 2] -Input [2]: [w_warehouse_sk#8, w_warehouse_name#9] -Condition : isnotnull(w_warehouse_sk#8) +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Condition : isnotnull(w_warehouse_sk#7) (13) BroadcastExchange -Input [2]: [w_warehouse_sk#8, w_warehouse_name#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] +Input [2]: [w_warehouse_sk#7, w_warehouse_name#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_warehouse_sk#2] -Right keys [1]: [w_warehouse_sk#8] +Right keys [1]: [w_warehouse_sk#7] Join condition: None (15) Project [codegen id : 4] -Output [5]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6, w_warehouse_sk#8, w_warehouse_name#9] -Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6, w_warehouse_sk#8, w_warehouse_name#9] +Output [5]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6, w_warehouse_sk#7, w_warehouse_name#8] +Input [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6, w_warehouse_sk#7, w_warehouse_name#8] (16) ReusedExchange [Reuses operator id: 49] -Output [2]: [d_date_sk#11, d_moy#12] +Output [2]: [d_date_sk#9, d_moy#10] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_date_sk#4] -Right keys [1]: [d_date_sk#11] +Right keys [1]: [d_date_sk#9] Join condition: None (18) Project [codegen id : 4] -Output [5]: [inv_quantity_on_hand#3, i_item_sk#6, w_warehouse_sk#8, w_warehouse_name#9, d_moy#12] -Input [7]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6, w_warehouse_sk#8, w_warehouse_name#9, d_date_sk#11, d_moy#12] +Output [5]: [inv_quantity_on_hand#3, i_item_sk#6, w_warehouse_sk#7, w_warehouse_name#8, d_moy#10] +Input [7]: [inv_quantity_on_hand#3, inv_date_sk#4, i_item_sk#6, w_warehouse_sk#7, w_warehouse_name#8, d_date_sk#9, d_moy#10] (19) HashAggregate [codegen id : 4] -Input [5]: [inv_quantity_on_hand#3, i_item_sk#6, w_warehouse_sk#8, w_warehouse_name#9, d_moy#12] -Keys [4]: [w_warehouse_name#9, w_warehouse_sk#8, i_item_sk#6, d_moy#12] +Input [5]: [inv_quantity_on_hand#3, i_item_sk#6, w_warehouse_sk#7, w_warehouse_name#8, d_moy#10] +Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#6, d_moy#10] Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#3 as double)), partial_avg(inv_quantity_on_hand#3)] -Aggregate Attributes [5]: [n#13, avg#14, m2#15, sum#16, count#17] -Results [9]: [w_warehouse_name#9, w_warehouse_sk#8, i_item_sk#6, d_moy#12, n#18, avg#19, m2#20, sum#21, count#22] +Aggregate Attributes [5]: [n#11, avg#12, m2#13, sum#14, count#15] +Results [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#6, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] (20) Exchange -Input [9]: [w_warehouse_name#9, w_warehouse_sk#8, i_item_sk#6, d_moy#12, n#18, avg#19, m2#20, sum#21, count#22] -Arguments: hashpartitioning(w_warehouse_name#9, w_warehouse_sk#8, i_item_sk#6, d_moy#12, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#6, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Arguments: hashpartitioning(w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#6, d_moy#10, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 10] -Input [9]: [w_warehouse_name#9, w_warehouse_sk#8, i_item_sk#6, d_moy#12, n#18, avg#19, m2#20, sum#21, count#22] -Keys [4]: [w_warehouse_name#9, w_warehouse_sk#8, i_item_sk#6, d_moy#12] +Input [9]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#6, d_moy#10, n#16, avg#17, m2#18, sum#19, count#20] +Keys [4]: [w_warehouse_name#8, w_warehouse_sk#7, i_item_sk#6, d_moy#10] Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double)), avg(inv_quantity_on_hand#3)] -Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double))#24, avg(inv_quantity_on_hand#3)#25] -Results [5]: [w_warehouse_sk#8, i_item_sk#6, d_moy#12, stddev_samp(cast(inv_quantity_on_hand#3 as double))#24 AS stdev#26, avg(inv_quantity_on_hand#3)#25 AS mean#27] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#3 as double))#21, avg(inv_quantity_on_hand#3)#22] +Results [5]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, stddev_samp(cast(inv_quantity_on_hand#3 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#3)#22 AS mean#24] (22) Filter [codegen id : 10] -Input [5]: [w_warehouse_sk#8, i_item_sk#6, d_moy#12, stdev#26, mean#27] -Condition : ((isnotnull(stdev#26) AND isnotnull(mean#27)) AND ((NOT coalesce((mean#27 = 0.0), false) AND ((stdev#26 / mean#27) > 1.0)) AND ((stdev#26 / mean#27) > 1.5))) +Input [5]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, stdev#23, mean#24] +Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND ((NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0)) AND ((stdev#23 / mean#24) > 1.5))) (23) Project [codegen id : 10] -Output [5]: [w_warehouse_sk#8, i_item_sk#6, d_moy#12, mean#27, CASE WHEN (mean#27 = 0.0) THEN null ELSE (stdev#26 / mean#27) END AS cov#28] -Input [5]: [w_warehouse_sk#8, i_item_sk#6, d_moy#12, stdev#26, mean#27] +Output [5]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, mean#24, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#25] +Input [5]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, stdev#23, mean#24] (24) Scan parquet default.inventory -Output [4]: [inv_item_sk#29, inv_warehouse_sk#30, inv_quantity_on_hand#31, inv_date_sk#32] +Output [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(inv_date_sk#32), dynamicpruningexpression(inv_date_sk#32 IN dynamicpruning#33)] +PartitionFilters: [isnotnull(inv_date_sk#29), dynamicpruningexpression(inv_date_sk#29 IN dynamicpruning#30)] PushedFilters: [IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 8] -Input [4]: [inv_item_sk#29, inv_warehouse_sk#30, inv_quantity_on_hand#31, inv_date_sk#32] +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] (26) Filter [codegen id : 8] -Input [4]: [inv_item_sk#29, inv_warehouse_sk#30, inv_quantity_on_hand#31, inv_date_sk#32] -Condition : (isnotnull(inv_item_sk#29) AND isnotnull(inv_warehouse_sk#30)) +Input [4]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29] +Condition : (isnotnull(inv_item_sk#26) AND isnotnull(inv_warehouse_sk#27)) (27) ReusedExchange [Reuses operator id: 7] -Output [1]: [i_item_sk#34] +Output [1]: [i_item_sk#31] (28) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [inv_item_sk#29] -Right keys [1]: [i_item_sk#34] +Left keys [1]: [inv_item_sk#26] +Right keys [1]: [i_item_sk#31] Join condition: None (29) Project [codegen id : 8] -Output [4]: [inv_warehouse_sk#30, inv_quantity_on_hand#31, inv_date_sk#32, i_item_sk#34] -Input [5]: [inv_item_sk#29, inv_warehouse_sk#30, inv_quantity_on_hand#31, inv_date_sk#32, i_item_sk#34] +Output [4]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#31] +Input [5]: [inv_item_sk#26, inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#31] (30) ReusedExchange [Reuses operator id: 13] -Output [2]: [w_warehouse_sk#35, w_warehouse_name#36] +Output [2]: [w_warehouse_sk#32, w_warehouse_name#33] (31) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [inv_warehouse_sk#30] -Right keys [1]: [w_warehouse_sk#35] +Left keys [1]: [inv_warehouse_sk#27] +Right keys [1]: [w_warehouse_sk#32] Join condition: None (32) Project [codegen id : 8] -Output [5]: [inv_quantity_on_hand#31, inv_date_sk#32, i_item_sk#34, w_warehouse_sk#35, w_warehouse_name#36] -Input [6]: [inv_warehouse_sk#30, inv_quantity_on_hand#31, inv_date_sk#32, i_item_sk#34, w_warehouse_sk#35, w_warehouse_name#36] +Output [5]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#31, w_warehouse_sk#32, w_warehouse_name#33] +Input [6]: [inv_warehouse_sk#27, inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#31, w_warehouse_sk#32, w_warehouse_name#33] (33) ReusedExchange [Reuses operator id: 54] -Output [2]: [d_date_sk#37, d_moy#38] +Output [2]: [d_date_sk#34, d_moy#35] (34) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [inv_date_sk#32] -Right keys [1]: [d_date_sk#37] +Left keys [1]: [inv_date_sk#29] +Right keys [1]: [d_date_sk#34] Join condition: None (35) Project [codegen id : 8] -Output [5]: [inv_quantity_on_hand#31, i_item_sk#34, w_warehouse_sk#35, w_warehouse_name#36, d_moy#38] -Input [7]: [inv_quantity_on_hand#31, inv_date_sk#32, i_item_sk#34, w_warehouse_sk#35, w_warehouse_name#36, d_date_sk#37, d_moy#38] +Output [5]: [inv_quantity_on_hand#28, i_item_sk#31, w_warehouse_sk#32, w_warehouse_name#33, d_moy#35] +Input [7]: [inv_quantity_on_hand#28, inv_date_sk#29, i_item_sk#31, w_warehouse_sk#32, w_warehouse_name#33, d_date_sk#34, d_moy#35] (36) HashAggregate [codegen id : 8] -Input [5]: [inv_quantity_on_hand#31, i_item_sk#34, w_warehouse_sk#35, w_warehouse_name#36, d_moy#38] -Keys [4]: [w_warehouse_name#36, w_warehouse_sk#35, i_item_sk#34, d_moy#38] -Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#31 as double)), partial_avg(inv_quantity_on_hand#31)] -Aggregate Attributes [5]: [n#39, avg#40, m2#41, sum#42, count#43] -Results [9]: [w_warehouse_name#36, w_warehouse_sk#35, i_item_sk#34, d_moy#38, n#44, avg#45, m2#46, sum#47, count#48] +Input [5]: [inv_quantity_on_hand#28, i_item_sk#31, w_warehouse_sk#32, w_warehouse_name#33, d_moy#35] +Keys [4]: [w_warehouse_name#33, w_warehouse_sk#32, i_item_sk#31, d_moy#35] +Functions [2]: [partial_stddev_samp(cast(inv_quantity_on_hand#28 as double)), partial_avg(inv_quantity_on_hand#28)] +Aggregate Attributes [5]: [n#36, avg#37, m2#38, sum#39, count#40] +Results [9]: [w_warehouse_name#33, w_warehouse_sk#32, i_item_sk#31, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] (37) Exchange -Input [9]: [w_warehouse_name#36, w_warehouse_sk#35, i_item_sk#34, d_moy#38, n#44, avg#45, m2#46, sum#47, count#48] -Arguments: hashpartitioning(w_warehouse_name#36, w_warehouse_sk#35, i_item_sk#34, d_moy#38, 5), ENSURE_REQUIREMENTS, [id=#49] +Input [9]: [w_warehouse_name#33, w_warehouse_sk#32, i_item_sk#31, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Arguments: hashpartitioning(w_warehouse_name#33, w_warehouse_sk#32, i_item_sk#31, d_moy#35, 5), ENSURE_REQUIREMENTS, [plan_id=4] (38) HashAggregate [codegen id : 9] -Input [9]: [w_warehouse_name#36, w_warehouse_sk#35, i_item_sk#34, d_moy#38, n#44, avg#45, m2#46, sum#47, count#48] -Keys [4]: [w_warehouse_name#36, w_warehouse_sk#35, i_item_sk#34, d_moy#38] -Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#31 as double)), avg(inv_quantity_on_hand#31)] -Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#31 as double))#24, avg(inv_quantity_on_hand#31)#25] -Results [5]: [w_warehouse_sk#35, i_item_sk#34, d_moy#38, stddev_samp(cast(inv_quantity_on_hand#31 as double))#24 AS stdev#26, avg(inv_quantity_on_hand#31)#25 AS mean#27] +Input [9]: [w_warehouse_name#33, w_warehouse_sk#32, i_item_sk#31, d_moy#35, n#41, avg#42, m2#43, sum#44, count#45] +Keys [4]: [w_warehouse_name#33, w_warehouse_sk#32, i_item_sk#31, d_moy#35] +Functions [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double)), avg(inv_quantity_on_hand#28)] +Aggregate Attributes [2]: [stddev_samp(cast(inv_quantity_on_hand#28 as double))#21, avg(inv_quantity_on_hand#28)#22] +Results [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, stddev_samp(cast(inv_quantity_on_hand#28 as double))#21 AS stdev#23, avg(inv_quantity_on_hand#28)#22 AS mean#24] (39) Filter [codegen id : 9] -Input [5]: [w_warehouse_sk#35, i_item_sk#34, d_moy#38, stdev#26, mean#27] -Condition : ((isnotnull(stdev#26) AND isnotnull(mean#27)) AND (NOT coalesce((mean#27 = 0.0), false) AND ((stdev#26 / mean#27) > 1.0))) +Input [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, stdev#23, mean#24] +Condition : ((isnotnull(stdev#23) AND isnotnull(mean#24)) AND (NOT coalesce((mean#24 = 0.0), false) AND ((stdev#23 / mean#24) > 1.0))) (40) Project [codegen id : 9] -Output [5]: [w_warehouse_sk#35, i_item_sk#34, d_moy#38, mean#27 AS mean#50, CASE WHEN (mean#27 = 0.0) THEN null ELSE (stdev#26 / mean#27) END AS cov#51] -Input [5]: [w_warehouse_sk#35, i_item_sk#34, d_moy#38, stdev#26, mean#27] +Output [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, mean#24 AS mean#46, CASE WHEN (mean#24 = 0.0) THEN null ELSE (stdev#23 / mean#24) END AS cov#47] +Input [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, stdev#23, mean#24] (41) BroadcastExchange -Input [5]: [w_warehouse_sk#35, i_item_sk#34, d_moy#38, mean#50, cov#51] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [id=#52] +Input [5]: [w_warehouse_sk#32, i_item_sk#31, d_moy#35, mean#46, cov#47] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=5] (42) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [i_item_sk#6, w_warehouse_sk#8] -Right keys [2]: [i_item_sk#34, w_warehouse_sk#35] +Left keys [2]: [i_item_sk#6, w_warehouse_sk#7] +Right keys [2]: [i_item_sk#31, w_warehouse_sk#32] Join condition: None (43) Exchange -Input [10]: [w_warehouse_sk#8, i_item_sk#6, d_moy#12, mean#27, cov#28, w_warehouse_sk#35, i_item_sk#34, d_moy#38, mean#50, cov#51] -Arguments: rangepartitioning(w_warehouse_sk#8 ASC NULLS FIRST, i_item_sk#6 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#27 ASC NULLS FIRST, cov#28 ASC NULLS FIRST, d_moy#38 ASC NULLS FIRST, mean#50 ASC NULLS FIRST, cov#51 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#53] +Input [10]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, mean#24, cov#25, w_warehouse_sk#32, i_item_sk#31, d_moy#35, mean#46, cov#47] +Arguments: rangepartitioning(w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#6 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=6] (44) Sort [codegen id : 11] -Input [10]: [w_warehouse_sk#8, i_item_sk#6, d_moy#12, mean#27, cov#28, w_warehouse_sk#35, i_item_sk#34, d_moy#38, mean#50, cov#51] -Arguments: [w_warehouse_sk#8 ASC NULLS FIRST, i_item_sk#6 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, mean#27 ASC NULLS FIRST, cov#28 ASC NULLS FIRST, d_moy#38 ASC NULLS FIRST, mean#50 ASC NULLS FIRST, cov#51 ASC NULLS FIRST], true, 0 +Input [10]: [w_warehouse_sk#7, i_item_sk#6, d_moy#10, mean#24, cov#25, w_warehouse_sk#32, i_item_sk#31, d_moy#35, mean#46, cov#47] +Arguments: [w_warehouse_sk#7 ASC NULLS FIRST, i_item_sk#6 ASC NULLS FIRST, d_moy#10 ASC NULLS FIRST, mean#24 ASC NULLS FIRST, cov#25 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, mean#46 ASC NULLS FIRST, cov#47 ASC NULLS FIRST], true, 0 ===== Subqueries ===== @@ -257,28 +257,28 @@ BroadcastExchange (49) (45) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_year#54, d_moy#12] +Output [3]: [d_date_sk#9, d_year#48, d_moy#10] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,1), IsNotNull(d_date_sk)] ReadSchema: struct (46) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#54, d_moy#12] +Input [3]: [d_date_sk#9, d_year#48, d_moy#10] (47) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#54, d_moy#12] -Condition : ((((isnotnull(d_year#54) AND isnotnull(d_moy#12)) AND (d_year#54 = 2001)) AND (d_moy#12 = 1)) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#9, d_year#48, d_moy#10] +Condition : ((((isnotnull(d_year#48) AND isnotnull(d_moy#10)) AND (d_year#48 = 2001)) AND (d_moy#10 = 1)) AND isnotnull(d_date_sk#9)) (48) Project [codegen id : 1] -Output [2]: [d_date_sk#11, d_moy#12] -Input [3]: [d_date_sk#11, d_year#54, d_moy#12] +Output [2]: [d_date_sk#9, d_moy#10] +Input [3]: [d_date_sk#9, d_year#48, d_moy#10] (49) BroadcastExchange -Input [2]: [d_date_sk#11, d_moy#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#55] +Input [2]: [d_date_sk#9, d_moy#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:2 Hosting operator id = 24 Hosting Expression = inv_date_sk#32 IN dynamicpruning#33 +Subquery:2 Hosting operator id = 24 Hosting Expression = inv_date_sk#29 IN dynamicpruning#30 BroadcastExchange (54) +- * Project (53) +- * Filter (52) @@ -287,25 +287,25 @@ BroadcastExchange (54) (50) Scan parquet default.date_dim -Output [3]: [d_date_sk#37, d_year#56, d_moy#38] +Output [3]: [d_date_sk#34, d_year#49, d_moy#35] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#37, d_year#56, d_moy#38] +Input [3]: [d_date_sk#34, d_year#49, d_moy#35] (52) Filter [codegen id : 1] -Input [3]: [d_date_sk#37, d_year#56, d_moy#38] -Condition : ((((isnotnull(d_year#56) AND isnotnull(d_moy#38)) AND (d_year#56 = 2001)) AND (d_moy#38 = 2)) AND isnotnull(d_date_sk#37)) +Input [3]: [d_date_sk#34, d_year#49, d_moy#35] +Condition : ((((isnotnull(d_year#49) AND isnotnull(d_moy#35)) AND (d_year#49 = 2001)) AND (d_moy#35 = 2)) AND isnotnull(d_date_sk#34)) (53) Project [codegen id : 1] -Output [2]: [d_date_sk#37, d_moy#38] -Input [3]: [d_date_sk#37, d_year#56, d_moy#38] +Output [2]: [d_date_sk#34, d_moy#35] +Input [3]: [d_date_sk#34, d_year#49, d_moy#35] (54) BroadcastExchange -Input [2]: [d_date_sk#37, d_moy#38] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#57] +Input [2]: [d_date_sk#34, d_moy#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/explain.txt index 7ebe44763c25a..526b14d5ddd37 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4.sf100/explain.txt @@ -148,510 +148,510 @@ Input [8]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_sales_price#3, ss_ex (7) Exchange Input [6]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_sales_price#3, ss_ext_wholesale_cost#4, ss_ext_list_price#5, d_year#9] -Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#10] +Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (8) Sort [codegen id : 3] Input [6]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_sales_price#3, ss_ext_wholesale_cost#4, ss_ext_list_price#5, d_year#9] Arguments: [ss_customer_sk#1 ASC NULLS FIRST], false, 0 (9) Scan parquet default.customer -Output [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] +Output [8]: [c_customer_sk#10, c_customer_id#11, c_first_name#12, c_last_name#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (10) ColumnarToRow [codegen id : 4] -Input [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] +Input [8]: [c_customer_sk#10, c_customer_id#11, c_first_name#12, c_last_name#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] (11) Filter [codegen id : 4] -Input [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] -Condition : (isnotnull(c_customer_sk#11) AND isnotnull(c_customer_id#12)) +Input [8]: [c_customer_sk#10, c_customer_id#11, c_first_name#12, c_last_name#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] +Condition : (isnotnull(c_customer_sk#10) AND isnotnull(c_customer_id#11)) (12) Exchange -Input [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] -Arguments: hashpartitioning(c_customer_sk#11, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [8]: [c_customer_sk#10, c_customer_id#11, c_first_name#12, c_last_name#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] +Arguments: hashpartitioning(c_customer_sk#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] (13) Sort [codegen id : 5] -Input [8]: [c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] -Arguments: [c_customer_sk#11 ASC NULLS FIRST], false, 0 +Input [8]: [c_customer_sk#10, c_customer_id#11, c_first_name#12, c_last_name#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] +Arguments: [c_customer_sk#10 ASC NULLS FIRST], false, 0 (14) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#11] +Right keys [1]: [c_customer_sk#10] Join condition: None (15) Project [codegen id : 6] -Output [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, ss_ext_discount_amt#2, ss_ext_sales_price#3, ss_ext_wholesale_cost#4, ss_ext_list_price#5, d_year#9] -Input [14]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_sales_price#3, ss_ext_wholesale_cost#4, ss_ext_list_price#5, d_year#9, c_customer_sk#11, c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18] +Output [12]: [c_customer_id#11, c_first_name#12, c_last_name#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_ext_discount_amt#2, ss_ext_sales_price#3, ss_ext_wholesale_cost#4, ss_ext_list_price#5, d_year#9] +Input [14]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_sales_price#3, ss_ext_wholesale_cost#4, ss_ext_list_price#5, d_year#9, c_customer_sk#10, c_customer_id#11, c_first_name#12, c_last_name#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17] (16) HashAggregate [codegen id : 6] -Input [12]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, ss_ext_discount_amt#2, ss_ext_sales_price#3, ss_ext_wholesale_cost#4, ss_ext_list_price#5, d_year#9] -Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#9] +Input [12]: [c_customer_id#11, c_first_name#12, c_last_name#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, ss_ext_discount_amt#2, ss_ext_sales_price#3, ss_ext_wholesale_cost#4, ss_ext_list_price#5, d_year#9] +Keys [8]: [c_customer_id#11, c_first_name#12, c_last_name#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#9] Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#5 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#4 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#3 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [2]: [sum#20, isEmpty#21] -Results [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#9, sum#22, isEmpty#23] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [10]: [c_customer_id#11, c_first_name#12, c_last_name#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#9, sum#20, isEmpty#21] (17) Exchange -Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#9, sum#22, isEmpty#23] -Arguments: hashpartitioning(c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#9, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [10]: [c_customer_id#11, c_first_name#12, c_last_name#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#9, sum#20, isEmpty#21] +Arguments: hashpartitioning(c_customer_id#11, c_first_name#12, c_last_name#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 7] -Input [10]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#9, sum#22, isEmpty#23] -Keys [8]: [c_customer_id#12, c_first_name#13, c_last_name#14, c_preferred_cust_flag#15, c_birth_country#16, c_login#17, c_email_address#18, d_year#9] +Input [10]: [c_customer_id#11, c_first_name#12, c_last_name#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#9, sum#20, isEmpty#21] +Keys [8]: [c_customer_id#11, c_first_name#12, c_last_name#13, c_preferred_cust_flag#14, c_birth_country#15, c_login#16, c_email_address#17, d_year#9] Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#5 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#4 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#3 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#5 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#4 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#3 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#25] -Results [2]: [c_customer_id#12 AS customer_id#26, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#5 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#4 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#3 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#25 AS year_total#27] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#5 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#4 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#3 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#22] +Results [2]: [c_customer_id#11 AS customer_id#23, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#5 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#4 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#3 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#22 AS year_total#24] (19) Filter [codegen id : 7] -Input [2]: [customer_id#26, year_total#27] -Condition : (isnotnull(year_total#27) AND (year_total#27 > 0.000000)) +Input [2]: [customer_id#23, year_total#24] +Condition : (isnotnull(year_total#24) AND (year_total#24 > 0.000000)) (20) Exchange -Input [2]: [customer_id#26, year_total#27] -Arguments: hashpartitioning(customer_id#26, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [2]: [customer_id#23, year_total#24] +Arguments: hashpartitioning(customer_id#23, 5), ENSURE_REQUIREMENTS, [plan_id=4] (21) Sort [codegen id : 8] -Input [2]: [customer_id#26, year_total#27] -Arguments: [customer_id#26 ASC NULLS FIRST], false, 0 +Input [2]: [customer_id#23, year_total#24] +Arguments: [customer_id#23 ASC NULLS FIRST], false, 0 (22) Scan parquet default.store_sales -Output [6]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34] +Output [6]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_sales_price#27, ss_ext_wholesale_cost#28, ss_ext_list_price#29, ss_sold_date_sk#30] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#34), dynamicpruningexpression(ss_sold_date_sk#34 IN dynamicpruning#35)] +PartitionFilters: [isnotnull(ss_sold_date_sk#30), dynamicpruningexpression(ss_sold_date_sk#30 IN dynamicpruning#31)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 10] -Input [6]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34] +Input [6]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_sales_price#27, ss_ext_wholesale_cost#28, ss_ext_list_price#29, ss_sold_date_sk#30] (24) Filter [codegen id : 10] -Input [6]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34] -Condition : isnotnull(ss_customer_sk#29) +Input [6]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_sales_price#27, ss_ext_wholesale_cost#28, ss_ext_list_price#29, ss_sold_date_sk#30] +Condition : isnotnull(ss_customer_sk#25) (25) ReusedExchange [Reuses operator id: 126] -Output [2]: [d_date_sk#36, d_year#37] +Output [2]: [d_date_sk#32, d_year#33] (26) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_sold_date_sk#34] -Right keys [1]: [d_date_sk#36] +Left keys [1]: [ss_sold_date_sk#30] +Right keys [1]: [d_date_sk#32] Join condition: None (27) Project [codegen id : 10] -Output [6]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, d_year#37] -Input [8]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, ss_sold_date_sk#34, d_date_sk#36, d_year#37] +Output [6]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_sales_price#27, ss_ext_wholesale_cost#28, ss_ext_list_price#29, d_year#33] +Input [8]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_sales_price#27, ss_ext_wholesale_cost#28, ss_ext_list_price#29, ss_sold_date_sk#30, d_date_sk#32, d_year#33] (28) Exchange -Input [6]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, d_year#37] -Arguments: hashpartitioning(ss_customer_sk#29, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [6]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_sales_price#27, ss_ext_wholesale_cost#28, ss_ext_list_price#29, d_year#33] +Arguments: hashpartitioning(ss_customer_sk#25, 5), ENSURE_REQUIREMENTS, [plan_id=5] (29) Sort [codegen id : 11] -Input [6]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, d_year#37] -Arguments: [ss_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [6]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_sales_price#27, ss_ext_wholesale_cost#28, ss_ext_list_price#29, d_year#33] +Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 (30) ReusedExchange [Reuses operator id: 12] -Output [8]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46] +Output [8]: [c_customer_sk#34, c_customer_id#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_country#39, c_login#40, c_email_address#41] (31) Sort [codegen id : 13] -Input [8]: [c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46] -Arguments: [c_customer_sk#39 ASC NULLS FIRST], false, 0 +Input [8]: [c_customer_sk#34, c_customer_id#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_country#39, c_login#40, c_email_address#41] +Arguments: [c_customer_sk#34 ASC NULLS FIRST], false, 0 (32) SortMergeJoin [codegen id : 14] -Left keys [1]: [ss_customer_sk#29] -Right keys [1]: [c_customer_sk#39] +Left keys [1]: [ss_customer_sk#25] +Right keys [1]: [c_customer_sk#34] Join condition: None (33) Project [codegen id : 14] -Output [12]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, d_year#37] -Input [14]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, d_year#37, c_customer_sk#39, c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46] +Output [12]: [c_customer_id#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_country#39, c_login#40, c_email_address#41, ss_ext_discount_amt#26, ss_ext_sales_price#27, ss_ext_wholesale_cost#28, ss_ext_list_price#29, d_year#33] +Input [14]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_sales_price#27, ss_ext_wholesale_cost#28, ss_ext_list_price#29, d_year#33, c_customer_sk#34, c_customer_id#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_country#39, c_login#40, c_email_address#41] (34) HashAggregate [codegen id : 14] -Input [12]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, ss_ext_discount_amt#30, ss_ext_sales_price#31, ss_ext_wholesale_cost#32, ss_ext_list_price#33, d_year#37] -Keys [8]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#37] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#32 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#31 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [2]: [sum#47, isEmpty#48] -Results [10]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#37, sum#49, isEmpty#50] +Input [12]: [c_customer_id#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_country#39, c_login#40, c_email_address#41, ss_ext_discount_amt#26, ss_ext_sales_price#27, ss_ext_wholesale_cost#28, ss_ext_list_price#29, d_year#33] +Keys [8]: [c_customer_id#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_country#39, c_login#40, c_email_address#41, d_year#33] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#29 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#27 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [2]: [sum#42, isEmpty#43] +Results [10]: [c_customer_id#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_country#39, c_login#40, c_email_address#41, d_year#33, sum#44, isEmpty#45] (35) Exchange -Input [10]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#37, sum#49, isEmpty#50] -Arguments: hashpartitioning(c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#37, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [10]: [c_customer_id#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_country#39, c_login#40, c_email_address#41, d_year#33, sum#44, isEmpty#45] +Arguments: hashpartitioning(c_customer_id#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_country#39, c_login#40, c_email_address#41, d_year#33, 5), ENSURE_REQUIREMENTS, [plan_id=6] (36) HashAggregate [codegen id : 15] -Input [10]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#37, sum#49, isEmpty#50] -Keys [8]: [c_customer_id#40, c_first_name#41, c_last_name#42, c_preferred_cust_flag#43, c_birth_country#44, c_login#45, c_email_address#46, d_year#37] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#32 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#31 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#32 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#31 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#25] -Results [8]: [c_customer_id#40 AS customer_id#52, c_first_name#41 AS customer_first_name#53, c_last_name#42 AS customer_last_name#54, c_preferred_cust_flag#43 AS customer_preferred_cust_flag#55, c_birth_country#44 AS customer_birth_country#56, c_login#45 AS customer_login#57, c_email_address#46 AS customer_email_address#58, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#32 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#31 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#25 AS year_total#59] +Input [10]: [c_customer_id#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_country#39, c_login#40, c_email_address#41, d_year#33, sum#44, isEmpty#45] +Keys [8]: [c_customer_id#35, c_first_name#36, c_last_name#37, c_preferred_cust_flag#38, c_birth_country#39, c_login#40, c_email_address#41, d_year#33] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#29 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#27 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#29 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#27 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#22] +Results [8]: [c_customer_id#35 AS customer_id#46, c_first_name#36 AS customer_first_name#47, c_last_name#37 AS customer_last_name#48, c_preferred_cust_flag#38 AS customer_preferred_cust_flag#49, c_birth_country#39 AS customer_birth_country#50, c_login#40 AS customer_login#51, c_email_address#41 AS customer_email_address#52, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#29 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#28 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#27 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#22 AS year_total#53] (37) Exchange -Input [8]: [customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58, year_total#59] -Arguments: hashpartitioning(customer_id#52, 5), ENSURE_REQUIREMENTS, [id=#60] +Input [8]: [customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#53] +Arguments: hashpartitioning(customer_id#46, 5), ENSURE_REQUIREMENTS, [plan_id=7] (38) Sort [codegen id : 16] -Input [8]: [customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58, year_total#59] -Arguments: [customer_id#52 ASC NULLS FIRST], false, 0 +Input [8]: [customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#53] +Arguments: [customer_id#46 ASC NULLS FIRST], false, 0 (39) SortMergeJoin [codegen id : 17] -Left keys [1]: [customer_id#26] -Right keys [1]: [customer_id#52] +Left keys [1]: [customer_id#23] +Right keys [1]: [customer_id#46] Join condition: None (40) Scan parquet default.catalog_sales -Output [6]: [cs_bill_customer_sk#61, cs_ext_discount_amt#62, cs_ext_sales_price#63, cs_ext_wholesale_cost#64, cs_ext_list_price#65, cs_sold_date_sk#66] +Output [6]: [cs_bill_customer_sk#54, cs_ext_discount_amt#55, cs_ext_sales_price#56, cs_ext_wholesale_cost#57, cs_ext_list_price#58, cs_sold_date_sk#59] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#66), dynamicpruningexpression(cs_sold_date_sk#66 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(cs_sold_date_sk#59), dynamicpruningexpression(cs_sold_date_sk#59 IN dynamicpruning#7)] PushedFilters: [IsNotNull(cs_bill_customer_sk)] ReadSchema: struct (41) ColumnarToRow [codegen id : 19] -Input [6]: [cs_bill_customer_sk#61, cs_ext_discount_amt#62, cs_ext_sales_price#63, cs_ext_wholesale_cost#64, cs_ext_list_price#65, cs_sold_date_sk#66] +Input [6]: [cs_bill_customer_sk#54, cs_ext_discount_amt#55, cs_ext_sales_price#56, cs_ext_wholesale_cost#57, cs_ext_list_price#58, cs_sold_date_sk#59] (42) Filter [codegen id : 19] -Input [6]: [cs_bill_customer_sk#61, cs_ext_discount_amt#62, cs_ext_sales_price#63, cs_ext_wholesale_cost#64, cs_ext_list_price#65, cs_sold_date_sk#66] -Condition : isnotnull(cs_bill_customer_sk#61) +Input [6]: [cs_bill_customer_sk#54, cs_ext_discount_amt#55, cs_ext_sales_price#56, cs_ext_wholesale_cost#57, cs_ext_list_price#58, cs_sold_date_sk#59] +Condition : isnotnull(cs_bill_customer_sk#54) (43) ReusedExchange [Reuses operator id: 122] -Output [2]: [d_date_sk#67, d_year#68] +Output [2]: [d_date_sk#60, d_year#61] (44) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_sold_date_sk#66] -Right keys [1]: [d_date_sk#67] +Left keys [1]: [cs_sold_date_sk#59] +Right keys [1]: [d_date_sk#60] Join condition: None (45) Project [codegen id : 19] -Output [6]: [cs_bill_customer_sk#61, cs_ext_discount_amt#62, cs_ext_sales_price#63, cs_ext_wholesale_cost#64, cs_ext_list_price#65, d_year#68] -Input [8]: [cs_bill_customer_sk#61, cs_ext_discount_amt#62, cs_ext_sales_price#63, cs_ext_wholesale_cost#64, cs_ext_list_price#65, cs_sold_date_sk#66, d_date_sk#67, d_year#68] +Output [6]: [cs_bill_customer_sk#54, cs_ext_discount_amt#55, cs_ext_sales_price#56, cs_ext_wholesale_cost#57, cs_ext_list_price#58, d_year#61] +Input [8]: [cs_bill_customer_sk#54, cs_ext_discount_amt#55, cs_ext_sales_price#56, cs_ext_wholesale_cost#57, cs_ext_list_price#58, cs_sold_date_sk#59, d_date_sk#60, d_year#61] (46) Exchange -Input [6]: [cs_bill_customer_sk#61, cs_ext_discount_amt#62, cs_ext_sales_price#63, cs_ext_wholesale_cost#64, cs_ext_list_price#65, d_year#68] -Arguments: hashpartitioning(cs_bill_customer_sk#61, 5), ENSURE_REQUIREMENTS, [id=#69] +Input [6]: [cs_bill_customer_sk#54, cs_ext_discount_amt#55, cs_ext_sales_price#56, cs_ext_wholesale_cost#57, cs_ext_list_price#58, d_year#61] +Arguments: hashpartitioning(cs_bill_customer_sk#54, 5), ENSURE_REQUIREMENTS, [plan_id=8] (47) Sort [codegen id : 20] -Input [6]: [cs_bill_customer_sk#61, cs_ext_discount_amt#62, cs_ext_sales_price#63, cs_ext_wholesale_cost#64, cs_ext_list_price#65, d_year#68] -Arguments: [cs_bill_customer_sk#61 ASC NULLS FIRST], false, 0 +Input [6]: [cs_bill_customer_sk#54, cs_ext_discount_amt#55, cs_ext_sales_price#56, cs_ext_wholesale_cost#57, cs_ext_list_price#58, d_year#61] +Arguments: [cs_bill_customer_sk#54 ASC NULLS FIRST], false, 0 (48) ReusedExchange [Reuses operator id: 12] -Output [8]: [c_customer_sk#70, c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77] +Output [8]: [c_customer_sk#62, c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69] (49) Sort [codegen id : 22] -Input [8]: [c_customer_sk#70, c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77] -Arguments: [c_customer_sk#70 ASC NULLS FIRST], false, 0 +Input [8]: [c_customer_sk#62, c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69] +Arguments: [c_customer_sk#62 ASC NULLS FIRST], false, 0 (50) SortMergeJoin [codegen id : 23] -Left keys [1]: [cs_bill_customer_sk#61] -Right keys [1]: [c_customer_sk#70] +Left keys [1]: [cs_bill_customer_sk#54] +Right keys [1]: [c_customer_sk#62] Join condition: None (51) Project [codegen id : 23] -Output [12]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, cs_ext_discount_amt#62, cs_ext_sales_price#63, cs_ext_wholesale_cost#64, cs_ext_list_price#65, d_year#68] -Input [14]: [cs_bill_customer_sk#61, cs_ext_discount_amt#62, cs_ext_sales_price#63, cs_ext_wholesale_cost#64, cs_ext_list_price#65, d_year#68, c_customer_sk#70, c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77] +Output [12]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, cs_ext_discount_amt#55, cs_ext_sales_price#56, cs_ext_wholesale_cost#57, cs_ext_list_price#58, d_year#61] +Input [14]: [cs_bill_customer_sk#54, cs_ext_discount_amt#55, cs_ext_sales_price#56, cs_ext_wholesale_cost#57, cs_ext_list_price#58, d_year#61, c_customer_sk#62, c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69] (52) HashAggregate [codegen id : 23] -Input [12]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, cs_ext_discount_amt#62, cs_ext_sales_price#63, cs_ext_wholesale_cost#64, cs_ext_list_price#65, d_year#68] -Keys [8]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#68] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#65 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#64 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#62 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#63 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [2]: [sum#78, isEmpty#79] -Results [10]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#68, sum#80, isEmpty#81] +Input [12]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, cs_ext_discount_amt#55, cs_ext_sales_price#56, cs_ext_wholesale_cost#57, cs_ext_list_price#58, d_year#61] +Keys [8]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#61] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#57 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#55 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#56 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [2]: [sum#70, isEmpty#71] +Results [10]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#61, sum#72, isEmpty#73] (53) Exchange -Input [10]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#68, sum#80, isEmpty#81] -Arguments: hashpartitioning(c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#68, 5), ENSURE_REQUIREMENTS, [id=#82] +Input [10]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#61, sum#72, isEmpty#73] +Arguments: hashpartitioning(c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#61, 5), ENSURE_REQUIREMENTS, [plan_id=9] (54) HashAggregate [codegen id : 24] -Input [10]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#68, sum#80, isEmpty#81] -Keys [8]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#68] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#65 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#64 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#62 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#63 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#65 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#64 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#62 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#63 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#83] -Results [2]: [c_customer_id#71 AS customer_id#84, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#65 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#64 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#62 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#63 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#83 AS year_total#85] +Input [10]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#61, sum#72, isEmpty#73] +Keys [8]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#61] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#57 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#55 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#56 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#57 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#55 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#56 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#74] +Results [2]: [c_customer_id#63 AS customer_id#75, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#57 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#55 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#56 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#74 AS year_total#76] (55) Filter [codegen id : 24] -Input [2]: [customer_id#84, year_total#85] -Condition : (isnotnull(year_total#85) AND (year_total#85 > 0.000000)) +Input [2]: [customer_id#75, year_total#76] +Condition : (isnotnull(year_total#76) AND (year_total#76 > 0.000000)) (56) Exchange -Input [2]: [customer_id#84, year_total#85] -Arguments: hashpartitioning(customer_id#84, 5), ENSURE_REQUIREMENTS, [id=#86] +Input [2]: [customer_id#75, year_total#76] +Arguments: hashpartitioning(customer_id#75, 5), ENSURE_REQUIREMENTS, [plan_id=10] (57) Sort [codegen id : 25] -Input [2]: [customer_id#84, year_total#85] -Arguments: [customer_id#84 ASC NULLS FIRST], false, 0 +Input [2]: [customer_id#75, year_total#76] +Arguments: [customer_id#75 ASC NULLS FIRST], false, 0 (58) SortMergeJoin [codegen id : 26] -Left keys [1]: [customer_id#26] -Right keys [1]: [customer_id#84] +Left keys [1]: [customer_id#23] +Right keys [1]: [customer_id#75] Join condition: None (59) Project [codegen id : 26] -Output [11]: [customer_id#26, year_total#27, customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58, year_total#59, year_total#85] -Input [12]: [customer_id#26, year_total#27, customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58, year_total#59, customer_id#84, year_total#85] +Output [11]: [customer_id#23, year_total#24, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#53, year_total#76] +Input [12]: [customer_id#23, year_total#24, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#53, customer_id#75, year_total#76] (60) Scan parquet default.catalog_sales -Output [6]: [cs_bill_customer_sk#87, cs_ext_discount_amt#88, cs_ext_sales_price#89, cs_ext_wholesale_cost#90, cs_ext_list_price#91, cs_sold_date_sk#92] +Output [6]: [cs_bill_customer_sk#77, cs_ext_discount_amt#78, cs_ext_sales_price#79, cs_ext_wholesale_cost#80, cs_ext_list_price#81, cs_sold_date_sk#82] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#92), dynamicpruningexpression(cs_sold_date_sk#92 IN dynamicpruning#35)] +PartitionFilters: [isnotnull(cs_sold_date_sk#82), dynamicpruningexpression(cs_sold_date_sk#82 IN dynamicpruning#31)] PushedFilters: [IsNotNull(cs_bill_customer_sk)] ReadSchema: struct (61) ColumnarToRow [codegen id : 28] -Input [6]: [cs_bill_customer_sk#87, cs_ext_discount_amt#88, cs_ext_sales_price#89, cs_ext_wholesale_cost#90, cs_ext_list_price#91, cs_sold_date_sk#92] +Input [6]: [cs_bill_customer_sk#77, cs_ext_discount_amt#78, cs_ext_sales_price#79, cs_ext_wholesale_cost#80, cs_ext_list_price#81, cs_sold_date_sk#82] (62) Filter [codegen id : 28] -Input [6]: [cs_bill_customer_sk#87, cs_ext_discount_amt#88, cs_ext_sales_price#89, cs_ext_wholesale_cost#90, cs_ext_list_price#91, cs_sold_date_sk#92] -Condition : isnotnull(cs_bill_customer_sk#87) +Input [6]: [cs_bill_customer_sk#77, cs_ext_discount_amt#78, cs_ext_sales_price#79, cs_ext_wholesale_cost#80, cs_ext_list_price#81, cs_sold_date_sk#82] +Condition : isnotnull(cs_bill_customer_sk#77) (63) ReusedExchange [Reuses operator id: 126] -Output [2]: [d_date_sk#93, d_year#94] +Output [2]: [d_date_sk#83, d_year#84] (64) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [cs_sold_date_sk#92] -Right keys [1]: [d_date_sk#93] +Left keys [1]: [cs_sold_date_sk#82] +Right keys [1]: [d_date_sk#83] Join condition: None (65) Project [codegen id : 28] -Output [6]: [cs_bill_customer_sk#87, cs_ext_discount_amt#88, cs_ext_sales_price#89, cs_ext_wholesale_cost#90, cs_ext_list_price#91, d_year#94] -Input [8]: [cs_bill_customer_sk#87, cs_ext_discount_amt#88, cs_ext_sales_price#89, cs_ext_wholesale_cost#90, cs_ext_list_price#91, cs_sold_date_sk#92, d_date_sk#93, d_year#94] +Output [6]: [cs_bill_customer_sk#77, cs_ext_discount_amt#78, cs_ext_sales_price#79, cs_ext_wholesale_cost#80, cs_ext_list_price#81, d_year#84] +Input [8]: [cs_bill_customer_sk#77, cs_ext_discount_amt#78, cs_ext_sales_price#79, cs_ext_wholesale_cost#80, cs_ext_list_price#81, cs_sold_date_sk#82, d_date_sk#83, d_year#84] (66) Exchange -Input [6]: [cs_bill_customer_sk#87, cs_ext_discount_amt#88, cs_ext_sales_price#89, cs_ext_wholesale_cost#90, cs_ext_list_price#91, d_year#94] -Arguments: hashpartitioning(cs_bill_customer_sk#87, 5), ENSURE_REQUIREMENTS, [id=#95] +Input [6]: [cs_bill_customer_sk#77, cs_ext_discount_amt#78, cs_ext_sales_price#79, cs_ext_wholesale_cost#80, cs_ext_list_price#81, d_year#84] +Arguments: hashpartitioning(cs_bill_customer_sk#77, 5), ENSURE_REQUIREMENTS, [plan_id=11] (67) Sort [codegen id : 29] -Input [6]: [cs_bill_customer_sk#87, cs_ext_discount_amt#88, cs_ext_sales_price#89, cs_ext_wholesale_cost#90, cs_ext_list_price#91, d_year#94] -Arguments: [cs_bill_customer_sk#87 ASC NULLS FIRST], false, 0 +Input [6]: [cs_bill_customer_sk#77, cs_ext_discount_amt#78, cs_ext_sales_price#79, cs_ext_wholesale_cost#80, cs_ext_list_price#81, d_year#84] +Arguments: [cs_bill_customer_sk#77 ASC NULLS FIRST], false, 0 (68) ReusedExchange [Reuses operator id: 12] -Output [8]: [c_customer_sk#96, c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103] +Output [8]: [c_customer_sk#85, c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92] (69) Sort [codegen id : 31] -Input [8]: [c_customer_sk#96, c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103] -Arguments: [c_customer_sk#96 ASC NULLS FIRST], false, 0 +Input [8]: [c_customer_sk#85, c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92] +Arguments: [c_customer_sk#85 ASC NULLS FIRST], false, 0 (70) SortMergeJoin [codegen id : 32] -Left keys [1]: [cs_bill_customer_sk#87] -Right keys [1]: [c_customer_sk#96] +Left keys [1]: [cs_bill_customer_sk#77] +Right keys [1]: [c_customer_sk#85] Join condition: None (71) Project [codegen id : 32] -Output [12]: [c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103, cs_ext_discount_amt#88, cs_ext_sales_price#89, cs_ext_wholesale_cost#90, cs_ext_list_price#91, d_year#94] -Input [14]: [cs_bill_customer_sk#87, cs_ext_discount_amt#88, cs_ext_sales_price#89, cs_ext_wholesale_cost#90, cs_ext_list_price#91, d_year#94, c_customer_sk#96, c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103] +Output [12]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, cs_ext_discount_amt#78, cs_ext_sales_price#79, cs_ext_wholesale_cost#80, cs_ext_list_price#81, d_year#84] +Input [14]: [cs_bill_customer_sk#77, cs_ext_discount_amt#78, cs_ext_sales_price#79, cs_ext_wholesale_cost#80, cs_ext_list_price#81, d_year#84, c_customer_sk#85, c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92] (72) HashAggregate [codegen id : 32] -Input [12]: [c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103, cs_ext_discount_amt#88, cs_ext_sales_price#89, cs_ext_wholesale_cost#90, cs_ext_list_price#91, d_year#94] -Keys [8]: [c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103, d_year#94] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#91 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#90 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#88 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#89 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [2]: [sum#104, isEmpty#105] -Results [10]: [c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103, d_year#94, sum#106, isEmpty#107] +Input [12]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, cs_ext_discount_amt#78, cs_ext_sales_price#79, cs_ext_wholesale_cost#80, cs_ext_list_price#81, d_year#84] +Keys [8]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#84] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#81 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#80 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#78 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#79 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [2]: [sum#93, isEmpty#94] +Results [10]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#84, sum#95, isEmpty#96] (73) Exchange -Input [10]: [c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103, d_year#94, sum#106, isEmpty#107] -Arguments: hashpartitioning(c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103, d_year#94, 5), ENSURE_REQUIREMENTS, [id=#108] +Input [10]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#84, sum#95, isEmpty#96] +Arguments: hashpartitioning(c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#84, 5), ENSURE_REQUIREMENTS, [plan_id=12] (74) HashAggregate [codegen id : 33] -Input [10]: [c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103, d_year#94, sum#106, isEmpty#107] -Keys [8]: [c_customer_id#97, c_first_name#98, c_last_name#99, c_preferred_cust_flag#100, c_birth_country#101, c_login#102, c_email_address#103, d_year#94] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#91 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#90 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#88 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#89 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#91 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#90 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#88 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#89 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#83] -Results [2]: [c_customer_id#97 AS customer_id#109, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#91 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#90 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#88 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#89 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#83 AS year_total#110] +Input [10]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#84, sum#95, isEmpty#96] +Keys [8]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#84] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#81 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#80 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#78 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#79 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#81 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#80 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#78 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#79 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#74] +Results [2]: [c_customer_id#86 AS customer_id#97, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#81 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#80 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#78 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#79 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#74 AS year_total#98] (75) Exchange -Input [2]: [customer_id#109, year_total#110] -Arguments: hashpartitioning(customer_id#109, 5), ENSURE_REQUIREMENTS, [id=#111] +Input [2]: [customer_id#97, year_total#98] +Arguments: hashpartitioning(customer_id#97, 5), ENSURE_REQUIREMENTS, [plan_id=13] (76) Sort [codegen id : 34] -Input [2]: [customer_id#109, year_total#110] -Arguments: [customer_id#109 ASC NULLS FIRST], false, 0 +Input [2]: [customer_id#97, year_total#98] +Arguments: [customer_id#97 ASC NULLS FIRST], false, 0 (77) SortMergeJoin [codegen id : 35] -Left keys [1]: [customer_id#26] -Right keys [1]: [customer_id#109] -Join condition: (CASE WHEN (year_total#85 > 0.000000) THEN CheckOverflow((promote_precision(year_total#110) / promote_precision(year_total#85)), DecimalType(38,14)) END > CASE WHEN (year_total#27 > 0.000000) THEN CheckOverflow((promote_precision(year_total#59) / promote_precision(year_total#27)), DecimalType(38,14)) END) +Left keys [1]: [customer_id#23] +Right keys [1]: [customer_id#97] +Join condition: (CASE WHEN (year_total#76 > 0.000000) THEN CheckOverflow((promote_precision(year_total#98) / promote_precision(year_total#76)), DecimalType(38,14)) END > CASE WHEN (year_total#24 > 0.000000) THEN CheckOverflow((promote_precision(year_total#53) / promote_precision(year_total#24)), DecimalType(38,14)) END) (78) Project [codegen id : 35] -Output [10]: [customer_id#26, customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58, year_total#85, year_total#110] -Input [13]: [customer_id#26, year_total#27, customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58, year_total#59, year_total#85, customer_id#109, year_total#110] +Output [10]: [customer_id#23, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#76, year_total#98] +Input [13]: [customer_id#23, year_total#24, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#53, year_total#76, customer_id#97, year_total#98] (79) Scan parquet default.web_sales -Output [6]: [ws_bill_customer_sk#112, ws_ext_discount_amt#113, ws_ext_sales_price#114, ws_ext_wholesale_cost#115, ws_ext_list_price#116, ws_sold_date_sk#117] +Output [6]: [ws_bill_customer_sk#99, ws_ext_discount_amt#100, ws_ext_sales_price#101, ws_ext_wholesale_cost#102, ws_ext_list_price#103, ws_sold_date_sk#104] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#117), dynamicpruningexpression(ws_sold_date_sk#117 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ws_sold_date_sk#104), dynamicpruningexpression(ws_sold_date_sk#104 IN dynamicpruning#7)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (80) ColumnarToRow [codegen id : 37] -Input [6]: [ws_bill_customer_sk#112, ws_ext_discount_amt#113, ws_ext_sales_price#114, ws_ext_wholesale_cost#115, ws_ext_list_price#116, ws_sold_date_sk#117] +Input [6]: [ws_bill_customer_sk#99, ws_ext_discount_amt#100, ws_ext_sales_price#101, ws_ext_wholesale_cost#102, ws_ext_list_price#103, ws_sold_date_sk#104] (81) Filter [codegen id : 37] -Input [6]: [ws_bill_customer_sk#112, ws_ext_discount_amt#113, ws_ext_sales_price#114, ws_ext_wholesale_cost#115, ws_ext_list_price#116, ws_sold_date_sk#117] -Condition : isnotnull(ws_bill_customer_sk#112) +Input [6]: [ws_bill_customer_sk#99, ws_ext_discount_amt#100, ws_ext_sales_price#101, ws_ext_wholesale_cost#102, ws_ext_list_price#103, ws_sold_date_sk#104] +Condition : isnotnull(ws_bill_customer_sk#99) (82) ReusedExchange [Reuses operator id: 122] -Output [2]: [d_date_sk#118, d_year#119] +Output [2]: [d_date_sk#105, d_year#106] (83) BroadcastHashJoin [codegen id : 37] -Left keys [1]: [ws_sold_date_sk#117] -Right keys [1]: [d_date_sk#118] +Left keys [1]: [ws_sold_date_sk#104] +Right keys [1]: [d_date_sk#105] Join condition: None (84) Project [codegen id : 37] -Output [6]: [ws_bill_customer_sk#112, ws_ext_discount_amt#113, ws_ext_sales_price#114, ws_ext_wholesale_cost#115, ws_ext_list_price#116, d_year#119] -Input [8]: [ws_bill_customer_sk#112, ws_ext_discount_amt#113, ws_ext_sales_price#114, ws_ext_wholesale_cost#115, ws_ext_list_price#116, ws_sold_date_sk#117, d_date_sk#118, d_year#119] +Output [6]: [ws_bill_customer_sk#99, ws_ext_discount_amt#100, ws_ext_sales_price#101, ws_ext_wholesale_cost#102, ws_ext_list_price#103, d_year#106] +Input [8]: [ws_bill_customer_sk#99, ws_ext_discount_amt#100, ws_ext_sales_price#101, ws_ext_wholesale_cost#102, ws_ext_list_price#103, ws_sold_date_sk#104, d_date_sk#105, d_year#106] (85) Exchange -Input [6]: [ws_bill_customer_sk#112, ws_ext_discount_amt#113, ws_ext_sales_price#114, ws_ext_wholesale_cost#115, ws_ext_list_price#116, d_year#119] -Arguments: hashpartitioning(ws_bill_customer_sk#112, 5), ENSURE_REQUIREMENTS, [id=#120] +Input [6]: [ws_bill_customer_sk#99, ws_ext_discount_amt#100, ws_ext_sales_price#101, ws_ext_wholesale_cost#102, ws_ext_list_price#103, d_year#106] +Arguments: hashpartitioning(ws_bill_customer_sk#99, 5), ENSURE_REQUIREMENTS, [plan_id=14] (86) Sort [codegen id : 38] -Input [6]: [ws_bill_customer_sk#112, ws_ext_discount_amt#113, ws_ext_sales_price#114, ws_ext_wholesale_cost#115, ws_ext_list_price#116, d_year#119] -Arguments: [ws_bill_customer_sk#112 ASC NULLS FIRST], false, 0 +Input [6]: [ws_bill_customer_sk#99, ws_ext_discount_amt#100, ws_ext_sales_price#101, ws_ext_wholesale_cost#102, ws_ext_list_price#103, d_year#106] +Arguments: [ws_bill_customer_sk#99 ASC NULLS FIRST], false, 0 (87) ReusedExchange [Reuses operator id: 12] -Output [8]: [c_customer_sk#121, c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128] +Output [8]: [c_customer_sk#107, c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114] (88) Sort [codegen id : 40] -Input [8]: [c_customer_sk#121, c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128] -Arguments: [c_customer_sk#121 ASC NULLS FIRST], false, 0 +Input [8]: [c_customer_sk#107, c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114] +Arguments: [c_customer_sk#107 ASC NULLS FIRST], false, 0 (89) SortMergeJoin [codegen id : 41] -Left keys [1]: [ws_bill_customer_sk#112] -Right keys [1]: [c_customer_sk#121] +Left keys [1]: [ws_bill_customer_sk#99] +Right keys [1]: [c_customer_sk#107] Join condition: None (90) Project [codegen id : 41] -Output [12]: [c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128, ws_ext_discount_amt#113, ws_ext_sales_price#114, ws_ext_wholesale_cost#115, ws_ext_list_price#116, d_year#119] -Input [14]: [ws_bill_customer_sk#112, ws_ext_discount_amt#113, ws_ext_sales_price#114, ws_ext_wholesale_cost#115, ws_ext_list_price#116, d_year#119, c_customer_sk#121, c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128] +Output [12]: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, ws_ext_discount_amt#100, ws_ext_sales_price#101, ws_ext_wholesale_cost#102, ws_ext_list_price#103, d_year#106] +Input [14]: [ws_bill_customer_sk#99, ws_ext_discount_amt#100, ws_ext_sales_price#101, ws_ext_wholesale_cost#102, ws_ext_list_price#103, d_year#106, c_customer_sk#107, c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114] (91) HashAggregate [codegen id : 41] -Input [12]: [c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128, ws_ext_discount_amt#113, ws_ext_sales_price#114, ws_ext_wholesale_cost#115, ws_ext_list_price#116, d_year#119] -Keys [8]: [c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128, d_year#119] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#116 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#115 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#113 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#114 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [2]: [sum#129, isEmpty#130] -Results [10]: [c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128, d_year#119, sum#131, isEmpty#132] +Input [12]: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, ws_ext_discount_amt#100, ws_ext_sales_price#101, ws_ext_wholesale_cost#102, ws_ext_list_price#103, d_year#106] +Keys [8]: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, d_year#106] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#103 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#102 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#100 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#101 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [2]: [sum#115, isEmpty#116] +Results [10]: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, d_year#106, sum#117, isEmpty#118] (92) Exchange -Input [10]: [c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128, d_year#119, sum#131, isEmpty#132] -Arguments: hashpartitioning(c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128, d_year#119, 5), ENSURE_REQUIREMENTS, [id=#133] +Input [10]: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, d_year#106, sum#117, isEmpty#118] +Arguments: hashpartitioning(c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, d_year#106, 5), ENSURE_REQUIREMENTS, [plan_id=15] (93) HashAggregate [codegen id : 42] -Input [10]: [c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128, d_year#119, sum#131, isEmpty#132] -Keys [8]: [c_customer_id#122, c_first_name#123, c_last_name#124, c_preferred_cust_flag#125, c_birth_country#126, c_login#127, c_email_address#128, d_year#119] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#116 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#115 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#113 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#114 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#116 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#115 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#113 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#114 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#134] -Results [2]: [c_customer_id#122 AS customer_id#135, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#116 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#115 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#113 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#114 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#134 AS year_total#136] +Input [10]: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, d_year#106, sum#117, isEmpty#118] +Keys [8]: [c_customer_id#108, c_first_name#109, c_last_name#110, c_preferred_cust_flag#111, c_birth_country#112, c_login#113, c_email_address#114, d_year#106] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#103 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#102 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#100 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#101 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#103 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#102 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#100 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#101 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#119] +Results [2]: [c_customer_id#108 AS customer_id#120, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#103 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#102 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#100 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#101 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#119 AS year_total#121] (94) Filter [codegen id : 42] -Input [2]: [customer_id#135, year_total#136] -Condition : (isnotnull(year_total#136) AND (year_total#136 > 0.000000)) +Input [2]: [customer_id#120, year_total#121] +Condition : (isnotnull(year_total#121) AND (year_total#121 > 0.000000)) (95) Exchange -Input [2]: [customer_id#135, year_total#136] -Arguments: hashpartitioning(customer_id#135, 5), ENSURE_REQUIREMENTS, [id=#137] +Input [2]: [customer_id#120, year_total#121] +Arguments: hashpartitioning(customer_id#120, 5), ENSURE_REQUIREMENTS, [plan_id=16] (96) Sort [codegen id : 43] -Input [2]: [customer_id#135, year_total#136] -Arguments: [customer_id#135 ASC NULLS FIRST], false, 0 +Input [2]: [customer_id#120, year_total#121] +Arguments: [customer_id#120 ASC NULLS FIRST], false, 0 (97) SortMergeJoin [codegen id : 44] -Left keys [1]: [customer_id#26] -Right keys [1]: [customer_id#135] +Left keys [1]: [customer_id#23] +Right keys [1]: [customer_id#120] Join condition: None (98) Project [codegen id : 44] -Output [11]: [customer_id#26, customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58, year_total#85, year_total#110, year_total#136] -Input [12]: [customer_id#26, customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58, year_total#85, year_total#110, customer_id#135, year_total#136] +Output [11]: [customer_id#23, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#76, year_total#98, year_total#121] +Input [12]: [customer_id#23, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#76, year_total#98, customer_id#120, year_total#121] (99) Scan parquet default.web_sales -Output [6]: [ws_bill_customer_sk#138, ws_ext_discount_amt#139, ws_ext_sales_price#140, ws_ext_wholesale_cost#141, ws_ext_list_price#142, ws_sold_date_sk#143] +Output [6]: [ws_bill_customer_sk#122, ws_ext_discount_amt#123, ws_ext_sales_price#124, ws_ext_wholesale_cost#125, ws_ext_list_price#126, ws_sold_date_sk#127] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#143), dynamicpruningexpression(ws_sold_date_sk#143 IN dynamicpruning#35)] +PartitionFilters: [isnotnull(ws_sold_date_sk#127), dynamicpruningexpression(ws_sold_date_sk#127 IN dynamicpruning#31)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (100) ColumnarToRow [codegen id : 46] -Input [6]: [ws_bill_customer_sk#138, ws_ext_discount_amt#139, ws_ext_sales_price#140, ws_ext_wholesale_cost#141, ws_ext_list_price#142, ws_sold_date_sk#143] +Input [6]: [ws_bill_customer_sk#122, ws_ext_discount_amt#123, ws_ext_sales_price#124, ws_ext_wholesale_cost#125, ws_ext_list_price#126, ws_sold_date_sk#127] (101) Filter [codegen id : 46] -Input [6]: [ws_bill_customer_sk#138, ws_ext_discount_amt#139, ws_ext_sales_price#140, ws_ext_wholesale_cost#141, ws_ext_list_price#142, ws_sold_date_sk#143] -Condition : isnotnull(ws_bill_customer_sk#138) +Input [6]: [ws_bill_customer_sk#122, ws_ext_discount_amt#123, ws_ext_sales_price#124, ws_ext_wholesale_cost#125, ws_ext_list_price#126, ws_sold_date_sk#127] +Condition : isnotnull(ws_bill_customer_sk#122) (102) ReusedExchange [Reuses operator id: 126] -Output [2]: [d_date_sk#144, d_year#145] +Output [2]: [d_date_sk#128, d_year#129] (103) BroadcastHashJoin [codegen id : 46] -Left keys [1]: [ws_sold_date_sk#143] -Right keys [1]: [d_date_sk#144] +Left keys [1]: [ws_sold_date_sk#127] +Right keys [1]: [d_date_sk#128] Join condition: None (104) Project [codegen id : 46] -Output [6]: [ws_bill_customer_sk#138, ws_ext_discount_amt#139, ws_ext_sales_price#140, ws_ext_wholesale_cost#141, ws_ext_list_price#142, d_year#145] -Input [8]: [ws_bill_customer_sk#138, ws_ext_discount_amt#139, ws_ext_sales_price#140, ws_ext_wholesale_cost#141, ws_ext_list_price#142, ws_sold_date_sk#143, d_date_sk#144, d_year#145] +Output [6]: [ws_bill_customer_sk#122, ws_ext_discount_amt#123, ws_ext_sales_price#124, ws_ext_wholesale_cost#125, ws_ext_list_price#126, d_year#129] +Input [8]: [ws_bill_customer_sk#122, ws_ext_discount_amt#123, ws_ext_sales_price#124, ws_ext_wholesale_cost#125, ws_ext_list_price#126, ws_sold_date_sk#127, d_date_sk#128, d_year#129] (105) Exchange -Input [6]: [ws_bill_customer_sk#138, ws_ext_discount_amt#139, ws_ext_sales_price#140, ws_ext_wholesale_cost#141, ws_ext_list_price#142, d_year#145] -Arguments: hashpartitioning(ws_bill_customer_sk#138, 5), ENSURE_REQUIREMENTS, [id=#146] +Input [6]: [ws_bill_customer_sk#122, ws_ext_discount_amt#123, ws_ext_sales_price#124, ws_ext_wholesale_cost#125, ws_ext_list_price#126, d_year#129] +Arguments: hashpartitioning(ws_bill_customer_sk#122, 5), ENSURE_REQUIREMENTS, [plan_id=17] (106) Sort [codegen id : 47] -Input [6]: [ws_bill_customer_sk#138, ws_ext_discount_amt#139, ws_ext_sales_price#140, ws_ext_wholesale_cost#141, ws_ext_list_price#142, d_year#145] -Arguments: [ws_bill_customer_sk#138 ASC NULLS FIRST], false, 0 +Input [6]: [ws_bill_customer_sk#122, ws_ext_discount_amt#123, ws_ext_sales_price#124, ws_ext_wholesale_cost#125, ws_ext_list_price#126, d_year#129] +Arguments: [ws_bill_customer_sk#122 ASC NULLS FIRST], false, 0 (107) ReusedExchange [Reuses operator id: 12] -Output [8]: [c_customer_sk#147, c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154] +Output [8]: [c_customer_sk#130, c_customer_id#131, c_first_name#132, c_last_name#133, c_preferred_cust_flag#134, c_birth_country#135, c_login#136, c_email_address#137] (108) Sort [codegen id : 49] -Input [8]: [c_customer_sk#147, c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154] -Arguments: [c_customer_sk#147 ASC NULLS FIRST], false, 0 +Input [8]: [c_customer_sk#130, c_customer_id#131, c_first_name#132, c_last_name#133, c_preferred_cust_flag#134, c_birth_country#135, c_login#136, c_email_address#137] +Arguments: [c_customer_sk#130 ASC NULLS FIRST], false, 0 (109) SortMergeJoin [codegen id : 50] -Left keys [1]: [ws_bill_customer_sk#138] -Right keys [1]: [c_customer_sk#147] +Left keys [1]: [ws_bill_customer_sk#122] +Right keys [1]: [c_customer_sk#130] Join condition: None (110) Project [codegen id : 50] -Output [12]: [c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154, ws_ext_discount_amt#139, ws_ext_sales_price#140, ws_ext_wholesale_cost#141, ws_ext_list_price#142, d_year#145] -Input [14]: [ws_bill_customer_sk#138, ws_ext_discount_amt#139, ws_ext_sales_price#140, ws_ext_wholesale_cost#141, ws_ext_list_price#142, d_year#145, c_customer_sk#147, c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154] +Output [12]: [c_customer_id#131, c_first_name#132, c_last_name#133, c_preferred_cust_flag#134, c_birth_country#135, c_login#136, c_email_address#137, ws_ext_discount_amt#123, ws_ext_sales_price#124, ws_ext_wholesale_cost#125, ws_ext_list_price#126, d_year#129] +Input [14]: [ws_bill_customer_sk#122, ws_ext_discount_amt#123, ws_ext_sales_price#124, ws_ext_wholesale_cost#125, ws_ext_list_price#126, d_year#129, c_customer_sk#130, c_customer_id#131, c_first_name#132, c_last_name#133, c_preferred_cust_flag#134, c_birth_country#135, c_login#136, c_email_address#137] (111) HashAggregate [codegen id : 50] -Input [12]: [c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154, ws_ext_discount_amt#139, ws_ext_sales_price#140, ws_ext_wholesale_cost#141, ws_ext_list_price#142, d_year#145] -Keys [8]: [c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154, d_year#145] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#142 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#141 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#139 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#140 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [2]: [sum#155, isEmpty#156] -Results [10]: [c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154, d_year#145, sum#157, isEmpty#158] +Input [12]: [c_customer_id#131, c_first_name#132, c_last_name#133, c_preferred_cust_flag#134, c_birth_country#135, c_login#136, c_email_address#137, ws_ext_discount_amt#123, ws_ext_sales_price#124, ws_ext_wholesale_cost#125, ws_ext_list_price#126, d_year#129] +Keys [8]: [c_customer_id#131, c_first_name#132, c_last_name#133, c_preferred_cust_flag#134, c_birth_country#135, c_login#136, c_email_address#137, d_year#129] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#126 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#125 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#123 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#124 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [2]: [sum#138, isEmpty#139] +Results [10]: [c_customer_id#131, c_first_name#132, c_last_name#133, c_preferred_cust_flag#134, c_birth_country#135, c_login#136, c_email_address#137, d_year#129, sum#140, isEmpty#141] (112) Exchange -Input [10]: [c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154, d_year#145, sum#157, isEmpty#158] -Arguments: hashpartitioning(c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154, d_year#145, 5), ENSURE_REQUIREMENTS, [id=#159] +Input [10]: [c_customer_id#131, c_first_name#132, c_last_name#133, c_preferred_cust_flag#134, c_birth_country#135, c_login#136, c_email_address#137, d_year#129, sum#140, isEmpty#141] +Arguments: hashpartitioning(c_customer_id#131, c_first_name#132, c_last_name#133, c_preferred_cust_flag#134, c_birth_country#135, c_login#136, c_email_address#137, d_year#129, 5), ENSURE_REQUIREMENTS, [plan_id=18] (113) HashAggregate [codegen id : 51] -Input [10]: [c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154, d_year#145, sum#157, isEmpty#158] -Keys [8]: [c_customer_id#148, c_first_name#149, c_last_name#150, c_preferred_cust_flag#151, c_birth_country#152, c_login#153, c_email_address#154, d_year#145] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#142 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#141 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#139 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#140 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#142 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#141 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#139 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#140 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#134] -Results [2]: [c_customer_id#148 AS customer_id#160, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#142 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#141 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#139 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#140 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#134 AS year_total#161] +Input [10]: [c_customer_id#131, c_first_name#132, c_last_name#133, c_preferred_cust_flag#134, c_birth_country#135, c_login#136, c_email_address#137, d_year#129, sum#140, isEmpty#141] +Keys [8]: [c_customer_id#131, c_first_name#132, c_last_name#133, c_preferred_cust_flag#134, c_birth_country#135, c_login#136, c_email_address#137, d_year#129] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#126 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#125 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#123 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#124 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#126 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#125 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#123 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#124 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#119] +Results [2]: [c_customer_id#131 AS customer_id#142, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#126 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#125 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#123 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#124 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#119 AS year_total#143] (114) Exchange -Input [2]: [customer_id#160, year_total#161] -Arguments: hashpartitioning(customer_id#160, 5), ENSURE_REQUIREMENTS, [id=#162] +Input [2]: [customer_id#142, year_total#143] +Arguments: hashpartitioning(customer_id#142, 5), ENSURE_REQUIREMENTS, [plan_id=19] (115) Sort [codegen id : 52] -Input [2]: [customer_id#160, year_total#161] -Arguments: [customer_id#160 ASC NULLS FIRST], false, 0 +Input [2]: [customer_id#142, year_total#143] +Arguments: [customer_id#142 ASC NULLS FIRST], false, 0 (116) SortMergeJoin [codegen id : 53] -Left keys [1]: [customer_id#26] -Right keys [1]: [customer_id#160] -Join condition: (CASE WHEN (year_total#85 > 0.000000) THEN CheckOverflow((promote_precision(year_total#110) / promote_precision(year_total#85)), DecimalType(38,14)) END > CASE WHEN (year_total#136 > 0.000000) THEN CheckOverflow((promote_precision(year_total#161) / promote_precision(year_total#136)), DecimalType(38,14)) END) +Left keys [1]: [customer_id#23] +Right keys [1]: [customer_id#142] +Join condition: (CASE WHEN (year_total#76 > 0.000000) THEN CheckOverflow((promote_precision(year_total#98) / promote_precision(year_total#76)), DecimalType(38,14)) END > CASE WHEN (year_total#121 > 0.000000) THEN CheckOverflow((promote_precision(year_total#143) / promote_precision(year_total#121)), DecimalType(38,14)) END) (117) Project [codegen id : 53] -Output [7]: [customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58] -Input [13]: [customer_id#26, customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58, year_total#85, year_total#110, year_total#136, customer_id#160, year_total#161] +Output [7]: [customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52] +Input [13]: [customer_id#23, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#76, year_total#98, year_total#121, customer_id#142, year_total#143] (118) TakeOrderedAndProject -Input [7]: [customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58] -Arguments: 100, [customer_id#52 ASC NULLS FIRST, customer_first_name#53 ASC NULLS FIRST, customer_last_name#54 ASC NULLS FIRST, customer_preferred_cust_flag#55 ASC NULLS FIRST, customer_birth_country#56 ASC NULLS FIRST, customer_login#57 ASC NULLS FIRST, customer_email_address#58 ASC NULLS FIRST], [customer_id#52, customer_first_name#53, customer_last_name#54, customer_preferred_cust_flag#55, customer_birth_country#56, customer_login#57, customer_email_address#58] +Input [7]: [customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52] +Arguments: 100, [customer_id#46 ASC NULLS FIRST, customer_first_name#47 ASC NULLS FIRST, customer_last_name#48 ASC NULLS FIRST, customer_preferred_cust_flag#49 ASC NULLS FIRST, customer_birth_country#50 ASC NULLS FIRST, customer_login#51 ASC NULLS FIRST, customer_email_address#52 ASC NULLS FIRST], [customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52] ===== Subqueries ===== @@ -678,9 +678,9 @@ Condition : ((isnotnull(d_year#9) AND (d_year#9 = 2001)) AND isnotnull(d_date_sk (122) BroadcastExchange Input [2]: [d_date_sk#8, d_year#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#163] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=20] -Subquery:2 Hosting operator id = 22 Hosting Expression = ss_sold_date_sk#34 IN dynamicpruning#35 +Subquery:2 Hosting operator id = 22 Hosting Expression = ss_sold_date_sk#30 IN dynamicpruning#31 BroadcastExchange (126) +- * Filter (125) +- * ColumnarToRow (124) @@ -688,29 +688,29 @@ BroadcastExchange (126) (123) Scan parquet default.date_dim -Output [2]: [d_date_sk#36, d_year#37] +Output [2]: [d_date_sk#32, d_year#33] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] ReadSchema: struct (124) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#36, d_year#37] +Input [2]: [d_date_sk#32, d_year#33] (125) Filter [codegen id : 1] -Input [2]: [d_date_sk#36, d_year#37] -Condition : ((isnotnull(d_year#37) AND (d_year#37 = 2002)) AND isnotnull(d_date_sk#36)) +Input [2]: [d_date_sk#32, d_year#33] +Condition : ((isnotnull(d_year#33) AND (d_year#33 = 2002)) AND isnotnull(d_date_sk#32)) (126) BroadcastExchange -Input [2]: [d_date_sk#36, d_year#37] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#164] +Input [2]: [d_date_sk#32, d_year#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=21] -Subquery:3 Hosting operator id = 40 Hosting Expression = cs_sold_date_sk#66 IN dynamicpruning#7 +Subquery:3 Hosting operator id = 40 Hosting Expression = cs_sold_date_sk#59 IN dynamicpruning#7 -Subquery:4 Hosting operator id = 60 Hosting Expression = cs_sold_date_sk#92 IN dynamicpruning#35 +Subquery:4 Hosting operator id = 60 Hosting Expression = cs_sold_date_sk#82 IN dynamicpruning#31 -Subquery:5 Hosting operator id = 79 Hosting Expression = ws_sold_date_sk#117 IN dynamicpruning#7 +Subquery:5 Hosting operator id = 79 Hosting Expression = ws_sold_date_sk#104 IN dynamicpruning#7 -Subquery:6 Hosting operator id = 99 Hosting Expression = ws_sold_date_sk#143 IN dynamicpruning#35 +Subquery:6 Hosting operator id = 99 Hosting Expression = ws_sold_date_sk#127 IN dynamicpruning#31 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt index b0af6fb5e1627..5ff09a7763c0e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q4/explain.txt @@ -140,7 +140,7 @@ Condition : isnotnull(ss_customer_sk#9) (7) BroadcastExchange Input [6]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [c_customer_sk#1] @@ -152,471 +152,471 @@ Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_f Input [14]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14] (10) ReusedExchange [Reuses operator id: 112] -Output [2]: [d_date_sk#17, d_year#18] +Output [2]: [d_date_sk#16, d_year#17] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#14] -Right keys [1]: [d_date_sk#17] +Right keys [1]: [d_date_sk#16] Join condition: None (12) Project [codegen id : 3] -Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#18] -Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14, d_date_sk#17, d_year#18] +Output [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#17] +Input [14]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, ss_sold_date_sk#14, d_date_sk#16, d_year#17] (13) HashAggregate [codegen id : 3] -Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#18] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#18] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_sales_price#11, ss_ext_wholesale_cost#12, ss_ext_list_price#13, d_year#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#13 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#12 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#11 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [2]: [sum#19, isEmpty#20] -Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#18, sum#21, isEmpty#22] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#20, isEmpty#21] (14) Exchange -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#18, sum#21, isEmpty#22] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#18, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#20, isEmpty#21] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 24] -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#18, sum#21, isEmpty#22] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#18] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17, sum#20, isEmpty#21] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, d_year#17] Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#13 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#12 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#11 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#13 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#12 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#11 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#24] -Results [2]: [c_customer_id#2 AS customer_id#25, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#13 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#12 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#11 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#24 AS year_total#26] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#13 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#12 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#11 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#22] +Results [2]: [c_customer_id#2 AS customer_id#23, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#13 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#12 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#11 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#22 AS year_total#24] (16) Filter [codegen id : 24] -Input [2]: [customer_id#25, year_total#26] -Condition : (isnotnull(year_total#26) AND (year_total#26 > 0.000000)) +Input [2]: [customer_id#23, year_total#24] +Condition : (isnotnull(year_total#24) AND (year_total#24 > 0.000000)) (17) Scan parquet default.customer -Output [8]: [c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34] +Output [8]: [c_customer_sk#25, c_customer_id#26, c_first_name#27, c_last_name#28, c_preferred_cust_flag#29, c_birth_country#30, c_login#31, c_email_address#32] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (18) ColumnarToRow [codegen id : 6] -Input [8]: [c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34] +Input [8]: [c_customer_sk#25, c_customer_id#26, c_first_name#27, c_last_name#28, c_preferred_cust_flag#29, c_birth_country#30, c_login#31, c_email_address#32] (19) Filter [codegen id : 6] -Input [8]: [c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34] -Condition : (isnotnull(c_customer_sk#27) AND isnotnull(c_customer_id#28)) +Input [8]: [c_customer_sk#25, c_customer_id#26, c_first_name#27, c_last_name#28, c_preferred_cust_flag#29, c_birth_country#30, c_login#31, c_email_address#32] +Condition : (isnotnull(c_customer_sk#25) AND isnotnull(c_customer_id#26)) (20) Scan parquet default.store_sales -Output [6]: [ss_customer_sk#35, ss_ext_discount_amt#36, ss_ext_sales_price#37, ss_ext_wholesale_cost#38, ss_ext_list_price#39, ss_sold_date_sk#40] +Output [6]: [ss_customer_sk#33, ss_ext_discount_amt#34, ss_ext_sales_price#35, ss_ext_wholesale_cost#36, ss_ext_list_price#37, ss_sold_date_sk#38] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#40), dynamicpruningexpression(ss_sold_date_sk#40 IN dynamicpruning#41)] +PartitionFilters: [isnotnull(ss_sold_date_sk#38), dynamicpruningexpression(ss_sold_date_sk#38 IN dynamicpruning#39)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [6]: [ss_customer_sk#35, ss_ext_discount_amt#36, ss_ext_sales_price#37, ss_ext_wholesale_cost#38, ss_ext_list_price#39, ss_sold_date_sk#40] +Input [6]: [ss_customer_sk#33, ss_ext_discount_amt#34, ss_ext_sales_price#35, ss_ext_wholesale_cost#36, ss_ext_list_price#37, ss_sold_date_sk#38] (22) Filter [codegen id : 4] -Input [6]: [ss_customer_sk#35, ss_ext_discount_amt#36, ss_ext_sales_price#37, ss_ext_wholesale_cost#38, ss_ext_list_price#39, ss_sold_date_sk#40] -Condition : isnotnull(ss_customer_sk#35) +Input [6]: [ss_customer_sk#33, ss_ext_discount_amt#34, ss_ext_sales_price#35, ss_ext_wholesale_cost#36, ss_ext_list_price#37, ss_sold_date_sk#38] +Condition : isnotnull(ss_customer_sk#33) (23) BroadcastExchange -Input [6]: [ss_customer_sk#35, ss_ext_discount_amt#36, ss_ext_sales_price#37, ss_ext_wholesale_cost#38, ss_ext_list_price#39, ss_sold_date_sk#40] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#42] +Input [6]: [ss_customer_sk#33, ss_ext_discount_amt#34, ss_ext_sales_price#35, ss_ext_wholesale_cost#36, ss_ext_list_price#37, ss_sold_date_sk#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_customer_sk#27] -Right keys [1]: [ss_customer_sk#35] +Left keys [1]: [c_customer_sk#25] +Right keys [1]: [ss_customer_sk#33] Join condition: None (25) Project [codegen id : 6] -Output [12]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, ss_ext_discount_amt#36, ss_ext_sales_price#37, ss_ext_wholesale_cost#38, ss_ext_list_price#39, ss_sold_date_sk#40] -Input [14]: [c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, ss_customer_sk#35, ss_ext_discount_amt#36, ss_ext_sales_price#37, ss_ext_wholesale_cost#38, ss_ext_list_price#39, ss_sold_date_sk#40] +Output [12]: [c_customer_id#26, c_first_name#27, c_last_name#28, c_preferred_cust_flag#29, c_birth_country#30, c_login#31, c_email_address#32, ss_ext_discount_amt#34, ss_ext_sales_price#35, ss_ext_wholesale_cost#36, ss_ext_list_price#37, ss_sold_date_sk#38] +Input [14]: [c_customer_sk#25, c_customer_id#26, c_first_name#27, c_last_name#28, c_preferred_cust_flag#29, c_birth_country#30, c_login#31, c_email_address#32, ss_customer_sk#33, ss_ext_discount_amt#34, ss_ext_sales_price#35, ss_ext_wholesale_cost#36, ss_ext_list_price#37, ss_sold_date_sk#38] (26) ReusedExchange [Reuses operator id: 116] -Output [2]: [d_date_sk#43, d_year#44] +Output [2]: [d_date_sk#40, d_year#41] (27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#40] -Right keys [1]: [d_date_sk#43] +Left keys [1]: [ss_sold_date_sk#38] +Right keys [1]: [d_date_sk#40] Join condition: None (28) Project [codegen id : 6] -Output [12]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, ss_ext_discount_amt#36, ss_ext_sales_price#37, ss_ext_wholesale_cost#38, ss_ext_list_price#39, d_year#44] -Input [14]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, ss_ext_discount_amt#36, ss_ext_sales_price#37, ss_ext_wholesale_cost#38, ss_ext_list_price#39, ss_sold_date_sk#40, d_date_sk#43, d_year#44] +Output [12]: [c_customer_id#26, c_first_name#27, c_last_name#28, c_preferred_cust_flag#29, c_birth_country#30, c_login#31, c_email_address#32, ss_ext_discount_amt#34, ss_ext_sales_price#35, ss_ext_wholesale_cost#36, ss_ext_list_price#37, d_year#41] +Input [14]: [c_customer_id#26, c_first_name#27, c_last_name#28, c_preferred_cust_flag#29, c_birth_country#30, c_login#31, c_email_address#32, ss_ext_discount_amt#34, ss_ext_sales_price#35, ss_ext_wholesale_cost#36, ss_ext_list_price#37, ss_sold_date_sk#38, d_date_sk#40, d_year#41] (29) HashAggregate [codegen id : 6] -Input [12]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, ss_ext_discount_amt#36, ss_ext_sales_price#37, ss_ext_wholesale_cost#38, ss_ext_list_price#39, d_year#44] -Keys [8]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, d_year#44] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#39 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#38 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#36 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#37 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [2]: [sum#45, isEmpty#46] -Results [10]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, d_year#44, sum#47, isEmpty#48] +Input [12]: [c_customer_id#26, c_first_name#27, c_last_name#28, c_preferred_cust_flag#29, c_birth_country#30, c_login#31, c_email_address#32, ss_ext_discount_amt#34, ss_ext_sales_price#35, ss_ext_wholesale_cost#36, ss_ext_list_price#37, d_year#41] +Keys [8]: [c_customer_id#26, c_first_name#27, c_last_name#28, c_preferred_cust_flag#29, c_birth_country#30, c_login#31, c_email_address#32, d_year#41] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#37 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#36 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#34 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#35 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [2]: [sum#42, isEmpty#43] +Results [10]: [c_customer_id#26, c_first_name#27, c_last_name#28, c_preferred_cust_flag#29, c_birth_country#30, c_login#31, c_email_address#32, d_year#41, sum#44, isEmpty#45] (30) Exchange -Input [10]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, d_year#44, sum#47, isEmpty#48] -Arguments: hashpartitioning(c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, d_year#44, 5), ENSURE_REQUIREMENTS, [id=#49] +Input [10]: [c_customer_id#26, c_first_name#27, c_last_name#28, c_preferred_cust_flag#29, c_birth_country#30, c_login#31, c_email_address#32, d_year#41, sum#44, isEmpty#45] +Arguments: hashpartitioning(c_customer_id#26, c_first_name#27, c_last_name#28, c_preferred_cust_flag#29, c_birth_country#30, c_login#31, c_email_address#32, d_year#41, 5), ENSURE_REQUIREMENTS, [plan_id=4] (31) HashAggregate [codegen id : 7] -Input [10]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, d_year#44, sum#47, isEmpty#48] -Keys [8]: [c_customer_id#28, c_first_name#29, c_last_name#30, c_preferred_cust_flag#31, c_birth_country#32, c_login#33, c_email_address#34, d_year#44] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#39 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#38 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#36 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#37 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#39 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#38 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#36 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#37 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#24] -Results [8]: [c_customer_id#28 AS customer_id#50, c_first_name#29 AS customer_first_name#51, c_last_name#30 AS customer_last_name#52, c_preferred_cust_flag#31 AS customer_preferred_cust_flag#53, c_birth_country#32 AS customer_birth_country#54, c_login#33 AS customer_login#55, c_email_address#34 AS customer_email_address#56, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#39 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#38 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#36 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#37 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#24 AS year_total#57] +Input [10]: [c_customer_id#26, c_first_name#27, c_last_name#28, c_preferred_cust_flag#29, c_birth_country#30, c_login#31, c_email_address#32, d_year#41, sum#44, isEmpty#45] +Keys [8]: [c_customer_id#26, c_first_name#27, c_last_name#28, c_preferred_cust_flag#29, c_birth_country#30, c_login#31, c_email_address#32, d_year#41] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#37 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#36 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#34 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#35 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#37 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#36 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#34 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#35 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#22] +Results [8]: [c_customer_id#26 AS customer_id#46, c_first_name#27 AS customer_first_name#47, c_last_name#28 AS customer_last_name#48, c_preferred_cust_flag#29 AS customer_preferred_cust_flag#49, c_birth_country#30 AS customer_birth_country#50, c_login#31 AS customer_login#51, c_email_address#32 AS customer_email_address#52, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_ext_list_price#37 as decimal(8,2))) - promote_precision(cast(ss_ext_wholesale_cost#36 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ss_ext_discount_amt#34 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ss_ext_sales_price#35 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#22 AS year_total#53] (32) BroadcastExchange -Input [8]: [customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#57] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#58] +Input [8]: [customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#53] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=5] (33) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [customer_id#25] -Right keys [1]: [customer_id#50] +Left keys [1]: [customer_id#23] +Right keys [1]: [customer_id#46] Join condition: None (34) Scan parquet default.customer -Output [8]: [c_customer_sk#59, c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66] +Output [8]: [c_customer_sk#54, c_customer_id#55, c_first_name#56, c_last_name#57, c_preferred_cust_flag#58, c_birth_country#59, c_login#60, c_email_address#61] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (35) ColumnarToRow [codegen id : 10] -Input [8]: [c_customer_sk#59, c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66] +Input [8]: [c_customer_sk#54, c_customer_id#55, c_first_name#56, c_last_name#57, c_preferred_cust_flag#58, c_birth_country#59, c_login#60, c_email_address#61] (36) Filter [codegen id : 10] -Input [8]: [c_customer_sk#59, c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66] -Condition : (isnotnull(c_customer_sk#59) AND isnotnull(c_customer_id#60)) +Input [8]: [c_customer_sk#54, c_customer_id#55, c_first_name#56, c_last_name#57, c_preferred_cust_flag#58, c_birth_country#59, c_login#60, c_email_address#61] +Condition : (isnotnull(c_customer_sk#54) AND isnotnull(c_customer_id#55)) (37) Scan parquet default.catalog_sales -Output [6]: [cs_bill_customer_sk#67, cs_ext_discount_amt#68, cs_ext_sales_price#69, cs_ext_wholesale_cost#70, cs_ext_list_price#71, cs_sold_date_sk#72] +Output [6]: [cs_bill_customer_sk#62, cs_ext_discount_amt#63, cs_ext_sales_price#64, cs_ext_wholesale_cost#65, cs_ext_list_price#66, cs_sold_date_sk#67] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#72), dynamicpruningexpression(cs_sold_date_sk#72 IN dynamicpruning#15)] +PartitionFilters: [isnotnull(cs_sold_date_sk#67), dynamicpruningexpression(cs_sold_date_sk#67 IN dynamicpruning#15)] PushedFilters: [IsNotNull(cs_bill_customer_sk)] ReadSchema: struct (38) ColumnarToRow [codegen id : 8] -Input [6]: [cs_bill_customer_sk#67, cs_ext_discount_amt#68, cs_ext_sales_price#69, cs_ext_wholesale_cost#70, cs_ext_list_price#71, cs_sold_date_sk#72] +Input [6]: [cs_bill_customer_sk#62, cs_ext_discount_amt#63, cs_ext_sales_price#64, cs_ext_wholesale_cost#65, cs_ext_list_price#66, cs_sold_date_sk#67] (39) Filter [codegen id : 8] -Input [6]: [cs_bill_customer_sk#67, cs_ext_discount_amt#68, cs_ext_sales_price#69, cs_ext_wholesale_cost#70, cs_ext_list_price#71, cs_sold_date_sk#72] -Condition : isnotnull(cs_bill_customer_sk#67) +Input [6]: [cs_bill_customer_sk#62, cs_ext_discount_amt#63, cs_ext_sales_price#64, cs_ext_wholesale_cost#65, cs_ext_list_price#66, cs_sold_date_sk#67] +Condition : isnotnull(cs_bill_customer_sk#62) (40) BroadcastExchange -Input [6]: [cs_bill_customer_sk#67, cs_ext_discount_amt#68, cs_ext_sales_price#69, cs_ext_wholesale_cost#70, cs_ext_list_price#71, cs_sold_date_sk#72] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#73] +Input [6]: [cs_bill_customer_sk#62, cs_ext_discount_amt#63, cs_ext_sales_price#64, cs_ext_wholesale_cost#65, cs_ext_list_price#66, cs_sold_date_sk#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] (41) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [c_customer_sk#59] -Right keys [1]: [cs_bill_customer_sk#67] +Left keys [1]: [c_customer_sk#54] +Right keys [1]: [cs_bill_customer_sk#62] Join condition: None (42) Project [codegen id : 10] -Output [12]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, cs_ext_discount_amt#68, cs_ext_sales_price#69, cs_ext_wholesale_cost#70, cs_ext_list_price#71, cs_sold_date_sk#72] -Input [14]: [c_customer_sk#59, c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, cs_bill_customer_sk#67, cs_ext_discount_amt#68, cs_ext_sales_price#69, cs_ext_wholesale_cost#70, cs_ext_list_price#71, cs_sold_date_sk#72] +Output [12]: [c_customer_id#55, c_first_name#56, c_last_name#57, c_preferred_cust_flag#58, c_birth_country#59, c_login#60, c_email_address#61, cs_ext_discount_amt#63, cs_ext_sales_price#64, cs_ext_wholesale_cost#65, cs_ext_list_price#66, cs_sold_date_sk#67] +Input [14]: [c_customer_sk#54, c_customer_id#55, c_first_name#56, c_last_name#57, c_preferred_cust_flag#58, c_birth_country#59, c_login#60, c_email_address#61, cs_bill_customer_sk#62, cs_ext_discount_amt#63, cs_ext_sales_price#64, cs_ext_wholesale_cost#65, cs_ext_list_price#66, cs_sold_date_sk#67] (43) ReusedExchange [Reuses operator id: 112] -Output [2]: [d_date_sk#74, d_year#75] +Output [2]: [d_date_sk#68, d_year#69] (44) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#72] -Right keys [1]: [d_date_sk#74] +Left keys [1]: [cs_sold_date_sk#67] +Right keys [1]: [d_date_sk#68] Join condition: None (45) Project [codegen id : 10] -Output [12]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, cs_ext_discount_amt#68, cs_ext_sales_price#69, cs_ext_wholesale_cost#70, cs_ext_list_price#71, d_year#75] -Input [14]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, cs_ext_discount_amt#68, cs_ext_sales_price#69, cs_ext_wholesale_cost#70, cs_ext_list_price#71, cs_sold_date_sk#72, d_date_sk#74, d_year#75] +Output [12]: [c_customer_id#55, c_first_name#56, c_last_name#57, c_preferred_cust_flag#58, c_birth_country#59, c_login#60, c_email_address#61, cs_ext_discount_amt#63, cs_ext_sales_price#64, cs_ext_wholesale_cost#65, cs_ext_list_price#66, d_year#69] +Input [14]: [c_customer_id#55, c_first_name#56, c_last_name#57, c_preferred_cust_flag#58, c_birth_country#59, c_login#60, c_email_address#61, cs_ext_discount_amt#63, cs_ext_sales_price#64, cs_ext_wholesale_cost#65, cs_ext_list_price#66, cs_sold_date_sk#67, d_date_sk#68, d_year#69] (46) HashAggregate [codegen id : 10] -Input [12]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, cs_ext_discount_amt#68, cs_ext_sales_price#69, cs_ext_wholesale_cost#70, cs_ext_list_price#71, d_year#75] -Keys [8]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, d_year#75] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#71 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#70 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#68 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#69 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [2]: [sum#76, isEmpty#77] -Results [10]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, d_year#75, sum#78, isEmpty#79] +Input [12]: [c_customer_id#55, c_first_name#56, c_last_name#57, c_preferred_cust_flag#58, c_birth_country#59, c_login#60, c_email_address#61, cs_ext_discount_amt#63, cs_ext_sales_price#64, cs_ext_wholesale_cost#65, cs_ext_list_price#66, d_year#69] +Keys [8]: [c_customer_id#55, c_first_name#56, c_last_name#57, c_preferred_cust_flag#58, c_birth_country#59, c_login#60, c_email_address#61, d_year#69] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#66 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#65 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#63 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#64 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [2]: [sum#70, isEmpty#71] +Results [10]: [c_customer_id#55, c_first_name#56, c_last_name#57, c_preferred_cust_flag#58, c_birth_country#59, c_login#60, c_email_address#61, d_year#69, sum#72, isEmpty#73] (47) Exchange -Input [10]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, d_year#75, sum#78, isEmpty#79] -Arguments: hashpartitioning(c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, d_year#75, 5), ENSURE_REQUIREMENTS, [id=#80] +Input [10]: [c_customer_id#55, c_first_name#56, c_last_name#57, c_preferred_cust_flag#58, c_birth_country#59, c_login#60, c_email_address#61, d_year#69, sum#72, isEmpty#73] +Arguments: hashpartitioning(c_customer_id#55, c_first_name#56, c_last_name#57, c_preferred_cust_flag#58, c_birth_country#59, c_login#60, c_email_address#61, d_year#69, 5), ENSURE_REQUIREMENTS, [plan_id=7] (48) HashAggregate [codegen id : 11] -Input [10]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, d_year#75, sum#78, isEmpty#79] -Keys [8]: [c_customer_id#60, c_first_name#61, c_last_name#62, c_preferred_cust_flag#63, c_birth_country#64, c_login#65, c_email_address#66, d_year#75] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#71 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#70 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#68 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#69 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#71 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#70 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#68 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#69 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#81] -Results [2]: [c_customer_id#60 AS customer_id#82, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#71 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#70 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#68 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#69 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#81 AS year_total#83] +Input [10]: [c_customer_id#55, c_first_name#56, c_last_name#57, c_preferred_cust_flag#58, c_birth_country#59, c_login#60, c_email_address#61, d_year#69, sum#72, isEmpty#73] +Keys [8]: [c_customer_id#55, c_first_name#56, c_last_name#57, c_preferred_cust_flag#58, c_birth_country#59, c_login#60, c_email_address#61, d_year#69] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#66 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#65 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#63 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#64 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#66 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#65 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#63 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#64 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#74] +Results [2]: [c_customer_id#55 AS customer_id#75, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#66 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#65 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#63 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#64 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#74 AS year_total#76] (49) Filter [codegen id : 11] -Input [2]: [customer_id#82, year_total#83] -Condition : (isnotnull(year_total#83) AND (year_total#83 > 0.000000)) +Input [2]: [customer_id#75, year_total#76] +Condition : (isnotnull(year_total#76) AND (year_total#76 > 0.000000)) (50) BroadcastExchange -Input [2]: [customer_id#82, year_total#83] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#84] +Input [2]: [customer_id#75, year_total#76] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=8] (51) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [customer_id#25] -Right keys [1]: [customer_id#82] +Left keys [1]: [customer_id#23] +Right keys [1]: [customer_id#75] Join condition: None (52) Project [codegen id : 24] -Output [11]: [customer_id#25, year_total#26, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#57, year_total#83] -Input [12]: [customer_id#25, year_total#26, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#57, customer_id#82, year_total#83] +Output [11]: [customer_id#23, year_total#24, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#53, year_total#76] +Input [12]: [customer_id#23, year_total#24, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#53, customer_id#75, year_total#76] (53) Scan parquet default.customer -Output [8]: [c_customer_sk#85, c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92] +Output [8]: [c_customer_sk#77, c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (54) ColumnarToRow [codegen id : 14] -Input [8]: [c_customer_sk#85, c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92] +Input [8]: [c_customer_sk#77, c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84] (55) Filter [codegen id : 14] -Input [8]: [c_customer_sk#85, c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92] -Condition : (isnotnull(c_customer_sk#85) AND isnotnull(c_customer_id#86)) +Input [8]: [c_customer_sk#77, c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84] +Condition : (isnotnull(c_customer_sk#77) AND isnotnull(c_customer_id#78)) (56) Scan parquet default.catalog_sales -Output [6]: [cs_bill_customer_sk#93, cs_ext_discount_amt#94, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] +Output [6]: [cs_bill_customer_sk#85, cs_ext_discount_amt#86, cs_ext_sales_price#87, cs_ext_wholesale_cost#88, cs_ext_list_price#89, cs_sold_date_sk#90] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#98), dynamicpruningexpression(cs_sold_date_sk#98 IN dynamicpruning#41)] +PartitionFilters: [isnotnull(cs_sold_date_sk#90), dynamicpruningexpression(cs_sold_date_sk#90 IN dynamicpruning#39)] PushedFilters: [IsNotNull(cs_bill_customer_sk)] ReadSchema: struct (57) ColumnarToRow [codegen id : 12] -Input [6]: [cs_bill_customer_sk#93, cs_ext_discount_amt#94, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] +Input [6]: [cs_bill_customer_sk#85, cs_ext_discount_amt#86, cs_ext_sales_price#87, cs_ext_wholesale_cost#88, cs_ext_list_price#89, cs_sold_date_sk#90] (58) Filter [codegen id : 12] -Input [6]: [cs_bill_customer_sk#93, cs_ext_discount_amt#94, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] -Condition : isnotnull(cs_bill_customer_sk#93) +Input [6]: [cs_bill_customer_sk#85, cs_ext_discount_amt#86, cs_ext_sales_price#87, cs_ext_wholesale_cost#88, cs_ext_list_price#89, cs_sold_date_sk#90] +Condition : isnotnull(cs_bill_customer_sk#85) (59) BroadcastExchange -Input [6]: [cs_bill_customer_sk#93, cs_ext_discount_amt#94, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#99] +Input [6]: [cs_bill_customer_sk#85, cs_ext_discount_amt#86, cs_ext_sales_price#87, cs_ext_wholesale_cost#88, cs_ext_list_price#89, cs_sold_date_sk#90] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] (60) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [c_customer_sk#85] -Right keys [1]: [cs_bill_customer_sk#93] +Left keys [1]: [c_customer_sk#77] +Right keys [1]: [cs_bill_customer_sk#85] Join condition: None (61) Project [codegen id : 14] -Output [12]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, cs_ext_discount_amt#94, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] -Input [14]: [c_customer_sk#85, c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, cs_bill_customer_sk#93, cs_ext_discount_amt#94, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98] +Output [12]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, cs_ext_discount_amt#86, cs_ext_sales_price#87, cs_ext_wholesale_cost#88, cs_ext_list_price#89, cs_sold_date_sk#90] +Input [14]: [c_customer_sk#77, c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, cs_bill_customer_sk#85, cs_ext_discount_amt#86, cs_ext_sales_price#87, cs_ext_wholesale_cost#88, cs_ext_list_price#89, cs_sold_date_sk#90] (62) ReusedExchange [Reuses operator id: 116] -Output [2]: [d_date_sk#100, d_year#101] +Output [2]: [d_date_sk#91, d_year#92] (63) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [cs_sold_date_sk#98] -Right keys [1]: [d_date_sk#100] +Left keys [1]: [cs_sold_date_sk#90] +Right keys [1]: [d_date_sk#91] Join condition: None (64) Project [codegen id : 14] -Output [12]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, cs_ext_discount_amt#94, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, d_year#101] -Input [14]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, cs_ext_discount_amt#94, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, cs_sold_date_sk#98, d_date_sk#100, d_year#101] +Output [12]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, cs_ext_discount_amt#86, cs_ext_sales_price#87, cs_ext_wholesale_cost#88, cs_ext_list_price#89, d_year#92] +Input [14]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, cs_ext_discount_amt#86, cs_ext_sales_price#87, cs_ext_wholesale_cost#88, cs_ext_list_price#89, cs_sold_date_sk#90, d_date_sk#91, d_year#92] (65) HashAggregate [codegen id : 14] -Input [12]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, cs_ext_discount_amt#94, cs_ext_sales_price#95, cs_ext_wholesale_cost#96, cs_ext_list_price#97, d_year#101] -Keys [8]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#101] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#94 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [2]: [sum#102, isEmpty#103] -Results [10]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#101, sum#104, isEmpty#105] +Input [12]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, cs_ext_discount_amt#86, cs_ext_sales_price#87, cs_ext_wholesale_cost#88, cs_ext_list_price#89, d_year#92] +Keys [8]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#92] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#89 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#88 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#86 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#87 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [2]: [sum#93, isEmpty#94] +Results [10]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#92, sum#95, isEmpty#96] (66) Exchange -Input [10]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#101, sum#104, isEmpty#105] -Arguments: hashpartitioning(c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#101, 5), ENSURE_REQUIREMENTS, [id=#106] +Input [10]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#92, sum#95, isEmpty#96] +Arguments: hashpartitioning(c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#92, 5), ENSURE_REQUIREMENTS, [plan_id=10] (67) HashAggregate [codegen id : 15] -Input [10]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#101, sum#104, isEmpty#105] -Keys [8]: [c_customer_id#86, c_first_name#87, c_last_name#88, c_preferred_cust_flag#89, c_birth_country#90, c_login#91, c_email_address#92, d_year#101] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#94 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#94 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#81] -Results [2]: [c_customer_id#86 AS customer_id#107, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#97 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#96 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#94 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#95 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#81 AS year_total#108] +Input [10]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#92, sum#95, isEmpty#96] +Keys [8]: [c_customer_id#78, c_first_name#79, c_last_name#80, c_preferred_cust_flag#81, c_birth_country#82, c_login#83, c_email_address#84, d_year#92] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#89 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#88 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#86 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#87 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#89 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#88 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#86 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#87 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#74] +Results [2]: [c_customer_id#78 AS customer_id#97, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cs_ext_list_price#89 as decimal(8,2))) - promote_precision(cast(cs_ext_wholesale_cost#88 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(cs_ext_discount_amt#86 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(cs_ext_sales_price#87 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#74 AS year_total#98] (68) BroadcastExchange -Input [2]: [customer_id#107, year_total#108] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#109] +Input [2]: [customer_id#97, year_total#98] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] (69) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [customer_id#25] -Right keys [1]: [customer_id#107] -Join condition: (CASE WHEN (year_total#83 > 0.000000) THEN CheckOverflow((promote_precision(year_total#108) / promote_precision(year_total#83)), DecimalType(38,14)) END > CASE WHEN (year_total#26 > 0.000000) THEN CheckOverflow((promote_precision(year_total#57) / promote_precision(year_total#26)), DecimalType(38,14)) END) +Left keys [1]: [customer_id#23] +Right keys [1]: [customer_id#97] +Join condition: (CASE WHEN (year_total#76 > 0.000000) THEN CheckOverflow((promote_precision(year_total#98) / promote_precision(year_total#76)), DecimalType(38,14)) END > CASE WHEN (year_total#24 > 0.000000) THEN CheckOverflow((promote_precision(year_total#53) / promote_precision(year_total#24)), DecimalType(38,14)) END) (70) Project [codegen id : 24] -Output [10]: [customer_id#25, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#83, year_total#108] -Input [13]: [customer_id#25, year_total#26, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#57, year_total#83, customer_id#107, year_total#108] +Output [10]: [customer_id#23, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#76, year_total#98] +Input [13]: [customer_id#23, year_total#24, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#53, year_total#76, customer_id#97, year_total#98] (71) Scan parquet default.customer -Output [8]: [c_customer_sk#110, c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117] +Output [8]: [c_customer_sk#99, c_customer_id#100, c_first_name#101, c_last_name#102, c_preferred_cust_flag#103, c_birth_country#104, c_login#105, c_email_address#106] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (72) ColumnarToRow [codegen id : 18] -Input [8]: [c_customer_sk#110, c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117] +Input [8]: [c_customer_sk#99, c_customer_id#100, c_first_name#101, c_last_name#102, c_preferred_cust_flag#103, c_birth_country#104, c_login#105, c_email_address#106] (73) Filter [codegen id : 18] -Input [8]: [c_customer_sk#110, c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117] -Condition : (isnotnull(c_customer_sk#110) AND isnotnull(c_customer_id#111)) +Input [8]: [c_customer_sk#99, c_customer_id#100, c_first_name#101, c_last_name#102, c_preferred_cust_flag#103, c_birth_country#104, c_login#105, c_email_address#106] +Condition : (isnotnull(c_customer_sk#99) AND isnotnull(c_customer_id#100)) (74) Scan parquet default.web_sales -Output [6]: [ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] +Output [6]: [ws_bill_customer_sk#107, ws_ext_discount_amt#108, ws_ext_sales_price#109, ws_ext_wholesale_cost#110, ws_ext_list_price#111, ws_sold_date_sk#112] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#123), dynamicpruningexpression(ws_sold_date_sk#123 IN dynamicpruning#15)] +PartitionFilters: [isnotnull(ws_sold_date_sk#112), dynamicpruningexpression(ws_sold_date_sk#112 IN dynamicpruning#15)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (75) ColumnarToRow [codegen id : 16] -Input [6]: [ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] +Input [6]: [ws_bill_customer_sk#107, ws_ext_discount_amt#108, ws_ext_sales_price#109, ws_ext_wholesale_cost#110, ws_ext_list_price#111, ws_sold_date_sk#112] (76) Filter [codegen id : 16] -Input [6]: [ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] -Condition : isnotnull(ws_bill_customer_sk#118) +Input [6]: [ws_bill_customer_sk#107, ws_ext_discount_amt#108, ws_ext_sales_price#109, ws_ext_wholesale_cost#110, ws_ext_list_price#111, ws_sold_date_sk#112] +Condition : isnotnull(ws_bill_customer_sk#107) (77) BroadcastExchange -Input [6]: [ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#124] +Input [6]: [ws_bill_customer_sk#107, ws_ext_discount_amt#108, ws_ext_sales_price#109, ws_ext_wholesale_cost#110, ws_ext_list_price#111, ws_sold_date_sk#112] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] (78) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [c_customer_sk#110] -Right keys [1]: [ws_bill_customer_sk#118] +Left keys [1]: [c_customer_sk#99] +Right keys [1]: [ws_bill_customer_sk#107] Join condition: None (79) Project [codegen id : 18] -Output [12]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] -Input [14]: [c_customer_sk#110, c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_bill_customer_sk#118, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123] +Output [12]: [c_customer_id#100, c_first_name#101, c_last_name#102, c_preferred_cust_flag#103, c_birth_country#104, c_login#105, c_email_address#106, ws_ext_discount_amt#108, ws_ext_sales_price#109, ws_ext_wholesale_cost#110, ws_ext_list_price#111, ws_sold_date_sk#112] +Input [14]: [c_customer_sk#99, c_customer_id#100, c_first_name#101, c_last_name#102, c_preferred_cust_flag#103, c_birth_country#104, c_login#105, c_email_address#106, ws_bill_customer_sk#107, ws_ext_discount_amt#108, ws_ext_sales_price#109, ws_ext_wholesale_cost#110, ws_ext_list_price#111, ws_sold_date_sk#112] (80) ReusedExchange [Reuses operator id: 112] -Output [2]: [d_date_sk#125, d_year#126] +Output [2]: [d_date_sk#113, d_year#114] (81) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ws_sold_date_sk#123] -Right keys [1]: [d_date_sk#125] +Left keys [1]: [ws_sold_date_sk#112] +Right keys [1]: [d_date_sk#113] Join condition: None (82) Project [codegen id : 18] -Output [12]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, d_year#126] -Input [14]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, ws_sold_date_sk#123, d_date_sk#125, d_year#126] +Output [12]: [c_customer_id#100, c_first_name#101, c_last_name#102, c_preferred_cust_flag#103, c_birth_country#104, c_login#105, c_email_address#106, ws_ext_discount_amt#108, ws_ext_sales_price#109, ws_ext_wholesale_cost#110, ws_ext_list_price#111, d_year#114] +Input [14]: [c_customer_id#100, c_first_name#101, c_last_name#102, c_preferred_cust_flag#103, c_birth_country#104, c_login#105, c_email_address#106, ws_ext_discount_amt#108, ws_ext_sales_price#109, ws_ext_wholesale_cost#110, ws_ext_list_price#111, ws_sold_date_sk#112, d_date_sk#113, d_year#114] (83) HashAggregate [codegen id : 18] -Input [12]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, ws_ext_discount_amt#119, ws_ext_sales_price#120, ws_ext_wholesale_cost#121, ws_ext_list_price#122, d_year#126] -Keys [8]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [2]: [sum#127, isEmpty#128] -Results [10]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126, sum#129, isEmpty#130] +Input [12]: [c_customer_id#100, c_first_name#101, c_last_name#102, c_preferred_cust_flag#103, c_birth_country#104, c_login#105, c_email_address#106, ws_ext_discount_amt#108, ws_ext_sales_price#109, ws_ext_wholesale_cost#110, ws_ext_list_price#111, d_year#114] +Keys [8]: [c_customer_id#100, c_first_name#101, c_last_name#102, c_preferred_cust_flag#103, c_birth_country#104, c_login#105, c_email_address#106, d_year#114] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#111 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#110 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#108 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#109 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [2]: [sum#115, isEmpty#116] +Results [10]: [c_customer_id#100, c_first_name#101, c_last_name#102, c_preferred_cust_flag#103, c_birth_country#104, c_login#105, c_email_address#106, d_year#114, sum#117, isEmpty#118] (84) Exchange -Input [10]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126, sum#129, isEmpty#130] -Arguments: hashpartitioning(c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126, 5), ENSURE_REQUIREMENTS, [id=#131] +Input [10]: [c_customer_id#100, c_first_name#101, c_last_name#102, c_preferred_cust_flag#103, c_birth_country#104, c_login#105, c_email_address#106, d_year#114, sum#117, isEmpty#118] +Arguments: hashpartitioning(c_customer_id#100, c_first_name#101, c_last_name#102, c_preferred_cust_flag#103, c_birth_country#104, c_login#105, c_email_address#106, d_year#114, 5), ENSURE_REQUIREMENTS, [plan_id=13] (85) HashAggregate [codegen id : 19] -Input [10]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126, sum#129, isEmpty#130] -Keys [8]: [c_customer_id#111, c_first_name#112, c_last_name#113, c_preferred_cust_flag#114, c_birth_country#115, c_login#116, c_email_address#117, d_year#126] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#132] -Results [2]: [c_customer_id#111 AS customer_id#133, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#122 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#121 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#119 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#120 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#132 AS year_total#134] +Input [10]: [c_customer_id#100, c_first_name#101, c_last_name#102, c_preferred_cust_flag#103, c_birth_country#104, c_login#105, c_email_address#106, d_year#114, sum#117, isEmpty#118] +Keys [8]: [c_customer_id#100, c_first_name#101, c_last_name#102, c_preferred_cust_flag#103, c_birth_country#104, c_login#105, c_email_address#106, d_year#114] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#111 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#110 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#108 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#109 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#111 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#110 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#108 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#109 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#119] +Results [2]: [c_customer_id#100 AS customer_id#120, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#111 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#110 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#108 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#109 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#119 AS year_total#121] (86) Filter [codegen id : 19] -Input [2]: [customer_id#133, year_total#134] -Condition : (isnotnull(year_total#134) AND (year_total#134 > 0.000000)) +Input [2]: [customer_id#120, year_total#121] +Condition : (isnotnull(year_total#121) AND (year_total#121 > 0.000000)) (87) BroadcastExchange -Input [2]: [customer_id#133, year_total#134] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#135] +Input [2]: [customer_id#120, year_total#121] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=14] (88) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [customer_id#25] -Right keys [1]: [customer_id#133] +Left keys [1]: [customer_id#23] +Right keys [1]: [customer_id#120] Join condition: None (89) Project [codegen id : 24] -Output [11]: [customer_id#25, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#83, year_total#108, year_total#134] -Input [12]: [customer_id#25, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#83, year_total#108, customer_id#133, year_total#134] +Output [11]: [customer_id#23, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#76, year_total#98, year_total#121] +Input [12]: [customer_id#23, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#76, year_total#98, customer_id#120, year_total#121] (90) Scan parquet default.customer -Output [8]: [c_customer_sk#136, c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143] +Output [8]: [c_customer_sk#122, c_customer_id#123, c_first_name#124, c_last_name#125, c_preferred_cust_flag#126, c_birth_country#127, c_login#128, c_email_address#129] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (91) ColumnarToRow [codegen id : 22] -Input [8]: [c_customer_sk#136, c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143] +Input [8]: [c_customer_sk#122, c_customer_id#123, c_first_name#124, c_last_name#125, c_preferred_cust_flag#126, c_birth_country#127, c_login#128, c_email_address#129] (92) Filter [codegen id : 22] -Input [8]: [c_customer_sk#136, c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143] -Condition : (isnotnull(c_customer_sk#136) AND isnotnull(c_customer_id#137)) +Input [8]: [c_customer_sk#122, c_customer_id#123, c_first_name#124, c_last_name#125, c_preferred_cust_flag#126, c_birth_country#127, c_login#128, c_email_address#129] +Condition : (isnotnull(c_customer_sk#122) AND isnotnull(c_customer_id#123)) (93) Scan parquet default.web_sales -Output [6]: [ws_bill_customer_sk#144, ws_ext_discount_amt#145, ws_ext_sales_price#146, ws_ext_wholesale_cost#147, ws_ext_list_price#148, ws_sold_date_sk#149] +Output [6]: [ws_bill_customer_sk#130, ws_ext_discount_amt#131, ws_ext_sales_price#132, ws_ext_wholesale_cost#133, ws_ext_list_price#134, ws_sold_date_sk#135] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#149), dynamicpruningexpression(ws_sold_date_sk#149 IN dynamicpruning#41)] +PartitionFilters: [isnotnull(ws_sold_date_sk#135), dynamicpruningexpression(ws_sold_date_sk#135 IN dynamicpruning#39)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (94) ColumnarToRow [codegen id : 20] -Input [6]: [ws_bill_customer_sk#144, ws_ext_discount_amt#145, ws_ext_sales_price#146, ws_ext_wholesale_cost#147, ws_ext_list_price#148, ws_sold_date_sk#149] +Input [6]: [ws_bill_customer_sk#130, ws_ext_discount_amt#131, ws_ext_sales_price#132, ws_ext_wholesale_cost#133, ws_ext_list_price#134, ws_sold_date_sk#135] (95) Filter [codegen id : 20] -Input [6]: [ws_bill_customer_sk#144, ws_ext_discount_amt#145, ws_ext_sales_price#146, ws_ext_wholesale_cost#147, ws_ext_list_price#148, ws_sold_date_sk#149] -Condition : isnotnull(ws_bill_customer_sk#144) +Input [6]: [ws_bill_customer_sk#130, ws_ext_discount_amt#131, ws_ext_sales_price#132, ws_ext_wholesale_cost#133, ws_ext_list_price#134, ws_sold_date_sk#135] +Condition : isnotnull(ws_bill_customer_sk#130) (96) BroadcastExchange -Input [6]: [ws_bill_customer_sk#144, ws_ext_discount_amt#145, ws_ext_sales_price#146, ws_ext_wholesale_cost#147, ws_ext_list_price#148, ws_sold_date_sk#149] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#150] +Input [6]: [ws_bill_customer_sk#130, ws_ext_discount_amt#131, ws_ext_sales_price#132, ws_ext_wholesale_cost#133, ws_ext_list_price#134, ws_sold_date_sk#135] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=15] (97) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [c_customer_sk#136] -Right keys [1]: [ws_bill_customer_sk#144] +Left keys [1]: [c_customer_sk#122] +Right keys [1]: [ws_bill_customer_sk#130] Join condition: None (98) Project [codegen id : 22] -Output [12]: [c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, ws_ext_discount_amt#145, ws_ext_sales_price#146, ws_ext_wholesale_cost#147, ws_ext_list_price#148, ws_sold_date_sk#149] -Input [14]: [c_customer_sk#136, c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, ws_bill_customer_sk#144, ws_ext_discount_amt#145, ws_ext_sales_price#146, ws_ext_wholesale_cost#147, ws_ext_list_price#148, ws_sold_date_sk#149] +Output [12]: [c_customer_id#123, c_first_name#124, c_last_name#125, c_preferred_cust_flag#126, c_birth_country#127, c_login#128, c_email_address#129, ws_ext_discount_amt#131, ws_ext_sales_price#132, ws_ext_wholesale_cost#133, ws_ext_list_price#134, ws_sold_date_sk#135] +Input [14]: [c_customer_sk#122, c_customer_id#123, c_first_name#124, c_last_name#125, c_preferred_cust_flag#126, c_birth_country#127, c_login#128, c_email_address#129, ws_bill_customer_sk#130, ws_ext_discount_amt#131, ws_ext_sales_price#132, ws_ext_wholesale_cost#133, ws_ext_list_price#134, ws_sold_date_sk#135] (99) ReusedExchange [Reuses operator id: 116] -Output [2]: [d_date_sk#151, d_year#152] +Output [2]: [d_date_sk#136, d_year#137] (100) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [ws_sold_date_sk#149] -Right keys [1]: [d_date_sk#151] +Left keys [1]: [ws_sold_date_sk#135] +Right keys [1]: [d_date_sk#136] Join condition: None (101) Project [codegen id : 22] -Output [12]: [c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, ws_ext_discount_amt#145, ws_ext_sales_price#146, ws_ext_wholesale_cost#147, ws_ext_list_price#148, d_year#152] -Input [14]: [c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, ws_ext_discount_amt#145, ws_ext_sales_price#146, ws_ext_wholesale_cost#147, ws_ext_list_price#148, ws_sold_date_sk#149, d_date_sk#151, d_year#152] +Output [12]: [c_customer_id#123, c_first_name#124, c_last_name#125, c_preferred_cust_flag#126, c_birth_country#127, c_login#128, c_email_address#129, ws_ext_discount_amt#131, ws_ext_sales_price#132, ws_ext_wholesale_cost#133, ws_ext_list_price#134, d_year#137] +Input [14]: [c_customer_id#123, c_first_name#124, c_last_name#125, c_preferred_cust_flag#126, c_birth_country#127, c_login#128, c_email_address#129, ws_ext_discount_amt#131, ws_ext_sales_price#132, ws_ext_wholesale_cost#133, ws_ext_list_price#134, ws_sold_date_sk#135, d_date_sk#136, d_year#137] (102) HashAggregate [codegen id : 22] -Input [12]: [c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, ws_ext_discount_amt#145, ws_ext_sales_price#146, ws_ext_wholesale_cost#147, ws_ext_list_price#148, d_year#152] -Keys [8]: [c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, d_year#152] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#148 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#147 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#145 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#146 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [2]: [sum#153, isEmpty#154] -Results [10]: [c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, d_year#152, sum#155, isEmpty#156] +Input [12]: [c_customer_id#123, c_first_name#124, c_last_name#125, c_preferred_cust_flag#126, c_birth_country#127, c_login#128, c_email_address#129, ws_ext_discount_amt#131, ws_ext_sales_price#132, ws_ext_wholesale_cost#133, ws_ext_list_price#134, d_year#137] +Keys [8]: [c_customer_id#123, c_first_name#124, c_last_name#125, c_preferred_cust_flag#126, c_birth_country#127, c_login#128, c_email_address#129, d_year#137] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#134 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#133 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#131 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#132 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [2]: [sum#138, isEmpty#139] +Results [10]: [c_customer_id#123, c_first_name#124, c_last_name#125, c_preferred_cust_flag#126, c_birth_country#127, c_login#128, c_email_address#129, d_year#137, sum#140, isEmpty#141] (103) Exchange -Input [10]: [c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, d_year#152, sum#155, isEmpty#156] -Arguments: hashpartitioning(c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, d_year#152, 5), ENSURE_REQUIREMENTS, [id=#157] +Input [10]: [c_customer_id#123, c_first_name#124, c_last_name#125, c_preferred_cust_flag#126, c_birth_country#127, c_login#128, c_email_address#129, d_year#137, sum#140, isEmpty#141] +Arguments: hashpartitioning(c_customer_id#123, c_first_name#124, c_last_name#125, c_preferred_cust_flag#126, c_birth_country#127, c_login#128, c_email_address#129, d_year#137, 5), ENSURE_REQUIREMENTS, [plan_id=16] (104) HashAggregate [codegen id : 23] -Input [10]: [c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, d_year#152, sum#155, isEmpty#156] -Keys [8]: [c_customer_id#137, c_first_name#138, c_last_name#139, c_preferred_cust_flag#140, c_birth_country#141, c_login#142, c_email_address#143, d_year#152] -Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#148 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#147 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#145 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#146 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#148 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#147 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#145 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#146 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#132] -Results [2]: [c_customer_id#137 AS customer_id#158, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#148 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#147 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#145 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#146 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#132 AS year_total#159] +Input [10]: [c_customer_id#123, c_first_name#124, c_last_name#125, c_preferred_cust_flag#126, c_birth_country#127, c_login#128, c_email_address#129, d_year#137, sum#140, isEmpty#141] +Keys [8]: [c_customer_id#123, c_first_name#124, c_last_name#125, c_preferred_cust_flag#126, c_birth_country#127, c_login#128, c_email_address#129, d_year#137] +Functions [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#134 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#133 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#131 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#132 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#134 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#133 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#131 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#132 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#119] +Results [2]: [c_customer_id#123 AS customer_id#142, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ws_ext_list_price#134 as decimal(8,2))) - promote_precision(cast(ws_ext_wholesale_cost#133 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) - promote_precision(cast(ws_ext_discount_amt#131 as decimal(9,2)))), DecimalType(9,2)) as decimal(10,2))) + promote_precision(cast(ws_ext_sales_price#132 as decimal(10,2)))), DecimalType(10,2))) / 2.00), DecimalType(14,6)))#119 AS year_total#143] (105) BroadcastExchange -Input [2]: [customer_id#158, year_total#159] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#160] +Input [2]: [customer_id#142, year_total#143] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=17] (106) BroadcastHashJoin [codegen id : 24] -Left keys [1]: [customer_id#25] -Right keys [1]: [customer_id#158] -Join condition: (CASE WHEN (year_total#83 > 0.000000) THEN CheckOverflow((promote_precision(year_total#108) / promote_precision(year_total#83)), DecimalType(38,14)) END > CASE WHEN (year_total#134 > 0.000000) THEN CheckOverflow((promote_precision(year_total#159) / promote_precision(year_total#134)), DecimalType(38,14)) END) +Left keys [1]: [customer_id#23] +Right keys [1]: [customer_id#142] +Join condition: (CASE WHEN (year_total#76 > 0.000000) THEN CheckOverflow((promote_precision(year_total#98) / promote_precision(year_total#76)), DecimalType(38,14)) END > CASE WHEN (year_total#121 > 0.000000) THEN CheckOverflow((promote_precision(year_total#143) / promote_precision(year_total#121)), DecimalType(38,14)) END) (107) Project [codegen id : 24] -Output [7]: [customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56] -Input [13]: [customer_id#25, customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56, year_total#83, year_total#108, year_total#134, customer_id#158, year_total#159] +Output [7]: [customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52] +Input [13]: [customer_id#23, customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52, year_total#76, year_total#98, year_total#121, customer_id#142, year_total#143] (108) TakeOrderedAndProject -Input [7]: [customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56] -Arguments: 100, [customer_id#50 ASC NULLS FIRST, customer_first_name#51 ASC NULLS FIRST, customer_last_name#52 ASC NULLS FIRST, customer_preferred_cust_flag#53 ASC NULLS FIRST, customer_birth_country#54 ASC NULLS FIRST, customer_login#55 ASC NULLS FIRST, customer_email_address#56 ASC NULLS FIRST], [customer_id#50, customer_first_name#51, customer_last_name#52, customer_preferred_cust_flag#53, customer_birth_country#54, customer_login#55, customer_email_address#56] +Input [7]: [customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52] +Arguments: 100, [customer_id#46 ASC NULLS FIRST, customer_first_name#47 ASC NULLS FIRST, customer_last_name#48 ASC NULLS FIRST, customer_preferred_cust_flag#49 ASC NULLS FIRST, customer_birth_country#50 ASC NULLS FIRST, customer_login#51 ASC NULLS FIRST, customer_email_address#52 ASC NULLS FIRST], [customer_id#46, customer_first_name#47, customer_last_name#48, customer_preferred_cust_flag#49, customer_birth_country#50, customer_login#51, customer_email_address#52] ===== Subqueries ===== @@ -628,24 +628,24 @@ BroadcastExchange (112) (109) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_year#18] +Output [2]: [d_date_sk#16, d_year#17] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (110) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#17, d_year#18] +Input [2]: [d_date_sk#16, d_year#17] (111) Filter [codegen id : 1] -Input [2]: [d_date_sk#17, d_year#18] -Condition : ((isnotnull(d_year#18) AND (d_year#18 = 2001)) AND isnotnull(d_date_sk#17)) +Input [2]: [d_date_sk#16, d_year#17] +Condition : ((isnotnull(d_year#17) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) (112) BroadcastExchange -Input [2]: [d_date_sk#17, d_year#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#161] +Input [2]: [d_date_sk#16, d_year#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=18] -Subquery:2 Hosting operator id = 20 Hosting Expression = ss_sold_date_sk#40 IN dynamicpruning#41 +Subquery:2 Hosting operator id = 20 Hosting Expression = ss_sold_date_sk#38 IN dynamicpruning#39 BroadcastExchange (116) +- * Filter (115) +- * ColumnarToRow (114) @@ -653,29 +653,29 @@ BroadcastExchange (116) (113) Scan parquet default.date_dim -Output [2]: [d_date_sk#43, d_year#44] +Output [2]: [d_date_sk#40, d_year#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] ReadSchema: struct (114) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#43, d_year#44] +Input [2]: [d_date_sk#40, d_year#41] (115) Filter [codegen id : 1] -Input [2]: [d_date_sk#43, d_year#44] -Condition : ((isnotnull(d_year#44) AND (d_year#44 = 2002)) AND isnotnull(d_date_sk#43)) +Input [2]: [d_date_sk#40, d_year#41] +Condition : ((isnotnull(d_year#41) AND (d_year#41 = 2002)) AND isnotnull(d_date_sk#40)) (116) BroadcastExchange -Input [2]: [d_date_sk#43, d_year#44] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#162] +Input [2]: [d_date_sk#40, d_year#41] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=19] -Subquery:3 Hosting operator id = 37 Hosting Expression = cs_sold_date_sk#72 IN dynamicpruning#15 +Subquery:3 Hosting operator id = 37 Hosting Expression = cs_sold_date_sk#67 IN dynamicpruning#15 -Subquery:4 Hosting operator id = 56 Hosting Expression = cs_sold_date_sk#98 IN dynamicpruning#41 +Subquery:4 Hosting operator id = 56 Hosting Expression = cs_sold_date_sk#90 IN dynamicpruning#39 -Subquery:5 Hosting operator id = 74 Hosting Expression = ws_sold_date_sk#123 IN dynamicpruning#15 +Subquery:5 Hosting operator id = 74 Hosting Expression = ws_sold_date_sk#112 IN dynamicpruning#15 -Subquery:6 Hosting operator id = 93 Hosting Expression = ws_sold_date_sk#149 IN dynamicpruning#41 +Subquery:6 Hosting operator id = 93 Hosting Expression = ws_sold_date_sk#135 IN dynamicpruning#39 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt index 32d76db8cdf3a..1b3fddff09c93 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40.sf100/explain.txt @@ -51,138 +51,138 @@ Condition : (isnotnull(cs_warehouse_sk#1) AND isnotnull(cs_item_sk#2)) (4) Exchange Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] -Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#7] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] Arguments: [cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST], false, 0 (6) Scan parquet default.catalog_returns -Output [4]: [cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10, cr_returned_date_sk#11] +Output [4]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9, cr_returned_date_sk#10] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [4]: [cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10, cr_returned_date_sk#11] +Input [4]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9, cr_returned_date_sk#10] (8) Filter [codegen id : 3] -Input [4]: [cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10, cr_returned_date_sk#11] -Condition : (isnotnull(cr_order_number#9) AND isnotnull(cr_item_sk#8)) +Input [4]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9, cr_returned_date_sk#10] +Condition : (isnotnull(cr_order_number#8) AND isnotnull(cr_item_sk#7)) (9) Project [codegen id : 3] -Output [3]: [cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10] -Input [4]: [cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10, cr_returned_date_sk#11] +Output [3]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9] +Input [4]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9, cr_returned_date_sk#10] (10) Exchange -Input [3]: [cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10] -Arguments: hashpartitioning(cr_order_number#9, cr_item_sk#8, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [3]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9] +Arguments: hashpartitioning(cr_order_number#8, cr_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [3]: [cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10] -Arguments: [cr_order_number#9 ASC NULLS FIRST, cr_item_sk#8 ASC NULLS FIRST], false, 0 +Input [3]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9] +Arguments: [cr_order_number#8 ASC NULLS FIRST, cr_item_sk#7 ASC NULLS FIRST], false, 0 (12) SortMergeJoin [codegen id : 8] Left keys [2]: [cs_order_number#3, cs_item_sk#2] -Right keys [2]: [cr_order_number#9, cr_item_sk#8] +Right keys [2]: [cr_order_number#8, cr_item_sk#7] Join condition: None (13) Project [codegen id : 8] -Output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#10] -Input [8]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5, cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10] +Output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#9] +Input [8]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5, cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9] (14) Scan parquet default.item -Output [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] +Output [3]: [i_item_sk#11, i_item_id#12, i_current_price#13] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 5] -Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] +Input [3]: [i_item_sk#11, i_item_id#12, i_current_price#13] (16) Filter [codegen id : 5] -Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] -Condition : (((isnotnull(i_current_price#15) AND (i_current_price#15 >= 0.99)) AND (i_current_price#15 <= 1.49)) AND isnotnull(i_item_sk#13)) +Input [3]: [i_item_sk#11, i_item_id#12, i_current_price#13] +Condition : (((isnotnull(i_current_price#13) AND (i_current_price#13 >= 0.99)) AND (i_current_price#13 <= 1.49)) AND isnotnull(i_item_sk#11)) (17) Project [codegen id : 5] -Output [2]: [i_item_sk#13, i_item_id#14] -Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] +Output [2]: [i_item_sk#11, i_item_id#12] +Input [3]: [i_item_sk#11, i_item_id#12, i_current_price#13] (18) BroadcastExchange -Input [2]: [i_item_sk#13, i_item_id#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] +Input [2]: [i_item_sk#11, i_item_id#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (19) BroadcastHashJoin [codegen id : 8] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#13] +Right keys [1]: [i_item_sk#11] Join condition: None (20) Project [codegen id : 8] -Output [5]: [cs_warehouse_sk#1, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#10, i_item_id#14] -Input [7]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#10, i_item_sk#13, i_item_id#14] +Output [5]: [cs_warehouse_sk#1, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#9, i_item_id#12] +Input [7]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#9, i_item_sk#11, i_item_id#12] (21) ReusedExchange [Reuses operator id: 37] -Output [2]: [d_date_sk#17, d_date#18] +Output [2]: [d_date_sk#14, d_date#15] (22) BroadcastHashJoin [codegen id : 8] Left keys [1]: [cs_sold_date_sk#5] -Right keys [1]: [d_date_sk#17] +Right keys [1]: [d_date_sk#14] Join condition: None (23) Project [codegen id : 8] -Output [5]: [cs_warehouse_sk#1, cs_sales_price#4, cr_refunded_cash#10, i_item_id#14, d_date#18] -Input [7]: [cs_warehouse_sk#1, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#10, i_item_id#14, d_date_sk#17, d_date#18] +Output [5]: [cs_warehouse_sk#1, cs_sales_price#4, cr_refunded_cash#9, i_item_id#12, d_date#15] +Input [7]: [cs_warehouse_sk#1, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#9, i_item_id#12, d_date_sk#14, d_date#15] (24) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#19, w_state#20] +Output [2]: [w_warehouse_sk#16, w_state#17] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 7] -Input [2]: [w_warehouse_sk#19, w_state#20] +Input [2]: [w_warehouse_sk#16, w_state#17] (26) Filter [codegen id : 7] -Input [2]: [w_warehouse_sk#19, w_state#20] -Condition : isnotnull(w_warehouse_sk#19) +Input [2]: [w_warehouse_sk#16, w_state#17] +Condition : isnotnull(w_warehouse_sk#16) (27) BroadcastExchange -Input [2]: [w_warehouse_sk#19, w_state#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] +Input [2]: [w_warehouse_sk#16, w_state#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (28) BroadcastHashJoin [codegen id : 8] Left keys [1]: [cs_warehouse_sk#1] -Right keys [1]: [w_warehouse_sk#19] +Right keys [1]: [w_warehouse_sk#16] Join condition: None (29) Project [codegen id : 8] -Output [5]: [cs_sales_price#4, cr_refunded_cash#10, w_state#20, i_item_id#14, d_date#18] -Input [7]: [cs_warehouse_sk#1, cs_sales_price#4, cr_refunded_cash#10, i_item_id#14, d_date#18, w_warehouse_sk#19, w_state#20] +Output [5]: [cs_sales_price#4, cr_refunded_cash#9, w_state#17, i_item_id#12, d_date#15] +Input [7]: [cs_warehouse_sk#1, cs_sales_price#4, cr_refunded_cash#9, i_item_id#12, d_date#15, w_warehouse_sk#16, w_state#17] (30) HashAggregate [codegen id : 8] -Input [5]: [cs_sales_price#4, cr_refunded_cash#10, w_state#20, i_item_id#14, d_date#18] -Keys [2]: [w_state#20, i_item_id#14] -Functions [2]: [partial_sum(CASE WHEN (d_date#18 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#18 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] -Aggregate Attributes [4]: [sum#22, isEmpty#23, sum#24, isEmpty#25] -Results [6]: [w_state#20, i_item_id#14, sum#26, isEmpty#27, sum#28, isEmpty#29] +Input [5]: [cs_sales_price#4, cr_refunded_cash#9, w_state#17, i_item_id#12, d_date#15] +Keys [2]: [w_state#17, i_item_id#12] +Functions [2]: [partial_sum(CASE WHEN (d_date#15 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#15 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] +Aggregate Attributes [4]: [sum#18, isEmpty#19, sum#20, isEmpty#21] +Results [6]: [w_state#17, i_item_id#12, sum#22, isEmpty#23, sum#24, isEmpty#25] (31) Exchange -Input [6]: [w_state#20, i_item_id#14, sum#26, isEmpty#27, sum#28, isEmpty#29] -Arguments: hashpartitioning(w_state#20, i_item_id#14, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [6]: [w_state#17, i_item_id#12, sum#22, isEmpty#23, sum#24, isEmpty#25] +Arguments: hashpartitioning(w_state#17, i_item_id#12, 5), ENSURE_REQUIREMENTS, [plan_id=5] (32) HashAggregate [codegen id : 9] -Input [6]: [w_state#20, i_item_id#14, sum#26, isEmpty#27, sum#28, isEmpty#29] -Keys [2]: [w_state#20, i_item_id#14] -Functions [2]: [sum(CASE WHEN (d_date#18 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), sum(CASE WHEN (d_date#18 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] -Aggregate Attributes [2]: [sum(CASE WHEN (d_date#18 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#31, sum(CASE WHEN (d_date#18 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#32] -Results [4]: [w_state#20, i_item_id#14, sum(CASE WHEN (d_date#18 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#31 AS sales_before#33, sum(CASE WHEN (d_date#18 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#32 AS sales_after#34] +Input [6]: [w_state#17, i_item_id#12, sum#22, isEmpty#23, sum#24, isEmpty#25] +Keys [2]: [w_state#17, i_item_id#12] +Functions [2]: [sum(CASE WHEN (d_date#15 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), sum(CASE WHEN (d_date#15 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#15 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#26, sum(CASE WHEN (d_date#15 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#27] +Results [4]: [w_state#17, i_item_id#12, sum(CASE WHEN (d_date#15 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#26 AS sales_before#28, sum(CASE WHEN (d_date#15 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#27 AS sales_after#29] (33) TakeOrderedAndProject -Input [4]: [w_state#20, i_item_id#14, sales_before#33, sales_after#34] -Arguments: 100, [w_state#20 ASC NULLS FIRST, i_item_id#14 ASC NULLS FIRST], [w_state#20, i_item_id#14, sales_before#33, sales_after#34] +Input [4]: [w_state#17, i_item_id#12, sales_before#28, sales_after#29] +Arguments: 100, [w_state#17 ASC NULLS FIRST, i_item_id#12 ASC NULLS FIRST], [w_state#17, i_item_id#12, sales_before#28, sales_after#29] ===== Subqueries ===== @@ -194,21 +194,21 @@ BroadcastExchange (37) (34) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_date#18] +Output [2]: [d_date_sk#14, d_date#15] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#17, d_date#18] +Input [2]: [d_date_sk#14, d_date#15] (36) Filter [codegen id : 1] -Input [2]: [d_date_sk#17, d_date#18] -Condition : (((isnotnull(d_date#18) AND (d_date#18 >= 2000-02-10)) AND (d_date#18 <= 2000-04-10)) AND isnotnull(d_date_sk#17)) +Input [2]: [d_date_sk#14, d_date#15] +Condition : (((isnotnull(d_date#15) AND (d_date#15 >= 2000-02-10)) AND (d_date#15 <= 2000-04-10)) AND isnotnull(d_date_sk#14)) (37) BroadcastExchange -Input [2]: [d_date_sk#17, d_date#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#35] +Input [2]: [d_date_sk#14, d_date#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt index f1a79d04f36bc..5ea29201e6cd0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q40/explain.txt @@ -51,138 +51,138 @@ Condition : (isnotnull(cs_warehouse_sk#1) AND isnotnull(cs_item_sk#2)) (4) Exchange Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] -Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#7] +Arguments: hashpartitioning(cs_order_number#3, cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5] Arguments: [cs_order_number#3 ASC NULLS FIRST, cs_item_sk#2 ASC NULLS FIRST], false, 0 (6) Scan parquet default.catalog_returns -Output [4]: [cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10, cr_returned_date_sk#11] +Output [4]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9, cr_returned_date_sk#10] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [4]: [cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10, cr_returned_date_sk#11] +Input [4]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9, cr_returned_date_sk#10] (8) Filter [codegen id : 3] -Input [4]: [cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10, cr_returned_date_sk#11] -Condition : (isnotnull(cr_order_number#9) AND isnotnull(cr_item_sk#8)) +Input [4]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9, cr_returned_date_sk#10] +Condition : (isnotnull(cr_order_number#8) AND isnotnull(cr_item_sk#7)) (9) Project [codegen id : 3] -Output [3]: [cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10] -Input [4]: [cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10, cr_returned_date_sk#11] +Output [3]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9] +Input [4]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9, cr_returned_date_sk#10] (10) Exchange -Input [3]: [cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10] -Arguments: hashpartitioning(cr_order_number#9, cr_item_sk#8, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [3]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9] +Arguments: hashpartitioning(cr_order_number#8, cr_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [3]: [cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10] -Arguments: [cr_order_number#9 ASC NULLS FIRST, cr_item_sk#8 ASC NULLS FIRST], false, 0 +Input [3]: [cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9] +Arguments: [cr_order_number#8 ASC NULLS FIRST, cr_item_sk#7 ASC NULLS FIRST], false, 0 (12) SortMergeJoin [codegen id : 8] Left keys [2]: [cs_order_number#3, cs_item_sk#2] -Right keys [2]: [cr_order_number#9, cr_item_sk#8] +Right keys [2]: [cr_order_number#8, cr_item_sk#7] Join condition: None (13) Project [codegen id : 8] -Output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#10] -Input [8]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5, cr_item_sk#8, cr_order_number#9, cr_refunded_cash#10] +Output [5]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#9] +Input [8]: [cs_warehouse_sk#1, cs_item_sk#2, cs_order_number#3, cs_sales_price#4, cs_sold_date_sk#5, cr_item_sk#7, cr_order_number#8, cr_refunded_cash#9] (14) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#13, w_state#14] +Output [2]: [w_warehouse_sk#11, w_state#12] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 5] -Input [2]: [w_warehouse_sk#13, w_state#14] +Input [2]: [w_warehouse_sk#11, w_state#12] (16) Filter [codegen id : 5] -Input [2]: [w_warehouse_sk#13, w_state#14] -Condition : isnotnull(w_warehouse_sk#13) +Input [2]: [w_warehouse_sk#11, w_state#12] +Condition : isnotnull(w_warehouse_sk#11) (17) BroadcastExchange -Input [2]: [w_warehouse_sk#13, w_state#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Input [2]: [w_warehouse_sk#11, w_state#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (18) BroadcastHashJoin [codegen id : 8] Left keys [1]: [cs_warehouse_sk#1] -Right keys [1]: [w_warehouse_sk#13] +Right keys [1]: [w_warehouse_sk#11] Join condition: None (19) Project [codegen id : 8] -Output [5]: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#10, w_state#14] -Input [7]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#10, w_warehouse_sk#13, w_state#14] +Output [5]: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#9, w_state#12] +Input [7]: [cs_warehouse_sk#1, cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#9, w_warehouse_sk#11, w_state#12] (20) Scan parquet default.item -Output [3]: [i_item_sk#16, i_item_id#17, i_current_price#18] +Output [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_current_price), GreaterThanOrEqual(i_current_price,0.99), LessThanOrEqual(i_current_price,1.49), IsNotNull(i_item_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 6] -Input [3]: [i_item_sk#16, i_item_id#17, i_current_price#18] +Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] (22) Filter [codegen id : 6] -Input [3]: [i_item_sk#16, i_item_id#17, i_current_price#18] -Condition : (((isnotnull(i_current_price#18) AND (i_current_price#18 >= 0.99)) AND (i_current_price#18 <= 1.49)) AND isnotnull(i_item_sk#16)) +Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] +Condition : (((isnotnull(i_current_price#15) AND (i_current_price#15 >= 0.99)) AND (i_current_price#15 <= 1.49)) AND isnotnull(i_item_sk#13)) (23) Project [codegen id : 6] -Output [2]: [i_item_sk#16, i_item_id#17] -Input [3]: [i_item_sk#16, i_item_id#17, i_current_price#18] +Output [2]: [i_item_sk#13, i_item_id#14] +Input [3]: [i_item_sk#13, i_item_id#14, i_current_price#15] (24) BroadcastExchange -Input [2]: [i_item_sk#16, i_item_id#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] +Input [2]: [i_item_sk#13, i_item_id#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (25) BroadcastHashJoin [codegen id : 8] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#16] +Right keys [1]: [i_item_sk#13] Join condition: None (26) Project [codegen id : 8] -Output [5]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#10, w_state#14, i_item_id#17] -Input [7]: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#10, w_state#14, i_item_sk#16, i_item_id#17] +Output [5]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#9, w_state#12, i_item_id#14] +Input [7]: [cs_item_sk#2, cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#9, w_state#12, i_item_sk#13, i_item_id#14] (27) ReusedExchange [Reuses operator id: 37] -Output [2]: [d_date_sk#20, d_date#21] +Output [2]: [d_date_sk#16, d_date#17] (28) BroadcastHashJoin [codegen id : 8] Left keys [1]: [cs_sold_date_sk#5] -Right keys [1]: [d_date_sk#20] +Right keys [1]: [d_date_sk#16] Join condition: None (29) Project [codegen id : 8] -Output [5]: [cs_sales_price#4, cr_refunded_cash#10, w_state#14, i_item_id#17, d_date#21] -Input [7]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#10, w_state#14, i_item_id#17, d_date_sk#20, d_date#21] +Output [5]: [cs_sales_price#4, cr_refunded_cash#9, w_state#12, i_item_id#14, d_date#17] +Input [7]: [cs_sales_price#4, cs_sold_date_sk#5, cr_refunded_cash#9, w_state#12, i_item_id#14, d_date_sk#16, d_date#17] (30) HashAggregate [codegen id : 8] -Input [5]: [cs_sales_price#4, cr_refunded_cash#10, w_state#14, i_item_id#17, d_date#21] -Keys [2]: [w_state#14, i_item_id#17] -Functions [2]: [partial_sum(CASE WHEN (d_date#21 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#21 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] -Aggregate Attributes [4]: [sum#22, isEmpty#23, sum#24, isEmpty#25] -Results [6]: [w_state#14, i_item_id#17, sum#26, isEmpty#27, sum#28, isEmpty#29] +Input [5]: [cs_sales_price#4, cr_refunded_cash#9, w_state#12, i_item_id#14, d_date#17] +Keys [2]: [w_state#12, i_item_id#14] +Functions [2]: [partial_sum(CASE WHEN (d_date#17 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_date#17 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] +Aggregate Attributes [4]: [sum#18, isEmpty#19, sum#20, isEmpty#21] +Results [6]: [w_state#12, i_item_id#14, sum#22, isEmpty#23, sum#24, isEmpty#25] (31) Exchange -Input [6]: [w_state#14, i_item_id#17, sum#26, isEmpty#27, sum#28, isEmpty#29] -Arguments: hashpartitioning(w_state#14, i_item_id#17, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [6]: [w_state#12, i_item_id#14, sum#22, isEmpty#23, sum#24, isEmpty#25] +Arguments: hashpartitioning(w_state#12, i_item_id#14, 5), ENSURE_REQUIREMENTS, [plan_id=5] (32) HashAggregate [codegen id : 9] -Input [6]: [w_state#14, i_item_id#17, sum#26, isEmpty#27, sum#28, isEmpty#29] -Keys [2]: [w_state#14, i_item_id#17] -Functions [2]: [sum(CASE WHEN (d_date#21 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), sum(CASE WHEN (d_date#21 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] -Aggregate Attributes [2]: [sum(CASE WHEN (d_date#21 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#31, sum(CASE WHEN (d_date#21 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#32] -Results [4]: [w_state#14, i_item_id#17, sum(CASE WHEN (d_date#21 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#31 AS sales_before#33, sum(CASE WHEN (d_date#21 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#10 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#32 AS sales_after#34] +Input [6]: [w_state#12, i_item_id#14, sum#22, isEmpty#23, sum#24, isEmpty#25] +Keys [2]: [w_state#12, i_item_id#14] +Functions [2]: [sum(CASE WHEN (d_date#17 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END), sum(CASE WHEN (d_date#17 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)] +Aggregate Attributes [2]: [sum(CASE WHEN (d_date#17 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#26, sum(CASE WHEN (d_date#17 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#27] +Results [4]: [w_state#12, i_item_id#14, sum(CASE WHEN (d_date#17 < 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#26 AS sales_before#28, sum(CASE WHEN (d_date#17 >= 2000-03-11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#4 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_refunded_cash#9 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)) ELSE 0.00 END)#27 AS sales_after#29] (33) TakeOrderedAndProject -Input [4]: [w_state#14, i_item_id#17, sales_before#33, sales_after#34] -Arguments: 100, [w_state#14 ASC NULLS FIRST, i_item_id#17 ASC NULLS FIRST], [w_state#14, i_item_id#17, sales_before#33, sales_after#34] +Input [4]: [w_state#12, i_item_id#14, sales_before#28, sales_after#29] +Arguments: 100, [w_state#12 ASC NULLS FIRST, i_item_id#14 ASC NULLS FIRST], [w_state#12, i_item_id#14, sales_before#28, sales_after#29] ===== Subqueries ===== @@ -194,21 +194,21 @@ BroadcastExchange (37) (34) Scan parquet default.date_dim -Output [2]: [d_date_sk#20, d_date#21] +Output [2]: [d_date_sk#16, d_date#17] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-02-10), LessThanOrEqual(d_date,2000-04-10), IsNotNull(d_date_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#20, d_date#21] +Input [2]: [d_date_sk#16, d_date#17] (36) Filter [codegen id : 1] -Input [2]: [d_date_sk#20, d_date#21] -Condition : (((isnotnull(d_date#21) AND (d_date#21 >= 2000-02-10)) AND (d_date#21 <= 2000-04-10)) AND isnotnull(d_date_sk#20)) +Input [2]: [d_date_sk#16, d_date#17] +Condition : (((isnotnull(d_date#17) AND (d_date#17 >= 2000-02-10)) AND (d_date#17 <= 2000-04-10)) AND isnotnull(d_date_sk#16)) (37) BroadcastExchange -Input [2]: [d_date_sk#20, d_date#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#35] +Input [2]: [d_date_sk#16, d_date#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt index 33c03d2b767dd..131e5cef4a7a7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41.sf100/explain.txt @@ -66,26 +66,26 @@ Results [2]: [i_manufact#5, count#10] (10) Exchange Input [2]: [i_manufact#5, count#10] -Arguments: hashpartitioning(i_manufact#5, 5), ENSURE_REQUIREMENTS, [id=#11] +Arguments: hashpartitioning(i_manufact#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] (11) HashAggregate [codegen id : 2] Input [2]: [i_manufact#5, count#10] Keys [1]: [i_manufact#5] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#12] -Results [2]: [count(1)#12 AS item_cnt#13, i_manufact#5] +Aggregate Attributes [1]: [count(1)#11] +Results [2]: [count(1)#11 AS item_cnt#12, i_manufact#5] (12) Filter [codegen id : 2] -Input [2]: [item_cnt#13, i_manufact#5] -Condition : (item_cnt#13 > 0) +Input [2]: [item_cnt#12, i_manufact#5] +Condition : (item_cnt#12 > 0) (13) Project [codegen id : 2] Output [1]: [i_manufact#5] -Input [2]: [item_cnt#13, i_manufact#5] +Input [2]: [item_cnt#12, i_manufact#5] (14) BroadcastExchange Input [1]: [i_manufact#5] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 3] Left keys [1]: [i_manufact#2] @@ -105,7 +105,7 @@ Results [1]: [i_product_name#3] (18) Exchange Input [1]: [i_product_name#3] -Arguments: hashpartitioning(i_product_name#3, 5), ENSURE_REQUIREMENTS, [id=#15] +Arguments: hashpartitioning(i_product_name#3, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) HashAggregate [codegen id : 4] Input [1]: [i_product_name#3] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt index 33c03d2b767dd..131e5cef4a7a7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q41/explain.txt @@ -66,26 +66,26 @@ Results [2]: [i_manufact#5, count#10] (10) Exchange Input [2]: [i_manufact#5, count#10] -Arguments: hashpartitioning(i_manufact#5, 5), ENSURE_REQUIREMENTS, [id=#11] +Arguments: hashpartitioning(i_manufact#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] (11) HashAggregate [codegen id : 2] Input [2]: [i_manufact#5, count#10] Keys [1]: [i_manufact#5] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#12] -Results [2]: [count(1)#12 AS item_cnt#13, i_manufact#5] +Aggregate Attributes [1]: [count(1)#11] +Results [2]: [count(1)#11 AS item_cnt#12, i_manufact#5] (12) Filter [codegen id : 2] -Input [2]: [item_cnt#13, i_manufact#5] -Condition : (item_cnt#13 > 0) +Input [2]: [item_cnt#12, i_manufact#5] +Condition : (item_cnt#12 > 0) (13) Project [codegen id : 2] Output [1]: [i_manufact#5] -Input [2]: [item_cnt#13, i_manufact#5] +Input [2]: [item_cnt#12, i_manufact#5] (14) BroadcastExchange Input [1]: [i_manufact#5] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 3] Left keys [1]: [i_manufact#2] @@ -105,7 +105,7 @@ Results [1]: [i_product_name#3] (18) Exchange Input [1]: [i_product_name#3] -Arguments: hashpartitioning(i_product_name#3, 5), ENSURE_REQUIREMENTS, [id=#15] +Arguments: hashpartitioning(i_product_name#3, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) HashAggregate [codegen id : 4] Input [1]: [i_product_name#3] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.sf100/explain.txt index 30bd5d90995f4..71f39eacc018e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42.sf100/explain.txt @@ -53,7 +53,7 @@ Input [4]: [i_item_sk#5, i_category_id#6, i_category#7, i_manager_id#8] (8) BroadcastExchange Input [3]: [i_item_sk#5, i_category_id#6, i_category#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#1] @@ -65,38 +65,38 @@ Output [4]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_category_id#6, i_categor Input [6]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#5, i_category_id#6, i_category#7] (11) ReusedExchange [Reuses operator id: 22] -Output [2]: [d_date_sk#10, d_year#11] +Output [2]: [d_date_sk#9, d_year#10] (12) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#3] -Right keys [1]: [d_date_sk#10] +Right keys [1]: [d_date_sk#9] Join condition: None (13) Project [codegen id : 3] -Output [4]: [d_year#11, ss_ext_sales_price#2, i_category_id#6, i_category#7] -Input [6]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_category_id#6, i_category#7, d_date_sk#10, d_year#11] +Output [4]: [d_year#10, ss_ext_sales_price#2, i_category_id#6, i_category#7] +Input [6]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_category_id#6, i_category#7, d_date_sk#9, d_year#10] (14) HashAggregate [codegen id : 3] -Input [4]: [d_year#11, ss_ext_sales_price#2, i_category_id#6, i_category#7] -Keys [3]: [d_year#11, i_category_id#6, i_category#7] +Input [4]: [d_year#10, ss_ext_sales_price#2, i_category_id#6, i_category#7] +Keys [3]: [d_year#10, i_category_id#6, i_category#7] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#12] -Results [4]: [d_year#11, i_category_id#6, i_category#7, sum#13] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#10, i_category_id#6, i_category#7, sum#12] (15) Exchange -Input [4]: [d_year#11, i_category_id#6, i_category#7, sum#13] -Arguments: hashpartitioning(d_year#11, i_category_id#6, i_category#7, 5), ENSURE_REQUIREMENTS, [id=#14] +Input [4]: [d_year#10, i_category_id#6, i_category#7, sum#12] +Arguments: hashpartitioning(d_year#10, i_category_id#6, i_category#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (16) HashAggregate [codegen id : 4] -Input [4]: [d_year#11, i_category_id#6, i_category#7, sum#13] -Keys [3]: [d_year#11, i_category_id#6, i_category#7] +Input [4]: [d_year#10, i_category_id#6, i_category#7, sum#12] +Keys [3]: [d_year#10, i_category_id#6, i_category#7] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#15] -Results [4]: [d_year#11, i_category_id#6, i_category#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#15,17,2) AS sum(ss_ext_sales_price)#16] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#13] +Results [4]: [d_year#10, i_category_id#6, i_category#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#13,17,2) AS sum(ss_ext_sales_price)#14] (17) TakeOrderedAndProject -Input [4]: [d_year#11, i_category_id#6, i_category#7, sum(ss_ext_sales_price)#16] -Arguments: 100, [sum(ss_ext_sales_price)#16 DESC NULLS LAST, d_year#11 ASC NULLS FIRST, i_category_id#6 ASC NULLS FIRST, i_category#7 ASC NULLS FIRST], [d_year#11, i_category_id#6, i_category#7, sum(ss_ext_sales_price)#16] +Input [4]: [d_year#10, i_category_id#6, i_category#7, sum(ss_ext_sales_price)#14] +Arguments: 100, [sum(ss_ext_sales_price)#14 DESC NULLS LAST, d_year#10 ASC NULLS FIRST, i_category_id#6 ASC NULLS FIRST, i_category#7 ASC NULLS FIRST], [d_year#10, i_category_id#6, i_category#7, sum(ss_ext_sales_price)#14] ===== Subqueries ===== @@ -109,25 +109,25 @@ BroadcastExchange (22) (18) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_moy#17] +Output [3]: [d_date_sk#9, d_year#10, d_moy#15] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#17] +Input [3]: [d_date_sk#9, d_year#10, d_moy#15] (20) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#17] -Condition : ((((isnotnull(d_moy#17) AND isnotnull(d_year#11)) AND (d_moy#17 = 11)) AND (d_year#11 = 2000)) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#9, d_year#10, d_moy#15] +Condition : ((((isnotnull(d_moy#15) AND isnotnull(d_year#10)) AND (d_moy#15 = 11)) AND (d_year#10 = 2000)) AND isnotnull(d_date_sk#9)) (21) Project [codegen id : 1] -Output [2]: [d_date_sk#10, d_year#11] -Input [3]: [d_date_sk#10, d_year#11, d_moy#17] +Output [2]: [d_date_sk#9, d_year#10] +Input [3]: [d_date_sk#9, d_year#10, d_moy#15] (22) BroadcastExchange -Input [2]: [d_date_sk#10, d_year#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt index d16b4bca7c3f4..2de983e587e2b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q42/explain.txt @@ -57,7 +57,7 @@ Condition : isnotnull(ss_item_sk#4) (8) BroadcastExchange Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] @@ -69,55 +69,55 @@ Output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] (11) Scan parquet default.item -Output [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Output [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] (13) Filter [codegen id : 2] -Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] -Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 1)) AND isnotnull(i_item_sk#7)) (14) Project [codegen id : 2] -Output [3]: [i_item_sk#8, i_category_id#9, i_category#10] -Input [4]: [i_item_sk#8, i_category_id#9, i_category#10, i_manager_id#11] +Output [3]: [i_item_sk#7, i_category_id#8, i_category#9] +Input [4]: [i_item_sk#7, i_category_id#8, i_category#9, i_manager_id#10] (15) BroadcastExchange -Input [3]: [i_item_sk#8, i_category_id#9, i_category#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Input [3]: [i_item_sk#7, i_category_id#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#4] -Right keys [1]: [i_item_sk#8] +Right keys [1]: [i_item_sk#7] Join condition: None (17) Project [codegen id : 3] -Output [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#9, i_category#10] -Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#8, i_category_id#9, i_category#10] +Output [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_category_id#8, i_category#9] (18) HashAggregate [codegen id : 3] -Input [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#9, i_category#10] -Keys [3]: [d_year#2, i_category_id#9, i_category#10] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_category_id#8, i_category#9] +Keys [3]: [d_year#2, i_category_id#8, i_category#9] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum#13] -Results [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] (19) Exchange -Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] -Arguments: hashpartitioning(d_year#2, i_category_id#9, i_category#10, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] +Arguments: hashpartitioning(d_year#2, i_category_id#8, i_category#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [4]: [d_year#2, i_category_id#9, i_category#10, sum#14] -Keys [3]: [d_year#2, i_category_id#9, i_category#10] +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum#12] +Keys [3]: [d_year#2, i_category_id#8, i_category#9] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#16] -Results [4]: [d_year#2, i_category_id#9, i_category#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#16,17,2) AS sum(ss_ext_sales_price)#17] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [4]: [d_year#2, i_category_id#8, i_category#9, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS sum(ss_ext_sales_price)#14] (21) TakeOrderedAndProject -Input [4]: [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#17] -Arguments: 100, [sum(ss_ext_sales_price)#17 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#9 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [d_year#2, i_category_id#9, i_category#10, sum(ss_ext_sales_price)#17] +Input [4]: [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#14] +Arguments: 100, [sum(ss_ext_sales_price)#14 DESC NULLS LAST, d_year#2 ASC NULLS FIRST, i_category_id#8 ASC NULLS FIRST, i_category#9 ASC NULLS FIRST], [d_year#2, i_category_id#8, i_category#9, sum(ss_ext_sales_price)#14] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.sf100/explain.txt index 7efea217cb931..480be419b0ad9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43.sf100/explain.txt @@ -42,88 +42,88 @@ Input [3]: [d_date_sk#1, d_year#2, d_day_name#3] (5) BroadcastExchange Input [2]: [d_date_sk#1, d_day_name#3] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#4] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.store_sales -Output [3]: [ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Output [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#7), dynamicpruningexpression(ss_sold_date_sk#7 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ss_sold_date_sk#6), dynamicpruningexpression(ss_sold_date_sk#6 IN dynamicpruning#7)] PushedFilters: [IsNotNull(ss_store_sk)] ReadSchema: struct (7) ColumnarToRow -Input [3]: [ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] (8) Filter -Input [3]: [ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] -Condition : isnotnull(ss_store_sk#5) +Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] +Condition : isnotnull(ss_store_sk#4) (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] -Right keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [ss_sold_date_sk#6] Join condition: None (10) Project [codegen id : 3] -Output [3]: [d_day_name#3, ss_store_sk#5, ss_sales_price#6] -Input [5]: [d_date_sk#1, d_day_name#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] +Output [3]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5] +Input [5]: [d_date_sk#1, d_day_name#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] (11) Scan parquet default.store -Output [4]: [s_store_sk#9, s_store_id#10, s_store_name#11, s_gmt_offset#12] +Output [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [s_store_sk#9, s_store_id#10, s_store_name#11, s_gmt_offset#12] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] (13) Filter [codegen id : 2] -Input [4]: [s_store_sk#9, s_store_id#10, s_store_name#11, s_gmt_offset#12] -Condition : ((isnotnull(s_gmt_offset#12) AND (s_gmt_offset#12 = -5.00)) AND isnotnull(s_store_sk#9)) +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Condition : ((isnotnull(s_gmt_offset#11) AND (s_gmt_offset#11 = -5.00)) AND isnotnull(s_store_sk#8)) (14) Project [codegen id : 2] -Output [3]: [s_store_sk#9, s_store_id#10, s_store_name#11] -Input [4]: [s_store_sk#9, s_store_id#10, s_store_name#11, s_gmt_offset#12] +Output [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] +Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] (15) BroadcastExchange -Input [3]: [s_store_sk#9, s_store_id#10, s_store_name#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +Input [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ss_store_sk#5] -Right keys [1]: [s_store_sk#9] +Left keys [1]: [ss_store_sk#4] +Right keys [1]: [s_store_sk#8] Join condition: None (17) Project [codegen id : 3] -Output [4]: [d_day_name#3, ss_sales_price#6, s_store_id#10, s_store_name#11] -Input [6]: [d_day_name#3, ss_store_sk#5, ss_sales_price#6, s_store_sk#9, s_store_id#10, s_store_name#11] +Output [4]: [d_day_name#3, ss_sales_price#5, s_store_id#9, s_store_name#10] +Input [6]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5, s_store_sk#8, s_store_id#9, s_store_name#10] (18) HashAggregate [codegen id : 3] -Input [4]: [d_day_name#3, ss_sales_price#6, s_store_id#10, s_store_name#11] -Keys [2]: [s_store_name#11, s_store_id#10] -Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#6 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#6 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#6 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#6 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#6 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#6 END))] -Aggregate Attributes [7]: [sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] -Results [9]: [s_store_name#11, s_store_id#10, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26, sum#27] +Input [4]: [d_day_name#3, ss_sales_price#5, s_store_id#9, s_store_name#10] +Keys [2]: [s_store_name#10, s_store_id#9] +Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] +Aggregate Attributes [7]: [sum#12, sum#13, sum#14, sum#15, sum#16, sum#17, sum#18] +Results [9]: [s_store_name#10, s_store_id#9, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] (19) Exchange -Input [9]: [s_store_name#11, s_store_id#10, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26, sum#27] -Arguments: hashpartitioning(s_store_name#11, s_store_id#10, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [9]: [s_store_name#10, s_store_id#9, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] +Arguments: hashpartitioning(s_store_name#10, s_store_id#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [9]: [s_store_name#11, s_store_id#10, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26, sum#27] -Keys [2]: [s_store_name#11, s_store_id#10] -Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#6 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#6 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#6 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#6 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#6 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#6 END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#6 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#6 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#6 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#6 END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#6 END))#34, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#6 END))#35] -Results [9]: [s_store_name#11, s_store_id#10, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#6 END))#29,17,2) AS sun_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#6 END))#30,17,2) AS mon_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#6 END))#31,17,2) AS tue_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#6 END))#32,17,2) AS wed_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#6 END))#33,17,2) AS thu_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#6 END))#34,17,2) AS fri_sales#41, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#6 END))#35,17,2) AS sat_sales#42] +Input [9]: [s_store_name#10, s_store_id#9, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25] +Keys [2]: [s_store_name#10, s_store_id#9] +Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#32] +Results [9]: [s_store_name#10, s_store_id#9, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#26,17,2) AS sun_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#27,17,2) AS mon_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#28,17,2) AS tue_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#29,17,2) AS wed_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#30,17,2) AS thu_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#31,17,2) AS fri_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#32,17,2) AS sat_sales#39] (21) TakeOrderedAndProject -Input [9]: [s_store_name#11, s_store_id#10, sun_sales#36, mon_sales#37, tue_sales#38, wed_sales#39, thu_sales#40, fri_sales#41, sat_sales#42] -Arguments: 100, [s_store_name#11 ASC NULLS FIRST, s_store_id#10 ASC NULLS FIRST, sun_sales#36 ASC NULLS FIRST, mon_sales#37 ASC NULLS FIRST, tue_sales#38 ASC NULLS FIRST, wed_sales#39 ASC NULLS FIRST, thu_sales#40 ASC NULLS FIRST, fri_sales#41 ASC NULLS FIRST, sat_sales#42 ASC NULLS FIRST], [s_store_name#11, s_store_id#10, sun_sales#36, mon_sales#37, tue_sales#38, wed_sales#39, thu_sales#40, fri_sales#41, sat_sales#42] +Input [9]: [s_store_name#10, s_store_id#9, sun_sales#33, mon_sales#34, tue_sales#35, wed_sales#36, thu_sales#37, fri_sales#38, sat_sales#39] +Arguments: 100, [s_store_name#10 ASC NULLS FIRST, s_store_id#9 ASC NULLS FIRST, sun_sales#33 ASC NULLS FIRST, mon_sales#34 ASC NULLS FIRST, tue_sales#35 ASC NULLS FIRST, wed_sales#36 ASC NULLS FIRST, thu_sales#37 ASC NULLS FIRST, fri_sales#38 ASC NULLS FIRST, sat_sales#39 ASC NULLS FIRST], [s_store_name#10, s_store_id#9, sun_sales#33, mon_sales#34, tue_sales#35, wed_sales#36, thu_sales#37, fri_sales#38, sat_sales#39] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7 ReusedExchange (22) diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt index 5a14bfcb5d0c1..74d1ae65e581b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q43/explain.txt @@ -57,7 +57,7 @@ Condition : isnotnull(ss_store_sk#4) (8) BroadcastExchange Input [3]: [ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] @@ -69,55 +69,55 @@ Output [3]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5] Input [5]: [d_date_sk#1, d_day_name#3, ss_store_sk#4, ss_sales_price#5, ss_sold_date_sk#6] (11) Scan parquet default.store -Output [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Output [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] (13) Filter [codegen id : 2] -Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] -Condition : ((isnotnull(s_gmt_offset#11) AND (s_gmt_offset#11 = -5.00)) AND isnotnull(s_store_sk#8)) +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] +Condition : ((isnotnull(s_gmt_offset#10) AND (s_gmt_offset#10 = -5.00)) AND isnotnull(s_store_sk#7)) (14) Project [codegen id : 2] -Output [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] -Input [4]: [s_store_sk#8, s_store_id#9, s_store_name#10, s_gmt_offset#11] +Output [3]: [s_store_sk#7, s_store_id#8, s_store_name#9] +Input [4]: [s_store_sk#7, s_store_id#8, s_store_name#9, s_gmt_offset#10] (15) BroadcastExchange -Input [3]: [s_store_sk#8, s_store_id#9, s_store_name#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Input [3]: [s_store_sk#7, s_store_id#8, s_store_name#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#8] +Right keys [1]: [s_store_sk#7] Join condition: None (17) Project [codegen id : 3] -Output [4]: [d_day_name#3, ss_sales_price#5, s_store_id#9, s_store_name#10] -Input [6]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5, s_store_sk#8, s_store_id#9, s_store_name#10] +Output [4]: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] +Input [6]: [d_day_name#3, ss_store_sk#4, ss_sales_price#5, s_store_sk#7, s_store_id#8, s_store_name#9] (18) HashAggregate [codegen id : 3] -Input [4]: [d_day_name#3, ss_sales_price#5, s_store_id#9, s_store_name#10] -Keys [2]: [s_store_name#10, s_store_id#9] +Input [4]: [d_day_name#3, ss_sales_price#5, s_store_id#8, s_store_name#9] +Keys [2]: [s_store_name#9, s_store_id#8] Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] -Aggregate Attributes [7]: [sum#13, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19] -Results [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] +Aggregate Attributes [7]: [sum#11, sum#12, sum#13, sum#14, sum#15, sum#16, sum#17] +Results [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] (19) Exchange -Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] -Arguments: hashpartitioning(s_store_name#10, s_store_id#9, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(s_store_name#9, s_store_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [9]: [s_store_name#10, s_store_id#9, sum#20, sum#21, sum#22, sum#23, sum#24, sum#25, sum#26] -Keys [2]: [s_store_name#10, s_store_id#9] +Input [9]: [s_store_name#9, s_store_id#8, sum#18, sum#19, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [2]: [s_store_name#9, s_store_id#8] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END)), sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#31, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#32, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#33, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#34] -Results [9]: [s_store_name#10, s_store_id#9, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#28,17,2) AS sun_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#29,17,2) AS mon_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#30,17,2) AS tue_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#31,17,2) AS wed_sales#38, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#32,17,2) AS thu_sales#39, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#33,17,2) AS fri_sales#40, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#34,17,2) AS sat_sales#41] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#29, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#30, sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#31] +Results [9]: [s_store_name#9, s_store_id#8, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Sunday ) THEN ss_sales_price#5 END))#25,17,2) AS sun_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Monday ) THEN ss_sales_price#5 END))#26,17,2) AS mon_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Tuesday ) THEN ss_sales_price#5 END))#27,17,2) AS tue_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Wednesday) THEN ss_sales_price#5 END))#28,17,2) AS wed_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Thursday ) THEN ss_sales_price#5 END))#29,17,2) AS thu_sales#36, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Friday ) THEN ss_sales_price#5 END))#30,17,2) AS fri_sales#37, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#3 = Saturday ) THEN ss_sales_price#5 END))#31,17,2) AS sat_sales#38] (21) TakeOrderedAndProject -Input [9]: [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] -Arguments: 100, [s_store_name#10 ASC NULLS FIRST, s_store_id#9 ASC NULLS FIRST, sun_sales#35 ASC NULLS FIRST, mon_sales#36 ASC NULLS FIRST, tue_sales#37 ASC NULLS FIRST, wed_sales#38 ASC NULLS FIRST, thu_sales#39 ASC NULLS FIRST, fri_sales#40 ASC NULLS FIRST, sat_sales#41 ASC NULLS FIRST], [s_store_name#10, s_store_id#9, sun_sales#35, mon_sales#36, tue_sales#37, wed_sales#38, thu_sales#39, fri_sales#40, sat_sales#41] +Input [9]: [s_store_name#9, s_store_id#8, sun_sales#32, mon_sales#33, tue_sales#34, wed_sales#35, thu_sales#36, fri_sales#37, sat_sales#38] +Arguments: 100, [s_store_name#9 ASC NULLS FIRST, s_store_id#8 ASC NULLS FIRST, sun_sales#32 ASC NULLS FIRST, mon_sales#33 ASC NULLS FIRST, tue_sales#34 ASC NULLS FIRST, wed_sales#35 ASC NULLS FIRST, thu_sales#36 ASC NULLS FIRST, fri_sales#37 ASC NULLS FIRST, sat_sales#38 ASC NULLS FIRST], [s_store_name#9, s_store_id#8, sun_sales#32, mon_sales#33, tue_sales#34, wed_sales#35, thu_sales#36, fri_sales#37, sat_sales#38] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.sf100/explain.txt index 0d7aa6dbdfbb8..afe71668e80f9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44.sf100/explain.txt @@ -59,117 +59,117 @@ Results [3]: [ss_item_sk#1, sum#7, count#8] (6) Exchange Input [3]: [ss_item_sk#1, sum#7, count#8] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (7) HashAggregate [codegen id : 2] Input [3]: [ss_item_sk#1, sum#7, count#8] Keys [1]: [ss_item_sk#1] Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#10] -Results [2]: [ss_item_sk#1 AS item_sk#11, cast((avg(UnscaledValue(ss_net_profit#3))#10 / 100.0) as decimal(11,6)) AS rank_col#12] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#9] +Results [2]: [ss_item_sk#1 AS item_sk#10, cast((avg(UnscaledValue(ss_net_profit#3))#9 / 100.0) as decimal(11,6)) AS rank_col#11] (8) Filter [codegen id : 2] -Input [2]: [item_sk#11, rank_col#12] -Condition : (isnotnull(rank_col#12) AND (cast(rank_col#12 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery scalar-subquery#13, [id=#14])), DecimalType(13,7)))) +Input [2]: [item_sk#10, rank_col#11] +Condition : (isnotnull(rank_col#11) AND (cast(rank_col#11 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery scalar-subquery#12, [id=#13])), DecimalType(13,7)))) (9) Exchange -Input [2]: [item_sk#11, rank_col#12] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#15] +Input [2]: [item_sk#10, rank_col#11] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] (10) Sort [codegen id : 3] -Input [2]: [item_sk#11, rank_col#12] -Arguments: [rank_col#12 ASC NULLS FIRST], false, 0 +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank_col#11 ASC NULLS FIRST], false, 0 (11) Window -Input [2]: [item_sk#11, rank_col#12] -Arguments: [rank(rank_col#12) windowspecdefinition(rank_col#12 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#16], [rank_col#12 ASC NULLS FIRST] +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank(rank_col#11) windowspecdefinition(rank_col#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#14], [rank_col#11 ASC NULLS FIRST] (12) Filter [codegen id : 10] -Input [3]: [item_sk#11, rank_col#12, rnk#16] -Condition : ((rnk#16 < 11) AND isnotnull(item_sk#11)) +Input [3]: [item_sk#10, rank_col#11, rnk#14] +Condition : ((rnk#14 < 11) AND isnotnull(item_sk#10)) (13) Project [codegen id : 10] -Output [2]: [item_sk#11, rnk#16] -Input [3]: [item_sk#11, rank_col#12, rnk#16] +Output [2]: [item_sk#10, rnk#14] +Input [3]: [item_sk#10, rank_col#11, rnk#14] (14) ReusedExchange [Reuses operator id: 9] -Output [2]: [item_sk#17, rank_col#18] +Output [2]: [item_sk#15, rank_col#16] (15) Sort [codegen id : 6] -Input [2]: [item_sk#17, rank_col#18] -Arguments: [rank_col#18 DESC NULLS LAST], false, 0 +Input [2]: [item_sk#15, rank_col#16] +Arguments: [rank_col#16 DESC NULLS LAST], false, 0 (16) Window -Input [2]: [item_sk#17, rank_col#18] -Arguments: [rank(rank_col#18) windowspecdefinition(rank_col#18 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#19], [rank_col#18 DESC NULLS LAST] +Input [2]: [item_sk#15, rank_col#16] +Arguments: [rank(rank_col#16) windowspecdefinition(rank_col#16 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#17], [rank_col#16 DESC NULLS LAST] (17) Filter [codegen id : 7] -Input [3]: [item_sk#17, rank_col#18, rnk#19] -Condition : ((rnk#19 < 11) AND isnotnull(item_sk#17)) +Input [3]: [item_sk#15, rank_col#16, rnk#17] +Condition : ((rnk#17 < 11) AND isnotnull(item_sk#15)) (18) Project [codegen id : 7] -Output [2]: [item_sk#17, rnk#19] -Input [3]: [item_sk#17, rank_col#18, rnk#19] +Output [2]: [item_sk#15, rnk#17] +Input [3]: [item_sk#15, rank_col#16, rnk#17] (19) BroadcastExchange -Input [2]: [item_sk#17, rnk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#20] +Input [2]: [item_sk#15, rnk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=3] (20) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [rnk#16] -Right keys [1]: [rnk#19] +Left keys [1]: [rnk#14] +Right keys [1]: [rnk#17] Join condition: None (21) Project [codegen id : 10] -Output [3]: [item_sk#11, rnk#16, item_sk#17] -Input [4]: [item_sk#11, rnk#16, item_sk#17, rnk#19] +Output [3]: [item_sk#10, rnk#14, item_sk#15] +Input [4]: [item_sk#10, rnk#14, item_sk#15, rnk#17] (22) Scan parquet default.item -Output [2]: [i_item_sk#21, i_product_name#22] +Output [2]: [i_item_sk#18, i_product_name#19] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 8] -Input [2]: [i_item_sk#21, i_product_name#22] +Input [2]: [i_item_sk#18, i_product_name#19] (24) Filter [codegen id : 8] -Input [2]: [i_item_sk#21, i_product_name#22] -Condition : isnotnull(i_item_sk#21) +Input [2]: [i_item_sk#18, i_product_name#19] +Condition : isnotnull(i_item_sk#18) (25) BroadcastExchange -Input [2]: [i_item_sk#21, i_product_name#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] +Input [2]: [i_item_sk#18, i_product_name#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (26) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [item_sk#11] -Right keys [1]: [i_item_sk#21] +Left keys [1]: [item_sk#10] +Right keys [1]: [i_item_sk#18] Join condition: None (27) Project [codegen id : 10] -Output [3]: [rnk#16, item_sk#17, i_product_name#22] -Input [5]: [item_sk#11, rnk#16, item_sk#17, i_item_sk#21, i_product_name#22] +Output [3]: [rnk#14, item_sk#15, i_product_name#19] +Input [5]: [item_sk#10, rnk#14, item_sk#15, i_item_sk#18, i_product_name#19] (28) ReusedExchange [Reuses operator id: 25] -Output [2]: [i_item_sk#24, i_product_name#25] +Output [2]: [i_item_sk#20, i_product_name#21] (29) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [item_sk#17] -Right keys [1]: [i_item_sk#24] +Left keys [1]: [item_sk#15] +Right keys [1]: [i_item_sk#20] Join condition: None (30) Project [codegen id : 10] -Output [3]: [rnk#16, i_product_name#22 AS best_performing#26, i_product_name#25 AS worst_performing#27] -Input [5]: [rnk#16, item_sk#17, i_product_name#22, i_item_sk#24, i_product_name#25] +Output [3]: [rnk#14, i_product_name#19 AS best_performing#22, i_product_name#21 AS worst_performing#23] +Input [5]: [rnk#14, item_sk#15, i_product_name#19, i_item_sk#20, i_product_name#21] (31) TakeOrderedAndProject -Input [3]: [rnk#16, best_performing#26, worst_performing#27] -Arguments: 100, [rnk#16 ASC NULLS FIRST], [rnk#16, best_performing#26, worst_performing#27] +Input [3]: [rnk#14, best_performing#22, worst_performing#23] +Arguments: 100, [rnk#14 ASC NULLS FIRST], [rnk#14, best_performing#22, worst_performing#23] ===== Subqueries ===== -Subquery:1 Hosting operator id = 8 Hosting Expression = Subquery scalar-subquery#13, [id=#14] +Subquery:1 Hosting operator id = 8 Hosting Expression = Subquery scalar-subquery#12, [id=#13] * HashAggregate (38) +- Exchange (37) +- * HashAggregate (36) @@ -180,39 +180,39 @@ Subquery:1 Hosting operator id = 8 Hosting Expression = Subquery scalar-subquery (32) Scan parquet default.store_sales -Output [4]: [ss_addr_sk#28, ss_store_sk#29, ss_net_profit#30, ss_sold_date_sk#31] +Output [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)] ReadSchema: struct (33) ColumnarToRow [codegen id : 1] -Input [4]: [ss_addr_sk#28, ss_store_sk#29, ss_net_profit#30, ss_sold_date_sk#31] +Input [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] (34) Filter [codegen id : 1] -Input [4]: [ss_addr_sk#28, ss_store_sk#29, ss_net_profit#30, ss_sold_date_sk#31] -Condition : ((isnotnull(ss_store_sk#29) AND (ss_store_sk#29 = 4)) AND isnull(ss_addr_sk#28)) +Input [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] +Condition : ((isnotnull(ss_store_sk#25) AND (ss_store_sk#25 = 4)) AND isnull(ss_addr_sk#24)) (35) Project [codegen id : 1] -Output [2]: [ss_store_sk#29, ss_net_profit#30] -Input [4]: [ss_addr_sk#28, ss_store_sk#29, ss_net_profit#30, ss_sold_date_sk#31] +Output [2]: [ss_store_sk#25, ss_net_profit#26] +Input [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] (36) HashAggregate [codegen id : 1] -Input [2]: [ss_store_sk#29, ss_net_profit#30] -Keys [1]: [ss_store_sk#29] -Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#30))] -Aggregate Attributes [2]: [sum#32, count#33] -Results [3]: [ss_store_sk#29, sum#34, count#35] +Input [2]: [ss_store_sk#25, ss_net_profit#26] +Keys [1]: [ss_store_sk#25] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#26))] +Aggregate Attributes [2]: [sum#28, count#29] +Results [3]: [ss_store_sk#25, sum#30, count#31] (37) Exchange -Input [3]: [ss_store_sk#29, sum#34, count#35] -Arguments: hashpartitioning(ss_store_sk#29, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [3]: [ss_store_sk#25, sum#30, count#31] +Arguments: hashpartitioning(ss_store_sk#25, 5), ENSURE_REQUIREMENTS, [plan_id=5] (38) HashAggregate [codegen id : 2] -Input [3]: [ss_store_sk#29, sum#34, count#35] -Keys [1]: [ss_store_sk#29] -Functions [1]: [avg(UnscaledValue(ss_net_profit#30))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#30))#37] -Results [1]: [cast((avg(UnscaledValue(ss_net_profit#30))#37 / 100.0) as decimal(11,6)) AS rank_col#38] +Input [3]: [ss_store_sk#25, sum#30, count#31] +Keys [1]: [ss_store_sk#25] +Functions [1]: [avg(UnscaledValue(ss_net_profit#26))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#26))#32] +Results [1]: [cast((avg(UnscaledValue(ss_net_profit#26))#32 / 100.0) as decimal(11,6)) AS rank_col#33] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt index 5783d8b49b6a0..32534fa455e62 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q44/explain.txt @@ -60,121 +60,121 @@ Results [3]: [ss_item_sk#1, sum#7, count#8] (6) Exchange Input [3]: [ss_item_sk#1, sum#7, count#8] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (7) HashAggregate [codegen id : 2] Input [3]: [ss_item_sk#1, sum#7, count#8] Keys [1]: [ss_item_sk#1] Functions [1]: [avg(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#10] -Results [2]: [ss_item_sk#1 AS item_sk#11, cast((avg(UnscaledValue(ss_net_profit#3))#10 / 100.0) as decimal(11,6)) AS rank_col#12] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#3))#9] +Results [2]: [ss_item_sk#1 AS item_sk#10, cast((avg(UnscaledValue(ss_net_profit#3))#9 / 100.0) as decimal(11,6)) AS rank_col#11] (8) Filter [codegen id : 2] -Input [2]: [item_sk#11, rank_col#12] -Condition : (isnotnull(rank_col#12) AND (cast(rank_col#12 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery scalar-subquery#13, [id=#14])), DecimalType(13,7)))) +Input [2]: [item_sk#10, rank_col#11] +Condition : (isnotnull(rank_col#11) AND (cast(rank_col#11 as decimal(13,7)) > CheckOverflow((0.900000 * promote_precision(Subquery scalar-subquery#12, [id=#13])), DecimalType(13,7)))) (9) Exchange -Input [2]: [item_sk#11, rank_col#12] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#15] +Input [2]: [item_sk#10, rank_col#11] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] (10) Sort [codegen id : 3] -Input [2]: [item_sk#11, rank_col#12] -Arguments: [rank_col#12 ASC NULLS FIRST], false, 0 +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank_col#11 ASC NULLS FIRST], false, 0 (11) Window -Input [2]: [item_sk#11, rank_col#12] -Arguments: [rank(rank_col#12) windowspecdefinition(rank_col#12 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#16], [rank_col#12 ASC NULLS FIRST] +Input [2]: [item_sk#10, rank_col#11] +Arguments: [rank(rank_col#11) windowspecdefinition(rank_col#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#14], [rank_col#11 ASC NULLS FIRST] (12) Filter [codegen id : 4] -Input [3]: [item_sk#11, rank_col#12, rnk#16] -Condition : ((rnk#16 < 11) AND isnotnull(item_sk#11)) +Input [3]: [item_sk#10, rank_col#11, rnk#14] +Condition : ((rnk#14 < 11) AND isnotnull(item_sk#10)) (13) Project [codegen id : 4] -Output [2]: [item_sk#11, rnk#16] -Input [3]: [item_sk#11, rank_col#12, rnk#16] +Output [2]: [item_sk#10, rnk#14] +Input [3]: [item_sk#10, rank_col#11, rnk#14] (14) Sort [codegen id : 4] -Input [2]: [item_sk#11, rnk#16] -Arguments: [rnk#16 ASC NULLS FIRST], false, 0 +Input [2]: [item_sk#10, rnk#14] +Arguments: [rnk#14 ASC NULLS FIRST], false, 0 (15) ReusedExchange [Reuses operator id: 9] -Output [2]: [item_sk#17, rank_col#18] +Output [2]: [item_sk#15, rank_col#16] (16) Sort [codegen id : 7] -Input [2]: [item_sk#17, rank_col#18] -Arguments: [rank_col#18 DESC NULLS LAST], false, 0 +Input [2]: [item_sk#15, rank_col#16] +Arguments: [rank_col#16 DESC NULLS LAST], false, 0 (17) Window -Input [2]: [item_sk#17, rank_col#18] -Arguments: [rank(rank_col#18) windowspecdefinition(rank_col#18 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#19], [rank_col#18 DESC NULLS LAST] +Input [2]: [item_sk#15, rank_col#16] +Arguments: [rank(rank_col#16) windowspecdefinition(rank_col#16 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rnk#17], [rank_col#16 DESC NULLS LAST] (18) Filter [codegen id : 8] -Input [3]: [item_sk#17, rank_col#18, rnk#19] -Condition : ((rnk#19 < 11) AND isnotnull(item_sk#17)) +Input [3]: [item_sk#15, rank_col#16, rnk#17] +Condition : ((rnk#17 < 11) AND isnotnull(item_sk#15)) (19) Project [codegen id : 8] -Output [2]: [item_sk#17, rnk#19] -Input [3]: [item_sk#17, rank_col#18, rnk#19] +Output [2]: [item_sk#15, rnk#17] +Input [3]: [item_sk#15, rank_col#16, rnk#17] (20) Sort [codegen id : 8] -Input [2]: [item_sk#17, rnk#19] -Arguments: [rnk#19 ASC NULLS FIRST], false, 0 +Input [2]: [item_sk#15, rnk#17] +Arguments: [rnk#17 ASC NULLS FIRST], false, 0 (21) SortMergeJoin [codegen id : 11] -Left keys [1]: [rnk#16] -Right keys [1]: [rnk#19] +Left keys [1]: [rnk#14] +Right keys [1]: [rnk#17] Join condition: None (22) Project [codegen id : 11] -Output [3]: [item_sk#11, rnk#16, item_sk#17] -Input [4]: [item_sk#11, rnk#16, item_sk#17, rnk#19] +Output [3]: [item_sk#10, rnk#14, item_sk#15] +Input [4]: [item_sk#10, rnk#14, item_sk#15, rnk#17] (23) Scan parquet default.item -Output [2]: [i_item_sk#20, i_product_name#21] +Output [2]: [i_item_sk#18, i_product_name#19] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (24) ColumnarToRow [codegen id : 9] -Input [2]: [i_item_sk#20, i_product_name#21] +Input [2]: [i_item_sk#18, i_product_name#19] (25) Filter [codegen id : 9] -Input [2]: [i_item_sk#20, i_product_name#21] -Condition : isnotnull(i_item_sk#20) +Input [2]: [i_item_sk#18, i_product_name#19] +Condition : isnotnull(i_item_sk#18) (26) BroadcastExchange -Input [2]: [i_item_sk#20, i_product_name#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] +Input [2]: [i_item_sk#18, i_product_name#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (27) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [item_sk#11] -Right keys [1]: [i_item_sk#20] +Left keys [1]: [item_sk#10] +Right keys [1]: [i_item_sk#18] Join condition: None (28) Project [codegen id : 11] -Output [3]: [rnk#16, item_sk#17, i_product_name#21] -Input [5]: [item_sk#11, rnk#16, item_sk#17, i_item_sk#20, i_product_name#21] +Output [3]: [rnk#14, item_sk#15, i_product_name#19] +Input [5]: [item_sk#10, rnk#14, item_sk#15, i_item_sk#18, i_product_name#19] (29) ReusedExchange [Reuses operator id: 26] -Output [2]: [i_item_sk#23, i_product_name#24] +Output [2]: [i_item_sk#20, i_product_name#21] (30) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [item_sk#17] -Right keys [1]: [i_item_sk#23] +Left keys [1]: [item_sk#15] +Right keys [1]: [i_item_sk#20] Join condition: None (31) Project [codegen id : 11] -Output [3]: [rnk#16, i_product_name#21 AS best_performing#25, i_product_name#24 AS worst_performing#26] -Input [5]: [rnk#16, item_sk#17, i_product_name#21, i_item_sk#23, i_product_name#24] +Output [3]: [rnk#14, i_product_name#19 AS best_performing#22, i_product_name#21 AS worst_performing#23] +Input [5]: [rnk#14, item_sk#15, i_product_name#19, i_item_sk#20, i_product_name#21] (32) TakeOrderedAndProject -Input [3]: [rnk#16, best_performing#25, worst_performing#26] -Arguments: 100, [rnk#16 ASC NULLS FIRST], [rnk#16, best_performing#25, worst_performing#26] +Input [3]: [rnk#14, best_performing#22, worst_performing#23] +Arguments: 100, [rnk#14 ASC NULLS FIRST], [rnk#14, best_performing#22, worst_performing#23] ===== Subqueries ===== -Subquery:1 Hosting operator id = 8 Hosting Expression = Subquery scalar-subquery#13, [id=#14] +Subquery:1 Hosting operator id = 8 Hosting Expression = Subquery scalar-subquery#12, [id=#13] * HashAggregate (39) +- Exchange (38) +- * HashAggregate (37) @@ -185,39 +185,39 @@ Subquery:1 Hosting operator id = 8 Hosting Expression = Subquery scalar-subquery (33) Scan parquet default.store_sales -Output [4]: [ss_addr_sk#27, ss_store_sk#28, ss_net_profit#29, ss_sold_date_sk#30] +Output [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_store_sk), EqualTo(ss_store_sk,4), IsNull(ss_addr_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 1] -Input [4]: [ss_addr_sk#27, ss_store_sk#28, ss_net_profit#29, ss_sold_date_sk#30] +Input [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] (35) Filter [codegen id : 1] -Input [4]: [ss_addr_sk#27, ss_store_sk#28, ss_net_profit#29, ss_sold_date_sk#30] -Condition : ((isnotnull(ss_store_sk#28) AND (ss_store_sk#28 = 4)) AND isnull(ss_addr_sk#27)) +Input [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] +Condition : ((isnotnull(ss_store_sk#25) AND (ss_store_sk#25 = 4)) AND isnull(ss_addr_sk#24)) (36) Project [codegen id : 1] -Output [2]: [ss_store_sk#28, ss_net_profit#29] -Input [4]: [ss_addr_sk#27, ss_store_sk#28, ss_net_profit#29, ss_sold_date_sk#30] +Output [2]: [ss_store_sk#25, ss_net_profit#26] +Input [4]: [ss_addr_sk#24, ss_store_sk#25, ss_net_profit#26, ss_sold_date_sk#27] (37) HashAggregate [codegen id : 1] -Input [2]: [ss_store_sk#28, ss_net_profit#29] -Keys [1]: [ss_store_sk#28] -Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#29))] -Aggregate Attributes [2]: [sum#31, count#32] -Results [3]: [ss_store_sk#28, sum#33, count#34] +Input [2]: [ss_store_sk#25, ss_net_profit#26] +Keys [1]: [ss_store_sk#25] +Functions [1]: [partial_avg(UnscaledValue(ss_net_profit#26))] +Aggregate Attributes [2]: [sum#28, count#29] +Results [3]: [ss_store_sk#25, sum#30, count#31] (38) Exchange -Input [3]: [ss_store_sk#28, sum#33, count#34] -Arguments: hashpartitioning(ss_store_sk#28, 5), ENSURE_REQUIREMENTS, [id=#35] +Input [3]: [ss_store_sk#25, sum#30, count#31] +Arguments: hashpartitioning(ss_store_sk#25, 5), ENSURE_REQUIREMENTS, [plan_id=4] (39) HashAggregate [codegen id : 2] -Input [3]: [ss_store_sk#28, sum#33, count#34] -Keys [1]: [ss_store_sk#28] -Functions [1]: [avg(UnscaledValue(ss_net_profit#29))] -Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#29))#36] -Results [1]: [cast((avg(UnscaledValue(ss_net_profit#29))#36 / 100.0) as decimal(11,6)) AS rank_col#37] +Input [3]: [ss_store_sk#25, sum#30, count#31] +Keys [1]: [ss_store_sk#25] +Functions [1]: [avg(UnscaledValue(ss_net_profit#26))] +Aggregate Attributes [1]: [avg(UnscaledValue(ss_net_profit#26))#32] +Results [1]: [cast((avg(UnscaledValue(ss_net_profit#26))#32 / 100.0) as decimal(11,6)) AS rank_col#33] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt index d5a234a920211..05c0d0077dc66 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45.sf100/explain.txt @@ -86,7 +86,7 @@ Condition : isnotnull(i_item_sk#8) (10) BroadcastExchange Input [2]: [i_item_sk#8, i_item_id#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ws_item_sk#2] @@ -99,138 +99,138 @@ Input [5]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, i_item_sk#8, (13) Exchange Input [3]: [ws_bill_customer_sk#3, ws_sales_price#4, i_item_id#9] -Arguments: hashpartitioning(ws_bill_customer_sk#3, 5), ENSURE_REQUIREMENTS, [id=#11] +Arguments: hashpartitioning(ws_bill_customer_sk#3, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) Sort [codegen id : 4] Input [3]: [ws_bill_customer_sk#3, ws_sales_price#4, i_item_id#9] Arguments: [ws_bill_customer_sk#3 ASC NULLS FIRST], false, 0 (15) Scan parquet default.customer -Output [2]: [c_customer_sk#12, c_current_addr_sk#13] +Output [2]: [c_customer_sk#10, c_current_addr_sk#11] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [2]: [c_customer_sk#12, c_current_addr_sk#13] +Input [2]: [c_customer_sk#10, c_current_addr_sk#11] (17) Filter [codegen id : 5] -Input [2]: [c_customer_sk#12, c_current_addr_sk#13] -Condition : (isnotnull(c_customer_sk#12) AND isnotnull(c_current_addr_sk#13)) +Input [2]: [c_customer_sk#10, c_current_addr_sk#11] +Condition : (isnotnull(c_customer_sk#10) AND isnotnull(c_current_addr_sk#11)) (18) Exchange -Input [2]: [c_customer_sk#12, c_current_addr_sk#13] -Arguments: hashpartitioning(c_current_addr_sk#13, 5), ENSURE_REQUIREMENTS, [id=#14] +Input [2]: [c_customer_sk#10, c_current_addr_sk#11] +Arguments: hashpartitioning(c_current_addr_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 6] -Input [2]: [c_customer_sk#12, c_current_addr_sk#13] -Arguments: [c_current_addr_sk#13 ASC NULLS FIRST], false, 0 +Input [2]: [c_customer_sk#10, c_current_addr_sk#11] +Arguments: [c_current_addr_sk#11 ASC NULLS FIRST], false, 0 (20) Scan parquet default.customer_address -Output [3]: [ca_address_sk#15, ca_city#16, ca_zip#17] +Output [3]: [ca_address_sk#12, ca_city#13, ca_zip#14] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 7] -Input [3]: [ca_address_sk#15, ca_city#16, ca_zip#17] +Input [3]: [ca_address_sk#12, ca_city#13, ca_zip#14] (22) Filter [codegen id : 7] -Input [3]: [ca_address_sk#15, ca_city#16, ca_zip#17] -Condition : isnotnull(ca_address_sk#15) +Input [3]: [ca_address_sk#12, ca_city#13, ca_zip#14] +Condition : isnotnull(ca_address_sk#12) (23) Exchange -Input [3]: [ca_address_sk#15, ca_city#16, ca_zip#17] -Arguments: hashpartitioning(ca_address_sk#15, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [3]: [ca_address_sk#12, ca_city#13, ca_zip#14] +Arguments: hashpartitioning(ca_address_sk#12, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 8] -Input [3]: [ca_address_sk#15, ca_city#16, ca_zip#17] -Arguments: [ca_address_sk#15 ASC NULLS FIRST], false, 0 +Input [3]: [ca_address_sk#12, ca_city#13, ca_zip#14] +Arguments: [ca_address_sk#12 ASC NULLS FIRST], false, 0 (25) SortMergeJoin [codegen id : 9] -Left keys [1]: [c_current_addr_sk#13] -Right keys [1]: [ca_address_sk#15] +Left keys [1]: [c_current_addr_sk#11] +Right keys [1]: [ca_address_sk#12] Join condition: None (26) Project [codegen id : 9] -Output [3]: [c_customer_sk#12, ca_city#16, ca_zip#17] -Input [5]: [c_customer_sk#12, c_current_addr_sk#13, ca_address_sk#15, ca_city#16, ca_zip#17] +Output [3]: [c_customer_sk#10, ca_city#13, ca_zip#14] +Input [5]: [c_customer_sk#10, c_current_addr_sk#11, ca_address_sk#12, ca_city#13, ca_zip#14] (27) Exchange -Input [3]: [c_customer_sk#12, ca_city#16, ca_zip#17] -Arguments: hashpartitioning(c_customer_sk#12, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [3]: [c_customer_sk#10, ca_city#13, ca_zip#14] +Arguments: hashpartitioning(c_customer_sk#10, 5), ENSURE_REQUIREMENTS, [plan_id=5] (28) Sort [codegen id : 10] -Input [3]: [c_customer_sk#12, ca_city#16, ca_zip#17] -Arguments: [c_customer_sk#12 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#10, ca_city#13, ca_zip#14] +Arguments: [c_customer_sk#10 ASC NULLS FIRST], false, 0 (29) SortMergeJoin [codegen id : 12] Left keys [1]: [ws_bill_customer_sk#3] -Right keys [1]: [c_customer_sk#12] +Right keys [1]: [c_customer_sk#10] Join condition: None (30) Project [codegen id : 12] -Output [4]: [ws_sales_price#4, ca_city#16, ca_zip#17, i_item_id#9] -Input [6]: [ws_bill_customer_sk#3, ws_sales_price#4, i_item_id#9, c_customer_sk#12, ca_city#16, ca_zip#17] +Output [4]: [ws_sales_price#4, ca_city#13, ca_zip#14, i_item_id#9] +Input [6]: [ws_bill_customer_sk#3, ws_sales_price#4, i_item_id#9, c_customer_sk#10, ca_city#13, ca_zip#14] (31) Scan parquet default.item -Output [2]: [i_item_sk#20, i_item_id#21] +Output [2]: [i_item_sk#15, i_item_id#16] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [In(i_item_sk, [11,13,17,19,2,23,29,3,5,7])] ReadSchema: struct (32) ColumnarToRow [codegen id : 11] -Input [2]: [i_item_sk#20, i_item_id#21] +Input [2]: [i_item_sk#15, i_item_id#16] (33) Filter [codegen id : 11] -Input [2]: [i_item_sk#20, i_item_id#21] -Condition : i_item_sk#20 IN (2,3,5,7,11,13,17,19,23,29) +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : i_item_sk#15 IN (2,3,5,7,11,13,17,19,23,29) (34) Project [codegen id : 11] -Output [1]: [i_item_id#21] -Input [2]: [i_item_sk#20, i_item_id#21] +Output [1]: [i_item_id#16] +Input [2]: [i_item_sk#15, i_item_id#16] (35) BroadcastExchange -Input [1]: [i_item_id#21] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#22] +Input [1]: [i_item_id#16] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] (36) BroadcastHashJoin [codegen id : 12] Left keys [1]: [i_item_id#9] -Right keys [1]: [i_item_id#21] +Right keys [1]: [i_item_id#16] Join condition: None (37) Filter [codegen id : 12] -Input [5]: [ws_sales_price#4, ca_city#16, ca_zip#17, i_item_id#9, exists#1] -Condition : (substr(ca_zip#17, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) +Input [5]: [ws_sales_price#4, ca_city#13, ca_zip#14, i_item_id#9, exists#1] +Condition : (substr(ca_zip#14, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) (38) Project [codegen id : 12] -Output [3]: [ws_sales_price#4, ca_city#16, ca_zip#17] -Input [5]: [ws_sales_price#4, ca_city#16, ca_zip#17, i_item_id#9, exists#1] +Output [3]: [ws_sales_price#4, ca_city#13, ca_zip#14] +Input [5]: [ws_sales_price#4, ca_city#13, ca_zip#14, i_item_id#9, exists#1] (39) HashAggregate [codegen id : 12] -Input [3]: [ws_sales_price#4, ca_city#16, ca_zip#17] -Keys [2]: [ca_zip#17, ca_city#16] +Input [3]: [ws_sales_price#4, ca_city#13, ca_zip#14] +Keys [2]: [ca_zip#14, ca_city#13] Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#4))] -Aggregate Attributes [1]: [sum#23] -Results [3]: [ca_zip#17, ca_city#16, sum#24] +Aggregate Attributes [1]: [sum#17] +Results [3]: [ca_zip#14, ca_city#13, sum#18] (40) Exchange -Input [3]: [ca_zip#17, ca_city#16, sum#24] -Arguments: hashpartitioning(ca_zip#17, ca_city#16, 5), ENSURE_REQUIREMENTS, [id=#25] +Input [3]: [ca_zip#14, ca_city#13, sum#18] +Arguments: hashpartitioning(ca_zip#14, ca_city#13, 5), ENSURE_REQUIREMENTS, [plan_id=7] (41) HashAggregate [codegen id : 13] -Input [3]: [ca_zip#17, ca_city#16, sum#24] -Keys [2]: [ca_zip#17, ca_city#16] +Input [3]: [ca_zip#14, ca_city#13, sum#18] +Keys [2]: [ca_zip#14, ca_city#13] Functions [1]: [sum(UnscaledValue(ws_sales_price#4))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#4))#26] -Results [3]: [ca_zip#17, ca_city#16, MakeDecimal(sum(UnscaledValue(ws_sales_price#4))#26,17,2) AS sum(ws_sales_price)#27] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#4))#19] +Results [3]: [ca_zip#14, ca_city#13, MakeDecimal(sum(UnscaledValue(ws_sales_price#4))#19,17,2) AS sum(ws_sales_price)#20] (42) TakeOrderedAndProject -Input [3]: [ca_zip#17, ca_city#16, sum(ws_sales_price)#27] -Arguments: 100, [ca_zip#17 ASC NULLS FIRST, ca_city#16 ASC NULLS FIRST], [ca_zip#17, ca_city#16, sum(ws_sales_price)#27] +Input [3]: [ca_zip#14, ca_city#13, sum(ws_sales_price)#20] +Arguments: 100, [ca_zip#14 ASC NULLS FIRST, ca_city#13 ASC NULLS FIRST], [ca_zip#14, ca_city#13, sum(ws_sales_price)#20] ===== Subqueries ===== @@ -243,25 +243,25 @@ BroadcastExchange (47) (43) Scan parquet default.date_dim -Output [3]: [d_date_sk#7, d_year#28, d_qoy#29] +Output [3]: [d_date_sk#7, d_year#21, d_qoy#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#28, d_qoy#29] +Input [3]: [d_date_sk#7, d_year#21, d_qoy#22] (45) Filter [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#28, d_qoy#29] -Condition : ((((isnotnull(d_qoy#29) AND isnotnull(d_year#28)) AND (d_qoy#29 = 2)) AND (d_year#28 = 2001)) AND isnotnull(d_date_sk#7)) +Input [3]: [d_date_sk#7, d_year#21, d_qoy#22] +Condition : ((((isnotnull(d_qoy#22) AND isnotnull(d_year#21)) AND (d_qoy#22 = 2)) AND (d_year#21 = 2001)) AND isnotnull(d_date_sk#7)) (46) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [3]: [d_date_sk#7, d_year#28, d_qoy#29] +Input [3]: [d_date_sk#7, d_year#21, d_qoy#22] (47) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt index 80f9379309580..7dc60b8a0bb81 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q45/explain.txt @@ -68,7 +68,7 @@ Condition : (isnotnull(c_customer_sk#7) AND isnotnull(c_current_addr_sk#8)) (7) BroadcastExchange Input [2]: [c_customer_sk#7, c_current_addr_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_bill_customer_sk#3] @@ -80,127 +80,127 @@ Output [4]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_s Input [6]: [ws_item_sk#2, ws_bill_customer_sk#3, ws_sales_price#4, ws_sold_date_sk#5, c_customer_sk#7, c_current_addr_sk#8] (10) Scan parquet default.customer_address -Output [3]: [ca_address_sk#10, ca_city#11, ca_zip#12] +Output [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [3]: [ca_address_sk#10, ca_city#11, ca_zip#12] +Input [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] (12) Filter [codegen id : 2] -Input [3]: [ca_address_sk#10, ca_city#11, ca_zip#12] -Condition : isnotnull(ca_address_sk#10) +Input [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] +Condition : isnotnull(ca_address_sk#9) (13) BroadcastExchange -Input [3]: [ca_address_sk#10, ca_city#11, ca_zip#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] +Input [3]: [ca_address_sk#9, ca_city#10, ca_zip#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 6] Left keys [1]: [c_current_addr_sk#8] -Right keys [1]: [ca_address_sk#10] +Right keys [1]: [ca_address_sk#9] Join condition: None (15) Project [codegen id : 6] -Output [5]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#11, ca_zip#12] -Input [7]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#8, ca_address_sk#10, ca_city#11, ca_zip#12] +Output [5]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#10, ca_zip#11] +Input [7]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, c_current_addr_sk#8, ca_address_sk#9, ca_city#10, ca_zip#11] (16) ReusedExchange [Reuses operator id: 41] -Output [1]: [d_date_sk#14] +Output [1]: [d_date_sk#12] (17) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_sold_date_sk#5] -Right keys [1]: [d_date_sk#14] +Right keys [1]: [d_date_sk#12] Join condition: None (18) Project [codegen id : 6] -Output [4]: [ws_item_sk#2, ws_sales_price#4, ca_city#11, ca_zip#12] -Input [6]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#11, ca_zip#12, d_date_sk#14] +Output [4]: [ws_item_sk#2, ws_sales_price#4, ca_city#10, ca_zip#11] +Input [6]: [ws_item_sk#2, ws_sales_price#4, ws_sold_date_sk#5, ca_city#10, ca_zip#11, d_date_sk#12] (19) Scan parquet default.item -Output [2]: [i_item_sk#15, i_item_id#16] +Output [2]: [i_item_sk#13, i_item_id#14] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (20) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#15, i_item_id#16] +Input [2]: [i_item_sk#13, i_item_id#14] (21) Filter [codegen id : 4] -Input [2]: [i_item_sk#15, i_item_id#16] -Condition : isnotnull(i_item_sk#15) +Input [2]: [i_item_sk#13, i_item_id#14] +Condition : isnotnull(i_item_sk#13) (22) BroadcastExchange -Input [2]: [i_item_sk#15, i_item_id#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] +Input [2]: [i_item_sk#13, i_item_id#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (23) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_item_sk#2] -Right keys [1]: [i_item_sk#15] +Right keys [1]: [i_item_sk#13] Join condition: None (24) Project [codegen id : 6] -Output [4]: [ws_sales_price#4, ca_city#11, ca_zip#12, i_item_id#16] -Input [6]: [ws_item_sk#2, ws_sales_price#4, ca_city#11, ca_zip#12, i_item_sk#15, i_item_id#16] +Output [4]: [ws_sales_price#4, ca_city#10, ca_zip#11, i_item_id#14] +Input [6]: [ws_item_sk#2, ws_sales_price#4, ca_city#10, ca_zip#11, i_item_sk#13, i_item_id#14] (25) Scan parquet default.item -Output [2]: [i_item_sk#18, i_item_id#19] +Output [2]: [i_item_sk#15, i_item_id#16] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [In(i_item_sk, [11,13,17,19,2,23,29,3,5,7])] ReadSchema: struct (26) ColumnarToRow [codegen id : 5] -Input [2]: [i_item_sk#18, i_item_id#19] +Input [2]: [i_item_sk#15, i_item_id#16] (27) Filter [codegen id : 5] -Input [2]: [i_item_sk#18, i_item_id#19] -Condition : i_item_sk#18 IN (2,3,5,7,11,13,17,19,23,29) +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : i_item_sk#15 IN (2,3,5,7,11,13,17,19,23,29) (28) Project [codegen id : 5] -Output [1]: [i_item_id#19] -Input [2]: [i_item_sk#18, i_item_id#19] +Output [1]: [i_item_id#16] +Input [2]: [i_item_sk#15, i_item_id#16] (29) BroadcastExchange -Input [1]: [i_item_id#19] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#20] +Input [1]: [i_item_id#16] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=4] (30) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [i_item_id#16] -Right keys [1]: [i_item_id#19] +Left keys [1]: [i_item_id#14] +Right keys [1]: [i_item_id#16] Join condition: None (31) Filter [codegen id : 6] -Input [5]: [ws_sales_price#4, ca_city#11, ca_zip#12, i_item_id#16, exists#1] -Condition : (substr(ca_zip#12, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) +Input [5]: [ws_sales_price#4, ca_city#10, ca_zip#11, i_item_id#14, exists#1] +Condition : (substr(ca_zip#11, 1, 5) IN (85669,86197,88274,83405,86475,85392,85460,80348,81792) OR exists#1) (32) Project [codegen id : 6] -Output [3]: [ws_sales_price#4, ca_city#11, ca_zip#12] -Input [5]: [ws_sales_price#4, ca_city#11, ca_zip#12, i_item_id#16, exists#1] +Output [3]: [ws_sales_price#4, ca_city#10, ca_zip#11] +Input [5]: [ws_sales_price#4, ca_city#10, ca_zip#11, i_item_id#14, exists#1] (33) HashAggregate [codegen id : 6] -Input [3]: [ws_sales_price#4, ca_city#11, ca_zip#12] -Keys [2]: [ca_zip#12, ca_city#11] +Input [3]: [ws_sales_price#4, ca_city#10, ca_zip#11] +Keys [2]: [ca_zip#11, ca_city#10] Functions [1]: [partial_sum(UnscaledValue(ws_sales_price#4))] -Aggregate Attributes [1]: [sum#21] -Results [3]: [ca_zip#12, ca_city#11, sum#22] +Aggregate Attributes [1]: [sum#17] +Results [3]: [ca_zip#11, ca_city#10, sum#18] (34) Exchange -Input [3]: [ca_zip#12, ca_city#11, sum#22] -Arguments: hashpartitioning(ca_zip#12, ca_city#11, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [3]: [ca_zip#11, ca_city#10, sum#18] +Arguments: hashpartitioning(ca_zip#11, ca_city#10, 5), ENSURE_REQUIREMENTS, [plan_id=5] (35) HashAggregate [codegen id : 7] -Input [3]: [ca_zip#12, ca_city#11, sum#22] -Keys [2]: [ca_zip#12, ca_city#11] +Input [3]: [ca_zip#11, ca_city#10, sum#18] +Keys [2]: [ca_zip#11, ca_city#10] Functions [1]: [sum(UnscaledValue(ws_sales_price#4))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#4))#24] -Results [3]: [ca_zip#12, ca_city#11, MakeDecimal(sum(UnscaledValue(ws_sales_price#4))#24,17,2) AS sum(ws_sales_price)#25] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#4))#19] +Results [3]: [ca_zip#11, ca_city#10, MakeDecimal(sum(UnscaledValue(ws_sales_price#4))#19,17,2) AS sum(ws_sales_price)#20] (36) TakeOrderedAndProject -Input [3]: [ca_zip#12, ca_city#11, sum(ws_sales_price)#25] -Arguments: 100, [ca_zip#12 ASC NULLS FIRST, ca_city#11 ASC NULLS FIRST], [ca_zip#12, ca_city#11, sum(ws_sales_price)#25] +Input [3]: [ca_zip#11, ca_city#10, sum(ws_sales_price)#20] +Arguments: 100, [ca_zip#11 ASC NULLS FIRST, ca_city#10 ASC NULLS FIRST], [ca_zip#11, ca_city#10, sum(ws_sales_price)#20] ===== Subqueries ===== @@ -213,25 +213,25 @@ BroadcastExchange (41) (37) Scan parquet default.date_dim -Output [3]: [d_date_sk#14, d_year#26, d_qoy#27] +Output [3]: [d_date_sk#12, d_year#21, d_qoy#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (38) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#14, d_year#26, d_qoy#27] +Input [3]: [d_date_sk#12, d_year#21, d_qoy#22] (39) Filter [codegen id : 1] -Input [3]: [d_date_sk#14, d_year#26, d_qoy#27] -Condition : ((((isnotnull(d_qoy#27) AND isnotnull(d_year#26)) AND (d_qoy#27 = 2)) AND (d_year#26 = 2001)) AND isnotnull(d_date_sk#14)) +Input [3]: [d_date_sk#12, d_year#21, d_qoy#22] +Condition : ((((isnotnull(d_qoy#22) AND isnotnull(d_year#21)) AND (d_qoy#22 = 2)) AND (d_year#21 = 2001)) AND isnotnull(d_date_sk#12)) (40) Project [codegen id : 1] -Output [1]: [d_date_sk#14] -Input [3]: [d_date_sk#14, d_year#26, d_qoy#27] +Output [1]: [d_date_sk#12] +Input [3]: [d_date_sk#12, d_year#21, d_qoy#22] (41) BroadcastExchange -Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.sf100/explain.txt index 07017d1b64697..c4c1d5c370771 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46.sf100/explain.txt @@ -64,202 +64,202 @@ Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_current_addr_sk#2)) (4) Exchange Input [4]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4] -Arguments: hashpartitioning(c_current_addr_sk#2, 5), ENSURE_REQUIREMENTS, [id=#5] +Arguments: hashpartitioning(c_current_addr_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [4]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4] Arguments: [c_current_addr_sk#2 ASC NULLS FIRST], false, 0 (6) Scan parquet default.customer_address -Output [2]: [ca_address_sk#6, ca_city#7] +Output [2]: [ca_address_sk#5, ca_city#6] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [2]: [ca_address_sk#6, ca_city#7] +Input [2]: [ca_address_sk#5, ca_city#6] (8) Filter [codegen id : 3] -Input [2]: [ca_address_sk#6, ca_city#7] -Condition : (isnotnull(ca_address_sk#6) AND isnotnull(ca_city#7)) +Input [2]: [ca_address_sk#5, ca_city#6] +Condition : (isnotnull(ca_address_sk#5) AND isnotnull(ca_city#6)) (9) Exchange -Input [2]: [ca_address_sk#6, ca_city#7] -Arguments: hashpartitioning(ca_address_sk#6, 5), ENSURE_REQUIREMENTS, [id=#8] +Input [2]: [ca_address_sk#5, ca_city#6] +Arguments: hashpartitioning(ca_address_sk#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] (10) Sort [codegen id : 4] -Input [2]: [ca_address_sk#6, ca_city#7] -Arguments: [ca_address_sk#6 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#5, ca_city#6] +Arguments: [ca_address_sk#5 ASC NULLS FIRST], false, 0 (11) SortMergeJoin [codegen id : 5] Left keys [1]: [c_current_addr_sk#2] -Right keys [1]: [ca_address_sk#6] +Right keys [1]: [ca_address_sk#5] Join condition: None (12) Project [codegen id : 5] -Output [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7] -Input [6]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4, ca_address_sk#6, ca_city#7] +Output [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#6] +Input [6]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4, ca_address_sk#5, ca_city#6] (13) Exchange -Input [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7] -Arguments: hashpartitioning(c_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#9] +Input [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#6] +Arguments: hashpartitioning(c_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (14) Sort [codegen id : 6] -Input [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7] +Input [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#6] Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 (15) Scan parquet default.store_sales -Output [8]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_store_sk#13, ss_ticket_number#14, ss_coupon_amt#15, ss_net_profit#16, ss_sold_date_sk#17] +Output [8]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_coupon_amt#12, ss_net_profit#13, ss_sold_date_sk#14] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#17), dynamicpruningexpression(ss_sold_date_sk#17 IN dynamicpruning#18)] +PartitionFilters: [isnotnull(ss_sold_date_sk#14), dynamicpruningexpression(ss_sold_date_sk#14 IN dynamicpruning#15)] PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] ReadSchema: struct (16) ColumnarToRow [codegen id : 10] -Input [8]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_store_sk#13, ss_ticket_number#14, ss_coupon_amt#15, ss_net_profit#16, ss_sold_date_sk#17] +Input [8]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_coupon_amt#12, ss_net_profit#13, ss_sold_date_sk#14] (17) Filter [codegen id : 10] -Input [8]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_store_sk#13, ss_ticket_number#14, ss_coupon_amt#15, ss_net_profit#16, ss_sold_date_sk#17] -Condition : (((isnotnull(ss_store_sk#13) AND isnotnull(ss_hdemo_sk#11)) AND isnotnull(ss_addr_sk#12)) AND isnotnull(ss_customer_sk#10)) +Input [8]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_coupon_amt#12, ss_net_profit#13, ss_sold_date_sk#14] +Condition : (((isnotnull(ss_store_sk#10) AND isnotnull(ss_hdemo_sk#8)) AND isnotnull(ss_addr_sk#9)) AND isnotnull(ss_customer_sk#7)) (18) ReusedExchange [Reuses operator id: 52] -Output [1]: [d_date_sk#19] +Output [1]: [d_date_sk#16] (19) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_sold_date_sk#17] -Right keys [1]: [d_date_sk#19] +Left keys [1]: [ss_sold_date_sk#14] +Right keys [1]: [d_date_sk#16] Join condition: None (20) Project [codegen id : 10] -Output [7]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_store_sk#13, ss_ticket_number#14, ss_coupon_amt#15, ss_net_profit#16] -Input [9]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_store_sk#13, ss_ticket_number#14, ss_coupon_amt#15, ss_net_profit#16, ss_sold_date_sk#17, d_date_sk#19] +Output [7]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_coupon_amt#12, ss_net_profit#13] +Input [9]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_coupon_amt#12, ss_net_profit#13, ss_sold_date_sk#14, d_date_sk#16] (21) Scan parquet default.store -Output [2]: [s_store_sk#20, s_city#21] +Output [2]: [s_store_sk#17, s_city#18] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 8] -Input [2]: [s_store_sk#20, s_city#21] +Input [2]: [s_store_sk#17, s_city#18] (23) Filter [codegen id : 8] -Input [2]: [s_store_sk#20, s_city#21] -Condition : (s_city#21 IN (Fairview,Midway) AND isnotnull(s_store_sk#20)) +Input [2]: [s_store_sk#17, s_city#18] +Condition : (s_city#18 IN (Fairview,Midway) AND isnotnull(s_store_sk#17)) (24) Project [codegen id : 8] -Output [1]: [s_store_sk#20] -Input [2]: [s_store_sk#20, s_city#21] +Output [1]: [s_store_sk#17] +Input [2]: [s_store_sk#17, s_city#18] (25) BroadcastExchange -Input [1]: [s_store_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [s_store_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (26) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_store_sk#13] -Right keys [1]: [s_store_sk#20] +Left keys [1]: [ss_store_sk#10] +Right keys [1]: [s_store_sk#17] Join condition: None (27) Project [codegen id : 10] -Output [6]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_ticket_number#14, ss_coupon_amt#15, ss_net_profit#16] -Input [8]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_store_sk#13, ss_ticket_number#14, ss_coupon_amt#15, ss_net_profit#16, s_store_sk#20] +Output [6]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_ticket_number#11, ss_coupon_amt#12, ss_net_profit#13] +Input [8]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_coupon_amt#12, ss_net_profit#13, s_store_sk#17] (28) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#23, hd_dep_count#24, hd_vehicle_count#25] +Output [3]: [hd_demo_sk#19, hd_dep_count#20, hd_vehicle_count#21] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] ReadSchema: struct (29) ColumnarToRow [codegen id : 9] -Input [3]: [hd_demo_sk#23, hd_dep_count#24, hd_vehicle_count#25] +Input [3]: [hd_demo_sk#19, hd_dep_count#20, hd_vehicle_count#21] (30) Filter [codegen id : 9] -Input [3]: [hd_demo_sk#23, hd_dep_count#24, hd_vehicle_count#25] -Condition : (((hd_dep_count#24 = 4) OR (hd_vehicle_count#25 = 3)) AND isnotnull(hd_demo_sk#23)) +Input [3]: [hd_demo_sk#19, hd_dep_count#20, hd_vehicle_count#21] +Condition : (((hd_dep_count#20 = 4) OR (hd_vehicle_count#21 = 3)) AND isnotnull(hd_demo_sk#19)) (31) Project [codegen id : 9] -Output [1]: [hd_demo_sk#23] -Input [3]: [hd_demo_sk#23, hd_dep_count#24, hd_vehicle_count#25] +Output [1]: [hd_demo_sk#19] +Input [3]: [hd_demo_sk#19, hd_dep_count#20, hd_vehicle_count#21] (32) BroadcastExchange -Input [1]: [hd_demo_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [1]: [hd_demo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (33) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_hdemo_sk#11] -Right keys [1]: [hd_demo_sk#23] +Left keys [1]: [ss_hdemo_sk#8] +Right keys [1]: [hd_demo_sk#19] Join condition: None (34) Project [codegen id : 10] -Output [5]: [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#14, ss_coupon_amt#15, ss_net_profit#16] -Input [7]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_ticket_number#14, ss_coupon_amt#15, ss_net_profit#16, hd_demo_sk#23] +Output [5]: [ss_customer_sk#7, ss_addr_sk#9, ss_ticket_number#11, ss_coupon_amt#12, ss_net_profit#13] +Input [7]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_ticket_number#11, ss_coupon_amt#12, ss_net_profit#13, hd_demo_sk#19] (35) Exchange -Input [5]: [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#14, ss_coupon_amt#15, ss_net_profit#16] -Arguments: hashpartitioning(ss_addr_sk#12, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [5]: [ss_customer_sk#7, ss_addr_sk#9, ss_ticket_number#11, ss_coupon_amt#12, ss_net_profit#13] +Arguments: hashpartitioning(ss_addr_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=6] (36) Sort [codegen id : 11] -Input [5]: [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#14, ss_coupon_amt#15, ss_net_profit#16] -Arguments: [ss_addr_sk#12 ASC NULLS FIRST], false, 0 +Input [5]: [ss_customer_sk#7, ss_addr_sk#9, ss_ticket_number#11, ss_coupon_amt#12, ss_net_profit#13] +Arguments: [ss_addr_sk#9 ASC NULLS FIRST], false, 0 (37) ReusedExchange [Reuses operator id: 9] -Output [2]: [ca_address_sk#28, ca_city#29] +Output [2]: [ca_address_sk#22, ca_city#23] (38) Sort [codegen id : 13] -Input [2]: [ca_address_sk#28, ca_city#29] -Arguments: [ca_address_sk#28 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#22, ca_city#23] +Arguments: [ca_address_sk#22 ASC NULLS FIRST], false, 0 (39) SortMergeJoin [codegen id : 14] -Left keys [1]: [ss_addr_sk#12] -Right keys [1]: [ca_address_sk#28] +Left keys [1]: [ss_addr_sk#9] +Right keys [1]: [ca_address_sk#22] Join condition: None (40) Project [codegen id : 14] -Output [6]: [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#14, ss_coupon_amt#15, ss_net_profit#16, ca_city#29] -Input [7]: [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#14, ss_coupon_amt#15, ss_net_profit#16, ca_address_sk#28, ca_city#29] +Output [6]: [ss_customer_sk#7, ss_addr_sk#9, ss_ticket_number#11, ss_coupon_amt#12, ss_net_profit#13, ca_city#23] +Input [7]: [ss_customer_sk#7, ss_addr_sk#9, ss_ticket_number#11, ss_coupon_amt#12, ss_net_profit#13, ca_address_sk#22, ca_city#23] (41) HashAggregate [codegen id : 14] -Input [6]: [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#14, ss_coupon_amt#15, ss_net_profit#16, ca_city#29] -Keys [4]: [ss_ticket_number#14, ss_customer_sk#10, ss_addr_sk#12, ca_city#29] -Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#15)), partial_sum(UnscaledValue(ss_net_profit#16))] -Aggregate Attributes [2]: [sum#30, sum#31] -Results [6]: [ss_ticket_number#14, ss_customer_sk#10, ss_addr_sk#12, ca_city#29, sum#32, sum#33] +Input [6]: [ss_customer_sk#7, ss_addr_sk#9, ss_ticket_number#11, ss_coupon_amt#12, ss_net_profit#13, ca_city#23] +Keys [4]: [ss_ticket_number#11, ss_customer_sk#7, ss_addr_sk#9, ca_city#23] +Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#12)), partial_sum(UnscaledValue(ss_net_profit#13))] +Aggregate Attributes [2]: [sum#24, sum#25] +Results [6]: [ss_ticket_number#11, ss_customer_sk#7, ss_addr_sk#9, ca_city#23, sum#26, sum#27] (42) HashAggregate [codegen id : 14] -Input [6]: [ss_ticket_number#14, ss_customer_sk#10, ss_addr_sk#12, ca_city#29, sum#32, sum#33] -Keys [4]: [ss_ticket_number#14, ss_customer_sk#10, ss_addr_sk#12, ca_city#29] -Functions [2]: [sum(UnscaledValue(ss_coupon_amt#15)), sum(UnscaledValue(ss_net_profit#16))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#15))#34, sum(UnscaledValue(ss_net_profit#16))#35] -Results [5]: [ss_ticket_number#14, ss_customer_sk#10, ca_city#29 AS bought_city#36, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#15))#34,17,2) AS amt#37, MakeDecimal(sum(UnscaledValue(ss_net_profit#16))#35,17,2) AS profit#38] +Input [6]: [ss_ticket_number#11, ss_customer_sk#7, ss_addr_sk#9, ca_city#23, sum#26, sum#27] +Keys [4]: [ss_ticket_number#11, ss_customer_sk#7, ss_addr_sk#9, ca_city#23] +Functions [2]: [sum(UnscaledValue(ss_coupon_amt#12)), sum(UnscaledValue(ss_net_profit#13))] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#12))#28, sum(UnscaledValue(ss_net_profit#13))#29] +Results [5]: [ss_ticket_number#11, ss_customer_sk#7, ca_city#23 AS bought_city#30, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#12))#28,17,2) AS amt#31, MakeDecimal(sum(UnscaledValue(ss_net_profit#13))#29,17,2) AS profit#32] (43) Exchange -Input [5]: [ss_ticket_number#14, ss_customer_sk#10, bought_city#36, amt#37, profit#38] -Arguments: hashpartitioning(ss_customer_sk#10, 5), ENSURE_REQUIREMENTS, [id=#39] +Input [5]: [ss_ticket_number#11, ss_customer_sk#7, bought_city#30, amt#31, profit#32] +Arguments: hashpartitioning(ss_customer_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=7] (44) Sort [codegen id : 15] -Input [5]: [ss_ticket_number#14, ss_customer_sk#10, bought_city#36, amt#37, profit#38] -Arguments: [ss_customer_sk#10 ASC NULLS FIRST], false, 0 +Input [5]: [ss_ticket_number#11, ss_customer_sk#7, bought_city#30, amt#31, profit#32] +Arguments: [ss_customer_sk#7 ASC NULLS FIRST], false, 0 (45) SortMergeJoin [codegen id : 16] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ss_customer_sk#10] -Join condition: NOT (ca_city#7 = bought_city#36) +Right keys [1]: [ss_customer_sk#7] +Join condition: NOT (ca_city#6 = bought_city#30) (46) Project [codegen id : 16] -Output [7]: [c_last_name#4, c_first_name#3, ca_city#7, bought_city#36, ss_ticket_number#14, amt#37, profit#38] -Input [9]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7, ss_ticket_number#14, ss_customer_sk#10, bought_city#36, amt#37, profit#38] +Output [7]: [c_last_name#4, c_first_name#3, ca_city#6, bought_city#30, ss_ticket_number#11, amt#31, profit#32] +Input [9]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#6, ss_ticket_number#11, ss_customer_sk#7, bought_city#30, amt#31, profit#32] (47) TakeOrderedAndProject -Input [7]: [c_last_name#4, c_first_name#3, ca_city#7, bought_city#36, ss_ticket_number#14, amt#37, profit#38] -Arguments: 100, [c_last_name#4 ASC NULLS FIRST, c_first_name#3 ASC NULLS FIRST, ca_city#7 ASC NULLS FIRST, bought_city#36 ASC NULLS FIRST, ss_ticket_number#14 ASC NULLS FIRST], [c_last_name#4, c_first_name#3, ca_city#7, bought_city#36, ss_ticket_number#14, amt#37, profit#38] +Input [7]: [c_last_name#4, c_first_name#3, ca_city#6, bought_city#30, ss_ticket_number#11, amt#31, profit#32] +Arguments: 100, [c_last_name#4 ASC NULLS FIRST, c_first_name#3 ASC NULLS FIRST, ca_city#6 ASC NULLS FIRST, bought_city#30 ASC NULLS FIRST, ss_ticket_number#11 ASC NULLS FIRST], [c_last_name#4, c_first_name#3, ca_city#6, bought_city#30, ss_ticket_number#11, amt#31, profit#32] ===== Subqueries ===== -Subquery:1 Hosting operator id = 15 Hosting Expression = ss_sold_date_sk#17 IN dynamicpruning#18 +Subquery:1 Hosting operator id = 15 Hosting Expression = ss_sold_date_sk#14 IN dynamicpruning#15 BroadcastExchange (52) +- * Project (51) +- * Filter (50) @@ -268,25 +268,25 @@ BroadcastExchange (52) (48) Scan parquet default.date_dim -Output [3]: [d_date_sk#19, d_year#40, d_dow#41] +Output [3]: [d_date_sk#16, d_year#33, d_dow#34] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_dow, [0,6]), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] ReadSchema: struct (49) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#19, d_year#40, d_dow#41] +Input [3]: [d_date_sk#16, d_year#33, d_dow#34] (50) Filter [codegen id : 1] -Input [3]: [d_date_sk#19, d_year#40, d_dow#41] -Condition : ((d_dow#41 IN (6,0) AND d_year#40 IN (1999,2000,2001)) AND isnotnull(d_date_sk#19)) +Input [3]: [d_date_sk#16, d_year#33, d_dow#34] +Condition : ((d_dow#34 IN (6,0) AND d_year#33 IN (1999,2000,2001)) AND isnotnull(d_date_sk#16)) (51) Project [codegen id : 1] -Output [1]: [d_date_sk#19] -Input [3]: [d_date_sk#19, d_year#40, d_dow#41] +Output [1]: [d_date_sk#16] +Input [3]: [d_date_sk#16, d_year#33, d_dow#34] (52) BroadcastExchange -Input [1]: [d_date_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] +Input [1]: [d_date_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt index 879f2f2ed51d7..c48542dd8ffad 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q46/explain.txt @@ -87,7 +87,7 @@ Input [2]: [s_store_sk#11, s_city#12] (11) BroadcastExchange Input [1]: [s_store_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_store_sk#4] @@ -99,123 +99,123 @@ Output [6]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#11] (14) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Output [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Input [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] (16) Filter [codegen id : 3] -Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] -Condition : (((hd_dep_count#15 = 4) OR (hd_vehicle_count#16 = 3)) AND isnotnull(hd_demo_sk#14)) +Input [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] +Condition : (((hd_dep_count#14 = 4) OR (hd_vehicle_count#15 = 3)) AND isnotnull(hd_demo_sk#13)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#14] -Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Output [1]: [hd_demo_sk#13] +Input [3]: [hd_demo_sk#13, hd_dep_count#14, hd_vehicle_count#15] (18) BroadcastExchange -Input [1]: [hd_demo_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [1]: [hd_demo_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#14] +Right keys [1]: [hd_demo_sk#13] Join condition: None (20) Project [codegen id : 5] Output [5]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7] -Input [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, hd_demo_sk#14] +Input [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, hd_demo_sk#13] (21) Scan parquet default.customer_address -Output [2]: [ca_address_sk#18, ca_city#19] +Output [2]: [ca_address_sk#16, ca_city#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] ReadSchema: struct (22) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#18, ca_city#19] +Input [2]: [ca_address_sk#16, ca_city#17] (23) Filter [codegen id : 4] -Input [2]: [ca_address_sk#18, ca_city#19] -Condition : (isnotnull(ca_address_sk#18) AND isnotnull(ca_city#19)) +Input [2]: [ca_address_sk#16, ca_city#17] +Condition : (isnotnull(ca_address_sk#16) AND isnotnull(ca_city#17)) (24) BroadcastExchange -Input [2]: [ca_address_sk#18, ca_city#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] +Input [2]: [ca_address_sk#16, ca_city#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (25) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_addr_sk#3] -Right keys [1]: [ca_address_sk#18] +Right keys [1]: [ca_address_sk#16] Join condition: None (26) Project [codegen id : 5] -Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#19] -Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_address_sk#18, ca_city#19] +Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#17] +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_address_sk#16, ca_city#17] (27) HashAggregate [codegen id : 5] -Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#19] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19] +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, ca_city#17] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17] Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] -Aggregate Attributes [2]: [sum#21, sum#22] -Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24] +Aggregate Attributes [2]: [sum#18, sum#19] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17, sum#20, sum#21] (28) Exchange -Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, 5), ENSURE_REQUIREMENTS, [id=#25] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17, sum#20, sum#21] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 8] -Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19, sum#23, sum#24] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#19] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17, sum#20, sum#21] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#17] Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#26, sum(UnscaledValue(ss_net_profit#7))#27] -Results [5]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#19 AS bought_city#28, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#26,17,2) AS amt#29, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#27,17,2) AS profit#30] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#22, sum(UnscaledValue(ss_net_profit#7))#23] +Results [5]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#17 AS bought_city#24, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#22,17,2) AS amt#25, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#23,17,2) AS profit#26] (30) Scan parquet default.customer -Output [4]: [c_customer_sk#31, c_current_addr_sk#32, c_first_name#33, c_last_name#34] +Output [4]: [c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 6] -Input [4]: [c_customer_sk#31, c_current_addr_sk#32, c_first_name#33, c_last_name#34] +Input [4]: [c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] (32) Filter [codegen id : 6] -Input [4]: [c_customer_sk#31, c_current_addr_sk#32, c_first_name#33, c_last_name#34] -Condition : (isnotnull(c_customer_sk#31) AND isnotnull(c_current_addr_sk#32)) +Input [4]: [c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] +Condition : (isnotnull(c_customer_sk#27) AND isnotnull(c_current_addr_sk#28)) (33) BroadcastExchange -Input [4]: [c_customer_sk#31, c_current_addr_sk#32, c_first_name#33, c_last_name#34] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#35] +Input [4]: [c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#31] +Right keys [1]: [c_customer_sk#27] Join condition: None (35) Project [codegen id : 8] -Output [7]: [ss_ticket_number#5, bought_city#28, amt#29, profit#30, c_current_addr_sk#32, c_first_name#33, c_last_name#34] -Input [9]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#28, amt#29, profit#30, c_customer_sk#31, c_current_addr_sk#32, c_first_name#33, c_last_name#34] +Output [7]: [ss_ticket_number#5, bought_city#24, amt#25, profit#26, c_current_addr_sk#28, c_first_name#29, c_last_name#30] +Input [9]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#24, amt#25, profit#26, c_customer_sk#27, c_current_addr_sk#28, c_first_name#29, c_last_name#30] (36) ReusedExchange [Reuses operator id: 24] -Output [2]: [ca_address_sk#36, ca_city#37] +Output [2]: [ca_address_sk#31, ca_city#32] (37) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [c_current_addr_sk#32] -Right keys [1]: [ca_address_sk#36] -Join condition: NOT (ca_city#37 = bought_city#28) +Left keys [1]: [c_current_addr_sk#28] +Right keys [1]: [ca_address_sk#31] +Join condition: NOT (ca_city#32 = bought_city#24) (38) Project [codegen id : 8] -Output [7]: [c_last_name#34, c_first_name#33, ca_city#37, bought_city#28, ss_ticket_number#5, amt#29, profit#30] -Input [9]: [ss_ticket_number#5, bought_city#28, amt#29, profit#30, c_current_addr_sk#32, c_first_name#33, c_last_name#34, ca_address_sk#36, ca_city#37] +Output [7]: [c_last_name#30, c_first_name#29, ca_city#32, bought_city#24, ss_ticket_number#5, amt#25, profit#26] +Input [9]: [ss_ticket_number#5, bought_city#24, amt#25, profit#26, c_current_addr_sk#28, c_first_name#29, c_last_name#30, ca_address_sk#31, ca_city#32] (39) TakeOrderedAndProject -Input [7]: [c_last_name#34, c_first_name#33, ca_city#37, bought_city#28, ss_ticket_number#5, amt#29, profit#30] -Arguments: 100, [c_last_name#34 ASC NULLS FIRST, c_first_name#33 ASC NULLS FIRST, ca_city#37 ASC NULLS FIRST, bought_city#28 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#34, c_first_name#33, ca_city#37, bought_city#28, ss_ticket_number#5, amt#29, profit#30] +Input [7]: [c_last_name#30, c_first_name#29, ca_city#32, bought_city#24, ss_ticket_number#5, amt#25, profit#26] +Arguments: 100, [c_last_name#30 ASC NULLS FIRST, c_first_name#29 ASC NULLS FIRST, ca_city#32 ASC NULLS FIRST, bought_city#24 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#30, c_first_name#29, ca_city#32, bought_city#24, ss_ticket_number#5, amt#25, profit#26] ===== Subqueries ===== @@ -228,25 +228,25 @@ BroadcastExchange (44) (40) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#38, d_dow#39] +Output [3]: [d_date_sk#10, d_year#33, d_dow#34] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_dow, [0,6]), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] ReadSchema: struct (41) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#38, d_dow#39] +Input [3]: [d_date_sk#10, d_year#33, d_dow#34] (42) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#38, d_dow#39] -Condition : ((d_dow#39 IN (6,0) AND d_year#38 IN (1999,2000,2001)) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#10, d_year#33, d_dow#34] +Condition : ((d_dow#34 IN (6,0) AND d_year#33 IN (1999,2000,2001)) AND isnotnull(d_date_sk#10)) (43) Project [codegen id : 1] Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#38, d_dow#39] +Input [3]: [d_date_sk#10, d_year#33, d_dow#34] (44) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt index 23dfbecdbca9d..0dad98fba1969 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47.sf100/explain.txt @@ -96,7 +96,7 @@ Condition : ((isnotnull(s_store_sk#9) AND isnotnull(s_store_name#10)) AND isnotn (10) BroadcastExchange Input [3]: [s_store_sk#9, s_store_name#10, s_company_name#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_store_sk#2] @@ -109,175 +109,175 @@ Input [8]: [ss_item_sk#1, ss_store_sk#2, ss_sales_price#3, d_year#7, d_moy#8, s_ (13) Exchange Input [6]: [ss_item_sk#1, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#13] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) Sort [codegen id : 4] Input [6]: [ss_item_sk#1, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (15) Scan parquet default.item -Output [3]: [i_item_sk#14, i_brand#15, i_category#16] +Output [3]: [i_item_sk#12, i_brand#13, i_category#14] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [3]: [i_item_sk#14, i_brand#15, i_category#16] +Input [3]: [i_item_sk#12, i_brand#13, i_category#14] (17) Filter [codegen id : 5] -Input [3]: [i_item_sk#14, i_brand#15, i_category#16] -Condition : ((isnotnull(i_item_sk#14) AND isnotnull(i_category#16)) AND isnotnull(i_brand#15)) +Input [3]: [i_item_sk#12, i_brand#13, i_category#14] +Condition : ((isnotnull(i_item_sk#12) AND isnotnull(i_category#14)) AND isnotnull(i_brand#13)) (18) Exchange -Input [3]: [i_item_sk#14, i_brand#15, i_category#16] -Arguments: hashpartitioning(i_item_sk#14, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [3]: [i_item_sk#12, i_brand#13, i_category#14] +Arguments: hashpartitioning(i_item_sk#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 6] -Input [3]: [i_item_sk#14, i_brand#15, i_category#16] -Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 +Input [3]: [i_item_sk#12, i_brand#13, i_category#14] +Arguments: [i_item_sk#12 ASC NULLS FIRST], false, 0 (20) SortMergeJoin [codegen id : 7] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#14] +Right keys [1]: [i_item_sk#12] Join condition: None (21) Project [codegen id : 7] -Output [7]: [i_brand#15, i_category#16, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11] -Input [9]: [ss_item_sk#1, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11, i_item_sk#14, i_brand#15, i_category#16] +Output [7]: [i_brand#13, i_category#14, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11] +Input [9]: [ss_item_sk#1, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11, i_item_sk#12, i_brand#13, i_category#14] (22) HashAggregate [codegen id : 7] -Input [7]: [i_brand#15, i_category#16, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11] -Keys [6]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8] +Input [7]: [i_brand#13, i_category#14, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11] +Keys [6]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8] Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#3))] -Aggregate Attributes [1]: [sum#18] -Results [7]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum#19] +Aggregate Attributes [1]: [sum#15] +Results [7]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum#16] (23) Exchange -Input [7]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum#19] -Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [7]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum#16] +Arguments: hashpartitioning(i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) HashAggregate [codegen id : 8] -Input [7]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum#19] -Keys [6]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8] +Input [7]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum#16] +Keys [6]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8] Functions [1]: [sum(UnscaledValue(ss_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#3))#21] -Results [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#21,17,2) AS sum_sales#22, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#21,17,2) AS _w0#23] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#3))#17] +Results [8]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#17,17,2) AS sum_sales#18, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#17,17,2) AS _w0#19] (25) Exchange -Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23] -Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [8]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19] +Arguments: hashpartitioning(i_category#14, i_brand#13, s_store_name#10, s_company_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] (26) Sort [codegen id : 9] -Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23] -Arguments: [i_category#16 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 +Input [8]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19] +Arguments: [i_category#14 ASC NULLS FIRST, i_brand#13 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 (27) Window -Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23] -Arguments: [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#25], [i_category#16, i_brand#15, s_store_name#10, s_company_name#11], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] +Input [8]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19] +Arguments: [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#20], [i_category#14, i_brand#13, s_store_name#10, s_company_name#11], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] (28) Filter [codegen id : 10] -Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23, rn#25] +Input [9]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19, rn#20] Condition : (isnotnull(d_year#7) AND (d_year#7 = 1999)) (29) Window -Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23, rn#25] -Arguments: [avg(_w0#23) windowspecdefinition(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#26], [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7] +Input [9]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19, rn#20] +Arguments: [avg(_w0#19) windowspecdefinition(i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#21], [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7] (30) Filter [codegen id : 11] -Input [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23, rn#25, avg_monthly_sales#26] -Condition : ((isnotnull(avg_monthly_sales#26) AND (avg_monthly_sales#26 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Input [10]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19, rn#20, avg_monthly_sales#21] +Condition : ((isnotnull(avg_monthly_sales#21) AND (avg_monthly_sales#21 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (31) Project [codegen id : 11] -Output [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25] -Input [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23, rn#25, avg_monthly_sales#26] +Output [9]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, avg_monthly_sales#21, rn#20] +Input [10]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19, rn#20, avg_monthly_sales#21] (32) Exchange -Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25] -Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#25, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [9]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, avg_monthly_sales#21, rn#20] +Arguments: hashpartitioning(i_category#14, i_brand#13, s_store_name#10, s_company_name#11, rn#20, 5), ENSURE_REQUIREMENTS, [plan_id=6] (33) Sort [codegen id : 12] -Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25] -Arguments: [i_category#16 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, rn#25 ASC NULLS FIRST], false, 0 +Input [9]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, avg_monthly_sales#21, rn#20] +Arguments: [i_category#14 ASC NULLS FIRST, i_brand#13 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, rn#20 ASC NULLS FIRST], false, 0 (34) ReusedExchange [Reuses operator id: 23] -Output [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum#34] +Output [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum#28] (35) HashAggregate [codegen id : 20] -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum#34] -Keys [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33] -Functions [1]: [sum(UnscaledValue(ss_sales_price#35))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#35))#21] -Results [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, MakeDecimal(sum(UnscaledValue(ss_sales_price#35))#21,17,2) AS sum_sales#22] +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum#28] +Keys [6]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27] +Functions [1]: [sum(UnscaledValue(ss_sales_price#29))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#29))#17] +Results [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, MakeDecimal(sum(UnscaledValue(ss_sales_price#29))#17,17,2) AS sum_sales#18] (36) Exchange -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22] -Arguments: hashpartitioning(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18] +Arguments: hashpartitioning(i_category#22, i_brand#23, s_store_name#24, s_company_name#25, 5), ENSURE_REQUIREMENTS, [plan_id=7] (37) Sort [codegen id : 21] -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22] -Arguments: [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, s_store_name#30 ASC NULLS FIRST, s_company_name#31 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18] +Arguments: [i_category#22 ASC NULLS FIRST, i_brand#23 ASC NULLS FIRST, s_store_name#24 ASC NULLS FIRST, s_company_name#25 ASC NULLS FIRST, d_year#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST], false, 0 (38) Window -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22] -Arguments: [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#37], [i_category#28, i_brand#29, s_store_name#30, s_company_name#31], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18] +Arguments: [rank(d_year#26, d_moy#27) windowspecdefinition(i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#30], [i_category#22, i_brand#23, s_store_name#24, s_company_name#25], [d_year#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST] (39) Project [codegen id : 22] -Output [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#22 AS sum_sales#38, rn#37] -Input [8]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22, rn#37] +Output [6]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, sum_sales#18 AS sum_sales#31, rn#30] +Input [8]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18, rn#30] (40) Exchange -Input [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#38, rn#37] -Arguments: hashpartitioning(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, (rn#37 + 1), 5), ENSURE_REQUIREMENTS, [id=#39] +Input [6]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, sum_sales#31, rn#30] +Arguments: hashpartitioning(i_category#22, i_brand#23, s_store_name#24, s_company_name#25, (rn#30 + 1), 5), ENSURE_REQUIREMENTS, [plan_id=8] (41) Sort [codegen id : 23] -Input [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#38, rn#37] -Arguments: [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, s_store_name#30 ASC NULLS FIRST, s_company_name#31 ASC NULLS FIRST, (rn#37 + 1) ASC NULLS FIRST], false, 0 +Input [6]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, sum_sales#31, rn#30] +Arguments: [i_category#22 ASC NULLS FIRST, i_brand#23 ASC NULLS FIRST, s_store_name#24 ASC NULLS FIRST, s_company_name#25 ASC NULLS FIRST, (rn#30 + 1) ASC NULLS FIRST], false, 0 (42) SortMergeJoin [codegen id : 24] -Left keys [5]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#25] -Right keys [5]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, (rn#37 + 1)] +Left keys [5]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, rn#20] +Right keys [5]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, (rn#30 + 1)] Join condition: None (43) Project [codegen id : 24] -Output [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, sum_sales#38] -Input [15]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#38, rn#37] +Output [10]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, avg_monthly_sales#21, rn#20, sum_sales#31] +Input [15]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, avg_monthly_sales#21, rn#20, i_category#22, i_brand#23, s_store_name#24, s_company_name#25, sum_sales#31, rn#30] (44) ReusedExchange [Reuses operator id: 36] -Output [7]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22] +Output [7]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18] (45) Sort [codegen id : 33] -Input [7]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22] -Arguments: [i_category#40 ASC NULLS FIRST, i_brand#41 ASC NULLS FIRST, s_store_name#42 ASC NULLS FIRST, s_company_name#43 ASC NULLS FIRST, d_year#44 ASC NULLS FIRST, d_moy#45 ASC NULLS FIRST], false, 0 +Input [7]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18] +Arguments: [i_category#32 ASC NULLS FIRST, i_brand#33 ASC NULLS FIRST, s_store_name#34 ASC NULLS FIRST, s_company_name#35 ASC NULLS FIRST, d_year#36 ASC NULLS FIRST, d_moy#37 ASC NULLS FIRST], false, 0 (46) Window -Input [7]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22] -Arguments: [rank(d_year#44, d_moy#45) windowspecdefinition(i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44 ASC NULLS FIRST, d_moy#45 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#46], [i_category#40, i_brand#41, s_store_name#42, s_company_name#43], [d_year#44 ASC NULLS FIRST, d_moy#45 ASC NULLS FIRST] +Input [7]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18] +Arguments: [rank(d_year#36, d_moy#37) windowspecdefinition(i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36 ASC NULLS FIRST, d_moy#37 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#38], [i_category#32, i_brand#33, s_store_name#34, s_company_name#35], [d_year#36 ASC NULLS FIRST, d_moy#37 ASC NULLS FIRST] (47) Project [codegen id : 34] -Output [6]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#22 AS sum_sales#47, rn#46] -Input [8]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22, rn#46] +Output [6]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, sum_sales#18 AS sum_sales#39, rn#38] +Input [8]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18, rn#38] (48) Exchange -Input [6]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#47, rn#46] -Arguments: hashpartitioning(i_category#40, i_brand#41, s_store_name#42, s_company_name#43, (rn#46 - 1), 5), ENSURE_REQUIREMENTS, [id=#48] +Input [6]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, sum_sales#39, rn#38] +Arguments: hashpartitioning(i_category#32, i_brand#33, s_store_name#34, s_company_name#35, (rn#38 - 1), 5), ENSURE_REQUIREMENTS, [plan_id=9] (49) Sort [codegen id : 35] -Input [6]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#47, rn#46] -Arguments: [i_category#40 ASC NULLS FIRST, i_brand#41 ASC NULLS FIRST, s_store_name#42 ASC NULLS FIRST, s_company_name#43 ASC NULLS FIRST, (rn#46 - 1) ASC NULLS FIRST], false, 0 +Input [6]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, sum_sales#39, rn#38] +Arguments: [i_category#32 ASC NULLS FIRST, i_brand#33 ASC NULLS FIRST, s_store_name#34 ASC NULLS FIRST, s_company_name#35 ASC NULLS FIRST, (rn#38 - 1) ASC NULLS FIRST], false, 0 (50) SortMergeJoin [codegen id : 36] -Left keys [5]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#25] -Right keys [5]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, (rn#46 - 1)] +Left keys [5]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, rn#20] +Right keys [5]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, (rn#38 - 1)] Join condition: None (51) Project [codegen id : 36] -Output [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, sum_sales#38 AS psum#49, sum_sales#47 AS nsum#50] -Input [16]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, sum_sales#38, i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#47, rn#46] +Output [10]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#21, sum_sales#18, sum_sales#31 AS psum#40, sum_sales#39 AS nsum#41] +Input [16]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, avg_monthly_sales#21, rn#20, sum_sales#31, i_category#32, i_brand#33, s_store_name#34, s_company_name#35, sum_sales#39, rn#38] (52) TakeOrderedAndProject -Input [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#49, nsum#50] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST], [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#49, nsum#50] +Input [10]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#21, sum_sales#18, psum#40, nsum#41] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST], [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, avg_monthly_sales#21, sum_sales#18, psum#40, nsum#41] ===== Subqueries ===== @@ -304,6 +304,6 @@ Condition : ((((d_year#7 = 1999) OR ((d_year#7 = 1998) AND (d_moy#8 = 12))) OR ( (56) BroadcastExchange Input [3]: [d_date_sk#6, d_year#7, d_moy#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#51] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt index e7faf392ad879..e55defd7ff65c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt @@ -77,7 +77,7 @@ Condition : (isnotnull(ss_item_sk#4) AND isnotnull(ss_store_sk#5)) (7) BroadcastExchange Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] @@ -89,160 +89,160 @@ Output [5]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_d Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] (10) ReusedExchange [Reuses operator id: 49] -Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_date_sk#7] -Right keys [1]: [d_date_sk#10] +Right keys [1]: [d_date_sk#9] Join condition: None (12) Project [codegen id : 4] -Output [6]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#11, d_moy#12] -Input [8]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#10, d_year#11, d_moy#12] +Output [6]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#10, d_moy#11] +Input [8]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#9, d_year#10, d_moy#11] (13) Scan parquet default.store -Output [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Output [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] (15) Filter [codegen id : 3] -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] -Condition : ((isnotnull(s_store_sk#13) AND isnotnull(s_store_name#14)) AND isnotnull(s_company_name#15)) +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Condition : ((isnotnull(s_store_sk#12) AND isnotnull(s_store_name#13)) AND isnotnull(s_company_name#14)) (16) BroadcastExchange -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#5] -Right keys [1]: [s_store_sk#13] +Right keys [1]: [s_store_sk#12] Join condition: None (18) Project [codegen id : 4] -Output [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#11, d_moy#12, s_store_name#14, s_company_name#15] -Input [9]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#11, d_moy#12, s_store_sk#13, s_store_name#14, s_company_name#15] +Output [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#10, d_moy#11, s_store_name#13, s_company_name#14] +Input [9]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#10, d_moy#11, s_store_sk#12, s_store_name#13, s_company_name#14] (19) HashAggregate [codegen id : 4] -Input [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#11, d_moy#12, s_store_name#14, s_company_name#15] -Keys [6]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12] +Input [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#10, d_moy#11, s_store_name#13, s_company_name#14] +Keys [6]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11] Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [1]: [sum#17] -Results [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum#18] +Aggregate Attributes [1]: [sum#15] +Results [7]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum#16] (20) Exchange -Input [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum#18] -Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [7]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum#16] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 5] -Input [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum#18] -Keys [6]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12] +Input [7]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum#16] +Keys [6]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11] Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#6))#20] -Results [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#20,17,2) AS _w0#22] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#6))#17] +Results [8]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#17,17,2) AS sum_sales#18, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#17,17,2) AS _w0#19] (22) Exchange -Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22] -Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [8]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#13, s_company_name#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) Sort [codegen id : 6] -Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22] -Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST, s_company_name#15 ASC NULLS FIRST, d_year#11 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST], false, 0 +Input [8]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST, s_company_name#14 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], false, 0 (24) Window -Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22] -Arguments: [rank(d_year#11, d_moy#12) windowspecdefinition(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#24], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15], [d_year#11 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST] +Input [8]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19] +Arguments: [rank(d_year#10, d_moy#11) windowspecdefinition(i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#20], [i_category#3, i_brand#2, s_store_name#13, s_company_name#14], [d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST] (25) Filter [codegen id : 7] -Input [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22, rn#24] -Condition : (isnotnull(d_year#11) AND (d_year#11 = 1999)) +Input [9]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19, rn#20] +Condition : (isnotnull(d_year#10) AND (d_year#10 = 1999)) (26) Window -Input [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22, rn#24] -Arguments: [avg(_w0#22) windowspecdefinition(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#25], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11] +Input [9]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19, rn#20] +Arguments: [avg(_w0#19) windowspecdefinition(i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#21], [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10] (27) Filter [codegen id : 22] -Input [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22, rn#24, avg_monthly_sales#25] -Condition : ((isnotnull(avg_monthly_sales#25) AND (avg_monthly_sales#25 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19, rn#20, avg_monthly_sales#21] +Condition : ((isnotnull(avg_monthly_sales#21) AND (avg_monthly_sales#21 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (28) Project [codegen id : 22] -Output [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, avg_monthly_sales#25, rn#24] -Input [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22, rn#24, avg_monthly_sales#25] +Output [9]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, avg_monthly_sales#21, rn#20] +Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19, rn#20, avg_monthly_sales#21] (29) ReusedExchange [Reuses operator id: 20] -Output [7]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31, sum#32] +Output [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum#28] (30) HashAggregate [codegen id : 12] -Input [7]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31, sum#32] -Keys [6]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31] -Functions [1]: [sum(UnscaledValue(ss_sales_price#33))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#33))#20] -Results [7]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31, MakeDecimal(sum(UnscaledValue(ss_sales_price#33))#20,17,2) AS sum_sales#21] +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum#28] +Keys [6]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27] +Functions [1]: [sum(UnscaledValue(ss_sales_price#29))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#29))#17] +Results [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, MakeDecimal(sum(UnscaledValue(ss_sales_price#29))#17,17,2) AS sum_sales#18] (31) Exchange -Input [7]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31, sum_sales#21] -Arguments: hashpartitioning(i_category#26, i_brand#27, s_store_name#28, s_company_name#29, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18] +Arguments: hashpartitioning(i_category#22, i_brand#23, s_store_name#24, s_company_name#25, 5), ENSURE_REQUIREMENTS, [plan_id=5] (32) Sort [codegen id : 13] -Input [7]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31, sum_sales#21] -Arguments: [i_category#26 ASC NULLS FIRST, i_brand#27 ASC NULLS FIRST, s_store_name#28 ASC NULLS FIRST, s_company_name#29 ASC NULLS FIRST, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST], false, 0 +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18] +Arguments: [i_category#22 ASC NULLS FIRST, i_brand#23 ASC NULLS FIRST, s_store_name#24 ASC NULLS FIRST, s_company_name#25 ASC NULLS FIRST, d_year#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST], false, 0 (33) Window -Input [7]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31, sum_sales#21] -Arguments: [rank(d_year#30, d_moy#31) windowspecdefinition(i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#26, i_brand#27, s_store_name#28, s_company_name#29], [d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST] +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18] +Arguments: [rank(d_year#26, d_moy#27) windowspecdefinition(i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#30], [i_category#22, i_brand#23, s_store_name#24, s_company_name#25], [d_year#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST] (34) Project [codegen id : 14] -Output [6]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, sum_sales#21 AS sum_sales#36, rn#35] -Input [8]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31, sum_sales#21, rn#35] +Output [6]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, sum_sales#18 AS sum_sales#31, rn#30] +Input [8]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18, rn#30] (35) BroadcastExchange -Input [6]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, sum_sales#36, rn#35] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [id=#37] +Input [6]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, sum_sales#31, rn#30] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=6] (36) BroadcastHashJoin [codegen id : 22] -Left keys [5]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, rn#24] -Right keys [5]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, (rn#35 + 1)] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, rn#20] +Right keys [5]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, (rn#30 + 1)] Join condition: None (37) Project [codegen id : 22] -Output [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#36] -Input [15]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, avg_monthly_sales#25, rn#24, i_category#26, i_brand#27, s_store_name#28, s_company_name#29, sum_sales#36, rn#35] +Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, avg_monthly_sales#21, rn#20, sum_sales#31] +Input [15]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, avg_monthly_sales#21, rn#20, i_category#22, i_brand#23, s_store_name#24, s_company_name#25, sum_sales#31, rn#30] (38) ReusedExchange [Reuses operator id: 31] -Output [7]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#21] +Output [7]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18] (39) Sort [codegen id : 20] -Input [7]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#21] -Arguments: [i_category#38 ASC NULLS FIRST, i_brand#39 ASC NULLS FIRST, s_store_name#40 ASC NULLS FIRST, s_company_name#41 ASC NULLS FIRST, d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST], false, 0 +Input [7]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18] +Arguments: [i_category#32 ASC NULLS FIRST, i_brand#33 ASC NULLS FIRST, s_store_name#34 ASC NULLS FIRST, s_company_name#35 ASC NULLS FIRST, d_year#36 ASC NULLS FIRST, d_moy#37 ASC NULLS FIRST], false, 0 (40) Window -Input [7]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#21] -Arguments: [rank(d_year#42, d_moy#43) windowspecdefinition(i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#44], [i_category#38, i_brand#39, s_store_name#40, s_company_name#41], [d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST] +Input [7]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18] +Arguments: [rank(d_year#36, d_moy#37) windowspecdefinition(i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36 ASC NULLS FIRST, d_moy#37 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#38], [i_category#32, i_brand#33, s_store_name#34, s_company_name#35], [d_year#36 ASC NULLS FIRST, d_moy#37 ASC NULLS FIRST] (41) Project [codegen id : 21] -Output [6]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, sum_sales#21 AS sum_sales#45, rn#44] -Input [8]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#21, rn#44] +Output [6]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, sum_sales#18 AS sum_sales#39, rn#38] +Input [8]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18, rn#38] (42) BroadcastExchange -Input [6]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, sum_sales#45, rn#44] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [id=#46] +Input [6]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, sum_sales#39, rn#38] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=7] (43) BroadcastHashJoin [codegen id : 22] -Left keys [5]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, rn#24] -Right keys [5]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, (rn#44 - 1)] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, rn#20] +Right keys [5]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, (rn#38 - 1)] Join condition: None (44) Project [codegen id : 22] -Output [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, avg_monthly_sales#25, sum_sales#21, sum_sales#36 AS psum#47, sum_sales#45 AS nsum#48] -Input [16]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#36, i_category#38, i_brand#39, s_store_name#40, s_company_name#41, sum_sales#45, rn#44] +Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#21, sum_sales#18, sum_sales#31 AS psum#40, sum_sales#39 AS nsum#41] +Input [16]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, avg_monthly_sales#21, rn#20, sum_sales#31, i_category#32, i_brand#33, s_store_name#34, s_company_name#35, sum_sales#39, rn#38] (45) TakeOrderedAndProject -Input [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, avg_monthly_sales#25, sum_sales#21, psum#47, nsum#48] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, avg_monthly_sales#25, sum_sales#21, psum#47, nsum#48] +Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#21, sum_sales#18, psum#40, nsum#41] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#21, sum_sales#18, psum#40, nsum#41] ===== Subqueries ===== @@ -254,21 +254,21 @@ BroadcastExchange (49) (46) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct (47) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] (48) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] -Condition : ((((d_year#11 = 1999) OR ((d_year#11 = 1998) AND (d_moy#12 = 12))) OR ((d_year#11 = 2000) AND (d_moy#12 = 1))) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) (49) BroadcastExchange -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#49] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.sf100/explain.txt index c1cdc24ac44d8..5fa1b48ce7546 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48.sf100/explain.txt @@ -60,7 +60,7 @@ Condition : isnotnull(s_store_sk#9) (7) BroadcastExchange Input [1]: [s_store_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_store_sk#3] @@ -72,92 +72,92 @@ Output [6]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_ne Input [8]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, s_store_sk#9] (10) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] +Output [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,4 yr Degree )),And(EqualTo(cd_marital_status,D),EqualTo(cd_education_status,2 yr Degree ))),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College )))] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] (12) Filter [codegen id : 2] -Input [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] -Condition : (isnotnull(cd_demo_sk#11) AND ((((cd_marital_status#12 = M) AND (cd_education_status#13 = 4 yr Degree )) OR ((cd_marital_status#12 = D) AND (cd_education_status#13 = 2 yr Degree ))) OR ((cd_marital_status#12 = S) AND (cd_education_status#13 = College )))) +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] +Condition : (isnotnull(cd_demo_sk#10) AND ((((cd_marital_status#11 = M) AND (cd_education_status#12 = 4 yr Degree )) OR ((cd_marital_status#11 = D) AND (cd_education_status#12 = 2 yr Degree ))) OR ((cd_marital_status#11 = S) AND (cd_education_status#12 = College )))) (13) BroadcastExchange -Input [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_cdemo_sk#1] -Right keys [1]: [cd_demo_sk#11] -Join condition: ((((((cd_marital_status#12 = M) AND (cd_education_status#13 = 4 yr Degree )) AND (ss_sales_price#5 >= 100.00)) AND (ss_sales_price#5 <= 150.00)) OR ((((cd_marital_status#12 = D) AND (cd_education_status#13 = 2 yr Degree )) AND (ss_sales_price#5 >= 50.00)) AND (ss_sales_price#5 <= 100.00))) OR ((((cd_marital_status#12 = S) AND (cd_education_status#13 = College )) AND (ss_sales_price#5 >= 150.00)) AND (ss_sales_price#5 <= 200.00))) +Right keys [1]: [cd_demo_sk#10] +Join condition: ((((((cd_marital_status#11 = M) AND (cd_education_status#12 = 4 yr Degree )) AND (ss_sales_price#5 >= 100.00)) AND (ss_sales_price#5 <= 150.00)) OR ((((cd_marital_status#11 = D) AND (cd_education_status#12 = 2 yr Degree )) AND (ss_sales_price#5 >= 50.00)) AND (ss_sales_price#5 <= 100.00))) OR ((((cd_marital_status#11 = S) AND (cd_education_status#12 = College )) AND (ss_sales_price#5 >= 150.00)) AND (ss_sales_price#5 <= 200.00))) (15) Project [codegen id : 5] Output [4]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7] -Input [9]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] +Input [9]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] (16) Scan parquet default.customer_address -Output [3]: [ca_address_sk#15, ca_state#16, ca_country#17] +Output [3]: [ca_address_sk#13, ca_state#14, ca_country#15] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [CO,OH,TX]),In(ca_state, [KY,MN,OR])),In(ca_state, [CA,MS,VA]))] ReadSchema: struct (17) ColumnarToRow [codegen id : 3] -Input [3]: [ca_address_sk#15, ca_state#16, ca_country#17] +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] (18) Filter [codegen id : 3] -Input [3]: [ca_address_sk#15, ca_state#16, ca_country#17] -Condition : (((isnotnull(ca_country#17) AND (ca_country#17 = United States)) AND isnotnull(ca_address_sk#15)) AND ((ca_state#16 IN (CO,OH,TX) OR ca_state#16 IN (OR,MN,KY)) OR ca_state#16 IN (VA,CA,MS))) +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] +Condition : (((isnotnull(ca_country#15) AND (ca_country#15 = United States)) AND isnotnull(ca_address_sk#13)) AND ((ca_state#14 IN (CO,OH,TX) OR ca_state#14 IN (OR,MN,KY)) OR ca_state#14 IN (VA,CA,MS))) (19) Project [codegen id : 3] -Output [2]: [ca_address_sk#15, ca_state#16] -Input [3]: [ca_address_sk#15, ca_state#16, ca_country#17] +Output [2]: [ca_address_sk#13, ca_state#14] +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] (20) BroadcastExchange -Input [2]: [ca_address_sk#15, ca_state#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [2]: [ca_address_sk#13, ca_state#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_addr_sk#2] -Right keys [1]: [ca_address_sk#15] -Join condition: ((((ca_state#16 IN (CO,OH,TX) AND (ss_net_profit#6 >= 0.00)) AND (ss_net_profit#6 <= 2000.00)) OR ((ca_state#16 IN (OR,MN,KY) AND (ss_net_profit#6 >= 150.00)) AND (ss_net_profit#6 <= 3000.00))) OR ((ca_state#16 IN (VA,CA,MS) AND (ss_net_profit#6 >= 50.00)) AND (ss_net_profit#6 <= 25000.00))) +Right keys [1]: [ca_address_sk#13] +Join condition: ((((ca_state#14 IN (CO,OH,TX) AND (ss_net_profit#6 >= 0.00)) AND (ss_net_profit#6 <= 2000.00)) OR ((ca_state#14 IN (OR,MN,KY) AND (ss_net_profit#6 >= 150.00)) AND (ss_net_profit#6 <= 3000.00))) OR ((ca_state#14 IN (VA,CA,MS) AND (ss_net_profit#6 >= 50.00)) AND (ss_net_profit#6 <= 25000.00))) (22) Project [codegen id : 5] Output [2]: [ss_quantity#4, ss_sold_date_sk#7] -Input [6]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7, ca_address_sk#15, ca_state#16] +Input [6]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7, ca_address_sk#13, ca_state#14] (23) ReusedExchange [Reuses operator id: 33] -Output [1]: [d_date_sk#19] +Output [1]: [d_date_sk#16] (24) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_sold_date_sk#7] -Right keys [1]: [d_date_sk#19] +Right keys [1]: [d_date_sk#16] Join condition: None (25) Project [codegen id : 5] Output [1]: [ss_quantity#4] -Input [3]: [ss_quantity#4, ss_sold_date_sk#7, d_date_sk#19] +Input [3]: [ss_quantity#4, ss_sold_date_sk#7, d_date_sk#16] (26) HashAggregate [codegen id : 5] Input [1]: [ss_quantity#4] Keys: [] Functions [1]: [partial_sum(ss_quantity#4)] -Aggregate Attributes [1]: [sum#20] -Results [1]: [sum#21] +Aggregate Attributes [1]: [sum#17] +Results [1]: [sum#18] (27) Exchange -Input [1]: [sum#21] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#22] +Input [1]: [sum#18] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 6] -Input [1]: [sum#21] +Input [1]: [sum#18] Keys: [] Functions [1]: [sum(ss_quantity#4)] -Aggregate Attributes [1]: [sum(ss_quantity#4)#23] -Results [1]: [sum(ss_quantity#4)#23 AS sum(ss_quantity)#24] +Aggregate Attributes [1]: [sum(ss_quantity#4)#19] +Results [1]: [sum(ss_quantity#4)#19 AS sum(ss_quantity)#20] ===== Subqueries ===== @@ -170,25 +170,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [2]: [d_date_sk#19, d_year#25] +Output [2]: [d_date_sk#16, d_year#21] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#19, d_year#25] +Input [2]: [d_date_sk#16, d_year#21] (31) Filter [codegen id : 1] -Input [2]: [d_date_sk#19, d_year#25] -Condition : ((isnotnull(d_year#25) AND (d_year#25 = 2001)) AND isnotnull(d_date_sk#19)) +Input [2]: [d_date_sk#16, d_year#21] +Condition : ((isnotnull(d_year#21) AND (d_year#21 = 2001)) AND isnotnull(d_date_sk#16)) (32) Project [codegen id : 1] -Output [1]: [d_date_sk#19] -Input [2]: [d_date_sk#19, d_year#25] +Output [1]: [d_date_sk#16] +Input [2]: [d_date_sk#16, d_year#21] (33) BroadcastExchange -Input [1]: [d_date_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [1]: [d_date_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt index c1cdc24ac44d8..5fa1b48ce7546 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q48/explain.txt @@ -60,7 +60,7 @@ Condition : isnotnull(s_store_sk#9) (7) BroadcastExchange Input [1]: [s_store_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_store_sk#3] @@ -72,92 +72,92 @@ Output [6]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_ne Input [8]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_store_sk#3, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, s_store_sk#9] (10) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] +Output [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,4 yr Degree )),And(EqualTo(cd_marital_status,D),EqualTo(cd_education_status,2 yr Degree ))),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College )))] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] (12) Filter [codegen id : 2] -Input [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] -Condition : (isnotnull(cd_demo_sk#11) AND ((((cd_marital_status#12 = M) AND (cd_education_status#13 = 4 yr Degree )) OR ((cd_marital_status#12 = D) AND (cd_education_status#13 = 2 yr Degree ))) OR ((cd_marital_status#12 = S) AND (cd_education_status#13 = College )))) +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] +Condition : (isnotnull(cd_demo_sk#10) AND ((((cd_marital_status#11 = M) AND (cd_education_status#12 = 4 yr Degree )) OR ((cd_marital_status#11 = D) AND (cd_education_status#12 = 2 yr Degree ))) OR ((cd_marital_status#11 = S) AND (cd_education_status#12 = College )))) (13) BroadcastExchange -Input [3]: [cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Input [3]: [cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_cdemo_sk#1] -Right keys [1]: [cd_demo_sk#11] -Join condition: ((((((cd_marital_status#12 = M) AND (cd_education_status#13 = 4 yr Degree )) AND (ss_sales_price#5 >= 100.00)) AND (ss_sales_price#5 <= 150.00)) OR ((((cd_marital_status#12 = D) AND (cd_education_status#13 = 2 yr Degree )) AND (ss_sales_price#5 >= 50.00)) AND (ss_sales_price#5 <= 100.00))) OR ((((cd_marital_status#12 = S) AND (cd_education_status#13 = College )) AND (ss_sales_price#5 >= 150.00)) AND (ss_sales_price#5 <= 200.00))) +Right keys [1]: [cd_demo_sk#10] +Join condition: ((((((cd_marital_status#11 = M) AND (cd_education_status#12 = 4 yr Degree )) AND (ss_sales_price#5 >= 100.00)) AND (ss_sales_price#5 <= 150.00)) OR ((((cd_marital_status#11 = D) AND (cd_education_status#12 = 2 yr Degree )) AND (ss_sales_price#5 >= 50.00)) AND (ss_sales_price#5 <= 100.00))) OR ((((cd_marital_status#11 = S) AND (cd_education_status#12 = College )) AND (ss_sales_price#5 >= 150.00)) AND (ss_sales_price#5 <= 200.00))) (15) Project [codegen id : 5] Output [4]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7] -Input [9]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, cd_demo_sk#11, cd_marital_status#12, cd_education_status#13] +Input [9]: [ss_cdemo_sk#1, ss_addr_sk#2, ss_quantity#4, ss_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, cd_demo_sk#10, cd_marital_status#11, cd_education_status#12] (16) Scan parquet default.customer_address -Output [3]: [ca_address_sk#15, ca_state#16, ca_country#17] +Output [3]: [ca_address_sk#13, ca_state#14, ca_country#15] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [CO,OH,TX]),In(ca_state, [KY,MN,OR])),In(ca_state, [CA,MS,VA]))] ReadSchema: struct (17) ColumnarToRow [codegen id : 3] -Input [3]: [ca_address_sk#15, ca_state#16, ca_country#17] +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] (18) Filter [codegen id : 3] -Input [3]: [ca_address_sk#15, ca_state#16, ca_country#17] -Condition : (((isnotnull(ca_country#17) AND (ca_country#17 = United States)) AND isnotnull(ca_address_sk#15)) AND ((ca_state#16 IN (CO,OH,TX) OR ca_state#16 IN (OR,MN,KY)) OR ca_state#16 IN (VA,CA,MS))) +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] +Condition : (((isnotnull(ca_country#15) AND (ca_country#15 = United States)) AND isnotnull(ca_address_sk#13)) AND ((ca_state#14 IN (CO,OH,TX) OR ca_state#14 IN (OR,MN,KY)) OR ca_state#14 IN (VA,CA,MS))) (19) Project [codegen id : 3] -Output [2]: [ca_address_sk#15, ca_state#16] -Input [3]: [ca_address_sk#15, ca_state#16, ca_country#17] +Output [2]: [ca_address_sk#13, ca_state#14] +Input [3]: [ca_address_sk#13, ca_state#14, ca_country#15] (20) BroadcastExchange -Input [2]: [ca_address_sk#15, ca_state#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [2]: [ca_address_sk#13, ca_state#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_addr_sk#2] -Right keys [1]: [ca_address_sk#15] -Join condition: ((((ca_state#16 IN (CO,OH,TX) AND (ss_net_profit#6 >= 0.00)) AND (ss_net_profit#6 <= 2000.00)) OR ((ca_state#16 IN (OR,MN,KY) AND (ss_net_profit#6 >= 150.00)) AND (ss_net_profit#6 <= 3000.00))) OR ((ca_state#16 IN (VA,CA,MS) AND (ss_net_profit#6 >= 50.00)) AND (ss_net_profit#6 <= 25000.00))) +Right keys [1]: [ca_address_sk#13] +Join condition: ((((ca_state#14 IN (CO,OH,TX) AND (ss_net_profit#6 >= 0.00)) AND (ss_net_profit#6 <= 2000.00)) OR ((ca_state#14 IN (OR,MN,KY) AND (ss_net_profit#6 >= 150.00)) AND (ss_net_profit#6 <= 3000.00))) OR ((ca_state#14 IN (VA,CA,MS) AND (ss_net_profit#6 >= 50.00)) AND (ss_net_profit#6 <= 25000.00))) (22) Project [codegen id : 5] Output [2]: [ss_quantity#4, ss_sold_date_sk#7] -Input [6]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7, ca_address_sk#15, ca_state#16] +Input [6]: [ss_addr_sk#2, ss_quantity#4, ss_net_profit#6, ss_sold_date_sk#7, ca_address_sk#13, ca_state#14] (23) ReusedExchange [Reuses operator id: 33] -Output [1]: [d_date_sk#19] +Output [1]: [d_date_sk#16] (24) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_sold_date_sk#7] -Right keys [1]: [d_date_sk#19] +Right keys [1]: [d_date_sk#16] Join condition: None (25) Project [codegen id : 5] Output [1]: [ss_quantity#4] -Input [3]: [ss_quantity#4, ss_sold_date_sk#7, d_date_sk#19] +Input [3]: [ss_quantity#4, ss_sold_date_sk#7, d_date_sk#16] (26) HashAggregate [codegen id : 5] Input [1]: [ss_quantity#4] Keys: [] Functions [1]: [partial_sum(ss_quantity#4)] -Aggregate Attributes [1]: [sum#20] -Results [1]: [sum#21] +Aggregate Attributes [1]: [sum#17] +Results [1]: [sum#18] (27) Exchange -Input [1]: [sum#21] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#22] +Input [1]: [sum#18] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 6] -Input [1]: [sum#21] +Input [1]: [sum#18] Keys: [] Functions [1]: [sum(ss_quantity#4)] -Aggregate Attributes [1]: [sum(ss_quantity#4)#23] -Results [1]: [sum(ss_quantity#4)#23 AS sum(ss_quantity)#24] +Aggregate Attributes [1]: [sum(ss_quantity#4)#19] +Results [1]: [sum(ss_quantity#4)#19 AS sum(ss_quantity)#20] ===== Subqueries ===== @@ -170,25 +170,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [2]: [d_date_sk#19, d_year#25] +Output [2]: [d_date_sk#16, d_year#21] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#19, d_year#25] +Input [2]: [d_date_sk#16, d_year#21] (31) Filter [codegen id : 1] -Input [2]: [d_date_sk#19, d_year#25] -Condition : ((isnotnull(d_year#25) AND (d_year#25 = 2001)) AND isnotnull(d_date_sk#19)) +Input [2]: [d_date_sk#16, d_year#21] +Condition : ((isnotnull(d_year#21) AND (d_year#21 = 2001)) AND isnotnull(d_date_sk#16)) (32) Project [codegen id : 1] -Output [1]: [d_date_sk#19] -Input [2]: [d_date_sk#19, d_year#25] +Output [1]: [d_date_sk#16] +Input [2]: [d_date_sk#16, d_year#21] (33) BroadcastExchange -Input [1]: [d_date_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [1]: [d_date_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.sf100/explain.txt index 65606c025adc4..a73794d268177 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49.sf100/explain.txt @@ -120,356 +120,356 @@ Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_so (8) Exchange Input [4]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4] -Arguments: hashpartitioning(ws_order_number#2, ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ws_order_number#2, ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (9) Sort [codegen id : 3] Input [4]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4] Arguments: [ws_order_number#2 ASC NULLS FIRST, ws_item_sk#1 ASC NULLS FIRST], false, 0 (10) Scan parquet default.web_returns -Output [5]: [wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13, wr_returned_date_sk#14] +Output [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 4] -Input [5]: [wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13, wr_returned_date_sk#14] +Input [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] (12) Filter [codegen id : 4] -Input [5]: [wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13, wr_returned_date_sk#14] -Condition : (((isnotnull(wr_return_amt#13) AND (wr_return_amt#13 > 10000.00)) AND isnotnull(wr_order_number#11)) AND isnotnull(wr_item_sk#10)) +Input [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] +Condition : (((isnotnull(wr_return_amt#12) AND (wr_return_amt#12 > 10000.00)) AND isnotnull(wr_order_number#10)) AND isnotnull(wr_item_sk#9)) (13) Project [codegen id : 4] -Output [4]: [wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13] -Input [5]: [wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13, wr_returned_date_sk#14] +Output [4]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12] +Input [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] (14) Exchange -Input [4]: [wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13] -Arguments: hashpartitioning(wr_order_number#11, wr_item_sk#10, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12] +Arguments: hashpartitioning(wr_order_number#10, wr_item_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) Sort [codegen id : 5] -Input [4]: [wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13] -Arguments: [wr_order_number#11 ASC NULLS FIRST, wr_item_sk#10 ASC NULLS FIRST], false, 0 +Input [4]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12] +Arguments: [wr_order_number#10 ASC NULLS FIRST, wr_item_sk#9 ASC NULLS FIRST], false, 0 (16) SortMergeJoin [codegen id : 6] Left keys [2]: [ws_order_number#2, ws_item_sk#1] -Right keys [2]: [wr_order_number#11, wr_item_sk#10] +Right keys [2]: [wr_order_number#10, wr_item_sk#9] Join condition: None (17) Project [codegen id : 6] -Output [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#12, wr_return_amt#13] -Input [8]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13] +Output [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#11, wr_return_amt#12] +Input [8]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12] (18) HashAggregate [codegen id : 6] -Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#12, wr_return_amt#13] +Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#11, wr_return_amt#12] Keys [1]: [ws_item_sk#1] -Functions [4]: [partial_sum(coalesce(wr_return_quantity#12, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#16, sum#17, sum#18, isEmpty#19, sum#20, isEmpty#21] -Results [7]: [ws_item_sk#1, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] +Functions [4]: [partial_sum(coalesce(wr_return_quantity#11, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#14, sum#15, sum#16, isEmpty#17, sum#18, isEmpty#19] +Results [7]: [ws_item_sk#1, sum#20, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] (19) Exchange -Input [7]: [ws_item_sk#1, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] -Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [7]: [ws_item_sk#1, sum#20, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 7] -Input [7]: [ws_item_sk#1, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] +Input [7]: [ws_item_sk#1, sum#20, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] Keys [1]: [ws_item_sk#1] -Functions [4]: [sum(coalesce(wr_return_quantity#12, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#12, 0))#29, sum(coalesce(ws_quantity#3, 0))#30, sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00))#31, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#32] -Results [3]: [ws_item_sk#1 AS item#33, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#12, 0))#29 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#30 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#34, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00))#31 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#32 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#35] +Functions [4]: [sum(coalesce(wr_return_quantity#11, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#11, 0))#26, sum(coalesce(ws_quantity#3, 0))#27, sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00))#28, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#29] +Results [3]: [ws_item_sk#1 AS item#30, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#11, 0))#26 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#27 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#31, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00))#28 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#29 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#32] (21) Exchange -Input [3]: [item#33, return_ratio#34, currency_ratio#35] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#36] +Input [3]: [item#30, return_ratio#31, currency_ratio#32] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (22) Sort [codegen id : 8] -Input [3]: [item#33, return_ratio#34, currency_ratio#35] -Arguments: [return_ratio#34 ASC NULLS FIRST], false, 0 +Input [3]: [item#30, return_ratio#31, currency_ratio#32] +Arguments: [return_ratio#31 ASC NULLS FIRST], false, 0 (23) Window -Input [3]: [item#33, return_ratio#34, currency_ratio#35] -Arguments: [rank(return_ratio#34) windowspecdefinition(return_ratio#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#37], [return_ratio#34 ASC NULLS FIRST] +Input [3]: [item#30, return_ratio#31, currency_ratio#32] +Arguments: [rank(return_ratio#31) windowspecdefinition(return_ratio#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#33], [return_ratio#31 ASC NULLS FIRST] (24) Sort [codegen id : 9] -Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37] -Arguments: [currency_ratio#35 ASC NULLS FIRST], false, 0 +Input [4]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33] +Arguments: [currency_ratio#32 ASC NULLS FIRST], false, 0 (25) Window -Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37] -Arguments: [rank(currency_ratio#35) windowspecdefinition(currency_ratio#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#38], [currency_ratio#35 ASC NULLS FIRST] +Input [4]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33] +Arguments: [rank(currency_ratio#32) windowspecdefinition(currency_ratio#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#34], [currency_ratio#32 ASC NULLS FIRST] (26) Filter [codegen id : 10] -Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37, currency_rank#38] -Condition : ((return_rank#37 <= 10) OR (currency_rank#38 <= 10)) +Input [5]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33, currency_rank#34] +Condition : ((return_rank#33 <= 10) OR (currency_rank#34 <= 10)) (27) Project [codegen id : 10] -Output [5]: [web AS channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37, currency_rank#38] +Output [5]: [web AS channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Input [5]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33, currency_rank#34] (28) Scan parquet default.catalog_sales -Output [6]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43, cs_net_profit#44, cs_sold_date_sk#45] +Output [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#45), dynamicpruningexpression(cs_sold_date_sk#45 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(cs_sold_date_sk#41), dynamicpruningexpression(cs_sold_date_sk#41 IN dynamicpruning#7)] PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_order_number), IsNotNull(cs_item_sk)] ReadSchema: struct (29) ColumnarToRow [codegen id : 12] -Input [6]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43, cs_net_profit#44, cs_sold_date_sk#45] +Input [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] (30) Filter [codegen id : 12] -Input [6]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43, cs_net_profit#44, cs_sold_date_sk#45] -Condition : (((((((isnotnull(cs_net_profit#44) AND isnotnull(cs_net_paid#43)) AND isnotnull(cs_quantity#42)) AND (cs_net_profit#44 > 1.00)) AND (cs_net_paid#43 > 0.00)) AND (cs_quantity#42 > 0)) AND isnotnull(cs_order_number#41)) AND isnotnull(cs_item_sk#40)) +Input [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] +Condition : (((((((isnotnull(cs_net_profit#40) AND isnotnull(cs_net_paid#39)) AND isnotnull(cs_quantity#38)) AND (cs_net_profit#40 > 1.00)) AND (cs_net_paid#39 > 0.00)) AND (cs_quantity#38 > 0)) AND isnotnull(cs_order_number#37)) AND isnotnull(cs_item_sk#36)) (31) Project [codegen id : 12] -Output [5]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43, cs_sold_date_sk#45] -Input [6]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43, cs_net_profit#44, cs_sold_date_sk#45] +Output [5]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_sold_date_sk#41] +Input [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] (32) ReusedExchange [Reuses operator id: 91] -Output [1]: [d_date_sk#46] +Output [1]: [d_date_sk#42] (33) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [cs_sold_date_sk#45] -Right keys [1]: [d_date_sk#46] +Left keys [1]: [cs_sold_date_sk#41] +Right keys [1]: [d_date_sk#42] Join condition: None (34) Project [codegen id : 12] -Output [4]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43] -Input [6]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43, cs_sold_date_sk#45, d_date_sk#46] +Output [4]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39] +Input [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_sold_date_sk#41, d_date_sk#42] (35) Exchange -Input [4]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43] -Arguments: hashpartitioning(cs_order_number#41, cs_item_sk#40, 5), ENSURE_REQUIREMENTS, [id=#47] +Input [4]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39] +Arguments: hashpartitioning(cs_order_number#37, cs_item_sk#36, 5), ENSURE_REQUIREMENTS, [plan_id=5] (36) Sort [codegen id : 13] -Input [4]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43] -Arguments: [cs_order_number#41 ASC NULLS FIRST, cs_item_sk#40 ASC NULLS FIRST], false, 0 +Input [4]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39] +Arguments: [cs_order_number#37 ASC NULLS FIRST, cs_item_sk#36 ASC NULLS FIRST], false, 0 (37) Scan parquet default.catalog_returns -Output [5]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51, cr_returned_date_sk#52] +Output [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] ReadSchema: struct (38) ColumnarToRow [codegen id : 14] -Input [5]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51, cr_returned_date_sk#52] +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] (39) Filter [codegen id : 14] -Input [5]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51, cr_returned_date_sk#52] -Condition : (((isnotnull(cr_return_amount#51) AND (cr_return_amount#51 > 10000.00)) AND isnotnull(cr_order_number#49)) AND isnotnull(cr_item_sk#48)) +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] +Condition : (((isnotnull(cr_return_amount#46) AND (cr_return_amount#46 > 10000.00)) AND isnotnull(cr_order_number#44)) AND isnotnull(cr_item_sk#43)) (40) Project [codegen id : 14] -Output [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] -Input [5]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51, cr_returned_date_sk#52] +Output [4]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] (41) Exchange -Input [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] -Arguments: hashpartitioning(cr_order_number#49, cr_item_sk#48, 5), ENSURE_REQUIREMENTS, [id=#53] +Input [4]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] +Arguments: hashpartitioning(cr_order_number#44, cr_item_sk#43, 5), ENSURE_REQUIREMENTS, [plan_id=6] (42) Sort [codegen id : 15] -Input [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] -Arguments: [cr_order_number#49 ASC NULLS FIRST, cr_item_sk#48 ASC NULLS FIRST], false, 0 +Input [4]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] +Arguments: [cr_order_number#44 ASC NULLS FIRST, cr_item_sk#43 ASC NULLS FIRST], false, 0 (43) SortMergeJoin [codegen id : 16] -Left keys [2]: [cs_order_number#41, cs_item_sk#40] -Right keys [2]: [cr_order_number#49, cr_item_sk#48] +Left keys [2]: [cs_order_number#37, cs_item_sk#36] +Right keys [2]: [cr_order_number#44, cr_item_sk#43] Join condition: None (44) Project [codegen id : 16] -Output [5]: [cs_item_sk#40, cs_quantity#42, cs_net_paid#43, cr_return_quantity#50, cr_return_amount#51] -Input [8]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43, cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] +Output [5]: [cs_item_sk#36, cs_quantity#38, cs_net_paid#39, cr_return_quantity#45, cr_return_amount#46] +Input [8]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] (45) HashAggregate [codegen id : 16] -Input [5]: [cs_item_sk#40, cs_quantity#42, cs_net_paid#43, cr_return_quantity#50, cr_return_amount#51] -Keys [1]: [cs_item_sk#40] -Functions [4]: [partial_sum(coalesce(cr_return_quantity#50, 0)), partial_sum(coalesce(cs_quantity#42, 0)), partial_sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#54, sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] -Results [7]: [cs_item_sk#40, sum#60, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Input [5]: [cs_item_sk#36, cs_quantity#38, cs_net_paid#39, cr_return_quantity#45, cr_return_amount#46] +Keys [1]: [cs_item_sk#36] +Functions [4]: [partial_sum(coalesce(cr_return_quantity#45, 0)), partial_sum(coalesce(cs_quantity#38, 0)), partial_sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#48, sum#49, sum#50, isEmpty#51, sum#52, isEmpty#53] +Results [7]: [cs_item_sk#36, sum#54, sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] (46) Exchange -Input [7]: [cs_item_sk#40, sum#60, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] -Arguments: hashpartitioning(cs_item_sk#40, 5), ENSURE_REQUIREMENTS, [id=#66] +Input [7]: [cs_item_sk#36, sum#54, sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] +Arguments: hashpartitioning(cs_item_sk#36, 5), ENSURE_REQUIREMENTS, [plan_id=7] (47) HashAggregate [codegen id : 17] -Input [7]: [cs_item_sk#40, sum#60, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] -Keys [1]: [cs_item_sk#40] -Functions [4]: [sum(coalesce(cr_return_quantity#50, 0)), sum(coalesce(cs_quantity#42, 0)), sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#50, 0))#67, sum(coalesce(cs_quantity#42, 0))#68, sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00))#69, sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))#70] -Results [3]: [cs_item_sk#40 AS item#71, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#50, 0))#67 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#42, 0))#68 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#72, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00))#69 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))#70 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#73] +Input [7]: [cs_item_sk#36, sum#54, sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] +Keys [1]: [cs_item_sk#36] +Functions [4]: [sum(coalesce(cr_return_quantity#45, 0)), sum(coalesce(cs_quantity#38, 0)), sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#45, 0))#60, sum(coalesce(cs_quantity#38, 0))#61, sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00))#62, sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))#63] +Results [3]: [cs_item_sk#36 AS item#64, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#45, 0))#60 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#38, 0))#61 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#65, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00))#62 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))#63 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#66] (48) Exchange -Input [3]: [item#71, return_ratio#72, currency_ratio#73] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#74] +Input [3]: [item#64, return_ratio#65, currency_ratio#66] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] (49) Sort [codegen id : 18] -Input [3]: [item#71, return_ratio#72, currency_ratio#73] -Arguments: [return_ratio#72 ASC NULLS FIRST], false, 0 +Input [3]: [item#64, return_ratio#65, currency_ratio#66] +Arguments: [return_ratio#65 ASC NULLS FIRST], false, 0 (50) Window -Input [3]: [item#71, return_ratio#72, currency_ratio#73] -Arguments: [rank(return_ratio#72) windowspecdefinition(return_ratio#72 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#75], [return_ratio#72 ASC NULLS FIRST] +Input [3]: [item#64, return_ratio#65, currency_ratio#66] +Arguments: [rank(return_ratio#65) windowspecdefinition(return_ratio#65 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#67], [return_ratio#65 ASC NULLS FIRST] (51) Sort [codegen id : 19] -Input [4]: [item#71, return_ratio#72, currency_ratio#73, return_rank#75] -Arguments: [currency_ratio#73 ASC NULLS FIRST], false, 0 +Input [4]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67] +Arguments: [currency_ratio#66 ASC NULLS FIRST], false, 0 (52) Window -Input [4]: [item#71, return_ratio#72, currency_ratio#73, return_rank#75] -Arguments: [rank(currency_ratio#73) windowspecdefinition(currency_ratio#73 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#76], [currency_ratio#73 ASC NULLS FIRST] +Input [4]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67] +Arguments: [rank(currency_ratio#66) windowspecdefinition(currency_ratio#66 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#68], [currency_ratio#66 ASC NULLS FIRST] (53) Filter [codegen id : 20] -Input [5]: [item#71, return_ratio#72, currency_ratio#73, return_rank#75, currency_rank#76] -Condition : ((return_rank#75 <= 10) OR (currency_rank#76 <= 10)) +Input [5]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67, currency_rank#68] +Condition : ((return_rank#67 <= 10) OR (currency_rank#68 <= 10)) (54) Project [codegen id : 20] -Output [5]: [catalog AS channel#77, item#71, return_ratio#72, return_rank#75, currency_rank#76] -Input [5]: [item#71, return_ratio#72, currency_ratio#73, return_rank#75, currency_rank#76] +Output [5]: [catalog AS channel#69, item#64, return_ratio#65, return_rank#67, currency_rank#68] +Input [5]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67, currency_rank#68] (55) Scan parquet default.store_sales -Output [6]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82, ss_sold_date_sk#83] +Output [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#83), dynamicpruningexpression(ss_sold_date_sk#83 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ss_sold_date_sk#75), dynamicpruningexpression(ss_sold_date_sk#75 IN dynamicpruning#7)] PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_net_paid), IsNotNull(ss_quantity), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk)] ReadSchema: struct (56) ColumnarToRow [codegen id : 22] -Input [6]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82, ss_sold_date_sk#83] +Input [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] (57) Filter [codegen id : 22] -Input [6]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82, ss_sold_date_sk#83] -Condition : (((((((isnotnull(ss_net_profit#82) AND isnotnull(ss_net_paid#81)) AND isnotnull(ss_quantity#80)) AND (ss_net_profit#82 > 1.00)) AND (ss_net_paid#81 > 0.00)) AND (ss_quantity#80 > 0)) AND isnotnull(ss_ticket_number#79)) AND isnotnull(ss_item_sk#78)) +Input [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] +Condition : (((((((isnotnull(ss_net_profit#74) AND isnotnull(ss_net_paid#73)) AND isnotnull(ss_quantity#72)) AND (ss_net_profit#74 > 1.00)) AND (ss_net_paid#73 > 0.00)) AND (ss_quantity#72 > 0)) AND isnotnull(ss_ticket_number#71)) AND isnotnull(ss_item_sk#70)) (58) Project [codegen id : 22] -Output [5]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_sold_date_sk#83] -Input [6]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82, ss_sold_date_sk#83] +Output [5]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_sold_date_sk#75] +Input [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] (59) ReusedExchange [Reuses operator id: 91] -Output [1]: [d_date_sk#84] +Output [1]: [d_date_sk#76] (60) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [ss_sold_date_sk#83] -Right keys [1]: [d_date_sk#84] +Left keys [1]: [ss_sold_date_sk#75] +Right keys [1]: [d_date_sk#76] Join condition: None (61) Project [codegen id : 22] -Output [4]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81] -Input [6]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_sold_date_sk#83, d_date_sk#84] +Output [4]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73] +Input [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_sold_date_sk#75, d_date_sk#76] (62) Exchange -Input [4]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81] -Arguments: hashpartitioning(ss_ticket_number#79, ss_item_sk#78, 5), ENSURE_REQUIREMENTS, [id=#85] +Input [4]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73] +Arguments: hashpartitioning(ss_ticket_number#71, ss_item_sk#70, 5), ENSURE_REQUIREMENTS, [plan_id=9] (63) Sort [codegen id : 23] -Input [4]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81] -Arguments: [ss_ticket_number#79 ASC NULLS FIRST, ss_item_sk#78 ASC NULLS FIRST], false, 0 +Input [4]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73] +Arguments: [ss_ticket_number#71 ASC NULLS FIRST, ss_item_sk#70 ASC NULLS FIRST], false, 0 (64) Scan parquet default.store_returns -Output [5]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89, sr_returned_date_sk#90] +Output [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] ReadSchema: struct (65) ColumnarToRow [codegen id : 24] -Input [5]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89, sr_returned_date_sk#90] +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] (66) Filter [codegen id : 24] -Input [5]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89, sr_returned_date_sk#90] -Condition : (((isnotnull(sr_return_amt#89) AND (sr_return_amt#89 > 10000.00)) AND isnotnull(sr_ticket_number#87)) AND isnotnull(sr_item_sk#86)) +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] +Condition : (((isnotnull(sr_return_amt#80) AND (sr_return_amt#80 > 10000.00)) AND isnotnull(sr_ticket_number#78)) AND isnotnull(sr_item_sk#77)) (67) Project [codegen id : 24] -Output [4]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89] -Input [5]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89, sr_returned_date_sk#90] +Output [4]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] (68) Exchange -Input [4]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89] -Arguments: hashpartitioning(sr_ticket_number#87, sr_item_sk#86, 5), ENSURE_REQUIREMENTS, [id=#91] +Input [4]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] +Arguments: hashpartitioning(sr_ticket_number#78, sr_item_sk#77, 5), ENSURE_REQUIREMENTS, [plan_id=10] (69) Sort [codegen id : 25] -Input [4]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89] -Arguments: [sr_ticket_number#87 ASC NULLS FIRST, sr_item_sk#86 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] +Arguments: [sr_ticket_number#78 ASC NULLS FIRST, sr_item_sk#77 ASC NULLS FIRST], false, 0 (70) SortMergeJoin [codegen id : 26] -Left keys [2]: [ss_ticket_number#79, ss_item_sk#78] -Right keys [2]: [sr_ticket_number#87, sr_item_sk#86] +Left keys [2]: [ss_ticket_number#71, ss_item_sk#70] +Right keys [2]: [sr_ticket_number#78, sr_item_sk#77] Join condition: None (71) Project [codegen id : 26] -Output [5]: [ss_item_sk#78, ss_quantity#80, ss_net_paid#81, sr_return_quantity#88, sr_return_amt#89] -Input [8]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89] +Output [5]: [ss_item_sk#70, ss_quantity#72, ss_net_paid#73, sr_return_quantity#79, sr_return_amt#80] +Input [8]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] (72) HashAggregate [codegen id : 26] -Input [5]: [ss_item_sk#78, ss_quantity#80, ss_net_paid#81, sr_return_quantity#88, sr_return_amt#89] -Keys [1]: [ss_item_sk#78] -Functions [4]: [partial_sum(coalesce(sr_return_quantity#88, 0)), partial_sum(coalesce(ss_quantity#80, 0)), partial_sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#92, sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] -Results [7]: [ss_item_sk#78, sum#98, sum#99, sum#100, isEmpty#101, sum#102, isEmpty#103] +Input [5]: [ss_item_sk#70, ss_quantity#72, ss_net_paid#73, sr_return_quantity#79, sr_return_amt#80] +Keys [1]: [ss_item_sk#70] +Functions [4]: [partial_sum(coalesce(sr_return_quantity#79, 0)), partial_sum(coalesce(ss_quantity#72, 0)), partial_sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#82, sum#83, sum#84, isEmpty#85, sum#86, isEmpty#87] +Results [7]: [ss_item_sk#70, sum#88, sum#89, sum#90, isEmpty#91, sum#92, isEmpty#93] (73) Exchange -Input [7]: [ss_item_sk#78, sum#98, sum#99, sum#100, isEmpty#101, sum#102, isEmpty#103] -Arguments: hashpartitioning(ss_item_sk#78, 5), ENSURE_REQUIREMENTS, [id=#104] +Input [7]: [ss_item_sk#70, sum#88, sum#89, sum#90, isEmpty#91, sum#92, isEmpty#93] +Arguments: hashpartitioning(ss_item_sk#70, 5), ENSURE_REQUIREMENTS, [plan_id=11] (74) HashAggregate [codegen id : 27] -Input [7]: [ss_item_sk#78, sum#98, sum#99, sum#100, isEmpty#101, sum#102, isEmpty#103] -Keys [1]: [ss_item_sk#78] -Functions [4]: [sum(coalesce(sr_return_quantity#88, 0)), sum(coalesce(ss_quantity#80, 0)), sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#88, 0))#105, sum(coalesce(ss_quantity#80, 0))#106, sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00))#107, sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))#108] -Results [3]: [ss_item_sk#78 AS item#109, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#88, 0))#105 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#80, 0))#106 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#110, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00))#107 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))#108 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#111] +Input [7]: [ss_item_sk#70, sum#88, sum#89, sum#90, isEmpty#91, sum#92, isEmpty#93] +Keys [1]: [ss_item_sk#70] +Functions [4]: [sum(coalesce(sr_return_quantity#79, 0)), sum(coalesce(ss_quantity#72, 0)), sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#79, 0))#94, sum(coalesce(ss_quantity#72, 0))#95, sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00))#96, sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))#97] +Results [3]: [ss_item_sk#70 AS item#98, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#79, 0))#94 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#72, 0))#95 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#99, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00))#96 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))#97 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#100] (75) Exchange -Input [3]: [item#109, return_ratio#110, currency_ratio#111] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#112] +Input [3]: [item#98, return_ratio#99, currency_ratio#100] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] (76) Sort [codegen id : 28] -Input [3]: [item#109, return_ratio#110, currency_ratio#111] -Arguments: [return_ratio#110 ASC NULLS FIRST], false, 0 +Input [3]: [item#98, return_ratio#99, currency_ratio#100] +Arguments: [return_ratio#99 ASC NULLS FIRST], false, 0 (77) Window -Input [3]: [item#109, return_ratio#110, currency_ratio#111] -Arguments: [rank(return_ratio#110) windowspecdefinition(return_ratio#110 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#113], [return_ratio#110 ASC NULLS FIRST] +Input [3]: [item#98, return_ratio#99, currency_ratio#100] +Arguments: [rank(return_ratio#99) windowspecdefinition(return_ratio#99 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#101], [return_ratio#99 ASC NULLS FIRST] (78) Sort [codegen id : 29] -Input [4]: [item#109, return_ratio#110, currency_ratio#111, return_rank#113] -Arguments: [currency_ratio#111 ASC NULLS FIRST], false, 0 +Input [4]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101] +Arguments: [currency_ratio#100 ASC NULLS FIRST], false, 0 (79) Window -Input [4]: [item#109, return_ratio#110, currency_ratio#111, return_rank#113] -Arguments: [rank(currency_ratio#111) windowspecdefinition(currency_ratio#111 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#114], [currency_ratio#111 ASC NULLS FIRST] +Input [4]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101] +Arguments: [rank(currency_ratio#100) windowspecdefinition(currency_ratio#100 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#102], [currency_ratio#100 ASC NULLS FIRST] (80) Filter [codegen id : 30] -Input [5]: [item#109, return_ratio#110, currency_ratio#111, return_rank#113, currency_rank#114] -Condition : ((return_rank#113 <= 10) OR (currency_rank#114 <= 10)) +Input [5]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101, currency_rank#102] +Condition : ((return_rank#101 <= 10) OR (currency_rank#102 <= 10)) (81) Project [codegen id : 30] -Output [5]: [store AS channel#115, item#109, return_ratio#110, return_rank#113, currency_rank#114] -Input [5]: [item#109, return_ratio#110, currency_ratio#111, return_rank#113, currency_rank#114] +Output [5]: [store AS channel#103, item#98, return_ratio#99, return_rank#101, currency_rank#102] +Input [5]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101, currency_rank#102] (82) Union (83) HashAggregate [codegen id : 31] -Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Keys [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Keys [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Results [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] (84) Exchange -Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Arguments: hashpartitioning(channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38, 5), ENSURE_REQUIREMENTS, [id=#116] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Arguments: hashpartitioning(channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34, 5), ENSURE_REQUIREMENTS, [plan_id=13] (85) HashAggregate [codegen id : 32] -Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Keys [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Keys [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Results [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] (86) TakeOrderedAndProject -Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Arguments: 100, [channel#39 ASC NULLS FIRST, return_rank#37 ASC NULLS FIRST, currency_rank#38 ASC NULLS FIRST], [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Arguments: 100, [channel#35 ASC NULLS FIRST, return_rank#33 ASC NULLS FIRST, currency_rank#34 ASC NULLS FIRST], [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] ===== Subqueries ===== @@ -482,29 +482,29 @@ BroadcastExchange (91) (87) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#117, d_moy#118] +Output [3]: [d_date_sk#8, d_year#104, d_moy#105] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] ReadSchema: struct (88) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#117, d_moy#118] +Input [3]: [d_date_sk#8, d_year#104, d_moy#105] (89) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#117, d_moy#118] -Condition : ((((isnotnull(d_year#117) AND isnotnull(d_moy#118)) AND (d_year#117 = 2001)) AND (d_moy#118 = 12)) AND isnotnull(d_date_sk#8)) +Input [3]: [d_date_sk#8, d_year#104, d_moy#105] +Condition : ((((isnotnull(d_year#104) AND isnotnull(d_moy#105)) AND (d_year#104 = 2001)) AND (d_moy#105 = 12)) AND isnotnull(d_date_sk#8)) (90) Project [codegen id : 1] Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#117, d_moy#118] +Input [3]: [d_date_sk#8, d_year#104, d_moy#105] (91) BroadcastExchange Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#119] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] -Subquery:2 Hosting operator id = 28 Hosting Expression = cs_sold_date_sk#45 IN dynamicpruning#7 +Subquery:2 Hosting operator id = 28 Hosting Expression = cs_sold_date_sk#41 IN dynamicpruning#7 -Subquery:3 Hosting operator id = 55 Hosting Expression = ss_sold_date_sk#83 IN dynamicpruning#7 +Subquery:3 Hosting operator id = 55 Hosting Expression = ss_sold_date_sk#75 IN dynamicpruning#7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt index ac64de5188462..371ed43fb99f6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q49/explain.txt @@ -99,332 +99,332 @@ Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_ne (5) BroadcastExchange Input [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=1] (6) Scan parquet default.web_returns -Output [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] +Output [5]: [wr_item_sk#8, wr_order_number#9, wr_return_quantity#10, wr_return_amt#11, wr_returned_date_sk#12] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct (7) ColumnarToRow -Input [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] +Input [5]: [wr_item_sk#8, wr_order_number#9, wr_return_quantity#10, wr_return_amt#11, wr_returned_date_sk#12] (8) Filter -Input [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] -Condition : (((isnotnull(wr_return_amt#12) AND (wr_return_amt#12 > 10000.00)) AND isnotnull(wr_order_number#10)) AND isnotnull(wr_item_sk#9)) +Input [5]: [wr_item_sk#8, wr_order_number#9, wr_return_quantity#10, wr_return_amt#11, wr_returned_date_sk#12] +Condition : (((isnotnull(wr_return_amt#11) AND (wr_return_amt#11 > 10000.00)) AND isnotnull(wr_order_number#9)) AND isnotnull(wr_item_sk#8)) (9) Project -Output [4]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12] -Input [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] +Output [4]: [wr_item_sk#8, wr_order_number#9, wr_return_quantity#10, wr_return_amt#11] +Input [5]: [wr_item_sk#8, wr_order_number#9, wr_return_quantity#10, wr_return_amt#11, wr_returned_date_sk#12] (10) BroadcastHashJoin [codegen id : 3] Left keys [2]: [ws_order_number#2, ws_item_sk#1] -Right keys [2]: [wr_order_number#10, wr_item_sk#9] +Right keys [2]: [wr_order_number#9, wr_item_sk#8] Join condition: None (11) Project [codegen id : 3] -Output [6]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#11, wr_return_amt#12] -Input [9]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12] +Output [6]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#10, wr_return_amt#11] +Input [9]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_item_sk#8, wr_order_number#9, wr_return_quantity#10, wr_return_amt#11] (12) ReusedExchange [Reuses operator id: 82] -Output [1]: [d_date_sk#14] +Output [1]: [d_date_sk#13] (13) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ws_sold_date_sk#6] -Right keys [1]: [d_date_sk#14] +Right keys [1]: [d_date_sk#13] Join condition: None (14) Project [codegen id : 3] -Output [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#11, wr_return_amt#12] -Input [7]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#11, wr_return_amt#12, d_date_sk#14] +Output [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#10, wr_return_amt#11] +Input [7]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#10, wr_return_amt#11, d_date_sk#13] (15) HashAggregate [codegen id : 3] -Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#11, wr_return_amt#12] +Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#10, wr_return_amt#11] Keys [1]: [ws_item_sk#1] -Functions [4]: [partial_sum(coalesce(wr_return_quantity#11, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] -Results [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] +Functions [4]: [partial_sum(coalesce(wr_return_quantity#10, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#11 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#14, sum#15, sum#16, isEmpty#17, sum#18, isEmpty#19] +Results [7]: [ws_item_sk#1, sum#20, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] (16) Exchange -Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] -Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [7]: [ws_item_sk#1, sum#20, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (17) HashAggregate [codegen id : 4] -Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] +Input [7]: [ws_item_sk#1, sum#20, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] Keys [1]: [ws_item_sk#1] -Functions [4]: [sum(coalesce(wr_return_quantity#11, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#11, 0))#28, sum(coalesce(ws_quantity#3, 0))#29, sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00))#30, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#31] -Results [3]: [ws_item_sk#1 AS item#32, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#11, 0))#28 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#29 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#33, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00))#30 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#31 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#34] +Functions [4]: [sum(coalesce(wr_return_quantity#10, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#11 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#10, 0))#26, sum(coalesce(ws_quantity#3, 0))#27, sum(coalesce(cast(wr_return_amt#11 as decimal(12,2)), 0.00))#28, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#29] +Results [3]: [ws_item_sk#1 AS item#30, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#10, 0))#26 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#27 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#31, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#11 as decimal(12,2)), 0.00))#28 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#29 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#32] (18) Exchange -Input [3]: [item#32, return_ratio#33, currency_ratio#34] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#35] +Input [3]: [item#30, return_ratio#31, currency_ratio#32] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 5] -Input [3]: [item#32, return_ratio#33, currency_ratio#34] -Arguments: [return_ratio#33 ASC NULLS FIRST], false, 0 +Input [3]: [item#30, return_ratio#31, currency_ratio#32] +Arguments: [return_ratio#31 ASC NULLS FIRST], false, 0 (20) Window -Input [3]: [item#32, return_ratio#33, currency_ratio#34] -Arguments: [rank(return_ratio#33) windowspecdefinition(return_ratio#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#36], [return_ratio#33 ASC NULLS FIRST] +Input [3]: [item#30, return_ratio#31, currency_ratio#32] +Arguments: [rank(return_ratio#31) windowspecdefinition(return_ratio#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#33], [return_ratio#31 ASC NULLS FIRST] (21) Sort [codegen id : 6] -Input [4]: [item#32, return_ratio#33, currency_ratio#34, return_rank#36] -Arguments: [currency_ratio#34 ASC NULLS FIRST], false, 0 +Input [4]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33] +Arguments: [currency_ratio#32 ASC NULLS FIRST], false, 0 (22) Window -Input [4]: [item#32, return_ratio#33, currency_ratio#34, return_rank#36] -Arguments: [rank(currency_ratio#34) windowspecdefinition(currency_ratio#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#37], [currency_ratio#34 ASC NULLS FIRST] +Input [4]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33] +Arguments: [rank(currency_ratio#32) windowspecdefinition(currency_ratio#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#34], [currency_ratio#32 ASC NULLS FIRST] (23) Filter [codegen id : 7] -Input [5]: [item#32, return_ratio#33, currency_ratio#34, return_rank#36, currency_rank#37] -Condition : ((return_rank#36 <= 10) OR (currency_rank#37 <= 10)) +Input [5]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33, currency_rank#34] +Condition : ((return_rank#33 <= 10) OR (currency_rank#34 <= 10)) (24) Project [codegen id : 7] -Output [5]: [web AS channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] -Input [5]: [item#32, return_ratio#33, currency_ratio#34, return_rank#36, currency_rank#37] +Output [5]: [web AS channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Input [5]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33, currency_rank#34] (25) Scan parquet default.catalog_sales -Output [6]: [cs_item_sk#39, cs_order_number#40, cs_quantity#41, cs_net_paid#42, cs_net_profit#43, cs_sold_date_sk#44] +Output [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#44), dynamicpruningexpression(cs_sold_date_sk#44 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(cs_sold_date_sk#41), dynamicpruningexpression(cs_sold_date_sk#41 IN dynamicpruning#7)] PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_order_number), IsNotNull(cs_item_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 8] -Input [6]: [cs_item_sk#39, cs_order_number#40, cs_quantity#41, cs_net_paid#42, cs_net_profit#43, cs_sold_date_sk#44] +Input [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] (27) Filter [codegen id : 8] -Input [6]: [cs_item_sk#39, cs_order_number#40, cs_quantity#41, cs_net_paid#42, cs_net_profit#43, cs_sold_date_sk#44] -Condition : (((((((isnotnull(cs_net_profit#43) AND isnotnull(cs_net_paid#42)) AND isnotnull(cs_quantity#41)) AND (cs_net_profit#43 > 1.00)) AND (cs_net_paid#42 > 0.00)) AND (cs_quantity#41 > 0)) AND isnotnull(cs_order_number#40)) AND isnotnull(cs_item_sk#39)) +Input [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] +Condition : (((((((isnotnull(cs_net_profit#40) AND isnotnull(cs_net_paid#39)) AND isnotnull(cs_quantity#38)) AND (cs_net_profit#40 > 1.00)) AND (cs_net_paid#39 > 0.00)) AND (cs_quantity#38 > 0)) AND isnotnull(cs_order_number#37)) AND isnotnull(cs_item_sk#36)) (28) Project [codegen id : 8] -Output [5]: [cs_item_sk#39, cs_order_number#40, cs_quantity#41, cs_net_paid#42, cs_sold_date_sk#44] -Input [6]: [cs_item_sk#39, cs_order_number#40, cs_quantity#41, cs_net_paid#42, cs_net_profit#43, cs_sold_date_sk#44] +Output [5]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_sold_date_sk#41] +Input [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] (29) BroadcastExchange -Input [5]: [cs_item_sk#39, cs_order_number#40, cs_quantity#41, cs_net_paid#42, cs_sold_date_sk#44] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [id=#45] +Input [5]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_sold_date_sk#41] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=4] (30) Scan parquet default.catalog_returns -Output [5]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49, cr_returned_date_sk#50] +Output [5]: [cr_item_sk#42, cr_order_number#43, cr_return_quantity#44, cr_return_amount#45, cr_returned_date_sk#46] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] ReadSchema: struct (31) ColumnarToRow -Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49, cr_returned_date_sk#50] +Input [5]: [cr_item_sk#42, cr_order_number#43, cr_return_quantity#44, cr_return_amount#45, cr_returned_date_sk#46] (32) Filter -Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49, cr_returned_date_sk#50] -Condition : (((isnotnull(cr_return_amount#49) AND (cr_return_amount#49 > 10000.00)) AND isnotnull(cr_order_number#47)) AND isnotnull(cr_item_sk#46)) +Input [5]: [cr_item_sk#42, cr_order_number#43, cr_return_quantity#44, cr_return_amount#45, cr_returned_date_sk#46] +Condition : (((isnotnull(cr_return_amount#45) AND (cr_return_amount#45 > 10000.00)) AND isnotnull(cr_order_number#43)) AND isnotnull(cr_item_sk#42)) (33) Project -Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] -Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49, cr_returned_date_sk#50] +Output [4]: [cr_item_sk#42, cr_order_number#43, cr_return_quantity#44, cr_return_amount#45] +Input [5]: [cr_item_sk#42, cr_order_number#43, cr_return_quantity#44, cr_return_amount#45, cr_returned_date_sk#46] (34) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [cs_order_number#40, cs_item_sk#39] -Right keys [2]: [cr_order_number#47, cr_item_sk#46] +Left keys [2]: [cs_order_number#37, cs_item_sk#36] +Right keys [2]: [cr_order_number#43, cr_item_sk#42] Join condition: None (35) Project [codegen id : 10] -Output [6]: [cs_item_sk#39, cs_quantity#41, cs_net_paid#42, cs_sold_date_sk#44, cr_return_quantity#48, cr_return_amount#49] -Input [9]: [cs_item_sk#39, cs_order_number#40, cs_quantity#41, cs_net_paid#42, cs_sold_date_sk#44, cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] +Output [6]: [cs_item_sk#36, cs_quantity#38, cs_net_paid#39, cs_sold_date_sk#41, cr_return_quantity#44, cr_return_amount#45] +Input [9]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_sold_date_sk#41, cr_item_sk#42, cr_order_number#43, cr_return_quantity#44, cr_return_amount#45] (36) ReusedExchange [Reuses operator id: 82] -Output [1]: [d_date_sk#51] +Output [1]: [d_date_sk#47] (37) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#44] -Right keys [1]: [d_date_sk#51] +Left keys [1]: [cs_sold_date_sk#41] +Right keys [1]: [d_date_sk#47] Join condition: None (38) Project [codegen id : 10] -Output [5]: [cs_item_sk#39, cs_quantity#41, cs_net_paid#42, cr_return_quantity#48, cr_return_amount#49] -Input [7]: [cs_item_sk#39, cs_quantity#41, cs_net_paid#42, cs_sold_date_sk#44, cr_return_quantity#48, cr_return_amount#49, d_date_sk#51] +Output [5]: [cs_item_sk#36, cs_quantity#38, cs_net_paid#39, cr_return_quantity#44, cr_return_amount#45] +Input [7]: [cs_item_sk#36, cs_quantity#38, cs_net_paid#39, cs_sold_date_sk#41, cr_return_quantity#44, cr_return_amount#45, d_date_sk#47] (39) HashAggregate [codegen id : 10] -Input [5]: [cs_item_sk#39, cs_quantity#41, cs_net_paid#42, cr_return_quantity#48, cr_return_amount#49] -Keys [1]: [cs_item_sk#39] -Functions [4]: [partial_sum(coalesce(cr_return_quantity#48, 0)), partial_sum(coalesce(cs_quantity#41, 0)), partial_sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#52, sum#53, sum#54, isEmpty#55, sum#56, isEmpty#57] -Results [7]: [cs_item_sk#39, sum#58, sum#59, sum#60, isEmpty#61, sum#62, isEmpty#63] +Input [5]: [cs_item_sk#36, cs_quantity#38, cs_net_paid#39, cr_return_quantity#44, cr_return_amount#45] +Keys [1]: [cs_item_sk#36] +Functions [4]: [partial_sum(coalesce(cr_return_quantity#44, 0)), partial_sum(coalesce(cs_quantity#38, 0)), partial_sum(coalesce(cast(cr_return_amount#45 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#48, sum#49, sum#50, isEmpty#51, sum#52, isEmpty#53] +Results [7]: [cs_item_sk#36, sum#54, sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] (40) Exchange -Input [7]: [cs_item_sk#39, sum#58, sum#59, sum#60, isEmpty#61, sum#62, isEmpty#63] -Arguments: hashpartitioning(cs_item_sk#39, 5), ENSURE_REQUIREMENTS, [id=#64] +Input [7]: [cs_item_sk#36, sum#54, sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] +Arguments: hashpartitioning(cs_item_sk#36, 5), ENSURE_REQUIREMENTS, [plan_id=5] (41) HashAggregate [codegen id : 11] -Input [7]: [cs_item_sk#39, sum#58, sum#59, sum#60, isEmpty#61, sum#62, isEmpty#63] -Keys [1]: [cs_item_sk#39] -Functions [4]: [sum(coalesce(cr_return_quantity#48, 0)), sum(coalesce(cs_quantity#41, 0)), sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#48, 0))#65, sum(coalesce(cs_quantity#41, 0))#66, sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#67, sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))#68] -Results [3]: [cs_item_sk#39 AS item#69, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#48, 0))#65 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#41, 0))#66 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#70, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#67 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))#68 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#71] +Input [7]: [cs_item_sk#36, sum#54, sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] +Keys [1]: [cs_item_sk#36] +Functions [4]: [sum(coalesce(cr_return_quantity#44, 0)), sum(coalesce(cs_quantity#38, 0)), sum(coalesce(cast(cr_return_amount#45 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#44, 0))#60, sum(coalesce(cs_quantity#38, 0))#61, sum(coalesce(cast(cr_return_amount#45 as decimal(12,2)), 0.00))#62, sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))#63] +Results [3]: [cs_item_sk#36 AS item#64, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#44, 0))#60 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#38, 0))#61 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#65, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#45 as decimal(12,2)), 0.00))#62 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))#63 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#66] (42) Exchange -Input [3]: [item#69, return_ratio#70, currency_ratio#71] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#72] +Input [3]: [item#64, return_ratio#65, currency_ratio#66] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] (43) Sort [codegen id : 12] -Input [3]: [item#69, return_ratio#70, currency_ratio#71] -Arguments: [return_ratio#70 ASC NULLS FIRST], false, 0 +Input [3]: [item#64, return_ratio#65, currency_ratio#66] +Arguments: [return_ratio#65 ASC NULLS FIRST], false, 0 (44) Window -Input [3]: [item#69, return_ratio#70, currency_ratio#71] -Arguments: [rank(return_ratio#70) windowspecdefinition(return_ratio#70 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#73], [return_ratio#70 ASC NULLS FIRST] +Input [3]: [item#64, return_ratio#65, currency_ratio#66] +Arguments: [rank(return_ratio#65) windowspecdefinition(return_ratio#65 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#67], [return_ratio#65 ASC NULLS FIRST] (45) Sort [codegen id : 13] -Input [4]: [item#69, return_ratio#70, currency_ratio#71, return_rank#73] -Arguments: [currency_ratio#71 ASC NULLS FIRST], false, 0 +Input [4]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67] +Arguments: [currency_ratio#66 ASC NULLS FIRST], false, 0 (46) Window -Input [4]: [item#69, return_ratio#70, currency_ratio#71, return_rank#73] -Arguments: [rank(currency_ratio#71) windowspecdefinition(currency_ratio#71 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#74], [currency_ratio#71 ASC NULLS FIRST] +Input [4]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67] +Arguments: [rank(currency_ratio#66) windowspecdefinition(currency_ratio#66 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#68], [currency_ratio#66 ASC NULLS FIRST] (47) Filter [codegen id : 14] -Input [5]: [item#69, return_ratio#70, currency_ratio#71, return_rank#73, currency_rank#74] -Condition : ((return_rank#73 <= 10) OR (currency_rank#74 <= 10)) +Input [5]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67, currency_rank#68] +Condition : ((return_rank#67 <= 10) OR (currency_rank#68 <= 10)) (48) Project [codegen id : 14] -Output [5]: [catalog AS channel#75, item#69, return_ratio#70, return_rank#73, currency_rank#74] -Input [5]: [item#69, return_ratio#70, currency_ratio#71, return_rank#73, currency_rank#74] +Output [5]: [catalog AS channel#69, item#64, return_ratio#65, return_rank#67, currency_rank#68] +Input [5]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67, currency_rank#68] (49) Scan parquet default.store_sales -Output [6]: [ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80, ss_sold_date_sk#81] +Output [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#81), dynamicpruningexpression(ss_sold_date_sk#81 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ss_sold_date_sk#75), dynamicpruningexpression(ss_sold_date_sk#75 IN dynamicpruning#7)] PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_net_paid), IsNotNull(ss_quantity), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk)] ReadSchema: struct (50) ColumnarToRow [codegen id : 15] -Input [6]: [ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80, ss_sold_date_sk#81] +Input [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] (51) Filter [codegen id : 15] -Input [6]: [ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80, ss_sold_date_sk#81] -Condition : (((((((isnotnull(ss_net_profit#80) AND isnotnull(ss_net_paid#79)) AND isnotnull(ss_quantity#78)) AND (ss_net_profit#80 > 1.00)) AND (ss_net_paid#79 > 0.00)) AND (ss_quantity#78 > 0)) AND isnotnull(ss_ticket_number#77)) AND isnotnull(ss_item_sk#76)) +Input [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] +Condition : (((((((isnotnull(ss_net_profit#74) AND isnotnull(ss_net_paid#73)) AND isnotnull(ss_quantity#72)) AND (ss_net_profit#74 > 1.00)) AND (ss_net_paid#73 > 0.00)) AND (ss_quantity#72 > 0)) AND isnotnull(ss_ticket_number#71)) AND isnotnull(ss_item_sk#70)) (52) Project [codegen id : 15] -Output [5]: [ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_sold_date_sk#81] -Input [6]: [ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80, ss_sold_date_sk#81] +Output [5]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_sold_date_sk#75] +Input [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] (53) BroadcastExchange -Input [5]: [ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_sold_date_sk#81] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [id=#82] +Input [5]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_sold_date_sk#75] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=7] (54) Scan parquet default.store_returns -Output [5]: [sr_item_sk#83, sr_ticket_number#84, sr_return_quantity#85, sr_return_amt#86, sr_returned_date_sk#87] +Output [5]: [sr_item_sk#76, sr_ticket_number#77, sr_return_quantity#78, sr_return_amt#79, sr_returned_date_sk#80] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] ReadSchema: struct (55) ColumnarToRow -Input [5]: [sr_item_sk#83, sr_ticket_number#84, sr_return_quantity#85, sr_return_amt#86, sr_returned_date_sk#87] +Input [5]: [sr_item_sk#76, sr_ticket_number#77, sr_return_quantity#78, sr_return_amt#79, sr_returned_date_sk#80] (56) Filter -Input [5]: [sr_item_sk#83, sr_ticket_number#84, sr_return_quantity#85, sr_return_amt#86, sr_returned_date_sk#87] -Condition : (((isnotnull(sr_return_amt#86) AND (sr_return_amt#86 > 10000.00)) AND isnotnull(sr_ticket_number#84)) AND isnotnull(sr_item_sk#83)) +Input [5]: [sr_item_sk#76, sr_ticket_number#77, sr_return_quantity#78, sr_return_amt#79, sr_returned_date_sk#80] +Condition : (((isnotnull(sr_return_amt#79) AND (sr_return_amt#79 > 10000.00)) AND isnotnull(sr_ticket_number#77)) AND isnotnull(sr_item_sk#76)) (57) Project -Output [4]: [sr_item_sk#83, sr_ticket_number#84, sr_return_quantity#85, sr_return_amt#86] -Input [5]: [sr_item_sk#83, sr_ticket_number#84, sr_return_quantity#85, sr_return_amt#86, sr_returned_date_sk#87] +Output [4]: [sr_item_sk#76, sr_ticket_number#77, sr_return_quantity#78, sr_return_amt#79] +Input [5]: [sr_item_sk#76, sr_ticket_number#77, sr_return_quantity#78, sr_return_amt#79, sr_returned_date_sk#80] (58) BroadcastHashJoin [codegen id : 17] -Left keys [2]: [ss_ticket_number#77, ss_item_sk#76] -Right keys [2]: [sr_ticket_number#84, sr_item_sk#83] +Left keys [2]: [ss_ticket_number#71, ss_item_sk#70] +Right keys [2]: [sr_ticket_number#77, sr_item_sk#76] Join condition: None (59) Project [codegen id : 17] -Output [6]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, ss_sold_date_sk#81, sr_return_quantity#85, sr_return_amt#86] -Input [9]: [ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_sold_date_sk#81, sr_item_sk#83, sr_ticket_number#84, sr_return_quantity#85, sr_return_amt#86] +Output [6]: [ss_item_sk#70, ss_quantity#72, ss_net_paid#73, ss_sold_date_sk#75, sr_return_quantity#78, sr_return_amt#79] +Input [9]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_sold_date_sk#75, sr_item_sk#76, sr_ticket_number#77, sr_return_quantity#78, sr_return_amt#79] (60) ReusedExchange [Reuses operator id: 82] -Output [1]: [d_date_sk#88] +Output [1]: [d_date_sk#81] (61) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ss_sold_date_sk#81] -Right keys [1]: [d_date_sk#88] +Left keys [1]: [ss_sold_date_sk#75] +Right keys [1]: [d_date_sk#81] Join condition: None (62) Project [codegen id : 17] -Output [5]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#85, sr_return_amt#86] -Input [7]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, ss_sold_date_sk#81, sr_return_quantity#85, sr_return_amt#86, d_date_sk#88] +Output [5]: [ss_item_sk#70, ss_quantity#72, ss_net_paid#73, sr_return_quantity#78, sr_return_amt#79] +Input [7]: [ss_item_sk#70, ss_quantity#72, ss_net_paid#73, ss_sold_date_sk#75, sr_return_quantity#78, sr_return_amt#79, d_date_sk#81] (63) HashAggregate [codegen id : 17] -Input [5]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#85, sr_return_amt#86] -Keys [1]: [ss_item_sk#76] -Functions [4]: [partial_sum(coalesce(sr_return_quantity#85, 0)), partial_sum(coalesce(ss_quantity#78, 0)), partial_sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] -Results [7]: [ss_item_sk#76, sum#95, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Input [5]: [ss_item_sk#70, ss_quantity#72, ss_net_paid#73, sr_return_quantity#78, sr_return_amt#79] +Keys [1]: [ss_item_sk#70] +Functions [4]: [partial_sum(coalesce(sr_return_quantity#78, 0)), partial_sum(coalesce(ss_quantity#72, 0)), partial_sum(coalesce(cast(sr_return_amt#79 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#82, sum#83, sum#84, isEmpty#85, sum#86, isEmpty#87] +Results [7]: [ss_item_sk#70, sum#88, sum#89, sum#90, isEmpty#91, sum#92, isEmpty#93] (64) Exchange -Input [7]: [ss_item_sk#76, sum#95, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] -Arguments: hashpartitioning(ss_item_sk#76, 5), ENSURE_REQUIREMENTS, [id=#101] +Input [7]: [ss_item_sk#70, sum#88, sum#89, sum#90, isEmpty#91, sum#92, isEmpty#93] +Arguments: hashpartitioning(ss_item_sk#70, 5), ENSURE_REQUIREMENTS, [plan_id=8] (65) HashAggregate [codegen id : 18] -Input [7]: [ss_item_sk#76, sum#95, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] -Keys [1]: [ss_item_sk#76] -Functions [4]: [sum(coalesce(sr_return_quantity#85, 0)), sum(coalesce(ss_quantity#78, 0)), sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#85, 0))#102, sum(coalesce(ss_quantity#78, 0))#103, sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00))#104, sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#105] -Results [3]: [ss_item_sk#76 AS item#106, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#85, 0))#102 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#78, 0))#103 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#107, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00))#104 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#105 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#108] +Input [7]: [ss_item_sk#70, sum#88, sum#89, sum#90, isEmpty#91, sum#92, isEmpty#93] +Keys [1]: [ss_item_sk#70] +Functions [4]: [sum(coalesce(sr_return_quantity#78, 0)), sum(coalesce(ss_quantity#72, 0)), sum(coalesce(cast(sr_return_amt#79 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#78, 0))#94, sum(coalesce(ss_quantity#72, 0))#95, sum(coalesce(cast(sr_return_amt#79 as decimal(12,2)), 0.00))#96, sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))#97] +Results [3]: [ss_item_sk#70 AS item#98, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#78, 0))#94 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#72, 0))#95 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#99, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#79 as decimal(12,2)), 0.00))#96 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))#97 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#100] (66) Exchange -Input [3]: [item#106, return_ratio#107, currency_ratio#108] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#109] +Input [3]: [item#98, return_ratio#99, currency_ratio#100] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=9] (67) Sort [codegen id : 19] -Input [3]: [item#106, return_ratio#107, currency_ratio#108] -Arguments: [return_ratio#107 ASC NULLS FIRST], false, 0 +Input [3]: [item#98, return_ratio#99, currency_ratio#100] +Arguments: [return_ratio#99 ASC NULLS FIRST], false, 0 (68) Window -Input [3]: [item#106, return_ratio#107, currency_ratio#108] -Arguments: [rank(return_ratio#107) windowspecdefinition(return_ratio#107 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#110], [return_ratio#107 ASC NULLS FIRST] +Input [3]: [item#98, return_ratio#99, currency_ratio#100] +Arguments: [rank(return_ratio#99) windowspecdefinition(return_ratio#99 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#101], [return_ratio#99 ASC NULLS FIRST] (69) Sort [codegen id : 20] -Input [4]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110] -Arguments: [currency_ratio#108 ASC NULLS FIRST], false, 0 +Input [4]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101] +Arguments: [currency_ratio#100 ASC NULLS FIRST], false, 0 (70) Window -Input [4]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110] -Arguments: [rank(currency_ratio#108) windowspecdefinition(currency_ratio#108 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#111], [currency_ratio#108 ASC NULLS FIRST] +Input [4]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101] +Arguments: [rank(currency_ratio#100) windowspecdefinition(currency_ratio#100 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#102], [currency_ratio#100 ASC NULLS FIRST] (71) Filter [codegen id : 21] -Input [5]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110, currency_rank#111] -Condition : ((return_rank#110 <= 10) OR (currency_rank#111 <= 10)) +Input [5]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101, currency_rank#102] +Condition : ((return_rank#101 <= 10) OR (currency_rank#102 <= 10)) (72) Project [codegen id : 21] -Output [5]: [store AS channel#112, item#106, return_ratio#107, return_rank#110, currency_rank#111] -Input [5]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110, currency_rank#111] +Output [5]: [store AS channel#103, item#98, return_ratio#99, return_rank#101, currency_rank#102] +Input [5]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101, currency_rank#102] (73) Union (74) HashAggregate [codegen id : 22] -Input [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] -Keys [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Keys [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] +Results [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] (75) Exchange -Input [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] -Arguments: hashpartitioning(channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37, 5), ENSURE_REQUIREMENTS, [id=#113] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Arguments: hashpartitioning(channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34, 5), ENSURE_REQUIREMENTS, [plan_id=10] (76) HashAggregate [codegen id : 23] -Input [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] -Keys [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Keys [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] +Results [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] (77) TakeOrderedAndProject -Input [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] -Arguments: 100, [channel#38 ASC NULLS FIRST, return_rank#36 ASC NULLS FIRST, currency_rank#37 ASC NULLS FIRST], [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Arguments: 100, [channel#35 ASC NULLS FIRST, return_rank#33 ASC NULLS FIRST, currency_rank#34 ASC NULLS FIRST], [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] ===== Subqueries ===== @@ -437,29 +437,29 @@ BroadcastExchange (82) (78) Scan parquet default.date_dim -Output [3]: [d_date_sk#14, d_year#114, d_moy#115] +Output [3]: [d_date_sk#13, d_year#104, d_moy#105] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] ReadSchema: struct (79) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#14, d_year#114, d_moy#115] +Input [3]: [d_date_sk#13, d_year#104, d_moy#105] (80) Filter [codegen id : 1] -Input [3]: [d_date_sk#14, d_year#114, d_moy#115] -Condition : ((((isnotnull(d_year#114) AND isnotnull(d_moy#115)) AND (d_year#114 = 2001)) AND (d_moy#115 = 12)) AND isnotnull(d_date_sk#14)) +Input [3]: [d_date_sk#13, d_year#104, d_moy#105] +Condition : ((((isnotnull(d_year#104) AND isnotnull(d_moy#105)) AND (d_year#104 = 2001)) AND (d_moy#105 = 12)) AND isnotnull(d_date_sk#13)) (81) Project [codegen id : 1] -Output [1]: [d_date_sk#14] -Input [3]: [d_date_sk#14, d_year#114, d_moy#115] +Output [1]: [d_date_sk#13] +Input [3]: [d_date_sk#13, d_year#104, d_moy#105] (82) BroadcastExchange -Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#116] +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] -Subquery:2 Hosting operator id = 25 Hosting Expression = cs_sold_date_sk#44 IN dynamicpruning#7 +Subquery:2 Hosting operator id = 25 Hosting Expression = cs_sold_date_sk#41 IN dynamicpruning#7 -Subquery:3 Hosting operator id = 49 Hosting Expression = ss_sold_date_sk#81 IN dynamicpruning#7 +Subquery:3 Hosting operator id = 49 Hosting Expression = ss_sold_date_sk#75 IN dynamicpruning#7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt index 29a88fbab1b3c..08dff69c2d637 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5.sf100/explain.txt @@ -134,7 +134,7 @@ Condition : isnotnull(s_store_sk#22) (13) BroadcastExchange Input [2]: [s_store_sk#22, s_store_id#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (14) BroadcastHashJoin [codegen id : 5] Left keys [1]: [store_sk#6] @@ -146,290 +146,290 @@ Output [6]: [date_sk#7, sales_price#8, profit#9, return_amt#10, net_loss#11, s_s Input [8]: [store_sk#6, date_sk#7, sales_price#8, profit#9, return_amt#10, net_loss#11, s_store_sk#22, s_store_id#23] (16) ReusedExchange [Reuses operator id: 82] -Output [1]: [d_date_sk#25] +Output [1]: [d_date_sk#24] (17) BroadcastHashJoin [codegen id : 5] Left keys [1]: [date_sk#7] -Right keys [1]: [d_date_sk#25] +Right keys [1]: [d_date_sk#24] Join condition: None (18) Project [codegen id : 5] Output [5]: [sales_price#8, profit#9, return_amt#10, net_loss#11, s_store_id#23] -Input [7]: [date_sk#7, sales_price#8, profit#9, return_amt#10, net_loss#11, s_store_id#23, d_date_sk#25] +Input [7]: [date_sk#7, sales_price#8, profit#9, return_amt#10, net_loss#11, s_store_id#23, d_date_sk#24] (19) HashAggregate [codegen id : 5] Input [5]: [sales_price#8, profit#9, return_amt#10, net_loss#11, s_store_id#23] Keys [1]: [s_store_id#23] Functions [4]: [partial_sum(UnscaledValue(sales_price#8)), partial_sum(UnscaledValue(return_amt#10)), partial_sum(UnscaledValue(profit#9)), partial_sum(UnscaledValue(net_loss#11))] -Aggregate Attributes [4]: [sum#26, sum#27, sum#28, sum#29] -Results [5]: [s_store_id#23, sum#30, sum#31, sum#32, sum#33] +Aggregate Attributes [4]: [sum#25, sum#26, sum#27, sum#28] +Results [5]: [s_store_id#23, sum#29, sum#30, sum#31, sum#32] (20) Exchange -Input [5]: [s_store_id#23, sum#30, sum#31, sum#32, sum#33] -Arguments: hashpartitioning(s_store_id#23, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [5]: [s_store_id#23, sum#29, sum#30, sum#31, sum#32] +Arguments: hashpartitioning(s_store_id#23, 5), ENSURE_REQUIREMENTS, [plan_id=2] (21) HashAggregate [codegen id : 6] -Input [5]: [s_store_id#23, sum#30, sum#31, sum#32, sum#33] +Input [5]: [s_store_id#23, sum#29, sum#30, sum#31, sum#32] Keys [1]: [s_store_id#23] Functions [4]: [sum(UnscaledValue(sales_price#8)), sum(UnscaledValue(return_amt#10)), sum(UnscaledValue(profit#9)), sum(UnscaledValue(net_loss#11))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#8))#35, sum(UnscaledValue(return_amt#10))#36, sum(UnscaledValue(profit#9))#37, sum(UnscaledValue(net_loss#11))#38] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#8))#35,17,2) AS sales#39, MakeDecimal(sum(UnscaledValue(return_amt#10))#36,17,2) AS returns#40, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#37,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#38,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#41, store channel AS channel#42, concat(store, s_store_id#23) AS id#43] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#8))#33, sum(UnscaledValue(return_amt#10))#34, sum(UnscaledValue(profit#9))#35, sum(UnscaledValue(net_loss#11))#36] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#8))#33,17,2) AS sales#37, MakeDecimal(sum(UnscaledValue(return_amt#10))#34,17,2) AS returns#38, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#35,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#36,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#39, store channel AS channel#40, concat(store, s_store_id#23) AS id#41] (22) Scan parquet default.catalog_sales -Output [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] +Output [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#47), dynamicpruningexpression(cs_sold_date_sk#47 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#45), dynamicpruningexpression(cs_sold_date_sk#45 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_catalog_page_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 7] -Input [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] (24) Filter [codegen id : 7] -Input [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] -Condition : isnotnull(cs_catalog_page_sk#44) +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : isnotnull(cs_catalog_page_sk#42) (25) Project [codegen id : 7] -Output [6]: [cs_catalog_page_sk#44 AS page_sk#48, cs_sold_date_sk#47 AS date_sk#49, cs_ext_sales_price#45 AS sales_price#50, cs_net_profit#46 AS profit#51, 0.00 AS return_amt#52, 0.00 AS net_loss#53] -Input [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] +Output [6]: [cs_catalog_page_sk#42 AS page_sk#46, cs_sold_date_sk#45 AS date_sk#47, cs_ext_sales_price#43 AS sales_price#48, cs_net_profit#44 AS profit#49, 0.00 AS return_amt#50, 0.00 AS net_loss#51] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] (26) Scan parquet default.catalog_returns -Output [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#57), dynamicpruningexpression(cr_returned_date_sk#57 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cr_returned_date_sk#55), dynamicpruningexpression(cr_returned_date_sk#55 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cr_catalog_page_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 8] -Input [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] (28) Filter [codegen id : 8] -Input [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] -Condition : isnotnull(cr_catalog_page_sk#54) +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Condition : isnotnull(cr_catalog_page_sk#52) (29) Project [codegen id : 8] -Output [6]: [cr_catalog_page_sk#54 AS page_sk#58, cr_returned_date_sk#57 AS date_sk#59, 0.00 AS sales_price#60, 0.00 AS profit#61, cr_return_amount#55 AS return_amt#62, cr_net_loss#56 AS net_loss#63] -Input [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [6]: [cr_catalog_page_sk#52 AS page_sk#56, cr_returned_date_sk#55 AS date_sk#57, 0.00 AS sales_price#58, 0.00 AS profit#59, cr_return_amount#53 AS return_amt#60, cr_net_loss#54 AS net_loss#61] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] (30) Union (31) Scan parquet default.catalog_page -Output [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] +Output [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 9] -Input [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] +Input [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] (33) Filter [codegen id : 9] -Input [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] -Condition : isnotnull(cp_catalog_page_sk#64) +Input [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] +Condition : isnotnull(cp_catalog_page_sk#62) (34) BroadcastExchange -Input [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#66] +Input [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (35) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [page_sk#48] -Right keys [1]: [cp_catalog_page_sk#64] +Left keys [1]: [page_sk#46] +Right keys [1]: [cp_catalog_page_sk#62] Join condition: None (36) Project [codegen id : 11] -Output [6]: [date_sk#49, sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_id#65] -Input [8]: [page_sk#48, date_sk#49, sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_sk#64, cp_catalog_page_id#65] +Output [6]: [date_sk#47, sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#63] +Input [8]: [page_sk#46, date_sk#47, sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_sk#62, cp_catalog_page_id#63] (37) ReusedExchange [Reuses operator id: 82] -Output [1]: [d_date_sk#67] +Output [1]: [d_date_sk#64] (38) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [date_sk#49] -Right keys [1]: [d_date_sk#67] +Left keys [1]: [date_sk#47] +Right keys [1]: [d_date_sk#64] Join condition: None (39) Project [codegen id : 11] -Output [5]: [sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_id#65] -Input [7]: [date_sk#49, sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_id#65, d_date_sk#67] +Output [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#63] +Input [7]: [date_sk#47, sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#63, d_date_sk#64] (40) HashAggregate [codegen id : 11] -Input [5]: [sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_id#65] -Keys [1]: [cp_catalog_page_id#65] -Functions [4]: [partial_sum(UnscaledValue(sales_price#50)), partial_sum(UnscaledValue(return_amt#52)), partial_sum(UnscaledValue(profit#51)), partial_sum(UnscaledValue(net_loss#53))] -Aggregate Attributes [4]: [sum#68, sum#69, sum#70, sum#71] -Results [5]: [cp_catalog_page_id#65, sum#72, sum#73, sum#74, sum#75] +Input [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#63] +Keys [1]: [cp_catalog_page_id#63] +Functions [4]: [partial_sum(UnscaledValue(sales_price#48)), partial_sum(UnscaledValue(return_amt#50)), partial_sum(UnscaledValue(profit#49)), partial_sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum#65, sum#66, sum#67, sum#68] +Results [5]: [cp_catalog_page_id#63, sum#69, sum#70, sum#71, sum#72] (41) Exchange -Input [5]: [cp_catalog_page_id#65, sum#72, sum#73, sum#74, sum#75] -Arguments: hashpartitioning(cp_catalog_page_id#65, 5), ENSURE_REQUIREMENTS, [id=#76] +Input [5]: [cp_catalog_page_id#63, sum#69, sum#70, sum#71, sum#72] +Arguments: hashpartitioning(cp_catalog_page_id#63, 5), ENSURE_REQUIREMENTS, [plan_id=4] (42) HashAggregate [codegen id : 12] -Input [5]: [cp_catalog_page_id#65, sum#72, sum#73, sum#74, sum#75] -Keys [1]: [cp_catalog_page_id#65] -Functions [4]: [sum(UnscaledValue(sales_price#50)), sum(UnscaledValue(return_amt#52)), sum(UnscaledValue(profit#51)), sum(UnscaledValue(net_loss#53))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#50))#77, sum(UnscaledValue(return_amt#52))#78, sum(UnscaledValue(profit#51))#79, sum(UnscaledValue(net_loss#53))#80] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#50))#77,17,2) AS sales#81, MakeDecimal(sum(UnscaledValue(return_amt#52))#78,17,2) AS returns#82, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#51))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#53))#80,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#83, catalog channel AS channel#84, concat(catalog_page, cp_catalog_page_id#65) AS id#85] +Input [5]: [cp_catalog_page_id#63, sum#69, sum#70, sum#71, sum#72] +Keys [1]: [cp_catalog_page_id#63] +Functions [4]: [sum(UnscaledValue(sales_price#48)), sum(UnscaledValue(return_amt#50)), sum(UnscaledValue(profit#49)), sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#48))#73, sum(UnscaledValue(return_amt#50))#74, sum(UnscaledValue(profit#49))#75, sum(UnscaledValue(net_loss#51))#76] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#48))#73,17,2) AS sales#77, MakeDecimal(sum(UnscaledValue(return_amt#50))#74,17,2) AS returns#78, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#49))#75,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#51))#76,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#79, catalog channel AS channel#80, concat(catalog_page, cp_catalog_page_id#63) AS id#81] (43) Scan parquet default.web_sales -Output [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] +Output [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#89), dynamicpruningexpression(ws_sold_date_sk#89 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#85), dynamicpruningexpression(ws_sold_date_sk#85 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_web_site_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 13] -Input [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] (45) Filter [codegen id : 13] -Input [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] -Condition : isnotnull(ws_web_site_sk#86) +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] +Condition : isnotnull(ws_web_site_sk#82) (46) Project [codegen id : 13] -Output [6]: [ws_web_site_sk#86 AS wsr_web_site_sk#90, ws_sold_date_sk#89 AS date_sk#91, ws_ext_sales_price#87 AS sales_price#92, ws_net_profit#88 AS profit#93, 0.00 AS return_amt#94, 0.00 AS net_loss#95] -Input [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] +Output [6]: [ws_web_site_sk#82 AS wsr_web_site_sk#86, ws_sold_date_sk#85 AS date_sk#87, ws_ext_sales_price#83 AS sales_price#88, ws_net_profit#84 AS profit#89, 0.00 AS return_amt#90, 0.00 AS net_loss#91] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] (47) Scan parquet default.web_returns -Output [5]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100] +Output [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#100), dynamicpruningexpression(wr_returned_date_sk#100 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(wr_returned_date_sk#96), dynamicpruningexpression(wr_returned_date_sk#96 IN dynamicpruning#5)] ReadSchema: struct (48) ColumnarToRow [codegen id : 14] -Input [5]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100] +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] (49) Exchange -Input [5]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100] -Arguments: hashpartitioning(wr_item_sk#96, wr_order_number#97, 5), ENSURE_REQUIREMENTS, [id=#101] +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Arguments: hashpartitioning(wr_item_sk#92, wr_order_number#93, 5), ENSURE_REQUIREMENTS, [plan_id=5] (50) Sort [codegen id : 15] -Input [5]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100] -Arguments: [wr_item_sk#96 ASC NULLS FIRST, wr_order_number#97 ASC NULLS FIRST], false, 0 +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Arguments: [wr_item_sk#92 ASC NULLS FIRST, wr_order_number#93 ASC NULLS FIRST], false, 0 (51) Scan parquet default.web_sales -Output [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] +Output [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] ReadSchema: struct (52) ColumnarToRow [codegen id : 16] -Input [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] (53) Filter [codegen id : 16] -Input [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] -Condition : ((isnotnull(ws_item_sk#102) AND isnotnull(ws_order_number#104)) AND isnotnull(ws_web_site_sk#103)) +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] +Condition : ((isnotnull(ws_item_sk#97) AND isnotnull(ws_order_number#99)) AND isnotnull(ws_web_site_sk#98)) (54) Project [codegen id : 16] -Output [3]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104] -Input [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] +Output [3]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] (55) Exchange -Input [3]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104] -Arguments: hashpartitioning(ws_item_sk#102, ws_order_number#104, 5), ENSURE_REQUIREMENTS, [id=#106] +Input [3]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] +Arguments: hashpartitioning(ws_item_sk#97, ws_order_number#99, 5), ENSURE_REQUIREMENTS, [plan_id=6] (56) Sort [codegen id : 17] -Input [3]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104] -Arguments: [ws_item_sk#102 ASC NULLS FIRST, ws_order_number#104 ASC NULLS FIRST], false, 0 +Input [3]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] +Arguments: [ws_item_sk#97 ASC NULLS FIRST, ws_order_number#99 ASC NULLS FIRST], false, 0 (57) SortMergeJoin [codegen id : 18] -Left keys [2]: [wr_item_sk#96, wr_order_number#97] -Right keys [2]: [ws_item_sk#102, ws_order_number#104] +Left keys [2]: [wr_item_sk#92, wr_order_number#93] +Right keys [2]: [ws_item_sk#97, ws_order_number#99] Join condition: None (58) Project [codegen id : 18] -Output [6]: [ws_web_site_sk#103 AS wsr_web_site_sk#107, wr_returned_date_sk#100 AS date_sk#108, 0.00 AS sales_price#109, 0.00 AS profit#110, wr_return_amt#98 AS return_amt#111, wr_net_loss#99 AS net_loss#112] -Input [8]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100, ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104] +Output [6]: [ws_web_site_sk#98 AS wsr_web_site_sk#101, wr_returned_date_sk#96 AS date_sk#102, 0.00 AS sales_price#103, 0.00 AS profit#104, wr_return_amt#94 AS return_amt#105, wr_net_loss#95 AS net_loss#106] +Input [8]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96, ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] (59) Union (60) Scan parquet default.web_site -Output [2]: [web_site_sk#113, web_site_id#114] +Output [2]: [web_site_sk#107, web_site_id#108] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct (61) ColumnarToRow [codegen id : 19] -Input [2]: [web_site_sk#113, web_site_id#114] +Input [2]: [web_site_sk#107, web_site_id#108] (62) Filter [codegen id : 19] -Input [2]: [web_site_sk#113, web_site_id#114] -Condition : isnotnull(web_site_sk#113) +Input [2]: [web_site_sk#107, web_site_id#108] +Condition : isnotnull(web_site_sk#107) (63) BroadcastExchange -Input [2]: [web_site_sk#113, web_site_id#114] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +Input [2]: [web_site_sk#107, web_site_id#108] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] (64) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [wsr_web_site_sk#90] -Right keys [1]: [web_site_sk#113] +Left keys [1]: [wsr_web_site_sk#86] +Right keys [1]: [web_site_sk#107] Join condition: None (65) Project [codegen id : 21] -Output [6]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#114] -Input [8]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#113, web_site_id#114] +Output [6]: [date_sk#87, sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#108] +Input [8]: [wsr_web_site_sk#86, date_sk#87, sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_sk#107, web_site_id#108] (66) ReusedExchange [Reuses operator id: 82] -Output [1]: [d_date_sk#116] +Output [1]: [d_date_sk#109] (67) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [date_sk#91] -Right keys [1]: [d_date_sk#116] +Left keys [1]: [date_sk#87] +Right keys [1]: [d_date_sk#109] Join condition: None (68) Project [codegen id : 21] -Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#114] -Input [7]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#114, d_date_sk#116] +Output [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#108] +Input [7]: [date_sk#87, sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#108, d_date_sk#109] (69) HashAggregate [codegen id : 21] -Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#114] -Keys [1]: [web_site_id#114] -Functions [4]: [partial_sum(UnscaledValue(sales_price#92)), partial_sum(UnscaledValue(return_amt#94)), partial_sum(UnscaledValue(profit#93)), partial_sum(UnscaledValue(net_loss#95))] -Aggregate Attributes [4]: [sum#117, sum#118, sum#119, sum#120] -Results [5]: [web_site_id#114, sum#121, sum#122, sum#123, sum#124] +Input [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#108] +Keys [1]: [web_site_id#108] +Functions [4]: [partial_sum(UnscaledValue(sales_price#88)), partial_sum(UnscaledValue(return_amt#90)), partial_sum(UnscaledValue(profit#89)), partial_sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum#110, sum#111, sum#112, sum#113] +Results [5]: [web_site_id#108, sum#114, sum#115, sum#116, sum#117] (70) Exchange -Input [5]: [web_site_id#114, sum#121, sum#122, sum#123, sum#124] -Arguments: hashpartitioning(web_site_id#114, 5), ENSURE_REQUIREMENTS, [id=#125] +Input [5]: [web_site_id#108, sum#114, sum#115, sum#116, sum#117] +Arguments: hashpartitioning(web_site_id#108, 5), ENSURE_REQUIREMENTS, [plan_id=8] (71) HashAggregate [codegen id : 22] -Input [5]: [web_site_id#114, sum#121, sum#122, sum#123, sum#124] -Keys [1]: [web_site_id#114] -Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#126, sum(UnscaledValue(return_amt#94))#127, sum(UnscaledValue(profit#93))#128, sum(UnscaledValue(net_loss#95))#129] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#126,17,2) AS sales#130, MakeDecimal(sum(UnscaledValue(return_amt#94))#127,17,2) AS returns#131, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#128,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#129,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#132, web channel AS channel#133, concat(web_site, web_site_id#114) AS id#134] +Input [5]: [web_site_id#108, sum#114, sum#115, sum#116, sum#117] +Keys [1]: [web_site_id#108] +Functions [4]: [sum(UnscaledValue(sales_price#88)), sum(UnscaledValue(return_amt#90)), sum(UnscaledValue(profit#89)), sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#88))#118, sum(UnscaledValue(return_amt#90))#119, sum(UnscaledValue(profit#89))#120, sum(UnscaledValue(net_loss#91))#121] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#88))#118,17,2) AS sales#122, MakeDecimal(sum(UnscaledValue(return_amt#90))#119,17,2) AS returns#123, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#89))#120,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#91))#121,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#124, web channel AS channel#125, concat(web_site, web_site_id#108) AS id#126] (72) Union (73) Expand [codegen id : 23] -Input [5]: [sales#39, returns#40, profit#41, channel#42, id#43] -Arguments: [[sales#39, returns#40, profit#41, channel#42, id#43, 0], [sales#39, returns#40, profit#41, channel#42, null, 1], [sales#39, returns#40, profit#41, null, null, 3]], [sales#39, returns#40, profit#41, channel#135, id#136, spark_grouping_id#137] +Input [5]: [sales#37, returns#38, profit#39, channel#40, id#41] +Arguments: [[sales#37, returns#38, profit#39, channel#40, id#41, 0], [sales#37, returns#38, profit#39, channel#40, null, 1], [sales#37, returns#38, profit#39, null, null, 3]], [sales#37, returns#38, profit#39, channel#127, id#128, spark_grouping_id#129] (74) HashAggregate [codegen id : 23] -Input [6]: [sales#39, returns#40, profit#41, channel#135, id#136, spark_grouping_id#137] -Keys [3]: [channel#135, id#136, spark_grouping_id#137] -Functions [3]: [partial_sum(sales#39), partial_sum(returns#40), partial_sum(profit#41)] -Aggregate Attributes [6]: [sum#138, isEmpty#139, sum#140, isEmpty#141, sum#142, isEmpty#143] -Results [9]: [channel#135, id#136, spark_grouping_id#137, sum#144, isEmpty#145, sum#146, isEmpty#147, sum#148, isEmpty#149] +Input [6]: [sales#37, returns#38, profit#39, channel#127, id#128, spark_grouping_id#129] +Keys [3]: [channel#127, id#128, spark_grouping_id#129] +Functions [3]: [partial_sum(sales#37), partial_sum(returns#38), partial_sum(profit#39)] +Aggregate Attributes [6]: [sum#130, isEmpty#131, sum#132, isEmpty#133, sum#134, isEmpty#135] +Results [9]: [channel#127, id#128, spark_grouping_id#129, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] (75) Exchange -Input [9]: [channel#135, id#136, spark_grouping_id#137, sum#144, isEmpty#145, sum#146, isEmpty#147, sum#148, isEmpty#149] -Arguments: hashpartitioning(channel#135, id#136, spark_grouping_id#137, 5), ENSURE_REQUIREMENTS, [id=#150] +Input [9]: [channel#127, id#128, spark_grouping_id#129, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Arguments: hashpartitioning(channel#127, id#128, spark_grouping_id#129, 5), ENSURE_REQUIREMENTS, [plan_id=9] (76) HashAggregate [codegen id : 24] -Input [9]: [channel#135, id#136, spark_grouping_id#137, sum#144, isEmpty#145, sum#146, isEmpty#147, sum#148, isEmpty#149] -Keys [3]: [channel#135, id#136, spark_grouping_id#137] -Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] -Aggregate Attributes [3]: [sum(sales#39)#151, sum(returns#40)#152, sum(profit#41)#153] -Results [5]: [channel#135, id#136, sum(sales#39)#151 AS sales#154, sum(returns#40)#152 AS returns#155, sum(profit#41)#153 AS profit#156] +Input [9]: [channel#127, id#128, spark_grouping_id#129, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Keys [3]: [channel#127, id#128, spark_grouping_id#129] +Functions [3]: [sum(sales#37), sum(returns#38), sum(profit#39)] +Aggregate Attributes [3]: [sum(sales#37)#142, sum(returns#38)#143, sum(profit#39)#144] +Results [5]: [channel#127, id#128, sum(sales#37)#142 AS sales#145, sum(returns#38)#143 AS returns#146, sum(profit#39)#144 AS profit#147] (77) TakeOrderedAndProject -Input [5]: [channel#135, id#136, sales#154, returns#155, profit#156] -Arguments: 100, [channel#135 ASC NULLS FIRST, id#136 ASC NULLS FIRST], [channel#135, id#136, sales#154, returns#155, profit#156] +Input [5]: [channel#127, id#128, sales#145, returns#146, profit#147] +Arguments: 100, [channel#127 ASC NULLS FIRST, id#128 ASC NULLS FIRST], [channel#127, id#128, sales#145, returns#146, profit#147] ===== Subqueries ===== @@ -442,35 +442,35 @@ BroadcastExchange (82) (78) Scan parquet default.date_dim -Output [2]: [d_date_sk#25, d_date#157] +Output [2]: [d_date_sk#24, d_date#148] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] ReadSchema: struct (79) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#25, d_date#157] +Input [2]: [d_date_sk#24, d_date#148] (80) Filter [codegen id : 1] -Input [2]: [d_date_sk#25, d_date#157] -Condition : (((isnotnull(d_date#157) AND (d_date#157 >= 2000-08-23)) AND (d_date#157 <= 2000-09-06)) AND isnotnull(d_date_sk#25)) +Input [2]: [d_date_sk#24, d_date#148] +Condition : (((isnotnull(d_date#148) AND (d_date#148 >= 2000-08-23)) AND (d_date#148 <= 2000-09-06)) AND isnotnull(d_date_sk#24)) (81) Project [codegen id : 1] -Output [1]: [d_date_sk#25] -Input [2]: [d_date_sk#25, d_date#157] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_date#148] (82) BroadcastExchange -Input [1]: [d_date_sk#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#158] +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] Subquery:2 Hosting operator id = 5 Hosting Expression = sr_returned_date_sk#15 IN dynamicpruning#5 -Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#47 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#45 IN dynamicpruning#5 -Subquery:4 Hosting operator id = 26 Hosting Expression = cr_returned_date_sk#57 IN dynamicpruning#5 +Subquery:4 Hosting operator id = 26 Hosting Expression = cr_returned_date_sk#55 IN dynamicpruning#5 -Subquery:5 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#89 IN dynamicpruning#5 +Subquery:5 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#85 IN dynamicpruning#5 -Subquery:6 Hosting operator id = 47 Hosting Expression = wr_returned_date_sk#100 IN dynamicpruning#5 +Subquery:6 Hosting operator id = 47 Hosting Expression = wr_returned_date_sk#96 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt index a9e5929f70b54..ad669da0de9e3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q5/explain.txt @@ -143,7 +143,7 @@ Condition : isnotnull(s_store_sk#23) (16) BroadcastExchange Input [2]: [s_store_sk#23, s_store_id#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (17) BroadcastHashJoin [codegen id : 5] Left keys [1]: [store_sk#6] @@ -158,263 +158,263 @@ Input [7]: [store_sk#6, sales_price#8, profit#9, return_amt#10, net_loss#11, s_s Input [5]: [sales_price#8, profit#9, return_amt#10, net_loss#11, s_store_id#24] Keys [1]: [s_store_id#24] Functions [4]: [partial_sum(UnscaledValue(sales_price#8)), partial_sum(UnscaledValue(return_amt#10)), partial_sum(UnscaledValue(profit#9)), partial_sum(UnscaledValue(net_loss#11))] -Aggregate Attributes [4]: [sum#26, sum#27, sum#28, sum#29] -Results [5]: [s_store_id#24, sum#30, sum#31, sum#32, sum#33] +Aggregate Attributes [4]: [sum#25, sum#26, sum#27, sum#28] +Results [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] (20) Exchange -Input [5]: [s_store_id#24, sum#30, sum#31, sum#32, sum#33] -Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] +Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [plan_id=2] (21) HashAggregate [codegen id : 6] -Input [5]: [s_store_id#24, sum#30, sum#31, sum#32, sum#33] +Input [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] Keys [1]: [s_store_id#24] Functions [4]: [sum(UnscaledValue(sales_price#8)), sum(UnscaledValue(return_amt#10)), sum(UnscaledValue(profit#9)), sum(UnscaledValue(net_loss#11))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#8))#35, sum(UnscaledValue(return_amt#10))#36, sum(UnscaledValue(profit#9))#37, sum(UnscaledValue(net_loss#11))#38] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#8))#35,17,2) AS sales#39, MakeDecimal(sum(UnscaledValue(return_amt#10))#36,17,2) AS returns#40, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#37,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#38,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#41, store channel AS channel#42, concat(store, s_store_id#24) AS id#43] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#8))#33, sum(UnscaledValue(return_amt#10))#34, sum(UnscaledValue(profit#9))#35, sum(UnscaledValue(net_loss#11))#36] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#8))#33,17,2) AS sales#37, MakeDecimal(sum(UnscaledValue(return_amt#10))#34,17,2) AS returns#38, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#35,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#36,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#39, store channel AS channel#40, concat(store, s_store_id#24) AS id#41] (22) Scan parquet default.catalog_sales -Output [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] +Output [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#47), dynamicpruningexpression(cs_sold_date_sk#47 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#45), dynamicpruningexpression(cs_sold_date_sk#45 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_catalog_page_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 7] -Input [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] (24) Filter [codegen id : 7] -Input [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] -Condition : isnotnull(cs_catalog_page_sk#44) +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : isnotnull(cs_catalog_page_sk#42) (25) Project [codegen id : 7] -Output [6]: [cs_catalog_page_sk#44 AS page_sk#48, cs_sold_date_sk#47 AS date_sk#49, cs_ext_sales_price#45 AS sales_price#50, cs_net_profit#46 AS profit#51, 0.00 AS return_amt#52, 0.00 AS net_loss#53] -Input [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] +Output [6]: [cs_catalog_page_sk#42 AS page_sk#46, cs_sold_date_sk#45 AS date_sk#47, cs_ext_sales_price#43 AS sales_price#48, cs_net_profit#44 AS profit#49, 0.00 AS return_amt#50, 0.00 AS net_loss#51] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] (26) Scan parquet default.catalog_returns -Output [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#57), dynamicpruningexpression(cr_returned_date_sk#57 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cr_returned_date_sk#55), dynamicpruningexpression(cr_returned_date_sk#55 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cr_catalog_page_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 8] -Input [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] (28) Filter [codegen id : 8] -Input [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] -Condition : isnotnull(cr_catalog_page_sk#54) +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Condition : isnotnull(cr_catalog_page_sk#52) (29) Project [codegen id : 8] -Output [6]: [cr_catalog_page_sk#54 AS page_sk#58, cr_returned_date_sk#57 AS date_sk#59, 0.00 AS sales_price#60, 0.00 AS profit#61, cr_return_amount#55 AS return_amt#62, cr_net_loss#56 AS net_loss#63] -Input [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [6]: [cr_catalog_page_sk#52 AS page_sk#56, cr_returned_date_sk#55 AS date_sk#57, 0.00 AS sales_price#58, 0.00 AS profit#59, cr_return_amount#53 AS return_amt#60, cr_net_loss#54 AS net_loss#61] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] (30) Union (31) ReusedExchange [Reuses operator id: 79] -Output [1]: [d_date_sk#64] +Output [1]: [d_date_sk#62] (32) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [date_sk#49] -Right keys [1]: [d_date_sk#64] +Left keys [1]: [date_sk#47] +Right keys [1]: [d_date_sk#62] Join condition: None (33) Project [codegen id : 11] -Output [5]: [page_sk#48, sales_price#50, profit#51, return_amt#52, net_loss#53] -Input [7]: [page_sk#48, date_sk#49, sales_price#50, profit#51, return_amt#52, net_loss#53, d_date_sk#64] +Output [5]: [page_sk#46, sales_price#48, profit#49, return_amt#50, net_loss#51] +Input [7]: [page_sk#46, date_sk#47, sales_price#48, profit#49, return_amt#50, net_loss#51, d_date_sk#62] (34) Scan parquet default.catalog_page -Output [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Output [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 10] -Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] (36) Filter [codegen id : 10] -Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] -Condition : isnotnull(cp_catalog_page_sk#65) +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Condition : isnotnull(cp_catalog_page_sk#63) (37) BroadcastExchange -Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (38) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [page_sk#48] -Right keys [1]: [cp_catalog_page_sk#65] +Left keys [1]: [page_sk#46] +Right keys [1]: [cp_catalog_page_sk#63] Join condition: None (39) Project [codegen id : 11] -Output [5]: [sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_id#66] -Input [7]: [page_sk#48, sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_sk#65, cp_catalog_page_id#66] +Output [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#64] +Input [7]: [page_sk#46, sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_sk#63, cp_catalog_page_id#64] (40) HashAggregate [codegen id : 11] -Input [5]: [sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_id#66] -Keys [1]: [cp_catalog_page_id#66] -Functions [4]: [partial_sum(UnscaledValue(sales_price#50)), partial_sum(UnscaledValue(return_amt#52)), partial_sum(UnscaledValue(profit#51)), partial_sum(UnscaledValue(net_loss#53))] -Aggregate Attributes [4]: [sum#68, sum#69, sum#70, sum#71] -Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] +Input [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#64] +Keys [1]: [cp_catalog_page_id#64] +Functions [4]: [partial_sum(UnscaledValue(sales_price#48)), partial_sum(UnscaledValue(return_amt#50)), partial_sum(UnscaledValue(profit#49)), partial_sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum#65, sum#66, sum#67, sum#68] +Results [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] (41) Exchange -Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] -Arguments: hashpartitioning(cp_catalog_page_id#66, 5), ENSURE_REQUIREMENTS, [id=#76] +Input [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] +Arguments: hashpartitioning(cp_catalog_page_id#64, 5), ENSURE_REQUIREMENTS, [plan_id=4] (42) HashAggregate [codegen id : 12] -Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] -Keys [1]: [cp_catalog_page_id#66] -Functions [4]: [sum(UnscaledValue(sales_price#50)), sum(UnscaledValue(return_amt#52)), sum(UnscaledValue(profit#51)), sum(UnscaledValue(net_loss#53))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#50))#77, sum(UnscaledValue(return_amt#52))#78, sum(UnscaledValue(profit#51))#79, sum(UnscaledValue(net_loss#53))#80] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#50))#77,17,2) AS sales#81, MakeDecimal(sum(UnscaledValue(return_amt#52))#78,17,2) AS returns#82, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#51))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#53))#80,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#83, catalog channel AS channel#84, concat(catalog_page, cp_catalog_page_id#66) AS id#85] +Input [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] +Keys [1]: [cp_catalog_page_id#64] +Functions [4]: [sum(UnscaledValue(sales_price#48)), sum(UnscaledValue(return_amt#50)), sum(UnscaledValue(profit#49)), sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#48))#73, sum(UnscaledValue(return_amt#50))#74, sum(UnscaledValue(profit#49))#75, sum(UnscaledValue(net_loss#51))#76] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#48))#73,17,2) AS sales#77, MakeDecimal(sum(UnscaledValue(return_amt#50))#74,17,2) AS returns#78, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#49))#75,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#51))#76,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#79, catalog channel AS channel#80, concat(catalog_page, cp_catalog_page_id#64) AS id#81] (43) Scan parquet default.web_sales -Output [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] +Output [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#89), dynamicpruningexpression(ws_sold_date_sk#89 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#85), dynamicpruningexpression(ws_sold_date_sk#85 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_web_site_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 13] -Input [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] (45) Filter [codegen id : 13] -Input [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] -Condition : isnotnull(ws_web_site_sk#86) +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] +Condition : isnotnull(ws_web_site_sk#82) (46) Project [codegen id : 13] -Output [6]: [ws_web_site_sk#86 AS wsr_web_site_sk#90, ws_sold_date_sk#89 AS date_sk#91, ws_ext_sales_price#87 AS sales_price#92, ws_net_profit#88 AS profit#93, 0.00 AS return_amt#94, 0.00 AS net_loss#95] -Input [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] +Output [6]: [ws_web_site_sk#82 AS wsr_web_site_sk#86, ws_sold_date_sk#85 AS date_sk#87, ws_ext_sales_price#83 AS sales_price#88, ws_net_profit#84 AS profit#89, 0.00 AS return_amt#90, 0.00 AS net_loss#91] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] (47) Scan parquet default.web_returns -Output [5]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100] +Output [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#100), dynamicpruningexpression(wr_returned_date_sk#100 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(wr_returned_date_sk#96), dynamicpruningexpression(wr_returned_date_sk#96 IN dynamicpruning#5)] ReadSchema: struct (48) ColumnarToRow [codegen id : 14] -Input [5]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100] +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] (49) BroadcastExchange -Input [5]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295))),false), [id=#101] +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295))),false), [plan_id=5] (50) Scan parquet default.web_sales -Output [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] +Output [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] ReadSchema: struct (51) ColumnarToRow -Input [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] (52) Filter -Input [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] -Condition : ((isnotnull(ws_item_sk#102) AND isnotnull(ws_order_number#104)) AND isnotnull(ws_web_site_sk#103)) +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] +Condition : ((isnotnull(ws_item_sk#97) AND isnotnull(ws_order_number#99)) AND isnotnull(ws_web_site_sk#98)) (53) Project -Output [3]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104] -Input [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] +Output [3]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] (54) BroadcastHashJoin [codegen id : 15] -Left keys [2]: [wr_item_sk#96, wr_order_number#97] -Right keys [2]: [ws_item_sk#102, ws_order_number#104] +Left keys [2]: [wr_item_sk#92, wr_order_number#93] +Right keys [2]: [ws_item_sk#97, ws_order_number#99] Join condition: None (55) Project [codegen id : 15] -Output [6]: [ws_web_site_sk#103 AS wsr_web_site_sk#106, wr_returned_date_sk#100 AS date_sk#107, 0.00 AS sales_price#108, 0.00 AS profit#109, wr_return_amt#98 AS return_amt#110, wr_net_loss#99 AS net_loss#111] -Input [8]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100, ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104] +Output [6]: [ws_web_site_sk#98 AS wsr_web_site_sk#101, wr_returned_date_sk#96 AS date_sk#102, 0.00 AS sales_price#103, 0.00 AS profit#104, wr_return_amt#94 AS return_amt#105, wr_net_loss#95 AS net_loss#106] +Input [8]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96, ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] (56) Union (57) ReusedExchange [Reuses operator id: 79] -Output [1]: [d_date_sk#112] +Output [1]: [d_date_sk#107] (58) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [date_sk#91] -Right keys [1]: [d_date_sk#112] +Left keys [1]: [date_sk#87] +Right keys [1]: [d_date_sk#107] Join condition: None (59) Project [codegen id : 18] -Output [5]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95] -Input [7]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, d_date_sk#112] +Output [5]: [wsr_web_site_sk#86, sales_price#88, profit#89, return_amt#90, net_loss#91] +Input [7]: [wsr_web_site_sk#86, date_sk#87, sales_price#88, profit#89, return_amt#90, net_loss#91, d_date_sk#107] (60) Scan parquet default.web_site -Output [2]: [web_site_sk#113, web_site_id#114] +Output [2]: [web_site_sk#108, web_site_id#109] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct (61) ColumnarToRow [codegen id : 17] -Input [2]: [web_site_sk#113, web_site_id#114] +Input [2]: [web_site_sk#108, web_site_id#109] (62) Filter [codegen id : 17] -Input [2]: [web_site_sk#113, web_site_id#114] -Condition : isnotnull(web_site_sk#113) +Input [2]: [web_site_sk#108, web_site_id#109] +Condition : isnotnull(web_site_sk#108) (63) BroadcastExchange -Input [2]: [web_site_sk#113, web_site_id#114] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +Input [2]: [web_site_sk#108, web_site_id#109] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] (64) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [wsr_web_site_sk#90] -Right keys [1]: [web_site_sk#113] +Left keys [1]: [wsr_web_site_sk#86] +Right keys [1]: [web_site_sk#108] Join condition: None (65) Project [codegen id : 18] -Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#114] -Input [7]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#113, web_site_id#114] +Output [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#109] +Input [7]: [wsr_web_site_sk#86, sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_sk#108, web_site_id#109] (66) HashAggregate [codegen id : 18] -Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#114] -Keys [1]: [web_site_id#114] -Functions [4]: [partial_sum(UnscaledValue(sales_price#92)), partial_sum(UnscaledValue(return_amt#94)), partial_sum(UnscaledValue(profit#93)), partial_sum(UnscaledValue(net_loss#95))] -Aggregate Attributes [4]: [sum#116, sum#117, sum#118, sum#119] -Results [5]: [web_site_id#114, sum#120, sum#121, sum#122, sum#123] +Input [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#109] +Keys [1]: [web_site_id#109] +Functions [4]: [partial_sum(UnscaledValue(sales_price#88)), partial_sum(UnscaledValue(return_amt#90)), partial_sum(UnscaledValue(profit#89)), partial_sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum#110, sum#111, sum#112, sum#113] +Results [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] (67) Exchange -Input [5]: [web_site_id#114, sum#120, sum#121, sum#122, sum#123] -Arguments: hashpartitioning(web_site_id#114, 5), ENSURE_REQUIREMENTS, [id=#124] +Input [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] +Arguments: hashpartitioning(web_site_id#109, 5), ENSURE_REQUIREMENTS, [plan_id=7] (68) HashAggregate [codegen id : 19] -Input [5]: [web_site_id#114, sum#120, sum#121, sum#122, sum#123] -Keys [1]: [web_site_id#114] -Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#125, sum(UnscaledValue(return_amt#94))#126, sum(UnscaledValue(profit#93))#127, sum(UnscaledValue(net_loss#95))#128] -Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#92))#125,17,2) AS sales#129, MakeDecimal(sum(UnscaledValue(return_amt#94))#126,17,2) AS returns#130, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#127,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#128,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#131, web channel AS channel#132, concat(web_site, web_site_id#114) AS id#133] +Input [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] +Keys [1]: [web_site_id#109] +Functions [4]: [sum(UnscaledValue(sales_price#88)), sum(UnscaledValue(return_amt#90)), sum(UnscaledValue(profit#89)), sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#88))#118, sum(UnscaledValue(return_amt#90))#119, sum(UnscaledValue(profit#89))#120, sum(UnscaledValue(net_loss#91))#121] +Results [5]: [MakeDecimal(sum(UnscaledValue(sales_price#88))#118,17,2) AS sales#122, MakeDecimal(sum(UnscaledValue(return_amt#90))#119,17,2) AS returns#123, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#89))#120,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#91))#121,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#124, web channel AS channel#125, concat(web_site, web_site_id#109) AS id#126] (69) Union (70) Expand [codegen id : 20] -Input [5]: [sales#39, returns#40, profit#41, channel#42, id#43] -Arguments: [[sales#39, returns#40, profit#41, channel#42, id#43, 0], [sales#39, returns#40, profit#41, channel#42, null, 1], [sales#39, returns#40, profit#41, null, null, 3]], [sales#39, returns#40, profit#41, channel#134, id#135, spark_grouping_id#136] +Input [5]: [sales#37, returns#38, profit#39, channel#40, id#41] +Arguments: [[sales#37, returns#38, profit#39, channel#40, id#41, 0], [sales#37, returns#38, profit#39, channel#40, null, 1], [sales#37, returns#38, profit#39, null, null, 3]], [sales#37, returns#38, profit#39, channel#127, id#128, spark_grouping_id#129] (71) HashAggregate [codegen id : 20] -Input [6]: [sales#39, returns#40, profit#41, channel#134, id#135, spark_grouping_id#136] -Keys [3]: [channel#134, id#135, spark_grouping_id#136] -Functions [3]: [partial_sum(sales#39), partial_sum(returns#40), partial_sum(profit#41)] -Aggregate Attributes [6]: [sum#137, isEmpty#138, sum#139, isEmpty#140, sum#141, isEmpty#142] -Results [9]: [channel#134, id#135, spark_grouping_id#136, sum#143, isEmpty#144, sum#145, isEmpty#146, sum#147, isEmpty#148] +Input [6]: [sales#37, returns#38, profit#39, channel#127, id#128, spark_grouping_id#129] +Keys [3]: [channel#127, id#128, spark_grouping_id#129] +Functions [3]: [partial_sum(sales#37), partial_sum(returns#38), partial_sum(profit#39)] +Aggregate Attributes [6]: [sum#130, isEmpty#131, sum#132, isEmpty#133, sum#134, isEmpty#135] +Results [9]: [channel#127, id#128, spark_grouping_id#129, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] (72) Exchange -Input [9]: [channel#134, id#135, spark_grouping_id#136, sum#143, isEmpty#144, sum#145, isEmpty#146, sum#147, isEmpty#148] -Arguments: hashpartitioning(channel#134, id#135, spark_grouping_id#136, 5), ENSURE_REQUIREMENTS, [id=#149] +Input [9]: [channel#127, id#128, spark_grouping_id#129, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Arguments: hashpartitioning(channel#127, id#128, spark_grouping_id#129, 5), ENSURE_REQUIREMENTS, [plan_id=8] (73) HashAggregate [codegen id : 21] -Input [9]: [channel#134, id#135, spark_grouping_id#136, sum#143, isEmpty#144, sum#145, isEmpty#146, sum#147, isEmpty#148] -Keys [3]: [channel#134, id#135, spark_grouping_id#136] -Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] -Aggregate Attributes [3]: [sum(sales#39)#150, sum(returns#40)#151, sum(profit#41)#152] -Results [5]: [channel#134, id#135, sum(sales#39)#150 AS sales#153, sum(returns#40)#151 AS returns#154, sum(profit#41)#152 AS profit#155] +Input [9]: [channel#127, id#128, spark_grouping_id#129, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Keys [3]: [channel#127, id#128, spark_grouping_id#129] +Functions [3]: [sum(sales#37), sum(returns#38), sum(profit#39)] +Aggregate Attributes [3]: [sum(sales#37)#142, sum(returns#38)#143, sum(profit#39)#144] +Results [5]: [channel#127, id#128, sum(sales#37)#142 AS sales#145, sum(returns#38)#143 AS returns#146, sum(profit#39)#144 AS profit#147] (74) TakeOrderedAndProject -Input [5]: [channel#134, id#135, sales#153, returns#154, profit#155] -Arguments: 100, [channel#134 ASC NULLS FIRST, id#135 ASC NULLS FIRST], [channel#134, id#135, sales#153, returns#154, profit#155] +Input [5]: [channel#127, id#128, sales#145, returns#146, profit#147] +Arguments: 100, [channel#127 ASC NULLS FIRST, id#128 ASC NULLS FIRST], [channel#127, id#128, sales#145, returns#146, profit#147] ===== Subqueries ===== @@ -427,35 +427,35 @@ BroadcastExchange (79) (75) Scan parquet default.date_dim -Output [2]: [d_date_sk#22, d_date#156] +Output [2]: [d_date_sk#22, d_date#148] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-06), IsNotNull(d_date_sk)] ReadSchema: struct (76) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#22, d_date#156] +Input [2]: [d_date_sk#22, d_date#148] (77) Filter [codegen id : 1] -Input [2]: [d_date_sk#22, d_date#156] -Condition : (((isnotnull(d_date#156) AND (d_date#156 >= 2000-08-23)) AND (d_date#156 <= 2000-09-06)) AND isnotnull(d_date_sk#22)) +Input [2]: [d_date_sk#22, d_date#148] +Condition : (((isnotnull(d_date#148) AND (d_date#148 >= 2000-08-23)) AND (d_date#148 <= 2000-09-06)) AND isnotnull(d_date_sk#22)) (78) Project [codegen id : 1] Output [1]: [d_date_sk#22] -Input [2]: [d_date_sk#22, d_date#156] +Input [2]: [d_date_sk#22, d_date#148] (79) BroadcastExchange Input [1]: [d_date_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#157] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] Subquery:2 Hosting operator id = 5 Hosting Expression = sr_returned_date_sk#15 IN dynamicpruning#5 -Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#47 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#45 IN dynamicpruning#5 -Subquery:4 Hosting operator id = 26 Hosting Expression = cr_returned_date_sk#57 IN dynamicpruning#5 +Subquery:4 Hosting operator id = 26 Hosting Expression = cr_returned_date_sk#55 IN dynamicpruning#5 -Subquery:5 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#89 IN dynamicpruning#5 +Subquery:5 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#85 IN dynamicpruning#5 -Subquery:6 Hosting operator id = 47 Hosting Expression = wr_returned_date_sk#100 IN dynamicpruning#5 +Subquery:6 Hosting operator id = 47 Hosting Expression = wr_returned_date_sk#96 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt index 97f8d567853e7..c286bc1e2d331 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50.sf100/explain.txt @@ -61,119 +61,119 @@ Input [5]: [sr_item_sk#1, sr_customer_sk#2, sr_ticket_number#3, sr_returned_date (7) Exchange Input [4]: [sr_item_sk#1, sr_customer_sk#2, sr_ticket_number#3, sr_returned_date_sk#4] -Arguments: hashpartitioning(sr_ticket_number#3, sr_item_sk#1, sr_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#7] +Arguments: hashpartitioning(sr_ticket_number#3, sr_item_sk#1, sr_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=1] (8) Sort [codegen id : 3] Input [4]: [sr_item_sk#1, sr_customer_sk#2, sr_ticket_number#3, sr_returned_date_sk#4] Arguments: [sr_ticket_number#3 ASC NULLS FIRST, sr_item_sk#1 ASC NULLS FIRST, sr_customer_sk#2 ASC NULLS FIRST], false, 0 (9) Scan parquet default.store_sales -Output [5]: [ss_item_sk#8, ss_customer_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_sold_date_sk#12] +Output [5]: [ss_item_sk#7, ss_customer_sk#8, ss_store_sk#9, ss_ticket_number#10, ss_sold_date_sk#11] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#12)] +PartitionFilters: [isnotnull(ss_sold_date_sk#11)] PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (10) ColumnarToRow [codegen id : 4] -Input [5]: [ss_item_sk#8, ss_customer_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_sold_date_sk#12] +Input [5]: [ss_item_sk#7, ss_customer_sk#8, ss_store_sk#9, ss_ticket_number#10, ss_sold_date_sk#11] (11) Filter [codegen id : 4] -Input [5]: [ss_item_sk#8, ss_customer_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_sold_date_sk#12] -Condition : (((isnotnull(ss_ticket_number#11) AND isnotnull(ss_item_sk#8)) AND isnotnull(ss_customer_sk#9)) AND isnotnull(ss_store_sk#10)) +Input [5]: [ss_item_sk#7, ss_customer_sk#8, ss_store_sk#9, ss_ticket_number#10, ss_sold_date_sk#11] +Condition : (((isnotnull(ss_ticket_number#10) AND isnotnull(ss_item_sk#7)) AND isnotnull(ss_customer_sk#8)) AND isnotnull(ss_store_sk#9)) (12) Exchange -Input [5]: [ss_item_sk#8, ss_customer_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_sold_date_sk#12] -Arguments: hashpartitioning(ss_ticket_number#11, ss_item_sk#8, ss_customer_sk#9, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [5]: [ss_item_sk#7, ss_customer_sk#8, ss_store_sk#9, ss_ticket_number#10, ss_sold_date_sk#11] +Arguments: hashpartitioning(ss_ticket_number#10, ss_item_sk#7, ss_customer_sk#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (13) Sort [codegen id : 5] -Input [5]: [ss_item_sk#8, ss_customer_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_sold_date_sk#12] -Arguments: [ss_ticket_number#11 ASC NULLS FIRST, ss_item_sk#8 ASC NULLS FIRST, ss_customer_sk#9 ASC NULLS FIRST], false, 0 +Input [5]: [ss_item_sk#7, ss_customer_sk#8, ss_store_sk#9, ss_ticket_number#10, ss_sold_date_sk#11] +Arguments: [ss_ticket_number#10 ASC NULLS FIRST, ss_item_sk#7 ASC NULLS FIRST, ss_customer_sk#8 ASC NULLS FIRST], false, 0 (14) SortMergeJoin [codegen id : 8] Left keys [3]: [sr_ticket_number#3, sr_item_sk#1, sr_customer_sk#2] -Right keys [3]: [ss_ticket_number#11, ss_item_sk#8, ss_customer_sk#9] +Right keys [3]: [ss_ticket_number#10, ss_item_sk#7, ss_customer_sk#8] Join condition: None (15) Project [codegen id : 8] -Output [3]: [sr_returned_date_sk#4, ss_store_sk#10, ss_sold_date_sk#12] -Input [9]: [sr_item_sk#1, sr_customer_sk#2, sr_ticket_number#3, sr_returned_date_sk#4, ss_item_sk#8, ss_customer_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_sold_date_sk#12] +Output [3]: [sr_returned_date_sk#4, ss_store_sk#9, ss_sold_date_sk#11] +Input [9]: [sr_item_sk#1, sr_customer_sk#2, sr_ticket_number#3, sr_returned_date_sk#4, ss_item_sk#7, ss_customer_sk#8, ss_store_sk#9, ss_ticket_number#10, ss_sold_date_sk#11] (16) Scan parquet default.date_dim -Output [1]: [d_date_sk#14] +Output [1]: [d_date_sk#12] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 6] -Input [1]: [d_date_sk#14] +Input [1]: [d_date_sk#12] (18) Filter [codegen id : 6] -Input [1]: [d_date_sk#14] -Condition : isnotnull(d_date_sk#14) +Input [1]: [d_date_sk#12] +Condition : isnotnull(d_date_sk#12) (19) BroadcastExchange -Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (20) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_sold_date_sk#12] -Right keys [1]: [d_date_sk#14] +Left keys [1]: [ss_sold_date_sk#11] +Right keys [1]: [d_date_sk#12] Join condition: None (21) Project [codegen id : 8] -Output [3]: [sr_returned_date_sk#4, ss_store_sk#10, ss_sold_date_sk#12] -Input [4]: [sr_returned_date_sk#4, ss_store_sk#10, ss_sold_date_sk#12, d_date_sk#14] +Output [3]: [sr_returned_date_sk#4, ss_store_sk#9, ss_sold_date_sk#11] +Input [4]: [sr_returned_date_sk#4, ss_store_sk#9, ss_sold_date_sk#11, d_date_sk#12] (22) Scan parquet default.store -Output [11]: [s_store_sk#16, s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26] +Output [11]: [s_store_sk#13, s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 7] -Input [11]: [s_store_sk#16, s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26] +Input [11]: [s_store_sk#13, s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23] (24) Filter [codegen id : 7] -Input [11]: [s_store_sk#16, s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26] -Condition : isnotnull(s_store_sk#16) +Input [11]: [s_store_sk#13, s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23] +Condition : isnotnull(s_store_sk#13) (25) BroadcastExchange -Input [11]: [s_store_sk#16, s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] +Input [11]: [s_store_sk#13, s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (26) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_store_sk#10] -Right keys [1]: [s_store_sk#16] +Left keys [1]: [ss_store_sk#9] +Right keys [1]: [s_store_sk#13] Join condition: None (27) Project [codegen id : 8] -Output [12]: [ss_sold_date_sk#12, sr_returned_date_sk#4, s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26] -Input [14]: [sr_returned_date_sk#4, ss_store_sk#10, ss_sold_date_sk#12, s_store_sk#16, s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26] +Output [12]: [ss_sold_date_sk#11, sr_returned_date_sk#4, s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23] +Input [14]: [sr_returned_date_sk#4, ss_store_sk#9, ss_sold_date_sk#11, s_store_sk#13, s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23] (28) HashAggregate [codegen id : 8] -Input [12]: [ss_sold_date_sk#12, sr_returned_date_sk#4, s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26] -Keys [10]: [s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26] -Functions [5]: [partial_sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 30) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 60) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 90) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 120) THEN 1 ELSE 0 END)] -Aggregate Attributes [5]: [sum#28, sum#29, sum#30, sum#31, sum#32] -Results [15]: [s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26, sum#33, sum#34, sum#35, sum#36, sum#37] +Input [12]: [ss_sold_date_sk#11, sr_returned_date_sk#4, s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23] +Keys [10]: [s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23] +Functions [5]: [partial_sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 30) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 60) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 90) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum#24, sum#25, sum#26, sum#27, sum#28] +Results [15]: [s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23, sum#29, sum#30, sum#31, sum#32, sum#33] (29) Exchange -Input [15]: [s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26, sum#33, sum#34, sum#35, sum#36, sum#37] -Arguments: hashpartitioning(s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [15]: [s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23, sum#29, sum#30, sum#31, sum#32, sum#33] +Arguments: hashpartitioning(s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23, 5), ENSURE_REQUIREMENTS, [plan_id=5] (30) HashAggregate [codegen id : 9] -Input [15]: [s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26, sum#33, sum#34, sum#35, sum#36, sum#37] -Keys [10]: [s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26] -Functions [5]: [sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 30) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 60) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 90) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 120) THEN 1 ELSE 0 END)] -Aggregate Attributes [5]: [sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END)#39, sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 30) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END)#40, sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 60) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END)#41, sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 90) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END)#42, sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 120) THEN 1 ELSE 0 END)#43] -Results [15]: [s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26, sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 30) THEN 1 ELSE 0 END)#39 AS 30 days #44, sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 30) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 60)) THEN 1 ELSE 0 END)#40 AS 31 - 60 days #45, sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 60) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 90)) THEN 1 ELSE 0 END)#41 AS 61 - 90 days #46, sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 90) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#12) <= 120)) THEN 1 ELSE 0 END)#42 AS 91 - 120 days #47, sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#12) > 120) THEN 1 ELSE 0 END)#43 AS >120 days #48] +Input [15]: [s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23, sum#29, sum#30, sum#31, sum#32, sum#33] +Keys [10]: [s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23] +Functions [5]: [sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 30) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 60) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 90) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 120) THEN 1 ELSE 0 END)] +Aggregate Attributes [5]: [sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 30) THEN 1 ELSE 0 END)#34, sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 30) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 60)) THEN 1 ELSE 0 END)#35, sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 60) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 90)) THEN 1 ELSE 0 END)#36, sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 90) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 120)) THEN 1 ELSE 0 END)#37, sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 120) THEN 1 ELSE 0 END)#38] +Results [15]: [s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23, sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 30) THEN 1 ELSE 0 END)#34 AS 30 days #39, sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 30) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 60)) THEN 1 ELSE 0 END)#35 AS 31 - 60 days #40, sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 60) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 90)) THEN 1 ELSE 0 END)#36 AS 61 - 90 days #41, sum(CASE WHEN (((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 90) AND ((sr_returned_date_sk#4 - ss_sold_date_sk#11) <= 120)) THEN 1 ELSE 0 END)#37 AS 91 - 120 days #42, sum(CASE WHEN ((sr_returned_date_sk#4 - ss_sold_date_sk#11) > 120) THEN 1 ELSE 0 END)#38 AS >120 days #43] (31) TakeOrderedAndProject -Input [15]: [s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26, 30 days #44, 31 - 60 days #45, 61 - 90 days #46, 91 - 120 days #47, >120 days #48] -Arguments: 100, [s_store_name#17 ASC NULLS FIRST, s_company_id#18 ASC NULLS FIRST, s_street_number#19 ASC NULLS FIRST, s_street_name#20 ASC NULLS FIRST, s_street_type#21 ASC NULLS FIRST, s_suite_number#22 ASC NULLS FIRST, s_city#23 ASC NULLS FIRST, s_county#24 ASC NULLS FIRST, s_state#25 ASC NULLS FIRST, s_zip#26 ASC NULLS FIRST], [s_store_name#17, s_company_id#18, s_street_number#19, s_street_name#20, s_street_type#21, s_suite_number#22, s_city#23, s_county#24, s_state#25, s_zip#26, 30 days #44, 31 - 60 days #45, 61 - 90 days #46, 91 - 120 days #47, >120 days #48] +Input [15]: [s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23, 30 days #39, 31 - 60 days #40, 61 - 90 days #41, 91 - 120 days #42, >120 days #43] +Arguments: 100, [s_store_name#14 ASC NULLS FIRST, s_company_id#15 ASC NULLS FIRST, s_street_number#16 ASC NULLS FIRST, s_street_name#17 ASC NULLS FIRST, s_street_type#18 ASC NULLS FIRST, s_suite_number#19 ASC NULLS FIRST, s_city#20 ASC NULLS FIRST, s_county#21 ASC NULLS FIRST, s_state#22 ASC NULLS FIRST, s_zip#23 ASC NULLS FIRST], [s_store_name#14, s_company_id#15, s_street_number#16, s_street_name#17, s_street_type#18, s_suite_number#19, s_city#20, s_county#21, s_state#22, s_zip#23, 30 days #39, 31 - 60 days #40, 61 - 90 days #41, 91 - 120 days #42, >120 days #43] ===== Subqueries ===== @@ -186,25 +186,25 @@ BroadcastExchange (36) (32) Scan parquet default.date_dim -Output [3]: [d_date_sk#6, d_year#49, d_moy#50] +Output [3]: [d_date_sk#6, d_year#44, d_moy#45] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)] ReadSchema: struct (33) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#49, d_moy#50] +Input [3]: [d_date_sk#6, d_year#44, d_moy#45] (34) Filter [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#49, d_moy#50] -Condition : ((((isnotnull(d_year#49) AND isnotnull(d_moy#50)) AND (d_year#49 = 2001)) AND (d_moy#50 = 8)) AND isnotnull(d_date_sk#6)) +Input [3]: [d_date_sk#6, d_year#44, d_moy#45] +Condition : ((((isnotnull(d_year#44) AND isnotnull(d_moy#45)) AND (d_year#44 = 2001)) AND (d_moy#45 = 8)) AND isnotnull(d_date_sk#6)) (35) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [3]: [d_date_sk#6, d_year#49, d_moy#50] +Input [3]: [d_date_sk#6, d_year#44, d_moy#45] (36) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#51] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt index 060953c80f295..925bee4192e0b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q50/explain.txt @@ -61,7 +61,7 @@ Condition : ((isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#6)) AND isn (7) BroadcastExchange Input [4]: [sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] -Arguments: HashedRelationBroadcastMode(List(input[2, int, false], input[0, int, false], input[1, int, false]),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(input[2, int, false], input[0, int, false], input[1, int, false]),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 5] Left keys [3]: [ss_ticket_number#4, ss_item_sk#1, ss_customer_sk#2] @@ -73,92 +73,92 @@ Output [3]: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9] Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_sold_date_sk#5, sr_item_sk#6, sr_customer_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] (10) Scan parquet default.store -Output [11]: [s_store_sk#12, s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22] +Output [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [11]: [s_store_sk#12, s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22] +Input [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] (12) Filter [codegen id : 2] -Input [11]: [s_store_sk#12, s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22] -Condition : isnotnull(s_store_sk#12) +Input [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Condition : isnotnull(s_store_sk#11) (13) BroadcastExchange -Input [11]: [s_store_sk#12, s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] +Input [11]: [s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] +Right keys [1]: [s_store_sk#11] Join condition: None (15) Project [codegen id : 5] -Output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22] -Input [14]: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_sk#12, s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22] +Output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Input [14]: [ss_store_sk#3, ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_sk#11, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] (16) Scan parquet default.date_dim -Output [1]: [d_date_sk#24] +Output [1]: [d_date_sk#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 3] -Input [1]: [d_date_sk#24] +Input [1]: [d_date_sk#22] (18) Filter [codegen id : 3] -Input [1]: [d_date_sk#24] -Condition : isnotnull(d_date_sk#24) +Input [1]: [d_date_sk#22] +Condition : isnotnull(d_date_sk#22) (19) BroadcastExchange -Input [1]: [d_date_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] +Input [1]: [d_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (20) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_sold_date_sk#5] -Right keys [1]: [d_date_sk#24] +Right keys [1]: [d_date_sk#22] Join condition: None (21) Project [codegen id : 5] -Output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22] -Input [13]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22, d_date_sk#24] +Output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Input [13]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, d_date_sk#22] (22) ReusedExchange [Reuses operator id: 33] -Output [1]: [d_date_sk#26] +Output [1]: [d_date_sk#23] (23) BroadcastHashJoin [codegen id : 5] Left keys [1]: [sr_returned_date_sk#9] -Right keys [1]: [d_date_sk#26] +Right keys [1]: [d_date_sk#23] Join condition: None (24) Project [codegen id : 5] -Output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22] -Input [13]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22, d_date_sk#26] +Output [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Input [13]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, d_date_sk#23] (25) HashAggregate [codegen id : 5] -Input [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22] -Keys [10]: [s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22] +Input [12]: [ss_sold_date_sk#5, sr_returned_date_sk#9, s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] +Keys [10]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] Functions [5]: [partial_sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] -Aggregate Attributes [5]: [sum#27, sum#28, sum#29, sum#30, sum#31] -Results [15]: [s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22, sum#32, sum#33, sum#34, sum#35, sum#36] +Aggregate Attributes [5]: [sum#24, sum#25, sum#26, sum#27, sum#28] +Results [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#29, sum#30, sum#31, sum#32, sum#33] (26) Exchange -Input [15]: [s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22, sum#32, sum#33, sum#34, sum#35, sum#36] -Arguments: hashpartitioning(s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22, 5), ENSURE_REQUIREMENTS, [id=#37] +Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#29, sum#30, sum#31, sum#32, sum#33] +Arguments: hashpartitioning(s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 5), ENSURE_REQUIREMENTS, [plan_id=4] (27) HashAggregate [codegen id : 6] -Input [15]: [s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22, sum#32, sum#33, sum#34, sum#35, sum#36] -Keys [10]: [s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22] +Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum#29, sum#30, sum#31, sum#32, sum#33] +Keys [10]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21] Functions [5]: [sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] -Aggregate Attributes [5]: [sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#38, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#39, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#40, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#41, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#42] -Results [15]: [s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#38 AS 30 days #43, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#39 AS 31 - 60 days #44, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#40 AS 61 - 90 days #45, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#41 AS 91 - 120 days #46, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#42 AS >120 days #47] +Aggregate Attributes [5]: [sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#34, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#35, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#36, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#37, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#38] +Results [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#34 AS 30 days #39, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 30) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#35 AS 31 - 60 days #40, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 60) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#36 AS 61 - 90 days #41, sum(CASE WHEN (((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 90) AND ((sr_returned_date_sk#9 - ss_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#37 AS 91 - 120 days #42, sum(CASE WHEN ((sr_returned_date_sk#9 - ss_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#38 AS >120 days #43] (28) TakeOrderedAndProject -Input [15]: [s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22, 30 days #43, 31 - 60 days #44, 61 - 90 days #45, 91 - 120 days #46, >120 days #47] -Arguments: 100, [s_store_name#13 ASC NULLS FIRST, s_company_id#14 ASC NULLS FIRST, s_street_number#15 ASC NULLS FIRST, s_street_name#16 ASC NULLS FIRST, s_street_type#17 ASC NULLS FIRST, s_suite_number#18 ASC NULLS FIRST, s_city#19 ASC NULLS FIRST, s_county#20 ASC NULLS FIRST, s_state#21 ASC NULLS FIRST, s_zip#22 ASC NULLS FIRST], [s_store_name#13, s_company_id#14, s_street_number#15, s_street_name#16, s_street_type#17, s_suite_number#18, s_city#19, s_county#20, s_state#21, s_zip#22, 30 days #43, 31 - 60 days #44, 61 - 90 days #45, 91 - 120 days #46, >120 days #47] +Input [15]: [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 30 days #39, 31 - 60 days #40, 61 - 90 days #41, 91 - 120 days #42, >120 days #43] +Arguments: 100, [s_store_name#12 ASC NULLS FIRST, s_company_id#13 ASC NULLS FIRST, s_street_number#14 ASC NULLS FIRST, s_street_name#15 ASC NULLS FIRST, s_street_type#16 ASC NULLS FIRST, s_suite_number#17 ASC NULLS FIRST, s_city#18 ASC NULLS FIRST, s_county#19 ASC NULLS FIRST, s_state#20 ASC NULLS FIRST, s_zip#21 ASC NULLS FIRST], [s_store_name#12, s_company_id#13, s_street_number#14, s_street_name#15, s_street_type#16, s_suite_number#17, s_city#18, s_county#19, s_state#20, s_zip#21, 30 days #39, 31 - 60 days #40, 61 - 90 days #41, 91 - 120 days #42, >120 days #43] ===== Subqueries ===== @@ -171,25 +171,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [3]: [d_date_sk#26, d_year#48, d_moy#49] +Output [3]: [d_date_sk#23, d_year#44, d_moy#45] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,8), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#26, d_year#48, d_moy#49] +Input [3]: [d_date_sk#23, d_year#44, d_moy#45] (31) Filter [codegen id : 1] -Input [3]: [d_date_sk#26, d_year#48, d_moy#49] -Condition : ((((isnotnull(d_year#48) AND isnotnull(d_moy#49)) AND (d_year#48 = 2001)) AND (d_moy#49 = 8)) AND isnotnull(d_date_sk#26)) +Input [3]: [d_date_sk#23, d_year#44, d_moy#45] +Condition : ((((isnotnull(d_year#44) AND isnotnull(d_moy#45)) AND (d_year#44 = 2001)) AND (d_moy#45 = 8)) AND isnotnull(d_date_sk#23)) (32) Project [codegen id : 1] -Output [1]: [d_date_sk#26] -Input [3]: [d_date_sk#26, d_year#48, d_moy#49] +Output [1]: [d_date_sk#23] +Input [3]: [d_date_sk#23, d_year#44, d_moy#45] (33) BroadcastExchange -Input [1]: [d_date_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] +Input [1]: [d_date_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.sf100/explain.txt index cbb189e2de060..1b1b37929bb6b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51.sf100/explain.txt @@ -74,136 +74,136 @@ Results [3]: [ws_item_sk#1, d_date#6, sum#8] (8) Exchange Input [3]: [ws_item_sk#1, d_date#6, sum#8] -Arguments: hashpartitioning(ws_item_sk#1, d_date#6, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ws_item_sk#1, d_date#6, 5), ENSURE_REQUIREMENTS, [plan_id=1] (9) HashAggregate [codegen id : 3] Input [3]: [ws_item_sk#1, d_date#6, sum#8] Keys [2]: [ws_item_sk#1, d_date#6] Functions [1]: [sum(UnscaledValue(ws_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#2))#10] -Results [4]: [ws_item_sk#1 AS item_sk#11, d_date#6, MakeDecimal(sum(UnscaledValue(ws_sales_price#2))#10,17,2) AS _w0#12, ws_item_sk#1] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#2))#9] +Results [4]: [ws_item_sk#1 AS item_sk#10, d_date#6, MakeDecimal(sum(UnscaledValue(ws_sales_price#2))#9,17,2) AS _w0#11, ws_item_sk#1] (10) Exchange -Input [4]: [item_sk#11, d_date#6, _w0#12, ws_item_sk#1] -Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [4]: [item_sk#10, d_date#6, _w0#11, ws_item_sk#1] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [4]: [item_sk#11, d_date#6, _w0#12, ws_item_sk#1] +Input [4]: [item_sk#10, d_date#6, _w0#11, ws_item_sk#1] Arguments: [ws_item_sk#1 ASC NULLS FIRST, d_date#6 ASC NULLS FIRST], false, 0 (12) Window -Input [4]: [item_sk#11, d_date#6, _w0#12, ws_item_sk#1] -Arguments: [sum(_w0#12) windowspecdefinition(ws_item_sk#1, d_date#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#14], [ws_item_sk#1], [d_date#6 ASC NULLS FIRST] +Input [4]: [item_sk#10, d_date#6, _w0#11, ws_item_sk#1] +Arguments: [sum(_w0#11) windowspecdefinition(ws_item_sk#1, d_date#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#12], [ws_item_sk#1], [d_date#6 ASC NULLS FIRST] (13) Project [codegen id : 5] -Output [3]: [item_sk#11, d_date#6, cume_sales#14] -Input [5]: [item_sk#11, d_date#6, _w0#12, ws_item_sk#1, cume_sales#14] +Output [3]: [item_sk#10, d_date#6, cume_sales#12] +Input [5]: [item_sk#10, d_date#6, _w0#11, ws_item_sk#1, cume_sales#12] (14) Exchange -Input [3]: [item_sk#11, d_date#6, cume_sales#14] -Arguments: hashpartitioning(item_sk#11, d_date#6, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [3]: [item_sk#10, d_date#6, cume_sales#12] +Arguments: hashpartitioning(item_sk#10, d_date#6, 5), ENSURE_REQUIREMENTS, [plan_id=3] (15) Sort [codegen id : 6] -Input [3]: [item_sk#11, d_date#6, cume_sales#14] -Arguments: [item_sk#11 ASC NULLS FIRST, d_date#6 ASC NULLS FIRST], false, 0 +Input [3]: [item_sk#10, d_date#6, cume_sales#12] +Arguments: [item_sk#10 ASC NULLS FIRST, d_date#6 ASC NULLS FIRST], false, 0 (16) Scan parquet default.store_sales -Output [3]: [ss_item_sk#16, ss_sales_price#17, ss_sold_date_sk#18] +Output [3]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#18), dynamicpruningexpression(ss_sold_date_sk#18 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(ss_sold_date_sk#15), dynamicpruningexpression(ss_sold_date_sk#15 IN dynamicpruning#4)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 8] -Input [3]: [ss_item_sk#16, ss_sales_price#17, ss_sold_date_sk#18] +Input [3]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15] (18) Filter [codegen id : 8] -Input [3]: [ss_item_sk#16, ss_sales_price#17, ss_sold_date_sk#18] -Condition : isnotnull(ss_item_sk#16) +Input [3]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15] +Condition : isnotnull(ss_item_sk#13) (19) ReusedExchange [Reuses operator id: 42] -Output [2]: [d_date_sk#19, d_date#20] +Output [2]: [d_date_sk#16, d_date#17] (20) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_sold_date_sk#18] -Right keys [1]: [d_date_sk#19] +Left keys [1]: [ss_sold_date_sk#15] +Right keys [1]: [d_date_sk#16] Join condition: None (21) Project [codegen id : 8] -Output [3]: [ss_item_sk#16, ss_sales_price#17, d_date#20] -Input [5]: [ss_item_sk#16, ss_sales_price#17, ss_sold_date_sk#18, d_date_sk#19, d_date#20] +Output [3]: [ss_item_sk#13, ss_sales_price#14, d_date#17] +Input [5]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15, d_date_sk#16, d_date#17] (22) HashAggregate [codegen id : 8] -Input [3]: [ss_item_sk#16, ss_sales_price#17, d_date#20] -Keys [2]: [ss_item_sk#16, d_date#20] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#17))] -Aggregate Attributes [1]: [sum#21] -Results [3]: [ss_item_sk#16, d_date#20, sum#22] +Input [3]: [ss_item_sk#13, ss_sales_price#14, d_date#17] +Keys [2]: [ss_item_sk#13, d_date#17] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [ss_item_sk#13, d_date#17, sum#19] (23) Exchange -Input [3]: [ss_item_sk#16, d_date#20, sum#22] -Arguments: hashpartitioning(ss_item_sk#16, d_date#20, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [3]: [ss_item_sk#13, d_date#17, sum#19] +Arguments: hashpartitioning(ss_item_sk#13, d_date#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) HashAggregate [codegen id : 9] -Input [3]: [ss_item_sk#16, d_date#20, sum#22] -Keys [2]: [ss_item_sk#16, d_date#20] -Functions [1]: [sum(UnscaledValue(ss_sales_price#17))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#17))#24] -Results [4]: [ss_item_sk#16 AS item_sk#25, d_date#20, MakeDecimal(sum(UnscaledValue(ss_sales_price#17))#24,17,2) AS _w0#26, ss_item_sk#16] +Input [3]: [ss_item_sk#13, d_date#17, sum#19] +Keys [2]: [ss_item_sk#13, d_date#17] +Functions [1]: [sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#14))#20] +Results [4]: [ss_item_sk#13 AS item_sk#21, d_date#17, MakeDecimal(sum(UnscaledValue(ss_sales_price#14))#20,17,2) AS _w0#22, ss_item_sk#13] (25) Exchange -Input [4]: [item_sk#25, d_date#20, _w0#26, ss_item_sk#16] -Arguments: hashpartitioning(ss_item_sk#16, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [4]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13] +Arguments: hashpartitioning(ss_item_sk#13, 5), ENSURE_REQUIREMENTS, [plan_id=5] (26) Sort [codegen id : 10] -Input [4]: [item_sk#25, d_date#20, _w0#26, ss_item_sk#16] -Arguments: [ss_item_sk#16 ASC NULLS FIRST, d_date#20 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13] +Arguments: [ss_item_sk#13 ASC NULLS FIRST, d_date#17 ASC NULLS FIRST], false, 0 (27) Window -Input [4]: [item_sk#25, d_date#20, _w0#26, ss_item_sk#16] -Arguments: [sum(_w0#26) windowspecdefinition(ss_item_sk#16, d_date#20 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#28], [ss_item_sk#16], [d_date#20 ASC NULLS FIRST] +Input [4]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13] +Arguments: [sum(_w0#22) windowspecdefinition(ss_item_sk#13, d_date#17 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#23], [ss_item_sk#13], [d_date#17 ASC NULLS FIRST] (28) Project [codegen id : 11] -Output [3]: [item_sk#25, d_date#20, cume_sales#28] -Input [5]: [item_sk#25, d_date#20, _w0#26, ss_item_sk#16, cume_sales#28] +Output [3]: [item_sk#21, d_date#17, cume_sales#23] +Input [5]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13, cume_sales#23] (29) Exchange -Input [3]: [item_sk#25, d_date#20, cume_sales#28] -Arguments: hashpartitioning(item_sk#25, d_date#20, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [3]: [item_sk#21, d_date#17, cume_sales#23] +Arguments: hashpartitioning(item_sk#21, d_date#17, 5), ENSURE_REQUIREMENTS, [plan_id=6] (30) Sort [codegen id : 12] -Input [3]: [item_sk#25, d_date#20, cume_sales#28] -Arguments: [item_sk#25 ASC NULLS FIRST, d_date#20 ASC NULLS FIRST], false, 0 +Input [3]: [item_sk#21, d_date#17, cume_sales#23] +Arguments: [item_sk#21 ASC NULLS FIRST, d_date#17 ASC NULLS FIRST], false, 0 (31) SortMergeJoin [codegen id : 13] -Left keys [2]: [item_sk#11, d_date#6] -Right keys [2]: [item_sk#25, d_date#20] +Left keys [2]: [item_sk#10, d_date#6] +Right keys [2]: [item_sk#21, d_date#17] Join condition: None (32) Project [codegen id : 13] -Output [4]: [CASE WHEN isnotnull(item_sk#11) THEN item_sk#11 ELSE item_sk#25 END AS item_sk#30, CASE WHEN isnotnull(d_date#6) THEN d_date#6 ELSE d_date#20 END AS d_date#31, cume_sales#14 AS web_sales#32, cume_sales#28 AS store_sales#33] -Input [6]: [item_sk#11, d_date#6, cume_sales#14, item_sk#25, d_date#20, cume_sales#28] +Output [4]: [CASE WHEN isnotnull(item_sk#10) THEN item_sk#10 ELSE item_sk#21 END AS item_sk#24, CASE WHEN isnotnull(d_date#6) THEN d_date#6 ELSE d_date#17 END AS d_date#25, cume_sales#12 AS web_sales#26, cume_sales#23 AS store_sales#27] +Input [6]: [item_sk#10, d_date#6, cume_sales#12, item_sk#21, d_date#17, cume_sales#23] (33) Exchange -Input [4]: [item_sk#30, d_date#31, web_sales#32, store_sales#33] -Arguments: hashpartitioning(item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [4]: [item_sk#24, d_date#25, web_sales#26, store_sales#27] +Arguments: hashpartitioning(item_sk#24, 5), ENSURE_REQUIREMENTS, [plan_id=7] (34) Sort [codegen id : 14] -Input [4]: [item_sk#30, d_date#31, web_sales#32, store_sales#33] -Arguments: [item_sk#30 ASC NULLS FIRST, d_date#31 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#24, d_date#25, web_sales#26, store_sales#27] +Arguments: [item_sk#24 ASC NULLS FIRST, d_date#25 ASC NULLS FIRST], false, 0 (35) Window -Input [4]: [item_sk#30, d_date#31, web_sales#32, store_sales#33] -Arguments: [max(web_sales#32) windowspecdefinition(item_sk#30, d_date#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#35, max(store_sales#33) windowspecdefinition(item_sk#30, d_date#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#36], [item_sk#30], [d_date#31 ASC NULLS FIRST] +Input [4]: [item_sk#24, d_date#25, web_sales#26, store_sales#27] +Arguments: [max(web_sales#26) windowspecdefinition(item_sk#24, d_date#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#28, max(store_sales#27) windowspecdefinition(item_sk#24, d_date#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#29], [item_sk#24], [d_date#25 ASC NULLS FIRST] (36) Filter [codegen id : 15] -Input [6]: [item_sk#30, d_date#31, web_sales#32, store_sales#33, web_cumulative#35, store_cumulative#36] -Condition : ((isnotnull(web_cumulative#35) AND isnotnull(store_cumulative#36)) AND (web_cumulative#35 > store_cumulative#36)) +Input [6]: [item_sk#24, d_date#25, web_sales#26, store_sales#27, web_cumulative#28, store_cumulative#29] +Condition : ((isnotnull(web_cumulative#28) AND isnotnull(store_cumulative#29)) AND (web_cumulative#28 > store_cumulative#29)) (37) TakeOrderedAndProject -Input [6]: [item_sk#30, d_date#31, web_sales#32, store_sales#33, web_cumulative#35, store_cumulative#36] -Arguments: 100, [item_sk#30 ASC NULLS FIRST, d_date#31 ASC NULLS FIRST], [item_sk#30, d_date#31, web_sales#32, store_sales#33, web_cumulative#35, store_cumulative#36] +Input [6]: [item_sk#24, d_date#25, web_sales#26, store_sales#27, web_cumulative#28, store_cumulative#29] +Arguments: 100, [item_sk#24 ASC NULLS FIRST, d_date#25 ASC NULLS FIRST], [item_sk#24, d_date#25, web_sales#26, store_sales#27, web_cumulative#28, store_cumulative#29] ===== Subqueries ===== @@ -216,27 +216,27 @@ BroadcastExchange (42) (38) Scan parquet default.date_dim -Output [3]: [d_date_sk#5, d_date#6, d_month_seq#37] +Output [3]: [d_date_sk#5, d_date#6, d_month_seq#30] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (39) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#37] +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#30] (40) Filter [codegen id : 1] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#37] -Condition : (((isnotnull(d_month_seq#37) AND (d_month_seq#37 >= 1200)) AND (d_month_seq#37 <= 1211)) AND isnotnull(d_date_sk#5)) +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#30] +Condition : (((isnotnull(d_month_seq#30) AND (d_month_seq#30 >= 1200)) AND (d_month_seq#30 <= 1211)) AND isnotnull(d_date_sk#5)) (41) Project [codegen id : 1] Output [2]: [d_date_sk#5, d_date#6] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#37] +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#30] (42) BroadcastExchange Input [2]: [d_date_sk#5, d_date#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 16 Hosting Expression = ss_sold_date_sk#18 IN dynamicpruning#4 +Subquery:2 Hosting operator id = 16 Hosting Expression = ss_sold_date_sk#15 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt index cbb189e2de060..1b1b37929bb6b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q51/explain.txt @@ -74,136 +74,136 @@ Results [3]: [ws_item_sk#1, d_date#6, sum#8] (8) Exchange Input [3]: [ws_item_sk#1, d_date#6, sum#8] -Arguments: hashpartitioning(ws_item_sk#1, d_date#6, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ws_item_sk#1, d_date#6, 5), ENSURE_REQUIREMENTS, [plan_id=1] (9) HashAggregate [codegen id : 3] Input [3]: [ws_item_sk#1, d_date#6, sum#8] Keys [2]: [ws_item_sk#1, d_date#6] Functions [1]: [sum(UnscaledValue(ws_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#2))#10] -Results [4]: [ws_item_sk#1 AS item_sk#11, d_date#6, MakeDecimal(sum(UnscaledValue(ws_sales_price#2))#10,17,2) AS _w0#12, ws_item_sk#1] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#2))#9] +Results [4]: [ws_item_sk#1 AS item_sk#10, d_date#6, MakeDecimal(sum(UnscaledValue(ws_sales_price#2))#9,17,2) AS _w0#11, ws_item_sk#1] (10) Exchange -Input [4]: [item_sk#11, d_date#6, _w0#12, ws_item_sk#1] -Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [4]: [item_sk#10, d_date#6, _w0#11, ws_item_sk#1] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [4]: [item_sk#11, d_date#6, _w0#12, ws_item_sk#1] +Input [4]: [item_sk#10, d_date#6, _w0#11, ws_item_sk#1] Arguments: [ws_item_sk#1 ASC NULLS FIRST, d_date#6 ASC NULLS FIRST], false, 0 (12) Window -Input [4]: [item_sk#11, d_date#6, _w0#12, ws_item_sk#1] -Arguments: [sum(_w0#12) windowspecdefinition(ws_item_sk#1, d_date#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#14], [ws_item_sk#1], [d_date#6 ASC NULLS FIRST] +Input [4]: [item_sk#10, d_date#6, _w0#11, ws_item_sk#1] +Arguments: [sum(_w0#11) windowspecdefinition(ws_item_sk#1, d_date#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#12], [ws_item_sk#1], [d_date#6 ASC NULLS FIRST] (13) Project [codegen id : 5] -Output [3]: [item_sk#11, d_date#6, cume_sales#14] -Input [5]: [item_sk#11, d_date#6, _w0#12, ws_item_sk#1, cume_sales#14] +Output [3]: [item_sk#10, d_date#6, cume_sales#12] +Input [5]: [item_sk#10, d_date#6, _w0#11, ws_item_sk#1, cume_sales#12] (14) Exchange -Input [3]: [item_sk#11, d_date#6, cume_sales#14] -Arguments: hashpartitioning(item_sk#11, d_date#6, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [3]: [item_sk#10, d_date#6, cume_sales#12] +Arguments: hashpartitioning(item_sk#10, d_date#6, 5), ENSURE_REQUIREMENTS, [plan_id=3] (15) Sort [codegen id : 6] -Input [3]: [item_sk#11, d_date#6, cume_sales#14] -Arguments: [item_sk#11 ASC NULLS FIRST, d_date#6 ASC NULLS FIRST], false, 0 +Input [3]: [item_sk#10, d_date#6, cume_sales#12] +Arguments: [item_sk#10 ASC NULLS FIRST, d_date#6 ASC NULLS FIRST], false, 0 (16) Scan parquet default.store_sales -Output [3]: [ss_item_sk#16, ss_sales_price#17, ss_sold_date_sk#18] +Output [3]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#18), dynamicpruningexpression(ss_sold_date_sk#18 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(ss_sold_date_sk#15), dynamicpruningexpression(ss_sold_date_sk#15 IN dynamicpruning#4)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 8] -Input [3]: [ss_item_sk#16, ss_sales_price#17, ss_sold_date_sk#18] +Input [3]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15] (18) Filter [codegen id : 8] -Input [3]: [ss_item_sk#16, ss_sales_price#17, ss_sold_date_sk#18] -Condition : isnotnull(ss_item_sk#16) +Input [3]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15] +Condition : isnotnull(ss_item_sk#13) (19) ReusedExchange [Reuses operator id: 42] -Output [2]: [d_date_sk#19, d_date#20] +Output [2]: [d_date_sk#16, d_date#17] (20) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_sold_date_sk#18] -Right keys [1]: [d_date_sk#19] +Left keys [1]: [ss_sold_date_sk#15] +Right keys [1]: [d_date_sk#16] Join condition: None (21) Project [codegen id : 8] -Output [3]: [ss_item_sk#16, ss_sales_price#17, d_date#20] -Input [5]: [ss_item_sk#16, ss_sales_price#17, ss_sold_date_sk#18, d_date_sk#19, d_date#20] +Output [3]: [ss_item_sk#13, ss_sales_price#14, d_date#17] +Input [5]: [ss_item_sk#13, ss_sales_price#14, ss_sold_date_sk#15, d_date_sk#16, d_date#17] (22) HashAggregate [codegen id : 8] -Input [3]: [ss_item_sk#16, ss_sales_price#17, d_date#20] -Keys [2]: [ss_item_sk#16, d_date#20] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#17))] -Aggregate Attributes [1]: [sum#21] -Results [3]: [ss_item_sk#16, d_date#20, sum#22] +Input [3]: [ss_item_sk#13, ss_sales_price#14, d_date#17] +Keys [2]: [ss_item_sk#13, d_date#17] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [ss_item_sk#13, d_date#17, sum#19] (23) Exchange -Input [3]: [ss_item_sk#16, d_date#20, sum#22] -Arguments: hashpartitioning(ss_item_sk#16, d_date#20, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [3]: [ss_item_sk#13, d_date#17, sum#19] +Arguments: hashpartitioning(ss_item_sk#13, d_date#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) HashAggregate [codegen id : 9] -Input [3]: [ss_item_sk#16, d_date#20, sum#22] -Keys [2]: [ss_item_sk#16, d_date#20] -Functions [1]: [sum(UnscaledValue(ss_sales_price#17))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#17))#24] -Results [4]: [ss_item_sk#16 AS item_sk#25, d_date#20, MakeDecimal(sum(UnscaledValue(ss_sales_price#17))#24,17,2) AS _w0#26, ss_item_sk#16] +Input [3]: [ss_item_sk#13, d_date#17, sum#19] +Keys [2]: [ss_item_sk#13, d_date#17] +Functions [1]: [sum(UnscaledValue(ss_sales_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#14))#20] +Results [4]: [ss_item_sk#13 AS item_sk#21, d_date#17, MakeDecimal(sum(UnscaledValue(ss_sales_price#14))#20,17,2) AS _w0#22, ss_item_sk#13] (25) Exchange -Input [4]: [item_sk#25, d_date#20, _w0#26, ss_item_sk#16] -Arguments: hashpartitioning(ss_item_sk#16, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [4]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13] +Arguments: hashpartitioning(ss_item_sk#13, 5), ENSURE_REQUIREMENTS, [plan_id=5] (26) Sort [codegen id : 10] -Input [4]: [item_sk#25, d_date#20, _w0#26, ss_item_sk#16] -Arguments: [ss_item_sk#16 ASC NULLS FIRST, d_date#20 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13] +Arguments: [ss_item_sk#13 ASC NULLS FIRST, d_date#17 ASC NULLS FIRST], false, 0 (27) Window -Input [4]: [item_sk#25, d_date#20, _w0#26, ss_item_sk#16] -Arguments: [sum(_w0#26) windowspecdefinition(ss_item_sk#16, d_date#20 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#28], [ss_item_sk#16], [d_date#20 ASC NULLS FIRST] +Input [4]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13] +Arguments: [sum(_w0#22) windowspecdefinition(ss_item_sk#13, d_date#17 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS cume_sales#23], [ss_item_sk#13], [d_date#17 ASC NULLS FIRST] (28) Project [codegen id : 11] -Output [3]: [item_sk#25, d_date#20, cume_sales#28] -Input [5]: [item_sk#25, d_date#20, _w0#26, ss_item_sk#16, cume_sales#28] +Output [3]: [item_sk#21, d_date#17, cume_sales#23] +Input [5]: [item_sk#21, d_date#17, _w0#22, ss_item_sk#13, cume_sales#23] (29) Exchange -Input [3]: [item_sk#25, d_date#20, cume_sales#28] -Arguments: hashpartitioning(item_sk#25, d_date#20, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [3]: [item_sk#21, d_date#17, cume_sales#23] +Arguments: hashpartitioning(item_sk#21, d_date#17, 5), ENSURE_REQUIREMENTS, [plan_id=6] (30) Sort [codegen id : 12] -Input [3]: [item_sk#25, d_date#20, cume_sales#28] -Arguments: [item_sk#25 ASC NULLS FIRST, d_date#20 ASC NULLS FIRST], false, 0 +Input [3]: [item_sk#21, d_date#17, cume_sales#23] +Arguments: [item_sk#21 ASC NULLS FIRST, d_date#17 ASC NULLS FIRST], false, 0 (31) SortMergeJoin [codegen id : 13] -Left keys [2]: [item_sk#11, d_date#6] -Right keys [2]: [item_sk#25, d_date#20] +Left keys [2]: [item_sk#10, d_date#6] +Right keys [2]: [item_sk#21, d_date#17] Join condition: None (32) Project [codegen id : 13] -Output [4]: [CASE WHEN isnotnull(item_sk#11) THEN item_sk#11 ELSE item_sk#25 END AS item_sk#30, CASE WHEN isnotnull(d_date#6) THEN d_date#6 ELSE d_date#20 END AS d_date#31, cume_sales#14 AS web_sales#32, cume_sales#28 AS store_sales#33] -Input [6]: [item_sk#11, d_date#6, cume_sales#14, item_sk#25, d_date#20, cume_sales#28] +Output [4]: [CASE WHEN isnotnull(item_sk#10) THEN item_sk#10 ELSE item_sk#21 END AS item_sk#24, CASE WHEN isnotnull(d_date#6) THEN d_date#6 ELSE d_date#17 END AS d_date#25, cume_sales#12 AS web_sales#26, cume_sales#23 AS store_sales#27] +Input [6]: [item_sk#10, d_date#6, cume_sales#12, item_sk#21, d_date#17, cume_sales#23] (33) Exchange -Input [4]: [item_sk#30, d_date#31, web_sales#32, store_sales#33] -Arguments: hashpartitioning(item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [4]: [item_sk#24, d_date#25, web_sales#26, store_sales#27] +Arguments: hashpartitioning(item_sk#24, 5), ENSURE_REQUIREMENTS, [plan_id=7] (34) Sort [codegen id : 14] -Input [4]: [item_sk#30, d_date#31, web_sales#32, store_sales#33] -Arguments: [item_sk#30 ASC NULLS FIRST, d_date#31 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#24, d_date#25, web_sales#26, store_sales#27] +Arguments: [item_sk#24 ASC NULLS FIRST, d_date#25 ASC NULLS FIRST], false, 0 (35) Window -Input [4]: [item_sk#30, d_date#31, web_sales#32, store_sales#33] -Arguments: [max(web_sales#32) windowspecdefinition(item_sk#30, d_date#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#35, max(store_sales#33) windowspecdefinition(item_sk#30, d_date#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#36], [item_sk#30], [d_date#31 ASC NULLS FIRST] +Input [4]: [item_sk#24, d_date#25, web_sales#26, store_sales#27] +Arguments: [max(web_sales#26) windowspecdefinition(item_sk#24, d_date#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS web_cumulative#28, max(store_sales#27) windowspecdefinition(item_sk#24, d_date#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS store_cumulative#29], [item_sk#24], [d_date#25 ASC NULLS FIRST] (36) Filter [codegen id : 15] -Input [6]: [item_sk#30, d_date#31, web_sales#32, store_sales#33, web_cumulative#35, store_cumulative#36] -Condition : ((isnotnull(web_cumulative#35) AND isnotnull(store_cumulative#36)) AND (web_cumulative#35 > store_cumulative#36)) +Input [6]: [item_sk#24, d_date#25, web_sales#26, store_sales#27, web_cumulative#28, store_cumulative#29] +Condition : ((isnotnull(web_cumulative#28) AND isnotnull(store_cumulative#29)) AND (web_cumulative#28 > store_cumulative#29)) (37) TakeOrderedAndProject -Input [6]: [item_sk#30, d_date#31, web_sales#32, store_sales#33, web_cumulative#35, store_cumulative#36] -Arguments: 100, [item_sk#30 ASC NULLS FIRST, d_date#31 ASC NULLS FIRST], [item_sk#30, d_date#31, web_sales#32, store_sales#33, web_cumulative#35, store_cumulative#36] +Input [6]: [item_sk#24, d_date#25, web_sales#26, store_sales#27, web_cumulative#28, store_cumulative#29] +Arguments: 100, [item_sk#24 ASC NULLS FIRST, d_date#25 ASC NULLS FIRST], [item_sk#24, d_date#25, web_sales#26, store_sales#27, web_cumulative#28, store_cumulative#29] ===== Subqueries ===== @@ -216,27 +216,27 @@ BroadcastExchange (42) (38) Scan parquet default.date_dim -Output [3]: [d_date_sk#5, d_date#6, d_month_seq#37] +Output [3]: [d_date_sk#5, d_date#6, d_month_seq#30] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (39) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#37] +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#30] (40) Filter [codegen id : 1] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#37] -Condition : (((isnotnull(d_month_seq#37) AND (d_month_seq#37 >= 1200)) AND (d_month_seq#37 <= 1211)) AND isnotnull(d_date_sk#5)) +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#30] +Condition : (((isnotnull(d_month_seq#30) AND (d_month_seq#30 >= 1200)) AND (d_month_seq#30 <= 1211)) AND isnotnull(d_date_sk#5)) (41) Project [codegen id : 1] Output [2]: [d_date_sk#5, d_date#6] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#37] +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#30] (42) BroadcastExchange Input [2]: [d_date_sk#5, d_date#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 16 Hosting Expression = ss_sold_date_sk#18 IN dynamicpruning#4 +Subquery:2 Hosting operator id = 16 Hosting Expression = ss_sold_date_sk#15 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt index 2c8c5e970052f..ac70b590af0cd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52.sf100/explain.txt @@ -53,7 +53,7 @@ Input [4]: [i_item_sk#5, i_brand_id#6, i_brand#7, i_manager_id#8] (8) BroadcastExchange Input [3]: [i_item_sk#5, i_brand_id#6, i_brand#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#1] @@ -65,38 +65,38 @@ Output [4]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_brand_id#6, i_brand#7] Input [6]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#5, i_brand_id#6, i_brand#7] (11) ReusedExchange [Reuses operator id: 22] -Output [2]: [d_date_sk#10, d_year#11] +Output [2]: [d_date_sk#9, d_year#10] (12) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#3] -Right keys [1]: [d_date_sk#10] +Right keys [1]: [d_date_sk#9] Join condition: None (13) Project [codegen id : 3] -Output [4]: [d_year#11, ss_ext_sales_price#2, i_brand_id#6, i_brand#7] -Input [6]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_brand_id#6, i_brand#7, d_date_sk#10, d_year#11] +Output [4]: [d_year#10, ss_ext_sales_price#2, i_brand_id#6, i_brand#7] +Input [6]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_brand_id#6, i_brand#7, d_date_sk#9, d_year#10] (14) HashAggregate [codegen id : 3] -Input [4]: [d_year#11, ss_ext_sales_price#2, i_brand_id#6, i_brand#7] -Keys [3]: [d_year#11, i_brand#7, i_brand_id#6] +Input [4]: [d_year#10, ss_ext_sales_price#2, i_brand_id#6, i_brand#7] +Keys [3]: [d_year#10, i_brand#7, i_brand_id#6] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#12] -Results [4]: [d_year#11, i_brand#7, i_brand_id#6, sum#13] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#10, i_brand#7, i_brand_id#6, sum#12] (15) Exchange -Input [4]: [d_year#11, i_brand#7, i_brand_id#6, sum#13] -Arguments: hashpartitioning(d_year#11, i_brand#7, i_brand_id#6, 5), ENSURE_REQUIREMENTS, [id=#14] +Input [4]: [d_year#10, i_brand#7, i_brand_id#6, sum#12] +Arguments: hashpartitioning(d_year#10, i_brand#7, i_brand_id#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (16) HashAggregate [codegen id : 4] -Input [4]: [d_year#11, i_brand#7, i_brand_id#6, sum#13] -Keys [3]: [d_year#11, i_brand#7, i_brand_id#6] +Input [4]: [d_year#10, i_brand#7, i_brand_id#6, sum#12] +Keys [3]: [d_year#10, i_brand#7, i_brand_id#6] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#15] -Results [4]: [d_year#11, i_brand_id#6 AS brand_id#16, i_brand#7 AS brand#17, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#15,17,2) AS ext_price#18] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#13] +Results [4]: [d_year#10, i_brand_id#6 AS brand_id#14, i_brand#7 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#13,17,2) AS ext_price#16] (17) TakeOrderedAndProject -Input [4]: [d_year#11, brand_id#16, brand#17, ext_price#18] -Arguments: 100, [d_year#11 ASC NULLS FIRST, ext_price#18 DESC NULLS LAST, brand_id#16 ASC NULLS FIRST], [d_year#11, brand_id#16, brand#17, ext_price#18] +Input [4]: [d_year#10, brand_id#14, brand#15, ext_price#16] +Arguments: 100, [d_year#10 ASC NULLS FIRST, ext_price#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [d_year#10, brand_id#14, brand#15, ext_price#16] ===== Subqueries ===== @@ -109,25 +109,25 @@ BroadcastExchange (22) (18) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_moy#19] +Output [3]: [d_date_sk#9, d_year#10, d_moy#17] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#19] +Input [3]: [d_date_sk#9, d_year#10, d_moy#17] (20) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#19] -Condition : ((((isnotnull(d_moy#19) AND isnotnull(d_year#11)) AND (d_moy#19 = 11)) AND (d_year#11 = 2000)) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#9, d_year#10, d_moy#17] +Condition : ((((isnotnull(d_moy#17) AND isnotnull(d_year#10)) AND (d_moy#17 = 11)) AND (d_year#10 = 2000)) AND isnotnull(d_date_sk#9)) (21) Project [codegen id : 1] -Output [2]: [d_date_sk#10, d_year#11] -Input [3]: [d_date_sk#10, d_year#11, d_moy#19] +Output [2]: [d_date_sk#9, d_year#10] +Input [3]: [d_date_sk#9, d_year#10, d_moy#17] (22) BroadcastExchange -Input [2]: [d_date_sk#10, d_year#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt index e6e106706fb07..c2dcd53f86ae3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q52/explain.txt @@ -57,7 +57,7 @@ Condition : isnotnull(ss_item_sk#4) (8) BroadcastExchange Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] @@ -69,55 +69,55 @@ Output [3]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5] Input [5]: [d_date_sk#1, d_year#2, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] (11) Scan parquet default.item -Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,1), IsNotNull(i_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] (13) Filter [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] -Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 1)) AND isnotnull(i_item_sk#8)) +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 1)) AND isnotnull(i_item_sk#7)) (14) Project [codegen id : 2] -Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] (15) BroadcastExchange -Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#4] -Right keys [1]: [i_item_sk#8] +Right keys [1]: [i_item_sk#7] Join condition: None (17) Project [codegen id : 3] -Output [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#9, i_brand#10] -Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#8, i_brand_id#9, i_brand#10] +Output [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Input [6]: [d_year#2, ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] (18) HashAggregate [codegen id : 3] -Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#9, i_brand#10] -Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Input [4]: [d_year#2, ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum#13] -Results [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] +Aggregate Attributes [1]: [sum#11] +Results [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] (19) Exchange -Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] -Arguments: hashpartitioning(d_year#2, i_brand#10, i_brand_id#9, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(d_year#2, i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [4]: [d_year#2, i_brand#10, i_brand_id#9, sum#14] -Keys [3]: [d_year#2, i_brand#10, i_brand_id#9] +Input [4]: [d_year#2, i_brand#9, i_brand_id#8, sum#12] +Keys [3]: [d_year#2, i_brand#9, i_brand_id#8] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#16] -Results [4]: [d_year#2, i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#16,17,2) AS ext_price#19] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [4]: [d_year#2, i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS ext_price#16] (21) TakeOrderedAndProject -Input [4]: [d_year#2, brand_id#17, brand#18, ext_price#19] -Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [d_year#2, brand_id#17, brand#18, ext_price#19] +Input [4]: [d_year#2, brand_id#14, brand#15, ext_price#16] +Arguments: 100, [d_year#2 ASC NULLS FIRST, ext_price#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [d_year#2, brand_id#14, brand#15, ext_price#16] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt index 694852c3ed6b0..507650dfadc19 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53.sf100/explain.txt @@ -49,116 +49,116 @@ Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manufact_id#5] (5) BroadcastExchange Input [2]: [i_item_sk#1, i_manufact_id#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.store_sales -Output [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] +Output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#14), dynamicpruningexpression(ss_sold_date_sk#14 IN dynamicpruning#15)] +PartitionFilters: [isnotnull(ss_sold_date_sk#13), dynamicpruningexpression(ss_sold_date_sk#13 IN dynamicpruning#14)] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (7) ColumnarToRow -Input [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] (8) Filter -Input [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] -Condition : (isnotnull(ss_item_sk#11) AND isnotnull(ss_store_sk#12)) +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] -Right keys [1]: [ss_item_sk#11] +Right keys [1]: [ss_item_sk#10] Join condition: None (10) Project [codegen id : 4] -Output [4]: [i_manufact_id#5, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] -Input [6]: [i_item_sk#1, i_manufact_id#5, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] +Output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Input [6]: [i_item_sk#1, i_manufact_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] (11) Scan parquet default.store -Output [1]: [s_store_sk#16] +Output [1]: [s_store_sk#15] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [1]: [s_store_sk#16] +Input [1]: [s_store_sk#15] (13) Filter [codegen id : 2] -Input [1]: [s_store_sk#16] -Condition : isnotnull(s_store_sk#16) +Input [1]: [s_store_sk#15] +Condition : isnotnull(s_store_sk#15) (14) BroadcastExchange -Input [1]: [s_store_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#12] -Right keys [1]: [s_store_sk#16] +Left keys [1]: [ss_store_sk#11] +Right keys [1]: [s_store_sk#15] Join condition: None (16) Project [codegen id : 4] -Output [3]: [i_manufact_id#5, ss_sales_price#13, ss_sold_date_sk#14] -Input [5]: [i_manufact_id#5, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14, s_store_sk#16] +Output [3]: [i_manufact_id#5, ss_sales_price#12, ss_sold_date_sk#13] +Input [5]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, s_store_sk#15] (17) ReusedExchange [Reuses operator id: 33] -Output [2]: [d_date_sk#18, d_qoy#19] +Output [2]: [d_date_sk#16, d_qoy#17] (18) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#14] -Right keys [1]: [d_date_sk#18] +Left keys [1]: [ss_sold_date_sk#13] +Right keys [1]: [d_date_sk#16] Join condition: None (19) Project [codegen id : 4] -Output [3]: [i_manufact_id#5, ss_sales_price#13, d_qoy#19] -Input [5]: [i_manufact_id#5, ss_sales_price#13, ss_sold_date_sk#14, d_date_sk#18, d_qoy#19] +Output [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#17] +Input [5]: [i_manufact_id#5, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#16, d_qoy#17] (20) HashAggregate [codegen id : 4] -Input [3]: [i_manufact_id#5, ss_sales_price#13, d_qoy#19] -Keys [2]: [i_manufact_id#5, d_qoy#19] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#13))] -Aggregate Attributes [1]: [sum#20] -Results [3]: [i_manufact_id#5, d_qoy#19, sum#21] +Input [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#17] +Keys [2]: [i_manufact_id#5, d_qoy#17] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [i_manufact_id#5, d_qoy#17, sum#19] (21) Exchange -Input [3]: [i_manufact_id#5, d_qoy#19, sum#21] -Arguments: hashpartitioning(i_manufact_id#5, d_qoy#19, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [3]: [i_manufact_id#5, d_qoy#17, sum#19] +Arguments: hashpartitioning(i_manufact_id#5, d_qoy#17, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 5] -Input [3]: [i_manufact_id#5, d_qoy#19, sum#21] -Keys [2]: [i_manufact_id#5, d_qoy#19] -Functions [1]: [sum(UnscaledValue(ss_sales_price#13))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#13))#23] -Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#23,17,2) AS sum_sales#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#23,17,2) AS _w0#25] +Input [3]: [i_manufact_id#5, d_qoy#17, sum#19] +Keys [2]: [i_manufact_id#5, d_qoy#17] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#20] +Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS _w0#22] (23) Exchange -Input [3]: [i_manufact_id#5, sum_sales#24, _w0#25] -Arguments: hashpartitioning(i_manufact_id#5, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_manufact_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 6] -Input [3]: [i_manufact_id#5, sum_sales#24, _w0#25] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] Arguments: [i_manufact_id#5 ASC NULLS FIRST], false, 0 (25) Window -Input [3]: [i_manufact_id#5, sum_sales#24, _w0#25] -Arguments: [avg(_w0#25) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#27], [i_manufact_id#5] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#23], [i_manufact_id#5] (26) Filter [codegen id : 7] -Input [4]: [i_manufact_id#5, sum_sales#24, _w0#25, avg_quarterly_sales#27] -Condition : (isnotnull(avg_quarterly_sales#27) AND ((avg_quarterly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] +Condition : (isnotnull(avg_quarterly_sales#23) AND ((avg_quarterly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] -Output [3]: [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] -Input [4]: [i_manufact_id#5, sum_sales#24, _w0#25, avg_quarterly_sales#27] +Output [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] +Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] (28) TakeOrderedAndProject -Input [3]: [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] -Arguments: 100, [avg_quarterly_sales#27 ASC NULLS FIRST, sum_sales#24 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] +Input [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] +Arguments: 100, [avg_quarterly_sales#23 ASC NULLS FIRST, sum_sales#21 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#14 IN dynamicpruning#15 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#13 IN dynamicpruning#14 BroadcastExchange (33) +- * Project (32) +- * Filter (31) @@ -167,25 +167,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [3]: [d_date_sk#18, d_month_seq#28, d_qoy#19] +Output [3]: [d_date_sk#16, d_month_seq#24, d_qoy#17] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_month_seq, [1200,1201,1202,1203,1204,1205,1206,1207,1208,1209,1210,1211]), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#18, d_month_seq#28, d_qoy#19] +Input [3]: [d_date_sk#16, d_month_seq#24, d_qoy#17] (31) Filter [codegen id : 1] -Input [3]: [d_date_sk#18, d_month_seq#28, d_qoy#19] -Condition : (d_month_seq#28 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#18)) +Input [3]: [d_date_sk#16, d_month_seq#24, d_qoy#17] +Condition : (d_month_seq#24 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#16)) (32) Project [codegen id : 1] -Output [2]: [d_date_sk#18, d_qoy#19] -Input [3]: [d_date_sk#18, d_month_seq#28, d_qoy#19] +Output [2]: [d_date_sk#16, d_qoy#17] +Input [3]: [d_date_sk#16, d_month_seq#24, d_qoy#17] (33) BroadcastExchange -Input [2]: [d_date_sk#18, d_qoy#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] +Input [2]: [d_date_sk#16, d_qoy#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt index 91364dcce16e4..9e09bfb0dfb3f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q53/explain.txt @@ -64,7 +64,7 @@ Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) (8) BroadcastExchange Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] @@ -76,85 +76,85 @@ Output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk Input [6]: [i_item_sk#1, i_manufact_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] (11) ReusedExchange [Reuses operator id: 33] -Output [2]: [d_date_sk#16, d_qoy#17] +Output [2]: [d_date_sk#15, d_qoy#16] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_date_sk#13] -Right keys [1]: [d_date_sk#16] +Right keys [1]: [d_date_sk#15] Join condition: None (13) Project [codegen id : 4] -Output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#17] -Input [6]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#16, d_qoy#17] +Output [4]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16] +Input [6]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#15, d_qoy#16] (14) Scan parquet default.store -Output [1]: [s_store_sk#18] +Output [1]: [s_store_sk#17] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [1]: [s_store_sk#18] +Input [1]: [s_store_sk#17] (16) Filter [codegen id : 3] -Input [1]: [s_store_sk#18] -Condition : isnotnull(s_store_sk#18) +Input [1]: [s_store_sk#17] +Condition : isnotnull(s_store_sk#17) (17) BroadcastExchange -Input [1]: [s_store_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +Input [1]: [s_store_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#11] -Right keys [1]: [s_store_sk#18] +Right keys [1]: [s_store_sk#17] Join condition: None (19) Project [codegen id : 4] -Output [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#17] -Input [5]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#17, s_store_sk#18] +Output [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#16] +Input [5]: [i_manufact_id#5, ss_store_sk#11, ss_sales_price#12, d_qoy#16, s_store_sk#17] (20) HashAggregate [codegen id : 4] -Input [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#17] -Keys [2]: [i_manufact_id#5, d_qoy#17] +Input [3]: [i_manufact_id#5, ss_sales_price#12, d_qoy#16] +Keys [2]: [i_manufact_id#5, d_qoy#16] Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] -Aggregate Attributes [1]: [sum#20] -Results [3]: [i_manufact_id#5, d_qoy#17, sum#21] +Aggregate Attributes [1]: [sum#18] +Results [3]: [i_manufact_id#5, d_qoy#16, sum#19] (21) Exchange -Input [3]: [i_manufact_id#5, d_qoy#17, sum#21] -Arguments: hashpartitioning(i_manufact_id#5, d_qoy#17, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [3]: [i_manufact_id#5, d_qoy#16, sum#19] +Arguments: hashpartitioning(i_manufact_id#5, d_qoy#16, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 5] -Input [3]: [i_manufact_id#5, d_qoy#17, sum#21] -Keys [2]: [i_manufact_id#5, d_qoy#17] +Input [3]: [i_manufact_id#5, d_qoy#16, sum#19] +Keys [2]: [i_manufact_id#5, d_qoy#16] Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#23] -Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#23,17,2) AS sum_sales#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#23,17,2) AS _w0#25] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#20] +Results [3]: [i_manufact_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS _w0#22] (23) Exchange -Input [3]: [i_manufact_id#5, sum_sales#24, _w0#25] -Arguments: hashpartitioning(i_manufact_id#5, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_manufact_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 6] -Input [3]: [i_manufact_id#5, sum_sales#24, _w0#25] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] Arguments: [i_manufact_id#5 ASC NULLS FIRST], false, 0 (25) Window -Input [3]: [i_manufact_id#5, sum_sales#24, _w0#25] -Arguments: [avg(_w0#25) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#27], [i_manufact_id#5] +Input [3]: [i_manufact_id#5, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_manufact_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_quarterly_sales#23], [i_manufact_id#5] (26) Filter [codegen id : 7] -Input [4]: [i_manufact_id#5, sum_sales#24, _w0#25, avg_quarterly_sales#27] -Condition : (isnotnull(avg_quarterly_sales#27) AND ((avg_quarterly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] +Condition : (isnotnull(avg_quarterly_sales#23) AND ((avg_quarterly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_quarterly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] -Output [3]: [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] -Input [4]: [i_manufact_id#5, sum_sales#24, _w0#25, avg_quarterly_sales#27] +Output [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] +Input [4]: [i_manufact_id#5, sum_sales#21, _w0#22, avg_quarterly_sales#23] (28) TakeOrderedAndProject -Input [3]: [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] -Arguments: 100, [avg_quarterly_sales#27 ASC NULLS FIRST, sum_sales#24 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#24, avg_quarterly_sales#27] +Input [3]: [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] +Arguments: 100, [avg_quarterly_sales#23 ASC NULLS FIRST, sum_sales#21 ASC NULLS FIRST, i_manufact_id#5 ASC NULLS FIRST], [i_manufact_id#5, sum_sales#21, avg_quarterly_sales#23] ===== Subqueries ===== @@ -167,25 +167,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [3]: [d_date_sk#16, d_month_seq#28, d_qoy#17] +Output [3]: [d_date_sk#15, d_month_seq#24, d_qoy#16] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_month_seq, [1200,1201,1202,1203,1204,1205,1206,1207,1208,1209,1210,1211]), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#16, d_month_seq#28, d_qoy#17] +Input [3]: [d_date_sk#15, d_month_seq#24, d_qoy#16] (31) Filter [codegen id : 1] -Input [3]: [d_date_sk#16, d_month_seq#28, d_qoy#17] -Condition : (d_month_seq#28 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#16)) +Input [3]: [d_date_sk#15, d_month_seq#24, d_qoy#16] +Condition : (d_month_seq#24 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#15)) (32) Project [codegen id : 1] -Output [2]: [d_date_sk#16, d_qoy#17] -Input [3]: [d_date_sk#16, d_month_seq#28, d_qoy#17] +Output [2]: [d_date_sk#15, d_qoy#16] +Input [3]: [d_date_sk#15, d_month_seq#24, d_qoy#16] (33) BroadcastExchange -Input [2]: [d_date_sk#16, d_qoy#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] +Input [2]: [d_date_sk#15, d_qoy#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt index 543281ef9100e..9d1a92004c794 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54.sf100/explain.txt @@ -90,7 +90,7 @@ Condition : (isnotnull(s_county#4) AND isnotnull(s_state#5)) (7) BroadcastExchange Input [2]: [s_county#4, s_state#5] -Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [id=#6] +Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 2] Left keys [2]: [ca_county#2, ca_state#3] @@ -103,240 +103,240 @@ Input [5]: [ca_address_sk#1, ca_county#2, ca_state#3, s_county#4, s_state#5] (10) BroadcastExchange Input [1]: [ca_address_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (11) Scan parquet default.catalog_sales -Output [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] +Output [3]: [cs_bill_customer_sk#6, cs_item_sk#7, cs_sold_date_sk#8] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#10), dynamicpruningexpression(cs_sold_date_sk#10 IN dynamicpruning#11)] +PartitionFilters: [isnotnull(cs_sold_date_sk#8), dynamicpruningexpression(cs_sold_date_sk#8 IN dynamicpruning#9)] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 3] -Input [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] +Input [3]: [cs_bill_customer_sk#6, cs_item_sk#7, cs_sold_date_sk#8] (13) Filter [codegen id : 3] -Input [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] -Condition : (isnotnull(cs_item_sk#9) AND isnotnull(cs_bill_customer_sk#8)) +Input [3]: [cs_bill_customer_sk#6, cs_item_sk#7, cs_sold_date_sk#8] +Condition : (isnotnull(cs_item_sk#7) AND isnotnull(cs_bill_customer_sk#6)) (14) Project [codegen id : 3] -Output [3]: [cs_sold_date_sk#10 AS sold_date_sk#12, cs_bill_customer_sk#8 AS customer_sk#13, cs_item_sk#9 AS item_sk#14] -Input [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] +Output [3]: [cs_sold_date_sk#8 AS sold_date_sk#10, cs_bill_customer_sk#6 AS customer_sk#11, cs_item_sk#7 AS item_sk#12] +Input [3]: [cs_bill_customer_sk#6, cs_item_sk#7, cs_sold_date_sk#8] (15) Scan parquet default.web_sales -Output [3]: [ws_item_sk#15, ws_bill_customer_sk#16, ws_sold_date_sk#17] +Output [3]: [ws_item_sk#13, ws_bill_customer_sk#14, ws_sold_date_sk#15] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#17), dynamicpruningexpression(ws_sold_date_sk#17 IN dynamicpruning#11)] +PartitionFilters: [isnotnull(ws_sold_date_sk#15), dynamicpruningexpression(ws_sold_date_sk#15 IN dynamicpruning#9)] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (16) ColumnarToRow [codegen id : 4] -Input [3]: [ws_item_sk#15, ws_bill_customer_sk#16, ws_sold_date_sk#17] +Input [3]: [ws_item_sk#13, ws_bill_customer_sk#14, ws_sold_date_sk#15] (17) Filter [codegen id : 4] -Input [3]: [ws_item_sk#15, ws_bill_customer_sk#16, ws_sold_date_sk#17] -Condition : (isnotnull(ws_item_sk#15) AND isnotnull(ws_bill_customer_sk#16)) +Input [3]: [ws_item_sk#13, ws_bill_customer_sk#14, ws_sold_date_sk#15] +Condition : (isnotnull(ws_item_sk#13) AND isnotnull(ws_bill_customer_sk#14)) (18) Project [codegen id : 4] -Output [3]: [ws_sold_date_sk#17 AS sold_date_sk#18, ws_bill_customer_sk#16 AS customer_sk#19, ws_item_sk#15 AS item_sk#20] -Input [3]: [ws_item_sk#15, ws_bill_customer_sk#16, ws_sold_date_sk#17] +Output [3]: [ws_sold_date_sk#15 AS sold_date_sk#16, ws_bill_customer_sk#14 AS customer_sk#17, ws_item_sk#13 AS item_sk#18] +Input [3]: [ws_item_sk#13, ws_bill_customer_sk#14, ws_sold_date_sk#15] (19) Union (20) ReusedExchange [Reuses operator id: 64] -Output [1]: [d_date_sk#21] +Output [1]: [d_date_sk#19] (21) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [sold_date_sk#12] -Right keys [1]: [d_date_sk#21] +Left keys [1]: [sold_date_sk#10] +Right keys [1]: [d_date_sk#19] Join condition: None (22) Project [codegen id : 7] -Output [2]: [customer_sk#13, item_sk#14] -Input [4]: [sold_date_sk#12, customer_sk#13, item_sk#14, d_date_sk#21] +Output [2]: [customer_sk#11, item_sk#12] +Input [4]: [sold_date_sk#10, customer_sk#11, item_sk#12, d_date_sk#19] (23) Scan parquet default.item -Output [3]: [i_item_sk#22, i_class#23, i_category#24] +Output [3]: [i_item_sk#20, i_class#21, i_category#22] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_category), IsNotNull(i_class), EqualTo(i_category,Women ), EqualTo(i_class,maternity ), IsNotNull(i_item_sk)] ReadSchema: struct (24) ColumnarToRow [codegen id : 6] -Input [3]: [i_item_sk#22, i_class#23, i_category#24] +Input [3]: [i_item_sk#20, i_class#21, i_category#22] (25) Filter [codegen id : 6] -Input [3]: [i_item_sk#22, i_class#23, i_category#24] -Condition : ((((isnotnull(i_category#24) AND isnotnull(i_class#23)) AND (i_category#24 = Women )) AND (i_class#23 = maternity )) AND isnotnull(i_item_sk#22)) +Input [3]: [i_item_sk#20, i_class#21, i_category#22] +Condition : ((((isnotnull(i_category#22) AND isnotnull(i_class#21)) AND (i_category#22 = Women )) AND (i_class#21 = maternity )) AND isnotnull(i_item_sk#20)) (26) Project [codegen id : 6] -Output [1]: [i_item_sk#22] -Input [3]: [i_item_sk#22, i_class#23, i_category#24] +Output [1]: [i_item_sk#20] +Input [3]: [i_item_sk#20, i_class#21, i_category#22] (27) BroadcastExchange -Input [1]: [i_item_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [i_item_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (28) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [item_sk#14] -Right keys [1]: [i_item_sk#22] +Left keys [1]: [item_sk#12] +Right keys [1]: [i_item_sk#20] Join condition: None (29) Project [codegen id : 7] -Output [1]: [customer_sk#13] -Input [3]: [customer_sk#13, item_sk#14, i_item_sk#22] +Output [1]: [customer_sk#11] +Input [3]: [customer_sk#11, item_sk#12, i_item_sk#20] (30) Exchange -Input [1]: [customer_sk#13] -Arguments: hashpartitioning(customer_sk#13, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [1]: [customer_sk#11] +Arguments: hashpartitioning(customer_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=4] (31) Sort [codegen id : 8] -Input [1]: [customer_sk#13] -Arguments: [customer_sk#13 ASC NULLS FIRST], false, 0 +Input [1]: [customer_sk#11] +Arguments: [customer_sk#11 ASC NULLS FIRST], false, 0 (32) Scan parquet default.customer -Output [2]: [c_customer_sk#27, c_current_addr_sk#28] +Output [2]: [c_customer_sk#23, c_current_addr_sk#24] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (33) ColumnarToRow [codegen id : 9] -Input [2]: [c_customer_sk#27, c_current_addr_sk#28] +Input [2]: [c_customer_sk#23, c_current_addr_sk#24] (34) Filter [codegen id : 9] -Input [2]: [c_customer_sk#27, c_current_addr_sk#28] -Condition : (isnotnull(c_customer_sk#27) AND isnotnull(c_current_addr_sk#28)) +Input [2]: [c_customer_sk#23, c_current_addr_sk#24] +Condition : (isnotnull(c_customer_sk#23) AND isnotnull(c_current_addr_sk#24)) (35) Exchange -Input [2]: [c_customer_sk#27, c_current_addr_sk#28] -Arguments: hashpartitioning(c_customer_sk#27, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [2]: [c_customer_sk#23, c_current_addr_sk#24] +Arguments: hashpartitioning(c_customer_sk#23, 5), ENSURE_REQUIREMENTS, [plan_id=5] (36) Sort [codegen id : 10] -Input [2]: [c_customer_sk#27, c_current_addr_sk#28] -Arguments: [c_customer_sk#27 ASC NULLS FIRST], false, 0 +Input [2]: [c_customer_sk#23, c_current_addr_sk#24] +Arguments: [c_customer_sk#23 ASC NULLS FIRST], false, 0 (37) SortMergeJoin -Left keys [1]: [customer_sk#13] -Right keys [1]: [c_customer_sk#27] +Left keys [1]: [customer_sk#11] +Right keys [1]: [c_customer_sk#23] Join condition: None (38) Project -Output [2]: [c_customer_sk#27, c_current_addr_sk#28] -Input [3]: [customer_sk#13, c_customer_sk#27, c_current_addr_sk#28] +Output [2]: [c_customer_sk#23, c_current_addr_sk#24] +Input [3]: [customer_sk#11, c_customer_sk#23, c_current_addr_sk#24] (39) HashAggregate -Input [2]: [c_customer_sk#27, c_current_addr_sk#28] -Keys [2]: [c_customer_sk#27, c_current_addr_sk#28] +Input [2]: [c_customer_sk#23, c_current_addr_sk#24] +Keys [2]: [c_customer_sk#23, c_current_addr_sk#24] Functions: [] Aggregate Attributes: [] -Results [2]: [c_customer_sk#27, c_current_addr_sk#28] +Results [2]: [c_customer_sk#23, c_current_addr_sk#24] (40) HashAggregate -Input [2]: [c_customer_sk#27, c_current_addr_sk#28] -Keys [2]: [c_customer_sk#27, c_current_addr_sk#28] +Input [2]: [c_customer_sk#23, c_current_addr_sk#24] +Keys [2]: [c_customer_sk#23, c_current_addr_sk#24] Functions: [] Aggregate Attributes: [] -Results [2]: [c_customer_sk#27, c_current_addr_sk#28] +Results [2]: [c_customer_sk#23, c_current_addr_sk#24] (41) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ca_address_sk#1] -Right keys [1]: [c_current_addr_sk#28] +Right keys [1]: [c_current_addr_sk#24] Join condition: None (42) Project [codegen id : 11] -Output [1]: [c_customer_sk#27] -Input [3]: [ca_address_sk#1, c_customer_sk#27, c_current_addr_sk#28] +Output [1]: [c_customer_sk#23] +Input [3]: [ca_address_sk#1, c_customer_sk#23, c_current_addr_sk#24] (43) Sort [codegen id : 11] -Input [1]: [c_customer_sk#27] -Arguments: [c_customer_sk#27 ASC NULLS FIRST], false, 0 +Input [1]: [c_customer_sk#23] +Arguments: [c_customer_sk#23 ASC NULLS FIRST], false, 0 (44) Scan parquet default.store_sales -Output [3]: [ss_customer_sk#30, ss_ext_sales_price#31, ss_sold_date_sk#32] +Output [3]: [ss_customer_sk#25, ss_ext_sales_price#26, ss_sold_date_sk#27] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#32), dynamicpruningexpression(ss_sold_date_sk#32 IN dynamicpruning#33)] +PartitionFilters: [isnotnull(ss_sold_date_sk#27), dynamicpruningexpression(ss_sold_date_sk#27 IN dynamicpruning#28)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 13] -Input [3]: [ss_customer_sk#30, ss_ext_sales_price#31, ss_sold_date_sk#32] +Input [3]: [ss_customer_sk#25, ss_ext_sales_price#26, ss_sold_date_sk#27] (46) Filter [codegen id : 13] -Input [3]: [ss_customer_sk#30, ss_ext_sales_price#31, ss_sold_date_sk#32] -Condition : isnotnull(ss_customer_sk#30) +Input [3]: [ss_customer_sk#25, ss_ext_sales_price#26, ss_sold_date_sk#27] +Condition : isnotnull(ss_customer_sk#25) (47) ReusedExchange [Reuses operator id: 69] -Output [1]: [d_date_sk#34] +Output [1]: [d_date_sk#29] (48) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_sold_date_sk#32] -Right keys [1]: [d_date_sk#34] +Left keys [1]: [ss_sold_date_sk#27] +Right keys [1]: [d_date_sk#29] Join condition: None (49) Project [codegen id : 13] -Output [2]: [ss_customer_sk#30, ss_ext_sales_price#31] -Input [4]: [ss_customer_sk#30, ss_ext_sales_price#31, ss_sold_date_sk#32, d_date_sk#34] +Output [2]: [ss_customer_sk#25, ss_ext_sales_price#26] +Input [4]: [ss_customer_sk#25, ss_ext_sales_price#26, ss_sold_date_sk#27, d_date_sk#29] (50) Exchange -Input [2]: [ss_customer_sk#30, ss_ext_sales_price#31] -Arguments: hashpartitioning(ss_customer_sk#30, 5), ENSURE_REQUIREMENTS, [id=#35] +Input [2]: [ss_customer_sk#25, ss_ext_sales_price#26] +Arguments: hashpartitioning(ss_customer_sk#25, 5), ENSURE_REQUIREMENTS, [plan_id=6] (51) Sort [codegen id : 14] -Input [2]: [ss_customer_sk#30, ss_ext_sales_price#31] -Arguments: [ss_customer_sk#30 ASC NULLS FIRST], false, 0 +Input [2]: [ss_customer_sk#25, ss_ext_sales_price#26] +Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 (52) SortMergeJoin [codegen id : 15] -Left keys [1]: [c_customer_sk#27] -Right keys [1]: [ss_customer_sk#30] +Left keys [1]: [c_customer_sk#23] +Right keys [1]: [ss_customer_sk#25] Join condition: None (53) Project [codegen id : 15] -Output [2]: [c_customer_sk#27, ss_ext_sales_price#31] -Input [3]: [c_customer_sk#27, ss_customer_sk#30, ss_ext_sales_price#31] +Output [2]: [c_customer_sk#23, ss_ext_sales_price#26] +Input [3]: [c_customer_sk#23, ss_customer_sk#25, ss_ext_sales_price#26] (54) HashAggregate [codegen id : 15] -Input [2]: [c_customer_sk#27, ss_ext_sales_price#31] -Keys [1]: [c_customer_sk#27] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#31))] -Aggregate Attributes [1]: [sum#36] -Results [2]: [c_customer_sk#27, sum#37] +Input [2]: [c_customer_sk#23, ss_ext_sales_price#26] +Keys [1]: [c_customer_sk#23] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#26))] +Aggregate Attributes [1]: [sum#30] +Results [2]: [c_customer_sk#23, sum#31] (55) HashAggregate [codegen id : 15] -Input [2]: [c_customer_sk#27, sum#37] -Keys [1]: [c_customer_sk#27] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#31))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#31))#38] -Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#31))#38,17,2)) / 50.00), DecimalType(21,6)) as int) AS segment#39] +Input [2]: [c_customer_sk#23, sum#31] +Keys [1]: [c_customer_sk#23] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#26))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#26))#32] +Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#26))#32,17,2)) / 50.00), DecimalType(21,6)) as int) AS segment#33] (56) HashAggregate [codegen id : 15] -Input [1]: [segment#39] -Keys [1]: [segment#39] +Input [1]: [segment#33] +Keys [1]: [segment#33] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#40] -Results [2]: [segment#39, count#41] +Aggregate Attributes [1]: [count#34] +Results [2]: [segment#33, count#35] (57) Exchange -Input [2]: [segment#39, count#41] -Arguments: hashpartitioning(segment#39, 5), ENSURE_REQUIREMENTS, [id=#42] +Input [2]: [segment#33, count#35] +Arguments: hashpartitioning(segment#33, 5), ENSURE_REQUIREMENTS, [plan_id=7] (58) HashAggregate [codegen id : 16] -Input [2]: [segment#39, count#41] -Keys [1]: [segment#39] +Input [2]: [segment#33, count#35] +Keys [1]: [segment#33] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#43] -Results [3]: [segment#39, count(1)#43 AS num_customers#44, (segment#39 * 50) AS segment_base#45] +Aggregate Attributes [1]: [count(1)#36] +Results [3]: [segment#33, count(1)#36 AS num_customers#37, (segment#33 * 50) AS segment_base#38] (59) TakeOrderedAndProject -Input [3]: [segment#39, num_customers#44, segment_base#45] -Arguments: 100, [segment#39 ASC NULLS FIRST, num_customers#44 ASC NULLS FIRST], [segment#39, num_customers#44, segment_base#45] +Input [3]: [segment#33, num_customers#37, segment_base#38] +Arguments: 100, [segment#33 ASC NULLS FIRST, num_customers#37 ASC NULLS FIRST], [segment#33, num_customers#37, segment_base#38] ===== Subqueries ===== -Subquery:1 Hosting operator id = 11 Hosting Expression = cs_sold_date_sk#10 IN dynamicpruning#11 +Subquery:1 Hosting operator id = 11 Hosting Expression = cs_sold_date_sk#8 IN dynamicpruning#9 BroadcastExchange (64) +- * Project (63) +- * Filter (62) @@ -345,30 +345,30 @@ BroadcastExchange (64) (60) Scan parquet default.date_dim -Output [3]: [d_date_sk#21, d_year#46, d_moy#47] +Output [3]: [d_date_sk#19, d_year#39, d_moy#40] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] ReadSchema: struct (61) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#21, d_year#46, d_moy#47] +Input [3]: [d_date_sk#19, d_year#39, d_moy#40] (62) Filter [codegen id : 1] -Input [3]: [d_date_sk#21, d_year#46, d_moy#47] -Condition : ((((isnotnull(d_moy#47) AND isnotnull(d_year#46)) AND (d_moy#47 = 12)) AND (d_year#46 = 1998)) AND isnotnull(d_date_sk#21)) +Input [3]: [d_date_sk#19, d_year#39, d_moy#40] +Condition : ((((isnotnull(d_moy#40) AND isnotnull(d_year#39)) AND (d_moy#40 = 12)) AND (d_year#39 = 1998)) AND isnotnull(d_date_sk#19)) (63) Project [codegen id : 1] -Output [1]: [d_date_sk#21] -Input [3]: [d_date_sk#21, d_year#46, d_moy#47] +Output [1]: [d_date_sk#19] +Input [3]: [d_date_sk#19, d_year#39, d_moy#40] (64) BroadcastExchange -Input [1]: [d_date_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#48] +Input [1]: [d_date_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 15 Hosting Expression = ws_sold_date_sk#17 IN dynamicpruning#11 +Subquery:2 Hosting operator id = 15 Hosting Expression = ws_sold_date_sk#15 IN dynamicpruning#9 -Subquery:3 Hosting operator id = 44 Hosting Expression = ss_sold_date_sk#32 IN dynamicpruning#33 +Subquery:3 Hosting operator id = 44 Hosting Expression = ss_sold_date_sk#27 IN dynamicpruning#28 BroadcastExchange (69) +- * Project (68) +- * Filter (67) @@ -377,28 +377,28 @@ BroadcastExchange (69) (65) Scan parquet default.date_dim -Output [2]: [d_date_sk#34, d_month_seq#49] +Output [2]: [d_date_sk#29, d_month_seq#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] ReadSchema: struct (66) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#34, d_month_seq#49] +Input [2]: [d_date_sk#29, d_month_seq#41] (67) Filter [codegen id : 1] -Input [2]: [d_date_sk#34, d_month_seq#49] -Condition : (((isnotnull(d_month_seq#49) AND (d_month_seq#49 >= Subquery scalar-subquery#50, [id=#51])) AND (d_month_seq#49 <= Subquery scalar-subquery#52, [id=#53])) AND isnotnull(d_date_sk#34)) +Input [2]: [d_date_sk#29, d_month_seq#41] +Condition : (((isnotnull(d_month_seq#41) AND (d_month_seq#41 >= Subquery scalar-subquery#42, [id=#43])) AND (d_month_seq#41 <= Subquery scalar-subquery#44, [id=#45])) AND isnotnull(d_date_sk#29)) (68) Project [codegen id : 1] -Output [1]: [d_date_sk#34] -Input [2]: [d_date_sk#34, d_month_seq#49] +Output [1]: [d_date_sk#29] +Input [2]: [d_date_sk#29, d_month_seq#41] (69) BroadcastExchange -Input [1]: [d_date_sk#34] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#54] +Input [1]: [d_date_sk#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] -Subquery:4 Hosting operator id = 67 Hosting Expression = Subquery scalar-subquery#50, [id=#51] +Subquery:4 Hosting operator id = 67 Hosting Expression = Subquery scalar-subquery#42, [id=#43] * HashAggregate (76) +- Exchange (75) +- * HashAggregate (74) @@ -409,42 +409,42 @@ Subquery:4 Hosting operator id = 67 Hosting Expression = Subquery scalar-subquer (70) Scan parquet default.date_dim -Output [3]: [d_month_seq#55, d_year#56, d_moy#57] +Output [3]: [d_month_seq#46, d_year#47, d_moy#48] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] ReadSchema: struct (71) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#55, d_year#56, d_moy#57] +Input [3]: [d_month_seq#46, d_year#47, d_moy#48] (72) Filter [codegen id : 1] -Input [3]: [d_month_seq#55, d_year#56, d_moy#57] -Condition : (((isnotnull(d_year#56) AND isnotnull(d_moy#57)) AND (d_year#56 = 1998)) AND (d_moy#57 = 12)) +Input [3]: [d_month_seq#46, d_year#47, d_moy#48] +Condition : (((isnotnull(d_year#47) AND isnotnull(d_moy#48)) AND (d_year#47 = 1998)) AND (d_moy#48 = 12)) (73) Project [codegen id : 1] -Output [1]: [(d_month_seq#55 + 1) AS (d_month_seq + 1)#58] -Input [3]: [d_month_seq#55, d_year#56, d_moy#57] +Output [1]: [(d_month_seq#46 + 1) AS (d_month_seq + 1)#49] +Input [3]: [d_month_seq#46, d_year#47, d_moy#48] (74) HashAggregate [codegen id : 1] -Input [1]: [(d_month_seq + 1)#58] -Keys [1]: [(d_month_seq + 1)#58] +Input [1]: [(d_month_seq + 1)#49] +Keys [1]: [(d_month_seq + 1)#49] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 1)#58] +Results [1]: [(d_month_seq + 1)#49] (75) Exchange -Input [1]: [(d_month_seq + 1)#58] -Arguments: hashpartitioning((d_month_seq + 1)#58, 5), ENSURE_REQUIREMENTS, [id=#59] +Input [1]: [(d_month_seq + 1)#49] +Arguments: hashpartitioning((d_month_seq + 1)#49, 5), ENSURE_REQUIREMENTS, [plan_id=10] (76) HashAggregate [codegen id : 2] -Input [1]: [(d_month_seq + 1)#58] -Keys [1]: [(d_month_seq + 1)#58] +Input [1]: [(d_month_seq + 1)#49] +Keys [1]: [(d_month_seq + 1)#49] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 1)#58] +Results [1]: [(d_month_seq + 1)#49] -Subquery:5 Hosting operator id = 67 Hosting Expression = Subquery scalar-subquery#52, [id=#53] +Subquery:5 Hosting operator id = 67 Hosting Expression = Subquery scalar-subquery#44, [id=#45] * HashAggregate (83) +- Exchange (82) +- * HashAggregate (81) @@ -455,39 +455,39 @@ Subquery:5 Hosting operator id = 67 Hosting Expression = Subquery scalar-subquer (77) Scan parquet default.date_dim -Output [3]: [d_month_seq#60, d_year#61, d_moy#62] +Output [3]: [d_month_seq#50, d_year#51, d_moy#52] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] ReadSchema: struct (78) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#60, d_year#61, d_moy#62] +Input [3]: [d_month_seq#50, d_year#51, d_moy#52] (79) Filter [codegen id : 1] -Input [3]: [d_month_seq#60, d_year#61, d_moy#62] -Condition : (((isnotnull(d_year#61) AND isnotnull(d_moy#62)) AND (d_year#61 = 1998)) AND (d_moy#62 = 12)) +Input [3]: [d_month_seq#50, d_year#51, d_moy#52] +Condition : (((isnotnull(d_year#51) AND isnotnull(d_moy#52)) AND (d_year#51 = 1998)) AND (d_moy#52 = 12)) (80) Project [codegen id : 1] -Output [1]: [(d_month_seq#60 + 3) AS (d_month_seq + 3)#63] -Input [3]: [d_month_seq#60, d_year#61, d_moy#62] +Output [1]: [(d_month_seq#50 + 3) AS (d_month_seq + 3)#53] +Input [3]: [d_month_seq#50, d_year#51, d_moy#52] (81) HashAggregate [codegen id : 1] -Input [1]: [(d_month_seq + 3)#63] -Keys [1]: [(d_month_seq + 3)#63] +Input [1]: [(d_month_seq + 3)#53] +Keys [1]: [(d_month_seq + 3)#53] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 3)#63] +Results [1]: [(d_month_seq + 3)#53] (82) Exchange -Input [1]: [(d_month_seq + 3)#63] -Arguments: hashpartitioning((d_month_seq + 3)#63, 5), ENSURE_REQUIREMENTS, [id=#64] +Input [1]: [(d_month_seq + 3)#53] +Arguments: hashpartitioning((d_month_seq + 3)#53, 5), ENSURE_REQUIREMENTS, [plan_id=11] (83) HashAggregate [codegen id : 2] -Input [1]: [(d_month_seq + 3)#63] -Keys [1]: [(d_month_seq + 3)#63] +Input [1]: [(d_month_seq + 3)#53] +Keys [1]: [(d_month_seq + 3)#53] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 3)#63] +Results [1]: [(d_month_seq + 3)#53] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt index 4c65587bee530..3377b58db4712 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q54/explain.txt @@ -117,7 +117,7 @@ Input [3]: [i_item_sk#14, i_class#15, i_category#16] (14) BroadcastExchange Input [1]: [i_item_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (15) BroadcastHashJoin [codegen id : 6] Left keys [1]: [item_sk#7] @@ -129,195 +129,195 @@ Output [2]: [sold_date_sk#5, customer_sk#6] Input [4]: [sold_date_sk#5, customer_sk#6, item_sk#7, i_item_sk#14] (17) ReusedExchange [Reuses operator id: 61] -Output [1]: [d_date_sk#18] +Output [1]: [d_date_sk#17] (18) BroadcastHashJoin [codegen id : 6] Left keys [1]: [sold_date_sk#5] -Right keys [1]: [d_date_sk#18] +Right keys [1]: [d_date_sk#17] Join condition: None (19) Project [codegen id : 6] Output [1]: [customer_sk#6] -Input [3]: [sold_date_sk#5, customer_sk#6, d_date_sk#18] +Input [3]: [sold_date_sk#5, customer_sk#6, d_date_sk#17] (20) Scan parquet default.customer -Output [2]: [c_customer_sk#19, c_current_addr_sk#20] +Output [2]: [c_customer_sk#18, c_current_addr_sk#19] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 5] -Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Input [2]: [c_customer_sk#18, c_current_addr_sk#19] (22) Filter [codegen id : 5] -Input [2]: [c_customer_sk#19, c_current_addr_sk#20] -Condition : (isnotnull(c_customer_sk#19) AND isnotnull(c_current_addr_sk#20)) +Input [2]: [c_customer_sk#18, c_current_addr_sk#19] +Condition : (isnotnull(c_customer_sk#18) AND isnotnull(c_current_addr_sk#19)) (23) BroadcastExchange -Input [2]: [c_customer_sk#19, c_current_addr_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] +Input [2]: [c_customer_sk#18, c_current_addr_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (24) BroadcastHashJoin [codegen id : 6] Left keys [1]: [customer_sk#6] -Right keys [1]: [c_customer_sk#19] +Right keys [1]: [c_customer_sk#18] Join condition: None (25) Project [codegen id : 6] -Output [2]: [c_customer_sk#19, c_current_addr_sk#20] -Input [3]: [customer_sk#6, c_customer_sk#19, c_current_addr_sk#20] +Output [2]: [c_customer_sk#18, c_current_addr_sk#19] +Input [3]: [customer_sk#6, c_customer_sk#18, c_current_addr_sk#19] (26) HashAggregate [codegen id : 6] -Input [2]: [c_customer_sk#19, c_current_addr_sk#20] -Keys [2]: [c_customer_sk#19, c_current_addr_sk#20] +Input [2]: [c_customer_sk#18, c_current_addr_sk#19] +Keys [2]: [c_customer_sk#18, c_current_addr_sk#19] Functions: [] Aggregate Attributes: [] -Results [2]: [c_customer_sk#19, c_current_addr_sk#20] +Results [2]: [c_customer_sk#18, c_current_addr_sk#19] (27) Exchange -Input [2]: [c_customer_sk#19, c_current_addr_sk#20] -Arguments: hashpartitioning(c_customer_sk#19, c_current_addr_sk#20, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [2]: [c_customer_sk#18, c_current_addr_sk#19] +Arguments: hashpartitioning(c_customer_sk#18, c_current_addr_sk#19, 5), ENSURE_REQUIREMENTS, [plan_id=3] (28) HashAggregate [codegen id : 11] -Input [2]: [c_customer_sk#19, c_current_addr_sk#20] -Keys [2]: [c_customer_sk#19, c_current_addr_sk#20] +Input [2]: [c_customer_sk#18, c_current_addr_sk#19] +Keys [2]: [c_customer_sk#18, c_current_addr_sk#19] Functions: [] Aggregate Attributes: [] -Results [2]: [c_customer_sk#19, c_current_addr_sk#20] +Results [2]: [c_customer_sk#18, c_current_addr_sk#19] (29) Scan parquet default.store_sales -Output [3]: [ss_customer_sk#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Output [3]: [ss_customer_sk#20, ss_ext_sales_price#21, ss_sold_date_sk#22] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#25), dynamicpruningexpression(ss_sold_date_sk#25 IN dynamicpruning#26)] +PartitionFilters: [isnotnull(ss_sold_date_sk#22), dynamicpruningexpression(ss_sold_date_sk#22 IN dynamicpruning#23)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 7] -Input [3]: [ss_customer_sk#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Input [3]: [ss_customer_sk#20, ss_ext_sales_price#21, ss_sold_date_sk#22] (31) Filter [codegen id : 7] -Input [3]: [ss_customer_sk#23, ss_ext_sales_price#24, ss_sold_date_sk#25] -Condition : isnotnull(ss_customer_sk#23) +Input [3]: [ss_customer_sk#20, ss_ext_sales_price#21, ss_sold_date_sk#22] +Condition : isnotnull(ss_customer_sk#20) (32) BroadcastExchange -Input [3]: [ss_customer_sk#23, ss_ext_sales_price#24, ss_sold_date_sk#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] +Input [3]: [ss_customer_sk#20, ss_ext_sales_price#21, ss_sold_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (33) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [c_customer_sk#19] -Right keys [1]: [ss_customer_sk#23] +Left keys [1]: [c_customer_sk#18] +Right keys [1]: [ss_customer_sk#20] Join condition: None (34) Project [codegen id : 11] -Output [4]: [c_customer_sk#19, c_current_addr_sk#20, ss_ext_sales_price#24, ss_sold_date_sk#25] -Input [5]: [c_customer_sk#19, c_current_addr_sk#20, ss_customer_sk#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Output [4]: [c_customer_sk#18, c_current_addr_sk#19, ss_ext_sales_price#21, ss_sold_date_sk#22] +Input [5]: [c_customer_sk#18, c_current_addr_sk#19, ss_customer_sk#20, ss_ext_sales_price#21, ss_sold_date_sk#22] (35) Scan parquet default.customer_address -Output [3]: [ca_address_sk#28, ca_county#29, ca_state#30] +Output [3]: [ca_address_sk#24, ca_county#25, ca_state#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_county), IsNotNull(ca_state)] ReadSchema: struct (36) ColumnarToRow [codegen id : 8] -Input [3]: [ca_address_sk#28, ca_county#29, ca_state#30] +Input [3]: [ca_address_sk#24, ca_county#25, ca_state#26] (37) Filter [codegen id : 8] -Input [3]: [ca_address_sk#28, ca_county#29, ca_state#30] -Condition : ((isnotnull(ca_address_sk#28) AND isnotnull(ca_county#29)) AND isnotnull(ca_state#30)) +Input [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Condition : ((isnotnull(ca_address_sk#24) AND isnotnull(ca_county#25)) AND isnotnull(ca_state#26)) (38) BroadcastExchange -Input [3]: [ca_address_sk#28, ca_county#29, ca_state#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] +Input [3]: [ca_address_sk#24, ca_county#25, ca_state#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (39) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [c_current_addr_sk#20] -Right keys [1]: [ca_address_sk#28] +Left keys [1]: [c_current_addr_sk#19] +Right keys [1]: [ca_address_sk#24] Join condition: None (40) Project [codegen id : 11] -Output [5]: [c_customer_sk#19, ss_ext_sales_price#24, ss_sold_date_sk#25, ca_county#29, ca_state#30] -Input [7]: [c_customer_sk#19, c_current_addr_sk#20, ss_ext_sales_price#24, ss_sold_date_sk#25, ca_address_sk#28, ca_county#29, ca_state#30] +Output [5]: [c_customer_sk#18, ss_ext_sales_price#21, ss_sold_date_sk#22, ca_county#25, ca_state#26] +Input [7]: [c_customer_sk#18, c_current_addr_sk#19, ss_ext_sales_price#21, ss_sold_date_sk#22, ca_address_sk#24, ca_county#25, ca_state#26] (41) Scan parquet default.store -Output [2]: [s_county#32, s_state#33] +Output [2]: [s_county#27, s_state#28] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_county), IsNotNull(s_state)] ReadSchema: struct (42) ColumnarToRow [codegen id : 9] -Input [2]: [s_county#32, s_state#33] +Input [2]: [s_county#27, s_state#28] (43) Filter [codegen id : 9] -Input [2]: [s_county#32, s_state#33] -Condition : (isnotnull(s_county#32) AND isnotnull(s_state#33)) +Input [2]: [s_county#27, s_state#28] +Condition : (isnotnull(s_county#27) AND isnotnull(s_state#28)) (44) BroadcastExchange -Input [2]: [s_county#32, s_state#33] -Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [id=#34] +Input [2]: [s_county#27, s_state#28] +Arguments: HashedRelationBroadcastMode(List(input[0, string, false], input[1, string, false]),false), [plan_id=6] (45) BroadcastHashJoin [codegen id : 11] -Left keys [2]: [ca_county#29, ca_state#30] -Right keys [2]: [s_county#32, s_state#33] +Left keys [2]: [ca_county#25, ca_state#26] +Right keys [2]: [s_county#27, s_state#28] Join condition: None (46) Project [codegen id : 11] -Output [3]: [c_customer_sk#19, ss_ext_sales_price#24, ss_sold_date_sk#25] -Input [7]: [c_customer_sk#19, ss_ext_sales_price#24, ss_sold_date_sk#25, ca_county#29, ca_state#30, s_county#32, s_state#33] +Output [3]: [c_customer_sk#18, ss_ext_sales_price#21, ss_sold_date_sk#22] +Input [7]: [c_customer_sk#18, ss_ext_sales_price#21, ss_sold_date_sk#22, ca_county#25, ca_state#26, s_county#27, s_state#28] (47) ReusedExchange [Reuses operator id: 66] -Output [1]: [d_date_sk#35] +Output [1]: [d_date_sk#29] (48) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_sold_date_sk#25] -Right keys [1]: [d_date_sk#35] +Left keys [1]: [ss_sold_date_sk#22] +Right keys [1]: [d_date_sk#29] Join condition: None (49) Project [codegen id : 11] -Output [2]: [c_customer_sk#19, ss_ext_sales_price#24] -Input [4]: [c_customer_sk#19, ss_ext_sales_price#24, ss_sold_date_sk#25, d_date_sk#35] +Output [2]: [c_customer_sk#18, ss_ext_sales_price#21] +Input [4]: [c_customer_sk#18, ss_ext_sales_price#21, ss_sold_date_sk#22, d_date_sk#29] (50) HashAggregate [codegen id : 11] -Input [2]: [c_customer_sk#19, ss_ext_sales_price#24] -Keys [1]: [c_customer_sk#19] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#24))] -Aggregate Attributes [1]: [sum#36] -Results [2]: [c_customer_sk#19, sum#37] +Input [2]: [c_customer_sk#18, ss_ext_sales_price#21] +Keys [1]: [c_customer_sk#18] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#21))] +Aggregate Attributes [1]: [sum#30] +Results [2]: [c_customer_sk#18, sum#31] (51) Exchange -Input [2]: [c_customer_sk#19, sum#37] -Arguments: hashpartitioning(c_customer_sk#19, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [2]: [c_customer_sk#18, sum#31] +Arguments: hashpartitioning(c_customer_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=7] (52) HashAggregate [codegen id : 12] -Input [2]: [c_customer_sk#19, sum#37] -Keys [1]: [c_customer_sk#19] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#24))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#24))#39] -Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#24))#39,17,2)) / 50.00), DecimalType(21,6)) as int) AS segment#40] +Input [2]: [c_customer_sk#18, sum#31] +Keys [1]: [c_customer_sk#18] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#21))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#21))#32] +Results [1]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#21))#32,17,2)) / 50.00), DecimalType(21,6)) as int) AS segment#33] (53) HashAggregate [codegen id : 12] -Input [1]: [segment#40] -Keys [1]: [segment#40] +Input [1]: [segment#33] +Keys [1]: [segment#33] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#41] -Results [2]: [segment#40, count#42] +Aggregate Attributes [1]: [count#34] +Results [2]: [segment#33, count#35] (54) Exchange -Input [2]: [segment#40, count#42] -Arguments: hashpartitioning(segment#40, 5), ENSURE_REQUIREMENTS, [id=#43] +Input [2]: [segment#33, count#35] +Arguments: hashpartitioning(segment#33, 5), ENSURE_REQUIREMENTS, [plan_id=8] (55) HashAggregate [codegen id : 13] -Input [2]: [segment#40, count#42] -Keys [1]: [segment#40] +Input [2]: [segment#33, count#35] +Keys [1]: [segment#33] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#44] -Results [3]: [segment#40, count(1)#44 AS num_customers#45, (segment#40 * 50) AS segment_base#46] +Aggregate Attributes [1]: [count(1)#36] +Results [3]: [segment#33, count(1)#36 AS num_customers#37, (segment#33 * 50) AS segment_base#38] (56) TakeOrderedAndProject -Input [3]: [segment#40, num_customers#45, segment_base#46] -Arguments: 100, [segment#40 ASC NULLS FIRST, num_customers#45 ASC NULLS FIRST], [segment#40, num_customers#45, segment_base#46] +Input [3]: [segment#33, num_customers#37, segment_base#38] +Arguments: 100, [segment#33 ASC NULLS FIRST, num_customers#37 ASC NULLS FIRST], [segment#33, num_customers#37, segment_base#38] ===== Subqueries ===== @@ -330,30 +330,30 @@ BroadcastExchange (61) (57) Scan parquet default.date_dim -Output [3]: [d_date_sk#18, d_year#47, d_moy#48] +Output [3]: [d_date_sk#17, d_year#39, d_moy#40] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,12), EqualTo(d_year,1998), IsNotNull(d_date_sk)] ReadSchema: struct (58) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#18, d_year#47, d_moy#48] +Input [3]: [d_date_sk#17, d_year#39, d_moy#40] (59) Filter [codegen id : 1] -Input [3]: [d_date_sk#18, d_year#47, d_moy#48] -Condition : ((((isnotnull(d_moy#48) AND isnotnull(d_year#47)) AND (d_moy#48 = 12)) AND (d_year#47 = 1998)) AND isnotnull(d_date_sk#18)) +Input [3]: [d_date_sk#17, d_year#39, d_moy#40] +Condition : ((((isnotnull(d_moy#40) AND isnotnull(d_year#39)) AND (d_moy#40 = 12)) AND (d_year#39 = 1998)) AND isnotnull(d_date_sk#17)) (60) Project [codegen id : 1] -Output [1]: [d_date_sk#18] -Input [3]: [d_date_sk#18, d_year#47, d_moy#48] +Output [1]: [d_date_sk#17] +Input [3]: [d_date_sk#17, d_year#39, d_moy#40] (61) BroadcastExchange -Input [1]: [d_date_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] Subquery:2 Hosting operator id = 5 Hosting Expression = ws_sold_date_sk#10 IN dynamicpruning#4 -Subquery:3 Hosting operator id = 29 Hosting Expression = ss_sold_date_sk#25 IN dynamicpruning#26 +Subquery:3 Hosting operator id = 29 Hosting Expression = ss_sold_date_sk#22 IN dynamicpruning#23 BroadcastExchange (66) +- * Project (65) +- * Filter (64) @@ -362,28 +362,28 @@ BroadcastExchange (66) (62) Scan parquet default.date_dim -Output [2]: [d_date_sk#35, d_month_seq#50] +Output [2]: [d_date_sk#29, d_month_seq#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] ReadSchema: struct (63) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#35, d_month_seq#50] +Input [2]: [d_date_sk#29, d_month_seq#41] (64) Filter [codegen id : 1] -Input [2]: [d_date_sk#35, d_month_seq#50] -Condition : (((isnotnull(d_month_seq#50) AND (d_month_seq#50 >= Subquery scalar-subquery#51, [id=#52])) AND (d_month_seq#50 <= Subquery scalar-subquery#53, [id=#54])) AND isnotnull(d_date_sk#35)) +Input [2]: [d_date_sk#29, d_month_seq#41] +Condition : (((isnotnull(d_month_seq#41) AND (d_month_seq#41 >= Subquery scalar-subquery#42, [id=#43])) AND (d_month_seq#41 <= Subquery scalar-subquery#44, [id=#45])) AND isnotnull(d_date_sk#29)) (65) Project [codegen id : 1] -Output [1]: [d_date_sk#35] -Input [2]: [d_date_sk#35, d_month_seq#50] +Output [1]: [d_date_sk#29] +Input [2]: [d_date_sk#29, d_month_seq#41] (66) BroadcastExchange -Input [1]: [d_date_sk#35] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#55] +Input [1]: [d_date_sk#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] -Subquery:4 Hosting operator id = 64 Hosting Expression = Subquery scalar-subquery#51, [id=#52] +Subquery:4 Hosting operator id = 64 Hosting Expression = Subquery scalar-subquery#42, [id=#43] * HashAggregate (73) +- Exchange (72) +- * HashAggregate (71) @@ -394,42 +394,42 @@ Subquery:4 Hosting operator id = 64 Hosting Expression = Subquery scalar-subquer (67) Scan parquet default.date_dim -Output [3]: [d_month_seq#56, d_year#57, d_moy#58] +Output [3]: [d_month_seq#46, d_year#47, d_moy#48] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] ReadSchema: struct (68) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#56, d_year#57, d_moy#58] +Input [3]: [d_month_seq#46, d_year#47, d_moy#48] (69) Filter [codegen id : 1] -Input [3]: [d_month_seq#56, d_year#57, d_moy#58] -Condition : (((isnotnull(d_year#57) AND isnotnull(d_moy#58)) AND (d_year#57 = 1998)) AND (d_moy#58 = 12)) +Input [3]: [d_month_seq#46, d_year#47, d_moy#48] +Condition : (((isnotnull(d_year#47) AND isnotnull(d_moy#48)) AND (d_year#47 = 1998)) AND (d_moy#48 = 12)) (70) Project [codegen id : 1] -Output [1]: [(d_month_seq#56 + 1) AS (d_month_seq + 1)#59] -Input [3]: [d_month_seq#56, d_year#57, d_moy#58] +Output [1]: [(d_month_seq#46 + 1) AS (d_month_seq + 1)#49] +Input [3]: [d_month_seq#46, d_year#47, d_moy#48] (71) HashAggregate [codegen id : 1] -Input [1]: [(d_month_seq + 1)#59] -Keys [1]: [(d_month_seq + 1)#59] +Input [1]: [(d_month_seq + 1)#49] +Keys [1]: [(d_month_seq + 1)#49] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 1)#59] +Results [1]: [(d_month_seq + 1)#49] (72) Exchange -Input [1]: [(d_month_seq + 1)#59] -Arguments: hashpartitioning((d_month_seq + 1)#59, 5), ENSURE_REQUIREMENTS, [id=#60] +Input [1]: [(d_month_seq + 1)#49] +Arguments: hashpartitioning((d_month_seq + 1)#49, 5), ENSURE_REQUIREMENTS, [plan_id=11] (73) HashAggregate [codegen id : 2] -Input [1]: [(d_month_seq + 1)#59] -Keys [1]: [(d_month_seq + 1)#59] +Input [1]: [(d_month_seq + 1)#49] +Keys [1]: [(d_month_seq + 1)#49] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 1)#59] +Results [1]: [(d_month_seq + 1)#49] -Subquery:5 Hosting operator id = 64 Hosting Expression = Subquery scalar-subquery#53, [id=#54] +Subquery:5 Hosting operator id = 64 Hosting Expression = Subquery scalar-subquery#44, [id=#45] * HashAggregate (80) +- Exchange (79) +- * HashAggregate (78) @@ -440,39 +440,39 @@ Subquery:5 Hosting operator id = 64 Hosting Expression = Subquery scalar-subquer (74) Scan parquet default.date_dim -Output [3]: [d_month_seq#61, d_year#62, d_moy#63] +Output [3]: [d_month_seq#50, d_year#51, d_moy#52] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,12)] ReadSchema: struct (75) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#61, d_year#62, d_moy#63] +Input [3]: [d_month_seq#50, d_year#51, d_moy#52] (76) Filter [codegen id : 1] -Input [3]: [d_month_seq#61, d_year#62, d_moy#63] -Condition : (((isnotnull(d_year#62) AND isnotnull(d_moy#63)) AND (d_year#62 = 1998)) AND (d_moy#63 = 12)) +Input [3]: [d_month_seq#50, d_year#51, d_moy#52] +Condition : (((isnotnull(d_year#51) AND isnotnull(d_moy#52)) AND (d_year#51 = 1998)) AND (d_moy#52 = 12)) (77) Project [codegen id : 1] -Output [1]: [(d_month_seq#61 + 3) AS (d_month_seq + 3)#64] -Input [3]: [d_month_seq#61, d_year#62, d_moy#63] +Output [1]: [(d_month_seq#50 + 3) AS (d_month_seq + 3)#53] +Input [3]: [d_month_seq#50, d_year#51, d_moy#52] (78) HashAggregate [codegen id : 1] -Input [1]: [(d_month_seq + 3)#64] -Keys [1]: [(d_month_seq + 3)#64] +Input [1]: [(d_month_seq + 3)#53] +Keys [1]: [(d_month_seq + 3)#53] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 3)#64] +Results [1]: [(d_month_seq + 3)#53] (79) Exchange -Input [1]: [(d_month_seq + 3)#64] -Arguments: hashpartitioning((d_month_seq + 3)#64, 5), ENSURE_REQUIREMENTS, [id=#65] +Input [1]: [(d_month_seq + 3)#53] +Arguments: hashpartitioning((d_month_seq + 3)#53, 5), ENSURE_REQUIREMENTS, [plan_id=12] (80) HashAggregate [codegen id : 2] -Input [1]: [(d_month_seq + 3)#64] -Keys [1]: [(d_month_seq + 3)#64] +Input [1]: [(d_month_seq + 3)#53] +Keys [1]: [(d_month_seq + 3)#53] Functions: [] Aggregate Attributes: [] -Results [1]: [(d_month_seq + 3)#64] +Results [1]: [(d_month_seq + 3)#53] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt index c6870bd3ac6a3..2d13f622c6ce1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55.sf100/explain.txt @@ -53,7 +53,7 @@ Input [4]: [i_item_sk#5, i_brand_id#6, i_brand#7, i_manager_id#8] (8) BroadcastExchange Input [3]: [i_item_sk#5, i_brand_id#6, i_brand#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#1] @@ -65,38 +65,38 @@ Output [4]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_brand_id#6, i_brand#7] Input [6]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#5, i_brand_id#6, i_brand#7] (11) ReusedExchange [Reuses operator id: 22] -Output [1]: [d_date_sk#10] +Output [1]: [d_date_sk#9] (12) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#3] -Right keys [1]: [d_date_sk#10] +Right keys [1]: [d_date_sk#9] Join condition: None (13) Project [codegen id : 3] Output [3]: [ss_ext_sales_price#2, i_brand_id#6, i_brand#7] -Input [5]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_brand_id#6, i_brand#7, d_date_sk#10] +Input [5]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_brand_id#6, i_brand#7, d_date_sk#9] (14) HashAggregate [codegen id : 3] Input [3]: [ss_ext_sales_price#2, i_brand_id#6, i_brand#7] Keys [2]: [i_brand#7, i_brand_id#6] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#11] -Results [3]: [i_brand#7, i_brand_id#6, sum#12] +Aggregate Attributes [1]: [sum#10] +Results [3]: [i_brand#7, i_brand_id#6, sum#11] (15) Exchange -Input [3]: [i_brand#7, i_brand_id#6, sum#12] -Arguments: hashpartitioning(i_brand#7, i_brand_id#6, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [3]: [i_brand#7, i_brand_id#6, sum#11] +Arguments: hashpartitioning(i_brand#7, i_brand_id#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (16) HashAggregate [codegen id : 4] -Input [3]: [i_brand#7, i_brand_id#6, sum#12] +Input [3]: [i_brand#7, i_brand_id#6, sum#11] Keys [2]: [i_brand#7, i_brand_id#6] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] -Results [3]: [i_brand_id#6 AS brand_id#15, i_brand#7 AS brand#16, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS ext_price#17] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#12] +Results [3]: [i_brand_id#6 AS brand_id#13, i_brand#7 AS brand#14, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#12,17,2) AS ext_price#15] (17) TakeOrderedAndProject -Input [3]: [brand_id#15, brand#16, ext_price#17] -Arguments: 100, [ext_price#17 DESC NULLS LAST, brand_id#15 ASC NULLS FIRST], [brand_id#15, brand#16, ext_price#17] +Input [3]: [brand_id#13, brand#14, ext_price#15] +Arguments: 100, [ext_price#15 DESC NULLS LAST, brand_id#13 ASC NULLS FIRST], [brand_id#13, brand#14, ext_price#15] ===== Subqueries ===== @@ -109,25 +109,25 @@ BroadcastExchange (22) (18) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#18, d_moy#19] +Output [3]: [d_date_sk#9, d_year#16, d_moy#17] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#18, d_moy#19] +Input [3]: [d_date_sk#9, d_year#16, d_moy#17] (20) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#18, d_moy#19] -Condition : ((((isnotnull(d_moy#19) AND isnotnull(d_year#18)) AND (d_moy#19 = 11)) AND (d_year#18 = 1999)) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#9, d_year#16, d_moy#17] +Condition : ((((isnotnull(d_moy#17) AND isnotnull(d_year#16)) AND (d_moy#17 = 11)) AND (d_year#16 = 1999)) AND isnotnull(d_date_sk#9)) (21) Project [codegen id : 1] -Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#18, d_moy#19] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#16, d_moy#17] (22) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt index 25b423382e332..782b7ffb509b7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q55/explain.txt @@ -57,7 +57,7 @@ Condition : isnotnull(ss_item_sk#4) (8) BroadcastExchange Input [3]: [ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [d_date_sk#1] @@ -69,55 +69,55 @@ Output [2]: [ss_item_sk#4, ss_ext_sales_price#5] Input [4]: [d_date_sk#1, ss_item_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6] (11) Scan parquet default.item -Output [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Output [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_manager_id), EqualTo(i_manager_id,28), IsNotNull(i_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] (13) Filter [codegen id : 2] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] -Condition : ((isnotnull(i_manager_id#11) AND (i_manager_id#11 = 28)) AND isnotnull(i_item_sk#8)) +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] +Condition : ((isnotnull(i_manager_id#10) AND (i_manager_id#10 = 28)) AND isnotnull(i_item_sk#7)) (14) Project [codegen id : 2] -Output [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Input [4]: [i_item_sk#8, i_brand_id#9, i_brand#10, i_manager_id#11] +Output [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Input [4]: [i_item_sk#7, i_brand_id#8, i_brand#9, i_manager_id#10] (15) BroadcastExchange -Input [3]: [i_item_sk#8, i_brand_id#9, i_brand#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Input [3]: [i_item_sk#7, i_brand_id#8, i_brand#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#4] -Right keys [1]: [i_item_sk#8] +Right keys [1]: [i_item_sk#7] Join condition: None (17) Project [codegen id : 3] -Output [3]: [ss_ext_sales_price#5, i_brand_id#9, i_brand#10] -Input [5]: [ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#8, i_brand_id#9, i_brand#10] +Output [3]: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Input [5]: [ss_item_sk#4, ss_ext_sales_price#5, i_item_sk#7, i_brand_id#8, i_brand#9] (18) HashAggregate [codegen id : 3] -Input [3]: [ss_ext_sales_price#5, i_brand_id#9, i_brand#10] -Keys [2]: [i_brand#10, i_brand_id#9] +Input [3]: [ss_ext_sales_price#5, i_brand_id#8, i_brand#9] +Keys [2]: [i_brand#9, i_brand_id#8] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum#13] -Results [3]: [i_brand#10, i_brand_id#9, sum#14] +Aggregate Attributes [1]: [sum#11] +Results [3]: [i_brand#9, i_brand_id#8, sum#12] (19) Exchange -Input [3]: [i_brand#10, i_brand_id#9, sum#14] -Arguments: hashpartitioning(i_brand#10, i_brand_id#9, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [3]: [i_brand#9, i_brand_id#8, sum#12] +Arguments: hashpartitioning(i_brand#9, i_brand_id#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [3]: [i_brand#10, i_brand_id#9, sum#14] -Keys [2]: [i_brand#10, i_brand_id#9] +Input [3]: [i_brand#9, i_brand_id#8, sum#12] +Keys [2]: [i_brand#9, i_brand_id#8] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#16] -Results [3]: [i_brand_id#9 AS brand_id#17, i_brand#10 AS brand#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#16,17,2) AS ext_price#19] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#13] +Results [3]: [i_brand_id#8 AS brand_id#14, i_brand#9 AS brand#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#13,17,2) AS ext_price#16] (21) TakeOrderedAndProject -Input [3]: [brand_id#17, brand#18, ext_price#19] -Arguments: 100, [ext_price#19 DESC NULLS LAST, brand_id#17 ASC NULLS FIRST], [brand_id#17, brand#18, ext_price#19] +Input [3]: [brand_id#14, brand#15, ext_price#16] +Arguments: 100, [ext_price#16 DESC NULLS LAST, brand_id#14 ASC NULLS FIRST], [brand_id#14, brand#15, ext_price#16] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.sf100/explain.txt index a7b59fe5d5ea7..7f10a26fd226f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56.sf100/explain.txt @@ -111,7 +111,7 @@ Input [2]: [ca_address_sk#7, ca_gmt_offset#8] (11) BroadcastExchange Input [1]: [ca_address_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_addr_sk#2] @@ -123,238 +123,238 @@ Output [2]: [ss_item_sk#1, ss_ext_sales_price#3] Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#7] (14) Scan parquet default.item -Output [2]: [i_item_sk#10, i_item_id#11] +Output [2]: [i_item_sk#9, i_item_id#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#10, i_item_id#11] +Input [2]: [i_item_sk#9, i_item_id#10] (16) Filter [codegen id : 4] -Input [2]: [i_item_sk#10, i_item_id#11] -Condition : isnotnull(i_item_sk#10) +Input [2]: [i_item_sk#9, i_item_id#10] +Condition : isnotnull(i_item_sk#9) (17) Scan parquet default.item -Output [2]: [i_item_id#12, i_color#13] +Output [2]: [i_item_id#11, i_color#12] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [In(i_color, [blanched ,burnished ,slate ])] ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_id#12, i_color#13] +Input [2]: [i_item_id#11, i_color#12] (19) Filter [codegen id : 3] -Input [2]: [i_item_id#12, i_color#13] -Condition : i_color#13 IN (slate ,blanched ,burnished ) +Input [2]: [i_item_id#11, i_color#12] +Condition : i_color#12 IN (slate ,blanched ,burnished ) (20) Project [codegen id : 3] -Output [1]: [i_item_id#12] -Input [2]: [i_item_id#12, i_color#13] +Output [1]: [i_item_id#11] +Input [2]: [i_item_id#11, i_color#12] (21) BroadcastExchange -Input [1]: [i_item_id#12] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#14] +Input [1]: [i_item_id#11] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=2] (22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_id#11] -Right keys [1]: [i_item_id#12] +Left keys [1]: [i_item_id#10] +Right keys [1]: [i_item_id#11] Join condition: None (23) BroadcastExchange -Input [2]: [i_item_sk#10, i_item_id#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Input [2]: [i_item_sk#9, i_item_id#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#10] +Right keys [1]: [i_item_sk#9] Join condition: None (25) Project [codegen id : 5] -Output [2]: [ss_ext_sales_price#3, i_item_id#11] -Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_item_id#11] +Output [2]: [ss_ext_sales_price#3, i_item_id#10] +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#9, i_item_id#10] (26) HashAggregate [codegen id : 5] -Input [2]: [ss_ext_sales_price#3, i_item_id#11] -Keys [1]: [i_item_id#11] +Input [2]: [ss_ext_sales_price#3, i_item_id#10] +Keys [1]: [i_item_id#10] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum#16] -Results [2]: [i_item_id#11, sum#17] +Aggregate Attributes [1]: [sum#13] +Results [2]: [i_item_id#10, sum#14] (27) Exchange -Input [2]: [i_item_id#11, sum#17] -Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [2]: [i_item_id#10, sum#14] +Arguments: hashpartitioning(i_item_id#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 6] -Input [2]: [i_item_id#11, sum#17] -Keys [1]: [i_item_id#11] +Input [2]: [i_item_id#10, sum#14] +Keys [1]: [i_item_id#10] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#19] -Results [2]: [i_item_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#19,17,2) AS total_sales#20] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#15] +Results [2]: [i_item_id#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#15,17,2) AS total_sales#16] (29) Scan parquet default.catalog_sales -Output [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] +Output [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#24), dynamicpruningexpression(cs_sold_date_sk#24 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#20), dynamicpruningexpression(cs_sold_date_sk#20 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 11] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] (31) Filter [codegen id : 11] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] -Condition : (isnotnull(cs_bill_addr_sk#21) AND isnotnull(cs_item_sk#22)) +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] +Condition : (isnotnull(cs_bill_addr_sk#17) AND isnotnull(cs_item_sk#18)) (32) ReusedExchange [Reuses operator id: 68] -Output [1]: [d_date_sk#25] +Output [1]: [d_date_sk#21] (33) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_date_sk#24] -Right keys [1]: [d_date_sk#25] +Left keys [1]: [cs_sold_date_sk#20] +Right keys [1]: [d_date_sk#21] Join condition: None (34) Project [codegen id : 11] -Output [3]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23] -Input [5]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24, d_date_sk#25] +Output [3]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19] +Input [5]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20, d_date_sk#21] (35) ReusedExchange [Reuses operator id: 11] -Output [1]: [ca_address_sk#26] +Output [1]: [ca_address_sk#22] (36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_bill_addr_sk#21] -Right keys [1]: [ca_address_sk#26] +Left keys [1]: [cs_bill_addr_sk#17] +Right keys [1]: [ca_address_sk#22] Join condition: None (37) Project [codegen id : 11] -Output [2]: [cs_item_sk#22, cs_ext_sales_price#23] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, ca_address_sk#26] +Output [2]: [cs_item_sk#18, cs_ext_sales_price#19] +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, ca_address_sk#22] (38) ReusedExchange [Reuses operator id: 23] -Output [2]: [i_item_sk#27, i_item_id#28] +Output [2]: [i_item_sk#23, i_item_id#24] (39) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_item_sk#22] -Right keys [1]: [i_item_sk#27] +Left keys [1]: [cs_item_sk#18] +Right keys [1]: [i_item_sk#23] Join condition: None (40) Project [codegen id : 11] -Output [2]: [cs_ext_sales_price#23, i_item_id#28] -Input [4]: [cs_item_sk#22, cs_ext_sales_price#23, i_item_sk#27, i_item_id#28] +Output [2]: [cs_ext_sales_price#19, i_item_id#24] +Input [4]: [cs_item_sk#18, cs_ext_sales_price#19, i_item_sk#23, i_item_id#24] (41) HashAggregate [codegen id : 11] -Input [2]: [cs_ext_sales_price#23, i_item_id#28] -Keys [1]: [i_item_id#28] -Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#23))] -Aggregate Attributes [1]: [sum#29] -Results [2]: [i_item_id#28, sum#30] +Input [2]: [cs_ext_sales_price#19, i_item_id#24] +Keys [1]: [i_item_id#24] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#19))] +Aggregate Attributes [1]: [sum#25] +Results [2]: [i_item_id#24, sum#26] (42) Exchange -Input [2]: [i_item_id#28, sum#30] -Arguments: hashpartitioning(i_item_id#28, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [2]: [i_item_id#24, sum#26] +Arguments: hashpartitioning(i_item_id#24, 5), ENSURE_REQUIREMENTS, [plan_id=5] (43) HashAggregate [codegen id : 12] -Input [2]: [i_item_id#28, sum#30] -Keys [1]: [i_item_id#28] -Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#23))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#23))#32] -Results [2]: [i_item_id#28, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#23))#32,17,2) AS total_sales#33] +Input [2]: [i_item_id#24, sum#26] +Keys [1]: [i_item_id#24] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#19))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#19))#27] +Results [2]: [i_item_id#24, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#19))#27,17,2) AS total_sales#28] (44) Scan parquet default.web_sales -Output [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Output [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#37), dynamicpruningexpression(ws_sold_date_sk#37 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#32), dynamicpruningexpression(ws_sold_date_sk#32 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 17] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] (46) Filter [codegen id : 17] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] -Condition : (isnotnull(ws_bill_addr_sk#35) AND isnotnull(ws_item_sk#34)) +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] +Condition : (isnotnull(ws_bill_addr_sk#30) AND isnotnull(ws_item_sk#29)) (47) ReusedExchange [Reuses operator id: 68] -Output [1]: [d_date_sk#38] +Output [1]: [d_date_sk#33] (48) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#37] -Right keys [1]: [d_date_sk#38] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#33] Join condition: None (49) Project [codegen id : 17] -Output [3]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36] -Input [5]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37, d_date_sk#38] +Output [3]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31] +Input [5]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32, d_date_sk#33] (50) ReusedExchange [Reuses operator id: 11] -Output [1]: [ca_address_sk#39] +Output [1]: [ca_address_sk#34] (51) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_bill_addr_sk#35] -Right keys [1]: [ca_address_sk#39] +Left keys [1]: [ws_bill_addr_sk#30] +Right keys [1]: [ca_address_sk#34] Join condition: None (52) Project [codegen id : 17] -Output [2]: [ws_item_sk#34, ws_ext_sales_price#36] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ca_address_sk#39] +Output [2]: [ws_item_sk#29, ws_ext_sales_price#31] +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ca_address_sk#34] (53) ReusedExchange [Reuses operator id: 23] -Output [2]: [i_item_sk#40, i_item_id#41] +Output [2]: [i_item_sk#35, i_item_id#36] (54) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_item_sk#34] -Right keys [1]: [i_item_sk#40] +Left keys [1]: [ws_item_sk#29] +Right keys [1]: [i_item_sk#35] Join condition: None (55) Project [codegen id : 17] -Output [2]: [ws_ext_sales_price#36, i_item_id#41] -Input [4]: [ws_item_sk#34, ws_ext_sales_price#36, i_item_sk#40, i_item_id#41] +Output [2]: [ws_ext_sales_price#31, i_item_id#36] +Input [4]: [ws_item_sk#29, ws_ext_sales_price#31, i_item_sk#35, i_item_id#36] (56) HashAggregate [codegen id : 17] -Input [2]: [ws_ext_sales_price#36, i_item_id#41] -Keys [1]: [i_item_id#41] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#36))] -Aggregate Attributes [1]: [sum#42] -Results [2]: [i_item_id#41, sum#43] +Input [2]: [ws_ext_sales_price#31, i_item_id#36] +Keys [1]: [i_item_id#36] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#31))] +Aggregate Attributes [1]: [sum#37] +Results [2]: [i_item_id#36, sum#38] (57) Exchange -Input [2]: [i_item_id#41, sum#43] -Arguments: hashpartitioning(i_item_id#41, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [2]: [i_item_id#36, sum#38] +Arguments: hashpartitioning(i_item_id#36, 5), ENSURE_REQUIREMENTS, [plan_id=6] (58) HashAggregate [codegen id : 18] -Input [2]: [i_item_id#41, sum#43] -Keys [1]: [i_item_id#41] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#36))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#36))#45] -Results [2]: [i_item_id#41, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#36))#45,17,2) AS total_sales#46] +Input [2]: [i_item_id#36, sum#38] +Keys [1]: [i_item_id#36] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#31))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#31))#39] +Results [2]: [i_item_id#36, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#31))#39,17,2) AS total_sales#40] (59) Union (60) HashAggregate [codegen id : 19] -Input [2]: [i_item_id#11, total_sales#20] -Keys [1]: [i_item_id#11] -Functions [1]: [partial_sum(total_sales#20)] -Aggregate Attributes [2]: [sum#47, isEmpty#48] -Results [3]: [i_item_id#11, sum#49, isEmpty#50] +Input [2]: [i_item_id#10, total_sales#16] +Keys [1]: [i_item_id#10] +Functions [1]: [partial_sum(total_sales#16)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_item_id#10, sum#43, isEmpty#44] (61) Exchange -Input [3]: [i_item_id#11, sum#49, isEmpty#50] -Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [3]: [i_item_id#10, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_item_id#10, 5), ENSURE_REQUIREMENTS, [plan_id=7] (62) HashAggregate [codegen id : 20] -Input [3]: [i_item_id#11, sum#49, isEmpty#50] -Keys [1]: [i_item_id#11] -Functions [1]: [sum(total_sales#20)] -Aggregate Attributes [1]: [sum(total_sales#20)#52] -Results [2]: [i_item_id#11, sum(total_sales#20)#52 AS total_sales#53] +Input [3]: [i_item_id#10, sum#43, isEmpty#44] +Keys [1]: [i_item_id#10] +Functions [1]: [sum(total_sales#16)] +Aggregate Attributes [1]: [sum(total_sales#16)#45] +Results [2]: [i_item_id#10, sum(total_sales#16)#45 AS total_sales#46] (63) TakeOrderedAndProject -Input [2]: [i_item_id#11, total_sales#53] -Arguments: 100, [total_sales#53 ASC NULLS FIRST], [i_item_id#11, total_sales#53] +Input [2]: [i_item_id#10, total_sales#46] +Arguments: 100, [total_sales#46 ASC NULLS FIRST], [i_item_id#10, total_sales#46] ===== Subqueries ===== @@ -367,29 +367,29 @@ BroadcastExchange (68) (64) Scan parquet default.date_dim -Output [3]: [d_date_sk#6, d_year#54, d_moy#55] +Output [3]: [d_date_sk#6, d_year#47, d_moy#48] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (65) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] (66) Filter [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] -Condition : ((((isnotnull(d_year#54) AND isnotnull(d_moy#55)) AND (d_year#54 = 2001)) AND (d_moy#55 = 2)) AND isnotnull(d_date_sk#6)) +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] +Condition : ((((isnotnull(d_year#47) AND isnotnull(d_moy#48)) AND (d_year#47 = 2001)) AND (d_moy#48 = 2)) AND isnotnull(d_date_sk#6)) (67) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] (68) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#56] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 29 Hosting Expression = cs_sold_date_sk#24 IN dynamicpruning#5 +Subquery:2 Hosting operator id = 29 Hosting Expression = cs_sold_date_sk#20 IN dynamicpruning#5 -Subquery:3 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#37 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#32 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt index a7b59fe5d5ea7..7f10a26fd226f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q56/explain.txt @@ -111,7 +111,7 @@ Input [2]: [ca_address_sk#7, ca_gmt_offset#8] (11) BroadcastExchange Input [1]: [ca_address_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_addr_sk#2] @@ -123,238 +123,238 @@ Output [2]: [ss_item_sk#1, ss_ext_sales_price#3] Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#7] (14) Scan parquet default.item -Output [2]: [i_item_sk#10, i_item_id#11] +Output [2]: [i_item_sk#9, i_item_id#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#10, i_item_id#11] +Input [2]: [i_item_sk#9, i_item_id#10] (16) Filter [codegen id : 4] -Input [2]: [i_item_sk#10, i_item_id#11] -Condition : isnotnull(i_item_sk#10) +Input [2]: [i_item_sk#9, i_item_id#10] +Condition : isnotnull(i_item_sk#9) (17) Scan parquet default.item -Output [2]: [i_item_id#12, i_color#13] +Output [2]: [i_item_id#11, i_color#12] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [In(i_color, [blanched ,burnished ,slate ])] ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_id#12, i_color#13] +Input [2]: [i_item_id#11, i_color#12] (19) Filter [codegen id : 3] -Input [2]: [i_item_id#12, i_color#13] -Condition : i_color#13 IN (slate ,blanched ,burnished ) +Input [2]: [i_item_id#11, i_color#12] +Condition : i_color#12 IN (slate ,blanched ,burnished ) (20) Project [codegen id : 3] -Output [1]: [i_item_id#12] -Input [2]: [i_item_id#12, i_color#13] +Output [1]: [i_item_id#11] +Input [2]: [i_item_id#11, i_color#12] (21) BroadcastExchange -Input [1]: [i_item_id#12] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#14] +Input [1]: [i_item_id#11] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=2] (22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_id#11] -Right keys [1]: [i_item_id#12] +Left keys [1]: [i_item_id#10] +Right keys [1]: [i_item_id#11] Join condition: None (23) BroadcastExchange -Input [2]: [i_item_sk#10, i_item_id#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Input [2]: [i_item_sk#9, i_item_id#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#10] +Right keys [1]: [i_item_sk#9] Join condition: None (25) Project [codegen id : 5] -Output [2]: [ss_ext_sales_price#3, i_item_id#11] -Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_item_id#11] +Output [2]: [ss_ext_sales_price#3, i_item_id#10] +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#9, i_item_id#10] (26) HashAggregate [codegen id : 5] -Input [2]: [ss_ext_sales_price#3, i_item_id#11] -Keys [1]: [i_item_id#11] +Input [2]: [ss_ext_sales_price#3, i_item_id#10] +Keys [1]: [i_item_id#10] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum#16] -Results [2]: [i_item_id#11, sum#17] +Aggregate Attributes [1]: [sum#13] +Results [2]: [i_item_id#10, sum#14] (27) Exchange -Input [2]: [i_item_id#11, sum#17] -Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [2]: [i_item_id#10, sum#14] +Arguments: hashpartitioning(i_item_id#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 6] -Input [2]: [i_item_id#11, sum#17] -Keys [1]: [i_item_id#11] +Input [2]: [i_item_id#10, sum#14] +Keys [1]: [i_item_id#10] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#19] -Results [2]: [i_item_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#19,17,2) AS total_sales#20] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#15] +Results [2]: [i_item_id#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#15,17,2) AS total_sales#16] (29) Scan parquet default.catalog_sales -Output [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] +Output [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#24), dynamicpruningexpression(cs_sold_date_sk#24 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#20), dynamicpruningexpression(cs_sold_date_sk#20 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 11] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] (31) Filter [codegen id : 11] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] -Condition : (isnotnull(cs_bill_addr_sk#21) AND isnotnull(cs_item_sk#22)) +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] +Condition : (isnotnull(cs_bill_addr_sk#17) AND isnotnull(cs_item_sk#18)) (32) ReusedExchange [Reuses operator id: 68] -Output [1]: [d_date_sk#25] +Output [1]: [d_date_sk#21] (33) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_date_sk#24] -Right keys [1]: [d_date_sk#25] +Left keys [1]: [cs_sold_date_sk#20] +Right keys [1]: [d_date_sk#21] Join condition: None (34) Project [codegen id : 11] -Output [3]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23] -Input [5]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24, d_date_sk#25] +Output [3]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19] +Input [5]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20, d_date_sk#21] (35) ReusedExchange [Reuses operator id: 11] -Output [1]: [ca_address_sk#26] +Output [1]: [ca_address_sk#22] (36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_bill_addr_sk#21] -Right keys [1]: [ca_address_sk#26] +Left keys [1]: [cs_bill_addr_sk#17] +Right keys [1]: [ca_address_sk#22] Join condition: None (37) Project [codegen id : 11] -Output [2]: [cs_item_sk#22, cs_ext_sales_price#23] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, ca_address_sk#26] +Output [2]: [cs_item_sk#18, cs_ext_sales_price#19] +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, ca_address_sk#22] (38) ReusedExchange [Reuses operator id: 23] -Output [2]: [i_item_sk#27, i_item_id#28] +Output [2]: [i_item_sk#23, i_item_id#24] (39) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_item_sk#22] -Right keys [1]: [i_item_sk#27] +Left keys [1]: [cs_item_sk#18] +Right keys [1]: [i_item_sk#23] Join condition: None (40) Project [codegen id : 11] -Output [2]: [cs_ext_sales_price#23, i_item_id#28] -Input [4]: [cs_item_sk#22, cs_ext_sales_price#23, i_item_sk#27, i_item_id#28] +Output [2]: [cs_ext_sales_price#19, i_item_id#24] +Input [4]: [cs_item_sk#18, cs_ext_sales_price#19, i_item_sk#23, i_item_id#24] (41) HashAggregate [codegen id : 11] -Input [2]: [cs_ext_sales_price#23, i_item_id#28] -Keys [1]: [i_item_id#28] -Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#23))] -Aggregate Attributes [1]: [sum#29] -Results [2]: [i_item_id#28, sum#30] +Input [2]: [cs_ext_sales_price#19, i_item_id#24] +Keys [1]: [i_item_id#24] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#19))] +Aggregate Attributes [1]: [sum#25] +Results [2]: [i_item_id#24, sum#26] (42) Exchange -Input [2]: [i_item_id#28, sum#30] -Arguments: hashpartitioning(i_item_id#28, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [2]: [i_item_id#24, sum#26] +Arguments: hashpartitioning(i_item_id#24, 5), ENSURE_REQUIREMENTS, [plan_id=5] (43) HashAggregate [codegen id : 12] -Input [2]: [i_item_id#28, sum#30] -Keys [1]: [i_item_id#28] -Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#23))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#23))#32] -Results [2]: [i_item_id#28, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#23))#32,17,2) AS total_sales#33] +Input [2]: [i_item_id#24, sum#26] +Keys [1]: [i_item_id#24] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#19))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#19))#27] +Results [2]: [i_item_id#24, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#19))#27,17,2) AS total_sales#28] (44) Scan parquet default.web_sales -Output [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Output [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#37), dynamicpruningexpression(ws_sold_date_sk#37 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#32), dynamicpruningexpression(ws_sold_date_sk#32 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 17] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] (46) Filter [codegen id : 17] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] -Condition : (isnotnull(ws_bill_addr_sk#35) AND isnotnull(ws_item_sk#34)) +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] +Condition : (isnotnull(ws_bill_addr_sk#30) AND isnotnull(ws_item_sk#29)) (47) ReusedExchange [Reuses operator id: 68] -Output [1]: [d_date_sk#38] +Output [1]: [d_date_sk#33] (48) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#37] -Right keys [1]: [d_date_sk#38] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#33] Join condition: None (49) Project [codegen id : 17] -Output [3]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36] -Input [5]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37, d_date_sk#38] +Output [3]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31] +Input [5]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32, d_date_sk#33] (50) ReusedExchange [Reuses operator id: 11] -Output [1]: [ca_address_sk#39] +Output [1]: [ca_address_sk#34] (51) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_bill_addr_sk#35] -Right keys [1]: [ca_address_sk#39] +Left keys [1]: [ws_bill_addr_sk#30] +Right keys [1]: [ca_address_sk#34] Join condition: None (52) Project [codegen id : 17] -Output [2]: [ws_item_sk#34, ws_ext_sales_price#36] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ca_address_sk#39] +Output [2]: [ws_item_sk#29, ws_ext_sales_price#31] +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ca_address_sk#34] (53) ReusedExchange [Reuses operator id: 23] -Output [2]: [i_item_sk#40, i_item_id#41] +Output [2]: [i_item_sk#35, i_item_id#36] (54) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_item_sk#34] -Right keys [1]: [i_item_sk#40] +Left keys [1]: [ws_item_sk#29] +Right keys [1]: [i_item_sk#35] Join condition: None (55) Project [codegen id : 17] -Output [2]: [ws_ext_sales_price#36, i_item_id#41] -Input [4]: [ws_item_sk#34, ws_ext_sales_price#36, i_item_sk#40, i_item_id#41] +Output [2]: [ws_ext_sales_price#31, i_item_id#36] +Input [4]: [ws_item_sk#29, ws_ext_sales_price#31, i_item_sk#35, i_item_id#36] (56) HashAggregate [codegen id : 17] -Input [2]: [ws_ext_sales_price#36, i_item_id#41] -Keys [1]: [i_item_id#41] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#36))] -Aggregate Attributes [1]: [sum#42] -Results [2]: [i_item_id#41, sum#43] +Input [2]: [ws_ext_sales_price#31, i_item_id#36] +Keys [1]: [i_item_id#36] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#31))] +Aggregate Attributes [1]: [sum#37] +Results [2]: [i_item_id#36, sum#38] (57) Exchange -Input [2]: [i_item_id#41, sum#43] -Arguments: hashpartitioning(i_item_id#41, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [2]: [i_item_id#36, sum#38] +Arguments: hashpartitioning(i_item_id#36, 5), ENSURE_REQUIREMENTS, [plan_id=6] (58) HashAggregate [codegen id : 18] -Input [2]: [i_item_id#41, sum#43] -Keys [1]: [i_item_id#41] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#36))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#36))#45] -Results [2]: [i_item_id#41, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#36))#45,17,2) AS total_sales#46] +Input [2]: [i_item_id#36, sum#38] +Keys [1]: [i_item_id#36] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#31))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#31))#39] +Results [2]: [i_item_id#36, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#31))#39,17,2) AS total_sales#40] (59) Union (60) HashAggregate [codegen id : 19] -Input [2]: [i_item_id#11, total_sales#20] -Keys [1]: [i_item_id#11] -Functions [1]: [partial_sum(total_sales#20)] -Aggregate Attributes [2]: [sum#47, isEmpty#48] -Results [3]: [i_item_id#11, sum#49, isEmpty#50] +Input [2]: [i_item_id#10, total_sales#16] +Keys [1]: [i_item_id#10] +Functions [1]: [partial_sum(total_sales#16)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_item_id#10, sum#43, isEmpty#44] (61) Exchange -Input [3]: [i_item_id#11, sum#49, isEmpty#50] -Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [3]: [i_item_id#10, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_item_id#10, 5), ENSURE_REQUIREMENTS, [plan_id=7] (62) HashAggregate [codegen id : 20] -Input [3]: [i_item_id#11, sum#49, isEmpty#50] -Keys [1]: [i_item_id#11] -Functions [1]: [sum(total_sales#20)] -Aggregate Attributes [1]: [sum(total_sales#20)#52] -Results [2]: [i_item_id#11, sum(total_sales#20)#52 AS total_sales#53] +Input [3]: [i_item_id#10, sum#43, isEmpty#44] +Keys [1]: [i_item_id#10] +Functions [1]: [sum(total_sales#16)] +Aggregate Attributes [1]: [sum(total_sales#16)#45] +Results [2]: [i_item_id#10, sum(total_sales#16)#45 AS total_sales#46] (63) TakeOrderedAndProject -Input [2]: [i_item_id#11, total_sales#53] -Arguments: 100, [total_sales#53 ASC NULLS FIRST], [i_item_id#11, total_sales#53] +Input [2]: [i_item_id#10, total_sales#46] +Arguments: 100, [total_sales#46 ASC NULLS FIRST], [i_item_id#10, total_sales#46] ===== Subqueries ===== @@ -367,29 +367,29 @@ BroadcastExchange (68) (64) Scan parquet default.date_dim -Output [3]: [d_date_sk#6, d_year#54, d_moy#55] +Output [3]: [d_date_sk#6, d_year#47, d_moy#48] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,2), IsNotNull(d_date_sk)] ReadSchema: struct (65) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] (66) Filter [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] -Condition : ((((isnotnull(d_year#54) AND isnotnull(d_moy#55)) AND (d_year#54 = 2001)) AND (d_moy#55 = 2)) AND isnotnull(d_date_sk#6)) +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] +Condition : ((((isnotnull(d_year#47) AND isnotnull(d_moy#48)) AND (d_year#47 = 2001)) AND (d_moy#48 = 2)) AND isnotnull(d_date_sk#6)) (67) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] (68) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#56] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 29 Hosting Expression = cs_sold_date_sk#24 IN dynamicpruning#5 +Subquery:2 Hosting operator id = 29 Hosting Expression = cs_sold_date_sk#20 IN dynamicpruning#5 -Subquery:3 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#37 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#32 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt index 0b933a733f888..cd93eea6a1c8e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57.sf100/explain.txt @@ -96,7 +96,7 @@ Condition : (isnotnull(cc_call_center_sk#9) AND isnotnull(cc_name#10)) (10) BroadcastExchange Input [2]: [cc_call_center_sk#9, cc_name#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_call_center_sk#1] @@ -109,175 +109,175 @@ Input [7]: [cs_call_center_sk#1, cs_item_sk#2, cs_sales_price#3, d_year#7, d_moy (13) Exchange Input [5]: [cs_item_sk#2, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10] -Arguments: hashpartitioning(cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#12] +Arguments: hashpartitioning(cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) Sort [codegen id : 4] Input [5]: [cs_item_sk#2, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10] Arguments: [cs_item_sk#2 ASC NULLS FIRST], false, 0 (15) Scan parquet default.item -Output [3]: [i_item_sk#13, i_brand#14, i_category#15] +Output [3]: [i_item_sk#11, i_brand#12, i_category#13] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [3]: [i_item_sk#13, i_brand#14, i_category#15] +Input [3]: [i_item_sk#11, i_brand#12, i_category#13] (17) Filter [codegen id : 5] -Input [3]: [i_item_sk#13, i_brand#14, i_category#15] -Condition : ((isnotnull(i_item_sk#13) AND isnotnull(i_category#15)) AND isnotnull(i_brand#14)) +Input [3]: [i_item_sk#11, i_brand#12, i_category#13] +Condition : ((isnotnull(i_item_sk#11) AND isnotnull(i_category#13)) AND isnotnull(i_brand#12)) (18) Exchange -Input [3]: [i_item_sk#13, i_brand#14, i_category#15] -Arguments: hashpartitioning(i_item_sk#13, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [3]: [i_item_sk#11, i_brand#12, i_category#13] +Arguments: hashpartitioning(i_item_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 6] -Input [3]: [i_item_sk#13, i_brand#14, i_category#15] -Arguments: [i_item_sk#13 ASC NULLS FIRST], false, 0 +Input [3]: [i_item_sk#11, i_brand#12, i_category#13] +Arguments: [i_item_sk#11 ASC NULLS FIRST], false, 0 (20) SortMergeJoin [codegen id : 7] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#13] +Right keys [1]: [i_item_sk#11] Join condition: None (21) Project [codegen id : 7] -Output [6]: [i_brand#14, i_category#15, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10] -Input [8]: [cs_item_sk#2, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10, i_item_sk#13, i_brand#14, i_category#15] +Output [6]: [i_brand#12, i_category#13, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10] +Input [8]: [cs_item_sk#2, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10, i_item_sk#11, i_brand#12, i_category#13] (22) HashAggregate [codegen id : 7] -Input [6]: [i_brand#14, i_category#15, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10] -Keys [5]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8] +Input [6]: [i_brand#12, i_category#13, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10] +Keys [5]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8] Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#3))] -Aggregate Attributes [1]: [sum#17] -Results [6]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum#18] +Aggregate Attributes [1]: [sum#14] +Results [6]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum#15] (23) Exchange -Input [6]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum#18] -Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [6]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum#15] +Arguments: hashpartitioning(i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) HashAggregate [codegen id : 8] -Input [6]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum#18] -Keys [5]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8] +Input [6]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum#15] +Keys [5]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8] Functions [1]: [sum(UnscaledValue(cs_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#3))#20] -Results [7]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, MakeDecimal(sum(UnscaledValue(cs_sales_price#3))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(cs_sales_price#3))#20,17,2) AS _w0#22] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#3))#16] +Results [7]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, MakeDecimal(sum(UnscaledValue(cs_sales_price#3))#16,17,2) AS sum_sales#17, MakeDecimal(sum(UnscaledValue(cs_sales_price#3))#16,17,2) AS _w0#18] (25) Exchange -Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22] -Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [7]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18] +Arguments: hashpartitioning(i_category#13, i_brand#12, cc_name#10, 5), ENSURE_REQUIREMENTS, [plan_id=5] (26) Sort [codegen id : 9] -Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22] -Arguments: [i_category#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 +Input [7]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18] +Arguments: [i_category#13 ASC NULLS FIRST, i_brand#12 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 (27) Window -Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22] -Arguments: [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#15, i_brand#14, cc_name#10, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#24], [i_category#15, i_brand#14, cc_name#10], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] +Input [7]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18] +Arguments: [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#13, i_brand#12, cc_name#10, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#19], [i_category#13, i_brand#12, cc_name#10], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] (28) Filter [codegen id : 10] -Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22, rn#24] +Input [8]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18, rn#19] Condition : (isnotnull(d_year#7) AND (d_year#7 = 1999)) (29) Window -Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22, rn#24] -Arguments: [avg(_w0#22) windowspecdefinition(i_category#15, i_brand#14, cc_name#10, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#25], [i_category#15, i_brand#14, cc_name#10, d_year#7] +Input [8]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18, rn#19] +Arguments: [avg(_w0#18) windowspecdefinition(i_category#13, i_brand#12, cc_name#10, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#13, i_brand#12, cc_name#10, d_year#7] (30) Filter [codegen id : 11] -Input [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22, rn#24, avg_monthly_sales#25] -Condition : ((isnotnull(avg_monthly_sales#25) AND (avg_monthly_sales#25 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Input [9]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] +Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (31) Project [codegen id : 11] -Output [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24] -Input [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22, rn#24, avg_monthly_sales#25] +Output [8]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, avg_monthly_sales#20, rn#19] +Input [9]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] (32) Exchange -Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24] -Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, rn#24, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [8]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, avg_monthly_sales#20, rn#19] +Arguments: hashpartitioning(i_category#13, i_brand#12, cc_name#10, rn#19, 5), ENSURE_REQUIREMENTS, [plan_id=6] (33) Sort [codegen id : 12] -Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24] -Arguments: [i_category#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, rn#24 ASC NULLS FIRST], false, 0 +Input [8]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, avg_monthly_sales#20, rn#19] +Arguments: [i_category#13 ASC NULLS FIRST, i_brand#12 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, rn#19 ASC NULLS FIRST], false, 0 (34) ReusedExchange [Reuses operator id: 23] -Output [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum#32] +Output [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum#26] (35) HashAggregate [codegen id : 20] -Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum#32] -Keys [5]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31] -Functions [1]: [sum(UnscaledValue(cs_sales_price#33))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#33))#20] -Results [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, MakeDecimal(sum(UnscaledValue(cs_sales_price#33))#20,17,2) AS sum_sales#21] +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum#26] +Keys [5]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25] +Functions [1]: [sum(UnscaledValue(cs_sales_price#27))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#27))#16] +Results [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, MakeDecimal(sum(UnscaledValue(cs_sales_price#27))#16,17,2) AS sum_sales#17] (36) Exchange -Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21] -Arguments: hashpartitioning(i_category#27, i_brand#28, cc_name#29, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17] +Arguments: hashpartitioning(i_category#21, i_brand#22, cc_name#23, 5), ENSURE_REQUIREMENTS, [plan_id=7] (37) Sort [codegen id : 21] -Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21] -Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, cc_name#29 ASC NULLS FIRST, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST], false, 0 +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17] +Arguments: [i_category#21 ASC NULLS FIRST, i_brand#22 ASC NULLS FIRST, cc_name#23 ASC NULLS FIRST, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST], false, 0 (38) Window -Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21] -Arguments: [rank(d_year#30, d_moy#31) windowspecdefinition(i_category#27, i_brand#28, cc_name#29, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#27, i_brand#28, cc_name#29], [d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST] +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17] +Arguments: [rank(d_year#24, d_moy#25) windowspecdefinition(i_category#21, i_brand#22, cc_name#23, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#28], [i_category#21, i_brand#22, cc_name#23], [d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] (39) Project [codegen id : 22] -Output [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#21 AS sum_sales#36, rn#35] -Input [7]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21, rn#35] +Output [5]: [i_category#21, i_brand#22, cc_name#23, sum_sales#17 AS sum_sales#29, rn#28] +Input [7]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17, rn#28] (40) Exchange -Input [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#36, rn#35] -Arguments: hashpartitioning(i_category#27, i_brand#28, cc_name#29, (rn#35 + 1), 5), ENSURE_REQUIREMENTS, [id=#37] +Input [5]: [i_category#21, i_brand#22, cc_name#23, sum_sales#29, rn#28] +Arguments: hashpartitioning(i_category#21, i_brand#22, cc_name#23, (rn#28 + 1), 5), ENSURE_REQUIREMENTS, [plan_id=8] (41) Sort [codegen id : 23] -Input [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#36, rn#35] -Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, cc_name#29 ASC NULLS FIRST, (rn#35 + 1) ASC NULLS FIRST], false, 0 +Input [5]: [i_category#21, i_brand#22, cc_name#23, sum_sales#29, rn#28] +Arguments: [i_category#21 ASC NULLS FIRST, i_brand#22 ASC NULLS FIRST, cc_name#23 ASC NULLS FIRST, (rn#28 + 1) ASC NULLS FIRST], false, 0 (42) SortMergeJoin [codegen id : 24] -Left keys [4]: [i_category#15, i_brand#14, cc_name#10, rn#24] -Right keys [4]: [i_category#27, i_brand#28, cc_name#29, (rn#35 + 1)] +Left keys [4]: [i_category#13, i_brand#12, cc_name#10, rn#19] +Right keys [4]: [i_category#21, i_brand#22, cc_name#23, (rn#28 + 1)] Join condition: None (43) Project [codegen id : 24] -Output [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#36] -Input [13]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, i_category#27, i_brand#28, cc_name#29, sum_sales#36, rn#35] +Output [9]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#29] +Input [13]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, avg_monthly_sales#20, rn#19, i_category#21, i_brand#22, cc_name#23, sum_sales#29, rn#28] (44) ReusedExchange [Reuses operator id: 36] -Output [6]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21] +Output [6]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17] (45) Sort [codegen id : 33] -Input [6]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21] -Arguments: [i_category#38 ASC NULLS FIRST, i_brand#39 ASC NULLS FIRST, cc_name#40 ASC NULLS FIRST, d_year#41 ASC NULLS FIRST, d_moy#42 ASC NULLS FIRST], false, 0 +Input [6]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17] +Arguments: [i_category#30 ASC NULLS FIRST, i_brand#31 ASC NULLS FIRST, cc_name#32 ASC NULLS FIRST, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST], false, 0 (46) Window -Input [6]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21] -Arguments: [rank(d_year#41, d_moy#42) windowspecdefinition(i_category#38, i_brand#39, cc_name#40, d_year#41 ASC NULLS FIRST, d_moy#42 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#43], [i_category#38, i_brand#39, cc_name#40], [d_year#41 ASC NULLS FIRST, d_moy#42 ASC NULLS FIRST] +Input [6]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17] +Arguments: [rank(d_year#33, d_moy#34) windowspecdefinition(i_category#30, i_brand#31, cc_name#32, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#30, i_brand#31, cc_name#32], [d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST] (47) Project [codegen id : 34] -Output [5]: [i_category#38, i_brand#39, cc_name#40, sum_sales#21 AS sum_sales#44, rn#43] -Input [7]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21, rn#43] +Output [5]: [i_category#30, i_brand#31, cc_name#32, sum_sales#17 AS sum_sales#36, rn#35] +Input [7]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17, rn#35] (48) Exchange -Input [5]: [i_category#38, i_brand#39, cc_name#40, sum_sales#44, rn#43] -Arguments: hashpartitioning(i_category#38, i_brand#39, cc_name#40, (rn#43 - 1), 5), ENSURE_REQUIREMENTS, [id=#45] +Input [5]: [i_category#30, i_brand#31, cc_name#32, sum_sales#36, rn#35] +Arguments: hashpartitioning(i_category#30, i_brand#31, cc_name#32, (rn#35 - 1), 5), ENSURE_REQUIREMENTS, [plan_id=9] (49) Sort [codegen id : 35] -Input [5]: [i_category#38, i_brand#39, cc_name#40, sum_sales#44, rn#43] -Arguments: [i_category#38 ASC NULLS FIRST, i_brand#39 ASC NULLS FIRST, cc_name#40 ASC NULLS FIRST, (rn#43 - 1) ASC NULLS FIRST], false, 0 +Input [5]: [i_category#30, i_brand#31, cc_name#32, sum_sales#36, rn#35] +Arguments: [i_category#30 ASC NULLS FIRST, i_brand#31 ASC NULLS FIRST, cc_name#32 ASC NULLS FIRST, (rn#35 - 1) ASC NULLS FIRST], false, 0 (50) SortMergeJoin [codegen id : 36] -Left keys [4]: [i_category#15, i_brand#14, cc_name#10, rn#24] -Right keys [4]: [i_category#38, i_brand#39, cc_name#40, (rn#43 - 1)] +Left keys [4]: [i_category#13, i_brand#12, cc_name#10, rn#19] +Right keys [4]: [i_category#30, i_brand#31, cc_name#32, (rn#35 - 1)] Join condition: None (51) Project [codegen id : 36] -Output [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, sum_sales#36 AS psum#46, sum_sales#44 AS nsum#47] -Input [14]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#36, i_category#38, i_brand#39, cc_name#40, sum_sales#44, rn#43] +Output [9]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, avg_monthly_sales#20, sum_sales#17, sum_sales#29 AS psum#37, sum_sales#36 AS nsum#38] +Input [14]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#29, i_category#30, i_brand#31, cc_name#32, sum_sales#36, rn#35] (52) TakeOrderedAndProject -Input [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#46, nsum#47] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST], [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#46, nsum#47] +Input [9]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, avg_monthly_sales#20, sum_sales#17, psum#37, nsum#38] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST], [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, avg_monthly_sales#20, sum_sales#17, psum#37, nsum#38] ===== Subqueries ===== @@ -304,6 +304,6 @@ Condition : ((((d_year#7 = 1999) OR ((d_year#7 = 1998) AND (d_moy#8 = 12))) OR ( (56) BroadcastExchange Input [3]: [d_date_sk#6, d_year#7, d_moy#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt index 6b2736ef4008f..7abc61f31e616 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt @@ -77,7 +77,7 @@ Condition : (isnotnull(cs_item_sk#5) AND isnotnull(cs_call_center_sk#4)) (7) BroadcastExchange Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] @@ -89,160 +89,160 @@ Output [5]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_ Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] (10) ReusedExchange [Reuses operator id: 49] -Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#7] -Right keys [1]: [d_date_sk#10] +Right keys [1]: [d_date_sk#9] Join condition: None (12) Project [codegen id : 4] -Output [6]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#11, d_moy#12] -Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7, d_date_sk#10, d_year#11, d_moy#12] +Output [6]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#10, d_moy#11] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7, d_date_sk#9, d_year#10, d_moy#11] (13) Scan parquet default.call_center -Output [2]: [cc_call_center_sk#13, cc_name#14] +Output [2]: [cc_call_center_sk#12, cc_name#13] Batched: true Location [not included in comparison]/{warehouse_dir}/call_center] PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [2]: [cc_call_center_sk#13, cc_name#14] +Input [2]: [cc_call_center_sk#12, cc_name#13] (15) Filter [codegen id : 3] -Input [2]: [cc_call_center_sk#13, cc_name#14] -Condition : (isnotnull(cc_call_center_sk#13) AND isnotnull(cc_name#14)) +Input [2]: [cc_call_center_sk#12, cc_name#13] +Condition : (isnotnull(cc_call_center_sk#12) AND isnotnull(cc_name#13)) (16) BroadcastExchange -Input [2]: [cc_call_center_sk#13, cc_name#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Input [2]: [cc_call_center_sk#12, cc_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_call_center_sk#4] -Right keys [1]: [cc_call_center_sk#13] +Right keys [1]: [cc_call_center_sk#12] Join condition: None (18) Project [codegen id : 4] -Output [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#11, d_moy#12, cc_name#14] -Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#11, d_moy#12, cc_call_center_sk#13, cc_name#14] +Output [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#10, d_moy#11, cc_name#13] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#10, d_moy#11, cc_call_center_sk#12, cc_name#13] (19) HashAggregate [codegen id : 4] -Input [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#11, d_moy#12, cc_name#14] -Keys [5]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12] +Input [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#10, d_moy#11, cc_name#13] +Keys [5]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11] Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#6))] -Aggregate Attributes [1]: [sum#16] -Results [6]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum#17] +Aggregate Attributes [1]: [sum#14] +Results [6]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum#15] (20) Exchange -Input [6]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum#17] -Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [6]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum#15] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 5] -Input [6]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum#17] -Keys [5]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12] +Input [6]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum#15] +Keys [5]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11] Functions [1]: [sum(UnscaledValue(cs_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#6))#19] -Results [7]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#19,17,2) AS sum_sales#20, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#19,17,2) AS _w0#21] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#6))#16] +Results [7]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#16,17,2) AS sum_sales#17, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#16,17,2) AS _w0#18] (22) Exchange -Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21] -Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [7]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#13, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) Sort [codegen id : 6] -Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21] -Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST, d_year#11 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST], false, 0 +Input [7]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#13 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], false, 0 (24) Window -Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21] -Arguments: [rank(d_year#11, d_moy#12) windowspecdefinition(i_category#3, i_brand#2, cc_name#14, d_year#11 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#23], [i_category#3, i_brand#2, cc_name#14], [d_year#11 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST] +Input [7]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18] +Arguments: [rank(d_year#10, d_moy#11) windowspecdefinition(i_category#3, i_brand#2, cc_name#13, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#19], [i_category#3, i_brand#2, cc_name#13], [d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST] (25) Filter [codegen id : 7] -Input [8]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21, rn#23] -Condition : (isnotnull(d_year#11) AND (d_year#11 = 1999)) +Input [8]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18, rn#19] +Condition : (isnotnull(d_year#10) AND (d_year#10 = 1999)) (26) Window -Input [8]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21, rn#23] -Arguments: [avg(_w0#21) windowspecdefinition(i_category#3, i_brand#2, cc_name#14, d_year#11, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#24], [i_category#3, i_brand#2, cc_name#14, d_year#11] +Input [8]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18, rn#19] +Arguments: [avg(_w0#18) windowspecdefinition(i_category#3, i_brand#2, cc_name#13, d_year#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#3, i_brand#2, cc_name#13, d_year#10] (27) Filter [codegen id : 22] -Input [9]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21, rn#23, avg_monthly_sales#24] -Condition : ((isnotnull(avg_monthly_sales#24) AND (avg_monthly_sales#24 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] +Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (28) Project [codegen id : 22] -Output [8]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, avg_monthly_sales#24, rn#23] -Input [9]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21, rn#23, avg_monthly_sales#24] +Output [8]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, avg_monthly_sales#20, rn#19] +Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] (29) ReusedExchange [Reuses operator id: 20] -Output [6]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29, sum#30] +Output [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum#26] (30) HashAggregate [codegen id : 12] -Input [6]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29, sum#30] -Keys [5]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29] -Functions [1]: [sum(UnscaledValue(cs_sales_price#31))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#31))#19] -Results [6]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29, MakeDecimal(sum(UnscaledValue(cs_sales_price#31))#19,17,2) AS sum_sales#20] +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum#26] +Keys [5]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25] +Functions [1]: [sum(UnscaledValue(cs_sales_price#27))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#27))#16] +Results [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, MakeDecimal(sum(UnscaledValue(cs_sales_price#27))#16,17,2) AS sum_sales#17] (31) Exchange -Input [6]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29, sum_sales#20] -Arguments: hashpartitioning(i_category#25, i_brand#26, cc_name#27, 5), ENSURE_REQUIREMENTS, [id=#32] +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17] +Arguments: hashpartitioning(i_category#21, i_brand#22, cc_name#23, 5), ENSURE_REQUIREMENTS, [plan_id=5] (32) Sort [codegen id : 13] -Input [6]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29, sum_sales#20] -Arguments: [i_category#25 ASC NULLS FIRST, i_brand#26 ASC NULLS FIRST, cc_name#27 ASC NULLS FIRST, d_year#28 ASC NULLS FIRST, d_moy#29 ASC NULLS FIRST], false, 0 +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17] +Arguments: [i_category#21 ASC NULLS FIRST, i_brand#22 ASC NULLS FIRST, cc_name#23 ASC NULLS FIRST, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST], false, 0 (33) Window -Input [6]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29, sum_sales#20] -Arguments: [rank(d_year#28, d_moy#29) windowspecdefinition(i_category#25, i_brand#26, cc_name#27, d_year#28 ASC NULLS FIRST, d_moy#29 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#33], [i_category#25, i_brand#26, cc_name#27], [d_year#28 ASC NULLS FIRST, d_moy#29 ASC NULLS FIRST] +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17] +Arguments: [rank(d_year#24, d_moy#25) windowspecdefinition(i_category#21, i_brand#22, cc_name#23, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#28], [i_category#21, i_brand#22, cc_name#23], [d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] (34) Project [codegen id : 14] -Output [5]: [i_category#25, i_brand#26, cc_name#27, sum_sales#20 AS sum_sales#34, rn#33] -Input [7]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29, sum_sales#20, rn#33] +Output [5]: [i_category#21, i_brand#22, cc_name#23, sum_sales#17 AS sum_sales#29, rn#28] +Input [7]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17, rn#28] (35) BroadcastExchange -Input [5]: [i_category#25, i_brand#26, cc_name#27, sum_sales#34, rn#33] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [id=#35] +Input [5]: [i_category#21, i_brand#22, cc_name#23, sum_sales#29, rn#28] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=6] (36) BroadcastHashJoin [codegen id : 22] -Left keys [4]: [i_category#3, i_brand#2, cc_name#14, rn#23] -Right keys [4]: [i_category#25, i_brand#26, cc_name#27, (rn#33 + 1)] +Left keys [4]: [i_category#3, i_brand#2, cc_name#13, rn#19] +Right keys [4]: [i_category#21, i_brand#22, cc_name#23, (rn#28 + 1)] Join condition: None (37) Project [codegen id : 22] -Output [9]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, avg_monthly_sales#24, rn#23, sum_sales#34] -Input [13]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, avg_monthly_sales#24, rn#23, i_category#25, i_brand#26, cc_name#27, sum_sales#34, rn#33] +Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#29] +Input [13]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, avg_monthly_sales#20, rn#19, i_category#21, i_brand#22, cc_name#23, sum_sales#29, rn#28] (38) ReusedExchange [Reuses operator id: 31] -Output [6]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#20] +Output [6]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17] (39) Sort [codegen id : 20] -Input [6]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#20] -Arguments: [i_category#36 ASC NULLS FIRST, i_brand#37 ASC NULLS FIRST, cc_name#38 ASC NULLS FIRST, d_year#39 ASC NULLS FIRST, d_moy#40 ASC NULLS FIRST], false, 0 +Input [6]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17] +Arguments: [i_category#30 ASC NULLS FIRST, i_brand#31 ASC NULLS FIRST, cc_name#32 ASC NULLS FIRST, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST], false, 0 (40) Window -Input [6]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#20] -Arguments: [rank(d_year#39, d_moy#40) windowspecdefinition(i_category#36, i_brand#37, cc_name#38, d_year#39 ASC NULLS FIRST, d_moy#40 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#41], [i_category#36, i_brand#37, cc_name#38], [d_year#39 ASC NULLS FIRST, d_moy#40 ASC NULLS FIRST] +Input [6]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17] +Arguments: [rank(d_year#33, d_moy#34) windowspecdefinition(i_category#30, i_brand#31, cc_name#32, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#30, i_brand#31, cc_name#32], [d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST] (41) Project [codegen id : 21] -Output [5]: [i_category#36, i_brand#37, cc_name#38, sum_sales#20 AS sum_sales#42, rn#41] -Input [7]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#20, rn#41] +Output [5]: [i_category#30, i_brand#31, cc_name#32, sum_sales#17 AS sum_sales#36, rn#35] +Input [7]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17, rn#35] (42) BroadcastExchange -Input [5]: [i_category#36, i_brand#37, cc_name#38, sum_sales#42, rn#41] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [id=#43] +Input [5]: [i_category#30, i_brand#31, cc_name#32, sum_sales#36, rn#35] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=7] (43) BroadcastHashJoin [codegen id : 22] -Left keys [4]: [i_category#3, i_brand#2, cc_name#14, rn#23] -Right keys [4]: [i_category#36, i_brand#37, cc_name#38, (rn#41 - 1)] +Left keys [4]: [i_category#3, i_brand#2, cc_name#13, rn#19] +Right keys [4]: [i_category#30, i_brand#31, cc_name#32, (rn#35 - 1)] Join condition: None (44) Project [codegen id : 22] -Output [9]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, avg_monthly_sales#24, sum_sales#20, sum_sales#34 AS psum#44, sum_sales#42 AS nsum#45] -Input [14]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, avg_monthly_sales#24, rn#23, sum_sales#34, i_category#36, i_brand#37, cc_name#38, sum_sales#42, rn#41] +Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#20, sum_sales#17, sum_sales#29 AS psum#37, sum_sales#36 AS nsum#38] +Input [14]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#29, i_category#30, i_brand#31, cc_name#32, sum_sales#36, rn#35] (45) TakeOrderedAndProject -Input [9]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, avg_monthly_sales#24, sum_sales#20, psum#44, nsum#45] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, avg_monthly_sales#24, sum_sales#20, psum#44, nsum#45] +Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#20, sum_sales#17, psum#37, nsum#38] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, cc_name#13 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#20, sum_sales#17, psum#37, nsum#38] ===== Subqueries ===== @@ -254,21 +254,21 @@ BroadcastExchange (49) (46) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct (47) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] (48) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] -Condition : ((((d_year#11 = 1999) OR ((d_year#11 = 1998) AND (d_moy#12 = 12))) OR ((d_year#11 = 2000) AND (d_moy#12 = 1))) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) (49) BroadcastExchange -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#46] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/explain.txt index abbd29292b260..2f88e2378af8b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58.sf100/explain.txt @@ -93,7 +93,7 @@ Condition : (isnotnull(i_item_sk#6) AND isnotnull(i_item_id#7)) (10) BroadcastExchange Input [2]: [i_item_sk#6, i_item_id#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_item_sk#1] @@ -108,175 +108,175 @@ Input [4]: [ss_item_sk#1, ss_ext_sales_price#2, i_item_sk#6, i_item_id#7] Input [2]: [ss_ext_sales_price#2, i_item_id#7] Keys [1]: [i_item_id#7] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#9] -Results [2]: [i_item_id#7, sum#10] +Aggregate Attributes [1]: [sum#8] +Results [2]: [i_item_id#7, sum#9] (14) Exchange -Input [2]: [i_item_id#7, sum#10] -Arguments: hashpartitioning(i_item_id#7, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [2]: [i_item_id#7, sum#9] +Arguments: hashpartitioning(i_item_id#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 15] -Input [2]: [i_item_id#7, sum#10] +Input [2]: [i_item_id#7, sum#9] Keys [1]: [i_item_id#7] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#12] -Results [2]: [i_item_id#7 AS item_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#12,17,2) AS ss_item_rev#14] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#10] +Results [2]: [i_item_id#7 AS item_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#10,17,2) AS ss_item_rev#12] (16) Filter [codegen id : 15] -Input [2]: [item_id#13, ss_item_rev#14] -Condition : isnotnull(ss_item_rev#14) +Input [2]: [item_id#11, ss_item_rev#12] +Condition : isnotnull(ss_item_rev#12) (17) Scan parquet default.catalog_sales -Output [3]: [cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] +Output [3]: [cs_item_sk#13, cs_ext_sales_price#14, cs_sold_date_sk#15] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#17), dynamicpruningexpression(cs_sold_date_sk#17 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(cs_sold_date_sk#15), dynamicpruningexpression(cs_sold_date_sk#15 IN dynamicpruning#4)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (18) ColumnarToRow [codegen id : 8] -Input [3]: [cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] +Input [3]: [cs_item_sk#13, cs_ext_sales_price#14, cs_sold_date_sk#15] (19) Filter [codegen id : 8] -Input [3]: [cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] -Condition : isnotnull(cs_item_sk#15) +Input [3]: [cs_item_sk#13, cs_ext_sales_price#14, cs_sold_date_sk#15] +Condition : isnotnull(cs_item_sk#13) (20) ReusedExchange [Reuses operator id: 60] -Output [1]: [d_date_sk#18] +Output [1]: [d_date_sk#16] (21) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_sold_date_sk#17] -Right keys [1]: [d_date_sk#18] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#16] Join condition: None (22) Project [codegen id : 8] -Output [2]: [cs_item_sk#15, cs_ext_sales_price#16] -Input [4]: [cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17, d_date_sk#18] +Output [2]: [cs_item_sk#13, cs_ext_sales_price#14] +Input [4]: [cs_item_sk#13, cs_ext_sales_price#14, cs_sold_date_sk#15, d_date_sk#16] (23) ReusedExchange [Reuses operator id: 10] -Output [2]: [i_item_sk#19, i_item_id#20] +Output [2]: [i_item_sk#17, i_item_id#18] (24) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_item_sk#15] -Right keys [1]: [i_item_sk#19] +Left keys [1]: [cs_item_sk#13] +Right keys [1]: [i_item_sk#17] Join condition: None (25) Project [codegen id : 8] -Output [2]: [cs_ext_sales_price#16, i_item_id#20] -Input [4]: [cs_item_sk#15, cs_ext_sales_price#16, i_item_sk#19, i_item_id#20] +Output [2]: [cs_ext_sales_price#14, i_item_id#18] +Input [4]: [cs_item_sk#13, cs_ext_sales_price#14, i_item_sk#17, i_item_id#18] (26) HashAggregate [codegen id : 8] -Input [2]: [cs_ext_sales_price#16, i_item_id#20] -Keys [1]: [i_item_id#20] -Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#16))] -Aggregate Attributes [1]: [sum#21] -Results [2]: [i_item_id#20, sum#22] +Input [2]: [cs_ext_sales_price#14, i_item_id#18] +Keys [1]: [i_item_id#18] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#14))] +Aggregate Attributes [1]: [sum#19] +Results [2]: [i_item_id#18, sum#20] (27) Exchange -Input [2]: [i_item_id#20, sum#22] -Arguments: hashpartitioning(i_item_id#20, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [2]: [i_item_id#18, sum#20] +Arguments: hashpartitioning(i_item_id#18, 5), ENSURE_REQUIREMENTS, [plan_id=3] (28) HashAggregate [codegen id : 9] -Input [2]: [i_item_id#20, sum#22] -Keys [1]: [i_item_id#20] -Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#16))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#16))#24] -Results [2]: [i_item_id#20 AS item_id#25, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#16))#24,17,2) AS cs_item_rev#26] +Input [2]: [i_item_id#18, sum#20] +Keys [1]: [i_item_id#18] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#14))#21] +Results [2]: [i_item_id#18 AS item_id#22, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#14))#21,17,2) AS cs_item_rev#23] (29) Filter [codegen id : 9] -Input [2]: [item_id#25, cs_item_rev#26] -Condition : isnotnull(cs_item_rev#26) +Input [2]: [item_id#22, cs_item_rev#23] +Condition : isnotnull(cs_item_rev#23) (30) BroadcastExchange -Input [2]: [item_id#25, cs_item_rev#26] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] +Input [2]: [item_id#22, cs_item_rev#23] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=4] (31) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [item_id#13] -Right keys [1]: [item_id#25] -Join condition: ((((cast(ss_item_rev#14 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#26)), DecimalType(19,3))) AND (cast(ss_item_rev#14 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#26)), DecimalType(20,3)))) AND (cast(cs_item_rev#26 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#14)), DecimalType(19,3)))) AND (cast(cs_item_rev#26 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#14)), DecimalType(20,3)))) +Left keys [1]: [item_id#11] +Right keys [1]: [item_id#22] +Join condition: ((((cast(ss_item_rev#12 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#23)), DecimalType(19,3))) AND (cast(ss_item_rev#12 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#23)), DecimalType(20,3)))) AND (cast(cs_item_rev#23 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#12)), DecimalType(19,3)))) AND (cast(cs_item_rev#23 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#12)), DecimalType(20,3)))) (32) Project [codegen id : 15] -Output [3]: [item_id#13, ss_item_rev#14, cs_item_rev#26] -Input [4]: [item_id#13, ss_item_rev#14, item_id#25, cs_item_rev#26] +Output [3]: [item_id#11, ss_item_rev#12, cs_item_rev#23] +Input [4]: [item_id#11, ss_item_rev#12, item_id#22, cs_item_rev#23] (33) Scan parquet default.web_sales -Output [3]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30] +Output [3]: [ws_item_sk#24, ws_ext_sales_price#25, ws_sold_date_sk#26] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#30), dynamicpruningexpression(ws_sold_date_sk#30 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(ws_sold_date_sk#26), dynamicpruningexpression(ws_sold_date_sk#26 IN dynamicpruning#4)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 13] -Input [3]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30] +Input [3]: [ws_item_sk#24, ws_ext_sales_price#25, ws_sold_date_sk#26] (35) Filter [codegen id : 13] -Input [3]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30] -Condition : isnotnull(ws_item_sk#28) +Input [3]: [ws_item_sk#24, ws_ext_sales_price#25, ws_sold_date_sk#26] +Condition : isnotnull(ws_item_sk#24) (36) ReusedExchange [Reuses operator id: 60] -Output [1]: [d_date_sk#31] +Output [1]: [d_date_sk#27] (37) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ws_sold_date_sk#30] -Right keys [1]: [d_date_sk#31] +Left keys [1]: [ws_sold_date_sk#26] +Right keys [1]: [d_date_sk#27] Join condition: None (38) Project [codegen id : 13] -Output [2]: [ws_item_sk#28, ws_ext_sales_price#29] -Input [4]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30, d_date_sk#31] +Output [2]: [ws_item_sk#24, ws_ext_sales_price#25] +Input [4]: [ws_item_sk#24, ws_ext_sales_price#25, ws_sold_date_sk#26, d_date_sk#27] (39) ReusedExchange [Reuses operator id: 10] -Output [2]: [i_item_sk#32, i_item_id#33] +Output [2]: [i_item_sk#28, i_item_id#29] (40) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ws_item_sk#28] -Right keys [1]: [i_item_sk#32] +Left keys [1]: [ws_item_sk#24] +Right keys [1]: [i_item_sk#28] Join condition: None (41) Project [codegen id : 13] -Output [2]: [ws_ext_sales_price#29, i_item_id#33] -Input [4]: [ws_item_sk#28, ws_ext_sales_price#29, i_item_sk#32, i_item_id#33] +Output [2]: [ws_ext_sales_price#25, i_item_id#29] +Input [4]: [ws_item_sk#24, ws_ext_sales_price#25, i_item_sk#28, i_item_id#29] (42) HashAggregate [codegen id : 13] -Input [2]: [ws_ext_sales_price#29, i_item_id#33] -Keys [1]: [i_item_id#33] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#29))] -Aggregate Attributes [1]: [sum#34] -Results [2]: [i_item_id#33, sum#35] +Input [2]: [ws_ext_sales_price#25, i_item_id#29] +Keys [1]: [i_item_id#29] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#25))] +Aggregate Attributes [1]: [sum#30] +Results [2]: [i_item_id#29, sum#31] (43) Exchange -Input [2]: [i_item_id#33, sum#35] -Arguments: hashpartitioning(i_item_id#33, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [2]: [i_item_id#29, sum#31] +Arguments: hashpartitioning(i_item_id#29, 5), ENSURE_REQUIREMENTS, [plan_id=5] (44) HashAggregate [codegen id : 14] -Input [2]: [i_item_id#33, sum#35] -Keys [1]: [i_item_id#33] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#29))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#29))#37] -Results [2]: [i_item_id#33 AS item_id#38, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#29))#37,17,2) AS ws_item_rev#39] +Input [2]: [i_item_id#29, sum#31] +Keys [1]: [i_item_id#29] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#25))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#25))#32] +Results [2]: [i_item_id#29 AS item_id#33, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#25))#32,17,2) AS ws_item_rev#34] (45) Filter [codegen id : 14] -Input [2]: [item_id#38, ws_item_rev#39] -Condition : isnotnull(ws_item_rev#39) +Input [2]: [item_id#33, ws_item_rev#34] +Condition : isnotnull(ws_item_rev#34) (46) BroadcastExchange -Input [2]: [item_id#38, ws_item_rev#39] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] +Input [2]: [item_id#33, ws_item_rev#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] (47) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [item_id#13] -Right keys [1]: [item_id#38] -Join condition: ((((((((cast(ss_item_rev#14 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#39)), DecimalType(19,3))) AND (cast(ss_item_rev#14 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#39)), DecimalType(20,3)))) AND (cast(cs_item_rev#26 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#39)), DecimalType(19,3)))) AND (cast(cs_item_rev#26 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#39)), DecimalType(20,3)))) AND (cast(ws_item_rev#39 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#14)), DecimalType(19,3)))) AND (cast(ws_item_rev#39 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#14)), DecimalType(20,3)))) AND (cast(ws_item_rev#39 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#26)), DecimalType(19,3)))) AND (cast(ws_item_rev#39 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#26)), DecimalType(20,3)))) +Left keys [1]: [item_id#11] +Right keys [1]: [item_id#33] +Join condition: ((((((((cast(ss_item_rev#12 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#34)), DecimalType(19,3))) AND (cast(ss_item_rev#12 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#34)), DecimalType(20,3)))) AND (cast(cs_item_rev#23 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#34)), DecimalType(19,3)))) AND (cast(cs_item_rev#23 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#34)), DecimalType(20,3)))) AND (cast(ws_item_rev#34 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#12)), DecimalType(19,3)))) AND (cast(ws_item_rev#34 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#12)), DecimalType(20,3)))) AND (cast(ws_item_rev#34 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#23)), DecimalType(19,3)))) AND (cast(ws_item_rev#34 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#23)), DecimalType(20,3)))) (48) Project [codegen id : 15] -Output [8]: [item_id#13, ss_item_rev#14, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ss_dev#41, cs_item_rev#26, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#26 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS cs_dev#42, ws_item_rev#39, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#39 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ws_dev#43, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2))) / 3.00), DecimalType(23,6)) AS average#44] -Input [5]: [item_id#13, ss_item_rev#14, cs_item_rev#26, item_id#38, ws_item_rev#39] +Output [8]: [item_id#11, ss_item_rev#12, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#12 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#12 as decimal(18,2))) + promote_precision(cast(cs_item_rev#23 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#34 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ss_dev#35, cs_item_rev#23, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#23 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#12 as decimal(18,2))) + promote_precision(cast(cs_item_rev#23 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#34 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS cs_dev#36, ws_item_rev#34, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#34 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#12 as decimal(18,2))) + promote_precision(cast(cs_item_rev#23 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#34 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ws_dev#37, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#12 as decimal(18,2))) + promote_precision(cast(cs_item_rev#23 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#34 as decimal(19,2)))), DecimalType(19,2))) / 3.00), DecimalType(23,6)) AS average#38] +Input [5]: [item_id#11, ss_item_rev#12, cs_item_rev#23, item_id#33, ws_item_rev#34] (49) TakeOrderedAndProject -Input [8]: [item_id#13, ss_item_rev#14, ss_dev#41, cs_item_rev#26, cs_dev#42, ws_item_rev#39, ws_dev#43, average#44] -Arguments: 100, [item_id#13 ASC NULLS FIRST, ss_item_rev#14 ASC NULLS FIRST], [item_id#13, ss_item_rev#14, ss_dev#41, cs_item_rev#26, cs_dev#42, ws_item_rev#39, ws_dev#43, average#44] +Input [8]: [item_id#11, ss_item_rev#12, ss_dev#35, cs_item_rev#23, cs_dev#36, ws_item_rev#34, ws_dev#37, average#38] +Arguments: 100, [item_id#11 ASC NULLS FIRST, ss_item_rev#12 ASC NULLS FIRST], [item_id#11, ss_item_rev#12, ss_dev#35, cs_item_rev#23, cs_dev#36, ws_item_rev#34, ws_dev#37, average#38] ===== Subqueries ===== @@ -295,55 +295,55 @@ BroadcastExchange (60) (50) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_date#45] +Output [2]: [d_date_sk#5, d_date#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#5, d_date#45] +Input [2]: [d_date_sk#5, d_date#39] (52) Filter [codegen id : 2] -Input [2]: [d_date_sk#5, d_date#45] +Input [2]: [d_date_sk#5, d_date#39] Condition : isnotnull(d_date_sk#5) (53) Scan parquet default.date_dim -Output [2]: [d_date#46, d_week_seq#47] +Output [2]: [d_date#40, d_week_seq#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq)] ReadSchema: struct (54) ColumnarToRow [codegen id : 1] -Input [2]: [d_date#46, d_week_seq#47] +Input [2]: [d_date#40, d_week_seq#41] (55) Filter [codegen id : 1] -Input [2]: [d_date#46, d_week_seq#47] -Condition : (isnotnull(d_week_seq#47) AND (d_week_seq#47 = Subquery scalar-subquery#48, [id=#49])) +Input [2]: [d_date#40, d_week_seq#41] +Condition : (isnotnull(d_week_seq#41) AND (d_week_seq#41 = Subquery scalar-subquery#42, [id=#43])) (56) Project [codegen id : 1] -Output [1]: [d_date#46] -Input [2]: [d_date#46, d_week_seq#47] +Output [1]: [d_date#40] +Input [2]: [d_date#40, d_week_seq#41] (57) BroadcastExchange -Input [1]: [d_date#46] -Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#50] +Input [1]: [d_date#40] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [plan_id=7] (58) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [d_date#45] -Right keys [1]: [d_date#46] +Left keys [1]: [d_date#39] +Right keys [1]: [d_date#40] Join condition: None (59) Project [codegen id : 2] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_date#45] +Input [2]: [d_date_sk#5, d_date#39] (60) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#51] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 55 Hosting Expression = Subquery scalar-subquery#48, [id=#49] +Subquery:2 Hosting operator id = 55 Hosting Expression = Subquery scalar-subquery#42, [id=#43] * Project (64) +- * Filter (63) +- * ColumnarToRow (62) @@ -351,25 +351,25 @@ Subquery:2 Hosting operator id = 55 Hosting Expression = Subquery scalar-subquer (61) Scan parquet default.date_dim -Output [2]: [d_date#52, d_week_seq#53] +Output [2]: [d_date#44, d_week_seq#45] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), EqualTo(d_date,2000-01-03)] ReadSchema: struct (62) ColumnarToRow [codegen id : 1] -Input [2]: [d_date#52, d_week_seq#53] +Input [2]: [d_date#44, d_week_seq#45] (63) Filter [codegen id : 1] -Input [2]: [d_date#52, d_week_seq#53] -Condition : (isnotnull(d_date#52) AND (d_date#52 = 2000-01-03)) +Input [2]: [d_date#44, d_week_seq#45] +Condition : (isnotnull(d_date#44) AND (d_date#44 = 2000-01-03)) (64) Project [codegen id : 1] -Output [1]: [d_week_seq#53] -Input [2]: [d_date#52, d_week_seq#53] +Output [1]: [d_week_seq#45] +Input [2]: [d_date#44, d_week_seq#45] -Subquery:3 Hosting operator id = 17 Hosting Expression = cs_sold_date_sk#17 IN dynamicpruning#4 +Subquery:3 Hosting operator id = 17 Hosting Expression = cs_sold_date_sk#15 IN dynamicpruning#4 -Subquery:4 Hosting operator id = 33 Hosting Expression = ws_sold_date_sk#30 IN dynamicpruning#4 +Subquery:4 Hosting operator id = 33 Hosting Expression = ws_sold_date_sk#26 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt index 47651c0f92dca..4f2b027a0b088 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q58/explain.txt @@ -81,7 +81,7 @@ Condition : (isnotnull(i_item_sk#5) AND isnotnull(i_item_id#6)) (7) BroadcastExchange Input [2]: [i_item_sk#5, i_item_id#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_item_sk#1] @@ -93,190 +93,190 @@ Output [3]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6] Input [5]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#5, i_item_id#6] (10) ReusedExchange [Reuses operator id: 60] -Output [1]: [d_date_sk#8] +Output [1]: [d_date_sk#7] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_date_sk#3] -Right keys [1]: [d_date_sk#8] +Right keys [1]: [d_date_sk#7] Join condition: None (12) Project [codegen id : 4] Output [2]: [ss_ext_sales_price#2, i_item_id#6] -Input [4]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, d_date_sk#8] +Input [4]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, d_date_sk#7] (13) HashAggregate [codegen id : 4] Input [2]: [ss_ext_sales_price#2, i_item_id#6] Keys [1]: [i_item_id#6] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#9] -Results [2]: [i_item_id#6, sum#10] +Aggregate Attributes [1]: [sum#8] +Results [2]: [i_item_id#6, sum#9] (14) Exchange -Input [2]: [i_item_id#6, sum#10] -Arguments: hashpartitioning(i_item_id#6, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [2]: [i_item_id#6, sum#9] +Arguments: hashpartitioning(i_item_id#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 15] -Input [2]: [i_item_id#6, sum#10] +Input [2]: [i_item_id#6, sum#9] Keys [1]: [i_item_id#6] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#12] -Results [2]: [i_item_id#6 AS item_id#13, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#12,17,2) AS ss_item_rev#14] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#10] +Results [2]: [i_item_id#6 AS item_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#10,17,2) AS ss_item_rev#12] (16) Filter [codegen id : 15] -Input [2]: [item_id#13, ss_item_rev#14] -Condition : isnotnull(ss_item_rev#14) +Input [2]: [item_id#11, ss_item_rev#12] +Condition : isnotnull(ss_item_rev#12) (17) Scan parquet default.catalog_sales -Output [3]: [cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] +Output [3]: [cs_item_sk#13, cs_ext_sales_price#14, cs_sold_date_sk#15] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#17), dynamicpruningexpression(cs_sold_date_sk#17 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(cs_sold_date_sk#15), dynamicpruningexpression(cs_sold_date_sk#15 IN dynamicpruning#4)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (18) ColumnarToRow [codegen id : 8] -Input [3]: [cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] +Input [3]: [cs_item_sk#13, cs_ext_sales_price#14, cs_sold_date_sk#15] (19) Filter [codegen id : 8] -Input [3]: [cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] -Condition : isnotnull(cs_item_sk#15) +Input [3]: [cs_item_sk#13, cs_ext_sales_price#14, cs_sold_date_sk#15] +Condition : isnotnull(cs_item_sk#13) (20) ReusedExchange [Reuses operator id: 7] -Output [2]: [i_item_sk#18, i_item_id#19] +Output [2]: [i_item_sk#16, i_item_id#17] (21) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_item_sk#15] -Right keys [1]: [i_item_sk#18] +Left keys [1]: [cs_item_sk#13] +Right keys [1]: [i_item_sk#16] Join condition: None (22) Project [codegen id : 8] -Output [3]: [cs_ext_sales_price#16, cs_sold_date_sk#17, i_item_id#19] -Input [5]: [cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17, i_item_sk#18, i_item_id#19] +Output [3]: [cs_ext_sales_price#14, cs_sold_date_sk#15, i_item_id#17] +Input [5]: [cs_item_sk#13, cs_ext_sales_price#14, cs_sold_date_sk#15, i_item_sk#16, i_item_id#17] (23) ReusedExchange [Reuses operator id: 60] -Output [1]: [d_date_sk#20] +Output [1]: [d_date_sk#18] (24) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_sold_date_sk#17] -Right keys [1]: [d_date_sk#20] +Left keys [1]: [cs_sold_date_sk#15] +Right keys [1]: [d_date_sk#18] Join condition: None (25) Project [codegen id : 8] -Output [2]: [cs_ext_sales_price#16, i_item_id#19] -Input [4]: [cs_ext_sales_price#16, cs_sold_date_sk#17, i_item_id#19, d_date_sk#20] +Output [2]: [cs_ext_sales_price#14, i_item_id#17] +Input [4]: [cs_ext_sales_price#14, cs_sold_date_sk#15, i_item_id#17, d_date_sk#18] (26) HashAggregate [codegen id : 8] -Input [2]: [cs_ext_sales_price#16, i_item_id#19] -Keys [1]: [i_item_id#19] -Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#16))] -Aggregate Attributes [1]: [sum#21] -Results [2]: [i_item_id#19, sum#22] +Input [2]: [cs_ext_sales_price#14, i_item_id#17] +Keys [1]: [i_item_id#17] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#14))] +Aggregate Attributes [1]: [sum#19] +Results [2]: [i_item_id#17, sum#20] (27) Exchange -Input [2]: [i_item_id#19, sum#22] -Arguments: hashpartitioning(i_item_id#19, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [2]: [i_item_id#17, sum#20] +Arguments: hashpartitioning(i_item_id#17, 5), ENSURE_REQUIREMENTS, [plan_id=3] (28) HashAggregate [codegen id : 9] -Input [2]: [i_item_id#19, sum#22] -Keys [1]: [i_item_id#19] -Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#16))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#16))#24] -Results [2]: [i_item_id#19 AS item_id#25, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#16))#24,17,2) AS cs_item_rev#26] +Input [2]: [i_item_id#17, sum#20] +Keys [1]: [i_item_id#17] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#14))#21] +Results [2]: [i_item_id#17 AS item_id#22, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#14))#21,17,2) AS cs_item_rev#23] (29) Filter [codegen id : 9] -Input [2]: [item_id#25, cs_item_rev#26] -Condition : isnotnull(cs_item_rev#26) +Input [2]: [item_id#22, cs_item_rev#23] +Condition : isnotnull(cs_item_rev#23) (30) BroadcastExchange -Input [2]: [item_id#25, cs_item_rev#26] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] +Input [2]: [item_id#22, cs_item_rev#23] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=4] (31) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [item_id#13] -Right keys [1]: [item_id#25] -Join condition: ((((cast(ss_item_rev#14 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#26)), DecimalType(19,3))) AND (cast(ss_item_rev#14 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#26)), DecimalType(20,3)))) AND (cast(cs_item_rev#26 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#14)), DecimalType(19,3)))) AND (cast(cs_item_rev#26 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#14)), DecimalType(20,3)))) +Left keys [1]: [item_id#11] +Right keys [1]: [item_id#22] +Join condition: ((((cast(ss_item_rev#12 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#23)), DecimalType(19,3))) AND (cast(ss_item_rev#12 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#23)), DecimalType(20,3)))) AND (cast(cs_item_rev#23 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#12)), DecimalType(19,3)))) AND (cast(cs_item_rev#23 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#12)), DecimalType(20,3)))) (32) Project [codegen id : 15] -Output [3]: [item_id#13, ss_item_rev#14, cs_item_rev#26] -Input [4]: [item_id#13, ss_item_rev#14, item_id#25, cs_item_rev#26] +Output [3]: [item_id#11, ss_item_rev#12, cs_item_rev#23] +Input [4]: [item_id#11, ss_item_rev#12, item_id#22, cs_item_rev#23] (33) Scan parquet default.web_sales -Output [3]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30] +Output [3]: [ws_item_sk#24, ws_ext_sales_price#25, ws_sold_date_sk#26] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#30), dynamicpruningexpression(ws_sold_date_sk#30 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(ws_sold_date_sk#26), dynamicpruningexpression(ws_sold_date_sk#26 IN dynamicpruning#4)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 13] -Input [3]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30] +Input [3]: [ws_item_sk#24, ws_ext_sales_price#25, ws_sold_date_sk#26] (35) Filter [codegen id : 13] -Input [3]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30] -Condition : isnotnull(ws_item_sk#28) +Input [3]: [ws_item_sk#24, ws_ext_sales_price#25, ws_sold_date_sk#26] +Condition : isnotnull(ws_item_sk#24) (36) ReusedExchange [Reuses operator id: 7] -Output [2]: [i_item_sk#31, i_item_id#32] +Output [2]: [i_item_sk#27, i_item_id#28] (37) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ws_item_sk#28] -Right keys [1]: [i_item_sk#31] +Left keys [1]: [ws_item_sk#24] +Right keys [1]: [i_item_sk#27] Join condition: None (38) Project [codegen id : 13] -Output [3]: [ws_ext_sales_price#29, ws_sold_date_sk#30, i_item_id#32] -Input [5]: [ws_item_sk#28, ws_ext_sales_price#29, ws_sold_date_sk#30, i_item_sk#31, i_item_id#32] +Output [3]: [ws_ext_sales_price#25, ws_sold_date_sk#26, i_item_id#28] +Input [5]: [ws_item_sk#24, ws_ext_sales_price#25, ws_sold_date_sk#26, i_item_sk#27, i_item_id#28] (39) ReusedExchange [Reuses operator id: 60] -Output [1]: [d_date_sk#33] +Output [1]: [d_date_sk#29] (40) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ws_sold_date_sk#30] -Right keys [1]: [d_date_sk#33] +Left keys [1]: [ws_sold_date_sk#26] +Right keys [1]: [d_date_sk#29] Join condition: None (41) Project [codegen id : 13] -Output [2]: [ws_ext_sales_price#29, i_item_id#32] -Input [4]: [ws_ext_sales_price#29, ws_sold_date_sk#30, i_item_id#32, d_date_sk#33] +Output [2]: [ws_ext_sales_price#25, i_item_id#28] +Input [4]: [ws_ext_sales_price#25, ws_sold_date_sk#26, i_item_id#28, d_date_sk#29] (42) HashAggregate [codegen id : 13] -Input [2]: [ws_ext_sales_price#29, i_item_id#32] -Keys [1]: [i_item_id#32] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#29))] -Aggregate Attributes [1]: [sum#34] -Results [2]: [i_item_id#32, sum#35] +Input [2]: [ws_ext_sales_price#25, i_item_id#28] +Keys [1]: [i_item_id#28] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#25))] +Aggregate Attributes [1]: [sum#30] +Results [2]: [i_item_id#28, sum#31] (43) Exchange -Input [2]: [i_item_id#32, sum#35] -Arguments: hashpartitioning(i_item_id#32, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [2]: [i_item_id#28, sum#31] +Arguments: hashpartitioning(i_item_id#28, 5), ENSURE_REQUIREMENTS, [plan_id=5] (44) HashAggregate [codegen id : 14] -Input [2]: [i_item_id#32, sum#35] -Keys [1]: [i_item_id#32] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#29))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#29))#37] -Results [2]: [i_item_id#32 AS item_id#38, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#29))#37,17,2) AS ws_item_rev#39] +Input [2]: [i_item_id#28, sum#31] +Keys [1]: [i_item_id#28] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#25))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#25))#32] +Results [2]: [i_item_id#28 AS item_id#33, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#25))#32,17,2) AS ws_item_rev#34] (45) Filter [codegen id : 14] -Input [2]: [item_id#38, ws_item_rev#39] -Condition : isnotnull(ws_item_rev#39) +Input [2]: [item_id#33, ws_item_rev#34] +Condition : isnotnull(ws_item_rev#34) (46) BroadcastExchange -Input [2]: [item_id#38, ws_item_rev#39] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] +Input [2]: [item_id#33, ws_item_rev#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] (47) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [item_id#13] -Right keys [1]: [item_id#38] -Join condition: ((((((((cast(ss_item_rev#14 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#39)), DecimalType(19,3))) AND (cast(ss_item_rev#14 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#39)), DecimalType(20,3)))) AND (cast(cs_item_rev#26 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#39)), DecimalType(19,3)))) AND (cast(cs_item_rev#26 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#39)), DecimalType(20,3)))) AND (cast(ws_item_rev#39 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#14)), DecimalType(19,3)))) AND (cast(ws_item_rev#39 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#14)), DecimalType(20,3)))) AND (cast(ws_item_rev#39 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#26)), DecimalType(19,3)))) AND (cast(ws_item_rev#39 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#26)), DecimalType(20,3)))) +Left keys [1]: [item_id#11] +Right keys [1]: [item_id#33] +Join condition: ((((((((cast(ss_item_rev#12 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#34)), DecimalType(19,3))) AND (cast(ss_item_rev#12 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#34)), DecimalType(20,3)))) AND (cast(cs_item_rev#23 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ws_item_rev#34)), DecimalType(19,3)))) AND (cast(cs_item_rev#23 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ws_item_rev#34)), DecimalType(20,3)))) AND (cast(ws_item_rev#34 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(ss_item_rev#12)), DecimalType(19,3)))) AND (cast(ws_item_rev#34 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(ss_item_rev#12)), DecimalType(20,3)))) AND (cast(ws_item_rev#34 as decimal(19,3)) >= CheckOverflow((0.90 * promote_precision(cs_item_rev#23)), DecimalType(19,3)))) AND (cast(ws_item_rev#34 as decimal(20,3)) <= CheckOverflow((1.10 * promote_precision(cs_item_rev#23)), DecimalType(20,3)))) (48) Project [codegen id : 15] -Output [8]: [item_id#13, ss_item_rev#14, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ss_dev#41, cs_item_rev#26, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#26 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS cs_dev#42, ws_item_rev#39, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#39 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ws_dev#43, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#14 as decimal(18,2))) + promote_precision(cast(cs_item_rev#26 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#39 as decimal(19,2)))), DecimalType(19,2))) / 3.00), DecimalType(23,6)) AS average#44] -Input [5]: [item_id#13, ss_item_rev#14, cs_item_rev#26, item_id#38, ws_item_rev#39] +Output [8]: [item_id#11, ss_item_rev#12, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ss_item_rev#12 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#12 as decimal(18,2))) + promote_precision(cast(cs_item_rev#23 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#34 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ss_dev#35, cs_item_rev#23, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(cs_item_rev#23 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#12 as decimal(18,2))) + promote_precision(cast(cs_item_rev#23 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#34 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS cs_dev#36, ws_item_rev#34, CheckOverflow((promote_precision(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(ws_item_rev#34 as decimal(19,2))) / promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#12 as decimal(18,2))) + promote_precision(cast(cs_item_rev#23 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#34 as decimal(19,2)))), DecimalType(19,2)))), DecimalType(38,21))) / 3.000000000000000000000), DecimalType(38,21))) * 100.000000000000000000000), DecimalType(38,17)) AS ws_dev#37, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(ss_item_rev#12 as decimal(18,2))) + promote_precision(cast(cs_item_rev#23 as decimal(18,2)))), DecimalType(18,2)) as decimal(19,2))) + promote_precision(cast(ws_item_rev#34 as decimal(19,2)))), DecimalType(19,2))) / 3.00), DecimalType(23,6)) AS average#38] +Input [5]: [item_id#11, ss_item_rev#12, cs_item_rev#23, item_id#33, ws_item_rev#34] (49) TakeOrderedAndProject -Input [8]: [item_id#13, ss_item_rev#14, ss_dev#41, cs_item_rev#26, cs_dev#42, ws_item_rev#39, ws_dev#43, average#44] -Arguments: 100, [item_id#13 ASC NULLS FIRST, ss_item_rev#14 ASC NULLS FIRST], [item_id#13, ss_item_rev#14, ss_dev#41, cs_item_rev#26, cs_dev#42, ws_item_rev#39, ws_dev#43, average#44] +Input [8]: [item_id#11, ss_item_rev#12, ss_dev#35, cs_item_rev#23, cs_dev#36, ws_item_rev#34, ws_dev#37, average#38] +Arguments: 100, [item_id#11 ASC NULLS FIRST, ss_item_rev#12 ASC NULLS FIRST], [item_id#11, ss_item_rev#12, ss_dev#35, cs_item_rev#23, cs_dev#36, ws_item_rev#34, ws_dev#37, average#38] ===== Subqueries ===== @@ -295,55 +295,55 @@ BroadcastExchange (60) (50) Scan parquet default.date_dim -Output [2]: [d_date_sk#8, d_date#45] +Output [2]: [d_date_sk#7, d_date#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 2] -Input [2]: [d_date_sk#8, d_date#45] +Input [2]: [d_date_sk#7, d_date#39] (52) Filter [codegen id : 2] -Input [2]: [d_date_sk#8, d_date#45] -Condition : isnotnull(d_date_sk#8) +Input [2]: [d_date_sk#7, d_date#39] +Condition : isnotnull(d_date_sk#7) (53) Scan parquet default.date_dim -Output [2]: [d_date#46, d_week_seq#47] +Output [2]: [d_date#40, d_week_seq#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq)] ReadSchema: struct (54) ColumnarToRow [codegen id : 1] -Input [2]: [d_date#46, d_week_seq#47] +Input [2]: [d_date#40, d_week_seq#41] (55) Filter [codegen id : 1] -Input [2]: [d_date#46, d_week_seq#47] -Condition : (isnotnull(d_week_seq#47) AND (d_week_seq#47 = Subquery scalar-subquery#48, [id=#49])) +Input [2]: [d_date#40, d_week_seq#41] +Condition : (isnotnull(d_week_seq#41) AND (d_week_seq#41 = Subquery scalar-subquery#42, [id=#43])) (56) Project [codegen id : 1] -Output [1]: [d_date#46] -Input [2]: [d_date#46, d_week_seq#47] +Output [1]: [d_date#40] +Input [2]: [d_date#40, d_week_seq#41] (57) BroadcastExchange -Input [1]: [d_date#46] -Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#50] +Input [1]: [d_date#40] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [plan_id=7] (58) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [d_date#45] -Right keys [1]: [d_date#46] +Left keys [1]: [d_date#39] +Right keys [1]: [d_date#40] Join condition: None (59) Project [codegen id : 2] -Output [1]: [d_date_sk#8] -Input [2]: [d_date_sk#8, d_date#45] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#39] (60) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#51] +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 55 Hosting Expression = Subquery scalar-subquery#48, [id=#49] +Subquery:2 Hosting operator id = 55 Hosting Expression = Subquery scalar-subquery#42, [id=#43] * Project (64) +- * Filter (63) +- * ColumnarToRow (62) @@ -351,25 +351,25 @@ Subquery:2 Hosting operator id = 55 Hosting Expression = Subquery scalar-subquer (61) Scan parquet default.date_dim -Output [2]: [d_date#52, d_week_seq#53] +Output [2]: [d_date#44, d_week_seq#45] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), EqualTo(d_date,2000-01-03)] ReadSchema: struct (62) ColumnarToRow [codegen id : 1] -Input [2]: [d_date#52, d_week_seq#53] +Input [2]: [d_date#44, d_week_seq#45] (63) Filter [codegen id : 1] -Input [2]: [d_date#52, d_week_seq#53] -Condition : (isnotnull(d_date#52) AND (d_date#52 = 2000-01-03)) +Input [2]: [d_date#44, d_week_seq#45] +Condition : (isnotnull(d_date#44) AND (d_date#44 = 2000-01-03)) (64) Project [codegen id : 1] -Output [1]: [d_week_seq#53] -Input [2]: [d_date#52, d_week_seq#53] +Output [1]: [d_week_seq#45] +Input [2]: [d_date#44, d_week_seq#45] -Subquery:3 Hosting operator id = 17 Hosting Expression = cs_sold_date_sk#17 IN dynamicpruning#4 +Subquery:3 Hosting operator id = 17 Hosting Expression = cs_sold_date_sk#15 IN dynamicpruning#4 -Subquery:4 Hosting operator id = 33 Hosting Expression = ws_sold_date_sk#30 IN dynamicpruning#4 +Subquery:4 Hosting operator id = 33 Hosting Expression = ws_sold_date_sk#26 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt index 1e9c240705bd8..e9788fb36a43c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59.sf100/explain.txt @@ -76,7 +76,7 @@ Condition : (isnotnull(d_date_sk#4) AND isnotnull(d_week_seq#5)) (7) BroadcastExchange Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#3] @@ -91,160 +91,160 @@ Input [6]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_w Input [4]: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] Keys [2]: [d_week_seq#5, ss_store_sk#1] Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] -Aggregate Attributes [7]: [sum#8, sum#9, sum#10, sum#11, sum#12, sum#13, sum#14] -Results [9]: [d_week_seq#5, ss_store_sk#1, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Aggregate Attributes [7]: [sum#7, sum#8, sum#9, sum#10, sum#11, sum#12, sum#13] +Results [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] (11) Exchange -Input [9]: [d_week_seq#5, ss_store_sk#1, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] -Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) HashAggregate [codegen id : 10] -Input [9]: [d_week_seq#5, ss_store_sk#1, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] Keys [2]: [d_week_seq#5, ss_store_sk#1] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29] -Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#25,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29,17,2) AS sat_sales#36] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27] +Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21,17,2) AS sun_sales#28, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22,17,2) AS mon_sales#29, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23,17,2) AS tue_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24,17,2) AS wed_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25,17,2) AS thu_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26,17,2) AS fri_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27,17,2) AS sat_sales#34] (13) Scan parquet default.store -Output [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Output [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] (15) Filter [codegen id : 3] -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] -Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Condition : (isnotnull(s_store_sk#35) AND isnotnull(s_store_id#36)) (16) BroadcastExchange -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (17) BroadcastHashJoin [codegen id : 10] Left keys [1]: [ss_store_sk#1] -Right keys [1]: [s_store_sk#37] +Right keys [1]: [s_store_sk#35] Join condition: None (18) Project [codegen id : 10] -Output [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39] -Input [12]: [d_week_seq#5, ss_store_sk#1, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38, s_store_name#39] +Output [10]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#36, s_store_name#37] +Input [12]: [d_week_seq#5, ss_store_sk#1, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_sk#35, s_store_id#36, s_store_name#37] (19) Scan parquet default.date_dim -Output [2]: [d_month_seq#41, d_week_seq#42] +Output [2]: [d_month_seq#38, d_week_seq#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_week_seq)] ReadSchema: struct (20) ColumnarToRow [codegen id : 4] -Input [2]: [d_month_seq#41, d_week_seq#42] +Input [2]: [d_month_seq#38, d_week_seq#39] (21) Filter [codegen id : 4] -Input [2]: [d_month_seq#41, d_week_seq#42] -Condition : (((isnotnull(d_month_seq#41) AND (d_month_seq#41 >= 1212)) AND (d_month_seq#41 <= 1223)) AND isnotnull(d_week_seq#42)) +Input [2]: [d_month_seq#38, d_week_seq#39] +Condition : (((isnotnull(d_month_seq#38) AND (d_month_seq#38 >= 1212)) AND (d_month_seq#38 <= 1223)) AND isnotnull(d_week_seq#39)) (22) Project [codegen id : 4] -Output [1]: [d_week_seq#42] -Input [2]: [d_month_seq#41, d_week_seq#42] +Output [1]: [d_week_seq#39] +Input [2]: [d_month_seq#38, d_week_seq#39] (23) BroadcastExchange -Input [1]: [d_week_seq#42] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#43] +Input [1]: [d_week_seq#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (24) BroadcastHashJoin [codegen id : 10] Left keys [1]: [d_week_seq#5] -Right keys [1]: [d_week_seq#42] +Right keys [1]: [d_week_seq#39] Join condition: None (25) Project [codegen id : 10] -Output [10]: [s_store_name#39 AS s_store_name1#44, d_week_seq#5 AS d_week_seq1#45, s_store_id#38 AS s_store_id1#46, sun_sales#30 AS sun_sales1#47, mon_sales#31 AS mon_sales1#48, tue_sales#32 AS tue_sales1#49, wed_sales#33 AS wed_sales1#50, thu_sales#34 AS thu_sales1#51, fri_sales#35 AS fri_sales1#52, sat_sales#36 AS sat_sales1#53] -Input [11]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39, d_week_seq#42] +Output [10]: [s_store_name#37 AS s_store_name1#40, d_week_seq#5 AS d_week_seq1#41, s_store_id#36 AS s_store_id1#42, sun_sales#28 AS sun_sales1#43, mon_sales#29 AS mon_sales1#44, tue_sales#30 AS tue_sales1#45, wed_sales#31 AS wed_sales1#46, thu_sales#32 AS thu_sales1#47, fri_sales#33 AS fri_sales1#48, sat_sales#34 AS sat_sales1#49] +Input [11]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#36, s_store_name#37, d_week_seq#39] (26) ReusedExchange [Reuses operator id: 11] -Output [9]: [d_week_seq#5, ss_store_sk#1, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] +Output [9]: [d_week_seq#5, ss_store_sk#1, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] (27) HashAggregate [codegen id : 9] -Input [9]: [d_week_seq#5, ss_store_sk#1, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] Keys [2]: [d_week_seq#5, ss_store_sk#1] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29] -Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#25,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29,17,2) AS sat_sales#36] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27] +Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21,17,2) AS sun_sales#28, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22,17,2) AS mon_sales#29, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23,17,2) AS tue_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24,17,2) AS wed_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25,17,2) AS thu_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26,17,2) AS fri_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27,17,2) AS sat_sales#34] (28) Scan parquet default.store -Output [2]: [s_store_sk#61, s_store_id#62] +Output [2]: [s_store_sk#57, s_store_id#58] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] ReadSchema: struct (29) ColumnarToRow [codegen id : 7] -Input [2]: [s_store_sk#61, s_store_id#62] +Input [2]: [s_store_sk#57, s_store_id#58] (30) Filter [codegen id : 7] -Input [2]: [s_store_sk#61, s_store_id#62] -Condition : (isnotnull(s_store_sk#61) AND isnotnull(s_store_id#62)) +Input [2]: [s_store_sk#57, s_store_id#58] +Condition : (isnotnull(s_store_sk#57) AND isnotnull(s_store_id#58)) (31) BroadcastExchange -Input [2]: [s_store_sk#61, s_store_id#62] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#63] +Input [2]: [s_store_sk#57, s_store_id#58] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (32) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#1] -Right keys [1]: [s_store_sk#61] +Right keys [1]: [s_store_sk#57] Join condition: None (33) Project [codegen id : 9] -Output [9]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#62] -Input [11]: [d_week_seq#5, ss_store_sk#1, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#61, s_store_id#62] +Output [9]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#58] +Input [11]: [d_week_seq#5, ss_store_sk#1, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_sk#57, s_store_id#58] (34) Scan parquet default.date_dim -Output [2]: [d_month_seq#64, d_week_seq#65] +Output [2]: [d_month_seq#59, d_week_seq#60] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235), IsNotNull(d_week_seq)] ReadSchema: struct (35) ColumnarToRow [codegen id : 8] -Input [2]: [d_month_seq#64, d_week_seq#65] +Input [2]: [d_month_seq#59, d_week_seq#60] (36) Filter [codegen id : 8] -Input [2]: [d_month_seq#64, d_week_seq#65] -Condition : (((isnotnull(d_month_seq#64) AND (d_month_seq#64 >= 1224)) AND (d_month_seq#64 <= 1235)) AND isnotnull(d_week_seq#65)) +Input [2]: [d_month_seq#59, d_week_seq#60] +Condition : (((isnotnull(d_month_seq#59) AND (d_month_seq#59 >= 1224)) AND (d_month_seq#59 <= 1235)) AND isnotnull(d_week_seq#60)) (37) Project [codegen id : 8] -Output [1]: [d_week_seq#65] -Input [2]: [d_month_seq#64, d_week_seq#65] +Output [1]: [d_week_seq#60] +Input [2]: [d_month_seq#59, d_week_seq#60] (38) BroadcastExchange -Input [1]: [d_week_seq#65] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#66] +Input [1]: [d_week_seq#60] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] (39) BroadcastHashJoin [codegen id : 9] Left keys [1]: [d_week_seq#5] -Right keys [1]: [d_week_seq#65] +Right keys [1]: [d_week_seq#60] Join condition: None (40) Project [codegen id : 9] -Output [9]: [d_week_seq#5 AS d_week_seq2#67, s_store_id#62 AS s_store_id2#68, sun_sales#30 AS sun_sales2#69, mon_sales#31 AS mon_sales2#70, tue_sales#32 AS tue_sales2#71, wed_sales#33 AS wed_sales2#72, thu_sales#34 AS thu_sales2#73, fri_sales#35 AS fri_sales2#74, sat_sales#36 AS sat_sales2#75] -Input [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#62, d_week_seq#65] +Output [9]: [d_week_seq#5 AS d_week_seq2#61, s_store_id#58 AS s_store_id2#62, sun_sales#28 AS sun_sales2#63, mon_sales#29 AS mon_sales2#64, tue_sales#30 AS tue_sales2#65, wed_sales#31 AS wed_sales2#66, thu_sales#32 AS thu_sales2#67, fri_sales#33 AS fri_sales2#68, sat_sales#34 AS sat_sales2#69] +Input [10]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#58, d_week_seq#60] (41) BroadcastExchange -Input [9]: [d_week_seq2#67, s_store_id2#68, sun_sales2#69, mon_sales2#70, tue_sales2#71, wed_sales2#72, thu_sales2#73, fri_sales2#74, sat_sales2#75] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [id=#76] +Input [9]: [d_week_seq2#61, s_store_id2#62, sun_sales2#63, mon_sales2#64, tue_sales2#65, wed_sales2#66, thu_sales2#67, fri_sales2#68, sat_sales2#69] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [plan_id=7] (42) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [s_store_id1#46, d_week_seq1#45] -Right keys [2]: [s_store_id2#68, (d_week_seq2#67 - 52)] +Left keys [2]: [s_store_id1#42, d_week_seq1#41] +Right keys [2]: [s_store_id2#62, (d_week_seq2#61 - 52)] Join condition: None (43) Project [codegen id : 10] -Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#69)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#77, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#70)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#78, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales2#71)), DecimalType(37,20)) AS (tue_sales1 / tue_sales2)#79, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#72)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#80, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#73)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#81, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#74)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#82, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#75)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#83] -Input [19]: [s_store_name1#44, d_week_seq1#45, s_store_id1#46, sun_sales1#47, mon_sales1#48, tue_sales1#49, wed_sales1#50, thu_sales1#51, fri_sales1#52, sat_sales1#53, d_week_seq2#67, s_store_id2#68, sun_sales2#69, mon_sales2#70, tue_sales2#71, wed_sales2#72, thu_sales2#73, fri_sales2#74, sat_sales2#75] +Output [10]: [s_store_name1#40, s_store_id1#42, d_week_seq1#41, CheckOverflow((promote_precision(sun_sales1#43) / promote_precision(sun_sales2#63)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#70, CheckOverflow((promote_precision(mon_sales1#44) / promote_precision(mon_sales2#64)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#71, CheckOverflow((promote_precision(tue_sales1#45) / promote_precision(tue_sales2#65)), DecimalType(37,20)) AS (tue_sales1 / tue_sales2)#72, CheckOverflow((promote_precision(wed_sales1#46) / promote_precision(wed_sales2#66)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#73, CheckOverflow((promote_precision(thu_sales1#47) / promote_precision(thu_sales2#67)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#74, CheckOverflow((promote_precision(fri_sales1#48) / promote_precision(fri_sales2#68)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#75, CheckOverflow((promote_precision(sat_sales1#49) / promote_precision(sat_sales2#69)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#76] +Input [19]: [s_store_name1#40, d_week_seq1#41, s_store_id1#42, sun_sales1#43, mon_sales1#44, tue_sales1#45, wed_sales1#46, thu_sales1#47, fri_sales1#48, sat_sales1#49, d_week_seq2#61, s_store_id2#62, sun_sales2#63, mon_sales2#64, tue_sales2#65, wed_sales2#66, thu_sales2#67, fri_sales2#68, sat_sales2#69] (44) TakeOrderedAndProject -Input [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#77, (mon_sales1 / mon_sales2)#78, (tue_sales1 / tue_sales2)#79, (wed_sales1 / wed_sales2)#80, (thu_sales1 / thu_sales2)#81, (fri_sales1 / fri_sales2)#82, (sat_sales1 / sat_sales2)#83] -Arguments: 100, [s_store_name1#44 ASC NULLS FIRST, s_store_id1#46 ASC NULLS FIRST, d_week_seq1#45 ASC NULLS FIRST], [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#77, (mon_sales1 / mon_sales2)#78, (tue_sales1 / tue_sales2)#79, (wed_sales1 / wed_sales2)#80, (thu_sales1 / thu_sales2)#81, (fri_sales1 / fri_sales2)#82, (sat_sales1 / sat_sales2)#83] +Input [10]: [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1 / sun_sales2)#70, (mon_sales1 / mon_sales2)#71, (tue_sales1 / tue_sales2)#72, (wed_sales1 / wed_sales2)#73, (thu_sales1 / thu_sales2)#74, (fri_sales1 / fri_sales2)#75, (sat_sales1 / sat_sales2)#76] +Arguments: 100, [s_store_name1#40 ASC NULLS FIRST, s_store_id1#42 ASC NULLS FIRST, d_week_seq1#41 ASC NULLS FIRST], [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1 / sun_sales2)#70, (mon_sales1 / mon_sales2)#71, (tue_sales1 / tue_sales2)#72, (wed_sales1 / wed_sales2)#73, (thu_sales1 / thu_sales2)#74, (fri_sales1 / fri_sales2)#75, (sat_sales1 / sat_sales2)#76] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt index 1e9c240705bd8..e9788fb36a43c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q59/explain.txt @@ -76,7 +76,7 @@ Condition : (isnotnull(d_date_sk#4) AND isnotnull(d_week_seq#5)) (7) BroadcastExchange Input [3]: [d_date_sk#4, d_week_seq#5, d_day_name#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ss_sold_date_sk#3] @@ -91,160 +91,160 @@ Input [6]: [ss_store_sk#1, ss_sales_price#2, ss_sold_date_sk#3, d_date_sk#4, d_w Input [4]: [ss_store_sk#1, ss_sales_price#2, d_week_seq#5, d_day_name#6] Keys [2]: [d_week_seq#5, ss_store_sk#1] Functions [7]: [partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), partial_sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] -Aggregate Attributes [7]: [sum#8, sum#9, sum#10, sum#11, sum#12, sum#13, sum#14] -Results [9]: [d_week_seq#5, ss_store_sk#1, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Aggregate Attributes [7]: [sum#7, sum#8, sum#9, sum#10, sum#11, sum#12, sum#13] +Results [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] (11) Exchange -Input [9]: [d_week_seq#5, ss_store_sk#1, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] -Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] +Arguments: hashpartitioning(d_week_seq#5, ss_store_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) HashAggregate [codegen id : 10] -Input [9]: [d_week_seq#5, ss_store_sk#1, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20, sum#21] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#14, sum#15, sum#16, sum#17, sum#18, sum#19, sum#20] Keys [2]: [d_week_seq#5, ss_store_sk#1] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29] -Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#25,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29,17,2) AS sat_sales#36] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27] +Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21,17,2) AS sun_sales#28, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22,17,2) AS mon_sales#29, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23,17,2) AS tue_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24,17,2) AS wed_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25,17,2) AS thu_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26,17,2) AS fri_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27,17,2) AS sat_sales#34] (13) Scan parquet default.store -Output [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Output [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] (15) Filter [codegen id : 3] -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] -Condition : (isnotnull(s_store_sk#37) AND isnotnull(s_store_id#38)) +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Condition : (isnotnull(s_store_sk#35) AND isnotnull(s_store_id#36)) (16) BroadcastExchange -Input [3]: [s_store_sk#37, s_store_id#38, s_store_name#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] +Input [3]: [s_store_sk#35, s_store_id#36, s_store_name#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (17) BroadcastHashJoin [codegen id : 10] Left keys [1]: [ss_store_sk#1] -Right keys [1]: [s_store_sk#37] +Right keys [1]: [s_store_sk#35] Join condition: None (18) Project [codegen id : 10] -Output [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39] -Input [12]: [d_week_seq#5, ss_store_sk#1, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#37, s_store_id#38, s_store_name#39] +Output [10]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#36, s_store_name#37] +Input [12]: [d_week_seq#5, ss_store_sk#1, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_sk#35, s_store_id#36, s_store_name#37] (19) Scan parquet default.date_dim -Output [2]: [d_month_seq#41, d_week_seq#42] +Output [2]: [d_month_seq#38, d_week_seq#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_week_seq)] ReadSchema: struct (20) ColumnarToRow [codegen id : 4] -Input [2]: [d_month_seq#41, d_week_seq#42] +Input [2]: [d_month_seq#38, d_week_seq#39] (21) Filter [codegen id : 4] -Input [2]: [d_month_seq#41, d_week_seq#42] -Condition : (((isnotnull(d_month_seq#41) AND (d_month_seq#41 >= 1212)) AND (d_month_seq#41 <= 1223)) AND isnotnull(d_week_seq#42)) +Input [2]: [d_month_seq#38, d_week_seq#39] +Condition : (((isnotnull(d_month_seq#38) AND (d_month_seq#38 >= 1212)) AND (d_month_seq#38 <= 1223)) AND isnotnull(d_week_seq#39)) (22) Project [codegen id : 4] -Output [1]: [d_week_seq#42] -Input [2]: [d_month_seq#41, d_week_seq#42] +Output [1]: [d_week_seq#39] +Input [2]: [d_month_seq#38, d_week_seq#39] (23) BroadcastExchange -Input [1]: [d_week_seq#42] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#43] +Input [1]: [d_week_seq#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (24) BroadcastHashJoin [codegen id : 10] Left keys [1]: [d_week_seq#5] -Right keys [1]: [d_week_seq#42] +Right keys [1]: [d_week_seq#39] Join condition: None (25) Project [codegen id : 10] -Output [10]: [s_store_name#39 AS s_store_name1#44, d_week_seq#5 AS d_week_seq1#45, s_store_id#38 AS s_store_id1#46, sun_sales#30 AS sun_sales1#47, mon_sales#31 AS mon_sales1#48, tue_sales#32 AS tue_sales1#49, wed_sales#33 AS wed_sales1#50, thu_sales#34 AS thu_sales1#51, fri_sales#35 AS fri_sales1#52, sat_sales#36 AS sat_sales1#53] -Input [11]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#38, s_store_name#39, d_week_seq#42] +Output [10]: [s_store_name#37 AS s_store_name1#40, d_week_seq#5 AS d_week_seq1#41, s_store_id#36 AS s_store_id1#42, sun_sales#28 AS sun_sales1#43, mon_sales#29 AS mon_sales1#44, tue_sales#30 AS tue_sales1#45, wed_sales#31 AS wed_sales1#46, thu_sales#32 AS thu_sales1#47, fri_sales#33 AS fri_sales1#48, sat_sales#34 AS sat_sales1#49] +Input [11]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#36, s_store_name#37, d_week_seq#39] (26) ReusedExchange [Reuses operator id: 11] -Output [9]: [d_week_seq#5, ss_store_sk#1, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] +Output [9]: [d_week_seq#5, ss_store_sk#1, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] (27) HashAggregate [codegen id : 9] -Input [9]: [d_week_seq#5, ss_store_sk#1, sum#54, sum#55, sum#56, sum#57, sum#58, sum#59, sum#60] +Input [9]: [d_week_seq#5, ss_store_sk#1, sum#50, sum#51, sum#52, sum#53, sum#54, sum#55, sum#56] Keys [2]: [d_week_seq#5, ss_store_sk#1] Functions [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END)), sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))] -Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29] -Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#23,17,2) AS sun_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#24,17,2) AS mon_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#25,17,2) AS tue_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#26,17,2) AS wed_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#27,17,2) AS thu_sales#34, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#28,17,2) AS fri_sales#35, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#29,17,2) AS sat_sales#36] +Aggregate Attributes [7]: [sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26, sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27] +Results [9]: [d_week_seq#5, ss_store_sk#1, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Sunday ) THEN ss_sales_price#2 END))#21,17,2) AS sun_sales#28, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Monday ) THEN ss_sales_price#2 END))#22,17,2) AS mon_sales#29, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Tuesday ) THEN ss_sales_price#2 END))#23,17,2) AS tue_sales#30, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Wednesday) THEN ss_sales_price#2 END))#24,17,2) AS wed_sales#31, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Thursday ) THEN ss_sales_price#2 END))#25,17,2) AS thu_sales#32, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Friday ) THEN ss_sales_price#2 END))#26,17,2) AS fri_sales#33, MakeDecimal(sum(UnscaledValue(CASE WHEN (d_day_name#6 = Saturday ) THEN ss_sales_price#2 END))#27,17,2) AS sat_sales#34] (28) Scan parquet default.store -Output [2]: [s_store_sk#61, s_store_id#62] +Output [2]: [s_store_sk#57, s_store_id#58] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_id)] ReadSchema: struct (29) ColumnarToRow [codegen id : 7] -Input [2]: [s_store_sk#61, s_store_id#62] +Input [2]: [s_store_sk#57, s_store_id#58] (30) Filter [codegen id : 7] -Input [2]: [s_store_sk#61, s_store_id#62] -Condition : (isnotnull(s_store_sk#61) AND isnotnull(s_store_id#62)) +Input [2]: [s_store_sk#57, s_store_id#58] +Condition : (isnotnull(s_store_sk#57) AND isnotnull(s_store_id#58)) (31) BroadcastExchange -Input [2]: [s_store_sk#61, s_store_id#62] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#63] +Input [2]: [s_store_sk#57, s_store_id#58] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (32) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#1] -Right keys [1]: [s_store_sk#61] +Right keys [1]: [s_store_sk#57] Join condition: None (33) Project [codegen id : 9] -Output [9]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#62] -Input [11]: [d_week_seq#5, ss_store_sk#1, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_sk#61, s_store_id#62] +Output [9]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#58] +Input [11]: [d_week_seq#5, ss_store_sk#1, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_sk#57, s_store_id#58] (34) Scan parquet default.date_dim -Output [2]: [d_month_seq#64, d_week_seq#65] +Output [2]: [d_month_seq#59, d_week_seq#60] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1224), LessThanOrEqual(d_month_seq,1235), IsNotNull(d_week_seq)] ReadSchema: struct (35) ColumnarToRow [codegen id : 8] -Input [2]: [d_month_seq#64, d_week_seq#65] +Input [2]: [d_month_seq#59, d_week_seq#60] (36) Filter [codegen id : 8] -Input [2]: [d_month_seq#64, d_week_seq#65] -Condition : (((isnotnull(d_month_seq#64) AND (d_month_seq#64 >= 1224)) AND (d_month_seq#64 <= 1235)) AND isnotnull(d_week_seq#65)) +Input [2]: [d_month_seq#59, d_week_seq#60] +Condition : (((isnotnull(d_month_seq#59) AND (d_month_seq#59 >= 1224)) AND (d_month_seq#59 <= 1235)) AND isnotnull(d_week_seq#60)) (37) Project [codegen id : 8] -Output [1]: [d_week_seq#65] -Input [2]: [d_month_seq#64, d_week_seq#65] +Output [1]: [d_week_seq#60] +Input [2]: [d_month_seq#59, d_week_seq#60] (38) BroadcastExchange -Input [1]: [d_week_seq#65] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#66] +Input [1]: [d_week_seq#60] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] (39) BroadcastHashJoin [codegen id : 9] Left keys [1]: [d_week_seq#5] -Right keys [1]: [d_week_seq#65] +Right keys [1]: [d_week_seq#60] Join condition: None (40) Project [codegen id : 9] -Output [9]: [d_week_seq#5 AS d_week_seq2#67, s_store_id#62 AS s_store_id2#68, sun_sales#30 AS sun_sales2#69, mon_sales#31 AS mon_sales2#70, tue_sales#32 AS tue_sales2#71, wed_sales#33 AS wed_sales2#72, thu_sales#34 AS thu_sales2#73, fri_sales#35 AS fri_sales2#74, sat_sales#36 AS sat_sales2#75] -Input [10]: [d_week_seq#5, sun_sales#30, mon_sales#31, tue_sales#32, wed_sales#33, thu_sales#34, fri_sales#35, sat_sales#36, s_store_id#62, d_week_seq#65] +Output [9]: [d_week_seq#5 AS d_week_seq2#61, s_store_id#58 AS s_store_id2#62, sun_sales#28 AS sun_sales2#63, mon_sales#29 AS mon_sales2#64, tue_sales#30 AS tue_sales2#65, wed_sales#31 AS wed_sales2#66, thu_sales#32 AS thu_sales2#67, fri_sales#33 AS fri_sales2#68, sat_sales#34 AS sat_sales2#69] +Input [10]: [d_week_seq#5, sun_sales#28, mon_sales#29, tue_sales#30, wed_sales#31, thu_sales#32, fri_sales#33, sat_sales#34, s_store_id#58, d_week_seq#60] (41) BroadcastExchange -Input [9]: [d_week_seq2#67, s_store_id2#68, sun_sales2#69, mon_sales2#70, tue_sales2#71, wed_sales2#72, thu_sales2#73, fri_sales2#74, sat_sales2#75] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [id=#76] +Input [9]: [d_week_seq2#61, s_store_id2#62, sun_sales2#63, mon_sales2#64, tue_sales2#65, wed_sales2#66, thu_sales2#67, fri_sales2#68, sat_sales2#69] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true], (input[0, int, true] - 52)),false), [plan_id=7] (42) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [s_store_id1#46, d_week_seq1#45] -Right keys [2]: [s_store_id2#68, (d_week_seq2#67 - 52)] +Left keys [2]: [s_store_id1#42, d_week_seq1#41] +Right keys [2]: [s_store_id2#62, (d_week_seq2#61 - 52)] Join condition: None (43) Project [codegen id : 10] -Output [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, CheckOverflow((promote_precision(sun_sales1#47) / promote_precision(sun_sales2#69)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#77, CheckOverflow((promote_precision(mon_sales1#48) / promote_precision(mon_sales2#70)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#78, CheckOverflow((promote_precision(tue_sales1#49) / promote_precision(tue_sales2#71)), DecimalType(37,20)) AS (tue_sales1 / tue_sales2)#79, CheckOverflow((promote_precision(wed_sales1#50) / promote_precision(wed_sales2#72)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#80, CheckOverflow((promote_precision(thu_sales1#51) / promote_precision(thu_sales2#73)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#81, CheckOverflow((promote_precision(fri_sales1#52) / promote_precision(fri_sales2#74)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#82, CheckOverflow((promote_precision(sat_sales1#53) / promote_precision(sat_sales2#75)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#83] -Input [19]: [s_store_name1#44, d_week_seq1#45, s_store_id1#46, sun_sales1#47, mon_sales1#48, tue_sales1#49, wed_sales1#50, thu_sales1#51, fri_sales1#52, sat_sales1#53, d_week_seq2#67, s_store_id2#68, sun_sales2#69, mon_sales2#70, tue_sales2#71, wed_sales2#72, thu_sales2#73, fri_sales2#74, sat_sales2#75] +Output [10]: [s_store_name1#40, s_store_id1#42, d_week_seq1#41, CheckOverflow((promote_precision(sun_sales1#43) / promote_precision(sun_sales2#63)), DecimalType(37,20)) AS (sun_sales1 / sun_sales2)#70, CheckOverflow((promote_precision(mon_sales1#44) / promote_precision(mon_sales2#64)), DecimalType(37,20)) AS (mon_sales1 / mon_sales2)#71, CheckOverflow((promote_precision(tue_sales1#45) / promote_precision(tue_sales2#65)), DecimalType(37,20)) AS (tue_sales1 / tue_sales2)#72, CheckOverflow((promote_precision(wed_sales1#46) / promote_precision(wed_sales2#66)), DecimalType(37,20)) AS (wed_sales1 / wed_sales2)#73, CheckOverflow((promote_precision(thu_sales1#47) / promote_precision(thu_sales2#67)), DecimalType(37,20)) AS (thu_sales1 / thu_sales2)#74, CheckOverflow((promote_precision(fri_sales1#48) / promote_precision(fri_sales2#68)), DecimalType(37,20)) AS (fri_sales1 / fri_sales2)#75, CheckOverflow((promote_precision(sat_sales1#49) / promote_precision(sat_sales2#69)), DecimalType(37,20)) AS (sat_sales1 / sat_sales2)#76] +Input [19]: [s_store_name1#40, d_week_seq1#41, s_store_id1#42, sun_sales1#43, mon_sales1#44, tue_sales1#45, wed_sales1#46, thu_sales1#47, fri_sales1#48, sat_sales1#49, d_week_seq2#61, s_store_id2#62, sun_sales2#63, mon_sales2#64, tue_sales2#65, wed_sales2#66, thu_sales2#67, fri_sales2#68, sat_sales2#69] (44) TakeOrderedAndProject -Input [10]: [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#77, (mon_sales1 / mon_sales2)#78, (tue_sales1 / tue_sales2)#79, (wed_sales1 / wed_sales2)#80, (thu_sales1 / thu_sales2)#81, (fri_sales1 / fri_sales2)#82, (sat_sales1 / sat_sales2)#83] -Arguments: 100, [s_store_name1#44 ASC NULLS FIRST, s_store_id1#46 ASC NULLS FIRST, d_week_seq1#45 ASC NULLS FIRST], [s_store_name1#44, s_store_id1#46, d_week_seq1#45, (sun_sales1 / sun_sales2)#77, (mon_sales1 / mon_sales2)#78, (tue_sales1 / tue_sales2)#79, (wed_sales1 / wed_sales2)#80, (thu_sales1 / thu_sales2)#81, (fri_sales1 / fri_sales2)#82, (sat_sales1 / sat_sales2)#83] +Input [10]: [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1 / sun_sales2)#70, (mon_sales1 / mon_sales2)#71, (tue_sales1 / tue_sales2)#72, (wed_sales1 / wed_sales2)#73, (thu_sales1 / thu_sales2)#74, (fri_sales1 / fri_sales2)#75, (sat_sales1 / sat_sales2)#76] +Arguments: 100, [s_store_name1#40 ASC NULLS FIRST, s_store_id1#42 ASC NULLS FIRST, d_week_seq1#41 ASC NULLS FIRST], [s_store_name1#40, s_store_id1#42, d_week_seq1#41, (sun_sales1 / sun_sales2)#70, (mon_sales1 / mon_sales2)#71, (tue_sales1 / tue_sales2)#72, (wed_sales1 / wed_sales2)#73, (thu_sales1 / thu_sales2)#74, (fri_sales1 / fri_sales2)#75, (sat_sales1 / sat_sales2)#76] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.sf100/explain.txt index 5ba9cf5ab20b6..76c4cf1b4354c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60.sf100/explain.txt @@ -111,7 +111,7 @@ Input [2]: [ca_address_sk#7, ca_gmt_offset#8] (11) BroadcastExchange Input [1]: [ca_address_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_addr_sk#2] @@ -123,238 +123,238 @@ Output [2]: [ss_item_sk#1, ss_ext_sales_price#3] Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#7] (14) Scan parquet default.item -Output [2]: [i_item_sk#10, i_item_id#11] +Output [2]: [i_item_sk#9, i_item_id#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#10, i_item_id#11] +Input [2]: [i_item_sk#9, i_item_id#10] (16) Filter [codegen id : 4] -Input [2]: [i_item_sk#10, i_item_id#11] -Condition : isnotnull(i_item_sk#10) +Input [2]: [i_item_sk#9, i_item_id#10] +Condition : isnotnull(i_item_sk#9) (17) Scan parquet default.item -Output [2]: [i_item_id#12, i_category#13] +Output [2]: [i_item_id#11, i_category#12] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music )] ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_id#12, i_category#13] +Input [2]: [i_item_id#11, i_category#12] (19) Filter [codegen id : 3] -Input [2]: [i_item_id#12, i_category#13] -Condition : (isnotnull(i_category#13) AND (i_category#13 = Music )) +Input [2]: [i_item_id#11, i_category#12] +Condition : (isnotnull(i_category#12) AND (i_category#12 = Music )) (20) Project [codegen id : 3] -Output [1]: [i_item_id#12] -Input [2]: [i_item_id#12, i_category#13] +Output [1]: [i_item_id#11] +Input [2]: [i_item_id#11, i_category#12] (21) BroadcastExchange -Input [1]: [i_item_id#12] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#14] +Input [1]: [i_item_id#11] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=2] (22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_id#11] -Right keys [1]: [i_item_id#12] +Left keys [1]: [i_item_id#10] +Right keys [1]: [i_item_id#11] Join condition: None (23) BroadcastExchange -Input [2]: [i_item_sk#10, i_item_id#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Input [2]: [i_item_sk#9, i_item_id#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#10] +Right keys [1]: [i_item_sk#9] Join condition: None (25) Project [codegen id : 5] -Output [2]: [ss_ext_sales_price#3, i_item_id#11] -Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_item_id#11] +Output [2]: [ss_ext_sales_price#3, i_item_id#10] +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#9, i_item_id#10] (26) HashAggregate [codegen id : 5] -Input [2]: [ss_ext_sales_price#3, i_item_id#11] -Keys [1]: [i_item_id#11] +Input [2]: [ss_ext_sales_price#3, i_item_id#10] +Keys [1]: [i_item_id#10] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum#16] -Results [2]: [i_item_id#11, sum#17] +Aggregate Attributes [1]: [sum#13] +Results [2]: [i_item_id#10, sum#14] (27) Exchange -Input [2]: [i_item_id#11, sum#17] -Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [2]: [i_item_id#10, sum#14] +Arguments: hashpartitioning(i_item_id#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 6] -Input [2]: [i_item_id#11, sum#17] -Keys [1]: [i_item_id#11] +Input [2]: [i_item_id#10, sum#14] +Keys [1]: [i_item_id#10] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#19] -Results [2]: [i_item_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#19,17,2) AS total_sales#20] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#15] +Results [2]: [i_item_id#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#15,17,2) AS total_sales#16] (29) Scan parquet default.catalog_sales -Output [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] +Output [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#24), dynamicpruningexpression(cs_sold_date_sk#24 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#20), dynamicpruningexpression(cs_sold_date_sk#20 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 11] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] (31) Filter [codegen id : 11] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] -Condition : (isnotnull(cs_bill_addr_sk#21) AND isnotnull(cs_item_sk#22)) +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] +Condition : (isnotnull(cs_bill_addr_sk#17) AND isnotnull(cs_item_sk#18)) (32) ReusedExchange [Reuses operator id: 68] -Output [1]: [d_date_sk#25] +Output [1]: [d_date_sk#21] (33) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_date_sk#24] -Right keys [1]: [d_date_sk#25] +Left keys [1]: [cs_sold_date_sk#20] +Right keys [1]: [d_date_sk#21] Join condition: None (34) Project [codegen id : 11] -Output [3]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23] -Input [5]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24, d_date_sk#25] +Output [3]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19] +Input [5]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20, d_date_sk#21] (35) ReusedExchange [Reuses operator id: 11] -Output [1]: [ca_address_sk#26] +Output [1]: [ca_address_sk#22] (36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_bill_addr_sk#21] -Right keys [1]: [ca_address_sk#26] +Left keys [1]: [cs_bill_addr_sk#17] +Right keys [1]: [ca_address_sk#22] Join condition: None (37) Project [codegen id : 11] -Output [2]: [cs_item_sk#22, cs_ext_sales_price#23] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, ca_address_sk#26] +Output [2]: [cs_item_sk#18, cs_ext_sales_price#19] +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, ca_address_sk#22] (38) ReusedExchange [Reuses operator id: 23] -Output [2]: [i_item_sk#27, i_item_id#28] +Output [2]: [i_item_sk#23, i_item_id#24] (39) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_item_sk#22] -Right keys [1]: [i_item_sk#27] +Left keys [1]: [cs_item_sk#18] +Right keys [1]: [i_item_sk#23] Join condition: None (40) Project [codegen id : 11] -Output [2]: [cs_ext_sales_price#23, i_item_id#28] -Input [4]: [cs_item_sk#22, cs_ext_sales_price#23, i_item_sk#27, i_item_id#28] +Output [2]: [cs_ext_sales_price#19, i_item_id#24] +Input [4]: [cs_item_sk#18, cs_ext_sales_price#19, i_item_sk#23, i_item_id#24] (41) HashAggregate [codegen id : 11] -Input [2]: [cs_ext_sales_price#23, i_item_id#28] -Keys [1]: [i_item_id#28] -Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#23))] -Aggregate Attributes [1]: [sum#29] -Results [2]: [i_item_id#28, sum#30] +Input [2]: [cs_ext_sales_price#19, i_item_id#24] +Keys [1]: [i_item_id#24] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#19))] +Aggregate Attributes [1]: [sum#25] +Results [2]: [i_item_id#24, sum#26] (42) Exchange -Input [2]: [i_item_id#28, sum#30] -Arguments: hashpartitioning(i_item_id#28, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [2]: [i_item_id#24, sum#26] +Arguments: hashpartitioning(i_item_id#24, 5), ENSURE_REQUIREMENTS, [plan_id=5] (43) HashAggregate [codegen id : 12] -Input [2]: [i_item_id#28, sum#30] -Keys [1]: [i_item_id#28] -Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#23))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#23))#32] -Results [2]: [i_item_id#28, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#23))#32,17,2) AS total_sales#33] +Input [2]: [i_item_id#24, sum#26] +Keys [1]: [i_item_id#24] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#19))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#19))#27] +Results [2]: [i_item_id#24, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#19))#27,17,2) AS total_sales#28] (44) Scan parquet default.web_sales -Output [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Output [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#37), dynamicpruningexpression(ws_sold_date_sk#37 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#32), dynamicpruningexpression(ws_sold_date_sk#32 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 17] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] (46) Filter [codegen id : 17] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] -Condition : (isnotnull(ws_bill_addr_sk#35) AND isnotnull(ws_item_sk#34)) +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] +Condition : (isnotnull(ws_bill_addr_sk#30) AND isnotnull(ws_item_sk#29)) (47) ReusedExchange [Reuses operator id: 68] -Output [1]: [d_date_sk#38] +Output [1]: [d_date_sk#33] (48) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#37] -Right keys [1]: [d_date_sk#38] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#33] Join condition: None (49) Project [codegen id : 17] -Output [3]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36] -Input [5]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37, d_date_sk#38] +Output [3]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31] +Input [5]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32, d_date_sk#33] (50) ReusedExchange [Reuses operator id: 11] -Output [1]: [ca_address_sk#39] +Output [1]: [ca_address_sk#34] (51) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_bill_addr_sk#35] -Right keys [1]: [ca_address_sk#39] +Left keys [1]: [ws_bill_addr_sk#30] +Right keys [1]: [ca_address_sk#34] Join condition: None (52) Project [codegen id : 17] -Output [2]: [ws_item_sk#34, ws_ext_sales_price#36] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ca_address_sk#39] +Output [2]: [ws_item_sk#29, ws_ext_sales_price#31] +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ca_address_sk#34] (53) ReusedExchange [Reuses operator id: 23] -Output [2]: [i_item_sk#40, i_item_id#41] +Output [2]: [i_item_sk#35, i_item_id#36] (54) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_item_sk#34] -Right keys [1]: [i_item_sk#40] +Left keys [1]: [ws_item_sk#29] +Right keys [1]: [i_item_sk#35] Join condition: None (55) Project [codegen id : 17] -Output [2]: [ws_ext_sales_price#36, i_item_id#41] -Input [4]: [ws_item_sk#34, ws_ext_sales_price#36, i_item_sk#40, i_item_id#41] +Output [2]: [ws_ext_sales_price#31, i_item_id#36] +Input [4]: [ws_item_sk#29, ws_ext_sales_price#31, i_item_sk#35, i_item_id#36] (56) HashAggregate [codegen id : 17] -Input [2]: [ws_ext_sales_price#36, i_item_id#41] -Keys [1]: [i_item_id#41] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#36))] -Aggregate Attributes [1]: [sum#42] -Results [2]: [i_item_id#41, sum#43] +Input [2]: [ws_ext_sales_price#31, i_item_id#36] +Keys [1]: [i_item_id#36] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#31))] +Aggregate Attributes [1]: [sum#37] +Results [2]: [i_item_id#36, sum#38] (57) Exchange -Input [2]: [i_item_id#41, sum#43] -Arguments: hashpartitioning(i_item_id#41, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [2]: [i_item_id#36, sum#38] +Arguments: hashpartitioning(i_item_id#36, 5), ENSURE_REQUIREMENTS, [plan_id=6] (58) HashAggregate [codegen id : 18] -Input [2]: [i_item_id#41, sum#43] -Keys [1]: [i_item_id#41] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#36))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#36))#45] -Results [2]: [i_item_id#41, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#36))#45,17,2) AS total_sales#46] +Input [2]: [i_item_id#36, sum#38] +Keys [1]: [i_item_id#36] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#31))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#31))#39] +Results [2]: [i_item_id#36, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#31))#39,17,2) AS total_sales#40] (59) Union (60) HashAggregate [codegen id : 19] -Input [2]: [i_item_id#11, total_sales#20] -Keys [1]: [i_item_id#11] -Functions [1]: [partial_sum(total_sales#20)] -Aggregate Attributes [2]: [sum#47, isEmpty#48] -Results [3]: [i_item_id#11, sum#49, isEmpty#50] +Input [2]: [i_item_id#10, total_sales#16] +Keys [1]: [i_item_id#10] +Functions [1]: [partial_sum(total_sales#16)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_item_id#10, sum#43, isEmpty#44] (61) Exchange -Input [3]: [i_item_id#11, sum#49, isEmpty#50] -Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [3]: [i_item_id#10, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_item_id#10, 5), ENSURE_REQUIREMENTS, [plan_id=7] (62) HashAggregate [codegen id : 20] -Input [3]: [i_item_id#11, sum#49, isEmpty#50] -Keys [1]: [i_item_id#11] -Functions [1]: [sum(total_sales#20)] -Aggregate Attributes [1]: [sum(total_sales#20)#52] -Results [2]: [i_item_id#11, sum(total_sales#20)#52 AS total_sales#53] +Input [3]: [i_item_id#10, sum#43, isEmpty#44] +Keys [1]: [i_item_id#10] +Functions [1]: [sum(total_sales#16)] +Aggregate Attributes [1]: [sum(total_sales#16)#45] +Results [2]: [i_item_id#10, sum(total_sales#16)#45 AS total_sales#46] (63) TakeOrderedAndProject -Input [2]: [i_item_id#11, total_sales#53] -Arguments: 100, [i_item_id#11 ASC NULLS FIRST, total_sales#53 ASC NULLS FIRST], [i_item_id#11, total_sales#53] +Input [2]: [i_item_id#10, total_sales#46] +Arguments: 100, [i_item_id#10 ASC NULLS FIRST, total_sales#46 ASC NULLS FIRST], [i_item_id#10, total_sales#46] ===== Subqueries ===== @@ -367,29 +367,29 @@ BroadcastExchange (68) (64) Scan parquet default.date_dim -Output [3]: [d_date_sk#6, d_year#54, d_moy#55] +Output [3]: [d_date_sk#6, d_year#47, d_moy#48] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)] ReadSchema: struct (65) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] (66) Filter [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] -Condition : ((((isnotnull(d_year#54) AND isnotnull(d_moy#55)) AND (d_year#54 = 1998)) AND (d_moy#55 = 9)) AND isnotnull(d_date_sk#6)) +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] +Condition : ((((isnotnull(d_year#47) AND isnotnull(d_moy#48)) AND (d_year#47 = 1998)) AND (d_moy#48 = 9)) AND isnotnull(d_date_sk#6)) (67) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] (68) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#56] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 29 Hosting Expression = cs_sold_date_sk#24 IN dynamicpruning#5 +Subquery:2 Hosting operator id = 29 Hosting Expression = cs_sold_date_sk#20 IN dynamicpruning#5 -Subquery:3 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#37 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#32 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt index 5ba9cf5ab20b6..76c4cf1b4354c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q60/explain.txt @@ -111,7 +111,7 @@ Input [2]: [ca_address_sk#7, ca_gmt_offset#8] (11) BroadcastExchange Input [1]: [ca_address_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_addr_sk#2] @@ -123,238 +123,238 @@ Output [2]: [ss_item_sk#1, ss_ext_sales_price#3] Input [4]: [ss_item_sk#1, ss_addr_sk#2, ss_ext_sales_price#3, ca_address_sk#7] (14) Scan parquet default.item -Output [2]: [i_item_sk#10, i_item_id#11] +Output [2]: [i_item_sk#9, i_item_id#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#10, i_item_id#11] +Input [2]: [i_item_sk#9, i_item_id#10] (16) Filter [codegen id : 4] -Input [2]: [i_item_sk#10, i_item_id#11] -Condition : isnotnull(i_item_sk#10) +Input [2]: [i_item_sk#9, i_item_id#10] +Condition : isnotnull(i_item_sk#9) (17) Scan parquet default.item -Output [2]: [i_item_id#12, i_category#13] +Output [2]: [i_item_id#11, i_category#12] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Music )] ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_id#12, i_category#13] +Input [2]: [i_item_id#11, i_category#12] (19) Filter [codegen id : 3] -Input [2]: [i_item_id#12, i_category#13] -Condition : (isnotnull(i_category#13) AND (i_category#13 = Music )) +Input [2]: [i_item_id#11, i_category#12] +Condition : (isnotnull(i_category#12) AND (i_category#12 = Music )) (20) Project [codegen id : 3] -Output [1]: [i_item_id#12] -Input [2]: [i_item_id#12, i_category#13] +Output [1]: [i_item_id#11] +Input [2]: [i_item_id#11, i_category#12] (21) BroadcastExchange -Input [1]: [i_item_id#12] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#14] +Input [1]: [i_item_id#11] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=2] (22) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [i_item_id#11] -Right keys [1]: [i_item_id#12] +Left keys [1]: [i_item_id#10] +Right keys [1]: [i_item_id#11] Join condition: None (23) BroadcastExchange -Input [2]: [i_item_sk#10, i_item_id#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Input [2]: [i_item_sk#9, i_item_id#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#10] +Right keys [1]: [i_item_sk#9] Join condition: None (25) Project [codegen id : 5] -Output [2]: [ss_ext_sales_price#3, i_item_id#11] -Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#10, i_item_id#11] +Output [2]: [ss_ext_sales_price#3, i_item_id#10] +Input [4]: [ss_item_sk#1, ss_ext_sales_price#3, i_item_sk#9, i_item_id#10] (26) HashAggregate [codegen id : 5] -Input [2]: [ss_ext_sales_price#3, i_item_id#11] -Keys [1]: [i_item_id#11] +Input [2]: [ss_ext_sales_price#3, i_item_id#10] +Keys [1]: [i_item_id#10] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum#16] -Results [2]: [i_item_id#11, sum#17] +Aggregate Attributes [1]: [sum#13] +Results [2]: [i_item_id#10, sum#14] (27) Exchange -Input [2]: [i_item_id#11, sum#17] -Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [2]: [i_item_id#10, sum#14] +Arguments: hashpartitioning(i_item_id#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 6] -Input [2]: [i_item_id#11, sum#17] -Keys [1]: [i_item_id#11] +Input [2]: [i_item_id#10, sum#14] +Keys [1]: [i_item_id#10] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#19] -Results [2]: [i_item_id#11, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#19,17,2) AS total_sales#20] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#3))#15] +Results [2]: [i_item_id#10, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#15,17,2) AS total_sales#16] (29) Scan parquet default.catalog_sales -Output [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] +Output [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#24), dynamicpruningexpression(cs_sold_date_sk#24 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#20), dynamicpruningexpression(cs_sold_date_sk#20 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_bill_addr_sk), IsNotNull(cs_item_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 11] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] (31) Filter [codegen id : 11] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24] -Condition : (isnotnull(cs_bill_addr_sk#21) AND isnotnull(cs_item_sk#22)) +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20] +Condition : (isnotnull(cs_bill_addr_sk#17) AND isnotnull(cs_item_sk#18)) (32) ReusedExchange [Reuses operator id: 68] -Output [1]: [d_date_sk#25] +Output [1]: [d_date_sk#21] (33) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_date_sk#24] -Right keys [1]: [d_date_sk#25] +Left keys [1]: [cs_sold_date_sk#20] +Right keys [1]: [d_date_sk#21] Join condition: None (34) Project [codegen id : 11] -Output [3]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23] -Input [5]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, cs_sold_date_sk#24, d_date_sk#25] +Output [3]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19] +Input [5]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, cs_sold_date_sk#20, d_date_sk#21] (35) ReusedExchange [Reuses operator id: 11] -Output [1]: [ca_address_sk#26] +Output [1]: [ca_address_sk#22] (36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_bill_addr_sk#21] -Right keys [1]: [ca_address_sk#26] +Left keys [1]: [cs_bill_addr_sk#17] +Right keys [1]: [ca_address_sk#22] Join condition: None (37) Project [codegen id : 11] -Output [2]: [cs_item_sk#22, cs_ext_sales_price#23] -Input [4]: [cs_bill_addr_sk#21, cs_item_sk#22, cs_ext_sales_price#23, ca_address_sk#26] +Output [2]: [cs_item_sk#18, cs_ext_sales_price#19] +Input [4]: [cs_bill_addr_sk#17, cs_item_sk#18, cs_ext_sales_price#19, ca_address_sk#22] (38) ReusedExchange [Reuses operator id: 23] -Output [2]: [i_item_sk#27, i_item_id#28] +Output [2]: [i_item_sk#23, i_item_id#24] (39) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_item_sk#22] -Right keys [1]: [i_item_sk#27] +Left keys [1]: [cs_item_sk#18] +Right keys [1]: [i_item_sk#23] Join condition: None (40) Project [codegen id : 11] -Output [2]: [cs_ext_sales_price#23, i_item_id#28] -Input [4]: [cs_item_sk#22, cs_ext_sales_price#23, i_item_sk#27, i_item_id#28] +Output [2]: [cs_ext_sales_price#19, i_item_id#24] +Input [4]: [cs_item_sk#18, cs_ext_sales_price#19, i_item_sk#23, i_item_id#24] (41) HashAggregate [codegen id : 11] -Input [2]: [cs_ext_sales_price#23, i_item_id#28] -Keys [1]: [i_item_id#28] -Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#23))] -Aggregate Attributes [1]: [sum#29] -Results [2]: [i_item_id#28, sum#30] +Input [2]: [cs_ext_sales_price#19, i_item_id#24] +Keys [1]: [i_item_id#24] +Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#19))] +Aggregate Attributes [1]: [sum#25] +Results [2]: [i_item_id#24, sum#26] (42) Exchange -Input [2]: [i_item_id#28, sum#30] -Arguments: hashpartitioning(i_item_id#28, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [2]: [i_item_id#24, sum#26] +Arguments: hashpartitioning(i_item_id#24, 5), ENSURE_REQUIREMENTS, [plan_id=5] (43) HashAggregate [codegen id : 12] -Input [2]: [i_item_id#28, sum#30] -Keys [1]: [i_item_id#28] -Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#23))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#23))#32] -Results [2]: [i_item_id#28, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#23))#32,17,2) AS total_sales#33] +Input [2]: [i_item_id#24, sum#26] +Keys [1]: [i_item_id#24] +Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#19))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#19))#27] +Results [2]: [i_item_id#24, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#19))#27,17,2) AS total_sales#28] (44) Scan parquet default.web_sales -Output [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Output [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#37), dynamicpruningexpression(ws_sold_date_sk#37 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#32), dynamicpruningexpression(ws_sold_date_sk#32 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_bill_addr_sk), IsNotNull(ws_item_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 17] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] (46) Filter [codegen id : 17] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37] -Condition : (isnotnull(ws_bill_addr_sk#35) AND isnotnull(ws_item_sk#34)) +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32] +Condition : (isnotnull(ws_bill_addr_sk#30) AND isnotnull(ws_item_sk#29)) (47) ReusedExchange [Reuses operator id: 68] -Output [1]: [d_date_sk#38] +Output [1]: [d_date_sk#33] (48) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#37] -Right keys [1]: [d_date_sk#38] +Left keys [1]: [ws_sold_date_sk#32] +Right keys [1]: [d_date_sk#33] Join condition: None (49) Project [codegen id : 17] -Output [3]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36] -Input [5]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ws_sold_date_sk#37, d_date_sk#38] +Output [3]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31] +Input [5]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ws_sold_date_sk#32, d_date_sk#33] (50) ReusedExchange [Reuses operator id: 11] -Output [1]: [ca_address_sk#39] +Output [1]: [ca_address_sk#34] (51) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_bill_addr_sk#35] -Right keys [1]: [ca_address_sk#39] +Left keys [1]: [ws_bill_addr_sk#30] +Right keys [1]: [ca_address_sk#34] Join condition: None (52) Project [codegen id : 17] -Output [2]: [ws_item_sk#34, ws_ext_sales_price#36] -Input [4]: [ws_item_sk#34, ws_bill_addr_sk#35, ws_ext_sales_price#36, ca_address_sk#39] +Output [2]: [ws_item_sk#29, ws_ext_sales_price#31] +Input [4]: [ws_item_sk#29, ws_bill_addr_sk#30, ws_ext_sales_price#31, ca_address_sk#34] (53) ReusedExchange [Reuses operator id: 23] -Output [2]: [i_item_sk#40, i_item_id#41] +Output [2]: [i_item_sk#35, i_item_id#36] (54) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_item_sk#34] -Right keys [1]: [i_item_sk#40] +Left keys [1]: [ws_item_sk#29] +Right keys [1]: [i_item_sk#35] Join condition: None (55) Project [codegen id : 17] -Output [2]: [ws_ext_sales_price#36, i_item_id#41] -Input [4]: [ws_item_sk#34, ws_ext_sales_price#36, i_item_sk#40, i_item_id#41] +Output [2]: [ws_ext_sales_price#31, i_item_id#36] +Input [4]: [ws_item_sk#29, ws_ext_sales_price#31, i_item_sk#35, i_item_id#36] (56) HashAggregate [codegen id : 17] -Input [2]: [ws_ext_sales_price#36, i_item_id#41] -Keys [1]: [i_item_id#41] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#36))] -Aggregate Attributes [1]: [sum#42] -Results [2]: [i_item_id#41, sum#43] +Input [2]: [ws_ext_sales_price#31, i_item_id#36] +Keys [1]: [i_item_id#36] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#31))] +Aggregate Attributes [1]: [sum#37] +Results [2]: [i_item_id#36, sum#38] (57) Exchange -Input [2]: [i_item_id#41, sum#43] -Arguments: hashpartitioning(i_item_id#41, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [2]: [i_item_id#36, sum#38] +Arguments: hashpartitioning(i_item_id#36, 5), ENSURE_REQUIREMENTS, [plan_id=6] (58) HashAggregate [codegen id : 18] -Input [2]: [i_item_id#41, sum#43] -Keys [1]: [i_item_id#41] -Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#36))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#36))#45] -Results [2]: [i_item_id#41, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#36))#45,17,2) AS total_sales#46] +Input [2]: [i_item_id#36, sum#38] +Keys [1]: [i_item_id#36] +Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#31))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#31))#39] +Results [2]: [i_item_id#36, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#31))#39,17,2) AS total_sales#40] (59) Union (60) HashAggregate [codegen id : 19] -Input [2]: [i_item_id#11, total_sales#20] -Keys [1]: [i_item_id#11] -Functions [1]: [partial_sum(total_sales#20)] -Aggregate Attributes [2]: [sum#47, isEmpty#48] -Results [3]: [i_item_id#11, sum#49, isEmpty#50] +Input [2]: [i_item_id#10, total_sales#16] +Keys [1]: [i_item_id#10] +Functions [1]: [partial_sum(total_sales#16)] +Aggregate Attributes [2]: [sum#41, isEmpty#42] +Results [3]: [i_item_id#10, sum#43, isEmpty#44] (61) Exchange -Input [3]: [i_item_id#11, sum#49, isEmpty#50] -Arguments: hashpartitioning(i_item_id#11, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [3]: [i_item_id#10, sum#43, isEmpty#44] +Arguments: hashpartitioning(i_item_id#10, 5), ENSURE_REQUIREMENTS, [plan_id=7] (62) HashAggregate [codegen id : 20] -Input [3]: [i_item_id#11, sum#49, isEmpty#50] -Keys [1]: [i_item_id#11] -Functions [1]: [sum(total_sales#20)] -Aggregate Attributes [1]: [sum(total_sales#20)#52] -Results [2]: [i_item_id#11, sum(total_sales#20)#52 AS total_sales#53] +Input [3]: [i_item_id#10, sum#43, isEmpty#44] +Keys [1]: [i_item_id#10] +Functions [1]: [sum(total_sales#16)] +Aggregate Attributes [1]: [sum(total_sales#16)#45] +Results [2]: [i_item_id#10, sum(total_sales#16)#45 AS total_sales#46] (63) TakeOrderedAndProject -Input [2]: [i_item_id#11, total_sales#53] -Arguments: 100, [i_item_id#11 ASC NULLS FIRST, total_sales#53 ASC NULLS FIRST], [i_item_id#11, total_sales#53] +Input [2]: [i_item_id#10, total_sales#46] +Arguments: 100, [i_item_id#10 ASC NULLS FIRST, total_sales#46 ASC NULLS FIRST], [i_item_id#10, total_sales#46] ===== Subqueries ===== @@ -367,29 +367,29 @@ BroadcastExchange (68) (64) Scan parquet default.date_dim -Output [3]: [d_date_sk#6, d_year#54, d_moy#55] +Output [3]: [d_date_sk#6, d_year#47, d_moy#48] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,9), IsNotNull(d_date_sk)] ReadSchema: struct (65) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] (66) Filter [codegen id : 1] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] -Condition : ((((isnotnull(d_year#54) AND isnotnull(d_moy#55)) AND (d_year#54 = 1998)) AND (d_moy#55 = 9)) AND isnotnull(d_date_sk#6)) +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] +Condition : ((((isnotnull(d_year#47) AND isnotnull(d_moy#48)) AND (d_year#47 = 1998)) AND (d_moy#48 = 9)) AND isnotnull(d_date_sk#6)) (67) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [3]: [d_date_sk#6, d_year#54, d_moy#55] +Input [3]: [d_date_sk#6, d_year#47, d_moy#48] (68) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#56] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 29 Hosting Expression = cs_sold_date_sk#24 IN dynamicpruning#5 +Subquery:2 Hosting operator id = 29 Hosting Expression = cs_sold_date_sk#20 IN dynamicpruning#5 -Subquery:3 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#37 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 44 Hosting Expression = ws_sold_date_sk#32 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt index e83c4be6f7e5a..9fe0cc0d94dc1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61.sf100/explain.txt @@ -112,7 +112,7 @@ Input [2]: [i_item_sk#9, i_category#10] (11) BroadcastExchange Input [1]: [i_item_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 7] Left keys [1]: [ss_item_sk#1] @@ -124,234 +124,234 @@ Output [4]: [ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price# Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, i_item_sk#9] (14) Scan parquet default.promotion -Output [4]: [p_promo_sk#12, p_channel_dmail#13, p_channel_email#14, p_channel_tv#15] +Output [4]: [p_promo_sk#11, p_channel_dmail#12, p_channel_email#13, p_channel_tv#14] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull(p_promo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [4]: [p_promo_sk#12, p_channel_dmail#13, p_channel_email#14, p_channel_tv#15] +Input [4]: [p_promo_sk#11, p_channel_dmail#12, p_channel_email#13, p_channel_tv#14] (16) Filter [codegen id : 3] -Input [4]: [p_promo_sk#12, p_channel_dmail#13, p_channel_email#14, p_channel_tv#15] -Condition : ((((p_channel_dmail#13 = Y) OR (p_channel_email#14 = Y)) OR (p_channel_tv#15 = Y)) AND isnotnull(p_promo_sk#12)) +Input [4]: [p_promo_sk#11, p_channel_dmail#12, p_channel_email#13, p_channel_tv#14] +Condition : ((((p_channel_dmail#12 = Y) OR (p_channel_email#13 = Y)) OR (p_channel_tv#14 = Y)) AND isnotnull(p_promo_sk#11)) (17) Project [codegen id : 3] -Output [1]: [p_promo_sk#12] -Input [4]: [p_promo_sk#12, p_channel_dmail#13, p_channel_email#14, p_channel_tv#15] +Output [1]: [p_promo_sk#11] +Input [4]: [p_promo_sk#11, p_channel_dmail#12, p_channel_email#13, p_channel_tv#14] (18) BroadcastExchange -Input [1]: [p_promo_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] +Input [1]: [p_promo_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 7] Left keys [1]: [ss_promo_sk#4] -Right keys [1]: [p_promo_sk#12] +Right keys [1]: [p_promo_sk#11] Join condition: None (20) Project [codegen id : 7] Output [3]: [ss_customer_sk#2, ss_store_sk#3, ss_ext_sales_price#5] -Input [5]: [ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, p_promo_sk#12] +Input [5]: [ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, p_promo_sk#11] (21) Scan parquet default.store -Output [2]: [s_store_sk#17, s_gmt_offset#18] +Output [2]: [s_store_sk#15, s_gmt_offset#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_gmt_offset), EqualTo(s_gmt_offset,-5.00), IsNotNull(s_store_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 4] -Input [2]: [s_store_sk#17, s_gmt_offset#18] +Input [2]: [s_store_sk#15, s_gmt_offset#16] (23) Filter [codegen id : 4] -Input [2]: [s_store_sk#17, s_gmt_offset#18] -Condition : ((isnotnull(s_gmt_offset#18) AND (s_gmt_offset#18 = -5.00)) AND isnotnull(s_store_sk#17)) +Input [2]: [s_store_sk#15, s_gmt_offset#16] +Condition : ((isnotnull(s_gmt_offset#16) AND (s_gmt_offset#16 = -5.00)) AND isnotnull(s_store_sk#15)) (24) Project [codegen id : 4] -Output [1]: [s_store_sk#17] -Input [2]: [s_store_sk#17, s_gmt_offset#18] +Output [1]: [s_store_sk#15] +Input [2]: [s_store_sk#15, s_gmt_offset#16] (25) BroadcastExchange -Input [1]: [s_store_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (26) BroadcastHashJoin [codegen id : 7] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#17] +Right keys [1]: [s_store_sk#15] Join condition: None (27) Project [codegen id : 7] Output [2]: [ss_customer_sk#2, ss_ext_sales_price#5] -Input [4]: [ss_customer_sk#2, ss_store_sk#3, ss_ext_sales_price#5, s_store_sk#17] +Input [4]: [ss_customer_sk#2, ss_store_sk#3, ss_ext_sales_price#5, s_store_sk#15] (28) Scan parquet default.customer -Output [2]: [c_customer_sk#20, c_current_addr_sk#21] +Output [2]: [c_customer_sk#17, c_current_addr_sk#18] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (29) ColumnarToRow [codegen id : 6] -Input [2]: [c_customer_sk#20, c_current_addr_sk#21] +Input [2]: [c_customer_sk#17, c_current_addr_sk#18] (30) Filter [codegen id : 6] -Input [2]: [c_customer_sk#20, c_current_addr_sk#21] -Condition : (isnotnull(c_customer_sk#20) AND isnotnull(c_current_addr_sk#21)) +Input [2]: [c_customer_sk#17, c_current_addr_sk#18] +Condition : (isnotnull(c_customer_sk#17) AND isnotnull(c_current_addr_sk#18)) (31) Scan parquet default.customer_address -Output [2]: [ca_address_sk#22, ca_gmt_offset#23] +Output [2]: [ca_address_sk#19, ca_gmt_offset#20] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 5] -Input [2]: [ca_address_sk#22, ca_gmt_offset#23] +Input [2]: [ca_address_sk#19, ca_gmt_offset#20] (33) Filter [codegen id : 5] -Input [2]: [ca_address_sk#22, ca_gmt_offset#23] -Condition : ((isnotnull(ca_gmt_offset#23) AND (ca_gmt_offset#23 = -5.00)) AND isnotnull(ca_address_sk#22)) +Input [2]: [ca_address_sk#19, ca_gmt_offset#20] +Condition : ((isnotnull(ca_gmt_offset#20) AND (ca_gmt_offset#20 = -5.00)) AND isnotnull(ca_address_sk#19)) (34) Project [codegen id : 5] -Output [1]: [ca_address_sk#22] -Input [2]: [ca_address_sk#22, ca_gmt_offset#23] +Output [1]: [ca_address_sk#19] +Input [2]: [ca_address_sk#19, ca_gmt_offset#20] (35) BroadcastExchange -Input [1]: [ca_address_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] +Input [1]: [ca_address_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (36) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_current_addr_sk#21] -Right keys [1]: [ca_address_sk#22] +Left keys [1]: [c_current_addr_sk#18] +Right keys [1]: [ca_address_sk#19] Join condition: None (37) Project [codegen id : 6] -Output [1]: [c_customer_sk#20] -Input [3]: [c_customer_sk#20, c_current_addr_sk#21, ca_address_sk#22] +Output [1]: [c_customer_sk#17] +Input [3]: [c_customer_sk#17, c_current_addr_sk#18, ca_address_sk#19] (38) BroadcastExchange -Input [1]: [c_customer_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [c_customer_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (39) BroadcastHashJoin [codegen id : 7] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#20] +Right keys [1]: [c_customer_sk#17] Join condition: None (40) Project [codegen id : 7] Output [1]: [ss_ext_sales_price#5] -Input [3]: [ss_customer_sk#2, ss_ext_sales_price#5, c_customer_sk#20] +Input [3]: [ss_customer_sk#2, ss_ext_sales_price#5, c_customer_sk#17] (41) HashAggregate [codegen id : 7] Input [1]: [ss_ext_sales_price#5] Keys: [] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum#26] -Results [1]: [sum#27] +Aggregate Attributes [1]: [sum#21] +Results [1]: [sum#22] (42) Exchange -Input [1]: [sum#27] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#28] +Input [1]: [sum#22] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] (43) HashAggregate [codegen id : 15] -Input [1]: [sum#27] +Input [1]: [sum#22] Keys: [] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#29] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#29,17,2) AS promotions#30] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#23] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#23,17,2) AS promotions#24] (44) Scan parquet default.store_sales -Output [5]: [ss_item_sk#31, ss_customer_sk#32, ss_store_sk#33, ss_ext_sales_price#34, ss_sold_date_sk#35] +Output [5]: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, ss_sold_date_sk#29] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#35), dynamicpruningexpression(ss_sold_date_sk#35 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ss_sold_date_sk#29), dynamicpruningexpression(ss_sold_date_sk#29 IN dynamicpruning#7)] PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 13] -Input [5]: [ss_item_sk#31, ss_customer_sk#32, ss_store_sk#33, ss_ext_sales_price#34, ss_sold_date_sk#35] +Input [5]: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, ss_sold_date_sk#29] (46) Filter [codegen id : 13] -Input [5]: [ss_item_sk#31, ss_customer_sk#32, ss_store_sk#33, ss_ext_sales_price#34, ss_sold_date_sk#35] -Condition : ((isnotnull(ss_store_sk#33) AND isnotnull(ss_customer_sk#32)) AND isnotnull(ss_item_sk#31)) +Input [5]: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, ss_sold_date_sk#29] +Condition : ((isnotnull(ss_store_sk#27) AND isnotnull(ss_customer_sk#26)) AND isnotnull(ss_item_sk#25)) (47) ReusedExchange [Reuses operator id: 69] -Output [1]: [d_date_sk#36] +Output [1]: [d_date_sk#30] (48) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_sold_date_sk#35] -Right keys [1]: [d_date_sk#36] +Left keys [1]: [ss_sold_date_sk#29] +Right keys [1]: [d_date_sk#30] Join condition: None (49) Project [codegen id : 13] -Output [4]: [ss_item_sk#31, ss_customer_sk#32, ss_store_sk#33, ss_ext_sales_price#34] -Input [6]: [ss_item_sk#31, ss_customer_sk#32, ss_store_sk#33, ss_ext_sales_price#34, ss_sold_date_sk#35, d_date_sk#36] +Output [4]: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28] +Input [6]: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, ss_sold_date_sk#29, d_date_sk#30] (50) ReusedExchange [Reuses operator id: 11] -Output [1]: [i_item_sk#37] +Output [1]: [i_item_sk#31] (51) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_item_sk#31] -Right keys [1]: [i_item_sk#37] +Left keys [1]: [ss_item_sk#25] +Right keys [1]: [i_item_sk#31] Join condition: None (52) Project [codegen id : 13] -Output [3]: [ss_customer_sk#32, ss_store_sk#33, ss_ext_sales_price#34] -Input [5]: [ss_item_sk#31, ss_customer_sk#32, ss_store_sk#33, ss_ext_sales_price#34, i_item_sk#37] +Output [3]: [ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28] +Input [5]: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, i_item_sk#31] (53) ReusedExchange [Reuses operator id: 25] -Output [1]: [s_store_sk#38] +Output [1]: [s_store_sk#32] (54) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_store_sk#33] -Right keys [1]: [s_store_sk#38] +Left keys [1]: [ss_store_sk#27] +Right keys [1]: [s_store_sk#32] Join condition: None (55) Project [codegen id : 13] -Output [2]: [ss_customer_sk#32, ss_ext_sales_price#34] -Input [4]: [ss_customer_sk#32, ss_store_sk#33, ss_ext_sales_price#34, s_store_sk#38] +Output [2]: [ss_customer_sk#26, ss_ext_sales_price#28] +Input [4]: [ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, s_store_sk#32] (56) ReusedExchange [Reuses operator id: 38] -Output [1]: [c_customer_sk#39] +Output [1]: [c_customer_sk#33] (57) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_customer_sk#32] -Right keys [1]: [c_customer_sk#39] +Left keys [1]: [ss_customer_sk#26] +Right keys [1]: [c_customer_sk#33] Join condition: None (58) Project [codegen id : 13] -Output [1]: [ss_ext_sales_price#34] -Input [3]: [ss_customer_sk#32, ss_ext_sales_price#34, c_customer_sk#39] +Output [1]: [ss_ext_sales_price#28] +Input [3]: [ss_customer_sk#26, ss_ext_sales_price#28, c_customer_sk#33] (59) HashAggregate [codegen id : 13] -Input [1]: [ss_ext_sales_price#34] +Input [1]: [ss_ext_sales_price#28] Keys: [] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#34))] -Aggregate Attributes [1]: [sum#40] -Results [1]: [sum#41] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#28))] +Aggregate Attributes [1]: [sum#34] +Results [1]: [sum#35] (60) Exchange -Input [1]: [sum#41] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#42] +Input [1]: [sum#35] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (61) HashAggregate [codegen id : 14] -Input [1]: [sum#41] +Input [1]: [sum#35] Keys: [] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#34))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#34))#43] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#34))#43,17,2) AS total#44] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#28))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#28))#36] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#28))#36,17,2) AS total#37] (62) BroadcastExchange -Input [1]: [total#44] -Arguments: IdentityBroadcastMode, [id=#45] +Input [1]: [total#37] +Arguments: IdentityBroadcastMode, [plan_id=8] (63) BroadcastNestedLoopJoin [codegen id : 15] Join condition: None (64) Project [codegen id : 15] -Output [3]: [promotions#30, total#44, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#30 as decimal(15,4))) / promote_precision(cast(total#44 as decimal(15,4)))), DecimalType(35,20))) * 100.00000000000000000000), DecimalType(38,19)) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#46] -Input [2]: [promotions#30, total#44] +Output [3]: [promotions#24, total#37, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#24 as decimal(15,4))) / promote_precision(cast(total#37 as decimal(15,4)))), DecimalType(35,20))) * 100.00000000000000000000), DecimalType(38,19)) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#38] +Input [2]: [promotions#24, total#37] ===== Subqueries ===== @@ -364,27 +364,27 @@ BroadcastExchange (69) (65) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#47, d_moy#48] +Output [3]: [d_date_sk#8, d_year#39, d_moy#40] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct (66) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#47, d_moy#48] +Input [3]: [d_date_sk#8, d_year#39, d_moy#40] (67) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#47, d_moy#48] -Condition : ((((isnotnull(d_year#47) AND isnotnull(d_moy#48)) AND (d_year#47 = 1998)) AND (d_moy#48 = 11)) AND isnotnull(d_date_sk#8)) +Input [3]: [d_date_sk#8, d_year#39, d_moy#40] +Condition : ((((isnotnull(d_year#39) AND isnotnull(d_moy#40)) AND (d_year#39 = 1998)) AND (d_moy#40 = 11)) AND isnotnull(d_date_sk#8)) (68) Project [codegen id : 1] Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#47, d_moy#48] +Input [3]: [d_date_sk#8, d_year#39, d_moy#40] (69) BroadcastExchange Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] -Subquery:2 Hosting operator id = 44 Hosting Expression = ss_sold_date_sk#35 IN dynamicpruning#7 +Subquery:2 Hosting operator id = 44 Hosting Expression = ss_sold_date_sk#29 IN dynamicpruning#7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt index ebf1161c7a1f0..356747eb87d6f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q61/explain.txt @@ -103,7 +103,7 @@ Input [2]: [s_store_sk#8, s_gmt_offset#9] (8) BroadcastExchange Input [1]: [s_store_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 7] Left keys [1]: [ss_store_sk#3] @@ -115,258 +115,258 @@ Output [5]: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5 Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6, s_store_sk#8] (11) Scan parquet default.promotion -Output [4]: [p_promo_sk#11, p_channel_dmail#12, p_channel_email#13, p_channel_tv#14] +Output [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [Or(Or(EqualTo(p_channel_dmail,Y),EqualTo(p_channel_email,Y)),EqualTo(p_channel_tv,Y)), IsNotNull(p_promo_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [p_promo_sk#11, p_channel_dmail#12, p_channel_email#13, p_channel_tv#14] +Input [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] (13) Filter [codegen id : 2] -Input [4]: [p_promo_sk#11, p_channel_dmail#12, p_channel_email#13, p_channel_tv#14] -Condition : ((((p_channel_dmail#12 = Y) OR (p_channel_email#13 = Y)) OR (p_channel_tv#14 = Y)) AND isnotnull(p_promo_sk#11)) +Input [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] +Condition : ((((p_channel_dmail#11 = Y) OR (p_channel_email#12 = Y)) OR (p_channel_tv#13 = Y)) AND isnotnull(p_promo_sk#10)) (14) Project [codegen id : 2] -Output [1]: [p_promo_sk#11] -Input [4]: [p_promo_sk#11, p_channel_dmail#12, p_channel_email#13, p_channel_tv#14] +Output [1]: [p_promo_sk#10] +Input [4]: [p_promo_sk#10, p_channel_dmail#11, p_channel_email#12, p_channel_tv#13] (15) BroadcastExchange -Input [1]: [p_promo_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [p_promo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 7] Left keys [1]: [ss_promo_sk#4] -Right keys [1]: [p_promo_sk#11] +Right keys [1]: [p_promo_sk#10] Join condition: None (17) Project [codegen id : 7] Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6] -Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6, p_promo_sk#11] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_promo_sk#4, ss_ext_sales_price#5, ss_sold_date_sk#6, p_promo_sk#10] (18) ReusedExchange [Reuses operator id: 72] -Output [1]: [d_date_sk#16] +Output [1]: [d_date_sk#14] (19) BroadcastHashJoin [codegen id : 7] Left keys [1]: [ss_sold_date_sk#6] -Right keys [1]: [d_date_sk#16] +Right keys [1]: [d_date_sk#14] Join condition: None (20) Project [codegen id : 7] Output [3]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5] -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6, d_date_sk#16] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, ss_sold_date_sk#6, d_date_sk#14] (21) Scan parquet default.customer -Output [2]: [c_customer_sk#17, c_current_addr_sk#18] +Output [2]: [c_customer_sk#15, c_current_addr_sk#16] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 4] -Input [2]: [c_customer_sk#17, c_current_addr_sk#18] +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] (23) Filter [codegen id : 4] -Input [2]: [c_customer_sk#17, c_current_addr_sk#18] -Condition : (isnotnull(c_customer_sk#17) AND isnotnull(c_current_addr_sk#18)) +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Condition : (isnotnull(c_customer_sk#15) AND isnotnull(c_current_addr_sk#16)) (24) BroadcastExchange -Input [2]: [c_customer_sk#17, c_current_addr_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +Input [2]: [c_customer_sk#15, c_current_addr_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (25) BroadcastHashJoin [codegen id : 7] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#17] +Right keys [1]: [c_customer_sk#15] Join condition: None (26) Project [codegen id : 7] -Output [3]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#18] -Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, c_customer_sk#17, c_current_addr_sk#18] +Output [3]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#16] +Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ext_sales_price#5, c_customer_sk#15, c_current_addr_sk#16] (27) Scan parquet default.customer_address -Output [2]: [ca_address_sk#20, ca_gmt_offset#21] +Output [2]: [ca_address_sk#17, ca_gmt_offset#18] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-5.00), IsNotNull(ca_address_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 5] -Input [2]: [ca_address_sk#20, ca_gmt_offset#21] +Input [2]: [ca_address_sk#17, ca_gmt_offset#18] (29) Filter [codegen id : 5] -Input [2]: [ca_address_sk#20, ca_gmt_offset#21] -Condition : ((isnotnull(ca_gmt_offset#21) AND (ca_gmt_offset#21 = -5.00)) AND isnotnull(ca_address_sk#20)) +Input [2]: [ca_address_sk#17, ca_gmt_offset#18] +Condition : ((isnotnull(ca_gmt_offset#18) AND (ca_gmt_offset#18 = -5.00)) AND isnotnull(ca_address_sk#17)) (30) Project [codegen id : 5] -Output [1]: [ca_address_sk#20] -Input [2]: [ca_address_sk#20, ca_gmt_offset#21] +Output [1]: [ca_address_sk#17] +Input [2]: [ca_address_sk#17, ca_gmt_offset#18] (31) BroadcastExchange -Input [1]: [ca_address_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [ca_address_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (32) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#18] -Right keys [1]: [ca_address_sk#20] +Left keys [1]: [c_current_addr_sk#16] +Right keys [1]: [ca_address_sk#17] Join condition: None (33) Project [codegen id : 7] Output [2]: [ss_item_sk#1, ss_ext_sales_price#5] -Input [4]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#18, ca_address_sk#20] +Input [4]: [ss_item_sk#1, ss_ext_sales_price#5, c_current_addr_sk#16, ca_address_sk#17] (34) Scan parquet default.item -Output [2]: [i_item_sk#23, i_category#24] +Output [2]: [i_item_sk#19, i_category#20] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_category), EqualTo(i_category,Jewelry ), IsNotNull(i_item_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 6] -Input [2]: [i_item_sk#23, i_category#24] +Input [2]: [i_item_sk#19, i_category#20] (36) Filter [codegen id : 6] -Input [2]: [i_item_sk#23, i_category#24] -Condition : ((isnotnull(i_category#24) AND (i_category#24 = Jewelry )) AND isnotnull(i_item_sk#23)) +Input [2]: [i_item_sk#19, i_category#20] +Condition : ((isnotnull(i_category#20) AND (i_category#20 = Jewelry )) AND isnotnull(i_item_sk#19)) (37) Project [codegen id : 6] -Output [1]: [i_item_sk#23] -Input [2]: [i_item_sk#23, i_category#24] +Output [1]: [i_item_sk#19] +Input [2]: [i_item_sk#19, i_category#20] (38) BroadcastExchange -Input [1]: [i_item_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [i_item_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (39) BroadcastHashJoin [codegen id : 7] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#23] +Right keys [1]: [i_item_sk#19] Join condition: None (40) Project [codegen id : 7] Output [1]: [ss_ext_sales_price#5] -Input [3]: [ss_item_sk#1, ss_ext_sales_price#5, i_item_sk#23] +Input [3]: [ss_item_sk#1, ss_ext_sales_price#5, i_item_sk#19] (41) HashAggregate [codegen id : 7] Input [1]: [ss_ext_sales_price#5] Keys: [] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum#26] -Results [1]: [sum#27] +Aggregate Attributes [1]: [sum#21] +Results [1]: [sum#22] (42) Exchange -Input [1]: [sum#27] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#28] +Input [1]: [sum#22] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] (43) HashAggregate [codegen id : 15] -Input [1]: [sum#27] +Input [1]: [sum#22] Keys: [] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#29] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#29,17,2) AS promotions#30] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#5))#23] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#23,17,2) AS promotions#24] (44) Scan parquet default.store_sales -Output [5]: [ss_item_sk#31, ss_customer_sk#32, ss_store_sk#33, ss_ext_sales_price#34, ss_sold_date_sk#35] +Output [5]: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, ss_sold_date_sk#29] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#35), dynamicpruningexpression(ss_sold_date_sk#35 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ss_sold_date_sk#29), dynamicpruningexpression(ss_sold_date_sk#29 IN dynamicpruning#7)] PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 13] -Input [5]: [ss_item_sk#31, ss_customer_sk#32, ss_store_sk#33, ss_ext_sales_price#34, ss_sold_date_sk#35] +Input [5]: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, ss_sold_date_sk#29] (46) Filter [codegen id : 13] -Input [5]: [ss_item_sk#31, ss_customer_sk#32, ss_store_sk#33, ss_ext_sales_price#34, ss_sold_date_sk#35] -Condition : ((isnotnull(ss_store_sk#33) AND isnotnull(ss_customer_sk#32)) AND isnotnull(ss_item_sk#31)) +Input [5]: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, ss_sold_date_sk#29] +Condition : ((isnotnull(ss_store_sk#27) AND isnotnull(ss_customer_sk#26)) AND isnotnull(ss_item_sk#25)) (47) ReusedExchange [Reuses operator id: 8] -Output [1]: [s_store_sk#36] +Output [1]: [s_store_sk#30] (48) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_store_sk#33] -Right keys [1]: [s_store_sk#36] +Left keys [1]: [ss_store_sk#27] +Right keys [1]: [s_store_sk#30] Join condition: None (49) Project [codegen id : 13] -Output [4]: [ss_item_sk#31, ss_customer_sk#32, ss_ext_sales_price#34, ss_sold_date_sk#35] -Input [6]: [ss_item_sk#31, ss_customer_sk#32, ss_store_sk#33, ss_ext_sales_price#34, ss_sold_date_sk#35, s_store_sk#36] +Output [4]: [ss_item_sk#25, ss_customer_sk#26, ss_ext_sales_price#28, ss_sold_date_sk#29] +Input [6]: [ss_item_sk#25, ss_customer_sk#26, ss_store_sk#27, ss_ext_sales_price#28, ss_sold_date_sk#29, s_store_sk#30] (50) ReusedExchange [Reuses operator id: 72] -Output [1]: [d_date_sk#37] +Output [1]: [d_date_sk#31] (51) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_sold_date_sk#35] -Right keys [1]: [d_date_sk#37] +Left keys [1]: [ss_sold_date_sk#29] +Right keys [1]: [d_date_sk#31] Join condition: None (52) Project [codegen id : 13] -Output [3]: [ss_item_sk#31, ss_customer_sk#32, ss_ext_sales_price#34] -Input [5]: [ss_item_sk#31, ss_customer_sk#32, ss_ext_sales_price#34, ss_sold_date_sk#35, d_date_sk#37] +Output [3]: [ss_item_sk#25, ss_customer_sk#26, ss_ext_sales_price#28] +Input [5]: [ss_item_sk#25, ss_customer_sk#26, ss_ext_sales_price#28, ss_sold_date_sk#29, d_date_sk#31] (53) ReusedExchange [Reuses operator id: 24] -Output [2]: [c_customer_sk#38, c_current_addr_sk#39] +Output [2]: [c_customer_sk#32, c_current_addr_sk#33] (54) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_customer_sk#32] -Right keys [1]: [c_customer_sk#38] +Left keys [1]: [ss_customer_sk#26] +Right keys [1]: [c_customer_sk#32] Join condition: None (55) Project [codegen id : 13] -Output [3]: [ss_item_sk#31, ss_ext_sales_price#34, c_current_addr_sk#39] -Input [5]: [ss_item_sk#31, ss_customer_sk#32, ss_ext_sales_price#34, c_customer_sk#38, c_current_addr_sk#39] +Output [3]: [ss_item_sk#25, ss_ext_sales_price#28, c_current_addr_sk#33] +Input [5]: [ss_item_sk#25, ss_customer_sk#26, ss_ext_sales_price#28, c_customer_sk#32, c_current_addr_sk#33] (56) ReusedExchange [Reuses operator id: 31] -Output [1]: [ca_address_sk#40] +Output [1]: [ca_address_sk#34] (57) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [c_current_addr_sk#39] -Right keys [1]: [ca_address_sk#40] +Left keys [1]: [c_current_addr_sk#33] +Right keys [1]: [ca_address_sk#34] Join condition: None (58) Project [codegen id : 13] -Output [2]: [ss_item_sk#31, ss_ext_sales_price#34] -Input [4]: [ss_item_sk#31, ss_ext_sales_price#34, c_current_addr_sk#39, ca_address_sk#40] +Output [2]: [ss_item_sk#25, ss_ext_sales_price#28] +Input [4]: [ss_item_sk#25, ss_ext_sales_price#28, c_current_addr_sk#33, ca_address_sk#34] (59) ReusedExchange [Reuses operator id: 38] -Output [1]: [i_item_sk#41] +Output [1]: [i_item_sk#35] (60) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_item_sk#31] -Right keys [1]: [i_item_sk#41] +Left keys [1]: [ss_item_sk#25] +Right keys [1]: [i_item_sk#35] Join condition: None (61) Project [codegen id : 13] -Output [1]: [ss_ext_sales_price#34] -Input [3]: [ss_item_sk#31, ss_ext_sales_price#34, i_item_sk#41] +Output [1]: [ss_ext_sales_price#28] +Input [3]: [ss_item_sk#25, ss_ext_sales_price#28, i_item_sk#35] (62) HashAggregate [codegen id : 13] -Input [1]: [ss_ext_sales_price#34] +Input [1]: [ss_ext_sales_price#28] Keys: [] -Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#34))] -Aggregate Attributes [1]: [sum#42] -Results [1]: [sum#43] +Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#28))] +Aggregate Attributes [1]: [sum#36] +Results [1]: [sum#37] (63) Exchange -Input [1]: [sum#43] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#44] +Input [1]: [sum#37] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (64) HashAggregate [codegen id : 14] -Input [1]: [sum#43] +Input [1]: [sum#37] Keys: [] -Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#34))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#34))#45] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#34))#45,17,2) AS total#46] +Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#28))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#28))#38] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#28))#38,17,2) AS total#39] (65) BroadcastExchange -Input [1]: [total#46] -Arguments: IdentityBroadcastMode, [id=#47] +Input [1]: [total#39] +Arguments: IdentityBroadcastMode, [plan_id=8] (66) BroadcastNestedLoopJoin [codegen id : 15] Join condition: None (67) Project [codegen id : 15] -Output [3]: [promotions#30, total#46, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#30 as decimal(15,4))) / promote_precision(cast(total#46 as decimal(15,4)))), DecimalType(35,20))) * 100.00000000000000000000), DecimalType(38,19)) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#48] -Input [2]: [promotions#30, total#46] +Output [3]: [promotions#24, total#39, CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(promotions#24 as decimal(15,4))) / promote_precision(cast(total#39 as decimal(15,4)))), DecimalType(35,20))) * 100.00000000000000000000), DecimalType(38,19)) AS ((CAST(promotions AS DECIMAL(15,4)) / CAST(total AS DECIMAL(15,4))) * 100)#40] +Input [2]: [promotions#24, total#39] ===== Subqueries ===== @@ -379,27 +379,27 @@ BroadcastExchange (72) (68) Scan parquet default.date_dim -Output [3]: [d_date_sk#16, d_year#49, d_moy#50] +Output [3]: [d_date_sk#14, d_year#41, d_moy#42] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct (69) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#16, d_year#49, d_moy#50] +Input [3]: [d_date_sk#14, d_year#41, d_moy#42] (70) Filter [codegen id : 1] -Input [3]: [d_date_sk#16, d_year#49, d_moy#50] -Condition : ((((isnotnull(d_year#49) AND isnotnull(d_moy#50)) AND (d_year#49 = 1998)) AND (d_moy#50 = 11)) AND isnotnull(d_date_sk#16)) +Input [3]: [d_date_sk#14, d_year#41, d_moy#42] +Condition : ((((isnotnull(d_year#41) AND isnotnull(d_moy#42)) AND (d_year#41 = 1998)) AND (d_moy#42 = 11)) AND isnotnull(d_date_sk#14)) (71) Project [codegen id : 1] -Output [1]: [d_date_sk#16] -Input [3]: [d_date_sk#16, d_year#49, d_moy#50] +Output [1]: [d_date_sk#14] +Input [3]: [d_date_sk#14, d_year#41, d_moy#42] (72) BroadcastExchange -Input [1]: [d_date_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#51] +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] -Subquery:2 Hosting operator id = 44 Hosting Expression = ss_sold_date_sk#35 IN dynamicpruning#7 +Subquery:2 Hosting operator id = 44 Hosting Expression = ss_sold_date_sk#29 IN dynamicpruning#7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt index 0c82f6182c240..b0bdb98017246 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62.sf100/explain.txt @@ -67,7 +67,7 @@ Input [2]: [d_date_sk#6, d_month_seq#7] (8) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_ship_date_sk#1] @@ -79,105 +79,105 @@ Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehous Input [6]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5, d_date_sk#6] (11) Scan parquet default.web_site -Output [2]: [web_site_sk#9, web_name#10] +Output [2]: [web_site_sk#8, web_name#9] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [2]: [web_site_sk#9, web_name#10] +Input [2]: [web_site_sk#8, web_name#9] (13) Filter [codegen id : 2] -Input [2]: [web_site_sk#9, web_name#10] -Condition : isnotnull(web_site_sk#9) +Input [2]: [web_site_sk#8, web_name#9] +Condition : isnotnull(web_site_sk#8) (14) BroadcastExchange -Input [2]: [web_site_sk#9, web_name#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Input [2]: [web_site_sk#8, web_name#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_web_site_sk#2] -Right keys [1]: [web_site_sk#9] +Right keys [1]: [web_site_sk#8] Join condition: None (16) Project [codegen id : 5] -Output [5]: [ws_ship_date_sk#1, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5, web_name#10] -Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5, web_site_sk#9, web_name#10] +Output [5]: [ws_ship_date_sk#1, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5, web_name#9] +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5, web_site_sk#8, web_name#9] (17) Scan parquet default.ship_mode -Output [2]: [sm_ship_mode_sk#12, sm_type#13] +Output [2]: [sm_ship_mode_sk#10, sm_type#11] Batched: true Location [not included in comparison]/{warehouse_dir}/ship_mode] PushedFilters: [IsNotNull(sm_ship_mode_sk)] ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [2]: [sm_ship_mode_sk#12, sm_type#13] +Input [2]: [sm_ship_mode_sk#10, sm_type#11] (19) Filter [codegen id : 3] -Input [2]: [sm_ship_mode_sk#12, sm_type#13] -Condition : isnotnull(sm_ship_mode_sk#12) +Input [2]: [sm_ship_mode_sk#10, sm_type#11] +Condition : isnotnull(sm_ship_mode_sk#10) (20) BroadcastExchange -Input [2]: [sm_ship_mode_sk#12, sm_type#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Input [2]: [sm_ship_mode_sk#10, sm_type#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_ship_mode_sk#3] -Right keys [1]: [sm_ship_mode_sk#12] +Right keys [1]: [sm_ship_mode_sk#10] Join condition: None (22) Project [codegen id : 5] -Output [5]: [ws_ship_date_sk#1, ws_warehouse_sk#4, ws_sold_date_sk#5, web_name#10, sm_type#13] -Input [7]: [ws_ship_date_sk#1, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5, web_name#10, sm_ship_mode_sk#12, sm_type#13] +Output [5]: [ws_ship_date_sk#1, ws_warehouse_sk#4, ws_sold_date_sk#5, web_name#9, sm_type#11] +Input [7]: [ws_ship_date_sk#1, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5, web_name#9, sm_ship_mode_sk#10, sm_type#11] (23) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#15, w_warehouse_name#16] +Output [2]: [w_warehouse_sk#12, w_warehouse_name#13] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (24) ColumnarToRow [codegen id : 4] -Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] +Input [2]: [w_warehouse_sk#12, w_warehouse_name#13] (25) Filter [codegen id : 4] -Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] -Condition : isnotnull(w_warehouse_sk#15) +Input [2]: [w_warehouse_sk#12, w_warehouse_name#13] +Condition : isnotnull(w_warehouse_sk#12) (26) BroadcastExchange -Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] +Input [2]: [w_warehouse_sk#12, w_warehouse_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (27) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_warehouse_sk#4] -Right keys [1]: [w_warehouse_sk#15] +Right keys [1]: [w_warehouse_sk#12] Join condition: None (28) Project [codegen id : 5] -Output [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#13, web_name#10, substr(w_warehouse_name#16, 1, 20) AS _groupingexpression#18] -Input [7]: [ws_ship_date_sk#1, ws_warehouse_sk#4, ws_sold_date_sk#5, web_name#10, sm_type#13, w_warehouse_sk#15, w_warehouse_name#16] +Output [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#11, web_name#9, substr(w_warehouse_name#13, 1, 20) AS _groupingexpression#14] +Input [7]: [ws_ship_date_sk#1, ws_warehouse_sk#4, ws_sold_date_sk#5, web_name#9, sm_type#11, w_warehouse_sk#12, w_warehouse_name#13] (29) HashAggregate [codegen id : 5] -Input [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#13, web_name#10, _groupingexpression#18] -Keys [3]: [_groupingexpression#18, sm_type#13, web_name#10] +Input [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#11, web_name#9, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, sm_type#11, web_name#9] Functions [5]: [partial_sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] -Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] -Results [8]: [_groupingexpression#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] +Aggregate Attributes [5]: [sum#15, sum#16, sum#17, sum#18, sum#19] +Results [8]: [_groupingexpression#14, sm_type#11, web_name#9, sum#20, sum#21, sum#22, sum#23, sum#24] (30) Exchange -Input [8]: [_groupingexpression#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] -Arguments: hashpartitioning(_groupingexpression#18, sm_type#13, web_name#10, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [8]: [_groupingexpression#14, sm_type#11, web_name#9, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(_groupingexpression#14, sm_type#11, web_name#9, 5), ENSURE_REQUIREMENTS, [plan_id=5] (31) HashAggregate [codegen id : 6] -Input [8]: [_groupingexpression#18, sm_type#13, web_name#10, sum#24, sum#25, sum#26, sum#27, sum#28] -Keys [3]: [_groupingexpression#18, sm_type#13, web_name#10] +Input [8]: [_groupingexpression#14, sm_type#11, web_name#9, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [3]: [_groupingexpression#14, sm_type#11, web_name#9] Functions [5]: [sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] -Aggregate Attributes [5]: [sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#30, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#31, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#32, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#33, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#34] -Results [8]: [_groupingexpression#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#30 AS 30 days #36, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#31 AS 31 - 60 days #37, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#32 AS 61 - 90 days #38, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#33 AS 91 - 120 days #39, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#34 AS >120 days #40] +Aggregate Attributes [5]: [sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29] +Results [8]: [_groupingexpression#14 AS substr(w_warehouse_name, 1, 20)#30, sm_type#11, web_name#9, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25 AS 30 days #31, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26 AS 31 - 60 days #32, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27 AS 61 - 90 days #33, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28 AS 91 - 120 days #34, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29 AS >120 days #35] (32) TakeOrderedAndProject -Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] -Arguments: 100, [substr(w_warehouse_name, 1, 20)#35 ASC NULLS FIRST, sm_type#13 ASC NULLS FIRST, web_name#10 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#35, sm_type#13, web_name#10, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] +Input [8]: [substr(w_warehouse_name, 1, 20)#30, sm_type#11, web_name#9, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#30 ASC NULLS FIRST, sm_type#11 ASC NULLS FIRST, web_name#9 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#30, sm_type#11, web_name#9, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt index 752025ebea0a5..66b58e864668b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q62/explain.txt @@ -63,7 +63,7 @@ Condition : isnotnull(w_warehouse_sk#6) (7) BroadcastExchange Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_warehouse_sk#4] @@ -75,109 +75,109 @@ Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_dat Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_warehouse_sk#4, ws_sold_date_sk#5, w_warehouse_sk#6, w_warehouse_name#7] (10) Scan parquet default.ship_mode -Output [2]: [sm_ship_mode_sk#9, sm_type#10] +Output [2]: [sm_ship_mode_sk#8, sm_type#9] Batched: true Location [not included in comparison]/{warehouse_dir}/ship_mode] PushedFilters: [IsNotNull(sm_ship_mode_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [2]: [sm_ship_mode_sk#9, sm_type#10] +Input [2]: [sm_ship_mode_sk#8, sm_type#9] (12) Filter [codegen id : 2] -Input [2]: [sm_ship_mode_sk#9, sm_type#10] -Condition : isnotnull(sm_ship_mode_sk#9) +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Condition : isnotnull(sm_ship_mode_sk#8) (13) BroadcastExchange -Input [2]: [sm_ship_mode_sk#9, sm_type#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_ship_mode_sk#3] -Right keys [1]: [sm_ship_mode_sk#9] +Right keys [1]: [sm_ship_mode_sk#8] Join condition: None (15) Project [codegen id : 5] -Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#10] -Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7, sm_ship_mode_sk#9, sm_type#10] +Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9] +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_ship_mode_sk#3, ws_sold_date_sk#5, w_warehouse_name#7, sm_ship_mode_sk#8, sm_type#9] (16) Scan parquet default.web_site -Output [2]: [web_site_sk#12, web_name#13] +Output [2]: [web_site_sk#10, web_name#11] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 3] -Input [2]: [web_site_sk#12, web_name#13] +Input [2]: [web_site_sk#10, web_name#11] (18) Filter [codegen id : 3] -Input [2]: [web_site_sk#12, web_name#13] -Condition : isnotnull(web_site_sk#12) +Input [2]: [web_site_sk#10, web_name#11] +Condition : isnotnull(web_site_sk#10) (19) BroadcastExchange -Input [2]: [web_site_sk#12, web_name#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Input [2]: [web_site_sk#10, web_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (20) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_web_site_sk#2] -Right keys [1]: [web_site_sk#12] +Right keys [1]: [web_site_sk#10] Join condition: None (21) Project [codegen id : 5] -Output [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#10, web_name#13] -Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#10, web_site_sk#12, web_name#13] +Output [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11] +Input [7]: [ws_ship_date_sk#1, ws_web_site_sk#2, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_site_sk#10, web_name#11] (22) Scan parquet default.date_dim -Output [2]: [d_date_sk#15, d_month_seq#16] +Output [2]: [d_date_sk#12, d_month_seq#13] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 4] -Input [2]: [d_date_sk#15, d_month_seq#16] +Input [2]: [d_date_sk#12, d_month_seq#13] (24) Filter [codegen id : 4] -Input [2]: [d_date_sk#15, d_month_seq#16] -Condition : (((isnotnull(d_month_seq#16) AND (d_month_seq#16 >= 1200)) AND (d_month_seq#16 <= 1211)) AND isnotnull(d_date_sk#15)) +Input [2]: [d_date_sk#12, d_month_seq#13] +Condition : (((isnotnull(d_month_seq#13) AND (d_month_seq#13 >= 1200)) AND (d_month_seq#13 <= 1211)) AND isnotnull(d_date_sk#12)) (25) Project [codegen id : 4] -Output [1]: [d_date_sk#15] -Input [2]: [d_date_sk#15, d_month_seq#16] +Output [1]: [d_date_sk#12] +Input [2]: [d_date_sk#12, d_month_seq#13] (26) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (27) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_ship_date_sk#1] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#12] Join condition: None (28) Project [codegen id : 5] -Output [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#10, web_name#13, substr(w_warehouse_name#7, 1, 20) AS _groupingexpression#18] -Input [6]: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#10, web_name#13, d_date_sk#15] +Output [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, substr(w_warehouse_name#7, 1, 20) AS _groupingexpression#14] +Input [6]: [ws_ship_date_sk#1, ws_sold_date_sk#5, w_warehouse_name#7, sm_type#9, web_name#11, d_date_sk#12] (29) HashAggregate [codegen id : 5] -Input [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#10, web_name#13, _groupingexpression#18] -Keys [3]: [_groupingexpression#18, sm_type#10, web_name#13] +Input [5]: [ws_ship_date_sk#1, ws_sold_date_sk#5, sm_type#9, web_name#11, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, sm_type#9, web_name#11] Functions [5]: [partial_sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] -Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] -Results [8]: [_groupingexpression#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] +Aggregate Attributes [5]: [sum#15, sum#16, sum#17, sum#18, sum#19] +Results [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] (30) Exchange -Input [8]: [_groupingexpression#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Arguments: hashpartitioning(_groupingexpression#18, sm_type#10, web_name#13, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(_groupingexpression#14, sm_type#9, web_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] (31) HashAggregate [codegen id : 6] -Input [8]: [_groupingexpression#18, sm_type#10, web_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Keys [3]: [_groupingexpression#18, sm_type#10, web_name#13] +Input [8]: [_groupingexpression#14, sm_type#9, web_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [3]: [_groupingexpression#14, sm_type#9, web_name#11] Functions [5]: [sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] -Aggregate Attributes [5]: [sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#30, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#31, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#32, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#33, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#34] -Results [8]: [_groupingexpression#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#30 AS 30 days #36, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#31 AS 31 - 60 days #37, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#32 AS 61 - 90 days #38, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#33 AS 91 - 120 days #39, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#34 AS >120 days #40] +Aggregate Attributes [5]: [sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29] +Results [8]: [_groupingexpression#14 AS substr(w_warehouse_name, 1, 20)#30, sm_type#9, web_name#11, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25 AS 30 days #31, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 30) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26 AS 31 - 60 days #32, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 60) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27 AS 61 - 90 days #33, sum(CASE WHEN (((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 90) AND ((ws_ship_date_sk#1 - ws_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28 AS 91 - 120 days #34, sum(CASE WHEN ((ws_ship_date_sk#1 - ws_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29 AS >120 days #35] (32) TakeOrderedAndProject -Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] -Arguments: 100, [substr(w_warehouse_name, 1, 20)#35 ASC NULLS FIRST, sm_type#10 ASC NULLS FIRST, web_name#13 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#35, sm_type#10, web_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] +Input [8]: [substr(w_warehouse_name, 1, 20)#30, sm_type#9, web_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#30 ASC NULLS FIRST, sm_type#9 ASC NULLS FIRST, web_name#11 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#30, sm_type#9, web_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt index fe91e93a55aba..fdc937b6de91e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63.sf100/explain.txt @@ -49,116 +49,116 @@ Input [5]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, i_manager_id#5] (5) BroadcastExchange Input [2]: [i_item_sk#1, i_manager_id#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.store_sales -Output [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] +Output [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#14), dynamicpruningexpression(ss_sold_date_sk#14 IN dynamicpruning#15)] +PartitionFilters: [isnotnull(ss_sold_date_sk#13), dynamicpruningexpression(ss_sold_date_sk#13 IN dynamicpruning#14)] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (7) ColumnarToRow -Input [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] (8) Filter -Input [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] -Condition : (isnotnull(ss_item_sk#11) AND isnotnull(ss_store_sk#12)) +Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] -Right keys [1]: [ss_item_sk#11] +Right keys [1]: [ss_item_sk#10] Join condition: None (10) Project [codegen id : 4] -Output [4]: [i_manager_id#5, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] -Input [6]: [i_item_sk#1, i_manager_id#5, ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] +Output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] +Input [6]: [i_item_sk#1, i_manager_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] (11) Scan parquet default.store -Output [1]: [s_store_sk#16] +Output [1]: [s_store_sk#15] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [1]: [s_store_sk#16] +Input [1]: [s_store_sk#15] (13) Filter [codegen id : 2] -Input [1]: [s_store_sk#16] -Condition : isnotnull(s_store_sk#16) +Input [1]: [s_store_sk#15] +Condition : isnotnull(s_store_sk#15) (14) BroadcastExchange -Input [1]: [s_store_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#12] -Right keys [1]: [s_store_sk#16] +Left keys [1]: [ss_store_sk#11] +Right keys [1]: [s_store_sk#15] Join condition: None (16) Project [codegen id : 4] -Output [3]: [i_manager_id#5, ss_sales_price#13, ss_sold_date_sk#14] -Input [5]: [i_manager_id#5, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14, s_store_sk#16] +Output [3]: [i_manager_id#5, ss_sales_price#12, ss_sold_date_sk#13] +Input [5]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, s_store_sk#15] (17) ReusedExchange [Reuses operator id: 33] -Output [2]: [d_date_sk#18, d_moy#19] +Output [2]: [d_date_sk#16, d_moy#17] (18) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#14] -Right keys [1]: [d_date_sk#18] +Left keys [1]: [ss_sold_date_sk#13] +Right keys [1]: [d_date_sk#16] Join condition: None (19) Project [codegen id : 4] -Output [3]: [i_manager_id#5, ss_sales_price#13, d_moy#19] -Input [5]: [i_manager_id#5, ss_sales_price#13, ss_sold_date_sk#14, d_date_sk#18, d_moy#19] +Output [3]: [i_manager_id#5, ss_sales_price#12, d_moy#17] +Input [5]: [i_manager_id#5, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#16, d_moy#17] (20) HashAggregate [codegen id : 4] -Input [3]: [i_manager_id#5, ss_sales_price#13, d_moy#19] -Keys [2]: [i_manager_id#5, d_moy#19] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#13))] -Aggregate Attributes [1]: [sum#20] -Results [3]: [i_manager_id#5, d_moy#19, sum#21] +Input [3]: [i_manager_id#5, ss_sales_price#12, d_moy#17] +Keys [2]: [i_manager_id#5, d_moy#17] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum#18] +Results [3]: [i_manager_id#5, d_moy#17, sum#19] (21) Exchange -Input [3]: [i_manager_id#5, d_moy#19, sum#21] -Arguments: hashpartitioning(i_manager_id#5, d_moy#19, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [3]: [i_manager_id#5, d_moy#17, sum#19] +Arguments: hashpartitioning(i_manager_id#5, d_moy#17, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 5] -Input [3]: [i_manager_id#5, d_moy#19, sum#21] -Keys [2]: [i_manager_id#5, d_moy#19] -Functions [1]: [sum(UnscaledValue(ss_sales_price#13))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#13))#23] -Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#23,17,2) AS sum_sales#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#23,17,2) AS _w0#25] +Input [3]: [i_manager_id#5, d_moy#17, sum#19] +Keys [2]: [i_manager_id#5, d_moy#17] +Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#20] +Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS _w0#22] (23) Exchange -Input [3]: [i_manager_id#5, sum_sales#24, _w0#25] -Arguments: hashpartitioning(i_manager_id#5, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_manager_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 6] -Input [3]: [i_manager_id#5, sum_sales#24, _w0#25] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] Arguments: [i_manager_id#5 ASC NULLS FIRST], false, 0 (25) Window -Input [3]: [i_manager_id#5, sum_sales#24, _w0#25] -Arguments: [avg(_w0#25) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#27], [i_manager_id#5] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#23], [i_manager_id#5] (26) Filter [codegen id : 7] -Input [4]: [i_manager_id#5, sum_sales#24, _w0#25, avg_monthly_sales#27] -Condition : (isnotnull(avg_monthly_sales#27) AND ((avg_monthly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] +Condition : (isnotnull(avg_monthly_sales#23) AND ((avg_monthly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] -Output [3]: [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] -Input [4]: [i_manager_id#5, sum_sales#24, _w0#25, avg_monthly_sales#27] +Output [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] +Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] (28) TakeOrderedAndProject -Input [3]: [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] -Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#27 ASC NULLS FIRST, sum_sales#24 ASC NULLS FIRST], [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] +Input [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] +Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#23 ASC NULLS FIRST, sum_sales#21 ASC NULLS FIRST], [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#14 IN dynamicpruning#15 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#13 IN dynamicpruning#14 BroadcastExchange (33) +- * Project (32) +- * Filter (31) @@ -167,25 +167,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [3]: [d_date_sk#18, d_month_seq#28, d_moy#19] +Output [3]: [d_date_sk#16, d_month_seq#24, d_moy#17] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_month_seq, [1200,1201,1202,1203,1204,1205,1206,1207,1208,1209,1210,1211]), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#18, d_month_seq#28, d_moy#19] +Input [3]: [d_date_sk#16, d_month_seq#24, d_moy#17] (31) Filter [codegen id : 1] -Input [3]: [d_date_sk#18, d_month_seq#28, d_moy#19] -Condition : (d_month_seq#28 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#18)) +Input [3]: [d_date_sk#16, d_month_seq#24, d_moy#17] +Condition : (d_month_seq#24 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#16)) (32) Project [codegen id : 1] -Output [2]: [d_date_sk#18, d_moy#19] -Input [3]: [d_date_sk#18, d_month_seq#28, d_moy#19] +Output [2]: [d_date_sk#16, d_moy#17] +Input [3]: [d_date_sk#16, d_month_seq#24, d_moy#17] (33) BroadcastExchange -Input [2]: [d_date_sk#18, d_moy#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] +Input [2]: [d_date_sk#16, d_moy#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt index ad0ca3ea63d42..51d52f254de03 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q63/explain.txt @@ -64,7 +64,7 @@ Condition : (isnotnull(ss_item_sk#10) AND isnotnull(ss_store_sk#11)) (8) BroadcastExchange Input [4]: [ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] @@ -76,85 +76,85 @@ Output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk# Input [6]: [i_item_sk#1, i_manager_id#5, ss_item_sk#10, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13] (11) ReusedExchange [Reuses operator id: 33] -Output [2]: [d_date_sk#16, d_moy#17] +Output [2]: [d_date_sk#15, d_moy#16] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_date_sk#13] -Right keys [1]: [d_date_sk#16] +Right keys [1]: [d_date_sk#15] Join condition: None (13) Project [codegen id : 4] -Output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#17] -Input [6]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#16, d_moy#17] +Output [4]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16] +Input [6]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, ss_sold_date_sk#13, d_date_sk#15, d_moy#16] (14) Scan parquet default.store -Output [1]: [s_store_sk#18] +Output [1]: [s_store_sk#17] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [1]: [s_store_sk#18] +Input [1]: [s_store_sk#17] (16) Filter [codegen id : 3] -Input [1]: [s_store_sk#18] -Condition : isnotnull(s_store_sk#18) +Input [1]: [s_store_sk#17] +Condition : isnotnull(s_store_sk#17) (17) BroadcastExchange -Input [1]: [s_store_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +Input [1]: [s_store_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#11] -Right keys [1]: [s_store_sk#18] +Right keys [1]: [s_store_sk#17] Join condition: None (19) Project [codegen id : 4] -Output [3]: [i_manager_id#5, ss_sales_price#12, d_moy#17] -Input [5]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#17, s_store_sk#18] +Output [3]: [i_manager_id#5, ss_sales_price#12, d_moy#16] +Input [5]: [i_manager_id#5, ss_store_sk#11, ss_sales_price#12, d_moy#16, s_store_sk#17] (20) HashAggregate [codegen id : 4] -Input [3]: [i_manager_id#5, ss_sales_price#12, d_moy#17] -Keys [2]: [i_manager_id#5, d_moy#17] +Input [3]: [i_manager_id#5, ss_sales_price#12, d_moy#16] +Keys [2]: [i_manager_id#5, d_moy#16] Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#12))] -Aggregate Attributes [1]: [sum#20] -Results [3]: [i_manager_id#5, d_moy#17, sum#21] +Aggregate Attributes [1]: [sum#18] +Results [3]: [i_manager_id#5, d_moy#16, sum#19] (21) Exchange -Input [3]: [i_manager_id#5, d_moy#17, sum#21] -Arguments: hashpartitioning(i_manager_id#5, d_moy#17, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [3]: [i_manager_id#5, d_moy#16, sum#19] +Arguments: hashpartitioning(i_manager_id#5, d_moy#16, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 5] -Input [3]: [i_manager_id#5, d_moy#17, sum#21] -Keys [2]: [i_manager_id#5, d_moy#17] +Input [3]: [i_manager_id#5, d_moy#16, sum#19] +Keys [2]: [i_manager_id#5, d_moy#16] Functions [1]: [sum(UnscaledValue(ss_sales_price#12))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#23] -Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#23,17,2) AS sum_sales#24, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#23,17,2) AS _w0#25] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#12))#20] +Results [3]: [i_manager_id#5, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#12))#20,17,2) AS _w0#22] (23) Exchange -Input [3]: [i_manager_id#5, sum_sales#24, _w0#25] -Arguments: hashpartitioning(i_manager_id#5, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: hashpartitioning(i_manager_id#5, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 6] -Input [3]: [i_manager_id#5, sum_sales#24, _w0#25] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] Arguments: [i_manager_id#5 ASC NULLS FIRST], false, 0 (25) Window -Input [3]: [i_manager_id#5, sum_sales#24, _w0#25] -Arguments: [avg(_w0#25) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#27], [i_manager_id#5] +Input [3]: [i_manager_id#5, sum_sales#21, _w0#22] +Arguments: [avg(_w0#22) windowspecdefinition(i_manager_id#5, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#23], [i_manager_id#5] (26) Filter [codegen id : 7] -Input [4]: [i_manager_id#5, sum_sales#24, _w0#25, avg_monthly_sales#27] -Condition : (isnotnull(avg_monthly_sales#27) AND ((avg_monthly_sales#27 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#24 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#27 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] +Condition : (isnotnull(avg_monthly_sales#23) AND ((avg_monthly_sales#23 > 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#23 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (27) Project [codegen id : 7] -Output [3]: [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] -Input [4]: [i_manager_id#5, sum_sales#24, _w0#25, avg_monthly_sales#27] +Output [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] +Input [4]: [i_manager_id#5, sum_sales#21, _w0#22, avg_monthly_sales#23] (28) TakeOrderedAndProject -Input [3]: [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] -Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#27 ASC NULLS FIRST, sum_sales#24 ASC NULLS FIRST], [i_manager_id#5, sum_sales#24, avg_monthly_sales#27] +Input [3]: [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] +Arguments: 100, [i_manager_id#5 ASC NULLS FIRST, avg_monthly_sales#23 ASC NULLS FIRST, sum_sales#21 ASC NULLS FIRST], [i_manager_id#5, sum_sales#21, avg_monthly_sales#23] ===== Subqueries ===== @@ -167,25 +167,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [3]: [d_date_sk#16, d_month_seq#28, d_moy#17] +Output [3]: [d_date_sk#15, d_month_seq#24, d_moy#16] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_month_seq, [1200,1201,1202,1203,1204,1205,1206,1207,1208,1209,1210,1211]), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#16, d_month_seq#28, d_moy#17] +Input [3]: [d_date_sk#15, d_month_seq#24, d_moy#16] (31) Filter [codegen id : 1] -Input [3]: [d_date_sk#16, d_month_seq#28, d_moy#17] -Condition : (d_month_seq#28 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#16)) +Input [3]: [d_date_sk#15, d_month_seq#24, d_moy#16] +Condition : (d_month_seq#24 INSET 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211 AND isnotnull(d_date_sk#15)) (32) Project [codegen id : 1] -Output [2]: [d_date_sk#16, d_moy#17] -Input [3]: [d_date_sk#16, d_month_seq#28, d_moy#17] +Output [2]: [d_date_sk#15, d_moy#16] +Input [3]: [d_date_sk#15, d_month_seq#24, d_moy#16] (33) BroadcastExchange -Input [2]: [d_date_sk#16, d_moy#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] +Input [2]: [d_date_sk#15, d_moy#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.sf100/explain.txt index 474967b54286a..b18b10c69f2cd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65.sf100/explain.txt @@ -78,164 +78,164 @@ Results [3]: [ss_store_sk#2, ss_item_sk#1, sum#8] (8) Exchange Input [3]: [ss_store_sk#2, ss_item_sk#1, sum#8] -Arguments: hashpartitioning(ss_store_sk#2, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ss_store_sk#2, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (9) HashAggregate [codegen id : 8] Input [3]: [ss_store_sk#2, ss_item_sk#1, sum#8] Keys [2]: [ss_store_sk#2, ss_item_sk#1] Functions [1]: [sum(UnscaledValue(ss_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#3))#10] -Results [3]: [ss_store_sk#2, ss_item_sk#1, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#10,17,2) AS revenue#11] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#3))#9] +Results [3]: [ss_store_sk#2, ss_item_sk#1, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#9,17,2) AS revenue#10] (10) Filter [codegen id : 8] -Input [3]: [ss_store_sk#2, ss_item_sk#1, revenue#11] -Condition : isnotnull(revenue#11) +Input [3]: [ss_store_sk#2, ss_item_sk#1, revenue#10] +Condition : isnotnull(revenue#10) (11) Scan parquet default.store_sales -Output [4]: [ss_item_sk#12, ss_store_sk#13, ss_sales_price#14, ss_sold_date_sk#15] +Output [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#15), dynamicpruningexpression(ss_sold_date_sk#15 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ss_sold_date_sk#14), dynamicpruningexpression(ss_sold_date_sk#14 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ss_store_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 4] -Input [4]: [ss_item_sk#12, ss_store_sk#13, ss_sales_price#14, ss_sold_date_sk#15] +Input [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] (13) Filter [codegen id : 4] -Input [4]: [ss_item_sk#12, ss_store_sk#13, ss_sales_price#14, ss_sold_date_sk#15] -Condition : isnotnull(ss_store_sk#13) +Input [4]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14] +Condition : isnotnull(ss_store_sk#12) (14) ReusedExchange [Reuses operator id: 46] -Output [1]: [d_date_sk#16] +Output [1]: [d_date_sk#15] (15) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#15] -Right keys [1]: [d_date_sk#16] +Left keys [1]: [ss_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] Join condition: None (16) Project [codegen id : 4] -Output [3]: [ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] -Input [5]: [ss_item_sk#12, ss_store_sk#13, ss_sales_price#14, ss_sold_date_sk#15, d_date_sk#16] +Output [3]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Input [5]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13, ss_sold_date_sk#14, d_date_sk#15] (17) HashAggregate [codegen id : 4] -Input [3]: [ss_item_sk#12, ss_store_sk#13, ss_sales_price#14] -Keys [2]: [ss_store_sk#13, ss_item_sk#12] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#14))] -Aggregate Attributes [1]: [sum#17] -Results [3]: [ss_store_sk#13, ss_item_sk#12, sum#18] +Input [3]: [ss_item_sk#11, ss_store_sk#12, ss_sales_price#13] +Keys [2]: [ss_store_sk#12, ss_item_sk#11] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum#16] +Results [3]: [ss_store_sk#12, ss_item_sk#11, sum#17] (18) Exchange -Input [3]: [ss_store_sk#13, ss_item_sk#12, sum#18] -Arguments: hashpartitioning(ss_store_sk#13, ss_item_sk#12, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [3]: [ss_store_sk#12, ss_item_sk#11, sum#17] +Arguments: hashpartitioning(ss_store_sk#12, ss_item_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=2] (19) HashAggregate [codegen id : 5] -Input [3]: [ss_store_sk#13, ss_item_sk#12, sum#18] -Keys [2]: [ss_store_sk#13, ss_item_sk#12] -Functions [1]: [sum(UnscaledValue(ss_sales_price#14))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#14))#20] -Results [2]: [ss_store_sk#13, MakeDecimal(sum(UnscaledValue(ss_sales_price#14))#20,17,2) AS revenue#21] +Input [3]: [ss_store_sk#12, ss_item_sk#11, sum#17] +Keys [2]: [ss_store_sk#12, ss_item_sk#11] +Functions [1]: [sum(UnscaledValue(ss_sales_price#13))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#13))#18] +Results [2]: [ss_store_sk#12, MakeDecimal(sum(UnscaledValue(ss_sales_price#13))#18,17,2) AS revenue#19] (20) HashAggregate [codegen id : 5] -Input [2]: [ss_store_sk#13, revenue#21] -Keys [1]: [ss_store_sk#13] -Functions [1]: [partial_avg(revenue#21)] -Aggregate Attributes [2]: [sum#22, count#23] -Results [3]: [ss_store_sk#13, sum#24, count#25] +Input [2]: [ss_store_sk#12, revenue#19] +Keys [1]: [ss_store_sk#12] +Functions [1]: [partial_avg(revenue#19)] +Aggregate Attributes [2]: [sum#20, count#21] +Results [3]: [ss_store_sk#12, sum#22, count#23] (21) Exchange -Input [3]: [ss_store_sk#13, sum#24, count#25] -Arguments: hashpartitioning(ss_store_sk#13, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [3]: [ss_store_sk#12, sum#22, count#23] +Arguments: hashpartitioning(ss_store_sk#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 6] -Input [3]: [ss_store_sk#13, sum#24, count#25] -Keys [1]: [ss_store_sk#13] -Functions [1]: [avg(revenue#21)] -Aggregate Attributes [1]: [avg(revenue#21)#27] -Results [2]: [ss_store_sk#13, avg(revenue#21)#27 AS ave#28] +Input [3]: [ss_store_sk#12, sum#22, count#23] +Keys [1]: [ss_store_sk#12] +Functions [1]: [avg(revenue#19)] +Aggregate Attributes [1]: [avg(revenue#19)#24] +Results [2]: [ss_store_sk#12, avg(revenue#19)#24 AS ave#25] (23) BroadcastExchange -Input [2]: [ss_store_sk#13, ave#28] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] +Input [2]: [ss_store_sk#12, ave#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (24) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_store_sk#2] -Right keys [1]: [ss_store_sk#13] -Join condition: (cast(revenue#11 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#28)), DecimalType(23,7))) +Right keys [1]: [ss_store_sk#12] +Join condition: (cast(revenue#10 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#25)), DecimalType(23,7))) (25) Project [codegen id : 8] -Output [3]: [ss_store_sk#2, ss_item_sk#1, revenue#11] -Input [5]: [ss_store_sk#2, ss_item_sk#1, revenue#11, ss_store_sk#13, ave#28] +Output [3]: [ss_store_sk#2, ss_item_sk#1, revenue#10] +Input [5]: [ss_store_sk#2, ss_item_sk#1, revenue#10, ss_store_sk#12, ave#25] (26) Scan parquet default.store -Output [2]: [s_store_sk#30, s_store_name#31] +Output [2]: [s_store_sk#26, s_store_name#27] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 7] -Input [2]: [s_store_sk#30, s_store_name#31] +Input [2]: [s_store_sk#26, s_store_name#27] (28) Filter [codegen id : 7] -Input [2]: [s_store_sk#30, s_store_name#31] -Condition : isnotnull(s_store_sk#30) +Input [2]: [s_store_sk#26, s_store_name#27] +Condition : isnotnull(s_store_sk#26) (29) BroadcastExchange -Input [2]: [s_store_sk#30, s_store_name#31] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#32] +Input [2]: [s_store_sk#26, s_store_name#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (30) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_store_sk#2] -Right keys [1]: [s_store_sk#30] +Right keys [1]: [s_store_sk#26] Join condition: None (31) Project [codegen id : 8] -Output [3]: [ss_item_sk#1, revenue#11, s_store_name#31] -Input [5]: [ss_store_sk#2, ss_item_sk#1, revenue#11, s_store_sk#30, s_store_name#31] +Output [3]: [ss_item_sk#1, revenue#10, s_store_name#27] +Input [5]: [ss_store_sk#2, ss_item_sk#1, revenue#10, s_store_sk#26, s_store_name#27] (32) Exchange -Input [3]: [ss_item_sk#1, revenue#11, s_store_name#31] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [3]: [ss_item_sk#1, revenue#10, s_store_name#27] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=6] (33) Sort [codegen id : 9] -Input [3]: [ss_item_sk#1, revenue#11, s_store_name#31] +Input [3]: [ss_item_sk#1, revenue#10, s_store_name#27] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (34) Scan parquet default.item -Output [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Output [5]: [i_item_sk#28, i_item_desc#29, i_current_price#30, i_wholesale_cost#31, i_brand#32] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 10] -Input [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Input [5]: [i_item_sk#28, i_item_desc#29, i_current_price#30, i_wholesale_cost#31, i_brand#32] (36) Filter [codegen id : 10] -Input [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] -Condition : isnotnull(i_item_sk#34) +Input [5]: [i_item_sk#28, i_item_desc#29, i_current_price#30, i_wholesale_cost#31, i_brand#32] +Condition : isnotnull(i_item_sk#28) (37) Exchange -Input [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] -Arguments: hashpartitioning(i_item_sk#34, 5), ENSURE_REQUIREMENTS, [id=#39] +Input [5]: [i_item_sk#28, i_item_desc#29, i_current_price#30, i_wholesale_cost#31, i_brand#32] +Arguments: hashpartitioning(i_item_sk#28, 5), ENSURE_REQUIREMENTS, [plan_id=7] (38) Sort [codegen id : 11] -Input [5]: [i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] -Arguments: [i_item_sk#34 ASC NULLS FIRST], false, 0 +Input [5]: [i_item_sk#28, i_item_desc#29, i_current_price#30, i_wholesale_cost#31, i_brand#32] +Arguments: [i_item_sk#28 ASC NULLS FIRST], false, 0 (39) SortMergeJoin [codegen id : 12] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#34] +Right keys [1]: [i_item_sk#28] Join condition: None (40) Project [codegen id : 12] -Output [6]: [s_store_name#31, i_item_desc#35, revenue#11, i_current_price#36, i_wholesale_cost#37, i_brand#38] -Input [8]: [ss_item_sk#1, revenue#11, s_store_name#31, i_item_sk#34, i_item_desc#35, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Output [6]: [s_store_name#27, i_item_desc#29, revenue#10, i_current_price#30, i_wholesale_cost#31, i_brand#32] +Input [8]: [ss_item_sk#1, revenue#10, s_store_name#27, i_item_sk#28, i_item_desc#29, i_current_price#30, i_wholesale_cost#31, i_brand#32] (41) TakeOrderedAndProject -Input [6]: [s_store_name#31, i_item_desc#35, revenue#11, i_current_price#36, i_wholesale_cost#37, i_brand#38] -Arguments: 100, [s_store_name#31 ASC NULLS FIRST, i_item_desc#35 ASC NULLS FIRST], [s_store_name#31, i_item_desc#35, revenue#11, i_current_price#36, i_wholesale_cost#37, i_brand#38] +Input [6]: [s_store_name#27, i_item_desc#29, revenue#10, i_current_price#30, i_wholesale_cost#31, i_brand#32] +Arguments: 100, [s_store_name#27 ASC NULLS FIRST, i_item_desc#29 ASC NULLS FIRST], [s_store_name#27, i_item_desc#29, revenue#10, i_current_price#30, i_wholesale_cost#31, i_brand#32] ===== Subqueries ===== @@ -248,27 +248,27 @@ BroadcastExchange (46) (42) Scan parquet default.date_dim -Output [2]: [d_date_sk#6, d_month_seq#40] +Output [2]: [d_date_sk#6, d_month_seq#33] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187), IsNotNull(d_date_sk)] ReadSchema: struct (43) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#6, d_month_seq#40] +Input [2]: [d_date_sk#6, d_month_seq#33] (44) Filter [codegen id : 1] -Input [2]: [d_date_sk#6, d_month_seq#40] -Condition : (((isnotnull(d_month_seq#40) AND (d_month_seq#40 >= 1176)) AND (d_month_seq#40 <= 1187)) AND isnotnull(d_date_sk#6)) +Input [2]: [d_date_sk#6, d_month_seq#33] +Condition : (((isnotnull(d_month_seq#33) AND (d_month_seq#33 >= 1176)) AND (d_month_seq#33 <= 1187)) AND isnotnull(d_date_sk#6)) (45) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [2]: [d_date_sk#6, d_month_seq#40] +Input [2]: [d_date_sk#6, d_month_seq#33] (46) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#41] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 11 Hosting Expression = ss_sold_date_sk#15 IN dynamicpruning#5 +Subquery:2 Hosting operator id = 11 Hosting Expression = ss_sold_date_sk#14 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt index c7967bfa915b8..dbacad80d20e0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q65/explain.txt @@ -89,22 +89,22 @@ Results [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] (11) Exchange Input [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] -Arguments: hashpartitioning(ss_store_sk#4, ss_item_sk#3, 5), ENSURE_REQUIREMENTS, [id=#11] +Arguments: hashpartitioning(ss_store_sk#4, ss_item_sk#3, 5), ENSURE_REQUIREMENTS, [plan_id=1] (12) HashAggregate [codegen id : 3] Input [3]: [ss_store_sk#4, ss_item_sk#3, sum#10] Keys [2]: [ss_store_sk#4, ss_item_sk#3] Functions [1]: [sum(UnscaledValue(ss_sales_price#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#5))#12] -Results [3]: [ss_store_sk#4, ss_item_sk#3, MakeDecimal(sum(UnscaledValue(ss_sales_price#5))#12,17,2) AS revenue#13] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#5))#11] +Results [3]: [ss_store_sk#4, ss_item_sk#3, MakeDecimal(sum(UnscaledValue(ss_sales_price#5))#11,17,2) AS revenue#12] (13) Filter [codegen id : 3] -Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#13] -Condition : isnotnull(revenue#13) +Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#12] +Condition : isnotnull(revenue#12) (14) BroadcastExchange -Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Input [3]: [ss_store_sk#4, ss_item_sk#3, revenue#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 9] Left keys [1]: [s_store_sk#1] @@ -112,115 +112,115 @@ Right keys [1]: [ss_store_sk#4] Join condition: None (16) Project [codegen id : 9] -Output [4]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#13] -Input [5]: [s_store_sk#1, s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#13] +Output [4]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12] +Input [5]: [s_store_sk#1, s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12] (17) Scan parquet default.item -Output [5]: [i_item_sk#15, i_item_desc#16, i_current_price#17, i_wholesale_cost#18, i_brand#19] +Output [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (18) ColumnarToRow [codegen id : 4] -Input [5]: [i_item_sk#15, i_item_desc#16, i_current_price#17, i_wholesale_cost#18, i_brand#19] +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] (19) Filter [codegen id : 4] -Input [5]: [i_item_sk#15, i_item_desc#16, i_current_price#17, i_wholesale_cost#18, i_brand#19] -Condition : isnotnull(i_item_sk#15) +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Condition : isnotnull(i_item_sk#13) (20) BroadcastExchange -Input [5]: [i_item_sk#15, i_item_desc#16, i_current_price#17, i_wholesale_cost#18, i_brand#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] +Input [5]: [i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_item_sk#3] -Right keys [1]: [i_item_sk#15] +Right keys [1]: [i_item_sk#13] Join condition: None (22) Project [codegen id : 9] -Output [7]: [s_store_name#2, ss_store_sk#4, revenue#13, i_item_desc#16, i_current_price#17, i_wholesale_cost#18, i_brand#19] -Input [9]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#13, i_item_sk#15, i_item_desc#16, i_current_price#17, i_wholesale_cost#18, i_brand#19] +Output [7]: [s_store_name#2, ss_store_sk#4, revenue#12, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Input [9]: [s_store_name#2, ss_store_sk#4, ss_item_sk#3, revenue#12, i_item_sk#13, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17] (23) Scan parquet default.store_sales -Output [4]: [ss_item_sk#21, ss_store_sk#22, ss_sales_price#23, ss_sold_date_sk#24] +Output [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#24), dynamicpruningexpression(ss_sold_date_sk#24 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ss_sold_date_sk#21), dynamicpruningexpression(ss_sold_date_sk#21 IN dynamicpruning#7)] PushedFilters: [IsNotNull(ss_store_sk)] ReadSchema: struct (24) ColumnarToRow [codegen id : 6] -Input [4]: [ss_item_sk#21, ss_store_sk#22, ss_sales_price#23, ss_sold_date_sk#24] +Input [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] (25) Filter [codegen id : 6] -Input [4]: [ss_item_sk#21, ss_store_sk#22, ss_sales_price#23, ss_sold_date_sk#24] -Condition : isnotnull(ss_store_sk#22) +Input [4]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21] +Condition : isnotnull(ss_store_sk#19) (26) ReusedExchange [Reuses operator id: 43] -Output [1]: [d_date_sk#25] +Output [1]: [d_date_sk#22] (27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#24] -Right keys [1]: [d_date_sk#25] +Left keys [1]: [ss_sold_date_sk#21] +Right keys [1]: [d_date_sk#22] Join condition: None (28) Project [codegen id : 6] -Output [3]: [ss_item_sk#21, ss_store_sk#22, ss_sales_price#23] -Input [5]: [ss_item_sk#21, ss_store_sk#22, ss_sales_price#23, ss_sold_date_sk#24, d_date_sk#25] +Output [3]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20] +Input [5]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20, ss_sold_date_sk#21, d_date_sk#22] (29) HashAggregate [codegen id : 6] -Input [3]: [ss_item_sk#21, ss_store_sk#22, ss_sales_price#23] -Keys [2]: [ss_store_sk#22, ss_item_sk#21] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#23))] -Aggregate Attributes [1]: [sum#26] -Results [3]: [ss_store_sk#22, ss_item_sk#21, sum#27] +Input [3]: [ss_item_sk#18, ss_store_sk#19, ss_sales_price#20] +Keys [2]: [ss_store_sk#19, ss_item_sk#18] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#20))] +Aggregate Attributes [1]: [sum#23] +Results [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] (30) Exchange -Input [3]: [ss_store_sk#22, ss_item_sk#21, sum#27] -Arguments: hashpartitioning(ss_store_sk#22, ss_item_sk#21, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] +Arguments: hashpartitioning(ss_store_sk#19, ss_item_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=4] (31) HashAggregate [codegen id : 7] -Input [3]: [ss_store_sk#22, ss_item_sk#21, sum#27] -Keys [2]: [ss_store_sk#22, ss_item_sk#21] -Functions [1]: [sum(UnscaledValue(ss_sales_price#23))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#23))#29] -Results [2]: [ss_store_sk#22, MakeDecimal(sum(UnscaledValue(ss_sales_price#23))#29,17,2) AS revenue#30] +Input [3]: [ss_store_sk#19, ss_item_sk#18, sum#24] +Keys [2]: [ss_store_sk#19, ss_item_sk#18] +Functions [1]: [sum(UnscaledValue(ss_sales_price#20))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#20))#25] +Results [2]: [ss_store_sk#19, MakeDecimal(sum(UnscaledValue(ss_sales_price#20))#25,17,2) AS revenue#26] (32) HashAggregate [codegen id : 7] -Input [2]: [ss_store_sk#22, revenue#30] -Keys [1]: [ss_store_sk#22] -Functions [1]: [partial_avg(revenue#30)] -Aggregate Attributes [2]: [sum#31, count#32] -Results [3]: [ss_store_sk#22, sum#33, count#34] +Input [2]: [ss_store_sk#19, revenue#26] +Keys [1]: [ss_store_sk#19] +Functions [1]: [partial_avg(revenue#26)] +Aggregate Attributes [2]: [sum#27, count#28] +Results [3]: [ss_store_sk#19, sum#29, count#30] (33) Exchange -Input [3]: [ss_store_sk#22, sum#33, count#34] -Arguments: hashpartitioning(ss_store_sk#22, 5), ENSURE_REQUIREMENTS, [id=#35] +Input [3]: [ss_store_sk#19, sum#29, count#30] +Arguments: hashpartitioning(ss_store_sk#19, 5), ENSURE_REQUIREMENTS, [plan_id=5] (34) HashAggregate [codegen id : 8] -Input [3]: [ss_store_sk#22, sum#33, count#34] -Keys [1]: [ss_store_sk#22] -Functions [1]: [avg(revenue#30)] -Aggregate Attributes [1]: [avg(revenue#30)#36] -Results [2]: [ss_store_sk#22, avg(revenue#30)#36 AS ave#37] +Input [3]: [ss_store_sk#19, sum#29, count#30] +Keys [1]: [ss_store_sk#19] +Functions [1]: [avg(revenue#26)] +Aggregate Attributes [1]: [avg(revenue#26)#31] +Results [2]: [ss_store_sk#19, avg(revenue#26)#31 AS ave#32] (35) BroadcastExchange -Input [2]: [ss_store_sk#22, ave#37] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#38] +Input [2]: [ss_store_sk#19, ave#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] (36) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#4] -Right keys [1]: [ss_store_sk#22] -Join condition: (cast(revenue#13 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#37)), DecimalType(23,7))) +Right keys [1]: [ss_store_sk#19] +Join condition: (cast(revenue#12 as decimal(23,7)) <= CheckOverflow((0.100000 * promote_precision(ave#32)), DecimalType(23,7))) (37) Project [codegen id : 9] -Output [6]: [s_store_name#2, i_item_desc#16, revenue#13, i_current_price#17, i_wholesale_cost#18, i_brand#19] -Input [9]: [s_store_name#2, ss_store_sk#4, revenue#13, i_item_desc#16, i_current_price#17, i_wholesale_cost#18, i_brand#19, ss_store_sk#22, ave#37] +Output [6]: [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Input [9]: [s_store_name#2, ss_store_sk#4, revenue#12, i_item_desc#14, i_current_price#15, i_wholesale_cost#16, i_brand#17, ss_store_sk#19, ave#32] (38) TakeOrderedAndProject -Input [6]: [s_store_name#2, i_item_desc#16, revenue#13, i_current_price#17, i_wholesale_cost#18, i_brand#19] -Arguments: 100, [s_store_name#2 ASC NULLS FIRST, i_item_desc#16 ASC NULLS FIRST], [s_store_name#2, i_item_desc#16, revenue#13, i_current_price#17, i_wholesale_cost#18, i_brand#19] +Input [6]: [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] +Arguments: 100, [s_store_name#2 ASC NULLS FIRST, i_item_desc#14 ASC NULLS FIRST], [s_store_name#2, i_item_desc#14, revenue#12, i_current_price#15, i_wholesale_cost#16, i_brand#17] ===== Subqueries ===== @@ -233,27 +233,27 @@ BroadcastExchange (43) (39) Scan parquet default.date_dim -Output [2]: [d_date_sk#8, d_month_seq#39] +Output [2]: [d_date_sk#8, d_month_seq#33] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1176), LessThanOrEqual(d_month_seq,1187), IsNotNull(d_date_sk)] ReadSchema: struct (40) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#8, d_month_seq#39] +Input [2]: [d_date_sk#8, d_month_seq#33] (41) Filter [codegen id : 1] -Input [2]: [d_date_sk#8, d_month_seq#39] -Condition : (((isnotnull(d_month_seq#39) AND (d_month_seq#39 >= 1176)) AND (d_month_seq#39 <= 1187)) AND isnotnull(d_date_sk#8)) +Input [2]: [d_date_sk#8, d_month_seq#33] +Condition : (((isnotnull(d_month_seq#33) AND (d_month_seq#33 >= 1176)) AND (d_month_seq#33 <= 1187)) AND isnotnull(d_date_sk#8)) (42) Project [codegen id : 1] Output [1]: [d_date_sk#8] -Input [2]: [d_date_sk#8, d_month_seq#39] +Input [2]: [d_date_sk#8, d_month_seq#33] (43) BroadcastExchange Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:2 Hosting operator id = 23 Hosting Expression = ss_sold_date_sk#24 IN dynamicpruning#7 +Subquery:2 Hosting operator id = 23 Hosting Expression = ss_sold_date_sk#21 IN dynamicpruning#7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt index 85aa68cbedd88..fbcec9b4464be 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66.sf100/explain.txt @@ -88,7 +88,7 @@ Input [2]: [sm_ship_mode_sk#9, sm_carrier#10] (8) BroadcastExchange Input [1]: [sm_ship_mode_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_ship_mode_sk#2] @@ -100,197 +100,197 @@ Output [6]: [ws_sold_time_sk#1, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_p Input [8]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, sm_ship_mode_sk#9] (11) Scan parquet default.time_dim -Output [2]: [t_time_sk#12, t_time#13] +Output [2]: [t_time_sk#11, t_time#12] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_time_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [2]: [t_time_sk#12, t_time#13] +Input [2]: [t_time_sk#11, t_time#12] (13) Filter [codegen id : 2] -Input [2]: [t_time_sk#12, t_time#13] -Condition : (((isnotnull(t_time#13) AND (t_time#13 >= 30838)) AND (t_time#13 <= 59638)) AND isnotnull(t_time_sk#12)) +Input [2]: [t_time_sk#11, t_time#12] +Condition : (((isnotnull(t_time#12) AND (t_time#12 >= 30838)) AND (t_time#12 <= 59638)) AND isnotnull(t_time_sk#11)) (14) Project [codegen id : 2] -Output [1]: [t_time_sk#12] -Input [2]: [t_time_sk#12, t_time#13] +Output [1]: [t_time_sk#11] +Input [2]: [t_time_sk#11, t_time#12] (15) BroadcastExchange -Input [1]: [t_time_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Input [1]: [t_time_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_sold_time_sk#1] -Right keys [1]: [t_time_sk#12] +Right keys [1]: [t_time_sk#11] Join condition: None (17) Project [codegen id : 5] Output [5]: [ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7] -Input [7]: [ws_sold_time_sk#1, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, t_time_sk#12] +Input [7]: [ws_sold_time_sk#1, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, t_time_sk#11] (18) ReusedExchange [Reuses operator id: 56] -Output [3]: [d_date_sk#15, d_year#16, d_moy#17] +Output [3]: [d_date_sk#13, d_year#14, d_moy#15] (19) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_sold_date_sk#7] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#13] Join condition: None (20) Project [codegen id : 5] -Output [6]: [ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, d_year#16, d_moy#17] -Input [8]: [ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, d_date_sk#15, d_year#16, d_moy#17] +Output [6]: [ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, d_year#14, d_moy#15] +Input [8]: [ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, d_date_sk#13, d_year#14, d_moy#15] (21) Scan parquet default.warehouse -Output [7]: [w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] +Output [7]: [w_warehouse_sk#16, w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 4] -Input [7]: [w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] +Input [7]: [w_warehouse_sk#16, w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22] (23) Filter [codegen id : 4] -Input [7]: [w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] -Condition : isnotnull(w_warehouse_sk#18) +Input [7]: [w_warehouse_sk#16, w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22] +Condition : isnotnull(w_warehouse_sk#16) (24) BroadcastExchange -Input [7]: [w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] +Input [7]: [w_warehouse_sk#16, w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (25) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_warehouse_sk#3] -Right keys [1]: [w_warehouse_sk#18] +Right keys [1]: [w_warehouse_sk#16] Join condition: None (26) Project [codegen id : 5] -Output [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16, d_moy#17] -Input [13]: [ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, d_year#16, d_moy#17, w_warehouse_sk#18, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24] +Output [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, d_year#14, d_moy#15] +Input [13]: [ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, d_year#14, d_moy#15, w_warehouse_sk#16, w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22] (27) HashAggregate [codegen id : 5] -Input [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16, d_moy#17] -Keys [7]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16] -Functions [24]: [partial_sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] -Aggregate Attributes [48]: [sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71, sum#72, isEmpty#73] -Results [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] +Input [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, d_year#14, d_moy#15] +Keys [7]: [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, d_year#14] +Functions [24]: [partial_sum(CASE WHEN (d_moy#15 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#15 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#23, isEmpty#24, sum#25, isEmpty#26, sum#27, isEmpty#28, sum#29, isEmpty#30, sum#31, isEmpty#32, sum#33, isEmpty#34, sum#35, isEmpty#36, sum#37, isEmpty#38, sum#39, isEmpty#40, sum#41, isEmpty#42, sum#43, isEmpty#44, sum#45, isEmpty#46, sum#47, isEmpty#48, sum#49, isEmpty#50, sum#51, isEmpty#52, sum#53, isEmpty#54, sum#55, isEmpty#56, sum#57, isEmpty#58, sum#59, isEmpty#60, sum#61, isEmpty#62, sum#63, isEmpty#64, sum#65, isEmpty#66, sum#67, isEmpty#68, sum#69, isEmpty#70] +Results [55]: [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, d_year#14, sum#71, isEmpty#72, sum#73, isEmpty#74, sum#75, isEmpty#76, sum#77, isEmpty#78, sum#79, isEmpty#80, sum#81, isEmpty#82, sum#83, isEmpty#84, sum#85, isEmpty#86, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96, sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116, sum#117, isEmpty#118] (28) Exchange -Input [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] -Arguments: hashpartitioning(w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16, 5), ENSURE_REQUIREMENTS, [id=#122] +Input [55]: [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, d_year#14, sum#71, isEmpty#72, sum#73, isEmpty#74, sum#75, isEmpty#76, sum#77, isEmpty#78, sum#79, isEmpty#80, sum#81, isEmpty#82, sum#83, isEmpty#84, sum#85, isEmpty#86, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96, sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116, sum#117, isEmpty#118] +Arguments: hashpartitioning(w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, d_year#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 6] -Input [55]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] -Keys [7]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, d_year#16] -Functions [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#146] -Results [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, DHL,BARIAN AS ship_carriers#147, d_year#16 AS year#148, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#17 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#17 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#17 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#17 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#17 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#17 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN (d_moy#17 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#17 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#17 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#17 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#17 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#17 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#146 AS dec_net#172] +Input [55]: [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, d_year#14, sum#71, isEmpty#72, sum#73, isEmpty#74, sum#75, isEmpty#76, sum#77, isEmpty#78, sum#79, isEmpty#80, sum#81, isEmpty#82, sum#83, isEmpty#84, sum#85, isEmpty#86, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96, sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116, sum#117, isEmpty#118] +Keys [7]: [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, d_year#14] +Functions [24]: [sum(CASE WHEN (d_moy#15 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#15 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#15 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#119, sum(CASE WHEN (d_moy#15 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#120, sum(CASE WHEN (d_moy#15 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#121, sum(CASE WHEN (d_moy#15 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#122, sum(CASE WHEN (d_moy#15 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#15 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#15 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#15 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#15 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#15 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#15 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#15 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#15 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#15 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#15 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#15 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#15 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#15 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#15 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#15 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#15 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#15 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#15 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#15 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#142] +Results [32]: [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, DHL,BARIAN AS ship_carriers#143, d_year#14 AS year#144, sum(CASE WHEN (d_moy#15 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#119 AS jan_sales#145, sum(CASE WHEN (d_moy#15 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#120 AS feb_sales#146, sum(CASE WHEN (d_moy#15 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#121 AS mar_sales#147, sum(CASE WHEN (d_moy#15 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#122 AS apr_sales#148, sum(CASE WHEN (d_moy#15 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#123 AS may_sales#149, sum(CASE WHEN (d_moy#15 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#124 AS jun_sales#150, sum(CASE WHEN (d_moy#15 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#125 AS jul_sales#151, sum(CASE WHEN (d_moy#15 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#126 AS aug_sales#152, sum(CASE WHEN (d_moy#15 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#127 AS sep_sales#153, sum(CASE WHEN (d_moy#15 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#128 AS oct_sales#154, sum(CASE WHEN (d_moy#15 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#129 AS nov_sales#155, sum(CASE WHEN (d_moy#15 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#130 AS dec_sales#156, sum(CASE WHEN (d_moy#15 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#131 AS jan_net#157, sum(CASE WHEN (d_moy#15 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#132 AS feb_net#158, sum(CASE WHEN (d_moy#15 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#133 AS mar_net#159, sum(CASE WHEN (d_moy#15 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#134 AS apr_net#160, sum(CASE WHEN (d_moy#15 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#135 AS may_net#161, sum(CASE WHEN (d_moy#15 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#136 AS jun_net#162, sum(CASE WHEN (d_moy#15 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#137 AS jul_net#163, sum(CASE WHEN (d_moy#15 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#138 AS aug_net#164, sum(CASE WHEN (d_moy#15 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#139 AS sep_net#165, sum(CASE WHEN (d_moy#15 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#140 AS oct_net#166, sum(CASE WHEN (d_moy#15 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#141 AS nov_net#167, sum(CASE WHEN (d_moy#15 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#142 AS dec_net#168] (30) Scan parquet default.catalog_sales -Output [7]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179] +Output [7]: [cs_sold_time_sk#169, cs_ship_mode_sk#170, cs_warehouse_sk#171, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, cs_sold_date_sk#175] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#179), dynamicpruningexpression(cs_sold_date_sk#179 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(cs_sold_date_sk#175), dynamicpruningexpression(cs_sold_date_sk#175 IN dynamicpruning#8)] PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs_ship_mode_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 11] -Input [7]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179] +Input [7]: [cs_sold_time_sk#169, cs_ship_mode_sk#170, cs_warehouse_sk#171, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, cs_sold_date_sk#175] (32) Filter [codegen id : 11] -Input [7]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179] -Condition : ((isnotnull(cs_warehouse_sk#175) AND isnotnull(cs_sold_time_sk#173)) AND isnotnull(cs_ship_mode_sk#174)) +Input [7]: [cs_sold_time_sk#169, cs_ship_mode_sk#170, cs_warehouse_sk#171, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, cs_sold_date_sk#175] +Condition : ((isnotnull(cs_warehouse_sk#171) AND isnotnull(cs_sold_time_sk#169)) AND isnotnull(cs_ship_mode_sk#170)) (33) ReusedExchange [Reuses operator id: 8] -Output [1]: [sm_ship_mode_sk#180] +Output [1]: [sm_ship_mode_sk#176] (34) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_ship_mode_sk#174] -Right keys [1]: [sm_ship_mode_sk#180] +Left keys [1]: [cs_ship_mode_sk#170] +Right keys [1]: [sm_ship_mode_sk#176] Join condition: None (35) Project [codegen id : 11] -Output [6]: [cs_sold_time_sk#173, cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179] -Input [8]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179, sm_ship_mode_sk#180] +Output [6]: [cs_sold_time_sk#169, cs_warehouse_sk#171, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, cs_sold_date_sk#175] +Input [8]: [cs_sold_time_sk#169, cs_ship_mode_sk#170, cs_warehouse_sk#171, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, cs_sold_date_sk#175, sm_ship_mode_sk#176] (36) ReusedExchange [Reuses operator id: 15] -Output [1]: [t_time_sk#181] +Output [1]: [t_time_sk#177] (37) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_time_sk#173] -Right keys [1]: [t_time_sk#181] +Left keys [1]: [cs_sold_time_sk#169] +Right keys [1]: [t_time_sk#177] Join condition: None (38) Project [codegen id : 11] -Output [5]: [cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179] -Input [7]: [cs_sold_time_sk#173, cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179, t_time_sk#181] +Output [5]: [cs_warehouse_sk#171, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, cs_sold_date_sk#175] +Input [7]: [cs_sold_time_sk#169, cs_warehouse_sk#171, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, cs_sold_date_sk#175, t_time_sk#177] (39) ReusedExchange [Reuses operator id: 56] -Output [3]: [d_date_sk#182, d_year#183, d_moy#184] +Output [3]: [d_date_sk#178, d_year#179, d_moy#180] (40) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_date_sk#179] -Right keys [1]: [d_date_sk#182] +Left keys [1]: [cs_sold_date_sk#175] +Right keys [1]: [d_date_sk#178] Join condition: None (41) Project [codegen id : 11] -Output [6]: [cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, d_year#183, d_moy#184] -Input [8]: [cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179, d_date_sk#182, d_year#183, d_moy#184] +Output [6]: [cs_warehouse_sk#171, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, d_year#179, d_moy#180] +Input [8]: [cs_warehouse_sk#171, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, cs_sold_date_sk#175, d_date_sk#178, d_year#179, d_moy#180] (42) ReusedExchange [Reuses operator id: 24] -Output [7]: [w_warehouse_sk#185, w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191] +Output [7]: [w_warehouse_sk#181, w_warehouse_name#182, w_warehouse_sq_ft#183, w_city#184, w_county#185, w_state#186, w_country#187] (43) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_warehouse_sk#175] -Right keys [1]: [w_warehouse_sk#185] +Left keys [1]: [cs_warehouse_sk#171] +Right keys [1]: [w_warehouse_sk#181] Join condition: None (44) Project [codegen id : 11] -Output [11]: [cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183, d_moy#184] -Input [13]: [cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, d_year#183, d_moy#184, w_warehouse_sk#185, w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191] +Output [11]: [cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, w_warehouse_name#182, w_warehouse_sq_ft#183, w_city#184, w_county#185, w_state#186, w_country#187, d_year#179, d_moy#180] +Input [13]: [cs_warehouse_sk#171, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, d_year#179, d_moy#180, w_warehouse_sk#181, w_warehouse_name#182, w_warehouse_sq_ft#183, w_city#184, w_county#185, w_state#186, w_country#187] (45) HashAggregate [codegen id : 11] -Input [11]: [cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183, d_moy#184] -Keys [7]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183] -Functions [24]: [partial_sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] -Aggregate Attributes [48]: [sum#192, isEmpty#193, sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199, sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205, sum#206, isEmpty#207, sum#208, isEmpty#209, sum#210, isEmpty#211, sum#212, isEmpty#213, sum#214, isEmpty#215, sum#216, isEmpty#217, sum#218, isEmpty#219, sum#220, isEmpty#221, sum#222, isEmpty#223, sum#224, isEmpty#225, sum#226, isEmpty#227, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239] -Results [55]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283, sum#284, isEmpty#285, sum#286, isEmpty#287] +Input [11]: [cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, w_warehouse_name#182, w_warehouse_sq_ft#183, w_city#184, w_county#185, w_state#186, w_country#187, d_year#179, d_moy#180] +Keys [7]: [w_warehouse_name#182, w_warehouse_sq_ft#183, w_city#184, w_county#185, w_state#186, w_country#187, d_year#179] +Functions [24]: [partial_sum(CASE WHEN (d_moy#180 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#180 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#188, isEmpty#189, sum#190, isEmpty#191, sum#192, isEmpty#193, sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199, sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205, sum#206, isEmpty#207, sum#208, isEmpty#209, sum#210, isEmpty#211, sum#212, isEmpty#213, sum#214, isEmpty#215, sum#216, isEmpty#217, sum#218, isEmpty#219, sum#220, isEmpty#221, sum#222, isEmpty#223, sum#224, isEmpty#225, sum#226, isEmpty#227, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235] +Results [55]: [w_warehouse_name#182, w_warehouse_sq_ft#183, w_city#184, w_county#185, w_state#186, w_country#187, d_year#179, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283] (46) Exchange -Input [55]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283, sum#284, isEmpty#285, sum#286, isEmpty#287] -Arguments: hashpartitioning(w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183, 5), ENSURE_REQUIREMENTS, [id=#288] +Input [55]: [w_warehouse_name#182, w_warehouse_sq_ft#183, w_city#184, w_county#185, w_state#186, w_country#187, d_year#179, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283] +Arguments: hashpartitioning(w_warehouse_name#182, w_warehouse_sq_ft#183, w_city#184, w_county#185, w_state#186, w_country#187, d_year#179, 5), ENSURE_REQUIREMENTS, [plan_id=5] (47) HashAggregate [codegen id : 12] -Input [55]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283, sum#284, isEmpty#285, sum#286, isEmpty#287] -Keys [7]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, d_year#183] -Functions [24]: [sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#300, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#301, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#302, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#303, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#304, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#305, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#306, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#307, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#308, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#309, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#310, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#311, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#312] -Results [32]: [w_warehouse_name#186, w_warehouse_sq_ft#187, w_city#188, w_county#189, w_state#190, w_country#191, DHL,BARIAN AS ship_carriers#313, d_year#183 AS year#314, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#289 AS jan_sales#315, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#290 AS feb_sales#316, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#291 AS mar_sales#317, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#292 AS apr_sales#318, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#293 AS may_sales#319, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#294 AS jun_sales#320, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#295 AS jul_sales#321, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#296 AS aug_sales#322, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#297 AS sep_sales#323, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#298 AS oct_sales#324, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#299 AS nov_sales#325, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#300 AS dec_sales#326, sum(CASE WHEN (d_moy#184 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#301 AS jan_net#327, sum(CASE WHEN (d_moy#184 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#302 AS feb_net#328, sum(CASE WHEN (d_moy#184 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#303 AS mar_net#329, sum(CASE WHEN (d_moy#184 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#304 AS apr_net#330, sum(CASE WHEN (d_moy#184 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#305 AS may_net#331, sum(CASE WHEN (d_moy#184 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#306 AS jun_net#332, sum(CASE WHEN (d_moy#184 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#307 AS jul_net#333, sum(CASE WHEN (d_moy#184 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#308 AS aug_net#334, sum(CASE WHEN (d_moy#184 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#309 AS sep_net#335, sum(CASE WHEN (d_moy#184 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#310 AS oct_net#336, sum(CASE WHEN (d_moy#184 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#311 AS nov_net#337, sum(CASE WHEN (d_moy#184 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#312 AS dec_net#338] +Input [55]: [w_warehouse_name#182, w_warehouse_sq_ft#183, w_city#184, w_county#185, w_state#186, w_country#187, d_year#179, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283] +Keys [7]: [w_warehouse_name#182, w_warehouse_sq_ft#183, w_city#184, w_county#185, w_state#186, w_country#187, d_year#179] +Functions [24]: [sum(CASE WHEN (d_moy#180 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#180 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#180 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#284, sum(CASE WHEN (d_moy#180 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#285, sum(CASE WHEN (d_moy#180 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#286, sum(CASE WHEN (d_moy#180 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#287, sum(CASE WHEN (d_moy#180 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#288, sum(CASE WHEN (d_moy#180 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#180 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#180 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#180 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#180 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#180 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#180 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#180 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#180 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#180 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#180 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#180 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#300, sum(CASE WHEN (d_moy#180 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#301, sum(CASE WHEN (d_moy#180 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#302, sum(CASE WHEN (d_moy#180 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#303, sum(CASE WHEN (d_moy#180 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#304, sum(CASE WHEN (d_moy#180 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#305, sum(CASE WHEN (d_moy#180 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#306, sum(CASE WHEN (d_moy#180 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#307] +Results [32]: [w_warehouse_name#182, w_warehouse_sq_ft#183, w_city#184, w_county#185, w_state#186, w_country#187, DHL,BARIAN AS ship_carriers#308, d_year#179 AS year#309, sum(CASE WHEN (d_moy#180 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#284 AS jan_sales#310, sum(CASE WHEN (d_moy#180 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#285 AS feb_sales#311, sum(CASE WHEN (d_moy#180 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#286 AS mar_sales#312, sum(CASE WHEN (d_moy#180 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#287 AS apr_sales#313, sum(CASE WHEN (d_moy#180 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#288 AS may_sales#314, sum(CASE WHEN (d_moy#180 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#289 AS jun_sales#315, sum(CASE WHEN (d_moy#180 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#290 AS jul_sales#316, sum(CASE WHEN (d_moy#180 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#291 AS aug_sales#317, sum(CASE WHEN (d_moy#180 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#292 AS sep_sales#318, sum(CASE WHEN (d_moy#180 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#293 AS oct_sales#319, sum(CASE WHEN (d_moy#180 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#294 AS nov_sales#320, sum(CASE WHEN (d_moy#180 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#295 AS dec_sales#321, sum(CASE WHEN (d_moy#180 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#296 AS jan_net#322, sum(CASE WHEN (d_moy#180 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#297 AS feb_net#323, sum(CASE WHEN (d_moy#180 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#298 AS mar_net#324, sum(CASE WHEN (d_moy#180 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#299 AS apr_net#325, sum(CASE WHEN (d_moy#180 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#300 AS may_net#326, sum(CASE WHEN (d_moy#180 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#301 AS jun_net#327, sum(CASE WHEN (d_moy#180 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#302 AS jul_net#328, sum(CASE WHEN (d_moy#180 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#303 AS aug_net#329, sum(CASE WHEN (d_moy#180 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#304 AS sep_net#330, sum(CASE WHEN (d_moy#180 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#305 AS oct_net#331, sum(CASE WHEN (d_moy#180 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#306 AS nov_net#332, sum(CASE WHEN (d_moy#180 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#307 AS dec_net#333] (48) Union (49) HashAggregate [codegen id : 13] -Input [32]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, jan_sales#149, feb_sales#150, mar_sales#151, apr_sales#152, may_sales#153, jun_sales#154, jul_sales#155, aug_sales#156, sep_sales#157, oct_sales#158, nov_sales#159, dec_sales#160, jan_net#161, feb_net#162, mar_net#163, apr_net#164, may_net#165, jun_net#166, jul_net#167, aug_net#168, sep_net#169, oct_net#170, nov_net#171, dec_net#172] -Keys [8]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148] -Functions [36]: [partial_sum(jan_sales#149), partial_sum(feb_sales#150), partial_sum(mar_sales#151), partial_sum(apr_sales#152), partial_sum(may_sales#153), partial_sum(jun_sales#154), partial_sum(jul_sales#155), partial_sum(aug_sales#156), partial_sum(sep_sales#157), partial_sum(oct_sales#158), partial_sum(nov_sales#159), partial_sum(dec_sales#160), partial_sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), partial_sum(jan_net#161), partial_sum(feb_net#162), partial_sum(mar_net#163), partial_sum(apr_net#164), partial_sum(may_net#165), partial_sum(jun_net#166), partial_sum(jul_net#167), partial_sum(aug_net#168), partial_sum(sep_net#169), partial_sum(oct_net#170), partial_sum(nov_net#171), partial_sum(dec_net#172)] -Aggregate Attributes [72]: [sum#339, isEmpty#340, sum#341, isEmpty#342, sum#343, isEmpty#344, sum#345, isEmpty#346, sum#347, isEmpty#348, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, isEmpty#396, sum#397, isEmpty#398, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410] -Results [80]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476, sum#477, isEmpty#478, sum#479, isEmpty#480, sum#481, isEmpty#482] +Input [32]: [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, ship_carriers#143, year#144, jan_sales#145, feb_sales#146, mar_sales#147, apr_sales#148, may_sales#149, jun_sales#150, jul_sales#151, aug_sales#152, sep_sales#153, oct_sales#154, nov_sales#155, dec_sales#156, jan_net#157, feb_net#158, mar_net#159, apr_net#160, may_net#161, jun_net#162, jul_net#163, aug_net#164, sep_net#165, oct_net#166, nov_net#167, dec_net#168] +Keys [8]: [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, ship_carriers#143, year#144] +Functions [36]: [partial_sum(jan_sales#145), partial_sum(feb_sales#146), partial_sum(mar_sales#147), partial_sum(apr_sales#148), partial_sum(may_sales#149), partial_sum(jun_sales#150), partial_sum(jul_sales#151), partial_sum(aug_sales#152), partial_sum(sep_sales#153), partial_sum(oct_sales#154), partial_sum(nov_sales#155), partial_sum(dec_sales#156), partial_sum(CheckOverflow((promote_precision(jan_sales#145) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(feb_sales#146) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(mar_sales#147) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(apr_sales#148) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(may_sales#149) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jun_sales#150) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jul_sales#151) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(aug_sales#152) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(sep_sales#153) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(oct_sales#154) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(nov_sales#155) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(dec_sales#156) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), partial_sum(jan_net#157), partial_sum(feb_net#158), partial_sum(mar_net#159), partial_sum(apr_net#160), partial_sum(may_net#161), partial_sum(jun_net#162), partial_sum(jul_net#163), partial_sum(aug_net#164), partial_sum(sep_net#165), partial_sum(oct_net#166), partial_sum(nov_net#167), partial_sum(dec_net#168)] +Aggregate Attributes [72]: [sum#334, isEmpty#335, sum#336, isEmpty#337, sum#338, isEmpty#339, sum#340, isEmpty#341, sum#342, isEmpty#343, sum#344, isEmpty#345, sum#346, isEmpty#347, sum#348, isEmpty#349, sum#350, isEmpty#351, sum#352, isEmpty#353, sum#354, isEmpty#355, sum#356, isEmpty#357, sum#358, isEmpty#359, sum#360, isEmpty#361, sum#362, isEmpty#363, sum#364, isEmpty#365, sum#366, isEmpty#367, sum#368, isEmpty#369, sum#370, isEmpty#371, sum#372, isEmpty#373, sum#374, isEmpty#375, sum#376, isEmpty#377, sum#378, isEmpty#379, sum#380, isEmpty#381, sum#382, isEmpty#383, sum#384, isEmpty#385, sum#386, isEmpty#387, sum#388, isEmpty#389, sum#390, isEmpty#391, sum#392, isEmpty#393, sum#394, isEmpty#395, sum#396, isEmpty#397, sum#398, isEmpty#399, sum#400, isEmpty#401, sum#402, isEmpty#403, sum#404, isEmpty#405] +Results [80]: [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, ship_carriers#143, year#144, sum#406, isEmpty#407, sum#408, isEmpty#409, sum#410, isEmpty#411, sum#412, isEmpty#413, sum#414, isEmpty#415, sum#416, isEmpty#417, sum#418, isEmpty#419, sum#420, isEmpty#421, sum#422, isEmpty#423, sum#424, isEmpty#425, sum#426, isEmpty#427, sum#428, isEmpty#429, sum#430, isEmpty#431, sum#432, isEmpty#433, sum#434, isEmpty#435, sum#436, isEmpty#437, sum#438, isEmpty#439, sum#440, isEmpty#441, sum#442, isEmpty#443, sum#444, isEmpty#445, sum#446, isEmpty#447, sum#448, isEmpty#449, sum#450, isEmpty#451, sum#452, isEmpty#453, sum#454, isEmpty#455, sum#456, isEmpty#457, sum#458, isEmpty#459, sum#460, isEmpty#461, sum#462, isEmpty#463, sum#464, isEmpty#465, sum#466, isEmpty#467, sum#468, isEmpty#469, sum#470, isEmpty#471, sum#472, isEmpty#473, sum#474, isEmpty#475, sum#476, isEmpty#477] (50) Exchange -Input [80]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476, sum#477, isEmpty#478, sum#479, isEmpty#480, sum#481, isEmpty#482] -Arguments: hashpartitioning(w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, 5), ENSURE_REQUIREMENTS, [id=#483] +Input [80]: [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, ship_carriers#143, year#144, sum#406, isEmpty#407, sum#408, isEmpty#409, sum#410, isEmpty#411, sum#412, isEmpty#413, sum#414, isEmpty#415, sum#416, isEmpty#417, sum#418, isEmpty#419, sum#420, isEmpty#421, sum#422, isEmpty#423, sum#424, isEmpty#425, sum#426, isEmpty#427, sum#428, isEmpty#429, sum#430, isEmpty#431, sum#432, isEmpty#433, sum#434, isEmpty#435, sum#436, isEmpty#437, sum#438, isEmpty#439, sum#440, isEmpty#441, sum#442, isEmpty#443, sum#444, isEmpty#445, sum#446, isEmpty#447, sum#448, isEmpty#449, sum#450, isEmpty#451, sum#452, isEmpty#453, sum#454, isEmpty#455, sum#456, isEmpty#457, sum#458, isEmpty#459, sum#460, isEmpty#461, sum#462, isEmpty#463, sum#464, isEmpty#465, sum#466, isEmpty#467, sum#468, isEmpty#469, sum#470, isEmpty#471, sum#472, isEmpty#473, sum#474, isEmpty#475, sum#476, isEmpty#477] +Arguments: hashpartitioning(w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, ship_carriers#143, year#144, 5), ENSURE_REQUIREMENTS, [plan_id=6] (51) HashAggregate [codegen id : 14] -Input [80]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476, sum#477, isEmpty#478, sum#479, isEmpty#480, sum#481, isEmpty#482] -Keys [8]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148] -Functions [36]: [sum(jan_sales#149), sum(feb_sales#150), sum(mar_sales#151), sum(apr_sales#152), sum(may_sales#153), sum(jun_sales#154), sum(jul_sales#155), sum(aug_sales#156), sum(sep_sales#157), sum(oct_sales#158), sum(nov_sales#159), sum(dec_sales#160), sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12))), sum(jan_net#161), sum(feb_net#162), sum(mar_net#163), sum(apr_net#164), sum(may_net#165), sum(jun_net#166), sum(jul_net#167), sum(aug_net#168), sum(sep_net#169), sum(oct_net#170), sum(nov_net#171), sum(dec_net#172)] -Aggregate Attributes [36]: [sum(jan_sales#149)#484, sum(feb_sales#150)#485, sum(mar_sales#151)#486, sum(apr_sales#152)#487, sum(may_sales#153)#488, sum(jun_sales#154)#489, sum(jul_sales#155)#490, sum(aug_sales#156)#491, sum(sep_sales#157)#492, sum(oct_sales#158)#493, sum(nov_sales#159)#494, sum(dec_sales#160)#495, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#496, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#497, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#498, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#499, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#500, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#501, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#502, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#503, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#504, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#505, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#506, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#507, sum(jan_net#161)#508, sum(feb_net#162)#509, sum(mar_net#163)#510, sum(apr_net#164)#511, sum(may_net#165)#512, sum(jun_net#166)#513, sum(jul_net#167)#514, sum(aug_net#168)#515, sum(sep_net#169)#516, sum(oct_net#170)#517, sum(nov_net#171)#518, sum(dec_net#172)#519] -Results [44]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, sum(jan_sales#149)#484 AS jan_sales#520, sum(feb_sales#150)#485 AS feb_sales#521, sum(mar_sales#151)#486 AS mar_sales#522, sum(apr_sales#152)#487 AS apr_sales#523, sum(may_sales#153)#488 AS may_sales#524, sum(jun_sales#154)#489 AS jun_sales#525, sum(jul_sales#155)#490 AS jul_sales#526, sum(aug_sales#156)#491 AS aug_sales#527, sum(sep_sales#157)#492 AS sep_sales#528, sum(oct_sales#158)#493 AS oct_sales#529, sum(nov_sales#159)#494 AS nov_sales#530, sum(dec_sales#160)#495 AS dec_sales#531, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#496 AS jan_sales_per_sq_foot#532, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#497 AS feb_sales_per_sq_foot#533, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#498 AS mar_sales_per_sq_foot#534, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#499 AS apr_sales_per_sq_foot#535, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#500 AS may_sales_per_sq_foot#536, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#501 AS jun_sales_per_sq_foot#537, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#502 AS jul_sales_per_sq_foot#538, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#503 AS aug_sales_per_sq_foot#539, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#504 AS sep_sales_per_sq_foot#540, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#505 AS oct_sales_per_sq_foot#541, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#506 AS nov_sales_per_sq_foot#542, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#20 as decimal(28,2)))), DecimalType(38,12)))#507 AS dec_sales_per_sq_foot#543, sum(jan_net#161)#508 AS jan_net#544, sum(feb_net#162)#509 AS feb_net#545, sum(mar_net#163)#510 AS mar_net#546, sum(apr_net#164)#511 AS apr_net#547, sum(may_net#165)#512 AS may_net#548, sum(jun_net#166)#513 AS jun_net#549, sum(jul_net#167)#514 AS jul_net#550, sum(aug_net#168)#515 AS aug_net#551, sum(sep_net#169)#516 AS sep_net#552, sum(oct_net#170)#517 AS oct_net#553, sum(nov_net#171)#518 AS nov_net#554, sum(dec_net#172)#519 AS dec_net#555] +Input [80]: [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, ship_carriers#143, year#144, sum#406, isEmpty#407, sum#408, isEmpty#409, sum#410, isEmpty#411, sum#412, isEmpty#413, sum#414, isEmpty#415, sum#416, isEmpty#417, sum#418, isEmpty#419, sum#420, isEmpty#421, sum#422, isEmpty#423, sum#424, isEmpty#425, sum#426, isEmpty#427, sum#428, isEmpty#429, sum#430, isEmpty#431, sum#432, isEmpty#433, sum#434, isEmpty#435, sum#436, isEmpty#437, sum#438, isEmpty#439, sum#440, isEmpty#441, sum#442, isEmpty#443, sum#444, isEmpty#445, sum#446, isEmpty#447, sum#448, isEmpty#449, sum#450, isEmpty#451, sum#452, isEmpty#453, sum#454, isEmpty#455, sum#456, isEmpty#457, sum#458, isEmpty#459, sum#460, isEmpty#461, sum#462, isEmpty#463, sum#464, isEmpty#465, sum#466, isEmpty#467, sum#468, isEmpty#469, sum#470, isEmpty#471, sum#472, isEmpty#473, sum#474, isEmpty#475, sum#476, isEmpty#477] +Keys [8]: [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, ship_carriers#143, year#144] +Functions [36]: [sum(jan_sales#145), sum(feb_sales#146), sum(mar_sales#147), sum(apr_sales#148), sum(may_sales#149), sum(jun_sales#150), sum(jul_sales#151), sum(aug_sales#152), sum(sep_sales#153), sum(oct_sales#154), sum(nov_sales#155), sum(dec_sales#156), sum(CheckOverflow((promote_precision(jan_sales#145) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(feb_sales#146) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(mar_sales#147) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(apr_sales#148) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(may_sales#149) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jun_sales#150) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jul_sales#151) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(aug_sales#152) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(sep_sales#153) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(oct_sales#154) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(nov_sales#155) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(dec_sales#156) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12))), sum(jan_net#157), sum(feb_net#158), sum(mar_net#159), sum(apr_net#160), sum(may_net#161), sum(jun_net#162), sum(jul_net#163), sum(aug_net#164), sum(sep_net#165), sum(oct_net#166), sum(nov_net#167), sum(dec_net#168)] +Aggregate Attributes [36]: [sum(jan_sales#145)#478, sum(feb_sales#146)#479, sum(mar_sales#147)#480, sum(apr_sales#148)#481, sum(may_sales#149)#482, sum(jun_sales#150)#483, sum(jul_sales#151)#484, sum(aug_sales#152)#485, sum(sep_sales#153)#486, sum(oct_sales#154)#487, sum(nov_sales#155)#488, sum(dec_sales#156)#489, sum(CheckOverflow((promote_precision(jan_sales#145) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#490, sum(CheckOverflow((promote_precision(feb_sales#146) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#491, sum(CheckOverflow((promote_precision(mar_sales#147) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#492, sum(CheckOverflow((promote_precision(apr_sales#148) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#493, sum(CheckOverflow((promote_precision(may_sales#149) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#494, sum(CheckOverflow((promote_precision(jun_sales#150) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#495, sum(CheckOverflow((promote_precision(jul_sales#151) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#496, sum(CheckOverflow((promote_precision(aug_sales#152) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#497, sum(CheckOverflow((promote_precision(sep_sales#153) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#498, sum(CheckOverflow((promote_precision(oct_sales#154) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#499, sum(CheckOverflow((promote_precision(nov_sales#155) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#500, sum(CheckOverflow((promote_precision(dec_sales#156) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#501, sum(jan_net#157)#502, sum(feb_net#158)#503, sum(mar_net#159)#504, sum(apr_net#160)#505, sum(may_net#161)#506, sum(jun_net#162)#507, sum(jul_net#163)#508, sum(aug_net#164)#509, sum(sep_net#165)#510, sum(oct_net#166)#511, sum(nov_net#167)#512, sum(dec_net#168)#513] +Results [44]: [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, ship_carriers#143, year#144, sum(jan_sales#145)#478 AS jan_sales#514, sum(feb_sales#146)#479 AS feb_sales#515, sum(mar_sales#147)#480 AS mar_sales#516, sum(apr_sales#148)#481 AS apr_sales#517, sum(may_sales#149)#482 AS may_sales#518, sum(jun_sales#150)#483 AS jun_sales#519, sum(jul_sales#151)#484 AS jul_sales#520, sum(aug_sales#152)#485 AS aug_sales#521, sum(sep_sales#153)#486 AS sep_sales#522, sum(oct_sales#154)#487 AS oct_sales#523, sum(nov_sales#155)#488 AS nov_sales#524, sum(dec_sales#156)#489 AS dec_sales#525, sum(CheckOverflow((promote_precision(jan_sales#145) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#490 AS jan_sales_per_sq_foot#526, sum(CheckOverflow((promote_precision(feb_sales#146) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#491 AS feb_sales_per_sq_foot#527, sum(CheckOverflow((promote_precision(mar_sales#147) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#492 AS mar_sales_per_sq_foot#528, sum(CheckOverflow((promote_precision(apr_sales#148) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#493 AS apr_sales_per_sq_foot#529, sum(CheckOverflow((promote_precision(may_sales#149) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#494 AS may_sales_per_sq_foot#530, sum(CheckOverflow((promote_precision(jun_sales#150) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#495 AS jun_sales_per_sq_foot#531, sum(CheckOverflow((promote_precision(jul_sales#151) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#496 AS jul_sales_per_sq_foot#532, sum(CheckOverflow((promote_precision(aug_sales#152) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#497 AS aug_sales_per_sq_foot#533, sum(CheckOverflow((promote_precision(sep_sales#153) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#498 AS sep_sales_per_sq_foot#534, sum(CheckOverflow((promote_precision(oct_sales#154) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#499 AS oct_sales_per_sq_foot#535, sum(CheckOverflow((promote_precision(nov_sales#155) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#500 AS nov_sales_per_sq_foot#536, sum(CheckOverflow((promote_precision(dec_sales#156) / promote_precision(cast(w_warehouse_sq_ft#18 as decimal(28,2)))), DecimalType(38,12)))#501 AS dec_sales_per_sq_foot#537, sum(jan_net#157)#502 AS jan_net#538, sum(feb_net#158)#503 AS feb_net#539, sum(mar_net#159)#504 AS mar_net#540, sum(apr_net#160)#505 AS apr_net#541, sum(may_net#161)#506 AS may_net#542, sum(jun_net#162)#507 AS jun_net#543, sum(jul_net#163)#508 AS jul_net#544, sum(aug_net#164)#509 AS aug_net#545, sum(sep_net#165)#510 AS sep_net#546, sum(oct_net#166)#511 AS oct_net#547, sum(nov_net#167)#512 AS nov_net#548, sum(dec_net#168)#513 AS dec_net#549] (52) TakeOrderedAndProject -Input [44]: [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, jan_sales#520, feb_sales#521, mar_sales#522, apr_sales#523, may_sales#524, jun_sales#525, jul_sales#526, aug_sales#527, sep_sales#528, oct_sales#529, nov_sales#530, dec_sales#531, jan_sales_per_sq_foot#532, feb_sales_per_sq_foot#533, mar_sales_per_sq_foot#534, apr_sales_per_sq_foot#535, may_sales_per_sq_foot#536, jun_sales_per_sq_foot#537, jul_sales_per_sq_foot#538, aug_sales_per_sq_foot#539, sep_sales_per_sq_foot#540, oct_sales_per_sq_foot#541, nov_sales_per_sq_foot#542, dec_sales_per_sq_foot#543, jan_net#544, feb_net#545, mar_net#546, apr_net#547, may_net#548, jun_net#549, jul_net#550, aug_net#551, sep_net#552, oct_net#553, nov_net#554, dec_net#555] -Arguments: 100, [w_warehouse_name#19 ASC NULLS FIRST], [w_warehouse_name#19, w_warehouse_sq_ft#20, w_city#21, w_county#22, w_state#23, w_country#24, ship_carriers#147, year#148, jan_sales#520, feb_sales#521, mar_sales#522, apr_sales#523, may_sales#524, jun_sales#525, jul_sales#526, aug_sales#527, sep_sales#528, oct_sales#529, nov_sales#530, dec_sales#531, jan_sales_per_sq_foot#532, feb_sales_per_sq_foot#533, mar_sales_per_sq_foot#534, apr_sales_per_sq_foot#535, may_sales_per_sq_foot#536, jun_sales_per_sq_foot#537, jul_sales_per_sq_foot#538, aug_sales_per_sq_foot#539, sep_sales_per_sq_foot#540, oct_sales_per_sq_foot#541, nov_sales_per_sq_foot#542, dec_sales_per_sq_foot#543, jan_net#544, feb_net#545, mar_net#546, apr_net#547, may_net#548, jun_net#549, jul_net#550, aug_net#551, sep_net#552, oct_net#553, nov_net#554, dec_net#555] +Input [44]: [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, ship_carriers#143, year#144, jan_sales#514, feb_sales#515, mar_sales#516, apr_sales#517, may_sales#518, jun_sales#519, jul_sales#520, aug_sales#521, sep_sales#522, oct_sales#523, nov_sales#524, dec_sales#525, jan_sales_per_sq_foot#526, feb_sales_per_sq_foot#527, mar_sales_per_sq_foot#528, apr_sales_per_sq_foot#529, may_sales_per_sq_foot#530, jun_sales_per_sq_foot#531, jul_sales_per_sq_foot#532, aug_sales_per_sq_foot#533, sep_sales_per_sq_foot#534, oct_sales_per_sq_foot#535, nov_sales_per_sq_foot#536, dec_sales_per_sq_foot#537, jan_net#538, feb_net#539, mar_net#540, apr_net#541, may_net#542, jun_net#543, jul_net#544, aug_net#545, sep_net#546, oct_net#547, nov_net#548, dec_net#549] +Arguments: 100, [w_warehouse_name#17 ASC NULLS FIRST], [w_warehouse_name#17, w_warehouse_sq_ft#18, w_city#19, w_county#20, w_state#21, w_country#22, ship_carriers#143, year#144, jan_sales#514, feb_sales#515, mar_sales#516, apr_sales#517, may_sales#518, jun_sales#519, jul_sales#520, aug_sales#521, sep_sales#522, oct_sales#523, nov_sales#524, dec_sales#525, jan_sales_per_sq_foot#526, feb_sales_per_sq_foot#527, mar_sales_per_sq_foot#528, apr_sales_per_sq_foot#529, may_sales_per_sq_foot#530, jun_sales_per_sq_foot#531, jul_sales_per_sq_foot#532, aug_sales_per_sq_foot#533, sep_sales_per_sq_foot#534, oct_sales_per_sq_foot#535, nov_sales_per_sq_foot#536, dec_sales_per_sq_foot#537, jan_net#538, feb_net#539, mar_net#540, apr_net#541, may_net#542, jun_net#543, jul_net#544, aug_net#545, sep_net#546, oct_net#547, nov_net#548, dec_net#549] ===== Subqueries ===== @@ -302,23 +302,23 @@ BroadcastExchange (56) (53) Scan parquet default.date_dim -Output [3]: [d_date_sk#15, d_year#16, d_moy#17] +Output [3]: [d_date_sk#13, d_year#14, d_moy#15] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (54) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#15, d_year#16, d_moy#17] +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] (55) Filter [codegen id : 1] -Input [3]: [d_date_sk#15, d_year#16, d_moy#17] -Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2001)) AND isnotnull(d_date_sk#13)) (56) BroadcastExchange -Input [3]: [d_date_sk#15, d_year#16, d_moy#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#556] +Input [3]: [d_date_sk#13, d_year#14, d_moy#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] -Subquery:2 Hosting operator id = 30 Hosting Expression = cs_sold_date_sk#179 IN dynamicpruning#8 +Subquery:2 Hosting operator id = 30 Hosting Expression = cs_sold_date_sk#175 IN dynamicpruning#8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt index f0b239a262c26..b2e7d4ee9a6e5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q66/explain.txt @@ -84,7 +84,7 @@ Condition : isnotnull(w_warehouse_sk#9) (7) BroadcastExchange Input [7]: [w_warehouse_sk#9, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_warehouse_sk#3] @@ -96,201 +96,201 @@ Output [12]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_ Input [14]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_warehouse_sk#3, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_sk#9, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15] (10) ReusedExchange [Reuses operator id: 56] -Output [3]: [d_date_sk#17, d_year#18, d_moy#19] +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] (11) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_sold_date_sk#7] -Right keys [1]: [d_date_sk#17] +Right keys [1]: [d_date_sk#16] Join condition: None (12) Project [codegen id : 5] -Output [13]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, d_moy#19] -Input [15]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_date_sk#17, d_year#18, d_moy#19] +Output [13]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#17, d_moy#18] +Input [15]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, ws_sold_date_sk#7, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_date_sk#16, d_year#17, d_moy#18] (13) Scan parquet default.time_dim -Output [2]: [t_time_sk#20, t_time#21] +Output [2]: [t_time_sk#19, t_time#20] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_time), GreaterThanOrEqual(t_time,30838), LessThanOrEqual(t_time,59638), IsNotNull(t_time_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [2]: [t_time_sk#20, t_time#21] +Input [2]: [t_time_sk#19, t_time#20] (15) Filter [codegen id : 3] -Input [2]: [t_time_sk#20, t_time#21] -Condition : (((isnotnull(t_time#21) AND (t_time#21 >= 30838)) AND (t_time#21 <= 59638)) AND isnotnull(t_time_sk#20)) +Input [2]: [t_time_sk#19, t_time#20] +Condition : (((isnotnull(t_time#20) AND (t_time#20 >= 30838)) AND (t_time#20 <= 59638)) AND isnotnull(t_time_sk#19)) (16) Project [codegen id : 3] -Output [1]: [t_time_sk#20] -Input [2]: [t_time_sk#20, t_time#21] +Output [1]: [t_time_sk#19] +Input [2]: [t_time_sk#19, t_time#20] (17) BroadcastExchange -Input [1]: [t_time_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [t_time_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_sold_time_sk#1] -Right keys [1]: [t_time_sk#20] +Right keys [1]: [t_time_sk#19] Join condition: None (19) Project [codegen id : 5] -Output [12]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, d_moy#19] -Input [14]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, d_moy#19, t_time_sk#20] +Output [12]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#17, d_moy#18] +Input [14]: [ws_sold_time_sk#1, ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#17, d_moy#18, t_time_sk#19] (20) Scan parquet default.ship_mode -Output [2]: [sm_ship_mode_sk#23, sm_carrier#24] +Output [2]: [sm_ship_mode_sk#21, sm_carrier#22] Batched: true Location [not included in comparison]/{warehouse_dir}/ship_mode] PushedFilters: [In(sm_carrier, [BARIAN ,DHL ]), IsNotNull(sm_ship_mode_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [2]: [sm_ship_mode_sk#23, sm_carrier#24] +Input [2]: [sm_ship_mode_sk#21, sm_carrier#22] (22) Filter [codegen id : 4] -Input [2]: [sm_ship_mode_sk#23, sm_carrier#24] -Condition : (sm_carrier#24 IN (DHL ,BARIAN ) AND isnotnull(sm_ship_mode_sk#23)) +Input [2]: [sm_ship_mode_sk#21, sm_carrier#22] +Condition : (sm_carrier#22 IN (DHL ,BARIAN ) AND isnotnull(sm_ship_mode_sk#21)) (23) Project [codegen id : 4] -Output [1]: [sm_ship_mode_sk#23] -Input [2]: [sm_ship_mode_sk#23, sm_carrier#24] +Output [1]: [sm_ship_mode_sk#21] +Input [2]: [sm_ship_mode_sk#21, sm_carrier#22] (24) BroadcastExchange -Input [1]: [sm_ship_mode_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [sm_ship_mode_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (25) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ws_ship_mode_sk#2] -Right keys [1]: [sm_ship_mode_sk#23] +Right keys [1]: [sm_ship_mode_sk#21] Join condition: None (26) Project [codegen id : 5] -Output [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, d_moy#19] -Input [13]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, d_moy#19, sm_ship_mode_sk#23] +Output [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#17, d_moy#18] +Input [13]: [ws_ship_mode_sk#2, ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#17, d_moy#18, sm_ship_mode_sk#21] (27) HashAggregate [codegen id : 5] -Input [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, d_moy#19] -Keys [7]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18] -Functions [24]: [partial_sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] -Aggregate Attributes [48]: [sum#26, isEmpty#27, sum#28, isEmpty#29, sum#30, isEmpty#31, sum#32, isEmpty#33, sum#34, isEmpty#35, sum#36, isEmpty#37, sum#38, isEmpty#39, sum#40, isEmpty#41, sum#42, isEmpty#43, sum#44, isEmpty#45, sum#46, isEmpty#47, sum#48, isEmpty#49, sum#50, isEmpty#51, sum#52, isEmpty#53, sum#54, isEmpty#55, sum#56, isEmpty#57, sum#58, isEmpty#59, sum#60, isEmpty#61, sum#62, isEmpty#63, sum#64, isEmpty#65, sum#66, isEmpty#67, sum#68, isEmpty#69, sum#70, isEmpty#71, sum#72, isEmpty#73] -Results [55]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] +Input [11]: [ws_quantity#4, ws_ext_sales_price#5, ws_net_paid#6, w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#17, d_moy#18] +Keys [7]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#17] +Functions [24]: [partial_sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#23, isEmpty#24, sum#25, isEmpty#26, sum#27, isEmpty#28, sum#29, isEmpty#30, sum#31, isEmpty#32, sum#33, isEmpty#34, sum#35, isEmpty#36, sum#37, isEmpty#38, sum#39, isEmpty#40, sum#41, isEmpty#42, sum#43, isEmpty#44, sum#45, isEmpty#46, sum#47, isEmpty#48, sum#49, isEmpty#50, sum#51, isEmpty#52, sum#53, isEmpty#54, sum#55, isEmpty#56, sum#57, isEmpty#58, sum#59, isEmpty#60, sum#61, isEmpty#62, sum#63, isEmpty#64, sum#65, isEmpty#66, sum#67, isEmpty#68, sum#69, isEmpty#70] +Results [55]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#17, sum#71, isEmpty#72, sum#73, isEmpty#74, sum#75, isEmpty#76, sum#77, isEmpty#78, sum#79, isEmpty#80, sum#81, isEmpty#82, sum#83, isEmpty#84, sum#85, isEmpty#86, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96, sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116, sum#117, isEmpty#118] (28) Exchange -Input [55]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] -Arguments: hashpartitioning(w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, 5), ENSURE_REQUIREMENTS, [id=#122] +Input [55]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#17, sum#71, isEmpty#72, sum#73, isEmpty#74, sum#75, isEmpty#76, sum#77, isEmpty#78, sum#79, isEmpty#80, sum#81, isEmpty#82, sum#83, isEmpty#84, sum#85, isEmpty#86, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96, sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116, sum#117, isEmpty#118] +Arguments: hashpartitioning(w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#17, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 6] -Input [55]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18, sum#74, isEmpty#75, sum#76, isEmpty#77, sum#78, isEmpty#79, sum#80, isEmpty#81, sum#82, isEmpty#83, sum#84, isEmpty#85, sum#86, isEmpty#87, sum#88, isEmpty#89, sum#90, isEmpty#91, sum#92, isEmpty#93, sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105, sum#106, isEmpty#107, sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119, sum#120, isEmpty#121] -Keys [7]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#18] -Functions [24]: [sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#142, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#143, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#144, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#145, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#146] -Results [32]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, DHL,BARIAN AS ship_carriers#147, d_year#18 AS year#148, sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#123 AS jan_sales#149, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#124 AS feb_sales#150, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#125 AS mar_sales#151, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#126 AS apr_sales#152, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#127 AS may_sales#153, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#128 AS jun_sales#154, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#129 AS jul_sales#155, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#130 AS aug_sales#156, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#131 AS sep_sales#157, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#132 AS oct_sales#158, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#133 AS nov_sales#159, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#134 AS dec_sales#160, sum(CASE WHEN (d_moy#19 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#135 AS jan_net#161, sum(CASE WHEN (d_moy#19 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#136 AS feb_net#162, sum(CASE WHEN (d_moy#19 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#137 AS mar_net#163, sum(CASE WHEN (d_moy#19 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#138 AS apr_net#164, sum(CASE WHEN (d_moy#19 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#139 AS may_net#165, sum(CASE WHEN (d_moy#19 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#140 AS jun_net#166, sum(CASE WHEN (d_moy#19 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#141 AS jul_net#167, sum(CASE WHEN (d_moy#19 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#142 AS aug_net#168, sum(CASE WHEN (d_moy#19 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#143 AS sep_net#169, sum(CASE WHEN (d_moy#19 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#144 AS oct_net#170, sum(CASE WHEN (d_moy#19 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#145 AS nov_net#171, sum(CASE WHEN (d_moy#19 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#146 AS dec_net#172] +Input [55]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#17, sum#71, isEmpty#72, sum#73, isEmpty#74, sum#75, isEmpty#76, sum#77, isEmpty#78, sum#79, isEmpty#80, sum#81, isEmpty#82, sum#83, isEmpty#84, sum#85, isEmpty#86, sum#87, isEmpty#88, sum#89, isEmpty#90, sum#91, isEmpty#92, sum#93, isEmpty#94, sum#95, isEmpty#96, sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108, sum#109, isEmpty#110, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116, sum#117, isEmpty#118] +Keys [7]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, d_year#17] +Functions [24]: [sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#119, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#120, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#121, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#122, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#123, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#124, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#125, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#126, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#127, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#128, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#129, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#130, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#131, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#132, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#133, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#134, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#135, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#136, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#137, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#138, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#139, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#140, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#141, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#142] +Results [32]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, DHL,BARIAN AS ship_carriers#143, d_year#17 AS year#144, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#119 AS jan_sales#145, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#120 AS feb_sales#146, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#121 AS mar_sales#147, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#122 AS apr_sales#148, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#123 AS may_sales#149, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#124 AS jun_sales#150, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#125 AS jul_sales#151, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#126 AS aug_sales#152, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#127 AS sep_sales#153, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#128 AS oct_sales#154, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#129 AS nov_sales#155, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_ext_sales_price#5 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#130 AS dec_sales#156, sum(CASE WHEN (d_moy#18 = 1) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#131 AS jan_net#157, sum(CASE WHEN (d_moy#18 = 2) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#132 AS feb_net#158, sum(CASE WHEN (d_moy#18 = 3) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#133 AS mar_net#159, sum(CASE WHEN (d_moy#18 = 4) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#134 AS apr_net#160, sum(CASE WHEN (d_moy#18 = 5) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#135 AS may_net#161, sum(CASE WHEN (d_moy#18 = 6) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#136 AS jun_net#162, sum(CASE WHEN (d_moy#18 = 7) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#137 AS jul_net#163, sum(CASE WHEN (d_moy#18 = 8) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#138 AS aug_net#164, sum(CASE WHEN (d_moy#18 = 9) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#139 AS sep_net#165, sum(CASE WHEN (d_moy#18 = 10) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#140 AS oct_net#166, sum(CASE WHEN (d_moy#18 = 11) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#141 AS nov_net#167, sum(CASE WHEN (d_moy#18 = 12) THEN CheckOverflow((promote_precision(cast(ws_net_paid#6 as decimal(12,2))) * promote_precision(cast(ws_quantity#4 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#142 AS dec_net#168] (30) Scan parquet default.catalog_sales -Output [7]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179] +Output [7]: [cs_sold_time_sk#169, cs_ship_mode_sk#170, cs_warehouse_sk#171, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, cs_sold_date_sk#175] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#179), dynamicpruningexpression(cs_sold_date_sk#179 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(cs_sold_date_sk#175), dynamicpruningexpression(cs_sold_date_sk#175 IN dynamicpruning#8)] PushedFilters: [IsNotNull(cs_warehouse_sk), IsNotNull(cs_sold_time_sk), IsNotNull(cs_ship_mode_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 11] -Input [7]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179] +Input [7]: [cs_sold_time_sk#169, cs_ship_mode_sk#170, cs_warehouse_sk#171, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, cs_sold_date_sk#175] (32) Filter [codegen id : 11] -Input [7]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179] -Condition : ((isnotnull(cs_warehouse_sk#175) AND isnotnull(cs_sold_time_sk#173)) AND isnotnull(cs_ship_mode_sk#174)) +Input [7]: [cs_sold_time_sk#169, cs_ship_mode_sk#170, cs_warehouse_sk#171, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, cs_sold_date_sk#175] +Condition : ((isnotnull(cs_warehouse_sk#171) AND isnotnull(cs_sold_time_sk#169)) AND isnotnull(cs_ship_mode_sk#170)) (33) ReusedExchange [Reuses operator id: 7] -Output [7]: [w_warehouse_sk#180, w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186] +Output [7]: [w_warehouse_sk#176, w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182] (34) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_warehouse_sk#175] -Right keys [1]: [w_warehouse_sk#180] +Left keys [1]: [cs_warehouse_sk#171] +Right keys [1]: [w_warehouse_sk#176] Join condition: None (35) Project [codegen id : 11] -Output [12]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179, w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186] -Input [14]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_warehouse_sk#175, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179, w_warehouse_sk#180, w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186] +Output [12]: [cs_sold_time_sk#169, cs_ship_mode_sk#170, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, cs_sold_date_sk#175, w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182] +Input [14]: [cs_sold_time_sk#169, cs_ship_mode_sk#170, cs_warehouse_sk#171, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, cs_sold_date_sk#175, w_warehouse_sk#176, w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182] (36) ReusedExchange [Reuses operator id: 56] -Output [3]: [d_date_sk#187, d_year#188, d_moy#189] +Output [3]: [d_date_sk#183, d_year#184, d_moy#185] (37) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_date_sk#179] -Right keys [1]: [d_date_sk#187] +Left keys [1]: [cs_sold_date_sk#175] +Right keys [1]: [d_date_sk#183] Join condition: None (38) Project [codegen id : 11] -Output [13]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, d_moy#189] -Input [15]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, cs_sold_date_sk#179, w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_date_sk#187, d_year#188, d_moy#189] +Output [13]: [cs_sold_time_sk#169, cs_ship_mode_sk#170, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182, d_year#184, d_moy#185] +Input [15]: [cs_sold_time_sk#169, cs_ship_mode_sk#170, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, cs_sold_date_sk#175, w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182, d_date_sk#183, d_year#184, d_moy#185] (39) ReusedExchange [Reuses operator id: 17] -Output [1]: [t_time_sk#190] +Output [1]: [t_time_sk#186] (40) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_sold_time_sk#173] -Right keys [1]: [t_time_sk#190] +Left keys [1]: [cs_sold_time_sk#169] +Right keys [1]: [t_time_sk#186] Join condition: None (41) Project [codegen id : 11] -Output [12]: [cs_ship_mode_sk#174, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, d_moy#189] -Input [14]: [cs_sold_time_sk#173, cs_ship_mode_sk#174, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, d_moy#189, t_time_sk#190] +Output [12]: [cs_ship_mode_sk#170, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182, d_year#184, d_moy#185] +Input [14]: [cs_sold_time_sk#169, cs_ship_mode_sk#170, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182, d_year#184, d_moy#185, t_time_sk#186] (42) ReusedExchange [Reuses operator id: 24] -Output [1]: [sm_ship_mode_sk#191] +Output [1]: [sm_ship_mode_sk#187] (43) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [cs_ship_mode_sk#174] -Right keys [1]: [sm_ship_mode_sk#191] +Left keys [1]: [cs_ship_mode_sk#170] +Right keys [1]: [sm_ship_mode_sk#187] Join condition: None (44) Project [codegen id : 11] -Output [11]: [cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, d_moy#189] -Input [13]: [cs_ship_mode_sk#174, cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, d_moy#189, sm_ship_mode_sk#191] +Output [11]: [cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182, d_year#184, d_moy#185] +Input [13]: [cs_ship_mode_sk#170, cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182, d_year#184, d_moy#185, sm_ship_mode_sk#187] (45) HashAggregate [codegen id : 11] -Input [11]: [cs_quantity#176, cs_sales_price#177, cs_net_paid_inc_tax#178, w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, d_moy#189] -Keys [7]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188] -Functions [24]: [partial_sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] -Aggregate Attributes [48]: [sum#192, isEmpty#193, sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199, sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205, sum#206, isEmpty#207, sum#208, isEmpty#209, sum#210, isEmpty#211, sum#212, isEmpty#213, sum#214, isEmpty#215, sum#216, isEmpty#217, sum#218, isEmpty#219, sum#220, isEmpty#221, sum#222, isEmpty#223, sum#224, isEmpty#225, sum#226, isEmpty#227, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235, sum#236, isEmpty#237, sum#238, isEmpty#239] -Results [55]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283, sum#284, isEmpty#285, sum#286, isEmpty#287] +Input [11]: [cs_quantity#172, cs_sales_price#173, cs_net_paid_inc_tax#174, w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182, d_year#184, d_moy#185] +Keys [7]: [w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182, d_year#184] +Functions [24]: [partial_sum(CASE WHEN (d_moy#185 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), partial_sum(CASE WHEN (d_moy#185 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] +Aggregate Attributes [48]: [sum#188, isEmpty#189, sum#190, isEmpty#191, sum#192, isEmpty#193, sum#194, isEmpty#195, sum#196, isEmpty#197, sum#198, isEmpty#199, sum#200, isEmpty#201, sum#202, isEmpty#203, sum#204, isEmpty#205, sum#206, isEmpty#207, sum#208, isEmpty#209, sum#210, isEmpty#211, sum#212, isEmpty#213, sum#214, isEmpty#215, sum#216, isEmpty#217, sum#218, isEmpty#219, sum#220, isEmpty#221, sum#222, isEmpty#223, sum#224, isEmpty#225, sum#226, isEmpty#227, sum#228, isEmpty#229, sum#230, isEmpty#231, sum#232, isEmpty#233, sum#234, isEmpty#235] +Results [55]: [w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182, d_year#184, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283] (46) Exchange -Input [55]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283, sum#284, isEmpty#285, sum#286, isEmpty#287] -Arguments: hashpartitioning(w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, 5), ENSURE_REQUIREMENTS, [id=#288] +Input [55]: [w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182, d_year#184, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283] +Arguments: hashpartitioning(w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182, d_year#184, 5), ENSURE_REQUIREMENTS, [plan_id=5] (47) HashAggregate [codegen id : 12] -Input [55]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283, sum#284, isEmpty#285, sum#286, isEmpty#287] -Keys [7]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, d_year#188] -Functions [24]: [sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] -Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#300, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#301, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#302, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#303, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#304, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#305, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#306, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#307, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#308, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#309, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#310, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#311, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#312] -Results [32]: [w_warehouse_name#181, w_warehouse_sq_ft#182, w_city#183, w_county#184, w_state#185, w_country#186, DHL,BARIAN AS ship_carriers#313, d_year#188 AS year#314, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#289 AS jan_sales#315, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#290 AS feb_sales#316, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#291 AS mar_sales#317, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#292 AS apr_sales#318, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#293 AS may_sales#319, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#294 AS jun_sales#320, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#295 AS jul_sales#321, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#296 AS aug_sales#322, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#297 AS sep_sales#323, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#298 AS oct_sales#324, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#299 AS nov_sales#325, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#177 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#300 AS dec_sales#326, sum(CASE WHEN (d_moy#189 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#301 AS jan_net#327, sum(CASE WHEN (d_moy#189 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#302 AS feb_net#328, sum(CASE WHEN (d_moy#189 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#303 AS mar_net#329, sum(CASE WHEN (d_moy#189 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#304 AS apr_net#330, sum(CASE WHEN (d_moy#189 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#305 AS may_net#331, sum(CASE WHEN (d_moy#189 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#306 AS jun_net#332, sum(CASE WHEN (d_moy#189 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#307 AS jul_net#333, sum(CASE WHEN (d_moy#189 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#308 AS aug_net#334, sum(CASE WHEN (d_moy#189 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#309 AS sep_net#335, sum(CASE WHEN (d_moy#189 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#310 AS oct_net#336, sum(CASE WHEN (d_moy#189 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#311 AS nov_net#337, sum(CASE WHEN (d_moy#189 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#178 as decimal(12,2))) * promote_precision(cast(cs_quantity#176 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#312 AS dec_net#338] +Input [55]: [w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182, d_year#184, sum#236, isEmpty#237, sum#238, isEmpty#239, sum#240, isEmpty#241, sum#242, isEmpty#243, sum#244, isEmpty#245, sum#246, isEmpty#247, sum#248, isEmpty#249, sum#250, isEmpty#251, sum#252, isEmpty#253, sum#254, isEmpty#255, sum#256, isEmpty#257, sum#258, isEmpty#259, sum#260, isEmpty#261, sum#262, isEmpty#263, sum#264, isEmpty#265, sum#266, isEmpty#267, sum#268, isEmpty#269, sum#270, isEmpty#271, sum#272, isEmpty#273, sum#274, isEmpty#275, sum#276, isEmpty#277, sum#278, isEmpty#279, sum#280, isEmpty#281, sum#282, isEmpty#283] +Keys [7]: [w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182, d_year#184] +Functions [24]: [sum(CASE WHEN (d_moy#185 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END), sum(CASE WHEN (d_moy#185 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)] +Aggregate Attributes [24]: [sum(CASE WHEN (d_moy#185 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#284, sum(CASE WHEN (d_moy#185 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#285, sum(CASE WHEN (d_moy#185 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#286, sum(CASE WHEN (d_moy#185 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#287, sum(CASE WHEN (d_moy#185 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#288, sum(CASE WHEN (d_moy#185 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#289, sum(CASE WHEN (d_moy#185 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#290, sum(CASE WHEN (d_moy#185 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#291, sum(CASE WHEN (d_moy#185 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#292, sum(CASE WHEN (d_moy#185 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#293, sum(CASE WHEN (d_moy#185 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#294, sum(CASE WHEN (d_moy#185 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#295, sum(CASE WHEN (d_moy#185 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#296, sum(CASE WHEN (d_moy#185 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#297, sum(CASE WHEN (d_moy#185 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#298, sum(CASE WHEN (d_moy#185 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#299, sum(CASE WHEN (d_moy#185 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#300, sum(CASE WHEN (d_moy#185 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#301, sum(CASE WHEN (d_moy#185 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#302, sum(CASE WHEN (d_moy#185 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#303, sum(CASE WHEN (d_moy#185 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#304, sum(CASE WHEN (d_moy#185 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#305, sum(CASE WHEN (d_moy#185 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#306, sum(CASE WHEN (d_moy#185 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#307] +Results [32]: [w_warehouse_name#177, w_warehouse_sq_ft#178, w_city#179, w_county#180, w_state#181, w_country#182, DHL,BARIAN AS ship_carriers#308, d_year#184 AS year#309, sum(CASE WHEN (d_moy#185 = 1) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#284 AS jan_sales#310, sum(CASE WHEN (d_moy#185 = 2) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#285 AS feb_sales#311, sum(CASE WHEN (d_moy#185 = 3) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#286 AS mar_sales#312, sum(CASE WHEN (d_moy#185 = 4) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#287 AS apr_sales#313, sum(CASE WHEN (d_moy#185 = 5) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#288 AS may_sales#314, sum(CASE WHEN (d_moy#185 = 6) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#289 AS jun_sales#315, sum(CASE WHEN (d_moy#185 = 7) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#290 AS jul_sales#316, sum(CASE WHEN (d_moy#185 = 8) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#291 AS aug_sales#317, sum(CASE WHEN (d_moy#185 = 9) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#292 AS sep_sales#318, sum(CASE WHEN (d_moy#185 = 10) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#293 AS oct_sales#319, sum(CASE WHEN (d_moy#185 = 11) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#294 AS nov_sales#320, sum(CASE WHEN (d_moy#185 = 12) THEN CheckOverflow((promote_precision(cast(cs_sales_price#173 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#295 AS dec_sales#321, sum(CASE WHEN (d_moy#185 = 1) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#296 AS jan_net#322, sum(CASE WHEN (d_moy#185 = 2) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#297 AS feb_net#323, sum(CASE WHEN (d_moy#185 = 3) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#298 AS mar_net#324, sum(CASE WHEN (d_moy#185 = 4) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#299 AS apr_net#325, sum(CASE WHEN (d_moy#185 = 5) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#300 AS may_net#326, sum(CASE WHEN (d_moy#185 = 6) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#301 AS jun_net#327, sum(CASE WHEN (d_moy#185 = 7) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#302 AS jul_net#328, sum(CASE WHEN (d_moy#185 = 8) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#303 AS aug_net#329, sum(CASE WHEN (d_moy#185 = 9) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#304 AS sep_net#330, sum(CASE WHEN (d_moy#185 = 10) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#305 AS oct_net#331, sum(CASE WHEN (d_moy#185 = 11) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#306 AS nov_net#332, sum(CASE WHEN (d_moy#185 = 12) THEN CheckOverflow((promote_precision(cast(cs_net_paid_inc_tax#174 as decimal(12,2))) * promote_precision(cast(cs_quantity#172 as decimal(12,2)))), DecimalType(18,2)) ELSE 0.00 END)#307 AS dec_net#333] (48) Union (49) HashAggregate [codegen id : 13] -Input [32]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, jan_sales#149, feb_sales#150, mar_sales#151, apr_sales#152, may_sales#153, jun_sales#154, jul_sales#155, aug_sales#156, sep_sales#157, oct_sales#158, nov_sales#159, dec_sales#160, jan_net#161, feb_net#162, mar_net#163, apr_net#164, may_net#165, jun_net#166, jul_net#167, aug_net#168, sep_net#169, oct_net#170, nov_net#171, dec_net#172] -Keys [8]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148] -Functions [36]: [partial_sum(jan_sales#149), partial_sum(feb_sales#150), partial_sum(mar_sales#151), partial_sum(apr_sales#152), partial_sum(may_sales#153), partial_sum(jun_sales#154), partial_sum(jul_sales#155), partial_sum(aug_sales#156), partial_sum(sep_sales#157), partial_sum(oct_sales#158), partial_sum(nov_sales#159), partial_sum(dec_sales#160), partial_sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(jan_net#161), partial_sum(feb_net#162), partial_sum(mar_net#163), partial_sum(apr_net#164), partial_sum(may_net#165), partial_sum(jun_net#166), partial_sum(jul_net#167), partial_sum(aug_net#168), partial_sum(sep_net#169), partial_sum(oct_net#170), partial_sum(nov_net#171), partial_sum(dec_net#172)] -Aggregate Attributes [72]: [sum#339, isEmpty#340, sum#341, isEmpty#342, sum#343, isEmpty#344, sum#345, isEmpty#346, sum#347, isEmpty#348, sum#349, isEmpty#350, sum#351, isEmpty#352, sum#353, isEmpty#354, sum#355, isEmpty#356, sum#357, isEmpty#358, sum#359, isEmpty#360, sum#361, isEmpty#362, sum#363, isEmpty#364, sum#365, isEmpty#366, sum#367, isEmpty#368, sum#369, isEmpty#370, sum#371, isEmpty#372, sum#373, isEmpty#374, sum#375, isEmpty#376, sum#377, isEmpty#378, sum#379, isEmpty#380, sum#381, isEmpty#382, sum#383, isEmpty#384, sum#385, isEmpty#386, sum#387, isEmpty#388, sum#389, isEmpty#390, sum#391, isEmpty#392, sum#393, isEmpty#394, sum#395, isEmpty#396, sum#397, isEmpty#398, sum#399, isEmpty#400, sum#401, isEmpty#402, sum#403, isEmpty#404, sum#405, isEmpty#406, sum#407, isEmpty#408, sum#409, isEmpty#410] -Results [80]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476, sum#477, isEmpty#478, sum#479, isEmpty#480, sum#481, isEmpty#482] +Input [32]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#143, year#144, jan_sales#145, feb_sales#146, mar_sales#147, apr_sales#148, may_sales#149, jun_sales#150, jul_sales#151, aug_sales#152, sep_sales#153, oct_sales#154, nov_sales#155, dec_sales#156, jan_net#157, feb_net#158, mar_net#159, apr_net#160, may_net#161, jun_net#162, jul_net#163, aug_net#164, sep_net#165, oct_net#166, nov_net#167, dec_net#168] +Keys [8]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#143, year#144] +Functions [36]: [partial_sum(jan_sales#145), partial_sum(feb_sales#146), partial_sum(mar_sales#147), partial_sum(apr_sales#148), partial_sum(may_sales#149), partial_sum(jun_sales#150), partial_sum(jul_sales#151), partial_sum(aug_sales#152), partial_sum(sep_sales#153), partial_sum(oct_sales#154), partial_sum(nov_sales#155), partial_sum(dec_sales#156), partial_sum(CheckOverflow((promote_precision(jan_sales#145) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(feb_sales#146) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(mar_sales#147) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(apr_sales#148) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(may_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jun_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(jul_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(aug_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(sep_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(oct_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(nov_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(CheckOverflow((promote_precision(dec_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), partial_sum(jan_net#157), partial_sum(feb_net#158), partial_sum(mar_net#159), partial_sum(apr_net#160), partial_sum(may_net#161), partial_sum(jun_net#162), partial_sum(jul_net#163), partial_sum(aug_net#164), partial_sum(sep_net#165), partial_sum(oct_net#166), partial_sum(nov_net#167), partial_sum(dec_net#168)] +Aggregate Attributes [72]: [sum#334, isEmpty#335, sum#336, isEmpty#337, sum#338, isEmpty#339, sum#340, isEmpty#341, sum#342, isEmpty#343, sum#344, isEmpty#345, sum#346, isEmpty#347, sum#348, isEmpty#349, sum#350, isEmpty#351, sum#352, isEmpty#353, sum#354, isEmpty#355, sum#356, isEmpty#357, sum#358, isEmpty#359, sum#360, isEmpty#361, sum#362, isEmpty#363, sum#364, isEmpty#365, sum#366, isEmpty#367, sum#368, isEmpty#369, sum#370, isEmpty#371, sum#372, isEmpty#373, sum#374, isEmpty#375, sum#376, isEmpty#377, sum#378, isEmpty#379, sum#380, isEmpty#381, sum#382, isEmpty#383, sum#384, isEmpty#385, sum#386, isEmpty#387, sum#388, isEmpty#389, sum#390, isEmpty#391, sum#392, isEmpty#393, sum#394, isEmpty#395, sum#396, isEmpty#397, sum#398, isEmpty#399, sum#400, isEmpty#401, sum#402, isEmpty#403, sum#404, isEmpty#405] +Results [80]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#143, year#144, sum#406, isEmpty#407, sum#408, isEmpty#409, sum#410, isEmpty#411, sum#412, isEmpty#413, sum#414, isEmpty#415, sum#416, isEmpty#417, sum#418, isEmpty#419, sum#420, isEmpty#421, sum#422, isEmpty#423, sum#424, isEmpty#425, sum#426, isEmpty#427, sum#428, isEmpty#429, sum#430, isEmpty#431, sum#432, isEmpty#433, sum#434, isEmpty#435, sum#436, isEmpty#437, sum#438, isEmpty#439, sum#440, isEmpty#441, sum#442, isEmpty#443, sum#444, isEmpty#445, sum#446, isEmpty#447, sum#448, isEmpty#449, sum#450, isEmpty#451, sum#452, isEmpty#453, sum#454, isEmpty#455, sum#456, isEmpty#457, sum#458, isEmpty#459, sum#460, isEmpty#461, sum#462, isEmpty#463, sum#464, isEmpty#465, sum#466, isEmpty#467, sum#468, isEmpty#469, sum#470, isEmpty#471, sum#472, isEmpty#473, sum#474, isEmpty#475, sum#476, isEmpty#477] (50) Exchange -Input [80]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476, sum#477, isEmpty#478, sum#479, isEmpty#480, sum#481, isEmpty#482] -Arguments: hashpartitioning(w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, 5), ENSURE_REQUIREMENTS, [id=#483] +Input [80]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#143, year#144, sum#406, isEmpty#407, sum#408, isEmpty#409, sum#410, isEmpty#411, sum#412, isEmpty#413, sum#414, isEmpty#415, sum#416, isEmpty#417, sum#418, isEmpty#419, sum#420, isEmpty#421, sum#422, isEmpty#423, sum#424, isEmpty#425, sum#426, isEmpty#427, sum#428, isEmpty#429, sum#430, isEmpty#431, sum#432, isEmpty#433, sum#434, isEmpty#435, sum#436, isEmpty#437, sum#438, isEmpty#439, sum#440, isEmpty#441, sum#442, isEmpty#443, sum#444, isEmpty#445, sum#446, isEmpty#447, sum#448, isEmpty#449, sum#450, isEmpty#451, sum#452, isEmpty#453, sum#454, isEmpty#455, sum#456, isEmpty#457, sum#458, isEmpty#459, sum#460, isEmpty#461, sum#462, isEmpty#463, sum#464, isEmpty#465, sum#466, isEmpty#467, sum#468, isEmpty#469, sum#470, isEmpty#471, sum#472, isEmpty#473, sum#474, isEmpty#475, sum#476, isEmpty#477] +Arguments: hashpartitioning(w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#143, year#144, 5), ENSURE_REQUIREMENTS, [plan_id=6] (51) HashAggregate [codegen id : 14] -Input [80]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, sum#411, isEmpty#412, sum#413, isEmpty#414, sum#415, isEmpty#416, sum#417, isEmpty#418, sum#419, isEmpty#420, sum#421, isEmpty#422, sum#423, isEmpty#424, sum#425, isEmpty#426, sum#427, isEmpty#428, sum#429, isEmpty#430, sum#431, isEmpty#432, sum#433, isEmpty#434, sum#435, isEmpty#436, sum#437, isEmpty#438, sum#439, isEmpty#440, sum#441, isEmpty#442, sum#443, isEmpty#444, sum#445, isEmpty#446, sum#447, isEmpty#448, sum#449, isEmpty#450, sum#451, isEmpty#452, sum#453, isEmpty#454, sum#455, isEmpty#456, sum#457, isEmpty#458, sum#459, isEmpty#460, sum#461, isEmpty#462, sum#463, isEmpty#464, sum#465, isEmpty#466, sum#467, isEmpty#468, sum#469, isEmpty#470, sum#471, isEmpty#472, sum#473, isEmpty#474, sum#475, isEmpty#476, sum#477, isEmpty#478, sum#479, isEmpty#480, sum#481, isEmpty#482] -Keys [8]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148] -Functions [36]: [sum(jan_sales#149), sum(feb_sales#150), sum(mar_sales#151), sum(apr_sales#152), sum(may_sales#153), sum(jun_sales#154), sum(jul_sales#155), sum(aug_sales#156), sum(sep_sales#157), sum(oct_sales#158), sum(nov_sales#159), sum(dec_sales#160), sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(jan_net#161), sum(feb_net#162), sum(mar_net#163), sum(apr_net#164), sum(may_net#165), sum(jun_net#166), sum(jul_net#167), sum(aug_net#168), sum(sep_net#169), sum(oct_net#170), sum(nov_net#171), sum(dec_net#172)] -Aggregate Attributes [36]: [sum(jan_sales#149)#484, sum(feb_sales#150)#485, sum(mar_sales#151)#486, sum(apr_sales#152)#487, sum(may_sales#153)#488, sum(jun_sales#154)#489, sum(jul_sales#155)#490, sum(aug_sales#156)#491, sum(sep_sales#157)#492, sum(oct_sales#158)#493, sum(nov_sales#159)#494, sum(dec_sales#160)#495, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#496, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#497, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#498, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#499, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#500, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#501, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#502, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#503, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#504, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#505, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#506, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#507, sum(jan_net#161)#508, sum(feb_net#162)#509, sum(mar_net#163)#510, sum(apr_net#164)#511, sum(may_net#165)#512, sum(jun_net#166)#513, sum(jul_net#167)#514, sum(aug_net#168)#515, sum(sep_net#169)#516, sum(oct_net#170)#517, sum(nov_net#171)#518, sum(dec_net#172)#519] -Results [44]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, sum(jan_sales#149)#484 AS jan_sales#520, sum(feb_sales#150)#485 AS feb_sales#521, sum(mar_sales#151)#486 AS mar_sales#522, sum(apr_sales#152)#487 AS apr_sales#523, sum(may_sales#153)#488 AS may_sales#524, sum(jun_sales#154)#489 AS jun_sales#525, sum(jul_sales#155)#490 AS jul_sales#526, sum(aug_sales#156)#491 AS aug_sales#527, sum(sep_sales#157)#492 AS sep_sales#528, sum(oct_sales#158)#493 AS oct_sales#529, sum(nov_sales#159)#494 AS nov_sales#530, sum(dec_sales#160)#495 AS dec_sales#531, sum(CheckOverflow((promote_precision(jan_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#496 AS jan_sales_per_sq_foot#532, sum(CheckOverflow((promote_precision(feb_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#497 AS feb_sales_per_sq_foot#533, sum(CheckOverflow((promote_precision(mar_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#498 AS mar_sales_per_sq_foot#534, sum(CheckOverflow((promote_precision(apr_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#499 AS apr_sales_per_sq_foot#535, sum(CheckOverflow((promote_precision(may_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#500 AS may_sales_per_sq_foot#536, sum(CheckOverflow((promote_precision(jun_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#501 AS jun_sales_per_sq_foot#537, sum(CheckOverflow((promote_precision(jul_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#502 AS jul_sales_per_sq_foot#538, sum(CheckOverflow((promote_precision(aug_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#503 AS aug_sales_per_sq_foot#539, sum(CheckOverflow((promote_precision(sep_sales#157) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#504 AS sep_sales_per_sq_foot#540, sum(CheckOverflow((promote_precision(oct_sales#158) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#505 AS oct_sales_per_sq_foot#541, sum(CheckOverflow((promote_precision(nov_sales#159) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#506 AS nov_sales_per_sq_foot#542, sum(CheckOverflow((promote_precision(dec_sales#160) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#507 AS dec_sales_per_sq_foot#543, sum(jan_net#161)#508 AS jan_net#544, sum(feb_net#162)#509 AS feb_net#545, sum(mar_net#163)#510 AS mar_net#546, sum(apr_net#164)#511 AS apr_net#547, sum(may_net#165)#512 AS may_net#548, sum(jun_net#166)#513 AS jun_net#549, sum(jul_net#167)#514 AS jul_net#550, sum(aug_net#168)#515 AS aug_net#551, sum(sep_net#169)#516 AS sep_net#552, sum(oct_net#170)#517 AS oct_net#553, sum(nov_net#171)#518 AS nov_net#554, sum(dec_net#172)#519 AS dec_net#555] +Input [80]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#143, year#144, sum#406, isEmpty#407, sum#408, isEmpty#409, sum#410, isEmpty#411, sum#412, isEmpty#413, sum#414, isEmpty#415, sum#416, isEmpty#417, sum#418, isEmpty#419, sum#420, isEmpty#421, sum#422, isEmpty#423, sum#424, isEmpty#425, sum#426, isEmpty#427, sum#428, isEmpty#429, sum#430, isEmpty#431, sum#432, isEmpty#433, sum#434, isEmpty#435, sum#436, isEmpty#437, sum#438, isEmpty#439, sum#440, isEmpty#441, sum#442, isEmpty#443, sum#444, isEmpty#445, sum#446, isEmpty#447, sum#448, isEmpty#449, sum#450, isEmpty#451, sum#452, isEmpty#453, sum#454, isEmpty#455, sum#456, isEmpty#457, sum#458, isEmpty#459, sum#460, isEmpty#461, sum#462, isEmpty#463, sum#464, isEmpty#465, sum#466, isEmpty#467, sum#468, isEmpty#469, sum#470, isEmpty#471, sum#472, isEmpty#473, sum#474, isEmpty#475, sum#476, isEmpty#477] +Keys [8]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#143, year#144] +Functions [36]: [sum(jan_sales#145), sum(feb_sales#146), sum(mar_sales#147), sum(apr_sales#148), sum(may_sales#149), sum(jun_sales#150), sum(jul_sales#151), sum(aug_sales#152), sum(sep_sales#153), sum(oct_sales#154), sum(nov_sales#155), sum(dec_sales#156), sum(CheckOverflow((promote_precision(jan_sales#145) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(feb_sales#146) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(mar_sales#147) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(apr_sales#148) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(may_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jun_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(jul_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(aug_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(sep_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(oct_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(nov_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(CheckOverflow((promote_precision(dec_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12))), sum(jan_net#157), sum(feb_net#158), sum(mar_net#159), sum(apr_net#160), sum(may_net#161), sum(jun_net#162), sum(jul_net#163), sum(aug_net#164), sum(sep_net#165), sum(oct_net#166), sum(nov_net#167), sum(dec_net#168)] +Aggregate Attributes [36]: [sum(jan_sales#145)#478, sum(feb_sales#146)#479, sum(mar_sales#147)#480, sum(apr_sales#148)#481, sum(may_sales#149)#482, sum(jun_sales#150)#483, sum(jul_sales#151)#484, sum(aug_sales#152)#485, sum(sep_sales#153)#486, sum(oct_sales#154)#487, sum(nov_sales#155)#488, sum(dec_sales#156)#489, sum(CheckOverflow((promote_precision(jan_sales#145) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#490, sum(CheckOverflow((promote_precision(feb_sales#146) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#491, sum(CheckOverflow((promote_precision(mar_sales#147) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#492, sum(CheckOverflow((promote_precision(apr_sales#148) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#493, sum(CheckOverflow((promote_precision(may_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#494, sum(CheckOverflow((promote_precision(jun_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#495, sum(CheckOverflow((promote_precision(jul_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#496, sum(CheckOverflow((promote_precision(aug_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#497, sum(CheckOverflow((promote_precision(sep_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#498, sum(CheckOverflow((promote_precision(oct_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#499, sum(CheckOverflow((promote_precision(nov_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#500, sum(CheckOverflow((promote_precision(dec_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#501, sum(jan_net#157)#502, sum(feb_net#158)#503, sum(mar_net#159)#504, sum(apr_net#160)#505, sum(may_net#161)#506, sum(jun_net#162)#507, sum(jul_net#163)#508, sum(aug_net#164)#509, sum(sep_net#165)#510, sum(oct_net#166)#511, sum(nov_net#167)#512, sum(dec_net#168)#513] +Results [44]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#143, year#144, sum(jan_sales#145)#478 AS jan_sales#514, sum(feb_sales#146)#479 AS feb_sales#515, sum(mar_sales#147)#480 AS mar_sales#516, sum(apr_sales#148)#481 AS apr_sales#517, sum(may_sales#149)#482 AS may_sales#518, sum(jun_sales#150)#483 AS jun_sales#519, sum(jul_sales#151)#484 AS jul_sales#520, sum(aug_sales#152)#485 AS aug_sales#521, sum(sep_sales#153)#486 AS sep_sales#522, sum(oct_sales#154)#487 AS oct_sales#523, sum(nov_sales#155)#488 AS nov_sales#524, sum(dec_sales#156)#489 AS dec_sales#525, sum(CheckOverflow((promote_precision(jan_sales#145) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#490 AS jan_sales_per_sq_foot#526, sum(CheckOverflow((promote_precision(feb_sales#146) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#491 AS feb_sales_per_sq_foot#527, sum(CheckOverflow((promote_precision(mar_sales#147) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#492 AS mar_sales_per_sq_foot#528, sum(CheckOverflow((promote_precision(apr_sales#148) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#493 AS apr_sales_per_sq_foot#529, sum(CheckOverflow((promote_precision(may_sales#149) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#494 AS may_sales_per_sq_foot#530, sum(CheckOverflow((promote_precision(jun_sales#150) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#495 AS jun_sales_per_sq_foot#531, sum(CheckOverflow((promote_precision(jul_sales#151) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#496 AS jul_sales_per_sq_foot#532, sum(CheckOverflow((promote_precision(aug_sales#152) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#497 AS aug_sales_per_sq_foot#533, sum(CheckOverflow((promote_precision(sep_sales#153) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#498 AS sep_sales_per_sq_foot#534, sum(CheckOverflow((promote_precision(oct_sales#154) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#499 AS oct_sales_per_sq_foot#535, sum(CheckOverflow((promote_precision(nov_sales#155) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#500 AS nov_sales_per_sq_foot#536, sum(CheckOverflow((promote_precision(dec_sales#156) / promote_precision(cast(w_warehouse_sq_ft#11 as decimal(28,2)))), DecimalType(38,12)))#501 AS dec_sales_per_sq_foot#537, sum(jan_net#157)#502 AS jan_net#538, sum(feb_net#158)#503 AS feb_net#539, sum(mar_net#159)#504 AS mar_net#540, sum(apr_net#160)#505 AS apr_net#541, sum(may_net#161)#506 AS may_net#542, sum(jun_net#162)#507 AS jun_net#543, sum(jul_net#163)#508 AS jul_net#544, sum(aug_net#164)#509 AS aug_net#545, sum(sep_net#165)#510 AS sep_net#546, sum(oct_net#166)#511 AS oct_net#547, sum(nov_net#167)#512 AS nov_net#548, sum(dec_net#168)#513 AS dec_net#549] (52) TakeOrderedAndProject -Input [44]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, jan_sales#520, feb_sales#521, mar_sales#522, apr_sales#523, may_sales#524, jun_sales#525, jul_sales#526, aug_sales#527, sep_sales#528, oct_sales#529, nov_sales#530, dec_sales#531, jan_sales_per_sq_foot#532, feb_sales_per_sq_foot#533, mar_sales_per_sq_foot#534, apr_sales_per_sq_foot#535, may_sales_per_sq_foot#536, jun_sales_per_sq_foot#537, jul_sales_per_sq_foot#538, aug_sales_per_sq_foot#539, sep_sales_per_sq_foot#540, oct_sales_per_sq_foot#541, nov_sales_per_sq_foot#542, dec_sales_per_sq_foot#543, jan_net#544, feb_net#545, mar_net#546, apr_net#547, may_net#548, jun_net#549, jul_net#550, aug_net#551, sep_net#552, oct_net#553, nov_net#554, dec_net#555] -Arguments: 100, [w_warehouse_name#10 ASC NULLS FIRST], [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#147, year#148, jan_sales#520, feb_sales#521, mar_sales#522, apr_sales#523, may_sales#524, jun_sales#525, jul_sales#526, aug_sales#527, sep_sales#528, oct_sales#529, nov_sales#530, dec_sales#531, jan_sales_per_sq_foot#532, feb_sales_per_sq_foot#533, mar_sales_per_sq_foot#534, apr_sales_per_sq_foot#535, may_sales_per_sq_foot#536, jun_sales_per_sq_foot#537, jul_sales_per_sq_foot#538, aug_sales_per_sq_foot#539, sep_sales_per_sq_foot#540, oct_sales_per_sq_foot#541, nov_sales_per_sq_foot#542, dec_sales_per_sq_foot#543, jan_net#544, feb_net#545, mar_net#546, apr_net#547, may_net#548, jun_net#549, jul_net#550, aug_net#551, sep_net#552, oct_net#553, nov_net#554, dec_net#555] +Input [44]: [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#143, year#144, jan_sales#514, feb_sales#515, mar_sales#516, apr_sales#517, may_sales#518, jun_sales#519, jul_sales#520, aug_sales#521, sep_sales#522, oct_sales#523, nov_sales#524, dec_sales#525, jan_sales_per_sq_foot#526, feb_sales_per_sq_foot#527, mar_sales_per_sq_foot#528, apr_sales_per_sq_foot#529, may_sales_per_sq_foot#530, jun_sales_per_sq_foot#531, jul_sales_per_sq_foot#532, aug_sales_per_sq_foot#533, sep_sales_per_sq_foot#534, oct_sales_per_sq_foot#535, nov_sales_per_sq_foot#536, dec_sales_per_sq_foot#537, jan_net#538, feb_net#539, mar_net#540, apr_net#541, may_net#542, jun_net#543, jul_net#544, aug_net#545, sep_net#546, oct_net#547, nov_net#548, dec_net#549] +Arguments: 100, [w_warehouse_name#10 ASC NULLS FIRST], [w_warehouse_name#10, w_warehouse_sq_ft#11, w_city#12, w_county#13, w_state#14, w_country#15, ship_carriers#143, year#144, jan_sales#514, feb_sales#515, mar_sales#516, apr_sales#517, may_sales#518, jun_sales#519, jul_sales#520, aug_sales#521, sep_sales#522, oct_sales#523, nov_sales#524, dec_sales#525, jan_sales_per_sq_foot#526, feb_sales_per_sq_foot#527, mar_sales_per_sq_foot#528, apr_sales_per_sq_foot#529, may_sales_per_sq_foot#530, jun_sales_per_sq_foot#531, jul_sales_per_sq_foot#532, aug_sales_per_sq_foot#533, sep_sales_per_sq_foot#534, oct_sales_per_sq_foot#535, nov_sales_per_sq_foot#536, dec_sales_per_sq_foot#537, jan_net#538, feb_net#539, mar_net#540, apr_net#541, may_net#542, jun_net#543, jul_net#544, aug_net#545, sep_net#546, oct_net#547, nov_net#548, dec_net#549] ===== Subqueries ===== @@ -302,23 +302,23 @@ BroadcastExchange (56) (53) Scan parquet default.date_dim -Output [3]: [d_date_sk#17, d_year#18, d_moy#19] +Output [3]: [d_date_sk#16, d_year#17, d_moy#18] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (54) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] (55) Filter [codegen id : 1] -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] -Condition : ((isnotnull(d_year#18) AND (d_year#18 = 2001)) AND isnotnull(d_date_sk#17)) +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Condition : ((isnotnull(d_year#17) AND (d_year#17 = 2001)) AND isnotnull(d_date_sk#16)) (56) BroadcastExchange -Input [3]: [d_date_sk#17, d_year#18, d_moy#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#556] +Input [3]: [d_date_sk#16, d_year#17, d_moy#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] -Subquery:2 Hosting operator id = 30 Hosting Expression = cs_sold_date_sk#179 IN dynamicpruning#8 +Subquery:2 Hosting operator id = 30 Hosting Expression = cs_sold_date_sk#175 IN dynamicpruning#8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt index 5a6c73dbe6a98..e9b22c379e57d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67.sf100/explain.txt @@ -74,7 +74,7 @@ Condition : isnotnull(s_store_sk#11) (10) BroadcastExchange Input [2]: [s_store_sk#11, s_store_id#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_store_sk#2] @@ -87,84 +87,84 @@ Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year (13) Exchange Input [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#14] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) Sort [codegen id : 4] Input [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (15) Scan parquet default.item -Output [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] (17) Filter [codegen id : 5] -Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] -Condition : isnotnull(i_item_sk#15) +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Condition : isnotnull(i_item_sk#13) (18) Exchange -Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] -Arguments: hashpartitioning(i_item_sk#15, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: hashpartitioning(i_item_sk#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 6] -Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] -Arguments: [i_item_sk#15 ASC NULLS FIRST], false, 0 +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [i_item_sk#13 ASC NULLS FIRST], false, 0 (20) SortMergeJoin [codegen id : 7] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#15] +Right keys [1]: [i_item_sk#13] Join condition: None (21) Project [codegen id : 7] -Output [10]: [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Output [10]: [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] (22) Expand [codegen id : 7] -Input [10]: [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Arguments: [[ss_quantity#3, ss_sales_price#4, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 0], [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, null, 1], [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, null, null, 3], [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, null, null, null, 7], [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#17, i_brand#16, i_product_name#19, null, null, null, null, 15], [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#17, i_brand#16, null, null, null, null, null, 31], [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#17, null, null, null, null, null, null, 63], [ss_quantity#3, ss_sales_price#4, i_category#18, null, null, null, null, null, null, null, 127], [ss_quantity#3, ss_sales_price#4, null, null, null, null, null, null, null, null, 255]], [ss_quantity#3, ss_sales_price#4, i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] +Input [10]: [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Arguments: [[ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 0], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, null, 1], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, null, null, 3], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, null, null, null, 7], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, null, null, null, null, 15], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, null, null, null, null, null, 31], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, null, null, null, null, null, null, 63], [ss_quantity#3, ss_sales_price#4, i_category#16, null, null, null, null, null, null, null, 127], [ss_quantity#3, ss_sales_price#4, null, null, null, null, null, null, null, null, 255]], [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] (23) HashAggregate [codegen id : 7] -Input [11]: [ss_quantity#3, ss_sales_price#4, i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] -Keys [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] +Input [11]: [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Keys [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [2]: [sum#30, isEmpty#31] -Results [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] +Aggregate Attributes [2]: [sum#27, isEmpty#28] +Results [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] (24) Exchange -Input [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] -Arguments: hashpartitioning(i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] +Arguments: hashpartitioning(i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, 5), ENSURE_REQUIREMENTS, [plan_id=4] (25) HashAggregate [codegen id : 8] -Input [11]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29, sum#32, isEmpty#33] -Keys [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, spark_grouping_id#29] +Input [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] +Keys [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#35] -Results [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#35 AS sumsales#36] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#31] +Results [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#31 AS sumsales#32] (26) Exchange -Input [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36] -Arguments: hashpartitioning(i_category#21, 5), ENSURE_REQUIREMENTS, [id=#37] +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: hashpartitioning(i_category#18, 5), ENSURE_REQUIREMENTS, [plan_id=5] (27) Sort [codegen id : 9] -Input [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36] -Arguments: [i_category#21 ASC NULLS FIRST, sumsales#36 DESC NULLS LAST], false, 0 +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: [i_category#18 ASC NULLS FIRST, sumsales#32 DESC NULLS LAST], false, 0 (28) Window -Input [9]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36] -Arguments: [rank(sumsales#36) windowspecdefinition(i_category#21, sumsales#36 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#38], [i_category#21], [sumsales#36 DESC NULLS LAST] +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: [rank(sumsales#32) windowspecdefinition(i_category#18, sumsales#32 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#33], [i_category#18], [sumsales#32 DESC NULLS LAST] (29) Filter [codegen id : 10] -Input [10]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36, rk#38] -Condition : (rk#38 <= 100) +Input [10]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] +Condition : (rk#33 <= 100) (30) TakeOrderedAndProject -Input [10]: [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36, rk#38] -Arguments: 100, [i_category#21 ASC NULLS FIRST, i_class#22 ASC NULLS FIRST, i_brand#23 ASC NULLS FIRST, i_product_name#24 ASC NULLS FIRST, d_year#25 ASC NULLS FIRST, d_qoy#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST, s_store_id#28 ASC NULLS FIRST, sumsales#36 ASC NULLS FIRST, rk#38 ASC NULLS FIRST], [i_category#21, i_class#22, i_brand#23, i_product_name#24, d_year#25, d_qoy#26, d_moy#27, s_store_id#28, sumsales#36, rk#38] +Input [10]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] +Arguments: 100, [i_category#18 ASC NULLS FIRST, i_class#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, i_product_name#21 ASC NULLS FIRST, d_year#22 ASC NULLS FIRST, d_qoy#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, s_store_id#25 ASC NULLS FIRST, sumsales#32 ASC NULLS FIRST, rk#33 ASC NULLS FIRST], [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] ===== Subqueries ===== @@ -177,25 +177,25 @@ BroadcastExchange (35) (31) Scan parquet default.date_dim -Output [5]: [d_date_sk#7, d_month_seq#39, d_year#8, d_moy#9, d_qoy#10] +Output [5]: [d_date_sk#7, d_month_seq#34, d_year#8, d_moy#9, d_qoy#10] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 1] -Input [5]: [d_date_sk#7, d_month_seq#39, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#7, d_month_seq#34, d_year#8, d_moy#9, d_qoy#10] (33) Filter [codegen id : 1] -Input [5]: [d_date_sk#7, d_month_seq#39, d_year#8, d_moy#9, d_qoy#10] -Condition : (((isnotnull(d_month_seq#39) AND (d_month_seq#39 >= 1200)) AND (d_month_seq#39 <= 1211)) AND isnotnull(d_date_sk#7)) +Input [5]: [d_date_sk#7, d_month_seq#34, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#34) AND (d_month_seq#34 >= 1200)) AND (d_month_seq#34 <= 1211)) AND isnotnull(d_date_sk#7)) (34) Project [codegen id : 1] Output [4]: [d_date_sk#7, d_year#8, d_moy#9, d_qoy#10] -Input [5]: [d_date_sk#7, d_month_seq#39, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#7, d_month_seq#34, d_year#8, d_moy#9, d_qoy#10] (35) BroadcastExchange Input [4]: [d_date_sk#7, d_year#8, d_moy#9, d_qoy#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt index 53f71a188fcb5..1619d25d1c12e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q67/explain.txt @@ -71,7 +71,7 @@ Condition : isnotnull(s_store_sk#11) (10) BroadcastExchange Input [2]: [s_store_sk#11, s_store_id#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#2] @@ -83,73 +83,73 @@ Output [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_sk#11, s_store_id#12] (13) Scan parquet default.item -Output [5]: [i_item_sk#14, i_brand#15, i_class#16, i_category#17, i_product_name#18] +Output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [5]: [i_item_sk#14, i_brand#15, i_class#16, i_category#17, i_product_name#18] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] (15) Filter [codegen id : 3] -Input [5]: [i_item_sk#14, i_brand#15, i_class#16, i_category#17, i_product_name#18] -Condition : isnotnull(i_item_sk#14) +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Condition : isnotnull(i_item_sk#13) (16) BroadcastExchange -Input [5]: [i_item_sk#14, i_brand#15, i_class#16, i_category#17, i_product_name#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#14] +Right keys [1]: [i_item_sk#13] Join condition: None (18) Project [codegen id : 4] -Output [10]: [ss_quantity#3, ss_sales_price#4, i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#14, i_brand#15, i_class#16, i_category#17, i_product_name#18] +Output [10]: [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] (19) Expand [codegen id : 4] -Input [10]: [ss_quantity#3, ss_sales_price#4, i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] -Arguments: [[ss_quantity#3, ss_sales_price#4, i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 0], [ss_quantity#3, ss_sales_price#4, i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, null, 1], [ss_quantity#3, ss_sales_price#4, i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, null, null, 3], [ss_quantity#3, ss_sales_price#4, i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, null, null, null, 7], [ss_quantity#3, ss_sales_price#4, i_category#17, i_class#16, i_brand#15, i_product_name#18, null, null, null, null, 15], [ss_quantity#3, ss_sales_price#4, i_category#17, i_class#16, i_brand#15, null, null, null, null, null, 31], [ss_quantity#3, ss_sales_price#4, i_category#17, i_class#16, null, null, null, null, null, null, 63], [ss_quantity#3, ss_sales_price#4, i_category#17, null, null, null, null, null, null, null, 127], [ss_quantity#3, ss_sales_price#4, null, null, null, null, null, null, null, null, 255]], [ss_quantity#3, ss_sales_price#4, i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28] +Input [10]: [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Arguments: [[ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 0], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, null, 1], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, null, null, 3], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, null, null, null, 7], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, i_product_name#17, null, null, null, null, 15], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, i_brand#14, null, null, null, null, null, 31], [ss_quantity#3, ss_sales_price#4, i_category#16, i_class#15, null, null, null, null, null, null, 63], [ss_quantity#3, ss_sales_price#4, i_category#16, null, null, null, null, null, null, null, 127], [ss_quantity#3, ss_sales_price#4, null, null, null, null, null, null, null, null, 255]], [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] (20) HashAggregate [codegen id : 4] -Input [11]: [ss_quantity#3, ss_sales_price#4, i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28] -Keys [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28] +Input [11]: [ss_quantity#3, ss_sales_price#4, i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] +Keys [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [2]: [sum#29, isEmpty#30] -Results [11]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28, sum#31, isEmpty#32] +Aggregate Attributes [2]: [sum#27, isEmpty#28] +Results [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] (21) Exchange -Input [11]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28, sum#31, isEmpty#32] -Arguments: hashpartitioning(i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] +Arguments: hashpartitioning(i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 5] -Input [11]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28, sum#31, isEmpty#32] -Keys [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, spark_grouping_id#28] +Input [11]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26, sum#29, isEmpty#30] +Keys [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, spark_grouping_id#26] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#34] -Results [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#34 AS sumsales#35] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#31] +Results [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#31 AS sumsales#32] (23) Exchange -Input [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sumsales#35] -Arguments: hashpartitioning(i_category#20, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: hashpartitioning(i_category#18, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 6] -Input [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sumsales#35] -Arguments: [i_category#20 ASC NULLS FIRST, sumsales#35 DESC NULLS LAST], false, 0 +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: [i_category#18 ASC NULLS FIRST, sumsales#32 DESC NULLS LAST], false, 0 (25) Window -Input [9]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sumsales#35] -Arguments: [rank(sumsales#35) windowspecdefinition(i_category#20, sumsales#35 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#37], [i_category#20], [sumsales#35 DESC NULLS LAST] +Input [9]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32] +Arguments: [rank(sumsales#32) windowspecdefinition(i_category#18, sumsales#32 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#33], [i_category#18], [sumsales#32 DESC NULLS LAST] (26) Filter [codegen id : 7] -Input [10]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sumsales#35, rk#37] -Condition : (rk#37 <= 100) +Input [10]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] +Condition : (rk#33 <= 100) (27) TakeOrderedAndProject -Input [10]: [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sumsales#35, rk#37] -Arguments: 100, [i_category#20 ASC NULLS FIRST, i_class#21 ASC NULLS FIRST, i_brand#22 ASC NULLS FIRST, i_product_name#23 ASC NULLS FIRST, d_year#24 ASC NULLS FIRST, d_qoy#25 ASC NULLS FIRST, d_moy#26 ASC NULLS FIRST, s_store_id#27 ASC NULLS FIRST, sumsales#35 ASC NULLS FIRST, rk#37 ASC NULLS FIRST], [i_category#20, i_class#21, i_brand#22, i_product_name#23, d_year#24, d_qoy#25, d_moy#26, s_store_id#27, sumsales#35, rk#37] +Input [10]: [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] +Arguments: 100, [i_category#18 ASC NULLS FIRST, i_class#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, i_product_name#21 ASC NULLS FIRST, d_year#22 ASC NULLS FIRST, d_qoy#23 ASC NULLS FIRST, d_moy#24 ASC NULLS FIRST, s_store_id#25 ASC NULLS FIRST, sumsales#32 ASC NULLS FIRST, rk#33 ASC NULLS FIRST], [i_category#18, i_class#19, i_brand#20, i_product_name#21, d_year#22, d_qoy#23, d_moy#24, s_store_id#25, sumsales#32, rk#33] ===== Subqueries ===== @@ -162,25 +162,25 @@ BroadcastExchange (32) (28) Scan parquet default.date_dim -Output [5]: [d_date_sk#7, d_month_seq#38, d_year#8, d_moy#9, d_qoy#10] +Output [5]: [d_date_sk#7, d_month_seq#34, d_year#8, d_moy#9, d_qoy#10] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (29) ColumnarToRow [codegen id : 1] -Input [5]: [d_date_sk#7, d_month_seq#38, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#7, d_month_seq#34, d_year#8, d_moy#9, d_qoy#10] (30) Filter [codegen id : 1] -Input [5]: [d_date_sk#7, d_month_seq#38, d_year#8, d_moy#9, d_qoy#10] -Condition : (((isnotnull(d_month_seq#38) AND (d_month_seq#38 >= 1200)) AND (d_month_seq#38 <= 1211)) AND isnotnull(d_date_sk#7)) +Input [5]: [d_date_sk#7, d_month_seq#34, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#34) AND (d_month_seq#34 >= 1200)) AND (d_month_seq#34 <= 1211)) AND isnotnull(d_date_sk#7)) (31) Project [codegen id : 1] Output [4]: [d_date_sk#7, d_year#8, d_moy#9, d_qoy#10] -Input [5]: [d_date_sk#7, d_month_seq#38, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#7, d_month_seq#34, d_year#8, d_moy#9, d_qoy#10] (32) BroadcastExchange Input [4]: [d_date_sk#7, d_year#8, d_moy#9, d_qoy#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.sf100/explain.txt index 2075dff70a542..d09aeb714c5b7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68.sf100/explain.txt @@ -64,202 +64,202 @@ Condition : (isnotnull(c_customer_sk#1) AND isnotnull(c_current_addr_sk#2)) (4) Exchange Input [4]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4] -Arguments: hashpartitioning(c_current_addr_sk#2, 5), ENSURE_REQUIREMENTS, [id=#5] +Arguments: hashpartitioning(c_current_addr_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [4]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4] Arguments: [c_current_addr_sk#2 ASC NULLS FIRST], false, 0 (6) Scan parquet default.customer_address -Output [2]: [ca_address_sk#6, ca_city#7] +Output [2]: [ca_address_sk#5, ca_city#6] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [2]: [ca_address_sk#6, ca_city#7] +Input [2]: [ca_address_sk#5, ca_city#6] (8) Filter [codegen id : 3] -Input [2]: [ca_address_sk#6, ca_city#7] -Condition : (isnotnull(ca_address_sk#6) AND isnotnull(ca_city#7)) +Input [2]: [ca_address_sk#5, ca_city#6] +Condition : (isnotnull(ca_address_sk#5) AND isnotnull(ca_city#6)) (9) Exchange -Input [2]: [ca_address_sk#6, ca_city#7] -Arguments: hashpartitioning(ca_address_sk#6, 5), ENSURE_REQUIREMENTS, [id=#8] +Input [2]: [ca_address_sk#5, ca_city#6] +Arguments: hashpartitioning(ca_address_sk#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] (10) Sort [codegen id : 4] -Input [2]: [ca_address_sk#6, ca_city#7] -Arguments: [ca_address_sk#6 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#5, ca_city#6] +Arguments: [ca_address_sk#5 ASC NULLS FIRST], false, 0 (11) SortMergeJoin [codegen id : 5] Left keys [1]: [c_current_addr_sk#2] -Right keys [1]: [ca_address_sk#6] +Right keys [1]: [ca_address_sk#5] Join condition: None (12) Project [codegen id : 5] -Output [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7] -Input [6]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4, ca_address_sk#6, ca_city#7] +Output [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#6] +Input [6]: [c_customer_sk#1, c_current_addr_sk#2, c_first_name#3, c_last_name#4, ca_address_sk#5, ca_city#6] (13) Exchange -Input [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7] -Arguments: hashpartitioning(c_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#9] +Input [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#6] +Arguments: hashpartitioning(c_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (14) Sort [codegen id : 6] -Input [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7] +Input [4]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#6] Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 (15) Scan parquet default.store_sales -Output [9]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_store_sk#13, ss_ticket_number#14, ss_ext_sales_price#15, ss_ext_list_price#16, ss_ext_tax#17, ss_sold_date_sk#18] +Output [9]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_ext_sales_price#12, ss_ext_list_price#13, ss_ext_tax#14, ss_sold_date_sk#15] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#18), dynamicpruningexpression(ss_sold_date_sk#18 IN dynamicpruning#19)] +PartitionFilters: [isnotnull(ss_sold_date_sk#15), dynamicpruningexpression(ss_sold_date_sk#15 IN dynamicpruning#16)] PushedFilters: [IsNotNull(ss_store_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk), IsNotNull(ss_customer_sk)] ReadSchema: struct (16) ColumnarToRow [codegen id : 10] -Input [9]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_store_sk#13, ss_ticket_number#14, ss_ext_sales_price#15, ss_ext_list_price#16, ss_ext_tax#17, ss_sold_date_sk#18] +Input [9]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_ext_sales_price#12, ss_ext_list_price#13, ss_ext_tax#14, ss_sold_date_sk#15] (17) Filter [codegen id : 10] -Input [9]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_store_sk#13, ss_ticket_number#14, ss_ext_sales_price#15, ss_ext_list_price#16, ss_ext_tax#17, ss_sold_date_sk#18] -Condition : (((isnotnull(ss_store_sk#13) AND isnotnull(ss_hdemo_sk#11)) AND isnotnull(ss_addr_sk#12)) AND isnotnull(ss_customer_sk#10)) +Input [9]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_ext_sales_price#12, ss_ext_list_price#13, ss_ext_tax#14, ss_sold_date_sk#15] +Condition : (((isnotnull(ss_store_sk#10) AND isnotnull(ss_hdemo_sk#8)) AND isnotnull(ss_addr_sk#9)) AND isnotnull(ss_customer_sk#7)) (18) ReusedExchange [Reuses operator id: 52] -Output [1]: [d_date_sk#20] +Output [1]: [d_date_sk#17] (19) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_sold_date_sk#18] -Right keys [1]: [d_date_sk#20] +Left keys [1]: [ss_sold_date_sk#15] +Right keys [1]: [d_date_sk#17] Join condition: None (20) Project [codegen id : 10] -Output [8]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_store_sk#13, ss_ticket_number#14, ss_ext_sales_price#15, ss_ext_list_price#16, ss_ext_tax#17] -Input [10]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_store_sk#13, ss_ticket_number#14, ss_ext_sales_price#15, ss_ext_list_price#16, ss_ext_tax#17, ss_sold_date_sk#18, d_date_sk#20] +Output [8]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_ext_sales_price#12, ss_ext_list_price#13, ss_ext_tax#14] +Input [10]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_ext_sales_price#12, ss_ext_list_price#13, ss_ext_tax#14, ss_sold_date_sk#15, d_date_sk#17] (21) Scan parquet default.store -Output [2]: [s_store_sk#21, s_city#22] +Output [2]: [s_store_sk#18, s_city#19] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [In(s_city, [Fairview,Midway]), IsNotNull(s_store_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 8] -Input [2]: [s_store_sk#21, s_city#22] +Input [2]: [s_store_sk#18, s_city#19] (23) Filter [codegen id : 8] -Input [2]: [s_store_sk#21, s_city#22] -Condition : (s_city#22 IN (Midway,Fairview) AND isnotnull(s_store_sk#21)) +Input [2]: [s_store_sk#18, s_city#19] +Condition : (s_city#19 IN (Midway,Fairview) AND isnotnull(s_store_sk#18)) (24) Project [codegen id : 8] -Output [1]: [s_store_sk#21] -Input [2]: [s_store_sk#21, s_city#22] +Output [1]: [s_store_sk#18] +Input [2]: [s_store_sk#18, s_city#19] (25) BroadcastExchange -Input [1]: [s_store_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] +Input [1]: [s_store_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (26) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_store_sk#13] -Right keys [1]: [s_store_sk#21] +Left keys [1]: [ss_store_sk#10] +Right keys [1]: [s_store_sk#18] Join condition: None (27) Project [codegen id : 10] -Output [7]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_ticket_number#14, ss_ext_sales_price#15, ss_ext_list_price#16, ss_ext_tax#17] -Input [9]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_store_sk#13, ss_ticket_number#14, ss_ext_sales_price#15, ss_ext_list_price#16, ss_ext_tax#17, s_store_sk#21] +Output [7]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_ticket_number#11, ss_ext_sales_price#12, ss_ext_list_price#13, ss_ext_tax#14] +Input [9]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_store_sk#10, ss_ticket_number#11, ss_ext_sales_price#12, ss_ext_list_price#13, ss_ext_tax#14, s_store_sk#18] (28) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#24, hd_dep_count#25, hd_vehicle_count#26] +Output [3]: [hd_demo_sk#20, hd_dep_count#21, hd_vehicle_count#22] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] ReadSchema: struct (29) ColumnarToRow [codegen id : 9] -Input [3]: [hd_demo_sk#24, hd_dep_count#25, hd_vehicle_count#26] +Input [3]: [hd_demo_sk#20, hd_dep_count#21, hd_vehicle_count#22] (30) Filter [codegen id : 9] -Input [3]: [hd_demo_sk#24, hd_dep_count#25, hd_vehicle_count#26] -Condition : (((hd_dep_count#25 = 4) OR (hd_vehicle_count#26 = 3)) AND isnotnull(hd_demo_sk#24)) +Input [3]: [hd_demo_sk#20, hd_dep_count#21, hd_vehicle_count#22] +Condition : (((hd_dep_count#21 = 4) OR (hd_vehicle_count#22 = 3)) AND isnotnull(hd_demo_sk#20)) (31) Project [codegen id : 9] -Output [1]: [hd_demo_sk#24] -Input [3]: [hd_demo_sk#24, hd_dep_count#25, hd_vehicle_count#26] +Output [1]: [hd_demo_sk#20] +Input [3]: [hd_demo_sk#20, hd_dep_count#21, hd_vehicle_count#22] (32) BroadcastExchange -Input [1]: [hd_demo_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] +Input [1]: [hd_demo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (33) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_hdemo_sk#11] -Right keys [1]: [hd_demo_sk#24] +Left keys [1]: [ss_hdemo_sk#8] +Right keys [1]: [hd_demo_sk#20] Join condition: None (34) Project [codegen id : 10] -Output [6]: [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#14, ss_ext_sales_price#15, ss_ext_list_price#16, ss_ext_tax#17] -Input [8]: [ss_customer_sk#10, ss_hdemo_sk#11, ss_addr_sk#12, ss_ticket_number#14, ss_ext_sales_price#15, ss_ext_list_price#16, ss_ext_tax#17, hd_demo_sk#24] +Output [6]: [ss_customer_sk#7, ss_addr_sk#9, ss_ticket_number#11, ss_ext_sales_price#12, ss_ext_list_price#13, ss_ext_tax#14] +Input [8]: [ss_customer_sk#7, ss_hdemo_sk#8, ss_addr_sk#9, ss_ticket_number#11, ss_ext_sales_price#12, ss_ext_list_price#13, ss_ext_tax#14, hd_demo_sk#20] (35) Exchange -Input [6]: [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#14, ss_ext_sales_price#15, ss_ext_list_price#16, ss_ext_tax#17] -Arguments: hashpartitioning(ss_addr_sk#12, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [6]: [ss_customer_sk#7, ss_addr_sk#9, ss_ticket_number#11, ss_ext_sales_price#12, ss_ext_list_price#13, ss_ext_tax#14] +Arguments: hashpartitioning(ss_addr_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=6] (36) Sort [codegen id : 11] -Input [6]: [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#14, ss_ext_sales_price#15, ss_ext_list_price#16, ss_ext_tax#17] -Arguments: [ss_addr_sk#12 ASC NULLS FIRST], false, 0 +Input [6]: [ss_customer_sk#7, ss_addr_sk#9, ss_ticket_number#11, ss_ext_sales_price#12, ss_ext_list_price#13, ss_ext_tax#14] +Arguments: [ss_addr_sk#9 ASC NULLS FIRST], false, 0 (37) ReusedExchange [Reuses operator id: 9] -Output [2]: [ca_address_sk#29, ca_city#30] +Output [2]: [ca_address_sk#23, ca_city#24] (38) Sort [codegen id : 13] -Input [2]: [ca_address_sk#29, ca_city#30] -Arguments: [ca_address_sk#29 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#23, ca_city#24] +Arguments: [ca_address_sk#23 ASC NULLS FIRST], false, 0 (39) SortMergeJoin [codegen id : 14] -Left keys [1]: [ss_addr_sk#12] -Right keys [1]: [ca_address_sk#29] +Left keys [1]: [ss_addr_sk#9] +Right keys [1]: [ca_address_sk#23] Join condition: None (40) Project [codegen id : 14] -Output [7]: [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#14, ss_ext_sales_price#15, ss_ext_list_price#16, ss_ext_tax#17, ca_city#30] -Input [8]: [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#14, ss_ext_sales_price#15, ss_ext_list_price#16, ss_ext_tax#17, ca_address_sk#29, ca_city#30] +Output [7]: [ss_customer_sk#7, ss_addr_sk#9, ss_ticket_number#11, ss_ext_sales_price#12, ss_ext_list_price#13, ss_ext_tax#14, ca_city#24] +Input [8]: [ss_customer_sk#7, ss_addr_sk#9, ss_ticket_number#11, ss_ext_sales_price#12, ss_ext_list_price#13, ss_ext_tax#14, ca_address_sk#23, ca_city#24] (41) HashAggregate [codegen id : 14] -Input [7]: [ss_customer_sk#10, ss_addr_sk#12, ss_ticket_number#14, ss_ext_sales_price#15, ss_ext_list_price#16, ss_ext_tax#17, ca_city#30] -Keys [4]: [ss_ticket_number#14, ss_customer_sk#10, ss_addr_sk#12, ca_city#30] -Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#15)), partial_sum(UnscaledValue(ss_ext_list_price#16)), partial_sum(UnscaledValue(ss_ext_tax#17))] -Aggregate Attributes [3]: [sum#31, sum#32, sum#33] -Results [7]: [ss_ticket_number#14, ss_customer_sk#10, ss_addr_sk#12, ca_city#30, sum#34, sum#35, sum#36] +Input [7]: [ss_customer_sk#7, ss_addr_sk#9, ss_ticket_number#11, ss_ext_sales_price#12, ss_ext_list_price#13, ss_ext_tax#14, ca_city#24] +Keys [4]: [ss_ticket_number#11, ss_customer_sk#7, ss_addr_sk#9, ca_city#24] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#12)), partial_sum(UnscaledValue(ss_ext_list_price#13)), partial_sum(UnscaledValue(ss_ext_tax#14))] +Aggregate Attributes [3]: [sum#25, sum#26, sum#27] +Results [7]: [ss_ticket_number#11, ss_customer_sk#7, ss_addr_sk#9, ca_city#24, sum#28, sum#29, sum#30] (42) HashAggregate [codegen id : 14] -Input [7]: [ss_ticket_number#14, ss_customer_sk#10, ss_addr_sk#12, ca_city#30, sum#34, sum#35, sum#36] -Keys [4]: [ss_ticket_number#14, ss_customer_sk#10, ss_addr_sk#12, ca_city#30] -Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#15)), sum(UnscaledValue(ss_ext_list_price#16)), sum(UnscaledValue(ss_ext_tax#17))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#15))#37, sum(UnscaledValue(ss_ext_list_price#16))#38, sum(UnscaledValue(ss_ext_tax#17))#39] -Results [6]: [ss_ticket_number#14, ss_customer_sk#10, ca_city#30 AS bought_city#40, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#15))#37,17,2) AS extended_price#41, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#16))#38,17,2) AS list_price#42, MakeDecimal(sum(UnscaledValue(ss_ext_tax#17))#39,17,2) AS extended_tax#43] +Input [7]: [ss_ticket_number#11, ss_customer_sk#7, ss_addr_sk#9, ca_city#24, sum#28, sum#29, sum#30] +Keys [4]: [ss_ticket_number#11, ss_customer_sk#7, ss_addr_sk#9, ca_city#24] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#12)), sum(UnscaledValue(ss_ext_list_price#13)), sum(UnscaledValue(ss_ext_tax#14))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#12))#31, sum(UnscaledValue(ss_ext_list_price#13))#32, sum(UnscaledValue(ss_ext_tax#14))#33] +Results [6]: [ss_ticket_number#11, ss_customer_sk#7, ca_city#24 AS bought_city#34, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#12))#31,17,2) AS extended_price#35, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#13))#32,17,2) AS list_price#36, MakeDecimal(sum(UnscaledValue(ss_ext_tax#14))#33,17,2) AS extended_tax#37] (43) Exchange -Input [6]: [ss_ticket_number#14, ss_customer_sk#10, bought_city#40, extended_price#41, list_price#42, extended_tax#43] -Arguments: hashpartitioning(ss_customer_sk#10, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [6]: [ss_ticket_number#11, ss_customer_sk#7, bought_city#34, extended_price#35, list_price#36, extended_tax#37] +Arguments: hashpartitioning(ss_customer_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=7] (44) Sort [codegen id : 15] -Input [6]: [ss_ticket_number#14, ss_customer_sk#10, bought_city#40, extended_price#41, list_price#42, extended_tax#43] -Arguments: [ss_customer_sk#10 ASC NULLS FIRST], false, 0 +Input [6]: [ss_ticket_number#11, ss_customer_sk#7, bought_city#34, extended_price#35, list_price#36, extended_tax#37] +Arguments: [ss_customer_sk#7 ASC NULLS FIRST], false, 0 (45) SortMergeJoin [codegen id : 16] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ss_customer_sk#10] -Join condition: NOT (ca_city#7 = bought_city#40) +Right keys [1]: [ss_customer_sk#7] +Join condition: NOT (ca_city#6 = bought_city#34) (46) Project [codegen id : 16] -Output [8]: [c_last_name#4, c_first_name#3, ca_city#7, bought_city#40, ss_ticket_number#14, extended_price#41, extended_tax#43, list_price#42] -Input [10]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#7, ss_ticket_number#14, ss_customer_sk#10, bought_city#40, extended_price#41, list_price#42, extended_tax#43] +Output [8]: [c_last_name#4, c_first_name#3, ca_city#6, bought_city#34, ss_ticket_number#11, extended_price#35, extended_tax#37, list_price#36] +Input [10]: [c_customer_sk#1, c_first_name#3, c_last_name#4, ca_city#6, ss_ticket_number#11, ss_customer_sk#7, bought_city#34, extended_price#35, list_price#36, extended_tax#37] (47) TakeOrderedAndProject -Input [8]: [c_last_name#4, c_first_name#3, ca_city#7, bought_city#40, ss_ticket_number#14, extended_price#41, extended_tax#43, list_price#42] -Arguments: 100, [c_last_name#4 ASC NULLS FIRST, ss_ticket_number#14 ASC NULLS FIRST], [c_last_name#4, c_first_name#3, ca_city#7, bought_city#40, ss_ticket_number#14, extended_price#41, extended_tax#43, list_price#42] +Input [8]: [c_last_name#4, c_first_name#3, ca_city#6, bought_city#34, ss_ticket_number#11, extended_price#35, extended_tax#37, list_price#36] +Arguments: 100, [c_last_name#4 ASC NULLS FIRST, ss_ticket_number#11 ASC NULLS FIRST], [c_last_name#4, c_first_name#3, ca_city#6, bought_city#34, ss_ticket_number#11, extended_price#35, extended_tax#37, list_price#36] ===== Subqueries ===== -Subquery:1 Hosting operator id = 15 Hosting Expression = ss_sold_date_sk#18 IN dynamicpruning#19 +Subquery:1 Hosting operator id = 15 Hosting Expression = ss_sold_date_sk#15 IN dynamicpruning#16 BroadcastExchange (52) +- * Project (51) +- * Filter (50) @@ -268,25 +268,25 @@ BroadcastExchange (52) (48) Scan parquet default.date_dim -Output [3]: [d_date_sk#20, d_year#45, d_dom#46] +Output [3]: [d_date_sk#17, d_year#38, d_dom#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] ReadSchema: struct (49) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#20, d_year#45, d_dom#46] +Input [3]: [d_date_sk#17, d_year#38, d_dom#39] (50) Filter [codegen id : 1] -Input [3]: [d_date_sk#20, d_year#45, d_dom#46] -Condition : ((((isnotnull(d_dom#46) AND (d_dom#46 >= 1)) AND (d_dom#46 <= 2)) AND d_year#45 IN (1999,2000,2001)) AND isnotnull(d_date_sk#20)) +Input [3]: [d_date_sk#17, d_year#38, d_dom#39] +Condition : ((((isnotnull(d_dom#39) AND (d_dom#39 >= 1)) AND (d_dom#39 <= 2)) AND d_year#38 IN (1999,2000,2001)) AND isnotnull(d_date_sk#17)) (51) Project [codegen id : 1] -Output [1]: [d_date_sk#20] -Input [3]: [d_date_sk#20, d_year#45, d_dom#46] +Output [1]: [d_date_sk#17] +Input [3]: [d_date_sk#17, d_year#38, d_dom#39] (52) BroadcastExchange -Input [1]: [d_date_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#47] +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt index 40336e3e7c909..c539e33cafc4f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q68/explain.txt @@ -87,7 +87,7 @@ Input [2]: [s_store_sk#12, s_city#13] (11) BroadcastExchange Input [1]: [s_store_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_store_sk#4] @@ -99,123 +99,123 @@ Output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, s_store_sk#12] (14) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Output [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [Or(EqualTo(hd_dep_count,4),EqualTo(hd_vehicle_count,3)), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] (16) Filter [codegen id : 3] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] -Condition : (((hd_dep_count#16 = 4) OR (hd_vehicle_count#17 = 3)) AND isnotnull(hd_demo_sk#15)) +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : (((hd_dep_count#15 = 4) OR (hd_vehicle_count#16 = 3)) AND isnotnull(hd_demo_sk#14)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#15] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Output [1]: [hd_demo_sk#14] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] (18) BroadcastExchange -Input [1]: [hd_demo_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [hd_demo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#15] +Right keys [1]: [hd_demo_sk#14] Join condition: None (20) Project [codegen id : 5] Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8] -Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, hd_demo_sk#15] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, hd_demo_sk#14] (21) Scan parquet default.customer_address -Output [2]: [ca_address_sk#19, ca_city#20] +Output [2]: [ca_address_sk#17, ca_city#18] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_city)] ReadSchema: struct (22) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#19, ca_city#20] +Input [2]: [ca_address_sk#17, ca_city#18] (23) Filter [codegen id : 4] -Input [2]: [ca_address_sk#19, ca_city#20] -Condition : (isnotnull(ca_address_sk#19) AND isnotnull(ca_city#20)) +Input [2]: [ca_address_sk#17, ca_city#18] +Condition : (isnotnull(ca_address_sk#17) AND isnotnull(ca_city#18)) (24) BroadcastExchange -Input [2]: [ca_address_sk#19, ca_city#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] +Input [2]: [ca_address_sk#17, ca_city#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (25) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_addr_sk#3] -Right keys [1]: [ca_address_sk#19] +Right keys [1]: [ca_address_sk#17] Join condition: None (26) Project [codegen id : 5] -Output [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#20] -Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_address_sk#19, ca_city#20] +Output [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#18] +Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_address_sk#17, ca_city#18] (27) HashAggregate [codegen id : 5] -Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#20] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20] +Input [7]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_ext_sales_price#6, ss_ext_list_price#7, ss_ext_tax#8, ca_city#18] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#6)), partial_sum(UnscaledValue(ss_ext_list_price#7)), partial_sum(UnscaledValue(ss_ext_tax#8))] -Aggregate Attributes [3]: [sum#22, sum#23, sum#24] -Results [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20, sum#25, sum#26, sum#27] +Aggregate Attributes [3]: [sum#19, sum#20, sum#21] +Results [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#22, sum#23, sum#24] (28) Exchange -Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20, sum#25, sum#26, sum#27] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 8] -Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20, sum#25, sum#26, sum#27] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#20] +Input [7]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18, sum#22, sum#23, sum#24] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, ca_city#18] Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#6)), sum(UnscaledValue(ss_ext_list_price#7)), sum(UnscaledValue(ss_ext_tax#8))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#29, sum(UnscaledValue(ss_ext_list_price#7))#30, sum(UnscaledValue(ss_ext_tax#8))#31] -Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#20 AS bought_city#32, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#29,17,2) AS extended_price#33, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#7))#30,17,2) AS list_price#34, MakeDecimal(sum(UnscaledValue(ss_ext_tax#8))#31,17,2) AS extended_tax#35] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#6))#25, sum(UnscaledValue(ss_ext_list_price#7))#26, sum(UnscaledValue(ss_ext_tax#8))#27] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ca_city#18 AS bought_city#28, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#6))#25,17,2) AS extended_price#29, MakeDecimal(sum(UnscaledValue(ss_ext_list_price#7))#26,17,2) AS list_price#30, MakeDecimal(sum(UnscaledValue(ss_ext_tax#8))#27,17,2) AS extended_tax#31] (30) Scan parquet default.customer -Output [4]: [c_customer_sk#36, c_current_addr_sk#37, c_first_name#38, c_last_name#39] +Output [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 6] -Input [4]: [c_customer_sk#36, c_current_addr_sk#37, c_first_name#38, c_last_name#39] +Input [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] (32) Filter [codegen id : 6] -Input [4]: [c_customer_sk#36, c_current_addr_sk#37, c_first_name#38, c_last_name#39] -Condition : (isnotnull(c_customer_sk#36) AND isnotnull(c_current_addr_sk#37)) +Input [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] +Condition : (isnotnull(c_customer_sk#32) AND isnotnull(c_current_addr_sk#33)) (33) BroadcastExchange -Input [4]: [c_customer_sk#36, c_current_addr_sk#37, c_first_name#38, c_last_name#39] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#40] +Input [4]: [c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#36] +Right keys [1]: [c_customer_sk#32] Join condition: None (35) Project [codegen id : 8] -Output [8]: [ss_ticket_number#5, bought_city#32, extended_price#33, list_price#34, extended_tax#35, c_current_addr_sk#37, c_first_name#38, c_last_name#39] -Input [10]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#32, extended_price#33, list_price#34, extended_tax#35, c_customer_sk#36, c_current_addr_sk#37, c_first_name#38, c_last_name#39] +Output [8]: [ss_ticket_number#5, bought_city#28, extended_price#29, list_price#30, extended_tax#31, c_current_addr_sk#33, c_first_name#34, c_last_name#35] +Input [10]: [ss_ticket_number#5, ss_customer_sk#1, bought_city#28, extended_price#29, list_price#30, extended_tax#31, c_customer_sk#32, c_current_addr_sk#33, c_first_name#34, c_last_name#35] (36) ReusedExchange [Reuses operator id: 24] -Output [2]: [ca_address_sk#41, ca_city#42] +Output [2]: [ca_address_sk#36, ca_city#37] (37) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [c_current_addr_sk#37] -Right keys [1]: [ca_address_sk#41] -Join condition: NOT (ca_city#42 = bought_city#32) +Left keys [1]: [c_current_addr_sk#33] +Right keys [1]: [ca_address_sk#36] +Join condition: NOT (ca_city#37 = bought_city#28) (38) Project [codegen id : 8] -Output [8]: [c_last_name#39, c_first_name#38, ca_city#42, bought_city#32, ss_ticket_number#5, extended_price#33, extended_tax#35, list_price#34] -Input [10]: [ss_ticket_number#5, bought_city#32, extended_price#33, list_price#34, extended_tax#35, c_current_addr_sk#37, c_first_name#38, c_last_name#39, ca_address_sk#41, ca_city#42] +Output [8]: [c_last_name#35, c_first_name#34, ca_city#37, bought_city#28, ss_ticket_number#5, extended_price#29, extended_tax#31, list_price#30] +Input [10]: [ss_ticket_number#5, bought_city#28, extended_price#29, list_price#30, extended_tax#31, c_current_addr_sk#33, c_first_name#34, c_last_name#35, ca_address_sk#36, ca_city#37] (39) TakeOrderedAndProject -Input [8]: [c_last_name#39, c_first_name#38, ca_city#42, bought_city#32, ss_ticket_number#5, extended_price#33, extended_tax#35, list_price#34] -Arguments: 100, [c_last_name#39 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#39, c_first_name#38, ca_city#42, bought_city#32, ss_ticket_number#5, extended_price#33, extended_tax#35, list_price#34] +Input [8]: [c_last_name#35, c_first_name#34, ca_city#37, bought_city#28, ss_ticket_number#5, extended_price#29, extended_tax#31, list_price#30] +Arguments: 100, [c_last_name#35 ASC NULLS FIRST, ss_ticket_number#5 ASC NULLS FIRST], [c_last_name#35, c_first_name#34, ca_city#37, bought_city#28, ss_ticket_number#5, extended_price#29, extended_tax#31, list_price#30] ===== Subqueries ===== @@ -228,25 +228,25 @@ BroadcastExchange (44) (40) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_year#43, d_dom#44] +Output [3]: [d_date_sk#11, d_year#38, d_dom#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] ReadSchema: struct (41) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#43, d_dom#44] +Input [3]: [d_date_sk#11, d_year#38, d_dom#39] (42) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#43, d_dom#44] -Condition : ((((isnotnull(d_dom#44) AND (d_dom#44 >= 1)) AND (d_dom#44 <= 2)) AND d_year#43 IN (1999,2000,2001)) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#11, d_year#38, d_dom#39] +Condition : ((((isnotnull(d_dom#39) AND (d_dom#39 >= 1)) AND (d_dom#39 <= 2)) AND d_year#38 IN (1999,2000,2001)) AND isnotnull(d_date_sk#11)) (43) Project [codegen id : 1] Output [1]: [d_date_sk#11] -Input [3]: [d_date_sk#11, d_year#43, d_dom#44] +Input [3]: [d_date_sk#11, d_year#38, d_dom#39] (44) BroadcastExchange Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#45] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/explain.txt index 339d7d013972d..5d2197b82454e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69.sf100/explain.txt @@ -64,115 +64,115 @@ Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) (4) Exchange Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] -Arguments: hashpartitioning(c_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#4] +Arguments: hashpartitioning(c_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_sales -Output [2]: [ss_customer_sk#5, ss_sold_date_sk#6] +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#6), dynamicpruningexpression(ss_sold_date_sk#6 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ss_sold_date_sk#5), dynamicpruningexpression(ss_sold_date_sk#5 IN dynamicpruning#6)] ReadSchema: struct (7) ColumnarToRow [codegen id : 4] -Input [2]: [ss_customer_sk#5, ss_sold_date_sk#6] +Input [2]: [ss_customer_sk#4, ss_sold_date_sk#5] (8) ReusedExchange [Reuses operator id: 52] -Output [1]: [d_date_sk#8] +Output [1]: [d_date_sk#7] (9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#6] -Right keys [1]: [d_date_sk#8] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#7] Join condition: None (10) Project [codegen id : 4] -Output [1]: [ss_customer_sk#5] -Input [3]: [ss_customer_sk#5, ss_sold_date_sk#6, d_date_sk#8] +Output [1]: [ss_customer_sk#4] +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#7] (11) Exchange -Input [1]: [ss_customer_sk#5] -Arguments: hashpartitioning(ss_customer_sk#5, 5), ENSURE_REQUIREMENTS, [id=#9] +Input [1]: [ss_customer_sk#4] +Arguments: hashpartitioning(ss_customer_sk#4, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 5] -Input [1]: [ss_customer_sk#5] -Arguments: [ss_customer_sk#5 ASC NULLS FIRST], false, 0 +Input [1]: [ss_customer_sk#4] +Arguments: [ss_customer_sk#4 ASC NULLS FIRST], false, 0 (13) SortMergeJoin [codegen id : 6] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ss_customer_sk#5] +Right keys [1]: [ss_customer_sk#4] Join condition: None (14) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] +Output [2]: [ws_bill_customer_sk#8, ws_sold_date_sk#9] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#11), dynamicpruningexpression(ws_sold_date_sk#11 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ws_sold_date_sk#9), dynamicpruningexpression(ws_sold_date_sk#9 IN dynamicpruning#6)] ReadSchema: struct (15) ColumnarToRow [codegen id : 8] -Input [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] +Input [2]: [ws_bill_customer_sk#8, ws_sold_date_sk#9] (16) ReusedExchange [Reuses operator id: 52] -Output [1]: [d_date_sk#12] +Output [1]: [d_date_sk#10] (17) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_sold_date_sk#11] -Right keys [1]: [d_date_sk#12] +Left keys [1]: [ws_sold_date_sk#9] +Right keys [1]: [d_date_sk#10] Join condition: None (18) Project [codegen id : 8] -Output [1]: [ws_bill_customer_sk#10] -Input [3]: [ws_bill_customer_sk#10, ws_sold_date_sk#11, d_date_sk#12] +Output [1]: [ws_bill_customer_sk#8] +Input [3]: [ws_bill_customer_sk#8, ws_sold_date_sk#9, d_date_sk#10] (19) Exchange -Input [1]: [ws_bill_customer_sk#10] -Arguments: hashpartitioning(ws_bill_customer_sk#10, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [1]: [ws_bill_customer_sk#8] +Arguments: hashpartitioning(ws_bill_customer_sk#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) Sort [codegen id : 9] -Input [1]: [ws_bill_customer_sk#10] -Arguments: [ws_bill_customer_sk#10 ASC NULLS FIRST], false, 0 +Input [1]: [ws_bill_customer_sk#8] +Arguments: [ws_bill_customer_sk#8 ASC NULLS FIRST], false, 0 (21) SortMergeJoin [codegen id : 10] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ws_bill_customer_sk#10] +Right keys [1]: [ws_bill_customer_sk#8] Join condition: None (22) Scan parquet default.catalog_sales -Output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Output [2]: [cs_ship_customer_sk#11, cs_sold_date_sk#12] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#15), dynamicpruningexpression(cs_sold_date_sk#15 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(cs_sold_date_sk#12), dynamicpruningexpression(cs_sold_date_sk#12 IN dynamicpruning#6)] ReadSchema: struct (23) ColumnarToRow [codegen id : 12] -Input [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Input [2]: [cs_ship_customer_sk#11, cs_sold_date_sk#12] (24) ReusedExchange [Reuses operator id: 52] -Output [1]: [d_date_sk#16] +Output [1]: [d_date_sk#13] (25) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [cs_sold_date_sk#15] -Right keys [1]: [d_date_sk#16] +Left keys [1]: [cs_sold_date_sk#12] +Right keys [1]: [d_date_sk#13] Join condition: None (26) Project [codegen id : 12] -Output [1]: [cs_ship_customer_sk#14] -Input [3]: [cs_ship_customer_sk#14, cs_sold_date_sk#15, d_date_sk#16] +Output [1]: [cs_ship_customer_sk#11] +Input [3]: [cs_ship_customer_sk#11, cs_sold_date_sk#12, d_date_sk#13] (27) Exchange -Input [1]: [cs_ship_customer_sk#14] -Arguments: hashpartitioning(cs_ship_customer_sk#14, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [1]: [cs_ship_customer_sk#11] +Arguments: hashpartitioning(cs_ship_customer_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) Sort [codegen id : 13] -Input [1]: [cs_ship_customer_sk#14] -Arguments: [cs_ship_customer_sk#14 ASC NULLS FIRST], false, 0 +Input [1]: [cs_ship_customer_sk#11] +Arguments: [cs_ship_customer_sk#11 ASC NULLS FIRST], false, 0 (29) SortMergeJoin [codegen id : 15] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [cs_ship_customer_sk#14] +Right keys [1]: [cs_ship_customer_sk#11] Join condition: None (30) Project [codegen id : 15] @@ -180,88 +180,88 @@ Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] (31) Scan parquet default.customer_address -Output [2]: [ca_address_sk#18, ca_state#19] +Output [2]: [ca_address_sk#14, ca_state#15] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [GA,KY,NM]), IsNotNull(ca_address_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 14] -Input [2]: [ca_address_sk#18, ca_state#19] +Input [2]: [ca_address_sk#14, ca_state#15] (33) Filter [codegen id : 14] -Input [2]: [ca_address_sk#18, ca_state#19] -Condition : (ca_state#19 IN (KY,GA,NM) AND isnotnull(ca_address_sk#18)) +Input [2]: [ca_address_sk#14, ca_state#15] +Condition : (ca_state#15 IN (KY,GA,NM) AND isnotnull(ca_address_sk#14)) (34) Project [codegen id : 14] -Output [1]: [ca_address_sk#18] -Input [2]: [ca_address_sk#18, ca_state#19] +Output [1]: [ca_address_sk#14] +Input [2]: [ca_address_sk#14, ca_state#15] (35) BroadcastExchange -Input [1]: [ca_address_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +Input [1]: [ca_address_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (36) BroadcastHashJoin [codegen id : 15] Left keys [1]: [c_current_addr_sk#3] -Right keys [1]: [ca_address_sk#18] +Right keys [1]: [ca_address_sk#14] Join condition: None (37) Project [codegen id : 15] Output [1]: [c_current_cdemo_sk#2] -Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#18] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#14] (38) BroadcastExchange Input [1]: [c_current_cdemo_sk#2] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] (39) Scan parquet default.customer_demographics -Output [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] +Output [6]: [cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (40) ColumnarToRow -Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] +Input [6]: [cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] (41) Filter -Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] -Condition : isnotnull(cd_demo_sk#22) +Input [6]: [cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] +Condition : isnotnull(cd_demo_sk#16) (42) BroadcastHashJoin [codegen id : 16] Left keys [1]: [c_current_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#22] +Right keys [1]: [cd_demo_sk#16] Join condition: None (43) Project [codegen id : 16] -Output [5]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] -Input [7]: [c_current_cdemo_sk#2, cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] +Output [5]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] +Input [7]: [c_current_cdemo_sk#2, cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] (44) HashAggregate [codegen id : 16] -Input [5]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] -Keys [5]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] +Input [5]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] +Keys [5]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#28] -Results [6]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, count#29] +Aggregate Attributes [1]: [count#22] +Results [6]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21, count#23] (45) Exchange -Input [6]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, count#29] -Arguments: hashpartitioning(cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [6]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21, count#23] +Arguments: hashpartitioning(cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21, 5), ENSURE_REQUIREMENTS, [plan_id=7] (46) HashAggregate [codegen id : 17] -Input [6]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27, count#29] -Keys [5]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cd_purchase_estimate#26, cd_credit_rating#27] +Input [6]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21, count#23] +Keys [5]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#31] -Results [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, count(1)#31 AS cnt1#32, cd_purchase_estimate#26, count(1)#31 AS cnt2#33, cd_credit_rating#27, count(1)#31 AS cnt3#34] +Aggregate Attributes [1]: [count(1)#24] +Results [8]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, count(1)#24 AS cnt1#25, cd_purchase_estimate#20, count(1)#24 AS cnt2#26, cd_credit_rating#21, count(1)#24 AS cnt3#27] (47) TakeOrderedAndProject -Input [8]: [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#32, cd_purchase_estimate#26, cnt2#33, cd_credit_rating#27, cnt3#34] -Arguments: 100, [cd_gender#23 ASC NULLS FIRST, cd_marital_status#24 ASC NULLS FIRST, cd_education_status#25 ASC NULLS FIRST, cd_purchase_estimate#26 ASC NULLS FIRST, cd_credit_rating#27 ASC NULLS FIRST], [cd_gender#23, cd_marital_status#24, cd_education_status#25, cnt1#32, cd_purchase_estimate#26, cnt2#33, cd_credit_rating#27, cnt3#34] +Input [8]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cnt1#25, cd_purchase_estimate#20, cnt2#26, cd_credit_rating#21, cnt3#27] +Arguments: 100, [cd_gender#17 ASC NULLS FIRST, cd_marital_status#18 ASC NULLS FIRST, cd_education_status#19 ASC NULLS FIRST, cd_purchase_estimate#20 ASC NULLS FIRST, cd_credit_rating#21 ASC NULLS FIRST], [cd_gender#17, cd_marital_status#18, cd_education_status#19, cnt1#25, cd_purchase_estimate#20, cnt2#26, cd_credit_rating#21, cnt3#27] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#5 IN dynamicpruning#6 BroadcastExchange (52) +- * Project (51) +- * Filter (50) @@ -270,29 +270,29 @@ BroadcastExchange (52) (48) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#35, d_moy#36] +Output [3]: [d_date_sk#7, d_year#28, d_moy#29] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,6), IsNotNull(d_date_sk)] ReadSchema: struct (49) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#35, d_moy#36] +Input [3]: [d_date_sk#7, d_year#28, d_moy#29] (50) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#35, d_moy#36] -Condition : (((((isnotnull(d_year#35) AND isnotnull(d_moy#36)) AND (d_year#35 = 2001)) AND (d_moy#36 >= 4)) AND (d_moy#36 <= 6)) AND isnotnull(d_date_sk#8)) +Input [3]: [d_date_sk#7, d_year#28, d_moy#29] +Condition : (((((isnotnull(d_year#28) AND isnotnull(d_moy#29)) AND (d_year#28 = 2001)) AND (d_moy#29 >= 4)) AND (d_moy#29 <= 6)) AND isnotnull(d_date_sk#7)) (51) Project [codegen id : 1] -Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#35, d_moy#36] +Output [1]: [d_date_sk#7] +Input [3]: [d_date_sk#7, d_year#28, d_moy#29] (52) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#37] +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 14 Hosting Expression = ws_sold_date_sk#11 IN dynamicpruning#7 +Subquery:2 Hosting operator id = 14 Hosting Expression = ws_sold_date_sk#9 IN dynamicpruning#6 -Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#15 IN dynamicpruning#7 +Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#12 IN dynamicpruning#6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt index f509fa18d6971..b51f1f102ed03 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q69/explain.txt @@ -81,7 +81,7 @@ Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#7] (9) BroadcastExchange Input [1]: [ss_customer_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#1] @@ -89,65 +89,65 @@ Right keys [1]: [ss_customer_sk#4] Join condition: None (11) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Output [2]: [ws_bill_customer_sk#8, ws_sold_date_sk#9] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#10), dynamicpruningexpression(ws_sold_date_sk#10 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(ws_sold_date_sk#9), dynamicpruningexpression(ws_sold_date_sk#9 IN dynamicpruning#6)] ReadSchema: struct (12) ColumnarToRow [codegen id : 4] -Input [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Input [2]: [ws_bill_customer_sk#8, ws_sold_date_sk#9] (13) ReusedExchange [Reuses operator id: 47] -Output [1]: [d_date_sk#11] +Output [1]: [d_date_sk#10] (14) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ws_sold_date_sk#10] -Right keys [1]: [d_date_sk#11] +Left keys [1]: [ws_sold_date_sk#9] +Right keys [1]: [d_date_sk#10] Join condition: None (15) Project [codegen id : 4] -Output [1]: [ws_bill_customer_sk#9] -Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] +Output [1]: [ws_bill_customer_sk#8] +Input [3]: [ws_bill_customer_sk#8, ws_sold_date_sk#9, d_date_sk#10] (16) BroadcastExchange -Input [1]: [ws_bill_customer_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Input [1]: [ws_bill_customer_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ws_bill_customer_sk#9] +Right keys [1]: [ws_bill_customer_sk#8] Join condition: None (18) Scan parquet default.catalog_sales -Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Output [2]: [cs_ship_customer_sk#11, cs_sold_date_sk#12] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#14), dynamicpruningexpression(cs_sold_date_sk#14 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(cs_sold_date_sk#12), dynamicpruningexpression(cs_sold_date_sk#12 IN dynamicpruning#6)] ReadSchema: struct (19) ColumnarToRow [codegen id : 6] -Input [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Input [2]: [cs_ship_customer_sk#11, cs_sold_date_sk#12] (20) ReusedExchange [Reuses operator id: 47] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#13] (21) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_sold_date_sk#14] -Right keys [1]: [d_date_sk#15] +Left keys [1]: [cs_sold_date_sk#12] +Right keys [1]: [d_date_sk#13] Join condition: None (22) Project [codegen id : 6] -Output [1]: [cs_ship_customer_sk#13] -Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] +Output [1]: [cs_ship_customer_sk#11] +Input [3]: [cs_ship_customer_sk#11, cs_sold_date_sk#12, d_date_sk#13] (23) BroadcastExchange -Input [1]: [cs_ship_customer_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] +Input [1]: [cs_ship_customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [cs_ship_customer_sk#13] +Right keys [1]: [cs_ship_customer_sk#11] Join condition: None (25) Project [codegen id : 9] @@ -155,84 +155,84 @@ Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] (26) Scan parquet default.customer_address -Output [2]: [ca_address_sk#17, ca_state#18] +Output [2]: [ca_address_sk#14, ca_state#15] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [GA,KY,NM]), IsNotNull(ca_address_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#17, ca_state#18] +Input [2]: [ca_address_sk#14, ca_state#15] (28) Filter [codegen id : 7] -Input [2]: [ca_address_sk#17, ca_state#18] -Condition : (ca_state#18 IN (KY,GA,NM) AND isnotnull(ca_address_sk#17)) +Input [2]: [ca_address_sk#14, ca_state#15] +Condition : (ca_state#15 IN (KY,GA,NM) AND isnotnull(ca_address_sk#14)) (29) Project [codegen id : 7] -Output [1]: [ca_address_sk#17] -Input [2]: [ca_address_sk#17, ca_state#18] +Output [1]: [ca_address_sk#14] +Input [2]: [ca_address_sk#14, ca_state#15] (30) BroadcastExchange -Input [1]: [ca_address_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] +Input [1]: [ca_address_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (31) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_current_addr_sk#3] -Right keys [1]: [ca_address_sk#17] +Right keys [1]: [ca_address_sk#14] Join condition: None (32) Project [codegen id : 9] Output [1]: [c_current_cdemo_sk#2] -Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#17] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#14] (33) Scan parquet default.customer_demographics -Output [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Output [6]: [cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 8] -Input [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Input [6]: [cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] (35) Filter [codegen id : 8] -Input [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] -Condition : isnotnull(cd_demo_sk#20) +Input [6]: [cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] +Condition : isnotnull(cd_demo_sk#16) (36) BroadcastExchange -Input [6]: [cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Input [6]: [cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (37) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_current_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#20] +Right keys [1]: [cd_demo_sk#16] Join condition: None (38) Project [codegen id : 9] -Output [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] -Input [7]: [c_current_cdemo_sk#2, cd_demo_sk#20, cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Output [5]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] +Input [7]: [c_current_cdemo_sk#2, cd_demo_sk#16, cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] (39) HashAggregate [codegen id : 9] -Input [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] -Keys [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Input [5]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] +Keys [5]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#27] -Results [6]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, count#28] +Aggregate Attributes [1]: [count#22] +Results [6]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21, count#23] (40) Exchange -Input [6]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, count#28] -Arguments: hashpartitioning(cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [6]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21, count#23] +Arguments: hashpartitioning(cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21, 5), ENSURE_REQUIREMENTS, [plan_id=6] (41) HashAggregate [codegen id : 10] -Input [6]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25, count#28] -Keys [5]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cd_purchase_estimate#24, cd_credit_rating#25] +Input [6]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21, count#23] +Keys [5]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cd_purchase_estimate#20, cd_credit_rating#21] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#30] -Results [8]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, count(1)#30 AS cnt1#31, cd_purchase_estimate#24, count(1)#30 AS cnt2#32, cd_credit_rating#25, count(1)#30 AS cnt3#33] +Aggregate Attributes [1]: [count(1)#24] +Results [8]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, count(1)#24 AS cnt1#25, cd_purchase_estimate#20, count(1)#24 AS cnt2#26, cd_credit_rating#21, count(1)#24 AS cnt3#27] (42) TakeOrderedAndProject -Input [8]: [cd_gender#21, cd_marital_status#22, cd_education_status#23, cnt1#31, cd_purchase_estimate#24, cnt2#32, cd_credit_rating#25, cnt3#33] -Arguments: 100, [cd_gender#21 ASC NULLS FIRST, cd_marital_status#22 ASC NULLS FIRST, cd_education_status#23 ASC NULLS FIRST, cd_purchase_estimate#24 ASC NULLS FIRST, cd_credit_rating#25 ASC NULLS FIRST], [cd_gender#21, cd_marital_status#22, cd_education_status#23, cnt1#31, cd_purchase_estimate#24, cnt2#32, cd_credit_rating#25, cnt3#33] +Input [8]: [cd_gender#17, cd_marital_status#18, cd_education_status#19, cnt1#25, cd_purchase_estimate#20, cnt2#26, cd_credit_rating#21, cnt3#27] +Arguments: 100, [cd_gender#17 ASC NULLS FIRST, cd_marital_status#18 ASC NULLS FIRST, cd_education_status#19 ASC NULLS FIRST, cd_purchase_estimate#20 ASC NULLS FIRST, cd_credit_rating#21 ASC NULLS FIRST], [cd_gender#17, cd_marital_status#18, cd_education_status#19, cnt1#25, cd_purchase_estimate#20, cnt2#26, cd_credit_rating#21, cnt3#27] ===== Subqueries ===== @@ -245,29 +245,29 @@ BroadcastExchange (47) (43) Scan parquet default.date_dim -Output [3]: [d_date_sk#7, d_year#34, d_moy#35] +Output [3]: [d_date_sk#7, d_year#28, d_moy#29] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,6), IsNotNull(d_date_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#34, d_moy#35] +Input [3]: [d_date_sk#7, d_year#28, d_moy#29] (45) Filter [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#34, d_moy#35] -Condition : (((((isnotnull(d_year#34) AND isnotnull(d_moy#35)) AND (d_year#34 = 2001)) AND (d_moy#35 >= 4)) AND (d_moy#35 <= 6)) AND isnotnull(d_date_sk#7)) +Input [3]: [d_date_sk#7, d_year#28, d_moy#29] +Condition : (((((isnotnull(d_year#28) AND isnotnull(d_moy#29)) AND (d_year#28 = 2001)) AND (d_moy#29 >= 4)) AND (d_moy#29 <= 6)) AND isnotnull(d_date_sk#7)) (46) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [3]: [d_date_sk#7, d_year#34, d_moy#35] +Input [3]: [d_date_sk#7, d_year#28, d_moy#29] (47) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:2 Hosting operator id = 11 Hosting Expression = ws_sold_date_sk#10 IN dynamicpruning#6 +Subquery:2 Hosting operator id = 11 Hosting Expression = ws_sold_date_sk#9 IN dynamicpruning#6 -Subquery:3 Hosting operator id = 18 Hosting Expression = cs_sold_date_sk#14 IN dynamicpruning#6 +Subquery:3 Hosting operator id = 18 Hosting Expression = cs_sold_date_sk#12 IN dynamicpruning#6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.sf100/explain.txt index 129cee0449f6a..4a0446c5c454b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7.sf100/explain.txt @@ -66,7 +66,7 @@ Input [4]: [cd_demo_sk#10, cd_gender#11, cd_marital_status#12, cd_education_stat (8) BroadcastExchange Input [1]: [cd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_cdemo_sk#2] @@ -78,96 +78,96 @@ Output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#10] (11) Scan parquet default.promotion -Output [3]: [p_promo_sk#15, p_channel_email#16, p_channel_event#17] +Output [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [3]: [p_promo_sk#15, p_channel_email#16, p_channel_event#17] +Input [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] (13) Filter [codegen id : 2] -Input [3]: [p_promo_sk#15, p_channel_email#16, p_channel_event#17] -Condition : (((p_channel_email#16 = N) OR (p_channel_event#17 = N)) AND isnotnull(p_promo_sk#15)) +Input [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] +Condition : (((p_channel_email#15 = N) OR (p_channel_event#16 = N)) AND isnotnull(p_promo_sk#14)) (14) Project [codegen id : 2] -Output [1]: [p_promo_sk#15] -Input [3]: [p_promo_sk#15, p_channel_email#16, p_channel_event#17] +Output [1]: [p_promo_sk#14] +Input [3]: [p_promo_sk#14, p_channel_email#15, p_channel_event#16] (15) BroadcastExchange -Input [1]: [p_promo_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [p_promo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_promo_sk#3] -Right keys [1]: [p_promo_sk#15] +Right keys [1]: [p_promo_sk#14] Join condition: None (17) Project [codegen id : 5] Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] -Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, p_promo_sk#15] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, p_promo_sk#14] (18) ReusedExchange [Reuses operator id: 35] -Output [1]: [d_date_sk#19] +Output [1]: [d_date_sk#17] (19) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#19] +Right keys [1]: [d_date_sk#17] Join condition: None (20) Project [codegen id : 5] Output [5]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#19] +Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#17] (21) Scan parquet default.item -Output [2]: [i_item_sk#20, i_item_id#21] +Output [2]: [i_item_sk#18, i_item_id#19] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#20, i_item_id#21] +Input [2]: [i_item_sk#18, i_item_id#19] (23) Filter [codegen id : 4] -Input [2]: [i_item_sk#20, i_item_id#21] -Condition : isnotnull(i_item_sk#20) +Input [2]: [i_item_sk#18, i_item_id#19] +Condition : isnotnull(i_item_sk#18) (24) BroadcastExchange -Input [2]: [i_item_sk#20, i_item_id#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#22] +Input [2]: [i_item_sk#18, i_item_id#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (25) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#20] +Right keys [1]: [i_item_sk#18] Join condition: None (26) Project [codegen id : 5] -Output [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#21] -Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#20, i_item_id#21] +Output [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19] +Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#18, i_item_id#19] (27) HashAggregate [codegen id : 5] -Input [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#21] -Keys [1]: [i_item_id#21] +Input [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#19] +Keys [1]: [i_item_id#19] Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [8]: [sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] -Results [9]: [i_item_id#21, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Aggregate Attributes [8]: [sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Results [9]: [i_item_id#19, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] (28) Exchange -Input [9]: [i_item_id#21, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] -Arguments: hashpartitioning(i_item_id#21, 5), ENSURE_REQUIREMENTS, [id=#39] +Input [9]: [i_item_id#19, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Arguments: hashpartitioning(i_item_id#19, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 6] -Input [9]: [i_item_id#21, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] -Keys [1]: [i_item_id#21] +Input [9]: [i_item_id#19, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Keys [1]: [i_item_id#19] Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [4]: [avg(ss_quantity#4)#40, avg(UnscaledValue(ss_list_price#5))#41, avg(UnscaledValue(ss_coupon_amt#7))#42, avg(UnscaledValue(ss_sales_price#6))#43] -Results [5]: [i_item_id#21, avg(ss_quantity#4)#40 AS agg1#44, cast((avg(UnscaledValue(ss_list_price#5))#41 / 100.0) as decimal(11,6)) AS agg2#45, cast((avg(UnscaledValue(ss_coupon_amt#7))#42 / 100.0) as decimal(11,6)) AS agg3#46, cast((avg(UnscaledValue(ss_sales_price#6))#43 / 100.0) as decimal(11,6)) AS agg4#47] +Aggregate Attributes [4]: [avg(ss_quantity#4)#36, avg(UnscaledValue(ss_list_price#5))#37, avg(UnscaledValue(ss_coupon_amt#7))#38, avg(UnscaledValue(ss_sales_price#6))#39] +Results [5]: [i_item_id#19, avg(ss_quantity#4)#36 AS agg1#40, cast((avg(UnscaledValue(ss_list_price#5))#37 / 100.0) as decimal(11,6)) AS agg2#41, cast((avg(UnscaledValue(ss_coupon_amt#7))#38 / 100.0) as decimal(11,6)) AS agg3#42, cast((avg(UnscaledValue(ss_sales_price#6))#39 / 100.0) as decimal(11,6)) AS agg4#43] (30) TakeOrderedAndProject -Input [5]: [i_item_id#21, agg1#44, agg2#45, agg3#46, agg4#47] -Arguments: 100, [i_item_id#21 ASC NULLS FIRST], [i_item_id#21, agg1#44, agg2#45, agg3#46, agg4#47] +Input [5]: [i_item_id#19, agg1#40, agg2#41, agg3#42, agg4#43] +Arguments: 100, [i_item_id#19 ASC NULLS FIRST], [i_item_id#19, agg1#40, agg2#41, agg3#42, agg4#43] ===== Subqueries ===== @@ -180,25 +180,25 @@ BroadcastExchange (35) (31) Scan parquet default.date_dim -Output [2]: [d_date_sk#19, d_year#48] +Output [2]: [d_date_sk#17, d_year#44] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#19, d_year#48] +Input [2]: [d_date_sk#17, d_year#44] (33) Filter [codegen id : 1] -Input [2]: [d_date_sk#19, d_year#48] -Condition : ((isnotnull(d_year#48) AND (d_year#48 = 2000)) AND isnotnull(d_date_sk#19)) +Input [2]: [d_date_sk#17, d_year#44] +Condition : ((isnotnull(d_year#44) AND (d_year#44 = 2000)) AND isnotnull(d_date_sk#17)) (34) Project [codegen id : 1] -Output [1]: [d_date_sk#19] -Input [2]: [d_date_sk#19, d_year#48] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_year#44] (35) BroadcastExchange -Input [1]: [d_date_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt index d01608cf02add..8a3e7d3fefaea 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q7/explain.txt @@ -66,7 +66,7 @@ Input [4]: [cd_demo_sk#10, cd_gender#11, cd_marital_status#12, cd_education_stat (8) BroadcastExchange Input [1]: [cd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_cdemo_sk#2] @@ -78,96 +78,96 @@ Output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#10] (11) ReusedExchange [Reuses operator id: 35] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (13) Project [codegen id : 5] Output [6]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#15] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#14] (14) Scan parquet default.item -Output [2]: [i_item_sk#16, i_item_id#17] +Output [2]: [i_item_sk#15, i_item_id#16] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_sk#16, i_item_id#17] +Input [2]: [i_item_sk#15, i_item_id#16] (16) Filter [codegen id : 3] -Input [2]: [i_item_sk#16, i_item_id#17] -Condition : isnotnull(i_item_sk#16) +Input [2]: [i_item_sk#15, i_item_id#16] +Condition : isnotnull(i_item_sk#15) (17) BroadcastExchange -Input [2]: [i_item_sk#16, i_item_id#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] +Input [2]: [i_item_sk#15, i_item_id#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#16] +Right keys [1]: [i_item_sk#15] Join condition: None (19) Project [codegen id : 5] -Output [6]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#17] -Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#16, i_item_id#17] +Output [6]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#15, i_item_id#16] (20) Scan parquet default.promotion -Output [3]: [p_promo_sk#19, p_channel_email#20, p_channel_event#21] +Output [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [Or(EqualTo(p_channel_email,N),EqualTo(p_channel_event,N)), IsNotNull(p_promo_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [3]: [p_promo_sk#19, p_channel_email#20, p_channel_event#21] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] (22) Filter [codegen id : 4] -Input [3]: [p_promo_sk#19, p_channel_email#20, p_channel_event#21] -Condition : (((p_channel_email#20 = N) OR (p_channel_event#21 = N)) AND isnotnull(p_promo_sk#19)) +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] +Condition : (((p_channel_email#18 = N) OR (p_channel_event#19 = N)) AND isnotnull(p_promo_sk#17)) (23) Project [codegen id : 4] -Output [1]: [p_promo_sk#19] -Input [3]: [p_promo_sk#19, p_channel_email#20, p_channel_event#21] +Output [1]: [p_promo_sk#17] +Input [3]: [p_promo_sk#17, p_channel_email#18, p_channel_event#19] (24) BroadcastExchange -Input [1]: [p_promo_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [p_promo_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (25) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_promo_sk#3] -Right keys [1]: [p_promo_sk#19] +Right keys [1]: [p_promo_sk#17] Join condition: None (26) Project [codegen id : 5] -Output [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#17] -Input [7]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#17, p_promo_sk#19] +Output [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Input [7]: [ss_promo_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16, p_promo_sk#17] (27) HashAggregate [codegen id : 5] -Input [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#17] -Keys [1]: [i_item_id#17] +Input [5]: [ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_id#16] +Keys [1]: [i_item_id#16] Functions [4]: [partial_avg(ss_quantity#4), partial_avg(UnscaledValue(ss_list_price#5)), partial_avg(UnscaledValue(ss_coupon_amt#7)), partial_avg(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [8]: [sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] -Results [9]: [i_item_id#17, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Aggregate Attributes [8]: [sum#20, count#21, sum#22, count#23, sum#24, count#25, sum#26, count#27] +Results [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] (28) Exchange -Input [9]: [i_item_id#17, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] -Arguments: hashpartitioning(i_item_id#17, 5), ENSURE_REQUIREMENTS, [id=#39] +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Arguments: hashpartitioning(i_item_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 6] -Input [9]: [i_item_id#17, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] -Keys [1]: [i_item_id#17] +Input [9]: [i_item_id#16, sum#28, count#29, sum#30, count#31, sum#32, count#33, sum#34, count#35] +Keys [1]: [i_item_id#16] Functions [4]: [avg(ss_quantity#4), avg(UnscaledValue(ss_list_price#5)), avg(UnscaledValue(ss_coupon_amt#7)), avg(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [4]: [avg(ss_quantity#4)#40, avg(UnscaledValue(ss_list_price#5))#41, avg(UnscaledValue(ss_coupon_amt#7))#42, avg(UnscaledValue(ss_sales_price#6))#43] -Results [5]: [i_item_id#17, avg(ss_quantity#4)#40 AS agg1#44, cast((avg(UnscaledValue(ss_list_price#5))#41 / 100.0) as decimal(11,6)) AS agg2#45, cast((avg(UnscaledValue(ss_coupon_amt#7))#42 / 100.0) as decimal(11,6)) AS agg3#46, cast((avg(UnscaledValue(ss_sales_price#6))#43 / 100.0) as decimal(11,6)) AS agg4#47] +Aggregate Attributes [4]: [avg(ss_quantity#4)#36, avg(UnscaledValue(ss_list_price#5))#37, avg(UnscaledValue(ss_coupon_amt#7))#38, avg(UnscaledValue(ss_sales_price#6))#39] +Results [5]: [i_item_id#16, avg(ss_quantity#4)#36 AS agg1#40, cast((avg(UnscaledValue(ss_list_price#5))#37 / 100.0) as decimal(11,6)) AS agg2#41, cast((avg(UnscaledValue(ss_coupon_amt#7))#38 / 100.0) as decimal(11,6)) AS agg3#42, cast((avg(UnscaledValue(ss_sales_price#6))#39 / 100.0) as decimal(11,6)) AS agg4#43] (30) TakeOrderedAndProject -Input [5]: [i_item_id#17, agg1#44, agg2#45, agg3#46, agg4#47] -Arguments: 100, [i_item_id#17 ASC NULLS FIRST], [i_item_id#17, agg1#44, agg2#45, agg3#46, agg4#47] +Input [5]: [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] +Arguments: 100, [i_item_id#16 ASC NULLS FIRST], [i_item_id#16, agg1#40, agg2#41, agg3#42, agg4#43] ===== Subqueries ===== @@ -180,25 +180,25 @@ BroadcastExchange (35) (31) Scan parquet default.date_dim -Output [2]: [d_date_sk#15, d_year#48] +Output [2]: [d_date_sk#14, d_year#44] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#48] +Input [2]: [d_date_sk#14, d_year#44] (33) Filter [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#48] -Condition : ((isnotnull(d_year#48) AND (d_year#48 = 2000)) AND isnotnull(d_date_sk#15)) +Input [2]: [d_date_sk#14, d_year#44] +Condition : ((isnotnull(d_year#44) AND (d_year#44 = 2000)) AND isnotnull(d_date_sk#14)) (34) Project [codegen id : 1] -Output [1]: [d_date_sk#15] -Input [2]: [d_date_sk#15, d_year#48] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#44] (35) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.sf100/explain.txt index f18d02d50e0af..a417a39c633bc 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70.sf100/explain.txt @@ -127,7 +127,7 @@ Condition : isnotnull(s_store_sk#13) (19) BroadcastExchange Input [2]: [s_store_sk#13, s_state#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (20) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#9] @@ -142,39 +142,39 @@ Input [4]: [ss_store_sk#9, ss_net_profit#10, s_store_sk#13, s_state#14] Input [2]: [ss_net_profit#10, s_state#14] Keys [1]: [s_state#14] Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#10))] -Aggregate Attributes [1]: [sum#16] -Results [2]: [s_state#14, sum#17] +Aggregate Attributes [1]: [sum#15] +Results [2]: [s_state#14, sum#16] (23) Exchange -Input [2]: [s_state#14, sum#17] -Arguments: hashpartitioning(s_state#14, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [2]: [s_state#14, sum#16] +Arguments: hashpartitioning(s_state#14, 5), ENSURE_REQUIREMENTS, [plan_id=2] (24) HashAggregate [codegen id : 5] -Input [2]: [s_state#14, sum#17] +Input [2]: [s_state#14, sum#16] Keys [1]: [s_state#14] Functions [1]: [sum(UnscaledValue(ss_net_profit#10))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#10))#19] -Results [3]: [s_state#14, s_state#14, MakeDecimal(sum(UnscaledValue(ss_net_profit#10))#19,17,2) AS _w2#20] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#10))#17] +Results [3]: [s_state#14, s_state#14, MakeDecimal(sum(UnscaledValue(ss_net_profit#10))#17,17,2) AS _w2#18] (25) Sort [codegen id : 5] -Input [3]: [s_state#14, s_state#14, _w2#20] -Arguments: [s_state#14 ASC NULLS FIRST, _w2#20 DESC NULLS LAST], false, 0 +Input [3]: [s_state#14, s_state#14, _w2#18] +Arguments: [s_state#14 ASC NULLS FIRST, _w2#18 DESC NULLS LAST], false, 0 (26) Window -Input [3]: [s_state#14, s_state#14, _w2#20] -Arguments: [rank(_w2#20) windowspecdefinition(s_state#14, _w2#20 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#21], [s_state#14], [_w2#20 DESC NULLS LAST] +Input [3]: [s_state#14, s_state#14, _w2#18] +Arguments: [rank(_w2#18) windowspecdefinition(s_state#14, _w2#18 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#19], [s_state#14], [_w2#18 DESC NULLS LAST] (27) Filter [codegen id : 6] -Input [4]: [s_state#14, s_state#14, _w2#20, ranking#21] -Condition : (ranking#21 <= 5) +Input [4]: [s_state#14, s_state#14, _w2#18, ranking#19] +Condition : (ranking#19 <= 5) (28) Project [codegen id : 6] Output [1]: [s_state#14] -Input [4]: [s_state#14, s_state#14, _w2#20, ranking#21] +Input [4]: [s_state#14, s_state#14, _w2#18, ranking#19] (29) BroadcastExchange Input [1]: [s_state#14] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#22] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=3] (30) BroadcastHashJoin [codegen id : 7] Left keys [1]: [s_state#8] @@ -183,7 +183,7 @@ Join condition: None (31) BroadcastExchange Input [3]: [s_store_sk#6, s_county#7, s_state#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (32) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_store_sk#1] @@ -196,45 +196,45 @@ Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_county#7, s_state#8] (34) Expand [codegen id : 8] Input [3]: [ss_net_profit#2, s_state#8, s_county#7] -Arguments: [[ss_net_profit#2, s_state#8, s_county#7, 0], [ss_net_profit#2, s_state#8, null, 1], [ss_net_profit#2, null, null, 3]], [ss_net_profit#2, s_state#24, s_county#25, spark_grouping_id#26] +Arguments: [[ss_net_profit#2, s_state#8, s_county#7, 0], [ss_net_profit#2, s_state#8, null, 1], [ss_net_profit#2, null, null, 3]], [ss_net_profit#2, s_state#20, s_county#21, spark_grouping_id#22] (35) HashAggregate [codegen id : 8] -Input [4]: [ss_net_profit#2, s_state#24, s_county#25, spark_grouping_id#26] -Keys [3]: [s_state#24, s_county#25, spark_grouping_id#26] +Input [4]: [ss_net_profit#2, s_state#20, s_county#21, spark_grouping_id#22] +Keys [3]: [s_state#20, s_county#21, spark_grouping_id#22] Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum#27] -Results [4]: [s_state#24, s_county#25, spark_grouping_id#26, sum#28] +Aggregate Attributes [1]: [sum#23] +Results [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] (36) Exchange -Input [4]: [s_state#24, s_county#25, spark_grouping_id#26, sum#28] -Arguments: hashpartitioning(s_state#24, s_county#25, spark_grouping_id#26, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] +Arguments: hashpartitioning(s_state#20, s_county#21, spark_grouping_id#22, 5), ENSURE_REQUIREMENTS, [plan_id=5] (37) HashAggregate [codegen id : 9] -Input [4]: [s_state#24, s_county#25, spark_grouping_id#26, sum#28] -Keys [3]: [s_state#24, s_county#25, spark_grouping_id#26] +Input [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] +Keys [3]: [s_state#20, s_county#21, spark_grouping_id#22] Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#30] -Results [7]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#30,17,2) AS total_sum#31, s_state#24, s_county#25, (cast((shiftright(spark_grouping_id#26, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#26, 0) & 1) as tinyint)) AS lochierarchy#32, (cast((shiftright(spark_grouping_id#26, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#26, 0) & 1) as tinyint)) AS _w1#33, CASE WHEN (cast((shiftright(spark_grouping_id#26, 0) & 1) as tinyint) = 0) THEN s_state#24 END AS _w2#34, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#30,17,2) AS _w3#35] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#25] +Results [7]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#25,17,2) AS total_sum#26, s_state#20, s_county#21, (cast((shiftright(spark_grouping_id#22, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint)) AS lochierarchy#27, (cast((shiftright(spark_grouping_id#22, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint)) AS _w1#28, CASE WHEN (cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint) = 0) THEN s_state#20 END AS _w2#29, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#25,17,2) AS _w3#30] (38) Exchange -Input [7]: [total_sum#31, s_state#24, s_county#25, lochierarchy#32, _w1#33, _w2#34, _w3#35] -Arguments: hashpartitioning(_w1#33, _w2#34, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: hashpartitioning(_w1#28, _w2#29, 5), ENSURE_REQUIREMENTS, [plan_id=6] (39) Sort [codegen id : 10] -Input [7]: [total_sum#31, s_state#24, s_county#25, lochierarchy#32, _w1#33, _w2#34, _w3#35] -Arguments: [_w1#33 ASC NULLS FIRST, _w2#34 ASC NULLS FIRST, _w3#35 DESC NULLS LAST], false, 0 +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: [_w1#28 ASC NULLS FIRST, _w2#29 ASC NULLS FIRST, _w3#30 DESC NULLS LAST], false, 0 (40) Window -Input [7]: [total_sum#31, s_state#24, s_county#25, lochierarchy#32, _w1#33, _w2#34, _w3#35] -Arguments: [rank(_w3#35) windowspecdefinition(_w1#33, _w2#34, _w3#35 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#37], [_w1#33, _w2#34], [_w3#35 DESC NULLS LAST] +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: [rank(_w3#30) windowspecdefinition(_w1#28, _w2#29, _w3#30 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#31], [_w1#28, _w2#29], [_w3#30 DESC NULLS LAST] (41) Project [codegen id : 11] -Output [5]: [total_sum#31, s_state#24, s_county#25, lochierarchy#32, rank_within_parent#37] -Input [8]: [total_sum#31, s_state#24, s_county#25, lochierarchy#32, _w1#33, _w2#34, _w3#35, rank_within_parent#37] +Output [5]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] +Input [8]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w1#28, _w2#29, _w3#30, rank_within_parent#31] (42) TakeOrderedAndProject -Input [5]: [total_sum#31, s_state#24, s_county#25, lochierarchy#32, rank_within_parent#37] -Arguments: 100, [lochierarchy#32 DESC NULLS LAST, CASE WHEN (lochierarchy#32 = 0) THEN s_state#24 END ASC NULLS FIRST, rank_within_parent#37 ASC NULLS FIRST], [total_sum#31, s_state#24, s_county#25, lochierarchy#32, rank_within_parent#37] +Input [5]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] +Arguments: 100, [lochierarchy#27 DESC NULLS LAST, CASE WHEN (lochierarchy#27 = 0) THEN s_state#20 END ASC NULLS FIRST, rank_within_parent#31 ASC NULLS FIRST], [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] ===== Subqueries ===== @@ -247,26 +247,26 @@ BroadcastExchange (47) (43) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_month_seq#38] +Output [2]: [d_date_sk#5, d_month_seq#32] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#38] +Input [2]: [d_date_sk#5, d_month_seq#32] (45) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#38] -Condition : (((isnotnull(d_month_seq#38) AND (d_month_seq#38 >= 1200)) AND (d_month_seq#38 <= 1211)) AND isnotnull(d_date_sk#5)) +Input [2]: [d_date_sk#5, d_month_seq#32] +Condition : (((isnotnull(d_month_seq#32) AND (d_month_seq#32 >= 1200)) AND (d_month_seq#32 <= 1211)) AND isnotnull(d_date_sk#5)) (46) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_month_seq#38] +Input [2]: [d_date_sk#5, d_month_seq#32] (47) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] Subquery:2 Hosting operator id = 10 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt index 482d7a3975c56..92cae735b8b72 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q70/explain.txt @@ -115,7 +115,7 @@ Condition : isnotnull(s_store_sk#12) (16) BroadcastExchange Input [2]: [s_store_sk#12, s_state#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#9] @@ -127,54 +127,54 @@ Output [3]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] Input [5]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11, s_store_sk#12, s_state#13] (19) ReusedExchange [Reuses operator id: 47] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (20) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_date_sk#11] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (21) Project [codegen id : 4] Output [2]: [ss_net_profit#10, s_state#13] -Input [4]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13, d_date_sk#15] +Input [4]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13, d_date_sk#14] (22) HashAggregate [codegen id : 4] Input [2]: [ss_net_profit#10, s_state#13] Keys [1]: [s_state#13] Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#10))] -Aggregate Attributes [1]: [sum#16] -Results [2]: [s_state#13, sum#17] +Aggregate Attributes [1]: [sum#15] +Results [2]: [s_state#13, sum#16] (23) Exchange -Input [2]: [s_state#13, sum#17] -Arguments: hashpartitioning(s_state#13, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [2]: [s_state#13, sum#16] +Arguments: hashpartitioning(s_state#13, 5), ENSURE_REQUIREMENTS, [plan_id=2] (24) HashAggregate [codegen id : 5] -Input [2]: [s_state#13, sum#17] +Input [2]: [s_state#13, sum#16] Keys [1]: [s_state#13] Functions [1]: [sum(UnscaledValue(ss_net_profit#10))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#10))#19] -Results [3]: [s_state#13, s_state#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#10))#19,17,2) AS _w2#20] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#10))#17] +Results [3]: [s_state#13, s_state#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#10))#17,17,2) AS _w2#18] (25) Sort [codegen id : 5] -Input [3]: [s_state#13, s_state#13, _w2#20] -Arguments: [s_state#13 ASC NULLS FIRST, _w2#20 DESC NULLS LAST], false, 0 +Input [3]: [s_state#13, s_state#13, _w2#18] +Arguments: [s_state#13 ASC NULLS FIRST, _w2#18 DESC NULLS LAST], false, 0 (26) Window -Input [3]: [s_state#13, s_state#13, _w2#20] -Arguments: [rank(_w2#20) windowspecdefinition(s_state#13, _w2#20 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#21], [s_state#13], [_w2#20 DESC NULLS LAST] +Input [3]: [s_state#13, s_state#13, _w2#18] +Arguments: [rank(_w2#18) windowspecdefinition(s_state#13, _w2#18 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#19], [s_state#13], [_w2#18 DESC NULLS LAST] (27) Filter [codegen id : 6] -Input [4]: [s_state#13, s_state#13, _w2#20, ranking#21] -Condition : (ranking#21 <= 5) +Input [4]: [s_state#13, s_state#13, _w2#18, ranking#19] +Condition : (ranking#19 <= 5) (28) Project [codegen id : 6] Output [1]: [s_state#13] -Input [4]: [s_state#13, s_state#13, _w2#20, ranking#21] +Input [4]: [s_state#13, s_state#13, _w2#18, ranking#19] (29) BroadcastExchange Input [1]: [s_state#13] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#22] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=3] (30) BroadcastHashJoin [codegen id : 7] Left keys [1]: [s_state#8] @@ -183,7 +183,7 @@ Join condition: None (31) BroadcastExchange Input [3]: [s_store_sk#6, s_county#7, s_state#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (32) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_store_sk#1] @@ -196,45 +196,45 @@ Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_county#7, s_state#8] (34) Expand [codegen id : 8] Input [3]: [ss_net_profit#2, s_state#8, s_county#7] -Arguments: [[ss_net_profit#2, s_state#8, s_county#7, 0], [ss_net_profit#2, s_state#8, null, 1], [ss_net_profit#2, null, null, 3]], [ss_net_profit#2, s_state#24, s_county#25, spark_grouping_id#26] +Arguments: [[ss_net_profit#2, s_state#8, s_county#7, 0], [ss_net_profit#2, s_state#8, null, 1], [ss_net_profit#2, null, null, 3]], [ss_net_profit#2, s_state#20, s_county#21, spark_grouping_id#22] (35) HashAggregate [codegen id : 8] -Input [4]: [ss_net_profit#2, s_state#24, s_county#25, spark_grouping_id#26] -Keys [3]: [s_state#24, s_county#25, spark_grouping_id#26] +Input [4]: [ss_net_profit#2, s_state#20, s_county#21, spark_grouping_id#22] +Keys [3]: [s_state#20, s_county#21, spark_grouping_id#22] Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum#27] -Results [4]: [s_state#24, s_county#25, spark_grouping_id#26, sum#28] +Aggregate Attributes [1]: [sum#23] +Results [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] (36) Exchange -Input [4]: [s_state#24, s_county#25, spark_grouping_id#26, sum#28] -Arguments: hashpartitioning(s_state#24, s_county#25, spark_grouping_id#26, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] +Arguments: hashpartitioning(s_state#20, s_county#21, spark_grouping_id#22, 5), ENSURE_REQUIREMENTS, [plan_id=5] (37) HashAggregate [codegen id : 9] -Input [4]: [s_state#24, s_county#25, spark_grouping_id#26, sum#28] -Keys [3]: [s_state#24, s_county#25, spark_grouping_id#26] +Input [4]: [s_state#20, s_county#21, spark_grouping_id#22, sum#24] +Keys [3]: [s_state#20, s_county#21, spark_grouping_id#22] Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#30] -Results [7]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#30,17,2) AS total_sum#31, s_state#24, s_county#25, (cast((shiftright(spark_grouping_id#26, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#26, 0) & 1) as tinyint)) AS lochierarchy#32, (cast((shiftright(spark_grouping_id#26, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#26, 0) & 1) as tinyint)) AS _w1#33, CASE WHEN (cast((shiftright(spark_grouping_id#26, 0) & 1) as tinyint) = 0) THEN s_state#24 END AS _w2#34, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#30,17,2) AS _w3#35] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#25] +Results [7]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#25,17,2) AS total_sum#26, s_state#20, s_county#21, (cast((shiftright(spark_grouping_id#22, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint)) AS lochierarchy#27, (cast((shiftright(spark_grouping_id#22, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint)) AS _w1#28, CASE WHEN (cast((shiftright(spark_grouping_id#22, 0) & 1) as tinyint) = 0) THEN s_state#20 END AS _w2#29, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#25,17,2) AS _w3#30] (38) Exchange -Input [7]: [total_sum#31, s_state#24, s_county#25, lochierarchy#32, _w1#33, _w2#34, _w3#35] -Arguments: hashpartitioning(_w1#33, _w2#34, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: hashpartitioning(_w1#28, _w2#29, 5), ENSURE_REQUIREMENTS, [plan_id=6] (39) Sort [codegen id : 10] -Input [7]: [total_sum#31, s_state#24, s_county#25, lochierarchy#32, _w1#33, _w2#34, _w3#35] -Arguments: [_w1#33 ASC NULLS FIRST, _w2#34 ASC NULLS FIRST, _w3#35 DESC NULLS LAST], false, 0 +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: [_w1#28 ASC NULLS FIRST, _w2#29 ASC NULLS FIRST, _w3#30 DESC NULLS LAST], false, 0 (40) Window -Input [7]: [total_sum#31, s_state#24, s_county#25, lochierarchy#32, _w1#33, _w2#34, _w3#35] -Arguments: [rank(_w3#35) windowspecdefinition(_w1#33, _w2#34, _w3#35 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#37], [_w1#33, _w2#34], [_w3#35 DESC NULLS LAST] +Input [7]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w1#28, _w2#29, _w3#30] +Arguments: [rank(_w3#30) windowspecdefinition(_w1#28, _w2#29, _w3#30 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#31], [_w1#28, _w2#29], [_w3#30 DESC NULLS LAST] (41) Project [codegen id : 11] -Output [5]: [total_sum#31, s_state#24, s_county#25, lochierarchy#32, rank_within_parent#37] -Input [8]: [total_sum#31, s_state#24, s_county#25, lochierarchy#32, _w1#33, _w2#34, _w3#35, rank_within_parent#37] +Output [5]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] +Input [8]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, _w1#28, _w2#29, _w3#30, rank_within_parent#31] (42) TakeOrderedAndProject -Input [5]: [total_sum#31, s_state#24, s_county#25, lochierarchy#32, rank_within_parent#37] -Arguments: 100, [lochierarchy#32 DESC NULLS LAST, CASE WHEN (lochierarchy#32 = 0) THEN s_state#24 END ASC NULLS FIRST, rank_within_parent#37 ASC NULLS FIRST], [total_sum#31, s_state#24, s_county#25, lochierarchy#32, rank_within_parent#37] +Input [5]: [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] +Arguments: 100, [lochierarchy#27 DESC NULLS LAST, CASE WHEN (lochierarchy#27 = 0) THEN s_state#20 END ASC NULLS FIRST, rank_within_parent#31 ASC NULLS FIRST], [total_sum#26, s_state#20, s_county#21, lochierarchy#27, rank_within_parent#31] ===== Subqueries ===== @@ -247,26 +247,26 @@ BroadcastExchange (47) (43) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_month_seq#38] +Output [2]: [d_date_sk#5, d_month_seq#32] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#38] +Input [2]: [d_date_sk#5, d_month_seq#32] (45) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#38] -Condition : (((isnotnull(d_month_seq#38) AND (d_month_seq#38 >= 1200)) AND (d_month_seq#38 <= 1211)) AND isnotnull(d_date_sk#5)) +Input [2]: [d_date_sk#5, d_month_seq#32] +Condition : (((isnotnull(d_month_seq#32) AND (d_month_seq#32 >= 1200)) AND (d_month_seq#32 <= 1211)) AND isnotnull(d_date_sk#5)) (46) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_month_seq#38] +Input [2]: [d_date_sk#5, d_month_seq#32] (47) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] Subquery:2 Hosting operator id = 10 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.sf100/explain.txt index 2977c4d96c94a..1eb2e39dc5bb9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71.sf100/explain.txt @@ -59,160 +59,160 @@ Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] (5) BroadcastExchange Input [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.web_sales -Output [4]: [ws_sold_time_sk#6, ws_item_sk#7, ws_ext_sales_price#8, ws_sold_date_sk#9] +Output [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#9), dynamicpruningexpression(ws_sold_date_sk#9 IN dynamicpruning#10)] +PartitionFilters: [isnotnull(ws_sold_date_sk#8), dynamicpruningexpression(ws_sold_date_sk#8 IN dynamicpruning#9)] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [4]: [ws_sold_time_sk#6, ws_item_sk#7, ws_ext_sales_price#8, ws_sold_date_sk#9] +Input [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] (8) Filter [codegen id : 3] -Input [4]: [ws_sold_time_sk#6, ws_item_sk#7, ws_ext_sales_price#8, ws_sold_date_sk#9] -Condition : (isnotnull(ws_item_sk#7) AND isnotnull(ws_sold_time_sk#6)) +Input [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Condition : (isnotnull(ws_item_sk#6) AND isnotnull(ws_sold_time_sk#5)) (9) ReusedExchange [Reuses operator id: 43] -Output [1]: [d_date_sk#11] +Output [1]: [d_date_sk#10] (10) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ws_sold_date_sk#9] -Right keys [1]: [d_date_sk#11] +Left keys [1]: [ws_sold_date_sk#8] +Right keys [1]: [d_date_sk#10] Join condition: None (11) Project [codegen id : 3] -Output [3]: [ws_ext_sales_price#8 AS ext_price#12, ws_item_sk#7 AS sold_item_sk#13, ws_sold_time_sk#6 AS time_sk#14] -Input [5]: [ws_sold_time_sk#6, ws_item_sk#7, ws_ext_sales_price#8, ws_sold_date_sk#9, d_date_sk#11] +Output [3]: [ws_ext_sales_price#7 AS ext_price#11, ws_item_sk#6 AS sold_item_sk#12, ws_sold_time_sk#5 AS time_sk#13] +Input [5]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8, d_date_sk#10] (12) Scan parquet default.catalog_sales -Output [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Output [4]: [cs_sold_time_sk#14, cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#18), dynamicpruningexpression(cs_sold_date_sk#18 IN dynamicpruning#10)] +PartitionFilters: [isnotnull(cs_sold_date_sk#17), dynamicpruningexpression(cs_sold_date_sk#17 IN dynamicpruning#9)] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)] ReadSchema: struct (13) ColumnarToRow [codegen id : 5] -Input [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Input [4]: [cs_sold_time_sk#14, cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] (14) Filter [codegen id : 5] -Input [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] -Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_sold_time_sk#15)) +Input [4]: [cs_sold_time_sk#14, cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] +Condition : (isnotnull(cs_item_sk#15) AND isnotnull(cs_sold_time_sk#14)) (15) ReusedExchange [Reuses operator id: 43] -Output [1]: [d_date_sk#19] +Output [1]: [d_date_sk#18] (16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_sold_date_sk#18] -Right keys [1]: [d_date_sk#19] +Left keys [1]: [cs_sold_date_sk#17] +Right keys [1]: [d_date_sk#18] Join condition: None (17) Project [codegen id : 5] -Output [3]: [cs_ext_sales_price#17 AS ext_price#20, cs_item_sk#16 AS sold_item_sk#21, cs_sold_time_sk#15 AS time_sk#22] -Input [5]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18, d_date_sk#19] +Output [3]: [cs_ext_sales_price#16 AS ext_price#19, cs_item_sk#15 AS sold_item_sk#20, cs_sold_time_sk#14 AS time_sk#21] +Input [5]: [cs_sold_time_sk#14, cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17, d_date_sk#18] (18) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Output [4]: [ss_sold_time_sk#22, ss_item_sk#23, ss_ext_sales_price#24, ss_sold_date_sk#25] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#26), dynamicpruningexpression(ss_sold_date_sk#26 IN dynamicpruning#10)] +PartitionFilters: [isnotnull(ss_sold_date_sk#25), dynamicpruningexpression(ss_sold_date_sk#25 IN dynamicpruning#9)] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 7] -Input [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Input [4]: [ss_sold_time_sk#22, ss_item_sk#23, ss_ext_sales_price#24, ss_sold_date_sk#25] (20) Filter [codegen id : 7] -Input [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] -Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_time_sk#23)) +Input [4]: [ss_sold_time_sk#22, ss_item_sk#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Condition : (isnotnull(ss_item_sk#23) AND isnotnull(ss_sold_time_sk#22)) (21) ReusedExchange [Reuses operator id: 43] -Output [1]: [d_date_sk#27] +Output [1]: [d_date_sk#26] (22) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [ss_sold_date_sk#26] -Right keys [1]: [d_date_sk#27] +Left keys [1]: [ss_sold_date_sk#25] +Right keys [1]: [d_date_sk#26] Join condition: None (23) Project [codegen id : 7] -Output [3]: [ss_ext_sales_price#25 AS ext_price#28, ss_item_sk#24 AS sold_item_sk#29, ss_sold_time_sk#23 AS time_sk#30] -Input [5]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26, d_date_sk#27] +Output [3]: [ss_ext_sales_price#24 AS ext_price#27, ss_item_sk#23 AS sold_item_sk#28, ss_sold_time_sk#22 AS time_sk#29] +Input [5]: [ss_sold_time_sk#22, ss_item_sk#23, ss_ext_sales_price#24, ss_sold_date_sk#25, d_date_sk#26] (24) Union (25) BroadcastHashJoin [codegen id : 9] Left keys [1]: [i_item_sk#1] -Right keys [1]: [sold_item_sk#13] +Right keys [1]: [sold_item_sk#12] Join condition: None (26) Project [codegen id : 9] -Output [4]: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14] -Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, ext_price#12, sold_item_sk#13, time_sk#14] +Output [4]: [i_brand_id#2, i_brand#3, ext_price#11, time_sk#13] +Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, ext_price#11, sold_item_sk#12, time_sk#13] (27) Scan parquet default.time_dim -Output [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Output [4]: [t_time_sk#30, t_hour#31, t_minute#32, t_meal_time#33] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [Or(EqualTo(t_meal_time,breakfast ),EqualTo(t_meal_time,dinner )), IsNotNull(t_time_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 8] -Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Input [4]: [t_time_sk#30, t_hour#31, t_minute#32, t_meal_time#33] (29) Filter [codegen id : 8] -Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] -Condition : (((t_meal_time#34 = breakfast ) OR (t_meal_time#34 = dinner )) AND isnotnull(t_time_sk#31)) +Input [4]: [t_time_sk#30, t_hour#31, t_minute#32, t_meal_time#33] +Condition : (((t_meal_time#33 = breakfast ) OR (t_meal_time#33 = dinner )) AND isnotnull(t_time_sk#30)) (30) Project [codegen id : 8] -Output [3]: [t_time_sk#31, t_hour#32, t_minute#33] -Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Output [3]: [t_time_sk#30, t_hour#31, t_minute#32] +Input [4]: [t_time_sk#30, t_hour#31, t_minute#32, t_meal_time#33] (31) BroadcastExchange -Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] +Input [3]: [t_time_sk#30, t_hour#31, t_minute#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (32) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [time_sk#14] -Right keys [1]: [t_time_sk#31] +Left keys [1]: [time_sk#13] +Right keys [1]: [t_time_sk#30] Join condition: None (33) Project [codegen id : 9] -Output [5]: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] -Input [7]: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14, t_time_sk#31, t_hour#32, t_minute#33] +Output [5]: [i_brand_id#2, i_brand#3, ext_price#11, t_hour#31, t_minute#32] +Input [7]: [i_brand_id#2, i_brand#3, ext_price#11, time_sk#13, t_time_sk#30, t_hour#31, t_minute#32] (34) HashAggregate [codegen id : 9] -Input [5]: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] -Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] -Functions [1]: [partial_sum(UnscaledValue(ext_price#12))] -Aggregate Attributes [1]: [sum#36] -Results [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] +Input [5]: [i_brand_id#2, i_brand#3, ext_price#11, t_hour#31, t_minute#32] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#31, t_minute#32] +Functions [1]: [partial_sum(UnscaledValue(ext_price#11))] +Aggregate Attributes [1]: [sum#34] +Results [5]: [i_brand#3, i_brand_id#2, t_hour#31, t_minute#32, sum#35] (35) Exchange -Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] -Arguments: hashpartitioning(i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [5]: [i_brand#3, i_brand_id#2, t_hour#31, t_minute#32, sum#35] +Arguments: hashpartitioning(i_brand#3, i_brand_id#2, t_hour#31, t_minute#32, 5), ENSURE_REQUIREMENTS, [plan_id=3] (36) HashAggregate [codegen id : 10] -Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] -Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] -Functions [1]: [sum(UnscaledValue(ext_price#12))] -Aggregate Attributes [1]: [sum(UnscaledValue(ext_price#12))#39] -Results [5]: [i_brand_id#2 AS brand_id#40, i_brand#3 AS brand#41, t_hour#32, t_minute#33, MakeDecimal(sum(UnscaledValue(ext_price#12))#39,17,2) AS ext_price#42] +Input [5]: [i_brand#3, i_brand_id#2, t_hour#31, t_minute#32, sum#35] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#31, t_minute#32] +Functions [1]: [sum(UnscaledValue(ext_price#11))] +Aggregate Attributes [1]: [sum(UnscaledValue(ext_price#11))#36] +Results [5]: [i_brand_id#2 AS brand_id#37, i_brand#3 AS brand#38, t_hour#31, t_minute#32, MakeDecimal(sum(UnscaledValue(ext_price#11))#36,17,2) AS ext_price#39] (37) Exchange -Input [5]: [brand_id#40, brand#41, t_hour#32, t_minute#33, ext_price#42] -Arguments: rangepartitioning(ext_price#42 DESC NULLS LAST, brand_id#40 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#43] +Input [5]: [brand_id#37, brand#38, t_hour#31, t_minute#32, ext_price#39] +Arguments: rangepartitioning(ext_price#39 DESC NULLS LAST, brand_id#37 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=4] (38) Sort [codegen id : 11] -Input [5]: [brand_id#40, brand#41, t_hour#32, t_minute#33, ext_price#42] -Arguments: [ext_price#42 DESC NULLS LAST, brand_id#40 ASC NULLS FIRST], true, 0 +Input [5]: [brand_id#37, brand#38, t_hour#31, t_minute#32, ext_price#39] +Arguments: [ext_price#39 DESC NULLS LAST, brand_id#37 ASC NULLS FIRST], true, 0 ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ws_sold_date_sk#9 IN dynamicpruning#10 +Subquery:1 Hosting operator id = 6 Hosting Expression = ws_sold_date_sk#8 IN dynamicpruning#9 BroadcastExchange (43) +- * Project (42) +- * Filter (41) @@ -221,29 +221,29 @@ BroadcastExchange (43) (39) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_year#44, d_moy#45] +Output [3]: [d_date_sk#10, d_year#40, d_moy#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] ReadSchema: struct (40) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#44, d_moy#45] +Input [3]: [d_date_sk#10, d_year#40, d_moy#41] (41) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#44, d_moy#45] -Condition : ((((isnotnull(d_moy#45) AND isnotnull(d_year#44)) AND (d_moy#45 = 11)) AND (d_year#44 = 1999)) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#10, d_year#40, d_moy#41] +Condition : ((((isnotnull(d_moy#41) AND isnotnull(d_year#40)) AND (d_moy#41 = 11)) AND (d_year#40 = 1999)) AND isnotnull(d_date_sk#10)) (42) Project [codegen id : 1] -Output [1]: [d_date_sk#11] -Input [3]: [d_date_sk#11, d_year#44, d_moy#45] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#40, d_moy#41] (43) BroadcastExchange -Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#46] +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] -Subquery:2 Hosting operator id = 12 Hosting Expression = cs_sold_date_sk#18 IN dynamicpruning#10 +Subquery:2 Hosting operator id = 12 Hosting Expression = cs_sold_date_sk#17 IN dynamicpruning#9 -Subquery:3 Hosting operator id = 18 Hosting Expression = ss_sold_date_sk#26 IN dynamicpruning#10 +Subquery:3 Hosting operator id = 18 Hosting Expression = ss_sold_date_sk#25 IN dynamicpruning#9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt index 2977c4d96c94a..1eb2e39dc5bb9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q71/explain.txt @@ -59,160 +59,160 @@ Input [4]: [i_item_sk#1, i_brand_id#2, i_brand#3, i_manager_id#4] (5) BroadcastExchange Input [3]: [i_item_sk#1, i_brand_id#2, i_brand#3] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.web_sales -Output [4]: [ws_sold_time_sk#6, ws_item_sk#7, ws_ext_sales_price#8, ws_sold_date_sk#9] +Output [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#9), dynamicpruningexpression(ws_sold_date_sk#9 IN dynamicpruning#10)] +PartitionFilters: [isnotnull(ws_sold_date_sk#8), dynamicpruningexpression(ws_sold_date_sk#8 IN dynamicpruning#9)] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_sold_time_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [4]: [ws_sold_time_sk#6, ws_item_sk#7, ws_ext_sales_price#8, ws_sold_date_sk#9] +Input [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] (8) Filter [codegen id : 3] -Input [4]: [ws_sold_time_sk#6, ws_item_sk#7, ws_ext_sales_price#8, ws_sold_date_sk#9] -Condition : (isnotnull(ws_item_sk#7) AND isnotnull(ws_sold_time_sk#6)) +Input [4]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8] +Condition : (isnotnull(ws_item_sk#6) AND isnotnull(ws_sold_time_sk#5)) (9) ReusedExchange [Reuses operator id: 43] -Output [1]: [d_date_sk#11] +Output [1]: [d_date_sk#10] (10) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ws_sold_date_sk#9] -Right keys [1]: [d_date_sk#11] +Left keys [1]: [ws_sold_date_sk#8] +Right keys [1]: [d_date_sk#10] Join condition: None (11) Project [codegen id : 3] -Output [3]: [ws_ext_sales_price#8 AS ext_price#12, ws_item_sk#7 AS sold_item_sk#13, ws_sold_time_sk#6 AS time_sk#14] -Input [5]: [ws_sold_time_sk#6, ws_item_sk#7, ws_ext_sales_price#8, ws_sold_date_sk#9, d_date_sk#11] +Output [3]: [ws_ext_sales_price#7 AS ext_price#11, ws_item_sk#6 AS sold_item_sk#12, ws_sold_time_sk#5 AS time_sk#13] +Input [5]: [ws_sold_time_sk#5, ws_item_sk#6, ws_ext_sales_price#7, ws_sold_date_sk#8, d_date_sk#10] (12) Scan parquet default.catalog_sales -Output [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Output [4]: [cs_sold_time_sk#14, cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#18), dynamicpruningexpression(cs_sold_date_sk#18 IN dynamicpruning#10)] +PartitionFilters: [isnotnull(cs_sold_date_sk#17), dynamicpruningexpression(cs_sold_date_sk#17 IN dynamicpruning#9)] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_sold_time_sk)] ReadSchema: struct (13) ColumnarToRow [codegen id : 5] -Input [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] +Input [4]: [cs_sold_time_sk#14, cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] (14) Filter [codegen id : 5] -Input [4]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18] -Condition : (isnotnull(cs_item_sk#16) AND isnotnull(cs_sold_time_sk#15)) +Input [4]: [cs_sold_time_sk#14, cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17] +Condition : (isnotnull(cs_item_sk#15) AND isnotnull(cs_sold_time_sk#14)) (15) ReusedExchange [Reuses operator id: 43] -Output [1]: [d_date_sk#19] +Output [1]: [d_date_sk#18] (16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_sold_date_sk#18] -Right keys [1]: [d_date_sk#19] +Left keys [1]: [cs_sold_date_sk#17] +Right keys [1]: [d_date_sk#18] Join condition: None (17) Project [codegen id : 5] -Output [3]: [cs_ext_sales_price#17 AS ext_price#20, cs_item_sk#16 AS sold_item_sk#21, cs_sold_time_sk#15 AS time_sk#22] -Input [5]: [cs_sold_time_sk#15, cs_item_sk#16, cs_ext_sales_price#17, cs_sold_date_sk#18, d_date_sk#19] +Output [3]: [cs_ext_sales_price#16 AS ext_price#19, cs_item_sk#15 AS sold_item_sk#20, cs_sold_time_sk#14 AS time_sk#21] +Input [5]: [cs_sold_time_sk#14, cs_item_sk#15, cs_ext_sales_price#16, cs_sold_date_sk#17, d_date_sk#18] (18) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Output [4]: [ss_sold_time_sk#22, ss_item_sk#23, ss_ext_sales_price#24, ss_sold_date_sk#25] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#26), dynamicpruningexpression(ss_sold_date_sk#26 IN dynamicpruning#10)] +PartitionFilters: [isnotnull(ss_sold_date_sk#25), dynamicpruningexpression(ss_sold_date_sk#25 IN dynamicpruning#9)] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_sold_time_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 7] -Input [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Input [4]: [ss_sold_time_sk#22, ss_item_sk#23, ss_ext_sales_price#24, ss_sold_date_sk#25] (20) Filter [codegen id : 7] -Input [4]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26] -Condition : (isnotnull(ss_item_sk#24) AND isnotnull(ss_sold_time_sk#23)) +Input [4]: [ss_sold_time_sk#22, ss_item_sk#23, ss_ext_sales_price#24, ss_sold_date_sk#25] +Condition : (isnotnull(ss_item_sk#23) AND isnotnull(ss_sold_time_sk#22)) (21) ReusedExchange [Reuses operator id: 43] -Output [1]: [d_date_sk#27] +Output [1]: [d_date_sk#26] (22) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [ss_sold_date_sk#26] -Right keys [1]: [d_date_sk#27] +Left keys [1]: [ss_sold_date_sk#25] +Right keys [1]: [d_date_sk#26] Join condition: None (23) Project [codegen id : 7] -Output [3]: [ss_ext_sales_price#25 AS ext_price#28, ss_item_sk#24 AS sold_item_sk#29, ss_sold_time_sk#23 AS time_sk#30] -Input [5]: [ss_sold_time_sk#23, ss_item_sk#24, ss_ext_sales_price#25, ss_sold_date_sk#26, d_date_sk#27] +Output [3]: [ss_ext_sales_price#24 AS ext_price#27, ss_item_sk#23 AS sold_item_sk#28, ss_sold_time_sk#22 AS time_sk#29] +Input [5]: [ss_sold_time_sk#22, ss_item_sk#23, ss_ext_sales_price#24, ss_sold_date_sk#25, d_date_sk#26] (24) Union (25) BroadcastHashJoin [codegen id : 9] Left keys [1]: [i_item_sk#1] -Right keys [1]: [sold_item_sk#13] +Right keys [1]: [sold_item_sk#12] Join condition: None (26) Project [codegen id : 9] -Output [4]: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14] -Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, ext_price#12, sold_item_sk#13, time_sk#14] +Output [4]: [i_brand_id#2, i_brand#3, ext_price#11, time_sk#13] +Input [6]: [i_item_sk#1, i_brand_id#2, i_brand#3, ext_price#11, sold_item_sk#12, time_sk#13] (27) Scan parquet default.time_dim -Output [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Output [4]: [t_time_sk#30, t_hour#31, t_minute#32, t_meal_time#33] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [Or(EqualTo(t_meal_time,breakfast ),EqualTo(t_meal_time,dinner )), IsNotNull(t_time_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 8] -Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Input [4]: [t_time_sk#30, t_hour#31, t_minute#32, t_meal_time#33] (29) Filter [codegen id : 8] -Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] -Condition : (((t_meal_time#34 = breakfast ) OR (t_meal_time#34 = dinner )) AND isnotnull(t_time_sk#31)) +Input [4]: [t_time_sk#30, t_hour#31, t_minute#32, t_meal_time#33] +Condition : (((t_meal_time#33 = breakfast ) OR (t_meal_time#33 = dinner )) AND isnotnull(t_time_sk#30)) (30) Project [codegen id : 8] -Output [3]: [t_time_sk#31, t_hour#32, t_minute#33] -Input [4]: [t_time_sk#31, t_hour#32, t_minute#33, t_meal_time#34] +Output [3]: [t_time_sk#30, t_hour#31, t_minute#32] +Input [4]: [t_time_sk#30, t_hour#31, t_minute#32, t_meal_time#33] (31) BroadcastExchange -Input [3]: [t_time_sk#31, t_hour#32, t_minute#33] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] +Input [3]: [t_time_sk#30, t_hour#31, t_minute#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (32) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [time_sk#14] -Right keys [1]: [t_time_sk#31] +Left keys [1]: [time_sk#13] +Right keys [1]: [t_time_sk#30] Join condition: None (33) Project [codegen id : 9] -Output [5]: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] -Input [7]: [i_brand_id#2, i_brand#3, ext_price#12, time_sk#14, t_time_sk#31, t_hour#32, t_minute#33] +Output [5]: [i_brand_id#2, i_brand#3, ext_price#11, t_hour#31, t_minute#32] +Input [7]: [i_brand_id#2, i_brand#3, ext_price#11, time_sk#13, t_time_sk#30, t_hour#31, t_minute#32] (34) HashAggregate [codegen id : 9] -Input [5]: [i_brand_id#2, i_brand#3, ext_price#12, t_hour#32, t_minute#33] -Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] -Functions [1]: [partial_sum(UnscaledValue(ext_price#12))] -Aggregate Attributes [1]: [sum#36] -Results [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] +Input [5]: [i_brand_id#2, i_brand#3, ext_price#11, t_hour#31, t_minute#32] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#31, t_minute#32] +Functions [1]: [partial_sum(UnscaledValue(ext_price#11))] +Aggregate Attributes [1]: [sum#34] +Results [5]: [i_brand#3, i_brand_id#2, t_hour#31, t_minute#32, sum#35] (35) Exchange -Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] -Arguments: hashpartitioning(i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [5]: [i_brand#3, i_brand_id#2, t_hour#31, t_minute#32, sum#35] +Arguments: hashpartitioning(i_brand#3, i_brand_id#2, t_hour#31, t_minute#32, 5), ENSURE_REQUIREMENTS, [plan_id=3] (36) HashAggregate [codegen id : 10] -Input [5]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33, sum#37] -Keys [4]: [i_brand#3, i_brand_id#2, t_hour#32, t_minute#33] -Functions [1]: [sum(UnscaledValue(ext_price#12))] -Aggregate Attributes [1]: [sum(UnscaledValue(ext_price#12))#39] -Results [5]: [i_brand_id#2 AS brand_id#40, i_brand#3 AS brand#41, t_hour#32, t_minute#33, MakeDecimal(sum(UnscaledValue(ext_price#12))#39,17,2) AS ext_price#42] +Input [5]: [i_brand#3, i_brand_id#2, t_hour#31, t_minute#32, sum#35] +Keys [4]: [i_brand#3, i_brand_id#2, t_hour#31, t_minute#32] +Functions [1]: [sum(UnscaledValue(ext_price#11))] +Aggregate Attributes [1]: [sum(UnscaledValue(ext_price#11))#36] +Results [5]: [i_brand_id#2 AS brand_id#37, i_brand#3 AS brand#38, t_hour#31, t_minute#32, MakeDecimal(sum(UnscaledValue(ext_price#11))#36,17,2) AS ext_price#39] (37) Exchange -Input [5]: [brand_id#40, brand#41, t_hour#32, t_minute#33, ext_price#42] -Arguments: rangepartitioning(ext_price#42 DESC NULLS LAST, brand_id#40 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#43] +Input [5]: [brand_id#37, brand#38, t_hour#31, t_minute#32, ext_price#39] +Arguments: rangepartitioning(ext_price#39 DESC NULLS LAST, brand_id#37 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=4] (38) Sort [codegen id : 11] -Input [5]: [brand_id#40, brand#41, t_hour#32, t_minute#33, ext_price#42] -Arguments: [ext_price#42 DESC NULLS LAST, brand_id#40 ASC NULLS FIRST], true, 0 +Input [5]: [brand_id#37, brand#38, t_hour#31, t_minute#32, ext_price#39] +Arguments: [ext_price#39 DESC NULLS LAST, brand_id#37 ASC NULLS FIRST], true, 0 ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ws_sold_date_sk#9 IN dynamicpruning#10 +Subquery:1 Hosting operator id = 6 Hosting Expression = ws_sold_date_sk#8 IN dynamicpruning#9 BroadcastExchange (43) +- * Project (42) +- * Filter (41) @@ -221,29 +221,29 @@ BroadcastExchange (43) (39) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_year#44, d_moy#45] +Output [3]: [d_date_sk#10, d_year#40, d_moy#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_moy), IsNotNull(d_year), EqualTo(d_moy,11), EqualTo(d_year,1999), IsNotNull(d_date_sk)] ReadSchema: struct (40) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#44, d_moy#45] +Input [3]: [d_date_sk#10, d_year#40, d_moy#41] (41) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#44, d_moy#45] -Condition : ((((isnotnull(d_moy#45) AND isnotnull(d_year#44)) AND (d_moy#45 = 11)) AND (d_year#44 = 1999)) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#10, d_year#40, d_moy#41] +Condition : ((((isnotnull(d_moy#41) AND isnotnull(d_year#40)) AND (d_moy#41 = 11)) AND (d_year#40 = 1999)) AND isnotnull(d_date_sk#10)) (42) Project [codegen id : 1] -Output [1]: [d_date_sk#11] -Input [3]: [d_date_sk#11, d_year#44, d_moy#45] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#40, d_moy#41] (43) BroadcastExchange -Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#46] +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] -Subquery:2 Hosting operator id = 12 Hosting Expression = cs_sold_date_sk#18 IN dynamicpruning#10 +Subquery:2 Hosting operator id = 12 Hosting Expression = cs_sold_date_sk#17 IN dynamicpruning#9 -Subquery:3 Hosting operator id = 18 Hosting Expression = ss_sold_date_sk#26 IN dynamicpruning#10 +Subquery:3 Hosting operator id = 18 Hosting Expression = ss_sold_date_sk#25 IN dynamicpruning#9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt index c6971f3ea904b..036c6ae795946 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72.sf100/explain.txt @@ -106,7 +106,7 @@ Input [2]: [hd_demo_sk#10, hd_buy_potential#11] (8) BroadcastExchange Input [1]: [hd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_bill_hdemo_sk#3] @@ -118,272 +118,272 @@ Output [7]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#4, cs_promo_sk#5, Input [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, hd_demo_sk#10] (11) Scan parquet default.customer_demographics -Output [2]: [cd_demo_sk#13, cd_marital_status#14] +Output [2]: [cd_demo_sk#12, cd_marital_status#13] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [2]: [cd_demo_sk#13, cd_marital_status#14] +Input [2]: [cd_demo_sk#12, cd_marital_status#13] (13) Filter [codegen id : 2] -Input [2]: [cd_demo_sk#13, cd_marital_status#14] -Condition : ((isnotnull(cd_marital_status#14) AND (cd_marital_status#14 = D)) AND isnotnull(cd_demo_sk#13)) +Input [2]: [cd_demo_sk#12, cd_marital_status#13] +Condition : ((isnotnull(cd_marital_status#13) AND (cd_marital_status#13 = D)) AND isnotnull(cd_demo_sk#12)) (14) Project [codegen id : 2] -Output [1]: [cd_demo_sk#13] -Input [2]: [cd_demo_sk#13, cd_marital_status#14] +Output [1]: [cd_demo_sk#12] +Input [2]: [cd_demo_sk#12, cd_marital_status#13] (15) BroadcastExchange -Input [1]: [cd_demo_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [cd_demo_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_bill_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#13] +Right keys [1]: [cd_demo_sk#12] Join condition: None (17) Project [codegen id : 4] Output [6]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] -Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, cd_demo_sk#13] +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, cd_demo_sk#12] (18) Scan parquet default.date_dim -Output [2]: [d_date_sk#16, d_date#17] +Output [2]: [d_date_sk#14, d_date#15] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#16, d_date#17] +Input [2]: [d_date_sk#14, d_date#15] (20) Filter [codegen id : 3] -Input [2]: [d_date_sk#16, d_date#17] -Condition : (isnotnull(d_date#17) AND isnotnull(d_date_sk#16)) +Input [2]: [d_date_sk#14, d_date#15] +Condition : (isnotnull(d_date#15) AND isnotnull(d_date_sk#14)) (21) BroadcastExchange -Input [2]: [d_date_sk#16, d_date#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] +Input [2]: [d_date_sk#14, d_date#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (22) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_ship_date_sk#1] -Right keys [1]: [d_date_sk#16] +Right keys [1]: [d_date_sk#14] Join condition: None (23) Project [codegen id : 4] -Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17] -Input [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date_sk#16, d_date#17] +Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#15] +Input [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date_sk#14, d_date#15] (24) Exchange -Input [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17] -Arguments: hashpartitioning(cs_item_sk#4, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#15] +Arguments: hashpartitioning(cs_item_sk#4, 5), ENSURE_REQUIREMENTS, [plan_id=4] (25) Sort [codegen id : 5] -Input [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17] +Input [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#15] Arguments: [cs_item_sk#4 ASC NULLS FIRST], false, 0 (26) Scan parquet default.item -Output [2]: [i_item_sk#20, i_item_desc#21] +Output [2]: [i_item_sk#16, i_item_desc#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 6] -Input [2]: [i_item_sk#20, i_item_desc#21] +Input [2]: [i_item_sk#16, i_item_desc#17] (28) Filter [codegen id : 6] -Input [2]: [i_item_sk#20, i_item_desc#21] -Condition : isnotnull(i_item_sk#20) +Input [2]: [i_item_sk#16, i_item_desc#17] +Condition : isnotnull(i_item_sk#16) (29) Exchange -Input [2]: [i_item_sk#20, i_item_desc#21] -Arguments: hashpartitioning(i_item_sk#20, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [2]: [i_item_sk#16, i_item_desc#17] +Arguments: hashpartitioning(i_item_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] (30) Sort [codegen id : 7] -Input [2]: [i_item_sk#20, i_item_desc#21] -Arguments: [i_item_sk#20 ASC NULLS FIRST], false, 0 +Input [2]: [i_item_sk#16, i_item_desc#17] +Arguments: [i_item_sk#16 ASC NULLS FIRST], false, 0 (31) SortMergeJoin [codegen id : 10] Left keys [1]: [cs_item_sk#4] -Right keys [1]: [i_item_sk#20] +Right keys [1]: [i_item_sk#16] Join condition: None (32) Project [codegen id : 10] -Output [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17, i_item_desc#21] -Input [8]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17, i_item_sk#20, i_item_desc#21] +Output [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#15, i_item_desc#17] +Input [8]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#15, i_item_sk#16, i_item_desc#17] (33) ReusedExchange [Reuses operator id: 81] -Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] +Output [4]: [d_date_sk#18, d_date#19, d_week_seq#20, d_date_sk#21] (34) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_sold_date_sk#8] -Right keys [1]: [d_date_sk#23] -Join condition: (d_date#17 > date_add(d_date#24, 5)) +Right keys [1]: [d_date_sk#18] +Join condition: (d_date#15 > date_add(d_date#19, 5)) (35) Project [codegen id : 10] -Output [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26] -Input [11]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17, i_item_desc#21, d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] +Output [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#17, d_week_seq#20, d_date_sk#21] +Input [11]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#15, i_item_desc#17, d_date_sk#18, d_date#19, d_week_seq#20, d_date_sk#21] (36) Exchange -Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26] -Arguments: hashpartitioning(cs_item_sk#4, d_date_sk#26, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#17, d_week_seq#20, d_date_sk#21] +Arguments: hashpartitioning(cs_item_sk#4, d_date_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=6] (37) Sort [codegen id : 11] -Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26] -Arguments: [cs_item_sk#4 ASC NULLS FIRST, d_date_sk#26 ASC NULLS FIRST], false, 0 +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#17, d_week_seq#20, d_date_sk#21] +Arguments: [cs_item_sk#4 ASC NULLS FIRST, d_date_sk#21 ASC NULLS FIRST], false, 0 (38) Scan parquet default.inventory -Output [4]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31] +Output [4]: [inv_item_sk#22, inv_warehouse_sk#23, inv_quantity_on_hand#24, inv_date_sk#25] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(inv_date_sk#31), dynamicpruningexpression(true)] +PartitionFilters: [isnotnull(inv_date_sk#25), dynamicpruningexpression(true)] PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] ReadSchema: struct (39) ColumnarToRow [codegen id : 13] -Input [4]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31] +Input [4]: [inv_item_sk#22, inv_warehouse_sk#23, inv_quantity_on_hand#24, inv_date_sk#25] (40) Filter [codegen id : 13] -Input [4]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31] -Condition : ((isnotnull(inv_quantity_on_hand#30) AND isnotnull(inv_item_sk#28)) AND isnotnull(inv_warehouse_sk#29)) +Input [4]: [inv_item_sk#22, inv_warehouse_sk#23, inv_quantity_on_hand#24, inv_date_sk#25] +Condition : ((isnotnull(inv_quantity_on_hand#24) AND isnotnull(inv_item_sk#22)) AND isnotnull(inv_warehouse_sk#23)) (41) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#32, w_warehouse_name#33] +Output [2]: [w_warehouse_sk#26, w_warehouse_name#27] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (42) ColumnarToRow [codegen id : 12] -Input [2]: [w_warehouse_sk#32, w_warehouse_name#33] +Input [2]: [w_warehouse_sk#26, w_warehouse_name#27] (43) Filter [codegen id : 12] -Input [2]: [w_warehouse_sk#32, w_warehouse_name#33] -Condition : isnotnull(w_warehouse_sk#32) +Input [2]: [w_warehouse_sk#26, w_warehouse_name#27] +Condition : isnotnull(w_warehouse_sk#26) (44) BroadcastExchange -Input [2]: [w_warehouse_sk#32, w_warehouse_name#33] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#34] +Input [2]: [w_warehouse_sk#26, w_warehouse_name#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] (45) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [inv_warehouse_sk#29] -Right keys [1]: [w_warehouse_sk#32] +Left keys [1]: [inv_warehouse_sk#23] +Right keys [1]: [w_warehouse_sk#26] Join condition: None (46) Project [codegen id : 13] -Output [4]: [inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] -Input [6]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_sk#32, w_warehouse_name#33] +Output [4]: [inv_item_sk#22, inv_quantity_on_hand#24, inv_date_sk#25, w_warehouse_name#27] +Input [6]: [inv_item_sk#22, inv_warehouse_sk#23, inv_quantity_on_hand#24, inv_date_sk#25, w_warehouse_sk#26, w_warehouse_name#27] (47) Exchange -Input [4]: [inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] -Arguments: hashpartitioning(inv_item_sk#28, inv_date_sk#31, 5), ENSURE_REQUIREMENTS, [id=#35] +Input [4]: [inv_item_sk#22, inv_quantity_on_hand#24, inv_date_sk#25, w_warehouse_name#27] +Arguments: hashpartitioning(inv_item_sk#22, inv_date_sk#25, 5), ENSURE_REQUIREMENTS, [plan_id=8] (48) Sort [codegen id : 14] -Input [4]: [inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] -Arguments: [inv_item_sk#28 ASC NULLS FIRST, inv_date_sk#31 ASC NULLS FIRST], false, 0 +Input [4]: [inv_item_sk#22, inv_quantity_on_hand#24, inv_date_sk#25, w_warehouse_name#27] +Arguments: [inv_item_sk#22 ASC NULLS FIRST, inv_date_sk#25 ASC NULLS FIRST], false, 0 (49) SortMergeJoin [codegen id : 16] -Left keys [2]: [cs_item_sk#4, d_date_sk#26] -Right keys [2]: [inv_item_sk#28, inv_date_sk#31] -Join condition: (inv_quantity_on_hand#30 < cs_quantity#7) +Left keys [2]: [cs_item_sk#4, d_date_sk#21] +Right keys [2]: [inv_item_sk#22, inv_date_sk#25] +Join condition: (inv_quantity_on_hand#24 < cs_quantity#7) (50) Project [codegen id : 16] -Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] -Input [11]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26, inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] +Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#27, i_item_desc#17, d_week_seq#20] +Input [11]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#17, d_week_seq#20, d_date_sk#21, inv_item_sk#22, inv_quantity_on_hand#24, inv_date_sk#25, w_warehouse_name#27] (51) Scan parquet default.promotion -Output [1]: [p_promo_sk#36] +Output [1]: [p_promo_sk#28] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [IsNotNull(p_promo_sk)] ReadSchema: struct (52) ColumnarToRow [codegen id : 15] -Input [1]: [p_promo_sk#36] +Input [1]: [p_promo_sk#28] (53) Filter [codegen id : 15] -Input [1]: [p_promo_sk#36] -Condition : isnotnull(p_promo_sk#36) +Input [1]: [p_promo_sk#28] +Condition : isnotnull(p_promo_sk#28) (54) BroadcastExchange -Input [1]: [p_promo_sk#36] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] +Input [1]: [p_promo_sk#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] (55) BroadcastHashJoin [codegen id : 16] Left keys [1]: [cs_promo_sk#5] -Right keys [1]: [p_promo_sk#36] +Right keys [1]: [p_promo_sk#28] Join condition: None (56) Project [codegen id : 16] -Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] -Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25, p_promo_sk#36] +Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#27, i_item_desc#17, d_week_seq#20] +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#27, i_item_desc#17, d_week_seq#20, p_promo_sk#28] (57) Exchange -Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] -Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#27, i_item_desc#17, d_week_seq#20] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, [plan_id=10] (58) Sort [codegen id : 17] -Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#27, i_item_desc#17, d_week_seq#20] Arguments: [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST], false, 0 (59) Scan parquet default.catalog_returns -Output [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] +Output [3]: [cr_item_sk#29, cr_order_number#30, cr_returned_date_sk#31] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] ReadSchema: struct (60) ColumnarToRow [codegen id : 18] -Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] +Input [3]: [cr_item_sk#29, cr_order_number#30, cr_returned_date_sk#31] (61) Filter [codegen id : 18] -Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] -Condition : (isnotnull(cr_item_sk#39) AND isnotnull(cr_order_number#40)) +Input [3]: [cr_item_sk#29, cr_order_number#30, cr_returned_date_sk#31] +Condition : (isnotnull(cr_item_sk#29) AND isnotnull(cr_order_number#30)) (62) Project [codegen id : 18] -Output [2]: [cr_item_sk#39, cr_order_number#40] -Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] +Output [2]: [cr_item_sk#29, cr_order_number#30] +Input [3]: [cr_item_sk#29, cr_order_number#30, cr_returned_date_sk#31] (63) Exchange -Input [2]: [cr_item_sk#39, cr_order_number#40] -Arguments: hashpartitioning(cr_item_sk#39, cr_order_number#40, 5), ENSURE_REQUIREMENTS, [id=#42] +Input [2]: [cr_item_sk#29, cr_order_number#30] +Arguments: hashpartitioning(cr_item_sk#29, cr_order_number#30, 5), ENSURE_REQUIREMENTS, [plan_id=11] (64) Sort [codegen id : 19] -Input [2]: [cr_item_sk#39, cr_order_number#40] -Arguments: [cr_item_sk#39 ASC NULLS FIRST, cr_order_number#40 ASC NULLS FIRST], false, 0 +Input [2]: [cr_item_sk#29, cr_order_number#30] +Arguments: [cr_item_sk#29 ASC NULLS FIRST, cr_order_number#30 ASC NULLS FIRST], false, 0 (65) SortMergeJoin [codegen id : 20] Left keys [2]: [cs_item_sk#4, cs_order_number#6] -Right keys [2]: [cr_item_sk#39, cr_order_number#40] +Right keys [2]: [cr_item_sk#29, cr_order_number#30] Join condition: None (66) Project [codegen id : 20] -Output [3]: [w_warehouse_name#33, i_item_desc#21, d_week_seq#25] -Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25, cr_item_sk#39, cr_order_number#40] +Output [3]: [w_warehouse_name#27, i_item_desc#17, d_week_seq#20] +Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#27, i_item_desc#17, d_week_seq#20, cr_item_sk#29, cr_order_number#30] (67) HashAggregate [codegen id : 20] -Input [3]: [w_warehouse_name#33, i_item_desc#21, d_week_seq#25] -Keys [3]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25] +Input [3]: [w_warehouse_name#27, i_item_desc#17, d_week_seq#20] +Keys [3]: [i_item_desc#17, w_warehouse_name#27, d_week_seq#20] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#43] -Results [4]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count#44] +Aggregate Attributes [1]: [count#32] +Results [4]: [i_item_desc#17, w_warehouse_name#27, d_week_seq#20, count#33] (68) Exchange -Input [4]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count#44] -Arguments: hashpartitioning(i_item_desc#21, w_warehouse_name#33, d_week_seq#25, 5), ENSURE_REQUIREMENTS, [id=#45] +Input [4]: [i_item_desc#17, w_warehouse_name#27, d_week_seq#20, count#33] +Arguments: hashpartitioning(i_item_desc#17, w_warehouse_name#27, d_week_seq#20, 5), ENSURE_REQUIREMENTS, [plan_id=12] (69) HashAggregate [codegen id : 21] -Input [4]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count#44] -Keys [3]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25] +Input [4]: [i_item_desc#17, w_warehouse_name#27, d_week_seq#20, count#33] +Keys [3]: [i_item_desc#17, w_warehouse_name#27, d_week_seq#20] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#46] -Results [6]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count(1)#46 AS no_promo#47, count(1)#46 AS promo#48, count(1)#46 AS total_cnt#49] +Aggregate Attributes [1]: [count(1)#34] +Results [6]: [i_item_desc#17, w_warehouse_name#27, d_week_seq#20, count(1)#34 AS no_promo#35, count(1)#34 AS promo#36, count(1)#34 AS total_cnt#37] (70) TakeOrderedAndProject -Input [6]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, no_promo#47, promo#48, total_cnt#49] -Arguments: 100, [total_cnt#49 DESC NULLS LAST, i_item_desc#21 ASC NULLS FIRST, w_warehouse_name#33 ASC NULLS FIRST, d_week_seq#25 ASC NULLS FIRST], [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, no_promo#47, promo#48, total_cnt#49] +Input [6]: [i_item_desc#17, w_warehouse_name#27, d_week_seq#20, no_promo#35, promo#36, total_cnt#37] +Arguments: 100, [total_cnt#37 DESC NULLS LAST, i_item_desc#17 ASC NULLS FIRST, w_warehouse_name#27 ASC NULLS FIRST, d_week_seq#20 ASC NULLS FIRST], [i_item_desc#17, w_warehouse_name#27, d_week_seq#20, no_promo#35, promo#36, total_cnt#37] ===== Subqueries ===== @@ -402,52 +402,52 @@ BroadcastExchange (81) (71) Scan parquet default.date_dim -Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] +Output [4]: [d_date_sk#18, d_date#19, d_week_seq#20, d_year#38] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] ReadSchema: struct (72) ColumnarToRow [codegen id : 1] -Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] +Input [4]: [d_date_sk#18, d_date#19, d_week_seq#20, d_year#38] (73) Filter [codegen id : 1] -Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] -Condition : ((((isnotnull(d_year#50) AND (d_year#50 = 1999)) AND isnotnull(d_date_sk#23)) AND isnotnull(d_week_seq#25)) AND isnotnull(d_date#24)) +Input [4]: [d_date_sk#18, d_date#19, d_week_seq#20, d_year#38] +Condition : ((((isnotnull(d_year#38) AND (d_year#38 = 1999)) AND isnotnull(d_date_sk#18)) AND isnotnull(d_week_seq#20)) AND isnotnull(d_date#19)) (74) Project [codegen id : 1] -Output [3]: [d_date_sk#23, d_date#24, d_week_seq#25] -Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] +Output [3]: [d_date_sk#18, d_date#19, d_week_seq#20] +Input [4]: [d_date_sk#18, d_date#19, d_week_seq#20, d_year#38] (75) BroadcastExchange -Input [3]: [d_date_sk#23, d_date#24, d_week_seq#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#51] +Input [3]: [d_date_sk#18, d_date#19, d_week_seq#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=13] (76) Scan parquet default.date_dim -Output [2]: [d_date_sk#26, d_week_seq#52] +Output [2]: [d_date_sk#21, d_week_seq#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct (77) ColumnarToRow -Input [2]: [d_date_sk#26, d_week_seq#52] +Input [2]: [d_date_sk#21, d_week_seq#39] (78) Filter -Input [2]: [d_date_sk#26, d_week_seq#52] -Condition : (isnotnull(d_week_seq#52) AND isnotnull(d_date_sk#26)) +Input [2]: [d_date_sk#21, d_week_seq#39] +Condition : (isnotnull(d_week_seq#39) AND isnotnull(d_date_sk#21)) (79) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [d_week_seq#25] -Right keys [1]: [d_week_seq#52] +Left keys [1]: [d_week_seq#20] +Right keys [1]: [d_week_seq#39] Join condition: None (80) Project [codegen id : 2] -Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] -Input [5]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26, d_week_seq#52] +Output [4]: [d_date_sk#18, d_date#19, d_week_seq#20, d_date_sk#21] +Input [5]: [d_date_sk#18, d_date#19, d_week_seq#20, d_date_sk#21, d_week_seq#39] (81) BroadcastExchange -Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] +Input [4]: [d_date_sk#18, d_date#19, d_week_seq#20, d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt index e8671f012f8dc..caf6cf2eb75af 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q72/explain.txt @@ -103,7 +103,7 @@ Condition : ((isnotnull(inv_quantity_on_hand#12) AND isnotnull(inv_item_sk#10)) (7) BroadcastExchange Input [4]: [inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12, inv_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_item_sk#4] @@ -115,278 +115,278 @@ Output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_ Input [12]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12, inv_date_sk#13] (10) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#15, w_warehouse_name#16] +Output [2]: [w_warehouse_sk#14, w_warehouse_name#15] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] (12) Filter [codegen id : 2] -Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] -Condition : isnotnull(w_warehouse_sk#15) +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] +Condition : isnotnull(w_warehouse_sk#14) (13) BroadcastExchange -Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 10] Left keys [1]: [inv_warehouse_sk#11] -Right keys [1]: [w_warehouse_sk#15] +Right keys [1]: [w_warehouse_sk#14] Join condition: None (15) Project [codegen id : 10] -Output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16] -Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#11, inv_date_sk#13, w_warehouse_sk#15, w_warehouse_name#16] +Output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#11, inv_date_sk#13, w_warehouse_sk#14, w_warehouse_name#15] (16) Scan parquet default.item -Output [2]: [i_item_sk#18, i_item_desc#19] +Output [2]: [i_item_sk#16, i_item_desc#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_sk#18, i_item_desc#19] +Input [2]: [i_item_sk#16, i_item_desc#17] (18) Filter [codegen id : 3] -Input [2]: [i_item_sk#18, i_item_desc#19] -Condition : isnotnull(i_item_sk#18) +Input [2]: [i_item_sk#16, i_item_desc#17] +Condition : isnotnull(i_item_sk#16) (19) BroadcastExchange -Input [2]: [i_item_sk#18, i_item_desc#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] +Input [2]: [i_item_sk#16, i_item_desc#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (20) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_item_sk#4] -Right keys [1]: [i_item_sk#18] +Right keys [1]: [i_item_sk#16] Join condition: None (21) Project [codegen id : 10] -Output [10]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19] -Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16, i_item_sk#18, i_item_desc#19] +Output [10]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15, i_item_sk#16, i_item_desc#17] (22) Scan parquet default.customer_demographics -Output [2]: [cd_demo_sk#21, cd_marital_status#22] +Output [2]: [cd_demo_sk#18, cd_marital_status#19] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,D), IsNotNull(cd_demo_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 4] -Input [2]: [cd_demo_sk#21, cd_marital_status#22] +Input [2]: [cd_demo_sk#18, cd_marital_status#19] (24) Filter [codegen id : 4] -Input [2]: [cd_demo_sk#21, cd_marital_status#22] -Condition : ((isnotnull(cd_marital_status#22) AND (cd_marital_status#22 = D)) AND isnotnull(cd_demo_sk#21)) +Input [2]: [cd_demo_sk#18, cd_marital_status#19] +Condition : ((isnotnull(cd_marital_status#19) AND (cd_marital_status#19 = D)) AND isnotnull(cd_demo_sk#18)) (25) Project [codegen id : 4] -Output [1]: [cd_demo_sk#21] -Input [2]: [cd_demo_sk#21, cd_marital_status#22] +Output [1]: [cd_demo_sk#18] +Input [2]: [cd_demo_sk#18, cd_marital_status#19] (26) BroadcastExchange -Input [1]: [cd_demo_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] +Input [1]: [cd_demo_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (27) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_bill_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#21] +Right keys [1]: [cd_demo_sk#18] Join condition: None (28) Project [codegen id : 10] -Output [9]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19] -Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19, cd_demo_sk#21] +Output [9]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17, cd_demo_sk#18] (29) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#24, hd_buy_potential#25] +Output [2]: [hd_demo_sk#20, hd_buy_potential#21] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,>10000 ), IsNotNull(hd_demo_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 5] -Input [2]: [hd_demo_sk#24, hd_buy_potential#25] +Input [2]: [hd_demo_sk#20, hd_buy_potential#21] (31) Filter [codegen id : 5] -Input [2]: [hd_demo_sk#24, hd_buy_potential#25] -Condition : ((isnotnull(hd_buy_potential#25) AND (hd_buy_potential#25 = >10000 )) AND isnotnull(hd_demo_sk#24)) +Input [2]: [hd_demo_sk#20, hd_buy_potential#21] +Condition : ((isnotnull(hd_buy_potential#21) AND (hd_buy_potential#21 = >10000 )) AND isnotnull(hd_demo_sk#20)) (32) Project [codegen id : 5] -Output [1]: [hd_demo_sk#24] -Input [2]: [hd_demo_sk#24, hd_buy_potential#25] +Output [1]: [hd_demo_sk#20] +Input [2]: [hd_demo_sk#20, hd_buy_potential#21] (33) BroadcastExchange -Input [1]: [hd_demo_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [1]: [hd_demo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_bill_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#24] +Right keys [1]: [hd_demo_sk#20] Join condition: None (35) Project [codegen id : 10] -Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19] -Input [10]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19, hd_demo_sk#24] +Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17] +Input [10]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17, hd_demo_sk#20] (36) ReusedExchange [Reuses operator id: 75] -Output [3]: [d_date_sk#27, d_date#28, d_week_seq#29] +Output [3]: [d_date_sk#22, d_date#23, d_week_seq#24] (37) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_sold_date_sk#8] -Right keys [1]: [d_date_sk#27] +Right keys [1]: [d_date_sk#22] Join condition: None (38) Project [codegen id : 10] -Output [9]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19, d_date#28, d_week_seq#29] -Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19, d_date_sk#27, d_date#28, d_week_seq#29] +Output [9]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17, d_date#23, d_week_seq#24] +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17, d_date_sk#22, d_date#23, d_week_seq#24] (39) Scan parquet default.date_dim -Output [2]: [d_date_sk#30, d_week_seq#31] +Output [2]: [d_date_sk#25, d_week_seq#26] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct (40) ColumnarToRow [codegen id : 7] -Input [2]: [d_date_sk#30, d_week_seq#31] +Input [2]: [d_date_sk#25, d_week_seq#26] (41) Filter [codegen id : 7] -Input [2]: [d_date_sk#30, d_week_seq#31] -Condition : (isnotnull(d_week_seq#31) AND isnotnull(d_date_sk#30)) +Input [2]: [d_date_sk#25, d_week_seq#26] +Condition : (isnotnull(d_week_seq#26) AND isnotnull(d_date_sk#25)) (42) BroadcastExchange -Input [2]: [d_date_sk#30, d_week_seq#31] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#32] +Input [2]: [d_date_sk#25, d_week_seq#26] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [plan_id=6] (43) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [d_week_seq#29, inv_date_sk#13] -Right keys [2]: [d_week_seq#31, d_date_sk#30] +Left keys [2]: [d_week_seq#24, inv_date_sk#13] +Right keys [2]: [d_week_seq#26, d_date_sk#25] Join condition: None (44) Project [codegen id : 10] -Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_date#28, d_week_seq#29] -Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19, d_date#28, d_week_seq#29, d_date_sk#30, d_week_seq#31] +Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_date#23, d_week_seq#24] +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17, d_date#23, d_week_seq#24, d_date_sk#25, d_week_seq#26] (45) Scan parquet default.date_dim -Output [2]: [d_date_sk#33, d_date#34] +Output [2]: [d_date_sk#27, d_date#28] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)] ReadSchema: struct (46) ColumnarToRow [codegen id : 8] -Input [2]: [d_date_sk#33, d_date#34] +Input [2]: [d_date_sk#27, d_date#28] (47) Filter [codegen id : 8] -Input [2]: [d_date_sk#33, d_date#34] -Condition : (isnotnull(d_date#34) AND isnotnull(d_date_sk#33)) +Input [2]: [d_date_sk#27, d_date#28] +Condition : (isnotnull(d_date#28) AND isnotnull(d_date_sk#27)) (48) BroadcastExchange -Input [2]: [d_date_sk#33, d_date#34] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#35] +Input [2]: [d_date_sk#27, d_date#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] (49) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_ship_date_sk#1] -Right keys [1]: [d_date_sk#33] -Join condition: (d_date#34 > date_add(d_date#28, 5)) +Right keys [1]: [d_date_sk#27] +Join condition: (d_date#28 > date_add(d_date#23, 5)) (50) Project [codegen id : 10] -Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_week_seq#29] -Input [10]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_date#28, d_week_seq#29, d_date_sk#33, d_date#34] +Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_week_seq#24] +Input [10]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_date#23, d_week_seq#24, d_date_sk#27, d_date#28] (51) Scan parquet default.promotion -Output [1]: [p_promo_sk#36] +Output [1]: [p_promo_sk#29] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [IsNotNull(p_promo_sk)] ReadSchema: struct (52) ColumnarToRow [codegen id : 9] -Input [1]: [p_promo_sk#36] +Input [1]: [p_promo_sk#29] (53) Filter [codegen id : 9] -Input [1]: [p_promo_sk#36] -Condition : isnotnull(p_promo_sk#36) +Input [1]: [p_promo_sk#29] +Condition : isnotnull(p_promo_sk#29) (54) BroadcastExchange -Input [1]: [p_promo_sk#36] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] +Input [1]: [p_promo_sk#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] (55) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_promo_sk#5] -Right keys [1]: [p_promo_sk#36] +Right keys [1]: [p_promo_sk#29] Join condition: None (56) Project [codegen id : 10] -Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_week_seq#29] -Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_week_seq#29, p_promo_sk#36] +Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_week_seq#24] +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_week_seq#24, p_promo_sk#29] (57) Exchange -Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_week_seq#29] -Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_week_seq#24] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, [plan_id=9] (58) Sort [codegen id : 11] -Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_week_seq#29] +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_week_seq#24] Arguments: [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST], false, 0 (59) Scan parquet default.catalog_returns -Output [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] +Output [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] ReadSchema: struct (60) ColumnarToRow [codegen id : 12] -Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] (61) Filter [codegen id : 12] -Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] -Condition : (isnotnull(cr_item_sk#39) AND isnotnull(cr_order_number#40)) +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Condition : (isnotnull(cr_item_sk#30) AND isnotnull(cr_order_number#31)) (62) Project [codegen id : 12] -Output [2]: [cr_item_sk#39, cr_order_number#40] -Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] +Output [2]: [cr_item_sk#30, cr_order_number#31] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] (63) Exchange -Input [2]: [cr_item_sk#39, cr_order_number#40] -Arguments: hashpartitioning(cr_item_sk#39, cr_order_number#40, 5), ENSURE_REQUIREMENTS, [id=#42] +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: hashpartitioning(cr_item_sk#30, cr_order_number#31, 5), ENSURE_REQUIREMENTS, [plan_id=10] (64) Sort [codegen id : 13] -Input [2]: [cr_item_sk#39, cr_order_number#40] -Arguments: [cr_item_sk#39 ASC NULLS FIRST, cr_order_number#40 ASC NULLS FIRST], false, 0 +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cr_item_sk#30 ASC NULLS FIRST, cr_order_number#31 ASC NULLS FIRST], false, 0 (65) SortMergeJoin [codegen id : 14] Left keys [2]: [cs_item_sk#4, cs_order_number#6] -Right keys [2]: [cr_item_sk#39, cr_order_number#40] +Right keys [2]: [cr_item_sk#30, cr_order_number#31] Join condition: None (66) Project [codegen id : 14] -Output [3]: [w_warehouse_name#16, i_item_desc#19, d_week_seq#29] -Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_week_seq#29, cr_item_sk#39, cr_order_number#40] +Output [3]: [w_warehouse_name#15, i_item_desc#17, d_week_seq#24] +Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_week_seq#24, cr_item_sk#30, cr_order_number#31] (67) HashAggregate [codegen id : 14] -Input [3]: [w_warehouse_name#16, i_item_desc#19, d_week_seq#29] -Keys [3]: [i_item_desc#19, w_warehouse_name#16, d_week_seq#29] +Input [3]: [w_warehouse_name#15, i_item_desc#17, d_week_seq#24] +Keys [3]: [i_item_desc#17, w_warehouse_name#15, d_week_seq#24] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#43] -Results [4]: [i_item_desc#19, w_warehouse_name#16, d_week_seq#29, count#44] +Aggregate Attributes [1]: [count#33] +Results [4]: [i_item_desc#17, w_warehouse_name#15, d_week_seq#24, count#34] (68) Exchange -Input [4]: [i_item_desc#19, w_warehouse_name#16, d_week_seq#29, count#44] -Arguments: hashpartitioning(i_item_desc#19, w_warehouse_name#16, d_week_seq#29, 5), ENSURE_REQUIREMENTS, [id=#45] +Input [4]: [i_item_desc#17, w_warehouse_name#15, d_week_seq#24, count#34] +Arguments: hashpartitioning(i_item_desc#17, w_warehouse_name#15, d_week_seq#24, 5), ENSURE_REQUIREMENTS, [plan_id=11] (69) HashAggregate [codegen id : 15] -Input [4]: [i_item_desc#19, w_warehouse_name#16, d_week_seq#29, count#44] -Keys [3]: [i_item_desc#19, w_warehouse_name#16, d_week_seq#29] +Input [4]: [i_item_desc#17, w_warehouse_name#15, d_week_seq#24, count#34] +Keys [3]: [i_item_desc#17, w_warehouse_name#15, d_week_seq#24] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#46] -Results [6]: [i_item_desc#19, w_warehouse_name#16, d_week_seq#29, count(1)#46 AS no_promo#47, count(1)#46 AS promo#48, count(1)#46 AS total_cnt#49] +Aggregate Attributes [1]: [count(1)#35] +Results [6]: [i_item_desc#17, w_warehouse_name#15, d_week_seq#24, count(1)#35 AS no_promo#36, count(1)#35 AS promo#37, count(1)#35 AS total_cnt#38] (70) TakeOrderedAndProject -Input [6]: [i_item_desc#19, w_warehouse_name#16, d_week_seq#29, no_promo#47, promo#48, total_cnt#49] -Arguments: 100, [total_cnt#49 DESC NULLS LAST, i_item_desc#19 ASC NULLS FIRST, w_warehouse_name#16 ASC NULLS FIRST, d_week_seq#29 ASC NULLS FIRST], [i_item_desc#19, w_warehouse_name#16, d_week_seq#29, no_promo#47, promo#48, total_cnt#49] +Input [6]: [i_item_desc#17, w_warehouse_name#15, d_week_seq#24, no_promo#36, promo#37, total_cnt#38] +Arguments: 100, [total_cnt#38 DESC NULLS LAST, i_item_desc#17 ASC NULLS FIRST, w_warehouse_name#15 ASC NULLS FIRST, d_week_seq#24 ASC NULLS FIRST], [i_item_desc#17, w_warehouse_name#15, d_week_seq#24, no_promo#36, promo#37, total_cnt#38] ===== Subqueries ===== @@ -399,25 +399,25 @@ BroadcastExchange (75) (71) Scan parquet default.date_dim -Output [4]: [d_date_sk#27, d_date#28, d_week_seq#29, d_year#50] +Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] ReadSchema: struct (72) ColumnarToRow [codegen id : 1] -Input [4]: [d_date_sk#27, d_date#28, d_week_seq#29, d_year#50] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#39] (73) Filter [codegen id : 1] -Input [4]: [d_date_sk#27, d_date#28, d_week_seq#29, d_year#50] -Condition : ((((isnotnull(d_year#50) AND (d_year#50 = 1999)) AND isnotnull(d_date_sk#27)) AND isnotnull(d_week_seq#29)) AND isnotnull(d_date#28)) +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#39] +Condition : ((((isnotnull(d_year#39) AND (d_year#39 = 1999)) AND isnotnull(d_date_sk#22)) AND isnotnull(d_week_seq#24)) AND isnotnull(d_date#23)) (74) Project [codegen id : 1] -Output [3]: [d_date_sk#27, d_date#28, d_week_seq#29] -Input [4]: [d_date_sk#27, d_date#28, d_week_seq#29, d_year#50] +Output [3]: [d_date_sk#22, d_date#23, d_week_seq#24] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#39] (75) BroadcastExchange -Input [3]: [d_date_sk#27, d_date#28, d_week_seq#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#51] +Input [3]: [d_date_sk#22, d_date#23, d_week_seq#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt index 4ef31a6a29263..e538e2ce20a8c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73.sf100/explain.txt @@ -83,7 +83,7 @@ Input [2]: [s_store_sk#8, s_county#9] (11) BroadcastExchange Input [1]: [s_store_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#3] @@ -95,104 +95,104 @@ Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#8] (14) Scan parquet default.household_demographics -Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (16) Filter [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] -Condition : (((((isnotnull(hd_vehicle_count#14) AND isnotnull(hd_dep_count#13)) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND ((cast(hd_dep_count#13 as double) / cast(hd_vehicle_count#14 as double)) > 1.0)) AND isnotnull(hd_demo_sk#11)) +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] +Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.0)) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#11] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [1]: [hd_demo_sk#10] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (18) BroadcastExchange -Input [1]: [hd_demo_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [hd_demo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#11] +Right keys [1]: [hd_demo_sk#10] Join condition: None (20) Project [codegen id : 4] Output [2]: [ss_customer_sk#1, ss_ticket_number#4] -Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#10] (21) HashAggregate [codegen id : 4] Input [2]: [ss_customer_sk#1, ss_ticket_number#4] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Aggregate Attributes [1]: [count#14] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] (22) Exchange -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] -Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 5] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#19 AS cnt#20] +Aggregate Attributes [1]: [count(1)#16] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#16 AS cnt#17] (24) Filter [codegen id : 5] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] -Condition : ((cnt#20 >= 1) AND (cnt#20 <= 5)) +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] +Condition : ((cnt#17 >= 1) AND (cnt#17 <= 5)) (25) Exchange -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] -Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] +Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] (26) Sort [codegen id : 6] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] Arguments: [ss_customer_sk#1 ASC NULLS FIRST], false, 0 (27) Scan parquet default.customer -Output [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] +Output [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 7] -Input [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (29) Filter [codegen id : 7] -Input [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] -Condition : isnotnull(c_customer_sk#22) +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Condition : isnotnull(c_customer_sk#18) (30) Exchange -Input [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] -Arguments: hashpartitioning(c_customer_sk#22, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Arguments: hashpartitioning(c_customer_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=5] (31) Sort [codegen id : 8] -Input [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] -Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Arguments: [c_customer_sk#18 ASC NULLS FIRST], false, 0 (32) SortMergeJoin [codegen id : 9] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#22] +Right keys [1]: [c_customer_sk#18] Join condition: None (33) Project [codegen id : 9] -Output [6]: [c_last_name#25, c_first_name#24, c_salutation#23, c_preferred_cust_flag#26, ss_ticket_number#4, cnt#20] -Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20, c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] +Output [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17, c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (34) Exchange -Input [6]: [c_last_name#25, c_first_name#24, c_salutation#23, c_preferred_cust_flag#26, ss_ticket_number#4, cnt#20] -Arguments: rangepartitioning(cnt#20 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: rangepartitioning(cnt#17 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=6] (35) Sort [codegen id : 10] -Input [6]: [c_last_name#25, c_first_name#24, c_salutation#23, c_preferred_cust_flag#26, ss_ticket_number#4, cnt#20] -Arguments: [cnt#20 DESC NULLS LAST], true, 0 +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: [cnt#17 DESC NULLS LAST], true, 0 ===== Subqueries ===== @@ -205,25 +205,25 @@ BroadcastExchange (40) (36) Scan parquet default.date_dim -Output [3]: [d_date_sk#7, d_year#29, d_dom#30] +Output [3]: [d_date_sk#7, d_year#23, d_dom#24] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#29, d_dom#30] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (38) Filter [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#29, d_dom#30] -Condition : ((((isnotnull(d_dom#30) AND (d_dom#30 >= 1)) AND (d_dom#30 <= 2)) AND d_year#29 IN (1999,2000,2001)) AND isnotnull(d_date_sk#7)) +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] +Condition : ((((isnotnull(d_dom#24) AND (d_dom#24 >= 1)) AND (d_dom#24 <= 2)) AND d_year#23 IN (1999,2000,2001)) AND isnotnull(d_date_sk#7)) (39) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [3]: [d_date_sk#7, d_year#29, d_dom#30] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (40) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt index e3a3d67bd653c..70e0397d07a0f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q73/explain.txt @@ -80,7 +80,7 @@ Input [2]: [s_store_sk#8, s_county#9] (11) BroadcastExchange Input [1]: [s_store_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#3] @@ -92,92 +92,92 @@ Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#8] (14) Scan parquet default.household_demographics -Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (16) Filter [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] -Condition : (((((isnotnull(hd_vehicle_count#14) AND isnotnull(hd_dep_count#13)) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND ((cast(hd_dep_count#13 as double) / cast(hd_vehicle_count#14 as double)) > 1.0)) AND isnotnull(hd_demo_sk#11)) +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] +Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.0)) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#11] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [1]: [hd_demo_sk#10] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (18) BroadcastExchange -Input [1]: [hd_demo_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [hd_demo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#11] +Right keys [1]: [hd_demo_sk#10] Join condition: None (20) Project [codegen id : 4] Output [2]: [ss_customer_sk#1, ss_ticket_number#4] -Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#10] (21) HashAggregate [codegen id : 4] Input [2]: [ss_customer_sk#1, ss_ticket_number#4] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Aggregate Attributes [1]: [count#14] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] (22) Exchange -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] -Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 6] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#19 AS cnt#20] +Aggregate Attributes [1]: [count(1)#16] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#16 AS cnt#17] (24) Filter [codegen id : 6] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] -Condition : ((cnt#20 >= 1) AND (cnt#20 <= 5)) +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] +Condition : ((cnt#17 >= 1) AND (cnt#17 <= 5)) (25) Scan parquet default.customer -Output [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] +Output [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 5] -Input [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (27) Filter [codegen id : 5] -Input [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] -Condition : isnotnull(c_customer_sk#21) +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Condition : isnotnull(c_customer_sk#18) (28) BroadcastExchange -Input [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#21] +Right keys [1]: [c_customer_sk#18] Join condition: None (30) Project [codegen id : 6] -Output [6]: [c_last_name#24, c_first_name#23, c_salutation#22, c_preferred_cust_flag#25, ss_ticket_number#4, cnt#20] -Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20, c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] +Output [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17, c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (31) Exchange -Input [6]: [c_last_name#24, c_first_name#23, c_salutation#22, c_preferred_cust_flag#25, ss_ticket_number#4, cnt#20] -Arguments: rangepartitioning(cnt#20 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: rangepartitioning(cnt#17 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=5] (32) Sort [codegen id : 7] -Input [6]: [c_last_name#24, c_first_name#23, c_salutation#22, c_preferred_cust_flag#25, ss_ticket_number#4, cnt#20] -Arguments: [cnt#20 DESC NULLS LAST], true, 0 +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: [cnt#17 DESC NULLS LAST], true, 0 ===== Subqueries ===== @@ -190,25 +190,25 @@ BroadcastExchange (37) (33) Scan parquet default.date_dim -Output [3]: [d_date_sk#7, d_year#28, d_dom#29] +Output [3]: [d_date_sk#7, d_year#23, d_dom#24] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_dom), GreaterThanOrEqual(d_dom,1), LessThanOrEqual(d_dom,2), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (35) Filter [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] -Condition : ((((isnotnull(d_dom#29) AND (d_dom#29 >= 1)) AND (d_dom#29 <= 2)) AND d_year#28 IN (1999,2000,2001)) AND isnotnull(d_date_sk#7)) +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] +Condition : ((((isnotnull(d_dom#24) AND (d_dom#24 >= 1)) AND (d_dom#24 <= 2)) AND d_year#23 IN (1999,2000,2001)) AND isnotnull(d_date_sk#7)) (36) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (37) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.sf100/explain.txt index 9284172139688..63ea84152629e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76.sf100/explain.txt @@ -76,7 +76,7 @@ Condition : isnotnull(d_date_sk#5) (7) BroadcastExchange Input [3]: [d_date_sk#5, d_year#6, d_qoy#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#4] @@ -88,161 +88,161 @@ Output [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, d_year#6, d_qoy# Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, d_date_sk#5, d_year#6, d_qoy#7] (10) Scan parquet default.item -Output [2]: [i_item_sk#9, i_category#10] +Output [2]: [i_item_sk#8, i_category#9] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [2]: [i_item_sk#9, i_category#10] +Input [2]: [i_item_sk#8, i_category#9] (12) Filter [codegen id : 2] -Input [2]: [i_item_sk#9, i_category#10] -Condition : isnotnull(i_item_sk#9) +Input [2]: [i_item_sk#8, i_category#9] +Condition : isnotnull(i_item_sk#8) (13) BroadcastExchange -Input [2]: [i_item_sk#9, i_category#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Input [2]: [i_item_sk#8, i_category#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#9] +Right keys [1]: [i_item_sk#8] Join condition: None (15) Project [codegen id : 3] -Output [6]: [store AS channel#12, ss_store_sk#2 AS col_name#13, d_year#6, d_qoy#7, i_category#10, ss_ext_sales_price#3 AS ext_sales_price#14] -Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, d_year#6, d_qoy#7, i_item_sk#9, i_category#10] +Output [6]: [store AS channel#10, ss_store_sk#2 AS col_name#11, d_year#6, d_qoy#7, i_category#9, ss_ext_sales_price#3 AS ext_sales_price#12] +Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, d_year#6, d_qoy#7, i_item_sk#8, i_category#9] (16) Scan parquet default.web_sales -Output [4]: [ws_item_sk#15, ws_ship_customer_sk#16, ws_ext_sales_price#17, ws_sold_date_sk#18] +Output [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#18)] +PartitionFilters: [isnotnull(ws_sold_date_sk#16)] PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 4] -Input [4]: [ws_item_sk#15, ws_ship_customer_sk#16, ws_ext_sales_price#17, ws_sold_date_sk#18] +Input [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] (18) Filter [codegen id : 4] -Input [4]: [ws_item_sk#15, ws_ship_customer_sk#16, ws_ext_sales_price#17, ws_sold_date_sk#18] -Condition : (isnull(ws_ship_customer_sk#16) AND isnotnull(ws_item_sk#15)) +Input [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Condition : (isnull(ws_ship_customer_sk#14) AND isnotnull(ws_item_sk#13)) (19) BroadcastExchange -Input [4]: [ws_item_sk#15, ws_ship_customer_sk#16, ws_ext_sales_price#17, ws_sold_date_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[3, int, true] as bigint)),false), [id=#19] +Input [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[3, int, true] as bigint)),false), [plan_id=3] (20) Scan parquet default.date_dim -Output [3]: [d_date_sk#20, d_year#21, d_qoy#22] +Output [3]: [d_date_sk#17, d_year#18, d_qoy#19] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date_sk)] ReadSchema: struct (21) ColumnarToRow -Input [3]: [d_date_sk#20, d_year#21, d_qoy#22] +Input [3]: [d_date_sk#17, d_year#18, d_qoy#19] (22) Filter -Input [3]: [d_date_sk#20, d_year#21, d_qoy#22] -Condition : isnotnull(d_date_sk#20) +Input [3]: [d_date_sk#17, d_year#18, d_qoy#19] +Condition : isnotnull(d_date_sk#17) (23) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ws_sold_date_sk#18] -Right keys [1]: [d_date_sk#20] +Left keys [1]: [ws_sold_date_sk#16] +Right keys [1]: [d_date_sk#17] Join condition: None (24) Project [codegen id : 5] -Output [5]: [ws_item_sk#15, ws_ship_customer_sk#16, ws_ext_sales_price#17, d_year#21, d_qoy#22] -Input [7]: [ws_item_sk#15, ws_ship_customer_sk#16, ws_ext_sales_price#17, ws_sold_date_sk#18, d_date_sk#20, d_year#21, d_qoy#22] +Output [5]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, d_year#18, d_qoy#19] +Input [7]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, d_date_sk#17, d_year#18, d_qoy#19] (25) BroadcastExchange -Input [5]: [ws_item_sk#15, ws_ship_customer_sk#16, ws_ext_sales_price#17, d_year#21, d_qoy#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] +Input [5]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, d_year#18, d_qoy#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (26) Scan parquet default.item -Output [2]: [i_item_sk#24, i_category#25] +Output [2]: [i_item_sk#20, i_category#21] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (27) ColumnarToRow -Input [2]: [i_item_sk#24, i_category#25] +Input [2]: [i_item_sk#20, i_category#21] (28) Filter -Input [2]: [i_item_sk#24, i_category#25] -Condition : isnotnull(i_item_sk#24) +Input [2]: [i_item_sk#20, i_category#21] +Condition : isnotnull(i_item_sk#20) (29) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_item_sk#15] -Right keys [1]: [i_item_sk#24] +Left keys [1]: [ws_item_sk#13] +Right keys [1]: [i_item_sk#20] Join condition: None (30) Project [codegen id : 6] -Output [6]: [web AS channel#26, ws_ship_customer_sk#16 AS col_name#27, d_year#21, d_qoy#22, i_category#25, ws_ext_sales_price#17 AS ext_sales_price#28] -Input [7]: [ws_item_sk#15, ws_ship_customer_sk#16, ws_ext_sales_price#17, d_year#21, d_qoy#22, i_item_sk#24, i_category#25] +Output [6]: [web AS channel#22, ws_ship_customer_sk#14 AS col_name#23, d_year#18, d_qoy#19, i_category#21, ws_ext_sales_price#15 AS ext_sales_price#24] +Input [7]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, d_year#18, d_qoy#19, i_item_sk#20, i_category#21] (31) Scan parquet default.catalog_sales -Output [4]: [cs_ship_addr_sk#29, cs_item_sk#30, cs_ext_sales_price#31, cs_sold_date_sk#32] +Output [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#32)] +PartitionFilters: [isnotnull(cs_sold_date_sk#28)] PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 9] -Input [4]: [cs_ship_addr_sk#29, cs_item_sk#30, cs_ext_sales_price#31, cs_sold_date_sk#32] +Input [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] (33) Filter [codegen id : 9] -Input [4]: [cs_ship_addr_sk#29, cs_item_sk#30, cs_ext_sales_price#31, cs_sold_date_sk#32] -Condition : (isnull(cs_ship_addr_sk#29) AND isnotnull(cs_item_sk#30)) +Input [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Condition : (isnull(cs_ship_addr_sk#25) AND isnotnull(cs_item_sk#26)) (34) ReusedExchange [Reuses operator id: 7] -Output [3]: [d_date_sk#33, d_year#34, d_qoy#35] +Output [3]: [d_date_sk#29, d_year#30, d_qoy#31] (35) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [cs_sold_date_sk#32] -Right keys [1]: [d_date_sk#33] +Left keys [1]: [cs_sold_date_sk#28] +Right keys [1]: [d_date_sk#29] Join condition: None (36) Project [codegen id : 9] -Output [5]: [cs_ship_addr_sk#29, cs_item_sk#30, cs_ext_sales_price#31, d_year#34, d_qoy#35] -Input [7]: [cs_ship_addr_sk#29, cs_item_sk#30, cs_ext_sales_price#31, cs_sold_date_sk#32, d_date_sk#33, d_year#34, d_qoy#35] +Output [5]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, d_year#30, d_qoy#31] +Input [7]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28, d_date_sk#29, d_year#30, d_qoy#31] (37) ReusedExchange [Reuses operator id: 13] -Output [2]: [i_item_sk#36, i_category#37] +Output [2]: [i_item_sk#32, i_category#33] (38) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [cs_item_sk#30] -Right keys [1]: [i_item_sk#36] +Left keys [1]: [cs_item_sk#26] +Right keys [1]: [i_item_sk#32] Join condition: None (39) Project [codegen id : 9] -Output [6]: [catalog AS channel#38, cs_ship_addr_sk#29 AS col_name#39, d_year#34, d_qoy#35, i_category#37, cs_ext_sales_price#31 AS ext_sales_price#40] -Input [7]: [cs_ship_addr_sk#29, cs_item_sk#30, cs_ext_sales_price#31, d_year#34, d_qoy#35, i_item_sk#36, i_category#37] +Output [6]: [catalog AS channel#34, cs_ship_addr_sk#25 AS col_name#35, d_year#30, d_qoy#31, i_category#33, cs_ext_sales_price#27 AS ext_sales_price#36] +Input [7]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, d_year#30, d_qoy#31, i_item_sk#32, i_category#33] (40) Union (41) HashAggregate [codegen id : 10] -Input [6]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, ext_sales_price#14] -Keys [5]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10] -Functions [2]: [partial_count(1), partial_sum(UnscaledValue(ext_sales_price#14))] -Aggregate Attributes [2]: [count#41, sum#42] -Results [7]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, count#43, sum#44] +Input [6]: [channel#10, col_name#11, d_year#6, d_qoy#7, i_category#9, ext_sales_price#12] +Keys [5]: [channel#10, col_name#11, d_year#6, d_qoy#7, i_category#9] +Functions [2]: [partial_count(1), partial_sum(UnscaledValue(ext_sales_price#12))] +Aggregate Attributes [2]: [count#37, sum#38] +Results [7]: [channel#10, col_name#11, d_year#6, d_qoy#7, i_category#9, count#39, sum#40] (42) Exchange -Input [7]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, count#43, sum#44] -Arguments: hashpartitioning(channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, 5), ENSURE_REQUIREMENTS, [id=#45] +Input [7]: [channel#10, col_name#11, d_year#6, d_qoy#7, i_category#9, count#39, sum#40] +Arguments: hashpartitioning(channel#10, col_name#11, d_year#6, d_qoy#7, i_category#9, 5), ENSURE_REQUIREMENTS, [plan_id=5] (43) HashAggregate [codegen id : 11] -Input [7]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, count#43, sum#44] -Keys [5]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10] -Functions [2]: [count(1), sum(UnscaledValue(ext_sales_price#14))] -Aggregate Attributes [2]: [count(1)#46, sum(UnscaledValue(ext_sales_price#14))#47] -Results [7]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, count(1)#46 AS sales_cnt#48, MakeDecimal(sum(UnscaledValue(ext_sales_price#14))#47,17,2) AS sales_amt#49] +Input [7]: [channel#10, col_name#11, d_year#6, d_qoy#7, i_category#9, count#39, sum#40] +Keys [5]: [channel#10, col_name#11, d_year#6, d_qoy#7, i_category#9] +Functions [2]: [count(1), sum(UnscaledValue(ext_sales_price#12))] +Aggregate Attributes [2]: [count(1)#41, sum(UnscaledValue(ext_sales_price#12))#42] +Results [7]: [channel#10, col_name#11, d_year#6, d_qoy#7, i_category#9, count(1)#41 AS sales_cnt#43, MakeDecimal(sum(UnscaledValue(ext_sales_price#12))#42,17,2) AS sales_amt#44] (44) TakeOrderedAndProject -Input [7]: [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, sales_cnt#48, sales_amt#49] -Arguments: 100, [channel#12 ASC NULLS FIRST, col_name#13 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST, d_qoy#7 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [channel#12, col_name#13, d_year#6, d_qoy#7, i_category#10, sales_cnt#48, sales_amt#49] +Input [7]: [channel#10, col_name#11, d_year#6, d_qoy#7, i_category#9, sales_cnt#43, sales_amt#44] +Arguments: 100, [channel#10 ASC NULLS FIRST, col_name#11 ASC NULLS FIRST, d_year#6 ASC NULLS FIRST, d_qoy#7 ASC NULLS FIRST, i_category#9 ASC NULLS FIRST], [channel#10, col_name#11, d_year#6, d_qoy#7, i_category#9, sales_cnt#43, sales_amt#44] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/explain.txt index 026c9396cd025..9a284c06f2b01 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q76/explain.txt @@ -70,7 +70,7 @@ Condition : isnotnull(i_item_sk#5) (7) BroadcastExchange Input [2]: [i_item_sk#5, i_category#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#1] @@ -82,131 +82,131 @@ Output [4]: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category# Input [6]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_item_sk#5, i_category#6] (10) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Output [3]: [d_date_sk#7, d_year#8, d_qoy#9] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] (12) Filter [codegen id : 2] -Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] -Condition : isnotnull(d_date_sk#8) +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Condition : isnotnull(d_date_sk#7) (13) BroadcastExchange -Input [3]: [d_date_sk#8, d_year#9, d_qoy#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Input [3]: [d_date_sk#7, d_year#8, d_qoy#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#8] +Right keys [1]: [d_date_sk#7] Join condition: None (15) Project [codegen id : 3] -Output [6]: [store AS channel#12, ss_store_sk#2 AS col_name#13, d_year#9, d_qoy#10, i_category#6, ss_ext_sales_price#3 AS ext_sales_price#14] -Input [7]: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6, d_date_sk#8, d_year#9, d_qoy#10] +Output [6]: [store AS channel#10, ss_store_sk#2 AS col_name#11, d_year#8, d_qoy#9, i_category#6, ss_ext_sales_price#3 AS ext_sales_price#12] +Input [7]: [ss_store_sk#2, ss_ext_sales_price#3, ss_sold_date_sk#4, i_category#6, d_date_sk#7, d_year#8, d_qoy#9] (16) Scan parquet default.web_sales -Output [4]: [ws_item_sk#15, ws_ship_customer_sk#16, ws_ext_sales_price#17, ws_sold_date_sk#18] +Output [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#18)] +PartitionFilters: [isnotnull(ws_sold_date_sk#16)] PushedFilters: [IsNull(ws_ship_customer_sk), IsNotNull(ws_item_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 6] -Input [4]: [ws_item_sk#15, ws_ship_customer_sk#16, ws_ext_sales_price#17, ws_sold_date_sk#18] +Input [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] (18) Filter [codegen id : 6] -Input [4]: [ws_item_sk#15, ws_ship_customer_sk#16, ws_ext_sales_price#17, ws_sold_date_sk#18] -Condition : (isnull(ws_ship_customer_sk#16) AND isnotnull(ws_item_sk#15)) +Input [4]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16] +Condition : (isnull(ws_ship_customer_sk#14) AND isnotnull(ws_item_sk#13)) (19) ReusedExchange [Reuses operator id: 7] -Output [2]: [i_item_sk#19, i_category#20] +Output [2]: [i_item_sk#17, i_category#18] (20) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_item_sk#15] -Right keys [1]: [i_item_sk#19] +Left keys [1]: [ws_item_sk#13] +Right keys [1]: [i_item_sk#17] Join condition: None (21) Project [codegen id : 6] -Output [4]: [ws_ship_customer_sk#16, ws_ext_sales_price#17, ws_sold_date_sk#18, i_category#20] -Input [6]: [ws_item_sk#15, ws_ship_customer_sk#16, ws_ext_sales_price#17, ws_sold_date_sk#18, i_item_sk#19, i_category#20] +Output [4]: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18] +Input [6]: [ws_item_sk#13, ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_item_sk#17, i_category#18] (22) ReusedExchange [Reuses operator id: 13] -Output [3]: [d_date_sk#21, d_year#22, d_qoy#23] +Output [3]: [d_date_sk#19, d_year#20, d_qoy#21] (23) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#18] -Right keys [1]: [d_date_sk#21] +Left keys [1]: [ws_sold_date_sk#16] +Right keys [1]: [d_date_sk#19] Join condition: None (24) Project [codegen id : 6] -Output [6]: [web AS channel#24, ws_ship_customer_sk#16 AS col_name#25, d_year#22, d_qoy#23, i_category#20, ws_ext_sales_price#17 AS ext_sales_price#26] -Input [7]: [ws_ship_customer_sk#16, ws_ext_sales_price#17, ws_sold_date_sk#18, i_category#20, d_date_sk#21, d_year#22, d_qoy#23] +Output [6]: [web AS channel#22, ws_ship_customer_sk#14 AS col_name#23, d_year#20, d_qoy#21, i_category#18, ws_ext_sales_price#15 AS ext_sales_price#24] +Input [7]: [ws_ship_customer_sk#14, ws_ext_sales_price#15, ws_sold_date_sk#16, i_category#18, d_date_sk#19, d_year#20, d_qoy#21] (25) Scan parquet default.catalog_sales -Output [4]: [cs_ship_addr_sk#27, cs_item_sk#28, cs_ext_sales_price#29, cs_sold_date_sk#30] +Output [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#30)] +PartitionFilters: [isnotnull(cs_sold_date_sk#28)] PushedFilters: [IsNull(cs_ship_addr_sk), IsNotNull(cs_item_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 9] -Input [4]: [cs_ship_addr_sk#27, cs_item_sk#28, cs_ext_sales_price#29, cs_sold_date_sk#30] +Input [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] (27) Filter [codegen id : 9] -Input [4]: [cs_ship_addr_sk#27, cs_item_sk#28, cs_ext_sales_price#29, cs_sold_date_sk#30] -Condition : (isnull(cs_ship_addr_sk#27) AND isnotnull(cs_item_sk#28)) +Input [4]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28] +Condition : (isnull(cs_ship_addr_sk#25) AND isnotnull(cs_item_sk#26)) (28) ReusedExchange [Reuses operator id: 7] -Output [2]: [i_item_sk#31, i_category#32] +Output [2]: [i_item_sk#29, i_category#30] (29) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [cs_item_sk#28] -Right keys [1]: [i_item_sk#31] +Left keys [1]: [cs_item_sk#26] +Right keys [1]: [i_item_sk#29] Join condition: None (30) Project [codegen id : 9] -Output [4]: [cs_ship_addr_sk#27, cs_ext_sales_price#29, cs_sold_date_sk#30, i_category#32] -Input [6]: [cs_ship_addr_sk#27, cs_item_sk#28, cs_ext_sales_price#29, cs_sold_date_sk#30, i_item_sk#31, i_category#32] +Output [4]: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30] +Input [6]: [cs_ship_addr_sk#25, cs_item_sk#26, cs_ext_sales_price#27, cs_sold_date_sk#28, i_item_sk#29, i_category#30] (31) ReusedExchange [Reuses operator id: 13] -Output [3]: [d_date_sk#33, d_year#34, d_qoy#35] +Output [3]: [d_date_sk#31, d_year#32, d_qoy#33] (32) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [cs_sold_date_sk#30] -Right keys [1]: [d_date_sk#33] +Left keys [1]: [cs_sold_date_sk#28] +Right keys [1]: [d_date_sk#31] Join condition: None (33) Project [codegen id : 9] -Output [6]: [catalog AS channel#36, cs_ship_addr_sk#27 AS col_name#37, d_year#34, d_qoy#35, i_category#32, cs_ext_sales_price#29 AS ext_sales_price#38] -Input [7]: [cs_ship_addr_sk#27, cs_ext_sales_price#29, cs_sold_date_sk#30, i_category#32, d_date_sk#33, d_year#34, d_qoy#35] +Output [6]: [catalog AS channel#34, cs_ship_addr_sk#25 AS col_name#35, d_year#32, d_qoy#33, i_category#30, cs_ext_sales_price#27 AS ext_sales_price#36] +Input [7]: [cs_ship_addr_sk#25, cs_ext_sales_price#27, cs_sold_date_sk#28, i_category#30, d_date_sk#31, d_year#32, d_qoy#33] (34) Union (35) HashAggregate [codegen id : 10] -Input [6]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, ext_sales_price#14] -Keys [5]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6] -Functions [2]: [partial_count(1), partial_sum(UnscaledValue(ext_sales_price#14))] -Aggregate Attributes [2]: [count#39, sum#40] -Results [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count#41, sum#42] +Input [6]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, ext_sales_price#12] +Keys [5]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6] +Functions [2]: [partial_count(1), partial_sum(UnscaledValue(ext_sales_price#12))] +Aggregate Attributes [2]: [count#37, sum#38] +Results [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#39, sum#40] (36) Exchange -Input [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count#41, sum#42] -Arguments: hashpartitioning(channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, 5), ENSURE_REQUIREMENTS, [id=#43] +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#39, sum#40] +Arguments: hashpartitioning(channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, 5), ENSURE_REQUIREMENTS, [plan_id=3] (37) HashAggregate [codegen id : 11] -Input [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count#41, sum#42] -Keys [5]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6] -Functions [2]: [count(1), sum(UnscaledValue(ext_sales_price#14))] -Aggregate Attributes [2]: [count(1)#44, sum(UnscaledValue(ext_sales_price#14))#45] -Results [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, count(1)#44 AS sales_cnt#46, MakeDecimal(sum(UnscaledValue(ext_sales_price#14))#45,17,2) AS sales_amt#47] +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count#39, sum#40] +Keys [5]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6] +Functions [2]: [count(1), sum(UnscaledValue(ext_sales_price#12))] +Aggregate Attributes [2]: [count(1)#41, sum(UnscaledValue(ext_sales_price#12))#42] +Results [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, count(1)#41 AS sales_cnt#43, MakeDecimal(sum(UnscaledValue(ext_sales_price#12))#42,17,2) AS sales_amt#44] (38) TakeOrderedAndProject -Input [7]: [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, sales_cnt#46, sales_amt#47] -Arguments: 100, [channel#12 ASC NULLS FIRST, col_name#13 ASC NULLS FIRST, d_year#9 ASC NULLS FIRST, d_qoy#10 ASC NULLS FIRST, i_category#6 ASC NULLS FIRST], [channel#12, col_name#13, d_year#9, d_qoy#10, i_category#6, sales_cnt#46, sales_amt#47] +Input [7]: [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#43, sales_amt#44] +Arguments: 100, [channel#10 ASC NULLS FIRST, col_name#11 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#9 ASC NULLS FIRST, i_category#6 ASC NULLS FIRST], [channel#10, col_name#11, d_year#8, d_qoy#9, i_category#6, sales_cnt#43, sales_amt#44] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.sf100/explain.txt index 04a0ca4cd3027..ed253eecc3782 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77.sf100/explain.txt @@ -129,7 +129,7 @@ Condition : isnotnull(s_store_sk#7) (10) BroadcastExchange Input [1]: [s_store_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_store_sk#1] @@ -144,350 +144,350 @@ Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] Input [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] Keys [1]: [s_store_sk#7] Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#2)), partial_sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [2]: [sum#9, sum#10] -Results [3]: [s_store_sk#7, sum#11, sum#12] +Aggregate Attributes [2]: [sum#8, sum#9] +Results [3]: [s_store_sk#7, sum#10, sum#11] (14) Exchange -Input [3]: [s_store_sk#7, sum#11, sum#12] -Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [3]: [s_store_sk#7, sum#10, sum#11] +Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 8] -Input [3]: [s_store_sk#7, sum#11, sum#12] +Input [3]: [s_store_sk#7, sum#10, sum#11] Keys [1]: [s_store_sk#7] Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#2)), sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#2))#14, sum(UnscaledValue(ss_net_profit#3))#15] -Results [3]: [s_store_sk#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS sales#16, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#15,17,2) AS profit#17] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#2))#12, sum(UnscaledValue(ss_net_profit#3))#13] +Results [3]: [s_store_sk#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#12,17,2) AS sales#14, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#13,17,2) AS profit#15] (16) Scan parquet default.store_returns -Output [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21] +Output [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(sr_returned_date_sk#21), dynamicpruningexpression(sr_returned_date_sk#21 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(sr_returned_date_sk#19), dynamicpruningexpression(sr_returned_date_sk#19 IN dynamicpruning#5)] PushedFilters: [IsNotNull(sr_store_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 6] -Input [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] (18) Filter [codegen id : 6] -Input [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21] -Condition : isnotnull(sr_store_sk#18) +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] +Condition : isnotnull(sr_store_sk#16) (19) ReusedExchange [Reuses operator id: 90] -Output [1]: [d_date_sk#22] +Output [1]: [d_date_sk#20] (20) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [sr_returned_date_sk#21] -Right keys [1]: [d_date_sk#22] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [d_date_sk#20] Join condition: None (21) Project [codegen id : 6] -Output [3]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20] -Input [5]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21, d_date_sk#22] +Output [3]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18] +Input [5]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19, d_date_sk#20] (22) ReusedExchange [Reuses operator id: 10] -Output [1]: [s_store_sk#23] +Output [1]: [s_store_sk#21] (23) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [sr_store_sk#18] -Right keys [1]: [s_store_sk#23] +Left keys [1]: [sr_store_sk#16] +Right keys [1]: [s_store_sk#21] Join condition: None (24) Project [codegen id : 6] -Output [3]: [sr_return_amt#19, sr_net_loss#20, s_store_sk#23] -Input [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, s_store_sk#23] +Output [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, s_store_sk#21] (25) HashAggregate [codegen id : 6] -Input [3]: [sr_return_amt#19, sr_net_loss#20, s_store_sk#23] -Keys [1]: [s_store_sk#23] -Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#19)), partial_sum(UnscaledValue(sr_net_loss#20))] -Aggregate Attributes [2]: [sum#24, sum#25] -Results [3]: [s_store_sk#23, sum#26, sum#27] +Input [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Keys [1]: [s_store_sk#21] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#17)), partial_sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum#22, sum#23] +Results [3]: [s_store_sk#21, sum#24, sum#25] (26) Exchange -Input [3]: [s_store_sk#23, sum#26, sum#27] -Arguments: hashpartitioning(s_store_sk#23, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [3]: [s_store_sk#21, sum#24, sum#25] +Arguments: hashpartitioning(s_store_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=3] (27) HashAggregate [codegen id : 7] -Input [3]: [s_store_sk#23, sum#26, sum#27] -Keys [1]: [s_store_sk#23] -Functions [2]: [sum(UnscaledValue(sr_return_amt#19)), sum(UnscaledValue(sr_net_loss#20))] -Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#19))#29, sum(UnscaledValue(sr_net_loss#20))#30] -Results [3]: [s_store_sk#23, MakeDecimal(sum(UnscaledValue(sr_return_amt#19))#29,17,2) AS returns#31, MakeDecimal(sum(UnscaledValue(sr_net_loss#20))#30,17,2) AS profit_loss#32] +Input [3]: [s_store_sk#21, sum#24, sum#25] +Keys [1]: [s_store_sk#21] +Functions [2]: [sum(UnscaledValue(sr_return_amt#17)), sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#17))#26, sum(UnscaledValue(sr_net_loss#18))#27] +Results [3]: [s_store_sk#21, MakeDecimal(sum(UnscaledValue(sr_return_amt#17))#26,17,2) AS returns#28, MakeDecimal(sum(UnscaledValue(sr_net_loss#18))#27,17,2) AS profit_loss#29] (28) BroadcastExchange -Input [3]: [s_store_sk#23, returns#31, profit_loss#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] +Input [3]: [s_store_sk#21, returns#28, profit_loss#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 8] Left keys [1]: [s_store_sk#7] -Right keys [1]: [s_store_sk#23] +Right keys [1]: [s_store_sk#21] Join condition: None (30) Project [codegen id : 8] -Output [5]: [sales#16, coalesce(returns#31, 0.00) AS returns#34, CheckOverflow((promote_precision(cast(profit#17 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#35, store channel AS channel#36, s_store_sk#7 AS id#37] -Input [6]: [s_store_sk#7, sales#16, profit#17, s_store_sk#23, returns#31, profit_loss#32] +Output [5]: [sales#14, coalesce(returns#28, 0.00) AS returns#30, CheckOverflow((promote_precision(cast(profit#15 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#29, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#31, store channel AS channel#32, s_store_sk#7 AS id#33] +Input [6]: [s_store_sk#7, sales#14, profit#15, s_store_sk#21, returns#28, profit_loss#29] (31) Scan parquet default.catalog_sales -Output [4]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41] +Output [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#41), dynamicpruningexpression(cs_sold_date_sk#41 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#37), dynamicpruningexpression(cs_sold_date_sk#37 IN dynamicpruning#5)] ReadSchema: struct (32) ColumnarToRow [codegen id : 10] -Input [4]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41] +Input [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] (33) ReusedExchange [Reuses operator id: 90] -Output [1]: [d_date_sk#42] +Output [1]: [d_date_sk#38] (34) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#41] -Right keys [1]: [d_date_sk#42] +Left keys [1]: [cs_sold_date_sk#37] +Right keys [1]: [d_date_sk#38] Join condition: None (35) Project [codegen id : 10] -Output [3]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40] -Input [5]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41, d_date_sk#42] +Output [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Input [5]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37, d_date_sk#38] (36) HashAggregate [codegen id : 10] -Input [3]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40] -Keys [1]: [cs_call_center_sk#38] -Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#39)), partial_sum(UnscaledValue(cs_net_profit#40))] -Aggregate Attributes [2]: [sum#43, sum#44] -Results [3]: [cs_call_center_sk#38, sum#45, sum#46] +Input [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#35)), partial_sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum#39, sum#40] +Results [3]: [cs_call_center_sk#34, sum#41, sum#42] (37) Exchange -Input [3]: [cs_call_center_sk#38, sum#45, sum#46] -Arguments: hashpartitioning(cs_call_center_sk#38, 5), ENSURE_REQUIREMENTS, [id=#47] +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Arguments: hashpartitioning(cs_call_center_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=5] (38) HashAggregate [codegen id : 14] -Input [3]: [cs_call_center_sk#38, sum#45, sum#46] -Keys [1]: [cs_call_center_sk#38] -Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#39)), sum(UnscaledValue(cs_net_profit#40))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#39))#48, sum(UnscaledValue(cs_net_profit#40))#49] -Results [3]: [cs_call_center_sk#38, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#39))#48,17,2) AS sales#50, MakeDecimal(sum(UnscaledValue(cs_net_profit#40))#49,17,2) AS profit#51] +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#35)), sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#35))#43, sum(UnscaledValue(cs_net_profit#36))#44] +Results [3]: [cs_call_center_sk#34, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#35))#43,17,2) AS sales#45, MakeDecimal(sum(UnscaledValue(cs_net_profit#36))#44,17,2) AS profit#46] (39) Scan parquet default.catalog_returns -Output [3]: [cr_return_amount#52, cr_net_loss#53, cr_returned_date_sk#54] +Output [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#54), dynamicpruningexpression(cr_returned_date_sk#54 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cr_returned_date_sk#49), dynamicpruningexpression(cr_returned_date_sk#49 IN dynamicpruning#5)] ReadSchema: struct (40) ColumnarToRow [codegen id : 12] -Input [3]: [cr_return_amount#52, cr_net_loss#53, cr_returned_date_sk#54] +Input [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] (41) ReusedExchange [Reuses operator id: 90] -Output [1]: [d_date_sk#55] +Output [1]: [d_date_sk#50] (42) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [cr_returned_date_sk#54] -Right keys [1]: [d_date_sk#55] +Left keys [1]: [cr_returned_date_sk#49] +Right keys [1]: [d_date_sk#50] Join condition: None (43) Project [codegen id : 12] -Output [2]: [cr_return_amount#52, cr_net_loss#53] -Input [4]: [cr_return_amount#52, cr_net_loss#53, cr_returned_date_sk#54, d_date_sk#55] +Output [2]: [cr_return_amount#47, cr_net_loss#48] +Input [4]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49, d_date_sk#50] (44) HashAggregate [codegen id : 12] -Input [2]: [cr_return_amount#52, cr_net_loss#53] +Input [2]: [cr_return_amount#47, cr_net_loss#48] Keys: [] -Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#52)), partial_sum(UnscaledValue(cr_net_loss#53))] -Aggregate Attributes [2]: [sum#56, sum#57] -Results [2]: [sum#58, sum#59] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#47)), partial_sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum#51, sum#52] +Results [2]: [sum#53, sum#54] (45) Exchange -Input [2]: [sum#58, sum#59] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#60] +Input [2]: [sum#53, sum#54] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] (46) HashAggregate [codegen id : 13] -Input [2]: [sum#58, sum#59] +Input [2]: [sum#53, sum#54] Keys: [] -Functions [2]: [sum(UnscaledValue(cr_return_amount#52)), sum(UnscaledValue(cr_net_loss#53))] -Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#52))#61, sum(UnscaledValue(cr_net_loss#53))#62] -Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#52))#61,17,2) AS returns#63, MakeDecimal(sum(UnscaledValue(cr_net_loss#53))#62,17,2) AS profit_loss#64] +Functions [2]: [sum(UnscaledValue(cr_return_amount#47)), sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#47))#55, sum(UnscaledValue(cr_net_loss#48))#56] +Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#47))#55,17,2) AS returns#57, MakeDecimal(sum(UnscaledValue(cr_net_loss#48))#56,17,2) AS profit_loss#58] (47) BroadcastExchange -Input [2]: [returns#63, profit_loss#64] -Arguments: IdentityBroadcastMode, [id=#65] +Input [2]: [returns#57, profit_loss#58] +Arguments: IdentityBroadcastMode, [plan_id=7] (48) BroadcastNestedLoopJoin [codegen id : 14] Join condition: None (49) Project [codegen id : 14] -Output [5]: [sales#50, returns#63, CheckOverflow((promote_precision(cast(profit#51 as decimal(18,2))) - promote_precision(cast(profit_loss#64 as decimal(18,2)))), DecimalType(18,2)) AS profit#66, catalog channel AS channel#67, cs_call_center_sk#38 AS id#68] -Input [5]: [cs_call_center_sk#38, sales#50, profit#51, returns#63, profit_loss#64] +Output [5]: [sales#45, returns#57, CheckOverflow((promote_precision(cast(profit#46 as decimal(18,2))) - promote_precision(cast(profit_loss#58 as decimal(18,2)))), DecimalType(18,2)) AS profit#59, catalog channel AS channel#60, cs_call_center_sk#34 AS id#61] +Input [5]: [cs_call_center_sk#34, sales#45, profit#46, returns#57, profit_loss#58] (50) Scan parquet default.web_sales -Output [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72] +Output [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#72), dynamicpruningexpression(ws_sold_date_sk#72 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#65), dynamicpruningexpression(ws_sold_date_sk#65 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_web_page_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 17] -Input [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] (52) Filter [codegen id : 17] -Input [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72] -Condition : isnotnull(ws_web_page_sk#69) +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] +Condition : isnotnull(ws_web_page_sk#62) (53) ReusedExchange [Reuses operator id: 90] -Output [1]: [d_date_sk#73] +Output [1]: [d_date_sk#66] (54) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#72] -Right keys [1]: [d_date_sk#73] +Left keys [1]: [ws_sold_date_sk#65] +Right keys [1]: [d_date_sk#66] Join condition: None (55) Project [codegen id : 17] -Output [3]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71] -Input [5]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72, d_date_sk#73] +Output [3]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64] +Input [5]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65, d_date_sk#66] (56) Scan parquet default.web_page -Output [1]: [wp_web_page_sk#74] +Output [1]: [wp_web_page_sk#67] Batched: true Location [not included in comparison]/{warehouse_dir}/web_page] PushedFilters: [IsNotNull(wp_web_page_sk)] ReadSchema: struct (57) ColumnarToRow [codegen id : 16] -Input [1]: [wp_web_page_sk#74] +Input [1]: [wp_web_page_sk#67] (58) Filter [codegen id : 16] -Input [1]: [wp_web_page_sk#74] -Condition : isnotnull(wp_web_page_sk#74) +Input [1]: [wp_web_page_sk#67] +Condition : isnotnull(wp_web_page_sk#67) (59) BroadcastExchange -Input [1]: [wp_web_page_sk#74] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#75] +Input [1]: [wp_web_page_sk#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] (60) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_web_page_sk#69] -Right keys [1]: [wp_web_page_sk#74] +Left keys [1]: [ws_web_page_sk#62] +Right keys [1]: [wp_web_page_sk#67] Join condition: None (61) Project [codegen id : 17] -Output [3]: [ws_ext_sales_price#70, ws_net_profit#71, wp_web_page_sk#74] -Input [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, wp_web_page_sk#74] +Output [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] (62) HashAggregate [codegen id : 17] -Input [3]: [ws_ext_sales_price#70, ws_net_profit#71, wp_web_page_sk#74] -Keys [1]: [wp_web_page_sk#74] -Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#70)), partial_sum(UnscaledValue(ws_net_profit#71))] -Aggregate Attributes [2]: [sum#76, sum#77] -Results [3]: [wp_web_page_sk#74, sum#78, sum#79] +Input [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#63)), partial_sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum#68, sum#69] +Results [3]: [wp_web_page_sk#67, sum#70, sum#71] (63) Exchange -Input [3]: [wp_web_page_sk#74, sum#78, sum#79] -Arguments: hashpartitioning(wp_web_page_sk#74, 5), ENSURE_REQUIREMENTS, [id=#80] +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Arguments: hashpartitioning(wp_web_page_sk#67, 5), ENSURE_REQUIREMENTS, [plan_id=9] (64) HashAggregate [codegen id : 22] -Input [3]: [wp_web_page_sk#74, sum#78, sum#79] -Keys [1]: [wp_web_page_sk#74] -Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#70)), sum(UnscaledValue(ws_net_profit#71))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#70))#81, sum(UnscaledValue(ws_net_profit#71))#82] -Results [3]: [wp_web_page_sk#74, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#70))#81,17,2) AS sales#83, MakeDecimal(sum(UnscaledValue(ws_net_profit#71))#82,17,2) AS profit#84] +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#63)), sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#63))#72, sum(UnscaledValue(ws_net_profit#64))#73] +Results [3]: [wp_web_page_sk#67, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#63))#72,17,2) AS sales#74, MakeDecimal(sum(UnscaledValue(ws_net_profit#64))#73,17,2) AS profit#75] (65) Scan parquet default.web_returns -Output [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] +Output [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#88), dynamicpruningexpression(wr_returned_date_sk#88 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(wr_returned_date_sk#79), dynamicpruningexpression(wr_returned_date_sk#79 IN dynamicpruning#5)] PushedFilters: [IsNotNull(wr_web_page_sk)] ReadSchema: struct (66) ColumnarToRow [codegen id : 20] -Input [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] (67) Filter [codegen id : 20] -Input [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] -Condition : isnotnull(wr_web_page_sk#85) +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] +Condition : isnotnull(wr_web_page_sk#76) (68) ReusedExchange [Reuses operator id: 90] -Output [1]: [d_date_sk#89] +Output [1]: [d_date_sk#80] (69) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [wr_returned_date_sk#88] -Right keys [1]: [d_date_sk#89] +Left keys [1]: [wr_returned_date_sk#79] +Right keys [1]: [d_date_sk#80] Join condition: None (70) Project [codegen id : 20] -Output [3]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87] -Input [5]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88, d_date_sk#89] +Output [3]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78] +Input [5]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79, d_date_sk#80] (71) ReusedExchange [Reuses operator id: 59] -Output [1]: [wp_web_page_sk#90] +Output [1]: [wp_web_page_sk#81] (72) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [wr_web_page_sk#85] -Right keys [1]: [wp_web_page_sk#90] +Left keys [1]: [wr_web_page_sk#76] +Right keys [1]: [wp_web_page_sk#81] Join condition: None (73) Project [codegen id : 20] -Output [3]: [wr_return_amt#86, wr_net_loss#87, wp_web_page_sk#90] -Input [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wp_web_page_sk#90] +Output [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] (74) HashAggregate [codegen id : 20] -Input [3]: [wr_return_amt#86, wr_net_loss#87, wp_web_page_sk#90] -Keys [1]: [wp_web_page_sk#90] -Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#86)), partial_sum(UnscaledValue(wr_net_loss#87))] -Aggregate Attributes [2]: [sum#91, sum#92] -Results [3]: [wp_web_page_sk#90, sum#93, sum#94] +Input [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#77)), partial_sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum#82, sum#83] +Results [3]: [wp_web_page_sk#81, sum#84, sum#85] (75) Exchange -Input [3]: [wp_web_page_sk#90, sum#93, sum#94] -Arguments: hashpartitioning(wp_web_page_sk#90, 5), ENSURE_REQUIREMENTS, [id=#95] +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Arguments: hashpartitioning(wp_web_page_sk#81, 5), ENSURE_REQUIREMENTS, [plan_id=10] (76) HashAggregate [codegen id : 21] -Input [3]: [wp_web_page_sk#90, sum#93, sum#94] -Keys [1]: [wp_web_page_sk#90] -Functions [2]: [sum(UnscaledValue(wr_return_amt#86)), sum(UnscaledValue(wr_net_loss#87))] -Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#86))#96, sum(UnscaledValue(wr_net_loss#87))#97] -Results [3]: [wp_web_page_sk#90, MakeDecimal(sum(UnscaledValue(wr_return_amt#86))#96,17,2) AS returns#98, MakeDecimal(sum(UnscaledValue(wr_net_loss#87))#97,17,2) AS profit_loss#99] +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [sum(UnscaledValue(wr_return_amt#77)), sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#77))#86, sum(UnscaledValue(wr_net_loss#78))#87] +Results [3]: [wp_web_page_sk#81, MakeDecimal(sum(UnscaledValue(wr_return_amt#77))#86,17,2) AS returns#88, MakeDecimal(sum(UnscaledValue(wr_net_loss#78))#87,17,2) AS profit_loss#89] (77) BroadcastExchange -Input [3]: [wp_web_page_sk#90, returns#98, profit_loss#99] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#100] +Input [3]: [wp_web_page_sk#81, returns#88, profit_loss#89] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] (78) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [wp_web_page_sk#74] -Right keys [1]: [wp_web_page_sk#90] +Left keys [1]: [wp_web_page_sk#67] +Right keys [1]: [wp_web_page_sk#81] Join condition: None (79) Project [codegen id : 22] -Output [5]: [sales#83, coalesce(returns#98, 0.00) AS returns#101, CheckOverflow((promote_precision(cast(profit#84 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#99, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#102, web channel AS channel#103, wp_web_page_sk#74 AS id#104] -Input [6]: [wp_web_page_sk#74, sales#83, profit#84, wp_web_page_sk#90, returns#98, profit_loss#99] +Output [5]: [sales#74, coalesce(returns#88, 0.00) AS returns#90, CheckOverflow((promote_precision(cast(profit#75 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#89, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#91, web channel AS channel#92, wp_web_page_sk#67 AS id#93] +Input [6]: [wp_web_page_sk#67, sales#74, profit#75, wp_web_page_sk#81, returns#88, profit_loss#89] (80) Union (81) Expand [codegen id : 23] -Input [5]: [sales#16, returns#34, profit#35, channel#36, id#37] -Arguments: [[sales#16, returns#34, profit#35, channel#36, id#37, 0], [sales#16, returns#34, profit#35, channel#36, null, 1], [sales#16, returns#34, profit#35, null, null, 3]], [sales#16, returns#34, profit#35, channel#105, id#106, spark_grouping_id#107] +Input [5]: [sales#14, returns#30, profit#31, channel#32, id#33] +Arguments: [[sales#14, returns#30, profit#31, channel#32, id#33, 0], [sales#14, returns#30, profit#31, channel#32, null, 1], [sales#14, returns#30, profit#31, null, null, 3]], [sales#14, returns#30, profit#31, channel#94, id#95, spark_grouping_id#96] (82) HashAggregate [codegen id : 23] -Input [6]: [sales#16, returns#34, profit#35, channel#105, id#106, spark_grouping_id#107] -Keys [3]: [channel#105, id#106, spark_grouping_id#107] -Functions [3]: [partial_sum(sales#16), partial_sum(returns#34), partial_sum(profit#35)] -Aggregate Attributes [6]: [sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113] -Results [9]: [channel#105, id#106, spark_grouping_id#107, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119] +Input [6]: [sales#14, returns#30, profit#31, channel#94, id#95, spark_grouping_id#96] +Keys [3]: [channel#94, id#95, spark_grouping_id#96] +Functions [3]: [partial_sum(sales#14), partial_sum(returns#30), partial_sum(profit#31)] +Aggregate Attributes [6]: [sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102] +Results [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] (83) Exchange -Input [9]: [channel#105, id#106, spark_grouping_id#107, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119] -Arguments: hashpartitioning(channel#105, id#106, spark_grouping_id#107, 5), ENSURE_REQUIREMENTS, [id=#120] +Input [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] +Arguments: hashpartitioning(channel#94, id#95, spark_grouping_id#96, 5), ENSURE_REQUIREMENTS, [plan_id=12] (84) HashAggregate [codegen id : 24] -Input [9]: [channel#105, id#106, spark_grouping_id#107, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119] -Keys [3]: [channel#105, id#106, spark_grouping_id#107] -Functions [3]: [sum(sales#16), sum(returns#34), sum(profit#35)] -Aggregate Attributes [3]: [sum(sales#16)#121, sum(returns#34)#122, sum(profit#35)#123] -Results [5]: [channel#105, id#106, sum(sales#16)#121 AS sales#124, sum(returns#34)#122 AS returns#125, sum(profit#35)#123 AS profit#126] +Input [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] +Keys [3]: [channel#94, id#95, spark_grouping_id#96] +Functions [3]: [sum(sales#14), sum(returns#30), sum(profit#31)] +Aggregate Attributes [3]: [sum(sales#14)#109, sum(returns#30)#110, sum(profit#31)#111] +Results [5]: [channel#94, id#95, sum(sales#14)#109 AS sales#112, sum(returns#30)#110 AS returns#113, sum(profit#31)#111 AS profit#114] (85) TakeOrderedAndProject -Input [5]: [channel#105, id#106, sales#124, returns#125, profit#126] -Arguments: 100, [channel#105 ASC NULLS FIRST, id#106 ASC NULLS FIRST], [channel#105, id#106, sales#124, returns#125, profit#126] +Input [5]: [channel#94, id#95, sales#112, returns#113, profit#114] +Arguments: 100, [channel#94 ASC NULLS FIRST, id#95 ASC NULLS FIRST], [channel#94, id#95, sales#112, returns#113, profit#114] ===== Subqueries ===== @@ -500,35 +500,35 @@ BroadcastExchange (90) (86) Scan parquet default.date_dim -Output [2]: [d_date_sk#6, d_date#127] +Output [2]: [d_date_sk#6, d_date#115] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), IsNotNull(d_date_sk)] ReadSchema: struct (87) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#6, d_date#127] +Input [2]: [d_date_sk#6, d_date#115] (88) Filter [codegen id : 1] -Input [2]: [d_date_sk#6, d_date#127] -Condition : (((isnotnull(d_date#127) AND (d_date#127 >= 2000-08-03)) AND (d_date#127 <= 2000-09-02)) AND isnotnull(d_date_sk#6)) +Input [2]: [d_date_sk#6, d_date#115] +Condition : (((isnotnull(d_date#115) AND (d_date#115 >= 2000-08-03)) AND (d_date#115 <= 2000-09-02)) AND isnotnull(d_date_sk#6)) (89) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [2]: [d_date_sk#6, d_date#127] +Input [2]: [d_date_sk#6, d_date#115] (90) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#128] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=13] -Subquery:2 Hosting operator id = 16 Hosting Expression = sr_returned_date_sk#21 IN dynamicpruning#5 +Subquery:2 Hosting operator id = 16 Hosting Expression = sr_returned_date_sk#19 IN dynamicpruning#5 -Subquery:3 Hosting operator id = 31 Hosting Expression = cs_sold_date_sk#41 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 31 Hosting Expression = cs_sold_date_sk#37 IN dynamicpruning#5 -Subquery:4 Hosting operator id = 39 Hosting Expression = cr_returned_date_sk#54 IN dynamicpruning#5 +Subquery:4 Hosting operator id = 39 Hosting Expression = cr_returned_date_sk#49 IN dynamicpruning#5 -Subquery:5 Hosting operator id = 50 Hosting Expression = ws_sold_date_sk#72 IN dynamicpruning#5 +Subquery:5 Hosting operator id = 50 Hosting Expression = ws_sold_date_sk#65 IN dynamicpruning#5 -Subquery:6 Hosting operator id = 65 Hosting Expression = wr_returned_date_sk#88 IN dynamicpruning#5 +Subquery:6 Hosting operator id = 65 Hosting Expression = wr_returned_date_sk#79 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt index c3cd748f43775..1406aa9f41eac 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q77/explain.txt @@ -129,7 +129,7 @@ Condition : isnotnull(s_store_sk#7) (10) BroadcastExchange Input [1]: [s_store_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_store_sk#1] @@ -144,350 +144,350 @@ Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] Input [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] Keys [1]: [s_store_sk#7] Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#2)), partial_sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [2]: [sum#9, sum#10] -Results [3]: [s_store_sk#7, sum#11, sum#12] +Aggregate Attributes [2]: [sum#8, sum#9] +Results [3]: [s_store_sk#7, sum#10, sum#11] (14) Exchange -Input [3]: [s_store_sk#7, sum#11, sum#12] -Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [3]: [s_store_sk#7, sum#10, sum#11] +Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 8] -Input [3]: [s_store_sk#7, sum#11, sum#12] +Input [3]: [s_store_sk#7, sum#10, sum#11] Keys [1]: [s_store_sk#7] Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#2)), sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#2))#14, sum(UnscaledValue(ss_net_profit#3))#15] -Results [3]: [s_store_sk#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS sales#16, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#15,17,2) AS profit#17] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#2))#12, sum(UnscaledValue(ss_net_profit#3))#13] +Results [3]: [s_store_sk#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#12,17,2) AS sales#14, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#13,17,2) AS profit#15] (16) Scan parquet default.store_returns -Output [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21] +Output [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(sr_returned_date_sk#21), dynamicpruningexpression(sr_returned_date_sk#21 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(sr_returned_date_sk#19), dynamicpruningexpression(sr_returned_date_sk#19 IN dynamicpruning#5)] PushedFilters: [IsNotNull(sr_store_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 6] -Input [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] (18) Filter [codegen id : 6] -Input [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21] -Condition : isnotnull(sr_store_sk#18) +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] +Condition : isnotnull(sr_store_sk#16) (19) ReusedExchange [Reuses operator id: 90] -Output [1]: [d_date_sk#22] +Output [1]: [d_date_sk#20] (20) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [sr_returned_date_sk#21] -Right keys [1]: [d_date_sk#22] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [d_date_sk#20] Join condition: None (21) Project [codegen id : 6] -Output [3]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20] -Input [5]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21, d_date_sk#22] +Output [3]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18] +Input [5]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19, d_date_sk#20] (22) ReusedExchange [Reuses operator id: 10] -Output [1]: [s_store_sk#23] +Output [1]: [s_store_sk#21] (23) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [sr_store_sk#18] -Right keys [1]: [s_store_sk#23] +Left keys [1]: [sr_store_sk#16] +Right keys [1]: [s_store_sk#21] Join condition: None (24) Project [codegen id : 6] -Output [3]: [sr_return_amt#19, sr_net_loss#20, s_store_sk#23] -Input [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, s_store_sk#23] +Output [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, s_store_sk#21] (25) HashAggregate [codegen id : 6] -Input [3]: [sr_return_amt#19, sr_net_loss#20, s_store_sk#23] -Keys [1]: [s_store_sk#23] -Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#19)), partial_sum(UnscaledValue(sr_net_loss#20))] -Aggregate Attributes [2]: [sum#24, sum#25] -Results [3]: [s_store_sk#23, sum#26, sum#27] +Input [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Keys [1]: [s_store_sk#21] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#17)), partial_sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum#22, sum#23] +Results [3]: [s_store_sk#21, sum#24, sum#25] (26) Exchange -Input [3]: [s_store_sk#23, sum#26, sum#27] -Arguments: hashpartitioning(s_store_sk#23, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [3]: [s_store_sk#21, sum#24, sum#25] +Arguments: hashpartitioning(s_store_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=3] (27) HashAggregate [codegen id : 7] -Input [3]: [s_store_sk#23, sum#26, sum#27] -Keys [1]: [s_store_sk#23] -Functions [2]: [sum(UnscaledValue(sr_return_amt#19)), sum(UnscaledValue(sr_net_loss#20))] -Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#19))#29, sum(UnscaledValue(sr_net_loss#20))#30] -Results [3]: [s_store_sk#23, MakeDecimal(sum(UnscaledValue(sr_return_amt#19))#29,17,2) AS returns#31, MakeDecimal(sum(UnscaledValue(sr_net_loss#20))#30,17,2) AS profit_loss#32] +Input [3]: [s_store_sk#21, sum#24, sum#25] +Keys [1]: [s_store_sk#21] +Functions [2]: [sum(UnscaledValue(sr_return_amt#17)), sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#17))#26, sum(UnscaledValue(sr_net_loss#18))#27] +Results [3]: [s_store_sk#21, MakeDecimal(sum(UnscaledValue(sr_return_amt#17))#26,17,2) AS returns#28, MakeDecimal(sum(UnscaledValue(sr_net_loss#18))#27,17,2) AS profit_loss#29] (28) BroadcastExchange -Input [3]: [s_store_sk#23, returns#31, profit_loss#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] +Input [3]: [s_store_sk#21, returns#28, profit_loss#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 8] Left keys [1]: [s_store_sk#7] -Right keys [1]: [s_store_sk#23] +Right keys [1]: [s_store_sk#21] Join condition: None (30) Project [codegen id : 8] -Output [5]: [sales#16, coalesce(returns#31, 0.00) AS returns#34, CheckOverflow((promote_precision(cast(profit#17 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#35, store channel AS channel#36, s_store_sk#7 AS id#37] -Input [6]: [s_store_sk#7, sales#16, profit#17, s_store_sk#23, returns#31, profit_loss#32] +Output [5]: [sales#14, coalesce(returns#28, 0.00) AS returns#30, CheckOverflow((promote_precision(cast(profit#15 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#29, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#31, store channel AS channel#32, s_store_sk#7 AS id#33] +Input [6]: [s_store_sk#7, sales#14, profit#15, s_store_sk#21, returns#28, profit_loss#29] (31) Scan parquet default.catalog_sales -Output [4]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41] +Output [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#41), dynamicpruningexpression(cs_sold_date_sk#41 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#37), dynamicpruningexpression(cs_sold_date_sk#37 IN dynamicpruning#5)] ReadSchema: struct (32) ColumnarToRow [codegen id : 10] -Input [4]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41] +Input [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] (33) ReusedExchange [Reuses operator id: 90] -Output [1]: [d_date_sk#42] +Output [1]: [d_date_sk#38] (34) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#41] -Right keys [1]: [d_date_sk#42] +Left keys [1]: [cs_sold_date_sk#37] +Right keys [1]: [d_date_sk#38] Join condition: None (35) Project [codegen id : 10] -Output [3]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40] -Input [5]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41, d_date_sk#42] +Output [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Input [5]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37, d_date_sk#38] (36) HashAggregate [codegen id : 10] -Input [3]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40] -Keys [1]: [cs_call_center_sk#38] -Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#39)), partial_sum(UnscaledValue(cs_net_profit#40))] -Aggregate Attributes [2]: [sum#43, sum#44] -Results [3]: [cs_call_center_sk#38, sum#45, sum#46] +Input [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#35)), partial_sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum#39, sum#40] +Results [3]: [cs_call_center_sk#34, sum#41, sum#42] (37) Exchange -Input [3]: [cs_call_center_sk#38, sum#45, sum#46] -Arguments: hashpartitioning(cs_call_center_sk#38, 5), ENSURE_REQUIREMENTS, [id=#47] +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Arguments: hashpartitioning(cs_call_center_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=5] (38) HashAggregate [codegen id : 11] -Input [3]: [cs_call_center_sk#38, sum#45, sum#46] -Keys [1]: [cs_call_center_sk#38] -Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#39)), sum(UnscaledValue(cs_net_profit#40))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#39))#48, sum(UnscaledValue(cs_net_profit#40))#49] -Results [3]: [cs_call_center_sk#38, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#39))#48,17,2) AS sales#50, MakeDecimal(sum(UnscaledValue(cs_net_profit#40))#49,17,2) AS profit#51] +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#35)), sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#35))#43, sum(UnscaledValue(cs_net_profit#36))#44] +Results [3]: [cs_call_center_sk#34, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#35))#43,17,2) AS sales#45, MakeDecimal(sum(UnscaledValue(cs_net_profit#36))#44,17,2) AS profit#46] (39) BroadcastExchange -Input [3]: [cs_call_center_sk#38, sales#50, profit#51] -Arguments: IdentityBroadcastMode, [id=#52] +Input [3]: [cs_call_center_sk#34, sales#45, profit#46] +Arguments: IdentityBroadcastMode, [plan_id=6] (40) Scan parquet default.catalog_returns -Output [3]: [cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Output [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#55), dynamicpruningexpression(cr_returned_date_sk#55 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cr_returned_date_sk#49), dynamicpruningexpression(cr_returned_date_sk#49 IN dynamicpruning#5)] ReadSchema: struct (41) ColumnarToRow [codegen id : 13] -Input [3]: [cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Input [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] (42) ReusedExchange [Reuses operator id: 90] -Output [1]: [d_date_sk#56] +Output [1]: [d_date_sk#50] (43) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cr_returned_date_sk#55] -Right keys [1]: [d_date_sk#56] +Left keys [1]: [cr_returned_date_sk#49] +Right keys [1]: [d_date_sk#50] Join condition: None (44) Project [codegen id : 13] -Output [2]: [cr_return_amount#53, cr_net_loss#54] -Input [4]: [cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55, d_date_sk#56] +Output [2]: [cr_return_amount#47, cr_net_loss#48] +Input [4]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49, d_date_sk#50] (45) HashAggregate [codegen id : 13] -Input [2]: [cr_return_amount#53, cr_net_loss#54] +Input [2]: [cr_return_amount#47, cr_net_loss#48] Keys: [] -Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#53)), partial_sum(UnscaledValue(cr_net_loss#54))] -Aggregate Attributes [2]: [sum#57, sum#58] -Results [2]: [sum#59, sum#60] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#47)), partial_sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum#51, sum#52] +Results [2]: [sum#53, sum#54] (46) Exchange -Input [2]: [sum#59, sum#60] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#61] +Input [2]: [sum#53, sum#54] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (47) HashAggregate -Input [2]: [sum#59, sum#60] +Input [2]: [sum#53, sum#54] Keys: [] -Functions [2]: [sum(UnscaledValue(cr_return_amount#53)), sum(UnscaledValue(cr_net_loss#54))] -Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#53))#62, sum(UnscaledValue(cr_net_loss#54))#63] -Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#53))#62,17,2) AS returns#64, MakeDecimal(sum(UnscaledValue(cr_net_loss#54))#63,17,2) AS profit_loss#65] +Functions [2]: [sum(UnscaledValue(cr_return_amount#47)), sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#47))#55, sum(UnscaledValue(cr_net_loss#48))#56] +Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#47))#55,17,2) AS returns#57, MakeDecimal(sum(UnscaledValue(cr_net_loss#48))#56,17,2) AS profit_loss#58] (48) BroadcastNestedLoopJoin [codegen id : 14] Join condition: None (49) Project [codegen id : 14] -Output [5]: [sales#50, returns#64, CheckOverflow((promote_precision(cast(profit#51 as decimal(18,2))) - promote_precision(cast(profit_loss#65 as decimal(18,2)))), DecimalType(18,2)) AS profit#66, catalog channel AS channel#67, cs_call_center_sk#38 AS id#68] -Input [5]: [cs_call_center_sk#38, sales#50, profit#51, returns#64, profit_loss#65] +Output [5]: [sales#45, returns#57, CheckOverflow((promote_precision(cast(profit#46 as decimal(18,2))) - promote_precision(cast(profit_loss#58 as decimal(18,2)))), DecimalType(18,2)) AS profit#59, catalog channel AS channel#60, cs_call_center_sk#34 AS id#61] +Input [5]: [cs_call_center_sk#34, sales#45, profit#46, returns#57, profit_loss#58] (50) Scan parquet default.web_sales -Output [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72] +Output [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#72), dynamicpruningexpression(ws_sold_date_sk#72 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#65), dynamicpruningexpression(ws_sold_date_sk#65 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_web_page_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 17] -Input [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] (52) Filter [codegen id : 17] -Input [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72] -Condition : isnotnull(ws_web_page_sk#69) +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] +Condition : isnotnull(ws_web_page_sk#62) (53) ReusedExchange [Reuses operator id: 90] -Output [1]: [d_date_sk#73] +Output [1]: [d_date_sk#66] (54) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#72] -Right keys [1]: [d_date_sk#73] +Left keys [1]: [ws_sold_date_sk#65] +Right keys [1]: [d_date_sk#66] Join condition: None (55) Project [codegen id : 17] -Output [3]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71] -Input [5]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72, d_date_sk#73] +Output [3]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64] +Input [5]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65, d_date_sk#66] (56) Scan parquet default.web_page -Output [1]: [wp_web_page_sk#74] +Output [1]: [wp_web_page_sk#67] Batched: true Location [not included in comparison]/{warehouse_dir}/web_page] PushedFilters: [IsNotNull(wp_web_page_sk)] ReadSchema: struct (57) ColumnarToRow [codegen id : 16] -Input [1]: [wp_web_page_sk#74] +Input [1]: [wp_web_page_sk#67] (58) Filter [codegen id : 16] -Input [1]: [wp_web_page_sk#74] -Condition : isnotnull(wp_web_page_sk#74) +Input [1]: [wp_web_page_sk#67] +Condition : isnotnull(wp_web_page_sk#67) (59) BroadcastExchange -Input [1]: [wp_web_page_sk#74] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#75] +Input [1]: [wp_web_page_sk#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] (60) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_web_page_sk#69] -Right keys [1]: [wp_web_page_sk#74] +Left keys [1]: [ws_web_page_sk#62] +Right keys [1]: [wp_web_page_sk#67] Join condition: None (61) Project [codegen id : 17] -Output [3]: [ws_ext_sales_price#70, ws_net_profit#71, wp_web_page_sk#74] -Input [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, wp_web_page_sk#74] +Output [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] (62) HashAggregate [codegen id : 17] -Input [3]: [ws_ext_sales_price#70, ws_net_profit#71, wp_web_page_sk#74] -Keys [1]: [wp_web_page_sk#74] -Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#70)), partial_sum(UnscaledValue(ws_net_profit#71))] -Aggregate Attributes [2]: [sum#76, sum#77] -Results [3]: [wp_web_page_sk#74, sum#78, sum#79] +Input [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#63)), partial_sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum#68, sum#69] +Results [3]: [wp_web_page_sk#67, sum#70, sum#71] (63) Exchange -Input [3]: [wp_web_page_sk#74, sum#78, sum#79] -Arguments: hashpartitioning(wp_web_page_sk#74, 5), ENSURE_REQUIREMENTS, [id=#80] +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Arguments: hashpartitioning(wp_web_page_sk#67, 5), ENSURE_REQUIREMENTS, [plan_id=9] (64) HashAggregate [codegen id : 22] -Input [3]: [wp_web_page_sk#74, sum#78, sum#79] -Keys [1]: [wp_web_page_sk#74] -Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#70)), sum(UnscaledValue(ws_net_profit#71))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#70))#81, sum(UnscaledValue(ws_net_profit#71))#82] -Results [3]: [wp_web_page_sk#74, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#70))#81,17,2) AS sales#83, MakeDecimal(sum(UnscaledValue(ws_net_profit#71))#82,17,2) AS profit#84] +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#63)), sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#63))#72, sum(UnscaledValue(ws_net_profit#64))#73] +Results [3]: [wp_web_page_sk#67, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#63))#72,17,2) AS sales#74, MakeDecimal(sum(UnscaledValue(ws_net_profit#64))#73,17,2) AS profit#75] (65) Scan parquet default.web_returns -Output [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] +Output [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#88), dynamicpruningexpression(wr_returned_date_sk#88 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(wr_returned_date_sk#79), dynamicpruningexpression(wr_returned_date_sk#79 IN dynamicpruning#5)] PushedFilters: [IsNotNull(wr_web_page_sk)] ReadSchema: struct (66) ColumnarToRow [codegen id : 20] -Input [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] (67) Filter [codegen id : 20] -Input [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] -Condition : isnotnull(wr_web_page_sk#85) +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] +Condition : isnotnull(wr_web_page_sk#76) (68) ReusedExchange [Reuses operator id: 90] -Output [1]: [d_date_sk#89] +Output [1]: [d_date_sk#80] (69) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [wr_returned_date_sk#88] -Right keys [1]: [d_date_sk#89] +Left keys [1]: [wr_returned_date_sk#79] +Right keys [1]: [d_date_sk#80] Join condition: None (70) Project [codegen id : 20] -Output [3]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87] -Input [5]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88, d_date_sk#89] +Output [3]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78] +Input [5]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79, d_date_sk#80] (71) ReusedExchange [Reuses operator id: 59] -Output [1]: [wp_web_page_sk#90] +Output [1]: [wp_web_page_sk#81] (72) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [wr_web_page_sk#85] -Right keys [1]: [wp_web_page_sk#90] +Left keys [1]: [wr_web_page_sk#76] +Right keys [1]: [wp_web_page_sk#81] Join condition: None (73) Project [codegen id : 20] -Output [3]: [wr_return_amt#86, wr_net_loss#87, wp_web_page_sk#90] -Input [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wp_web_page_sk#90] +Output [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] (74) HashAggregate [codegen id : 20] -Input [3]: [wr_return_amt#86, wr_net_loss#87, wp_web_page_sk#90] -Keys [1]: [wp_web_page_sk#90] -Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#86)), partial_sum(UnscaledValue(wr_net_loss#87))] -Aggregate Attributes [2]: [sum#91, sum#92] -Results [3]: [wp_web_page_sk#90, sum#93, sum#94] +Input [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#77)), partial_sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum#82, sum#83] +Results [3]: [wp_web_page_sk#81, sum#84, sum#85] (75) Exchange -Input [3]: [wp_web_page_sk#90, sum#93, sum#94] -Arguments: hashpartitioning(wp_web_page_sk#90, 5), ENSURE_REQUIREMENTS, [id=#95] +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Arguments: hashpartitioning(wp_web_page_sk#81, 5), ENSURE_REQUIREMENTS, [plan_id=10] (76) HashAggregate [codegen id : 21] -Input [3]: [wp_web_page_sk#90, sum#93, sum#94] -Keys [1]: [wp_web_page_sk#90] -Functions [2]: [sum(UnscaledValue(wr_return_amt#86)), sum(UnscaledValue(wr_net_loss#87))] -Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#86))#96, sum(UnscaledValue(wr_net_loss#87))#97] -Results [3]: [wp_web_page_sk#90, MakeDecimal(sum(UnscaledValue(wr_return_amt#86))#96,17,2) AS returns#98, MakeDecimal(sum(UnscaledValue(wr_net_loss#87))#97,17,2) AS profit_loss#99] +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [sum(UnscaledValue(wr_return_amt#77)), sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#77))#86, sum(UnscaledValue(wr_net_loss#78))#87] +Results [3]: [wp_web_page_sk#81, MakeDecimal(sum(UnscaledValue(wr_return_amt#77))#86,17,2) AS returns#88, MakeDecimal(sum(UnscaledValue(wr_net_loss#78))#87,17,2) AS profit_loss#89] (77) BroadcastExchange -Input [3]: [wp_web_page_sk#90, returns#98, profit_loss#99] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#100] +Input [3]: [wp_web_page_sk#81, returns#88, profit_loss#89] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] (78) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [wp_web_page_sk#74] -Right keys [1]: [wp_web_page_sk#90] +Left keys [1]: [wp_web_page_sk#67] +Right keys [1]: [wp_web_page_sk#81] Join condition: None (79) Project [codegen id : 22] -Output [5]: [sales#83, coalesce(returns#98, 0.00) AS returns#101, CheckOverflow((promote_precision(cast(profit#84 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#99, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#102, web channel AS channel#103, wp_web_page_sk#74 AS id#104] -Input [6]: [wp_web_page_sk#74, sales#83, profit#84, wp_web_page_sk#90, returns#98, profit_loss#99] +Output [5]: [sales#74, coalesce(returns#88, 0.00) AS returns#90, CheckOverflow((promote_precision(cast(profit#75 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#89, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#91, web channel AS channel#92, wp_web_page_sk#67 AS id#93] +Input [6]: [wp_web_page_sk#67, sales#74, profit#75, wp_web_page_sk#81, returns#88, profit_loss#89] (80) Union (81) Expand [codegen id : 23] -Input [5]: [sales#16, returns#34, profit#35, channel#36, id#37] -Arguments: [[sales#16, returns#34, profit#35, channel#36, id#37, 0], [sales#16, returns#34, profit#35, channel#36, null, 1], [sales#16, returns#34, profit#35, null, null, 3]], [sales#16, returns#34, profit#35, channel#105, id#106, spark_grouping_id#107] +Input [5]: [sales#14, returns#30, profit#31, channel#32, id#33] +Arguments: [[sales#14, returns#30, profit#31, channel#32, id#33, 0], [sales#14, returns#30, profit#31, channel#32, null, 1], [sales#14, returns#30, profit#31, null, null, 3]], [sales#14, returns#30, profit#31, channel#94, id#95, spark_grouping_id#96] (82) HashAggregate [codegen id : 23] -Input [6]: [sales#16, returns#34, profit#35, channel#105, id#106, spark_grouping_id#107] -Keys [3]: [channel#105, id#106, spark_grouping_id#107] -Functions [3]: [partial_sum(sales#16), partial_sum(returns#34), partial_sum(profit#35)] -Aggregate Attributes [6]: [sum#108, isEmpty#109, sum#110, isEmpty#111, sum#112, isEmpty#113] -Results [9]: [channel#105, id#106, spark_grouping_id#107, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119] +Input [6]: [sales#14, returns#30, profit#31, channel#94, id#95, spark_grouping_id#96] +Keys [3]: [channel#94, id#95, spark_grouping_id#96] +Functions [3]: [partial_sum(sales#14), partial_sum(returns#30), partial_sum(profit#31)] +Aggregate Attributes [6]: [sum#97, isEmpty#98, sum#99, isEmpty#100, sum#101, isEmpty#102] +Results [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] (83) Exchange -Input [9]: [channel#105, id#106, spark_grouping_id#107, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119] -Arguments: hashpartitioning(channel#105, id#106, spark_grouping_id#107, 5), ENSURE_REQUIREMENTS, [id=#120] +Input [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] +Arguments: hashpartitioning(channel#94, id#95, spark_grouping_id#96, 5), ENSURE_REQUIREMENTS, [plan_id=12] (84) HashAggregate [codegen id : 24] -Input [9]: [channel#105, id#106, spark_grouping_id#107, sum#114, isEmpty#115, sum#116, isEmpty#117, sum#118, isEmpty#119] -Keys [3]: [channel#105, id#106, spark_grouping_id#107] -Functions [3]: [sum(sales#16), sum(returns#34), sum(profit#35)] -Aggregate Attributes [3]: [sum(sales#16)#121, sum(returns#34)#122, sum(profit#35)#123] -Results [5]: [channel#105, id#106, sum(sales#16)#121 AS sales#124, sum(returns#34)#122 AS returns#125, sum(profit#35)#123 AS profit#126] +Input [9]: [channel#94, id#95, spark_grouping_id#96, sum#103, isEmpty#104, sum#105, isEmpty#106, sum#107, isEmpty#108] +Keys [3]: [channel#94, id#95, spark_grouping_id#96] +Functions [3]: [sum(sales#14), sum(returns#30), sum(profit#31)] +Aggregate Attributes [3]: [sum(sales#14)#109, sum(returns#30)#110, sum(profit#31)#111] +Results [5]: [channel#94, id#95, sum(sales#14)#109 AS sales#112, sum(returns#30)#110 AS returns#113, sum(profit#31)#111 AS profit#114] (85) TakeOrderedAndProject -Input [5]: [channel#105, id#106, sales#124, returns#125, profit#126] -Arguments: 100, [channel#105 ASC NULLS FIRST, id#106 ASC NULLS FIRST], [channel#105, id#106, sales#124, returns#125, profit#126] +Input [5]: [channel#94, id#95, sales#112, returns#113, profit#114] +Arguments: 100, [channel#94 ASC NULLS FIRST, id#95 ASC NULLS FIRST], [channel#94, id#95, sales#112, returns#113, profit#114] ===== Subqueries ===== @@ -500,35 +500,35 @@ BroadcastExchange (90) (86) Scan parquet default.date_dim -Output [2]: [d_date_sk#6, d_date#127] +Output [2]: [d_date_sk#6, d_date#115] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-03), LessThanOrEqual(d_date,2000-09-02), IsNotNull(d_date_sk)] ReadSchema: struct (87) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#6, d_date#127] +Input [2]: [d_date_sk#6, d_date#115] (88) Filter [codegen id : 1] -Input [2]: [d_date_sk#6, d_date#127] -Condition : (((isnotnull(d_date#127) AND (d_date#127 >= 2000-08-03)) AND (d_date#127 <= 2000-09-02)) AND isnotnull(d_date_sk#6)) +Input [2]: [d_date_sk#6, d_date#115] +Condition : (((isnotnull(d_date#115) AND (d_date#115 >= 2000-08-03)) AND (d_date#115 <= 2000-09-02)) AND isnotnull(d_date_sk#6)) (89) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [2]: [d_date_sk#6, d_date#127] +Input [2]: [d_date_sk#6, d_date#115] (90) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#128] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=13] -Subquery:2 Hosting operator id = 16 Hosting Expression = sr_returned_date_sk#21 IN dynamicpruning#5 +Subquery:2 Hosting operator id = 16 Hosting Expression = sr_returned_date_sk#19 IN dynamicpruning#5 -Subquery:3 Hosting operator id = 31 Hosting Expression = cs_sold_date_sk#41 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 31 Hosting Expression = cs_sold_date_sk#37 IN dynamicpruning#5 -Subquery:4 Hosting operator id = 40 Hosting Expression = cr_returned_date_sk#55 IN dynamicpruning#5 +Subquery:4 Hosting operator id = 40 Hosting Expression = cr_returned_date_sk#49 IN dynamicpruning#5 -Subquery:5 Hosting operator id = 50 Hosting Expression = ws_sold_date_sk#72 IN dynamicpruning#5 +Subquery:5 Hosting operator id = 50 Hosting Expression = ws_sold_date_sk#65 IN dynamicpruning#5 -Subquery:6 Hosting operator id = 65 Hosting Expression = wr_returned_date_sk#88 IN dynamicpruning#5 +Subquery:6 Hosting operator id = 65 Hosting Expression = wr_returned_date_sk#79 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.sf100/explain.txt index 386e889f9ef4a..4687d7445557e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79.sf100/explain.txt @@ -81,7 +81,7 @@ Input [3]: [hd_demo_sk#11, hd_dep_count#12, hd_vehicle_count#13] (11) BroadcastExchange Input [1]: [hd_demo_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] @@ -93,96 +93,96 @@ Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, hd_demo_sk#11] (14) Scan parquet default.store -Output [3]: [s_store_sk#15, s_number_employees#16, s_city#17] +Output [3]: [s_store_sk#14, s_number_employees#15, s_city#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_number_employees), GreaterThanOrEqual(s_number_employees,200), LessThanOrEqual(s_number_employees,295), IsNotNull(s_store_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [3]: [s_store_sk#15, s_number_employees#16, s_city#17] +Input [3]: [s_store_sk#14, s_number_employees#15, s_city#16] (16) Filter [codegen id : 3] -Input [3]: [s_store_sk#15, s_number_employees#16, s_city#17] -Condition : (((isnotnull(s_number_employees#16) AND (s_number_employees#16 >= 200)) AND (s_number_employees#16 <= 295)) AND isnotnull(s_store_sk#15)) +Input [3]: [s_store_sk#14, s_number_employees#15, s_city#16] +Condition : (((isnotnull(s_number_employees#15) AND (s_number_employees#15 >= 200)) AND (s_number_employees#15 <= 295)) AND isnotnull(s_store_sk#14)) (17) Project [codegen id : 3] -Output [2]: [s_store_sk#15, s_city#17] -Input [3]: [s_store_sk#15, s_number_employees#16, s_city#17] +Output [2]: [s_store_sk#14, s_city#16] +Input [3]: [s_store_sk#14, s_number_employees#15, s_city#16] (18) BroadcastExchange -Input [2]: [s_store_sk#15, s_city#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [2]: [s_store_sk#14, s_city#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#4] -Right keys [1]: [s_store_sk#15] +Right keys [1]: [s_store_sk#14] Join condition: None (20) Project [codegen id : 4] -Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#17] -Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#15, s_city#17] +Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#16] +Input [8]: [ss_customer_sk#1, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#14, s_city#16] (21) HashAggregate [codegen id : 4] -Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#17] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#17] +Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#16] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#16] Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] -Aggregate Attributes [2]: [sum#19, sum#20] -Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#17, sum#21, sum#22] +Aggregate Attributes [2]: [sum#17, sum#18] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#16, sum#19, sum#20] (22) Exchange -Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#17, sum#21, sum#22] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#17, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#16, sum#19, sum#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#16, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 5] -Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#17, sum#21, sum#22] -Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#17] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#16, sum#19, sum#20] +Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#16] Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#24, sum(UnscaledValue(ss_net_profit#7))#25] -Results [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#17, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#24,17,2) AS amt#26, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#25,17,2) AS profit#27] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#21, sum(UnscaledValue(ss_net_profit#7))#22] +Results [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#16, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#21,17,2) AS amt#23, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#22,17,2) AS profit#24] (24) Exchange -Input [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#17, amt#26, profit#27] -Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#16, amt#23, profit#24] +Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] (25) Sort [codegen id : 6] -Input [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#17, amt#26, profit#27] +Input [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#16, amt#23, profit#24] Arguments: [ss_customer_sk#1 ASC NULLS FIRST], false, 0 (26) Scan parquet default.customer -Output [3]: [c_customer_sk#29, c_first_name#30, c_last_name#31] +Output [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 7] -Input [3]: [c_customer_sk#29, c_first_name#30, c_last_name#31] +Input [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] (28) Filter [codegen id : 7] -Input [3]: [c_customer_sk#29, c_first_name#30, c_last_name#31] -Condition : isnotnull(c_customer_sk#29) +Input [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] +Condition : isnotnull(c_customer_sk#25) (29) Exchange -Input [3]: [c_customer_sk#29, c_first_name#30, c_last_name#31] -Arguments: hashpartitioning(c_customer_sk#29, 5), ENSURE_REQUIREMENTS, [id=#32] +Input [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] +Arguments: hashpartitioning(c_customer_sk#25, 5), ENSURE_REQUIREMENTS, [plan_id=5] (30) Sort [codegen id : 8] -Input [3]: [c_customer_sk#29, c_first_name#30, c_last_name#31] -Arguments: [c_customer_sk#29 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] +Arguments: [c_customer_sk#25 ASC NULLS FIRST], false, 0 (31) SortMergeJoin [codegen id : 9] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#29] +Right keys [1]: [c_customer_sk#25] Join condition: None (32) Project [codegen id : 9] -Output [7]: [c_last_name#31, c_first_name#30, substr(s_city#17, 1, 30) AS substr(s_city, 1, 30)#33, ss_ticket_number#5, amt#26, profit#27, s_city#17] -Input [8]: [ss_ticket_number#5, ss_customer_sk#1, s_city#17, amt#26, profit#27, c_customer_sk#29, c_first_name#30, c_last_name#31] +Output [7]: [c_last_name#27, c_first_name#26, substr(s_city#16, 1, 30) AS substr(s_city, 1, 30)#28, ss_ticket_number#5, amt#23, profit#24, s_city#16] +Input [8]: [ss_ticket_number#5, ss_customer_sk#1, s_city#16, amt#23, profit#24, c_customer_sk#25, c_first_name#26, c_last_name#27] (33) TakeOrderedAndProject -Input [7]: [c_last_name#31, c_first_name#30, substr(s_city, 1, 30)#33, ss_ticket_number#5, amt#26, profit#27, s_city#17] -Arguments: 100, [c_last_name#31 ASC NULLS FIRST, c_first_name#30 ASC NULLS FIRST, substr(s_city#17, 1, 30) ASC NULLS FIRST, profit#27 ASC NULLS FIRST], [c_last_name#31, c_first_name#30, substr(s_city, 1, 30)#33, ss_ticket_number#5, amt#26, profit#27] +Input [7]: [c_last_name#27, c_first_name#26, substr(s_city, 1, 30)#28, ss_ticket_number#5, amt#23, profit#24, s_city#16] +Arguments: 100, [c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, substr(s_city#16, 1, 30) ASC NULLS FIRST, profit#24 ASC NULLS FIRST], [c_last_name#27, c_first_name#26, substr(s_city, 1, 30)#28, ss_ticket_number#5, amt#23, profit#24] ===== Subqueries ===== @@ -195,25 +195,25 @@ BroadcastExchange (38) (34) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#34, d_dow#35] +Output [3]: [d_date_sk#10, d_year#29, d_dow#30] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_dow), EqualTo(d_dow,1), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#34, d_dow#35] +Input [3]: [d_date_sk#10, d_year#29, d_dow#30] (36) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#34, d_dow#35] -Condition : (((isnotnull(d_dow#35) AND (d_dow#35 = 1)) AND d_year#34 IN (1999,2000,2001)) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#10, d_year#29, d_dow#30] +Condition : (((isnotnull(d_dow#30) AND (d_dow#30 = 1)) AND d_year#29 IN (1999,2000,2001)) AND isnotnull(d_date_sk#10)) (37) Project [codegen id : 1] Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#34, d_dow#35] +Input [3]: [d_date_sk#10, d_year#29, d_dow#30] (38) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt index 723a46f2bbcf4..3dedb7c5e356c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q79/explain.txt @@ -78,7 +78,7 @@ Input [3]: [s_store_sk#11, s_number_employees#12, s_city#13] (11) BroadcastExchange Input [2]: [s_store_sk#11, s_city#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#4] @@ -90,84 +90,84 @@ Output [7]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, Input [9]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_store_sk#4, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_store_sk#11, s_city#13] (14) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Output [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [Or(EqualTo(hd_dep_count,6),GreaterThan(hd_vehicle_count,2)), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] (16) Filter [codegen id : 3] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] -Condition : (((hd_dep_count#16 = 6) OR (hd_vehicle_count#17 > 2)) AND isnotnull(hd_demo_sk#15)) +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] +Condition : (((hd_dep_count#15 = 6) OR (hd_vehicle_count#16 > 2)) AND isnotnull(hd_demo_sk#14)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#15] -Input [3]: [hd_demo_sk#15, hd_dep_count#16, hd_vehicle_count#17] +Output [1]: [hd_demo_sk#14] +Input [3]: [hd_demo_sk#14, hd_dep_count#15, hd_vehicle_count#16] (18) BroadcastExchange -Input [1]: [hd_demo_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [hd_demo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#15] +Right keys [1]: [hd_demo_sk#14] Join condition: None (20) Project [codegen id : 4] Output [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#13] -Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#13, hd_demo_sk#15] +Input [8]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#13, hd_demo_sk#14] (21) HashAggregate [codegen id : 4] Input [6]: [ss_customer_sk#1, ss_addr_sk#3, ss_ticket_number#5, ss_coupon_amt#6, ss_net_profit#7, s_city#13] Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13] Functions [2]: [partial_sum(UnscaledValue(ss_coupon_amt#6)), partial_sum(UnscaledValue(ss_net_profit#7))] -Aggregate Attributes [2]: [sum#19, sum#20] -Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, sum#21, sum#22] +Aggregate Attributes [2]: [sum#17, sum#18] +Results [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, sum#19, sum#20] (22) Exchange -Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, sum#21, sum#22] -Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, sum#19, sum#20] +Arguments: hashpartitioning(ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 6] -Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, sum#21, sum#22] +Input [6]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13, sum#19, sum#20] Keys [4]: [ss_ticket_number#5, ss_customer_sk#1, ss_addr_sk#3, s_city#13] Functions [2]: [sum(UnscaledValue(ss_coupon_amt#6)), sum(UnscaledValue(ss_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#24, sum(UnscaledValue(ss_net_profit#7))#25] -Results [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#13, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#24,17,2) AS amt#26, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#25,17,2) AS profit#27] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_coupon_amt#6))#21, sum(UnscaledValue(ss_net_profit#7))#22] +Results [5]: [ss_ticket_number#5, ss_customer_sk#1, s_city#13, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#6))#21,17,2) AS amt#23, MakeDecimal(sum(UnscaledValue(ss_net_profit#7))#22,17,2) AS profit#24] (24) Scan parquet default.customer -Output [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] +Output [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 5] -Input [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] +Input [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] (26) Filter [codegen id : 5] -Input [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] -Condition : isnotnull(c_customer_sk#28) +Input [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] +Condition : isnotnull(c_customer_sk#25) (27) BroadcastExchange -Input [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#31] +Input [3]: [c_customer_sk#25, c_first_name#26, c_last_name#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (28) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#28] +Right keys [1]: [c_customer_sk#25] Join condition: None (29) Project [codegen id : 6] -Output [7]: [c_last_name#30, c_first_name#29, substr(s_city#13, 1, 30) AS substr(s_city, 1, 30)#32, ss_ticket_number#5, amt#26, profit#27, s_city#13] -Input [8]: [ss_ticket_number#5, ss_customer_sk#1, s_city#13, amt#26, profit#27, c_customer_sk#28, c_first_name#29, c_last_name#30] +Output [7]: [c_last_name#27, c_first_name#26, substr(s_city#13, 1, 30) AS substr(s_city, 1, 30)#28, ss_ticket_number#5, amt#23, profit#24, s_city#13] +Input [8]: [ss_ticket_number#5, ss_customer_sk#1, s_city#13, amt#23, profit#24, c_customer_sk#25, c_first_name#26, c_last_name#27] (30) TakeOrderedAndProject -Input [7]: [c_last_name#30, c_first_name#29, substr(s_city, 1, 30)#32, ss_ticket_number#5, amt#26, profit#27, s_city#13] -Arguments: 100, [c_last_name#30 ASC NULLS FIRST, c_first_name#29 ASC NULLS FIRST, substr(s_city#13, 1, 30) ASC NULLS FIRST, profit#27 ASC NULLS FIRST], [c_last_name#30, c_first_name#29, substr(s_city, 1, 30)#32, ss_ticket_number#5, amt#26, profit#27] +Input [7]: [c_last_name#27, c_first_name#26, substr(s_city, 1, 30)#28, ss_ticket_number#5, amt#23, profit#24, s_city#13] +Arguments: 100, [c_last_name#27 ASC NULLS FIRST, c_first_name#26 ASC NULLS FIRST, substr(s_city#13, 1, 30) ASC NULLS FIRST, profit#24 ASC NULLS FIRST], [c_last_name#27, c_first_name#26, substr(s_city, 1, 30)#28, ss_ticket_number#5, amt#23, profit#24] ===== Subqueries ===== @@ -180,25 +180,25 @@ BroadcastExchange (35) (31) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#33, d_dow#34] +Output [3]: [d_date_sk#10, d_year#29, d_dow#30] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_dow), EqualTo(d_dow,1), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#33, d_dow#34] +Input [3]: [d_date_sk#10, d_year#29, d_dow#30] (33) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#33, d_dow#34] -Condition : (((isnotnull(d_dow#34) AND (d_dow#34 = 1)) AND d_year#33 IN (1999,2000,2001)) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#10, d_year#29, d_dow#30] +Condition : (((isnotnull(d_dow#30) AND (d_dow#30 = 1)) AND d_year#29 IN (1999,2000,2001)) AND isnotnull(d_date_sk#10)) (34) Project [codegen id : 1] Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#33, d_dow#34] +Input [3]: [d_date_sk#10, d_year#29, d_dow#30] (35) BroadcastExchange Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.sf100/explain.txt index e8c77e7de05eb..2ff71b73acc62 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8.sf100/explain.txt @@ -93,7 +93,7 @@ Condition : (isnotnull(s_store_sk#6) AND isnotnull(s_zip#8)) (10) BroadcastExchange Input [3]: [s_store_sk#6, s_store_name#7, s_zip#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_store_sk#1] @@ -106,177 +106,177 @@ Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_store_name#7, s_zip# (13) Exchange Input [3]: [ss_net_profit#2, s_store_name#7, s_zip#8] -Arguments: hashpartitioning(substr(s_zip#8, 1, 2), 5), ENSURE_REQUIREMENTS, [id=#10] +Arguments: hashpartitioning(substr(s_zip#8, 1, 2), 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) Sort [codegen id : 4] Input [3]: [ss_net_profit#2, s_store_name#7, s_zip#8] Arguments: [substr(s_zip#8, 1, 2) ASC NULLS FIRST], false, 0 (15) Scan parquet default.customer_address -Output [1]: [ca_zip#11] +Output [1]: [ca_zip#9] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] ReadSchema: struct (16) ColumnarToRow [codegen id : 11] -Input [1]: [ca_zip#11] +Input [1]: [ca_zip#9] (17) Filter [codegen id : 11] -Input [1]: [ca_zip#11] -Condition : (substr(ca_zip#11, 1, 5) INSET 10144, 10336, 10390, 10445, 10516, 10567, 11101, 11356, 11376, 11489, 11634, 11928, 12305, 13354, 13375, 13376, 13394, 13595, 13695, 13955, 14060, 14089, 14171, 14328, 14663, 14867, 14922, 15126, 15146, 15371, 15455, 15559, 15723, 15734, 15765, 15798, 15882, 16021, 16725, 16807, 17043, 17183, 17871, 17879, 17920, 18119, 18270, 18376, 18383, 18426, 18652, 18767, 18799, 18840, 18842, 18845, 18906, 19430, 19505, 19512, 19515, 19736, 19769, 19849, 20004, 20260, 20548, 21076, 21195, 21286, 21309, 21337, 21756, 22152, 22245, 22246, 22351, 22437, 22461, 22685, 22744, 22752, 22927, 23006, 23470, 23932, 23968, 24128, 24206, 24317, 24610, 24671, 24676, 24996, 25003, 25103, 25280, 25486, 25631, 25733, 25782, 25858, 25989, 26065, 26105, 26231, 26233, 26653, 26689, 26859, 27068, 27156, 27385, 27700, 28286, 28488, 28545, 28577, 28587, 28709, 28810, 28898, 28915, 29178, 29741, 29839, 30010, 30122, 30431, 30450, 30469, 30625, 30903, 31016, 31029, 31387, 31671, 31880, 32213, 32754, 33123, 33282, 33515, 33786, 34102, 34322, 34425, 35258, 35458, 35474, 35576, 35850, 35942, 36233, 36420, 36446, 36495, 36634, 37125, 37126, 37930, 38122, 38193, 38415, 38607, 38935, 39127, 39192, 39371, 39516, 39736, 39861, 39972, 40081, 40162, 40558, 40604, 41248, 41367, 41368, 41766, 41918, 42029, 42666, 42961, 43285, 43848, 43933, 44165, 44438, 45200, 45266, 45375, 45549, 45692, 45721, 45748, 46081, 46136, 46820, 47305, 47537, 47770, 48033, 48425, 48583, 49130, 49156, 49448, 50016, 50298, 50308, 50412, 51061, 51103, 51200, 51211, 51622, 51649, 51650, 51798, 51949, 52867, 53179, 53268, 53535, 53672, 54364, 54601, 54917, 55253, 55307, 55565, 56240, 56458, 56529, 56571, 56575, 56616, 56691, 56910, 57047, 57647, 57665, 57834, 57855, 58048, 58058, 58078, 58263, 58470, 58943, 59166, 59402, 60099, 60279, 60576, 61265, 61547, 61810, 61860, 62377, 62496, 62878, 62971, 63089, 63193, 63435, 63792, 63837, 63981, 64034, 64147, 64457, 64528, 64544, 65084, 65164, 66162, 66708, 66864, 67030, 67301, 67467, 67473, 67853, 67875, 67897, 68014, 68100, 68101, 68309, 68341, 68621, 68786, 68806, 68880, 68893, 68908, 69035, 69399, 69913, 69952, 70372, 70466, 70738, 71256, 71286, 71791, 71954, 72013, 72151, 72175, 72305, 72325, 72425, 72550, 72823, 73134, 73171, 73241, 73273, 73520, 73650, 74351, 75691, 76107, 76231, 76232, 76614, 76638, 76698, 77191, 77556, 77610, 77721, 78451, 78567, 78668, 78890, 79077, 79777, 79994, 81019, 81096, 81312, 81426, 82136, 82276, 82636, 83041, 83144, 83444, 83849, 83921, 83926, 83933, 84093, 84935, 85816, 86057, 86198, 86284, 86379, 87343, 87501, 87816, 88086, 88190, 88424, 88885, 89091, 89360, 90225, 90257, 90578, 91068, 91110, 91137, 91393, 92712, 94167, 94627, 94898, 94945, 94983, 96451, 96576, 96765, 96888, 96976, 97189, 97789, 98025, 98235, 98294, 98359, 98569, 99076, 99543 AND isnotnull(substr(ca_zip#11, 1, 5))) +Input [1]: [ca_zip#9] +Condition : (substr(ca_zip#9, 1, 5) INSET 10144, 10336, 10390, 10445, 10516, 10567, 11101, 11356, 11376, 11489, 11634, 11928, 12305, 13354, 13375, 13376, 13394, 13595, 13695, 13955, 14060, 14089, 14171, 14328, 14663, 14867, 14922, 15126, 15146, 15371, 15455, 15559, 15723, 15734, 15765, 15798, 15882, 16021, 16725, 16807, 17043, 17183, 17871, 17879, 17920, 18119, 18270, 18376, 18383, 18426, 18652, 18767, 18799, 18840, 18842, 18845, 18906, 19430, 19505, 19512, 19515, 19736, 19769, 19849, 20004, 20260, 20548, 21076, 21195, 21286, 21309, 21337, 21756, 22152, 22245, 22246, 22351, 22437, 22461, 22685, 22744, 22752, 22927, 23006, 23470, 23932, 23968, 24128, 24206, 24317, 24610, 24671, 24676, 24996, 25003, 25103, 25280, 25486, 25631, 25733, 25782, 25858, 25989, 26065, 26105, 26231, 26233, 26653, 26689, 26859, 27068, 27156, 27385, 27700, 28286, 28488, 28545, 28577, 28587, 28709, 28810, 28898, 28915, 29178, 29741, 29839, 30010, 30122, 30431, 30450, 30469, 30625, 30903, 31016, 31029, 31387, 31671, 31880, 32213, 32754, 33123, 33282, 33515, 33786, 34102, 34322, 34425, 35258, 35458, 35474, 35576, 35850, 35942, 36233, 36420, 36446, 36495, 36634, 37125, 37126, 37930, 38122, 38193, 38415, 38607, 38935, 39127, 39192, 39371, 39516, 39736, 39861, 39972, 40081, 40162, 40558, 40604, 41248, 41367, 41368, 41766, 41918, 42029, 42666, 42961, 43285, 43848, 43933, 44165, 44438, 45200, 45266, 45375, 45549, 45692, 45721, 45748, 46081, 46136, 46820, 47305, 47537, 47770, 48033, 48425, 48583, 49130, 49156, 49448, 50016, 50298, 50308, 50412, 51061, 51103, 51200, 51211, 51622, 51649, 51650, 51798, 51949, 52867, 53179, 53268, 53535, 53672, 54364, 54601, 54917, 55253, 55307, 55565, 56240, 56458, 56529, 56571, 56575, 56616, 56691, 56910, 57047, 57647, 57665, 57834, 57855, 58048, 58058, 58078, 58263, 58470, 58943, 59166, 59402, 60099, 60279, 60576, 61265, 61547, 61810, 61860, 62377, 62496, 62878, 62971, 63089, 63193, 63435, 63792, 63837, 63981, 64034, 64147, 64457, 64528, 64544, 65084, 65164, 66162, 66708, 66864, 67030, 67301, 67467, 67473, 67853, 67875, 67897, 68014, 68100, 68101, 68309, 68341, 68621, 68786, 68806, 68880, 68893, 68908, 69035, 69399, 69913, 69952, 70372, 70466, 70738, 71256, 71286, 71791, 71954, 72013, 72151, 72175, 72305, 72325, 72425, 72550, 72823, 73134, 73171, 73241, 73273, 73520, 73650, 74351, 75691, 76107, 76231, 76232, 76614, 76638, 76698, 77191, 77556, 77610, 77721, 78451, 78567, 78668, 78890, 79077, 79777, 79994, 81019, 81096, 81312, 81426, 82136, 82276, 82636, 83041, 83144, 83444, 83849, 83921, 83926, 83933, 84093, 84935, 85816, 86057, 86198, 86284, 86379, 87343, 87501, 87816, 88086, 88190, 88424, 88885, 89091, 89360, 90225, 90257, 90578, 91068, 91110, 91137, 91393, 92712, 94167, 94627, 94898, 94945, 94983, 96451, 96576, 96765, 96888, 96976, 97189, 97789, 98025, 98235, 98294, 98359, 98569, 99076, 99543 AND isnotnull(substr(ca_zip#9, 1, 5))) (18) Scan parquet default.customer_address -Output [2]: [ca_address_sk#12, ca_zip#13] +Output [2]: [ca_address_sk#10, ca_zip#11] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 5] -Input [2]: [ca_address_sk#12, ca_zip#13] +Input [2]: [ca_address_sk#10, ca_zip#11] (20) Filter [codegen id : 5] -Input [2]: [ca_address_sk#12, ca_zip#13] -Condition : isnotnull(ca_address_sk#12) +Input [2]: [ca_address_sk#10, ca_zip#11] +Condition : isnotnull(ca_address_sk#10) (21) Exchange -Input [2]: [ca_address_sk#12, ca_zip#13] -Arguments: hashpartitioning(ca_address_sk#12, 5), ENSURE_REQUIREMENTS, [id=#14] +Input [2]: [ca_address_sk#10, ca_zip#11] +Arguments: hashpartitioning(ca_address_sk#10, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) Sort [codegen id : 6] -Input [2]: [ca_address_sk#12, ca_zip#13] -Arguments: [ca_address_sk#12 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#10, ca_zip#11] +Arguments: [ca_address_sk#10 ASC NULLS FIRST], false, 0 (23) Scan parquet default.customer -Output [2]: [c_current_addr_sk#15, c_preferred_cust_flag#16] +Output [2]: [c_current_addr_sk#12, c_preferred_cust_flag#13] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)] ReadSchema: struct (24) ColumnarToRow [codegen id : 7] -Input [2]: [c_current_addr_sk#15, c_preferred_cust_flag#16] +Input [2]: [c_current_addr_sk#12, c_preferred_cust_flag#13] (25) Filter [codegen id : 7] -Input [2]: [c_current_addr_sk#15, c_preferred_cust_flag#16] -Condition : ((isnotnull(c_preferred_cust_flag#16) AND (c_preferred_cust_flag#16 = Y)) AND isnotnull(c_current_addr_sk#15)) +Input [2]: [c_current_addr_sk#12, c_preferred_cust_flag#13] +Condition : ((isnotnull(c_preferred_cust_flag#13) AND (c_preferred_cust_flag#13 = Y)) AND isnotnull(c_current_addr_sk#12)) (26) Project [codegen id : 7] -Output [1]: [c_current_addr_sk#15] -Input [2]: [c_current_addr_sk#15, c_preferred_cust_flag#16] +Output [1]: [c_current_addr_sk#12] +Input [2]: [c_current_addr_sk#12, c_preferred_cust_flag#13] (27) Exchange -Input [1]: [c_current_addr_sk#15] -Arguments: hashpartitioning(c_current_addr_sk#15, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [1]: [c_current_addr_sk#12] +Arguments: hashpartitioning(c_current_addr_sk#12, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) Sort [codegen id : 8] -Input [1]: [c_current_addr_sk#15] -Arguments: [c_current_addr_sk#15 ASC NULLS FIRST], false, 0 +Input [1]: [c_current_addr_sk#12] +Arguments: [c_current_addr_sk#12 ASC NULLS FIRST], false, 0 (29) SortMergeJoin [codegen id : 9] -Left keys [1]: [ca_address_sk#12] -Right keys [1]: [c_current_addr_sk#15] +Left keys [1]: [ca_address_sk#10] +Right keys [1]: [c_current_addr_sk#12] Join condition: None (30) Project [codegen id : 9] -Output [1]: [ca_zip#13] -Input [3]: [ca_address_sk#12, ca_zip#13, c_current_addr_sk#15] +Output [1]: [ca_zip#11] +Input [3]: [ca_address_sk#10, ca_zip#11, c_current_addr_sk#12] (31) HashAggregate [codegen id : 9] -Input [1]: [ca_zip#13] -Keys [1]: [ca_zip#13] +Input [1]: [ca_zip#11] +Keys [1]: [ca_zip#11] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#18] -Results [2]: [ca_zip#13, count#19] +Aggregate Attributes [1]: [count#14] +Results [2]: [ca_zip#11, count#15] (32) Exchange -Input [2]: [ca_zip#13, count#19] -Arguments: hashpartitioning(ca_zip#13, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [2]: [ca_zip#11, count#15] +Arguments: hashpartitioning(ca_zip#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] (33) HashAggregate [codegen id : 10] -Input [2]: [ca_zip#13, count#19] -Keys [1]: [ca_zip#13] +Input [2]: [ca_zip#11, count#15] +Keys [1]: [ca_zip#11] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#21] -Results [2]: [substr(ca_zip#13, 1, 5) AS ca_zip#22, count(1)#21 AS cnt#23] +Aggregate Attributes [1]: [count(1)#16] +Results [2]: [substr(ca_zip#11, 1, 5) AS ca_zip#17, count(1)#16 AS cnt#18] (34) Filter [codegen id : 10] -Input [2]: [ca_zip#22, cnt#23] -Condition : (cnt#23 > 10) +Input [2]: [ca_zip#17, cnt#18] +Condition : (cnt#18 > 10) (35) Project [codegen id : 10] -Output [1]: [ca_zip#22] -Input [2]: [ca_zip#22, cnt#23] +Output [1]: [ca_zip#17] +Input [2]: [ca_zip#17, cnt#18] (36) BroadcastExchange -Input [1]: [ca_zip#22] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true])),false), [id=#24] +Input [1]: [ca_zip#17] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true])),false), [plan_id=6] (37) BroadcastHashJoin [codegen id : 11] -Left keys [2]: [coalesce(substr(ca_zip#11, 1, 5), ), isnull(substr(ca_zip#11, 1, 5))] -Right keys [2]: [coalesce(ca_zip#22, ), isnull(ca_zip#22)] +Left keys [2]: [coalesce(substr(ca_zip#9, 1, 5), ), isnull(substr(ca_zip#9, 1, 5))] +Right keys [2]: [coalesce(ca_zip#17, ), isnull(ca_zip#17)] Join condition: None (38) Project [codegen id : 11] -Output [1]: [substr(ca_zip#11, 1, 5) AS ca_zip#25] -Input [1]: [ca_zip#11] +Output [1]: [substr(ca_zip#9, 1, 5) AS ca_zip#19] +Input [1]: [ca_zip#9] (39) HashAggregate [codegen id : 11] -Input [1]: [ca_zip#25] -Keys [1]: [ca_zip#25] +Input [1]: [ca_zip#19] +Keys [1]: [ca_zip#19] Functions: [] Aggregate Attributes: [] -Results [1]: [ca_zip#25] +Results [1]: [ca_zip#19] (40) Exchange -Input [1]: [ca_zip#25] -Arguments: hashpartitioning(ca_zip#25, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [1]: [ca_zip#19] +Arguments: hashpartitioning(ca_zip#19, 5), ENSURE_REQUIREMENTS, [plan_id=7] (41) HashAggregate [codegen id : 12] -Input [1]: [ca_zip#25] -Keys [1]: [ca_zip#25] +Input [1]: [ca_zip#19] +Keys [1]: [ca_zip#19] Functions: [] Aggregate Attributes: [] -Results [1]: [ca_zip#25] +Results [1]: [ca_zip#19] (42) Exchange -Input [1]: [ca_zip#25] -Arguments: hashpartitioning(substr(ca_zip#25, 1, 2), 5), ENSURE_REQUIREMENTS, [id=#27] +Input [1]: [ca_zip#19] +Arguments: hashpartitioning(substr(ca_zip#19, 1, 2), 5), ENSURE_REQUIREMENTS, [plan_id=8] (43) Sort [codegen id : 13] -Input [1]: [ca_zip#25] -Arguments: [substr(ca_zip#25, 1, 2) ASC NULLS FIRST], false, 0 +Input [1]: [ca_zip#19] +Arguments: [substr(ca_zip#19, 1, 2) ASC NULLS FIRST], false, 0 (44) SortMergeJoin [codegen id : 14] Left keys [1]: [substr(s_zip#8, 1, 2)] -Right keys [1]: [substr(ca_zip#25, 1, 2)] +Right keys [1]: [substr(ca_zip#19, 1, 2)] Join condition: None (45) Project [codegen id : 14] Output [2]: [ss_net_profit#2, s_store_name#7] -Input [4]: [ss_net_profit#2, s_store_name#7, s_zip#8, ca_zip#25] +Input [4]: [ss_net_profit#2, s_store_name#7, s_zip#8, ca_zip#19] (46) HashAggregate [codegen id : 14] Input [2]: [ss_net_profit#2, s_store_name#7] Keys [1]: [s_store_name#7] Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum#28] -Results [2]: [s_store_name#7, sum#29] +Aggregate Attributes [1]: [sum#20] +Results [2]: [s_store_name#7, sum#21] (47) Exchange -Input [2]: [s_store_name#7, sum#29] -Arguments: hashpartitioning(s_store_name#7, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [2]: [s_store_name#7, sum#21] +Arguments: hashpartitioning(s_store_name#7, 5), ENSURE_REQUIREMENTS, [plan_id=9] (48) HashAggregate [codegen id : 15] -Input [2]: [s_store_name#7, sum#29] +Input [2]: [s_store_name#7, sum#21] Keys [1]: [s_store_name#7] Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#31] -Results [2]: [s_store_name#7, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#31,17,2) AS sum(ss_net_profit)#32] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#22] +Results [2]: [s_store_name#7, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#22,17,2) AS sum(ss_net_profit)#23] (49) TakeOrderedAndProject -Input [2]: [s_store_name#7, sum(ss_net_profit)#32] -Arguments: 100, [s_store_name#7 ASC NULLS FIRST], [s_store_name#7, sum(ss_net_profit)#32] +Input [2]: [s_store_name#7, sum(ss_net_profit)#23] +Arguments: 100, [s_store_name#7 ASC NULLS FIRST], [s_store_name#7, sum(ss_net_profit)#23] ===== Subqueries ===== @@ -289,25 +289,25 @@ BroadcastExchange (54) (50) Scan parquet default.date_dim -Output [3]: [d_date_sk#5, d_year#33, d_qoy#34] +Output [3]: [d_date_sk#5, d_year#24, d_qoy#25] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#5, d_year#33, d_qoy#34] +Input [3]: [d_date_sk#5, d_year#24, d_qoy#25] (52) Filter [codegen id : 1] -Input [3]: [d_date_sk#5, d_year#33, d_qoy#34] -Condition : ((((isnotnull(d_qoy#34) AND isnotnull(d_year#33)) AND (d_qoy#34 = 2)) AND (d_year#33 = 1998)) AND isnotnull(d_date_sk#5)) +Input [3]: [d_date_sk#5, d_year#24, d_qoy#25] +Condition : ((((isnotnull(d_qoy#25) AND isnotnull(d_year#24)) AND (d_qoy#25 = 2)) AND (d_year#24 = 1998)) AND isnotnull(d_date_sk#5)) (53) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [3]: [d_date_sk#5, d_year#33, d_qoy#34] +Input [3]: [d_date_sk#5, d_year#24, d_qoy#25] (54) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt index efac82f31fdb4..ca1658049240e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q8/explain.txt @@ -87,7 +87,7 @@ Condition : (isnotnull(s_store_sk#6) AND isnotnull(s_zip#8)) (10) BroadcastExchange Input [3]: [s_store_sk#6, s_store_name#7, s_zip#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_store_sk#1] @@ -99,154 +99,154 @@ Output [3]: [ss_net_profit#2, s_store_name#7, s_zip#8] Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_store_name#7, s_zip#8] (13) Scan parquet default.customer_address -Output [1]: [ca_zip#10] +Output [1]: [ca_zip#9] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] ReadSchema: struct (14) ColumnarToRow [codegen id : 6] -Input [1]: [ca_zip#10] +Input [1]: [ca_zip#9] (15) Filter [codegen id : 6] -Input [1]: [ca_zip#10] -Condition : (substr(ca_zip#10, 1, 5) INSET 10144, 10336, 10390, 10445, 10516, 10567, 11101, 11356, 11376, 11489, 11634, 11928, 12305, 13354, 13375, 13376, 13394, 13595, 13695, 13955, 14060, 14089, 14171, 14328, 14663, 14867, 14922, 15126, 15146, 15371, 15455, 15559, 15723, 15734, 15765, 15798, 15882, 16021, 16725, 16807, 17043, 17183, 17871, 17879, 17920, 18119, 18270, 18376, 18383, 18426, 18652, 18767, 18799, 18840, 18842, 18845, 18906, 19430, 19505, 19512, 19515, 19736, 19769, 19849, 20004, 20260, 20548, 21076, 21195, 21286, 21309, 21337, 21756, 22152, 22245, 22246, 22351, 22437, 22461, 22685, 22744, 22752, 22927, 23006, 23470, 23932, 23968, 24128, 24206, 24317, 24610, 24671, 24676, 24996, 25003, 25103, 25280, 25486, 25631, 25733, 25782, 25858, 25989, 26065, 26105, 26231, 26233, 26653, 26689, 26859, 27068, 27156, 27385, 27700, 28286, 28488, 28545, 28577, 28587, 28709, 28810, 28898, 28915, 29178, 29741, 29839, 30010, 30122, 30431, 30450, 30469, 30625, 30903, 31016, 31029, 31387, 31671, 31880, 32213, 32754, 33123, 33282, 33515, 33786, 34102, 34322, 34425, 35258, 35458, 35474, 35576, 35850, 35942, 36233, 36420, 36446, 36495, 36634, 37125, 37126, 37930, 38122, 38193, 38415, 38607, 38935, 39127, 39192, 39371, 39516, 39736, 39861, 39972, 40081, 40162, 40558, 40604, 41248, 41367, 41368, 41766, 41918, 42029, 42666, 42961, 43285, 43848, 43933, 44165, 44438, 45200, 45266, 45375, 45549, 45692, 45721, 45748, 46081, 46136, 46820, 47305, 47537, 47770, 48033, 48425, 48583, 49130, 49156, 49448, 50016, 50298, 50308, 50412, 51061, 51103, 51200, 51211, 51622, 51649, 51650, 51798, 51949, 52867, 53179, 53268, 53535, 53672, 54364, 54601, 54917, 55253, 55307, 55565, 56240, 56458, 56529, 56571, 56575, 56616, 56691, 56910, 57047, 57647, 57665, 57834, 57855, 58048, 58058, 58078, 58263, 58470, 58943, 59166, 59402, 60099, 60279, 60576, 61265, 61547, 61810, 61860, 62377, 62496, 62878, 62971, 63089, 63193, 63435, 63792, 63837, 63981, 64034, 64147, 64457, 64528, 64544, 65084, 65164, 66162, 66708, 66864, 67030, 67301, 67467, 67473, 67853, 67875, 67897, 68014, 68100, 68101, 68309, 68341, 68621, 68786, 68806, 68880, 68893, 68908, 69035, 69399, 69913, 69952, 70372, 70466, 70738, 71256, 71286, 71791, 71954, 72013, 72151, 72175, 72305, 72325, 72425, 72550, 72823, 73134, 73171, 73241, 73273, 73520, 73650, 74351, 75691, 76107, 76231, 76232, 76614, 76638, 76698, 77191, 77556, 77610, 77721, 78451, 78567, 78668, 78890, 79077, 79777, 79994, 81019, 81096, 81312, 81426, 82136, 82276, 82636, 83041, 83144, 83444, 83849, 83921, 83926, 83933, 84093, 84935, 85816, 86057, 86198, 86284, 86379, 87343, 87501, 87816, 88086, 88190, 88424, 88885, 89091, 89360, 90225, 90257, 90578, 91068, 91110, 91137, 91393, 92712, 94167, 94627, 94898, 94945, 94983, 96451, 96576, 96765, 96888, 96976, 97189, 97789, 98025, 98235, 98294, 98359, 98569, 99076, 99543 AND isnotnull(substr(ca_zip#10, 1, 5))) +Input [1]: [ca_zip#9] +Condition : (substr(ca_zip#9, 1, 5) INSET 10144, 10336, 10390, 10445, 10516, 10567, 11101, 11356, 11376, 11489, 11634, 11928, 12305, 13354, 13375, 13376, 13394, 13595, 13695, 13955, 14060, 14089, 14171, 14328, 14663, 14867, 14922, 15126, 15146, 15371, 15455, 15559, 15723, 15734, 15765, 15798, 15882, 16021, 16725, 16807, 17043, 17183, 17871, 17879, 17920, 18119, 18270, 18376, 18383, 18426, 18652, 18767, 18799, 18840, 18842, 18845, 18906, 19430, 19505, 19512, 19515, 19736, 19769, 19849, 20004, 20260, 20548, 21076, 21195, 21286, 21309, 21337, 21756, 22152, 22245, 22246, 22351, 22437, 22461, 22685, 22744, 22752, 22927, 23006, 23470, 23932, 23968, 24128, 24206, 24317, 24610, 24671, 24676, 24996, 25003, 25103, 25280, 25486, 25631, 25733, 25782, 25858, 25989, 26065, 26105, 26231, 26233, 26653, 26689, 26859, 27068, 27156, 27385, 27700, 28286, 28488, 28545, 28577, 28587, 28709, 28810, 28898, 28915, 29178, 29741, 29839, 30010, 30122, 30431, 30450, 30469, 30625, 30903, 31016, 31029, 31387, 31671, 31880, 32213, 32754, 33123, 33282, 33515, 33786, 34102, 34322, 34425, 35258, 35458, 35474, 35576, 35850, 35942, 36233, 36420, 36446, 36495, 36634, 37125, 37126, 37930, 38122, 38193, 38415, 38607, 38935, 39127, 39192, 39371, 39516, 39736, 39861, 39972, 40081, 40162, 40558, 40604, 41248, 41367, 41368, 41766, 41918, 42029, 42666, 42961, 43285, 43848, 43933, 44165, 44438, 45200, 45266, 45375, 45549, 45692, 45721, 45748, 46081, 46136, 46820, 47305, 47537, 47770, 48033, 48425, 48583, 49130, 49156, 49448, 50016, 50298, 50308, 50412, 51061, 51103, 51200, 51211, 51622, 51649, 51650, 51798, 51949, 52867, 53179, 53268, 53535, 53672, 54364, 54601, 54917, 55253, 55307, 55565, 56240, 56458, 56529, 56571, 56575, 56616, 56691, 56910, 57047, 57647, 57665, 57834, 57855, 58048, 58058, 58078, 58263, 58470, 58943, 59166, 59402, 60099, 60279, 60576, 61265, 61547, 61810, 61860, 62377, 62496, 62878, 62971, 63089, 63193, 63435, 63792, 63837, 63981, 64034, 64147, 64457, 64528, 64544, 65084, 65164, 66162, 66708, 66864, 67030, 67301, 67467, 67473, 67853, 67875, 67897, 68014, 68100, 68101, 68309, 68341, 68621, 68786, 68806, 68880, 68893, 68908, 69035, 69399, 69913, 69952, 70372, 70466, 70738, 71256, 71286, 71791, 71954, 72013, 72151, 72175, 72305, 72325, 72425, 72550, 72823, 73134, 73171, 73241, 73273, 73520, 73650, 74351, 75691, 76107, 76231, 76232, 76614, 76638, 76698, 77191, 77556, 77610, 77721, 78451, 78567, 78668, 78890, 79077, 79777, 79994, 81019, 81096, 81312, 81426, 82136, 82276, 82636, 83041, 83144, 83444, 83849, 83921, 83926, 83933, 84093, 84935, 85816, 86057, 86198, 86284, 86379, 87343, 87501, 87816, 88086, 88190, 88424, 88885, 89091, 89360, 90225, 90257, 90578, 91068, 91110, 91137, 91393, 92712, 94167, 94627, 94898, 94945, 94983, 96451, 96576, 96765, 96888, 96976, 97189, 97789, 98025, 98235, 98294, 98359, 98569, 99076, 99543 AND isnotnull(substr(ca_zip#9, 1, 5))) (16) Scan parquet default.customer_address -Output [2]: [ca_address_sk#11, ca_zip#12] +Output [2]: [ca_address_sk#10, ca_zip#11] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#11, ca_zip#12] +Input [2]: [ca_address_sk#10, ca_zip#11] (18) Filter [codegen id : 4] -Input [2]: [ca_address_sk#11, ca_zip#12] -Condition : isnotnull(ca_address_sk#11) +Input [2]: [ca_address_sk#10, ca_zip#11] +Condition : isnotnull(ca_address_sk#10) (19) Scan parquet default.customer -Output [2]: [c_current_addr_sk#13, c_preferred_cust_flag#14] +Output [2]: [c_current_addr_sk#12, c_preferred_cust_flag#13] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_preferred_cust_flag), EqualTo(c_preferred_cust_flag,Y), IsNotNull(c_current_addr_sk)] ReadSchema: struct (20) ColumnarToRow [codegen id : 3] -Input [2]: [c_current_addr_sk#13, c_preferred_cust_flag#14] +Input [2]: [c_current_addr_sk#12, c_preferred_cust_flag#13] (21) Filter [codegen id : 3] -Input [2]: [c_current_addr_sk#13, c_preferred_cust_flag#14] -Condition : ((isnotnull(c_preferred_cust_flag#14) AND (c_preferred_cust_flag#14 = Y)) AND isnotnull(c_current_addr_sk#13)) +Input [2]: [c_current_addr_sk#12, c_preferred_cust_flag#13] +Condition : ((isnotnull(c_preferred_cust_flag#13) AND (c_preferred_cust_flag#13 = Y)) AND isnotnull(c_current_addr_sk#12)) (22) Project [codegen id : 3] -Output [1]: [c_current_addr_sk#13] -Input [2]: [c_current_addr_sk#13, c_preferred_cust_flag#14] +Output [1]: [c_current_addr_sk#12] +Input [2]: [c_current_addr_sk#12, c_preferred_cust_flag#13] (23) BroadcastExchange -Input [1]: [c_current_addr_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [c_current_addr_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (24) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ca_address_sk#11] -Right keys [1]: [c_current_addr_sk#13] +Left keys [1]: [ca_address_sk#10] +Right keys [1]: [c_current_addr_sk#12] Join condition: None (25) Project [codegen id : 4] -Output [1]: [ca_zip#12] -Input [3]: [ca_address_sk#11, ca_zip#12, c_current_addr_sk#13] +Output [1]: [ca_zip#11] +Input [3]: [ca_address_sk#10, ca_zip#11, c_current_addr_sk#12] (26) HashAggregate [codegen id : 4] -Input [1]: [ca_zip#12] -Keys [1]: [ca_zip#12] +Input [1]: [ca_zip#11] +Keys [1]: [ca_zip#11] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [2]: [ca_zip#12, count#17] +Aggregate Attributes [1]: [count#14] +Results [2]: [ca_zip#11, count#15] (27) Exchange -Input [2]: [ca_zip#12, count#17] -Arguments: hashpartitioning(ca_zip#12, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [2]: [ca_zip#11, count#15] +Arguments: hashpartitioning(ca_zip#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (28) HashAggregate [codegen id : 5] -Input [2]: [ca_zip#12, count#17] -Keys [1]: [ca_zip#12] +Input [2]: [ca_zip#11, count#15] +Keys [1]: [ca_zip#11] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [2]: [substr(ca_zip#12, 1, 5) AS ca_zip#20, count(1)#19 AS cnt#21] +Aggregate Attributes [1]: [count(1)#16] +Results [2]: [substr(ca_zip#11, 1, 5) AS ca_zip#17, count(1)#16 AS cnt#18] (29) Filter [codegen id : 5] -Input [2]: [ca_zip#20, cnt#21] -Condition : (cnt#21 > 10) +Input [2]: [ca_zip#17, cnt#18] +Condition : (cnt#18 > 10) (30) Project [codegen id : 5] -Output [1]: [ca_zip#20] -Input [2]: [ca_zip#20, cnt#21] +Output [1]: [ca_zip#17] +Input [2]: [ca_zip#17, cnt#18] (31) BroadcastExchange -Input [1]: [ca_zip#20] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true])),false), [id=#22] +Input [1]: [ca_zip#17] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true])),false), [plan_id=4] (32) BroadcastHashJoin [codegen id : 6] -Left keys [2]: [coalesce(substr(ca_zip#10, 1, 5), ), isnull(substr(ca_zip#10, 1, 5))] -Right keys [2]: [coalesce(ca_zip#20, ), isnull(ca_zip#20)] +Left keys [2]: [coalesce(substr(ca_zip#9, 1, 5), ), isnull(substr(ca_zip#9, 1, 5))] +Right keys [2]: [coalesce(ca_zip#17, ), isnull(ca_zip#17)] Join condition: None (33) Project [codegen id : 6] -Output [1]: [substr(ca_zip#10, 1, 5) AS ca_zip#23] -Input [1]: [ca_zip#10] +Output [1]: [substr(ca_zip#9, 1, 5) AS ca_zip#19] +Input [1]: [ca_zip#9] (34) HashAggregate [codegen id : 6] -Input [1]: [ca_zip#23] -Keys [1]: [ca_zip#23] +Input [1]: [ca_zip#19] +Keys [1]: [ca_zip#19] Functions: [] Aggregate Attributes: [] -Results [1]: [ca_zip#23] +Results [1]: [ca_zip#19] (35) Exchange -Input [1]: [ca_zip#23] -Arguments: hashpartitioning(ca_zip#23, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [1]: [ca_zip#19] +Arguments: hashpartitioning(ca_zip#19, 5), ENSURE_REQUIREMENTS, [plan_id=5] (36) HashAggregate [codegen id : 7] -Input [1]: [ca_zip#23] -Keys [1]: [ca_zip#23] +Input [1]: [ca_zip#19] +Keys [1]: [ca_zip#19] Functions: [] Aggregate Attributes: [] -Results [1]: [ca_zip#23] +Results [1]: [ca_zip#19] (37) BroadcastExchange -Input [1]: [ca_zip#23] -Arguments: HashedRelationBroadcastMode(List(substr(input[0, string, true], 1, 2)),false), [id=#25] +Input [1]: [ca_zip#19] +Arguments: HashedRelationBroadcastMode(List(substr(input[0, string, true], 1, 2)),false), [plan_id=6] (38) BroadcastHashJoin [codegen id : 8] Left keys [1]: [substr(s_zip#8, 1, 2)] -Right keys [1]: [substr(ca_zip#23, 1, 2)] +Right keys [1]: [substr(ca_zip#19, 1, 2)] Join condition: None (39) Project [codegen id : 8] Output [2]: [ss_net_profit#2, s_store_name#7] -Input [4]: [ss_net_profit#2, s_store_name#7, s_zip#8, ca_zip#23] +Input [4]: [ss_net_profit#2, s_store_name#7, s_zip#8, ca_zip#19] (40) HashAggregate [codegen id : 8] Input [2]: [ss_net_profit#2, s_store_name#7] Keys [1]: [s_store_name#7] Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum#26] -Results [2]: [s_store_name#7, sum#27] +Aggregate Attributes [1]: [sum#20] +Results [2]: [s_store_name#7, sum#21] (41) Exchange -Input [2]: [s_store_name#7, sum#27] -Arguments: hashpartitioning(s_store_name#7, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [2]: [s_store_name#7, sum#21] +Arguments: hashpartitioning(s_store_name#7, 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) HashAggregate [codegen id : 9] -Input [2]: [s_store_name#7, sum#27] +Input [2]: [s_store_name#7, sum#21] Keys [1]: [s_store_name#7] Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#29] -Results [2]: [s_store_name#7, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#29,17,2) AS sum(ss_net_profit)#30] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#22] +Results [2]: [s_store_name#7, MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#22,17,2) AS sum(ss_net_profit)#23] (43) TakeOrderedAndProject -Input [2]: [s_store_name#7, sum(ss_net_profit)#30] -Arguments: 100, [s_store_name#7 ASC NULLS FIRST], [s_store_name#7, sum(ss_net_profit)#30] +Input [2]: [s_store_name#7, sum(ss_net_profit)#23] +Arguments: 100, [s_store_name#7 ASC NULLS FIRST], [s_store_name#7, sum(ss_net_profit)#23] ===== Subqueries ===== @@ -259,25 +259,25 @@ BroadcastExchange (48) (44) Scan parquet default.date_dim -Output [3]: [d_date_sk#5, d_year#31, d_qoy#32] +Output [3]: [d_date_sk#5, d_year#24, d_qoy#25] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_qoy), IsNotNull(d_year), EqualTo(d_qoy,2), EqualTo(d_year,1998), IsNotNull(d_date_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#5, d_year#31, d_qoy#32] +Input [3]: [d_date_sk#5, d_year#24, d_qoy#25] (46) Filter [codegen id : 1] -Input [3]: [d_date_sk#5, d_year#31, d_qoy#32] -Condition : ((((isnotnull(d_qoy#32) AND isnotnull(d_year#31)) AND (d_qoy#32 = 2)) AND (d_year#31 = 1998)) AND isnotnull(d_date_sk#5)) +Input [3]: [d_date_sk#5, d_year#24, d_qoy#25] +Condition : ((((isnotnull(d_qoy#25) AND isnotnull(d_year#24)) AND (d_qoy#25 = 2)) AND (d_year#24 = 1998)) AND isnotnull(d_date_sk#5)) (47) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [3]: [d_date_sk#5, d_year#31, d_qoy#32] +Input [3]: [d_date_sk#5, d_year#24, d_qoy#25] (48) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt index 9cc78e12028ff..19a460d27b10a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80.sf100/explain.txt @@ -125,471 +125,471 @@ Condition : ((isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnul (4) Exchange Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] -Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] Arguments: [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_returns -Output [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Output [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Input [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] (8) Filter [codegen id : 3] -Input [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] -Condition : (isnotnull(sr_item_sk#10) AND isnotnull(sr_ticket_number#11)) +Input [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] +Condition : (isnotnull(sr_item_sk#9) AND isnotnull(sr_ticket_number#10)) (9) Project [codegen id : 3] -Output [4]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] -Input [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Output [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Input [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] (10) Exchange -Input [4]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] -Arguments: hashpartitioning(sr_item_sk#10, sr_ticket_number#11, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Arguments: hashpartitioning(sr_item_sk#9, sr_ticket_number#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [4]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] -Arguments: [sr_item_sk#10 ASC NULLS FIRST, sr_ticket_number#11 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Arguments: [sr_item_sk#9 ASC NULLS FIRST, sr_ticket_number#10 ASC NULLS FIRST], false, 0 (12) SortMergeJoin [codegen id : 9] Left keys [2]: [ss_item_sk#1, ss_ticket_number#4] -Right keys [2]: [sr_item_sk#10, sr_ticket_number#11] +Right keys [2]: [sr_item_sk#9, sr_ticket_number#10] Join condition: None (13) Project [codegen id : 9] -Output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13] -Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] +Output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12] +Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] (14) Scan parquet default.item -Output [2]: [i_item_sk#16, i_current_price#17] +Output [2]: [i_item_sk#14, i_current_price#15] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 5] -Input [2]: [i_item_sk#16, i_current_price#17] +Input [2]: [i_item_sk#14, i_current_price#15] (16) Filter [codegen id : 5] -Input [2]: [i_item_sk#16, i_current_price#17] -Condition : ((isnotnull(i_current_price#17) AND (i_current_price#17 > 50.00)) AND isnotnull(i_item_sk#16)) +Input [2]: [i_item_sk#14, i_current_price#15] +Condition : ((isnotnull(i_current_price#15) AND (i_current_price#15 > 50.00)) AND isnotnull(i_item_sk#14)) (17) Project [codegen id : 5] -Output [1]: [i_item_sk#16] -Input [2]: [i_item_sk#16, i_current_price#17] +Output [1]: [i_item_sk#14] +Input [2]: [i_item_sk#14, i_current_price#15] (18) BroadcastExchange -Input [1]: [i_item_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [i_item_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (19) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#16] +Right keys [1]: [i_item_sk#14] Join condition: None (20) Project [codegen id : 9] -Output [7]: [ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13] -Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13, i_item_sk#16] +Output [7]: [ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12, i_item_sk#14] (21) Scan parquet default.promotion -Output [2]: [p_promo_sk#19, p_channel_tv#20] +Output [2]: [p_promo_sk#16, p_channel_tv#17] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 6] -Input [2]: [p_promo_sk#19, p_channel_tv#20] +Input [2]: [p_promo_sk#16, p_channel_tv#17] (23) Filter [codegen id : 6] -Input [2]: [p_promo_sk#19, p_channel_tv#20] -Condition : ((isnotnull(p_channel_tv#20) AND (p_channel_tv#20 = N)) AND isnotnull(p_promo_sk#19)) +Input [2]: [p_promo_sk#16, p_channel_tv#17] +Condition : ((isnotnull(p_channel_tv#17) AND (p_channel_tv#17 = N)) AND isnotnull(p_promo_sk#16)) (24) Project [codegen id : 6] -Output [1]: [p_promo_sk#19] -Input [2]: [p_promo_sk#19, p_channel_tv#20] +Output [1]: [p_promo_sk#16] +Input [2]: [p_promo_sk#16, p_channel_tv#17] (25) BroadcastExchange -Input [1]: [p_promo_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] +Input [1]: [p_promo_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (26) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_promo_sk#3] -Right keys [1]: [p_promo_sk#19] +Right keys [1]: [p_promo_sk#16] Join condition: None (27) Project [codegen id : 9] -Output [6]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13] -Input [8]: [ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13, p_promo_sk#19] +Output [6]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12] +Input [8]: [ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12, p_promo_sk#16] (28) ReusedExchange [Reuses operator id: 112] -Output [1]: [d_date_sk#22] +Output [1]: [d_date_sk#18] (29) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_sold_date_sk#7] -Right keys [1]: [d_date_sk#22] +Right keys [1]: [d_date_sk#18] Join condition: None (30) Project [codegen id : 9] -Output [5]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13] -Input [7]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13, d_date_sk#22] +Output [5]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12] +Input [7]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12, d_date_sk#18] (31) Scan parquet default.store -Output [2]: [s_store_sk#23, s_store_id#24] +Output [2]: [s_store_sk#19, s_store_id#20] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 8] -Input [2]: [s_store_sk#23, s_store_id#24] +Input [2]: [s_store_sk#19, s_store_id#20] (33) Filter [codegen id : 8] -Input [2]: [s_store_sk#23, s_store_id#24] -Condition : isnotnull(s_store_sk#23) +Input [2]: [s_store_sk#19, s_store_id#20] +Condition : isnotnull(s_store_sk#19) (34) BroadcastExchange -Input [2]: [s_store_sk#23, s_store_id#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] +Input [2]: [s_store_sk#19, s_store_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (35) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#2] -Right keys [1]: [s_store_sk#23] +Right keys [1]: [s_store_sk#19] Join condition: None (36) Project [codegen id : 9] -Output [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#24] -Input [7]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_sk#23, s_store_id#24] +Output [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#20] +Input [7]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_sk#19, s_store_id#20] (37) HashAggregate [codegen id : 9] -Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#24] -Keys [1]: [s_store_id#24] -Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [5]: [sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] -Results [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] +Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#20] +Keys [1]: [s_store_id#20] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [5]: [sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Results [6]: [s_store_id#20, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] (38) Exchange -Input [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] -Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [6]: [s_store_id#20, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Arguments: hashpartitioning(s_store_id#20, 5), ENSURE_REQUIREMENTS, [plan_id=6] (39) HashAggregate [codegen id : 10] -Input [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] -Keys [1]: [s_store_id#24] -Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#37, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39] -Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#37,17,2) AS sales#40, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38 AS returns#41, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39 AS profit#42, store channel AS channel#43, concat(store, s_store_id#24) AS id#44] +Input [6]: [s_store_id#20, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Keys [1]: [s_store_id#20] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#31, sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00))#32, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#33] +Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#31,17,2) AS sales#34, sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00))#32 AS returns#35, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#33 AS profit#36, store channel AS channel#37, concat(store, s_store_id#20) AS id#38] (40) Scan parquet default.catalog_sales -Output [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#51), dynamicpruningexpression(cs_sold_date_sk#51 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(cs_sold_date_sk#45), dynamicpruningexpression(cs_sold_date_sk#45 IN dynamicpruning#8)] PushedFilters: [IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] ReadSchema: struct (41) ColumnarToRow [codegen id : 11] -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] (42) Filter [codegen id : 11] -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] -Condition : ((isnotnull(cs_catalog_page_sk#45) AND isnotnull(cs_item_sk#46)) AND isnotnull(cs_promo_sk#47)) +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : ((isnotnull(cs_catalog_page_sk#39) AND isnotnull(cs_item_sk#40)) AND isnotnull(cs_promo_sk#41)) (43) Exchange -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] -Arguments: hashpartitioning(cs_item_sk#46, cs_order_number#48, 5), ENSURE_REQUIREMENTS, [id=#52] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: hashpartitioning(cs_item_sk#40, cs_order_number#42, 5), ENSURE_REQUIREMENTS, [plan_id=7] (44) Sort [codegen id : 12] -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] -Arguments: [cs_item_sk#46 ASC NULLS FIRST, cs_order_number#48 ASC NULLS FIRST], false, 0 +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: [cs_item_sk#40 ASC NULLS FIRST, cs_order_number#42 ASC NULLS FIRST], false, 0 (45) Scan parquet default.catalog_returns -Output [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] ReadSchema: struct (46) ColumnarToRow [codegen id : 13] -Input [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] (47) Filter [codegen id : 13] -Input [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] -Condition : (isnotnull(cr_item_sk#53) AND isnotnull(cr_order_number#54)) +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] +Condition : (isnotnull(cr_item_sk#46) AND isnotnull(cr_order_number#47)) (48) Project [codegen id : 13] -Output [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] -Input [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] (49) Exchange -Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] -Arguments: hashpartitioning(cr_item_sk#53, cr_order_number#54, 5), ENSURE_REQUIREMENTS, [id=#58] +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: hashpartitioning(cr_item_sk#46, cr_order_number#47, 5), ENSURE_REQUIREMENTS, [plan_id=8] (50) Sort [codegen id : 14] -Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] -Arguments: [cr_item_sk#53 ASC NULLS FIRST, cr_order_number#54 ASC NULLS FIRST], false, 0 +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: [cr_item_sk#46 ASC NULLS FIRST, cr_order_number#47 ASC NULLS FIRST], false, 0 (51) SortMergeJoin [codegen id : 19] -Left keys [2]: [cs_item_sk#46, cs_order_number#48] -Right keys [2]: [cr_item_sk#53, cr_order_number#54] +Left keys [2]: [cs_item_sk#40, cs_order_number#42] +Right keys [2]: [cr_item_sk#46, cr_order_number#47] Join condition: None (52) Project [codegen id : 19] -Output [8]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56] -Input [11]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] +Output [8]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49] +Input [11]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] (53) ReusedExchange [Reuses operator id: 18] -Output [1]: [i_item_sk#59] +Output [1]: [i_item_sk#51] (54) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_item_sk#46] -Right keys [1]: [i_item_sk#59] +Left keys [1]: [cs_item_sk#40] +Right keys [1]: [i_item_sk#51] Join condition: None (55) Project [codegen id : 19] -Output [7]: [cs_catalog_page_sk#45, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56] -Input [9]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56, i_item_sk#59] +Output [7]: [cs_catalog_page_sk#39, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49] +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49, i_item_sk#51] (56) ReusedExchange [Reuses operator id: 25] -Output [1]: [p_promo_sk#60] +Output [1]: [p_promo_sk#52] (57) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_promo_sk#47] -Right keys [1]: [p_promo_sk#60] +Left keys [1]: [cs_promo_sk#41] +Right keys [1]: [p_promo_sk#52] Join condition: None (58) Project [codegen id : 19] -Output [6]: [cs_catalog_page_sk#45, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56] -Input [8]: [cs_catalog_page_sk#45, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56, p_promo_sk#60] +Output [6]: [cs_catalog_page_sk#39, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49] +Input [8]: [cs_catalog_page_sk#39, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49, p_promo_sk#52] (59) ReusedExchange [Reuses operator id: 112] -Output [1]: [d_date_sk#61] +Output [1]: [d_date_sk#53] (60) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_sold_date_sk#51] -Right keys [1]: [d_date_sk#61] +Left keys [1]: [cs_sold_date_sk#45] +Right keys [1]: [d_date_sk#53] Join condition: None (61) Project [codegen id : 19] -Output [5]: [cs_catalog_page_sk#45, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56] -Input [7]: [cs_catalog_page_sk#45, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56, d_date_sk#61] +Output [5]: [cs_catalog_page_sk#39, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49] +Input [7]: [cs_catalog_page_sk#39, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49, d_date_sk#53] (62) Scan parquet default.catalog_page -Output [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] +Output [2]: [cp_catalog_page_sk#54, cp_catalog_page_id#55] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct (63) ColumnarToRow [codegen id : 18] -Input [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] +Input [2]: [cp_catalog_page_sk#54, cp_catalog_page_id#55] (64) Filter [codegen id : 18] -Input [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] -Condition : isnotnull(cp_catalog_page_sk#62) +Input [2]: [cp_catalog_page_sk#54, cp_catalog_page_id#55] +Condition : isnotnull(cp_catalog_page_sk#54) (65) BroadcastExchange -Input [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#64] +Input [2]: [cp_catalog_page_sk#54, cp_catalog_page_id#55] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] (66) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_catalog_page_sk#45] -Right keys [1]: [cp_catalog_page_sk#62] +Left keys [1]: [cs_catalog_page_sk#39] +Right keys [1]: [cp_catalog_page_sk#54] Join condition: None (67) Project [codegen id : 19] -Output [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#63] -Input [7]: [cs_catalog_page_sk#45, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_sk#62, cp_catalog_page_id#63] +Output [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#55] +Input [7]: [cs_catalog_page_sk#39, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_sk#54, cp_catalog_page_id#55] (68) HashAggregate [codegen id : 19] -Input [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#63] -Keys [1]: [cp_catalog_page_id#63] -Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [5]: [sum#65, sum#66, isEmpty#67, sum#68, isEmpty#69] -Results [6]: [cp_catalog_page_id#63, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] +Input [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#55] +Keys [1]: [cp_catalog_page_id#55] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#43)), partial_sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [5]: [sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Results [6]: [cp_catalog_page_id#55, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] (69) Exchange -Input [6]: [cp_catalog_page_id#63, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] -Arguments: hashpartitioning(cp_catalog_page_id#63, 5), ENSURE_REQUIREMENTS, [id=#75] +Input [6]: [cp_catalog_page_id#55, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Arguments: hashpartitioning(cp_catalog_page_id#55, 5), ENSURE_REQUIREMENTS, [plan_id=10] (70) HashAggregate [codegen id : 20] -Input [6]: [cp_catalog_page_id#63, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] -Keys [1]: [cp_catalog_page_id#63] -Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#76, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78] -Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#76,17,2) AS sales#79, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77 AS returns#80, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78 AS profit#81, catalog channel AS channel#82, concat(catalog_page, cp_catalog_page_id#63) AS id#83] +Input [6]: [cp_catalog_page_id#55, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Keys [1]: [cp_catalog_page_id#55] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#43)), sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#43))#66, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67, sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#68] +Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#43))#66,17,2) AS sales#69, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67 AS returns#70, sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#68 AS profit#71, catalog channel AS channel#72, concat(catalog_page, cp_catalog_page_id#55) AS id#73] (71) Scan parquet default.web_sales -Output [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] +Output [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#90), dynamicpruningexpression(ws_sold_date_sk#90 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ws_sold_date_sk#80), dynamicpruningexpression(ws_sold_date_sk#80 IN dynamicpruning#8)] PushedFilters: [IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] ReadSchema: struct (72) ColumnarToRow [codegen id : 21] -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] (73) Filter [codegen id : 21] -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] -Condition : ((isnotnull(ws_web_site_sk#85) AND isnotnull(ws_item_sk#84)) AND isnotnull(ws_promo_sk#86)) +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Condition : ((isnotnull(ws_web_site_sk#75) AND isnotnull(ws_item_sk#74)) AND isnotnull(ws_promo_sk#76)) (74) Exchange -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] -Arguments: hashpartitioning(ws_item_sk#84, ws_order_number#87, 5), ENSURE_REQUIREMENTS, [id=#91] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: hashpartitioning(ws_item_sk#74, ws_order_number#77, 5), ENSURE_REQUIREMENTS, [plan_id=11] (75) Sort [codegen id : 22] -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] -Arguments: [ws_item_sk#84 ASC NULLS FIRST, ws_order_number#87 ASC NULLS FIRST], false, 0 +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: [ws_item_sk#74 ASC NULLS FIRST, ws_order_number#77 ASC NULLS FIRST], false, 0 (76) Scan parquet default.web_returns -Output [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Output [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] ReadSchema: struct (77) ColumnarToRow [codegen id : 23] -Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] (78) Filter [codegen id : 23] -Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] -Condition : (isnotnull(wr_item_sk#92) AND isnotnull(wr_order_number#93)) +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] +Condition : (isnotnull(wr_item_sk#81) AND isnotnull(wr_order_number#82)) (79) Project [codegen id : 23] -Output [4]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] -Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Output [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] (80) Exchange -Input [4]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] -Arguments: hashpartitioning(wr_item_sk#92, wr_order_number#93, 5), ENSURE_REQUIREMENTS, [id=#97] +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: hashpartitioning(wr_item_sk#81, wr_order_number#82, 5), ENSURE_REQUIREMENTS, [plan_id=12] (81) Sort [codegen id : 24] -Input [4]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] -Arguments: [wr_item_sk#92 ASC NULLS FIRST, wr_order_number#93 ASC NULLS FIRST], false, 0 +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: [wr_item_sk#81 ASC NULLS FIRST, wr_order_number#82 ASC NULLS FIRST], false, 0 (82) SortMergeJoin [codegen id : 29] -Left keys [2]: [ws_item_sk#84, ws_order_number#87] -Right keys [2]: [wr_item_sk#92, wr_order_number#93] +Left keys [2]: [ws_item_sk#74, ws_order_number#77] +Right keys [2]: [wr_item_sk#81, wr_order_number#82] Join condition: None (83) Project [codegen id : 29] -Output [8]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95] -Input [11]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] +Output [8]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84] +Input [11]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] (84) ReusedExchange [Reuses operator id: 18] -Output [1]: [i_item_sk#98] +Output [1]: [i_item_sk#86] (85) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_item_sk#84] -Right keys [1]: [i_item_sk#98] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [i_item_sk#86] Join condition: None (86) Project [codegen id : 29] -Output [7]: [ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95] -Input [9]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95, i_item_sk#98] +Output [7]: [ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84] +Input [9]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84, i_item_sk#86] (87) ReusedExchange [Reuses operator id: 25] -Output [1]: [p_promo_sk#99] +Output [1]: [p_promo_sk#87] (88) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_promo_sk#86] -Right keys [1]: [p_promo_sk#99] +Left keys [1]: [ws_promo_sk#76] +Right keys [1]: [p_promo_sk#87] Join condition: None (89) Project [codegen id : 29] -Output [6]: [ws_web_site_sk#85, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95] -Input [8]: [ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95, p_promo_sk#99] +Output [6]: [ws_web_site_sk#75, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84] +Input [8]: [ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84, p_promo_sk#87] (90) ReusedExchange [Reuses operator id: 112] -Output [1]: [d_date_sk#100] +Output [1]: [d_date_sk#88] (91) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_sold_date_sk#90] -Right keys [1]: [d_date_sk#100] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#88] Join condition: None (92) Project [codegen id : 29] -Output [5]: [ws_web_site_sk#85, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95] -Input [7]: [ws_web_site_sk#85, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95, d_date_sk#100] +Output [5]: [ws_web_site_sk#75, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84] +Input [7]: [ws_web_site_sk#75, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84, d_date_sk#88] (93) Scan parquet default.web_site -Output [2]: [web_site_sk#101, web_site_id#102] +Output [2]: [web_site_sk#89, web_site_id#90] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct (94) ColumnarToRow [codegen id : 28] -Input [2]: [web_site_sk#101, web_site_id#102] +Input [2]: [web_site_sk#89, web_site_id#90] (95) Filter [codegen id : 28] -Input [2]: [web_site_sk#101, web_site_id#102] -Condition : isnotnull(web_site_sk#101) +Input [2]: [web_site_sk#89, web_site_id#90] +Condition : isnotnull(web_site_sk#89) (96) BroadcastExchange -Input [2]: [web_site_sk#101, web_site_id#102] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#103] +Input [2]: [web_site_sk#89, web_site_id#90] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=13] (97) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_web_site_sk#85] -Right keys [1]: [web_site_sk#101] +Left keys [1]: [ws_web_site_sk#75] +Right keys [1]: [web_site_sk#89] Join condition: None (98) Project [codegen id : 29] -Output [5]: [ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#102] -Input [7]: [ws_web_site_sk#85, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_sk#101, web_site_id#102] +Output [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#90] +Input [7]: [ws_web_site_sk#75, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_sk#89, web_site_id#90] (99) HashAggregate [codegen id : 29] -Input [5]: [ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#102] -Keys [1]: [web_site_id#102] -Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#88)), partial_sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [5]: [sum#104, sum#105, isEmpty#106, sum#107, isEmpty#108] -Results [6]: [web_site_id#102, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] +Input [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#90] +Keys [1]: [web_site_id#90] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#78)), partial_sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [5]: [sum#91, sum#92, isEmpty#93, sum#94, isEmpty#95] +Results [6]: [web_site_id#90, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] (100) Exchange -Input [6]: [web_site_id#102, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] -Arguments: hashpartitioning(web_site_id#102, 5), ENSURE_REQUIREMENTS, [id=#114] +Input [6]: [web_site_id#90, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Arguments: hashpartitioning(web_site_id#90, 5), ENSURE_REQUIREMENTS, [plan_id=14] (101) HashAggregate [codegen id : 30] -Input [6]: [web_site_id#102, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] -Keys [1]: [web_site_id#102] -Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#88)), sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#88))#115, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117] -Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#88))#115,17,2) AS sales#118, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116 AS returns#119, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117 AS profit#120, web channel AS channel#121, concat(web_site, web_site_id#102) AS id#122] +Input [6]: [web_site_id#90, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Keys [1]: [web_site_id#90] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#78)), sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#78))#101, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102, sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#103] +Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#78))#101,17,2) AS sales#104, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102 AS returns#105, sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#103 AS profit#106, web channel AS channel#107, concat(web_site, web_site_id#90) AS id#108] (102) Union (103) Expand [codegen id : 31] -Input [5]: [sales#40, returns#41, profit#42, channel#43, id#44] -Arguments: [[sales#40, returns#41, profit#42, channel#43, id#44, 0], [sales#40, returns#41, profit#42, channel#43, null, 1], [sales#40, returns#41, profit#42, null, null, 3]], [sales#40, returns#41, profit#42, channel#123, id#124, spark_grouping_id#125] +Input [5]: [sales#34, returns#35, profit#36, channel#37, id#38] +Arguments: [[sales#34, returns#35, profit#36, channel#37, id#38, 0], [sales#34, returns#35, profit#36, channel#37, null, 1], [sales#34, returns#35, profit#36, null, null, 3]], [sales#34, returns#35, profit#36, channel#109, id#110, spark_grouping_id#111] (104) HashAggregate [codegen id : 31] -Input [6]: [sales#40, returns#41, profit#42, channel#123, id#124, spark_grouping_id#125] -Keys [3]: [channel#123, id#124, spark_grouping_id#125] -Functions [3]: [partial_sum(sales#40), partial_sum(returns#41), partial_sum(profit#42)] -Aggregate Attributes [6]: [sum#126, isEmpty#127, sum#128, isEmpty#129, sum#130, isEmpty#131] -Results [9]: [channel#123, id#124, spark_grouping_id#125, sum#132, isEmpty#133, sum#134, isEmpty#135, sum#136, isEmpty#137] +Input [6]: [sales#34, returns#35, profit#36, channel#109, id#110, spark_grouping_id#111] +Keys [3]: [channel#109, id#110, spark_grouping_id#111] +Functions [3]: [partial_sum(sales#34), partial_sum(returns#35), partial_sum(profit#36)] +Aggregate Attributes [6]: [sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117] +Results [9]: [channel#109, id#110, spark_grouping_id#111, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] (105) Exchange -Input [9]: [channel#123, id#124, spark_grouping_id#125, sum#132, isEmpty#133, sum#134, isEmpty#135, sum#136, isEmpty#137] -Arguments: hashpartitioning(channel#123, id#124, spark_grouping_id#125, 5), ENSURE_REQUIREMENTS, [id=#138] +Input [9]: [channel#109, id#110, spark_grouping_id#111, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] +Arguments: hashpartitioning(channel#109, id#110, spark_grouping_id#111, 5), ENSURE_REQUIREMENTS, [plan_id=15] (106) HashAggregate [codegen id : 32] -Input [9]: [channel#123, id#124, spark_grouping_id#125, sum#132, isEmpty#133, sum#134, isEmpty#135, sum#136, isEmpty#137] -Keys [3]: [channel#123, id#124, spark_grouping_id#125] -Functions [3]: [sum(sales#40), sum(returns#41), sum(profit#42)] -Aggregate Attributes [3]: [sum(sales#40)#139, sum(returns#41)#140, sum(profit#42)#141] -Results [5]: [channel#123, id#124, sum(sales#40)#139 AS sales#142, sum(returns#41)#140 AS returns#143, sum(profit#42)#141 AS profit#144] +Input [9]: [channel#109, id#110, spark_grouping_id#111, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] +Keys [3]: [channel#109, id#110, spark_grouping_id#111] +Functions [3]: [sum(sales#34), sum(returns#35), sum(profit#36)] +Aggregate Attributes [3]: [sum(sales#34)#124, sum(returns#35)#125, sum(profit#36)#126] +Results [5]: [channel#109, id#110, sum(sales#34)#124 AS sales#127, sum(returns#35)#125 AS returns#128, sum(profit#36)#126 AS profit#129] (107) TakeOrderedAndProject -Input [5]: [channel#123, id#124, sales#142, returns#143, profit#144] -Arguments: 100, [channel#123 ASC NULLS FIRST, id#124 ASC NULLS FIRST], [channel#123, id#124, sales#142, returns#143, profit#144] +Input [5]: [channel#109, id#110, sales#127, returns#128, profit#129] +Arguments: 100, [channel#109 ASC NULLS FIRST, id#110 ASC NULLS FIRST], [channel#109, id#110, sales#127, returns#128, profit#129] ===== Subqueries ===== @@ -602,29 +602,29 @@ BroadcastExchange (112) (108) Scan parquet default.date_dim -Output [2]: [d_date_sk#22, d_date#145] +Output [2]: [d_date_sk#18, d_date#130] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), IsNotNull(d_date_sk)] ReadSchema: struct (109) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#22, d_date#145] +Input [2]: [d_date_sk#18, d_date#130] (110) Filter [codegen id : 1] -Input [2]: [d_date_sk#22, d_date#145] -Condition : (((isnotnull(d_date#145) AND (d_date#145 >= 2000-08-23)) AND (d_date#145 <= 2000-09-22)) AND isnotnull(d_date_sk#22)) +Input [2]: [d_date_sk#18, d_date#130] +Condition : (((isnotnull(d_date#130) AND (d_date#130 >= 2000-08-23)) AND (d_date#130 <= 2000-09-22)) AND isnotnull(d_date_sk#18)) (111) Project [codegen id : 1] -Output [1]: [d_date_sk#22] -Input [2]: [d_date_sk#22, d_date#145] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_date#130] (112) BroadcastExchange -Input [1]: [d_date_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#146] +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=16] -Subquery:2 Hosting operator id = 40 Hosting Expression = cs_sold_date_sk#51 IN dynamicpruning#8 +Subquery:2 Hosting operator id = 40 Hosting Expression = cs_sold_date_sk#45 IN dynamicpruning#8 -Subquery:3 Hosting operator id = 71 Hosting Expression = ws_sold_date_sk#90 IN dynamicpruning#8 +Subquery:3 Hosting operator id = 71 Hosting Expression = ws_sold_date_sk#80 IN dynamicpruning#8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt index 20cf55dba4482..daf8834a68310 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q80/explain.txt @@ -125,471 +125,471 @@ Condition : ((isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnul (4) Exchange Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] -Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] Arguments: [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_returns -Output [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Output [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Input [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] (8) Filter [codegen id : 3] -Input [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] -Condition : (isnotnull(sr_item_sk#10) AND isnotnull(sr_ticket_number#11)) +Input [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] +Condition : (isnotnull(sr_item_sk#9) AND isnotnull(sr_ticket_number#10)) (9) Project [codegen id : 3] -Output [4]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] -Input [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Output [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Input [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] (10) Exchange -Input [4]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] -Arguments: hashpartitioning(sr_item_sk#10, sr_ticket_number#11, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Arguments: hashpartitioning(sr_item_sk#9, sr_ticket_number#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [4]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] -Arguments: [sr_item_sk#10 ASC NULLS FIRST, sr_ticket_number#11 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Arguments: [sr_item_sk#9 ASC NULLS FIRST, sr_ticket_number#10 ASC NULLS FIRST], false, 0 (12) SortMergeJoin [codegen id : 9] Left keys [2]: [ss_item_sk#1, ss_ticket_number#4] -Right keys [2]: [sr_item_sk#10, sr_ticket_number#11] +Right keys [2]: [sr_item_sk#9, sr_ticket_number#10] Join condition: None (13) Project [codegen id : 9] -Output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13] -Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] +Output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12] +Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] (14) ReusedExchange [Reuses operator id: 112] -Output [1]: [d_date_sk#16] +Output [1]: [d_date_sk#14] (15) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_sold_date_sk#7] -Right keys [1]: [d_date_sk#16] +Right keys [1]: [d_date_sk#14] Join condition: None (16) Project [codegen id : 9] -Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13] -Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13, d_date_sk#16] +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12, d_date_sk#14] (17) Scan parquet default.store -Output [2]: [s_store_sk#17, s_store_id#18] +Output [2]: [s_store_sk#15, s_store_id#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (18) ColumnarToRow [codegen id : 6] -Input [2]: [s_store_sk#17, s_store_id#18] +Input [2]: [s_store_sk#15, s_store_id#16] (19) Filter [codegen id : 6] -Input [2]: [s_store_sk#17, s_store_id#18] -Condition : isnotnull(s_store_sk#17) +Input [2]: [s_store_sk#15, s_store_id#16] +Condition : isnotnull(s_store_sk#15) (20) BroadcastExchange -Input [2]: [s_store_sk#17, s_store_id#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +Input [2]: [s_store_sk#15, s_store_id#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#2] -Right keys [1]: [s_store_sk#17] +Right keys [1]: [s_store_sk#15] Join condition: None (22) Project [codegen id : 9] -Output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#18] -Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_sk#17, s_store_id#18] +Output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#16] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_sk#15, s_store_id#16] (23) Scan parquet default.item -Output [2]: [i_item_sk#20, i_current_price#21] +Output [2]: [i_item_sk#17, i_current_price#18] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] ReadSchema: struct (24) ColumnarToRow [codegen id : 7] -Input [2]: [i_item_sk#20, i_current_price#21] +Input [2]: [i_item_sk#17, i_current_price#18] (25) Filter [codegen id : 7] -Input [2]: [i_item_sk#20, i_current_price#21] -Condition : ((isnotnull(i_current_price#21) AND (i_current_price#21 > 50.00)) AND isnotnull(i_item_sk#20)) +Input [2]: [i_item_sk#17, i_current_price#18] +Condition : ((isnotnull(i_current_price#18) AND (i_current_price#18 > 50.00)) AND isnotnull(i_item_sk#17)) (26) Project [codegen id : 7] -Output [1]: [i_item_sk#20] -Input [2]: [i_item_sk#20, i_current_price#21] +Output [1]: [i_item_sk#17] +Input [2]: [i_item_sk#17, i_current_price#18] (27) BroadcastExchange -Input [1]: [i_item_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [i_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (28) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#20] +Right keys [1]: [i_item_sk#17] Join condition: None (29) Project [codegen id : 9] -Output [6]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#18] -Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#18, i_item_sk#20] +Output [6]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#16] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#16, i_item_sk#17] (30) Scan parquet default.promotion -Output [2]: [p_promo_sk#23, p_channel_tv#24] +Output [2]: [p_promo_sk#19, p_channel_tv#20] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 8] -Input [2]: [p_promo_sk#23, p_channel_tv#24] +Input [2]: [p_promo_sk#19, p_channel_tv#20] (32) Filter [codegen id : 8] -Input [2]: [p_promo_sk#23, p_channel_tv#24] -Condition : ((isnotnull(p_channel_tv#24) AND (p_channel_tv#24 = N)) AND isnotnull(p_promo_sk#23)) +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Condition : ((isnotnull(p_channel_tv#20) AND (p_channel_tv#20 = N)) AND isnotnull(p_promo_sk#19)) (33) Project [codegen id : 8] -Output [1]: [p_promo_sk#23] -Input [2]: [p_promo_sk#23, p_channel_tv#24] +Output [1]: [p_promo_sk#19] +Input [2]: [p_promo_sk#19, p_channel_tv#20] (34) BroadcastExchange -Input [1]: [p_promo_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [p_promo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (35) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_promo_sk#3] -Right keys [1]: [p_promo_sk#23] +Right keys [1]: [p_promo_sk#19] Join condition: None (36) Project [codegen id : 9] -Output [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#18] -Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#18, p_promo_sk#23] +Output [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#16] +Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#16, p_promo_sk#19] (37) HashAggregate [codegen id : 9] -Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#18] -Keys [1]: [s_store_id#18] -Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [5]: [sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] -Results [6]: [s_store_id#18, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] +Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#16] +Keys [1]: [s_store_id#16] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [5]: [sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Results [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] (38) Exchange -Input [6]: [s_store_id#18, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] -Arguments: hashpartitioning(s_store_id#18, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Arguments: hashpartitioning(s_store_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=6] (39) HashAggregate [codegen id : 10] -Input [6]: [s_store_id#18, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] -Keys [1]: [s_store_id#18] -Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#37, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39] -Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#37,17,2) AS sales#40, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38 AS returns#41, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39 AS profit#42, store channel AS channel#43, concat(store, s_store_id#18) AS id#44] +Input [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Keys [1]: [s_store_id#16] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#31, sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00))#32, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#33] +Results [5]: [MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#31,17,2) AS sales#34, sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00))#32 AS returns#35, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#33 AS profit#36, store channel AS channel#37, concat(store, s_store_id#16) AS id#38] (40) Scan parquet default.catalog_sales -Output [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#51), dynamicpruningexpression(cs_sold_date_sk#51 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(cs_sold_date_sk#45), dynamicpruningexpression(cs_sold_date_sk#45 IN dynamicpruning#8)] PushedFilters: [IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] ReadSchema: struct (41) ColumnarToRow [codegen id : 11] -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] (42) Filter [codegen id : 11] -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] -Condition : ((isnotnull(cs_catalog_page_sk#45) AND isnotnull(cs_item_sk#46)) AND isnotnull(cs_promo_sk#47)) +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : ((isnotnull(cs_catalog_page_sk#39) AND isnotnull(cs_item_sk#40)) AND isnotnull(cs_promo_sk#41)) (43) Exchange -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] -Arguments: hashpartitioning(cs_item_sk#46, cs_order_number#48, 5), ENSURE_REQUIREMENTS, [id=#52] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: hashpartitioning(cs_item_sk#40, cs_order_number#42, 5), ENSURE_REQUIREMENTS, [plan_id=7] (44) Sort [codegen id : 12] -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] -Arguments: [cs_item_sk#46 ASC NULLS FIRST, cs_order_number#48 ASC NULLS FIRST], false, 0 +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: [cs_item_sk#40 ASC NULLS FIRST, cs_order_number#42 ASC NULLS FIRST], false, 0 (45) Scan parquet default.catalog_returns -Output [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] ReadSchema: struct (46) ColumnarToRow [codegen id : 13] -Input [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] (47) Filter [codegen id : 13] -Input [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] -Condition : (isnotnull(cr_item_sk#53) AND isnotnull(cr_order_number#54)) +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] +Condition : (isnotnull(cr_item_sk#46) AND isnotnull(cr_order_number#47)) (48) Project [codegen id : 13] -Output [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] -Input [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] (49) Exchange -Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] -Arguments: hashpartitioning(cr_item_sk#53, cr_order_number#54, 5), ENSURE_REQUIREMENTS, [id=#58] +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: hashpartitioning(cr_item_sk#46, cr_order_number#47, 5), ENSURE_REQUIREMENTS, [plan_id=8] (50) Sort [codegen id : 14] -Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] -Arguments: [cr_item_sk#53 ASC NULLS FIRST, cr_order_number#54 ASC NULLS FIRST], false, 0 +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: [cr_item_sk#46 ASC NULLS FIRST, cr_order_number#47 ASC NULLS FIRST], false, 0 (51) SortMergeJoin [codegen id : 19] -Left keys [2]: [cs_item_sk#46, cs_order_number#48] -Right keys [2]: [cr_item_sk#53, cr_order_number#54] +Left keys [2]: [cs_item_sk#40, cs_order_number#42] +Right keys [2]: [cr_item_sk#46, cr_order_number#47] Join condition: None (52) Project [codegen id : 19] -Output [8]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56] -Input [11]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] +Output [8]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49] +Input [11]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] (53) ReusedExchange [Reuses operator id: 112] -Output [1]: [d_date_sk#59] +Output [1]: [d_date_sk#51] (54) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_sold_date_sk#51] -Right keys [1]: [d_date_sk#59] +Left keys [1]: [cs_sold_date_sk#45] +Right keys [1]: [d_date_sk#51] Join condition: None (55) Project [codegen id : 19] -Output [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56] -Input [9]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56, d_date_sk#59] +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49] +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49, d_date_sk#51] (56) Scan parquet default.catalog_page -Output [2]: [cp_catalog_page_sk#60, cp_catalog_page_id#61] +Output [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct (57) ColumnarToRow [codegen id : 16] -Input [2]: [cp_catalog_page_sk#60, cp_catalog_page_id#61] +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] (58) Filter [codegen id : 16] -Input [2]: [cp_catalog_page_sk#60, cp_catalog_page_id#61] -Condition : isnotnull(cp_catalog_page_sk#60) +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Condition : isnotnull(cp_catalog_page_sk#52) (59) BroadcastExchange -Input [2]: [cp_catalog_page_sk#60, cp_catalog_page_id#61] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] (60) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_catalog_page_sk#45] -Right keys [1]: [cp_catalog_page_sk#60] +Left keys [1]: [cs_catalog_page_sk#39] +Right keys [1]: [cp_catalog_page_sk#52] Join condition: None (61) Project [codegen id : 19] -Output [7]: [cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#61] -Input [9]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_sk#60, cp_catalog_page_id#61] +Output [7]: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_sk#52, cp_catalog_page_id#53] (62) ReusedExchange [Reuses operator id: 27] -Output [1]: [i_item_sk#63] +Output [1]: [i_item_sk#54] (63) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_item_sk#46] -Right keys [1]: [i_item_sk#63] +Left keys [1]: [cs_item_sk#40] +Right keys [1]: [i_item_sk#54] Join condition: None (64) Project [codegen id : 19] -Output [6]: [cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#61] -Input [8]: [cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#61, i_item_sk#63] +Output [6]: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [8]: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53, i_item_sk#54] (65) ReusedExchange [Reuses operator id: 34] -Output [1]: [p_promo_sk#64] +Output [1]: [p_promo_sk#55] (66) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_promo_sk#47] -Right keys [1]: [p_promo_sk#64] +Left keys [1]: [cs_promo_sk#41] +Right keys [1]: [p_promo_sk#55] Join condition: None (67) Project [codegen id : 19] -Output [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#61] -Input [7]: [cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#61, p_promo_sk#64] +Output [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [7]: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53, p_promo_sk#55] (68) HashAggregate [codegen id : 19] -Input [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#61] -Keys [1]: [cp_catalog_page_id#61] -Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [5]: [sum#65, sum#66, isEmpty#67, sum#68, isEmpty#69] -Results [6]: [cp_catalog_page_id#61, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] +Input [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Keys [1]: [cp_catalog_page_id#53] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#43)), partial_sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [5]: [sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Results [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] (69) Exchange -Input [6]: [cp_catalog_page_id#61, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] -Arguments: hashpartitioning(cp_catalog_page_id#61, 5), ENSURE_REQUIREMENTS, [id=#75] +Input [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Arguments: hashpartitioning(cp_catalog_page_id#53, 5), ENSURE_REQUIREMENTS, [plan_id=10] (70) HashAggregate [codegen id : 20] -Input [6]: [cp_catalog_page_id#61, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] -Keys [1]: [cp_catalog_page_id#61] -Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#76, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78] -Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#76,17,2) AS sales#79, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77 AS returns#80, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78 AS profit#81, catalog channel AS channel#82, concat(catalog_page, cp_catalog_page_id#61) AS id#83] +Input [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Keys [1]: [cp_catalog_page_id#53] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#43)), sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#43))#66, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67, sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#68] +Results [5]: [MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#43))#66,17,2) AS sales#69, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67 AS returns#70, sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#68 AS profit#71, catalog channel AS channel#72, concat(catalog_page, cp_catalog_page_id#53) AS id#73] (71) Scan parquet default.web_sales -Output [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] +Output [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#90), dynamicpruningexpression(ws_sold_date_sk#90 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ws_sold_date_sk#80), dynamicpruningexpression(ws_sold_date_sk#80 IN dynamicpruning#8)] PushedFilters: [IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] ReadSchema: struct (72) ColumnarToRow [codegen id : 21] -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] (73) Filter [codegen id : 21] -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] -Condition : ((isnotnull(ws_web_site_sk#85) AND isnotnull(ws_item_sk#84)) AND isnotnull(ws_promo_sk#86)) +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Condition : ((isnotnull(ws_web_site_sk#75) AND isnotnull(ws_item_sk#74)) AND isnotnull(ws_promo_sk#76)) (74) Exchange -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] -Arguments: hashpartitioning(ws_item_sk#84, ws_order_number#87, 5), ENSURE_REQUIREMENTS, [id=#91] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: hashpartitioning(ws_item_sk#74, ws_order_number#77, 5), ENSURE_REQUIREMENTS, [plan_id=11] (75) Sort [codegen id : 22] -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] -Arguments: [ws_item_sk#84 ASC NULLS FIRST, ws_order_number#87 ASC NULLS FIRST], false, 0 +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: [ws_item_sk#74 ASC NULLS FIRST, ws_order_number#77 ASC NULLS FIRST], false, 0 (76) Scan parquet default.web_returns -Output [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Output [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] ReadSchema: struct (77) ColumnarToRow [codegen id : 23] -Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] (78) Filter [codegen id : 23] -Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] -Condition : (isnotnull(wr_item_sk#92) AND isnotnull(wr_order_number#93)) +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] +Condition : (isnotnull(wr_item_sk#81) AND isnotnull(wr_order_number#82)) (79) Project [codegen id : 23] -Output [4]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] -Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Output [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] (80) Exchange -Input [4]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] -Arguments: hashpartitioning(wr_item_sk#92, wr_order_number#93, 5), ENSURE_REQUIREMENTS, [id=#97] +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: hashpartitioning(wr_item_sk#81, wr_order_number#82, 5), ENSURE_REQUIREMENTS, [plan_id=12] (81) Sort [codegen id : 24] -Input [4]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] -Arguments: [wr_item_sk#92 ASC NULLS FIRST, wr_order_number#93 ASC NULLS FIRST], false, 0 +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: [wr_item_sk#81 ASC NULLS FIRST, wr_order_number#82 ASC NULLS FIRST], false, 0 (82) SortMergeJoin [codegen id : 29] -Left keys [2]: [ws_item_sk#84, ws_order_number#87] -Right keys [2]: [wr_item_sk#92, wr_order_number#93] +Left keys [2]: [ws_item_sk#74, ws_order_number#77] +Right keys [2]: [wr_item_sk#81, wr_order_number#82] Join condition: None (83) Project [codegen id : 29] -Output [8]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95] -Input [11]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] +Output [8]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84] +Input [11]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] (84) ReusedExchange [Reuses operator id: 112] -Output [1]: [d_date_sk#98] +Output [1]: [d_date_sk#86] (85) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_sold_date_sk#90] -Right keys [1]: [d_date_sk#98] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#86] Join condition: None (86) Project [codegen id : 29] -Output [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95] -Input [9]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95, d_date_sk#98] +Output [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84] +Input [9]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84, d_date_sk#86] (87) Scan parquet default.web_site -Output [2]: [web_site_sk#99, web_site_id#100] +Output [2]: [web_site_sk#87, web_site_id#88] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct (88) ColumnarToRow [codegen id : 26] -Input [2]: [web_site_sk#99, web_site_id#100] +Input [2]: [web_site_sk#87, web_site_id#88] (89) Filter [codegen id : 26] -Input [2]: [web_site_sk#99, web_site_id#100] -Condition : isnotnull(web_site_sk#99) +Input [2]: [web_site_sk#87, web_site_id#88] +Condition : isnotnull(web_site_sk#87) (90) BroadcastExchange -Input [2]: [web_site_sk#99, web_site_id#100] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#101] +Input [2]: [web_site_sk#87, web_site_id#88] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=13] (91) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_web_site_sk#85] -Right keys [1]: [web_site_sk#99] +Left keys [1]: [ws_web_site_sk#75] +Right keys [1]: [web_site_sk#87] Join condition: None (92) Project [codegen id : 29] -Output [7]: [ws_item_sk#84, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#100] -Input [9]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_sk#99, web_site_id#100] +Output [7]: [ws_item_sk#74, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [9]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_sk#87, web_site_id#88] (93) ReusedExchange [Reuses operator id: 27] -Output [1]: [i_item_sk#102] +Output [1]: [i_item_sk#89] (94) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_item_sk#84] -Right keys [1]: [i_item_sk#102] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [i_item_sk#89] Join condition: None (95) Project [codegen id : 29] -Output [6]: [ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#100] -Input [8]: [ws_item_sk#84, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#100, i_item_sk#102] +Output [6]: [ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [8]: [ws_item_sk#74, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88, i_item_sk#89] (96) ReusedExchange [Reuses operator id: 34] -Output [1]: [p_promo_sk#103] +Output [1]: [p_promo_sk#90] (97) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_promo_sk#86] -Right keys [1]: [p_promo_sk#103] +Left keys [1]: [ws_promo_sk#76] +Right keys [1]: [p_promo_sk#90] Join condition: None (98) Project [codegen id : 29] -Output [5]: [ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#100] -Input [7]: [ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#100, p_promo_sk#103] +Output [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [7]: [ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88, p_promo_sk#90] (99) HashAggregate [codegen id : 29] -Input [5]: [ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#100] -Keys [1]: [web_site_id#100] -Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#88)), partial_sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [5]: [sum#104, sum#105, isEmpty#106, sum#107, isEmpty#108] -Results [6]: [web_site_id#100, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] +Input [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Keys [1]: [web_site_id#88] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#78)), partial_sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [5]: [sum#91, sum#92, isEmpty#93, sum#94, isEmpty#95] +Results [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] (100) Exchange -Input [6]: [web_site_id#100, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] -Arguments: hashpartitioning(web_site_id#100, 5), ENSURE_REQUIREMENTS, [id=#114] +Input [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Arguments: hashpartitioning(web_site_id#88, 5), ENSURE_REQUIREMENTS, [plan_id=14] (101) HashAggregate [codegen id : 30] -Input [6]: [web_site_id#100, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] -Keys [1]: [web_site_id#100] -Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#88)), sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#88))#115, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117] -Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#88))#115,17,2) AS sales#118, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116 AS returns#119, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117 AS profit#120, web channel AS channel#121, concat(web_site, web_site_id#100) AS id#122] +Input [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Keys [1]: [web_site_id#88] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#78)), sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#78))#101, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102, sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#103] +Results [5]: [MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#78))#101,17,2) AS sales#104, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102 AS returns#105, sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#103 AS profit#106, web channel AS channel#107, concat(web_site, web_site_id#88) AS id#108] (102) Union (103) Expand [codegen id : 31] -Input [5]: [sales#40, returns#41, profit#42, channel#43, id#44] -Arguments: [[sales#40, returns#41, profit#42, channel#43, id#44, 0], [sales#40, returns#41, profit#42, channel#43, null, 1], [sales#40, returns#41, profit#42, null, null, 3]], [sales#40, returns#41, profit#42, channel#123, id#124, spark_grouping_id#125] +Input [5]: [sales#34, returns#35, profit#36, channel#37, id#38] +Arguments: [[sales#34, returns#35, profit#36, channel#37, id#38, 0], [sales#34, returns#35, profit#36, channel#37, null, 1], [sales#34, returns#35, profit#36, null, null, 3]], [sales#34, returns#35, profit#36, channel#109, id#110, spark_grouping_id#111] (104) HashAggregate [codegen id : 31] -Input [6]: [sales#40, returns#41, profit#42, channel#123, id#124, spark_grouping_id#125] -Keys [3]: [channel#123, id#124, spark_grouping_id#125] -Functions [3]: [partial_sum(sales#40), partial_sum(returns#41), partial_sum(profit#42)] -Aggregate Attributes [6]: [sum#126, isEmpty#127, sum#128, isEmpty#129, sum#130, isEmpty#131] -Results [9]: [channel#123, id#124, spark_grouping_id#125, sum#132, isEmpty#133, sum#134, isEmpty#135, sum#136, isEmpty#137] +Input [6]: [sales#34, returns#35, profit#36, channel#109, id#110, spark_grouping_id#111] +Keys [3]: [channel#109, id#110, spark_grouping_id#111] +Functions [3]: [partial_sum(sales#34), partial_sum(returns#35), partial_sum(profit#36)] +Aggregate Attributes [6]: [sum#112, isEmpty#113, sum#114, isEmpty#115, sum#116, isEmpty#117] +Results [9]: [channel#109, id#110, spark_grouping_id#111, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] (105) Exchange -Input [9]: [channel#123, id#124, spark_grouping_id#125, sum#132, isEmpty#133, sum#134, isEmpty#135, sum#136, isEmpty#137] -Arguments: hashpartitioning(channel#123, id#124, spark_grouping_id#125, 5), ENSURE_REQUIREMENTS, [id=#138] +Input [9]: [channel#109, id#110, spark_grouping_id#111, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] +Arguments: hashpartitioning(channel#109, id#110, spark_grouping_id#111, 5), ENSURE_REQUIREMENTS, [plan_id=15] (106) HashAggregate [codegen id : 32] -Input [9]: [channel#123, id#124, spark_grouping_id#125, sum#132, isEmpty#133, sum#134, isEmpty#135, sum#136, isEmpty#137] -Keys [3]: [channel#123, id#124, spark_grouping_id#125] -Functions [3]: [sum(sales#40), sum(returns#41), sum(profit#42)] -Aggregate Attributes [3]: [sum(sales#40)#139, sum(returns#41)#140, sum(profit#42)#141] -Results [5]: [channel#123, id#124, sum(sales#40)#139 AS sales#142, sum(returns#41)#140 AS returns#143, sum(profit#42)#141 AS profit#144] +Input [9]: [channel#109, id#110, spark_grouping_id#111, sum#118, isEmpty#119, sum#120, isEmpty#121, sum#122, isEmpty#123] +Keys [3]: [channel#109, id#110, spark_grouping_id#111] +Functions [3]: [sum(sales#34), sum(returns#35), sum(profit#36)] +Aggregate Attributes [3]: [sum(sales#34)#124, sum(returns#35)#125, sum(profit#36)#126] +Results [5]: [channel#109, id#110, sum(sales#34)#124 AS sales#127, sum(returns#35)#125 AS returns#128, sum(profit#36)#126 AS profit#129] (107) TakeOrderedAndProject -Input [5]: [channel#123, id#124, sales#142, returns#143, profit#144] -Arguments: 100, [channel#123 ASC NULLS FIRST, id#124 ASC NULLS FIRST], [channel#123, id#124, sales#142, returns#143, profit#144] +Input [5]: [channel#109, id#110, sales#127, returns#128, profit#129] +Arguments: 100, [channel#109 ASC NULLS FIRST, id#110 ASC NULLS FIRST], [channel#109, id#110, sales#127, returns#128, profit#129] ===== Subqueries ===== @@ -602,29 +602,29 @@ BroadcastExchange (112) (108) Scan parquet default.date_dim -Output [2]: [d_date_sk#16, d_date#145] +Output [2]: [d_date_sk#14, d_date#130] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-08-23), LessThanOrEqual(d_date,2000-09-22), IsNotNull(d_date_sk)] ReadSchema: struct (109) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#16, d_date#145] +Input [2]: [d_date_sk#14, d_date#130] (110) Filter [codegen id : 1] -Input [2]: [d_date_sk#16, d_date#145] -Condition : (((isnotnull(d_date#145) AND (d_date#145 >= 2000-08-23)) AND (d_date#145 <= 2000-09-22)) AND isnotnull(d_date_sk#16)) +Input [2]: [d_date_sk#14, d_date#130] +Condition : (((isnotnull(d_date#130) AND (d_date#130 >= 2000-08-23)) AND (d_date#130 <= 2000-09-22)) AND isnotnull(d_date_sk#14)) (111) Project [codegen id : 1] -Output [1]: [d_date_sk#16] -Input [2]: [d_date_sk#16, d_date#145] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_date#130] (112) BroadcastExchange -Input [1]: [d_date_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#146] +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=16] -Subquery:2 Hosting operator id = 40 Hosting Expression = cs_sold_date_sk#51 IN dynamicpruning#8 +Subquery:2 Hosting operator id = 40 Hosting Expression = cs_sold_date_sk#45 IN dynamicpruning#8 -Subquery:3 Hosting operator id = 71 Hosting Expression = ws_sold_date_sk#90 IN dynamicpruning#8 +Subquery:3 Hosting operator id = 71 Hosting Expression = ws_sold_date_sk#80 IN dynamicpruning#8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt index 288df2457edf2..d37c984980d09 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81.sf100/explain.txt @@ -88,7 +88,7 @@ Condition : ((isnotnull(ca_state#14) AND (ca_state#14 = GA)) AND isnotnull(ca_ad (7) BroadcastExchange Input [12]: [ca_address_sk#7, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 2] Left keys [1]: [c_current_addr_sk#3] @@ -101,228 +101,228 @@ Input [18]: [c_customer_sk#1, c_customer_id#2, c_current_addr_sk#3, c_salutation (10) Exchange Input [16]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] -Arguments: hashpartitioning(c_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#20] +Arguments: hashpartitioning(c_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 3] Input [16]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18] Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 (12) Scan parquet default.catalog_returns -Output [4]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23, cr_returned_date_sk#24] +Output [4]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21, cr_returned_date_sk#22] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#24), dynamicpruningexpression(cr_returned_date_sk#24 IN dynamicpruning#25)] +PartitionFilters: [isnotnull(cr_returned_date_sk#22), dynamicpruningexpression(cr_returned_date_sk#22 IN dynamicpruning#23)] PushedFilters: [IsNotNull(cr_returning_addr_sk), IsNotNull(cr_returning_customer_sk)] ReadSchema: struct (13) ColumnarToRow [codegen id : 5] -Input [4]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23, cr_returned_date_sk#24] +Input [4]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21, cr_returned_date_sk#22] (14) Filter [codegen id : 5] -Input [4]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23, cr_returned_date_sk#24] -Condition : (isnotnull(cr_returning_addr_sk#22) AND isnotnull(cr_returning_customer_sk#21)) +Input [4]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21, cr_returned_date_sk#22] +Condition : (isnotnull(cr_returning_addr_sk#20) AND isnotnull(cr_returning_customer_sk#19)) (15) ReusedExchange [Reuses operator id: 62] -Output [1]: [d_date_sk#26] +Output [1]: [d_date_sk#24] (16) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cr_returned_date_sk#24] -Right keys [1]: [d_date_sk#26] +Left keys [1]: [cr_returned_date_sk#22] +Right keys [1]: [d_date_sk#24] Join condition: None (17) Project [codegen id : 5] -Output [3]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23] -Input [5]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23, cr_returned_date_sk#24, d_date_sk#26] +Output [3]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21] +Input [5]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21, cr_returned_date_sk#22, d_date_sk#24] (18) Exchange -Input [3]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23] -Arguments: hashpartitioning(cr_returning_addr_sk#22, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [3]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21] +Arguments: hashpartitioning(cr_returning_addr_sk#20, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 6] -Input [3]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23] -Arguments: [cr_returning_addr_sk#22 ASC NULLS FIRST], false, 0 +Input [3]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21] +Arguments: [cr_returning_addr_sk#20 ASC NULLS FIRST], false, 0 (20) Scan parquet default.customer_address -Output [2]: [ca_address_sk#28, ca_state#29] +Output [2]: [ca_address_sk#25, ca_state#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_state)] ReadSchema: struct (21) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#28, ca_state#29] +Input [2]: [ca_address_sk#25, ca_state#26] (22) Filter [codegen id : 7] -Input [2]: [ca_address_sk#28, ca_state#29] -Condition : (isnotnull(ca_address_sk#28) AND isnotnull(ca_state#29)) +Input [2]: [ca_address_sk#25, ca_state#26] +Condition : (isnotnull(ca_address_sk#25) AND isnotnull(ca_state#26)) (23) Exchange -Input [2]: [ca_address_sk#28, ca_state#29] -Arguments: hashpartitioning(ca_address_sk#28, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [2]: [ca_address_sk#25, ca_state#26] +Arguments: hashpartitioning(ca_address_sk#25, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 8] -Input [2]: [ca_address_sk#28, ca_state#29] -Arguments: [ca_address_sk#28 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#25, ca_state#26] +Arguments: [ca_address_sk#25 ASC NULLS FIRST], false, 0 (25) SortMergeJoin [codegen id : 9] -Left keys [1]: [cr_returning_addr_sk#22] -Right keys [1]: [ca_address_sk#28] +Left keys [1]: [cr_returning_addr_sk#20] +Right keys [1]: [ca_address_sk#25] Join condition: None (26) Project [codegen id : 9] -Output [3]: [cr_returning_customer_sk#21, cr_return_amt_inc_tax#23, ca_state#29] -Input [5]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23, ca_address_sk#28, ca_state#29] +Output [3]: [cr_returning_customer_sk#19, cr_return_amt_inc_tax#21, ca_state#26] +Input [5]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21, ca_address_sk#25, ca_state#26] (27) HashAggregate [codegen id : 9] -Input [3]: [cr_returning_customer_sk#21, cr_return_amt_inc_tax#23, ca_state#29] -Keys [2]: [cr_returning_customer_sk#21, ca_state#29] -Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#23))] -Aggregate Attributes [1]: [sum#31] -Results [3]: [cr_returning_customer_sk#21, ca_state#29, sum#32] +Input [3]: [cr_returning_customer_sk#19, cr_return_amt_inc_tax#21, ca_state#26] +Keys [2]: [cr_returning_customer_sk#19, ca_state#26] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#21))] +Aggregate Attributes [1]: [sum#27] +Results [3]: [cr_returning_customer_sk#19, ca_state#26, sum#28] (28) Exchange -Input [3]: [cr_returning_customer_sk#21, ca_state#29, sum#32] -Arguments: hashpartitioning(cr_returning_customer_sk#21, ca_state#29, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [3]: [cr_returning_customer_sk#19, ca_state#26, sum#28] +Arguments: hashpartitioning(cr_returning_customer_sk#19, ca_state#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] (29) HashAggregate [codegen id : 10] -Input [3]: [cr_returning_customer_sk#21, ca_state#29, sum#32] -Keys [2]: [cr_returning_customer_sk#21, ca_state#29] -Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#23))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#23))#34] -Results [3]: [cr_returning_customer_sk#21 AS ctr_customer_sk#35, ca_state#29 AS ctr_state#36, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#23))#34,17,2) AS ctr_total_return#37] +Input [3]: [cr_returning_customer_sk#19, ca_state#26, sum#28] +Keys [2]: [cr_returning_customer_sk#19, ca_state#26] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#21))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#21))#29] +Results [3]: [cr_returning_customer_sk#19 AS ctr_customer_sk#30, ca_state#26 AS ctr_state#31, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#21))#29,17,2) AS ctr_total_return#32] (30) Filter [codegen id : 10] -Input [3]: [ctr_customer_sk#35, ctr_state#36, ctr_total_return#37] -Condition : isnotnull(ctr_total_return#37) +Input [3]: [ctr_customer_sk#30, ctr_state#31, ctr_total_return#32] +Condition : isnotnull(ctr_total_return#32) (31) Exchange -Input [3]: [ctr_customer_sk#35, ctr_state#36, ctr_total_return#37] -Arguments: hashpartitioning(ctr_customer_sk#35, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [3]: [ctr_customer_sk#30, ctr_state#31, ctr_total_return#32] +Arguments: hashpartitioning(ctr_customer_sk#30, 5), ENSURE_REQUIREMENTS, [plan_id=6] (32) Sort [codegen id : 11] -Input [3]: [ctr_customer_sk#35, ctr_state#36, ctr_total_return#37] -Arguments: [ctr_customer_sk#35 ASC NULLS FIRST], false, 0 +Input [3]: [ctr_customer_sk#30, ctr_state#31, ctr_total_return#32] +Arguments: [ctr_customer_sk#30 ASC NULLS FIRST], false, 0 (33) SortMergeJoin [codegen id : 20] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ctr_customer_sk#35] +Right keys [1]: [ctr_customer_sk#30] Join condition: None (34) Project [codegen id : 20] -Output [17]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_state#36, ctr_total_return#37] -Input [19]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_customer_sk#35, ctr_state#36, ctr_total_return#37] +Output [17]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_state#31, ctr_total_return#32] +Input [19]: [c_customer_sk#1, c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_customer_sk#30, ctr_state#31, ctr_total_return#32] (35) Scan parquet default.catalog_returns -Output [4]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23, cr_returned_date_sk#24] +Output [4]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21, cr_returned_date_sk#22] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#24), dynamicpruningexpression(cr_returned_date_sk#24 IN dynamicpruning#25)] +PartitionFilters: [isnotnull(cr_returned_date_sk#22), dynamicpruningexpression(cr_returned_date_sk#22 IN dynamicpruning#23)] PushedFilters: [IsNotNull(cr_returning_addr_sk)] ReadSchema: struct (36) ColumnarToRow [codegen id : 13] -Input [4]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23, cr_returned_date_sk#24] +Input [4]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21, cr_returned_date_sk#22] (37) Filter [codegen id : 13] -Input [4]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23, cr_returned_date_sk#24] -Condition : isnotnull(cr_returning_addr_sk#22) +Input [4]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21, cr_returned_date_sk#22] +Condition : isnotnull(cr_returning_addr_sk#20) (38) ReusedExchange [Reuses operator id: 62] -Output [1]: [d_date_sk#26] +Output [1]: [d_date_sk#24] (39) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cr_returned_date_sk#24] -Right keys [1]: [d_date_sk#26] +Left keys [1]: [cr_returned_date_sk#22] +Right keys [1]: [d_date_sk#24] Join condition: None (40) Project [codegen id : 13] -Output [3]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23] -Input [5]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23, cr_returned_date_sk#24, d_date_sk#26] +Output [3]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21] +Input [5]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21, cr_returned_date_sk#22, d_date_sk#24] (41) Exchange -Input [3]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23] -Arguments: hashpartitioning(cr_returning_addr_sk#22, 5), ENSURE_REQUIREMENTS, [id=#39] +Input [3]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21] +Arguments: hashpartitioning(cr_returning_addr_sk#20, 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) Sort [codegen id : 14] -Input [3]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23] -Arguments: [cr_returning_addr_sk#22 ASC NULLS FIRST], false, 0 +Input [3]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21] +Arguments: [cr_returning_addr_sk#20 ASC NULLS FIRST], false, 0 (43) ReusedExchange [Reuses operator id: 23] -Output [2]: [ca_address_sk#28, ca_state#29] +Output [2]: [ca_address_sk#25, ca_state#26] (44) Sort [codegen id : 16] -Input [2]: [ca_address_sk#28, ca_state#29] -Arguments: [ca_address_sk#28 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#25, ca_state#26] +Arguments: [ca_address_sk#25 ASC NULLS FIRST], false, 0 (45) SortMergeJoin [codegen id : 17] -Left keys [1]: [cr_returning_addr_sk#22] -Right keys [1]: [ca_address_sk#28] +Left keys [1]: [cr_returning_addr_sk#20] +Right keys [1]: [ca_address_sk#25] Join condition: None (46) Project [codegen id : 17] -Output [3]: [cr_returning_customer_sk#21, cr_return_amt_inc_tax#23, ca_state#29] -Input [5]: [cr_returning_customer_sk#21, cr_returning_addr_sk#22, cr_return_amt_inc_tax#23, ca_address_sk#28, ca_state#29] +Output [3]: [cr_returning_customer_sk#19, cr_return_amt_inc_tax#21, ca_state#26] +Input [5]: [cr_returning_customer_sk#19, cr_returning_addr_sk#20, cr_return_amt_inc_tax#21, ca_address_sk#25, ca_state#26] (47) HashAggregate [codegen id : 17] -Input [3]: [cr_returning_customer_sk#21, cr_return_amt_inc_tax#23, ca_state#29] -Keys [2]: [cr_returning_customer_sk#21, ca_state#29] -Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#23))] -Aggregate Attributes [1]: [sum#40] -Results [3]: [cr_returning_customer_sk#21, ca_state#29, sum#41] +Input [3]: [cr_returning_customer_sk#19, cr_return_amt_inc_tax#21, ca_state#26] +Keys [2]: [cr_returning_customer_sk#19, ca_state#26] +Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#21))] +Aggregate Attributes [1]: [sum#33] +Results [3]: [cr_returning_customer_sk#19, ca_state#26, sum#34] (48) Exchange -Input [3]: [cr_returning_customer_sk#21, ca_state#29, sum#41] -Arguments: hashpartitioning(cr_returning_customer_sk#21, ca_state#29, 5), ENSURE_REQUIREMENTS, [id=#42] +Input [3]: [cr_returning_customer_sk#19, ca_state#26, sum#34] +Arguments: hashpartitioning(cr_returning_customer_sk#19, ca_state#26, 5), ENSURE_REQUIREMENTS, [plan_id=8] (49) HashAggregate [codegen id : 18] -Input [3]: [cr_returning_customer_sk#21, ca_state#29, sum#41] -Keys [2]: [cr_returning_customer_sk#21, ca_state#29] -Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#23))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#23))#34] -Results [2]: [ca_state#29 AS ctr_state#36, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#23))#34,17,2) AS ctr_total_return#37] +Input [3]: [cr_returning_customer_sk#19, ca_state#26, sum#34] +Keys [2]: [cr_returning_customer_sk#19, ca_state#26] +Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#21))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#21))#29] +Results [2]: [ca_state#26 AS ctr_state#31, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#21))#29,17,2) AS ctr_total_return#32] (50) HashAggregate [codegen id : 18] -Input [2]: [ctr_state#36, ctr_total_return#37] -Keys [1]: [ctr_state#36] -Functions [1]: [partial_avg(ctr_total_return#37)] -Aggregate Attributes [2]: [sum#43, count#44] -Results [3]: [ctr_state#36, sum#45, count#46] +Input [2]: [ctr_state#31, ctr_total_return#32] +Keys [1]: [ctr_state#31] +Functions [1]: [partial_avg(ctr_total_return#32)] +Aggregate Attributes [2]: [sum#35, count#36] +Results [3]: [ctr_state#31, sum#37, count#38] (51) Exchange -Input [3]: [ctr_state#36, sum#45, count#46] -Arguments: hashpartitioning(ctr_state#36, 5), ENSURE_REQUIREMENTS, [id=#47] +Input [3]: [ctr_state#31, sum#37, count#38] +Arguments: hashpartitioning(ctr_state#31, 5), ENSURE_REQUIREMENTS, [plan_id=9] (52) HashAggregate [codegen id : 19] -Input [3]: [ctr_state#36, sum#45, count#46] -Keys [1]: [ctr_state#36] -Functions [1]: [avg(ctr_total_return#37)] -Aggregate Attributes [1]: [avg(ctr_total_return#37)#48] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#37)#48) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#49, ctr_state#36 AS ctr_state#36#50] +Input [3]: [ctr_state#31, sum#37, count#38] +Keys [1]: [ctr_state#31] +Functions [1]: [avg(ctr_total_return#32)] +Aggregate Attributes [1]: [avg(ctr_total_return#32)#39] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#32)#39) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#40, ctr_state#31 AS ctr_state#31#41] (53) Filter [codegen id : 19] -Input [2]: [(avg(ctr_total_return) * 1.2)#49, ctr_state#36#50] -Condition : isnotnull((avg(ctr_total_return) * 1.2)#49) +Input [2]: [(avg(ctr_total_return) * 1.2)#40, ctr_state#31#41] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#40) (54) BroadcastExchange -Input [2]: [(avg(ctr_total_return) * 1.2)#49, ctr_state#36#50] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#51] +Input [2]: [(avg(ctr_total_return) * 1.2)#40, ctr_state#31#41] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=10] (55) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [ctr_state#36] -Right keys [1]: [ctr_state#36#50] -Join condition: (cast(ctr_total_return#37 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#49) +Left keys [1]: [ctr_state#31] +Right keys [1]: [ctr_state#31#41] +Join condition: (cast(ctr_total_return#32 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#40) (56) Project [codegen id : 20] -Output [16]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#37] -Input [19]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_state#36, ctr_total_return#37, (avg(ctr_total_return) * 1.2)#49, ctr_state#36#50] +Output [16]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#32] +Input [19]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_state#31, ctr_total_return#32, (avg(ctr_total_return) * 1.2)#40, ctr_state#31#41] (57) TakeOrderedAndProject -Input [16]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#37] -Arguments: 100, [c_customer_id#2 ASC NULLS FIRST, c_salutation#4 ASC NULLS FIRST, c_first_name#5 ASC NULLS FIRST, c_last_name#6 ASC NULLS FIRST, ca_street_number#8 ASC NULLS FIRST, ca_street_name#9 ASC NULLS FIRST, ca_street_type#10 ASC NULLS FIRST, ca_suite_number#11 ASC NULLS FIRST, ca_city#12 ASC NULLS FIRST, ca_county#13 ASC NULLS FIRST, ca_state#14 ASC NULLS FIRST, ca_zip#15 ASC NULLS FIRST, ca_country#16 ASC NULLS FIRST, ca_gmt_offset#17 ASC NULLS FIRST, ca_location_type#18 ASC NULLS FIRST, ctr_total_return#37 ASC NULLS FIRST], [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#37] +Input [16]: [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#32] +Arguments: 100, [c_customer_id#2 ASC NULLS FIRST, c_salutation#4 ASC NULLS FIRST, c_first_name#5 ASC NULLS FIRST, c_last_name#6 ASC NULLS FIRST, ca_street_number#8 ASC NULLS FIRST, ca_street_name#9 ASC NULLS FIRST, ca_street_type#10 ASC NULLS FIRST, ca_suite_number#11 ASC NULLS FIRST, ca_city#12 ASC NULLS FIRST, ca_county#13 ASC NULLS FIRST, ca_state#14 ASC NULLS FIRST, ca_zip#15 ASC NULLS FIRST, ca_country#16 ASC NULLS FIRST, ca_gmt_offset#17 ASC NULLS FIRST, ca_location_type#18 ASC NULLS FIRST, ctr_total_return#32 ASC NULLS FIRST], [c_customer_id#2, c_salutation#4, c_first_name#5, c_last_name#6, ca_street_number#8, ca_street_name#9, ca_street_type#10, ca_suite_number#11, ca_city#12, ca_county#13, ca_state#14, ca_zip#15, ca_country#16, ca_gmt_offset#17, ca_location_type#18, ctr_total_return#32] ===== Subqueries ===== -Subquery:1 Hosting operator id = 12 Hosting Expression = cr_returned_date_sk#24 IN dynamicpruning#25 +Subquery:1 Hosting operator id = 12 Hosting Expression = cr_returned_date_sk#22 IN dynamicpruning#23 BroadcastExchange (62) +- * Project (61) +- * Filter (60) @@ -331,27 +331,27 @@ BroadcastExchange (62) (58) Scan parquet default.date_dim -Output [2]: [d_date_sk#26, d_year#52] +Output [2]: [d_date_sk#24, d_year#42] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (59) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#26, d_year#52] +Input [2]: [d_date_sk#24, d_year#42] (60) Filter [codegen id : 1] -Input [2]: [d_date_sk#26, d_year#52] -Condition : ((isnotnull(d_year#52) AND (d_year#52 = 2000)) AND isnotnull(d_date_sk#26)) +Input [2]: [d_date_sk#24, d_year#42] +Condition : ((isnotnull(d_year#42) AND (d_year#42 = 2000)) AND isnotnull(d_date_sk#24)) (61) Project [codegen id : 1] -Output [1]: [d_date_sk#26] -Input [2]: [d_date_sk#26, d_year#52] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_year#42] (62) BroadcastExchange -Input [1]: [d_date_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] -Subquery:2 Hosting operator id = 35 Hosting Expression = cr_returned_date_sk#24 IN dynamicpruning#25 +Subquery:2 Hosting operator id = 35 Hosting Expression = cr_returned_date_sk#22 IN dynamicpruning#23 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt index 91bd90224827a..7b23beda1cbd0 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q81/explain.txt @@ -92,7 +92,7 @@ Condition : (isnotnull(ca_address_sk#7) AND isnotnull(ca_state#8)) (10) BroadcastExchange Input [2]: [ca_address_sk#7, ca_state#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cr_returning_addr_sk#2] @@ -107,23 +107,23 @@ Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_in Input [3]: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] Keys [2]: [cr_returning_customer_sk#1, ca_state#8] Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#3))] -Aggregate Attributes [1]: [sum#10] -Results [3]: [cr_returning_customer_sk#1, ca_state#8, sum#11] +Aggregate Attributes [1]: [sum#9] +Results [3]: [cr_returning_customer_sk#1, ca_state#8, sum#10] (14) Exchange -Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#11] -Arguments: hashpartitioning(cr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#10] +Arguments: hashpartitioning(cr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 11] -Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#11] +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#10] Keys [2]: [cr_returning_customer_sk#1, ca_state#8] Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))#13] -Results [3]: [cr_returning_customer_sk#1 AS ctr_customer_sk#14, ca_state#8 AS ctr_state#15, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#3))#13,17,2) AS ctr_total_return#16] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))#11] +Results [3]: [cr_returning_customer_sk#1 AS ctr_customer_sk#12, ca_state#8 AS ctr_state#13, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#3))#11,17,2) AS ctr_total_return#14] (16) Filter [codegen id : 11] -Input [3]: [ctr_customer_sk#14, ctr_state#15, ctr_total_return#16] -Condition : isnotnull(ctr_total_return#16) +Input [3]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14] +Condition : isnotnull(ctr_total_return#14) (17) Scan parquet default.catalog_returns Output [4]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_inc_tax#3, cr_returned_date_sk#4] @@ -168,112 +168,112 @@ Input [5]: [cr_returning_customer_sk#1, cr_returning_addr_sk#2, cr_return_amt_in Input [3]: [cr_returning_customer_sk#1, cr_return_amt_inc_tax#3, ca_state#8] Keys [2]: [cr_returning_customer_sk#1, ca_state#8] Functions [1]: [partial_sum(UnscaledValue(cr_return_amt_inc_tax#3))] -Aggregate Attributes [1]: [sum#17] -Results [3]: [cr_returning_customer_sk#1, ca_state#8, sum#18] +Aggregate Attributes [1]: [sum#15] +Results [3]: [cr_returning_customer_sk#1, ca_state#8, sum#16] (27) Exchange -Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#18] -Arguments: hashpartitioning(cr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#16] +Arguments: hashpartitioning(cr_returning_customer_sk#1, ca_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (28) HashAggregate [codegen id : 7] -Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#18] +Input [3]: [cr_returning_customer_sk#1, ca_state#8, sum#16] Keys [2]: [cr_returning_customer_sk#1, ca_state#8] Functions [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))#13] -Results [2]: [ca_state#8 AS ctr_state#15, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#3))#13,17,2) AS ctr_total_return#16] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_return_amt_inc_tax#3))#11] +Results [2]: [ca_state#8 AS ctr_state#13, MakeDecimal(sum(UnscaledValue(cr_return_amt_inc_tax#3))#11,17,2) AS ctr_total_return#14] (29) HashAggregate [codegen id : 7] -Input [2]: [ctr_state#15, ctr_total_return#16] -Keys [1]: [ctr_state#15] -Functions [1]: [partial_avg(ctr_total_return#16)] -Aggregate Attributes [2]: [sum#20, count#21] -Results [3]: [ctr_state#15, sum#22, count#23] +Input [2]: [ctr_state#13, ctr_total_return#14] +Keys [1]: [ctr_state#13] +Functions [1]: [partial_avg(ctr_total_return#14)] +Aggregate Attributes [2]: [sum#17, count#18] +Results [3]: [ctr_state#13, sum#19, count#20] (30) Exchange -Input [3]: [ctr_state#15, sum#22, count#23] -Arguments: hashpartitioning(ctr_state#15, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [3]: [ctr_state#13, sum#19, count#20] +Arguments: hashpartitioning(ctr_state#13, 5), ENSURE_REQUIREMENTS, [plan_id=4] (31) HashAggregate [codegen id : 8] -Input [3]: [ctr_state#15, sum#22, count#23] -Keys [1]: [ctr_state#15] -Functions [1]: [avg(ctr_total_return#16)] -Aggregate Attributes [1]: [avg(ctr_total_return#16)#25] -Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#16)#25) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#26, ctr_state#15 AS ctr_state#15#27] +Input [3]: [ctr_state#13, sum#19, count#20] +Keys [1]: [ctr_state#13] +Functions [1]: [avg(ctr_total_return#14)] +Aggregate Attributes [1]: [avg(ctr_total_return#14)#21] +Results [2]: [CheckOverflow((promote_precision(avg(ctr_total_return#14)#21) * 1.200000), DecimalType(24,7)) AS (avg(ctr_total_return) * 1.2)#22, ctr_state#13 AS ctr_state#13#23] (32) Filter [codegen id : 8] -Input [2]: [(avg(ctr_total_return) * 1.2)#26, ctr_state#15#27] -Condition : isnotnull((avg(ctr_total_return) * 1.2)#26) +Input [2]: [(avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] +Condition : isnotnull((avg(ctr_total_return) * 1.2)#22) (33) BroadcastExchange -Input [2]: [(avg(ctr_total_return) * 1.2)#26, ctr_state#15#27] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#28] +Input [2]: [(avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ctr_state#15] -Right keys [1]: [ctr_state#15#27] -Join condition: (cast(ctr_total_return#16 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#26) +Left keys [1]: [ctr_state#13] +Right keys [1]: [ctr_state#13#23] +Join condition: (cast(ctr_total_return#14 as decimal(24,7)) > (avg(ctr_total_return) * 1.2)#22) (35) Project [codegen id : 11] -Output [2]: [ctr_customer_sk#14, ctr_total_return#16] -Input [5]: [ctr_customer_sk#14, ctr_state#15, ctr_total_return#16, (avg(ctr_total_return) * 1.2)#26, ctr_state#15#27] +Output [2]: [ctr_customer_sk#12, ctr_total_return#14] +Input [5]: [ctr_customer_sk#12, ctr_state#13, ctr_total_return#14, (avg(ctr_total_return) * 1.2)#22, ctr_state#13#23] (36) Scan parquet default.customer -Output [6]: [c_customer_sk#29, c_customer_id#30, c_current_addr_sk#31, c_salutation#32, c_first_name#33, c_last_name#34] +Output [6]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 9] -Input [6]: [c_customer_sk#29, c_customer_id#30, c_current_addr_sk#31, c_salutation#32, c_first_name#33, c_last_name#34] +Input [6]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] (38) Filter [codegen id : 9] -Input [6]: [c_customer_sk#29, c_customer_id#30, c_current_addr_sk#31, c_salutation#32, c_first_name#33, c_last_name#34] -Condition : (isnotnull(c_customer_sk#29) AND isnotnull(c_current_addr_sk#31)) +Input [6]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] +Condition : (isnotnull(c_customer_sk#24) AND isnotnull(c_current_addr_sk#26)) (39) BroadcastExchange -Input [6]: [c_customer_sk#29, c_customer_id#30, c_current_addr_sk#31, c_salutation#32, c_first_name#33, c_last_name#34] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#35] +Input [6]: [c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] (40) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ctr_customer_sk#14] -Right keys [1]: [c_customer_sk#29] +Left keys [1]: [ctr_customer_sk#12] +Right keys [1]: [c_customer_sk#24] Join condition: None (41) Project [codegen id : 11] -Output [6]: [ctr_total_return#16, c_customer_id#30, c_current_addr_sk#31, c_salutation#32, c_first_name#33, c_last_name#34] -Input [8]: [ctr_customer_sk#14, ctr_total_return#16, c_customer_sk#29, c_customer_id#30, c_current_addr_sk#31, c_salutation#32, c_first_name#33, c_last_name#34] +Output [6]: [ctr_total_return#14, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] +Input [8]: [ctr_customer_sk#12, ctr_total_return#14, c_customer_sk#24, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29] (42) Scan parquet default.customer_address -Output [12]: [ca_address_sk#36, ca_street_number#37, ca_street_name#38, ca_street_type#39, ca_suite_number#40, ca_city#41, ca_county#42, ca_state#43, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] +Output [12]: [ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,GA), IsNotNull(ca_address_sk)] ReadSchema: struct (43) ColumnarToRow [codegen id : 10] -Input [12]: [ca_address_sk#36, ca_street_number#37, ca_street_name#38, ca_street_type#39, ca_suite_number#40, ca_city#41, ca_county#42, ca_state#43, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] +Input [12]: [ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] (44) Filter [codegen id : 10] -Input [12]: [ca_address_sk#36, ca_street_number#37, ca_street_name#38, ca_street_type#39, ca_suite_number#40, ca_city#41, ca_county#42, ca_state#43, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] -Condition : ((isnotnull(ca_state#43) AND (ca_state#43 = GA)) AND isnotnull(ca_address_sk#36)) +Input [12]: [ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] +Condition : ((isnotnull(ca_state#37) AND (ca_state#37 = GA)) AND isnotnull(ca_address_sk#30)) (45) BroadcastExchange -Input [12]: [ca_address_sk#36, ca_street_number#37, ca_street_name#38, ca_street_type#39, ca_suite_number#40, ca_city#41, ca_county#42, ca_state#43, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] +Input [12]: [ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] (46) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [c_current_addr_sk#31] -Right keys [1]: [ca_address_sk#36] +Left keys [1]: [c_current_addr_sk#26] +Right keys [1]: [ca_address_sk#30] Join condition: None (47) Project [codegen id : 11] -Output [16]: [c_customer_id#30, c_salutation#32, c_first_name#33, c_last_name#34, ca_street_number#37, ca_street_name#38, ca_street_type#39, ca_suite_number#40, ca_city#41, ca_county#42, ca_state#43, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47, ctr_total_return#16] -Input [18]: [ctr_total_return#16, c_customer_id#30, c_current_addr_sk#31, c_salutation#32, c_first_name#33, c_last_name#34, ca_address_sk#36, ca_street_number#37, ca_street_name#38, ca_street_type#39, ca_suite_number#40, ca_city#41, ca_county#42, ca_state#43, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47] +Output [16]: [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41, ctr_total_return#14] +Input [18]: [ctr_total_return#14, c_customer_id#25, c_current_addr_sk#26, c_salutation#27, c_first_name#28, c_last_name#29, ca_address_sk#30, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41] (48) TakeOrderedAndProject -Input [16]: [c_customer_id#30, c_salutation#32, c_first_name#33, c_last_name#34, ca_street_number#37, ca_street_name#38, ca_street_type#39, ca_suite_number#40, ca_city#41, ca_county#42, ca_state#43, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47, ctr_total_return#16] -Arguments: 100, [c_customer_id#30 ASC NULLS FIRST, c_salutation#32 ASC NULLS FIRST, c_first_name#33 ASC NULLS FIRST, c_last_name#34 ASC NULLS FIRST, ca_street_number#37 ASC NULLS FIRST, ca_street_name#38 ASC NULLS FIRST, ca_street_type#39 ASC NULLS FIRST, ca_suite_number#40 ASC NULLS FIRST, ca_city#41 ASC NULLS FIRST, ca_county#42 ASC NULLS FIRST, ca_state#43 ASC NULLS FIRST, ca_zip#44 ASC NULLS FIRST, ca_country#45 ASC NULLS FIRST, ca_gmt_offset#46 ASC NULLS FIRST, ca_location_type#47 ASC NULLS FIRST, ctr_total_return#16 ASC NULLS FIRST], [c_customer_id#30, c_salutation#32, c_first_name#33, c_last_name#34, ca_street_number#37, ca_street_name#38, ca_street_type#39, ca_suite_number#40, ca_city#41, ca_county#42, ca_state#43, ca_zip#44, ca_country#45, ca_gmt_offset#46, ca_location_type#47, ctr_total_return#16] +Input [16]: [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41, ctr_total_return#14] +Arguments: 100, [c_customer_id#25 ASC NULLS FIRST, c_salutation#27 ASC NULLS FIRST, c_first_name#28 ASC NULLS FIRST, c_last_name#29 ASC NULLS FIRST, ca_street_number#31 ASC NULLS FIRST, ca_street_name#32 ASC NULLS FIRST, ca_street_type#33 ASC NULLS FIRST, ca_suite_number#34 ASC NULLS FIRST, ca_city#35 ASC NULLS FIRST, ca_county#36 ASC NULLS FIRST, ca_state#37 ASC NULLS FIRST, ca_zip#38 ASC NULLS FIRST, ca_country#39 ASC NULLS FIRST, ca_gmt_offset#40 ASC NULLS FIRST, ca_location_type#41 ASC NULLS FIRST, ctr_total_return#14 ASC NULLS FIRST], [c_customer_id#25, c_salutation#27, c_first_name#28, c_last_name#29, ca_street_number#31, ca_street_name#32, ca_street_type#33, ca_suite_number#34, ca_city#35, ca_county#36, ca_state#37, ca_zip#38, ca_country#39, ca_gmt_offset#40, ca_location_type#41, ctr_total_return#14] ===== Subqueries ===== @@ -286,26 +286,26 @@ BroadcastExchange (53) (49) Scan parquet default.date_dim -Output [2]: [d_date_sk#6, d_year#49] +Output [2]: [d_date_sk#6, d_year#42] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (50) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#6, d_year#49] +Input [2]: [d_date_sk#6, d_year#42] (51) Filter [codegen id : 1] -Input [2]: [d_date_sk#6, d_year#49] -Condition : ((isnotnull(d_year#49) AND (d_year#49 = 2000)) AND isnotnull(d_date_sk#6)) +Input [2]: [d_date_sk#6, d_year#42] +Condition : ((isnotnull(d_year#42) AND (d_year#42 = 2000)) AND isnotnull(d_date_sk#6)) (52) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [2]: [d_date_sk#6, d_year#49] +Input [2]: [d_date_sk#6, d_year#42] (53) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] Subquery:2 Hosting operator id = 17 Hosting Expression = cr_returned_date_sk#4 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.sf100/explain.txt index 1e0c78c7329c8..ab796ed189b9f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82.sf100/explain.txt @@ -49,90 +49,90 @@ Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, i_manufa (5) BroadcastExchange Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#6] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.inventory -Output [3]: [inv_item_sk#7, inv_quantity_on_hand#8, inv_date_sk#9] +Output [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(inv_date_sk#9), dynamicpruningexpression(inv_date_sk#9 IN dynamicpruning#10)] +PartitionFilters: [isnotnull(inv_date_sk#8), dynamicpruningexpression(inv_date_sk#8 IN dynamicpruning#9)] PushedFilters: [IsNotNull(inv_quantity_on_hand), GreaterThanOrEqual(inv_quantity_on_hand,100), LessThanOrEqual(inv_quantity_on_hand,500), IsNotNull(inv_item_sk)] ReadSchema: struct (7) ColumnarToRow -Input [3]: [inv_item_sk#7, inv_quantity_on_hand#8, inv_date_sk#9] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] (8) Filter -Input [3]: [inv_item_sk#7, inv_quantity_on_hand#8, inv_date_sk#9] -Condition : (((isnotnull(inv_quantity_on_hand#8) AND (inv_quantity_on_hand#8 >= 100)) AND (inv_quantity_on_hand#8 <= 500)) AND isnotnull(inv_item_sk#7)) +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] +Condition : (((isnotnull(inv_quantity_on_hand#7) AND (inv_quantity_on_hand#7 >= 100)) AND (inv_quantity_on_hand#7 <= 500)) AND isnotnull(inv_item_sk#6)) (9) Project -Output [2]: [inv_item_sk#7, inv_date_sk#9] -Input [3]: [inv_item_sk#7, inv_quantity_on_hand#8, inv_date_sk#9] +Output [2]: [inv_item_sk#6, inv_date_sk#8] +Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] (10) BroadcastHashJoin [codegen id : 3] Left keys [1]: [i_item_sk#1] -Right keys [1]: [inv_item_sk#7] +Right keys [1]: [inv_item_sk#6] Join condition: None (11) Project [codegen id : 3] -Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#9] -Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#7, inv_date_sk#9] +Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#6, inv_date_sk#8] (12) ReusedExchange [Reuses operator id: 33] -Output [1]: [d_date_sk#11] +Output [1]: [d_date_sk#10] (13) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [inv_date_sk#9] -Right keys [1]: [d_date_sk#11] +Left keys [1]: [inv_date_sk#8] +Right keys [1]: [d_date_sk#10] Join condition: None (14) Project [codegen id : 3] Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] -Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#9, d_date_sk#11] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#10] (15) Exchange Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] -Arguments: hashpartitioning(i_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#12] +Arguments: hashpartitioning(i_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (16) Sort [codegen id : 4] Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] Arguments: [i_item_sk#1 ASC NULLS FIRST], false, 0 (17) Scan parquet default.store_sales -Output [2]: [ss_item_sk#13, ss_sold_date_sk#14] +Output [2]: [ss_item_sk#11, ss_sold_date_sk#12] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (18) ColumnarToRow [codegen id : 5] -Input [2]: [ss_item_sk#13, ss_sold_date_sk#14] +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] (19) Filter [codegen id : 5] -Input [2]: [ss_item_sk#13, ss_sold_date_sk#14] -Condition : isnotnull(ss_item_sk#13) +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_item_sk#11) (20) Project [codegen id : 5] -Output [1]: [ss_item_sk#13] -Input [2]: [ss_item_sk#13, ss_sold_date_sk#14] +Output [1]: [ss_item_sk#11] +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] (21) Exchange -Input [1]: [ss_item_sk#13] -Arguments: hashpartitioning(ss_item_sk#13, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [1]: [ss_item_sk#11] +Arguments: hashpartitioning(ss_item_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) Sort [codegen id : 6] -Input [1]: [ss_item_sk#13] -Arguments: [ss_item_sk#13 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#11] +Arguments: [ss_item_sk#11 ASC NULLS FIRST], false, 0 (23) SortMergeJoin [codegen id : 7] Left keys [1]: [i_item_sk#1] -Right keys [1]: [ss_item_sk#13] +Right keys [1]: [ss_item_sk#11] Join condition: None (24) Project [codegen id : 7] Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, ss_item_sk#13] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, ss_item_sk#11] (25) HashAggregate [codegen id : 7] Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] @@ -143,7 +143,7 @@ Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] (26) Exchange Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, [id=#16] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, [plan_id=4] (27) HashAggregate [codegen id : 8] Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] @@ -158,7 +158,7 @@ Arguments: 100, [i_item_id#2 ASC NULLS FIRST], [i_item_id#2, i_item_desc#3, i_cu ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = inv_date_sk#9 IN dynamicpruning#10 +Subquery:1 Hosting operator id = 6 Hosting Expression = inv_date_sk#8 IN dynamicpruning#9 BroadcastExchange (33) +- * Project (32) +- * Filter (31) @@ -167,25 +167,25 @@ BroadcastExchange (33) (29) Scan parquet default.date_dim -Output [2]: [d_date_sk#11, d_date#17] +Output [2]: [d_date_sk#10, d_date#13] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), IsNotNull(d_date_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#11, d_date#17] +Input [2]: [d_date_sk#10, d_date#13] (31) Filter [codegen id : 1] -Input [2]: [d_date_sk#11, d_date#17] -Condition : (((isnotnull(d_date#17) AND (d_date#17 >= 2000-05-25)) AND (d_date#17 <= 2000-07-24)) AND isnotnull(d_date_sk#11)) +Input [2]: [d_date_sk#10, d_date#13] +Condition : (((isnotnull(d_date#13) AND (d_date#13 >= 2000-05-25)) AND (d_date#13 <= 2000-07-24)) AND isnotnull(d_date_sk#10)) (32) Project [codegen id : 1] -Output [1]: [d_date_sk#11] -Input [2]: [d_date_sk#11, d_date#17] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#13] (33) BroadcastExchange -Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/explain.txt index 4e9ad2199c037..63519baec5dcd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q82/explain.txt @@ -65,7 +65,7 @@ Input [3]: [inv_item_sk#6, inv_quantity_on_hand#7, inv_date_sk#8] (9) BroadcastExchange Input [2]: [inv_item_sk#6, inv_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 3] Left keys [1]: [i_item_sk#1] @@ -77,47 +77,47 @@ Output [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_dat Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_item_sk#6, inv_date_sk#8] (12) ReusedExchange [Reuses operator id: 30] -Output [1]: [d_date_sk#11] +Output [1]: [d_date_sk#10] (13) BroadcastHashJoin [codegen id : 3] Left keys [1]: [inv_date_sk#8] -Right keys [1]: [d_date_sk#11] +Right keys [1]: [d_date_sk#10] Join condition: None (14) Project [codegen id : 3] Output [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] -Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#11] +Input [6]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, inv_date_sk#8, d_date_sk#10] (15) BroadcastExchange Input [4]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) Scan parquet default.store_sales -Output [2]: [ss_item_sk#13, ss_sold_date_sk#14] +Output [2]: [ss_item_sk#11, ss_sold_date_sk#12] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (17) ColumnarToRow -Input [2]: [ss_item_sk#13, ss_sold_date_sk#14] +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] (18) Filter -Input [2]: [ss_item_sk#13, ss_sold_date_sk#14] -Condition : isnotnull(ss_item_sk#13) +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Condition : isnotnull(ss_item_sk#11) (19) Project -Output [1]: [ss_item_sk#13] -Input [2]: [ss_item_sk#13, ss_sold_date_sk#14] +Output [1]: [ss_item_sk#11] +Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] (20) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] -Right keys [1]: [ss_item_sk#13] +Right keys [1]: [ss_item_sk#11] Join condition: None (21) Project [codegen id : 4] Output [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, ss_item_sk#13] +Input [5]: [i_item_sk#1, i_item_id#2, i_item_desc#3, i_current_price#4, ss_item_sk#11] (22) HashAggregate [codegen id : 4] Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] @@ -128,7 +128,7 @@ Results [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] (23) Exchange Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] -Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, [id=#15] +Arguments: hashpartitioning(i_item_id#2, i_item_desc#3, i_current_price#4, 5), ENSURE_REQUIREMENTS, [plan_id=3] (24) HashAggregate [codegen id : 5] Input [3]: [i_item_id#2, i_item_desc#3, i_current_price#4] @@ -152,25 +152,25 @@ BroadcastExchange (30) (26) Scan parquet default.date_dim -Output [2]: [d_date_sk#11, d_date#16] +Output [2]: [d_date_sk#10, d_date#13] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-05-25), LessThanOrEqual(d_date,2000-07-24), IsNotNull(d_date_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#11, d_date#16] +Input [2]: [d_date_sk#10, d_date#13] (28) Filter [codegen id : 1] -Input [2]: [d_date_sk#11, d_date#16] -Condition : (((isnotnull(d_date#16) AND (d_date#16 >= 2000-05-25)) AND (d_date#16 <= 2000-07-24)) AND isnotnull(d_date_sk#11)) +Input [2]: [d_date_sk#10, d_date#13] +Condition : (((isnotnull(d_date#13) AND (d_date#13 >= 2000-05-25)) AND (d_date#13 <= 2000-07-24)) AND isnotnull(d_date_sk#10)) (29) Project [codegen id : 1] -Output [1]: [d_date_sk#11] -Input [2]: [d_date_sk#11, d_date#16] +Output [1]: [d_date_sk#10] +Input [2]: [d_date_sk#10, d_date#13] (30) BroadcastExchange -Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/explain.txt index c46fce21c25a2..003e5e62ba0c3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.ansi/explain.txt @@ -78,7 +78,7 @@ Condition : (isnotnull(i_item_sk#5) AND isnotnull(i_item_id#6)) (7) BroadcastExchange Input [2]: [i_item_sk#5, i_item_id#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 5] Left keys [1]: [sr_item_sk#1] @@ -90,178 +90,178 @@ Output [3]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#6] Input [5]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3, i_item_sk#5, i_item_id#6] (10) ReusedExchange [Reuses operator id: 62] -Output [1]: [d_date_sk#8] +Output [1]: [d_date_sk#7] (11) BroadcastHashJoin [codegen id : 5] Left keys [1]: [sr_returned_date_sk#3] -Right keys [1]: [d_date_sk#8] +Right keys [1]: [d_date_sk#7] Join condition: None (12) Project [codegen id : 5] Output [2]: [sr_return_quantity#2, i_item_id#6] -Input [4]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#6, d_date_sk#8] +Input [4]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#6, d_date_sk#7] (13) HashAggregate [codegen id : 5] Input [2]: [sr_return_quantity#2, i_item_id#6] Keys [1]: [i_item_id#6] Functions [1]: [partial_sum(sr_return_quantity#2)] -Aggregate Attributes [1]: [sum#9] -Results [2]: [i_item_id#6, sum#10] +Aggregate Attributes [1]: [sum#8] +Results [2]: [i_item_id#6, sum#9] (14) Exchange -Input [2]: [i_item_id#6, sum#10] -Arguments: hashpartitioning(i_item_id#6, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [2]: [i_item_id#6, sum#9] +Arguments: hashpartitioning(i_item_id#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 18] -Input [2]: [i_item_id#6, sum#10] +Input [2]: [i_item_id#6, sum#9] Keys [1]: [i_item_id#6] Functions [1]: [sum(sr_return_quantity#2)] -Aggregate Attributes [1]: [sum(sr_return_quantity#2)#12] -Results [2]: [i_item_id#6 AS item_id#13, sum(sr_return_quantity#2)#12 AS sr_item_qty#14] +Aggregate Attributes [1]: [sum(sr_return_quantity#2)#10] +Results [2]: [i_item_id#6 AS item_id#11, sum(sr_return_quantity#2)#10 AS sr_item_qty#12] (16) Scan parquet default.catalog_returns -Output [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Output [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#17), dynamicpruningexpression(cr_returned_date_sk#17 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(cr_returned_date_sk#15), dynamicpruningexpression(cr_returned_date_sk#15 IN dynamicpruning#4)] PushedFilters: [IsNotNull(cr_item_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 10] -Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Input [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] (18) Filter [codegen id : 10] -Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] -Condition : isnotnull(cr_item_sk#15) +Input [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] +Condition : isnotnull(cr_item_sk#13) (19) ReusedExchange [Reuses operator id: 7] -Output [2]: [i_item_sk#18, i_item_id#19] +Output [2]: [i_item_sk#16, i_item_id#17] (20) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cr_item_sk#15] -Right keys [1]: [i_item_sk#18] +Left keys [1]: [cr_item_sk#13] +Right keys [1]: [i_item_sk#16] Join condition: None (21) Project [codegen id : 10] -Output [3]: [cr_return_quantity#16, cr_returned_date_sk#17, i_item_id#19] -Input [5]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17, i_item_sk#18, i_item_id#19] +Output [3]: [cr_return_quantity#14, cr_returned_date_sk#15, i_item_id#17] +Input [5]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15, i_item_sk#16, i_item_id#17] (22) ReusedExchange [Reuses operator id: 62] -Output [1]: [d_date_sk#20] +Output [1]: [d_date_sk#18] (23) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cr_returned_date_sk#17] -Right keys [1]: [d_date_sk#20] +Left keys [1]: [cr_returned_date_sk#15] +Right keys [1]: [d_date_sk#18] Join condition: None (24) Project [codegen id : 10] -Output [2]: [cr_return_quantity#16, i_item_id#19] -Input [4]: [cr_return_quantity#16, cr_returned_date_sk#17, i_item_id#19, d_date_sk#20] +Output [2]: [cr_return_quantity#14, i_item_id#17] +Input [4]: [cr_return_quantity#14, cr_returned_date_sk#15, i_item_id#17, d_date_sk#18] (25) HashAggregate [codegen id : 10] -Input [2]: [cr_return_quantity#16, i_item_id#19] -Keys [1]: [i_item_id#19] -Functions [1]: [partial_sum(cr_return_quantity#16)] -Aggregate Attributes [1]: [sum#21] -Results [2]: [i_item_id#19, sum#22] +Input [2]: [cr_return_quantity#14, i_item_id#17] +Keys [1]: [i_item_id#17] +Functions [1]: [partial_sum(cr_return_quantity#14)] +Aggregate Attributes [1]: [sum#19] +Results [2]: [i_item_id#17, sum#20] (26) Exchange -Input [2]: [i_item_id#19, sum#22] -Arguments: hashpartitioning(i_item_id#19, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [2]: [i_item_id#17, sum#20] +Arguments: hashpartitioning(i_item_id#17, 5), ENSURE_REQUIREMENTS, [plan_id=3] (27) HashAggregate [codegen id : 11] -Input [2]: [i_item_id#19, sum#22] -Keys [1]: [i_item_id#19] -Functions [1]: [sum(cr_return_quantity#16)] -Aggregate Attributes [1]: [sum(cr_return_quantity#16)#24] -Results [2]: [i_item_id#19 AS item_id#25, sum(cr_return_quantity#16)#24 AS cr_item_qty#26] +Input [2]: [i_item_id#17, sum#20] +Keys [1]: [i_item_id#17] +Functions [1]: [sum(cr_return_quantity#14)] +Aggregate Attributes [1]: [sum(cr_return_quantity#14)#21] +Results [2]: [i_item_id#17 AS item_id#22, sum(cr_return_quantity#14)#21 AS cr_item_qty#23] (28) BroadcastExchange -Input [2]: [item_id#25, cr_item_qty#26] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] +Input [2]: [item_id#22, cr_item_qty#23] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [item_id#13] -Right keys [1]: [item_id#25] +Left keys [1]: [item_id#11] +Right keys [1]: [item_id#22] Join condition: None (30) Project [codegen id : 18] -Output [3]: [item_id#13, sr_item_qty#14, cr_item_qty#26] -Input [4]: [item_id#13, sr_item_qty#14, item_id#25, cr_item_qty#26] +Output [3]: [item_id#11, sr_item_qty#12, cr_item_qty#23] +Input [4]: [item_id#11, sr_item_qty#12, item_id#22, cr_item_qty#23] (31) Scan parquet default.web_returns -Output [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Output [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#30), dynamicpruningexpression(wr_returned_date_sk#30 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(wr_returned_date_sk#26), dynamicpruningexpression(wr_returned_date_sk#26 IN dynamicpruning#4)] PushedFilters: [IsNotNull(wr_item_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 16] -Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Input [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] (33) Filter [codegen id : 16] -Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] -Condition : isnotnull(wr_item_sk#28) +Input [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] +Condition : isnotnull(wr_item_sk#24) (34) ReusedExchange [Reuses operator id: 7] -Output [2]: [i_item_sk#31, i_item_id#32] +Output [2]: [i_item_sk#27, i_item_id#28] (35) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [wr_item_sk#28] -Right keys [1]: [i_item_sk#31] +Left keys [1]: [wr_item_sk#24] +Right keys [1]: [i_item_sk#27] Join condition: None (36) Project [codegen id : 16] -Output [3]: [wr_return_quantity#29, wr_returned_date_sk#30, i_item_id#32] -Input [5]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30, i_item_sk#31, i_item_id#32] +Output [3]: [wr_return_quantity#25, wr_returned_date_sk#26, i_item_id#28] +Input [5]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26, i_item_sk#27, i_item_id#28] (37) ReusedExchange [Reuses operator id: 62] -Output [1]: [d_date_sk#33] +Output [1]: [d_date_sk#29] (38) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [wr_returned_date_sk#30] -Right keys [1]: [d_date_sk#33] +Left keys [1]: [wr_returned_date_sk#26] +Right keys [1]: [d_date_sk#29] Join condition: None (39) Project [codegen id : 16] -Output [2]: [wr_return_quantity#29, i_item_id#32] -Input [4]: [wr_return_quantity#29, wr_returned_date_sk#30, i_item_id#32, d_date_sk#33] +Output [2]: [wr_return_quantity#25, i_item_id#28] +Input [4]: [wr_return_quantity#25, wr_returned_date_sk#26, i_item_id#28, d_date_sk#29] (40) HashAggregate [codegen id : 16] -Input [2]: [wr_return_quantity#29, i_item_id#32] -Keys [1]: [i_item_id#32] -Functions [1]: [partial_sum(wr_return_quantity#29)] -Aggregate Attributes [1]: [sum#34] -Results [2]: [i_item_id#32, sum#35] +Input [2]: [wr_return_quantity#25, i_item_id#28] +Keys [1]: [i_item_id#28] +Functions [1]: [partial_sum(wr_return_quantity#25)] +Aggregate Attributes [1]: [sum#30] +Results [2]: [i_item_id#28, sum#31] (41) Exchange -Input [2]: [i_item_id#32, sum#35] -Arguments: hashpartitioning(i_item_id#32, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [2]: [i_item_id#28, sum#31] +Arguments: hashpartitioning(i_item_id#28, 5), ENSURE_REQUIREMENTS, [plan_id=5] (42) HashAggregate [codegen id : 17] -Input [2]: [i_item_id#32, sum#35] -Keys [1]: [i_item_id#32] -Functions [1]: [sum(wr_return_quantity#29)] -Aggregate Attributes [1]: [sum(wr_return_quantity#29)#37] -Results [2]: [i_item_id#32 AS item_id#38, sum(wr_return_quantity#29)#37 AS wr_item_qty#39] +Input [2]: [i_item_id#28, sum#31] +Keys [1]: [i_item_id#28] +Functions [1]: [sum(wr_return_quantity#25)] +Aggregate Attributes [1]: [sum(wr_return_quantity#25)#32] +Results [2]: [i_item_id#28 AS item_id#33, sum(wr_return_quantity#25)#32 AS wr_item_qty#34] (43) BroadcastExchange -Input [2]: [item_id#38, wr_item_qty#39] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] +Input [2]: [item_id#33, wr_item_qty#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] (44) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [item_id#13] -Right keys [1]: [item_id#38] +Left keys [1]: [item_id#11] +Right keys [1]: [item_id#33] Join condition: None (45) Project [codegen id : 18] -Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#44] -Input [5]: [item_id#13, sr_item_qty#14, cr_item_qty#26, item_id#38, wr_item_qty#39] +Output [8]: [item_id#11, sr_item_qty#12, (((cast(sr_item_qty#12 as double) / cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as double)) / 3.0) * 100.0) AS sr_dev#35, cr_item_qty#23, (((cast(cr_item_qty#23 as double) / cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as double)) / 3.0) * 100.0) AS cr_dev#36, wr_item_qty#34, (((cast(wr_item_qty#34 as double) / cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as double)) / 3.0) * 100.0) AS wr_dev#37, CheckOverflow((promote_precision(cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#38] +Input [5]: [item_id#11, sr_item_qty#12, cr_item_qty#23, item_id#33, wr_item_qty#34] (46) TakeOrderedAndProject -Input [8]: [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] -Arguments: 100, [item_id#13 ASC NULLS FIRST, sr_item_qty#14 ASC NULLS FIRST], [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] +Input [8]: [item_id#11, sr_item_qty#12, sr_dev#35, cr_item_qty#23, cr_dev#36, wr_item_qty#34, wr_dev#37, average#38] +Arguments: 100, [item_id#11 ASC NULLS FIRST, sr_item_qty#12 ASC NULLS FIRST], [item_id#11, sr_item_qty#12, sr_dev#35, cr_item_qty#23, cr_dev#36, wr_item_qty#34, wr_dev#37, average#38] ===== Subqueries ===== @@ -285,78 +285,78 @@ BroadcastExchange (62) (47) Scan parquet default.date_dim -Output [2]: [d_date_sk#8, d_date#45] +Output [2]: [d_date_sk#7, d_date#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date_sk)] ReadSchema: struct (48) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#8, d_date#45] +Input [2]: [d_date_sk#7, d_date#39] (49) Filter [codegen id : 3] -Input [2]: [d_date_sk#8, d_date#45] -Condition : isnotnull(d_date_sk#8) +Input [2]: [d_date_sk#7, d_date#39] +Condition : isnotnull(d_date_sk#7) (50) Scan parquet default.date_dim -Output [2]: [d_date#46, d_week_seq#47] +Output [2]: [d_date#40, d_week_seq#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] ReadSchema: struct (51) ColumnarToRow [codegen id : 2] -Input [2]: [d_date#46, d_week_seq#47] +Input [2]: [d_date#40, d_week_seq#41] (52) Scan parquet default.date_dim -Output [2]: [d_date#48, d_week_seq#49] +Output [2]: [d_date#42, d_week_seq#43] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_date, [2000-06-30,2000-09-27,2000-11-17])] ReadSchema: struct (53) ColumnarToRow [codegen id : 1] -Input [2]: [d_date#48, d_week_seq#49] +Input [2]: [d_date#42, d_week_seq#43] (54) Filter [codegen id : 1] -Input [2]: [d_date#48, d_week_seq#49] -Condition : d_date#48 IN (2000-06-30,2000-09-27,2000-11-17) +Input [2]: [d_date#42, d_week_seq#43] +Condition : d_date#42 IN (2000-06-30,2000-09-27,2000-11-17) (55) Project [codegen id : 1] -Output [1]: [d_week_seq#49] -Input [2]: [d_date#48, d_week_seq#49] +Output [1]: [d_week_seq#43] +Input [2]: [d_date#42, d_week_seq#43] (56) BroadcastExchange -Input [1]: [d_week_seq#49] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] +Input [1]: [d_week_seq#43] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] (57) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [d_week_seq#47] -Right keys [1]: [d_week_seq#49] +Left keys [1]: [d_week_seq#41] +Right keys [1]: [d_week_seq#43] Join condition: None (58) Project [codegen id : 2] -Output [1]: [d_date#46] -Input [2]: [d_date#46, d_week_seq#47] +Output [1]: [d_date#40] +Input [2]: [d_date#40, d_week_seq#41] (59) BroadcastExchange -Input [1]: [d_date#46] -Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#51] +Input [1]: [d_date#40] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [plan_id=8] (60) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [d_date#45] -Right keys [1]: [d_date#46] +Left keys [1]: [d_date#39] +Right keys [1]: [d_date#40] Join condition: None (61) Project [codegen id : 3] -Output [1]: [d_date_sk#8] -Input [2]: [d_date_sk#8, d_date#45] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#39] (62) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] -Subquery:2 Hosting operator id = 16 Hosting Expression = cr_returned_date_sk#17 IN dynamicpruning#4 +Subquery:2 Hosting operator id = 16 Hosting Expression = cr_returned_date_sk#15 IN dynamicpruning#4 -Subquery:3 Hosting operator id = 31 Hosting Expression = wr_returned_date_sk#30 IN dynamicpruning#4 +Subquery:3 Hosting operator id = 31 Hosting Expression = wr_returned_date_sk#26 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/explain.txt index bda63681ef500..d4e45fedc6788 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100.ansi/explain.txt @@ -90,7 +90,7 @@ Condition : (isnotnull(i_item_sk#6) AND isnotnull(i_item_id#7)) (10) BroadcastExchange Input [2]: [i_item_sk#6, i_item_id#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 5] Left keys [1]: [sr_item_sk#1] @@ -105,163 +105,163 @@ Input [4]: [sr_item_sk#1, sr_return_quantity#2, i_item_sk#6, i_item_id#7] Input [2]: [sr_return_quantity#2, i_item_id#7] Keys [1]: [i_item_id#7] Functions [1]: [partial_sum(sr_return_quantity#2)] -Aggregate Attributes [1]: [sum#9] -Results [2]: [i_item_id#7, sum#10] +Aggregate Attributes [1]: [sum#8] +Results [2]: [i_item_id#7, sum#9] (14) Exchange -Input [2]: [i_item_id#7, sum#10] -Arguments: hashpartitioning(i_item_id#7, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [2]: [i_item_id#7, sum#9] +Arguments: hashpartitioning(i_item_id#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 18] -Input [2]: [i_item_id#7, sum#10] +Input [2]: [i_item_id#7, sum#9] Keys [1]: [i_item_id#7] Functions [1]: [sum(sr_return_quantity#2)] -Aggregate Attributes [1]: [sum(sr_return_quantity#2)#12] -Results [2]: [i_item_id#7 AS item_id#13, sum(sr_return_quantity#2)#12 AS sr_item_qty#14] +Aggregate Attributes [1]: [sum(sr_return_quantity#2)#10] +Results [2]: [i_item_id#7 AS item_id#11, sum(sr_return_quantity#2)#10 AS sr_item_qty#12] (16) Scan parquet default.catalog_returns -Output [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Output [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#17), dynamicpruningexpression(cr_returned_date_sk#17 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(cr_returned_date_sk#15), dynamicpruningexpression(cr_returned_date_sk#15 IN dynamicpruning#4)] PushedFilters: [IsNotNull(cr_item_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 10] -Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Input [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] (18) Filter [codegen id : 10] -Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] -Condition : isnotnull(cr_item_sk#15) +Input [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] +Condition : isnotnull(cr_item_sk#13) (19) ReusedExchange [Reuses operator id: 62] -Output [1]: [d_date_sk#18] +Output [1]: [d_date_sk#16] (20) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cr_returned_date_sk#17] -Right keys [1]: [d_date_sk#18] +Left keys [1]: [cr_returned_date_sk#15] +Right keys [1]: [d_date_sk#16] Join condition: None (21) Project [codegen id : 10] -Output [2]: [cr_item_sk#15, cr_return_quantity#16] -Input [4]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17, d_date_sk#18] +Output [2]: [cr_item_sk#13, cr_return_quantity#14] +Input [4]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15, d_date_sk#16] (22) ReusedExchange [Reuses operator id: 10] -Output [2]: [i_item_sk#19, i_item_id#20] +Output [2]: [i_item_sk#17, i_item_id#18] (23) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cr_item_sk#15] -Right keys [1]: [i_item_sk#19] +Left keys [1]: [cr_item_sk#13] +Right keys [1]: [i_item_sk#17] Join condition: None (24) Project [codegen id : 10] -Output [2]: [cr_return_quantity#16, i_item_id#20] -Input [4]: [cr_item_sk#15, cr_return_quantity#16, i_item_sk#19, i_item_id#20] +Output [2]: [cr_return_quantity#14, i_item_id#18] +Input [4]: [cr_item_sk#13, cr_return_quantity#14, i_item_sk#17, i_item_id#18] (25) HashAggregate [codegen id : 10] -Input [2]: [cr_return_quantity#16, i_item_id#20] -Keys [1]: [i_item_id#20] -Functions [1]: [partial_sum(cr_return_quantity#16)] -Aggregate Attributes [1]: [sum#21] -Results [2]: [i_item_id#20, sum#22] +Input [2]: [cr_return_quantity#14, i_item_id#18] +Keys [1]: [i_item_id#18] +Functions [1]: [partial_sum(cr_return_quantity#14)] +Aggregate Attributes [1]: [sum#19] +Results [2]: [i_item_id#18, sum#20] (26) Exchange -Input [2]: [i_item_id#20, sum#22] -Arguments: hashpartitioning(i_item_id#20, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [2]: [i_item_id#18, sum#20] +Arguments: hashpartitioning(i_item_id#18, 5), ENSURE_REQUIREMENTS, [plan_id=3] (27) HashAggregate [codegen id : 11] -Input [2]: [i_item_id#20, sum#22] -Keys [1]: [i_item_id#20] -Functions [1]: [sum(cr_return_quantity#16)] -Aggregate Attributes [1]: [sum(cr_return_quantity#16)#24] -Results [2]: [i_item_id#20 AS item_id#25, sum(cr_return_quantity#16)#24 AS cr_item_qty#26] +Input [2]: [i_item_id#18, sum#20] +Keys [1]: [i_item_id#18] +Functions [1]: [sum(cr_return_quantity#14)] +Aggregate Attributes [1]: [sum(cr_return_quantity#14)#21] +Results [2]: [i_item_id#18 AS item_id#22, sum(cr_return_quantity#14)#21 AS cr_item_qty#23] (28) BroadcastExchange -Input [2]: [item_id#25, cr_item_qty#26] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] +Input [2]: [item_id#22, cr_item_qty#23] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [item_id#13] -Right keys [1]: [item_id#25] +Left keys [1]: [item_id#11] +Right keys [1]: [item_id#22] Join condition: None (30) Project [codegen id : 18] -Output [3]: [item_id#13, sr_item_qty#14, cr_item_qty#26] -Input [4]: [item_id#13, sr_item_qty#14, item_id#25, cr_item_qty#26] +Output [3]: [item_id#11, sr_item_qty#12, cr_item_qty#23] +Input [4]: [item_id#11, sr_item_qty#12, item_id#22, cr_item_qty#23] (31) Scan parquet default.web_returns -Output [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Output [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#30), dynamicpruningexpression(wr_returned_date_sk#30 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(wr_returned_date_sk#26), dynamicpruningexpression(wr_returned_date_sk#26 IN dynamicpruning#4)] PushedFilters: [IsNotNull(wr_item_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 16] -Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Input [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] (33) Filter [codegen id : 16] -Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] -Condition : isnotnull(wr_item_sk#28) +Input [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] +Condition : isnotnull(wr_item_sk#24) (34) ReusedExchange [Reuses operator id: 62] -Output [1]: [d_date_sk#31] +Output [1]: [d_date_sk#27] (35) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [wr_returned_date_sk#30] -Right keys [1]: [d_date_sk#31] +Left keys [1]: [wr_returned_date_sk#26] +Right keys [1]: [d_date_sk#27] Join condition: None (36) Project [codegen id : 16] -Output [2]: [wr_item_sk#28, wr_return_quantity#29] -Input [4]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30, d_date_sk#31] +Output [2]: [wr_item_sk#24, wr_return_quantity#25] +Input [4]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26, d_date_sk#27] (37) ReusedExchange [Reuses operator id: 10] -Output [2]: [i_item_sk#32, i_item_id#33] +Output [2]: [i_item_sk#28, i_item_id#29] (38) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [wr_item_sk#28] -Right keys [1]: [i_item_sk#32] +Left keys [1]: [wr_item_sk#24] +Right keys [1]: [i_item_sk#28] Join condition: None (39) Project [codegen id : 16] -Output [2]: [wr_return_quantity#29, i_item_id#33] -Input [4]: [wr_item_sk#28, wr_return_quantity#29, i_item_sk#32, i_item_id#33] +Output [2]: [wr_return_quantity#25, i_item_id#29] +Input [4]: [wr_item_sk#24, wr_return_quantity#25, i_item_sk#28, i_item_id#29] (40) HashAggregate [codegen id : 16] -Input [2]: [wr_return_quantity#29, i_item_id#33] -Keys [1]: [i_item_id#33] -Functions [1]: [partial_sum(wr_return_quantity#29)] -Aggregate Attributes [1]: [sum#34] -Results [2]: [i_item_id#33, sum#35] +Input [2]: [wr_return_quantity#25, i_item_id#29] +Keys [1]: [i_item_id#29] +Functions [1]: [partial_sum(wr_return_quantity#25)] +Aggregate Attributes [1]: [sum#30] +Results [2]: [i_item_id#29, sum#31] (41) Exchange -Input [2]: [i_item_id#33, sum#35] -Arguments: hashpartitioning(i_item_id#33, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [2]: [i_item_id#29, sum#31] +Arguments: hashpartitioning(i_item_id#29, 5), ENSURE_REQUIREMENTS, [plan_id=5] (42) HashAggregate [codegen id : 17] -Input [2]: [i_item_id#33, sum#35] -Keys [1]: [i_item_id#33] -Functions [1]: [sum(wr_return_quantity#29)] -Aggregate Attributes [1]: [sum(wr_return_quantity#29)#37] -Results [2]: [i_item_id#33 AS item_id#38, sum(wr_return_quantity#29)#37 AS wr_item_qty#39] +Input [2]: [i_item_id#29, sum#31] +Keys [1]: [i_item_id#29] +Functions [1]: [sum(wr_return_quantity#25)] +Aggregate Attributes [1]: [sum(wr_return_quantity#25)#32] +Results [2]: [i_item_id#29 AS item_id#33, sum(wr_return_quantity#25)#32 AS wr_item_qty#34] (43) BroadcastExchange -Input [2]: [item_id#38, wr_item_qty#39] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] +Input [2]: [item_id#33, wr_item_qty#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] (44) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [item_id#13] -Right keys [1]: [item_id#38] +Left keys [1]: [item_id#11] +Right keys [1]: [item_id#33] Join condition: None (45) Project [codegen id : 18] -Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#44] -Input [5]: [item_id#13, sr_item_qty#14, cr_item_qty#26, item_id#38, wr_item_qty#39] +Output [8]: [item_id#11, sr_item_qty#12, (((cast(sr_item_qty#12 as double) / cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as double)) / 3.0) * 100.0) AS sr_dev#35, cr_item_qty#23, (((cast(cr_item_qty#23 as double) / cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as double)) / 3.0) * 100.0) AS cr_dev#36, wr_item_qty#34, (((cast(wr_item_qty#34 as double) / cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as double)) / 3.0) * 100.0) AS wr_dev#37, CheckOverflow((promote_precision(cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#38] +Input [5]: [item_id#11, sr_item_qty#12, cr_item_qty#23, item_id#33, wr_item_qty#34] (46) TakeOrderedAndProject -Input [8]: [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] -Arguments: 100, [item_id#13 ASC NULLS FIRST, sr_item_qty#14 ASC NULLS FIRST], [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] +Input [8]: [item_id#11, sr_item_qty#12, sr_dev#35, cr_item_qty#23, cr_dev#36, wr_item_qty#34, wr_dev#37, average#38] +Arguments: 100, [item_id#11 ASC NULLS FIRST, sr_item_qty#12 ASC NULLS FIRST], [item_id#11, sr_item_qty#12, sr_dev#35, cr_item_qty#23, cr_dev#36, wr_item_qty#34, wr_dev#37, average#38] ===== Subqueries ===== @@ -285,78 +285,78 @@ BroadcastExchange (62) (47) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_date#45] +Output [2]: [d_date_sk#5, d_date#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date_sk)] ReadSchema: struct (48) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#5, d_date#45] +Input [2]: [d_date_sk#5, d_date#39] (49) Filter [codegen id : 3] -Input [2]: [d_date_sk#5, d_date#45] +Input [2]: [d_date_sk#5, d_date#39] Condition : isnotnull(d_date_sk#5) (50) Scan parquet default.date_dim -Output [2]: [d_date#46, d_week_seq#47] +Output [2]: [d_date#40, d_week_seq#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] ReadSchema: struct (51) ColumnarToRow [codegen id : 2] -Input [2]: [d_date#46, d_week_seq#47] +Input [2]: [d_date#40, d_week_seq#41] (52) Scan parquet default.date_dim -Output [2]: [d_date#48, d_week_seq#49] +Output [2]: [d_date#42, d_week_seq#43] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [In(d_date, [2000-06-30,2000-09-27,2000-11-17])] ReadSchema: struct (53) ColumnarToRow [codegen id : 1] -Input [2]: [d_date#48, d_week_seq#49] +Input [2]: [d_date#42, d_week_seq#43] (54) Filter [codegen id : 1] -Input [2]: [d_date#48, d_week_seq#49] -Condition : d_date#48 IN (2000-06-30,2000-09-27,2000-11-17) +Input [2]: [d_date#42, d_week_seq#43] +Condition : d_date#42 IN (2000-06-30,2000-09-27,2000-11-17) (55) Project [codegen id : 1] -Output [1]: [d_week_seq#49] -Input [2]: [d_date#48, d_week_seq#49] +Output [1]: [d_week_seq#43] +Input [2]: [d_date#42, d_week_seq#43] (56) BroadcastExchange -Input [1]: [d_week_seq#49] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] +Input [1]: [d_week_seq#43] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] (57) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [d_week_seq#47] -Right keys [1]: [d_week_seq#49] +Left keys [1]: [d_week_seq#41] +Right keys [1]: [d_week_seq#43] Join condition: None (58) Project [codegen id : 2] -Output [1]: [d_date#46] -Input [2]: [d_date#46, d_week_seq#47] +Output [1]: [d_date#40] +Input [2]: [d_date#40, d_week_seq#41] (59) BroadcastExchange -Input [1]: [d_date#46] -Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#51] +Input [1]: [d_date#40] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [plan_id=8] (60) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [d_date#45] -Right keys [1]: [d_date#46] +Left keys [1]: [d_date#39] +Right keys [1]: [d_date#40] Join condition: None (61) Project [codegen id : 3] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_date#45] +Input [2]: [d_date_sk#5, d_date#39] (62) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] -Subquery:2 Hosting operator id = 16 Hosting Expression = cr_returned_date_sk#17 IN dynamicpruning#4 +Subquery:2 Hosting operator id = 16 Hosting Expression = cr_returned_date_sk#15 IN dynamicpruning#4 -Subquery:3 Hosting operator id = 31 Hosting Expression = wr_returned_date_sk#30 IN dynamicpruning#4 +Subquery:3 Hosting operator id = 31 Hosting Expression = wr_returned_date_sk#26 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt index 3374a3dc3daae..a0be704ebd2a1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83.sf100/explain.txt @@ -90,7 +90,7 @@ Condition : (isnotnull(i_item_sk#6) AND isnotnull(i_item_id#7)) (10) BroadcastExchange Input [2]: [i_item_sk#6, i_item_id#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 5] Left keys [1]: [sr_item_sk#1] @@ -105,163 +105,163 @@ Input [4]: [sr_item_sk#1, sr_return_quantity#2, i_item_sk#6, i_item_id#7] Input [2]: [sr_return_quantity#2, i_item_id#7] Keys [1]: [i_item_id#7] Functions [1]: [partial_sum(sr_return_quantity#2)] -Aggregate Attributes [1]: [sum#9] -Results [2]: [i_item_id#7, sum#10] +Aggregate Attributes [1]: [sum#8] +Results [2]: [i_item_id#7, sum#9] (14) Exchange -Input [2]: [i_item_id#7, sum#10] -Arguments: hashpartitioning(i_item_id#7, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [2]: [i_item_id#7, sum#9] +Arguments: hashpartitioning(i_item_id#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 18] -Input [2]: [i_item_id#7, sum#10] +Input [2]: [i_item_id#7, sum#9] Keys [1]: [i_item_id#7] Functions [1]: [sum(sr_return_quantity#2)] -Aggregate Attributes [1]: [sum(sr_return_quantity#2)#12] -Results [2]: [i_item_id#7 AS item_id#13, sum(sr_return_quantity#2)#12 AS sr_item_qty#14] +Aggregate Attributes [1]: [sum(sr_return_quantity#2)#10] +Results [2]: [i_item_id#7 AS item_id#11, sum(sr_return_quantity#2)#10 AS sr_item_qty#12] (16) Scan parquet default.catalog_returns -Output [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Output [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#17), dynamicpruningexpression(cr_returned_date_sk#17 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(cr_returned_date_sk#15), dynamicpruningexpression(cr_returned_date_sk#15 IN dynamicpruning#4)] PushedFilters: [IsNotNull(cr_item_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 10] -Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Input [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] (18) Filter [codegen id : 10] -Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] -Condition : isnotnull(cr_item_sk#15) +Input [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] +Condition : isnotnull(cr_item_sk#13) (19) ReusedExchange [Reuses operator id: 62] -Output [1]: [d_date_sk#18] +Output [1]: [d_date_sk#16] (20) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cr_returned_date_sk#17] -Right keys [1]: [d_date_sk#18] +Left keys [1]: [cr_returned_date_sk#15] +Right keys [1]: [d_date_sk#16] Join condition: None (21) Project [codegen id : 10] -Output [2]: [cr_item_sk#15, cr_return_quantity#16] -Input [4]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17, d_date_sk#18] +Output [2]: [cr_item_sk#13, cr_return_quantity#14] +Input [4]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15, d_date_sk#16] (22) ReusedExchange [Reuses operator id: 10] -Output [2]: [i_item_sk#19, i_item_id#20] +Output [2]: [i_item_sk#17, i_item_id#18] (23) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cr_item_sk#15] -Right keys [1]: [i_item_sk#19] +Left keys [1]: [cr_item_sk#13] +Right keys [1]: [i_item_sk#17] Join condition: None (24) Project [codegen id : 10] -Output [2]: [cr_return_quantity#16, i_item_id#20] -Input [4]: [cr_item_sk#15, cr_return_quantity#16, i_item_sk#19, i_item_id#20] +Output [2]: [cr_return_quantity#14, i_item_id#18] +Input [4]: [cr_item_sk#13, cr_return_quantity#14, i_item_sk#17, i_item_id#18] (25) HashAggregate [codegen id : 10] -Input [2]: [cr_return_quantity#16, i_item_id#20] -Keys [1]: [i_item_id#20] -Functions [1]: [partial_sum(cr_return_quantity#16)] -Aggregate Attributes [1]: [sum#21] -Results [2]: [i_item_id#20, sum#22] +Input [2]: [cr_return_quantity#14, i_item_id#18] +Keys [1]: [i_item_id#18] +Functions [1]: [partial_sum(cr_return_quantity#14)] +Aggregate Attributes [1]: [sum#19] +Results [2]: [i_item_id#18, sum#20] (26) Exchange -Input [2]: [i_item_id#20, sum#22] -Arguments: hashpartitioning(i_item_id#20, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [2]: [i_item_id#18, sum#20] +Arguments: hashpartitioning(i_item_id#18, 5), ENSURE_REQUIREMENTS, [plan_id=3] (27) HashAggregate [codegen id : 11] -Input [2]: [i_item_id#20, sum#22] -Keys [1]: [i_item_id#20] -Functions [1]: [sum(cr_return_quantity#16)] -Aggregate Attributes [1]: [sum(cr_return_quantity#16)#24] -Results [2]: [i_item_id#20 AS item_id#25, sum(cr_return_quantity#16)#24 AS cr_item_qty#26] +Input [2]: [i_item_id#18, sum#20] +Keys [1]: [i_item_id#18] +Functions [1]: [sum(cr_return_quantity#14)] +Aggregate Attributes [1]: [sum(cr_return_quantity#14)#21] +Results [2]: [i_item_id#18 AS item_id#22, sum(cr_return_quantity#14)#21 AS cr_item_qty#23] (28) BroadcastExchange -Input [2]: [item_id#25, cr_item_qty#26] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] +Input [2]: [item_id#22, cr_item_qty#23] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [item_id#13] -Right keys [1]: [item_id#25] +Left keys [1]: [item_id#11] +Right keys [1]: [item_id#22] Join condition: None (30) Project [codegen id : 18] -Output [3]: [item_id#13, sr_item_qty#14, cr_item_qty#26] -Input [4]: [item_id#13, sr_item_qty#14, item_id#25, cr_item_qty#26] +Output [3]: [item_id#11, sr_item_qty#12, cr_item_qty#23] +Input [4]: [item_id#11, sr_item_qty#12, item_id#22, cr_item_qty#23] (31) Scan parquet default.web_returns -Output [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Output [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#30), dynamicpruningexpression(wr_returned_date_sk#30 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(wr_returned_date_sk#26), dynamicpruningexpression(wr_returned_date_sk#26 IN dynamicpruning#4)] PushedFilters: [IsNotNull(wr_item_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 16] -Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Input [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] (33) Filter [codegen id : 16] -Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] -Condition : isnotnull(wr_item_sk#28) +Input [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] +Condition : isnotnull(wr_item_sk#24) (34) ReusedExchange [Reuses operator id: 62] -Output [1]: [d_date_sk#31] +Output [1]: [d_date_sk#27] (35) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [wr_returned_date_sk#30] -Right keys [1]: [d_date_sk#31] +Left keys [1]: [wr_returned_date_sk#26] +Right keys [1]: [d_date_sk#27] Join condition: None (36) Project [codegen id : 16] -Output [2]: [wr_item_sk#28, wr_return_quantity#29] -Input [4]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30, d_date_sk#31] +Output [2]: [wr_item_sk#24, wr_return_quantity#25] +Input [4]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26, d_date_sk#27] (37) ReusedExchange [Reuses operator id: 10] -Output [2]: [i_item_sk#32, i_item_id#33] +Output [2]: [i_item_sk#28, i_item_id#29] (38) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [wr_item_sk#28] -Right keys [1]: [i_item_sk#32] +Left keys [1]: [wr_item_sk#24] +Right keys [1]: [i_item_sk#28] Join condition: None (39) Project [codegen id : 16] -Output [2]: [wr_return_quantity#29, i_item_id#33] -Input [4]: [wr_item_sk#28, wr_return_quantity#29, i_item_sk#32, i_item_id#33] +Output [2]: [wr_return_quantity#25, i_item_id#29] +Input [4]: [wr_item_sk#24, wr_return_quantity#25, i_item_sk#28, i_item_id#29] (40) HashAggregate [codegen id : 16] -Input [2]: [wr_return_quantity#29, i_item_id#33] -Keys [1]: [i_item_id#33] -Functions [1]: [partial_sum(wr_return_quantity#29)] -Aggregate Attributes [1]: [sum#34] -Results [2]: [i_item_id#33, sum#35] +Input [2]: [wr_return_quantity#25, i_item_id#29] +Keys [1]: [i_item_id#29] +Functions [1]: [partial_sum(wr_return_quantity#25)] +Aggregate Attributes [1]: [sum#30] +Results [2]: [i_item_id#29, sum#31] (41) Exchange -Input [2]: [i_item_id#33, sum#35] -Arguments: hashpartitioning(i_item_id#33, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [2]: [i_item_id#29, sum#31] +Arguments: hashpartitioning(i_item_id#29, 5), ENSURE_REQUIREMENTS, [plan_id=5] (42) HashAggregate [codegen id : 17] -Input [2]: [i_item_id#33, sum#35] -Keys [1]: [i_item_id#33] -Functions [1]: [sum(wr_return_quantity#29)] -Aggregate Attributes [1]: [sum(wr_return_quantity#29)#37] -Results [2]: [i_item_id#33 AS item_id#38, sum(wr_return_quantity#29)#37 AS wr_item_qty#39] +Input [2]: [i_item_id#29, sum#31] +Keys [1]: [i_item_id#29] +Functions [1]: [sum(wr_return_quantity#25)] +Aggregate Attributes [1]: [sum(wr_return_quantity#25)#32] +Results [2]: [i_item_id#29 AS item_id#33, sum(wr_return_quantity#25)#32 AS wr_item_qty#34] (43) BroadcastExchange -Input [2]: [item_id#38, wr_item_qty#39] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] +Input [2]: [item_id#33, wr_item_qty#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] (44) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [item_id#13] -Right keys [1]: [item_id#38] +Left keys [1]: [item_id#11] +Right keys [1]: [item_id#33] Join condition: None (45) Project [codegen id : 18] -Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#44] -Input [5]: [item_id#13, sr_item_qty#14, cr_item_qty#26, item_id#38, wr_item_qty#39] +Output [8]: [item_id#11, sr_item_qty#12, (((cast(sr_item_qty#12 as double) / cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as double)) / 3.0) * 100.0) AS sr_dev#35, cr_item_qty#23, (((cast(cr_item_qty#23 as double) / cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as double)) / 3.0) * 100.0) AS cr_dev#36, wr_item_qty#34, (((cast(wr_item_qty#34 as double) / cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as double)) / 3.0) * 100.0) AS wr_dev#37, CheckOverflow((promote_precision(cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#38] +Input [5]: [item_id#11, sr_item_qty#12, cr_item_qty#23, item_id#33, wr_item_qty#34] (46) TakeOrderedAndProject -Input [8]: [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] -Arguments: 100, [item_id#13 ASC NULLS FIRST, sr_item_qty#14 ASC NULLS FIRST], [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] +Input [8]: [item_id#11, sr_item_qty#12, sr_dev#35, cr_item_qty#23, cr_dev#36, wr_item_qty#34, wr_dev#37, average#38] +Arguments: 100, [item_id#11 ASC NULLS FIRST, sr_item_qty#12 ASC NULLS FIRST], [item_id#11, sr_item_qty#12, sr_dev#35, cr_item_qty#23, cr_dev#36, wr_item_qty#34, wr_dev#37, average#38] ===== Subqueries ===== @@ -285,77 +285,77 @@ BroadcastExchange (62) (47) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_date#45] +Output [2]: [d_date_sk#5, d_date#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date_sk)] ReadSchema: struct (48) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#5, d_date#45] +Input [2]: [d_date_sk#5, d_date#39] (49) Filter [codegen id : 3] -Input [2]: [d_date_sk#5, d_date#45] +Input [2]: [d_date_sk#5, d_date#39] Condition : isnotnull(d_date_sk#5) (50) Scan parquet default.date_dim -Output [2]: [d_date#46, d_week_seq#47] +Output [2]: [d_date#40, d_week_seq#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] ReadSchema: struct (51) ColumnarToRow [codegen id : 2] -Input [2]: [d_date#46, d_week_seq#47] +Input [2]: [d_date#40, d_week_seq#41] (52) Scan parquet default.date_dim -Output [2]: [d_date#48, d_week_seq#49] +Output [2]: [d_date#42, d_week_seq#43] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] ReadSchema: struct (53) ColumnarToRow [codegen id : 1] -Input [2]: [d_date#48, d_week_seq#49] +Input [2]: [d_date#42, d_week_seq#43] (54) Filter [codegen id : 1] -Input [2]: [d_date#48, d_week_seq#49] -Condition : cast(d_date#48 as string) IN (2000-06-30,2000-09-27,2000-11-17) +Input [2]: [d_date#42, d_week_seq#43] +Condition : cast(d_date#42 as string) IN (2000-06-30,2000-09-27,2000-11-17) (55) Project [codegen id : 1] -Output [1]: [d_week_seq#49] -Input [2]: [d_date#48, d_week_seq#49] +Output [1]: [d_week_seq#43] +Input [2]: [d_date#42, d_week_seq#43] (56) BroadcastExchange -Input [1]: [d_week_seq#49] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] +Input [1]: [d_week_seq#43] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] (57) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [d_week_seq#47] -Right keys [1]: [d_week_seq#49] +Left keys [1]: [d_week_seq#41] +Right keys [1]: [d_week_seq#43] Join condition: None (58) Project [codegen id : 2] -Output [1]: [d_date#46] -Input [2]: [d_date#46, d_week_seq#47] +Output [1]: [d_date#40] +Input [2]: [d_date#40, d_week_seq#41] (59) BroadcastExchange -Input [1]: [d_date#46] -Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#51] +Input [1]: [d_date#40] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [plan_id=8] (60) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [d_date#45] -Right keys [1]: [d_date#46] +Left keys [1]: [d_date#39] +Right keys [1]: [d_date#40] Join condition: None (61) Project [codegen id : 3] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_date#45] +Input [2]: [d_date_sk#5, d_date#39] (62) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] -Subquery:2 Hosting operator id = 16 Hosting Expression = cr_returned_date_sk#17 IN dynamicpruning#4 +Subquery:2 Hosting operator id = 16 Hosting Expression = cr_returned_date_sk#15 IN dynamicpruning#4 -Subquery:3 Hosting operator id = 31 Hosting Expression = wr_returned_date_sk#30 IN dynamicpruning#4 +Subquery:3 Hosting operator id = 31 Hosting Expression = wr_returned_date_sk#26 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt index 106d5dd3090e3..bc6c43f18683e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q83/explain.txt @@ -78,7 +78,7 @@ Condition : (isnotnull(i_item_sk#5) AND isnotnull(i_item_id#6)) (7) BroadcastExchange Input [2]: [i_item_sk#5, i_item_id#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 5] Left keys [1]: [sr_item_sk#1] @@ -90,178 +90,178 @@ Output [3]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#6] Input [5]: [sr_item_sk#1, sr_return_quantity#2, sr_returned_date_sk#3, i_item_sk#5, i_item_id#6] (10) ReusedExchange [Reuses operator id: 62] -Output [1]: [d_date_sk#8] +Output [1]: [d_date_sk#7] (11) BroadcastHashJoin [codegen id : 5] Left keys [1]: [sr_returned_date_sk#3] -Right keys [1]: [d_date_sk#8] +Right keys [1]: [d_date_sk#7] Join condition: None (12) Project [codegen id : 5] Output [2]: [sr_return_quantity#2, i_item_id#6] -Input [4]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#6, d_date_sk#8] +Input [4]: [sr_return_quantity#2, sr_returned_date_sk#3, i_item_id#6, d_date_sk#7] (13) HashAggregate [codegen id : 5] Input [2]: [sr_return_quantity#2, i_item_id#6] Keys [1]: [i_item_id#6] Functions [1]: [partial_sum(sr_return_quantity#2)] -Aggregate Attributes [1]: [sum#9] -Results [2]: [i_item_id#6, sum#10] +Aggregate Attributes [1]: [sum#8] +Results [2]: [i_item_id#6, sum#9] (14) Exchange -Input [2]: [i_item_id#6, sum#10] -Arguments: hashpartitioning(i_item_id#6, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [2]: [i_item_id#6, sum#9] +Arguments: hashpartitioning(i_item_id#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 18] -Input [2]: [i_item_id#6, sum#10] +Input [2]: [i_item_id#6, sum#9] Keys [1]: [i_item_id#6] Functions [1]: [sum(sr_return_quantity#2)] -Aggregate Attributes [1]: [sum(sr_return_quantity#2)#12] -Results [2]: [i_item_id#6 AS item_id#13, sum(sr_return_quantity#2)#12 AS sr_item_qty#14] +Aggregate Attributes [1]: [sum(sr_return_quantity#2)#10] +Results [2]: [i_item_id#6 AS item_id#11, sum(sr_return_quantity#2)#10 AS sr_item_qty#12] (16) Scan parquet default.catalog_returns -Output [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Output [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#17), dynamicpruningexpression(cr_returned_date_sk#17 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(cr_returned_date_sk#15), dynamicpruningexpression(cr_returned_date_sk#15 IN dynamicpruning#4)] PushedFilters: [IsNotNull(cr_item_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 10] -Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] +Input [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] (18) Filter [codegen id : 10] -Input [3]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17] -Condition : isnotnull(cr_item_sk#15) +Input [3]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15] +Condition : isnotnull(cr_item_sk#13) (19) ReusedExchange [Reuses operator id: 7] -Output [2]: [i_item_sk#18, i_item_id#19] +Output [2]: [i_item_sk#16, i_item_id#17] (20) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cr_item_sk#15] -Right keys [1]: [i_item_sk#18] +Left keys [1]: [cr_item_sk#13] +Right keys [1]: [i_item_sk#16] Join condition: None (21) Project [codegen id : 10] -Output [3]: [cr_return_quantity#16, cr_returned_date_sk#17, i_item_id#19] -Input [5]: [cr_item_sk#15, cr_return_quantity#16, cr_returned_date_sk#17, i_item_sk#18, i_item_id#19] +Output [3]: [cr_return_quantity#14, cr_returned_date_sk#15, i_item_id#17] +Input [5]: [cr_item_sk#13, cr_return_quantity#14, cr_returned_date_sk#15, i_item_sk#16, i_item_id#17] (22) ReusedExchange [Reuses operator id: 62] -Output [1]: [d_date_sk#20] +Output [1]: [d_date_sk#18] (23) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cr_returned_date_sk#17] -Right keys [1]: [d_date_sk#20] +Left keys [1]: [cr_returned_date_sk#15] +Right keys [1]: [d_date_sk#18] Join condition: None (24) Project [codegen id : 10] -Output [2]: [cr_return_quantity#16, i_item_id#19] -Input [4]: [cr_return_quantity#16, cr_returned_date_sk#17, i_item_id#19, d_date_sk#20] +Output [2]: [cr_return_quantity#14, i_item_id#17] +Input [4]: [cr_return_quantity#14, cr_returned_date_sk#15, i_item_id#17, d_date_sk#18] (25) HashAggregate [codegen id : 10] -Input [2]: [cr_return_quantity#16, i_item_id#19] -Keys [1]: [i_item_id#19] -Functions [1]: [partial_sum(cr_return_quantity#16)] -Aggregate Attributes [1]: [sum#21] -Results [2]: [i_item_id#19, sum#22] +Input [2]: [cr_return_quantity#14, i_item_id#17] +Keys [1]: [i_item_id#17] +Functions [1]: [partial_sum(cr_return_quantity#14)] +Aggregate Attributes [1]: [sum#19] +Results [2]: [i_item_id#17, sum#20] (26) Exchange -Input [2]: [i_item_id#19, sum#22] -Arguments: hashpartitioning(i_item_id#19, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [2]: [i_item_id#17, sum#20] +Arguments: hashpartitioning(i_item_id#17, 5), ENSURE_REQUIREMENTS, [plan_id=3] (27) HashAggregate [codegen id : 11] -Input [2]: [i_item_id#19, sum#22] -Keys [1]: [i_item_id#19] -Functions [1]: [sum(cr_return_quantity#16)] -Aggregate Attributes [1]: [sum(cr_return_quantity#16)#24] -Results [2]: [i_item_id#19 AS item_id#25, sum(cr_return_quantity#16)#24 AS cr_item_qty#26] +Input [2]: [i_item_id#17, sum#20] +Keys [1]: [i_item_id#17] +Functions [1]: [sum(cr_return_quantity#14)] +Aggregate Attributes [1]: [sum(cr_return_quantity#14)#21] +Results [2]: [i_item_id#17 AS item_id#22, sum(cr_return_quantity#14)#21 AS cr_item_qty#23] (28) BroadcastExchange -Input [2]: [item_id#25, cr_item_qty#26] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#27] +Input [2]: [item_id#22, cr_item_qty#23] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [item_id#13] -Right keys [1]: [item_id#25] +Left keys [1]: [item_id#11] +Right keys [1]: [item_id#22] Join condition: None (30) Project [codegen id : 18] -Output [3]: [item_id#13, sr_item_qty#14, cr_item_qty#26] -Input [4]: [item_id#13, sr_item_qty#14, item_id#25, cr_item_qty#26] +Output [3]: [item_id#11, sr_item_qty#12, cr_item_qty#23] +Input [4]: [item_id#11, sr_item_qty#12, item_id#22, cr_item_qty#23] (31) Scan parquet default.web_returns -Output [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Output [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#30), dynamicpruningexpression(wr_returned_date_sk#30 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(wr_returned_date_sk#26), dynamicpruningexpression(wr_returned_date_sk#26 IN dynamicpruning#4)] PushedFilters: [IsNotNull(wr_item_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 16] -Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] +Input [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] (33) Filter [codegen id : 16] -Input [3]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30] -Condition : isnotnull(wr_item_sk#28) +Input [3]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26] +Condition : isnotnull(wr_item_sk#24) (34) ReusedExchange [Reuses operator id: 7] -Output [2]: [i_item_sk#31, i_item_id#32] +Output [2]: [i_item_sk#27, i_item_id#28] (35) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [wr_item_sk#28] -Right keys [1]: [i_item_sk#31] +Left keys [1]: [wr_item_sk#24] +Right keys [1]: [i_item_sk#27] Join condition: None (36) Project [codegen id : 16] -Output [3]: [wr_return_quantity#29, wr_returned_date_sk#30, i_item_id#32] -Input [5]: [wr_item_sk#28, wr_return_quantity#29, wr_returned_date_sk#30, i_item_sk#31, i_item_id#32] +Output [3]: [wr_return_quantity#25, wr_returned_date_sk#26, i_item_id#28] +Input [5]: [wr_item_sk#24, wr_return_quantity#25, wr_returned_date_sk#26, i_item_sk#27, i_item_id#28] (37) ReusedExchange [Reuses operator id: 62] -Output [1]: [d_date_sk#33] +Output [1]: [d_date_sk#29] (38) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [wr_returned_date_sk#30] -Right keys [1]: [d_date_sk#33] +Left keys [1]: [wr_returned_date_sk#26] +Right keys [1]: [d_date_sk#29] Join condition: None (39) Project [codegen id : 16] -Output [2]: [wr_return_quantity#29, i_item_id#32] -Input [4]: [wr_return_quantity#29, wr_returned_date_sk#30, i_item_id#32, d_date_sk#33] +Output [2]: [wr_return_quantity#25, i_item_id#28] +Input [4]: [wr_return_quantity#25, wr_returned_date_sk#26, i_item_id#28, d_date_sk#29] (40) HashAggregate [codegen id : 16] -Input [2]: [wr_return_quantity#29, i_item_id#32] -Keys [1]: [i_item_id#32] -Functions [1]: [partial_sum(wr_return_quantity#29)] -Aggregate Attributes [1]: [sum#34] -Results [2]: [i_item_id#32, sum#35] +Input [2]: [wr_return_quantity#25, i_item_id#28] +Keys [1]: [i_item_id#28] +Functions [1]: [partial_sum(wr_return_quantity#25)] +Aggregate Attributes [1]: [sum#30] +Results [2]: [i_item_id#28, sum#31] (41) Exchange -Input [2]: [i_item_id#32, sum#35] -Arguments: hashpartitioning(i_item_id#32, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [2]: [i_item_id#28, sum#31] +Arguments: hashpartitioning(i_item_id#28, 5), ENSURE_REQUIREMENTS, [plan_id=5] (42) HashAggregate [codegen id : 17] -Input [2]: [i_item_id#32, sum#35] -Keys [1]: [i_item_id#32] -Functions [1]: [sum(wr_return_quantity#29)] -Aggregate Attributes [1]: [sum(wr_return_quantity#29)#37] -Results [2]: [i_item_id#32 AS item_id#38, sum(wr_return_quantity#29)#37 AS wr_item_qty#39] +Input [2]: [i_item_id#28, sum#31] +Keys [1]: [i_item_id#28] +Functions [1]: [sum(wr_return_quantity#25)] +Aggregate Attributes [1]: [sum(wr_return_quantity#25)#32] +Results [2]: [i_item_id#28 AS item_id#33, sum(wr_return_quantity#25)#32 AS wr_item_qty#34] (43) BroadcastExchange -Input [2]: [item_id#38, wr_item_qty#39] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#40] +Input [2]: [item_id#33, wr_item_qty#34] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=6] (44) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [item_id#13] -Right keys [1]: [item_id#38] +Left keys [1]: [item_id#11] +Right keys [1]: [item_id#33] Join condition: None (45) Project [codegen id : 18] -Output [8]: [item_id#13, sr_item_qty#14, (((cast(sr_item_qty#14 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS sr_dev#41, cr_item_qty#26, (((cast(cr_item_qty#26 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS cr_dev#42, wr_item_qty#39, (((cast(wr_item_qty#39 as double) / cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as double)) / 3.0) * 100.0) AS wr_dev#43, CheckOverflow((promote_precision(cast(((sr_item_qty#14 + cr_item_qty#26) + wr_item_qty#39) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#44] -Input [5]: [item_id#13, sr_item_qty#14, cr_item_qty#26, item_id#38, wr_item_qty#39] +Output [8]: [item_id#11, sr_item_qty#12, (((cast(sr_item_qty#12 as double) / cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as double)) / 3.0) * 100.0) AS sr_dev#35, cr_item_qty#23, (((cast(cr_item_qty#23 as double) / cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as double)) / 3.0) * 100.0) AS cr_dev#36, wr_item_qty#34, (((cast(wr_item_qty#34 as double) / cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as double)) / 3.0) * 100.0) AS wr_dev#37, CheckOverflow((promote_precision(cast(((sr_item_qty#12 + cr_item_qty#23) + wr_item_qty#34) as decimal(21,1))) / 3.0), DecimalType(27,6)) AS average#38] +Input [5]: [item_id#11, sr_item_qty#12, cr_item_qty#23, item_id#33, wr_item_qty#34] (46) TakeOrderedAndProject -Input [8]: [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] -Arguments: 100, [item_id#13 ASC NULLS FIRST, sr_item_qty#14 ASC NULLS FIRST], [item_id#13, sr_item_qty#14, sr_dev#41, cr_item_qty#26, cr_dev#42, wr_item_qty#39, wr_dev#43, average#44] +Input [8]: [item_id#11, sr_item_qty#12, sr_dev#35, cr_item_qty#23, cr_dev#36, wr_item_qty#34, wr_dev#37, average#38] +Arguments: 100, [item_id#11 ASC NULLS FIRST, sr_item_qty#12 ASC NULLS FIRST], [item_id#11, sr_item_qty#12, sr_dev#35, cr_item_qty#23, cr_dev#36, wr_item_qty#34, wr_dev#37, average#38] ===== Subqueries ===== @@ -285,77 +285,77 @@ BroadcastExchange (62) (47) Scan parquet default.date_dim -Output [2]: [d_date_sk#8, d_date#45] +Output [2]: [d_date_sk#7, d_date#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date_sk)] ReadSchema: struct (48) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#8, d_date#45] +Input [2]: [d_date_sk#7, d_date#39] (49) Filter [codegen id : 3] -Input [2]: [d_date_sk#8, d_date#45] -Condition : isnotnull(d_date_sk#8) +Input [2]: [d_date_sk#7, d_date#39] +Condition : isnotnull(d_date_sk#7) (50) Scan parquet default.date_dim -Output [2]: [d_date#46, d_week_seq#47] +Output [2]: [d_date#40, d_week_seq#41] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] ReadSchema: struct (51) ColumnarToRow [codegen id : 2] -Input [2]: [d_date#46, d_week_seq#47] +Input [2]: [d_date#40, d_week_seq#41] (52) Scan parquet default.date_dim -Output [2]: [d_date#48, d_week_seq#49] +Output [2]: [d_date#42, d_week_seq#43] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] ReadSchema: struct (53) ColumnarToRow [codegen id : 1] -Input [2]: [d_date#48, d_week_seq#49] +Input [2]: [d_date#42, d_week_seq#43] (54) Filter [codegen id : 1] -Input [2]: [d_date#48, d_week_seq#49] -Condition : cast(d_date#48 as string) IN (2000-06-30,2000-09-27,2000-11-17) +Input [2]: [d_date#42, d_week_seq#43] +Condition : cast(d_date#42 as string) IN (2000-06-30,2000-09-27,2000-11-17) (55) Project [codegen id : 1] -Output [1]: [d_week_seq#49] -Input [2]: [d_date#48, d_week_seq#49] +Output [1]: [d_week_seq#43] +Input [2]: [d_date#42, d_week_seq#43] (56) BroadcastExchange -Input [1]: [d_week_seq#49] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#50] +Input [1]: [d_week_seq#43] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] (57) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [d_week_seq#47] -Right keys [1]: [d_week_seq#49] +Left keys [1]: [d_week_seq#41] +Right keys [1]: [d_week_seq#43] Join condition: None (58) Project [codegen id : 2] -Output [1]: [d_date#46] -Input [2]: [d_date#46, d_week_seq#47] +Output [1]: [d_date#40] +Input [2]: [d_date#40, d_week_seq#41] (59) BroadcastExchange -Input [1]: [d_date#46] -Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [id=#51] +Input [1]: [d_date#40] +Arguments: HashedRelationBroadcastMode(List(input[0, date, true]),false), [plan_id=8] (60) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [d_date#45] -Right keys [1]: [d_date#46] +Left keys [1]: [d_date#39] +Right keys [1]: [d_date#40] Join condition: None (61) Project [codegen id : 3] -Output [1]: [d_date_sk#8] -Input [2]: [d_date_sk#8, d_date#45] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#39] (62) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] -Subquery:2 Hosting operator id = 16 Hosting Expression = cr_returned_date_sk#17 IN dynamicpruning#4 +Subquery:2 Hosting operator id = 16 Hosting Expression = cr_returned_date_sk#15 IN dynamicpruning#4 -Subquery:3 Hosting operator id = 31 Hosting Expression = wr_returned_date_sk#30 IN dynamicpruning#4 +Subquery:3 Hosting operator id = 31 Hosting Expression = wr_returned_date_sk#26 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt index 9762d51e943e8..09a1023458e79 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84.sf100/explain.txt @@ -72,7 +72,7 @@ Input [2]: [ca_address_sk#7, ca_city#8] (8) BroadcastExchange Input [1]: [ca_address_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [c_current_addr_sk#4] @@ -84,122 +84,122 @@ Output [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_firs Input [7]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6, ca_address_sk#7] (11) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Output [2]: [hd_demo_sk#9, hd_income_band_sk#10] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 3] -Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Input [2]: [hd_demo_sk#9, hd_income_band_sk#10] (13) Filter [codegen id : 3] -Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] -Condition : (isnotnull(hd_demo_sk#10) AND isnotnull(hd_income_band_sk#11)) +Input [2]: [hd_demo_sk#9, hd_income_band_sk#10] +Condition : (isnotnull(hd_demo_sk#9) AND isnotnull(hd_income_band_sk#10)) (14) Scan parquet default.income_band -Output [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Output [3]: [ib_income_band_sk#11, ib_lower_bound#12, ib_upper_bound#13] Batched: true Location [not included in comparison]/{warehouse_dir}/income_band] PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), LessThanOrEqual(ib_upper_bound,88128), IsNotNull(ib_income_band_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 2] -Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Input [3]: [ib_income_band_sk#11, ib_lower_bound#12, ib_upper_bound#13] (16) Filter [codegen id : 2] -Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] -Condition : ((((isnotnull(ib_lower_bound#13) AND isnotnull(ib_upper_bound#14)) AND (ib_lower_bound#13 >= 38128)) AND (ib_upper_bound#14 <= 88128)) AND isnotnull(ib_income_band_sk#12)) +Input [3]: [ib_income_band_sk#11, ib_lower_bound#12, ib_upper_bound#13] +Condition : ((((isnotnull(ib_lower_bound#12) AND isnotnull(ib_upper_bound#13)) AND (ib_lower_bound#12 >= 38128)) AND (ib_upper_bound#13 <= 88128)) AND isnotnull(ib_income_band_sk#11)) (17) Project [codegen id : 2] -Output [1]: [ib_income_band_sk#12] -Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Output [1]: [ib_income_band_sk#11] +Input [3]: [ib_income_band_sk#11, ib_lower_bound#12, ib_upper_bound#13] (18) BroadcastExchange -Input [1]: [ib_income_band_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [ib_income_band_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [hd_income_band_sk#11] -Right keys [1]: [ib_income_band_sk#12] +Left keys [1]: [hd_income_band_sk#10] +Right keys [1]: [ib_income_band_sk#11] Join condition: None (20) Project [codegen id : 3] -Output [1]: [hd_demo_sk#10] -Input [3]: [hd_demo_sk#10, hd_income_band_sk#11, ib_income_band_sk#12] +Output [1]: [hd_demo_sk#9] +Input [3]: [hd_demo_sk#9, hd_income_band_sk#10, ib_income_band_sk#11] (21) BroadcastExchange -Input [1]: [hd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] +Input [1]: [hd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (22) BroadcastHashJoin [codegen id : 4] Left keys [1]: [c_current_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#10] +Right keys [1]: [hd_demo_sk#9] Join condition: None (23) Project [codegen id : 4] Output [4]: [c_customer_id#1, c_current_cdemo_sk#2, c_first_name#5, c_last_name#6] -Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, hd_demo_sk#10] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, hd_demo_sk#9] (24) BroadcastExchange Input [4]: [c_customer_id#1, c_current_cdemo_sk#2, c_first_name#5, c_last_name#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=4] (25) Scan parquet default.customer_demographics -Output [1]: [cd_demo_sk#18] +Output [1]: [cd_demo_sk#14] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (26) ColumnarToRow -Input [1]: [cd_demo_sk#18] +Input [1]: [cd_demo_sk#14] (27) Filter -Input [1]: [cd_demo_sk#18] -Condition : isnotnull(cd_demo_sk#18) +Input [1]: [cd_demo_sk#14] +Condition : isnotnull(cd_demo_sk#14) (28) BroadcastHashJoin [codegen id : 5] Left keys [1]: [c_current_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#18] +Right keys [1]: [cd_demo_sk#14] Join condition: None (29) Project [codegen id : 5] -Output [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#18] -Input [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_first_name#5, c_last_name#6, cd_demo_sk#18] +Output [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#14] +Input [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_first_name#5, c_last_name#6, cd_demo_sk#14] (30) BroadcastExchange -Input [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[3, int, true] as bigint)),false), [id=#19] +Input [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[3, int, true] as bigint)),false), [plan_id=5] (31) Scan parquet default.store_returns -Output [2]: [sr_cdemo_sk#20, sr_returned_date_sk#21] +Output [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_cdemo_sk)] ReadSchema: struct (32) ColumnarToRow -Input [2]: [sr_cdemo_sk#20, sr_returned_date_sk#21] +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] (33) Filter -Input [2]: [sr_cdemo_sk#20, sr_returned_date_sk#21] -Condition : isnotnull(sr_cdemo_sk#20) +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Condition : isnotnull(sr_cdemo_sk#15) (34) Project -Output [1]: [sr_cdemo_sk#20] -Input [2]: [sr_cdemo_sk#20, sr_returned_date_sk#21] +Output [1]: [sr_cdemo_sk#15] +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] (35) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cd_demo_sk#18] -Right keys [1]: [sr_cdemo_sk#20] +Left keys [1]: [cd_demo_sk#14] +Right keys [1]: [sr_cdemo_sk#15] Join condition: None (36) Project [codegen id : 6] -Output [3]: [c_customer_id#1 AS customer_id#22, concat(c_last_name#6, , , c_first_name#5) AS customername#23, c_customer_id#1] -Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#18, sr_cdemo_sk#20] +Output [3]: [c_customer_id#1 AS customer_id#17, concat(c_last_name#6, , , c_first_name#5) AS customername#18, c_customer_id#1] +Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#14, sr_cdemo_sk#15] (37) TakeOrderedAndProject -Input [3]: [customer_id#22, customername#23, c_customer_id#1] -Arguments: 100, [c_customer_id#1 ASC NULLS FIRST], [customer_id#22, customername#23] +Input [3]: [customer_id#17, customername#18, c_customer_id#1] +Arguments: 100, [c_customer_id#1 ASC NULLS FIRST], [customer_id#17, customername#18] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt index d5b84f52f4b00..4034953dc0b8c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q84/explain.txt @@ -72,7 +72,7 @@ Input [2]: [ca_address_sk#7, ca_city#8] (8) BroadcastExchange Input [1]: [ca_address_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 5] Left keys [1]: [c_current_addr_sk#4] @@ -84,122 +84,122 @@ Output [5]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_firs Input [7]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, c_first_name#5, c_last_name#6, ca_address_sk#7] (11) Scan parquet default.customer_demographics -Output [1]: [cd_demo_sk#10] +Output [1]: [cd_demo_sk#9] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [1]: [cd_demo_sk#10] +Input [1]: [cd_demo_sk#9] (13) Filter [codegen id : 2] -Input [1]: [cd_demo_sk#10] -Condition : isnotnull(cd_demo_sk#10) +Input [1]: [cd_demo_sk#9] +Condition : isnotnull(cd_demo_sk#9) (14) BroadcastExchange -Input [1]: [cd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Input [1]: [cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 5] Left keys [1]: [c_current_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#10] +Right keys [1]: [cd_demo_sk#9] Join condition: None (16) Project [codegen id : 5] -Output [5]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#10] -Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#10] +Output [5]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Input [6]: [c_customer_id#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9] (17) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#12, hd_income_band_sk#13] +Output [2]: [hd_demo_sk#10, hd_income_band_sk#11] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [2]: [hd_demo_sk#12, hd_income_band_sk#13] +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] (19) Filter [codegen id : 3] -Input [2]: [hd_demo_sk#12, hd_income_band_sk#13] -Condition : (isnotnull(hd_demo_sk#12) AND isnotnull(hd_income_band_sk#13)) +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Condition : (isnotnull(hd_demo_sk#10) AND isnotnull(hd_income_band_sk#11)) (20) BroadcastExchange -Input [2]: [hd_demo_sk#12, hd_income_band_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Input [2]: [hd_demo_sk#10, hd_income_band_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 5] Left keys [1]: [c_current_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#12] +Right keys [1]: [hd_demo_sk#10] Join condition: None (22) Project [codegen id : 5] -Output [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10, hd_income_band_sk#13] -Input [7]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#10, hd_demo_sk#12, hd_income_band_sk#13] +Output [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11] +Input [7]: [c_customer_id#1, c_current_hdemo_sk#3, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_demo_sk#10, hd_income_band_sk#11] (23) Scan parquet default.income_band -Output [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] +Output [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] Batched: true Location [not included in comparison]/{warehouse_dir}/income_band] PushedFilters: [IsNotNull(ib_lower_bound), IsNotNull(ib_upper_bound), GreaterThanOrEqual(ib_lower_bound,38128), LessThanOrEqual(ib_upper_bound,88128), IsNotNull(ib_income_band_sk)] ReadSchema: struct (24) ColumnarToRow [codegen id : 4] -Input [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] (25) Filter [codegen id : 4] -Input [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] -Condition : ((((isnotnull(ib_lower_bound#16) AND isnotnull(ib_upper_bound#17)) AND (ib_lower_bound#16 >= 38128)) AND (ib_upper_bound#17 <= 88128)) AND isnotnull(ib_income_band_sk#15)) +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] +Condition : ((((isnotnull(ib_lower_bound#13) AND isnotnull(ib_upper_bound#14)) AND (ib_lower_bound#13 >= 38128)) AND (ib_upper_bound#14 <= 88128)) AND isnotnull(ib_income_band_sk#12)) (26) Project [codegen id : 4] -Output [1]: [ib_income_band_sk#15] -Input [3]: [ib_income_band_sk#15, ib_lower_bound#16, ib_upper_bound#17] +Output [1]: [ib_income_band_sk#12] +Input [3]: [ib_income_band_sk#12, ib_lower_bound#13, ib_upper_bound#14] (27) BroadcastExchange -Input [1]: [ib_income_band_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [ib_income_band_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [hd_income_band_sk#13] -Right keys [1]: [ib_income_band_sk#15] +Left keys [1]: [hd_income_band_sk#11] +Right keys [1]: [ib_income_band_sk#12] Join condition: None (29) Project [codegen id : 5] -Output [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10] -Input [6]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10, hd_income_band_sk#13, ib_income_band_sk#15] +Output [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Input [6]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, hd_income_band_sk#11, ib_income_band_sk#12] (30) BroadcastExchange -Input [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[3, int, true] as bigint)),false), [id=#19] +Input [4]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[3, int, true] as bigint)),false), [plan_id=5] (31) Scan parquet default.store_returns -Output [2]: [sr_cdemo_sk#20, sr_returned_date_sk#21] +Output [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_cdemo_sk)] ReadSchema: struct (32) ColumnarToRow -Input [2]: [sr_cdemo_sk#20, sr_returned_date_sk#21] +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] (33) Filter -Input [2]: [sr_cdemo_sk#20, sr_returned_date_sk#21] -Condition : isnotnull(sr_cdemo_sk#20) +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] +Condition : isnotnull(sr_cdemo_sk#15) (34) Project -Output [1]: [sr_cdemo_sk#20] -Input [2]: [sr_cdemo_sk#20, sr_returned_date_sk#21] +Output [1]: [sr_cdemo_sk#15] +Input [2]: [sr_cdemo_sk#15, sr_returned_date_sk#16] (35) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cd_demo_sk#10] -Right keys [1]: [sr_cdemo_sk#20] +Left keys [1]: [cd_demo_sk#9] +Right keys [1]: [sr_cdemo_sk#15] Join condition: None (36) Project [codegen id : 6] -Output [3]: [c_customer_id#1 AS customer_id#22, concat(c_last_name#6, , , c_first_name#5) AS customername#23, c_customer_id#1] -Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#10, sr_cdemo_sk#20] +Output [3]: [c_customer_id#1 AS customer_id#17, concat(c_last_name#6, , , c_first_name#5) AS customername#18, c_customer_id#1] +Input [5]: [c_customer_id#1, c_first_name#5, c_last_name#6, cd_demo_sk#9, sr_cdemo_sk#15] (37) TakeOrderedAndProject -Input [3]: [customer_id#22, customername#23, c_customer_id#1] -Arguments: 100, [c_customer_id#1 ASC NULLS FIRST], [customer_id#22, customername#23] +Input [3]: [customer_id#17, customername#18, c_customer_id#1] +Arguments: 100, [c_customer_id#1 ASC NULLS FIRST], [customer_id#17, customername#18] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt index 82f2b017b6bf5..b4114f6eaa4a6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85.sf100/explain.txt @@ -86,7 +86,7 @@ Condition : isnotnull(wp_web_page_sk#9) (7) BroadcastExchange Input [1]: [wp_web_page_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 2] Left keys [1]: [ws_web_page_sk#2] @@ -99,204 +99,204 @@ Input [8]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws (10) Exchange Input [6]: [ws_item_sk#1, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] -Arguments: hashpartitioning(ws_item_sk#1, ws_order_number#3, 5), ENSURE_REQUIREMENTS, [id=#11] +Arguments: hashpartitioning(ws_item_sk#1, ws_order_number#3, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 3] Input [6]: [ws_item_sk#1, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] Arguments: [ws_item_sk#1 ASC NULLS FIRST, ws_order_number#3 ASC NULLS FIRST], false, 0 (12) Scan parquet default.web_returns -Output [9]: [wr_item_sk#12, wr_refunded_cdemo_sk#13, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#15, wr_reason_sk#16, wr_order_number#17, wr_fee#18, wr_refunded_cash#19, wr_returned_date_sk#20] +Output [9]: [wr_item_sk#10, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_order_number#15, wr_fee#16, wr_refunded_cash#17, wr_returned_date_sk#18] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr_returning_cdemo_sk), IsNotNull(wr_refunded_addr_sk), IsNotNull(wr_reason_sk)] ReadSchema: struct (13) ColumnarToRow [codegen id : 4] -Input [9]: [wr_item_sk#12, wr_refunded_cdemo_sk#13, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#15, wr_reason_sk#16, wr_order_number#17, wr_fee#18, wr_refunded_cash#19, wr_returned_date_sk#20] +Input [9]: [wr_item_sk#10, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_order_number#15, wr_fee#16, wr_refunded_cash#17, wr_returned_date_sk#18] (14) Filter [codegen id : 4] -Input [9]: [wr_item_sk#12, wr_refunded_cdemo_sk#13, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#15, wr_reason_sk#16, wr_order_number#17, wr_fee#18, wr_refunded_cash#19, wr_returned_date_sk#20] -Condition : (((((isnotnull(wr_item_sk#12) AND isnotnull(wr_order_number#17)) AND isnotnull(wr_refunded_cdemo_sk#13)) AND isnotnull(wr_returning_cdemo_sk#15)) AND isnotnull(wr_refunded_addr_sk#14)) AND isnotnull(wr_reason_sk#16)) +Input [9]: [wr_item_sk#10, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_order_number#15, wr_fee#16, wr_refunded_cash#17, wr_returned_date_sk#18] +Condition : (((((isnotnull(wr_item_sk#10) AND isnotnull(wr_order_number#15)) AND isnotnull(wr_refunded_cdemo_sk#11)) AND isnotnull(wr_returning_cdemo_sk#13)) AND isnotnull(wr_refunded_addr_sk#12)) AND isnotnull(wr_reason_sk#14)) (15) Project [codegen id : 4] -Output [8]: [wr_item_sk#12, wr_refunded_cdemo_sk#13, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#15, wr_reason_sk#16, wr_order_number#17, wr_fee#18, wr_refunded_cash#19] -Input [9]: [wr_item_sk#12, wr_refunded_cdemo_sk#13, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#15, wr_reason_sk#16, wr_order_number#17, wr_fee#18, wr_refunded_cash#19, wr_returned_date_sk#20] +Output [8]: [wr_item_sk#10, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_order_number#15, wr_fee#16, wr_refunded_cash#17] +Input [9]: [wr_item_sk#10, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_order_number#15, wr_fee#16, wr_refunded_cash#17, wr_returned_date_sk#18] (16) Exchange -Input [8]: [wr_item_sk#12, wr_refunded_cdemo_sk#13, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#15, wr_reason_sk#16, wr_order_number#17, wr_fee#18, wr_refunded_cash#19] -Arguments: hashpartitioning(wr_item_sk#12, wr_order_number#17, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [8]: [wr_item_sk#10, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_order_number#15, wr_fee#16, wr_refunded_cash#17] +Arguments: hashpartitioning(wr_item_sk#10, wr_order_number#15, 5), ENSURE_REQUIREMENTS, [plan_id=3] (17) Sort [codegen id : 5] -Input [8]: [wr_item_sk#12, wr_refunded_cdemo_sk#13, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#15, wr_reason_sk#16, wr_order_number#17, wr_fee#18, wr_refunded_cash#19] -Arguments: [wr_item_sk#12 ASC NULLS FIRST, wr_order_number#17 ASC NULLS FIRST], false, 0 +Input [8]: [wr_item_sk#10, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_order_number#15, wr_fee#16, wr_refunded_cash#17] +Arguments: [wr_item_sk#10 ASC NULLS FIRST, wr_order_number#15 ASC NULLS FIRST], false, 0 (18) SortMergeJoin [codegen id : 7] Left keys [2]: [ws_item_sk#1, ws_order_number#3] -Right keys [2]: [wr_item_sk#12, wr_order_number#17] +Right keys [2]: [wr_item_sk#10, wr_order_number#15] Join condition: None (19) Project [codegen id : 7] -Output [10]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#13, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#15, wr_reason_sk#16, wr_fee#18, wr_refunded_cash#19] -Input [14]: [ws_item_sk#1, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_item_sk#12, wr_refunded_cdemo_sk#13, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#15, wr_reason_sk#16, wr_order_number#17, wr_fee#18, wr_refunded_cash#19] +Output [10]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17] +Input [14]: [ws_item_sk#1, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_item_sk#10, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_order_number#15, wr_fee#16, wr_refunded_cash#17] (20) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] +Output [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree )),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College ))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree )))] ReadSchema: struct (21) ColumnarToRow [codegen id : 6] -Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] +Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] (22) Filter [codegen id : 6] -Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] -Condition : (((isnotnull(cd_demo_sk#22) AND isnotnull(cd_marital_status#23)) AND isnotnull(cd_education_status#24)) AND ((((cd_marital_status#23 = M) AND (cd_education_status#24 = Advanced Degree )) OR ((cd_marital_status#23 = S) AND (cd_education_status#24 = College ))) OR ((cd_marital_status#23 = W) AND (cd_education_status#24 = 2 yr Degree )))) +Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] +Condition : (((isnotnull(cd_demo_sk#19) AND isnotnull(cd_marital_status#20)) AND isnotnull(cd_education_status#21)) AND ((((cd_marital_status#20 = M) AND (cd_education_status#21 = Advanced Degree )) OR ((cd_marital_status#20 = S) AND (cd_education_status#21 = College ))) OR ((cd_marital_status#20 = W) AND (cd_education_status#21 = 2 yr Degree )))) (23) BroadcastExchange -Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] +Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (24) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [wr_refunded_cdemo_sk#13] -Right keys [1]: [cd_demo_sk#22] -Join condition: ((((((cd_marital_status#23 = M) AND (cd_education_status#24 = Advanced Degree )) AND (ws_sales_price#5 >= 100.00)) AND (ws_sales_price#5 <= 150.00)) OR ((((cd_marital_status#23 = S) AND (cd_education_status#24 = College )) AND (ws_sales_price#5 >= 50.00)) AND (ws_sales_price#5 <= 100.00))) OR ((((cd_marital_status#23 = W) AND (cd_education_status#24 = 2 yr Degree )) AND (ws_sales_price#5 >= 150.00)) AND (ws_sales_price#5 <= 200.00))) +Left keys [1]: [wr_refunded_cdemo_sk#11] +Right keys [1]: [cd_demo_sk#19] +Join condition: ((((((cd_marital_status#20 = M) AND (cd_education_status#21 = Advanced Degree )) AND (ws_sales_price#5 >= 100.00)) AND (ws_sales_price#5 <= 150.00)) OR ((((cd_marital_status#20 = S) AND (cd_education_status#21 = College )) AND (ws_sales_price#5 >= 50.00)) AND (ws_sales_price#5 <= 100.00))) OR ((((cd_marital_status#20 = W) AND (cd_education_status#21 = 2 yr Degree )) AND (ws_sales_price#5 >= 150.00)) AND (ws_sales_price#5 <= 200.00))) (25) Project [codegen id : 7] -Output [10]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#15, wr_reason_sk#16, wr_fee#18, wr_refunded_cash#19, cd_marital_status#23, cd_education_status#24] -Input [13]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#13, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#15, wr_reason_sk#16, wr_fee#18, wr_refunded_cash#19, cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] +Output [10]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, cd_marital_status#20, cd_education_status#21] +Input [13]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] (26) Exchange -Input [10]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#15, wr_reason_sk#16, wr_fee#18, wr_refunded_cash#19, cd_marital_status#23, cd_education_status#24] -Arguments: hashpartitioning(wr_returning_cdemo_sk#15, cd_marital_status#23, cd_education_status#24, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [10]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, cd_marital_status#20, cd_education_status#21] +Arguments: hashpartitioning(wr_returning_cdemo_sk#13, cd_marital_status#20, cd_education_status#21, 5), ENSURE_REQUIREMENTS, [plan_id=5] (27) Sort [codegen id : 8] -Input [10]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#15, wr_reason_sk#16, wr_fee#18, wr_refunded_cash#19, cd_marital_status#23, cd_education_status#24] -Arguments: [wr_returning_cdemo_sk#15 ASC NULLS FIRST, cd_marital_status#23 ASC NULLS FIRST, cd_education_status#24 ASC NULLS FIRST], false, 0 +Input [10]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, cd_marital_status#20, cd_education_status#21] +Arguments: [wr_returning_cdemo_sk#13 ASC NULLS FIRST, cd_marital_status#20 ASC NULLS FIRST, cd_education_status#21 ASC NULLS FIRST], false, 0 (28) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#27, cd_marital_status#28, cd_education_status#29] +Output [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)] ReadSchema: struct (29) ColumnarToRow [codegen id : 9] -Input [3]: [cd_demo_sk#27, cd_marital_status#28, cd_education_status#29] +Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] (30) Filter [codegen id : 9] -Input [3]: [cd_demo_sk#27, cd_marital_status#28, cd_education_status#29] -Condition : ((isnotnull(cd_demo_sk#27) AND isnotnull(cd_marital_status#28)) AND isnotnull(cd_education_status#29)) +Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] +Condition : ((isnotnull(cd_demo_sk#22) AND isnotnull(cd_marital_status#23)) AND isnotnull(cd_education_status#24)) (31) Exchange -Input [3]: [cd_demo_sk#27, cd_marital_status#28, cd_education_status#29] -Arguments: hashpartitioning(cd_demo_sk#27, cd_marital_status#28, cd_education_status#29, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] +Arguments: hashpartitioning(cd_demo_sk#22, cd_marital_status#23, cd_education_status#24, 5), ENSURE_REQUIREMENTS, [plan_id=6] (32) Sort [codegen id : 10] -Input [3]: [cd_demo_sk#27, cd_marital_status#28, cd_education_status#29] -Arguments: [cd_demo_sk#27 ASC NULLS FIRST, cd_marital_status#28 ASC NULLS FIRST, cd_education_status#29 ASC NULLS FIRST], false, 0 +Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] +Arguments: [cd_demo_sk#22 ASC NULLS FIRST, cd_marital_status#23 ASC NULLS FIRST, cd_education_status#24 ASC NULLS FIRST], false, 0 (33) SortMergeJoin [codegen id : 14] -Left keys [3]: [wr_returning_cdemo_sk#15, cd_marital_status#23, cd_education_status#24] -Right keys [3]: [cd_demo_sk#27, cd_marital_status#28, cd_education_status#29] +Left keys [3]: [wr_returning_cdemo_sk#13, cd_marital_status#20, cd_education_status#21] +Right keys [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] Join condition: None (34) Project [codegen id : 14] -Output [7]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#14, wr_reason_sk#16, wr_fee#18, wr_refunded_cash#19] -Input [13]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#14, wr_returning_cdemo_sk#15, wr_reason_sk#16, wr_fee#18, wr_refunded_cash#19, cd_marital_status#23, cd_education_status#24, cd_demo_sk#27, cd_marital_status#28, cd_education_status#29] +Output [7]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#12, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17] +Input [13]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, cd_marital_status#20, cd_education_status#21, cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] (35) Scan parquet default.customer_address -Output [3]: [ca_address_sk#31, ca_state#32, ca_country#33] +Output [3]: [ca_address_sk#25, ca_state#26, ca_country#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [IN,NJ,OH]),In(ca_state, [CT,KY,WI])),In(ca_state, [AR,IA,LA]))] ReadSchema: struct (36) ColumnarToRow [codegen id : 11] -Input [3]: [ca_address_sk#31, ca_state#32, ca_country#33] +Input [3]: [ca_address_sk#25, ca_state#26, ca_country#27] (37) Filter [codegen id : 11] -Input [3]: [ca_address_sk#31, ca_state#32, ca_country#33] -Condition : (((isnotnull(ca_country#33) AND (ca_country#33 = United States)) AND isnotnull(ca_address_sk#31)) AND ((ca_state#32 IN (IN,OH,NJ) OR ca_state#32 IN (WI,CT,KY)) OR ca_state#32 IN (LA,IA,AR))) +Input [3]: [ca_address_sk#25, ca_state#26, ca_country#27] +Condition : (((isnotnull(ca_country#27) AND (ca_country#27 = United States)) AND isnotnull(ca_address_sk#25)) AND ((ca_state#26 IN (IN,OH,NJ) OR ca_state#26 IN (WI,CT,KY)) OR ca_state#26 IN (LA,IA,AR))) (38) Project [codegen id : 11] -Output [2]: [ca_address_sk#31, ca_state#32] -Input [3]: [ca_address_sk#31, ca_state#32, ca_country#33] +Output [2]: [ca_address_sk#25, ca_state#26] +Input [3]: [ca_address_sk#25, ca_state#26, ca_country#27] (39) BroadcastExchange -Input [2]: [ca_address_sk#31, ca_state#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#34] +Input [2]: [ca_address_sk#25, ca_state#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] (40) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [wr_refunded_addr_sk#14] -Right keys [1]: [ca_address_sk#31] -Join condition: ((((ca_state#32 IN (IN,OH,NJ) AND (ws_net_profit#6 >= 100.00)) AND (ws_net_profit#6 <= 200.00)) OR ((ca_state#32 IN (WI,CT,KY) AND (ws_net_profit#6 >= 150.00)) AND (ws_net_profit#6 <= 300.00))) OR ((ca_state#32 IN (LA,IA,AR) AND (ws_net_profit#6 >= 50.00)) AND (ws_net_profit#6 <= 250.00))) +Left keys [1]: [wr_refunded_addr_sk#12] +Right keys [1]: [ca_address_sk#25] +Join condition: ((((ca_state#26 IN (IN,OH,NJ) AND (ws_net_profit#6 >= 100.00)) AND (ws_net_profit#6 <= 200.00)) OR ((ca_state#26 IN (WI,CT,KY) AND (ws_net_profit#6 >= 150.00)) AND (ws_net_profit#6 <= 300.00))) OR ((ca_state#26 IN (LA,IA,AR) AND (ws_net_profit#6 >= 50.00)) AND (ws_net_profit#6 <= 250.00))) (41) Project [codegen id : 14] -Output [5]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#16, wr_fee#18, wr_refunded_cash#19] -Input [9]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#14, wr_reason_sk#16, wr_fee#18, wr_refunded_cash#19, ca_address_sk#31, ca_state#32] +Output [5]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17] +Input [9]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#12, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, ca_address_sk#25, ca_state#26] (42) ReusedExchange [Reuses operator id: 59] -Output [1]: [d_date_sk#35] +Output [1]: [d_date_sk#28] (43) BroadcastHashJoin [codegen id : 14] Left keys [1]: [ws_sold_date_sk#7] -Right keys [1]: [d_date_sk#35] +Right keys [1]: [d_date_sk#28] Join condition: None (44) Project [codegen id : 14] -Output [4]: [ws_quantity#4, wr_reason_sk#16, wr_fee#18, wr_refunded_cash#19] -Input [6]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#16, wr_fee#18, wr_refunded_cash#19, d_date_sk#35] +Output [4]: [ws_quantity#4, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17] +Input [6]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, d_date_sk#28] (45) Scan parquet default.reason -Output [2]: [r_reason_sk#36, r_reason_desc#37] +Output [2]: [r_reason_sk#29, r_reason_desc#30] Batched: true Location [not included in comparison]/{warehouse_dir}/reason] PushedFilters: [IsNotNull(r_reason_sk)] ReadSchema: struct (46) ColumnarToRow [codegen id : 13] -Input [2]: [r_reason_sk#36, r_reason_desc#37] +Input [2]: [r_reason_sk#29, r_reason_desc#30] (47) Filter [codegen id : 13] -Input [2]: [r_reason_sk#36, r_reason_desc#37] -Condition : isnotnull(r_reason_sk#36) +Input [2]: [r_reason_sk#29, r_reason_desc#30] +Condition : isnotnull(r_reason_sk#29) (48) BroadcastExchange -Input [2]: [r_reason_sk#36, r_reason_desc#37] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#38] +Input [2]: [r_reason_sk#29, r_reason_desc#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] (49) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [wr_reason_sk#16] -Right keys [1]: [r_reason_sk#36] +Left keys [1]: [wr_reason_sk#14] +Right keys [1]: [r_reason_sk#29] Join condition: None (50) Project [codegen id : 14] -Output [4]: [ws_quantity#4, wr_fee#18, wr_refunded_cash#19, r_reason_desc#37] -Input [6]: [ws_quantity#4, wr_reason_sk#16, wr_fee#18, wr_refunded_cash#19, r_reason_sk#36, r_reason_desc#37] +Output [4]: [ws_quantity#4, wr_fee#16, wr_refunded_cash#17, r_reason_desc#30] +Input [6]: [ws_quantity#4, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, r_reason_sk#29, r_reason_desc#30] (51) HashAggregate [codegen id : 14] -Input [4]: [ws_quantity#4, wr_fee#18, wr_refunded_cash#19, r_reason_desc#37] -Keys [1]: [r_reason_desc#37] -Functions [3]: [partial_avg(ws_quantity#4), partial_avg(UnscaledValue(wr_refunded_cash#19)), partial_avg(UnscaledValue(wr_fee#18))] -Aggregate Attributes [6]: [sum#39, count#40, sum#41, count#42, sum#43, count#44] -Results [7]: [r_reason_desc#37, sum#45, count#46, sum#47, count#48, sum#49, count#50] +Input [4]: [ws_quantity#4, wr_fee#16, wr_refunded_cash#17, r_reason_desc#30] +Keys [1]: [r_reason_desc#30] +Functions [3]: [partial_avg(ws_quantity#4), partial_avg(UnscaledValue(wr_refunded_cash#17)), partial_avg(UnscaledValue(wr_fee#16))] +Aggregate Attributes [6]: [sum#31, count#32, sum#33, count#34, sum#35, count#36] +Results [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] (52) Exchange -Input [7]: [r_reason_desc#37, sum#45, count#46, sum#47, count#48, sum#49, count#50] -Arguments: hashpartitioning(r_reason_desc#37, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Arguments: hashpartitioning(r_reason_desc#30, 5), ENSURE_REQUIREMENTS, [plan_id=9] (53) HashAggregate [codegen id : 15] -Input [7]: [r_reason_desc#37, sum#45, count#46, sum#47, count#48, sum#49, count#50] -Keys [1]: [r_reason_desc#37] -Functions [3]: [avg(ws_quantity#4), avg(UnscaledValue(wr_refunded_cash#19)), avg(UnscaledValue(wr_fee#18))] -Aggregate Attributes [3]: [avg(ws_quantity#4)#52, avg(UnscaledValue(wr_refunded_cash#19))#53, avg(UnscaledValue(wr_fee#18))#54] -Results [4]: [substr(r_reason_desc#37, 1, 20) AS substr(r_reason_desc, 1, 20)#55, avg(ws_quantity#4)#52 AS avg(ws_quantity)#56, cast((avg(UnscaledValue(wr_refunded_cash#19))#53 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#57, cast((avg(UnscaledValue(wr_fee#18))#54 / 100.0) as decimal(11,6)) AS avg(wr_fee)#58] +Input [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Keys [1]: [r_reason_desc#30] +Functions [3]: [avg(ws_quantity#4), avg(UnscaledValue(wr_refunded_cash#17)), avg(UnscaledValue(wr_fee#16))] +Aggregate Attributes [3]: [avg(ws_quantity#4)#43, avg(UnscaledValue(wr_refunded_cash#17))#44, avg(UnscaledValue(wr_fee#16))#45] +Results [4]: [substr(r_reason_desc#30, 1, 20) AS substr(r_reason_desc, 1, 20)#46, avg(ws_quantity#4)#43 AS avg(ws_quantity)#47, cast((avg(UnscaledValue(wr_refunded_cash#17))#44 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#48, cast((avg(UnscaledValue(wr_fee#16))#45 / 100.0) as decimal(11,6)) AS avg(wr_fee)#49] (54) TakeOrderedAndProject -Input [4]: [substr(r_reason_desc, 1, 20)#55, avg(ws_quantity)#56, avg(wr_refunded_cash)#57, avg(wr_fee)#58] -Arguments: 100, [substr(r_reason_desc, 1, 20)#55 ASC NULLS FIRST, avg(ws_quantity)#56 ASC NULLS FIRST, avg(wr_refunded_cash)#57 ASC NULLS FIRST, avg(wr_fee)#58 ASC NULLS FIRST], [substr(r_reason_desc, 1, 20)#55, avg(ws_quantity)#56, avg(wr_refunded_cash)#57, avg(wr_fee)#58] +Input [4]: [substr(r_reason_desc, 1, 20)#46, avg(ws_quantity)#47, avg(wr_refunded_cash)#48, avg(wr_fee)#49] +Arguments: 100, [substr(r_reason_desc, 1, 20)#46 ASC NULLS FIRST, avg(ws_quantity)#47 ASC NULLS FIRST, avg(wr_refunded_cash)#48 ASC NULLS FIRST, avg(wr_fee)#49 ASC NULLS FIRST], [substr(r_reason_desc, 1, 20)#46, avg(ws_quantity)#47, avg(wr_refunded_cash)#48, avg(wr_fee)#49] ===== Subqueries ===== @@ -309,25 +309,25 @@ BroadcastExchange (59) (55) Scan parquet default.date_dim -Output [2]: [d_date_sk#35, d_year#59] +Output [2]: [d_date_sk#28, d_year#50] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (56) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#35, d_year#59] +Input [2]: [d_date_sk#28, d_year#50] (57) Filter [codegen id : 1] -Input [2]: [d_date_sk#35, d_year#59] -Condition : ((isnotnull(d_year#59) AND (d_year#59 = 2000)) AND isnotnull(d_date_sk#35)) +Input [2]: [d_date_sk#28, d_year#50] +Condition : ((isnotnull(d_year#50) AND (d_year#50 = 2000)) AND isnotnull(d_date_sk#28)) (58) Project [codegen id : 1] -Output [1]: [d_date_sk#35] -Input [2]: [d_date_sk#35, d_year#59] +Output [1]: [d_date_sk#28] +Input [2]: [d_date_sk#28, d_year#50] (59) BroadcastExchange -Input [1]: [d_date_sk#35] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#60] +Input [1]: [d_date_sk#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt index 6ce464ad0805e..92d777b658b50 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q85/explain.txt @@ -66,207 +66,207 @@ Condition : ((((isnotnull(ws_item_sk#1) AND isnotnull(ws_order_number#3)) AND is (4) BroadcastExchange Input [7]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[2, int, false] as bigint) & 4294967295))),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[2, int, false] as bigint) & 4294967295))),false), [plan_id=1] (5) Scan parquet default.web_returns -Output [9]: [wr_item_sk#10, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_order_number#15, wr_fee#16, wr_refunded_cash#17, wr_returned_date_sk#18] +Output [9]: [wr_item_sk#9, wr_refunded_cdemo_sk#10, wr_refunded_addr_sk#11, wr_returning_cdemo_sk#12, wr_reason_sk#13, wr_order_number#14, wr_fee#15, wr_refunded_cash#16, wr_returned_date_sk#17] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number), IsNotNull(wr_refunded_cdemo_sk), IsNotNull(wr_returning_cdemo_sk), IsNotNull(wr_refunded_addr_sk), IsNotNull(wr_reason_sk)] ReadSchema: struct (6) ColumnarToRow -Input [9]: [wr_item_sk#10, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_order_number#15, wr_fee#16, wr_refunded_cash#17, wr_returned_date_sk#18] +Input [9]: [wr_item_sk#9, wr_refunded_cdemo_sk#10, wr_refunded_addr_sk#11, wr_returning_cdemo_sk#12, wr_reason_sk#13, wr_order_number#14, wr_fee#15, wr_refunded_cash#16, wr_returned_date_sk#17] (7) Filter -Input [9]: [wr_item_sk#10, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_order_number#15, wr_fee#16, wr_refunded_cash#17, wr_returned_date_sk#18] -Condition : (((((isnotnull(wr_item_sk#10) AND isnotnull(wr_order_number#15)) AND isnotnull(wr_refunded_cdemo_sk#11)) AND isnotnull(wr_returning_cdemo_sk#13)) AND isnotnull(wr_refunded_addr_sk#12)) AND isnotnull(wr_reason_sk#14)) +Input [9]: [wr_item_sk#9, wr_refunded_cdemo_sk#10, wr_refunded_addr_sk#11, wr_returning_cdemo_sk#12, wr_reason_sk#13, wr_order_number#14, wr_fee#15, wr_refunded_cash#16, wr_returned_date_sk#17] +Condition : (((((isnotnull(wr_item_sk#9) AND isnotnull(wr_order_number#14)) AND isnotnull(wr_refunded_cdemo_sk#10)) AND isnotnull(wr_returning_cdemo_sk#12)) AND isnotnull(wr_refunded_addr_sk#11)) AND isnotnull(wr_reason_sk#13)) (8) Project -Output [8]: [wr_item_sk#10, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_order_number#15, wr_fee#16, wr_refunded_cash#17] -Input [9]: [wr_item_sk#10, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_order_number#15, wr_fee#16, wr_refunded_cash#17, wr_returned_date_sk#18] +Output [8]: [wr_item_sk#9, wr_refunded_cdemo_sk#10, wr_refunded_addr_sk#11, wr_returning_cdemo_sk#12, wr_reason_sk#13, wr_order_number#14, wr_fee#15, wr_refunded_cash#16] +Input [9]: [wr_item_sk#9, wr_refunded_cdemo_sk#10, wr_refunded_addr_sk#11, wr_returning_cdemo_sk#12, wr_reason_sk#13, wr_order_number#14, wr_fee#15, wr_refunded_cash#16, wr_returned_date_sk#17] (9) BroadcastHashJoin [codegen id : 8] Left keys [2]: [ws_item_sk#1, ws_order_number#3] -Right keys [2]: [wr_item_sk#10, wr_order_number#15] +Right keys [2]: [wr_item_sk#9, wr_order_number#14] Join condition: None (10) Project [codegen id : 8] -Output [11]: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17] -Input [15]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_item_sk#10, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_order_number#15, wr_fee#16, wr_refunded_cash#17] +Output [11]: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#10, wr_refunded_addr_sk#11, wr_returning_cdemo_sk#12, wr_reason_sk#13, wr_fee#15, wr_refunded_cash#16] +Input [15]: [ws_item_sk#1, ws_web_page_sk#2, ws_order_number#3, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_item_sk#9, wr_refunded_cdemo_sk#10, wr_refunded_addr_sk#11, wr_returning_cdemo_sk#12, wr_reason_sk#13, wr_order_number#14, wr_fee#15, wr_refunded_cash#16] (11) Scan parquet default.web_page -Output [1]: [wp_web_page_sk#19] +Output [1]: [wp_web_page_sk#18] Batched: true Location [not included in comparison]/{warehouse_dir}/web_page] PushedFilters: [IsNotNull(wp_web_page_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [1]: [wp_web_page_sk#19] +Input [1]: [wp_web_page_sk#18] (13) Filter [codegen id : 2] -Input [1]: [wp_web_page_sk#19] -Condition : isnotnull(wp_web_page_sk#19) +Input [1]: [wp_web_page_sk#18] +Condition : isnotnull(wp_web_page_sk#18) (14) BroadcastExchange -Input [1]: [wp_web_page_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] +Input [1]: [wp_web_page_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ws_web_page_sk#2] -Right keys [1]: [wp_web_page_sk#19] +Right keys [1]: [wp_web_page_sk#18] Join condition: None (16) Project [codegen id : 8] -Output [10]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17] -Input [12]: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, wp_web_page_sk#19] +Output [10]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#10, wr_refunded_addr_sk#11, wr_returning_cdemo_sk#12, wr_reason_sk#13, wr_fee#15, wr_refunded_cash#16] +Input [12]: [ws_web_page_sk#2, ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#10, wr_refunded_addr_sk#11, wr_returning_cdemo_sk#12, wr_reason_sk#13, wr_fee#15, wr_refunded_cash#16, wp_web_page_sk#18] (17) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Output [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status), Or(Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Advanced Degree )),And(EqualTo(cd_marital_status,S),EqualTo(cd_education_status,College ))),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,2 yr Degree )))] ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] (19) Filter [codegen id : 3] -Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] -Condition : (((isnotnull(cd_demo_sk#21) AND isnotnull(cd_marital_status#22)) AND isnotnull(cd_education_status#23)) AND ((((cd_marital_status#22 = M) AND (cd_education_status#23 = Advanced Degree )) OR ((cd_marital_status#22 = S) AND (cd_education_status#23 = College ))) OR ((cd_marital_status#22 = W) AND (cd_education_status#23 = 2 yr Degree )))) +Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] +Condition : (((isnotnull(cd_demo_sk#19) AND isnotnull(cd_marital_status#20)) AND isnotnull(cd_education_status#21)) AND ((((cd_marital_status#20 = M) AND (cd_education_status#21 = Advanced Degree )) OR ((cd_marital_status#20 = S) AND (cd_education_status#21 = College ))) OR ((cd_marital_status#20 = W) AND (cd_education_status#21 = 2 yr Degree )))) (20) BroadcastExchange -Input [3]: [cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#24] +Input [3]: [cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [wr_refunded_cdemo_sk#11] -Right keys [1]: [cd_demo_sk#21] -Join condition: ((((((cd_marital_status#22 = M) AND (cd_education_status#23 = Advanced Degree )) AND (ws_sales_price#5 >= 100.00)) AND (ws_sales_price#5 <= 150.00)) OR ((((cd_marital_status#22 = S) AND (cd_education_status#23 = College )) AND (ws_sales_price#5 >= 50.00)) AND (ws_sales_price#5 <= 100.00))) OR ((((cd_marital_status#22 = W) AND (cd_education_status#23 = 2 yr Degree )) AND (ws_sales_price#5 >= 150.00)) AND (ws_sales_price#5 <= 200.00))) +Left keys [1]: [wr_refunded_cdemo_sk#10] +Right keys [1]: [cd_demo_sk#19] +Join condition: ((((((cd_marital_status#20 = M) AND (cd_education_status#21 = Advanced Degree )) AND (ws_sales_price#5 >= 100.00)) AND (ws_sales_price#5 <= 150.00)) OR ((((cd_marital_status#20 = S) AND (cd_education_status#21 = College )) AND (ws_sales_price#5 >= 50.00)) AND (ws_sales_price#5 <= 100.00))) OR ((((cd_marital_status#20 = W) AND (cd_education_status#21 = 2 yr Degree )) AND (ws_sales_price#5 >= 150.00)) AND (ws_sales_price#5 <= 200.00))) (22) Project [codegen id : 8] -Output [10]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, cd_marital_status#22, cd_education_status#23] -Input [13]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#11, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, cd_demo_sk#21, cd_marital_status#22, cd_education_status#23] +Output [10]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#11, wr_returning_cdemo_sk#12, wr_reason_sk#13, wr_fee#15, wr_refunded_cash#16, cd_marital_status#20, cd_education_status#21] +Input [13]: [ws_quantity#4, ws_sales_price#5, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_cdemo_sk#10, wr_refunded_addr_sk#11, wr_returning_cdemo_sk#12, wr_reason_sk#13, wr_fee#15, wr_refunded_cash#16, cd_demo_sk#19, cd_marital_status#20, cd_education_status#21] (23) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#25, cd_marital_status#26, cd_education_status#27] +Output [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status), IsNotNull(cd_education_status)] ReadSchema: struct (24) ColumnarToRow [codegen id : 4] -Input [3]: [cd_demo_sk#25, cd_marital_status#26, cd_education_status#27] +Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] (25) Filter [codegen id : 4] -Input [3]: [cd_demo_sk#25, cd_marital_status#26, cd_education_status#27] -Condition : ((isnotnull(cd_demo_sk#25) AND isnotnull(cd_marital_status#26)) AND isnotnull(cd_education_status#27)) +Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] +Condition : ((isnotnull(cd_demo_sk#22) AND isnotnull(cd_marital_status#23)) AND isnotnull(cd_education_status#24)) (26) BroadcastExchange -Input [3]: [cd_demo_sk#25, cd_marital_status#26, cd_education_status#27] -Arguments: HashedRelationBroadcastMode(List(input[0, int, false], input[1, string, false], input[2, string, false]),false), [id=#28] +Input [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] +Arguments: HashedRelationBroadcastMode(List(input[0, int, false], input[1, string, false], input[2, string, false]),false), [plan_id=4] (27) BroadcastHashJoin [codegen id : 8] -Left keys [3]: [wr_returning_cdemo_sk#13, cd_marital_status#22, cd_education_status#23] -Right keys [3]: [cd_demo_sk#25, cd_marital_status#26, cd_education_status#27] +Left keys [3]: [wr_returning_cdemo_sk#12, cd_marital_status#20, cd_education_status#21] +Right keys [3]: [cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] Join condition: None (28) Project [codegen id : 8] -Output [7]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#12, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17] -Input [13]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#12, wr_returning_cdemo_sk#13, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, cd_marital_status#22, cd_education_status#23, cd_demo_sk#25, cd_marital_status#26, cd_education_status#27] +Output [7]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#11, wr_reason_sk#13, wr_fee#15, wr_refunded_cash#16] +Input [13]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#11, wr_returning_cdemo_sk#12, wr_reason_sk#13, wr_fee#15, wr_refunded_cash#16, cd_marital_status#20, cd_education_status#21, cd_demo_sk#22, cd_marital_status#23, cd_education_status#24] (29) Scan parquet default.customer_address -Output [3]: [ca_address_sk#29, ca_state#30, ca_country#31] +Output [3]: [ca_address_sk#25, ca_state#26, ca_country#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_country), EqualTo(ca_country,United States), IsNotNull(ca_address_sk), Or(Or(In(ca_state, [IN,NJ,OH]),In(ca_state, [CT,KY,WI])),In(ca_state, [AR,IA,LA]))] ReadSchema: struct (30) ColumnarToRow [codegen id : 5] -Input [3]: [ca_address_sk#29, ca_state#30, ca_country#31] +Input [3]: [ca_address_sk#25, ca_state#26, ca_country#27] (31) Filter [codegen id : 5] -Input [3]: [ca_address_sk#29, ca_state#30, ca_country#31] -Condition : (((isnotnull(ca_country#31) AND (ca_country#31 = United States)) AND isnotnull(ca_address_sk#29)) AND ((ca_state#30 IN (IN,OH,NJ) OR ca_state#30 IN (WI,CT,KY)) OR ca_state#30 IN (LA,IA,AR))) +Input [3]: [ca_address_sk#25, ca_state#26, ca_country#27] +Condition : (((isnotnull(ca_country#27) AND (ca_country#27 = United States)) AND isnotnull(ca_address_sk#25)) AND ((ca_state#26 IN (IN,OH,NJ) OR ca_state#26 IN (WI,CT,KY)) OR ca_state#26 IN (LA,IA,AR))) (32) Project [codegen id : 5] -Output [2]: [ca_address_sk#29, ca_state#30] -Input [3]: [ca_address_sk#29, ca_state#30, ca_country#31] +Output [2]: [ca_address_sk#25, ca_state#26] +Input [3]: [ca_address_sk#25, ca_state#26, ca_country#27] (33) BroadcastExchange -Input [2]: [ca_address_sk#29, ca_state#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#32] +Input [2]: [ca_address_sk#25, ca_state#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [wr_refunded_addr_sk#12] -Right keys [1]: [ca_address_sk#29] -Join condition: ((((ca_state#30 IN (IN,OH,NJ) AND (ws_net_profit#6 >= 100.00)) AND (ws_net_profit#6 <= 200.00)) OR ((ca_state#30 IN (WI,CT,KY) AND (ws_net_profit#6 >= 150.00)) AND (ws_net_profit#6 <= 300.00))) OR ((ca_state#30 IN (LA,IA,AR) AND (ws_net_profit#6 >= 50.00)) AND (ws_net_profit#6 <= 250.00))) +Left keys [1]: [wr_refunded_addr_sk#11] +Right keys [1]: [ca_address_sk#25] +Join condition: ((((ca_state#26 IN (IN,OH,NJ) AND (ws_net_profit#6 >= 100.00)) AND (ws_net_profit#6 <= 200.00)) OR ((ca_state#26 IN (WI,CT,KY) AND (ws_net_profit#6 >= 150.00)) AND (ws_net_profit#6 <= 300.00))) OR ((ca_state#26 IN (LA,IA,AR) AND (ws_net_profit#6 >= 50.00)) AND (ws_net_profit#6 <= 250.00))) (35) Project [codegen id : 8] -Output [5]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17] -Input [9]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#12, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, ca_address_sk#29, ca_state#30] +Output [5]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#13, wr_fee#15, wr_refunded_cash#16] +Input [9]: [ws_quantity#4, ws_net_profit#6, ws_sold_date_sk#7, wr_refunded_addr_sk#11, wr_reason_sk#13, wr_fee#15, wr_refunded_cash#16, ca_address_sk#25, ca_state#26] (36) ReusedExchange [Reuses operator id: 53] -Output [1]: [d_date_sk#33] +Output [1]: [d_date_sk#28] (37) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ws_sold_date_sk#7] -Right keys [1]: [d_date_sk#33] +Right keys [1]: [d_date_sk#28] Join condition: None (38) Project [codegen id : 8] -Output [4]: [ws_quantity#4, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17] -Input [6]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, d_date_sk#33] +Output [4]: [ws_quantity#4, wr_reason_sk#13, wr_fee#15, wr_refunded_cash#16] +Input [6]: [ws_quantity#4, ws_sold_date_sk#7, wr_reason_sk#13, wr_fee#15, wr_refunded_cash#16, d_date_sk#28] (39) Scan parquet default.reason -Output [2]: [r_reason_sk#34, r_reason_desc#35] +Output [2]: [r_reason_sk#29, r_reason_desc#30] Batched: true Location [not included in comparison]/{warehouse_dir}/reason] PushedFilters: [IsNotNull(r_reason_sk)] ReadSchema: struct (40) ColumnarToRow [codegen id : 7] -Input [2]: [r_reason_sk#34, r_reason_desc#35] +Input [2]: [r_reason_sk#29, r_reason_desc#30] (41) Filter [codegen id : 7] -Input [2]: [r_reason_sk#34, r_reason_desc#35] -Condition : isnotnull(r_reason_sk#34) +Input [2]: [r_reason_sk#29, r_reason_desc#30] +Condition : isnotnull(r_reason_sk#29) (42) BroadcastExchange -Input [2]: [r_reason_sk#34, r_reason_desc#35] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] +Input [2]: [r_reason_sk#29, r_reason_desc#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] (43) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [wr_reason_sk#14] -Right keys [1]: [r_reason_sk#34] +Left keys [1]: [wr_reason_sk#13] +Right keys [1]: [r_reason_sk#29] Join condition: None (44) Project [codegen id : 8] -Output [4]: [ws_quantity#4, wr_fee#16, wr_refunded_cash#17, r_reason_desc#35] -Input [6]: [ws_quantity#4, wr_reason_sk#14, wr_fee#16, wr_refunded_cash#17, r_reason_sk#34, r_reason_desc#35] +Output [4]: [ws_quantity#4, wr_fee#15, wr_refunded_cash#16, r_reason_desc#30] +Input [6]: [ws_quantity#4, wr_reason_sk#13, wr_fee#15, wr_refunded_cash#16, r_reason_sk#29, r_reason_desc#30] (45) HashAggregate [codegen id : 8] -Input [4]: [ws_quantity#4, wr_fee#16, wr_refunded_cash#17, r_reason_desc#35] -Keys [1]: [r_reason_desc#35] -Functions [3]: [partial_avg(ws_quantity#4), partial_avg(UnscaledValue(wr_refunded_cash#17)), partial_avg(UnscaledValue(wr_fee#16))] -Aggregate Attributes [6]: [sum#37, count#38, sum#39, count#40, sum#41, count#42] -Results [7]: [r_reason_desc#35, sum#43, count#44, sum#45, count#46, sum#47, count#48] +Input [4]: [ws_quantity#4, wr_fee#15, wr_refunded_cash#16, r_reason_desc#30] +Keys [1]: [r_reason_desc#30] +Functions [3]: [partial_avg(ws_quantity#4), partial_avg(UnscaledValue(wr_refunded_cash#16)), partial_avg(UnscaledValue(wr_fee#15))] +Aggregate Attributes [6]: [sum#31, count#32, sum#33, count#34, sum#35, count#36] +Results [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] (46) Exchange -Input [7]: [r_reason_desc#35, sum#43, count#44, sum#45, count#46, sum#47, count#48] -Arguments: hashpartitioning(r_reason_desc#35, 5), ENSURE_REQUIREMENTS, [id=#49] +Input [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Arguments: hashpartitioning(r_reason_desc#30, 5), ENSURE_REQUIREMENTS, [plan_id=7] (47) HashAggregate [codegen id : 9] -Input [7]: [r_reason_desc#35, sum#43, count#44, sum#45, count#46, sum#47, count#48] -Keys [1]: [r_reason_desc#35] -Functions [3]: [avg(ws_quantity#4), avg(UnscaledValue(wr_refunded_cash#17)), avg(UnscaledValue(wr_fee#16))] -Aggregate Attributes [3]: [avg(ws_quantity#4)#50, avg(UnscaledValue(wr_refunded_cash#17))#51, avg(UnscaledValue(wr_fee#16))#52] -Results [4]: [substr(r_reason_desc#35, 1, 20) AS substr(r_reason_desc, 1, 20)#53, avg(ws_quantity#4)#50 AS avg(ws_quantity)#54, cast((avg(UnscaledValue(wr_refunded_cash#17))#51 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#55, cast((avg(UnscaledValue(wr_fee#16))#52 / 100.0) as decimal(11,6)) AS avg(wr_fee)#56] +Input [7]: [r_reason_desc#30, sum#37, count#38, sum#39, count#40, sum#41, count#42] +Keys [1]: [r_reason_desc#30] +Functions [3]: [avg(ws_quantity#4), avg(UnscaledValue(wr_refunded_cash#16)), avg(UnscaledValue(wr_fee#15))] +Aggregate Attributes [3]: [avg(ws_quantity#4)#43, avg(UnscaledValue(wr_refunded_cash#16))#44, avg(UnscaledValue(wr_fee#15))#45] +Results [4]: [substr(r_reason_desc#30, 1, 20) AS substr(r_reason_desc, 1, 20)#46, avg(ws_quantity#4)#43 AS avg(ws_quantity)#47, cast((avg(UnscaledValue(wr_refunded_cash#16))#44 / 100.0) as decimal(11,6)) AS avg(wr_refunded_cash)#48, cast((avg(UnscaledValue(wr_fee#15))#45 / 100.0) as decimal(11,6)) AS avg(wr_fee)#49] (48) TakeOrderedAndProject -Input [4]: [substr(r_reason_desc, 1, 20)#53, avg(ws_quantity)#54, avg(wr_refunded_cash)#55, avg(wr_fee)#56] -Arguments: 100, [substr(r_reason_desc, 1, 20)#53 ASC NULLS FIRST, avg(ws_quantity)#54 ASC NULLS FIRST, avg(wr_refunded_cash)#55 ASC NULLS FIRST, avg(wr_fee)#56 ASC NULLS FIRST], [substr(r_reason_desc, 1, 20)#53, avg(ws_quantity)#54, avg(wr_refunded_cash)#55, avg(wr_fee)#56] +Input [4]: [substr(r_reason_desc, 1, 20)#46, avg(ws_quantity)#47, avg(wr_refunded_cash)#48, avg(wr_fee)#49] +Arguments: 100, [substr(r_reason_desc, 1, 20)#46 ASC NULLS FIRST, avg(ws_quantity)#47 ASC NULLS FIRST, avg(wr_refunded_cash)#48 ASC NULLS FIRST, avg(wr_fee)#49 ASC NULLS FIRST], [substr(r_reason_desc, 1, 20)#46, avg(ws_quantity)#47, avg(wr_refunded_cash)#48, avg(wr_fee)#49] ===== Subqueries ===== @@ -279,25 +279,25 @@ BroadcastExchange (53) (49) Scan parquet default.date_dim -Output [2]: [d_date_sk#33, d_year#57] +Output [2]: [d_date_sk#28, d_year#50] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (50) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#33, d_year#57] +Input [2]: [d_date_sk#28, d_year#50] (51) Filter [codegen id : 1] -Input [2]: [d_date_sk#33, d_year#57] -Condition : ((isnotnull(d_year#57) AND (d_year#57 = 2000)) AND isnotnull(d_date_sk#33)) +Input [2]: [d_date_sk#28, d_year#50] +Condition : ((isnotnull(d_year#50) AND (d_year#50 = 2000)) AND isnotnull(d_date_sk#28)) (52) Project [codegen id : 1] -Output [1]: [d_date_sk#33] -Input [2]: [d_date_sk#33, d_year#57] +Output [1]: [d_date_sk#28] +Input [2]: [d_date_sk#28, d_year#50] (53) BroadcastExchange -Input [1]: [d_date_sk#33] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#58] +Input [1]: [d_date_sk#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.sf100/explain.txt index 0cc089ebeb840..10251e779e817 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86.sf100/explain.txt @@ -65,7 +65,7 @@ Condition : isnotnull(i_item_sk#6) (10) BroadcastExchange Input [3]: [i_item_sk#6, i_class#7, i_category#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ws_item_sk#1] @@ -78,45 +78,45 @@ Input [5]: [ws_item_sk#1, ws_net_paid#2, i_item_sk#6, i_class#7, i_category#8] (13) Expand [codegen id : 3] Input [3]: [ws_net_paid#2, i_category#8, i_class#7] -Arguments: [[ws_net_paid#2, i_category#8, i_class#7, 0], [ws_net_paid#2, i_category#8, null, 1], [ws_net_paid#2, null, null, 3]], [ws_net_paid#2, i_category#10, i_class#11, spark_grouping_id#12] +Arguments: [[ws_net_paid#2, i_category#8, i_class#7, 0], [ws_net_paid#2, i_category#8, null, 1], [ws_net_paid#2, null, null, 3]], [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] (14) HashAggregate [codegen id : 3] -Input [4]: [ws_net_paid#2, i_category#10, i_class#11, spark_grouping_id#12] -Keys [3]: [i_category#10, i_class#11, spark_grouping_id#12] +Input [4]: [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] +Keys [3]: [i_category#9, i_class#10, spark_grouping_id#11] Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#2))] -Aggregate Attributes [1]: [sum#13] -Results [4]: [i_category#10, i_class#11, spark_grouping_id#12, sum#14] +Aggregate Attributes [1]: [sum#12] +Results [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] (15) Exchange -Input [4]: [i_category#10, i_class#11, spark_grouping_id#12, sum#14] -Arguments: hashpartitioning(i_category#10, i_class#11, spark_grouping_id#12, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] +Arguments: hashpartitioning(i_category#9, i_class#10, spark_grouping_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=2] (16) HashAggregate [codegen id : 4] -Input [4]: [i_category#10, i_class#11, spark_grouping_id#12, sum#14] -Keys [3]: [i_category#10, i_class#11, spark_grouping_id#12] +Input [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] +Keys [3]: [i_category#9, i_class#10, spark_grouping_id#11] Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#16] -Results [7]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#16,17,2) AS total_sum#17, i_category#10, i_class#11, (cast((shiftright(spark_grouping_id#12, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#12, 0) & 1) as tinyint)) AS lochierarchy#18, (cast((shiftright(spark_grouping_id#12, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#12, 0) & 1) as tinyint)) AS _w1#19, CASE WHEN (cast((shiftright(spark_grouping_id#12, 0) & 1) as tinyint) = 0) THEN i_category#10 END AS _w2#20, MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#16,17,2) AS _w3#21] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#14] +Results [7]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#14,17,2) AS total_sum#15, i_category#9, i_class#10, (cast((shiftright(spark_grouping_id#11, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint)) AS lochierarchy#16, (cast((shiftright(spark_grouping_id#11, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint)) AS _w1#17, CASE WHEN (cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint) = 0) THEN i_category#9 END AS _w2#18, MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#14,17,2) AS _w3#19] (17) Exchange -Input [7]: [total_sum#17, i_category#10, i_class#11, lochierarchy#18, _w1#19, _w2#20, _w3#21] -Arguments: hashpartitioning(_w1#19, _w2#20, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w1#17, _w2#18, _w3#19] +Arguments: hashpartitioning(_w1#17, _w2#18, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) Sort [codegen id : 5] -Input [7]: [total_sum#17, i_category#10, i_class#11, lochierarchy#18, _w1#19, _w2#20, _w3#21] -Arguments: [_w1#19 ASC NULLS FIRST, _w2#20 ASC NULLS FIRST, _w3#21 DESC NULLS LAST], false, 0 +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w1#17, _w2#18, _w3#19] +Arguments: [_w1#17 ASC NULLS FIRST, _w2#18 ASC NULLS FIRST, _w3#19 DESC NULLS LAST], false, 0 (19) Window -Input [7]: [total_sum#17, i_category#10, i_class#11, lochierarchy#18, _w1#19, _w2#20, _w3#21] -Arguments: [rank(_w3#21) windowspecdefinition(_w1#19, _w2#20, _w3#21 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#23], [_w1#19, _w2#20], [_w3#21 DESC NULLS LAST] +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w1#17, _w2#18, _w3#19] +Arguments: [rank(_w3#19) windowspecdefinition(_w1#17, _w2#18, _w3#19 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#20], [_w1#17, _w2#18], [_w3#19 DESC NULLS LAST] (20) Project [codegen id : 6] -Output [5]: [total_sum#17, i_category#10, i_class#11, lochierarchy#18, rank_within_parent#23] -Input [8]: [total_sum#17, i_category#10, i_class#11, lochierarchy#18, _w1#19, _w2#20, _w3#21, rank_within_parent#23] +Output [5]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] +Input [8]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w1#17, _w2#18, _w3#19, rank_within_parent#20] (21) TakeOrderedAndProject -Input [5]: [total_sum#17, i_category#10, i_class#11, lochierarchy#18, rank_within_parent#23] -Arguments: 100, [lochierarchy#18 DESC NULLS LAST, CASE WHEN (lochierarchy#18 = 0) THEN i_category#10 END ASC NULLS FIRST, rank_within_parent#23 ASC NULLS FIRST], [total_sum#17, i_category#10, i_class#11, lochierarchy#18, rank_within_parent#23] +Input [5]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] +Arguments: 100, [lochierarchy#16 DESC NULLS LAST, CASE WHEN (lochierarchy#16 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#20 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] ===== Subqueries ===== @@ -129,25 +129,25 @@ BroadcastExchange (26) (22) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_month_seq#24] +Output [2]: [d_date_sk#5, d_month_seq#21] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#24] +Input [2]: [d_date_sk#5, d_month_seq#21] (24) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#24] -Condition : (((isnotnull(d_month_seq#24) AND (d_month_seq#24 >= 1200)) AND (d_month_seq#24 <= 1211)) AND isnotnull(d_date_sk#5)) +Input [2]: [d_date_sk#5, d_month_seq#21] +Condition : (((isnotnull(d_month_seq#21) AND (d_month_seq#21 >= 1200)) AND (d_month_seq#21 <= 1211)) AND isnotnull(d_date_sk#5)) (25) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_month_seq#24] +Input [2]: [d_date_sk#5, d_month_seq#21] (26) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt index 0cc089ebeb840..10251e779e817 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q86/explain.txt @@ -65,7 +65,7 @@ Condition : isnotnull(i_item_sk#6) (10) BroadcastExchange Input [3]: [i_item_sk#6, i_class#7, i_category#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ws_item_sk#1] @@ -78,45 +78,45 @@ Input [5]: [ws_item_sk#1, ws_net_paid#2, i_item_sk#6, i_class#7, i_category#8] (13) Expand [codegen id : 3] Input [3]: [ws_net_paid#2, i_category#8, i_class#7] -Arguments: [[ws_net_paid#2, i_category#8, i_class#7, 0], [ws_net_paid#2, i_category#8, null, 1], [ws_net_paid#2, null, null, 3]], [ws_net_paid#2, i_category#10, i_class#11, spark_grouping_id#12] +Arguments: [[ws_net_paid#2, i_category#8, i_class#7, 0], [ws_net_paid#2, i_category#8, null, 1], [ws_net_paid#2, null, null, 3]], [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] (14) HashAggregate [codegen id : 3] -Input [4]: [ws_net_paid#2, i_category#10, i_class#11, spark_grouping_id#12] -Keys [3]: [i_category#10, i_class#11, spark_grouping_id#12] +Input [4]: [ws_net_paid#2, i_category#9, i_class#10, spark_grouping_id#11] +Keys [3]: [i_category#9, i_class#10, spark_grouping_id#11] Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#2))] -Aggregate Attributes [1]: [sum#13] -Results [4]: [i_category#10, i_class#11, spark_grouping_id#12, sum#14] +Aggregate Attributes [1]: [sum#12] +Results [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] (15) Exchange -Input [4]: [i_category#10, i_class#11, spark_grouping_id#12, sum#14] -Arguments: hashpartitioning(i_category#10, i_class#11, spark_grouping_id#12, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] +Arguments: hashpartitioning(i_category#9, i_class#10, spark_grouping_id#11, 5), ENSURE_REQUIREMENTS, [plan_id=2] (16) HashAggregate [codegen id : 4] -Input [4]: [i_category#10, i_class#11, spark_grouping_id#12, sum#14] -Keys [3]: [i_category#10, i_class#11, spark_grouping_id#12] +Input [4]: [i_category#9, i_class#10, spark_grouping_id#11, sum#13] +Keys [3]: [i_category#9, i_class#10, spark_grouping_id#11] Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#16] -Results [7]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#16,17,2) AS total_sum#17, i_category#10, i_class#11, (cast((shiftright(spark_grouping_id#12, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#12, 0) & 1) as tinyint)) AS lochierarchy#18, (cast((shiftright(spark_grouping_id#12, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#12, 0) & 1) as tinyint)) AS _w1#19, CASE WHEN (cast((shiftright(spark_grouping_id#12, 0) & 1) as tinyint) = 0) THEN i_category#10 END AS _w2#20, MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#16,17,2) AS _w3#21] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#14] +Results [7]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#14,17,2) AS total_sum#15, i_category#9, i_class#10, (cast((shiftright(spark_grouping_id#11, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint)) AS lochierarchy#16, (cast((shiftright(spark_grouping_id#11, 1) & 1) as tinyint) + cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint)) AS _w1#17, CASE WHEN (cast((shiftright(spark_grouping_id#11, 0) & 1) as tinyint) = 0) THEN i_category#9 END AS _w2#18, MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#14,17,2) AS _w3#19] (17) Exchange -Input [7]: [total_sum#17, i_category#10, i_class#11, lochierarchy#18, _w1#19, _w2#20, _w3#21] -Arguments: hashpartitioning(_w1#19, _w2#20, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w1#17, _w2#18, _w3#19] +Arguments: hashpartitioning(_w1#17, _w2#18, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) Sort [codegen id : 5] -Input [7]: [total_sum#17, i_category#10, i_class#11, lochierarchy#18, _w1#19, _w2#20, _w3#21] -Arguments: [_w1#19 ASC NULLS FIRST, _w2#20 ASC NULLS FIRST, _w3#21 DESC NULLS LAST], false, 0 +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w1#17, _w2#18, _w3#19] +Arguments: [_w1#17 ASC NULLS FIRST, _w2#18 ASC NULLS FIRST, _w3#19 DESC NULLS LAST], false, 0 (19) Window -Input [7]: [total_sum#17, i_category#10, i_class#11, lochierarchy#18, _w1#19, _w2#20, _w3#21] -Arguments: [rank(_w3#21) windowspecdefinition(_w1#19, _w2#20, _w3#21 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#23], [_w1#19, _w2#20], [_w3#21 DESC NULLS LAST] +Input [7]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w1#17, _w2#18, _w3#19] +Arguments: [rank(_w3#19) windowspecdefinition(_w1#17, _w2#18, _w3#19 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#20], [_w1#17, _w2#18], [_w3#19 DESC NULLS LAST] (20) Project [codegen id : 6] -Output [5]: [total_sum#17, i_category#10, i_class#11, lochierarchy#18, rank_within_parent#23] -Input [8]: [total_sum#17, i_category#10, i_class#11, lochierarchy#18, _w1#19, _w2#20, _w3#21, rank_within_parent#23] +Output [5]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] +Input [8]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, _w1#17, _w2#18, _w3#19, rank_within_parent#20] (21) TakeOrderedAndProject -Input [5]: [total_sum#17, i_category#10, i_class#11, lochierarchy#18, rank_within_parent#23] -Arguments: 100, [lochierarchy#18 DESC NULLS LAST, CASE WHEN (lochierarchy#18 = 0) THEN i_category#10 END ASC NULLS FIRST, rank_within_parent#23 ASC NULLS FIRST], [total_sum#17, i_category#10, i_class#11, lochierarchy#18, rank_within_parent#23] +Input [5]: [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] +Arguments: 100, [lochierarchy#16 DESC NULLS LAST, CASE WHEN (lochierarchy#16 = 0) THEN i_category#9 END ASC NULLS FIRST, rank_within_parent#20 ASC NULLS FIRST], [total_sum#15, i_category#9, i_class#10, lochierarchy#16, rank_within_parent#20] ===== Subqueries ===== @@ -129,25 +129,25 @@ BroadcastExchange (26) (22) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_month_seq#24] +Output [2]: [d_date_sk#5, d_month_seq#21] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#24] +Input [2]: [d_date_sk#5, d_month_seq#21] (24) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#24] -Condition : (((isnotnull(d_month_seq#24) AND (d_month_seq#24 >= 1200)) AND (d_month_seq#24 <= 1211)) AND isnotnull(d_date_sk#5)) +Input [2]: [d_date_sk#5, d_month_seq#21] +Condition : (((isnotnull(d_month_seq#21) AND (d_month_seq#21 >= 1200)) AND (d_month_seq#21 <= 1211)) AND isnotnull(d_date_sk#5)) (25) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_month_seq#24] +Input [2]: [d_date_sk#5, d_month_seq#21] (26) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/explain.txt index 38ecc6f3ed822..d254ec61e6d75 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87.sf100/explain.txt @@ -90,254 +90,254 @@ Input [4]: [ss_customer_sk#1, ss_sold_date_sk#2, d_date_sk#4, d_date#5] (7) Exchange Input [2]: [ss_customer_sk#1, d_date#5] -Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#6] +Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (8) Sort [codegen id : 3] Input [2]: [ss_customer_sk#1, d_date#5] Arguments: [ss_customer_sk#1 ASC NULLS FIRST], false, 0 (9) Scan parquet default.customer -Output [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Output [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (10) ColumnarToRow [codegen id : 4] -Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] (11) Filter [codegen id : 4] -Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] -Condition : isnotnull(c_customer_sk#7) +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Condition : isnotnull(c_customer_sk#6) (12) Exchange -Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] -Arguments: hashpartitioning(c_customer_sk#7, 5), ENSURE_REQUIREMENTS, [id=#10] +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: hashpartitioning(c_customer_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (13) Sort [codegen id : 5] -Input [3]: [c_customer_sk#7, c_first_name#8, c_last_name#9] -Arguments: [c_customer_sk#7 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] +Arguments: [c_customer_sk#6 ASC NULLS FIRST], false, 0 (14) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#7] +Right keys [1]: [c_customer_sk#6] Join condition: None (15) Project [codegen id : 6] -Output [3]: [c_last_name#9, c_first_name#8, d_date#5] -Input [5]: [ss_customer_sk#1, d_date#5, c_customer_sk#7, c_first_name#8, c_last_name#9] +Output [3]: [c_last_name#8, c_first_name#7, d_date#5] +Input [5]: [ss_customer_sk#1, d_date#5, c_customer_sk#6, c_first_name#7, c_last_name#8] (16) HashAggregate [codegen id : 6] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#5] +Input [3]: [c_last_name#8, c_first_name#7, d_date#5] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#5] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#5] +Results [3]: [c_last_name#8, c_first_name#7, d_date#5] (17) Exchange -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Arguments: hashpartitioning(c_last_name#9, c_first_name#8, d_date#5, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [3]: [c_last_name#8, c_first_name#7, d_date#5] +Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 7] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Keys [3]: [c_last_name#9, c_first_name#8, d_date#5] +Input [3]: [c_last_name#8, c_first_name#7, d_date#5] +Keys [3]: [c_last_name#8, c_first_name#7, d_date#5] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#9, c_first_name#8, d_date#5] +Results [3]: [c_last_name#8, c_first_name#7, d_date#5] (19) Exchange -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Arguments: hashpartitioning(coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#5, 1970-01-01), isnull(d_date#5), 5), ENSURE_REQUIREMENTS, [id=#12] +Input [3]: [c_last_name#8, c_first_name#7, d_date#5] +Arguments: hashpartitioning(coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#5, 1970-01-01), isnull(d_date#5), 5), ENSURE_REQUIREMENTS, [plan_id=4] (20) Sort [codegen id : 8] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] -Arguments: [coalesce(c_last_name#9, ) ASC NULLS FIRST, isnull(c_last_name#9) ASC NULLS FIRST, coalesce(c_first_name#8, ) ASC NULLS FIRST, isnull(c_first_name#8) ASC NULLS FIRST, coalesce(d_date#5, 1970-01-01) ASC NULLS FIRST, isnull(d_date#5) ASC NULLS FIRST], false, 0 +Input [3]: [c_last_name#8, c_first_name#7, d_date#5] +Arguments: [coalesce(c_last_name#8, ) ASC NULLS FIRST, isnull(c_last_name#8) ASC NULLS FIRST, coalesce(c_first_name#7, ) ASC NULLS FIRST, isnull(c_first_name#7) ASC NULLS FIRST, coalesce(d_date#5, 1970-01-01) ASC NULLS FIRST, isnull(d_date#5) ASC NULLS FIRST], false, 0 (21) Scan parquet default.catalog_sales -Output [2]: [cs_bill_customer_sk#13, cs_sold_date_sk#14] +Output [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#14), dynamicpruningexpression(cs_sold_date_sk#14 IN dynamicpruning#3)] +PartitionFilters: [isnotnull(cs_sold_date_sk#10), dynamicpruningexpression(cs_sold_date_sk#10 IN dynamicpruning#3)] PushedFilters: [IsNotNull(cs_bill_customer_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 10] -Input [2]: [cs_bill_customer_sk#13, cs_sold_date_sk#14] +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] (23) Filter [codegen id : 10] -Input [2]: [cs_bill_customer_sk#13, cs_sold_date_sk#14] -Condition : isnotnull(cs_bill_customer_sk#13) +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Condition : isnotnull(cs_bill_customer_sk#9) (24) ReusedExchange [Reuses operator id: 65] -Output [2]: [d_date_sk#15, d_date#16] +Output [2]: [d_date_sk#11, d_date#12] (25) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#14] -Right keys [1]: [d_date_sk#15] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] Join condition: None (26) Project [codegen id : 10] -Output [2]: [cs_bill_customer_sk#13, d_date#16] -Input [4]: [cs_bill_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15, d_date#16] +Output [2]: [cs_bill_customer_sk#9, d_date#12] +Input [4]: [cs_bill_customer_sk#9, cs_sold_date_sk#10, d_date_sk#11, d_date#12] (27) Exchange -Input [2]: [cs_bill_customer_sk#13, d_date#16] -Arguments: hashpartitioning(cs_bill_customer_sk#13, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [2]: [cs_bill_customer_sk#9, d_date#12] +Arguments: hashpartitioning(cs_bill_customer_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=5] (28) Sort [codegen id : 11] -Input [2]: [cs_bill_customer_sk#13, d_date#16] -Arguments: [cs_bill_customer_sk#13 ASC NULLS FIRST], false, 0 +Input [2]: [cs_bill_customer_sk#9, d_date#12] +Arguments: [cs_bill_customer_sk#9 ASC NULLS FIRST], false, 0 (29) ReusedExchange [Reuses operator id: 12] -Output [3]: [c_customer_sk#18, c_first_name#19, c_last_name#20] +Output [3]: [c_customer_sk#13, c_first_name#14, c_last_name#15] (30) Sort [codegen id : 13] -Input [3]: [c_customer_sk#18, c_first_name#19, c_last_name#20] -Arguments: [c_customer_sk#18 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#13, c_first_name#14, c_last_name#15] +Arguments: [c_customer_sk#13 ASC NULLS FIRST], false, 0 (31) SortMergeJoin [codegen id : 14] -Left keys [1]: [cs_bill_customer_sk#13] -Right keys [1]: [c_customer_sk#18] +Left keys [1]: [cs_bill_customer_sk#9] +Right keys [1]: [c_customer_sk#13] Join condition: None (32) Project [codegen id : 14] -Output [3]: [c_last_name#20, c_first_name#19, d_date#16] -Input [5]: [cs_bill_customer_sk#13, d_date#16, c_customer_sk#18, c_first_name#19, c_last_name#20] +Output [3]: [c_last_name#15, c_first_name#14, d_date#12] +Input [5]: [cs_bill_customer_sk#9, d_date#12, c_customer_sk#13, c_first_name#14, c_last_name#15] (33) HashAggregate [codegen id : 14] -Input [3]: [c_last_name#20, c_first_name#19, d_date#16] -Keys [3]: [c_last_name#20, c_first_name#19, d_date#16] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#20, c_first_name#19, d_date#16] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] (34) Exchange -Input [3]: [c_last_name#20, c_first_name#19, d_date#16] -Arguments: hashpartitioning(c_last_name#20, c_first_name#19, d_date#16, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, d_date#12, 5), ENSURE_REQUIREMENTS, [plan_id=6] (35) HashAggregate [codegen id : 15] -Input [3]: [c_last_name#20, c_first_name#19, d_date#16] -Keys [3]: [c_last_name#20, c_first_name#19, d_date#16] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#20, c_first_name#19, d_date#16] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] (36) Exchange -Input [3]: [c_last_name#20, c_first_name#19, d_date#16] -Arguments: hashpartitioning(coalesce(c_last_name#20, ), isnull(c_last_name#20), coalesce(c_first_name#19, ), isnull(c_first_name#19), coalesce(d_date#16, 1970-01-01), isnull(d_date#16), 5), ENSURE_REQUIREMENTS, [id=#22] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: hashpartitioning(coalesce(c_last_name#15, ), isnull(c_last_name#15), coalesce(c_first_name#14, ), isnull(c_first_name#14), coalesce(d_date#12, 1970-01-01), isnull(d_date#12), 5), ENSURE_REQUIREMENTS, [plan_id=7] (37) Sort [codegen id : 16] -Input [3]: [c_last_name#20, c_first_name#19, d_date#16] -Arguments: [coalesce(c_last_name#20, ) ASC NULLS FIRST, isnull(c_last_name#20) ASC NULLS FIRST, coalesce(c_first_name#19, ) ASC NULLS FIRST, isnull(c_first_name#19) ASC NULLS FIRST, coalesce(d_date#16, 1970-01-01) ASC NULLS FIRST, isnull(d_date#16) ASC NULLS FIRST], false, 0 +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: [coalesce(c_last_name#15, ) ASC NULLS FIRST, isnull(c_last_name#15) ASC NULLS FIRST, coalesce(c_first_name#14, ) ASC NULLS FIRST, isnull(c_first_name#14) ASC NULLS FIRST, coalesce(d_date#12, 1970-01-01) ASC NULLS FIRST, isnull(d_date#12) ASC NULLS FIRST], false, 0 (38) SortMergeJoin [codegen id : 17] -Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] -Right keys [6]: [coalesce(c_last_name#20, ), isnull(c_last_name#20), coalesce(c_first_name#19, ), isnull(c_first_name#19), coalesce(d_date#16, 1970-01-01), isnull(d_date#16)] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] +Right keys [6]: [coalesce(c_last_name#15, ), isnull(c_last_name#15), coalesce(c_first_name#14, ), isnull(c_first_name#14), coalesce(d_date#12, 1970-01-01), isnull(d_date#12)] Join condition: None (39) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#23, ws_sold_date_sk#24] +Output [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#24), dynamicpruningexpression(ws_sold_date_sk#24 IN dynamicpruning#3)] +PartitionFilters: [isnotnull(ws_sold_date_sk#17), dynamicpruningexpression(ws_sold_date_sk#17 IN dynamicpruning#3)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (40) ColumnarToRow [codegen id : 19] -Input [2]: [ws_bill_customer_sk#23, ws_sold_date_sk#24] +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] (41) Filter [codegen id : 19] -Input [2]: [ws_bill_customer_sk#23, ws_sold_date_sk#24] -Condition : isnotnull(ws_bill_customer_sk#23) +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Condition : isnotnull(ws_bill_customer_sk#16) (42) ReusedExchange [Reuses operator id: 65] -Output [2]: [d_date_sk#25, d_date#26] +Output [2]: [d_date_sk#18, d_date#19] (43) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ws_sold_date_sk#24] -Right keys [1]: [d_date_sk#25] +Left keys [1]: [ws_sold_date_sk#17] +Right keys [1]: [d_date_sk#18] Join condition: None (44) Project [codegen id : 19] -Output [2]: [ws_bill_customer_sk#23, d_date#26] -Input [4]: [ws_bill_customer_sk#23, ws_sold_date_sk#24, d_date_sk#25, d_date#26] +Output [2]: [ws_bill_customer_sk#16, d_date#19] +Input [4]: [ws_bill_customer_sk#16, ws_sold_date_sk#17, d_date_sk#18, d_date#19] (45) Exchange -Input [2]: [ws_bill_customer_sk#23, d_date#26] -Arguments: hashpartitioning(ws_bill_customer_sk#23, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [2]: [ws_bill_customer_sk#16, d_date#19] +Arguments: hashpartitioning(ws_bill_customer_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=8] (46) Sort [codegen id : 20] -Input [2]: [ws_bill_customer_sk#23, d_date#26] -Arguments: [ws_bill_customer_sk#23 ASC NULLS FIRST], false, 0 +Input [2]: [ws_bill_customer_sk#16, d_date#19] +Arguments: [ws_bill_customer_sk#16 ASC NULLS FIRST], false, 0 (47) ReusedExchange [Reuses operator id: 12] -Output [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] +Output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] (48) Sort [codegen id : 22] -Input [3]: [c_customer_sk#28, c_first_name#29, c_last_name#30] -Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] +Arguments: [c_customer_sk#20 ASC NULLS FIRST], false, 0 (49) SortMergeJoin [codegen id : 23] -Left keys [1]: [ws_bill_customer_sk#23] -Right keys [1]: [c_customer_sk#28] +Left keys [1]: [ws_bill_customer_sk#16] +Right keys [1]: [c_customer_sk#20] Join condition: None (50) Project [codegen id : 23] -Output [3]: [c_last_name#30, c_first_name#29, d_date#26] -Input [5]: [ws_bill_customer_sk#23, d_date#26, c_customer_sk#28, c_first_name#29, c_last_name#30] +Output [3]: [c_last_name#22, c_first_name#21, d_date#19] +Input [5]: [ws_bill_customer_sk#16, d_date#19, c_customer_sk#20, c_first_name#21, c_last_name#22] (51) HashAggregate [codegen id : 23] -Input [3]: [c_last_name#30, c_first_name#29, d_date#26] -Keys [3]: [c_last_name#30, c_first_name#29, d_date#26] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#30, c_first_name#29, d_date#26] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] (52) Exchange -Input [3]: [c_last_name#30, c_first_name#29, d_date#26] -Arguments: hashpartitioning(c_last_name#30, c_first_name#29, d_date#26, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: hashpartitioning(c_last_name#22, c_first_name#21, d_date#19, 5), ENSURE_REQUIREMENTS, [plan_id=9] (53) HashAggregate [codegen id : 24] -Input [3]: [c_last_name#30, c_first_name#29, d_date#26] -Keys [3]: [c_last_name#30, c_first_name#29, d_date#26] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#30, c_first_name#29, d_date#26] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] (54) Exchange -Input [3]: [c_last_name#30, c_first_name#29, d_date#26] -Arguments: hashpartitioning(coalesce(c_last_name#30, ), isnull(c_last_name#30), coalesce(c_first_name#29, ), isnull(c_first_name#29), coalesce(d_date#26, 1970-01-01), isnull(d_date#26), 5), ENSURE_REQUIREMENTS, [id=#32] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: hashpartitioning(coalesce(c_last_name#22, ), isnull(c_last_name#22), coalesce(c_first_name#21, ), isnull(c_first_name#21), coalesce(d_date#19, 1970-01-01), isnull(d_date#19), 5), ENSURE_REQUIREMENTS, [plan_id=10] (55) Sort [codegen id : 25] -Input [3]: [c_last_name#30, c_first_name#29, d_date#26] -Arguments: [coalesce(c_last_name#30, ) ASC NULLS FIRST, isnull(c_last_name#30) ASC NULLS FIRST, coalesce(c_first_name#29, ) ASC NULLS FIRST, isnull(c_first_name#29) ASC NULLS FIRST, coalesce(d_date#26, 1970-01-01) ASC NULLS FIRST, isnull(d_date#26) ASC NULLS FIRST], false, 0 +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: [coalesce(c_last_name#22, ) ASC NULLS FIRST, isnull(c_last_name#22) ASC NULLS FIRST, coalesce(c_first_name#21, ) ASC NULLS FIRST, isnull(c_first_name#21) ASC NULLS FIRST, coalesce(d_date#19, 1970-01-01) ASC NULLS FIRST, isnull(d_date#19) ASC NULLS FIRST], false, 0 (56) SortMergeJoin [codegen id : 26] -Left keys [6]: [coalesce(c_last_name#9, ), isnull(c_last_name#9), coalesce(c_first_name#8, ), isnull(c_first_name#8), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] -Right keys [6]: [coalesce(c_last_name#30, ), isnull(c_last_name#30), coalesce(c_first_name#29, ), isnull(c_first_name#29), coalesce(d_date#26, 1970-01-01), isnull(d_date#26)] +Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] +Right keys [6]: [coalesce(c_last_name#22, ), isnull(c_last_name#22), coalesce(c_first_name#21, ), isnull(c_first_name#21), coalesce(d_date#19, 1970-01-01), isnull(d_date#19)] Join condition: None (57) Project [codegen id : 26] Output: [] -Input [3]: [c_last_name#9, c_first_name#8, d_date#5] +Input [3]: [c_last_name#8, c_first_name#7, d_date#5] (58) HashAggregate [codegen id : 26] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#33] -Results [1]: [count#34] +Aggregate Attributes [1]: [count#23] +Results [1]: [count#24] (59) Exchange -Input [1]: [count#34] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#35] +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] (60) HashAggregate [codegen id : 27] -Input [1]: [count#34] +Input [1]: [count#24] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#36] -Results [1]: [count(1)#36 AS count(1)#37] +Aggregate Attributes [1]: [count(1)#25] +Results [1]: [count(1)#25 AS count(1)#26] ===== Subqueries ===== @@ -350,29 +350,29 @@ BroadcastExchange (65) (61) Scan parquet default.date_dim -Output [3]: [d_date_sk#4, d_date#5, d_month_seq#38] +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#27] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (62) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#38] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#27] (63) Filter [codegen id : 1] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#38] -Condition : (((isnotnull(d_month_seq#38) AND (d_month_seq#38 >= 1200)) AND (d_month_seq#38 <= 1211)) AND isnotnull(d_date_sk#4)) +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#27] +Condition : (((isnotnull(d_month_seq#27) AND (d_month_seq#27 >= 1200)) AND (d_month_seq#27 <= 1211)) AND isnotnull(d_date_sk#4)) (64) Project [codegen id : 1] Output [2]: [d_date_sk#4, d_date#5] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#38] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#27] (65) BroadcastExchange Input [2]: [d_date_sk#4, d_date#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] -Subquery:2 Hosting operator id = 21 Hosting Expression = cs_sold_date_sk#14 IN dynamicpruning#3 +Subquery:2 Hosting operator id = 21 Hosting Expression = cs_sold_date_sk#10 IN dynamicpruning#3 -Subquery:3 Hosting operator id = 39 Hosting Expression = ws_sold_date_sk#24 IN dynamicpruning#3 +Subquery:3 Hosting operator id = 39 Hosting Expression = ws_sold_date_sk#17 IN dynamicpruning#3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt index ed2a97704b2f7..6023a9d213efe 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q87/explain.txt @@ -91,7 +91,7 @@ Condition : isnotnull(c_customer_sk#6) (10) BroadcastExchange Input [3]: [c_customer_sk#6, c_first_name#7, c_last_name#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_customer_sk#1] @@ -111,7 +111,7 @@ Results [3]: [c_last_name#8, c_first_name#7, d_date#5] (14) Exchange Input [3]: [c_last_name#8, c_first_name#7, d_date#5] -Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#5, 5), ENSURE_REQUIREMENTS, [id=#10] +Arguments: hashpartitioning(c_last_name#8, c_first_name#7, d_date#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 12] Input [3]: [c_last_name#8, c_first_name#7, d_date#5] @@ -121,135 +121,135 @@ Aggregate Attributes: [] Results [3]: [c_last_name#8, c_first_name#7, d_date#5] (16) Scan parquet default.catalog_sales -Output [2]: [cs_bill_customer_sk#11, cs_sold_date_sk#12] +Output [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#12), dynamicpruningexpression(cs_sold_date_sk#12 IN dynamicpruning#3)] +PartitionFilters: [isnotnull(cs_sold_date_sk#10), dynamicpruningexpression(cs_sold_date_sk#10 IN dynamicpruning#3)] PushedFilters: [IsNotNull(cs_bill_customer_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 6] -Input [2]: [cs_bill_customer_sk#11, cs_sold_date_sk#12] +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] (18) Filter [codegen id : 6] -Input [2]: [cs_bill_customer_sk#11, cs_sold_date_sk#12] -Condition : isnotnull(cs_bill_customer_sk#11) +Input [2]: [cs_bill_customer_sk#9, cs_sold_date_sk#10] +Condition : isnotnull(cs_bill_customer_sk#9) (19) ReusedExchange [Reuses operator id: 52] -Output [2]: [d_date_sk#13, d_date#14] +Output [2]: [d_date_sk#11, d_date#12] (20) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_sold_date_sk#12] -Right keys [1]: [d_date_sk#13] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] Join condition: None (21) Project [codegen id : 6] -Output [2]: [cs_bill_customer_sk#11, d_date#14] -Input [4]: [cs_bill_customer_sk#11, cs_sold_date_sk#12, d_date_sk#13, d_date#14] +Output [2]: [cs_bill_customer_sk#9, d_date#12] +Input [4]: [cs_bill_customer_sk#9, cs_sold_date_sk#10, d_date_sk#11, d_date#12] (22) ReusedExchange [Reuses operator id: 10] -Output [3]: [c_customer_sk#15, c_first_name#16, c_last_name#17] +Output [3]: [c_customer_sk#13, c_first_name#14, c_last_name#15] (23) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_bill_customer_sk#11] -Right keys [1]: [c_customer_sk#15] +Left keys [1]: [cs_bill_customer_sk#9] +Right keys [1]: [c_customer_sk#13] Join condition: None (24) Project [codegen id : 6] -Output [3]: [c_last_name#17, c_first_name#16, d_date#14] -Input [5]: [cs_bill_customer_sk#11, d_date#14, c_customer_sk#15, c_first_name#16, c_last_name#17] +Output [3]: [c_last_name#15, c_first_name#14, d_date#12] +Input [5]: [cs_bill_customer_sk#9, d_date#12, c_customer_sk#13, c_first_name#14, c_last_name#15] (25) HashAggregate [codegen id : 6] -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#17, c_first_name#16, d_date#14] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] (26) Exchange -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Arguments: hashpartitioning(c_last_name#17, c_first_name#16, d_date#14, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: hashpartitioning(c_last_name#15, c_first_name#14, d_date#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (27) HashAggregate [codegen id : 7] -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Keys [3]: [c_last_name#17, c_first_name#16, d_date#14] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Keys [3]: [c_last_name#15, c_first_name#14, d_date#12] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#17, c_first_name#16, d_date#14] +Results [3]: [c_last_name#15, c_first_name#14, d_date#12] (28) BroadcastExchange -Input [3]: [c_last_name#17, c_first_name#16, d_date#14] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [id=#19] +Input [3]: [c_last_name#15, c_first_name#14, d_date#12] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 12] Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] -Right keys [6]: [coalesce(c_last_name#17, ), isnull(c_last_name#17), coalesce(c_first_name#16, ), isnull(c_first_name#16), coalesce(d_date#14, 1970-01-01), isnull(d_date#14)] +Right keys [6]: [coalesce(c_last_name#15, ), isnull(c_last_name#15), coalesce(c_first_name#14, ), isnull(c_first_name#14), coalesce(d_date#12, 1970-01-01), isnull(d_date#12)] Join condition: None (30) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#20, ws_sold_date_sk#21] +Output [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#21), dynamicpruningexpression(ws_sold_date_sk#21 IN dynamicpruning#3)] +PartitionFilters: [isnotnull(ws_sold_date_sk#17), dynamicpruningexpression(ws_sold_date_sk#17 IN dynamicpruning#3)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 10] -Input [2]: [ws_bill_customer_sk#20, ws_sold_date_sk#21] +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] (32) Filter [codegen id : 10] -Input [2]: [ws_bill_customer_sk#20, ws_sold_date_sk#21] -Condition : isnotnull(ws_bill_customer_sk#20) +Input [2]: [ws_bill_customer_sk#16, ws_sold_date_sk#17] +Condition : isnotnull(ws_bill_customer_sk#16) (33) ReusedExchange [Reuses operator id: 52] -Output [2]: [d_date_sk#22, d_date#23] +Output [2]: [d_date_sk#18, d_date#19] (34) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ws_sold_date_sk#21] -Right keys [1]: [d_date_sk#22] +Left keys [1]: [ws_sold_date_sk#17] +Right keys [1]: [d_date_sk#18] Join condition: None (35) Project [codegen id : 10] -Output [2]: [ws_bill_customer_sk#20, d_date#23] -Input [4]: [ws_bill_customer_sk#20, ws_sold_date_sk#21, d_date_sk#22, d_date#23] +Output [2]: [ws_bill_customer_sk#16, d_date#19] +Input [4]: [ws_bill_customer_sk#16, ws_sold_date_sk#17, d_date_sk#18, d_date#19] (36) ReusedExchange [Reuses operator id: 10] -Output [3]: [c_customer_sk#24, c_first_name#25, c_last_name#26] +Output [3]: [c_customer_sk#20, c_first_name#21, c_last_name#22] (37) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ws_bill_customer_sk#20] -Right keys [1]: [c_customer_sk#24] +Left keys [1]: [ws_bill_customer_sk#16] +Right keys [1]: [c_customer_sk#20] Join condition: None (38) Project [codegen id : 10] -Output [3]: [c_last_name#26, c_first_name#25, d_date#23] -Input [5]: [ws_bill_customer_sk#20, d_date#23, c_customer_sk#24, c_first_name#25, c_last_name#26] +Output [3]: [c_last_name#22, c_first_name#21, d_date#19] +Input [5]: [ws_bill_customer_sk#16, d_date#19, c_customer_sk#20, c_first_name#21, c_last_name#22] (39) HashAggregate [codegen id : 10] -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#26, c_first_name#25, d_date#23] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] (40) Exchange -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Arguments: hashpartitioning(c_last_name#26, c_first_name#25, d_date#23, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: hashpartitioning(c_last_name#22, c_first_name#21, d_date#19, 5), ENSURE_REQUIREMENTS, [plan_id=5] (41) HashAggregate [codegen id : 11] -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Keys [3]: [c_last_name#26, c_first_name#25, d_date#23] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Keys [3]: [c_last_name#22, c_first_name#21, d_date#19] Functions: [] Aggregate Attributes: [] -Results [3]: [c_last_name#26, c_first_name#25, d_date#23] +Results [3]: [c_last_name#22, c_first_name#21, d_date#19] (42) BroadcastExchange -Input [3]: [c_last_name#26, c_first_name#25, d_date#23] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [id=#28] +Input [3]: [c_last_name#22, c_first_name#21, d_date#19] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, string, true], ), isnull(input[0, string, true]), coalesce(input[1, string, true], ), isnull(input[1, string, true]), coalesce(input[2, date, true], 1970-01-01), isnull(input[2, date, true])),false), [plan_id=6] (43) BroadcastHashJoin [codegen id : 12] Left keys [6]: [coalesce(c_last_name#8, ), isnull(c_last_name#8), coalesce(c_first_name#7, ), isnull(c_first_name#7), coalesce(d_date#5, 1970-01-01), isnull(d_date#5)] -Right keys [6]: [coalesce(c_last_name#26, ), isnull(c_last_name#26), coalesce(c_first_name#25, ), isnull(c_first_name#25), coalesce(d_date#23, 1970-01-01), isnull(d_date#23)] +Right keys [6]: [coalesce(c_last_name#22, ), isnull(c_last_name#22), coalesce(c_first_name#21, ), isnull(c_first_name#21), coalesce(d_date#19, 1970-01-01), isnull(d_date#19)] Join condition: None (44) Project [codegen id : 12] @@ -260,19 +260,19 @@ Input [3]: [c_last_name#8, c_first_name#7, d_date#5] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#29] -Results [1]: [count#30] +Aggregate Attributes [1]: [count#23] +Results [1]: [count#24] (46) Exchange -Input [1]: [count#30] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#31] +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (47) HashAggregate [codegen id : 13] -Input [1]: [count#30] +Input [1]: [count#24] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#32] -Results [1]: [count(1)#32 AS count(1)#33] +Aggregate Attributes [1]: [count(1)#25] +Results [1]: [count(1)#25 AS count(1)#26] ===== Subqueries ===== @@ -285,29 +285,29 @@ BroadcastExchange (52) (48) Scan parquet default.date_dim -Output [3]: [d_date_sk#4, d_date#5, d_month_seq#34] +Output [3]: [d_date_sk#4, d_date#5, d_month_seq#27] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (49) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#34] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#27] (50) Filter [codegen id : 1] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#34] -Condition : (((isnotnull(d_month_seq#34) AND (d_month_seq#34 >= 1200)) AND (d_month_seq#34 <= 1211)) AND isnotnull(d_date_sk#4)) +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#27] +Condition : (((isnotnull(d_month_seq#27) AND (d_month_seq#27 >= 1200)) AND (d_month_seq#27 <= 1211)) AND isnotnull(d_date_sk#4)) (51) Project [codegen id : 1] Output [2]: [d_date_sk#4, d_date#5] -Input [3]: [d_date_sk#4, d_date#5, d_month_seq#34] +Input [3]: [d_date_sk#4, d_date#5, d_month_seq#27] (52) BroadcastExchange Input [2]: [d_date_sk#4, d_date#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] -Subquery:2 Hosting operator id = 16 Hosting Expression = cs_sold_date_sk#12 IN dynamicpruning#3 +Subquery:2 Hosting operator id = 16 Hosting Expression = cs_sold_date_sk#10 IN dynamicpruning#3 -Subquery:3 Hosting operator id = 30 Hosting Expression = ws_sold_date_sk#21 IN dynamicpruning#3 +Subquery:3 Hosting operator id = 30 Hosting Expression = ws_sold_date_sk#17 IN dynamicpruning#3 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.sf100/explain.txt index e72928545d080..99a999036d183 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88.sf100/explain.txt @@ -221,7 +221,7 @@ Input [3]: [t_time_sk#5, t_hour#6, t_minute#7] (9) BroadcastExchange Input [1]: [t_time_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_time_sk#1] @@ -233,767 +233,767 @@ Output [2]: [ss_hdemo_sk#2, ss_store_sk#3] Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, t_time_sk#5] (12) Scan parquet default.store -Output [2]: [s_store_sk#9, s_store_name#10] +Output [2]: [s_store_sk#8, s_store_name#9] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] ReadSchema: struct (13) ColumnarToRow [codegen id : 2] -Input [2]: [s_store_sk#9, s_store_name#10] +Input [2]: [s_store_sk#8, s_store_name#9] (14) Filter [codegen id : 2] -Input [2]: [s_store_sk#9, s_store_name#10] -Condition : ((isnotnull(s_store_name#10) AND (s_store_name#10 = ese)) AND isnotnull(s_store_sk#9)) +Input [2]: [s_store_sk#8, s_store_name#9] +Condition : ((isnotnull(s_store_name#9) AND (s_store_name#9 = ese)) AND isnotnull(s_store_sk#8)) (15) Project [codegen id : 2] -Output [1]: [s_store_sk#9] -Input [2]: [s_store_sk#9, s_store_name#10] +Output [1]: [s_store_sk#8] +Input [2]: [s_store_sk#8, s_store_name#9] (16) BroadcastExchange -Input [1]: [s_store_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#9] +Right keys [1]: [s_store_sk#8] Join condition: None (18) Project [codegen id : 4] Output [1]: [ss_hdemo_sk#2] -Input [3]: [ss_hdemo_sk#2, ss_store_sk#3, s_store_sk#9] +Input [3]: [ss_hdemo_sk#2, ss_store_sk#3, s_store_sk#8] (19) Scan parquet default.household_demographics -Output [3]: [hd_demo_sk#12, hd_dep_count#13, hd_vehicle_count#14] +Output [3]: [hd_demo_sk#10, hd_dep_count#11, hd_vehicle_count#12] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [Or(Or(And(EqualTo(hd_dep_count,4),LessThanOrEqual(hd_vehicle_count,6)),And(EqualTo(hd_dep_count,2),LessThanOrEqual(hd_vehicle_count,4))),And(EqualTo(hd_dep_count,0),LessThanOrEqual(hd_vehicle_count,2))), IsNotNull(hd_demo_sk)] ReadSchema: struct (20) ColumnarToRow [codegen id : 3] -Input [3]: [hd_demo_sk#12, hd_dep_count#13, hd_vehicle_count#14] +Input [3]: [hd_demo_sk#10, hd_dep_count#11, hd_vehicle_count#12] (21) Filter [codegen id : 3] -Input [3]: [hd_demo_sk#12, hd_dep_count#13, hd_vehicle_count#14] -Condition : (((((hd_dep_count#13 = 4) AND (hd_vehicle_count#14 <= 6)) OR ((hd_dep_count#13 = 2) AND (hd_vehicle_count#14 <= 4))) OR ((hd_dep_count#13 = 0) AND (hd_vehicle_count#14 <= 2))) AND isnotnull(hd_demo_sk#12)) +Input [3]: [hd_demo_sk#10, hd_dep_count#11, hd_vehicle_count#12] +Condition : (((((hd_dep_count#11 = 4) AND (hd_vehicle_count#12 <= 6)) OR ((hd_dep_count#11 = 2) AND (hd_vehicle_count#12 <= 4))) OR ((hd_dep_count#11 = 0) AND (hd_vehicle_count#12 <= 2))) AND isnotnull(hd_demo_sk#10)) (22) Project [codegen id : 3] -Output [1]: [hd_demo_sk#12] -Input [3]: [hd_demo_sk#12, hd_dep_count#13, hd_vehicle_count#14] +Output [1]: [hd_demo_sk#10] +Input [3]: [hd_demo_sk#10, hd_dep_count#11, hd_vehicle_count#12] (23) BroadcastExchange -Input [1]: [hd_demo_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [hd_demo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#12] +Right keys [1]: [hd_demo_sk#10] Join condition: None (25) Project [codegen id : 4] Output: [] -Input [2]: [ss_hdemo_sk#2, hd_demo_sk#12] +Input [2]: [ss_hdemo_sk#2, hd_demo_sk#10] (26) HashAggregate [codegen id : 4] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [1]: [count#17] +Aggregate Attributes [1]: [count#13] +Results [1]: [count#14] (27) Exchange -Input [1]: [count#17] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#18] +Input [1]: [count#14] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 40] -Input [1]: [count#17] +Input [1]: [count#14] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [1]: [count(1)#19 AS h8_30_to_9#20] +Aggregate Attributes [1]: [count(1)#15] +Results [1]: [count(1)#15 AS h8_30_to_9#16] (29) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#21, ss_hdemo_sk#22, ss_store_sk#23, ss_sold_date_sk#24] +Output [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 8] -Input [4]: [ss_sold_time_sk#21, ss_hdemo_sk#22, ss_store_sk#23, ss_sold_date_sk#24] +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] (31) Filter [codegen id : 8] -Input [4]: [ss_sold_time_sk#21, ss_hdemo_sk#22, ss_store_sk#23, ss_sold_date_sk#24] -Condition : ((isnotnull(ss_hdemo_sk#22) AND isnotnull(ss_sold_time_sk#21)) AND isnotnull(ss_store_sk#23)) +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] +Condition : ((isnotnull(ss_hdemo_sk#18) AND isnotnull(ss_sold_time_sk#17)) AND isnotnull(ss_store_sk#19)) (32) Project [codegen id : 8] -Output [3]: [ss_sold_time_sk#21, ss_hdemo_sk#22, ss_store_sk#23] -Input [4]: [ss_sold_time_sk#21, ss_hdemo_sk#22, ss_store_sk#23, ss_sold_date_sk#24] +Output [3]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19] +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] (33) Scan parquet default.time_dim -Output [3]: [t_time_sk#25, t_hour#26, t_minute#27] +Output [3]: [t_time_sk#21, t_hour#22, t_minute#23] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 5] -Input [3]: [t_time_sk#25, t_hour#26, t_minute#27] +Input [3]: [t_time_sk#21, t_hour#22, t_minute#23] (35) Filter [codegen id : 5] -Input [3]: [t_time_sk#25, t_hour#26, t_minute#27] -Condition : ((((isnotnull(t_hour#26) AND isnotnull(t_minute#27)) AND (t_hour#26 = 9)) AND (t_minute#27 < 30)) AND isnotnull(t_time_sk#25)) +Input [3]: [t_time_sk#21, t_hour#22, t_minute#23] +Condition : ((((isnotnull(t_hour#22) AND isnotnull(t_minute#23)) AND (t_hour#22 = 9)) AND (t_minute#23 < 30)) AND isnotnull(t_time_sk#21)) (36) Project [codegen id : 5] -Output [1]: [t_time_sk#25] -Input [3]: [t_time_sk#25, t_hour#26, t_minute#27] +Output [1]: [t_time_sk#21] +Input [3]: [t_time_sk#21, t_hour#22, t_minute#23] (37) BroadcastExchange -Input [1]: [t_time_sk#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#28] +Input [1]: [t_time_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (38) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_sold_time_sk#21] -Right keys [1]: [t_time_sk#25] +Left keys [1]: [ss_sold_time_sk#17] +Right keys [1]: [t_time_sk#21] Join condition: None (39) Project [codegen id : 8] -Output [2]: [ss_hdemo_sk#22, ss_store_sk#23] -Input [4]: [ss_sold_time_sk#21, ss_hdemo_sk#22, ss_store_sk#23, t_time_sk#25] +Output [2]: [ss_hdemo_sk#18, ss_store_sk#19] +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, t_time_sk#21] (40) ReusedExchange [Reuses operator id: 16] -Output [1]: [s_store_sk#29] +Output [1]: [s_store_sk#24] (41) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_store_sk#23] -Right keys [1]: [s_store_sk#29] +Left keys [1]: [ss_store_sk#19] +Right keys [1]: [s_store_sk#24] Join condition: None (42) Project [codegen id : 8] -Output [1]: [ss_hdemo_sk#22] -Input [3]: [ss_hdemo_sk#22, ss_store_sk#23, s_store_sk#29] +Output [1]: [ss_hdemo_sk#18] +Input [3]: [ss_hdemo_sk#18, ss_store_sk#19, s_store_sk#24] (43) ReusedExchange [Reuses operator id: 23] -Output [1]: [hd_demo_sk#30] +Output [1]: [hd_demo_sk#25] (44) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_hdemo_sk#22] -Right keys [1]: [hd_demo_sk#30] +Left keys [1]: [ss_hdemo_sk#18] +Right keys [1]: [hd_demo_sk#25] Join condition: None (45) Project [codegen id : 8] Output: [] -Input [2]: [ss_hdemo_sk#22, hd_demo_sk#30] +Input [2]: [ss_hdemo_sk#18, hd_demo_sk#25] (46) HashAggregate [codegen id : 8] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#31] -Results [1]: [count#32] +Aggregate Attributes [1]: [count#26] +Results [1]: [count#27] (47) Exchange -Input [1]: [count#32] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#33] +Input [1]: [count#27] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] (48) HashAggregate [codegen id : 9] -Input [1]: [count#32] +Input [1]: [count#27] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#34] -Results [1]: [count(1)#34 AS h9_to_9_30#35] +Aggregate Attributes [1]: [count(1)#28] +Results [1]: [count(1)#28 AS h9_to_9_30#29] (49) BroadcastExchange -Input [1]: [h9_to_9_30#35] -Arguments: IdentityBroadcastMode, [id=#36] +Input [1]: [h9_to_9_30#29] +Arguments: IdentityBroadcastMode, [plan_id=7] (50) BroadcastNestedLoopJoin [codegen id : 40] Join condition: None (51) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Output [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (52) ColumnarToRow [codegen id : 13] -Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] (53) Filter [codegen id : 13] -Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] -Condition : ((isnotnull(ss_hdemo_sk#38) AND isnotnull(ss_sold_time_sk#37)) AND isnotnull(ss_store_sk#39)) +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] +Condition : ((isnotnull(ss_hdemo_sk#31) AND isnotnull(ss_sold_time_sk#30)) AND isnotnull(ss_store_sk#32)) (54) Project [codegen id : 13] -Output [3]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39] -Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Output [3]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32] +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] (55) Scan parquet default.time_dim -Output [3]: [t_time_sk#41, t_hour#42, t_minute#43] +Output [3]: [t_time_sk#34, t_hour#35, t_minute#36] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (56) ColumnarToRow [codegen id : 10] -Input [3]: [t_time_sk#41, t_hour#42, t_minute#43] +Input [3]: [t_time_sk#34, t_hour#35, t_minute#36] (57) Filter [codegen id : 10] -Input [3]: [t_time_sk#41, t_hour#42, t_minute#43] -Condition : ((((isnotnull(t_hour#42) AND isnotnull(t_minute#43)) AND (t_hour#42 = 9)) AND (t_minute#43 >= 30)) AND isnotnull(t_time_sk#41)) +Input [3]: [t_time_sk#34, t_hour#35, t_minute#36] +Condition : ((((isnotnull(t_hour#35) AND isnotnull(t_minute#36)) AND (t_hour#35 = 9)) AND (t_minute#36 >= 30)) AND isnotnull(t_time_sk#34)) (58) Project [codegen id : 10] -Output [1]: [t_time_sk#41] -Input [3]: [t_time_sk#41, t_hour#42, t_minute#43] +Output [1]: [t_time_sk#34] +Input [3]: [t_time_sk#34, t_hour#35, t_minute#36] (59) BroadcastExchange -Input [1]: [t_time_sk#41] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] +Input [1]: [t_time_sk#34] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] (60) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_sold_time_sk#37] -Right keys [1]: [t_time_sk#41] +Left keys [1]: [ss_sold_time_sk#30] +Right keys [1]: [t_time_sk#34] Join condition: None (61) Project [codegen id : 13] -Output [2]: [ss_hdemo_sk#38, ss_store_sk#39] -Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, t_time_sk#41] +Output [2]: [ss_hdemo_sk#31, ss_store_sk#32] +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, t_time_sk#34] (62) ReusedExchange [Reuses operator id: 16] -Output [1]: [s_store_sk#45] +Output [1]: [s_store_sk#37] (63) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_store_sk#39] -Right keys [1]: [s_store_sk#45] +Left keys [1]: [ss_store_sk#32] +Right keys [1]: [s_store_sk#37] Join condition: None (64) Project [codegen id : 13] -Output [1]: [ss_hdemo_sk#38] -Input [3]: [ss_hdemo_sk#38, ss_store_sk#39, s_store_sk#45] +Output [1]: [ss_hdemo_sk#31] +Input [3]: [ss_hdemo_sk#31, ss_store_sk#32, s_store_sk#37] (65) ReusedExchange [Reuses operator id: 23] -Output [1]: [hd_demo_sk#46] +Output [1]: [hd_demo_sk#38] (66) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_hdemo_sk#38] -Right keys [1]: [hd_demo_sk#46] +Left keys [1]: [ss_hdemo_sk#31] +Right keys [1]: [hd_demo_sk#38] Join condition: None (67) Project [codegen id : 13] Output: [] -Input [2]: [ss_hdemo_sk#38, hd_demo_sk#46] +Input [2]: [ss_hdemo_sk#31, hd_demo_sk#38] (68) HashAggregate [codegen id : 13] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#47] -Results [1]: [count#48] +Aggregate Attributes [1]: [count#39] +Results [1]: [count#40] (69) Exchange -Input [1]: [count#48] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#49] +Input [1]: [count#40] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=9] (70) HashAggregate [codegen id : 14] -Input [1]: [count#48] +Input [1]: [count#40] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#50] -Results [1]: [count(1)#50 AS h9_30_to_10#51] +Aggregate Attributes [1]: [count(1)#41] +Results [1]: [count(1)#41 AS h9_30_to_10#42] (71) BroadcastExchange -Input [1]: [h9_30_to_10#51] -Arguments: IdentityBroadcastMode, [id=#52] +Input [1]: [h9_30_to_10#42] +Arguments: IdentityBroadcastMode, [plan_id=10] (72) BroadcastNestedLoopJoin [codegen id : 40] Join condition: None (73) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#53, ss_hdemo_sk#54, ss_store_sk#55, ss_sold_date_sk#56] +Output [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (74) ColumnarToRow [codegen id : 18] -Input [4]: [ss_sold_time_sk#53, ss_hdemo_sk#54, ss_store_sk#55, ss_sold_date_sk#56] +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] (75) Filter [codegen id : 18] -Input [4]: [ss_sold_time_sk#53, ss_hdemo_sk#54, ss_store_sk#55, ss_sold_date_sk#56] -Condition : ((isnotnull(ss_hdemo_sk#54) AND isnotnull(ss_sold_time_sk#53)) AND isnotnull(ss_store_sk#55)) +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] +Condition : ((isnotnull(ss_hdemo_sk#44) AND isnotnull(ss_sold_time_sk#43)) AND isnotnull(ss_store_sk#45)) (76) Project [codegen id : 18] -Output [3]: [ss_sold_time_sk#53, ss_hdemo_sk#54, ss_store_sk#55] -Input [4]: [ss_sold_time_sk#53, ss_hdemo_sk#54, ss_store_sk#55, ss_sold_date_sk#56] +Output [3]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45] +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] (77) Scan parquet default.time_dim -Output [3]: [t_time_sk#57, t_hour#58, t_minute#59] +Output [3]: [t_time_sk#47, t_hour#48, t_minute#49] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (78) ColumnarToRow [codegen id : 15] -Input [3]: [t_time_sk#57, t_hour#58, t_minute#59] +Input [3]: [t_time_sk#47, t_hour#48, t_minute#49] (79) Filter [codegen id : 15] -Input [3]: [t_time_sk#57, t_hour#58, t_minute#59] -Condition : ((((isnotnull(t_hour#58) AND isnotnull(t_minute#59)) AND (t_hour#58 = 10)) AND (t_minute#59 < 30)) AND isnotnull(t_time_sk#57)) +Input [3]: [t_time_sk#47, t_hour#48, t_minute#49] +Condition : ((((isnotnull(t_hour#48) AND isnotnull(t_minute#49)) AND (t_hour#48 = 10)) AND (t_minute#49 < 30)) AND isnotnull(t_time_sk#47)) (80) Project [codegen id : 15] -Output [1]: [t_time_sk#57] -Input [3]: [t_time_sk#57, t_hour#58, t_minute#59] +Output [1]: [t_time_sk#47] +Input [3]: [t_time_sk#47, t_hour#48, t_minute#49] (81) BroadcastExchange -Input [1]: [t_time_sk#57] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#60] +Input [1]: [t_time_sk#47] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] (82) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ss_sold_time_sk#53] -Right keys [1]: [t_time_sk#57] +Left keys [1]: [ss_sold_time_sk#43] +Right keys [1]: [t_time_sk#47] Join condition: None (83) Project [codegen id : 18] -Output [2]: [ss_hdemo_sk#54, ss_store_sk#55] -Input [4]: [ss_sold_time_sk#53, ss_hdemo_sk#54, ss_store_sk#55, t_time_sk#57] +Output [2]: [ss_hdemo_sk#44, ss_store_sk#45] +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, t_time_sk#47] (84) ReusedExchange [Reuses operator id: 16] -Output [1]: [s_store_sk#61] +Output [1]: [s_store_sk#50] (85) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ss_store_sk#55] -Right keys [1]: [s_store_sk#61] +Left keys [1]: [ss_store_sk#45] +Right keys [1]: [s_store_sk#50] Join condition: None (86) Project [codegen id : 18] -Output [1]: [ss_hdemo_sk#54] -Input [3]: [ss_hdemo_sk#54, ss_store_sk#55, s_store_sk#61] +Output [1]: [ss_hdemo_sk#44] +Input [3]: [ss_hdemo_sk#44, ss_store_sk#45, s_store_sk#50] (87) ReusedExchange [Reuses operator id: 23] -Output [1]: [hd_demo_sk#62] +Output [1]: [hd_demo_sk#51] (88) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ss_hdemo_sk#54] -Right keys [1]: [hd_demo_sk#62] +Left keys [1]: [ss_hdemo_sk#44] +Right keys [1]: [hd_demo_sk#51] Join condition: None (89) Project [codegen id : 18] Output: [] -Input [2]: [ss_hdemo_sk#54, hd_demo_sk#62] +Input [2]: [ss_hdemo_sk#44, hd_demo_sk#51] (90) HashAggregate [codegen id : 18] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#63] -Results [1]: [count#64] +Aggregate Attributes [1]: [count#52] +Results [1]: [count#53] (91) Exchange -Input [1]: [count#64] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#65] +Input [1]: [count#53] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] (92) HashAggregate [codegen id : 19] -Input [1]: [count#64] +Input [1]: [count#53] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#66] -Results [1]: [count(1)#66 AS h10_to_10_30#67] +Aggregate Attributes [1]: [count(1)#54] +Results [1]: [count(1)#54 AS h10_to_10_30#55] (93) BroadcastExchange -Input [1]: [h10_to_10_30#67] -Arguments: IdentityBroadcastMode, [id=#68] +Input [1]: [h10_to_10_30#55] +Arguments: IdentityBroadcastMode, [plan_id=13] (94) BroadcastNestedLoopJoin [codegen id : 40] Join condition: None (95) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] +Output [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (96) ColumnarToRow [codegen id : 23] -Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] (97) Filter [codegen id : 23] -Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] -Condition : ((isnotnull(ss_hdemo_sk#70) AND isnotnull(ss_sold_time_sk#69)) AND isnotnull(ss_store_sk#71)) +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] +Condition : ((isnotnull(ss_hdemo_sk#57) AND isnotnull(ss_sold_time_sk#56)) AND isnotnull(ss_store_sk#58)) (98) Project [codegen id : 23] -Output [3]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71] -Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] +Output [3]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58] +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] (99) Scan parquet default.time_dim -Output [3]: [t_time_sk#73, t_hour#74, t_minute#75] +Output [3]: [t_time_sk#60, t_hour#61, t_minute#62] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (100) ColumnarToRow [codegen id : 20] -Input [3]: [t_time_sk#73, t_hour#74, t_minute#75] +Input [3]: [t_time_sk#60, t_hour#61, t_minute#62] (101) Filter [codegen id : 20] -Input [3]: [t_time_sk#73, t_hour#74, t_minute#75] -Condition : ((((isnotnull(t_hour#74) AND isnotnull(t_minute#75)) AND (t_hour#74 = 10)) AND (t_minute#75 >= 30)) AND isnotnull(t_time_sk#73)) +Input [3]: [t_time_sk#60, t_hour#61, t_minute#62] +Condition : ((((isnotnull(t_hour#61) AND isnotnull(t_minute#62)) AND (t_hour#61 = 10)) AND (t_minute#62 >= 30)) AND isnotnull(t_time_sk#60)) (102) Project [codegen id : 20] -Output [1]: [t_time_sk#73] -Input [3]: [t_time_sk#73, t_hour#74, t_minute#75] +Output [1]: [t_time_sk#60] +Input [3]: [t_time_sk#60, t_hour#61, t_minute#62] (103) BroadcastExchange -Input [1]: [t_time_sk#73] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#76] +Input [1]: [t_time_sk#60] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] (104) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [ss_sold_time_sk#69] -Right keys [1]: [t_time_sk#73] +Left keys [1]: [ss_sold_time_sk#56] +Right keys [1]: [t_time_sk#60] Join condition: None (105) Project [codegen id : 23] -Output [2]: [ss_hdemo_sk#70, ss_store_sk#71] -Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, t_time_sk#73] +Output [2]: [ss_hdemo_sk#57, ss_store_sk#58] +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, t_time_sk#60] (106) ReusedExchange [Reuses operator id: 16] -Output [1]: [s_store_sk#77] +Output [1]: [s_store_sk#63] (107) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [ss_store_sk#71] -Right keys [1]: [s_store_sk#77] +Left keys [1]: [ss_store_sk#58] +Right keys [1]: [s_store_sk#63] Join condition: None (108) Project [codegen id : 23] -Output [1]: [ss_hdemo_sk#70] -Input [3]: [ss_hdemo_sk#70, ss_store_sk#71, s_store_sk#77] +Output [1]: [ss_hdemo_sk#57] +Input [3]: [ss_hdemo_sk#57, ss_store_sk#58, s_store_sk#63] (109) ReusedExchange [Reuses operator id: 23] -Output [1]: [hd_demo_sk#78] +Output [1]: [hd_demo_sk#64] (110) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [ss_hdemo_sk#70] -Right keys [1]: [hd_demo_sk#78] +Left keys [1]: [ss_hdemo_sk#57] +Right keys [1]: [hd_demo_sk#64] Join condition: None (111) Project [codegen id : 23] Output: [] -Input [2]: [ss_hdemo_sk#70, hd_demo_sk#78] +Input [2]: [ss_hdemo_sk#57, hd_demo_sk#64] (112) HashAggregate [codegen id : 23] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#79] -Results [1]: [count#80] +Aggregate Attributes [1]: [count#65] +Results [1]: [count#66] (113) Exchange -Input [1]: [count#80] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#81] +Input [1]: [count#66] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] (114) HashAggregate [codegen id : 24] -Input [1]: [count#80] +Input [1]: [count#66] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#82] -Results [1]: [count(1)#82 AS h10_30_to_11#83] +Aggregate Attributes [1]: [count(1)#67] +Results [1]: [count(1)#67 AS h10_30_to_11#68] (115) BroadcastExchange -Input [1]: [h10_30_to_11#83] -Arguments: IdentityBroadcastMode, [id=#84] +Input [1]: [h10_30_to_11#68] +Arguments: IdentityBroadcastMode, [plan_id=16] (116) BroadcastNestedLoopJoin [codegen id : 40] Join condition: None (117) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#85, ss_hdemo_sk#86, ss_store_sk#87, ss_sold_date_sk#88] +Output [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (118) ColumnarToRow [codegen id : 28] -Input [4]: [ss_sold_time_sk#85, ss_hdemo_sk#86, ss_store_sk#87, ss_sold_date_sk#88] +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] (119) Filter [codegen id : 28] -Input [4]: [ss_sold_time_sk#85, ss_hdemo_sk#86, ss_store_sk#87, ss_sold_date_sk#88] -Condition : ((isnotnull(ss_hdemo_sk#86) AND isnotnull(ss_sold_time_sk#85)) AND isnotnull(ss_store_sk#87)) +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] +Condition : ((isnotnull(ss_hdemo_sk#70) AND isnotnull(ss_sold_time_sk#69)) AND isnotnull(ss_store_sk#71)) (120) Project [codegen id : 28] -Output [3]: [ss_sold_time_sk#85, ss_hdemo_sk#86, ss_store_sk#87] -Input [4]: [ss_sold_time_sk#85, ss_hdemo_sk#86, ss_store_sk#87, ss_sold_date_sk#88] +Output [3]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71] +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] (121) Scan parquet default.time_dim -Output [3]: [t_time_sk#89, t_hour#90, t_minute#91] +Output [3]: [t_time_sk#73, t_hour#74, t_minute#75] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (122) ColumnarToRow [codegen id : 25] -Input [3]: [t_time_sk#89, t_hour#90, t_minute#91] +Input [3]: [t_time_sk#73, t_hour#74, t_minute#75] (123) Filter [codegen id : 25] -Input [3]: [t_time_sk#89, t_hour#90, t_minute#91] -Condition : ((((isnotnull(t_hour#90) AND isnotnull(t_minute#91)) AND (t_hour#90 = 11)) AND (t_minute#91 < 30)) AND isnotnull(t_time_sk#89)) +Input [3]: [t_time_sk#73, t_hour#74, t_minute#75] +Condition : ((((isnotnull(t_hour#74) AND isnotnull(t_minute#75)) AND (t_hour#74 = 11)) AND (t_minute#75 < 30)) AND isnotnull(t_time_sk#73)) (124) Project [codegen id : 25] -Output [1]: [t_time_sk#89] -Input [3]: [t_time_sk#89, t_hour#90, t_minute#91] +Output [1]: [t_time_sk#73] +Input [3]: [t_time_sk#73, t_hour#74, t_minute#75] (125) BroadcastExchange -Input [1]: [t_time_sk#89] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#92] +Input [1]: [t_time_sk#73] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=17] (126) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [ss_sold_time_sk#85] -Right keys [1]: [t_time_sk#89] +Left keys [1]: [ss_sold_time_sk#69] +Right keys [1]: [t_time_sk#73] Join condition: None (127) Project [codegen id : 28] -Output [2]: [ss_hdemo_sk#86, ss_store_sk#87] -Input [4]: [ss_sold_time_sk#85, ss_hdemo_sk#86, ss_store_sk#87, t_time_sk#89] +Output [2]: [ss_hdemo_sk#70, ss_store_sk#71] +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, t_time_sk#73] (128) ReusedExchange [Reuses operator id: 16] -Output [1]: [s_store_sk#93] +Output [1]: [s_store_sk#76] (129) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [ss_store_sk#87] -Right keys [1]: [s_store_sk#93] +Left keys [1]: [ss_store_sk#71] +Right keys [1]: [s_store_sk#76] Join condition: None (130) Project [codegen id : 28] -Output [1]: [ss_hdemo_sk#86] -Input [3]: [ss_hdemo_sk#86, ss_store_sk#87, s_store_sk#93] +Output [1]: [ss_hdemo_sk#70] +Input [3]: [ss_hdemo_sk#70, ss_store_sk#71, s_store_sk#76] (131) ReusedExchange [Reuses operator id: 23] -Output [1]: [hd_demo_sk#94] +Output [1]: [hd_demo_sk#77] (132) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [ss_hdemo_sk#86] -Right keys [1]: [hd_demo_sk#94] +Left keys [1]: [ss_hdemo_sk#70] +Right keys [1]: [hd_demo_sk#77] Join condition: None (133) Project [codegen id : 28] Output: [] -Input [2]: [ss_hdemo_sk#86, hd_demo_sk#94] +Input [2]: [ss_hdemo_sk#70, hd_demo_sk#77] (134) HashAggregate [codegen id : 28] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#95] -Results [1]: [count#96] +Aggregate Attributes [1]: [count#78] +Results [1]: [count#79] (135) Exchange -Input [1]: [count#96] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#97] +Input [1]: [count#79] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=18] (136) HashAggregate [codegen id : 29] -Input [1]: [count#96] +Input [1]: [count#79] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#98] -Results [1]: [count(1)#98 AS h11_to_11_30#99] +Aggregate Attributes [1]: [count(1)#80] +Results [1]: [count(1)#80 AS h11_to_11_30#81] (137) BroadcastExchange -Input [1]: [h11_to_11_30#99] -Arguments: IdentityBroadcastMode, [id=#100] +Input [1]: [h11_to_11_30#81] +Arguments: IdentityBroadcastMode, [plan_id=19] (138) BroadcastNestedLoopJoin [codegen id : 40] Join condition: None (139) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#101, ss_hdemo_sk#102, ss_store_sk#103, ss_sold_date_sk#104] +Output [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (140) ColumnarToRow [codegen id : 33] -Input [4]: [ss_sold_time_sk#101, ss_hdemo_sk#102, ss_store_sk#103, ss_sold_date_sk#104] +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] (141) Filter [codegen id : 33] -Input [4]: [ss_sold_time_sk#101, ss_hdemo_sk#102, ss_store_sk#103, ss_sold_date_sk#104] -Condition : ((isnotnull(ss_hdemo_sk#102) AND isnotnull(ss_sold_time_sk#101)) AND isnotnull(ss_store_sk#103)) +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] +Condition : ((isnotnull(ss_hdemo_sk#83) AND isnotnull(ss_sold_time_sk#82)) AND isnotnull(ss_store_sk#84)) (142) Project [codegen id : 33] -Output [3]: [ss_sold_time_sk#101, ss_hdemo_sk#102, ss_store_sk#103] -Input [4]: [ss_sold_time_sk#101, ss_hdemo_sk#102, ss_store_sk#103, ss_sold_date_sk#104] +Output [3]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84] +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] (143) Scan parquet default.time_dim -Output [3]: [t_time_sk#105, t_hour#106, t_minute#107] +Output [3]: [t_time_sk#86, t_hour#87, t_minute#88] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (144) ColumnarToRow [codegen id : 30] -Input [3]: [t_time_sk#105, t_hour#106, t_minute#107] +Input [3]: [t_time_sk#86, t_hour#87, t_minute#88] (145) Filter [codegen id : 30] -Input [3]: [t_time_sk#105, t_hour#106, t_minute#107] -Condition : ((((isnotnull(t_hour#106) AND isnotnull(t_minute#107)) AND (t_hour#106 = 11)) AND (t_minute#107 >= 30)) AND isnotnull(t_time_sk#105)) +Input [3]: [t_time_sk#86, t_hour#87, t_minute#88] +Condition : ((((isnotnull(t_hour#87) AND isnotnull(t_minute#88)) AND (t_hour#87 = 11)) AND (t_minute#88 >= 30)) AND isnotnull(t_time_sk#86)) (146) Project [codegen id : 30] -Output [1]: [t_time_sk#105] -Input [3]: [t_time_sk#105, t_hour#106, t_minute#107] +Output [1]: [t_time_sk#86] +Input [3]: [t_time_sk#86, t_hour#87, t_minute#88] (147) BroadcastExchange -Input [1]: [t_time_sk#105] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#108] +Input [1]: [t_time_sk#86] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=20] (148) BroadcastHashJoin [codegen id : 33] -Left keys [1]: [ss_sold_time_sk#101] -Right keys [1]: [t_time_sk#105] +Left keys [1]: [ss_sold_time_sk#82] +Right keys [1]: [t_time_sk#86] Join condition: None (149) Project [codegen id : 33] -Output [2]: [ss_hdemo_sk#102, ss_store_sk#103] -Input [4]: [ss_sold_time_sk#101, ss_hdemo_sk#102, ss_store_sk#103, t_time_sk#105] +Output [2]: [ss_hdemo_sk#83, ss_store_sk#84] +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, t_time_sk#86] (150) ReusedExchange [Reuses operator id: 16] -Output [1]: [s_store_sk#109] +Output [1]: [s_store_sk#89] (151) BroadcastHashJoin [codegen id : 33] -Left keys [1]: [ss_store_sk#103] -Right keys [1]: [s_store_sk#109] +Left keys [1]: [ss_store_sk#84] +Right keys [1]: [s_store_sk#89] Join condition: None (152) Project [codegen id : 33] -Output [1]: [ss_hdemo_sk#102] -Input [3]: [ss_hdemo_sk#102, ss_store_sk#103, s_store_sk#109] +Output [1]: [ss_hdemo_sk#83] +Input [3]: [ss_hdemo_sk#83, ss_store_sk#84, s_store_sk#89] (153) ReusedExchange [Reuses operator id: 23] -Output [1]: [hd_demo_sk#110] +Output [1]: [hd_demo_sk#90] (154) BroadcastHashJoin [codegen id : 33] -Left keys [1]: [ss_hdemo_sk#102] -Right keys [1]: [hd_demo_sk#110] +Left keys [1]: [ss_hdemo_sk#83] +Right keys [1]: [hd_demo_sk#90] Join condition: None (155) Project [codegen id : 33] Output: [] -Input [2]: [ss_hdemo_sk#102, hd_demo_sk#110] +Input [2]: [ss_hdemo_sk#83, hd_demo_sk#90] (156) HashAggregate [codegen id : 33] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#111] -Results [1]: [count#112] +Aggregate Attributes [1]: [count#91] +Results [1]: [count#92] (157) Exchange -Input [1]: [count#112] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#113] +Input [1]: [count#92] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=21] (158) HashAggregate [codegen id : 34] -Input [1]: [count#112] +Input [1]: [count#92] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#114] -Results [1]: [count(1)#114 AS h11_30_to_12#115] +Aggregate Attributes [1]: [count(1)#93] +Results [1]: [count(1)#93 AS h11_30_to_12#94] (159) BroadcastExchange -Input [1]: [h11_30_to_12#115] -Arguments: IdentityBroadcastMode, [id=#116] +Input [1]: [h11_30_to_12#94] +Arguments: IdentityBroadcastMode, [plan_id=22] (160) BroadcastNestedLoopJoin [codegen id : 40] Join condition: None (161) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#117, ss_hdemo_sk#118, ss_store_sk#119, ss_sold_date_sk#120] +Output [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (162) ColumnarToRow [codegen id : 38] -Input [4]: [ss_sold_time_sk#117, ss_hdemo_sk#118, ss_store_sk#119, ss_sold_date_sk#120] +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] (163) Filter [codegen id : 38] -Input [4]: [ss_sold_time_sk#117, ss_hdemo_sk#118, ss_store_sk#119, ss_sold_date_sk#120] -Condition : ((isnotnull(ss_hdemo_sk#118) AND isnotnull(ss_sold_time_sk#117)) AND isnotnull(ss_store_sk#119)) +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] +Condition : ((isnotnull(ss_hdemo_sk#96) AND isnotnull(ss_sold_time_sk#95)) AND isnotnull(ss_store_sk#97)) (164) Project [codegen id : 38] -Output [3]: [ss_sold_time_sk#117, ss_hdemo_sk#118, ss_store_sk#119] -Input [4]: [ss_sold_time_sk#117, ss_hdemo_sk#118, ss_store_sk#119, ss_sold_date_sk#120] +Output [3]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97] +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] (165) Scan parquet default.time_dim -Output [3]: [t_time_sk#121, t_hour#122, t_minute#123] +Output [3]: [t_time_sk#99, t_hour#100, t_minute#101] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (166) ColumnarToRow [codegen id : 35] -Input [3]: [t_time_sk#121, t_hour#122, t_minute#123] +Input [3]: [t_time_sk#99, t_hour#100, t_minute#101] (167) Filter [codegen id : 35] -Input [3]: [t_time_sk#121, t_hour#122, t_minute#123] -Condition : ((((isnotnull(t_hour#122) AND isnotnull(t_minute#123)) AND (t_hour#122 = 12)) AND (t_minute#123 < 30)) AND isnotnull(t_time_sk#121)) +Input [3]: [t_time_sk#99, t_hour#100, t_minute#101] +Condition : ((((isnotnull(t_hour#100) AND isnotnull(t_minute#101)) AND (t_hour#100 = 12)) AND (t_minute#101 < 30)) AND isnotnull(t_time_sk#99)) (168) Project [codegen id : 35] -Output [1]: [t_time_sk#121] -Input [3]: [t_time_sk#121, t_hour#122, t_minute#123] +Output [1]: [t_time_sk#99] +Input [3]: [t_time_sk#99, t_hour#100, t_minute#101] (169) BroadcastExchange -Input [1]: [t_time_sk#121] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#124] +Input [1]: [t_time_sk#99] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=23] (170) BroadcastHashJoin [codegen id : 38] -Left keys [1]: [ss_sold_time_sk#117] -Right keys [1]: [t_time_sk#121] +Left keys [1]: [ss_sold_time_sk#95] +Right keys [1]: [t_time_sk#99] Join condition: None (171) Project [codegen id : 38] -Output [2]: [ss_hdemo_sk#118, ss_store_sk#119] -Input [4]: [ss_sold_time_sk#117, ss_hdemo_sk#118, ss_store_sk#119, t_time_sk#121] +Output [2]: [ss_hdemo_sk#96, ss_store_sk#97] +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, t_time_sk#99] (172) ReusedExchange [Reuses operator id: 16] -Output [1]: [s_store_sk#125] +Output [1]: [s_store_sk#102] (173) BroadcastHashJoin [codegen id : 38] -Left keys [1]: [ss_store_sk#119] -Right keys [1]: [s_store_sk#125] +Left keys [1]: [ss_store_sk#97] +Right keys [1]: [s_store_sk#102] Join condition: None (174) Project [codegen id : 38] -Output [1]: [ss_hdemo_sk#118] -Input [3]: [ss_hdemo_sk#118, ss_store_sk#119, s_store_sk#125] +Output [1]: [ss_hdemo_sk#96] +Input [3]: [ss_hdemo_sk#96, ss_store_sk#97, s_store_sk#102] (175) ReusedExchange [Reuses operator id: 23] -Output [1]: [hd_demo_sk#126] +Output [1]: [hd_demo_sk#103] (176) BroadcastHashJoin [codegen id : 38] -Left keys [1]: [ss_hdemo_sk#118] -Right keys [1]: [hd_demo_sk#126] +Left keys [1]: [ss_hdemo_sk#96] +Right keys [1]: [hd_demo_sk#103] Join condition: None (177) Project [codegen id : 38] Output: [] -Input [2]: [ss_hdemo_sk#118, hd_demo_sk#126] +Input [2]: [ss_hdemo_sk#96, hd_demo_sk#103] (178) HashAggregate [codegen id : 38] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#127] -Results [1]: [count#128] +Aggregate Attributes [1]: [count#104] +Results [1]: [count#105] (179) Exchange -Input [1]: [count#128] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#129] +Input [1]: [count#105] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=24] (180) HashAggregate [codegen id : 39] -Input [1]: [count#128] +Input [1]: [count#105] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#130] -Results [1]: [count(1)#130 AS h12_to_12_30#131] +Aggregate Attributes [1]: [count(1)#106] +Results [1]: [count(1)#106 AS h12_to_12_30#107] (181) BroadcastExchange -Input [1]: [h12_to_12_30#131] -Arguments: IdentityBroadcastMode, [id=#132] +Input [1]: [h12_to_12_30#107] +Arguments: IdentityBroadcastMode, [plan_id=25] (182) BroadcastNestedLoopJoin [codegen id : 40] Join condition: None diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt index 9f56c71154a66..9c5bd50ccb8c4 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q88/explain.txt @@ -221,7 +221,7 @@ Input [3]: [hd_demo_sk#5, hd_dep_count#6, hd_vehicle_count#7] (9) BroadcastExchange Input [1]: [hd_demo_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] @@ -233,767 +233,767 @@ Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#5] (12) Scan parquet default.time_dim -Output [3]: [t_time_sk#9, t_hour#10, t_minute#11] +Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,8), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (13) ColumnarToRow [codegen id : 2] -Input [3]: [t_time_sk#9, t_hour#10, t_minute#11] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] (14) Filter [codegen id : 2] -Input [3]: [t_time_sk#9, t_hour#10, t_minute#11] -Condition : ((((isnotnull(t_hour#10) AND isnotnull(t_minute#11)) AND (t_hour#10 = 8)) AND (t_minute#11 >= 30)) AND isnotnull(t_time_sk#9)) +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 8)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) (15) Project [codegen id : 2] -Output [1]: [t_time_sk#9] -Input [3]: [t_time_sk#9, t_hour#10, t_minute#11] +Output [1]: [t_time_sk#8] +Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] (16) BroadcastExchange -Input [1]: [t_time_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Input [1]: [t_time_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_time_sk#1] -Right keys [1]: [t_time_sk#9] +Right keys [1]: [t_time_sk#8] Join condition: None (18) Project [codegen id : 4] Output [1]: [ss_store_sk#3] -Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#9] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] (19) Scan parquet default.store -Output [2]: [s_store_sk#13, s_store_name#14] +Output [2]: [s_store_sk#11, s_store_name#12] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] ReadSchema: struct (20) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#13, s_store_name#14] +Input [2]: [s_store_sk#11, s_store_name#12] (21) Filter [codegen id : 3] -Input [2]: [s_store_sk#13, s_store_name#14] -Condition : ((isnotnull(s_store_name#14) AND (s_store_name#14 = ese)) AND isnotnull(s_store_sk#13)) +Input [2]: [s_store_sk#11, s_store_name#12] +Condition : ((isnotnull(s_store_name#12) AND (s_store_name#12 = ese)) AND isnotnull(s_store_sk#11)) (22) Project [codegen id : 3] -Output [1]: [s_store_sk#13] -Input [2]: [s_store_sk#13, s_store_name#14] +Output [1]: [s_store_sk#11] +Input [2]: [s_store_sk#11, s_store_name#12] (23) BroadcastExchange -Input [1]: [s_store_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#13] +Right keys [1]: [s_store_sk#11] Join condition: None (25) Project [codegen id : 4] Output: [] -Input [2]: [ss_store_sk#3, s_store_sk#13] +Input [2]: [ss_store_sk#3, s_store_sk#11] (26) HashAggregate [codegen id : 4] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [1]: [count#17] +Aggregate Attributes [1]: [count#13] +Results [1]: [count#14] (27) Exchange -Input [1]: [count#17] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#18] +Input [1]: [count#14] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 40] -Input [1]: [count#17] +Input [1]: [count#14] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [1]: [count(1)#19 AS h8_30_to_9#20] +Aggregate Attributes [1]: [count(1)#15] +Results [1]: [count(1)#15 AS h8_30_to_9#16] (29) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#21, ss_hdemo_sk#22, ss_store_sk#23, ss_sold_date_sk#24] +Output [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 8] -Input [4]: [ss_sold_time_sk#21, ss_hdemo_sk#22, ss_store_sk#23, ss_sold_date_sk#24] +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] (31) Filter [codegen id : 8] -Input [4]: [ss_sold_time_sk#21, ss_hdemo_sk#22, ss_store_sk#23, ss_sold_date_sk#24] -Condition : ((isnotnull(ss_hdemo_sk#22) AND isnotnull(ss_sold_time_sk#21)) AND isnotnull(ss_store_sk#23)) +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] +Condition : ((isnotnull(ss_hdemo_sk#18) AND isnotnull(ss_sold_time_sk#17)) AND isnotnull(ss_store_sk#19)) (32) Project [codegen id : 8] -Output [3]: [ss_sold_time_sk#21, ss_hdemo_sk#22, ss_store_sk#23] -Input [4]: [ss_sold_time_sk#21, ss_hdemo_sk#22, ss_store_sk#23, ss_sold_date_sk#24] +Output [3]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19] +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, ss_sold_date_sk#20] (33) ReusedExchange [Reuses operator id: 9] -Output [1]: [hd_demo_sk#25] +Output [1]: [hd_demo_sk#21] (34) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_hdemo_sk#22] -Right keys [1]: [hd_demo_sk#25] +Left keys [1]: [ss_hdemo_sk#18] +Right keys [1]: [hd_demo_sk#21] Join condition: None (35) Project [codegen id : 8] -Output [2]: [ss_sold_time_sk#21, ss_store_sk#23] -Input [4]: [ss_sold_time_sk#21, ss_hdemo_sk#22, ss_store_sk#23, hd_demo_sk#25] +Output [2]: [ss_sold_time_sk#17, ss_store_sk#19] +Input [4]: [ss_sold_time_sk#17, ss_hdemo_sk#18, ss_store_sk#19, hd_demo_sk#21] (36) Scan parquet default.time_dim -Output [3]: [t_time_sk#26, t_hour#27, t_minute#28] +Output [3]: [t_time_sk#22, t_hour#23, t_minute#24] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), LessThan(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 6] -Input [3]: [t_time_sk#26, t_hour#27, t_minute#28] +Input [3]: [t_time_sk#22, t_hour#23, t_minute#24] (38) Filter [codegen id : 6] -Input [3]: [t_time_sk#26, t_hour#27, t_minute#28] -Condition : ((((isnotnull(t_hour#27) AND isnotnull(t_minute#28)) AND (t_hour#27 = 9)) AND (t_minute#28 < 30)) AND isnotnull(t_time_sk#26)) +Input [3]: [t_time_sk#22, t_hour#23, t_minute#24] +Condition : ((((isnotnull(t_hour#23) AND isnotnull(t_minute#24)) AND (t_hour#23 = 9)) AND (t_minute#24 < 30)) AND isnotnull(t_time_sk#22)) (39) Project [codegen id : 6] -Output [1]: [t_time_sk#26] -Input [3]: [t_time_sk#26, t_hour#27, t_minute#28] +Output [1]: [t_time_sk#22] +Input [3]: [t_time_sk#22, t_hour#23, t_minute#24] (40) BroadcastExchange -Input [1]: [t_time_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#29] +Input [1]: [t_time_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (41) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_sold_time_sk#21] -Right keys [1]: [t_time_sk#26] +Left keys [1]: [ss_sold_time_sk#17] +Right keys [1]: [t_time_sk#22] Join condition: None (42) Project [codegen id : 8] -Output [1]: [ss_store_sk#23] -Input [3]: [ss_sold_time_sk#21, ss_store_sk#23, t_time_sk#26] +Output [1]: [ss_store_sk#19] +Input [3]: [ss_sold_time_sk#17, ss_store_sk#19, t_time_sk#22] (43) ReusedExchange [Reuses operator id: 23] -Output [1]: [s_store_sk#30] +Output [1]: [s_store_sk#25] (44) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ss_store_sk#23] -Right keys [1]: [s_store_sk#30] +Left keys [1]: [ss_store_sk#19] +Right keys [1]: [s_store_sk#25] Join condition: None (45) Project [codegen id : 8] Output: [] -Input [2]: [ss_store_sk#23, s_store_sk#30] +Input [2]: [ss_store_sk#19, s_store_sk#25] (46) HashAggregate [codegen id : 8] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#31] -Results [1]: [count#32] +Aggregate Attributes [1]: [count#26] +Results [1]: [count#27] (47) Exchange -Input [1]: [count#32] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#33] +Input [1]: [count#27] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] (48) HashAggregate [codegen id : 9] -Input [1]: [count#32] +Input [1]: [count#27] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#34] -Results [1]: [count(1)#34 AS h9_to_9_30#35] +Aggregate Attributes [1]: [count(1)#28] +Results [1]: [count(1)#28 AS h9_to_9_30#29] (49) BroadcastExchange -Input [1]: [h9_to_9_30#35] -Arguments: IdentityBroadcastMode, [id=#36] +Input [1]: [h9_to_9_30#29] +Arguments: IdentityBroadcastMode, [plan_id=7] (50) BroadcastNestedLoopJoin [codegen id : 40] Join condition: None (51) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Output [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (52) ColumnarToRow [codegen id : 13] -Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] (53) Filter [codegen id : 13] -Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] -Condition : ((isnotnull(ss_hdemo_sk#38) AND isnotnull(ss_sold_time_sk#37)) AND isnotnull(ss_store_sk#39)) +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] +Condition : ((isnotnull(ss_hdemo_sk#31) AND isnotnull(ss_sold_time_sk#30)) AND isnotnull(ss_store_sk#32)) (54) Project [codegen id : 13] -Output [3]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39] -Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, ss_sold_date_sk#40] +Output [3]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32] +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, ss_sold_date_sk#33] (55) ReusedExchange [Reuses operator id: 9] -Output [1]: [hd_demo_sk#41] +Output [1]: [hd_demo_sk#34] (56) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_hdemo_sk#38] -Right keys [1]: [hd_demo_sk#41] +Left keys [1]: [ss_hdemo_sk#31] +Right keys [1]: [hd_demo_sk#34] Join condition: None (57) Project [codegen id : 13] -Output [2]: [ss_sold_time_sk#37, ss_store_sk#39] -Input [4]: [ss_sold_time_sk#37, ss_hdemo_sk#38, ss_store_sk#39, hd_demo_sk#41] +Output [2]: [ss_sold_time_sk#30, ss_store_sk#32] +Input [4]: [ss_sold_time_sk#30, ss_hdemo_sk#31, ss_store_sk#32, hd_demo_sk#34] (58) Scan parquet default.time_dim -Output [3]: [t_time_sk#42, t_hour#43, t_minute#44] +Output [3]: [t_time_sk#35, t_hour#36, t_minute#37] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,9), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (59) ColumnarToRow [codegen id : 11] -Input [3]: [t_time_sk#42, t_hour#43, t_minute#44] +Input [3]: [t_time_sk#35, t_hour#36, t_minute#37] (60) Filter [codegen id : 11] -Input [3]: [t_time_sk#42, t_hour#43, t_minute#44] -Condition : ((((isnotnull(t_hour#43) AND isnotnull(t_minute#44)) AND (t_hour#43 = 9)) AND (t_minute#44 >= 30)) AND isnotnull(t_time_sk#42)) +Input [3]: [t_time_sk#35, t_hour#36, t_minute#37] +Condition : ((((isnotnull(t_hour#36) AND isnotnull(t_minute#37)) AND (t_hour#36 = 9)) AND (t_minute#37 >= 30)) AND isnotnull(t_time_sk#35)) (61) Project [codegen id : 11] -Output [1]: [t_time_sk#42] -Input [3]: [t_time_sk#42, t_hour#43, t_minute#44] +Output [1]: [t_time_sk#35] +Input [3]: [t_time_sk#35, t_hour#36, t_minute#37] (62) BroadcastExchange -Input [1]: [t_time_sk#42] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#45] +Input [1]: [t_time_sk#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] (63) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_sold_time_sk#37] -Right keys [1]: [t_time_sk#42] +Left keys [1]: [ss_sold_time_sk#30] +Right keys [1]: [t_time_sk#35] Join condition: None (64) Project [codegen id : 13] -Output [1]: [ss_store_sk#39] -Input [3]: [ss_sold_time_sk#37, ss_store_sk#39, t_time_sk#42] +Output [1]: [ss_store_sk#32] +Input [3]: [ss_sold_time_sk#30, ss_store_sk#32, t_time_sk#35] (65) ReusedExchange [Reuses operator id: 23] -Output [1]: [s_store_sk#46] +Output [1]: [s_store_sk#38] (66) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ss_store_sk#39] -Right keys [1]: [s_store_sk#46] +Left keys [1]: [ss_store_sk#32] +Right keys [1]: [s_store_sk#38] Join condition: None (67) Project [codegen id : 13] Output: [] -Input [2]: [ss_store_sk#39, s_store_sk#46] +Input [2]: [ss_store_sk#32, s_store_sk#38] (68) HashAggregate [codegen id : 13] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#47] -Results [1]: [count#48] +Aggregate Attributes [1]: [count#39] +Results [1]: [count#40] (69) Exchange -Input [1]: [count#48] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#49] +Input [1]: [count#40] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=9] (70) HashAggregate [codegen id : 14] -Input [1]: [count#48] +Input [1]: [count#40] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#50] -Results [1]: [count(1)#50 AS h9_30_to_10#51] +Aggregate Attributes [1]: [count(1)#41] +Results [1]: [count(1)#41 AS h9_30_to_10#42] (71) BroadcastExchange -Input [1]: [h9_30_to_10#51] -Arguments: IdentityBroadcastMode, [id=#52] +Input [1]: [h9_30_to_10#42] +Arguments: IdentityBroadcastMode, [plan_id=10] (72) BroadcastNestedLoopJoin [codegen id : 40] Join condition: None (73) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#53, ss_hdemo_sk#54, ss_store_sk#55, ss_sold_date_sk#56] +Output [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (74) ColumnarToRow [codegen id : 18] -Input [4]: [ss_sold_time_sk#53, ss_hdemo_sk#54, ss_store_sk#55, ss_sold_date_sk#56] +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] (75) Filter [codegen id : 18] -Input [4]: [ss_sold_time_sk#53, ss_hdemo_sk#54, ss_store_sk#55, ss_sold_date_sk#56] -Condition : ((isnotnull(ss_hdemo_sk#54) AND isnotnull(ss_sold_time_sk#53)) AND isnotnull(ss_store_sk#55)) +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] +Condition : ((isnotnull(ss_hdemo_sk#44) AND isnotnull(ss_sold_time_sk#43)) AND isnotnull(ss_store_sk#45)) (76) Project [codegen id : 18] -Output [3]: [ss_sold_time_sk#53, ss_hdemo_sk#54, ss_store_sk#55] -Input [4]: [ss_sold_time_sk#53, ss_hdemo_sk#54, ss_store_sk#55, ss_sold_date_sk#56] +Output [3]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45] +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, ss_sold_date_sk#46] (77) ReusedExchange [Reuses operator id: 9] -Output [1]: [hd_demo_sk#57] +Output [1]: [hd_demo_sk#47] (78) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ss_hdemo_sk#54] -Right keys [1]: [hd_demo_sk#57] +Left keys [1]: [ss_hdemo_sk#44] +Right keys [1]: [hd_demo_sk#47] Join condition: None (79) Project [codegen id : 18] -Output [2]: [ss_sold_time_sk#53, ss_store_sk#55] -Input [4]: [ss_sold_time_sk#53, ss_hdemo_sk#54, ss_store_sk#55, hd_demo_sk#57] +Output [2]: [ss_sold_time_sk#43, ss_store_sk#45] +Input [4]: [ss_sold_time_sk#43, ss_hdemo_sk#44, ss_store_sk#45, hd_demo_sk#47] (80) Scan parquet default.time_dim -Output [3]: [t_time_sk#58, t_hour#59, t_minute#60] +Output [3]: [t_time_sk#48, t_hour#49, t_minute#50] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), LessThan(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (81) ColumnarToRow [codegen id : 16] -Input [3]: [t_time_sk#58, t_hour#59, t_minute#60] +Input [3]: [t_time_sk#48, t_hour#49, t_minute#50] (82) Filter [codegen id : 16] -Input [3]: [t_time_sk#58, t_hour#59, t_minute#60] -Condition : ((((isnotnull(t_hour#59) AND isnotnull(t_minute#60)) AND (t_hour#59 = 10)) AND (t_minute#60 < 30)) AND isnotnull(t_time_sk#58)) +Input [3]: [t_time_sk#48, t_hour#49, t_minute#50] +Condition : ((((isnotnull(t_hour#49) AND isnotnull(t_minute#50)) AND (t_hour#49 = 10)) AND (t_minute#50 < 30)) AND isnotnull(t_time_sk#48)) (83) Project [codegen id : 16] -Output [1]: [t_time_sk#58] -Input [3]: [t_time_sk#58, t_hour#59, t_minute#60] +Output [1]: [t_time_sk#48] +Input [3]: [t_time_sk#48, t_hour#49, t_minute#50] (84) BroadcastExchange -Input [1]: [t_time_sk#58] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#61] +Input [1]: [t_time_sk#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] (85) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ss_sold_time_sk#53] -Right keys [1]: [t_time_sk#58] +Left keys [1]: [ss_sold_time_sk#43] +Right keys [1]: [t_time_sk#48] Join condition: None (86) Project [codegen id : 18] -Output [1]: [ss_store_sk#55] -Input [3]: [ss_sold_time_sk#53, ss_store_sk#55, t_time_sk#58] +Output [1]: [ss_store_sk#45] +Input [3]: [ss_sold_time_sk#43, ss_store_sk#45, t_time_sk#48] (87) ReusedExchange [Reuses operator id: 23] -Output [1]: [s_store_sk#62] +Output [1]: [s_store_sk#51] (88) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [ss_store_sk#55] -Right keys [1]: [s_store_sk#62] +Left keys [1]: [ss_store_sk#45] +Right keys [1]: [s_store_sk#51] Join condition: None (89) Project [codegen id : 18] Output: [] -Input [2]: [ss_store_sk#55, s_store_sk#62] +Input [2]: [ss_store_sk#45, s_store_sk#51] (90) HashAggregate [codegen id : 18] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#63] -Results [1]: [count#64] +Aggregate Attributes [1]: [count#52] +Results [1]: [count#53] (91) Exchange -Input [1]: [count#64] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#65] +Input [1]: [count#53] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] (92) HashAggregate [codegen id : 19] -Input [1]: [count#64] +Input [1]: [count#53] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#66] -Results [1]: [count(1)#66 AS h10_to_10_30#67] +Aggregate Attributes [1]: [count(1)#54] +Results [1]: [count(1)#54 AS h10_to_10_30#55] (93) BroadcastExchange -Input [1]: [h10_to_10_30#67] -Arguments: IdentityBroadcastMode, [id=#68] +Input [1]: [h10_to_10_30#55] +Arguments: IdentityBroadcastMode, [plan_id=13] (94) BroadcastNestedLoopJoin [codegen id : 40] Join condition: None (95) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] +Output [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (96) ColumnarToRow [codegen id : 23] -Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] (97) Filter [codegen id : 23] -Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] -Condition : ((isnotnull(ss_hdemo_sk#70) AND isnotnull(ss_sold_time_sk#69)) AND isnotnull(ss_store_sk#71)) +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] +Condition : ((isnotnull(ss_hdemo_sk#57) AND isnotnull(ss_sold_time_sk#56)) AND isnotnull(ss_store_sk#58)) (98) Project [codegen id : 23] -Output [3]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71] -Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] +Output [3]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58] +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, ss_sold_date_sk#59] (99) ReusedExchange [Reuses operator id: 9] -Output [1]: [hd_demo_sk#73] +Output [1]: [hd_demo_sk#60] (100) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [ss_hdemo_sk#70] -Right keys [1]: [hd_demo_sk#73] +Left keys [1]: [ss_hdemo_sk#57] +Right keys [1]: [hd_demo_sk#60] Join condition: None (101) Project [codegen id : 23] -Output [2]: [ss_sold_time_sk#69, ss_store_sk#71] -Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, hd_demo_sk#73] +Output [2]: [ss_sold_time_sk#56, ss_store_sk#58] +Input [4]: [ss_sold_time_sk#56, ss_hdemo_sk#57, ss_store_sk#58, hd_demo_sk#60] (102) Scan parquet default.time_dim -Output [3]: [t_time_sk#74, t_hour#75, t_minute#76] +Output [3]: [t_time_sk#61, t_hour#62, t_minute#63] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,10), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (103) ColumnarToRow [codegen id : 21] -Input [3]: [t_time_sk#74, t_hour#75, t_minute#76] +Input [3]: [t_time_sk#61, t_hour#62, t_minute#63] (104) Filter [codegen id : 21] -Input [3]: [t_time_sk#74, t_hour#75, t_minute#76] -Condition : ((((isnotnull(t_hour#75) AND isnotnull(t_minute#76)) AND (t_hour#75 = 10)) AND (t_minute#76 >= 30)) AND isnotnull(t_time_sk#74)) +Input [3]: [t_time_sk#61, t_hour#62, t_minute#63] +Condition : ((((isnotnull(t_hour#62) AND isnotnull(t_minute#63)) AND (t_hour#62 = 10)) AND (t_minute#63 >= 30)) AND isnotnull(t_time_sk#61)) (105) Project [codegen id : 21] -Output [1]: [t_time_sk#74] -Input [3]: [t_time_sk#74, t_hour#75, t_minute#76] +Output [1]: [t_time_sk#61] +Input [3]: [t_time_sk#61, t_hour#62, t_minute#63] (106) BroadcastExchange -Input [1]: [t_time_sk#74] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#77] +Input [1]: [t_time_sk#61] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] (107) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [ss_sold_time_sk#69] -Right keys [1]: [t_time_sk#74] +Left keys [1]: [ss_sold_time_sk#56] +Right keys [1]: [t_time_sk#61] Join condition: None (108) Project [codegen id : 23] -Output [1]: [ss_store_sk#71] -Input [3]: [ss_sold_time_sk#69, ss_store_sk#71, t_time_sk#74] +Output [1]: [ss_store_sk#58] +Input [3]: [ss_sold_time_sk#56, ss_store_sk#58, t_time_sk#61] (109) ReusedExchange [Reuses operator id: 23] -Output [1]: [s_store_sk#78] +Output [1]: [s_store_sk#64] (110) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [ss_store_sk#71] -Right keys [1]: [s_store_sk#78] +Left keys [1]: [ss_store_sk#58] +Right keys [1]: [s_store_sk#64] Join condition: None (111) Project [codegen id : 23] Output: [] -Input [2]: [ss_store_sk#71, s_store_sk#78] +Input [2]: [ss_store_sk#58, s_store_sk#64] (112) HashAggregate [codegen id : 23] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#79] -Results [1]: [count#80] +Aggregate Attributes [1]: [count#65] +Results [1]: [count#66] (113) Exchange -Input [1]: [count#80] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#81] +Input [1]: [count#66] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=15] (114) HashAggregate [codegen id : 24] -Input [1]: [count#80] +Input [1]: [count#66] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#82] -Results [1]: [count(1)#82 AS h10_30_to_11#83] +Aggregate Attributes [1]: [count(1)#67] +Results [1]: [count(1)#67 AS h10_30_to_11#68] (115) BroadcastExchange -Input [1]: [h10_30_to_11#83] -Arguments: IdentityBroadcastMode, [id=#84] +Input [1]: [h10_30_to_11#68] +Arguments: IdentityBroadcastMode, [plan_id=16] (116) BroadcastNestedLoopJoin [codegen id : 40] Join condition: None (117) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#85, ss_hdemo_sk#86, ss_store_sk#87, ss_sold_date_sk#88] +Output [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (118) ColumnarToRow [codegen id : 28] -Input [4]: [ss_sold_time_sk#85, ss_hdemo_sk#86, ss_store_sk#87, ss_sold_date_sk#88] +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] (119) Filter [codegen id : 28] -Input [4]: [ss_sold_time_sk#85, ss_hdemo_sk#86, ss_store_sk#87, ss_sold_date_sk#88] -Condition : ((isnotnull(ss_hdemo_sk#86) AND isnotnull(ss_sold_time_sk#85)) AND isnotnull(ss_store_sk#87)) +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] +Condition : ((isnotnull(ss_hdemo_sk#70) AND isnotnull(ss_sold_time_sk#69)) AND isnotnull(ss_store_sk#71)) (120) Project [codegen id : 28] -Output [3]: [ss_sold_time_sk#85, ss_hdemo_sk#86, ss_store_sk#87] -Input [4]: [ss_sold_time_sk#85, ss_hdemo_sk#86, ss_store_sk#87, ss_sold_date_sk#88] +Output [3]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71] +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, ss_sold_date_sk#72] (121) ReusedExchange [Reuses operator id: 9] -Output [1]: [hd_demo_sk#89] +Output [1]: [hd_demo_sk#73] (122) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [ss_hdemo_sk#86] -Right keys [1]: [hd_demo_sk#89] +Left keys [1]: [ss_hdemo_sk#70] +Right keys [1]: [hd_demo_sk#73] Join condition: None (123) Project [codegen id : 28] -Output [2]: [ss_sold_time_sk#85, ss_store_sk#87] -Input [4]: [ss_sold_time_sk#85, ss_hdemo_sk#86, ss_store_sk#87, hd_demo_sk#89] +Output [2]: [ss_sold_time_sk#69, ss_store_sk#71] +Input [4]: [ss_sold_time_sk#69, ss_hdemo_sk#70, ss_store_sk#71, hd_demo_sk#73] (124) Scan parquet default.time_dim -Output [3]: [t_time_sk#90, t_hour#91, t_minute#92] +Output [3]: [t_time_sk#74, t_hour#75, t_minute#76] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), LessThan(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (125) ColumnarToRow [codegen id : 26] -Input [3]: [t_time_sk#90, t_hour#91, t_minute#92] +Input [3]: [t_time_sk#74, t_hour#75, t_minute#76] (126) Filter [codegen id : 26] -Input [3]: [t_time_sk#90, t_hour#91, t_minute#92] -Condition : ((((isnotnull(t_hour#91) AND isnotnull(t_minute#92)) AND (t_hour#91 = 11)) AND (t_minute#92 < 30)) AND isnotnull(t_time_sk#90)) +Input [3]: [t_time_sk#74, t_hour#75, t_minute#76] +Condition : ((((isnotnull(t_hour#75) AND isnotnull(t_minute#76)) AND (t_hour#75 = 11)) AND (t_minute#76 < 30)) AND isnotnull(t_time_sk#74)) (127) Project [codegen id : 26] -Output [1]: [t_time_sk#90] -Input [3]: [t_time_sk#90, t_hour#91, t_minute#92] +Output [1]: [t_time_sk#74] +Input [3]: [t_time_sk#74, t_hour#75, t_minute#76] (128) BroadcastExchange -Input [1]: [t_time_sk#90] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#93] +Input [1]: [t_time_sk#74] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=17] (129) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [ss_sold_time_sk#85] -Right keys [1]: [t_time_sk#90] +Left keys [1]: [ss_sold_time_sk#69] +Right keys [1]: [t_time_sk#74] Join condition: None (130) Project [codegen id : 28] -Output [1]: [ss_store_sk#87] -Input [3]: [ss_sold_time_sk#85, ss_store_sk#87, t_time_sk#90] +Output [1]: [ss_store_sk#71] +Input [3]: [ss_sold_time_sk#69, ss_store_sk#71, t_time_sk#74] (131) ReusedExchange [Reuses operator id: 23] -Output [1]: [s_store_sk#94] +Output [1]: [s_store_sk#77] (132) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [ss_store_sk#87] -Right keys [1]: [s_store_sk#94] +Left keys [1]: [ss_store_sk#71] +Right keys [1]: [s_store_sk#77] Join condition: None (133) Project [codegen id : 28] Output: [] -Input [2]: [ss_store_sk#87, s_store_sk#94] +Input [2]: [ss_store_sk#71, s_store_sk#77] (134) HashAggregate [codegen id : 28] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#95] -Results [1]: [count#96] +Aggregate Attributes [1]: [count#78] +Results [1]: [count#79] (135) Exchange -Input [1]: [count#96] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#97] +Input [1]: [count#79] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=18] (136) HashAggregate [codegen id : 29] -Input [1]: [count#96] +Input [1]: [count#79] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#98] -Results [1]: [count(1)#98 AS h11_to_11_30#99] +Aggregate Attributes [1]: [count(1)#80] +Results [1]: [count(1)#80 AS h11_to_11_30#81] (137) BroadcastExchange -Input [1]: [h11_to_11_30#99] -Arguments: IdentityBroadcastMode, [id=#100] +Input [1]: [h11_to_11_30#81] +Arguments: IdentityBroadcastMode, [plan_id=19] (138) BroadcastNestedLoopJoin [codegen id : 40] Join condition: None (139) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#101, ss_hdemo_sk#102, ss_store_sk#103, ss_sold_date_sk#104] +Output [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (140) ColumnarToRow [codegen id : 33] -Input [4]: [ss_sold_time_sk#101, ss_hdemo_sk#102, ss_store_sk#103, ss_sold_date_sk#104] +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] (141) Filter [codegen id : 33] -Input [4]: [ss_sold_time_sk#101, ss_hdemo_sk#102, ss_store_sk#103, ss_sold_date_sk#104] -Condition : ((isnotnull(ss_hdemo_sk#102) AND isnotnull(ss_sold_time_sk#101)) AND isnotnull(ss_store_sk#103)) +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] +Condition : ((isnotnull(ss_hdemo_sk#83) AND isnotnull(ss_sold_time_sk#82)) AND isnotnull(ss_store_sk#84)) (142) Project [codegen id : 33] -Output [3]: [ss_sold_time_sk#101, ss_hdemo_sk#102, ss_store_sk#103] -Input [4]: [ss_sold_time_sk#101, ss_hdemo_sk#102, ss_store_sk#103, ss_sold_date_sk#104] +Output [3]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84] +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, ss_sold_date_sk#85] (143) ReusedExchange [Reuses operator id: 9] -Output [1]: [hd_demo_sk#105] +Output [1]: [hd_demo_sk#86] (144) BroadcastHashJoin [codegen id : 33] -Left keys [1]: [ss_hdemo_sk#102] -Right keys [1]: [hd_demo_sk#105] +Left keys [1]: [ss_hdemo_sk#83] +Right keys [1]: [hd_demo_sk#86] Join condition: None (145) Project [codegen id : 33] -Output [2]: [ss_sold_time_sk#101, ss_store_sk#103] -Input [4]: [ss_sold_time_sk#101, ss_hdemo_sk#102, ss_store_sk#103, hd_demo_sk#105] +Output [2]: [ss_sold_time_sk#82, ss_store_sk#84] +Input [4]: [ss_sold_time_sk#82, ss_hdemo_sk#83, ss_store_sk#84, hd_demo_sk#86] (146) Scan parquet default.time_dim -Output [3]: [t_time_sk#106, t_hour#107, t_minute#108] +Output [3]: [t_time_sk#87, t_hour#88, t_minute#89] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,11), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (147) ColumnarToRow [codegen id : 31] -Input [3]: [t_time_sk#106, t_hour#107, t_minute#108] +Input [3]: [t_time_sk#87, t_hour#88, t_minute#89] (148) Filter [codegen id : 31] -Input [3]: [t_time_sk#106, t_hour#107, t_minute#108] -Condition : ((((isnotnull(t_hour#107) AND isnotnull(t_minute#108)) AND (t_hour#107 = 11)) AND (t_minute#108 >= 30)) AND isnotnull(t_time_sk#106)) +Input [3]: [t_time_sk#87, t_hour#88, t_minute#89] +Condition : ((((isnotnull(t_hour#88) AND isnotnull(t_minute#89)) AND (t_hour#88 = 11)) AND (t_minute#89 >= 30)) AND isnotnull(t_time_sk#87)) (149) Project [codegen id : 31] -Output [1]: [t_time_sk#106] -Input [3]: [t_time_sk#106, t_hour#107, t_minute#108] +Output [1]: [t_time_sk#87] +Input [3]: [t_time_sk#87, t_hour#88, t_minute#89] (150) BroadcastExchange -Input [1]: [t_time_sk#106] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#109] +Input [1]: [t_time_sk#87] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=20] (151) BroadcastHashJoin [codegen id : 33] -Left keys [1]: [ss_sold_time_sk#101] -Right keys [1]: [t_time_sk#106] +Left keys [1]: [ss_sold_time_sk#82] +Right keys [1]: [t_time_sk#87] Join condition: None (152) Project [codegen id : 33] -Output [1]: [ss_store_sk#103] -Input [3]: [ss_sold_time_sk#101, ss_store_sk#103, t_time_sk#106] +Output [1]: [ss_store_sk#84] +Input [3]: [ss_sold_time_sk#82, ss_store_sk#84, t_time_sk#87] (153) ReusedExchange [Reuses operator id: 23] -Output [1]: [s_store_sk#110] +Output [1]: [s_store_sk#90] (154) BroadcastHashJoin [codegen id : 33] -Left keys [1]: [ss_store_sk#103] -Right keys [1]: [s_store_sk#110] +Left keys [1]: [ss_store_sk#84] +Right keys [1]: [s_store_sk#90] Join condition: None (155) Project [codegen id : 33] Output: [] -Input [2]: [ss_store_sk#103, s_store_sk#110] +Input [2]: [ss_store_sk#84, s_store_sk#90] (156) HashAggregate [codegen id : 33] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#111] -Results [1]: [count#112] +Aggregate Attributes [1]: [count#91] +Results [1]: [count#92] (157) Exchange -Input [1]: [count#112] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#113] +Input [1]: [count#92] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=21] (158) HashAggregate [codegen id : 34] -Input [1]: [count#112] +Input [1]: [count#92] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#114] -Results [1]: [count(1)#114 AS h11_30_to_12#115] +Aggregate Attributes [1]: [count(1)#93] +Results [1]: [count(1)#93 AS h11_30_to_12#94] (159) BroadcastExchange -Input [1]: [h11_30_to_12#115] -Arguments: IdentityBroadcastMode, [id=#116] +Input [1]: [h11_30_to_12#94] +Arguments: IdentityBroadcastMode, [plan_id=22] (160) BroadcastNestedLoopJoin [codegen id : 40] Join condition: None (161) Scan parquet default.store_sales -Output [4]: [ss_sold_time_sk#117, ss_hdemo_sk#118, ss_store_sk#119, ss_sold_date_sk#120] +Output [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_hdemo_sk), IsNotNull(ss_sold_time_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (162) ColumnarToRow [codegen id : 38] -Input [4]: [ss_sold_time_sk#117, ss_hdemo_sk#118, ss_store_sk#119, ss_sold_date_sk#120] +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] (163) Filter [codegen id : 38] -Input [4]: [ss_sold_time_sk#117, ss_hdemo_sk#118, ss_store_sk#119, ss_sold_date_sk#120] -Condition : ((isnotnull(ss_hdemo_sk#118) AND isnotnull(ss_sold_time_sk#117)) AND isnotnull(ss_store_sk#119)) +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] +Condition : ((isnotnull(ss_hdemo_sk#96) AND isnotnull(ss_sold_time_sk#95)) AND isnotnull(ss_store_sk#97)) (164) Project [codegen id : 38] -Output [3]: [ss_sold_time_sk#117, ss_hdemo_sk#118, ss_store_sk#119] -Input [4]: [ss_sold_time_sk#117, ss_hdemo_sk#118, ss_store_sk#119, ss_sold_date_sk#120] +Output [3]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97] +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, ss_sold_date_sk#98] (165) ReusedExchange [Reuses operator id: 9] -Output [1]: [hd_demo_sk#121] +Output [1]: [hd_demo_sk#99] (166) BroadcastHashJoin [codegen id : 38] -Left keys [1]: [ss_hdemo_sk#118] -Right keys [1]: [hd_demo_sk#121] +Left keys [1]: [ss_hdemo_sk#96] +Right keys [1]: [hd_demo_sk#99] Join condition: None (167) Project [codegen id : 38] -Output [2]: [ss_sold_time_sk#117, ss_store_sk#119] -Input [4]: [ss_sold_time_sk#117, ss_hdemo_sk#118, ss_store_sk#119, hd_demo_sk#121] +Output [2]: [ss_sold_time_sk#95, ss_store_sk#97] +Input [4]: [ss_sold_time_sk#95, ss_hdemo_sk#96, ss_store_sk#97, hd_demo_sk#99] (168) Scan parquet default.time_dim -Output [3]: [t_time_sk#122, t_hour#123, t_minute#124] +Output [3]: [t_time_sk#100, t_hour#101, t_minute#102] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,12), LessThan(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (169) ColumnarToRow [codegen id : 36] -Input [3]: [t_time_sk#122, t_hour#123, t_minute#124] +Input [3]: [t_time_sk#100, t_hour#101, t_minute#102] (170) Filter [codegen id : 36] -Input [3]: [t_time_sk#122, t_hour#123, t_minute#124] -Condition : ((((isnotnull(t_hour#123) AND isnotnull(t_minute#124)) AND (t_hour#123 = 12)) AND (t_minute#124 < 30)) AND isnotnull(t_time_sk#122)) +Input [3]: [t_time_sk#100, t_hour#101, t_minute#102] +Condition : ((((isnotnull(t_hour#101) AND isnotnull(t_minute#102)) AND (t_hour#101 = 12)) AND (t_minute#102 < 30)) AND isnotnull(t_time_sk#100)) (171) Project [codegen id : 36] -Output [1]: [t_time_sk#122] -Input [3]: [t_time_sk#122, t_hour#123, t_minute#124] +Output [1]: [t_time_sk#100] +Input [3]: [t_time_sk#100, t_hour#101, t_minute#102] (172) BroadcastExchange -Input [1]: [t_time_sk#122] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#125] +Input [1]: [t_time_sk#100] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=23] (173) BroadcastHashJoin [codegen id : 38] -Left keys [1]: [ss_sold_time_sk#117] -Right keys [1]: [t_time_sk#122] +Left keys [1]: [ss_sold_time_sk#95] +Right keys [1]: [t_time_sk#100] Join condition: None (174) Project [codegen id : 38] -Output [1]: [ss_store_sk#119] -Input [3]: [ss_sold_time_sk#117, ss_store_sk#119, t_time_sk#122] +Output [1]: [ss_store_sk#97] +Input [3]: [ss_sold_time_sk#95, ss_store_sk#97, t_time_sk#100] (175) ReusedExchange [Reuses operator id: 23] -Output [1]: [s_store_sk#126] +Output [1]: [s_store_sk#103] (176) BroadcastHashJoin [codegen id : 38] -Left keys [1]: [ss_store_sk#119] -Right keys [1]: [s_store_sk#126] +Left keys [1]: [ss_store_sk#97] +Right keys [1]: [s_store_sk#103] Join condition: None (177) Project [codegen id : 38] Output: [] -Input [2]: [ss_store_sk#119, s_store_sk#126] +Input [2]: [ss_store_sk#97, s_store_sk#103] (178) HashAggregate [codegen id : 38] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#127] -Results [1]: [count#128] +Aggregate Attributes [1]: [count#104] +Results [1]: [count#105] (179) Exchange -Input [1]: [count#128] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#129] +Input [1]: [count#105] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=24] (180) HashAggregate [codegen id : 39] -Input [1]: [count#128] +Input [1]: [count#105] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#130] -Results [1]: [count(1)#130 AS h12_to_12_30#131] +Aggregate Attributes [1]: [count(1)#106] +Results [1]: [count(1)#106 AS h12_to_12_30#107] (181) BroadcastExchange -Input [1]: [h12_to_12_30#131] -Arguments: IdentityBroadcastMode, [id=#132] +Input [1]: [h12_to_12_30#107] +Arguments: IdentityBroadcastMode, [plan_id=25] (182) BroadcastNestedLoopJoin [codegen id : 40] Join condition: None diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt index 6325bd574530a..6dcaa4bcf93f2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89.sf100/explain.txt @@ -44,116 +44,116 @@ Condition : (((i_category#4 IN (Books (4) BroadcastExchange Input [4]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (5) Scan parquet default.store_sales -Output [4]: [ss_item_sk#6, ss_store_sk#7, ss_sales_price#8, ss_sold_date_sk#9] +Output [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#9), dynamicpruningexpression(ss_sold_date_sk#9 IN dynamicpruning#10)] +PartitionFilters: [isnotnull(ss_sold_date_sk#8), dynamicpruningexpression(ss_sold_date_sk#8 IN dynamicpruning#9)] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_store_sk)] ReadSchema: struct (6) ColumnarToRow -Input [4]: [ss_item_sk#6, ss_store_sk#7, ss_sales_price#8, ss_sold_date_sk#9] +Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] (7) Filter -Input [4]: [ss_item_sk#6, ss_store_sk#7, ss_sales_price#8, ss_sold_date_sk#9] -Condition : (isnotnull(ss_item_sk#6) AND isnotnull(ss_store_sk#7)) +Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Condition : (isnotnull(ss_item_sk#5) AND isnotnull(ss_store_sk#6)) (8) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] -Right keys [1]: [ss_item_sk#6] +Right keys [1]: [ss_item_sk#5] Join condition: None (9) Project [codegen id : 4] -Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#7, ss_sales_price#8, ss_sold_date_sk#9] -Input [8]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, ss_item_sk#6, ss_store_sk#7, ss_sales_price#8, ss_sold_date_sk#9] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] +Input [8]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] (10) ReusedExchange [Reuses operator id: 32] -Output [2]: [d_date_sk#11, d_moy#12] +Output [2]: [d_date_sk#10, d_moy#11] (11) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#9] -Right keys [1]: [d_date_sk#11] +Left keys [1]: [ss_sold_date_sk#8] +Right keys [1]: [d_date_sk#10] Join condition: None (12) Project [codegen id : 4] -Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#7, ss_sales_price#8, d_moy#12] -Input [8]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#7, ss_sales_price#8, ss_sold_date_sk#9, d_date_sk#11, d_moy#12] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11] +Input [8]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8, d_date_sk#10, d_moy#11] (13) Scan parquet default.store -Output [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Output [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] (15) Filter [codegen id : 3] -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] -Condition : isnotnull(s_store_sk#13) +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Condition : isnotnull(s_store_sk#12) (16) BroadcastExchange -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_store_sk#7] -Right keys [1]: [s_store_sk#13] +Left keys [1]: [ss_store_sk#6] +Right keys [1]: [s_store_sk#12] Join condition: None (18) Project [codegen id : 4] -Output [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#8, d_moy#12, s_store_name#14, s_company_name#15] -Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#7, ss_sales_price#8, d_moy#12, s_store_sk#13, s_store_name#14, s_company_name#15] +Output [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] +Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11, s_store_sk#12, s_store_name#13, s_company_name#14] (19) HashAggregate [codegen id : 4] -Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#8, d_moy#12, s_store_name#14, s_company_name#15] -Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#8))] -Aggregate Attributes [1]: [sum#17] -Results [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum#18] +Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum#15] +Results [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] (20) Exchange -Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum#18] -Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] +Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 5] -Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum#18] -Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12] -Functions [1]: [sum(UnscaledValue(ss_sales_price#8))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#8))#20] -Results [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, MakeDecimal(sum(UnscaledValue(ss_sales_price#8))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#8))#20,17,2) AS _w0#22] +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] +Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#17] +Results [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#17,17,2) AS sum_sales#18, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#17,17,2) AS _w0#19] (22) Exchange -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22] -Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#14, s_company_name#15, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) Sort [codegen id : 6] -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22] -Arguments: [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST, s_company_name#15 ASC NULLS FIRST], false, 0 +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST, s_company_name#14 ASC NULLS FIRST], false, 0 (24) Window -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22] -Arguments: [avg(_w0#22) windowspecdefinition(i_category#4, i_brand#2, s_store_name#14, s_company_name#15, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#24], [i_category#4, i_brand#2, s_store_name#14, s_company_name#15] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: [avg(_w0#19) windowspecdefinition(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#4, i_brand#2, s_store_name#13, s_company_name#14] (25) Filter [codegen id : 7] -Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22, avg_monthly_sales#24] -Condition : (isnotnull(avg_monthly_sales#24) AND (NOT (avg_monthly_sales#24 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] +Condition : (isnotnull(avg_monthly_sales#20) AND (NOT (avg_monthly_sales#20 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (26) Project [codegen id : 7] -Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] -Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22, avg_monthly_sales#24] +Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] (27) TakeOrderedAndProject -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] ===== Subqueries ===== -Subquery:1 Hosting operator id = 5 Hosting Expression = ss_sold_date_sk#9 IN dynamicpruning#10 +Subquery:1 Hosting operator id = 5 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 BroadcastExchange (32) +- * Project (31) +- * Filter (30) @@ -162,25 +162,25 @@ BroadcastExchange (32) (28) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_year#25, d_moy#12] +Output [3]: [d_date_sk#10, d_year#21, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] ReadSchema: struct (29) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#25, d_moy#12] +Input [3]: [d_date_sk#10, d_year#21, d_moy#11] (30) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#25, d_moy#12] -Condition : ((isnotnull(d_year#25) AND (d_year#25 = 1999)) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#10, d_year#21, d_moy#11] +Condition : ((isnotnull(d_year#21) AND (d_year#21 = 1999)) AND isnotnull(d_date_sk#10)) (31) Project [codegen id : 1] -Output [2]: [d_date_sk#11, d_moy#12] -Input [3]: [d_date_sk#11, d_year#25, d_moy#12] +Output [2]: [d_date_sk#10, d_moy#11] +Input [3]: [d_date_sk#10, d_year#21, d_moy#11] (32) BroadcastExchange -Input [2]: [d_date_sk#11, d_moy#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [2]: [d_date_sk#10, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt index 770ab84503645..fc949bd963e6f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q89/explain.txt @@ -59,7 +59,7 @@ Condition : (isnotnull(ss_item_sk#5) AND isnotnull(ss_store_sk#6)) (7) BroadcastExchange Input [4]: [ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] @@ -71,85 +71,85 @@ Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7 Input [8]: [i_item_sk#1, i_brand#2, i_class#3, i_category#4, ss_item_sk#5, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8] (10) ReusedExchange [Reuses operator id: 32] -Output [2]: [d_date_sk#11, d_moy#12] +Output [2]: [d_date_sk#10, d_moy#11] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#11] +Right keys [1]: [d_date_sk#10] Join condition: None (12) Project [codegen id : 4] -Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#12] -Input [8]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8, d_date_sk#11, d_moy#12] +Output [6]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11] +Input [8]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, ss_sold_date_sk#8, d_date_sk#10, d_moy#11] (13) Scan parquet default.store -Output [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Output [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] (15) Filter [codegen id : 3] -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] -Condition : isnotnull(s_store_sk#13) +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Condition : isnotnull(s_store_sk#12) (16) BroadcastExchange -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#6] -Right keys [1]: [s_store_sk#13] +Right keys [1]: [s_store_sk#12] Join condition: None (18) Project [codegen id : 4] -Output [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#12, s_store_name#14, s_company_name#15] -Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#12, s_store_sk#13, s_store_name#14, s_company_name#15] +Output [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] +Input [9]: [i_brand#2, i_class#3, i_category#4, ss_store_sk#6, ss_sales_price#7, d_moy#11, s_store_sk#12, s_store_name#13, s_company_name#14] (19) HashAggregate [codegen id : 4] -Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#12, s_store_name#14, s_company_name#15] -Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12] +Input [7]: [i_brand#2, i_class#3, i_category#4, ss_sales_price#7, d_moy#11, s_store_name#13, s_company_name#14] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#7))] -Aggregate Attributes [1]: [sum#17] -Results [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum#18] +Aggregate Attributes [1]: [sum#15] +Results [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] (20) Exchange -Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum#18] -Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] +Arguments: hashpartitioning(i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 5] -Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum#18] -Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12] +Input [7]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum#16] +Keys [6]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11] Functions [1]: [sum(UnscaledValue(ss_sales_price#7))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#20] -Results [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#20,17,2) AS _w0#22] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#7))#17] +Results [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#17,17,2) AS sum_sales#18, MakeDecimal(sum(UnscaledValue(ss_sales_price#7))#17,17,2) AS _w0#19] (22) Exchange -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22] -Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#14, s_company_name#15, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: hashpartitioning(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) Sort [codegen id : 6] -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22] -Arguments: [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST, s_company_name#15 ASC NULLS FIRST], false, 0 +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: [i_category#4 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST, s_company_name#14 ASC NULLS FIRST], false, 0 (24) Window -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22] -Arguments: [avg(_w0#22) windowspecdefinition(i_category#4, i_brand#2, s_store_name#14, s_company_name#15, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#24], [i_category#4, i_brand#2, s_store_name#14, s_company_name#15] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19] +Arguments: [avg(_w0#19) windowspecdefinition(i_category#4, i_brand#2, s_store_name#13, s_company_name#14, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#4, i_brand#2, s_store_name#13, s_company_name#14] (25) Filter [codegen id : 7] -Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22, avg_monthly_sales#24] -Condition : (isnotnull(avg_monthly_sales#24) AND (NOT (avg_monthly_sales#24 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] +Condition : (isnotnull(avg_monthly_sales#20) AND (NOT (avg_monthly_sales#20 = 0.000000) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000))) (26) Project [codegen id : 7] -Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] -Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, _w0#22, avg_monthly_sales#24] +Output [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] +Input [9]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, _w0#19, avg_monthly_sales#20] (27) TakeOrderedAndProject -Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#14, s_company_name#15, d_moy#12, sum_sales#21, avg_monthly_sales#24] +Input [8]: [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_category#4, i_class#3, i_brand#2, s_store_name#13, s_company_name#14, d_moy#11, sum_sales#18, avg_monthly_sales#20] ===== Subqueries ===== @@ -162,25 +162,25 @@ BroadcastExchange (32) (28) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_year#25, d_moy#12] +Output [3]: [d_date_sk#10, d_year#21, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] ReadSchema: struct (29) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#25, d_moy#12] +Input [3]: [d_date_sk#10, d_year#21, d_moy#11] (30) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#25, d_moy#12] -Condition : ((isnotnull(d_year#25) AND (d_year#25 = 1999)) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#10, d_year#21, d_moy#11] +Condition : ((isnotnull(d_year#21) AND (d_year#21 = 1999)) AND isnotnull(d_date_sk#10)) (31) Project [codegen id : 1] -Output [2]: [d_date_sk#11, d_moy#12] -Input [3]: [d_date_sk#11, d_year#25, d_moy#12] +Output [2]: [d_date_sk#10, d_moy#11] +Input [3]: [d_date_sk#10, d_year#21, d_moy#11] (32) BroadcastExchange -Input [2]: [d_date_sk#11, d_moy#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [2]: [d_date_sk#10, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/explain.txt index 8bf63794f25e4..c57e62a638258 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9.sf100/explain.txt @@ -63,18 +63,18 @@ Results [5]: [count#26, sum#27, count#28, sum#29, count#30] (10) Exchange Input [5]: [count#26, sum#27, count#28, sum#29, count#30] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#31] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=1] (11) HashAggregate [codegen id : 2] Input [5]: [count#26, sum#27, count#28, sum#29, count#30] Keys: [] Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#18)), avg(UnscaledValue(ss_net_paid#19))] -Aggregate Attributes [3]: [count(1)#32, avg(UnscaledValue(ss_ext_discount_amt#18))#33, avg(UnscaledValue(ss_net_paid#19))#34] -Results [3]: [count(1)#32 AS count(1)#35, cast((avg(UnscaledValue(ss_ext_discount_amt#18))#33 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#36, cast((avg(UnscaledValue(ss_net_paid#19))#34 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#37] +Aggregate Attributes [3]: [count(1)#31, avg(UnscaledValue(ss_ext_discount_amt#18))#32, avg(UnscaledValue(ss_net_paid#19))#33] +Results [3]: [count(1)#31 AS count(1)#34, cast((avg(UnscaledValue(ss_ext_discount_amt#18))#32 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#35, cast((avg(UnscaledValue(ss_net_paid#19))#33 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#36] (12) Project [codegen id : 2] -Output [1]: [named_struct(count(1), count(1)#35, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#36, avg(ss_net_paid), avg(ss_net_paid)#37) AS mergedValue#38] -Input [3]: [count(1)#35, avg(ss_ext_discount_amt)#36, avg(ss_net_paid)#37] +Output [1]: [named_struct(count(1), count(1)#34, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#35, avg(ss_net_paid), avg(ss_net_paid)#36) AS mergedValue#37] +Input [3]: [count(1)#34, avg(ss_ext_discount_amt)#35, avg(ss_net_paid)#36] Subquery:2 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] @@ -92,44 +92,44 @@ Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery (13) Scan parquet default.store_sales -Output [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] +Output [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] ReadSchema: struct (14) ColumnarToRow [codegen id : 1] -Input [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] +Input [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] (15) Filter [codegen id : 1] -Input [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] -Condition : ((isnotnull(ss_quantity#39) AND (ss_quantity#39 >= 21)) AND (ss_quantity#39 <= 40)) +Input [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] +Condition : ((isnotnull(ss_quantity#38) AND (ss_quantity#38 >= 21)) AND (ss_quantity#38 <= 40)) (16) Project [codegen id : 1] -Output [2]: [ss_ext_discount_amt#40, ss_net_paid#41] -Input [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] +Output [2]: [ss_ext_discount_amt#39, ss_net_paid#40] +Input [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] (17) HashAggregate [codegen id : 1] -Input [2]: [ss_ext_discount_amt#40, ss_net_paid#41] +Input [2]: [ss_ext_discount_amt#39, ss_net_paid#40] Keys: [] -Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#40)), partial_avg(UnscaledValue(ss_net_paid#41))] -Aggregate Attributes [5]: [count#43, sum#44, count#45, sum#46, count#47] -Results [5]: [count#48, sum#49, count#50, sum#51, count#52] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#39)), partial_avg(UnscaledValue(ss_net_paid#40))] +Aggregate Attributes [5]: [count#42, sum#43, count#44, sum#45, count#46] +Results [5]: [count#47, sum#48, count#49, sum#50, count#51] (18) Exchange -Input [5]: [count#48, sum#49, count#50, sum#51, count#52] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#53] +Input [5]: [count#47, sum#48, count#49, sum#50, count#51] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] (19) HashAggregate [codegen id : 2] -Input [5]: [count#48, sum#49, count#50, sum#51, count#52] +Input [5]: [count#47, sum#48, count#49, sum#50, count#51] Keys: [] -Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#40)), avg(UnscaledValue(ss_net_paid#41))] -Aggregate Attributes [3]: [count(1)#54, avg(UnscaledValue(ss_ext_discount_amt#40))#55, avg(UnscaledValue(ss_net_paid#41))#56] -Results [3]: [count(1)#54 AS count(1)#57, cast((avg(UnscaledValue(ss_ext_discount_amt#40))#55 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#58, cast((avg(UnscaledValue(ss_net_paid#41))#56 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#59] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#39)), avg(UnscaledValue(ss_net_paid#40))] +Aggregate Attributes [3]: [count(1)#52, avg(UnscaledValue(ss_ext_discount_amt#39))#53, avg(UnscaledValue(ss_net_paid#40))#54] +Results [3]: [count(1)#52 AS count(1)#55, cast((avg(UnscaledValue(ss_ext_discount_amt#39))#53 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#56, cast((avg(UnscaledValue(ss_net_paid#40))#54 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#57] (20) Project [codegen id : 2] -Output [1]: [named_struct(count(1), count(1)#57, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#58, avg(ss_net_paid), avg(ss_net_paid)#59) AS mergedValue#60] -Input [3]: [count(1)#57, avg(ss_ext_discount_amt)#58, avg(ss_net_paid)#59] +Output [1]: [named_struct(count(1), count(1)#55, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#56, avg(ss_net_paid), avg(ss_net_paid)#57) AS mergedValue#58] +Input [3]: [count(1)#55, avg(ss_ext_discount_amt)#56, avg(ss_net_paid)#57] Subquery:5 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] @@ -147,44 +147,44 @@ Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery (21) Scan parquet default.store_sales -Output [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] +Output [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] ReadSchema: struct (22) ColumnarToRow [codegen id : 1] -Input [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] +Input [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] (23) Filter [codegen id : 1] -Input [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] -Condition : ((isnotnull(ss_quantity#61) AND (ss_quantity#61 >= 41)) AND (ss_quantity#61 <= 60)) +Input [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] +Condition : ((isnotnull(ss_quantity#59) AND (ss_quantity#59 >= 41)) AND (ss_quantity#59 <= 60)) (24) Project [codegen id : 1] -Output [2]: [ss_ext_discount_amt#62, ss_net_paid#63] -Input [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] +Output [2]: [ss_ext_discount_amt#60, ss_net_paid#61] +Input [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] (25) HashAggregate [codegen id : 1] -Input [2]: [ss_ext_discount_amt#62, ss_net_paid#63] +Input [2]: [ss_ext_discount_amt#60, ss_net_paid#61] Keys: [] -Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#62)), partial_avg(UnscaledValue(ss_net_paid#63))] -Aggregate Attributes [5]: [count#65, sum#66, count#67, sum#68, count#69] -Results [5]: [count#70, sum#71, count#72, sum#73, count#74] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#60)), partial_avg(UnscaledValue(ss_net_paid#61))] +Aggregate Attributes [5]: [count#63, sum#64, count#65, sum#66, count#67] +Results [5]: [count#68, sum#69, count#70, sum#71, count#72] (26) Exchange -Input [5]: [count#70, sum#71, count#72, sum#73, count#74] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#75] +Input [5]: [count#68, sum#69, count#70, sum#71, count#72] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=3] (27) HashAggregate [codegen id : 2] -Input [5]: [count#70, sum#71, count#72, sum#73, count#74] +Input [5]: [count#68, sum#69, count#70, sum#71, count#72] Keys: [] -Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#62)), avg(UnscaledValue(ss_net_paid#63))] -Aggregate Attributes [3]: [count(1)#76, avg(UnscaledValue(ss_ext_discount_amt#62))#77, avg(UnscaledValue(ss_net_paid#63))#78] -Results [3]: [count(1)#76 AS count(1)#79, cast((avg(UnscaledValue(ss_ext_discount_amt#62))#77 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#80, cast((avg(UnscaledValue(ss_net_paid#63))#78 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#81] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#60)), avg(UnscaledValue(ss_net_paid#61))] +Aggregate Attributes [3]: [count(1)#73, avg(UnscaledValue(ss_ext_discount_amt#60))#74, avg(UnscaledValue(ss_net_paid#61))#75] +Results [3]: [count(1)#73 AS count(1)#76, cast((avg(UnscaledValue(ss_ext_discount_amt#60))#74 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#77, cast((avg(UnscaledValue(ss_net_paid#61))#75 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#78] (28) Project [codegen id : 2] -Output [1]: [named_struct(count(1), count(1)#79, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#80, avg(ss_net_paid), avg(ss_net_paid)#81) AS mergedValue#82] -Input [3]: [count(1)#79, avg(ss_ext_discount_amt)#80, avg(ss_net_paid)#81] +Output [1]: [named_struct(count(1), count(1)#76, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#77, avg(ss_net_paid), avg(ss_net_paid)#78) AS mergedValue#79] +Input [3]: [count(1)#76, avg(ss_ext_discount_amt)#77, avg(ss_net_paid)#78] Subquery:8 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] @@ -202,44 +202,44 @@ Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquer (29) Scan parquet default.store_sales -Output [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] +Output [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] +Input [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] (31) Filter [codegen id : 1] -Input [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] -Condition : ((isnotnull(ss_quantity#83) AND (ss_quantity#83 >= 61)) AND (ss_quantity#83 <= 80)) +Input [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] +Condition : ((isnotnull(ss_quantity#80) AND (ss_quantity#80 >= 61)) AND (ss_quantity#80 <= 80)) (32) Project [codegen id : 1] -Output [2]: [ss_ext_discount_amt#84, ss_net_paid#85] -Input [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] +Output [2]: [ss_ext_discount_amt#81, ss_net_paid#82] +Input [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] (33) HashAggregate [codegen id : 1] -Input [2]: [ss_ext_discount_amt#84, ss_net_paid#85] +Input [2]: [ss_ext_discount_amt#81, ss_net_paid#82] Keys: [] -Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#84)), partial_avg(UnscaledValue(ss_net_paid#85))] -Aggregate Attributes [5]: [count#87, sum#88, count#89, sum#90, count#91] -Results [5]: [count#92, sum#93, count#94, sum#95, count#96] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#81)), partial_avg(UnscaledValue(ss_net_paid#82))] +Aggregate Attributes [5]: [count#84, sum#85, count#86, sum#87, count#88] +Results [5]: [count#89, sum#90, count#91, sum#92, count#93] (34) Exchange -Input [5]: [count#92, sum#93, count#94, sum#95, count#96] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#97] +Input [5]: [count#89, sum#90, count#91, sum#92, count#93] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (35) HashAggregate [codegen id : 2] -Input [5]: [count#92, sum#93, count#94, sum#95, count#96] +Input [5]: [count#89, sum#90, count#91, sum#92, count#93] Keys: [] -Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#84)), avg(UnscaledValue(ss_net_paid#85))] -Aggregate Attributes [3]: [count(1)#98, avg(UnscaledValue(ss_ext_discount_amt#84))#99, avg(UnscaledValue(ss_net_paid#85))#100] -Results [3]: [count(1)#98 AS count(1)#101, cast((avg(UnscaledValue(ss_ext_discount_amt#84))#99 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#102, cast((avg(UnscaledValue(ss_net_paid#85))#100 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#103] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#81)), avg(UnscaledValue(ss_net_paid#82))] +Aggregate Attributes [3]: [count(1)#94, avg(UnscaledValue(ss_ext_discount_amt#81))#95, avg(UnscaledValue(ss_net_paid#82))#96] +Results [3]: [count(1)#94 AS count(1)#97, cast((avg(UnscaledValue(ss_ext_discount_amt#81))#95 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#98, cast((avg(UnscaledValue(ss_net_paid#82))#96 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#99] (36) Project [codegen id : 2] -Output [1]: [named_struct(count(1), count(1)#101, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#102, avg(ss_net_paid), avg(ss_net_paid)#103) AS mergedValue#104] -Input [3]: [count(1)#101, avg(ss_ext_discount_amt)#102, avg(ss_net_paid)#103] +Output [1]: [named_struct(count(1), count(1)#97, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#98, avg(ss_net_paid), avg(ss_net_paid)#99) AS mergedValue#100] +Input [3]: [count(1)#97, avg(ss_ext_discount_amt)#98, avg(ss_net_paid)#99] Subquery:11 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] @@ -257,44 +257,44 @@ Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquer (37) Scan parquet default.store_sales -Output [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] +Output [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] ReadSchema: struct (38) ColumnarToRow [codegen id : 1] -Input [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] +Input [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] (39) Filter [codegen id : 1] -Input [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] -Condition : ((isnotnull(ss_quantity#105) AND (ss_quantity#105 >= 81)) AND (ss_quantity#105 <= 100)) +Input [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] +Condition : ((isnotnull(ss_quantity#101) AND (ss_quantity#101 >= 81)) AND (ss_quantity#101 <= 100)) (40) Project [codegen id : 1] -Output [2]: [ss_ext_discount_amt#106, ss_net_paid#107] -Input [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] +Output [2]: [ss_ext_discount_amt#102, ss_net_paid#103] +Input [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] (41) HashAggregate [codegen id : 1] -Input [2]: [ss_ext_discount_amt#106, ss_net_paid#107] +Input [2]: [ss_ext_discount_amt#102, ss_net_paid#103] Keys: [] -Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#106)), partial_avg(UnscaledValue(ss_net_paid#107))] -Aggregate Attributes [5]: [count#109, sum#110, count#111, sum#112, count#113] -Results [5]: [count#114, sum#115, count#116, sum#117, count#118] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#102)), partial_avg(UnscaledValue(ss_net_paid#103))] +Aggregate Attributes [5]: [count#105, sum#106, count#107, sum#108, count#109] +Results [5]: [count#110, sum#111, count#112, sum#113, count#114] (42) Exchange -Input [5]: [count#114, sum#115, count#116, sum#117, count#118] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#119] +Input [5]: [count#110, sum#111, count#112, sum#113, count#114] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] (43) HashAggregate [codegen id : 2] -Input [5]: [count#114, sum#115, count#116, sum#117, count#118] +Input [5]: [count#110, sum#111, count#112, sum#113, count#114] Keys: [] -Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#106)), avg(UnscaledValue(ss_net_paid#107))] -Aggregate Attributes [3]: [count(1)#120, avg(UnscaledValue(ss_ext_discount_amt#106))#121, avg(UnscaledValue(ss_net_paid#107))#122] -Results [3]: [count(1)#120 AS count(1)#123, cast((avg(UnscaledValue(ss_ext_discount_amt#106))#121 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#124, cast((avg(UnscaledValue(ss_net_paid#107))#122 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#125] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#102)), avg(UnscaledValue(ss_net_paid#103))] +Aggregate Attributes [3]: [count(1)#115, avg(UnscaledValue(ss_ext_discount_amt#102))#116, avg(UnscaledValue(ss_net_paid#103))#117] +Results [3]: [count(1)#115 AS count(1)#118, cast((avg(UnscaledValue(ss_ext_discount_amt#102))#116 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#119, cast((avg(UnscaledValue(ss_net_paid#103))#117 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#120] (44) Project [codegen id : 2] -Output [1]: [named_struct(count(1), count(1)#123, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#124, avg(ss_net_paid), avg(ss_net_paid)#125) AS mergedValue#126] -Input [3]: [count(1)#123, avg(ss_ext_discount_amt)#124, avg(ss_net_paid)#125] +Output [1]: [named_struct(count(1), count(1)#118, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#119, avg(ss_net_paid), avg(ss_net_paid)#120) AS mergedValue#121] +Input [3]: [count(1)#118, avg(ss_ext_discount_amt)#119, avg(ss_net_paid)#120] Subquery:14 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt index 8bf63794f25e4..c57e62a638258 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q9/explain.txt @@ -63,18 +63,18 @@ Results [5]: [count#26, sum#27, count#28, sum#29, count#30] (10) Exchange Input [5]: [count#26, sum#27, count#28, sum#29, count#30] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#31] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=1] (11) HashAggregate [codegen id : 2] Input [5]: [count#26, sum#27, count#28, sum#29, count#30] Keys: [] Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#18)), avg(UnscaledValue(ss_net_paid#19))] -Aggregate Attributes [3]: [count(1)#32, avg(UnscaledValue(ss_ext_discount_amt#18))#33, avg(UnscaledValue(ss_net_paid#19))#34] -Results [3]: [count(1)#32 AS count(1)#35, cast((avg(UnscaledValue(ss_ext_discount_amt#18))#33 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#36, cast((avg(UnscaledValue(ss_net_paid#19))#34 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#37] +Aggregate Attributes [3]: [count(1)#31, avg(UnscaledValue(ss_ext_discount_amt#18))#32, avg(UnscaledValue(ss_net_paid#19))#33] +Results [3]: [count(1)#31 AS count(1)#34, cast((avg(UnscaledValue(ss_ext_discount_amt#18))#32 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#35, cast((avg(UnscaledValue(ss_net_paid#19))#33 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#36] (12) Project [codegen id : 2] -Output [1]: [named_struct(count(1), count(1)#35, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#36, avg(ss_net_paid), avg(ss_net_paid)#37) AS mergedValue#38] -Input [3]: [count(1)#35, avg(ss_ext_discount_amt)#36, avg(ss_net_paid)#37] +Output [1]: [named_struct(count(1), count(1)#34, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#35, avg(ss_net_paid), avg(ss_net_paid)#36) AS mergedValue#37] +Input [3]: [count(1)#34, avg(ss_ext_discount_amt)#35, avg(ss_net_paid)#36] Subquery:2 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#2, [id=#3] @@ -92,44 +92,44 @@ Subquery:4 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery (13) Scan parquet default.store_sales -Output [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] +Output [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,21), LessThanOrEqual(ss_quantity,40)] ReadSchema: struct (14) ColumnarToRow [codegen id : 1] -Input [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] +Input [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] (15) Filter [codegen id : 1] -Input [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] -Condition : ((isnotnull(ss_quantity#39) AND (ss_quantity#39 >= 21)) AND (ss_quantity#39 <= 40)) +Input [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] +Condition : ((isnotnull(ss_quantity#38) AND (ss_quantity#38 >= 21)) AND (ss_quantity#38 <= 40)) (16) Project [codegen id : 1] -Output [2]: [ss_ext_discount_amt#40, ss_net_paid#41] -Input [4]: [ss_quantity#39, ss_ext_discount_amt#40, ss_net_paid#41, ss_sold_date_sk#42] +Output [2]: [ss_ext_discount_amt#39, ss_net_paid#40] +Input [4]: [ss_quantity#38, ss_ext_discount_amt#39, ss_net_paid#40, ss_sold_date_sk#41] (17) HashAggregate [codegen id : 1] -Input [2]: [ss_ext_discount_amt#40, ss_net_paid#41] +Input [2]: [ss_ext_discount_amt#39, ss_net_paid#40] Keys: [] -Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#40)), partial_avg(UnscaledValue(ss_net_paid#41))] -Aggregate Attributes [5]: [count#43, sum#44, count#45, sum#46, count#47] -Results [5]: [count#48, sum#49, count#50, sum#51, count#52] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#39)), partial_avg(UnscaledValue(ss_net_paid#40))] +Aggregate Attributes [5]: [count#42, sum#43, count#44, sum#45, count#46] +Results [5]: [count#47, sum#48, count#49, sum#50, count#51] (18) Exchange -Input [5]: [count#48, sum#49, count#50, sum#51, count#52] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#53] +Input [5]: [count#47, sum#48, count#49, sum#50, count#51] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] (19) HashAggregate [codegen id : 2] -Input [5]: [count#48, sum#49, count#50, sum#51, count#52] +Input [5]: [count#47, sum#48, count#49, sum#50, count#51] Keys: [] -Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#40)), avg(UnscaledValue(ss_net_paid#41))] -Aggregate Attributes [3]: [count(1)#54, avg(UnscaledValue(ss_ext_discount_amt#40))#55, avg(UnscaledValue(ss_net_paid#41))#56] -Results [3]: [count(1)#54 AS count(1)#57, cast((avg(UnscaledValue(ss_ext_discount_amt#40))#55 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#58, cast((avg(UnscaledValue(ss_net_paid#41))#56 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#59] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#39)), avg(UnscaledValue(ss_net_paid#40))] +Aggregate Attributes [3]: [count(1)#52, avg(UnscaledValue(ss_ext_discount_amt#39))#53, avg(UnscaledValue(ss_net_paid#40))#54] +Results [3]: [count(1)#52 AS count(1)#55, cast((avg(UnscaledValue(ss_ext_discount_amt#39))#53 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#56, cast((avg(UnscaledValue(ss_net_paid#40))#54 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#57] (20) Project [codegen id : 2] -Output [1]: [named_struct(count(1), count(1)#57, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#58, avg(ss_net_paid), avg(ss_net_paid)#59) AS mergedValue#60] -Input [3]: [count(1)#57, avg(ss_ext_discount_amt)#58, avg(ss_net_paid)#59] +Output [1]: [named_struct(count(1), count(1)#55, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#56, avg(ss_net_paid), avg(ss_net_paid)#57) AS mergedValue#58] +Input [3]: [count(1)#55, avg(ss_ext_discount_amt)#56, avg(ss_net_paid)#57] Subquery:5 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#5, [id=#6] @@ -147,44 +147,44 @@ Subquery:7 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquery (21) Scan parquet default.store_sales -Output [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] +Output [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,41), LessThanOrEqual(ss_quantity,60)] ReadSchema: struct (22) ColumnarToRow [codegen id : 1] -Input [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] +Input [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] (23) Filter [codegen id : 1] -Input [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] -Condition : ((isnotnull(ss_quantity#61) AND (ss_quantity#61 >= 41)) AND (ss_quantity#61 <= 60)) +Input [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] +Condition : ((isnotnull(ss_quantity#59) AND (ss_quantity#59 >= 41)) AND (ss_quantity#59 <= 60)) (24) Project [codegen id : 1] -Output [2]: [ss_ext_discount_amt#62, ss_net_paid#63] -Input [4]: [ss_quantity#61, ss_ext_discount_amt#62, ss_net_paid#63, ss_sold_date_sk#64] +Output [2]: [ss_ext_discount_amt#60, ss_net_paid#61] +Input [4]: [ss_quantity#59, ss_ext_discount_amt#60, ss_net_paid#61, ss_sold_date_sk#62] (25) HashAggregate [codegen id : 1] -Input [2]: [ss_ext_discount_amt#62, ss_net_paid#63] +Input [2]: [ss_ext_discount_amt#60, ss_net_paid#61] Keys: [] -Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#62)), partial_avg(UnscaledValue(ss_net_paid#63))] -Aggregate Attributes [5]: [count#65, sum#66, count#67, sum#68, count#69] -Results [5]: [count#70, sum#71, count#72, sum#73, count#74] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#60)), partial_avg(UnscaledValue(ss_net_paid#61))] +Aggregate Attributes [5]: [count#63, sum#64, count#65, sum#66, count#67] +Results [5]: [count#68, sum#69, count#70, sum#71, count#72] (26) Exchange -Input [5]: [count#70, sum#71, count#72, sum#73, count#74] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#75] +Input [5]: [count#68, sum#69, count#70, sum#71, count#72] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=3] (27) HashAggregate [codegen id : 2] -Input [5]: [count#70, sum#71, count#72, sum#73, count#74] +Input [5]: [count#68, sum#69, count#70, sum#71, count#72] Keys: [] -Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#62)), avg(UnscaledValue(ss_net_paid#63))] -Aggregate Attributes [3]: [count(1)#76, avg(UnscaledValue(ss_ext_discount_amt#62))#77, avg(UnscaledValue(ss_net_paid#63))#78] -Results [3]: [count(1)#76 AS count(1)#79, cast((avg(UnscaledValue(ss_ext_discount_amt#62))#77 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#80, cast((avg(UnscaledValue(ss_net_paid#63))#78 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#81] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#60)), avg(UnscaledValue(ss_net_paid#61))] +Aggregate Attributes [3]: [count(1)#73, avg(UnscaledValue(ss_ext_discount_amt#60))#74, avg(UnscaledValue(ss_net_paid#61))#75] +Results [3]: [count(1)#73 AS count(1)#76, cast((avg(UnscaledValue(ss_ext_discount_amt#60))#74 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#77, cast((avg(UnscaledValue(ss_net_paid#61))#75 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#78] (28) Project [codegen id : 2] -Output [1]: [named_struct(count(1), count(1)#79, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#80, avg(ss_net_paid), avg(ss_net_paid)#81) AS mergedValue#82] -Input [3]: [count(1)#79, avg(ss_ext_discount_amt)#80, avg(ss_net_paid)#81] +Output [1]: [named_struct(count(1), count(1)#76, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#77, avg(ss_net_paid), avg(ss_net_paid)#78) AS mergedValue#79] +Input [3]: [count(1)#76, avg(ss_ext_discount_amt)#77, avg(ss_net_paid)#78] Subquery:8 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#8, [id=#9] @@ -202,44 +202,44 @@ Subquery:10 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquer (29) Scan parquet default.store_sales -Output [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] +Output [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,61), LessThanOrEqual(ss_quantity,80)] ReadSchema: struct (30) ColumnarToRow [codegen id : 1] -Input [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] +Input [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] (31) Filter [codegen id : 1] -Input [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] -Condition : ((isnotnull(ss_quantity#83) AND (ss_quantity#83 >= 61)) AND (ss_quantity#83 <= 80)) +Input [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] +Condition : ((isnotnull(ss_quantity#80) AND (ss_quantity#80 >= 61)) AND (ss_quantity#80 <= 80)) (32) Project [codegen id : 1] -Output [2]: [ss_ext_discount_amt#84, ss_net_paid#85] -Input [4]: [ss_quantity#83, ss_ext_discount_amt#84, ss_net_paid#85, ss_sold_date_sk#86] +Output [2]: [ss_ext_discount_amt#81, ss_net_paid#82] +Input [4]: [ss_quantity#80, ss_ext_discount_amt#81, ss_net_paid#82, ss_sold_date_sk#83] (33) HashAggregate [codegen id : 1] -Input [2]: [ss_ext_discount_amt#84, ss_net_paid#85] +Input [2]: [ss_ext_discount_amt#81, ss_net_paid#82] Keys: [] -Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#84)), partial_avg(UnscaledValue(ss_net_paid#85))] -Aggregate Attributes [5]: [count#87, sum#88, count#89, sum#90, count#91] -Results [5]: [count#92, sum#93, count#94, sum#95, count#96] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#81)), partial_avg(UnscaledValue(ss_net_paid#82))] +Aggregate Attributes [5]: [count#84, sum#85, count#86, sum#87, count#88] +Results [5]: [count#89, sum#90, count#91, sum#92, count#93] (34) Exchange -Input [5]: [count#92, sum#93, count#94, sum#95, count#96] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#97] +Input [5]: [count#89, sum#90, count#91, sum#92, count#93] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (35) HashAggregate [codegen id : 2] -Input [5]: [count#92, sum#93, count#94, sum#95, count#96] +Input [5]: [count#89, sum#90, count#91, sum#92, count#93] Keys: [] -Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#84)), avg(UnscaledValue(ss_net_paid#85))] -Aggregate Attributes [3]: [count(1)#98, avg(UnscaledValue(ss_ext_discount_amt#84))#99, avg(UnscaledValue(ss_net_paid#85))#100] -Results [3]: [count(1)#98 AS count(1)#101, cast((avg(UnscaledValue(ss_ext_discount_amt#84))#99 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#102, cast((avg(UnscaledValue(ss_net_paid#85))#100 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#103] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#81)), avg(UnscaledValue(ss_net_paid#82))] +Aggregate Attributes [3]: [count(1)#94, avg(UnscaledValue(ss_ext_discount_amt#81))#95, avg(UnscaledValue(ss_net_paid#82))#96] +Results [3]: [count(1)#94 AS count(1)#97, cast((avg(UnscaledValue(ss_ext_discount_amt#81))#95 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#98, cast((avg(UnscaledValue(ss_net_paid#82))#96 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#99] (36) Project [codegen id : 2] -Output [1]: [named_struct(count(1), count(1)#101, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#102, avg(ss_net_paid), avg(ss_net_paid)#103) AS mergedValue#104] -Input [3]: [count(1)#101, avg(ss_ext_discount_amt)#102, avg(ss_net_paid)#103] +Output [1]: [named_struct(count(1), count(1)#97, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#98, avg(ss_net_paid), avg(ss_net_paid)#99) AS mergedValue#100] +Input [3]: [count(1)#97, avg(ss_ext_discount_amt)#98, avg(ss_net_paid)#99] Subquery:11 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#11, [id=#12] @@ -257,44 +257,44 @@ Subquery:13 Hosting operator id = 4 Hosting Expression = Subquery scalar-subquer (37) Scan parquet default.store_sales -Output [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] +Output [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_quantity), GreaterThanOrEqual(ss_quantity,81), LessThanOrEqual(ss_quantity,100)] ReadSchema: struct (38) ColumnarToRow [codegen id : 1] -Input [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] +Input [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] (39) Filter [codegen id : 1] -Input [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] -Condition : ((isnotnull(ss_quantity#105) AND (ss_quantity#105 >= 81)) AND (ss_quantity#105 <= 100)) +Input [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] +Condition : ((isnotnull(ss_quantity#101) AND (ss_quantity#101 >= 81)) AND (ss_quantity#101 <= 100)) (40) Project [codegen id : 1] -Output [2]: [ss_ext_discount_amt#106, ss_net_paid#107] -Input [4]: [ss_quantity#105, ss_ext_discount_amt#106, ss_net_paid#107, ss_sold_date_sk#108] +Output [2]: [ss_ext_discount_amt#102, ss_net_paid#103] +Input [4]: [ss_quantity#101, ss_ext_discount_amt#102, ss_net_paid#103, ss_sold_date_sk#104] (41) HashAggregate [codegen id : 1] -Input [2]: [ss_ext_discount_amt#106, ss_net_paid#107] +Input [2]: [ss_ext_discount_amt#102, ss_net_paid#103] Keys: [] -Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#106)), partial_avg(UnscaledValue(ss_net_paid#107))] -Aggregate Attributes [5]: [count#109, sum#110, count#111, sum#112, count#113] -Results [5]: [count#114, sum#115, count#116, sum#117, count#118] +Functions [3]: [partial_count(1), partial_avg(UnscaledValue(ss_ext_discount_amt#102)), partial_avg(UnscaledValue(ss_net_paid#103))] +Aggregate Attributes [5]: [count#105, sum#106, count#107, sum#108, count#109] +Results [5]: [count#110, sum#111, count#112, sum#113, count#114] (42) Exchange -Input [5]: [count#114, sum#115, count#116, sum#117, count#118] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#119] +Input [5]: [count#110, sum#111, count#112, sum#113, count#114] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] (43) HashAggregate [codegen id : 2] -Input [5]: [count#114, sum#115, count#116, sum#117, count#118] +Input [5]: [count#110, sum#111, count#112, sum#113, count#114] Keys: [] -Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#106)), avg(UnscaledValue(ss_net_paid#107))] -Aggregate Attributes [3]: [count(1)#120, avg(UnscaledValue(ss_ext_discount_amt#106))#121, avg(UnscaledValue(ss_net_paid#107))#122] -Results [3]: [count(1)#120 AS count(1)#123, cast((avg(UnscaledValue(ss_ext_discount_amt#106))#121 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#124, cast((avg(UnscaledValue(ss_net_paid#107))#122 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#125] +Functions [3]: [count(1), avg(UnscaledValue(ss_ext_discount_amt#102)), avg(UnscaledValue(ss_net_paid#103))] +Aggregate Attributes [3]: [count(1)#115, avg(UnscaledValue(ss_ext_discount_amt#102))#116, avg(UnscaledValue(ss_net_paid#103))#117] +Results [3]: [count(1)#115 AS count(1)#118, cast((avg(UnscaledValue(ss_ext_discount_amt#102))#116 / 100.0) as decimal(11,6)) AS avg(ss_ext_discount_amt)#119, cast((avg(UnscaledValue(ss_net_paid#103))#117 / 100.0) as decimal(11,6)) AS avg(ss_net_paid)#120] (44) Project [codegen id : 2] -Output [1]: [named_struct(count(1), count(1)#123, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#124, avg(ss_net_paid), avg(ss_net_paid)#125) AS mergedValue#126] -Input [3]: [count(1)#123, avg(ss_ext_discount_amt)#124, avg(ss_net_paid)#125] +Output [1]: [named_struct(count(1), count(1)#118, avg(ss_ext_discount_amt), avg(ss_ext_discount_amt)#119, avg(ss_net_paid), avg(ss_net_paid)#120) AS mergedValue#121] +Input [3]: [count(1)#118, avg(ss_ext_discount_amt)#119, avg(ss_net_paid)#120] Subquery:14 Hosting operator id = 4 Hosting Expression = ReusedSubquery Subquery scalar-subquery#14, [id=#15] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt index 095c3d531a509..e9b15a8928cbe 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90.sf100/explain.txt @@ -90,7 +90,7 @@ Input [2]: [wp_web_page_sk#5, wp_char_count#6] (9) BroadcastExchange Input [1]: [wp_web_page_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ws_web_page_sk#3] @@ -102,184 +102,184 @@ Output [2]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2] Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, wp_web_page_sk#5] (12) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#8, hd_dep_count#9] +Output [2]: [hd_demo_sk#7, hd_dep_count#8] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,6), IsNotNull(hd_demo_sk)] ReadSchema: struct (13) ColumnarToRow [codegen id : 2] -Input [2]: [hd_demo_sk#8, hd_dep_count#9] +Input [2]: [hd_demo_sk#7, hd_dep_count#8] (14) Filter [codegen id : 2] -Input [2]: [hd_demo_sk#8, hd_dep_count#9] -Condition : ((isnotnull(hd_dep_count#9) AND (hd_dep_count#9 = 6)) AND isnotnull(hd_demo_sk#8)) +Input [2]: [hd_demo_sk#7, hd_dep_count#8] +Condition : ((isnotnull(hd_dep_count#8) AND (hd_dep_count#8 = 6)) AND isnotnull(hd_demo_sk#7)) (15) Project [codegen id : 2] -Output [1]: [hd_demo_sk#8] -Input [2]: [hd_demo_sk#8, hd_dep_count#9] +Output [1]: [hd_demo_sk#7] +Input [2]: [hd_demo_sk#7, hd_dep_count#8] (16) BroadcastExchange -Input [1]: [hd_demo_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Input [1]: [hd_demo_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ws_ship_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#8] +Right keys [1]: [hd_demo_sk#7] Join condition: None (18) Project [codegen id : 4] Output [1]: [ws_sold_time_sk#1] -Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, hd_demo_sk#8] +Input [3]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, hd_demo_sk#7] (19) Scan parquet default.time_dim -Output [2]: [t_time_sk#11, t_hour#12] +Output [2]: [t_time_sk#9, t_hour#10] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)] ReadSchema: struct (20) ColumnarToRow [codegen id : 3] -Input [2]: [t_time_sk#11, t_hour#12] +Input [2]: [t_time_sk#9, t_hour#10] (21) Filter [codegen id : 3] -Input [2]: [t_time_sk#11, t_hour#12] -Condition : (((isnotnull(t_hour#12) AND (t_hour#12 >= 8)) AND (t_hour#12 <= 9)) AND isnotnull(t_time_sk#11)) +Input [2]: [t_time_sk#9, t_hour#10] +Condition : (((isnotnull(t_hour#10) AND (t_hour#10 >= 8)) AND (t_hour#10 <= 9)) AND isnotnull(t_time_sk#9)) (22) Project [codegen id : 3] -Output [1]: [t_time_sk#11] -Input [2]: [t_time_sk#11, t_hour#12] +Output [1]: [t_time_sk#9] +Input [2]: [t_time_sk#9, t_hour#10] (23) BroadcastExchange -Input [1]: [t_time_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +Input [1]: [t_time_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ws_sold_time_sk#1] -Right keys [1]: [t_time_sk#11] +Right keys [1]: [t_time_sk#9] Join condition: None (25) Project [codegen id : 4] Output: [] -Input [2]: [ws_sold_time_sk#1, t_time_sk#11] +Input [2]: [ws_sold_time_sk#1, t_time_sk#9] (26) HashAggregate [codegen id : 4] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#14] -Results [1]: [count#15] +Aggregate Attributes [1]: [count#11] +Results [1]: [count#12] (27) Exchange -Input [1]: [count#15] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#16] +Input [1]: [count#12] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 10] -Input [1]: [count#15] +Input [1]: [count#12] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#17] -Results [1]: [count(1)#17 AS amc#18] +Aggregate Attributes [1]: [count(1)#13] +Results [1]: [count(1)#13 AS amc#14] (29) Scan parquet default.web_sales -Output [4]: [ws_sold_time_sk#19, ws_ship_hdemo_sk#20, ws_web_page_sk#21, ws_sold_date_sk#22] +Output [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 8] -Input [4]: [ws_sold_time_sk#19, ws_ship_hdemo_sk#20, ws_web_page_sk#21, ws_sold_date_sk#22] +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] (31) Filter [codegen id : 8] -Input [4]: [ws_sold_time_sk#19, ws_ship_hdemo_sk#20, ws_web_page_sk#21, ws_sold_date_sk#22] -Condition : ((isnotnull(ws_ship_hdemo_sk#20) AND isnotnull(ws_sold_time_sk#19)) AND isnotnull(ws_web_page_sk#21)) +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] +Condition : ((isnotnull(ws_ship_hdemo_sk#16) AND isnotnull(ws_sold_time_sk#15)) AND isnotnull(ws_web_page_sk#17)) (32) Project [codegen id : 8] -Output [3]: [ws_sold_time_sk#19, ws_ship_hdemo_sk#20, ws_web_page_sk#21] -Input [4]: [ws_sold_time_sk#19, ws_ship_hdemo_sk#20, ws_web_page_sk#21, ws_sold_date_sk#22] +Output [3]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17] +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] (33) ReusedExchange [Reuses operator id: 9] -Output [1]: [wp_web_page_sk#23] +Output [1]: [wp_web_page_sk#19] (34) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_web_page_sk#21] -Right keys [1]: [wp_web_page_sk#23] +Left keys [1]: [ws_web_page_sk#17] +Right keys [1]: [wp_web_page_sk#19] Join condition: None (35) Project [codegen id : 8] -Output [2]: [ws_sold_time_sk#19, ws_ship_hdemo_sk#20] -Input [4]: [ws_sold_time_sk#19, ws_ship_hdemo_sk#20, ws_web_page_sk#21, wp_web_page_sk#23] +Output [2]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16] +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, wp_web_page_sk#19] (36) ReusedExchange [Reuses operator id: 16] -Output [1]: [hd_demo_sk#24] +Output [1]: [hd_demo_sk#20] (37) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_ship_hdemo_sk#20] -Right keys [1]: [hd_demo_sk#24] +Left keys [1]: [ws_ship_hdemo_sk#16] +Right keys [1]: [hd_demo_sk#20] Join condition: None (38) Project [codegen id : 8] -Output [1]: [ws_sold_time_sk#19] -Input [3]: [ws_sold_time_sk#19, ws_ship_hdemo_sk#20, hd_demo_sk#24] +Output [1]: [ws_sold_time_sk#15] +Input [3]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, hd_demo_sk#20] (39) Scan parquet default.time_dim -Output [2]: [t_time_sk#25, t_hour#26] +Output [2]: [t_time_sk#21, t_hour#22] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)] ReadSchema: struct (40) ColumnarToRow [codegen id : 7] -Input [2]: [t_time_sk#25, t_hour#26] +Input [2]: [t_time_sk#21, t_hour#22] (41) Filter [codegen id : 7] -Input [2]: [t_time_sk#25, t_hour#26] -Condition : (((isnotnull(t_hour#26) AND (t_hour#26 >= 19)) AND (t_hour#26 <= 20)) AND isnotnull(t_time_sk#25)) +Input [2]: [t_time_sk#21, t_hour#22] +Condition : (((isnotnull(t_hour#22) AND (t_hour#22 >= 19)) AND (t_hour#22 <= 20)) AND isnotnull(t_time_sk#21)) (42) Project [codegen id : 7] -Output [1]: [t_time_sk#25] -Input [2]: [t_time_sk#25, t_hour#26] +Output [1]: [t_time_sk#21] +Input [2]: [t_time_sk#21, t_hour#22] (43) BroadcastExchange -Input [1]: [t_time_sk#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] +Input [1]: [t_time_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (44) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_sold_time_sk#19] -Right keys [1]: [t_time_sk#25] +Left keys [1]: [ws_sold_time_sk#15] +Right keys [1]: [t_time_sk#21] Join condition: None (45) Project [codegen id : 8] Output: [] -Input [2]: [ws_sold_time_sk#19, t_time_sk#25] +Input [2]: [ws_sold_time_sk#15, t_time_sk#21] (46) HashAggregate [codegen id : 8] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#28] -Results [1]: [count#29] +Aggregate Attributes [1]: [count#23] +Results [1]: [count#24] (47) Exchange -Input [1]: [count#29] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#30] +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] (48) HashAggregate [codegen id : 9] -Input [1]: [count#29] +Input [1]: [count#24] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#31] -Results [1]: [count(1)#31 AS pmc#32] +Aggregate Attributes [1]: [count(1)#25] +Results [1]: [count(1)#25 AS pmc#26] (49) BroadcastExchange -Input [1]: [pmc#32] -Arguments: IdentityBroadcastMode, [id=#33] +Input [1]: [pmc#26] +Arguments: IdentityBroadcastMode, [plan_id=7] (50) BroadcastNestedLoopJoin [codegen id : 10] Join condition: None (51) Project [codegen id : 10] -Output [1]: [CheckOverflow((promote_precision(cast(amc#18 as decimal(15,4))) / promote_precision(cast(pmc#32 as decimal(15,4)))), DecimalType(35,20)) AS am_pm_ratio#34] -Input [2]: [amc#18, pmc#32] +Output [1]: [CheckOverflow((promote_precision(cast(amc#14 as decimal(15,4))) / promote_precision(cast(pmc#26 as decimal(15,4)))), DecimalType(35,20)) AS am_pm_ratio#27] +Input [2]: [amc#14, pmc#26] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt index e9884d694852d..c333bed23a03c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q90/explain.txt @@ -90,7 +90,7 @@ Input [2]: [hd_demo_sk#5, hd_dep_count#6] (9) BroadcastExchange Input [1]: [hd_demo_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ws_ship_hdemo_sk#2] @@ -102,184 +102,184 @@ Output [2]: [ws_sold_time_sk#1, ws_web_page_sk#3] Input [4]: [ws_sold_time_sk#1, ws_ship_hdemo_sk#2, ws_web_page_sk#3, hd_demo_sk#5] (12) Scan parquet default.time_dim -Output [2]: [t_time_sk#8, t_hour#9] +Output [2]: [t_time_sk#7, t_hour#8] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,8), LessThanOrEqual(t_hour,9), IsNotNull(t_time_sk)] ReadSchema: struct (13) ColumnarToRow [codegen id : 2] -Input [2]: [t_time_sk#8, t_hour#9] +Input [2]: [t_time_sk#7, t_hour#8] (14) Filter [codegen id : 2] -Input [2]: [t_time_sk#8, t_hour#9] -Condition : (((isnotnull(t_hour#9) AND (t_hour#9 >= 8)) AND (t_hour#9 <= 9)) AND isnotnull(t_time_sk#8)) +Input [2]: [t_time_sk#7, t_hour#8] +Condition : (((isnotnull(t_hour#8) AND (t_hour#8 >= 8)) AND (t_hour#8 <= 9)) AND isnotnull(t_time_sk#7)) (15) Project [codegen id : 2] -Output [1]: [t_time_sk#8] -Input [2]: [t_time_sk#8, t_hour#9] +Output [1]: [t_time_sk#7] +Input [2]: [t_time_sk#7, t_hour#8] (16) BroadcastExchange -Input [1]: [t_time_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Input [1]: [t_time_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ws_sold_time_sk#1] -Right keys [1]: [t_time_sk#8] +Right keys [1]: [t_time_sk#7] Join condition: None (18) Project [codegen id : 4] Output [1]: [ws_web_page_sk#3] -Input [3]: [ws_sold_time_sk#1, ws_web_page_sk#3, t_time_sk#8] +Input [3]: [ws_sold_time_sk#1, ws_web_page_sk#3, t_time_sk#7] (19) Scan parquet default.web_page -Output [2]: [wp_web_page_sk#11, wp_char_count#12] +Output [2]: [wp_web_page_sk#9, wp_char_count#10] Batched: true Location [not included in comparison]/{warehouse_dir}/web_page] PushedFilters: [IsNotNull(wp_char_count), GreaterThanOrEqual(wp_char_count,5000), LessThanOrEqual(wp_char_count,5200), IsNotNull(wp_web_page_sk)] ReadSchema: struct (20) ColumnarToRow [codegen id : 3] -Input [2]: [wp_web_page_sk#11, wp_char_count#12] +Input [2]: [wp_web_page_sk#9, wp_char_count#10] (21) Filter [codegen id : 3] -Input [2]: [wp_web_page_sk#11, wp_char_count#12] -Condition : (((isnotnull(wp_char_count#12) AND (wp_char_count#12 >= 5000)) AND (wp_char_count#12 <= 5200)) AND isnotnull(wp_web_page_sk#11)) +Input [2]: [wp_web_page_sk#9, wp_char_count#10] +Condition : (((isnotnull(wp_char_count#10) AND (wp_char_count#10 >= 5000)) AND (wp_char_count#10 <= 5200)) AND isnotnull(wp_web_page_sk#9)) (22) Project [codegen id : 3] -Output [1]: [wp_web_page_sk#11] -Input [2]: [wp_web_page_sk#11, wp_char_count#12] +Output [1]: [wp_web_page_sk#9] +Input [2]: [wp_web_page_sk#9, wp_char_count#10] (23) BroadcastExchange -Input [1]: [wp_web_page_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +Input [1]: [wp_web_page_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ws_web_page_sk#3] -Right keys [1]: [wp_web_page_sk#11] +Right keys [1]: [wp_web_page_sk#9] Join condition: None (25) Project [codegen id : 4] Output: [] -Input [2]: [ws_web_page_sk#3, wp_web_page_sk#11] +Input [2]: [ws_web_page_sk#3, wp_web_page_sk#9] (26) HashAggregate [codegen id : 4] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#14] -Results [1]: [count#15] +Aggregate Attributes [1]: [count#11] +Results [1]: [count#12] (27) Exchange -Input [1]: [count#15] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#16] +Input [1]: [count#12] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 10] -Input [1]: [count#15] +Input [1]: [count#12] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#17] -Results [1]: [count(1)#17 AS amc#18] +Aggregate Attributes [1]: [count(1)#13] +Results [1]: [count(1)#13 AS amc#14] (29) Scan parquet default.web_sales -Output [4]: [ws_sold_time_sk#19, ws_ship_hdemo_sk#20, ws_web_page_sk#21, ws_sold_date_sk#22] +Output [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_ship_hdemo_sk), IsNotNull(ws_sold_time_sk), IsNotNull(ws_web_page_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 8] -Input [4]: [ws_sold_time_sk#19, ws_ship_hdemo_sk#20, ws_web_page_sk#21, ws_sold_date_sk#22] +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] (31) Filter [codegen id : 8] -Input [4]: [ws_sold_time_sk#19, ws_ship_hdemo_sk#20, ws_web_page_sk#21, ws_sold_date_sk#22] -Condition : ((isnotnull(ws_ship_hdemo_sk#20) AND isnotnull(ws_sold_time_sk#19)) AND isnotnull(ws_web_page_sk#21)) +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] +Condition : ((isnotnull(ws_ship_hdemo_sk#16) AND isnotnull(ws_sold_time_sk#15)) AND isnotnull(ws_web_page_sk#17)) (32) Project [codegen id : 8] -Output [3]: [ws_sold_time_sk#19, ws_ship_hdemo_sk#20, ws_web_page_sk#21] -Input [4]: [ws_sold_time_sk#19, ws_ship_hdemo_sk#20, ws_web_page_sk#21, ws_sold_date_sk#22] +Output [3]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17] +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, ws_sold_date_sk#18] (33) ReusedExchange [Reuses operator id: 9] -Output [1]: [hd_demo_sk#23] +Output [1]: [hd_demo_sk#19] (34) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_ship_hdemo_sk#20] -Right keys [1]: [hd_demo_sk#23] +Left keys [1]: [ws_ship_hdemo_sk#16] +Right keys [1]: [hd_demo_sk#19] Join condition: None (35) Project [codegen id : 8] -Output [2]: [ws_sold_time_sk#19, ws_web_page_sk#21] -Input [4]: [ws_sold_time_sk#19, ws_ship_hdemo_sk#20, ws_web_page_sk#21, hd_demo_sk#23] +Output [2]: [ws_sold_time_sk#15, ws_web_page_sk#17] +Input [4]: [ws_sold_time_sk#15, ws_ship_hdemo_sk#16, ws_web_page_sk#17, hd_demo_sk#19] (36) Scan parquet default.time_dim -Output [2]: [t_time_sk#24, t_hour#25] +Output [2]: [t_time_sk#20, t_hour#21] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), GreaterThanOrEqual(t_hour,19), LessThanOrEqual(t_hour,20), IsNotNull(t_time_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 6] -Input [2]: [t_time_sk#24, t_hour#25] +Input [2]: [t_time_sk#20, t_hour#21] (38) Filter [codegen id : 6] -Input [2]: [t_time_sk#24, t_hour#25] -Condition : (((isnotnull(t_hour#25) AND (t_hour#25 >= 19)) AND (t_hour#25 <= 20)) AND isnotnull(t_time_sk#24)) +Input [2]: [t_time_sk#20, t_hour#21] +Condition : (((isnotnull(t_hour#21) AND (t_hour#21 >= 19)) AND (t_hour#21 <= 20)) AND isnotnull(t_time_sk#20)) (39) Project [codegen id : 6] -Output [1]: [t_time_sk#24] -Input [2]: [t_time_sk#24, t_hour#25] +Output [1]: [t_time_sk#20] +Input [2]: [t_time_sk#20, t_hour#21] (40) BroadcastExchange -Input [1]: [t_time_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [1]: [t_time_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (41) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_sold_time_sk#19] -Right keys [1]: [t_time_sk#24] +Left keys [1]: [ws_sold_time_sk#15] +Right keys [1]: [t_time_sk#20] Join condition: None (42) Project [codegen id : 8] -Output [1]: [ws_web_page_sk#21] -Input [3]: [ws_sold_time_sk#19, ws_web_page_sk#21, t_time_sk#24] +Output [1]: [ws_web_page_sk#17] +Input [3]: [ws_sold_time_sk#15, ws_web_page_sk#17, t_time_sk#20] (43) ReusedExchange [Reuses operator id: 23] -Output [1]: [wp_web_page_sk#27] +Output [1]: [wp_web_page_sk#22] (44) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_web_page_sk#21] -Right keys [1]: [wp_web_page_sk#27] +Left keys [1]: [ws_web_page_sk#17] +Right keys [1]: [wp_web_page_sk#22] Join condition: None (45) Project [codegen id : 8] Output: [] -Input [2]: [ws_web_page_sk#21, wp_web_page_sk#27] +Input [2]: [ws_web_page_sk#17, wp_web_page_sk#22] (46) HashAggregate [codegen id : 8] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#28] -Results [1]: [count#29] +Aggregate Attributes [1]: [count#23] +Results [1]: [count#24] (47) Exchange -Input [1]: [count#29] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#30] +Input [1]: [count#24] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] (48) HashAggregate [codegen id : 9] -Input [1]: [count#29] +Input [1]: [count#24] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#31] -Results [1]: [count(1)#31 AS pmc#32] +Aggregate Attributes [1]: [count(1)#25] +Results [1]: [count(1)#25 AS pmc#26] (49) BroadcastExchange -Input [1]: [pmc#32] -Arguments: IdentityBroadcastMode, [id=#33] +Input [1]: [pmc#26] +Arguments: IdentityBroadcastMode, [plan_id=7] (50) BroadcastNestedLoopJoin [codegen id : 10] Join condition: None (51) Project [codegen id : 10] -Output [1]: [CheckOverflow((promote_precision(cast(amc#18 as decimal(15,4))) / promote_precision(cast(pmc#32 as decimal(15,4)))), DecimalType(35,20)) AS am_pm_ratio#34] -Input [2]: [amc#18, pmc#32] +Output [1]: [CheckOverflow((promote_precision(cast(amc#14 as decimal(15,4))) / promote_precision(cast(pmc#26 as decimal(15,4)))), DecimalType(35,20)) AS am_pm_ratio#27] +Input [2]: [amc#14, pmc#26] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt index 337958cb2f546..8e599c576b500 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91.sf100/explain.txt @@ -74,7 +74,7 @@ Condition : ((((cd_marital_status#6 = M) AND (cd_education_status#7 = Unknown (7) BroadcastExchange Input [3]: [cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 7] Left keys [1]: [c_current_cdemo_sk#2] @@ -86,163 +86,163 @@ Output [5]: [c_customer_sk#1, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_mari Input [7]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_demo_sk#5, cd_marital_status#6, cd_education_status#7] (10) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#9, hd_buy_potential#10] +Output [2]: [hd_demo_sk#8, hd_buy_potential#9] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [2]: [hd_demo_sk#9, hd_buy_potential#10] +Input [2]: [hd_demo_sk#8, hd_buy_potential#9] (12) Filter [codegen id : 2] -Input [2]: [hd_demo_sk#9, hd_buy_potential#10] -Condition : ((isnotnull(hd_buy_potential#10) AND StartsWith(hd_buy_potential#10, Unknown)) AND isnotnull(hd_demo_sk#9)) +Input [2]: [hd_demo_sk#8, hd_buy_potential#9] +Condition : ((isnotnull(hd_buy_potential#9) AND StartsWith(hd_buy_potential#9, Unknown)) AND isnotnull(hd_demo_sk#8)) (13) Project [codegen id : 2] -Output [1]: [hd_demo_sk#9] -Input [2]: [hd_demo_sk#9, hd_buy_potential#10] +Output [1]: [hd_demo_sk#8] +Input [2]: [hd_demo_sk#8, hd_buy_potential#9] (14) BroadcastExchange -Input [1]: [hd_demo_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] +Input [1]: [hd_demo_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 7] Left keys [1]: [c_current_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#9] +Right keys [1]: [hd_demo_sk#8] Join condition: None (16) Project [codegen id : 7] Output [4]: [c_customer_sk#1, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7] -Input [6]: [c_customer_sk#1, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7, hd_demo_sk#9] +Input [6]: [c_customer_sk#1, c_current_hdemo_sk#3, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7, hd_demo_sk#8] (17) Scan parquet default.customer_address -Output [2]: [ca_address_sk#12, ca_gmt_offset#13] +Output [2]: [ca_address_sk#10, ca_gmt_offset#11] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)] ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [2]: [ca_address_sk#12, ca_gmt_offset#13] +Input [2]: [ca_address_sk#10, ca_gmt_offset#11] (19) Filter [codegen id : 3] -Input [2]: [ca_address_sk#12, ca_gmt_offset#13] -Condition : ((isnotnull(ca_gmt_offset#13) AND (ca_gmt_offset#13 = -7.00)) AND isnotnull(ca_address_sk#12)) +Input [2]: [ca_address_sk#10, ca_gmt_offset#11] +Condition : ((isnotnull(ca_gmt_offset#11) AND (ca_gmt_offset#11 = -7.00)) AND isnotnull(ca_address_sk#10)) (20) Project [codegen id : 3] -Output [1]: [ca_address_sk#12] -Input [2]: [ca_address_sk#12, ca_gmt_offset#13] +Output [1]: [ca_address_sk#10] +Input [2]: [ca_address_sk#10, ca_gmt_offset#11] (21) BroadcastExchange -Input [1]: [ca_address_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Input [1]: [ca_address_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (22) BroadcastHashJoin [codegen id : 7] Left keys [1]: [c_current_addr_sk#4] -Right keys [1]: [ca_address_sk#12] +Right keys [1]: [ca_address_sk#10] Join condition: None (23) Project [codegen id : 7] Output [3]: [c_customer_sk#1, cd_marital_status#6, cd_education_status#7] -Input [5]: [c_customer_sk#1, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7, ca_address_sk#12] +Input [5]: [c_customer_sk#1, c_current_addr_sk#4, cd_marital_status#6, cd_education_status#7, ca_address_sk#10] (24) Scan parquet default.catalog_returns -Output [4]: [cr_returning_customer_sk#15, cr_call_center_sk#16, cr_net_loss#17, cr_returned_date_sk#18] +Output [4]: [cr_returning_customer_sk#12, cr_call_center_sk#13, cr_net_loss#14, cr_returned_date_sk#15] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#18), dynamicpruningexpression(cr_returned_date_sk#18 IN dynamicpruning#19)] +PartitionFilters: [isnotnull(cr_returned_date_sk#15), dynamicpruningexpression(cr_returned_date_sk#15 IN dynamicpruning#16)] PushedFilters: [IsNotNull(cr_call_center_sk), IsNotNull(cr_returning_customer_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 5] -Input [4]: [cr_returning_customer_sk#15, cr_call_center_sk#16, cr_net_loss#17, cr_returned_date_sk#18] +Input [4]: [cr_returning_customer_sk#12, cr_call_center_sk#13, cr_net_loss#14, cr_returned_date_sk#15] (26) Filter [codegen id : 5] -Input [4]: [cr_returning_customer_sk#15, cr_call_center_sk#16, cr_net_loss#17, cr_returned_date_sk#18] -Condition : (isnotnull(cr_call_center_sk#16) AND isnotnull(cr_returning_customer_sk#15)) +Input [4]: [cr_returning_customer_sk#12, cr_call_center_sk#13, cr_net_loss#14, cr_returned_date_sk#15] +Condition : (isnotnull(cr_call_center_sk#13) AND isnotnull(cr_returning_customer_sk#12)) (27) ReusedExchange [Reuses operator id: 48] -Output [1]: [d_date_sk#20] +Output [1]: [d_date_sk#17] (28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cr_returned_date_sk#18] -Right keys [1]: [d_date_sk#20] +Left keys [1]: [cr_returned_date_sk#15] +Right keys [1]: [d_date_sk#17] Join condition: None (29) Project [codegen id : 5] -Output [3]: [cr_returning_customer_sk#15, cr_call_center_sk#16, cr_net_loss#17] -Input [5]: [cr_returning_customer_sk#15, cr_call_center_sk#16, cr_net_loss#17, cr_returned_date_sk#18, d_date_sk#20] +Output [3]: [cr_returning_customer_sk#12, cr_call_center_sk#13, cr_net_loss#14] +Input [5]: [cr_returning_customer_sk#12, cr_call_center_sk#13, cr_net_loss#14, cr_returned_date_sk#15, d_date_sk#17] (30) BroadcastExchange -Input [3]: [cr_returning_customer_sk#15, cr_call_center_sk#16, cr_net_loss#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] +Input [3]: [cr_returning_customer_sk#12, cr_call_center_sk#13, cr_net_loss#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (31) BroadcastHashJoin [codegen id : 7] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [cr_returning_customer_sk#15] +Right keys [1]: [cr_returning_customer_sk#12] Join condition: None (32) Project [codegen id : 7] -Output [4]: [cd_marital_status#6, cd_education_status#7, cr_call_center_sk#16, cr_net_loss#17] -Input [6]: [c_customer_sk#1, cd_marital_status#6, cd_education_status#7, cr_returning_customer_sk#15, cr_call_center_sk#16, cr_net_loss#17] +Output [4]: [cd_marital_status#6, cd_education_status#7, cr_call_center_sk#13, cr_net_loss#14] +Input [6]: [c_customer_sk#1, cd_marital_status#6, cd_education_status#7, cr_returning_customer_sk#12, cr_call_center_sk#13, cr_net_loss#14] (33) Scan parquet default.call_center -Output [4]: [cc_call_center_sk#22, cc_call_center_id#23, cc_name#24, cc_manager#25] +Output [4]: [cc_call_center_sk#18, cc_call_center_id#19, cc_name#20, cc_manager#21] Batched: true Location [not included in comparison]/{warehouse_dir}/call_center] PushedFilters: [IsNotNull(cc_call_center_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 6] -Input [4]: [cc_call_center_sk#22, cc_call_center_id#23, cc_name#24, cc_manager#25] +Input [4]: [cc_call_center_sk#18, cc_call_center_id#19, cc_name#20, cc_manager#21] (35) Filter [codegen id : 6] -Input [4]: [cc_call_center_sk#22, cc_call_center_id#23, cc_name#24, cc_manager#25] -Condition : isnotnull(cc_call_center_sk#22) +Input [4]: [cc_call_center_sk#18, cc_call_center_id#19, cc_name#20, cc_manager#21] +Condition : isnotnull(cc_call_center_sk#18) (36) BroadcastExchange -Input [4]: [cc_call_center_sk#22, cc_call_center_id#23, cc_name#24, cc_manager#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Input [4]: [cc_call_center_sk#18, cc_call_center_id#19, cc_name#20, cc_manager#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (37) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [cr_call_center_sk#16] -Right keys [1]: [cc_call_center_sk#22] +Left keys [1]: [cr_call_center_sk#13] +Right keys [1]: [cc_call_center_sk#18] Join condition: None (38) Project [codegen id : 7] -Output [6]: [cc_call_center_id#23, cc_name#24, cc_manager#25, cr_net_loss#17, cd_marital_status#6, cd_education_status#7] -Input [8]: [cd_marital_status#6, cd_education_status#7, cr_call_center_sk#16, cr_net_loss#17, cc_call_center_sk#22, cc_call_center_id#23, cc_name#24, cc_manager#25] +Output [6]: [cc_call_center_id#19, cc_name#20, cc_manager#21, cr_net_loss#14, cd_marital_status#6, cd_education_status#7] +Input [8]: [cd_marital_status#6, cd_education_status#7, cr_call_center_sk#13, cr_net_loss#14, cc_call_center_sk#18, cc_call_center_id#19, cc_name#20, cc_manager#21] (39) HashAggregate [codegen id : 7] -Input [6]: [cc_call_center_id#23, cc_name#24, cc_manager#25, cr_net_loss#17, cd_marital_status#6, cd_education_status#7] -Keys [5]: [cc_call_center_id#23, cc_name#24, cc_manager#25, cd_marital_status#6, cd_education_status#7] -Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#17))] -Aggregate Attributes [1]: [sum#27] -Results [6]: [cc_call_center_id#23, cc_name#24, cc_manager#25, cd_marital_status#6, cd_education_status#7, sum#28] +Input [6]: [cc_call_center_id#19, cc_name#20, cc_manager#21, cr_net_loss#14, cd_marital_status#6, cd_education_status#7] +Keys [5]: [cc_call_center_id#19, cc_name#20, cc_manager#21, cd_marital_status#6, cd_education_status#7] +Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#14))] +Aggregate Attributes [1]: [sum#22] +Results [6]: [cc_call_center_id#19, cc_name#20, cc_manager#21, cd_marital_status#6, cd_education_status#7, sum#23] (40) Exchange -Input [6]: [cc_call_center_id#23, cc_name#24, cc_manager#25, cd_marital_status#6, cd_education_status#7, sum#28] -Arguments: hashpartitioning(cc_call_center_id#23, cc_name#24, cc_manager#25, cd_marital_status#6, cd_education_status#7, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [6]: [cc_call_center_id#19, cc_name#20, cc_manager#21, cd_marital_status#6, cd_education_status#7, sum#23] +Arguments: hashpartitioning(cc_call_center_id#19, cc_name#20, cc_manager#21, cd_marital_status#6, cd_education_status#7, 5), ENSURE_REQUIREMENTS, [plan_id=6] (41) HashAggregate [codegen id : 8] -Input [6]: [cc_call_center_id#23, cc_name#24, cc_manager#25, cd_marital_status#6, cd_education_status#7, sum#28] -Keys [5]: [cc_call_center_id#23, cc_name#24, cc_manager#25, cd_marital_status#6, cd_education_status#7] -Functions [1]: [sum(UnscaledValue(cr_net_loss#17))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#17))#30] -Results [4]: [cc_call_center_id#23 AS Call_Center#31, cc_name#24 AS Call_Center_Name#32, cc_manager#25 AS Manager#33, MakeDecimal(sum(UnscaledValue(cr_net_loss#17))#30,17,2) AS Returns_Loss#34] +Input [6]: [cc_call_center_id#19, cc_name#20, cc_manager#21, cd_marital_status#6, cd_education_status#7, sum#23] +Keys [5]: [cc_call_center_id#19, cc_name#20, cc_manager#21, cd_marital_status#6, cd_education_status#7] +Functions [1]: [sum(UnscaledValue(cr_net_loss#14))] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#14))#24] +Results [4]: [cc_call_center_id#19 AS Call_Center#25, cc_name#20 AS Call_Center_Name#26, cc_manager#21 AS Manager#27, MakeDecimal(sum(UnscaledValue(cr_net_loss#14))#24,17,2) AS Returns_Loss#28] (42) Exchange -Input [4]: [Call_Center#31, Call_Center_Name#32, Manager#33, Returns_Loss#34] -Arguments: rangepartitioning(Returns_Loss#34 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#35] +Input [4]: [Call_Center#25, Call_Center_Name#26, Manager#27, Returns_Loss#28] +Arguments: rangepartitioning(Returns_Loss#28 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=7] (43) Sort [codegen id : 9] -Input [4]: [Call_Center#31, Call_Center_Name#32, Manager#33, Returns_Loss#34] -Arguments: [Returns_Loss#34 DESC NULLS LAST], true, 0 +Input [4]: [Call_Center#25, Call_Center_Name#26, Manager#27, Returns_Loss#28] +Arguments: [Returns_Loss#28 DESC NULLS LAST], true, 0 ===== Subqueries ===== -Subquery:1 Hosting operator id = 24 Hosting Expression = cr_returned_date_sk#18 IN dynamicpruning#19 +Subquery:1 Hosting operator id = 24 Hosting Expression = cr_returned_date_sk#15 IN dynamicpruning#16 BroadcastExchange (48) +- * Project (47) +- * Filter (46) @@ -251,25 +251,25 @@ BroadcastExchange (48) (44) Scan parquet default.date_dim -Output [3]: [d_date_sk#20, d_year#36, d_moy#37] +Output [3]: [d_date_sk#17, d_year#29, d_moy#30] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#20, d_year#36, d_moy#37] +Input [3]: [d_date_sk#17, d_year#29, d_moy#30] (46) Filter [codegen id : 1] -Input [3]: [d_date_sk#20, d_year#36, d_moy#37] -Condition : ((((isnotnull(d_year#36) AND isnotnull(d_moy#37)) AND (d_year#36 = 1998)) AND (d_moy#37 = 11)) AND isnotnull(d_date_sk#20)) +Input [3]: [d_date_sk#17, d_year#29, d_moy#30] +Condition : ((((isnotnull(d_year#29) AND isnotnull(d_moy#30)) AND (d_year#29 = 1998)) AND (d_moy#30 = 11)) AND isnotnull(d_date_sk#17)) (47) Project [codegen id : 1] -Output [1]: [d_date_sk#20] -Input [3]: [d_date_sk#20, d_year#36, d_moy#37] +Output [1]: [d_date_sk#17] +Input [3]: [d_date_sk#17, d_year#29, d_moy#30] (48) BroadcastExchange -Input [1]: [d_date_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#38] +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt index c56ad3e677ac9..9abdbe56d87cd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q91/explain.txt @@ -75,7 +75,7 @@ Condition : (isnotnull(cr_call_center_sk#6) AND isnotnull(cr_returning_customer_ (7) BroadcastExchange Input [4]: [cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 7] Left keys [1]: [cc_call_center_sk#1] @@ -87,158 +87,158 @@ Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer Input [8]: [cc_call_center_sk#1, cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_call_center_sk#6, cr_net_loss#7, cr_returned_date_sk#8] (10) ReusedExchange [Reuses operator id: 48] -Output [1]: [d_date_sk#11] +Output [1]: [d_date_sk#10] (11) BroadcastHashJoin [codegen id : 7] Left keys [1]: [cr_returned_date_sk#8] -Right keys [1]: [d_date_sk#11] +Right keys [1]: [d_date_sk#10] Join condition: None (12) Project [codegen id : 7] Output [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7] -Input [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8, d_date_sk#11] +Input [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, cr_returned_date_sk#8, d_date_sk#10] (13) Scan parquet default.customer -Output [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Output [4]: [c_customer_sk#11, c_current_cdemo_sk#12, c_current_hdemo_sk#13, c_current_addr_sk#14] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Input [4]: [c_customer_sk#11, c_current_cdemo_sk#12, c_current_hdemo_sk#13, c_current_addr_sk#14] (15) Filter [codegen id : 3] -Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] -Condition : (((isnotnull(c_customer_sk#12) AND isnotnull(c_current_addr_sk#15)) AND isnotnull(c_current_cdemo_sk#13)) AND isnotnull(c_current_hdemo_sk#14)) +Input [4]: [c_customer_sk#11, c_current_cdemo_sk#12, c_current_hdemo_sk#13, c_current_addr_sk#14] +Condition : (((isnotnull(c_customer_sk#11) AND isnotnull(c_current_addr_sk#14)) AND isnotnull(c_current_cdemo_sk#12)) AND isnotnull(c_current_hdemo_sk#13)) (16) BroadcastExchange -Input [4]: [c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] +Input [4]: [c_customer_sk#11, c_current_cdemo_sk#12, c_current_hdemo_sk#13, c_current_addr_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 7] Left keys [1]: [cr_returning_customer_sk#5] -Right keys [1]: [c_customer_sk#12] +Right keys [1]: [c_customer_sk#11] Join condition: None (18) Project [codegen id : 7] -Output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] -Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, c_customer_sk#12, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15] +Output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#12, c_current_hdemo_sk#13, c_current_addr_sk#14] +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_returning_customer_sk#5, cr_net_loss#7, c_customer_sk#11, c_current_cdemo_sk#12, c_current_hdemo_sk#13, c_current_addr_sk#14] (19) Scan parquet default.customer_address -Output [2]: [ca_address_sk#17, ca_gmt_offset#18] +Output [2]: [ca_address_sk#15, ca_gmt_offset#16] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_gmt_offset), EqualTo(ca_gmt_offset,-7.00), IsNotNull(ca_address_sk)] ReadSchema: struct (20) ColumnarToRow [codegen id : 4] -Input [2]: [ca_address_sk#17, ca_gmt_offset#18] +Input [2]: [ca_address_sk#15, ca_gmt_offset#16] (21) Filter [codegen id : 4] -Input [2]: [ca_address_sk#17, ca_gmt_offset#18] -Condition : ((isnotnull(ca_gmt_offset#18) AND (ca_gmt_offset#18 = -7.00)) AND isnotnull(ca_address_sk#17)) +Input [2]: [ca_address_sk#15, ca_gmt_offset#16] +Condition : ((isnotnull(ca_gmt_offset#16) AND (ca_gmt_offset#16 = -7.00)) AND isnotnull(ca_address_sk#15)) (22) Project [codegen id : 4] -Output [1]: [ca_address_sk#17] -Input [2]: [ca_address_sk#17, ca_gmt_offset#18] +Output [1]: [ca_address_sk#15] +Input [2]: [ca_address_sk#15, ca_gmt_offset#16] (23) BroadcastExchange -Input [1]: [ca_address_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] +Input [1]: [ca_address_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#15] -Right keys [1]: [ca_address_sk#17] +Left keys [1]: [c_current_addr_sk#14] +Right keys [1]: [ca_address_sk#15] Join condition: None (25) Project [codegen id : 7] -Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14] -Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, c_current_addr_sk#15, ca_address_sk#17] +Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#12, c_current_hdemo_sk#13] +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#12, c_current_hdemo_sk#13, c_current_addr_sk#14, ca_address_sk#15] (26) Scan parquet default.customer_demographics -Output [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Output [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [Or(And(EqualTo(cd_marital_status,M),EqualTo(cd_education_status,Unknown )),And(EqualTo(cd_marital_status,W),EqualTo(cd_education_status,Advanced Degree ))), IsNotNull(cd_demo_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 5] -Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] (28) Filter [codegen id : 5] -Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] -Condition : ((((cd_marital_status#21 = M) AND (cd_education_status#22 = Unknown )) OR ((cd_marital_status#21 = W) AND (cd_education_status#22 = Advanced Degree ))) AND isnotnull(cd_demo_sk#20)) +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Condition : ((((cd_marital_status#18 = M) AND (cd_education_status#19 = Unknown )) OR ((cd_marital_status#18 = W) AND (cd_education_status#19 = Advanced Degree ))) AND isnotnull(cd_demo_sk#17)) (29) BroadcastExchange -Input [3]: [cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] +Input [3]: [cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (30) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_cdemo_sk#13] -Right keys [1]: [cd_demo_sk#20] +Left keys [1]: [c_current_cdemo_sk#12] +Right keys [1]: [cd_demo_sk#17] Join condition: None (31) Project [codegen id : 7] -Output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#21, cd_education_status#22] -Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#13, c_current_hdemo_sk#14, cd_demo_sk#20, cd_marital_status#21, cd_education_status#22] +Output [7]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#13, cd_marital_status#18, cd_education_status#19] +Input [9]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_cdemo_sk#12, c_current_hdemo_sk#13, cd_demo_sk#17, cd_marital_status#18, cd_education_status#19] (32) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#24, hd_buy_potential#25] +Output [2]: [hd_demo_sk#20, hd_buy_potential#21] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_buy_potential), StringStartsWith(hd_buy_potential,Unknown), IsNotNull(hd_demo_sk)] ReadSchema: struct (33) ColumnarToRow [codegen id : 6] -Input [2]: [hd_demo_sk#24, hd_buy_potential#25] +Input [2]: [hd_demo_sk#20, hd_buy_potential#21] (34) Filter [codegen id : 6] -Input [2]: [hd_demo_sk#24, hd_buy_potential#25] -Condition : ((isnotnull(hd_buy_potential#25) AND StartsWith(hd_buy_potential#25, Unknown)) AND isnotnull(hd_demo_sk#24)) +Input [2]: [hd_demo_sk#20, hd_buy_potential#21] +Condition : ((isnotnull(hd_buy_potential#21) AND StartsWith(hd_buy_potential#21, Unknown)) AND isnotnull(hd_demo_sk#20)) (35) Project [codegen id : 6] -Output [1]: [hd_demo_sk#24] -Input [2]: [hd_demo_sk#24, hd_buy_potential#25] +Output [1]: [hd_demo_sk#20] +Input [2]: [hd_demo_sk#20, hd_buy_potential#21] (36) BroadcastExchange -Input [1]: [hd_demo_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [1]: [hd_demo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (37) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_hdemo_sk#14] -Right keys [1]: [hd_demo_sk#24] +Left keys [1]: [c_current_hdemo_sk#13] +Right keys [1]: [hd_demo_sk#20] Join condition: None (38) Project [codegen id : 7] -Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#21, cd_education_status#22] -Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#14, cd_marital_status#21, cd_education_status#22, hd_demo_sk#24] +Output [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#18, cd_education_status#19] +Input [8]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, c_current_hdemo_sk#13, cd_marital_status#18, cd_education_status#19, hd_demo_sk#20] (39) HashAggregate [codegen id : 7] -Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#21, cd_education_status#22] -Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#21, cd_education_status#22] +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cr_net_loss#7, cd_marital_status#18, cd_education_status#19] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#18, cd_education_status#19] Functions [1]: [partial_sum(UnscaledValue(cr_net_loss#7))] -Aggregate Attributes [1]: [sum#27] -Results [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#21, cd_education_status#22, sum#28] +Aggregate Attributes [1]: [sum#22] +Results [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#18, cd_education_status#19, sum#23] (40) Exchange -Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#21, cd_education_status#22, sum#28] -Arguments: hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#21, cd_education_status#22, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#18, cd_education_status#19, sum#23] +Arguments: hashpartitioning(cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#18, cd_education_status#19, 5), ENSURE_REQUIREMENTS, [plan_id=6] (41) HashAggregate [codegen id : 8] -Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#21, cd_education_status#22, sum#28] -Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#21, cd_education_status#22] +Input [6]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#18, cd_education_status#19, sum#23] +Keys [5]: [cc_call_center_id#2, cc_name#3, cc_manager#4, cd_marital_status#18, cd_education_status#19] Functions [1]: [sum(UnscaledValue(cr_net_loss#7))] -Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#7))#30] -Results [4]: [cc_call_center_id#2 AS Call_Center#31, cc_name#3 AS Call_Center_Name#32, cc_manager#4 AS Manager#33, MakeDecimal(sum(UnscaledValue(cr_net_loss#7))#30,17,2) AS Returns_Loss#34] +Aggregate Attributes [1]: [sum(UnscaledValue(cr_net_loss#7))#24] +Results [4]: [cc_call_center_id#2 AS Call_Center#25, cc_name#3 AS Call_Center_Name#26, cc_manager#4 AS Manager#27, MakeDecimal(sum(UnscaledValue(cr_net_loss#7))#24,17,2) AS Returns_Loss#28] (42) Exchange -Input [4]: [Call_Center#31, Call_Center_Name#32, Manager#33, Returns_Loss#34] -Arguments: rangepartitioning(Returns_Loss#34 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#35] +Input [4]: [Call_Center#25, Call_Center_Name#26, Manager#27, Returns_Loss#28] +Arguments: rangepartitioning(Returns_Loss#28 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=7] (43) Sort [codegen id : 9] -Input [4]: [Call_Center#31, Call_Center_Name#32, Manager#33, Returns_Loss#34] -Arguments: [Returns_Loss#34 DESC NULLS LAST], true, 0 +Input [4]: [Call_Center#25, Call_Center_Name#26, Manager#27, Returns_Loss#28] +Arguments: [Returns_Loss#28 DESC NULLS LAST], true, 0 ===== Subqueries ===== @@ -251,25 +251,25 @@ BroadcastExchange (48) (44) Scan parquet default.date_dim -Output [3]: [d_date_sk#11, d_year#36, d_moy#37] +Output [3]: [d_date_sk#10, d_year#29, d_moy#30] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,1998), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#36, d_moy#37] +Input [3]: [d_date_sk#10, d_year#29, d_moy#30] (46) Filter [codegen id : 1] -Input [3]: [d_date_sk#11, d_year#36, d_moy#37] -Condition : ((((isnotnull(d_year#36) AND isnotnull(d_moy#37)) AND (d_year#36 = 1998)) AND (d_moy#37 = 11)) AND isnotnull(d_date_sk#11)) +Input [3]: [d_date_sk#10, d_year#29, d_moy#30] +Condition : ((((isnotnull(d_year#29) AND isnotnull(d_moy#30)) AND (d_year#29 = 1998)) AND (d_moy#30 = 11)) AND isnotnull(d_date_sk#10)) (47) Project [codegen id : 1] -Output [1]: [d_date_sk#11] -Input [3]: [d_date_sk#11, d_year#36, d_moy#37] +Output [1]: [d_date_sk#10] +Input [3]: [d_date_sk#10, d_year#29, d_moy#30] (48) BroadcastExchange -Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#38] +Input [1]: [d_date_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt index 71aa2bb603946..c63918bb6520f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92.sf100/explain.txt @@ -50,127 +50,127 @@ Input [2]: [i_item_sk#1, i_manufact_id#2] (5) BroadcastExchange Input [1]: [i_item_sk#1] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#3] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (6) Scan parquet default.web_sales -Output [3]: [ws_item_sk#4, ws_ext_discount_amt#5, ws_sold_date_sk#6] +Output [3]: [ws_item_sk#3, ws_ext_discount_amt#4, ws_sold_date_sk#5] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#6), dynamicpruningexpression(ws_sold_date_sk#6 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ws_sold_date_sk#5), dynamicpruningexpression(ws_sold_date_sk#5 IN dynamicpruning#6)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [3]: [ws_item_sk#4, ws_ext_discount_amt#5, ws_sold_date_sk#6] +Input [3]: [ws_item_sk#3, ws_ext_discount_amt#4, ws_sold_date_sk#5] (8) Filter [codegen id : 3] -Input [3]: [ws_item_sk#4, ws_ext_discount_amt#5, ws_sold_date_sk#6] -Condition : isnotnull(ws_item_sk#4) +Input [3]: [ws_item_sk#3, ws_ext_discount_amt#4, ws_sold_date_sk#5] +Condition : isnotnull(ws_item_sk#3) (9) ReusedExchange [Reuses operator id: 34] -Output [1]: [d_date_sk#8] +Output [1]: [d_date_sk#7] (10) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ws_sold_date_sk#6] -Right keys [1]: [d_date_sk#8] +Left keys [1]: [ws_sold_date_sk#5] +Right keys [1]: [d_date_sk#7] Join condition: None (11) Project [codegen id : 3] -Output [2]: [ws_item_sk#4, ws_ext_discount_amt#5] -Input [4]: [ws_item_sk#4, ws_ext_discount_amt#5, ws_sold_date_sk#6, d_date_sk#8] +Output [2]: [ws_item_sk#3, ws_ext_discount_amt#4] +Input [4]: [ws_item_sk#3, ws_ext_discount_amt#4, ws_sold_date_sk#5, d_date_sk#7] (12) HashAggregate [codegen id : 3] -Input [2]: [ws_item_sk#4, ws_ext_discount_amt#5] -Keys [1]: [ws_item_sk#4] -Functions [1]: [partial_avg(UnscaledValue(ws_ext_discount_amt#5))] -Aggregate Attributes [2]: [sum#9, count#10] -Results [3]: [ws_item_sk#4, sum#11, count#12] +Input [2]: [ws_item_sk#3, ws_ext_discount_amt#4] +Keys [1]: [ws_item_sk#3] +Functions [1]: [partial_avg(UnscaledValue(ws_ext_discount_amt#4))] +Aggregate Attributes [2]: [sum#8, count#9] +Results [3]: [ws_item_sk#3, sum#10, count#11] (13) Exchange -Input [3]: [ws_item_sk#4, sum#11, count#12] -Arguments: hashpartitioning(ws_item_sk#4, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [3]: [ws_item_sk#3, sum#10, count#11] +Arguments: hashpartitioning(ws_item_sk#3, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) HashAggregate -Input [3]: [ws_item_sk#4, sum#11, count#12] -Keys [1]: [ws_item_sk#4] -Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#5))] -Aggregate Attributes [1]: [avg(UnscaledValue(ws_ext_discount_amt#5))#14] -Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(ws_ext_discount_amt#5))#14 / 100.0) as decimal(11,6)))), DecimalType(14,7)) AS (1.3 * avg(ws_ext_discount_amt))#15, ws_item_sk#4] +Input [3]: [ws_item_sk#3, sum#10, count#11] +Keys [1]: [ws_item_sk#3] +Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#4))] +Aggregate Attributes [1]: [avg(UnscaledValue(ws_ext_discount_amt#4))#12] +Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(ws_ext_discount_amt#4))#12 / 100.0) as decimal(11,6)))), DecimalType(14,7)) AS (1.3 * avg(ws_ext_discount_amt))#13, ws_item_sk#3] (15) Filter -Input [2]: [(1.3 * avg(ws_ext_discount_amt))#15, ws_item_sk#4] -Condition : isnotnull((1.3 * avg(ws_ext_discount_amt))#15) +Input [2]: [(1.3 * avg(ws_ext_discount_amt))#13, ws_item_sk#3] +Condition : isnotnull((1.3 * avg(ws_ext_discount_amt))#13) (16) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] -Right keys [1]: [ws_item_sk#4] +Right keys [1]: [ws_item_sk#3] Join condition: None (17) Project [codegen id : 4] -Output [2]: [i_item_sk#1, (1.3 * avg(ws_ext_discount_amt))#15] -Input [3]: [i_item_sk#1, (1.3 * avg(ws_ext_discount_amt))#15, ws_item_sk#4] +Output [2]: [i_item_sk#1, (1.3 * avg(ws_ext_discount_amt))#13] +Input [3]: [i_item_sk#1, (1.3 * avg(ws_ext_discount_amt))#13, ws_item_sk#3] (18) BroadcastExchange -Input [2]: [i_item_sk#1, (1.3 * avg(ws_ext_discount_amt))#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] +Input [2]: [i_item_sk#1, (1.3 * avg(ws_ext_discount_amt))#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (19) Scan parquet default.web_sales -Output [3]: [ws_item_sk#17, ws_ext_discount_amt#18, ws_sold_date_sk#19] +Output [3]: [ws_item_sk#14, ws_ext_discount_amt#15, ws_sold_date_sk#16] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#19), dynamicpruningexpression(ws_sold_date_sk#19 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ws_sold_date_sk#16), dynamicpruningexpression(ws_sold_date_sk#16 IN dynamicpruning#6)] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_ext_discount_amt)] ReadSchema: struct (20) ColumnarToRow -Input [3]: [ws_item_sk#17, ws_ext_discount_amt#18, ws_sold_date_sk#19] +Input [3]: [ws_item_sk#14, ws_ext_discount_amt#15, ws_sold_date_sk#16] (21) Filter -Input [3]: [ws_item_sk#17, ws_ext_discount_amt#18, ws_sold_date_sk#19] -Condition : (isnotnull(ws_item_sk#17) AND isnotnull(ws_ext_discount_amt#18)) +Input [3]: [ws_item_sk#14, ws_ext_discount_amt#15, ws_sold_date_sk#16] +Condition : (isnotnull(ws_item_sk#14) AND isnotnull(ws_ext_discount_amt#15)) (22) BroadcastHashJoin [codegen id : 6] Left keys [1]: [i_item_sk#1] -Right keys [1]: [ws_item_sk#17] -Join condition: (cast(ws_ext_discount_amt#18 as decimal(14,7)) > (1.3 * avg(ws_ext_discount_amt))#15) +Right keys [1]: [ws_item_sk#14] +Join condition: (cast(ws_ext_discount_amt#15 as decimal(14,7)) > (1.3 * avg(ws_ext_discount_amt))#13) (23) Project [codegen id : 6] -Output [2]: [ws_ext_discount_amt#18, ws_sold_date_sk#19] -Input [5]: [i_item_sk#1, (1.3 * avg(ws_ext_discount_amt))#15, ws_item_sk#17, ws_ext_discount_amt#18, ws_sold_date_sk#19] +Output [2]: [ws_ext_discount_amt#15, ws_sold_date_sk#16] +Input [5]: [i_item_sk#1, (1.3 * avg(ws_ext_discount_amt))#13, ws_item_sk#14, ws_ext_discount_amt#15, ws_sold_date_sk#16] (24) ReusedExchange [Reuses operator id: 34] -Output [1]: [d_date_sk#20] +Output [1]: [d_date_sk#17] (25) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#19] -Right keys [1]: [d_date_sk#20] +Left keys [1]: [ws_sold_date_sk#16] +Right keys [1]: [d_date_sk#17] Join condition: None (26) Project [codegen id : 6] -Output [1]: [ws_ext_discount_amt#18] -Input [3]: [ws_ext_discount_amt#18, ws_sold_date_sk#19, d_date_sk#20] +Output [1]: [ws_ext_discount_amt#15] +Input [3]: [ws_ext_discount_amt#15, ws_sold_date_sk#16, d_date_sk#17] (27) HashAggregate [codegen id : 6] -Input [1]: [ws_ext_discount_amt#18] +Input [1]: [ws_ext_discount_amt#15] Keys: [] -Functions [1]: [partial_sum(UnscaledValue(ws_ext_discount_amt#18))] -Aggregate Attributes [1]: [sum#21] -Results [1]: [sum#22] +Functions [1]: [partial_sum(UnscaledValue(ws_ext_discount_amt#15))] +Aggregate Attributes [1]: [sum#18] +Results [1]: [sum#19] (28) Exchange -Input [1]: [sum#22] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#23] +Input [1]: [sum#19] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 7] -Input [1]: [sum#22] +Input [1]: [sum#19] Keys: [] -Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#18))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#18))#24] -Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#18))#24,17,2) AS Excess Discount Amount #25] +Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#15))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#15))#20] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#15))#20,17,2) AS Excess Discount Amount #21] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ws_sold_date_sk#6 IN dynamicpruning#7 +Subquery:1 Hosting operator id = 6 Hosting Expression = ws_sold_date_sk#5 IN dynamicpruning#6 BroadcastExchange (34) +- * Project (33) +- * Filter (32) @@ -179,27 +179,27 @@ BroadcastExchange (34) (30) Scan parquet default.date_dim -Output [2]: [d_date_sk#8, d_date#26] +Output [2]: [d_date_sk#7, d_date#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#8, d_date#26] +Input [2]: [d_date_sk#7, d_date#22] (32) Filter [codegen id : 1] -Input [2]: [d_date_sk#8, d_date#26] -Condition : (((isnotnull(d_date#26) AND (d_date#26 >= 2000-01-27)) AND (d_date#26 <= 2000-04-26)) AND isnotnull(d_date_sk#8)) +Input [2]: [d_date_sk#7, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 2000-01-27)) AND (d_date#22 <= 2000-04-26)) AND isnotnull(d_date_sk#7)) (33) Project [codegen id : 1] -Output [1]: [d_date_sk#8] -Input [2]: [d_date_sk#8, d_date#26] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_date#22] (34) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] -Subquery:2 Hosting operator id = 19 Hosting Expression = ws_sold_date_sk#19 IN dynamicpruning#7 +Subquery:2 Hosting operator id = 19 Hosting Expression = ws_sold_date_sk#16 IN dynamicpruning#6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt index bec857eb2489a..e4c526881dbeb 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q92/explain.txt @@ -65,7 +65,7 @@ Input [2]: [i_item_sk#5, i_manufact_id#6] (8) BroadcastExchange Input [1]: [i_item_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_item_sk#1] @@ -77,96 +77,96 @@ Output [3]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#5] Input [4]: [ws_item_sk#1, ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#5] (11) Scan parquet default.web_sales -Output [3]: [ws_item_sk#8, ws_ext_discount_amt#9, ws_sold_date_sk#10] +Output [3]: [ws_item_sk#7, ws_ext_discount_amt#8, ws_sold_date_sk#9] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#10), dynamicpruningexpression(ws_sold_date_sk#10 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(ws_sold_date_sk#9), dynamicpruningexpression(ws_sold_date_sk#9 IN dynamicpruning#4)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 3] -Input [3]: [ws_item_sk#8, ws_ext_discount_amt#9, ws_sold_date_sk#10] +Input [3]: [ws_item_sk#7, ws_ext_discount_amt#8, ws_sold_date_sk#9] (13) Filter [codegen id : 3] -Input [3]: [ws_item_sk#8, ws_ext_discount_amt#9, ws_sold_date_sk#10] -Condition : isnotnull(ws_item_sk#8) +Input [3]: [ws_item_sk#7, ws_ext_discount_amt#8, ws_sold_date_sk#9] +Condition : isnotnull(ws_item_sk#7) (14) ReusedExchange [Reuses operator id: 34] -Output [1]: [d_date_sk#11] +Output [1]: [d_date_sk#10] (15) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ws_sold_date_sk#10] -Right keys [1]: [d_date_sk#11] +Left keys [1]: [ws_sold_date_sk#9] +Right keys [1]: [d_date_sk#10] Join condition: None (16) Project [codegen id : 3] -Output [2]: [ws_item_sk#8, ws_ext_discount_amt#9] -Input [4]: [ws_item_sk#8, ws_ext_discount_amt#9, ws_sold_date_sk#10, d_date_sk#11] +Output [2]: [ws_item_sk#7, ws_ext_discount_amt#8] +Input [4]: [ws_item_sk#7, ws_ext_discount_amt#8, ws_sold_date_sk#9, d_date_sk#10] (17) HashAggregate [codegen id : 3] -Input [2]: [ws_item_sk#8, ws_ext_discount_amt#9] -Keys [1]: [ws_item_sk#8] -Functions [1]: [partial_avg(UnscaledValue(ws_ext_discount_amt#9))] -Aggregate Attributes [2]: [sum#12, count#13] -Results [3]: [ws_item_sk#8, sum#14, count#15] +Input [2]: [ws_item_sk#7, ws_ext_discount_amt#8] +Keys [1]: [ws_item_sk#7] +Functions [1]: [partial_avg(UnscaledValue(ws_ext_discount_amt#8))] +Aggregate Attributes [2]: [sum#11, count#12] +Results [3]: [ws_item_sk#7, sum#13, count#14] (18) Exchange -Input [3]: [ws_item_sk#8, sum#14, count#15] -Arguments: hashpartitioning(ws_item_sk#8, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [3]: [ws_item_sk#7, sum#13, count#14] +Arguments: hashpartitioning(ws_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (19) HashAggregate [codegen id : 4] -Input [3]: [ws_item_sk#8, sum#14, count#15] -Keys [1]: [ws_item_sk#8] -Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#9))] -Aggregate Attributes [1]: [avg(UnscaledValue(ws_ext_discount_amt#9))#17] -Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(ws_ext_discount_amt#9))#17 / 100.0) as decimal(11,6)))), DecimalType(14,7)) AS (1.3 * avg(ws_ext_discount_amt))#18, ws_item_sk#8] +Input [3]: [ws_item_sk#7, sum#13, count#14] +Keys [1]: [ws_item_sk#7] +Functions [1]: [avg(UnscaledValue(ws_ext_discount_amt#8))] +Aggregate Attributes [1]: [avg(UnscaledValue(ws_ext_discount_amt#8))#15] +Results [2]: [CheckOverflow((1.300000 * promote_precision(cast((avg(UnscaledValue(ws_ext_discount_amt#8))#15 / 100.0) as decimal(11,6)))), DecimalType(14,7)) AS (1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#7] (20) Filter [codegen id : 4] -Input [2]: [(1.3 * avg(ws_ext_discount_amt))#18, ws_item_sk#8] -Condition : isnotnull((1.3 * avg(ws_ext_discount_amt))#18) +Input [2]: [(1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#7] +Condition : isnotnull((1.3 * avg(ws_ext_discount_amt))#16) (21) BroadcastExchange -Input [2]: [(1.3 * avg(ws_ext_discount_amt))#18, ws_item_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#19] +Input [2]: [(1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=3] (22) BroadcastHashJoin [codegen id : 6] Left keys [1]: [i_item_sk#5] -Right keys [1]: [ws_item_sk#8] -Join condition: (cast(ws_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(ws_ext_discount_amt))#18) +Right keys [1]: [ws_item_sk#7] +Join condition: (cast(ws_ext_discount_amt#2 as decimal(14,7)) > (1.3 * avg(ws_ext_discount_amt))#16) (23) Project [codegen id : 6] Output [2]: [ws_ext_discount_amt#2, ws_sold_date_sk#3] -Input [5]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#5, (1.3 * avg(ws_ext_discount_amt))#18, ws_item_sk#8] +Input [5]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, i_item_sk#5, (1.3 * avg(ws_ext_discount_amt))#16, ws_item_sk#7] (24) ReusedExchange [Reuses operator id: 34] -Output [1]: [d_date_sk#20] +Output [1]: [d_date_sk#17] (25) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_sold_date_sk#3] -Right keys [1]: [d_date_sk#20] +Right keys [1]: [d_date_sk#17] Join condition: None (26) Project [codegen id : 6] Output [1]: [ws_ext_discount_amt#2] -Input [3]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, d_date_sk#20] +Input [3]: [ws_ext_discount_amt#2, ws_sold_date_sk#3, d_date_sk#17] (27) HashAggregate [codegen id : 6] Input [1]: [ws_ext_discount_amt#2] Keys: [] Functions [1]: [partial_sum(UnscaledValue(ws_ext_discount_amt#2))] -Aggregate Attributes [1]: [sum#21] -Results [1]: [sum#22] +Aggregate Attributes [1]: [sum#18] +Results [1]: [sum#19] (28) Exchange -Input [1]: [sum#22] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#23] +Input [1]: [sum#19] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (29) HashAggregate [codegen id : 7] -Input [1]: [sum#22] +Input [1]: [sum#19] Keys: [] Functions [1]: [sum(UnscaledValue(ws_ext_discount_amt#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#2))#24] -Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#2))#24,17,2) AS Excess Discount Amount #25] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_discount_amt#2))#20] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_ext_discount_amt#2))#20,17,2) AS Excess Discount Amount #21] ===== Subqueries ===== @@ -179,27 +179,27 @@ BroadcastExchange (34) (30) Scan parquet default.date_dim -Output [2]: [d_date_sk#20, d_date#26] +Output [2]: [d_date_sk#17, d_date#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,2000-01-27), LessThanOrEqual(d_date,2000-04-26), IsNotNull(d_date_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#20, d_date#26] +Input [2]: [d_date_sk#17, d_date#22] (32) Filter [codegen id : 1] -Input [2]: [d_date_sk#20, d_date#26] -Condition : (((isnotnull(d_date#26) AND (d_date#26 >= 2000-01-27)) AND (d_date#26 <= 2000-04-26)) AND isnotnull(d_date_sk#20)) +Input [2]: [d_date_sk#17, d_date#22] +Condition : (((isnotnull(d_date#22) AND (d_date#22 >= 2000-01-27)) AND (d_date#22 <= 2000-04-26)) AND isnotnull(d_date_sk#17)) (33) Project [codegen id : 1] -Output [1]: [d_date_sk#20] -Input [2]: [d_date_sk#20, d_date#26] +Output [1]: [d_date_sk#17] +Input [2]: [d_date_sk#17, d_date#22] (34) BroadcastExchange -Input [1]: [d_date_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] +Input [1]: [d_date_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] -Subquery:2 Hosting operator id = 11 Hosting Expression = ws_sold_date_sk#10 IN dynamicpruning#4 +Subquery:2 Hosting operator id = 11 Hosting Expression = ws_sold_date_sk#9 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt index 3f6b5ffb48a67..e29bcc99eeeed 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93.sf100/explain.txt @@ -63,7 +63,7 @@ Input [2]: [r_reason_sk#6, r_reason_desc#7] (9) BroadcastExchange Input [1]: [r_reason_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 2] Left keys [1]: [sr_reason_sk#2] @@ -76,61 +76,61 @@ Input [5]: [sr_item_sk#1, sr_reason_sk#2, sr_ticket_number#3, sr_return_quantity (12) Exchange Input [3]: [sr_item_sk#1, sr_ticket_number#3, sr_return_quantity#4] -Arguments: hashpartitioning(sr_item_sk#1, sr_ticket_number#3, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(sr_item_sk#1, sr_ticket_number#3, 5), ENSURE_REQUIREMENTS, [plan_id=2] (13) Sort [codegen id : 3] Input [3]: [sr_item_sk#1, sr_ticket_number#3, sr_return_quantity#4] Arguments: [sr_item_sk#1 ASC NULLS FIRST, sr_ticket_number#3 ASC NULLS FIRST], false, 0 (14) Scan parquet default.store_sales -Output [6]: [ss_item_sk#10, ss_customer_sk#11, ss_ticket_number#12, ss_quantity#13, ss_sales_price#14, ss_sold_date_sk#15] +Output [6]: [ss_item_sk#8, ss_customer_sk#9, ss_ticket_number#10, ss_quantity#11, ss_sales_price#12, ss_sold_date_sk#13] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] ReadSchema: struct (15) ColumnarToRow [codegen id : 4] -Input [6]: [ss_item_sk#10, ss_customer_sk#11, ss_ticket_number#12, ss_quantity#13, ss_sales_price#14, ss_sold_date_sk#15] +Input [6]: [ss_item_sk#8, ss_customer_sk#9, ss_ticket_number#10, ss_quantity#11, ss_sales_price#12, ss_sold_date_sk#13] (16) Project [codegen id : 4] -Output [5]: [ss_item_sk#10, ss_customer_sk#11, ss_ticket_number#12, ss_quantity#13, ss_sales_price#14] -Input [6]: [ss_item_sk#10, ss_customer_sk#11, ss_ticket_number#12, ss_quantity#13, ss_sales_price#14, ss_sold_date_sk#15] +Output [5]: [ss_item_sk#8, ss_customer_sk#9, ss_ticket_number#10, ss_quantity#11, ss_sales_price#12] +Input [6]: [ss_item_sk#8, ss_customer_sk#9, ss_ticket_number#10, ss_quantity#11, ss_sales_price#12, ss_sold_date_sk#13] (17) Exchange -Input [5]: [ss_item_sk#10, ss_customer_sk#11, ss_ticket_number#12, ss_quantity#13, ss_sales_price#14] -Arguments: hashpartitioning(ss_item_sk#10, ss_ticket_number#12, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [5]: [ss_item_sk#8, ss_customer_sk#9, ss_ticket_number#10, ss_quantity#11, ss_sales_price#12] +Arguments: hashpartitioning(ss_item_sk#8, ss_ticket_number#10, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) Sort [codegen id : 5] -Input [5]: [ss_item_sk#10, ss_customer_sk#11, ss_ticket_number#12, ss_quantity#13, ss_sales_price#14] -Arguments: [ss_item_sk#10 ASC NULLS FIRST, ss_ticket_number#12 ASC NULLS FIRST], false, 0 +Input [5]: [ss_item_sk#8, ss_customer_sk#9, ss_ticket_number#10, ss_quantity#11, ss_sales_price#12] +Arguments: [ss_item_sk#8 ASC NULLS FIRST, ss_ticket_number#10 ASC NULLS FIRST], false, 0 (19) SortMergeJoin [codegen id : 6] Left keys [2]: [sr_item_sk#1, sr_ticket_number#3] -Right keys [2]: [ss_item_sk#10, ss_ticket_number#12] +Right keys [2]: [ss_item_sk#8, ss_ticket_number#10] Join condition: None (20) Project [codegen id : 6] -Output [2]: [ss_customer_sk#11, CASE WHEN isnotnull(sr_return_quantity#4) THEN CheckOverflow((promote_precision(cast((ss_quantity#13 - sr_return_quantity#4) as decimal(12,2))) * promote_precision(cast(ss_sales_price#14 as decimal(12,2)))), DecimalType(18,2)) ELSE CheckOverflow((promote_precision(cast(ss_quantity#13 as decimal(12,2))) * promote_precision(cast(ss_sales_price#14 as decimal(12,2)))), DecimalType(18,2)) END AS act_sales#17] -Input [8]: [sr_item_sk#1, sr_ticket_number#3, sr_return_quantity#4, ss_item_sk#10, ss_customer_sk#11, ss_ticket_number#12, ss_quantity#13, ss_sales_price#14] +Output [2]: [ss_customer_sk#9, CASE WHEN isnotnull(sr_return_quantity#4) THEN CheckOverflow((promote_precision(cast((ss_quantity#11 - sr_return_quantity#4) as decimal(12,2))) * promote_precision(cast(ss_sales_price#12 as decimal(12,2)))), DecimalType(18,2)) ELSE CheckOverflow((promote_precision(cast(ss_quantity#11 as decimal(12,2))) * promote_precision(cast(ss_sales_price#12 as decimal(12,2)))), DecimalType(18,2)) END AS act_sales#14] +Input [8]: [sr_item_sk#1, sr_ticket_number#3, sr_return_quantity#4, ss_item_sk#8, ss_customer_sk#9, ss_ticket_number#10, ss_quantity#11, ss_sales_price#12] (21) HashAggregate [codegen id : 6] -Input [2]: [ss_customer_sk#11, act_sales#17] -Keys [1]: [ss_customer_sk#11] -Functions [1]: [partial_sum(act_sales#17)] -Aggregate Attributes [2]: [sum#18, isEmpty#19] -Results [3]: [ss_customer_sk#11, sum#20, isEmpty#21] +Input [2]: [ss_customer_sk#9, act_sales#14] +Keys [1]: [ss_customer_sk#9] +Functions [1]: [partial_sum(act_sales#14)] +Aggregate Attributes [2]: [sum#15, isEmpty#16] +Results [3]: [ss_customer_sk#9, sum#17, isEmpty#18] (22) Exchange -Input [3]: [ss_customer_sk#11, sum#20, isEmpty#21] -Arguments: hashpartitioning(ss_customer_sk#11, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [3]: [ss_customer_sk#9, sum#17, isEmpty#18] +Arguments: hashpartitioning(ss_customer_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) HashAggregate [codegen id : 7] -Input [3]: [ss_customer_sk#11, sum#20, isEmpty#21] -Keys [1]: [ss_customer_sk#11] -Functions [1]: [sum(act_sales#17)] -Aggregate Attributes [1]: [sum(act_sales#17)#23] -Results [2]: [ss_customer_sk#11, sum(act_sales#17)#23 AS sumsales#24] +Input [3]: [ss_customer_sk#9, sum#17, isEmpty#18] +Keys [1]: [ss_customer_sk#9] +Functions [1]: [sum(act_sales#14)] +Aggregate Attributes [1]: [sum(act_sales#14)#19] +Results [2]: [ss_customer_sk#9, sum(act_sales#14)#19 AS sumsales#20] (24) TakeOrderedAndProject -Input [2]: [ss_customer_sk#11, sumsales#24] -Arguments: 100, [sumsales#24 ASC NULLS FIRST, ss_customer_sk#11 ASC NULLS FIRST], [ss_customer_sk#11, sumsales#24] +Input [2]: [ss_customer_sk#9, sumsales#20] +Arguments: 100, [sumsales#20 ASC NULLS FIRST, ss_customer_sk#9 ASC NULLS FIRST], [ss_customer_sk#9, sumsales#20] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt index 11f69606ece91..032eb9152cfa1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q93/explain.txt @@ -40,97 +40,97 @@ Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, s (4) Exchange Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] -Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#3, 5), ENSURE_REQUIREMENTS, [id=#7] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#3, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5] Arguments: [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#3 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_returns -Output [5]: [sr_item_sk#8, sr_reason_sk#9, sr_ticket_number#10, sr_return_quantity#11, sr_returned_date_sk#12] +Output [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number), IsNotNull(sr_reason_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [5]: [sr_item_sk#8, sr_reason_sk#9, sr_ticket_number#10, sr_return_quantity#11, sr_returned_date_sk#12] +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] (8) Filter [codegen id : 3] -Input [5]: [sr_item_sk#8, sr_reason_sk#9, sr_ticket_number#10, sr_return_quantity#11, sr_returned_date_sk#12] -Condition : ((isnotnull(sr_item_sk#8) AND isnotnull(sr_ticket_number#10)) AND isnotnull(sr_reason_sk#9)) +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] +Condition : ((isnotnull(sr_item_sk#7) AND isnotnull(sr_ticket_number#9)) AND isnotnull(sr_reason_sk#8)) (9) Project [codegen id : 3] -Output [4]: [sr_item_sk#8, sr_reason_sk#9, sr_ticket_number#10, sr_return_quantity#11] -Input [5]: [sr_item_sk#8, sr_reason_sk#9, sr_ticket_number#10, sr_return_quantity#11, sr_returned_date_sk#12] +Output [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Input [5]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10, sr_returned_date_sk#11] (10) Exchange -Input [4]: [sr_item_sk#8, sr_reason_sk#9, sr_ticket_number#10, sr_return_quantity#11] -Arguments: hashpartitioning(sr_item_sk#8, sr_ticket_number#10, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: hashpartitioning(sr_item_sk#7, sr_ticket_number#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [4]: [sr_item_sk#8, sr_reason_sk#9, sr_ticket_number#10, sr_return_quantity#11] -Arguments: [sr_item_sk#8 ASC NULLS FIRST, sr_ticket_number#10 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] +Arguments: [sr_item_sk#7 ASC NULLS FIRST, sr_ticket_number#9 ASC NULLS FIRST], false, 0 (12) SortMergeJoin [codegen id : 6] Left keys [2]: [ss_item_sk#1, ss_ticket_number#3] -Right keys [2]: [sr_item_sk#8, sr_ticket_number#10] +Right keys [2]: [sr_item_sk#7, sr_ticket_number#9] Join condition: None (13) Project [codegen id : 6] -Output [5]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#9, sr_return_quantity#11] -Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, sr_item_sk#8, sr_reason_sk#9, sr_ticket_number#10, sr_return_quantity#11] +Output [5]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_sales_price#5, sr_item_sk#7, sr_reason_sk#8, sr_ticket_number#9, sr_return_quantity#10] (14) Scan parquet default.reason -Output [2]: [r_reason_sk#14, r_reason_desc#15] +Output [2]: [r_reason_sk#12, r_reason_desc#13] Batched: true Location [not included in comparison]/{warehouse_dir}/reason] PushedFilters: [IsNotNull(r_reason_desc), EqualTo(r_reason_desc,reason 28 ), IsNotNull(r_reason_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 5] -Input [2]: [r_reason_sk#14, r_reason_desc#15] +Input [2]: [r_reason_sk#12, r_reason_desc#13] (16) Filter [codegen id : 5] -Input [2]: [r_reason_sk#14, r_reason_desc#15] -Condition : ((isnotnull(r_reason_desc#15) AND (r_reason_desc#15 = reason 28 )) AND isnotnull(r_reason_sk#14)) +Input [2]: [r_reason_sk#12, r_reason_desc#13] +Condition : ((isnotnull(r_reason_desc#13) AND (r_reason_desc#13 = reason 28 )) AND isnotnull(r_reason_sk#12)) (17) Project [codegen id : 5] -Output [1]: [r_reason_sk#14] -Input [2]: [r_reason_sk#14, r_reason_desc#15] +Output [1]: [r_reason_sk#12] +Input [2]: [r_reason_sk#12, r_reason_desc#13] (18) BroadcastExchange -Input [1]: [r_reason_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#16] +Input [1]: [r_reason_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (19) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [sr_reason_sk#9] -Right keys [1]: [r_reason_sk#14] +Left keys [1]: [sr_reason_sk#8] +Right keys [1]: [r_reason_sk#12] Join condition: None (20) Project [codegen id : 6] -Output [2]: [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#11) THEN CheckOverflow((promote_precision(cast((ss_quantity#4 - sr_return_quantity#11) as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2)) ELSE CheckOverflow((promote_precision(cast(ss_quantity#4 as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2)) END AS act_sales#17] -Input [6]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#9, sr_return_quantity#11, r_reason_sk#14] +Output [2]: [ss_customer_sk#2, CASE WHEN isnotnull(sr_return_quantity#10) THEN CheckOverflow((promote_precision(cast((ss_quantity#4 - sr_return_quantity#10) as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2)) ELSE CheckOverflow((promote_precision(cast(ss_quantity#4 as decimal(12,2))) * promote_precision(cast(ss_sales_price#5 as decimal(12,2)))), DecimalType(18,2)) END AS act_sales#14] +Input [6]: [ss_customer_sk#2, ss_quantity#4, ss_sales_price#5, sr_reason_sk#8, sr_return_quantity#10, r_reason_sk#12] (21) HashAggregate [codegen id : 6] -Input [2]: [ss_customer_sk#2, act_sales#17] +Input [2]: [ss_customer_sk#2, act_sales#14] Keys [1]: [ss_customer_sk#2] -Functions [1]: [partial_sum(act_sales#17)] -Aggregate Attributes [2]: [sum#18, isEmpty#19] -Results [3]: [ss_customer_sk#2, sum#20, isEmpty#21] +Functions [1]: [partial_sum(act_sales#14)] +Aggregate Attributes [2]: [sum#15, isEmpty#16] +Results [3]: [ss_customer_sk#2, sum#17, isEmpty#18] (22) Exchange -Input [3]: [ss_customer_sk#2, sum#20, isEmpty#21] -Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [3]: [ss_customer_sk#2, sum#17, isEmpty#18] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) HashAggregate [codegen id : 7] -Input [3]: [ss_customer_sk#2, sum#20, isEmpty#21] +Input [3]: [ss_customer_sk#2, sum#17, isEmpty#18] Keys [1]: [ss_customer_sk#2] -Functions [1]: [sum(act_sales#17)] -Aggregate Attributes [1]: [sum(act_sales#17)#23] -Results [2]: [ss_customer_sk#2, sum(act_sales#17)#23 AS sumsales#24] +Functions [1]: [sum(act_sales#14)] +Aggregate Attributes [1]: [sum(act_sales#14)#19] +Results [2]: [ss_customer_sk#2, sum(act_sales#14)#19 AS sumsales#20] (24) TakeOrderedAndProject -Input [2]: [ss_customer_sk#2, sumsales#24] -Arguments: 100, [sumsales#24 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], [ss_customer_sk#2, sumsales#24] +Input [2]: [ss_customer_sk#2, sumsales#20] +Arguments: 100, [sumsales#20 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], [ss_customer_sk#2, sumsales#20] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt index da7b19cb7d88b..0f160a566dac6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94.sf100/explain.txt @@ -66,190 +66,190 @@ Input [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse (5) Exchange Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Arguments: hashpartitioning(ws_order_number#5, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ws_order_number#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] (6) Sort [codegen id : 2] Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] Arguments: [ws_order_number#5 ASC NULLS FIRST], false, 0 (7) Scan parquet default.web_sales -Output [3]: [ws_warehouse_sk#10, ws_order_number#11, ws_sold_date_sk#12] +Output [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] ReadSchema: struct (8) ColumnarToRow [codegen id : 3] -Input [3]: [ws_warehouse_sk#10, ws_order_number#11, ws_sold_date_sk#12] +Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] (9) Project [codegen id : 3] -Output [2]: [ws_warehouse_sk#10, ws_order_number#11] -Input [3]: [ws_warehouse_sk#10, ws_order_number#11, ws_sold_date_sk#12] +Output [2]: [ws_warehouse_sk#9, ws_order_number#10] +Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] (10) Exchange -Input [2]: [ws_warehouse_sk#10, ws_order_number#11] -Arguments: hashpartitioning(ws_order_number#11, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: hashpartitioning(ws_order_number#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [2]: [ws_warehouse_sk#10, ws_order_number#11] -Arguments: [ws_order_number#11 ASC NULLS FIRST], false, 0 +Input [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: [ws_order_number#10 ASC NULLS FIRST], false, 0 (12) SortMergeJoin [codegen id : 5] Left keys [1]: [ws_order_number#5] -Right keys [1]: [ws_order_number#11] -Join condition: NOT (ws_warehouse_sk#4 = ws_warehouse_sk#10) +Right keys [1]: [ws_order_number#10] +Join condition: NOT (ws_warehouse_sk#4 = ws_warehouse_sk#9) (13) Project [codegen id : 5] Output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] (14) Scan parquet default.web_returns -Output [2]: [wr_order_number#14, wr_returned_date_sk#15] +Output [2]: [wr_order_number#12, wr_returned_date_sk#13] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] ReadSchema: struct (15) ColumnarToRow [codegen id : 6] -Input [2]: [wr_order_number#14, wr_returned_date_sk#15] +Input [2]: [wr_order_number#12, wr_returned_date_sk#13] (16) Project [codegen id : 6] -Output [1]: [wr_order_number#14] -Input [2]: [wr_order_number#14, wr_returned_date_sk#15] +Output [1]: [wr_order_number#12] +Input [2]: [wr_order_number#12, wr_returned_date_sk#13] (17) Exchange -Input [1]: [wr_order_number#14] -Arguments: hashpartitioning(wr_order_number#14, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [1]: [wr_order_number#12] +Arguments: hashpartitioning(wr_order_number#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) Sort [codegen id : 7] -Input [1]: [wr_order_number#14] -Arguments: [wr_order_number#14 ASC NULLS FIRST], false, 0 +Input [1]: [wr_order_number#12] +Arguments: [wr_order_number#12 ASC NULLS FIRST], false, 0 (19) SortMergeJoin [codegen id : 11] Left keys [1]: [ws_order_number#5] -Right keys [1]: [wr_order_number#14] +Right keys [1]: [wr_order_number#12] Join condition: None (20) Scan parquet default.customer_address -Output [2]: [ca_address_sk#17, ca_state#18] +Output [2]: [ca_address_sk#14, ca_state#15] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 8] -Input [2]: [ca_address_sk#17, ca_state#18] +Input [2]: [ca_address_sk#14, ca_state#15] (22) Filter [codegen id : 8] -Input [2]: [ca_address_sk#17, ca_state#18] -Condition : ((isnotnull(ca_state#18) AND (ca_state#18 = IL)) AND isnotnull(ca_address_sk#17)) +Input [2]: [ca_address_sk#14, ca_state#15] +Condition : ((isnotnull(ca_state#15) AND (ca_state#15 = IL)) AND isnotnull(ca_address_sk#14)) (23) Project [codegen id : 8] -Output [1]: [ca_address_sk#17] -Input [2]: [ca_address_sk#17, ca_state#18] +Output [1]: [ca_address_sk#14] +Input [2]: [ca_address_sk#14, ca_state#15] (24) BroadcastExchange -Input [1]: [ca_address_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] +Input [1]: [ca_address_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (25) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ws_ship_addr_sk#2] -Right keys [1]: [ca_address_sk#17] +Right keys [1]: [ca_address_sk#14] Join condition: None (26) Project [codegen id : 11] Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ca_address_sk#17] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ca_address_sk#14] (27) Scan parquet default.web_site -Output [2]: [web_site_sk#20, web_company_name#21] +Output [2]: [web_site_sk#16, web_company_name#17] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri ), IsNotNull(web_site_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 9] -Input [2]: [web_site_sk#20, web_company_name#21] +Input [2]: [web_site_sk#16, web_company_name#17] (29) Filter [codegen id : 9] -Input [2]: [web_site_sk#20, web_company_name#21] -Condition : ((isnotnull(web_company_name#21) AND (web_company_name#21 = pri )) AND isnotnull(web_site_sk#20)) +Input [2]: [web_site_sk#16, web_company_name#17] +Condition : ((isnotnull(web_company_name#17) AND (web_company_name#17 = pri )) AND isnotnull(web_site_sk#16)) (30) Project [codegen id : 9] -Output [1]: [web_site_sk#20] -Input [2]: [web_site_sk#20, web_company_name#21] +Output [1]: [web_site_sk#16] +Input [2]: [web_site_sk#16, web_company_name#17] (31) BroadcastExchange -Input [1]: [web_site_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [web_site_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (32) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ws_web_site_sk#3] -Right keys [1]: [web_site_sk#20] +Right keys [1]: [web_site_sk#16] Join condition: None (33) Project [codegen id : 11] Output [4]: [ws_ship_date_sk#1, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Input [6]: [ws_ship_date_sk#1, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, web_site_sk#20] +Input [6]: [ws_ship_date_sk#1, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, web_site_sk#16] (34) Scan parquet default.date_dim -Output [2]: [d_date_sk#23, d_date#24] +Output [2]: [d_date_sk#18, d_date#19] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 10] -Input [2]: [d_date_sk#23, d_date#24] +Input [2]: [d_date_sk#18, d_date#19] (36) Filter [codegen id : 10] -Input [2]: [d_date_sk#23, d_date#24] -Condition : (((isnotnull(d_date#24) AND (d_date#24 >= 1999-02-01)) AND (d_date#24 <= 1999-04-02)) AND isnotnull(d_date_sk#23)) +Input [2]: [d_date_sk#18, d_date#19] +Condition : (((isnotnull(d_date#19) AND (d_date#19 >= 1999-02-01)) AND (d_date#19 <= 1999-04-02)) AND isnotnull(d_date_sk#18)) (37) Project [codegen id : 10] -Output [1]: [d_date_sk#23] -Input [2]: [d_date_sk#23, d_date#24] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_date#19] (38) BroadcastExchange -Input [1]: [d_date_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] (39) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ws_ship_date_sk#1] -Right keys [1]: [d_date_sk#23] +Right keys [1]: [d_date_sk#18] Join condition: None (40) Project [codegen id : 11] Output [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Input [5]: [ws_ship_date_sk#1, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, d_date_sk#23] +Input [5]: [ws_ship_date_sk#1, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, d_date_sk#18] (41) HashAggregate [codegen id : 11] Input [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] Keys [1]: [ws_order_number#5] Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#6)), partial_sum(UnscaledValue(ws_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#26, sum(UnscaledValue(ws_net_profit#7))#27] -Results [3]: [ws_order_number#5, sum#28, sum#29] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21] +Results [3]: [ws_order_number#5, sum#22, sum#23] (42) HashAggregate [codegen id : 11] -Input [3]: [ws_order_number#5, sum#28, sum#29] +Input [3]: [ws_order_number#5, sum#22, sum#23] Keys [1]: [ws_order_number#5] Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#26, sum(UnscaledValue(ws_net_profit#7))#27] -Results [3]: [ws_order_number#5, sum#28, sum#29] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21] +Results [3]: [ws_order_number#5, sum#22, sum#23] (43) HashAggregate [codegen id : 11] -Input [3]: [ws_order_number#5, sum#28, sum#29] +Input [3]: [ws_order_number#5, sum#22, sum#23] Keys: [] Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7)), partial_count(distinct ws_order_number#5)] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#26, sum(UnscaledValue(ws_net_profit#7))#27, count(ws_order_number#5)#30] -Results [3]: [sum#28, sum#29, count#31] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21, count(ws_order_number#5)#24] +Results [3]: [sum#22, sum#23, count#25] (44) Exchange -Input [3]: [sum#28, sum#29, count#31] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#32] +Input [3]: [sum#22, sum#23, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (45) HashAggregate [codegen id : 12] -Input [3]: [sum#28, sum#29, count#31] +Input [3]: [sum#22, sum#23, count#25] Keys: [] Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net_profit#7)), count(distinct ws_order_number#5)] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#26, sum(UnscaledValue(ws_net_profit#7))#27, count(ws_order_number#5)#30] -Results [3]: [count(ws_order_number#5)#30 AS order count #33, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#26,17,2) AS total shipping cost #34, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#27,17,2) AS total net profit #35] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21, count(ws_order_number#5)#24] +Results [3]: [count(ws_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#20,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#21,17,2) AS total net profit #28] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt index 1c1f76169ca6a..a07112c751556 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q94/explain.txt @@ -66,190 +66,190 @@ Input [8]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse (5) Exchange Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Arguments: hashpartitioning(ws_order_number#5, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ws_order_number#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] (6) Sort [codegen id : 2] Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] Arguments: [ws_order_number#5 ASC NULLS FIRST], false, 0 (7) Scan parquet default.web_sales -Output [3]: [ws_warehouse_sk#10, ws_order_number#11, ws_sold_date_sk#12] +Output [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] ReadSchema: struct (8) ColumnarToRow [codegen id : 3] -Input [3]: [ws_warehouse_sk#10, ws_order_number#11, ws_sold_date_sk#12] +Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] (9) Project [codegen id : 3] -Output [2]: [ws_warehouse_sk#10, ws_order_number#11] -Input [3]: [ws_warehouse_sk#10, ws_order_number#11, ws_sold_date_sk#12] +Output [2]: [ws_warehouse_sk#9, ws_order_number#10] +Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] (10) Exchange -Input [2]: [ws_warehouse_sk#10, ws_order_number#11] -Arguments: hashpartitioning(ws_order_number#11, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: hashpartitioning(ws_order_number#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [2]: [ws_warehouse_sk#10, ws_order_number#11] -Arguments: [ws_order_number#11 ASC NULLS FIRST], false, 0 +Input [2]: [ws_warehouse_sk#9, ws_order_number#10] +Arguments: [ws_order_number#10 ASC NULLS FIRST], false, 0 (12) SortMergeJoin [codegen id : 5] Left keys [1]: [ws_order_number#5] -Right keys [1]: [ws_order_number#11] -Join condition: NOT (ws_warehouse_sk#4 = ws_warehouse_sk#10) +Right keys [1]: [ws_order_number#10] +Join condition: NOT (ws_warehouse_sk#4 = ws_warehouse_sk#9) (13) Project [codegen id : 5] Output [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_warehouse_sk#4, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] (14) Scan parquet default.web_returns -Output [2]: [wr_order_number#14, wr_returned_date_sk#15] +Output [2]: [wr_order_number#12, wr_returned_date_sk#13] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] ReadSchema: struct (15) ColumnarToRow [codegen id : 6] -Input [2]: [wr_order_number#14, wr_returned_date_sk#15] +Input [2]: [wr_order_number#12, wr_returned_date_sk#13] (16) Project [codegen id : 6] -Output [1]: [wr_order_number#14] -Input [2]: [wr_order_number#14, wr_returned_date_sk#15] +Output [1]: [wr_order_number#12] +Input [2]: [wr_order_number#12, wr_returned_date_sk#13] (17) Exchange -Input [1]: [wr_order_number#14] -Arguments: hashpartitioning(wr_order_number#14, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [1]: [wr_order_number#12] +Arguments: hashpartitioning(wr_order_number#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) Sort [codegen id : 7] -Input [1]: [wr_order_number#14] -Arguments: [wr_order_number#14 ASC NULLS FIRST], false, 0 +Input [1]: [wr_order_number#12] +Arguments: [wr_order_number#12 ASC NULLS FIRST], false, 0 (19) SortMergeJoin [codegen id : 11] Left keys [1]: [ws_order_number#5] -Right keys [1]: [wr_order_number#14] +Right keys [1]: [wr_order_number#12] Join condition: None (20) Scan parquet default.date_dim -Output [2]: [d_date_sk#17, d_date#18] +Output [2]: [d_date_sk#14, d_date#15] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 8] -Input [2]: [d_date_sk#17, d_date#18] +Input [2]: [d_date_sk#14, d_date#15] (22) Filter [codegen id : 8] -Input [2]: [d_date_sk#17, d_date#18] -Condition : (((isnotnull(d_date#18) AND (d_date#18 >= 1999-02-01)) AND (d_date#18 <= 1999-04-02)) AND isnotnull(d_date_sk#17)) +Input [2]: [d_date_sk#14, d_date#15] +Condition : (((isnotnull(d_date#15) AND (d_date#15 >= 1999-02-01)) AND (d_date#15 <= 1999-04-02)) AND isnotnull(d_date_sk#14)) (23) Project [codegen id : 8] -Output [1]: [d_date_sk#17] -Input [2]: [d_date_sk#17, d_date#18] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_date#15] (24) BroadcastExchange -Input [1]: [d_date_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#19] +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (25) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ws_ship_date_sk#1] -Right keys [1]: [d_date_sk#17] +Right keys [1]: [d_date_sk#14] Join condition: None (26) Project [codegen id : 11] Output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, d_date_sk#17] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, d_date_sk#14] (27) Scan parquet default.customer_address -Output [2]: [ca_address_sk#20, ca_state#21] +Output [2]: [ca_address_sk#16, ca_state#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 9] -Input [2]: [ca_address_sk#20, ca_state#21] +Input [2]: [ca_address_sk#16, ca_state#17] (29) Filter [codegen id : 9] -Input [2]: [ca_address_sk#20, ca_state#21] -Condition : ((isnotnull(ca_state#21) AND (ca_state#21 = IL)) AND isnotnull(ca_address_sk#20)) +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : ((isnotnull(ca_state#17) AND (ca_state#17 = IL)) AND isnotnull(ca_address_sk#16)) (30) Project [codegen id : 9] -Output [1]: [ca_address_sk#20] -Input [2]: [ca_address_sk#20, ca_state#21] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_state#17] (31) BroadcastExchange -Input [1]: [ca_address_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (32) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ws_ship_addr_sk#2] -Right keys [1]: [ca_address_sk#20] +Right keys [1]: [ca_address_sk#16] Join condition: None (33) Project [codegen id : 11] Output [4]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ca_address_sk#20] +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, ca_address_sk#16] (34) Scan parquet default.web_site -Output [2]: [web_site_sk#23, web_company_name#24] +Output [2]: [web_site_sk#18, web_company_name#19] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri ), IsNotNull(web_site_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 10] -Input [2]: [web_site_sk#23, web_company_name#24] +Input [2]: [web_site_sk#18, web_company_name#19] (36) Filter [codegen id : 10] -Input [2]: [web_site_sk#23, web_company_name#24] -Condition : ((isnotnull(web_company_name#24) AND (web_company_name#24 = pri )) AND isnotnull(web_site_sk#23)) +Input [2]: [web_site_sk#18, web_company_name#19] +Condition : ((isnotnull(web_company_name#19) AND (web_company_name#19 = pri )) AND isnotnull(web_site_sk#18)) (37) Project [codegen id : 10] -Output [1]: [web_site_sk#23] -Input [2]: [web_site_sk#23, web_company_name#24] +Output [1]: [web_site_sk#18] +Input [2]: [web_site_sk#18, web_company_name#19] (38) BroadcastExchange -Input [1]: [web_site_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [web_site_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] (39) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ws_web_site_sk#3] -Right keys [1]: [web_site_sk#23] +Right keys [1]: [web_site_sk#18] Join condition: None (40) Project [codegen id : 11] Output [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] -Input [5]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, web_site_sk#23] +Input [5]: [ws_web_site_sk#3, ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7, web_site_sk#18] (41) HashAggregate [codegen id : 11] Input [3]: [ws_order_number#5, ws_ext_ship_cost#6, ws_net_profit#7] Keys [1]: [ws_order_number#5] Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#6)), partial_sum(UnscaledValue(ws_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#26, sum(UnscaledValue(ws_net_profit#7))#27] -Results [3]: [ws_order_number#5, sum#28, sum#29] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21] +Results [3]: [ws_order_number#5, sum#22, sum#23] (42) HashAggregate [codegen id : 11] -Input [3]: [ws_order_number#5, sum#28, sum#29] +Input [3]: [ws_order_number#5, sum#22, sum#23] Keys [1]: [ws_order_number#5] Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#26, sum(UnscaledValue(ws_net_profit#7))#27] -Results [3]: [ws_order_number#5, sum#28, sum#29] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21] +Results [3]: [ws_order_number#5, sum#22, sum#23] (43) HashAggregate [codegen id : 11] -Input [3]: [ws_order_number#5, sum#28, sum#29] +Input [3]: [ws_order_number#5, sum#22, sum#23] Keys: [] Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#6)), merge_sum(UnscaledValue(ws_net_profit#7)), partial_count(distinct ws_order_number#5)] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#26, sum(UnscaledValue(ws_net_profit#7))#27, count(ws_order_number#5)#30] -Results [3]: [sum#28, sum#29, count#31] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21, count(ws_order_number#5)#24] +Results [3]: [sum#22, sum#23, count#25] (44) Exchange -Input [3]: [sum#28, sum#29, count#31] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#32] +Input [3]: [sum#22, sum#23, count#25] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (45) HashAggregate [codegen id : 12] -Input [3]: [sum#28, sum#29, count#31] +Input [3]: [sum#22, sum#23, count#25] Keys: [] Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#6)), sum(UnscaledValue(ws_net_profit#7)), count(distinct ws_order_number#5)] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#26, sum(UnscaledValue(ws_net_profit#7))#27, count(ws_order_number#5)#30] -Results [3]: [count(ws_order_number#5)#30 AS order count #33, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#26,17,2) AS total shipping cost #34, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#27,17,2) AS total net profit #35] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#6))#20, sum(UnscaledValue(ws_net_profit#7))#21, count(ws_order_number#5)#24] +Results [3]: [count(ws_order_number#5)#24 AS order count #26, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#6))#20,17,2) AS total shipping cost #27, MakeDecimal(sum(UnscaledValue(ws_net_profit#7))#21,17,2) AS total net profit #28] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt index 1a24233541a26..b81fb148e1b25 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95.sf100/explain.txt @@ -78,240 +78,240 @@ Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_num (5) Exchange Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Arguments: hashpartitioning(ws_order_number#4, 5), ENSURE_REQUIREMENTS, [id=#8] +Arguments: hashpartitioning(ws_order_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=1] (6) Sort [codegen id : 2] Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] Arguments: [ws_order_number#4 ASC NULLS FIRST], false, 0 (7) Scan parquet default.web_sales -Output [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] +Output [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] ReadSchema: struct (8) ColumnarToRow [codegen id : 3] -Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] (9) Filter [codegen id : 3] -Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] -Condition : (isnotnull(ws_order_number#10) AND isnotnull(ws_warehouse_sk#9)) +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Condition : (isnotnull(ws_order_number#9) AND isnotnull(ws_warehouse_sk#8)) (10) Project [codegen id : 3] -Output [2]: [ws_warehouse_sk#9, ws_order_number#10] -Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] +Output [2]: [ws_warehouse_sk#8, ws_order_number#9] +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] (11) Exchange -Input [2]: [ws_warehouse_sk#9, ws_order_number#10] -Arguments: hashpartitioning(ws_order_number#10, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: hashpartitioning(ws_order_number#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 4] -Input [2]: [ws_warehouse_sk#9, ws_order_number#10] -Arguments: [ws_order_number#10 ASC NULLS FIRST], false, 0 +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: [ws_order_number#9 ASC NULLS FIRST], false, 0 (13) ReusedExchange [Reuses operator id: 11] -Output [2]: [ws_warehouse_sk#13, ws_order_number#14] +Output [2]: [ws_warehouse_sk#11, ws_order_number#12] (14) Sort [codegen id : 6] -Input [2]: [ws_warehouse_sk#13, ws_order_number#14] -Arguments: [ws_order_number#14 ASC NULLS FIRST], false, 0 +Input [2]: [ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_order_number#12 ASC NULLS FIRST], false, 0 (15) SortMergeJoin [codegen id : 7] -Left keys [1]: [ws_order_number#10] -Right keys [1]: [ws_order_number#14] -Join condition: NOT (ws_warehouse_sk#9 = ws_warehouse_sk#13) +Left keys [1]: [ws_order_number#9] +Right keys [1]: [ws_order_number#12] +Join condition: NOT (ws_warehouse_sk#8 = ws_warehouse_sk#11) (16) Project [codegen id : 7] -Output [1]: [ws_order_number#10] -Input [4]: [ws_warehouse_sk#9, ws_order_number#10, ws_warehouse_sk#13, ws_order_number#14] +Output [1]: [ws_order_number#9] +Input [4]: [ws_warehouse_sk#8, ws_order_number#9, ws_warehouse_sk#11, ws_order_number#12] (17) SortMergeJoin [codegen id : 8] Left keys [1]: [ws_order_number#4] -Right keys [1]: [ws_order_number#10] +Right keys [1]: [ws_order_number#9] Join condition: None (18) Scan parquet default.web_returns -Output [2]: [wr_order_number#15, wr_returned_date_sk#16] +Output [2]: [wr_order_number#13, wr_returned_date_sk#14] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number)] ReadSchema: struct (19) ColumnarToRow [codegen id : 9] -Input [2]: [wr_order_number#15, wr_returned_date_sk#16] +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] (20) Filter [codegen id : 9] -Input [2]: [wr_order_number#15, wr_returned_date_sk#16] -Condition : isnotnull(wr_order_number#15) +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] +Condition : isnotnull(wr_order_number#13) (21) Project [codegen id : 9] -Output [1]: [wr_order_number#15] -Input [2]: [wr_order_number#15, wr_returned_date_sk#16] +Output [1]: [wr_order_number#13] +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] (22) Exchange -Input [1]: [wr_order_number#15] -Arguments: hashpartitioning(wr_order_number#15, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [1]: [wr_order_number#13] +Arguments: hashpartitioning(wr_order_number#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) Sort [codegen id : 10] -Input [1]: [wr_order_number#15] -Arguments: [wr_order_number#15 ASC NULLS FIRST], false, 0 +Input [1]: [wr_order_number#13] +Arguments: [wr_order_number#13 ASC NULLS FIRST], false, 0 (24) ReusedExchange [Reuses operator id: 11] -Output [2]: [ws_warehouse_sk#9, ws_order_number#10] +Output [2]: [ws_warehouse_sk#8, ws_order_number#9] (25) Sort [codegen id : 12] -Input [2]: [ws_warehouse_sk#9, ws_order_number#10] -Arguments: [ws_order_number#10 ASC NULLS FIRST], false, 0 +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: [ws_order_number#9 ASC NULLS FIRST], false, 0 (26) SortMergeJoin [codegen id : 13] -Left keys [1]: [wr_order_number#15] -Right keys [1]: [ws_order_number#10] +Left keys [1]: [wr_order_number#13] +Right keys [1]: [ws_order_number#9] Join condition: None (27) ReusedExchange [Reuses operator id: 11] -Output [2]: [ws_warehouse_sk#13, ws_order_number#14] +Output [2]: [ws_warehouse_sk#11, ws_order_number#12] (28) Sort [codegen id : 15] -Input [2]: [ws_warehouse_sk#13, ws_order_number#14] -Arguments: [ws_order_number#14 ASC NULLS FIRST], false, 0 +Input [2]: [ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_order_number#12 ASC NULLS FIRST], false, 0 (29) SortMergeJoin [codegen id : 16] -Left keys [1]: [ws_order_number#10] -Right keys [1]: [ws_order_number#14] -Join condition: NOT (ws_warehouse_sk#9 = ws_warehouse_sk#13) +Left keys [1]: [ws_order_number#9] +Right keys [1]: [ws_order_number#12] +Join condition: NOT (ws_warehouse_sk#8 = ws_warehouse_sk#11) (30) Project [codegen id : 16] -Output [1]: [wr_order_number#15] -Input [5]: [wr_order_number#15, ws_warehouse_sk#9, ws_order_number#10, ws_warehouse_sk#13, ws_order_number#14] +Output [1]: [wr_order_number#13] +Input [5]: [wr_order_number#13, ws_warehouse_sk#8, ws_order_number#9, ws_warehouse_sk#11, ws_order_number#12] (31) SortMergeJoin [codegen id : 20] Left keys [1]: [ws_order_number#4] -Right keys [1]: [wr_order_number#15] +Right keys [1]: [wr_order_number#13] Join condition: None (32) Scan parquet default.customer_address -Output [2]: [ca_address_sk#18, ca_state#19] +Output [2]: [ca_address_sk#15, ca_state#16] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] ReadSchema: struct (33) ColumnarToRow [codegen id : 17] -Input [2]: [ca_address_sk#18, ca_state#19] +Input [2]: [ca_address_sk#15, ca_state#16] (34) Filter [codegen id : 17] -Input [2]: [ca_address_sk#18, ca_state#19] -Condition : ((isnotnull(ca_state#19) AND (ca_state#19 = IL)) AND isnotnull(ca_address_sk#18)) +Input [2]: [ca_address_sk#15, ca_state#16] +Condition : ((isnotnull(ca_state#16) AND (ca_state#16 = IL)) AND isnotnull(ca_address_sk#15)) (35) Project [codegen id : 17] -Output [1]: [ca_address_sk#18] -Input [2]: [ca_address_sk#18, ca_state#19] +Output [1]: [ca_address_sk#15] +Input [2]: [ca_address_sk#15, ca_state#16] (36) BroadcastExchange -Input [1]: [ca_address_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +Input [1]: [ca_address_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (37) BroadcastHashJoin [codegen id : 20] Left keys [1]: [ws_ship_addr_sk#2] -Right keys [1]: [ca_address_sk#18] +Right keys [1]: [ca_address_sk#15] Join condition: None (38) Project [codegen id : 20] Output [5]: [ws_ship_date_sk#1, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#18] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#15] (39) Scan parquet default.web_site -Output [2]: [web_site_sk#21, web_company_name#22] +Output [2]: [web_site_sk#17, web_company_name#18] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri ), IsNotNull(web_site_sk)] ReadSchema: struct (40) ColumnarToRow [codegen id : 18] -Input [2]: [web_site_sk#21, web_company_name#22] +Input [2]: [web_site_sk#17, web_company_name#18] (41) Filter [codegen id : 18] -Input [2]: [web_site_sk#21, web_company_name#22] -Condition : ((isnotnull(web_company_name#22) AND (web_company_name#22 = pri )) AND isnotnull(web_site_sk#21)) +Input [2]: [web_site_sk#17, web_company_name#18] +Condition : ((isnotnull(web_company_name#18) AND (web_company_name#18 = pri )) AND isnotnull(web_site_sk#17)) (42) Project [codegen id : 18] -Output [1]: [web_site_sk#21] -Input [2]: [web_site_sk#21, web_company_name#22] +Output [1]: [web_site_sk#17] +Input [2]: [web_site_sk#17, web_company_name#18] (43) BroadcastExchange -Input [1]: [web_site_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] +Input [1]: [web_site_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (44) BroadcastHashJoin [codegen id : 20] Left keys [1]: [ws_web_site_sk#3] -Right keys [1]: [web_site_sk#21] +Right keys [1]: [web_site_sk#17] Join condition: None (45) Project [codegen id : 20] Output [4]: [ws_ship_date_sk#1, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Input [6]: [ws_ship_date_sk#1, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#21] +Input [6]: [ws_ship_date_sk#1, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#17] (46) Scan parquet default.date_dim -Output [2]: [d_date_sk#24, d_date#25] +Output [2]: [d_date_sk#19, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] ReadSchema: struct (47) ColumnarToRow [codegen id : 19] -Input [2]: [d_date_sk#24, d_date#25] +Input [2]: [d_date_sk#19, d_date#20] (48) Filter [codegen id : 19] -Input [2]: [d_date_sk#24, d_date#25] -Condition : (((isnotnull(d_date#25) AND (d_date#25 >= 1999-02-01)) AND (d_date#25 <= 1999-04-02)) AND isnotnull(d_date_sk#24)) +Input [2]: [d_date_sk#19, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-01)) AND (d_date#20 <= 1999-04-02)) AND isnotnull(d_date_sk#19)) (49) Project [codegen id : 19] -Output [1]: [d_date_sk#24] -Input [2]: [d_date_sk#24, d_date#25] +Output [1]: [d_date_sk#19] +Input [2]: [d_date_sk#19, d_date#20] (50) BroadcastExchange -Input [1]: [d_date_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [1]: [d_date_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] (51) BroadcastHashJoin [codegen id : 20] Left keys [1]: [ws_ship_date_sk#1] -Right keys [1]: [d_date_sk#24] +Right keys [1]: [d_date_sk#19] Join condition: None (52) Project [codegen id : 20] Output [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Input [5]: [ws_ship_date_sk#1, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#24] +Input [5]: [ws_ship_date_sk#1, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#19] (53) HashAggregate [codegen id : 20] Input [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] Keys [1]: [ws_order_number#4] Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#5)), partial_sum(UnscaledValue(ws_net_profit#6))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28] -Results [3]: [ws_order_number#4, sum#29, sum#30] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22] +Results [3]: [ws_order_number#4, sum#23, sum#24] (54) HashAggregate [codegen id : 20] -Input [3]: [ws_order_number#4, sum#29, sum#30] +Input [3]: [ws_order_number#4, sum#23, sum#24] Keys [1]: [ws_order_number#4] Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28] -Results [3]: [ws_order_number#4, sum#29, sum#30] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22] +Results [3]: [ws_order_number#4, sum#23, sum#24] (55) HashAggregate [codegen id : 20] -Input [3]: [ws_order_number#4, sum#29, sum#30] +Input [3]: [ws_order_number#4, sum#23, sum#24] Keys: [] Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6)), partial_count(distinct ws_order_number#4)] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#31] -Results [3]: [sum#29, sum#30, count#32] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22, count(ws_order_number#4)#25] +Results [3]: [sum#23, sum#24, count#26] (56) Exchange -Input [3]: [sum#29, sum#30, count#32] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#33] +Input [3]: [sum#23, sum#24, count#26] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (57) HashAggregate [codegen id : 21] -Input [3]: [sum#29, sum#30, count#32] +Input [3]: [sum#23, sum#24, count#26] Keys: [] Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net_profit#6)), count(distinct ws_order_number#4)] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#31] -Results [3]: [count(ws_order_number#4)#31 AS order count #34, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #35, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #36] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22, count(ws_order_number#4)#25] +Results [3]: [count(ws_order_number#4)#25 AS order count #27, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#21,17,2) AS total shipping cost #28, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#22,17,2) AS total net profit #29] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt index 79519bfde99f6..a73394d626dce 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q95/explain.txt @@ -79,244 +79,244 @@ Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_num (5) Exchange Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Arguments: hashpartitioning(ws_order_number#4, 5), ENSURE_REQUIREMENTS, [id=#8] +Arguments: hashpartitioning(ws_order_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=1] (6) Sort [codegen id : 2] Input [6]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] Arguments: [ws_order_number#4 ASC NULLS FIRST], false, 0 (7) Scan parquet default.web_sales -Output [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] +Output [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_order_number), IsNotNull(ws_warehouse_sk)] ReadSchema: struct (8) ColumnarToRow [codegen id : 3] -Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] (9) Filter [codegen id : 3] -Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] -Condition : (isnotnull(ws_order_number#10) AND isnotnull(ws_warehouse_sk#9)) +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] +Condition : (isnotnull(ws_order_number#9) AND isnotnull(ws_warehouse_sk#8)) (10) Project [codegen id : 3] -Output [2]: [ws_warehouse_sk#9, ws_order_number#10] -Input [3]: [ws_warehouse_sk#9, ws_order_number#10, ws_sold_date_sk#11] +Output [2]: [ws_warehouse_sk#8, ws_order_number#9] +Input [3]: [ws_warehouse_sk#8, ws_order_number#9, ws_sold_date_sk#10] (11) Exchange -Input [2]: [ws_warehouse_sk#9, ws_order_number#10] -Arguments: hashpartitioning(ws_order_number#10, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: hashpartitioning(ws_order_number#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 4] -Input [2]: [ws_warehouse_sk#9, ws_order_number#10] -Arguments: [ws_order_number#10 ASC NULLS FIRST], false, 0 +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: [ws_order_number#9 ASC NULLS FIRST], false, 0 (13) ReusedExchange [Reuses operator id: 11] -Output [2]: [ws_warehouse_sk#13, ws_order_number#14] +Output [2]: [ws_warehouse_sk#11, ws_order_number#12] (14) Sort [codegen id : 6] -Input [2]: [ws_warehouse_sk#13, ws_order_number#14] -Arguments: [ws_order_number#14 ASC NULLS FIRST], false, 0 +Input [2]: [ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_order_number#12 ASC NULLS FIRST], false, 0 (15) SortMergeJoin [codegen id : 7] -Left keys [1]: [ws_order_number#10] -Right keys [1]: [ws_order_number#14] -Join condition: NOT (ws_warehouse_sk#9 = ws_warehouse_sk#13) +Left keys [1]: [ws_order_number#9] +Right keys [1]: [ws_order_number#12] +Join condition: NOT (ws_warehouse_sk#8 = ws_warehouse_sk#11) (16) Project [codegen id : 7] -Output [1]: [ws_order_number#10] -Input [4]: [ws_warehouse_sk#9, ws_order_number#10, ws_warehouse_sk#13, ws_order_number#14] +Output [1]: [ws_order_number#9] +Input [4]: [ws_warehouse_sk#8, ws_order_number#9, ws_warehouse_sk#11, ws_order_number#12] (17) SortMergeJoin [codegen id : 8] Left keys [1]: [ws_order_number#4] -Right keys [1]: [ws_order_number#10] +Right keys [1]: [ws_order_number#9] Join condition: None (18) Scan parquet default.web_returns -Output [2]: [wr_order_number#15, wr_returned_date_sk#16] +Output [2]: [wr_order_number#13, wr_returned_date_sk#14] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number)] ReadSchema: struct (19) ColumnarToRow [codegen id : 9] -Input [2]: [wr_order_number#15, wr_returned_date_sk#16] +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] (20) Filter [codegen id : 9] -Input [2]: [wr_order_number#15, wr_returned_date_sk#16] -Condition : isnotnull(wr_order_number#15) +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] +Condition : isnotnull(wr_order_number#13) (21) Project [codegen id : 9] -Output [1]: [wr_order_number#15] -Input [2]: [wr_order_number#15, wr_returned_date_sk#16] +Output [1]: [wr_order_number#13] +Input [2]: [wr_order_number#13, wr_returned_date_sk#14] (22) Exchange -Input [1]: [wr_order_number#15] -Arguments: hashpartitioning(wr_order_number#15, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [1]: [wr_order_number#13] +Arguments: hashpartitioning(wr_order_number#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) Sort [codegen id : 10] -Input [1]: [wr_order_number#15] -Arguments: [wr_order_number#15 ASC NULLS FIRST], false, 0 +Input [1]: [wr_order_number#13] +Arguments: [wr_order_number#13 ASC NULLS FIRST], false, 0 (24) ReusedExchange [Reuses operator id: 11] -Output [2]: [ws_warehouse_sk#9, ws_order_number#10] +Output [2]: [ws_warehouse_sk#8, ws_order_number#9] (25) Sort [codegen id : 12] -Input [2]: [ws_warehouse_sk#9, ws_order_number#10] -Arguments: [ws_order_number#10 ASC NULLS FIRST], false, 0 +Input [2]: [ws_warehouse_sk#8, ws_order_number#9] +Arguments: [ws_order_number#9 ASC NULLS FIRST], false, 0 (26) ReusedExchange [Reuses operator id: 11] -Output [2]: [ws_warehouse_sk#13, ws_order_number#14] +Output [2]: [ws_warehouse_sk#11, ws_order_number#12] (27) Sort [codegen id : 14] -Input [2]: [ws_warehouse_sk#13, ws_order_number#14] -Arguments: [ws_order_number#14 ASC NULLS FIRST], false, 0 +Input [2]: [ws_warehouse_sk#11, ws_order_number#12] +Arguments: [ws_order_number#12 ASC NULLS FIRST], false, 0 (28) SortMergeJoin [codegen id : 15] -Left keys [1]: [ws_order_number#10] -Right keys [1]: [ws_order_number#14] -Join condition: NOT (ws_warehouse_sk#9 = ws_warehouse_sk#13) +Left keys [1]: [ws_order_number#9] +Right keys [1]: [ws_order_number#12] +Join condition: NOT (ws_warehouse_sk#8 = ws_warehouse_sk#11) (29) Project [codegen id : 15] -Output [1]: [ws_order_number#10] -Input [4]: [ws_warehouse_sk#9, ws_order_number#10, ws_warehouse_sk#13, ws_order_number#14] +Output [1]: [ws_order_number#9] +Input [4]: [ws_warehouse_sk#8, ws_order_number#9, ws_warehouse_sk#11, ws_order_number#12] (30) SortMergeJoin [codegen id : 16] -Left keys [1]: [wr_order_number#15] -Right keys [1]: [ws_order_number#10] +Left keys [1]: [wr_order_number#13] +Right keys [1]: [ws_order_number#9] Join condition: None (31) Project [codegen id : 16] -Output [1]: [wr_order_number#15] -Input [2]: [wr_order_number#15, ws_order_number#10] +Output [1]: [wr_order_number#13] +Input [2]: [wr_order_number#13, ws_order_number#9] (32) SortMergeJoin [codegen id : 20] Left keys [1]: [ws_order_number#4] -Right keys [1]: [wr_order_number#15] +Right keys [1]: [wr_order_number#13] Join condition: None (33) Scan parquet default.date_dim -Output [2]: [d_date_sk#18, d_date#19] +Output [2]: [d_date_sk#15, d_date#16] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-01), LessThanOrEqual(d_date,1999-04-02), IsNotNull(d_date_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 17] -Input [2]: [d_date_sk#18, d_date#19] +Input [2]: [d_date_sk#15, d_date#16] (35) Filter [codegen id : 17] -Input [2]: [d_date_sk#18, d_date#19] -Condition : (((isnotnull(d_date#19) AND (d_date#19 >= 1999-02-01)) AND (d_date#19 <= 1999-04-02)) AND isnotnull(d_date_sk#18)) +Input [2]: [d_date_sk#15, d_date#16] +Condition : (((isnotnull(d_date#16) AND (d_date#16 >= 1999-02-01)) AND (d_date#16 <= 1999-04-02)) AND isnotnull(d_date_sk#15)) (36) Project [codegen id : 17] -Output [1]: [d_date_sk#18] -Input [2]: [d_date_sk#18, d_date#19] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_date#16] (37) BroadcastExchange -Input [1]: [d_date_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (38) BroadcastHashJoin [codegen id : 20] Left keys [1]: [ws_ship_date_sk#1] -Right keys [1]: [d_date_sk#18] +Right keys [1]: [d_date_sk#15] Join condition: None (39) Project [codegen id : 20] Output [5]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#18] +Input [7]: [ws_ship_date_sk#1, ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, d_date_sk#15] (40) Scan parquet default.customer_address -Output [2]: [ca_address_sk#21, ca_state#22] +Output [2]: [ca_address_sk#17, ca_state#18] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_state), EqualTo(ca_state,IL), IsNotNull(ca_address_sk)] ReadSchema: struct (41) ColumnarToRow [codegen id : 18] -Input [2]: [ca_address_sk#21, ca_state#22] +Input [2]: [ca_address_sk#17, ca_state#18] (42) Filter [codegen id : 18] -Input [2]: [ca_address_sk#21, ca_state#22] -Condition : ((isnotnull(ca_state#22) AND (ca_state#22 = IL)) AND isnotnull(ca_address_sk#21)) +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : ((isnotnull(ca_state#18) AND (ca_state#18 = IL)) AND isnotnull(ca_address_sk#17)) (43) Project [codegen id : 18] -Output [1]: [ca_address_sk#21] -Input [2]: [ca_address_sk#21, ca_state#22] +Output [1]: [ca_address_sk#17] +Input [2]: [ca_address_sk#17, ca_state#18] (44) BroadcastExchange -Input [1]: [ca_address_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] +Input [1]: [ca_address_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (45) BroadcastHashJoin [codegen id : 20] Left keys [1]: [ws_ship_addr_sk#2] -Right keys [1]: [ca_address_sk#21] +Right keys [1]: [ca_address_sk#17] Join condition: None (46) Project [codegen id : 20] Output [4]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#21] +Input [6]: [ws_ship_addr_sk#2, ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, ca_address_sk#17] (47) Scan parquet default.web_site -Output [2]: [web_site_sk#24, web_company_name#25] +Output [2]: [web_site_sk#19, web_company_name#20] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_company_name), EqualTo(web_company_name,pri ), IsNotNull(web_site_sk)] ReadSchema: struct (48) ColumnarToRow [codegen id : 19] -Input [2]: [web_site_sk#24, web_company_name#25] +Input [2]: [web_site_sk#19, web_company_name#20] (49) Filter [codegen id : 19] -Input [2]: [web_site_sk#24, web_company_name#25] -Condition : ((isnotnull(web_company_name#25) AND (web_company_name#25 = pri )) AND isnotnull(web_site_sk#24)) +Input [2]: [web_site_sk#19, web_company_name#20] +Condition : ((isnotnull(web_company_name#20) AND (web_company_name#20 = pri )) AND isnotnull(web_site_sk#19)) (50) Project [codegen id : 19] -Output [1]: [web_site_sk#24] -Input [2]: [web_site_sk#24, web_company_name#25] +Output [1]: [web_site_sk#19] +Input [2]: [web_site_sk#19, web_company_name#20] (51) BroadcastExchange -Input [1]: [web_site_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [1]: [web_site_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] (52) BroadcastHashJoin [codegen id : 20] Left keys [1]: [ws_web_site_sk#3] -Right keys [1]: [web_site_sk#24] +Right keys [1]: [web_site_sk#19] Join condition: None (53) Project [codegen id : 20] Output [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] -Input [5]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#24] +Input [5]: [ws_web_site_sk#3, ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6, web_site_sk#19] (54) HashAggregate [codegen id : 20] Input [3]: [ws_order_number#4, ws_ext_ship_cost#5, ws_net_profit#6] Keys [1]: [ws_order_number#4] Functions [2]: [partial_sum(UnscaledValue(ws_ext_ship_cost#5)), partial_sum(UnscaledValue(ws_net_profit#6))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28] -Results [3]: [ws_order_number#4, sum#29, sum#30] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22] +Results [3]: [ws_order_number#4, sum#23, sum#24] (55) HashAggregate [codegen id : 20] -Input [3]: [ws_order_number#4, sum#29, sum#30] +Input [3]: [ws_order_number#4, sum#23, sum#24] Keys [1]: [ws_order_number#4] Functions [2]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28] -Results [3]: [ws_order_number#4, sum#29, sum#30] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22] +Results [3]: [ws_order_number#4, sum#23, sum#24] (56) HashAggregate [codegen id : 20] -Input [3]: [ws_order_number#4, sum#29, sum#30] +Input [3]: [ws_order_number#4, sum#23, sum#24] Keys: [] Functions [3]: [merge_sum(UnscaledValue(ws_ext_ship_cost#5)), merge_sum(UnscaledValue(ws_net_profit#6)), partial_count(distinct ws_order_number#4)] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#31] -Results [3]: [sum#29, sum#30, count#32] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22, count(ws_order_number#4)#25] +Results [3]: [sum#23, sum#24, count#26] (57) Exchange -Input [3]: [sum#29, sum#30, count#32] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#33] +Input [3]: [sum#23, sum#24, count#26] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (58) HashAggregate [codegen id : 21] -Input [3]: [sum#29, sum#30, count#32] +Input [3]: [sum#23, sum#24, count#26] Keys: [] Functions [3]: [sum(UnscaledValue(ws_ext_ship_cost#5)), sum(UnscaledValue(ws_net_profit#6)), count(distinct ws_order_number#4)] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#27, sum(UnscaledValue(ws_net_profit#6))#28, count(ws_order_number#4)#31] -Results [3]: [count(ws_order_number#4)#31 AS order count #34, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#27,17,2) AS total shipping cost #35, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#28,17,2) AS total net profit #36] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_ship_cost#5))#21, sum(UnscaledValue(ws_net_profit#6))#22, count(ws_order_number#4)#25] +Results [3]: [count(ws_order_number#4)#25 AS order count #27, MakeDecimal(sum(UnscaledValue(ws_ext_ship_cost#5))#21,17,2) AS total shipping cost #28, MakeDecimal(sum(UnscaledValue(ws_net_profit#6))#22,17,2) AS total net profit #29] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt index de1eaa98e4d12..7cb46fa759c55 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96.sf100/explain.txt @@ -67,7 +67,7 @@ Input [3]: [t_time_sk#5, t_hour#6, t_minute#7] (9) BroadcastExchange Input [1]: [t_time_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_time_sk#1] @@ -79,82 +79,82 @@ Output [2]: [ss_hdemo_sk#2, ss_store_sk#3] Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, t_time_sk#5] (12) Scan parquet default.store -Output [2]: [s_store_sk#9, s_store_name#10] +Output [2]: [s_store_sk#8, s_store_name#9] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] ReadSchema: struct (13) ColumnarToRow [codegen id : 2] -Input [2]: [s_store_sk#9, s_store_name#10] +Input [2]: [s_store_sk#8, s_store_name#9] (14) Filter [codegen id : 2] -Input [2]: [s_store_sk#9, s_store_name#10] -Condition : ((isnotnull(s_store_name#10) AND (s_store_name#10 = ese)) AND isnotnull(s_store_sk#9)) +Input [2]: [s_store_sk#8, s_store_name#9] +Condition : ((isnotnull(s_store_name#9) AND (s_store_name#9 = ese)) AND isnotnull(s_store_sk#8)) (15) Project [codegen id : 2] -Output [1]: [s_store_sk#9] -Input [2]: [s_store_sk#9, s_store_name#10] +Output [1]: [s_store_sk#8] +Input [2]: [s_store_sk#8, s_store_name#9] (16) BroadcastExchange -Input [1]: [s_store_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] +Input [1]: [s_store_sk#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#9] +Right keys [1]: [s_store_sk#8] Join condition: None (18) Project [codegen id : 4] Output [1]: [ss_hdemo_sk#2] -Input [3]: [ss_hdemo_sk#2, ss_store_sk#3, s_store_sk#9] +Input [3]: [ss_hdemo_sk#2, ss_store_sk#3, s_store_sk#8] (19) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#12, hd_dep_count#13] +Output [2]: [hd_demo_sk#10, hd_dep_count#11] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_dep_count), EqualTo(hd_dep_count,7), IsNotNull(hd_demo_sk)] ReadSchema: struct (20) ColumnarToRow [codegen id : 3] -Input [2]: [hd_demo_sk#12, hd_dep_count#13] +Input [2]: [hd_demo_sk#10, hd_dep_count#11] (21) Filter [codegen id : 3] -Input [2]: [hd_demo_sk#12, hd_dep_count#13] -Condition : ((isnotnull(hd_dep_count#13) AND (hd_dep_count#13 = 7)) AND isnotnull(hd_demo_sk#12)) +Input [2]: [hd_demo_sk#10, hd_dep_count#11] +Condition : ((isnotnull(hd_dep_count#11) AND (hd_dep_count#11 = 7)) AND isnotnull(hd_demo_sk#10)) (22) Project [codegen id : 3] -Output [1]: [hd_demo_sk#12] -Input [2]: [hd_demo_sk#12, hd_dep_count#13] +Output [1]: [hd_demo_sk#10] +Input [2]: [hd_demo_sk#10, hd_dep_count#11] (23) BroadcastExchange -Input [1]: [hd_demo_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Input [1]: [hd_demo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#12] +Right keys [1]: [hd_demo_sk#10] Join condition: None (25) Project [codegen id : 4] Output: [] -Input [2]: [ss_hdemo_sk#2, hd_demo_sk#12] +Input [2]: [ss_hdemo_sk#2, hd_demo_sk#10] (26) HashAggregate [codegen id : 4] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#15] -Results [1]: [count#16] +Aggregate Attributes [1]: [count#12] +Results [1]: [count#13] (27) Exchange -Input [1]: [count#16] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#17] +Input [1]: [count#13] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 5] -Input [1]: [count#16] +Input [1]: [count#13] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#18] -Results [1]: [count(1)#18 AS count(1)#19] +Aggregate Attributes [1]: [count(1)#14] +Results [1]: [count(1)#14 AS count(1)#15] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt index 8ff4d605c2af4..eb0dd4570ce08 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q96/explain.txt @@ -67,7 +67,7 @@ Input [2]: [hd_demo_sk#5, hd_dep_count#6] (9) BroadcastExchange Input [1]: [hd_demo_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] @@ -79,82 +79,82 @@ Output [2]: [ss_sold_time_sk#1, ss_store_sk#3] Input [4]: [ss_sold_time_sk#1, ss_hdemo_sk#2, ss_store_sk#3, hd_demo_sk#5] (12) Scan parquet default.time_dim -Output [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Output [3]: [t_time_sk#7, t_hour#8, t_minute#9] Batched: true Location [not included in comparison]/{warehouse_dir}/time_dim] PushedFilters: [IsNotNull(t_hour), IsNotNull(t_minute), EqualTo(t_hour,20), GreaterThanOrEqual(t_minute,30), IsNotNull(t_time_sk)] ReadSchema: struct (13) ColumnarToRow [codegen id : 2] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] (14) Filter [codegen id : 2] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] -Condition : ((((isnotnull(t_hour#9) AND isnotnull(t_minute#10)) AND (t_hour#9 = 20)) AND (t_minute#10 >= 30)) AND isnotnull(t_time_sk#8)) +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] +Condition : ((((isnotnull(t_hour#8) AND isnotnull(t_minute#9)) AND (t_hour#8 = 20)) AND (t_minute#9 >= 30)) AND isnotnull(t_time_sk#7)) (15) Project [codegen id : 2] -Output [1]: [t_time_sk#8] -Input [3]: [t_time_sk#8, t_hour#9, t_minute#10] +Output [1]: [t_time_sk#7] +Input [3]: [t_time_sk#7, t_hour#8, t_minute#9] (16) BroadcastExchange -Input [1]: [t_time_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#11] +Input [1]: [t_time_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_time_sk#1] -Right keys [1]: [t_time_sk#8] +Right keys [1]: [t_time_sk#7] Join condition: None (18) Project [codegen id : 4] Output [1]: [ss_store_sk#3] -Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#8] +Input [3]: [ss_sold_time_sk#1, ss_store_sk#3, t_time_sk#7] (19) Scan parquet default.store -Output [2]: [s_store_sk#12, s_store_name#13] +Output [2]: [s_store_sk#10, s_store_name#11] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_name), EqualTo(s_store_name,ese), IsNotNull(s_store_sk)] ReadSchema: struct (20) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#12, s_store_name#13] +Input [2]: [s_store_sk#10, s_store_name#11] (21) Filter [codegen id : 3] -Input [2]: [s_store_sk#12, s_store_name#13] -Condition : ((isnotnull(s_store_name#13) AND (s_store_name#13 = ese)) AND isnotnull(s_store_sk#12)) +Input [2]: [s_store_sk#10, s_store_name#11] +Condition : ((isnotnull(s_store_name#11) AND (s_store_name#11 = ese)) AND isnotnull(s_store_sk#10)) (22) Project [codegen id : 3] -Output [1]: [s_store_sk#12] -Input [2]: [s_store_sk#12, s_store_name#13] +Output [1]: [s_store_sk#10] +Input [2]: [s_store_sk#10, s_store_name#11] (23) BroadcastExchange -Input [1]: [s_store_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Input [1]: [s_store_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] +Right keys [1]: [s_store_sk#10] Join condition: None (25) Project [codegen id : 4] Output: [] -Input [2]: [ss_store_sk#3, s_store_sk#12] +Input [2]: [ss_store_sk#3, s_store_sk#10] (26) HashAggregate [codegen id : 4] Input: [] Keys: [] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#15] -Results [1]: [count#16] +Aggregate Attributes [1]: [count#12] +Results [1]: [count#13] (27) Exchange -Input [1]: [count#16] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#17] +Input [1]: [count#13] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 5] -Input [1]: [count#16] +Input [1]: [count#13] Keys: [] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#18] -Results [1]: [count(1)#18 AS count(1)#19] +Aggregate Attributes [1]: [count(1)#14] +Results [1]: [count(1)#14 AS count(1)#15] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt index e47aaf2217cfd..106bb85e83b54 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97.sf100/explain.txt @@ -55,89 +55,89 @@ Results [2]: [ss_customer_sk#2, ss_item_sk#1] (7) Exchange Input [2]: [ss_customer_sk#2, ss_item_sk#1] -Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#6] +Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (8) HashAggregate [codegen id : 3] Input [2]: [ss_customer_sk#2, ss_item_sk#1] Keys [2]: [ss_customer_sk#2, ss_item_sk#1] Functions: [] Aggregate Attributes: [] -Results [2]: [ss_customer_sk#2 AS customer_sk#7, ss_item_sk#1 AS item_sk#8] +Results [2]: [ss_customer_sk#2 AS customer_sk#6, ss_item_sk#1 AS item_sk#7] (9) Sort [codegen id : 3] -Input [2]: [customer_sk#7, item_sk#8] -Arguments: [customer_sk#7 ASC NULLS FIRST, item_sk#8 ASC NULLS FIRST], false, 0 +Input [2]: [customer_sk#6, item_sk#7] +Arguments: [customer_sk#6 ASC NULLS FIRST, item_sk#7 ASC NULLS FIRST], false, 0 (10) Scan parquet default.catalog_sales -Output [3]: [cs_bill_customer_sk#9, cs_item_sk#10, cs_sold_date_sk#11] +Output [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#11), dynamicpruningexpression(cs_sold_date_sk#11 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(cs_sold_date_sk#10), dynamicpruningexpression(cs_sold_date_sk#10 IN dynamicpruning#4)] ReadSchema: struct (11) ColumnarToRow [codegen id : 5] -Input [3]: [cs_bill_customer_sk#9, cs_item_sk#10, cs_sold_date_sk#11] +Input [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] (12) ReusedExchange [Reuses operator id: 28] -Output [1]: [d_date_sk#12] +Output [1]: [d_date_sk#11] (13) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_sold_date_sk#11] -Right keys [1]: [d_date_sk#12] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] Join condition: None (14) Project [codegen id : 5] -Output [2]: [cs_bill_customer_sk#9, cs_item_sk#10] -Input [4]: [cs_bill_customer_sk#9, cs_item_sk#10, cs_sold_date_sk#11, d_date_sk#12] +Output [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Input [4]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10, d_date_sk#11] (15) HashAggregate [codegen id : 5] -Input [2]: [cs_bill_customer_sk#9, cs_item_sk#10] -Keys [2]: [cs_bill_customer_sk#9, cs_item_sk#10] +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Keys [2]: [cs_bill_customer_sk#8, cs_item_sk#9] Functions: [] Aggregate Attributes: [] -Results [2]: [cs_bill_customer_sk#9, cs_item_sk#10] +Results [2]: [cs_bill_customer_sk#8, cs_item_sk#9] (16) Exchange -Input [2]: [cs_bill_customer_sk#9, cs_item_sk#10] -Arguments: hashpartitioning(cs_bill_customer_sk#9, cs_item_sk#10, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Arguments: hashpartitioning(cs_bill_customer_sk#8, cs_item_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] (17) HashAggregate [codegen id : 6] -Input [2]: [cs_bill_customer_sk#9, cs_item_sk#10] -Keys [2]: [cs_bill_customer_sk#9, cs_item_sk#10] +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Keys [2]: [cs_bill_customer_sk#8, cs_item_sk#9] Functions: [] Aggregate Attributes: [] -Results [2]: [cs_bill_customer_sk#9 AS customer_sk#14, cs_item_sk#10 AS item_sk#15] +Results [2]: [cs_bill_customer_sk#8 AS customer_sk#12, cs_item_sk#9 AS item_sk#13] (18) Sort [codegen id : 6] -Input [2]: [customer_sk#14, item_sk#15] -Arguments: [customer_sk#14 ASC NULLS FIRST, item_sk#15 ASC NULLS FIRST], false, 0 +Input [2]: [customer_sk#12, item_sk#13] +Arguments: [customer_sk#12 ASC NULLS FIRST, item_sk#13 ASC NULLS FIRST], false, 0 (19) SortMergeJoin [codegen id : 7] -Left keys [2]: [customer_sk#7, item_sk#8] -Right keys [2]: [customer_sk#14, item_sk#15] +Left keys [2]: [customer_sk#6, item_sk#7] +Right keys [2]: [customer_sk#12, item_sk#13] Join condition: None (20) Project [codegen id : 7] -Output [2]: [customer_sk#7, customer_sk#14] -Input [4]: [customer_sk#7, item_sk#8, customer_sk#14, item_sk#15] +Output [2]: [customer_sk#6, customer_sk#12] +Input [4]: [customer_sk#6, item_sk#7, customer_sk#12, item_sk#13] (21) HashAggregate [codegen id : 7] -Input [2]: [customer_sk#7, customer_sk#14] +Input [2]: [customer_sk#6, customer_sk#12] Keys: [] -Functions [3]: [partial_sum(CASE WHEN (isnotnull(customer_sk#7) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnotnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)] -Aggregate Attributes [3]: [sum#16, sum#17, sum#18] -Results [3]: [sum#19, sum#20, sum#21] +Functions [3]: [partial_sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)] +Aggregate Attributes [3]: [sum#14, sum#15, sum#16] +Results [3]: [sum#17, sum#18, sum#19] (22) Exchange -Input [3]: [sum#19, sum#20, sum#21] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#22] +Input [3]: [sum#17, sum#18, sum#19] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 8] -Input [3]: [sum#19, sum#20, sum#21] +Input [3]: [sum#17, sum#18, sum#19] Keys: [] -Functions [3]: [sum(CASE WHEN (isnotnull(customer_sk#7) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnotnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)] -Aggregate Attributes [3]: [sum(CASE WHEN (isnotnull(customer_sk#7) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END)#23, sum(CASE WHEN (isnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#24, sum(CASE WHEN (isnotnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#25] -Results [3]: [sum(CASE WHEN (isnotnull(customer_sk#7) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END)#23 AS store_only#26, sum(CASE WHEN (isnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#24 AS catalog_only#27, sum(CASE WHEN (isnotnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#25 AS store_and_catalog#28] +Functions [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)] +Aggregate Attributes [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END)#20, sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#21, sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#22] +Results [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END)#20 AS store_only#23, sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#21 AS catalog_only#24, sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#22 AS store_and_catalog#25] ===== Subqueries ===== @@ -150,27 +150,27 @@ BroadcastExchange (28) (24) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_month_seq#29] +Output [2]: [d_date_sk#5, d_month_seq#26] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#29] +Input [2]: [d_date_sk#5, d_month_seq#26] (26) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#29] -Condition : (((isnotnull(d_month_seq#29) AND (d_month_seq#29 >= 1200)) AND (d_month_seq#29 <= 1211)) AND isnotnull(d_date_sk#5)) +Input [2]: [d_date_sk#5, d_month_seq#26] +Condition : (((isnotnull(d_month_seq#26) AND (d_month_seq#26 >= 1200)) AND (d_month_seq#26 <= 1211)) AND isnotnull(d_date_sk#5)) (27) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_month_seq#29] +Input [2]: [d_date_sk#5, d_month_seq#26] (28) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] -Subquery:2 Hosting operator id = 10 Hosting Expression = cs_sold_date_sk#11 IN dynamicpruning#4 +Subquery:2 Hosting operator id = 10 Hosting Expression = cs_sold_date_sk#10 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt index e47aaf2217cfd..106bb85e83b54 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q97/explain.txt @@ -55,89 +55,89 @@ Results [2]: [ss_customer_sk#2, ss_item_sk#1] (7) Exchange Input [2]: [ss_customer_sk#2, ss_item_sk#1] -Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#6] +Arguments: hashpartitioning(ss_customer_sk#2, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (8) HashAggregate [codegen id : 3] Input [2]: [ss_customer_sk#2, ss_item_sk#1] Keys [2]: [ss_customer_sk#2, ss_item_sk#1] Functions: [] Aggregate Attributes: [] -Results [2]: [ss_customer_sk#2 AS customer_sk#7, ss_item_sk#1 AS item_sk#8] +Results [2]: [ss_customer_sk#2 AS customer_sk#6, ss_item_sk#1 AS item_sk#7] (9) Sort [codegen id : 3] -Input [2]: [customer_sk#7, item_sk#8] -Arguments: [customer_sk#7 ASC NULLS FIRST, item_sk#8 ASC NULLS FIRST], false, 0 +Input [2]: [customer_sk#6, item_sk#7] +Arguments: [customer_sk#6 ASC NULLS FIRST, item_sk#7 ASC NULLS FIRST], false, 0 (10) Scan parquet default.catalog_sales -Output [3]: [cs_bill_customer_sk#9, cs_item_sk#10, cs_sold_date_sk#11] +Output [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#11), dynamicpruningexpression(cs_sold_date_sk#11 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(cs_sold_date_sk#10), dynamicpruningexpression(cs_sold_date_sk#10 IN dynamicpruning#4)] ReadSchema: struct (11) ColumnarToRow [codegen id : 5] -Input [3]: [cs_bill_customer_sk#9, cs_item_sk#10, cs_sold_date_sk#11] +Input [3]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10] (12) ReusedExchange [Reuses operator id: 28] -Output [1]: [d_date_sk#12] +Output [1]: [d_date_sk#11] (13) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [cs_sold_date_sk#11] -Right keys [1]: [d_date_sk#12] +Left keys [1]: [cs_sold_date_sk#10] +Right keys [1]: [d_date_sk#11] Join condition: None (14) Project [codegen id : 5] -Output [2]: [cs_bill_customer_sk#9, cs_item_sk#10] -Input [4]: [cs_bill_customer_sk#9, cs_item_sk#10, cs_sold_date_sk#11, d_date_sk#12] +Output [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Input [4]: [cs_bill_customer_sk#8, cs_item_sk#9, cs_sold_date_sk#10, d_date_sk#11] (15) HashAggregate [codegen id : 5] -Input [2]: [cs_bill_customer_sk#9, cs_item_sk#10] -Keys [2]: [cs_bill_customer_sk#9, cs_item_sk#10] +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Keys [2]: [cs_bill_customer_sk#8, cs_item_sk#9] Functions: [] Aggregate Attributes: [] -Results [2]: [cs_bill_customer_sk#9, cs_item_sk#10] +Results [2]: [cs_bill_customer_sk#8, cs_item_sk#9] (16) Exchange -Input [2]: [cs_bill_customer_sk#9, cs_item_sk#10] -Arguments: hashpartitioning(cs_bill_customer_sk#9, cs_item_sk#10, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Arguments: hashpartitioning(cs_bill_customer_sk#8, cs_item_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] (17) HashAggregate [codegen id : 6] -Input [2]: [cs_bill_customer_sk#9, cs_item_sk#10] -Keys [2]: [cs_bill_customer_sk#9, cs_item_sk#10] +Input [2]: [cs_bill_customer_sk#8, cs_item_sk#9] +Keys [2]: [cs_bill_customer_sk#8, cs_item_sk#9] Functions: [] Aggregate Attributes: [] -Results [2]: [cs_bill_customer_sk#9 AS customer_sk#14, cs_item_sk#10 AS item_sk#15] +Results [2]: [cs_bill_customer_sk#8 AS customer_sk#12, cs_item_sk#9 AS item_sk#13] (18) Sort [codegen id : 6] -Input [2]: [customer_sk#14, item_sk#15] -Arguments: [customer_sk#14 ASC NULLS FIRST, item_sk#15 ASC NULLS FIRST], false, 0 +Input [2]: [customer_sk#12, item_sk#13] +Arguments: [customer_sk#12 ASC NULLS FIRST, item_sk#13 ASC NULLS FIRST], false, 0 (19) SortMergeJoin [codegen id : 7] -Left keys [2]: [customer_sk#7, item_sk#8] -Right keys [2]: [customer_sk#14, item_sk#15] +Left keys [2]: [customer_sk#6, item_sk#7] +Right keys [2]: [customer_sk#12, item_sk#13] Join condition: None (20) Project [codegen id : 7] -Output [2]: [customer_sk#7, customer_sk#14] -Input [4]: [customer_sk#7, item_sk#8, customer_sk#14, item_sk#15] +Output [2]: [customer_sk#6, customer_sk#12] +Input [4]: [customer_sk#6, item_sk#7, customer_sk#12, item_sk#13] (21) HashAggregate [codegen id : 7] -Input [2]: [customer_sk#7, customer_sk#14] +Input [2]: [customer_sk#6, customer_sk#12] Keys: [] -Functions [3]: [partial_sum(CASE WHEN (isnotnull(customer_sk#7) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnotnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)] -Aggregate Attributes [3]: [sum#16, sum#17, sum#18] -Results [3]: [sum#19, sum#20, sum#21] +Functions [3]: [partial_sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)] +Aggregate Attributes [3]: [sum#14, sum#15, sum#16] +Results [3]: [sum#17, sum#18, sum#19] (22) Exchange -Input [3]: [sum#19, sum#20, sum#21] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#22] +Input [3]: [sum#17, sum#18, sum#19] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 8] -Input [3]: [sum#19, sum#20, sum#21] +Input [3]: [sum#17, sum#18, sum#19] Keys: [] -Functions [3]: [sum(CASE WHEN (isnotnull(customer_sk#7) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnotnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)] -Aggregate Attributes [3]: [sum(CASE WHEN (isnotnull(customer_sk#7) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END)#23, sum(CASE WHEN (isnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#24, sum(CASE WHEN (isnotnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#25] -Results [3]: [sum(CASE WHEN (isnotnull(customer_sk#7) AND isnull(customer_sk#14)) THEN 1 ELSE 0 END)#23 AS store_only#26, sum(CASE WHEN (isnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#24 AS catalog_only#27, sum(CASE WHEN (isnotnull(customer_sk#7) AND isnotnull(customer_sk#14)) THEN 1 ELSE 0 END)#25 AS store_and_catalog#28] +Functions [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END), sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)] +Aggregate Attributes [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END)#20, sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#21, sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#22] +Results [3]: [sum(CASE WHEN (isnotnull(customer_sk#6) AND isnull(customer_sk#12)) THEN 1 ELSE 0 END)#20 AS store_only#23, sum(CASE WHEN (isnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#21 AS catalog_only#24, sum(CASE WHEN (isnotnull(customer_sk#6) AND isnotnull(customer_sk#12)) THEN 1 ELSE 0 END)#22 AS store_and_catalog#25] ===== Subqueries ===== @@ -150,27 +150,27 @@ BroadcastExchange (28) (24) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_month_seq#29] +Output [2]: [d_date_sk#5, d_month_seq#26] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#29] +Input [2]: [d_date_sk#5, d_month_seq#26] (26) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#29] -Condition : (((isnotnull(d_month_seq#29) AND (d_month_seq#29 >= 1200)) AND (d_month_seq#29 <= 1211)) AND isnotnull(d_date_sk#5)) +Input [2]: [d_date_sk#5, d_month_seq#26] +Condition : (((isnotnull(d_month_seq#26) AND (d_month_seq#26 >= 1200)) AND (d_month_seq#26 <= 1211)) AND isnotnull(d_date_sk#5)) (27) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_month_seq#29] +Input [2]: [d_date_sk#5, d_month_seq#26] (28) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] -Subquery:2 Hosting operator id = 10 Hosting Expression = cs_sold_date_sk#11 IN dynamicpruning#4 +Subquery:2 Hosting operator id = 10 Hosting Expression = cs_sold_date_sk#10 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.sf100/explain.txt index b3528e4b6881b..7f2a84fa037cf 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98.sf100/explain.txt @@ -43,100 +43,100 @@ Condition : isnotnull(ss_item_sk#1) (4) Exchange Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#5] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.item -Output [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Output [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (8) Filter [codegen id : 3] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Condition : (i_category#11 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#6)) +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Condition : (i_category#10 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#5)) (9) Exchange -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Arguments: hashpartitioning(i_item_sk#6, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Arguments: hashpartitioning(i_item_sk#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] (10) Sort [codegen id : 4] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Arguments: [i_item_sk#5 ASC NULLS FIRST], false, 0 (11) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#6] +Right keys [1]: [i_item_sk#5] Join condition: None (12) Project [codegen id : 6] -Output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (13) ReusedExchange [Reuses operator id: 30] -Output [1]: [d_date_sk#13] +Output [1]: [d_date_sk#11] (14) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_sold_date_sk#3] -Right keys [1]: [d_date_sk#13] +Right keys [1]: [d_date_sk#11] Join condition: None (15) Project [codegen id : 6] -Output [6]: [ss_ext_sales_price#2, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11, d_date_sk#13] +Output [6]: [ss_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#11] (16) HashAggregate [codegen id : 6] -Input [6]: [ss_ext_sales_price#2, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Keys [5]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9] +Input [6]: [ss_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#14] -Results [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] (17) Exchange -Input [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] -Arguments: hashpartitioning(i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 7] -Input [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] -Keys [5]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#17] -Results [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#17,17,2) AS _w1#20, i_item_id#7] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] +Results [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w0#16, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w1#17, i_item_id#6] (19) Exchange -Input [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7] -Arguments: hashpartitioning(i_class#10, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] (20) Sort [codegen id : 8] -Input [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7] -Arguments: [i_class#10 ASC NULLS FIRST], false, 0 +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: [i_class#9 ASC NULLS FIRST], false, 0 (21) Window -Input [8]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7] -Arguments: [sum(_w1#20) windowspecdefinition(i_class#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#10] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: [sum(_w1#17) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#18], [i_class#9] (22) Project [codegen id : 9] -Output [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17)) AS revenueratio#23, i_item_id#7] -Input [9]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, i_item_id#7, _we0#22] +Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#16) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#18)), DecimalType(38,17)) AS revenueratio#19, i_item_id#6] +Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6, _we0#18] (23) Exchange -Input [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, revenueratio#23, i_item_id#7] -Arguments: rangepartitioning(i_category#11 ASC NULLS FIRST, i_class#10 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#8 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19, i_item_id#6] +Arguments: rangepartitioning(i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=5] (24) Sort [codegen id : 10] -Input [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, revenueratio#23, i_item_id#7] -Arguments: [i_category#11 ASC NULLS FIRST, i_class#10 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#8 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], true, 0 +Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19, i_item_id#6] +Arguments: [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST], true, 0 (25) Project [codegen id : 10] -Output [6]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, revenueratio#23] -Input [7]: [i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, revenueratio#23, i_item_id#7] +Output [6]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] +Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19, i_item_id#6] ===== Subqueries ===== @@ -149,25 +149,25 @@ BroadcastExchange (30) (26) Scan parquet default.date_dim -Output [2]: [d_date_sk#13, d_date#25] +Output [2]: [d_date_sk#11, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#13, d_date#25] +Input [2]: [d_date_sk#11, d_date#20] (28) Filter [codegen id : 1] -Input [2]: [d_date_sk#13, d_date#25] -Condition : (((isnotnull(d_date#25) AND (d_date#25 >= 1999-02-22)) AND (d_date#25 <= 1999-03-24)) AND isnotnull(d_date_sk#13)) +Input [2]: [d_date_sk#11, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-22)) AND (d_date#20 <= 1999-03-24)) AND isnotnull(d_date_sk#11)) (29) Project [codegen id : 1] -Output [1]: [d_date_sk#13] -Input [2]: [d_date_sk#13, d_date#25] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#20] (30) BroadcastExchange -Input [1]: [d_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt index ec1192af4d398..4d8874720c8c5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q98/explain.txt @@ -54,7 +54,7 @@ Condition : (i_category#10 IN (Sports (7) BroadcastExchange Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#1] @@ -66,62 +66,62 @@ Output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, i_item_desc#7 Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (10) ReusedExchange [Reuses operator id: 27] -Output [1]: [d_date_sk#12] +Output [1]: [d_date_sk#11] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#3] -Right keys [1]: [d_date_sk#12] +Right keys [1]: [d_date_sk#11] Join condition: None (12) Project [codegen id : 3] Output [6]: [ss_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] -Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#12] +Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#11] (13) HashAggregate [codegen id : 3] Input [6]: [ss_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#13] -Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] (14) Exchange -Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] -Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 4] -Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#16] -Results [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#16,17,2) AS itemrevenue#17, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#16,17,2) AS _w0#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#16,17,2) AS _w1#19, i_item_id#6] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] +Results [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w0#16, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w1#17, i_item_id#6] (16) Exchange -Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6] -Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (17) Sort [codegen id : 5] -Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] Arguments: [i_class#9 ASC NULLS FIRST], false, 0 (18) Window -Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6] -Arguments: [sum(_w1#19) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#21], [i_class#9] +Input [8]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6] +Arguments: [sum(_w1#17) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#18], [i_class#9] (19) Project [codegen id : 6] -Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17)) AS revenueratio#22, i_item_id#6] -Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, i_item_id#6, _we0#21] +Output [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#16) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#18)), DecimalType(38,17)) AS revenueratio#19, i_item_id#6] +Input [9]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, i_item_id#6, _we0#18] (20) Exchange -Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22, i_item_id#6] -Arguments: rangepartitioning(i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#22 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19, i_item_id#6] +Arguments: rangepartitioning(i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=4] (21) Sort [codegen id : 7] -Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22, i_item_id#6] -Arguments: [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#22 ASC NULLS FIRST], true, 0 +Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19, i_item_id#6] +Arguments: [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST], true, 0 (22) Project [codegen id : 7] -Output [6]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22] -Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22, i_item_id#6] +Output [6]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] +Input [7]: [i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19, i_item_id#6] ===== Subqueries ===== @@ -134,25 +134,25 @@ BroadcastExchange (27) (23) Scan parquet default.date_dim -Output [2]: [d_date_sk#12, d_date#24] +Output [2]: [d_date_sk#11, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] ReadSchema: struct (24) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#12, d_date#24] +Input [2]: [d_date_sk#11, d_date#20] (25) Filter [codegen id : 1] -Input [2]: [d_date_sk#12, d_date#24] -Condition : (((isnotnull(d_date#24) AND (d_date#24 >= 1999-02-22)) AND (d_date#24 <= 1999-03-24)) AND isnotnull(d_date_sk#12)) +Input [2]: [d_date_sk#11, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-22)) AND (d_date#20 <= 1999-03-24)) AND isnotnull(d_date_sk#11)) (26) Project [codegen id : 1] -Output [1]: [d_date_sk#12] -Input [2]: [d_date_sk#12, d_date#24] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#20] (27) BroadcastExchange -Input [1]: [d_date_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt index 1b955ee3bd96c..212bc3b1eaacd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99.sf100/explain.txt @@ -67,7 +67,7 @@ Input [2]: [d_date_sk#6, d_month_seq#7] (8) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_ship_date_sk#1] @@ -79,105 +79,105 @@ Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_wareh Input [6]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5, d_date_sk#6] (11) Scan parquet default.ship_mode -Output [2]: [sm_ship_mode_sk#9, sm_type#10] +Output [2]: [sm_ship_mode_sk#8, sm_type#9] Batched: true Location [not included in comparison]/{warehouse_dir}/ship_mode] PushedFilters: [IsNotNull(sm_ship_mode_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [2]: [sm_ship_mode_sk#9, sm_type#10] +Input [2]: [sm_ship_mode_sk#8, sm_type#9] (13) Filter [codegen id : 2] -Input [2]: [sm_ship_mode_sk#9, sm_type#10] -Condition : isnotnull(sm_ship_mode_sk#9) +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Condition : isnotnull(sm_ship_mode_sk#8) (14) BroadcastExchange -Input [2]: [sm_ship_mode_sk#9, sm_type#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_ship_mode_sk#3] -Right keys [1]: [sm_ship_mode_sk#9] +Right keys [1]: [sm_ship_mode_sk#8] Join condition: None (16) Project [codegen id : 5] -Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_warehouse_sk#4, cs_sold_date_sk#5, sm_type#10] -Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5, sm_ship_mode_sk#9, sm_type#10] +Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_warehouse_sk#4, cs_sold_date_sk#5, sm_type#9] +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5, sm_ship_mode_sk#8, sm_type#9] (17) Scan parquet default.call_center -Output [2]: [cc_call_center_sk#12, cc_name#13] +Output [2]: [cc_call_center_sk#10, cc_name#11] Batched: true Location [not included in comparison]/{warehouse_dir}/call_center] PushedFilters: [IsNotNull(cc_call_center_sk)] ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [2]: [cc_call_center_sk#12, cc_name#13] +Input [2]: [cc_call_center_sk#10, cc_name#11] (19) Filter [codegen id : 3] -Input [2]: [cc_call_center_sk#12, cc_name#13] -Condition : isnotnull(cc_call_center_sk#12) +Input [2]: [cc_call_center_sk#10, cc_name#11] +Condition : isnotnull(cc_call_center_sk#10) (20) BroadcastExchange -Input [2]: [cc_call_center_sk#12, cc_name#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Input [2]: [cc_call_center_sk#10, cc_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_call_center_sk#2] -Right keys [1]: [cc_call_center_sk#12] +Right keys [1]: [cc_call_center_sk#10] Join condition: None (22) Project [codegen id : 5] -Output [5]: [cs_ship_date_sk#1, cs_warehouse_sk#4, cs_sold_date_sk#5, sm_type#10, cc_name#13] -Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_warehouse_sk#4, cs_sold_date_sk#5, sm_type#10, cc_call_center_sk#12, cc_name#13] +Output [5]: [cs_ship_date_sk#1, cs_warehouse_sk#4, cs_sold_date_sk#5, sm_type#9, cc_name#11] +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_warehouse_sk#4, cs_sold_date_sk#5, sm_type#9, cc_call_center_sk#10, cc_name#11] (23) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#15, w_warehouse_name#16] +Output [2]: [w_warehouse_sk#12, w_warehouse_name#13] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (24) ColumnarToRow [codegen id : 4] -Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] +Input [2]: [w_warehouse_sk#12, w_warehouse_name#13] (25) Filter [codegen id : 4] -Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] -Condition : isnotnull(w_warehouse_sk#15) +Input [2]: [w_warehouse_sk#12, w_warehouse_name#13] +Condition : isnotnull(w_warehouse_sk#12) (26) BroadcastExchange -Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] +Input [2]: [w_warehouse_sk#12, w_warehouse_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (27) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_warehouse_sk#4] -Right keys [1]: [w_warehouse_sk#15] +Right keys [1]: [w_warehouse_sk#12] Join condition: None (28) Project [codegen id : 5] -Output [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#10, cc_name#13, substr(w_warehouse_name#16, 1, 20) AS _groupingexpression#18] -Input [7]: [cs_ship_date_sk#1, cs_warehouse_sk#4, cs_sold_date_sk#5, sm_type#10, cc_name#13, w_warehouse_sk#15, w_warehouse_name#16] +Output [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, substr(w_warehouse_name#13, 1, 20) AS _groupingexpression#14] +Input [7]: [cs_ship_date_sk#1, cs_warehouse_sk#4, cs_sold_date_sk#5, sm_type#9, cc_name#11, w_warehouse_sk#12, w_warehouse_name#13] (29) HashAggregate [codegen id : 5] -Input [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#10, cc_name#13, _groupingexpression#18] -Keys [3]: [_groupingexpression#18, sm_type#10, cc_name#13] +Input [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, sm_type#9, cc_name#11] Functions [5]: [partial_sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] -Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] -Results [8]: [_groupingexpression#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] +Aggregate Attributes [5]: [sum#15, sum#16, sum#17, sum#18, sum#19] +Results [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] (30) Exchange -Input [8]: [_groupingexpression#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Arguments: hashpartitioning(_groupingexpression#18, sm_type#10, cc_name#13, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(_groupingexpression#14, sm_type#9, cc_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] (31) HashAggregate [codegen id : 6] -Input [8]: [_groupingexpression#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Keys [3]: [_groupingexpression#18, sm_type#10, cc_name#13] +Input [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [3]: [_groupingexpression#14, sm_type#9, cc_name#11] Functions [5]: [sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] -Aggregate Attributes [5]: [sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#30, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#31, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#32, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#33, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#34] -Results [8]: [_groupingexpression#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#30 AS 30 days #36, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#31 AS 31 - 60 days #37, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#32 AS 61 - 90 days #38, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#33 AS 91 - 120 days #39, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#34 AS >120 days #40] +Aggregate Attributes [5]: [sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29] +Results [8]: [_groupingexpression#14 AS substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25 AS 30 days #31, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26 AS 31 - 60 days #32, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27 AS 61 - 90 days #33, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28 AS 91 - 120 days #34, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29 AS >120 days #35] (32) TakeOrderedAndProject -Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] -Arguments: 100, [substr(w_warehouse_name, 1, 20)#35 ASC NULLS FIRST, sm_type#10 ASC NULLS FIRST, cc_name#13 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] +Input [8]: [substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#30 ASC NULLS FIRST, sm_type#9 ASC NULLS FIRST, cc_name#11 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt index 1431623539828..2dd94aed3490b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q99/explain.txt @@ -63,7 +63,7 @@ Condition : isnotnull(w_warehouse_sk#6) (7) BroadcastExchange Input [2]: [w_warehouse_sk#6, w_warehouse_name#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_warehouse_sk#4] @@ -75,109 +75,109 @@ Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_ Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_warehouse_sk#4, cs_sold_date_sk#5, w_warehouse_sk#6, w_warehouse_name#7] (10) Scan parquet default.ship_mode -Output [2]: [sm_ship_mode_sk#9, sm_type#10] +Output [2]: [sm_ship_mode_sk#8, sm_type#9] Batched: true Location [not included in comparison]/{warehouse_dir}/ship_mode] PushedFilters: [IsNotNull(sm_ship_mode_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [2]: [sm_ship_mode_sk#9, sm_type#10] +Input [2]: [sm_ship_mode_sk#8, sm_type#9] (12) Filter [codegen id : 2] -Input [2]: [sm_ship_mode_sk#9, sm_type#10] -Condition : isnotnull(sm_ship_mode_sk#9) +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Condition : isnotnull(sm_ship_mode_sk#8) (13) BroadcastExchange -Input [2]: [sm_ship_mode_sk#9, sm_type#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Input [2]: [sm_ship_mode_sk#8, sm_type#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_ship_mode_sk#3] -Right keys [1]: [sm_ship_mode_sk#9] +Right keys [1]: [sm_ship_mode_sk#8] Join condition: None (15) Project [codegen id : 5] -Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#10] -Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7, sm_ship_mode_sk#9, sm_type#10] +Output [5]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9] +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_ship_mode_sk#3, cs_sold_date_sk#5, w_warehouse_name#7, sm_ship_mode_sk#8, sm_type#9] (16) Scan parquet default.call_center -Output [2]: [cc_call_center_sk#12, cc_name#13] +Output [2]: [cc_call_center_sk#10, cc_name#11] Batched: true Location [not included in comparison]/{warehouse_dir}/call_center] PushedFilters: [IsNotNull(cc_call_center_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 3] -Input [2]: [cc_call_center_sk#12, cc_name#13] +Input [2]: [cc_call_center_sk#10, cc_name#11] (18) Filter [codegen id : 3] -Input [2]: [cc_call_center_sk#12, cc_name#13] -Condition : isnotnull(cc_call_center_sk#12) +Input [2]: [cc_call_center_sk#10, cc_name#11] +Condition : isnotnull(cc_call_center_sk#10) (19) BroadcastExchange -Input [2]: [cc_call_center_sk#12, cc_name#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Input [2]: [cc_call_center_sk#10, cc_name#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (20) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_call_center_sk#2] -Right keys [1]: [cc_call_center_sk#12] +Right keys [1]: [cc_call_center_sk#10] Join condition: None (21) Project [codegen id : 5] -Output [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#10, cc_name#13] -Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#10, cc_call_center_sk#12, cc_name#13] +Output [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11] +Input [7]: [cs_ship_date_sk#1, cs_call_center_sk#2, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_call_center_sk#10, cc_name#11] (22) Scan parquet default.date_dim -Output [2]: [d_date_sk#15, d_month_seq#16] +Output [2]: [d_date_sk#12, d_month_seq#13] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 4] -Input [2]: [d_date_sk#15, d_month_seq#16] +Input [2]: [d_date_sk#12, d_month_seq#13] (24) Filter [codegen id : 4] -Input [2]: [d_date_sk#15, d_month_seq#16] -Condition : (((isnotnull(d_month_seq#16) AND (d_month_seq#16 >= 1200)) AND (d_month_seq#16 <= 1211)) AND isnotnull(d_date_sk#15)) +Input [2]: [d_date_sk#12, d_month_seq#13] +Condition : (((isnotnull(d_month_seq#13) AND (d_month_seq#13 >= 1200)) AND (d_month_seq#13 <= 1211)) AND isnotnull(d_date_sk#12)) (25) Project [codegen id : 4] -Output [1]: [d_date_sk#15] -Input [2]: [d_date_sk#15, d_month_seq#16] +Output [1]: [d_date_sk#12] +Input [2]: [d_date_sk#12, d_month_seq#13] (26) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [1]: [d_date_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (27) BroadcastHashJoin [codegen id : 5] Left keys [1]: [cs_ship_date_sk#1] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#12] Join condition: None (28) Project [codegen id : 5] -Output [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#10, cc_name#13, substr(w_warehouse_name#7, 1, 20) AS _groupingexpression#18] -Input [6]: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#10, cc_name#13, d_date_sk#15] +Output [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, substr(w_warehouse_name#7, 1, 20) AS _groupingexpression#14] +Input [6]: [cs_ship_date_sk#1, cs_sold_date_sk#5, w_warehouse_name#7, sm_type#9, cc_name#11, d_date_sk#12] (29) HashAggregate [codegen id : 5] -Input [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#10, cc_name#13, _groupingexpression#18] -Keys [3]: [_groupingexpression#18, sm_type#10, cc_name#13] +Input [5]: [cs_ship_date_sk#1, cs_sold_date_sk#5, sm_type#9, cc_name#11, _groupingexpression#14] +Keys [3]: [_groupingexpression#14, sm_type#9, cc_name#11] Functions [5]: [partial_sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] -Aggregate Attributes [5]: [sum#19, sum#20, sum#21, sum#22, sum#23] -Results [8]: [_groupingexpression#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] +Aggregate Attributes [5]: [sum#15, sum#16, sum#17, sum#18, sum#19] +Results [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] (30) Exchange -Input [8]: [_groupingexpression#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Arguments: hashpartitioning(_groupingexpression#18, sm_type#10, cc_name#13, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Arguments: hashpartitioning(_groupingexpression#14, sm_type#9, cc_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] (31) HashAggregate [codegen id : 6] -Input [8]: [_groupingexpression#18, sm_type#10, cc_name#13, sum#24, sum#25, sum#26, sum#27, sum#28] -Keys [3]: [_groupingexpression#18, sm_type#10, cc_name#13] +Input [8]: [_groupingexpression#14, sm_type#9, cc_name#11, sum#20, sum#21, sum#22, sum#23, sum#24] +Keys [3]: [_groupingexpression#14, sm_type#9, cc_name#11] Functions [5]: [sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END), sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END), sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)] -Aggregate Attributes [5]: [sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#30, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#31, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#32, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#33, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#34] -Results [8]: [_groupingexpression#18 AS substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#30 AS 30 days #36, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#31 AS 31 - 60 days #37, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#32 AS 61 - 90 days #38, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#33 AS 91 - 120 days #39, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#34 AS >120 days #40] +Aggregate Attributes [5]: [sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29] +Results [8]: [_groupingexpression#14 AS substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 30) THEN 1 ELSE 0 END)#25 AS 30 days #31, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 30) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 60)) THEN 1 ELSE 0 END)#26 AS 31 - 60 days #32, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 60) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 90)) THEN 1 ELSE 0 END)#27 AS 61 - 90 days #33, sum(CASE WHEN (((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 90) AND ((cs_ship_date_sk#1 - cs_sold_date_sk#5) <= 120)) THEN 1 ELSE 0 END)#28 AS 91 - 120 days #34, sum(CASE WHEN ((cs_ship_date_sk#1 - cs_sold_date_sk#5) > 120) THEN 1 ELSE 0 END)#29 AS >120 days #35] (32) TakeOrderedAndProject -Input [8]: [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] -Arguments: 100, [substr(w_warehouse_name, 1, 20)#35 ASC NULLS FIRST, sm_type#10 ASC NULLS FIRST, cc_name#13 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#35, sm_type#10, cc_name#13, 30 days #36, 31 - 60 days #37, 61 - 90 days #38, 91 - 120 days #39, >120 days #40] +Input [8]: [substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] +Arguments: 100, [substr(w_warehouse_name, 1, 20)#30 ASC NULLS FIRST, sm_type#9 ASC NULLS FIRST, cc_name#11 ASC NULLS FIRST], [substr(w_warehouse_name, 1, 20)#30, sm_type#9, cc_name#11, 30 days #31, 31 - 60 days #32, 61 - 90 days #33, 91 - 120 days #34, >120 days #35] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/explain.txt index fe97109236cf2..96af6cd628dbe 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a.sf100/explain.txt @@ -62,104 +62,104 @@ Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) (4) Exchange Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] -Arguments: hashpartitioning(c_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#4] +Arguments: hashpartitioning(c_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_sales -Output [2]: [ss_customer_sk#5, ss_sold_date_sk#6] +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#6), dynamicpruningexpression(ss_sold_date_sk#6 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ss_sold_date_sk#5), dynamicpruningexpression(ss_sold_date_sk#5 IN dynamicpruning#6)] ReadSchema: struct (7) ColumnarToRow [codegen id : 4] -Input [2]: [ss_customer_sk#5, ss_sold_date_sk#6] +Input [2]: [ss_customer_sk#4, ss_sold_date_sk#5] (8) ReusedExchange [Reuses operator id: 50] -Output [1]: [d_date_sk#8] +Output [1]: [d_date_sk#7] (9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#6] -Right keys [1]: [d_date_sk#8] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#7] Join condition: None (10) Project [codegen id : 4] -Output [1]: [ss_customer_sk#5] -Input [3]: [ss_customer_sk#5, ss_sold_date_sk#6, d_date_sk#8] +Output [1]: [ss_customer_sk#4] +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#7] (11) Exchange -Input [1]: [ss_customer_sk#5] -Arguments: hashpartitioning(ss_customer_sk#5, 5), ENSURE_REQUIREMENTS, [id=#9] +Input [1]: [ss_customer_sk#4] +Arguments: hashpartitioning(ss_customer_sk#4, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 5] -Input [1]: [ss_customer_sk#5] -Arguments: [ss_customer_sk#5 ASC NULLS FIRST], false, 0 +Input [1]: [ss_customer_sk#4] +Arguments: [ss_customer_sk#4 ASC NULLS FIRST], false, 0 (13) SortMergeJoin [codegen id : 6] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ss_customer_sk#5] +Right keys [1]: [ss_customer_sk#4] Join condition: None (14) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] +Output [2]: [ws_bill_customer_sk#8, ws_sold_date_sk#9] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#11), dynamicpruningexpression(ws_sold_date_sk#11 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ws_sold_date_sk#9), dynamicpruningexpression(ws_sold_date_sk#9 IN dynamicpruning#6)] ReadSchema: struct (15) ColumnarToRow [codegen id : 8] -Input [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] +Input [2]: [ws_bill_customer_sk#8, ws_sold_date_sk#9] (16) ReusedExchange [Reuses operator id: 50] -Output [1]: [d_date_sk#12] +Output [1]: [d_date_sk#10] (17) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_sold_date_sk#11] -Right keys [1]: [d_date_sk#12] +Left keys [1]: [ws_sold_date_sk#9] +Right keys [1]: [d_date_sk#10] Join condition: None (18) Project [codegen id : 8] -Output [1]: [ws_bill_customer_sk#10 AS customer_sk#13] -Input [3]: [ws_bill_customer_sk#10, ws_sold_date_sk#11, d_date_sk#12] +Output [1]: [ws_bill_customer_sk#8 AS customer_sk#11] +Input [3]: [ws_bill_customer_sk#8, ws_sold_date_sk#9, d_date_sk#10] (19) Scan parquet default.catalog_sales -Output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Output [2]: [cs_ship_customer_sk#12, cs_sold_date_sk#13] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#15), dynamicpruningexpression(cs_sold_date_sk#15 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(cs_sold_date_sk#13), dynamicpruningexpression(cs_sold_date_sk#13 IN dynamicpruning#6)] ReadSchema: struct (20) ColumnarToRow [codegen id : 10] -Input [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Input [2]: [cs_ship_customer_sk#12, cs_sold_date_sk#13] (21) ReusedExchange [Reuses operator id: 50] -Output [1]: [d_date_sk#16] +Output [1]: [d_date_sk#14] (22) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#15] -Right keys [1]: [d_date_sk#16] +Left keys [1]: [cs_sold_date_sk#13] +Right keys [1]: [d_date_sk#14] Join condition: None (23) Project [codegen id : 10] -Output [1]: [cs_ship_customer_sk#14 AS customer_sk#17] -Input [3]: [cs_ship_customer_sk#14, cs_sold_date_sk#15, d_date_sk#16] +Output [1]: [cs_ship_customer_sk#12 AS customer_sk#15] +Input [3]: [cs_ship_customer_sk#12, cs_sold_date_sk#13, d_date_sk#14] (24) Union (25) Exchange -Input [1]: [customer_sk#13] -Arguments: hashpartitioning(customer_sk#13, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [1]: [customer_sk#11] +Arguments: hashpartitioning(customer_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (26) Sort [codegen id : 11] -Input [1]: [customer_sk#13] -Arguments: [customer_sk#13 ASC NULLS FIRST], false, 0 +Input [1]: [customer_sk#11] +Arguments: [customer_sk#11 ASC NULLS FIRST], false, 0 (27) SortMergeJoin [codegen id : 13] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [customer_sk#13] +Right keys [1]: [customer_sk#11] Join condition: None (28) Project [codegen id : 13] @@ -167,88 +167,88 @@ Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] (29) Scan parquet default.customer_address -Output [2]: [ca_address_sk#19, ca_county#20] +Output [2]: [ca_address_sk#16, ca_county#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_county, [Dona Ana County,Douglas County,Gaines County,Richland County,Walker County]), IsNotNull(ca_address_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 12] -Input [2]: [ca_address_sk#19, ca_county#20] +Input [2]: [ca_address_sk#16, ca_county#17] (31) Filter [codegen id : 12] -Input [2]: [ca_address_sk#19, ca_county#20] -Condition : (ca_county#20 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#19)) +Input [2]: [ca_address_sk#16, ca_county#17] +Condition : (ca_county#17 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#16)) (32) Project [codegen id : 12] -Output [1]: [ca_address_sk#19] -Input [2]: [ca_address_sk#19, ca_county#20] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_county#17] (33) BroadcastExchange -Input [1]: [ca_address_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (34) BroadcastHashJoin [codegen id : 13] Left keys [1]: [c_current_addr_sk#3] -Right keys [1]: [ca_address_sk#19] +Right keys [1]: [ca_address_sk#16] Join condition: None (35) Project [codegen id : 13] Output [1]: [c_current_cdemo_sk#2] -Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#19] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#16] (36) BroadcastExchange Input [1]: [c_current_cdemo_sk#2] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (37) Scan parquet default.customer_demographics -Output [9]: [cd_demo_sk#23, cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] +Output [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (38) ColumnarToRow -Input [9]: [cd_demo_sk#23, cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] +Input [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] (39) Filter -Input [9]: [cd_demo_sk#23, cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] -Condition : isnotnull(cd_demo_sk#23) +Input [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Condition : isnotnull(cd_demo_sk#18) (40) BroadcastHashJoin [codegen id : 14] Left keys [1]: [c_current_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#23] +Right keys [1]: [cd_demo_sk#18] Join condition: None (41) Project [codegen id : 14] -Output [8]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] -Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#23, cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] +Output [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] (42) HashAggregate [codegen id : 14] -Input [8]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] -Keys [8]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] +Input [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Keys [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#32] -Results [9]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31, count#33] +Aggregate Attributes [1]: [count#27] +Results [9]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#28] (43) Exchange -Input [9]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31, count#33] -Arguments: hashpartitioning(cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [9]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#28] +Arguments: hashpartitioning(cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, 5), ENSURE_REQUIREMENTS, [plan_id=6] (44) HashAggregate [codegen id : 15] -Input [9]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31, count#33] -Keys [8]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cd_purchase_estimate#27, cd_credit_rating#28, cd_dep_count#29, cd_dep_employed_count#30, cd_dep_college_count#31] +Input [9]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#28] +Keys [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#35] -Results [14]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, count(1)#35 AS cnt1#36, cd_purchase_estimate#27, count(1)#35 AS cnt2#37, cd_credit_rating#28, count(1)#35 AS cnt3#38, cd_dep_count#29, count(1)#35 AS cnt4#39, cd_dep_employed_count#30, count(1)#35 AS cnt5#40, cd_dep_college_count#31, count(1)#35 AS cnt6#41] +Aggregate Attributes [1]: [count(1)#29] +Results [14]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, count(1)#29 AS cnt1#30, cd_purchase_estimate#22, count(1)#29 AS cnt2#31, cd_credit_rating#23, count(1)#29 AS cnt3#32, cd_dep_count#24, count(1)#29 AS cnt4#33, cd_dep_employed_count#25, count(1)#29 AS cnt5#34, cd_dep_college_count#26, count(1)#29 AS cnt6#35] (45) TakeOrderedAndProject -Input [14]: [cd_gender#24, cd_marital_status#25, cd_education_status#26, cnt1#36, cd_purchase_estimate#27, cnt2#37, cd_credit_rating#28, cnt3#38, cd_dep_count#29, cnt4#39, cd_dep_employed_count#30, cnt5#40, cd_dep_college_count#31, cnt6#41] -Arguments: 100, [cd_gender#24 ASC NULLS FIRST, cd_marital_status#25 ASC NULLS FIRST, cd_education_status#26 ASC NULLS FIRST, cd_purchase_estimate#27 ASC NULLS FIRST, cd_credit_rating#28 ASC NULLS FIRST, cd_dep_count#29 ASC NULLS FIRST, cd_dep_employed_count#30 ASC NULLS FIRST, cd_dep_college_count#31 ASC NULLS FIRST], [cd_gender#24, cd_marital_status#25, cd_education_status#26, cnt1#36, cd_purchase_estimate#27, cnt2#37, cd_credit_rating#28, cnt3#38, cd_dep_count#29, cnt4#39, cd_dep_employed_count#30, cnt5#40, cd_dep_college_count#31, cnt6#41] +Input [14]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cnt1#30, cd_purchase_estimate#22, cnt2#31, cd_credit_rating#23, cnt3#32, cd_dep_count#24, cnt4#33, cd_dep_employed_count#25, cnt5#34, cd_dep_college_count#26, cnt6#35] +Arguments: 100, [cd_gender#19 ASC NULLS FIRST, cd_marital_status#20 ASC NULLS FIRST, cd_education_status#21 ASC NULLS FIRST, cd_purchase_estimate#22 ASC NULLS FIRST, cd_credit_rating#23 ASC NULLS FIRST, cd_dep_count#24 ASC NULLS FIRST, cd_dep_employed_count#25 ASC NULLS FIRST, cd_dep_college_count#26 ASC NULLS FIRST], [cd_gender#19, cd_marital_status#20, cd_education_status#21, cnt1#30, cd_purchase_estimate#22, cnt2#31, cd_credit_rating#23, cnt3#32, cd_dep_count#24, cnt4#33, cd_dep_employed_count#25, cnt5#34, cd_dep_college_count#26, cnt6#35] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#5 IN dynamicpruning#6 BroadcastExchange (50) +- * Project (49) +- * Filter (48) @@ -257,29 +257,29 @@ BroadcastExchange (50) (46) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#42, d_moy#43] +Output [3]: [d_date_sk#7, d_year#36, d_moy#37] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,7), IsNotNull(d_date_sk)] ReadSchema: struct (47) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#42, d_moy#43] +Input [3]: [d_date_sk#7, d_year#36, d_moy#37] (48) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#42, d_moy#43] -Condition : (((((isnotnull(d_year#42) AND isnotnull(d_moy#43)) AND (d_year#42 = 2002)) AND (d_moy#43 >= 4)) AND (d_moy#43 <= 7)) AND isnotnull(d_date_sk#8)) +Input [3]: [d_date_sk#7, d_year#36, d_moy#37] +Condition : (((((isnotnull(d_year#36) AND isnotnull(d_moy#37)) AND (d_year#36 = 2002)) AND (d_moy#37 >= 4)) AND (d_moy#37 <= 7)) AND isnotnull(d_date_sk#7)) (49) Project [codegen id : 1] -Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#42, d_moy#43] +Output [1]: [d_date_sk#7] +Input [3]: [d_date_sk#7, d_year#36, d_moy#37] (50) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#44] +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:2 Hosting operator id = 14 Hosting Expression = ws_sold_date_sk#11 IN dynamicpruning#7 +Subquery:2 Hosting operator id = 14 Hosting Expression = ws_sold_date_sk#9 IN dynamicpruning#6 -Subquery:3 Hosting operator id = 19 Hosting Expression = cs_sold_date_sk#15 IN dynamicpruning#7 +Subquery:3 Hosting operator id = 19 Hosting Expression = cs_sold_date_sk#13 IN dynamicpruning#6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a/explain.txt index 02522cd257d71..8396cdbc7d0fa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q10a/explain.txt @@ -80,7 +80,7 @@ Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#7] (9) BroadcastExchange Input [1]: [ss_customer_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#1] @@ -88,58 +88,58 @@ Right keys [1]: [ss_customer_sk#4] Join condition: None (11) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Output [2]: [ws_bill_customer_sk#8, ws_sold_date_sk#9] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#10), dynamicpruningexpression(ws_sold_date_sk#10 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(ws_sold_date_sk#9), dynamicpruningexpression(ws_sold_date_sk#9 IN dynamicpruning#6)] ReadSchema: struct (12) ColumnarToRow [codegen id : 4] -Input [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Input [2]: [ws_bill_customer_sk#8, ws_sold_date_sk#9] (13) ReusedExchange [Reuses operator id: 46] -Output [1]: [d_date_sk#11] +Output [1]: [d_date_sk#10] (14) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ws_sold_date_sk#10] -Right keys [1]: [d_date_sk#11] +Left keys [1]: [ws_sold_date_sk#9] +Right keys [1]: [d_date_sk#10] Join condition: None (15) Project [codegen id : 4] -Output [1]: [ws_bill_customer_sk#9 AS customer_sk#12] -Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] +Output [1]: [ws_bill_customer_sk#8 AS customer_sk#11] +Input [3]: [ws_bill_customer_sk#8, ws_sold_date_sk#9, d_date_sk#10] (16) Scan parquet default.catalog_sales -Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Output [2]: [cs_ship_customer_sk#12, cs_sold_date_sk#13] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#14), dynamicpruningexpression(cs_sold_date_sk#14 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(cs_sold_date_sk#13), dynamicpruningexpression(cs_sold_date_sk#13 IN dynamicpruning#6)] ReadSchema: struct (17) ColumnarToRow [codegen id : 6] -Input [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Input [2]: [cs_ship_customer_sk#12, cs_sold_date_sk#13] (18) ReusedExchange [Reuses operator id: 46] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (19) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_sold_date_sk#14] -Right keys [1]: [d_date_sk#15] +Left keys [1]: [cs_sold_date_sk#13] +Right keys [1]: [d_date_sk#14] Join condition: None (20) Project [codegen id : 6] -Output [1]: [cs_ship_customer_sk#13 AS customer_sk#16] -Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] +Output [1]: [cs_ship_customer_sk#12 AS customer_sk#15] +Input [3]: [cs_ship_customer_sk#12, cs_sold_date_sk#13, d_date_sk#14] (21) Union (22) BroadcastExchange -Input [1]: [customer_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [1]: [customer_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (23) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [customer_sk#12] +Right keys [1]: [customer_sk#11] Join condition: None (24) Project [codegen id : 9] @@ -147,84 +147,84 @@ Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] (25) Scan parquet default.customer_address -Output [2]: [ca_address_sk#18, ca_county#19] +Output [2]: [ca_address_sk#16, ca_county#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_county, [Dona Ana County,Douglas County,Gaines County,Richland County,Walker County]), IsNotNull(ca_address_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#18, ca_county#19] +Input [2]: [ca_address_sk#16, ca_county#17] (27) Filter [codegen id : 7] -Input [2]: [ca_address_sk#18, ca_county#19] -Condition : (ca_county#19 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#18)) +Input [2]: [ca_address_sk#16, ca_county#17] +Condition : (ca_county#17 IN (Walker County,Richland County,Gaines County,Douglas County,Dona Ana County) AND isnotnull(ca_address_sk#16)) (28) Project [codegen id : 7] -Output [1]: [ca_address_sk#18] -Input [2]: [ca_address_sk#18, ca_county#19] +Output [1]: [ca_address_sk#16] +Input [2]: [ca_address_sk#16, ca_county#17] (29) BroadcastExchange -Input [1]: [ca_address_sk#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +Input [1]: [ca_address_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (30) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_current_addr_sk#3] -Right keys [1]: [ca_address_sk#18] +Right keys [1]: [ca_address_sk#16] Join condition: None (31) Project [codegen id : 9] Output [1]: [c_current_cdemo_sk#2] -Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#18] +Input [3]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#16] (32) Scan parquet default.customer_demographics -Output [9]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Output [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (33) ColumnarToRow [codegen id : 8] -Input [9]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Input [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] (34) Filter [codegen id : 8] -Input [9]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] -Condition : isnotnull(cd_demo_sk#21) +Input [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Condition : isnotnull(cd_demo_sk#18) (35) BroadcastExchange -Input [9]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#30] +Input [9]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (36) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_current_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#21] +Right keys [1]: [cd_demo_sk#18] Join condition: None (37) Project [codegen id : 9] -Output [8]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] -Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Output [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Input [10]: [c_current_cdemo_sk#2, cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] (38) HashAggregate [codegen id : 9] -Input [8]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] -Keys [8]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Input [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Keys [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#31] -Results [9]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, count#32] +Aggregate Attributes [1]: [count#27] +Results [9]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#28] (39) Exchange -Input [9]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, count#32] -Arguments: hashpartitioning(cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [9]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#28] +Arguments: hashpartitioning(cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, 5), ENSURE_REQUIREMENTS, [plan_id=5] (40) HashAggregate [codegen id : 10] -Input [9]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, count#32] -Keys [8]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cd_purchase_estimate#25, cd_credit_rating#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Input [9]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#28] +Keys [8]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cd_purchase_estimate#22, cd_credit_rating#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#34] -Results [14]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, count(1)#34 AS cnt1#35, cd_purchase_estimate#25, count(1)#34 AS cnt2#36, cd_credit_rating#26, count(1)#34 AS cnt3#37, cd_dep_count#27, count(1)#34 AS cnt4#38, cd_dep_employed_count#28, count(1)#34 AS cnt5#39, cd_dep_college_count#29, count(1)#34 AS cnt6#40] +Aggregate Attributes [1]: [count(1)#29] +Results [14]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, count(1)#29 AS cnt1#30, cd_purchase_estimate#22, count(1)#29 AS cnt2#31, cd_credit_rating#23, count(1)#29 AS cnt3#32, cd_dep_count#24, count(1)#29 AS cnt4#33, cd_dep_employed_count#25, count(1)#29 AS cnt5#34, cd_dep_college_count#26, count(1)#29 AS cnt6#35] (41) TakeOrderedAndProject -Input [14]: [cd_gender#22, cd_marital_status#23, cd_education_status#24, cnt1#35, cd_purchase_estimate#25, cnt2#36, cd_credit_rating#26, cnt3#37, cd_dep_count#27, cnt4#38, cd_dep_employed_count#28, cnt5#39, cd_dep_college_count#29, cnt6#40] -Arguments: 100, [cd_gender#22 ASC NULLS FIRST, cd_marital_status#23 ASC NULLS FIRST, cd_education_status#24 ASC NULLS FIRST, cd_purchase_estimate#25 ASC NULLS FIRST, cd_credit_rating#26 ASC NULLS FIRST, cd_dep_count#27 ASC NULLS FIRST, cd_dep_employed_count#28 ASC NULLS FIRST, cd_dep_college_count#29 ASC NULLS FIRST], [cd_gender#22, cd_marital_status#23, cd_education_status#24, cnt1#35, cd_purchase_estimate#25, cnt2#36, cd_credit_rating#26, cnt3#37, cd_dep_count#27, cnt4#38, cd_dep_employed_count#28, cnt5#39, cd_dep_college_count#29, cnt6#40] +Input [14]: [cd_gender#19, cd_marital_status#20, cd_education_status#21, cnt1#30, cd_purchase_estimate#22, cnt2#31, cd_credit_rating#23, cnt3#32, cd_dep_count#24, cnt4#33, cd_dep_employed_count#25, cnt5#34, cd_dep_college_count#26, cnt6#35] +Arguments: 100, [cd_gender#19 ASC NULLS FIRST, cd_marital_status#20 ASC NULLS FIRST, cd_education_status#21 ASC NULLS FIRST, cd_purchase_estimate#22 ASC NULLS FIRST, cd_credit_rating#23 ASC NULLS FIRST, cd_dep_count#24 ASC NULLS FIRST, cd_dep_employed_count#25 ASC NULLS FIRST, cd_dep_college_count#26 ASC NULLS FIRST], [cd_gender#19, cd_marital_status#20, cd_education_status#21, cnt1#30, cd_purchase_estimate#22, cnt2#31, cd_credit_rating#23, cnt3#32, cd_dep_count#24, cnt4#33, cd_dep_employed_count#25, cnt5#34, cd_dep_college_count#26, cnt6#35] ===== Subqueries ===== @@ -237,29 +237,29 @@ BroadcastExchange (46) (42) Scan parquet default.date_dim -Output [3]: [d_date_sk#7, d_year#41, d_moy#42] +Output [3]: [d_date_sk#7, d_year#36, d_moy#37] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2002), GreaterThanOrEqual(d_moy,4), LessThanOrEqual(d_moy,7), IsNotNull(d_date_sk)] ReadSchema: struct (43) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#41, d_moy#42] +Input [3]: [d_date_sk#7, d_year#36, d_moy#37] (44) Filter [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#41, d_moy#42] -Condition : (((((isnotnull(d_year#41) AND isnotnull(d_moy#42)) AND (d_year#41 = 2002)) AND (d_moy#42 >= 4)) AND (d_moy#42 <= 7)) AND isnotnull(d_date_sk#7)) +Input [3]: [d_date_sk#7, d_year#36, d_moy#37] +Condition : (((((isnotnull(d_year#36) AND isnotnull(d_moy#37)) AND (d_year#36 = 2002)) AND (d_moy#37 >= 4)) AND (d_moy#37 <= 7)) AND isnotnull(d_date_sk#7)) (45) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [3]: [d_date_sk#7, d_year#41, d_moy#42] +Input [3]: [d_date_sk#7, d_year#36, d_moy#37] (46) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#43] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] -Subquery:2 Hosting operator id = 11 Hosting Expression = ws_sold_date_sk#10 IN dynamicpruning#6 +Subquery:2 Hosting operator id = 11 Hosting Expression = ws_sold_date_sk#9 IN dynamicpruning#6 -Subquery:3 Hosting operator id = 16 Hosting Expression = cs_sold_date_sk#14 IN dynamicpruning#6 +Subquery:3 Hosting operator id = 16 Hosting Expression = cs_sold_date_sk#13 IN dynamicpruning#6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/explain.txt index 7591e3bdb30c7..eef33ed4a9731 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11.sf100/explain.txt @@ -109,334 +109,334 @@ Input [6]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_list_price#3, ss_sol (7) Exchange Input [4]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7] -Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#8] +Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (8) Sort [codegen id : 3] Input [4]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7] Arguments: [ss_customer_sk#1 ASC NULLS FIRST], false, 0 (9) Scan parquet default.customer -Output [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Output [8]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (10) ColumnarToRow [codegen id : 4] -Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Input [8]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] (11) Filter [codegen id : 4] -Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] -Condition : (isnotnull(c_customer_sk#9) AND isnotnull(c_customer_id#10)) +Input [8]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] +Condition : (isnotnull(c_customer_sk#8) AND isnotnull(c_customer_id#9)) (12) Exchange -Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] -Arguments: hashpartitioning(c_customer_sk#9, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [8]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] +Arguments: hashpartitioning(c_customer_sk#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (13) Sort [codegen id : 5] -Input [8]: [c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] -Arguments: [c_customer_sk#9 ASC NULLS FIRST], false, 0 +Input [8]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] +Arguments: [c_customer_sk#8 ASC NULLS FIRST], false, 0 (14) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#9] +Right keys [1]: [c_customer_sk#8] Join condition: None (15) Project [codegen id : 6] -Output [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7] -Input [12]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7, c_customer_sk#9, c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Output [10]: [c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7] +Input [12]: [ss_customer_sk#1, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7, c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] (16) HashAggregate [codegen id : 6] -Input [10]: [c_customer_id#10, c_first_name#11, c_last_name#12, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7] -Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Input [10]: [c_customer_id#9, c_first_name#10, c_last_name#11, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15, ss_ext_discount_amt#2, ss_ext_list_price#3, d_year#7] +Keys [8]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#7, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#18] -Results [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] +Aggregate Attributes [1]: [sum#16] +Results [9]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#7, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15, sum#17] (17) Exchange -Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] -Arguments: hashpartitioning(c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [9]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#7, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15, sum#17] +Arguments: hashpartitioning(c_customer_id#9, c_first_name#10, c_last_name#11, d_year#7, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 7] -Input [9]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16, sum#19] -Keys [8]: [c_customer_id#10, c_first_name#11, c_last_name#12, d_year#7, c_preferred_cust_flag#13, c_birth_country#14, c_login#15, c_email_address#16] +Input [9]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#7, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15, sum#17] +Keys [8]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#7, c_preferred_cust_flag#12, c_birth_country#13, c_login#14, c_email_address#15] Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))#21] -Results [2]: [c_customer_id#10 AS customer_id#22, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))#21,18,2) AS year_total#23] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))#18] +Results [2]: [c_customer_id#9 AS customer_id#19, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#3 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#2 as decimal(8,2)))), DecimalType(8,2))))#18,18,2) AS year_total#20] (19) Filter [codegen id : 7] -Input [2]: [customer_id#22, year_total#23] -Condition : (isnotnull(year_total#23) AND (year_total#23 > 0.00)) +Input [2]: [customer_id#19, year_total#20] +Condition : (isnotnull(year_total#20) AND (year_total#20 > 0.00)) (20) Exchange -Input [2]: [customer_id#22, year_total#23] -Arguments: hashpartitioning(customer_id#22, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [2]: [customer_id#19, year_total#20] +Arguments: hashpartitioning(customer_id#19, 5), ENSURE_REQUIREMENTS, [plan_id=4] (21) Sort [codegen id : 8] -Input [2]: [customer_id#22, year_total#23] -Arguments: [customer_id#22 ASC NULLS FIRST], false, 0 +Input [2]: [customer_id#19, year_total#20] +Arguments: [customer_id#19 ASC NULLS FIRST], false, 0 (22) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, ss_sold_date_sk#28] +Output [4]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, ss_sold_date_sk#24] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#28), dynamicpruningexpression(ss_sold_date_sk#28 IN dynamicpruning#29)] +PartitionFilters: [isnotnull(ss_sold_date_sk#24), dynamicpruningexpression(ss_sold_date_sk#24 IN dynamicpruning#25)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 10] -Input [4]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, ss_sold_date_sk#28] +Input [4]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, ss_sold_date_sk#24] (24) Filter [codegen id : 10] -Input [4]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, ss_sold_date_sk#28] -Condition : isnotnull(ss_customer_sk#25) +Input [4]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, ss_sold_date_sk#24] +Condition : isnotnull(ss_customer_sk#21) (25) ReusedExchange [Reuses operator id: 87] -Output [2]: [d_date_sk#30, d_year#31] +Output [2]: [d_date_sk#26, d_year#27] (26) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_sold_date_sk#28] -Right keys [1]: [d_date_sk#30] +Left keys [1]: [ss_sold_date_sk#24] +Right keys [1]: [d_date_sk#26] Join condition: None (27) Project [codegen id : 10] -Output [4]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, d_year#31] -Input [6]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, ss_sold_date_sk#28, d_date_sk#30, d_year#31] +Output [4]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, d_year#27] +Input [6]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, ss_sold_date_sk#24, d_date_sk#26, d_year#27] (28) Exchange -Input [4]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, d_year#31] -Arguments: hashpartitioning(ss_customer_sk#25, 5), ENSURE_REQUIREMENTS, [id=#32] +Input [4]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, d_year#27] +Arguments: hashpartitioning(ss_customer_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=5] (29) Sort [codegen id : 11] -Input [4]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, d_year#31] -Arguments: [ss_customer_sk#25 ASC NULLS FIRST], false, 0 +Input [4]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, d_year#27] +Arguments: [ss_customer_sk#21 ASC NULLS FIRST], false, 0 (30) ReusedExchange [Reuses operator id: 12] -Output [8]: [c_customer_sk#33, c_customer_id#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40] +Output [8]: [c_customer_sk#28, c_customer_id#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35] (31) Sort [codegen id : 13] -Input [8]: [c_customer_sk#33, c_customer_id#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40] -Arguments: [c_customer_sk#33 ASC NULLS FIRST], false, 0 +Input [8]: [c_customer_sk#28, c_customer_id#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35] +Arguments: [c_customer_sk#28 ASC NULLS FIRST], false, 0 (32) SortMergeJoin [codegen id : 14] -Left keys [1]: [ss_customer_sk#25] -Right keys [1]: [c_customer_sk#33] +Left keys [1]: [ss_customer_sk#21] +Right keys [1]: [c_customer_sk#28] Join condition: None (33) Project [codegen id : 14] -Output [10]: [c_customer_id#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, ss_ext_discount_amt#26, ss_ext_list_price#27, d_year#31] -Input [12]: [ss_customer_sk#25, ss_ext_discount_amt#26, ss_ext_list_price#27, d_year#31, c_customer_sk#33, c_customer_id#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40] +Output [10]: [c_customer_id#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35, ss_ext_discount_amt#22, ss_ext_list_price#23, d_year#27] +Input [12]: [ss_customer_sk#21, ss_ext_discount_amt#22, ss_ext_list_price#23, d_year#27, c_customer_sk#28, c_customer_id#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35] (34) HashAggregate [codegen id : 14] -Input [10]: [c_customer_id#34, c_first_name#35, c_last_name#36, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, ss_ext_discount_amt#26, ss_ext_list_price#27, d_year#31] -Keys [8]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#41] -Results [9]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, sum#42] +Input [10]: [c_customer_id#29, c_first_name#30, c_last_name#31, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35, ss_ext_discount_amt#22, ss_ext_list_price#23, d_year#27] +Keys [8]: [c_customer_id#29, c_first_name#30, c_last_name#31, d_year#27, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#23 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#22 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum#36] +Results [9]: [c_customer_id#29, c_first_name#30, c_last_name#31, d_year#27, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35, sum#37] (35) Exchange -Input [9]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, sum#42] -Arguments: hashpartitioning(c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, 5), ENSURE_REQUIREMENTS, [id=#43] +Input [9]: [c_customer_id#29, c_first_name#30, c_last_name#31, d_year#27, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35, sum#37] +Arguments: hashpartitioning(c_customer_id#29, c_first_name#30, c_last_name#31, d_year#27, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35, 5), ENSURE_REQUIREMENTS, [plan_id=6] (36) HashAggregate [codegen id : 15] -Input [9]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40, sum#42] -Keys [8]: [c_customer_id#34, c_first_name#35, c_last_name#36, d_year#31, c_preferred_cust_flag#37, c_birth_country#38, c_login#39, c_email_address#40] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))#21] -Results [5]: [c_customer_id#34 AS customer_id#44, c_first_name#35 AS customer_first_name#45, c_last_name#36 AS customer_last_name#46, c_email_address#40 AS customer_email_address#47, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#27 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#26 as decimal(8,2)))), DecimalType(8,2))))#21,18,2) AS year_total#48] +Input [9]: [c_customer_id#29, c_first_name#30, c_last_name#31, d_year#27, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35, sum#37] +Keys [8]: [c_customer_id#29, c_first_name#30, c_last_name#31, d_year#27, c_preferred_cust_flag#32, c_birth_country#33, c_login#34, c_email_address#35] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#23 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#22 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#23 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#22 as decimal(8,2)))), DecimalType(8,2))))#18] +Results [5]: [c_customer_id#29 AS customer_id#38, c_first_name#30 AS customer_first_name#39, c_last_name#31 AS customer_last_name#40, c_email_address#35 AS customer_email_address#41, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#23 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#22 as decimal(8,2)))), DecimalType(8,2))))#18,18,2) AS year_total#42] (37) Exchange -Input [5]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_email_address#47, year_total#48] -Arguments: hashpartitioning(customer_id#44, 5), ENSURE_REQUIREMENTS, [id=#49] +Input [5]: [customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41, year_total#42] +Arguments: hashpartitioning(customer_id#38, 5), ENSURE_REQUIREMENTS, [plan_id=7] (38) Sort [codegen id : 16] -Input [5]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_email_address#47, year_total#48] -Arguments: [customer_id#44 ASC NULLS FIRST], false, 0 +Input [5]: [customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41, year_total#42] +Arguments: [customer_id#38 ASC NULLS FIRST], false, 0 (39) SortMergeJoin [codegen id : 17] -Left keys [1]: [customer_id#22] -Right keys [1]: [customer_id#44] +Left keys [1]: [customer_id#19] +Right keys [1]: [customer_id#38] Join condition: None (40) Scan parquet default.web_sales -Output [4]: [ws_bill_customer_sk#50, ws_ext_discount_amt#51, ws_ext_list_price#52, ws_sold_date_sk#53] +Output [4]: [ws_bill_customer_sk#43, ws_ext_discount_amt#44, ws_ext_list_price#45, ws_sold_date_sk#46] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#53), dynamicpruningexpression(ws_sold_date_sk#53 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#46), dynamicpruningexpression(ws_sold_date_sk#46 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (41) ColumnarToRow [codegen id : 19] -Input [4]: [ws_bill_customer_sk#50, ws_ext_discount_amt#51, ws_ext_list_price#52, ws_sold_date_sk#53] +Input [4]: [ws_bill_customer_sk#43, ws_ext_discount_amt#44, ws_ext_list_price#45, ws_sold_date_sk#46] (42) Filter [codegen id : 19] -Input [4]: [ws_bill_customer_sk#50, ws_ext_discount_amt#51, ws_ext_list_price#52, ws_sold_date_sk#53] -Condition : isnotnull(ws_bill_customer_sk#50) +Input [4]: [ws_bill_customer_sk#43, ws_ext_discount_amt#44, ws_ext_list_price#45, ws_sold_date_sk#46] +Condition : isnotnull(ws_bill_customer_sk#43) (43) ReusedExchange [Reuses operator id: 83] -Output [2]: [d_date_sk#54, d_year#55] +Output [2]: [d_date_sk#47, d_year#48] (44) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ws_sold_date_sk#53] -Right keys [1]: [d_date_sk#54] +Left keys [1]: [ws_sold_date_sk#46] +Right keys [1]: [d_date_sk#47] Join condition: None (45) Project [codegen id : 19] -Output [4]: [ws_bill_customer_sk#50, ws_ext_discount_amt#51, ws_ext_list_price#52, d_year#55] -Input [6]: [ws_bill_customer_sk#50, ws_ext_discount_amt#51, ws_ext_list_price#52, ws_sold_date_sk#53, d_date_sk#54, d_year#55] +Output [4]: [ws_bill_customer_sk#43, ws_ext_discount_amt#44, ws_ext_list_price#45, d_year#48] +Input [6]: [ws_bill_customer_sk#43, ws_ext_discount_amt#44, ws_ext_list_price#45, ws_sold_date_sk#46, d_date_sk#47, d_year#48] (46) Exchange -Input [4]: [ws_bill_customer_sk#50, ws_ext_discount_amt#51, ws_ext_list_price#52, d_year#55] -Arguments: hashpartitioning(ws_bill_customer_sk#50, 5), ENSURE_REQUIREMENTS, [id=#56] +Input [4]: [ws_bill_customer_sk#43, ws_ext_discount_amt#44, ws_ext_list_price#45, d_year#48] +Arguments: hashpartitioning(ws_bill_customer_sk#43, 5), ENSURE_REQUIREMENTS, [plan_id=8] (47) Sort [codegen id : 20] -Input [4]: [ws_bill_customer_sk#50, ws_ext_discount_amt#51, ws_ext_list_price#52, d_year#55] -Arguments: [ws_bill_customer_sk#50 ASC NULLS FIRST], false, 0 +Input [4]: [ws_bill_customer_sk#43, ws_ext_discount_amt#44, ws_ext_list_price#45, d_year#48] +Arguments: [ws_bill_customer_sk#43 ASC NULLS FIRST], false, 0 (48) ReusedExchange [Reuses operator id: 12] -Output [8]: [c_customer_sk#57, c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64] +Output [8]: [c_customer_sk#49, c_customer_id#50, c_first_name#51, c_last_name#52, c_preferred_cust_flag#53, c_birth_country#54, c_login#55, c_email_address#56] (49) Sort [codegen id : 22] -Input [8]: [c_customer_sk#57, c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64] -Arguments: [c_customer_sk#57 ASC NULLS FIRST], false, 0 +Input [8]: [c_customer_sk#49, c_customer_id#50, c_first_name#51, c_last_name#52, c_preferred_cust_flag#53, c_birth_country#54, c_login#55, c_email_address#56] +Arguments: [c_customer_sk#49 ASC NULLS FIRST], false, 0 (50) SortMergeJoin [codegen id : 23] -Left keys [1]: [ws_bill_customer_sk#50] -Right keys [1]: [c_customer_sk#57] +Left keys [1]: [ws_bill_customer_sk#43] +Right keys [1]: [c_customer_sk#49] Join condition: None (51) Project [codegen id : 23] -Output [10]: [c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64, ws_ext_discount_amt#51, ws_ext_list_price#52, d_year#55] -Input [12]: [ws_bill_customer_sk#50, ws_ext_discount_amt#51, ws_ext_list_price#52, d_year#55, c_customer_sk#57, c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64] +Output [10]: [c_customer_id#50, c_first_name#51, c_last_name#52, c_preferred_cust_flag#53, c_birth_country#54, c_login#55, c_email_address#56, ws_ext_discount_amt#44, ws_ext_list_price#45, d_year#48] +Input [12]: [ws_bill_customer_sk#43, ws_ext_discount_amt#44, ws_ext_list_price#45, d_year#48, c_customer_sk#49, c_customer_id#50, c_first_name#51, c_last_name#52, c_preferred_cust_flag#53, c_birth_country#54, c_login#55, c_email_address#56] (52) HashAggregate [codegen id : 23] -Input [10]: [c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64, ws_ext_discount_amt#51, ws_ext_list_price#52, d_year#55] -Keys [8]: [c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64, d_year#55] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#52 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#51 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#65] -Results [9]: [c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64, d_year#55, sum#66] +Input [10]: [c_customer_id#50, c_first_name#51, c_last_name#52, c_preferred_cust_flag#53, c_birth_country#54, c_login#55, c_email_address#56, ws_ext_discount_amt#44, ws_ext_list_price#45, d_year#48] +Keys [8]: [c_customer_id#50, c_first_name#51, c_last_name#52, c_preferred_cust_flag#53, c_birth_country#54, c_login#55, c_email_address#56, d_year#48] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#45 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#44 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum#57] +Results [9]: [c_customer_id#50, c_first_name#51, c_last_name#52, c_preferred_cust_flag#53, c_birth_country#54, c_login#55, c_email_address#56, d_year#48, sum#58] (53) Exchange -Input [9]: [c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64, d_year#55, sum#66] -Arguments: hashpartitioning(c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64, d_year#55, 5), ENSURE_REQUIREMENTS, [id=#67] +Input [9]: [c_customer_id#50, c_first_name#51, c_last_name#52, c_preferred_cust_flag#53, c_birth_country#54, c_login#55, c_email_address#56, d_year#48, sum#58] +Arguments: hashpartitioning(c_customer_id#50, c_first_name#51, c_last_name#52, c_preferred_cust_flag#53, c_birth_country#54, c_login#55, c_email_address#56, d_year#48, 5), ENSURE_REQUIREMENTS, [plan_id=9] (54) HashAggregate [codegen id : 24] -Input [9]: [c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64, d_year#55, sum#66] -Keys [8]: [c_customer_id#58, c_first_name#59, c_last_name#60, c_preferred_cust_flag#61, c_birth_country#62, c_login#63, c_email_address#64, d_year#55] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#52 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#51 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#52 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#51 as decimal(8,2)))), DecimalType(8,2))))#68] -Results [2]: [c_customer_id#58 AS customer_id#69, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#52 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#51 as decimal(8,2)))), DecimalType(8,2))))#68,18,2) AS year_total#70] +Input [9]: [c_customer_id#50, c_first_name#51, c_last_name#52, c_preferred_cust_flag#53, c_birth_country#54, c_login#55, c_email_address#56, d_year#48, sum#58] +Keys [8]: [c_customer_id#50, c_first_name#51, c_last_name#52, c_preferred_cust_flag#53, c_birth_country#54, c_login#55, c_email_address#56, d_year#48] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#45 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#44 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#45 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#44 as decimal(8,2)))), DecimalType(8,2))))#59] +Results [2]: [c_customer_id#50 AS customer_id#60, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#45 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#44 as decimal(8,2)))), DecimalType(8,2))))#59,18,2) AS year_total#61] (55) Filter [codegen id : 24] -Input [2]: [customer_id#69, year_total#70] -Condition : (isnotnull(year_total#70) AND (year_total#70 > 0.00)) +Input [2]: [customer_id#60, year_total#61] +Condition : (isnotnull(year_total#61) AND (year_total#61 > 0.00)) (56) Exchange -Input [2]: [customer_id#69, year_total#70] -Arguments: hashpartitioning(customer_id#69, 5), ENSURE_REQUIREMENTS, [id=#71] +Input [2]: [customer_id#60, year_total#61] +Arguments: hashpartitioning(customer_id#60, 5), ENSURE_REQUIREMENTS, [plan_id=10] (57) Sort [codegen id : 25] -Input [2]: [customer_id#69, year_total#70] -Arguments: [customer_id#69 ASC NULLS FIRST], false, 0 +Input [2]: [customer_id#60, year_total#61] +Arguments: [customer_id#60 ASC NULLS FIRST], false, 0 (58) SortMergeJoin [codegen id : 26] -Left keys [1]: [customer_id#22] -Right keys [1]: [customer_id#69] +Left keys [1]: [customer_id#19] +Right keys [1]: [customer_id#60] Join condition: None (59) Project [codegen id : 26] -Output [8]: [customer_id#22, year_total#23, customer_id#44, customer_first_name#45, customer_last_name#46, customer_email_address#47, year_total#48, year_total#70] -Input [9]: [customer_id#22, year_total#23, customer_id#44, customer_first_name#45, customer_last_name#46, customer_email_address#47, year_total#48, customer_id#69, year_total#70] +Output [8]: [customer_id#19, year_total#20, customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41, year_total#42, year_total#61] +Input [9]: [customer_id#19, year_total#20, customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41, year_total#42, customer_id#60, year_total#61] (60) Scan parquet default.web_sales -Output [4]: [ws_bill_customer_sk#72, ws_ext_discount_amt#73, ws_ext_list_price#74, ws_sold_date_sk#75] +Output [4]: [ws_bill_customer_sk#62, ws_ext_discount_amt#63, ws_ext_list_price#64, ws_sold_date_sk#65] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#75), dynamicpruningexpression(ws_sold_date_sk#75 IN dynamicpruning#29)] +PartitionFilters: [isnotnull(ws_sold_date_sk#65), dynamicpruningexpression(ws_sold_date_sk#65 IN dynamicpruning#25)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (61) ColumnarToRow [codegen id : 28] -Input [4]: [ws_bill_customer_sk#72, ws_ext_discount_amt#73, ws_ext_list_price#74, ws_sold_date_sk#75] +Input [4]: [ws_bill_customer_sk#62, ws_ext_discount_amt#63, ws_ext_list_price#64, ws_sold_date_sk#65] (62) Filter [codegen id : 28] -Input [4]: [ws_bill_customer_sk#72, ws_ext_discount_amt#73, ws_ext_list_price#74, ws_sold_date_sk#75] -Condition : isnotnull(ws_bill_customer_sk#72) +Input [4]: [ws_bill_customer_sk#62, ws_ext_discount_amt#63, ws_ext_list_price#64, ws_sold_date_sk#65] +Condition : isnotnull(ws_bill_customer_sk#62) (63) ReusedExchange [Reuses operator id: 87] -Output [2]: [d_date_sk#76, d_year#77] +Output [2]: [d_date_sk#66, d_year#67] (64) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [ws_sold_date_sk#75] -Right keys [1]: [d_date_sk#76] +Left keys [1]: [ws_sold_date_sk#65] +Right keys [1]: [d_date_sk#66] Join condition: None (65) Project [codegen id : 28] -Output [4]: [ws_bill_customer_sk#72, ws_ext_discount_amt#73, ws_ext_list_price#74, d_year#77] -Input [6]: [ws_bill_customer_sk#72, ws_ext_discount_amt#73, ws_ext_list_price#74, ws_sold_date_sk#75, d_date_sk#76, d_year#77] +Output [4]: [ws_bill_customer_sk#62, ws_ext_discount_amt#63, ws_ext_list_price#64, d_year#67] +Input [6]: [ws_bill_customer_sk#62, ws_ext_discount_amt#63, ws_ext_list_price#64, ws_sold_date_sk#65, d_date_sk#66, d_year#67] (66) Exchange -Input [4]: [ws_bill_customer_sk#72, ws_ext_discount_amt#73, ws_ext_list_price#74, d_year#77] -Arguments: hashpartitioning(ws_bill_customer_sk#72, 5), ENSURE_REQUIREMENTS, [id=#78] +Input [4]: [ws_bill_customer_sk#62, ws_ext_discount_amt#63, ws_ext_list_price#64, d_year#67] +Arguments: hashpartitioning(ws_bill_customer_sk#62, 5), ENSURE_REQUIREMENTS, [plan_id=11] (67) Sort [codegen id : 29] -Input [4]: [ws_bill_customer_sk#72, ws_ext_discount_amt#73, ws_ext_list_price#74, d_year#77] -Arguments: [ws_bill_customer_sk#72 ASC NULLS FIRST], false, 0 +Input [4]: [ws_bill_customer_sk#62, ws_ext_discount_amt#63, ws_ext_list_price#64, d_year#67] +Arguments: [ws_bill_customer_sk#62 ASC NULLS FIRST], false, 0 (68) ReusedExchange [Reuses operator id: 12] -Output [8]: [c_customer_sk#79, c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86] +Output [8]: [c_customer_sk#68, c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75] (69) Sort [codegen id : 31] -Input [8]: [c_customer_sk#79, c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86] -Arguments: [c_customer_sk#79 ASC NULLS FIRST], false, 0 +Input [8]: [c_customer_sk#68, c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75] +Arguments: [c_customer_sk#68 ASC NULLS FIRST], false, 0 (70) SortMergeJoin [codegen id : 32] -Left keys [1]: [ws_bill_customer_sk#72] -Right keys [1]: [c_customer_sk#79] +Left keys [1]: [ws_bill_customer_sk#62] +Right keys [1]: [c_customer_sk#68] Join condition: None (71) Project [codegen id : 32] -Output [10]: [c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86, ws_ext_discount_amt#73, ws_ext_list_price#74, d_year#77] -Input [12]: [ws_bill_customer_sk#72, ws_ext_discount_amt#73, ws_ext_list_price#74, d_year#77, c_customer_sk#79, c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86] +Output [10]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, ws_ext_discount_amt#63, ws_ext_list_price#64, d_year#67] +Input [12]: [ws_bill_customer_sk#62, ws_ext_discount_amt#63, ws_ext_list_price#64, d_year#67, c_customer_sk#68, c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75] (72) HashAggregate [codegen id : 32] -Input [10]: [c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86, ws_ext_discount_amt#73, ws_ext_list_price#74, d_year#77] -Keys [8]: [c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86, d_year#77] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#74 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#73 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#87] -Results [9]: [c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86, d_year#77, sum#88] +Input [10]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, ws_ext_discount_amt#63, ws_ext_list_price#64, d_year#67] +Keys [8]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#67] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#64 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#63 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum#76] +Results [9]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#67, sum#77] (73) Exchange -Input [9]: [c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86, d_year#77, sum#88] -Arguments: hashpartitioning(c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86, d_year#77, 5), ENSURE_REQUIREMENTS, [id=#89] +Input [9]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#67, sum#77] +Arguments: hashpartitioning(c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#67, 5), ENSURE_REQUIREMENTS, [plan_id=12] (74) HashAggregate [codegen id : 33] -Input [9]: [c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86, d_year#77, sum#88] -Keys [8]: [c_customer_id#80, c_first_name#81, c_last_name#82, c_preferred_cust_flag#83, c_birth_country#84, c_login#85, c_email_address#86, d_year#77] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#74 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#73 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#74 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#73 as decimal(8,2)))), DecimalType(8,2))))#68] -Results [2]: [c_customer_id#80 AS customer_id#90, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#74 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#73 as decimal(8,2)))), DecimalType(8,2))))#68,18,2) AS year_total#91] +Input [9]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#67, sum#77] +Keys [8]: [c_customer_id#69, c_first_name#70, c_last_name#71, c_preferred_cust_flag#72, c_birth_country#73, c_login#74, c_email_address#75, d_year#67] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#64 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#63 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#64 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#63 as decimal(8,2)))), DecimalType(8,2))))#59] +Results [2]: [c_customer_id#69 AS customer_id#78, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#64 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#63 as decimal(8,2)))), DecimalType(8,2))))#59,18,2) AS year_total#79] (75) Exchange -Input [2]: [customer_id#90, year_total#91] -Arguments: hashpartitioning(customer_id#90, 5), ENSURE_REQUIREMENTS, [id=#92] +Input [2]: [customer_id#78, year_total#79] +Arguments: hashpartitioning(customer_id#78, 5), ENSURE_REQUIREMENTS, [plan_id=13] (76) Sort [codegen id : 34] -Input [2]: [customer_id#90, year_total#91] -Arguments: [customer_id#90 ASC NULLS FIRST], false, 0 +Input [2]: [customer_id#78, year_total#79] +Arguments: [customer_id#78 ASC NULLS FIRST], false, 0 (77) SortMergeJoin [codegen id : 35] -Left keys [1]: [customer_id#22] -Right keys [1]: [customer_id#90] -Join condition: (CASE WHEN (year_total#70 > 0.00) THEN CheckOverflow((promote_precision(year_total#91) / promote_precision(year_total#70)), DecimalType(38,20)) ELSE 0E-20 END > CASE WHEN (year_total#23 > 0.00) THEN CheckOverflow((promote_precision(year_total#48) / promote_precision(year_total#23)), DecimalType(38,20)) ELSE 0E-20 END) +Left keys [1]: [customer_id#19] +Right keys [1]: [customer_id#78] +Join condition: (CASE WHEN (year_total#61 > 0.00) THEN CheckOverflow((promote_precision(year_total#79) / promote_precision(year_total#61)), DecimalType(38,20)) ELSE 0E-20 END > CASE WHEN (year_total#20 > 0.00) THEN CheckOverflow((promote_precision(year_total#42) / promote_precision(year_total#20)), DecimalType(38,20)) ELSE 0E-20 END) (78) Project [codegen id : 35] -Output [4]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_email_address#47] -Input [10]: [customer_id#22, year_total#23, customer_id#44, customer_first_name#45, customer_last_name#46, customer_email_address#47, year_total#48, year_total#70, customer_id#90, year_total#91] +Output [4]: [customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41] +Input [10]: [customer_id#19, year_total#20, customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41, year_total#42, year_total#61, customer_id#78, year_total#79] (79) TakeOrderedAndProject -Input [4]: [customer_id#44, customer_first_name#45, customer_last_name#46, customer_email_address#47] -Arguments: 100, [customer_id#44 ASC NULLS FIRST, customer_first_name#45 ASC NULLS FIRST, customer_last_name#46 ASC NULLS FIRST, customer_email_address#47 ASC NULLS FIRST], [customer_id#44, customer_first_name#45, customer_last_name#46, customer_email_address#47] +Input [4]: [customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41] +Arguments: 100, [customer_id#38 ASC NULLS FIRST, customer_first_name#39 ASC NULLS FIRST, customer_last_name#40 ASC NULLS FIRST, customer_email_address#41 ASC NULLS FIRST], [customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41] ===== Subqueries ===== @@ -463,9 +463,9 @@ Condition : ((isnotnull(d_year#7) AND (d_year#7 = 2001)) AND isnotnull(d_date_sk (83) BroadcastExchange Input [2]: [d_date_sk#6, d_year#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#93] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] -Subquery:2 Hosting operator id = 22 Hosting Expression = ss_sold_date_sk#28 IN dynamicpruning#29 +Subquery:2 Hosting operator id = 22 Hosting Expression = ss_sold_date_sk#24 IN dynamicpruning#25 BroadcastExchange (87) +- * Filter (86) +- * ColumnarToRow (85) @@ -473,25 +473,25 @@ BroadcastExchange (87) (84) Scan parquet default.date_dim -Output [2]: [d_date_sk#30, d_year#31] +Output [2]: [d_date_sk#26, d_year#27] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] ReadSchema: struct (85) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#30, d_year#31] +Input [2]: [d_date_sk#26, d_year#27] (86) Filter [codegen id : 1] -Input [2]: [d_date_sk#30, d_year#31] -Condition : ((isnotnull(d_year#31) AND (d_year#31 = 2002)) AND isnotnull(d_date_sk#30)) +Input [2]: [d_date_sk#26, d_year#27] +Condition : ((isnotnull(d_year#27) AND (d_year#27 = 2002)) AND isnotnull(d_date_sk#26)) (87) BroadcastExchange -Input [2]: [d_date_sk#30, d_year#31] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#94] +Input [2]: [d_date_sk#26, d_year#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=15] -Subquery:3 Hosting operator id = 40 Hosting Expression = ws_sold_date_sk#53 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 40 Hosting Expression = ws_sold_date_sk#46 IN dynamicpruning#5 -Subquery:4 Hosting operator id = 60 Hosting Expression = ws_sold_date_sk#75 IN dynamicpruning#29 +Subquery:4 Hosting operator id = 60 Hosting Expression = ws_sold_date_sk#65 IN dynamicpruning#25 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/explain.txt index 69d3f4ac97247..2884c8e7ba231 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q11/explain.txt @@ -103,7 +103,7 @@ Condition : isnotnull(ss_customer_sk#9) (7) BroadcastExchange Input [4]: [ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [c_customer_sk#1] @@ -115,297 +115,297 @@ Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_f Input [12]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_customer_sk#9, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12] (10) ReusedExchange [Reuses operator id: 75] -Output [2]: [d_date_sk#15, d_year#16] +Output [2]: [d_date_sk#14, d_year#15] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#12] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (12) Project [codegen id : 3] -Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#16] -Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12, d_date_sk#15, d_year#16] +Output [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#15] +Input [12]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, ss_sold_date_sk#12, d_date_sk#14, d_year#15] (13) HashAggregate [codegen id : 3] -Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#16] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Input [10]: [c_customer_id#2, c_first_name#3, c_last_name#4, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, ss_ext_discount_amt#10, ss_ext_list_price#11, d_year#15] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#17] -Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] +Aggregate Attributes [1]: [sum#16] +Results [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#17] (14) Exchange -Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#17] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 16] -Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#18] -Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#16, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] +Input [9]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8, sum#17] +Keys [8]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#15, c_preferred_cust_flag#5, c_birth_country#6, c_login#7, c_email_address#8] Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))#20] -Results [2]: [c_customer_id#2 AS customer_id#21, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))#20,18,2) AS year_total#22] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))#18] +Results [2]: [c_customer_id#2 AS customer_id#19, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#11 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#10 as decimal(8,2)))), DecimalType(8,2))))#18,18,2) AS year_total#20] (16) Filter [codegen id : 16] -Input [2]: [customer_id#21, year_total#22] -Condition : (isnotnull(year_total#22) AND (year_total#22 > 0.00)) +Input [2]: [customer_id#19, year_total#20] +Condition : (isnotnull(year_total#20) AND (year_total#20 > 0.00)) (17) Scan parquet default.customer -Output [8]: [c_customer_sk#23, c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30] +Output [8]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (18) ColumnarToRow [codegen id : 6] -Input [8]: [c_customer_sk#23, c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30] +Input [8]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] (19) Filter [codegen id : 6] -Input [8]: [c_customer_sk#23, c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30] -Condition : (isnotnull(c_customer_sk#23) AND isnotnull(c_customer_id#24)) +Input [8]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] +Condition : (isnotnull(c_customer_sk#21) AND isnotnull(c_customer_id#22)) (20) Scan parquet default.store_sales -Output [4]: [ss_customer_sk#31, ss_ext_discount_amt#32, ss_ext_list_price#33, ss_sold_date_sk#34] +Output [4]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_list_price#31, ss_sold_date_sk#32] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#34), dynamicpruningexpression(ss_sold_date_sk#34 IN dynamicpruning#35)] +PartitionFilters: [isnotnull(ss_sold_date_sk#32), dynamicpruningexpression(ss_sold_date_sk#32 IN dynamicpruning#33)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [4]: [ss_customer_sk#31, ss_ext_discount_amt#32, ss_ext_list_price#33, ss_sold_date_sk#34] +Input [4]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_list_price#31, ss_sold_date_sk#32] (22) Filter [codegen id : 4] -Input [4]: [ss_customer_sk#31, ss_ext_discount_amt#32, ss_ext_list_price#33, ss_sold_date_sk#34] -Condition : isnotnull(ss_customer_sk#31) +Input [4]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_list_price#31, ss_sold_date_sk#32] +Condition : isnotnull(ss_customer_sk#29) (23) BroadcastExchange -Input [4]: [ss_customer_sk#31, ss_ext_discount_amt#32, ss_ext_list_price#33, ss_sold_date_sk#34] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#36] +Input [4]: [ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_list_price#31, ss_sold_date_sk#32] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_customer_sk#23] -Right keys [1]: [ss_customer_sk#31] +Left keys [1]: [c_customer_sk#21] +Right keys [1]: [ss_customer_sk#29] Join condition: None (25) Project [codegen id : 6] -Output [10]: [c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, ss_ext_discount_amt#32, ss_ext_list_price#33, ss_sold_date_sk#34] -Input [12]: [c_customer_sk#23, c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, ss_customer_sk#31, ss_ext_discount_amt#32, ss_ext_list_price#33, ss_sold_date_sk#34] +Output [10]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_list_price#31, ss_sold_date_sk#32] +Input [12]: [c_customer_sk#21, c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_customer_sk#29, ss_ext_discount_amt#30, ss_ext_list_price#31, ss_sold_date_sk#32] (26) ReusedExchange [Reuses operator id: 79] -Output [2]: [d_date_sk#37, d_year#38] +Output [2]: [d_date_sk#34, d_year#35] (27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#34] -Right keys [1]: [d_date_sk#37] +Left keys [1]: [ss_sold_date_sk#32] +Right keys [1]: [d_date_sk#34] Join condition: None (28) Project [codegen id : 6] -Output [10]: [c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, ss_ext_discount_amt#32, ss_ext_list_price#33, d_year#38] -Input [12]: [c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, ss_ext_discount_amt#32, ss_ext_list_price#33, ss_sold_date_sk#34, d_date_sk#37, d_year#38] +Output [10]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_list_price#31, d_year#35] +Input [12]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_list_price#31, ss_sold_date_sk#32, d_date_sk#34, d_year#35] (29) HashAggregate [codegen id : 6] -Input [10]: [c_customer_id#24, c_first_name#25, c_last_name#26, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, ss_ext_discount_amt#32, ss_ext_list_price#33, d_year#38] -Keys [8]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#39] -Results [9]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, sum#40] +Input [10]: [c_customer_id#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, ss_ext_discount_amt#30, ss_ext_list_price#31, d_year#35] +Keys [8]: [c_customer_id#22, c_first_name#23, c_last_name#24, d_year#35, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#31 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum#36] +Results [9]: [c_customer_id#22, c_first_name#23, c_last_name#24, d_year#35, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, sum#37] (30) Exchange -Input [9]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, sum#40] -Arguments: hashpartitioning(c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, 5), ENSURE_REQUIREMENTS, [id=#41] +Input [9]: [c_customer_id#22, c_first_name#23, c_last_name#24, d_year#35, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, sum#37] +Arguments: hashpartitioning(c_customer_id#22, c_first_name#23, c_last_name#24, d_year#35, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, 5), ENSURE_REQUIREMENTS, [plan_id=4] (31) HashAggregate [codegen id : 7] -Input [9]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30, sum#40] -Keys [8]: [c_customer_id#24, c_first_name#25, c_last_name#26, d_year#38, c_preferred_cust_flag#27, c_birth_country#28, c_login#29, c_email_address#30] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))#20] -Results [5]: [c_customer_id#24 AS customer_id#42, c_first_name#25 AS customer_first_name#43, c_last_name#26 AS customer_last_name#44, c_email_address#30 AS customer_email_address#45, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#33 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#32 as decimal(8,2)))), DecimalType(8,2))))#20,18,2) AS year_total#46] +Input [9]: [c_customer_id#22, c_first_name#23, c_last_name#24, d_year#35, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28, sum#37] +Keys [8]: [c_customer_id#22, c_first_name#23, c_last_name#24, d_year#35, c_preferred_cust_flag#25, c_birth_country#26, c_login#27, c_email_address#28] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#31 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#31 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(8,2)))), DecimalType(8,2))))#18] +Results [5]: [c_customer_id#22 AS customer_id#38, c_first_name#23 AS customer_first_name#39, c_last_name#24 AS customer_last_name#40, c_email_address#28 AS customer_email_address#41, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ss_ext_list_price#31 as decimal(8,2))) - promote_precision(cast(ss_ext_discount_amt#30 as decimal(8,2)))), DecimalType(8,2))))#18,18,2) AS year_total#42] (32) BroadcastExchange -Input [5]: [customer_id#42, customer_first_name#43, customer_last_name#44, customer_email_address#45, year_total#46] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#47] +Input [5]: [customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41, year_total#42] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=5] (33) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#21] -Right keys [1]: [customer_id#42] +Left keys [1]: [customer_id#19] +Right keys [1]: [customer_id#38] Join condition: None (34) Scan parquet default.customer -Output [8]: [c_customer_sk#48, c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55] +Output [8]: [c_customer_sk#43, c_customer_id#44, c_first_name#45, c_last_name#46, c_preferred_cust_flag#47, c_birth_country#48, c_login#49, c_email_address#50] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (35) ColumnarToRow [codegen id : 10] -Input [8]: [c_customer_sk#48, c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55] +Input [8]: [c_customer_sk#43, c_customer_id#44, c_first_name#45, c_last_name#46, c_preferred_cust_flag#47, c_birth_country#48, c_login#49, c_email_address#50] (36) Filter [codegen id : 10] -Input [8]: [c_customer_sk#48, c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55] -Condition : (isnotnull(c_customer_sk#48) AND isnotnull(c_customer_id#49)) +Input [8]: [c_customer_sk#43, c_customer_id#44, c_first_name#45, c_last_name#46, c_preferred_cust_flag#47, c_birth_country#48, c_login#49, c_email_address#50] +Condition : (isnotnull(c_customer_sk#43) AND isnotnull(c_customer_id#44)) (37) Scan parquet default.web_sales -Output [4]: [ws_bill_customer_sk#56, ws_ext_discount_amt#57, ws_ext_list_price#58, ws_sold_date_sk#59] +Output [4]: [ws_bill_customer_sk#51, ws_ext_discount_amt#52, ws_ext_list_price#53, ws_sold_date_sk#54] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#59), dynamicpruningexpression(ws_sold_date_sk#59 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ws_sold_date_sk#54), dynamicpruningexpression(ws_sold_date_sk#54 IN dynamicpruning#13)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (38) ColumnarToRow [codegen id : 8] -Input [4]: [ws_bill_customer_sk#56, ws_ext_discount_amt#57, ws_ext_list_price#58, ws_sold_date_sk#59] +Input [4]: [ws_bill_customer_sk#51, ws_ext_discount_amt#52, ws_ext_list_price#53, ws_sold_date_sk#54] (39) Filter [codegen id : 8] -Input [4]: [ws_bill_customer_sk#56, ws_ext_discount_amt#57, ws_ext_list_price#58, ws_sold_date_sk#59] -Condition : isnotnull(ws_bill_customer_sk#56) +Input [4]: [ws_bill_customer_sk#51, ws_ext_discount_amt#52, ws_ext_list_price#53, ws_sold_date_sk#54] +Condition : isnotnull(ws_bill_customer_sk#51) (40) BroadcastExchange -Input [4]: [ws_bill_customer_sk#56, ws_ext_discount_amt#57, ws_ext_list_price#58, ws_sold_date_sk#59] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#60] +Input [4]: [ws_bill_customer_sk#51, ws_ext_discount_amt#52, ws_ext_list_price#53, ws_sold_date_sk#54] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] (41) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [c_customer_sk#48] -Right keys [1]: [ws_bill_customer_sk#56] +Left keys [1]: [c_customer_sk#43] +Right keys [1]: [ws_bill_customer_sk#51] Join condition: None (42) Project [codegen id : 10] -Output [10]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, ws_ext_discount_amt#57, ws_ext_list_price#58, ws_sold_date_sk#59] -Input [12]: [c_customer_sk#48, c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, ws_bill_customer_sk#56, ws_ext_discount_amt#57, ws_ext_list_price#58, ws_sold_date_sk#59] +Output [10]: [c_customer_id#44, c_first_name#45, c_last_name#46, c_preferred_cust_flag#47, c_birth_country#48, c_login#49, c_email_address#50, ws_ext_discount_amt#52, ws_ext_list_price#53, ws_sold_date_sk#54] +Input [12]: [c_customer_sk#43, c_customer_id#44, c_first_name#45, c_last_name#46, c_preferred_cust_flag#47, c_birth_country#48, c_login#49, c_email_address#50, ws_bill_customer_sk#51, ws_ext_discount_amt#52, ws_ext_list_price#53, ws_sold_date_sk#54] (43) ReusedExchange [Reuses operator id: 75] -Output [2]: [d_date_sk#61, d_year#62] +Output [2]: [d_date_sk#55, d_year#56] (44) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ws_sold_date_sk#59] -Right keys [1]: [d_date_sk#61] +Left keys [1]: [ws_sold_date_sk#54] +Right keys [1]: [d_date_sk#55] Join condition: None (45) Project [codegen id : 10] -Output [10]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, ws_ext_discount_amt#57, ws_ext_list_price#58, d_year#62] -Input [12]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, ws_ext_discount_amt#57, ws_ext_list_price#58, ws_sold_date_sk#59, d_date_sk#61, d_year#62] +Output [10]: [c_customer_id#44, c_first_name#45, c_last_name#46, c_preferred_cust_flag#47, c_birth_country#48, c_login#49, c_email_address#50, ws_ext_discount_amt#52, ws_ext_list_price#53, d_year#56] +Input [12]: [c_customer_id#44, c_first_name#45, c_last_name#46, c_preferred_cust_flag#47, c_birth_country#48, c_login#49, c_email_address#50, ws_ext_discount_amt#52, ws_ext_list_price#53, ws_sold_date_sk#54, d_date_sk#55, d_year#56] (46) HashAggregate [codegen id : 10] -Input [10]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, ws_ext_discount_amt#57, ws_ext_list_price#58, d_year#62] -Keys [8]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, d_year#62] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#57 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#63] -Results [9]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, d_year#62, sum#64] +Input [10]: [c_customer_id#44, c_first_name#45, c_last_name#46, c_preferred_cust_flag#47, c_birth_country#48, c_login#49, c_email_address#50, ws_ext_discount_amt#52, ws_ext_list_price#53, d_year#56] +Keys [8]: [c_customer_id#44, c_first_name#45, c_last_name#46, c_preferred_cust_flag#47, c_birth_country#48, c_login#49, c_email_address#50, d_year#56] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#53 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#52 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum#57] +Results [9]: [c_customer_id#44, c_first_name#45, c_last_name#46, c_preferred_cust_flag#47, c_birth_country#48, c_login#49, c_email_address#50, d_year#56, sum#58] (47) Exchange -Input [9]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, d_year#62, sum#64] -Arguments: hashpartitioning(c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, d_year#62, 5), ENSURE_REQUIREMENTS, [id=#65] +Input [9]: [c_customer_id#44, c_first_name#45, c_last_name#46, c_preferred_cust_flag#47, c_birth_country#48, c_login#49, c_email_address#50, d_year#56, sum#58] +Arguments: hashpartitioning(c_customer_id#44, c_first_name#45, c_last_name#46, c_preferred_cust_flag#47, c_birth_country#48, c_login#49, c_email_address#50, d_year#56, 5), ENSURE_REQUIREMENTS, [plan_id=7] (48) HashAggregate [codegen id : 11] -Input [9]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, d_year#62, sum#64] -Keys [8]: [c_customer_id#49, c_first_name#50, c_last_name#51, c_preferred_cust_flag#52, c_birth_country#53, c_login#54, c_email_address#55, d_year#62] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#57 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#57 as decimal(8,2)))), DecimalType(8,2))))#66] -Results [2]: [c_customer_id#49 AS customer_id#67, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#58 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#57 as decimal(8,2)))), DecimalType(8,2))))#66,18,2) AS year_total#68] +Input [9]: [c_customer_id#44, c_first_name#45, c_last_name#46, c_preferred_cust_flag#47, c_birth_country#48, c_login#49, c_email_address#50, d_year#56, sum#58] +Keys [8]: [c_customer_id#44, c_first_name#45, c_last_name#46, c_preferred_cust_flag#47, c_birth_country#48, c_login#49, c_email_address#50, d_year#56] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#53 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#52 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#53 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#52 as decimal(8,2)))), DecimalType(8,2))))#59] +Results [2]: [c_customer_id#44 AS customer_id#60, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#53 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#52 as decimal(8,2)))), DecimalType(8,2))))#59,18,2) AS year_total#61] (49) Filter [codegen id : 11] -Input [2]: [customer_id#67, year_total#68] -Condition : (isnotnull(year_total#68) AND (year_total#68 > 0.00)) +Input [2]: [customer_id#60, year_total#61] +Condition : (isnotnull(year_total#61) AND (year_total#61 > 0.00)) (50) BroadcastExchange -Input [2]: [customer_id#67, year_total#68] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#69] +Input [2]: [customer_id#60, year_total#61] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=8] (51) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#21] -Right keys [1]: [customer_id#67] +Left keys [1]: [customer_id#19] +Right keys [1]: [customer_id#60] Join condition: None (52) Project [codegen id : 16] -Output [8]: [customer_id#21, year_total#22, customer_id#42, customer_first_name#43, customer_last_name#44, customer_email_address#45, year_total#46, year_total#68] -Input [9]: [customer_id#21, year_total#22, customer_id#42, customer_first_name#43, customer_last_name#44, customer_email_address#45, year_total#46, customer_id#67, year_total#68] +Output [8]: [customer_id#19, year_total#20, customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41, year_total#42, year_total#61] +Input [9]: [customer_id#19, year_total#20, customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41, year_total#42, customer_id#60, year_total#61] (53) Scan parquet default.customer -Output [8]: [c_customer_sk#70, c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77] +Output [8]: [c_customer_sk#62, c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (54) ColumnarToRow [codegen id : 14] -Input [8]: [c_customer_sk#70, c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77] +Input [8]: [c_customer_sk#62, c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69] (55) Filter [codegen id : 14] -Input [8]: [c_customer_sk#70, c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77] -Condition : (isnotnull(c_customer_sk#70) AND isnotnull(c_customer_id#71)) +Input [8]: [c_customer_sk#62, c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69] +Condition : (isnotnull(c_customer_sk#62) AND isnotnull(c_customer_id#63)) (56) Scan parquet default.web_sales -Output [4]: [ws_bill_customer_sk#78, ws_ext_discount_amt#79, ws_ext_list_price#80, ws_sold_date_sk#81] +Output [4]: [ws_bill_customer_sk#70, ws_ext_discount_amt#71, ws_ext_list_price#72, ws_sold_date_sk#73] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#81), dynamicpruningexpression(ws_sold_date_sk#81 IN dynamicpruning#35)] +PartitionFilters: [isnotnull(ws_sold_date_sk#73), dynamicpruningexpression(ws_sold_date_sk#73 IN dynamicpruning#33)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (57) ColumnarToRow [codegen id : 12] -Input [4]: [ws_bill_customer_sk#78, ws_ext_discount_amt#79, ws_ext_list_price#80, ws_sold_date_sk#81] +Input [4]: [ws_bill_customer_sk#70, ws_ext_discount_amt#71, ws_ext_list_price#72, ws_sold_date_sk#73] (58) Filter [codegen id : 12] -Input [4]: [ws_bill_customer_sk#78, ws_ext_discount_amt#79, ws_ext_list_price#80, ws_sold_date_sk#81] -Condition : isnotnull(ws_bill_customer_sk#78) +Input [4]: [ws_bill_customer_sk#70, ws_ext_discount_amt#71, ws_ext_list_price#72, ws_sold_date_sk#73] +Condition : isnotnull(ws_bill_customer_sk#70) (59) BroadcastExchange -Input [4]: [ws_bill_customer_sk#78, ws_ext_discount_amt#79, ws_ext_list_price#80, ws_sold_date_sk#81] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#82] +Input [4]: [ws_bill_customer_sk#70, ws_ext_discount_amt#71, ws_ext_list_price#72, ws_sold_date_sk#73] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] (60) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [c_customer_sk#70] -Right keys [1]: [ws_bill_customer_sk#78] +Left keys [1]: [c_customer_sk#62] +Right keys [1]: [ws_bill_customer_sk#70] Join condition: None (61) Project [codegen id : 14] -Output [10]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, ws_ext_discount_amt#79, ws_ext_list_price#80, ws_sold_date_sk#81] -Input [12]: [c_customer_sk#70, c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, ws_bill_customer_sk#78, ws_ext_discount_amt#79, ws_ext_list_price#80, ws_sold_date_sk#81] +Output [10]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, ws_ext_discount_amt#71, ws_ext_list_price#72, ws_sold_date_sk#73] +Input [12]: [c_customer_sk#62, c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, ws_bill_customer_sk#70, ws_ext_discount_amt#71, ws_ext_list_price#72, ws_sold_date_sk#73] (62) ReusedExchange [Reuses operator id: 79] -Output [2]: [d_date_sk#83, d_year#84] +Output [2]: [d_date_sk#74, d_year#75] (63) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_sold_date_sk#81] -Right keys [1]: [d_date_sk#83] +Left keys [1]: [ws_sold_date_sk#73] +Right keys [1]: [d_date_sk#74] Join condition: None (64) Project [codegen id : 14] -Output [10]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, ws_ext_discount_amt#79, ws_ext_list_price#80, d_year#84] -Input [12]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, ws_ext_discount_amt#79, ws_ext_list_price#80, ws_sold_date_sk#81, d_date_sk#83, d_year#84] +Output [10]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, ws_ext_discount_amt#71, ws_ext_list_price#72, d_year#75] +Input [12]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, ws_ext_discount_amt#71, ws_ext_list_price#72, ws_sold_date_sk#73, d_date_sk#74, d_year#75] (65) HashAggregate [codegen id : 14] -Input [10]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, ws_ext_discount_amt#79, ws_ext_list_price#80, d_year#84] -Keys [8]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#84] -Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#80 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#79 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum#85] -Results [9]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#84, sum#86] +Input [10]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, ws_ext_discount_amt#71, ws_ext_list_price#72, d_year#75] +Keys [8]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#75] +Functions [1]: [partial_sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum#76] +Results [9]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#75, sum#77] (66) Exchange -Input [9]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#84, sum#86] -Arguments: hashpartitioning(c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#84, 5), ENSURE_REQUIREMENTS, [id=#87] +Input [9]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#75, sum#77] +Arguments: hashpartitioning(c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#75, 5), ENSURE_REQUIREMENTS, [plan_id=10] (67) HashAggregate [codegen id : 15] -Input [9]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#84, sum#86] -Keys [8]: [c_customer_id#71, c_first_name#72, c_last_name#73, c_preferred_cust_flag#74, c_birth_country#75, c_login#76, c_email_address#77, d_year#84] -Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#80 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#79 as decimal(8,2)))), DecimalType(8,2))))] -Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#80 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#79 as decimal(8,2)))), DecimalType(8,2))))#66] -Results [2]: [c_customer_id#71 AS customer_id#88, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#80 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#79 as decimal(8,2)))), DecimalType(8,2))))#66,18,2) AS year_total#89] +Input [9]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#75, sum#77] +Keys [8]: [c_customer_id#63, c_first_name#64, c_last_name#65, c_preferred_cust_flag#66, c_birth_country#67, c_login#68, c_email_address#69, d_year#75] +Functions [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2))))] +Aggregate Attributes [1]: [sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2))))#59] +Results [2]: [c_customer_id#63 AS customer_id#78, MakeDecimal(sum(UnscaledValue(CheckOverflow((promote_precision(cast(ws_ext_list_price#72 as decimal(8,2))) - promote_precision(cast(ws_ext_discount_amt#71 as decimal(8,2)))), DecimalType(8,2))))#59,18,2) AS year_total#79] (68) BroadcastExchange -Input [2]: [customer_id#88, year_total#89] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#90] +Input [2]: [customer_id#78, year_total#79] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] (69) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#21] -Right keys [1]: [customer_id#88] -Join condition: (CASE WHEN (year_total#68 > 0.00) THEN CheckOverflow((promote_precision(year_total#89) / promote_precision(year_total#68)), DecimalType(38,20)) ELSE 0E-20 END > CASE WHEN (year_total#22 > 0.00) THEN CheckOverflow((promote_precision(year_total#46) / promote_precision(year_total#22)), DecimalType(38,20)) ELSE 0E-20 END) +Left keys [1]: [customer_id#19] +Right keys [1]: [customer_id#78] +Join condition: (CASE WHEN (year_total#61 > 0.00) THEN CheckOverflow((promote_precision(year_total#79) / promote_precision(year_total#61)), DecimalType(38,20)) ELSE 0E-20 END > CASE WHEN (year_total#20 > 0.00) THEN CheckOverflow((promote_precision(year_total#42) / promote_precision(year_total#20)), DecimalType(38,20)) ELSE 0E-20 END) (70) Project [codegen id : 16] -Output [4]: [customer_id#42, customer_first_name#43, customer_last_name#44, customer_email_address#45] -Input [10]: [customer_id#21, year_total#22, customer_id#42, customer_first_name#43, customer_last_name#44, customer_email_address#45, year_total#46, year_total#68, customer_id#88, year_total#89] +Output [4]: [customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41] +Input [10]: [customer_id#19, year_total#20, customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41, year_total#42, year_total#61, customer_id#78, year_total#79] (71) TakeOrderedAndProject -Input [4]: [customer_id#42, customer_first_name#43, customer_last_name#44, customer_email_address#45] -Arguments: 100, [customer_id#42 ASC NULLS FIRST, customer_first_name#43 ASC NULLS FIRST, customer_last_name#44 ASC NULLS FIRST, customer_email_address#45 ASC NULLS FIRST], [customer_id#42, customer_first_name#43, customer_last_name#44, customer_email_address#45] +Input [4]: [customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41] +Arguments: 100, [customer_id#38 ASC NULLS FIRST, customer_first_name#39 ASC NULLS FIRST, customer_last_name#40 ASC NULLS FIRST, customer_email_address#41 ASC NULLS FIRST], [customer_id#38, customer_first_name#39, customer_last_name#40, customer_email_address#41] ===== Subqueries ===== @@ -417,24 +417,24 @@ BroadcastExchange (75) (72) Scan parquet default.date_dim -Output [2]: [d_date_sk#15, d_year#16] +Output [2]: [d_date_sk#14, d_year#15] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (73) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#16] +Input [2]: [d_date_sk#14, d_year#15] (74) Filter [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#16] -Condition : ((isnotnull(d_year#16) AND (d_year#16 = 2001)) AND isnotnull(d_date_sk#15)) +Input [2]: [d_date_sk#14, d_year#15] +Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2001)) AND isnotnull(d_date_sk#14)) (75) BroadcastExchange -Input [2]: [d_date_sk#15, d_year#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#91] +Input [2]: [d_date_sk#14, d_year#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] -Subquery:2 Hosting operator id = 20 Hosting Expression = ss_sold_date_sk#34 IN dynamicpruning#35 +Subquery:2 Hosting operator id = 20 Hosting Expression = ss_sold_date_sk#32 IN dynamicpruning#33 BroadcastExchange (79) +- * Filter (78) +- * ColumnarToRow (77) @@ -442,25 +442,25 @@ BroadcastExchange (79) (76) Scan parquet default.date_dim -Output [2]: [d_date_sk#37, d_year#38] +Output [2]: [d_date_sk#34, d_year#35] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] ReadSchema: struct (77) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#37, d_year#38] +Input [2]: [d_date_sk#34, d_year#35] (78) Filter [codegen id : 1] -Input [2]: [d_date_sk#37, d_year#38] -Condition : ((isnotnull(d_year#38) AND (d_year#38 = 2002)) AND isnotnull(d_date_sk#37)) +Input [2]: [d_date_sk#34, d_year#35] +Condition : ((isnotnull(d_year#35) AND (d_year#35 = 2002)) AND isnotnull(d_date_sk#34)) (79) BroadcastExchange -Input [2]: [d_date_sk#37, d_year#38] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#92] +Input [2]: [d_date_sk#34, d_year#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=13] -Subquery:3 Hosting operator id = 37 Hosting Expression = ws_sold_date_sk#59 IN dynamicpruning#13 +Subquery:3 Hosting operator id = 37 Hosting Expression = ws_sold_date_sk#54 IN dynamicpruning#13 -Subquery:4 Hosting operator id = 56 Hosting Expression = ws_sold_date_sk#81 IN dynamicpruning#35 +Subquery:4 Hosting operator id = 56 Hosting Expression = ws_sold_date_sk#73 IN dynamicpruning#33 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.sf100/explain.txt index 40793508f4786..3d13a020acb66 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12.sf100/explain.txt @@ -41,92 +41,92 @@ Condition : isnotnull(ws_item_sk#1) (4) Exchange Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] -Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#5] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [3]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3] Arguments: [ws_item_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.item -Output [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Output [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (8) Filter [codegen id : 3] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Condition : (i_category#11 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#6)) +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Condition : (i_category#10 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#5)) (9) Exchange -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Arguments: hashpartitioning(i_item_sk#6, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Arguments: hashpartitioning(i_item_sk#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] (10) Sort [codegen id : 4] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Arguments: [i_item_sk#5 ASC NULLS FIRST], false, 0 (11) SortMergeJoin [codegen id : 6] Left keys [1]: [ws_item_sk#1] -Right keys [1]: [i_item_sk#6] +Right keys [1]: [i_item_sk#5] Join condition: None (12) Project [codegen id : 6] -Output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (13) ReusedExchange [Reuses operator id: 28] -Output [1]: [d_date_sk#13] +Output [1]: [d_date_sk#11] (14) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ws_sold_date_sk#3] -Right keys [1]: [d_date_sk#13] +Right keys [1]: [d_date_sk#11] Join condition: None (15) Project [codegen id : 6] -Output [6]: [ws_ext_sales_price#2, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11, d_date_sk#13] +Output [6]: [ws_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#11] (16) HashAggregate [codegen id : 6] -Input [6]: [ws_ext_sales_price#2, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Keys [5]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9] +Input [6]: [ws_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#14] -Results [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] (17) Exchange -Input [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] -Arguments: hashpartitioning(i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 7] -Input [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] -Keys [5]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#2))#17] -Results [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#17,17,2) AS _w1#20] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#2))#14] +Results [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS _w0#16, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS _w1#17] (19) Exchange -Input [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20] -Arguments: hashpartitioning(i_class#10, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] (20) Sort [codegen id : 8] -Input [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20] -Arguments: [i_class#10 ASC NULLS FIRST], false, 0 +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: [i_class#9 ASC NULLS FIRST], false, 0 (21) Window -Input [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20] -Arguments: [sum(_w1#20) windowspecdefinition(i_class#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#10] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: [sum(_w1#17) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#18], [i_class#9] (22) Project [codegen id : 9] -Output [7]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17)) AS revenueratio#23] -Input [9]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, _we0#22] +Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#16) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#18)), DecimalType(38,17)) AS revenueratio#19] +Input [9]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, _we0#18] (23) TakeOrderedAndProject -Input [7]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, revenueratio#23] -Arguments: 100, [i_category#11 ASC NULLS FIRST, i_class#10 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#8 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, revenueratio#23] +Input [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] +Arguments: 100, [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST], [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] ===== Subqueries ===== @@ -139,25 +139,25 @@ BroadcastExchange (28) (24) Scan parquet default.date_dim -Output [2]: [d_date_sk#13, d_date#24] +Output [2]: [d_date_sk#11, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#13, d_date#24] +Input [2]: [d_date_sk#11, d_date#20] (26) Filter [codegen id : 1] -Input [2]: [d_date_sk#13, d_date#24] -Condition : (((isnotnull(d_date#24) AND (d_date#24 >= 1999-02-22)) AND (d_date#24 <= 1999-03-24)) AND isnotnull(d_date_sk#13)) +Input [2]: [d_date_sk#11, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-22)) AND (d_date#20 <= 1999-03-24)) AND isnotnull(d_date_sk#11)) (27) Project [codegen id : 1] -Output [1]: [d_date_sk#13] -Input [2]: [d_date_sk#13, d_date#24] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#20] (28) BroadcastExchange -Input [1]: [d_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12/explain.txt index 02f8baa5a0b81..8c652f4782c4b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q12/explain.txt @@ -52,7 +52,7 @@ Condition : (i_category#10 IN (Sports (7) BroadcastExchange Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ws_item_sk#1] @@ -64,54 +64,54 @@ Output [7]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#6, i_item_desc#7 Input [9]: [ws_item_sk#1, ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (10) ReusedExchange [Reuses operator id: 25] -Output [1]: [d_date_sk#12] +Output [1]: [d_date_sk#11] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ws_sold_date_sk#3] -Right keys [1]: [d_date_sk#12] +Right keys [1]: [d_date_sk#11] Join condition: None (12) Project [codegen id : 3] Output [6]: [ws_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] -Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#12] +Input [8]: [ws_ext_sales_price#2, ws_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#11] (13) HashAggregate [codegen id : 3] Input [6]: [ws_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [partial_sum(UnscaledValue(ws_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#13] -Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] (14) Exchange -Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] -Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 4] -Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [sum(UnscaledValue(ws_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#2))#16] -Results [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#16,17,2) AS itemrevenue#17, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#16,17,2) AS _w0#18, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#16,17,2) AS _w1#19] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_ext_sales_price#2))#14] +Results [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS _w0#16, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#2))#14,17,2) AS _w1#17] (16) Exchange -Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19] -Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (17) Sort [codegen id : 5] -Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] Arguments: [i_class#9 ASC NULLS FIRST], false, 0 (18) Window -Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19] -Arguments: [sum(_w1#19) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#21], [i_class#9] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: [sum(_w1#17) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#18], [i_class#9] (19) Project [codegen id : 6] -Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17)) AS revenueratio#22] -Input [9]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, _we0#21] +Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#16) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#18)), DecimalType(38,17)) AS revenueratio#19] +Input [9]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, _we0#18] (20) TakeOrderedAndProject -Input [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22] -Arguments: 100, [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#22 ASC NULLS FIRST], [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22] +Input [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] +Arguments: 100, [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST], [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] ===== Subqueries ===== @@ -124,25 +124,25 @@ BroadcastExchange (25) (21) Scan parquet default.date_dim -Output [2]: [d_date_sk#12, d_date#23] +Output [2]: [d_date_sk#11, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#12, d_date#23] +Input [2]: [d_date_sk#11, d_date#20] (23) Filter [codegen id : 1] -Input [2]: [d_date_sk#12, d_date#23] -Condition : (((isnotnull(d_date#23) AND (d_date#23 >= 1999-02-22)) AND (d_date#23 <= 1999-03-24)) AND isnotnull(d_date_sk#12)) +Input [2]: [d_date_sk#11, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-22)) AND (d_date#20 <= 1999-03-24)) AND isnotnull(d_date_sk#11)) (24) Project [codegen id : 1] -Output [1]: [d_date_sk#12] -Input [2]: [d_date_sk#12, d_date#23] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#20] (25) BroadcastExchange -Input [1]: [d_date_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt index 92b80b4085c67..809219a4ee851 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14.sf100/explain.txt @@ -117,437 +117,437 @@ Condition : isnotnull(ss_item_sk#1) (4) Exchange Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#6] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.item -Output [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct (7) ColumnarToRow [codegen id : 19] -Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] (8) Filter [codegen id : 19] -Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] -Condition : ((isnotnull(i_brand_id#8) AND isnotnull(i_class_id#9)) AND isnotnull(i_category_id#10)) +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : ((isnotnull(i_brand_id#7) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) (9) Scan parquet default.store_sales -Output [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Output [2]: [ss_item_sk#10, ss_sold_date_sk#11] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#12), dynamicpruningexpression(ss_sold_date_sk#12 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ss_sold_date_sk#11), dynamicpruningexpression(ss_sold_date_sk#11 IN dynamicpruning#12)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (10) ColumnarToRow [codegen id : 11] -Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Input [2]: [ss_item_sk#10, ss_sold_date_sk#11] (11) Filter [codegen id : 11] -Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] -Condition : isnotnull(ss_item_sk#11) +Input [2]: [ss_item_sk#10, ss_sold_date_sk#11] +Condition : isnotnull(ss_item_sk#10) (12) ReusedExchange [Reuses operator id: 132] -Output [1]: [d_date_sk#14] +Output [1]: [d_date_sk#13] (13) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_sold_date_sk#12] -Right keys [1]: [d_date_sk#14] +Left keys [1]: [ss_sold_date_sk#11] +Right keys [1]: [d_date_sk#13] Join condition: None (14) Project [codegen id : 11] -Output [1]: [ss_item_sk#11] -Input [3]: [ss_item_sk#11, ss_sold_date_sk#12, d_date_sk#14] +Output [1]: [ss_item_sk#10] +Input [3]: [ss_item_sk#10, ss_sold_date_sk#11, d_date_sk#13] (15) Scan parquet default.item -Output [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] +Output [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct (16) ColumnarToRow [codegen id : 4] -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] (17) Filter [codegen id : 4] -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Condition : (((isnotnull(i_item_sk#15) AND isnotnull(i_brand_id#16)) AND isnotnull(i_class_id#17)) AND isnotnull(i_category_id#18)) +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Condition : (((isnotnull(i_item_sk#14) AND isnotnull(i_brand_id#15)) AND isnotnull(i_class_id#16)) AND isnotnull(i_category_id#17)) (18) Exchange -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Arguments: hashpartitioning(coalesce(i_brand_id#16, 0), isnull(i_brand_id#16), coalesce(i_class_id#17, 0), isnull(i_class_id#17), coalesce(i_category_id#18, 0), isnull(i_category_id#18), 5), ENSURE_REQUIREMENTS, [id=#19] +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Arguments: hashpartitioning(coalesce(i_brand_id#15, 0), isnull(i_brand_id#15), coalesce(i_class_id#16, 0), isnull(i_class_id#16), coalesce(i_category_id#17, 0), isnull(i_category_id#17), 5), ENSURE_REQUIREMENTS, [plan_id=2] (19) Sort [codegen id : 5] -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Arguments: [coalesce(i_brand_id#16, 0) ASC NULLS FIRST, isnull(i_brand_id#16) ASC NULLS FIRST, coalesce(i_class_id#17, 0) ASC NULLS FIRST, isnull(i_class_id#17) ASC NULLS FIRST, coalesce(i_category_id#18, 0) ASC NULLS FIRST, isnull(i_category_id#18) ASC NULLS FIRST], false, 0 +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Arguments: [coalesce(i_brand_id#15, 0) ASC NULLS FIRST, isnull(i_brand_id#15) ASC NULLS FIRST, coalesce(i_class_id#16, 0) ASC NULLS FIRST, isnull(i_class_id#16) ASC NULLS FIRST, coalesce(i_category_id#17, 0) ASC NULLS FIRST, isnull(i_category_id#17) ASC NULLS FIRST], false, 0 (20) Scan parquet default.catalog_sales -Output [2]: [cs_item_sk#20, cs_sold_date_sk#21] +Output [2]: [cs_item_sk#18, cs_sold_date_sk#19] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#21), dynamicpruningexpression(cs_sold_date_sk#21 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(cs_sold_date_sk#19), dynamicpruningexpression(cs_sold_date_sk#19 IN dynamicpruning#12)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 8] -Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] +Input [2]: [cs_item_sk#18, cs_sold_date_sk#19] (22) Filter [codegen id : 8] -Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] -Condition : isnotnull(cs_item_sk#20) +Input [2]: [cs_item_sk#18, cs_sold_date_sk#19] +Condition : isnotnull(cs_item_sk#18) (23) ReusedExchange [Reuses operator id: 132] -Output [1]: [d_date_sk#22] +Output [1]: [d_date_sk#20] (24) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_sold_date_sk#21] -Right keys [1]: [d_date_sk#22] +Left keys [1]: [cs_sold_date_sk#19] +Right keys [1]: [d_date_sk#20] Join condition: None (25) Project [codegen id : 8] -Output [1]: [cs_item_sk#20] -Input [3]: [cs_item_sk#20, cs_sold_date_sk#21, d_date_sk#22] +Output [1]: [cs_item_sk#18] +Input [3]: [cs_item_sk#18, cs_sold_date_sk#19, d_date_sk#20] (26) Scan parquet default.item -Output [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] +Output [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 7] -Input [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] +Input [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] (28) Filter [codegen id : 7] -Input [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] -Condition : isnotnull(i_item_sk#23) +Input [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] +Condition : isnotnull(i_item_sk#21) (29) BroadcastExchange -Input [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] +Input [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (30) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_item_sk#20] -Right keys [1]: [i_item_sk#23] +Left keys [1]: [cs_item_sk#18] +Right keys [1]: [i_item_sk#21] Join condition: None (31) Project [codegen id : 8] -Output [3]: [i_brand_id#24, i_class_id#25, i_category_id#26] -Input [5]: [cs_item_sk#20, i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] +Output [3]: [i_brand_id#22, i_class_id#23, i_category_id#24] +Input [5]: [cs_item_sk#18, i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] (32) Exchange -Input [3]: [i_brand_id#24, i_class_id#25, i_category_id#26] -Arguments: hashpartitioning(coalesce(i_brand_id#24, 0), isnull(i_brand_id#24), coalesce(i_class_id#25, 0), isnull(i_class_id#25), coalesce(i_category_id#26, 0), isnull(i_category_id#26), 5), ENSURE_REQUIREMENTS, [id=#28] +Input [3]: [i_brand_id#22, i_class_id#23, i_category_id#24] +Arguments: hashpartitioning(coalesce(i_brand_id#22, 0), isnull(i_brand_id#22), coalesce(i_class_id#23, 0), isnull(i_class_id#23), coalesce(i_category_id#24, 0), isnull(i_category_id#24), 5), ENSURE_REQUIREMENTS, [plan_id=4] (33) Sort [codegen id : 9] -Input [3]: [i_brand_id#24, i_class_id#25, i_category_id#26] -Arguments: [coalesce(i_brand_id#24, 0) ASC NULLS FIRST, isnull(i_brand_id#24) ASC NULLS FIRST, coalesce(i_class_id#25, 0) ASC NULLS FIRST, isnull(i_class_id#25) ASC NULLS FIRST, coalesce(i_category_id#26, 0) ASC NULLS FIRST, isnull(i_category_id#26) ASC NULLS FIRST], false, 0 +Input [3]: [i_brand_id#22, i_class_id#23, i_category_id#24] +Arguments: [coalesce(i_brand_id#22, 0) ASC NULLS FIRST, isnull(i_brand_id#22) ASC NULLS FIRST, coalesce(i_class_id#23, 0) ASC NULLS FIRST, isnull(i_class_id#23) ASC NULLS FIRST, coalesce(i_category_id#24, 0) ASC NULLS FIRST, isnull(i_category_id#24) ASC NULLS FIRST], false, 0 (34) SortMergeJoin [codegen id : 10] -Left keys [6]: [coalesce(i_brand_id#16, 0), isnull(i_brand_id#16), coalesce(i_class_id#17, 0), isnull(i_class_id#17), coalesce(i_category_id#18, 0), isnull(i_category_id#18)] -Right keys [6]: [coalesce(i_brand_id#24, 0), isnull(i_brand_id#24), coalesce(i_class_id#25, 0), isnull(i_class_id#25), coalesce(i_category_id#26, 0), isnull(i_category_id#26)] +Left keys [6]: [coalesce(i_brand_id#15, 0), isnull(i_brand_id#15), coalesce(i_class_id#16, 0), isnull(i_class_id#16), coalesce(i_category_id#17, 0), isnull(i_category_id#17)] +Right keys [6]: [coalesce(i_brand_id#22, 0), isnull(i_brand_id#22), coalesce(i_class_id#23, 0), isnull(i_class_id#23), coalesce(i_category_id#24, 0), isnull(i_category_id#24)] Join condition: None (35) BroadcastExchange -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#29] +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_item_sk#11] -Right keys [1]: [i_item_sk#15] +Left keys [1]: [ss_item_sk#10] +Right keys [1]: [i_item_sk#14] Join condition: None (37) Project [codegen id : 11] -Output [3]: [i_brand_id#16 AS brand_id#30, i_class_id#17 AS class_id#31, i_category_id#18 AS category_id#32] -Input [5]: [ss_item_sk#11, i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] +Output [3]: [i_brand_id#15 AS brand_id#25, i_class_id#16 AS class_id#26, i_category_id#17 AS category_id#27] +Input [5]: [ss_item_sk#10, i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] (38) HashAggregate [codegen id : 11] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] +Results [3]: [brand_id#25, class_id#26, category_id#27] (39) Exchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: hashpartitioning(brand_id#30, class_id#31, category_id#32, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: hashpartitioning(brand_id#25, class_id#26, category_id#27, 5), ENSURE_REQUIREMENTS, [plan_id=6] (40) HashAggregate [codegen id : 12] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] +Results [3]: [brand_id#25, class_id#26, category_id#27] (41) Exchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: hashpartitioning(coalesce(brand_id#30, 0), isnull(brand_id#30), coalesce(class_id#31, 0), isnull(class_id#31), coalesce(category_id#32, 0), isnull(category_id#32), 5), ENSURE_REQUIREMENTS, [id=#34] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: hashpartitioning(coalesce(brand_id#25, 0), isnull(brand_id#25), coalesce(class_id#26, 0), isnull(class_id#26), coalesce(category_id#27, 0), isnull(category_id#27), 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) Sort [codegen id : 13] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: [coalesce(brand_id#30, 0) ASC NULLS FIRST, isnull(brand_id#30) ASC NULLS FIRST, coalesce(class_id#31, 0) ASC NULLS FIRST, isnull(class_id#31) ASC NULLS FIRST, coalesce(category_id#32, 0) ASC NULLS FIRST, isnull(category_id#32) ASC NULLS FIRST], false, 0 +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: [coalesce(brand_id#25, 0) ASC NULLS FIRST, isnull(brand_id#25) ASC NULLS FIRST, coalesce(class_id#26, 0) ASC NULLS FIRST, isnull(class_id#26) ASC NULLS FIRST, coalesce(category_id#27, 0) ASC NULLS FIRST, isnull(category_id#27) ASC NULLS FIRST], false, 0 (43) Scan parquet default.web_sales -Output [2]: [ws_item_sk#35, ws_sold_date_sk#36] +Output [2]: [ws_item_sk#28, ws_sold_date_sk#29] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#36), dynamicpruningexpression(ws_sold_date_sk#36 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ws_sold_date_sk#29), dynamicpruningexpression(ws_sold_date_sk#29 IN dynamicpruning#12)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 16] -Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] (45) Filter [codegen id : 16] -Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] -Condition : isnotnull(ws_item_sk#35) +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] +Condition : isnotnull(ws_item_sk#28) (46) ReusedExchange [Reuses operator id: 132] -Output [1]: [d_date_sk#37] +Output [1]: [d_date_sk#30] (47) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [ws_sold_date_sk#36] -Right keys [1]: [d_date_sk#37] +Left keys [1]: [ws_sold_date_sk#29] +Right keys [1]: [d_date_sk#30] Join condition: None (48) Project [codegen id : 16] -Output [1]: [ws_item_sk#35] -Input [3]: [ws_item_sk#35, ws_sold_date_sk#36, d_date_sk#37] +Output [1]: [ws_item_sk#28] +Input [3]: [ws_item_sk#28, ws_sold_date_sk#29, d_date_sk#30] (49) ReusedExchange [Reuses operator id: 29] -Output [4]: [i_item_sk#38, i_brand_id#39, i_class_id#40, i_category_id#41] +Output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] (50) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [ws_item_sk#35] -Right keys [1]: [i_item_sk#38] +Left keys [1]: [ws_item_sk#28] +Right keys [1]: [i_item_sk#31] Join condition: None (51) Project [codegen id : 16] -Output [3]: [i_brand_id#39, i_class_id#40, i_category_id#41] -Input [5]: [ws_item_sk#35, i_item_sk#38, i_brand_id#39, i_class_id#40, i_category_id#41] +Output [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Input [5]: [ws_item_sk#28, i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] (52) Exchange -Input [3]: [i_brand_id#39, i_class_id#40, i_category_id#41] -Arguments: hashpartitioning(coalesce(i_brand_id#39, 0), isnull(i_brand_id#39), coalesce(i_class_id#40, 0), isnull(i_class_id#40), coalesce(i_category_id#41, 0), isnull(i_category_id#41), 5), ENSURE_REQUIREMENTS, [id=#42] +Input [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: hashpartitioning(coalesce(i_brand_id#32, 0), isnull(i_brand_id#32), coalesce(i_class_id#33, 0), isnull(i_class_id#33), coalesce(i_category_id#34, 0), isnull(i_category_id#34), 5), ENSURE_REQUIREMENTS, [plan_id=8] (53) Sort [codegen id : 17] -Input [3]: [i_brand_id#39, i_class_id#40, i_category_id#41] -Arguments: [coalesce(i_brand_id#39, 0) ASC NULLS FIRST, isnull(i_brand_id#39) ASC NULLS FIRST, coalesce(i_class_id#40, 0) ASC NULLS FIRST, isnull(i_class_id#40) ASC NULLS FIRST, coalesce(i_category_id#41, 0) ASC NULLS FIRST, isnull(i_category_id#41) ASC NULLS FIRST], false, 0 +Input [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [coalesce(i_brand_id#32, 0) ASC NULLS FIRST, isnull(i_brand_id#32) ASC NULLS FIRST, coalesce(i_class_id#33, 0) ASC NULLS FIRST, isnull(i_class_id#33) ASC NULLS FIRST, coalesce(i_category_id#34, 0) ASC NULLS FIRST, isnull(i_category_id#34) ASC NULLS FIRST], false, 0 (54) SortMergeJoin [codegen id : 18] -Left keys [6]: [coalesce(brand_id#30, 0), isnull(brand_id#30), coalesce(class_id#31, 0), isnull(class_id#31), coalesce(category_id#32, 0), isnull(category_id#32)] -Right keys [6]: [coalesce(i_brand_id#39, 0), isnull(i_brand_id#39), coalesce(i_class_id#40, 0), isnull(i_class_id#40), coalesce(i_category_id#41, 0), isnull(i_category_id#41)] +Left keys [6]: [coalesce(brand_id#25, 0), isnull(brand_id#25), coalesce(class_id#26, 0), isnull(class_id#26), coalesce(category_id#27, 0), isnull(category_id#27)] +Right keys [6]: [coalesce(i_brand_id#32, 0), isnull(i_brand_id#32), coalesce(i_class_id#33, 0), isnull(i_class_id#33), coalesce(i_category_id#34, 0), isnull(i_category_id#34)] Join condition: None (55) BroadcastExchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#43] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=9] (56) BroadcastHashJoin [codegen id : 19] -Left keys [3]: [i_brand_id#8, i_class_id#9, i_category_id#10] -Right keys [3]: [brand_id#30, class_id#31, category_id#32] +Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Right keys [3]: [brand_id#25, class_id#26, category_id#27] Join condition: None (57) Project [codegen id : 19] -Output [1]: [i_item_sk#7 AS ss_item_sk#44] -Input [7]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10, brand_id#30, class_id#31, category_id#32] +Output [1]: [i_item_sk#6 AS ss_item_sk#35] +Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#25, class_id#26, category_id#27] (58) Exchange -Input [1]: [ss_item_sk#44] -Arguments: hashpartitioning(ss_item_sk#44, 5), ENSURE_REQUIREMENTS, [id=#45] +Input [1]: [ss_item_sk#35] +Arguments: hashpartitioning(ss_item_sk#35, 5), ENSURE_REQUIREMENTS, [plan_id=10] (59) Sort [codegen id : 20] -Input [1]: [ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#35] +Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (60) SortMergeJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [ss_item_sk#44] +Right keys [1]: [ss_item_sk#35] Join condition: None (61) ReusedExchange [Reuses operator id: 123] -Output [1]: [d_date_sk#46] +Output [1]: [d_date_sk#36] (62) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#46] +Right keys [1]: [d_date_sk#36] Join condition: None (63) Project [codegen id : 43] Output [3]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3] -Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#46] +Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#36] (64) Scan parquet default.item -Output [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Output [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct (65) ColumnarToRow [codegen id : 22] -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] (66) Filter [codegen id : 22] -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Condition : (((isnotnull(i_item_sk#47) AND isnotnull(i_brand_id#48)) AND isnotnull(i_class_id#49)) AND isnotnull(i_category_id#50)) +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Condition : (((isnotnull(i_item_sk#37) AND isnotnull(i_brand_id#38)) AND isnotnull(i_class_id#39)) AND isnotnull(i_category_id#40)) (67) Exchange -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Arguments: hashpartitioning(i_item_sk#47, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Arguments: hashpartitioning(i_item_sk#37, 5), ENSURE_REQUIREMENTS, [plan_id=11] (68) Sort [codegen id : 23] -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Arguments: [i_item_sk#47 ASC NULLS FIRST], false, 0 +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Arguments: [i_item_sk#37 ASC NULLS FIRST], false, 0 (69) ReusedExchange [Reuses operator id: 58] -Output [1]: [ss_item_sk#44] +Output [1]: [ss_item_sk#35] (70) Sort [codegen id : 41] -Input [1]: [ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#35] +Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (71) SortMergeJoin [codegen id : 42] -Left keys [1]: [i_item_sk#47] -Right keys [1]: [ss_item_sk#44] +Left keys [1]: [i_item_sk#37] +Right keys [1]: [ss_item_sk#35] Join condition: None (72) BroadcastExchange -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#52] +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] (73) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#47] +Right keys [1]: [i_item_sk#37] Join condition: None (74) Project [codegen id : 43] -Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] -Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#38, i_class_id#39, i_category_id#40] +Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] (75) HashAggregate [codegen id : 43] -Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] -Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#38, i_class_id#39, i_category_id#40] +Keys [3]: [i_brand_id#38, i_class_id#39, i_category_id#40] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#53, isEmpty#54, count#55] -Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] +Aggregate Attributes [3]: [sum#41, isEmpty#42, count#43] +Results [6]: [i_brand_id#38, i_class_id#39, i_category_id#40, sum#44, isEmpty#45, count#46] (76) Exchange -Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] -Arguments: hashpartitioning(i_brand_id#48, i_class_id#49, i_category_id#50, 5), ENSURE_REQUIREMENTS, [id=#59] +Input [6]: [i_brand_id#38, i_class_id#39, i_category_id#40, sum#44, isEmpty#45, count#46] +Arguments: hashpartitioning(i_brand_id#38, i_class_id#39, i_category_id#40, 5), ENSURE_REQUIREMENTS, [plan_id=13] (77) HashAggregate [codegen id : 88] -Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] -Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Input [6]: [i_brand_id#38, i_class_id#39, i_category_id#40, sum#44, isEmpty#45, count#46] +Keys [3]: [i_brand_id#38, i_class_id#39, i_category_id#40] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60, count(1)#61] -Results [6]: [store AS channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60 AS sales#63, count(1)#61 AS number_sales#64] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47, count(1)#48] +Results [6]: [store AS channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47 AS sales#50, count(1)#48 AS number_sales#51] (78) Filter [codegen id : 88] -Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] -Condition : (isnotnull(sales#63) AND (cast(sales#63 as decimal(32,6)) > cast(Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) +Input [6]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sales#50, number_sales#51] +Condition : (isnotnull(sales#50) AND (cast(sales#50 as decimal(32,6)) > cast(Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) (79) Scan parquet default.store_sales -Output [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] +Output [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#70), dynamicpruningexpression(ss_sold_date_sk#70 IN dynamicpruning#71)] +PartitionFilters: [isnotnull(ss_sold_date_sk#57), dynamicpruningexpression(ss_sold_date_sk#57 IN dynamicpruning#58)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (80) ColumnarToRow [codegen id : 44] -Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] +Input [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] (81) Filter [codegen id : 44] -Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] -Condition : isnotnull(ss_item_sk#67) +Input [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] +Condition : isnotnull(ss_item_sk#54) (82) Exchange -Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] -Arguments: hashpartitioning(ss_item_sk#67, 5), ENSURE_REQUIREMENTS, [id=#72] +Input [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] +Arguments: hashpartitioning(ss_item_sk#54, 5), ENSURE_REQUIREMENTS, [plan_id=14] (83) Sort [codegen id : 45] -Input [4]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70] -Arguments: [ss_item_sk#67 ASC NULLS FIRST], false, 0 +Input [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] +Arguments: [ss_item_sk#54 ASC NULLS FIRST], false, 0 (84) ReusedExchange [Reuses operator id: 58] -Output [1]: [ss_item_sk#44] +Output [1]: [ss_item_sk#35] (85) Sort [codegen id : 63] -Input [1]: [ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#35] +Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (86) SortMergeJoin [codegen id : 86] -Left keys [1]: [ss_item_sk#67] -Right keys [1]: [ss_item_sk#44] +Left keys [1]: [ss_item_sk#54] +Right keys [1]: [ss_item_sk#35] Join condition: None (87) ReusedExchange [Reuses operator id: 137] -Output [1]: [d_date_sk#73] +Output [1]: [d_date_sk#59] (88) BroadcastHashJoin [codegen id : 86] -Left keys [1]: [ss_sold_date_sk#70] -Right keys [1]: [d_date_sk#73] +Left keys [1]: [ss_sold_date_sk#57] +Right keys [1]: [d_date_sk#59] Join condition: None (89) Project [codegen id : 86] -Output [3]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69] -Input [5]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, ss_sold_date_sk#70, d_date_sk#73] +Output [3]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56] +Input [5]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57, d_date_sk#59] (90) ReusedExchange [Reuses operator id: 72] -Output [4]: [i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] +Output [4]: [i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63] (91) BroadcastHashJoin [codegen id : 86] -Left keys [1]: [ss_item_sk#67] -Right keys [1]: [i_item_sk#74] +Left keys [1]: [ss_item_sk#54] +Right keys [1]: [i_item_sk#60] Join condition: None (92) Project [codegen id : 86] -Output [5]: [ss_quantity#68, ss_list_price#69, i_brand_id#75, i_class_id#76, i_category_id#77] -Input [7]: [ss_item_sk#67, ss_quantity#68, ss_list_price#69, i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77] +Output [5]: [ss_quantity#55, ss_list_price#56, i_brand_id#61, i_class_id#62, i_category_id#63] +Input [7]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, i_item_sk#60, i_brand_id#61, i_class_id#62, i_category_id#63] (93) HashAggregate [codegen id : 86] -Input [5]: [ss_quantity#68, ss_list_price#69, i_brand_id#75, i_class_id#76, i_category_id#77] -Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#78, isEmpty#79, count#80] -Results [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] +Input [5]: [ss_quantity#55, ss_list_price#56, i_brand_id#61, i_class_id#62, i_category_id#63] +Keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#64, isEmpty#65, count#66] +Results [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#67, isEmpty#68, count#69] (94) Exchange -Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] -Arguments: hashpartitioning(i_brand_id#75, i_class_id#76, i_category_id#77, 5), ENSURE_REQUIREMENTS, [id=#84] +Input [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#67, isEmpty#68, count#69] +Arguments: hashpartitioning(i_brand_id#61, i_class_id#62, i_category_id#63, 5), ENSURE_REQUIREMENTS, [plan_id=15] (95) HashAggregate [codegen id : 87] -Input [6]: [i_brand_id#75, i_class_id#76, i_category_id#77, sum#81, isEmpty#82, count#83] -Keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#85, count(1)#86] -Results [6]: [store AS channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sum(CheckOverflow((promote_precision(cast(ss_quantity#68 as decimal(12,2))) * promote_precision(cast(ss_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#85 AS sales#88, count(1)#86 AS number_sales#89] +Input [6]: [i_brand_id#61, i_class_id#62, i_category_id#63, sum#67, isEmpty#68, count#69] +Keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#70, count(1)#71] +Results [6]: [store AS channel#72, i_brand_id#61, i_class_id#62, i_category_id#63, sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#70 AS sales#73, count(1)#71 AS number_sales#74] (96) Filter [codegen id : 87] -Input [6]: [channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] -Condition : (isnotnull(sales#88) AND (cast(sales#88 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) +Input [6]: [channel#72, i_brand_id#61, i_class_id#62, i_category_id#63, sales#73, number_sales#74] +Condition : (isnotnull(sales#73) AND (cast(sales#73 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) (97) BroadcastExchange -Input [6]: [channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] -Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#90] +Input [6]: [channel#72, i_brand_id#61, i_class_id#62, i_category_id#63, sales#73, number_sales#74] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [plan_id=16] (98) BroadcastHashJoin [codegen id : 88] -Left keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] -Right keys [3]: [i_brand_id#75, i_class_id#76, i_category_id#77] +Left keys [3]: [i_brand_id#38, i_class_id#39, i_category_id#40] +Right keys [3]: [i_brand_id#61, i_class_id#62, i_category_id#63] Join condition: None (99) TakeOrderedAndProject -Input [12]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] -Arguments: 100, [i_brand_id#48 ASC NULLS FIRST, i_class_id#49 ASC NULLS FIRST, i_category_id#50 ASC NULLS FIRST], [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64, channel#87, i_brand_id#75, i_class_id#76, i_category_id#77, sales#88, number_sales#89] +Input [12]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sales#50, number_sales#51, channel#72, i_brand_id#61, i_class_id#62, i_category_id#63, sales#73, number_sales#74] +Arguments: 100, [i_brand_id#38 ASC NULLS FIRST, i_class_id#39 ASC NULLS FIRST, i_category_id#40 ASC NULLS FIRST], [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sales#50, number_sales#51, channel#72, i_brand_id#61, i_class_id#62, i_category_id#63, sales#73, number_sales#74] ===== Subqueries ===== -Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquery#65, [id=#66] +Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquery#52, [id=#53] * HashAggregate (118) +- Exchange (117) +- * HashAggregate (116) @@ -570,96 +570,96 @@ Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquer (100) Scan parquet default.store_sales -Output [3]: [ss_quantity#91, ss_list_price#92, ss_sold_date_sk#93] +Output [3]: [ss_quantity#75, ss_list_price#76, ss_sold_date_sk#77] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#93), dynamicpruningexpression(ss_sold_date_sk#93 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ss_sold_date_sk#77), dynamicpruningexpression(ss_sold_date_sk#77 IN dynamicpruning#12)] ReadSchema: struct (101) ColumnarToRow [codegen id : 2] -Input [3]: [ss_quantity#91, ss_list_price#92, ss_sold_date_sk#93] +Input [3]: [ss_quantity#75, ss_list_price#76, ss_sold_date_sk#77] (102) ReusedExchange [Reuses operator id: 132] -Output [1]: [d_date_sk#94] +Output [1]: [d_date_sk#78] (103) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#93] -Right keys [1]: [d_date_sk#94] +Left keys [1]: [ss_sold_date_sk#77] +Right keys [1]: [d_date_sk#78] Join condition: None (104) Project [codegen id : 2] -Output [2]: [ss_quantity#91 AS quantity#95, ss_list_price#92 AS list_price#96] -Input [4]: [ss_quantity#91, ss_list_price#92, ss_sold_date_sk#93, d_date_sk#94] +Output [2]: [ss_quantity#75 AS quantity#79, ss_list_price#76 AS list_price#80] +Input [4]: [ss_quantity#75, ss_list_price#76, ss_sold_date_sk#77, d_date_sk#78] (105) Scan parquet default.catalog_sales -Output [3]: [cs_quantity#97, cs_list_price#98, cs_sold_date_sk#99] +Output [3]: [cs_quantity#81, cs_list_price#82, cs_sold_date_sk#83] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#99), dynamicpruningexpression(cs_sold_date_sk#99 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(cs_sold_date_sk#83), dynamicpruningexpression(cs_sold_date_sk#83 IN dynamicpruning#12)] ReadSchema: struct (106) ColumnarToRow [codegen id : 4] -Input [3]: [cs_quantity#97, cs_list_price#98, cs_sold_date_sk#99] +Input [3]: [cs_quantity#81, cs_list_price#82, cs_sold_date_sk#83] (107) ReusedExchange [Reuses operator id: 132] -Output [1]: [d_date_sk#100] +Output [1]: [d_date_sk#84] (108) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#99] -Right keys [1]: [d_date_sk#100] +Left keys [1]: [cs_sold_date_sk#83] +Right keys [1]: [d_date_sk#84] Join condition: None (109) Project [codegen id : 4] -Output [2]: [cs_quantity#97 AS quantity#101, cs_list_price#98 AS list_price#102] -Input [4]: [cs_quantity#97, cs_list_price#98, cs_sold_date_sk#99, d_date_sk#100] +Output [2]: [cs_quantity#81 AS quantity#85, cs_list_price#82 AS list_price#86] +Input [4]: [cs_quantity#81, cs_list_price#82, cs_sold_date_sk#83, d_date_sk#84] (110) Scan parquet default.web_sales -Output [3]: [ws_quantity#103, ws_list_price#104, ws_sold_date_sk#105] +Output [3]: [ws_quantity#87, ws_list_price#88, ws_sold_date_sk#89] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#105), dynamicpruningexpression(ws_sold_date_sk#105 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ws_sold_date_sk#89), dynamicpruningexpression(ws_sold_date_sk#89 IN dynamicpruning#12)] ReadSchema: struct (111) ColumnarToRow [codegen id : 6] -Input [3]: [ws_quantity#103, ws_list_price#104, ws_sold_date_sk#105] +Input [3]: [ws_quantity#87, ws_list_price#88, ws_sold_date_sk#89] (112) ReusedExchange [Reuses operator id: 132] -Output [1]: [d_date_sk#106] +Output [1]: [d_date_sk#90] (113) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#105] -Right keys [1]: [d_date_sk#106] +Left keys [1]: [ws_sold_date_sk#89] +Right keys [1]: [d_date_sk#90] Join condition: None (114) Project [codegen id : 6] -Output [2]: [ws_quantity#103 AS quantity#107, ws_list_price#104 AS list_price#108] -Input [4]: [ws_quantity#103, ws_list_price#104, ws_sold_date_sk#105, d_date_sk#106] +Output [2]: [ws_quantity#87 AS quantity#91, ws_list_price#88 AS list_price#92] +Input [4]: [ws_quantity#87, ws_list_price#88, ws_sold_date_sk#89, d_date_sk#90] (115) Union (116) HashAggregate [codegen id : 7] -Input [2]: [quantity#95, list_price#96] +Input [2]: [quantity#79, list_price#80] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#109, count#110] -Results [2]: [sum#111, count#112] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#93, count#94] +Results [2]: [sum#95, count#96] (117) Exchange -Input [2]: [sum#111, count#112] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#113] +Input [2]: [sum#95, count#96] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=17] (118) HashAggregate [codegen id : 8] -Input [2]: [sum#111, count#112] +Input [2]: [sum#95, count#96] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))#114] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#95 as decimal(12,2))) * promote_precision(cast(list_price#96 as decimal(12,2)))), DecimalType(18,2)))#114 AS average_sales#115] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))#97] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))#97 AS average_sales#98] -Subquery:2 Hosting operator id = 100 Hosting Expression = ss_sold_date_sk#93 IN dynamicpruning#13 +Subquery:2 Hosting operator id = 100 Hosting Expression = ss_sold_date_sk#77 IN dynamicpruning#12 -Subquery:3 Hosting operator id = 105 Hosting Expression = cs_sold_date_sk#99 IN dynamicpruning#13 +Subquery:3 Hosting operator id = 105 Hosting Expression = cs_sold_date_sk#83 IN dynamicpruning#12 -Subquery:4 Hosting operator id = 110 Hosting Expression = ws_sold_date_sk#105 IN dynamicpruning#13 +Subquery:4 Hosting operator id = 110 Hosting Expression = ws_sold_date_sk#89 IN dynamicpruning#12 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 BroadcastExchange (123) @@ -670,28 +670,28 @@ BroadcastExchange (123) (119) Scan parquet default.date_dim -Output [2]: [d_date_sk#46, d_week_seq#116] +Output [2]: [d_date_sk#36, d_week_seq#99] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct (120) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#46, d_week_seq#116] +Input [2]: [d_date_sk#36, d_week_seq#99] (121) Filter [codegen id : 1] -Input [2]: [d_date_sk#46, d_week_seq#116] -Condition : ((isnotnull(d_week_seq#116) AND (d_week_seq#116 = Subquery scalar-subquery#117, [id=#118])) AND isnotnull(d_date_sk#46)) +Input [2]: [d_date_sk#36, d_week_seq#99] +Condition : ((isnotnull(d_week_seq#99) AND (d_week_seq#99 = Subquery scalar-subquery#100, [id=#101])) AND isnotnull(d_date_sk#36)) (122) Project [codegen id : 1] -Output [1]: [d_date_sk#46] -Input [2]: [d_date_sk#46, d_week_seq#116] +Output [1]: [d_date_sk#36] +Input [2]: [d_date_sk#36, d_week_seq#99] (123) BroadcastExchange -Input [1]: [d_date_sk#46] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#119] +Input [1]: [d_date_sk#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=18] -Subquery:6 Hosting operator id = 121 Hosting Expression = Subquery scalar-subquery#117, [id=#118] +Subquery:6 Hosting operator id = 121 Hosting Expression = Subquery scalar-subquery#100, [id=#101] * Project (127) +- * Filter (126) +- * ColumnarToRow (125) @@ -699,24 +699,24 @@ Subquery:6 Hosting operator id = 121 Hosting Expression = Subquery scalar-subque (124) Scan parquet default.date_dim -Output [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] +Output [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,16)] ReadSchema: struct (125) ColumnarToRow [codegen id : 1] -Input [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] +Input [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] (126) Filter [codegen id : 1] -Input [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] -Condition : (((((isnotnull(d_year#121) AND isnotnull(d_moy#122)) AND isnotnull(d_dom#123)) AND (d_year#121 = 1999)) AND (d_moy#122 = 12)) AND (d_dom#123 = 16)) +Input [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] +Condition : (((((isnotnull(d_year#103) AND isnotnull(d_moy#104)) AND isnotnull(d_dom#105)) AND (d_year#103 = 1999)) AND (d_moy#104 = 12)) AND (d_dom#105 = 16)) (127) Project [codegen id : 1] -Output [1]: [d_week_seq#120] -Input [4]: [d_week_seq#120, d_year#121, d_moy#122, d_dom#123] +Output [1]: [d_week_seq#102] +Input [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] -Subquery:7 Hosting operator id = 9 Hosting Expression = ss_sold_date_sk#12 IN dynamicpruning#13 +Subquery:7 Hosting operator id = 9 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#12 BroadcastExchange (132) +- * Project (131) +- * Filter (130) @@ -725,34 +725,34 @@ BroadcastExchange (132) (128) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#124] +Output [2]: [d_date_sk#13, d_year#106] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (129) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#124] +Input [2]: [d_date_sk#13, d_year#106] (130) Filter [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#124] -Condition : (((isnotnull(d_year#124) AND (d_year#124 >= 1998)) AND (d_year#124 <= 2000)) AND isnotnull(d_date_sk#14)) +Input [2]: [d_date_sk#13, d_year#106] +Condition : (((isnotnull(d_year#106) AND (d_year#106 >= 1998)) AND (d_year#106 <= 2000)) AND isnotnull(d_date_sk#13)) (131) Project [codegen id : 1] -Output [1]: [d_date_sk#14] -Input [2]: [d_date_sk#14, d_year#124] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_year#106] (132) BroadcastExchange -Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#125] +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=19] -Subquery:8 Hosting operator id = 20 Hosting Expression = cs_sold_date_sk#21 IN dynamicpruning#13 +Subquery:8 Hosting operator id = 20 Hosting Expression = cs_sold_date_sk#19 IN dynamicpruning#12 -Subquery:9 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#36 IN dynamicpruning#13 +Subquery:9 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#29 IN dynamicpruning#12 -Subquery:10 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#65, [id=#66] +Subquery:10 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#52, [id=#53] -Subquery:11 Hosting operator id = 79 Hosting Expression = ss_sold_date_sk#70 IN dynamicpruning#71 +Subquery:11 Hosting operator id = 79 Hosting Expression = ss_sold_date_sk#57 IN dynamicpruning#58 BroadcastExchange (137) +- * Project (136) +- * Filter (135) @@ -761,28 +761,28 @@ BroadcastExchange (137) (133) Scan parquet default.date_dim -Output [2]: [d_date_sk#73, d_week_seq#126] +Output [2]: [d_date_sk#59, d_week_seq#107] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct (134) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#73, d_week_seq#126] +Input [2]: [d_date_sk#59, d_week_seq#107] (135) Filter [codegen id : 1] -Input [2]: [d_date_sk#73, d_week_seq#126] -Condition : ((isnotnull(d_week_seq#126) AND (d_week_seq#126 = Subquery scalar-subquery#127, [id=#128])) AND isnotnull(d_date_sk#73)) +Input [2]: [d_date_sk#59, d_week_seq#107] +Condition : ((isnotnull(d_week_seq#107) AND (d_week_seq#107 = Subquery scalar-subquery#108, [id=#109])) AND isnotnull(d_date_sk#59)) (136) Project [codegen id : 1] -Output [1]: [d_date_sk#73] -Input [2]: [d_date_sk#73, d_week_seq#126] +Output [1]: [d_date_sk#59] +Input [2]: [d_date_sk#59, d_week_seq#107] (137) BroadcastExchange -Input [1]: [d_date_sk#73] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#129] +Input [1]: [d_date_sk#59] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=20] -Subquery:12 Hosting operator id = 135 Hosting Expression = Subquery scalar-subquery#127, [id=#128] +Subquery:12 Hosting operator id = 135 Hosting Expression = Subquery scalar-subquery#108, [id=#109] * Project (141) +- * Filter (140) +- * ColumnarToRow (139) @@ -790,21 +790,21 @@ Subquery:12 Hosting operator id = 135 Hosting Expression = Subquery scalar-subqu (138) Scan parquet default.date_dim -Output [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] +Output [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1998), EqualTo(d_moy,12), EqualTo(d_dom,16)] ReadSchema: struct (139) ColumnarToRow [codegen id : 1] -Input [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] +Input [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] (140) Filter [codegen id : 1] -Input [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] -Condition : (((((isnotnull(d_year#131) AND isnotnull(d_moy#132)) AND isnotnull(d_dom#133)) AND (d_year#131 = 1998)) AND (d_moy#132 = 12)) AND (d_dom#133 = 16)) +Input [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] +Condition : (((((isnotnull(d_year#111) AND isnotnull(d_moy#112)) AND isnotnull(d_dom#113)) AND (d_year#111 = 1998)) AND (d_moy#112 = 12)) AND (d_dom#113 = 16)) (141) Project [codegen id : 1] -Output [1]: [d_week_seq#130] -Input [4]: [d_week_seq#130, d_year#131, d_moy#132, d_dom#133] +Output [1]: [d_week_seq#110] +Input [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt index 86bbc553e8c31..28a695ae202c9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14/explain.txt @@ -174,7 +174,7 @@ Condition : isnotnull(i_item_sk#19) (19) BroadcastExchange Input [4]: [i_item_sk#19, i_brand_id#20, i_class_id#21, i_category_id#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (20) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_item_sk#17] @@ -186,20 +186,20 @@ Output [4]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22] Input [6]: [cs_item_sk#17, cs_sold_date_sk#18, i_item_sk#19, i_brand_id#20, i_class_id#21, i_category_id#22] (22) ReusedExchange [Reuses operator id: 117] -Output [1]: [d_date_sk#24] +Output [1]: [d_date_sk#23] (23) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_sold_date_sk#18] -Right keys [1]: [d_date_sk#24] +Right keys [1]: [d_date_sk#23] Join condition: None (24) Project [codegen id : 3] Output [3]: [i_brand_id#20, i_class_id#21, i_category_id#22] -Input [5]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22, d_date_sk#24] +Input [5]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22, d_date_sk#23] (25) BroadcastExchange Input [3]: [i_brand_id#20, i_class_id#21, i_category_id#22] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#25] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=2] (26) BroadcastHashJoin [codegen id : 4] Left keys [6]: [coalesce(i_brand_id#14, 0), isnull(i_brand_id#14), coalesce(i_class_id#15, 0), isnull(i_class_id#15), coalesce(i_category_id#16, 0), isnull(i_category_id#16)] @@ -208,7 +208,7 @@ Join condition: None (27) BroadcastExchange Input [4]: [i_item_sk#13, i_brand_id#14, i_class_id#15, i_category_id#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (28) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_item_sk#10] @@ -220,259 +220,259 @@ Output [4]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16] Input [6]: [ss_item_sk#10, ss_sold_date_sk#11, i_item_sk#13, i_brand_id#14, i_class_id#15, i_category_id#16] (30) ReusedExchange [Reuses operator id: 117] -Output [1]: [d_date_sk#27] +Output [1]: [d_date_sk#24] (31) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_sold_date_sk#11] -Right keys [1]: [d_date_sk#27] +Right keys [1]: [d_date_sk#24] Join condition: None (32) Project [codegen id : 6] -Output [3]: [i_brand_id#14 AS brand_id#28, i_class_id#15 AS class_id#29, i_category_id#16 AS category_id#30] -Input [5]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16, d_date_sk#27] +Output [3]: [i_brand_id#14 AS brand_id#25, i_class_id#15 AS class_id#26, i_category_id#16 AS category_id#27] +Input [5]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16, d_date_sk#24] (33) HashAggregate [codegen id : 6] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, category_id#30] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] +Results [3]: [brand_id#25, class_id#26, category_id#27] (34) Exchange -Input [3]: [brand_id#28, class_id#29, category_id#30] -Arguments: hashpartitioning(brand_id#28, class_id#29, category_id#30, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: hashpartitioning(brand_id#25, class_id#26, category_id#27, 5), ENSURE_REQUIREMENTS, [plan_id=4] (35) HashAggregate [codegen id : 10] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, category_id#30] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] +Results [3]: [brand_id#25, class_id#26, category_id#27] (36) Scan parquet default.web_sales -Output [2]: [ws_item_sk#32, ws_sold_date_sk#33] +Output [2]: [ws_item_sk#28, ws_sold_date_sk#29] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#33), dynamicpruningexpression(ws_sold_date_sk#33 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ws_sold_date_sk#29), dynamicpruningexpression(ws_sold_date_sk#29 IN dynamicpruning#12)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 9] -Input [2]: [ws_item_sk#32, ws_sold_date_sk#33] +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] (38) Filter [codegen id : 9] -Input [2]: [ws_item_sk#32, ws_sold_date_sk#33] -Condition : isnotnull(ws_item_sk#32) +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] +Condition : isnotnull(ws_item_sk#28) (39) ReusedExchange [Reuses operator id: 19] -Output [4]: [i_item_sk#34, i_brand_id#35, i_class_id#36, i_category_id#37] +Output [4]: [i_item_sk#30, i_brand_id#31, i_class_id#32, i_category_id#33] (40) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_item_sk#32] -Right keys [1]: [i_item_sk#34] +Left keys [1]: [ws_item_sk#28] +Right keys [1]: [i_item_sk#30] Join condition: None (41) Project [codegen id : 9] -Output [4]: [ws_sold_date_sk#33, i_brand_id#35, i_class_id#36, i_category_id#37] -Input [6]: [ws_item_sk#32, ws_sold_date_sk#33, i_item_sk#34, i_brand_id#35, i_class_id#36, i_category_id#37] +Output [4]: [ws_sold_date_sk#29, i_brand_id#31, i_class_id#32, i_category_id#33] +Input [6]: [ws_item_sk#28, ws_sold_date_sk#29, i_item_sk#30, i_brand_id#31, i_class_id#32, i_category_id#33] (42) ReusedExchange [Reuses operator id: 117] -Output [1]: [d_date_sk#38] +Output [1]: [d_date_sk#34] (43) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_sold_date_sk#33] -Right keys [1]: [d_date_sk#38] +Left keys [1]: [ws_sold_date_sk#29] +Right keys [1]: [d_date_sk#34] Join condition: None (44) Project [codegen id : 9] -Output [3]: [i_brand_id#35, i_class_id#36, i_category_id#37] -Input [5]: [ws_sold_date_sk#33, i_brand_id#35, i_class_id#36, i_category_id#37, d_date_sk#38] +Output [3]: [i_brand_id#31, i_class_id#32, i_category_id#33] +Input [5]: [ws_sold_date_sk#29, i_brand_id#31, i_class_id#32, i_category_id#33, d_date_sk#34] (45) BroadcastExchange -Input [3]: [i_brand_id#35, i_class_id#36, i_category_id#37] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#39] +Input [3]: [i_brand_id#31, i_class_id#32, i_category_id#33] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=5] (46) BroadcastHashJoin [codegen id : 10] -Left keys [6]: [coalesce(brand_id#28, 0), isnull(brand_id#28), coalesce(class_id#29, 0), isnull(class_id#29), coalesce(category_id#30, 0), isnull(category_id#30)] -Right keys [6]: [coalesce(i_brand_id#35, 0), isnull(i_brand_id#35), coalesce(i_class_id#36, 0), isnull(i_class_id#36), coalesce(i_category_id#37, 0), isnull(i_category_id#37)] +Left keys [6]: [coalesce(brand_id#25, 0), isnull(brand_id#25), coalesce(class_id#26, 0), isnull(class_id#26), coalesce(category_id#27, 0), isnull(category_id#27)] +Right keys [6]: [coalesce(i_brand_id#31, 0), isnull(i_brand_id#31), coalesce(i_class_id#32, 0), isnull(i_class_id#32), coalesce(i_category_id#33, 0), isnull(i_category_id#33)] Join condition: None (47) BroadcastExchange -Input [3]: [brand_id#28, class_id#29, category_id#30] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#40] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=6] (48) BroadcastHashJoin [codegen id : 11] Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] -Right keys [3]: [brand_id#28, class_id#29, category_id#30] +Right keys [3]: [brand_id#25, class_id#26, category_id#27] Join condition: None (49) Project [codegen id : 11] -Output [1]: [i_item_sk#6 AS ss_item_sk#41] -Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#28, class_id#29, category_id#30] +Output [1]: [i_item_sk#6 AS ss_item_sk#35] +Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#25, class_id#26, category_id#27] (50) BroadcastExchange -Input [1]: [ss_item_sk#41] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] +Input [1]: [ss_item_sk#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] (51) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [ss_item_sk#41] +Right keys [1]: [ss_item_sk#35] Join condition: None (52) Scan parquet default.item -Output [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] +Output [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct (53) ColumnarToRow [codegen id : 23] -Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] +Input [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] (54) Filter [codegen id : 23] -Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -Condition : (((isnotnull(i_item_sk#43) AND isnotnull(i_brand_id#44)) AND isnotnull(i_class_id#45)) AND isnotnull(i_category_id#46)) +Input [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] +Condition : (((isnotnull(i_item_sk#36) AND isnotnull(i_brand_id#37)) AND isnotnull(i_class_id#38)) AND isnotnull(i_category_id#39)) (55) ReusedExchange [Reuses operator id: 50] -Output [1]: [ss_item_sk#41] +Output [1]: [ss_item_sk#35] (56) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [i_item_sk#43] -Right keys [1]: [ss_item_sk#41] +Left keys [1]: [i_item_sk#36] +Right keys [1]: [ss_item_sk#35] Join condition: None (57) BroadcastExchange -Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#47] +Input [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] (58) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#43] +Right keys [1]: [i_item_sk#36] Join condition: None (59) Project [codegen id : 25] -Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46] -Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] +Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#37, i_class_id#38, i_category_id#39] +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] (60) ReusedExchange [Reuses operator id: 108] -Output [1]: [d_date_sk#48] +Output [1]: [d_date_sk#40] (61) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#48] +Right keys [1]: [d_date_sk#40] Join condition: None (62) Project [codegen id : 25] -Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] -Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46, d_date_sk#48] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#37, i_class_id#38, i_category_id#39] +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#37, i_class_id#38, i_category_id#39, d_date_sk#40] (63) HashAggregate [codegen id : 25] -Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] -Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#37, i_class_id#38, i_category_id#39] +Keys [3]: [i_brand_id#37, i_class_id#38, i_category_id#39] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] -Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] +Aggregate Attributes [3]: [sum#41, isEmpty#42, count#43] +Results [6]: [i_brand_id#37, i_class_id#38, i_category_id#39, sum#44, isEmpty#45, count#46] (64) Exchange -Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] -Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), ENSURE_REQUIREMENTS, [id=#55] +Input [6]: [i_brand_id#37, i_class_id#38, i_category_id#39, sum#44, isEmpty#45, count#46] +Arguments: hashpartitioning(i_brand_id#37, i_class_id#38, i_category_id#39, 5), ENSURE_REQUIREMENTS, [plan_id=9] (65) HashAggregate [codegen id : 52] -Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] -Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] +Input [6]: [i_brand_id#37, i_class_id#38, i_category_id#39, sum#44, isEmpty#45, count#46] +Keys [3]: [i_brand_id#37, i_class_id#38, i_category_id#39] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56, count(1)#57] -Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56 AS sales#59, count(1)#57 AS number_sales#60] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47, count(1)#48] +Results [6]: [store AS channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47 AS sales#50, count(1)#48 AS number_sales#51] (66) Filter [codegen id : 52] -Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60] -Condition : (isnotnull(sales#59) AND (cast(sales#59 as decimal(32,6)) > cast(Subquery scalar-subquery#61, [id=#62] as decimal(32,6)))) +Input [6]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sales#50, number_sales#51] +Condition : (isnotnull(sales#50) AND (cast(sales#50 as decimal(32,6)) > cast(Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) (67) Scan parquet default.store_sales -Output [4]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66] +Output [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#66), dynamicpruningexpression(ss_sold_date_sk#66 IN dynamicpruning#67)] +PartitionFilters: [isnotnull(ss_sold_date_sk#57), dynamicpruningexpression(ss_sold_date_sk#57 IN dynamicpruning#58)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (68) ColumnarToRow [codegen id : 50] -Input [4]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66] +Input [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] (69) Filter [codegen id : 50] -Input [4]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66] -Condition : isnotnull(ss_item_sk#63) +Input [4]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57] +Condition : isnotnull(ss_item_sk#54) (70) ReusedExchange [Reuses operator id: 50] -Output [1]: [ss_item_sk#41] +Output [1]: [ss_item_sk#35] (71) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_item_sk#63] -Right keys [1]: [ss_item_sk#41] +Left keys [1]: [ss_item_sk#54] +Right keys [1]: [ss_item_sk#35] Join condition: None (72) ReusedExchange [Reuses operator id: 57] -Output [4]: [i_item_sk#68, i_brand_id#69, i_class_id#70, i_category_id#71] +Output [4]: [i_item_sk#59, i_brand_id#60, i_class_id#61, i_category_id#62] (73) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_item_sk#63] -Right keys [1]: [i_item_sk#68] +Left keys [1]: [ss_item_sk#54] +Right keys [1]: [i_item_sk#59] Join condition: None (74) Project [codegen id : 50] -Output [6]: [ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_brand_id#69, i_class_id#70, i_category_id#71] -Input [8]: [ss_item_sk#63, ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_item_sk#68, i_brand_id#69, i_class_id#70, i_category_id#71] +Output [6]: [ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57, i_brand_id#60, i_class_id#61, i_category_id#62] +Input [8]: [ss_item_sk#54, ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57, i_item_sk#59, i_brand_id#60, i_class_id#61, i_category_id#62] (75) ReusedExchange [Reuses operator id: 122] -Output [1]: [d_date_sk#72] +Output [1]: [d_date_sk#63] (76) BroadcastHashJoin [codegen id : 50] -Left keys [1]: [ss_sold_date_sk#66] -Right keys [1]: [d_date_sk#72] +Left keys [1]: [ss_sold_date_sk#57] +Right keys [1]: [d_date_sk#63] Join condition: None (77) Project [codegen id : 50] -Output [5]: [ss_quantity#64, ss_list_price#65, i_brand_id#69, i_class_id#70, i_category_id#71] -Input [7]: [ss_quantity#64, ss_list_price#65, ss_sold_date_sk#66, i_brand_id#69, i_class_id#70, i_category_id#71, d_date_sk#72] +Output [5]: [ss_quantity#55, ss_list_price#56, i_brand_id#60, i_class_id#61, i_category_id#62] +Input [7]: [ss_quantity#55, ss_list_price#56, ss_sold_date_sk#57, i_brand_id#60, i_class_id#61, i_category_id#62, d_date_sk#63] (78) HashAggregate [codegen id : 50] -Input [5]: [ss_quantity#64, ss_list_price#65, i_brand_id#69, i_class_id#70, i_category_id#71] -Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#73, isEmpty#74, count#75] -Results [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] +Input [5]: [ss_quantity#55, ss_list_price#56, i_brand_id#60, i_class_id#61, i_category_id#62] +Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#64, isEmpty#65, count#66] +Results [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#67, isEmpty#68, count#69] (79) Exchange -Input [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] -Arguments: hashpartitioning(i_brand_id#69, i_class_id#70, i_category_id#71, 5), ENSURE_REQUIREMENTS, [id=#79] +Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#67, isEmpty#68, count#69] +Arguments: hashpartitioning(i_brand_id#60, i_class_id#61, i_category_id#62, 5), ENSURE_REQUIREMENTS, [plan_id=10] (80) HashAggregate [codegen id : 51] -Input [6]: [i_brand_id#69, i_class_id#70, i_category_id#71, sum#76, isEmpty#77, count#78] -Keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#80, count(1)#81] -Results [6]: [store AS channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sum(CheckOverflow((promote_precision(cast(ss_quantity#64 as decimal(12,2))) * promote_precision(cast(ss_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#80 AS sales#83, count(1)#81 AS number_sales#84] +Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#67, isEmpty#68, count#69] +Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#70, count(1)#71] +Results [6]: [store AS channel#72, i_brand_id#60, i_class_id#61, i_category_id#62, sum(CheckOverflow((promote_precision(cast(ss_quantity#55 as decimal(12,2))) * promote_precision(cast(ss_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#70 AS sales#73, count(1)#71 AS number_sales#74] (81) Filter [codegen id : 51] -Input [6]: [channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] -Condition : (isnotnull(sales#83) AND (cast(sales#83 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#61, [id=#62] as decimal(32,6)))) +Input [6]: [channel#72, i_brand_id#60, i_class_id#61, i_category_id#62, sales#73, number_sales#74] +Condition : (isnotnull(sales#73) AND (cast(sales#73 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) (82) BroadcastExchange -Input [6]: [channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] -Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [id=#85] +Input [6]: [channel#72, i_brand_id#60, i_class_id#61, i_category_id#62, sales#73, number_sales#74] +Arguments: HashedRelationBroadcastMode(List(input[1, int, true], input[2, int, true], input[3, int, true]),false), [plan_id=11] (83) BroadcastHashJoin [codegen id : 52] -Left keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] -Right keys [3]: [i_brand_id#69, i_class_id#70, i_category_id#71] +Left keys [3]: [i_brand_id#37, i_class_id#38, i_category_id#39] +Right keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] Join condition: None (84) TakeOrderedAndProject -Input [12]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60, channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] -Arguments: 100, [i_brand_id#44 ASC NULLS FIRST, i_class_id#45 ASC NULLS FIRST, i_category_id#46 ASC NULLS FIRST], [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60, channel#82, i_brand_id#69, i_class_id#70, i_category_id#71, sales#83, number_sales#84] +Input [12]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sales#50, number_sales#51, channel#72, i_brand_id#60, i_class_id#61, i_category_id#62, sales#73, number_sales#74] +Arguments: 100, [i_brand_id#37 ASC NULLS FIRST, i_class_id#38 ASC NULLS FIRST, i_category_id#39 ASC NULLS FIRST], [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sales#50, number_sales#51, channel#72, i_brand_id#60, i_class_id#61, i_category_id#62, sales#73, number_sales#74] ===== Subqueries ===== -Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#61, [id=#62] +Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#52, [id=#53] * HashAggregate (103) +- Exchange (102) +- * HashAggregate (101) @@ -495,96 +495,96 @@ Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquer (85) Scan parquet default.store_sales -Output [3]: [ss_quantity#86, ss_list_price#87, ss_sold_date_sk#88] +Output [3]: [ss_quantity#75, ss_list_price#76, ss_sold_date_sk#77] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#88), dynamicpruningexpression(ss_sold_date_sk#88 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ss_sold_date_sk#77), dynamicpruningexpression(ss_sold_date_sk#77 IN dynamicpruning#12)] ReadSchema: struct (86) ColumnarToRow [codegen id : 2] -Input [3]: [ss_quantity#86, ss_list_price#87, ss_sold_date_sk#88] +Input [3]: [ss_quantity#75, ss_list_price#76, ss_sold_date_sk#77] (87) ReusedExchange [Reuses operator id: 117] -Output [1]: [d_date_sk#89] +Output [1]: [d_date_sk#78] (88) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#88] -Right keys [1]: [d_date_sk#89] +Left keys [1]: [ss_sold_date_sk#77] +Right keys [1]: [d_date_sk#78] Join condition: None (89) Project [codegen id : 2] -Output [2]: [ss_quantity#86 AS quantity#90, ss_list_price#87 AS list_price#91] -Input [4]: [ss_quantity#86, ss_list_price#87, ss_sold_date_sk#88, d_date_sk#89] +Output [2]: [ss_quantity#75 AS quantity#79, ss_list_price#76 AS list_price#80] +Input [4]: [ss_quantity#75, ss_list_price#76, ss_sold_date_sk#77, d_date_sk#78] (90) Scan parquet default.catalog_sales -Output [3]: [cs_quantity#92, cs_list_price#93, cs_sold_date_sk#94] +Output [3]: [cs_quantity#81, cs_list_price#82, cs_sold_date_sk#83] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#94), dynamicpruningexpression(cs_sold_date_sk#94 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(cs_sold_date_sk#83), dynamicpruningexpression(cs_sold_date_sk#83 IN dynamicpruning#12)] ReadSchema: struct (91) ColumnarToRow [codegen id : 4] -Input [3]: [cs_quantity#92, cs_list_price#93, cs_sold_date_sk#94] +Input [3]: [cs_quantity#81, cs_list_price#82, cs_sold_date_sk#83] (92) ReusedExchange [Reuses operator id: 117] -Output [1]: [d_date_sk#95] +Output [1]: [d_date_sk#84] (93) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#94] -Right keys [1]: [d_date_sk#95] +Left keys [1]: [cs_sold_date_sk#83] +Right keys [1]: [d_date_sk#84] Join condition: None (94) Project [codegen id : 4] -Output [2]: [cs_quantity#92 AS quantity#96, cs_list_price#93 AS list_price#97] -Input [4]: [cs_quantity#92, cs_list_price#93, cs_sold_date_sk#94, d_date_sk#95] +Output [2]: [cs_quantity#81 AS quantity#85, cs_list_price#82 AS list_price#86] +Input [4]: [cs_quantity#81, cs_list_price#82, cs_sold_date_sk#83, d_date_sk#84] (95) Scan parquet default.web_sales -Output [3]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100] +Output [3]: [ws_quantity#87, ws_list_price#88, ws_sold_date_sk#89] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#100), dynamicpruningexpression(ws_sold_date_sk#100 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ws_sold_date_sk#89), dynamicpruningexpression(ws_sold_date_sk#89 IN dynamicpruning#12)] ReadSchema: struct (96) ColumnarToRow [codegen id : 6] -Input [3]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100] +Input [3]: [ws_quantity#87, ws_list_price#88, ws_sold_date_sk#89] (97) ReusedExchange [Reuses operator id: 117] -Output [1]: [d_date_sk#101] +Output [1]: [d_date_sk#90] (98) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#100] -Right keys [1]: [d_date_sk#101] +Left keys [1]: [ws_sold_date_sk#89] +Right keys [1]: [d_date_sk#90] Join condition: None (99) Project [codegen id : 6] -Output [2]: [ws_quantity#98 AS quantity#102, ws_list_price#99 AS list_price#103] -Input [4]: [ws_quantity#98, ws_list_price#99, ws_sold_date_sk#100, d_date_sk#101] +Output [2]: [ws_quantity#87 AS quantity#91, ws_list_price#88 AS list_price#92] +Input [4]: [ws_quantity#87, ws_list_price#88, ws_sold_date_sk#89, d_date_sk#90] (100) Union (101) HashAggregate [codegen id : 7] -Input [2]: [quantity#90, list_price#91] +Input [2]: [quantity#79, list_price#80] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#104, count#105] -Results [2]: [sum#106, count#107] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#93, count#94] +Results [2]: [sum#95, count#96] (102) Exchange -Input [2]: [sum#106, count#107] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#108] +Input [2]: [sum#95, count#96] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] (103) HashAggregate [codegen id : 8] -Input [2]: [sum#106, count#107] +Input [2]: [sum#95, count#96] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))#109] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#90 as decimal(12,2))) * promote_precision(cast(list_price#91 as decimal(12,2)))), DecimalType(18,2)))#109 AS average_sales#110] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))#97] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#79 as decimal(12,2))) * promote_precision(cast(list_price#80 as decimal(12,2)))), DecimalType(18,2)))#97 AS average_sales#98] -Subquery:2 Hosting operator id = 85 Hosting Expression = ss_sold_date_sk#88 IN dynamicpruning#12 +Subquery:2 Hosting operator id = 85 Hosting Expression = ss_sold_date_sk#77 IN dynamicpruning#12 -Subquery:3 Hosting operator id = 90 Hosting Expression = cs_sold_date_sk#94 IN dynamicpruning#12 +Subquery:3 Hosting operator id = 90 Hosting Expression = cs_sold_date_sk#83 IN dynamicpruning#12 -Subquery:4 Hosting operator id = 95 Hosting Expression = ws_sold_date_sk#100 IN dynamicpruning#12 +Subquery:4 Hosting operator id = 95 Hosting Expression = ws_sold_date_sk#89 IN dynamicpruning#12 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 BroadcastExchange (108) @@ -595,28 +595,28 @@ BroadcastExchange (108) (104) Scan parquet default.date_dim -Output [2]: [d_date_sk#48, d_week_seq#111] +Output [2]: [d_date_sk#40, d_week_seq#99] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct (105) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#48, d_week_seq#111] +Input [2]: [d_date_sk#40, d_week_seq#99] (106) Filter [codegen id : 1] -Input [2]: [d_date_sk#48, d_week_seq#111] -Condition : ((isnotnull(d_week_seq#111) AND (d_week_seq#111 = Subquery scalar-subquery#112, [id=#113])) AND isnotnull(d_date_sk#48)) +Input [2]: [d_date_sk#40, d_week_seq#99] +Condition : ((isnotnull(d_week_seq#99) AND (d_week_seq#99 = Subquery scalar-subquery#100, [id=#101])) AND isnotnull(d_date_sk#40)) (107) Project [codegen id : 1] -Output [1]: [d_date_sk#48] -Input [2]: [d_date_sk#48, d_week_seq#111] +Output [1]: [d_date_sk#40] +Input [2]: [d_date_sk#40, d_week_seq#99] (108) BroadcastExchange -Input [1]: [d_date_sk#48] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#114] +Input [1]: [d_date_sk#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=13] -Subquery:6 Hosting operator id = 106 Hosting Expression = Subquery scalar-subquery#112, [id=#113] +Subquery:6 Hosting operator id = 106 Hosting Expression = Subquery scalar-subquery#100, [id=#101] * Project (112) +- * Filter (111) +- * ColumnarToRow (110) @@ -624,22 +624,22 @@ Subquery:6 Hosting operator id = 106 Hosting Expression = Subquery scalar-subque (109) Scan parquet default.date_dim -Output [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] +Output [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1999), EqualTo(d_moy,12), EqualTo(d_dom,16)] ReadSchema: struct (110) ColumnarToRow [codegen id : 1] -Input [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] +Input [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] (111) Filter [codegen id : 1] -Input [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] -Condition : (((((isnotnull(d_year#116) AND isnotnull(d_moy#117)) AND isnotnull(d_dom#118)) AND (d_year#116 = 1999)) AND (d_moy#117 = 12)) AND (d_dom#118 = 16)) +Input [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] +Condition : (((((isnotnull(d_year#103) AND isnotnull(d_moy#104)) AND isnotnull(d_dom#105)) AND (d_year#103 = 1999)) AND (d_moy#104 = 12)) AND (d_dom#105 = 16)) (112) Project [codegen id : 1] -Output [1]: [d_week_seq#115] -Input [4]: [d_week_seq#115, d_year#116, d_moy#117, d_dom#118] +Output [1]: [d_week_seq#102] +Input [4]: [d_week_seq#102, d_year#103, d_moy#104, d_dom#105] Subquery:7 Hosting operator id = 7 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#12 BroadcastExchange (117) @@ -650,34 +650,34 @@ BroadcastExchange (117) (113) Scan parquet default.date_dim -Output [2]: [d_date_sk#27, d_year#119] +Output [2]: [d_date_sk#24, d_year#106] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (114) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#27, d_year#119] +Input [2]: [d_date_sk#24, d_year#106] (115) Filter [codegen id : 1] -Input [2]: [d_date_sk#27, d_year#119] -Condition : (((isnotnull(d_year#119) AND (d_year#119 >= 1998)) AND (d_year#119 <= 2000)) AND isnotnull(d_date_sk#27)) +Input [2]: [d_date_sk#24, d_year#106] +Condition : (((isnotnull(d_year#106) AND (d_year#106 >= 1998)) AND (d_year#106 <= 2000)) AND isnotnull(d_date_sk#24)) (116) Project [codegen id : 1] -Output [1]: [d_date_sk#27] -Input [2]: [d_date_sk#27, d_year#119] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_year#106] (117) BroadcastExchange -Input [1]: [d_date_sk#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#120] +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] Subquery:8 Hosting operator id = 13 Hosting Expression = cs_sold_date_sk#18 IN dynamicpruning#12 -Subquery:9 Hosting operator id = 36 Hosting Expression = ws_sold_date_sk#33 IN dynamicpruning#12 +Subquery:9 Hosting operator id = 36 Hosting Expression = ws_sold_date_sk#29 IN dynamicpruning#12 -Subquery:10 Hosting operator id = 81 Hosting Expression = ReusedSubquery Subquery scalar-subquery#61, [id=#62] +Subquery:10 Hosting operator id = 81 Hosting Expression = ReusedSubquery Subquery scalar-subquery#52, [id=#53] -Subquery:11 Hosting operator id = 67 Hosting Expression = ss_sold_date_sk#66 IN dynamicpruning#67 +Subquery:11 Hosting operator id = 67 Hosting Expression = ss_sold_date_sk#57 IN dynamicpruning#58 BroadcastExchange (122) +- * Project (121) +- * Filter (120) @@ -686,28 +686,28 @@ BroadcastExchange (122) (118) Scan parquet default.date_dim -Output [2]: [d_date_sk#72, d_week_seq#121] +Output [2]: [d_date_sk#63, d_week_seq#107] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct (119) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#72, d_week_seq#121] +Input [2]: [d_date_sk#63, d_week_seq#107] (120) Filter [codegen id : 1] -Input [2]: [d_date_sk#72, d_week_seq#121] -Condition : ((isnotnull(d_week_seq#121) AND (d_week_seq#121 = Subquery scalar-subquery#122, [id=#123])) AND isnotnull(d_date_sk#72)) +Input [2]: [d_date_sk#63, d_week_seq#107] +Condition : ((isnotnull(d_week_seq#107) AND (d_week_seq#107 = Subquery scalar-subquery#108, [id=#109])) AND isnotnull(d_date_sk#63)) (121) Project [codegen id : 1] -Output [1]: [d_date_sk#72] -Input [2]: [d_date_sk#72, d_week_seq#121] +Output [1]: [d_date_sk#63] +Input [2]: [d_date_sk#63, d_week_seq#107] (122) BroadcastExchange -Input [1]: [d_date_sk#72] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#124] +Input [1]: [d_date_sk#63] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=15] -Subquery:12 Hosting operator id = 120 Hosting Expression = Subquery scalar-subquery#122, [id=#123] +Subquery:12 Hosting operator id = 120 Hosting Expression = Subquery scalar-subquery#108, [id=#109] * Project (126) +- * Filter (125) +- * ColumnarToRow (124) @@ -715,21 +715,21 @@ Subquery:12 Hosting operator id = 120 Hosting Expression = Subquery scalar-subqu (123) Scan parquet default.date_dim -Output [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] +Output [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), IsNotNull(d_dom), EqualTo(d_year,1998), EqualTo(d_moy,12), EqualTo(d_dom,16)] ReadSchema: struct (124) ColumnarToRow [codegen id : 1] -Input [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] +Input [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] (125) Filter [codegen id : 1] -Input [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] -Condition : (((((isnotnull(d_year#126) AND isnotnull(d_moy#127)) AND isnotnull(d_dom#128)) AND (d_year#126 = 1998)) AND (d_moy#127 = 12)) AND (d_dom#128 = 16)) +Input [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] +Condition : (((((isnotnull(d_year#111) AND isnotnull(d_moy#112)) AND isnotnull(d_dom#113)) AND (d_year#111 = 1998)) AND (d_moy#112 = 12)) AND (d_dom#113 = 16)) (126) Project [codegen id : 1] -Output [1]: [d_week_seq#125] -Input [4]: [d_week_seq#125, d_year#126, d_moy#127, d_dom#128] +Output [1]: [d_week_seq#110] +Input [4]: [d_week_seq#110, d_year#111, d_moy#112, d_dom#113] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt index 88d71316966c6..6777e024d930a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a.sf100/explain.txt @@ -161,661 +161,661 @@ Condition : isnotnull(ss_item_sk#1) (4) Exchange Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#6] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [4]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.item -Output [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] +Output [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct (7) ColumnarToRow [codegen id : 19] -Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] (8) Filter [codegen id : 19] -Input [4]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10] -Condition : ((isnotnull(i_brand_id#8) AND isnotnull(i_class_id#9)) AND isnotnull(i_category_id#10)) +Input [4]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9] +Condition : ((isnotnull(i_brand_id#7) AND isnotnull(i_class_id#8)) AND isnotnull(i_category_id#9)) (9) Scan parquet default.store_sales -Output [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Output [2]: [ss_item_sk#10, ss_sold_date_sk#11] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#12), dynamicpruningexpression(ss_sold_date_sk#12 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ss_sold_date_sk#11), dynamicpruningexpression(ss_sold_date_sk#11 IN dynamicpruning#12)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (10) ColumnarToRow [codegen id : 11] -Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] +Input [2]: [ss_item_sk#10, ss_sold_date_sk#11] (11) Filter [codegen id : 11] -Input [2]: [ss_item_sk#11, ss_sold_date_sk#12] -Condition : isnotnull(ss_item_sk#11) +Input [2]: [ss_item_sk#10, ss_sold_date_sk#11] +Condition : isnotnull(ss_item_sk#10) (12) ReusedExchange [Reuses operator id: 177] -Output [1]: [d_date_sk#14] +Output [1]: [d_date_sk#13] (13) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_sold_date_sk#12] -Right keys [1]: [d_date_sk#14] +Left keys [1]: [ss_sold_date_sk#11] +Right keys [1]: [d_date_sk#13] Join condition: None (14) Project [codegen id : 11] -Output [1]: [ss_item_sk#11] -Input [3]: [ss_item_sk#11, ss_sold_date_sk#12, d_date_sk#14] +Output [1]: [ss_item_sk#10] +Input [3]: [ss_item_sk#10, ss_sold_date_sk#11, d_date_sk#13] (15) Scan parquet default.item -Output [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] +Output [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_brand_id), IsNotNull(i_class_id), IsNotNull(i_category_id)] ReadSchema: struct (16) ColumnarToRow [codegen id : 4] -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] (17) Filter [codegen id : 4] -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Condition : (((isnotnull(i_item_sk#15) AND isnotnull(i_brand_id#16)) AND isnotnull(i_class_id#17)) AND isnotnull(i_category_id#18)) +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Condition : (((isnotnull(i_item_sk#14) AND isnotnull(i_brand_id#15)) AND isnotnull(i_class_id#16)) AND isnotnull(i_category_id#17)) (18) Exchange -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Arguments: hashpartitioning(coalesce(i_brand_id#16, 0), isnull(i_brand_id#16), coalesce(i_class_id#17, 0), isnull(i_class_id#17), coalesce(i_category_id#18, 0), isnull(i_category_id#18), 5), ENSURE_REQUIREMENTS, [id=#19] +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Arguments: hashpartitioning(coalesce(i_brand_id#15, 0), isnull(i_brand_id#15), coalesce(i_class_id#16, 0), isnull(i_class_id#16), coalesce(i_category_id#17, 0), isnull(i_category_id#17), 5), ENSURE_REQUIREMENTS, [plan_id=2] (19) Sort [codegen id : 5] -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Arguments: [coalesce(i_brand_id#16, 0) ASC NULLS FIRST, isnull(i_brand_id#16) ASC NULLS FIRST, coalesce(i_class_id#17, 0) ASC NULLS FIRST, isnull(i_class_id#17) ASC NULLS FIRST, coalesce(i_category_id#18, 0) ASC NULLS FIRST, isnull(i_category_id#18) ASC NULLS FIRST], false, 0 +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Arguments: [coalesce(i_brand_id#15, 0) ASC NULLS FIRST, isnull(i_brand_id#15) ASC NULLS FIRST, coalesce(i_class_id#16, 0) ASC NULLS FIRST, isnull(i_class_id#16) ASC NULLS FIRST, coalesce(i_category_id#17, 0) ASC NULLS FIRST, isnull(i_category_id#17) ASC NULLS FIRST], false, 0 (20) Scan parquet default.catalog_sales -Output [2]: [cs_item_sk#20, cs_sold_date_sk#21] +Output [2]: [cs_item_sk#18, cs_sold_date_sk#19] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#21), dynamicpruningexpression(cs_sold_date_sk#21 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(cs_sold_date_sk#19), dynamicpruningexpression(cs_sold_date_sk#19 IN dynamicpruning#12)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 8] -Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] +Input [2]: [cs_item_sk#18, cs_sold_date_sk#19] (22) Filter [codegen id : 8] -Input [2]: [cs_item_sk#20, cs_sold_date_sk#21] -Condition : isnotnull(cs_item_sk#20) +Input [2]: [cs_item_sk#18, cs_sold_date_sk#19] +Condition : isnotnull(cs_item_sk#18) (23) ReusedExchange [Reuses operator id: 177] -Output [1]: [d_date_sk#22] +Output [1]: [d_date_sk#20] (24) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_sold_date_sk#21] -Right keys [1]: [d_date_sk#22] +Left keys [1]: [cs_sold_date_sk#19] +Right keys [1]: [d_date_sk#20] Join condition: None (25) Project [codegen id : 8] -Output [1]: [cs_item_sk#20] -Input [3]: [cs_item_sk#20, cs_sold_date_sk#21, d_date_sk#22] +Output [1]: [cs_item_sk#18] +Input [3]: [cs_item_sk#18, cs_sold_date_sk#19, d_date_sk#20] (26) Scan parquet default.item -Output [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] +Output [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 7] -Input [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] +Input [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] (28) Filter [codegen id : 7] -Input [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] -Condition : isnotnull(i_item_sk#23) +Input [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] +Condition : isnotnull(i_item_sk#21) (29) BroadcastExchange -Input [4]: [i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] +Input [4]: [i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (30) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [cs_item_sk#20] -Right keys [1]: [i_item_sk#23] +Left keys [1]: [cs_item_sk#18] +Right keys [1]: [i_item_sk#21] Join condition: None (31) Project [codegen id : 8] -Output [3]: [i_brand_id#24, i_class_id#25, i_category_id#26] -Input [5]: [cs_item_sk#20, i_item_sk#23, i_brand_id#24, i_class_id#25, i_category_id#26] +Output [3]: [i_brand_id#22, i_class_id#23, i_category_id#24] +Input [5]: [cs_item_sk#18, i_item_sk#21, i_brand_id#22, i_class_id#23, i_category_id#24] (32) Exchange -Input [3]: [i_brand_id#24, i_class_id#25, i_category_id#26] -Arguments: hashpartitioning(coalesce(i_brand_id#24, 0), isnull(i_brand_id#24), coalesce(i_class_id#25, 0), isnull(i_class_id#25), coalesce(i_category_id#26, 0), isnull(i_category_id#26), 5), ENSURE_REQUIREMENTS, [id=#28] +Input [3]: [i_brand_id#22, i_class_id#23, i_category_id#24] +Arguments: hashpartitioning(coalesce(i_brand_id#22, 0), isnull(i_brand_id#22), coalesce(i_class_id#23, 0), isnull(i_class_id#23), coalesce(i_category_id#24, 0), isnull(i_category_id#24), 5), ENSURE_REQUIREMENTS, [plan_id=4] (33) Sort [codegen id : 9] -Input [3]: [i_brand_id#24, i_class_id#25, i_category_id#26] -Arguments: [coalesce(i_brand_id#24, 0) ASC NULLS FIRST, isnull(i_brand_id#24) ASC NULLS FIRST, coalesce(i_class_id#25, 0) ASC NULLS FIRST, isnull(i_class_id#25) ASC NULLS FIRST, coalesce(i_category_id#26, 0) ASC NULLS FIRST, isnull(i_category_id#26) ASC NULLS FIRST], false, 0 +Input [3]: [i_brand_id#22, i_class_id#23, i_category_id#24] +Arguments: [coalesce(i_brand_id#22, 0) ASC NULLS FIRST, isnull(i_brand_id#22) ASC NULLS FIRST, coalesce(i_class_id#23, 0) ASC NULLS FIRST, isnull(i_class_id#23) ASC NULLS FIRST, coalesce(i_category_id#24, 0) ASC NULLS FIRST, isnull(i_category_id#24) ASC NULLS FIRST], false, 0 (34) SortMergeJoin [codegen id : 10] -Left keys [6]: [coalesce(i_brand_id#16, 0), isnull(i_brand_id#16), coalesce(i_class_id#17, 0), isnull(i_class_id#17), coalesce(i_category_id#18, 0), isnull(i_category_id#18)] -Right keys [6]: [coalesce(i_brand_id#24, 0), isnull(i_brand_id#24), coalesce(i_class_id#25, 0), isnull(i_class_id#25), coalesce(i_category_id#26, 0), isnull(i_category_id#26)] +Left keys [6]: [coalesce(i_brand_id#15, 0), isnull(i_brand_id#15), coalesce(i_class_id#16, 0), isnull(i_class_id#16), coalesce(i_category_id#17, 0), isnull(i_category_id#17)] +Right keys [6]: [coalesce(i_brand_id#22, 0), isnull(i_brand_id#22), coalesce(i_class_id#23, 0), isnull(i_class_id#23), coalesce(i_category_id#24, 0), isnull(i_category_id#24)] Join condition: None (35) BroadcastExchange -Input [4]: [i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#29] +Input [4]: [i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (36) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [ss_item_sk#11] -Right keys [1]: [i_item_sk#15] +Left keys [1]: [ss_item_sk#10] +Right keys [1]: [i_item_sk#14] Join condition: None (37) Project [codegen id : 11] -Output [3]: [i_brand_id#16 AS brand_id#30, i_class_id#17 AS class_id#31, i_category_id#18 AS category_id#32] -Input [5]: [ss_item_sk#11, i_item_sk#15, i_brand_id#16, i_class_id#17, i_category_id#18] +Output [3]: [i_brand_id#15 AS brand_id#25, i_class_id#16 AS class_id#26, i_category_id#17 AS category_id#27] +Input [5]: [ss_item_sk#10, i_item_sk#14, i_brand_id#15, i_class_id#16, i_category_id#17] (38) HashAggregate [codegen id : 11] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] +Results [3]: [brand_id#25, class_id#26, category_id#27] (39) Exchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: hashpartitioning(brand_id#30, class_id#31, category_id#32, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: hashpartitioning(brand_id#25, class_id#26, category_id#27, 5), ENSURE_REQUIREMENTS, [plan_id=6] (40) HashAggregate [codegen id : 12] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Keys [3]: [brand_id#30, class_id#31, category_id#32] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#30, class_id#31, category_id#32] +Results [3]: [brand_id#25, class_id#26, category_id#27] (41) Exchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: hashpartitioning(coalesce(brand_id#30, 0), isnull(brand_id#30), coalesce(class_id#31, 0), isnull(class_id#31), coalesce(category_id#32, 0), isnull(category_id#32), 5), ENSURE_REQUIREMENTS, [id=#34] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: hashpartitioning(coalesce(brand_id#25, 0), isnull(brand_id#25), coalesce(class_id#26, 0), isnull(class_id#26), coalesce(category_id#27, 0), isnull(category_id#27), 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) Sort [codegen id : 13] -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: [coalesce(brand_id#30, 0) ASC NULLS FIRST, isnull(brand_id#30) ASC NULLS FIRST, coalesce(class_id#31, 0) ASC NULLS FIRST, isnull(class_id#31) ASC NULLS FIRST, coalesce(category_id#32, 0) ASC NULLS FIRST, isnull(category_id#32) ASC NULLS FIRST], false, 0 +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: [coalesce(brand_id#25, 0) ASC NULLS FIRST, isnull(brand_id#25) ASC NULLS FIRST, coalesce(class_id#26, 0) ASC NULLS FIRST, isnull(class_id#26) ASC NULLS FIRST, coalesce(category_id#27, 0) ASC NULLS FIRST, isnull(category_id#27) ASC NULLS FIRST], false, 0 (43) Scan parquet default.web_sales -Output [2]: [ws_item_sk#35, ws_sold_date_sk#36] +Output [2]: [ws_item_sk#28, ws_sold_date_sk#29] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#36), dynamicpruningexpression(ws_sold_date_sk#36 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ws_sold_date_sk#29), dynamicpruningexpression(ws_sold_date_sk#29 IN dynamicpruning#12)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 16] -Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] (45) Filter [codegen id : 16] -Input [2]: [ws_item_sk#35, ws_sold_date_sk#36] -Condition : isnotnull(ws_item_sk#35) +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] +Condition : isnotnull(ws_item_sk#28) (46) ReusedExchange [Reuses operator id: 177] -Output [1]: [d_date_sk#37] +Output [1]: [d_date_sk#30] (47) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [ws_sold_date_sk#36] -Right keys [1]: [d_date_sk#37] +Left keys [1]: [ws_sold_date_sk#29] +Right keys [1]: [d_date_sk#30] Join condition: None (48) Project [codegen id : 16] -Output [1]: [ws_item_sk#35] -Input [3]: [ws_item_sk#35, ws_sold_date_sk#36, d_date_sk#37] +Output [1]: [ws_item_sk#28] +Input [3]: [ws_item_sk#28, ws_sold_date_sk#29, d_date_sk#30] (49) ReusedExchange [Reuses operator id: 29] -Output [4]: [i_item_sk#38, i_brand_id#39, i_class_id#40, i_category_id#41] +Output [4]: [i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] (50) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [ws_item_sk#35] -Right keys [1]: [i_item_sk#38] +Left keys [1]: [ws_item_sk#28] +Right keys [1]: [i_item_sk#31] Join condition: None (51) Project [codegen id : 16] -Output [3]: [i_brand_id#39, i_class_id#40, i_category_id#41] -Input [5]: [ws_item_sk#35, i_item_sk#38, i_brand_id#39, i_class_id#40, i_category_id#41] +Output [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Input [5]: [ws_item_sk#28, i_item_sk#31, i_brand_id#32, i_class_id#33, i_category_id#34] (52) Exchange -Input [3]: [i_brand_id#39, i_class_id#40, i_category_id#41] -Arguments: hashpartitioning(coalesce(i_brand_id#39, 0), isnull(i_brand_id#39), coalesce(i_class_id#40, 0), isnull(i_class_id#40), coalesce(i_category_id#41, 0), isnull(i_category_id#41), 5), ENSURE_REQUIREMENTS, [id=#42] +Input [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: hashpartitioning(coalesce(i_brand_id#32, 0), isnull(i_brand_id#32), coalesce(i_class_id#33, 0), isnull(i_class_id#33), coalesce(i_category_id#34, 0), isnull(i_category_id#34), 5), ENSURE_REQUIREMENTS, [plan_id=8] (53) Sort [codegen id : 17] -Input [3]: [i_brand_id#39, i_class_id#40, i_category_id#41] -Arguments: [coalesce(i_brand_id#39, 0) ASC NULLS FIRST, isnull(i_brand_id#39) ASC NULLS FIRST, coalesce(i_class_id#40, 0) ASC NULLS FIRST, isnull(i_class_id#40) ASC NULLS FIRST, coalesce(i_category_id#41, 0) ASC NULLS FIRST, isnull(i_category_id#41) ASC NULLS FIRST], false, 0 +Input [3]: [i_brand_id#32, i_class_id#33, i_category_id#34] +Arguments: [coalesce(i_brand_id#32, 0) ASC NULLS FIRST, isnull(i_brand_id#32) ASC NULLS FIRST, coalesce(i_class_id#33, 0) ASC NULLS FIRST, isnull(i_class_id#33) ASC NULLS FIRST, coalesce(i_category_id#34, 0) ASC NULLS FIRST, isnull(i_category_id#34) ASC NULLS FIRST], false, 0 (54) SortMergeJoin [codegen id : 18] -Left keys [6]: [coalesce(brand_id#30, 0), isnull(brand_id#30), coalesce(class_id#31, 0), isnull(class_id#31), coalesce(category_id#32, 0), isnull(category_id#32)] -Right keys [6]: [coalesce(i_brand_id#39, 0), isnull(i_brand_id#39), coalesce(i_class_id#40, 0), isnull(i_class_id#40), coalesce(i_category_id#41, 0), isnull(i_category_id#41)] +Left keys [6]: [coalesce(brand_id#25, 0), isnull(brand_id#25), coalesce(class_id#26, 0), isnull(class_id#26), coalesce(category_id#27, 0), isnull(category_id#27)] +Right keys [6]: [coalesce(i_brand_id#32, 0), isnull(i_brand_id#32), coalesce(i_class_id#33, 0), isnull(i_class_id#33), coalesce(i_category_id#34, 0), isnull(i_category_id#34)] Join condition: None (55) BroadcastExchange -Input [3]: [brand_id#30, class_id#31, category_id#32] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#43] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=9] (56) BroadcastHashJoin [codegen id : 19] -Left keys [3]: [i_brand_id#8, i_class_id#9, i_category_id#10] -Right keys [3]: [brand_id#30, class_id#31, category_id#32] +Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] +Right keys [3]: [brand_id#25, class_id#26, category_id#27] Join condition: None (57) Project [codegen id : 19] -Output [1]: [i_item_sk#7 AS ss_item_sk#44] -Input [7]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10, brand_id#30, class_id#31, category_id#32] +Output [1]: [i_item_sk#6 AS ss_item_sk#35] +Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#25, class_id#26, category_id#27] (58) Exchange -Input [1]: [ss_item_sk#44] -Arguments: hashpartitioning(ss_item_sk#44, 5), ENSURE_REQUIREMENTS, [id=#45] +Input [1]: [ss_item_sk#35] +Arguments: hashpartitioning(ss_item_sk#35, 5), ENSURE_REQUIREMENTS, [plan_id=10] (59) Sort [codegen id : 20] -Input [1]: [ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#35] +Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (60) SortMergeJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [ss_item_sk#44] +Right keys [1]: [ss_item_sk#35] Join condition: None (61) ReusedExchange [Reuses operator id: 172] -Output [1]: [d_date_sk#46] +Output [1]: [d_date_sk#36] (62) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#46] +Right keys [1]: [d_date_sk#36] Join condition: None (63) Project [codegen id : 43] Output [3]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3] -Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#46] +Input [5]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, d_date_sk#36] (64) Scan parquet default.item -Output [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Output [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (65) ColumnarToRow [codegen id : 22] -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] (66) Filter [codegen id : 22] -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Condition : isnotnull(i_item_sk#47) +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Condition : isnotnull(i_item_sk#37) (67) Exchange -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Arguments: hashpartitioning(i_item_sk#47, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Arguments: hashpartitioning(i_item_sk#37, 5), ENSURE_REQUIREMENTS, [plan_id=11] (68) Sort [codegen id : 23] -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Arguments: [i_item_sk#47 ASC NULLS FIRST], false, 0 +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Arguments: [i_item_sk#37 ASC NULLS FIRST], false, 0 (69) ReusedExchange [Reuses operator id: 58] -Output [1]: [ss_item_sk#44] +Output [1]: [ss_item_sk#35] (70) Sort [codegen id : 41] -Input [1]: [ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#35] +Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (71) SortMergeJoin [codegen id : 42] -Left keys [1]: [i_item_sk#47] -Right keys [1]: [ss_item_sk#44] +Left keys [1]: [i_item_sk#37] +Right keys [1]: [ss_item_sk#35] Join condition: None (72) BroadcastExchange -Input [4]: [i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#52] +Input [4]: [i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] (73) BroadcastHashJoin [codegen id : 43] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#47] +Right keys [1]: [i_item_sk#37] Join condition: None (74) Project [codegen id : 43] -Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] -Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#47, i_brand_id#48, i_class_id#49, i_category_id#50] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#38, i_class_id#39, i_category_id#40] +Input [7]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, i_item_sk#37, i_brand_id#38, i_class_id#39, i_category_id#40] (75) HashAggregate [codegen id : 43] -Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#48, i_class_id#49, i_category_id#50] -Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#38, i_class_id#39, i_category_id#40] +Keys [3]: [i_brand_id#38, i_class_id#39, i_category_id#40] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#53, isEmpty#54, count#55] -Results [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] +Aggregate Attributes [3]: [sum#41, isEmpty#42, count#43] +Results [6]: [i_brand_id#38, i_class_id#39, i_category_id#40, sum#44, isEmpty#45, count#46] (76) Exchange -Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] -Arguments: hashpartitioning(i_brand_id#48, i_class_id#49, i_category_id#50, 5), ENSURE_REQUIREMENTS, [id=#59] +Input [6]: [i_brand_id#38, i_class_id#39, i_category_id#40, sum#44, isEmpty#45, count#46] +Arguments: hashpartitioning(i_brand_id#38, i_class_id#39, i_category_id#40, 5), ENSURE_REQUIREMENTS, [plan_id=13] (77) HashAggregate [codegen id : 44] -Input [6]: [i_brand_id#48, i_class_id#49, i_category_id#50, sum#56, isEmpty#57, count#58] -Keys [3]: [i_brand_id#48, i_class_id#49, i_category_id#50] +Input [6]: [i_brand_id#38, i_class_id#39, i_category_id#40, sum#44, isEmpty#45, count#46] +Keys [3]: [i_brand_id#38, i_class_id#39, i_category_id#40] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60, count(1)#61] -Results [6]: [store AS channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#60 AS sales#63, count(1)#61 AS number_sales#64] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47, count(1)#48] +Results [6]: [store AS channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47 AS sales#50, count(1)#48 AS number_sales#51] (78) Filter [codegen id : 44] -Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] -Condition : (isnotnull(sales#63) AND (cast(sales#63 as decimal(32,6)) > cast(Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) +Input [6]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sales#50, number_sales#51] +Condition : (isnotnull(sales#50) AND (cast(sales#50 as decimal(32,6)) > cast(Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) (79) Scan parquet default.catalog_sales -Output [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] +Output [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#70), dynamicpruningexpression(cs_sold_date_sk#70 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#57), dynamicpruningexpression(cs_sold_date_sk#57 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (80) ColumnarToRow [codegen id : 45] -Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] +Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] (81) Filter [codegen id : 45] -Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] -Condition : isnotnull(cs_item_sk#67) +Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] +Condition : isnotnull(cs_item_sk#54) (82) Exchange -Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] -Arguments: hashpartitioning(cs_item_sk#67, 5), ENSURE_REQUIREMENTS, [id=#71] +Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] +Arguments: hashpartitioning(cs_item_sk#54, 5), ENSURE_REQUIREMENTS, [plan_id=14] (83) Sort [codegen id : 46] -Input [4]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70] -Arguments: [cs_item_sk#67 ASC NULLS FIRST], false, 0 +Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] +Arguments: [cs_item_sk#54 ASC NULLS FIRST], false, 0 (84) ReusedExchange [Reuses operator id: 58] -Output [1]: [ss_item_sk#44] +Output [1]: [ss_item_sk#35] (85) Sort [codegen id : 64] -Input [1]: [ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#35] +Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (86) SortMergeJoin [codegen id : 87] -Left keys [1]: [cs_item_sk#67] -Right keys [1]: [ss_item_sk#44] +Left keys [1]: [cs_item_sk#54] +Right keys [1]: [ss_item_sk#35] Join condition: None (87) ReusedExchange [Reuses operator id: 172] -Output [1]: [d_date_sk#72] +Output [1]: [d_date_sk#58] (88) BroadcastHashJoin [codegen id : 87] -Left keys [1]: [cs_sold_date_sk#70] -Right keys [1]: [d_date_sk#72] +Left keys [1]: [cs_sold_date_sk#57] +Right keys [1]: [d_date_sk#58] Join condition: None (89) Project [codegen id : 87] -Output [3]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69] -Input [5]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, cs_sold_date_sk#70, d_date_sk#72] +Output [3]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56] +Input [5]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57, d_date_sk#58] (90) ReusedExchange [Reuses operator id: 72] -Output [4]: [i_item_sk#73, i_brand_id#74, i_class_id#75, i_category_id#76] +Output [4]: [i_item_sk#59, i_brand_id#60, i_class_id#61, i_category_id#62] (91) BroadcastHashJoin [codegen id : 87] -Left keys [1]: [cs_item_sk#67] -Right keys [1]: [i_item_sk#73] +Left keys [1]: [cs_item_sk#54] +Right keys [1]: [i_item_sk#59] Join condition: None (92) Project [codegen id : 87] -Output [5]: [cs_quantity#68, cs_list_price#69, i_brand_id#74, i_class_id#75, i_category_id#76] -Input [7]: [cs_item_sk#67, cs_quantity#68, cs_list_price#69, i_item_sk#73, i_brand_id#74, i_class_id#75, i_category_id#76] +Output [5]: [cs_quantity#55, cs_list_price#56, i_brand_id#60, i_class_id#61, i_category_id#62] +Input [7]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, i_item_sk#59, i_brand_id#60, i_class_id#61, i_category_id#62] (93) HashAggregate [codegen id : 87] -Input [5]: [cs_quantity#68, cs_list_price#69, i_brand_id#74, i_class_id#75, i_category_id#76] -Keys [3]: [i_brand_id#74, i_class_id#75, i_category_id#76] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#77, isEmpty#78, count#79] -Results [6]: [i_brand_id#74, i_class_id#75, i_category_id#76, sum#80, isEmpty#81, count#82] +Input [5]: [cs_quantity#55, cs_list_price#56, i_brand_id#60, i_class_id#61, i_category_id#62] +Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#63, isEmpty#64, count#65] +Results [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#66, isEmpty#67, count#68] (94) Exchange -Input [6]: [i_brand_id#74, i_class_id#75, i_category_id#76, sum#80, isEmpty#81, count#82] -Arguments: hashpartitioning(i_brand_id#74, i_class_id#75, i_category_id#76, 5), ENSURE_REQUIREMENTS, [id=#83] +Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#66, isEmpty#67, count#68] +Arguments: hashpartitioning(i_brand_id#60, i_class_id#61, i_category_id#62, 5), ENSURE_REQUIREMENTS, [plan_id=15] (95) HashAggregate [codegen id : 88] -Input [6]: [i_brand_id#74, i_class_id#75, i_category_id#76, sum#80, isEmpty#81, count#82] -Keys [3]: [i_brand_id#74, i_class_id#75, i_category_id#76] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#84, count(1)#85] -Results [6]: [catalog AS channel#86, i_brand_id#74, i_class_id#75, i_category_id#76, sum(CheckOverflow((promote_precision(cast(cs_quantity#68 as decimal(12,2))) * promote_precision(cast(cs_list_price#69 as decimal(12,2)))), DecimalType(18,2)))#84 AS sales#87, count(1)#85 AS number_sales#88] +Input [6]: [i_brand_id#60, i_class_id#61, i_category_id#62, sum#66, isEmpty#67, count#68] +Keys [3]: [i_brand_id#60, i_class_id#61, i_category_id#62] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#69, count(1)#70] +Results [6]: [catalog AS channel#71, i_brand_id#60, i_class_id#61, i_category_id#62, sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#69 AS sales#72, count(1)#70 AS number_sales#73] (96) Filter [codegen id : 88] -Input [6]: [channel#86, i_brand_id#74, i_class_id#75, i_category_id#76, sales#87, number_sales#88] -Condition : (isnotnull(sales#87) AND (cast(sales#87 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) +Input [6]: [channel#71, i_brand_id#60, i_class_id#61, i_category_id#62, sales#72, number_sales#73] +Condition : (isnotnull(sales#72) AND (cast(sales#72 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) (97) Scan parquet default.web_sales -Output [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] +Output [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#92), dynamicpruningexpression(ws_sold_date_sk#92 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#77), dynamicpruningexpression(ws_sold_date_sk#77 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (98) ColumnarToRow [codegen id : 89] -Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] +Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] (99) Filter [codegen id : 89] -Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] -Condition : isnotnull(ws_item_sk#89) +Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] +Condition : isnotnull(ws_item_sk#74) (100) Exchange -Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] -Arguments: hashpartitioning(ws_item_sk#89, 5), ENSURE_REQUIREMENTS, [id=#93] +Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] +Arguments: hashpartitioning(ws_item_sk#74, 5), ENSURE_REQUIREMENTS, [plan_id=16] (101) Sort [codegen id : 90] -Input [4]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92] -Arguments: [ws_item_sk#89 ASC NULLS FIRST], false, 0 +Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] +Arguments: [ws_item_sk#74 ASC NULLS FIRST], false, 0 (102) ReusedExchange [Reuses operator id: 58] -Output [1]: [ss_item_sk#44] +Output [1]: [ss_item_sk#35] (103) Sort [codegen id : 108] -Input [1]: [ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST], false, 0 +Input [1]: [ss_item_sk#35] +Arguments: [ss_item_sk#35 ASC NULLS FIRST], false, 0 (104) SortMergeJoin [codegen id : 131] -Left keys [1]: [ws_item_sk#89] -Right keys [1]: [ss_item_sk#44] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [ss_item_sk#35] Join condition: None (105) ReusedExchange [Reuses operator id: 172] -Output [1]: [d_date_sk#94] +Output [1]: [d_date_sk#78] (106) BroadcastHashJoin [codegen id : 131] -Left keys [1]: [ws_sold_date_sk#92] -Right keys [1]: [d_date_sk#94] +Left keys [1]: [ws_sold_date_sk#77] +Right keys [1]: [d_date_sk#78] Join condition: None (107) Project [codegen id : 131] -Output [3]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91] -Input [5]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, ws_sold_date_sk#92, d_date_sk#94] +Output [3]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76] +Input [5]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77, d_date_sk#78] (108) ReusedExchange [Reuses operator id: 72] -Output [4]: [i_item_sk#95, i_brand_id#96, i_class_id#97, i_category_id#98] +Output [4]: [i_item_sk#79, i_brand_id#80, i_class_id#81, i_category_id#82] (109) BroadcastHashJoin [codegen id : 131] -Left keys [1]: [ws_item_sk#89] -Right keys [1]: [i_item_sk#95] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [i_item_sk#79] Join condition: None (110) Project [codegen id : 131] -Output [5]: [ws_quantity#90, ws_list_price#91, i_brand_id#96, i_class_id#97, i_category_id#98] -Input [7]: [ws_item_sk#89, ws_quantity#90, ws_list_price#91, i_item_sk#95, i_brand_id#96, i_class_id#97, i_category_id#98] +Output [5]: [ws_quantity#75, ws_list_price#76, i_brand_id#80, i_class_id#81, i_category_id#82] +Input [7]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, i_item_sk#79, i_brand_id#80, i_class_id#81, i_category_id#82] (111) HashAggregate [codegen id : 131] -Input [5]: [ws_quantity#90, ws_list_price#91, i_brand_id#96, i_class_id#97, i_category_id#98] -Keys [3]: [i_brand_id#96, i_class_id#97, i_category_id#98] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#99, isEmpty#100, count#101] -Results [6]: [i_brand_id#96, i_class_id#97, i_category_id#98, sum#102, isEmpty#103, count#104] +Input [5]: [ws_quantity#75, ws_list_price#76, i_brand_id#80, i_class_id#81, i_category_id#82] +Keys [3]: [i_brand_id#80, i_class_id#81, i_category_id#82] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#83, isEmpty#84, count#85] +Results [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#86, isEmpty#87, count#88] (112) Exchange -Input [6]: [i_brand_id#96, i_class_id#97, i_category_id#98, sum#102, isEmpty#103, count#104] -Arguments: hashpartitioning(i_brand_id#96, i_class_id#97, i_category_id#98, 5), ENSURE_REQUIREMENTS, [id=#105] +Input [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#86, isEmpty#87, count#88] +Arguments: hashpartitioning(i_brand_id#80, i_class_id#81, i_category_id#82, 5), ENSURE_REQUIREMENTS, [plan_id=17] (113) HashAggregate [codegen id : 132] -Input [6]: [i_brand_id#96, i_class_id#97, i_category_id#98, sum#102, isEmpty#103, count#104] -Keys [3]: [i_brand_id#96, i_class_id#97, i_category_id#98] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2)))#106, count(1)#107] -Results [6]: [web AS channel#108, i_brand_id#96, i_class_id#97, i_category_id#98, sum(CheckOverflow((promote_precision(cast(ws_quantity#90 as decimal(12,2))) * promote_precision(cast(ws_list_price#91 as decimal(12,2)))), DecimalType(18,2)))#106 AS sales#109, count(1)#107 AS number_sales#110] +Input [6]: [i_brand_id#80, i_class_id#81, i_category_id#82, sum#86, isEmpty#87, count#88] +Keys [3]: [i_brand_id#80, i_class_id#81, i_category_id#82] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2)))#89, count(1)#90] +Results [6]: [web AS channel#91, i_brand_id#80, i_class_id#81, i_category_id#82, sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2)))#89 AS sales#92, count(1)#90 AS number_sales#93] (114) Filter [codegen id : 132] -Input [6]: [channel#108, i_brand_id#96, i_class_id#97, i_category_id#98, sales#109, number_sales#110] -Condition : (isnotnull(sales#109) AND (cast(sales#109 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#65, [id=#66] as decimal(32,6)))) +Input [6]: [channel#91, i_brand_id#80, i_class_id#81, i_category_id#82, sales#92, number_sales#93] +Condition : (isnotnull(sales#92) AND (cast(sales#92 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) (115) Union (116) HashAggregate [codegen id : 133] -Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sales#63, number_sales#64] -Keys [4]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50] -Functions [2]: [partial_sum(sales#63), partial_sum(number_sales#64)] -Aggregate Attributes [3]: [sum#111, isEmpty#112, sum#113] -Results [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] +Input [6]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sales#50, number_sales#51] +Keys [4]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40] +Functions [2]: [partial_sum(sales#50), partial_sum(number_sales#51)] +Aggregate Attributes [3]: [sum#94, isEmpty#95, sum#96] +Results [7]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum#97, isEmpty#98, sum#99] (117) Exchange -Input [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] -Arguments: hashpartitioning(channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, 5), ENSURE_REQUIREMENTS, [id=#117] +Input [7]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum#97, isEmpty#98, sum#99] +Arguments: hashpartitioning(channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, 5), ENSURE_REQUIREMENTS, [plan_id=18] (118) HashAggregate [codegen id : 134] -Input [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] -Keys [4]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50] -Functions [2]: [sum(sales#63), sum(number_sales#64)] -Aggregate Attributes [2]: [sum(sales#63)#118, sum(number_sales#64)#119] -Results [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum(sales#63)#118 AS sum_sales#120, sum(number_sales#64)#119 AS number_sales#121] +Input [7]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum#97, isEmpty#98, sum#99] +Keys [4]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40] +Functions [2]: [sum(sales#50), sum(number_sales#51)] +Aggregate Attributes [2]: [sum(sales#50)#100, sum(number_sales#51)#101] +Results [6]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum(sales#50)#100 AS sum_sales#102, sum(number_sales#51)#101 AS number_sales#103] (119) ReusedExchange [Reuses operator id: 117] -Output [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] +Output [7]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum#97, isEmpty#98, sum#99] (120) HashAggregate [codegen id : 268] -Input [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] -Keys [4]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50] -Functions [2]: [sum(sales#63), sum(number_sales#64)] -Aggregate Attributes [2]: [sum(sales#63)#118, sum(number_sales#64)#119] -Results [5]: [channel#62, i_brand_id#48, i_class_id#49, sum(sales#63)#118 AS sum_sales#120, sum(number_sales#64)#119 AS number_sales#121] +Input [7]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum#97, isEmpty#98, sum#99] +Keys [4]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40] +Functions [2]: [sum(sales#50), sum(number_sales#51)] +Aggregate Attributes [2]: [sum(sales#50)#100, sum(number_sales#51)#101] +Results [5]: [channel#49, i_brand_id#38, i_class_id#39, sum(sales#50)#100 AS sum_sales#102, sum(number_sales#51)#101 AS number_sales#103] (121) HashAggregate [codegen id : 268] -Input [5]: [channel#62, i_brand_id#48, i_class_id#49, sum_sales#120, number_sales#121] -Keys [3]: [channel#62, i_brand_id#48, i_class_id#49] -Functions [2]: [partial_sum(sum_sales#120), partial_sum(number_sales#121)] -Aggregate Attributes [3]: [sum#122, isEmpty#123, sum#124] -Results [6]: [channel#62, i_brand_id#48, i_class_id#49, sum#125, isEmpty#126, sum#127] +Input [5]: [channel#49, i_brand_id#38, i_class_id#39, sum_sales#102, number_sales#103] +Keys [3]: [channel#49, i_brand_id#38, i_class_id#39] +Functions [2]: [partial_sum(sum_sales#102), partial_sum(number_sales#103)] +Aggregate Attributes [3]: [sum#104, isEmpty#105, sum#106] +Results [6]: [channel#49, i_brand_id#38, i_class_id#39, sum#107, isEmpty#108, sum#109] (122) Exchange -Input [6]: [channel#62, i_brand_id#48, i_class_id#49, sum#125, isEmpty#126, sum#127] -Arguments: hashpartitioning(channel#62, i_brand_id#48, i_class_id#49, 5), ENSURE_REQUIREMENTS, [id=#128] +Input [6]: [channel#49, i_brand_id#38, i_class_id#39, sum#107, isEmpty#108, sum#109] +Arguments: hashpartitioning(channel#49, i_brand_id#38, i_class_id#39, 5), ENSURE_REQUIREMENTS, [plan_id=19] (123) HashAggregate [codegen id : 269] -Input [6]: [channel#62, i_brand_id#48, i_class_id#49, sum#125, isEmpty#126, sum#127] -Keys [3]: [channel#62, i_brand_id#48, i_class_id#49] -Functions [2]: [sum(sum_sales#120), sum(number_sales#121)] -Aggregate Attributes [2]: [sum(sum_sales#120)#129, sum(number_sales#121)#130] -Results [6]: [channel#62, i_brand_id#48, i_class_id#49, null AS i_category_id#131, sum(sum_sales#120)#129 AS sum(sum_sales)#132, sum(number_sales#121)#130 AS sum(number_sales)#133] +Input [6]: [channel#49, i_brand_id#38, i_class_id#39, sum#107, isEmpty#108, sum#109] +Keys [3]: [channel#49, i_brand_id#38, i_class_id#39] +Functions [2]: [sum(sum_sales#102), sum(number_sales#103)] +Aggregate Attributes [2]: [sum(sum_sales#102)#110, sum(number_sales#103)#111] +Results [6]: [channel#49, i_brand_id#38, i_class_id#39, null AS i_category_id#112, sum(sum_sales#102)#110 AS sum(sum_sales)#113, sum(number_sales#103)#111 AS sum(number_sales)#114] (124) ReusedExchange [Reuses operator id: 117] -Output [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] +Output [7]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum#97, isEmpty#98, sum#99] (125) HashAggregate [codegen id : 403] -Input [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] -Keys [4]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50] -Functions [2]: [sum(sales#63), sum(number_sales#64)] -Aggregate Attributes [2]: [sum(sales#63)#118, sum(number_sales#64)#119] -Results [4]: [channel#62, i_brand_id#48, sum(sales#63)#118 AS sum_sales#120, sum(number_sales#64)#119 AS number_sales#121] +Input [7]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum#97, isEmpty#98, sum#99] +Keys [4]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40] +Functions [2]: [sum(sales#50), sum(number_sales#51)] +Aggregate Attributes [2]: [sum(sales#50)#100, sum(number_sales#51)#101] +Results [4]: [channel#49, i_brand_id#38, sum(sales#50)#100 AS sum_sales#102, sum(number_sales#51)#101 AS number_sales#103] (126) HashAggregate [codegen id : 403] -Input [4]: [channel#62, i_brand_id#48, sum_sales#120, number_sales#121] -Keys [2]: [channel#62, i_brand_id#48] -Functions [2]: [partial_sum(sum_sales#120), partial_sum(number_sales#121)] -Aggregate Attributes [3]: [sum#134, isEmpty#135, sum#136] -Results [5]: [channel#62, i_brand_id#48, sum#137, isEmpty#138, sum#139] +Input [4]: [channel#49, i_brand_id#38, sum_sales#102, number_sales#103] +Keys [2]: [channel#49, i_brand_id#38] +Functions [2]: [partial_sum(sum_sales#102), partial_sum(number_sales#103)] +Aggregate Attributes [3]: [sum#115, isEmpty#116, sum#117] +Results [5]: [channel#49, i_brand_id#38, sum#118, isEmpty#119, sum#120] (127) Exchange -Input [5]: [channel#62, i_brand_id#48, sum#137, isEmpty#138, sum#139] -Arguments: hashpartitioning(channel#62, i_brand_id#48, 5), ENSURE_REQUIREMENTS, [id=#140] +Input [5]: [channel#49, i_brand_id#38, sum#118, isEmpty#119, sum#120] +Arguments: hashpartitioning(channel#49, i_brand_id#38, 5), ENSURE_REQUIREMENTS, [plan_id=20] (128) HashAggregate [codegen id : 404] -Input [5]: [channel#62, i_brand_id#48, sum#137, isEmpty#138, sum#139] -Keys [2]: [channel#62, i_brand_id#48] -Functions [2]: [sum(sum_sales#120), sum(number_sales#121)] -Aggregate Attributes [2]: [sum(sum_sales#120)#141, sum(number_sales#121)#142] -Results [6]: [channel#62, i_brand_id#48, null AS i_class_id#143, null AS i_category_id#144, sum(sum_sales#120)#141 AS sum(sum_sales)#145, sum(number_sales#121)#142 AS sum(number_sales)#146] +Input [5]: [channel#49, i_brand_id#38, sum#118, isEmpty#119, sum#120] +Keys [2]: [channel#49, i_brand_id#38] +Functions [2]: [sum(sum_sales#102), sum(number_sales#103)] +Aggregate Attributes [2]: [sum(sum_sales#102)#121, sum(number_sales#103)#122] +Results [6]: [channel#49, i_brand_id#38, null AS i_class_id#123, null AS i_category_id#124, sum(sum_sales#102)#121 AS sum(sum_sales)#125, sum(number_sales#103)#122 AS sum(number_sales)#126] (129) ReusedExchange [Reuses operator id: 117] -Output [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] +Output [7]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum#97, isEmpty#98, sum#99] (130) HashAggregate [codegen id : 538] -Input [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] -Keys [4]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50] -Functions [2]: [sum(sales#63), sum(number_sales#64)] -Aggregate Attributes [2]: [sum(sales#63)#118, sum(number_sales#64)#119] -Results [3]: [channel#62, sum(sales#63)#118 AS sum_sales#120, sum(number_sales#64)#119 AS number_sales#121] +Input [7]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum#97, isEmpty#98, sum#99] +Keys [4]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40] +Functions [2]: [sum(sales#50), sum(number_sales#51)] +Aggregate Attributes [2]: [sum(sales#50)#100, sum(number_sales#51)#101] +Results [3]: [channel#49, sum(sales#50)#100 AS sum_sales#102, sum(number_sales#51)#101 AS number_sales#103] (131) HashAggregate [codegen id : 538] -Input [3]: [channel#62, sum_sales#120, number_sales#121] -Keys [1]: [channel#62] -Functions [2]: [partial_sum(sum_sales#120), partial_sum(number_sales#121)] -Aggregate Attributes [3]: [sum#147, isEmpty#148, sum#149] -Results [4]: [channel#62, sum#150, isEmpty#151, sum#152] +Input [3]: [channel#49, sum_sales#102, number_sales#103] +Keys [1]: [channel#49] +Functions [2]: [partial_sum(sum_sales#102), partial_sum(number_sales#103)] +Aggregate Attributes [3]: [sum#127, isEmpty#128, sum#129] +Results [4]: [channel#49, sum#130, isEmpty#131, sum#132] (132) Exchange -Input [4]: [channel#62, sum#150, isEmpty#151, sum#152] -Arguments: hashpartitioning(channel#62, 5), ENSURE_REQUIREMENTS, [id=#153] +Input [4]: [channel#49, sum#130, isEmpty#131, sum#132] +Arguments: hashpartitioning(channel#49, 5), ENSURE_REQUIREMENTS, [plan_id=21] (133) HashAggregate [codegen id : 539] -Input [4]: [channel#62, sum#150, isEmpty#151, sum#152] -Keys [1]: [channel#62] -Functions [2]: [sum(sum_sales#120), sum(number_sales#121)] -Aggregate Attributes [2]: [sum(sum_sales#120)#154, sum(number_sales#121)#155] -Results [6]: [channel#62, null AS i_brand_id#156, null AS i_class_id#157, null AS i_category_id#158, sum(sum_sales#120)#154 AS sum(sum_sales)#159, sum(number_sales#121)#155 AS sum(number_sales)#160] +Input [4]: [channel#49, sum#130, isEmpty#131, sum#132] +Keys [1]: [channel#49] +Functions [2]: [sum(sum_sales#102), sum(number_sales#103)] +Aggregate Attributes [2]: [sum(sum_sales#102)#133, sum(number_sales#103)#134] +Results [6]: [channel#49, null AS i_brand_id#135, null AS i_class_id#136, null AS i_category_id#137, sum(sum_sales#102)#133 AS sum(sum_sales)#138, sum(number_sales#103)#134 AS sum(number_sales)#139] (134) ReusedExchange [Reuses operator id: 117] -Output [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] +Output [7]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum#97, isEmpty#98, sum#99] (135) HashAggregate [codegen id : 673] -Input [7]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum#114, isEmpty#115, sum#116] -Keys [4]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50] -Functions [2]: [sum(sales#63), sum(number_sales#64)] -Aggregate Attributes [2]: [sum(sales#63)#118, sum(number_sales#64)#119] -Results [2]: [sum(sales#63)#118 AS sum_sales#120, sum(number_sales#64)#119 AS number_sales#121] +Input [7]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum#97, isEmpty#98, sum#99] +Keys [4]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40] +Functions [2]: [sum(sales#50), sum(number_sales#51)] +Aggregate Attributes [2]: [sum(sales#50)#100, sum(number_sales#51)#101] +Results [2]: [sum(sales#50)#100 AS sum_sales#102, sum(number_sales#51)#101 AS number_sales#103] (136) HashAggregate [codegen id : 673] -Input [2]: [sum_sales#120, number_sales#121] +Input [2]: [sum_sales#102, number_sales#103] Keys: [] -Functions [2]: [partial_sum(sum_sales#120), partial_sum(number_sales#121)] -Aggregate Attributes [3]: [sum#161, isEmpty#162, sum#163] -Results [3]: [sum#164, isEmpty#165, sum#166] +Functions [2]: [partial_sum(sum_sales#102), partial_sum(number_sales#103)] +Aggregate Attributes [3]: [sum#140, isEmpty#141, sum#142] +Results [3]: [sum#143, isEmpty#144, sum#145] (137) Exchange -Input [3]: [sum#164, isEmpty#165, sum#166] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#167] +Input [3]: [sum#143, isEmpty#144, sum#145] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=22] (138) HashAggregate [codegen id : 674] -Input [3]: [sum#164, isEmpty#165, sum#166] +Input [3]: [sum#143, isEmpty#144, sum#145] Keys: [] -Functions [2]: [sum(sum_sales#120), sum(number_sales#121)] -Aggregate Attributes [2]: [sum(sum_sales#120)#168, sum(number_sales#121)#169] -Results [6]: [null AS channel#170, null AS i_brand_id#171, null AS i_class_id#172, null AS i_category_id#173, sum(sum_sales#120)#168 AS sum(sum_sales)#174, sum(number_sales#121)#169 AS sum(number_sales)#175] +Functions [2]: [sum(sum_sales#102), sum(number_sales#103)] +Aggregate Attributes [2]: [sum(sum_sales#102)#146, sum(number_sales#103)#147] +Results [6]: [null AS channel#148, null AS i_brand_id#149, null AS i_class_id#150, null AS i_category_id#151, sum(sum_sales#102)#146 AS sum(sum_sales)#152, sum(number_sales#103)#147 AS sum(number_sales)#153] (139) Union (140) HashAggregate [codegen id : 675] -Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] -Keys [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] +Input [6]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum_sales#102, number_sales#103] +Keys [6]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum_sales#102, number_sales#103] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] +Results [6]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum_sales#102, number_sales#103] (141) Exchange -Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] -Arguments: hashpartitioning(channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121, 5), ENSURE_REQUIREMENTS, [id=#176] +Input [6]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum_sales#102, number_sales#103] +Arguments: hashpartitioning(channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum_sales#102, number_sales#103, 5), ENSURE_REQUIREMENTS, [plan_id=23] (142) HashAggregate [codegen id : 676] -Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] -Keys [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] +Input [6]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum_sales#102, number_sales#103] +Keys [6]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum_sales#102, number_sales#103] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] +Results [6]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum_sales#102, number_sales#103] (143) TakeOrderedAndProject -Input [6]: [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] -Arguments: 100, [channel#62 ASC NULLS FIRST, i_brand_id#48 ASC NULLS FIRST, i_class_id#49 ASC NULLS FIRST, i_category_id#50 ASC NULLS FIRST], [channel#62, i_brand_id#48, i_class_id#49, i_category_id#50, sum_sales#120, number_sales#121] +Input [6]: [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum_sales#102, number_sales#103] +Arguments: 100, [channel#49 ASC NULLS FIRST, i_brand_id#38 ASC NULLS FIRST, i_class_id#39 ASC NULLS FIRST, i_category_id#40 ASC NULLS FIRST], [channel#49, i_brand_id#38, i_class_id#39, i_category_id#40, sum_sales#102, number_sales#103] ===== Subqueries ===== -Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquery#65, [id=#66] +Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquery#52, [id=#53] * HashAggregate (162) +- Exchange (161) +- * HashAggregate (160) @@ -838,94 +838,94 @@ Subquery:1 Hosting operator id = 78 Hosting Expression = Subquery scalar-subquer (144) Scan parquet default.store_sales -Output [3]: [ss_quantity#177, ss_list_price#178, ss_sold_date_sk#179] +Output [3]: [ss_quantity#154, ss_list_price#155, ss_sold_date_sk#156] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#179), dynamicpruningexpression(ss_sold_date_sk#179 IN dynamicpruning#13)] +PartitionFilters: [isnotnull(ss_sold_date_sk#156), dynamicpruningexpression(ss_sold_date_sk#156 IN dynamicpruning#12)] ReadSchema: struct (145) ColumnarToRow [codegen id : 2] -Input [3]: [ss_quantity#177, ss_list_price#178, ss_sold_date_sk#179] +Input [3]: [ss_quantity#154, ss_list_price#155, ss_sold_date_sk#156] (146) ReusedExchange [Reuses operator id: 177] -Output [1]: [d_date_sk#180] +Output [1]: [d_date_sk#157] (147) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#179] -Right keys [1]: [d_date_sk#180] +Left keys [1]: [ss_sold_date_sk#156] +Right keys [1]: [d_date_sk#157] Join condition: None (148) Project [codegen id : 2] -Output [2]: [ss_quantity#177 AS quantity#181, ss_list_price#178 AS list_price#182] -Input [4]: [ss_quantity#177, ss_list_price#178, ss_sold_date_sk#179, d_date_sk#180] +Output [2]: [ss_quantity#154 AS quantity#158, ss_list_price#155 AS list_price#159] +Input [4]: [ss_quantity#154, ss_list_price#155, ss_sold_date_sk#156, d_date_sk#157] (149) Scan parquet default.catalog_sales -Output [3]: [cs_quantity#183, cs_list_price#184, cs_sold_date_sk#185] +Output [3]: [cs_quantity#160, cs_list_price#161, cs_sold_date_sk#162] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#185), dynamicpruningexpression(cs_sold_date_sk#185 IN dynamicpruning#186)] +PartitionFilters: [isnotnull(cs_sold_date_sk#162), dynamicpruningexpression(cs_sold_date_sk#162 IN dynamicpruning#163)] ReadSchema: struct (150) ColumnarToRow [codegen id : 4] -Input [3]: [cs_quantity#183, cs_list_price#184, cs_sold_date_sk#185] +Input [3]: [cs_quantity#160, cs_list_price#161, cs_sold_date_sk#162] (151) ReusedExchange [Reuses operator id: 167] -Output [1]: [d_date_sk#187] +Output [1]: [d_date_sk#164] (152) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#185] -Right keys [1]: [d_date_sk#187] +Left keys [1]: [cs_sold_date_sk#162] +Right keys [1]: [d_date_sk#164] Join condition: None (153) Project [codegen id : 4] -Output [2]: [cs_quantity#183 AS quantity#188, cs_list_price#184 AS list_price#189] -Input [4]: [cs_quantity#183, cs_list_price#184, cs_sold_date_sk#185, d_date_sk#187] +Output [2]: [cs_quantity#160 AS quantity#165, cs_list_price#161 AS list_price#166] +Input [4]: [cs_quantity#160, cs_list_price#161, cs_sold_date_sk#162, d_date_sk#164] (154) Scan parquet default.web_sales -Output [3]: [ws_quantity#190, ws_list_price#191, ws_sold_date_sk#192] +Output [3]: [ws_quantity#167, ws_list_price#168, ws_sold_date_sk#169] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#192), dynamicpruningexpression(ws_sold_date_sk#192 IN dynamicpruning#186)] +PartitionFilters: [isnotnull(ws_sold_date_sk#169), dynamicpruningexpression(ws_sold_date_sk#169 IN dynamicpruning#163)] ReadSchema: struct (155) ColumnarToRow [codegen id : 6] -Input [3]: [ws_quantity#190, ws_list_price#191, ws_sold_date_sk#192] +Input [3]: [ws_quantity#167, ws_list_price#168, ws_sold_date_sk#169] (156) ReusedExchange [Reuses operator id: 167] -Output [1]: [d_date_sk#193] +Output [1]: [d_date_sk#170] (157) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#192] -Right keys [1]: [d_date_sk#193] +Left keys [1]: [ws_sold_date_sk#169] +Right keys [1]: [d_date_sk#170] Join condition: None (158) Project [codegen id : 6] -Output [2]: [ws_quantity#190 AS quantity#194, ws_list_price#191 AS list_price#195] -Input [4]: [ws_quantity#190, ws_list_price#191, ws_sold_date_sk#192, d_date_sk#193] +Output [2]: [ws_quantity#167 AS quantity#171, ws_list_price#168 AS list_price#172] +Input [4]: [ws_quantity#167, ws_list_price#168, ws_sold_date_sk#169, d_date_sk#170] (159) Union (160) HashAggregate [codegen id : 7] -Input [2]: [quantity#181, list_price#182] +Input [2]: [quantity#158, list_price#159] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#181 as decimal(12,2))) * promote_precision(cast(list_price#182 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#196, count#197] -Results [2]: [sum#198, count#199] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#158 as decimal(12,2))) * promote_precision(cast(list_price#159 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#173, count#174] +Results [2]: [sum#175, count#176] (161) Exchange -Input [2]: [sum#198, count#199] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#200] +Input [2]: [sum#175, count#176] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=24] (162) HashAggregate [codegen id : 8] -Input [2]: [sum#198, count#199] +Input [2]: [sum#175, count#176] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#181 as decimal(12,2))) * promote_precision(cast(list_price#182 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#181 as decimal(12,2))) * promote_precision(cast(list_price#182 as decimal(12,2)))), DecimalType(18,2)))#201] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#181 as decimal(12,2))) * promote_precision(cast(list_price#182 as decimal(12,2)))), DecimalType(18,2)))#201 AS average_sales#202] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#158 as decimal(12,2))) * promote_precision(cast(list_price#159 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#158 as decimal(12,2))) * promote_precision(cast(list_price#159 as decimal(12,2)))), DecimalType(18,2)))#177] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#158 as decimal(12,2))) * promote_precision(cast(list_price#159 as decimal(12,2)))), DecimalType(18,2)))#177 AS average_sales#178] -Subquery:2 Hosting operator id = 144 Hosting Expression = ss_sold_date_sk#179 IN dynamicpruning#13 +Subquery:2 Hosting operator id = 144 Hosting Expression = ss_sold_date_sk#156 IN dynamicpruning#12 -Subquery:3 Hosting operator id = 149 Hosting Expression = cs_sold_date_sk#185 IN dynamicpruning#186 +Subquery:3 Hosting operator id = 149 Hosting Expression = cs_sold_date_sk#162 IN dynamicpruning#163 BroadcastExchange (167) +- * Project (166) +- * Filter (165) @@ -934,28 +934,28 @@ BroadcastExchange (167) (163) Scan parquet default.date_dim -Output [2]: [d_date_sk#187, d_year#203] +Output [2]: [d_date_sk#164, d_year#179] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (164) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#187, d_year#203] +Input [2]: [d_date_sk#164, d_year#179] (165) Filter [codegen id : 1] -Input [2]: [d_date_sk#187, d_year#203] -Condition : (((isnotnull(d_year#203) AND (d_year#203 >= 1998)) AND (d_year#203 <= 2000)) AND isnotnull(d_date_sk#187)) +Input [2]: [d_date_sk#164, d_year#179] +Condition : (((isnotnull(d_year#179) AND (d_year#179 >= 1998)) AND (d_year#179 <= 2000)) AND isnotnull(d_date_sk#164)) (166) Project [codegen id : 1] -Output [1]: [d_date_sk#187] -Input [2]: [d_date_sk#187, d_year#203] +Output [1]: [d_date_sk#164] +Input [2]: [d_date_sk#164, d_year#179] (167) BroadcastExchange -Input [1]: [d_date_sk#187] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#204] +Input [1]: [d_date_sk#164] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=25] -Subquery:4 Hosting operator id = 154 Hosting Expression = ws_sold_date_sk#192 IN dynamicpruning#186 +Subquery:4 Hosting operator id = 154 Hosting Expression = ws_sold_date_sk#169 IN dynamicpruning#163 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 BroadcastExchange (172) @@ -966,28 +966,28 @@ BroadcastExchange (172) (168) Scan parquet default.date_dim -Output [3]: [d_date_sk#46, d_year#205, d_moy#206] +Output [3]: [d_date_sk#36, d_year#180, d_moy#181] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct (169) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#46, d_year#205, d_moy#206] +Input [3]: [d_date_sk#36, d_year#180, d_moy#181] (170) Filter [codegen id : 1] -Input [3]: [d_date_sk#46, d_year#205, d_moy#206] -Condition : ((((isnotnull(d_year#205) AND isnotnull(d_moy#206)) AND (d_year#205 = 2000)) AND (d_moy#206 = 11)) AND isnotnull(d_date_sk#46)) +Input [3]: [d_date_sk#36, d_year#180, d_moy#181] +Condition : ((((isnotnull(d_year#180) AND isnotnull(d_moy#181)) AND (d_year#180 = 2000)) AND (d_moy#181 = 11)) AND isnotnull(d_date_sk#36)) (171) Project [codegen id : 1] -Output [1]: [d_date_sk#46] -Input [3]: [d_date_sk#46, d_year#205, d_moy#206] +Output [1]: [d_date_sk#36] +Input [3]: [d_date_sk#36, d_year#180, d_moy#181] (172) BroadcastExchange -Input [1]: [d_date_sk#46] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#207] +Input [1]: [d_date_sk#36] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=26] -Subquery:6 Hosting operator id = 9 Hosting Expression = ss_sold_date_sk#12 IN dynamicpruning#13 +Subquery:6 Hosting operator id = 9 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#12 BroadcastExchange (177) +- * Project (176) +- * Filter (175) @@ -996,37 +996,37 @@ BroadcastExchange (177) (173) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#208] +Output [2]: [d_date_sk#13, d_year#182] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (174) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#208] +Input [2]: [d_date_sk#13, d_year#182] (175) Filter [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#208] -Condition : (((isnotnull(d_year#208) AND (d_year#208 >= 1999)) AND (d_year#208 <= 2001)) AND isnotnull(d_date_sk#14)) +Input [2]: [d_date_sk#13, d_year#182] +Condition : (((isnotnull(d_year#182) AND (d_year#182 >= 1999)) AND (d_year#182 <= 2001)) AND isnotnull(d_date_sk#13)) (176) Project [codegen id : 1] -Output [1]: [d_date_sk#14] -Input [2]: [d_date_sk#14, d_year#208] +Output [1]: [d_date_sk#13] +Input [2]: [d_date_sk#13, d_year#182] (177) BroadcastExchange -Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#209] +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=27] -Subquery:7 Hosting operator id = 20 Hosting Expression = cs_sold_date_sk#21 IN dynamicpruning#13 +Subquery:7 Hosting operator id = 20 Hosting Expression = cs_sold_date_sk#19 IN dynamicpruning#12 -Subquery:8 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#36 IN dynamicpruning#13 +Subquery:8 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#29 IN dynamicpruning#12 -Subquery:9 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#65, [id=#66] +Subquery:9 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#52, [id=#53] -Subquery:10 Hosting operator id = 79 Hosting Expression = cs_sold_date_sk#70 IN dynamicpruning#5 +Subquery:10 Hosting operator id = 79 Hosting Expression = cs_sold_date_sk#57 IN dynamicpruning#5 -Subquery:11 Hosting operator id = 114 Hosting Expression = ReusedSubquery Subquery scalar-subquery#65, [id=#66] +Subquery:11 Hosting operator id = 114 Hosting Expression = ReusedSubquery Subquery scalar-subquery#52, [id=#53] -Subquery:12 Hosting operator id = 97 Hosting Expression = ws_sold_date_sk#92 IN dynamicpruning#5 +Subquery:12 Hosting operator id = 97 Hosting Expression = ws_sold_date_sk#77 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt index 2438fa9d7eb57..727c700735c0e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q14a/explain.txt @@ -215,7 +215,7 @@ Condition : isnotnull(i_item_sk#19) (19) BroadcastExchange Input [4]: [i_item_sk#19, i_brand_id#20, i_class_id#21, i_category_id#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (20) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_item_sk#17] @@ -227,20 +227,20 @@ Output [4]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22] Input [6]: [cs_item_sk#17, cs_sold_date_sk#18, i_item_sk#19, i_brand_id#20, i_class_id#21, i_category_id#22] (22) ReusedExchange [Reuses operator id: 159] -Output [1]: [d_date_sk#24] +Output [1]: [d_date_sk#23] (23) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_sold_date_sk#18] -Right keys [1]: [d_date_sk#24] +Right keys [1]: [d_date_sk#23] Join condition: None (24) Project [codegen id : 3] Output [3]: [i_brand_id#20, i_class_id#21, i_category_id#22] -Input [5]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22, d_date_sk#24] +Input [5]: [cs_sold_date_sk#18, i_brand_id#20, i_class_id#21, i_category_id#22, d_date_sk#23] (25) BroadcastExchange Input [3]: [i_brand_id#20, i_class_id#21, i_category_id#22] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#25] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=2] (26) BroadcastHashJoin [codegen id : 4] Left keys [6]: [coalesce(i_brand_id#14, 0), isnull(i_brand_id#14), coalesce(i_class_id#15, 0), isnull(i_class_id#15), coalesce(i_category_id#16, 0), isnull(i_category_id#16)] @@ -249,7 +249,7 @@ Join condition: None (27) BroadcastExchange Input [4]: [i_item_sk#13, i_brand_id#14, i_class_id#15, i_category_id#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (28) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_item_sk#10] @@ -261,471 +261,471 @@ Output [4]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16] Input [6]: [ss_item_sk#10, ss_sold_date_sk#11, i_item_sk#13, i_brand_id#14, i_class_id#15, i_category_id#16] (30) ReusedExchange [Reuses operator id: 159] -Output [1]: [d_date_sk#27] +Output [1]: [d_date_sk#24] (31) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_sold_date_sk#11] -Right keys [1]: [d_date_sk#27] +Right keys [1]: [d_date_sk#24] Join condition: None (32) Project [codegen id : 6] -Output [3]: [i_brand_id#14 AS brand_id#28, i_class_id#15 AS class_id#29, i_category_id#16 AS category_id#30] -Input [5]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16, d_date_sk#27] +Output [3]: [i_brand_id#14 AS brand_id#25, i_class_id#15 AS class_id#26, i_category_id#16 AS category_id#27] +Input [5]: [ss_sold_date_sk#11, i_brand_id#14, i_class_id#15, i_category_id#16, d_date_sk#24] (33) HashAggregate [codegen id : 6] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, category_id#30] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] +Results [3]: [brand_id#25, class_id#26, category_id#27] (34) Exchange -Input [3]: [brand_id#28, class_id#29, category_id#30] -Arguments: hashpartitioning(brand_id#28, class_id#29, category_id#30, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: hashpartitioning(brand_id#25, class_id#26, category_id#27, 5), ENSURE_REQUIREMENTS, [plan_id=4] (35) HashAggregate [codegen id : 10] -Input [3]: [brand_id#28, class_id#29, category_id#30] -Keys [3]: [brand_id#28, class_id#29, category_id#30] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Keys [3]: [brand_id#25, class_id#26, category_id#27] Functions: [] Aggregate Attributes: [] -Results [3]: [brand_id#28, class_id#29, category_id#30] +Results [3]: [brand_id#25, class_id#26, category_id#27] (36) Scan parquet default.web_sales -Output [2]: [ws_item_sk#32, ws_sold_date_sk#33] +Output [2]: [ws_item_sk#28, ws_sold_date_sk#29] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#33), dynamicpruningexpression(ws_sold_date_sk#33 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ws_sold_date_sk#29), dynamicpruningexpression(ws_sold_date_sk#29 IN dynamicpruning#12)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 9] -Input [2]: [ws_item_sk#32, ws_sold_date_sk#33] +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] (38) Filter [codegen id : 9] -Input [2]: [ws_item_sk#32, ws_sold_date_sk#33] -Condition : isnotnull(ws_item_sk#32) +Input [2]: [ws_item_sk#28, ws_sold_date_sk#29] +Condition : isnotnull(ws_item_sk#28) (39) ReusedExchange [Reuses operator id: 19] -Output [4]: [i_item_sk#34, i_brand_id#35, i_class_id#36, i_category_id#37] +Output [4]: [i_item_sk#30, i_brand_id#31, i_class_id#32, i_category_id#33] (40) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_item_sk#32] -Right keys [1]: [i_item_sk#34] +Left keys [1]: [ws_item_sk#28] +Right keys [1]: [i_item_sk#30] Join condition: None (41) Project [codegen id : 9] -Output [4]: [ws_sold_date_sk#33, i_brand_id#35, i_class_id#36, i_category_id#37] -Input [6]: [ws_item_sk#32, ws_sold_date_sk#33, i_item_sk#34, i_brand_id#35, i_class_id#36, i_category_id#37] +Output [4]: [ws_sold_date_sk#29, i_brand_id#31, i_class_id#32, i_category_id#33] +Input [6]: [ws_item_sk#28, ws_sold_date_sk#29, i_item_sk#30, i_brand_id#31, i_class_id#32, i_category_id#33] (42) ReusedExchange [Reuses operator id: 159] -Output [1]: [d_date_sk#38] +Output [1]: [d_date_sk#34] (43) BroadcastHashJoin [codegen id : 9] -Left keys [1]: [ws_sold_date_sk#33] -Right keys [1]: [d_date_sk#38] +Left keys [1]: [ws_sold_date_sk#29] +Right keys [1]: [d_date_sk#34] Join condition: None (44) Project [codegen id : 9] -Output [3]: [i_brand_id#35, i_class_id#36, i_category_id#37] -Input [5]: [ws_sold_date_sk#33, i_brand_id#35, i_class_id#36, i_category_id#37, d_date_sk#38] +Output [3]: [i_brand_id#31, i_class_id#32, i_category_id#33] +Input [5]: [ws_sold_date_sk#29, i_brand_id#31, i_class_id#32, i_category_id#33, d_date_sk#34] (45) BroadcastExchange -Input [3]: [i_brand_id#35, i_class_id#36, i_category_id#37] -Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [id=#39] +Input [3]: [i_brand_id#31, i_class_id#32, i_category_id#33] +Arguments: HashedRelationBroadcastMode(List(coalesce(input[0, int, true], 0), isnull(input[0, int, true]), coalesce(input[1, int, true], 0), isnull(input[1, int, true]), coalesce(input[2, int, true], 0), isnull(input[2, int, true])),false), [plan_id=5] (46) BroadcastHashJoin [codegen id : 10] -Left keys [6]: [coalesce(brand_id#28, 0), isnull(brand_id#28), coalesce(class_id#29, 0), isnull(class_id#29), coalesce(category_id#30, 0), isnull(category_id#30)] -Right keys [6]: [coalesce(i_brand_id#35, 0), isnull(i_brand_id#35), coalesce(i_class_id#36, 0), isnull(i_class_id#36), coalesce(i_category_id#37, 0), isnull(i_category_id#37)] +Left keys [6]: [coalesce(brand_id#25, 0), isnull(brand_id#25), coalesce(class_id#26, 0), isnull(class_id#26), coalesce(category_id#27, 0), isnull(category_id#27)] +Right keys [6]: [coalesce(i_brand_id#31, 0), isnull(i_brand_id#31), coalesce(i_class_id#32, 0), isnull(i_class_id#32), coalesce(i_category_id#33, 0), isnull(i_category_id#33)] Join condition: None (47) BroadcastExchange -Input [3]: [brand_id#28, class_id#29, category_id#30] -Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [id=#40] +Input [3]: [brand_id#25, class_id#26, category_id#27] +Arguments: HashedRelationBroadcastMode(List(input[0, int, true], input[1, int, true], input[2, int, true]),false), [plan_id=6] (48) BroadcastHashJoin [codegen id : 11] Left keys [3]: [i_brand_id#7, i_class_id#8, i_category_id#9] -Right keys [3]: [brand_id#28, class_id#29, category_id#30] +Right keys [3]: [brand_id#25, class_id#26, category_id#27] Join condition: None (49) Project [codegen id : 11] -Output [1]: [i_item_sk#6 AS ss_item_sk#41] -Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#28, class_id#29, category_id#30] +Output [1]: [i_item_sk#6 AS ss_item_sk#35] +Input [7]: [i_item_sk#6, i_brand_id#7, i_class_id#8, i_category_id#9, brand_id#25, class_id#26, category_id#27] (50) BroadcastExchange -Input [1]: [ss_item_sk#41] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] +Input [1]: [ss_item_sk#35] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] (51) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [ss_item_sk#41] +Right keys [1]: [ss_item_sk#35] Join condition: None (52) Scan parquet default.item -Output [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] +Output [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (53) ColumnarToRow [codegen id : 23] -Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] +Input [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] (54) Filter [codegen id : 23] -Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -Condition : isnotnull(i_item_sk#43) +Input [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] +Condition : isnotnull(i_item_sk#36) (55) ReusedExchange [Reuses operator id: 50] -Output [1]: [ss_item_sk#41] +Output [1]: [ss_item_sk#35] (56) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [i_item_sk#43] -Right keys [1]: [ss_item_sk#41] +Left keys [1]: [i_item_sk#36] +Right keys [1]: [ss_item_sk#35] Join condition: None (57) BroadcastExchange -Input [4]: [i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#47] +Input [4]: [i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] (58) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#43] +Right keys [1]: [i_item_sk#36] Join condition: None (59) Project [codegen id : 25] -Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46] -Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#43, i_brand_id#44, i_class_id#45, i_category_id#46] +Output [6]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#37, i_class_id#38, i_category_id#39] +Input [8]: [ss_item_sk#1, ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_item_sk#36, i_brand_id#37, i_class_id#38, i_category_id#39] (60) ReusedExchange [Reuses operator id: 154] -Output [1]: [d_date_sk#48] +Output [1]: [d_date_sk#40] (61) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_sold_date_sk#4] -Right keys [1]: [d_date_sk#48] +Right keys [1]: [d_date_sk#40] Join condition: None (62) Project [codegen id : 25] -Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] -Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#44, i_class_id#45, i_category_id#46, d_date_sk#48] +Output [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#37, i_class_id#38, i_category_id#39] +Input [7]: [ss_quantity#2, ss_list_price#3, ss_sold_date_sk#4, i_brand_id#37, i_class_id#38, i_category_id#39, d_date_sk#40] (63) HashAggregate [codegen id : 25] -Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#44, i_class_id#45, i_category_id#46] -Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] +Input [5]: [ss_quantity#2, ss_list_price#3, i_brand_id#37, i_class_id#38, i_category_id#39] +Keys [3]: [i_brand_id#37, i_class_id#38, i_category_id#39] Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#49, isEmpty#50, count#51] -Results [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] +Aggregate Attributes [3]: [sum#41, isEmpty#42, count#43] +Results [6]: [i_brand_id#37, i_class_id#38, i_category_id#39, sum#44, isEmpty#45, count#46] (64) Exchange -Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] -Arguments: hashpartitioning(i_brand_id#44, i_class_id#45, i_category_id#46, 5), ENSURE_REQUIREMENTS, [id=#55] +Input [6]: [i_brand_id#37, i_class_id#38, i_category_id#39, sum#44, isEmpty#45, count#46] +Arguments: hashpartitioning(i_brand_id#37, i_class_id#38, i_category_id#39, 5), ENSURE_REQUIREMENTS, [plan_id=9] (65) HashAggregate [codegen id : 26] -Input [6]: [i_brand_id#44, i_class_id#45, i_category_id#46, sum#52, isEmpty#53, count#54] -Keys [3]: [i_brand_id#44, i_class_id#45, i_category_id#46] +Input [6]: [i_brand_id#37, i_class_id#38, i_category_id#39, sum#44, isEmpty#45, count#46] +Keys [3]: [i_brand_id#37, i_class_id#38, i_category_id#39] Functions [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56, count(1)#57] -Results [6]: [store AS channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#56 AS sales#59, count(1)#57 AS number_sales#60] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47, count(1)#48] +Results [6]: [store AS channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum(CheckOverflow((promote_precision(cast(ss_quantity#2 as decimal(12,2))) * promote_precision(cast(ss_list_price#3 as decimal(12,2)))), DecimalType(18,2)))#47 AS sales#50, count(1)#48 AS number_sales#51] (66) Filter [codegen id : 26] -Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60] -Condition : (isnotnull(sales#59) AND (cast(sales#59 as decimal(32,6)) > cast(Subquery scalar-subquery#61, [id=#62] as decimal(32,6)))) +Input [6]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sales#50, number_sales#51] +Condition : (isnotnull(sales#50) AND (cast(sales#50 as decimal(32,6)) > cast(Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) (67) Scan parquet default.catalog_sales -Output [4]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66] +Output [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#66), dynamicpruningexpression(cs_sold_date_sk#66 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#57), dynamicpruningexpression(cs_sold_date_sk#57 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (68) ColumnarToRow [codegen id : 51] -Input [4]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66] +Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] (69) Filter [codegen id : 51] -Input [4]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66] -Condition : isnotnull(cs_item_sk#63) +Input [4]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57] +Condition : isnotnull(cs_item_sk#54) (70) ReusedExchange [Reuses operator id: 50] -Output [1]: [ss_item_sk#41] +Output [1]: [ss_item_sk#35] (71) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [cs_item_sk#63] -Right keys [1]: [ss_item_sk#41] +Left keys [1]: [cs_item_sk#54] +Right keys [1]: [ss_item_sk#35] Join condition: None (72) ReusedExchange [Reuses operator id: 57] -Output [4]: [i_item_sk#67, i_brand_id#68, i_class_id#69, i_category_id#70] +Output [4]: [i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61] (73) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [cs_item_sk#63] -Right keys [1]: [i_item_sk#67] +Left keys [1]: [cs_item_sk#54] +Right keys [1]: [i_item_sk#58] Join condition: None (74) Project [codegen id : 51] -Output [6]: [cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_brand_id#68, i_class_id#69, i_category_id#70] -Input [8]: [cs_item_sk#63, cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_item_sk#67, i_brand_id#68, i_class_id#69, i_category_id#70] +Output [6]: [cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57, i_brand_id#59, i_class_id#60, i_category_id#61] +Input [8]: [cs_item_sk#54, cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57, i_item_sk#58, i_brand_id#59, i_class_id#60, i_category_id#61] (75) ReusedExchange [Reuses operator id: 154] -Output [1]: [d_date_sk#71] +Output [1]: [d_date_sk#62] (76) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [cs_sold_date_sk#66] -Right keys [1]: [d_date_sk#71] +Left keys [1]: [cs_sold_date_sk#57] +Right keys [1]: [d_date_sk#62] Join condition: None (77) Project [codegen id : 51] -Output [5]: [cs_quantity#64, cs_list_price#65, i_brand_id#68, i_class_id#69, i_category_id#70] -Input [7]: [cs_quantity#64, cs_list_price#65, cs_sold_date_sk#66, i_brand_id#68, i_class_id#69, i_category_id#70, d_date_sk#71] +Output [5]: [cs_quantity#55, cs_list_price#56, i_brand_id#59, i_class_id#60, i_category_id#61] +Input [7]: [cs_quantity#55, cs_list_price#56, cs_sold_date_sk#57, i_brand_id#59, i_class_id#60, i_category_id#61, d_date_sk#62] (78) HashAggregate [codegen id : 51] -Input [5]: [cs_quantity#64, cs_list_price#65, i_brand_id#68, i_class_id#69, i_category_id#70] -Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#72, isEmpty#73, count#74] -Results [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] +Input [5]: [cs_quantity#55, cs_list_price#56, i_brand_id#59, i_class_id#60, i_category_id#61] +Keys [3]: [i_brand_id#59, i_class_id#60, i_category_id#61] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#63, isEmpty#64, count#65] +Results [6]: [i_brand_id#59, i_class_id#60, i_category_id#61, sum#66, isEmpty#67, count#68] (79) Exchange -Input [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] -Arguments: hashpartitioning(i_brand_id#68, i_class_id#69, i_category_id#70, 5), ENSURE_REQUIREMENTS, [id=#78] +Input [6]: [i_brand_id#59, i_class_id#60, i_category_id#61, sum#66, isEmpty#67, count#68] +Arguments: hashpartitioning(i_brand_id#59, i_class_id#60, i_category_id#61, 5), ENSURE_REQUIREMENTS, [plan_id=10] (80) HashAggregate [codegen id : 52] -Input [6]: [i_brand_id#68, i_class_id#69, i_category_id#70, sum#75, isEmpty#76, count#77] -Keys [3]: [i_brand_id#68, i_class_id#69, i_category_id#70] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#79, count(1)#80] -Results [6]: [catalog AS channel#81, i_brand_id#68, i_class_id#69, i_category_id#70, sum(CheckOverflow((promote_precision(cast(cs_quantity#64 as decimal(12,2))) * promote_precision(cast(cs_list_price#65 as decimal(12,2)))), DecimalType(18,2)))#79 AS sales#82, count(1)#80 AS number_sales#83] +Input [6]: [i_brand_id#59, i_class_id#60, i_category_id#61, sum#66, isEmpty#67, count#68] +Keys [3]: [i_brand_id#59, i_class_id#60, i_category_id#61] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#69, count(1)#70] +Results [6]: [catalog AS channel#71, i_brand_id#59, i_class_id#60, i_category_id#61, sum(CheckOverflow((promote_precision(cast(cs_quantity#55 as decimal(12,2))) * promote_precision(cast(cs_list_price#56 as decimal(12,2)))), DecimalType(18,2)))#69 AS sales#72, count(1)#70 AS number_sales#73] (81) Filter [codegen id : 52] -Input [6]: [channel#81, i_brand_id#68, i_class_id#69, i_category_id#70, sales#82, number_sales#83] -Condition : (isnotnull(sales#82) AND (cast(sales#82 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#61, [id=#62] as decimal(32,6)))) +Input [6]: [channel#71, i_brand_id#59, i_class_id#60, i_category_id#61, sales#72, number_sales#73] +Condition : (isnotnull(sales#72) AND (cast(sales#72 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) (82) Scan parquet default.web_sales -Output [4]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87] +Output [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#87), dynamicpruningexpression(ws_sold_date_sk#87 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#77), dynamicpruningexpression(ws_sold_date_sk#77 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (83) ColumnarToRow [codegen id : 77] -Input [4]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87] +Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] (84) Filter [codegen id : 77] -Input [4]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87] -Condition : isnotnull(ws_item_sk#84) +Input [4]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77] +Condition : isnotnull(ws_item_sk#74) (85) ReusedExchange [Reuses operator id: 50] -Output [1]: [ss_item_sk#41] +Output [1]: [ss_item_sk#35] (86) BroadcastHashJoin [codegen id : 77] -Left keys [1]: [ws_item_sk#84] -Right keys [1]: [ss_item_sk#41] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [ss_item_sk#35] Join condition: None (87) ReusedExchange [Reuses operator id: 57] -Output [4]: [i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91] +Output [4]: [i_item_sk#78, i_brand_id#79, i_class_id#80, i_category_id#81] (88) BroadcastHashJoin [codegen id : 77] -Left keys [1]: [ws_item_sk#84] -Right keys [1]: [i_item_sk#88] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [i_item_sk#78] Join condition: None (89) Project [codegen id : 77] -Output [6]: [ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91] -Input [8]: [ws_item_sk#84, ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_item_sk#88, i_brand_id#89, i_class_id#90, i_category_id#91] +Output [6]: [ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77, i_brand_id#79, i_class_id#80, i_category_id#81] +Input [8]: [ws_item_sk#74, ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77, i_item_sk#78, i_brand_id#79, i_class_id#80, i_category_id#81] (90) ReusedExchange [Reuses operator id: 154] -Output [1]: [d_date_sk#92] +Output [1]: [d_date_sk#82] (91) BroadcastHashJoin [codegen id : 77] -Left keys [1]: [ws_sold_date_sk#87] -Right keys [1]: [d_date_sk#92] +Left keys [1]: [ws_sold_date_sk#77] +Right keys [1]: [d_date_sk#82] Join condition: None (92) Project [codegen id : 77] -Output [5]: [ws_quantity#85, ws_list_price#86, i_brand_id#89, i_class_id#90, i_category_id#91] -Input [7]: [ws_quantity#85, ws_list_price#86, ws_sold_date_sk#87, i_brand_id#89, i_class_id#90, i_category_id#91, d_date_sk#92] +Output [5]: [ws_quantity#75, ws_list_price#76, i_brand_id#79, i_class_id#80, i_category_id#81] +Input [7]: [ws_quantity#75, ws_list_price#76, ws_sold_date_sk#77, i_brand_id#79, i_class_id#80, i_category_id#81, d_date_sk#82] (93) HashAggregate [codegen id : 77] -Input [5]: [ws_quantity#85, ws_list_price#86, i_brand_id#89, i_class_id#90, i_category_id#91] -Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] -Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] -Aggregate Attributes [3]: [sum#93, isEmpty#94, count#95] -Results [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] +Input [5]: [ws_quantity#75, ws_list_price#76, i_brand_id#79, i_class_id#80, i_category_id#81] +Keys [3]: [i_brand_id#79, i_class_id#80, i_category_id#81] +Functions [2]: [partial_sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2))), partial_count(1)] +Aggregate Attributes [3]: [sum#83, isEmpty#84, count#85] +Results [6]: [i_brand_id#79, i_class_id#80, i_category_id#81, sum#86, isEmpty#87, count#88] (94) Exchange -Input [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] -Arguments: hashpartitioning(i_brand_id#89, i_class_id#90, i_category_id#91, 5), ENSURE_REQUIREMENTS, [id=#99] +Input [6]: [i_brand_id#79, i_class_id#80, i_category_id#81, sum#86, isEmpty#87, count#88] +Arguments: hashpartitioning(i_brand_id#79, i_class_id#80, i_category_id#81, 5), ENSURE_REQUIREMENTS, [plan_id=11] (95) HashAggregate [codegen id : 78] -Input [6]: [i_brand_id#89, i_class_id#90, i_category_id#91, sum#96, isEmpty#97, count#98] -Keys [3]: [i_brand_id#89, i_class_id#90, i_category_id#91] -Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2))), count(1)] -Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2)))#100, count(1)#101] -Results [6]: [web AS channel#102, i_brand_id#89, i_class_id#90, i_category_id#91, sum(CheckOverflow((promote_precision(cast(ws_quantity#85 as decimal(12,2))) * promote_precision(cast(ws_list_price#86 as decimal(12,2)))), DecimalType(18,2)))#100 AS sales#103, count(1)#101 AS number_sales#104] +Input [6]: [i_brand_id#79, i_class_id#80, i_category_id#81, sum#86, isEmpty#87, count#88] +Keys [3]: [i_brand_id#79, i_class_id#80, i_category_id#81] +Functions [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2))), count(1)] +Aggregate Attributes [2]: [sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2)))#89, count(1)#90] +Results [6]: [web AS channel#91, i_brand_id#79, i_class_id#80, i_category_id#81, sum(CheckOverflow((promote_precision(cast(ws_quantity#75 as decimal(12,2))) * promote_precision(cast(ws_list_price#76 as decimal(12,2)))), DecimalType(18,2)))#89 AS sales#92, count(1)#90 AS number_sales#93] (96) Filter [codegen id : 78] -Input [6]: [channel#102, i_brand_id#89, i_class_id#90, i_category_id#91, sales#103, number_sales#104] -Condition : (isnotnull(sales#103) AND (cast(sales#103 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#61, [id=#62] as decimal(32,6)))) +Input [6]: [channel#91, i_brand_id#79, i_class_id#80, i_category_id#81, sales#92, number_sales#93] +Condition : (isnotnull(sales#92) AND (cast(sales#92 as decimal(32,6)) > cast(ReusedSubquery Subquery scalar-subquery#52, [id=#53] as decimal(32,6)))) (97) Union (98) HashAggregate [codegen id : 79] -Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sales#59, number_sales#60] -Keys [4]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [partial_sum(sales#59), partial_sum(number_sales#60)] -Aggregate Attributes [3]: [sum#105, isEmpty#106, sum#107] -Results [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] +Input [6]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sales#50, number_sales#51] +Keys [4]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39] +Functions [2]: [partial_sum(sales#50), partial_sum(number_sales#51)] +Aggregate Attributes [3]: [sum#94, isEmpty#95, sum#96] +Results [7]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum#97, isEmpty#98, sum#99] (99) Exchange -Input [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] -Arguments: hashpartitioning(channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, 5), ENSURE_REQUIREMENTS, [id=#111] +Input [7]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum#97, isEmpty#98, sum#99] +Arguments: hashpartitioning(channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, 5), ENSURE_REQUIREMENTS, [plan_id=12] (100) HashAggregate [codegen id : 80] -Input [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] -Keys [4]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [sum(sales#59), sum(number_sales#60)] -Aggregate Attributes [2]: [sum(sales#59)#112, sum(number_sales#60)#113] -Results [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum(sales#59)#112 AS sum_sales#114, sum(number_sales#60)#113 AS number_sales#115] +Input [7]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum#97, isEmpty#98, sum#99] +Keys [4]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39] +Functions [2]: [sum(sales#50), sum(number_sales#51)] +Aggregate Attributes [2]: [sum(sales#50)#100, sum(number_sales#51)#101] +Results [6]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum(sales#50)#100 AS sum_sales#102, sum(number_sales#51)#101 AS number_sales#103] (101) ReusedExchange [Reuses operator id: 99] -Output [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] +Output [7]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum#97, isEmpty#98, sum#99] (102) HashAggregate [codegen id : 160] -Input [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] -Keys [4]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [sum(sales#59), sum(number_sales#60)] -Aggregate Attributes [2]: [sum(sales#59)#112, sum(number_sales#60)#113] -Results [5]: [channel#58, i_brand_id#44, i_class_id#45, sum(sales#59)#112 AS sum_sales#114, sum(number_sales#60)#113 AS number_sales#115] +Input [7]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum#97, isEmpty#98, sum#99] +Keys [4]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39] +Functions [2]: [sum(sales#50), sum(number_sales#51)] +Aggregate Attributes [2]: [sum(sales#50)#100, sum(number_sales#51)#101] +Results [5]: [channel#49, i_brand_id#37, i_class_id#38, sum(sales#50)#100 AS sum_sales#102, sum(number_sales#51)#101 AS number_sales#103] (103) HashAggregate [codegen id : 160] -Input [5]: [channel#58, i_brand_id#44, i_class_id#45, sum_sales#114, number_sales#115] -Keys [3]: [channel#58, i_brand_id#44, i_class_id#45] -Functions [2]: [partial_sum(sum_sales#114), partial_sum(number_sales#115)] -Aggregate Attributes [3]: [sum#116, isEmpty#117, sum#118] -Results [6]: [channel#58, i_brand_id#44, i_class_id#45, sum#119, isEmpty#120, sum#121] +Input [5]: [channel#49, i_brand_id#37, i_class_id#38, sum_sales#102, number_sales#103] +Keys [3]: [channel#49, i_brand_id#37, i_class_id#38] +Functions [2]: [partial_sum(sum_sales#102), partial_sum(number_sales#103)] +Aggregate Attributes [3]: [sum#104, isEmpty#105, sum#106] +Results [6]: [channel#49, i_brand_id#37, i_class_id#38, sum#107, isEmpty#108, sum#109] (104) Exchange -Input [6]: [channel#58, i_brand_id#44, i_class_id#45, sum#119, isEmpty#120, sum#121] -Arguments: hashpartitioning(channel#58, i_brand_id#44, i_class_id#45, 5), ENSURE_REQUIREMENTS, [id=#122] +Input [6]: [channel#49, i_brand_id#37, i_class_id#38, sum#107, isEmpty#108, sum#109] +Arguments: hashpartitioning(channel#49, i_brand_id#37, i_class_id#38, 5), ENSURE_REQUIREMENTS, [plan_id=13] (105) HashAggregate [codegen id : 161] -Input [6]: [channel#58, i_brand_id#44, i_class_id#45, sum#119, isEmpty#120, sum#121] -Keys [3]: [channel#58, i_brand_id#44, i_class_id#45] -Functions [2]: [sum(sum_sales#114), sum(number_sales#115)] -Aggregate Attributes [2]: [sum(sum_sales#114)#123, sum(number_sales#115)#124] -Results [6]: [channel#58, i_brand_id#44, i_class_id#45, null AS i_category_id#125, sum(sum_sales#114)#123 AS sum(sum_sales)#126, sum(number_sales#115)#124 AS sum(number_sales)#127] +Input [6]: [channel#49, i_brand_id#37, i_class_id#38, sum#107, isEmpty#108, sum#109] +Keys [3]: [channel#49, i_brand_id#37, i_class_id#38] +Functions [2]: [sum(sum_sales#102), sum(number_sales#103)] +Aggregate Attributes [2]: [sum(sum_sales#102)#110, sum(number_sales#103)#111] +Results [6]: [channel#49, i_brand_id#37, i_class_id#38, null AS i_category_id#112, sum(sum_sales#102)#110 AS sum(sum_sales)#113, sum(number_sales#103)#111 AS sum(number_sales)#114] (106) ReusedExchange [Reuses operator id: 99] -Output [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] +Output [7]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum#97, isEmpty#98, sum#99] (107) HashAggregate [codegen id : 241] -Input [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] -Keys [4]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [sum(sales#59), sum(number_sales#60)] -Aggregate Attributes [2]: [sum(sales#59)#112, sum(number_sales#60)#113] -Results [4]: [channel#58, i_brand_id#44, sum(sales#59)#112 AS sum_sales#114, sum(number_sales#60)#113 AS number_sales#115] +Input [7]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum#97, isEmpty#98, sum#99] +Keys [4]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39] +Functions [2]: [sum(sales#50), sum(number_sales#51)] +Aggregate Attributes [2]: [sum(sales#50)#100, sum(number_sales#51)#101] +Results [4]: [channel#49, i_brand_id#37, sum(sales#50)#100 AS sum_sales#102, sum(number_sales#51)#101 AS number_sales#103] (108) HashAggregate [codegen id : 241] -Input [4]: [channel#58, i_brand_id#44, sum_sales#114, number_sales#115] -Keys [2]: [channel#58, i_brand_id#44] -Functions [2]: [partial_sum(sum_sales#114), partial_sum(number_sales#115)] -Aggregate Attributes [3]: [sum#128, isEmpty#129, sum#130] -Results [5]: [channel#58, i_brand_id#44, sum#131, isEmpty#132, sum#133] +Input [4]: [channel#49, i_brand_id#37, sum_sales#102, number_sales#103] +Keys [2]: [channel#49, i_brand_id#37] +Functions [2]: [partial_sum(sum_sales#102), partial_sum(number_sales#103)] +Aggregate Attributes [3]: [sum#115, isEmpty#116, sum#117] +Results [5]: [channel#49, i_brand_id#37, sum#118, isEmpty#119, sum#120] (109) Exchange -Input [5]: [channel#58, i_brand_id#44, sum#131, isEmpty#132, sum#133] -Arguments: hashpartitioning(channel#58, i_brand_id#44, 5), ENSURE_REQUIREMENTS, [id=#134] +Input [5]: [channel#49, i_brand_id#37, sum#118, isEmpty#119, sum#120] +Arguments: hashpartitioning(channel#49, i_brand_id#37, 5), ENSURE_REQUIREMENTS, [plan_id=14] (110) HashAggregate [codegen id : 242] -Input [5]: [channel#58, i_brand_id#44, sum#131, isEmpty#132, sum#133] -Keys [2]: [channel#58, i_brand_id#44] -Functions [2]: [sum(sum_sales#114), sum(number_sales#115)] -Aggregate Attributes [2]: [sum(sum_sales#114)#135, sum(number_sales#115)#136] -Results [6]: [channel#58, i_brand_id#44, null AS i_class_id#137, null AS i_category_id#138, sum(sum_sales#114)#135 AS sum(sum_sales)#139, sum(number_sales#115)#136 AS sum(number_sales)#140] +Input [5]: [channel#49, i_brand_id#37, sum#118, isEmpty#119, sum#120] +Keys [2]: [channel#49, i_brand_id#37] +Functions [2]: [sum(sum_sales#102), sum(number_sales#103)] +Aggregate Attributes [2]: [sum(sum_sales#102)#121, sum(number_sales#103)#122] +Results [6]: [channel#49, i_brand_id#37, null AS i_class_id#123, null AS i_category_id#124, sum(sum_sales#102)#121 AS sum(sum_sales)#125, sum(number_sales#103)#122 AS sum(number_sales)#126] (111) ReusedExchange [Reuses operator id: 99] -Output [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] +Output [7]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum#97, isEmpty#98, sum#99] (112) HashAggregate [codegen id : 322] -Input [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] -Keys [4]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [sum(sales#59), sum(number_sales#60)] -Aggregate Attributes [2]: [sum(sales#59)#112, sum(number_sales#60)#113] -Results [3]: [channel#58, sum(sales#59)#112 AS sum_sales#114, sum(number_sales#60)#113 AS number_sales#115] +Input [7]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum#97, isEmpty#98, sum#99] +Keys [4]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39] +Functions [2]: [sum(sales#50), sum(number_sales#51)] +Aggregate Attributes [2]: [sum(sales#50)#100, sum(number_sales#51)#101] +Results [3]: [channel#49, sum(sales#50)#100 AS sum_sales#102, sum(number_sales#51)#101 AS number_sales#103] (113) HashAggregate [codegen id : 322] -Input [3]: [channel#58, sum_sales#114, number_sales#115] -Keys [1]: [channel#58] -Functions [2]: [partial_sum(sum_sales#114), partial_sum(number_sales#115)] -Aggregate Attributes [3]: [sum#141, isEmpty#142, sum#143] -Results [4]: [channel#58, sum#144, isEmpty#145, sum#146] +Input [3]: [channel#49, sum_sales#102, number_sales#103] +Keys [1]: [channel#49] +Functions [2]: [partial_sum(sum_sales#102), partial_sum(number_sales#103)] +Aggregate Attributes [3]: [sum#127, isEmpty#128, sum#129] +Results [4]: [channel#49, sum#130, isEmpty#131, sum#132] (114) Exchange -Input [4]: [channel#58, sum#144, isEmpty#145, sum#146] -Arguments: hashpartitioning(channel#58, 5), ENSURE_REQUIREMENTS, [id=#147] +Input [4]: [channel#49, sum#130, isEmpty#131, sum#132] +Arguments: hashpartitioning(channel#49, 5), ENSURE_REQUIREMENTS, [plan_id=15] (115) HashAggregate [codegen id : 323] -Input [4]: [channel#58, sum#144, isEmpty#145, sum#146] -Keys [1]: [channel#58] -Functions [2]: [sum(sum_sales#114), sum(number_sales#115)] -Aggregate Attributes [2]: [sum(sum_sales#114)#148, sum(number_sales#115)#149] -Results [6]: [channel#58, null AS i_brand_id#150, null AS i_class_id#151, null AS i_category_id#152, sum(sum_sales#114)#148 AS sum(sum_sales)#153, sum(number_sales#115)#149 AS sum(number_sales)#154] +Input [4]: [channel#49, sum#130, isEmpty#131, sum#132] +Keys [1]: [channel#49] +Functions [2]: [sum(sum_sales#102), sum(number_sales#103)] +Aggregate Attributes [2]: [sum(sum_sales#102)#133, sum(number_sales#103)#134] +Results [6]: [channel#49, null AS i_brand_id#135, null AS i_class_id#136, null AS i_category_id#137, sum(sum_sales#102)#133 AS sum(sum_sales)#138, sum(number_sales#103)#134 AS sum(number_sales)#139] (116) ReusedExchange [Reuses operator id: 99] -Output [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] +Output [7]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum#97, isEmpty#98, sum#99] (117) HashAggregate [codegen id : 403] -Input [7]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum#108, isEmpty#109, sum#110] -Keys [4]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46] -Functions [2]: [sum(sales#59), sum(number_sales#60)] -Aggregate Attributes [2]: [sum(sales#59)#112, sum(number_sales#60)#113] -Results [2]: [sum(sales#59)#112 AS sum_sales#114, sum(number_sales#60)#113 AS number_sales#115] +Input [7]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum#97, isEmpty#98, sum#99] +Keys [4]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39] +Functions [2]: [sum(sales#50), sum(number_sales#51)] +Aggregate Attributes [2]: [sum(sales#50)#100, sum(number_sales#51)#101] +Results [2]: [sum(sales#50)#100 AS sum_sales#102, sum(number_sales#51)#101 AS number_sales#103] (118) HashAggregate [codegen id : 403] -Input [2]: [sum_sales#114, number_sales#115] +Input [2]: [sum_sales#102, number_sales#103] Keys: [] -Functions [2]: [partial_sum(sum_sales#114), partial_sum(number_sales#115)] -Aggregate Attributes [3]: [sum#155, isEmpty#156, sum#157] -Results [3]: [sum#158, isEmpty#159, sum#160] +Functions [2]: [partial_sum(sum_sales#102), partial_sum(number_sales#103)] +Aggregate Attributes [3]: [sum#140, isEmpty#141, sum#142] +Results [3]: [sum#143, isEmpty#144, sum#145] (119) Exchange -Input [3]: [sum#158, isEmpty#159, sum#160] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#161] +Input [3]: [sum#143, isEmpty#144, sum#145] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=16] (120) HashAggregate [codegen id : 404] -Input [3]: [sum#158, isEmpty#159, sum#160] +Input [3]: [sum#143, isEmpty#144, sum#145] Keys: [] -Functions [2]: [sum(sum_sales#114), sum(number_sales#115)] -Aggregate Attributes [2]: [sum(sum_sales#114)#162, sum(number_sales#115)#163] -Results [6]: [null AS channel#164, null AS i_brand_id#165, null AS i_class_id#166, null AS i_category_id#167, sum(sum_sales#114)#162 AS sum(sum_sales)#168, sum(number_sales#115)#163 AS sum(number_sales)#169] +Functions [2]: [sum(sum_sales#102), sum(number_sales#103)] +Aggregate Attributes [2]: [sum(sum_sales#102)#146, sum(number_sales#103)#147] +Results [6]: [null AS channel#148, null AS i_brand_id#149, null AS i_class_id#150, null AS i_category_id#151, sum(sum_sales#102)#146 AS sum(sum_sales)#152, sum(number_sales#103)#147 AS sum(number_sales)#153] (121) Union (122) HashAggregate [codegen id : 405] -Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] -Keys [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] +Input [6]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum_sales#102, number_sales#103] +Keys [6]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum_sales#102, number_sales#103] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] +Results [6]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum_sales#102, number_sales#103] (123) Exchange -Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] -Arguments: hashpartitioning(channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115, 5), ENSURE_REQUIREMENTS, [id=#170] +Input [6]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum_sales#102, number_sales#103] +Arguments: hashpartitioning(channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum_sales#102, number_sales#103, 5), ENSURE_REQUIREMENTS, [plan_id=17] (124) HashAggregate [codegen id : 406] -Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] -Keys [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] +Input [6]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum_sales#102, number_sales#103] +Keys [6]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum_sales#102, number_sales#103] Functions: [] Aggregate Attributes: [] -Results [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] +Results [6]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum_sales#102, number_sales#103] (125) TakeOrderedAndProject -Input [6]: [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] -Arguments: 100, [channel#58 ASC NULLS FIRST, i_brand_id#44 ASC NULLS FIRST, i_class_id#45 ASC NULLS FIRST, i_category_id#46 ASC NULLS FIRST], [channel#58, i_brand_id#44, i_class_id#45, i_category_id#46, sum_sales#114, number_sales#115] +Input [6]: [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum_sales#102, number_sales#103] +Arguments: 100, [channel#49 ASC NULLS FIRST, i_brand_id#37 ASC NULLS FIRST, i_class_id#38 ASC NULLS FIRST, i_category_id#39 ASC NULLS FIRST], [channel#49, i_brand_id#37, i_class_id#38, i_category_id#39, sum_sales#102, number_sales#103] ===== Subqueries ===== -Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#61, [id=#62] +Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquery#52, [id=#53] * HashAggregate (144) +- Exchange (143) +- * HashAggregate (142) @@ -748,94 +748,94 @@ Subquery:1 Hosting operator id = 66 Hosting Expression = Subquery scalar-subquer (126) Scan parquet default.store_sales -Output [3]: [ss_quantity#171, ss_list_price#172, ss_sold_date_sk#173] +Output [3]: [ss_quantity#154, ss_list_price#155, ss_sold_date_sk#156] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#173), dynamicpruningexpression(ss_sold_date_sk#173 IN dynamicpruning#12)] +PartitionFilters: [isnotnull(ss_sold_date_sk#156), dynamicpruningexpression(ss_sold_date_sk#156 IN dynamicpruning#12)] ReadSchema: struct (127) ColumnarToRow [codegen id : 2] -Input [3]: [ss_quantity#171, ss_list_price#172, ss_sold_date_sk#173] +Input [3]: [ss_quantity#154, ss_list_price#155, ss_sold_date_sk#156] (128) ReusedExchange [Reuses operator id: 159] -Output [1]: [d_date_sk#174] +Output [1]: [d_date_sk#157] (129) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [ss_sold_date_sk#173] -Right keys [1]: [d_date_sk#174] +Left keys [1]: [ss_sold_date_sk#156] +Right keys [1]: [d_date_sk#157] Join condition: None (130) Project [codegen id : 2] -Output [2]: [ss_quantity#171 AS quantity#175, ss_list_price#172 AS list_price#176] -Input [4]: [ss_quantity#171, ss_list_price#172, ss_sold_date_sk#173, d_date_sk#174] +Output [2]: [ss_quantity#154 AS quantity#158, ss_list_price#155 AS list_price#159] +Input [4]: [ss_quantity#154, ss_list_price#155, ss_sold_date_sk#156, d_date_sk#157] (131) Scan parquet default.catalog_sales -Output [3]: [cs_quantity#177, cs_list_price#178, cs_sold_date_sk#179] +Output [3]: [cs_quantity#160, cs_list_price#161, cs_sold_date_sk#162] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#179), dynamicpruningexpression(cs_sold_date_sk#179 IN dynamicpruning#180)] +PartitionFilters: [isnotnull(cs_sold_date_sk#162), dynamicpruningexpression(cs_sold_date_sk#162 IN dynamicpruning#163)] ReadSchema: struct (132) ColumnarToRow [codegen id : 4] -Input [3]: [cs_quantity#177, cs_list_price#178, cs_sold_date_sk#179] +Input [3]: [cs_quantity#160, cs_list_price#161, cs_sold_date_sk#162] (133) ReusedExchange [Reuses operator id: 149] -Output [1]: [d_date_sk#181] +Output [1]: [d_date_sk#164] (134) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [cs_sold_date_sk#179] -Right keys [1]: [d_date_sk#181] +Left keys [1]: [cs_sold_date_sk#162] +Right keys [1]: [d_date_sk#164] Join condition: None (135) Project [codegen id : 4] -Output [2]: [cs_quantity#177 AS quantity#182, cs_list_price#178 AS list_price#183] -Input [4]: [cs_quantity#177, cs_list_price#178, cs_sold_date_sk#179, d_date_sk#181] +Output [2]: [cs_quantity#160 AS quantity#165, cs_list_price#161 AS list_price#166] +Input [4]: [cs_quantity#160, cs_list_price#161, cs_sold_date_sk#162, d_date_sk#164] (136) Scan parquet default.web_sales -Output [3]: [ws_quantity#184, ws_list_price#185, ws_sold_date_sk#186] +Output [3]: [ws_quantity#167, ws_list_price#168, ws_sold_date_sk#169] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#186), dynamicpruningexpression(ws_sold_date_sk#186 IN dynamicpruning#180)] +PartitionFilters: [isnotnull(ws_sold_date_sk#169), dynamicpruningexpression(ws_sold_date_sk#169 IN dynamicpruning#163)] ReadSchema: struct (137) ColumnarToRow [codegen id : 6] -Input [3]: [ws_quantity#184, ws_list_price#185, ws_sold_date_sk#186] +Input [3]: [ws_quantity#167, ws_list_price#168, ws_sold_date_sk#169] (138) ReusedExchange [Reuses operator id: 149] -Output [1]: [d_date_sk#187] +Output [1]: [d_date_sk#170] (139) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ws_sold_date_sk#186] -Right keys [1]: [d_date_sk#187] +Left keys [1]: [ws_sold_date_sk#169] +Right keys [1]: [d_date_sk#170] Join condition: None (140) Project [codegen id : 6] -Output [2]: [ws_quantity#184 AS quantity#188, ws_list_price#185 AS list_price#189] -Input [4]: [ws_quantity#184, ws_list_price#185, ws_sold_date_sk#186, d_date_sk#187] +Output [2]: [ws_quantity#167 AS quantity#171, ws_list_price#168 AS list_price#172] +Input [4]: [ws_quantity#167, ws_list_price#168, ws_sold_date_sk#169, d_date_sk#170] (141) Union (142) HashAggregate [codegen id : 7] -Input [2]: [quantity#175, list_price#176] +Input [2]: [quantity#158, list_price#159] Keys: [] -Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [2]: [sum#190, count#191] -Results [2]: [sum#192, count#193] +Functions [1]: [partial_avg(CheckOverflow((promote_precision(cast(quantity#158 as decimal(12,2))) * promote_precision(cast(list_price#159 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [2]: [sum#173, count#174] +Results [2]: [sum#175, count#176] (143) Exchange -Input [2]: [sum#192, count#193] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#194] +Input [2]: [sum#175, count#176] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=18] (144) HashAggregate [codegen id : 8] -Input [2]: [sum#192, count#193] +Input [2]: [sum#175, count#176] Keys: [] -Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2)))] -Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2)))#195] -Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#175 as decimal(12,2))) * promote_precision(cast(list_price#176 as decimal(12,2)))), DecimalType(18,2)))#195 AS average_sales#196] +Functions [1]: [avg(CheckOverflow((promote_precision(cast(quantity#158 as decimal(12,2))) * promote_precision(cast(list_price#159 as decimal(12,2)))), DecimalType(18,2)))] +Aggregate Attributes [1]: [avg(CheckOverflow((promote_precision(cast(quantity#158 as decimal(12,2))) * promote_precision(cast(list_price#159 as decimal(12,2)))), DecimalType(18,2)))#177] +Results [1]: [avg(CheckOverflow((promote_precision(cast(quantity#158 as decimal(12,2))) * promote_precision(cast(list_price#159 as decimal(12,2)))), DecimalType(18,2)))#177 AS average_sales#178] -Subquery:2 Hosting operator id = 126 Hosting Expression = ss_sold_date_sk#173 IN dynamicpruning#12 +Subquery:2 Hosting operator id = 126 Hosting Expression = ss_sold_date_sk#156 IN dynamicpruning#12 -Subquery:3 Hosting operator id = 131 Hosting Expression = cs_sold_date_sk#179 IN dynamicpruning#180 +Subquery:3 Hosting operator id = 131 Hosting Expression = cs_sold_date_sk#162 IN dynamicpruning#163 BroadcastExchange (149) +- * Project (148) +- * Filter (147) @@ -844,28 +844,28 @@ BroadcastExchange (149) (145) Scan parquet default.date_dim -Output [2]: [d_date_sk#181, d_year#197] +Output [2]: [d_date_sk#164, d_year#179] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1998), LessThanOrEqual(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (146) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#181, d_year#197] +Input [2]: [d_date_sk#164, d_year#179] (147) Filter [codegen id : 1] -Input [2]: [d_date_sk#181, d_year#197] -Condition : (((isnotnull(d_year#197) AND (d_year#197 >= 1998)) AND (d_year#197 <= 2000)) AND isnotnull(d_date_sk#181)) +Input [2]: [d_date_sk#164, d_year#179] +Condition : (((isnotnull(d_year#179) AND (d_year#179 >= 1998)) AND (d_year#179 <= 2000)) AND isnotnull(d_date_sk#164)) (148) Project [codegen id : 1] -Output [1]: [d_date_sk#181] -Input [2]: [d_date_sk#181, d_year#197] +Output [1]: [d_date_sk#164] +Input [2]: [d_date_sk#164, d_year#179] (149) BroadcastExchange -Input [1]: [d_date_sk#181] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#198] +Input [1]: [d_date_sk#164] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=19] -Subquery:4 Hosting operator id = 136 Hosting Expression = ws_sold_date_sk#186 IN dynamicpruning#180 +Subquery:4 Hosting operator id = 136 Hosting Expression = ws_sold_date_sk#169 IN dynamicpruning#163 Subquery:5 Hosting operator id = 1 Hosting Expression = ss_sold_date_sk#4 IN dynamicpruning#5 BroadcastExchange (154) @@ -876,26 +876,26 @@ BroadcastExchange (154) (150) Scan parquet default.date_dim -Output [3]: [d_date_sk#48, d_year#199, d_moy#200] +Output [3]: [d_date_sk#40, d_year#180, d_moy#181] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,11), IsNotNull(d_date_sk)] ReadSchema: struct (151) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#48, d_year#199, d_moy#200] +Input [3]: [d_date_sk#40, d_year#180, d_moy#181] (152) Filter [codegen id : 1] -Input [3]: [d_date_sk#48, d_year#199, d_moy#200] -Condition : ((((isnotnull(d_year#199) AND isnotnull(d_moy#200)) AND (d_year#199 = 2000)) AND (d_moy#200 = 11)) AND isnotnull(d_date_sk#48)) +Input [3]: [d_date_sk#40, d_year#180, d_moy#181] +Condition : ((((isnotnull(d_year#180) AND isnotnull(d_moy#181)) AND (d_year#180 = 2000)) AND (d_moy#181 = 11)) AND isnotnull(d_date_sk#40)) (153) Project [codegen id : 1] -Output [1]: [d_date_sk#48] -Input [3]: [d_date_sk#48, d_year#199, d_moy#200] +Output [1]: [d_date_sk#40] +Input [3]: [d_date_sk#40, d_year#180, d_moy#181] (154) BroadcastExchange -Input [1]: [d_date_sk#48] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#201] +Input [1]: [d_date_sk#40] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=20] Subquery:6 Hosting operator id = 7 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#12 BroadcastExchange (159) @@ -906,37 +906,37 @@ BroadcastExchange (159) (155) Scan parquet default.date_dim -Output [2]: [d_date_sk#27, d_year#202] +Output [2]: [d_date_sk#24, d_year#182] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), GreaterThanOrEqual(d_year,1999), LessThanOrEqual(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (156) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#27, d_year#202] +Input [2]: [d_date_sk#24, d_year#182] (157) Filter [codegen id : 1] -Input [2]: [d_date_sk#27, d_year#202] -Condition : (((isnotnull(d_year#202) AND (d_year#202 >= 1999)) AND (d_year#202 <= 2001)) AND isnotnull(d_date_sk#27)) +Input [2]: [d_date_sk#24, d_year#182] +Condition : (((isnotnull(d_year#182) AND (d_year#182 >= 1999)) AND (d_year#182 <= 2001)) AND isnotnull(d_date_sk#24)) (158) Project [codegen id : 1] -Output [1]: [d_date_sk#27] -Input [2]: [d_date_sk#27, d_year#202] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_year#182] (159) BroadcastExchange -Input [1]: [d_date_sk#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#203] +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=21] Subquery:7 Hosting operator id = 13 Hosting Expression = cs_sold_date_sk#18 IN dynamicpruning#12 -Subquery:8 Hosting operator id = 36 Hosting Expression = ws_sold_date_sk#33 IN dynamicpruning#12 +Subquery:8 Hosting operator id = 36 Hosting Expression = ws_sold_date_sk#29 IN dynamicpruning#12 -Subquery:9 Hosting operator id = 81 Hosting Expression = ReusedSubquery Subquery scalar-subquery#61, [id=#62] +Subquery:9 Hosting operator id = 81 Hosting Expression = ReusedSubquery Subquery scalar-subquery#52, [id=#53] -Subquery:10 Hosting operator id = 67 Hosting Expression = cs_sold_date_sk#66 IN dynamicpruning#5 +Subquery:10 Hosting operator id = 67 Hosting Expression = cs_sold_date_sk#57 IN dynamicpruning#5 -Subquery:11 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#61, [id=#62] +Subquery:11 Hosting operator id = 96 Hosting Expression = ReusedSubquery Subquery scalar-subquery#52, [id=#53] -Subquery:12 Hosting operator id = 82 Hosting Expression = ws_sold_date_sk#87 IN dynamicpruning#5 +Subquery:12 Hosting operator id = 82 Hosting Expression = ws_sold_date_sk#77 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt index 506e18eabcc20..c77c01832e146 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a.sf100/explain.txt @@ -192,7 +192,7 @@ Input [4]: [cd_demo_sk#11, cd_gender#12, cd_education_status#13, cd_dep_count#14 (8) BroadcastExchange Input [2]: [cd_demo_sk#11, cd_dep_count#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_bill_cdemo_sk#2] @@ -204,396 +204,396 @@ Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5 Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#11, cd_dep_count#14] (11) ReusedExchange [Reuses operator id: 161] -Output [1]: [d_date_sk#16] +Output [1]: [d_date_sk#15] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#9] -Right keys [1]: [d_date_sk#16] +Right keys [1]: [d_date_sk#15] Join condition: None (13) Project [codegen id : 4] Output [8]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14] -Input [10]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, d_date_sk#16] +Input [10]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, d_date_sk#15] (14) Scan parquet default.item -Output [2]: [i_item_sk#17, i_item_id#18] +Output [2]: [i_item_sk#16, i_item_id#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_sk#17, i_item_id#18] +Input [2]: [i_item_sk#16, i_item_id#17] (16) Filter [codegen id : 3] -Input [2]: [i_item_sk#17, i_item_id#18] -Condition : isnotnull(i_item_sk#17) +Input [2]: [i_item_sk#16, i_item_id#17] +Condition : isnotnull(i_item_sk#16) (17) BroadcastExchange -Input [2]: [i_item_sk#17, i_item_id#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +Input [2]: [i_item_sk#16, i_item_id#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_item_sk#3] -Right keys [1]: [i_item_sk#17] +Right keys [1]: [i_item_sk#16] Join condition: None (19) Project [codegen id : 4] -Output [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#18] -Input [10]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_sk#17, i_item_id#18] +Output [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#17] +Input [10]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_sk#16, i_item_id#17] (20) Exchange -Input [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#18] -Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#17] +Arguments: hashpartitioning(cs_bill_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) Sort [codegen id : 5] -Input [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#18] +Input [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#17] Arguments: [cs_bill_customer_sk#1 ASC NULLS FIRST], false, 0 (22) Scan parquet default.customer -Output [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Output [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [In(c_birth_month, [1,10,12,4,5,9]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 7] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] (24) Filter [codegen id : 7] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] -Condition : (((c_birth_month#24 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#21)) AND isnotnull(c_current_cdemo_sk#22)) AND isnotnull(c_current_addr_sk#23)) +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] +Condition : (((c_birth_month#21 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#18)) AND isnotnull(c_current_cdemo_sk#19)) AND isnotnull(c_current_addr_sk#20)) (25) Project [codegen id : 7] -Output [4]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_year#25] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Output [4]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_year#22] +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] (26) Scan parquet default.customer_address -Output [4]: [ca_address_sk#26, ca_county#27, ca_state#28, ca_country#29] +Output [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 6] -Input [4]: [ca_address_sk#26, ca_county#27, ca_state#28, ca_country#29] +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] (28) Filter [codegen id : 6] -Input [4]: [ca_address_sk#26, ca_county#27, ca_state#28, ca_country#29] -Condition : (ca_state#28 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#26)) +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] +Condition : (ca_state#25 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#23)) (29) BroadcastExchange -Input [4]: [ca_address_sk#26, ca_county#27, ca_state#28, ca_country#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#30] +Input [4]: [ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (30) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#23] -Right keys [1]: [ca_address_sk#26] +Left keys [1]: [c_current_addr_sk#20] +Right keys [1]: [ca_address_sk#23] Join condition: None (31) Project [codegen id : 7] -Output [6]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29] -Input [8]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_year#25, ca_address_sk#26, ca_county#27, ca_state#28, ca_country#29] +Output [6]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26] +Input [8]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_year#22, ca_address_sk#23, ca_county#24, ca_state#25, ca_country#26] (32) Exchange -Input [6]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29] -Arguments: hashpartitioning(c_current_cdemo_sk#22, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [6]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26] +Arguments: hashpartitioning(c_current_cdemo_sk#19, 5), ENSURE_REQUIREMENTS, [plan_id=5] (33) Sort [codegen id : 8] -Input [6]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29] -Arguments: [c_current_cdemo_sk#22 ASC NULLS FIRST], false, 0 +Input [6]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26] +Arguments: [c_current_cdemo_sk#19 ASC NULLS FIRST], false, 0 (34) Scan parquet default.customer_demographics -Output [1]: [cd_demo_sk#32] +Output [1]: [cd_demo_sk#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 9] -Input [1]: [cd_demo_sk#32] +Input [1]: [cd_demo_sk#27] (36) Filter [codegen id : 9] -Input [1]: [cd_demo_sk#32] -Condition : isnotnull(cd_demo_sk#32) +Input [1]: [cd_demo_sk#27] +Condition : isnotnull(cd_demo_sk#27) (37) Exchange -Input [1]: [cd_demo_sk#32] -Arguments: hashpartitioning(cd_demo_sk#32, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [1]: [cd_demo_sk#27] +Arguments: hashpartitioning(cd_demo_sk#27, 5), ENSURE_REQUIREMENTS, [plan_id=6] (38) Sort [codegen id : 10] -Input [1]: [cd_demo_sk#32] -Arguments: [cd_demo_sk#32 ASC NULLS FIRST], false, 0 +Input [1]: [cd_demo_sk#27] +Arguments: [cd_demo_sk#27 ASC NULLS FIRST], false, 0 (39) SortMergeJoin [codegen id : 11] -Left keys [1]: [c_current_cdemo_sk#22] -Right keys [1]: [cd_demo_sk#32] +Left keys [1]: [c_current_cdemo_sk#19] +Right keys [1]: [cd_demo_sk#27] Join condition: None (40) Project [codegen id : 11] -Output [5]: [c_customer_sk#21, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29] -Input [7]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29, cd_demo_sk#32] +Output [5]: [c_customer_sk#18, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26] +Input [7]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26, cd_demo_sk#27] (41) Exchange -Input [5]: [c_customer_sk#21, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29] -Arguments: hashpartitioning(c_customer_sk#21, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [5]: [c_customer_sk#18, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26] +Arguments: hashpartitioning(c_customer_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) Sort [codegen id : 12] -Input [5]: [c_customer_sk#21, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29] -Arguments: [c_customer_sk#21 ASC NULLS FIRST], false, 0 +Input [5]: [c_customer_sk#18, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26] +Arguments: [c_customer_sk#18 ASC NULLS FIRST], false, 0 (43) SortMergeJoin [codegen id : 13] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#21] +Right keys [1]: [c_customer_sk#18] Join condition: None (44) Project [codegen id : 13] -Output [11]: [i_item_id#18, ca_country#29, ca_state#28, ca_county#27, cast(cs_quantity#4 as decimal(12,2)) AS agg1#35, cast(cs_list_price#5 as decimal(12,2)) AS agg2#36, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#37, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#38, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#39, cast(c_birth_year#25 as decimal(12,2)) AS agg6#40, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#41] -Input [13]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#18, c_customer_sk#21, c_birth_year#25, ca_county#27, ca_state#28, ca_country#29] +Output [11]: [i_item_id#17, ca_country#26, ca_state#25, ca_county#24, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#22 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#34] +Input [13]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#17, c_customer_sk#18, c_birth_year#22, ca_county#24, ca_state#25, ca_country#26] (45) HashAggregate [codegen id : 13] -Input [11]: [i_item_id#18, ca_country#29, ca_state#28, ca_county#27, agg1#35, agg2#36, agg3#37, agg4#38, agg5#39, agg6#40, agg7#41] -Keys [4]: [i_item_id#18, ca_country#29, ca_state#28, ca_county#27] -Functions [7]: [partial_avg(agg1#35), partial_avg(agg2#36), partial_avg(agg3#37), partial_avg(agg4#38), partial_avg(agg5#39), partial_avg(agg6#40), partial_avg(agg7#41)] -Aggregate Attributes [14]: [sum#42, count#43, sum#44, count#45, sum#46, count#47, sum#48, count#49, sum#50, count#51, sum#52, count#53, sum#54, count#55] -Results [18]: [i_item_id#18, ca_country#29, ca_state#28, ca_county#27, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] +Input [11]: [i_item_id#17, ca_country#26, ca_state#25, ca_county#24, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [4]: [i_item_id#17, ca_country#26, ca_state#25, ca_county#24] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46, sum#47, count#48] +Results [18]: [i_item_id#17, ca_country#26, ca_state#25, ca_county#24, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62] (46) Exchange -Input [18]: [i_item_id#18, ca_country#29, ca_state#28, ca_county#27, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] -Arguments: hashpartitioning(i_item_id#18, ca_country#29, ca_state#28, ca_county#27, 5), ENSURE_REQUIREMENTS, [id=#70] +Input [18]: [i_item_id#17, ca_country#26, ca_state#25, ca_county#24, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62] +Arguments: hashpartitioning(i_item_id#17, ca_country#26, ca_state#25, ca_county#24, 5), ENSURE_REQUIREMENTS, [plan_id=8] (47) HashAggregate [codegen id : 14] -Input [18]: [i_item_id#18, ca_country#29, ca_state#28, ca_county#27, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63, sum#64, count#65, sum#66, count#67, sum#68, count#69] -Keys [4]: [i_item_id#18, ca_country#29, ca_state#28, ca_county#27] -Functions [7]: [avg(agg1#35), avg(agg2#36), avg(agg3#37), avg(agg4#38), avg(agg5#39), avg(agg6#40), avg(agg7#41)] -Aggregate Attributes [7]: [avg(agg1#35)#71, avg(agg2#36)#72, avg(agg3#37)#73, avg(agg4#38)#74, avg(agg5#39)#75, avg(agg6#40)#76, avg(agg7#41)#77] -Results [11]: [i_item_id#18, ca_country#29, ca_state#28, ca_county#27, avg(agg1#35)#71 AS agg1#78, avg(agg2#36)#72 AS agg2#79, avg(agg3#37)#73 AS agg3#80, avg(agg4#38)#74 AS agg4#81, avg(agg5#39)#75 AS agg5#82, avg(agg6#40)#76 AS agg6#83, avg(agg7#41)#77 AS agg7#84] +Input [18]: [i_item_id#17, ca_country#26, ca_state#25, ca_county#24, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62] +Keys [4]: [i_item_id#17, ca_country#26, ca_state#25, ca_county#24] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#63, avg(agg2#29)#64, avg(agg3#30)#65, avg(agg4#31)#66, avg(agg5#32)#67, avg(agg6#33)#68, avg(agg7#34)#69] +Results [11]: [i_item_id#17, ca_country#26, ca_state#25, ca_county#24, avg(agg1#28)#63 AS agg1#70, avg(agg2#29)#64 AS agg2#71, avg(agg3#30)#65 AS agg3#72, avg(agg4#31)#66 AS agg4#73, avg(agg5#32)#67 AS agg5#74, avg(agg6#33)#68 AS agg6#75, avg(agg7#34)#69 AS agg7#76] (48) ReusedExchange [Reuses operator id: 20] -Output [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#18] +Output [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#17] (49) Sort [codegen id : 19] -Input [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#18] +Input [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#17] Arguments: [cs_bill_customer_sk#1 ASC NULLS FIRST], false, 0 (50) Scan parquet default.customer -Output [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Output [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [In(c_birth_month, [1,10,12,4,5,9]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 21] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] (52) Filter [codegen id : 21] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] -Condition : (((c_birth_month#24 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#21)) AND isnotnull(c_current_cdemo_sk#22)) AND isnotnull(c_current_addr_sk#23)) +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] +Condition : (((c_birth_month#21 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#18)) AND isnotnull(c_current_cdemo_sk#19)) AND isnotnull(c_current_addr_sk#20)) (53) Project [codegen id : 21] -Output [4]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_year#25] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Output [4]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_year#22] +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] (54) Scan parquet default.customer_address -Output [3]: [ca_address_sk#26, ca_state#28, ca_country#29] +Output [3]: [ca_address_sk#23, ca_state#25, ca_country#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] ReadSchema: struct (55) ColumnarToRow [codegen id : 20] -Input [3]: [ca_address_sk#26, ca_state#28, ca_country#29] +Input [3]: [ca_address_sk#23, ca_state#25, ca_country#26] (56) Filter [codegen id : 20] -Input [3]: [ca_address_sk#26, ca_state#28, ca_country#29] -Condition : (ca_state#28 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#26)) +Input [3]: [ca_address_sk#23, ca_state#25, ca_country#26] +Condition : (ca_state#25 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#23)) (57) BroadcastExchange -Input [3]: [ca_address_sk#26, ca_state#28, ca_country#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#85] +Input [3]: [ca_address_sk#23, ca_state#25, ca_country#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] (58) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [c_current_addr_sk#23] -Right keys [1]: [ca_address_sk#26] +Left keys [1]: [c_current_addr_sk#20] +Right keys [1]: [ca_address_sk#23] Join condition: None (59) Project [codegen id : 21] -Output [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_state#28, ca_country#29] -Input [7]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_year#25, ca_address_sk#26, ca_state#28, ca_country#29] +Output [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_state#25, ca_country#26] +Input [7]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_year#22, ca_address_sk#23, ca_state#25, ca_country#26] (60) Exchange -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_state#28, ca_country#29] -Arguments: hashpartitioning(c_current_cdemo_sk#22, 5), ENSURE_REQUIREMENTS, [id=#86] +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_state#25, ca_country#26] +Arguments: hashpartitioning(c_current_cdemo_sk#19, 5), ENSURE_REQUIREMENTS, [plan_id=10] (61) Sort [codegen id : 22] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_state#28, ca_country#29] -Arguments: [c_current_cdemo_sk#22 ASC NULLS FIRST], false, 0 +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_state#25, ca_country#26] +Arguments: [c_current_cdemo_sk#19 ASC NULLS FIRST], false, 0 (62) ReusedExchange [Reuses operator id: 37] -Output [1]: [cd_demo_sk#32] +Output [1]: [cd_demo_sk#27] (63) Sort [codegen id : 24] -Input [1]: [cd_demo_sk#32] -Arguments: [cd_demo_sk#32 ASC NULLS FIRST], false, 0 +Input [1]: [cd_demo_sk#27] +Arguments: [cd_demo_sk#27 ASC NULLS FIRST], false, 0 (64) SortMergeJoin [codegen id : 25] -Left keys [1]: [c_current_cdemo_sk#22] -Right keys [1]: [cd_demo_sk#32] +Left keys [1]: [c_current_cdemo_sk#19] +Right keys [1]: [cd_demo_sk#27] Join condition: None (65) Project [codegen id : 25] -Output [4]: [c_customer_sk#21, c_birth_year#25, ca_state#28, ca_country#29] -Input [6]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_state#28, ca_country#29, cd_demo_sk#32] +Output [4]: [c_customer_sk#18, c_birth_year#22, ca_state#25, ca_country#26] +Input [6]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_state#25, ca_country#26, cd_demo_sk#27] (66) Exchange -Input [4]: [c_customer_sk#21, c_birth_year#25, ca_state#28, ca_country#29] -Arguments: hashpartitioning(c_customer_sk#21, 5), ENSURE_REQUIREMENTS, [id=#87] +Input [4]: [c_customer_sk#18, c_birth_year#22, ca_state#25, ca_country#26] +Arguments: hashpartitioning(c_customer_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=11] (67) Sort [codegen id : 26] -Input [4]: [c_customer_sk#21, c_birth_year#25, ca_state#28, ca_country#29] -Arguments: [c_customer_sk#21 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#18, c_birth_year#22, ca_state#25, ca_country#26] +Arguments: [c_customer_sk#18 ASC NULLS FIRST], false, 0 (68) SortMergeJoin [codegen id : 27] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#21] +Right keys [1]: [c_customer_sk#18] Join condition: None (69) Project [codegen id : 27] -Output [10]: [i_item_id#18, ca_country#29, ca_state#28, cast(cs_quantity#4 as decimal(12,2)) AS agg1#35, cast(cs_list_price#5 as decimal(12,2)) AS agg2#36, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#37, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#38, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#39, cast(c_birth_year#25 as decimal(12,2)) AS agg6#40, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#41] -Input [12]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#18, c_customer_sk#21, c_birth_year#25, ca_state#28, ca_country#29] +Output [10]: [i_item_id#17, ca_country#26, ca_state#25, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#22 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#34] +Input [12]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#17, c_customer_sk#18, c_birth_year#22, ca_state#25, ca_country#26] (70) HashAggregate [codegen id : 27] -Input [10]: [i_item_id#18, ca_country#29, ca_state#28, agg1#35, agg2#36, agg3#37, agg4#38, agg5#39, agg6#40, agg7#41] -Keys [3]: [i_item_id#18, ca_country#29, ca_state#28] -Functions [7]: [partial_avg(agg1#35), partial_avg(agg2#36), partial_avg(agg3#37), partial_avg(agg4#38), partial_avg(agg5#39), partial_avg(agg6#40), partial_avg(agg7#41)] -Aggregate Attributes [14]: [sum#88, count#89, sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97, sum#98, count#99, sum#100, count#101] -Results [17]: [i_item_id#18, ca_country#29, ca_state#28, sum#102, count#103, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115] +Input [10]: [i_item_id#17, ca_country#26, ca_state#25, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [3]: [i_item_id#17, ca_country#26, ca_state#25] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#77, count#78, sum#79, count#80, sum#81, count#82, sum#83, count#84, sum#85, count#86, sum#87, count#88, sum#89, count#90] +Results [17]: [i_item_id#17, ca_country#26, ca_state#25, sum#91, count#92, sum#93, count#94, sum#95, count#96, sum#97, count#98, sum#99, count#100, sum#101, count#102, sum#103, count#104] (71) Exchange -Input [17]: [i_item_id#18, ca_country#29, ca_state#28, sum#102, count#103, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115] -Arguments: hashpartitioning(i_item_id#18, ca_country#29, ca_state#28, 5), ENSURE_REQUIREMENTS, [id=#116] +Input [17]: [i_item_id#17, ca_country#26, ca_state#25, sum#91, count#92, sum#93, count#94, sum#95, count#96, sum#97, count#98, sum#99, count#100, sum#101, count#102, sum#103, count#104] +Arguments: hashpartitioning(i_item_id#17, ca_country#26, ca_state#25, 5), ENSURE_REQUIREMENTS, [plan_id=12] (72) HashAggregate [codegen id : 28] -Input [17]: [i_item_id#18, ca_country#29, ca_state#28, sum#102, count#103, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111, sum#112, count#113, sum#114, count#115] -Keys [3]: [i_item_id#18, ca_country#29, ca_state#28] -Functions [7]: [avg(agg1#35), avg(agg2#36), avg(agg3#37), avg(agg4#38), avg(agg5#39), avg(agg6#40), avg(agg7#41)] -Aggregate Attributes [7]: [avg(agg1#35)#117, avg(agg2#36)#118, avg(agg3#37)#119, avg(agg4#38)#120, avg(agg5#39)#121, avg(agg6#40)#122, avg(agg7#41)#123] -Results [11]: [i_item_id#18, ca_country#29, ca_state#28, null AS county#124, avg(agg1#35)#117 AS agg1#125, avg(agg2#36)#118 AS agg2#126, avg(agg3#37)#119 AS agg3#127, avg(agg4#38)#120 AS agg4#128, avg(agg5#39)#121 AS agg5#129, avg(agg6#40)#122 AS agg6#130, avg(agg7#41)#123 AS agg7#131] +Input [17]: [i_item_id#17, ca_country#26, ca_state#25, sum#91, count#92, sum#93, count#94, sum#95, count#96, sum#97, count#98, sum#99, count#100, sum#101, count#102, sum#103, count#104] +Keys [3]: [i_item_id#17, ca_country#26, ca_state#25] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#105, avg(agg2#29)#106, avg(agg3#30)#107, avg(agg4#31)#108, avg(agg5#32)#109, avg(agg6#33)#110, avg(agg7#34)#111] +Results [11]: [i_item_id#17, ca_country#26, ca_state#25, null AS county#112, avg(agg1#28)#105 AS agg1#113, avg(agg2#29)#106 AS agg2#114, avg(agg3#30)#107 AS agg3#115, avg(agg4#31)#108 AS agg4#116, avg(agg5#32)#109 AS agg5#117, avg(agg6#33)#110 AS agg6#118, avg(agg7#34)#111 AS agg7#119] (73) ReusedExchange [Reuses operator id: 20] -Output [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#18] +Output [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#17] (74) Sort [codegen id : 33] -Input [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#18] +Input [8]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#17] Arguments: [cs_bill_customer_sk#1 ASC NULLS FIRST], false, 0 (75) Scan parquet default.customer -Output [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Output [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [In(c_birth_month, [1,10,12,4,5,9]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (76) ColumnarToRow [codegen id : 35] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] (77) Filter [codegen id : 35] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] -Condition : (((c_birth_month#24 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#21)) AND isnotnull(c_current_cdemo_sk#22)) AND isnotnull(c_current_addr_sk#23)) +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] +Condition : (((c_birth_month#21 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#18)) AND isnotnull(c_current_cdemo_sk#19)) AND isnotnull(c_current_addr_sk#20)) (78) Project [codegen id : 35] -Output [4]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_year#25] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Output [4]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_year#22] +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] (79) Scan parquet default.customer_address -Output [3]: [ca_address_sk#26, ca_state#28, ca_country#29] +Output [3]: [ca_address_sk#23, ca_state#25, ca_country#26] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] ReadSchema: struct (80) ColumnarToRow [codegen id : 34] -Input [3]: [ca_address_sk#26, ca_state#28, ca_country#29] +Input [3]: [ca_address_sk#23, ca_state#25, ca_country#26] (81) Filter [codegen id : 34] -Input [3]: [ca_address_sk#26, ca_state#28, ca_country#29] -Condition : (ca_state#28 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#26)) +Input [3]: [ca_address_sk#23, ca_state#25, ca_country#26] +Condition : (ca_state#25 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#23)) (82) Project [codegen id : 34] -Output [2]: [ca_address_sk#26, ca_country#29] -Input [3]: [ca_address_sk#26, ca_state#28, ca_country#29] +Output [2]: [ca_address_sk#23, ca_country#26] +Input [3]: [ca_address_sk#23, ca_state#25, ca_country#26] (83) BroadcastExchange -Input [2]: [ca_address_sk#26, ca_country#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#132] +Input [2]: [ca_address_sk#23, ca_country#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=13] (84) BroadcastHashJoin [codegen id : 35] -Left keys [1]: [c_current_addr_sk#23] -Right keys [1]: [ca_address_sk#26] +Left keys [1]: [c_current_addr_sk#20] +Right keys [1]: [ca_address_sk#23] Join condition: None (85) Project [codegen id : 35] -Output [4]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_country#29] -Input [6]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_year#25, ca_address_sk#26, ca_country#29] +Output [4]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_country#26] +Input [6]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_year#22, ca_address_sk#23, ca_country#26] (86) Exchange -Input [4]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_country#29] -Arguments: hashpartitioning(c_current_cdemo_sk#22, 5), ENSURE_REQUIREMENTS, [id=#133] +Input [4]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_country#26] +Arguments: hashpartitioning(c_current_cdemo_sk#19, 5), ENSURE_REQUIREMENTS, [plan_id=14] (87) Sort [codegen id : 36] -Input [4]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_country#29] -Arguments: [c_current_cdemo_sk#22 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_country#26] +Arguments: [c_current_cdemo_sk#19 ASC NULLS FIRST], false, 0 (88) ReusedExchange [Reuses operator id: 37] -Output [1]: [cd_demo_sk#32] +Output [1]: [cd_demo_sk#27] (89) Sort [codegen id : 38] -Input [1]: [cd_demo_sk#32] -Arguments: [cd_demo_sk#32 ASC NULLS FIRST], false, 0 +Input [1]: [cd_demo_sk#27] +Arguments: [cd_demo_sk#27 ASC NULLS FIRST], false, 0 (90) SortMergeJoin [codegen id : 39] -Left keys [1]: [c_current_cdemo_sk#22] -Right keys [1]: [cd_demo_sk#32] +Left keys [1]: [c_current_cdemo_sk#19] +Right keys [1]: [cd_demo_sk#27] Join condition: None (91) Project [codegen id : 39] -Output [3]: [c_customer_sk#21, c_birth_year#25, ca_country#29] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, ca_country#29, cd_demo_sk#32] +Output [3]: [c_customer_sk#18, c_birth_year#22, ca_country#26] +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, ca_country#26, cd_demo_sk#27] (92) Exchange -Input [3]: [c_customer_sk#21, c_birth_year#25, ca_country#29] -Arguments: hashpartitioning(c_customer_sk#21, 5), ENSURE_REQUIREMENTS, [id=#134] +Input [3]: [c_customer_sk#18, c_birth_year#22, ca_country#26] +Arguments: hashpartitioning(c_customer_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=15] (93) Sort [codegen id : 40] -Input [3]: [c_customer_sk#21, c_birth_year#25, ca_country#29] -Arguments: [c_customer_sk#21 ASC NULLS FIRST], false, 0 +Input [3]: [c_customer_sk#18, c_birth_year#22, ca_country#26] +Arguments: [c_customer_sk#18 ASC NULLS FIRST], false, 0 (94) SortMergeJoin [codegen id : 41] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#21] +Right keys [1]: [c_customer_sk#18] Join condition: None (95) Project [codegen id : 41] -Output [9]: [i_item_id#18, ca_country#29, cast(cs_quantity#4 as decimal(12,2)) AS agg1#35, cast(cs_list_price#5 as decimal(12,2)) AS agg2#36, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#37, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#38, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#39, cast(c_birth_year#25 as decimal(12,2)) AS agg6#40, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#41] -Input [11]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#18, c_customer_sk#21, c_birth_year#25, ca_country#29] +Output [9]: [i_item_id#17, ca_country#26, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#22 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#34] +Input [11]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_id#17, c_customer_sk#18, c_birth_year#22, ca_country#26] (96) HashAggregate [codegen id : 41] -Input [9]: [i_item_id#18, ca_country#29, agg1#35, agg2#36, agg3#37, agg4#38, agg5#39, agg6#40, agg7#41] -Keys [2]: [i_item_id#18, ca_country#29] -Functions [7]: [partial_avg(agg1#35), partial_avg(agg2#36), partial_avg(agg3#37), partial_avg(agg4#38), partial_avg(agg5#39), partial_avg(agg6#40), partial_avg(agg7#41)] -Aggregate Attributes [14]: [sum#135, count#136, sum#137, count#138, sum#139, count#140, sum#141, count#142, sum#143, count#144, sum#145, count#146, sum#147, count#148] -Results [16]: [i_item_id#18, ca_country#29, sum#149, count#150, sum#151, count#152, sum#153, count#154, sum#155, count#156, sum#157, count#158, sum#159, count#160, sum#161, count#162] +Input [9]: [i_item_id#17, ca_country#26, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [2]: [i_item_id#17, ca_country#26] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#120, count#121, sum#122, count#123, sum#124, count#125, sum#126, count#127, sum#128, count#129, sum#130, count#131, sum#132, count#133] +Results [16]: [i_item_id#17, ca_country#26, sum#134, count#135, sum#136, count#137, sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147] (97) Exchange -Input [16]: [i_item_id#18, ca_country#29, sum#149, count#150, sum#151, count#152, sum#153, count#154, sum#155, count#156, sum#157, count#158, sum#159, count#160, sum#161, count#162] -Arguments: hashpartitioning(i_item_id#18, ca_country#29, 5), ENSURE_REQUIREMENTS, [id=#163] +Input [16]: [i_item_id#17, ca_country#26, sum#134, count#135, sum#136, count#137, sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147] +Arguments: hashpartitioning(i_item_id#17, ca_country#26, 5), ENSURE_REQUIREMENTS, [plan_id=16] (98) HashAggregate [codegen id : 42] -Input [16]: [i_item_id#18, ca_country#29, sum#149, count#150, sum#151, count#152, sum#153, count#154, sum#155, count#156, sum#157, count#158, sum#159, count#160, sum#161, count#162] -Keys [2]: [i_item_id#18, ca_country#29] -Functions [7]: [avg(agg1#35), avg(agg2#36), avg(agg3#37), avg(agg4#38), avg(agg5#39), avg(agg6#40), avg(agg7#41)] -Aggregate Attributes [7]: [avg(agg1#35)#164, avg(agg2#36)#165, avg(agg3#37)#166, avg(agg4#38)#167, avg(agg5#39)#168, avg(agg6#40)#169, avg(agg7#41)#170] -Results [11]: [i_item_id#18, ca_country#29, null AS ca_state#171, null AS county#172, avg(agg1#35)#164 AS agg1#173, avg(agg2#36)#165 AS agg2#174, avg(agg3#37)#166 AS agg3#175, avg(agg4#38)#167 AS agg4#176, avg(agg5#39)#168 AS agg5#177, avg(agg6#40)#169 AS agg6#178, avg(agg7#41)#170 AS agg7#179] +Input [16]: [i_item_id#17, ca_country#26, sum#134, count#135, sum#136, count#137, sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147] +Keys [2]: [i_item_id#17, ca_country#26] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#148, avg(agg2#29)#149, avg(agg3#30)#150, avg(agg4#31)#151, avg(agg5#32)#152, avg(agg6#33)#153, avg(agg7#34)#154] +Results [11]: [i_item_id#17, ca_country#26, null AS ca_state#155, null AS county#156, avg(agg1#28)#148 AS agg1#157, avg(agg2#29)#149 AS agg2#158, avg(agg3#30)#150 AS agg3#159, avg(agg4#31)#151 AS agg4#160, avg(agg5#32)#152 AS agg5#161, avg(agg6#33)#153 AS agg6#162, avg(agg7#34)#154 AS agg7#163] (99) Scan parquet default.catalog_sales Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] @@ -623,135 +623,135 @@ Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5 Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#11, cd_dep_count#14] (105) ReusedExchange [Reuses operator id: 161] -Output [1]: [d_date_sk#16] +Output [1]: [d_date_sk#15] (106) BroadcastHashJoin [codegen id : 49] Left keys [1]: [cs_sold_date_sk#9] -Right keys [1]: [d_date_sk#16] +Right keys [1]: [d_date_sk#15] Join condition: None (107) Project [codegen id : 49] Output [8]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14] -Input [10]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, d_date_sk#16] +Input [10]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, d_date_sk#15] (108) Scan parquet default.customer -Output [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Output [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [In(c_birth_month, [1,10,12,4,5,9]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (109) ColumnarToRow [codegen id : 46] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] (110) Filter [codegen id : 46] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] -Condition : (((c_birth_month#24 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#21)) AND isnotnull(c_current_cdemo_sk#22)) AND isnotnull(c_current_addr_sk#23)) +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] +Condition : (((c_birth_month#21 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#18)) AND isnotnull(c_current_cdemo_sk#19)) AND isnotnull(c_current_addr_sk#20)) (111) Project [codegen id : 46] -Output [4]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_year#25] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_month#24, c_birth_year#25] +Output [4]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_year#22] +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_month#21, c_birth_year#22] (112) Scan parquet default.customer_address -Output [2]: [ca_address_sk#26, ca_state#28] +Output [2]: [ca_address_sk#23, ca_state#25] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] ReadSchema: struct (113) ColumnarToRow [codegen id : 45] -Input [2]: [ca_address_sk#26, ca_state#28] +Input [2]: [ca_address_sk#23, ca_state#25] (114) Filter [codegen id : 45] -Input [2]: [ca_address_sk#26, ca_state#28] -Condition : (ca_state#28 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#26)) +Input [2]: [ca_address_sk#23, ca_state#25] +Condition : (ca_state#25 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#23)) (115) Project [codegen id : 45] -Output [1]: [ca_address_sk#26] -Input [2]: [ca_address_sk#26, ca_state#28] +Output [1]: [ca_address_sk#23] +Input [2]: [ca_address_sk#23, ca_state#25] (116) BroadcastExchange -Input [1]: [ca_address_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#180] +Input [1]: [ca_address_sk#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=17] (117) BroadcastHashJoin [codegen id : 46] -Left keys [1]: [c_current_addr_sk#23] -Right keys [1]: [ca_address_sk#26] +Left keys [1]: [c_current_addr_sk#20] +Right keys [1]: [ca_address_sk#23] Join condition: None (118) Project [codegen id : 46] -Output [3]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25] -Input [5]: [c_customer_sk#21, c_current_cdemo_sk#22, c_current_addr_sk#23, c_birth_year#25, ca_address_sk#26] +Output [3]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22] +Input [5]: [c_customer_sk#18, c_current_cdemo_sk#19, c_current_addr_sk#20, c_birth_year#22, ca_address_sk#23] (119) BroadcastExchange -Input [3]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [id=#181] +Input [3]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, true] as bigint)),false), [plan_id=18] (120) Scan parquet default.customer_demographics -Output [1]: [cd_demo_sk#32] +Output [1]: [cd_demo_sk#27] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (121) ColumnarToRow -Input [1]: [cd_demo_sk#32] +Input [1]: [cd_demo_sk#27] (122) Filter -Input [1]: [cd_demo_sk#32] -Condition : isnotnull(cd_demo_sk#32) +Input [1]: [cd_demo_sk#27] +Condition : isnotnull(cd_demo_sk#27) (123) BroadcastHashJoin [codegen id : 47] -Left keys [1]: [c_current_cdemo_sk#22] -Right keys [1]: [cd_demo_sk#32] +Left keys [1]: [c_current_cdemo_sk#19] +Right keys [1]: [cd_demo_sk#27] Join condition: None (124) Project [codegen id : 47] -Output [2]: [c_customer_sk#21, c_birth_year#25] -Input [4]: [c_customer_sk#21, c_current_cdemo_sk#22, c_birth_year#25, cd_demo_sk#32] +Output [2]: [c_customer_sk#18, c_birth_year#22] +Input [4]: [c_customer_sk#18, c_current_cdemo_sk#19, c_birth_year#22, cd_demo_sk#27] (125) BroadcastExchange -Input [2]: [c_customer_sk#21, c_birth_year#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#182] +Input [2]: [c_customer_sk#18, c_birth_year#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=19] (126) BroadcastHashJoin [codegen id : 49] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#21] +Right keys [1]: [c_customer_sk#18] Join condition: None (127) Project [codegen id : 49] -Output [8]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#25] -Input [10]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_customer_sk#21, c_birth_year#25] +Output [8]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#22] +Input [10]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_customer_sk#18, c_birth_year#22] (128) ReusedExchange [Reuses operator id: 17] -Output [2]: [i_item_sk#17, i_item_id#18] +Output [2]: [i_item_sk#16, i_item_id#17] (129) BroadcastHashJoin [codegen id : 49] Left keys [1]: [cs_item_sk#3] -Right keys [1]: [i_item_sk#17] +Right keys [1]: [i_item_sk#16] Join condition: None (130) Project [codegen id : 49] -Output [8]: [i_item_id#18, cast(cs_quantity#4 as decimal(12,2)) AS agg1#35, cast(cs_list_price#5 as decimal(12,2)) AS agg2#36, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#37, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#38, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#39, cast(c_birth_year#25 as decimal(12,2)) AS agg6#40, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#41] -Input [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#25, i_item_sk#17, i_item_id#18] +Output [8]: [i_item_id#17, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#22 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#34] +Input [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#22, i_item_sk#16, i_item_id#17] (131) HashAggregate [codegen id : 49] -Input [8]: [i_item_id#18, agg1#35, agg2#36, agg3#37, agg4#38, agg5#39, agg6#40, agg7#41] -Keys [1]: [i_item_id#18] -Functions [7]: [partial_avg(agg1#35), partial_avg(agg2#36), partial_avg(agg3#37), partial_avg(agg4#38), partial_avg(agg5#39), partial_avg(agg6#40), partial_avg(agg7#41)] -Aggregate Attributes [14]: [sum#183, count#184, sum#185, count#186, sum#187, count#188, sum#189, count#190, sum#191, count#192, sum#193, count#194, sum#195, count#196] -Results [15]: [i_item_id#18, sum#197, count#198, sum#199, count#200, sum#201, count#202, sum#203, count#204, sum#205, count#206, sum#207, count#208, sum#209, count#210] +Input [8]: [i_item_id#17, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [1]: [i_item_id#17] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#164, count#165, sum#166, count#167, sum#168, count#169, sum#170, count#171, sum#172, count#173, sum#174, count#175, sum#176, count#177] +Results [15]: [i_item_id#17, sum#178, count#179, sum#180, count#181, sum#182, count#183, sum#184, count#185, sum#186, count#187, sum#188, count#189, sum#190, count#191] (132) Exchange -Input [15]: [i_item_id#18, sum#197, count#198, sum#199, count#200, sum#201, count#202, sum#203, count#204, sum#205, count#206, sum#207, count#208, sum#209, count#210] -Arguments: hashpartitioning(i_item_id#18, 5), ENSURE_REQUIREMENTS, [id=#211] +Input [15]: [i_item_id#17, sum#178, count#179, sum#180, count#181, sum#182, count#183, sum#184, count#185, sum#186, count#187, sum#188, count#189, sum#190, count#191] +Arguments: hashpartitioning(i_item_id#17, 5), ENSURE_REQUIREMENTS, [plan_id=20] (133) HashAggregate [codegen id : 50] -Input [15]: [i_item_id#18, sum#197, count#198, sum#199, count#200, sum#201, count#202, sum#203, count#204, sum#205, count#206, sum#207, count#208, sum#209, count#210] -Keys [1]: [i_item_id#18] -Functions [7]: [avg(agg1#35), avg(agg2#36), avg(agg3#37), avg(agg4#38), avg(agg5#39), avg(agg6#40), avg(agg7#41)] -Aggregate Attributes [7]: [avg(agg1#35)#212, avg(agg2#36)#213, avg(agg3#37)#214, avg(agg4#38)#215, avg(agg5#39)#216, avg(agg6#40)#217, avg(agg7#41)#218] -Results [11]: [i_item_id#18, null AS ca_country#219, null AS ca_state#220, null AS county#221, avg(agg1#35)#212 AS agg1#222, avg(agg2#36)#213 AS agg2#223, avg(agg3#37)#214 AS agg3#224, avg(agg4#38)#215 AS agg4#225, avg(agg5#39)#216 AS agg5#226, avg(agg6#40)#217 AS agg6#227, avg(agg7#41)#218 AS agg7#228] +Input [15]: [i_item_id#17, sum#178, count#179, sum#180, count#181, sum#182, count#183, sum#184, count#185, sum#186, count#187, sum#188, count#189, sum#190, count#191] +Keys [1]: [i_item_id#17] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#192, avg(agg2#29)#193, avg(agg3#30)#194, avg(agg4#31)#195, avg(agg5#32)#196, avg(agg6#33)#197, avg(agg7#34)#198] +Results [11]: [i_item_id#17, null AS ca_country#199, null AS ca_state#200, null AS county#201, avg(agg1#28)#192 AS agg1#202, avg(agg2#29)#193 AS agg2#203, avg(agg3#30)#194 AS agg3#204, avg(agg4#31)#195 AS agg4#205, avg(agg5#32)#196 AS agg5#206, avg(agg6#33)#197 AS agg6#207, avg(agg7#34)#198 AS agg7#208] (134) Scan parquet default.catalog_sales Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] @@ -781,79 +781,79 @@ Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5 Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#11, cd_dep_count#14] (140) ReusedExchange [Reuses operator id: 161] -Output [1]: [d_date_sk#16] +Output [1]: [d_date_sk#15] (141) BroadcastHashJoin [codegen id : 57] Left keys [1]: [cs_sold_date_sk#9] -Right keys [1]: [d_date_sk#16] +Right keys [1]: [d_date_sk#15] Join condition: None (142) Project [codegen id : 57] Output [8]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14] -Input [10]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, d_date_sk#16] +Input [10]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, d_date_sk#15] (143) Scan parquet default.item -Output [1]: [i_item_sk#17] +Output [1]: [i_item_sk#16] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (144) ColumnarToRow [codegen id : 53] -Input [1]: [i_item_sk#17] +Input [1]: [i_item_sk#16] (145) Filter [codegen id : 53] -Input [1]: [i_item_sk#17] -Condition : isnotnull(i_item_sk#17) +Input [1]: [i_item_sk#16] +Condition : isnotnull(i_item_sk#16) (146) BroadcastExchange -Input [1]: [i_item_sk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#229] +Input [1]: [i_item_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=21] (147) BroadcastHashJoin [codegen id : 57] Left keys [1]: [cs_item_sk#3] -Right keys [1]: [i_item_sk#17] +Right keys [1]: [i_item_sk#16] Join condition: None (148) Project [codegen id : 57] Output [7]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14] -Input [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_sk#17] +Input [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, i_item_sk#16] (149) ReusedExchange [Reuses operator id: 125] -Output [2]: [c_customer_sk#21, c_birth_year#25] +Output [2]: [c_customer_sk#18, c_birth_year#22] (150) BroadcastHashJoin [codegen id : 57] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#21] +Right keys [1]: [c_customer_sk#18] Join condition: None (151) Project [codegen id : 57] -Output [7]: [cast(cs_quantity#4 as decimal(12,2)) AS agg1#35, cast(cs_list_price#5 as decimal(12,2)) AS agg2#36, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#37, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#38, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#39, cast(c_birth_year#25 as decimal(12,2)) AS agg6#40, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#41] -Input [9]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_customer_sk#21, c_birth_year#25] +Output [7]: [cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#22 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#34] +Input [9]: [cs_bill_customer_sk#1, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_customer_sk#18, c_birth_year#22] (152) HashAggregate [codegen id : 57] -Input [7]: [agg1#35, agg2#36, agg3#37, agg4#38, agg5#39, agg6#40, agg7#41] +Input [7]: [agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] Keys: [] -Functions [7]: [partial_avg(agg1#35), partial_avg(agg2#36), partial_avg(agg3#37), partial_avg(agg4#38), partial_avg(agg5#39), partial_avg(agg6#40), partial_avg(agg7#41)] -Aggregate Attributes [14]: [sum#230, count#231, sum#232, count#233, sum#234, count#235, sum#236, count#237, sum#238, count#239, sum#240, count#241, sum#242, count#243] -Results [14]: [sum#244, count#245, sum#246, count#247, sum#248, count#249, sum#250, count#251, sum#252, count#253, sum#254, count#255, sum#256, count#257] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#209, count#210, sum#211, count#212, sum#213, count#214, sum#215, count#216, sum#217, count#218, sum#219, count#220, sum#221, count#222] +Results [14]: [sum#223, count#224, sum#225, count#226, sum#227, count#228, sum#229, count#230, sum#231, count#232, sum#233, count#234, sum#235, count#236] (153) Exchange -Input [14]: [sum#244, count#245, sum#246, count#247, sum#248, count#249, sum#250, count#251, sum#252, count#253, sum#254, count#255, sum#256, count#257] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#258] +Input [14]: [sum#223, count#224, sum#225, count#226, sum#227, count#228, sum#229, count#230, sum#231, count#232, sum#233, count#234, sum#235, count#236] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=22] (154) HashAggregate [codegen id : 58] -Input [14]: [sum#244, count#245, sum#246, count#247, sum#248, count#249, sum#250, count#251, sum#252, count#253, sum#254, count#255, sum#256, count#257] +Input [14]: [sum#223, count#224, sum#225, count#226, sum#227, count#228, sum#229, count#230, sum#231, count#232, sum#233, count#234, sum#235, count#236] Keys: [] -Functions [7]: [avg(agg1#35), avg(agg2#36), avg(agg3#37), avg(agg4#38), avg(agg5#39), avg(agg6#40), avg(agg7#41)] -Aggregate Attributes [7]: [avg(agg1#35)#259, avg(agg2#36)#260, avg(agg3#37)#261, avg(agg4#38)#262, avg(agg5#39)#263, avg(agg6#40)#264, avg(agg7#41)#265] -Results [11]: [null AS i_item_id#266, null AS ca_country#267, null AS ca_state#268, null AS county#269, avg(agg1#35)#259 AS agg1#270, avg(agg2#36)#260 AS agg2#271, avg(agg3#37)#261 AS agg3#272, avg(agg4#38)#262 AS agg4#273, avg(agg5#39)#263 AS agg5#274, avg(agg6#40)#264 AS agg6#275, avg(agg7#41)#265 AS agg7#276] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#237, avg(agg2#29)#238, avg(agg3#30)#239, avg(agg4#31)#240, avg(agg5#32)#241, avg(agg6#33)#242, avg(agg7#34)#243] +Results [11]: [null AS i_item_id#244, null AS ca_country#245, null AS ca_state#246, null AS county#247, avg(agg1#28)#237 AS agg1#248, avg(agg2#29)#238 AS agg2#249, avg(agg3#30)#239 AS agg3#250, avg(agg4#31)#240 AS agg4#251, avg(agg5#32)#241 AS agg5#252, avg(agg6#33)#242 AS agg6#253, avg(agg7#34)#243 AS agg7#254] (155) Union (156) TakeOrderedAndProject -Input [11]: [i_item_id#18, ca_country#29, ca_state#28, ca_county#27, agg1#78, agg2#79, agg3#80, agg4#81, agg5#82, agg6#83, agg7#84] -Arguments: 100, [ca_country#29 ASC NULLS FIRST, ca_state#28 ASC NULLS FIRST, ca_county#27 ASC NULLS FIRST, i_item_id#18 ASC NULLS FIRST], [i_item_id#18, ca_country#29, ca_state#28, ca_county#27, agg1#78, agg2#79, agg3#80, agg4#81, agg5#82, agg6#83, agg7#84] +Input [11]: [i_item_id#17, ca_country#26, ca_state#25, ca_county#24, agg1#70, agg2#71, agg3#72, agg4#73, agg5#74, agg6#75, agg7#76] +Arguments: 100, [ca_country#26 ASC NULLS FIRST, ca_state#25 ASC NULLS FIRST, ca_county#24 ASC NULLS FIRST, i_item_id#17 ASC NULLS FIRST], [i_item_id#17, ca_country#26, ca_state#25, ca_county#24, agg1#70, agg2#71, agg3#72, agg4#73, agg5#74, agg6#75, agg7#76] ===== Subqueries ===== @@ -866,26 +866,26 @@ BroadcastExchange (161) (157) Scan parquet default.date_dim -Output [2]: [d_date_sk#16, d_year#277] +Output [2]: [d_date_sk#15, d_year#255] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (158) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#16, d_year#277] +Input [2]: [d_date_sk#15, d_year#255] (159) Filter [codegen id : 1] -Input [2]: [d_date_sk#16, d_year#277] -Condition : ((isnotnull(d_year#277) AND (d_year#277 = 2001)) AND isnotnull(d_date_sk#16)) +Input [2]: [d_date_sk#15, d_year#255] +Condition : ((isnotnull(d_year#255) AND (d_year#255 = 2001)) AND isnotnull(d_date_sk#15)) (160) Project [codegen id : 1] -Output [1]: [d_date_sk#16] -Input [2]: [d_date_sk#16, d_year#277] +Output [1]: [d_date_sk#15] +Input [2]: [d_date_sk#15, d_year#255] (161) BroadcastExchange -Input [1]: [d_date_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#278] +Input [1]: [d_date_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=23] Subquery:2 Hosting operator id = 99 Hosting Expression = cs_sold_date_sk#9 IN dynamicpruning#10 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a/explain.txt index 7bd7a7ae80176..daea93e01540d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q18a/explain.txt @@ -189,7 +189,7 @@ Input [4]: [cd_demo_sk#11, cd_gender#12, cd_education_status#13, cd_dep_count#14 (8) BroadcastExchange Input [2]: [cd_demo_sk#11, cd_dep_count#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 7] Left keys [1]: [cs_bill_cdemo_sk#2] @@ -201,146 +201,146 @@ Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5 Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#11, cd_dep_count#14] (11) Scan parquet default.customer -Output [5]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_month#19, c_birth_year#20] +Output [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [In(c_birth_month, [1,10,12,4,5,9]), IsNotNull(c_customer_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [5]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_month#19, c_birth_year#20] +Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] (13) Filter [codegen id : 2] -Input [5]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_month#19, c_birth_year#20] -Condition : (((c_birth_month#19 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#16)) AND isnotnull(c_current_cdemo_sk#17)) AND isnotnull(c_current_addr_sk#18)) +Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] +Condition : (((c_birth_month#18 IN (9,5,12,4,1,10) AND isnotnull(c_customer_sk#15)) AND isnotnull(c_current_cdemo_sk#16)) AND isnotnull(c_current_addr_sk#17)) (14) Project [codegen id : 2] -Output [4]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] -Input [5]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_month#19, c_birth_year#20] +Output [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [5]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_month#18, c_birth_year#19] (15) BroadcastExchange -Input [4]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] +Input [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 7] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#16] +Right keys [1]: [c_customer_sk#15] Join condition: None (17) Project [codegen id : 7] -Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] -Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] (18) Scan parquet default.customer_demographics -Output [1]: [cd_demo_sk#22] +Output [1]: [cd_demo_sk#20] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] -Input [1]: [cd_demo_sk#22] +Input [1]: [cd_demo_sk#20] (20) Filter [codegen id : 3] -Input [1]: [cd_demo_sk#22] -Condition : isnotnull(cd_demo_sk#22) +Input [1]: [cd_demo_sk#20] +Condition : isnotnull(cd_demo_sk#20) (21) BroadcastExchange -Input [1]: [cd_demo_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] +Input [1]: [cd_demo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (22) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_cdemo_sk#17] -Right keys [1]: [cd_demo_sk#22] +Left keys [1]: [c_current_cdemo_sk#16] +Right keys [1]: [cd_demo_sk#20] Join condition: None (23) Project [codegen id : 7] -Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#18, c_birth_year#20] -Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20, cd_demo_sk#22] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#17, c_birth_year#19] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19, cd_demo_sk#20] (24) Scan parquet default.customer_address -Output [4]: [ca_address_sk#24, ca_county#25, ca_state#26, ca_country#27] +Output [4]: [ca_address_sk#21, ca_county#22, ca_state#23, ca_country#24] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 4] -Input [4]: [ca_address_sk#24, ca_county#25, ca_state#26, ca_country#27] +Input [4]: [ca_address_sk#21, ca_county#22, ca_state#23, ca_country#24] (26) Filter [codegen id : 4] -Input [4]: [ca_address_sk#24, ca_county#25, ca_state#26, ca_country#27] -Condition : (ca_state#26 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#24)) +Input [4]: [ca_address_sk#21, ca_county#22, ca_state#23, ca_country#24] +Condition : (ca_state#23 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#21)) (27) BroadcastExchange -Input [4]: [ca_address_sk#24, ca_county#25, ca_state#26, ca_country#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] +Input [4]: [ca_address_sk#21, ca_county#22, ca_state#23, ca_country#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (28) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [c_current_addr_sk#18] -Right keys [1]: [ca_address_sk#24] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#21] Join condition: None (29) Project [codegen id : 7] -Output [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#20, ca_county#25, ca_state#26, ca_country#27] -Input [14]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#18, c_birth_year#20, ca_address_sk#24, ca_county#25, ca_state#26, ca_country#27] +Output [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#19, ca_county#22, ca_state#23, ca_country#24] +Input [14]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#17, c_birth_year#19, ca_address_sk#21, ca_county#22, ca_state#23, ca_country#24] (30) ReusedExchange [Reuses operator id: 158] -Output [1]: [d_date_sk#29] +Output [1]: [d_date_sk#25] (31) BroadcastHashJoin [codegen id : 7] Left keys [1]: [cs_sold_date_sk#9] -Right keys [1]: [d_date_sk#29] +Right keys [1]: [d_date_sk#25] Join condition: None (32) Project [codegen id : 7] -Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, ca_county#25, ca_state#26, ca_country#27] -Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#20, ca_county#25, ca_state#26, ca_country#27, d_date_sk#29] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, ca_county#22, ca_state#23, ca_country#24] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#19, ca_county#22, ca_state#23, ca_country#24, d_date_sk#25] (33) Scan parquet default.item -Output [2]: [i_item_sk#30, i_item_id#31] +Output [2]: [i_item_sk#26, i_item_id#27] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 6] -Input [2]: [i_item_sk#30, i_item_id#31] +Input [2]: [i_item_sk#26, i_item_id#27] (35) Filter [codegen id : 6] -Input [2]: [i_item_sk#30, i_item_id#31] -Condition : isnotnull(i_item_sk#30) +Input [2]: [i_item_sk#26, i_item_id#27] +Condition : isnotnull(i_item_sk#26) (36) BroadcastExchange -Input [2]: [i_item_sk#30, i_item_id#31] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#32] +Input [2]: [i_item_sk#26, i_item_id#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (37) BroadcastHashJoin [codegen id : 7] Left keys [1]: [cs_item_sk#3] -Right keys [1]: [i_item_sk#30] +Right keys [1]: [i_item_sk#26] Join condition: None (38) Project [codegen id : 7] -Output [11]: [i_item_id#31, ca_country#27, ca_state#26, ca_county#25, cast(cs_quantity#4 as decimal(12,2)) AS agg1#33, cast(cs_list_price#5 as decimal(12,2)) AS agg2#34, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#35, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#36, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#37, cast(c_birth_year#20 as decimal(12,2)) AS agg6#38, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#39] -Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, ca_county#25, ca_state#26, ca_country#27, i_item_sk#30, i_item_id#31] +Output [11]: [i_item_id#27, ca_country#24, ca_state#23, ca_county#22, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#19 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#34] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, ca_county#22, ca_state#23, ca_country#24, i_item_sk#26, i_item_id#27] (39) HashAggregate [codegen id : 7] -Input [11]: [i_item_id#31, ca_country#27, ca_state#26, ca_county#25, agg1#33, agg2#34, agg3#35, agg4#36, agg5#37, agg6#38, agg7#39] -Keys [4]: [i_item_id#31, ca_country#27, ca_state#26, ca_county#25] -Functions [7]: [partial_avg(agg1#33), partial_avg(agg2#34), partial_avg(agg3#35), partial_avg(agg4#36), partial_avg(agg5#37), partial_avg(agg6#38), partial_avg(agg7#39)] -Aggregate Attributes [14]: [sum#40, count#41, sum#42, count#43, sum#44, count#45, sum#46, count#47, sum#48, count#49, sum#50, count#51, sum#52, count#53] -Results [18]: [i_item_id#31, ca_country#27, ca_state#26, ca_county#25, sum#54, count#55, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63, sum#64, count#65, sum#66, count#67] +Input [11]: [i_item_id#27, ca_country#24, ca_state#23, ca_county#22, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [4]: [i_item_id#27, ca_country#24, ca_state#23, ca_county#22] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#35, count#36, sum#37, count#38, sum#39, count#40, sum#41, count#42, sum#43, count#44, sum#45, count#46, sum#47, count#48] +Results [18]: [i_item_id#27, ca_country#24, ca_state#23, ca_county#22, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62] (40) Exchange -Input [18]: [i_item_id#31, ca_country#27, ca_state#26, ca_county#25, sum#54, count#55, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63, sum#64, count#65, sum#66, count#67] -Arguments: hashpartitioning(i_item_id#31, ca_country#27, ca_state#26, ca_county#25, 5), ENSURE_REQUIREMENTS, [id=#68] +Input [18]: [i_item_id#27, ca_country#24, ca_state#23, ca_county#22, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62] +Arguments: hashpartitioning(i_item_id#27, ca_country#24, ca_state#23, ca_county#22, 5), ENSURE_REQUIREMENTS, [plan_id=6] (41) HashAggregate [codegen id : 8] -Input [18]: [i_item_id#31, ca_country#27, ca_state#26, ca_county#25, sum#54, count#55, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63, sum#64, count#65, sum#66, count#67] -Keys [4]: [i_item_id#31, ca_country#27, ca_state#26, ca_county#25] -Functions [7]: [avg(agg1#33), avg(agg2#34), avg(agg3#35), avg(agg4#36), avg(agg5#37), avg(agg6#38), avg(agg7#39)] -Aggregate Attributes [7]: [avg(agg1#33)#69, avg(agg2#34)#70, avg(agg3#35)#71, avg(agg4#36)#72, avg(agg5#37)#73, avg(agg6#38)#74, avg(agg7#39)#75] -Results [11]: [i_item_id#31, ca_country#27, ca_state#26, ca_county#25, avg(agg1#33)#69 AS agg1#76, avg(agg2#34)#70 AS agg2#77, avg(agg3#35)#71 AS agg3#78, avg(agg4#36)#72 AS agg4#79, avg(agg5#37)#73 AS agg5#80, avg(agg6#38)#74 AS agg6#81, avg(agg7#39)#75 AS agg7#82] +Input [18]: [i_item_id#27, ca_country#24, ca_state#23, ca_county#22, sum#49, count#50, sum#51, count#52, sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60, sum#61, count#62] +Keys [4]: [i_item_id#27, ca_country#24, ca_state#23, ca_county#22] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#63, avg(agg2#29)#64, avg(agg3#30)#65, avg(agg4#31)#66, avg(agg5#32)#67, avg(agg6#33)#68, avg(agg7#34)#69] +Results [11]: [i_item_id#27, ca_country#24, ca_state#23, ca_county#22, avg(agg1#28)#63 AS agg1#70, avg(agg2#29)#64 AS agg2#71, avg(agg3#30)#65 AS agg3#72, avg(agg4#31)#66 AS agg4#73, avg(agg5#32)#67 AS agg5#74, avg(agg6#33)#68 AS agg6#75, avg(agg7#34)#69 AS agg7#76] (42) Scan parquet default.catalog_sales Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] @@ -370,97 +370,97 @@ Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5 Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#11, cd_dep_count#14] (48) ReusedExchange [Reuses operator id: 15] -Output [4]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] +Output [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] (49) BroadcastHashJoin [codegen id : 15] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#16] +Right keys [1]: [c_customer_sk#15] Join condition: None (50) Project [codegen id : 15] -Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] -Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] (51) ReusedExchange [Reuses operator id: 21] -Output [1]: [cd_demo_sk#22] +Output [1]: [cd_demo_sk#20] (52) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [c_current_cdemo_sk#17] -Right keys [1]: [cd_demo_sk#22] +Left keys [1]: [c_current_cdemo_sk#16] +Right keys [1]: [cd_demo_sk#20] Join condition: None (53) Project [codegen id : 15] -Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#18, c_birth_year#20] -Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20, cd_demo_sk#22] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#17, c_birth_year#19] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19, cd_demo_sk#20] (54) Scan parquet default.customer_address -Output [3]: [ca_address_sk#24, ca_state#26, ca_country#27] +Output [3]: [ca_address_sk#21, ca_state#23, ca_country#24] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] ReadSchema: struct (55) ColumnarToRow [codegen id : 12] -Input [3]: [ca_address_sk#24, ca_state#26, ca_country#27] +Input [3]: [ca_address_sk#21, ca_state#23, ca_country#24] (56) Filter [codegen id : 12] -Input [3]: [ca_address_sk#24, ca_state#26, ca_country#27] -Condition : (ca_state#26 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#24)) +Input [3]: [ca_address_sk#21, ca_state#23, ca_country#24] +Condition : (ca_state#23 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#21)) (57) BroadcastExchange -Input [3]: [ca_address_sk#24, ca_state#26, ca_country#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#83] +Input [3]: [ca_address_sk#21, ca_state#23, ca_country#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] (58) BroadcastHashJoin [codegen id : 15] -Left keys [1]: [c_current_addr_sk#18] -Right keys [1]: [ca_address_sk#24] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#21] Join condition: None (59) Project [codegen id : 15] -Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#20, ca_state#26, ca_country#27] -Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#18, c_birth_year#20, ca_address_sk#24, ca_state#26, ca_country#27] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#19, ca_state#23, ca_country#24] +Input [13]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#17, c_birth_year#19, ca_address_sk#21, ca_state#23, ca_country#24] (60) ReusedExchange [Reuses operator id: 158] -Output [1]: [d_date_sk#29] +Output [1]: [d_date_sk#25] (61) BroadcastHashJoin [codegen id : 15] Left keys [1]: [cs_sold_date_sk#9] -Right keys [1]: [d_date_sk#29] +Right keys [1]: [d_date_sk#25] Join condition: None (62) Project [codegen id : 15] -Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, ca_state#26, ca_country#27] -Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#20, ca_state#26, ca_country#27, d_date_sk#29] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, ca_state#23, ca_country#24] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#19, ca_state#23, ca_country#24, d_date_sk#25] (63) ReusedExchange [Reuses operator id: 36] -Output [2]: [i_item_sk#30, i_item_id#31] +Output [2]: [i_item_sk#26, i_item_id#27] (64) BroadcastHashJoin [codegen id : 15] Left keys [1]: [cs_item_sk#3] -Right keys [1]: [i_item_sk#30] +Right keys [1]: [i_item_sk#26] Join condition: None (65) Project [codegen id : 15] -Output [10]: [i_item_id#31, ca_country#27, ca_state#26, cast(cs_quantity#4 as decimal(12,2)) AS agg1#33, cast(cs_list_price#5 as decimal(12,2)) AS agg2#34, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#35, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#36, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#37, cast(c_birth_year#20 as decimal(12,2)) AS agg6#38, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#39] -Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, ca_state#26, ca_country#27, i_item_sk#30, i_item_id#31] +Output [10]: [i_item_id#27, ca_country#24, ca_state#23, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#19 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#34] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, ca_state#23, ca_country#24, i_item_sk#26, i_item_id#27] (66) HashAggregate [codegen id : 15] -Input [10]: [i_item_id#31, ca_country#27, ca_state#26, agg1#33, agg2#34, agg3#35, agg4#36, agg5#37, agg6#38, agg7#39] -Keys [3]: [i_item_id#31, ca_country#27, ca_state#26] -Functions [7]: [partial_avg(agg1#33), partial_avg(agg2#34), partial_avg(agg3#35), partial_avg(agg4#36), partial_avg(agg5#37), partial_avg(agg6#38), partial_avg(agg7#39)] -Aggregate Attributes [14]: [sum#84, count#85, sum#86, count#87, sum#88, count#89, sum#90, count#91, sum#92, count#93, sum#94, count#95, sum#96, count#97] -Results [17]: [i_item_id#31, ca_country#27, ca_state#26, sum#98, count#99, sum#100, count#101, sum#102, count#103, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111] +Input [10]: [i_item_id#27, ca_country#24, ca_state#23, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [3]: [i_item_id#27, ca_country#24, ca_state#23] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#77, count#78, sum#79, count#80, sum#81, count#82, sum#83, count#84, sum#85, count#86, sum#87, count#88, sum#89, count#90] +Results [17]: [i_item_id#27, ca_country#24, ca_state#23, sum#91, count#92, sum#93, count#94, sum#95, count#96, sum#97, count#98, sum#99, count#100, sum#101, count#102, sum#103, count#104] (67) Exchange -Input [17]: [i_item_id#31, ca_country#27, ca_state#26, sum#98, count#99, sum#100, count#101, sum#102, count#103, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111] -Arguments: hashpartitioning(i_item_id#31, ca_country#27, ca_state#26, 5), ENSURE_REQUIREMENTS, [id=#112] +Input [17]: [i_item_id#27, ca_country#24, ca_state#23, sum#91, count#92, sum#93, count#94, sum#95, count#96, sum#97, count#98, sum#99, count#100, sum#101, count#102, sum#103, count#104] +Arguments: hashpartitioning(i_item_id#27, ca_country#24, ca_state#23, 5), ENSURE_REQUIREMENTS, [plan_id=8] (68) HashAggregate [codegen id : 16] -Input [17]: [i_item_id#31, ca_country#27, ca_state#26, sum#98, count#99, sum#100, count#101, sum#102, count#103, sum#104, count#105, sum#106, count#107, sum#108, count#109, sum#110, count#111] -Keys [3]: [i_item_id#31, ca_country#27, ca_state#26] -Functions [7]: [avg(agg1#33), avg(agg2#34), avg(agg3#35), avg(agg4#36), avg(agg5#37), avg(agg6#38), avg(agg7#39)] -Aggregate Attributes [7]: [avg(agg1#33)#113, avg(agg2#34)#114, avg(agg3#35)#115, avg(agg4#36)#116, avg(agg5#37)#117, avg(agg6#38)#118, avg(agg7#39)#119] -Results [11]: [i_item_id#31, ca_country#27, ca_state#26, null AS county#120, avg(agg1#33)#113 AS agg1#121, avg(agg2#34)#114 AS agg2#122, avg(agg3#35)#115 AS agg3#123, avg(agg4#36)#116 AS agg4#124, avg(agg5#37)#117 AS agg5#125, avg(agg6#38)#118 AS agg6#126, avg(agg7#39)#119 AS agg7#127] +Input [17]: [i_item_id#27, ca_country#24, ca_state#23, sum#91, count#92, sum#93, count#94, sum#95, count#96, sum#97, count#98, sum#99, count#100, sum#101, count#102, sum#103, count#104] +Keys [3]: [i_item_id#27, ca_country#24, ca_state#23] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#105, avg(agg2#29)#106, avg(agg3#30)#107, avg(agg4#31)#108, avg(agg5#32)#109, avg(agg6#33)#110, avg(agg7#34)#111] +Results [11]: [i_item_id#27, ca_country#24, ca_state#23, null AS county#112, avg(agg1#28)#105 AS agg1#113, avg(agg2#29)#106 AS agg2#114, avg(agg3#30)#107 AS agg3#115, avg(agg4#31)#108 AS agg4#116, avg(agg5#32)#109 AS agg5#117, avg(agg6#33)#110 AS agg6#118, avg(agg7#34)#111 AS agg7#119] (69) Scan parquet default.catalog_sales Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] @@ -490,101 +490,101 @@ Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5 Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#11, cd_dep_count#14] (75) ReusedExchange [Reuses operator id: 15] -Output [4]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] +Output [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] (76) BroadcastHashJoin [codegen id : 23] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#16] +Right keys [1]: [c_customer_sk#15] Join condition: None (77) Project [codegen id : 23] -Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] -Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] (78) ReusedExchange [Reuses operator id: 21] -Output [1]: [cd_demo_sk#22] +Output [1]: [cd_demo_sk#20] (79) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [c_current_cdemo_sk#17] -Right keys [1]: [cd_demo_sk#22] +Left keys [1]: [c_current_cdemo_sk#16] +Right keys [1]: [cd_demo_sk#20] Join condition: None (80) Project [codegen id : 23] -Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#18, c_birth_year#20] -Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20, cd_demo_sk#22] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#17, c_birth_year#19] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19, cd_demo_sk#20] (81) Scan parquet default.customer_address -Output [3]: [ca_address_sk#24, ca_state#26, ca_country#27] +Output [3]: [ca_address_sk#21, ca_state#23, ca_country#24] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] ReadSchema: struct (82) ColumnarToRow [codegen id : 20] -Input [3]: [ca_address_sk#24, ca_state#26, ca_country#27] +Input [3]: [ca_address_sk#21, ca_state#23, ca_country#24] (83) Filter [codegen id : 20] -Input [3]: [ca_address_sk#24, ca_state#26, ca_country#27] -Condition : (ca_state#26 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#24)) +Input [3]: [ca_address_sk#21, ca_state#23, ca_country#24] +Condition : (ca_state#23 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#21)) (84) Project [codegen id : 20] -Output [2]: [ca_address_sk#24, ca_country#27] -Input [3]: [ca_address_sk#24, ca_state#26, ca_country#27] +Output [2]: [ca_address_sk#21, ca_country#24] +Input [3]: [ca_address_sk#21, ca_state#23, ca_country#24] (85) BroadcastExchange -Input [2]: [ca_address_sk#24, ca_country#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#128] +Input [2]: [ca_address_sk#21, ca_country#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] (86) BroadcastHashJoin [codegen id : 23] -Left keys [1]: [c_current_addr_sk#18] -Right keys [1]: [ca_address_sk#24] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#21] Join condition: None (87) Project [codegen id : 23] -Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#20, ca_country#27] -Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#18, c_birth_year#20, ca_address_sk#24, ca_country#27] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#19, ca_country#24] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#17, c_birth_year#19, ca_address_sk#21, ca_country#24] (88) ReusedExchange [Reuses operator id: 158] -Output [1]: [d_date_sk#29] +Output [1]: [d_date_sk#25] (89) BroadcastHashJoin [codegen id : 23] Left keys [1]: [cs_sold_date_sk#9] -Right keys [1]: [d_date_sk#29] +Right keys [1]: [d_date_sk#25] Join condition: None (90) Project [codegen id : 23] -Output [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, ca_country#27] -Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#20, ca_country#27, d_date_sk#29] +Output [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, ca_country#24] +Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#19, ca_country#24, d_date_sk#25] (91) ReusedExchange [Reuses operator id: 36] -Output [2]: [i_item_sk#30, i_item_id#31] +Output [2]: [i_item_sk#26, i_item_id#27] (92) BroadcastHashJoin [codegen id : 23] Left keys [1]: [cs_item_sk#3] -Right keys [1]: [i_item_sk#30] +Right keys [1]: [i_item_sk#26] Join condition: None (93) Project [codegen id : 23] -Output [9]: [i_item_id#31, ca_country#27, cast(cs_quantity#4 as decimal(12,2)) AS agg1#33, cast(cs_list_price#5 as decimal(12,2)) AS agg2#34, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#35, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#36, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#37, cast(c_birth_year#20 as decimal(12,2)) AS agg6#38, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#39] -Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, ca_country#27, i_item_sk#30, i_item_id#31] +Output [9]: [i_item_id#27, ca_country#24, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#19 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#34] +Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, ca_country#24, i_item_sk#26, i_item_id#27] (94) HashAggregate [codegen id : 23] -Input [9]: [i_item_id#31, ca_country#27, agg1#33, agg2#34, agg3#35, agg4#36, agg5#37, agg6#38, agg7#39] -Keys [2]: [i_item_id#31, ca_country#27] -Functions [7]: [partial_avg(agg1#33), partial_avg(agg2#34), partial_avg(agg3#35), partial_avg(agg4#36), partial_avg(agg5#37), partial_avg(agg6#38), partial_avg(agg7#39)] -Aggregate Attributes [14]: [sum#129, count#130, sum#131, count#132, sum#133, count#134, sum#135, count#136, sum#137, count#138, sum#139, count#140, sum#141, count#142] -Results [16]: [i_item_id#31, ca_country#27, sum#143, count#144, sum#145, count#146, sum#147, count#148, sum#149, count#150, sum#151, count#152, sum#153, count#154, sum#155, count#156] +Input [9]: [i_item_id#27, ca_country#24, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [2]: [i_item_id#27, ca_country#24] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#120, count#121, sum#122, count#123, sum#124, count#125, sum#126, count#127, sum#128, count#129, sum#130, count#131, sum#132, count#133] +Results [16]: [i_item_id#27, ca_country#24, sum#134, count#135, sum#136, count#137, sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147] (95) Exchange -Input [16]: [i_item_id#31, ca_country#27, sum#143, count#144, sum#145, count#146, sum#147, count#148, sum#149, count#150, sum#151, count#152, sum#153, count#154, sum#155, count#156] -Arguments: hashpartitioning(i_item_id#31, ca_country#27, 5), ENSURE_REQUIREMENTS, [id=#157] +Input [16]: [i_item_id#27, ca_country#24, sum#134, count#135, sum#136, count#137, sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147] +Arguments: hashpartitioning(i_item_id#27, ca_country#24, 5), ENSURE_REQUIREMENTS, [plan_id=10] (96) HashAggregate [codegen id : 24] -Input [16]: [i_item_id#31, ca_country#27, sum#143, count#144, sum#145, count#146, sum#147, count#148, sum#149, count#150, sum#151, count#152, sum#153, count#154, sum#155, count#156] -Keys [2]: [i_item_id#31, ca_country#27] -Functions [7]: [avg(agg1#33), avg(agg2#34), avg(agg3#35), avg(agg4#36), avg(agg5#37), avg(agg6#38), avg(agg7#39)] -Aggregate Attributes [7]: [avg(agg1#33)#158, avg(agg2#34)#159, avg(agg3#35)#160, avg(agg4#36)#161, avg(agg5#37)#162, avg(agg6#38)#163, avg(agg7#39)#164] -Results [11]: [i_item_id#31, ca_country#27, null AS ca_state#165, null AS county#166, avg(agg1#33)#158 AS agg1#167, avg(agg2#34)#159 AS agg2#168, avg(agg3#35)#160 AS agg3#169, avg(agg4#36)#161 AS agg4#170, avg(agg5#37)#162 AS agg5#171, avg(agg6#38)#163 AS agg6#172, avg(agg7#39)#164 AS agg7#173] +Input [16]: [i_item_id#27, ca_country#24, sum#134, count#135, sum#136, count#137, sum#138, count#139, sum#140, count#141, sum#142, count#143, sum#144, count#145, sum#146, count#147] +Keys [2]: [i_item_id#27, ca_country#24] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#148, avg(agg2#29)#149, avg(agg3#30)#150, avg(agg4#31)#151, avg(agg5#32)#152, avg(agg6#33)#153, avg(agg7#34)#154] +Results [11]: [i_item_id#27, ca_country#24, null AS ca_state#155, null AS county#156, avg(agg1#28)#148 AS agg1#157, avg(agg2#29)#149 AS agg2#158, avg(agg3#30)#150 AS agg3#159, avg(agg4#31)#151 AS agg4#160, avg(agg5#32)#152 AS agg5#161, avg(agg6#33)#153 AS agg6#162, avg(agg7#34)#154 AS agg7#163] (97) Scan parquet default.catalog_sales Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] @@ -614,101 +614,101 @@ Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5 Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#11, cd_dep_count#14] (103) ReusedExchange [Reuses operator id: 15] -Output [4]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] +Output [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] (104) BroadcastHashJoin [codegen id : 31] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#16] +Right keys [1]: [c_customer_sk#15] Join condition: None (105) Project [codegen id : 31] -Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] -Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] (106) ReusedExchange [Reuses operator id: 21] -Output [1]: [cd_demo_sk#22] +Output [1]: [cd_demo_sk#20] (107) BroadcastHashJoin [codegen id : 31] -Left keys [1]: [c_current_cdemo_sk#17] -Right keys [1]: [cd_demo_sk#22] +Left keys [1]: [c_current_cdemo_sk#16] +Right keys [1]: [cd_demo_sk#20] Join condition: None (108) Project [codegen id : 31] -Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#18, c_birth_year#20] -Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20, cd_demo_sk#22] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#17, c_birth_year#19] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19, cd_demo_sk#20] (109) Scan parquet default.customer_address -Output [2]: [ca_address_sk#24, ca_state#26] +Output [2]: [ca_address_sk#21, ca_state#23] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [In(ca_state, [AL,MS,NC,ND,OK,TN,WI]), IsNotNull(ca_address_sk)] ReadSchema: struct (110) ColumnarToRow [codegen id : 28] -Input [2]: [ca_address_sk#24, ca_state#26] +Input [2]: [ca_address_sk#21, ca_state#23] (111) Filter [codegen id : 28] -Input [2]: [ca_address_sk#24, ca_state#26] -Condition : (ca_state#26 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#24)) +Input [2]: [ca_address_sk#21, ca_state#23] +Condition : (ca_state#23 IN (ND,WI,AL,NC,OK,MS,TN) AND isnotnull(ca_address_sk#21)) (112) Project [codegen id : 28] -Output [1]: [ca_address_sk#24] -Input [2]: [ca_address_sk#24, ca_state#26] +Output [1]: [ca_address_sk#21] +Input [2]: [ca_address_sk#21, ca_state#23] (113) BroadcastExchange -Input [1]: [ca_address_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#174] +Input [1]: [ca_address_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] (114) BroadcastHashJoin [codegen id : 31] -Left keys [1]: [c_current_addr_sk#18] -Right keys [1]: [ca_address_sk#24] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#21] Join condition: None (115) Project [codegen id : 31] -Output [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#20] -Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#18, c_birth_year#20, ca_address_sk#24] +Output [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#19] +Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#17, c_birth_year#19, ca_address_sk#21] (116) ReusedExchange [Reuses operator id: 158] -Output [1]: [d_date_sk#29] +Output [1]: [d_date_sk#25] (117) BroadcastHashJoin [codegen id : 31] Left keys [1]: [cs_sold_date_sk#9] -Right keys [1]: [d_date_sk#29] +Right keys [1]: [d_date_sk#25] Join condition: None (118) Project [codegen id : 31] -Output [8]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20] -Input [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#20, d_date_sk#29] +Output [8]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19] +Input [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#19, d_date_sk#25] (119) ReusedExchange [Reuses operator id: 36] -Output [2]: [i_item_sk#30, i_item_id#31] +Output [2]: [i_item_sk#26, i_item_id#27] (120) BroadcastHashJoin [codegen id : 31] Left keys [1]: [cs_item_sk#3] -Right keys [1]: [i_item_sk#30] +Right keys [1]: [i_item_sk#26] Join condition: None (121) Project [codegen id : 31] -Output [8]: [i_item_id#31, cast(cs_quantity#4 as decimal(12,2)) AS agg1#33, cast(cs_list_price#5 as decimal(12,2)) AS agg2#34, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#35, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#36, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#37, cast(c_birth_year#20 as decimal(12,2)) AS agg6#38, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#39] -Input [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, i_item_sk#30, i_item_id#31] +Output [8]: [i_item_id#27, cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#19 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#34] +Input [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, i_item_sk#26, i_item_id#27] (122) HashAggregate [codegen id : 31] -Input [8]: [i_item_id#31, agg1#33, agg2#34, agg3#35, agg4#36, agg5#37, agg6#38, agg7#39] -Keys [1]: [i_item_id#31] -Functions [7]: [partial_avg(agg1#33), partial_avg(agg2#34), partial_avg(agg3#35), partial_avg(agg4#36), partial_avg(agg5#37), partial_avg(agg6#38), partial_avg(agg7#39)] -Aggregate Attributes [14]: [sum#175, count#176, sum#177, count#178, sum#179, count#180, sum#181, count#182, sum#183, count#184, sum#185, count#186, sum#187, count#188] -Results [15]: [i_item_id#31, sum#189, count#190, sum#191, count#192, sum#193, count#194, sum#195, count#196, sum#197, count#198, sum#199, count#200, sum#201, count#202] +Input [8]: [i_item_id#27, agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] +Keys [1]: [i_item_id#27] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#164, count#165, sum#166, count#167, sum#168, count#169, sum#170, count#171, sum#172, count#173, sum#174, count#175, sum#176, count#177] +Results [15]: [i_item_id#27, sum#178, count#179, sum#180, count#181, sum#182, count#183, sum#184, count#185, sum#186, count#187, sum#188, count#189, sum#190, count#191] (123) Exchange -Input [15]: [i_item_id#31, sum#189, count#190, sum#191, count#192, sum#193, count#194, sum#195, count#196, sum#197, count#198, sum#199, count#200, sum#201, count#202] -Arguments: hashpartitioning(i_item_id#31, 5), ENSURE_REQUIREMENTS, [id=#203] +Input [15]: [i_item_id#27, sum#178, count#179, sum#180, count#181, sum#182, count#183, sum#184, count#185, sum#186, count#187, sum#188, count#189, sum#190, count#191] +Arguments: hashpartitioning(i_item_id#27, 5), ENSURE_REQUIREMENTS, [plan_id=12] (124) HashAggregate [codegen id : 32] -Input [15]: [i_item_id#31, sum#189, count#190, sum#191, count#192, sum#193, count#194, sum#195, count#196, sum#197, count#198, sum#199, count#200, sum#201, count#202] -Keys [1]: [i_item_id#31] -Functions [7]: [avg(agg1#33), avg(agg2#34), avg(agg3#35), avg(agg4#36), avg(agg5#37), avg(agg6#38), avg(agg7#39)] -Aggregate Attributes [7]: [avg(agg1#33)#204, avg(agg2#34)#205, avg(agg3#35)#206, avg(agg4#36)#207, avg(agg5#37)#208, avg(agg6#38)#209, avg(agg7#39)#210] -Results [11]: [i_item_id#31, null AS ca_country#211, null AS ca_state#212, null AS county#213, avg(agg1#33)#204 AS agg1#214, avg(agg2#34)#205 AS agg2#215, avg(agg3#35)#206 AS agg3#216, avg(agg4#36)#207 AS agg4#217, avg(agg5#37)#208 AS agg5#218, avg(agg6#38)#209 AS agg6#219, avg(agg7#39)#210 AS agg7#220] +Input [15]: [i_item_id#27, sum#178, count#179, sum#180, count#181, sum#182, count#183, sum#184, count#185, sum#186, count#187, sum#188, count#189, sum#190, count#191] +Keys [1]: [i_item_id#27] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#192, avg(agg2#29)#193, avg(agg3#30)#194, avg(agg4#31)#195, avg(agg5#32)#196, avg(agg6#33)#197, avg(agg7#34)#198] +Results [11]: [i_item_id#27, null AS ca_country#199, null AS ca_state#200, null AS county#201, avg(agg1#28)#192 AS agg1#202, avg(agg2#29)#193 AS agg2#203, avg(agg3#30)#194 AS agg3#204, avg(agg4#31)#195 AS agg4#205, avg(agg5#32)#196 AS agg5#206, avg(agg6#33)#197 AS agg6#207, avg(agg7#34)#198 AS agg7#208] (125) Scan parquet default.catalog_sales Output [9]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9] @@ -738,103 +738,103 @@ Output [9]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5 Input [11]: [cs_bill_customer_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_demo_sk#11, cd_dep_count#14] (131) ReusedExchange [Reuses operator id: 15] -Output [4]: [c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] +Output [4]: [c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] (132) BroadcastHashJoin [codegen id : 39] Left keys [1]: [cs_bill_customer_sk#1] -Right keys [1]: [c_customer_sk#16] +Right keys [1]: [c_customer_sk#15] Join condition: None (133) Project [codegen id : 39] -Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] -Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_customer_sk#16, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20] +Output [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] +Input [13]: [cs_bill_customer_sk#1, cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_customer_sk#15, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19] (134) ReusedExchange [Reuses operator id: 21] -Output [1]: [cd_demo_sk#22] +Output [1]: [cd_demo_sk#20] (135) BroadcastHashJoin [codegen id : 39] -Left keys [1]: [c_current_cdemo_sk#17] -Right keys [1]: [cd_demo_sk#22] +Left keys [1]: [c_current_cdemo_sk#16] +Right keys [1]: [cd_demo_sk#20] Join condition: None (136) Project [codegen id : 39] -Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#18, c_birth_year#20] -Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#17, c_current_addr_sk#18, c_birth_year#20, cd_demo_sk#22] +Output [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#17, c_birth_year#19] +Input [12]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_cdemo_sk#16, c_current_addr_sk#17, c_birth_year#19, cd_demo_sk#20] (137) ReusedExchange [Reuses operator id: 113] -Output [1]: [ca_address_sk#24] +Output [1]: [ca_address_sk#21] (138) BroadcastHashJoin [codegen id : 39] -Left keys [1]: [c_current_addr_sk#18] -Right keys [1]: [ca_address_sk#24] +Left keys [1]: [c_current_addr_sk#17] +Right keys [1]: [ca_address_sk#21] Join condition: None (139) Project [codegen id : 39] -Output [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#20] -Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#18, c_birth_year#20, ca_address_sk#24] +Output [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#19] +Input [11]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_current_addr_sk#17, c_birth_year#19, ca_address_sk#21] (140) ReusedExchange [Reuses operator id: 158] -Output [1]: [d_date_sk#29] +Output [1]: [d_date_sk#25] (141) BroadcastHashJoin [codegen id : 39] Left keys [1]: [cs_sold_date_sk#9] -Right keys [1]: [d_date_sk#29] +Right keys [1]: [d_date_sk#25] Join condition: None (142) Project [codegen id : 39] -Output [8]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20] -Input [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#20, d_date_sk#29] +Output [8]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19] +Input [10]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cs_sold_date_sk#9, cd_dep_count#14, c_birth_year#19, d_date_sk#25] (143) Scan parquet default.item -Output [1]: [i_item_sk#30] +Output [1]: [i_item_sk#26] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (144) ColumnarToRow [codegen id : 38] -Input [1]: [i_item_sk#30] +Input [1]: [i_item_sk#26] (145) Filter [codegen id : 38] -Input [1]: [i_item_sk#30] -Condition : isnotnull(i_item_sk#30) +Input [1]: [i_item_sk#26] +Condition : isnotnull(i_item_sk#26) (146) BroadcastExchange -Input [1]: [i_item_sk#30] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#221] +Input [1]: [i_item_sk#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=13] (147) BroadcastHashJoin [codegen id : 39] Left keys [1]: [cs_item_sk#3] -Right keys [1]: [i_item_sk#30] +Right keys [1]: [i_item_sk#26] Join condition: None (148) Project [codegen id : 39] -Output [7]: [cast(cs_quantity#4 as decimal(12,2)) AS agg1#33, cast(cs_list_price#5 as decimal(12,2)) AS agg2#34, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#35, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#36, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#37, cast(c_birth_year#20 as decimal(12,2)) AS agg6#38, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#39] -Input [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#20, i_item_sk#30] +Output [7]: [cast(cs_quantity#4 as decimal(12,2)) AS agg1#28, cast(cs_list_price#5 as decimal(12,2)) AS agg2#29, cast(cs_coupon_amt#7 as decimal(12,2)) AS agg3#30, cast(cs_sales_price#6 as decimal(12,2)) AS agg4#31, cast(cs_net_profit#8 as decimal(12,2)) AS agg5#32, cast(c_birth_year#19 as decimal(12,2)) AS agg6#33, cast(cd_dep_count#14 as decimal(12,2)) AS agg7#34] +Input [9]: [cs_item_sk#3, cs_quantity#4, cs_list_price#5, cs_sales_price#6, cs_coupon_amt#7, cs_net_profit#8, cd_dep_count#14, c_birth_year#19, i_item_sk#26] (149) HashAggregate [codegen id : 39] -Input [7]: [agg1#33, agg2#34, agg3#35, agg4#36, agg5#37, agg6#38, agg7#39] +Input [7]: [agg1#28, agg2#29, agg3#30, agg4#31, agg5#32, agg6#33, agg7#34] Keys: [] -Functions [7]: [partial_avg(agg1#33), partial_avg(agg2#34), partial_avg(agg3#35), partial_avg(agg4#36), partial_avg(agg5#37), partial_avg(agg6#38), partial_avg(agg7#39)] -Aggregate Attributes [14]: [sum#222, count#223, sum#224, count#225, sum#226, count#227, sum#228, count#229, sum#230, count#231, sum#232, count#233, sum#234, count#235] -Results [14]: [sum#236, count#237, sum#238, count#239, sum#240, count#241, sum#242, count#243, sum#244, count#245, sum#246, count#247, sum#248, count#249] +Functions [7]: [partial_avg(agg1#28), partial_avg(agg2#29), partial_avg(agg3#30), partial_avg(agg4#31), partial_avg(agg5#32), partial_avg(agg6#33), partial_avg(agg7#34)] +Aggregate Attributes [14]: [sum#209, count#210, sum#211, count#212, sum#213, count#214, sum#215, count#216, sum#217, count#218, sum#219, count#220, sum#221, count#222] +Results [14]: [sum#223, count#224, sum#225, count#226, sum#227, count#228, sum#229, count#230, sum#231, count#232, sum#233, count#234, sum#235, count#236] (150) Exchange -Input [14]: [sum#236, count#237, sum#238, count#239, sum#240, count#241, sum#242, count#243, sum#244, count#245, sum#246, count#247, sum#248, count#249] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#250] +Input [14]: [sum#223, count#224, sum#225, count#226, sum#227, count#228, sum#229, count#230, sum#231, count#232, sum#233, count#234, sum#235, count#236] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=14] (151) HashAggregate [codegen id : 40] -Input [14]: [sum#236, count#237, sum#238, count#239, sum#240, count#241, sum#242, count#243, sum#244, count#245, sum#246, count#247, sum#248, count#249] +Input [14]: [sum#223, count#224, sum#225, count#226, sum#227, count#228, sum#229, count#230, sum#231, count#232, sum#233, count#234, sum#235, count#236] Keys: [] -Functions [7]: [avg(agg1#33), avg(agg2#34), avg(agg3#35), avg(agg4#36), avg(agg5#37), avg(agg6#38), avg(agg7#39)] -Aggregate Attributes [7]: [avg(agg1#33)#251, avg(agg2#34)#252, avg(agg3#35)#253, avg(agg4#36)#254, avg(agg5#37)#255, avg(agg6#38)#256, avg(agg7#39)#257] -Results [11]: [null AS i_item_id#258, null AS ca_country#259, null AS ca_state#260, null AS county#261, avg(agg1#33)#251 AS agg1#262, avg(agg2#34)#252 AS agg2#263, avg(agg3#35)#253 AS agg3#264, avg(agg4#36)#254 AS agg4#265, avg(agg5#37)#255 AS agg5#266, avg(agg6#38)#256 AS agg6#267, avg(agg7#39)#257 AS agg7#268] +Functions [7]: [avg(agg1#28), avg(agg2#29), avg(agg3#30), avg(agg4#31), avg(agg5#32), avg(agg6#33), avg(agg7#34)] +Aggregate Attributes [7]: [avg(agg1#28)#237, avg(agg2#29)#238, avg(agg3#30)#239, avg(agg4#31)#240, avg(agg5#32)#241, avg(agg6#33)#242, avg(agg7#34)#243] +Results [11]: [null AS i_item_id#244, null AS ca_country#245, null AS ca_state#246, null AS county#247, avg(agg1#28)#237 AS agg1#248, avg(agg2#29)#238 AS agg2#249, avg(agg3#30)#239 AS agg3#250, avg(agg4#31)#240 AS agg4#251, avg(agg5#32)#241 AS agg5#252, avg(agg6#33)#242 AS agg6#253, avg(agg7#34)#243 AS agg7#254] (152) Union (153) TakeOrderedAndProject -Input [11]: [i_item_id#31, ca_country#27, ca_state#26, ca_county#25, agg1#76, agg2#77, agg3#78, agg4#79, agg5#80, agg6#81, agg7#82] -Arguments: 100, [ca_country#27 ASC NULLS FIRST, ca_state#26 ASC NULLS FIRST, ca_county#25 ASC NULLS FIRST, i_item_id#31 ASC NULLS FIRST], [i_item_id#31, ca_country#27, ca_state#26, ca_county#25, agg1#76, agg2#77, agg3#78, agg4#79, agg5#80, agg6#81, agg7#82] +Input [11]: [i_item_id#27, ca_country#24, ca_state#23, ca_county#22, agg1#70, agg2#71, agg3#72, agg4#73, agg5#74, agg6#75, agg7#76] +Arguments: 100, [ca_country#24 ASC NULLS FIRST, ca_state#23 ASC NULLS FIRST, ca_county#22 ASC NULLS FIRST, i_item_id#27 ASC NULLS FIRST], [i_item_id#27, ca_country#24, ca_state#23, ca_county#22, agg1#70, agg2#71, agg3#72, agg4#73, agg5#74, agg6#75, agg7#76] ===== Subqueries ===== @@ -847,26 +847,26 @@ BroadcastExchange (158) (154) Scan parquet default.date_dim -Output [2]: [d_date_sk#29, d_year#269] +Output [2]: [d_date_sk#25, d_year#255] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (155) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#29, d_year#269] +Input [2]: [d_date_sk#25, d_year#255] (156) Filter [codegen id : 1] -Input [2]: [d_date_sk#29, d_year#269] -Condition : ((isnotnull(d_year#269) AND (d_year#269 = 2001)) AND isnotnull(d_date_sk#29)) +Input [2]: [d_date_sk#25, d_year#255] +Condition : ((isnotnull(d_year#255) AND (d_year#255 = 2001)) AND isnotnull(d_date_sk#25)) (157) Project [codegen id : 1] -Output [1]: [d_date_sk#29] -Input [2]: [d_date_sk#29, d_year#269] +Output [1]: [d_date_sk#25] +Input [2]: [d_date_sk#25, d_year#255] (158) BroadcastExchange -Input [1]: [d_date_sk#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#270] +Input [1]: [d_date_sk#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=15] Subquery:2 Hosting operator id = 42 Hosting Expression = cs_sold_date_sk#9 IN dynamicpruning#10 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.sf100/explain.txt index c925197336e95..72200f5f5e032 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20.sf100/explain.txt @@ -41,92 +41,92 @@ Condition : isnotnull(cs_item_sk#1) (4) Exchange Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] -Arguments: hashpartitioning(cs_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#5] +Arguments: hashpartitioning(cs_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [3]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3] Arguments: [cs_item_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.item -Output [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Output [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (8) Filter [codegen id : 3] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Condition : (i_category#11 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#6)) +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Condition : (i_category#10 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#5)) (9) Exchange -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Arguments: hashpartitioning(i_item_sk#6, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Arguments: hashpartitioning(i_item_sk#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] (10) Sort [codegen id : 4] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Arguments: [i_item_sk#5 ASC NULLS FIRST], false, 0 (11) SortMergeJoin [codegen id : 6] Left keys [1]: [cs_item_sk#1] -Right keys [1]: [i_item_sk#6] +Right keys [1]: [i_item_sk#5] Join condition: None (12) Project [codegen id : 6] -Output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (13) ReusedExchange [Reuses operator id: 28] -Output [1]: [d_date_sk#13] +Output [1]: [d_date_sk#11] (14) BroadcastHashJoin [codegen id : 6] Left keys [1]: [cs_sold_date_sk#3] -Right keys [1]: [d_date_sk#13] +Right keys [1]: [d_date_sk#11] Join condition: None (15) Project [codegen id : 6] -Output [6]: [cs_ext_sales_price#2, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11, d_date_sk#13] +Output [6]: [cs_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#11] (16) HashAggregate [codegen id : 6] -Input [6]: [cs_ext_sales_price#2, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Keys [5]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9] +Input [6]: [cs_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#14] -Results [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] (17) Exchange -Input [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] -Arguments: hashpartitioning(i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 7] -Input [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] -Keys [5]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#2))#17] -Results [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#17,17,2) AS _w1#20] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#2))#14] +Results [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS _w0#16, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS _w1#17] (19) Exchange -Input [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20] -Arguments: hashpartitioning(i_class#10, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] (20) Sort [codegen id : 8] -Input [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20] -Arguments: [i_class#10 ASC NULLS FIRST], false, 0 +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: [i_class#9 ASC NULLS FIRST], false, 0 (21) Window -Input [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20] -Arguments: [sum(_w1#20) windowspecdefinition(i_class#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#10] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: [sum(_w1#17) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#18], [i_class#9] (22) Project [codegen id : 9] -Output [7]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17)) AS revenueratio#23] -Input [9]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, _we0#22] +Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#16) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#18)), DecimalType(38,17)) AS revenueratio#19] +Input [9]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, _we0#18] (23) TakeOrderedAndProject -Input [7]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, revenueratio#23] -Arguments: 100, [i_category#11 ASC NULLS FIRST, i_class#10 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#8 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, revenueratio#23] +Input [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] +Arguments: 100, [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST], [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] ===== Subqueries ===== @@ -139,25 +139,25 @@ BroadcastExchange (28) (24) Scan parquet default.date_dim -Output [2]: [d_date_sk#13, d_date#24] +Output [2]: [d_date_sk#11, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#13, d_date#24] +Input [2]: [d_date_sk#11, d_date#20] (26) Filter [codegen id : 1] -Input [2]: [d_date_sk#13, d_date#24] -Condition : (((isnotnull(d_date#24) AND (d_date#24 >= 1999-02-22)) AND (d_date#24 <= 1999-03-24)) AND isnotnull(d_date_sk#13)) +Input [2]: [d_date_sk#11, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-22)) AND (d_date#20 <= 1999-03-24)) AND isnotnull(d_date_sk#11)) (27) Project [codegen id : 1] -Output [1]: [d_date_sk#13] -Input [2]: [d_date_sk#13, d_date#24] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#20] (28) BroadcastExchange -Input [1]: [d_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20/explain.txt index ff461dafc09c0..fdc4cc9239c2d 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q20/explain.txt @@ -52,7 +52,7 @@ Condition : (i_category#10 IN (Sports (7) BroadcastExchange Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_item_sk#1] @@ -64,54 +64,54 @@ Output [7]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#6, i_item_desc#7 Input [9]: [cs_item_sk#1, cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (10) ReusedExchange [Reuses operator id: 25] -Output [1]: [d_date_sk#12] +Output [1]: [d_date_sk#11] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_sold_date_sk#3] -Right keys [1]: [d_date_sk#12] +Right keys [1]: [d_date_sk#11] Join condition: None (12) Project [codegen id : 3] Output [6]: [cs_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] -Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#12] +Input [8]: [cs_ext_sales_price#2, cs_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#11] (13) HashAggregate [codegen id : 3] Input [6]: [cs_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [partial_sum(UnscaledValue(cs_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#13] -Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] (14) Exchange -Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] -Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 4] -Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [sum(UnscaledValue(cs_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#2))#16] -Results [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#16,17,2) AS itemrevenue#17, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#16,17,2) AS _w0#18, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#16,17,2) AS _w1#19] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_ext_sales_price#2))#14] +Results [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS _w0#16, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#2))#14,17,2) AS _w1#17] (16) Exchange -Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19] -Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (17) Sort [codegen id : 5] -Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] Arguments: [i_class#9 ASC NULLS FIRST], false, 0 (18) Window -Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19] -Arguments: [sum(_w1#19) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#21], [i_class#9] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: [sum(_w1#17) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#18], [i_class#9] (19) Project [codegen id : 6] -Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17)) AS revenueratio#22] -Input [9]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, _we0#21] +Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#16) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#18)), DecimalType(38,17)) AS revenueratio#19] +Input [9]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, _we0#18] (20) TakeOrderedAndProject -Input [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22] -Arguments: 100, [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#22 ASC NULLS FIRST], [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22] +Input [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] +Arguments: 100, [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST], [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] ===== Subqueries ===== @@ -124,25 +124,25 @@ BroadcastExchange (25) (21) Scan parquet default.date_dim -Output [2]: [d_date_sk#12, d_date#23] +Output [2]: [d_date_sk#11, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#12, d_date#23] +Input [2]: [d_date_sk#11, d_date#20] (23) Filter [codegen id : 1] -Input [2]: [d_date_sk#12, d_date#23] -Condition : (((isnotnull(d_date#23) AND (d_date#23 >= 1999-02-22)) AND (d_date#23 <= 1999-03-24)) AND isnotnull(d_date_sk#12)) +Input [2]: [d_date_sk#11, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-22)) AND (d_date#20 <= 1999-03-24)) AND isnotnull(d_date_sk#11)) (24) Project [codegen id : 1] -Output [1]: [d_date_sk#12] -Input [2]: [d_date_sk#12, d_date#23] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#20] (25) BroadcastExchange -Input [1]: [d_date_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#24] +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.sf100/explain.txt index eb956b9e75ab0..1d3378f030147 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22.sf100/explain.txt @@ -55,42 +55,42 @@ Input [4]: [inv_item_sk#1, inv_quantity_on_hand#2, inv_date_sk#3, d_date_sk#5] (7) Exchange Input [2]: [inv_item_sk#1, inv_quantity_on_hand#2] -Arguments: hashpartitioning(inv_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#6] +Arguments: hashpartitioning(inv_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (8) Sort [codegen id : 3] Input [2]: [inv_item_sk#1, inv_quantity_on_hand#2] Arguments: [inv_item_sk#1 ASC NULLS FIRST], false, 0 (9) Scan parquet default.item -Output [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Output [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (10) ColumnarToRow [codegen id : 4] -Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] (11) Filter [codegen id : 4] -Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] -Condition : isnotnull(i_item_sk#7) +Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Condition : isnotnull(i_item_sk#6) (12) Exchange -Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] -Arguments: hashpartitioning(i_item_sk#7, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Arguments: hashpartitioning(i_item_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (13) Sort [codegen id : 5] -Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] -Arguments: [i_item_sk#7 ASC NULLS FIRST], false, 0 +Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 (14) SortMergeJoin [codegen id : 7] Left keys [1]: [inv_item_sk#1] -Right keys [1]: [i_item_sk#7] +Right keys [1]: [i_item_sk#6] Join condition: None (15) Project [codegen id : 7] -Output [5]: [inv_quantity_on_hand#2, i_brand#8, i_class#9, i_category#10, i_product_name#11] -Input [7]: [inv_item_sk#1, inv_quantity_on_hand#2, i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Output [5]: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10] +Input [7]: [inv_item_sk#1, inv_quantity_on_hand#2, i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] (16) Scan parquet default.warehouse Output: [] @@ -103,40 +103,40 @@ Input: [] (18) BroadcastExchange Input: [] -Arguments: IdentityBroadcastMode, [id=#13] +Arguments: IdentityBroadcastMode, [plan_id=3] (19) BroadcastNestedLoopJoin [codegen id : 7] Join condition: None (20) Project [codegen id : 7] -Output [5]: [inv_quantity_on_hand#2, i_product_name#11, i_brand#8, i_class#9, i_category#10] -Input [5]: [inv_quantity_on_hand#2, i_brand#8, i_class#9, i_category#10, i_product_name#11] +Output [5]: [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9] +Input [5]: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_product_name#10] (21) Expand [codegen id : 7] -Input [5]: [inv_quantity_on_hand#2, i_product_name#11, i_brand#8, i_class#9, i_category#10] -Arguments: [[inv_quantity_on_hand#2, i_product_name#11, i_brand#8, i_class#9, i_category#10, 0], [inv_quantity_on_hand#2, i_product_name#11, i_brand#8, i_class#9, null, 1], [inv_quantity_on_hand#2, i_product_name#11, i_brand#8, null, null, 3], [inv_quantity_on_hand#2, i_product_name#11, null, null, null, 7], [inv_quantity_on_hand#2, null, null, null, null, 15]], [inv_quantity_on_hand#2, i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18] +Input [5]: [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9] +Arguments: [[inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9, 0], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, null, 1], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, null, null, 3], [inv_quantity_on_hand#2, i_product_name#10, null, null, null, 7], [inv_quantity_on_hand#2, null, null, null, null, 15]], [inv_quantity_on_hand#2, i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] (22) HashAggregate [codegen id : 7] -Input [6]: [inv_quantity_on_hand#2, i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18] -Keys [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18] +Input [6]: [inv_quantity_on_hand#2, i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] +Keys [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] Functions [1]: [partial_avg(inv_quantity_on_hand#2)] -Aggregate Attributes [2]: [sum#19, count#20] -Results [7]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, sum#21, count#22] +Aggregate Attributes [2]: [sum#16, count#17] +Results [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] (23) Exchange -Input [7]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, sum#21, count#22] -Arguments: hashpartitioning(i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] +Arguments: hashpartitioning(i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) HashAggregate [codegen id : 8] -Input [7]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18, sum#21, count#22] -Keys [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, spark_grouping_id#18] +Input [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] +Keys [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] Functions [1]: [avg(inv_quantity_on_hand#2)] -Aggregate Attributes [1]: [avg(inv_quantity_on_hand#2)#24] -Results [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, avg(inv_quantity_on_hand#2)#24 AS qoh#25] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#2)#20] +Results [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, avg(inv_quantity_on_hand#2)#20 AS qoh#21] (25) TakeOrderedAndProject -Input [5]: [i_product_name#14, i_brand#15, i_class#16, i_category#17, qoh#25] -Arguments: 100, [qoh#25 ASC NULLS FIRST, i_product_name#14 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, i_class#16 ASC NULLS FIRST, i_category#17 ASC NULLS FIRST], [i_product_name#14, i_brand#15, i_class#16, i_category#17, qoh#25] +Input [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, qoh#21] +Arguments: 100, [qoh#21 ASC NULLS FIRST, i_product_name#11 ASC NULLS FIRST, i_brand#12 ASC NULLS FIRST, i_class#13 ASC NULLS FIRST, i_category#14 ASC NULLS FIRST], [i_product_name#11, i_brand#12, i_class#13, i_category#14, qoh#21] ===== Subqueries ===== @@ -149,25 +149,25 @@ BroadcastExchange (30) (26) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_month_seq#26] +Output [2]: [d_date_sk#5, d_month_seq#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#26] +Input [2]: [d_date_sk#5, d_month_seq#22] (28) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#26] -Condition : (((isnotnull(d_month_seq#26) AND (d_month_seq#26 >= 1200)) AND (d_month_seq#26 <= 1211)) AND isnotnull(d_date_sk#5)) +Input [2]: [d_date_sk#5, d_month_seq#22] +Condition : (((isnotnull(d_month_seq#22) AND (d_month_seq#22 >= 1200)) AND (d_month_seq#22 <= 1211)) AND isnotnull(d_date_sk#5)) (29) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_month_seq#26] +Input [2]: [d_date_sk#5, d_month_seq#22] (30) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22/explain.txt index 85c21eca87544..a33275b23229e 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22/explain.txt @@ -66,7 +66,7 @@ Condition : isnotnull(i_item_sk#6) (10) BroadcastExchange Input [5]: [i_item_sk#6, i_brand#7, i_class#8, i_category#9, i_product_name#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_item_sk#1] @@ -88,7 +88,7 @@ Input: [] (15) BroadcastExchange Input: [] -Arguments: IdentityBroadcastMode, [id=#12] +Arguments: IdentityBroadcastMode, [plan_id=2] (16) BroadcastNestedLoopJoin [codegen id : 4] Join condition: None @@ -99,29 +99,29 @@ Input [5]: [inv_quantity_on_hand#2, i_brand#7, i_class#8, i_category#9, i_produc (18) Expand [codegen id : 4] Input [5]: [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9] -Arguments: [[inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9, 0], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, null, 1], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, null, null, 3], [inv_quantity_on_hand#2, i_product_name#10, null, null, null, 7], [inv_quantity_on_hand#2, null, null, null, null, 15]], [inv_quantity_on_hand#2, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Arguments: [[inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, i_category#9, 0], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, i_class#8, null, 1], [inv_quantity_on_hand#2, i_product_name#10, i_brand#7, null, null, 3], [inv_quantity_on_hand#2, i_product_name#10, null, null, null, 7], [inv_quantity_on_hand#2, null, null, null, null, 15]], [inv_quantity_on_hand#2, i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] (19) HashAggregate [codegen id : 4] -Input [6]: [inv_quantity_on_hand#2, i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] -Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Input [6]: [inv_quantity_on_hand#2, i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] +Keys [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] Functions [1]: [partial_avg(inv_quantity_on_hand#2)] -Aggregate Attributes [2]: [sum#18, count#19] -Results [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] +Aggregate Attributes [2]: [sum#16, count#17] +Results [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] (20) Exchange -Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] -Arguments: hashpartitioning(i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] +Arguments: hashpartitioning(i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 5] -Input [7]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17, sum#20, count#21] -Keys [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, spark_grouping_id#17] +Input [7]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15, sum#18, count#19] +Keys [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, spark_grouping_id#15] Functions [1]: [avg(inv_quantity_on_hand#2)] -Aggregate Attributes [1]: [avg(inv_quantity_on_hand#2)#23] -Results [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, avg(inv_quantity_on_hand#2)#23 AS qoh#24] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#2)#20] +Results [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, avg(inv_quantity_on_hand#2)#20 AS qoh#21] (22) TakeOrderedAndProject -Input [5]: [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#24] -Arguments: 100, [qoh#24 ASC NULLS FIRST, i_product_name#13 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_category#16 ASC NULLS FIRST], [i_product_name#13, i_brand#14, i_class#15, i_category#16, qoh#24] +Input [5]: [i_product_name#11, i_brand#12, i_class#13, i_category#14, qoh#21] +Arguments: 100, [qoh#21 ASC NULLS FIRST, i_product_name#11 ASC NULLS FIRST, i_brand#12 ASC NULLS FIRST, i_class#13 ASC NULLS FIRST, i_category#14 ASC NULLS FIRST], [i_product_name#11, i_brand#12, i_class#13, i_category#14, qoh#21] ===== Subqueries ===== @@ -134,25 +134,25 @@ BroadcastExchange (27) (23) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_month_seq#25] +Output [2]: [d_date_sk#5, d_month_seq#22] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1200), LessThanOrEqual(d_month_seq,1211), IsNotNull(d_date_sk)] ReadSchema: struct (24) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#25] +Input [2]: [d_date_sk#5, d_month_seq#22] (25) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#25] -Condition : (((isnotnull(d_month_seq#25) AND (d_month_seq#25 >= 1200)) AND (d_month_seq#25 <= 1211)) AND isnotnull(d_date_sk#5)) +Input [2]: [d_date_sk#5, d_month_seq#22] +Condition : (((isnotnull(d_month_seq#22) AND (d_month_seq#22 >= 1200)) AND (d_month_seq#22 <= 1211)) AND isnotnull(d_date_sk#5)) (26) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_month_seq#25] +Input [2]: [d_date_sk#5, d_month_seq#22] (27) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.sf100/explain.txt index e7072101f8f23..ed0af9e0d295f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a.sf100/explain.txt @@ -80,7 +80,7 @@ Condition : isnotnull(w_warehouse_sk#6) (7) BroadcastExchange Input [1]: [w_warehouse_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [inv_warehouse_sk#2] @@ -92,205 +92,205 @@ Output [3]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4] Input [5]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, inv_date_sk#4, w_warehouse_sk#6] (10) ReusedExchange [Reuses operator id: 53] -Output [1]: [d_date_sk#8] +Output [1]: [d_date_sk#7] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [inv_date_sk#4] -Right keys [1]: [d_date_sk#8] +Right keys [1]: [d_date_sk#7] Join condition: None (12) Project [codegen id : 3] Output [2]: [inv_item_sk#1, inv_quantity_on_hand#3] -Input [4]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, d_date_sk#8] +Input [4]: [inv_item_sk#1, inv_quantity_on_hand#3, inv_date_sk#4, d_date_sk#7] (13) Exchange Input [2]: [inv_item_sk#1, inv_quantity_on_hand#3] -Arguments: hashpartitioning(inv_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(inv_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) Sort [codegen id : 4] Input [2]: [inv_item_sk#1, inv_quantity_on_hand#3] Arguments: [inv_item_sk#1 ASC NULLS FIRST], false, 0 (15) Scan parquet default.item -Output [5]: [i_item_sk#10, i_brand#11, i_class#12, i_category#13, i_product_name#14] +Output [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [5]: [i_item_sk#10, i_brand#11, i_class#12, i_category#13, i_product_name#14] +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] (17) Filter [codegen id : 5] -Input [5]: [i_item_sk#10, i_brand#11, i_class#12, i_category#13, i_product_name#14] -Condition : isnotnull(i_item_sk#10) +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Condition : isnotnull(i_item_sk#8) (18) Exchange -Input [5]: [i_item_sk#10, i_brand#11, i_class#12, i_category#13, i_product_name#14] -Arguments: hashpartitioning(i_item_sk#10, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Arguments: hashpartitioning(i_item_sk#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 6] -Input [5]: [i_item_sk#10, i_brand#11, i_class#12, i_category#13, i_product_name#14] -Arguments: [i_item_sk#10 ASC NULLS FIRST], false, 0 +Input [5]: [i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Arguments: [i_item_sk#8 ASC NULLS FIRST], false, 0 (20) SortMergeJoin [codegen id : 7] Left keys [1]: [inv_item_sk#1] -Right keys [1]: [i_item_sk#10] +Right keys [1]: [i_item_sk#8] Join condition: None (21) Project [codegen id : 7] -Output [5]: [inv_quantity_on_hand#3, i_brand#11, i_class#12, i_category#13, i_product_name#14] -Input [7]: [inv_item_sk#1, inv_quantity_on_hand#3, i_item_sk#10, i_brand#11, i_class#12, i_category#13, i_product_name#14] +Output [5]: [inv_quantity_on_hand#3, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Input [7]: [inv_item_sk#1, inv_quantity_on_hand#3, i_item_sk#8, i_brand#9, i_class#10, i_category#11, i_product_name#12] (22) HashAggregate [codegen id : 7] -Input [5]: [inv_quantity_on_hand#3, i_brand#11, i_class#12, i_category#13, i_product_name#14] -Keys [4]: [i_product_name#14, i_brand#11, i_class#12, i_category#13] +Input [5]: [inv_quantity_on_hand#3, i_brand#9, i_class#10, i_category#11, i_product_name#12] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] Functions [1]: [partial_avg(inv_quantity_on_hand#3)] -Aggregate Attributes [2]: [sum#16, count#17] -Results [6]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, sum#18, count#19] +Aggregate Attributes [2]: [sum#13, count#14] +Results [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#15, count#16] (23) Exchange -Input [6]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, sum#18, count#19] -Arguments: hashpartitioning(i_product_name#14, i_brand#11, i_class#12, i_category#13, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#15, count#16] +Arguments: hashpartitioning(i_product_name#12, i_brand#9, i_class#10, i_category#11, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) HashAggregate [codegen id : 8] -Input [6]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, sum#18, count#19] -Keys [4]: [i_product_name#14, i_brand#11, i_class#12, i_category#13] +Input [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#15, count#16] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] Functions [1]: [avg(inv_quantity_on_hand#3)] -Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#21] -Results [5]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, avg(inv_quantity_on_hand#3)#21 AS qoh#22] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [5]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, avg(inv_quantity_on_hand#3)#17 AS qoh#18] (25) HashAggregate [codegen id : 8] -Input [5]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, qoh#22] -Keys [4]: [i_product_name#14, i_brand#11, i_class#12, i_category#13] -Functions [1]: [partial_avg(qoh#22)] -Aggregate Attributes [2]: [sum#23, count#24] -Results [6]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, sum#25, count#26] +Input [5]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, qoh#18] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#19, count#20] +Results [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#21, count#22] (26) HashAggregate [codegen id : 8] -Input [6]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, sum#25, count#26] -Keys [4]: [i_product_name#14, i_brand#11, i_class#12, i_category#13] -Functions [1]: [avg(qoh#22)] -Aggregate Attributes [1]: [avg(qoh#22)#27] -Results [5]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, avg(qoh#22)#27 AS qoh#28] +Input [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#21, count#22] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#23] +Results [5]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, avg(qoh#18)#23 AS qoh#24] (27) ReusedExchange [Reuses operator id: 23] -Output [6]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, sum#18, count#19] +Output [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#15, count#16] (28) HashAggregate [codegen id : 16] -Input [6]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, sum#18, count#19] -Keys [4]: [i_product_name#14, i_brand#11, i_class#12, i_category#13] +Input [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#15, count#16] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] Functions [1]: [avg(inv_quantity_on_hand#3)] -Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#21] -Results [4]: [i_product_name#14, i_brand#11, i_class#12, avg(inv_quantity_on_hand#3)#21 AS qoh#22] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [4]: [i_product_name#12, i_brand#9, i_class#10, avg(inv_quantity_on_hand#3)#17 AS qoh#18] (29) HashAggregate [codegen id : 16] -Input [4]: [i_product_name#14, i_brand#11, i_class#12, qoh#22] -Keys [3]: [i_product_name#14, i_brand#11, i_class#12] -Functions [1]: [partial_avg(qoh#22)] -Aggregate Attributes [2]: [sum#29, count#30] -Results [5]: [i_product_name#14, i_brand#11, i_class#12, sum#31, count#32] +Input [4]: [i_product_name#12, i_brand#9, i_class#10, qoh#18] +Keys [3]: [i_product_name#12, i_brand#9, i_class#10] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#25, count#26] +Results [5]: [i_product_name#12, i_brand#9, i_class#10, sum#27, count#28] (30) Exchange -Input [5]: [i_product_name#14, i_brand#11, i_class#12, sum#31, count#32] -Arguments: hashpartitioning(i_product_name#14, i_brand#11, i_class#12, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [5]: [i_product_name#12, i_brand#9, i_class#10, sum#27, count#28] +Arguments: hashpartitioning(i_product_name#12, i_brand#9, i_class#10, 5), ENSURE_REQUIREMENTS, [plan_id=5] (31) HashAggregate [codegen id : 17] -Input [5]: [i_product_name#14, i_brand#11, i_class#12, sum#31, count#32] -Keys [3]: [i_product_name#14, i_brand#11, i_class#12] -Functions [1]: [avg(qoh#22)] -Aggregate Attributes [1]: [avg(qoh#22)#34] -Results [5]: [i_product_name#14, i_brand#11, i_class#12, null AS i_category#35, avg(qoh#22)#34 AS qoh#36] +Input [5]: [i_product_name#12, i_brand#9, i_class#10, sum#27, count#28] +Keys [3]: [i_product_name#12, i_brand#9, i_class#10] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#29] +Results [5]: [i_product_name#12, i_brand#9, i_class#10, null AS i_category#30, avg(qoh#18)#29 AS qoh#31] (32) ReusedExchange [Reuses operator id: 23] -Output [6]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, sum#18, count#19] +Output [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#15, count#16] (33) HashAggregate [codegen id : 25] -Input [6]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, sum#18, count#19] -Keys [4]: [i_product_name#14, i_brand#11, i_class#12, i_category#13] +Input [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#15, count#16] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] Functions [1]: [avg(inv_quantity_on_hand#3)] -Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#21] -Results [3]: [i_product_name#14, i_brand#11, avg(inv_quantity_on_hand#3)#21 AS qoh#22] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [3]: [i_product_name#12, i_brand#9, avg(inv_quantity_on_hand#3)#17 AS qoh#18] (34) HashAggregate [codegen id : 25] -Input [3]: [i_product_name#14, i_brand#11, qoh#22] -Keys [2]: [i_product_name#14, i_brand#11] -Functions [1]: [partial_avg(qoh#22)] -Aggregate Attributes [2]: [sum#37, count#38] -Results [4]: [i_product_name#14, i_brand#11, sum#39, count#40] +Input [3]: [i_product_name#12, i_brand#9, qoh#18] +Keys [2]: [i_product_name#12, i_brand#9] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#32, count#33] +Results [4]: [i_product_name#12, i_brand#9, sum#34, count#35] (35) Exchange -Input [4]: [i_product_name#14, i_brand#11, sum#39, count#40] -Arguments: hashpartitioning(i_product_name#14, i_brand#11, 5), ENSURE_REQUIREMENTS, [id=#41] +Input [4]: [i_product_name#12, i_brand#9, sum#34, count#35] +Arguments: hashpartitioning(i_product_name#12, i_brand#9, 5), ENSURE_REQUIREMENTS, [plan_id=6] (36) HashAggregate [codegen id : 26] -Input [4]: [i_product_name#14, i_brand#11, sum#39, count#40] -Keys [2]: [i_product_name#14, i_brand#11] -Functions [1]: [avg(qoh#22)] -Aggregate Attributes [1]: [avg(qoh#22)#42] -Results [5]: [i_product_name#14, i_brand#11, null AS i_class#43, null AS i_category#44, avg(qoh#22)#42 AS qoh#45] +Input [4]: [i_product_name#12, i_brand#9, sum#34, count#35] +Keys [2]: [i_product_name#12, i_brand#9] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#36] +Results [5]: [i_product_name#12, i_brand#9, null AS i_class#37, null AS i_category#38, avg(qoh#18)#36 AS qoh#39] (37) ReusedExchange [Reuses operator id: 23] -Output [6]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, sum#18, count#19] +Output [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#15, count#16] (38) HashAggregate [codegen id : 34] -Input [6]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, sum#18, count#19] -Keys [4]: [i_product_name#14, i_brand#11, i_class#12, i_category#13] +Input [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#15, count#16] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] Functions [1]: [avg(inv_quantity_on_hand#3)] -Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#21] -Results [2]: [i_product_name#14, avg(inv_quantity_on_hand#3)#21 AS qoh#22] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [2]: [i_product_name#12, avg(inv_quantity_on_hand#3)#17 AS qoh#18] (39) HashAggregate [codegen id : 34] -Input [2]: [i_product_name#14, qoh#22] -Keys [1]: [i_product_name#14] -Functions [1]: [partial_avg(qoh#22)] -Aggregate Attributes [2]: [sum#46, count#47] -Results [3]: [i_product_name#14, sum#48, count#49] +Input [2]: [i_product_name#12, qoh#18] +Keys [1]: [i_product_name#12] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#40, count#41] +Results [3]: [i_product_name#12, sum#42, count#43] (40) Exchange -Input [3]: [i_product_name#14, sum#48, count#49] -Arguments: hashpartitioning(i_product_name#14, 5), ENSURE_REQUIREMENTS, [id=#50] +Input [3]: [i_product_name#12, sum#42, count#43] +Arguments: hashpartitioning(i_product_name#12, 5), ENSURE_REQUIREMENTS, [plan_id=7] (41) HashAggregate [codegen id : 35] -Input [3]: [i_product_name#14, sum#48, count#49] -Keys [1]: [i_product_name#14] -Functions [1]: [avg(qoh#22)] -Aggregate Attributes [1]: [avg(qoh#22)#51] -Results [5]: [i_product_name#14, null AS i_brand#52, null AS i_class#53, null AS i_category#54, avg(qoh#22)#51 AS qoh#55] +Input [3]: [i_product_name#12, sum#42, count#43] +Keys [1]: [i_product_name#12] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#44] +Results [5]: [i_product_name#12, null AS i_brand#45, null AS i_class#46, null AS i_category#47, avg(qoh#18)#44 AS qoh#48] (42) ReusedExchange [Reuses operator id: 23] -Output [6]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, sum#18, count#19] +Output [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#15, count#16] (43) HashAggregate [codegen id : 43] -Input [6]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, sum#18, count#19] -Keys [4]: [i_product_name#14, i_brand#11, i_class#12, i_category#13] +Input [6]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, sum#15, count#16] +Keys [4]: [i_product_name#12, i_brand#9, i_class#10, i_category#11] Functions [1]: [avg(inv_quantity_on_hand#3)] -Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#21] -Results [1]: [avg(inv_quantity_on_hand#3)#21 AS qoh#22] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [1]: [avg(inv_quantity_on_hand#3)#17 AS qoh#18] (44) HashAggregate [codegen id : 43] -Input [1]: [qoh#22] +Input [1]: [qoh#18] Keys: [] -Functions [1]: [partial_avg(qoh#22)] -Aggregate Attributes [2]: [sum#56, count#57] -Results [2]: [sum#58, count#59] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#49, count#50] +Results [2]: [sum#51, count#52] (45) Exchange -Input [2]: [sum#58, count#59] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#60] +Input [2]: [sum#51, count#52] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] (46) HashAggregate [codegen id : 44] -Input [2]: [sum#58, count#59] +Input [2]: [sum#51, count#52] Keys: [] -Functions [1]: [avg(qoh#22)] -Aggregate Attributes [1]: [avg(qoh#22)#61] -Results [5]: [null AS i_product_name#62, null AS i_brand#63, null AS i_class#64, null AS i_category#65, avg(qoh#22)#61 AS qoh#66] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#53] +Results [5]: [null AS i_product_name#54, null AS i_brand#55, null AS i_class#56, null AS i_category#57, avg(qoh#18)#53 AS qoh#58] (47) Union (48) TakeOrderedAndProject -Input [5]: [i_product_name#14, i_brand#11, i_class#12, i_category#13, qoh#28] -Arguments: 100, [qoh#28 ASC NULLS FIRST, i_product_name#14 ASC NULLS FIRST, i_brand#11 ASC NULLS FIRST, i_class#12 ASC NULLS FIRST, i_category#13 ASC NULLS FIRST], [i_product_name#14, i_brand#11, i_class#12, i_category#13, qoh#28] +Input [5]: [i_product_name#12, i_brand#9, i_class#10, i_category#11, qoh#24] +Arguments: 100, [qoh#24 ASC NULLS FIRST, i_product_name#12 ASC NULLS FIRST, i_brand#9 ASC NULLS FIRST, i_class#10 ASC NULLS FIRST, i_category#11 ASC NULLS FIRST], [i_product_name#12, i_brand#9, i_class#10, i_category#11, qoh#24] ===== Subqueries ===== @@ -303,25 +303,25 @@ BroadcastExchange (53) (49) Scan parquet default.date_dim -Output [2]: [d_date_sk#8, d_month_seq#67] +Output [2]: [d_date_sk#7, d_month_seq#59] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] ReadSchema: struct (50) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#8, d_month_seq#67] +Input [2]: [d_date_sk#7, d_month_seq#59] (51) Filter [codegen id : 1] -Input [2]: [d_date_sk#8, d_month_seq#67] -Condition : (((isnotnull(d_month_seq#67) AND (d_month_seq#67 >= 1212)) AND (d_month_seq#67 <= 1223)) AND isnotnull(d_date_sk#8)) +Input [2]: [d_date_sk#7, d_month_seq#59] +Condition : (((isnotnull(d_month_seq#59) AND (d_month_seq#59 >= 1212)) AND (d_month_seq#59 <= 1223)) AND isnotnull(d_date_sk#7)) (52) Project [codegen id : 1] -Output [1]: [d_date_sk#8] -Input [2]: [d_date_sk#8, d_month_seq#67] +Output [1]: [d_date_sk#7] +Input [2]: [d_date_sk#7, d_month_seq#59] (53) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#68] +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a/explain.txt index c5bef0d13db91..72387f12a4037 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q22a/explain.txt @@ -89,7 +89,7 @@ Condition : isnotnull(i_item_sk#7) (10) BroadcastExchange Input [5]: [i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_item_sk#1] @@ -101,181 +101,181 @@ Output [6]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i Input [8]: [inv_item_sk#1, inv_warehouse_sk#2, inv_quantity_on_hand#3, i_item_sk#7, i_brand#8, i_class#9, i_category#10, i_product_name#11] (13) Scan parquet default.warehouse -Output [1]: [w_warehouse_sk#13] +Output [1]: [w_warehouse_sk#12] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [1]: [w_warehouse_sk#13] +Input [1]: [w_warehouse_sk#12] (15) Filter [codegen id : 3] -Input [1]: [w_warehouse_sk#13] -Condition : isnotnull(w_warehouse_sk#13) +Input [1]: [w_warehouse_sk#12] +Condition : isnotnull(w_warehouse_sk#12) (16) BroadcastExchange -Input [1]: [w_warehouse_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Input [1]: [w_warehouse_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [inv_warehouse_sk#2] -Right keys [1]: [w_warehouse_sk#13] +Right keys [1]: [w_warehouse_sk#12] Join condition: None (18) Project [codegen id : 4] Output [5]: [inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] -Input [7]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11, w_warehouse_sk#13] +Input [7]: [inv_warehouse_sk#2, inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11, w_warehouse_sk#12] (19) HashAggregate [codegen id : 4] Input [5]: [inv_quantity_on_hand#3, i_brand#8, i_class#9, i_category#10, i_product_name#11] Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] Functions [1]: [partial_avg(inv_quantity_on_hand#3)] -Aggregate Attributes [2]: [sum#15, count#16] -Results [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#17, count#18] +Aggregate Attributes [2]: [sum#13, count#14] +Results [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] (20) Exchange -Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#17, count#18] -Arguments: hashpartitioning(i_product_name#11, i_brand#8, i_class#9, i_category#10, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] +Arguments: hashpartitioning(i_product_name#11, i_brand#8, i_class#9, i_category#10, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 5] -Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#17, count#18] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] Functions [1]: [avg(inv_quantity_on_hand#3)] -Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#20] -Results [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, avg(inv_quantity_on_hand#3)#20 AS qoh#21] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, avg(inv_quantity_on_hand#3)#17 AS qoh#18] (22) HashAggregate [codegen id : 5] -Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#21] +Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#18] Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] -Functions [1]: [partial_avg(qoh#21)] -Aggregate Attributes [2]: [sum#22, count#23] -Results [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#24, count#25] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#19, count#20] +Results [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#21, count#22] (23) HashAggregate [codegen id : 5] -Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#24, count#25] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#21, count#22] Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] -Functions [1]: [avg(qoh#21)] -Aggregate Attributes [1]: [avg(qoh#21)#26] -Results [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, avg(qoh#21)#26 AS qoh#27] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#23] +Results [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, avg(qoh#18)#23 AS qoh#24] (24) ReusedExchange [Reuses operator id: 20] -Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#17, count#18] +Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] (25) HashAggregate [codegen id : 10] -Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#17, count#18] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] Functions [1]: [avg(inv_quantity_on_hand#3)] -Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#20] -Results [4]: [i_product_name#11, i_brand#8, i_class#9, avg(inv_quantity_on_hand#3)#20 AS qoh#21] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [4]: [i_product_name#11, i_brand#8, i_class#9, avg(inv_quantity_on_hand#3)#17 AS qoh#18] (26) HashAggregate [codegen id : 10] -Input [4]: [i_product_name#11, i_brand#8, i_class#9, qoh#21] +Input [4]: [i_product_name#11, i_brand#8, i_class#9, qoh#18] Keys [3]: [i_product_name#11, i_brand#8, i_class#9] -Functions [1]: [partial_avg(qoh#21)] -Aggregate Attributes [2]: [sum#28, count#29] -Results [5]: [i_product_name#11, i_brand#8, i_class#9, sum#30, count#31] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#25, count#26] +Results [5]: [i_product_name#11, i_brand#8, i_class#9, sum#27, count#28] (27) Exchange -Input [5]: [i_product_name#11, i_brand#8, i_class#9, sum#30, count#31] -Arguments: hashpartitioning(i_product_name#11, i_brand#8, i_class#9, 5), ENSURE_REQUIREMENTS, [id=#32] +Input [5]: [i_product_name#11, i_brand#8, i_class#9, sum#27, count#28] +Arguments: hashpartitioning(i_product_name#11, i_brand#8, i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 11] -Input [5]: [i_product_name#11, i_brand#8, i_class#9, sum#30, count#31] +Input [5]: [i_product_name#11, i_brand#8, i_class#9, sum#27, count#28] Keys [3]: [i_product_name#11, i_brand#8, i_class#9] -Functions [1]: [avg(qoh#21)] -Aggregate Attributes [1]: [avg(qoh#21)#33] -Results [5]: [i_product_name#11, i_brand#8, i_class#9, null AS i_category#34, avg(qoh#21)#33 AS qoh#35] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#29] +Results [5]: [i_product_name#11, i_brand#8, i_class#9, null AS i_category#30, avg(qoh#18)#29 AS qoh#31] (29) ReusedExchange [Reuses operator id: 20] -Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#17, count#18] +Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] (30) HashAggregate [codegen id : 16] -Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#17, count#18] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] Functions [1]: [avg(inv_quantity_on_hand#3)] -Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#20] -Results [3]: [i_product_name#11, i_brand#8, avg(inv_quantity_on_hand#3)#20 AS qoh#21] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [3]: [i_product_name#11, i_brand#8, avg(inv_quantity_on_hand#3)#17 AS qoh#18] (31) HashAggregate [codegen id : 16] -Input [3]: [i_product_name#11, i_brand#8, qoh#21] +Input [3]: [i_product_name#11, i_brand#8, qoh#18] Keys [2]: [i_product_name#11, i_brand#8] -Functions [1]: [partial_avg(qoh#21)] -Aggregate Attributes [2]: [sum#36, count#37] -Results [4]: [i_product_name#11, i_brand#8, sum#38, count#39] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#32, count#33] +Results [4]: [i_product_name#11, i_brand#8, sum#34, count#35] (32) Exchange -Input [4]: [i_product_name#11, i_brand#8, sum#38, count#39] -Arguments: hashpartitioning(i_product_name#11, i_brand#8, 5), ENSURE_REQUIREMENTS, [id=#40] +Input [4]: [i_product_name#11, i_brand#8, sum#34, count#35] +Arguments: hashpartitioning(i_product_name#11, i_brand#8, 5), ENSURE_REQUIREMENTS, [plan_id=5] (33) HashAggregate [codegen id : 17] -Input [4]: [i_product_name#11, i_brand#8, sum#38, count#39] +Input [4]: [i_product_name#11, i_brand#8, sum#34, count#35] Keys [2]: [i_product_name#11, i_brand#8] -Functions [1]: [avg(qoh#21)] -Aggregate Attributes [1]: [avg(qoh#21)#41] -Results [5]: [i_product_name#11, i_brand#8, null AS i_class#42, null AS i_category#43, avg(qoh#21)#41 AS qoh#44] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#36] +Results [5]: [i_product_name#11, i_brand#8, null AS i_class#37, null AS i_category#38, avg(qoh#18)#36 AS qoh#39] (34) ReusedExchange [Reuses operator id: 20] -Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#17, count#18] +Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] (35) HashAggregate [codegen id : 22] -Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#17, count#18] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] Functions [1]: [avg(inv_quantity_on_hand#3)] -Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#20] -Results [2]: [i_product_name#11, avg(inv_quantity_on_hand#3)#20 AS qoh#21] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [2]: [i_product_name#11, avg(inv_quantity_on_hand#3)#17 AS qoh#18] (36) HashAggregate [codegen id : 22] -Input [2]: [i_product_name#11, qoh#21] +Input [2]: [i_product_name#11, qoh#18] Keys [1]: [i_product_name#11] -Functions [1]: [partial_avg(qoh#21)] -Aggregate Attributes [2]: [sum#45, count#46] -Results [3]: [i_product_name#11, sum#47, count#48] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#40, count#41] +Results [3]: [i_product_name#11, sum#42, count#43] (37) Exchange -Input [3]: [i_product_name#11, sum#47, count#48] -Arguments: hashpartitioning(i_product_name#11, 5), ENSURE_REQUIREMENTS, [id=#49] +Input [3]: [i_product_name#11, sum#42, count#43] +Arguments: hashpartitioning(i_product_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=6] (38) HashAggregate [codegen id : 23] -Input [3]: [i_product_name#11, sum#47, count#48] +Input [3]: [i_product_name#11, sum#42, count#43] Keys [1]: [i_product_name#11] -Functions [1]: [avg(qoh#21)] -Aggregate Attributes [1]: [avg(qoh#21)#50] -Results [5]: [i_product_name#11, null AS i_brand#51, null AS i_class#52, null AS i_category#53, avg(qoh#21)#50 AS qoh#54] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#44] +Results [5]: [i_product_name#11, null AS i_brand#45, null AS i_class#46, null AS i_category#47, avg(qoh#18)#44 AS qoh#48] (39) ReusedExchange [Reuses operator id: 20] -Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#17, count#18] +Output [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] (40) HashAggregate [codegen id : 28] -Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#17, count#18] +Input [6]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, sum#15, count#16] Keys [4]: [i_product_name#11, i_brand#8, i_class#9, i_category#10] Functions [1]: [avg(inv_quantity_on_hand#3)] -Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#20] -Results [1]: [avg(inv_quantity_on_hand#3)#20 AS qoh#21] +Aggregate Attributes [1]: [avg(inv_quantity_on_hand#3)#17] +Results [1]: [avg(inv_quantity_on_hand#3)#17 AS qoh#18] (41) HashAggregate [codegen id : 28] -Input [1]: [qoh#21] +Input [1]: [qoh#18] Keys: [] -Functions [1]: [partial_avg(qoh#21)] -Aggregate Attributes [2]: [sum#55, count#56] -Results [2]: [sum#57, count#58] +Functions [1]: [partial_avg(qoh#18)] +Aggregate Attributes [2]: [sum#49, count#50] +Results [2]: [sum#51, count#52] (42) Exchange -Input [2]: [sum#57, count#58] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#59] +Input [2]: [sum#51, count#52] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (43) HashAggregate [codegen id : 29] -Input [2]: [sum#57, count#58] +Input [2]: [sum#51, count#52] Keys: [] -Functions [1]: [avg(qoh#21)] -Aggregate Attributes [1]: [avg(qoh#21)#60] -Results [5]: [null AS i_product_name#61, null AS i_brand#62, null AS i_class#63, null AS i_category#64, avg(qoh#21)#60 AS qoh#65] +Functions [1]: [avg(qoh#18)] +Aggregate Attributes [1]: [avg(qoh#18)#53] +Results [5]: [null AS i_product_name#54, null AS i_brand#55, null AS i_class#56, null AS i_category#57, avg(qoh#18)#53 AS qoh#58] (44) Union (45) TakeOrderedAndProject -Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#27] -Arguments: 100, [qoh#27 ASC NULLS FIRST, i_product_name#11 ASC NULLS FIRST, i_brand#8 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#27] +Input [5]: [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#24] +Arguments: 100, [qoh#24 ASC NULLS FIRST, i_product_name#11 ASC NULLS FIRST, i_brand#8 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_category#10 ASC NULLS FIRST], [i_product_name#11, i_brand#8, i_class#9, i_category#10, qoh#24] ===== Subqueries ===== @@ -288,25 +288,25 @@ BroadcastExchange (50) (46) Scan parquet default.date_dim -Output [2]: [d_date_sk#6, d_month_seq#66] +Output [2]: [d_date_sk#6, d_month_seq#59] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] ReadSchema: struct (47) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#6, d_month_seq#66] +Input [2]: [d_date_sk#6, d_month_seq#59] (48) Filter [codegen id : 1] -Input [2]: [d_date_sk#6, d_month_seq#66] -Condition : (((isnotnull(d_month_seq#66) AND (d_month_seq#66 >= 1212)) AND (d_month_seq#66 <= 1223)) AND isnotnull(d_date_sk#6)) +Input [2]: [d_date_sk#6, d_month_seq#59] +Condition : (((isnotnull(d_month_seq#59) AND (d_month_seq#59 >= 1212)) AND (d_month_seq#59 <= 1223)) AND isnotnull(d_date_sk#6)) (49) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [2]: [d_date_sk#6, d_month_seq#66] +Input [2]: [d_date_sk#6, d_month_seq#59] (50) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt index db2116117c81e..22079f0f10a8a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24.sf100/explain.txt @@ -69,210 +69,210 @@ Input [5]: [s_store_sk#1, s_store_name#2, s_market_id#3, s_state#4, s_zip#5] (5) BroadcastExchange Input [4]: [s_store_sk#1, s_store_name#2, s_state#4, s_zip#5] -Arguments: HashedRelationBroadcastMode(List(input[3, string, true]),false), [id=#6] +Arguments: HashedRelationBroadcastMode(List(input[3, string, true]),false), [plan_id=1] (6) Scan parquet default.customer_address -Output [4]: [ca_address_sk#7, ca_state#8, ca_zip#9, ca_country#10] +Output [4]: [ca_address_sk#6, ca_state#7, ca_zip#8, ca_country#9] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_country), IsNotNull(ca_zip)] ReadSchema: struct (7) ColumnarToRow -Input [4]: [ca_address_sk#7, ca_state#8, ca_zip#9, ca_country#10] +Input [4]: [ca_address_sk#6, ca_state#7, ca_zip#8, ca_country#9] (8) Filter -Input [4]: [ca_address_sk#7, ca_state#8, ca_zip#9, ca_country#10] -Condition : ((isnotnull(ca_address_sk#7) AND isnotnull(ca_country#10)) AND isnotnull(ca_zip#9)) +Input [4]: [ca_address_sk#6, ca_state#7, ca_zip#8, ca_country#9] +Condition : ((isnotnull(ca_address_sk#6) AND isnotnull(ca_country#9)) AND isnotnull(ca_zip#8)) (9) BroadcastHashJoin [codegen id : 2] Left keys [1]: [s_zip#5] -Right keys [1]: [ca_zip#9] +Right keys [1]: [ca_zip#8] Join condition: None (10) Project [codegen id : 2] -Output [6]: [s_store_sk#1, s_store_name#2, s_state#4, ca_address_sk#7, ca_state#8, ca_country#10] -Input [8]: [s_store_sk#1, s_store_name#2, s_state#4, s_zip#5, ca_address_sk#7, ca_state#8, ca_zip#9, ca_country#10] +Output [6]: [s_store_sk#1, s_store_name#2, s_state#4, ca_address_sk#6, ca_state#7, ca_country#9] +Input [8]: [s_store_sk#1, s_store_name#2, s_state#4, s_zip#5, ca_address_sk#6, ca_state#7, ca_zip#8, ca_country#9] (11) BroadcastExchange -Input [6]: [s_store_sk#1, s_store_name#2, s_state#4, ca_address_sk#7, ca_state#8, ca_country#10] -Arguments: HashedRelationBroadcastMode(List(input[3, int, true], upper(input[5, string, true])),false), [id=#11] +Input [6]: [s_store_sk#1, s_store_name#2, s_state#4, ca_address_sk#6, ca_state#7, ca_country#9] +Arguments: HashedRelationBroadcastMode(List(input[3, int, true], upper(input[5, string, true])),false), [plan_id=2] (12) Scan parquet default.customer -Output [5]: [c_customer_sk#12, c_current_addr_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Output [5]: [c_customer_sk#10, c_current_addr_sk#11, c_first_name#12, c_last_name#13, c_birth_country#14] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_birth_country)] ReadSchema: struct (13) ColumnarToRow -Input [5]: [c_customer_sk#12, c_current_addr_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Input [5]: [c_customer_sk#10, c_current_addr_sk#11, c_first_name#12, c_last_name#13, c_birth_country#14] (14) Filter -Input [5]: [c_customer_sk#12, c_current_addr_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] -Condition : ((isnotnull(c_customer_sk#12) AND isnotnull(c_current_addr_sk#13)) AND isnotnull(c_birth_country#16)) +Input [5]: [c_customer_sk#10, c_current_addr_sk#11, c_first_name#12, c_last_name#13, c_birth_country#14] +Condition : ((isnotnull(c_customer_sk#10) AND isnotnull(c_current_addr_sk#11)) AND isnotnull(c_birth_country#14)) (15) BroadcastHashJoin [codegen id : 3] -Left keys [2]: [ca_address_sk#7, upper(ca_country#10)] -Right keys [2]: [c_current_addr_sk#13, c_birth_country#16] +Left keys [2]: [ca_address_sk#6, upper(ca_country#9)] +Right keys [2]: [c_current_addr_sk#11, c_birth_country#14] Join condition: None (16) Project [codegen id : 3] -Output [7]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#8, c_customer_sk#12, c_first_name#14, c_last_name#15] -Input [11]: [s_store_sk#1, s_store_name#2, s_state#4, ca_address_sk#7, ca_state#8, ca_country#10, c_customer_sk#12, c_current_addr_sk#13, c_first_name#14, c_last_name#15, c_birth_country#16] +Output [7]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#7, c_customer_sk#10, c_first_name#12, c_last_name#13] +Input [11]: [s_store_sk#1, s_store_name#2, s_state#4, ca_address_sk#6, ca_state#7, ca_country#9, c_customer_sk#10, c_current_addr_sk#11, c_first_name#12, c_last_name#13, c_birth_country#14] (17) BroadcastExchange -Input [7]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#8, c_customer_sk#12, c_first_name#14, c_last_name#15] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[4, int, true] as bigint) & 4294967295))),false), [id=#17] +Input [7]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#7, c_customer_sk#10, c_first_name#12, c_last_name#13] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[4, int, true] as bigint) & 4294967295))),false), [plan_id=3] (18) Scan parquet default.store_sales -Output [6]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22, ss_sold_date_sk#23] +Output [6]: [ss_item_sk#15, ss_customer_sk#16, ss_store_sk#17, ss_ticket_number#18, ss_net_paid#19, ss_sold_date_sk#20] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] ReadSchema: struct (19) ColumnarToRow -Input [6]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22, ss_sold_date_sk#23] +Input [6]: [ss_item_sk#15, ss_customer_sk#16, ss_store_sk#17, ss_ticket_number#18, ss_net_paid#19, ss_sold_date_sk#20] (20) Filter -Input [6]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22, ss_sold_date_sk#23] -Condition : (((isnotnull(ss_ticket_number#21) AND isnotnull(ss_item_sk#18)) AND isnotnull(ss_store_sk#20)) AND isnotnull(ss_customer_sk#19)) +Input [6]: [ss_item_sk#15, ss_customer_sk#16, ss_store_sk#17, ss_ticket_number#18, ss_net_paid#19, ss_sold_date_sk#20] +Condition : (((isnotnull(ss_ticket_number#18) AND isnotnull(ss_item_sk#15)) AND isnotnull(ss_store_sk#17)) AND isnotnull(ss_customer_sk#16)) (21) Project -Output [5]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22] -Input [6]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22, ss_sold_date_sk#23] +Output [5]: [ss_item_sk#15, ss_customer_sk#16, ss_store_sk#17, ss_ticket_number#18, ss_net_paid#19] +Input [6]: [ss_item_sk#15, ss_customer_sk#16, ss_store_sk#17, ss_ticket_number#18, ss_net_paid#19, ss_sold_date_sk#20] (22) BroadcastHashJoin [codegen id : 5] -Left keys [2]: [s_store_sk#1, c_customer_sk#12] -Right keys [2]: [ss_store_sk#20, ss_customer_sk#19] +Left keys [2]: [s_store_sk#1, c_customer_sk#10] +Right keys [2]: [ss_store_sk#17, ss_customer_sk#16] Join condition: None (23) Project [codegen id : 5] -Output [8]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22] -Input [12]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#8, c_customer_sk#12, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22] +Output [8]: [s_store_name#2, s_state#4, ca_state#7, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_ticket_number#18, ss_net_paid#19] +Input [12]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#7, c_customer_sk#10, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_customer_sk#16, ss_store_sk#17, ss_ticket_number#18, ss_net_paid#19] (24) Scan parquet default.item -Output [6]: [i_item_sk#24, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] +Output [6]: [i_item_sk#21, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale ), IsNotNull(i_item_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 4] -Input [6]: [i_item_sk#24, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] +Input [6]: [i_item_sk#21, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] (26) Filter [codegen id : 4] -Input [6]: [i_item_sk#24, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] -Condition : ((isnotnull(i_color#27) AND (i_color#27 = pale )) AND isnotnull(i_item_sk#24)) +Input [6]: [i_item_sk#21, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] +Condition : ((isnotnull(i_color#24) AND (i_color#24 = pale )) AND isnotnull(i_item_sk#21)) (27) BroadcastExchange -Input [6]: [i_item_sk#24, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#30] +Input [6]: [i_item_sk#21, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (28) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ss_item_sk#18] -Right keys [1]: [i_item_sk#24] +Left keys [1]: [ss_item_sk#15] +Right keys [1]: [i_item_sk#21] Join condition: None (29) Project [codegen id : 5] -Output [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] -Input [14]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_item_sk#24, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] +Output [13]: [s_store_name#2, s_state#4, ca_state#7, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_ticket_number#18, ss_net_paid#19, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] +Input [14]: [s_store_name#2, s_state#4, ca_state#7, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_ticket_number#18, ss_net_paid#19, i_item_sk#21, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] (30) Exchange -Input [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] -Arguments: hashpartitioning(ss_ticket_number#21, ss_item_sk#18, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [13]: [s_store_name#2, s_state#4, ca_state#7, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_ticket_number#18, ss_net_paid#19, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] +Arguments: hashpartitioning(ss_ticket_number#18, ss_item_sk#15, 5), ENSURE_REQUIREMENTS, [plan_id=5] (31) Sort [codegen id : 6] -Input [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] -Arguments: [ss_ticket_number#21 ASC NULLS FIRST, ss_item_sk#18 ASC NULLS FIRST], false, 0 +Input [13]: [s_store_name#2, s_state#4, ca_state#7, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_ticket_number#18, ss_net_paid#19, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] +Arguments: [ss_ticket_number#18 ASC NULLS FIRST, ss_item_sk#15 ASC NULLS FIRST], false, 0 (32) Scan parquet default.store_returns -Output [3]: [sr_item_sk#32, sr_ticket_number#33, sr_returned_date_sk#34] +Output [3]: [sr_item_sk#27, sr_ticket_number#28, sr_returned_date_sk#29] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] ReadSchema: struct (33) ColumnarToRow [codegen id : 7] -Input [3]: [sr_item_sk#32, sr_ticket_number#33, sr_returned_date_sk#34] +Input [3]: [sr_item_sk#27, sr_ticket_number#28, sr_returned_date_sk#29] (34) Filter [codegen id : 7] -Input [3]: [sr_item_sk#32, sr_ticket_number#33, sr_returned_date_sk#34] -Condition : (isnotnull(sr_ticket_number#33) AND isnotnull(sr_item_sk#32)) +Input [3]: [sr_item_sk#27, sr_ticket_number#28, sr_returned_date_sk#29] +Condition : (isnotnull(sr_ticket_number#28) AND isnotnull(sr_item_sk#27)) (35) Project [codegen id : 7] -Output [2]: [sr_item_sk#32, sr_ticket_number#33] -Input [3]: [sr_item_sk#32, sr_ticket_number#33, sr_returned_date_sk#34] +Output [2]: [sr_item_sk#27, sr_ticket_number#28] +Input [3]: [sr_item_sk#27, sr_ticket_number#28, sr_returned_date_sk#29] (36) Exchange -Input [2]: [sr_item_sk#32, sr_ticket_number#33] -Arguments: hashpartitioning(sr_ticket_number#33, sr_item_sk#32, 5), ENSURE_REQUIREMENTS, [id=#35] +Input [2]: [sr_item_sk#27, sr_ticket_number#28] +Arguments: hashpartitioning(sr_ticket_number#28, sr_item_sk#27, 5), ENSURE_REQUIREMENTS, [plan_id=6] (37) Sort [codegen id : 8] -Input [2]: [sr_item_sk#32, sr_ticket_number#33] -Arguments: [sr_ticket_number#33 ASC NULLS FIRST, sr_item_sk#32 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#27, sr_ticket_number#28] +Arguments: [sr_ticket_number#28 ASC NULLS FIRST, sr_item_sk#27 ASC NULLS FIRST], false, 0 (38) SortMergeJoin [codegen id : 9] -Left keys [2]: [ss_ticket_number#21, ss_item_sk#18] -Right keys [2]: [sr_ticket_number#33, sr_item_sk#32] +Left keys [2]: [ss_ticket_number#18, ss_item_sk#15] +Right keys [2]: [sr_ticket_number#28, sr_item_sk#27] Join condition: None (39) Project [codegen id : 9] -Output [11]: [ss_net_paid#22, s_store_name#2, s_state#4, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29, c_first_name#14, c_last_name#15, ca_state#8] -Input [15]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29, sr_item_sk#32, sr_ticket_number#33] +Output [11]: [ss_net_paid#19, s_store_name#2, s_state#4, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26, c_first_name#12, c_last_name#13, ca_state#7] +Input [15]: [s_store_name#2, s_state#4, ca_state#7, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_ticket_number#18, ss_net_paid#19, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26, sr_item_sk#27, sr_ticket_number#28] (40) HashAggregate [codegen id : 9] -Input [11]: [ss_net_paid#22, s_store_name#2, s_state#4, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29, c_first_name#14, c_last_name#15, ca_state#8] -Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26] -Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#22))] -Aggregate Attributes [1]: [sum#36] -Results [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26, sum#37] +Input [11]: [ss_net_paid#19, s_store_name#2, s_state#4, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26, c_first_name#12, c_last_name#13, ca_state#7] +Keys [10]: [c_last_name#13, c_first_name#12, s_store_name#2, ca_state#7, s_state#4, i_color#24, i_current_price#22, i_manager_id#26, i_units#25, i_size#23] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#19))] +Aggregate Attributes [1]: [sum#30] +Results [11]: [c_last_name#13, c_first_name#12, s_store_name#2, ca_state#7, s_state#4, i_color#24, i_current_price#22, i_manager_id#26, i_units#25, i_size#23, sum#31] (41) Exchange -Input [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26, sum#37] -Arguments: hashpartitioning(c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [11]: [c_last_name#13, c_first_name#12, s_store_name#2, ca_state#7, s_state#4, i_color#24, i_current_price#22, i_manager_id#26, i_units#25, i_size#23, sum#31] +Arguments: hashpartitioning(c_last_name#13, c_first_name#12, s_store_name#2, ca_state#7, s_state#4, i_color#24, i_current_price#22, i_manager_id#26, i_units#25, i_size#23, 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) HashAggregate [codegen id : 10] -Input [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26, sum#37] -Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26] -Functions [1]: [sum(UnscaledValue(ss_net_paid#22))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#22))#39] -Results [4]: [c_last_name#15, c_first_name#14, s_store_name#2, MakeDecimal(sum(UnscaledValue(ss_net_paid#22))#39,17,2) AS netpaid#40] +Input [11]: [c_last_name#13, c_first_name#12, s_store_name#2, ca_state#7, s_state#4, i_color#24, i_current_price#22, i_manager_id#26, i_units#25, i_size#23, sum#31] +Keys [10]: [c_last_name#13, c_first_name#12, s_store_name#2, ca_state#7, s_state#4, i_color#24, i_current_price#22, i_manager_id#26, i_units#25, i_size#23] +Functions [1]: [sum(UnscaledValue(ss_net_paid#19))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#19))#32] +Results [4]: [c_last_name#13, c_first_name#12, s_store_name#2, MakeDecimal(sum(UnscaledValue(ss_net_paid#19))#32,17,2) AS netpaid#33] (43) HashAggregate [codegen id : 10] -Input [4]: [c_last_name#15, c_first_name#14, s_store_name#2, netpaid#40] -Keys [3]: [c_last_name#15, c_first_name#14, s_store_name#2] -Functions [1]: [partial_sum(netpaid#40)] -Aggregate Attributes [2]: [sum#41, isEmpty#42] -Results [5]: [c_last_name#15, c_first_name#14, s_store_name#2, sum#43, isEmpty#44] +Input [4]: [c_last_name#13, c_first_name#12, s_store_name#2, netpaid#33] +Keys [3]: [c_last_name#13, c_first_name#12, s_store_name#2] +Functions [1]: [partial_sum(netpaid#33)] +Aggregate Attributes [2]: [sum#34, isEmpty#35] +Results [5]: [c_last_name#13, c_first_name#12, s_store_name#2, sum#36, isEmpty#37] (44) Exchange -Input [5]: [c_last_name#15, c_first_name#14, s_store_name#2, sum#43, isEmpty#44] -Arguments: hashpartitioning(c_last_name#15, c_first_name#14, s_store_name#2, 5), ENSURE_REQUIREMENTS, [id=#45] +Input [5]: [c_last_name#13, c_first_name#12, s_store_name#2, sum#36, isEmpty#37] +Arguments: hashpartitioning(c_last_name#13, c_first_name#12, s_store_name#2, 5), ENSURE_REQUIREMENTS, [plan_id=8] (45) HashAggregate [codegen id : 11] -Input [5]: [c_last_name#15, c_first_name#14, s_store_name#2, sum#43, isEmpty#44] -Keys [3]: [c_last_name#15, c_first_name#14, s_store_name#2] -Functions [1]: [sum(netpaid#40)] -Aggregate Attributes [1]: [sum(netpaid#40)#46] -Results [4]: [c_last_name#15, c_first_name#14, s_store_name#2, sum(netpaid#40)#46 AS paid#47] +Input [5]: [c_last_name#13, c_first_name#12, s_store_name#2, sum#36, isEmpty#37] +Keys [3]: [c_last_name#13, c_first_name#12, s_store_name#2] +Functions [1]: [sum(netpaid#33)] +Aggregate Attributes [1]: [sum(netpaid#33)#38] +Results [4]: [c_last_name#13, c_first_name#12, s_store_name#2, sum(netpaid#33)#38 AS paid#39] (46) Filter [codegen id : 11] -Input [4]: [c_last_name#15, c_first_name#14, s_store_name#2, paid#47] -Condition : (isnotnull(paid#47) AND (cast(paid#47 as decimal(33,8)) > cast(Subquery scalar-subquery#48, [id=#49] as decimal(33,8)))) +Input [4]: [c_last_name#13, c_first_name#12, s_store_name#2, paid#39] +Condition : (isnotnull(paid#39) AND (cast(paid#39 as decimal(33,8)) > cast(Subquery scalar-subquery#40, [id=#41] as decimal(33,8)))) (47) Exchange -Input [4]: [c_last_name#15, c_first_name#14, s_store_name#2, paid#47] -Arguments: rangepartitioning(c_last_name#15 ASC NULLS FIRST, c_first_name#14 ASC NULLS FIRST, s_store_name#2 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#50] +Input [4]: [c_last_name#13, c_first_name#12, s_store_name#2, paid#39] +Arguments: rangepartitioning(c_last_name#13 ASC NULLS FIRST, c_first_name#12 ASC NULLS FIRST, s_store_name#2 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=9] (48) Sort [codegen id : 12] -Input [4]: [c_last_name#15, c_first_name#14, s_store_name#2, paid#47] -Arguments: [c_last_name#15 ASC NULLS FIRST, c_first_name#14 ASC NULLS FIRST, s_store_name#2 ASC NULLS FIRST], true, 0 +Input [4]: [c_last_name#13, c_first_name#12, s_store_name#2, paid#39] +Arguments: [c_last_name#13 ASC NULLS FIRST, c_first_name#12 ASC NULLS FIRST, s_store_name#2 ASC NULLS FIRST], true, 0 ===== Subqueries ===== -Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#48, [id=#49] +Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#40, [id=#41] * HashAggregate (76) +- Exchange (75) +- * HashAggregate (74) @@ -304,132 +304,132 @@ Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquer (49) ReusedExchange [Reuses operator id: 17] -Output [7]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#8, c_customer_sk#12, c_first_name#14, c_last_name#15] +Output [7]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#7, c_customer_sk#10, c_first_name#12, c_last_name#13] (50) Scan parquet default.store_sales -Output [6]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22, ss_sold_date_sk#23] +Output [6]: [ss_item_sk#15, ss_customer_sk#16, ss_store_sk#17, ss_ticket_number#18, ss_net_paid#19, ss_sold_date_sk#20] Batched: true Location [not included in comparison]/{warehouse_dir}/store_sales] PushedFilters: [IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk)] ReadSchema: struct (51) ColumnarToRow -Input [6]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22, ss_sold_date_sk#23] +Input [6]: [ss_item_sk#15, ss_customer_sk#16, ss_store_sk#17, ss_ticket_number#18, ss_net_paid#19, ss_sold_date_sk#20] (52) Filter -Input [6]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22, ss_sold_date_sk#23] -Condition : (((isnotnull(ss_ticket_number#21) AND isnotnull(ss_item_sk#18)) AND isnotnull(ss_store_sk#20)) AND isnotnull(ss_customer_sk#19)) +Input [6]: [ss_item_sk#15, ss_customer_sk#16, ss_store_sk#17, ss_ticket_number#18, ss_net_paid#19, ss_sold_date_sk#20] +Condition : (((isnotnull(ss_ticket_number#18) AND isnotnull(ss_item_sk#15)) AND isnotnull(ss_store_sk#17)) AND isnotnull(ss_customer_sk#16)) (53) Project -Output [5]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22] -Input [6]: [ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22, ss_sold_date_sk#23] +Output [5]: [ss_item_sk#15, ss_customer_sk#16, ss_store_sk#17, ss_ticket_number#18, ss_net_paid#19] +Input [6]: [ss_item_sk#15, ss_customer_sk#16, ss_store_sk#17, ss_ticket_number#18, ss_net_paid#19, ss_sold_date_sk#20] (54) BroadcastHashJoin [codegen id : 4] -Left keys [2]: [s_store_sk#1, c_customer_sk#12] -Right keys [2]: [ss_store_sk#20, ss_customer_sk#19] +Left keys [2]: [s_store_sk#1, c_customer_sk#10] +Right keys [2]: [ss_store_sk#17, ss_customer_sk#16] Join condition: None (55) Project [codegen id : 4] -Output [8]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22] -Input [12]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#8, c_customer_sk#12, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_customer_sk#19, ss_store_sk#20, ss_ticket_number#21, ss_net_paid#22] +Output [8]: [s_store_name#2, s_state#4, ca_state#7, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_ticket_number#18, ss_net_paid#19] +Input [12]: [s_store_sk#1, s_store_name#2, s_state#4, ca_state#7, c_customer_sk#10, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_customer_sk#16, ss_store_sk#17, ss_ticket_number#18, ss_net_paid#19] (56) Exchange -Input [8]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22] -Arguments: hashpartitioning(ss_item_sk#18, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [8]: [s_store_name#2, s_state#4, ca_state#7, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_ticket_number#18, ss_net_paid#19] +Arguments: hashpartitioning(ss_item_sk#15, 5), ENSURE_REQUIREMENTS, [plan_id=10] (57) Sort [codegen id : 5] -Input [8]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22] -Arguments: [ss_item_sk#18 ASC NULLS FIRST], false, 0 +Input [8]: [s_store_name#2, s_state#4, ca_state#7, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_ticket_number#18, ss_net_paid#19] +Arguments: [ss_item_sk#15 ASC NULLS FIRST], false, 0 (58) Scan parquet default.item -Output [6]: [i_item_sk#24, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] +Output [6]: [i_item_sk#21, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (59) ColumnarToRow [codegen id : 6] -Input [6]: [i_item_sk#24, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] +Input [6]: [i_item_sk#21, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] (60) Filter [codegen id : 6] -Input [6]: [i_item_sk#24, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] -Condition : isnotnull(i_item_sk#24) +Input [6]: [i_item_sk#21, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] +Condition : isnotnull(i_item_sk#21) (61) Exchange -Input [6]: [i_item_sk#24, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] -Arguments: hashpartitioning(i_item_sk#24, 5), ENSURE_REQUIREMENTS, [id=#52] +Input [6]: [i_item_sk#21, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] +Arguments: hashpartitioning(i_item_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=11] (62) Sort [codegen id : 7] -Input [6]: [i_item_sk#24, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] -Arguments: [i_item_sk#24 ASC NULLS FIRST], false, 0 +Input [6]: [i_item_sk#21, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] +Arguments: [i_item_sk#21 ASC NULLS FIRST], false, 0 (63) SortMergeJoin [codegen id : 8] -Left keys [1]: [ss_item_sk#18] -Right keys [1]: [i_item_sk#24] +Left keys [1]: [ss_item_sk#15] +Right keys [1]: [i_item_sk#21] Join condition: None (64) Project [codegen id : 8] -Output [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] -Input [14]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_item_sk#24, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] +Output [13]: [s_store_name#2, s_state#4, ca_state#7, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_ticket_number#18, ss_net_paid#19, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] +Input [14]: [s_store_name#2, s_state#4, ca_state#7, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_ticket_number#18, ss_net_paid#19, i_item_sk#21, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] (65) Exchange -Input [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] -Arguments: hashpartitioning(ss_ticket_number#21, ss_item_sk#18, 5), ENSURE_REQUIREMENTS, [id=#53] +Input [13]: [s_store_name#2, s_state#4, ca_state#7, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_ticket_number#18, ss_net_paid#19, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] +Arguments: hashpartitioning(ss_ticket_number#18, ss_item_sk#15, 5), ENSURE_REQUIREMENTS, [plan_id=12] (66) Sort [codegen id : 9] -Input [13]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29] -Arguments: [ss_ticket_number#21 ASC NULLS FIRST, ss_item_sk#18 ASC NULLS FIRST], false, 0 +Input [13]: [s_store_name#2, s_state#4, ca_state#7, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_ticket_number#18, ss_net_paid#19, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26] +Arguments: [ss_ticket_number#18 ASC NULLS FIRST, ss_item_sk#15 ASC NULLS FIRST], false, 0 (67) ReusedExchange [Reuses operator id: 36] -Output [2]: [sr_item_sk#32, sr_ticket_number#33] +Output [2]: [sr_item_sk#27, sr_ticket_number#28] (68) Sort [codegen id : 11] -Input [2]: [sr_item_sk#32, sr_ticket_number#33] -Arguments: [sr_ticket_number#33 ASC NULLS FIRST, sr_item_sk#32 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#27, sr_ticket_number#28] +Arguments: [sr_ticket_number#28 ASC NULLS FIRST, sr_item_sk#27 ASC NULLS FIRST], false, 0 (69) SortMergeJoin [codegen id : 12] -Left keys [2]: [ss_ticket_number#21, ss_item_sk#18] -Right keys [2]: [sr_ticket_number#33, sr_item_sk#32] +Left keys [2]: [ss_ticket_number#18, ss_item_sk#15] +Right keys [2]: [sr_ticket_number#28, sr_item_sk#27] Join condition: None (70) Project [codegen id : 12] -Output [11]: [ss_net_paid#22, s_store_name#2, s_state#4, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29, c_first_name#14, c_last_name#15, ca_state#8] -Input [15]: [s_store_name#2, s_state#4, ca_state#8, c_first_name#14, c_last_name#15, ss_item_sk#18, ss_ticket_number#21, ss_net_paid#22, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29, sr_item_sk#32, sr_ticket_number#33] +Output [11]: [ss_net_paid#19, s_store_name#2, s_state#4, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26, c_first_name#12, c_last_name#13, ca_state#7] +Input [15]: [s_store_name#2, s_state#4, ca_state#7, c_first_name#12, c_last_name#13, ss_item_sk#15, ss_ticket_number#18, ss_net_paid#19, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26, sr_item_sk#27, sr_ticket_number#28] (71) HashAggregate [codegen id : 12] -Input [11]: [ss_net_paid#22, s_store_name#2, s_state#4, i_current_price#25, i_size#26, i_color#27, i_units#28, i_manager_id#29, c_first_name#14, c_last_name#15, ca_state#8] -Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26] -Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#22))] -Aggregate Attributes [1]: [sum#54] -Results [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26, sum#55] +Input [11]: [ss_net_paid#19, s_store_name#2, s_state#4, i_current_price#22, i_size#23, i_color#24, i_units#25, i_manager_id#26, c_first_name#12, c_last_name#13, ca_state#7] +Keys [10]: [c_last_name#13, c_first_name#12, s_store_name#2, ca_state#7, s_state#4, i_color#24, i_current_price#22, i_manager_id#26, i_units#25, i_size#23] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#19))] +Aggregate Attributes [1]: [sum#42] +Results [11]: [c_last_name#13, c_first_name#12, s_store_name#2, ca_state#7, s_state#4, i_color#24, i_current_price#22, i_manager_id#26, i_units#25, i_size#23, sum#43] (72) Exchange -Input [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26, sum#55] -Arguments: hashpartitioning(c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26, 5), ENSURE_REQUIREMENTS, [id=#56] +Input [11]: [c_last_name#13, c_first_name#12, s_store_name#2, ca_state#7, s_state#4, i_color#24, i_current_price#22, i_manager_id#26, i_units#25, i_size#23, sum#43] +Arguments: hashpartitioning(c_last_name#13, c_first_name#12, s_store_name#2, ca_state#7, s_state#4, i_color#24, i_current_price#22, i_manager_id#26, i_units#25, i_size#23, 5), ENSURE_REQUIREMENTS, [plan_id=13] (73) HashAggregate [codegen id : 13] -Input [11]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26, sum#55] -Keys [10]: [c_last_name#15, c_first_name#14, s_store_name#2, ca_state#8, s_state#4, i_color#27, i_current_price#25, i_manager_id#29, i_units#28, i_size#26] -Functions [1]: [sum(UnscaledValue(ss_net_paid#22))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#22))#39] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#22))#39,17,2) AS netpaid#40] +Input [11]: [c_last_name#13, c_first_name#12, s_store_name#2, ca_state#7, s_state#4, i_color#24, i_current_price#22, i_manager_id#26, i_units#25, i_size#23, sum#43] +Keys [10]: [c_last_name#13, c_first_name#12, s_store_name#2, ca_state#7, s_state#4, i_color#24, i_current_price#22, i_manager_id#26, i_units#25, i_size#23] +Functions [1]: [sum(UnscaledValue(ss_net_paid#19))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#19))#32] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#19))#32,17,2) AS netpaid#33] (74) HashAggregate [codegen id : 13] -Input [1]: [netpaid#40] +Input [1]: [netpaid#33] Keys: [] -Functions [1]: [partial_avg(netpaid#40)] -Aggregate Attributes [2]: [sum#57, count#58] -Results [2]: [sum#59, count#60] +Functions [1]: [partial_avg(netpaid#33)] +Aggregate Attributes [2]: [sum#44, count#45] +Results [2]: [sum#46, count#47] (75) Exchange -Input [2]: [sum#59, count#60] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#61] +Input [2]: [sum#46, count#47] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=14] (76) HashAggregate [codegen id : 14] -Input [2]: [sum#59, count#60] +Input [2]: [sum#46, count#47] Keys: [] -Functions [1]: [avg(netpaid#40)] -Aggregate Attributes [1]: [avg(netpaid#40)#62] -Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#40)#62)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#63] +Functions [1]: [avg(netpaid#33)] +Aggregate Attributes [1]: [avg(netpaid#33)#48] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#33)#48)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#49] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/explain.txt index ea90187cb53ad..ccf92a7955987 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q24/explain.txt @@ -69,210 +69,210 @@ Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s (5) Exchange Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] -Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#7] +Arguments: hashpartitioning(ss_ticket_number#4, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (6) Sort [codegen id : 2] Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5] Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 (7) Scan parquet default.store_returns -Output [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Output [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] ReadSchema: struct (8) ColumnarToRow [codegen id : 3] -Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] (9) Filter [codegen id : 3] -Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] -Condition : (isnotnull(sr_ticket_number#9) AND isnotnull(sr_item_sk#8)) +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] +Condition : (isnotnull(sr_ticket_number#8) AND isnotnull(sr_item_sk#7)) (10) Project [codegen id : 3] -Output [2]: [sr_item_sk#8, sr_ticket_number#9] -Input [3]: [sr_item_sk#8, sr_ticket_number#9, sr_returned_date_sk#10] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] +Input [3]: [sr_item_sk#7, sr_ticket_number#8, sr_returned_date_sk#9] (11) Exchange -Input [2]: [sr_item_sk#8, sr_ticket_number#9] -Arguments: hashpartitioning(sr_ticket_number#9, sr_item_sk#8, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: hashpartitioning(sr_ticket_number#8, sr_item_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 4] -Input [2]: [sr_item_sk#8, sr_ticket_number#9] -Arguments: [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 (13) SortMergeJoin [codegen id : 9] Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] -Right keys [2]: [sr_ticket_number#9, sr_item_sk#8] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] Join condition: None (14) Project [codegen id : 9] Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#8, sr_ticket_number#9] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] (15) Scan parquet default.store -Output [5]: [s_store_sk#12, s_store_name#13, s_market_id#14, s_state#15, s_zip#16] +Output [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_market_id), EqualTo(s_market_id,8), IsNotNull(s_store_sk), IsNotNull(s_zip)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [5]: [s_store_sk#12, s_store_name#13, s_market_id#14, s_state#15, s_zip#16] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] (17) Filter [codegen id : 5] -Input [5]: [s_store_sk#12, s_store_name#13, s_market_id#14, s_state#15, s_zip#16] -Condition : (((isnotnull(s_market_id#14) AND (s_market_id#14 = 8)) AND isnotnull(s_store_sk#12)) AND isnotnull(s_zip#16)) +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] +Condition : (((isnotnull(s_market_id#12) AND (s_market_id#12 = 8)) AND isnotnull(s_store_sk#10)) AND isnotnull(s_zip#14)) (18) Project [codegen id : 5] -Output [4]: [s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] -Input [5]: [s_store_sk#12, s_store_name#13, s_market_id#14, s_state#15, s_zip#16] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Input [5]: [s_store_sk#10, s_store_name#11, s_market_id#12, s_state#13, s_zip#14] (19) BroadcastExchange -Input [4]: [s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (20) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] +Right keys [1]: [s_store_sk#10] Join condition: None (21) Project [codegen id : 9] -Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] (22) Scan parquet default.item -Output [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_color), EqualTo(i_color,pale ), IsNotNull(i_item_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 6] -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] (24) Filter [codegen id : 6] -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Condition : ((isnotnull(i_color#21) AND (i_color#21 = pale )) AND isnotnull(i_item_sk#18)) +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : ((isnotnull(i_color#18) AND (i_color#18 = pale )) AND isnotnull(i_item_sk#15)) (25) BroadcastExchange -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#24] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (26) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#18] +Right keys [1]: [i_item_sk#15] Join condition: None (27) Project [codegen id : 9] -Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] (28) Scan parquet default.customer -Output [5]: [c_customer_sk#25, c_current_addr_sk#26, c_first_name#27, c_last_name#28, c_birth_country#29] +Output [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_current_addr_sk), IsNotNull(c_birth_country)] ReadSchema: struct (29) ColumnarToRow [codegen id : 7] -Input [5]: [c_customer_sk#25, c_current_addr_sk#26, c_first_name#27, c_last_name#28, c_birth_country#29] +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] (30) Filter [codegen id : 7] -Input [5]: [c_customer_sk#25, c_current_addr_sk#26, c_first_name#27, c_last_name#28, c_birth_country#29] -Condition : ((isnotnull(c_customer_sk#25) AND isnotnull(c_current_addr_sk#26)) AND isnotnull(c_birth_country#29)) +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Condition : ((isnotnull(c_customer_sk#21) AND isnotnull(c_current_addr_sk#22)) AND isnotnull(c_birth_country#25)) (31) BroadcastExchange -Input [5]: [c_customer_sk#25, c_current_addr_sk#26, c_first_name#27, c_last_name#28, c_birth_country#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#30] +Input [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (32) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#25] +Right keys [1]: [c_customer_sk#21] Join condition: None (33) Project [codegen id : 9] -Output [13]: [ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_current_addr_sk#26, c_first_name#27, c_last_name#28, c_birth_country#29] -Input [15]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_customer_sk#25, c_current_addr_sk#26, c_first_name#27, c_last_name#28, c_birth_country#29] +Output [13]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Input [15]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] (34) Scan parquet default.customer_address -Output [4]: [ca_address_sk#31, ca_state#32, ca_zip#33, ca_country#34] +Output [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk), IsNotNull(ca_country), IsNotNull(ca_zip)] ReadSchema: struct (35) ColumnarToRow [codegen id : 8] -Input [4]: [ca_address_sk#31, ca_state#32, ca_zip#33, ca_country#34] +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] (36) Filter [codegen id : 8] -Input [4]: [ca_address_sk#31, ca_state#32, ca_zip#33, ca_country#34] -Condition : ((isnotnull(ca_address_sk#31) AND isnotnull(ca_country#34)) AND isnotnull(ca_zip#33)) +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] +Condition : ((isnotnull(ca_address_sk#26) AND isnotnull(ca_country#29)) AND isnotnull(ca_zip#28)) (37) BroadcastExchange -Input [4]: [ca_address_sk#31, ca_state#32, ca_zip#33, ca_country#34] -Arguments: HashedRelationBroadcastMode(List(input[0, int, false], upper(input[3, string, false]), input[2, string, false]),false), [id=#35] +Input [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] +Arguments: HashedRelationBroadcastMode(List(input[0, int, false], upper(input[3, string, false]), input[2, string, false]),false), [plan_id=6] (38) BroadcastHashJoin [codegen id : 9] -Left keys [3]: [c_current_addr_sk#26, c_birth_country#29, s_zip#16] -Right keys [3]: [ca_address_sk#31, upper(ca_country#34), ca_zip#33] +Left keys [3]: [c_current_addr_sk#22, c_birth_country#25, s_zip#14] +Right keys [3]: [ca_address_sk#26, upper(ca_country#29), ca_zip#28] Join condition: None (39) Project [codegen id : 9] -Output [11]: [ss_net_paid#5, s_store_name#13, s_state#15, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#27, c_last_name#28, ca_state#32] -Input [17]: [ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_current_addr_sk#26, c_first_name#27, c_last_name#28, c_birth_country#29, ca_address_sk#31, ca_state#32, ca_zip#33, ca_country#34] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [17]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25, ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] (40) HashAggregate [codegen id : 9] -Input [11]: [ss_net_paid#5, s_store_name#13, s_state#15, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#27, c_last_name#28, ca_state#32] -Keys [10]: [c_last_name#28, c_first_name#27, s_store_name#13, ca_state#32, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum#36] -Results [11]: [c_last_name#28, c_first_name#27, s_store_name#13, ca_state#32, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#37] +Aggregate Attributes [1]: [sum#30] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] (41) Exchange -Input [11]: [c_last_name#28, c_first_name#27, s_store_name#13, ca_state#32, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#37] -Arguments: hashpartitioning(c_last_name#28, c_first_name#27, s_store_name#13, ca_state#32, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) HashAggregate [codegen id : 10] -Input [11]: [c_last_name#28, c_first_name#27, s_store_name#13, ca_state#32, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#37] -Keys [10]: [c_last_name#28, c_first_name#27, s_store_name#13, ca_state#32, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#31] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#39] -Results [4]: [c_last_name#28, c_first_name#27, s_store_name#13, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#39,17,2) AS netpaid#40] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#32] +Results [4]: [c_last_name#24, c_first_name#23, s_store_name#11, MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#32,17,2) AS netpaid#33] (43) HashAggregate [codegen id : 10] -Input [4]: [c_last_name#28, c_first_name#27, s_store_name#13, netpaid#40] -Keys [3]: [c_last_name#28, c_first_name#27, s_store_name#13] -Functions [1]: [partial_sum(netpaid#40)] -Aggregate Attributes [2]: [sum#41, isEmpty#42] -Results [5]: [c_last_name#28, c_first_name#27, s_store_name#13, sum#43, isEmpty#44] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, netpaid#33] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#11] +Functions [1]: [partial_sum(netpaid#33)] +Aggregate Attributes [2]: [sum#34, isEmpty#35] +Results [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] (44) Exchange -Input [5]: [c_last_name#28, c_first_name#27, s_store_name#13, sum#43, isEmpty#44] -Arguments: hashpartitioning(c_last_name#28, c_first_name#27, s_store_name#13, 5), ENSURE_REQUIREMENTS, [id=#45] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=8] (45) HashAggregate [codegen id : 11] -Input [5]: [c_last_name#28, c_first_name#27, s_store_name#13, sum#43, isEmpty#44] -Keys [3]: [c_last_name#28, c_first_name#27, s_store_name#13] -Functions [1]: [sum(netpaid#40)] -Aggregate Attributes [1]: [sum(netpaid#40)#46] -Results [4]: [c_last_name#28, c_first_name#27, s_store_name#13, sum(netpaid#40)#46 AS paid#47] +Input [5]: [c_last_name#24, c_first_name#23, s_store_name#11, sum#36, isEmpty#37] +Keys [3]: [c_last_name#24, c_first_name#23, s_store_name#11] +Functions [1]: [sum(netpaid#33)] +Aggregate Attributes [1]: [sum(netpaid#33)#38] +Results [4]: [c_last_name#24, c_first_name#23, s_store_name#11, sum(netpaid#33)#38 AS paid#39] (46) Filter [codegen id : 11] -Input [4]: [c_last_name#28, c_first_name#27, s_store_name#13, paid#47] -Condition : (isnotnull(paid#47) AND (cast(paid#47 as decimal(33,8)) > cast(Subquery scalar-subquery#48, [id=#49] as decimal(33,8)))) +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Condition : (isnotnull(paid#39) AND (cast(paid#39 as decimal(33,8)) > cast(Subquery scalar-subquery#40, [id=#41] as decimal(33,8)))) (47) Exchange -Input [4]: [c_last_name#28, c_first_name#27, s_store_name#13, paid#47] -Arguments: rangepartitioning(c_last_name#28 ASC NULLS FIRST, c_first_name#27 ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#50] +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Arguments: rangepartitioning(c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=9] (48) Sort [codegen id : 12] -Input [4]: [c_last_name#28, c_first_name#27, s_store_name#13, paid#47] -Arguments: [c_last_name#28 ASC NULLS FIRST, c_first_name#27 ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], true, 0 +Input [4]: [c_last_name#24, c_first_name#23, s_store_name#11, paid#39] +Arguments: [c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, s_store_name#11 ASC NULLS FIRST], true, 0 ===== Subqueries ===== -Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#48, [id=#49] +Subquery:1 Hosting operator id = 46 Hosting Expression = Subquery scalar-subquery#40, [id=#41] * HashAggregate (75) +- Exchange (74) +- * HashAggregate (73) @@ -310,118 +310,118 @@ Input [5]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, s Arguments: [ss_ticket_number#4 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 (51) ReusedExchange [Reuses operator id: 11] -Output [2]: [sr_item_sk#8, sr_ticket_number#9] +Output [2]: [sr_item_sk#7, sr_ticket_number#8] (52) Sort [codegen id : 4] -Input [2]: [sr_item_sk#8, sr_ticket_number#9] -Arguments: [sr_ticket_number#9 ASC NULLS FIRST, sr_item_sk#8 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#7, sr_ticket_number#8] +Arguments: [sr_ticket_number#8 ASC NULLS FIRST, sr_item_sk#7 ASC NULLS FIRST], false, 0 (53) SortMergeJoin [codegen id : 9] Left keys [2]: [ss_ticket_number#4, ss_item_sk#1] -Right keys [2]: [sr_ticket_number#9, sr_item_sk#8] +Right keys [2]: [sr_ticket_number#8, sr_item_sk#7] Join condition: None (54) Project [codegen id : 9] Output [4]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5] -Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#8, sr_ticket_number#9] +Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_ticket_number#4, ss_net_paid#5, sr_item_sk#7, sr_ticket_number#8] (55) ReusedExchange [Reuses operator id: 19] -Output [4]: [s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] +Output [4]: [s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] (56) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#12] +Right keys [1]: [s_store_sk#10] Join condition: None (57) Project [codegen id : 9] -Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#12, s_store_name#13, s_state#15, s_zip#16] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_store_sk#3, ss_net_paid#5, s_store_sk#10, s_store_name#11, s_state#13, s_zip#14] (58) Scan parquet default.item -Output [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Output [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (59) ColumnarToRow [codegen id : 6] -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] (60) Filter [codegen id : 6] -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Condition : isnotnull(i_item_sk#18) +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Condition : isnotnull(i_item_sk#15) (61) BroadcastExchange -Input [6]: [i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#51] +Input [6]: [i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] (62) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#18] +Right keys [1]: [i_item_sk#15] Join condition: None (63) Project [codegen id : 9] -Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_item_sk#18, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23] +Output [10]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_item_sk#15, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20] (64) ReusedExchange [Reuses operator id: 31] -Output [5]: [c_customer_sk#25, c_current_addr_sk#26, c_first_name#27, c_last_name#28, c_birth_country#29] +Output [5]: [c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] (65) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#25] +Right keys [1]: [c_customer_sk#21] Join condition: None (66) Project [codegen id : 9] -Output [13]: [ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_current_addr_sk#26, c_first_name#27, c_last_name#28, c_birth_country#29] -Input [15]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_customer_sk#25, c_current_addr_sk#26, c_first_name#27, c_last_name#28, c_birth_country#29] +Output [13]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] +Input [15]: [ss_customer_sk#2, ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_customer_sk#21, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25] (67) ReusedExchange [Reuses operator id: 37] -Output [4]: [ca_address_sk#31, ca_state#32, ca_zip#33, ca_country#34] +Output [4]: [ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] (68) BroadcastHashJoin [codegen id : 9] -Left keys [3]: [c_current_addr_sk#26, c_birth_country#29, s_zip#16] -Right keys [3]: [ca_address_sk#31, upper(ca_country#34), ca_zip#33] +Left keys [3]: [c_current_addr_sk#22, c_birth_country#25, s_zip#14] +Right keys [3]: [ca_address_sk#26, upper(ca_country#29), ca_zip#28] Join condition: None (69) Project [codegen id : 9] -Output [11]: [ss_net_paid#5, s_store_name#13, s_state#15, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#27, c_last_name#28, ca_state#32] -Input [17]: [ss_net_paid#5, s_store_name#13, s_state#15, s_zip#16, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_current_addr_sk#26, c_first_name#27, c_last_name#28, c_birth_country#29, ca_address_sk#31, ca_state#32, ca_zip#33, ca_country#34] +Output [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Input [17]: [ss_net_paid#5, s_store_name#11, s_state#13, s_zip#14, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_current_addr_sk#22, c_first_name#23, c_last_name#24, c_birth_country#25, ca_address_sk#26, ca_state#27, ca_zip#28, ca_country#29] (70) HashAggregate [codegen id : 9] -Input [11]: [ss_net_paid#5, s_store_name#13, s_state#15, i_current_price#19, i_size#20, i_color#21, i_units#22, i_manager_id#23, c_first_name#27, c_last_name#28, ca_state#32] -Keys [10]: [c_last_name#28, c_first_name#27, s_store_name#13, ca_state#32, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20] +Input [11]: [ss_net_paid#5, s_store_name#11, s_state#13, i_current_price#16, i_size#17, i_color#18, i_units#19, i_manager_id#20, c_first_name#23, c_last_name#24, ca_state#27] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum#52] -Results [11]: [c_last_name#28, c_first_name#27, s_store_name#13, ca_state#32, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#53] +Aggregate Attributes [1]: [sum#42] +Results [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#43] (71) Exchange -Input [11]: [c_last_name#28, c_first_name#27, s_store_name#13, ca_state#32, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#53] -Arguments: hashpartitioning(c_last_name#28, c_first_name#27, s_store_name#13, ca_state#32, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, 5), ENSURE_REQUIREMENTS, [id=#54] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#43] +Arguments: hashpartitioning(c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, 5), ENSURE_REQUIREMENTS, [plan_id=11] (72) HashAggregate [codegen id : 10] -Input [11]: [c_last_name#28, c_first_name#27, s_store_name#13, ca_state#32, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20, sum#53] -Keys [10]: [c_last_name#28, c_first_name#27, s_store_name#13, ca_state#32, s_state#15, i_color#21, i_current_price#19, i_manager_id#23, i_units#22, i_size#20] +Input [11]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17, sum#43] +Keys [10]: [c_last_name#24, c_first_name#23, s_store_name#11, ca_state#27, s_state#13, i_color#18, i_current_price#16, i_manager_id#20, i_units#19, i_size#17] Functions [1]: [sum(UnscaledValue(ss_net_paid#5))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#39] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#39,17,2) AS netpaid#40] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#5))#32] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_paid#5))#32,17,2) AS netpaid#33] (73) HashAggregate [codegen id : 10] -Input [1]: [netpaid#40] +Input [1]: [netpaid#33] Keys: [] -Functions [1]: [partial_avg(netpaid#40)] -Aggregate Attributes [2]: [sum#55, count#56] -Results [2]: [sum#57, count#58] +Functions [1]: [partial_avg(netpaid#33)] +Aggregate Attributes [2]: [sum#44, count#45] +Results [2]: [sum#46, count#47] (74) Exchange -Input [2]: [sum#57, count#58] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#59] +Input [2]: [sum#46, count#47] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] (75) HashAggregate [codegen id : 11] -Input [2]: [sum#57, count#58] +Input [2]: [sum#46, count#47] Keys: [] -Functions [1]: [avg(netpaid#40)] -Aggregate Attributes [1]: [avg(netpaid#40)#60] -Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#40)#60)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#61] +Functions [1]: [avg(netpaid#33)] +Aggregate Attributes [1]: [avg(netpaid#33)#48] +Results [1]: [CheckOverflow((0.050000 * promote_precision(avg(netpaid#33)#48)), DecimalType(24,8)) AS (0.05 * avg(netpaid))#49] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.sf100/explain.txt index b55e5641a679a..cc7c668795244 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a.sf100/explain.txt @@ -109,7 +109,7 @@ Input [4]: [cd_demo_sk#10, cd_gender#11, cd_marital_status#12, cd_education_stat (8) BroadcastExchange Input [1]: [cd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_cdemo_sk#2] @@ -121,88 +121,88 @@ Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#10] (11) ReusedExchange [Reuses operator id: 78] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (13) Project [codegen id : 5] Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#14] (14) Scan parquet default.store -Output [2]: [s_store_sk#16, s_state#17] +Output [2]: [s_store_sk#15, s_state#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#16, s_state#17] +Input [2]: [s_store_sk#15, s_state#16] (16) Filter [codegen id : 3] -Input [2]: [s_store_sk#16, s_state#17] -Condition : ((isnotnull(s_state#17) AND (s_state#17 = TN)) AND isnotnull(s_store_sk#16)) +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) (17) BroadcastExchange -Input [2]: [s_store_sk#16, s_state#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] +Input [2]: [s_store_sk#15, s_state#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] +Right keys [1]: [s_store_sk#15] Join condition: None (19) Project [codegen id : 5] -Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#17] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#16, s_state#17] +Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] (20) Scan parquet default.item -Output [2]: [i_item_sk#19, i_item_id#20] +Output [2]: [i_item_sk#17, i_item_id#18] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#19, i_item_id#20] +Input [2]: [i_item_sk#17, i_item_id#18] (22) Filter [codegen id : 4] -Input [2]: [i_item_sk#19, i_item_id#20] -Condition : isnotnull(i_item_sk#19) +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) (23) BroadcastExchange -Input [2]: [i_item_sk#19, i_item_id#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#19] +Right keys [1]: [i_item_sk#17] Join condition: None (25) Project [codegen id : 5] -Output [6]: [i_item_id#20, s_state#17, ss_quantity#4 AS agg1#22, ss_list_price#5 AS agg2#23, ss_coupon_amt#7 AS agg3#24, ss_sales_price#6 AS agg4#25] -Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#17, i_item_sk#19, i_item_id#20] +Output [6]: [i_item_id#18, s_state#16, ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] (26) HashAggregate [codegen id : 5] -Input [6]: [i_item_id#20, s_state#17, agg1#22, agg2#23, agg3#24, agg4#25] -Keys [2]: [i_item_id#20, s_state#17] -Functions [4]: [partial_avg(agg1#22), partial_avg(UnscaledValue(agg2#23)), partial_avg(UnscaledValue(agg3#24)), partial_avg(UnscaledValue(agg4#25))] -Aggregate Attributes [8]: [sum#26, count#27, sum#28, count#29, sum#30, count#31, sum#32, count#33] -Results [10]: [i_item_id#20, s_state#17, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] +Input [6]: [i_item_id#18, s_state#16, agg1#19, agg2#20, agg3#21, agg4#22] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] +Results [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] (27) Exchange -Input [10]: [i_item_id#20, s_state#17, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] -Arguments: hashpartitioning(i_item_id#20, s_state#17, 5), ENSURE_REQUIREMENTS, [id=#42] +Input [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Arguments: hashpartitioning(i_item_id#18, s_state#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 6] -Input [10]: [i_item_id#20, s_state#17, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] -Keys [2]: [i_item_id#20, s_state#17] -Functions [4]: [avg(agg1#22), avg(UnscaledValue(agg2#23)), avg(UnscaledValue(agg3#24)), avg(UnscaledValue(agg4#25))] -Aggregate Attributes [4]: [avg(agg1#22)#43, avg(UnscaledValue(agg2#23))#44, avg(UnscaledValue(agg3#24))#45, avg(UnscaledValue(agg4#25))#46] -Results [7]: [i_item_id#20, s_state#17, 0 AS g_state#47, avg(agg1#22)#43 AS agg1#48, cast((avg(UnscaledValue(agg2#23))#44 / 100.0) as decimal(11,6)) AS agg2#49, cast((avg(UnscaledValue(agg3#24))#45 / 100.0) as decimal(11,6)) AS agg3#50, cast((avg(UnscaledValue(agg4#25))#46 / 100.0) as decimal(11,6)) AS agg4#51] +Input [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#39, avg(UnscaledValue(agg2#20))#40, avg(UnscaledValue(agg3#21))#41, avg(UnscaledValue(agg4#22))#42] +Results [7]: [i_item_id#18, s_state#16, 0 AS g_state#43, avg(agg1#19)#39 AS agg1#44, cast((avg(UnscaledValue(agg2#20))#40 / 100.0) as decimal(11,6)) AS agg2#45, cast((avg(UnscaledValue(agg3#21))#41 / 100.0) as decimal(11,6)) AS agg3#46, cast((avg(UnscaledValue(agg4#22))#42 / 100.0) as decimal(11,6)) AS agg4#47] (29) Scan parquet default.store_sales Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] @@ -232,77 +232,77 @@ Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#10] (35) Scan parquet default.store -Output [2]: [s_store_sk#16, s_state#17] +Output [2]: [s_store_sk#15, s_state#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] ReadSchema: struct (36) ColumnarToRow [codegen id : 8] -Input [2]: [s_store_sk#16, s_state#17] +Input [2]: [s_store_sk#15, s_state#16] (37) Filter [codegen id : 8] -Input [2]: [s_store_sk#16, s_state#17] -Condition : ((isnotnull(s_state#17) AND (s_state#17 = TN)) AND isnotnull(s_store_sk#16)) +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) (38) Project [codegen id : 8] -Output [1]: [s_store_sk#16] -Input [2]: [s_store_sk#16, s_state#17] +Output [1]: [s_store_sk#15] +Input [2]: [s_store_sk#15, s_state#16] (39) BroadcastExchange -Input [1]: [s_store_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (40) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] +Right keys [1]: [s_store_sk#15] Join condition: None (41) Project [codegen id : 11] Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, s_store_sk#16] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, s_store_sk#15] (42) ReusedExchange [Reuses operator id: 78] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (43) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (44) Project [codegen id : 11] Output [5]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#15] +Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#14] (45) ReusedExchange [Reuses operator id: 23] -Output [2]: [i_item_sk#19, i_item_id#20] +Output [2]: [i_item_sk#17, i_item_id#18] (46) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#19] +Right keys [1]: [i_item_sk#17] Join condition: None (47) Project [codegen id : 11] -Output [5]: [i_item_id#20, ss_quantity#4 AS agg1#22, ss_list_price#5 AS agg2#23, ss_coupon_amt#7 AS agg3#24, ss_sales_price#6 AS agg4#25] -Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#19, i_item_id#20] +Output [5]: [i_item_id#18, ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#17, i_item_id#18] (48) HashAggregate [codegen id : 11] -Input [5]: [i_item_id#20, agg1#22, agg2#23, agg3#24, agg4#25] -Keys [1]: [i_item_id#20] -Functions [4]: [partial_avg(agg1#22), partial_avg(UnscaledValue(agg2#23)), partial_avg(UnscaledValue(agg3#24)), partial_avg(UnscaledValue(agg4#25))] -Aggregate Attributes [8]: [sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] -Results [9]: [i_item_id#20, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] +Input [5]: [i_item_id#18, agg1#19, agg2#20, agg3#21, agg4#22] +Keys [1]: [i_item_id#18] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#48, count#49, sum#50, count#51, sum#52, count#53, sum#54, count#55] +Results [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] (49) Exchange -Input [9]: [i_item_id#20, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] -Arguments: hashpartitioning(i_item_id#20, 5), ENSURE_REQUIREMENTS, [id=#69] +Input [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] +Arguments: hashpartitioning(i_item_id#18, 5), ENSURE_REQUIREMENTS, [plan_id=6] (50) HashAggregate [codegen id : 12] -Input [9]: [i_item_id#20, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] -Keys [1]: [i_item_id#20] -Functions [4]: [avg(agg1#22), avg(UnscaledValue(agg2#23)), avg(UnscaledValue(agg3#24)), avg(UnscaledValue(agg4#25))] -Aggregate Attributes [4]: [avg(agg1#22)#70, avg(UnscaledValue(agg2#23))#71, avg(UnscaledValue(agg3#24))#72, avg(UnscaledValue(agg4#25))#73] -Results [7]: [i_item_id#20, null AS s_state#74, 1 AS g_state#75, avg(agg1#22)#70 AS agg1#76, cast((avg(UnscaledValue(agg2#23))#71 / 100.0) as decimal(11,6)) AS agg2#77, cast((avg(UnscaledValue(agg3#24))#72 / 100.0) as decimal(11,6)) AS agg3#78, cast((avg(UnscaledValue(agg4#25))#73 / 100.0) as decimal(11,6)) AS agg4#79] +Input [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] +Keys [1]: [i_item_id#18] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#64, avg(UnscaledValue(agg2#20))#65, avg(UnscaledValue(agg3#21))#66, avg(UnscaledValue(agg4#22))#67] +Results [7]: [i_item_id#18, null AS s_state#68, 1 AS g_state#69, avg(agg1#19)#64 AS agg1#70, cast((avg(UnscaledValue(agg2#20))#65 / 100.0) as decimal(11,6)) AS agg2#71, cast((avg(UnscaledValue(agg3#21))#66 / 100.0) as decimal(11,6)) AS agg3#72, cast((avg(UnscaledValue(agg4#22))#67 / 100.0) as decimal(11,6)) AS agg4#73] (51) Scan parquet default.store_sales Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] @@ -332,79 +332,79 @@ Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#10] (57) ReusedExchange [Reuses operator id: 39] -Output [1]: [s_store_sk#16] +Output [1]: [s_store_sk#15] (58) BroadcastHashJoin [codegen id : 17] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] +Right keys [1]: [s_store_sk#15] Join condition: None (59) Project [codegen id : 17] Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, s_store_sk#16] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, s_store_sk#15] (60) ReusedExchange [Reuses operator id: 78] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (61) BroadcastHashJoin [codegen id : 17] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (62) Project [codegen id : 17] Output [5]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#15] +Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#14] (63) Scan parquet default.item -Output [1]: [i_item_sk#19] +Output [1]: [i_item_sk#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (64) ColumnarToRow [codegen id : 16] -Input [1]: [i_item_sk#19] +Input [1]: [i_item_sk#17] (65) Filter [codegen id : 16] -Input [1]: [i_item_sk#19] -Condition : isnotnull(i_item_sk#19) +Input [1]: [i_item_sk#17] +Condition : isnotnull(i_item_sk#17) (66) BroadcastExchange -Input [1]: [i_item_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#80] +Input [1]: [i_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] (67) BroadcastHashJoin [codegen id : 17] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#19] +Right keys [1]: [i_item_sk#17] Join condition: None (68) Project [codegen id : 17] -Output [4]: [ss_quantity#4 AS agg1#22, ss_list_price#5 AS agg2#23, ss_coupon_amt#7 AS agg3#24, ss_sales_price#6 AS agg4#25] -Input [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#19] +Output [4]: [ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#17] (69) HashAggregate [codegen id : 17] -Input [4]: [agg1#22, agg2#23, agg3#24, agg4#25] +Input [4]: [agg1#19, agg2#20, agg3#21, agg4#22] Keys: [] -Functions [4]: [partial_avg(agg1#22), partial_avg(UnscaledValue(agg2#23)), partial_avg(UnscaledValue(agg3#24)), partial_avg(UnscaledValue(agg4#25))] -Aggregate Attributes [8]: [sum#81, count#82, sum#83, count#84, sum#85, count#86, sum#87, count#88] -Results [8]: [sum#89, count#90, sum#91, count#92, sum#93, count#94, sum#95, count#96] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#74, count#75, sum#76, count#77, sum#78, count#79, sum#80, count#81] +Results [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] (70) Exchange -Input [8]: [sum#89, count#90, sum#91, count#92, sum#93, count#94, sum#95, count#96] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#97] +Input [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] (71) HashAggregate [codegen id : 18] -Input [8]: [sum#89, count#90, sum#91, count#92, sum#93, count#94, sum#95, count#96] +Input [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] Keys: [] -Functions [4]: [avg(agg1#22), avg(UnscaledValue(agg2#23)), avg(UnscaledValue(agg3#24)), avg(UnscaledValue(agg4#25))] -Aggregate Attributes [4]: [avg(agg1#22)#98, avg(UnscaledValue(agg2#23))#99, avg(UnscaledValue(agg3#24))#100, avg(UnscaledValue(agg4#25))#101] -Results [7]: [null AS i_item_id#102, null AS s_state#103, 1 AS g_state#104, avg(agg1#22)#98 AS agg1#105, cast((avg(UnscaledValue(agg2#23))#99 / 100.0) as decimal(11,6)) AS agg2#106, cast((avg(UnscaledValue(agg3#24))#100 / 100.0) as decimal(11,6)) AS agg3#107, cast((avg(UnscaledValue(agg4#25))#101 / 100.0) as decimal(11,6)) AS agg4#108] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#90, avg(UnscaledValue(agg2#20))#91, avg(UnscaledValue(agg3#21))#92, avg(UnscaledValue(agg4#22))#93] +Results [7]: [null AS i_item_id#94, null AS s_state#95, 1 AS g_state#96, avg(agg1#19)#90 AS agg1#97, cast((avg(UnscaledValue(agg2#20))#91 / 100.0) as decimal(11,6)) AS agg2#98, cast((avg(UnscaledValue(agg3#21))#92 / 100.0) as decimal(11,6)) AS agg3#99, cast((avg(UnscaledValue(agg4#22))#93 / 100.0) as decimal(11,6)) AS agg4#100] (72) Union (73) TakeOrderedAndProject -Input [7]: [i_item_id#20, s_state#17, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] -Arguments: 100, [i_item_id#20 ASC NULLS FIRST, s_state#17 ASC NULLS FIRST], [i_item_id#20, s_state#17, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] +Input [7]: [i_item_id#18, s_state#16, g_state#43, agg1#44, agg2#45, agg3#46, agg4#47] +Arguments: 100, [i_item_id#18 ASC NULLS FIRST, s_state#16 ASC NULLS FIRST], [i_item_id#18, s_state#16, g_state#43, agg1#44, agg2#45, agg3#46, agg4#47] ===== Subqueries ===== @@ -417,26 +417,26 @@ BroadcastExchange (78) (74) Scan parquet default.date_dim -Output [2]: [d_date_sk#15, d_year#109] +Output [2]: [d_date_sk#14, d_year#101] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] ReadSchema: struct (75) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#109] +Input [2]: [d_date_sk#14, d_year#101] (76) Filter [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#109] -Condition : ((isnotnull(d_year#109) AND (d_year#109 = 1998)) AND isnotnull(d_date_sk#15)) +Input [2]: [d_date_sk#14, d_year#101] +Condition : ((isnotnull(d_year#101) AND (d_year#101 = 1998)) AND isnotnull(d_date_sk#14)) (77) Project [codegen id : 1] -Output [1]: [d_date_sk#15] -Input [2]: [d_date_sk#15, d_year#109] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#101] (78) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#110] +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] Subquery:2 Hosting operator id = 29 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a/explain.txt index 60b1498c4e6d0..305636a385e3b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q27a/explain.txt @@ -109,7 +109,7 @@ Input [4]: [cd_demo_sk#10, cd_gender#11, cd_marital_status#12, cd_education_stat (8) BroadcastExchange Input [1]: [cd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_cdemo_sk#2] @@ -121,88 +121,88 @@ Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#10] (11) ReusedExchange [Reuses operator id: 78] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (13) Project [codegen id : 5] Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#14] (14) Scan parquet default.store -Output [2]: [s_store_sk#16, s_state#17] +Output [2]: [s_store_sk#15, s_state#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#16, s_state#17] +Input [2]: [s_store_sk#15, s_state#16] (16) Filter [codegen id : 3] -Input [2]: [s_store_sk#16, s_state#17] -Condition : ((isnotnull(s_state#17) AND (s_state#17 = TN)) AND isnotnull(s_store_sk#16)) +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) (17) BroadcastExchange -Input [2]: [s_store_sk#16, s_state#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] +Input [2]: [s_store_sk#15, s_state#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] +Right keys [1]: [s_store_sk#15] Join condition: None (19) Project [codegen id : 5] -Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#17] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#16, s_state#17] +Output [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15, s_state#16] (20) Scan parquet default.item -Output [2]: [i_item_sk#19, i_item_id#20] +Output [2]: [i_item_sk#17, i_item_id#18] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [2]: [i_item_sk#19, i_item_id#20] +Input [2]: [i_item_sk#17, i_item_id#18] (22) Filter [codegen id : 4] -Input [2]: [i_item_sk#19, i_item_id#20] -Condition : isnotnull(i_item_sk#19) +Input [2]: [i_item_sk#17, i_item_id#18] +Condition : isnotnull(i_item_sk#17) (23) BroadcastExchange -Input [2]: [i_item_sk#19, i_item_id#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] +Input [2]: [i_item_sk#17, i_item_id#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#19] +Right keys [1]: [i_item_sk#17] Join condition: None (25) Project [codegen id : 5] -Output [6]: [i_item_id#20, s_state#17, ss_quantity#4 AS agg1#22, ss_list_price#5 AS agg2#23, ss_coupon_amt#7 AS agg3#24, ss_sales_price#6 AS agg4#25] -Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#17, i_item_sk#19, i_item_id#20] +Output [6]: [i_item_id#18, s_state#16, ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [8]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_state#16, i_item_sk#17, i_item_id#18] (26) HashAggregate [codegen id : 5] -Input [6]: [i_item_id#20, s_state#17, agg1#22, agg2#23, agg3#24, agg4#25] -Keys [2]: [i_item_id#20, s_state#17] -Functions [4]: [partial_avg(agg1#22), partial_avg(UnscaledValue(agg2#23)), partial_avg(UnscaledValue(agg3#24)), partial_avg(UnscaledValue(agg4#25))] -Aggregate Attributes [8]: [sum#26, count#27, sum#28, count#29, sum#30, count#31, sum#32, count#33] -Results [10]: [i_item_id#20, s_state#17, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] +Input [6]: [i_item_id#18, s_state#16, agg1#19, agg2#20, agg3#21, agg4#22] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#23, count#24, sum#25, count#26, sum#27, count#28, sum#29, count#30] +Results [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] (27) Exchange -Input [10]: [i_item_id#20, s_state#17, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] -Arguments: hashpartitioning(i_item_id#20, s_state#17, 5), ENSURE_REQUIREMENTS, [id=#42] +Input [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Arguments: hashpartitioning(i_item_id#18, s_state#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) HashAggregate [codegen id : 6] -Input [10]: [i_item_id#20, s_state#17, sum#34, count#35, sum#36, count#37, sum#38, count#39, sum#40, count#41] -Keys [2]: [i_item_id#20, s_state#17] -Functions [4]: [avg(agg1#22), avg(UnscaledValue(agg2#23)), avg(UnscaledValue(agg3#24)), avg(UnscaledValue(agg4#25))] -Aggregate Attributes [4]: [avg(agg1#22)#43, avg(UnscaledValue(agg2#23))#44, avg(UnscaledValue(agg3#24))#45, avg(UnscaledValue(agg4#25))#46] -Results [7]: [i_item_id#20, s_state#17, 0 AS g_state#47, avg(agg1#22)#43 AS agg1#48, cast((avg(UnscaledValue(agg2#23))#44 / 100.0) as decimal(11,6)) AS agg2#49, cast((avg(UnscaledValue(agg3#24))#45 / 100.0) as decimal(11,6)) AS agg3#50, cast((avg(UnscaledValue(agg4#25))#46 / 100.0) as decimal(11,6)) AS agg4#51] +Input [10]: [i_item_id#18, s_state#16, sum#31, count#32, sum#33, count#34, sum#35, count#36, sum#37, count#38] +Keys [2]: [i_item_id#18, s_state#16] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#39, avg(UnscaledValue(agg2#20))#40, avg(UnscaledValue(agg3#21))#41, avg(UnscaledValue(agg4#22))#42] +Results [7]: [i_item_id#18, s_state#16, 0 AS g_state#43, avg(agg1#19)#39 AS agg1#44, cast((avg(UnscaledValue(agg2#20))#40 / 100.0) as decimal(11,6)) AS agg2#45, cast((avg(UnscaledValue(agg3#21))#41 / 100.0) as decimal(11,6)) AS agg3#46, cast((avg(UnscaledValue(agg4#22))#42 / 100.0) as decimal(11,6)) AS agg4#47] (29) Scan parquet default.store_sales Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] @@ -232,77 +232,77 @@ Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#10] (35) ReusedExchange [Reuses operator id: 78] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (36) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (37) Project [codegen id : 11] Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#14] (38) Scan parquet default.store -Output [2]: [s_store_sk#16, s_state#17] +Output [2]: [s_store_sk#15, s_state#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] ReadSchema: struct (39) ColumnarToRow [codegen id : 9] -Input [2]: [s_store_sk#16, s_state#17] +Input [2]: [s_store_sk#15, s_state#16] (40) Filter [codegen id : 9] -Input [2]: [s_store_sk#16, s_state#17] -Condition : ((isnotnull(s_state#17) AND (s_state#17 = TN)) AND isnotnull(s_store_sk#16)) +Input [2]: [s_store_sk#15, s_state#16] +Condition : ((isnotnull(s_state#16) AND (s_state#16 = TN)) AND isnotnull(s_store_sk#15)) (41) Project [codegen id : 9] -Output [1]: [s_store_sk#16] -Input [2]: [s_store_sk#16, s_state#17] +Output [1]: [s_store_sk#15] +Input [2]: [s_store_sk#15, s_state#16] (42) BroadcastExchange -Input [1]: [s_store_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#52] +Input [1]: [s_store_sk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (43) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] +Right keys [1]: [s_store_sk#15] Join condition: None (44) Project [codegen id : 11] Output [5]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#16] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15] (45) ReusedExchange [Reuses operator id: 23] -Output [2]: [i_item_sk#19, i_item_id#20] +Output [2]: [i_item_sk#17, i_item_id#18] (46) BroadcastHashJoin [codegen id : 11] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#19] +Right keys [1]: [i_item_sk#17] Join condition: None (47) Project [codegen id : 11] -Output [5]: [i_item_id#20, ss_quantity#4 AS agg1#22, ss_list_price#5 AS agg2#23, ss_coupon_amt#7 AS agg3#24, ss_sales_price#6 AS agg4#25] -Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#19, i_item_id#20] +Output [5]: [i_item_id#18, ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [7]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#17, i_item_id#18] (48) HashAggregate [codegen id : 11] -Input [5]: [i_item_id#20, agg1#22, agg2#23, agg3#24, agg4#25] -Keys [1]: [i_item_id#20] -Functions [4]: [partial_avg(agg1#22), partial_avg(UnscaledValue(agg2#23)), partial_avg(UnscaledValue(agg3#24)), partial_avg(UnscaledValue(agg4#25))] -Aggregate Attributes [8]: [sum#53, count#54, sum#55, count#56, sum#57, count#58, sum#59, count#60] -Results [9]: [i_item_id#20, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] +Input [5]: [i_item_id#18, agg1#19, agg2#20, agg3#21, agg4#22] +Keys [1]: [i_item_id#18] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#48, count#49, sum#50, count#51, sum#52, count#53, sum#54, count#55] +Results [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] (49) Exchange -Input [9]: [i_item_id#20, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] -Arguments: hashpartitioning(i_item_id#20, 5), ENSURE_REQUIREMENTS, [id=#69] +Input [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] +Arguments: hashpartitioning(i_item_id#18, 5), ENSURE_REQUIREMENTS, [plan_id=6] (50) HashAggregate [codegen id : 12] -Input [9]: [i_item_id#20, sum#61, count#62, sum#63, count#64, sum#65, count#66, sum#67, count#68] -Keys [1]: [i_item_id#20] -Functions [4]: [avg(agg1#22), avg(UnscaledValue(agg2#23)), avg(UnscaledValue(agg3#24)), avg(UnscaledValue(agg4#25))] -Aggregate Attributes [4]: [avg(agg1#22)#70, avg(UnscaledValue(agg2#23))#71, avg(UnscaledValue(agg3#24))#72, avg(UnscaledValue(agg4#25))#73] -Results [7]: [i_item_id#20, null AS s_state#74, 1 AS g_state#75, avg(agg1#22)#70 AS agg1#76, cast((avg(UnscaledValue(agg2#23))#71 / 100.0) as decimal(11,6)) AS agg2#77, cast((avg(UnscaledValue(agg3#24))#72 / 100.0) as decimal(11,6)) AS agg3#78, cast((avg(UnscaledValue(agg4#25))#73 / 100.0) as decimal(11,6)) AS agg4#79] +Input [9]: [i_item_id#18, sum#56, count#57, sum#58, count#59, sum#60, count#61, sum#62, count#63] +Keys [1]: [i_item_id#18] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#64, avg(UnscaledValue(agg2#20))#65, avg(UnscaledValue(agg3#21))#66, avg(UnscaledValue(agg4#22))#67] +Results [7]: [i_item_id#18, null AS s_state#68, 1 AS g_state#69, avg(agg1#19)#64 AS agg1#70, cast((avg(UnscaledValue(agg2#20))#65 / 100.0) as decimal(11,6)) AS agg2#71, cast((avg(UnscaledValue(agg3#21))#66 / 100.0) as decimal(11,6)) AS agg3#72, cast((avg(UnscaledValue(agg4#22))#67 / 100.0) as decimal(11,6)) AS agg4#73] (51) Scan parquet default.store_sales Output [8]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8] @@ -332,79 +332,79 @@ Output [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sal Input [9]: [ss_item_sk#1, ss_cdemo_sk#2, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, cd_demo_sk#10] (57) ReusedExchange [Reuses operator id: 78] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (58) BroadcastHashJoin [codegen id : 17] Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (59) Project [codegen id : 17] Output [6]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#15] +Input [8]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, ss_sold_date_sk#8, d_date_sk#14] (60) ReusedExchange [Reuses operator id: 42] -Output [1]: [s_store_sk#16] +Output [1]: [s_store_sk#15] (61) BroadcastHashJoin [codegen id : 17] Left keys [1]: [ss_store_sk#3] -Right keys [1]: [s_store_sk#16] +Right keys [1]: [s_store_sk#15] Join condition: None (62) Project [codegen id : 17] Output [5]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7] -Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#16] +Input [7]: [ss_item_sk#1, ss_store_sk#3, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, s_store_sk#15] (63) Scan parquet default.item -Output [1]: [i_item_sk#19] +Output [1]: [i_item_sk#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (64) ColumnarToRow [codegen id : 16] -Input [1]: [i_item_sk#19] +Input [1]: [i_item_sk#17] (65) Filter [codegen id : 16] -Input [1]: [i_item_sk#19] -Condition : isnotnull(i_item_sk#19) +Input [1]: [i_item_sk#17] +Condition : isnotnull(i_item_sk#17) (66) BroadcastExchange -Input [1]: [i_item_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#80] +Input [1]: [i_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] (67) BroadcastHashJoin [codegen id : 17] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#19] +Right keys [1]: [i_item_sk#17] Join condition: None (68) Project [codegen id : 17] -Output [4]: [ss_quantity#4 AS agg1#22, ss_list_price#5 AS agg2#23, ss_coupon_amt#7 AS agg3#24, ss_sales_price#6 AS agg4#25] -Input [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#19] +Output [4]: [ss_quantity#4 AS agg1#19, ss_list_price#5 AS agg2#20, ss_coupon_amt#7 AS agg3#21, ss_sales_price#6 AS agg4#22] +Input [6]: [ss_item_sk#1, ss_quantity#4, ss_list_price#5, ss_sales_price#6, ss_coupon_amt#7, i_item_sk#17] (69) HashAggregate [codegen id : 17] -Input [4]: [agg1#22, agg2#23, agg3#24, agg4#25] +Input [4]: [agg1#19, agg2#20, agg3#21, agg4#22] Keys: [] -Functions [4]: [partial_avg(agg1#22), partial_avg(UnscaledValue(agg2#23)), partial_avg(UnscaledValue(agg3#24)), partial_avg(UnscaledValue(agg4#25))] -Aggregate Attributes [8]: [sum#81, count#82, sum#83, count#84, sum#85, count#86, sum#87, count#88] -Results [8]: [sum#89, count#90, sum#91, count#92, sum#93, count#94, sum#95, count#96] +Functions [4]: [partial_avg(agg1#19), partial_avg(UnscaledValue(agg2#20)), partial_avg(UnscaledValue(agg3#21)), partial_avg(UnscaledValue(agg4#22))] +Aggregate Attributes [8]: [sum#74, count#75, sum#76, count#77, sum#78, count#79, sum#80, count#81] +Results [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] (70) Exchange -Input [8]: [sum#89, count#90, sum#91, count#92, sum#93, count#94, sum#95, count#96] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#97] +Input [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] (71) HashAggregate [codegen id : 18] -Input [8]: [sum#89, count#90, sum#91, count#92, sum#93, count#94, sum#95, count#96] +Input [8]: [sum#82, count#83, sum#84, count#85, sum#86, count#87, sum#88, count#89] Keys: [] -Functions [4]: [avg(agg1#22), avg(UnscaledValue(agg2#23)), avg(UnscaledValue(agg3#24)), avg(UnscaledValue(agg4#25))] -Aggregate Attributes [4]: [avg(agg1#22)#98, avg(UnscaledValue(agg2#23))#99, avg(UnscaledValue(agg3#24))#100, avg(UnscaledValue(agg4#25))#101] -Results [7]: [null AS i_item_id#102, null AS s_state#103, 1 AS g_state#104, avg(agg1#22)#98 AS agg1#105, cast((avg(UnscaledValue(agg2#23))#99 / 100.0) as decimal(11,6)) AS agg2#106, cast((avg(UnscaledValue(agg3#24))#100 / 100.0) as decimal(11,6)) AS agg3#107, cast((avg(UnscaledValue(agg4#25))#101 / 100.0) as decimal(11,6)) AS agg4#108] +Functions [4]: [avg(agg1#19), avg(UnscaledValue(agg2#20)), avg(UnscaledValue(agg3#21)), avg(UnscaledValue(agg4#22))] +Aggregate Attributes [4]: [avg(agg1#19)#90, avg(UnscaledValue(agg2#20))#91, avg(UnscaledValue(agg3#21))#92, avg(UnscaledValue(agg4#22))#93] +Results [7]: [null AS i_item_id#94, null AS s_state#95, 1 AS g_state#96, avg(agg1#19)#90 AS agg1#97, cast((avg(UnscaledValue(agg2#20))#91 / 100.0) as decimal(11,6)) AS agg2#98, cast((avg(UnscaledValue(agg3#21))#92 / 100.0) as decimal(11,6)) AS agg3#99, cast((avg(UnscaledValue(agg4#22))#93 / 100.0) as decimal(11,6)) AS agg4#100] (72) Union (73) TakeOrderedAndProject -Input [7]: [i_item_id#20, s_state#17, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] -Arguments: 100, [i_item_id#20 ASC NULLS FIRST, s_state#17 ASC NULLS FIRST], [i_item_id#20, s_state#17, g_state#47, agg1#48, agg2#49, agg3#50, agg4#51] +Input [7]: [i_item_id#18, s_state#16, g_state#43, agg1#44, agg2#45, agg3#46, agg4#47] +Arguments: 100, [i_item_id#18 ASC NULLS FIRST, s_state#16 ASC NULLS FIRST], [i_item_id#18, s_state#16, g_state#43, agg1#44, agg2#45, agg3#46, agg4#47] ===== Subqueries ===== @@ -417,26 +417,26 @@ BroadcastExchange (78) (74) Scan parquet default.date_dim -Output [2]: [d_date_sk#15, d_year#109] +Output [2]: [d_date_sk#14, d_year#101] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1998), IsNotNull(d_date_sk)] ReadSchema: struct (75) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#109] +Input [2]: [d_date_sk#14, d_year#101] (76) Filter [codegen id : 1] -Input [2]: [d_date_sk#15, d_year#109] -Condition : ((isnotnull(d_year#109) AND (d_year#109 = 1998)) AND isnotnull(d_date_sk#15)) +Input [2]: [d_date_sk#14, d_year#101] +Condition : ((isnotnull(d_year#101) AND (d_year#101 = 1998)) AND isnotnull(d_date_sk#14)) (77) Project [codegen id : 1] -Output [1]: [d_date_sk#15] -Input [2]: [d_date_sk#15, d_year#109] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_year#101] (78) BroadcastExchange -Input [1]: [d_date_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#110] +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] Subquery:2 Hosting operator id = 29 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt index e9ebb885020e2..503bfdb0233cd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34.sf100/explain.txt @@ -83,7 +83,7 @@ Input [2]: [s_store_sk#8, s_county#9] (11) BroadcastExchange Input [1]: [s_store_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#3] @@ -95,104 +95,104 @@ Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#8] (14) Scan parquet default.household_demographics -Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (16) Filter [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] -Condition : (((((isnotnull(hd_vehicle_count#14) AND isnotnull(hd_dep_count#13)) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND ((cast(hd_dep_count#13 as double) / cast(hd_vehicle_count#14 as double)) > 1.2)) AND isnotnull(hd_demo_sk#11)) +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] +Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.2)) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#11] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [1]: [hd_demo_sk#10] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (18) BroadcastExchange -Input [1]: [hd_demo_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [hd_demo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#11] +Right keys [1]: [hd_demo_sk#10] Join condition: None (20) Project [codegen id : 4] Output [2]: [ss_customer_sk#1, ss_ticket_number#4] -Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#10] (21) HashAggregate [codegen id : 4] Input [2]: [ss_customer_sk#1, ss_ticket_number#4] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Aggregate Attributes [1]: [count#14] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] (22) Exchange -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] -Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 5] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#19 AS cnt#20] +Aggregate Attributes [1]: [count(1)#16] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#16 AS cnt#17] (24) Filter [codegen id : 5] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] -Condition : ((cnt#20 >= 15) AND (cnt#20 <= 20)) +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] +Condition : ((cnt#17 >= 15) AND (cnt#17 <= 20)) (25) Exchange -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] -Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] +Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=4] (26) Sort [codegen id : 6] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] Arguments: [ss_customer_sk#1 ASC NULLS FIRST], false, 0 (27) Scan parquet default.customer -Output [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] +Output [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 7] -Input [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (29) Filter [codegen id : 7] -Input [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] -Condition : isnotnull(c_customer_sk#22) +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Condition : isnotnull(c_customer_sk#18) (30) Exchange -Input [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] -Arguments: hashpartitioning(c_customer_sk#22, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Arguments: hashpartitioning(c_customer_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=5] (31) Sort [codegen id : 8] -Input [5]: [c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] -Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Arguments: [c_customer_sk#18 ASC NULLS FIRST], false, 0 (32) SortMergeJoin [codegen id : 9] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#22] +Right keys [1]: [c_customer_sk#18] Join condition: None (33) Project [codegen id : 9] -Output [6]: [c_last_name#25, c_first_name#24, c_salutation#23, c_preferred_cust_flag#26, ss_ticket_number#4, cnt#20] -Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20, c_customer_sk#22, c_salutation#23, c_first_name#24, c_last_name#25, c_preferred_cust_flag#26] +Output [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17, c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (34) Exchange -Input [6]: [c_last_name#25, c_first_name#24, c_salutation#23, c_preferred_cust_flag#26, ss_ticket_number#4, cnt#20] -Arguments: rangepartitioning(c_last_name#25 ASC NULLS FIRST, c_first_name#24 ASC NULLS FIRST, c_salutation#23 ASC NULLS FIRST, c_preferred_cust_flag#26 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: rangepartitioning(c_last_name#21 ASC NULLS FIRST, c_first_name#20 ASC NULLS FIRST, c_salutation#19 ASC NULLS FIRST, c_preferred_cust_flag#22 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=6] (35) Sort [codegen id : 10] -Input [6]: [c_last_name#25, c_first_name#24, c_salutation#23, c_preferred_cust_flag#26, ss_ticket_number#4, cnt#20] -Arguments: [c_last_name#25 ASC NULLS FIRST, c_first_name#24 ASC NULLS FIRST, c_salutation#23 ASC NULLS FIRST, c_preferred_cust_flag#26 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST], true, 0 +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: [c_last_name#21 ASC NULLS FIRST, c_first_name#20 ASC NULLS FIRST, c_salutation#19 ASC NULLS FIRST, c_preferred_cust_flag#22 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST], true, 0 ===== Subqueries ===== @@ -205,25 +205,25 @@ BroadcastExchange (40) (36) Scan parquet default.date_dim -Output [3]: [d_date_sk#7, d_year#29, d_dom#30] +Output [3]: [d_date_sk#7, d_year#23, d_dom#24] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] ReadSchema: struct (37) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#29, d_dom#30] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (38) Filter [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#29, d_dom#30] -Condition : (((((d_dom#30 >= 1) AND (d_dom#30 <= 3)) OR ((d_dom#30 >= 25) AND (d_dom#30 <= 28))) AND d_year#29 IN (1999,2000,2001)) AND isnotnull(d_date_sk#7)) +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] +Condition : (((((d_dom#24 >= 1) AND (d_dom#24 <= 3)) OR ((d_dom#24 >= 25) AND (d_dom#24 <= 28))) AND d_year#23 IN (1999,2000,2001)) AND isnotnull(d_date_sk#7)) (39) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [3]: [d_date_sk#7, d_year#29, d_dom#30] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (40) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#31] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt index 3020b3ab2e4eb..41141558a807a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q34/explain.txt @@ -80,7 +80,7 @@ Input [2]: [s_store_sk#8, s_county#9] (11) BroadcastExchange Input [1]: [s_store_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#3] @@ -92,92 +92,92 @@ Output [3]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4] Input [5]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_store_sk#3, ss_ticket_number#4, s_store_sk#8] (14) Scan parquet default.household_demographics -Output [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_vehicle_count), IsNotNull(hd_dep_count), Or(EqualTo(hd_buy_potential,>10000 ),EqualTo(hd_buy_potential,unknown )), GreaterThan(hd_vehicle_count,0), IsNotNull(hd_demo_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (16) Filter [codegen id : 3] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] -Condition : (((((isnotnull(hd_vehicle_count#14) AND isnotnull(hd_dep_count#13)) AND ((hd_buy_potential#12 = >10000 ) OR (hd_buy_potential#12 = unknown ))) AND (hd_vehicle_count#14 > 0)) AND ((cast(hd_dep_count#13 as double) / cast(hd_vehicle_count#14 as double)) > 1.2)) AND isnotnull(hd_demo_sk#11)) +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] +Condition : (((((isnotnull(hd_vehicle_count#13) AND isnotnull(hd_dep_count#12)) AND ((hd_buy_potential#11 = >10000 ) OR (hd_buy_potential#11 = unknown ))) AND (hd_vehicle_count#13 > 0)) AND ((cast(hd_dep_count#12 as double) / cast(hd_vehicle_count#13 as double)) > 1.2)) AND isnotnull(hd_demo_sk#10)) (17) Project [codegen id : 3] -Output [1]: [hd_demo_sk#11] -Input [4]: [hd_demo_sk#11, hd_buy_potential#12, hd_dep_count#13, hd_vehicle_count#14] +Output [1]: [hd_demo_sk#10] +Input [4]: [hd_demo_sk#10, hd_buy_potential#11, hd_dep_count#12, hd_vehicle_count#13] (18) BroadcastExchange -Input [1]: [hd_demo_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [hd_demo_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_hdemo_sk#2] -Right keys [1]: [hd_demo_sk#11] +Right keys [1]: [hd_demo_sk#10] Join condition: None (20) Project [codegen id : 4] Output [2]: [ss_customer_sk#1, ss_ticket_number#4] -Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#11] +Input [4]: [ss_customer_sk#1, ss_hdemo_sk#2, ss_ticket_number#4, hd_demo_sk#10] (21) HashAggregate [codegen id : 4] Input [2]: [ss_customer_sk#1, ss_ticket_number#4] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#16] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Aggregate Attributes [1]: [count#14] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] (22) Exchange -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] -Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] +Arguments: hashpartitioning(ss_ticket_number#4, ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (23) HashAggregate [codegen id : 6] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#17] +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, count#15] Keys [2]: [ss_ticket_number#4, ss_customer_sk#1] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#19] -Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#19 AS cnt#20] +Aggregate Attributes [1]: [count(1)#16] +Results [3]: [ss_ticket_number#4, ss_customer_sk#1, count(1)#16 AS cnt#17] (24) Filter [codegen id : 6] -Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20] -Condition : ((cnt#20 >= 15) AND (cnt#20 <= 20)) +Input [3]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17] +Condition : ((cnt#17 >= 15) AND (cnt#17 <= 20)) (25) Scan parquet default.customer -Output [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] +Output [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 5] -Input [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (27) Filter [codegen id : 5] -Input [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] -Condition : isnotnull(c_customer_sk#21) +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Condition : isnotnull(c_customer_sk#18) (28) BroadcastExchange -Input [5]: [c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Input [5]: [c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#21] +Right keys [1]: [c_customer_sk#18] Join condition: None (30) Project [codegen id : 6] -Output [6]: [c_last_name#24, c_first_name#23, c_salutation#22, c_preferred_cust_flag#25, ss_ticket_number#4, cnt#20] -Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#20, c_customer_sk#21, c_salutation#22, c_first_name#23, c_last_name#24, c_preferred_cust_flag#25] +Output [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Input [8]: [ss_ticket_number#4, ss_customer_sk#1, cnt#17, c_customer_sk#18, c_salutation#19, c_first_name#20, c_last_name#21, c_preferred_cust_flag#22] (31) Exchange -Input [6]: [c_last_name#24, c_first_name#23, c_salutation#22, c_preferred_cust_flag#25, ss_ticket_number#4, cnt#20] -Arguments: rangepartitioning(c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, c_salutation#22 ASC NULLS FIRST, c_preferred_cust_flag#25 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: rangepartitioning(c_last_name#21 ASC NULLS FIRST, c_first_name#20 ASC NULLS FIRST, c_salutation#19 ASC NULLS FIRST, c_preferred_cust_flag#22 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=5] (32) Sort [codegen id : 7] -Input [6]: [c_last_name#24, c_first_name#23, c_salutation#22, c_preferred_cust_flag#25, ss_ticket_number#4, cnt#20] -Arguments: [c_last_name#24 ASC NULLS FIRST, c_first_name#23 ASC NULLS FIRST, c_salutation#22 ASC NULLS FIRST, c_preferred_cust_flag#25 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST], true, 0 +Input [6]: [c_last_name#21, c_first_name#20, c_salutation#19, c_preferred_cust_flag#22, ss_ticket_number#4, cnt#17] +Arguments: [c_last_name#21 ASC NULLS FIRST, c_first_name#20 ASC NULLS FIRST, c_salutation#19 ASC NULLS FIRST, c_preferred_cust_flag#22 DESC NULLS LAST, ss_ticket_number#4 ASC NULLS FIRST], true, 0 ===== Subqueries ===== @@ -190,25 +190,25 @@ BroadcastExchange (37) (33) Scan parquet default.date_dim -Output [3]: [d_date_sk#7, d_year#28, d_dom#29] +Output [3]: [d_date_sk#7, d_year#23, d_dom#24] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(And(GreaterThanOrEqual(d_dom,1),LessThanOrEqual(d_dom,3)),And(GreaterThanOrEqual(d_dom,25),LessThanOrEqual(d_dom,28))), In(d_year, [1999,2000,2001]), IsNotNull(d_date_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (35) Filter [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] -Condition : (((((d_dom#29 >= 1) AND (d_dom#29 <= 3)) OR ((d_dom#29 >= 25) AND (d_dom#29 <= 28))) AND d_year#28 IN (1999,2000,2001)) AND isnotnull(d_date_sk#7)) +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] +Condition : (((((d_dom#24 >= 1) AND (d_dom#24 <= 3)) OR ((d_dom#24 >= 25) AND (d_dom#24 <= 28))) AND d_year#23 IN (1999,2000,2001)) AND isnotnull(d_date_sk#7)) (36) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [3]: [d_date_sk#7, d_year#28, d_dom#29] +Input [3]: [d_date_sk#7, d_year#23, d_dom#24] (37) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#30] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.sf100/explain.txt index 33797ca4e7f6e..42d97a4bd3e76 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35.sf100/explain.txt @@ -70,115 +70,115 @@ Condition : (isnotnull(c_current_addr_sk#5) AND isnotnull(c_current_cdemo_sk#4)) (4) Exchange Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] -Arguments: hashpartitioning(c_customer_sk#3, 5), ENSURE_REQUIREMENTS, [id=#6] +Arguments: hashpartitioning(c_customer_sk#3, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [3]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5] Arguments: [c_customer_sk#3 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_sales -Output [2]: [ss_customer_sk#7, ss_sold_date_sk#8] +Output [2]: [ss_customer_sk#6, ss_sold_date_sk#7] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#8), dynamicpruningexpression(ss_sold_date_sk#8 IN dynamicpruning#9)] +PartitionFilters: [isnotnull(ss_sold_date_sk#7), dynamicpruningexpression(ss_sold_date_sk#7 IN dynamicpruning#8)] ReadSchema: struct (7) ColumnarToRow [codegen id : 4] -Input [2]: [ss_customer_sk#7, ss_sold_date_sk#8] +Input [2]: [ss_customer_sk#6, ss_sold_date_sk#7] (8) ReusedExchange [Reuses operator id: 58] -Output [1]: [d_date_sk#10] +Output [1]: [d_date_sk#9] (9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#10] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#9] Join condition: None (10) Project [codegen id : 4] -Output [1]: [ss_customer_sk#7] -Input [3]: [ss_customer_sk#7, ss_sold_date_sk#8, d_date_sk#10] +Output [1]: [ss_customer_sk#6] +Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#9] (11) Exchange -Input [1]: [ss_customer_sk#7] -Arguments: hashpartitioning(ss_customer_sk#7, 5), ENSURE_REQUIREMENTS, [id=#11] +Input [1]: [ss_customer_sk#6] +Arguments: hashpartitioning(ss_customer_sk#6, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 5] -Input [1]: [ss_customer_sk#7] -Arguments: [ss_customer_sk#7 ASC NULLS FIRST], false, 0 +Input [1]: [ss_customer_sk#6] +Arguments: [ss_customer_sk#6 ASC NULLS FIRST], false, 0 (13) SortMergeJoin [codegen id : 6] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ss_customer_sk#7] +Right keys [1]: [ss_customer_sk#6] Join condition: None (14) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#12, ws_sold_date_sk#13] +Output [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#13), dynamicpruningexpression(ws_sold_date_sk#13 IN dynamicpruning#9)] +PartitionFilters: [isnotnull(ws_sold_date_sk#11), dynamicpruningexpression(ws_sold_date_sk#11 IN dynamicpruning#8)] ReadSchema: struct (15) ColumnarToRow [codegen id : 8] -Input [2]: [ws_bill_customer_sk#12, ws_sold_date_sk#13] +Input [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] (16) ReusedExchange [Reuses operator id: 58] -Output [1]: [d_date_sk#14] +Output [1]: [d_date_sk#12] (17) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_sold_date_sk#13] -Right keys [1]: [d_date_sk#14] +Left keys [1]: [ws_sold_date_sk#11] +Right keys [1]: [d_date_sk#12] Join condition: None (18) Project [codegen id : 8] -Output [1]: [ws_bill_customer_sk#12] -Input [3]: [ws_bill_customer_sk#12, ws_sold_date_sk#13, d_date_sk#14] +Output [1]: [ws_bill_customer_sk#10] +Input [3]: [ws_bill_customer_sk#10, ws_sold_date_sk#11, d_date_sk#12] (19) Exchange -Input [1]: [ws_bill_customer_sk#12] -Arguments: hashpartitioning(ws_bill_customer_sk#12, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [1]: [ws_bill_customer_sk#10] +Arguments: hashpartitioning(ws_bill_customer_sk#10, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) Sort [codegen id : 9] -Input [1]: [ws_bill_customer_sk#12] -Arguments: [ws_bill_customer_sk#12 ASC NULLS FIRST], false, 0 +Input [1]: [ws_bill_customer_sk#10] +Arguments: [ws_bill_customer_sk#10 ASC NULLS FIRST], false, 0 (21) SortMergeJoin [codegen id : 10] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ws_bill_customer_sk#12] +Right keys [1]: [ws_bill_customer_sk#10] Join condition: None (22) Scan parquet default.catalog_sales -Output [2]: [cs_ship_customer_sk#16, cs_sold_date_sk#17] +Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#17), dynamicpruningexpression(cs_sold_date_sk#17 IN dynamicpruning#9)] +PartitionFilters: [isnotnull(cs_sold_date_sk#14), dynamicpruningexpression(cs_sold_date_sk#14 IN dynamicpruning#8)] ReadSchema: struct (23) ColumnarToRow [codegen id : 12] -Input [2]: [cs_ship_customer_sk#16, cs_sold_date_sk#17] +Input [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] (24) ReusedExchange [Reuses operator id: 58] -Output [1]: [d_date_sk#18] +Output [1]: [d_date_sk#15] (25) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [cs_sold_date_sk#17] -Right keys [1]: [d_date_sk#18] +Left keys [1]: [cs_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] Join condition: None (26) Project [codegen id : 12] -Output [1]: [cs_ship_customer_sk#16] -Input [3]: [cs_ship_customer_sk#16, cs_sold_date_sk#17, d_date_sk#18] +Output [1]: [cs_ship_customer_sk#13] +Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] (27) Exchange -Input [1]: [cs_ship_customer_sk#16] -Arguments: hashpartitioning(cs_ship_customer_sk#16, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [1]: [cs_ship_customer_sk#13] +Arguments: hashpartitioning(cs_ship_customer_sk#13, 5), ENSURE_REQUIREMENTS, [plan_id=4] (28) Sort [codegen id : 13] -Input [1]: [cs_ship_customer_sk#16] -Arguments: [cs_ship_customer_sk#16 ASC NULLS FIRST], false, 0 +Input [1]: [cs_ship_customer_sk#13] +Arguments: [cs_ship_customer_sk#13 ASC NULLS FIRST], false, 0 (29) SortMergeJoin [codegen id : 14] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [cs_ship_customer_sk#16] +Right keys [1]: [cs_ship_customer_sk#13] Join condition: None (30) Filter [codegen id : 14] @@ -191,107 +191,107 @@ Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2 (32) Exchange Input [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] -Arguments: hashpartitioning(c_current_addr_sk#5, 5), ENSURE_REQUIREMENTS, [id=#20] +Arguments: hashpartitioning(c_current_addr_sk#5, 5), ENSURE_REQUIREMENTS, [plan_id=5] (33) Sort [codegen id : 15] Input [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] Arguments: [c_current_addr_sk#5 ASC NULLS FIRST], false, 0 (34) Scan parquet default.customer_address -Output [2]: [ca_address_sk#21, ca_state#22] +Output [2]: [ca_address_sk#16, ca_state#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 16] -Input [2]: [ca_address_sk#21, ca_state#22] +Input [2]: [ca_address_sk#16, ca_state#17] (36) Filter [codegen id : 16] -Input [2]: [ca_address_sk#21, ca_state#22] -Condition : isnotnull(ca_address_sk#21) +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : isnotnull(ca_address_sk#16) (37) Exchange -Input [2]: [ca_address_sk#21, ca_state#22] -Arguments: hashpartitioning(ca_address_sk#21, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: hashpartitioning(ca_address_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=6] (38) Sort [codegen id : 17] -Input [2]: [ca_address_sk#21, ca_state#22] -Arguments: [ca_address_sk#21 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: [ca_address_sk#16 ASC NULLS FIRST], false, 0 (39) SortMergeJoin [codegen id : 18] Left keys [1]: [c_current_addr_sk#5] -Right keys [1]: [ca_address_sk#21] +Right keys [1]: [ca_address_sk#16] Join condition: None (40) Project [codegen id : 18] -Output [2]: [c_current_cdemo_sk#4, ca_state#22] -Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#21, ca_state#22] +Output [2]: [c_current_cdemo_sk#4, ca_state#17] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#16, ca_state#17] (41) Exchange -Input [2]: [c_current_cdemo_sk#4, ca_state#22] -Arguments: hashpartitioning(c_current_cdemo_sk#4, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [2]: [c_current_cdemo_sk#4, ca_state#17] +Arguments: hashpartitioning(c_current_cdemo_sk#4, 5), ENSURE_REQUIREMENTS, [plan_id=7] (42) Sort [codegen id : 19] -Input [2]: [c_current_cdemo_sk#4, ca_state#22] +Input [2]: [c_current_cdemo_sk#4, ca_state#17] Arguments: [c_current_cdemo_sk#4 ASC NULLS FIRST], false, 0 (43) Scan parquet default.customer_demographics -Output [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Output [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 20] -Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] (45) Filter [codegen id : 20] -Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Condition : isnotnull(cd_demo_sk#25) +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Condition : isnotnull(cd_demo_sk#18) (46) Exchange -Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Arguments: hashpartitioning(cd_demo_sk#25, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Arguments: hashpartitioning(cd_demo_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=8] (47) Sort [codegen id : 21] -Input [6]: [cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Arguments: [cd_demo_sk#25 ASC NULLS FIRST], false, 0 +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Arguments: [cd_demo_sk#18 ASC NULLS FIRST], false, 0 (48) SortMergeJoin [codegen id : 22] Left keys [1]: [c_current_cdemo_sk#4] -Right keys [1]: [cd_demo_sk#25] +Right keys [1]: [cd_demo_sk#18] Join condition: None (49) Project [codegen id : 22] -Output [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Input [8]: [c_current_cdemo_sk#4, ca_state#22, cd_demo_sk#25, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] +Output [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Input [8]: [c_current_cdemo_sk#4, ca_state#17, cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] (50) HashAggregate [codegen id : 22] -Input [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Keys [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Functions [10]: [partial_count(1), partial_avg(cd_dep_count#28), partial_max(cd_dep_count#28), partial_sum(cd_dep_count#28), partial_avg(cd_dep_employed_count#29), partial_max(cd_dep_employed_count#29), partial_sum(cd_dep_employed_count#29), partial_avg(cd_dep_college_count#30), partial_max(cd_dep_college_count#30), partial_sum(cd_dep_college_count#30)] -Aggregate Attributes [13]: [count#32, sum#33, count#34, max#35, sum#36, sum#37, count#38, max#39, sum#40, sum#41, count#42, max#43, sum#44] -Results [19]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#45, sum#46, count#47, max#48, sum#49, sum#50, count#51, max#52, sum#53, sum#54, count#55, max#56, sum#57] +Input [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Keys [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Functions [10]: [partial_count(1), partial_avg(cd_dep_count#21), partial_max(cd_dep_count#21), partial_sum(cd_dep_count#21), partial_avg(cd_dep_employed_count#22), partial_max(cd_dep_employed_count#22), partial_sum(cd_dep_employed_count#22), partial_avg(cd_dep_college_count#23), partial_max(cd_dep_college_count#23), partial_sum(cd_dep_college_count#23)] +Aggregate Attributes [13]: [count#24, sum#25, count#26, max#27, sum#28, sum#29, count#30, max#31, sum#32, sum#33, count#34, max#35, sum#36] +Results [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, sum#38, count#39, max#40, sum#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49] (51) Exchange -Input [19]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#45, sum#46, count#47, max#48, sum#49, sum#50, count#51, max#52, sum#53, sum#54, count#55, max#56, sum#57] -Arguments: hashpartitioning(ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, 5), ENSURE_REQUIREMENTS, [id=#58] +Input [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, sum#38, count#39, max#40, sum#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49] +Arguments: hashpartitioning(ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, 5), ENSURE_REQUIREMENTS, [plan_id=9] (52) HashAggregate [codegen id : 23] -Input [19]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30, count#45, sum#46, count#47, max#48, sum#49, sum#50, count#51, max#52, sum#53, sum#54, count#55, max#56, sum#57] -Keys [6]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cd_dep_employed_count#29, cd_dep_college_count#30] -Functions [10]: [count(1), avg(cd_dep_count#28), max(cd_dep_count#28), sum(cd_dep_count#28), avg(cd_dep_employed_count#29), max(cd_dep_employed_count#29), sum(cd_dep_employed_count#29), avg(cd_dep_college_count#30), max(cd_dep_college_count#30), sum(cd_dep_college_count#30)] -Aggregate Attributes [10]: [count(1)#59, avg(cd_dep_count#28)#60, max(cd_dep_count#28)#61, sum(cd_dep_count#28)#62, avg(cd_dep_employed_count#29)#63, max(cd_dep_employed_count#29)#64, sum(cd_dep_employed_count#29)#65, avg(cd_dep_college_count#30)#66, max(cd_dep_college_count#30)#67, sum(cd_dep_college_count#30)#68] -Results [18]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, count(1)#59 AS cnt1#69, avg(cd_dep_count#28)#60 AS avg(cd_dep_count)#70, max(cd_dep_count#28)#61 AS max(cd_dep_count)#71, sum(cd_dep_count#28)#62 AS sum(cd_dep_count)#72, cd_dep_employed_count#29, count(1)#59 AS cnt2#73, avg(cd_dep_employed_count#29)#63 AS avg(cd_dep_employed_count)#74, max(cd_dep_employed_count#29)#64 AS max(cd_dep_employed_count)#75, sum(cd_dep_employed_count#29)#65 AS sum(cd_dep_employed_count)#76, cd_dep_college_count#30, count(1)#59 AS cnt3#77, avg(cd_dep_college_count#30)#66 AS avg(cd_dep_college_count)#78, max(cd_dep_college_count#30)#67 AS max(cd_dep_college_count)#79, sum(cd_dep_college_count#30)#68 AS sum(cd_dep_college_count)#80] +Input [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, sum#38, count#39, max#40, sum#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49] +Keys [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Functions [10]: [count(1), avg(cd_dep_count#21), max(cd_dep_count#21), sum(cd_dep_count#21), avg(cd_dep_employed_count#22), max(cd_dep_employed_count#22), sum(cd_dep_employed_count#22), avg(cd_dep_college_count#23), max(cd_dep_college_count#23), sum(cd_dep_college_count#23)] +Aggregate Attributes [10]: [count(1)#50, avg(cd_dep_count#21)#51, max(cd_dep_count#21)#52, sum(cd_dep_count#21)#53, avg(cd_dep_employed_count#22)#54, max(cd_dep_employed_count#22)#55, sum(cd_dep_employed_count#22)#56, avg(cd_dep_college_count#23)#57, max(cd_dep_college_count#23)#58, sum(cd_dep_college_count#23)#59] +Results [18]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, count(1)#50 AS cnt1#60, avg(cd_dep_count#21)#51 AS avg(cd_dep_count)#61, max(cd_dep_count#21)#52 AS max(cd_dep_count)#62, sum(cd_dep_count#21)#53 AS sum(cd_dep_count)#63, cd_dep_employed_count#22, count(1)#50 AS cnt2#64, avg(cd_dep_employed_count#22)#54 AS avg(cd_dep_employed_count)#65, max(cd_dep_employed_count#22)#55 AS max(cd_dep_employed_count)#66, sum(cd_dep_employed_count#22)#56 AS sum(cd_dep_employed_count)#67, cd_dep_college_count#23, count(1)#50 AS cnt3#68, avg(cd_dep_college_count#23)#57 AS avg(cd_dep_college_count)#69, max(cd_dep_college_count#23)#58 AS max(cd_dep_college_count)#70, sum(cd_dep_college_count#23)#59 AS sum(cd_dep_college_count)#71] (53) TakeOrderedAndProject -Input [18]: [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cnt1#69, avg(cd_dep_count)#70, max(cd_dep_count)#71, sum(cd_dep_count)#72, cd_dep_employed_count#29, cnt2#73, avg(cd_dep_employed_count)#74, max(cd_dep_employed_count)#75, sum(cd_dep_employed_count)#76, cd_dep_college_count#30, cnt3#77, avg(cd_dep_college_count)#78, max(cd_dep_college_count)#79, sum(cd_dep_college_count)#80] -Arguments: 100, [ca_state#22 ASC NULLS FIRST, cd_gender#26 ASC NULLS FIRST, cd_marital_status#27 ASC NULLS FIRST, cd_dep_count#28 ASC NULLS FIRST, cd_dep_employed_count#29 ASC NULLS FIRST, cd_dep_college_count#30 ASC NULLS FIRST], [ca_state#22, cd_gender#26, cd_marital_status#27, cd_dep_count#28, cnt1#69, avg(cd_dep_count)#70, max(cd_dep_count)#71, sum(cd_dep_count)#72, cd_dep_employed_count#29, cnt2#73, avg(cd_dep_employed_count)#74, max(cd_dep_employed_count)#75, sum(cd_dep_employed_count)#76, cd_dep_college_count#30, cnt3#77, avg(cd_dep_college_count)#78, max(cd_dep_college_count)#79, sum(cd_dep_college_count)#80] +Input [18]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cnt1#60, avg(cd_dep_count)#61, max(cd_dep_count)#62, sum(cd_dep_count)#63, cd_dep_employed_count#22, cnt2#64, avg(cd_dep_employed_count)#65, max(cd_dep_employed_count)#66, sum(cd_dep_employed_count)#67, cd_dep_college_count#23, cnt3#68, avg(cd_dep_college_count)#69, max(cd_dep_college_count)#70, sum(cd_dep_college_count)#71] +Arguments: 100, [ca_state#17 ASC NULLS FIRST, cd_gender#19 ASC NULLS FIRST, cd_marital_status#20 ASC NULLS FIRST, cd_dep_count#21 ASC NULLS FIRST, cd_dep_employed_count#22 ASC NULLS FIRST, cd_dep_college_count#23 ASC NULLS FIRST], [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cnt1#60, avg(cd_dep_count)#61, max(cd_dep_count)#62, sum(cd_dep_count)#63, cd_dep_employed_count#22, cnt2#64, avg(cd_dep_employed_count)#65, max(cd_dep_employed_count)#66, sum(cd_dep_employed_count)#67, cd_dep_college_count#23, cnt3#68, avg(cd_dep_college_count)#69, max(cd_dep_college_count)#70, sum(cd_dep_college_count)#71] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 BroadcastExchange (58) +- * Project (57) +- * Filter (56) @@ -300,29 +300,29 @@ BroadcastExchange (58) (54) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#81, d_qoy#82] +Output [3]: [d_date_sk#9, d_year#72, d_qoy#73] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] ReadSchema: struct (55) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#81, d_qoy#82] +Input [3]: [d_date_sk#9, d_year#72, d_qoy#73] (56) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#81, d_qoy#82] -Condition : ((((isnotnull(d_year#81) AND isnotnull(d_qoy#82)) AND (d_year#81 = 2002)) AND (d_qoy#82 < 4)) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#9, d_year#72, d_qoy#73] +Condition : ((((isnotnull(d_year#72) AND isnotnull(d_qoy#73)) AND (d_year#72 = 2002)) AND (d_qoy#73 < 4)) AND isnotnull(d_date_sk#9)) (57) Project [codegen id : 1] -Output [1]: [d_date_sk#10] -Input [3]: [d_date_sk#10, d_year#81, d_qoy#82] +Output [1]: [d_date_sk#9] +Input [3]: [d_date_sk#9, d_year#72, d_qoy#73] (58) BroadcastExchange -Input [1]: [d_date_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#83] +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] -Subquery:2 Hosting operator id = 14 Hosting Expression = ws_sold_date_sk#13 IN dynamicpruning#9 +Subquery:2 Hosting operator id = 14 Hosting Expression = ws_sold_date_sk#11 IN dynamicpruning#8 -Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#17 IN dynamicpruning#9 +Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#14 IN dynamicpruning#8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35/explain.txt index a7d47450807a7..4ca278bcffb2f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35/explain.txt @@ -81,7 +81,7 @@ Input [3]: [ss_customer_sk#6, ss_sold_date_sk#7, d_date_sk#9] (9) BroadcastExchange Input [1]: [ss_customer_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#3] @@ -89,65 +89,65 @@ Right keys [1]: [ss_customer_sk#6] Join condition: None (11) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Output [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#12), dynamicpruningexpression(ws_sold_date_sk#12 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ws_sold_date_sk#11), dynamicpruningexpression(ws_sold_date_sk#11 IN dynamicpruning#8)] ReadSchema: struct (12) ColumnarToRow [codegen id : 4] -Input [2]: [ws_bill_customer_sk#11, ws_sold_date_sk#12] +Input [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] (13) ReusedExchange [Reuses operator id: 47] -Output [1]: [d_date_sk#13] +Output [1]: [d_date_sk#12] (14) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ws_sold_date_sk#12] -Right keys [1]: [d_date_sk#13] +Left keys [1]: [ws_sold_date_sk#11] +Right keys [1]: [d_date_sk#12] Join condition: None (15) Project [codegen id : 4] -Output [1]: [ws_bill_customer_sk#11] -Input [3]: [ws_bill_customer_sk#11, ws_sold_date_sk#12, d_date_sk#13] +Output [1]: [ws_bill_customer_sk#10] +Input [3]: [ws_bill_customer_sk#10, ws_sold_date_sk#11, d_date_sk#12] (16) BroadcastExchange -Input [1]: [ws_bill_customer_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Input [1]: [ws_bill_customer_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ws_bill_customer_sk#11] +Right keys [1]: [ws_bill_customer_sk#10] Join condition: None (18) Scan parquet default.catalog_sales -Output [2]: [cs_ship_customer_sk#15, cs_sold_date_sk#16] +Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#16), dynamicpruningexpression(cs_sold_date_sk#16 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(cs_sold_date_sk#14), dynamicpruningexpression(cs_sold_date_sk#14 IN dynamicpruning#8)] ReadSchema: struct (19) ColumnarToRow [codegen id : 6] -Input [2]: [cs_ship_customer_sk#15, cs_sold_date_sk#16] +Input [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] (20) ReusedExchange [Reuses operator id: 47] -Output [1]: [d_date_sk#17] +Output [1]: [d_date_sk#15] (21) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_sold_date_sk#16] -Right keys [1]: [d_date_sk#17] +Left keys [1]: [cs_sold_date_sk#14] +Right keys [1]: [d_date_sk#15] Join condition: None (22) Project [codegen id : 6] -Output [1]: [cs_ship_customer_sk#15] -Input [3]: [cs_ship_customer_sk#15, cs_sold_date_sk#16, d_date_sk#17] +Output [1]: [cs_ship_customer_sk#13] +Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] (23) BroadcastExchange -Input [1]: [cs_ship_customer_sk#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [cs_ship_customer_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [cs_ship_customer_sk#15] +Right keys [1]: [cs_ship_customer_sk#13] Join condition: None (25) Filter [codegen id : 9] @@ -159,80 +159,80 @@ Output [2]: [c_current_cdemo_sk#4, c_current_addr_sk#5] Input [5]: [c_customer_sk#3, c_current_cdemo_sk#4, c_current_addr_sk#5, exists#2, exists#1] (27) Scan parquet default.customer_address -Output [2]: [ca_address_sk#19, ca_state#20] +Output [2]: [ca_address_sk#16, ca_state#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#19, ca_state#20] +Input [2]: [ca_address_sk#16, ca_state#17] (29) Filter [codegen id : 7] -Input [2]: [ca_address_sk#19, ca_state#20] -Condition : isnotnull(ca_address_sk#19) +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : isnotnull(ca_address_sk#16) (30) BroadcastExchange -Input [2]: [ca_address_sk#19, ca_state#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#21] +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (31) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_current_addr_sk#5] -Right keys [1]: [ca_address_sk#19] +Right keys [1]: [ca_address_sk#16] Join condition: None (32) Project [codegen id : 9] -Output [2]: [c_current_cdemo_sk#4, ca_state#20] -Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#19, ca_state#20] +Output [2]: [c_current_cdemo_sk#4, ca_state#17] +Input [4]: [c_current_cdemo_sk#4, c_current_addr_sk#5, ca_address_sk#16, ca_state#17] (33) Scan parquet default.customer_demographics -Output [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Output [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (34) ColumnarToRow [codegen id : 8] -Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] (35) Filter [codegen id : 8] -Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Condition : isnotnull(cd_demo_sk#22) +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Condition : isnotnull(cd_demo_sk#18) (36) BroadcastExchange -Input [6]: [cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#28] +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (37) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_current_cdemo_sk#4] -Right keys [1]: [cd_demo_sk#22] +Right keys [1]: [cd_demo_sk#18] Join condition: None (38) Project [codegen id : 9] -Output [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Input [8]: [c_current_cdemo_sk#4, ca_state#20, cd_demo_sk#22, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] +Output [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Input [8]: [c_current_cdemo_sk#4, ca_state#17, cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] (39) HashAggregate [codegen id : 9] -Input [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Keys [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Functions [10]: [partial_count(1), partial_avg(cd_dep_count#25), partial_max(cd_dep_count#25), partial_sum(cd_dep_count#25), partial_avg(cd_dep_employed_count#26), partial_max(cd_dep_employed_count#26), partial_sum(cd_dep_employed_count#26), partial_avg(cd_dep_college_count#27), partial_max(cd_dep_college_count#27), partial_sum(cd_dep_college_count#27)] -Aggregate Attributes [13]: [count#29, sum#30, count#31, max#32, sum#33, sum#34, count#35, max#36, sum#37, sum#38, count#39, max#40, sum#41] -Results [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50, sum#51, count#52, max#53, sum#54] +Input [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Keys [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Functions [10]: [partial_count(1), partial_avg(cd_dep_count#21), partial_max(cd_dep_count#21), partial_sum(cd_dep_count#21), partial_avg(cd_dep_employed_count#22), partial_max(cd_dep_employed_count#22), partial_sum(cd_dep_employed_count#22), partial_avg(cd_dep_college_count#23), partial_max(cd_dep_college_count#23), partial_sum(cd_dep_college_count#23)] +Aggregate Attributes [13]: [count#24, sum#25, count#26, max#27, sum#28, sum#29, count#30, max#31, sum#32, sum#33, count#34, max#35, sum#36] +Results [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, sum#38, count#39, max#40, sum#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49] (40) Exchange -Input [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50, sum#51, count#52, max#53, sum#54] -Arguments: hashpartitioning(ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, 5), ENSURE_REQUIREMENTS, [id=#55] +Input [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, sum#38, count#39, max#40, sum#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49] +Arguments: hashpartitioning(ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, 5), ENSURE_REQUIREMENTS, [plan_id=6] (41) HashAggregate [codegen id : 10] -Input [19]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27, count#42, sum#43, count#44, max#45, sum#46, sum#47, count#48, max#49, sum#50, sum#51, count#52, max#53, sum#54] -Keys [6]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cd_dep_employed_count#26, cd_dep_college_count#27] -Functions [10]: [count(1), avg(cd_dep_count#25), max(cd_dep_count#25), sum(cd_dep_count#25), avg(cd_dep_employed_count#26), max(cd_dep_employed_count#26), sum(cd_dep_employed_count#26), avg(cd_dep_college_count#27), max(cd_dep_college_count#27), sum(cd_dep_college_count#27)] -Aggregate Attributes [10]: [count(1)#56, avg(cd_dep_count#25)#57, max(cd_dep_count#25)#58, sum(cd_dep_count#25)#59, avg(cd_dep_employed_count#26)#60, max(cd_dep_employed_count#26)#61, sum(cd_dep_employed_count#26)#62, avg(cd_dep_college_count#27)#63, max(cd_dep_college_count#27)#64, sum(cd_dep_college_count#27)#65] -Results [18]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, count(1)#56 AS cnt1#66, avg(cd_dep_count#25)#57 AS avg(cd_dep_count)#67, max(cd_dep_count#25)#58 AS max(cd_dep_count)#68, sum(cd_dep_count#25)#59 AS sum(cd_dep_count)#69, cd_dep_employed_count#26, count(1)#56 AS cnt2#70, avg(cd_dep_employed_count#26)#60 AS avg(cd_dep_employed_count)#71, max(cd_dep_employed_count#26)#61 AS max(cd_dep_employed_count)#72, sum(cd_dep_employed_count#26)#62 AS sum(cd_dep_employed_count)#73, cd_dep_college_count#27, count(1)#56 AS cnt3#74, avg(cd_dep_college_count#27)#63 AS avg(cd_dep_college_count)#75, max(cd_dep_college_count#27)#64 AS max(cd_dep_college_count)#76, sum(cd_dep_college_count#27)#65 AS sum(cd_dep_college_count)#77] +Input [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, sum#38, count#39, max#40, sum#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49] +Keys [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Functions [10]: [count(1), avg(cd_dep_count#21), max(cd_dep_count#21), sum(cd_dep_count#21), avg(cd_dep_employed_count#22), max(cd_dep_employed_count#22), sum(cd_dep_employed_count#22), avg(cd_dep_college_count#23), max(cd_dep_college_count#23), sum(cd_dep_college_count#23)] +Aggregate Attributes [10]: [count(1)#50, avg(cd_dep_count#21)#51, max(cd_dep_count#21)#52, sum(cd_dep_count#21)#53, avg(cd_dep_employed_count#22)#54, max(cd_dep_employed_count#22)#55, sum(cd_dep_employed_count#22)#56, avg(cd_dep_college_count#23)#57, max(cd_dep_college_count#23)#58, sum(cd_dep_college_count#23)#59] +Results [18]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, count(1)#50 AS cnt1#60, avg(cd_dep_count#21)#51 AS avg(cd_dep_count)#61, max(cd_dep_count#21)#52 AS max(cd_dep_count)#62, sum(cd_dep_count#21)#53 AS sum(cd_dep_count)#63, cd_dep_employed_count#22, count(1)#50 AS cnt2#64, avg(cd_dep_employed_count#22)#54 AS avg(cd_dep_employed_count)#65, max(cd_dep_employed_count#22)#55 AS max(cd_dep_employed_count)#66, sum(cd_dep_employed_count#22)#56 AS sum(cd_dep_employed_count)#67, cd_dep_college_count#23, count(1)#50 AS cnt3#68, avg(cd_dep_college_count#23)#57 AS avg(cd_dep_college_count)#69, max(cd_dep_college_count#23)#58 AS max(cd_dep_college_count)#70, sum(cd_dep_college_count#23)#59 AS sum(cd_dep_college_count)#71] (42) TakeOrderedAndProject -Input [18]: [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cnt1#66, avg(cd_dep_count)#67, max(cd_dep_count)#68, sum(cd_dep_count)#69, cd_dep_employed_count#26, cnt2#70, avg(cd_dep_employed_count)#71, max(cd_dep_employed_count)#72, sum(cd_dep_employed_count)#73, cd_dep_college_count#27, cnt3#74, avg(cd_dep_college_count)#75, max(cd_dep_college_count)#76, sum(cd_dep_college_count)#77] -Arguments: 100, [ca_state#20 ASC NULLS FIRST, cd_gender#23 ASC NULLS FIRST, cd_marital_status#24 ASC NULLS FIRST, cd_dep_count#25 ASC NULLS FIRST, cd_dep_employed_count#26 ASC NULLS FIRST, cd_dep_college_count#27 ASC NULLS FIRST], [ca_state#20, cd_gender#23, cd_marital_status#24, cd_dep_count#25, cnt1#66, avg(cd_dep_count)#67, max(cd_dep_count)#68, sum(cd_dep_count)#69, cd_dep_employed_count#26, cnt2#70, avg(cd_dep_employed_count)#71, max(cd_dep_employed_count)#72, sum(cd_dep_employed_count)#73, cd_dep_college_count#27, cnt3#74, avg(cd_dep_college_count)#75, max(cd_dep_college_count)#76, sum(cd_dep_college_count)#77] +Input [18]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cnt1#60, avg(cd_dep_count)#61, max(cd_dep_count)#62, sum(cd_dep_count)#63, cd_dep_employed_count#22, cnt2#64, avg(cd_dep_employed_count)#65, max(cd_dep_employed_count)#66, sum(cd_dep_employed_count)#67, cd_dep_college_count#23, cnt3#68, avg(cd_dep_college_count)#69, max(cd_dep_college_count)#70, sum(cd_dep_college_count)#71] +Arguments: 100, [ca_state#17 ASC NULLS FIRST, cd_gender#19 ASC NULLS FIRST, cd_marital_status#20 ASC NULLS FIRST, cd_dep_count#21 ASC NULLS FIRST, cd_dep_employed_count#22 ASC NULLS FIRST, cd_dep_college_count#23 ASC NULLS FIRST], [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cnt1#60, avg(cd_dep_count)#61, max(cd_dep_count)#62, sum(cd_dep_count)#63, cd_dep_employed_count#22, cnt2#64, avg(cd_dep_employed_count)#65, max(cd_dep_employed_count)#66, sum(cd_dep_employed_count)#67, cd_dep_college_count#23, cnt3#68, avg(cd_dep_college_count)#69, max(cd_dep_college_count)#70, sum(cd_dep_college_count)#71] ===== Subqueries ===== @@ -245,29 +245,29 @@ BroadcastExchange (47) (43) Scan parquet default.date_dim -Output [3]: [d_date_sk#9, d_year#78, d_qoy#79] +Output [3]: [d_date_sk#9, d_year#72, d_qoy#73] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,2002), LessThan(d_qoy,4), IsNotNull(d_date_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#9, d_year#78, d_qoy#79] +Input [3]: [d_date_sk#9, d_year#72, d_qoy#73] (45) Filter [codegen id : 1] -Input [3]: [d_date_sk#9, d_year#78, d_qoy#79] -Condition : ((((isnotnull(d_year#78) AND isnotnull(d_qoy#79)) AND (d_year#78 = 2002)) AND (d_qoy#79 < 4)) AND isnotnull(d_date_sk#9)) +Input [3]: [d_date_sk#9, d_year#72, d_qoy#73] +Condition : ((((isnotnull(d_year#72) AND isnotnull(d_qoy#73)) AND (d_year#72 = 2002)) AND (d_qoy#73 < 4)) AND isnotnull(d_date_sk#9)) (46) Project [codegen id : 1] Output [1]: [d_date_sk#9] -Input [3]: [d_date_sk#9, d_year#78, d_qoy#79] +Input [3]: [d_date_sk#9, d_year#72, d_qoy#73] (47) BroadcastExchange Input [1]: [d_date_sk#9] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#80] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:2 Hosting operator id = 11 Hosting Expression = ws_sold_date_sk#12 IN dynamicpruning#8 +Subquery:2 Hosting operator id = 11 Hosting Expression = ws_sold_date_sk#11 IN dynamicpruning#8 -Subquery:3 Hosting operator id = 18 Hosting Expression = cs_sold_date_sk#16 IN dynamicpruning#8 +Subquery:3 Hosting operator id = 18 Hosting Expression = cs_sold_date_sk#14 IN dynamicpruning#8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.sf100/explain.txt index f8d8946e2af92..06570d76957fd 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a.sf100/explain.txt @@ -67,104 +67,104 @@ Condition : (isnotnull(c_current_addr_sk#3) AND isnotnull(c_current_cdemo_sk#2)) (4) Exchange Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] -Arguments: hashpartitioning(c_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#4] +Arguments: hashpartitioning(c_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] Arguments: [c_customer_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_sales -Output [2]: [ss_customer_sk#5, ss_sold_date_sk#6] +Output [2]: [ss_customer_sk#4, ss_sold_date_sk#5] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#6), dynamicpruningexpression(ss_sold_date_sk#6 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ss_sold_date_sk#5), dynamicpruningexpression(ss_sold_date_sk#5 IN dynamicpruning#6)] ReadSchema: struct (7) ColumnarToRow [codegen id : 4] -Input [2]: [ss_customer_sk#5, ss_sold_date_sk#6] +Input [2]: [ss_customer_sk#4, ss_sold_date_sk#5] (8) ReusedExchange [Reuses operator id: 55] -Output [1]: [d_date_sk#8] +Output [1]: [d_date_sk#7] (9) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ss_sold_date_sk#6] -Right keys [1]: [d_date_sk#8] +Left keys [1]: [ss_sold_date_sk#5] +Right keys [1]: [d_date_sk#7] Join condition: None (10) Project [codegen id : 4] -Output [1]: [ss_customer_sk#5] -Input [3]: [ss_customer_sk#5, ss_sold_date_sk#6, d_date_sk#8] +Output [1]: [ss_customer_sk#4] +Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#7] (11) Exchange -Input [1]: [ss_customer_sk#5] -Arguments: hashpartitioning(ss_customer_sk#5, 5), ENSURE_REQUIREMENTS, [id=#9] +Input [1]: [ss_customer_sk#4] +Arguments: hashpartitioning(ss_customer_sk#4, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 5] -Input [1]: [ss_customer_sk#5] -Arguments: [ss_customer_sk#5 ASC NULLS FIRST], false, 0 +Input [1]: [ss_customer_sk#4] +Arguments: [ss_customer_sk#4 ASC NULLS FIRST], false, 0 (13) SortMergeJoin [codegen id : 6] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [ss_customer_sk#5] +Right keys [1]: [ss_customer_sk#4] Join condition: None (14) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] +Output [2]: [ws_bill_customer_sk#8, ws_sold_date_sk#9] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#11), dynamicpruningexpression(ws_sold_date_sk#11 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ws_sold_date_sk#9), dynamicpruningexpression(ws_sold_date_sk#9 IN dynamicpruning#6)] ReadSchema: struct (15) ColumnarToRow [codegen id : 8] -Input [2]: [ws_bill_customer_sk#10, ws_sold_date_sk#11] +Input [2]: [ws_bill_customer_sk#8, ws_sold_date_sk#9] (16) ReusedExchange [Reuses operator id: 55] -Output [1]: [d_date_sk#12] +Output [1]: [d_date_sk#10] (17) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [ws_sold_date_sk#11] -Right keys [1]: [d_date_sk#12] +Left keys [1]: [ws_sold_date_sk#9] +Right keys [1]: [d_date_sk#10] Join condition: None (18) Project [codegen id : 8] -Output [1]: [ws_bill_customer_sk#10 AS customsk#13] -Input [3]: [ws_bill_customer_sk#10, ws_sold_date_sk#11, d_date_sk#12] +Output [1]: [ws_bill_customer_sk#8 AS customsk#11] +Input [3]: [ws_bill_customer_sk#8, ws_sold_date_sk#9, d_date_sk#10] (19) Scan parquet default.catalog_sales -Output [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Output [2]: [cs_ship_customer_sk#12, cs_sold_date_sk#13] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#15), dynamicpruningexpression(cs_sold_date_sk#15 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(cs_sold_date_sk#13), dynamicpruningexpression(cs_sold_date_sk#13 IN dynamicpruning#6)] ReadSchema: struct (20) ColumnarToRow [codegen id : 10] -Input [2]: [cs_ship_customer_sk#14, cs_sold_date_sk#15] +Input [2]: [cs_ship_customer_sk#12, cs_sold_date_sk#13] (21) ReusedExchange [Reuses operator id: 55] -Output [1]: [d_date_sk#16] +Output [1]: [d_date_sk#14] (22) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#15] -Right keys [1]: [d_date_sk#16] +Left keys [1]: [cs_sold_date_sk#13] +Right keys [1]: [d_date_sk#14] Join condition: None (23) Project [codegen id : 10] -Output [1]: [cs_ship_customer_sk#14 AS customsk#17] -Input [3]: [cs_ship_customer_sk#14, cs_sold_date_sk#15, d_date_sk#16] +Output [1]: [cs_ship_customer_sk#12 AS customsk#15] +Input [3]: [cs_ship_customer_sk#12, cs_sold_date_sk#13, d_date_sk#14] (24) Union (25) Exchange -Input [1]: [customsk#13] -Arguments: hashpartitioning(customsk#13, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [1]: [customsk#11] +Arguments: hashpartitioning(customsk#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (26) Sort [codegen id : 11] -Input [1]: [customsk#13] -Arguments: [customsk#13 ASC NULLS FIRST], false, 0 +Input [1]: [customsk#11] +Arguments: [customsk#11 ASC NULLS FIRST], false, 0 (27) SortMergeJoin [codegen id : 12] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [customsk#13] +Right keys [1]: [customsk#11] Join condition: None (28) Project [codegen id : 12] @@ -173,107 +173,107 @@ Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] (29) Exchange Input [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] -Arguments: hashpartitioning(c_current_addr_sk#3, 5), ENSURE_REQUIREMENTS, [id=#19] +Arguments: hashpartitioning(c_current_addr_sk#3, 5), ENSURE_REQUIREMENTS, [plan_id=4] (30) Sort [codegen id : 13] Input [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] Arguments: [c_current_addr_sk#3 ASC NULLS FIRST], false, 0 (31) Scan parquet default.customer_address -Output [2]: [ca_address_sk#20, ca_state#21] +Output [2]: [ca_address_sk#16, ca_state#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 14] -Input [2]: [ca_address_sk#20, ca_state#21] +Input [2]: [ca_address_sk#16, ca_state#17] (33) Filter [codegen id : 14] -Input [2]: [ca_address_sk#20, ca_state#21] -Condition : isnotnull(ca_address_sk#20) +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : isnotnull(ca_address_sk#16) (34) Exchange -Input [2]: [ca_address_sk#20, ca_state#21] -Arguments: hashpartitioning(ca_address_sk#20, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: hashpartitioning(ca_address_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] (35) Sort [codegen id : 15] -Input [2]: [ca_address_sk#20, ca_state#21] -Arguments: [ca_address_sk#20 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: [ca_address_sk#16 ASC NULLS FIRST], false, 0 (36) SortMergeJoin [codegen id : 16] Left keys [1]: [c_current_addr_sk#3] -Right keys [1]: [ca_address_sk#20] +Right keys [1]: [ca_address_sk#16] Join condition: None (37) Project [codegen id : 16] -Output [2]: [c_current_cdemo_sk#2, ca_state#21] -Input [4]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#20, ca_state#21] +Output [2]: [c_current_cdemo_sk#2, ca_state#17] +Input [4]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#16, ca_state#17] (38) Exchange -Input [2]: [c_current_cdemo_sk#2, ca_state#21] -Arguments: hashpartitioning(c_current_cdemo_sk#2, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [2]: [c_current_cdemo_sk#2, ca_state#17] +Arguments: hashpartitioning(c_current_cdemo_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=6] (39) Sort [codegen id : 17] -Input [2]: [c_current_cdemo_sk#2, ca_state#21] +Input [2]: [c_current_cdemo_sk#2, ca_state#17] Arguments: [c_current_cdemo_sk#2 ASC NULLS FIRST], false, 0 (40) Scan parquet default.customer_demographics -Output [6]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Output [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (41) ColumnarToRow [codegen id : 18] -Input [6]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] (42) Filter [codegen id : 18] -Input [6]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] -Condition : isnotnull(cd_demo_sk#24) +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Condition : isnotnull(cd_demo_sk#18) (43) Exchange -Input [6]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] -Arguments: hashpartitioning(cd_demo_sk#24, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Arguments: hashpartitioning(cd_demo_sk#18, 5), ENSURE_REQUIREMENTS, [plan_id=7] (44) Sort [codegen id : 19] -Input [6]: [cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] -Arguments: [cd_demo_sk#24 ASC NULLS FIRST], false, 0 +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Arguments: [cd_demo_sk#18 ASC NULLS FIRST], false, 0 (45) SortMergeJoin [codegen id : 20] Left keys [1]: [c_current_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#24] +Right keys [1]: [cd_demo_sk#18] Join condition: None (46) Project [codegen id : 20] -Output [6]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] -Input [8]: [c_current_cdemo_sk#2, ca_state#21, cd_demo_sk#24, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] +Output [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Input [8]: [c_current_cdemo_sk#2, ca_state#17, cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] (47) HashAggregate [codegen id : 20] -Input [6]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] -Keys [6]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] -Functions [10]: [partial_count(1), partial_avg(cd_dep_count#27), partial_max(cd_dep_count#27), partial_sum(cd_dep_count#27), partial_avg(cd_dep_employed_count#28), partial_max(cd_dep_employed_count#28), partial_sum(cd_dep_employed_count#28), partial_avg(cd_dep_college_count#29), partial_max(cd_dep_college_count#29), partial_sum(cd_dep_college_count#29)] -Aggregate Attributes [13]: [count#31, sum#32, count#33, max#34, sum#35, sum#36, count#37, max#38, sum#39, sum#40, count#41, max#42, sum#43] -Results [19]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, count#44, sum#45, count#46, max#47, sum#48, sum#49, count#50, max#51, sum#52, sum#53, count#54, max#55, sum#56] +Input [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Keys [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Functions [10]: [partial_count(1), partial_avg(cd_dep_count#21), partial_max(cd_dep_count#21), partial_sum(cd_dep_count#21), partial_avg(cd_dep_employed_count#22), partial_max(cd_dep_employed_count#22), partial_sum(cd_dep_employed_count#22), partial_avg(cd_dep_college_count#23), partial_max(cd_dep_college_count#23), partial_sum(cd_dep_college_count#23)] +Aggregate Attributes [13]: [count#24, sum#25, count#26, max#27, sum#28, sum#29, count#30, max#31, sum#32, sum#33, count#34, max#35, sum#36] +Results [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, sum#38, count#39, max#40, sum#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49] (48) Exchange -Input [19]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, count#44, sum#45, count#46, max#47, sum#48, sum#49, count#50, max#51, sum#52, sum#53, count#54, max#55, sum#56] -Arguments: hashpartitioning(ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, 5), ENSURE_REQUIREMENTS, [id=#57] +Input [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, sum#38, count#39, max#40, sum#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49] +Arguments: hashpartitioning(ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, 5), ENSURE_REQUIREMENTS, [plan_id=8] (49) HashAggregate [codegen id : 21] -Input [19]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29, count#44, sum#45, count#46, max#47, sum#48, sum#49, count#50, max#51, sum#52, sum#53, count#54, max#55, sum#56] -Keys [6]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cd_dep_employed_count#28, cd_dep_college_count#29] -Functions [10]: [count(1), avg(cd_dep_count#27), max(cd_dep_count#27), sum(cd_dep_count#27), avg(cd_dep_employed_count#28), max(cd_dep_employed_count#28), sum(cd_dep_employed_count#28), avg(cd_dep_college_count#29), max(cd_dep_college_count#29), sum(cd_dep_college_count#29)] -Aggregate Attributes [10]: [count(1)#58, avg(cd_dep_count#27)#59, max(cd_dep_count#27)#60, sum(cd_dep_count#27)#61, avg(cd_dep_employed_count#28)#62, max(cd_dep_employed_count#28)#63, sum(cd_dep_employed_count#28)#64, avg(cd_dep_college_count#29)#65, max(cd_dep_college_count#29)#66, sum(cd_dep_college_count#29)#67] -Results [18]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, count(1)#58 AS cnt1#68, avg(cd_dep_count#27)#59 AS avg(cd_dep_count)#69, max(cd_dep_count#27)#60 AS max(cd_dep_count)#70, sum(cd_dep_count#27)#61 AS sum(cd_dep_count)#71, cd_dep_employed_count#28, count(1)#58 AS cnt2#72, avg(cd_dep_employed_count#28)#62 AS avg(cd_dep_employed_count)#73, max(cd_dep_employed_count#28)#63 AS max(cd_dep_employed_count)#74, sum(cd_dep_employed_count#28)#64 AS sum(cd_dep_employed_count)#75, cd_dep_college_count#29, count(1)#58 AS cnt3#76, avg(cd_dep_college_count#29)#65 AS avg(cd_dep_college_count)#77, max(cd_dep_college_count#29)#66 AS max(cd_dep_college_count)#78, sum(cd_dep_college_count#29)#67 AS sum(cd_dep_college_count)#79] +Input [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, sum#38, count#39, max#40, sum#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49] +Keys [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Functions [10]: [count(1), avg(cd_dep_count#21), max(cd_dep_count#21), sum(cd_dep_count#21), avg(cd_dep_employed_count#22), max(cd_dep_employed_count#22), sum(cd_dep_employed_count#22), avg(cd_dep_college_count#23), max(cd_dep_college_count#23), sum(cd_dep_college_count#23)] +Aggregate Attributes [10]: [count(1)#50, avg(cd_dep_count#21)#51, max(cd_dep_count#21)#52, sum(cd_dep_count#21)#53, avg(cd_dep_employed_count#22)#54, max(cd_dep_employed_count#22)#55, sum(cd_dep_employed_count#22)#56, avg(cd_dep_college_count#23)#57, max(cd_dep_college_count#23)#58, sum(cd_dep_college_count#23)#59] +Results [18]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, count(1)#50 AS cnt1#60, avg(cd_dep_count#21)#51 AS avg(cd_dep_count)#61, max(cd_dep_count#21)#52 AS max(cd_dep_count)#62, sum(cd_dep_count#21)#53 AS sum(cd_dep_count)#63, cd_dep_employed_count#22, count(1)#50 AS cnt2#64, avg(cd_dep_employed_count#22)#54 AS avg(cd_dep_employed_count)#65, max(cd_dep_employed_count#22)#55 AS max(cd_dep_employed_count)#66, sum(cd_dep_employed_count#22)#56 AS sum(cd_dep_employed_count)#67, cd_dep_college_count#23, count(1)#50 AS cnt3#68, avg(cd_dep_college_count#23)#57 AS avg(cd_dep_college_count)#69, max(cd_dep_college_count#23)#58 AS max(cd_dep_college_count)#70, sum(cd_dep_college_count#23)#59 AS sum(cd_dep_college_count)#71] (50) TakeOrderedAndProject -Input [18]: [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cnt1#68, avg(cd_dep_count)#69, max(cd_dep_count)#70, sum(cd_dep_count)#71, cd_dep_employed_count#28, cnt2#72, avg(cd_dep_employed_count)#73, max(cd_dep_employed_count)#74, sum(cd_dep_employed_count)#75, cd_dep_college_count#29, cnt3#76, avg(cd_dep_college_count)#77, max(cd_dep_college_count)#78, sum(cd_dep_college_count)#79] -Arguments: 100, [ca_state#21 ASC NULLS FIRST, cd_gender#25 ASC NULLS FIRST, cd_marital_status#26 ASC NULLS FIRST, cd_dep_count#27 ASC NULLS FIRST, cd_dep_employed_count#28 ASC NULLS FIRST, cd_dep_college_count#29 ASC NULLS FIRST], [ca_state#21, cd_gender#25, cd_marital_status#26, cd_dep_count#27, cnt1#68, avg(cd_dep_count)#69, max(cd_dep_count)#70, sum(cd_dep_count)#71, cd_dep_employed_count#28, cnt2#72, avg(cd_dep_employed_count)#73, max(cd_dep_employed_count)#74, sum(cd_dep_employed_count)#75, cd_dep_college_count#29, cnt3#76, avg(cd_dep_college_count)#77, max(cd_dep_college_count)#78, sum(cd_dep_college_count)#79] +Input [18]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cnt1#60, avg(cd_dep_count)#61, max(cd_dep_count)#62, sum(cd_dep_count)#63, cd_dep_employed_count#22, cnt2#64, avg(cd_dep_employed_count)#65, max(cd_dep_employed_count)#66, sum(cd_dep_employed_count)#67, cd_dep_college_count#23, cnt3#68, avg(cd_dep_college_count)#69, max(cd_dep_college_count)#70, sum(cd_dep_college_count)#71] +Arguments: 100, [ca_state#17 ASC NULLS FIRST, cd_gender#19 ASC NULLS FIRST, cd_marital_status#20 ASC NULLS FIRST, cd_dep_count#21 ASC NULLS FIRST, cd_dep_employed_count#22 ASC NULLS FIRST, cd_dep_college_count#23 ASC NULLS FIRST], [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cnt1#60, avg(cd_dep_count)#61, max(cd_dep_count)#62, sum(cd_dep_count)#63, cd_dep_employed_count#22, cnt2#64, avg(cd_dep_employed_count)#65, max(cd_dep_employed_count)#66, sum(cd_dep_employed_count)#67, cd_dep_college_count#23, cnt3#68, avg(cd_dep_college_count)#69, max(cd_dep_college_count)#70, sum(cd_dep_college_count)#71] ===== Subqueries ===== -Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#6 IN dynamicpruning#7 +Subquery:1 Hosting operator id = 6 Hosting Expression = ss_sold_date_sk#5 IN dynamicpruning#6 BroadcastExchange (55) +- * Project (54) +- * Filter (53) @@ -282,29 +282,29 @@ BroadcastExchange (55) (51) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#80, d_qoy#81] +Output [3]: [d_date_sk#7, d_year#72, d_qoy#73] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,1999), LessThan(d_qoy,4), IsNotNull(d_date_sk)] ReadSchema: struct (52) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#80, d_qoy#81] +Input [3]: [d_date_sk#7, d_year#72, d_qoy#73] (53) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#80, d_qoy#81] -Condition : ((((isnotnull(d_year#80) AND isnotnull(d_qoy#81)) AND (d_year#80 = 1999)) AND (d_qoy#81 < 4)) AND isnotnull(d_date_sk#8)) +Input [3]: [d_date_sk#7, d_year#72, d_qoy#73] +Condition : ((((isnotnull(d_year#72) AND isnotnull(d_qoy#73)) AND (d_year#72 = 1999)) AND (d_qoy#73 < 4)) AND isnotnull(d_date_sk#7)) (54) Project [codegen id : 1] -Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#80, d_qoy#81] +Output [1]: [d_date_sk#7] +Input [3]: [d_date_sk#7, d_year#72, d_qoy#73] (55) BroadcastExchange -Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#82] +Input [1]: [d_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] -Subquery:2 Hosting operator id = 14 Hosting Expression = ws_sold_date_sk#11 IN dynamicpruning#7 +Subquery:2 Hosting operator id = 14 Hosting Expression = ws_sold_date_sk#9 IN dynamicpruning#6 -Subquery:3 Hosting operator id = 19 Hosting Expression = cs_sold_date_sk#15 IN dynamicpruning#7 +Subquery:3 Hosting operator id = 19 Hosting Expression = cs_sold_date_sk#13 IN dynamicpruning#6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a/explain.txt index fe6d15a3fb15b..e7bd524f31530 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q35a/explain.txt @@ -79,7 +79,7 @@ Input [3]: [ss_customer_sk#4, ss_sold_date_sk#5, d_date_sk#7] (9) BroadcastExchange Input [1]: [ss_customer_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#1] @@ -87,58 +87,58 @@ Right keys [1]: [ss_customer_sk#4] Join condition: None (11) Scan parquet default.web_sales -Output [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Output [2]: [ws_bill_customer_sk#8, ws_sold_date_sk#9] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#10), dynamicpruningexpression(ws_sold_date_sk#10 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(ws_sold_date_sk#9), dynamicpruningexpression(ws_sold_date_sk#9 IN dynamicpruning#6)] ReadSchema: struct (12) ColumnarToRow [codegen id : 4] -Input [2]: [ws_bill_customer_sk#9, ws_sold_date_sk#10] +Input [2]: [ws_bill_customer_sk#8, ws_sold_date_sk#9] (13) ReusedExchange [Reuses operator id: 45] -Output [1]: [d_date_sk#11] +Output [1]: [d_date_sk#10] (14) BroadcastHashJoin [codegen id : 4] -Left keys [1]: [ws_sold_date_sk#10] -Right keys [1]: [d_date_sk#11] +Left keys [1]: [ws_sold_date_sk#9] +Right keys [1]: [d_date_sk#10] Join condition: None (15) Project [codegen id : 4] -Output [1]: [ws_bill_customer_sk#9 AS customsk#12] -Input [3]: [ws_bill_customer_sk#9, ws_sold_date_sk#10, d_date_sk#11] +Output [1]: [ws_bill_customer_sk#8 AS customsk#11] +Input [3]: [ws_bill_customer_sk#8, ws_sold_date_sk#9, d_date_sk#10] (16) Scan parquet default.catalog_sales -Output [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Output [2]: [cs_ship_customer_sk#12, cs_sold_date_sk#13] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#14), dynamicpruningexpression(cs_sold_date_sk#14 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(cs_sold_date_sk#13), dynamicpruningexpression(cs_sold_date_sk#13 IN dynamicpruning#6)] ReadSchema: struct (17) ColumnarToRow [codegen id : 6] -Input [2]: [cs_ship_customer_sk#13, cs_sold_date_sk#14] +Input [2]: [cs_ship_customer_sk#12, cs_sold_date_sk#13] (18) ReusedExchange [Reuses operator id: 45] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (19) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [cs_sold_date_sk#14] -Right keys [1]: [d_date_sk#15] +Left keys [1]: [cs_sold_date_sk#13] +Right keys [1]: [d_date_sk#14] Join condition: None (20) Project [codegen id : 6] -Output [1]: [cs_ship_customer_sk#13 AS customsk#16] -Input [3]: [cs_ship_customer_sk#13, cs_sold_date_sk#14, d_date_sk#15] +Output [1]: [cs_ship_customer_sk#12 AS customsk#15] +Input [3]: [cs_ship_customer_sk#12, cs_sold_date_sk#13, d_date_sk#14] (21) Union (22) BroadcastExchange -Input [1]: [customsk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#17] +Input [1]: [customsk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (23) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_customer_sk#1] -Right keys [1]: [customsk#12] +Right keys [1]: [customsk#11] Join condition: None (24) Project [codegen id : 9] @@ -146,80 +146,80 @@ Output [2]: [c_current_cdemo_sk#2, c_current_addr_sk#3] Input [3]: [c_customer_sk#1, c_current_cdemo_sk#2, c_current_addr_sk#3] (25) Scan parquet default.customer_address -Output [2]: [ca_address_sk#18, ca_state#19] +Output [2]: [ca_address_sk#16, ca_state#17] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#18, ca_state#19] +Input [2]: [ca_address_sk#16, ca_state#17] (27) Filter [codegen id : 7] -Input [2]: [ca_address_sk#18, ca_state#19] -Condition : isnotnull(ca_address_sk#18) +Input [2]: [ca_address_sk#16, ca_state#17] +Condition : isnotnull(ca_address_sk#16) (28) BroadcastExchange -Input [2]: [ca_address_sk#18, ca_state#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] +Input [2]: [ca_address_sk#16, ca_state#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (29) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_current_addr_sk#3] -Right keys [1]: [ca_address_sk#18] +Right keys [1]: [ca_address_sk#16] Join condition: None (30) Project [codegen id : 9] -Output [2]: [c_current_cdemo_sk#2, ca_state#19] -Input [4]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#18, ca_state#19] +Output [2]: [c_current_cdemo_sk#2, ca_state#17] +Input [4]: [c_current_cdemo_sk#2, c_current_addr_sk#3, ca_address_sk#16, ca_state#17] (31) Scan parquet default.customer_demographics -Output [6]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Output [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 8] -Input [6]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] (33) Filter [codegen id : 8] -Input [6]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] -Condition : isnotnull(cd_demo_sk#21) +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Condition : isnotnull(cd_demo_sk#18) (34) BroadcastExchange -Input [6]: [cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#27] +Input [6]: [cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (35) BroadcastHashJoin [codegen id : 9] Left keys [1]: [c_current_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#21] +Right keys [1]: [cd_demo_sk#18] Join condition: None (36) Project [codegen id : 9] -Output [6]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] -Input [8]: [c_current_cdemo_sk#2, ca_state#19, cd_demo_sk#21, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] +Output [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Input [8]: [c_current_cdemo_sk#2, ca_state#17, cd_demo_sk#18, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] (37) HashAggregate [codegen id : 9] -Input [6]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] -Keys [6]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] -Functions [10]: [partial_count(1), partial_avg(cd_dep_count#24), partial_max(cd_dep_count#24), partial_sum(cd_dep_count#24), partial_avg(cd_dep_employed_count#25), partial_max(cd_dep_employed_count#25), partial_sum(cd_dep_employed_count#25), partial_avg(cd_dep_college_count#26), partial_max(cd_dep_college_count#26), partial_sum(cd_dep_college_count#26)] -Aggregate Attributes [13]: [count#28, sum#29, count#30, max#31, sum#32, sum#33, count#34, max#35, sum#36, sum#37, count#38, max#39, sum#40] -Results [19]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49, sum#50, count#51, max#52, sum#53] +Input [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Keys [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Functions [10]: [partial_count(1), partial_avg(cd_dep_count#21), partial_max(cd_dep_count#21), partial_sum(cd_dep_count#21), partial_avg(cd_dep_employed_count#22), partial_max(cd_dep_employed_count#22), partial_sum(cd_dep_employed_count#22), partial_avg(cd_dep_college_count#23), partial_max(cd_dep_college_count#23), partial_sum(cd_dep_college_count#23)] +Aggregate Attributes [13]: [count#24, sum#25, count#26, max#27, sum#28, sum#29, count#30, max#31, sum#32, sum#33, count#34, max#35, sum#36] +Results [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, sum#38, count#39, max#40, sum#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49] (38) Exchange -Input [19]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49, sum#50, count#51, max#52, sum#53] -Arguments: hashpartitioning(ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, 5), ENSURE_REQUIREMENTS, [id=#54] +Input [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, sum#38, count#39, max#40, sum#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49] +Arguments: hashpartitioning(ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, 5), ENSURE_REQUIREMENTS, [plan_id=5] (39) HashAggregate [codegen id : 10] -Input [19]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26, count#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49, sum#50, count#51, max#52, sum#53] -Keys [6]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cd_dep_employed_count#25, cd_dep_college_count#26] -Functions [10]: [count(1), avg(cd_dep_count#24), max(cd_dep_count#24), sum(cd_dep_count#24), avg(cd_dep_employed_count#25), max(cd_dep_employed_count#25), sum(cd_dep_employed_count#25), avg(cd_dep_college_count#26), max(cd_dep_college_count#26), sum(cd_dep_college_count#26)] -Aggregate Attributes [10]: [count(1)#55, avg(cd_dep_count#24)#56, max(cd_dep_count#24)#57, sum(cd_dep_count#24)#58, avg(cd_dep_employed_count#25)#59, max(cd_dep_employed_count#25)#60, sum(cd_dep_employed_count#25)#61, avg(cd_dep_college_count#26)#62, max(cd_dep_college_count#26)#63, sum(cd_dep_college_count#26)#64] -Results [18]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, count(1)#55 AS cnt1#65, avg(cd_dep_count#24)#56 AS avg(cd_dep_count)#66, max(cd_dep_count#24)#57 AS max(cd_dep_count)#67, sum(cd_dep_count#24)#58 AS sum(cd_dep_count)#68, cd_dep_employed_count#25, count(1)#55 AS cnt2#69, avg(cd_dep_employed_count#25)#59 AS avg(cd_dep_employed_count)#70, max(cd_dep_employed_count#25)#60 AS max(cd_dep_employed_count)#71, sum(cd_dep_employed_count#25)#61 AS sum(cd_dep_employed_count)#72, cd_dep_college_count#26, count(1)#55 AS cnt3#73, avg(cd_dep_college_count#26)#62 AS avg(cd_dep_college_count)#74, max(cd_dep_college_count#26)#63 AS max(cd_dep_college_count)#75, sum(cd_dep_college_count#26)#64 AS sum(cd_dep_college_count)#76] +Input [19]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23, count#37, sum#38, count#39, max#40, sum#41, sum#42, count#43, max#44, sum#45, sum#46, count#47, max#48, sum#49] +Keys [6]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cd_dep_employed_count#22, cd_dep_college_count#23] +Functions [10]: [count(1), avg(cd_dep_count#21), max(cd_dep_count#21), sum(cd_dep_count#21), avg(cd_dep_employed_count#22), max(cd_dep_employed_count#22), sum(cd_dep_employed_count#22), avg(cd_dep_college_count#23), max(cd_dep_college_count#23), sum(cd_dep_college_count#23)] +Aggregate Attributes [10]: [count(1)#50, avg(cd_dep_count#21)#51, max(cd_dep_count#21)#52, sum(cd_dep_count#21)#53, avg(cd_dep_employed_count#22)#54, max(cd_dep_employed_count#22)#55, sum(cd_dep_employed_count#22)#56, avg(cd_dep_college_count#23)#57, max(cd_dep_college_count#23)#58, sum(cd_dep_college_count#23)#59] +Results [18]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, count(1)#50 AS cnt1#60, avg(cd_dep_count#21)#51 AS avg(cd_dep_count)#61, max(cd_dep_count#21)#52 AS max(cd_dep_count)#62, sum(cd_dep_count#21)#53 AS sum(cd_dep_count)#63, cd_dep_employed_count#22, count(1)#50 AS cnt2#64, avg(cd_dep_employed_count#22)#54 AS avg(cd_dep_employed_count)#65, max(cd_dep_employed_count#22)#55 AS max(cd_dep_employed_count)#66, sum(cd_dep_employed_count#22)#56 AS sum(cd_dep_employed_count)#67, cd_dep_college_count#23, count(1)#50 AS cnt3#68, avg(cd_dep_college_count#23)#57 AS avg(cd_dep_college_count)#69, max(cd_dep_college_count#23)#58 AS max(cd_dep_college_count)#70, sum(cd_dep_college_count#23)#59 AS sum(cd_dep_college_count)#71] (40) TakeOrderedAndProject -Input [18]: [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cnt1#65, avg(cd_dep_count)#66, max(cd_dep_count)#67, sum(cd_dep_count)#68, cd_dep_employed_count#25, cnt2#69, avg(cd_dep_employed_count)#70, max(cd_dep_employed_count)#71, sum(cd_dep_employed_count)#72, cd_dep_college_count#26, cnt3#73, avg(cd_dep_college_count)#74, max(cd_dep_college_count)#75, sum(cd_dep_college_count)#76] -Arguments: 100, [ca_state#19 ASC NULLS FIRST, cd_gender#22 ASC NULLS FIRST, cd_marital_status#23 ASC NULLS FIRST, cd_dep_count#24 ASC NULLS FIRST, cd_dep_employed_count#25 ASC NULLS FIRST, cd_dep_college_count#26 ASC NULLS FIRST], [ca_state#19, cd_gender#22, cd_marital_status#23, cd_dep_count#24, cnt1#65, avg(cd_dep_count)#66, max(cd_dep_count)#67, sum(cd_dep_count)#68, cd_dep_employed_count#25, cnt2#69, avg(cd_dep_employed_count)#70, max(cd_dep_employed_count)#71, sum(cd_dep_employed_count)#72, cd_dep_college_count#26, cnt3#73, avg(cd_dep_college_count)#74, max(cd_dep_college_count)#75, sum(cd_dep_college_count)#76] +Input [18]: [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cnt1#60, avg(cd_dep_count)#61, max(cd_dep_count)#62, sum(cd_dep_count)#63, cd_dep_employed_count#22, cnt2#64, avg(cd_dep_employed_count)#65, max(cd_dep_employed_count)#66, sum(cd_dep_employed_count)#67, cd_dep_college_count#23, cnt3#68, avg(cd_dep_college_count)#69, max(cd_dep_college_count)#70, sum(cd_dep_college_count)#71] +Arguments: 100, [ca_state#17 ASC NULLS FIRST, cd_gender#19 ASC NULLS FIRST, cd_marital_status#20 ASC NULLS FIRST, cd_dep_count#21 ASC NULLS FIRST, cd_dep_employed_count#22 ASC NULLS FIRST, cd_dep_college_count#23 ASC NULLS FIRST], [ca_state#17, cd_gender#19, cd_marital_status#20, cd_dep_count#21, cnt1#60, avg(cd_dep_count)#61, max(cd_dep_count)#62, sum(cd_dep_count)#63, cd_dep_employed_count#22, cnt2#64, avg(cd_dep_employed_count)#65, max(cd_dep_employed_count)#66, sum(cd_dep_employed_count)#67, cd_dep_college_count#23, cnt3#68, avg(cd_dep_college_count)#69, max(cd_dep_college_count)#70, sum(cd_dep_college_count)#71] ===== Subqueries ===== @@ -232,29 +232,29 @@ BroadcastExchange (45) (41) Scan parquet default.date_dim -Output [3]: [d_date_sk#7, d_year#77, d_qoy#78] +Output [3]: [d_date_sk#7, d_year#72, d_qoy#73] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_qoy), EqualTo(d_year,1999), LessThan(d_qoy,4), IsNotNull(d_date_sk)] ReadSchema: struct (42) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#77, d_qoy#78] +Input [3]: [d_date_sk#7, d_year#72, d_qoy#73] (43) Filter [codegen id : 1] -Input [3]: [d_date_sk#7, d_year#77, d_qoy#78] -Condition : ((((isnotnull(d_year#77) AND isnotnull(d_qoy#78)) AND (d_year#77 = 1999)) AND (d_qoy#78 < 4)) AND isnotnull(d_date_sk#7)) +Input [3]: [d_date_sk#7, d_year#72, d_qoy#73] +Condition : ((((isnotnull(d_year#72) AND isnotnull(d_qoy#73)) AND (d_year#72 = 1999)) AND (d_qoy#73 < 4)) AND isnotnull(d_date_sk#7)) (44) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [3]: [d_date_sk#7, d_year#77, d_qoy#78] +Input [3]: [d_date_sk#7, d_year#72, d_qoy#73] (45) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#79] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] -Subquery:2 Hosting operator id = 11 Hosting Expression = ws_sold_date_sk#10 IN dynamicpruning#6 +Subquery:2 Hosting operator id = 11 Hosting Expression = ws_sold_date_sk#9 IN dynamicpruning#6 -Subquery:3 Hosting operator id = 16 Hosting Expression = cs_sold_date_sk#14 IN dynamicpruning#6 +Subquery:3 Hosting operator id = 16 Hosting Expression = cs_sold_date_sk#13 IN dynamicpruning#6 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt index 9224fbda95e47..15815ca9b9453 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a.sf100/explain.txt @@ -89,7 +89,7 @@ Input [2]: [s_store_sk#8, s_state#9] (11) BroadcastExchange Input [1]: [s_store_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#2] @@ -101,145 +101,145 @@ Output [3]: [ss_item_sk#1, ss_ext_sales_price#3, ss_net_profit#4] Input [5]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, s_store_sk#8] (14) Scan parquet default.item -Output [3]: [i_item_sk#11, i_class#12, i_category#13] +Output [3]: [i_item_sk#10, i_class#11, i_category#12] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 3] -Input [3]: [i_item_sk#11, i_class#12, i_category#13] +Input [3]: [i_item_sk#10, i_class#11, i_category#12] (16) Filter [codegen id : 3] -Input [3]: [i_item_sk#11, i_class#12, i_category#13] -Condition : isnotnull(i_item_sk#11) +Input [3]: [i_item_sk#10, i_class#11, i_category#12] +Condition : isnotnull(i_item_sk#10) (17) BroadcastExchange -Input [3]: [i_item_sk#11, i_class#12, i_category#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Input [3]: [i_item_sk#10, i_class#11, i_category#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#11] +Right keys [1]: [i_item_sk#10] Join condition: None (19) Project [codegen id : 4] -Output [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_class#12, i_category#13] -Input [6]: [ss_item_sk#1, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#11, i_class#12, i_category#13] +Output [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_class#11, i_category#12] +Input [6]: [ss_item_sk#1, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#10, i_class#11, i_category#12] (20) HashAggregate [codegen id : 4] -Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_class#12, i_category#13] -Keys [2]: [i_category#13, i_class#12] +Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_class#11, i_category#12] +Keys [2]: [i_category#12, i_class#11] Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#4)), partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [2]: [sum#15, sum#16] -Results [4]: [i_category#13, i_class#12, sum#17, sum#18] +Aggregate Attributes [2]: [sum#13, sum#14] +Results [4]: [i_category#12, i_class#11, sum#15, sum#16] (21) Exchange -Input [4]: [i_category#13, i_class#12, sum#17, sum#18] -Arguments: hashpartitioning(i_category#13, i_class#12, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [4]: [i_category#12, i_class#11, sum#15, sum#16] +Arguments: hashpartitioning(i_category#12, i_class#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 5] -Input [4]: [i_category#13, i_class#12, sum#17, sum#18] -Keys [2]: [i_category#13, i_class#12] +Input [4]: [i_category#12, i_class#11, sum#15, sum#16] +Keys [2]: [i_category#12, i_class#11] Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#20, sum(UnscaledValue(ss_ext_sales_price#3))#21] -Results [6]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2))), DecimalType(37,20)) as decimal(38,20)) AS gross_margin#22, i_category#13, i_class#12, 0 AS t_category#23, 0 AS t_class#24, 0 AS lochierarchy#25] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#17, sum(UnscaledValue(ss_ext_sales_price#3))#18] +Results [6]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#17,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2))), DecimalType(37,20)) as decimal(38,20)) AS gross_margin#19, i_category#12, i_class#11, 0 AS t_category#20, 0 AS t_class#21, 0 AS lochierarchy#22] (23) ReusedExchange [Reuses operator id: 21] -Output [4]: [i_category#13, i_class#12, sum#26, sum#27] +Output [4]: [i_category#12, i_class#11, sum#23, sum#24] (24) HashAggregate [codegen id : 10] -Input [4]: [i_category#13, i_class#12, sum#26, sum#27] -Keys [2]: [i_category#13, i_class#12] +Input [4]: [i_category#12, i_class#11, sum#23, sum#24] +Keys [2]: [i_category#12, i_class#11] Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#28, sum(UnscaledValue(ss_ext_sales_price#3))#29] -Results [3]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#28,17,2) AS ss_net_profit#30, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#29,17,2) AS ss_ext_sales_price#31, i_category#13] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#25, sum(UnscaledValue(ss_ext_sales_price#3))#26] +Results [3]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#25,17,2) AS ss_net_profit#27, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#26,17,2) AS ss_ext_sales_price#28, i_category#12] (25) HashAggregate [codegen id : 10] -Input [3]: [ss_net_profit#30, ss_ext_sales_price#31, i_category#13] -Keys [1]: [i_category#13] -Functions [2]: [partial_sum(ss_net_profit#30), partial_sum(ss_ext_sales_price#31)] -Aggregate Attributes [4]: [sum#32, isEmpty#33, sum#34, isEmpty#35] -Results [5]: [i_category#13, sum#36, isEmpty#37, sum#38, isEmpty#39] +Input [3]: [ss_net_profit#27, ss_ext_sales_price#28, i_category#12] +Keys [1]: [i_category#12] +Functions [2]: [partial_sum(ss_net_profit#27), partial_sum(ss_ext_sales_price#28)] +Aggregate Attributes [4]: [sum#29, isEmpty#30, sum#31, isEmpty#32] +Results [5]: [i_category#12, sum#33, isEmpty#34, sum#35, isEmpty#36] (26) Exchange -Input [5]: [i_category#13, sum#36, isEmpty#37, sum#38, isEmpty#39] -Arguments: hashpartitioning(i_category#13, 5), ENSURE_REQUIREMENTS, [id=#40] +Input [5]: [i_category#12, sum#33, isEmpty#34, sum#35, isEmpty#36] +Arguments: hashpartitioning(i_category#12, 5), ENSURE_REQUIREMENTS, [plan_id=4] (27) HashAggregate [codegen id : 11] -Input [5]: [i_category#13, sum#36, isEmpty#37, sum#38, isEmpty#39] -Keys [1]: [i_category#13] -Functions [2]: [sum(ss_net_profit#30), sum(ss_ext_sales_price#31)] -Aggregate Attributes [2]: [sum(ss_net_profit#30)#41, sum(ss_ext_sales_price#31)#42] -Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#30)#41) / promote_precision(sum(ss_ext_sales_price#31)#42)), DecimalType(38,11)) as decimal(38,20)) AS gross_margin#43, i_category#13, null AS i_class#44, 0 AS t_category#45, 1 AS t_class#46, 1 AS lochierarchy#47] +Input [5]: [i_category#12, sum#33, isEmpty#34, sum#35, isEmpty#36] +Keys [1]: [i_category#12] +Functions [2]: [sum(ss_net_profit#27), sum(ss_ext_sales_price#28)] +Aggregate Attributes [2]: [sum(ss_net_profit#27)#37, sum(ss_ext_sales_price#28)#38] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#27)#37) / promote_precision(sum(ss_ext_sales_price#28)#38)), DecimalType(38,11)) as decimal(38,20)) AS gross_margin#39, i_category#12, null AS i_class#40, 0 AS t_category#41, 1 AS t_class#42, 1 AS lochierarchy#43] (28) ReusedExchange [Reuses operator id: 21] -Output [4]: [i_category#13, i_class#12, sum#48, sum#49] +Output [4]: [i_category#12, i_class#11, sum#44, sum#45] (29) HashAggregate [codegen id : 16] -Input [4]: [i_category#13, i_class#12, sum#48, sum#49] -Keys [2]: [i_category#13, i_class#12] +Input [4]: [i_category#12, i_class#11, sum#44, sum#45] +Keys [2]: [i_category#12, i_class#11] Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#28, sum(UnscaledValue(ss_ext_sales_price#3))#29] -Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#28,17,2) AS ss_net_profit#30, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#29,17,2) AS ss_ext_sales_price#31] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#25, sum(UnscaledValue(ss_ext_sales_price#3))#26] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#25,17,2) AS ss_net_profit#27, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#26,17,2) AS ss_ext_sales_price#28] (30) HashAggregate [codegen id : 16] -Input [2]: [ss_net_profit#30, ss_ext_sales_price#31] +Input [2]: [ss_net_profit#27, ss_ext_sales_price#28] Keys: [] -Functions [2]: [partial_sum(ss_net_profit#30), partial_sum(ss_ext_sales_price#31)] -Aggregate Attributes [4]: [sum#50, isEmpty#51, sum#52, isEmpty#53] -Results [4]: [sum#54, isEmpty#55, sum#56, isEmpty#57] +Functions [2]: [partial_sum(ss_net_profit#27), partial_sum(ss_ext_sales_price#28)] +Aggregate Attributes [4]: [sum#46, isEmpty#47, sum#48, isEmpty#49] +Results [4]: [sum#50, isEmpty#51, sum#52, isEmpty#53] (31) Exchange -Input [4]: [sum#54, isEmpty#55, sum#56, isEmpty#57] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#58] +Input [4]: [sum#50, isEmpty#51, sum#52, isEmpty#53] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] (32) HashAggregate [codegen id : 17] -Input [4]: [sum#54, isEmpty#55, sum#56, isEmpty#57] +Input [4]: [sum#50, isEmpty#51, sum#52, isEmpty#53] Keys: [] -Functions [2]: [sum(ss_net_profit#30), sum(ss_ext_sales_price#31)] -Aggregate Attributes [2]: [sum(ss_net_profit#30)#59, sum(ss_ext_sales_price#31)#60] -Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#30)#59) / promote_precision(sum(ss_ext_sales_price#31)#60)), DecimalType(38,11)) as decimal(38,20)) AS gross_margin#61, null AS i_category#62, null AS i_class#63, 1 AS t_category#64, 1 AS t_class#65, 2 AS lochierarchy#66] +Functions [2]: [sum(ss_net_profit#27), sum(ss_ext_sales_price#28)] +Aggregate Attributes [2]: [sum(ss_net_profit#27)#54, sum(ss_ext_sales_price#28)#55] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#27)#54) / promote_precision(sum(ss_ext_sales_price#28)#55)), DecimalType(38,11)) as decimal(38,20)) AS gross_margin#56, null AS i_category#57, null AS i_class#58, 1 AS t_category#59, 1 AS t_class#60, 2 AS lochierarchy#61] (33) Union (34) HashAggregate [codegen id : 18] -Input [6]: [gross_margin#22, i_category#13, i_class#12, t_category#23, t_class#24, lochierarchy#25] -Keys [6]: [gross_margin#22, i_category#13, i_class#12, t_category#23, t_class#24, lochierarchy#25] +Input [6]: [gross_margin#19, i_category#12, i_class#11, t_category#20, t_class#21, lochierarchy#22] +Keys [6]: [gross_margin#19, i_category#12, i_class#11, t_category#20, t_class#21, lochierarchy#22] Functions: [] Aggregate Attributes: [] -Results [6]: [gross_margin#22, i_category#13, i_class#12, t_category#23, t_class#24, lochierarchy#25] +Results [6]: [gross_margin#19, i_category#12, i_class#11, t_category#20, t_class#21, lochierarchy#22] (35) Exchange -Input [6]: [gross_margin#22, i_category#13, i_class#12, t_category#23, t_class#24, lochierarchy#25] -Arguments: hashpartitioning(gross_margin#22, i_category#13, i_class#12, t_category#23, t_class#24, lochierarchy#25, 5), ENSURE_REQUIREMENTS, [id=#67] +Input [6]: [gross_margin#19, i_category#12, i_class#11, t_category#20, t_class#21, lochierarchy#22] +Arguments: hashpartitioning(gross_margin#19, i_category#12, i_class#11, t_category#20, t_class#21, lochierarchy#22, 5), ENSURE_REQUIREMENTS, [plan_id=6] (36) HashAggregate [codegen id : 19] -Input [6]: [gross_margin#22, i_category#13, i_class#12, t_category#23, t_class#24, lochierarchy#25] -Keys [6]: [gross_margin#22, i_category#13, i_class#12, t_category#23, t_class#24, lochierarchy#25] +Input [6]: [gross_margin#19, i_category#12, i_class#11, t_category#20, t_class#21, lochierarchy#22] +Keys [6]: [gross_margin#19, i_category#12, i_class#11, t_category#20, t_class#21, lochierarchy#22] Functions: [] Aggregate Attributes: [] -Results [5]: [gross_margin#22, i_category#13, i_class#12, lochierarchy#25, CASE WHEN (t_class#24 = 0) THEN i_category#13 END AS _w0#68] +Results [5]: [gross_margin#19, i_category#12, i_class#11, lochierarchy#22, CASE WHEN (t_class#21 = 0) THEN i_category#12 END AS _w0#62] (37) Exchange -Input [5]: [gross_margin#22, i_category#13, i_class#12, lochierarchy#25, _w0#68] -Arguments: hashpartitioning(lochierarchy#25, _w0#68, 5), ENSURE_REQUIREMENTS, [id=#69] +Input [5]: [gross_margin#19, i_category#12, i_class#11, lochierarchy#22, _w0#62] +Arguments: hashpartitioning(lochierarchy#22, _w0#62, 5), ENSURE_REQUIREMENTS, [plan_id=7] (38) Sort [codegen id : 20] -Input [5]: [gross_margin#22, i_category#13, i_class#12, lochierarchy#25, _w0#68] -Arguments: [lochierarchy#25 ASC NULLS FIRST, _w0#68 ASC NULLS FIRST, gross_margin#22 ASC NULLS FIRST], false, 0 +Input [5]: [gross_margin#19, i_category#12, i_class#11, lochierarchy#22, _w0#62] +Arguments: [lochierarchy#22 ASC NULLS FIRST, _w0#62 ASC NULLS FIRST, gross_margin#19 ASC NULLS FIRST], false, 0 (39) Window -Input [5]: [gross_margin#22, i_category#13, i_class#12, lochierarchy#25, _w0#68] -Arguments: [rank(gross_margin#22) windowspecdefinition(lochierarchy#25, _w0#68, gross_margin#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#70], [lochierarchy#25, _w0#68], [gross_margin#22 ASC NULLS FIRST] +Input [5]: [gross_margin#19, i_category#12, i_class#11, lochierarchy#22, _w0#62] +Arguments: [rank(gross_margin#19) windowspecdefinition(lochierarchy#22, _w0#62, gross_margin#19 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#63], [lochierarchy#22, _w0#62], [gross_margin#19 ASC NULLS FIRST] (40) Project [codegen id : 21] -Output [5]: [gross_margin#22, i_category#13, i_class#12, lochierarchy#25, rank_within_parent#70] -Input [6]: [gross_margin#22, i_category#13, i_class#12, lochierarchy#25, _w0#68, rank_within_parent#70] +Output [5]: [gross_margin#19, i_category#12, i_class#11, lochierarchy#22, rank_within_parent#63] +Input [6]: [gross_margin#19, i_category#12, i_class#11, lochierarchy#22, _w0#62, rank_within_parent#63] (41) TakeOrderedAndProject -Input [5]: [gross_margin#22, i_category#13, i_class#12, lochierarchy#25, rank_within_parent#70] -Arguments: 100, [lochierarchy#25 DESC NULLS LAST, CASE WHEN (lochierarchy#25 = 0) THEN i_category#13 END ASC NULLS FIRST, rank_within_parent#70 ASC NULLS FIRST], [gross_margin#22, i_category#13, i_class#12, lochierarchy#25, rank_within_parent#70] +Input [5]: [gross_margin#19, i_category#12, i_class#11, lochierarchy#22, rank_within_parent#63] +Arguments: 100, [lochierarchy#22 DESC NULLS LAST, CASE WHEN (lochierarchy#22 = 0) THEN i_category#12 END ASC NULLS FIRST, rank_within_parent#63 ASC NULLS FIRST], [gross_margin#19, i_category#12, i_class#11, lochierarchy#22, rank_within_parent#63] ===== Subqueries ===== @@ -252,25 +252,25 @@ BroadcastExchange (46) (42) Scan parquet default.date_dim -Output [2]: [d_date_sk#7, d_year#71] +Output [2]: [d_date_sk#7, d_year#64] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (43) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#7, d_year#71] +Input [2]: [d_date_sk#7, d_year#64] (44) Filter [codegen id : 1] -Input [2]: [d_date_sk#7, d_year#71] -Condition : ((isnotnull(d_year#71) AND (d_year#71 = 2001)) AND isnotnull(d_date_sk#7)) +Input [2]: [d_date_sk#7, d_year#64] +Condition : ((isnotnull(d_year#64) AND (d_year#64 = 2001)) AND isnotnull(d_date_sk#7)) (45) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [2]: [d_date_sk#7, d_year#71] +Input [2]: [d_date_sk#7, d_year#64] (46) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#72] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt index f036e3e8fef42..65df229e9c1e9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q36a/explain.txt @@ -85,7 +85,7 @@ Condition : isnotnull(i_item_sk#8) (10) BroadcastExchange Input [3]: [i_item_sk#8, i_class#9, i_category#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_item_sk#1] @@ -97,149 +97,149 @@ Output [5]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_ Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_item_sk#8, i_class#9, i_category#10] (13) Scan parquet default.store -Output [2]: [s_store_sk#12, s_state#13] +Output [2]: [s_store_sk#11, s_state#12] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_state), EqualTo(s_state,TN), IsNotNull(s_store_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [2]: [s_store_sk#12, s_state#13] +Input [2]: [s_store_sk#11, s_state#12] (15) Filter [codegen id : 3] -Input [2]: [s_store_sk#12, s_state#13] -Condition : ((isnotnull(s_state#13) AND (s_state#13 = TN)) AND isnotnull(s_store_sk#12)) +Input [2]: [s_store_sk#11, s_state#12] +Condition : ((isnotnull(s_state#12) AND (s_state#12 = TN)) AND isnotnull(s_store_sk#11)) (16) Project [codegen id : 3] -Output [1]: [s_store_sk#12] -Input [2]: [s_store_sk#12, s_state#13] +Output [1]: [s_store_sk#11] +Input [2]: [s_store_sk#11, s_state#12] (17) BroadcastExchange -Input [1]: [s_store_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#14] +Input [1]: [s_store_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#2] -Right keys [1]: [s_store_sk#12] +Right keys [1]: [s_store_sk#11] Join condition: None (19) Project [codegen id : 4] Output [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] -Input [6]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10, s_store_sk#12] +Input [6]: [ss_store_sk#2, ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10, s_store_sk#11] (20) HashAggregate [codegen id : 4] Input [4]: [ss_ext_sales_price#3, ss_net_profit#4, i_class#9, i_category#10] Keys [2]: [i_category#10, i_class#9] Functions [2]: [partial_sum(UnscaledValue(ss_net_profit#4)), partial_sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [2]: [sum#15, sum#16] -Results [4]: [i_category#10, i_class#9, sum#17, sum#18] +Aggregate Attributes [2]: [sum#13, sum#14] +Results [4]: [i_category#10, i_class#9, sum#15, sum#16] (21) Exchange -Input [4]: [i_category#10, i_class#9, sum#17, sum#18] -Arguments: hashpartitioning(i_category#10, i_class#9, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [4]: [i_category#10, i_class#9, sum#15, sum#16] +Arguments: hashpartitioning(i_category#10, i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (22) HashAggregate [codegen id : 5] -Input [4]: [i_category#10, i_class#9, sum#17, sum#18] +Input [4]: [i_category#10, i_class#9, sum#15, sum#16] Keys [2]: [i_category#10, i_class#9] Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#20, sum(UnscaledValue(ss_ext_sales_price#3))#21] -Results [6]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#20,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#21,17,2))), DecimalType(37,20)) as decimal(38,20)) AS gross_margin#22, i_category#10, i_class#9, 0 AS t_category#23, 0 AS t_class#24, 0 AS lochierarchy#25] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#17, sum(UnscaledValue(ss_ext_sales_price#3))#18] +Results [6]: [cast(CheckOverflow((promote_precision(MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#17,17,2)) / promote_precision(MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#18,17,2))), DecimalType(37,20)) as decimal(38,20)) AS gross_margin#19, i_category#10, i_class#9, 0 AS t_category#20, 0 AS t_class#21, 0 AS lochierarchy#22] (23) ReusedExchange [Reuses operator id: 21] -Output [4]: [i_category#10, i_class#9, sum#26, sum#27] +Output [4]: [i_category#10, i_class#9, sum#23, sum#24] (24) HashAggregate [codegen id : 10] -Input [4]: [i_category#10, i_class#9, sum#26, sum#27] +Input [4]: [i_category#10, i_class#9, sum#23, sum#24] Keys [2]: [i_category#10, i_class#9] Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#28, sum(UnscaledValue(ss_ext_sales_price#3))#29] -Results [3]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#28,17,2) AS ss_net_profit#30, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#29,17,2) AS ss_ext_sales_price#31, i_category#10] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#25, sum(UnscaledValue(ss_ext_sales_price#3))#26] +Results [3]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#25,17,2) AS ss_net_profit#27, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#26,17,2) AS ss_ext_sales_price#28, i_category#10] (25) HashAggregate [codegen id : 10] -Input [3]: [ss_net_profit#30, ss_ext_sales_price#31, i_category#10] +Input [3]: [ss_net_profit#27, ss_ext_sales_price#28, i_category#10] Keys [1]: [i_category#10] -Functions [2]: [partial_sum(ss_net_profit#30), partial_sum(ss_ext_sales_price#31)] -Aggregate Attributes [4]: [sum#32, isEmpty#33, sum#34, isEmpty#35] -Results [5]: [i_category#10, sum#36, isEmpty#37, sum#38, isEmpty#39] +Functions [2]: [partial_sum(ss_net_profit#27), partial_sum(ss_ext_sales_price#28)] +Aggregate Attributes [4]: [sum#29, isEmpty#30, sum#31, isEmpty#32] +Results [5]: [i_category#10, sum#33, isEmpty#34, sum#35, isEmpty#36] (26) Exchange -Input [5]: [i_category#10, sum#36, isEmpty#37, sum#38, isEmpty#39] -Arguments: hashpartitioning(i_category#10, 5), ENSURE_REQUIREMENTS, [id=#40] +Input [5]: [i_category#10, sum#33, isEmpty#34, sum#35, isEmpty#36] +Arguments: hashpartitioning(i_category#10, 5), ENSURE_REQUIREMENTS, [plan_id=4] (27) HashAggregate [codegen id : 11] -Input [5]: [i_category#10, sum#36, isEmpty#37, sum#38, isEmpty#39] +Input [5]: [i_category#10, sum#33, isEmpty#34, sum#35, isEmpty#36] Keys [1]: [i_category#10] -Functions [2]: [sum(ss_net_profit#30), sum(ss_ext_sales_price#31)] -Aggregate Attributes [2]: [sum(ss_net_profit#30)#41, sum(ss_ext_sales_price#31)#42] -Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#30)#41) / promote_precision(sum(ss_ext_sales_price#31)#42)), DecimalType(38,11)) as decimal(38,20)) AS gross_margin#43, i_category#10, null AS i_class#44, 0 AS t_category#45, 1 AS t_class#46, 1 AS lochierarchy#47] +Functions [2]: [sum(ss_net_profit#27), sum(ss_ext_sales_price#28)] +Aggregate Attributes [2]: [sum(ss_net_profit#27)#37, sum(ss_ext_sales_price#28)#38] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#27)#37) / promote_precision(sum(ss_ext_sales_price#28)#38)), DecimalType(38,11)) as decimal(38,20)) AS gross_margin#39, i_category#10, null AS i_class#40, 0 AS t_category#41, 1 AS t_class#42, 1 AS lochierarchy#43] (28) ReusedExchange [Reuses operator id: 21] -Output [4]: [i_category#10, i_class#9, sum#48, sum#49] +Output [4]: [i_category#10, i_class#9, sum#44, sum#45] (29) HashAggregate [codegen id : 16] -Input [4]: [i_category#10, i_class#9, sum#48, sum#49] +Input [4]: [i_category#10, i_class#9, sum#44, sum#45] Keys [2]: [i_category#10, i_class#9] Functions [2]: [sum(UnscaledValue(ss_net_profit#4)), sum(UnscaledValue(ss_ext_sales_price#3))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#28, sum(UnscaledValue(ss_ext_sales_price#3))#29] -Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#28,17,2) AS ss_net_profit#30, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#29,17,2) AS ss_ext_sales_price#31] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_net_profit#4))#25, sum(UnscaledValue(ss_ext_sales_price#3))#26] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#4))#25,17,2) AS ss_net_profit#27, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#3))#26,17,2) AS ss_ext_sales_price#28] (30) HashAggregate [codegen id : 16] -Input [2]: [ss_net_profit#30, ss_ext_sales_price#31] +Input [2]: [ss_net_profit#27, ss_ext_sales_price#28] Keys: [] -Functions [2]: [partial_sum(ss_net_profit#30), partial_sum(ss_ext_sales_price#31)] -Aggregate Attributes [4]: [sum#50, isEmpty#51, sum#52, isEmpty#53] -Results [4]: [sum#54, isEmpty#55, sum#56, isEmpty#57] +Functions [2]: [partial_sum(ss_net_profit#27), partial_sum(ss_ext_sales_price#28)] +Aggregate Attributes [4]: [sum#46, isEmpty#47, sum#48, isEmpty#49] +Results [4]: [sum#50, isEmpty#51, sum#52, isEmpty#53] (31) Exchange -Input [4]: [sum#54, isEmpty#55, sum#56, isEmpty#57] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#58] +Input [4]: [sum#50, isEmpty#51, sum#52, isEmpty#53] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] (32) HashAggregate [codegen id : 17] -Input [4]: [sum#54, isEmpty#55, sum#56, isEmpty#57] +Input [4]: [sum#50, isEmpty#51, sum#52, isEmpty#53] Keys: [] -Functions [2]: [sum(ss_net_profit#30), sum(ss_ext_sales_price#31)] -Aggregate Attributes [2]: [sum(ss_net_profit#30)#59, sum(ss_ext_sales_price#31)#60] -Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#30)#59) / promote_precision(sum(ss_ext_sales_price#31)#60)), DecimalType(38,11)) as decimal(38,20)) AS gross_margin#61, null AS i_category#62, null AS i_class#63, 1 AS t_category#64, 1 AS t_class#65, 2 AS lochierarchy#66] +Functions [2]: [sum(ss_net_profit#27), sum(ss_ext_sales_price#28)] +Aggregate Attributes [2]: [sum(ss_net_profit#27)#54, sum(ss_ext_sales_price#28)#55] +Results [6]: [cast(CheckOverflow((promote_precision(sum(ss_net_profit#27)#54) / promote_precision(sum(ss_ext_sales_price#28)#55)), DecimalType(38,11)) as decimal(38,20)) AS gross_margin#56, null AS i_category#57, null AS i_class#58, 1 AS t_category#59, 1 AS t_class#60, 2 AS lochierarchy#61] (33) Union (34) HashAggregate [codegen id : 18] -Input [6]: [gross_margin#22, i_category#10, i_class#9, t_category#23, t_class#24, lochierarchy#25] -Keys [6]: [gross_margin#22, i_category#10, i_class#9, t_category#23, t_class#24, lochierarchy#25] +Input [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] +Keys [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] Functions: [] Aggregate Attributes: [] -Results [6]: [gross_margin#22, i_category#10, i_class#9, t_category#23, t_class#24, lochierarchy#25] +Results [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] (35) Exchange -Input [6]: [gross_margin#22, i_category#10, i_class#9, t_category#23, t_class#24, lochierarchy#25] -Arguments: hashpartitioning(gross_margin#22, i_category#10, i_class#9, t_category#23, t_class#24, lochierarchy#25, 5), ENSURE_REQUIREMENTS, [id=#67] +Input [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] +Arguments: hashpartitioning(gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22, 5), ENSURE_REQUIREMENTS, [plan_id=6] (36) HashAggregate [codegen id : 19] -Input [6]: [gross_margin#22, i_category#10, i_class#9, t_category#23, t_class#24, lochierarchy#25] -Keys [6]: [gross_margin#22, i_category#10, i_class#9, t_category#23, t_class#24, lochierarchy#25] +Input [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] +Keys [6]: [gross_margin#19, i_category#10, i_class#9, t_category#20, t_class#21, lochierarchy#22] Functions: [] Aggregate Attributes: [] -Results [5]: [gross_margin#22, i_category#10, i_class#9, lochierarchy#25, CASE WHEN (t_class#24 = 0) THEN i_category#10 END AS _w0#68] +Results [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, CASE WHEN (t_class#21 = 0) THEN i_category#10 END AS _w0#62] (37) Exchange -Input [5]: [gross_margin#22, i_category#10, i_class#9, lochierarchy#25, _w0#68] -Arguments: hashpartitioning(lochierarchy#25, _w0#68, 5), ENSURE_REQUIREMENTS, [id=#69] +Input [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, _w0#62] +Arguments: hashpartitioning(lochierarchy#22, _w0#62, 5), ENSURE_REQUIREMENTS, [plan_id=7] (38) Sort [codegen id : 20] -Input [5]: [gross_margin#22, i_category#10, i_class#9, lochierarchy#25, _w0#68] -Arguments: [lochierarchy#25 ASC NULLS FIRST, _w0#68 ASC NULLS FIRST, gross_margin#22 ASC NULLS FIRST], false, 0 +Input [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, _w0#62] +Arguments: [lochierarchy#22 ASC NULLS FIRST, _w0#62 ASC NULLS FIRST, gross_margin#19 ASC NULLS FIRST], false, 0 (39) Window -Input [5]: [gross_margin#22, i_category#10, i_class#9, lochierarchy#25, _w0#68] -Arguments: [rank(gross_margin#22) windowspecdefinition(lochierarchy#25, _w0#68, gross_margin#22 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#70], [lochierarchy#25, _w0#68], [gross_margin#22 ASC NULLS FIRST] +Input [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, _w0#62] +Arguments: [rank(gross_margin#19) windowspecdefinition(lochierarchy#22, _w0#62, gross_margin#19 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#63], [lochierarchy#22, _w0#62], [gross_margin#19 ASC NULLS FIRST] (40) Project [codegen id : 21] -Output [5]: [gross_margin#22, i_category#10, i_class#9, lochierarchy#25, rank_within_parent#70] -Input [6]: [gross_margin#22, i_category#10, i_class#9, lochierarchy#25, _w0#68, rank_within_parent#70] +Output [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, rank_within_parent#63] +Input [6]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, _w0#62, rank_within_parent#63] (41) TakeOrderedAndProject -Input [5]: [gross_margin#22, i_category#10, i_class#9, lochierarchy#25, rank_within_parent#70] -Arguments: 100, [lochierarchy#25 DESC NULLS LAST, CASE WHEN (lochierarchy#25 = 0) THEN i_category#10 END ASC NULLS FIRST, rank_within_parent#70 ASC NULLS FIRST], [gross_margin#22, i_category#10, i_class#9, lochierarchy#25, rank_within_parent#70] +Input [5]: [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, rank_within_parent#63] +Arguments: 100, [lochierarchy#22 DESC NULLS LAST, CASE WHEN (lochierarchy#22 = 0) THEN i_category#10 END ASC NULLS FIRST, rank_within_parent#63 ASC NULLS FIRST], [gross_margin#19, i_category#10, i_class#9, lochierarchy#22, rank_within_parent#63] ===== Subqueries ===== @@ -252,25 +252,25 @@ BroadcastExchange (46) (42) Scan parquet default.date_dim -Output [2]: [d_date_sk#7, d_year#71] +Output [2]: [d_date_sk#7, d_year#64] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (43) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#7, d_year#71] +Input [2]: [d_date_sk#7, d_year#64] (44) Filter [codegen id : 1] -Input [2]: [d_date_sk#7, d_year#71] -Condition : ((isnotnull(d_year#71) AND (d_year#71 = 2001)) AND isnotnull(d_date_sk#7)) +Input [2]: [d_date_sk#7, d_year#64] +Condition : ((isnotnull(d_year#64) AND (d_year#64 = 2001)) AND isnotnull(d_date_sk#7)) (45) Project [codegen id : 1] Output [1]: [d_date_sk#7] -Input [2]: [d_date_sk#7, d_year#71] +Input [2]: [d_date_sk#7, d_year#64] (46) BroadcastExchange Input [1]: [d_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#72] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt index d2a5ecef9c900..6d94222679ec3 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47.sf100/explain.txt @@ -96,7 +96,7 @@ Condition : ((isnotnull(s_store_sk#9) AND isnotnull(s_store_name#10)) AND isnotn (10) BroadcastExchange Input [3]: [s_store_sk#9, s_store_name#10, s_company_name#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_store_sk#2] @@ -109,175 +109,175 @@ Input [8]: [ss_item_sk#1, ss_store_sk#2, ss_sales_price#3, d_year#7, d_moy#8, s_ (13) Exchange Input [6]: [ss_item_sk#1, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#13] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) Sort [codegen id : 4] Input [6]: [ss_item_sk#1, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (15) Scan parquet default.item -Output [3]: [i_item_sk#14, i_brand#15, i_category#16] +Output [3]: [i_item_sk#12, i_brand#13, i_category#14] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [3]: [i_item_sk#14, i_brand#15, i_category#16] +Input [3]: [i_item_sk#12, i_brand#13, i_category#14] (17) Filter [codegen id : 5] -Input [3]: [i_item_sk#14, i_brand#15, i_category#16] -Condition : ((isnotnull(i_item_sk#14) AND isnotnull(i_category#16)) AND isnotnull(i_brand#15)) +Input [3]: [i_item_sk#12, i_brand#13, i_category#14] +Condition : ((isnotnull(i_item_sk#12) AND isnotnull(i_category#14)) AND isnotnull(i_brand#13)) (18) Exchange -Input [3]: [i_item_sk#14, i_brand#15, i_category#16] -Arguments: hashpartitioning(i_item_sk#14, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [3]: [i_item_sk#12, i_brand#13, i_category#14] +Arguments: hashpartitioning(i_item_sk#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 6] -Input [3]: [i_item_sk#14, i_brand#15, i_category#16] -Arguments: [i_item_sk#14 ASC NULLS FIRST], false, 0 +Input [3]: [i_item_sk#12, i_brand#13, i_category#14] +Arguments: [i_item_sk#12 ASC NULLS FIRST], false, 0 (20) SortMergeJoin [codegen id : 7] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#14] +Right keys [1]: [i_item_sk#12] Join condition: None (21) Project [codegen id : 7] -Output [7]: [i_brand#15, i_category#16, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11] -Input [9]: [ss_item_sk#1, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11, i_item_sk#14, i_brand#15, i_category#16] +Output [7]: [i_brand#13, i_category#14, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11] +Input [9]: [ss_item_sk#1, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11, i_item_sk#12, i_brand#13, i_category#14] (22) HashAggregate [codegen id : 7] -Input [7]: [i_brand#15, i_category#16, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11] -Keys [6]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8] +Input [7]: [i_brand#13, i_category#14, ss_sales_price#3, d_year#7, d_moy#8, s_store_name#10, s_company_name#11] +Keys [6]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8] Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#3))] -Aggregate Attributes [1]: [sum#18] -Results [7]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum#19] +Aggregate Attributes [1]: [sum#15] +Results [7]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum#16] (23) Exchange -Input [7]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum#19] -Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [7]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum#16] +Arguments: hashpartitioning(i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) HashAggregate [codegen id : 8] -Input [7]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum#19] -Keys [6]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8] +Input [7]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum#16] +Keys [6]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8] Functions [1]: [sum(UnscaledValue(ss_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#3))#21] -Results [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#21,17,2) AS sum_sales#22, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#21,17,2) AS _w0#23] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#3))#17] +Results [8]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#17,17,2) AS sum_sales#18, MakeDecimal(sum(UnscaledValue(ss_sales_price#3))#17,17,2) AS _w0#19] (25) Exchange -Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23] -Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [8]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19] +Arguments: hashpartitioning(i_category#14, i_brand#13, s_store_name#10, s_company_name#11, 5), ENSURE_REQUIREMENTS, [plan_id=5] (26) Sort [codegen id : 9] -Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23] -Arguments: [i_category#16 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 +Input [8]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19] +Arguments: [i_category#14 ASC NULLS FIRST, i_brand#13 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 (27) Window -Input [8]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23] -Arguments: [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#25], [i_category#16, i_brand#15, s_store_name#10, s_company_name#11], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] +Input [8]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19] +Arguments: [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#20], [i_category#14, i_brand#13, s_store_name#10, s_company_name#11], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] (28) Filter [codegen id : 10] -Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23, rn#25] +Input [9]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19, rn#20] Condition : (isnotnull(d_year#7) AND (d_year#7 = 1999)) (29) Window -Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23, rn#25] -Arguments: [avg(_w0#23) windowspecdefinition(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#26], [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7] +Input [9]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19, rn#20] +Arguments: [avg(_w0#19) windowspecdefinition(i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#21], [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7] (30) Filter [codegen id : 11] -Input [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23, rn#25, avg_monthly_sales#26] -Condition : ((isnotnull(avg_monthly_sales#26) AND (avg_monthly_sales#26 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Input [10]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19, rn#20, avg_monthly_sales#21] +Condition : ((isnotnull(avg_monthly_sales#21) AND (avg_monthly_sales#21 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (31) Project [codegen id : 11] -Output [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25] -Input [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, _w0#23, rn#25, avg_monthly_sales#26] +Output [9]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, avg_monthly_sales#21, rn#20] +Input [10]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, _w0#19, rn#20, avg_monthly_sales#21] (32) Exchange -Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25] -Arguments: hashpartitioning(i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#25, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [9]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, avg_monthly_sales#21, rn#20] +Arguments: hashpartitioning(i_category#14, i_brand#13, s_store_name#10, s_company_name#11, rn#20, 5), ENSURE_REQUIREMENTS, [plan_id=6] (33) Sort [codegen id : 12] -Input [9]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25] -Arguments: [i_category#16 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, rn#25 ASC NULLS FIRST], false, 0 +Input [9]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, avg_monthly_sales#21, rn#20] +Arguments: [i_category#14 ASC NULLS FIRST, i_brand#13 ASC NULLS FIRST, s_store_name#10 ASC NULLS FIRST, s_company_name#11 ASC NULLS FIRST, rn#20 ASC NULLS FIRST], false, 0 (34) ReusedExchange [Reuses operator id: 23] -Output [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum#34] +Output [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum#28] (35) HashAggregate [codegen id : 20] -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum#34] -Keys [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33] -Functions [1]: [sum(UnscaledValue(ss_sales_price#35))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#35))#21] -Results [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, MakeDecimal(sum(UnscaledValue(ss_sales_price#35))#21,17,2) AS sum_sales#22] +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum#28] +Keys [6]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27] +Functions [1]: [sum(UnscaledValue(ss_sales_price#29))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#29))#17] +Results [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, MakeDecimal(sum(UnscaledValue(ss_sales_price#29))#17,17,2) AS sum_sales#18] (36) Exchange -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22] -Arguments: hashpartitioning(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18] +Arguments: hashpartitioning(i_category#22, i_brand#23, s_store_name#24, s_company_name#25, 5), ENSURE_REQUIREMENTS, [plan_id=7] (37) Sort [codegen id : 21] -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22] -Arguments: [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, s_store_name#30 ASC NULLS FIRST, s_company_name#31 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST], false, 0 +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18] +Arguments: [i_category#22 ASC NULLS FIRST, i_brand#23 ASC NULLS FIRST, s_store_name#24 ASC NULLS FIRST, s_company_name#25 ASC NULLS FIRST, d_year#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST], false, 0 (38) Window -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22] -Arguments: [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#37], [i_category#28, i_brand#29, s_store_name#30, s_company_name#31], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18] +Arguments: [rank(d_year#26, d_moy#27) windowspecdefinition(i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#30], [i_category#22, i_brand#23, s_store_name#24, s_company_name#25], [d_year#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST] (39) Project [codegen id : 22] -Output [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#22 AS sum_sales#38, rn#37] -Input [8]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#22, rn#37] +Output [6]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, sum_sales#18 AS sum_sales#31, rn#30] +Input [8]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18, rn#30] (40) Exchange -Input [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#38, rn#37] -Arguments: hashpartitioning(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, (rn#37 + 1), 5), ENSURE_REQUIREMENTS, [id=#39] +Input [6]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, sum_sales#31, rn#30] +Arguments: hashpartitioning(i_category#22, i_brand#23, s_store_name#24, s_company_name#25, (rn#30 + 1), 5), ENSURE_REQUIREMENTS, [plan_id=8] (41) Sort [codegen id : 23] -Input [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#38, rn#37] -Arguments: [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, s_store_name#30 ASC NULLS FIRST, s_company_name#31 ASC NULLS FIRST, (rn#37 + 1) ASC NULLS FIRST], false, 0 +Input [6]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, sum_sales#31, rn#30] +Arguments: [i_category#22 ASC NULLS FIRST, i_brand#23 ASC NULLS FIRST, s_store_name#24 ASC NULLS FIRST, s_company_name#25 ASC NULLS FIRST, (rn#30 + 1) ASC NULLS FIRST], false, 0 (42) SortMergeJoin [codegen id : 24] -Left keys [5]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#25] -Right keys [5]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, (rn#37 + 1)] +Left keys [5]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, rn#20] +Right keys [5]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, (rn#30 + 1)] Join condition: None (43) Project [codegen id : 24] -Output [10]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, sum_sales#38] -Input [15]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#38, rn#37] +Output [10]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, avg_monthly_sales#21, rn#20, sum_sales#31] +Input [15]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, avg_monthly_sales#21, rn#20, i_category#22, i_brand#23, s_store_name#24, s_company_name#25, sum_sales#31, rn#30] (44) ReusedExchange [Reuses operator id: 36] -Output [7]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22] +Output [7]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18] (45) Sort [codegen id : 33] -Input [7]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22] -Arguments: [i_category#40 ASC NULLS FIRST, i_brand#41 ASC NULLS FIRST, s_store_name#42 ASC NULLS FIRST, s_company_name#43 ASC NULLS FIRST, d_year#44 ASC NULLS FIRST, d_moy#45 ASC NULLS FIRST], false, 0 +Input [7]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18] +Arguments: [i_category#32 ASC NULLS FIRST, i_brand#33 ASC NULLS FIRST, s_store_name#34 ASC NULLS FIRST, s_company_name#35 ASC NULLS FIRST, d_year#36 ASC NULLS FIRST, d_moy#37 ASC NULLS FIRST], false, 0 (46) Window -Input [7]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22] -Arguments: [rank(d_year#44, d_moy#45) windowspecdefinition(i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44 ASC NULLS FIRST, d_moy#45 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#46], [i_category#40, i_brand#41, s_store_name#42, s_company_name#43], [d_year#44 ASC NULLS FIRST, d_moy#45 ASC NULLS FIRST] +Input [7]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18] +Arguments: [rank(d_year#36, d_moy#37) windowspecdefinition(i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36 ASC NULLS FIRST, d_moy#37 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#38], [i_category#32, i_brand#33, s_store_name#34, s_company_name#35], [d_year#36 ASC NULLS FIRST, d_moy#37 ASC NULLS FIRST] (47) Project [codegen id : 34] -Output [6]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#22 AS sum_sales#47, rn#46] -Input [8]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, d_year#44, d_moy#45, sum_sales#22, rn#46] +Output [6]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, sum_sales#18 AS sum_sales#39, rn#38] +Input [8]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18, rn#38] (48) Exchange -Input [6]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#47, rn#46] -Arguments: hashpartitioning(i_category#40, i_brand#41, s_store_name#42, s_company_name#43, (rn#46 - 1), 5), ENSURE_REQUIREMENTS, [id=#48] +Input [6]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, sum_sales#39, rn#38] +Arguments: hashpartitioning(i_category#32, i_brand#33, s_store_name#34, s_company_name#35, (rn#38 - 1), 5), ENSURE_REQUIREMENTS, [plan_id=9] (49) Sort [codegen id : 35] -Input [6]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#47, rn#46] -Arguments: [i_category#40 ASC NULLS FIRST, i_brand#41 ASC NULLS FIRST, s_store_name#42 ASC NULLS FIRST, s_company_name#43 ASC NULLS FIRST, (rn#46 - 1) ASC NULLS FIRST], false, 0 +Input [6]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, sum_sales#39, rn#38] +Arguments: [i_category#32 ASC NULLS FIRST, i_brand#33 ASC NULLS FIRST, s_store_name#34 ASC NULLS FIRST, s_company_name#35 ASC NULLS FIRST, (rn#38 - 1) ASC NULLS FIRST], false, 0 (50) SortMergeJoin [codegen id : 36] -Left keys [5]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, rn#25] -Right keys [5]: [i_category#40, i_brand#41, s_store_name#42, s_company_name#43, (rn#46 - 1)] +Left keys [5]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, rn#20] +Right keys [5]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, (rn#38 - 1)] Join condition: None (51) Project [codegen id : 36] -Output [7]: [i_category#16, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, sum_sales#38 AS psum#49, sum_sales#47 AS nsum#50] -Input [16]: [i_category#16, i_brand#15, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#22, avg_monthly_sales#26, rn#25, sum_sales#38, i_category#40, i_brand#41, s_store_name#42, s_company_name#43, sum_sales#47, rn#46] +Output [7]: [i_category#14, d_year#7, d_moy#8, avg_monthly_sales#21, sum_sales#18, sum_sales#31 AS psum#40, sum_sales#39 AS nsum#41] +Input [16]: [i_category#14, i_brand#13, s_store_name#10, s_company_name#11, d_year#7, d_moy#8, sum_sales#18, avg_monthly_sales#21, rn#20, sum_sales#31, i_category#32, i_brand#33, s_store_name#34, s_company_name#35, sum_sales#39, rn#38] (52) TakeOrderedAndProject -Input [7]: [i_category#16, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#49, nsum#50] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#22 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#26 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], [i_category#16, d_year#7, d_moy#8, avg_monthly_sales#26, sum_sales#22, psum#49, nsum#50] +Input [7]: [i_category#14, d_year#7, d_moy#8, avg_monthly_sales#21, sum_sales#18, psum#40, nsum#41] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], [i_category#14, d_year#7, d_moy#8, avg_monthly_sales#21, sum_sales#18, psum#40, nsum#41] ===== Subqueries ===== @@ -304,6 +304,6 @@ Condition : ((((d_year#7 = 1999) OR ((d_year#7 = 1998) AND (d_moy#8 = 12))) OR ( (56) BroadcastExchange Input [3]: [d_date_sk#6, d_year#7, d_moy#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#51] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt index 8abc8fda35cef..5cd32d2922d3c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt @@ -77,7 +77,7 @@ Condition : (isnotnull(ss_item_sk#4) AND isnotnull(ss_store_sk#5)) (7) BroadcastExchange Input [4]: [ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] @@ -89,160 +89,160 @@ Output [5]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_d Input [7]: [i_item_sk#1, i_brand#2, i_category#3, ss_item_sk#4, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7] (10) ReusedExchange [Reuses operator id: 49] -Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_date_sk#7] -Right keys [1]: [d_date_sk#10] +Right keys [1]: [d_date_sk#9] Join condition: None (12) Project [codegen id : 4] -Output [6]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#11, d_moy#12] -Input [8]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#10, d_year#11, d_moy#12] +Output [6]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#10, d_moy#11] +Input [8]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#9, d_year#10, d_moy#11] (13) Scan parquet default.store -Output [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Output [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_company_name)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] (15) Filter [codegen id : 3] -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] -Condition : ((isnotnull(s_store_sk#13) AND isnotnull(s_store_name#14)) AND isnotnull(s_company_name#15)) +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Condition : ((isnotnull(s_store_sk#12) AND isnotnull(s_store_name#13)) AND isnotnull(s_company_name#14)) (16) BroadcastExchange -Input [3]: [s_store_sk#13, s_store_name#14, s_company_name#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#16] +Input [3]: [s_store_sk#12, s_store_name#13, s_company_name#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#5] -Right keys [1]: [s_store_sk#13] +Right keys [1]: [s_store_sk#12] Join condition: None (18) Project [codegen id : 4] -Output [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#11, d_moy#12, s_store_name#14, s_company_name#15] -Input [9]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#11, d_moy#12, s_store_sk#13, s_store_name#14, s_company_name#15] +Output [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#10, d_moy#11, s_store_name#13, s_company_name#14] +Input [9]: [i_brand#2, i_category#3, ss_store_sk#5, ss_sales_price#6, d_year#10, d_moy#11, s_store_sk#12, s_store_name#13, s_company_name#14] (19) HashAggregate [codegen id : 4] -Input [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#11, d_moy#12, s_store_name#14, s_company_name#15] -Keys [6]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12] +Input [7]: [i_brand#2, i_category#3, ss_sales_price#6, d_year#10, d_moy#11, s_store_name#13, s_company_name#14] +Keys [6]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11] Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [1]: [sum#17] -Results [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum#18] +Aggregate Attributes [1]: [sum#15] +Results [7]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum#16] (20) Exchange -Input [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum#18] -Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [7]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum#16] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 5] -Input [7]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum#18] -Keys [6]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12] +Input [7]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum#16] +Keys [6]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11] Functions [1]: [sum(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#6))#20] -Results [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#20,17,2) AS _w0#22] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#6))#17] +Results [8]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#17,17,2) AS sum_sales#18, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#17,17,2) AS _w0#19] (22) Exchange -Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22] -Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [8]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19] +Arguments: hashpartitioning(i_category#3, i_brand#2, s_store_name#13, s_company_name#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) Sort [codegen id : 6] -Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22] -Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#14 ASC NULLS FIRST, s_company_name#15 ASC NULLS FIRST, d_year#11 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST], false, 0 +Input [8]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST, s_company_name#14 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], false, 0 (24) Window -Input [8]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22] -Arguments: [rank(d_year#11, d_moy#12) windowspecdefinition(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#24], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15], [d_year#11 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST] +Input [8]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19] +Arguments: [rank(d_year#10, d_moy#11) windowspecdefinition(i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#20], [i_category#3, i_brand#2, s_store_name#13, s_company_name#14], [d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST] (25) Filter [codegen id : 7] -Input [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22, rn#24] -Condition : (isnotnull(d_year#11) AND (d_year#11 = 1999)) +Input [9]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19, rn#20] +Condition : (isnotnull(d_year#10) AND (d_year#10 = 1999)) (26) Window -Input [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22, rn#24] -Arguments: [avg(_w0#22) windowspecdefinition(i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#25], [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11] +Input [9]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19, rn#20] +Arguments: [avg(_w0#19) windowspecdefinition(i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#21], [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10] (27) Filter [codegen id : 22] -Input [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22, rn#24, avg_monthly_sales#25] -Condition : ((isnotnull(avg_monthly_sales#25) AND (avg_monthly_sales#25 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19, rn#20, avg_monthly_sales#21] +Condition : ((isnotnull(avg_monthly_sales#21) AND (avg_monthly_sales#21 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (28) Project [codegen id : 22] -Output [9]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, avg_monthly_sales#25, rn#24] -Input [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, _w0#22, rn#24, avg_monthly_sales#25] +Output [9]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, avg_monthly_sales#21, rn#20] +Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, _w0#19, rn#20, avg_monthly_sales#21] (29) ReusedExchange [Reuses operator id: 20] -Output [7]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31, sum#32] +Output [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum#28] (30) HashAggregate [codegen id : 12] -Input [7]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31, sum#32] -Keys [6]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31] -Functions [1]: [sum(UnscaledValue(ss_sales_price#33))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#33))#20] -Results [7]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31, MakeDecimal(sum(UnscaledValue(ss_sales_price#33))#20,17,2) AS sum_sales#21] +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum#28] +Keys [6]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27] +Functions [1]: [sum(UnscaledValue(ss_sales_price#29))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#29))#17] +Results [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, MakeDecimal(sum(UnscaledValue(ss_sales_price#29))#17,17,2) AS sum_sales#18] (31) Exchange -Input [7]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31, sum_sales#21] -Arguments: hashpartitioning(i_category#26, i_brand#27, s_store_name#28, s_company_name#29, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18] +Arguments: hashpartitioning(i_category#22, i_brand#23, s_store_name#24, s_company_name#25, 5), ENSURE_REQUIREMENTS, [plan_id=5] (32) Sort [codegen id : 13] -Input [7]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31, sum_sales#21] -Arguments: [i_category#26 ASC NULLS FIRST, i_brand#27 ASC NULLS FIRST, s_store_name#28 ASC NULLS FIRST, s_company_name#29 ASC NULLS FIRST, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST], false, 0 +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18] +Arguments: [i_category#22 ASC NULLS FIRST, i_brand#23 ASC NULLS FIRST, s_store_name#24 ASC NULLS FIRST, s_company_name#25 ASC NULLS FIRST, d_year#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST], false, 0 (33) Window -Input [7]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31, sum_sales#21] -Arguments: [rank(d_year#30, d_moy#31) windowspecdefinition(i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#26, i_brand#27, s_store_name#28, s_company_name#29], [d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST] +Input [7]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18] +Arguments: [rank(d_year#26, d_moy#27) windowspecdefinition(i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#30], [i_category#22, i_brand#23, s_store_name#24, s_company_name#25], [d_year#26 ASC NULLS FIRST, d_moy#27 ASC NULLS FIRST] (34) Project [codegen id : 14] -Output [6]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, sum_sales#21 AS sum_sales#36, rn#35] -Input [8]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, d_year#30, d_moy#31, sum_sales#21, rn#35] +Output [6]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, sum_sales#18 AS sum_sales#31, rn#30] +Input [8]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, d_year#26, d_moy#27, sum_sales#18, rn#30] (35) BroadcastExchange -Input [6]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, sum_sales#36, rn#35] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [id=#37] +Input [6]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, sum_sales#31, rn#30] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=6] (36) BroadcastHashJoin [codegen id : 22] -Left keys [5]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, rn#24] -Right keys [5]: [i_category#26, i_brand#27, s_store_name#28, s_company_name#29, (rn#35 + 1)] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, rn#20] +Right keys [5]: [i_category#22, i_brand#23, s_store_name#24, s_company_name#25, (rn#30 + 1)] Join condition: None (37) Project [codegen id : 22] -Output [10]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#36] -Input [15]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, avg_monthly_sales#25, rn#24, i_category#26, i_brand#27, s_store_name#28, s_company_name#29, sum_sales#36, rn#35] +Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, avg_monthly_sales#21, rn#20, sum_sales#31] +Input [15]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, avg_monthly_sales#21, rn#20, i_category#22, i_brand#23, s_store_name#24, s_company_name#25, sum_sales#31, rn#30] (38) ReusedExchange [Reuses operator id: 31] -Output [7]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#21] +Output [7]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18] (39) Sort [codegen id : 20] -Input [7]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#21] -Arguments: [i_category#38 ASC NULLS FIRST, i_brand#39 ASC NULLS FIRST, s_store_name#40 ASC NULLS FIRST, s_company_name#41 ASC NULLS FIRST, d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST], false, 0 +Input [7]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18] +Arguments: [i_category#32 ASC NULLS FIRST, i_brand#33 ASC NULLS FIRST, s_store_name#34 ASC NULLS FIRST, s_company_name#35 ASC NULLS FIRST, d_year#36 ASC NULLS FIRST, d_moy#37 ASC NULLS FIRST], false, 0 (40) Window -Input [7]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#21] -Arguments: [rank(d_year#42, d_moy#43) windowspecdefinition(i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#44], [i_category#38, i_brand#39, s_store_name#40, s_company_name#41], [d_year#42 ASC NULLS FIRST, d_moy#43 ASC NULLS FIRST] +Input [7]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18] +Arguments: [rank(d_year#36, d_moy#37) windowspecdefinition(i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36 ASC NULLS FIRST, d_moy#37 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#38], [i_category#32, i_brand#33, s_store_name#34, s_company_name#35], [d_year#36 ASC NULLS FIRST, d_moy#37 ASC NULLS FIRST] (41) Project [codegen id : 21] -Output [6]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, sum_sales#21 AS sum_sales#45, rn#44] -Input [8]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, d_year#42, d_moy#43, sum_sales#21, rn#44] +Output [6]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, sum_sales#18 AS sum_sales#39, rn#38] +Input [8]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, d_year#36, d_moy#37, sum_sales#18, rn#38] (42) BroadcastExchange -Input [6]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, sum_sales#45, rn#44] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [id=#46] +Input [6]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, sum_sales#39, rn#38] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=7] (43) BroadcastHashJoin [codegen id : 22] -Left keys [5]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, rn#24] -Right keys [5]: [i_category#38, i_brand#39, s_store_name#40, s_company_name#41, (rn#44 - 1)] +Left keys [5]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, rn#20] +Right keys [5]: [i_category#32, i_brand#33, s_store_name#34, s_company_name#35, (rn#38 - 1)] Join condition: None (44) Project [codegen id : 22] -Output [7]: [i_category#3, d_year#11, d_moy#12, avg_monthly_sales#25, sum_sales#21, sum_sales#36 AS psum#47, sum_sales#45 AS nsum#48] -Input [16]: [i_category#3, i_brand#2, s_store_name#14, s_company_name#15, d_year#11, d_moy#12, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#36, i_category#38, i_brand#39, s_store_name#40, s_company_name#41, sum_sales#45, rn#44] +Output [7]: [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#21, sum_sales#18, sum_sales#31 AS psum#40, sum_sales#39 AS nsum#41] +Input [16]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#18, avg_monthly_sales#21, rn#20, sum_sales#31, i_category#32, i_brand#33, s_store_name#34, s_company_name#35, sum_sales#39, rn#38] (45) TakeOrderedAndProject -Input [7]: [i_category#3, d_year#11, d_moy#12, avg_monthly_sales#25, sum_sales#21, psum#47, nsum#48] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST], [i_category#3, d_year#11, d_moy#12, avg_monthly_sales#25, sum_sales#21, psum#47, nsum#48] +Input [7]: [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#21, sum_sales#18, psum#40, nsum#41] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#18 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#21 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#21, sum_sales#18, psum#40, nsum#41] ===== Subqueries ===== @@ -254,21 +254,21 @@ BroadcastExchange (49) (46) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct (47) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] (48) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] -Condition : ((((d_year#11 = 1999) OR ((d_year#11 = 1998) AND (d_moy#12 = 12))) OR ((d_year#11 = 2000) AND (d_moy#12 = 1))) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) (49) BroadcastExchange -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#49] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.sf100/explain.txt index 5efc0bfaed99e..6a546a42ff309 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49.sf100/explain.txt @@ -120,356 +120,356 @@ Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_so (8) Exchange Input [4]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4] -Arguments: hashpartitioning(ws_order_number#2, ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ws_order_number#2, ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (9) Sort [codegen id : 3] Input [4]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4] Arguments: [ws_order_number#2 ASC NULLS FIRST, ws_item_sk#1 ASC NULLS FIRST], false, 0 (10) Scan parquet default.web_returns -Output [5]: [wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13, wr_returned_date_sk#14] +Output [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 4] -Input [5]: [wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13, wr_returned_date_sk#14] +Input [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] (12) Filter [codegen id : 4] -Input [5]: [wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13, wr_returned_date_sk#14] -Condition : (((isnotnull(wr_return_amt#13) AND (wr_return_amt#13 > 10000.00)) AND isnotnull(wr_order_number#11)) AND isnotnull(wr_item_sk#10)) +Input [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] +Condition : (((isnotnull(wr_return_amt#12) AND (wr_return_amt#12 > 10000.00)) AND isnotnull(wr_order_number#10)) AND isnotnull(wr_item_sk#9)) (13) Project [codegen id : 4] -Output [4]: [wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13] -Input [5]: [wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13, wr_returned_date_sk#14] +Output [4]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12] +Input [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] (14) Exchange -Input [4]: [wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13] -Arguments: hashpartitioning(wr_order_number#11, wr_item_sk#10, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12] +Arguments: hashpartitioning(wr_order_number#10, wr_item_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) Sort [codegen id : 5] -Input [4]: [wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13] -Arguments: [wr_order_number#11 ASC NULLS FIRST, wr_item_sk#10 ASC NULLS FIRST], false, 0 +Input [4]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12] +Arguments: [wr_order_number#10 ASC NULLS FIRST, wr_item_sk#9 ASC NULLS FIRST], false, 0 (16) SortMergeJoin [codegen id : 6] Left keys [2]: [ws_order_number#2, ws_item_sk#1] -Right keys [2]: [wr_order_number#11, wr_item_sk#10] +Right keys [2]: [wr_order_number#10, wr_item_sk#9] Join condition: None (17) Project [codegen id : 6] -Output [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#12, wr_return_amt#13] -Input [8]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, wr_item_sk#10, wr_order_number#11, wr_return_quantity#12, wr_return_amt#13] +Output [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#11, wr_return_amt#12] +Input [8]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12] (18) HashAggregate [codegen id : 6] -Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#12, wr_return_amt#13] +Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#11, wr_return_amt#12] Keys [1]: [ws_item_sk#1] -Functions [4]: [partial_sum(coalesce(wr_return_quantity#12, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#16, sum#17, sum#18, isEmpty#19, sum#20, isEmpty#21] -Results [7]: [ws_item_sk#1, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] +Functions [4]: [partial_sum(coalesce(wr_return_quantity#11, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#14, sum#15, sum#16, isEmpty#17, sum#18, isEmpty#19] +Results [7]: [ws_item_sk#1, sum#20, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] (19) Exchange -Input [7]: [ws_item_sk#1, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] -Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [7]: [ws_item_sk#1, sum#20, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 7] -Input [7]: [ws_item_sk#1, sum#22, sum#23, sum#24, isEmpty#25, sum#26, isEmpty#27] +Input [7]: [ws_item_sk#1, sum#20, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] Keys [1]: [ws_item_sk#1] -Functions [4]: [sum(coalesce(wr_return_quantity#12, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#12, 0))#29, sum(coalesce(ws_quantity#3, 0))#30, sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00))#31, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#32] -Results [3]: [ws_item_sk#1 AS item#33, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#12, 0))#29 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#30 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#34, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#13 as decimal(12,2)), 0.00))#31 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#32 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#35] +Functions [4]: [sum(coalesce(wr_return_quantity#11, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#11, 0))#26, sum(coalesce(ws_quantity#3, 0))#27, sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00))#28, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#29] +Results [3]: [ws_item_sk#1 AS item#30, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#11, 0))#26 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#27 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#31, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00))#28 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#29 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#32] (21) Exchange -Input [3]: [item#33, return_ratio#34, currency_ratio#35] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#36] +Input [3]: [item#30, return_ratio#31, currency_ratio#32] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (22) Sort [codegen id : 8] -Input [3]: [item#33, return_ratio#34, currency_ratio#35] -Arguments: [return_ratio#34 ASC NULLS FIRST], false, 0 +Input [3]: [item#30, return_ratio#31, currency_ratio#32] +Arguments: [return_ratio#31 ASC NULLS FIRST], false, 0 (23) Window -Input [3]: [item#33, return_ratio#34, currency_ratio#35] -Arguments: [rank(return_ratio#34) windowspecdefinition(return_ratio#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#37], [return_ratio#34 ASC NULLS FIRST] +Input [3]: [item#30, return_ratio#31, currency_ratio#32] +Arguments: [rank(return_ratio#31) windowspecdefinition(return_ratio#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#33], [return_ratio#31 ASC NULLS FIRST] (24) Sort [codegen id : 9] -Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37] -Arguments: [currency_ratio#35 ASC NULLS FIRST], false, 0 +Input [4]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33] +Arguments: [currency_ratio#32 ASC NULLS FIRST], false, 0 (25) Window -Input [4]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37] -Arguments: [rank(currency_ratio#35) windowspecdefinition(currency_ratio#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#38], [currency_ratio#35 ASC NULLS FIRST] +Input [4]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33] +Arguments: [rank(currency_ratio#32) windowspecdefinition(currency_ratio#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#34], [currency_ratio#32 ASC NULLS FIRST] (26) Filter [codegen id : 10] -Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37, currency_rank#38] -Condition : ((return_rank#37 <= 10) OR (currency_rank#38 <= 10)) +Input [5]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33, currency_rank#34] +Condition : ((return_rank#33 <= 10) OR (currency_rank#34 <= 10)) (27) Project [codegen id : 10] -Output [5]: [web AS channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Input [5]: [item#33, return_ratio#34, currency_ratio#35, return_rank#37, currency_rank#38] +Output [5]: [web AS channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Input [5]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33, currency_rank#34] (28) Scan parquet default.catalog_sales -Output [6]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43, cs_net_profit#44, cs_sold_date_sk#45] +Output [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#45), dynamicpruningexpression(cs_sold_date_sk#45 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(cs_sold_date_sk#41), dynamicpruningexpression(cs_sold_date_sk#41 IN dynamicpruning#7)] PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_order_number), IsNotNull(cs_item_sk)] ReadSchema: struct (29) ColumnarToRow [codegen id : 12] -Input [6]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43, cs_net_profit#44, cs_sold_date_sk#45] +Input [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] (30) Filter [codegen id : 12] -Input [6]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43, cs_net_profit#44, cs_sold_date_sk#45] -Condition : (((((((isnotnull(cs_net_profit#44) AND isnotnull(cs_net_paid#43)) AND isnotnull(cs_quantity#42)) AND (cs_net_profit#44 > 1.00)) AND (cs_net_paid#43 > 0.00)) AND (cs_quantity#42 > 0)) AND isnotnull(cs_order_number#41)) AND isnotnull(cs_item_sk#40)) +Input [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] +Condition : (((((((isnotnull(cs_net_profit#40) AND isnotnull(cs_net_paid#39)) AND isnotnull(cs_quantity#38)) AND (cs_net_profit#40 > 1.00)) AND (cs_net_paid#39 > 0.00)) AND (cs_quantity#38 > 0)) AND isnotnull(cs_order_number#37)) AND isnotnull(cs_item_sk#36)) (31) Project [codegen id : 12] -Output [5]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43, cs_sold_date_sk#45] -Input [6]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43, cs_net_profit#44, cs_sold_date_sk#45] +Output [5]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_sold_date_sk#41] +Input [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] (32) ReusedExchange [Reuses operator id: 91] -Output [1]: [d_date_sk#46] +Output [1]: [d_date_sk#42] (33) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [cs_sold_date_sk#45] -Right keys [1]: [d_date_sk#46] +Left keys [1]: [cs_sold_date_sk#41] +Right keys [1]: [d_date_sk#42] Join condition: None (34) Project [codegen id : 12] -Output [4]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43] -Input [6]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43, cs_sold_date_sk#45, d_date_sk#46] +Output [4]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39] +Input [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_sold_date_sk#41, d_date_sk#42] (35) Exchange -Input [4]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43] -Arguments: hashpartitioning(cs_order_number#41, cs_item_sk#40, 5), ENSURE_REQUIREMENTS, [id=#47] +Input [4]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39] +Arguments: hashpartitioning(cs_order_number#37, cs_item_sk#36, 5), ENSURE_REQUIREMENTS, [plan_id=5] (36) Sort [codegen id : 13] -Input [4]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43] -Arguments: [cs_order_number#41 ASC NULLS FIRST, cs_item_sk#40 ASC NULLS FIRST], false, 0 +Input [4]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39] +Arguments: [cs_order_number#37 ASC NULLS FIRST, cs_item_sk#36 ASC NULLS FIRST], false, 0 (37) Scan parquet default.catalog_returns -Output [5]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51, cr_returned_date_sk#52] +Output [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] ReadSchema: struct (38) ColumnarToRow [codegen id : 14] -Input [5]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51, cr_returned_date_sk#52] +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] (39) Filter [codegen id : 14] -Input [5]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51, cr_returned_date_sk#52] -Condition : (((isnotnull(cr_return_amount#51) AND (cr_return_amount#51 > 10000.00)) AND isnotnull(cr_order_number#49)) AND isnotnull(cr_item_sk#48)) +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] +Condition : (((isnotnull(cr_return_amount#46) AND (cr_return_amount#46 > 10000.00)) AND isnotnull(cr_order_number#44)) AND isnotnull(cr_item_sk#43)) (40) Project [codegen id : 14] -Output [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] -Input [5]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51, cr_returned_date_sk#52] +Output [4]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] +Input [5]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46, cr_returned_date_sk#47] (41) Exchange -Input [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] -Arguments: hashpartitioning(cr_order_number#49, cr_item_sk#48, 5), ENSURE_REQUIREMENTS, [id=#53] +Input [4]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] +Arguments: hashpartitioning(cr_order_number#44, cr_item_sk#43, 5), ENSURE_REQUIREMENTS, [plan_id=6] (42) Sort [codegen id : 15] -Input [4]: [cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] -Arguments: [cr_order_number#49 ASC NULLS FIRST, cr_item_sk#48 ASC NULLS FIRST], false, 0 +Input [4]: [cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] +Arguments: [cr_order_number#44 ASC NULLS FIRST, cr_item_sk#43 ASC NULLS FIRST], false, 0 (43) SortMergeJoin [codegen id : 16] -Left keys [2]: [cs_order_number#41, cs_item_sk#40] -Right keys [2]: [cr_order_number#49, cr_item_sk#48] +Left keys [2]: [cs_order_number#37, cs_item_sk#36] +Right keys [2]: [cr_order_number#44, cr_item_sk#43] Join condition: None (44) Project [codegen id : 16] -Output [5]: [cs_item_sk#40, cs_quantity#42, cs_net_paid#43, cr_return_quantity#50, cr_return_amount#51] -Input [8]: [cs_item_sk#40, cs_order_number#41, cs_quantity#42, cs_net_paid#43, cr_item_sk#48, cr_order_number#49, cr_return_quantity#50, cr_return_amount#51] +Output [5]: [cs_item_sk#36, cs_quantity#38, cs_net_paid#39, cr_return_quantity#45, cr_return_amount#46] +Input [8]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cr_item_sk#43, cr_order_number#44, cr_return_quantity#45, cr_return_amount#46] (45) HashAggregate [codegen id : 16] -Input [5]: [cs_item_sk#40, cs_quantity#42, cs_net_paid#43, cr_return_quantity#50, cr_return_amount#51] -Keys [1]: [cs_item_sk#40] -Functions [4]: [partial_sum(coalesce(cr_return_quantity#50, 0)), partial_sum(coalesce(cs_quantity#42, 0)), partial_sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#54, sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] -Results [7]: [cs_item_sk#40, sum#60, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Input [5]: [cs_item_sk#36, cs_quantity#38, cs_net_paid#39, cr_return_quantity#45, cr_return_amount#46] +Keys [1]: [cs_item_sk#36] +Functions [4]: [partial_sum(coalesce(cr_return_quantity#45, 0)), partial_sum(coalesce(cs_quantity#38, 0)), partial_sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#48, sum#49, sum#50, isEmpty#51, sum#52, isEmpty#53] +Results [7]: [cs_item_sk#36, sum#54, sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] (46) Exchange -Input [7]: [cs_item_sk#40, sum#60, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] -Arguments: hashpartitioning(cs_item_sk#40, 5), ENSURE_REQUIREMENTS, [id=#66] +Input [7]: [cs_item_sk#36, sum#54, sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] +Arguments: hashpartitioning(cs_item_sk#36, 5), ENSURE_REQUIREMENTS, [plan_id=7] (47) HashAggregate [codegen id : 17] -Input [7]: [cs_item_sk#40, sum#60, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] -Keys [1]: [cs_item_sk#40] -Functions [4]: [sum(coalesce(cr_return_quantity#50, 0)), sum(coalesce(cs_quantity#42, 0)), sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#50, 0))#67, sum(coalesce(cs_quantity#42, 0))#68, sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00))#69, sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))#70] -Results [3]: [cs_item_sk#40 AS item#71, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#50, 0))#67 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#42, 0))#68 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#72, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#51 as decimal(12,2)), 0.00))#69 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#43 as decimal(12,2)), 0.00))#70 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#73] +Input [7]: [cs_item_sk#36, sum#54, sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] +Keys [1]: [cs_item_sk#36] +Functions [4]: [sum(coalesce(cr_return_quantity#45, 0)), sum(coalesce(cs_quantity#38, 0)), sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#45, 0))#60, sum(coalesce(cs_quantity#38, 0))#61, sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00))#62, sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))#63] +Results [3]: [cs_item_sk#36 AS item#64, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#45, 0))#60 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#38, 0))#61 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#65, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#46 as decimal(12,2)), 0.00))#62 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))#63 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#66] (48) Exchange -Input [3]: [item#71, return_ratio#72, currency_ratio#73] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#74] +Input [3]: [item#64, return_ratio#65, currency_ratio#66] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=8] (49) Sort [codegen id : 18] -Input [3]: [item#71, return_ratio#72, currency_ratio#73] -Arguments: [return_ratio#72 ASC NULLS FIRST], false, 0 +Input [3]: [item#64, return_ratio#65, currency_ratio#66] +Arguments: [return_ratio#65 ASC NULLS FIRST], false, 0 (50) Window -Input [3]: [item#71, return_ratio#72, currency_ratio#73] -Arguments: [rank(return_ratio#72) windowspecdefinition(return_ratio#72 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#75], [return_ratio#72 ASC NULLS FIRST] +Input [3]: [item#64, return_ratio#65, currency_ratio#66] +Arguments: [rank(return_ratio#65) windowspecdefinition(return_ratio#65 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#67], [return_ratio#65 ASC NULLS FIRST] (51) Sort [codegen id : 19] -Input [4]: [item#71, return_ratio#72, currency_ratio#73, return_rank#75] -Arguments: [currency_ratio#73 ASC NULLS FIRST], false, 0 +Input [4]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67] +Arguments: [currency_ratio#66 ASC NULLS FIRST], false, 0 (52) Window -Input [4]: [item#71, return_ratio#72, currency_ratio#73, return_rank#75] -Arguments: [rank(currency_ratio#73) windowspecdefinition(currency_ratio#73 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#76], [currency_ratio#73 ASC NULLS FIRST] +Input [4]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67] +Arguments: [rank(currency_ratio#66) windowspecdefinition(currency_ratio#66 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#68], [currency_ratio#66 ASC NULLS FIRST] (53) Filter [codegen id : 20] -Input [5]: [item#71, return_ratio#72, currency_ratio#73, return_rank#75, currency_rank#76] -Condition : ((return_rank#75 <= 10) OR (currency_rank#76 <= 10)) +Input [5]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67, currency_rank#68] +Condition : ((return_rank#67 <= 10) OR (currency_rank#68 <= 10)) (54) Project [codegen id : 20] -Output [5]: [catalog AS channel#77, item#71, return_ratio#72, return_rank#75, currency_rank#76] -Input [5]: [item#71, return_ratio#72, currency_ratio#73, return_rank#75, currency_rank#76] +Output [5]: [catalog AS channel#69, item#64, return_ratio#65, return_rank#67, currency_rank#68] +Input [5]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67, currency_rank#68] (55) Scan parquet default.store_sales -Output [6]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82, ss_sold_date_sk#83] +Output [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#83), dynamicpruningexpression(ss_sold_date_sk#83 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ss_sold_date_sk#75), dynamicpruningexpression(ss_sold_date_sk#75 IN dynamicpruning#7)] PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_net_paid), IsNotNull(ss_quantity), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk)] ReadSchema: struct (56) ColumnarToRow [codegen id : 22] -Input [6]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82, ss_sold_date_sk#83] +Input [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] (57) Filter [codegen id : 22] -Input [6]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82, ss_sold_date_sk#83] -Condition : (((((((isnotnull(ss_net_profit#82) AND isnotnull(ss_net_paid#81)) AND isnotnull(ss_quantity#80)) AND (ss_net_profit#82 > 1.00)) AND (ss_net_paid#81 > 0.00)) AND (ss_quantity#80 > 0)) AND isnotnull(ss_ticket_number#79)) AND isnotnull(ss_item_sk#78)) +Input [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] +Condition : (((((((isnotnull(ss_net_profit#74) AND isnotnull(ss_net_paid#73)) AND isnotnull(ss_quantity#72)) AND (ss_net_profit#74 > 1.00)) AND (ss_net_paid#73 > 0.00)) AND (ss_quantity#72 > 0)) AND isnotnull(ss_ticket_number#71)) AND isnotnull(ss_item_sk#70)) (58) Project [codegen id : 22] -Output [5]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_sold_date_sk#83] -Input [6]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_net_profit#82, ss_sold_date_sk#83] +Output [5]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_sold_date_sk#75] +Input [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] (59) ReusedExchange [Reuses operator id: 91] -Output [1]: [d_date_sk#84] +Output [1]: [d_date_sk#76] (60) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [ss_sold_date_sk#83] -Right keys [1]: [d_date_sk#84] +Left keys [1]: [ss_sold_date_sk#75] +Right keys [1]: [d_date_sk#76] Join condition: None (61) Project [codegen id : 22] -Output [4]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81] -Input [6]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, ss_sold_date_sk#83, d_date_sk#84] +Output [4]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73] +Input [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_sold_date_sk#75, d_date_sk#76] (62) Exchange -Input [4]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81] -Arguments: hashpartitioning(ss_ticket_number#79, ss_item_sk#78, 5), ENSURE_REQUIREMENTS, [id=#85] +Input [4]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73] +Arguments: hashpartitioning(ss_ticket_number#71, ss_item_sk#70, 5), ENSURE_REQUIREMENTS, [plan_id=9] (63) Sort [codegen id : 23] -Input [4]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81] -Arguments: [ss_ticket_number#79 ASC NULLS FIRST, ss_item_sk#78 ASC NULLS FIRST], false, 0 +Input [4]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73] +Arguments: [ss_ticket_number#71 ASC NULLS FIRST, ss_item_sk#70 ASC NULLS FIRST], false, 0 (64) Scan parquet default.store_returns -Output [5]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89, sr_returned_date_sk#90] +Output [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] ReadSchema: struct (65) ColumnarToRow [codegen id : 24] -Input [5]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89, sr_returned_date_sk#90] +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] (66) Filter [codegen id : 24] -Input [5]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89, sr_returned_date_sk#90] -Condition : (((isnotnull(sr_return_amt#89) AND (sr_return_amt#89 > 10000.00)) AND isnotnull(sr_ticket_number#87)) AND isnotnull(sr_item_sk#86)) +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] +Condition : (((isnotnull(sr_return_amt#80) AND (sr_return_amt#80 > 10000.00)) AND isnotnull(sr_ticket_number#78)) AND isnotnull(sr_item_sk#77)) (67) Project [codegen id : 24] -Output [4]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89] -Input [5]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89, sr_returned_date_sk#90] +Output [4]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] +Input [5]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80, sr_returned_date_sk#81] (68) Exchange -Input [4]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89] -Arguments: hashpartitioning(sr_ticket_number#87, sr_item_sk#86, 5), ENSURE_REQUIREMENTS, [id=#91] +Input [4]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] +Arguments: hashpartitioning(sr_ticket_number#78, sr_item_sk#77, 5), ENSURE_REQUIREMENTS, [plan_id=10] (69) Sort [codegen id : 25] -Input [4]: [sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89] -Arguments: [sr_ticket_number#87 ASC NULLS FIRST, sr_item_sk#86 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] +Arguments: [sr_ticket_number#78 ASC NULLS FIRST, sr_item_sk#77 ASC NULLS FIRST], false, 0 (70) SortMergeJoin [codegen id : 26] -Left keys [2]: [ss_ticket_number#79, ss_item_sk#78] -Right keys [2]: [sr_ticket_number#87, sr_item_sk#86] +Left keys [2]: [ss_ticket_number#71, ss_item_sk#70] +Right keys [2]: [sr_ticket_number#78, sr_item_sk#77] Join condition: None (71) Project [codegen id : 26] -Output [5]: [ss_item_sk#78, ss_quantity#80, ss_net_paid#81, sr_return_quantity#88, sr_return_amt#89] -Input [8]: [ss_item_sk#78, ss_ticket_number#79, ss_quantity#80, ss_net_paid#81, sr_item_sk#86, sr_ticket_number#87, sr_return_quantity#88, sr_return_amt#89] +Output [5]: [ss_item_sk#70, ss_quantity#72, ss_net_paid#73, sr_return_quantity#79, sr_return_amt#80] +Input [8]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, sr_item_sk#77, sr_ticket_number#78, sr_return_quantity#79, sr_return_amt#80] (72) HashAggregate [codegen id : 26] -Input [5]: [ss_item_sk#78, ss_quantity#80, ss_net_paid#81, sr_return_quantity#88, sr_return_amt#89] -Keys [1]: [ss_item_sk#78] -Functions [4]: [partial_sum(coalesce(sr_return_quantity#88, 0)), partial_sum(coalesce(ss_quantity#80, 0)), partial_sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#92, sum#93, sum#94, isEmpty#95, sum#96, isEmpty#97] -Results [7]: [ss_item_sk#78, sum#98, sum#99, sum#100, isEmpty#101, sum#102, isEmpty#103] +Input [5]: [ss_item_sk#70, ss_quantity#72, ss_net_paid#73, sr_return_quantity#79, sr_return_amt#80] +Keys [1]: [ss_item_sk#70] +Functions [4]: [partial_sum(coalesce(sr_return_quantity#79, 0)), partial_sum(coalesce(ss_quantity#72, 0)), partial_sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#82, sum#83, sum#84, isEmpty#85, sum#86, isEmpty#87] +Results [7]: [ss_item_sk#70, sum#88, sum#89, sum#90, isEmpty#91, sum#92, isEmpty#93] (73) Exchange -Input [7]: [ss_item_sk#78, sum#98, sum#99, sum#100, isEmpty#101, sum#102, isEmpty#103] -Arguments: hashpartitioning(ss_item_sk#78, 5), ENSURE_REQUIREMENTS, [id=#104] +Input [7]: [ss_item_sk#70, sum#88, sum#89, sum#90, isEmpty#91, sum#92, isEmpty#93] +Arguments: hashpartitioning(ss_item_sk#70, 5), ENSURE_REQUIREMENTS, [plan_id=11] (74) HashAggregate [codegen id : 27] -Input [7]: [ss_item_sk#78, sum#98, sum#99, sum#100, isEmpty#101, sum#102, isEmpty#103] -Keys [1]: [ss_item_sk#78] -Functions [4]: [sum(coalesce(sr_return_quantity#88, 0)), sum(coalesce(ss_quantity#80, 0)), sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#88, 0))#105, sum(coalesce(ss_quantity#80, 0))#106, sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00))#107, sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))#108] -Results [3]: [ss_item_sk#78 AS item#109, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#88, 0))#105 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#80, 0))#106 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#110, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#89 as decimal(12,2)), 0.00))#107 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#81 as decimal(12,2)), 0.00))#108 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#111] +Input [7]: [ss_item_sk#70, sum#88, sum#89, sum#90, isEmpty#91, sum#92, isEmpty#93] +Keys [1]: [ss_item_sk#70] +Functions [4]: [sum(coalesce(sr_return_quantity#79, 0)), sum(coalesce(ss_quantity#72, 0)), sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#79, 0))#94, sum(coalesce(ss_quantity#72, 0))#95, sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00))#96, sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))#97] +Results [3]: [ss_item_sk#70 AS item#98, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#79, 0))#94 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#72, 0))#95 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#99, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#80 as decimal(12,2)), 0.00))#96 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))#97 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#100] (75) Exchange -Input [3]: [item#109, return_ratio#110, currency_ratio#111] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#112] +Input [3]: [item#98, return_ratio#99, currency_ratio#100] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] (76) Sort [codegen id : 28] -Input [3]: [item#109, return_ratio#110, currency_ratio#111] -Arguments: [return_ratio#110 ASC NULLS FIRST], false, 0 +Input [3]: [item#98, return_ratio#99, currency_ratio#100] +Arguments: [return_ratio#99 ASC NULLS FIRST], false, 0 (77) Window -Input [3]: [item#109, return_ratio#110, currency_ratio#111] -Arguments: [rank(return_ratio#110) windowspecdefinition(return_ratio#110 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#113], [return_ratio#110 ASC NULLS FIRST] +Input [3]: [item#98, return_ratio#99, currency_ratio#100] +Arguments: [rank(return_ratio#99) windowspecdefinition(return_ratio#99 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#101], [return_ratio#99 ASC NULLS FIRST] (78) Sort [codegen id : 29] -Input [4]: [item#109, return_ratio#110, currency_ratio#111, return_rank#113] -Arguments: [currency_ratio#111 ASC NULLS FIRST], false, 0 +Input [4]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101] +Arguments: [currency_ratio#100 ASC NULLS FIRST], false, 0 (79) Window -Input [4]: [item#109, return_ratio#110, currency_ratio#111, return_rank#113] -Arguments: [rank(currency_ratio#111) windowspecdefinition(currency_ratio#111 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#114], [currency_ratio#111 ASC NULLS FIRST] +Input [4]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101] +Arguments: [rank(currency_ratio#100) windowspecdefinition(currency_ratio#100 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#102], [currency_ratio#100 ASC NULLS FIRST] (80) Filter [codegen id : 30] -Input [5]: [item#109, return_ratio#110, currency_ratio#111, return_rank#113, currency_rank#114] -Condition : ((return_rank#113 <= 10) OR (currency_rank#114 <= 10)) +Input [5]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101, currency_rank#102] +Condition : ((return_rank#101 <= 10) OR (currency_rank#102 <= 10)) (81) Project [codegen id : 30] -Output [5]: [store AS channel#115, item#109, return_ratio#110, return_rank#113, currency_rank#114] -Input [5]: [item#109, return_ratio#110, currency_ratio#111, return_rank#113, currency_rank#114] +Output [5]: [store AS channel#103, item#98, return_ratio#99, return_rank#101, currency_rank#102] +Input [5]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101, currency_rank#102] (82) Union (83) HashAggregate [codegen id : 31] -Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Keys [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Keys [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Results [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] (84) Exchange -Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Arguments: hashpartitioning(channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38, 5), ENSURE_REQUIREMENTS, [id=#116] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Arguments: hashpartitioning(channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34, 5), ENSURE_REQUIREMENTS, [plan_id=13] (85) HashAggregate [codegen id : 32] -Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Keys [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Keys [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Results [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] (86) TakeOrderedAndProject -Input [5]: [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] -Arguments: 100, [channel#39 ASC NULLS FIRST, return_rank#37 ASC NULLS FIRST, currency_rank#38 ASC NULLS FIRST, item#33 ASC NULLS FIRST], [channel#39, item#33, return_ratio#34, return_rank#37, currency_rank#38] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Arguments: 100, [channel#35 ASC NULLS FIRST, return_rank#33 ASC NULLS FIRST, currency_rank#34 ASC NULLS FIRST, item#30 ASC NULLS FIRST], [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] ===== Subqueries ===== @@ -482,29 +482,29 @@ BroadcastExchange (91) (87) Scan parquet default.date_dim -Output [3]: [d_date_sk#8, d_year#117, d_moy#118] +Output [3]: [d_date_sk#8, d_year#104, d_moy#105] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] ReadSchema: struct (88) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#117, d_moy#118] +Input [3]: [d_date_sk#8, d_year#104, d_moy#105] (89) Filter [codegen id : 1] -Input [3]: [d_date_sk#8, d_year#117, d_moy#118] -Condition : ((((isnotnull(d_year#117) AND isnotnull(d_moy#118)) AND (d_year#117 = 2001)) AND (d_moy#118 = 12)) AND isnotnull(d_date_sk#8)) +Input [3]: [d_date_sk#8, d_year#104, d_moy#105] +Condition : ((((isnotnull(d_year#104) AND isnotnull(d_moy#105)) AND (d_year#104 = 2001)) AND (d_moy#105 = 12)) AND isnotnull(d_date_sk#8)) (90) Project [codegen id : 1] Output [1]: [d_date_sk#8] -Input [3]: [d_date_sk#8, d_year#117, d_moy#118] +Input [3]: [d_date_sk#8, d_year#104, d_moy#105] (91) BroadcastExchange Input [1]: [d_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#119] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] -Subquery:2 Hosting operator id = 28 Hosting Expression = cs_sold_date_sk#45 IN dynamicpruning#7 +Subquery:2 Hosting operator id = 28 Hosting Expression = cs_sold_date_sk#41 IN dynamicpruning#7 -Subquery:3 Hosting operator id = 55 Hosting Expression = ss_sold_date_sk#83 IN dynamicpruning#7 +Subquery:3 Hosting operator id = 55 Hosting Expression = ss_sold_date_sk#75 IN dynamicpruning#7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49/explain.txt index 657a1a1f358c6..b6cfd8a096c8c 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q49/explain.txt @@ -99,332 +99,332 @@ Input [6]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_ne (5) BroadcastExchange Input [5]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=1] (6) Scan parquet default.web_returns -Output [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] +Output [5]: [wr_item_sk#8, wr_order_number#9, wr_return_quantity#10, wr_return_amt#11, wr_returned_date_sk#12] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_return_amt), GreaterThan(wr_return_amt,10000.00), IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct (7) ColumnarToRow -Input [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] +Input [5]: [wr_item_sk#8, wr_order_number#9, wr_return_quantity#10, wr_return_amt#11, wr_returned_date_sk#12] (8) Filter -Input [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] -Condition : (((isnotnull(wr_return_amt#12) AND (wr_return_amt#12 > 10000.00)) AND isnotnull(wr_order_number#10)) AND isnotnull(wr_item_sk#9)) +Input [5]: [wr_item_sk#8, wr_order_number#9, wr_return_quantity#10, wr_return_amt#11, wr_returned_date_sk#12] +Condition : (((isnotnull(wr_return_amt#11) AND (wr_return_amt#11 > 10000.00)) AND isnotnull(wr_order_number#9)) AND isnotnull(wr_item_sk#8)) (9) Project -Output [4]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12] -Input [5]: [wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12, wr_returned_date_sk#13] +Output [4]: [wr_item_sk#8, wr_order_number#9, wr_return_quantity#10, wr_return_amt#11] +Input [5]: [wr_item_sk#8, wr_order_number#9, wr_return_quantity#10, wr_return_amt#11, wr_returned_date_sk#12] (10) BroadcastHashJoin [codegen id : 3] Left keys [2]: [ws_order_number#2, ws_item_sk#1] -Right keys [2]: [wr_order_number#10, wr_item_sk#9] +Right keys [2]: [wr_order_number#9, wr_item_sk#8] Join condition: None (11) Project [codegen id : 3] -Output [6]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#11, wr_return_amt#12] -Input [9]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_item_sk#9, wr_order_number#10, wr_return_quantity#11, wr_return_amt#12] +Output [6]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#10, wr_return_amt#11] +Input [9]: [ws_item_sk#1, ws_order_number#2, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_item_sk#8, wr_order_number#9, wr_return_quantity#10, wr_return_amt#11] (12) ReusedExchange [Reuses operator id: 82] -Output [1]: [d_date_sk#14] +Output [1]: [d_date_sk#13] (13) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ws_sold_date_sk#6] -Right keys [1]: [d_date_sk#14] +Right keys [1]: [d_date_sk#13] Join condition: None (14) Project [codegen id : 3] -Output [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#11, wr_return_amt#12] -Input [7]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#11, wr_return_amt#12, d_date_sk#14] +Output [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#10, wr_return_amt#11] +Input [7]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, ws_sold_date_sk#6, wr_return_quantity#10, wr_return_amt#11, d_date_sk#13] (15) HashAggregate [codegen id : 3] -Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#11, wr_return_amt#12] +Input [5]: [ws_item_sk#1, ws_quantity#3, ws_net_paid#4, wr_return_quantity#10, wr_return_amt#11] Keys [1]: [ws_item_sk#1] -Functions [4]: [partial_sum(coalesce(wr_return_quantity#11, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#15, sum#16, sum#17, isEmpty#18, sum#19, isEmpty#20] -Results [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] +Functions [4]: [partial_sum(coalesce(wr_return_quantity#10, 0)), partial_sum(coalesce(ws_quantity#3, 0)), partial_sum(coalesce(cast(wr_return_amt#11 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#14, sum#15, sum#16, isEmpty#17, sum#18, isEmpty#19] +Results [7]: [ws_item_sk#1, sum#20, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] (16) Exchange -Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] -Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [7]: [ws_item_sk#1, sum#20, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (17) HashAggregate [codegen id : 4] -Input [7]: [ws_item_sk#1, sum#21, sum#22, sum#23, isEmpty#24, sum#25, isEmpty#26] +Input [7]: [ws_item_sk#1, sum#20, sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] Keys [1]: [ws_item_sk#1] -Functions [4]: [sum(coalesce(wr_return_quantity#11, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#11, 0))#28, sum(coalesce(ws_quantity#3, 0))#29, sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00))#30, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#31] -Results [3]: [ws_item_sk#1 AS item#32, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#11, 0))#28 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#29 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#33, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#12 as decimal(12,2)), 0.00))#30 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#31 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#34] +Functions [4]: [sum(coalesce(wr_return_quantity#10, 0)), sum(coalesce(ws_quantity#3, 0)), sum(coalesce(cast(wr_return_amt#11 as decimal(12,2)), 0.00)), sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(wr_return_quantity#10, 0))#26, sum(coalesce(ws_quantity#3, 0))#27, sum(coalesce(cast(wr_return_amt#11 as decimal(12,2)), 0.00))#28, sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#29] +Results [3]: [ws_item_sk#1 AS item#30, CheckOverflow((promote_precision(cast(sum(coalesce(wr_return_quantity#10, 0))#26 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ws_quantity#3, 0))#27 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#31, CheckOverflow((promote_precision(cast(sum(coalesce(cast(wr_return_amt#11 as decimal(12,2)), 0.00))#28 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ws_net_paid#4 as decimal(12,2)), 0.00))#29 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#32] (18) Exchange -Input [3]: [item#32, return_ratio#33, currency_ratio#34] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#35] +Input [3]: [item#30, return_ratio#31, currency_ratio#32] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 5] -Input [3]: [item#32, return_ratio#33, currency_ratio#34] -Arguments: [return_ratio#33 ASC NULLS FIRST], false, 0 +Input [3]: [item#30, return_ratio#31, currency_ratio#32] +Arguments: [return_ratio#31 ASC NULLS FIRST], false, 0 (20) Window -Input [3]: [item#32, return_ratio#33, currency_ratio#34] -Arguments: [rank(return_ratio#33) windowspecdefinition(return_ratio#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#36], [return_ratio#33 ASC NULLS FIRST] +Input [3]: [item#30, return_ratio#31, currency_ratio#32] +Arguments: [rank(return_ratio#31) windowspecdefinition(return_ratio#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#33], [return_ratio#31 ASC NULLS FIRST] (21) Sort [codegen id : 6] -Input [4]: [item#32, return_ratio#33, currency_ratio#34, return_rank#36] -Arguments: [currency_ratio#34 ASC NULLS FIRST], false, 0 +Input [4]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33] +Arguments: [currency_ratio#32 ASC NULLS FIRST], false, 0 (22) Window -Input [4]: [item#32, return_ratio#33, currency_ratio#34, return_rank#36] -Arguments: [rank(currency_ratio#34) windowspecdefinition(currency_ratio#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#37], [currency_ratio#34 ASC NULLS FIRST] +Input [4]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33] +Arguments: [rank(currency_ratio#32) windowspecdefinition(currency_ratio#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#34], [currency_ratio#32 ASC NULLS FIRST] (23) Filter [codegen id : 7] -Input [5]: [item#32, return_ratio#33, currency_ratio#34, return_rank#36, currency_rank#37] -Condition : ((return_rank#36 <= 10) OR (currency_rank#37 <= 10)) +Input [5]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33, currency_rank#34] +Condition : ((return_rank#33 <= 10) OR (currency_rank#34 <= 10)) (24) Project [codegen id : 7] -Output [5]: [web AS channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] -Input [5]: [item#32, return_ratio#33, currency_ratio#34, return_rank#36, currency_rank#37] +Output [5]: [web AS channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Input [5]: [item#30, return_ratio#31, currency_ratio#32, return_rank#33, currency_rank#34] (25) Scan parquet default.catalog_sales -Output [6]: [cs_item_sk#39, cs_order_number#40, cs_quantity#41, cs_net_paid#42, cs_net_profit#43, cs_sold_date_sk#44] +Output [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#44), dynamicpruningexpression(cs_sold_date_sk#44 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(cs_sold_date_sk#41), dynamicpruningexpression(cs_sold_date_sk#41 IN dynamicpruning#7)] PushedFilters: [IsNotNull(cs_net_profit), IsNotNull(cs_net_paid), IsNotNull(cs_quantity), GreaterThan(cs_net_profit,1.00), GreaterThan(cs_net_paid,0.00), GreaterThan(cs_quantity,0), IsNotNull(cs_order_number), IsNotNull(cs_item_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 8] -Input [6]: [cs_item_sk#39, cs_order_number#40, cs_quantity#41, cs_net_paid#42, cs_net_profit#43, cs_sold_date_sk#44] +Input [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] (27) Filter [codegen id : 8] -Input [6]: [cs_item_sk#39, cs_order_number#40, cs_quantity#41, cs_net_paid#42, cs_net_profit#43, cs_sold_date_sk#44] -Condition : (((((((isnotnull(cs_net_profit#43) AND isnotnull(cs_net_paid#42)) AND isnotnull(cs_quantity#41)) AND (cs_net_profit#43 > 1.00)) AND (cs_net_paid#42 > 0.00)) AND (cs_quantity#41 > 0)) AND isnotnull(cs_order_number#40)) AND isnotnull(cs_item_sk#39)) +Input [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] +Condition : (((((((isnotnull(cs_net_profit#40) AND isnotnull(cs_net_paid#39)) AND isnotnull(cs_quantity#38)) AND (cs_net_profit#40 > 1.00)) AND (cs_net_paid#39 > 0.00)) AND (cs_quantity#38 > 0)) AND isnotnull(cs_order_number#37)) AND isnotnull(cs_item_sk#36)) (28) Project [codegen id : 8] -Output [5]: [cs_item_sk#39, cs_order_number#40, cs_quantity#41, cs_net_paid#42, cs_sold_date_sk#44] -Input [6]: [cs_item_sk#39, cs_order_number#40, cs_quantity#41, cs_net_paid#42, cs_net_profit#43, cs_sold_date_sk#44] +Output [5]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_sold_date_sk#41] +Input [6]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_net_profit#40, cs_sold_date_sk#41] (29) BroadcastExchange -Input [5]: [cs_item_sk#39, cs_order_number#40, cs_quantity#41, cs_net_paid#42, cs_sold_date_sk#44] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [id=#45] +Input [5]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_sold_date_sk#41] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=4] (30) Scan parquet default.catalog_returns -Output [5]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49, cr_returned_date_sk#50] +Output [5]: [cr_item_sk#42, cr_order_number#43, cr_return_quantity#44, cr_return_amount#45, cr_returned_date_sk#46] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_return_amount), GreaterThan(cr_return_amount,10000.00), IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] ReadSchema: struct (31) ColumnarToRow -Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49, cr_returned_date_sk#50] +Input [5]: [cr_item_sk#42, cr_order_number#43, cr_return_quantity#44, cr_return_amount#45, cr_returned_date_sk#46] (32) Filter -Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49, cr_returned_date_sk#50] -Condition : (((isnotnull(cr_return_amount#49) AND (cr_return_amount#49 > 10000.00)) AND isnotnull(cr_order_number#47)) AND isnotnull(cr_item_sk#46)) +Input [5]: [cr_item_sk#42, cr_order_number#43, cr_return_quantity#44, cr_return_amount#45, cr_returned_date_sk#46] +Condition : (((isnotnull(cr_return_amount#45) AND (cr_return_amount#45 > 10000.00)) AND isnotnull(cr_order_number#43)) AND isnotnull(cr_item_sk#42)) (33) Project -Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] -Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49, cr_returned_date_sk#50] +Output [4]: [cr_item_sk#42, cr_order_number#43, cr_return_quantity#44, cr_return_amount#45] +Input [5]: [cr_item_sk#42, cr_order_number#43, cr_return_quantity#44, cr_return_amount#45, cr_returned_date_sk#46] (34) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [cs_order_number#40, cs_item_sk#39] -Right keys [2]: [cr_order_number#47, cr_item_sk#46] +Left keys [2]: [cs_order_number#37, cs_item_sk#36] +Right keys [2]: [cr_order_number#43, cr_item_sk#42] Join condition: None (35) Project [codegen id : 10] -Output [6]: [cs_item_sk#39, cs_quantity#41, cs_net_paid#42, cs_sold_date_sk#44, cr_return_quantity#48, cr_return_amount#49] -Input [9]: [cs_item_sk#39, cs_order_number#40, cs_quantity#41, cs_net_paid#42, cs_sold_date_sk#44, cr_item_sk#46, cr_order_number#47, cr_return_quantity#48, cr_return_amount#49] +Output [6]: [cs_item_sk#36, cs_quantity#38, cs_net_paid#39, cs_sold_date_sk#41, cr_return_quantity#44, cr_return_amount#45] +Input [9]: [cs_item_sk#36, cs_order_number#37, cs_quantity#38, cs_net_paid#39, cs_sold_date_sk#41, cr_item_sk#42, cr_order_number#43, cr_return_quantity#44, cr_return_amount#45] (36) ReusedExchange [Reuses operator id: 82] -Output [1]: [d_date_sk#51] +Output [1]: [d_date_sk#47] (37) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#44] -Right keys [1]: [d_date_sk#51] +Left keys [1]: [cs_sold_date_sk#41] +Right keys [1]: [d_date_sk#47] Join condition: None (38) Project [codegen id : 10] -Output [5]: [cs_item_sk#39, cs_quantity#41, cs_net_paid#42, cr_return_quantity#48, cr_return_amount#49] -Input [7]: [cs_item_sk#39, cs_quantity#41, cs_net_paid#42, cs_sold_date_sk#44, cr_return_quantity#48, cr_return_amount#49, d_date_sk#51] +Output [5]: [cs_item_sk#36, cs_quantity#38, cs_net_paid#39, cr_return_quantity#44, cr_return_amount#45] +Input [7]: [cs_item_sk#36, cs_quantity#38, cs_net_paid#39, cs_sold_date_sk#41, cr_return_quantity#44, cr_return_amount#45, d_date_sk#47] (39) HashAggregate [codegen id : 10] -Input [5]: [cs_item_sk#39, cs_quantity#41, cs_net_paid#42, cr_return_quantity#48, cr_return_amount#49] -Keys [1]: [cs_item_sk#39] -Functions [4]: [partial_sum(coalesce(cr_return_quantity#48, 0)), partial_sum(coalesce(cs_quantity#41, 0)), partial_sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#52, sum#53, sum#54, isEmpty#55, sum#56, isEmpty#57] -Results [7]: [cs_item_sk#39, sum#58, sum#59, sum#60, isEmpty#61, sum#62, isEmpty#63] +Input [5]: [cs_item_sk#36, cs_quantity#38, cs_net_paid#39, cr_return_quantity#44, cr_return_amount#45] +Keys [1]: [cs_item_sk#36] +Functions [4]: [partial_sum(coalesce(cr_return_quantity#44, 0)), partial_sum(coalesce(cs_quantity#38, 0)), partial_sum(coalesce(cast(cr_return_amount#45 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#48, sum#49, sum#50, isEmpty#51, sum#52, isEmpty#53] +Results [7]: [cs_item_sk#36, sum#54, sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] (40) Exchange -Input [7]: [cs_item_sk#39, sum#58, sum#59, sum#60, isEmpty#61, sum#62, isEmpty#63] -Arguments: hashpartitioning(cs_item_sk#39, 5), ENSURE_REQUIREMENTS, [id=#64] +Input [7]: [cs_item_sk#36, sum#54, sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] +Arguments: hashpartitioning(cs_item_sk#36, 5), ENSURE_REQUIREMENTS, [plan_id=5] (41) HashAggregate [codegen id : 11] -Input [7]: [cs_item_sk#39, sum#58, sum#59, sum#60, isEmpty#61, sum#62, isEmpty#63] -Keys [1]: [cs_item_sk#39] -Functions [4]: [sum(coalesce(cr_return_quantity#48, 0)), sum(coalesce(cs_quantity#41, 0)), sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#48, 0))#65, sum(coalesce(cs_quantity#41, 0))#66, sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#67, sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))#68] -Results [3]: [cs_item_sk#39 AS item#69, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#48, 0))#65 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#41, 0))#66 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#70, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#49 as decimal(12,2)), 0.00))#67 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#42 as decimal(12,2)), 0.00))#68 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#71] +Input [7]: [cs_item_sk#36, sum#54, sum#55, sum#56, isEmpty#57, sum#58, isEmpty#59] +Keys [1]: [cs_item_sk#36] +Functions [4]: [sum(coalesce(cr_return_quantity#44, 0)), sum(coalesce(cs_quantity#38, 0)), sum(coalesce(cast(cr_return_amount#45 as decimal(12,2)), 0.00)), sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(cr_return_quantity#44, 0))#60, sum(coalesce(cs_quantity#38, 0))#61, sum(coalesce(cast(cr_return_amount#45 as decimal(12,2)), 0.00))#62, sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))#63] +Results [3]: [cs_item_sk#36 AS item#64, CheckOverflow((promote_precision(cast(sum(coalesce(cr_return_quantity#44, 0))#60 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cs_quantity#38, 0))#61 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#65, CheckOverflow((promote_precision(cast(sum(coalesce(cast(cr_return_amount#45 as decimal(12,2)), 0.00))#62 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(cs_net_paid#39 as decimal(12,2)), 0.00))#63 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#66] (42) Exchange -Input [3]: [item#69, return_ratio#70, currency_ratio#71] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#72] +Input [3]: [item#64, return_ratio#65, currency_ratio#66] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] (43) Sort [codegen id : 12] -Input [3]: [item#69, return_ratio#70, currency_ratio#71] -Arguments: [return_ratio#70 ASC NULLS FIRST], false, 0 +Input [3]: [item#64, return_ratio#65, currency_ratio#66] +Arguments: [return_ratio#65 ASC NULLS FIRST], false, 0 (44) Window -Input [3]: [item#69, return_ratio#70, currency_ratio#71] -Arguments: [rank(return_ratio#70) windowspecdefinition(return_ratio#70 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#73], [return_ratio#70 ASC NULLS FIRST] +Input [3]: [item#64, return_ratio#65, currency_ratio#66] +Arguments: [rank(return_ratio#65) windowspecdefinition(return_ratio#65 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#67], [return_ratio#65 ASC NULLS FIRST] (45) Sort [codegen id : 13] -Input [4]: [item#69, return_ratio#70, currency_ratio#71, return_rank#73] -Arguments: [currency_ratio#71 ASC NULLS FIRST], false, 0 +Input [4]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67] +Arguments: [currency_ratio#66 ASC NULLS FIRST], false, 0 (46) Window -Input [4]: [item#69, return_ratio#70, currency_ratio#71, return_rank#73] -Arguments: [rank(currency_ratio#71) windowspecdefinition(currency_ratio#71 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#74], [currency_ratio#71 ASC NULLS FIRST] +Input [4]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67] +Arguments: [rank(currency_ratio#66) windowspecdefinition(currency_ratio#66 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#68], [currency_ratio#66 ASC NULLS FIRST] (47) Filter [codegen id : 14] -Input [5]: [item#69, return_ratio#70, currency_ratio#71, return_rank#73, currency_rank#74] -Condition : ((return_rank#73 <= 10) OR (currency_rank#74 <= 10)) +Input [5]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67, currency_rank#68] +Condition : ((return_rank#67 <= 10) OR (currency_rank#68 <= 10)) (48) Project [codegen id : 14] -Output [5]: [catalog AS channel#75, item#69, return_ratio#70, return_rank#73, currency_rank#74] -Input [5]: [item#69, return_ratio#70, currency_ratio#71, return_rank#73, currency_rank#74] +Output [5]: [catalog AS channel#69, item#64, return_ratio#65, return_rank#67, currency_rank#68] +Input [5]: [item#64, return_ratio#65, currency_ratio#66, return_rank#67, currency_rank#68] (49) Scan parquet default.store_sales -Output [6]: [ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80, ss_sold_date_sk#81] +Output [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#81), dynamicpruningexpression(ss_sold_date_sk#81 IN dynamicpruning#7)] +PartitionFilters: [isnotnull(ss_sold_date_sk#75), dynamicpruningexpression(ss_sold_date_sk#75 IN dynamicpruning#7)] PushedFilters: [IsNotNull(ss_net_profit), IsNotNull(ss_net_paid), IsNotNull(ss_quantity), GreaterThan(ss_net_profit,1.00), GreaterThan(ss_net_paid,0.00), GreaterThan(ss_quantity,0), IsNotNull(ss_ticket_number), IsNotNull(ss_item_sk)] ReadSchema: struct (50) ColumnarToRow [codegen id : 15] -Input [6]: [ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80, ss_sold_date_sk#81] +Input [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] (51) Filter [codegen id : 15] -Input [6]: [ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80, ss_sold_date_sk#81] -Condition : (((((((isnotnull(ss_net_profit#80) AND isnotnull(ss_net_paid#79)) AND isnotnull(ss_quantity#78)) AND (ss_net_profit#80 > 1.00)) AND (ss_net_paid#79 > 0.00)) AND (ss_quantity#78 > 0)) AND isnotnull(ss_ticket_number#77)) AND isnotnull(ss_item_sk#76)) +Input [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] +Condition : (((((((isnotnull(ss_net_profit#74) AND isnotnull(ss_net_paid#73)) AND isnotnull(ss_quantity#72)) AND (ss_net_profit#74 > 1.00)) AND (ss_net_paid#73 > 0.00)) AND (ss_quantity#72 > 0)) AND isnotnull(ss_ticket_number#71)) AND isnotnull(ss_item_sk#70)) (52) Project [codegen id : 15] -Output [5]: [ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_sold_date_sk#81] -Input [6]: [ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_net_profit#80, ss_sold_date_sk#81] +Output [5]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_sold_date_sk#75] +Input [6]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_net_profit#74, ss_sold_date_sk#75] (53) BroadcastExchange -Input [5]: [ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_sold_date_sk#81] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [id=#82] +Input [5]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_sold_date_sk#75] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, true] as bigint), 32) | (cast(input[0, int, true] as bigint) & 4294967295))),false), [plan_id=7] (54) Scan parquet default.store_returns -Output [5]: [sr_item_sk#83, sr_ticket_number#84, sr_return_quantity#85, sr_return_amt#86, sr_returned_date_sk#87] +Output [5]: [sr_item_sk#76, sr_ticket_number#77, sr_return_quantity#78, sr_return_amt#79, sr_returned_date_sk#80] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_return_amt), GreaterThan(sr_return_amt,10000.00), IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] ReadSchema: struct (55) ColumnarToRow -Input [5]: [sr_item_sk#83, sr_ticket_number#84, sr_return_quantity#85, sr_return_amt#86, sr_returned_date_sk#87] +Input [5]: [sr_item_sk#76, sr_ticket_number#77, sr_return_quantity#78, sr_return_amt#79, sr_returned_date_sk#80] (56) Filter -Input [5]: [sr_item_sk#83, sr_ticket_number#84, sr_return_quantity#85, sr_return_amt#86, sr_returned_date_sk#87] -Condition : (((isnotnull(sr_return_amt#86) AND (sr_return_amt#86 > 10000.00)) AND isnotnull(sr_ticket_number#84)) AND isnotnull(sr_item_sk#83)) +Input [5]: [sr_item_sk#76, sr_ticket_number#77, sr_return_quantity#78, sr_return_amt#79, sr_returned_date_sk#80] +Condition : (((isnotnull(sr_return_amt#79) AND (sr_return_amt#79 > 10000.00)) AND isnotnull(sr_ticket_number#77)) AND isnotnull(sr_item_sk#76)) (57) Project -Output [4]: [sr_item_sk#83, sr_ticket_number#84, sr_return_quantity#85, sr_return_amt#86] -Input [5]: [sr_item_sk#83, sr_ticket_number#84, sr_return_quantity#85, sr_return_amt#86, sr_returned_date_sk#87] +Output [4]: [sr_item_sk#76, sr_ticket_number#77, sr_return_quantity#78, sr_return_amt#79] +Input [5]: [sr_item_sk#76, sr_ticket_number#77, sr_return_quantity#78, sr_return_amt#79, sr_returned_date_sk#80] (58) BroadcastHashJoin [codegen id : 17] -Left keys [2]: [ss_ticket_number#77, ss_item_sk#76] -Right keys [2]: [sr_ticket_number#84, sr_item_sk#83] +Left keys [2]: [ss_ticket_number#71, ss_item_sk#70] +Right keys [2]: [sr_ticket_number#77, sr_item_sk#76] Join condition: None (59) Project [codegen id : 17] -Output [6]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, ss_sold_date_sk#81, sr_return_quantity#85, sr_return_amt#86] -Input [9]: [ss_item_sk#76, ss_ticket_number#77, ss_quantity#78, ss_net_paid#79, ss_sold_date_sk#81, sr_item_sk#83, sr_ticket_number#84, sr_return_quantity#85, sr_return_amt#86] +Output [6]: [ss_item_sk#70, ss_quantity#72, ss_net_paid#73, ss_sold_date_sk#75, sr_return_quantity#78, sr_return_amt#79] +Input [9]: [ss_item_sk#70, ss_ticket_number#71, ss_quantity#72, ss_net_paid#73, ss_sold_date_sk#75, sr_item_sk#76, sr_ticket_number#77, sr_return_quantity#78, sr_return_amt#79] (60) ReusedExchange [Reuses operator id: 82] -Output [1]: [d_date_sk#88] +Output [1]: [d_date_sk#81] (61) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ss_sold_date_sk#81] -Right keys [1]: [d_date_sk#88] +Left keys [1]: [ss_sold_date_sk#75] +Right keys [1]: [d_date_sk#81] Join condition: None (62) Project [codegen id : 17] -Output [5]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#85, sr_return_amt#86] -Input [7]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, ss_sold_date_sk#81, sr_return_quantity#85, sr_return_amt#86, d_date_sk#88] +Output [5]: [ss_item_sk#70, ss_quantity#72, ss_net_paid#73, sr_return_quantity#78, sr_return_amt#79] +Input [7]: [ss_item_sk#70, ss_quantity#72, ss_net_paid#73, ss_sold_date_sk#75, sr_return_quantity#78, sr_return_amt#79, d_date_sk#81] (63) HashAggregate [codegen id : 17] -Input [5]: [ss_item_sk#76, ss_quantity#78, ss_net_paid#79, sr_return_quantity#85, sr_return_amt#86] -Keys [1]: [ss_item_sk#76] -Functions [4]: [partial_sum(coalesce(sr_return_quantity#85, 0)), partial_sum(coalesce(ss_quantity#78, 0)), partial_sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))] -Aggregate Attributes [6]: [sum#89, sum#90, sum#91, isEmpty#92, sum#93, isEmpty#94] -Results [7]: [ss_item_sk#76, sum#95, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Input [5]: [ss_item_sk#70, ss_quantity#72, ss_net_paid#73, sr_return_quantity#78, sr_return_amt#79] +Keys [1]: [ss_item_sk#70] +Functions [4]: [partial_sum(coalesce(sr_return_quantity#78, 0)), partial_sum(coalesce(ss_quantity#72, 0)), partial_sum(coalesce(cast(sr_return_amt#79 as decimal(12,2)), 0.00)), partial_sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))] +Aggregate Attributes [6]: [sum#82, sum#83, sum#84, isEmpty#85, sum#86, isEmpty#87] +Results [7]: [ss_item_sk#70, sum#88, sum#89, sum#90, isEmpty#91, sum#92, isEmpty#93] (64) Exchange -Input [7]: [ss_item_sk#76, sum#95, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] -Arguments: hashpartitioning(ss_item_sk#76, 5), ENSURE_REQUIREMENTS, [id=#101] +Input [7]: [ss_item_sk#70, sum#88, sum#89, sum#90, isEmpty#91, sum#92, isEmpty#93] +Arguments: hashpartitioning(ss_item_sk#70, 5), ENSURE_REQUIREMENTS, [plan_id=8] (65) HashAggregate [codegen id : 18] -Input [7]: [ss_item_sk#76, sum#95, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] -Keys [1]: [ss_item_sk#76] -Functions [4]: [sum(coalesce(sr_return_quantity#85, 0)), sum(coalesce(ss_quantity#78, 0)), sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))] -Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#85, 0))#102, sum(coalesce(ss_quantity#78, 0))#103, sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00))#104, sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#105] -Results [3]: [ss_item_sk#76 AS item#106, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#85, 0))#102 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#78, 0))#103 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#107, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#86 as decimal(12,2)), 0.00))#104 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#79 as decimal(12,2)), 0.00))#105 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#108] +Input [7]: [ss_item_sk#70, sum#88, sum#89, sum#90, isEmpty#91, sum#92, isEmpty#93] +Keys [1]: [ss_item_sk#70] +Functions [4]: [sum(coalesce(sr_return_quantity#78, 0)), sum(coalesce(ss_quantity#72, 0)), sum(coalesce(cast(sr_return_amt#79 as decimal(12,2)), 0.00)), sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))] +Aggregate Attributes [4]: [sum(coalesce(sr_return_quantity#78, 0))#94, sum(coalesce(ss_quantity#72, 0))#95, sum(coalesce(cast(sr_return_amt#79 as decimal(12,2)), 0.00))#96, sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))#97] +Results [3]: [ss_item_sk#70 AS item#98, CheckOverflow((promote_precision(cast(sum(coalesce(sr_return_quantity#78, 0))#94 as decimal(15,4))) / promote_precision(cast(sum(coalesce(ss_quantity#72, 0))#95 as decimal(15,4)))), DecimalType(35,20)) AS return_ratio#99, CheckOverflow((promote_precision(cast(sum(coalesce(cast(sr_return_amt#79 as decimal(12,2)), 0.00))#96 as decimal(15,4))) / promote_precision(cast(sum(coalesce(cast(ss_net_paid#73 as decimal(12,2)), 0.00))#97 as decimal(15,4)))), DecimalType(35,20)) AS currency_ratio#100] (66) Exchange -Input [3]: [item#106, return_ratio#107, currency_ratio#108] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#109] +Input [3]: [item#98, return_ratio#99, currency_ratio#100] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=9] (67) Sort [codegen id : 19] -Input [3]: [item#106, return_ratio#107, currency_ratio#108] -Arguments: [return_ratio#107 ASC NULLS FIRST], false, 0 +Input [3]: [item#98, return_ratio#99, currency_ratio#100] +Arguments: [return_ratio#99 ASC NULLS FIRST], false, 0 (68) Window -Input [3]: [item#106, return_ratio#107, currency_ratio#108] -Arguments: [rank(return_ratio#107) windowspecdefinition(return_ratio#107 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#110], [return_ratio#107 ASC NULLS FIRST] +Input [3]: [item#98, return_ratio#99, currency_ratio#100] +Arguments: [rank(return_ratio#99) windowspecdefinition(return_ratio#99 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS return_rank#101], [return_ratio#99 ASC NULLS FIRST] (69) Sort [codegen id : 20] -Input [4]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110] -Arguments: [currency_ratio#108 ASC NULLS FIRST], false, 0 +Input [4]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101] +Arguments: [currency_ratio#100 ASC NULLS FIRST], false, 0 (70) Window -Input [4]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110] -Arguments: [rank(currency_ratio#108) windowspecdefinition(currency_ratio#108 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#111], [currency_ratio#108 ASC NULLS FIRST] +Input [4]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101] +Arguments: [rank(currency_ratio#100) windowspecdefinition(currency_ratio#100 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS currency_rank#102], [currency_ratio#100 ASC NULLS FIRST] (71) Filter [codegen id : 21] -Input [5]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110, currency_rank#111] -Condition : ((return_rank#110 <= 10) OR (currency_rank#111 <= 10)) +Input [5]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101, currency_rank#102] +Condition : ((return_rank#101 <= 10) OR (currency_rank#102 <= 10)) (72) Project [codegen id : 21] -Output [5]: [store AS channel#112, item#106, return_ratio#107, return_rank#110, currency_rank#111] -Input [5]: [item#106, return_ratio#107, currency_ratio#108, return_rank#110, currency_rank#111] +Output [5]: [store AS channel#103, item#98, return_ratio#99, return_rank#101, currency_rank#102] +Input [5]: [item#98, return_ratio#99, currency_ratio#100, return_rank#101, currency_rank#102] (73) Union (74) HashAggregate [codegen id : 22] -Input [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] -Keys [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Keys [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] +Results [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] (75) Exchange -Input [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] -Arguments: hashpartitioning(channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37, 5), ENSURE_REQUIREMENTS, [id=#113] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Arguments: hashpartitioning(channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34, 5), ENSURE_REQUIREMENTS, [plan_id=10] (76) HashAggregate [codegen id : 23] -Input [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] -Keys [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Keys [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] +Results [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] (77) TakeOrderedAndProject -Input [5]: [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] -Arguments: 100, [channel#38 ASC NULLS FIRST, return_rank#36 ASC NULLS FIRST, currency_rank#37 ASC NULLS FIRST, item#32 ASC NULLS FIRST], [channel#38, item#32, return_ratio#33, return_rank#36, currency_rank#37] +Input [5]: [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] +Arguments: 100, [channel#35 ASC NULLS FIRST, return_rank#33 ASC NULLS FIRST, currency_rank#34 ASC NULLS FIRST, item#30 ASC NULLS FIRST], [channel#35, item#30, return_ratio#31, return_rank#33, currency_rank#34] ===== Subqueries ===== @@ -437,29 +437,29 @@ BroadcastExchange (82) (78) Scan parquet default.date_dim -Output [3]: [d_date_sk#14, d_year#114, d_moy#115] +Output [3]: [d_date_sk#13, d_year#104, d_moy#105] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2001), EqualTo(d_moy,12), IsNotNull(d_date_sk)] ReadSchema: struct (79) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#14, d_year#114, d_moy#115] +Input [3]: [d_date_sk#13, d_year#104, d_moy#105] (80) Filter [codegen id : 1] -Input [3]: [d_date_sk#14, d_year#114, d_moy#115] -Condition : ((((isnotnull(d_year#114) AND isnotnull(d_moy#115)) AND (d_year#114 = 2001)) AND (d_moy#115 = 12)) AND isnotnull(d_date_sk#14)) +Input [3]: [d_date_sk#13, d_year#104, d_moy#105] +Condition : ((((isnotnull(d_year#104) AND isnotnull(d_moy#105)) AND (d_year#104 = 2001)) AND (d_moy#105 = 12)) AND isnotnull(d_date_sk#13)) (81) Project [codegen id : 1] -Output [1]: [d_date_sk#14] -Input [3]: [d_date_sk#14, d_year#114, d_moy#115] +Output [1]: [d_date_sk#13] +Input [3]: [d_date_sk#13, d_year#104, d_moy#105] (82) BroadcastExchange -Input [1]: [d_date_sk#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#116] +Input [1]: [d_date_sk#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] -Subquery:2 Hosting operator id = 25 Hosting Expression = cs_sold_date_sk#44 IN dynamicpruning#7 +Subquery:2 Hosting operator id = 25 Hosting Expression = cs_sold_date_sk#41 IN dynamicpruning#7 -Subquery:3 Hosting operator id = 49 Hosting Expression = ss_sold_date_sk#81 IN dynamicpruning#7 +Subquery:3 Hosting operator id = 49 Hosting Expression = ss_sold_date_sk#75 IN dynamicpruning#7 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/explain.txt index 64111eef627d2..89749badaad68 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a.sf100/explain.txt @@ -107,286 +107,286 @@ Results [3]: [ws_item_sk#1, d_date#6, sum#8] (8) Exchange Input [3]: [ws_item_sk#1, d_date#6, sum#8] -Arguments: hashpartitioning(ws_item_sk#1, d_date#6, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ws_item_sk#1, d_date#6, 5), ENSURE_REQUIREMENTS, [plan_id=1] (9) HashAggregate [codegen id : 3] Input [3]: [ws_item_sk#1, d_date#6, sum#8] Keys [2]: [ws_item_sk#1, d_date#6] Functions [1]: [sum(UnscaledValue(ws_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#2))#10] -Results [4]: [ws_item_sk#1 AS item_sk#11, d_date#6, MakeDecimal(sum(UnscaledValue(ws_sales_price#2))#10,17,2) AS sumws#12, ws_item_sk#1] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#2))#9] +Results [4]: [ws_item_sk#1 AS item_sk#10, d_date#6, MakeDecimal(sum(UnscaledValue(ws_sales_price#2))#9,17,2) AS sumws#11, ws_item_sk#1] (10) Exchange -Input [4]: [item_sk#11, d_date#6, sumws#12, ws_item_sk#1] -Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [4]: [item_sk#10, d_date#6, sumws#11, ws_item_sk#1] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [4]: [item_sk#11, d_date#6, sumws#12, ws_item_sk#1] +Input [4]: [item_sk#10, d_date#6, sumws#11, ws_item_sk#1] Arguments: [ws_item_sk#1 ASC NULLS FIRST, d_date#6 ASC NULLS FIRST], false, 0 (12) Window -Input [4]: [item_sk#11, d_date#6, sumws#12, ws_item_sk#1] -Arguments: [row_number() windowspecdefinition(ws_item_sk#1, d_date#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#14], [ws_item_sk#1], [d_date#6 ASC NULLS FIRST] +Input [4]: [item_sk#10, d_date#6, sumws#11, ws_item_sk#1] +Arguments: [row_number() windowspecdefinition(ws_item_sk#1, d_date#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#12], [ws_item_sk#1], [d_date#6 ASC NULLS FIRST] (13) Project [codegen id : 5] -Output [4]: [item_sk#11, d_date#6, sumws#12, rk#14] -Input [5]: [item_sk#11, d_date#6, sumws#12, ws_item_sk#1, rk#14] +Output [4]: [item_sk#10, d_date#6, sumws#11, rk#12] +Input [5]: [item_sk#10, d_date#6, sumws#11, ws_item_sk#1, rk#12] (14) Exchange -Input [4]: [item_sk#11, d_date#6, sumws#12, rk#14] -Arguments: hashpartitioning(item_sk#11, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [item_sk#10, d_date#6, sumws#11, rk#12] +Arguments: hashpartitioning(item_sk#10, 5), ENSURE_REQUIREMENTS, [plan_id=3] (15) Sort [codegen id : 6] -Input [4]: [item_sk#11, d_date#6, sumws#12, rk#14] -Arguments: [item_sk#11 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#10, d_date#6, sumws#11, rk#12] +Arguments: [item_sk#10 ASC NULLS FIRST], false, 0 (16) ReusedExchange [Reuses operator id: 10] -Output [4]: [item_sk#11, d_date#16, sumws#12, ws_item_sk#17] +Output [4]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14] (17) Sort [codegen id : 10] -Input [4]: [item_sk#11, d_date#16, sumws#12, ws_item_sk#17] -Arguments: [ws_item_sk#17 ASC NULLS FIRST, d_date#16 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14] +Arguments: [ws_item_sk#14 ASC NULLS FIRST, d_date#13 ASC NULLS FIRST], false, 0 (18) Window -Input [4]: [item_sk#11, d_date#16, sumws#12, ws_item_sk#17] -Arguments: [row_number() windowspecdefinition(ws_item_sk#17, d_date#16 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#18], [ws_item_sk#17], [d_date#16 ASC NULLS FIRST] +Input [4]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14] +Arguments: [row_number() windowspecdefinition(ws_item_sk#14, d_date#13 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#15], [ws_item_sk#14], [d_date#13 ASC NULLS FIRST] (19) Project [codegen id : 11] -Output [3]: [item_sk#11 AS item_sk#19, sumws#12 AS sumws#20, rk#18] -Input [5]: [item_sk#11, d_date#16, sumws#12, ws_item_sk#17, rk#18] +Output [3]: [item_sk#10 AS item_sk#16, sumws#11 AS sumws#17, rk#15] +Input [5]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14, rk#15] (20) Exchange -Input [3]: [item_sk#19, sumws#20, rk#18] -Arguments: hashpartitioning(item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [3]: [item_sk#16, sumws#17, rk#15] +Arguments: hashpartitioning(item_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=4] (21) Sort [codegen id : 12] -Input [3]: [item_sk#19, sumws#20, rk#18] -Arguments: [item_sk#19 ASC NULLS FIRST], false, 0 +Input [3]: [item_sk#16, sumws#17, rk#15] +Arguments: [item_sk#16 ASC NULLS FIRST], false, 0 (22) SortMergeJoin [codegen id : 13] -Left keys [1]: [item_sk#11] -Right keys [1]: [item_sk#19] -Join condition: (rk#14 >= rk#18) +Left keys [1]: [item_sk#10] +Right keys [1]: [item_sk#16] +Join condition: (rk#12 >= rk#15) (23) Project [codegen id : 13] -Output [4]: [item_sk#11, d_date#6, sumws#12, sumws#20] -Input [7]: [item_sk#11, d_date#6, sumws#12, rk#14, item_sk#19, sumws#20, rk#18] +Output [4]: [item_sk#10, d_date#6, sumws#11, sumws#17] +Input [7]: [item_sk#10, d_date#6, sumws#11, rk#12, item_sk#16, sumws#17, rk#15] (24) HashAggregate [codegen id : 13] -Input [4]: [item_sk#11, d_date#6, sumws#12, sumws#20] -Keys [3]: [item_sk#11, d_date#6, sumws#12] -Functions [1]: [partial_sum(sumws#20)] -Aggregate Attributes [2]: [sum#22, isEmpty#23] -Results [5]: [item_sk#11, d_date#6, sumws#12, sum#24, isEmpty#25] +Input [4]: [item_sk#10, d_date#6, sumws#11, sumws#17] +Keys [3]: [item_sk#10, d_date#6, sumws#11] +Functions [1]: [partial_sum(sumws#17)] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [5]: [item_sk#10, d_date#6, sumws#11, sum#20, isEmpty#21] (25) HashAggregate [codegen id : 13] -Input [5]: [item_sk#11, d_date#6, sumws#12, sum#24, isEmpty#25] -Keys [3]: [item_sk#11, d_date#6, sumws#12] -Functions [1]: [sum(sumws#20)] -Aggregate Attributes [1]: [sum(sumws#20)#26] -Results [3]: [item_sk#11, d_date#6, sum(sumws#20)#26 AS cume_sales#27] +Input [5]: [item_sk#10, d_date#6, sumws#11, sum#20, isEmpty#21] +Keys [3]: [item_sk#10, d_date#6, sumws#11] +Functions [1]: [sum(sumws#17)] +Aggregate Attributes [1]: [sum(sumws#17)#22] +Results [3]: [item_sk#10, d_date#6, sum(sumws#17)#22 AS cume_sales#23] (26) Exchange -Input [3]: [item_sk#11, d_date#6, cume_sales#27] -Arguments: hashpartitioning(item_sk#11, d_date#6, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [3]: [item_sk#10, d_date#6, cume_sales#23] +Arguments: hashpartitioning(item_sk#10, d_date#6, 5), ENSURE_REQUIREMENTS, [plan_id=5] (27) Sort [codegen id : 14] -Input [3]: [item_sk#11, d_date#6, cume_sales#27] -Arguments: [item_sk#11 ASC NULLS FIRST, d_date#6 ASC NULLS FIRST], false, 0 +Input [3]: [item_sk#10, d_date#6, cume_sales#23] +Arguments: [item_sk#10 ASC NULLS FIRST, d_date#6 ASC NULLS FIRST], false, 0 (28) Scan parquet default.store_sales -Output [3]: [ss_item_sk#29, ss_sales_price#30, ss_sold_date_sk#31] +Output [3]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#31), dynamicpruningexpression(ss_sold_date_sk#31 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(ss_sold_date_sk#26), dynamicpruningexpression(ss_sold_date_sk#26 IN dynamicpruning#4)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (29) ColumnarToRow [codegen id : 16] -Input [3]: [ss_item_sk#29, ss_sales_price#30, ss_sold_date_sk#31] +Input [3]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26] (30) Filter [codegen id : 16] -Input [3]: [ss_item_sk#29, ss_sales_price#30, ss_sold_date_sk#31] -Condition : isnotnull(ss_item_sk#29) +Input [3]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26] +Condition : isnotnull(ss_item_sk#24) (31) ReusedExchange [Reuses operator id: 75] -Output [2]: [d_date_sk#32, d_date#33] +Output [2]: [d_date_sk#27, d_date#28] (32) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [ss_sold_date_sk#31] -Right keys [1]: [d_date_sk#32] +Left keys [1]: [ss_sold_date_sk#26] +Right keys [1]: [d_date_sk#27] Join condition: None (33) Project [codegen id : 16] -Output [3]: [ss_item_sk#29, ss_sales_price#30, d_date#33] -Input [5]: [ss_item_sk#29, ss_sales_price#30, ss_sold_date_sk#31, d_date_sk#32, d_date#33] +Output [3]: [ss_item_sk#24, ss_sales_price#25, d_date#28] +Input [5]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26, d_date_sk#27, d_date#28] (34) HashAggregate [codegen id : 16] -Input [3]: [ss_item_sk#29, ss_sales_price#30, d_date#33] -Keys [2]: [ss_item_sk#29, d_date#33] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#30))] -Aggregate Attributes [1]: [sum#34] -Results [3]: [ss_item_sk#29, d_date#33, sum#35] +Input [3]: [ss_item_sk#24, ss_sales_price#25, d_date#28] +Keys [2]: [ss_item_sk#24, d_date#28] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#25))] +Aggregate Attributes [1]: [sum#29] +Results [3]: [ss_item_sk#24, d_date#28, sum#30] (35) Exchange -Input [3]: [ss_item_sk#29, d_date#33, sum#35] -Arguments: hashpartitioning(ss_item_sk#29, d_date#33, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [3]: [ss_item_sk#24, d_date#28, sum#30] +Arguments: hashpartitioning(ss_item_sk#24, d_date#28, 5), ENSURE_REQUIREMENTS, [plan_id=6] (36) HashAggregate [codegen id : 17] -Input [3]: [ss_item_sk#29, d_date#33, sum#35] -Keys [2]: [ss_item_sk#29, d_date#33] -Functions [1]: [sum(UnscaledValue(ss_sales_price#30))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#30))#37] -Results [4]: [ss_item_sk#29 AS item_sk#38, d_date#33, MakeDecimal(sum(UnscaledValue(ss_sales_price#30))#37,17,2) AS sumss#39, ss_item_sk#29] +Input [3]: [ss_item_sk#24, d_date#28, sum#30] +Keys [2]: [ss_item_sk#24, d_date#28] +Functions [1]: [sum(UnscaledValue(ss_sales_price#25))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#25))#31] +Results [4]: [ss_item_sk#24 AS item_sk#32, d_date#28, MakeDecimal(sum(UnscaledValue(ss_sales_price#25))#31,17,2) AS sumss#33, ss_item_sk#24] (37) Exchange -Input [4]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#29] -Arguments: hashpartitioning(ss_item_sk#29, 5), ENSURE_REQUIREMENTS, [id=#40] +Input [4]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24] +Arguments: hashpartitioning(ss_item_sk#24, 5), ENSURE_REQUIREMENTS, [plan_id=7] (38) Sort [codegen id : 18] -Input [4]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#29] -Arguments: [ss_item_sk#29 ASC NULLS FIRST, d_date#33 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24] +Arguments: [ss_item_sk#24 ASC NULLS FIRST, d_date#28 ASC NULLS FIRST], false, 0 (39) Window -Input [4]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#29] -Arguments: [row_number() windowspecdefinition(ss_item_sk#29, d_date#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#41], [ss_item_sk#29], [d_date#33 ASC NULLS FIRST] +Input [4]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24] +Arguments: [row_number() windowspecdefinition(ss_item_sk#24, d_date#28 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#34], [ss_item_sk#24], [d_date#28 ASC NULLS FIRST] (40) Project [codegen id : 19] -Output [4]: [item_sk#38, d_date#33, sumss#39, rk#41] -Input [5]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#29, rk#41] +Output [4]: [item_sk#32, d_date#28, sumss#33, rk#34] +Input [5]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24, rk#34] (41) Exchange -Input [4]: [item_sk#38, d_date#33, sumss#39, rk#41] -Arguments: hashpartitioning(item_sk#38, 5), ENSURE_REQUIREMENTS, [id=#42] +Input [4]: [item_sk#32, d_date#28, sumss#33, rk#34] +Arguments: hashpartitioning(item_sk#32, 5), ENSURE_REQUIREMENTS, [plan_id=8] (42) Sort [codegen id : 20] -Input [4]: [item_sk#38, d_date#33, sumss#39, rk#41] -Arguments: [item_sk#38 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#32, d_date#28, sumss#33, rk#34] +Arguments: [item_sk#32 ASC NULLS FIRST], false, 0 (43) ReusedExchange [Reuses operator id: 37] -Output [4]: [item_sk#38, d_date#43, sumss#39, ss_item_sk#44] +Output [4]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36] (44) Sort [codegen id : 24] -Input [4]: [item_sk#38, d_date#43, sumss#39, ss_item_sk#44] -Arguments: [ss_item_sk#44 ASC NULLS FIRST, d_date#43 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36] +Arguments: [ss_item_sk#36 ASC NULLS FIRST, d_date#35 ASC NULLS FIRST], false, 0 (45) Window -Input [4]: [item_sk#38, d_date#43, sumss#39, ss_item_sk#44] -Arguments: [row_number() windowspecdefinition(ss_item_sk#44, d_date#43 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#45], [ss_item_sk#44], [d_date#43 ASC NULLS FIRST] +Input [4]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36] +Arguments: [row_number() windowspecdefinition(ss_item_sk#36, d_date#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#37], [ss_item_sk#36], [d_date#35 ASC NULLS FIRST] (46) Project [codegen id : 25] -Output [3]: [item_sk#38 AS item_sk#46, sumss#39 AS sumss#47, rk#45] -Input [5]: [item_sk#38, d_date#43, sumss#39, ss_item_sk#44, rk#45] +Output [3]: [item_sk#32 AS item_sk#38, sumss#33 AS sumss#39, rk#37] +Input [5]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36, rk#37] (47) Exchange -Input [3]: [item_sk#46, sumss#47, rk#45] -Arguments: hashpartitioning(item_sk#46, 5), ENSURE_REQUIREMENTS, [id=#48] +Input [3]: [item_sk#38, sumss#39, rk#37] +Arguments: hashpartitioning(item_sk#38, 5), ENSURE_REQUIREMENTS, [plan_id=9] (48) Sort [codegen id : 26] -Input [3]: [item_sk#46, sumss#47, rk#45] -Arguments: [item_sk#46 ASC NULLS FIRST], false, 0 +Input [3]: [item_sk#38, sumss#39, rk#37] +Arguments: [item_sk#38 ASC NULLS FIRST], false, 0 (49) SortMergeJoin [codegen id : 27] -Left keys [1]: [item_sk#38] -Right keys [1]: [item_sk#46] -Join condition: (rk#41 >= rk#45) +Left keys [1]: [item_sk#32] +Right keys [1]: [item_sk#38] +Join condition: (rk#34 >= rk#37) (50) Project [codegen id : 27] -Output [4]: [item_sk#38, d_date#33, sumss#39, sumss#47] -Input [7]: [item_sk#38, d_date#33, sumss#39, rk#41, item_sk#46, sumss#47, rk#45] +Output [4]: [item_sk#32, d_date#28, sumss#33, sumss#39] +Input [7]: [item_sk#32, d_date#28, sumss#33, rk#34, item_sk#38, sumss#39, rk#37] (51) HashAggregate [codegen id : 27] -Input [4]: [item_sk#38, d_date#33, sumss#39, sumss#47] -Keys [3]: [item_sk#38, d_date#33, sumss#39] -Functions [1]: [partial_sum(sumss#47)] -Aggregate Attributes [2]: [sum#49, isEmpty#50] -Results [5]: [item_sk#38, d_date#33, sumss#39, sum#51, isEmpty#52] +Input [4]: [item_sk#32, d_date#28, sumss#33, sumss#39] +Keys [3]: [item_sk#32, d_date#28, sumss#33] +Functions [1]: [partial_sum(sumss#39)] +Aggregate Attributes [2]: [sum#40, isEmpty#41] +Results [5]: [item_sk#32, d_date#28, sumss#33, sum#42, isEmpty#43] (52) HashAggregate [codegen id : 27] -Input [5]: [item_sk#38, d_date#33, sumss#39, sum#51, isEmpty#52] -Keys [3]: [item_sk#38, d_date#33, sumss#39] -Functions [1]: [sum(sumss#47)] -Aggregate Attributes [1]: [sum(sumss#47)#53] -Results [3]: [item_sk#38, d_date#33, sum(sumss#47)#53 AS cume_sales#54] +Input [5]: [item_sk#32, d_date#28, sumss#33, sum#42, isEmpty#43] +Keys [3]: [item_sk#32, d_date#28, sumss#33] +Functions [1]: [sum(sumss#39)] +Aggregate Attributes [1]: [sum(sumss#39)#44] +Results [3]: [item_sk#32, d_date#28, sum(sumss#39)#44 AS cume_sales#45] (53) Exchange -Input [3]: [item_sk#38, d_date#33, cume_sales#54] -Arguments: hashpartitioning(item_sk#38, d_date#33, 5), ENSURE_REQUIREMENTS, [id=#55] +Input [3]: [item_sk#32, d_date#28, cume_sales#45] +Arguments: hashpartitioning(item_sk#32, d_date#28, 5), ENSURE_REQUIREMENTS, [plan_id=10] (54) Sort [codegen id : 28] -Input [3]: [item_sk#38, d_date#33, cume_sales#54] -Arguments: [item_sk#38 ASC NULLS FIRST, d_date#33 ASC NULLS FIRST], false, 0 +Input [3]: [item_sk#32, d_date#28, cume_sales#45] +Arguments: [item_sk#32 ASC NULLS FIRST, d_date#28 ASC NULLS FIRST], false, 0 (55) SortMergeJoin [codegen id : 29] -Left keys [2]: [item_sk#11, d_date#6] -Right keys [2]: [item_sk#38, d_date#33] +Left keys [2]: [item_sk#10, d_date#6] +Right keys [2]: [item_sk#32, d_date#28] Join condition: None (56) Filter [codegen id : 29] -Input [6]: [item_sk#11, d_date#6, cume_sales#27, item_sk#38, d_date#33, cume_sales#54] -Condition : isnotnull(CASE WHEN isnotnull(item_sk#11) THEN item_sk#11 ELSE item_sk#38 END) +Input [6]: [item_sk#10, d_date#6, cume_sales#23, item_sk#32, d_date#28, cume_sales#45] +Condition : isnotnull(CASE WHEN isnotnull(item_sk#10) THEN item_sk#10 ELSE item_sk#32 END) (57) Project [codegen id : 29] -Output [4]: [CASE WHEN isnotnull(item_sk#11) THEN item_sk#11 ELSE item_sk#38 END AS item_sk#56, CASE WHEN isnotnull(d_date#6) THEN d_date#6 ELSE d_date#33 END AS d_date#57, cume_sales#27 AS web_sales#58, cume_sales#54 AS store_sales#59] -Input [6]: [item_sk#11, d_date#6, cume_sales#27, item_sk#38, d_date#33, cume_sales#54] +Output [4]: [CASE WHEN isnotnull(item_sk#10) THEN item_sk#10 ELSE item_sk#32 END AS item_sk#46, CASE WHEN isnotnull(d_date#6) THEN d_date#6 ELSE d_date#28 END AS d_date#47, cume_sales#23 AS web_sales#48, cume_sales#45 AS store_sales#49] +Input [6]: [item_sk#10, d_date#6, cume_sales#23, item_sk#32, d_date#28, cume_sales#45] (58) Exchange -Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -Arguments: hashpartitioning(item_sk#56, 5), ENSURE_REQUIREMENTS, [id=#60] +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: hashpartitioning(item_sk#46, 5), ENSURE_REQUIREMENTS, [plan_id=11] (59) Sort [codegen id : 30] -Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -Arguments: [item_sk#56 ASC NULLS FIRST, d_date#57 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [item_sk#46 ASC NULLS FIRST, d_date#47 ASC NULLS FIRST], false, 0 (60) Window -Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -Arguments: [row_number() windowspecdefinition(item_sk#56, d_date#57 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#61], [item_sk#56], [d_date#57 ASC NULLS FIRST] +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [row_number() windowspecdefinition(item_sk#46, d_date#47 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#50], [item_sk#46], [d_date#47 ASC NULLS FIRST] (61) ReusedExchange [Reuses operator id: 58] -Output [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] +Output [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] (62) Sort [codegen id : 60] -Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -Arguments: [item_sk#56 ASC NULLS FIRST, d_date#57 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [item_sk#46 ASC NULLS FIRST, d_date#47 ASC NULLS FIRST], false, 0 (63) Window -Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -Arguments: [row_number() windowspecdefinition(item_sk#56, d_date#57 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#62], [item_sk#56], [d_date#57 ASC NULLS FIRST] +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [row_number() windowspecdefinition(item_sk#46, d_date#47 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#51], [item_sk#46], [d_date#47 ASC NULLS FIRST] (64) Project [codegen id : 61] -Output [4]: [item_sk#56 AS item_sk#63, web_sales#58 AS web_sales#64, store_sales#59 AS store_sales#65, rk#62] -Input [5]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, rk#62] +Output [4]: [item_sk#46 AS item_sk#52, web_sales#48 AS web_sales#53, store_sales#49 AS store_sales#54, rk#51] +Input [5]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, rk#51] (65) SortMergeJoin [codegen id : 62] -Left keys [1]: [item_sk#56] -Right keys [1]: [item_sk#63] -Join condition: (rk#61 >= rk#62) +Left keys [1]: [item_sk#46] +Right keys [1]: [item_sk#52] +Join condition: (rk#50 >= rk#51) (66) Project [codegen id : 62] -Output [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_sales#64, store_sales#65] -Input [9]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, rk#61, item_sk#63, web_sales#64, store_sales#65, rk#62] +Output [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_sales#53, store_sales#54] +Input [9]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, rk#50, item_sk#52, web_sales#53, store_sales#54, rk#51] (67) HashAggregate [codegen id : 62] -Input [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_sales#64, store_sales#65] -Keys [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -Functions [2]: [partial_max(web_sales#64), partial_max(store_sales#65)] -Aggregate Attributes [2]: [max#66, max#67] -Results [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, max#68, max#69] +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_sales#53, store_sales#54] +Keys [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Functions [2]: [partial_max(web_sales#53), partial_max(store_sales#54)] +Aggregate Attributes [2]: [max#55, max#56] +Results [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, max#57, max#58] (68) HashAggregate [codegen id : 62] -Input [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, max#68, max#69] -Keys [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -Functions [2]: [max(web_sales#64), max(store_sales#65)] -Aggregate Attributes [2]: [max(web_sales#64)#70, max(store_sales#65)#71] -Results [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, max(web_sales#64)#70 AS web_cumulative#72, max(store_sales#65)#71 AS store_cumulative#73] +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, max#57, max#58] +Keys [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Functions [2]: [max(web_sales#53), max(store_sales#54)] +Aggregate Attributes [2]: [max(web_sales#53)#59, max(store_sales#54)#60] +Results [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, max(web_sales#53)#59 AS web_cumulative#61, max(store_sales#54)#60 AS store_cumulative#62] (69) Filter [codegen id : 62] -Input [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_cumulative#72, store_cumulative#73] -Condition : ((isnotnull(web_cumulative#72) AND isnotnull(store_cumulative#73)) AND (web_cumulative#72 > store_cumulative#73)) +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_cumulative#61, store_cumulative#62] +Condition : ((isnotnull(web_cumulative#61) AND isnotnull(store_cumulative#62)) AND (web_cumulative#61 > store_cumulative#62)) (70) TakeOrderedAndProject -Input [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_cumulative#72, store_cumulative#73] -Arguments: 100, [item_sk#56 ASC NULLS FIRST, d_date#57 ASC NULLS FIRST], [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_cumulative#72, store_cumulative#73] +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_cumulative#61, store_cumulative#62] +Arguments: 100, [item_sk#46 ASC NULLS FIRST, d_date#47 ASC NULLS FIRST], [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_cumulative#61, store_cumulative#62] ===== Subqueries ===== @@ -399,27 +399,27 @@ BroadcastExchange (75) (71) Scan parquet default.date_dim -Output [3]: [d_date_sk#5, d_date#6, d_month_seq#74] +Output [3]: [d_date_sk#5, d_date#6, d_month_seq#63] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] ReadSchema: struct (72) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#74] +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#63] (73) Filter [codegen id : 1] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#74] -Condition : (((isnotnull(d_month_seq#74) AND (d_month_seq#74 >= 1212)) AND (d_month_seq#74 <= 1223)) AND isnotnull(d_date_sk#5)) +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#63] +Condition : (((isnotnull(d_month_seq#63) AND (d_month_seq#63 >= 1212)) AND (d_month_seq#63 <= 1223)) AND isnotnull(d_date_sk#5)) (74) Project [codegen id : 1] Output [2]: [d_date_sk#5, d_date#6] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#74] +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#63] (75) BroadcastExchange Input [2]: [d_date_sk#5, d_date#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#75] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] -Subquery:2 Hosting operator id = 28 Hosting Expression = ss_sold_date_sk#31 IN dynamicpruning#4 +Subquery:2 Hosting operator id = 28 Hosting Expression = ss_sold_date_sk#26 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a/explain.txt index 9edb377b305c5..6b821b68cbac9 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q51a/explain.txt @@ -104,274 +104,274 @@ Results [3]: [ws_item_sk#1, d_date#6, sum#8] (8) Exchange Input [3]: [ws_item_sk#1, d_date#6, sum#8] -Arguments: hashpartitioning(ws_item_sk#1, d_date#6, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ws_item_sk#1, d_date#6, 5), ENSURE_REQUIREMENTS, [plan_id=1] (9) HashAggregate [codegen id : 3] Input [3]: [ws_item_sk#1, d_date#6, sum#8] Keys [2]: [ws_item_sk#1, d_date#6] Functions [1]: [sum(UnscaledValue(ws_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#2))#10] -Results [4]: [ws_item_sk#1 AS item_sk#11, d_date#6, MakeDecimal(sum(UnscaledValue(ws_sales_price#2))#10,17,2) AS sumws#12, ws_item_sk#1] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_sales_price#2))#9] +Results [4]: [ws_item_sk#1 AS item_sk#10, d_date#6, MakeDecimal(sum(UnscaledValue(ws_sales_price#2))#9,17,2) AS sumws#11, ws_item_sk#1] (10) Exchange -Input [4]: [item_sk#11, d_date#6, sumws#12, ws_item_sk#1] -Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [4]: [item_sk#10, d_date#6, sumws#11, ws_item_sk#1] +Arguments: hashpartitioning(ws_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [4]: [item_sk#11, d_date#6, sumws#12, ws_item_sk#1] +Input [4]: [item_sk#10, d_date#6, sumws#11, ws_item_sk#1] Arguments: [ws_item_sk#1 ASC NULLS FIRST, d_date#6 ASC NULLS FIRST], false, 0 (12) Window -Input [4]: [item_sk#11, d_date#6, sumws#12, ws_item_sk#1] -Arguments: [row_number() windowspecdefinition(ws_item_sk#1, d_date#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#14], [ws_item_sk#1], [d_date#6 ASC NULLS FIRST] +Input [4]: [item_sk#10, d_date#6, sumws#11, ws_item_sk#1] +Arguments: [row_number() windowspecdefinition(ws_item_sk#1, d_date#6 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#12], [ws_item_sk#1], [d_date#6 ASC NULLS FIRST] (13) Project [codegen id : 10] -Output [4]: [item_sk#11, d_date#6, sumws#12, rk#14] -Input [5]: [item_sk#11, d_date#6, sumws#12, ws_item_sk#1, rk#14] +Output [4]: [item_sk#10, d_date#6, sumws#11, rk#12] +Input [5]: [item_sk#10, d_date#6, sumws#11, ws_item_sk#1, rk#12] (14) ReusedExchange [Reuses operator id: 10] -Output [4]: [item_sk#11, d_date#15, sumws#12, ws_item_sk#16] +Output [4]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14] (15) Sort [codegen id : 8] -Input [4]: [item_sk#11, d_date#15, sumws#12, ws_item_sk#16] -Arguments: [ws_item_sk#16 ASC NULLS FIRST, d_date#15 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14] +Arguments: [ws_item_sk#14 ASC NULLS FIRST, d_date#13 ASC NULLS FIRST], false, 0 (16) Window -Input [4]: [item_sk#11, d_date#15, sumws#12, ws_item_sk#16] -Arguments: [row_number() windowspecdefinition(ws_item_sk#16, d_date#15 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#17], [ws_item_sk#16], [d_date#15 ASC NULLS FIRST] +Input [4]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14] +Arguments: [row_number() windowspecdefinition(ws_item_sk#14, d_date#13 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#15], [ws_item_sk#14], [d_date#13 ASC NULLS FIRST] (17) Project [codegen id : 9] -Output [3]: [item_sk#11 AS item_sk#18, sumws#12 AS sumws#19, rk#17] -Input [5]: [item_sk#11, d_date#15, sumws#12, ws_item_sk#16, rk#17] +Output [3]: [item_sk#10 AS item_sk#16, sumws#11 AS sumws#17, rk#15] +Input [5]: [item_sk#10, d_date#13, sumws#11, ws_item_sk#14, rk#15] (18) BroadcastExchange -Input [3]: [item_sk#18, sumws#19, rk#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#20] +Input [3]: [item_sk#16, sumws#17, rk#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (19) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [item_sk#11] -Right keys [1]: [item_sk#18] -Join condition: (rk#14 >= rk#17) +Left keys [1]: [item_sk#10] +Right keys [1]: [item_sk#16] +Join condition: (rk#12 >= rk#15) (20) Project [codegen id : 10] -Output [4]: [item_sk#11, d_date#6, sumws#12, sumws#19] -Input [7]: [item_sk#11, d_date#6, sumws#12, rk#14, item_sk#18, sumws#19, rk#17] +Output [4]: [item_sk#10, d_date#6, sumws#11, sumws#17] +Input [7]: [item_sk#10, d_date#6, sumws#11, rk#12, item_sk#16, sumws#17, rk#15] (21) HashAggregate [codegen id : 10] -Input [4]: [item_sk#11, d_date#6, sumws#12, sumws#19] -Keys [3]: [item_sk#11, d_date#6, sumws#12] -Functions [1]: [partial_sum(sumws#19)] -Aggregate Attributes [2]: [sum#21, isEmpty#22] -Results [5]: [item_sk#11, d_date#6, sumws#12, sum#23, isEmpty#24] +Input [4]: [item_sk#10, d_date#6, sumws#11, sumws#17] +Keys [3]: [item_sk#10, d_date#6, sumws#11] +Functions [1]: [partial_sum(sumws#17)] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [5]: [item_sk#10, d_date#6, sumws#11, sum#20, isEmpty#21] (22) Exchange -Input [5]: [item_sk#11, d_date#6, sumws#12, sum#23, isEmpty#24] -Arguments: hashpartitioning(item_sk#11, d_date#6, sumws#12, 5), ENSURE_REQUIREMENTS, [id=#25] +Input [5]: [item_sk#10, d_date#6, sumws#11, sum#20, isEmpty#21] +Arguments: hashpartitioning(item_sk#10, d_date#6, sumws#11, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) HashAggregate [codegen id : 11] -Input [5]: [item_sk#11, d_date#6, sumws#12, sum#23, isEmpty#24] -Keys [3]: [item_sk#11, d_date#6, sumws#12] -Functions [1]: [sum(sumws#19)] -Aggregate Attributes [1]: [sum(sumws#19)#26] -Results [3]: [item_sk#11, d_date#6, sum(sumws#19)#26 AS cume_sales#27] +Input [5]: [item_sk#10, d_date#6, sumws#11, sum#20, isEmpty#21] +Keys [3]: [item_sk#10, d_date#6, sumws#11] +Functions [1]: [sum(sumws#17)] +Aggregate Attributes [1]: [sum(sumws#17)#22] +Results [3]: [item_sk#10, d_date#6, sum(sumws#17)#22 AS cume_sales#23] (24) Exchange -Input [3]: [item_sk#11, d_date#6, cume_sales#27] -Arguments: hashpartitioning(item_sk#11, d_date#6, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [3]: [item_sk#10, d_date#6, cume_sales#23] +Arguments: hashpartitioning(item_sk#10, d_date#6, 5), ENSURE_REQUIREMENTS, [plan_id=5] (25) Sort [codegen id : 12] -Input [3]: [item_sk#11, d_date#6, cume_sales#27] -Arguments: [item_sk#11 ASC NULLS FIRST, d_date#6 ASC NULLS FIRST], false, 0 +Input [3]: [item_sk#10, d_date#6, cume_sales#23] +Arguments: [item_sk#10 ASC NULLS FIRST, d_date#6 ASC NULLS FIRST], false, 0 (26) Scan parquet default.store_sales -Output [3]: [ss_item_sk#29, ss_sales_price#30, ss_sold_date_sk#31] +Output [3]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#31), dynamicpruningexpression(ss_sold_date_sk#31 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(ss_sold_date_sk#26), dynamicpruningexpression(ss_sold_date_sk#26 IN dynamicpruning#4)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 14] -Input [3]: [ss_item_sk#29, ss_sales_price#30, ss_sold_date_sk#31] +Input [3]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26] (28) Filter [codegen id : 14] -Input [3]: [ss_item_sk#29, ss_sales_price#30, ss_sold_date_sk#31] -Condition : isnotnull(ss_item_sk#29) +Input [3]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26] +Condition : isnotnull(ss_item_sk#24) (29) ReusedExchange [Reuses operator id: 72] -Output [2]: [d_date_sk#32, d_date#33] +Output [2]: [d_date_sk#27, d_date#28] (30) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ss_sold_date_sk#31] -Right keys [1]: [d_date_sk#32] +Left keys [1]: [ss_sold_date_sk#26] +Right keys [1]: [d_date_sk#27] Join condition: None (31) Project [codegen id : 14] -Output [3]: [ss_item_sk#29, ss_sales_price#30, d_date#33] -Input [5]: [ss_item_sk#29, ss_sales_price#30, ss_sold_date_sk#31, d_date_sk#32, d_date#33] +Output [3]: [ss_item_sk#24, ss_sales_price#25, d_date#28] +Input [5]: [ss_item_sk#24, ss_sales_price#25, ss_sold_date_sk#26, d_date_sk#27, d_date#28] (32) HashAggregate [codegen id : 14] -Input [3]: [ss_item_sk#29, ss_sales_price#30, d_date#33] -Keys [2]: [ss_item_sk#29, d_date#33] -Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#30))] -Aggregate Attributes [1]: [sum#34] -Results [3]: [ss_item_sk#29, d_date#33, sum#35] +Input [3]: [ss_item_sk#24, ss_sales_price#25, d_date#28] +Keys [2]: [ss_item_sk#24, d_date#28] +Functions [1]: [partial_sum(UnscaledValue(ss_sales_price#25))] +Aggregate Attributes [1]: [sum#29] +Results [3]: [ss_item_sk#24, d_date#28, sum#30] (33) Exchange -Input [3]: [ss_item_sk#29, d_date#33, sum#35] -Arguments: hashpartitioning(ss_item_sk#29, d_date#33, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [3]: [ss_item_sk#24, d_date#28, sum#30] +Arguments: hashpartitioning(ss_item_sk#24, d_date#28, 5), ENSURE_REQUIREMENTS, [plan_id=6] (34) HashAggregate [codegen id : 15] -Input [3]: [ss_item_sk#29, d_date#33, sum#35] -Keys [2]: [ss_item_sk#29, d_date#33] -Functions [1]: [sum(UnscaledValue(ss_sales_price#30))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#30))#37] -Results [4]: [ss_item_sk#29 AS item_sk#38, d_date#33, MakeDecimal(sum(UnscaledValue(ss_sales_price#30))#37,17,2) AS sumss#39, ss_item_sk#29] +Input [3]: [ss_item_sk#24, d_date#28, sum#30] +Keys [2]: [ss_item_sk#24, d_date#28] +Functions [1]: [sum(UnscaledValue(ss_sales_price#25))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_sales_price#25))#31] +Results [4]: [ss_item_sk#24 AS item_sk#32, d_date#28, MakeDecimal(sum(UnscaledValue(ss_sales_price#25))#31,17,2) AS sumss#33, ss_item_sk#24] (35) Exchange -Input [4]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#29] -Arguments: hashpartitioning(ss_item_sk#29, 5), ENSURE_REQUIREMENTS, [id=#40] +Input [4]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24] +Arguments: hashpartitioning(ss_item_sk#24, 5), ENSURE_REQUIREMENTS, [plan_id=7] (36) Sort [codegen id : 16] -Input [4]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#29] -Arguments: [ss_item_sk#29 ASC NULLS FIRST, d_date#33 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24] +Arguments: [ss_item_sk#24 ASC NULLS FIRST, d_date#28 ASC NULLS FIRST], false, 0 (37) Window -Input [4]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#29] -Arguments: [row_number() windowspecdefinition(ss_item_sk#29, d_date#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#41], [ss_item_sk#29], [d_date#33 ASC NULLS FIRST] +Input [4]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24] +Arguments: [row_number() windowspecdefinition(ss_item_sk#24, d_date#28 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#34], [ss_item_sk#24], [d_date#28 ASC NULLS FIRST] (38) Project [codegen id : 22] -Output [4]: [item_sk#38, d_date#33, sumss#39, rk#41] -Input [5]: [item_sk#38, d_date#33, sumss#39, ss_item_sk#29, rk#41] +Output [4]: [item_sk#32, d_date#28, sumss#33, rk#34] +Input [5]: [item_sk#32, d_date#28, sumss#33, ss_item_sk#24, rk#34] (39) ReusedExchange [Reuses operator id: 35] -Output [4]: [item_sk#38, d_date#42, sumss#39, ss_item_sk#43] +Output [4]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36] (40) Sort [codegen id : 20] -Input [4]: [item_sk#38, d_date#42, sumss#39, ss_item_sk#43] -Arguments: [ss_item_sk#43 ASC NULLS FIRST, d_date#42 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36] +Arguments: [ss_item_sk#36 ASC NULLS FIRST, d_date#35 ASC NULLS FIRST], false, 0 (41) Window -Input [4]: [item_sk#38, d_date#42, sumss#39, ss_item_sk#43] -Arguments: [row_number() windowspecdefinition(ss_item_sk#43, d_date#42 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#44], [ss_item_sk#43], [d_date#42 ASC NULLS FIRST] +Input [4]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36] +Arguments: [row_number() windowspecdefinition(ss_item_sk#36, d_date#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#37], [ss_item_sk#36], [d_date#35 ASC NULLS FIRST] (42) Project [codegen id : 21] -Output [3]: [item_sk#38 AS item_sk#45, sumss#39 AS sumss#46, rk#44] -Input [5]: [item_sk#38, d_date#42, sumss#39, ss_item_sk#43, rk#44] +Output [3]: [item_sk#32 AS item_sk#38, sumss#33 AS sumss#39, rk#37] +Input [5]: [item_sk#32, d_date#35, sumss#33, ss_item_sk#36, rk#37] (43) BroadcastExchange -Input [3]: [item_sk#45, sumss#46, rk#44] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#47] +Input [3]: [item_sk#38, sumss#39, rk#37] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=8] (44) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [item_sk#38] -Right keys [1]: [item_sk#45] -Join condition: (rk#41 >= rk#44) +Left keys [1]: [item_sk#32] +Right keys [1]: [item_sk#38] +Join condition: (rk#34 >= rk#37) (45) Project [codegen id : 22] -Output [4]: [item_sk#38, d_date#33, sumss#39, sumss#46] -Input [7]: [item_sk#38, d_date#33, sumss#39, rk#41, item_sk#45, sumss#46, rk#44] +Output [4]: [item_sk#32, d_date#28, sumss#33, sumss#39] +Input [7]: [item_sk#32, d_date#28, sumss#33, rk#34, item_sk#38, sumss#39, rk#37] (46) HashAggregate [codegen id : 22] -Input [4]: [item_sk#38, d_date#33, sumss#39, sumss#46] -Keys [3]: [item_sk#38, d_date#33, sumss#39] -Functions [1]: [partial_sum(sumss#46)] -Aggregate Attributes [2]: [sum#48, isEmpty#49] -Results [5]: [item_sk#38, d_date#33, sumss#39, sum#50, isEmpty#51] +Input [4]: [item_sk#32, d_date#28, sumss#33, sumss#39] +Keys [3]: [item_sk#32, d_date#28, sumss#33] +Functions [1]: [partial_sum(sumss#39)] +Aggregate Attributes [2]: [sum#40, isEmpty#41] +Results [5]: [item_sk#32, d_date#28, sumss#33, sum#42, isEmpty#43] (47) Exchange -Input [5]: [item_sk#38, d_date#33, sumss#39, sum#50, isEmpty#51] -Arguments: hashpartitioning(item_sk#38, d_date#33, sumss#39, 5), ENSURE_REQUIREMENTS, [id=#52] +Input [5]: [item_sk#32, d_date#28, sumss#33, sum#42, isEmpty#43] +Arguments: hashpartitioning(item_sk#32, d_date#28, sumss#33, 5), ENSURE_REQUIREMENTS, [plan_id=9] (48) HashAggregate [codegen id : 23] -Input [5]: [item_sk#38, d_date#33, sumss#39, sum#50, isEmpty#51] -Keys [3]: [item_sk#38, d_date#33, sumss#39] -Functions [1]: [sum(sumss#46)] -Aggregate Attributes [1]: [sum(sumss#46)#53] -Results [3]: [item_sk#38, d_date#33, sum(sumss#46)#53 AS cume_sales#54] +Input [5]: [item_sk#32, d_date#28, sumss#33, sum#42, isEmpty#43] +Keys [3]: [item_sk#32, d_date#28, sumss#33] +Functions [1]: [sum(sumss#39)] +Aggregate Attributes [1]: [sum(sumss#39)#44] +Results [3]: [item_sk#32, d_date#28, sum(sumss#39)#44 AS cume_sales#45] (49) Exchange -Input [3]: [item_sk#38, d_date#33, cume_sales#54] -Arguments: hashpartitioning(item_sk#38, d_date#33, 5), ENSURE_REQUIREMENTS, [id=#55] +Input [3]: [item_sk#32, d_date#28, cume_sales#45] +Arguments: hashpartitioning(item_sk#32, d_date#28, 5), ENSURE_REQUIREMENTS, [plan_id=10] (50) Sort [codegen id : 24] -Input [3]: [item_sk#38, d_date#33, cume_sales#54] -Arguments: [item_sk#38 ASC NULLS FIRST, d_date#33 ASC NULLS FIRST], false, 0 +Input [3]: [item_sk#32, d_date#28, cume_sales#45] +Arguments: [item_sk#32 ASC NULLS FIRST, d_date#28 ASC NULLS FIRST], false, 0 (51) SortMergeJoin [codegen id : 25] -Left keys [2]: [item_sk#11, d_date#6] -Right keys [2]: [item_sk#38, d_date#33] +Left keys [2]: [item_sk#10, d_date#6] +Right keys [2]: [item_sk#32, d_date#28] Join condition: None (52) Filter [codegen id : 25] -Input [6]: [item_sk#11, d_date#6, cume_sales#27, item_sk#38, d_date#33, cume_sales#54] -Condition : isnotnull(CASE WHEN isnotnull(item_sk#11) THEN item_sk#11 ELSE item_sk#38 END) +Input [6]: [item_sk#10, d_date#6, cume_sales#23, item_sk#32, d_date#28, cume_sales#45] +Condition : isnotnull(CASE WHEN isnotnull(item_sk#10) THEN item_sk#10 ELSE item_sk#32 END) (53) Project [codegen id : 25] -Output [4]: [CASE WHEN isnotnull(item_sk#11) THEN item_sk#11 ELSE item_sk#38 END AS item_sk#56, CASE WHEN isnotnull(d_date#6) THEN d_date#6 ELSE d_date#33 END AS d_date#57, cume_sales#27 AS web_sales#58, cume_sales#54 AS store_sales#59] -Input [6]: [item_sk#11, d_date#6, cume_sales#27, item_sk#38, d_date#33, cume_sales#54] +Output [4]: [CASE WHEN isnotnull(item_sk#10) THEN item_sk#10 ELSE item_sk#32 END AS item_sk#46, CASE WHEN isnotnull(d_date#6) THEN d_date#6 ELSE d_date#28 END AS d_date#47, cume_sales#23 AS web_sales#48, cume_sales#45 AS store_sales#49] +Input [6]: [item_sk#10, d_date#6, cume_sales#23, item_sk#32, d_date#28, cume_sales#45] (54) Exchange -Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -Arguments: hashpartitioning(item_sk#56, 5), ENSURE_REQUIREMENTS, [id=#60] +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: hashpartitioning(item_sk#46, 5), ENSURE_REQUIREMENTS, [plan_id=11] (55) Sort [codegen id : 26] -Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -Arguments: [item_sk#56 ASC NULLS FIRST, d_date#57 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [item_sk#46 ASC NULLS FIRST, d_date#47 ASC NULLS FIRST], false, 0 (56) Window -Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -Arguments: [row_number() windowspecdefinition(item_sk#56, d_date#57 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#61], [item_sk#56], [d_date#57 ASC NULLS FIRST] +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [row_number() windowspecdefinition(item_sk#46, d_date#47 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#50], [item_sk#46], [d_date#47 ASC NULLS FIRST] (57) ReusedExchange [Reuses operator id: 54] -Output [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] +Output [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] (58) Sort [codegen id : 52] -Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -Arguments: [item_sk#56 ASC NULLS FIRST, d_date#57 ASC NULLS FIRST], false, 0 +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [item_sk#46 ASC NULLS FIRST, d_date#47 ASC NULLS FIRST], false, 0 (59) Window -Input [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -Arguments: [row_number() windowspecdefinition(item_sk#56, d_date#57 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#62], [item_sk#56], [d_date#57 ASC NULLS FIRST] +Input [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Arguments: [row_number() windowspecdefinition(item_sk#46, d_date#47 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#51], [item_sk#46], [d_date#47 ASC NULLS FIRST] (60) Project [codegen id : 53] -Output [4]: [item_sk#56 AS item_sk#63, web_sales#58 AS web_sales#64, store_sales#59 AS store_sales#65, rk#62] -Input [5]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, rk#62] +Output [4]: [item_sk#46 AS item_sk#52, web_sales#48 AS web_sales#53, store_sales#49 AS store_sales#54, rk#51] +Input [5]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, rk#51] (61) BroadcastExchange -Input [4]: [item_sk#63, web_sales#64, store_sales#65, rk#62] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#66] +Input [4]: [item_sk#52, web_sales#53, store_sales#54, rk#51] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] (62) BroadcastHashJoin [codegen id : 54] -Left keys [1]: [item_sk#56] -Right keys [1]: [item_sk#63] -Join condition: (rk#61 >= rk#62) +Left keys [1]: [item_sk#46] +Right keys [1]: [item_sk#52] +Join condition: (rk#50 >= rk#51) (63) Project [codegen id : 54] -Output [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_sales#64, store_sales#65] -Input [9]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, rk#61, item_sk#63, web_sales#64, store_sales#65, rk#62] +Output [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_sales#53, store_sales#54] +Input [9]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, rk#50, item_sk#52, web_sales#53, store_sales#54, rk#51] (64) HashAggregate [codegen id : 54] -Input [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_sales#64, store_sales#65] -Keys [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -Functions [2]: [partial_max(web_sales#64), partial_max(store_sales#65)] -Aggregate Attributes [2]: [max#67, max#68] -Results [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, max#69, max#70] +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_sales#53, store_sales#54] +Keys [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Functions [2]: [partial_max(web_sales#53), partial_max(store_sales#54)] +Aggregate Attributes [2]: [max#55, max#56] +Results [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, max#57, max#58] (65) HashAggregate [codegen id : 54] -Input [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, max#69, max#70] -Keys [4]: [item_sk#56, d_date#57, web_sales#58, store_sales#59] -Functions [2]: [max(web_sales#64), max(store_sales#65)] -Aggregate Attributes [2]: [max(web_sales#64)#71, max(store_sales#65)#72] -Results [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, max(web_sales#64)#71 AS web_cumulative#73, max(store_sales#65)#72 AS store_cumulative#74] +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, max#57, max#58] +Keys [4]: [item_sk#46, d_date#47, web_sales#48, store_sales#49] +Functions [2]: [max(web_sales#53), max(store_sales#54)] +Aggregate Attributes [2]: [max(web_sales#53)#59, max(store_sales#54)#60] +Results [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, max(web_sales#53)#59 AS web_cumulative#61, max(store_sales#54)#60 AS store_cumulative#62] (66) Filter [codegen id : 54] -Input [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_cumulative#73, store_cumulative#74] -Condition : ((isnotnull(web_cumulative#73) AND isnotnull(store_cumulative#74)) AND (web_cumulative#73 > store_cumulative#74)) +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_cumulative#61, store_cumulative#62] +Condition : ((isnotnull(web_cumulative#61) AND isnotnull(store_cumulative#62)) AND (web_cumulative#61 > store_cumulative#62)) (67) TakeOrderedAndProject -Input [6]: [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_cumulative#73, store_cumulative#74] -Arguments: 100, [item_sk#56 ASC NULLS FIRST, d_date#57 ASC NULLS FIRST], [item_sk#56, d_date#57, web_sales#58, store_sales#59, web_cumulative#73, store_cumulative#74] +Input [6]: [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_cumulative#61, store_cumulative#62] +Arguments: 100, [item_sk#46 ASC NULLS FIRST, d_date#47 ASC NULLS FIRST], [item_sk#46, d_date#47, web_sales#48, store_sales#49, web_cumulative#61, store_cumulative#62] ===== Subqueries ===== @@ -384,27 +384,27 @@ BroadcastExchange (72) (68) Scan parquet default.date_dim -Output [3]: [d_date_sk#5, d_date#6, d_month_seq#75] +Output [3]: [d_date_sk#5, d_date#6, d_month_seq#63] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] ReadSchema: struct (69) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#75] +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#63] (70) Filter [codegen id : 1] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#75] -Condition : (((isnotnull(d_month_seq#75) AND (d_month_seq#75 >= 1212)) AND (d_month_seq#75 <= 1223)) AND isnotnull(d_date_sk#5)) +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#63] +Condition : (((isnotnull(d_month_seq#63) AND (d_month_seq#63 >= 1212)) AND (d_month_seq#63 <= 1223)) AND isnotnull(d_date_sk#5)) (71) Project [codegen id : 1] Output [2]: [d_date_sk#5, d_date#6] -Input [3]: [d_date_sk#5, d_date#6, d_month_seq#75] +Input [3]: [d_date_sk#5, d_date#6, d_month_seq#63] (72) BroadcastExchange Input [2]: [d_date_sk#5, d_date#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#76] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=13] -Subquery:2 Hosting operator id = 26 Hosting Expression = ss_sold_date_sk#31 IN dynamicpruning#4 +Subquery:2 Hosting operator id = 26 Hosting Expression = ss_sold_date_sk#26 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt index d46c1d8c7e336..857e754bf67d7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57.sf100/explain.txt @@ -96,7 +96,7 @@ Condition : (isnotnull(cc_call_center_sk#9) AND isnotnull(cc_name#10)) (10) BroadcastExchange Input [2]: [cc_call_center_sk#9, cc_name#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_call_center_sk#1] @@ -109,175 +109,175 @@ Input [7]: [cs_call_center_sk#1, cs_item_sk#2, cs_sales_price#3, d_year#7, d_moy (13) Exchange Input [5]: [cs_item_sk#2, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10] -Arguments: hashpartitioning(cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [id=#12] +Arguments: hashpartitioning(cs_item_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) Sort [codegen id : 4] Input [5]: [cs_item_sk#2, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10] Arguments: [cs_item_sk#2 ASC NULLS FIRST], false, 0 (15) Scan parquet default.item -Output [3]: [i_item_sk#13, i_brand#14, i_category#15] +Output [3]: [i_item_sk#11, i_brand#12, i_category#13] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk), IsNotNull(i_category), IsNotNull(i_brand)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [3]: [i_item_sk#13, i_brand#14, i_category#15] +Input [3]: [i_item_sk#11, i_brand#12, i_category#13] (17) Filter [codegen id : 5] -Input [3]: [i_item_sk#13, i_brand#14, i_category#15] -Condition : ((isnotnull(i_item_sk#13) AND isnotnull(i_category#15)) AND isnotnull(i_brand#14)) +Input [3]: [i_item_sk#11, i_brand#12, i_category#13] +Condition : ((isnotnull(i_item_sk#11) AND isnotnull(i_category#13)) AND isnotnull(i_brand#12)) (18) Exchange -Input [3]: [i_item_sk#13, i_brand#14, i_category#15] -Arguments: hashpartitioning(i_item_sk#13, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [3]: [i_item_sk#11, i_brand#12, i_category#13] +Arguments: hashpartitioning(i_item_sk#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 6] -Input [3]: [i_item_sk#13, i_brand#14, i_category#15] -Arguments: [i_item_sk#13 ASC NULLS FIRST], false, 0 +Input [3]: [i_item_sk#11, i_brand#12, i_category#13] +Arguments: [i_item_sk#11 ASC NULLS FIRST], false, 0 (20) SortMergeJoin [codegen id : 7] Left keys [1]: [cs_item_sk#2] -Right keys [1]: [i_item_sk#13] +Right keys [1]: [i_item_sk#11] Join condition: None (21) Project [codegen id : 7] -Output [6]: [i_brand#14, i_category#15, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10] -Input [8]: [cs_item_sk#2, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10, i_item_sk#13, i_brand#14, i_category#15] +Output [6]: [i_brand#12, i_category#13, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10] +Input [8]: [cs_item_sk#2, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10, i_item_sk#11, i_brand#12, i_category#13] (22) HashAggregate [codegen id : 7] -Input [6]: [i_brand#14, i_category#15, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10] -Keys [5]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8] +Input [6]: [i_brand#12, i_category#13, cs_sales_price#3, d_year#7, d_moy#8, cc_name#10] +Keys [5]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8] Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#3))] -Aggregate Attributes [1]: [sum#17] -Results [6]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum#18] +Aggregate Attributes [1]: [sum#14] +Results [6]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum#15] (23) Exchange -Input [6]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum#18] -Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [6]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum#15] +Arguments: hashpartitioning(i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) HashAggregate [codegen id : 8] -Input [6]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum#18] -Keys [5]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8] +Input [6]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum#15] +Keys [5]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8] Functions [1]: [sum(UnscaledValue(cs_sales_price#3))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#3))#20] -Results [7]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, MakeDecimal(sum(UnscaledValue(cs_sales_price#3))#20,17,2) AS sum_sales#21, MakeDecimal(sum(UnscaledValue(cs_sales_price#3))#20,17,2) AS _w0#22] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#3))#16] +Results [7]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, MakeDecimal(sum(UnscaledValue(cs_sales_price#3))#16,17,2) AS sum_sales#17, MakeDecimal(sum(UnscaledValue(cs_sales_price#3))#16,17,2) AS _w0#18] (25) Exchange -Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22] -Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [7]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18] +Arguments: hashpartitioning(i_category#13, i_brand#12, cc_name#10, 5), ENSURE_REQUIREMENTS, [plan_id=5] (26) Sort [codegen id : 9] -Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22] -Arguments: [i_category#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 +Input [7]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18] +Arguments: [i_category#13 ASC NULLS FIRST, i_brand#12 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST], false, 0 (27) Window -Input [7]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22] -Arguments: [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#15, i_brand#14, cc_name#10, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#24], [i_category#15, i_brand#14, cc_name#10], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] +Input [7]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18] +Arguments: [rank(d_year#7, d_moy#8) windowspecdefinition(i_category#13, i_brand#12, cc_name#10, d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#19], [i_category#13, i_brand#12, cc_name#10], [d_year#7 ASC NULLS FIRST, d_moy#8 ASC NULLS FIRST] (28) Filter [codegen id : 10] -Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22, rn#24] +Input [8]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18, rn#19] Condition : (isnotnull(d_year#7) AND (d_year#7 = 1999)) (29) Window -Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22, rn#24] -Arguments: [avg(_w0#22) windowspecdefinition(i_category#15, i_brand#14, cc_name#10, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#25], [i_category#15, i_brand#14, cc_name#10, d_year#7] +Input [8]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18, rn#19] +Arguments: [avg(_w0#18) windowspecdefinition(i_category#13, i_brand#12, cc_name#10, d_year#7, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#13, i_brand#12, cc_name#10, d_year#7] (30) Filter [codegen id : 11] -Input [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22, rn#24, avg_monthly_sales#25] -Condition : ((isnotnull(avg_monthly_sales#25) AND (avg_monthly_sales#25 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Input [9]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] +Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (31) Project [codegen id : 11] -Output [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24] -Input [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, _w0#22, rn#24, avg_monthly_sales#25] +Output [8]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, avg_monthly_sales#20, rn#19] +Input [9]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] (32) Exchange -Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24] -Arguments: hashpartitioning(i_category#15, i_brand#14, cc_name#10, rn#24, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [8]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, avg_monthly_sales#20, rn#19] +Arguments: hashpartitioning(i_category#13, i_brand#12, cc_name#10, rn#19, 5), ENSURE_REQUIREMENTS, [plan_id=6] (33) Sort [codegen id : 12] -Input [8]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24] -Arguments: [i_category#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, rn#24 ASC NULLS FIRST], false, 0 +Input [8]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, avg_monthly_sales#20, rn#19] +Arguments: [i_category#13 ASC NULLS FIRST, i_brand#12 ASC NULLS FIRST, cc_name#10 ASC NULLS FIRST, rn#19 ASC NULLS FIRST], false, 0 (34) ReusedExchange [Reuses operator id: 23] -Output [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum#32] +Output [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum#26] (35) HashAggregate [codegen id : 20] -Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum#32] -Keys [5]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31] -Functions [1]: [sum(UnscaledValue(cs_sales_price#33))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#33))#20] -Results [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, MakeDecimal(sum(UnscaledValue(cs_sales_price#33))#20,17,2) AS sum_sales#21] +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum#26] +Keys [5]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25] +Functions [1]: [sum(UnscaledValue(cs_sales_price#27))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#27))#16] +Results [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, MakeDecimal(sum(UnscaledValue(cs_sales_price#27))#16,17,2) AS sum_sales#17] (36) Exchange -Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21] -Arguments: hashpartitioning(i_category#27, i_brand#28, cc_name#29, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17] +Arguments: hashpartitioning(i_category#21, i_brand#22, cc_name#23, 5), ENSURE_REQUIREMENTS, [plan_id=7] (37) Sort [codegen id : 21] -Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21] -Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, cc_name#29 ASC NULLS FIRST, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST], false, 0 +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17] +Arguments: [i_category#21 ASC NULLS FIRST, i_brand#22 ASC NULLS FIRST, cc_name#23 ASC NULLS FIRST, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST], false, 0 (38) Window -Input [6]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21] -Arguments: [rank(d_year#30, d_moy#31) windowspecdefinition(i_category#27, i_brand#28, cc_name#29, d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#27, i_brand#28, cc_name#29], [d_year#30 ASC NULLS FIRST, d_moy#31 ASC NULLS FIRST] +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17] +Arguments: [rank(d_year#24, d_moy#25) windowspecdefinition(i_category#21, i_brand#22, cc_name#23, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#28], [i_category#21, i_brand#22, cc_name#23], [d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] (39) Project [codegen id : 22] -Output [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#21 AS sum_sales#36, rn#35] -Input [7]: [i_category#27, i_brand#28, cc_name#29, d_year#30, d_moy#31, sum_sales#21, rn#35] +Output [5]: [i_category#21, i_brand#22, cc_name#23, sum_sales#17 AS sum_sales#29, rn#28] +Input [7]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17, rn#28] (40) Exchange -Input [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#36, rn#35] -Arguments: hashpartitioning(i_category#27, i_brand#28, cc_name#29, (rn#35 + 1), 5), ENSURE_REQUIREMENTS, [id=#37] +Input [5]: [i_category#21, i_brand#22, cc_name#23, sum_sales#29, rn#28] +Arguments: hashpartitioning(i_category#21, i_brand#22, cc_name#23, (rn#28 + 1), 5), ENSURE_REQUIREMENTS, [plan_id=8] (41) Sort [codegen id : 23] -Input [5]: [i_category#27, i_brand#28, cc_name#29, sum_sales#36, rn#35] -Arguments: [i_category#27 ASC NULLS FIRST, i_brand#28 ASC NULLS FIRST, cc_name#29 ASC NULLS FIRST, (rn#35 + 1) ASC NULLS FIRST], false, 0 +Input [5]: [i_category#21, i_brand#22, cc_name#23, sum_sales#29, rn#28] +Arguments: [i_category#21 ASC NULLS FIRST, i_brand#22 ASC NULLS FIRST, cc_name#23 ASC NULLS FIRST, (rn#28 + 1) ASC NULLS FIRST], false, 0 (42) SortMergeJoin [codegen id : 24] -Left keys [4]: [i_category#15, i_brand#14, cc_name#10, rn#24] -Right keys [4]: [i_category#27, i_brand#28, cc_name#29, (rn#35 + 1)] +Left keys [4]: [i_category#13, i_brand#12, cc_name#10, rn#19] +Right keys [4]: [i_category#21, i_brand#22, cc_name#23, (rn#28 + 1)] Join condition: None (43) Project [codegen id : 24] -Output [9]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#36] -Input [13]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, i_category#27, i_brand#28, cc_name#29, sum_sales#36, rn#35] +Output [9]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#29] +Input [13]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, avg_monthly_sales#20, rn#19, i_category#21, i_brand#22, cc_name#23, sum_sales#29, rn#28] (44) ReusedExchange [Reuses operator id: 36] -Output [6]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21] +Output [6]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17] (45) Sort [codegen id : 33] -Input [6]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21] -Arguments: [i_category#38 ASC NULLS FIRST, i_brand#39 ASC NULLS FIRST, cc_name#40 ASC NULLS FIRST, d_year#41 ASC NULLS FIRST, d_moy#42 ASC NULLS FIRST], false, 0 +Input [6]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17] +Arguments: [i_category#30 ASC NULLS FIRST, i_brand#31 ASC NULLS FIRST, cc_name#32 ASC NULLS FIRST, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST], false, 0 (46) Window -Input [6]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21] -Arguments: [rank(d_year#41, d_moy#42) windowspecdefinition(i_category#38, i_brand#39, cc_name#40, d_year#41 ASC NULLS FIRST, d_moy#42 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#43], [i_category#38, i_brand#39, cc_name#40], [d_year#41 ASC NULLS FIRST, d_moy#42 ASC NULLS FIRST] +Input [6]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17] +Arguments: [rank(d_year#33, d_moy#34) windowspecdefinition(i_category#30, i_brand#31, cc_name#32, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#30, i_brand#31, cc_name#32], [d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST] (47) Project [codegen id : 34] -Output [5]: [i_category#38, i_brand#39, cc_name#40, sum_sales#21 AS sum_sales#44, rn#43] -Input [7]: [i_category#38, i_brand#39, cc_name#40, d_year#41, d_moy#42, sum_sales#21, rn#43] +Output [5]: [i_category#30, i_brand#31, cc_name#32, sum_sales#17 AS sum_sales#36, rn#35] +Input [7]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17, rn#35] (48) Exchange -Input [5]: [i_category#38, i_brand#39, cc_name#40, sum_sales#44, rn#43] -Arguments: hashpartitioning(i_category#38, i_brand#39, cc_name#40, (rn#43 - 1), 5), ENSURE_REQUIREMENTS, [id=#45] +Input [5]: [i_category#30, i_brand#31, cc_name#32, sum_sales#36, rn#35] +Arguments: hashpartitioning(i_category#30, i_brand#31, cc_name#32, (rn#35 - 1), 5), ENSURE_REQUIREMENTS, [plan_id=9] (49) Sort [codegen id : 35] -Input [5]: [i_category#38, i_brand#39, cc_name#40, sum_sales#44, rn#43] -Arguments: [i_category#38 ASC NULLS FIRST, i_brand#39 ASC NULLS FIRST, cc_name#40 ASC NULLS FIRST, (rn#43 - 1) ASC NULLS FIRST], false, 0 +Input [5]: [i_category#30, i_brand#31, cc_name#32, sum_sales#36, rn#35] +Arguments: [i_category#30 ASC NULLS FIRST, i_brand#31 ASC NULLS FIRST, cc_name#32 ASC NULLS FIRST, (rn#35 - 1) ASC NULLS FIRST], false, 0 (50) SortMergeJoin [codegen id : 36] -Left keys [4]: [i_category#15, i_brand#14, cc_name#10, rn#24] -Right keys [4]: [i_category#38, i_brand#39, cc_name#40, (rn#43 - 1)] +Left keys [4]: [i_category#13, i_brand#12, cc_name#10, rn#19] +Right keys [4]: [i_category#30, i_brand#31, cc_name#32, (rn#35 - 1)] Join condition: None (51) Project [codegen id : 36] -Output [8]: [i_category#15, i_brand#14, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, sum_sales#36 AS psum#46, sum_sales#44 AS nsum#47] -Input [14]: [i_category#15, i_brand#14, cc_name#10, d_year#7, d_moy#8, sum_sales#21, avg_monthly_sales#25, rn#24, sum_sales#36, i_category#38, i_brand#39, cc_name#40, sum_sales#44, rn#43] +Output [8]: [i_category#13, i_brand#12, d_year#7, d_moy#8, avg_monthly_sales#20, sum_sales#17, sum_sales#29 AS psum#37, sum_sales#36 AS nsum#38] +Input [14]: [i_category#13, i_brand#12, cc_name#10, d_year#7, d_moy#8, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#29, i_category#30, i_brand#31, cc_name#32, sum_sales#36, rn#35] (52) TakeOrderedAndProject -Input [8]: [i_category#15, i_brand#14, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#46, nsum#47] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#21 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#25 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, d_year#7 ASC NULLS FIRST], [i_category#15, i_brand#14, d_year#7, d_moy#8, avg_monthly_sales#25, sum_sales#21, psum#46, nsum#47] +Input [8]: [i_category#13, i_brand#12, d_year#7, d_moy#8, avg_monthly_sales#20, sum_sales#17, psum#37, nsum#38] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, d_year#7 ASC NULLS FIRST], [i_category#13, i_brand#12, d_year#7, d_moy#8, avg_monthly_sales#20, sum_sales#17, psum#37, nsum#38] ===== Subqueries ===== @@ -304,6 +304,6 @@ Condition : ((((d_year#7 = 1999) OR ((d_year#7 = 1998) AND (d_moy#8 = 12))) OR ( (56) BroadcastExchange Input [3]: [d_date_sk#6, d_year#7, d_moy#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt index 675acedcd9cad..0fa6debb223ab 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt @@ -77,7 +77,7 @@ Condition : (isnotnull(cs_item_sk#5) AND isnotnull(cs_call_center_sk#4)) (7) BroadcastExchange Input [4]: [cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 4] Left keys [1]: [i_item_sk#1] @@ -89,160 +89,160 @@ Output [5]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_ Input [7]: [i_item_sk#1, i_brand#2, i_category#3, cs_call_center_sk#4, cs_item_sk#5, cs_sales_price#6, cs_sold_date_sk#7] (10) ReusedExchange [Reuses operator id: 49] -Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_sold_date_sk#7] -Right keys [1]: [d_date_sk#10] +Right keys [1]: [d_date_sk#9] Join condition: None (12) Project [codegen id : 4] -Output [6]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#11, d_moy#12] -Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7, d_date_sk#10, d_year#11, d_moy#12] +Output [6]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#10, d_moy#11] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, cs_sold_date_sk#7, d_date_sk#9, d_year#10, d_moy#11] (13) Scan parquet default.call_center -Output [2]: [cc_call_center_sk#13, cc_name#14] +Output [2]: [cc_call_center_sk#12, cc_name#13] Batched: true Location [not included in comparison]/{warehouse_dir}/call_center] PushedFilters: [IsNotNull(cc_call_center_sk), IsNotNull(cc_name)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [2]: [cc_call_center_sk#13, cc_name#14] +Input [2]: [cc_call_center_sk#12, cc_name#13] (15) Filter [codegen id : 3] -Input [2]: [cc_call_center_sk#13, cc_name#14] -Condition : (isnotnull(cc_call_center_sk#13) AND isnotnull(cc_name#14)) +Input [2]: [cc_call_center_sk#12, cc_name#13] +Condition : (isnotnull(cc_call_center_sk#12) AND isnotnull(cc_name#13)) (16) BroadcastExchange -Input [2]: [cc_call_center_sk#13, cc_name#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Input [2]: [cc_call_center_sk#12, cc_name#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_call_center_sk#4] -Right keys [1]: [cc_call_center_sk#13] +Right keys [1]: [cc_call_center_sk#12] Join condition: None (18) Project [codegen id : 4] -Output [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#11, d_moy#12, cc_name#14] -Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#11, d_moy#12, cc_call_center_sk#13, cc_name#14] +Output [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#10, d_moy#11, cc_name#13] +Input [8]: [i_brand#2, i_category#3, cs_call_center_sk#4, cs_sales_price#6, d_year#10, d_moy#11, cc_call_center_sk#12, cc_name#13] (19) HashAggregate [codegen id : 4] -Input [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#11, d_moy#12, cc_name#14] -Keys [5]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12] +Input [6]: [i_brand#2, i_category#3, cs_sales_price#6, d_year#10, d_moy#11, cc_name#13] +Keys [5]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11] Functions [1]: [partial_sum(UnscaledValue(cs_sales_price#6))] -Aggregate Attributes [1]: [sum#16] -Results [6]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum#17] +Aggregate Attributes [1]: [sum#14] +Results [6]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum#15] (20) Exchange -Input [6]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum#17] -Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [6]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum#15] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 5] -Input [6]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum#17] -Keys [5]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12] +Input [6]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum#15] +Keys [5]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11] Functions [1]: [sum(UnscaledValue(cs_sales_price#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#6))#19] -Results [7]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#19,17,2) AS sum_sales#20, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#19,17,2) AS _w0#21] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#6))#16] +Results [7]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#16,17,2) AS sum_sales#17, MakeDecimal(sum(UnscaledValue(cs_sales_price#6))#16,17,2) AS _w0#18] (22) Exchange -Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21] -Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#14, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [7]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18] +Arguments: hashpartitioning(i_category#3, i_brand#2, cc_name#13, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) Sort [codegen id : 6] -Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21] -Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#14 ASC NULLS FIRST, d_year#11 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST], false, 0 +Input [7]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18] +Arguments: [i_category#3 ASC NULLS FIRST, i_brand#2 ASC NULLS FIRST, cc_name#13 ASC NULLS FIRST, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], false, 0 (24) Window -Input [7]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21] -Arguments: [rank(d_year#11, d_moy#12) windowspecdefinition(i_category#3, i_brand#2, cc_name#14, d_year#11 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#23], [i_category#3, i_brand#2, cc_name#14], [d_year#11 ASC NULLS FIRST, d_moy#12 ASC NULLS FIRST] +Input [7]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18] +Arguments: [rank(d_year#10, d_moy#11) windowspecdefinition(i_category#3, i_brand#2, cc_name#13, d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#19], [i_category#3, i_brand#2, cc_name#13], [d_year#10 ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST] (25) Filter [codegen id : 7] -Input [8]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21, rn#23] -Condition : (isnotnull(d_year#11) AND (d_year#11 = 1999)) +Input [8]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18, rn#19] +Condition : (isnotnull(d_year#10) AND (d_year#10 = 1999)) (26) Window -Input [8]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21, rn#23] -Arguments: [avg(_w0#21) windowspecdefinition(i_category#3, i_brand#2, cc_name#14, d_year#11, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#24], [i_category#3, i_brand#2, cc_name#14, d_year#11] +Input [8]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18, rn#19] +Arguments: [avg(_w0#18) windowspecdefinition(i_category#3, i_brand#2, cc_name#13, d_year#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS avg_monthly_sales#20], [i_category#3, i_brand#2, cc_name#13, d_year#10] (27) Filter [codegen id : 22] -Input [9]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21, rn#23, avg_monthly_sales#24] -Condition : ((isnotnull(avg_monthly_sales#24) AND (avg_monthly_sales#24 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) +Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] +Condition : ((isnotnull(avg_monthly_sales#20) AND (avg_monthly_sales#20 > 0.000000)) AND (CheckOverflow((promote_precision(abs(CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)))) / promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(38,16)) > 0.1000000000000000)) (28) Project [codegen id : 22] -Output [8]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, avg_monthly_sales#24, rn#23] -Input [9]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, _w0#21, rn#23, avg_monthly_sales#24] +Output [8]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, avg_monthly_sales#20, rn#19] +Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, _w0#18, rn#19, avg_monthly_sales#20] (29) ReusedExchange [Reuses operator id: 20] -Output [6]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29, sum#30] +Output [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum#26] (30) HashAggregate [codegen id : 12] -Input [6]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29, sum#30] -Keys [5]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29] -Functions [1]: [sum(UnscaledValue(cs_sales_price#31))] -Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#31))#19] -Results [6]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29, MakeDecimal(sum(UnscaledValue(cs_sales_price#31))#19,17,2) AS sum_sales#20] +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum#26] +Keys [5]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25] +Functions [1]: [sum(UnscaledValue(cs_sales_price#27))] +Aggregate Attributes [1]: [sum(UnscaledValue(cs_sales_price#27))#16] +Results [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, MakeDecimal(sum(UnscaledValue(cs_sales_price#27))#16,17,2) AS sum_sales#17] (31) Exchange -Input [6]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29, sum_sales#20] -Arguments: hashpartitioning(i_category#25, i_brand#26, cc_name#27, 5), ENSURE_REQUIREMENTS, [id=#32] +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17] +Arguments: hashpartitioning(i_category#21, i_brand#22, cc_name#23, 5), ENSURE_REQUIREMENTS, [plan_id=5] (32) Sort [codegen id : 13] -Input [6]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29, sum_sales#20] -Arguments: [i_category#25 ASC NULLS FIRST, i_brand#26 ASC NULLS FIRST, cc_name#27 ASC NULLS FIRST, d_year#28 ASC NULLS FIRST, d_moy#29 ASC NULLS FIRST], false, 0 +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17] +Arguments: [i_category#21 ASC NULLS FIRST, i_brand#22 ASC NULLS FIRST, cc_name#23 ASC NULLS FIRST, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST], false, 0 (33) Window -Input [6]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29, sum_sales#20] -Arguments: [rank(d_year#28, d_moy#29) windowspecdefinition(i_category#25, i_brand#26, cc_name#27, d_year#28 ASC NULLS FIRST, d_moy#29 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#33], [i_category#25, i_brand#26, cc_name#27], [d_year#28 ASC NULLS FIRST, d_moy#29 ASC NULLS FIRST] +Input [6]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17] +Arguments: [rank(d_year#24, d_moy#25) windowspecdefinition(i_category#21, i_brand#22, cc_name#23, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#28], [i_category#21, i_brand#22, cc_name#23], [d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] (34) Project [codegen id : 14] -Output [5]: [i_category#25, i_brand#26, cc_name#27, sum_sales#20 AS sum_sales#34, rn#33] -Input [7]: [i_category#25, i_brand#26, cc_name#27, d_year#28, d_moy#29, sum_sales#20, rn#33] +Output [5]: [i_category#21, i_brand#22, cc_name#23, sum_sales#17 AS sum_sales#29, rn#28] +Input [7]: [i_category#21, i_brand#22, cc_name#23, d_year#24, d_moy#25, sum_sales#17, rn#28] (35) BroadcastExchange -Input [5]: [i_category#25, i_brand#26, cc_name#27, sum_sales#34, rn#33] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [id=#35] +Input [5]: [i_category#21, i_brand#22, cc_name#23, sum_sales#29, rn#28] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=6] (36) BroadcastHashJoin [codegen id : 22] -Left keys [4]: [i_category#3, i_brand#2, cc_name#14, rn#23] -Right keys [4]: [i_category#25, i_brand#26, cc_name#27, (rn#33 + 1)] +Left keys [4]: [i_category#3, i_brand#2, cc_name#13, rn#19] +Right keys [4]: [i_category#21, i_brand#22, cc_name#23, (rn#28 + 1)] Join condition: None (37) Project [codegen id : 22] -Output [9]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, avg_monthly_sales#24, rn#23, sum_sales#34] -Input [13]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, avg_monthly_sales#24, rn#23, i_category#25, i_brand#26, cc_name#27, sum_sales#34, rn#33] +Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#29] +Input [13]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, avg_monthly_sales#20, rn#19, i_category#21, i_brand#22, cc_name#23, sum_sales#29, rn#28] (38) ReusedExchange [Reuses operator id: 31] -Output [6]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#20] +Output [6]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17] (39) Sort [codegen id : 20] -Input [6]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#20] -Arguments: [i_category#36 ASC NULLS FIRST, i_brand#37 ASC NULLS FIRST, cc_name#38 ASC NULLS FIRST, d_year#39 ASC NULLS FIRST, d_moy#40 ASC NULLS FIRST], false, 0 +Input [6]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17] +Arguments: [i_category#30 ASC NULLS FIRST, i_brand#31 ASC NULLS FIRST, cc_name#32 ASC NULLS FIRST, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST], false, 0 (40) Window -Input [6]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#20] -Arguments: [rank(d_year#39, d_moy#40) windowspecdefinition(i_category#36, i_brand#37, cc_name#38, d_year#39 ASC NULLS FIRST, d_moy#40 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#41], [i_category#36, i_brand#37, cc_name#38], [d_year#39 ASC NULLS FIRST, d_moy#40 ASC NULLS FIRST] +Input [6]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17] +Arguments: [rank(d_year#33, d_moy#34) windowspecdefinition(i_category#30, i_brand#31, cc_name#32, d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#35], [i_category#30, i_brand#31, cc_name#32], [d_year#33 ASC NULLS FIRST, d_moy#34 ASC NULLS FIRST] (41) Project [codegen id : 21] -Output [5]: [i_category#36, i_brand#37, cc_name#38, sum_sales#20 AS sum_sales#42, rn#41] -Input [7]: [i_category#36, i_brand#37, cc_name#38, d_year#39, d_moy#40, sum_sales#20, rn#41] +Output [5]: [i_category#30, i_brand#31, cc_name#32, sum_sales#17 AS sum_sales#36, rn#35] +Input [7]: [i_category#30, i_brand#31, cc_name#32, d_year#33, d_moy#34, sum_sales#17, rn#35] (42) BroadcastExchange -Input [5]: [i_category#36, i_brand#37, cc_name#38, sum_sales#42, rn#41] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [id=#43] +Input [5]: [i_category#30, i_brand#31, cc_name#32, sum_sales#36, rn#35] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=7] (43) BroadcastHashJoin [codegen id : 22] -Left keys [4]: [i_category#3, i_brand#2, cc_name#14, rn#23] -Right keys [4]: [i_category#36, i_brand#37, cc_name#38, (rn#41 - 1)] +Left keys [4]: [i_category#3, i_brand#2, cc_name#13, rn#19] +Right keys [4]: [i_category#30, i_brand#31, cc_name#32, (rn#35 - 1)] Join condition: None (44) Project [codegen id : 22] -Output [8]: [i_category#3, i_brand#2, d_year#11, d_moy#12, avg_monthly_sales#24, sum_sales#20, sum_sales#34 AS psum#44, sum_sales#42 AS nsum#45] -Input [14]: [i_category#3, i_brand#2, cc_name#14, d_year#11, d_moy#12, sum_sales#20, avg_monthly_sales#24, rn#23, sum_sales#34, i_category#36, i_brand#37, cc_name#38, sum_sales#42, rn#41] +Output [8]: [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#20, sum_sales#17, sum_sales#29 AS psum#37, sum_sales#36 AS nsum#38] +Input [14]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#17, avg_monthly_sales#20, rn#19, sum_sales#29, i_category#30, i_brand#31, cc_name#32, sum_sales#36, rn#35] (45) TakeOrderedAndProject -Input [8]: [i_category#3, i_brand#2, d_year#11, d_moy#12, avg_monthly_sales#24, sum_sales#20, psum#44, nsum#45] -Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#20 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#24 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, d_year#11 ASC NULLS FIRST], [i_category#3, i_brand#2, d_year#11, d_moy#12, avg_monthly_sales#24, sum_sales#20, psum#44, nsum#45] +Input [8]: [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#20, sum_sales#17, psum#37, nsum#38] +Arguments: 100, [CheckOverflow((promote_precision(cast(sum_sales#17 as decimal(22,6))) - promote_precision(cast(avg_monthly_sales#20 as decimal(22,6)))), DecimalType(22,6)) ASC NULLS FIRST, d_year#10 ASC NULLS FIRST], [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#20, sum_sales#17, psum#37, nsum#38] ===== Subqueries ===== @@ -254,21 +254,21 @@ BroadcastExchange (49) (46) Scan parquet default.date_dim -Output [3]: [d_date_sk#10, d_year#11, d_moy#12] +Output [3]: [d_date_sk#9, d_year#10, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct (47) ColumnarToRow [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] (48) Filter [codegen id : 1] -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] -Condition : ((((d_year#11 = 1999) OR ((d_year#11 = 1998) AND (d_moy#12 = 12))) OR ((d_year#11 = 2000) AND (d_moy#12 = 1))) AND isnotnull(d_date_sk#10)) +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) (49) BroadcastExchange -Input [3]: [d_date_sk#10, d_year#11, d_moy#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#46] +Input [3]: [d_date_sk#9, d_year#10, d_moy#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt index 88d3ec5d20f2b..05c7834b2abcb 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a.sf100/explain.txt @@ -147,7 +147,7 @@ Condition : isnotnull(s_store_sk#22) (13) BroadcastExchange Input [2]: [s_store_sk#22, s_store_id#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (14) BroadcastHashJoin [codegen id : 5] Left keys [1]: [store_sk#6] @@ -159,362 +159,362 @@ Output [6]: [date_sk#7, sales_price#8, profit#9, return_amt#10, net_loss#11, s_s Input [8]: [store_sk#6, date_sk#7, sales_price#8, profit#9, return_amt#10, net_loss#11, s_store_sk#22, s_store_id#23] (16) ReusedExchange [Reuses operator id: 95] -Output [1]: [d_date_sk#25] +Output [1]: [d_date_sk#24] (17) BroadcastHashJoin [codegen id : 5] Left keys [1]: [date_sk#7] -Right keys [1]: [d_date_sk#25] +Right keys [1]: [d_date_sk#24] Join condition: None (18) Project [codegen id : 5] Output [5]: [sales_price#8, profit#9, return_amt#10, net_loss#11, s_store_id#23] -Input [7]: [date_sk#7, sales_price#8, profit#9, return_amt#10, net_loss#11, s_store_id#23, d_date_sk#25] +Input [7]: [date_sk#7, sales_price#8, profit#9, return_amt#10, net_loss#11, s_store_id#23, d_date_sk#24] (19) HashAggregate [codegen id : 5] Input [5]: [sales_price#8, profit#9, return_amt#10, net_loss#11, s_store_id#23] Keys [1]: [s_store_id#23] Functions [4]: [partial_sum(UnscaledValue(sales_price#8)), partial_sum(UnscaledValue(return_amt#10)), partial_sum(UnscaledValue(profit#9)), partial_sum(UnscaledValue(net_loss#11))] -Aggregate Attributes [4]: [sum#26, sum#27, sum#28, sum#29] -Results [5]: [s_store_id#23, sum#30, sum#31, sum#32, sum#33] +Aggregate Attributes [4]: [sum#25, sum#26, sum#27, sum#28] +Results [5]: [s_store_id#23, sum#29, sum#30, sum#31, sum#32] (20) Exchange -Input [5]: [s_store_id#23, sum#30, sum#31, sum#32, sum#33] -Arguments: hashpartitioning(s_store_id#23, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [5]: [s_store_id#23, sum#29, sum#30, sum#31, sum#32] +Arguments: hashpartitioning(s_store_id#23, 5), ENSURE_REQUIREMENTS, [plan_id=2] (21) HashAggregate [codegen id : 6] -Input [5]: [s_store_id#23, sum#30, sum#31, sum#32, sum#33] +Input [5]: [s_store_id#23, sum#29, sum#30, sum#31, sum#32] Keys [1]: [s_store_id#23] Functions [4]: [sum(UnscaledValue(sales_price#8)), sum(UnscaledValue(return_amt#10)), sum(UnscaledValue(profit#9)), sum(UnscaledValue(net_loss#11))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#8))#35, sum(UnscaledValue(return_amt#10))#36, sum(UnscaledValue(profit#9))#37, sum(UnscaledValue(net_loss#11))#38] -Results [5]: [store channel AS channel#39, concat(store, s_store_id#23) AS id#40, MakeDecimal(sum(UnscaledValue(sales_price#8))#35,17,2) AS sales#41, MakeDecimal(sum(UnscaledValue(return_amt#10))#36,17,2) AS returns#42, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#37,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#38,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#43] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#8))#33, sum(UnscaledValue(return_amt#10))#34, sum(UnscaledValue(profit#9))#35, sum(UnscaledValue(net_loss#11))#36] +Results [5]: [store channel AS channel#37, concat(store, s_store_id#23) AS id#38, MakeDecimal(sum(UnscaledValue(sales_price#8))#33,17,2) AS sales#39, MakeDecimal(sum(UnscaledValue(return_amt#10))#34,17,2) AS returns#40, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#35,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#36,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#41] (22) Scan parquet default.catalog_sales -Output [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] +Output [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#47), dynamicpruningexpression(cs_sold_date_sk#47 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#45), dynamicpruningexpression(cs_sold_date_sk#45 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_catalog_page_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 7] -Input [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] (24) Filter [codegen id : 7] -Input [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] -Condition : isnotnull(cs_catalog_page_sk#44) +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : isnotnull(cs_catalog_page_sk#42) (25) Project [codegen id : 7] -Output [6]: [cs_catalog_page_sk#44 AS page_sk#48, cs_sold_date_sk#47 AS date_sk#49, cs_ext_sales_price#45 AS sales_price#50, cs_net_profit#46 AS profit#51, 0.00 AS return_amt#52, 0.00 AS net_loss#53] -Input [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] +Output [6]: [cs_catalog_page_sk#42 AS page_sk#46, cs_sold_date_sk#45 AS date_sk#47, cs_ext_sales_price#43 AS sales_price#48, cs_net_profit#44 AS profit#49, 0.00 AS return_amt#50, 0.00 AS net_loss#51] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] (26) Scan parquet default.catalog_returns -Output [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#57), dynamicpruningexpression(cr_returned_date_sk#57 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cr_returned_date_sk#55), dynamicpruningexpression(cr_returned_date_sk#55 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cr_catalog_page_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 8] -Input [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] (28) Filter [codegen id : 8] -Input [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] -Condition : isnotnull(cr_catalog_page_sk#54) +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Condition : isnotnull(cr_catalog_page_sk#52) (29) Project [codegen id : 8] -Output [6]: [cr_catalog_page_sk#54 AS page_sk#58, cr_returned_date_sk#57 AS date_sk#59, 0.00 AS sales_price#60, 0.00 AS profit#61, cr_return_amount#55 AS return_amt#62, cr_net_loss#56 AS net_loss#63] -Input [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [6]: [cr_catalog_page_sk#52 AS page_sk#56, cr_returned_date_sk#55 AS date_sk#57, 0.00 AS sales_price#58, 0.00 AS profit#59, cr_return_amount#53 AS return_amt#60, cr_net_loss#54 AS net_loss#61] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] (30) Union (31) Scan parquet default.catalog_page -Output [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] +Output [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 9] -Input [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] +Input [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] (33) Filter [codegen id : 9] -Input [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] -Condition : isnotnull(cp_catalog_page_sk#64) +Input [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] +Condition : isnotnull(cp_catalog_page_sk#62) (34) BroadcastExchange -Input [2]: [cp_catalog_page_sk#64, cp_catalog_page_id#65] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#66] +Input [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (35) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [page_sk#48] -Right keys [1]: [cp_catalog_page_sk#64] +Left keys [1]: [page_sk#46] +Right keys [1]: [cp_catalog_page_sk#62] Join condition: None (36) Project [codegen id : 11] -Output [6]: [date_sk#49, sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_id#65] -Input [8]: [page_sk#48, date_sk#49, sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_sk#64, cp_catalog_page_id#65] +Output [6]: [date_sk#47, sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#63] +Input [8]: [page_sk#46, date_sk#47, sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_sk#62, cp_catalog_page_id#63] (37) ReusedExchange [Reuses operator id: 95] -Output [1]: [d_date_sk#67] +Output [1]: [d_date_sk#64] (38) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [date_sk#49] -Right keys [1]: [d_date_sk#67] +Left keys [1]: [date_sk#47] +Right keys [1]: [d_date_sk#64] Join condition: None (39) Project [codegen id : 11] -Output [5]: [sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_id#65] -Input [7]: [date_sk#49, sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_id#65, d_date_sk#67] +Output [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#63] +Input [7]: [date_sk#47, sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#63, d_date_sk#64] (40) HashAggregate [codegen id : 11] -Input [5]: [sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_id#65] -Keys [1]: [cp_catalog_page_id#65] -Functions [4]: [partial_sum(UnscaledValue(sales_price#50)), partial_sum(UnscaledValue(return_amt#52)), partial_sum(UnscaledValue(profit#51)), partial_sum(UnscaledValue(net_loss#53))] -Aggregate Attributes [4]: [sum#68, sum#69, sum#70, sum#71] -Results [5]: [cp_catalog_page_id#65, sum#72, sum#73, sum#74, sum#75] +Input [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#63] +Keys [1]: [cp_catalog_page_id#63] +Functions [4]: [partial_sum(UnscaledValue(sales_price#48)), partial_sum(UnscaledValue(return_amt#50)), partial_sum(UnscaledValue(profit#49)), partial_sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum#65, sum#66, sum#67, sum#68] +Results [5]: [cp_catalog_page_id#63, sum#69, sum#70, sum#71, sum#72] (41) Exchange -Input [5]: [cp_catalog_page_id#65, sum#72, sum#73, sum#74, sum#75] -Arguments: hashpartitioning(cp_catalog_page_id#65, 5), ENSURE_REQUIREMENTS, [id=#76] +Input [5]: [cp_catalog_page_id#63, sum#69, sum#70, sum#71, sum#72] +Arguments: hashpartitioning(cp_catalog_page_id#63, 5), ENSURE_REQUIREMENTS, [plan_id=4] (42) HashAggregate [codegen id : 12] -Input [5]: [cp_catalog_page_id#65, sum#72, sum#73, sum#74, sum#75] -Keys [1]: [cp_catalog_page_id#65] -Functions [4]: [sum(UnscaledValue(sales_price#50)), sum(UnscaledValue(return_amt#52)), sum(UnscaledValue(profit#51)), sum(UnscaledValue(net_loss#53))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#50))#77, sum(UnscaledValue(return_amt#52))#78, sum(UnscaledValue(profit#51))#79, sum(UnscaledValue(net_loss#53))#80] -Results [5]: [catalog channel AS channel#81, concat(catalog_page, cp_catalog_page_id#65) AS id#82, MakeDecimal(sum(UnscaledValue(sales_price#50))#77,17,2) AS sales#83, MakeDecimal(sum(UnscaledValue(return_amt#52))#78,17,2) AS returns#84, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#51))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#53))#80,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#85] +Input [5]: [cp_catalog_page_id#63, sum#69, sum#70, sum#71, sum#72] +Keys [1]: [cp_catalog_page_id#63] +Functions [4]: [sum(UnscaledValue(sales_price#48)), sum(UnscaledValue(return_amt#50)), sum(UnscaledValue(profit#49)), sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#48))#73, sum(UnscaledValue(return_amt#50))#74, sum(UnscaledValue(profit#49))#75, sum(UnscaledValue(net_loss#51))#76] +Results [5]: [catalog channel AS channel#77, concat(catalog_page, cp_catalog_page_id#63) AS id#78, MakeDecimal(sum(UnscaledValue(sales_price#48))#73,17,2) AS sales#79, MakeDecimal(sum(UnscaledValue(return_amt#50))#74,17,2) AS returns#80, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#49))#75,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#51))#76,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#81] (43) Scan parquet default.web_sales -Output [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] +Output [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#89), dynamicpruningexpression(ws_sold_date_sk#89 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#85), dynamicpruningexpression(ws_sold_date_sk#85 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_web_site_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 13] -Input [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] (45) Filter [codegen id : 13] -Input [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] -Condition : isnotnull(ws_web_site_sk#86) +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] +Condition : isnotnull(ws_web_site_sk#82) (46) Project [codegen id : 13] -Output [6]: [ws_web_site_sk#86 AS wsr_web_site_sk#90, ws_sold_date_sk#89 AS date_sk#91, ws_ext_sales_price#87 AS sales_price#92, ws_net_profit#88 AS profit#93, 0.00 AS return_amt#94, 0.00 AS net_loss#95] -Input [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] +Output [6]: [ws_web_site_sk#82 AS wsr_web_site_sk#86, ws_sold_date_sk#85 AS date_sk#87, ws_ext_sales_price#83 AS sales_price#88, ws_net_profit#84 AS profit#89, 0.00 AS return_amt#90, 0.00 AS net_loss#91] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] (47) Scan parquet default.web_returns -Output [5]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100] +Output [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#100), dynamicpruningexpression(wr_returned_date_sk#100 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(wr_returned_date_sk#96), dynamicpruningexpression(wr_returned_date_sk#96 IN dynamicpruning#5)] ReadSchema: struct (48) ColumnarToRow [codegen id : 14] -Input [5]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100] +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] (49) Exchange -Input [5]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100] -Arguments: hashpartitioning(wr_item_sk#96, wr_order_number#97, 5), ENSURE_REQUIREMENTS, [id=#101] +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Arguments: hashpartitioning(wr_item_sk#92, wr_order_number#93, 5), ENSURE_REQUIREMENTS, [plan_id=5] (50) Sort [codegen id : 15] -Input [5]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100] -Arguments: [wr_item_sk#96 ASC NULLS FIRST, wr_order_number#97 ASC NULLS FIRST], false, 0 +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Arguments: [wr_item_sk#92 ASC NULLS FIRST, wr_order_number#93 ASC NULLS FIRST], false, 0 (51) Scan parquet default.web_sales -Output [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] +Output [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] ReadSchema: struct (52) ColumnarToRow [codegen id : 16] -Input [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] (53) Filter [codegen id : 16] -Input [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] -Condition : ((isnotnull(ws_item_sk#102) AND isnotnull(ws_order_number#104)) AND isnotnull(ws_web_site_sk#103)) +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] +Condition : ((isnotnull(ws_item_sk#97) AND isnotnull(ws_order_number#99)) AND isnotnull(ws_web_site_sk#98)) (54) Project [codegen id : 16] -Output [3]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104] -Input [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] +Output [3]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] (55) Exchange -Input [3]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104] -Arguments: hashpartitioning(ws_item_sk#102, ws_order_number#104, 5), ENSURE_REQUIREMENTS, [id=#106] +Input [3]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] +Arguments: hashpartitioning(ws_item_sk#97, ws_order_number#99, 5), ENSURE_REQUIREMENTS, [plan_id=6] (56) Sort [codegen id : 17] -Input [3]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104] -Arguments: [ws_item_sk#102 ASC NULLS FIRST, ws_order_number#104 ASC NULLS FIRST], false, 0 +Input [3]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] +Arguments: [ws_item_sk#97 ASC NULLS FIRST, ws_order_number#99 ASC NULLS FIRST], false, 0 (57) SortMergeJoin [codegen id : 18] -Left keys [2]: [wr_item_sk#96, wr_order_number#97] -Right keys [2]: [ws_item_sk#102, ws_order_number#104] +Left keys [2]: [wr_item_sk#92, wr_order_number#93] +Right keys [2]: [ws_item_sk#97, ws_order_number#99] Join condition: None (58) Project [codegen id : 18] -Output [6]: [ws_web_site_sk#103 AS wsr_web_site_sk#107, wr_returned_date_sk#100 AS date_sk#108, 0.00 AS sales_price#109, 0.00 AS profit#110, wr_return_amt#98 AS return_amt#111, wr_net_loss#99 AS net_loss#112] -Input [8]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100, ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104] +Output [6]: [ws_web_site_sk#98 AS wsr_web_site_sk#101, wr_returned_date_sk#96 AS date_sk#102, 0.00 AS sales_price#103, 0.00 AS profit#104, wr_return_amt#94 AS return_amt#105, wr_net_loss#95 AS net_loss#106] +Input [8]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96, ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] (59) Union (60) Scan parquet default.web_site -Output [2]: [web_site_sk#113, web_site_id#114] +Output [2]: [web_site_sk#107, web_site_id#108] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct (61) ColumnarToRow [codegen id : 19] -Input [2]: [web_site_sk#113, web_site_id#114] +Input [2]: [web_site_sk#107, web_site_id#108] (62) Filter [codegen id : 19] -Input [2]: [web_site_sk#113, web_site_id#114] -Condition : isnotnull(web_site_sk#113) +Input [2]: [web_site_sk#107, web_site_id#108] +Condition : isnotnull(web_site_sk#107) (63) BroadcastExchange -Input [2]: [web_site_sk#113, web_site_id#114] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +Input [2]: [web_site_sk#107, web_site_id#108] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] (64) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [wsr_web_site_sk#90] -Right keys [1]: [web_site_sk#113] +Left keys [1]: [wsr_web_site_sk#86] +Right keys [1]: [web_site_sk#107] Join condition: None (65) Project [codegen id : 21] -Output [6]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#114] -Input [8]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#113, web_site_id#114] +Output [6]: [date_sk#87, sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#108] +Input [8]: [wsr_web_site_sk#86, date_sk#87, sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_sk#107, web_site_id#108] (66) ReusedExchange [Reuses operator id: 95] -Output [1]: [d_date_sk#116] +Output [1]: [d_date_sk#109] (67) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [date_sk#91] -Right keys [1]: [d_date_sk#116] +Left keys [1]: [date_sk#87] +Right keys [1]: [d_date_sk#109] Join condition: None (68) Project [codegen id : 21] -Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#114] -Input [7]: [date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#114, d_date_sk#116] +Output [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#108] +Input [7]: [date_sk#87, sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#108, d_date_sk#109] (69) HashAggregate [codegen id : 21] -Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#114] -Keys [1]: [web_site_id#114] -Functions [4]: [partial_sum(UnscaledValue(sales_price#92)), partial_sum(UnscaledValue(return_amt#94)), partial_sum(UnscaledValue(profit#93)), partial_sum(UnscaledValue(net_loss#95))] -Aggregate Attributes [4]: [sum#117, sum#118, sum#119, sum#120] -Results [5]: [web_site_id#114, sum#121, sum#122, sum#123, sum#124] +Input [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#108] +Keys [1]: [web_site_id#108] +Functions [4]: [partial_sum(UnscaledValue(sales_price#88)), partial_sum(UnscaledValue(return_amt#90)), partial_sum(UnscaledValue(profit#89)), partial_sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum#110, sum#111, sum#112, sum#113] +Results [5]: [web_site_id#108, sum#114, sum#115, sum#116, sum#117] (70) Exchange -Input [5]: [web_site_id#114, sum#121, sum#122, sum#123, sum#124] -Arguments: hashpartitioning(web_site_id#114, 5), ENSURE_REQUIREMENTS, [id=#125] +Input [5]: [web_site_id#108, sum#114, sum#115, sum#116, sum#117] +Arguments: hashpartitioning(web_site_id#108, 5), ENSURE_REQUIREMENTS, [plan_id=8] (71) HashAggregate [codegen id : 22] -Input [5]: [web_site_id#114, sum#121, sum#122, sum#123, sum#124] -Keys [1]: [web_site_id#114] -Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#126, sum(UnscaledValue(return_amt#94))#127, sum(UnscaledValue(profit#93))#128, sum(UnscaledValue(net_loss#95))#129] -Results [5]: [web channel AS channel#130, concat(web_site, web_site_id#114) AS id#131, MakeDecimal(sum(UnscaledValue(sales_price#92))#126,17,2) AS sales#132, MakeDecimal(sum(UnscaledValue(return_amt#94))#127,17,2) AS returns#133, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#128,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#129,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#134] +Input [5]: [web_site_id#108, sum#114, sum#115, sum#116, sum#117] +Keys [1]: [web_site_id#108] +Functions [4]: [sum(UnscaledValue(sales_price#88)), sum(UnscaledValue(return_amt#90)), sum(UnscaledValue(profit#89)), sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#88))#118, sum(UnscaledValue(return_amt#90))#119, sum(UnscaledValue(profit#89))#120, sum(UnscaledValue(net_loss#91))#121] +Results [5]: [web channel AS channel#122, concat(web_site, web_site_id#108) AS id#123, MakeDecimal(sum(UnscaledValue(sales_price#88))#118,17,2) AS sales#124, MakeDecimal(sum(UnscaledValue(return_amt#90))#119,17,2) AS returns#125, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#89))#120,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#91))#121,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#126] (72) Union (73) HashAggregate [codegen id : 23] -Input [5]: [channel#39, id#40, sales#41, returns#42, profit#43] -Keys [2]: [channel#39, id#40] -Functions [3]: [partial_sum(sales#41), partial_sum(returns#42), partial_sum(profit#43)] -Aggregate Attributes [6]: [sum#135, isEmpty#136, sum#137, isEmpty#138, sum#139, isEmpty#140] -Results [8]: [channel#39, id#40, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] +Input [5]: [channel#37, id#38, sales#39, returns#40, profit#41] +Keys [2]: [channel#37, id#38] +Functions [3]: [partial_sum(sales#39), partial_sum(returns#40), partial_sum(profit#41)] +Aggregate Attributes [6]: [sum#127, isEmpty#128, sum#129, isEmpty#130, sum#131, isEmpty#132] +Results [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] (74) Exchange -Input [8]: [channel#39, id#40, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] -Arguments: hashpartitioning(channel#39, id#40, 5), ENSURE_REQUIREMENTS, [id=#147] +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Arguments: hashpartitioning(channel#37, id#38, 5), ENSURE_REQUIREMENTS, [plan_id=9] (75) HashAggregate [codegen id : 24] -Input [8]: [channel#39, id#40, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] -Keys [2]: [channel#39, id#40] -Functions [3]: [sum(sales#41), sum(returns#42), sum(profit#43)] -Aggregate Attributes [3]: [sum(sales#41)#148, sum(returns#42)#149, sum(profit#43)#150] -Results [5]: [channel#39, id#40, cast(sum(sales#41)#148 as decimal(37,2)) AS sales#151, cast(sum(returns#42)#149 as decimal(37,2)) AS returns#152, cast(sum(profit#43)#150 as decimal(38,2)) AS profit#153] +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Keys [2]: [channel#37, id#38] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#139, sum(returns#40)#140, sum(profit#41)#141] +Results [5]: [channel#37, id#38, cast(sum(sales#39)#139 as decimal(37,2)) AS sales#142, cast(sum(returns#40)#140 as decimal(37,2)) AS returns#143, cast(sum(profit#41)#141 as decimal(38,2)) AS profit#144] (76) ReusedExchange [Reuses operator id: 74] -Output [8]: [channel#39, id#40, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] +Output [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] (77) HashAggregate [codegen id : 48] -Input [8]: [channel#39, id#40, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] -Keys [2]: [channel#39, id#40] -Functions [3]: [sum(sales#41), sum(returns#42), sum(profit#43)] -Aggregate Attributes [3]: [sum(sales#41)#148, sum(returns#42)#149, sum(profit#43)#150] -Results [4]: [channel#39, sum(sales#41)#148 AS sales#154, sum(returns#42)#149 AS returns#155, sum(profit#43)#150 AS profit#156] +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Keys [2]: [channel#37, id#38] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#139, sum(returns#40)#140, sum(profit#41)#141] +Results [4]: [channel#37, sum(sales#39)#139 AS sales#145, sum(returns#40)#140 AS returns#146, sum(profit#41)#141 AS profit#147] (78) HashAggregate [codegen id : 48] -Input [4]: [channel#39, sales#154, returns#155, profit#156] -Keys [1]: [channel#39] -Functions [3]: [partial_sum(sales#154), partial_sum(returns#155), partial_sum(profit#156)] -Aggregate Attributes [6]: [sum#157, isEmpty#158, sum#159, isEmpty#160, sum#161, isEmpty#162] -Results [7]: [channel#39, sum#163, isEmpty#164, sum#165, isEmpty#166, sum#167, isEmpty#168] +Input [4]: [channel#37, sales#145, returns#146, profit#147] +Keys [1]: [channel#37] +Functions [3]: [partial_sum(sales#145), partial_sum(returns#146), partial_sum(profit#147)] +Aggregate Attributes [6]: [sum#148, isEmpty#149, sum#150, isEmpty#151, sum#152, isEmpty#153] +Results [7]: [channel#37, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] (79) Exchange -Input [7]: [channel#39, sum#163, isEmpty#164, sum#165, isEmpty#166, sum#167, isEmpty#168] -Arguments: hashpartitioning(channel#39, 5), ENSURE_REQUIREMENTS, [id=#169] +Input [7]: [channel#37, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] +Arguments: hashpartitioning(channel#37, 5), ENSURE_REQUIREMENTS, [plan_id=10] (80) HashAggregate [codegen id : 49] -Input [7]: [channel#39, sum#163, isEmpty#164, sum#165, isEmpty#166, sum#167, isEmpty#168] -Keys [1]: [channel#39] -Functions [3]: [sum(sales#154), sum(returns#155), sum(profit#156)] -Aggregate Attributes [3]: [sum(sales#154)#170, sum(returns#155)#171, sum(profit#156)#172] -Results [5]: [channel#39, null AS id#173, sum(sales#154)#170 AS sum(sales)#174, sum(returns#155)#171 AS sum(returns)#175, sum(profit#156)#172 AS sum(profit)#176] +Input [7]: [channel#37, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] +Keys [1]: [channel#37] +Functions [3]: [sum(sales#145), sum(returns#146), sum(profit#147)] +Aggregate Attributes [3]: [sum(sales#145)#160, sum(returns#146)#161, sum(profit#147)#162] +Results [5]: [channel#37, null AS id#163, sum(sales#145)#160 AS sum(sales)#164, sum(returns#146)#161 AS sum(returns)#165, sum(profit#147)#162 AS sum(profit)#166] (81) ReusedExchange [Reuses operator id: 74] -Output [8]: [channel#39, id#40, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] +Output [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] (82) HashAggregate [codegen id : 73] -Input [8]: [channel#39, id#40, sum#141, isEmpty#142, sum#143, isEmpty#144, sum#145, isEmpty#146] -Keys [2]: [channel#39, id#40] -Functions [3]: [sum(sales#41), sum(returns#42), sum(profit#43)] -Aggregate Attributes [3]: [sum(sales#41)#148, sum(returns#42)#149, sum(profit#43)#150] -Results [3]: [sum(sales#41)#148 AS sales#154, sum(returns#42)#149 AS returns#155, sum(profit#43)#150 AS profit#156] +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Keys [2]: [channel#37, id#38] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#139, sum(returns#40)#140, sum(profit#41)#141] +Results [3]: [sum(sales#39)#139 AS sales#145, sum(returns#40)#140 AS returns#146, sum(profit#41)#141 AS profit#147] (83) HashAggregate [codegen id : 73] -Input [3]: [sales#154, returns#155, profit#156] +Input [3]: [sales#145, returns#146, profit#147] Keys: [] -Functions [3]: [partial_sum(sales#154), partial_sum(returns#155), partial_sum(profit#156)] -Aggregate Attributes [6]: [sum#177, isEmpty#178, sum#179, isEmpty#180, sum#181, isEmpty#182] -Results [6]: [sum#183, isEmpty#184, sum#185, isEmpty#186, sum#187, isEmpty#188] +Functions [3]: [partial_sum(sales#145), partial_sum(returns#146), partial_sum(profit#147)] +Aggregate Attributes [6]: [sum#167, isEmpty#168, sum#169, isEmpty#170, sum#171, isEmpty#172] +Results [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] (84) Exchange -Input [6]: [sum#183, isEmpty#184, sum#185, isEmpty#186, sum#187, isEmpty#188] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#189] +Input [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] (85) HashAggregate [codegen id : 74] -Input [6]: [sum#183, isEmpty#184, sum#185, isEmpty#186, sum#187, isEmpty#188] +Input [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] Keys: [] -Functions [3]: [sum(sales#154), sum(returns#155), sum(profit#156)] -Aggregate Attributes [3]: [sum(sales#154)#190, sum(returns#155)#191, sum(profit#156)#192] -Results [5]: [null AS channel#193, null AS id#194, sum(sales#154)#190 AS sum(sales)#195, sum(returns#155)#191 AS sum(returns)#196, sum(profit#156)#192 AS sum(profit)#197] +Functions [3]: [sum(sales#145), sum(returns#146), sum(profit#147)] +Aggregate Attributes [3]: [sum(sales#145)#179, sum(returns#146)#180, sum(profit#147)#181] +Results [5]: [null AS channel#182, null AS id#183, sum(sales#145)#179 AS sum(sales)#184, sum(returns#146)#180 AS sum(returns)#185, sum(profit#147)#181 AS sum(profit)#186] (86) Union (87) HashAggregate [codegen id : 75] -Input [5]: [channel#39, id#40, sales#151, returns#152, profit#153] -Keys [5]: [channel#39, id#40, sales#151, returns#152, profit#153] +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Keys [5]: [channel#37, id#38, sales#142, returns#143, profit#144] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#39, id#40, sales#151, returns#152, profit#153] +Results [5]: [channel#37, id#38, sales#142, returns#143, profit#144] (88) Exchange -Input [5]: [channel#39, id#40, sales#151, returns#152, profit#153] -Arguments: hashpartitioning(channel#39, id#40, sales#151, returns#152, profit#153, 5), ENSURE_REQUIREMENTS, [id=#198] +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Arguments: hashpartitioning(channel#37, id#38, sales#142, returns#143, profit#144, 5), ENSURE_REQUIREMENTS, [plan_id=12] (89) HashAggregate [codegen id : 76] -Input [5]: [channel#39, id#40, sales#151, returns#152, profit#153] -Keys [5]: [channel#39, id#40, sales#151, returns#152, profit#153] +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Keys [5]: [channel#37, id#38, sales#142, returns#143, profit#144] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#39, id#40, sales#151, returns#152, profit#153] +Results [5]: [channel#37, id#38, sales#142, returns#143, profit#144] (90) TakeOrderedAndProject -Input [5]: [channel#39, id#40, sales#151, returns#152, profit#153] -Arguments: 100, [channel#39 ASC NULLS FIRST, id#40 ASC NULLS FIRST], [channel#39, id#40, sales#151, returns#152, profit#153] +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Arguments: 100, [channel#37 ASC NULLS FIRST, id#38 ASC NULLS FIRST], [channel#37, id#38, sales#142, returns#143, profit#144] ===== Subqueries ===== @@ -527,35 +527,35 @@ BroadcastExchange (95) (91) Scan parquet default.date_dim -Output [2]: [d_date_sk#25, d_date#199] +Output [2]: [d_date_sk#24, d_date#187] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-08-18), IsNotNull(d_date_sk)] ReadSchema: struct (92) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#25, d_date#199] +Input [2]: [d_date_sk#24, d_date#187] (93) Filter [codegen id : 1] -Input [2]: [d_date_sk#25, d_date#199] -Condition : (((isnotnull(d_date#199) AND (d_date#199 >= 1998-08-04)) AND (d_date#199 <= 1998-08-18)) AND isnotnull(d_date_sk#25)) +Input [2]: [d_date_sk#24, d_date#187] +Condition : (((isnotnull(d_date#187) AND (d_date#187 >= 1998-08-04)) AND (d_date#187 <= 1998-08-18)) AND isnotnull(d_date_sk#24)) (94) Project [codegen id : 1] -Output [1]: [d_date_sk#25] -Input [2]: [d_date_sk#25, d_date#199] +Output [1]: [d_date_sk#24] +Input [2]: [d_date_sk#24, d_date#187] (95) BroadcastExchange -Input [1]: [d_date_sk#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#200] +Input [1]: [d_date_sk#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=13] Subquery:2 Hosting operator id = 5 Hosting Expression = sr_returned_date_sk#15 IN dynamicpruning#5 -Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#47 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#45 IN dynamicpruning#5 -Subquery:4 Hosting operator id = 26 Hosting Expression = cr_returned_date_sk#57 IN dynamicpruning#5 +Subquery:4 Hosting operator id = 26 Hosting Expression = cr_returned_date_sk#55 IN dynamicpruning#5 -Subquery:5 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#89 IN dynamicpruning#5 +Subquery:5 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#85 IN dynamicpruning#5 -Subquery:6 Hosting operator id = 47 Hosting Expression = wr_returned_date_sk#100 IN dynamicpruning#5 +Subquery:6 Hosting operator id = 47 Hosting Expression = wr_returned_date_sk#96 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt index cadbb12000ba3..225f1d26b0eab 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q5a/explain.txt @@ -156,7 +156,7 @@ Condition : isnotnull(s_store_sk#23) (16) BroadcastExchange Input [2]: [s_store_sk#23, s_store_id#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (17) BroadcastHashJoin [codegen id : 5] Left keys [1]: [store_sk#6] @@ -171,335 +171,335 @@ Input [7]: [store_sk#6, sales_price#8, profit#9, return_amt#10, net_loss#11, s_s Input [5]: [sales_price#8, profit#9, return_amt#10, net_loss#11, s_store_id#24] Keys [1]: [s_store_id#24] Functions [4]: [partial_sum(UnscaledValue(sales_price#8)), partial_sum(UnscaledValue(return_amt#10)), partial_sum(UnscaledValue(profit#9)), partial_sum(UnscaledValue(net_loss#11))] -Aggregate Attributes [4]: [sum#26, sum#27, sum#28, sum#29] -Results [5]: [s_store_id#24, sum#30, sum#31, sum#32, sum#33] +Aggregate Attributes [4]: [sum#25, sum#26, sum#27, sum#28] +Results [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] (20) Exchange -Input [5]: [s_store_id#24, sum#30, sum#31, sum#32, sum#33] -Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] +Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [plan_id=2] (21) HashAggregate [codegen id : 6] -Input [5]: [s_store_id#24, sum#30, sum#31, sum#32, sum#33] +Input [5]: [s_store_id#24, sum#29, sum#30, sum#31, sum#32] Keys [1]: [s_store_id#24] Functions [4]: [sum(UnscaledValue(sales_price#8)), sum(UnscaledValue(return_amt#10)), sum(UnscaledValue(profit#9)), sum(UnscaledValue(net_loss#11))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#8))#35, sum(UnscaledValue(return_amt#10))#36, sum(UnscaledValue(profit#9))#37, sum(UnscaledValue(net_loss#11))#38] -Results [5]: [store channel AS channel#39, concat(store, s_store_id#24) AS id#40, MakeDecimal(sum(UnscaledValue(sales_price#8))#35,17,2) AS sales#41, MakeDecimal(sum(UnscaledValue(return_amt#10))#36,17,2) AS returns#42, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#37,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#38,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#43] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#8))#33, sum(UnscaledValue(return_amt#10))#34, sum(UnscaledValue(profit#9))#35, sum(UnscaledValue(net_loss#11))#36] +Results [5]: [store channel AS channel#37, concat(store, s_store_id#24) AS id#38, MakeDecimal(sum(UnscaledValue(sales_price#8))#33,17,2) AS sales#39, MakeDecimal(sum(UnscaledValue(return_amt#10))#34,17,2) AS returns#40, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#9))#35,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#11))#36,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#41] (22) Scan parquet default.catalog_sales -Output [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] +Output [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#47), dynamicpruningexpression(cs_sold_date_sk#47 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#45), dynamicpruningexpression(cs_sold_date_sk#45 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cs_catalog_page_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 7] -Input [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] (24) Filter [codegen id : 7] -Input [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] -Condition : isnotnull(cs_catalog_page_sk#44) +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : isnotnull(cs_catalog_page_sk#42) (25) Project [codegen id : 7] -Output [6]: [cs_catalog_page_sk#44 AS page_sk#48, cs_sold_date_sk#47 AS date_sk#49, cs_ext_sales_price#45 AS sales_price#50, cs_net_profit#46 AS profit#51, 0.00 AS return_amt#52, 0.00 AS net_loss#53] -Input [4]: [cs_catalog_page_sk#44, cs_ext_sales_price#45, cs_net_profit#46, cs_sold_date_sk#47] +Output [6]: [cs_catalog_page_sk#42 AS page_sk#46, cs_sold_date_sk#45 AS date_sk#47, cs_ext_sales_price#43 AS sales_price#48, cs_net_profit#44 AS profit#49, 0.00 AS return_amt#50, 0.00 AS net_loss#51] +Input [4]: [cs_catalog_page_sk#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] (26) Scan parquet default.catalog_returns -Output [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#57), dynamicpruningexpression(cr_returned_date_sk#57 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cr_returned_date_sk#55), dynamicpruningexpression(cr_returned_date_sk#55 IN dynamicpruning#5)] PushedFilters: [IsNotNull(cr_catalog_page_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 8] -Input [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] (28) Filter [codegen id : 8] -Input [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] -Condition : isnotnull(cr_catalog_page_sk#54) +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Condition : isnotnull(cr_catalog_page_sk#52) (29) Project [codegen id : 8] -Output [6]: [cr_catalog_page_sk#54 AS page_sk#58, cr_returned_date_sk#57 AS date_sk#59, 0.00 AS sales_price#60, 0.00 AS profit#61, cr_return_amount#55 AS return_amt#62, cr_net_loss#56 AS net_loss#63] -Input [4]: [cr_catalog_page_sk#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [6]: [cr_catalog_page_sk#52 AS page_sk#56, cr_returned_date_sk#55 AS date_sk#57, 0.00 AS sales_price#58, 0.00 AS profit#59, cr_return_amount#53 AS return_amt#60, cr_net_loss#54 AS net_loss#61] +Input [4]: [cr_catalog_page_sk#52, cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] (30) Union (31) ReusedExchange [Reuses operator id: 92] -Output [1]: [d_date_sk#64] +Output [1]: [d_date_sk#62] (32) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [date_sk#49] -Right keys [1]: [d_date_sk#64] +Left keys [1]: [date_sk#47] +Right keys [1]: [d_date_sk#62] Join condition: None (33) Project [codegen id : 11] -Output [5]: [page_sk#48, sales_price#50, profit#51, return_amt#52, net_loss#53] -Input [7]: [page_sk#48, date_sk#49, sales_price#50, profit#51, return_amt#52, net_loss#53, d_date_sk#64] +Output [5]: [page_sk#46, sales_price#48, profit#49, return_amt#50, net_loss#51] +Input [7]: [page_sk#46, date_sk#47, sales_price#48, profit#49, return_amt#50, net_loss#51, d_date_sk#62] (34) Scan parquet default.catalog_page -Output [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Output [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct (35) ColumnarToRow [codegen id : 10] -Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] (36) Filter [codegen id : 10] -Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] -Condition : isnotnull(cp_catalog_page_sk#65) +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Condition : isnotnull(cp_catalog_page_sk#63) (37) BroadcastExchange -Input [2]: [cp_catalog_page_sk#65, cp_catalog_page_id#66] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#67] +Input [2]: [cp_catalog_page_sk#63, cp_catalog_page_id#64] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (38) BroadcastHashJoin [codegen id : 11] -Left keys [1]: [page_sk#48] -Right keys [1]: [cp_catalog_page_sk#65] +Left keys [1]: [page_sk#46] +Right keys [1]: [cp_catalog_page_sk#63] Join condition: None (39) Project [codegen id : 11] -Output [5]: [sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_id#66] -Input [7]: [page_sk#48, sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_sk#65, cp_catalog_page_id#66] +Output [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#64] +Input [7]: [page_sk#46, sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_sk#63, cp_catalog_page_id#64] (40) HashAggregate [codegen id : 11] -Input [5]: [sales_price#50, profit#51, return_amt#52, net_loss#53, cp_catalog_page_id#66] -Keys [1]: [cp_catalog_page_id#66] -Functions [4]: [partial_sum(UnscaledValue(sales_price#50)), partial_sum(UnscaledValue(return_amt#52)), partial_sum(UnscaledValue(profit#51)), partial_sum(UnscaledValue(net_loss#53))] -Aggregate Attributes [4]: [sum#68, sum#69, sum#70, sum#71] -Results [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] +Input [5]: [sales_price#48, profit#49, return_amt#50, net_loss#51, cp_catalog_page_id#64] +Keys [1]: [cp_catalog_page_id#64] +Functions [4]: [partial_sum(UnscaledValue(sales_price#48)), partial_sum(UnscaledValue(return_amt#50)), partial_sum(UnscaledValue(profit#49)), partial_sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum#65, sum#66, sum#67, sum#68] +Results [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] (41) Exchange -Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] -Arguments: hashpartitioning(cp_catalog_page_id#66, 5), ENSURE_REQUIREMENTS, [id=#76] +Input [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] +Arguments: hashpartitioning(cp_catalog_page_id#64, 5), ENSURE_REQUIREMENTS, [plan_id=4] (42) HashAggregate [codegen id : 12] -Input [5]: [cp_catalog_page_id#66, sum#72, sum#73, sum#74, sum#75] -Keys [1]: [cp_catalog_page_id#66] -Functions [4]: [sum(UnscaledValue(sales_price#50)), sum(UnscaledValue(return_amt#52)), sum(UnscaledValue(profit#51)), sum(UnscaledValue(net_loss#53))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#50))#77, sum(UnscaledValue(return_amt#52))#78, sum(UnscaledValue(profit#51))#79, sum(UnscaledValue(net_loss#53))#80] -Results [5]: [catalog channel AS channel#81, concat(catalog_page, cp_catalog_page_id#66) AS id#82, MakeDecimal(sum(UnscaledValue(sales_price#50))#77,17,2) AS sales#83, MakeDecimal(sum(UnscaledValue(return_amt#52))#78,17,2) AS returns#84, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#51))#79,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#53))#80,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#85] +Input [5]: [cp_catalog_page_id#64, sum#69, sum#70, sum#71, sum#72] +Keys [1]: [cp_catalog_page_id#64] +Functions [4]: [sum(UnscaledValue(sales_price#48)), sum(UnscaledValue(return_amt#50)), sum(UnscaledValue(profit#49)), sum(UnscaledValue(net_loss#51))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#48))#73, sum(UnscaledValue(return_amt#50))#74, sum(UnscaledValue(profit#49))#75, sum(UnscaledValue(net_loss#51))#76] +Results [5]: [catalog channel AS channel#77, concat(catalog_page, cp_catalog_page_id#64) AS id#78, MakeDecimal(sum(UnscaledValue(sales_price#48))#73,17,2) AS sales#79, MakeDecimal(sum(UnscaledValue(return_amt#50))#74,17,2) AS returns#80, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#49))#75,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#51))#76,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#81] (43) Scan parquet default.web_sales -Output [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] +Output [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#89), dynamicpruningexpression(ws_sold_date_sk#89 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#85), dynamicpruningexpression(ws_sold_date_sk#85 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_web_site_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 13] -Input [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] (45) Filter [codegen id : 13] -Input [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] -Condition : isnotnull(ws_web_site_sk#86) +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] +Condition : isnotnull(ws_web_site_sk#82) (46) Project [codegen id : 13] -Output [6]: [ws_web_site_sk#86 AS wsr_web_site_sk#90, ws_sold_date_sk#89 AS date_sk#91, ws_ext_sales_price#87 AS sales_price#92, ws_net_profit#88 AS profit#93, 0.00 AS return_amt#94, 0.00 AS net_loss#95] -Input [4]: [ws_web_site_sk#86, ws_ext_sales_price#87, ws_net_profit#88, ws_sold_date_sk#89] +Output [6]: [ws_web_site_sk#82 AS wsr_web_site_sk#86, ws_sold_date_sk#85 AS date_sk#87, ws_ext_sales_price#83 AS sales_price#88, ws_net_profit#84 AS profit#89, 0.00 AS return_amt#90, 0.00 AS net_loss#91] +Input [4]: [ws_web_site_sk#82, ws_ext_sales_price#83, ws_net_profit#84, ws_sold_date_sk#85] (47) Scan parquet default.web_returns -Output [5]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100] +Output [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#100), dynamicpruningexpression(wr_returned_date_sk#100 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(wr_returned_date_sk#96), dynamicpruningexpression(wr_returned_date_sk#96 IN dynamicpruning#5)] ReadSchema: struct (48) ColumnarToRow [codegen id : 14] -Input [5]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100] +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] (49) BroadcastExchange -Input [5]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295))),false), [id=#101] +Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, true] as bigint), 32) | (cast(input[1, int, true] as bigint) & 4294967295))),false), [plan_id=5] (50) Scan parquet default.web_sales -Output [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] +Output [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] Batched: true Location [not included in comparison]/{warehouse_dir}/web_sales] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_order_number), IsNotNull(ws_web_site_sk)] ReadSchema: struct (51) ColumnarToRow -Input [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] (52) Filter -Input [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] -Condition : ((isnotnull(ws_item_sk#102) AND isnotnull(ws_order_number#104)) AND isnotnull(ws_web_site_sk#103)) +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] +Condition : ((isnotnull(ws_item_sk#97) AND isnotnull(ws_order_number#99)) AND isnotnull(ws_web_site_sk#98)) (53) Project -Output [3]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104] -Input [4]: [ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104, ws_sold_date_sk#105] +Output [3]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] +Input [4]: [ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99, ws_sold_date_sk#100] (54) BroadcastHashJoin [codegen id : 15] -Left keys [2]: [wr_item_sk#96, wr_order_number#97] -Right keys [2]: [ws_item_sk#102, ws_order_number#104] +Left keys [2]: [wr_item_sk#92, wr_order_number#93] +Right keys [2]: [ws_item_sk#97, ws_order_number#99] Join condition: None (55) Project [codegen id : 15] -Output [6]: [ws_web_site_sk#103 AS wsr_web_site_sk#106, wr_returned_date_sk#100 AS date_sk#107, 0.00 AS sales_price#108, 0.00 AS profit#109, wr_return_amt#98 AS return_amt#110, wr_net_loss#99 AS net_loss#111] -Input [8]: [wr_item_sk#96, wr_order_number#97, wr_return_amt#98, wr_net_loss#99, wr_returned_date_sk#100, ws_item_sk#102, ws_web_site_sk#103, ws_order_number#104] +Output [6]: [ws_web_site_sk#98 AS wsr_web_site_sk#101, wr_returned_date_sk#96 AS date_sk#102, 0.00 AS sales_price#103, 0.00 AS profit#104, wr_return_amt#94 AS return_amt#105, wr_net_loss#95 AS net_loss#106] +Input [8]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96, ws_item_sk#97, ws_web_site_sk#98, ws_order_number#99] (56) Union (57) ReusedExchange [Reuses operator id: 92] -Output [1]: [d_date_sk#112] +Output [1]: [d_date_sk#107] (58) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [date_sk#91] -Right keys [1]: [d_date_sk#112] +Left keys [1]: [date_sk#87] +Right keys [1]: [d_date_sk#107] Join condition: None (59) Project [codegen id : 18] -Output [5]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95] -Input [7]: [wsr_web_site_sk#90, date_sk#91, sales_price#92, profit#93, return_amt#94, net_loss#95, d_date_sk#112] +Output [5]: [wsr_web_site_sk#86, sales_price#88, profit#89, return_amt#90, net_loss#91] +Input [7]: [wsr_web_site_sk#86, date_sk#87, sales_price#88, profit#89, return_amt#90, net_loss#91, d_date_sk#107] (60) Scan parquet default.web_site -Output [2]: [web_site_sk#113, web_site_id#114] +Output [2]: [web_site_sk#108, web_site_id#109] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct (61) ColumnarToRow [codegen id : 17] -Input [2]: [web_site_sk#113, web_site_id#114] +Input [2]: [web_site_sk#108, web_site_id#109] (62) Filter [codegen id : 17] -Input [2]: [web_site_sk#113, web_site_id#114] -Condition : isnotnull(web_site_sk#113) +Input [2]: [web_site_sk#108, web_site_id#109] +Condition : isnotnull(web_site_sk#108) (63) BroadcastExchange -Input [2]: [web_site_sk#113, web_site_id#114] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#115] +Input [2]: [web_site_sk#108, web_site_id#109] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] (64) BroadcastHashJoin [codegen id : 18] -Left keys [1]: [wsr_web_site_sk#90] -Right keys [1]: [web_site_sk#113] +Left keys [1]: [wsr_web_site_sk#86] +Right keys [1]: [web_site_sk#108] Join condition: None (65) Project [codegen id : 18] -Output [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#114] -Input [7]: [wsr_web_site_sk#90, sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_sk#113, web_site_id#114] +Output [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#109] +Input [7]: [wsr_web_site_sk#86, sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_sk#108, web_site_id#109] (66) HashAggregate [codegen id : 18] -Input [5]: [sales_price#92, profit#93, return_amt#94, net_loss#95, web_site_id#114] -Keys [1]: [web_site_id#114] -Functions [4]: [partial_sum(UnscaledValue(sales_price#92)), partial_sum(UnscaledValue(return_amt#94)), partial_sum(UnscaledValue(profit#93)), partial_sum(UnscaledValue(net_loss#95))] -Aggregate Attributes [4]: [sum#116, sum#117, sum#118, sum#119] -Results [5]: [web_site_id#114, sum#120, sum#121, sum#122, sum#123] +Input [5]: [sales_price#88, profit#89, return_amt#90, net_loss#91, web_site_id#109] +Keys [1]: [web_site_id#109] +Functions [4]: [partial_sum(UnscaledValue(sales_price#88)), partial_sum(UnscaledValue(return_amt#90)), partial_sum(UnscaledValue(profit#89)), partial_sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum#110, sum#111, sum#112, sum#113] +Results [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] (67) Exchange -Input [5]: [web_site_id#114, sum#120, sum#121, sum#122, sum#123] -Arguments: hashpartitioning(web_site_id#114, 5), ENSURE_REQUIREMENTS, [id=#124] +Input [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] +Arguments: hashpartitioning(web_site_id#109, 5), ENSURE_REQUIREMENTS, [plan_id=7] (68) HashAggregate [codegen id : 19] -Input [5]: [web_site_id#114, sum#120, sum#121, sum#122, sum#123] -Keys [1]: [web_site_id#114] -Functions [4]: [sum(UnscaledValue(sales_price#92)), sum(UnscaledValue(return_amt#94)), sum(UnscaledValue(profit#93)), sum(UnscaledValue(net_loss#95))] -Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#92))#125, sum(UnscaledValue(return_amt#94))#126, sum(UnscaledValue(profit#93))#127, sum(UnscaledValue(net_loss#95))#128] -Results [5]: [web channel AS channel#129, concat(web_site, web_site_id#114) AS id#130, MakeDecimal(sum(UnscaledValue(sales_price#92))#125,17,2) AS sales#131, MakeDecimal(sum(UnscaledValue(return_amt#94))#126,17,2) AS returns#132, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#93))#127,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#95))#128,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#133] +Input [5]: [web_site_id#109, sum#114, sum#115, sum#116, sum#117] +Keys [1]: [web_site_id#109] +Functions [4]: [sum(UnscaledValue(sales_price#88)), sum(UnscaledValue(return_amt#90)), sum(UnscaledValue(profit#89)), sum(UnscaledValue(net_loss#91))] +Aggregate Attributes [4]: [sum(UnscaledValue(sales_price#88))#118, sum(UnscaledValue(return_amt#90))#119, sum(UnscaledValue(profit#89))#120, sum(UnscaledValue(net_loss#91))#121] +Results [5]: [web channel AS channel#122, concat(web_site, web_site_id#109) AS id#123, MakeDecimal(sum(UnscaledValue(sales_price#88))#118,17,2) AS sales#124, MakeDecimal(sum(UnscaledValue(return_amt#90))#119,17,2) AS returns#125, CheckOverflow((promote_precision(cast(MakeDecimal(sum(UnscaledValue(profit#89))#120,17,2) as decimal(18,2))) - promote_precision(cast(MakeDecimal(sum(UnscaledValue(net_loss#91))#121,17,2) as decimal(18,2)))), DecimalType(18,2)) AS profit#126] (69) Union (70) HashAggregate [codegen id : 20] -Input [5]: [channel#39, id#40, sales#41, returns#42, profit#43] -Keys [2]: [channel#39, id#40] -Functions [3]: [partial_sum(sales#41), partial_sum(returns#42), partial_sum(profit#43)] -Aggregate Attributes [6]: [sum#134, isEmpty#135, sum#136, isEmpty#137, sum#138, isEmpty#139] -Results [8]: [channel#39, id#40, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] +Input [5]: [channel#37, id#38, sales#39, returns#40, profit#41] +Keys [2]: [channel#37, id#38] +Functions [3]: [partial_sum(sales#39), partial_sum(returns#40), partial_sum(profit#41)] +Aggregate Attributes [6]: [sum#127, isEmpty#128, sum#129, isEmpty#130, sum#131, isEmpty#132] +Results [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] (71) Exchange -Input [8]: [channel#39, id#40, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] -Arguments: hashpartitioning(channel#39, id#40, 5), ENSURE_REQUIREMENTS, [id=#146] +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Arguments: hashpartitioning(channel#37, id#38, 5), ENSURE_REQUIREMENTS, [plan_id=8] (72) HashAggregate [codegen id : 21] -Input [8]: [channel#39, id#40, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] -Keys [2]: [channel#39, id#40] -Functions [3]: [sum(sales#41), sum(returns#42), sum(profit#43)] -Aggregate Attributes [3]: [sum(sales#41)#147, sum(returns#42)#148, sum(profit#43)#149] -Results [5]: [channel#39, id#40, cast(sum(sales#41)#147 as decimal(37,2)) AS sales#150, cast(sum(returns#42)#148 as decimal(37,2)) AS returns#151, cast(sum(profit#43)#149 as decimal(38,2)) AS profit#152] +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Keys [2]: [channel#37, id#38] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#139, sum(returns#40)#140, sum(profit#41)#141] +Results [5]: [channel#37, id#38, cast(sum(sales#39)#139 as decimal(37,2)) AS sales#142, cast(sum(returns#40)#140 as decimal(37,2)) AS returns#143, cast(sum(profit#41)#141 as decimal(38,2)) AS profit#144] (73) ReusedExchange [Reuses operator id: 71] -Output [8]: [channel#39, id#40, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] +Output [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] (74) HashAggregate [codegen id : 42] -Input [8]: [channel#39, id#40, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] -Keys [2]: [channel#39, id#40] -Functions [3]: [sum(sales#41), sum(returns#42), sum(profit#43)] -Aggregate Attributes [3]: [sum(sales#41)#147, sum(returns#42)#148, sum(profit#43)#149] -Results [4]: [channel#39, sum(sales#41)#147 AS sales#153, sum(returns#42)#148 AS returns#154, sum(profit#43)#149 AS profit#155] +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Keys [2]: [channel#37, id#38] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#139, sum(returns#40)#140, sum(profit#41)#141] +Results [4]: [channel#37, sum(sales#39)#139 AS sales#145, sum(returns#40)#140 AS returns#146, sum(profit#41)#141 AS profit#147] (75) HashAggregate [codegen id : 42] -Input [4]: [channel#39, sales#153, returns#154, profit#155] -Keys [1]: [channel#39] -Functions [3]: [partial_sum(sales#153), partial_sum(returns#154), partial_sum(profit#155)] -Aggregate Attributes [6]: [sum#156, isEmpty#157, sum#158, isEmpty#159, sum#160, isEmpty#161] -Results [7]: [channel#39, sum#162, isEmpty#163, sum#164, isEmpty#165, sum#166, isEmpty#167] +Input [4]: [channel#37, sales#145, returns#146, profit#147] +Keys [1]: [channel#37] +Functions [3]: [partial_sum(sales#145), partial_sum(returns#146), partial_sum(profit#147)] +Aggregate Attributes [6]: [sum#148, isEmpty#149, sum#150, isEmpty#151, sum#152, isEmpty#153] +Results [7]: [channel#37, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] (76) Exchange -Input [7]: [channel#39, sum#162, isEmpty#163, sum#164, isEmpty#165, sum#166, isEmpty#167] -Arguments: hashpartitioning(channel#39, 5), ENSURE_REQUIREMENTS, [id=#168] +Input [7]: [channel#37, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] +Arguments: hashpartitioning(channel#37, 5), ENSURE_REQUIREMENTS, [plan_id=9] (77) HashAggregate [codegen id : 43] -Input [7]: [channel#39, sum#162, isEmpty#163, sum#164, isEmpty#165, sum#166, isEmpty#167] -Keys [1]: [channel#39] -Functions [3]: [sum(sales#153), sum(returns#154), sum(profit#155)] -Aggregate Attributes [3]: [sum(sales#153)#169, sum(returns#154)#170, sum(profit#155)#171] -Results [5]: [channel#39, null AS id#172, sum(sales#153)#169 AS sum(sales)#173, sum(returns#154)#170 AS sum(returns)#174, sum(profit#155)#171 AS sum(profit)#175] +Input [7]: [channel#37, sum#154, isEmpty#155, sum#156, isEmpty#157, sum#158, isEmpty#159] +Keys [1]: [channel#37] +Functions [3]: [sum(sales#145), sum(returns#146), sum(profit#147)] +Aggregate Attributes [3]: [sum(sales#145)#160, sum(returns#146)#161, sum(profit#147)#162] +Results [5]: [channel#37, null AS id#163, sum(sales#145)#160 AS sum(sales)#164, sum(returns#146)#161 AS sum(returns)#165, sum(profit#147)#162 AS sum(profit)#166] (78) ReusedExchange [Reuses operator id: 71] -Output [8]: [channel#39, id#40, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] +Output [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] (79) HashAggregate [codegen id : 64] -Input [8]: [channel#39, id#40, sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] -Keys [2]: [channel#39, id#40] -Functions [3]: [sum(sales#41), sum(returns#42), sum(profit#43)] -Aggregate Attributes [3]: [sum(sales#41)#147, sum(returns#42)#148, sum(profit#43)#149] -Results [3]: [sum(sales#41)#147 AS sales#153, sum(returns#42)#148 AS returns#154, sum(profit#43)#149 AS profit#155] +Input [8]: [channel#37, id#38, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Keys [2]: [channel#37, id#38] +Functions [3]: [sum(sales#39), sum(returns#40), sum(profit#41)] +Aggregate Attributes [3]: [sum(sales#39)#139, sum(returns#40)#140, sum(profit#41)#141] +Results [3]: [sum(sales#39)#139 AS sales#145, sum(returns#40)#140 AS returns#146, sum(profit#41)#141 AS profit#147] (80) HashAggregate [codegen id : 64] -Input [3]: [sales#153, returns#154, profit#155] +Input [3]: [sales#145, returns#146, profit#147] Keys: [] -Functions [3]: [partial_sum(sales#153), partial_sum(returns#154), partial_sum(profit#155)] -Aggregate Attributes [6]: [sum#176, isEmpty#177, sum#178, isEmpty#179, sum#180, isEmpty#181] -Results [6]: [sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187] +Functions [3]: [partial_sum(sales#145), partial_sum(returns#146), partial_sum(profit#147)] +Aggregate Attributes [6]: [sum#167, isEmpty#168, sum#169, isEmpty#170, sum#171, isEmpty#172] +Results [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] (81) Exchange -Input [6]: [sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#188] +Input [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=10] (82) HashAggregate [codegen id : 65] -Input [6]: [sum#182, isEmpty#183, sum#184, isEmpty#185, sum#186, isEmpty#187] +Input [6]: [sum#173, isEmpty#174, sum#175, isEmpty#176, sum#177, isEmpty#178] Keys: [] -Functions [3]: [sum(sales#153), sum(returns#154), sum(profit#155)] -Aggregate Attributes [3]: [sum(sales#153)#189, sum(returns#154)#190, sum(profit#155)#191] -Results [5]: [null AS channel#192, null AS id#193, sum(sales#153)#189 AS sum(sales)#194, sum(returns#154)#190 AS sum(returns)#195, sum(profit#155)#191 AS sum(profit)#196] +Functions [3]: [sum(sales#145), sum(returns#146), sum(profit#147)] +Aggregate Attributes [3]: [sum(sales#145)#179, sum(returns#146)#180, sum(profit#147)#181] +Results [5]: [null AS channel#182, null AS id#183, sum(sales#145)#179 AS sum(sales)#184, sum(returns#146)#180 AS sum(returns)#185, sum(profit#147)#181 AS sum(profit)#186] (83) Union (84) HashAggregate [codegen id : 66] -Input [5]: [channel#39, id#40, sales#150, returns#151, profit#152] -Keys [5]: [channel#39, id#40, sales#150, returns#151, profit#152] +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Keys [5]: [channel#37, id#38, sales#142, returns#143, profit#144] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#39, id#40, sales#150, returns#151, profit#152] +Results [5]: [channel#37, id#38, sales#142, returns#143, profit#144] (85) Exchange -Input [5]: [channel#39, id#40, sales#150, returns#151, profit#152] -Arguments: hashpartitioning(channel#39, id#40, sales#150, returns#151, profit#152, 5), ENSURE_REQUIREMENTS, [id=#197] +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Arguments: hashpartitioning(channel#37, id#38, sales#142, returns#143, profit#144, 5), ENSURE_REQUIREMENTS, [plan_id=11] (86) HashAggregate [codegen id : 67] -Input [5]: [channel#39, id#40, sales#150, returns#151, profit#152] -Keys [5]: [channel#39, id#40, sales#150, returns#151, profit#152] +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Keys [5]: [channel#37, id#38, sales#142, returns#143, profit#144] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#39, id#40, sales#150, returns#151, profit#152] +Results [5]: [channel#37, id#38, sales#142, returns#143, profit#144] (87) TakeOrderedAndProject -Input [5]: [channel#39, id#40, sales#150, returns#151, profit#152] -Arguments: 100, [channel#39 ASC NULLS FIRST, id#40 ASC NULLS FIRST], [channel#39, id#40, sales#150, returns#151, profit#152] +Input [5]: [channel#37, id#38, sales#142, returns#143, profit#144] +Arguments: 100, [channel#37 ASC NULLS FIRST, id#38 ASC NULLS FIRST], [channel#37, id#38, sales#142, returns#143, profit#144] ===== Subqueries ===== @@ -512,35 +512,35 @@ BroadcastExchange (92) (88) Scan parquet default.date_dim -Output [2]: [d_date_sk#22, d_date#198] +Output [2]: [d_date_sk#22, d_date#187] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-08-18), IsNotNull(d_date_sk)] ReadSchema: struct (89) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#22, d_date#198] +Input [2]: [d_date_sk#22, d_date#187] (90) Filter [codegen id : 1] -Input [2]: [d_date_sk#22, d_date#198] -Condition : (((isnotnull(d_date#198) AND (d_date#198 >= 1998-08-04)) AND (d_date#198 <= 1998-08-18)) AND isnotnull(d_date_sk#22)) +Input [2]: [d_date_sk#22, d_date#187] +Condition : (((isnotnull(d_date#187) AND (d_date#187 >= 1998-08-04)) AND (d_date#187 <= 1998-08-18)) AND isnotnull(d_date_sk#22)) (91) Project [codegen id : 1] Output [1]: [d_date_sk#22] -Input [2]: [d_date_sk#22, d_date#198] +Input [2]: [d_date_sk#22, d_date#187] (92) BroadcastExchange Input [1]: [d_date_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#199] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] Subquery:2 Hosting operator id = 5 Hosting Expression = sr_returned_date_sk#15 IN dynamicpruning#5 -Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#47 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 22 Hosting Expression = cs_sold_date_sk#45 IN dynamicpruning#5 -Subquery:4 Hosting operator id = 26 Hosting Expression = cr_returned_date_sk#57 IN dynamicpruning#5 +Subquery:4 Hosting operator id = 26 Hosting Expression = cr_returned_date_sk#55 IN dynamicpruning#5 -Subquery:5 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#89 IN dynamicpruning#5 +Subquery:5 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#85 IN dynamicpruning#5 -Subquery:6 Hosting operator id = 47 Hosting Expression = wr_returned_date_sk#100 IN dynamicpruning#5 +Subquery:6 Hosting operator id = 47 Hosting Expression = wr_returned_date_sk#96 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt index 1992b08c26b23..2e353046dae8f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6.sf100/explain.txt @@ -98,18 +98,18 @@ Results [3]: [i_category#9, sum#12, count#13] (11) Exchange Input [3]: [i_category#9, sum#12, count#13] -Arguments: hashpartitioning(i_category#9, 5), ENSURE_REQUIREMENTS, [id=#14] +Arguments: hashpartitioning(i_category#9, 5), ENSURE_REQUIREMENTS, [plan_id=1] (12) HashAggregate [codegen id : 2] Input [3]: [i_category#9, sum#12, count#13] Keys [1]: [i_category#9] Functions [1]: [avg(UnscaledValue(i_current_price#8))] -Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#8))#15] -Results [2]: [cast((avg(UnscaledValue(i_current_price#8))#15 / 100.0) as decimal(11,6)) AS avg(i_current_price)#16, i_category#9] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#8))#14] +Results [2]: [cast((avg(UnscaledValue(i_current_price#8))#14 / 100.0) as decimal(11,6)) AS avg(i_current_price)#15, i_category#9] (13) BroadcastExchange -Input [2]: [avg(i_current_price)#16, i_category#9] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#17] +Input [2]: [avg(i_current_price)#15, i_category#9] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 3] Left keys [1]: [i_category#7] @@ -117,16 +117,16 @@ Right keys [1]: [i_category#9] Join condition: None (15) Filter [codegen id : 3] -Input [5]: [i_item_sk#5, i_current_price#6, i_category#7, avg(i_current_price)#16, i_category#9] -Condition : (cast(i_current_price#6 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#16)), DecimalType(14,7))) +Input [5]: [i_item_sk#5, i_current_price#6, i_category#7, avg(i_current_price)#15, i_category#9] +Condition : (cast(i_current_price#6 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#15)), DecimalType(14,7))) (16) Project [codegen id : 3] Output [1]: [i_item_sk#5] -Input [5]: [i_item_sk#5, i_current_price#6, i_category#7, avg(i_current_price)#16, i_category#9] +Input [5]: [i_item_sk#5, i_current_price#6, i_category#7, avg(i_current_price)#15, i_category#9] (17) BroadcastExchange Input [1]: [i_item_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (18) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_item_sk#1] @@ -138,120 +138,120 @@ Output [2]: [ss_customer_sk#2, ss_sold_date_sk#3] Input [4]: [ss_item_sk#1, ss_customer_sk#2, ss_sold_date_sk#3, i_item_sk#5] (20) ReusedExchange [Reuses operator id: 50] -Output [1]: [d_date_sk#19] +Output [1]: [d_date_sk#16] (21) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ss_sold_date_sk#3] -Right keys [1]: [d_date_sk#19] +Right keys [1]: [d_date_sk#16] Join condition: None (22) Project [codegen id : 5] Output [1]: [ss_customer_sk#2] -Input [3]: [ss_customer_sk#2, ss_sold_date_sk#3, d_date_sk#19] +Input [3]: [ss_customer_sk#2, ss_sold_date_sk#3, d_date_sk#16] (23) Exchange Input [1]: [ss_customer_sk#2] -Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#20] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 6] Input [1]: [ss_customer_sk#2] Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 (25) Scan parquet default.customer_address -Output [2]: [ca_address_sk#21, ca_state#22] +Output [2]: [ca_address_sk#17, ca_state#18] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 7] -Input [2]: [ca_address_sk#21, ca_state#22] +Input [2]: [ca_address_sk#17, ca_state#18] (27) Filter [codegen id : 7] -Input [2]: [ca_address_sk#21, ca_state#22] -Condition : isnotnull(ca_address_sk#21) +Input [2]: [ca_address_sk#17, ca_state#18] +Condition : isnotnull(ca_address_sk#17) (28) Exchange -Input [2]: [ca_address_sk#21, ca_state#22] -Arguments: hashpartitioning(ca_address_sk#21, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [2]: [ca_address_sk#17, ca_state#18] +Arguments: hashpartitioning(ca_address_sk#17, 5), ENSURE_REQUIREMENTS, [plan_id=5] (29) Sort [codegen id : 8] -Input [2]: [ca_address_sk#21, ca_state#22] -Arguments: [ca_address_sk#21 ASC NULLS FIRST], false, 0 +Input [2]: [ca_address_sk#17, ca_state#18] +Arguments: [ca_address_sk#17 ASC NULLS FIRST], false, 0 (30) Scan parquet default.customer -Output [2]: [c_customer_sk#24, c_current_addr_sk#25] +Output [2]: [c_customer_sk#19, c_current_addr_sk#20] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_current_addr_sk), IsNotNull(c_customer_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 9] -Input [2]: [c_customer_sk#24, c_current_addr_sk#25] +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] (32) Filter [codegen id : 9] -Input [2]: [c_customer_sk#24, c_current_addr_sk#25] -Condition : (isnotnull(c_current_addr_sk#25) AND isnotnull(c_customer_sk#24)) +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Condition : (isnotnull(c_current_addr_sk#20) AND isnotnull(c_customer_sk#19)) (33) Exchange -Input [2]: [c_customer_sk#24, c_current_addr_sk#25] -Arguments: hashpartitioning(c_current_addr_sk#25, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: hashpartitioning(c_current_addr_sk#20, 5), ENSURE_REQUIREMENTS, [plan_id=6] (34) Sort [codegen id : 10] -Input [2]: [c_customer_sk#24, c_current_addr_sk#25] -Arguments: [c_current_addr_sk#25 ASC NULLS FIRST], false, 0 +Input [2]: [c_customer_sk#19, c_current_addr_sk#20] +Arguments: [c_current_addr_sk#20 ASC NULLS FIRST], false, 0 (35) SortMergeJoin [codegen id : 11] -Left keys [1]: [ca_address_sk#21] -Right keys [1]: [c_current_addr_sk#25] +Left keys [1]: [ca_address_sk#17] +Right keys [1]: [c_current_addr_sk#20] Join condition: None (36) Project [codegen id : 11] -Output [2]: [ca_state#22, c_customer_sk#24] -Input [4]: [ca_address_sk#21, ca_state#22, c_customer_sk#24, c_current_addr_sk#25] +Output [2]: [ca_state#18, c_customer_sk#19] +Input [4]: [ca_address_sk#17, ca_state#18, c_customer_sk#19, c_current_addr_sk#20] (37) Exchange -Input [2]: [ca_state#22, c_customer_sk#24] -Arguments: hashpartitioning(c_customer_sk#24, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [2]: [ca_state#18, c_customer_sk#19] +Arguments: hashpartitioning(c_customer_sk#19, 5), ENSURE_REQUIREMENTS, [plan_id=7] (38) Sort [codegen id : 12] -Input [2]: [ca_state#22, c_customer_sk#24] -Arguments: [c_customer_sk#24 ASC NULLS FIRST], false, 0 +Input [2]: [ca_state#18, c_customer_sk#19] +Arguments: [c_customer_sk#19 ASC NULLS FIRST], false, 0 (39) SortMergeJoin [codegen id : 13] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#24] +Right keys [1]: [c_customer_sk#19] Join condition: None (40) Project [codegen id : 13] -Output [1]: [ca_state#22] -Input [3]: [ss_customer_sk#2, ca_state#22, c_customer_sk#24] +Output [1]: [ca_state#18] +Input [3]: [ss_customer_sk#2, ca_state#18, c_customer_sk#19] (41) HashAggregate [codegen id : 13] -Input [1]: [ca_state#22] -Keys [1]: [ca_state#22] +Input [1]: [ca_state#18] +Keys [1]: [ca_state#18] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#28] -Results [2]: [ca_state#22, count#29] +Aggregate Attributes [1]: [count#21] +Results [2]: [ca_state#18, count#22] (42) Exchange -Input [2]: [ca_state#22, count#29] -Arguments: hashpartitioning(ca_state#22, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [2]: [ca_state#18, count#22] +Arguments: hashpartitioning(ca_state#18, 5), ENSURE_REQUIREMENTS, [plan_id=8] (43) HashAggregate [codegen id : 14] -Input [2]: [ca_state#22, count#29] -Keys [1]: [ca_state#22] +Input [2]: [ca_state#18, count#22] +Keys [1]: [ca_state#18] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#31] -Results [3]: [ca_state#22 AS state#32, count(1)#31 AS cnt#33, ca_state#22] +Aggregate Attributes [1]: [count(1)#23] +Results [3]: [ca_state#18 AS state#24, count(1)#23 AS cnt#25, ca_state#18] (44) Filter [codegen id : 14] -Input [3]: [state#32, cnt#33, ca_state#22] -Condition : (cnt#33 >= 10) +Input [3]: [state#24, cnt#25, ca_state#18] +Condition : (cnt#25 >= 10) (45) TakeOrderedAndProject -Input [3]: [state#32, cnt#33, ca_state#22] -Arguments: 100, [cnt#33 ASC NULLS FIRST, ca_state#22 ASC NULLS FIRST], [state#32, cnt#33] +Input [3]: [state#24, cnt#25, ca_state#18] +Arguments: 100, [cnt#25 ASC NULLS FIRST, ca_state#18 ASC NULLS FIRST], [state#24, cnt#25] ===== Subqueries ===== @@ -264,28 +264,28 @@ BroadcastExchange (50) (46) Scan parquet default.date_dim -Output [2]: [d_date_sk#19, d_month_seq#34] +Output [2]: [d_date_sk#16, d_month_seq#26] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] ReadSchema: struct (47) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#19, d_month_seq#34] +Input [2]: [d_date_sk#16, d_month_seq#26] (48) Filter [codegen id : 1] -Input [2]: [d_date_sk#19, d_month_seq#34] -Condition : ((isnotnull(d_month_seq#34) AND (d_month_seq#34 = Subquery scalar-subquery#35, [id=#36])) AND isnotnull(d_date_sk#19)) +Input [2]: [d_date_sk#16, d_month_seq#26] +Condition : ((isnotnull(d_month_seq#26) AND (d_month_seq#26 = Subquery scalar-subquery#27, [id=#28])) AND isnotnull(d_date_sk#16)) (49) Project [codegen id : 1] -Output [1]: [d_date_sk#19] -Input [2]: [d_date_sk#19, d_month_seq#34] +Output [1]: [d_date_sk#16] +Input [2]: [d_date_sk#16, d_month_seq#26] (50) BroadcastExchange -Input [1]: [d_date_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#37] +Input [1]: [d_date_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=9] -Subquery:2 Hosting operator id = 48 Hosting Expression = Subquery scalar-subquery#35, [id=#36] +Subquery:2 Hosting operator id = 48 Hosting Expression = Subquery scalar-subquery#27, [id=#28] * HashAggregate (57) +- Exchange (56) +- * HashAggregate (55) @@ -296,39 +296,39 @@ Subquery:2 Hosting operator id = 48 Hosting Expression = Subquery scalar-subquer (51) Scan parquet default.date_dim -Output [3]: [d_month_seq#38, d_year#39, d_moy#40] +Output [3]: [d_month_seq#29, d_year#30, d_moy#31] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] ReadSchema: struct (52) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#38, d_year#39, d_moy#40] +Input [3]: [d_month_seq#29, d_year#30, d_moy#31] (53) Filter [codegen id : 1] -Input [3]: [d_month_seq#38, d_year#39, d_moy#40] -Condition : (((isnotnull(d_year#39) AND isnotnull(d_moy#40)) AND (d_year#39 = 2000)) AND (d_moy#40 = 1)) +Input [3]: [d_month_seq#29, d_year#30, d_moy#31] +Condition : (((isnotnull(d_year#30) AND isnotnull(d_moy#31)) AND (d_year#30 = 2000)) AND (d_moy#31 = 1)) (54) Project [codegen id : 1] -Output [1]: [d_month_seq#38] -Input [3]: [d_month_seq#38, d_year#39, d_moy#40] +Output [1]: [d_month_seq#29] +Input [3]: [d_month_seq#29, d_year#30, d_moy#31] (55) HashAggregate [codegen id : 1] -Input [1]: [d_month_seq#38] -Keys [1]: [d_month_seq#38] +Input [1]: [d_month_seq#29] +Keys [1]: [d_month_seq#29] Functions: [] Aggregate Attributes: [] -Results [1]: [d_month_seq#38] +Results [1]: [d_month_seq#29] (56) Exchange -Input [1]: [d_month_seq#38] -Arguments: hashpartitioning(d_month_seq#38, 5), ENSURE_REQUIREMENTS, [id=#41] +Input [1]: [d_month_seq#29] +Arguments: hashpartitioning(d_month_seq#29, 5), ENSURE_REQUIREMENTS, [plan_id=10] (57) HashAggregate [codegen id : 2] -Input [1]: [d_month_seq#38] -Keys [1]: [d_month_seq#38] +Input [1]: [d_month_seq#29] +Keys [1]: [d_month_seq#29] Functions: [] Aggregate Attributes: [] -Results [1]: [d_month_seq#38] +Results [1]: [d_month_seq#29] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/explain.txt index 918c6c375a9ea..c415966f85bad 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q6/explain.txt @@ -70,7 +70,7 @@ Condition : (isnotnull(c_current_addr_sk#4) AND isnotnull(c_customer_sk#3)) (7) BroadcastExchange Input [2]: [c_customer_sk#3, c_current_addr_sk#4] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#5] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 7] Left keys [1]: [ca_address_sk#1] @@ -82,150 +82,150 @@ Output [2]: [ca_state#2, c_customer_sk#3] Input [4]: [ca_address_sk#1, ca_state#2, c_customer_sk#3, c_current_addr_sk#4] (10) Scan parquet default.store_sales -Output [3]: [ss_item_sk#6, ss_customer_sk#7, ss_sold_date_sk#8] +Output [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#8), dynamicpruningexpression(ss_sold_date_sk#8 IN dynamicpruning#9)] +PartitionFilters: [isnotnull(ss_sold_date_sk#7), dynamicpruningexpression(ss_sold_date_sk#7 IN dynamicpruning#8)] PushedFilters: [IsNotNull(ss_customer_sk), IsNotNull(ss_item_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [3]: [ss_item_sk#6, ss_customer_sk#7, ss_sold_date_sk#8] +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] (12) Filter [codegen id : 2] -Input [3]: [ss_item_sk#6, ss_customer_sk#7, ss_sold_date_sk#8] -Condition : (isnotnull(ss_customer_sk#7) AND isnotnull(ss_item_sk#6)) +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Condition : (isnotnull(ss_customer_sk#6) AND isnotnull(ss_item_sk#5)) (13) BroadcastExchange -Input [3]: [ss_item_sk#6, ss_customer_sk#7, ss_sold_date_sk#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [id=#10] +Input [3]: [ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] +Arguments: HashedRelationBroadcastMode(List(cast(input[1, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 7] Left keys [1]: [c_customer_sk#3] -Right keys [1]: [ss_customer_sk#7] +Right keys [1]: [ss_customer_sk#6] Join condition: None (15) Project [codegen id : 7] -Output [3]: [ca_state#2, ss_item_sk#6, ss_sold_date_sk#8] -Input [5]: [ca_state#2, c_customer_sk#3, ss_item_sk#6, ss_customer_sk#7, ss_sold_date_sk#8] +Output [3]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7] +Input [5]: [ca_state#2, c_customer_sk#3, ss_item_sk#5, ss_customer_sk#6, ss_sold_date_sk#7] (16) ReusedExchange [Reuses operator id: 44] -Output [1]: [d_date_sk#11] +Output [1]: [d_date_sk#9] (17) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [ss_sold_date_sk#8] -Right keys [1]: [d_date_sk#11] +Left keys [1]: [ss_sold_date_sk#7] +Right keys [1]: [d_date_sk#9] Join condition: None (18) Project [codegen id : 7] -Output [2]: [ca_state#2, ss_item_sk#6] -Input [4]: [ca_state#2, ss_item_sk#6, ss_sold_date_sk#8, d_date_sk#11] +Output [2]: [ca_state#2, ss_item_sk#5] +Input [4]: [ca_state#2, ss_item_sk#5, ss_sold_date_sk#7, d_date_sk#9] (19) Scan parquet default.item -Output [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Output [3]: [i_item_sk#10, i_current_price#11, i_category#12] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_current_price), IsNotNull(i_item_sk)] ReadSchema: struct (20) ColumnarToRow [codegen id : 6] -Input [3]: [i_item_sk#12, i_current_price#13, i_category#14] +Input [3]: [i_item_sk#10, i_current_price#11, i_category#12] (21) Filter [codegen id : 6] -Input [3]: [i_item_sk#12, i_current_price#13, i_category#14] -Condition : (isnotnull(i_current_price#13) AND isnotnull(i_item_sk#12)) +Input [3]: [i_item_sk#10, i_current_price#11, i_category#12] +Condition : (isnotnull(i_current_price#11) AND isnotnull(i_item_sk#10)) (22) Scan parquet default.item -Output [2]: [i_current_price#15, i_category#16] +Output [2]: [i_current_price#13, i_category#14] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_category)] ReadSchema: struct (23) ColumnarToRow [codegen id : 4] -Input [2]: [i_current_price#15, i_category#16] +Input [2]: [i_current_price#13, i_category#14] (24) Filter [codegen id : 4] -Input [2]: [i_current_price#15, i_category#16] -Condition : isnotnull(i_category#16) +Input [2]: [i_current_price#13, i_category#14] +Condition : isnotnull(i_category#14) (25) HashAggregate [codegen id : 4] -Input [2]: [i_current_price#15, i_category#16] -Keys [1]: [i_category#16] -Functions [1]: [partial_avg(UnscaledValue(i_current_price#15))] -Aggregate Attributes [2]: [sum#17, count#18] -Results [3]: [i_category#16, sum#19, count#20] +Input [2]: [i_current_price#13, i_category#14] +Keys [1]: [i_category#14] +Functions [1]: [partial_avg(UnscaledValue(i_current_price#13))] +Aggregate Attributes [2]: [sum#15, count#16] +Results [3]: [i_category#14, sum#17, count#18] (26) Exchange -Input [3]: [i_category#16, sum#19, count#20] -Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [3]: [i_category#14, sum#17, count#18] +Arguments: hashpartitioning(i_category#14, 5), ENSURE_REQUIREMENTS, [plan_id=3] (27) HashAggregate [codegen id : 5] -Input [3]: [i_category#16, sum#19, count#20] -Keys [1]: [i_category#16] -Functions [1]: [avg(UnscaledValue(i_current_price#15))] -Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#15))#22] -Results [2]: [cast((avg(UnscaledValue(i_current_price#15))#22 / 100.0) as decimal(11,6)) AS avg(i_current_price)#23, i_category#16] +Input [3]: [i_category#14, sum#17, count#18] +Keys [1]: [i_category#14] +Functions [1]: [avg(UnscaledValue(i_current_price#13))] +Aggregate Attributes [1]: [avg(UnscaledValue(i_current_price#13))#19] +Results [2]: [cast((avg(UnscaledValue(i_current_price#13))#19 / 100.0) as decimal(11,6)) AS avg(i_current_price)#20, i_category#14] (28) BroadcastExchange -Input [2]: [avg(i_current_price)#23, i_category#16] -Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [id=#24] +Input [2]: [avg(i_current_price)#20, i_category#14] +Arguments: HashedRelationBroadcastMode(List(input[1, string, true]),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [i_category#14] -Right keys [1]: [i_category#16] +Left keys [1]: [i_category#12] +Right keys [1]: [i_category#14] Join condition: None (30) Filter [codegen id : 6] -Input [5]: [i_item_sk#12, i_current_price#13, i_category#14, avg(i_current_price)#23, i_category#16] -Condition : (cast(i_current_price#13 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#23)), DecimalType(14,7))) +Input [5]: [i_item_sk#10, i_current_price#11, i_category#12, avg(i_current_price)#20, i_category#14] +Condition : (cast(i_current_price#11 as decimal(14,7)) > CheckOverflow((1.200000 * promote_precision(avg(i_current_price)#20)), DecimalType(14,7))) (31) Project [codegen id : 6] -Output [1]: [i_item_sk#12] -Input [5]: [i_item_sk#12, i_current_price#13, i_category#14, avg(i_current_price)#23, i_category#16] +Output [1]: [i_item_sk#10] +Input [5]: [i_item_sk#10, i_current_price#11, i_category#12, avg(i_current_price)#20, i_category#14] (32) BroadcastExchange -Input [1]: [i_item_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [i_item_sk#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (33) BroadcastHashJoin [codegen id : 7] -Left keys [1]: [ss_item_sk#6] -Right keys [1]: [i_item_sk#12] +Left keys [1]: [ss_item_sk#5] +Right keys [1]: [i_item_sk#10] Join condition: None (34) Project [codegen id : 7] Output [1]: [ca_state#2] -Input [3]: [ca_state#2, ss_item_sk#6, i_item_sk#12] +Input [3]: [ca_state#2, ss_item_sk#5, i_item_sk#10] (35) HashAggregate [codegen id : 7] Input [1]: [ca_state#2] Keys [1]: [ca_state#2] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#26] -Results [2]: [ca_state#2, count#27] +Aggregate Attributes [1]: [count#21] +Results [2]: [ca_state#2, count#22] (36) Exchange -Input [2]: [ca_state#2, count#27] -Arguments: hashpartitioning(ca_state#2, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [2]: [ca_state#2, count#22] +Arguments: hashpartitioning(ca_state#2, 5), ENSURE_REQUIREMENTS, [plan_id=6] (37) HashAggregate [codegen id : 8] -Input [2]: [ca_state#2, count#27] +Input [2]: [ca_state#2, count#22] Keys [1]: [ca_state#2] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#29] -Results [3]: [ca_state#2 AS state#30, count(1)#29 AS cnt#31, ca_state#2] +Aggregate Attributes [1]: [count(1)#23] +Results [3]: [ca_state#2 AS state#24, count(1)#23 AS cnt#25, ca_state#2] (38) Filter [codegen id : 8] -Input [3]: [state#30, cnt#31, ca_state#2] -Condition : (cnt#31 >= 10) +Input [3]: [state#24, cnt#25, ca_state#2] +Condition : (cnt#25 >= 10) (39) TakeOrderedAndProject -Input [3]: [state#30, cnt#31, ca_state#2] -Arguments: 100, [cnt#31 ASC NULLS FIRST, ca_state#2 ASC NULLS FIRST], [state#30, cnt#31] +Input [3]: [state#24, cnt#25, ca_state#2] +Arguments: 100, [cnt#25 ASC NULLS FIRST, ca_state#2 ASC NULLS FIRST], [state#24, cnt#25] ===== Subqueries ===== -Subquery:1 Hosting operator id = 10 Hosting Expression = ss_sold_date_sk#8 IN dynamicpruning#9 +Subquery:1 Hosting operator id = 10 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 BroadcastExchange (44) +- * Project (43) +- * Filter (42) @@ -234,28 +234,28 @@ BroadcastExchange (44) (40) Scan parquet default.date_dim -Output [2]: [d_date_sk#11, d_month_seq#32] +Output [2]: [d_date_sk#9, d_month_seq#26] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), IsNotNull(d_date_sk)] ReadSchema: struct (41) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#11, d_month_seq#32] +Input [2]: [d_date_sk#9, d_month_seq#26] (42) Filter [codegen id : 1] -Input [2]: [d_date_sk#11, d_month_seq#32] -Condition : ((isnotnull(d_month_seq#32) AND (d_month_seq#32 = Subquery scalar-subquery#33, [id=#34])) AND isnotnull(d_date_sk#11)) +Input [2]: [d_date_sk#9, d_month_seq#26] +Condition : ((isnotnull(d_month_seq#26) AND (d_month_seq#26 = Subquery scalar-subquery#27, [id=#28])) AND isnotnull(d_date_sk#9)) (43) Project [codegen id : 1] -Output [1]: [d_date_sk#11] -Input [2]: [d_date_sk#11, d_month_seq#32] +Output [1]: [d_date_sk#9] +Input [2]: [d_date_sk#9, d_month_seq#26] (44) BroadcastExchange -Input [1]: [d_date_sk#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#35] +Input [1]: [d_date_sk#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] -Subquery:2 Hosting operator id = 42 Hosting Expression = Subquery scalar-subquery#33, [id=#34] +Subquery:2 Hosting operator id = 42 Hosting Expression = Subquery scalar-subquery#27, [id=#28] * HashAggregate (51) +- Exchange (50) +- * HashAggregate (49) @@ -266,39 +266,39 @@ Subquery:2 Hosting operator id = 42 Hosting Expression = Subquery scalar-subquer (45) Scan parquet default.date_dim -Output [3]: [d_month_seq#36, d_year#37, d_moy#38] +Output [3]: [d_month_seq#29, d_year#30, d_moy#31] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), IsNotNull(d_moy), EqualTo(d_year,2000), EqualTo(d_moy,1)] ReadSchema: struct (46) ColumnarToRow [codegen id : 1] -Input [3]: [d_month_seq#36, d_year#37, d_moy#38] +Input [3]: [d_month_seq#29, d_year#30, d_moy#31] (47) Filter [codegen id : 1] -Input [3]: [d_month_seq#36, d_year#37, d_moy#38] -Condition : (((isnotnull(d_year#37) AND isnotnull(d_moy#38)) AND (d_year#37 = 2000)) AND (d_moy#38 = 1)) +Input [3]: [d_month_seq#29, d_year#30, d_moy#31] +Condition : (((isnotnull(d_year#30) AND isnotnull(d_moy#31)) AND (d_year#30 = 2000)) AND (d_moy#31 = 1)) (48) Project [codegen id : 1] -Output [1]: [d_month_seq#36] -Input [3]: [d_month_seq#36, d_year#37, d_moy#38] +Output [1]: [d_month_seq#29] +Input [3]: [d_month_seq#29, d_year#30, d_moy#31] (49) HashAggregate [codegen id : 1] -Input [1]: [d_month_seq#36] -Keys [1]: [d_month_seq#36] +Input [1]: [d_month_seq#29] +Keys [1]: [d_month_seq#29] Functions: [] Aggregate Attributes: [] -Results [1]: [d_month_seq#36] +Results [1]: [d_month_seq#29] (50) Exchange -Input [1]: [d_month_seq#36] -Arguments: hashpartitioning(d_month_seq#36, 5), ENSURE_REQUIREMENTS, [id=#39] +Input [1]: [d_month_seq#29] +Arguments: hashpartitioning(d_month_seq#29, 5), ENSURE_REQUIREMENTS, [plan_id=8] (51) HashAggregate [codegen id : 2] -Input [1]: [d_month_seq#36] -Keys [1]: [d_month_seq#36] +Input [1]: [d_month_seq#29] +Keys [1]: [d_month_seq#29] Functions: [] Aggregate Attributes: [] -Results [1]: [d_month_seq#36] +Results [1]: [d_month_seq#29] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt index 868f1f26459aa..ff121dd9f7b8f 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64.sf100/explain.txt @@ -227,885 +227,885 @@ Condition : (((((((isnotnull(ss_item_sk#1) AND isnotnull(ss_ticket_number#8)) AN (4) Exchange Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] -Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#8, 5), ENSURE_REQUIREMENTS, [id=#14] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#8, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] Arguments: [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#8 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_returns -Output [3]: [sr_item_sk#15, sr_ticket_number#16, sr_returned_date_sk#17] +Output [3]: [sr_item_sk#14, sr_ticket_number#15, sr_returned_date_sk#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [3]: [sr_item_sk#15, sr_ticket_number#16, sr_returned_date_sk#17] +Input [3]: [sr_item_sk#14, sr_ticket_number#15, sr_returned_date_sk#16] (8) Filter [codegen id : 3] -Input [3]: [sr_item_sk#15, sr_ticket_number#16, sr_returned_date_sk#17] -Condition : (isnotnull(sr_item_sk#15) AND isnotnull(sr_ticket_number#16)) +Input [3]: [sr_item_sk#14, sr_ticket_number#15, sr_returned_date_sk#16] +Condition : (isnotnull(sr_item_sk#14) AND isnotnull(sr_ticket_number#15)) (9) Project [codegen id : 3] -Output [2]: [sr_item_sk#15, sr_ticket_number#16] -Input [3]: [sr_item_sk#15, sr_ticket_number#16, sr_returned_date_sk#17] +Output [2]: [sr_item_sk#14, sr_ticket_number#15] +Input [3]: [sr_item_sk#14, sr_ticket_number#15, sr_returned_date_sk#16] (10) Exchange -Input [2]: [sr_item_sk#15, sr_ticket_number#16] -Arguments: hashpartitioning(sr_item_sk#15, sr_ticket_number#16, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [2]: [sr_item_sk#14, sr_ticket_number#15] +Arguments: hashpartitioning(sr_item_sk#14, sr_ticket_number#15, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [2]: [sr_item_sk#15, sr_ticket_number#16] -Arguments: [sr_item_sk#15 ASC NULLS FIRST, sr_ticket_number#16 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#14, sr_ticket_number#15] +Arguments: [sr_item_sk#14 ASC NULLS FIRST, sr_ticket_number#15 ASC NULLS FIRST], false, 0 (12) SortMergeJoin [codegen id : 13] Left keys [2]: [ss_item_sk#1, ss_ticket_number#8] -Right keys [2]: [sr_item_sk#15, sr_ticket_number#16] +Right keys [2]: [sr_item_sk#14, sr_ticket_number#15] Join condition: None (13) Project [codegen id : 13] Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] -Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, sr_item_sk#15, sr_ticket_number#16] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, sr_item_sk#14, sr_ticket_number#15] (14) Scan parquet default.catalog_sales -Output [4]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21, cs_sold_date_sk#22] +Output [4]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19, cs_sold_date_sk#20] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)] ReadSchema: struct (15) ColumnarToRow [codegen id : 5] -Input [4]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21, cs_sold_date_sk#22] +Input [4]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19, cs_sold_date_sk#20] (16) Filter [codegen id : 5] -Input [4]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21, cs_sold_date_sk#22] -Condition : (isnotnull(cs_item_sk#19) AND isnotnull(cs_order_number#20)) +Input [4]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19, cs_sold_date_sk#20] +Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_order_number#18)) (17) Project [codegen id : 5] -Output [3]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21] -Input [4]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21, cs_sold_date_sk#22] +Output [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] +Input [4]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19, cs_sold_date_sk#20] (18) Exchange -Input [3]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21] -Arguments: hashpartitioning(cs_item_sk#19, cs_order_number#20, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] +Arguments: hashpartitioning(cs_item_sk#17, cs_order_number#18, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 6] -Input [3]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21] -Arguments: [cs_item_sk#19 ASC NULLS FIRST, cs_order_number#20 ASC NULLS FIRST], false, 0 +Input [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] +Arguments: [cs_item_sk#17 ASC NULLS FIRST, cs_order_number#18 ASC NULLS FIRST], false, 0 (20) Scan parquet default.catalog_returns -Output [6]: [cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28, cr_returned_date_sk#29] +Output [6]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25, cr_returned_date_sk#26] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] ReadSchema: struct (21) ColumnarToRow [codegen id : 7] -Input [6]: [cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28, cr_returned_date_sk#29] +Input [6]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25, cr_returned_date_sk#26] (22) Filter [codegen id : 7] -Input [6]: [cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28, cr_returned_date_sk#29] -Condition : (isnotnull(cr_item_sk#24) AND isnotnull(cr_order_number#25)) +Input [6]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25, cr_returned_date_sk#26] +Condition : (isnotnull(cr_item_sk#21) AND isnotnull(cr_order_number#22)) (23) Project [codegen id : 7] -Output [5]: [cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28] -Input [6]: [cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28, cr_returned_date_sk#29] +Output [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Input [6]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25, cr_returned_date_sk#26] (24) Exchange -Input [5]: [cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28] -Arguments: hashpartitioning(cr_item_sk#24, cr_order_number#25, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Arguments: hashpartitioning(cr_item_sk#21, cr_order_number#22, 5), ENSURE_REQUIREMENTS, [plan_id=4] (25) Sort [codegen id : 8] -Input [5]: [cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28] -Arguments: [cr_item_sk#24 ASC NULLS FIRST, cr_order_number#25 ASC NULLS FIRST], false, 0 +Input [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Arguments: [cr_item_sk#21 ASC NULLS FIRST, cr_order_number#22 ASC NULLS FIRST], false, 0 (26) SortMergeJoin [codegen id : 9] -Left keys [2]: [cs_item_sk#19, cs_order_number#20] -Right keys [2]: [cr_item_sk#24, cr_order_number#25] +Left keys [2]: [cs_item_sk#17, cs_order_number#18] +Right keys [2]: [cr_item_sk#21, cr_order_number#22] Join condition: None (27) Project [codegen id : 9] -Output [5]: [cs_item_sk#19, cs_ext_list_price#21, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28] -Input [8]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21, cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28] +Output [5]: [cs_item_sk#17, cs_ext_list_price#19, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Input [8]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19, cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] (28) HashAggregate [codegen id : 9] -Input [5]: [cs_item_sk#19, cs_ext_list_price#21, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28] -Keys [1]: [cs_item_sk#19] -Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#21)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))] -Aggregate Attributes [3]: [sum#31, sum#32, isEmpty#33] -Results [4]: [cs_item_sk#19, sum#34, sum#35, isEmpty#36] +Input [5]: [cs_item_sk#17, cs_ext_list_price#19, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Keys [1]: [cs_item_sk#17] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#19)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2)))] +Aggregate Attributes [3]: [sum#27, sum#28, isEmpty#29] +Results [4]: [cs_item_sk#17, sum#30, sum#31, isEmpty#32] (29) Exchange -Input [4]: [cs_item_sk#19, sum#34, sum#35, isEmpty#36] -Arguments: hashpartitioning(cs_item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#37] +Input [4]: [cs_item_sk#17, sum#30, sum#31, isEmpty#32] +Arguments: hashpartitioning(cs_item_sk#17, 5), ENSURE_REQUIREMENTS, [plan_id=5] (30) HashAggregate [codegen id : 10] -Input [4]: [cs_item_sk#19, sum#34, sum#35, isEmpty#36] -Keys [1]: [cs_item_sk#19] -Functions [2]: [sum(UnscaledValue(cs_ext_list_price#21)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#21))#38, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))#39] -Results [3]: [cs_item_sk#19, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#21))#38,17,2) AS sale#40, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))#39 AS refund#41] +Input [4]: [cs_item_sk#17, sum#30, sum#31, isEmpty#32] +Keys [1]: [cs_item_sk#17] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#19)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2)))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#19))#33, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2)))#34] +Results [3]: [cs_item_sk#17, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#19))#33,17,2) AS sale#35, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2)))#34 AS refund#36] (31) Filter [codegen id : 10] -Input [3]: [cs_item_sk#19, sale#40, refund#41] -Condition : (isnotnull(sale#40) AND (cast(sale#40 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(refund#41)), DecimalType(21,2)))) +Input [3]: [cs_item_sk#17, sale#35, refund#36] +Condition : (isnotnull(sale#35) AND (cast(sale#35 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(refund#36)), DecimalType(21,2)))) (32) Project [codegen id : 10] -Output [1]: [cs_item_sk#19] -Input [3]: [cs_item_sk#19, sale#40, refund#41] +Output [1]: [cs_item_sk#17] +Input [3]: [cs_item_sk#17, sale#35, refund#36] (33) BroadcastExchange -Input [1]: [cs_item_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#42] +Input [1]: [cs_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] (34) BroadcastHashJoin [codegen id : 13] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [cs_item_sk#19] +Right keys [1]: [cs_item_sk#17] Join condition: None (35) Project [codegen id : 13] Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, cs_item_sk#19] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, cs_item_sk#17] (36) ReusedExchange [Reuses operator id: 213] -Output [2]: [d_date_sk#43, d_year#44] +Output [2]: [d_date_sk#37, d_year#38] (37) BroadcastHashJoin [codegen id : 13] Left keys [1]: [ss_sold_date_sk#12] -Right keys [1]: [d_date_sk#43] +Right keys [1]: [d_date_sk#37] Join condition: None (38) Project [codegen id : 13] -Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44] -Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, d_date_sk#43, d_year#44] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38] +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, d_date_sk#37, d_year#38] (39) Scan parquet default.store -Output [3]: [s_store_sk#45, s_store_name#46, s_zip#47] +Output [3]: [s_store_sk#39, s_store_name#40, s_zip#41] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_zip)] ReadSchema: struct (40) ColumnarToRow [codegen id : 12] -Input [3]: [s_store_sk#45, s_store_name#46, s_zip#47] +Input [3]: [s_store_sk#39, s_store_name#40, s_zip#41] (41) Filter [codegen id : 12] -Input [3]: [s_store_sk#45, s_store_name#46, s_zip#47] -Condition : ((isnotnull(s_store_sk#45) AND isnotnull(s_store_name#46)) AND isnotnull(s_zip#47)) +Input [3]: [s_store_sk#39, s_store_name#40, s_zip#41] +Condition : ((isnotnull(s_store_sk#39) AND isnotnull(s_store_name#40)) AND isnotnull(s_zip#41)) (42) BroadcastExchange -Input [3]: [s_store_sk#45, s_store_name#46, s_zip#47] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#48] +Input [3]: [s_store_sk#39, s_store_name#40, s_zip#41] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] (43) BroadcastHashJoin [codegen id : 13] Left keys [1]: [ss_store_sk#6] -Right keys [1]: [s_store_sk#45] +Right keys [1]: [s_store_sk#39] Join condition: None (44) Project [codegen id : 13] -Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47] -Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_sk#45, s_store_name#46, s_zip#47] +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_sk#39, s_store_name#40, s_zip#41] (45) Exchange -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47] -Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#49] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41] +Arguments: hashpartitioning(ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=8] (46) Sort [codegen id : 14] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41] Arguments: [ss_customer_sk#2 ASC NULLS FIRST], false, 0 (47) Scan parquet default.customer -Output [6]: [c_customer_sk#50, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, c_first_shipto_date_sk#54, c_first_sales_date_sk#55] +Output [6]: [c_customer_sk#42, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (48) ColumnarToRow [codegen id : 15] -Input [6]: [c_customer_sk#50, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, c_first_shipto_date_sk#54, c_first_sales_date_sk#55] +Input [6]: [c_customer_sk#42, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47] (49) Filter [codegen id : 15] -Input [6]: [c_customer_sk#50, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, c_first_shipto_date_sk#54, c_first_sales_date_sk#55] -Condition : (((((isnotnull(c_customer_sk#50) AND isnotnull(c_first_sales_date_sk#55)) AND isnotnull(c_first_shipto_date_sk#54)) AND isnotnull(c_current_cdemo_sk#51)) AND isnotnull(c_current_hdemo_sk#52)) AND isnotnull(c_current_addr_sk#53)) +Input [6]: [c_customer_sk#42, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47] +Condition : (((((isnotnull(c_customer_sk#42) AND isnotnull(c_first_sales_date_sk#47)) AND isnotnull(c_first_shipto_date_sk#46)) AND isnotnull(c_current_cdemo_sk#43)) AND isnotnull(c_current_hdemo_sk#44)) AND isnotnull(c_current_addr_sk#45)) (50) Exchange -Input [6]: [c_customer_sk#50, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, c_first_shipto_date_sk#54, c_first_sales_date_sk#55] -Arguments: hashpartitioning(c_customer_sk#50, 5), ENSURE_REQUIREMENTS, [id=#56] +Input [6]: [c_customer_sk#42, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47] +Arguments: hashpartitioning(c_customer_sk#42, 5), ENSURE_REQUIREMENTS, [plan_id=9] (51) Sort [codegen id : 16] -Input [6]: [c_customer_sk#50, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, c_first_shipto_date_sk#54, c_first_sales_date_sk#55] -Arguments: [c_customer_sk#50 ASC NULLS FIRST], false, 0 +Input [6]: [c_customer_sk#42, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47] +Arguments: [c_customer_sk#42 ASC NULLS FIRST], false, 0 (52) SortMergeJoin [codegen id : 19] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#50] +Right keys [1]: [c_customer_sk#42] Join condition: None (53) Project [codegen id : 19] -Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, c_first_shipto_date_sk#54, c_first_sales_date_sk#55] -Input [18]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_customer_sk#50, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, c_first_shipto_date_sk#54, c_first_sales_date_sk#55] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47] +Input [18]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_customer_sk#42, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47] (54) Scan parquet default.date_dim -Output [2]: [d_date_sk#57, d_year#58] +Output [2]: [d_date_sk#48, d_year#49] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date_sk)] ReadSchema: struct (55) ColumnarToRow [codegen id : 17] -Input [2]: [d_date_sk#57, d_year#58] +Input [2]: [d_date_sk#48, d_year#49] (56) Filter [codegen id : 17] -Input [2]: [d_date_sk#57, d_year#58] -Condition : isnotnull(d_date_sk#57) +Input [2]: [d_date_sk#48, d_year#49] +Condition : isnotnull(d_date_sk#48) (57) BroadcastExchange -Input [2]: [d_date_sk#57, d_year#58] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#59] +Input [2]: [d_date_sk#48, d_year#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] (58) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [c_first_sales_date_sk#55] -Right keys [1]: [d_date_sk#57] +Left keys [1]: [c_first_sales_date_sk#47] +Right keys [1]: [d_date_sk#48] Join condition: None (59) Project [codegen id : 19] -Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, c_first_shipto_date_sk#54, d_year#58] -Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, c_first_shipto_date_sk#54, c_first_sales_date_sk#55, d_date_sk#57, d_year#58] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, d_year#49] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47, d_date_sk#48, d_year#49] (60) ReusedExchange [Reuses operator id: 57] -Output [2]: [d_date_sk#60, d_year#61] +Output [2]: [d_date_sk#50, d_year#51] (61) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [c_first_shipto_date_sk#54] -Right keys [1]: [d_date_sk#60] +Left keys [1]: [c_first_shipto_date_sk#46] +Right keys [1]: [d_date_sk#50] Join condition: None (62) Project [codegen id : 19] -Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, d_year#58, d_year#61] -Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, c_first_shipto_date_sk#54, d_year#58, d_date_sk#60, d_year#61] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, d_year#49, d_date_sk#50, d_year#51] (63) Exchange -Input [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, d_year#58, d_year#61] -Arguments: hashpartitioning(ss_cdemo_sk#3, 5), ENSURE_REQUIREMENTS, [id=#62] +Input [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51] +Arguments: hashpartitioning(ss_cdemo_sk#3, 5), ENSURE_REQUIREMENTS, [plan_id=11] (64) Sort [codegen id : 20] -Input [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, d_year#58, d_year#61] +Input [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51] Arguments: [ss_cdemo_sk#3 ASC NULLS FIRST], false, 0 (65) Scan parquet default.customer_demographics -Output [2]: [cd_demo_sk#63, cd_marital_status#64] +Output [2]: [cd_demo_sk#52, cd_marital_status#53] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] ReadSchema: struct (66) ColumnarToRow [codegen id : 21] -Input [2]: [cd_demo_sk#63, cd_marital_status#64] +Input [2]: [cd_demo_sk#52, cd_marital_status#53] (67) Filter [codegen id : 21] -Input [2]: [cd_demo_sk#63, cd_marital_status#64] -Condition : (isnotnull(cd_demo_sk#63) AND isnotnull(cd_marital_status#64)) +Input [2]: [cd_demo_sk#52, cd_marital_status#53] +Condition : (isnotnull(cd_demo_sk#52) AND isnotnull(cd_marital_status#53)) (68) Exchange -Input [2]: [cd_demo_sk#63, cd_marital_status#64] -Arguments: hashpartitioning(cd_demo_sk#63, 5), ENSURE_REQUIREMENTS, [id=#65] +Input [2]: [cd_demo_sk#52, cd_marital_status#53] +Arguments: hashpartitioning(cd_demo_sk#52, 5), ENSURE_REQUIREMENTS, [plan_id=12] (69) Sort [codegen id : 22] -Input [2]: [cd_demo_sk#63, cd_marital_status#64] -Arguments: [cd_demo_sk#63 ASC NULLS FIRST], false, 0 +Input [2]: [cd_demo_sk#52, cd_marital_status#53] +Arguments: [cd_demo_sk#52 ASC NULLS FIRST], false, 0 (70) SortMergeJoin [codegen id : 23] Left keys [1]: [ss_cdemo_sk#3] -Right keys [1]: [cd_demo_sk#63] +Right keys [1]: [cd_demo_sk#52] Join condition: None (71) Project [codegen id : 23] -Output [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, d_year#58, d_year#61, cd_marital_status#64] -Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, d_year#58, d_year#61, cd_demo_sk#63, cd_marital_status#64] +Output [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, cd_marital_status#53] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, cd_demo_sk#52, cd_marital_status#53] (72) Exchange -Input [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, d_year#58, d_year#61, cd_marital_status#64] -Arguments: hashpartitioning(c_current_cdemo_sk#51, 5), ENSURE_REQUIREMENTS, [id=#66] +Input [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, cd_marital_status#53] +Arguments: hashpartitioning(c_current_cdemo_sk#43, 5), ENSURE_REQUIREMENTS, [plan_id=13] (73) Sort [codegen id : 24] -Input [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, d_year#58, d_year#61, cd_marital_status#64] -Arguments: [c_current_cdemo_sk#51 ASC NULLS FIRST], false, 0 +Input [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, cd_marital_status#53] +Arguments: [c_current_cdemo_sk#43 ASC NULLS FIRST], false, 0 (74) ReusedExchange [Reuses operator id: 68] -Output [2]: [cd_demo_sk#67, cd_marital_status#68] +Output [2]: [cd_demo_sk#54, cd_marital_status#55] (75) Sort [codegen id : 26] -Input [2]: [cd_demo_sk#67, cd_marital_status#68] -Arguments: [cd_demo_sk#67 ASC NULLS FIRST], false, 0 +Input [2]: [cd_demo_sk#54, cd_marital_status#55] +Arguments: [cd_demo_sk#54 ASC NULLS FIRST], false, 0 (76) SortMergeJoin [codegen id : 30] -Left keys [1]: [c_current_cdemo_sk#51] -Right keys [1]: [cd_demo_sk#67] -Join condition: NOT (cd_marital_status#64 = cd_marital_status#68) +Left keys [1]: [c_current_cdemo_sk#43] +Right keys [1]: [cd_demo_sk#54] +Join condition: NOT (cd_marital_status#53 = cd_marital_status#55) (77) Project [codegen id : 30] -Output [14]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_hdemo_sk#52, c_current_addr_sk#53, d_year#58, d_year#61] -Input [18]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_cdemo_sk#51, c_current_hdemo_sk#52, c_current_addr_sk#53, d_year#58, d_year#61, cd_marital_status#64, cd_demo_sk#67, cd_marital_status#68] +Output [14]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51] +Input [18]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, cd_marital_status#53, cd_demo_sk#54, cd_marital_status#55] (78) Scan parquet default.promotion -Output [1]: [p_promo_sk#69] +Output [1]: [p_promo_sk#56] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [IsNotNull(p_promo_sk)] ReadSchema: struct (79) ColumnarToRow [codegen id : 27] -Input [1]: [p_promo_sk#69] +Input [1]: [p_promo_sk#56] (80) Filter [codegen id : 27] -Input [1]: [p_promo_sk#69] -Condition : isnotnull(p_promo_sk#69) +Input [1]: [p_promo_sk#56] +Condition : isnotnull(p_promo_sk#56) (81) BroadcastExchange -Input [1]: [p_promo_sk#69] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#70] +Input [1]: [p_promo_sk#56] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] (82) BroadcastHashJoin [codegen id : 30] Left keys [1]: [ss_promo_sk#7] -Right keys [1]: [p_promo_sk#69] +Right keys [1]: [p_promo_sk#56] Join condition: None (83) Project [codegen id : 30] -Output [13]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_hdemo_sk#52, c_current_addr_sk#53, d_year#58, d_year#61] -Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_hdemo_sk#52, c_current_addr_sk#53, d_year#58, d_year#61, p_promo_sk#69] +Output [13]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51] +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, p_promo_sk#56] (84) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#71, hd_income_band_sk#72] +Output [2]: [hd_demo_sk#57, hd_income_band_sk#58] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] ReadSchema: struct (85) ColumnarToRow [codegen id : 28] -Input [2]: [hd_demo_sk#71, hd_income_band_sk#72] +Input [2]: [hd_demo_sk#57, hd_income_band_sk#58] (86) Filter [codegen id : 28] -Input [2]: [hd_demo_sk#71, hd_income_band_sk#72] -Condition : (isnotnull(hd_demo_sk#71) AND isnotnull(hd_income_band_sk#72)) +Input [2]: [hd_demo_sk#57, hd_income_band_sk#58] +Condition : (isnotnull(hd_demo_sk#57) AND isnotnull(hd_income_band_sk#58)) (87) BroadcastExchange -Input [2]: [hd_demo_sk#71, hd_income_band_sk#72] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#73] +Input [2]: [hd_demo_sk#57, hd_income_band_sk#58] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=15] (88) BroadcastHashJoin [codegen id : 30] Left keys [1]: [ss_hdemo_sk#4] -Right keys [1]: [hd_demo_sk#71] +Right keys [1]: [hd_demo_sk#57] Join condition: None (89) Project [codegen id : 30] -Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_hdemo_sk#52, c_current_addr_sk#53, d_year#58, d_year#61, hd_income_band_sk#72] -Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_hdemo_sk#52, c_current_addr_sk#53, d_year#58, d_year#61, hd_demo_sk#71, hd_income_band_sk#72] +Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58] +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, hd_demo_sk#57, hd_income_band_sk#58] (90) ReusedExchange [Reuses operator id: 87] -Output [2]: [hd_demo_sk#74, hd_income_band_sk#75] +Output [2]: [hd_demo_sk#59, hd_income_band_sk#60] (91) BroadcastHashJoin [codegen id : 30] -Left keys [1]: [c_current_hdemo_sk#52] -Right keys [1]: [hd_demo_sk#74] +Left keys [1]: [c_current_hdemo_sk#44] +Right keys [1]: [hd_demo_sk#59] Join condition: None (92) Project [codegen id : 30] -Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_addr_sk#53, d_year#58, d_year#61, hd_income_band_sk#72, hd_income_band_sk#75] -Input [15]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_hdemo_sk#52, c_current_addr_sk#53, d_year#58, d_year#61, hd_income_band_sk#72, hd_demo_sk#74, hd_income_band_sk#75] +Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60] +Input [15]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58, hd_demo_sk#59, hd_income_band_sk#60] (93) Exchange -Input [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_addr_sk#53, d_year#58, d_year#61, hd_income_band_sk#72, hd_income_band_sk#75] -Arguments: hashpartitioning(ss_addr_sk#5, 5), ENSURE_REQUIREMENTS, [id=#76] +Input [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60] +Arguments: hashpartitioning(ss_addr_sk#5, 5), ENSURE_REQUIREMENTS, [plan_id=16] (94) Sort [codegen id : 31] -Input [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_addr_sk#53, d_year#58, d_year#61, hd_income_band_sk#72, hd_income_band_sk#75] +Input [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60] Arguments: [ss_addr_sk#5 ASC NULLS FIRST], false, 0 (95) Scan parquet default.customer_address -Output [5]: [ca_address_sk#77, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81] +Output [5]: [ca_address_sk#61, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (96) ColumnarToRow [codegen id : 32] -Input [5]: [ca_address_sk#77, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81] +Input [5]: [ca_address_sk#61, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] (97) Filter [codegen id : 32] -Input [5]: [ca_address_sk#77, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81] -Condition : isnotnull(ca_address_sk#77) +Input [5]: [ca_address_sk#61, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] +Condition : isnotnull(ca_address_sk#61) (98) Exchange -Input [5]: [ca_address_sk#77, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81] -Arguments: hashpartitioning(ca_address_sk#77, 5), ENSURE_REQUIREMENTS, [id=#82] +Input [5]: [ca_address_sk#61, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] +Arguments: hashpartitioning(ca_address_sk#61, 5), ENSURE_REQUIREMENTS, [plan_id=17] (99) Sort [codegen id : 33] -Input [5]: [ca_address_sk#77, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81] -Arguments: [ca_address_sk#77 ASC NULLS FIRST], false, 0 +Input [5]: [ca_address_sk#61, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] +Arguments: [ca_address_sk#61 ASC NULLS FIRST], false, 0 (100) SortMergeJoin [codegen id : 34] Left keys [1]: [ss_addr_sk#5] -Right keys [1]: [ca_address_sk#77] +Right keys [1]: [ca_address_sk#61] Join condition: None (101) Project [codegen id : 34] -Output [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_addr_sk#53, d_year#58, d_year#61, hd_income_band_sk#72, hd_income_band_sk#75, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81] -Input [18]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_addr_sk#53, d_year#58, d_year#61, hd_income_band_sk#72, hd_income_band_sk#75, ca_address_sk#77, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81] +Output [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] +Input [18]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60, ca_address_sk#61, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] (102) Exchange -Input [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_addr_sk#53, d_year#58, d_year#61, hd_income_band_sk#72, hd_income_band_sk#75, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81] -Arguments: hashpartitioning(c_current_addr_sk#53, 5), ENSURE_REQUIREMENTS, [id=#83] +Input [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] +Arguments: hashpartitioning(c_current_addr_sk#45, 5), ENSURE_REQUIREMENTS, [plan_id=18] (103) Sort [codegen id : 35] -Input [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_addr_sk#53, d_year#58, d_year#61, hd_income_band_sk#72, hd_income_band_sk#75, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81] -Arguments: [c_current_addr_sk#53 ASC NULLS FIRST], false, 0 +Input [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] +Arguments: [c_current_addr_sk#45 ASC NULLS FIRST], false, 0 (104) ReusedExchange [Reuses operator id: 98] -Output [5]: [ca_address_sk#84, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88] +Output [5]: [ca_address_sk#66, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70] (105) Sort [codegen id : 37] -Input [5]: [ca_address_sk#84, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88] -Arguments: [ca_address_sk#84 ASC NULLS FIRST], false, 0 +Input [5]: [ca_address_sk#66, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70] +Arguments: [ca_address_sk#66 ASC NULLS FIRST], false, 0 (106) SortMergeJoin [codegen id : 41] -Left keys [1]: [c_current_addr_sk#53] -Right keys [1]: [ca_address_sk#84] +Left keys [1]: [c_current_addr_sk#45] +Right keys [1]: [ca_address_sk#66] Join condition: None (107) Project [codegen id : 41] -Output [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, d_year#58, d_year#61, hd_income_band_sk#72, hd_income_band_sk#75, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88] -Input [21]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, c_current_addr_sk#53, d_year#58, d_year#61, hd_income_band_sk#72, hd_income_band_sk#75, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_address_sk#84, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88] +Output [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70] +Input [21]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_address_sk#66, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70] (108) Scan parquet default.income_band -Output [1]: [ib_income_band_sk#89] +Output [1]: [ib_income_band_sk#71] Batched: true Location [not included in comparison]/{warehouse_dir}/income_band] PushedFilters: [IsNotNull(ib_income_band_sk)] ReadSchema: struct (109) ColumnarToRow [codegen id : 38] -Input [1]: [ib_income_band_sk#89] +Input [1]: [ib_income_band_sk#71] (110) Filter [codegen id : 38] -Input [1]: [ib_income_band_sk#89] -Condition : isnotnull(ib_income_band_sk#89) +Input [1]: [ib_income_band_sk#71] +Condition : isnotnull(ib_income_band_sk#71) (111) BroadcastExchange -Input [1]: [ib_income_band_sk#89] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#90] +Input [1]: [ib_income_band_sk#71] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=19] (112) BroadcastHashJoin [codegen id : 41] -Left keys [1]: [hd_income_band_sk#72] -Right keys [1]: [ib_income_band_sk#89] +Left keys [1]: [hd_income_band_sk#58] +Right keys [1]: [ib_income_band_sk#71] Join condition: None (113) Project [codegen id : 41] -Output [18]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, d_year#58, d_year#61, hd_income_band_sk#75, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88] -Input [20]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, d_year#58, d_year#61, hd_income_band_sk#72, hd_income_band_sk#75, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88, ib_income_band_sk#89] +Output [18]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, d_year#49, d_year#51, hd_income_band_sk#60, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70] +Input [20]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, ib_income_band_sk#71] (114) ReusedExchange [Reuses operator id: 111] -Output [1]: [ib_income_band_sk#91] +Output [1]: [ib_income_band_sk#72] (115) BroadcastHashJoin [codegen id : 41] -Left keys [1]: [hd_income_band_sk#75] -Right keys [1]: [ib_income_band_sk#91] +Left keys [1]: [hd_income_band_sk#60] +Right keys [1]: [ib_income_band_sk#72] Join condition: None (116) Project [codegen id : 41] -Output [17]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, d_year#58, d_year#61, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88] -Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, d_year#58, d_year#61, hd_income_band_sk#75, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88, ib_income_band_sk#91] +Output [17]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, d_year#49, d_year#51, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70] +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, d_year#49, d_year#51, hd_income_band_sk#60, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, ib_income_band_sk#72] (117) Scan parquet default.item -Output [4]: [i_item_sk#92, i_current_price#93, i_color#94, i_product_name#95] +Output [4]: [i_item_sk#73, i_current_price#74, i_color#75, i_product_name#76] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_current_price), In(i_color, [burlywood ,floral ,indian ,medium ,purple ,spring ]), GreaterThanOrEqual(i_current_price,64.00), LessThanOrEqual(i_current_price,74.00), GreaterThanOrEqual(i_current_price,65.00), LessThanOrEqual(i_current_price,79.00), IsNotNull(i_item_sk)] ReadSchema: struct (118) ColumnarToRow [codegen id : 40] -Input [4]: [i_item_sk#92, i_current_price#93, i_color#94, i_product_name#95] +Input [4]: [i_item_sk#73, i_current_price#74, i_color#75, i_product_name#76] (119) Filter [codegen id : 40] -Input [4]: [i_item_sk#92, i_current_price#93, i_color#94, i_product_name#95] -Condition : ((((((isnotnull(i_current_price#93) AND i_color#94 IN (purple ,burlywood ,indian ,spring ,floral ,medium )) AND (i_current_price#93 >= 64.00)) AND (i_current_price#93 <= 74.00)) AND (i_current_price#93 >= 65.00)) AND (i_current_price#93 <= 79.00)) AND isnotnull(i_item_sk#92)) +Input [4]: [i_item_sk#73, i_current_price#74, i_color#75, i_product_name#76] +Condition : ((((((isnotnull(i_current_price#74) AND i_color#75 IN (purple ,burlywood ,indian ,spring ,floral ,medium )) AND (i_current_price#74 >= 64.00)) AND (i_current_price#74 <= 74.00)) AND (i_current_price#74 >= 65.00)) AND (i_current_price#74 <= 79.00)) AND isnotnull(i_item_sk#73)) (120) Project [codegen id : 40] -Output [2]: [i_item_sk#92, i_product_name#95] -Input [4]: [i_item_sk#92, i_current_price#93, i_color#94, i_product_name#95] +Output [2]: [i_item_sk#73, i_product_name#76] +Input [4]: [i_item_sk#73, i_current_price#74, i_color#75, i_product_name#76] (121) BroadcastExchange -Input [2]: [i_item_sk#92, i_product_name#95] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#96] +Input [2]: [i_item_sk#73, i_product_name#76] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=20] (122) BroadcastHashJoin [codegen id : 41] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#92] +Right keys [1]: [i_item_sk#73] Join condition: None (123) Project [codegen id : 41] -Output [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, d_year#58, d_year#61, s_store_name#46, s_zip#47, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88, i_item_sk#92, i_product_name#95] -Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, s_store_name#46, s_zip#47, d_year#58, d_year#61, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88, i_item_sk#92, i_product_name#95] +Output [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, d_year#49, d_year#51, s_store_name#40, s_zip#41, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, i_item_sk#73, i_product_name#76] +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, d_year#49, d_year#51, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, i_item_sk#73, i_product_name#76] (124) HashAggregate [codegen id : 41] -Input [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#44, d_year#58, d_year#61, s_store_name#46, s_zip#47, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88, i_item_sk#92, i_product_name#95] -Keys [15]: [i_product_name#95, i_item_sk#92, s_store_name#46, s_zip#47, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88, d_year#44, d_year#58, d_year#61] +Input [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, d_year#49, d_year#51, s_store_name#40, s_zip#41, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, i_item_sk#73, i_product_name#76] +Keys [15]: [i_product_name#76, i_item_sk#73, s_store_name#40, s_zip#41, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, d_year#38, d_year#49, d_year#51] Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#9)), partial_sum(UnscaledValue(ss_list_price#10)), partial_sum(UnscaledValue(ss_coupon_amt#11))] -Aggregate Attributes [4]: [count#97, sum#98, sum#99, sum#100] -Results [19]: [i_product_name#95, i_item_sk#92, s_store_name#46, s_zip#47, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88, d_year#44, d_year#58, d_year#61, count#101, sum#102, sum#103, sum#104] +Aggregate Attributes [4]: [count#77, sum#78, sum#79, sum#80] +Results [19]: [i_product_name#76, i_item_sk#73, s_store_name#40, s_zip#41, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, d_year#38, d_year#49, d_year#51, count#81, sum#82, sum#83, sum#84] (125) Exchange -Input [19]: [i_product_name#95, i_item_sk#92, s_store_name#46, s_zip#47, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88, d_year#44, d_year#58, d_year#61, count#101, sum#102, sum#103, sum#104] -Arguments: hashpartitioning(i_product_name#95, i_item_sk#92, s_store_name#46, s_zip#47, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88, d_year#44, d_year#58, d_year#61, 5), ENSURE_REQUIREMENTS, [id=#105] +Input [19]: [i_product_name#76, i_item_sk#73, s_store_name#40, s_zip#41, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, d_year#38, d_year#49, d_year#51, count#81, sum#82, sum#83, sum#84] +Arguments: hashpartitioning(i_product_name#76, i_item_sk#73, s_store_name#40, s_zip#41, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, d_year#38, d_year#49, d_year#51, 5), ENSURE_REQUIREMENTS, [plan_id=21] (126) HashAggregate [codegen id : 42] -Input [19]: [i_product_name#95, i_item_sk#92, s_store_name#46, s_zip#47, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88, d_year#44, d_year#58, d_year#61, count#101, sum#102, sum#103, sum#104] -Keys [15]: [i_product_name#95, i_item_sk#92, s_store_name#46, s_zip#47, ca_street_number#78, ca_street_name#79, ca_city#80, ca_zip#81, ca_street_number#85, ca_street_name#86, ca_city#87, ca_zip#88, d_year#44, d_year#58, d_year#61] +Input [19]: [i_product_name#76, i_item_sk#73, s_store_name#40, s_zip#41, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, d_year#38, d_year#49, d_year#51, count#81, sum#82, sum#83, sum#84] +Keys [15]: [i_product_name#76, i_item_sk#73, s_store_name#40, s_zip#41, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, d_year#38, d_year#49, d_year#51] Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#9)), sum(UnscaledValue(ss_list_price#10)), sum(UnscaledValue(ss_coupon_amt#11))] -Aggregate Attributes [4]: [count(1)#106, sum(UnscaledValue(ss_wholesale_cost#9))#107, sum(UnscaledValue(ss_list_price#10))#108, sum(UnscaledValue(ss_coupon_amt#11))#109] -Results [17]: [i_product_name#95 AS product_name#110, i_item_sk#92 AS item_sk#111, s_store_name#46 AS store_name#112, s_zip#47 AS store_zip#113, ca_street_number#78 AS b_street_number#114, ca_street_name#79 AS b_streen_name#115, ca_city#80 AS b_city#116, ca_zip#81 AS b_zip#117, ca_street_number#85 AS c_street_number#118, ca_street_name#86 AS c_street_name#119, ca_city#87 AS c_city#120, ca_zip#88 AS c_zip#121, d_year#44 AS syear#122, count(1)#106 AS cnt#123, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#9))#107,17,2) AS s1#124, MakeDecimal(sum(UnscaledValue(ss_list_price#10))#108,17,2) AS s2#125, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#11))#109,17,2) AS s3#126] +Aggregate Attributes [4]: [count(1)#85, sum(UnscaledValue(ss_wholesale_cost#9))#86, sum(UnscaledValue(ss_list_price#10))#87, sum(UnscaledValue(ss_coupon_amt#11))#88] +Results [17]: [i_product_name#76 AS product_name#89, i_item_sk#73 AS item_sk#90, s_store_name#40 AS store_name#91, s_zip#41 AS store_zip#92, ca_street_number#62 AS b_street_number#93, ca_street_name#63 AS b_streen_name#94, ca_city#64 AS b_city#95, ca_zip#65 AS b_zip#96, ca_street_number#67 AS c_street_number#97, ca_street_name#68 AS c_street_name#98, ca_city#69 AS c_city#99, ca_zip#70 AS c_zip#100, d_year#38 AS syear#101, count(1)#85 AS cnt#102, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#9))#86,17,2) AS s1#103, MakeDecimal(sum(UnscaledValue(ss_list_price#10))#87,17,2) AS s2#104, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#11))#88,17,2) AS s3#105] (127) Exchange -Input [17]: [product_name#110, item_sk#111, store_name#112, store_zip#113, b_street_number#114, b_streen_name#115, b_city#116, b_zip#117, c_street_number#118, c_street_name#119, c_city#120, c_zip#121, syear#122, cnt#123, s1#124, s2#125, s3#126] -Arguments: hashpartitioning(item_sk#111, store_name#112, store_zip#113, 5), ENSURE_REQUIREMENTS, [id=#127] +Input [17]: [product_name#89, item_sk#90, store_name#91, store_zip#92, b_street_number#93, b_streen_name#94, b_city#95, b_zip#96, c_street_number#97, c_street_name#98, c_city#99, c_zip#100, syear#101, cnt#102, s1#103, s2#104, s3#105] +Arguments: hashpartitioning(item_sk#90, store_name#91, store_zip#92, 5), ENSURE_REQUIREMENTS, [plan_id=22] (128) Sort [codegen id : 43] -Input [17]: [product_name#110, item_sk#111, store_name#112, store_zip#113, b_street_number#114, b_streen_name#115, b_city#116, b_zip#117, c_street_number#118, c_street_name#119, c_city#120, c_zip#121, syear#122, cnt#123, s1#124, s2#125, s3#126] -Arguments: [item_sk#111 ASC NULLS FIRST, store_name#112 ASC NULLS FIRST, store_zip#113 ASC NULLS FIRST], false, 0 +Input [17]: [product_name#89, item_sk#90, store_name#91, store_zip#92, b_street_number#93, b_streen_name#94, b_city#95, b_zip#96, c_street_number#97, c_street_name#98, c_city#99, c_zip#100, syear#101, cnt#102, s1#103, s2#104, s3#105] +Arguments: [item_sk#90 ASC NULLS FIRST, store_name#91 ASC NULLS FIRST, store_zip#92 ASC NULLS FIRST], false, 0 (129) Scan parquet default.store_sales -Output [12]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_store_sk#133, ss_promo_sk#134, ss_ticket_number#135, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, ss_sold_date_sk#139] +Output [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_ticket_number#113, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#139), dynamicpruningexpression(ss_sold_date_sk#139 IN dynamicpruning#140)] +PartitionFilters: [isnotnull(ss_sold_date_sk#117), dynamicpruningexpression(ss_sold_date_sk#117 IN dynamicpruning#118)] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] ReadSchema: struct (130) ColumnarToRow [codegen id : 44] -Input [12]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_store_sk#133, ss_promo_sk#134, ss_ticket_number#135, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, ss_sold_date_sk#139] +Input [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_ticket_number#113, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] (131) Filter [codegen id : 44] -Input [12]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_store_sk#133, ss_promo_sk#134, ss_ticket_number#135, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, ss_sold_date_sk#139] -Condition : (((((((isnotnull(ss_item_sk#128) AND isnotnull(ss_ticket_number#135)) AND isnotnull(ss_store_sk#133)) AND isnotnull(ss_customer_sk#129)) AND isnotnull(ss_cdemo_sk#130)) AND isnotnull(ss_promo_sk#134)) AND isnotnull(ss_hdemo_sk#131)) AND isnotnull(ss_addr_sk#132)) +Input [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_ticket_number#113, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] +Condition : (((((((isnotnull(ss_item_sk#106) AND isnotnull(ss_ticket_number#113)) AND isnotnull(ss_store_sk#111)) AND isnotnull(ss_customer_sk#107)) AND isnotnull(ss_cdemo_sk#108)) AND isnotnull(ss_promo_sk#112)) AND isnotnull(ss_hdemo_sk#109)) AND isnotnull(ss_addr_sk#110)) (132) Exchange -Input [12]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_store_sk#133, ss_promo_sk#134, ss_ticket_number#135, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, ss_sold_date_sk#139] -Arguments: hashpartitioning(ss_item_sk#128, ss_ticket_number#135, 5), ENSURE_REQUIREMENTS, [id=#141] +Input [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_ticket_number#113, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] +Arguments: hashpartitioning(ss_item_sk#106, ss_ticket_number#113, 5), ENSURE_REQUIREMENTS, [plan_id=23] (133) Sort [codegen id : 45] -Input [12]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_store_sk#133, ss_promo_sk#134, ss_ticket_number#135, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, ss_sold_date_sk#139] -Arguments: [ss_item_sk#128 ASC NULLS FIRST, ss_ticket_number#135 ASC NULLS FIRST], false, 0 +Input [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_ticket_number#113, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] +Arguments: [ss_item_sk#106 ASC NULLS FIRST, ss_ticket_number#113 ASC NULLS FIRST], false, 0 (134) ReusedExchange [Reuses operator id: 10] -Output [2]: [sr_item_sk#142, sr_ticket_number#143] +Output [2]: [sr_item_sk#119, sr_ticket_number#120] (135) Sort [codegen id : 47] -Input [2]: [sr_item_sk#142, sr_ticket_number#143] -Arguments: [sr_item_sk#142 ASC NULLS FIRST, sr_ticket_number#143 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#119, sr_ticket_number#120] +Arguments: [sr_item_sk#119 ASC NULLS FIRST, sr_ticket_number#120 ASC NULLS FIRST], false, 0 (136) SortMergeJoin [codegen id : 56] -Left keys [2]: [ss_item_sk#128, ss_ticket_number#135] -Right keys [2]: [sr_item_sk#142, sr_ticket_number#143] +Left keys [2]: [ss_item_sk#106, ss_ticket_number#113] +Right keys [2]: [sr_item_sk#119, sr_ticket_number#120] Join condition: None (137) Project [codegen id : 56] -Output [11]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_store_sk#133, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, ss_sold_date_sk#139] -Input [14]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_store_sk#133, ss_promo_sk#134, ss_ticket_number#135, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, ss_sold_date_sk#139, sr_item_sk#142, sr_ticket_number#143] +Output [11]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] +Input [14]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_ticket_number#113, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117, sr_item_sk#119, sr_ticket_number#120] (138) ReusedExchange [Reuses operator id: 33] -Output [1]: [cs_item_sk#144] +Output [1]: [cs_item_sk#121] (139) BroadcastHashJoin [codegen id : 56] -Left keys [1]: [ss_item_sk#128] -Right keys [1]: [cs_item_sk#144] +Left keys [1]: [ss_item_sk#106] +Right keys [1]: [cs_item_sk#121] Join condition: None (140) Project [codegen id : 56] -Output [11]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_store_sk#133, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, ss_sold_date_sk#139] -Input [12]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_store_sk#133, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, ss_sold_date_sk#139, cs_item_sk#144] +Output [11]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] +Input [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117, cs_item_sk#121] (141) ReusedExchange [Reuses operator id: 217] -Output [2]: [d_date_sk#145, d_year#146] +Output [2]: [d_date_sk#122, d_year#123] (142) BroadcastHashJoin [codegen id : 56] -Left keys [1]: [ss_sold_date_sk#139] -Right keys [1]: [d_date_sk#145] +Left keys [1]: [ss_sold_date_sk#117] +Right keys [1]: [d_date_sk#122] Join condition: None (143) Project [codegen id : 56] -Output [11]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_store_sk#133, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146] -Input [13]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_store_sk#133, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, ss_sold_date_sk#139, d_date_sk#145, d_year#146] +Output [11]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123] +Input [13]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117, d_date_sk#122, d_year#123] (144) ReusedExchange [Reuses operator id: 42] -Output [3]: [s_store_sk#147, s_store_name#148, s_zip#149] +Output [3]: [s_store_sk#124, s_store_name#125, s_zip#126] (145) BroadcastHashJoin [codegen id : 56] -Left keys [1]: [ss_store_sk#133] -Right keys [1]: [s_store_sk#147] +Left keys [1]: [ss_store_sk#111] +Right keys [1]: [s_store_sk#124] Join condition: None (146) Project [codegen id : 56] -Output [12]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149] -Input [14]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_store_sk#133, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_sk#147, s_store_name#148, s_zip#149] +Output [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126] +Input [14]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_sk#124, s_store_name#125, s_zip#126] (147) Exchange -Input [12]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149] -Arguments: hashpartitioning(ss_customer_sk#129, 5), ENSURE_REQUIREMENTS, [id=#150] +Input [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126] +Arguments: hashpartitioning(ss_customer_sk#107, 5), ENSURE_REQUIREMENTS, [plan_id=24] (148) Sort [codegen id : 57] -Input [12]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149] -Arguments: [ss_customer_sk#129 ASC NULLS FIRST], false, 0 +Input [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126] +Arguments: [ss_customer_sk#107 ASC NULLS FIRST], false, 0 (149) ReusedExchange [Reuses operator id: 50] -Output [6]: [c_customer_sk#151, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, c_first_shipto_date_sk#155, c_first_sales_date_sk#156] +Output [6]: [c_customer_sk#127, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, c_first_shipto_date_sk#131, c_first_sales_date_sk#132] (150) Sort [codegen id : 59] -Input [6]: [c_customer_sk#151, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, c_first_shipto_date_sk#155, c_first_sales_date_sk#156] -Arguments: [c_customer_sk#151 ASC NULLS FIRST], false, 0 +Input [6]: [c_customer_sk#127, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, c_first_shipto_date_sk#131, c_first_sales_date_sk#132] +Arguments: [c_customer_sk#127 ASC NULLS FIRST], false, 0 (151) SortMergeJoin [codegen id : 62] -Left keys [1]: [ss_customer_sk#129] -Right keys [1]: [c_customer_sk#151] +Left keys [1]: [ss_customer_sk#107] +Right keys [1]: [c_customer_sk#127] Join condition: None (152) Project [codegen id : 62] -Output [16]: [ss_item_sk#128, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, c_first_shipto_date_sk#155, c_first_sales_date_sk#156] -Input [18]: [ss_item_sk#128, ss_customer_sk#129, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_customer_sk#151, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, c_first_shipto_date_sk#155, c_first_sales_date_sk#156] +Output [16]: [ss_item_sk#106, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, c_first_shipto_date_sk#131, c_first_sales_date_sk#132] +Input [18]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_customer_sk#127, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, c_first_shipto_date_sk#131, c_first_sales_date_sk#132] (153) ReusedExchange [Reuses operator id: 57] -Output [2]: [d_date_sk#157, d_year#158] +Output [2]: [d_date_sk#133, d_year#134] (154) BroadcastHashJoin [codegen id : 62] -Left keys [1]: [c_first_sales_date_sk#156] -Right keys [1]: [d_date_sk#157] +Left keys [1]: [c_first_sales_date_sk#132] +Right keys [1]: [d_date_sk#133] Join condition: None (155) Project [codegen id : 62] -Output [16]: [ss_item_sk#128, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, c_first_shipto_date_sk#155, d_year#158] -Input [18]: [ss_item_sk#128, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, c_first_shipto_date_sk#155, c_first_sales_date_sk#156, d_date_sk#157, d_year#158] +Output [16]: [ss_item_sk#106, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, c_first_shipto_date_sk#131, d_year#134] +Input [18]: [ss_item_sk#106, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, c_first_shipto_date_sk#131, c_first_sales_date_sk#132, d_date_sk#133, d_year#134] (156) ReusedExchange [Reuses operator id: 57] -Output [2]: [d_date_sk#159, d_year#160] +Output [2]: [d_date_sk#135, d_year#136] (157) BroadcastHashJoin [codegen id : 62] -Left keys [1]: [c_first_shipto_date_sk#155] -Right keys [1]: [d_date_sk#159] +Left keys [1]: [c_first_shipto_date_sk#131] +Right keys [1]: [d_date_sk#135] Join condition: None (158) Project [codegen id : 62] -Output [16]: [ss_item_sk#128, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160] -Input [18]: [ss_item_sk#128, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, c_first_shipto_date_sk#155, d_year#158, d_date_sk#159, d_year#160] +Output [16]: [ss_item_sk#106, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, d_year#134, d_year#136] +Input [18]: [ss_item_sk#106, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, c_first_shipto_date_sk#131, d_year#134, d_date_sk#135, d_year#136] (159) Exchange -Input [16]: [ss_item_sk#128, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160] -Arguments: hashpartitioning(ss_cdemo_sk#130, 5), ENSURE_REQUIREMENTS, [id=#161] +Input [16]: [ss_item_sk#106, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, d_year#134, d_year#136] +Arguments: hashpartitioning(ss_cdemo_sk#108, 5), ENSURE_REQUIREMENTS, [plan_id=25] (160) Sort [codegen id : 63] -Input [16]: [ss_item_sk#128, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160] -Arguments: [ss_cdemo_sk#130 ASC NULLS FIRST], false, 0 +Input [16]: [ss_item_sk#106, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, d_year#134, d_year#136] +Arguments: [ss_cdemo_sk#108 ASC NULLS FIRST], false, 0 (161) ReusedExchange [Reuses operator id: 68] -Output [2]: [cd_demo_sk#162, cd_marital_status#163] +Output [2]: [cd_demo_sk#137, cd_marital_status#138] (162) Sort [codegen id : 65] -Input [2]: [cd_demo_sk#162, cd_marital_status#163] -Arguments: [cd_demo_sk#162 ASC NULLS FIRST], false, 0 +Input [2]: [cd_demo_sk#137, cd_marital_status#138] +Arguments: [cd_demo_sk#137 ASC NULLS FIRST], false, 0 (163) SortMergeJoin [codegen id : 66] -Left keys [1]: [ss_cdemo_sk#130] -Right keys [1]: [cd_demo_sk#162] +Left keys [1]: [ss_cdemo_sk#108] +Right keys [1]: [cd_demo_sk#137] Join condition: None (164) Project [codegen id : 66] -Output [16]: [ss_item_sk#128, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, cd_marital_status#163] -Input [18]: [ss_item_sk#128, ss_cdemo_sk#130, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, cd_demo_sk#162, cd_marital_status#163] +Output [16]: [ss_item_sk#106, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, d_year#134, d_year#136, cd_marital_status#138] +Input [18]: [ss_item_sk#106, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, d_year#134, d_year#136, cd_demo_sk#137, cd_marital_status#138] (165) Exchange -Input [16]: [ss_item_sk#128, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, cd_marital_status#163] -Arguments: hashpartitioning(c_current_cdemo_sk#152, 5), ENSURE_REQUIREMENTS, [id=#164] +Input [16]: [ss_item_sk#106, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, d_year#134, d_year#136, cd_marital_status#138] +Arguments: hashpartitioning(c_current_cdemo_sk#128, 5), ENSURE_REQUIREMENTS, [plan_id=26] (166) Sort [codegen id : 67] -Input [16]: [ss_item_sk#128, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, cd_marital_status#163] -Arguments: [c_current_cdemo_sk#152 ASC NULLS FIRST], false, 0 +Input [16]: [ss_item_sk#106, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, d_year#134, d_year#136, cd_marital_status#138] +Arguments: [c_current_cdemo_sk#128 ASC NULLS FIRST], false, 0 (167) ReusedExchange [Reuses operator id: 68] -Output [2]: [cd_demo_sk#165, cd_marital_status#166] +Output [2]: [cd_demo_sk#139, cd_marital_status#140] (168) Sort [codegen id : 69] -Input [2]: [cd_demo_sk#165, cd_marital_status#166] -Arguments: [cd_demo_sk#165 ASC NULLS FIRST], false, 0 +Input [2]: [cd_demo_sk#139, cd_marital_status#140] +Arguments: [cd_demo_sk#139 ASC NULLS FIRST], false, 0 (169) SortMergeJoin [codegen id : 73] -Left keys [1]: [c_current_cdemo_sk#152] -Right keys [1]: [cd_demo_sk#165] -Join condition: NOT (cd_marital_status#163 = cd_marital_status#166) +Left keys [1]: [c_current_cdemo_sk#128] +Right keys [1]: [cd_demo_sk#139] +Join condition: NOT (cd_marital_status#138 = cd_marital_status#140) (170) Project [codegen id : 73] -Output [14]: [ss_item_sk#128, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160] -Input [18]: [ss_item_sk#128, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_cdemo_sk#152, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, cd_marital_status#163, cd_demo_sk#165, cd_marital_status#166] +Output [14]: [ss_item_sk#106, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_hdemo_sk#129, c_current_addr_sk#130, d_year#134, d_year#136] +Input [18]: [ss_item_sk#106, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_cdemo_sk#128, c_current_hdemo_sk#129, c_current_addr_sk#130, d_year#134, d_year#136, cd_marital_status#138, cd_demo_sk#139, cd_marital_status#140] (171) ReusedExchange [Reuses operator id: 81] -Output [1]: [p_promo_sk#167] +Output [1]: [p_promo_sk#141] (172) BroadcastHashJoin [codegen id : 73] -Left keys [1]: [ss_promo_sk#134] -Right keys [1]: [p_promo_sk#167] +Left keys [1]: [ss_promo_sk#112] +Right keys [1]: [p_promo_sk#141] Join condition: None (173) Project [codegen id : 73] -Output [13]: [ss_item_sk#128, ss_hdemo_sk#131, ss_addr_sk#132, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160] -Input [15]: [ss_item_sk#128, ss_hdemo_sk#131, ss_addr_sk#132, ss_promo_sk#134, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, p_promo_sk#167] +Output [13]: [ss_item_sk#106, ss_hdemo_sk#109, ss_addr_sk#110, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_hdemo_sk#129, c_current_addr_sk#130, d_year#134, d_year#136] +Input [15]: [ss_item_sk#106, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_hdemo_sk#129, c_current_addr_sk#130, d_year#134, d_year#136, p_promo_sk#141] (174) ReusedExchange [Reuses operator id: 87] -Output [2]: [hd_demo_sk#168, hd_income_band_sk#169] +Output [2]: [hd_demo_sk#142, hd_income_band_sk#143] (175) BroadcastHashJoin [codegen id : 73] -Left keys [1]: [ss_hdemo_sk#131] -Right keys [1]: [hd_demo_sk#168] +Left keys [1]: [ss_hdemo_sk#109] +Right keys [1]: [hd_demo_sk#142] Join condition: None (176) Project [codegen id : 73] -Output [13]: [ss_item_sk#128, ss_addr_sk#132, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#169] -Input [15]: [ss_item_sk#128, ss_hdemo_sk#131, ss_addr_sk#132, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, hd_demo_sk#168, hd_income_band_sk#169] +Output [13]: [ss_item_sk#106, ss_addr_sk#110, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_hdemo_sk#129, c_current_addr_sk#130, d_year#134, d_year#136, hd_income_band_sk#143] +Input [15]: [ss_item_sk#106, ss_hdemo_sk#109, ss_addr_sk#110, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_hdemo_sk#129, c_current_addr_sk#130, d_year#134, d_year#136, hd_demo_sk#142, hd_income_band_sk#143] (177) ReusedExchange [Reuses operator id: 87] -Output [2]: [hd_demo_sk#170, hd_income_band_sk#171] +Output [2]: [hd_demo_sk#144, hd_income_band_sk#145] (178) BroadcastHashJoin [codegen id : 73] -Left keys [1]: [c_current_hdemo_sk#153] -Right keys [1]: [hd_demo_sk#170] +Left keys [1]: [c_current_hdemo_sk#129] +Right keys [1]: [hd_demo_sk#144] Join condition: None (179) Project [codegen id : 73] -Output [13]: [ss_item_sk#128, ss_addr_sk#132, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#169, hd_income_band_sk#171] -Input [15]: [ss_item_sk#128, ss_addr_sk#132, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_hdemo_sk#153, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#169, hd_demo_sk#170, hd_income_band_sk#171] +Output [13]: [ss_item_sk#106, ss_addr_sk#110, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_addr_sk#130, d_year#134, d_year#136, hd_income_band_sk#143, hd_income_band_sk#145] +Input [15]: [ss_item_sk#106, ss_addr_sk#110, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_hdemo_sk#129, c_current_addr_sk#130, d_year#134, d_year#136, hd_income_band_sk#143, hd_demo_sk#144, hd_income_band_sk#145] (180) Exchange -Input [13]: [ss_item_sk#128, ss_addr_sk#132, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#169, hd_income_band_sk#171] -Arguments: hashpartitioning(ss_addr_sk#132, 5), ENSURE_REQUIREMENTS, [id=#172] +Input [13]: [ss_item_sk#106, ss_addr_sk#110, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_addr_sk#130, d_year#134, d_year#136, hd_income_band_sk#143, hd_income_band_sk#145] +Arguments: hashpartitioning(ss_addr_sk#110, 5), ENSURE_REQUIREMENTS, [plan_id=27] (181) Sort [codegen id : 74] -Input [13]: [ss_item_sk#128, ss_addr_sk#132, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#169, hd_income_band_sk#171] -Arguments: [ss_addr_sk#132 ASC NULLS FIRST], false, 0 +Input [13]: [ss_item_sk#106, ss_addr_sk#110, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_addr_sk#130, d_year#134, d_year#136, hd_income_band_sk#143, hd_income_band_sk#145] +Arguments: [ss_addr_sk#110 ASC NULLS FIRST], false, 0 (182) ReusedExchange [Reuses operator id: 98] -Output [5]: [ca_address_sk#173, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177] +Output [5]: [ca_address_sk#146, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150] (183) Sort [codegen id : 76] -Input [5]: [ca_address_sk#173, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177] -Arguments: [ca_address_sk#173 ASC NULLS FIRST], false, 0 +Input [5]: [ca_address_sk#146, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150] +Arguments: [ca_address_sk#146 ASC NULLS FIRST], false, 0 (184) SortMergeJoin [codegen id : 77] -Left keys [1]: [ss_addr_sk#132] -Right keys [1]: [ca_address_sk#173] +Left keys [1]: [ss_addr_sk#110] +Right keys [1]: [ca_address_sk#146] Join condition: None (185) Project [codegen id : 77] -Output [16]: [ss_item_sk#128, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#169, hd_income_band_sk#171, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177] -Input [18]: [ss_item_sk#128, ss_addr_sk#132, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#169, hd_income_band_sk#171, ca_address_sk#173, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177] +Output [16]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_addr_sk#130, d_year#134, d_year#136, hd_income_band_sk#143, hd_income_band_sk#145, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150] +Input [18]: [ss_item_sk#106, ss_addr_sk#110, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_addr_sk#130, d_year#134, d_year#136, hd_income_band_sk#143, hd_income_band_sk#145, ca_address_sk#146, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150] (186) Exchange -Input [16]: [ss_item_sk#128, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#169, hd_income_band_sk#171, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177] -Arguments: hashpartitioning(c_current_addr_sk#154, 5), ENSURE_REQUIREMENTS, [id=#178] +Input [16]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_addr_sk#130, d_year#134, d_year#136, hd_income_band_sk#143, hd_income_band_sk#145, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150] +Arguments: hashpartitioning(c_current_addr_sk#130, 5), ENSURE_REQUIREMENTS, [plan_id=28] (187) Sort [codegen id : 78] -Input [16]: [ss_item_sk#128, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#169, hd_income_band_sk#171, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177] -Arguments: [c_current_addr_sk#154 ASC NULLS FIRST], false, 0 +Input [16]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_addr_sk#130, d_year#134, d_year#136, hd_income_band_sk#143, hd_income_band_sk#145, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150] +Arguments: [c_current_addr_sk#130 ASC NULLS FIRST], false, 0 (188) ReusedExchange [Reuses operator id: 98] -Output [5]: [ca_address_sk#179, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183] +Output [5]: [ca_address_sk#151, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155] (189) Sort [codegen id : 80] -Input [5]: [ca_address_sk#179, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183] -Arguments: [ca_address_sk#179 ASC NULLS FIRST], false, 0 +Input [5]: [ca_address_sk#151, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155] +Arguments: [ca_address_sk#151 ASC NULLS FIRST], false, 0 (190) SortMergeJoin [codegen id : 84] -Left keys [1]: [c_current_addr_sk#154] -Right keys [1]: [ca_address_sk#179] +Left keys [1]: [c_current_addr_sk#130] +Right keys [1]: [ca_address_sk#151] Join condition: None (191) Project [codegen id : 84] -Output [19]: [ss_item_sk#128, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, d_year#158, d_year#160, hd_income_band_sk#169, hd_income_band_sk#171, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183] -Input [21]: [ss_item_sk#128, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, c_current_addr_sk#154, d_year#158, d_year#160, hd_income_band_sk#169, hd_income_band_sk#171, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_address_sk#179, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183] +Output [19]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, d_year#134, d_year#136, hd_income_band_sk#143, hd_income_band_sk#145, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155] +Input [21]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, c_current_addr_sk#130, d_year#134, d_year#136, hd_income_band_sk#143, hd_income_band_sk#145, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_address_sk#151, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155] (192) ReusedExchange [Reuses operator id: 111] -Output [1]: [ib_income_band_sk#184] +Output [1]: [ib_income_band_sk#156] (193) BroadcastHashJoin [codegen id : 84] -Left keys [1]: [hd_income_band_sk#169] -Right keys [1]: [ib_income_band_sk#184] +Left keys [1]: [hd_income_band_sk#143] +Right keys [1]: [ib_income_band_sk#156] Join condition: None (194) Project [codegen id : 84] -Output [18]: [ss_item_sk#128, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, d_year#158, d_year#160, hd_income_band_sk#171, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183] -Input [20]: [ss_item_sk#128, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, d_year#158, d_year#160, hd_income_band_sk#169, hd_income_band_sk#171, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183, ib_income_band_sk#184] +Output [18]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, d_year#134, d_year#136, hd_income_band_sk#145, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155] +Input [20]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, d_year#134, d_year#136, hd_income_band_sk#143, hd_income_band_sk#145, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155, ib_income_band_sk#156] (195) ReusedExchange [Reuses operator id: 111] -Output [1]: [ib_income_band_sk#185] +Output [1]: [ib_income_band_sk#157] (196) BroadcastHashJoin [codegen id : 84] -Left keys [1]: [hd_income_band_sk#171] -Right keys [1]: [ib_income_band_sk#185] +Left keys [1]: [hd_income_band_sk#145] +Right keys [1]: [ib_income_band_sk#157] Join condition: None (197) Project [codegen id : 84] -Output [17]: [ss_item_sk#128, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, d_year#158, d_year#160, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183] -Input [19]: [ss_item_sk#128, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, d_year#158, d_year#160, hd_income_band_sk#171, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183, ib_income_band_sk#185] +Output [17]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, d_year#134, d_year#136, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155] +Input [19]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, d_year#134, d_year#136, hd_income_band_sk#145, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155, ib_income_band_sk#157] (198) ReusedExchange [Reuses operator id: 121] -Output [2]: [i_item_sk#186, i_product_name#187] +Output [2]: [i_item_sk#158, i_product_name#159] (199) BroadcastHashJoin [codegen id : 84] -Left keys [1]: [ss_item_sk#128] -Right keys [1]: [i_item_sk#186] +Left keys [1]: [ss_item_sk#106] +Right keys [1]: [i_item_sk#158] Join condition: None (200) Project [codegen id : 84] -Output [18]: [ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, d_year#158, d_year#160, s_store_name#148, s_zip#149, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183, i_item_sk#186, i_product_name#187] -Input [19]: [ss_item_sk#128, ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, s_store_name#148, s_zip#149, d_year#158, d_year#160, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183, i_item_sk#186, i_product_name#187] +Output [18]: [ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, d_year#134, d_year#136, s_store_name#125, s_zip#126, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155, i_item_sk#158, i_product_name#159] +Input [19]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, s_store_name#125, s_zip#126, d_year#134, d_year#136, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155, i_item_sk#158, i_product_name#159] (201) HashAggregate [codegen id : 84] -Input [18]: [ss_wholesale_cost#136, ss_list_price#137, ss_coupon_amt#138, d_year#146, d_year#158, d_year#160, s_store_name#148, s_zip#149, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183, i_item_sk#186, i_product_name#187] -Keys [15]: [i_product_name#187, i_item_sk#186, s_store_name#148, s_zip#149, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183, d_year#146, d_year#158, d_year#160] -Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#136)), partial_sum(UnscaledValue(ss_list_price#137)), partial_sum(UnscaledValue(ss_coupon_amt#138))] -Aggregate Attributes [4]: [count#97, sum#188, sum#189, sum#190] -Results [19]: [i_product_name#187, i_item_sk#186, s_store_name#148, s_zip#149, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183, d_year#146, d_year#158, d_year#160, count#101, sum#191, sum#192, sum#193] +Input [18]: [ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#123, d_year#134, d_year#136, s_store_name#125, s_zip#126, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155, i_item_sk#158, i_product_name#159] +Keys [15]: [i_product_name#159, i_item_sk#158, s_store_name#125, s_zip#126, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155, d_year#123, d_year#134, d_year#136] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#114)), partial_sum(UnscaledValue(ss_list_price#115)), partial_sum(UnscaledValue(ss_coupon_amt#116))] +Aggregate Attributes [4]: [count#77, sum#160, sum#161, sum#162] +Results [19]: [i_product_name#159, i_item_sk#158, s_store_name#125, s_zip#126, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155, d_year#123, d_year#134, d_year#136, count#81, sum#163, sum#164, sum#165] (202) Exchange -Input [19]: [i_product_name#187, i_item_sk#186, s_store_name#148, s_zip#149, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183, d_year#146, d_year#158, d_year#160, count#101, sum#191, sum#192, sum#193] -Arguments: hashpartitioning(i_product_name#187, i_item_sk#186, s_store_name#148, s_zip#149, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183, d_year#146, d_year#158, d_year#160, 5), ENSURE_REQUIREMENTS, [id=#194] +Input [19]: [i_product_name#159, i_item_sk#158, s_store_name#125, s_zip#126, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155, d_year#123, d_year#134, d_year#136, count#81, sum#163, sum#164, sum#165] +Arguments: hashpartitioning(i_product_name#159, i_item_sk#158, s_store_name#125, s_zip#126, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155, d_year#123, d_year#134, d_year#136, 5), ENSURE_REQUIREMENTS, [plan_id=29] (203) HashAggregate [codegen id : 85] -Input [19]: [i_product_name#187, i_item_sk#186, s_store_name#148, s_zip#149, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183, d_year#146, d_year#158, d_year#160, count#101, sum#191, sum#192, sum#193] -Keys [15]: [i_product_name#187, i_item_sk#186, s_store_name#148, s_zip#149, ca_street_number#174, ca_street_name#175, ca_city#176, ca_zip#177, ca_street_number#180, ca_street_name#181, ca_city#182, ca_zip#183, d_year#146, d_year#158, d_year#160] -Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#136)), sum(UnscaledValue(ss_list_price#137)), sum(UnscaledValue(ss_coupon_amt#138))] -Aggregate Attributes [4]: [count(1)#106, sum(UnscaledValue(ss_wholesale_cost#136))#107, sum(UnscaledValue(ss_list_price#137))#108, sum(UnscaledValue(ss_coupon_amt#138))#109] -Results [8]: [i_item_sk#186 AS item_sk#195, s_store_name#148 AS store_name#196, s_zip#149 AS store_zip#197, d_year#146 AS syear#198, count(1)#106 AS cnt#199, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#136))#107,17,2) AS s1#200, MakeDecimal(sum(UnscaledValue(ss_list_price#137))#108,17,2) AS s2#201, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#138))#109,17,2) AS s3#202] +Input [19]: [i_product_name#159, i_item_sk#158, s_store_name#125, s_zip#126, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155, d_year#123, d_year#134, d_year#136, count#81, sum#163, sum#164, sum#165] +Keys [15]: [i_product_name#159, i_item_sk#158, s_store_name#125, s_zip#126, ca_street_number#147, ca_street_name#148, ca_city#149, ca_zip#150, ca_street_number#152, ca_street_name#153, ca_city#154, ca_zip#155, d_year#123, d_year#134, d_year#136] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#114)), sum(UnscaledValue(ss_list_price#115)), sum(UnscaledValue(ss_coupon_amt#116))] +Aggregate Attributes [4]: [count(1)#85, sum(UnscaledValue(ss_wholesale_cost#114))#86, sum(UnscaledValue(ss_list_price#115))#87, sum(UnscaledValue(ss_coupon_amt#116))#88] +Results [8]: [i_item_sk#158 AS item_sk#166, s_store_name#125 AS store_name#167, s_zip#126 AS store_zip#168, d_year#123 AS syear#169, count(1)#85 AS cnt#170, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#114))#86,17,2) AS s1#171, MakeDecimal(sum(UnscaledValue(ss_list_price#115))#87,17,2) AS s2#172, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#116))#88,17,2) AS s3#173] (204) Exchange -Input [8]: [item_sk#195, store_name#196, store_zip#197, syear#198, cnt#199, s1#200, s2#201, s3#202] -Arguments: hashpartitioning(item_sk#195, store_name#196, store_zip#197, 5), ENSURE_REQUIREMENTS, [id=#203] +Input [8]: [item_sk#166, store_name#167, store_zip#168, syear#169, cnt#170, s1#171, s2#172, s3#173] +Arguments: hashpartitioning(item_sk#166, store_name#167, store_zip#168, 5), ENSURE_REQUIREMENTS, [plan_id=30] (205) Sort [codegen id : 86] -Input [8]: [item_sk#195, store_name#196, store_zip#197, syear#198, cnt#199, s1#200, s2#201, s3#202] -Arguments: [item_sk#195 ASC NULLS FIRST, store_name#196 ASC NULLS FIRST, store_zip#197 ASC NULLS FIRST], false, 0 +Input [8]: [item_sk#166, store_name#167, store_zip#168, syear#169, cnt#170, s1#171, s2#172, s3#173] +Arguments: [item_sk#166 ASC NULLS FIRST, store_name#167 ASC NULLS FIRST, store_zip#168 ASC NULLS FIRST], false, 0 (206) SortMergeJoin [codegen id : 87] -Left keys [3]: [item_sk#111, store_name#112, store_zip#113] -Right keys [3]: [item_sk#195, store_name#196, store_zip#197] -Join condition: (cnt#199 <= cnt#123) +Left keys [3]: [item_sk#90, store_name#91, store_zip#92] +Right keys [3]: [item_sk#166, store_name#167, store_zip#168] +Join condition: (cnt#170 <= cnt#102) (207) Project [codegen id : 87] -Output [21]: [product_name#110, store_name#112, store_zip#113, b_street_number#114, b_streen_name#115, b_city#116, b_zip#117, c_street_number#118, c_street_name#119, c_city#120, c_zip#121, syear#122, cnt#123, s1#124, s2#125, s3#126, s1#200, s2#201, s3#202, syear#198, cnt#199] -Input [25]: [product_name#110, item_sk#111, store_name#112, store_zip#113, b_street_number#114, b_streen_name#115, b_city#116, b_zip#117, c_street_number#118, c_street_name#119, c_city#120, c_zip#121, syear#122, cnt#123, s1#124, s2#125, s3#126, item_sk#195, store_name#196, store_zip#197, syear#198, cnt#199, s1#200, s2#201, s3#202] +Output [21]: [product_name#89, store_name#91, store_zip#92, b_street_number#93, b_streen_name#94, b_city#95, b_zip#96, c_street_number#97, c_street_name#98, c_city#99, c_zip#100, syear#101, cnt#102, s1#103, s2#104, s3#105, s1#171, s2#172, s3#173, syear#169, cnt#170] +Input [25]: [product_name#89, item_sk#90, store_name#91, store_zip#92, b_street_number#93, b_streen_name#94, b_city#95, b_zip#96, c_street_number#97, c_street_name#98, c_city#99, c_zip#100, syear#101, cnt#102, s1#103, s2#104, s3#105, item_sk#166, store_name#167, store_zip#168, syear#169, cnt#170, s1#171, s2#172, s3#173] (208) Exchange -Input [21]: [product_name#110, store_name#112, store_zip#113, b_street_number#114, b_streen_name#115, b_city#116, b_zip#117, c_street_number#118, c_street_name#119, c_city#120, c_zip#121, syear#122, cnt#123, s1#124, s2#125, s3#126, s1#200, s2#201, s3#202, syear#198, cnt#199] -Arguments: rangepartitioning(product_name#110 ASC NULLS FIRST, store_name#112 ASC NULLS FIRST, cnt#199 ASC NULLS FIRST, s1#124 ASC NULLS FIRST, s1#200 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#204] +Input [21]: [product_name#89, store_name#91, store_zip#92, b_street_number#93, b_streen_name#94, b_city#95, b_zip#96, c_street_number#97, c_street_name#98, c_city#99, c_zip#100, syear#101, cnt#102, s1#103, s2#104, s3#105, s1#171, s2#172, s3#173, syear#169, cnt#170] +Arguments: rangepartitioning(product_name#89 ASC NULLS FIRST, store_name#91 ASC NULLS FIRST, cnt#170 ASC NULLS FIRST, s1#103 ASC NULLS FIRST, s1#171 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=31] (209) Sort [codegen id : 88] -Input [21]: [product_name#110, store_name#112, store_zip#113, b_street_number#114, b_streen_name#115, b_city#116, b_zip#117, c_street_number#118, c_street_name#119, c_city#120, c_zip#121, syear#122, cnt#123, s1#124, s2#125, s3#126, s1#200, s2#201, s3#202, syear#198, cnt#199] -Arguments: [product_name#110 ASC NULLS FIRST, store_name#112 ASC NULLS FIRST, cnt#199 ASC NULLS FIRST, s1#124 ASC NULLS FIRST, s1#200 ASC NULLS FIRST], true, 0 +Input [21]: [product_name#89, store_name#91, store_zip#92, b_street_number#93, b_streen_name#94, b_city#95, b_zip#96, c_street_number#97, c_street_name#98, c_city#99, c_zip#100, syear#101, cnt#102, s1#103, s2#104, s3#105, s1#171, s2#172, s3#173, syear#169, cnt#170] +Arguments: [product_name#89 ASC NULLS FIRST, store_name#91 ASC NULLS FIRST, cnt#170 ASC NULLS FIRST, s1#103 ASC NULLS FIRST, s1#171 ASC NULLS FIRST], true, 0 ===== Subqueries ===== @@ -1117,24 +1117,24 @@ BroadcastExchange (213) (210) Scan parquet default.date_dim -Output [2]: [d_date_sk#43, d_year#44] +Output [2]: [d_date_sk#37, d_year#38] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] ReadSchema: struct (211) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#43, d_year#44] +Input [2]: [d_date_sk#37, d_year#38] (212) Filter [codegen id : 1] -Input [2]: [d_date_sk#43, d_year#44] -Condition : ((isnotnull(d_year#44) AND (d_year#44 = 1999)) AND isnotnull(d_date_sk#43)) +Input [2]: [d_date_sk#37, d_year#38] +Condition : ((isnotnull(d_year#38) AND (d_year#38 = 1999)) AND isnotnull(d_date_sk#37)) (213) BroadcastExchange -Input [2]: [d_date_sk#43, d_year#44] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#205] +Input [2]: [d_date_sk#37, d_year#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=32] -Subquery:2 Hosting operator id = 129 Hosting Expression = ss_sold_date_sk#139 IN dynamicpruning#140 +Subquery:2 Hosting operator id = 129 Hosting Expression = ss_sold_date_sk#117 IN dynamicpruning#118 BroadcastExchange (217) +- * Filter (216) +- * ColumnarToRow (215) @@ -1142,21 +1142,21 @@ BroadcastExchange (217) (214) Scan parquet default.date_dim -Output [2]: [d_date_sk#145, d_year#146] +Output [2]: [d_date_sk#122, d_year#123] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (215) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#145, d_year#146] +Input [2]: [d_date_sk#122, d_year#123] (216) Filter [codegen id : 1] -Input [2]: [d_date_sk#145, d_year#146] -Condition : ((isnotnull(d_year#146) AND (d_year#146 = 2000)) AND isnotnull(d_date_sk#145)) +Input [2]: [d_date_sk#122, d_year#123] +Condition : ((isnotnull(d_year#123) AND (d_year#123 = 2000)) AND isnotnull(d_date_sk#122)) (217) BroadcastExchange -Input [2]: [d_date_sk#145, d_year#146] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#206] +Input [2]: [d_date_sk#122, d_year#123] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=33] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt index 426b408190662..28affb7b36a28 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q64/explain.txt @@ -201,787 +201,787 @@ Condition : (((((((isnotnull(ss_item_sk#1) AND isnotnull(ss_ticket_number#8)) AN (4) BroadcastExchange Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[7, int, false] as bigint) & 4294967295))),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[7, int, false] as bigint) & 4294967295))),false), [plan_id=1] (5) Scan parquet default.store_returns -Output [3]: [sr_item_sk#15, sr_ticket_number#16, sr_returned_date_sk#17] +Output [3]: [sr_item_sk#14, sr_ticket_number#15, sr_returned_date_sk#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] ReadSchema: struct (6) ColumnarToRow -Input [3]: [sr_item_sk#15, sr_ticket_number#16, sr_returned_date_sk#17] +Input [3]: [sr_item_sk#14, sr_ticket_number#15, sr_returned_date_sk#16] (7) Filter -Input [3]: [sr_item_sk#15, sr_ticket_number#16, sr_returned_date_sk#17] -Condition : (isnotnull(sr_item_sk#15) AND isnotnull(sr_ticket_number#16)) +Input [3]: [sr_item_sk#14, sr_ticket_number#15, sr_returned_date_sk#16] +Condition : (isnotnull(sr_item_sk#14) AND isnotnull(sr_ticket_number#15)) (8) Project -Output [2]: [sr_item_sk#15, sr_ticket_number#16] -Input [3]: [sr_item_sk#15, sr_ticket_number#16, sr_returned_date_sk#17] +Output [2]: [sr_item_sk#14, sr_ticket_number#15] +Input [3]: [sr_item_sk#14, sr_ticket_number#15, sr_returned_date_sk#16] (9) BroadcastHashJoin [codegen id : 2] Left keys [2]: [ss_item_sk#1, ss_ticket_number#8] -Right keys [2]: [sr_item_sk#15, sr_ticket_number#16] +Right keys [2]: [sr_item_sk#14, sr_ticket_number#15] Join condition: None (10) Project [codegen id : 2] Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] -Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, sr_item_sk#15, sr_ticket_number#16] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_ticket_number#8, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, sr_item_sk#14, sr_ticket_number#15] (11) Exchange Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#18] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) Sort [codegen id : 3] Input [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (13) Scan parquet default.catalog_sales -Output [4]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21, cs_sold_date_sk#22] +Output [4]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19, cs_sold_date_sk#20] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_sales] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_order_number)] ReadSchema: struct (14) ColumnarToRow [codegen id : 4] -Input [4]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21, cs_sold_date_sk#22] +Input [4]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19, cs_sold_date_sk#20] (15) Filter [codegen id : 4] -Input [4]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21, cs_sold_date_sk#22] -Condition : (isnotnull(cs_item_sk#19) AND isnotnull(cs_order_number#20)) +Input [4]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19, cs_sold_date_sk#20] +Condition : (isnotnull(cs_item_sk#17) AND isnotnull(cs_order_number#18)) (16) Project [codegen id : 4] -Output [3]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21] -Input [4]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21, cs_sold_date_sk#22] +Output [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] +Input [4]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19, cs_sold_date_sk#20] (17) Exchange -Input [3]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21] -Arguments: hashpartitioning(cs_item_sk#19, cs_order_number#20, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] +Arguments: hashpartitioning(cs_item_sk#17, cs_order_number#18, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) Sort [codegen id : 5] -Input [3]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21] -Arguments: [cs_item_sk#19 ASC NULLS FIRST, cs_order_number#20 ASC NULLS FIRST], false, 0 +Input [3]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19] +Arguments: [cs_item_sk#17 ASC NULLS FIRST, cs_order_number#18 ASC NULLS FIRST], false, 0 (19) Scan parquet default.catalog_returns -Output [6]: [cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28, cr_returned_date_sk#29] +Output [6]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25, cr_returned_date_sk#26] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] ReadSchema: struct (20) ColumnarToRow [codegen id : 6] -Input [6]: [cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28, cr_returned_date_sk#29] +Input [6]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25, cr_returned_date_sk#26] (21) Filter [codegen id : 6] -Input [6]: [cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28, cr_returned_date_sk#29] -Condition : (isnotnull(cr_item_sk#24) AND isnotnull(cr_order_number#25)) +Input [6]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25, cr_returned_date_sk#26] +Condition : (isnotnull(cr_item_sk#21) AND isnotnull(cr_order_number#22)) (22) Project [codegen id : 6] -Output [5]: [cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28] -Input [6]: [cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28, cr_returned_date_sk#29] +Output [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Input [6]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25, cr_returned_date_sk#26] (23) Exchange -Input [5]: [cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28] -Arguments: hashpartitioning(cr_item_sk#24, cr_order_number#25, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Arguments: hashpartitioning(cr_item_sk#21, cr_order_number#22, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) Sort [codegen id : 7] -Input [5]: [cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28] -Arguments: [cr_item_sk#24 ASC NULLS FIRST, cr_order_number#25 ASC NULLS FIRST], false, 0 +Input [5]: [cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Arguments: [cr_item_sk#21 ASC NULLS FIRST, cr_order_number#22 ASC NULLS FIRST], false, 0 (25) SortMergeJoin [codegen id : 8] -Left keys [2]: [cs_item_sk#19, cs_order_number#20] -Right keys [2]: [cr_item_sk#24, cr_order_number#25] +Left keys [2]: [cs_item_sk#17, cs_order_number#18] +Right keys [2]: [cr_item_sk#21, cr_order_number#22] Join condition: None (26) Project [codegen id : 8] -Output [5]: [cs_item_sk#19, cs_ext_list_price#21, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28] -Input [8]: [cs_item_sk#19, cs_order_number#20, cs_ext_list_price#21, cr_item_sk#24, cr_order_number#25, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28] +Output [5]: [cs_item_sk#17, cs_ext_list_price#19, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Input [8]: [cs_item_sk#17, cs_order_number#18, cs_ext_list_price#19, cr_item_sk#21, cr_order_number#22, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] (27) HashAggregate [codegen id : 8] -Input [5]: [cs_item_sk#19, cs_ext_list_price#21, cr_refunded_cash#26, cr_reversed_charge#27, cr_store_credit#28] -Keys [1]: [cs_item_sk#19] -Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#21)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))] -Aggregate Attributes [3]: [sum#31, sum#32, isEmpty#33] -Results [4]: [cs_item_sk#19, sum#34, sum#35, isEmpty#36] +Input [5]: [cs_item_sk#17, cs_ext_list_price#19, cr_refunded_cash#23, cr_reversed_charge#24, cr_store_credit#25] +Keys [1]: [cs_item_sk#17] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_list_price#19)), partial_sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2)))] +Aggregate Attributes [3]: [sum#27, sum#28, isEmpty#29] +Results [4]: [cs_item_sk#17, sum#30, sum#31, isEmpty#32] (28) Exchange -Input [4]: [cs_item_sk#19, sum#34, sum#35, isEmpty#36] -Arguments: hashpartitioning(cs_item_sk#19, 5), ENSURE_REQUIREMENTS, [id=#37] +Input [4]: [cs_item_sk#17, sum#30, sum#31, isEmpty#32] +Arguments: hashpartitioning(cs_item_sk#17, 5), ENSURE_REQUIREMENTS, [plan_id=5] (29) HashAggregate [codegen id : 9] -Input [4]: [cs_item_sk#19, sum#34, sum#35, isEmpty#36] -Keys [1]: [cs_item_sk#19] -Functions [2]: [sum(UnscaledValue(cs_ext_list_price#21)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#21))#38, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))#39] -Results [3]: [cs_item_sk#19, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#21))#38,17,2) AS sale#40, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#26 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#27 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#28 as decimal(9,2)))), DecimalType(9,2)))#39 AS refund#41] +Input [4]: [cs_item_sk#17, sum#30, sum#31, isEmpty#32] +Keys [1]: [cs_item_sk#17] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#19)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2)))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#19))#33, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2)))#34] +Results [3]: [cs_item_sk#17, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#19))#33,17,2) AS sale#35, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#23 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#24 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#25 as decimal(9,2)))), DecimalType(9,2)))#34 AS refund#36] (30) Filter [codegen id : 9] -Input [3]: [cs_item_sk#19, sale#40, refund#41] -Condition : (isnotnull(sale#40) AND (cast(sale#40 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(refund#41)), DecimalType(21,2)))) +Input [3]: [cs_item_sk#17, sale#35, refund#36] +Condition : (isnotnull(sale#35) AND (cast(sale#35 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(refund#36)), DecimalType(21,2)))) (31) Project [codegen id : 9] -Output [1]: [cs_item_sk#19] -Input [3]: [cs_item_sk#19, sale#40, refund#41] +Output [1]: [cs_item_sk#17] +Input [3]: [cs_item_sk#17, sale#35, refund#36] (32) Sort [codegen id : 9] -Input [1]: [cs_item_sk#19] -Arguments: [cs_item_sk#19 ASC NULLS FIRST], false, 0 +Input [1]: [cs_item_sk#17] +Arguments: [cs_item_sk#17 ASC NULLS FIRST], false, 0 (33) SortMergeJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [cs_item_sk#19] +Right keys [1]: [cs_item_sk#17] Join condition: None (34) Project [codegen id : 25] Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12] -Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, cs_item_sk#19] +Input [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, cs_item_sk#17] (35) ReusedExchange [Reuses operator id: 187] -Output [2]: [d_date_sk#42, d_year#43] +Output [2]: [d_date_sk#37, d_year#38] (36) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_sold_date_sk#12] -Right keys [1]: [d_date_sk#42] +Right keys [1]: [d_date_sk#37] Join condition: None (37) Project [codegen id : 25] -Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43] -Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, d_date_sk#42, d_year#43] +Output [11]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38] +Input [13]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, ss_sold_date_sk#12, d_date_sk#37, d_year#38] (38) Scan parquet default.store -Output [3]: [s_store_sk#44, s_store_name#45, s_zip#46] +Output [3]: [s_store_sk#39, s_store_name#40, s_zip#41] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk), IsNotNull(s_store_name), IsNotNull(s_zip)] ReadSchema: struct (39) ColumnarToRow [codegen id : 11] -Input [3]: [s_store_sk#44, s_store_name#45, s_zip#46] +Input [3]: [s_store_sk#39, s_store_name#40, s_zip#41] (40) Filter [codegen id : 11] -Input [3]: [s_store_sk#44, s_store_name#45, s_zip#46] -Condition : ((isnotnull(s_store_sk#44) AND isnotnull(s_store_name#45)) AND isnotnull(s_zip#46)) +Input [3]: [s_store_sk#39, s_store_name#40, s_zip#41] +Condition : ((isnotnull(s_store_sk#39) AND isnotnull(s_store_name#40)) AND isnotnull(s_zip#41)) (41) BroadcastExchange -Input [3]: [s_store_sk#44, s_store_name#45, s_zip#46] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#47] +Input [3]: [s_store_sk#39, s_store_name#40, s_zip#41] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] (42) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_store_sk#6] -Right keys [1]: [s_store_sk#44] +Right keys [1]: [s_store_sk#39] Join condition: None (43) Project [codegen id : 25] -Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46] -Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_sk#44, s_store_name#45, s_zip#46] +Output [12]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41] +Input [14]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_store_sk#6, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_sk#39, s_store_name#40, s_zip#41] (44) Scan parquet default.customer -Output [6]: [c_customer_sk#48, c_current_cdemo_sk#49, c_current_hdemo_sk#50, c_current_addr_sk#51, c_first_shipto_date_sk#52, c_first_sales_date_sk#53] +Output [6]: [c_customer_sk#42, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_first_sales_date_sk), IsNotNull(c_first_shipto_date_sk), IsNotNull(c_current_cdemo_sk), IsNotNull(c_current_hdemo_sk), IsNotNull(c_current_addr_sk)] ReadSchema: struct (45) ColumnarToRow [codegen id : 12] -Input [6]: [c_customer_sk#48, c_current_cdemo_sk#49, c_current_hdemo_sk#50, c_current_addr_sk#51, c_first_shipto_date_sk#52, c_first_sales_date_sk#53] +Input [6]: [c_customer_sk#42, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47] (46) Filter [codegen id : 12] -Input [6]: [c_customer_sk#48, c_current_cdemo_sk#49, c_current_hdemo_sk#50, c_current_addr_sk#51, c_first_shipto_date_sk#52, c_first_sales_date_sk#53] -Condition : (((((isnotnull(c_customer_sk#48) AND isnotnull(c_first_sales_date_sk#53)) AND isnotnull(c_first_shipto_date_sk#52)) AND isnotnull(c_current_cdemo_sk#49)) AND isnotnull(c_current_hdemo_sk#50)) AND isnotnull(c_current_addr_sk#51)) +Input [6]: [c_customer_sk#42, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47] +Condition : (((((isnotnull(c_customer_sk#42) AND isnotnull(c_first_sales_date_sk#47)) AND isnotnull(c_first_shipto_date_sk#46)) AND isnotnull(c_current_cdemo_sk#43)) AND isnotnull(c_current_hdemo_sk#44)) AND isnotnull(c_current_addr_sk#45)) (47) BroadcastExchange -Input [6]: [c_customer_sk#48, c_current_cdemo_sk#49, c_current_hdemo_sk#50, c_current_addr_sk#51, c_first_shipto_date_sk#52, c_first_sales_date_sk#53] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#54] +Input [6]: [c_customer_sk#42, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] (48) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_customer_sk#2] -Right keys [1]: [c_customer_sk#48] +Right keys [1]: [c_customer_sk#42] Join condition: None (49) Project [codegen id : 25] -Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_cdemo_sk#49, c_current_hdemo_sk#50, c_current_addr_sk#51, c_first_shipto_date_sk#52, c_first_sales_date_sk#53] -Input [18]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_customer_sk#48, c_current_cdemo_sk#49, c_current_hdemo_sk#50, c_current_addr_sk#51, c_first_shipto_date_sk#52, c_first_sales_date_sk#53] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47] +Input [18]: [ss_item_sk#1, ss_customer_sk#2, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_customer_sk#42, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47] (50) Scan parquet default.date_dim -Output [2]: [d_date_sk#55, d_year#56] +Output [2]: [d_date_sk#48, d_year#49] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 13] -Input [2]: [d_date_sk#55, d_year#56] +Input [2]: [d_date_sk#48, d_year#49] (52) Filter [codegen id : 13] -Input [2]: [d_date_sk#55, d_year#56] -Condition : isnotnull(d_date_sk#55) +Input [2]: [d_date_sk#48, d_year#49] +Condition : isnotnull(d_date_sk#48) (53) BroadcastExchange -Input [2]: [d_date_sk#55, d_year#56] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#57] +Input [2]: [d_date_sk#48, d_year#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] (54) BroadcastHashJoin [codegen id : 25] -Left keys [1]: [c_first_sales_date_sk#53] -Right keys [1]: [d_date_sk#55] +Left keys [1]: [c_first_sales_date_sk#47] +Right keys [1]: [d_date_sk#48] Join condition: None (55) Project [codegen id : 25] -Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_cdemo_sk#49, c_current_hdemo_sk#50, c_current_addr_sk#51, c_first_shipto_date_sk#52, d_year#56] -Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_cdemo_sk#49, c_current_hdemo_sk#50, c_current_addr_sk#51, c_first_shipto_date_sk#52, c_first_sales_date_sk#53, d_date_sk#55, d_year#56] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, d_year#49] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, c_first_sales_date_sk#47, d_date_sk#48, d_year#49] (56) ReusedExchange [Reuses operator id: 53] -Output [2]: [d_date_sk#58, d_year#59] +Output [2]: [d_date_sk#50, d_year#51] (57) BroadcastHashJoin [codegen id : 25] -Left keys [1]: [c_first_shipto_date_sk#52] -Right keys [1]: [d_date_sk#58] +Left keys [1]: [c_first_shipto_date_sk#46] +Right keys [1]: [d_date_sk#50] Join condition: None (58) Project [codegen id : 25] -Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_cdemo_sk#49, c_current_hdemo_sk#50, c_current_addr_sk#51, d_year#56, d_year#59] -Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_cdemo_sk#49, c_current_hdemo_sk#50, c_current_addr_sk#51, c_first_shipto_date_sk#52, d_year#56, d_date_sk#58, d_year#59] +Output [16]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, c_first_shipto_date_sk#46, d_year#49, d_date_sk#50, d_year#51] (59) Scan parquet default.customer_demographics -Output [2]: [cd_demo_sk#60, cd_marital_status#61] +Output [2]: [cd_demo_sk#52, cd_marital_status#53] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_demo_sk), IsNotNull(cd_marital_status)] ReadSchema: struct (60) ColumnarToRow [codegen id : 15] -Input [2]: [cd_demo_sk#60, cd_marital_status#61] +Input [2]: [cd_demo_sk#52, cd_marital_status#53] (61) Filter [codegen id : 15] -Input [2]: [cd_demo_sk#60, cd_marital_status#61] -Condition : (isnotnull(cd_demo_sk#60) AND isnotnull(cd_marital_status#61)) +Input [2]: [cd_demo_sk#52, cd_marital_status#53] +Condition : (isnotnull(cd_demo_sk#52) AND isnotnull(cd_marital_status#53)) (62) BroadcastExchange -Input [2]: [cd_demo_sk#60, cd_marital_status#61] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +Input [2]: [cd_demo_sk#52, cd_marital_status#53] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] (63) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_cdemo_sk#3] -Right keys [1]: [cd_demo_sk#60] +Right keys [1]: [cd_demo_sk#52] Join condition: None (64) Project [codegen id : 25] -Output [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_cdemo_sk#49, c_current_hdemo_sk#50, c_current_addr_sk#51, d_year#56, d_year#59, cd_marital_status#61] -Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_cdemo_sk#49, c_current_hdemo_sk#50, c_current_addr_sk#51, d_year#56, d_year#59, cd_demo_sk#60, cd_marital_status#61] +Output [16]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, cd_marital_status#53] +Input [18]: [ss_item_sk#1, ss_cdemo_sk#3, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, cd_demo_sk#52, cd_marital_status#53] (65) ReusedExchange [Reuses operator id: 62] -Output [2]: [cd_demo_sk#63, cd_marital_status#64] +Output [2]: [cd_demo_sk#54, cd_marital_status#55] (66) BroadcastHashJoin [codegen id : 25] -Left keys [1]: [c_current_cdemo_sk#49] -Right keys [1]: [cd_demo_sk#63] -Join condition: NOT (cd_marital_status#61 = cd_marital_status#64) +Left keys [1]: [c_current_cdemo_sk#43] +Right keys [1]: [cd_demo_sk#54] +Join condition: NOT (cd_marital_status#53 = cd_marital_status#55) (67) Project [codegen id : 25] -Output [14]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_hdemo_sk#50, c_current_addr_sk#51, d_year#56, d_year#59] -Input [18]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_cdemo_sk#49, c_current_hdemo_sk#50, c_current_addr_sk#51, d_year#56, d_year#59, cd_marital_status#61, cd_demo_sk#63, cd_marital_status#64] +Output [14]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51] +Input [18]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_cdemo_sk#43, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, cd_marital_status#53, cd_demo_sk#54, cd_marital_status#55] (68) Scan parquet default.promotion -Output [1]: [p_promo_sk#65] +Output [1]: [p_promo_sk#56] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [IsNotNull(p_promo_sk)] ReadSchema: struct (69) ColumnarToRow [codegen id : 17] -Input [1]: [p_promo_sk#65] +Input [1]: [p_promo_sk#56] (70) Filter [codegen id : 17] -Input [1]: [p_promo_sk#65] -Condition : isnotnull(p_promo_sk#65) +Input [1]: [p_promo_sk#56] +Condition : isnotnull(p_promo_sk#56) (71) BroadcastExchange -Input [1]: [p_promo_sk#65] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#66] +Input [1]: [p_promo_sk#56] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] (72) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_promo_sk#7] -Right keys [1]: [p_promo_sk#65] +Right keys [1]: [p_promo_sk#56] Join condition: None (73) Project [codegen id : 25] -Output [13]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_hdemo_sk#50, c_current_addr_sk#51, d_year#56, d_year#59] -Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_hdemo_sk#50, c_current_addr_sk#51, d_year#56, d_year#59, p_promo_sk#65] +Output [13]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51] +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_promo_sk#7, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, p_promo_sk#56] (74) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#67, hd_income_band_sk#68] +Output [2]: [hd_demo_sk#57, hd_income_band_sk#58] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_demo_sk), IsNotNull(hd_income_band_sk)] ReadSchema: struct (75) ColumnarToRow [codegen id : 18] -Input [2]: [hd_demo_sk#67, hd_income_band_sk#68] +Input [2]: [hd_demo_sk#57, hd_income_band_sk#58] (76) Filter [codegen id : 18] -Input [2]: [hd_demo_sk#67, hd_income_band_sk#68] -Condition : (isnotnull(hd_demo_sk#67) AND isnotnull(hd_income_band_sk#68)) +Input [2]: [hd_demo_sk#57, hd_income_band_sk#58] +Condition : (isnotnull(hd_demo_sk#57) AND isnotnull(hd_income_band_sk#58)) (77) BroadcastExchange -Input [2]: [hd_demo_sk#67, hd_income_band_sk#68] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#69] +Input [2]: [hd_demo_sk#57, hd_income_band_sk#58] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=11] (78) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_hdemo_sk#4] -Right keys [1]: [hd_demo_sk#67] +Right keys [1]: [hd_demo_sk#57] Join condition: None (79) Project [codegen id : 25] -Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_hdemo_sk#50, c_current_addr_sk#51, d_year#56, d_year#59, hd_income_band_sk#68] -Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_hdemo_sk#50, c_current_addr_sk#51, d_year#56, d_year#59, hd_demo_sk#67, hd_income_band_sk#68] +Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58] +Input [15]: [ss_item_sk#1, ss_hdemo_sk#4, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, hd_demo_sk#57, hd_income_band_sk#58] (80) ReusedExchange [Reuses operator id: 77] -Output [2]: [hd_demo_sk#70, hd_income_band_sk#71] +Output [2]: [hd_demo_sk#59, hd_income_band_sk#60] (81) BroadcastHashJoin [codegen id : 25] -Left keys [1]: [c_current_hdemo_sk#50] -Right keys [1]: [hd_demo_sk#70] +Left keys [1]: [c_current_hdemo_sk#44] +Right keys [1]: [hd_demo_sk#59] Join condition: None (82) Project [codegen id : 25] -Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_addr_sk#51, d_year#56, d_year#59, hd_income_band_sk#68, hd_income_band_sk#71] -Input [15]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_hdemo_sk#50, c_current_addr_sk#51, d_year#56, d_year#59, hd_income_band_sk#68, hd_demo_sk#70, hd_income_band_sk#71] +Output [13]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60] +Input [15]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_hdemo_sk#44, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58, hd_demo_sk#59, hd_income_band_sk#60] (83) Scan parquet default.customer_address -Output [5]: [ca_address_sk#72, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76] +Output [5]: [ca_address_sk#61, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_address] PushedFilters: [IsNotNull(ca_address_sk)] ReadSchema: struct (84) ColumnarToRow [codegen id : 20] -Input [5]: [ca_address_sk#72, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76] +Input [5]: [ca_address_sk#61, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] (85) Filter [codegen id : 20] -Input [5]: [ca_address_sk#72, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76] -Condition : isnotnull(ca_address_sk#72) +Input [5]: [ca_address_sk#61, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] +Condition : isnotnull(ca_address_sk#61) (86) BroadcastExchange -Input [5]: [ca_address_sk#72, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#77] +Input [5]: [ca_address_sk#61, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] (87) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_addr_sk#5] -Right keys [1]: [ca_address_sk#72] +Right keys [1]: [ca_address_sk#61] Join condition: None (88) Project [codegen id : 25] -Output [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_addr_sk#51, d_year#56, d_year#59, hd_income_band_sk#68, hd_income_band_sk#71, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76] -Input [18]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_addr_sk#51, d_year#56, d_year#59, hd_income_band_sk#68, hd_income_band_sk#71, ca_address_sk#72, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76] +Output [16]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] +Input [18]: [ss_item_sk#1, ss_addr_sk#5, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60, ca_address_sk#61, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65] (89) ReusedExchange [Reuses operator id: 86] -Output [5]: [ca_address_sk#78, ca_street_number#79, ca_street_name#80, ca_city#81, ca_zip#82] +Output [5]: [ca_address_sk#66, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70] (90) BroadcastHashJoin [codegen id : 25] -Left keys [1]: [c_current_addr_sk#51] -Right keys [1]: [ca_address_sk#78] +Left keys [1]: [c_current_addr_sk#45] +Right keys [1]: [ca_address_sk#66] Join condition: None (91) Project [codegen id : 25] -Output [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, d_year#56, d_year#59, hd_income_band_sk#68, hd_income_band_sk#71, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76, ca_street_number#79, ca_street_name#80, ca_city#81, ca_zip#82] -Input [21]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, c_current_addr_sk#51, d_year#56, d_year#59, hd_income_band_sk#68, hd_income_band_sk#71, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76, ca_address_sk#78, ca_street_number#79, ca_street_name#80, ca_city#81, ca_zip#82] +Output [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70] +Input [21]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, c_current_addr_sk#45, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_address_sk#66, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70] (92) Scan parquet default.income_band -Output [1]: [ib_income_band_sk#83] +Output [1]: [ib_income_band_sk#71] Batched: true Location [not included in comparison]/{warehouse_dir}/income_band] PushedFilters: [IsNotNull(ib_income_band_sk)] ReadSchema: struct (93) ColumnarToRow [codegen id : 22] -Input [1]: [ib_income_band_sk#83] +Input [1]: [ib_income_band_sk#71] (94) Filter [codegen id : 22] -Input [1]: [ib_income_band_sk#83] -Condition : isnotnull(ib_income_band_sk#83) +Input [1]: [ib_income_band_sk#71] +Condition : isnotnull(ib_income_band_sk#71) (95) BroadcastExchange -Input [1]: [ib_income_band_sk#83] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#84] +Input [1]: [ib_income_band_sk#71] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=13] (96) BroadcastHashJoin [codegen id : 25] -Left keys [1]: [hd_income_band_sk#68] -Right keys [1]: [ib_income_band_sk#83] +Left keys [1]: [hd_income_band_sk#58] +Right keys [1]: [ib_income_band_sk#71] Join condition: None (97) Project [codegen id : 25] -Output [18]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, d_year#56, d_year#59, hd_income_band_sk#71, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76, ca_street_number#79, ca_street_name#80, ca_city#81, ca_zip#82] -Input [20]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, d_year#56, d_year#59, hd_income_band_sk#68, hd_income_band_sk#71, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76, ca_street_number#79, ca_street_name#80, ca_city#81, ca_zip#82, ib_income_band_sk#83] +Output [18]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, d_year#49, d_year#51, hd_income_band_sk#60, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70] +Input [20]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, d_year#49, d_year#51, hd_income_band_sk#58, hd_income_band_sk#60, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, ib_income_band_sk#71] (98) ReusedExchange [Reuses operator id: 95] -Output [1]: [ib_income_band_sk#85] +Output [1]: [ib_income_band_sk#72] (99) BroadcastHashJoin [codegen id : 25] -Left keys [1]: [hd_income_band_sk#71] -Right keys [1]: [ib_income_band_sk#85] +Left keys [1]: [hd_income_band_sk#60] +Right keys [1]: [ib_income_band_sk#72] Join condition: None (100) Project [codegen id : 25] -Output [17]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, d_year#56, d_year#59, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76, ca_street_number#79, ca_street_name#80, ca_city#81, ca_zip#82] -Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, d_year#56, d_year#59, hd_income_band_sk#71, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76, ca_street_number#79, ca_street_name#80, ca_city#81, ca_zip#82, ib_income_band_sk#85] +Output [17]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, d_year#49, d_year#51, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70] +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, d_year#49, d_year#51, hd_income_band_sk#60, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, ib_income_band_sk#72] (101) Scan parquet default.item -Output [4]: [i_item_sk#86, i_current_price#87, i_color#88, i_product_name#89] +Output [4]: [i_item_sk#73, i_current_price#74, i_color#75, i_product_name#76] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_current_price), In(i_color, [burlywood ,floral ,indian ,medium ,purple ,spring ]), GreaterThanOrEqual(i_current_price,64.00), LessThanOrEqual(i_current_price,74.00), GreaterThanOrEqual(i_current_price,65.00), LessThanOrEqual(i_current_price,79.00), IsNotNull(i_item_sk)] ReadSchema: struct (102) ColumnarToRow [codegen id : 24] -Input [4]: [i_item_sk#86, i_current_price#87, i_color#88, i_product_name#89] +Input [4]: [i_item_sk#73, i_current_price#74, i_color#75, i_product_name#76] (103) Filter [codegen id : 24] -Input [4]: [i_item_sk#86, i_current_price#87, i_color#88, i_product_name#89] -Condition : ((((((isnotnull(i_current_price#87) AND i_color#88 IN (purple ,burlywood ,indian ,spring ,floral ,medium )) AND (i_current_price#87 >= 64.00)) AND (i_current_price#87 <= 74.00)) AND (i_current_price#87 >= 65.00)) AND (i_current_price#87 <= 79.00)) AND isnotnull(i_item_sk#86)) +Input [4]: [i_item_sk#73, i_current_price#74, i_color#75, i_product_name#76] +Condition : ((((((isnotnull(i_current_price#74) AND i_color#75 IN (purple ,burlywood ,indian ,spring ,floral ,medium )) AND (i_current_price#74 >= 64.00)) AND (i_current_price#74 <= 74.00)) AND (i_current_price#74 >= 65.00)) AND (i_current_price#74 <= 79.00)) AND isnotnull(i_item_sk#73)) (104) Project [codegen id : 24] -Output [2]: [i_item_sk#86, i_product_name#89] -Input [4]: [i_item_sk#86, i_current_price#87, i_color#88, i_product_name#89] +Output [2]: [i_item_sk#73, i_product_name#76] +Input [4]: [i_item_sk#73, i_current_price#74, i_color#75, i_product_name#76] (105) BroadcastExchange -Input [2]: [i_item_sk#86, i_product_name#89] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#90] +Input [2]: [i_item_sk#73, i_product_name#76] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] (106) BroadcastHashJoin [codegen id : 25] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#86] +Right keys [1]: [i_item_sk#73] Join condition: None (107) Project [codegen id : 25] -Output [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, d_year#56, d_year#59, s_store_name#45, s_zip#46, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76, ca_street_number#79, ca_street_name#80, ca_city#81, ca_zip#82, i_item_sk#86, i_product_name#89] -Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, s_store_name#45, s_zip#46, d_year#56, d_year#59, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76, ca_street_number#79, ca_street_name#80, ca_city#81, ca_zip#82, i_item_sk#86, i_product_name#89] +Output [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, d_year#49, d_year#51, s_store_name#40, s_zip#41, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, i_item_sk#73, i_product_name#76] +Input [19]: [ss_item_sk#1, ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, s_store_name#40, s_zip#41, d_year#49, d_year#51, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, i_item_sk#73, i_product_name#76] (108) HashAggregate [codegen id : 25] -Input [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#43, d_year#56, d_year#59, s_store_name#45, s_zip#46, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76, ca_street_number#79, ca_street_name#80, ca_city#81, ca_zip#82, i_item_sk#86, i_product_name#89] -Keys [15]: [i_product_name#89, i_item_sk#86, s_store_name#45, s_zip#46, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76, ca_street_number#79, ca_street_name#80, ca_city#81, ca_zip#82, d_year#43, d_year#56, d_year#59] +Input [18]: [ss_wholesale_cost#9, ss_list_price#10, ss_coupon_amt#11, d_year#38, d_year#49, d_year#51, s_store_name#40, s_zip#41, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, i_item_sk#73, i_product_name#76] +Keys [15]: [i_product_name#76, i_item_sk#73, s_store_name#40, s_zip#41, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, d_year#38, d_year#49, d_year#51] Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#9)), partial_sum(UnscaledValue(ss_list_price#10)), partial_sum(UnscaledValue(ss_coupon_amt#11))] -Aggregate Attributes [4]: [count#91, sum#92, sum#93, sum#94] -Results [19]: [i_product_name#89, i_item_sk#86, s_store_name#45, s_zip#46, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76, ca_street_number#79, ca_street_name#80, ca_city#81, ca_zip#82, d_year#43, d_year#56, d_year#59, count#95, sum#96, sum#97, sum#98] +Aggregate Attributes [4]: [count#77, sum#78, sum#79, sum#80] +Results [19]: [i_product_name#76, i_item_sk#73, s_store_name#40, s_zip#41, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, d_year#38, d_year#49, d_year#51, count#81, sum#82, sum#83, sum#84] (109) HashAggregate [codegen id : 25] -Input [19]: [i_product_name#89, i_item_sk#86, s_store_name#45, s_zip#46, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76, ca_street_number#79, ca_street_name#80, ca_city#81, ca_zip#82, d_year#43, d_year#56, d_year#59, count#95, sum#96, sum#97, sum#98] -Keys [15]: [i_product_name#89, i_item_sk#86, s_store_name#45, s_zip#46, ca_street_number#73, ca_street_name#74, ca_city#75, ca_zip#76, ca_street_number#79, ca_street_name#80, ca_city#81, ca_zip#82, d_year#43, d_year#56, d_year#59] +Input [19]: [i_product_name#76, i_item_sk#73, s_store_name#40, s_zip#41, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, d_year#38, d_year#49, d_year#51, count#81, sum#82, sum#83, sum#84] +Keys [15]: [i_product_name#76, i_item_sk#73, s_store_name#40, s_zip#41, ca_street_number#62, ca_street_name#63, ca_city#64, ca_zip#65, ca_street_number#67, ca_street_name#68, ca_city#69, ca_zip#70, d_year#38, d_year#49, d_year#51] Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#9)), sum(UnscaledValue(ss_list_price#10)), sum(UnscaledValue(ss_coupon_amt#11))] -Aggregate Attributes [4]: [count(1)#99, sum(UnscaledValue(ss_wholesale_cost#9))#100, sum(UnscaledValue(ss_list_price#10))#101, sum(UnscaledValue(ss_coupon_amt#11))#102] -Results [17]: [i_product_name#89 AS product_name#103, i_item_sk#86 AS item_sk#104, s_store_name#45 AS store_name#105, s_zip#46 AS store_zip#106, ca_street_number#73 AS b_street_number#107, ca_street_name#74 AS b_streen_name#108, ca_city#75 AS b_city#109, ca_zip#76 AS b_zip#110, ca_street_number#79 AS c_street_number#111, ca_street_name#80 AS c_street_name#112, ca_city#81 AS c_city#113, ca_zip#82 AS c_zip#114, d_year#43 AS syear#115, count(1)#99 AS cnt#116, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#9))#100,17,2) AS s1#117, MakeDecimal(sum(UnscaledValue(ss_list_price#10))#101,17,2) AS s2#118, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#11))#102,17,2) AS s3#119] +Aggregate Attributes [4]: [count(1)#85, sum(UnscaledValue(ss_wholesale_cost#9))#86, sum(UnscaledValue(ss_list_price#10))#87, sum(UnscaledValue(ss_coupon_amt#11))#88] +Results [17]: [i_product_name#76 AS product_name#89, i_item_sk#73 AS item_sk#90, s_store_name#40 AS store_name#91, s_zip#41 AS store_zip#92, ca_street_number#62 AS b_street_number#93, ca_street_name#63 AS b_streen_name#94, ca_city#64 AS b_city#95, ca_zip#65 AS b_zip#96, ca_street_number#67 AS c_street_number#97, ca_street_name#68 AS c_street_name#98, ca_city#69 AS c_city#99, ca_zip#70 AS c_zip#100, d_year#38 AS syear#101, count(1)#85 AS cnt#102, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#9))#86,17,2) AS s1#103, MakeDecimal(sum(UnscaledValue(ss_list_price#10))#87,17,2) AS s2#104, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#11))#88,17,2) AS s3#105] (110) Exchange -Input [17]: [product_name#103, item_sk#104, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119] -Arguments: hashpartitioning(item_sk#104, store_name#105, store_zip#106, 5), ENSURE_REQUIREMENTS, [id=#120] +Input [17]: [product_name#89, item_sk#90, store_name#91, store_zip#92, b_street_number#93, b_streen_name#94, b_city#95, b_zip#96, c_street_number#97, c_street_name#98, c_city#99, c_zip#100, syear#101, cnt#102, s1#103, s2#104, s3#105] +Arguments: hashpartitioning(item_sk#90, store_name#91, store_zip#92, 5), ENSURE_REQUIREMENTS, [plan_id=15] (111) Sort [codegen id : 26] -Input [17]: [product_name#103, item_sk#104, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119] -Arguments: [item_sk#104 ASC NULLS FIRST, store_name#105 ASC NULLS FIRST, store_zip#106 ASC NULLS FIRST], false, 0 +Input [17]: [product_name#89, item_sk#90, store_name#91, store_zip#92, b_street_number#93, b_streen_name#94, b_city#95, b_zip#96, c_street_number#97, c_street_name#98, c_city#99, c_zip#100, syear#101, cnt#102, s1#103, s2#104, s3#105] +Arguments: [item_sk#90 ASC NULLS FIRST, store_name#91 ASC NULLS FIRST, store_zip#92 ASC NULLS FIRST], false, 0 (112) Scan parquet default.store_sales -Output [12]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_ticket_number#128, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] +Output [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_ticket_number#113, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#132), dynamicpruningexpression(ss_sold_date_sk#132 IN dynamicpruning#133)] +PartitionFilters: [isnotnull(ss_sold_date_sk#117), dynamicpruningexpression(ss_sold_date_sk#117 IN dynamicpruning#118)] PushedFilters: [IsNotNull(ss_item_sk), IsNotNull(ss_ticket_number), IsNotNull(ss_store_sk), IsNotNull(ss_customer_sk), IsNotNull(ss_cdemo_sk), IsNotNull(ss_promo_sk), IsNotNull(ss_hdemo_sk), IsNotNull(ss_addr_sk)] ReadSchema: struct (113) ColumnarToRow [codegen id : 27] -Input [12]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_ticket_number#128, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] +Input [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_ticket_number#113, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] (114) Filter [codegen id : 27] -Input [12]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_ticket_number#128, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] -Condition : (((((((isnotnull(ss_item_sk#121) AND isnotnull(ss_ticket_number#128)) AND isnotnull(ss_store_sk#126)) AND isnotnull(ss_customer_sk#122)) AND isnotnull(ss_cdemo_sk#123)) AND isnotnull(ss_promo_sk#127)) AND isnotnull(ss_hdemo_sk#124)) AND isnotnull(ss_addr_sk#125)) +Input [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_ticket_number#113, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] +Condition : (((((((isnotnull(ss_item_sk#106) AND isnotnull(ss_ticket_number#113)) AND isnotnull(ss_store_sk#111)) AND isnotnull(ss_customer_sk#107)) AND isnotnull(ss_cdemo_sk#108)) AND isnotnull(ss_promo_sk#112)) AND isnotnull(ss_hdemo_sk#109)) AND isnotnull(ss_addr_sk#110)) (115) BroadcastExchange -Input [12]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_ticket_number#128, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[7, int, false] as bigint) & 4294967295))),false), [id=#134] +Input [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_ticket_number#113, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[0, int, false] as bigint), 32) | (cast(input[7, int, false] as bigint) & 4294967295))),false), [plan_id=16] (116) Scan parquet default.store_returns -Output [3]: [sr_item_sk#135, sr_ticket_number#136, sr_returned_date_sk#137] +Output [3]: [sr_item_sk#119, sr_ticket_number#120, sr_returned_date_sk#121] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] ReadSchema: struct (117) ColumnarToRow -Input [3]: [sr_item_sk#135, sr_ticket_number#136, sr_returned_date_sk#137] +Input [3]: [sr_item_sk#119, sr_ticket_number#120, sr_returned_date_sk#121] (118) Filter -Input [3]: [sr_item_sk#135, sr_ticket_number#136, sr_returned_date_sk#137] -Condition : (isnotnull(sr_item_sk#135) AND isnotnull(sr_ticket_number#136)) +Input [3]: [sr_item_sk#119, sr_ticket_number#120, sr_returned_date_sk#121] +Condition : (isnotnull(sr_item_sk#119) AND isnotnull(sr_ticket_number#120)) (119) Project -Output [2]: [sr_item_sk#135, sr_ticket_number#136] -Input [3]: [sr_item_sk#135, sr_ticket_number#136, sr_returned_date_sk#137] +Output [2]: [sr_item_sk#119, sr_ticket_number#120] +Input [3]: [sr_item_sk#119, sr_ticket_number#120, sr_returned_date_sk#121] (120) BroadcastHashJoin [codegen id : 28] -Left keys [2]: [ss_item_sk#121, ss_ticket_number#128] -Right keys [2]: [sr_item_sk#135, sr_ticket_number#136] +Left keys [2]: [ss_item_sk#106, ss_ticket_number#113] +Right keys [2]: [sr_item_sk#119, sr_ticket_number#120] Join condition: None (121) Project [codegen id : 28] -Output [11]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] -Input [14]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_ticket_number#128, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132, sr_item_sk#135, sr_ticket_number#136] +Output [11]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] +Input [14]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_ticket_number#113, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117, sr_item_sk#119, sr_ticket_number#120] (122) Exchange -Input [11]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] -Arguments: hashpartitioning(ss_item_sk#121, 5), ENSURE_REQUIREMENTS, [id=#138] +Input [11]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] +Arguments: hashpartitioning(ss_item_sk#106, 5), ENSURE_REQUIREMENTS, [plan_id=17] (123) Sort [codegen id : 29] -Input [11]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] -Arguments: [ss_item_sk#121 ASC NULLS FIRST], false, 0 +Input [11]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] +Arguments: [ss_item_sk#106 ASC NULLS FIRST], false, 0 (124) ReusedExchange [Reuses operator id: 28] -Output [4]: [cs_item_sk#139, sum#140, sum#141, isEmpty#142] +Output [4]: [cs_item_sk#122, sum#123, sum#124, isEmpty#125] (125) HashAggregate [codegen id : 35] -Input [4]: [cs_item_sk#139, sum#140, sum#141, isEmpty#142] -Keys [1]: [cs_item_sk#139] -Functions [2]: [sum(UnscaledValue(cs_ext_list_price#143)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#144 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#145 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#146 as decimal(9,2)))), DecimalType(9,2)))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#143))#38, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#144 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#145 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#146 as decimal(9,2)))), DecimalType(9,2)))#39] -Results [3]: [cs_item_sk#139, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#143))#38,17,2) AS sale#40, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#144 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#145 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#146 as decimal(9,2)))), DecimalType(9,2)))#39 AS refund#41] +Input [4]: [cs_item_sk#122, sum#123, sum#124, isEmpty#125] +Keys [1]: [cs_item_sk#122] +Functions [2]: [sum(UnscaledValue(cs_ext_list_price#126)), sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#127 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#128 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#129 as decimal(9,2)))), DecimalType(9,2)))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_list_price#126))#33, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#127 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#128 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#129 as decimal(9,2)))), DecimalType(9,2)))#34] +Results [3]: [cs_item_sk#122, MakeDecimal(sum(UnscaledValue(cs_ext_list_price#126))#33,17,2) AS sale#35, sum(CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(cr_refunded_cash#127 as decimal(8,2))) + promote_precision(cast(cr_reversed_charge#128 as decimal(8,2)))), DecimalType(8,2)) as decimal(9,2))) + promote_precision(cast(cr_store_credit#129 as decimal(9,2)))), DecimalType(9,2)))#34 AS refund#36] (126) Filter [codegen id : 35] -Input [3]: [cs_item_sk#139, sale#40, refund#41] -Condition : (isnotnull(sale#40) AND (cast(sale#40 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(refund#41)), DecimalType(21,2)))) +Input [3]: [cs_item_sk#122, sale#35, refund#36] +Condition : (isnotnull(sale#35) AND (cast(sale#35 as decimal(21,2)) > CheckOverflow((2.00 * promote_precision(refund#36)), DecimalType(21,2)))) (127) Project [codegen id : 35] -Output [1]: [cs_item_sk#139] -Input [3]: [cs_item_sk#139, sale#40, refund#41] +Output [1]: [cs_item_sk#122] +Input [3]: [cs_item_sk#122, sale#35, refund#36] (128) Sort [codegen id : 35] -Input [1]: [cs_item_sk#139] -Arguments: [cs_item_sk#139 ASC NULLS FIRST], false, 0 +Input [1]: [cs_item_sk#122] +Arguments: [cs_item_sk#122 ASC NULLS FIRST], false, 0 (129) SortMergeJoin [codegen id : 51] -Left keys [1]: [ss_item_sk#121] -Right keys [1]: [cs_item_sk#139] +Left keys [1]: [ss_item_sk#106] +Right keys [1]: [cs_item_sk#122] Join condition: None (130) Project [codegen id : 51] -Output [11]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132] -Input [12]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132, cs_item_sk#139] +Output [11]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117] +Input [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117, cs_item_sk#122] (131) ReusedExchange [Reuses operator id: 191] -Output [2]: [d_date_sk#147, d_year#148] +Output [2]: [d_date_sk#130, d_year#131] (132) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [ss_sold_date_sk#132] -Right keys [1]: [d_date_sk#147] +Left keys [1]: [ss_sold_date_sk#117] +Right keys [1]: [d_date_sk#130] Join condition: None (133) Project [codegen id : 51] -Output [11]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148] -Input [13]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, ss_sold_date_sk#132, d_date_sk#147, d_year#148] +Output [11]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131] +Input [13]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, ss_sold_date_sk#117, d_date_sk#130, d_year#131] (134) ReusedExchange [Reuses operator id: 41] -Output [3]: [s_store_sk#149, s_store_name#150, s_zip#151] +Output [3]: [s_store_sk#132, s_store_name#133, s_zip#134] (135) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [ss_store_sk#126] -Right keys [1]: [s_store_sk#149] +Left keys [1]: [ss_store_sk#111] +Right keys [1]: [s_store_sk#132] Join condition: None (136) Project [codegen id : 51] -Output [12]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151] -Input [14]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_store_sk#126, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_sk#149, s_store_name#150, s_zip#151] +Output [12]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134] +Input [14]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_store_sk#111, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_sk#132, s_store_name#133, s_zip#134] (137) ReusedExchange [Reuses operator id: 47] -Output [6]: [c_customer_sk#152, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, c_first_shipto_date_sk#156, c_first_sales_date_sk#157] +Output [6]: [c_customer_sk#135, c_current_cdemo_sk#136, c_current_hdemo_sk#137, c_current_addr_sk#138, c_first_shipto_date_sk#139, c_first_sales_date_sk#140] (138) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [ss_customer_sk#122] -Right keys [1]: [c_customer_sk#152] +Left keys [1]: [ss_customer_sk#107] +Right keys [1]: [c_customer_sk#135] Join condition: None (139) Project [codegen id : 51] -Output [16]: [ss_item_sk#121, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, c_first_shipto_date_sk#156, c_first_sales_date_sk#157] -Input [18]: [ss_item_sk#121, ss_customer_sk#122, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_customer_sk#152, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, c_first_shipto_date_sk#156, c_first_sales_date_sk#157] +Output [16]: [ss_item_sk#106, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_cdemo_sk#136, c_current_hdemo_sk#137, c_current_addr_sk#138, c_first_shipto_date_sk#139, c_first_sales_date_sk#140] +Input [18]: [ss_item_sk#106, ss_customer_sk#107, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_customer_sk#135, c_current_cdemo_sk#136, c_current_hdemo_sk#137, c_current_addr_sk#138, c_first_shipto_date_sk#139, c_first_sales_date_sk#140] (140) ReusedExchange [Reuses operator id: 53] -Output [2]: [d_date_sk#158, d_year#159] +Output [2]: [d_date_sk#141, d_year#142] (141) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [c_first_sales_date_sk#157] -Right keys [1]: [d_date_sk#158] +Left keys [1]: [c_first_sales_date_sk#140] +Right keys [1]: [d_date_sk#141] Join condition: None (142) Project [codegen id : 51] -Output [16]: [ss_item_sk#121, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, c_first_shipto_date_sk#156, d_year#159] -Input [18]: [ss_item_sk#121, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, c_first_shipto_date_sk#156, c_first_sales_date_sk#157, d_date_sk#158, d_year#159] +Output [16]: [ss_item_sk#106, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_cdemo_sk#136, c_current_hdemo_sk#137, c_current_addr_sk#138, c_first_shipto_date_sk#139, d_year#142] +Input [18]: [ss_item_sk#106, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_cdemo_sk#136, c_current_hdemo_sk#137, c_current_addr_sk#138, c_first_shipto_date_sk#139, c_first_sales_date_sk#140, d_date_sk#141, d_year#142] (143) ReusedExchange [Reuses operator id: 53] -Output [2]: [d_date_sk#160, d_year#161] +Output [2]: [d_date_sk#143, d_year#144] (144) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [c_first_shipto_date_sk#156] -Right keys [1]: [d_date_sk#160] +Left keys [1]: [c_first_shipto_date_sk#139] +Right keys [1]: [d_date_sk#143] Join condition: None (145) Project [codegen id : 51] -Output [16]: [ss_item_sk#121, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161] -Input [18]: [ss_item_sk#121, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, c_first_shipto_date_sk#156, d_year#159, d_date_sk#160, d_year#161] +Output [16]: [ss_item_sk#106, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_cdemo_sk#136, c_current_hdemo_sk#137, c_current_addr_sk#138, d_year#142, d_year#144] +Input [18]: [ss_item_sk#106, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_cdemo_sk#136, c_current_hdemo_sk#137, c_current_addr_sk#138, c_first_shipto_date_sk#139, d_year#142, d_date_sk#143, d_year#144] (146) ReusedExchange [Reuses operator id: 62] -Output [2]: [cd_demo_sk#162, cd_marital_status#163] +Output [2]: [cd_demo_sk#145, cd_marital_status#146] (147) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [ss_cdemo_sk#123] -Right keys [1]: [cd_demo_sk#162] +Left keys [1]: [ss_cdemo_sk#108] +Right keys [1]: [cd_demo_sk#145] Join condition: None (148) Project [codegen id : 51] -Output [16]: [ss_item_sk#121, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161, cd_marital_status#163] -Input [18]: [ss_item_sk#121, ss_cdemo_sk#123, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161, cd_demo_sk#162, cd_marital_status#163] +Output [16]: [ss_item_sk#106, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_cdemo_sk#136, c_current_hdemo_sk#137, c_current_addr_sk#138, d_year#142, d_year#144, cd_marital_status#146] +Input [18]: [ss_item_sk#106, ss_cdemo_sk#108, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_cdemo_sk#136, c_current_hdemo_sk#137, c_current_addr_sk#138, d_year#142, d_year#144, cd_demo_sk#145, cd_marital_status#146] (149) ReusedExchange [Reuses operator id: 62] -Output [2]: [cd_demo_sk#164, cd_marital_status#165] +Output [2]: [cd_demo_sk#147, cd_marital_status#148] (150) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [c_current_cdemo_sk#153] -Right keys [1]: [cd_demo_sk#164] -Join condition: NOT (cd_marital_status#163 = cd_marital_status#165) +Left keys [1]: [c_current_cdemo_sk#136] +Right keys [1]: [cd_demo_sk#147] +Join condition: NOT (cd_marital_status#146 = cd_marital_status#148) (151) Project [codegen id : 51] -Output [14]: [ss_item_sk#121, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161] -Input [18]: [ss_item_sk#121, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_cdemo_sk#153, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161, cd_marital_status#163, cd_demo_sk#164, cd_marital_status#165] +Output [14]: [ss_item_sk#106, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_hdemo_sk#137, c_current_addr_sk#138, d_year#142, d_year#144] +Input [18]: [ss_item_sk#106, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_cdemo_sk#136, c_current_hdemo_sk#137, c_current_addr_sk#138, d_year#142, d_year#144, cd_marital_status#146, cd_demo_sk#147, cd_marital_status#148] (152) ReusedExchange [Reuses operator id: 71] -Output [1]: [p_promo_sk#166] +Output [1]: [p_promo_sk#149] (153) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [ss_promo_sk#127] -Right keys [1]: [p_promo_sk#166] +Left keys [1]: [ss_promo_sk#112] +Right keys [1]: [p_promo_sk#149] Join condition: None (154) Project [codegen id : 51] -Output [13]: [ss_item_sk#121, ss_hdemo_sk#124, ss_addr_sk#125, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161] -Input [15]: [ss_item_sk#121, ss_hdemo_sk#124, ss_addr_sk#125, ss_promo_sk#127, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161, p_promo_sk#166] +Output [13]: [ss_item_sk#106, ss_hdemo_sk#109, ss_addr_sk#110, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_hdemo_sk#137, c_current_addr_sk#138, d_year#142, d_year#144] +Input [15]: [ss_item_sk#106, ss_hdemo_sk#109, ss_addr_sk#110, ss_promo_sk#112, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_hdemo_sk#137, c_current_addr_sk#138, d_year#142, d_year#144, p_promo_sk#149] (155) ReusedExchange [Reuses operator id: 77] -Output [2]: [hd_demo_sk#167, hd_income_band_sk#168] +Output [2]: [hd_demo_sk#150, hd_income_band_sk#151] (156) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [ss_hdemo_sk#124] -Right keys [1]: [hd_demo_sk#167] +Left keys [1]: [ss_hdemo_sk#109] +Right keys [1]: [hd_demo_sk#150] Join condition: None (157) Project [codegen id : 51] -Output [13]: [ss_item_sk#121, ss_addr_sk#125, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161, hd_income_band_sk#168] -Input [15]: [ss_item_sk#121, ss_hdemo_sk#124, ss_addr_sk#125, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161, hd_demo_sk#167, hd_income_band_sk#168] +Output [13]: [ss_item_sk#106, ss_addr_sk#110, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_hdemo_sk#137, c_current_addr_sk#138, d_year#142, d_year#144, hd_income_band_sk#151] +Input [15]: [ss_item_sk#106, ss_hdemo_sk#109, ss_addr_sk#110, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_hdemo_sk#137, c_current_addr_sk#138, d_year#142, d_year#144, hd_demo_sk#150, hd_income_band_sk#151] (158) ReusedExchange [Reuses operator id: 77] -Output [2]: [hd_demo_sk#169, hd_income_band_sk#170] +Output [2]: [hd_demo_sk#152, hd_income_band_sk#153] (159) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [c_current_hdemo_sk#154] -Right keys [1]: [hd_demo_sk#169] +Left keys [1]: [c_current_hdemo_sk#137] +Right keys [1]: [hd_demo_sk#152] Join condition: None (160) Project [codegen id : 51] -Output [13]: [ss_item_sk#121, ss_addr_sk#125, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_addr_sk#155, d_year#159, d_year#161, hd_income_band_sk#168, hd_income_band_sk#170] -Input [15]: [ss_item_sk#121, ss_addr_sk#125, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_hdemo_sk#154, c_current_addr_sk#155, d_year#159, d_year#161, hd_income_band_sk#168, hd_demo_sk#169, hd_income_band_sk#170] +Output [13]: [ss_item_sk#106, ss_addr_sk#110, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_addr_sk#138, d_year#142, d_year#144, hd_income_band_sk#151, hd_income_band_sk#153] +Input [15]: [ss_item_sk#106, ss_addr_sk#110, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_hdemo_sk#137, c_current_addr_sk#138, d_year#142, d_year#144, hd_income_band_sk#151, hd_demo_sk#152, hd_income_band_sk#153] (161) ReusedExchange [Reuses operator id: 86] -Output [5]: [ca_address_sk#171, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175] +Output [5]: [ca_address_sk#154, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158] (162) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [ss_addr_sk#125] -Right keys [1]: [ca_address_sk#171] +Left keys [1]: [ss_addr_sk#110] +Right keys [1]: [ca_address_sk#154] Join condition: None (163) Project [codegen id : 51] -Output [16]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_addr_sk#155, d_year#159, d_year#161, hd_income_band_sk#168, hd_income_band_sk#170, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175] -Input [18]: [ss_item_sk#121, ss_addr_sk#125, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_addr_sk#155, d_year#159, d_year#161, hd_income_band_sk#168, hd_income_band_sk#170, ca_address_sk#171, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175] +Output [16]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_addr_sk#138, d_year#142, d_year#144, hd_income_band_sk#151, hd_income_band_sk#153, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158] +Input [18]: [ss_item_sk#106, ss_addr_sk#110, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_addr_sk#138, d_year#142, d_year#144, hd_income_band_sk#151, hd_income_band_sk#153, ca_address_sk#154, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158] (164) ReusedExchange [Reuses operator id: 86] -Output [5]: [ca_address_sk#176, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180] +Output [5]: [ca_address_sk#159, ca_street_number#160, ca_street_name#161, ca_city#162, ca_zip#163] (165) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [c_current_addr_sk#155] -Right keys [1]: [ca_address_sk#176] +Left keys [1]: [c_current_addr_sk#138] +Right keys [1]: [ca_address_sk#159] Join condition: None (166) Project [codegen id : 51] -Output [19]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, d_year#159, d_year#161, hd_income_band_sk#168, hd_income_band_sk#170, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180] -Input [21]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, c_current_addr_sk#155, d_year#159, d_year#161, hd_income_band_sk#168, hd_income_band_sk#170, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_address_sk#176, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180] +Output [19]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, d_year#142, d_year#144, hd_income_band_sk#151, hd_income_band_sk#153, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158, ca_street_number#160, ca_street_name#161, ca_city#162, ca_zip#163] +Input [21]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, c_current_addr_sk#138, d_year#142, d_year#144, hd_income_band_sk#151, hd_income_band_sk#153, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158, ca_address_sk#159, ca_street_number#160, ca_street_name#161, ca_city#162, ca_zip#163] (167) ReusedExchange [Reuses operator id: 95] -Output [1]: [ib_income_band_sk#181] +Output [1]: [ib_income_band_sk#164] (168) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [hd_income_band_sk#168] -Right keys [1]: [ib_income_band_sk#181] +Left keys [1]: [hd_income_band_sk#151] +Right keys [1]: [ib_income_band_sk#164] Join condition: None (169) Project [codegen id : 51] -Output [18]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, d_year#159, d_year#161, hd_income_band_sk#170, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180] -Input [20]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, d_year#159, d_year#161, hd_income_band_sk#168, hd_income_band_sk#170, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, ib_income_band_sk#181] +Output [18]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, d_year#142, d_year#144, hd_income_band_sk#153, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158, ca_street_number#160, ca_street_name#161, ca_city#162, ca_zip#163] +Input [20]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, d_year#142, d_year#144, hd_income_band_sk#151, hd_income_band_sk#153, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158, ca_street_number#160, ca_street_name#161, ca_city#162, ca_zip#163, ib_income_band_sk#164] (170) ReusedExchange [Reuses operator id: 95] -Output [1]: [ib_income_band_sk#182] +Output [1]: [ib_income_band_sk#165] (171) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [hd_income_band_sk#170] -Right keys [1]: [ib_income_band_sk#182] +Left keys [1]: [hd_income_band_sk#153] +Right keys [1]: [ib_income_band_sk#165] Join condition: None (172) Project [codegen id : 51] -Output [17]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, d_year#159, d_year#161, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180] -Input [19]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, d_year#159, d_year#161, hd_income_band_sk#170, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, ib_income_band_sk#182] +Output [17]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, d_year#142, d_year#144, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158, ca_street_number#160, ca_street_name#161, ca_city#162, ca_zip#163] +Input [19]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, d_year#142, d_year#144, hd_income_band_sk#153, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158, ca_street_number#160, ca_street_name#161, ca_city#162, ca_zip#163, ib_income_band_sk#165] (173) ReusedExchange [Reuses operator id: 105] -Output [2]: [i_item_sk#183, i_product_name#184] +Output [2]: [i_item_sk#166, i_product_name#167] (174) BroadcastHashJoin [codegen id : 51] -Left keys [1]: [ss_item_sk#121] -Right keys [1]: [i_item_sk#183] +Left keys [1]: [ss_item_sk#106] +Right keys [1]: [i_item_sk#166] Join condition: None (175) Project [codegen id : 51] -Output [18]: [ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, d_year#159, d_year#161, s_store_name#150, s_zip#151, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, i_item_sk#183, i_product_name#184] -Input [19]: [ss_item_sk#121, ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, s_store_name#150, s_zip#151, d_year#159, d_year#161, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, i_item_sk#183, i_product_name#184] +Output [18]: [ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, d_year#142, d_year#144, s_store_name#133, s_zip#134, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158, ca_street_number#160, ca_street_name#161, ca_city#162, ca_zip#163, i_item_sk#166, i_product_name#167] +Input [19]: [ss_item_sk#106, ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, s_store_name#133, s_zip#134, d_year#142, d_year#144, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158, ca_street_number#160, ca_street_name#161, ca_city#162, ca_zip#163, i_item_sk#166, i_product_name#167] (176) HashAggregate [codegen id : 51] -Input [18]: [ss_wholesale_cost#129, ss_list_price#130, ss_coupon_amt#131, d_year#148, d_year#159, d_year#161, s_store_name#150, s_zip#151, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, i_item_sk#183, i_product_name#184] -Keys [15]: [i_product_name#184, i_item_sk#183, s_store_name#150, s_zip#151, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, d_year#148, d_year#159, d_year#161] -Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#129)), partial_sum(UnscaledValue(ss_list_price#130)), partial_sum(UnscaledValue(ss_coupon_amt#131))] -Aggregate Attributes [4]: [count#91, sum#185, sum#186, sum#187] -Results [19]: [i_product_name#184, i_item_sk#183, s_store_name#150, s_zip#151, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, d_year#148, d_year#159, d_year#161, count#95, sum#188, sum#189, sum#190] +Input [18]: [ss_wholesale_cost#114, ss_list_price#115, ss_coupon_amt#116, d_year#131, d_year#142, d_year#144, s_store_name#133, s_zip#134, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158, ca_street_number#160, ca_street_name#161, ca_city#162, ca_zip#163, i_item_sk#166, i_product_name#167] +Keys [15]: [i_product_name#167, i_item_sk#166, s_store_name#133, s_zip#134, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158, ca_street_number#160, ca_street_name#161, ca_city#162, ca_zip#163, d_year#131, d_year#142, d_year#144] +Functions [4]: [partial_count(1), partial_sum(UnscaledValue(ss_wholesale_cost#114)), partial_sum(UnscaledValue(ss_list_price#115)), partial_sum(UnscaledValue(ss_coupon_amt#116))] +Aggregate Attributes [4]: [count#77, sum#168, sum#169, sum#170] +Results [19]: [i_product_name#167, i_item_sk#166, s_store_name#133, s_zip#134, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158, ca_street_number#160, ca_street_name#161, ca_city#162, ca_zip#163, d_year#131, d_year#142, d_year#144, count#81, sum#171, sum#172, sum#173] (177) HashAggregate [codegen id : 51] -Input [19]: [i_product_name#184, i_item_sk#183, s_store_name#150, s_zip#151, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, d_year#148, d_year#159, d_year#161, count#95, sum#188, sum#189, sum#190] -Keys [15]: [i_product_name#184, i_item_sk#183, s_store_name#150, s_zip#151, ca_street_number#172, ca_street_name#173, ca_city#174, ca_zip#175, ca_street_number#177, ca_street_name#178, ca_city#179, ca_zip#180, d_year#148, d_year#159, d_year#161] -Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#129)), sum(UnscaledValue(ss_list_price#130)), sum(UnscaledValue(ss_coupon_amt#131))] -Aggregate Attributes [4]: [count(1)#99, sum(UnscaledValue(ss_wholesale_cost#129))#100, sum(UnscaledValue(ss_list_price#130))#101, sum(UnscaledValue(ss_coupon_amt#131))#102] -Results [8]: [i_item_sk#183 AS item_sk#191, s_store_name#150 AS store_name#192, s_zip#151 AS store_zip#193, d_year#148 AS syear#194, count(1)#99 AS cnt#195, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#129))#100,17,2) AS s1#196, MakeDecimal(sum(UnscaledValue(ss_list_price#130))#101,17,2) AS s2#197, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#131))#102,17,2) AS s3#198] +Input [19]: [i_product_name#167, i_item_sk#166, s_store_name#133, s_zip#134, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158, ca_street_number#160, ca_street_name#161, ca_city#162, ca_zip#163, d_year#131, d_year#142, d_year#144, count#81, sum#171, sum#172, sum#173] +Keys [15]: [i_product_name#167, i_item_sk#166, s_store_name#133, s_zip#134, ca_street_number#155, ca_street_name#156, ca_city#157, ca_zip#158, ca_street_number#160, ca_street_name#161, ca_city#162, ca_zip#163, d_year#131, d_year#142, d_year#144] +Functions [4]: [count(1), sum(UnscaledValue(ss_wholesale_cost#114)), sum(UnscaledValue(ss_list_price#115)), sum(UnscaledValue(ss_coupon_amt#116))] +Aggregate Attributes [4]: [count(1)#85, sum(UnscaledValue(ss_wholesale_cost#114))#86, sum(UnscaledValue(ss_list_price#115))#87, sum(UnscaledValue(ss_coupon_amt#116))#88] +Results [8]: [i_item_sk#166 AS item_sk#174, s_store_name#133 AS store_name#175, s_zip#134 AS store_zip#176, d_year#131 AS syear#177, count(1)#85 AS cnt#178, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#114))#86,17,2) AS s1#179, MakeDecimal(sum(UnscaledValue(ss_list_price#115))#87,17,2) AS s2#180, MakeDecimal(sum(UnscaledValue(ss_coupon_amt#116))#88,17,2) AS s3#181] (178) Exchange -Input [8]: [item_sk#191, store_name#192, store_zip#193, syear#194, cnt#195, s1#196, s2#197, s3#198] -Arguments: hashpartitioning(item_sk#191, store_name#192, store_zip#193, 5), ENSURE_REQUIREMENTS, [id=#199] +Input [8]: [item_sk#174, store_name#175, store_zip#176, syear#177, cnt#178, s1#179, s2#180, s3#181] +Arguments: hashpartitioning(item_sk#174, store_name#175, store_zip#176, 5), ENSURE_REQUIREMENTS, [plan_id=18] (179) Sort [codegen id : 52] -Input [8]: [item_sk#191, store_name#192, store_zip#193, syear#194, cnt#195, s1#196, s2#197, s3#198] -Arguments: [item_sk#191 ASC NULLS FIRST, store_name#192 ASC NULLS FIRST, store_zip#193 ASC NULLS FIRST], false, 0 +Input [8]: [item_sk#174, store_name#175, store_zip#176, syear#177, cnt#178, s1#179, s2#180, s3#181] +Arguments: [item_sk#174 ASC NULLS FIRST, store_name#175 ASC NULLS FIRST, store_zip#176 ASC NULLS FIRST], false, 0 (180) SortMergeJoin [codegen id : 53] -Left keys [3]: [item_sk#104, store_name#105, store_zip#106] -Right keys [3]: [item_sk#191, store_name#192, store_zip#193] -Join condition: (cnt#195 <= cnt#116) +Left keys [3]: [item_sk#90, store_name#91, store_zip#92] +Right keys [3]: [item_sk#174, store_name#175, store_zip#176] +Join condition: (cnt#178 <= cnt#102) (181) Project [codegen id : 53] -Output [21]: [product_name#103, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119, s1#196, s2#197, s3#198, syear#194, cnt#195] -Input [25]: [product_name#103, item_sk#104, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119, item_sk#191, store_name#192, store_zip#193, syear#194, cnt#195, s1#196, s2#197, s3#198] +Output [21]: [product_name#89, store_name#91, store_zip#92, b_street_number#93, b_streen_name#94, b_city#95, b_zip#96, c_street_number#97, c_street_name#98, c_city#99, c_zip#100, syear#101, cnt#102, s1#103, s2#104, s3#105, s1#179, s2#180, s3#181, syear#177, cnt#178] +Input [25]: [product_name#89, item_sk#90, store_name#91, store_zip#92, b_street_number#93, b_streen_name#94, b_city#95, b_zip#96, c_street_number#97, c_street_name#98, c_city#99, c_zip#100, syear#101, cnt#102, s1#103, s2#104, s3#105, item_sk#174, store_name#175, store_zip#176, syear#177, cnt#178, s1#179, s2#180, s3#181] (182) Exchange -Input [21]: [product_name#103, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119, s1#196, s2#197, s3#198, syear#194, cnt#195] -Arguments: rangepartitioning(product_name#103 ASC NULLS FIRST, store_name#105 ASC NULLS FIRST, cnt#195 ASC NULLS FIRST, s1#117 ASC NULLS FIRST, s1#196 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#200] +Input [21]: [product_name#89, store_name#91, store_zip#92, b_street_number#93, b_streen_name#94, b_city#95, b_zip#96, c_street_number#97, c_street_name#98, c_city#99, c_zip#100, syear#101, cnt#102, s1#103, s2#104, s3#105, s1#179, s2#180, s3#181, syear#177, cnt#178] +Arguments: rangepartitioning(product_name#89 ASC NULLS FIRST, store_name#91 ASC NULLS FIRST, cnt#178 ASC NULLS FIRST, s1#103 ASC NULLS FIRST, s1#179 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=19] (183) Sort [codegen id : 54] -Input [21]: [product_name#103, store_name#105, store_zip#106, b_street_number#107, b_streen_name#108, b_city#109, b_zip#110, c_street_number#111, c_street_name#112, c_city#113, c_zip#114, syear#115, cnt#116, s1#117, s2#118, s3#119, s1#196, s2#197, s3#198, syear#194, cnt#195] -Arguments: [product_name#103 ASC NULLS FIRST, store_name#105 ASC NULLS FIRST, cnt#195 ASC NULLS FIRST, s1#117 ASC NULLS FIRST, s1#196 ASC NULLS FIRST], true, 0 +Input [21]: [product_name#89, store_name#91, store_zip#92, b_street_number#93, b_streen_name#94, b_city#95, b_zip#96, c_street_number#97, c_street_name#98, c_city#99, c_zip#100, syear#101, cnt#102, s1#103, s2#104, s3#105, s1#179, s2#180, s3#181, syear#177, cnt#178] +Arguments: [product_name#89 ASC NULLS FIRST, store_name#91 ASC NULLS FIRST, cnt#178 ASC NULLS FIRST, s1#103 ASC NULLS FIRST, s1#179 ASC NULLS FIRST], true, 0 ===== Subqueries ===== @@ -993,24 +993,24 @@ BroadcastExchange (187) (184) Scan parquet default.date_dim -Output [2]: [d_date_sk#42, d_year#43] +Output [2]: [d_date_sk#37, d_year#38] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,1999), IsNotNull(d_date_sk)] ReadSchema: struct (185) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#42, d_year#43] +Input [2]: [d_date_sk#37, d_year#38] (186) Filter [codegen id : 1] -Input [2]: [d_date_sk#42, d_year#43] -Condition : ((isnotnull(d_year#43) AND (d_year#43 = 1999)) AND isnotnull(d_date_sk#42)) +Input [2]: [d_date_sk#37, d_year#38] +Condition : ((isnotnull(d_year#38) AND (d_year#38 = 1999)) AND isnotnull(d_date_sk#37)) (187) BroadcastExchange -Input [2]: [d_date_sk#42, d_year#43] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#201] +Input [2]: [d_date_sk#37, d_year#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=20] -Subquery:2 Hosting operator id = 112 Hosting Expression = ss_sold_date_sk#132 IN dynamicpruning#133 +Subquery:2 Hosting operator id = 112 Hosting Expression = ss_sold_date_sk#117 IN dynamicpruning#118 BroadcastExchange (191) +- * Filter (190) +- * ColumnarToRow (189) @@ -1018,21 +1018,21 @@ BroadcastExchange (191) (188) Scan parquet default.date_dim -Output [2]: [d_date_sk#147, d_year#148] +Output [2]: [d_date_sk#130, d_year#131] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (189) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#147, d_year#148] +Input [2]: [d_date_sk#130, d_year#131] (190) Filter [codegen id : 1] -Input [2]: [d_date_sk#147, d_year#148] -Condition : ((isnotnull(d_year#148) AND (d_year#148 = 2000)) AND isnotnull(d_date_sk#147)) +Input [2]: [d_date_sk#130, d_year#131] +Condition : ((isnotnull(d_year#131) AND (d_year#131 = 2000)) AND isnotnull(d_date_sk#130)) (191) BroadcastExchange -Input [2]: [d_date_sk#147, d_year#148] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#202] +Input [2]: [d_date_sk#130, d_year#131] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=21] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt index 00d9676dc2ec9..537414941ac60 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a.sf100/explain.txt @@ -114,7 +114,7 @@ Condition : isnotnull(s_store_sk#11) (10) BroadcastExchange Input [2]: [s_store_sk#11, s_store_id#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_store_sk#2] @@ -127,306 +127,306 @@ Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year (13) Exchange Input [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#14] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) Sort [codegen id : 4] Input [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (15) Scan parquet default.item -Output [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (16) ColumnarToRow [codegen id : 5] -Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] (17) Filter [codegen id : 5] -Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] -Condition : isnotnull(i_item_sk#15) +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Condition : isnotnull(i_item_sk#13) (18) Exchange -Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] -Arguments: hashpartitioning(i_item_sk#15, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: hashpartitioning(i_item_sk#13, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) Sort [codegen id : 6] -Input [5]: [i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] -Arguments: [i_item_sk#15 ASC NULLS FIRST], false, 0 +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: [i_item_sk#13 ASC NULLS FIRST], false, 0 (20) SortMergeJoin [codegen id : 7] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#15] +Right keys [1]: [i_item_sk#13] Join condition: None (21) Project [codegen id : 7] -Output [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#16, i_class#17, i_category#18, i_product_name#19] -Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#15, i_brand#16, i_class#17, i_category#18, i_product_name#19] +Output [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] (22) HashAggregate [codegen id : 7] -Input [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#16, i_class#17, i_category#18, i_product_name#19] -Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [2]: [sum#21, isEmpty#22] -Results [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#23, isEmpty#24] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#20, isEmpty#21] (23) Exchange -Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#23, isEmpty#24] -Arguments: hashpartitioning(i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 5), ENSURE_REQUIREMENTS, [id=#25] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#20, isEmpty#21] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 5), ENSURE_REQUIREMENTS, [plan_id=4] (24) HashAggregate [codegen id : 8] -Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#23, isEmpty#24] -Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#20, isEmpty#21] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] -Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, cast(sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 as decimal(38,2)) AS sumsales#27] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, cast(sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 as decimal(38,2)) AS sumsales#23] (25) ReusedExchange [Reuses operator id: 23] -Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#28, isEmpty#29] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#24, isEmpty#25] (26) HashAggregate [codegen id : 16] -Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#28, isEmpty#29] -Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#24, isEmpty#25] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] -Results [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (27) HashAggregate [codegen id : 16] -Input [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sumsales#30] -Keys [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9] -Functions [1]: [partial_sum(sumsales#30)] -Aggregate Attributes [2]: [sum#31, isEmpty#32] -Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sum#33, isEmpty#34] +Input [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sumsales#26] +Keys [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#27, isEmpty#28] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum#29, isEmpty#30] (28) Exchange -Input [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sum#33, isEmpty#34] -Arguments: hashpartitioning(i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, 5), ENSURE_REQUIREMENTS, [id=#35] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum#29, isEmpty#30] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, 5), ENSURE_REQUIREMENTS, [plan_id=5] (29) HashAggregate [codegen id : 17] -Input [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, sum#33, isEmpty#34] -Keys [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9] -Functions [1]: [sum(sumsales#30)] -Aggregate Attributes [1]: [sum(sumsales#30)#36] -Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, null AS s_store_id#37, sum(sumsales#30)#36 AS sumsales#38] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum#29, isEmpty#30] +Keys [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#31] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, null AS s_store_id#32, sum(sumsales#26)#31 AS sumsales#33] (30) ReusedExchange [Reuses operator id: 23] -Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#39, isEmpty#40] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#34, isEmpty#35] (31) HashAggregate [codegen id : 25] -Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#39, isEmpty#40] -Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#34, isEmpty#35] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] -Results [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (32) HashAggregate [codegen id : 25] -Input [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sumsales#30] -Keys [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10] -Functions [1]: [partial_sum(sumsales#30)] -Aggregate Attributes [2]: [sum#41, isEmpty#42] -Results [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sum#43, isEmpty#44] +Input [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sumsales#26] +Keys [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#36, isEmpty#37] +Results [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum#38, isEmpty#39] (33) Exchange -Input [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sum#43, isEmpty#44] -Arguments: hashpartitioning(i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, 5), ENSURE_REQUIREMENTS, [id=#45] +Input [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum#38, isEmpty#39] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, 5), ENSURE_REQUIREMENTS, [plan_id=6] (34) HashAggregate [codegen id : 26] -Input [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, sum#43, isEmpty#44] -Keys [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10] -Functions [1]: [sum(sumsales#30)] -Aggregate Attributes [1]: [sum(sumsales#30)#46] -Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, null AS d_moy#47, null AS s_store_id#48, sum(sumsales#30)#46 AS sumsales#49] +Input [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum#38, isEmpty#39] +Keys [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#40] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, null AS d_moy#41, null AS s_store_id#42, sum(sumsales#26)#40 AS sumsales#43] (35) ReusedExchange [Reuses operator id: 23] -Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#50, isEmpty#51] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#44, isEmpty#45] (36) HashAggregate [codegen id : 34] -Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#50, isEmpty#51] -Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#44, isEmpty#45] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] -Results [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (37) HashAggregate [codegen id : 34] -Input [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sumsales#30] -Keys [5]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8] -Functions [1]: [partial_sum(sumsales#30)] -Aggregate Attributes [2]: [sum#52, isEmpty#53] -Results [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sum#54, isEmpty#55] +Input [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sumsales#26] +Keys [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#46, isEmpty#47] +Results [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum#48, isEmpty#49] (38) Exchange -Input [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sum#54, isEmpty#55] -Arguments: hashpartitioning(i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, 5), ENSURE_REQUIREMENTS, [id=#56] +Input [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum#48, isEmpty#49] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, 5), ENSURE_REQUIREMENTS, [plan_id=7] (39) HashAggregate [codegen id : 35] -Input [7]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, sum#54, isEmpty#55] -Keys [5]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8] -Functions [1]: [sum(sumsales#30)] -Aggregate Attributes [1]: [sum(sumsales#30)#57] -Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, null AS d_qoy#58, null AS d_moy#59, null AS s_store_id#60, sum(sumsales#30)#57 AS sumsales#61] +Input [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum#48, isEmpty#49] +Keys [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#50] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, null AS d_qoy#51, null AS d_moy#52, null AS s_store_id#53, sum(sumsales#26)#50 AS sumsales#54] (40) ReusedExchange [Reuses operator id: 23] -Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#62, isEmpty#63] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#55, isEmpty#56] (41) HashAggregate [codegen id : 43] -Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#62, isEmpty#63] -Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#55, isEmpty#56] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] -Results [5]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (42) HashAggregate [codegen id : 43] -Input [5]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sumsales#30] -Keys [4]: [i_category#18, i_class#17, i_brand#16, i_product_name#19] -Functions [1]: [partial_sum(sumsales#30)] -Aggregate Attributes [2]: [sum#64, isEmpty#65] -Results [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sum#66, isEmpty#67] +Input [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sumsales#26] +Keys [4]: [i_category#16, i_class#15, i_brand#14, i_product_name#17] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#57, isEmpty#58] +Results [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum#59, isEmpty#60] (43) Exchange -Input [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sum#66, isEmpty#67] -Arguments: hashpartitioning(i_category#18, i_class#17, i_brand#16, i_product_name#19, 5), ENSURE_REQUIREMENTS, [id=#68] +Input [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum#59, isEmpty#60] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, 5), ENSURE_REQUIREMENTS, [plan_id=8] (44) HashAggregate [codegen id : 44] -Input [6]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, sum#66, isEmpty#67] -Keys [4]: [i_category#18, i_class#17, i_brand#16, i_product_name#19] -Functions [1]: [sum(sumsales#30)] -Aggregate Attributes [1]: [sum(sumsales#30)#69] -Results [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, null AS d_year#70, null AS d_qoy#71, null AS d_moy#72, null AS s_store_id#73, sum(sumsales#30)#69 AS sumsales#74] +Input [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum#59, isEmpty#60] +Keys [4]: [i_category#16, i_class#15, i_brand#14, i_product_name#17] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#61] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, null AS d_year#62, null AS d_qoy#63, null AS d_moy#64, null AS s_store_id#65, sum(sumsales#26)#61 AS sumsales#66] (45) ReusedExchange [Reuses operator id: 23] -Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#75, isEmpty#76] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#67, isEmpty#68] (46) HashAggregate [codegen id : 52] -Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#75, isEmpty#76] -Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#67, isEmpty#68] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] -Results [4]: [i_category#18, i_class#17, i_brand#16, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [4]: [i_category#16, i_class#15, i_brand#14, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (47) HashAggregate [codegen id : 52] -Input [4]: [i_category#18, i_class#17, i_brand#16, sumsales#30] -Keys [3]: [i_category#18, i_class#17, i_brand#16] -Functions [1]: [partial_sum(sumsales#30)] -Aggregate Attributes [2]: [sum#77, isEmpty#78] -Results [5]: [i_category#18, i_class#17, i_brand#16, sum#79, isEmpty#80] +Input [4]: [i_category#16, i_class#15, i_brand#14, sumsales#26] +Keys [3]: [i_category#16, i_class#15, i_brand#14] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#69, isEmpty#70] +Results [5]: [i_category#16, i_class#15, i_brand#14, sum#71, isEmpty#72] (48) Exchange -Input [5]: [i_category#18, i_class#17, i_brand#16, sum#79, isEmpty#80] -Arguments: hashpartitioning(i_category#18, i_class#17, i_brand#16, 5), ENSURE_REQUIREMENTS, [id=#81] +Input [5]: [i_category#16, i_class#15, i_brand#14, sum#71, isEmpty#72] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, 5), ENSURE_REQUIREMENTS, [plan_id=9] (49) HashAggregate [codegen id : 53] -Input [5]: [i_category#18, i_class#17, i_brand#16, sum#79, isEmpty#80] -Keys [3]: [i_category#18, i_class#17, i_brand#16] -Functions [1]: [sum(sumsales#30)] -Aggregate Attributes [1]: [sum(sumsales#30)#82] -Results [9]: [i_category#18, i_class#17, i_brand#16, null AS i_product_name#83, null AS d_year#84, null AS d_qoy#85, null AS d_moy#86, null AS s_store_id#87, sum(sumsales#30)#82 AS sumsales#88] +Input [5]: [i_category#16, i_class#15, i_brand#14, sum#71, isEmpty#72] +Keys [3]: [i_category#16, i_class#15, i_brand#14] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#73] +Results [9]: [i_category#16, i_class#15, i_brand#14, null AS i_product_name#74, null AS d_year#75, null AS d_qoy#76, null AS d_moy#77, null AS s_store_id#78, sum(sumsales#26)#73 AS sumsales#79] (50) ReusedExchange [Reuses operator id: 23] -Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#89, isEmpty#90] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#80, isEmpty#81] (51) HashAggregate [codegen id : 61] -Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#89, isEmpty#90] -Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#80, isEmpty#81] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] -Results [3]: [i_category#18, i_class#17, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [3]: [i_category#16, i_class#15, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (52) HashAggregate [codegen id : 61] -Input [3]: [i_category#18, i_class#17, sumsales#30] -Keys [2]: [i_category#18, i_class#17] -Functions [1]: [partial_sum(sumsales#30)] -Aggregate Attributes [2]: [sum#91, isEmpty#92] -Results [4]: [i_category#18, i_class#17, sum#93, isEmpty#94] +Input [3]: [i_category#16, i_class#15, sumsales#26] +Keys [2]: [i_category#16, i_class#15] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#82, isEmpty#83] +Results [4]: [i_category#16, i_class#15, sum#84, isEmpty#85] (53) Exchange -Input [4]: [i_category#18, i_class#17, sum#93, isEmpty#94] -Arguments: hashpartitioning(i_category#18, i_class#17, 5), ENSURE_REQUIREMENTS, [id=#95] +Input [4]: [i_category#16, i_class#15, sum#84, isEmpty#85] +Arguments: hashpartitioning(i_category#16, i_class#15, 5), ENSURE_REQUIREMENTS, [plan_id=10] (54) HashAggregate [codegen id : 62] -Input [4]: [i_category#18, i_class#17, sum#93, isEmpty#94] -Keys [2]: [i_category#18, i_class#17] -Functions [1]: [sum(sumsales#30)] -Aggregate Attributes [1]: [sum(sumsales#30)#96] -Results [9]: [i_category#18, i_class#17, null AS i_brand#97, null AS i_product_name#98, null AS d_year#99, null AS d_qoy#100, null AS d_moy#101, null AS s_store_id#102, sum(sumsales#30)#96 AS sumsales#103] +Input [4]: [i_category#16, i_class#15, sum#84, isEmpty#85] +Keys [2]: [i_category#16, i_class#15] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#86] +Results [9]: [i_category#16, i_class#15, null AS i_brand#87, null AS i_product_name#88, null AS d_year#89, null AS d_qoy#90, null AS d_moy#91, null AS s_store_id#92, sum(sumsales#26)#86 AS sumsales#93] (55) ReusedExchange [Reuses operator id: 23] -Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#104, isEmpty#105] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#94, isEmpty#95] (56) HashAggregate [codegen id : 70] -Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#104, isEmpty#105] -Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#94, isEmpty#95] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] -Results [2]: [i_category#18, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [2]: [i_category#16, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (57) HashAggregate [codegen id : 70] -Input [2]: [i_category#18, sumsales#30] -Keys [1]: [i_category#18] -Functions [1]: [partial_sum(sumsales#30)] -Aggregate Attributes [2]: [sum#106, isEmpty#107] -Results [3]: [i_category#18, sum#108, isEmpty#109] +Input [2]: [i_category#16, sumsales#26] +Keys [1]: [i_category#16] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#96, isEmpty#97] +Results [3]: [i_category#16, sum#98, isEmpty#99] (58) Exchange -Input [3]: [i_category#18, sum#108, isEmpty#109] -Arguments: hashpartitioning(i_category#18, 5), ENSURE_REQUIREMENTS, [id=#110] +Input [3]: [i_category#16, sum#98, isEmpty#99] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [plan_id=11] (59) HashAggregate [codegen id : 71] -Input [3]: [i_category#18, sum#108, isEmpty#109] -Keys [1]: [i_category#18] -Functions [1]: [sum(sumsales#30)] -Aggregate Attributes [1]: [sum(sumsales#30)#111] -Results [9]: [i_category#18, null AS i_class#112, null AS i_brand#113, null AS i_product_name#114, null AS d_year#115, null AS d_qoy#116, null AS d_moy#117, null AS s_store_id#118, sum(sumsales#30)#111 AS sumsales#119] +Input [3]: [i_category#16, sum#98, isEmpty#99] +Keys [1]: [i_category#16] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#100] +Results [9]: [i_category#16, null AS i_class#101, null AS i_brand#102, null AS i_product_name#103, null AS d_year#104, null AS d_qoy#105, null AS d_moy#106, null AS s_store_id#107, sum(sumsales#26)#100 AS sumsales#108] (60) ReusedExchange [Reuses operator id: 23] -Output [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#120, isEmpty#121] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#109, isEmpty#110] (61) HashAggregate [codegen id : 79] -Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#120, isEmpty#121] -Keys [8]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#109, isEmpty#110] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26] -Results [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#26 AS sumsales#30] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (62) HashAggregate [codegen id : 79] -Input [1]: [sumsales#30] +Input [1]: [sumsales#26] Keys: [] -Functions [1]: [partial_sum(sumsales#30)] -Aggregate Attributes [2]: [sum#122, isEmpty#123] -Results [2]: [sum#124, isEmpty#125] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#111, isEmpty#112] +Results [2]: [sum#113, isEmpty#114] (63) Exchange -Input [2]: [sum#124, isEmpty#125] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#126] +Input [2]: [sum#113, isEmpty#114] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=12] (64) HashAggregate [codegen id : 80] -Input [2]: [sum#124, isEmpty#125] +Input [2]: [sum#113, isEmpty#114] Keys: [] -Functions [1]: [sum(sumsales#30)] -Aggregate Attributes [1]: [sum(sumsales#30)#127] -Results [9]: [null AS i_category#128, null AS i_class#129, null AS i_brand#130, null AS i_product_name#131, null AS d_year#132, null AS d_qoy#133, null AS d_moy#134, null AS s_store_id#135, sum(sumsales#30)#127 AS sumsales#136] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#115] +Results [9]: [null AS i_category#116, null AS i_class#117, null AS i_brand#118, null AS i_product_name#119, null AS d_year#120, null AS d_qoy#121, null AS d_moy#122, null AS s_store_id#123, sum(sumsales#26)#115 AS sumsales#124] (65) Union (66) Exchange -Input [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#27] -Arguments: hashpartitioning(i_category#18, 5), ENSURE_REQUIREMENTS, [id=#137] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [plan_id=13] (67) Sort [codegen id : 81] -Input [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#27] -Arguments: [i_category#18 ASC NULLS FIRST, sumsales#27 DESC NULLS LAST], false, 0 +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23] +Arguments: [i_category#16 ASC NULLS FIRST, sumsales#23 DESC NULLS LAST], false, 0 (68) Window -Input [9]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#27] -Arguments: [rank(sumsales#27) windowspecdefinition(i_category#18, sumsales#27 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#138], [i_category#18], [sumsales#27 DESC NULLS LAST] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23] +Arguments: [rank(sumsales#23) windowspecdefinition(i_category#16, sumsales#23 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#125], [i_category#16], [sumsales#23 DESC NULLS LAST] (69) Filter [codegen id : 82] -Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#27, rk#138] -Condition : (rk#138 <= 100) +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23, rk#125] +Condition : (rk#125 <= 100) (70) TakeOrderedAndProject -Input [10]: [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#27, rk#138] -Arguments: 100, [i_category#18 ASC NULLS FIRST, i_class#17 ASC NULLS FIRST, i_brand#16 ASC NULLS FIRST, i_product_name#19 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#10 ASC NULLS FIRST, d_moy#9 ASC NULLS FIRST, s_store_id#12 ASC NULLS FIRST, sumsales#27 ASC NULLS FIRST, rk#138 ASC NULLS FIRST], [i_category#18, i_class#17, i_brand#16, i_product_name#19, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#27, rk#138] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23, rk#125] +Arguments: 100, [i_category#16 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_product_name#17 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#10 ASC NULLS FIRST, d_moy#9 ASC NULLS FIRST, s_store_id#12 ASC NULLS FIRST, sumsales#23 ASC NULLS FIRST, rk#125 ASC NULLS FIRST], [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23, rk#125] ===== Subqueries ===== @@ -439,25 +439,25 @@ BroadcastExchange (75) (71) Scan parquet default.date_dim -Output [5]: [d_date_sk#7, d_month_seq#139, d_year#8, d_moy#9, d_qoy#10] +Output [5]: [d_date_sk#7, d_month_seq#126, d_year#8, d_moy#9, d_qoy#10] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] ReadSchema: struct (72) ColumnarToRow [codegen id : 1] -Input [5]: [d_date_sk#7, d_month_seq#139, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#7, d_month_seq#126, d_year#8, d_moy#9, d_qoy#10] (73) Filter [codegen id : 1] -Input [5]: [d_date_sk#7, d_month_seq#139, d_year#8, d_moy#9, d_qoy#10] -Condition : (((isnotnull(d_month_seq#139) AND (d_month_seq#139 >= 1212)) AND (d_month_seq#139 <= 1223)) AND isnotnull(d_date_sk#7)) +Input [5]: [d_date_sk#7, d_month_seq#126, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#126) AND (d_month_seq#126 >= 1212)) AND (d_month_seq#126 <= 1223)) AND isnotnull(d_date_sk#7)) (74) Project [codegen id : 1] Output [4]: [d_date_sk#7, d_year#8, d_moy#9, d_qoy#10] -Input [5]: [d_date_sk#7, d_month_seq#139, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#7, d_month_seq#126, d_year#8, d_moy#9, d_qoy#10] (75) BroadcastExchange Input [4]: [d_date_sk#7, d_year#8, d_moy#9, d_qoy#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#140] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt index d0208d6e24e2f..a8506c6577ba7 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q67a/explain.txt @@ -111,7 +111,7 @@ Condition : isnotnull(s_store_sk#11) (10) BroadcastExchange Input [2]: [s_store_sk#11, s_store_id#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#2] @@ -123,295 +123,295 @@ Output [7]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_sk#11, s_store_id#12] (13) Scan parquet default.item -Output [5]: [i_item_sk#14, i_brand#15, i_class#16, i_category#17, i_product_name#18] +Output [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [5]: [i_item_sk#14, i_brand#15, i_class#16, i_category#17, i_product_name#18] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] (15) Filter [codegen id : 3] -Input [5]: [i_item_sk#14, i_brand#15, i_class#16, i_category#17, i_product_name#18] -Condition : isnotnull(i_item_sk#14) +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Condition : isnotnull(i_item_sk#13) (16) BroadcastExchange -Input [5]: [i_item_sk#14, i_brand#15, i_class#16, i_category#17, i_product_name#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +Input [5]: [i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#14] +Right keys [1]: [i_item_sk#13] Join condition: None (18) Project [codegen id : 4] -Output [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#15, i_class#16, i_category#17, i_product_name#18] -Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#14, i_brand#15, i_class#16, i_category#17, i_product_name#18] +Output [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Input [12]: [ss_item_sk#1, ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_item_sk#13, i_brand#14, i_class#15, i_category#16, i_product_name#17] (19) HashAggregate [codegen id : 4] -Input [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#15, i_class#16, i_category#17, i_product_name#18] -Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [ss_quantity#3, ss_sales_price#4, d_year#8, d_moy#9, d_qoy#10, s_store_id#12, i_brand#14, i_class#15, i_category#16, i_product_name#17] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [partial_sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [2]: [sum#20, isEmpty#21] -Results [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#22, isEmpty#23] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#20, isEmpty#21] (20) Exchange -Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#22, isEmpty#23] -Arguments: hashpartitioning(i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#20, isEmpty#21] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) HashAggregate [codegen id : 5] -Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#22, isEmpty#23] -Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#20, isEmpty#21] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] -Results [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, cast(sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 as decimal(38,2)) AS sumsales#26] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, cast(sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 as decimal(38,2)) AS sumsales#23] (22) ReusedExchange [Reuses operator id: 20] -Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#27, isEmpty#28] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#24, isEmpty#25] (23) HashAggregate [codegen id : 10] -Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#27, isEmpty#28] -Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#24, isEmpty#25] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] -Results [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (24) HashAggregate [codegen id : 10] -Input [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, sumsales#29] -Keys [7]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9] -Functions [1]: [partial_sum(sumsales#29)] -Aggregate Attributes [2]: [sum#30, isEmpty#31] -Results [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, sum#32, isEmpty#33] +Input [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sumsales#26] +Keys [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#27, isEmpty#28] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum#29, isEmpty#30] (25) Exchange -Input [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, sum#32, isEmpty#33] -Arguments: hashpartitioning(i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, 5), ENSURE_REQUIREMENTS, [id=#34] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum#29, isEmpty#30] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] (26) HashAggregate [codegen id : 11] -Input [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, sum#32, isEmpty#33] -Keys [7]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9] -Functions [1]: [sum(sumsales#29)] -Aggregate Attributes [1]: [sum(sumsales#29)#35] -Results [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, null AS s_store_id#36, sum(sumsales#29)#35 AS sumsales#37] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, sum#29, isEmpty#30] +Keys [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#31] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, null AS s_store_id#32, sum(sumsales#26)#31 AS sumsales#33] (27) ReusedExchange [Reuses operator id: 20] -Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#38, isEmpty#39] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#34, isEmpty#35] (28) HashAggregate [codegen id : 16] -Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#38, isEmpty#39] -Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#34, isEmpty#35] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] -Results [7]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (29) HashAggregate [codegen id : 16] -Input [7]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, sumsales#29] -Keys [6]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10] -Functions [1]: [partial_sum(sumsales#29)] -Aggregate Attributes [2]: [sum#40, isEmpty#41] -Results [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, sum#42, isEmpty#43] +Input [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sumsales#26] +Keys [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#36, isEmpty#37] +Results [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum#38, isEmpty#39] (30) Exchange -Input [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, sum#42, isEmpty#43] -Arguments: hashpartitioning(i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum#38, isEmpty#39] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, 5), ENSURE_REQUIREMENTS, [plan_id=5] (31) HashAggregate [codegen id : 17] -Input [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, sum#42, isEmpty#43] -Keys [6]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10] -Functions [1]: [sum(sumsales#29)] -Aggregate Attributes [1]: [sum(sumsales#29)#45] -Results [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, null AS d_moy#46, null AS s_store_id#47, sum(sumsales#29)#45 AS sumsales#48] +Input [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, sum#38, isEmpty#39] +Keys [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#40] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, null AS d_moy#41, null AS s_store_id#42, sum(sumsales#26)#40 AS sumsales#43] (32) ReusedExchange [Reuses operator id: 20] -Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#49, isEmpty#50] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#44, isEmpty#45] (33) HashAggregate [codegen id : 22] -Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#49, isEmpty#50] -Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#44, isEmpty#45] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] -Results [6]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (34) HashAggregate [codegen id : 22] -Input [6]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, sumsales#29] -Keys [5]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8] -Functions [1]: [partial_sum(sumsales#29)] -Aggregate Attributes [2]: [sum#51, isEmpty#52] -Results [7]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, sum#53, isEmpty#54] +Input [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sumsales#26] +Keys [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#46, isEmpty#47] +Results [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum#48, isEmpty#49] (35) Exchange -Input [7]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, sum#53, isEmpty#54] -Arguments: hashpartitioning(i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, 5), ENSURE_REQUIREMENTS, [id=#55] +Input [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum#48, isEmpty#49] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, 5), ENSURE_REQUIREMENTS, [plan_id=6] (36) HashAggregate [codegen id : 23] -Input [7]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, sum#53, isEmpty#54] -Keys [5]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8] -Functions [1]: [sum(sumsales#29)] -Aggregate Attributes [1]: [sum(sumsales#29)#56] -Results [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, null AS d_qoy#57, null AS d_moy#58, null AS s_store_id#59, sum(sumsales#29)#56 AS sumsales#60] +Input [7]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, sum#48, isEmpty#49] +Keys [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#50] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, null AS d_qoy#51, null AS d_moy#52, null AS s_store_id#53, sum(sumsales#26)#50 AS sumsales#54] (37) ReusedExchange [Reuses operator id: 20] -Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#61, isEmpty#62] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#55, isEmpty#56] (38) HashAggregate [codegen id : 28] -Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#61, isEmpty#62] -Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#55, isEmpty#56] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] -Results [5]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (39) HashAggregate [codegen id : 28] -Input [5]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, sumsales#29] -Keys [4]: [i_category#17, i_class#16, i_brand#15, i_product_name#18] -Functions [1]: [partial_sum(sumsales#29)] -Aggregate Attributes [2]: [sum#63, isEmpty#64] -Results [6]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, sum#65, isEmpty#66] +Input [5]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sumsales#26] +Keys [4]: [i_category#16, i_class#15, i_brand#14, i_product_name#17] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#57, isEmpty#58] +Results [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum#59, isEmpty#60] (40) Exchange -Input [6]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, sum#65, isEmpty#66] -Arguments: hashpartitioning(i_category#17, i_class#16, i_brand#15, i_product_name#18, 5), ENSURE_REQUIREMENTS, [id=#67] +Input [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum#59, isEmpty#60] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, i_product_name#17, 5), ENSURE_REQUIREMENTS, [plan_id=7] (41) HashAggregate [codegen id : 29] -Input [6]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, sum#65, isEmpty#66] -Keys [4]: [i_category#17, i_class#16, i_brand#15, i_product_name#18] -Functions [1]: [sum(sumsales#29)] -Aggregate Attributes [1]: [sum(sumsales#29)#68] -Results [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, null AS d_year#69, null AS d_qoy#70, null AS d_moy#71, null AS s_store_id#72, sum(sumsales#29)#68 AS sumsales#73] +Input [6]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, sum#59, isEmpty#60] +Keys [4]: [i_category#16, i_class#15, i_brand#14, i_product_name#17] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#61] +Results [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, null AS d_year#62, null AS d_qoy#63, null AS d_moy#64, null AS s_store_id#65, sum(sumsales#26)#61 AS sumsales#66] (42) ReusedExchange [Reuses operator id: 20] -Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#74, isEmpty#75] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#67, isEmpty#68] (43) HashAggregate [codegen id : 34] -Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#74, isEmpty#75] -Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#67, isEmpty#68] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] -Results [4]: [i_category#17, i_class#16, i_brand#15, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [4]: [i_category#16, i_class#15, i_brand#14, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (44) HashAggregate [codegen id : 34] -Input [4]: [i_category#17, i_class#16, i_brand#15, sumsales#29] -Keys [3]: [i_category#17, i_class#16, i_brand#15] -Functions [1]: [partial_sum(sumsales#29)] -Aggregate Attributes [2]: [sum#76, isEmpty#77] -Results [5]: [i_category#17, i_class#16, i_brand#15, sum#78, isEmpty#79] +Input [4]: [i_category#16, i_class#15, i_brand#14, sumsales#26] +Keys [3]: [i_category#16, i_class#15, i_brand#14] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#69, isEmpty#70] +Results [5]: [i_category#16, i_class#15, i_brand#14, sum#71, isEmpty#72] (45) Exchange -Input [5]: [i_category#17, i_class#16, i_brand#15, sum#78, isEmpty#79] -Arguments: hashpartitioning(i_category#17, i_class#16, i_brand#15, 5), ENSURE_REQUIREMENTS, [id=#80] +Input [5]: [i_category#16, i_class#15, i_brand#14, sum#71, isEmpty#72] +Arguments: hashpartitioning(i_category#16, i_class#15, i_brand#14, 5), ENSURE_REQUIREMENTS, [plan_id=8] (46) HashAggregate [codegen id : 35] -Input [5]: [i_category#17, i_class#16, i_brand#15, sum#78, isEmpty#79] -Keys [3]: [i_category#17, i_class#16, i_brand#15] -Functions [1]: [sum(sumsales#29)] -Aggregate Attributes [1]: [sum(sumsales#29)#81] -Results [9]: [i_category#17, i_class#16, i_brand#15, null AS i_product_name#82, null AS d_year#83, null AS d_qoy#84, null AS d_moy#85, null AS s_store_id#86, sum(sumsales#29)#81 AS sumsales#87] +Input [5]: [i_category#16, i_class#15, i_brand#14, sum#71, isEmpty#72] +Keys [3]: [i_category#16, i_class#15, i_brand#14] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#73] +Results [9]: [i_category#16, i_class#15, i_brand#14, null AS i_product_name#74, null AS d_year#75, null AS d_qoy#76, null AS d_moy#77, null AS s_store_id#78, sum(sumsales#26)#73 AS sumsales#79] (47) ReusedExchange [Reuses operator id: 20] -Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#88, isEmpty#89] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#80, isEmpty#81] (48) HashAggregate [codegen id : 40] -Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#88, isEmpty#89] -Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#80, isEmpty#81] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] -Results [3]: [i_category#17, i_class#16, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [3]: [i_category#16, i_class#15, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (49) HashAggregate [codegen id : 40] -Input [3]: [i_category#17, i_class#16, sumsales#29] -Keys [2]: [i_category#17, i_class#16] -Functions [1]: [partial_sum(sumsales#29)] -Aggregate Attributes [2]: [sum#90, isEmpty#91] -Results [4]: [i_category#17, i_class#16, sum#92, isEmpty#93] +Input [3]: [i_category#16, i_class#15, sumsales#26] +Keys [2]: [i_category#16, i_class#15] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#82, isEmpty#83] +Results [4]: [i_category#16, i_class#15, sum#84, isEmpty#85] (50) Exchange -Input [4]: [i_category#17, i_class#16, sum#92, isEmpty#93] -Arguments: hashpartitioning(i_category#17, i_class#16, 5), ENSURE_REQUIREMENTS, [id=#94] +Input [4]: [i_category#16, i_class#15, sum#84, isEmpty#85] +Arguments: hashpartitioning(i_category#16, i_class#15, 5), ENSURE_REQUIREMENTS, [plan_id=9] (51) HashAggregate [codegen id : 41] -Input [4]: [i_category#17, i_class#16, sum#92, isEmpty#93] -Keys [2]: [i_category#17, i_class#16] -Functions [1]: [sum(sumsales#29)] -Aggregate Attributes [1]: [sum(sumsales#29)#95] -Results [9]: [i_category#17, i_class#16, null AS i_brand#96, null AS i_product_name#97, null AS d_year#98, null AS d_qoy#99, null AS d_moy#100, null AS s_store_id#101, sum(sumsales#29)#95 AS sumsales#102] +Input [4]: [i_category#16, i_class#15, sum#84, isEmpty#85] +Keys [2]: [i_category#16, i_class#15] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#86] +Results [9]: [i_category#16, i_class#15, null AS i_brand#87, null AS i_product_name#88, null AS d_year#89, null AS d_qoy#90, null AS d_moy#91, null AS s_store_id#92, sum(sumsales#26)#86 AS sumsales#93] (52) ReusedExchange [Reuses operator id: 20] -Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#103, isEmpty#104] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#94, isEmpty#95] (53) HashAggregate [codegen id : 46] -Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#103, isEmpty#104] -Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#94, isEmpty#95] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] -Results [2]: [i_category#17, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [2]: [i_category#16, sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (54) HashAggregate [codegen id : 46] -Input [2]: [i_category#17, sumsales#29] -Keys [1]: [i_category#17] -Functions [1]: [partial_sum(sumsales#29)] -Aggregate Attributes [2]: [sum#105, isEmpty#106] -Results [3]: [i_category#17, sum#107, isEmpty#108] +Input [2]: [i_category#16, sumsales#26] +Keys [1]: [i_category#16] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#96, isEmpty#97] +Results [3]: [i_category#16, sum#98, isEmpty#99] (55) Exchange -Input [3]: [i_category#17, sum#107, isEmpty#108] -Arguments: hashpartitioning(i_category#17, 5), ENSURE_REQUIREMENTS, [id=#109] +Input [3]: [i_category#16, sum#98, isEmpty#99] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [plan_id=10] (56) HashAggregate [codegen id : 47] -Input [3]: [i_category#17, sum#107, isEmpty#108] -Keys [1]: [i_category#17] -Functions [1]: [sum(sumsales#29)] -Aggregate Attributes [1]: [sum(sumsales#29)#110] -Results [9]: [i_category#17, null AS i_class#111, null AS i_brand#112, null AS i_product_name#113, null AS d_year#114, null AS d_qoy#115, null AS d_moy#116, null AS s_store_id#117, sum(sumsales#29)#110 AS sumsales#118] +Input [3]: [i_category#16, sum#98, isEmpty#99] +Keys [1]: [i_category#16] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#100] +Results [9]: [i_category#16, null AS i_class#101, null AS i_brand#102, null AS i_product_name#103, null AS d_year#104, null AS d_qoy#105, null AS d_moy#106, null AS s_store_id#107, sum(sumsales#26)#100 AS sumsales#108] (57) ReusedExchange [Reuses operator id: 20] -Output [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#119, isEmpty#120] +Output [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#109, isEmpty#110] (58) HashAggregate [codegen id : 52] -Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#119, isEmpty#120] -Keys [8]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sum#109, isEmpty#110] +Keys [8]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12] Functions [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))] -Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25] -Results [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#25 AS sumsales#29] +Aggregate Attributes [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22] +Results [1]: [sum(coalesce(CheckOverflow((promote_precision(cast(ss_sales_price#4 as decimal(12,2))) * promote_precision(cast(ss_quantity#3 as decimal(12,2)))), DecimalType(18,2)), 0.00))#22 AS sumsales#26] (59) HashAggregate [codegen id : 52] -Input [1]: [sumsales#29] +Input [1]: [sumsales#26] Keys: [] -Functions [1]: [partial_sum(sumsales#29)] -Aggregate Attributes [2]: [sum#121, isEmpty#122] -Results [2]: [sum#123, isEmpty#124] +Functions [1]: [partial_sum(sumsales#26)] +Aggregate Attributes [2]: [sum#111, isEmpty#112] +Results [2]: [sum#113, isEmpty#114] (60) Exchange -Input [2]: [sum#123, isEmpty#124] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#125] +Input [2]: [sum#113, isEmpty#114] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=11] (61) HashAggregate [codegen id : 53] -Input [2]: [sum#123, isEmpty#124] +Input [2]: [sum#113, isEmpty#114] Keys: [] -Functions [1]: [sum(sumsales#29)] -Aggregate Attributes [1]: [sum(sumsales#29)#126] -Results [9]: [null AS i_category#127, null AS i_class#128, null AS i_brand#129, null AS i_product_name#130, null AS d_year#131, null AS d_qoy#132, null AS d_moy#133, null AS s_store_id#134, sum(sumsales#29)#126 AS sumsales#135] +Functions [1]: [sum(sumsales#26)] +Aggregate Attributes [1]: [sum(sumsales#26)#115] +Results [9]: [null AS i_category#116, null AS i_class#117, null AS i_brand#118, null AS i_product_name#119, null AS d_year#120, null AS d_qoy#121, null AS d_moy#122, null AS s_store_id#123, sum(sumsales#26)#115 AS sumsales#124] (62) Union (63) Exchange -Input [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#26] -Arguments: hashpartitioning(i_category#17, 5), ENSURE_REQUIREMENTS, [id=#136] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23] +Arguments: hashpartitioning(i_category#16, 5), ENSURE_REQUIREMENTS, [plan_id=12] (64) Sort [codegen id : 54] -Input [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#26] -Arguments: [i_category#17 ASC NULLS FIRST, sumsales#26 DESC NULLS LAST], false, 0 +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23] +Arguments: [i_category#16 ASC NULLS FIRST, sumsales#23 DESC NULLS LAST], false, 0 (65) Window -Input [9]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#26] -Arguments: [rank(sumsales#26) windowspecdefinition(i_category#17, sumsales#26 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#137], [i_category#17], [sumsales#26 DESC NULLS LAST] +Input [9]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23] +Arguments: [rank(sumsales#23) windowspecdefinition(i_category#16, sumsales#23 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rk#125], [i_category#16], [sumsales#23 DESC NULLS LAST] (66) Filter [codegen id : 55] -Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#26, rk#137] -Condition : (rk#137 <= 100) +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23, rk#125] +Condition : (rk#125 <= 100) (67) TakeOrderedAndProject -Input [10]: [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#26, rk#137] -Arguments: 100, [i_category#17 ASC NULLS FIRST, i_class#16 ASC NULLS FIRST, i_brand#15 ASC NULLS FIRST, i_product_name#18 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#10 ASC NULLS FIRST, d_moy#9 ASC NULLS FIRST, s_store_id#12 ASC NULLS FIRST, sumsales#26 ASC NULLS FIRST, rk#137 ASC NULLS FIRST], [i_category#17, i_class#16, i_brand#15, i_product_name#18, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#26, rk#137] +Input [10]: [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23, rk#125] +Arguments: 100, [i_category#16 ASC NULLS FIRST, i_class#15 ASC NULLS FIRST, i_brand#14 ASC NULLS FIRST, i_product_name#17 ASC NULLS FIRST, d_year#8 ASC NULLS FIRST, d_qoy#10 ASC NULLS FIRST, d_moy#9 ASC NULLS FIRST, s_store_id#12 ASC NULLS FIRST, sumsales#23 ASC NULLS FIRST, rk#125 ASC NULLS FIRST], [i_category#16, i_class#15, i_brand#14, i_product_name#17, d_year#8, d_qoy#10, d_moy#9, s_store_id#12, sumsales#23, rk#125] ===== Subqueries ===== @@ -424,25 +424,25 @@ BroadcastExchange (72) (68) Scan parquet default.date_dim -Output [5]: [d_date_sk#7, d_month_seq#138, d_year#8, d_moy#9, d_qoy#10] +Output [5]: [d_date_sk#7, d_month_seq#126, d_year#8, d_moy#9, d_qoy#10] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] ReadSchema: struct (69) ColumnarToRow [codegen id : 1] -Input [5]: [d_date_sk#7, d_month_seq#138, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#7, d_month_seq#126, d_year#8, d_moy#9, d_qoy#10] (70) Filter [codegen id : 1] -Input [5]: [d_date_sk#7, d_month_seq#138, d_year#8, d_moy#9, d_qoy#10] -Condition : (((isnotnull(d_month_seq#138) AND (d_month_seq#138 >= 1212)) AND (d_month_seq#138 <= 1223)) AND isnotnull(d_date_sk#7)) +Input [5]: [d_date_sk#7, d_month_seq#126, d_year#8, d_moy#9, d_qoy#10] +Condition : (((isnotnull(d_month_seq#126) AND (d_month_seq#126 >= 1212)) AND (d_month_seq#126 <= 1223)) AND isnotnull(d_date_sk#7)) (71) Project [codegen id : 1] Output [4]: [d_date_sk#7, d_year#8, d_moy#9, d_qoy#10] -Input [5]: [d_date_sk#7, d_month_seq#138, d_year#8, d_moy#9, d_qoy#10] +Input [5]: [d_date_sk#7, d_month_seq#126, d_year#8, d_moy#9, d_qoy#10] (72) BroadcastExchange Input [4]: [d_date_sk#7, d_year#8, d_moy#9, d_qoy#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#139] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=13] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt index bd893d09165e1..1fae29d564ae6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a.sf100/explain.txt @@ -140,7 +140,7 @@ Condition : isnotnull(s_store_sk#13) (19) BroadcastExchange Input [2]: [s_store_sk#13, s_state#14] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (20) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#9] @@ -155,39 +155,39 @@ Input [4]: [ss_store_sk#9, ss_net_profit#10, s_store_sk#13, s_state#14] Input [2]: [ss_net_profit#10, s_state#14] Keys [1]: [s_state#14] Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#10))] -Aggregate Attributes [1]: [sum#16] -Results [2]: [s_state#14, sum#17] +Aggregate Attributes [1]: [sum#15] +Results [2]: [s_state#14, sum#16] (23) Exchange -Input [2]: [s_state#14, sum#17] -Arguments: hashpartitioning(s_state#14, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [2]: [s_state#14, sum#16] +Arguments: hashpartitioning(s_state#14, 5), ENSURE_REQUIREMENTS, [plan_id=2] (24) HashAggregate [codegen id : 5] -Input [2]: [s_state#14, sum#17] +Input [2]: [s_state#14, sum#16] Keys [1]: [s_state#14] Functions [1]: [sum(UnscaledValue(ss_net_profit#10))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#10))#19] -Results [3]: [s_state#14, s_state#14, MakeDecimal(sum(UnscaledValue(ss_net_profit#10))#19,17,2) AS _w2#20] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#10))#17] +Results [3]: [s_state#14, s_state#14, MakeDecimal(sum(UnscaledValue(ss_net_profit#10))#17,17,2) AS _w2#18] (25) Sort [codegen id : 5] -Input [3]: [s_state#14, s_state#14, _w2#20] -Arguments: [s_state#14 ASC NULLS FIRST, _w2#20 DESC NULLS LAST], false, 0 +Input [3]: [s_state#14, s_state#14, _w2#18] +Arguments: [s_state#14 ASC NULLS FIRST, _w2#18 DESC NULLS LAST], false, 0 (26) Window -Input [3]: [s_state#14, s_state#14, _w2#20] -Arguments: [rank(_w2#20) windowspecdefinition(s_state#14, _w2#20 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#21], [s_state#14], [_w2#20 DESC NULLS LAST] +Input [3]: [s_state#14, s_state#14, _w2#18] +Arguments: [rank(_w2#18) windowspecdefinition(s_state#14, _w2#18 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#19], [s_state#14], [_w2#18 DESC NULLS LAST] (27) Filter [codegen id : 6] -Input [4]: [s_state#14, s_state#14, _w2#20, ranking#21] -Condition : (ranking#21 <= 5) +Input [4]: [s_state#14, s_state#14, _w2#18, ranking#19] +Condition : (ranking#19 <= 5) (28) Project [codegen id : 6] Output [1]: [s_state#14] -Input [4]: [s_state#14, s_state#14, _w2#20, ranking#21] +Input [4]: [s_state#14, s_state#14, _w2#18, ranking#19] (29) BroadcastExchange Input [1]: [s_state#14] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#22] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=3] (30) BroadcastHashJoin [codegen id : 7] Left keys [1]: [s_state#8] @@ -196,7 +196,7 @@ Join condition: None (31) BroadcastExchange Input [3]: [s_store_sk#6, s_county#7, s_state#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (32) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_store_sk#1] @@ -211,115 +211,115 @@ Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_county#7, s_state#8] Input [3]: [ss_net_profit#2, s_county#7, s_state#8] Keys [2]: [s_state#8, s_county#7] Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum#24] -Results [3]: [s_state#8, s_county#7, sum#25] +Aggregate Attributes [1]: [sum#20] +Results [3]: [s_state#8, s_county#7, sum#21] (35) Exchange -Input [3]: [s_state#8, s_county#7, sum#25] -Arguments: hashpartitioning(s_state#8, s_county#7, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [3]: [s_state#8, s_county#7, sum#21] +Arguments: hashpartitioning(s_state#8, s_county#7, 5), ENSURE_REQUIREMENTS, [plan_id=5] (36) HashAggregate [codegen id : 9] -Input [3]: [s_state#8, s_county#7, sum#25] +Input [3]: [s_state#8, s_county#7, sum#21] Keys [2]: [s_state#8, s_county#7] Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#27] -Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#27,17,2) as decimal(27,2)) AS total_sum#28, s_state#8, s_county#7, 0 AS g_state#29, 0 AS g_county#30, 0 AS lochierarchy#31] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#22] +Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#22,17,2) as decimal(27,2)) AS total_sum#23, s_state#8, s_county#7, 0 AS g_state#24, 0 AS g_county#25, 0 AS lochierarchy#26] (37) ReusedExchange [Reuses operator id: 35] -Output [3]: [s_state#8, s_county#7, sum#32] +Output [3]: [s_state#8, s_county#7, sum#27] (38) HashAggregate [codegen id : 18] -Input [3]: [s_state#8, s_county#7, sum#32] +Input [3]: [s_state#8, s_county#7, sum#27] Keys [2]: [s_state#8, s_county#7] Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#27] -Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#27,17,2) AS total_sum#33, s_state#8] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#22] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#22,17,2) AS total_sum#28, s_state#8] (39) HashAggregate [codegen id : 18] -Input [2]: [total_sum#33, s_state#8] +Input [2]: [total_sum#28, s_state#8] Keys [1]: [s_state#8] -Functions [1]: [partial_sum(total_sum#33)] -Aggregate Attributes [2]: [sum#34, isEmpty#35] -Results [3]: [s_state#8, sum#36, isEmpty#37] +Functions [1]: [partial_sum(total_sum#28)] +Aggregate Attributes [2]: [sum#29, isEmpty#30] +Results [3]: [s_state#8, sum#31, isEmpty#32] (40) Exchange -Input [3]: [s_state#8, sum#36, isEmpty#37] -Arguments: hashpartitioning(s_state#8, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [3]: [s_state#8, sum#31, isEmpty#32] +Arguments: hashpartitioning(s_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=6] (41) HashAggregate [codegen id : 19] -Input [3]: [s_state#8, sum#36, isEmpty#37] +Input [3]: [s_state#8, sum#31, isEmpty#32] Keys [1]: [s_state#8] -Functions [1]: [sum(total_sum#33)] -Aggregate Attributes [1]: [sum(total_sum#33)#39] -Results [6]: [sum(total_sum#33)#39 AS total_sum#40, s_state#8, null AS s_county#41, 0 AS g_state#42, 1 AS g_county#43, 1 AS lochierarchy#44] +Functions [1]: [sum(total_sum#28)] +Aggregate Attributes [1]: [sum(total_sum#28)#33] +Results [6]: [sum(total_sum#28)#33 AS total_sum#34, s_state#8, null AS s_county#35, 0 AS g_state#36, 1 AS g_county#37, 1 AS lochierarchy#38] (42) ReusedExchange [Reuses operator id: 35] -Output [3]: [s_state#8, s_county#7, sum#45] +Output [3]: [s_state#8, s_county#7, sum#39] (43) HashAggregate [codegen id : 28] -Input [3]: [s_state#8, s_county#7, sum#45] +Input [3]: [s_state#8, s_county#7, sum#39] Keys [2]: [s_state#8, s_county#7] Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#27] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#27,17,2) AS total_sum#33] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#22] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#22,17,2) AS total_sum#28] (44) HashAggregate [codegen id : 28] -Input [1]: [total_sum#33] +Input [1]: [total_sum#28] Keys: [] -Functions [1]: [partial_sum(total_sum#33)] -Aggregate Attributes [2]: [sum#46, isEmpty#47] -Results [2]: [sum#48, isEmpty#49] +Functions [1]: [partial_sum(total_sum#28)] +Aggregate Attributes [2]: [sum#40, isEmpty#41] +Results [2]: [sum#42, isEmpty#43] (45) Exchange -Input [2]: [sum#48, isEmpty#49] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#50] +Input [2]: [sum#42, isEmpty#43] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (46) HashAggregate [codegen id : 29] -Input [2]: [sum#48, isEmpty#49] +Input [2]: [sum#42, isEmpty#43] Keys: [] -Functions [1]: [sum(total_sum#33)] -Aggregate Attributes [1]: [sum(total_sum#33)#51] -Results [6]: [sum(total_sum#33)#51 AS total_sum#52, null AS s_state#53, null AS s_county#54, 1 AS g_state#55, 1 AS g_county#56, 2 AS lochierarchy#57] +Functions [1]: [sum(total_sum#28)] +Aggregate Attributes [1]: [sum(total_sum#28)#44] +Results [6]: [sum(total_sum#28)#44 AS total_sum#45, null AS s_state#46, null AS s_county#47, 1 AS g_state#48, 1 AS g_county#49, 2 AS lochierarchy#50] (47) Union (48) HashAggregate [codegen id : 30] -Input [6]: [total_sum#28, s_state#8, s_county#7, g_state#29, g_county#30, lochierarchy#31] -Keys [6]: [total_sum#28, s_state#8, s_county#7, g_state#29, g_county#30, lochierarchy#31] +Input [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Keys [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] Functions: [] Aggregate Attributes: [] -Results [6]: [total_sum#28, s_state#8, s_county#7, g_state#29, g_county#30, lochierarchy#31] +Results [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] (49) Exchange -Input [6]: [total_sum#28, s_state#8, s_county#7, g_state#29, g_county#30, lochierarchy#31] -Arguments: hashpartitioning(total_sum#28, s_state#8, s_county#7, g_state#29, g_county#30, lochierarchy#31, 5), ENSURE_REQUIREMENTS, [id=#58] +Input [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Arguments: hashpartitioning(total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26, 5), ENSURE_REQUIREMENTS, [plan_id=8] (50) HashAggregate [codegen id : 31] -Input [6]: [total_sum#28, s_state#8, s_county#7, g_state#29, g_county#30, lochierarchy#31] -Keys [6]: [total_sum#28, s_state#8, s_county#7, g_state#29, g_county#30, lochierarchy#31] +Input [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Keys [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] Functions: [] Aggregate Attributes: [] -Results [5]: [total_sum#28, s_state#8, s_county#7, lochierarchy#31, CASE WHEN (g_county#30 = 0) THEN s_state#8 END AS _w0#59] +Results [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, CASE WHEN (g_county#25 = 0) THEN s_state#8 END AS _w0#51] (51) Exchange -Input [5]: [total_sum#28, s_state#8, s_county#7, lochierarchy#31, _w0#59] -Arguments: hashpartitioning(lochierarchy#31, _w0#59, 5), ENSURE_REQUIREMENTS, [id=#60] +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51] +Arguments: hashpartitioning(lochierarchy#26, _w0#51, 5), ENSURE_REQUIREMENTS, [plan_id=9] (52) Sort [codegen id : 32] -Input [5]: [total_sum#28, s_state#8, s_county#7, lochierarchy#31, _w0#59] -Arguments: [lochierarchy#31 ASC NULLS FIRST, _w0#59 ASC NULLS FIRST, total_sum#28 DESC NULLS LAST], false, 0 +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51] +Arguments: [lochierarchy#26 ASC NULLS FIRST, _w0#51 ASC NULLS FIRST, total_sum#23 DESC NULLS LAST], false, 0 (53) Window -Input [5]: [total_sum#28, s_state#8, s_county#7, lochierarchy#31, _w0#59] -Arguments: [rank(total_sum#28) windowspecdefinition(lochierarchy#31, _w0#59, total_sum#28 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#61], [lochierarchy#31, _w0#59], [total_sum#28 DESC NULLS LAST] +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51] +Arguments: [rank(total_sum#23) windowspecdefinition(lochierarchy#26, _w0#51, total_sum#23 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#52], [lochierarchy#26, _w0#51], [total_sum#23 DESC NULLS LAST] (54) Project [codegen id : 33] -Output [5]: [total_sum#28, s_state#8, s_county#7, lochierarchy#31, rank_within_parent#61] -Input [6]: [total_sum#28, s_state#8, s_county#7, lochierarchy#31, _w0#59, rank_within_parent#61] +Output [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, rank_within_parent#52] +Input [6]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51, rank_within_parent#52] (55) TakeOrderedAndProject -Input [5]: [total_sum#28, s_state#8, s_county#7, lochierarchy#31, rank_within_parent#61] -Arguments: 100, [lochierarchy#31 DESC NULLS LAST, CASE WHEN (lochierarchy#31 = 0) THEN s_state#8 END ASC NULLS FIRST, rank_within_parent#61 ASC NULLS FIRST], [total_sum#28, s_state#8, s_county#7, lochierarchy#31, rank_within_parent#61] +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, rank_within_parent#52] +Arguments: 100, [lochierarchy#26 DESC NULLS LAST, CASE WHEN (lochierarchy#26 = 0) THEN s_state#8 END ASC NULLS FIRST, rank_within_parent#52 ASC NULLS FIRST], [total_sum#23, s_state#8, s_county#7, lochierarchy#26, rank_within_parent#52] ===== Subqueries ===== @@ -332,26 +332,26 @@ BroadcastExchange (60) (56) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_month_seq#62] +Output [2]: [d_date_sk#5, d_month_seq#53] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] ReadSchema: struct (57) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#62] +Input [2]: [d_date_sk#5, d_month_seq#53] (58) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#62] -Condition : (((isnotnull(d_month_seq#62) AND (d_month_seq#62 >= 1212)) AND (d_month_seq#62 <= 1223)) AND isnotnull(d_date_sk#5)) +Input [2]: [d_date_sk#5, d_month_seq#53] +Condition : (((isnotnull(d_month_seq#53) AND (d_month_seq#53 >= 1212)) AND (d_month_seq#53 <= 1223)) AND isnotnull(d_date_sk#5)) (59) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_month_seq#62] +Input [2]: [d_date_sk#5, d_month_seq#53] (60) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#63] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] Subquery:2 Hosting operator id = 10 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt index 2bfa05e382559..e497c7d198b5b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q70a/explain.txt @@ -128,7 +128,7 @@ Condition : isnotnull(s_store_sk#12) (16) BroadcastExchange Input [2]: [s_store_sk#12, s_state#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (17) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_store_sk#9] @@ -140,54 +140,54 @@ Output [3]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13] Input [5]: [ss_store_sk#9, ss_net_profit#10, ss_sold_date_sk#11, s_store_sk#12, s_state#13] (19) ReusedExchange [Reuses operator id: 60] -Output [1]: [d_date_sk#15] +Output [1]: [d_date_sk#14] (20) BroadcastHashJoin [codegen id : 4] Left keys [1]: [ss_sold_date_sk#11] -Right keys [1]: [d_date_sk#15] +Right keys [1]: [d_date_sk#14] Join condition: None (21) Project [codegen id : 4] Output [2]: [ss_net_profit#10, s_state#13] -Input [4]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13, d_date_sk#15] +Input [4]: [ss_net_profit#10, ss_sold_date_sk#11, s_state#13, d_date_sk#14] (22) HashAggregate [codegen id : 4] Input [2]: [ss_net_profit#10, s_state#13] Keys [1]: [s_state#13] Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#10))] -Aggregate Attributes [1]: [sum#16] -Results [2]: [s_state#13, sum#17] +Aggregate Attributes [1]: [sum#15] +Results [2]: [s_state#13, sum#16] (23) Exchange -Input [2]: [s_state#13, sum#17] -Arguments: hashpartitioning(s_state#13, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [2]: [s_state#13, sum#16] +Arguments: hashpartitioning(s_state#13, 5), ENSURE_REQUIREMENTS, [plan_id=2] (24) HashAggregate [codegen id : 5] -Input [2]: [s_state#13, sum#17] +Input [2]: [s_state#13, sum#16] Keys [1]: [s_state#13] Functions [1]: [sum(UnscaledValue(ss_net_profit#10))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#10))#19] -Results [3]: [s_state#13, s_state#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#10))#19,17,2) AS _w2#20] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#10))#17] +Results [3]: [s_state#13, s_state#13, MakeDecimal(sum(UnscaledValue(ss_net_profit#10))#17,17,2) AS _w2#18] (25) Sort [codegen id : 5] -Input [3]: [s_state#13, s_state#13, _w2#20] -Arguments: [s_state#13 ASC NULLS FIRST, _w2#20 DESC NULLS LAST], false, 0 +Input [3]: [s_state#13, s_state#13, _w2#18] +Arguments: [s_state#13 ASC NULLS FIRST, _w2#18 DESC NULLS LAST], false, 0 (26) Window -Input [3]: [s_state#13, s_state#13, _w2#20] -Arguments: [rank(_w2#20) windowspecdefinition(s_state#13, _w2#20 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#21], [s_state#13], [_w2#20 DESC NULLS LAST] +Input [3]: [s_state#13, s_state#13, _w2#18] +Arguments: [rank(_w2#18) windowspecdefinition(s_state#13, _w2#18 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS ranking#19], [s_state#13], [_w2#18 DESC NULLS LAST] (27) Filter [codegen id : 6] -Input [4]: [s_state#13, s_state#13, _w2#20, ranking#21] -Condition : (ranking#21 <= 5) +Input [4]: [s_state#13, s_state#13, _w2#18, ranking#19] +Condition : (ranking#19 <= 5) (28) Project [codegen id : 6] Output [1]: [s_state#13] -Input [4]: [s_state#13, s_state#13, _w2#20, ranking#21] +Input [4]: [s_state#13, s_state#13, _w2#18, ranking#19] (29) BroadcastExchange Input [1]: [s_state#13] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#22] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=3] (30) BroadcastHashJoin [codegen id : 7] Left keys [1]: [s_state#8] @@ -196,7 +196,7 @@ Join condition: None (31) BroadcastExchange Input [3]: [s_store_sk#6, s_county#7, s_state#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#23] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=4] (32) BroadcastHashJoin [codegen id : 8] Left keys [1]: [ss_store_sk#1] @@ -211,115 +211,115 @@ Input [5]: [ss_store_sk#1, ss_net_profit#2, s_store_sk#6, s_county#7, s_state#8] Input [3]: [ss_net_profit#2, s_county#7, s_state#8] Keys [2]: [s_state#8, s_county#7] Functions [1]: [partial_sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum#24] -Results [3]: [s_state#8, s_county#7, sum#25] +Aggregate Attributes [1]: [sum#20] +Results [3]: [s_state#8, s_county#7, sum#21] (35) Exchange -Input [3]: [s_state#8, s_county#7, sum#25] -Arguments: hashpartitioning(s_state#8, s_county#7, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [3]: [s_state#8, s_county#7, sum#21] +Arguments: hashpartitioning(s_state#8, s_county#7, 5), ENSURE_REQUIREMENTS, [plan_id=5] (36) HashAggregate [codegen id : 9] -Input [3]: [s_state#8, s_county#7, sum#25] +Input [3]: [s_state#8, s_county#7, sum#21] Keys [2]: [s_state#8, s_county#7] Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#27] -Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#27,17,2) as decimal(27,2)) AS total_sum#28, s_state#8, s_county#7, 0 AS g_state#29, 0 AS g_county#30, 0 AS lochierarchy#31] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#22] +Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#22,17,2) as decimal(27,2)) AS total_sum#23, s_state#8, s_county#7, 0 AS g_state#24, 0 AS g_county#25, 0 AS lochierarchy#26] (37) ReusedExchange [Reuses operator id: 35] -Output [3]: [s_state#8, s_county#7, sum#32] +Output [3]: [s_state#8, s_county#7, sum#27] (38) HashAggregate [codegen id : 18] -Input [3]: [s_state#8, s_county#7, sum#32] +Input [3]: [s_state#8, s_county#7, sum#27] Keys [2]: [s_state#8, s_county#7] Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#27] -Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#27,17,2) AS total_sum#33, s_state#8] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#22] +Results [2]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#22,17,2) AS total_sum#28, s_state#8] (39) HashAggregate [codegen id : 18] -Input [2]: [total_sum#33, s_state#8] +Input [2]: [total_sum#28, s_state#8] Keys [1]: [s_state#8] -Functions [1]: [partial_sum(total_sum#33)] -Aggregate Attributes [2]: [sum#34, isEmpty#35] -Results [3]: [s_state#8, sum#36, isEmpty#37] +Functions [1]: [partial_sum(total_sum#28)] +Aggregate Attributes [2]: [sum#29, isEmpty#30] +Results [3]: [s_state#8, sum#31, isEmpty#32] (40) Exchange -Input [3]: [s_state#8, sum#36, isEmpty#37] -Arguments: hashpartitioning(s_state#8, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [3]: [s_state#8, sum#31, isEmpty#32] +Arguments: hashpartitioning(s_state#8, 5), ENSURE_REQUIREMENTS, [plan_id=6] (41) HashAggregate [codegen id : 19] -Input [3]: [s_state#8, sum#36, isEmpty#37] +Input [3]: [s_state#8, sum#31, isEmpty#32] Keys [1]: [s_state#8] -Functions [1]: [sum(total_sum#33)] -Aggregate Attributes [1]: [sum(total_sum#33)#39] -Results [6]: [sum(total_sum#33)#39 AS total_sum#40, s_state#8, null AS s_county#41, 0 AS g_state#42, 1 AS g_county#43, 1 AS lochierarchy#44] +Functions [1]: [sum(total_sum#28)] +Aggregate Attributes [1]: [sum(total_sum#28)#33] +Results [6]: [sum(total_sum#28)#33 AS total_sum#34, s_state#8, null AS s_county#35, 0 AS g_state#36, 1 AS g_county#37, 1 AS lochierarchy#38] (42) ReusedExchange [Reuses operator id: 35] -Output [3]: [s_state#8, s_county#7, sum#45] +Output [3]: [s_state#8, s_county#7, sum#39] (43) HashAggregate [codegen id : 28] -Input [3]: [s_state#8, s_county#7, sum#45] +Input [3]: [s_state#8, s_county#7, sum#39] Keys [2]: [s_state#8, s_county#7] Functions [1]: [sum(UnscaledValue(ss_net_profit#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#27] -Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#27,17,2) AS total_sum#33] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_profit#2))#22] +Results [1]: [MakeDecimal(sum(UnscaledValue(ss_net_profit#2))#22,17,2) AS total_sum#28] (44) HashAggregate [codegen id : 28] -Input [1]: [total_sum#33] +Input [1]: [total_sum#28] Keys: [] -Functions [1]: [partial_sum(total_sum#33)] -Aggregate Attributes [2]: [sum#46, isEmpty#47] -Results [2]: [sum#48, isEmpty#49] +Functions [1]: [partial_sum(total_sum#28)] +Aggregate Attributes [2]: [sum#40, isEmpty#41] +Results [2]: [sum#42, isEmpty#43] (45) Exchange -Input [2]: [sum#48, isEmpty#49] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#50] +Input [2]: [sum#42, isEmpty#43] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (46) HashAggregate [codegen id : 29] -Input [2]: [sum#48, isEmpty#49] +Input [2]: [sum#42, isEmpty#43] Keys: [] -Functions [1]: [sum(total_sum#33)] -Aggregate Attributes [1]: [sum(total_sum#33)#51] -Results [6]: [sum(total_sum#33)#51 AS total_sum#52, null AS s_state#53, null AS s_county#54, 1 AS g_state#55, 1 AS g_county#56, 2 AS lochierarchy#57] +Functions [1]: [sum(total_sum#28)] +Aggregate Attributes [1]: [sum(total_sum#28)#44] +Results [6]: [sum(total_sum#28)#44 AS total_sum#45, null AS s_state#46, null AS s_county#47, 1 AS g_state#48, 1 AS g_county#49, 2 AS lochierarchy#50] (47) Union (48) HashAggregate [codegen id : 30] -Input [6]: [total_sum#28, s_state#8, s_county#7, g_state#29, g_county#30, lochierarchy#31] -Keys [6]: [total_sum#28, s_state#8, s_county#7, g_state#29, g_county#30, lochierarchy#31] +Input [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Keys [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] Functions: [] Aggregate Attributes: [] -Results [6]: [total_sum#28, s_state#8, s_county#7, g_state#29, g_county#30, lochierarchy#31] +Results [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] (49) Exchange -Input [6]: [total_sum#28, s_state#8, s_county#7, g_state#29, g_county#30, lochierarchy#31] -Arguments: hashpartitioning(total_sum#28, s_state#8, s_county#7, g_state#29, g_county#30, lochierarchy#31, 5), ENSURE_REQUIREMENTS, [id=#58] +Input [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Arguments: hashpartitioning(total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26, 5), ENSURE_REQUIREMENTS, [plan_id=8] (50) HashAggregate [codegen id : 31] -Input [6]: [total_sum#28, s_state#8, s_county#7, g_state#29, g_county#30, lochierarchy#31] -Keys [6]: [total_sum#28, s_state#8, s_county#7, g_state#29, g_county#30, lochierarchy#31] +Input [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] +Keys [6]: [total_sum#23, s_state#8, s_county#7, g_state#24, g_county#25, lochierarchy#26] Functions: [] Aggregate Attributes: [] -Results [5]: [total_sum#28, s_state#8, s_county#7, lochierarchy#31, CASE WHEN (g_county#30 = 0) THEN s_state#8 END AS _w0#59] +Results [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, CASE WHEN (g_county#25 = 0) THEN s_state#8 END AS _w0#51] (51) Exchange -Input [5]: [total_sum#28, s_state#8, s_county#7, lochierarchy#31, _w0#59] -Arguments: hashpartitioning(lochierarchy#31, _w0#59, 5), ENSURE_REQUIREMENTS, [id=#60] +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51] +Arguments: hashpartitioning(lochierarchy#26, _w0#51, 5), ENSURE_REQUIREMENTS, [plan_id=9] (52) Sort [codegen id : 32] -Input [5]: [total_sum#28, s_state#8, s_county#7, lochierarchy#31, _w0#59] -Arguments: [lochierarchy#31 ASC NULLS FIRST, _w0#59 ASC NULLS FIRST, total_sum#28 DESC NULLS LAST], false, 0 +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51] +Arguments: [lochierarchy#26 ASC NULLS FIRST, _w0#51 ASC NULLS FIRST, total_sum#23 DESC NULLS LAST], false, 0 (53) Window -Input [5]: [total_sum#28, s_state#8, s_county#7, lochierarchy#31, _w0#59] -Arguments: [rank(total_sum#28) windowspecdefinition(lochierarchy#31, _w0#59, total_sum#28 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#61], [lochierarchy#31, _w0#59], [total_sum#28 DESC NULLS LAST] +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51] +Arguments: [rank(total_sum#23) windowspecdefinition(lochierarchy#26, _w0#51, total_sum#23 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#52], [lochierarchy#26, _w0#51], [total_sum#23 DESC NULLS LAST] (54) Project [codegen id : 33] -Output [5]: [total_sum#28, s_state#8, s_county#7, lochierarchy#31, rank_within_parent#61] -Input [6]: [total_sum#28, s_state#8, s_county#7, lochierarchy#31, _w0#59, rank_within_parent#61] +Output [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, rank_within_parent#52] +Input [6]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, _w0#51, rank_within_parent#52] (55) TakeOrderedAndProject -Input [5]: [total_sum#28, s_state#8, s_county#7, lochierarchy#31, rank_within_parent#61] -Arguments: 100, [lochierarchy#31 DESC NULLS LAST, CASE WHEN (lochierarchy#31 = 0) THEN s_state#8 END ASC NULLS FIRST, rank_within_parent#61 ASC NULLS FIRST], [total_sum#28, s_state#8, s_county#7, lochierarchy#31, rank_within_parent#61] +Input [5]: [total_sum#23, s_state#8, s_county#7, lochierarchy#26, rank_within_parent#52] +Arguments: 100, [lochierarchy#26 DESC NULLS LAST, CASE WHEN (lochierarchy#26 = 0) THEN s_state#8 END ASC NULLS FIRST, rank_within_parent#52 ASC NULLS FIRST], [total_sum#23, s_state#8, s_county#7, lochierarchy#26, rank_within_parent#52] ===== Subqueries ===== @@ -332,26 +332,26 @@ BroadcastExchange (60) (56) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_month_seq#62] +Output [2]: [d_date_sk#5, d_month_seq#53] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] ReadSchema: struct (57) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#62] +Input [2]: [d_date_sk#5, d_month_seq#53] (58) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#62] -Condition : (((isnotnull(d_month_seq#62) AND (d_month_seq#62 >= 1212)) AND (d_month_seq#62 <= 1223)) AND isnotnull(d_date_sk#5)) +Input [2]: [d_date_sk#5, d_month_seq#53] +Condition : (((isnotnull(d_month_seq#53) AND (d_month_seq#53 >= 1212)) AND (d_month_seq#53 <= 1223)) AND isnotnull(d_date_sk#5)) (59) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_month_seq#62] +Input [2]: [d_date_sk#5, d_month_seq#53] (60) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#63] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=10] Subquery:2 Hosting operator id = 10 Hosting Expression = ss_sold_date_sk#11 IN dynamicpruning#4 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt index e5e42f2be1366..72274b888a6a2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72.sf100/explain.txt @@ -106,7 +106,7 @@ Input [2]: [hd_demo_sk#10, hd_buy_potential#11] (8) BroadcastExchange Input [1]: [hd_demo_sk#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_bill_hdemo_sk#3] @@ -118,272 +118,272 @@ Output [7]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#4, cs_promo_sk#5, Input [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, hd_demo_sk#10] (11) Scan parquet default.customer_demographics -Output [2]: [cd_demo_sk#13, cd_marital_status#14] +Output [2]: [cd_demo_sk#12, cd_marital_status#13] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,M), IsNotNull(cd_demo_sk)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [2]: [cd_demo_sk#13, cd_marital_status#14] +Input [2]: [cd_demo_sk#12, cd_marital_status#13] (13) Filter [codegen id : 2] -Input [2]: [cd_demo_sk#13, cd_marital_status#14] -Condition : ((isnotnull(cd_marital_status#14) AND (cd_marital_status#14 = M)) AND isnotnull(cd_demo_sk#13)) +Input [2]: [cd_demo_sk#12, cd_marital_status#13] +Condition : ((isnotnull(cd_marital_status#13) AND (cd_marital_status#13 = M)) AND isnotnull(cd_demo_sk#12)) (14) Project [codegen id : 2] -Output [1]: [cd_demo_sk#13] -Input [2]: [cd_demo_sk#13, cd_marital_status#14] +Output [1]: [cd_demo_sk#12] +Input [2]: [cd_demo_sk#12, cd_marital_status#13] (15) BroadcastExchange -Input [1]: [cd_demo_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#15] +Input [1]: [cd_demo_sk#12] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_bill_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#13] +Right keys [1]: [cd_demo_sk#12] Join condition: None (17) Project [codegen id : 4] Output [6]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8] -Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, cd_demo_sk#13] +Input [8]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, cd_demo_sk#12] (18) Scan parquet default.date_dim -Output [2]: [d_date_sk#16, d_date#17] +Output [2]: [d_date_sk#14, d_date#15] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] -Input [2]: [d_date_sk#16, d_date#17] +Input [2]: [d_date_sk#14, d_date#15] (20) Filter [codegen id : 3] -Input [2]: [d_date_sk#16, d_date#17] -Condition : (isnotnull(d_date#17) AND isnotnull(d_date_sk#16)) +Input [2]: [d_date_sk#14, d_date#15] +Condition : (isnotnull(d_date#15) AND isnotnull(d_date_sk#14)) (21) BroadcastExchange -Input [2]: [d_date_sk#16, d_date#17] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#18] +Input [2]: [d_date_sk#14, d_date#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (22) BroadcastHashJoin [codegen id : 4] Left keys [1]: [cs_ship_date_sk#1] -Right keys [1]: [d_date_sk#16] +Right keys [1]: [d_date_sk#14] Join condition: None (23) Project [codegen id : 4] -Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17] -Input [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date_sk#16, d_date#17] +Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#15] +Input [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date_sk#14, d_date#15] (24) Exchange -Input [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17] -Arguments: hashpartitioning(cs_item_sk#4, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#15] +Arguments: hashpartitioning(cs_item_sk#4, 5), ENSURE_REQUIREMENTS, [plan_id=4] (25) Sort [codegen id : 5] -Input [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17] +Input [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#15] Arguments: [cs_item_sk#4 ASC NULLS FIRST], false, 0 (26) Scan parquet default.item -Output [2]: [i_item_sk#20, i_item_desc#21] +Output [2]: [i_item_sk#16, i_item_desc#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (27) ColumnarToRow [codegen id : 6] -Input [2]: [i_item_sk#20, i_item_desc#21] +Input [2]: [i_item_sk#16, i_item_desc#17] (28) Filter [codegen id : 6] -Input [2]: [i_item_sk#20, i_item_desc#21] -Condition : isnotnull(i_item_sk#20) +Input [2]: [i_item_sk#16, i_item_desc#17] +Condition : isnotnull(i_item_sk#16) (29) Exchange -Input [2]: [i_item_sk#20, i_item_desc#21] -Arguments: hashpartitioning(i_item_sk#20, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [2]: [i_item_sk#16, i_item_desc#17] +Arguments: hashpartitioning(i_item_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] (30) Sort [codegen id : 7] -Input [2]: [i_item_sk#20, i_item_desc#21] -Arguments: [i_item_sk#20 ASC NULLS FIRST], false, 0 +Input [2]: [i_item_sk#16, i_item_desc#17] +Arguments: [i_item_sk#16 ASC NULLS FIRST], false, 0 (31) SortMergeJoin [codegen id : 10] Left keys [1]: [cs_item_sk#4] -Right keys [1]: [i_item_sk#20] +Right keys [1]: [i_item_sk#16] Join condition: None (32) Project [codegen id : 10] -Output [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17, i_item_desc#21] -Input [8]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17, i_item_sk#20, i_item_desc#21] +Output [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#15, i_item_desc#17] +Input [8]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#15, i_item_sk#16, i_item_desc#17] (33) ReusedExchange [Reuses operator id: 81] -Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] +Output [4]: [d_date_sk#18, d_date#19, d_week_seq#20, d_date_sk#21] (34) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_sold_date_sk#8] -Right keys [1]: [d_date_sk#23] -Join condition: (d_date#17 > date_add(d_date#24, 5)) +Right keys [1]: [d_date_sk#18] +Join condition: (d_date#15 > date_add(d_date#19, 5)) (35) Project [codegen id : 10] -Output [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26] -Input [11]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#17, i_item_desc#21, d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] +Output [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#17, d_week_seq#20, d_date_sk#21] +Input [11]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, d_date#15, i_item_desc#17, d_date_sk#18, d_date#19, d_week_seq#20, d_date_sk#21] (36) Exchange -Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26] -Arguments: hashpartitioning(cs_item_sk#4, d_date_sk#26, 5), ENSURE_REQUIREMENTS, [id=#27] +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#17, d_week_seq#20, d_date_sk#21] +Arguments: hashpartitioning(cs_item_sk#4, d_date_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=6] (37) Sort [codegen id : 11] -Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26] -Arguments: [cs_item_sk#4 ASC NULLS FIRST, d_date_sk#26 ASC NULLS FIRST], false, 0 +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#17, d_week_seq#20, d_date_sk#21] +Arguments: [cs_item_sk#4 ASC NULLS FIRST, d_date_sk#21 ASC NULLS FIRST], false, 0 (38) Scan parquet default.inventory -Output [4]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31] +Output [4]: [inv_item_sk#22, inv_warehouse_sk#23, inv_quantity_on_hand#24, inv_date_sk#25] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(inv_date_sk#31), dynamicpruningexpression(true)] +PartitionFilters: [isnotnull(inv_date_sk#25), dynamicpruningexpression(true)] PushedFilters: [IsNotNull(inv_quantity_on_hand), IsNotNull(inv_item_sk), IsNotNull(inv_warehouse_sk)] ReadSchema: struct (39) ColumnarToRow [codegen id : 13] -Input [4]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31] +Input [4]: [inv_item_sk#22, inv_warehouse_sk#23, inv_quantity_on_hand#24, inv_date_sk#25] (40) Filter [codegen id : 13] -Input [4]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31] -Condition : ((isnotnull(inv_quantity_on_hand#30) AND isnotnull(inv_item_sk#28)) AND isnotnull(inv_warehouse_sk#29)) +Input [4]: [inv_item_sk#22, inv_warehouse_sk#23, inv_quantity_on_hand#24, inv_date_sk#25] +Condition : ((isnotnull(inv_quantity_on_hand#24) AND isnotnull(inv_item_sk#22)) AND isnotnull(inv_warehouse_sk#23)) (41) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#32, w_warehouse_name#33] +Output [2]: [w_warehouse_sk#26, w_warehouse_name#27] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (42) ColumnarToRow [codegen id : 12] -Input [2]: [w_warehouse_sk#32, w_warehouse_name#33] +Input [2]: [w_warehouse_sk#26, w_warehouse_name#27] (43) Filter [codegen id : 12] -Input [2]: [w_warehouse_sk#32, w_warehouse_name#33] -Condition : isnotnull(w_warehouse_sk#32) +Input [2]: [w_warehouse_sk#26, w_warehouse_name#27] +Condition : isnotnull(w_warehouse_sk#26) (44) BroadcastExchange -Input [2]: [w_warehouse_sk#32, w_warehouse_name#33] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#34] +Input [2]: [w_warehouse_sk#26, w_warehouse_name#27] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] (45) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [inv_warehouse_sk#29] -Right keys [1]: [w_warehouse_sk#32] +Left keys [1]: [inv_warehouse_sk#23] +Right keys [1]: [w_warehouse_sk#26] Join condition: None (46) Project [codegen id : 13] -Output [4]: [inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] -Input [6]: [inv_item_sk#28, inv_warehouse_sk#29, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_sk#32, w_warehouse_name#33] +Output [4]: [inv_item_sk#22, inv_quantity_on_hand#24, inv_date_sk#25, w_warehouse_name#27] +Input [6]: [inv_item_sk#22, inv_warehouse_sk#23, inv_quantity_on_hand#24, inv_date_sk#25, w_warehouse_sk#26, w_warehouse_name#27] (47) Exchange -Input [4]: [inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] -Arguments: hashpartitioning(inv_item_sk#28, inv_date_sk#31, 5), ENSURE_REQUIREMENTS, [id=#35] +Input [4]: [inv_item_sk#22, inv_quantity_on_hand#24, inv_date_sk#25, w_warehouse_name#27] +Arguments: hashpartitioning(inv_item_sk#22, inv_date_sk#25, 5), ENSURE_REQUIREMENTS, [plan_id=8] (48) Sort [codegen id : 14] -Input [4]: [inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] -Arguments: [inv_item_sk#28 ASC NULLS FIRST, inv_date_sk#31 ASC NULLS FIRST], false, 0 +Input [4]: [inv_item_sk#22, inv_quantity_on_hand#24, inv_date_sk#25, w_warehouse_name#27] +Arguments: [inv_item_sk#22 ASC NULLS FIRST, inv_date_sk#25 ASC NULLS FIRST], false, 0 (49) SortMergeJoin [codegen id : 16] -Left keys [2]: [cs_item_sk#4, d_date_sk#26] -Right keys [2]: [inv_item_sk#28, inv_date_sk#31] -Join condition: (inv_quantity_on_hand#30 < cs_quantity#7) +Left keys [2]: [cs_item_sk#4, d_date_sk#21] +Right keys [2]: [inv_item_sk#22, inv_date_sk#25] +Join condition: (inv_quantity_on_hand#24 < cs_quantity#7) (50) Project [codegen id : 16] -Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] -Input [11]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#21, d_week_seq#25, d_date_sk#26, inv_item_sk#28, inv_quantity_on_hand#30, inv_date_sk#31, w_warehouse_name#33] +Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#27, i_item_desc#17, d_week_seq#20] +Input [11]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, i_item_desc#17, d_week_seq#20, d_date_sk#21, inv_item_sk#22, inv_quantity_on_hand#24, inv_date_sk#25, w_warehouse_name#27] (51) Scan parquet default.promotion -Output [1]: [p_promo_sk#36] +Output [1]: [p_promo_sk#28] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [IsNotNull(p_promo_sk)] ReadSchema: struct (52) ColumnarToRow [codegen id : 15] -Input [1]: [p_promo_sk#36] +Input [1]: [p_promo_sk#28] (53) Filter [codegen id : 15] -Input [1]: [p_promo_sk#36] -Condition : isnotnull(p_promo_sk#36) +Input [1]: [p_promo_sk#28] +Condition : isnotnull(p_promo_sk#28) (54) BroadcastExchange -Input [1]: [p_promo_sk#36] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] +Input [1]: [p_promo_sk#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] (55) BroadcastHashJoin [codegen id : 16] Left keys [1]: [cs_promo_sk#5] -Right keys [1]: [p_promo_sk#36] +Right keys [1]: [p_promo_sk#28] Join condition: None (56) Project [codegen id : 16] -Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] -Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25, p_promo_sk#36] +Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#27, i_item_desc#17, d_week_seq#20] +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#27, i_item_desc#17, d_week_seq#20, p_promo_sk#28] (57) Exchange -Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] -Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#27, i_item_desc#17, d_week_seq#20] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, [plan_id=10] (58) Sort [codegen id : 17] -Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25] +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#27, i_item_desc#17, d_week_seq#20] Arguments: [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST], false, 0 (59) Scan parquet default.catalog_returns -Output [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] +Output [3]: [cr_item_sk#29, cr_order_number#30, cr_returned_date_sk#31] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] ReadSchema: struct (60) ColumnarToRow [codegen id : 18] -Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] +Input [3]: [cr_item_sk#29, cr_order_number#30, cr_returned_date_sk#31] (61) Filter [codegen id : 18] -Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] -Condition : (isnotnull(cr_item_sk#39) AND isnotnull(cr_order_number#40)) +Input [3]: [cr_item_sk#29, cr_order_number#30, cr_returned_date_sk#31] +Condition : (isnotnull(cr_item_sk#29) AND isnotnull(cr_order_number#30)) (62) Project [codegen id : 18] -Output [2]: [cr_item_sk#39, cr_order_number#40] -Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] +Output [2]: [cr_item_sk#29, cr_order_number#30] +Input [3]: [cr_item_sk#29, cr_order_number#30, cr_returned_date_sk#31] (63) Exchange -Input [2]: [cr_item_sk#39, cr_order_number#40] -Arguments: hashpartitioning(cr_item_sk#39, cr_order_number#40, 5), ENSURE_REQUIREMENTS, [id=#42] +Input [2]: [cr_item_sk#29, cr_order_number#30] +Arguments: hashpartitioning(cr_item_sk#29, cr_order_number#30, 5), ENSURE_REQUIREMENTS, [plan_id=11] (64) Sort [codegen id : 19] -Input [2]: [cr_item_sk#39, cr_order_number#40] -Arguments: [cr_item_sk#39 ASC NULLS FIRST, cr_order_number#40 ASC NULLS FIRST], false, 0 +Input [2]: [cr_item_sk#29, cr_order_number#30] +Arguments: [cr_item_sk#29 ASC NULLS FIRST, cr_order_number#30 ASC NULLS FIRST], false, 0 (65) SortMergeJoin [codegen id : 20] Left keys [2]: [cs_item_sk#4, cs_order_number#6] -Right keys [2]: [cr_item_sk#39, cr_order_number#40] +Right keys [2]: [cr_item_sk#29, cr_order_number#30] Join condition: None (66) Project [codegen id : 20] -Output [3]: [w_warehouse_name#33, i_item_desc#21, d_week_seq#25] -Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#33, i_item_desc#21, d_week_seq#25, cr_item_sk#39, cr_order_number#40] +Output [3]: [w_warehouse_name#27, i_item_desc#17, d_week_seq#20] +Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#27, i_item_desc#17, d_week_seq#20, cr_item_sk#29, cr_order_number#30] (67) HashAggregate [codegen id : 20] -Input [3]: [w_warehouse_name#33, i_item_desc#21, d_week_seq#25] -Keys [3]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25] +Input [3]: [w_warehouse_name#27, i_item_desc#17, d_week_seq#20] +Keys [3]: [i_item_desc#17, w_warehouse_name#27, d_week_seq#20] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#43] -Results [4]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count#44] +Aggregate Attributes [1]: [count#32] +Results [4]: [i_item_desc#17, w_warehouse_name#27, d_week_seq#20, count#33] (68) Exchange -Input [4]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count#44] -Arguments: hashpartitioning(i_item_desc#21, w_warehouse_name#33, d_week_seq#25, 5), ENSURE_REQUIREMENTS, [id=#45] +Input [4]: [i_item_desc#17, w_warehouse_name#27, d_week_seq#20, count#33] +Arguments: hashpartitioning(i_item_desc#17, w_warehouse_name#27, d_week_seq#20, 5), ENSURE_REQUIREMENTS, [plan_id=12] (69) HashAggregate [codegen id : 21] -Input [4]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count#44] -Keys [3]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25] +Input [4]: [i_item_desc#17, w_warehouse_name#27, d_week_seq#20, count#33] +Keys [3]: [i_item_desc#17, w_warehouse_name#27, d_week_seq#20] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#46] -Results [6]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, count(1)#46 AS no_promo#47, count(1)#46 AS promo#48, count(1)#46 AS total_cnt#49] +Aggregate Attributes [1]: [count(1)#34] +Results [6]: [i_item_desc#17, w_warehouse_name#27, d_week_seq#20, count(1)#34 AS no_promo#35, count(1)#34 AS promo#36, count(1)#34 AS total_cnt#37] (70) TakeOrderedAndProject -Input [6]: [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, no_promo#47, promo#48, total_cnt#49] -Arguments: 100, [total_cnt#49 DESC NULLS LAST, i_item_desc#21 ASC NULLS FIRST, w_warehouse_name#33 ASC NULLS FIRST, d_week_seq#25 ASC NULLS FIRST], [i_item_desc#21, w_warehouse_name#33, d_week_seq#25, no_promo#47, promo#48, total_cnt#49] +Input [6]: [i_item_desc#17, w_warehouse_name#27, d_week_seq#20, no_promo#35, promo#36, total_cnt#37] +Arguments: 100, [total_cnt#37 DESC NULLS LAST, i_item_desc#17 ASC NULLS FIRST, w_warehouse_name#27 ASC NULLS FIRST, d_week_seq#20 ASC NULLS FIRST], [i_item_desc#17, w_warehouse_name#27, d_week_seq#20, no_promo#35, promo#36, total_cnt#37] ===== Subqueries ===== @@ -402,52 +402,52 @@ BroadcastExchange (81) (71) Scan parquet default.date_dim -Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] +Output [4]: [d_date_sk#18, d_date#19, d_week_seq#20, d_year#38] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] ReadSchema: struct (72) ColumnarToRow [codegen id : 1] -Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] +Input [4]: [d_date_sk#18, d_date#19, d_week_seq#20, d_year#38] (73) Filter [codegen id : 1] -Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] -Condition : ((((isnotnull(d_year#50) AND (d_year#50 = 2001)) AND isnotnull(d_date_sk#23)) AND isnotnull(d_week_seq#25)) AND isnotnull(d_date#24)) +Input [4]: [d_date_sk#18, d_date#19, d_week_seq#20, d_year#38] +Condition : ((((isnotnull(d_year#38) AND (d_year#38 = 2001)) AND isnotnull(d_date_sk#18)) AND isnotnull(d_week_seq#20)) AND isnotnull(d_date#19)) (74) Project [codegen id : 1] -Output [3]: [d_date_sk#23, d_date#24, d_week_seq#25] -Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_year#50] +Output [3]: [d_date_sk#18, d_date#19, d_week_seq#20] +Input [4]: [d_date_sk#18, d_date#19, d_week_seq#20, d_year#38] (75) BroadcastExchange -Input [3]: [d_date_sk#23, d_date#24, d_week_seq#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [id=#51] +Input [3]: [d_date_sk#18, d_date#19, d_week_seq#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[2, int, true] as bigint)),false), [plan_id=13] (76) Scan parquet default.date_dim -Output [2]: [d_date_sk#26, d_week_seq#52] +Output [2]: [d_date_sk#21, d_week_seq#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct (77) ColumnarToRow -Input [2]: [d_date_sk#26, d_week_seq#52] +Input [2]: [d_date_sk#21, d_week_seq#39] (78) Filter -Input [2]: [d_date_sk#26, d_week_seq#52] -Condition : (isnotnull(d_week_seq#52) AND isnotnull(d_date_sk#26)) +Input [2]: [d_date_sk#21, d_week_seq#39] +Condition : (isnotnull(d_week_seq#39) AND isnotnull(d_date_sk#21)) (79) BroadcastHashJoin [codegen id : 2] -Left keys [1]: [d_week_seq#25] -Right keys [1]: [d_week_seq#52] +Left keys [1]: [d_week_seq#20] +Right keys [1]: [d_week_seq#39] Join condition: None (80) Project [codegen id : 2] -Output [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] -Input [5]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26, d_week_seq#52] +Output [4]: [d_date_sk#18, d_date#19, d_week_seq#20, d_date_sk#21] +Input [5]: [d_date_sk#18, d_date#19, d_week_seq#20, d_date_sk#21, d_week_seq#39] (81) BroadcastExchange -Input [4]: [d_date_sk#23, d_date#24, d_week_seq#25, d_date_sk#26] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#53] +Input [4]: [d_date_sk#18, d_date#19, d_week_seq#20, d_date_sk#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=14] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72/explain.txt index 33ef0ab30ebc9..d2ed2bf03f1d5 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q72/explain.txt @@ -103,7 +103,7 @@ Condition : ((isnotnull(inv_quantity_on_hand#12) AND isnotnull(inv_item_sk#10)) (7) BroadcastExchange Input [4]: [inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12, inv_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_item_sk#4] @@ -115,278 +115,278 @@ Output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_ Input [12]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_quantity#7, cs_sold_date_sk#8, inv_item_sk#10, inv_warehouse_sk#11, inv_quantity_on_hand#12, inv_date_sk#13] (10) Scan parquet default.warehouse -Output [2]: [w_warehouse_sk#15, w_warehouse_name#16] +Output [2]: [w_warehouse_sk#14, w_warehouse_name#15] Batched: true Location [not included in comparison]/{warehouse_dir}/warehouse] PushedFilters: [IsNotNull(w_warehouse_sk)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] (12) Filter [codegen id : 2] -Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] -Condition : isnotnull(w_warehouse_sk#15) +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] +Condition : isnotnull(w_warehouse_sk#14) (13) BroadcastExchange -Input [2]: [w_warehouse_sk#15, w_warehouse_name#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#17] +Input [2]: [w_warehouse_sk#14, w_warehouse_name#15] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 10] Left keys [1]: [inv_warehouse_sk#11] -Right keys [1]: [w_warehouse_sk#15] +Right keys [1]: [w_warehouse_sk#14] Join condition: None (15) Project [codegen id : 10] -Output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16] -Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#11, inv_date_sk#13, w_warehouse_sk#15, w_warehouse_name#16] +Output [9]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_warehouse_sk#11, inv_date_sk#13, w_warehouse_sk#14, w_warehouse_name#15] (16) Scan parquet default.item -Output [2]: [i_item_sk#18, i_item_desc#19] +Output [2]: [i_item_sk#16, i_item_desc#17] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_item_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 3] -Input [2]: [i_item_sk#18, i_item_desc#19] +Input [2]: [i_item_sk#16, i_item_desc#17] (18) Filter [codegen id : 3] -Input [2]: [i_item_sk#18, i_item_desc#19] -Condition : isnotnull(i_item_sk#18) +Input [2]: [i_item_sk#16, i_item_desc#17] +Condition : isnotnull(i_item_sk#16) (19) BroadcastExchange -Input [2]: [i_item_sk#18, i_item_desc#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#20] +Input [2]: [i_item_sk#16, i_item_desc#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (20) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_item_sk#4] -Right keys [1]: [i_item_sk#18] +Right keys [1]: [i_item_sk#16] Join condition: None (21) Project [codegen id : 10] -Output [10]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19] -Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16, i_item_sk#18, i_item_desc#19] +Output [10]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15, i_item_sk#16, i_item_desc#17] (22) Scan parquet default.customer_demographics -Output [2]: [cd_demo_sk#21, cd_marital_status#22] +Output [2]: [cd_demo_sk#18, cd_marital_status#19] Batched: true Location [not included in comparison]/{warehouse_dir}/customer_demographics] PushedFilters: [IsNotNull(cd_marital_status), EqualTo(cd_marital_status,M), IsNotNull(cd_demo_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 4] -Input [2]: [cd_demo_sk#21, cd_marital_status#22] +Input [2]: [cd_demo_sk#18, cd_marital_status#19] (24) Filter [codegen id : 4] -Input [2]: [cd_demo_sk#21, cd_marital_status#22] -Condition : ((isnotnull(cd_marital_status#22) AND (cd_marital_status#22 = M)) AND isnotnull(cd_demo_sk#21)) +Input [2]: [cd_demo_sk#18, cd_marital_status#19] +Condition : ((isnotnull(cd_marital_status#19) AND (cd_marital_status#19 = M)) AND isnotnull(cd_demo_sk#18)) (25) Project [codegen id : 4] -Output [1]: [cd_demo_sk#21] -Input [2]: [cd_demo_sk#21, cd_marital_status#22] +Output [1]: [cd_demo_sk#18] +Input [2]: [cd_demo_sk#18, cd_marital_status#19] (26) BroadcastExchange -Input [1]: [cd_demo_sk#21] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#23] +Input [1]: [cd_demo_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (27) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_bill_cdemo_sk#2] -Right keys [1]: [cd_demo_sk#21] +Right keys [1]: [cd_demo_sk#18] Join condition: None (28) Project [codegen id : 10] -Output [9]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19] -Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19, cd_demo_sk#21] +Output [9]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17] +Input [11]: [cs_ship_date_sk#1, cs_bill_cdemo_sk#2, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17, cd_demo_sk#18] (29) Scan parquet default.household_demographics -Output [2]: [hd_demo_sk#24, hd_buy_potential#25] +Output [2]: [hd_demo_sk#20, hd_buy_potential#21] Batched: true Location [not included in comparison]/{warehouse_dir}/household_demographics] PushedFilters: [IsNotNull(hd_buy_potential), EqualTo(hd_buy_potential,1001-5000 ), IsNotNull(hd_demo_sk)] ReadSchema: struct (30) ColumnarToRow [codegen id : 5] -Input [2]: [hd_demo_sk#24, hd_buy_potential#25] +Input [2]: [hd_demo_sk#20, hd_buy_potential#21] (31) Filter [codegen id : 5] -Input [2]: [hd_demo_sk#24, hd_buy_potential#25] -Condition : ((isnotnull(hd_buy_potential#25) AND (hd_buy_potential#25 = 1001-5000 )) AND isnotnull(hd_demo_sk#24)) +Input [2]: [hd_demo_sk#20, hd_buy_potential#21] +Condition : ((isnotnull(hd_buy_potential#21) AND (hd_buy_potential#21 = 1001-5000 )) AND isnotnull(hd_demo_sk#20)) (32) Project [codegen id : 5] -Output [1]: [hd_demo_sk#24] -Input [2]: [hd_demo_sk#24, hd_buy_potential#25] +Output [1]: [hd_demo_sk#20] +Input [2]: [hd_demo_sk#20, hd_buy_potential#21] (33) BroadcastExchange -Input [1]: [hd_demo_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [1]: [hd_demo_sk#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_bill_hdemo_sk#3] -Right keys [1]: [hd_demo_sk#24] +Right keys [1]: [hd_demo_sk#20] Join condition: None (35) Project [codegen id : 10] -Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19] -Input [10]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19, hd_demo_sk#24] +Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17] +Input [10]: [cs_ship_date_sk#1, cs_bill_hdemo_sk#3, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17, hd_demo_sk#20] (36) ReusedExchange [Reuses operator id: 75] -Output [3]: [d_date_sk#27, d_date#28, d_week_seq#29] +Output [3]: [d_date_sk#22, d_date#23, d_week_seq#24] (37) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_sold_date_sk#8] -Right keys [1]: [d_date_sk#27] +Right keys [1]: [d_date_sk#22] Join condition: None (38) Project [codegen id : 10] -Output [9]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19, d_date#28, d_week_seq#29] -Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19, d_date_sk#27, d_date#28, d_week_seq#29] +Output [9]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17, d_date#23, d_week_seq#24] +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, cs_sold_date_sk#8, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17, d_date_sk#22, d_date#23, d_week_seq#24] (39) Scan parquet default.date_dim -Output [2]: [d_date_sk#30, d_week_seq#31] +Output [2]: [d_date_sk#25, d_week_seq#26] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_week_seq), IsNotNull(d_date_sk)] ReadSchema: struct (40) ColumnarToRow [codegen id : 7] -Input [2]: [d_date_sk#30, d_week_seq#31] +Input [2]: [d_date_sk#25, d_week_seq#26] (41) Filter [codegen id : 7] -Input [2]: [d_date_sk#30, d_week_seq#31] -Condition : (isnotnull(d_week_seq#31) AND isnotnull(d_date_sk#30)) +Input [2]: [d_date_sk#25, d_week_seq#26] +Condition : (isnotnull(d_week_seq#26) AND isnotnull(d_date_sk#25)) (42) BroadcastExchange -Input [2]: [d_date_sk#30, d_week_seq#31] -Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [id=#32] +Input [2]: [d_date_sk#25, d_week_seq#26] +Arguments: HashedRelationBroadcastMode(List((shiftleft(cast(input[1, int, false] as bigint), 32) | (cast(input[0, int, false] as bigint) & 4294967295))),false), [plan_id=6] (43) BroadcastHashJoin [codegen id : 10] -Left keys [2]: [d_week_seq#29, inv_date_sk#13] -Right keys [2]: [d_week_seq#31, d_date_sk#30] +Left keys [2]: [d_week_seq#24, inv_date_sk#13] +Right keys [2]: [d_week_seq#26, d_date_sk#25] Join condition: None (44) Project [codegen id : 10] -Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_date#28, d_week_seq#29] -Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#13, w_warehouse_name#16, i_item_desc#19, d_date#28, d_week_seq#29, d_date_sk#30, d_week_seq#31] +Output [8]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_date#23, d_week_seq#24] +Input [11]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, inv_date_sk#13, w_warehouse_name#15, i_item_desc#17, d_date#23, d_week_seq#24, d_date_sk#25, d_week_seq#26] (45) Scan parquet default.date_dim -Output [2]: [d_date_sk#33, d_date#34] +Output [2]: [d_date_sk#27, d_date#28] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), IsNotNull(d_date_sk)] ReadSchema: struct (46) ColumnarToRow [codegen id : 8] -Input [2]: [d_date_sk#33, d_date#34] +Input [2]: [d_date_sk#27, d_date#28] (47) Filter [codegen id : 8] -Input [2]: [d_date_sk#33, d_date#34] -Condition : (isnotnull(d_date#34) AND isnotnull(d_date_sk#33)) +Input [2]: [d_date_sk#27, d_date#28] +Condition : (isnotnull(d_date#28) AND isnotnull(d_date_sk#27)) (48) BroadcastExchange -Input [2]: [d_date_sk#33, d_date#34] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#35] +Input [2]: [d_date_sk#27, d_date#28] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=7] (49) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_ship_date_sk#1] -Right keys [1]: [d_date_sk#33] -Join condition: (d_date#34 > date_add(d_date#28, 5)) +Right keys [1]: [d_date_sk#27] +Join condition: (d_date#28 > date_add(d_date#23, 5)) (50) Project [codegen id : 10] -Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_week_seq#29] -Input [10]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_date#28, d_week_seq#29, d_date_sk#33, d_date#34] +Output [6]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_week_seq#24] +Input [10]: [cs_ship_date_sk#1, cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_date#23, d_week_seq#24, d_date_sk#27, d_date#28] (51) Scan parquet default.promotion -Output [1]: [p_promo_sk#36] +Output [1]: [p_promo_sk#29] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [IsNotNull(p_promo_sk)] ReadSchema: struct (52) ColumnarToRow [codegen id : 9] -Input [1]: [p_promo_sk#36] +Input [1]: [p_promo_sk#29] (53) Filter [codegen id : 9] -Input [1]: [p_promo_sk#36] -Condition : isnotnull(p_promo_sk#36) +Input [1]: [p_promo_sk#29] +Condition : isnotnull(p_promo_sk#29) (54) BroadcastExchange -Input [1]: [p_promo_sk#36] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#37] +Input [1]: [p_promo_sk#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] (55) BroadcastHashJoin [codegen id : 10] Left keys [1]: [cs_promo_sk#5] -Right keys [1]: [p_promo_sk#36] +Right keys [1]: [p_promo_sk#29] Join condition: None (56) Project [codegen id : 10] -Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_week_seq#29] -Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_week_seq#29, p_promo_sk#36] +Output [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_week_seq#24] +Input [7]: [cs_item_sk#4, cs_promo_sk#5, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_week_seq#24, p_promo_sk#29] (57) Exchange -Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_week_seq#29] -Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_week_seq#24] +Arguments: hashpartitioning(cs_item_sk#4, cs_order_number#6, 5), ENSURE_REQUIREMENTS, [plan_id=9] (58) Sort [codegen id : 11] -Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_week_seq#29] +Input [5]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_week_seq#24] Arguments: [cs_item_sk#4 ASC NULLS FIRST, cs_order_number#6 ASC NULLS FIRST], false, 0 (59) Scan parquet default.catalog_returns -Output [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] +Output [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] ReadSchema: struct (60) ColumnarToRow [codegen id : 12] -Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] (61) Filter [codegen id : 12] -Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] -Condition : (isnotnull(cr_item_sk#39) AND isnotnull(cr_order_number#40)) +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] +Condition : (isnotnull(cr_item_sk#30) AND isnotnull(cr_order_number#31)) (62) Project [codegen id : 12] -Output [2]: [cr_item_sk#39, cr_order_number#40] -Input [3]: [cr_item_sk#39, cr_order_number#40, cr_returned_date_sk#41] +Output [2]: [cr_item_sk#30, cr_order_number#31] +Input [3]: [cr_item_sk#30, cr_order_number#31, cr_returned_date_sk#32] (63) Exchange -Input [2]: [cr_item_sk#39, cr_order_number#40] -Arguments: hashpartitioning(cr_item_sk#39, cr_order_number#40, 5), ENSURE_REQUIREMENTS, [id=#42] +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: hashpartitioning(cr_item_sk#30, cr_order_number#31, 5), ENSURE_REQUIREMENTS, [plan_id=10] (64) Sort [codegen id : 13] -Input [2]: [cr_item_sk#39, cr_order_number#40] -Arguments: [cr_item_sk#39 ASC NULLS FIRST, cr_order_number#40 ASC NULLS FIRST], false, 0 +Input [2]: [cr_item_sk#30, cr_order_number#31] +Arguments: [cr_item_sk#30 ASC NULLS FIRST, cr_order_number#31 ASC NULLS FIRST], false, 0 (65) SortMergeJoin [codegen id : 14] Left keys [2]: [cs_item_sk#4, cs_order_number#6] -Right keys [2]: [cr_item_sk#39, cr_order_number#40] +Right keys [2]: [cr_item_sk#30, cr_order_number#31] Join condition: None (66) Project [codegen id : 14] -Output [3]: [w_warehouse_name#16, i_item_desc#19, d_week_seq#29] -Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#16, i_item_desc#19, d_week_seq#29, cr_item_sk#39, cr_order_number#40] +Output [3]: [w_warehouse_name#15, i_item_desc#17, d_week_seq#24] +Input [7]: [cs_item_sk#4, cs_order_number#6, w_warehouse_name#15, i_item_desc#17, d_week_seq#24, cr_item_sk#30, cr_order_number#31] (67) HashAggregate [codegen id : 14] -Input [3]: [w_warehouse_name#16, i_item_desc#19, d_week_seq#29] -Keys [3]: [i_item_desc#19, w_warehouse_name#16, d_week_seq#29] +Input [3]: [w_warehouse_name#15, i_item_desc#17, d_week_seq#24] +Keys [3]: [i_item_desc#17, w_warehouse_name#15, d_week_seq#24] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#43] -Results [4]: [i_item_desc#19, w_warehouse_name#16, d_week_seq#29, count#44] +Aggregate Attributes [1]: [count#33] +Results [4]: [i_item_desc#17, w_warehouse_name#15, d_week_seq#24, count#34] (68) Exchange -Input [4]: [i_item_desc#19, w_warehouse_name#16, d_week_seq#29, count#44] -Arguments: hashpartitioning(i_item_desc#19, w_warehouse_name#16, d_week_seq#29, 5), ENSURE_REQUIREMENTS, [id=#45] +Input [4]: [i_item_desc#17, w_warehouse_name#15, d_week_seq#24, count#34] +Arguments: hashpartitioning(i_item_desc#17, w_warehouse_name#15, d_week_seq#24, 5), ENSURE_REQUIREMENTS, [plan_id=11] (69) HashAggregate [codegen id : 15] -Input [4]: [i_item_desc#19, w_warehouse_name#16, d_week_seq#29, count#44] -Keys [3]: [i_item_desc#19, w_warehouse_name#16, d_week_seq#29] +Input [4]: [i_item_desc#17, w_warehouse_name#15, d_week_seq#24, count#34] +Keys [3]: [i_item_desc#17, w_warehouse_name#15, d_week_seq#24] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#46] -Results [6]: [i_item_desc#19, w_warehouse_name#16, d_week_seq#29, count(1)#46 AS no_promo#47, count(1)#46 AS promo#48, count(1)#46 AS total_cnt#49] +Aggregate Attributes [1]: [count(1)#35] +Results [6]: [i_item_desc#17, w_warehouse_name#15, d_week_seq#24, count(1)#35 AS no_promo#36, count(1)#35 AS promo#37, count(1)#35 AS total_cnt#38] (70) TakeOrderedAndProject -Input [6]: [i_item_desc#19, w_warehouse_name#16, d_week_seq#29, no_promo#47, promo#48, total_cnt#49] -Arguments: 100, [total_cnt#49 DESC NULLS LAST, i_item_desc#19 ASC NULLS FIRST, w_warehouse_name#16 ASC NULLS FIRST, d_week_seq#29 ASC NULLS FIRST], [i_item_desc#19, w_warehouse_name#16, d_week_seq#29, no_promo#47, promo#48, total_cnt#49] +Input [6]: [i_item_desc#17, w_warehouse_name#15, d_week_seq#24, no_promo#36, promo#37, total_cnt#38] +Arguments: 100, [total_cnt#38 DESC NULLS LAST, i_item_desc#17 ASC NULLS FIRST, w_warehouse_name#15 ASC NULLS FIRST, d_week_seq#24 ASC NULLS FIRST], [i_item_desc#17, w_warehouse_name#15, d_week_seq#24, no_promo#36, promo#37, total_cnt#38] ===== Subqueries ===== @@ -399,25 +399,25 @@ BroadcastExchange (75) (71) Scan parquet default.date_dim -Output [4]: [d_date_sk#27, d_date#28, d_week_seq#29, d_year#50] +Output [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#39] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk), IsNotNull(d_week_seq), IsNotNull(d_date)] ReadSchema: struct (72) ColumnarToRow [codegen id : 1] -Input [4]: [d_date_sk#27, d_date#28, d_week_seq#29, d_year#50] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#39] (73) Filter [codegen id : 1] -Input [4]: [d_date_sk#27, d_date#28, d_week_seq#29, d_year#50] -Condition : ((((isnotnull(d_year#50) AND (d_year#50 = 2001)) AND isnotnull(d_date_sk#27)) AND isnotnull(d_week_seq#29)) AND isnotnull(d_date#28)) +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#39] +Condition : ((((isnotnull(d_year#39) AND (d_year#39 = 2001)) AND isnotnull(d_date_sk#22)) AND isnotnull(d_week_seq#24)) AND isnotnull(d_date#23)) (74) Project [codegen id : 1] -Output [3]: [d_date_sk#27, d_date#28, d_week_seq#29] -Input [4]: [d_date_sk#27, d_date#28, d_week_seq#29, d_year#50] +Output [3]: [d_date_sk#22, d_date#23, d_week_seq#24] +Input [4]: [d_date_sk#22, d_date#23, d_week_seq#24, d_year#39] (75) BroadcastExchange -Input [3]: [d_date_sk#27, d_date#28, d_week_seq#29] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#51] +Input [3]: [d_date_sk#22, d_date#23, d_week_seq#24] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=12] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.sf100/explain.txt index 7ee6ada91dfea..8e9e50cc2f0a2 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74.sf100/explain.txt @@ -109,334 +109,334 @@ Input [5]: [ss_customer_sk#1, ss_net_paid#2, ss_sold_date_sk#3, d_date_sk#5, d_y (7) Exchange Input [3]: [ss_customer_sk#1, ss_net_paid#2, d_year#6] -Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [id=#7] +Arguments: hashpartitioning(ss_customer_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (8) Sort [codegen id : 3] Input [3]: [ss_customer_sk#1, ss_net_paid#2, d_year#6] Arguments: [ss_customer_sk#1 ASC NULLS FIRST], false, 0 (9) Scan parquet default.customer -Output [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Output [4]: [c_customer_sk#7, c_customer_id#8, c_first_name#9, c_last_name#10] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (10) ColumnarToRow [codegen id : 4] -Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Input [4]: [c_customer_sk#7, c_customer_id#8, c_first_name#9, c_last_name#10] (11) Filter [codegen id : 4] -Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] -Condition : (isnotnull(c_customer_sk#8) AND isnotnull(c_customer_id#9)) +Input [4]: [c_customer_sk#7, c_customer_id#8, c_first_name#9, c_last_name#10] +Condition : (isnotnull(c_customer_sk#7) AND isnotnull(c_customer_id#8)) (12) Exchange -Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] -Arguments: hashpartitioning(c_customer_sk#8, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [4]: [c_customer_sk#7, c_customer_id#8, c_first_name#9, c_last_name#10] +Arguments: hashpartitioning(c_customer_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (13) Sort [codegen id : 5] -Input [4]: [c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] -Arguments: [c_customer_sk#8 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#7, c_customer_id#8, c_first_name#9, c_last_name#10] +Arguments: [c_customer_sk#7 ASC NULLS FIRST], false, 0 (14) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_customer_sk#1] -Right keys [1]: [c_customer_sk#8] +Right keys [1]: [c_customer_sk#7] Join condition: None (15) Project [codegen id : 6] -Output [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ss_net_paid#2, d_year#6] -Input [7]: [ss_customer_sk#1, ss_net_paid#2, d_year#6, c_customer_sk#8, c_customer_id#9, c_first_name#10, c_last_name#11] +Output [5]: [c_customer_id#8, c_first_name#9, c_last_name#10, ss_net_paid#2, d_year#6] +Input [7]: [ss_customer_sk#1, ss_net_paid#2, d_year#6, c_customer_sk#7, c_customer_id#8, c_first_name#9, c_last_name#10] (16) HashAggregate [codegen id : 6] -Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, ss_net_paid#2, d_year#6] -Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#6] +Input [5]: [c_customer_id#8, c_first_name#9, c_last_name#10, ss_net_paid#2, d_year#6] +Keys [4]: [c_customer_id#8, c_first_name#9, c_last_name#10, d_year#6] Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#2))] -Aggregate Attributes [1]: [sum#13] -Results [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#6, sum#14] +Aggregate Attributes [1]: [sum#11] +Results [5]: [c_customer_id#8, c_first_name#9, c_last_name#10, d_year#6, sum#12] (17) Exchange -Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#6, sum#14] -Arguments: hashpartitioning(c_customer_id#9, c_first_name#10, c_last_name#11, d_year#6, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [5]: [c_customer_id#8, c_first_name#9, c_last_name#10, d_year#6, sum#12] +Arguments: hashpartitioning(c_customer_id#8, c_first_name#9, c_last_name#10, d_year#6, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 7] -Input [5]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#6, sum#14] -Keys [4]: [c_customer_id#9, c_first_name#10, c_last_name#11, d_year#6] +Input [5]: [c_customer_id#8, c_first_name#9, c_last_name#10, d_year#6, sum#12] +Keys [4]: [c_customer_id#8, c_first_name#9, c_last_name#10, d_year#6] Functions [1]: [sum(UnscaledValue(ss_net_paid#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#2))#16] -Results [2]: [c_customer_id#9 AS customer_id#17, MakeDecimal(sum(UnscaledValue(ss_net_paid#2))#16,17,2) AS year_total#18] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#2))#13] +Results [2]: [c_customer_id#8 AS customer_id#14, MakeDecimal(sum(UnscaledValue(ss_net_paid#2))#13,17,2) AS year_total#15] (19) Filter [codegen id : 7] -Input [2]: [customer_id#17, year_total#18] -Condition : (isnotnull(year_total#18) AND (year_total#18 > 0.00)) +Input [2]: [customer_id#14, year_total#15] +Condition : (isnotnull(year_total#15) AND (year_total#15 > 0.00)) (20) Exchange -Input [2]: [customer_id#17, year_total#18] -Arguments: hashpartitioning(customer_id#17, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [2]: [customer_id#14, year_total#15] +Arguments: hashpartitioning(customer_id#14, 5), ENSURE_REQUIREMENTS, [plan_id=4] (21) Sort [codegen id : 8] -Input [2]: [customer_id#17, year_total#18] -Arguments: [customer_id#17 ASC NULLS FIRST], false, 0 +Input [2]: [customer_id#14, year_total#15] +Arguments: [customer_id#14 ASC NULLS FIRST], false, 0 (22) Scan parquet default.store_sales -Output [3]: [ss_customer_sk#20, ss_net_paid#21, ss_sold_date_sk#22] +Output [3]: [ss_customer_sk#16, ss_net_paid#17, ss_sold_date_sk#18] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#22), dynamicpruningexpression(ss_sold_date_sk#22 IN dynamicpruning#23)] +PartitionFilters: [isnotnull(ss_sold_date_sk#18), dynamicpruningexpression(ss_sold_date_sk#18 IN dynamicpruning#19)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 10] -Input [3]: [ss_customer_sk#20, ss_net_paid#21, ss_sold_date_sk#22] +Input [3]: [ss_customer_sk#16, ss_net_paid#17, ss_sold_date_sk#18] (24) Filter [codegen id : 10] -Input [3]: [ss_customer_sk#20, ss_net_paid#21, ss_sold_date_sk#22] -Condition : isnotnull(ss_customer_sk#20) +Input [3]: [ss_customer_sk#16, ss_net_paid#17, ss_sold_date_sk#18] +Condition : isnotnull(ss_customer_sk#16) (25) ReusedExchange [Reuses operator id: 87] -Output [2]: [d_date_sk#24, d_year#25] +Output [2]: [d_date_sk#20, d_year#21] (26) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_sold_date_sk#22] -Right keys [1]: [d_date_sk#24] +Left keys [1]: [ss_sold_date_sk#18] +Right keys [1]: [d_date_sk#20] Join condition: None (27) Project [codegen id : 10] -Output [3]: [ss_customer_sk#20, ss_net_paid#21, d_year#25] -Input [5]: [ss_customer_sk#20, ss_net_paid#21, ss_sold_date_sk#22, d_date_sk#24, d_year#25] +Output [3]: [ss_customer_sk#16, ss_net_paid#17, d_year#21] +Input [5]: [ss_customer_sk#16, ss_net_paid#17, ss_sold_date_sk#18, d_date_sk#20, d_year#21] (28) Exchange -Input [3]: [ss_customer_sk#20, ss_net_paid#21, d_year#25] -Arguments: hashpartitioning(ss_customer_sk#20, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [3]: [ss_customer_sk#16, ss_net_paid#17, d_year#21] +Arguments: hashpartitioning(ss_customer_sk#16, 5), ENSURE_REQUIREMENTS, [plan_id=5] (29) Sort [codegen id : 11] -Input [3]: [ss_customer_sk#20, ss_net_paid#21, d_year#25] -Arguments: [ss_customer_sk#20 ASC NULLS FIRST], false, 0 +Input [3]: [ss_customer_sk#16, ss_net_paid#17, d_year#21] +Arguments: [ss_customer_sk#16 ASC NULLS FIRST], false, 0 (30) ReusedExchange [Reuses operator id: 12] -Output [4]: [c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30] +Output [4]: [c_customer_sk#22, c_customer_id#23, c_first_name#24, c_last_name#25] (31) Sort [codegen id : 13] -Input [4]: [c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30] -Arguments: [c_customer_sk#27 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#22, c_customer_id#23, c_first_name#24, c_last_name#25] +Arguments: [c_customer_sk#22 ASC NULLS FIRST], false, 0 (32) SortMergeJoin [codegen id : 14] -Left keys [1]: [ss_customer_sk#20] -Right keys [1]: [c_customer_sk#27] +Left keys [1]: [ss_customer_sk#16] +Right keys [1]: [c_customer_sk#22] Join condition: None (33) Project [codegen id : 14] -Output [5]: [c_customer_id#28, c_first_name#29, c_last_name#30, ss_net_paid#21, d_year#25] -Input [7]: [ss_customer_sk#20, ss_net_paid#21, d_year#25, c_customer_sk#27, c_customer_id#28, c_first_name#29, c_last_name#30] +Output [5]: [c_customer_id#23, c_first_name#24, c_last_name#25, ss_net_paid#17, d_year#21] +Input [7]: [ss_customer_sk#16, ss_net_paid#17, d_year#21, c_customer_sk#22, c_customer_id#23, c_first_name#24, c_last_name#25] (34) HashAggregate [codegen id : 14] -Input [5]: [c_customer_id#28, c_first_name#29, c_last_name#30, ss_net_paid#21, d_year#25] -Keys [4]: [c_customer_id#28, c_first_name#29, c_last_name#30, d_year#25] -Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#21))] -Aggregate Attributes [1]: [sum#31] -Results [5]: [c_customer_id#28, c_first_name#29, c_last_name#30, d_year#25, sum#32] +Input [5]: [c_customer_id#23, c_first_name#24, c_last_name#25, ss_net_paid#17, d_year#21] +Keys [4]: [c_customer_id#23, c_first_name#24, c_last_name#25, d_year#21] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#17))] +Aggregate Attributes [1]: [sum#26] +Results [5]: [c_customer_id#23, c_first_name#24, c_last_name#25, d_year#21, sum#27] (35) Exchange -Input [5]: [c_customer_id#28, c_first_name#29, c_last_name#30, d_year#25, sum#32] -Arguments: hashpartitioning(c_customer_id#28, c_first_name#29, c_last_name#30, d_year#25, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [5]: [c_customer_id#23, c_first_name#24, c_last_name#25, d_year#21, sum#27] +Arguments: hashpartitioning(c_customer_id#23, c_first_name#24, c_last_name#25, d_year#21, 5), ENSURE_REQUIREMENTS, [plan_id=6] (36) HashAggregate [codegen id : 15] -Input [5]: [c_customer_id#28, c_first_name#29, c_last_name#30, d_year#25, sum#32] -Keys [4]: [c_customer_id#28, c_first_name#29, c_last_name#30, d_year#25] -Functions [1]: [sum(UnscaledValue(ss_net_paid#21))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#21))#16] -Results [4]: [c_customer_id#28 AS customer_id#34, c_first_name#29 AS customer_first_name#35, c_last_name#30 AS customer_last_name#36, MakeDecimal(sum(UnscaledValue(ss_net_paid#21))#16,17,2) AS year_total#37] +Input [5]: [c_customer_id#23, c_first_name#24, c_last_name#25, d_year#21, sum#27] +Keys [4]: [c_customer_id#23, c_first_name#24, c_last_name#25, d_year#21] +Functions [1]: [sum(UnscaledValue(ss_net_paid#17))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#17))#13] +Results [4]: [c_customer_id#23 AS customer_id#28, c_first_name#24 AS customer_first_name#29, c_last_name#25 AS customer_last_name#30, MakeDecimal(sum(UnscaledValue(ss_net_paid#17))#13,17,2) AS year_total#31] (37) Exchange -Input [4]: [customer_id#34, customer_first_name#35, customer_last_name#36, year_total#37] -Arguments: hashpartitioning(customer_id#34, 5), ENSURE_REQUIREMENTS, [id=#38] +Input [4]: [customer_id#28, customer_first_name#29, customer_last_name#30, year_total#31] +Arguments: hashpartitioning(customer_id#28, 5), ENSURE_REQUIREMENTS, [plan_id=7] (38) Sort [codegen id : 16] -Input [4]: [customer_id#34, customer_first_name#35, customer_last_name#36, year_total#37] -Arguments: [customer_id#34 ASC NULLS FIRST], false, 0 +Input [4]: [customer_id#28, customer_first_name#29, customer_last_name#30, year_total#31] +Arguments: [customer_id#28 ASC NULLS FIRST], false, 0 (39) SortMergeJoin [codegen id : 17] -Left keys [1]: [customer_id#17] -Right keys [1]: [customer_id#34] +Left keys [1]: [customer_id#14] +Right keys [1]: [customer_id#28] Join condition: None (40) Scan parquet default.web_sales -Output [3]: [ws_bill_customer_sk#39, ws_net_paid#40, ws_sold_date_sk#41] +Output [3]: [ws_bill_customer_sk#32, ws_net_paid#33, ws_sold_date_sk#34] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#41), dynamicpruningexpression(ws_sold_date_sk#41 IN dynamicpruning#4)] +PartitionFilters: [isnotnull(ws_sold_date_sk#34), dynamicpruningexpression(ws_sold_date_sk#34 IN dynamicpruning#4)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (41) ColumnarToRow [codegen id : 19] -Input [3]: [ws_bill_customer_sk#39, ws_net_paid#40, ws_sold_date_sk#41] +Input [3]: [ws_bill_customer_sk#32, ws_net_paid#33, ws_sold_date_sk#34] (42) Filter [codegen id : 19] -Input [3]: [ws_bill_customer_sk#39, ws_net_paid#40, ws_sold_date_sk#41] -Condition : isnotnull(ws_bill_customer_sk#39) +Input [3]: [ws_bill_customer_sk#32, ws_net_paid#33, ws_sold_date_sk#34] +Condition : isnotnull(ws_bill_customer_sk#32) (43) ReusedExchange [Reuses operator id: 83] -Output [2]: [d_date_sk#42, d_year#43] +Output [2]: [d_date_sk#35, d_year#36] (44) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [ws_sold_date_sk#41] -Right keys [1]: [d_date_sk#42] +Left keys [1]: [ws_sold_date_sk#34] +Right keys [1]: [d_date_sk#35] Join condition: None (45) Project [codegen id : 19] -Output [3]: [ws_bill_customer_sk#39, ws_net_paid#40, d_year#43] -Input [5]: [ws_bill_customer_sk#39, ws_net_paid#40, ws_sold_date_sk#41, d_date_sk#42, d_year#43] +Output [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#36] +Input [5]: [ws_bill_customer_sk#32, ws_net_paid#33, ws_sold_date_sk#34, d_date_sk#35, d_year#36] (46) Exchange -Input [3]: [ws_bill_customer_sk#39, ws_net_paid#40, d_year#43] -Arguments: hashpartitioning(ws_bill_customer_sk#39, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#36] +Arguments: hashpartitioning(ws_bill_customer_sk#32, 5), ENSURE_REQUIREMENTS, [plan_id=8] (47) Sort [codegen id : 20] -Input [3]: [ws_bill_customer_sk#39, ws_net_paid#40, d_year#43] -Arguments: [ws_bill_customer_sk#39 ASC NULLS FIRST], false, 0 +Input [3]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#36] +Arguments: [ws_bill_customer_sk#32 ASC NULLS FIRST], false, 0 (48) ReusedExchange [Reuses operator id: 12] -Output [4]: [c_customer_sk#45, c_customer_id#46, c_first_name#47, c_last_name#48] +Output [4]: [c_customer_sk#37, c_customer_id#38, c_first_name#39, c_last_name#40] (49) Sort [codegen id : 22] -Input [4]: [c_customer_sk#45, c_customer_id#46, c_first_name#47, c_last_name#48] -Arguments: [c_customer_sk#45 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#37, c_customer_id#38, c_first_name#39, c_last_name#40] +Arguments: [c_customer_sk#37 ASC NULLS FIRST], false, 0 (50) SortMergeJoin [codegen id : 23] -Left keys [1]: [ws_bill_customer_sk#39] -Right keys [1]: [c_customer_sk#45] +Left keys [1]: [ws_bill_customer_sk#32] +Right keys [1]: [c_customer_sk#37] Join condition: None (51) Project [codegen id : 23] -Output [5]: [c_customer_id#46, c_first_name#47, c_last_name#48, ws_net_paid#40, d_year#43] -Input [7]: [ws_bill_customer_sk#39, ws_net_paid#40, d_year#43, c_customer_sk#45, c_customer_id#46, c_first_name#47, c_last_name#48] +Output [5]: [c_customer_id#38, c_first_name#39, c_last_name#40, ws_net_paid#33, d_year#36] +Input [7]: [ws_bill_customer_sk#32, ws_net_paid#33, d_year#36, c_customer_sk#37, c_customer_id#38, c_first_name#39, c_last_name#40] (52) HashAggregate [codegen id : 23] -Input [5]: [c_customer_id#46, c_first_name#47, c_last_name#48, ws_net_paid#40, d_year#43] -Keys [4]: [c_customer_id#46, c_first_name#47, c_last_name#48, d_year#43] -Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#40))] -Aggregate Attributes [1]: [sum#49] -Results [5]: [c_customer_id#46, c_first_name#47, c_last_name#48, d_year#43, sum#50] +Input [5]: [c_customer_id#38, c_first_name#39, c_last_name#40, ws_net_paid#33, d_year#36] +Keys [4]: [c_customer_id#38, c_first_name#39, c_last_name#40, d_year#36] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#33))] +Aggregate Attributes [1]: [sum#41] +Results [5]: [c_customer_id#38, c_first_name#39, c_last_name#40, d_year#36, sum#42] (53) Exchange -Input [5]: [c_customer_id#46, c_first_name#47, c_last_name#48, d_year#43, sum#50] -Arguments: hashpartitioning(c_customer_id#46, c_first_name#47, c_last_name#48, d_year#43, 5), ENSURE_REQUIREMENTS, [id=#51] +Input [5]: [c_customer_id#38, c_first_name#39, c_last_name#40, d_year#36, sum#42] +Arguments: hashpartitioning(c_customer_id#38, c_first_name#39, c_last_name#40, d_year#36, 5), ENSURE_REQUIREMENTS, [plan_id=9] (54) HashAggregate [codegen id : 24] -Input [5]: [c_customer_id#46, c_first_name#47, c_last_name#48, d_year#43, sum#50] -Keys [4]: [c_customer_id#46, c_first_name#47, c_last_name#48, d_year#43] -Functions [1]: [sum(UnscaledValue(ws_net_paid#40))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#40))#52] -Results [2]: [c_customer_id#46 AS customer_id#53, MakeDecimal(sum(UnscaledValue(ws_net_paid#40))#52,17,2) AS year_total#54] +Input [5]: [c_customer_id#38, c_first_name#39, c_last_name#40, d_year#36, sum#42] +Keys [4]: [c_customer_id#38, c_first_name#39, c_last_name#40, d_year#36] +Functions [1]: [sum(UnscaledValue(ws_net_paid#33))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#33))#43] +Results [2]: [c_customer_id#38 AS customer_id#44, MakeDecimal(sum(UnscaledValue(ws_net_paid#33))#43,17,2) AS year_total#45] (55) Filter [codegen id : 24] -Input [2]: [customer_id#53, year_total#54] -Condition : (isnotnull(year_total#54) AND (year_total#54 > 0.00)) +Input [2]: [customer_id#44, year_total#45] +Condition : (isnotnull(year_total#45) AND (year_total#45 > 0.00)) (56) Exchange -Input [2]: [customer_id#53, year_total#54] -Arguments: hashpartitioning(customer_id#53, 5), ENSURE_REQUIREMENTS, [id=#55] +Input [2]: [customer_id#44, year_total#45] +Arguments: hashpartitioning(customer_id#44, 5), ENSURE_REQUIREMENTS, [plan_id=10] (57) Sort [codegen id : 25] -Input [2]: [customer_id#53, year_total#54] -Arguments: [customer_id#53 ASC NULLS FIRST], false, 0 +Input [2]: [customer_id#44, year_total#45] +Arguments: [customer_id#44 ASC NULLS FIRST], false, 0 (58) SortMergeJoin [codegen id : 26] -Left keys [1]: [customer_id#17] -Right keys [1]: [customer_id#53] +Left keys [1]: [customer_id#14] +Right keys [1]: [customer_id#44] Join condition: None (59) Project [codegen id : 26] -Output [7]: [customer_id#17, year_total#18, customer_id#34, customer_first_name#35, customer_last_name#36, year_total#37, year_total#54] -Input [8]: [customer_id#17, year_total#18, customer_id#34, customer_first_name#35, customer_last_name#36, year_total#37, customer_id#53, year_total#54] +Output [7]: [customer_id#14, year_total#15, customer_id#28, customer_first_name#29, customer_last_name#30, year_total#31, year_total#45] +Input [8]: [customer_id#14, year_total#15, customer_id#28, customer_first_name#29, customer_last_name#30, year_total#31, customer_id#44, year_total#45] (60) Scan parquet default.web_sales -Output [3]: [ws_bill_customer_sk#56, ws_net_paid#57, ws_sold_date_sk#58] +Output [3]: [ws_bill_customer_sk#46, ws_net_paid#47, ws_sold_date_sk#48] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#58), dynamicpruningexpression(ws_sold_date_sk#58 IN dynamicpruning#23)] +PartitionFilters: [isnotnull(ws_sold_date_sk#48), dynamicpruningexpression(ws_sold_date_sk#48 IN dynamicpruning#19)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (61) ColumnarToRow [codegen id : 28] -Input [3]: [ws_bill_customer_sk#56, ws_net_paid#57, ws_sold_date_sk#58] +Input [3]: [ws_bill_customer_sk#46, ws_net_paid#47, ws_sold_date_sk#48] (62) Filter [codegen id : 28] -Input [3]: [ws_bill_customer_sk#56, ws_net_paid#57, ws_sold_date_sk#58] -Condition : isnotnull(ws_bill_customer_sk#56) +Input [3]: [ws_bill_customer_sk#46, ws_net_paid#47, ws_sold_date_sk#48] +Condition : isnotnull(ws_bill_customer_sk#46) (63) ReusedExchange [Reuses operator id: 87] -Output [2]: [d_date_sk#59, d_year#60] +Output [2]: [d_date_sk#49, d_year#50] (64) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [ws_sold_date_sk#58] -Right keys [1]: [d_date_sk#59] +Left keys [1]: [ws_sold_date_sk#48] +Right keys [1]: [d_date_sk#49] Join condition: None (65) Project [codegen id : 28] -Output [3]: [ws_bill_customer_sk#56, ws_net_paid#57, d_year#60] -Input [5]: [ws_bill_customer_sk#56, ws_net_paid#57, ws_sold_date_sk#58, d_date_sk#59, d_year#60] +Output [3]: [ws_bill_customer_sk#46, ws_net_paid#47, d_year#50] +Input [5]: [ws_bill_customer_sk#46, ws_net_paid#47, ws_sold_date_sk#48, d_date_sk#49, d_year#50] (66) Exchange -Input [3]: [ws_bill_customer_sk#56, ws_net_paid#57, d_year#60] -Arguments: hashpartitioning(ws_bill_customer_sk#56, 5), ENSURE_REQUIREMENTS, [id=#61] +Input [3]: [ws_bill_customer_sk#46, ws_net_paid#47, d_year#50] +Arguments: hashpartitioning(ws_bill_customer_sk#46, 5), ENSURE_REQUIREMENTS, [plan_id=11] (67) Sort [codegen id : 29] -Input [3]: [ws_bill_customer_sk#56, ws_net_paid#57, d_year#60] -Arguments: [ws_bill_customer_sk#56 ASC NULLS FIRST], false, 0 +Input [3]: [ws_bill_customer_sk#46, ws_net_paid#47, d_year#50] +Arguments: [ws_bill_customer_sk#46 ASC NULLS FIRST], false, 0 (68) ReusedExchange [Reuses operator id: 12] -Output [4]: [c_customer_sk#62, c_customer_id#63, c_first_name#64, c_last_name#65] +Output [4]: [c_customer_sk#51, c_customer_id#52, c_first_name#53, c_last_name#54] (69) Sort [codegen id : 31] -Input [4]: [c_customer_sk#62, c_customer_id#63, c_first_name#64, c_last_name#65] -Arguments: [c_customer_sk#62 ASC NULLS FIRST], false, 0 +Input [4]: [c_customer_sk#51, c_customer_id#52, c_first_name#53, c_last_name#54] +Arguments: [c_customer_sk#51 ASC NULLS FIRST], false, 0 (70) SortMergeJoin [codegen id : 32] -Left keys [1]: [ws_bill_customer_sk#56] -Right keys [1]: [c_customer_sk#62] +Left keys [1]: [ws_bill_customer_sk#46] +Right keys [1]: [c_customer_sk#51] Join condition: None (71) Project [codegen id : 32] -Output [5]: [c_customer_id#63, c_first_name#64, c_last_name#65, ws_net_paid#57, d_year#60] -Input [7]: [ws_bill_customer_sk#56, ws_net_paid#57, d_year#60, c_customer_sk#62, c_customer_id#63, c_first_name#64, c_last_name#65] +Output [5]: [c_customer_id#52, c_first_name#53, c_last_name#54, ws_net_paid#47, d_year#50] +Input [7]: [ws_bill_customer_sk#46, ws_net_paid#47, d_year#50, c_customer_sk#51, c_customer_id#52, c_first_name#53, c_last_name#54] (72) HashAggregate [codegen id : 32] -Input [5]: [c_customer_id#63, c_first_name#64, c_last_name#65, ws_net_paid#57, d_year#60] -Keys [4]: [c_customer_id#63, c_first_name#64, c_last_name#65, d_year#60] -Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#57))] -Aggregate Attributes [1]: [sum#66] -Results [5]: [c_customer_id#63, c_first_name#64, c_last_name#65, d_year#60, sum#67] +Input [5]: [c_customer_id#52, c_first_name#53, c_last_name#54, ws_net_paid#47, d_year#50] +Keys [4]: [c_customer_id#52, c_first_name#53, c_last_name#54, d_year#50] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#47))] +Aggregate Attributes [1]: [sum#55] +Results [5]: [c_customer_id#52, c_first_name#53, c_last_name#54, d_year#50, sum#56] (73) Exchange -Input [5]: [c_customer_id#63, c_first_name#64, c_last_name#65, d_year#60, sum#67] -Arguments: hashpartitioning(c_customer_id#63, c_first_name#64, c_last_name#65, d_year#60, 5), ENSURE_REQUIREMENTS, [id=#68] +Input [5]: [c_customer_id#52, c_first_name#53, c_last_name#54, d_year#50, sum#56] +Arguments: hashpartitioning(c_customer_id#52, c_first_name#53, c_last_name#54, d_year#50, 5), ENSURE_REQUIREMENTS, [plan_id=12] (74) HashAggregate [codegen id : 33] -Input [5]: [c_customer_id#63, c_first_name#64, c_last_name#65, d_year#60, sum#67] -Keys [4]: [c_customer_id#63, c_first_name#64, c_last_name#65, d_year#60] -Functions [1]: [sum(UnscaledValue(ws_net_paid#57))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#57))#52] -Results [2]: [c_customer_id#63 AS customer_id#69, MakeDecimal(sum(UnscaledValue(ws_net_paid#57))#52,17,2) AS year_total#70] +Input [5]: [c_customer_id#52, c_first_name#53, c_last_name#54, d_year#50, sum#56] +Keys [4]: [c_customer_id#52, c_first_name#53, c_last_name#54, d_year#50] +Functions [1]: [sum(UnscaledValue(ws_net_paid#47))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#47))#43] +Results [2]: [c_customer_id#52 AS customer_id#57, MakeDecimal(sum(UnscaledValue(ws_net_paid#47))#43,17,2) AS year_total#58] (75) Exchange -Input [2]: [customer_id#69, year_total#70] -Arguments: hashpartitioning(customer_id#69, 5), ENSURE_REQUIREMENTS, [id=#71] +Input [2]: [customer_id#57, year_total#58] +Arguments: hashpartitioning(customer_id#57, 5), ENSURE_REQUIREMENTS, [plan_id=13] (76) Sort [codegen id : 34] -Input [2]: [customer_id#69, year_total#70] -Arguments: [customer_id#69 ASC NULLS FIRST], false, 0 +Input [2]: [customer_id#57, year_total#58] +Arguments: [customer_id#57 ASC NULLS FIRST], false, 0 (77) SortMergeJoin [codegen id : 35] -Left keys [1]: [customer_id#17] -Right keys [1]: [customer_id#69] -Join condition: (CASE WHEN (year_total#54 > 0.00) THEN CheckOverflow((promote_precision(year_total#70) / promote_precision(year_total#54)), DecimalType(37,20)) END > CASE WHEN (year_total#18 > 0.00) THEN CheckOverflow((promote_precision(year_total#37) / promote_precision(year_total#18)), DecimalType(37,20)) END) +Left keys [1]: [customer_id#14] +Right keys [1]: [customer_id#57] +Join condition: (CASE WHEN (year_total#45 > 0.00) THEN CheckOverflow((promote_precision(year_total#58) / promote_precision(year_total#45)), DecimalType(37,20)) END > CASE WHEN (year_total#15 > 0.00) THEN CheckOverflow((promote_precision(year_total#31) / promote_precision(year_total#15)), DecimalType(37,20)) END) (78) Project [codegen id : 35] -Output [3]: [customer_id#34, customer_first_name#35, customer_last_name#36] -Input [9]: [customer_id#17, year_total#18, customer_id#34, customer_first_name#35, customer_last_name#36, year_total#37, year_total#54, customer_id#69, year_total#70] +Output [3]: [customer_id#28, customer_first_name#29, customer_last_name#30] +Input [9]: [customer_id#14, year_total#15, customer_id#28, customer_first_name#29, customer_last_name#30, year_total#31, year_total#45, customer_id#57, year_total#58] (79) TakeOrderedAndProject -Input [3]: [customer_id#34, customer_first_name#35, customer_last_name#36] -Arguments: 100, [customer_first_name#35 ASC NULLS FIRST, customer_id#34 ASC NULLS FIRST, customer_last_name#36 ASC NULLS FIRST], [customer_id#34, customer_first_name#35, customer_last_name#36] +Input [3]: [customer_id#28, customer_first_name#29, customer_last_name#30] +Arguments: 100, [customer_first_name#29 ASC NULLS FIRST, customer_id#28 ASC NULLS FIRST, customer_last_name#30 ASC NULLS FIRST], [customer_id#28, customer_first_name#29, customer_last_name#30] ===== Subqueries ===== @@ -463,9 +463,9 @@ Condition : (((isnotnull(d_year#6) AND (d_year#6 = 2001)) AND d_year#6 IN (2001, (83) BroadcastExchange Input [2]: [d_date_sk#5, d_year#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#72] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=14] -Subquery:2 Hosting operator id = 22 Hosting Expression = ss_sold_date_sk#22 IN dynamicpruning#23 +Subquery:2 Hosting operator id = 22 Hosting Expression = ss_sold_date_sk#18 IN dynamicpruning#19 BroadcastExchange (87) +- * Filter (86) +- * ColumnarToRow (85) @@ -473,25 +473,25 @@ BroadcastExchange (87) (84) Scan parquet default.date_dim -Output [2]: [d_date_sk#24, d_year#25] +Output [2]: [d_date_sk#20, d_year#21] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] ReadSchema: struct (85) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#24, d_year#25] +Input [2]: [d_date_sk#20, d_year#21] (86) Filter [codegen id : 1] -Input [2]: [d_date_sk#24, d_year#25] -Condition : (((isnotnull(d_year#25) AND (d_year#25 = 2002)) AND d_year#25 IN (2001,2002)) AND isnotnull(d_date_sk#24)) +Input [2]: [d_date_sk#20, d_year#21] +Condition : (((isnotnull(d_year#21) AND (d_year#21 = 2002)) AND d_year#21 IN (2001,2002)) AND isnotnull(d_date_sk#20)) (87) BroadcastExchange -Input [2]: [d_date_sk#24, d_year#25] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#73] +Input [2]: [d_date_sk#20, d_year#21] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=15] -Subquery:3 Hosting operator id = 40 Hosting Expression = ws_sold_date_sk#41 IN dynamicpruning#4 +Subquery:3 Hosting operator id = 40 Hosting Expression = ws_sold_date_sk#34 IN dynamicpruning#4 -Subquery:4 Hosting operator id = 60 Hosting Expression = ws_sold_date_sk#58 IN dynamicpruning#23 +Subquery:4 Hosting operator id = 60 Hosting Expression = ws_sold_date_sk#48 IN dynamicpruning#19 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74/explain.txt index a2c8929c7f285..3c549bb77b191 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q74/explain.txt @@ -103,7 +103,7 @@ Condition : isnotnull(ss_customer_sk#5) (7) BroadcastExchange Input [3]: [ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [c_customer_sk#1] @@ -115,297 +115,297 @@ Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_s Input [7]: [c_customer_sk#1, c_customer_id#2, c_first_name#3, c_last_name#4, ss_customer_sk#5, ss_net_paid#6, ss_sold_date_sk#7] (10) ReusedExchange [Reuses operator id: 75] -Output [2]: [d_date_sk#10, d_year#11] +Output [2]: [d_date_sk#9, d_year#10] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#7] -Right keys [1]: [d_date_sk#10] +Right keys [1]: [d_date_sk#9] Join condition: None (12) Project [codegen id : 3] -Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#11] -Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7, d_date_sk#10, d_year#11] +Output [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#10] +Input [7]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, ss_sold_date_sk#7, d_date_sk#9, d_year#10] (13) HashAggregate [codegen id : 3] -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#11] -Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#11] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, ss_net_paid#6, d_year#10] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#6))] -Aggregate Attributes [1]: [sum#12] -Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#11, sum#13] +Aggregate Attributes [1]: [sum#11] +Results [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#12] (14) Exchange -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#11, sum#13] -Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#11, 5), ENSURE_REQUIREMENTS, [id=#14] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#12] +Arguments: hashpartitioning(c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 16] -Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#11, sum#13] -Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#11] +Input [5]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10, sum#12] +Keys [4]: [c_customer_id#2, c_first_name#3, c_last_name#4, d_year#10] Functions [1]: [sum(UnscaledValue(ss_net_paid#6))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#6))#15] -Results [2]: [c_customer_id#2 AS customer_id#16, MakeDecimal(sum(UnscaledValue(ss_net_paid#6))#15,17,2) AS year_total#17] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#6))#13] +Results [2]: [c_customer_id#2 AS customer_id#14, MakeDecimal(sum(UnscaledValue(ss_net_paid#6))#13,17,2) AS year_total#15] (16) Filter [codegen id : 16] -Input [2]: [customer_id#16, year_total#17] -Condition : (isnotnull(year_total#17) AND (year_total#17 > 0.00)) +Input [2]: [customer_id#14, year_total#15] +Condition : (isnotnull(year_total#15) AND (year_total#15 > 0.00)) (17) Scan parquet default.customer -Output [4]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21] +Output [4]: [c_customer_sk#16, c_customer_id#17, c_first_name#18, c_last_name#19] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (18) ColumnarToRow [codegen id : 6] -Input [4]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21] +Input [4]: [c_customer_sk#16, c_customer_id#17, c_first_name#18, c_last_name#19] (19) Filter [codegen id : 6] -Input [4]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21] -Condition : (isnotnull(c_customer_sk#18) AND isnotnull(c_customer_id#19)) +Input [4]: [c_customer_sk#16, c_customer_id#17, c_first_name#18, c_last_name#19] +Condition : (isnotnull(c_customer_sk#16) AND isnotnull(c_customer_id#17)) (20) Scan parquet default.store_sales -Output [3]: [ss_customer_sk#22, ss_net_paid#23, ss_sold_date_sk#24] +Output [3]: [ss_customer_sk#20, ss_net_paid#21, ss_sold_date_sk#22] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#24), dynamicpruningexpression(ss_sold_date_sk#24 IN dynamicpruning#25)] +PartitionFilters: [isnotnull(ss_sold_date_sk#22), dynamicpruningexpression(ss_sold_date_sk#22 IN dynamicpruning#23)] PushedFilters: [IsNotNull(ss_customer_sk)] ReadSchema: struct (21) ColumnarToRow [codegen id : 4] -Input [3]: [ss_customer_sk#22, ss_net_paid#23, ss_sold_date_sk#24] +Input [3]: [ss_customer_sk#20, ss_net_paid#21, ss_sold_date_sk#22] (22) Filter [codegen id : 4] -Input [3]: [ss_customer_sk#22, ss_net_paid#23, ss_sold_date_sk#24] -Condition : isnotnull(ss_customer_sk#22) +Input [3]: [ss_customer_sk#20, ss_net_paid#21, ss_sold_date_sk#22] +Condition : isnotnull(ss_customer_sk#20) (23) BroadcastExchange -Input [3]: [ss_customer_sk#22, ss_net_paid#23, ss_sold_date_sk#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#26] +Input [3]: [ss_customer_sk#20, ss_net_paid#21, ss_sold_date_sk#22] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_customer_sk#18] -Right keys [1]: [ss_customer_sk#22] +Left keys [1]: [c_customer_sk#16] +Right keys [1]: [ss_customer_sk#20] Join condition: None (25) Project [codegen id : 6] -Output [5]: [c_customer_id#19, c_first_name#20, c_last_name#21, ss_net_paid#23, ss_sold_date_sk#24] -Input [7]: [c_customer_sk#18, c_customer_id#19, c_first_name#20, c_last_name#21, ss_customer_sk#22, ss_net_paid#23, ss_sold_date_sk#24] +Output [5]: [c_customer_id#17, c_first_name#18, c_last_name#19, ss_net_paid#21, ss_sold_date_sk#22] +Input [7]: [c_customer_sk#16, c_customer_id#17, c_first_name#18, c_last_name#19, ss_customer_sk#20, ss_net_paid#21, ss_sold_date_sk#22] (26) ReusedExchange [Reuses operator id: 79] -Output [2]: [d_date_sk#27, d_year#28] +Output [2]: [d_date_sk#24, d_year#25] (27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [ss_sold_date_sk#24] -Right keys [1]: [d_date_sk#27] +Left keys [1]: [ss_sold_date_sk#22] +Right keys [1]: [d_date_sk#24] Join condition: None (28) Project [codegen id : 6] -Output [5]: [c_customer_id#19, c_first_name#20, c_last_name#21, ss_net_paid#23, d_year#28] -Input [7]: [c_customer_id#19, c_first_name#20, c_last_name#21, ss_net_paid#23, ss_sold_date_sk#24, d_date_sk#27, d_year#28] +Output [5]: [c_customer_id#17, c_first_name#18, c_last_name#19, ss_net_paid#21, d_year#25] +Input [7]: [c_customer_id#17, c_first_name#18, c_last_name#19, ss_net_paid#21, ss_sold_date_sk#22, d_date_sk#24, d_year#25] (29) HashAggregate [codegen id : 6] -Input [5]: [c_customer_id#19, c_first_name#20, c_last_name#21, ss_net_paid#23, d_year#28] -Keys [4]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#28] -Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#23))] -Aggregate Attributes [1]: [sum#29] -Results [5]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#28, sum#30] +Input [5]: [c_customer_id#17, c_first_name#18, c_last_name#19, ss_net_paid#21, d_year#25] +Keys [4]: [c_customer_id#17, c_first_name#18, c_last_name#19, d_year#25] +Functions [1]: [partial_sum(UnscaledValue(ss_net_paid#21))] +Aggregate Attributes [1]: [sum#26] +Results [5]: [c_customer_id#17, c_first_name#18, c_last_name#19, d_year#25, sum#27] (30) Exchange -Input [5]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#28, sum#30] -Arguments: hashpartitioning(c_customer_id#19, c_first_name#20, c_last_name#21, d_year#28, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [5]: [c_customer_id#17, c_first_name#18, c_last_name#19, d_year#25, sum#27] +Arguments: hashpartitioning(c_customer_id#17, c_first_name#18, c_last_name#19, d_year#25, 5), ENSURE_REQUIREMENTS, [plan_id=4] (31) HashAggregate [codegen id : 7] -Input [5]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#28, sum#30] -Keys [4]: [c_customer_id#19, c_first_name#20, c_last_name#21, d_year#28] -Functions [1]: [sum(UnscaledValue(ss_net_paid#23))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#23))#15] -Results [4]: [c_customer_id#19 AS customer_id#32, c_first_name#20 AS customer_first_name#33, c_last_name#21 AS customer_last_name#34, MakeDecimal(sum(UnscaledValue(ss_net_paid#23))#15,17,2) AS year_total#35] +Input [5]: [c_customer_id#17, c_first_name#18, c_last_name#19, d_year#25, sum#27] +Keys [4]: [c_customer_id#17, c_first_name#18, c_last_name#19, d_year#25] +Functions [1]: [sum(UnscaledValue(ss_net_paid#21))] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_net_paid#21))#13] +Results [4]: [c_customer_id#17 AS customer_id#28, c_first_name#18 AS customer_first_name#29, c_last_name#19 AS customer_last_name#30, MakeDecimal(sum(UnscaledValue(ss_net_paid#21))#13,17,2) AS year_total#31] (32) BroadcastExchange -Input [4]: [customer_id#32, customer_first_name#33, customer_last_name#34, year_total#35] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#36] +Input [4]: [customer_id#28, customer_first_name#29, customer_last_name#30, year_total#31] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=5] (33) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#16] -Right keys [1]: [customer_id#32] +Left keys [1]: [customer_id#14] +Right keys [1]: [customer_id#28] Join condition: None (34) Scan parquet default.customer -Output [4]: [c_customer_sk#37, c_customer_id#38, c_first_name#39, c_last_name#40] +Output [4]: [c_customer_sk#32, c_customer_id#33, c_first_name#34, c_last_name#35] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (35) ColumnarToRow [codegen id : 10] -Input [4]: [c_customer_sk#37, c_customer_id#38, c_first_name#39, c_last_name#40] +Input [4]: [c_customer_sk#32, c_customer_id#33, c_first_name#34, c_last_name#35] (36) Filter [codegen id : 10] -Input [4]: [c_customer_sk#37, c_customer_id#38, c_first_name#39, c_last_name#40] -Condition : (isnotnull(c_customer_sk#37) AND isnotnull(c_customer_id#38)) +Input [4]: [c_customer_sk#32, c_customer_id#33, c_first_name#34, c_last_name#35] +Condition : (isnotnull(c_customer_sk#32) AND isnotnull(c_customer_id#33)) (37) Scan parquet default.web_sales -Output [3]: [ws_bill_customer_sk#41, ws_net_paid#42, ws_sold_date_sk#43] +Output [3]: [ws_bill_customer_sk#36, ws_net_paid#37, ws_sold_date_sk#38] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#43), dynamicpruningexpression(ws_sold_date_sk#43 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ws_sold_date_sk#38), dynamicpruningexpression(ws_sold_date_sk#38 IN dynamicpruning#8)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (38) ColumnarToRow [codegen id : 8] -Input [3]: [ws_bill_customer_sk#41, ws_net_paid#42, ws_sold_date_sk#43] +Input [3]: [ws_bill_customer_sk#36, ws_net_paid#37, ws_sold_date_sk#38] (39) Filter [codegen id : 8] -Input [3]: [ws_bill_customer_sk#41, ws_net_paid#42, ws_sold_date_sk#43] -Condition : isnotnull(ws_bill_customer_sk#41) +Input [3]: [ws_bill_customer_sk#36, ws_net_paid#37, ws_sold_date_sk#38] +Condition : isnotnull(ws_bill_customer_sk#36) (40) BroadcastExchange -Input [3]: [ws_bill_customer_sk#41, ws_net_paid#42, ws_sold_date_sk#43] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#44] +Input [3]: [ws_bill_customer_sk#36, ws_net_paid#37, ws_sold_date_sk#38] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] (41) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [c_customer_sk#37] -Right keys [1]: [ws_bill_customer_sk#41] +Left keys [1]: [c_customer_sk#32] +Right keys [1]: [ws_bill_customer_sk#36] Join condition: None (42) Project [codegen id : 10] -Output [5]: [c_customer_id#38, c_first_name#39, c_last_name#40, ws_net_paid#42, ws_sold_date_sk#43] -Input [7]: [c_customer_sk#37, c_customer_id#38, c_first_name#39, c_last_name#40, ws_bill_customer_sk#41, ws_net_paid#42, ws_sold_date_sk#43] +Output [5]: [c_customer_id#33, c_first_name#34, c_last_name#35, ws_net_paid#37, ws_sold_date_sk#38] +Input [7]: [c_customer_sk#32, c_customer_id#33, c_first_name#34, c_last_name#35, ws_bill_customer_sk#36, ws_net_paid#37, ws_sold_date_sk#38] (43) ReusedExchange [Reuses operator id: 75] -Output [2]: [d_date_sk#45, d_year#46] +Output [2]: [d_date_sk#39, d_year#40] (44) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ws_sold_date_sk#43] -Right keys [1]: [d_date_sk#45] +Left keys [1]: [ws_sold_date_sk#38] +Right keys [1]: [d_date_sk#39] Join condition: None (45) Project [codegen id : 10] -Output [5]: [c_customer_id#38, c_first_name#39, c_last_name#40, ws_net_paid#42, d_year#46] -Input [7]: [c_customer_id#38, c_first_name#39, c_last_name#40, ws_net_paid#42, ws_sold_date_sk#43, d_date_sk#45, d_year#46] +Output [5]: [c_customer_id#33, c_first_name#34, c_last_name#35, ws_net_paid#37, d_year#40] +Input [7]: [c_customer_id#33, c_first_name#34, c_last_name#35, ws_net_paid#37, ws_sold_date_sk#38, d_date_sk#39, d_year#40] (46) HashAggregate [codegen id : 10] -Input [5]: [c_customer_id#38, c_first_name#39, c_last_name#40, ws_net_paid#42, d_year#46] -Keys [4]: [c_customer_id#38, c_first_name#39, c_last_name#40, d_year#46] -Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#42))] -Aggregate Attributes [1]: [sum#47] -Results [5]: [c_customer_id#38, c_first_name#39, c_last_name#40, d_year#46, sum#48] +Input [5]: [c_customer_id#33, c_first_name#34, c_last_name#35, ws_net_paid#37, d_year#40] +Keys [4]: [c_customer_id#33, c_first_name#34, c_last_name#35, d_year#40] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#37))] +Aggregate Attributes [1]: [sum#41] +Results [5]: [c_customer_id#33, c_first_name#34, c_last_name#35, d_year#40, sum#42] (47) Exchange -Input [5]: [c_customer_id#38, c_first_name#39, c_last_name#40, d_year#46, sum#48] -Arguments: hashpartitioning(c_customer_id#38, c_first_name#39, c_last_name#40, d_year#46, 5), ENSURE_REQUIREMENTS, [id=#49] +Input [5]: [c_customer_id#33, c_first_name#34, c_last_name#35, d_year#40, sum#42] +Arguments: hashpartitioning(c_customer_id#33, c_first_name#34, c_last_name#35, d_year#40, 5), ENSURE_REQUIREMENTS, [plan_id=7] (48) HashAggregate [codegen id : 11] -Input [5]: [c_customer_id#38, c_first_name#39, c_last_name#40, d_year#46, sum#48] -Keys [4]: [c_customer_id#38, c_first_name#39, c_last_name#40, d_year#46] -Functions [1]: [sum(UnscaledValue(ws_net_paid#42))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#42))#50] -Results [2]: [c_customer_id#38 AS customer_id#51, MakeDecimal(sum(UnscaledValue(ws_net_paid#42))#50,17,2) AS year_total#52] +Input [5]: [c_customer_id#33, c_first_name#34, c_last_name#35, d_year#40, sum#42] +Keys [4]: [c_customer_id#33, c_first_name#34, c_last_name#35, d_year#40] +Functions [1]: [sum(UnscaledValue(ws_net_paid#37))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#37))#43] +Results [2]: [c_customer_id#33 AS customer_id#44, MakeDecimal(sum(UnscaledValue(ws_net_paid#37))#43,17,2) AS year_total#45] (49) Filter [codegen id : 11] -Input [2]: [customer_id#51, year_total#52] -Condition : (isnotnull(year_total#52) AND (year_total#52 > 0.00)) +Input [2]: [customer_id#44, year_total#45] +Condition : (isnotnull(year_total#45) AND (year_total#45 > 0.00)) (50) BroadcastExchange -Input [2]: [customer_id#51, year_total#52] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#53] +Input [2]: [customer_id#44, year_total#45] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=8] (51) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#16] -Right keys [1]: [customer_id#51] +Left keys [1]: [customer_id#14] +Right keys [1]: [customer_id#44] Join condition: None (52) Project [codegen id : 16] -Output [7]: [customer_id#16, year_total#17, customer_id#32, customer_first_name#33, customer_last_name#34, year_total#35, year_total#52] -Input [8]: [customer_id#16, year_total#17, customer_id#32, customer_first_name#33, customer_last_name#34, year_total#35, customer_id#51, year_total#52] +Output [7]: [customer_id#14, year_total#15, customer_id#28, customer_first_name#29, customer_last_name#30, year_total#31, year_total#45] +Input [8]: [customer_id#14, year_total#15, customer_id#28, customer_first_name#29, customer_last_name#30, year_total#31, customer_id#44, year_total#45] (53) Scan parquet default.customer -Output [4]: [c_customer_sk#54, c_customer_id#55, c_first_name#56, c_last_name#57] +Output [4]: [c_customer_sk#46, c_customer_id#47, c_first_name#48, c_last_name#49] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_customer_sk), IsNotNull(c_customer_id)] ReadSchema: struct (54) ColumnarToRow [codegen id : 14] -Input [4]: [c_customer_sk#54, c_customer_id#55, c_first_name#56, c_last_name#57] +Input [4]: [c_customer_sk#46, c_customer_id#47, c_first_name#48, c_last_name#49] (55) Filter [codegen id : 14] -Input [4]: [c_customer_sk#54, c_customer_id#55, c_first_name#56, c_last_name#57] -Condition : (isnotnull(c_customer_sk#54) AND isnotnull(c_customer_id#55)) +Input [4]: [c_customer_sk#46, c_customer_id#47, c_first_name#48, c_last_name#49] +Condition : (isnotnull(c_customer_sk#46) AND isnotnull(c_customer_id#47)) (56) Scan parquet default.web_sales -Output [3]: [ws_bill_customer_sk#58, ws_net_paid#59, ws_sold_date_sk#60] +Output [3]: [ws_bill_customer_sk#50, ws_net_paid#51, ws_sold_date_sk#52] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#60), dynamicpruningexpression(ws_sold_date_sk#60 IN dynamicpruning#25)] +PartitionFilters: [isnotnull(ws_sold_date_sk#52), dynamicpruningexpression(ws_sold_date_sk#52 IN dynamicpruning#23)] PushedFilters: [IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (57) ColumnarToRow [codegen id : 12] -Input [3]: [ws_bill_customer_sk#58, ws_net_paid#59, ws_sold_date_sk#60] +Input [3]: [ws_bill_customer_sk#50, ws_net_paid#51, ws_sold_date_sk#52] (58) Filter [codegen id : 12] -Input [3]: [ws_bill_customer_sk#58, ws_net_paid#59, ws_sold_date_sk#60] -Condition : isnotnull(ws_bill_customer_sk#58) +Input [3]: [ws_bill_customer_sk#50, ws_net_paid#51, ws_sold_date_sk#52] +Condition : isnotnull(ws_bill_customer_sk#50) (59) BroadcastExchange -Input [3]: [ws_bill_customer_sk#58, ws_net_paid#59, ws_sold_date_sk#60] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#61] +Input [3]: [ws_bill_customer_sk#50, ws_net_paid#51, ws_sold_date_sk#52] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] (60) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [c_customer_sk#54] -Right keys [1]: [ws_bill_customer_sk#58] +Left keys [1]: [c_customer_sk#46] +Right keys [1]: [ws_bill_customer_sk#50] Join condition: None (61) Project [codegen id : 14] -Output [5]: [c_customer_id#55, c_first_name#56, c_last_name#57, ws_net_paid#59, ws_sold_date_sk#60] -Input [7]: [c_customer_sk#54, c_customer_id#55, c_first_name#56, c_last_name#57, ws_bill_customer_sk#58, ws_net_paid#59, ws_sold_date_sk#60] +Output [5]: [c_customer_id#47, c_first_name#48, c_last_name#49, ws_net_paid#51, ws_sold_date_sk#52] +Input [7]: [c_customer_sk#46, c_customer_id#47, c_first_name#48, c_last_name#49, ws_bill_customer_sk#50, ws_net_paid#51, ws_sold_date_sk#52] (62) ReusedExchange [Reuses operator id: 79] -Output [2]: [d_date_sk#62, d_year#63] +Output [2]: [d_date_sk#53, d_year#54] (63) BroadcastHashJoin [codegen id : 14] -Left keys [1]: [ws_sold_date_sk#60] -Right keys [1]: [d_date_sk#62] +Left keys [1]: [ws_sold_date_sk#52] +Right keys [1]: [d_date_sk#53] Join condition: None (64) Project [codegen id : 14] -Output [5]: [c_customer_id#55, c_first_name#56, c_last_name#57, ws_net_paid#59, d_year#63] -Input [7]: [c_customer_id#55, c_first_name#56, c_last_name#57, ws_net_paid#59, ws_sold_date_sk#60, d_date_sk#62, d_year#63] +Output [5]: [c_customer_id#47, c_first_name#48, c_last_name#49, ws_net_paid#51, d_year#54] +Input [7]: [c_customer_id#47, c_first_name#48, c_last_name#49, ws_net_paid#51, ws_sold_date_sk#52, d_date_sk#53, d_year#54] (65) HashAggregate [codegen id : 14] -Input [5]: [c_customer_id#55, c_first_name#56, c_last_name#57, ws_net_paid#59, d_year#63] -Keys [4]: [c_customer_id#55, c_first_name#56, c_last_name#57, d_year#63] -Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#59))] -Aggregate Attributes [1]: [sum#64] -Results [5]: [c_customer_id#55, c_first_name#56, c_last_name#57, d_year#63, sum#65] +Input [5]: [c_customer_id#47, c_first_name#48, c_last_name#49, ws_net_paid#51, d_year#54] +Keys [4]: [c_customer_id#47, c_first_name#48, c_last_name#49, d_year#54] +Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#51))] +Aggregate Attributes [1]: [sum#55] +Results [5]: [c_customer_id#47, c_first_name#48, c_last_name#49, d_year#54, sum#56] (66) Exchange -Input [5]: [c_customer_id#55, c_first_name#56, c_last_name#57, d_year#63, sum#65] -Arguments: hashpartitioning(c_customer_id#55, c_first_name#56, c_last_name#57, d_year#63, 5), ENSURE_REQUIREMENTS, [id=#66] +Input [5]: [c_customer_id#47, c_first_name#48, c_last_name#49, d_year#54, sum#56] +Arguments: hashpartitioning(c_customer_id#47, c_first_name#48, c_last_name#49, d_year#54, 5), ENSURE_REQUIREMENTS, [plan_id=10] (67) HashAggregate [codegen id : 15] -Input [5]: [c_customer_id#55, c_first_name#56, c_last_name#57, d_year#63, sum#65] -Keys [4]: [c_customer_id#55, c_first_name#56, c_last_name#57, d_year#63] -Functions [1]: [sum(UnscaledValue(ws_net_paid#59))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#59))#50] -Results [2]: [c_customer_id#55 AS customer_id#67, MakeDecimal(sum(UnscaledValue(ws_net_paid#59))#50,17,2) AS year_total#68] +Input [5]: [c_customer_id#47, c_first_name#48, c_last_name#49, d_year#54, sum#56] +Keys [4]: [c_customer_id#47, c_first_name#48, c_last_name#49, d_year#54] +Functions [1]: [sum(UnscaledValue(ws_net_paid#51))] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#51))#43] +Results [2]: [c_customer_id#47 AS customer_id#57, MakeDecimal(sum(UnscaledValue(ws_net_paid#51))#43,17,2) AS year_total#58] (68) BroadcastExchange -Input [2]: [customer_id#67, year_total#68] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [id=#69] +Input [2]: [customer_id#57, year_total#58] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true]),false), [plan_id=11] (69) BroadcastHashJoin [codegen id : 16] -Left keys [1]: [customer_id#16] -Right keys [1]: [customer_id#67] -Join condition: (CASE WHEN (year_total#52 > 0.00) THEN CheckOverflow((promote_precision(year_total#68) / promote_precision(year_total#52)), DecimalType(37,20)) END > CASE WHEN (year_total#17 > 0.00) THEN CheckOverflow((promote_precision(year_total#35) / promote_precision(year_total#17)), DecimalType(37,20)) END) +Left keys [1]: [customer_id#14] +Right keys [1]: [customer_id#57] +Join condition: (CASE WHEN (year_total#45 > 0.00) THEN CheckOverflow((promote_precision(year_total#58) / promote_precision(year_total#45)), DecimalType(37,20)) END > CASE WHEN (year_total#15 > 0.00) THEN CheckOverflow((promote_precision(year_total#31) / promote_precision(year_total#15)), DecimalType(37,20)) END) (70) Project [codegen id : 16] -Output [3]: [customer_id#32, customer_first_name#33, customer_last_name#34] -Input [9]: [customer_id#16, year_total#17, customer_id#32, customer_first_name#33, customer_last_name#34, year_total#35, year_total#52, customer_id#67, year_total#68] +Output [3]: [customer_id#28, customer_first_name#29, customer_last_name#30] +Input [9]: [customer_id#14, year_total#15, customer_id#28, customer_first_name#29, customer_last_name#30, year_total#31, year_total#45, customer_id#57, year_total#58] (71) TakeOrderedAndProject -Input [3]: [customer_id#32, customer_first_name#33, customer_last_name#34] -Arguments: 100, [customer_first_name#33 ASC NULLS FIRST, customer_id#32 ASC NULLS FIRST, customer_last_name#34 ASC NULLS FIRST], [customer_id#32, customer_first_name#33, customer_last_name#34] +Input [3]: [customer_id#28, customer_first_name#29, customer_last_name#30] +Arguments: 100, [customer_first_name#29 ASC NULLS FIRST, customer_id#28 ASC NULLS FIRST, customer_last_name#30 ASC NULLS FIRST], [customer_id#28, customer_first_name#29, customer_last_name#30] ===== Subqueries ===== @@ -417,24 +417,24 @@ BroadcastExchange (75) (72) Scan parquet default.date_dim -Output [2]: [d_date_sk#10, d_year#11] +Output [2]: [d_date_sk#9, d_year#10] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] ReadSchema: struct (73) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#10, d_year#11] +Input [2]: [d_date_sk#9, d_year#10] (74) Filter [codegen id : 1] -Input [2]: [d_date_sk#10, d_year#11] -Condition : (((isnotnull(d_year#11) AND (d_year#11 = 2001)) AND d_year#11 IN (2001,2002)) AND isnotnull(d_date_sk#10)) +Input [2]: [d_date_sk#9, d_year#10] +Condition : (((isnotnull(d_year#10) AND (d_year#10 = 2001)) AND d_year#10 IN (2001,2002)) AND isnotnull(d_date_sk#9)) (75) BroadcastExchange -Input [2]: [d_date_sk#10, d_year#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#70] +Input [2]: [d_date_sk#9, d_year#10] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=12] -Subquery:2 Hosting operator id = 20 Hosting Expression = ss_sold_date_sk#24 IN dynamicpruning#25 +Subquery:2 Hosting operator id = 20 Hosting Expression = ss_sold_date_sk#22 IN dynamicpruning#23 BroadcastExchange (79) +- * Filter (78) +- * ColumnarToRow (77) @@ -442,25 +442,25 @@ BroadcastExchange (79) (76) Scan parquet default.date_dim -Output [2]: [d_date_sk#27, d_year#28] +Output [2]: [d_date_sk#24, d_year#25] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), In(d_year, [2001,2002]), IsNotNull(d_date_sk)] ReadSchema: struct (77) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#27, d_year#28] +Input [2]: [d_date_sk#24, d_year#25] (78) Filter [codegen id : 1] -Input [2]: [d_date_sk#27, d_year#28] -Condition : (((isnotnull(d_year#28) AND (d_year#28 = 2002)) AND d_year#28 IN (2001,2002)) AND isnotnull(d_date_sk#27)) +Input [2]: [d_date_sk#24, d_year#25] +Condition : (((isnotnull(d_year#25) AND (d_year#25 = 2002)) AND d_year#25 IN (2001,2002)) AND isnotnull(d_date_sk#24)) (79) BroadcastExchange -Input [2]: [d_date_sk#27, d_year#28] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#71] +Input [2]: [d_date_sk#24, d_year#25] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=13] -Subquery:3 Hosting operator id = 37 Hosting Expression = ws_sold_date_sk#43 IN dynamicpruning#8 +Subquery:3 Hosting operator id = 37 Hosting Expression = ws_sold_date_sk#38 IN dynamicpruning#8 -Subquery:4 Hosting operator id = 56 Hosting Expression = ws_sold_date_sk#60 IN dynamicpruning#25 +Subquery:4 Hosting operator id = 56 Hosting Expression = ws_sold_date_sk#52 IN dynamicpruning#23 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt index 27a2b5f734281..6bb0d1e90534b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75.sf100/explain.txt @@ -163,7 +163,7 @@ Input [6]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10, i_categor (8) BroadcastExchange Input [5]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_item_sk#1] @@ -175,517 +175,517 @@ Output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price# Input [10]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] (11) ReusedExchange [Reuses operator id: 131] -Output [2]: [d_date_sk#14, d_year#15] +Output [2]: [d_date_sk#13, d_year#14] (12) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_sold_date_sk#5] -Right keys [1]: [d_date_sk#14] +Right keys [1]: [d_date_sk#13] Join condition: None (13) Project [codegen id : 3] -Output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#15] -Input [11]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_date_sk#14, d_year#15] +Output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#14] +Input [11]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_date_sk#13, d_year#14] (14) Exchange -Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#15] -Arguments: hashpartitioning(cs_order_number#2, cs_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#14] +Arguments: hashpartitioning(cs_order_number#2, cs_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) Sort [codegen id : 4] -Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#15] +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#14] Arguments: [cs_order_number#2 ASC NULLS FIRST, cs_item_sk#1 ASC NULLS FIRST], false, 0 (16) Scan parquet default.catalog_returns -Output [5]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20, cr_returned_date_sk#21] +Output [5]: [cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18, cr_returned_date_sk#19] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 5] -Input [5]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20, cr_returned_date_sk#21] +Input [5]: [cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18, cr_returned_date_sk#19] (18) Filter [codegen id : 5] -Input [5]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20, cr_returned_date_sk#21] -Condition : (isnotnull(cr_order_number#18) AND isnotnull(cr_item_sk#17)) +Input [5]: [cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18, cr_returned_date_sk#19] +Condition : (isnotnull(cr_order_number#16) AND isnotnull(cr_item_sk#15)) (19) Project [codegen id : 5] -Output [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -Input [5]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20, cr_returned_date_sk#21] +Output [4]: [cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18] +Input [5]: [cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18, cr_returned_date_sk#19] (20) Exchange -Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -Arguments: hashpartitioning(cr_order_number#18, cr_item_sk#17, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [4]: [cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18] +Arguments: hashpartitioning(cr_order_number#16, cr_item_sk#15, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) Sort [codegen id : 6] -Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -Arguments: [cr_order_number#18 ASC NULLS FIRST, cr_item_sk#17 ASC NULLS FIRST], false, 0 +Input [4]: [cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18] +Arguments: [cr_order_number#16 ASC NULLS FIRST, cr_item_sk#15 ASC NULLS FIRST], false, 0 (22) SortMergeJoin [codegen id : 7] Left keys [2]: [cs_order_number#2, cs_item_sk#1] -Right keys [2]: [cr_order_number#18, cr_item_sk#17] +Right keys [2]: [cr_order_number#16, cr_item_sk#15] Join condition: None (23) Project [codegen id : 7] -Output [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, (cs_quantity#3 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#23, CheckOverflow((promote_precision(cast(cs_ext_sales_price#4 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#24] -Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#15, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +Output [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, (cs_quantity#3 - coalesce(cr_return_quantity#17, 0)) AS sales_cnt#20, CheckOverflow((promote_precision(cast(cs_ext_sales_price#4 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#18, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#21] +Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#14, cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18] (24) Scan parquet default.store_sales -Output [5]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, ss_sold_date_sk#29] +Output [5]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, ss_sold_date_sk#26] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#29), dynamicpruningexpression(ss_sold_date_sk#29 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(ss_sold_date_sk#26), dynamicpruningexpression(ss_sold_date_sk#26 IN dynamicpruning#6)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 10] -Input [5]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, ss_sold_date_sk#29] +Input [5]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, ss_sold_date_sk#26] (26) Filter [codegen id : 10] -Input [5]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, ss_sold_date_sk#29] -Condition : isnotnull(ss_item_sk#25) +Input [5]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Condition : isnotnull(ss_item_sk#22) (27) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#30, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34] +Output [5]: [i_item_sk#27, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31] (28) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_item_sk#25] -Right keys [1]: [i_item_sk#30] +Left keys [1]: [ss_item_sk#22] +Right keys [1]: [i_item_sk#27] Join condition: None (29) Project [codegen id : 10] -Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, ss_sold_date_sk#29, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34] -Input [10]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, ss_sold_date_sk#29, i_item_sk#30, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34] +Output [9]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, ss_sold_date_sk#26, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31] +Input [10]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, ss_sold_date_sk#26, i_item_sk#27, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31] (30) ReusedExchange [Reuses operator id: 131] -Output [2]: [d_date_sk#35, d_year#36] +Output [2]: [d_date_sk#32, d_year#33] (31) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_sold_date_sk#29] -Right keys [1]: [d_date_sk#35] +Left keys [1]: [ss_sold_date_sk#26] +Right keys [1]: [d_date_sk#32] Join condition: None (32) Project [codegen id : 10] -Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, d_year#36] -Input [11]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, ss_sold_date_sk#29, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, d_date_sk#35, d_year#36] +Output [9]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31, d_year#33] +Input [11]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, ss_sold_date_sk#26, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31, d_date_sk#32, d_year#33] (33) Exchange -Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, d_year#36] -Arguments: hashpartitioning(ss_ticket_number#26, ss_item_sk#25, 5), ENSURE_REQUIREMENTS, [id=#37] +Input [9]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31, d_year#33] +Arguments: hashpartitioning(ss_ticket_number#23, ss_item_sk#22, 5), ENSURE_REQUIREMENTS, [plan_id=4] (34) Sort [codegen id : 11] -Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, d_year#36] -Arguments: [ss_ticket_number#26 ASC NULLS FIRST, ss_item_sk#25 ASC NULLS FIRST], false, 0 +Input [9]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31, d_year#33] +Arguments: [ss_ticket_number#23 ASC NULLS FIRST, ss_item_sk#22 ASC NULLS FIRST], false, 0 (35) Scan parquet default.store_returns -Output [5]: [sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41, sr_returned_date_sk#42] +Output [5]: [sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37, sr_returned_date_sk#38] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] ReadSchema: struct (36) ColumnarToRow [codegen id : 12] -Input [5]: [sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41, sr_returned_date_sk#42] +Input [5]: [sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37, sr_returned_date_sk#38] (37) Filter [codegen id : 12] -Input [5]: [sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41, sr_returned_date_sk#42] -Condition : (isnotnull(sr_ticket_number#39) AND isnotnull(sr_item_sk#38)) +Input [5]: [sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37, sr_returned_date_sk#38] +Condition : (isnotnull(sr_ticket_number#35) AND isnotnull(sr_item_sk#34)) (38) Project [codegen id : 12] -Output [4]: [sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41] -Input [5]: [sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41, sr_returned_date_sk#42] +Output [4]: [sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37] +Input [5]: [sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37, sr_returned_date_sk#38] (39) Exchange -Input [4]: [sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41] -Arguments: hashpartitioning(sr_ticket_number#39, sr_item_sk#38, 5), ENSURE_REQUIREMENTS, [id=#43] +Input [4]: [sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37] +Arguments: hashpartitioning(sr_ticket_number#35, sr_item_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=5] (40) Sort [codegen id : 13] -Input [4]: [sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41] -Arguments: [sr_ticket_number#39 ASC NULLS FIRST, sr_item_sk#38 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37] +Arguments: [sr_ticket_number#35 ASC NULLS FIRST, sr_item_sk#34 ASC NULLS FIRST], false, 0 (41) SortMergeJoin [codegen id : 14] -Left keys [2]: [ss_ticket_number#26, ss_item_sk#25] -Right keys [2]: [sr_ticket_number#39, sr_item_sk#38] +Left keys [2]: [ss_ticket_number#23, ss_item_sk#22] +Right keys [2]: [sr_ticket_number#35, sr_item_sk#34] Join condition: None (42) Project [codegen id : 14] -Output [7]: [d_year#36, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, (ss_quantity#27 - coalesce(sr_return_quantity#40, 0)) AS sales_cnt#44, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#41, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#45] -Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, d_year#36, sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41] +Output [7]: [d_year#33, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31, (ss_quantity#24 - coalesce(sr_return_quantity#36, 0)) AS sales_cnt#39, CheckOverflow((promote_precision(cast(ss_ext_sales_price#25 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#37, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] +Input [13]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31, d_year#33, sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37] (43) Scan parquet default.web_sales -Output [5]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, ws_sold_date_sk#50] +Output [5]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, ws_sold_date_sk#45] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#50), dynamicpruningexpression(ws_sold_date_sk#50 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(ws_sold_date_sk#45), dynamicpruningexpression(ws_sold_date_sk#45 IN dynamicpruning#6)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 17] -Input [5]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, ws_sold_date_sk#50] +Input [5]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, ws_sold_date_sk#45] (45) Filter [codegen id : 17] -Input [5]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, ws_sold_date_sk#50] -Condition : isnotnull(ws_item_sk#46) +Input [5]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, ws_sold_date_sk#45] +Condition : isnotnull(ws_item_sk#41) (46) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#51, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55] +Output [5]: [i_item_sk#46, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50] (47) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_item_sk#46] -Right keys [1]: [i_item_sk#51] +Left keys [1]: [ws_item_sk#41] +Right keys [1]: [i_item_sk#46] Join condition: None (48) Project [codegen id : 17] -Output [9]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, ws_sold_date_sk#50, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55] -Input [10]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, ws_sold_date_sk#50, i_item_sk#51, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55] +Output [9]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, ws_sold_date_sk#45, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50] +Input [10]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, ws_sold_date_sk#45, i_item_sk#46, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50] (49) ReusedExchange [Reuses operator id: 131] -Output [2]: [d_date_sk#56, d_year#57] +Output [2]: [d_date_sk#51, d_year#52] (50) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#50] -Right keys [1]: [d_date_sk#56] +Left keys [1]: [ws_sold_date_sk#45] +Right keys [1]: [d_date_sk#51] Join condition: None (51) Project [codegen id : 17] -Output [9]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, d_year#57] -Input [11]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, ws_sold_date_sk#50, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, d_date_sk#56, d_year#57] +Output [9]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50, d_year#52] +Input [11]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, ws_sold_date_sk#45, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50, d_date_sk#51, d_year#52] (52) Exchange -Input [9]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, d_year#57] -Arguments: hashpartitioning(ws_order_number#47, ws_item_sk#46, 5), ENSURE_REQUIREMENTS, [id=#58] +Input [9]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50, d_year#52] +Arguments: hashpartitioning(ws_order_number#42, ws_item_sk#41, 5), ENSURE_REQUIREMENTS, [plan_id=6] (53) Sort [codegen id : 18] -Input [9]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, d_year#57] -Arguments: [ws_order_number#47 ASC NULLS FIRST, ws_item_sk#46 ASC NULLS FIRST], false, 0 +Input [9]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50, d_year#52] +Arguments: [ws_order_number#42 ASC NULLS FIRST, ws_item_sk#41 ASC NULLS FIRST], false, 0 (54) Scan parquet default.web_returns -Output [5]: [wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62, wr_returned_date_sk#63] +Output [5]: [wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56, wr_returned_date_sk#57] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct (55) ColumnarToRow [codegen id : 19] -Input [5]: [wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62, wr_returned_date_sk#63] +Input [5]: [wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56, wr_returned_date_sk#57] (56) Filter [codegen id : 19] -Input [5]: [wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62, wr_returned_date_sk#63] -Condition : (isnotnull(wr_order_number#60) AND isnotnull(wr_item_sk#59)) +Input [5]: [wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56, wr_returned_date_sk#57] +Condition : (isnotnull(wr_order_number#54) AND isnotnull(wr_item_sk#53)) (57) Project [codegen id : 19] -Output [4]: [wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62] -Input [5]: [wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62, wr_returned_date_sk#63] +Output [4]: [wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56] +Input [5]: [wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56, wr_returned_date_sk#57] (58) Exchange -Input [4]: [wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62] -Arguments: hashpartitioning(wr_order_number#60, wr_item_sk#59, 5), ENSURE_REQUIREMENTS, [id=#64] +Input [4]: [wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56] +Arguments: hashpartitioning(wr_order_number#54, wr_item_sk#53, 5), ENSURE_REQUIREMENTS, [plan_id=7] (59) Sort [codegen id : 20] -Input [4]: [wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62] -Arguments: [wr_order_number#60 ASC NULLS FIRST, wr_item_sk#59 ASC NULLS FIRST], false, 0 +Input [4]: [wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56] +Arguments: [wr_order_number#54 ASC NULLS FIRST, wr_item_sk#53 ASC NULLS FIRST], false, 0 (60) SortMergeJoin [codegen id : 21] -Left keys [2]: [ws_order_number#47, ws_item_sk#46] -Right keys [2]: [wr_order_number#60, wr_item_sk#59] +Left keys [2]: [ws_order_number#42, ws_item_sk#41] +Right keys [2]: [wr_order_number#54, wr_item_sk#53] Join condition: None (61) Project [codegen id : 21] -Output [7]: [d_year#57, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, (ws_quantity#48 - coalesce(wr_return_quantity#61, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ws_ext_sales_price#49 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#62, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#66] -Input [13]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, d_year#57, wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62] +Output [7]: [d_year#52, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50, (ws_quantity#43 - coalesce(wr_return_quantity#55, 0)) AS sales_cnt#58, CheckOverflow((promote_precision(cast(ws_ext_sales_price#44 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#56, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#59] +Input [13]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50, d_year#52, wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56] (62) Union (63) HashAggregate [codegen id : 22] -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] -Keys [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] +Keys [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] +Results [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] (64) Exchange -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] -Arguments: hashpartitioning(d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24, 5), ENSURE_REQUIREMENTS, [id=#67] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] +Arguments: hashpartitioning(d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21, 5), ENSURE_REQUIREMENTS, [plan_id=8] (65) HashAggregate [codegen id : 23] -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] -Keys [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] +Keys [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] +Results [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] (66) HashAggregate [codegen id : 23] -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] -Keys [5]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] -Functions [2]: [partial_sum(sales_cnt#23), partial_sum(UnscaledValue(sales_amt#24))] -Aggregate Attributes [2]: [sum#68, sum#69] -Results [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum#70, sum#71] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] +Keys [5]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] +Functions [2]: [partial_sum(sales_cnt#20), partial_sum(UnscaledValue(sales_amt#21))] +Aggregate Attributes [2]: [sum#60, sum#61] +Results [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum#62, sum#63] (67) Exchange -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum#70, sum#71] -Arguments: hashpartitioning(d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, 5), ENSURE_REQUIREMENTS, [id=#72] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum#62, sum#63] +Arguments: hashpartitioning(d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, 5), ENSURE_REQUIREMENTS, [plan_id=9] (68) HashAggregate [codegen id : 24] -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum#70, sum#71] -Keys [5]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] -Functions [2]: [sum(sales_cnt#23), sum(UnscaledValue(sales_amt#24))] -Aggregate Attributes [2]: [sum(sales_cnt#23)#73, sum(UnscaledValue(sales_amt#24))#74] -Results [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum(sales_cnt#23)#73 AS sales_cnt#75, MakeDecimal(sum(UnscaledValue(sales_amt#24))#74,18,2) AS sales_amt#76] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum#62, sum#63] +Keys [5]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] +Functions [2]: [sum(sales_cnt#20), sum(UnscaledValue(sales_amt#21))] +Aggregate Attributes [2]: [sum(sales_cnt#20)#64, sum(UnscaledValue(sales_amt#21))#65] +Results [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum(sales_cnt#20)#64 AS sales_cnt#66, MakeDecimal(sum(UnscaledValue(sales_amt#21))#65,18,2) AS sales_amt#67] (69) Exchange -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#75, sales_amt#76] -Arguments: hashpartitioning(i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, 5), ENSURE_REQUIREMENTS, [id=#77] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#66, sales_amt#67] +Arguments: hashpartitioning(i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, 5), ENSURE_REQUIREMENTS, [plan_id=10] (70) Sort [codegen id : 25] -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#75, sales_amt#76] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#66, sales_amt#67] Arguments: [i_brand_id#8 ASC NULLS FIRST, i_class_id#9 ASC NULLS FIRST, i_category_id#10 ASC NULLS FIRST, i_manufact_id#12 ASC NULLS FIRST], false, 0 (71) Scan parquet default.catalog_sales -Output [5]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, cs_sold_date_sk#82] +Output [5]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, cs_sold_date_sk#72] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#82), dynamicpruningexpression(cs_sold_date_sk#82 IN dynamicpruning#83)] +PartitionFilters: [isnotnull(cs_sold_date_sk#72), dynamicpruningexpression(cs_sold_date_sk#72 IN dynamicpruning#73)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (72) ColumnarToRow [codegen id : 28] -Input [5]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, cs_sold_date_sk#82] +Input [5]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, cs_sold_date_sk#72] (73) Filter [codegen id : 28] -Input [5]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, cs_sold_date_sk#82] -Condition : isnotnull(cs_item_sk#78) +Input [5]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, cs_sold_date_sk#72] +Condition : isnotnull(cs_item_sk#68) (74) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#84, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] +Output [5]: [i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78] (75) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [cs_item_sk#78] -Right keys [1]: [i_item_sk#84] +Left keys [1]: [cs_item_sk#68] +Right keys [1]: [i_item_sk#74] Join condition: None (76) Project [codegen id : 28] -Output [9]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, cs_sold_date_sk#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] -Input [10]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, cs_sold_date_sk#82, i_item_sk#84, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] +Output [9]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, cs_sold_date_sk#72, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78] +Input [10]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, cs_sold_date_sk#72, i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78] (77) ReusedExchange [Reuses operator id: 135] -Output [2]: [d_date_sk#89, d_year#90] +Output [2]: [d_date_sk#79, d_year#80] (78) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [cs_sold_date_sk#82] -Right keys [1]: [d_date_sk#89] +Left keys [1]: [cs_sold_date_sk#72] +Right keys [1]: [d_date_sk#79] Join condition: None (79) Project [codegen id : 28] -Output [9]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90] -Input [11]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, cs_sold_date_sk#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_date_sk#89, d_year#90] +Output [9]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#80] +Input [11]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, cs_sold_date_sk#72, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_date_sk#79, d_year#80] (80) Exchange -Input [9]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90] -Arguments: hashpartitioning(cs_order_number#79, cs_item_sk#78, 5), ENSURE_REQUIREMENTS, [id=#91] +Input [9]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#80] +Arguments: hashpartitioning(cs_order_number#69, cs_item_sk#68, 5), ENSURE_REQUIREMENTS, [plan_id=11] (81) Sort [codegen id : 29] -Input [9]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90] -Arguments: [cs_order_number#79 ASC NULLS FIRST, cs_item_sk#78 ASC NULLS FIRST], false, 0 +Input [9]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#80] +Arguments: [cs_order_number#69 ASC NULLS FIRST, cs_item_sk#68 ASC NULLS FIRST], false, 0 (82) ReusedExchange [Reuses operator id: 20] -Output [4]: [cr_item_sk#92, cr_order_number#93, cr_return_quantity#94, cr_return_amount#95] +Output [4]: [cr_item_sk#81, cr_order_number#82, cr_return_quantity#83, cr_return_amount#84] (83) Sort [codegen id : 31] -Input [4]: [cr_item_sk#92, cr_order_number#93, cr_return_quantity#94, cr_return_amount#95] -Arguments: [cr_order_number#93 ASC NULLS FIRST, cr_item_sk#92 ASC NULLS FIRST], false, 0 +Input [4]: [cr_item_sk#81, cr_order_number#82, cr_return_quantity#83, cr_return_amount#84] +Arguments: [cr_order_number#82 ASC NULLS FIRST, cr_item_sk#81 ASC NULLS FIRST], false, 0 (84) SortMergeJoin [codegen id : 32] -Left keys [2]: [cs_order_number#79, cs_item_sk#78] -Right keys [2]: [cr_order_number#93, cr_item_sk#92] +Left keys [2]: [cs_order_number#69, cs_item_sk#68] +Right keys [2]: [cr_order_number#82, cr_item_sk#81] Join condition: None (85) Project [codegen id : 32] -Output [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, (cs_quantity#80 - coalesce(cr_return_quantity#94, 0)) AS sales_cnt#23, CheckOverflow((promote_precision(cast(cs_ext_sales_price#81 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#95, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#24] -Input [13]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90, cr_item_sk#92, cr_order_number#93, cr_return_quantity#94, cr_return_amount#95] +Output [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, (cs_quantity#70 - coalesce(cr_return_quantity#83, 0)) AS sales_cnt#20, CheckOverflow((promote_precision(cast(cs_ext_sales_price#71 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#84, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#21] +Input [13]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#80, cr_item_sk#81, cr_order_number#82, cr_return_quantity#83, cr_return_amount#84] (86) Scan parquet default.store_sales -Output [5]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, ss_sold_date_sk#100] +Output [5]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, ss_sold_date_sk#89] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#100), dynamicpruningexpression(ss_sold_date_sk#100 IN dynamicpruning#83)] +PartitionFilters: [isnotnull(ss_sold_date_sk#89), dynamicpruningexpression(ss_sold_date_sk#89 IN dynamicpruning#73)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (87) ColumnarToRow [codegen id : 35] -Input [5]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, ss_sold_date_sk#100] +Input [5]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, ss_sold_date_sk#89] (88) Filter [codegen id : 35] -Input [5]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, ss_sold_date_sk#100] -Condition : isnotnull(ss_item_sk#96) +Input [5]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, ss_sold_date_sk#89] +Condition : isnotnull(ss_item_sk#85) (89) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#101, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105] +Output [5]: [i_item_sk#90, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94] (90) BroadcastHashJoin [codegen id : 35] -Left keys [1]: [ss_item_sk#96] -Right keys [1]: [i_item_sk#101] +Left keys [1]: [ss_item_sk#85] +Right keys [1]: [i_item_sk#90] Join condition: None (91) Project [codegen id : 35] -Output [9]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, ss_sold_date_sk#100, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105] -Input [10]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, ss_sold_date_sk#100, i_item_sk#101, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105] +Output [9]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, ss_sold_date_sk#89, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94] +Input [10]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, ss_sold_date_sk#89, i_item_sk#90, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94] (92) ReusedExchange [Reuses operator id: 135] -Output [2]: [d_date_sk#106, d_year#107] +Output [2]: [d_date_sk#95, d_year#96] (93) BroadcastHashJoin [codegen id : 35] -Left keys [1]: [ss_sold_date_sk#100] -Right keys [1]: [d_date_sk#106] +Left keys [1]: [ss_sold_date_sk#89] +Right keys [1]: [d_date_sk#95] Join condition: None (94) Project [codegen id : 35] -Output [9]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, d_year#107] -Input [11]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, ss_sold_date_sk#100, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, d_date_sk#106, d_year#107] +Output [9]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94, d_year#96] +Input [11]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, ss_sold_date_sk#89, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94, d_date_sk#95, d_year#96] (95) Exchange -Input [9]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, d_year#107] -Arguments: hashpartitioning(ss_ticket_number#97, ss_item_sk#96, 5), ENSURE_REQUIREMENTS, [id=#108] +Input [9]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94, d_year#96] +Arguments: hashpartitioning(ss_ticket_number#86, ss_item_sk#85, 5), ENSURE_REQUIREMENTS, [plan_id=12] (96) Sort [codegen id : 36] -Input [9]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, d_year#107] -Arguments: [ss_ticket_number#97 ASC NULLS FIRST, ss_item_sk#96 ASC NULLS FIRST], false, 0 +Input [9]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94, d_year#96] +Arguments: [ss_ticket_number#86 ASC NULLS FIRST, ss_item_sk#85 ASC NULLS FIRST], false, 0 (97) ReusedExchange [Reuses operator id: 39] -Output [4]: [sr_item_sk#109, sr_ticket_number#110, sr_return_quantity#111, sr_return_amt#112] +Output [4]: [sr_item_sk#97, sr_ticket_number#98, sr_return_quantity#99, sr_return_amt#100] (98) Sort [codegen id : 38] -Input [4]: [sr_item_sk#109, sr_ticket_number#110, sr_return_quantity#111, sr_return_amt#112] -Arguments: [sr_ticket_number#110 ASC NULLS FIRST, sr_item_sk#109 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#97, sr_ticket_number#98, sr_return_quantity#99, sr_return_amt#100] +Arguments: [sr_ticket_number#98 ASC NULLS FIRST, sr_item_sk#97 ASC NULLS FIRST], false, 0 (99) SortMergeJoin [codegen id : 39] -Left keys [2]: [ss_ticket_number#97, ss_item_sk#96] -Right keys [2]: [sr_ticket_number#110, sr_item_sk#109] +Left keys [2]: [ss_ticket_number#86, ss_item_sk#85] +Right keys [2]: [sr_ticket_number#98, sr_item_sk#97] Join condition: None (100) Project [codegen id : 39] -Output [7]: [d_year#107, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, (ss_quantity#98 - coalesce(sr_return_quantity#111, 0)) AS sales_cnt#44, CheckOverflow((promote_precision(cast(ss_ext_sales_price#99 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#112, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#45] -Input [13]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, d_year#107, sr_item_sk#109, sr_ticket_number#110, sr_return_quantity#111, sr_return_amt#112] +Output [7]: [d_year#96, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94, (ss_quantity#87 - coalesce(sr_return_quantity#99, 0)) AS sales_cnt#39, CheckOverflow((promote_precision(cast(ss_ext_sales_price#88 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#100, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] +Input [13]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94, d_year#96, sr_item_sk#97, sr_ticket_number#98, sr_return_quantity#99, sr_return_amt#100] (101) Scan parquet default.web_sales -Output [5]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, ws_sold_date_sk#117] +Output [5]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, ws_sold_date_sk#105] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#117), dynamicpruningexpression(ws_sold_date_sk#117 IN dynamicpruning#83)] +PartitionFilters: [isnotnull(ws_sold_date_sk#105), dynamicpruningexpression(ws_sold_date_sk#105 IN dynamicpruning#73)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (102) ColumnarToRow [codegen id : 42] -Input [5]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, ws_sold_date_sk#117] +Input [5]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, ws_sold_date_sk#105] (103) Filter [codegen id : 42] -Input [5]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, ws_sold_date_sk#117] -Condition : isnotnull(ws_item_sk#113) +Input [5]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, ws_sold_date_sk#105] +Condition : isnotnull(ws_item_sk#101) (104) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#118, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122] +Output [5]: [i_item_sk#106, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110] (105) BroadcastHashJoin [codegen id : 42] -Left keys [1]: [ws_item_sk#113] -Right keys [1]: [i_item_sk#118] +Left keys [1]: [ws_item_sk#101] +Right keys [1]: [i_item_sk#106] Join condition: None (106) Project [codegen id : 42] -Output [9]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, ws_sold_date_sk#117, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122] -Input [10]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, ws_sold_date_sk#117, i_item_sk#118, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122] +Output [9]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, ws_sold_date_sk#105, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110] +Input [10]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, ws_sold_date_sk#105, i_item_sk#106, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110] (107) ReusedExchange [Reuses operator id: 135] -Output [2]: [d_date_sk#123, d_year#124] +Output [2]: [d_date_sk#111, d_year#112] (108) BroadcastHashJoin [codegen id : 42] -Left keys [1]: [ws_sold_date_sk#117] -Right keys [1]: [d_date_sk#123] +Left keys [1]: [ws_sold_date_sk#105] +Right keys [1]: [d_date_sk#111] Join condition: None (109) Project [codegen id : 42] -Output [9]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, d_year#124] -Input [11]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, ws_sold_date_sk#117, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, d_date_sk#123, d_year#124] +Output [9]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110, d_year#112] +Input [11]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, ws_sold_date_sk#105, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110, d_date_sk#111, d_year#112] (110) Exchange -Input [9]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, d_year#124] -Arguments: hashpartitioning(ws_order_number#114, ws_item_sk#113, 5), ENSURE_REQUIREMENTS, [id=#125] +Input [9]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110, d_year#112] +Arguments: hashpartitioning(ws_order_number#102, ws_item_sk#101, 5), ENSURE_REQUIREMENTS, [plan_id=13] (111) Sort [codegen id : 43] -Input [9]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, d_year#124] -Arguments: [ws_order_number#114 ASC NULLS FIRST, ws_item_sk#113 ASC NULLS FIRST], false, 0 +Input [9]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110, d_year#112] +Arguments: [ws_order_number#102 ASC NULLS FIRST, ws_item_sk#101 ASC NULLS FIRST], false, 0 (112) ReusedExchange [Reuses operator id: 58] -Output [4]: [wr_item_sk#126, wr_order_number#127, wr_return_quantity#128, wr_return_amt#129] +Output [4]: [wr_item_sk#113, wr_order_number#114, wr_return_quantity#115, wr_return_amt#116] (113) Sort [codegen id : 45] -Input [4]: [wr_item_sk#126, wr_order_number#127, wr_return_quantity#128, wr_return_amt#129] -Arguments: [wr_order_number#127 ASC NULLS FIRST, wr_item_sk#126 ASC NULLS FIRST], false, 0 +Input [4]: [wr_item_sk#113, wr_order_number#114, wr_return_quantity#115, wr_return_amt#116] +Arguments: [wr_order_number#114 ASC NULLS FIRST, wr_item_sk#113 ASC NULLS FIRST], false, 0 (114) SortMergeJoin [codegen id : 46] -Left keys [2]: [ws_order_number#114, ws_item_sk#113] -Right keys [2]: [wr_order_number#127, wr_item_sk#126] +Left keys [2]: [ws_order_number#102, ws_item_sk#101] +Right keys [2]: [wr_order_number#114, wr_item_sk#113] Join condition: None (115) Project [codegen id : 46] -Output [7]: [d_year#124, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, (ws_quantity#115 - coalesce(wr_return_quantity#128, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ws_ext_sales_price#116 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#129, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#66] -Input [13]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, d_year#124, wr_item_sk#126, wr_order_number#127, wr_return_quantity#128, wr_return_amt#129] +Output [7]: [d_year#112, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110, (ws_quantity#103 - coalesce(wr_return_quantity#115, 0)) AS sales_cnt#58, CheckOverflow((promote_precision(cast(ws_ext_sales_price#104 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#116, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#59] +Input [13]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110, d_year#112, wr_item_sk#113, wr_order_number#114, wr_return_quantity#115, wr_return_amt#116] (116) Union (117) HashAggregate [codegen id : 47] -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] -Keys [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] +Keys [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] +Results [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] (118) Exchange -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] -Arguments: hashpartitioning(d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24, 5), ENSURE_REQUIREMENTS, [id=#130] +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] +Arguments: hashpartitioning(d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21, 5), ENSURE_REQUIREMENTS, [plan_id=14] (119) HashAggregate [codegen id : 48] -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] -Keys [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] +Keys [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] +Results [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] (120) HashAggregate [codegen id : 48] -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] -Keys [5]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] -Functions [2]: [partial_sum(sales_cnt#23), partial_sum(UnscaledValue(sales_amt#24))] -Aggregate Attributes [2]: [sum#68, sum#131] -Results [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sum#70, sum#132] +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] +Keys [5]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78] +Functions [2]: [partial_sum(sales_cnt#20), partial_sum(UnscaledValue(sales_amt#21))] +Aggregate Attributes [2]: [sum#60, sum#117] +Results [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sum#62, sum#118] (121) Exchange -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sum#70, sum#132] -Arguments: hashpartitioning(d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, 5), ENSURE_REQUIREMENTS, [id=#133] +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sum#62, sum#118] +Arguments: hashpartitioning(d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, 5), ENSURE_REQUIREMENTS, [plan_id=15] (122) HashAggregate [codegen id : 49] -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sum#70, sum#132] -Keys [5]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] -Functions [2]: [sum(sales_cnt#23), sum(UnscaledValue(sales_amt#24))] -Aggregate Attributes [2]: [sum(sales_cnt#23)#73, sum(UnscaledValue(sales_amt#24))#74] -Results [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sum(sales_cnt#23)#73 AS sales_cnt#134, MakeDecimal(sum(UnscaledValue(sales_amt#24))#74,18,2) AS sales_amt#135] +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sum#62, sum#118] +Keys [5]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78] +Functions [2]: [sum(sales_cnt#20), sum(UnscaledValue(sales_amt#21))] +Aggregate Attributes [2]: [sum(sales_cnt#20)#64, sum(UnscaledValue(sales_amt#21))#65] +Results [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sum(sales_cnt#20)#64 AS sales_cnt#119, MakeDecimal(sum(UnscaledValue(sales_amt#21))#65,18,2) AS sales_amt#120] (123) Exchange -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#134, sales_amt#135] -Arguments: hashpartitioning(i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, 5), ENSURE_REQUIREMENTS, [id=#136] +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#119, sales_amt#120] +Arguments: hashpartitioning(i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, 5), ENSURE_REQUIREMENTS, [plan_id=16] (124) Sort [codegen id : 50] -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#134, sales_amt#135] -Arguments: [i_brand_id#85 ASC NULLS FIRST, i_class_id#86 ASC NULLS FIRST, i_category_id#87 ASC NULLS FIRST, i_manufact_id#88 ASC NULLS FIRST], false, 0 +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#119, sales_amt#120] +Arguments: [i_brand_id#75 ASC NULLS FIRST, i_class_id#76 ASC NULLS FIRST, i_category_id#77 ASC NULLS FIRST, i_manufact_id#78 ASC NULLS FIRST], false, 0 (125) SortMergeJoin [codegen id : 51] Left keys [4]: [i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] -Right keys [4]: [i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] -Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#75 as decimal(17,2))) / promote_precision(cast(sales_cnt#134 as decimal(17,2)))), DecimalType(37,20)) < 0.90000000000000000000) +Right keys [4]: [i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#66 as decimal(17,2))) / promote_precision(cast(sales_cnt#119 as decimal(17,2)))), DecimalType(37,20)) < 0.90000000000000000000) (126) Project [codegen id : 51] -Output [10]: [d_year#90 AS prev_year#137, d_year#15 AS year#138, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#134 AS prev_yr_cnt#139, sales_cnt#75 AS curr_yr_cnt#140, (sales_cnt#75 - sales_cnt#134) AS sales_cnt_diff#141, CheckOverflow((promote_precision(cast(sales_amt#76 as decimal(19,2))) - promote_precision(cast(sales_amt#135 as decimal(19,2)))), DecimalType(19,2)) AS sales_amt_diff#142] -Input [14]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#75, sales_amt#76, d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#134, sales_amt#135] +Output [10]: [d_year#80 AS prev_year#121, d_year#14 AS year#122, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#119 AS prev_yr_cnt#123, sales_cnt#66 AS curr_yr_cnt#124, (sales_cnt#66 - sales_cnt#119) AS sales_cnt_diff#125, CheckOverflow((promote_precision(cast(sales_amt#67 as decimal(19,2))) - promote_precision(cast(sales_amt#120 as decimal(19,2)))), DecimalType(19,2)) AS sales_amt_diff#126] +Input [14]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#66, sales_amt#67, d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#119, sales_amt#120] (127) TakeOrderedAndProject -Input [10]: [prev_year#137, year#138, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, prev_yr_cnt#139, curr_yr_cnt#140, sales_cnt_diff#141, sales_amt_diff#142] -Arguments: 100, [sales_cnt_diff#141 ASC NULLS FIRST, sales_amt_diff#142 ASC NULLS FIRST], [prev_year#137, year#138, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, prev_yr_cnt#139, curr_yr_cnt#140, sales_cnt_diff#141, sales_amt_diff#142] +Input [10]: [prev_year#121, year#122, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, prev_yr_cnt#123, curr_yr_cnt#124, sales_cnt_diff#125, sales_amt_diff#126] +Arguments: 100, [sales_cnt_diff#125 ASC NULLS FIRST, sales_amt_diff#126 ASC NULLS FIRST], [prev_year#121, year#122, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, prev_yr_cnt#123, curr_yr_cnt#124, sales_cnt_diff#125, sales_amt_diff#126] ===== Subqueries ===== @@ -697,28 +697,28 @@ BroadcastExchange (131) (128) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#15] +Output [2]: [d_date_sk#13, d_year#14] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] ReadSchema: struct (129) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#15] +Input [2]: [d_date_sk#13, d_year#14] (130) Filter [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#15] -Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2002)) AND isnotnull(d_date_sk#14)) +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2002)) AND isnotnull(d_date_sk#13)) (131) BroadcastExchange -Input [2]: [d_date_sk#14, d_year#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#143] +Input [2]: [d_date_sk#13, d_year#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=17] -Subquery:2 Hosting operator id = 24 Hosting Expression = ss_sold_date_sk#29 IN dynamicpruning#6 +Subquery:2 Hosting operator id = 24 Hosting Expression = ss_sold_date_sk#26 IN dynamicpruning#6 -Subquery:3 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#50 IN dynamicpruning#6 +Subquery:3 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#45 IN dynamicpruning#6 -Subquery:4 Hosting operator id = 71 Hosting Expression = cs_sold_date_sk#82 IN dynamicpruning#83 +Subquery:4 Hosting operator id = 71 Hosting Expression = cs_sold_date_sk#72 IN dynamicpruning#73 BroadcastExchange (135) +- * Filter (134) +- * ColumnarToRow (133) @@ -726,25 +726,25 @@ BroadcastExchange (135) (132) Scan parquet default.date_dim -Output [2]: [d_date_sk#89, d_year#90] +Output [2]: [d_date_sk#79, d_year#80] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (133) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#89, d_year#90] +Input [2]: [d_date_sk#79, d_year#80] (134) Filter [codegen id : 1] -Input [2]: [d_date_sk#89, d_year#90] -Condition : ((isnotnull(d_year#90) AND (d_year#90 = 2001)) AND isnotnull(d_date_sk#89)) +Input [2]: [d_date_sk#79, d_year#80] +Condition : ((isnotnull(d_year#80) AND (d_year#80 = 2001)) AND isnotnull(d_date_sk#79)) (135) BroadcastExchange -Input [2]: [d_date_sk#89, d_year#90] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#144] +Input [2]: [d_date_sk#79, d_year#80] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=18] -Subquery:5 Hosting operator id = 86 Hosting Expression = ss_sold_date_sk#100 IN dynamicpruning#83 +Subquery:5 Hosting operator id = 86 Hosting Expression = ss_sold_date_sk#89 IN dynamicpruning#73 -Subquery:6 Hosting operator id = 101 Hosting Expression = ws_sold_date_sk#117 IN dynamicpruning#83 +Subquery:6 Hosting operator id = 101 Hosting Expression = ws_sold_date_sk#105 IN dynamicpruning#73 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt index 27a2b5f734281..6bb0d1e90534b 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q75/explain.txt @@ -163,7 +163,7 @@ Input [6]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10, i_categor (8) BroadcastExchange Input [5]: [i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_item_sk#1] @@ -175,517 +175,517 @@ Output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price# Input [10]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_item_sk#7, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] (11) ReusedExchange [Reuses operator id: 131] -Output [2]: [d_date_sk#14, d_year#15] +Output [2]: [d_date_sk#13, d_year#14] (12) BroadcastHashJoin [codegen id : 3] Left keys [1]: [cs_sold_date_sk#5] -Right keys [1]: [d_date_sk#14] +Right keys [1]: [d_date_sk#13] Join condition: None (13) Project [codegen id : 3] -Output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#15] -Input [11]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_date_sk#14, d_year#15] +Output [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#14] +Input [11]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, cs_sold_date_sk#5, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_date_sk#13, d_year#14] (14) Exchange -Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#15] -Arguments: hashpartitioning(cs_order_number#2, cs_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#14] +Arguments: hashpartitioning(cs_order_number#2, cs_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) Sort [codegen id : 4] -Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#15] +Input [9]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#14] Arguments: [cs_order_number#2 ASC NULLS FIRST, cs_item_sk#1 ASC NULLS FIRST], false, 0 (16) Scan parquet default.catalog_returns -Output [5]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20, cr_returned_date_sk#21] +Output [5]: [cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18, cr_returned_date_sk#19] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 5] -Input [5]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20, cr_returned_date_sk#21] +Input [5]: [cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18, cr_returned_date_sk#19] (18) Filter [codegen id : 5] -Input [5]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20, cr_returned_date_sk#21] -Condition : (isnotnull(cr_order_number#18) AND isnotnull(cr_item_sk#17)) +Input [5]: [cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18, cr_returned_date_sk#19] +Condition : (isnotnull(cr_order_number#16) AND isnotnull(cr_item_sk#15)) (19) Project [codegen id : 5] -Output [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -Input [5]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20, cr_returned_date_sk#21] +Output [4]: [cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18] +Input [5]: [cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18, cr_returned_date_sk#19] (20) Exchange -Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -Arguments: hashpartitioning(cr_order_number#18, cr_item_sk#17, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [4]: [cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18] +Arguments: hashpartitioning(cr_order_number#16, cr_item_sk#15, 5), ENSURE_REQUIREMENTS, [plan_id=3] (21) Sort [codegen id : 6] -Input [4]: [cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] -Arguments: [cr_order_number#18 ASC NULLS FIRST, cr_item_sk#17 ASC NULLS FIRST], false, 0 +Input [4]: [cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18] +Arguments: [cr_order_number#16 ASC NULLS FIRST, cr_item_sk#15 ASC NULLS FIRST], false, 0 (22) SortMergeJoin [codegen id : 7] Left keys [2]: [cs_order_number#2, cs_item_sk#1] -Right keys [2]: [cr_order_number#18, cr_item_sk#17] +Right keys [2]: [cr_order_number#16, cr_item_sk#15] Join condition: None (23) Project [codegen id : 7] -Output [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, (cs_quantity#3 - coalesce(cr_return_quantity#19, 0)) AS sales_cnt#23, CheckOverflow((promote_precision(cast(cs_ext_sales_price#4 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#20, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#24] -Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#15, cr_item_sk#17, cr_order_number#18, cr_return_quantity#19, cr_return_amount#20] +Output [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, (cs_quantity#3 - coalesce(cr_return_quantity#17, 0)) AS sales_cnt#20, CheckOverflow((promote_precision(cast(cs_ext_sales_price#4 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#18, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#21] +Input [13]: [cs_item_sk#1, cs_order_number#2, cs_quantity#3, cs_ext_sales_price#4, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, d_year#14, cr_item_sk#15, cr_order_number#16, cr_return_quantity#17, cr_return_amount#18] (24) Scan parquet default.store_sales -Output [5]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, ss_sold_date_sk#29] +Output [5]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, ss_sold_date_sk#26] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#29), dynamicpruningexpression(ss_sold_date_sk#29 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(ss_sold_date_sk#26), dynamicpruningexpression(ss_sold_date_sk#26 IN dynamicpruning#6)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (25) ColumnarToRow [codegen id : 10] -Input [5]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, ss_sold_date_sk#29] +Input [5]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, ss_sold_date_sk#26] (26) Filter [codegen id : 10] -Input [5]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, ss_sold_date_sk#29] -Condition : isnotnull(ss_item_sk#25) +Input [5]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, ss_sold_date_sk#26] +Condition : isnotnull(ss_item_sk#22) (27) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#30, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34] +Output [5]: [i_item_sk#27, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31] (28) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_item_sk#25] -Right keys [1]: [i_item_sk#30] +Left keys [1]: [ss_item_sk#22] +Right keys [1]: [i_item_sk#27] Join condition: None (29) Project [codegen id : 10] -Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, ss_sold_date_sk#29, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34] -Input [10]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, ss_sold_date_sk#29, i_item_sk#30, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34] +Output [9]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, ss_sold_date_sk#26, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31] +Input [10]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, ss_sold_date_sk#26, i_item_sk#27, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31] (30) ReusedExchange [Reuses operator id: 131] -Output [2]: [d_date_sk#35, d_year#36] +Output [2]: [d_date_sk#32, d_year#33] (31) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [ss_sold_date_sk#29] -Right keys [1]: [d_date_sk#35] +Left keys [1]: [ss_sold_date_sk#26] +Right keys [1]: [d_date_sk#32] Join condition: None (32) Project [codegen id : 10] -Output [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, d_year#36] -Input [11]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, ss_sold_date_sk#29, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, d_date_sk#35, d_year#36] +Output [9]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31, d_year#33] +Input [11]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, ss_sold_date_sk#26, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31, d_date_sk#32, d_year#33] (33) Exchange -Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, d_year#36] -Arguments: hashpartitioning(ss_ticket_number#26, ss_item_sk#25, 5), ENSURE_REQUIREMENTS, [id=#37] +Input [9]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31, d_year#33] +Arguments: hashpartitioning(ss_ticket_number#23, ss_item_sk#22, 5), ENSURE_REQUIREMENTS, [plan_id=4] (34) Sort [codegen id : 11] -Input [9]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, d_year#36] -Arguments: [ss_ticket_number#26 ASC NULLS FIRST, ss_item_sk#25 ASC NULLS FIRST], false, 0 +Input [9]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31, d_year#33] +Arguments: [ss_ticket_number#23 ASC NULLS FIRST, ss_item_sk#22 ASC NULLS FIRST], false, 0 (35) Scan parquet default.store_returns -Output [5]: [sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41, sr_returned_date_sk#42] +Output [5]: [sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37, sr_returned_date_sk#38] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] ReadSchema: struct (36) ColumnarToRow [codegen id : 12] -Input [5]: [sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41, sr_returned_date_sk#42] +Input [5]: [sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37, sr_returned_date_sk#38] (37) Filter [codegen id : 12] -Input [5]: [sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41, sr_returned_date_sk#42] -Condition : (isnotnull(sr_ticket_number#39) AND isnotnull(sr_item_sk#38)) +Input [5]: [sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37, sr_returned_date_sk#38] +Condition : (isnotnull(sr_ticket_number#35) AND isnotnull(sr_item_sk#34)) (38) Project [codegen id : 12] -Output [4]: [sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41] -Input [5]: [sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41, sr_returned_date_sk#42] +Output [4]: [sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37] +Input [5]: [sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37, sr_returned_date_sk#38] (39) Exchange -Input [4]: [sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41] -Arguments: hashpartitioning(sr_ticket_number#39, sr_item_sk#38, 5), ENSURE_REQUIREMENTS, [id=#43] +Input [4]: [sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37] +Arguments: hashpartitioning(sr_ticket_number#35, sr_item_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=5] (40) Sort [codegen id : 13] -Input [4]: [sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41] -Arguments: [sr_ticket_number#39 ASC NULLS FIRST, sr_item_sk#38 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37] +Arguments: [sr_ticket_number#35 ASC NULLS FIRST, sr_item_sk#34 ASC NULLS FIRST], false, 0 (41) SortMergeJoin [codegen id : 14] -Left keys [2]: [ss_ticket_number#26, ss_item_sk#25] -Right keys [2]: [sr_ticket_number#39, sr_item_sk#38] +Left keys [2]: [ss_ticket_number#23, ss_item_sk#22] +Right keys [2]: [sr_ticket_number#35, sr_item_sk#34] Join condition: None (42) Project [codegen id : 14] -Output [7]: [d_year#36, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, (ss_quantity#27 - coalesce(sr_return_quantity#40, 0)) AS sales_cnt#44, CheckOverflow((promote_precision(cast(ss_ext_sales_price#28 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#41, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#45] -Input [13]: [ss_item_sk#25, ss_ticket_number#26, ss_quantity#27, ss_ext_sales_price#28, i_brand_id#31, i_class_id#32, i_category_id#33, i_manufact_id#34, d_year#36, sr_item_sk#38, sr_ticket_number#39, sr_return_quantity#40, sr_return_amt#41] +Output [7]: [d_year#33, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31, (ss_quantity#24 - coalesce(sr_return_quantity#36, 0)) AS sales_cnt#39, CheckOverflow((promote_precision(cast(ss_ext_sales_price#25 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#37, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] +Input [13]: [ss_item_sk#22, ss_ticket_number#23, ss_quantity#24, ss_ext_sales_price#25, i_brand_id#28, i_class_id#29, i_category_id#30, i_manufact_id#31, d_year#33, sr_item_sk#34, sr_ticket_number#35, sr_return_quantity#36, sr_return_amt#37] (43) Scan parquet default.web_sales -Output [5]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, ws_sold_date_sk#50] +Output [5]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, ws_sold_date_sk#45] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#50), dynamicpruningexpression(ws_sold_date_sk#50 IN dynamicpruning#6)] +PartitionFilters: [isnotnull(ws_sold_date_sk#45), dynamicpruningexpression(ws_sold_date_sk#45 IN dynamicpruning#6)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (44) ColumnarToRow [codegen id : 17] -Input [5]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, ws_sold_date_sk#50] +Input [5]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, ws_sold_date_sk#45] (45) Filter [codegen id : 17] -Input [5]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, ws_sold_date_sk#50] -Condition : isnotnull(ws_item_sk#46) +Input [5]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, ws_sold_date_sk#45] +Condition : isnotnull(ws_item_sk#41) (46) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#51, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55] +Output [5]: [i_item_sk#46, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50] (47) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_item_sk#46] -Right keys [1]: [i_item_sk#51] +Left keys [1]: [ws_item_sk#41] +Right keys [1]: [i_item_sk#46] Join condition: None (48) Project [codegen id : 17] -Output [9]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, ws_sold_date_sk#50, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55] -Input [10]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, ws_sold_date_sk#50, i_item_sk#51, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55] +Output [9]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, ws_sold_date_sk#45, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50] +Input [10]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, ws_sold_date_sk#45, i_item_sk#46, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50] (49) ReusedExchange [Reuses operator id: 131] -Output [2]: [d_date_sk#56, d_year#57] +Output [2]: [d_date_sk#51, d_year#52] (50) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#50] -Right keys [1]: [d_date_sk#56] +Left keys [1]: [ws_sold_date_sk#45] +Right keys [1]: [d_date_sk#51] Join condition: None (51) Project [codegen id : 17] -Output [9]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, d_year#57] -Input [11]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, ws_sold_date_sk#50, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, d_date_sk#56, d_year#57] +Output [9]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50, d_year#52] +Input [11]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, ws_sold_date_sk#45, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50, d_date_sk#51, d_year#52] (52) Exchange -Input [9]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, d_year#57] -Arguments: hashpartitioning(ws_order_number#47, ws_item_sk#46, 5), ENSURE_REQUIREMENTS, [id=#58] +Input [9]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50, d_year#52] +Arguments: hashpartitioning(ws_order_number#42, ws_item_sk#41, 5), ENSURE_REQUIREMENTS, [plan_id=6] (53) Sort [codegen id : 18] -Input [9]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, d_year#57] -Arguments: [ws_order_number#47 ASC NULLS FIRST, ws_item_sk#46 ASC NULLS FIRST], false, 0 +Input [9]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50, d_year#52] +Arguments: [ws_order_number#42 ASC NULLS FIRST, ws_item_sk#41 ASC NULLS FIRST], false, 0 (54) Scan parquet default.web_returns -Output [5]: [wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62, wr_returned_date_sk#63] +Output [5]: [wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56, wr_returned_date_sk#57] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct (55) ColumnarToRow [codegen id : 19] -Input [5]: [wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62, wr_returned_date_sk#63] +Input [5]: [wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56, wr_returned_date_sk#57] (56) Filter [codegen id : 19] -Input [5]: [wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62, wr_returned_date_sk#63] -Condition : (isnotnull(wr_order_number#60) AND isnotnull(wr_item_sk#59)) +Input [5]: [wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56, wr_returned_date_sk#57] +Condition : (isnotnull(wr_order_number#54) AND isnotnull(wr_item_sk#53)) (57) Project [codegen id : 19] -Output [4]: [wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62] -Input [5]: [wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62, wr_returned_date_sk#63] +Output [4]: [wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56] +Input [5]: [wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56, wr_returned_date_sk#57] (58) Exchange -Input [4]: [wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62] -Arguments: hashpartitioning(wr_order_number#60, wr_item_sk#59, 5), ENSURE_REQUIREMENTS, [id=#64] +Input [4]: [wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56] +Arguments: hashpartitioning(wr_order_number#54, wr_item_sk#53, 5), ENSURE_REQUIREMENTS, [plan_id=7] (59) Sort [codegen id : 20] -Input [4]: [wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62] -Arguments: [wr_order_number#60 ASC NULLS FIRST, wr_item_sk#59 ASC NULLS FIRST], false, 0 +Input [4]: [wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56] +Arguments: [wr_order_number#54 ASC NULLS FIRST, wr_item_sk#53 ASC NULLS FIRST], false, 0 (60) SortMergeJoin [codegen id : 21] -Left keys [2]: [ws_order_number#47, ws_item_sk#46] -Right keys [2]: [wr_order_number#60, wr_item_sk#59] +Left keys [2]: [ws_order_number#42, ws_item_sk#41] +Right keys [2]: [wr_order_number#54, wr_item_sk#53] Join condition: None (61) Project [codegen id : 21] -Output [7]: [d_year#57, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, (ws_quantity#48 - coalesce(wr_return_quantity#61, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ws_ext_sales_price#49 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#62, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#66] -Input [13]: [ws_item_sk#46, ws_order_number#47, ws_quantity#48, ws_ext_sales_price#49, i_brand_id#52, i_class_id#53, i_category_id#54, i_manufact_id#55, d_year#57, wr_item_sk#59, wr_order_number#60, wr_return_quantity#61, wr_return_amt#62] +Output [7]: [d_year#52, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50, (ws_quantity#43 - coalesce(wr_return_quantity#55, 0)) AS sales_cnt#58, CheckOverflow((promote_precision(cast(ws_ext_sales_price#44 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#56, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#59] +Input [13]: [ws_item_sk#41, ws_order_number#42, ws_quantity#43, ws_ext_sales_price#44, i_brand_id#47, i_class_id#48, i_category_id#49, i_manufact_id#50, d_year#52, wr_item_sk#53, wr_order_number#54, wr_return_quantity#55, wr_return_amt#56] (62) Union (63) HashAggregate [codegen id : 22] -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] -Keys [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] +Keys [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] +Results [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] (64) Exchange -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] -Arguments: hashpartitioning(d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24, 5), ENSURE_REQUIREMENTS, [id=#67] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] +Arguments: hashpartitioning(d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21, 5), ENSURE_REQUIREMENTS, [plan_id=8] (65) HashAggregate [codegen id : 23] -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] -Keys [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] +Keys [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] +Results [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] (66) HashAggregate [codegen id : 23] -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#23, sales_amt#24] -Keys [5]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] -Functions [2]: [partial_sum(sales_cnt#23), partial_sum(UnscaledValue(sales_amt#24))] -Aggregate Attributes [2]: [sum#68, sum#69] -Results [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum#70, sum#71] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#20, sales_amt#21] +Keys [5]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] +Functions [2]: [partial_sum(sales_cnt#20), partial_sum(UnscaledValue(sales_amt#21))] +Aggregate Attributes [2]: [sum#60, sum#61] +Results [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum#62, sum#63] (67) Exchange -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum#70, sum#71] -Arguments: hashpartitioning(d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, 5), ENSURE_REQUIREMENTS, [id=#72] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum#62, sum#63] +Arguments: hashpartitioning(d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, 5), ENSURE_REQUIREMENTS, [plan_id=9] (68) HashAggregate [codegen id : 24] -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum#70, sum#71] -Keys [5]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] -Functions [2]: [sum(sales_cnt#23), sum(UnscaledValue(sales_amt#24))] -Aggregate Attributes [2]: [sum(sales_cnt#23)#73, sum(UnscaledValue(sales_amt#24))#74] -Results [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum(sales_cnt#23)#73 AS sales_cnt#75, MakeDecimal(sum(UnscaledValue(sales_amt#24))#74,18,2) AS sales_amt#76] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum#62, sum#63] +Keys [5]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] +Functions [2]: [sum(sales_cnt#20), sum(UnscaledValue(sales_amt#21))] +Aggregate Attributes [2]: [sum(sales_cnt#20)#64, sum(UnscaledValue(sales_amt#21))#65] +Results [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sum(sales_cnt#20)#64 AS sales_cnt#66, MakeDecimal(sum(UnscaledValue(sales_amt#21))#65,18,2) AS sales_amt#67] (69) Exchange -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#75, sales_amt#76] -Arguments: hashpartitioning(i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, 5), ENSURE_REQUIREMENTS, [id=#77] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#66, sales_amt#67] +Arguments: hashpartitioning(i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, 5), ENSURE_REQUIREMENTS, [plan_id=10] (70) Sort [codegen id : 25] -Input [7]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#75, sales_amt#76] +Input [7]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#66, sales_amt#67] Arguments: [i_brand_id#8 ASC NULLS FIRST, i_class_id#9 ASC NULLS FIRST, i_category_id#10 ASC NULLS FIRST, i_manufact_id#12 ASC NULLS FIRST], false, 0 (71) Scan parquet default.catalog_sales -Output [5]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, cs_sold_date_sk#82] +Output [5]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, cs_sold_date_sk#72] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#82), dynamicpruningexpression(cs_sold_date_sk#82 IN dynamicpruning#83)] +PartitionFilters: [isnotnull(cs_sold_date_sk#72), dynamicpruningexpression(cs_sold_date_sk#72 IN dynamicpruning#73)] PushedFilters: [IsNotNull(cs_item_sk)] ReadSchema: struct (72) ColumnarToRow [codegen id : 28] -Input [5]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, cs_sold_date_sk#82] +Input [5]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, cs_sold_date_sk#72] (73) Filter [codegen id : 28] -Input [5]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, cs_sold_date_sk#82] -Condition : isnotnull(cs_item_sk#78) +Input [5]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, cs_sold_date_sk#72] +Condition : isnotnull(cs_item_sk#68) (74) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#84, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] +Output [5]: [i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78] (75) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [cs_item_sk#78] -Right keys [1]: [i_item_sk#84] +Left keys [1]: [cs_item_sk#68] +Right keys [1]: [i_item_sk#74] Join condition: None (76) Project [codegen id : 28] -Output [9]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, cs_sold_date_sk#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] -Input [10]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, cs_sold_date_sk#82, i_item_sk#84, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] +Output [9]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, cs_sold_date_sk#72, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78] +Input [10]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, cs_sold_date_sk#72, i_item_sk#74, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78] (77) ReusedExchange [Reuses operator id: 135] -Output [2]: [d_date_sk#89, d_year#90] +Output [2]: [d_date_sk#79, d_year#80] (78) BroadcastHashJoin [codegen id : 28] -Left keys [1]: [cs_sold_date_sk#82] -Right keys [1]: [d_date_sk#89] +Left keys [1]: [cs_sold_date_sk#72] +Right keys [1]: [d_date_sk#79] Join condition: None (79) Project [codegen id : 28] -Output [9]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90] -Input [11]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, cs_sold_date_sk#82, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_date_sk#89, d_year#90] +Output [9]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#80] +Input [11]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, cs_sold_date_sk#72, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_date_sk#79, d_year#80] (80) Exchange -Input [9]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90] -Arguments: hashpartitioning(cs_order_number#79, cs_item_sk#78, 5), ENSURE_REQUIREMENTS, [id=#91] +Input [9]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#80] +Arguments: hashpartitioning(cs_order_number#69, cs_item_sk#68, 5), ENSURE_REQUIREMENTS, [plan_id=11] (81) Sort [codegen id : 29] -Input [9]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90] -Arguments: [cs_order_number#79 ASC NULLS FIRST, cs_item_sk#78 ASC NULLS FIRST], false, 0 +Input [9]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#80] +Arguments: [cs_order_number#69 ASC NULLS FIRST, cs_item_sk#68 ASC NULLS FIRST], false, 0 (82) ReusedExchange [Reuses operator id: 20] -Output [4]: [cr_item_sk#92, cr_order_number#93, cr_return_quantity#94, cr_return_amount#95] +Output [4]: [cr_item_sk#81, cr_order_number#82, cr_return_quantity#83, cr_return_amount#84] (83) Sort [codegen id : 31] -Input [4]: [cr_item_sk#92, cr_order_number#93, cr_return_quantity#94, cr_return_amount#95] -Arguments: [cr_order_number#93 ASC NULLS FIRST, cr_item_sk#92 ASC NULLS FIRST], false, 0 +Input [4]: [cr_item_sk#81, cr_order_number#82, cr_return_quantity#83, cr_return_amount#84] +Arguments: [cr_order_number#82 ASC NULLS FIRST, cr_item_sk#81 ASC NULLS FIRST], false, 0 (84) SortMergeJoin [codegen id : 32] -Left keys [2]: [cs_order_number#79, cs_item_sk#78] -Right keys [2]: [cr_order_number#93, cr_item_sk#92] +Left keys [2]: [cs_order_number#69, cs_item_sk#68] +Right keys [2]: [cr_order_number#82, cr_item_sk#81] Join condition: None (85) Project [codegen id : 32] -Output [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, (cs_quantity#80 - coalesce(cr_return_quantity#94, 0)) AS sales_cnt#23, CheckOverflow((promote_precision(cast(cs_ext_sales_price#81 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#95, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#24] -Input [13]: [cs_item_sk#78, cs_order_number#79, cs_quantity#80, cs_ext_sales_price#81, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, d_year#90, cr_item_sk#92, cr_order_number#93, cr_return_quantity#94, cr_return_amount#95] +Output [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, (cs_quantity#70 - coalesce(cr_return_quantity#83, 0)) AS sales_cnt#20, CheckOverflow((promote_precision(cast(cs_ext_sales_price#71 as decimal(8,2))) - promote_precision(cast(coalesce(cr_return_amount#84, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#21] +Input [13]: [cs_item_sk#68, cs_order_number#69, cs_quantity#70, cs_ext_sales_price#71, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, d_year#80, cr_item_sk#81, cr_order_number#82, cr_return_quantity#83, cr_return_amount#84] (86) Scan parquet default.store_sales -Output [5]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, ss_sold_date_sk#100] +Output [5]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, ss_sold_date_sk#89] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ss_sold_date_sk#100), dynamicpruningexpression(ss_sold_date_sk#100 IN dynamicpruning#83)] +PartitionFilters: [isnotnull(ss_sold_date_sk#89), dynamicpruningexpression(ss_sold_date_sk#89 IN dynamicpruning#73)] PushedFilters: [IsNotNull(ss_item_sk)] ReadSchema: struct (87) ColumnarToRow [codegen id : 35] -Input [5]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, ss_sold_date_sk#100] +Input [5]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, ss_sold_date_sk#89] (88) Filter [codegen id : 35] -Input [5]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, ss_sold_date_sk#100] -Condition : isnotnull(ss_item_sk#96) +Input [5]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, ss_sold_date_sk#89] +Condition : isnotnull(ss_item_sk#85) (89) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#101, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105] +Output [5]: [i_item_sk#90, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94] (90) BroadcastHashJoin [codegen id : 35] -Left keys [1]: [ss_item_sk#96] -Right keys [1]: [i_item_sk#101] +Left keys [1]: [ss_item_sk#85] +Right keys [1]: [i_item_sk#90] Join condition: None (91) Project [codegen id : 35] -Output [9]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, ss_sold_date_sk#100, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105] -Input [10]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, ss_sold_date_sk#100, i_item_sk#101, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105] +Output [9]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, ss_sold_date_sk#89, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94] +Input [10]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, ss_sold_date_sk#89, i_item_sk#90, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94] (92) ReusedExchange [Reuses operator id: 135] -Output [2]: [d_date_sk#106, d_year#107] +Output [2]: [d_date_sk#95, d_year#96] (93) BroadcastHashJoin [codegen id : 35] -Left keys [1]: [ss_sold_date_sk#100] -Right keys [1]: [d_date_sk#106] +Left keys [1]: [ss_sold_date_sk#89] +Right keys [1]: [d_date_sk#95] Join condition: None (94) Project [codegen id : 35] -Output [9]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, d_year#107] -Input [11]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, ss_sold_date_sk#100, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, d_date_sk#106, d_year#107] +Output [9]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94, d_year#96] +Input [11]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, ss_sold_date_sk#89, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94, d_date_sk#95, d_year#96] (95) Exchange -Input [9]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, d_year#107] -Arguments: hashpartitioning(ss_ticket_number#97, ss_item_sk#96, 5), ENSURE_REQUIREMENTS, [id=#108] +Input [9]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94, d_year#96] +Arguments: hashpartitioning(ss_ticket_number#86, ss_item_sk#85, 5), ENSURE_REQUIREMENTS, [plan_id=12] (96) Sort [codegen id : 36] -Input [9]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, d_year#107] -Arguments: [ss_ticket_number#97 ASC NULLS FIRST, ss_item_sk#96 ASC NULLS FIRST], false, 0 +Input [9]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94, d_year#96] +Arguments: [ss_ticket_number#86 ASC NULLS FIRST, ss_item_sk#85 ASC NULLS FIRST], false, 0 (97) ReusedExchange [Reuses operator id: 39] -Output [4]: [sr_item_sk#109, sr_ticket_number#110, sr_return_quantity#111, sr_return_amt#112] +Output [4]: [sr_item_sk#97, sr_ticket_number#98, sr_return_quantity#99, sr_return_amt#100] (98) Sort [codegen id : 38] -Input [4]: [sr_item_sk#109, sr_ticket_number#110, sr_return_quantity#111, sr_return_amt#112] -Arguments: [sr_ticket_number#110 ASC NULLS FIRST, sr_item_sk#109 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#97, sr_ticket_number#98, sr_return_quantity#99, sr_return_amt#100] +Arguments: [sr_ticket_number#98 ASC NULLS FIRST, sr_item_sk#97 ASC NULLS FIRST], false, 0 (99) SortMergeJoin [codegen id : 39] -Left keys [2]: [ss_ticket_number#97, ss_item_sk#96] -Right keys [2]: [sr_ticket_number#110, sr_item_sk#109] +Left keys [2]: [ss_ticket_number#86, ss_item_sk#85] +Right keys [2]: [sr_ticket_number#98, sr_item_sk#97] Join condition: None (100) Project [codegen id : 39] -Output [7]: [d_year#107, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, (ss_quantity#98 - coalesce(sr_return_quantity#111, 0)) AS sales_cnt#44, CheckOverflow((promote_precision(cast(ss_ext_sales_price#99 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#112, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#45] -Input [13]: [ss_item_sk#96, ss_ticket_number#97, ss_quantity#98, ss_ext_sales_price#99, i_brand_id#102, i_class_id#103, i_category_id#104, i_manufact_id#105, d_year#107, sr_item_sk#109, sr_ticket_number#110, sr_return_quantity#111, sr_return_amt#112] +Output [7]: [d_year#96, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94, (ss_quantity#87 - coalesce(sr_return_quantity#99, 0)) AS sales_cnt#39, CheckOverflow((promote_precision(cast(ss_ext_sales_price#88 as decimal(8,2))) - promote_precision(cast(coalesce(sr_return_amt#100, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#40] +Input [13]: [ss_item_sk#85, ss_ticket_number#86, ss_quantity#87, ss_ext_sales_price#88, i_brand_id#91, i_class_id#92, i_category_id#93, i_manufact_id#94, d_year#96, sr_item_sk#97, sr_ticket_number#98, sr_return_quantity#99, sr_return_amt#100] (101) Scan parquet default.web_sales -Output [5]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, ws_sold_date_sk#117] +Output [5]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, ws_sold_date_sk#105] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#117), dynamicpruningexpression(ws_sold_date_sk#117 IN dynamicpruning#83)] +PartitionFilters: [isnotnull(ws_sold_date_sk#105), dynamicpruningexpression(ws_sold_date_sk#105 IN dynamicpruning#73)] PushedFilters: [IsNotNull(ws_item_sk)] ReadSchema: struct (102) ColumnarToRow [codegen id : 42] -Input [5]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, ws_sold_date_sk#117] +Input [5]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, ws_sold_date_sk#105] (103) Filter [codegen id : 42] -Input [5]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, ws_sold_date_sk#117] -Condition : isnotnull(ws_item_sk#113) +Input [5]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, ws_sold_date_sk#105] +Condition : isnotnull(ws_item_sk#101) (104) ReusedExchange [Reuses operator id: 8] -Output [5]: [i_item_sk#118, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122] +Output [5]: [i_item_sk#106, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110] (105) BroadcastHashJoin [codegen id : 42] -Left keys [1]: [ws_item_sk#113] -Right keys [1]: [i_item_sk#118] +Left keys [1]: [ws_item_sk#101] +Right keys [1]: [i_item_sk#106] Join condition: None (106) Project [codegen id : 42] -Output [9]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, ws_sold_date_sk#117, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122] -Input [10]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, ws_sold_date_sk#117, i_item_sk#118, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122] +Output [9]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, ws_sold_date_sk#105, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110] +Input [10]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, ws_sold_date_sk#105, i_item_sk#106, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110] (107) ReusedExchange [Reuses operator id: 135] -Output [2]: [d_date_sk#123, d_year#124] +Output [2]: [d_date_sk#111, d_year#112] (108) BroadcastHashJoin [codegen id : 42] -Left keys [1]: [ws_sold_date_sk#117] -Right keys [1]: [d_date_sk#123] +Left keys [1]: [ws_sold_date_sk#105] +Right keys [1]: [d_date_sk#111] Join condition: None (109) Project [codegen id : 42] -Output [9]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, d_year#124] -Input [11]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, ws_sold_date_sk#117, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, d_date_sk#123, d_year#124] +Output [9]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110, d_year#112] +Input [11]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, ws_sold_date_sk#105, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110, d_date_sk#111, d_year#112] (110) Exchange -Input [9]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, d_year#124] -Arguments: hashpartitioning(ws_order_number#114, ws_item_sk#113, 5), ENSURE_REQUIREMENTS, [id=#125] +Input [9]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110, d_year#112] +Arguments: hashpartitioning(ws_order_number#102, ws_item_sk#101, 5), ENSURE_REQUIREMENTS, [plan_id=13] (111) Sort [codegen id : 43] -Input [9]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, d_year#124] -Arguments: [ws_order_number#114 ASC NULLS FIRST, ws_item_sk#113 ASC NULLS FIRST], false, 0 +Input [9]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110, d_year#112] +Arguments: [ws_order_number#102 ASC NULLS FIRST, ws_item_sk#101 ASC NULLS FIRST], false, 0 (112) ReusedExchange [Reuses operator id: 58] -Output [4]: [wr_item_sk#126, wr_order_number#127, wr_return_quantity#128, wr_return_amt#129] +Output [4]: [wr_item_sk#113, wr_order_number#114, wr_return_quantity#115, wr_return_amt#116] (113) Sort [codegen id : 45] -Input [4]: [wr_item_sk#126, wr_order_number#127, wr_return_quantity#128, wr_return_amt#129] -Arguments: [wr_order_number#127 ASC NULLS FIRST, wr_item_sk#126 ASC NULLS FIRST], false, 0 +Input [4]: [wr_item_sk#113, wr_order_number#114, wr_return_quantity#115, wr_return_amt#116] +Arguments: [wr_order_number#114 ASC NULLS FIRST, wr_item_sk#113 ASC NULLS FIRST], false, 0 (114) SortMergeJoin [codegen id : 46] -Left keys [2]: [ws_order_number#114, ws_item_sk#113] -Right keys [2]: [wr_order_number#127, wr_item_sk#126] +Left keys [2]: [ws_order_number#102, ws_item_sk#101] +Right keys [2]: [wr_order_number#114, wr_item_sk#113] Join condition: None (115) Project [codegen id : 46] -Output [7]: [d_year#124, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, (ws_quantity#115 - coalesce(wr_return_quantity#128, 0)) AS sales_cnt#65, CheckOverflow((promote_precision(cast(ws_ext_sales_price#116 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#129, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#66] -Input [13]: [ws_item_sk#113, ws_order_number#114, ws_quantity#115, ws_ext_sales_price#116, i_brand_id#119, i_class_id#120, i_category_id#121, i_manufact_id#122, d_year#124, wr_item_sk#126, wr_order_number#127, wr_return_quantity#128, wr_return_amt#129] +Output [7]: [d_year#112, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110, (ws_quantity#103 - coalesce(wr_return_quantity#115, 0)) AS sales_cnt#58, CheckOverflow((promote_precision(cast(ws_ext_sales_price#104 as decimal(8,2))) - promote_precision(cast(coalesce(wr_return_amt#116, 0.00) as decimal(8,2)))), DecimalType(8,2)) AS sales_amt#59] +Input [13]: [ws_item_sk#101, ws_order_number#102, ws_quantity#103, ws_ext_sales_price#104, i_brand_id#107, i_class_id#108, i_category_id#109, i_manufact_id#110, d_year#112, wr_item_sk#113, wr_order_number#114, wr_return_quantity#115, wr_return_amt#116] (116) Union (117) HashAggregate [codegen id : 47] -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] -Keys [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] +Keys [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] +Results [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] (118) Exchange -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] -Arguments: hashpartitioning(d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24, 5), ENSURE_REQUIREMENTS, [id=#130] +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] +Arguments: hashpartitioning(d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21, 5), ENSURE_REQUIREMENTS, [plan_id=14] (119) HashAggregate [codegen id : 48] -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] -Keys [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] +Keys [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] Functions: [] Aggregate Attributes: [] -Results [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] +Results [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] (120) HashAggregate [codegen id : 48] -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#23, sales_amt#24] -Keys [5]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] -Functions [2]: [partial_sum(sales_cnt#23), partial_sum(UnscaledValue(sales_amt#24))] -Aggregate Attributes [2]: [sum#68, sum#131] -Results [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sum#70, sum#132] +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#20, sales_amt#21] +Keys [5]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78] +Functions [2]: [partial_sum(sales_cnt#20), partial_sum(UnscaledValue(sales_amt#21))] +Aggregate Attributes [2]: [sum#60, sum#117] +Results [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sum#62, sum#118] (121) Exchange -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sum#70, sum#132] -Arguments: hashpartitioning(d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, 5), ENSURE_REQUIREMENTS, [id=#133] +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sum#62, sum#118] +Arguments: hashpartitioning(d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, 5), ENSURE_REQUIREMENTS, [plan_id=15] (122) HashAggregate [codegen id : 49] -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sum#70, sum#132] -Keys [5]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] -Functions [2]: [sum(sales_cnt#23), sum(UnscaledValue(sales_amt#24))] -Aggregate Attributes [2]: [sum(sales_cnt#23)#73, sum(UnscaledValue(sales_amt#24))#74] -Results [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sum(sales_cnt#23)#73 AS sales_cnt#134, MakeDecimal(sum(UnscaledValue(sales_amt#24))#74,18,2) AS sales_amt#135] +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sum#62, sum#118] +Keys [5]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78] +Functions [2]: [sum(sales_cnt#20), sum(UnscaledValue(sales_amt#21))] +Aggregate Attributes [2]: [sum(sales_cnt#20)#64, sum(UnscaledValue(sales_amt#21))#65] +Results [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sum(sales_cnt#20)#64 AS sales_cnt#119, MakeDecimal(sum(UnscaledValue(sales_amt#21))#65,18,2) AS sales_amt#120] (123) Exchange -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#134, sales_amt#135] -Arguments: hashpartitioning(i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, 5), ENSURE_REQUIREMENTS, [id=#136] +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#119, sales_amt#120] +Arguments: hashpartitioning(i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, 5), ENSURE_REQUIREMENTS, [plan_id=16] (124) Sort [codegen id : 50] -Input [7]: [d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#134, sales_amt#135] -Arguments: [i_brand_id#85 ASC NULLS FIRST, i_class_id#86 ASC NULLS FIRST, i_category_id#87 ASC NULLS FIRST, i_manufact_id#88 ASC NULLS FIRST], false, 0 +Input [7]: [d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#119, sales_amt#120] +Arguments: [i_brand_id#75 ASC NULLS FIRST, i_class_id#76 ASC NULLS FIRST, i_category_id#77 ASC NULLS FIRST, i_manufact_id#78 ASC NULLS FIRST], false, 0 (125) SortMergeJoin [codegen id : 51] Left keys [4]: [i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12] -Right keys [4]: [i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88] -Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#75 as decimal(17,2))) / promote_precision(cast(sales_cnt#134 as decimal(17,2)))), DecimalType(37,20)) < 0.90000000000000000000) +Right keys [4]: [i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78] +Join condition: (CheckOverflow((promote_precision(cast(sales_cnt#66 as decimal(17,2))) / promote_precision(cast(sales_cnt#119 as decimal(17,2)))), DecimalType(37,20)) < 0.90000000000000000000) (126) Project [codegen id : 51] -Output [10]: [d_year#90 AS prev_year#137, d_year#15 AS year#138, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#134 AS prev_yr_cnt#139, sales_cnt#75 AS curr_yr_cnt#140, (sales_cnt#75 - sales_cnt#134) AS sales_cnt_diff#141, CheckOverflow((promote_precision(cast(sales_amt#76 as decimal(19,2))) - promote_precision(cast(sales_amt#135 as decimal(19,2)))), DecimalType(19,2)) AS sales_amt_diff#142] -Input [14]: [d_year#15, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#75, sales_amt#76, d_year#90, i_brand_id#85, i_class_id#86, i_category_id#87, i_manufact_id#88, sales_cnt#134, sales_amt#135] +Output [10]: [d_year#80 AS prev_year#121, d_year#14 AS year#122, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#119 AS prev_yr_cnt#123, sales_cnt#66 AS curr_yr_cnt#124, (sales_cnt#66 - sales_cnt#119) AS sales_cnt_diff#125, CheckOverflow((promote_precision(cast(sales_amt#67 as decimal(19,2))) - promote_precision(cast(sales_amt#120 as decimal(19,2)))), DecimalType(19,2)) AS sales_amt_diff#126] +Input [14]: [d_year#14, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, sales_cnt#66, sales_amt#67, d_year#80, i_brand_id#75, i_class_id#76, i_category_id#77, i_manufact_id#78, sales_cnt#119, sales_amt#120] (127) TakeOrderedAndProject -Input [10]: [prev_year#137, year#138, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, prev_yr_cnt#139, curr_yr_cnt#140, sales_cnt_diff#141, sales_amt_diff#142] -Arguments: 100, [sales_cnt_diff#141 ASC NULLS FIRST, sales_amt_diff#142 ASC NULLS FIRST], [prev_year#137, year#138, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, prev_yr_cnt#139, curr_yr_cnt#140, sales_cnt_diff#141, sales_amt_diff#142] +Input [10]: [prev_year#121, year#122, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, prev_yr_cnt#123, curr_yr_cnt#124, sales_cnt_diff#125, sales_amt_diff#126] +Arguments: 100, [sales_cnt_diff#125 ASC NULLS FIRST, sales_amt_diff#126 ASC NULLS FIRST], [prev_year#121, year#122, i_brand_id#8, i_class_id#9, i_category_id#10, i_manufact_id#12, prev_yr_cnt#123, curr_yr_cnt#124, sales_cnt_diff#125, sales_amt_diff#126] ===== Subqueries ===== @@ -697,28 +697,28 @@ BroadcastExchange (131) (128) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#15] +Output [2]: [d_date_sk#13, d_year#14] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2002), IsNotNull(d_date_sk)] ReadSchema: struct (129) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#15] +Input [2]: [d_date_sk#13, d_year#14] (130) Filter [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#15] -Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2002)) AND isnotnull(d_date_sk#14)) +Input [2]: [d_date_sk#13, d_year#14] +Condition : ((isnotnull(d_year#14) AND (d_year#14 = 2002)) AND isnotnull(d_date_sk#13)) (131) BroadcastExchange -Input [2]: [d_date_sk#14, d_year#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#143] +Input [2]: [d_date_sk#13, d_year#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=17] -Subquery:2 Hosting operator id = 24 Hosting Expression = ss_sold_date_sk#29 IN dynamicpruning#6 +Subquery:2 Hosting operator id = 24 Hosting Expression = ss_sold_date_sk#26 IN dynamicpruning#6 -Subquery:3 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#50 IN dynamicpruning#6 +Subquery:3 Hosting operator id = 43 Hosting Expression = ws_sold_date_sk#45 IN dynamicpruning#6 -Subquery:4 Hosting operator id = 71 Hosting Expression = cs_sold_date_sk#82 IN dynamicpruning#83 +Subquery:4 Hosting operator id = 71 Hosting Expression = cs_sold_date_sk#72 IN dynamicpruning#73 BroadcastExchange (135) +- * Filter (134) +- * ColumnarToRow (133) @@ -726,25 +726,25 @@ BroadcastExchange (135) (132) Scan parquet default.date_dim -Output [2]: [d_date_sk#89, d_year#90] +Output [2]: [d_date_sk#79, d_year#80] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2001), IsNotNull(d_date_sk)] ReadSchema: struct (133) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#89, d_year#90] +Input [2]: [d_date_sk#79, d_year#80] (134) Filter [codegen id : 1] -Input [2]: [d_date_sk#89, d_year#90] -Condition : ((isnotnull(d_year#90) AND (d_year#90 = 2001)) AND isnotnull(d_date_sk#89)) +Input [2]: [d_date_sk#79, d_year#80] +Condition : ((isnotnull(d_year#80) AND (d_year#80 = 2001)) AND isnotnull(d_date_sk#79)) (135) BroadcastExchange -Input [2]: [d_date_sk#89, d_year#90] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#144] +Input [2]: [d_date_sk#79, d_year#80] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=18] -Subquery:5 Hosting operator id = 86 Hosting Expression = ss_sold_date_sk#100 IN dynamicpruning#83 +Subquery:5 Hosting operator id = 86 Hosting Expression = ss_sold_date_sk#89 IN dynamicpruning#73 -Subquery:6 Hosting operator id = 101 Hosting Expression = ws_sold_date_sk#117 IN dynamicpruning#83 +Subquery:6 Hosting operator id = 101 Hosting Expression = ws_sold_date_sk#105 IN dynamicpruning#73 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt index 335e1aee4e5ca..1061fb775d5d6 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a.sf100/explain.txt @@ -142,7 +142,7 @@ Condition : isnotnull(s_store_sk#7) (10) BroadcastExchange Input [1]: [s_store_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_store_sk#1] @@ -157,422 +157,422 @@ Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] Input [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] Keys [1]: [s_store_sk#7] Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#2)), partial_sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [2]: [sum#9, sum#10] -Results [3]: [s_store_sk#7, sum#11, sum#12] +Aggregate Attributes [2]: [sum#8, sum#9] +Results [3]: [s_store_sk#7, sum#10, sum#11] (14) Exchange -Input [3]: [s_store_sk#7, sum#11, sum#12] -Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [3]: [s_store_sk#7, sum#10, sum#11] +Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 8] -Input [3]: [s_store_sk#7, sum#11, sum#12] +Input [3]: [s_store_sk#7, sum#10, sum#11] Keys [1]: [s_store_sk#7] Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#2)), sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#2))#14, sum(UnscaledValue(ss_net_profit#3))#15] -Results [3]: [s_store_sk#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS sales#16, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#15,17,2) AS profit#17] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#2))#12, sum(UnscaledValue(ss_net_profit#3))#13] +Results [3]: [s_store_sk#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#12,17,2) AS sales#14, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#13,17,2) AS profit#15] (16) Scan parquet default.store_returns -Output [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21] +Output [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(sr_returned_date_sk#21), dynamicpruningexpression(sr_returned_date_sk#21 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(sr_returned_date_sk#19), dynamicpruningexpression(sr_returned_date_sk#19 IN dynamicpruning#5)] PushedFilters: [IsNotNull(sr_store_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 6] -Input [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] (18) Filter [codegen id : 6] -Input [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21] -Condition : isnotnull(sr_store_sk#18) +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] +Condition : isnotnull(sr_store_sk#16) (19) ReusedExchange [Reuses operator id: 103] -Output [1]: [d_date_sk#22] +Output [1]: [d_date_sk#20] (20) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [sr_returned_date_sk#21] -Right keys [1]: [d_date_sk#22] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [d_date_sk#20] Join condition: None (21) Project [codegen id : 6] -Output [3]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20] -Input [5]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21, d_date_sk#22] +Output [3]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18] +Input [5]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19, d_date_sk#20] (22) ReusedExchange [Reuses operator id: 10] -Output [1]: [s_store_sk#23] +Output [1]: [s_store_sk#21] (23) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [sr_store_sk#18] -Right keys [1]: [s_store_sk#23] +Left keys [1]: [sr_store_sk#16] +Right keys [1]: [s_store_sk#21] Join condition: None (24) Project [codegen id : 6] -Output [3]: [sr_return_amt#19, sr_net_loss#20, s_store_sk#23] -Input [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, s_store_sk#23] +Output [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, s_store_sk#21] (25) HashAggregate [codegen id : 6] -Input [3]: [sr_return_amt#19, sr_net_loss#20, s_store_sk#23] -Keys [1]: [s_store_sk#23] -Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#19)), partial_sum(UnscaledValue(sr_net_loss#20))] -Aggregate Attributes [2]: [sum#24, sum#25] -Results [3]: [s_store_sk#23, sum#26, sum#27] +Input [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Keys [1]: [s_store_sk#21] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#17)), partial_sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum#22, sum#23] +Results [3]: [s_store_sk#21, sum#24, sum#25] (26) Exchange -Input [3]: [s_store_sk#23, sum#26, sum#27] -Arguments: hashpartitioning(s_store_sk#23, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [3]: [s_store_sk#21, sum#24, sum#25] +Arguments: hashpartitioning(s_store_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=3] (27) HashAggregate [codegen id : 7] -Input [3]: [s_store_sk#23, sum#26, sum#27] -Keys [1]: [s_store_sk#23] -Functions [2]: [sum(UnscaledValue(sr_return_amt#19)), sum(UnscaledValue(sr_net_loss#20))] -Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#19))#29, sum(UnscaledValue(sr_net_loss#20))#30] -Results [3]: [s_store_sk#23, MakeDecimal(sum(UnscaledValue(sr_return_amt#19))#29,17,2) AS returns#31, MakeDecimal(sum(UnscaledValue(sr_net_loss#20))#30,17,2) AS profit_loss#32] +Input [3]: [s_store_sk#21, sum#24, sum#25] +Keys [1]: [s_store_sk#21] +Functions [2]: [sum(UnscaledValue(sr_return_amt#17)), sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#17))#26, sum(UnscaledValue(sr_net_loss#18))#27] +Results [3]: [s_store_sk#21, MakeDecimal(sum(UnscaledValue(sr_return_amt#17))#26,17,2) AS returns#28, MakeDecimal(sum(UnscaledValue(sr_net_loss#18))#27,17,2) AS profit_loss#29] (28) BroadcastExchange -Input [3]: [s_store_sk#23, returns#31, profit_loss#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] +Input [3]: [s_store_sk#21, returns#28, profit_loss#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 8] Left keys [1]: [s_store_sk#7] -Right keys [1]: [s_store_sk#23] +Right keys [1]: [s_store_sk#21] Join condition: None (30) Project [codegen id : 8] -Output [5]: [store channel AS channel#34, s_store_sk#7 AS id#35, sales#16, coalesce(returns#31, 0.00) AS returns#36, CheckOverflow((promote_precision(cast(profit#17 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#37] -Input [6]: [s_store_sk#7, sales#16, profit#17, s_store_sk#23, returns#31, profit_loss#32] +Output [5]: [store channel AS channel#30, s_store_sk#7 AS id#31, sales#14, coalesce(returns#28, 0.00) AS returns#32, CheckOverflow((promote_precision(cast(profit#15 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#29, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#33] +Input [6]: [s_store_sk#7, sales#14, profit#15, s_store_sk#21, returns#28, profit_loss#29] (31) Scan parquet default.catalog_sales -Output [4]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41] +Output [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#41), dynamicpruningexpression(cs_sold_date_sk#41 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#37), dynamicpruningexpression(cs_sold_date_sk#37 IN dynamicpruning#5)] ReadSchema: struct (32) ColumnarToRow [codegen id : 10] -Input [4]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41] +Input [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] (33) ReusedExchange [Reuses operator id: 103] -Output [1]: [d_date_sk#42] +Output [1]: [d_date_sk#38] (34) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#41] -Right keys [1]: [d_date_sk#42] +Left keys [1]: [cs_sold_date_sk#37] +Right keys [1]: [d_date_sk#38] Join condition: None (35) Project [codegen id : 10] -Output [3]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40] -Input [5]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41, d_date_sk#42] +Output [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Input [5]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37, d_date_sk#38] (36) HashAggregate [codegen id : 10] -Input [3]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40] -Keys [1]: [cs_call_center_sk#38] -Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#39)), partial_sum(UnscaledValue(cs_net_profit#40))] -Aggregate Attributes [2]: [sum#43, sum#44] -Results [3]: [cs_call_center_sk#38, sum#45, sum#46] +Input [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#35)), partial_sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum#39, sum#40] +Results [3]: [cs_call_center_sk#34, sum#41, sum#42] (37) Exchange -Input [3]: [cs_call_center_sk#38, sum#45, sum#46] -Arguments: hashpartitioning(cs_call_center_sk#38, 5), ENSURE_REQUIREMENTS, [id=#47] +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Arguments: hashpartitioning(cs_call_center_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=5] (38) HashAggregate [codegen id : 14] -Input [3]: [cs_call_center_sk#38, sum#45, sum#46] -Keys [1]: [cs_call_center_sk#38] -Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#39)), sum(UnscaledValue(cs_net_profit#40))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#39))#48, sum(UnscaledValue(cs_net_profit#40))#49] -Results [3]: [cs_call_center_sk#38, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#39))#48,17,2) AS sales#50, MakeDecimal(sum(UnscaledValue(cs_net_profit#40))#49,17,2) AS profit#51] +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#35)), sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#35))#43, sum(UnscaledValue(cs_net_profit#36))#44] +Results [3]: [cs_call_center_sk#34, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#35))#43,17,2) AS sales#45, MakeDecimal(sum(UnscaledValue(cs_net_profit#36))#44,17,2) AS profit#46] (39) Scan parquet default.catalog_returns -Output [3]: [cr_return_amount#52, cr_net_loss#53, cr_returned_date_sk#54] +Output [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#54), dynamicpruningexpression(cr_returned_date_sk#54 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cr_returned_date_sk#49), dynamicpruningexpression(cr_returned_date_sk#49 IN dynamicpruning#5)] ReadSchema: struct (40) ColumnarToRow [codegen id : 12] -Input [3]: [cr_return_amount#52, cr_net_loss#53, cr_returned_date_sk#54] +Input [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] (41) ReusedExchange [Reuses operator id: 103] -Output [1]: [d_date_sk#55] +Output [1]: [d_date_sk#50] (42) BroadcastHashJoin [codegen id : 12] -Left keys [1]: [cr_returned_date_sk#54] -Right keys [1]: [d_date_sk#55] +Left keys [1]: [cr_returned_date_sk#49] +Right keys [1]: [d_date_sk#50] Join condition: None (43) Project [codegen id : 12] -Output [2]: [cr_return_amount#52, cr_net_loss#53] -Input [4]: [cr_return_amount#52, cr_net_loss#53, cr_returned_date_sk#54, d_date_sk#55] +Output [2]: [cr_return_amount#47, cr_net_loss#48] +Input [4]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49, d_date_sk#50] (44) HashAggregate [codegen id : 12] -Input [2]: [cr_return_amount#52, cr_net_loss#53] +Input [2]: [cr_return_amount#47, cr_net_loss#48] Keys: [] -Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#52)), partial_sum(UnscaledValue(cr_net_loss#53))] -Aggregate Attributes [2]: [sum#56, sum#57] -Results [2]: [sum#58, sum#59] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#47)), partial_sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum#51, sum#52] +Results [2]: [sum#53, sum#54] (45) Exchange -Input [2]: [sum#58, sum#59] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#60] +Input [2]: [sum#53, sum#54] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=6] (46) HashAggregate [codegen id : 13] -Input [2]: [sum#58, sum#59] +Input [2]: [sum#53, sum#54] Keys: [] -Functions [2]: [sum(UnscaledValue(cr_return_amount#52)), sum(UnscaledValue(cr_net_loss#53))] -Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#52))#61, sum(UnscaledValue(cr_net_loss#53))#62] -Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#52))#61,17,2) AS returns#63, MakeDecimal(sum(UnscaledValue(cr_net_loss#53))#62,17,2) AS profit_loss#64] +Functions [2]: [sum(UnscaledValue(cr_return_amount#47)), sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#47))#55, sum(UnscaledValue(cr_net_loss#48))#56] +Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#47))#55,17,2) AS returns#57, MakeDecimal(sum(UnscaledValue(cr_net_loss#48))#56,17,2) AS profit_loss#58] (47) BroadcastExchange -Input [2]: [returns#63, profit_loss#64] -Arguments: IdentityBroadcastMode, [id=#65] +Input [2]: [returns#57, profit_loss#58] +Arguments: IdentityBroadcastMode, [plan_id=7] (48) BroadcastNestedLoopJoin [codegen id : 14] Join condition: None (49) Project [codegen id : 14] -Output [5]: [catalog channel AS channel#66, cs_call_center_sk#38 AS id#67, sales#50, returns#63, CheckOverflow((promote_precision(cast(profit#51 as decimal(18,2))) - promote_precision(cast(profit_loss#64 as decimal(18,2)))), DecimalType(18,2)) AS profit#68] -Input [5]: [cs_call_center_sk#38, sales#50, profit#51, returns#63, profit_loss#64] +Output [5]: [catalog channel AS channel#59, cs_call_center_sk#34 AS id#60, sales#45, returns#57, CheckOverflow((promote_precision(cast(profit#46 as decimal(18,2))) - promote_precision(cast(profit_loss#58 as decimal(18,2)))), DecimalType(18,2)) AS profit#61] +Input [5]: [cs_call_center_sk#34, sales#45, profit#46, returns#57, profit_loss#58] (50) Scan parquet default.web_sales -Output [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72] +Output [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#72), dynamicpruningexpression(ws_sold_date_sk#72 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#65), dynamicpruningexpression(ws_sold_date_sk#65 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_web_page_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 17] -Input [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] (52) Filter [codegen id : 17] -Input [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72] -Condition : isnotnull(ws_web_page_sk#69) +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] +Condition : isnotnull(ws_web_page_sk#62) (53) ReusedExchange [Reuses operator id: 103] -Output [1]: [d_date_sk#73] +Output [1]: [d_date_sk#66] (54) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#72] -Right keys [1]: [d_date_sk#73] +Left keys [1]: [ws_sold_date_sk#65] +Right keys [1]: [d_date_sk#66] Join condition: None (55) Project [codegen id : 17] -Output [3]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71] -Input [5]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72, d_date_sk#73] +Output [3]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64] +Input [5]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65, d_date_sk#66] (56) Scan parquet default.web_page -Output [1]: [wp_web_page_sk#74] +Output [1]: [wp_web_page_sk#67] Batched: true Location [not included in comparison]/{warehouse_dir}/web_page] PushedFilters: [IsNotNull(wp_web_page_sk)] ReadSchema: struct (57) ColumnarToRow [codegen id : 16] -Input [1]: [wp_web_page_sk#74] +Input [1]: [wp_web_page_sk#67] (58) Filter [codegen id : 16] -Input [1]: [wp_web_page_sk#74] -Condition : isnotnull(wp_web_page_sk#74) +Input [1]: [wp_web_page_sk#67] +Condition : isnotnull(wp_web_page_sk#67) (59) BroadcastExchange -Input [1]: [wp_web_page_sk#74] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#75] +Input [1]: [wp_web_page_sk#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] (60) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_web_page_sk#69] -Right keys [1]: [wp_web_page_sk#74] +Left keys [1]: [ws_web_page_sk#62] +Right keys [1]: [wp_web_page_sk#67] Join condition: None (61) Project [codegen id : 17] -Output [3]: [ws_ext_sales_price#70, ws_net_profit#71, wp_web_page_sk#74] -Input [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, wp_web_page_sk#74] +Output [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] (62) HashAggregate [codegen id : 17] -Input [3]: [ws_ext_sales_price#70, ws_net_profit#71, wp_web_page_sk#74] -Keys [1]: [wp_web_page_sk#74] -Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#70)), partial_sum(UnscaledValue(ws_net_profit#71))] -Aggregate Attributes [2]: [sum#76, sum#77] -Results [3]: [wp_web_page_sk#74, sum#78, sum#79] +Input [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#63)), partial_sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum#68, sum#69] +Results [3]: [wp_web_page_sk#67, sum#70, sum#71] (63) Exchange -Input [3]: [wp_web_page_sk#74, sum#78, sum#79] -Arguments: hashpartitioning(wp_web_page_sk#74, 5), ENSURE_REQUIREMENTS, [id=#80] +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Arguments: hashpartitioning(wp_web_page_sk#67, 5), ENSURE_REQUIREMENTS, [plan_id=9] (64) HashAggregate [codegen id : 22] -Input [3]: [wp_web_page_sk#74, sum#78, sum#79] -Keys [1]: [wp_web_page_sk#74] -Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#70)), sum(UnscaledValue(ws_net_profit#71))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#70))#81, sum(UnscaledValue(ws_net_profit#71))#82] -Results [3]: [wp_web_page_sk#74, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#70))#81,17,2) AS sales#83, MakeDecimal(sum(UnscaledValue(ws_net_profit#71))#82,17,2) AS profit#84] +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#63)), sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#63))#72, sum(UnscaledValue(ws_net_profit#64))#73] +Results [3]: [wp_web_page_sk#67, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#63))#72,17,2) AS sales#74, MakeDecimal(sum(UnscaledValue(ws_net_profit#64))#73,17,2) AS profit#75] (65) Scan parquet default.web_returns -Output [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] +Output [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#88), dynamicpruningexpression(wr_returned_date_sk#88 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(wr_returned_date_sk#79), dynamicpruningexpression(wr_returned_date_sk#79 IN dynamicpruning#5)] PushedFilters: [IsNotNull(wr_web_page_sk)] ReadSchema: struct (66) ColumnarToRow [codegen id : 20] -Input [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] (67) Filter [codegen id : 20] -Input [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] -Condition : isnotnull(wr_web_page_sk#85) +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] +Condition : isnotnull(wr_web_page_sk#76) (68) ReusedExchange [Reuses operator id: 103] -Output [1]: [d_date_sk#89] +Output [1]: [d_date_sk#80] (69) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [wr_returned_date_sk#88] -Right keys [1]: [d_date_sk#89] +Left keys [1]: [wr_returned_date_sk#79] +Right keys [1]: [d_date_sk#80] Join condition: None (70) Project [codegen id : 20] -Output [3]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87] -Input [5]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88, d_date_sk#89] +Output [3]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78] +Input [5]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79, d_date_sk#80] (71) ReusedExchange [Reuses operator id: 59] -Output [1]: [wp_web_page_sk#90] +Output [1]: [wp_web_page_sk#81] (72) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [wr_web_page_sk#85] -Right keys [1]: [wp_web_page_sk#90] +Left keys [1]: [wr_web_page_sk#76] +Right keys [1]: [wp_web_page_sk#81] Join condition: None (73) Project [codegen id : 20] -Output [3]: [wr_return_amt#86, wr_net_loss#87, wp_web_page_sk#90] -Input [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wp_web_page_sk#90] +Output [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] (74) HashAggregate [codegen id : 20] -Input [3]: [wr_return_amt#86, wr_net_loss#87, wp_web_page_sk#90] -Keys [1]: [wp_web_page_sk#90] -Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#86)), partial_sum(UnscaledValue(wr_net_loss#87))] -Aggregate Attributes [2]: [sum#91, sum#92] -Results [3]: [wp_web_page_sk#90, sum#93, sum#94] +Input [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#77)), partial_sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum#82, sum#83] +Results [3]: [wp_web_page_sk#81, sum#84, sum#85] (75) Exchange -Input [3]: [wp_web_page_sk#90, sum#93, sum#94] -Arguments: hashpartitioning(wp_web_page_sk#90, 5), ENSURE_REQUIREMENTS, [id=#95] +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Arguments: hashpartitioning(wp_web_page_sk#81, 5), ENSURE_REQUIREMENTS, [plan_id=10] (76) HashAggregate [codegen id : 21] -Input [3]: [wp_web_page_sk#90, sum#93, sum#94] -Keys [1]: [wp_web_page_sk#90] -Functions [2]: [sum(UnscaledValue(wr_return_amt#86)), sum(UnscaledValue(wr_net_loss#87))] -Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#86))#96, sum(UnscaledValue(wr_net_loss#87))#97] -Results [3]: [wp_web_page_sk#90, MakeDecimal(sum(UnscaledValue(wr_return_amt#86))#96,17,2) AS returns#98, MakeDecimal(sum(UnscaledValue(wr_net_loss#87))#97,17,2) AS profit_loss#99] +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [sum(UnscaledValue(wr_return_amt#77)), sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#77))#86, sum(UnscaledValue(wr_net_loss#78))#87] +Results [3]: [wp_web_page_sk#81, MakeDecimal(sum(UnscaledValue(wr_return_amt#77))#86,17,2) AS returns#88, MakeDecimal(sum(UnscaledValue(wr_net_loss#78))#87,17,2) AS profit_loss#89] (77) BroadcastExchange -Input [3]: [wp_web_page_sk#90, returns#98, profit_loss#99] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#100] +Input [3]: [wp_web_page_sk#81, returns#88, profit_loss#89] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] (78) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [wp_web_page_sk#74] -Right keys [1]: [wp_web_page_sk#90] +Left keys [1]: [wp_web_page_sk#67] +Right keys [1]: [wp_web_page_sk#81] Join condition: None (79) Project [codegen id : 22] -Output [5]: [web channel AS channel#101, wp_web_page_sk#74 AS id#102, sales#83, coalesce(returns#98, 0.00) AS returns#103, CheckOverflow((promote_precision(cast(profit#84 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#99, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#104] -Input [6]: [wp_web_page_sk#74, sales#83, profit#84, wp_web_page_sk#90, returns#98, profit_loss#99] +Output [5]: [web channel AS channel#90, wp_web_page_sk#67 AS id#91, sales#74, coalesce(returns#88, 0.00) AS returns#92, CheckOverflow((promote_precision(cast(profit#75 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#89, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#93] +Input [6]: [wp_web_page_sk#67, sales#74, profit#75, wp_web_page_sk#81, returns#88, profit_loss#89] (80) Union (81) HashAggregate [codegen id : 23] -Input [5]: [channel#34, id#35, sales#16, returns#36, profit#37] -Keys [2]: [channel#34, id#35] -Functions [3]: [partial_sum(sales#16), partial_sum(returns#36), partial_sum(profit#37)] -Aggregate Attributes [6]: [sum#105, isEmpty#106, sum#107, isEmpty#108, sum#109, isEmpty#110] -Results [8]: [channel#34, id#35, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116] +Input [5]: [channel#30, id#31, sales#14, returns#32, profit#33] +Keys [2]: [channel#30, id#31] +Functions [3]: [partial_sum(sales#14), partial_sum(returns#32), partial_sum(profit#33)] +Aggregate Attributes [6]: [sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99] +Results [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] (82) Exchange -Input [8]: [channel#34, id#35, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116] -Arguments: hashpartitioning(channel#34, id#35, 5), ENSURE_REQUIREMENTS, [id=#117] +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Arguments: hashpartitioning(channel#30, id#31, 5), ENSURE_REQUIREMENTS, [plan_id=12] (83) HashAggregate [codegen id : 24] -Input [8]: [channel#34, id#35, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116] -Keys [2]: [channel#34, id#35] -Functions [3]: [sum(sales#16), sum(returns#36), sum(profit#37)] -Aggregate Attributes [3]: [sum(sales#16)#118, sum(returns#36)#119, sum(profit#37)#120] -Results [5]: [channel#34, id#35, cast(sum(sales#16)#118 as decimal(37,2)) AS sales#121, cast(sum(returns#36)#119 as decimal(37,2)) AS returns#122, cast(sum(profit#37)#120 as decimal(38,2)) AS profit#123] +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#30, id#31] +Functions [3]: [sum(sales#14), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#14)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [5]: [channel#30, id#31, cast(sum(sales#14)#106 as decimal(37,2)) AS sales#109, cast(sum(returns#32)#107 as decimal(37,2)) AS returns#110, cast(sum(profit#33)#108 as decimal(38,2)) AS profit#111] (84) ReusedExchange [Reuses operator id: 82] -Output [8]: [channel#34, id#35, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116] +Output [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] (85) HashAggregate [codegen id : 48] -Input [8]: [channel#34, id#35, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116] -Keys [2]: [channel#34, id#35] -Functions [3]: [sum(sales#16), sum(returns#36), sum(profit#37)] -Aggregate Attributes [3]: [sum(sales#16)#118, sum(returns#36)#119, sum(profit#37)#120] -Results [4]: [channel#34, sum(sales#16)#118 AS sales#124, sum(returns#36)#119 AS returns#125, sum(profit#37)#120 AS profit#126] +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#30, id#31] +Functions [3]: [sum(sales#14), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#14)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [4]: [channel#30, sum(sales#14)#106 AS sales#112, sum(returns#32)#107 AS returns#113, sum(profit#33)#108 AS profit#114] (86) HashAggregate [codegen id : 48] -Input [4]: [channel#34, sales#124, returns#125, profit#126] -Keys [1]: [channel#34] -Functions [3]: [partial_sum(sales#124), partial_sum(returns#125), partial_sum(profit#126)] -Aggregate Attributes [6]: [sum#127, isEmpty#128, sum#129, isEmpty#130, sum#131, isEmpty#132] -Results [7]: [channel#34, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Input [4]: [channel#30, sales#112, returns#113, profit#114] +Keys [1]: [channel#30] +Functions [3]: [partial_sum(sales#112), partial_sum(returns#113), partial_sum(profit#114)] +Aggregate Attributes [6]: [sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Results [7]: [channel#30, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] (87) Exchange -Input [7]: [channel#34, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] -Arguments: hashpartitioning(channel#34, 5), ENSURE_REQUIREMENTS, [id=#139] +Input [7]: [channel#30, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Arguments: hashpartitioning(channel#30, 5), ENSURE_REQUIREMENTS, [plan_id=13] (88) HashAggregate [codegen id : 49] -Input [7]: [channel#34, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] -Keys [1]: [channel#34] -Functions [3]: [sum(sales#124), sum(returns#125), sum(profit#126)] -Aggregate Attributes [3]: [sum(sales#124)#140, sum(returns#125)#141, sum(profit#126)#142] -Results [5]: [channel#34, null AS id#143, sum(sales#124)#140 AS sales#144, sum(returns#125)#141 AS returns#145, sum(profit#126)#142 AS profit#146] +Input [7]: [channel#30, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Keys [1]: [channel#30] +Functions [3]: [sum(sales#112), sum(returns#113), sum(profit#114)] +Aggregate Attributes [3]: [sum(sales#112)#127, sum(returns#113)#128, sum(profit#114)#129] +Results [5]: [channel#30, null AS id#130, sum(sales#112)#127 AS sales#131, sum(returns#113)#128 AS returns#132, sum(profit#114)#129 AS profit#133] (89) ReusedExchange [Reuses operator id: 82] -Output [8]: [channel#34, id#35, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116] +Output [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] (90) HashAggregate [codegen id : 73] -Input [8]: [channel#34, id#35, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116] -Keys [2]: [channel#34, id#35] -Functions [3]: [sum(sales#16), sum(returns#36), sum(profit#37)] -Aggregate Attributes [3]: [sum(sales#16)#118, sum(returns#36)#119, sum(profit#37)#120] -Results [3]: [sum(sales#16)#118 AS sales#124, sum(returns#36)#119 AS returns#125, sum(profit#37)#120 AS profit#126] +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#30, id#31] +Functions [3]: [sum(sales#14), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#14)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [3]: [sum(sales#14)#106 AS sales#112, sum(returns#32)#107 AS returns#113, sum(profit#33)#108 AS profit#114] (91) HashAggregate [codegen id : 73] -Input [3]: [sales#124, returns#125, profit#126] +Input [3]: [sales#112, returns#113, profit#114] Keys: [] -Functions [3]: [partial_sum(sales#124), partial_sum(returns#125), partial_sum(profit#126)] -Aggregate Attributes [6]: [sum#147, isEmpty#148, sum#149, isEmpty#150, sum#151, isEmpty#152] -Results [6]: [sum#153, isEmpty#154, sum#155, isEmpty#156, sum#157, isEmpty#158] +Functions [3]: [partial_sum(sales#112), partial_sum(returns#113), partial_sum(profit#114)] +Aggregate Attributes [6]: [sum#134, isEmpty#135, sum#136, isEmpty#137, sum#138, isEmpty#139] +Results [6]: [sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] (92) Exchange -Input [6]: [sum#153, isEmpty#154, sum#155, isEmpty#156, sum#157, isEmpty#158] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#159] +Input [6]: [sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=14] (93) HashAggregate [codegen id : 74] -Input [6]: [sum#153, isEmpty#154, sum#155, isEmpty#156, sum#157, isEmpty#158] +Input [6]: [sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] Keys: [] -Functions [3]: [sum(sales#124), sum(returns#125), sum(profit#126)] -Aggregate Attributes [3]: [sum(sales#124)#160, sum(returns#125)#161, sum(profit#126)#162] -Results [5]: [null AS channel#163, null AS id#164, sum(sales#124)#160 AS sales#165, sum(returns#125)#161 AS returns#166, sum(profit#126)#162 AS profit#167] +Functions [3]: [sum(sales#112), sum(returns#113), sum(profit#114)] +Aggregate Attributes [3]: [sum(sales#112)#146, sum(returns#113)#147, sum(profit#114)#148] +Results [5]: [null AS channel#149, null AS id#150, sum(sales#112)#146 AS sales#151, sum(returns#113)#147 AS returns#152, sum(profit#114)#148 AS profit#153] (94) Union (95) HashAggregate [codegen id : 75] -Input [5]: [channel#34, id#35, sales#121, returns#122, profit#123] -Keys [5]: [channel#34, id#35, sales#121, returns#122, profit#123] +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Keys [5]: [channel#30, id#31, sales#109, returns#110, profit#111] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#34, id#35, sales#121, returns#122, profit#123] +Results [5]: [channel#30, id#31, sales#109, returns#110, profit#111] (96) Exchange -Input [5]: [channel#34, id#35, sales#121, returns#122, profit#123] -Arguments: hashpartitioning(channel#34, id#35, sales#121, returns#122, profit#123, 5), ENSURE_REQUIREMENTS, [id=#168] +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Arguments: hashpartitioning(channel#30, id#31, sales#109, returns#110, profit#111, 5), ENSURE_REQUIREMENTS, [plan_id=15] (97) HashAggregate [codegen id : 76] -Input [5]: [channel#34, id#35, sales#121, returns#122, profit#123] -Keys [5]: [channel#34, id#35, sales#121, returns#122, profit#123] +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Keys [5]: [channel#30, id#31, sales#109, returns#110, profit#111] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#34, id#35, sales#121, returns#122, profit#123] +Results [5]: [channel#30, id#31, sales#109, returns#110, profit#111] (98) TakeOrderedAndProject -Input [5]: [channel#34, id#35, sales#121, returns#122, profit#123] -Arguments: 100, [channel#34 ASC NULLS FIRST, id#35 ASC NULLS FIRST], [channel#34, id#35, sales#121, returns#122, profit#123] +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Arguments: 100, [channel#30 ASC NULLS FIRST, id#31 ASC NULLS FIRST], [channel#30, id#31, sales#109, returns#110, profit#111] ===== Subqueries ===== @@ -585,35 +585,35 @@ BroadcastExchange (103) (99) Scan parquet default.date_dim -Output [2]: [d_date_sk#6, d_date#169] +Output [2]: [d_date_sk#6, d_date#154] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] ReadSchema: struct (100) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#6, d_date#169] +Input [2]: [d_date_sk#6, d_date#154] (101) Filter [codegen id : 1] -Input [2]: [d_date_sk#6, d_date#169] -Condition : (((isnotnull(d_date#169) AND (d_date#169 >= 1998-08-04)) AND (d_date#169 <= 1998-09-03)) AND isnotnull(d_date_sk#6)) +Input [2]: [d_date_sk#6, d_date#154] +Condition : (((isnotnull(d_date#154) AND (d_date#154 >= 1998-08-04)) AND (d_date#154 <= 1998-09-03)) AND isnotnull(d_date_sk#6)) (102) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [2]: [d_date_sk#6, d_date#169] +Input [2]: [d_date_sk#6, d_date#154] (103) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#170] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=16] -Subquery:2 Hosting operator id = 16 Hosting Expression = sr_returned_date_sk#21 IN dynamicpruning#5 +Subquery:2 Hosting operator id = 16 Hosting Expression = sr_returned_date_sk#19 IN dynamicpruning#5 -Subquery:3 Hosting operator id = 31 Hosting Expression = cs_sold_date_sk#41 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 31 Hosting Expression = cs_sold_date_sk#37 IN dynamicpruning#5 -Subquery:4 Hosting operator id = 39 Hosting Expression = cr_returned_date_sk#54 IN dynamicpruning#5 +Subquery:4 Hosting operator id = 39 Hosting Expression = cr_returned_date_sk#49 IN dynamicpruning#5 -Subquery:5 Hosting operator id = 50 Hosting Expression = ws_sold_date_sk#72 IN dynamicpruning#5 +Subquery:5 Hosting operator id = 50 Hosting Expression = ws_sold_date_sk#65 IN dynamicpruning#5 -Subquery:6 Hosting operator id = 65 Hosting Expression = wr_returned_date_sk#88 IN dynamicpruning#5 +Subquery:6 Hosting operator id = 65 Hosting Expression = wr_returned_date_sk#79 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt index 815eabe2fe0e8..d124ed40a3a59 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q77a/explain.txt @@ -142,7 +142,7 @@ Condition : isnotnull(s_store_sk#7) (10) BroadcastExchange Input [1]: [s_store_sk#7] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_store_sk#1] @@ -157,422 +157,422 @@ Input [4]: [ss_store_sk#1, ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] Input [3]: [ss_ext_sales_price#2, ss_net_profit#3, s_store_sk#7] Keys [1]: [s_store_sk#7] Functions [2]: [partial_sum(UnscaledValue(ss_ext_sales_price#2)), partial_sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [2]: [sum#9, sum#10] -Results [3]: [s_store_sk#7, sum#11, sum#12] +Aggregate Attributes [2]: [sum#8, sum#9] +Results [3]: [s_store_sk#7, sum#10, sum#11] (14) Exchange -Input [3]: [s_store_sk#7, sum#11, sum#12] -Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [3]: [s_store_sk#7, sum#10, sum#11] +Arguments: hashpartitioning(s_store_sk#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 8] -Input [3]: [s_store_sk#7, sum#11, sum#12] +Input [3]: [s_store_sk#7, sum#10, sum#11] Keys [1]: [s_store_sk#7] Functions [2]: [sum(UnscaledValue(ss_ext_sales_price#2)), sum(UnscaledValue(ss_net_profit#3))] -Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#2))#14, sum(UnscaledValue(ss_net_profit#3))#15] -Results [3]: [s_store_sk#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS sales#16, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#15,17,2) AS profit#17] +Aggregate Attributes [2]: [sum(UnscaledValue(ss_ext_sales_price#2))#12, sum(UnscaledValue(ss_net_profit#3))#13] +Results [3]: [s_store_sk#7, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#12,17,2) AS sales#14, MakeDecimal(sum(UnscaledValue(ss_net_profit#3))#13,17,2) AS profit#15] (16) Scan parquet default.store_returns -Output [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21] +Output [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(sr_returned_date_sk#21), dynamicpruningexpression(sr_returned_date_sk#21 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(sr_returned_date_sk#19), dynamicpruningexpression(sr_returned_date_sk#19 IN dynamicpruning#5)] PushedFilters: [IsNotNull(sr_store_sk)] ReadSchema: struct (17) ColumnarToRow [codegen id : 6] -Input [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] (18) Filter [codegen id : 6] -Input [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21] -Condition : isnotnull(sr_store_sk#18) +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19] +Condition : isnotnull(sr_store_sk#16) (19) ReusedExchange [Reuses operator id: 103] -Output [1]: [d_date_sk#22] +Output [1]: [d_date_sk#20] (20) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [sr_returned_date_sk#21] -Right keys [1]: [d_date_sk#22] +Left keys [1]: [sr_returned_date_sk#19] +Right keys [1]: [d_date_sk#20] Join condition: None (21) Project [codegen id : 6] -Output [3]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20] -Input [5]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, sr_returned_date_sk#21, d_date_sk#22] +Output [3]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18] +Input [5]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, sr_returned_date_sk#19, d_date_sk#20] (22) ReusedExchange [Reuses operator id: 10] -Output [1]: [s_store_sk#23] +Output [1]: [s_store_sk#21] (23) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [sr_store_sk#18] -Right keys [1]: [s_store_sk#23] +Left keys [1]: [sr_store_sk#16] +Right keys [1]: [s_store_sk#21] Join condition: None (24) Project [codegen id : 6] -Output [3]: [sr_return_amt#19, sr_net_loss#20, s_store_sk#23] -Input [4]: [sr_store_sk#18, sr_return_amt#19, sr_net_loss#20, s_store_sk#23] +Output [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Input [4]: [sr_store_sk#16, sr_return_amt#17, sr_net_loss#18, s_store_sk#21] (25) HashAggregate [codegen id : 6] -Input [3]: [sr_return_amt#19, sr_net_loss#20, s_store_sk#23] -Keys [1]: [s_store_sk#23] -Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#19)), partial_sum(UnscaledValue(sr_net_loss#20))] -Aggregate Attributes [2]: [sum#24, sum#25] -Results [3]: [s_store_sk#23, sum#26, sum#27] +Input [3]: [sr_return_amt#17, sr_net_loss#18, s_store_sk#21] +Keys [1]: [s_store_sk#21] +Functions [2]: [partial_sum(UnscaledValue(sr_return_amt#17)), partial_sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum#22, sum#23] +Results [3]: [s_store_sk#21, sum#24, sum#25] (26) Exchange -Input [3]: [s_store_sk#23, sum#26, sum#27] -Arguments: hashpartitioning(s_store_sk#23, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [3]: [s_store_sk#21, sum#24, sum#25] +Arguments: hashpartitioning(s_store_sk#21, 5), ENSURE_REQUIREMENTS, [plan_id=3] (27) HashAggregate [codegen id : 7] -Input [3]: [s_store_sk#23, sum#26, sum#27] -Keys [1]: [s_store_sk#23] -Functions [2]: [sum(UnscaledValue(sr_return_amt#19)), sum(UnscaledValue(sr_net_loss#20))] -Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#19))#29, sum(UnscaledValue(sr_net_loss#20))#30] -Results [3]: [s_store_sk#23, MakeDecimal(sum(UnscaledValue(sr_return_amt#19))#29,17,2) AS returns#31, MakeDecimal(sum(UnscaledValue(sr_net_loss#20))#30,17,2) AS profit_loss#32] +Input [3]: [s_store_sk#21, sum#24, sum#25] +Keys [1]: [s_store_sk#21] +Functions [2]: [sum(UnscaledValue(sr_return_amt#17)), sum(UnscaledValue(sr_net_loss#18))] +Aggregate Attributes [2]: [sum(UnscaledValue(sr_return_amt#17))#26, sum(UnscaledValue(sr_net_loss#18))#27] +Results [3]: [s_store_sk#21, MakeDecimal(sum(UnscaledValue(sr_return_amt#17))#26,17,2) AS returns#28, MakeDecimal(sum(UnscaledValue(sr_net_loss#18))#27,17,2) AS profit_loss#29] (28) BroadcastExchange -Input [3]: [s_store_sk#23, returns#31, profit_loss#32] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#33] +Input [3]: [s_store_sk#21, returns#28, profit_loss#29] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (29) BroadcastHashJoin [codegen id : 8] Left keys [1]: [s_store_sk#7] -Right keys [1]: [s_store_sk#23] +Right keys [1]: [s_store_sk#21] Join condition: None (30) Project [codegen id : 8] -Output [5]: [store channel AS channel#34, s_store_sk#7 AS id#35, sales#16, coalesce(returns#31, 0.00) AS returns#36, CheckOverflow((promote_precision(cast(profit#17 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#32, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#37] -Input [6]: [s_store_sk#7, sales#16, profit#17, s_store_sk#23, returns#31, profit_loss#32] +Output [5]: [store channel AS channel#30, s_store_sk#7 AS id#31, sales#14, coalesce(returns#28, 0.00) AS returns#32, CheckOverflow((promote_precision(cast(profit#15 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#29, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#33] +Input [6]: [s_store_sk#7, sales#14, profit#15, s_store_sk#21, returns#28, profit_loss#29] (31) Scan parquet default.catalog_sales -Output [4]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41] +Output [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#41), dynamicpruningexpression(cs_sold_date_sk#41 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cs_sold_date_sk#37), dynamicpruningexpression(cs_sold_date_sk#37 IN dynamicpruning#5)] ReadSchema: struct (32) ColumnarToRow [codegen id : 10] -Input [4]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41] +Input [4]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37] (33) ReusedExchange [Reuses operator id: 103] -Output [1]: [d_date_sk#42] +Output [1]: [d_date_sk#38] (34) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [cs_sold_date_sk#41] -Right keys [1]: [d_date_sk#42] +Left keys [1]: [cs_sold_date_sk#37] +Right keys [1]: [d_date_sk#38] Join condition: None (35) Project [codegen id : 10] -Output [3]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40] -Input [5]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40, cs_sold_date_sk#41, d_date_sk#42] +Output [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Input [5]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36, cs_sold_date_sk#37, d_date_sk#38] (36) HashAggregate [codegen id : 10] -Input [3]: [cs_call_center_sk#38, cs_ext_sales_price#39, cs_net_profit#40] -Keys [1]: [cs_call_center_sk#38] -Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#39)), partial_sum(UnscaledValue(cs_net_profit#40))] -Aggregate Attributes [2]: [sum#43, sum#44] -Results [3]: [cs_call_center_sk#38, sum#45, sum#46] +Input [3]: [cs_call_center_sk#34, cs_ext_sales_price#35, cs_net_profit#36] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [partial_sum(UnscaledValue(cs_ext_sales_price#35)), partial_sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum#39, sum#40] +Results [3]: [cs_call_center_sk#34, sum#41, sum#42] (37) Exchange -Input [3]: [cs_call_center_sk#38, sum#45, sum#46] -Arguments: hashpartitioning(cs_call_center_sk#38, 5), ENSURE_REQUIREMENTS, [id=#47] +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Arguments: hashpartitioning(cs_call_center_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=5] (38) HashAggregate [codegen id : 11] -Input [3]: [cs_call_center_sk#38, sum#45, sum#46] -Keys [1]: [cs_call_center_sk#38] -Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#39)), sum(UnscaledValue(cs_net_profit#40))] -Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#39))#48, sum(UnscaledValue(cs_net_profit#40))#49] -Results [3]: [cs_call_center_sk#38, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#39))#48,17,2) AS sales#50, MakeDecimal(sum(UnscaledValue(cs_net_profit#40))#49,17,2) AS profit#51] +Input [3]: [cs_call_center_sk#34, sum#41, sum#42] +Keys [1]: [cs_call_center_sk#34] +Functions [2]: [sum(UnscaledValue(cs_ext_sales_price#35)), sum(UnscaledValue(cs_net_profit#36))] +Aggregate Attributes [2]: [sum(UnscaledValue(cs_ext_sales_price#35))#43, sum(UnscaledValue(cs_net_profit#36))#44] +Results [3]: [cs_call_center_sk#34, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#35))#43,17,2) AS sales#45, MakeDecimal(sum(UnscaledValue(cs_net_profit#36))#44,17,2) AS profit#46] (39) BroadcastExchange -Input [3]: [cs_call_center_sk#38, sales#50, profit#51] -Arguments: IdentityBroadcastMode, [id=#52] +Input [3]: [cs_call_center_sk#34, sales#45, profit#46] +Arguments: IdentityBroadcastMode, [plan_id=6] (40) Scan parquet default.catalog_returns -Output [3]: [cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Output [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cr_returned_date_sk#55), dynamicpruningexpression(cr_returned_date_sk#55 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(cr_returned_date_sk#49), dynamicpruningexpression(cr_returned_date_sk#49 IN dynamicpruning#5)] ReadSchema: struct (41) ColumnarToRow [codegen id : 13] -Input [3]: [cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55] +Input [3]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49] (42) ReusedExchange [Reuses operator id: 103] -Output [1]: [d_date_sk#56] +Output [1]: [d_date_sk#50] (43) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [cr_returned_date_sk#55] -Right keys [1]: [d_date_sk#56] +Left keys [1]: [cr_returned_date_sk#49] +Right keys [1]: [d_date_sk#50] Join condition: None (44) Project [codegen id : 13] -Output [2]: [cr_return_amount#53, cr_net_loss#54] -Input [4]: [cr_return_amount#53, cr_net_loss#54, cr_returned_date_sk#55, d_date_sk#56] +Output [2]: [cr_return_amount#47, cr_net_loss#48] +Input [4]: [cr_return_amount#47, cr_net_loss#48, cr_returned_date_sk#49, d_date_sk#50] (45) HashAggregate [codegen id : 13] -Input [2]: [cr_return_amount#53, cr_net_loss#54] +Input [2]: [cr_return_amount#47, cr_net_loss#48] Keys: [] -Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#53)), partial_sum(UnscaledValue(cr_net_loss#54))] -Aggregate Attributes [2]: [sum#57, sum#58] -Results [2]: [sum#59, sum#60] +Functions [2]: [partial_sum(UnscaledValue(cr_return_amount#47)), partial_sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum#51, sum#52] +Results [2]: [sum#53, sum#54] (46) Exchange -Input [2]: [sum#59, sum#60] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#61] +Input [2]: [sum#53, sum#54] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=7] (47) HashAggregate -Input [2]: [sum#59, sum#60] +Input [2]: [sum#53, sum#54] Keys: [] -Functions [2]: [sum(UnscaledValue(cr_return_amount#53)), sum(UnscaledValue(cr_net_loss#54))] -Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#53))#62, sum(UnscaledValue(cr_net_loss#54))#63] -Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#53))#62,17,2) AS returns#64, MakeDecimal(sum(UnscaledValue(cr_net_loss#54))#63,17,2) AS profit_loss#65] +Functions [2]: [sum(UnscaledValue(cr_return_amount#47)), sum(UnscaledValue(cr_net_loss#48))] +Aggregate Attributes [2]: [sum(UnscaledValue(cr_return_amount#47))#55, sum(UnscaledValue(cr_net_loss#48))#56] +Results [2]: [MakeDecimal(sum(UnscaledValue(cr_return_amount#47))#55,17,2) AS returns#57, MakeDecimal(sum(UnscaledValue(cr_net_loss#48))#56,17,2) AS profit_loss#58] (48) BroadcastNestedLoopJoin [codegen id : 14] Join condition: None (49) Project [codegen id : 14] -Output [5]: [catalog channel AS channel#66, cs_call_center_sk#38 AS id#67, sales#50, returns#64, CheckOverflow((promote_precision(cast(profit#51 as decimal(18,2))) - promote_precision(cast(profit_loss#65 as decimal(18,2)))), DecimalType(18,2)) AS profit#68] -Input [5]: [cs_call_center_sk#38, sales#50, profit#51, returns#64, profit_loss#65] +Output [5]: [catalog channel AS channel#59, cs_call_center_sk#34 AS id#60, sales#45, returns#57, CheckOverflow((promote_precision(cast(profit#46 as decimal(18,2))) - promote_precision(cast(profit_loss#58 as decimal(18,2)))), DecimalType(18,2)) AS profit#61] +Input [5]: [cs_call_center_sk#34, sales#45, profit#46, returns#57, profit_loss#58] (50) Scan parquet default.web_sales -Output [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72] +Output [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#72), dynamicpruningexpression(ws_sold_date_sk#72 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(ws_sold_date_sk#65), dynamicpruningexpression(ws_sold_date_sk#65 IN dynamicpruning#5)] PushedFilters: [IsNotNull(ws_web_page_sk)] ReadSchema: struct (51) ColumnarToRow [codegen id : 17] -Input [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] (52) Filter [codegen id : 17] -Input [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72] -Condition : isnotnull(ws_web_page_sk#69) +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65] +Condition : isnotnull(ws_web_page_sk#62) (53) ReusedExchange [Reuses operator id: 103] -Output [1]: [d_date_sk#73] +Output [1]: [d_date_sk#66] (54) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_sold_date_sk#72] -Right keys [1]: [d_date_sk#73] +Left keys [1]: [ws_sold_date_sk#65] +Right keys [1]: [d_date_sk#66] Join condition: None (55) Project [codegen id : 17] -Output [3]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71] -Input [5]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, ws_sold_date_sk#72, d_date_sk#73] +Output [3]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64] +Input [5]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, ws_sold_date_sk#65, d_date_sk#66] (56) Scan parquet default.web_page -Output [1]: [wp_web_page_sk#74] +Output [1]: [wp_web_page_sk#67] Batched: true Location [not included in comparison]/{warehouse_dir}/web_page] PushedFilters: [IsNotNull(wp_web_page_sk)] ReadSchema: struct (57) ColumnarToRow [codegen id : 16] -Input [1]: [wp_web_page_sk#74] +Input [1]: [wp_web_page_sk#67] (58) Filter [codegen id : 16] -Input [1]: [wp_web_page_sk#74] -Condition : isnotnull(wp_web_page_sk#74) +Input [1]: [wp_web_page_sk#67] +Condition : isnotnull(wp_web_page_sk#67) (59) BroadcastExchange -Input [1]: [wp_web_page_sk#74] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#75] +Input [1]: [wp_web_page_sk#67] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=8] (60) BroadcastHashJoin [codegen id : 17] -Left keys [1]: [ws_web_page_sk#69] -Right keys [1]: [wp_web_page_sk#74] +Left keys [1]: [ws_web_page_sk#62] +Right keys [1]: [wp_web_page_sk#67] Join condition: None (61) Project [codegen id : 17] -Output [3]: [ws_ext_sales_price#70, ws_net_profit#71, wp_web_page_sk#74] -Input [4]: [ws_web_page_sk#69, ws_ext_sales_price#70, ws_net_profit#71, wp_web_page_sk#74] +Output [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Input [4]: [ws_web_page_sk#62, ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] (62) HashAggregate [codegen id : 17] -Input [3]: [ws_ext_sales_price#70, ws_net_profit#71, wp_web_page_sk#74] -Keys [1]: [wp_web_page_sk#74] -Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#70)), partial_sum(UnscaledValue(ws_net_profit#71))] -Aggregate Attributes [2]: [sum#76, sum#77] -Results [3]: [wp_web_page_sk#74, sum#78, sum#79] +Input [3]: [ws_ext_sales_price#63, ws_net_profit#64, wp_web_page_sk#67] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [partial_sum(UnscaledValue(ws_ext_sales_price#63)), partial_sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum#68, sum#69] +Results [3]: [wp_web_page_sk#67, sum#70, sum#71] (63) Exchange -Input [3]: [wp_web_page_sk#74, sum#78, sum#79] -Arguments: hashpartitioning(wp_web_page_sk#74, 5), ENSURE_REQUIREMENTS, [id=#80] +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Arguments: hashpartitioning(wp_web_page_sk#67, 5), ENSURE_REQUIREMENTS, [plan_id=9] (64) HashAggregate [codegen id : 22] -Input [3]: [wp_web_page_sk#74, sum#78, sum#79] -Keys [1]: [wp_web_page_sk#74] -Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#70)), sum(UnscaledValue(ws_net_profit#71))] -Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#70))#81, sum(UnscaledValue(ws_net_profit#71))#82] -Results [3]: [wp_web_page_sk#74, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#70))#81,17,2) AS sales#83, MakeDecimal(sum(UnscaledValue(ws_net_profit#71))#82,17,2) AS profit#84] +Input [3]: [wp_web_page_sk#67, sum#70, sum#71] +Keys [1]: [wp_web_page_sk#67] +Functions [2]: [sum(UnscaledValue(ws_ext_sales_price#63)), sum(UnscaledValue(ws_net_profit#64))] +Aggregate Attributes [2]: [sum(UnscaledValue(ws_ext_sales_price#63))#72, sum(UnscaledValue(ws_net_profit#64))#73] +Results [3]: [wp_web_page_sk#67, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#63))#72,17,2) AS sales#74, MakeDecimal(sum(UnscaledValue(ws_net_profit#64))#73,17,2) AS profit#75] (65) Scan parquet default.web_returns -Output [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] +Output [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(wr_returned_date_sk#88), dynamicpruningexpression(wr_returned_date_sk#88 IN dynamicpruning#5)] +PartitionFilters: [isnotnull(wr_returned_date_sk#79), dynamicpruningexpression(wr_returned_date_sk#79 IN dynamicpruning#5)] PushedFilters: [IsNotNull(wr_web_page_sk)] ReadSchema: struct (66) ColumnarToRow [codegen id : 20] -Input [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] (67) Filter [codegen id : 20] -Input [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88] -Condition : isnotnull(wr_web_page_sk#85) +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79] +Condition : isnotnull(wr_web_page_sk#76) (68) ReusedExchange [Reuses operator id: 103] -Output [1]: [d_date_sk#89] +Output [1]: [d_date_sk#80] (69) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [wr_returned_date_sk#88] -Right keys [1]: [d_date_sk#89] +Left keys [1]: [wr_returned_date_sk#79] +Right keys [1]: [d_date_sk#80] Join condition: None (70) Project [codegen id : 20] -Output [3]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87] -Input [5]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wr_returned_date_sk#88, d_date_sk#89] +Output [3]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78] +Input [5]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wr_returned_date_sk#79, d_date_sk#80] (71) ReusedExchange [Reuses operator id: 59] -Output [1]: [wp_web_page_sk#90] +Output [1]: [wp_web_page_sk#81] (72) BroadcastHashJoin [codegen id : 20] -Left keys [1]: [wr_web_page_sk#85] -Right keys [1]: [wp_web_page_sk#90] +Left keys [1]: [wr_web_page_sk#76] +Right keys [1]: [wp_web_page_sk#81] Join condition: None (73) Project [codegen id : 20] -Output [3]: [wr_return_amt#86, wr_net_loss#87, wp_web_page_sk#90] -Input [4]: [wr_web_page_sk#85, wr_return_amt#86, wr_net_loss#87, wp_web_page_sk#90] +Output [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Input [4]: [wr_web_page_sk#76, wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] (74) HashAggregate [codegen id : 20] -Input [3]: [wr_return_amt#86, wr_net_loss#87, wp_web_page_sk#90] -Keys [1]: [wp_web_page_sk#90] -Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#86)), partial_sum(UnscaledValue(wr_net_loss#87))] -Aggregate Attributes [2]: [sum#91, sum#92] -Results [3]: [wp_web_page_sk#90, sum#93, sum#94] +Input [3]: [wr_return_amt#77, wr_net_loss#78, wp_web_page_sk#81] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [partial_sum(UnscaledValue(wr_return_amt#77)), partial_sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum#82, sum#83] +Results [3]: [wp_web_page_sk#81, sum#84, sum#85] (75) Exchange -Input [3]: [wp_web_page_sk#90, sum#93, sum#94] -Arguments: hashpartitioning(wp_web_page_sk#90, 5), ENSURE_REQUIREMENTS, [id=#95] +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Arguments: hashpartitioning(wp_web_page_sk#81, 5), ENSURE_REQUIREMENTS, [plan_id=10] (76) HashAggregate [codegen id : 21] -Input [3]: [wp_web_page_sk#90, sum#93, sum#94] -Keys [1]: [wp_web_page_sk#90] -Functions [2]: [sum(UnscaledValue(wr_return_amt#86)), sum(UnscaledValue(wr_net_loss#87))] -Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#86))#96, sum(UnscaledValue(wr_net_loss#87))#97] -Results [3]: [wp_web_page_sk#90, MakeDecimal(sum(UnscaledValue(wr_return_amt#86))#96,17,2) AS returns#98, MakeDecimal(sum(UnscaledValue(wr_net_loss#87))#97,17,2) AS profit_loss#99] +Input [3]: [wp_web_page_sk#81, sum#84, sum#85] +Keys [1]: [wp_web_page_sk#81] +Functions [2]: [sum(UnscaledValue(wr_return_amt#77)), sum(UnscaledValue(wr_net_loss#78))] +Aggregate Attributes [2]: [sum(UnscaledValue(wr_return_amt#77))#86, sum(UnscaledValue(wr_net_loss#78))#87] +Results [3]: [wp_web_page_sk#81, MakeDecimal(sum(UnscaledValue(wr_return_amt#77))#86,17,2) AS returns#88, MakeDecimal(sum(UnscaledValue(wr_net_loss#78))#87,17,2) AS profit_loss#89] (77) BroadcastExchange -Input [3]: [wp_web_page_sk#90, returns#98, profit_loss#99] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#100] +Input [3]: [wp_web_page_sk#81, returns#88, profit_loss#89] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=11] (78) BroadcastHashJoin [codegen id : 22] -Left keys [1]: [wp_web_page_sk#74] -Right keys [1]: [wp_web_page_sk#90] +Left keys [1]: [wp_web_page_sk#67] +Right keys [1]: [wp_web_page_sk#81] Join condition: None (79) Project [codegen id : 22] -Output [5]: [web channel AS channel#101, wp_web_page_sk#74 AS id#102, sales#83, coalesce(returns#98, 0.00) AS returns#103, CheckOverflow((promote_precision(cast(profit#84 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#99, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#104] -Input [6]: [wp_web_page_sk#74, sales#83, profit#84, wp_web_page_sk#90, returns#98, profit_loss#99] +Output [5]: [web channel AS channel#90, wp_web_page_sk#67 AS id#91, sales#74, coalesce(returns#88, 0.00) AS returns#92, CheckOverflow((promote_precision(cast(profit#75 as decimal(18,2))) - promote_precision(cast(coalesce(profit_loss#89, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS profit#93] +Input [6]: [wp_web_page_sk#67, sales#74, profit#75, wp_web_page_sk#81, returns#88, profit_loss#89] (80) Union (81) HashAggregate [codegen id : 23] -Input [5]: [channel#34, id#35, sales#16, returns#36, profit#37] -Keys [2]: [channel#34, id#35] -Functions [3]: [partial_sum(sales#16), partial_sum(returns#36), partial_sum(profit#37)] -Aggregate Attributes [6]: [sum#105, isEmpty#106, sum#107, isEmpty#108, sum#109, isEmpty#110] -Results [8]: [channel#34, id#35, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116] +Input [5]: [channel#30, id#31, sales#14, returns#32, profit#33] +Keys [2]: [channel#30, id#31] +Functions [3]: [partial_sum(sales#14), partial_sum(returns#32), partial_sum(profit#33)] +Aggregate Attributes [6]: [sum#94, isEmpty#95, sum#96, isEmpty#97, sum#98, isEmpty#99] +Results [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] (82) Exchange -Input [8]: [channel#34, id#35, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116] -Arguments: hashpartitioning(channel#34, id#35, 5), ENSURE_REQUIREMENTS, [id=#117] +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Arguments: hashpartitioning(channel#30, id#31, 5), ENSURE_REQUIREMENTS, [plan_id=12] (83) HashAggregate [codegen id : 24] -Input [8]: [channel#34, id#35, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116] -Keys [2]: [channel#34, id#35] -Functions [3]: [sum(sales#16), sum(returns#36), sum(profit#37)] -Aggregate Attributes [3]: [sum(sales#16)#118, sum(returns#36)#119, sum(profit#37)#120] -Results [5]: [channel#34, id#35, cast(sum(sales#16)#118 as decimal(37,2)) AS sales#121, cast(sum(returns#36)#119 as decimal(37,2)) AS returns#122, cast(sum(profit#37)#120 as decimal(38,2)) AS profit#123] +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#30, id#31] +Functions [3]: [sum(sales#14), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#14)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [5]: [channel#30, id#31, cast(sum(sales#14)#106 as decimal(37,2)) AS sales#109, cast(sum(returns#32)#107 as decimal(37,2)) AS returns#110, cast(sum(profit#33)#108 as decimal(38,2)) AS profit#111] (84) ReusedExchange [Reuses operator id: 82] -Output [8]: [channel#34, id#35, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116] +Output [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] (85) HashAggregate [codegen id : 48] -Input [8]: [channel#34, id#35, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116] -Keys [2]: [channel#34, id#35] -Functions [3]: [sum(sales#16), sum(returns#36), sum(profit#37)] -Aggregate Attributes [3]: [sum(sales#16)#118, sum(returns#36)#119, sum(profit#37)#120] -Results [4]: [channel#34, sum(sales#16)#118 AS sales#124, sum(returns#36)#119 AS returns#125, sum(profit#37)#120 AS profit#126] +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#30, id#31] +Functions [3]: [sum(sales#14), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#14)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [4]: [channel#30, sum(sales#14)#106 AS sales#112, sum(returns#32)#107 AS returns#113, sum(profit#33)#108 AS profit#114] (86) HashAggregate [codegen id : 48] -Input [4]: [channel#34, sales#124, returns#125, profit#126] -Keys [1]: [channel#34] -Functions [3]: [partial_sum(sales#124), partial_sum(returns#125), partial_sum(profit#126)] -Aggregate Attributes [6]: [sum#127, isEmpty#128, sum#129, isEmpty#130, sum#131, isEmpty#132] -Results [7]: [channel#34, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] +Input [4]: [channel#30, sales#112, returns#113, profit#114] +Keys [1]: [channel#30] +Functions [3]: [partial_sum(sales#112), partial_sum(returns#113), partial_sum(profit#114)] +Aggregate Attributes [6]: [sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Results [7]: [channel#30, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] (87) Exchange -Input [7]: [channel#34, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] -Arguments: hashpartitioning(channel#34, 5), ENSURE_REQUIREMENTS, [id=#139] +Input [7]: [channel#30, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Arguments: hashpartitioning(channel#30, 5), ENSURE_REQUIREMENTS, [plan_id=13] (88) HashAggregate [codegen id : 49] -Input [7]: [channel#34, sum#133, isEmpty#134, sum#135, isEmpty#136, sum#137, isEmpty#138] -Keys [1]: [channel#34] -Functions [3]: [sum(sales#124), sum(returns#125), sum(profit#126)] -Aggregate Attributes [3]: [sum(sales#124)#140, sum(returns#125)#141, sum(profit#126)#142] -Results [5]: [channel#34, null AS id#143, sum(sales#124)#140 AS sales#144, sum(returns#125)#141 AS returns#145, sum(profit#126)#142 AS profit#146] +Input [7]: [channel#30, sum#121, isEmpty#122, sum#123, isEmpty#124, sum#125, isEmpty#126] +Keys [1]: [channel#30] +Functions [3]: [sum(sales#112), sum(returns#113), sum(profit#114)] +Aggregate Attributes [3]: [sum(sales#112)#127, sum(returns#113)#128, sum(profit#114)#129] +Results [5]: [channel#30, null AS id#130, sum(sales#112)#127 AS sales#131, sum(returns#113)#128 AS returns#132, sum(profit#114)#129 AS profit#133] (89) ReusedExchange [Reuses operator id: 82] -Output [8]: [channel#34, id#35, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116] +Output [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] (90) HashAggregate [codegen id : 73] -Input [8]: [channel#34, id#35, sum#111, isEmpty#112, sum#113, isEmpty#114, sum#115, isEmpty#116] -Keys [2]: [channel#34, id#35] -Functions [3]: [sum(sales#16), sum(returns#36), sum(profit#37)] -Aggregate Attributes [3]: [sum(sales#16)#118, sum(returns#36)#119, sum(profit#37)#120] -Results [3]: [sum(sales#16)#118 AS sales#124, sum(returns#36)#119 AS returns#125, sum(profit#37)#120 AS profit#126] +Input [8]: [channel#30, id#31, sum#100, isEmpty#101, sum#102, isEmpty#103, sum#104, isEmpty#105] +Keys [2]: [channel#30, id#31] +Functions [3]: [sum(sales#14), sum(returns#32), sum(profit#33)] +Aggregate Attributes [3]: [sum(sales#14)#106, sum(returns#32)#107, sum(profit#33)#108] +Results [3]: [sum(sales#14)#106 AS sales#112, sum(returns#32)#107 AS returns#113, sum(profit#33)#108 AS profit#114] (91) HashAggregate [codegen id : 73] -Input [3]: [sales#124, returns#125, profit#126] +Input [3]: [sales#112, returns#113, profit#114] Keys: [] -Functions [3]: [partial_sum(sales#124), partial_sum(returns#125), partial_sum(profit#126)] -Aggregate Attributes [6]: [sum#147, isEmpty#148, sum#149, isEmpty#150, sum#151, isEmpty#152] -Results [6]: [sum#153, isEmpty#154, sum#155, isEmpty#156, sum#157, isEmpty#158] +Functions [3]: [partial_sum(sales#112), partial_sum(returns#113), partial_sum(profit#114)] +Aggregate Attributes [6]: [sum#134, isEmpty#135, sum#136, isEmpty#137, sum#138, isEmpty#139] +Results [6]: [sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] (92) Exchange -Input [6]: [sum#153, isEmpty#154, sum#155, isEmpty#156, sum#157, isEmpty#158] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#159] +Input [6]: [sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=14] (93) HashAggregate [codegen id : 74] -Input [6]: [sum#153, isEmpty#154, sum#155, isEmpty#156, sum#157, isEmpty#158] +Input [6]: [sum#140, isEmpty#141, sum#142, isEmpty#143, sum#144, isEmpty#145] Keys: [] -Functions [3]: [sum(sales#124), sum(returns#125), sum(profit#126)] -Aggregate Attributes [3]: [sum(sales#124)#160, sum(returns#125)#161, sum(profit#126)#162] -Results [5]: [null AS channel#163, null AS id#164, sum(sales#124)#160 AS sales#165, sum(returns#125)#161 AS returns#166, sum(profit#126)#162 AS profit#167] +Functions [3]: [sum(sales#112), sum(returns#113), sum(profit#114)] +Aggregate Attributes [3]: [sum(sales#112)#146, sum(returns#113)#147, sum(profit#114)#148] +Results [5]: [null AS channel#149, null AS id#150, sum(sales#112)#146 AS sales#151, sum(returns#113)#147 AS returns#152, sum(profit#114)#148 AS profit#153] (94) Union (95) HashAggregate [codegen id : 75] -Input [5]: [channel#34, id#35, sales#121, returns#122, profit#123] -Keys [5]: [channel#34, id#35, sales#121, returns#122, profit#123] +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Keys [5]: [channel#30, id#31, sales#109, returns#110, profit#111] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#34, id#35, sales#121, returns#122, profit#123] +Results [5]: [channel#30, id#31, sales#109, returns#110, profit#111] (96) Exchange -Input [5]: [channel#34, id#35, sales#121, returns#122, profit#123] -Arguments: hashpartitioning(channel#34, id#35, sales#121, returns#122, profit#123, 5), ENSURE_REQUIREMENTS, [id=#168] +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Arguments: hashpartitioning(channel#30, id#31, sales#109, returns#110, profit#111, 5), ENSURE_REQUIREMENTS, [plan_id=15] (97) HashAggregate [codegen id : 76] -Input [5]: [channel#34, id#35, sales#121, returns#122, profit#123] -Keys [5]: [channel#34, id#35, sales#121, returns#122, profit#123] +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Keys [5]: [channel#30, id#31, sales#109, returns#110, profit#111] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#34, id#35, sales#121, returns#122, profit#123] +Results [5]: [channel#30, id#31, sales#109, returns#110, profit#111] (98) TakeOrderedAndProject -Input [5]: [channel#34, id#35, sales#121, returns#122, profit#123] -Arguments: 100, [channel#34 ASC NULLS FIRST, id#35 ASC NULLS FIRST], [channel#34, id#35, sales#121, returns#122, profit#123] +Input [5]: [channel#30, id#31, sales#109, returns#110, profit#111] +Arguments: 100, [channel#30 ASC NULLS FIRST, id#31 ASC NULLS FIRST], [channel#30, id#31, sales#109, returns#110, profit#111] ===== Subqueries ===== @@ -585,35 +585,35 @@ BroadcastExchange (103) (99) Scan parquet default.date_dim -Output [2]: [d_date_sk#6, d_date#169] +Output [2]: [d_date_sk#6, d_date#154] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] ReadSchema: struct (100) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#6, d_date#169] +Input [2]: [d_date_sk#6, d_date#154] (101) Filter [codegen id : 1] -Input [2]: [d_date_sk#6, d_date#169] -Condition : (((isnotnull(d_date#169) AND (d_date#169 >= 1998-08-04)) AND (d_date#169 <= 1998-09-03)) AND isnotnull(d_date_sk#6)) +Input [2]: [d_date_sk#6, d_date#154] +Condition : (((isnotnull(d_date#154) AND (d_date#154 >= 1998-08-04)) AND (d_date#154 <= 1998-09-03)) AND isnotnull(d_date_sk#6)) (102) Project [codegen id : 1] Output [1]: [d_date_sk#6] -Input [2]: [d_date_sk#6, d_date#169] +Input [2]: [d_date_sk#6, d_date#154] (103) BroadcastExchange Input [1]: [d_date_sk#6] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#170] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=16] -Subquery:2 Hosting operator id = 16 Hosting Expression = sr_returned_date_sk#21 IN dynamicpruning#5 +Subquery:2 Hosting operator id = 16 Hosting Expression = sr_returned_date_sk#19 IN dynamicpruning#5 -Subquery:3 Hosting operator id = 31 Hosting Expression = cs_sold_date_sk#41 IN dynamicpruning#5 +Subquery:3 Hosting operator id = 31 Hosting Expression = cs_sold_date_sk#37 IN dynamicpruning#5 -Subquery:4 Hosting operator id = 40 Hosting Expression = cr_returned_date_sk#55 IN dynamicpruning#5 +Subquery:4 Hosting operator id = 40 Hosting Expression = cr_returned_date_sk#49 IN dynamicpruning#5 -Subquery:5 Hosting operator id = 50 Hosting Expression = ws_sold_date_sk#72 IN dynamicpruning#5 +Subquery:5 Hosting operator id = 50 Hosting Expression = ws_sold_date_sk#65 IN dynamicpruning#5 -Subquery:6 Hosting operator id = 65 Hosting Expression = wr_returned_date_sk#88 IN dynamicpruning#5 +Subquery:6 Hosting operator id = 65 Hosting Expression = wr_returned_date_sk#79 IN dynamicpruning#5 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.sf100/explain.txt index 133d5272ec111..1be531c232011 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78.sf100/explain.txt @@ -88,306 +88,306 @@ Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_customer_sk#2)) (4) Exchange Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] -Arguments: hashpartitioning(ss_ticket_number#3, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ss_ticket_number#3, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] Arguments: [ss_ticket_number#3 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_returns -Output [3]: [sr_item_sk#10, sr_ticket_number#11, sr_returned_date_sk#12] +Output [3]: [sr_item_sk#9, sr_ticket_number#10, sr_returned_date_sk#11] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [3]: [sr_item_sk#10, sr_ticket_number#11, sr_returned_date_sk#12] +Input [3]: [sr_item_sk#9, sr_ticket_number#10, sr_returned_date_sk#11] (8) Filter [codegen id : 3] -Input [3]: [sr_item_sk#10, sr_ticket_number#11, sr_returned_date_sk#12] -Condition : (isnotnull(sr_ticket_number#11) AND isnotnull(sr_item_sk#10)) +Input [3]: [sr_item_sk#9, sr_ticket_number#10, sr_returned_date_sk#11] +Condition : (isnotnull(sr_ticket_number#10) AND isnotnull(sr_item_sk#9)) (9) Project [codegen id : 3] -Output [2]: [sr_item_sk#10, sr_ticket_number#11] -Input [3]: [sr_item_sk#10, sr_ticket_number#11, sr_returned_date_sk#12] +Output [2]: [sr_item_sk#9, sr_ticket_number#10] +Input [3]: [sr_item_sk#9, sr_ticket_number#10, sr_returned_date_sk#11] (10) Exchange -Input [2]: [sr_item_sk#10, sr_ticket_number#11] -Arguments: hashpartitioning(sr_ticket_number#11, sr_item_sk#10, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [2]: [sr_item_sk#9, sr_ticket_number#10] +Arguments: hashpartitioning(sr_ticket_number#10, sr_item_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [2]: [sr_item_sk#10, sr_ticket_number#11] -Arguments: [sr_ticket_number#11 ASC NULLS FIRST, sr_item_sk#10 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#9, sr_ticket_number#10] +Arguments: [sr_ticket_number#10 ASC NULLS FIRST, sr_item_sk#9 ASC NULLS FIRST], false, 0 (12) SortMergeJoin [codegen id : 6] Left keys [2]: [ss_ticket_number#3, ss_item_sk#1] -Right keys [2]: [sr_ticket_number#11, sr_item_sk#10] +Right keys [2]: [sr_ticket_number#10, sr_item_sk#9] Join condition: None (13) Filter [codegen id : 6] -Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#10, sr_ticket_number#11] -Condition : isnull(sr_ticket_number#11) +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#9, sr_ticket_number#10] +Condition : isnull(sr_ticket_number#10) (14) Project [codegen id : 6] Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] -Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#10, sr_ticket_number#11] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#9, sr_ticket_number#10] (15) ReusedExchange [Reuses operator id: 74] -Output [2]: [d_date_sk#14, d_year#15] +Output [2]: [d_date_sk#12, d_year#13] (16) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_sold_date_sk#7] -Right keys [1]: [d_date_sk#14] +Right keys [1]: [d_date_sk#12] Join condition: None (17) Project [codegen id : 6] -Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#15] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#14, d_year#15] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#13] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#12, d_year#13] (18) HashAggregate [codegen id : 6] -Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#15] -Keys [3]: [d_year#15, ss_item_sk#1, ss_customer_sk#2] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#13] +Keys [3]: [d_year#13, ss_item_sk#1, ss_customer_sk#2] Functions [3]: [partial_sum(ss_quantity#4), partial_sum(UnscaledValue(ss_wholesale_cost#5)), partial_sum(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [3]: [sum#16, sum#17, sum#18] -Results [6]: [d_year#15, ss_item_sk#1, ss_customer_sk#2, sum#19, sum#20, sum#21] +Aggregate Attributes [3]: [sum#14, sum#15, sum#16] +Results [6]: [d_year#13, ss_item_sk#1, ss_customer_sk#2, sum#17, sum#18, sum#19] (19) Exchange -Input [6]: [d_year#15, ss_item_sk#1, ss_customer_sk#2, sum#19, sum#20, sum#21] -Arguments: hashpartitioning(d_year#15, ss_item_sk#1, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [6]: [d_year#13, ss_item_sk#1, ss_customer_sk#2, sum#17, sum#18, sum#19] +Arguments: hashpartitioning(d_year#13, ss_item_sk#1, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 7] -Input [6]: [d_year#15, ss_item_sk#1, ss_customer_sk#2, sum#19, sum#20, sum#21] -Keys [3]: [d_year#15, ss_item_sk#1, ss_customer_sk#2] +Input [6]: [d_year#13, ss_item_sk#1, ss_customer_sk#2, sum#17, sum#18, sum#19] +Keys [3]: [d_year#13, ss_item_sk#1, ss_customer_sk#2] Functions [3]: [sum(ss_quantity#4), sum(UnscaledValue(ss_wholesale_cost#5)), sum(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [3]: [sum(ss_quantity#4)#23, sum(UnscaledValue(ss_wholesale_cost#5))#24, sum(UnscaledValue(ss_sales_price#6))#25] -Results [6]: [d_year#15 AS ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, sum(ss_quantity#4)#23 AS ss_qty#27, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#5))#24,17,2) AS ss_wc#28, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#25,17,2) AS ss_sp#29] +Aggregate Attributes [3]: [sum(ss_quantity#4)#20, sum(UnscaledValue(ss_wholesale_cost#5))#21, sum(UnscaledValue(ss_sales_price#6))#22] +Results [6]: [d_year#13 AS ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2, sum(ss_quantity#4)#20 AS ss_qty#24, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#5))#21,17,2) AS ss_wc#25, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#22,17,2) AS ss_sp#26] (21) Sort [codegen id : 7] -Input [6]: [ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29] -Arguments: [ss_sold_year#26 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], false, 0 +Input [6]: [ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2, ss_qty#24, ss_wc#25, ss_sp#26] +Arguments: [ss_sold_year#23 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], false, 0 (22) Scan parquet default.web_sales -Output [7]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_order_number#32, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36] +Output [7]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_order_number#29, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#36), dynamicpruningexpression(ws_sold_date_sk#36 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ws_sold_date_sk#33), dynamicpruningexpression(ws_sold_date_sk#33 IN dynamicpruning#8)] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 8] -Input [7]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_order_number#32, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36] +Input [7]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_order_number#29, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33] (24) Filter [codegen id : 8] -Input [7]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_order_number#32, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36] -Condition : (isnotnull(ws_item_sk#30) AND isnotnull(ws_bill_customer_sk#31)) +Input [7]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_order_number#29, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33] +Condition : (isnotnull(ws_item_sk#27) AND isnotnull(ws_bill_customer_sk#28)) (25) Exchange -Input [7]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_order_number#32, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36] -Arguments: hashpartitioning(ws_order_number#32, ws_item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#37] +Input [7]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_order_number#29, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33] +Arguments: hashpartitioning(ws_order_number#29, ws_item_sk#27, 5), ENSURE_REQUIREMENTS, [plan_id=4] (26) Sort [codegen id : 9] -Input [7]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_order_number#32, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36] -Arguments: [ws_order_number#32 ASC NULLS FIRST, ws_item_sk#30 ASC NULLS FIRST], false, 0 +Input [7]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_order_number#29, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33] +Arguments: [ws_order_number#29 ASC NULLS FIRST, ws_item_sk#27 ASC NULLS FIRST], false, 0 (27) Scan parquet default.web_returns -Output [3]: [wr_item_sk#38, wr_order_number#39, wr_returned_date_sk#40] +Output [3]: [wr_item_sk#34, wr_order_number#35, wr_returned_date_sk#36] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 10] -Input [3]: [wr_item_sk#38, wr_order_number#39, wr_returned_date_sk#40] +Input [3]: [wr_item_sk#34, wr_order_number#35, wr_returned_date_sk#36] (29) Filter [codegen id : 10] -Input [3]: [wr_item_sk#38, wr_order_number#39, wr_returned_date_sk#40] -Condition : (isnotnull(wr_order_number#39) AND isnotnull(wr_item_sk#38)) +Input [3]: [wr_item_sk#34, wr_order_number#35, wr_returned_date_sk#36] +Condition : (isnotnull(wr_order_number#35) AND isnotnull(wr_item_sk#34)) (30) Project [codegen id : 10] -Output [2]: [wr_item_sk#38, wr_order_number#39] -Input [3]: [wr_item_sk#38, wr_order_number#39, wr_returned_date_sk#40] +Output [2]: [wr_item_sk#34, wr_order_number#35] +Input [3]: [wr_item_sk#34, wr_order_number#35, wr_returned_date_sk#36] (31) Exchange -Input [2]: [wr_item_sk#38, wr_order_number#39] -Arguments: hashpartitioning(wr_order_number#39, wr_item_sk#38, 5), ENSURE_REQUIREMENTS, [id=#41] +Input [2]: [wr_item_sk#34, wr_order_number#35] +Arguments: hashpartitioning(wr_order_number#35, wr_item_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=5] (32) Sort [codegen id : 11] -Input [2]: [wr_item_sk#38, wr_order_number#39] -Arguments: [wr_order_number#39 ASC NULLS FIRST, wr_item_sk#38 ASC NULLS FIRST], false, 0 +Input [2]: [wr_item_sk#34, wr_order_number#35] +Arguments: [wr_order_number#35 ASC NULLS FIRST, wr_item_sk#34 ASC NULLS FIRST], false, 0 (33) SortMergeJoin [codegen id : 13] -Left keys [2]: [ws_order_number#32, ws_item_sk#30] -Right keys [2]: [wr_order_number#39, wr_item_sk#38] +Left keys [2]: [ws_order_number#29, ws_item_sk#27] +Right keys [2]: [wr_order_number#35, wr_item_sk#34] Join condition: None (34) Filter [codegen id : 13] -Input [9]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_order_number#32, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36, wr_item_sk#38, wr_order_number#39] -Condition : isnull(wr_order_number#39) +Input [9]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_order_number#29, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33, wr_item_sk#34, wr_order_number#35] +Condition : isnull(wr_order_number#35) (35) Project [codegen id : 13] -Output [6]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36] -Input [9]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_order_number#32, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36, wr_item_sk#38, wr_order_number#39] +Output [6]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33] +Input [9]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_order_number#29, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33, wr_item_sk#34, wr_order_number#35] (36) ReusedExchange [Reuses operator id: 74] -Output [2]: [d_date_sk#42, d_year#43] +Output [2]: [d_date_sk#37, d_year#38] (37) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ws_sold_date_sk#36] -Right keys [1]: [d_date_sk#42] +Left keys [1]: [ws_sold_date_sk#33] +Right keys [1]: [d_date_sk#37] Join condition: None (38) Project [codegen id : 13] -Output [6]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, d_year#43] -Input [8]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36, d_date_sk#42, d_year#43] +Output [6]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, d_year#38] +Input [8]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33, d_date_sk#37, d_year#38] (39) HashAggregate [codegen id : 13] -Input [6]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, d_year#43] -Keys [3]: [d_year#43, ws_item_sk#30, ws_bill_customer_sk#31] -Functions [3]: [partial_sum(ws_quantity#33), partial_sum(UnscaledValue(ws_wholesale_cost#34)), partial_sum(UnscaledValue(ws_sales_price#35))] -Aggregate Attributes [3]: [sum#44, sum#45, sum#46] -Results [6]: [d_year#43, ws_item_sk#30, ws_bill_customer_sk#31, sum#47, sum#48, sum#49] +Input [6]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, d_year#38] +Keys [3]: [d_year#38, ws_item_sk#27, ws_bill_customer_sk#28] +Functions [3]: [partial_sum(ws_quantity#30), partial_sum(UnscaledValue(ws_wholesale_cost#31)), partial_sum(UnscaledValue(ws_sales_price#32))] +Aggregate Attributes [3]: [sum#39, sum#40, sum#41] +Results [6]: [d_year#38, ws_item_sk#27, ws_bill_customer_sk#28, sum#42, sum#43, sum#44] (40) Exchange -Input [6]: [d_year#43, ws_item_sk#30, ws_bill_customer_sk#31, sum#47, sum#48, sum#49] -Arguments: hashpartitioning(d_year#43, ws_item_sk#30, ws_bill_customer_sk#31, 5), ENSURE_REQUIREMENTS, [id=#50] +Input [6]: [d_year#38, ws_item_sk#27, ws_bill_customer_sk#28, sum#42, sum#43, sum#44] +Arguments: hashpartitioning(d_year#38, ws_item_sk#27, ws_bill_customer_sk#28, 5), ENSURE_REQUIREMENTS, [plan_id=6] (41) HashAggregate [codegen id : 14] -Input [6]: [d_year#43, ws_item_sk#30, ws_bill_customer_sk#31, sum#47, sum#48, sum#49] -Keys [3]: [d_year#43, ws_item_sk#30, ws_bill_customer_sk#31] -Functions [3]: [sum(ws_quantity#33), sum(UnscaledValue(ws_wholesale_cost#34)), sum(UnscaledValue(ws_sales_price#35))] -Aggregate Attributes [3]: [sum(ws_quantity#33)#51, sum(UnscaledValue(ws_wholesale_cost#34))#52, sum(UnscaledValue(ws_sales_price#35))#53] -Results [6]: [d_year#43 AS ws_sold_year#54, ws_item_sk#30, ws_bill_customer_sk#31 AS ws_customer_sk#55, sum(ws_quantity#33)#51 AS ws_qty#56, MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#34))#52,17,2) AS ws_wc#57, MakeDecimal(sum(UnscaledValue(ws_sales_price#35))#53,17,2) AS ws_sp#58] +Input [6]: [d_year#38, ws_item_sk#27, ws_bill_customer_sk#28, sum#42, sum#43, sum#44] +Keys [3]: [d_year#38, ws_item_sk#27, ws_bill_customer_sk#28] +Functions [3]: [sum(ws_quantity#30), sum(UnscaledValue(ws_wholesale_cost#31)), sum(UnscaledValue(ws_sales_price#32))] +Aggregate Attributes [3]: [sum(ws_quantity#30)#45, sum(UnscaledValue(ws_wholesale_cost#31))#46, sum(UnscaledValue(ws_sales_price#32))#47] +Results [6]: [d_year#38 AS ws_sold_year#48, ws_item_sk#27, ws_bill_customer_sk#28 AS ws_customer_sk#49, sum(ws_quantity#30)#45 AS ws_qty#50, MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#31))#46,17,2) AS ws_wc#51, MakeDecimal(sum(UnscaledValue(ws_sales_price#32))#47,17,2) AS ws_sp#52] (42) Filter [codegen id : 14] -Input [6]: [ws_sold_year#54, ws_item_sk#30, ws_customer_sk#55, ws_qty#56, ws_wc#57, ws_sp#58] -Condition : (coalesce(ws_qty#56, 0) > 0) +Input [6]: [ws_sold_year#48, ws_item_sk#27, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] +Condition : (coalesce(ws_qty#50, 0) > 0) (43) Sort [codegen id : 14] -Input [6]: [ws_sold_year#54, ws_item_sk#30, ws_customer_sk#55, ws_qty#56, ws_wc#57, ws_sp#58] -Arguments: [ws_sold_year#54 ASC NULLS FIRST, ws_item_sk#30 ASC NULLS FIRST, ws_customer_sk#55 ASC NULLS FIRST], false, 0 +Input [6]: [ws_sold_year#48, ws_item_sk#27, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] +Arguments: [ws_sold_year#48 ASC NULLS FIRST, ws_item_sk#27 ASC NULLS FIRST, ws_customer_sk#49 ASC NULLS FIRST], false, 0 (44) SortMergeJoin [codegen id : 15] -Left keys [3]: [ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2] -Right keys [3]: [ws_sold_year#54, ws_item_sk#30, ws_customer_sk#55] +Left keys [3]: [ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [ws_sold_year#48, ws_item_sk#27, ws_customer_sk#49] Join condition: None (45) Project [codegen id : 15] -Output [9]: [ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29, ws_qty#56, ws_wc#57, ws_sp#58] -Input [12]: [ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29, ws_sold_year#54, ws_item_sk#30, ws_customer_sk#55, ws_qty#56, ws_wc#57, ws_sp#58] +Output [9]: [ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2, ss_qty#24, ss_wc#25, ss_sp#26, ws_qty#50, ws_wc#51, ws_sp#52] +Input [12]: [ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2, ss_qty#24, ss_wc#25, ss_sp#26, ws_sold_year#48, ws_item_sk#27, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] (46) Scan parquet default.catalog_sales -Output [7]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_order_number#61, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65] +Output [7]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_order_number#55, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#65), dynamicpruningexpression(cs_sold_date_sk#65 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(cs_sold_date_sk#59), dynamicpruningexpression(cs_sold_date_sk#59 IN dynamicpruning#8)] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] ReadSchema: struct (47) ColumnarToRow [codegen id : 16] -Input [7]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_order_number#61, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65] +Input [7]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_order_number#55, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59] (48) Filter [codegen id : 16] -Input [7]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_order_number#61, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65] -Condition : (isnotnull(cs_item_sk#60) AND isnotnull(cs_bill_customer_sk#59)) +Input [7]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_order_number#55, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59] +Condition : (isnotnull(cs_item_sk#54) AND isnotnull(cs_bill_customer_sk#53)) (49) Exchange -Input [7]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_order_number#61, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65] -Arguments: hashpartitioning(cs_order_number#61, cs_item_sk#60, 5), ENSURE_REQUIREMENTS, [id=#66] +Input [7]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_order_number#55, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59] +Arguments: hashpartitioning(cs_order_number#55, cs_item_sk#54, 5), ENSURE_REQUIREMENTS, [plan_id=7] (50) Sort [codegen id : 17] -Input [7]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_order_number#61, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65] -Arguments: [cs_order_number#61 ASC NULLS FIRST, cs_item_sk#60 ASC NULLS FIRST], false, 0 +Input [7]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_order_number#55, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59] +Arguments: [cs_order_number#55 ASC NULLS FIRST, cs_item_sk#54 ASC NULLS FIRST], false, 0 (51) Scan parquet default.catalog_returns -Output [3]: [cr_item_sk#67, cr_order_number#68, cr_returned_date_sk#69] +Output [3]: [cr_item_sk#60, cr_order_number#61, cr_returned_date_sk#62] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] ReadSchema: struct (52) ColumnarToRow [codegen id : 18] -Input [3]: [cr_item_sk#67, cr_order_number#68, cr_returned_date_sk#69] +Input [3]: [cr_item_sk#60, cr_order_number#61, cr_returned_date_sk#62] (53) Filter [codegen id : 18] -Input [3]: [cr_item_sk#67, cr_order_number#68, cr_returned_date_sk#69] -Condition : (isnotnull(cr_order_number#68) AND isnotnull(cr_item_sk#67)) +Input [3]: [cr_item_sk#60, cr_order_number#61, cr_returned_date_sk#62] +Condition : (isnotnull(cr_order_number#61) AND isnotnull(cr_item_sk#60)) (54) Project [codegen id : 18] -Output [2]: [cr_item_sk#67, cr_order_number#68] -Input [3]: [cr_item_sk#67, cr_order_number#68, cr_returned_date_sk#69] +Output [2]: [cr_item_sk#60, cr_order_number#61] +Input [3]: [cr_item_sk#60, cr_order_number#61, cr_returned_date_sk#62] (55) Exchange -Input [2]: [cr_item_sk#67, cr_order_number#68] -Arguments: hashpartitioning(cr_order_number#68, cr_item_sk#67, 5), ENSURE_REQUIREMENTS, [id=#70] +Input [2]: [cr_item_sk#60, cr_order_number#61] +Arguments: hashpartitioning(cr_order_number#61, cr_item_sk#60, 5), ENSURE_REQUIREMENTS, [plan_id=8] (56) Sort [codegen id : 19] -Input [2]: [cr_item_sk#67, cr_order_number#68] -Arguments: [cr_order_number#68 ASC NULLS FIRST, cr_item_sk#67 ASC NULLS FIRST], false, 0 +Input [2]: [cr_item_sk#60, cr_order_number#61] +Arguments: [cr_order_number#61 ASC NULLS FIRST, cr_item_sk#60 ASC NULLS FIRST], false, 0 (57) SortMergeJoin [codegen id : 21] -Left keys [2]: [cs_order_number#61, cs_item_sk#60] -Right keys [2]: [cr_order_number#68, cr_item_sk#67] +Left keys [2]: [cs_order_number#55, cs_item_sk#54] +Right keys [2]: [cr_order_number#61, cr_item_sk#60] Join condition: None (58) Filter [codegen id : 21] -Input [9]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_order_number#61, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65, cr_item_sk#67, cr_order_number#68] -Condition : isnull(cr_order_number#68) +Input [9]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_order_number#55, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59, cr_item_sk#60, cr_order_number#61] +Condition : isnull(cr_order_number#61) (59) Project [codegen id : 21] -Output [6]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65] -Input [9]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_order_number#61, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65, cr_item_sk#67, cr_order_number#68] +Output [6]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59] +Input [9]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_order_number#55, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59, cr_item_sk#60, cr_order_number#61] (60) ReusedExchange [Reuses operator id: 74] -Output [2]: [d_date_sk#71, d_year#72] +Output [2]: [d_date_sk#63, d_year#64] (61) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [cs_sold_date_sk#65] -Right keys [1]: [d_date_sk#71] +Left keys [1]: [cs_sold_date_sk#59] +Right keys [1]: [d_date_sk#63] Join condition: None (62) Project [codegen id : 21] -Output [6]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, d_year#72] -Input [8]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65, d_date_sk#71, d_year#72] +Output [6]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, d_year#64] +Input [8]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59, d_date_sk#63, d_year#64] (63) HashAggregate [codegen id : 21] -Input [6]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, d_year#72] -Keys [3]: [d_year#72, cs_item_sk#60, cs_bill_customer_sk#59] -Functions [3]: [partial_sum(cs_quantity#62), partial_sum(UnscaledValue(cs_wholesale_cost#63)), partial_sum(UnscaledValue(cs_sales_price#64))] -Aggregate Attributes [3]: [sum#73, sum#74, sum#75] -Results [6]: [d_year#72, cs_item_sk#60, cs_bill_customer_sk#59, sum#76, sum#77, sum#78] +Input [6]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, d_year#64] +Keys [3]: [d_year#64, cs_item_sk#54, cs_bill_customer_sk#53] +Functions [3]: [partial_sum(cs_quantity#56), partial_sum(UnscaledValue(cs_wholesale_cost#57)), partial_sum(UnscaledValue(cs_sales_price#58))] +Aggregate Attributes [3]: [sum#65, sum#66, sum#67] +Results [6]: [d_year#64, cs_item_sk#54, cs_bill_customer_sk#53, sum#68, sum#69, sum#70] (64) Exchange -Input [6]: [d_year#72, cs_item_sk#60, cs_bill_customer_sk#59, sum#76, sum#77, sum#78] -Arguments: hashpartitioning(d_year#72, cs_item_sk#60, cs_bill_customer_sk#59, 5), ENSURE_REQUIREMENTS, [id=#79] +Input [6]: [d_year#64, cs_item_sk#54, cs_bill_customer_sk#53, sum#68, sum#69, sum#70] +Arguments: hashpartitioning(d_year#64, cs_item_sk#54, cs_bill_customer_sk#53, 5), ENSURE_REQUIREMENTS, [plan_id=9] (65) HashAggregate [codegen id : 22] -Input [6]: [d_year#72, cs_item_sk#60, cs_bill_customer_sk#59, sum#76, sum#77, sum#78] -Keys [3]: [d_year#72, cs_item_sk#60, cs_bill_customer_sk#59] -Functions [3]: [sum(cs_quantity#62), sum(UnscaledValue(cs_wholesale_cost#63)), sum(UnscaledValue(cs_sales_price#64))] -Aggregate Attributes [3]: [sum(cs_quantity#62)#80, sum(UnscaledValue(cs_wholesale_cost#63))#81, sum(UnscaledValue(cs_sales_price#64))#82] -Results [6]: [d_year#72 AS cs_sold_year#83, cs_item_sk#60, cs_bill_customer_sk#59 AS cs_customer_sk#84, sum(cs_quantity#62)#80 AS cs_qty#85, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#63))#81,17,2) AS cs_wc#86, MakeDecimal(sum(UnscaledValue(cs_sales_price#64))#82,17,2) AS cs_sp#87] +Input [6]: [d_year#64, cs_item_sk#54, cs_bill_customer_sk#53, sum#68, sum#69, sum#70] +Keys [3]: [d_year#64, cs_item_sk#54, cs_bill_customer_sk#53] +Functions [3]: [sum(cs_quantity#56), sum(UnscaledValue(cs_wholesale_cost#57)), sum(UnscaledValue(cs_sales_price#58))] +Aggregate Attributes [3]: [sum(cs_quantity#56)#71, sum(UnscaledValue(cs_wholesale_cost#57))#72, sum(UnscaledValue(cs_sales_price#58))#73] +Results [6]: [d_year#64 AS cs_sold_year#74, cs_item_sk#54, cs_bill_customer_sk#53 AS cs_customer_sk#75, sum(cs_quantity#56)#71 AS cs_qty#76, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#57))#72,17,2) AS cs_wc#77, MakeDecimal(sum(UnscaledValue(cs_sales_price#58))#73,17,2) AS cs_sp#78] (66) Filter [codegen id : 22] -Input [6]: [cs_sold_year#83, cs_item_sk#60, cs_customer_sk#84, cs_qty#85, cs_wc#86, cs_sp#87] -Condition : (coalesce(cs_qty#85, 0) > 0) +Input [6]: [cs_sold_year#74, cs_item_sk#54, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] +Condition : (coalesce(cs_qty#76, 0) > 0) (67) Sort [codegen id : 22] -Input [6]: [cs_sold_year#83, cs_item_sk#60, cs_customer_sk#84, cs_qty#85, cs_wc#86, cs_sp#87] -Arguments: [cs_sold_year#83 ASC NULLS FIRST, cs_item_sk#60 ASC NULLS FIRST, cs_customer_sk#84 ASC NULLS FIRST], false, 0 +Input [6]: [cs_sold_year#74, cs_item_sk#54, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] +Arguments: [cs_sold_year#74 ASC NULLS FIRST, cs_item_sk#54 ASC NULLS FIRST, cs_customer_sk#75 ASC NULLS FIRST], false, 0 (68) SortMergeJoin [codegen id : 23] -Left keys [3]: [ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2] -Right keys [3]: [cs_sold_year#83, cs_item_sk#60, cs_customer_sk#84] +Left keys [3]: [ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [cs_sold_year#74, cs_item_sk#54, cs_customer_sk#75] Join condition: None (69) Project [codegen id : 23] -Output [13]: [round((cast(ss_qty#27 as double) / cast(coalesce((ws_qty#56 + cs_qty#85), 1) as double)), 2) AS ratio#88, ss_qty#27 AS store_qty#89, ss_wc#28 AS store_wholesale_cost#90, ss_sp#29 AS store_sales_price#91, (coalesce(ws_qty#56, 0) + coalesce(cs_qty#85, 0)) AS other_chan_qty#92, CheckOverflow((promote_precision(cast(coalesce(ws_wc#57, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#86, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#93, CheckOverflow((promote_precision(cast(coalesce(ws_sp#58, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#87, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#94, ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29] -Input [15]: [ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29, ws_qty#56, ws_wc#57, ws_sp#58, cs_sold_year#83, cs_item_sk#60, cs_customer_sk#84, cs_qty#85, cs_wc#86, cs_sp#87] +Output [13]: [round((cast(ss_qty#24 as double) / cast(coalesce((ws_qty#50 + cs_qty#76), 1) as double)), 2) AS ratio#79, ss_qty#24 AS store_qty#80, ss_wc#25 AS store_wholesale_cost#81, ss_sp#26 AS store_sales_price#82, (coalesce(ws_qty#50, 0) + coalesce(cs_qty#76, 0)) AS other_chan_qty#83, CheckOverflow((promote_precision(cast(coalesce(ws_wc#51, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#77, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#84, CheckOverflow((promote_precision(cast(coalesce(ws_sp#52, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#78, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#85, ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2, ss_qty#24, ss_wc#25, ss_sp#26] +Input [15]: [ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2, ss_qty#24, ss_wc#25, ss_sp#26, ws_qty#50, ws_wc#51, ws_sp#52, cs_sold_year#74, cs_item_sk#54, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] (70) TakeOrderedAndProject -Input [13]: [ratio#88, store_qty#89, store_wholesale_cost#90, store_sales_price#91, other_chan_qty#92, other_chan_wholesale_cost#93, other_chan_sales_price#94, ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29] -Arguments: 100, [ss_sold_year#26 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST, ss_qty#27 DESC NULLS LAST, ss_wc#28 DESC NULLS LAST, ss_sp#29 DESC NULLS LAST, other_chan_qty#92 ASC NULLS FIRST, other_chan_wholesale_cost#93 ASC NULLS FIRST, other_chan_sales_price#94 ASC NULLS FIRST, ratio#88 ASC NULLS FIRST], [ratio#88, store_qty#89, store_wholesale_cost#90, store_sales_price#91, other_chan_qty#92, other_chan_wholesale_cost#93, other_chan_sales_price#94] +Input [13]: [ratio#79, store_qty#80, store_wholesale_cost#81, store_sales_price#82, other_chan_qty#83, other_chan_wholesale_cost#84, other_chan_sales_price#85, ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2, ss_qty#24, ss_wc#25, ss_sp#26] +Arguments: 100, [ss_sold_year#23 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST, ss_qty#24 DESC NULLS LAST, ss_wc#25 DESC NULLS LAST, ss_sp#26 DESC NULLS LAST, other_chan_qty#83 ASC NULLS FIRST, other_chan_wholesale_cost#84 ASC NULLS FIRST, other_chan_sales_price#85 ASC NULLS FIRST, ratio#79 ASC NULLS FIRST], [ratio#79, store_qty#80, store_wholesale_cost#81, store_sales_price#82, other_chan_qty#83, other_chan_wholesale_cost#84, other_chan_sales_price#85] ===== Subqueries ===== @@ -399,25 +399,25 @@ BroadcastExchange (74) (71) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#15] +Output [2]: [d_date_sk#12, d_year#13] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (72) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#15] +Input [2]: [d_date_sk#12, d_year#13] (73) Filter [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#15] -Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2000)) AND isnotnull(d_date_sk#14)) +Input [2]: [d_date_sk#12, d_year#13] +Condition : ((isnotnull(d_year#13) AND (d_year#13 = 2000)) AND isnotnull(d_date_sk#12)) (74) BroadcastExchange -Input [2]: [d_date_sk#14, d_year#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#95] +Input [2]: [d_date_sk#12, d_year#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] -Subquery:2 Hosting operator id = 22 Hosting Expression = ws_sold_date_sk#36 IN dynamicpruning#8 +Subquery:2 Hosting operator id = 22 Hosting Expression = ws_sold_date_sk#33 IN dynamicpruning#8 -Subquery:3 Hosting operator id = 46 Hosting Expression = cs_sold_date_sk#65 IN dynamicpruning#8 +Subquery:3 Hosting operator id = 46 Hosting Expression = cs_sold_date_sk#59 IN dynamicpruning#8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78/explain.txt index 133d5272ec111..1be531c232011 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q78/explain.txt @@ -88,306 +88,306 @@ Condition : (isnotnull(ss_item_sk#1) AND isnotnull(ss_customer_sk#2)) (4) Exchange Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] -Arguments: hashpartitioning(ss_ticket_number#3, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ss_ticket_number#3, ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [7]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] Arguments: [ss_ticket_number#3 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_returns -Output [3]: [sr_item_sk#10, sr_ticket_number#11, sr_returned_date_sk#12] +Output [3]: [sr_item_sk#9, sr_ticket_number#10, sr_returned_date_sk#11] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_ticket_number), IsNotNull(sr_item_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [3]: [sr_item_sk#10, sr_ticket_number#11, sr_returned_date_sk#12] +Input [3]: [sr_item_sk#9, sr_ticket_number#10, sr_returned_date_sk#11] (8) Filter [codegen id : 3] -Input [3]: [sr_item_sk#10, sr_ticket_number#11, sr_returned_date_sk#12] -Condition : (isnotnull(sr_ticket_number#11) AND isnotnull(sr_item_sk#10)) +Input [3]: [sr_item_sk#9, sr_ticket_number#10, sr_returned_date_sk#11] +Condition : (isnotnull(sr_ticket_number#10) AND isnotnull(sr_item_sk#9)) (9) Project [codegen id : 3] -Output [2]: [sr_item_sk#10, sr_ticket_number#11] -Input [3]: [sr_item_sk#10, sr_ticket_number#11, sr_returned_date_sk#12] +Output [2]: [sr_item_sk#9, sr_ticket_number#10] +Input [3]: [sr_item_sk#9, sr_ticket_number#10, sr_returned_date_sk#11] (10) Exchange -Input [2]: [sr_item_sk#10, sr_ticket_number#11] -Arguments: hashpartitioning(sr_ticket_number#11, sr_item_sk#10, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [2]: [sr_item_sk#9, sr_ticket_number#10] +Arguments: hashpartitioning(sr_ticket_number#10, sr_item_sk#9, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [2]: [sr_item_sk#10, sr_ticket_number#11] -Arguments: [sr_ticket_number#11 ASC NULLS FIRST, sr_item_sk#10 ASC NULLS FIRST], false, 0 +Input [2]: [sr_item_sk#9, sr_ticket_number#10] +Arguments: [sr_ticket_number#10 ASC NULLS FIRST, sr_item_sk#9 ASC NULLS FIRST], false, 0 (12) SortMergeJoin [codegen id : 6] Left keys [2]: [ss_ticket_number#3, ss_item_sk#1] -Right keys [2]: [sr_ticket_number#11, sr_item_sk#10] +Right keys [2]: [sr_ticket_number#10, sr_item_sk#9] Join condition: None (13) Filter [codegen id : 6] -Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#10, sr_ticket_number#11] -Condition : isnull(sr_ticket_number#11) +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#9, sr_ticket_number#10] +Condition : isnull(sr_ticket_number#10) (14) Project [codegen id : 6] Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7] -Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#10, sr_ticket_number#11] +Input [9]: [ss_item_sk#1, ss_customer_sk#2, ss_ticket_number#3, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, sr_item_sk#9, sr_ticket_number#10] (15) ReusedExchange [Reuses operator id: 74] -Output [2]: [d_date_sk#14, d_year#15] +Output [2]: [d_date_sk#12, d_year#13] (16) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_sold_date_sk#7] -Right keys [1]: [d_date_sk#14] +Right keys [1]: [d_date_sk#12] Join condition: None (17) Project [codegen id : 6] -Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#15] -Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#14, d_year#15] +Output [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#13] +Input [8]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, ss_sold_date_sk#7, d_date_sk#12, d_year#13] (18) HashAggregate [codegen id : 6] -Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#15] -Keys [3]: [d_year#15, ss_item_sk#1, ss_customer_sk#2] +Input [6]: [ss_item_sk#1, ss_customer_sk#2, ss_quantity#4, ss_wholesale_cost#5, ss_sales_price#6, d_year#13] +Keys [3]: [d_year#13, ss_item_sk#1, ss_customer_sk#2] Functions [3]: [partial_sum(ss_quantity#4), partial_sum(UnscaledValue(ss_wholesale_cost#5)), partial_sum(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [3]: [sum#16, sum#17, sum#18] -Results [6]: [d_year#15, ss_item_sk#1, ss_customer_sk#2, sum#19, sum#20, sum#21] +Aggregate Attributes [3]: [sum#14, sum#15, sum#16] +Results [6]: [d_year#13, ss_item_sk#1, ss_customer_sk#2, sum#17, sum#18, sum#19] (19) Exchange -Input [6]: [d_year#15, ss_item_sk#1, ss_customer_sk#2, sum#19, sum#20, sum#21] -Arguments: hashpartitioning(d_year#15, ss_item_sk#1, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [id=#22] +Input [6]: [d_year#13, ss_item_sk#1, ss_customer_sk#2, sum#17, sum#18, sum#19] +Arguments: hashpartitioning(d_year#13, ss_item_sk#1, ss_customer_sk#2, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 7] -Input [6]: [d_year#15, ss_item_sk#1, ss_customer_sk#2, sum#19, sum#20, sum#21] -Keys [3]: [d_year#15, ss_item_sk#1, ss_customer_sk#2] +Input [6]: [d_year#13, ss_item_sk#1, ss_customer_sk#2, sum#17, sum#18, sum#19] +Keys [3]: [d_year#13, ss_item_sk#1, ss_customer_sk#2] Functions [3]: [sum(ss_quantity#4), sum(UnscaledValue(ss_wholesale_cost#5)), sum(UnscaledValue(ss_sales_price#6))] -Aggregate Attributes [3]: [sum(ss_quantity#4)#23, sum(UnscaledValue(ss_wholesale_cost#5))#24, sum(UnscaledValue(ss_sales_price#6))#25] -Results [6]: [d_year#15 AS ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, sum(ss_quantity#4)#23 AS ss_qty#27, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#5))#24,17,2) AS ss_wc#28, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#25,17,2) AS ss_sp#29] +Aggregate Attributes [3]: [sum(ss_quantity#4)#20, sum(UnscaledValue(ss_wholesale_cost#5))#21, sum(UnscaledValue(ss_sales_price#6))#22] +Results [6]: [d_year#13 AS ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2, sum(ss_quantity#4)#20 AS ss_qty#24, MakeDecimal(sum(UnscaledValue(ss_wholesale_cost#5))#21,17,2) AS ss_wc#25, MakeDecimal(sum(UnscaledValue(ss_sales_price#6))#22,17,2) AS ss_sp#26] (21) Sort [codegen id : 7] -Input [6]: [ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29] -Arguments: [ss_sold_year#26 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], false, 0 +Input [6]: [ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2, ss_qty#24, ss_wc#25, ss_sp#26] +Arguments: [ss_sold_year#23 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST], false, 0 (22) Scan parquet default.web_sales -Output [7]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_order_number#32, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36] +Output [7]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_order_number#29, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#36), dynamicpruningexpression(ws_sold_date_sk#36 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ws_sold_date_sk#33), dynamicpruningexpression(ws_sold_date_sk#33 IN dynamicpruning#8)] PushedFilters: [IsNotNull(ws_item_sk), IsNotNull(ws_bill_customer_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 8] -Input [7]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_order_number#32, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36] +Input [7]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_order_number#29, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33] (24) Filter [codegen id : 8] -Input [7]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_order_number#32, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36] -Condition : (isnotnull(ws_item_sk#30) AND isnotnull(ws_bill_customer_sk#31)) +Input [7]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_order_number#29, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33] +Condition : (isnotnull(ws_item_sk#27) AND isnotnull(ws_bill_customer_sk#28)) (25) Exchange -Input [7]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_order_number#32, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36] -Arguments: hashpartitioning(ws_order_number#32, ws_item_sk#30, 5), ENSURE_REQUIREMENTS, [id=#37] +Input [7]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_order_number#29, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33] +Arguments: hashpartitioning(ws_order_number#29, ws_item_sk#27, 5), ENSURE_REQUIREMENTS, [plan_id=4] (26) Sort [codegen id : 9] -Input [7]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_order_number#32, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36] -Arguments: [ws_order_number#32 ASC NULLS FIRST, ws_item_sk#30 ASC NULLS FIRST], false, 0 +Input [7]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_order_number#29, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33] +Arguments: [ws_order_number#29 ASC NULLS FIRST, ws_item_sk#27 ASC NULLS FIRST], false, 0 (27) Scan parquet default.web_returns -Output [3]: [wr_item_sk#38, wr_order_number#39, wr_returned_date_sk#40] +Output [3]: [wr_item_sk#34, wr_order_number#35, wr_returned_date_sk#36] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_order_number), IsNotNull(wr_item_sk)] ReadSchema: struct (28) ColumnarToRow [codegen id : 10] -Input [3]: [wr_item_sk#38, wr_order_number#39, wr_returned_date_sk#40] +Input [3]: [wr_item_sk#34, wr_order_number#35, wr_returned_date_sk#36] (29) Filter [codegen id : 10] -Input [3]: [wr_item_sk#38, wr_order_number#39, wr_returned_date_sk#40] -Condition : (isnotnull(wr_order_number#39) AND isnotnull(wr_item_sk#38)) +Input [3]: [wr_item_sk#34, wr_order_number#35, wr_returned_date_sk#36] +Condition : (isnotnull(wr_order_number#35) AND isnotnull(wr_item_sk#34)) (30) Project [codegen id : 10] -Output [2]: [wr_item_sk#38, wr_order_number#39] -Input [3]: [wr_item_sk#38, wr_order_number#39, wr_returned_date_sk#40] +Output [2]: [wr_item_sk#34, wr_order_number#35] +Input [3]: [wr_item_sk#34, wr_order_number#35, wr_returned_date_sk#36] (31) Exchange -Input [2]: [wr_item_sk#38, wr_order_number#39] -Arguments: hashpartitioning(wr_order_number#39, wr_item_sk#38, 5), ENSURE_REQUIREMENTS, [id=#41] +Input [2]: [wr_item_sk#34, wr_order_number#35] +Arguments: hashpartitioning(wr_order_number#35, wr_item_sk#34, 5), ENSURE_REQUIREMENTS, [plan_id=5] (32) Sort [codegen id : 11] -Input [2]: [wr_item_sk#38, wr_order_number#39] -Arguments: [wr_order_number#39 ASC NULLS FIRST, wr_item_sk#38 ASC NULLS FIRST], false, 0 +Input [2]: [wr_item_sk#34, wr_order_number#35] +Arguments: [wr_order_number#35 ASC NULLS FIRST, wr_item_sk#34 ASC NULLS FIRST], false, 0 (33) SortMergeJoin [codegen id : 13] -Left keys [2]: [ws_order_number#32, ws_item_sk#30] -Right keys [2]: [wr_order_number#39, wr_item_sk#38] +Left keys [2]: [ws_order_number#29, ws_item_sk#27] +Right keys [2]: [wr_order_number#35, wr_item_sk#34] Join condition: None (34) Filter [codegen id : 13] -Input [9]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_order_number#32, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36, wr_item_sk#38, wr_order_number#39] -Condition : isnull(wr_order_number#39) +Input [9]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_order_number#29, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33, wr_item_sk#34, wr_order_number#35] +Condition : isnull(wr_order_number#35) (35) Project [codegen id : 13] -Output [6]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36] -Input [9]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_order_number#32, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36, wr_item_sk#38, wr_order_number#39] +Output [6]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33] +Input [9]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_order_number#29, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33, wr_item_sk#34, wr_order_number#35] (36) ReusedExchange [Reuses operator id: 74] -Output [2]: [d_date_sk#42, d_year#43] +Output [2]: [d_date_sk#37, d_year#38] (37) BroadcastHashJoin [codegen id : 13] -Left keys [1]: [ws_sold_date_sk#36] -Right keys [1]: [d_date_sk#42] +Left keys [1]: [ws_sold_date_sk#33] +Right keys [1]: [d_date_sk#37] Join condition: None (38) Project [codegen id : 13] -Output [6]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, d_year#43] -Input [8]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, ws_sold_date_sk#36, d_date_sk#42, d_year#43] +Output [6]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, d_year#38] +Input [8]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, ws_sold_date_sk#33, d_date_sk#37, d_year#38] (39) HashAggregate [codegen id : 13] -Input [6]: [ws_item_sk#30, ws_bill_customer_sk#31, ws_quantity#33, ws_wholesale_cost#34, ws_sales_price#35, d_year#43] -Keys [3]: [d_year#43, ws_item_sk#30, ws_bill_customer_sk#31] -Functions [3]: [partial_sum(ws_quantity#33), partial_sum(UnscaledValue(ws_wholesale_cost#34)), partial_sum(UnscaledValue(ws_sales_price#35))] -Aggregate Attributes [3]: [sum#44, sum#45, sum#46] -Results [6]: [d_year#43, ws_item_sk#30, ws_bill_customer_sk#31, sum#47, sum#48, sum#49] +Input [6]: [ws_item_sk#27, ws_bill_customer_sk#28, ws_quantity#30, ws_wholesale_cost#31, ws_sales_price#32, d_year#38] +Keys [3]: [d_year#38, ws_item_sk#27, ws_bill_customer_sk#28] +Functions [3]: [partial_sum(ws_quantity#30), partial_sum(UnscaledValue(ws_wholesale_cost#31)), partial_sum(UnscaledValue(ws_sales_price#32))] +Aggregate Attributes [3]: [sum#39, sum#40, sum#41] +Results [6]: [d_year#38, ws_item_sk#27, ws_bill_customer_sk#28, sum#42, sum#43, sum#44] (40) Exchange -Input [6]: [d_year#43, ws_item_sk#30, ws_bill_customer_sk#31, sum#47, sum#48, sum#49] -Arguments: hashpartitioning(d_year#43, ws_item_sk#30, ws_bill_customer_sk#31, 5), ENSURE_REQUIREMENTS, [id=#50] +Input [6]: [d_year#38, ws_item_sk#27, ws_bill_customer_sk#28, sum#42, sum#43, sum#44] +Arguments: hashpartitioning(d_year#38, ws_item_sk#27, ws_bill_customer_sk#28, 5), ENSURE_REQUIREMENTS, [plan_id=6] (41) HashAggregate [codegen id : 14] -Input [6]: [d_year#43, ws_item_sk#30, ws_bill_customer_sk#31, sum#47, sum#48, sum#49] -Keys [3]: [d_year#43, ws_item_sk#30, ws_bill_customer_sk#31] -Functions [3]: [sum(ws_quantity#33), sum(UnscaledValue(ws_wholesale_cost#34)), sum(UnscaledValue(ws_sales_price#35))] -Aggregate Attributes [3]: [sum(ws_quantity#33)#51, sum(UnscaledValue(ws_wholesale_cost#34))#52, sum(UnscaledValue(ws_sales_price#35))#53] -Results [6]: [d_year#43 AS ws_sold_year#54, ws_item_sk#30, ws_bill_customer_sk#31 AS ws_customer_sk#55, sum(ws_quantity#33)#51 AS ws_qty#56, MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#34))#52,17,2) AS ws_wc#57, MakeDecimal(sum(UnscaledValue(ws_sales_price#35))#53,17,2) AS ws_sp#58] +Input [6]: [d_year#38, ws_item_sk#27, ws_bill_customer_sk#28, sum#42, sum#43, sum#44] +Keys [3]: [d_year#38, ws_item_sk#27, ws_bill_customer_sk#28] +Functions [3]: [sum(ws_quantity#30), sum(UnscaledValue(ws_wholesale_cost#31)), sum(UnscaledValue(ws_sales_price#32))] +Aggregate Attributes [3]: [sum(ws_quantity#30)#45, sum(UnscaledValue(ws_wholesale_cost#31))#46, sum(UnscaledValue(ws_sales_price#32))#47] +Results [6]: [d_year#38 AS ws_sold_year#48, ws_item_sk#27, ws_bill_customer_sk#28 AS ws_customer_sk#49, sum(ws_quantity#30)#45 AS ws_qty#50, MakeDecimal(sum(UnscaledValue(ws_wholesale_cost#31))#46,17,2) AS ws_wc#51, MakeDecimal(sum(UnscaledValue(ws_sales_price#32))#47,17,2) AS ws_sp#52] (42) Filter [codegen id : 14] -Input [6]: [ws_sold_year#54, ws_item_sk#30, ws_customer_sk#55, ws_qty#56, ws_wc#57, ws_sp#58] -Condition : (coalesce(ws_qty#56, 0) > 0) +Input [6]: [ws_sold_year#48, ws_item_sk#27, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] +Condition : (coalesce(ws_qty#50, 0) > 0) (43) Sort [codegen id : 14] -Input [6]: [ws_sold_year#54, ws_item_sk#30, ws_customer_sk#55, ws_qty#56, ws_wc#57, ws_sp#58] -Arguments: [ws_sold_year#54 ASC NULLS FIRST, ws_item_sk#30 ASC NULLS FIRST, ws_customer_sk#55 ASC NULLS FIRST], false, 0 +Input [6]: [ws_sold_year#48, ws_item_sk#27, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] +Arguments: [ws_sold_year#48 ASC NULLS FIRST, ws_item_sk#27 ASC NULLS FIRST, ws_customer_sk#49 ASC NULLS FIRST], false, 0 (44) SortMergeJoin [codegen id : 15] -Left keys [3]: [ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2] -Right keys [3]: [ws_sold_year#54, ws_item_sk#30, ws_customer_sk#55] +Left keys [3]: [ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [ws_sold_year#48, ws_item_sk#27, ws_customer_sk#49] Join condition: None (45) Project [codegen id : 15] -Output [9]: [ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29, ws_qty#56, ws_wc#57, ws_sp#58] -Input [12]: [ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29, ws_sold_year#54, ws_item_sk#30, ws_customer_sk#55, ws_qty#56, ws_wc#57, ws_sp#58] +Output [9]: [ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2, ss_qty#24, ss_wc#25, ss_sp#26, ws_qty#50, ws_wc#51, ws_sp#52] +Input [12]: [ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2, ss_qty#24, ss_wc#25, ss_sp#26, ws_sold_year#48, ws_item_sk#27, ws_customer_sk#49, ws_qty#50, ws_wc#51, ws_sp#52] (46) Scan parquet default.catalog_sales -Output [7]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_order_number#61, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65] +Output [7]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_order_number#55, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#65), dynamicpruningexpression(cs_sold_date_sk#65 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(cs_sold_date_sk#59), dynamicpruningexpression(cs_sold_date_sk#59 IN dynamicpruning#8)] PushedFilters: [IsNotNull(cs_item_sk), IsNotNull(cs_bill_customer_sk)] ReadSchema: struct (47) ColumnarToRow [codegen id : 16] -Input [7]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_order_number#61, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65] +Input [7]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_order_number#55, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59] (48) Filter [codegen id : 16] -Input [7]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_order_number#61, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65] -Condition : (isnotnull(cs_item_sk#60) AND isnotnull(cs_bill_customer_sk#59)) +Input [7]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_order_number#55, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59] +Condition : (isnotnull(cs_item_sk#54) AND isnotnull(cs_bill_customer_sk#53)) (49) Exchange -Input [7]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_order_number#61, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65] -Arguments: hashpartitioning(cs_order_number#61, cs_item_sk#60, 5), ENSURE_REQUIREMENTS, [id=#66] +Input [7]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_order_number#55, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59] +Arguments: hashpartitioning(cs_order_number#55, cs_item_sk#54, 5), ENSURE_REQUIREMENTS, [plan_id=7] (50) Sort [codegen id : 17] -Input [7]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_order_number#61, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65] -Arguments: [cs_order_number#61 ASC NULLS FIRST, cs_item_sk#60 ASC NULLS FIRST], false, 0 +Input [7]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_order_number#55, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59] +Arguments: [cs_order_number#55 ASC NULLS FIRST, cs_item_sk#54 ASC NULLS FIRST], false, 0 (51) Scan parquet default.catalog_returns -Output [3]: [cr_item_sk#67, cr_order_number#68, cr_returned_date_sk#69] +Output [3]: [cr_item_sk#60, cr_order_number#61, cr_returned_date_sk#62] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_order_number), IsNotNull(cr_item_sk)] ReadSchema: struct (52) ColumnarToRow [codegen id : 18] -Input [3]: [cr_item_sk#67, cr_order_number#68, cr_returned_date_sk#69] +Input [3]: [cr_item_sk#60, cr_order_number#61, cr_returned_date_sk#62] (53) Filter [codegen id : 18] -Input [3]: [cr_item_sk#67, cr_order_number#68, cr_returned_date_sk#69] -Condition : (isnotnull(cr_order_number#68) AND isnotnull(cr_item_sk#67)) +Input [3]: [cr_item_sk#60, cr_order_number#61, cr_returned_date_sk#62] +Condition : (isnotnull(cr_order_number#61) AND isnotnull(cr_item_sk#60)) (54) Project [codegen id : 18] -Output [2]: [cr_item_sk#67, cr_order_number#68] -Input [3]: [cr_item_sk#67, cr_order_number#68, cr_returned_date_sk#69] +Output [2]: [cr_item_sk#60, cr_order_number#61] +Input [3]: [cr_item_sk#60, cr_order_number#61, cr_returned_date_sk#62] (55) Exchange -Input [2]: [cr_item_sk#67, cr_order_number#68] -Arguments: hashpartitioning(cr_order_number#68, cr_item_sk#67, 5), ENSURE_REQUIREMENTS, [id=#70] +Input [2]: [cr_item_sk#60, cr_order_number#61] +Arguments: hashpartitioning(cr_order_number#61, cr_item_sk#60, 5), ENSURE_REQUIREMENTS, [plan_id=8] (56) Sort [codegen id : 19] -Input [2]: [cr_item_sk#67, cr_order_number#68] -Arguments: [cr_order_number#68 ASC NULLS FIRST, cr_item_sk#67 ASC NULLS FIRST], false, 0 +Input [2]: [cr_item_sk#60, cr_order_number#61] +Arguments: [cr_order_number#61 ASC NULLS FIRST, cr_item_sk#60 ASC NULLS FIRST], false, 0 (57) SortMergeJoin [codegen id : 21] -Left keys [2]: [cs_order_number#61, cs_item_sk#60] -Right keys [2]: [cr_order_number#68, cr_item_sk#67] +Left keys [2]: [cs_order_number#55, cs_item_sk#54] +Right keys [2]: [cr_order_number#61, cr_item_sk#60] Join condition: None (58) Filter [codegen id : 21] -Input [9]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_order_number#61, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65, cr_item_sk#67, cr_order_number#68] -Condition : isnull(cr_order_number#68) +Input [9]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_order_number#55, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59, cr_item_sk#60, cr_order_number#61] +Condition : isnull(cr_order_number#61) (59) Project [codegen id : 21] -Output [6]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65] -Input [9]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_order_number#61, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65, cr_item_sk#67, cr_order_number#68] +Output [6]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59] +Input [9]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_order_number#55, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59, cr_item_sk#60, cr_order_number#61] (60) ReusedExchange [Reuses operator id: 74] -Output [2]: [d_date_sk#71, d_year#72] +Output [2]: [d_date_sk#63, d_year#64] (61) BroadcastHashJoin [codegen id : 21] -Left keys [1]: [cs_sold_date_sk#65] -Right keys [1]: [d_date_sk#71] +Left keys [1]: [cs_sold_date_sk#59] +Right keys [1]: [d_date_sk#63] Join condition: None (62) Project [codegen id : 21] -Output [6]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, d_year#72] -Input [8]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, cs_sold_date_sk#65, d_date_sk#71, d_year#72] +Output [6]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, d_year#64] +Input [8]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, cs_sold_date_sk#59, d_date_sk#63, d_year#64] (63) HashAggregate [codegen id : 21] -Input [6]: [cs_bill_customer_sk#59, cs_item_sk#60, cs_quantity#62, cs_wholesale_cost#63, cs_sales_price#64, d_year#72] -Keys [3]: [d_year#72, cs_item_sk#60, cs_bill_customer_sk#59] -Functions [3]: [partial_sum(cs_quantity#62), partial_sum(UnscaledValue(cs_wholesale_cost#63)), partial_sum(UnscaledValue(cs_sales_price#64))] -Aggregate Attributes [3]: [sum#73, sum#74, sum#75] -Results [6]: [d_year#72, cs_item_sk#60, cs_bill_customer_sk#59, sum#76, sum#77, sum#78] +Input [6]: [cs_bill_customer_sk#53, cs_item_sk#54, cs_quantity#56, cs_wholesale_cost#57, cs_sales_price#58, d_year#64] +Keys [3]: [d_year#64, cs_item_sk#54, cs_bill_customer_sk#53] +Functions [3]: [partial_sum(cs_quantity#56), partial_sum(UnscaledValue(cs_wholesale_cost#57)), partial_sum(UnscaledValue(cs_sales_price#58))] +Aggregate Attributes [3]: [sum#65, sum#66, sum#67] +Results [6]: [d_year#64, cs_item_sk#54, cs_bill_customer_sk#53, sum#68, sum#69, sum#70] (64) Exchange -Input [6]: [d_year#72, cs_item_sk#60, cs_bill_customer_sk#59, sum#76, sum#77, sum#78] -Arguments: hashpartitioning(d_year#72, cs_item_sk#60, cs_bill_customer_sk#59, 5), ENSURE_REQUIREMENTS, [id=#79] +Input [6]: [d_year#64, cs_item_sk#54, cs_bill_customer_sk#53, sum#68, sum#69, sum#70] +Arguments: hashpartitioning(d_year#64, cs_item_sk#54, cs_bill_customer_sk#53, 5), ENSURE_REQUIREMENTS, [plan_id=9] (65) HashAggregate [codegen id : 22] -Input [6]: [d_year#72, cs_item_sk#60, cs_bill_customer_sk#59, sum#76, sum#77, sum#78] -Keys [3]: [d_year#72, cs_item_sk#60, cs_bill_customer_sk#59] -Functions [3]: [sum(cs_quantity#62), sum(UnscaledValue(cs_wholesale_cost#63)), sum(UnscaledValue(cs_sales_price#64))] -Aggregate Attributes [3]: [sum(cs_quantity#62)#80, sum(UnscaledValue(cs_wholesale_cost#63))#81, sum(UnscaledValue(cs_sales_price#64))#82] -Results [6]: [d_year#72 AS cs_sold_year#83, cs_item_sk#60, cs_bill_customer_sk#59 AS cs_customer_sk#84, sum(cs_quantity#62)#80 AS cs_qty#85, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#63))#81,17,2) AS cs_wc#86, MakeDecimal(sum(UnscaledValue(cs_sales_price#64))#82,17,2) AS cs_sp#87] +Input [6]: [d_year#64, cs_item_sk#54, cs_bill_customer_sk#53, sum#68, sum#69, sum#70] +Keys [3]: [d_year#64, cs_item_sk#54, cs_bill_customer_sk#53] +Functions [3]: [sum(cs_quantity#56), sum(UnscaledValue(cs_wholesale_cost#57)), sum(UnscaledValue(cs_sales_price#58))] +Aggregate Attributes [3]: [sum(cs_quantity#56)#71, sum(UnscaledValue(cs_wholesale_cost#57))#72, sum(UnscaledValue(cs_sales_price#58))#73] +Results [6]: [d_year#64 AS cs_sold_year#74, cs_item_sk#54, cs_bill_customer_sk#53 AS cs_customer_sk#75, sum(cs_quantity#56)#71 AS cs_qty#76, MakeDecimal(sum(UnscaledValue(cs_wholesale_cost#57))#72,17,2) AS cs_wc#77, MakeDecimal(sum(UnscaledValue(cs_sales_price#58))#73,17,2) AS cs_sp#78] (66) Filter [codegen id : 22] -Input [6]: [cs_sold_year#83, cs_item_sk#60, cs_customer_sk#84, cs_qty#85, cs_wc#86, cs_sp#87] -Condition : (coalesce(cs_qty#85, 0) > 0) +Input [6]: [cs_sold_year#74, cs_item_sk#54, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] +Condition : (coalesce(cs_qty#76, 0) > 0) (67) Sort [codegen id : 22] -Input [6]: [cs_sold_year#83, cs_item_sk#60, cs_customer_sk#84, cs_qty#85, cs_wc#86, cs_sp#87] -Arguments: [cs_sold_year#83 ASC NULLS FIRST, cs_item_sk#60 ASC NULLS FIRST, cs_customer_sk#84 ASC NULLS FIRST], false, 0 +Input [6]: [cs_sold_year#74, cs_item_sk#54, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] +Arguments: [cs_sold_year#74 ASC NULLS FIRST, cs_item_sk#54 ASC NULLS FIRST, cs_customer_sk#75 ASC NULLS FIRST], false, 0 (68) SortMergeJoin [codegen id : 23] -Left keys [3]: [ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2] -Right keys [3]: [cs_sold_year#83, cs_item_sk#60, cs_customer_sk#84] +Left keys [3]: [ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2] +Right keys [3]: [cs_sold_year#74, cs_item_sk#54, cs_customer_sk#75] Join condition: None (69) Project [codegen id : 23] -Output [13]: [round((cast(ss_qty#27 as double) / cast(coalesce((ws_qty#56 + cs_qty#85), 1) as double)), 2) AS ratio#88, ss_qty#27 AS store_qty#89, ss_wc#28 AS store_wholesale_cost#90, ss_sp#29 AS store_sales_price#91, (coalesce(ws_qty#56, 0) + coalesce(cs_qty#85, 0)) AS other_chan_qty#92, CheckOverflow((promote_precision(cast(coalesce(ws_wc#57, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#86, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#93, CheckOverflow((promote_precision(cast(coalesce(ws_sp#58, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#87, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#94, ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29] -Input [15]: [ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29, ws_qty#56, ws_wc#57, ws_sp#58, cs_sold_year#83, cs_item_sk#60, cs_customer_sk#84, cs_qty#85, cs_wc#86, cs_sp#87] +Output [13]: [round((cast(ss_qty#24 as double) / cast(coalesce((ws_qty#50 + cs_qty#76), 1) as double)), 2) AS ratio#79, ss_qty#24 AS store_qty#80, ss_wc#25 AS store_wholesale_cost#81, ss_sp#26 AS store_sales_price#82, (coalesce(ws_qty#50, 0) + coalesce(cs_qty#76, 0)) AS other_chan_qty#83, CheckOverflow((promote_precision(cast(coalesce(ws_wc#51, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_wc#77, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_wholesale_cost#84, CheckOverflow((promote_precision(cast(coalesce(ws_sp#52, 0.00) as decimal(18,2))) + promote_precision(cast(coalesce(cs_sp#78, 0.00) as decimal(18,2)))), DecimalType(18,2)) AS other_chan_sales_price#85, ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2, ss_qty#24, ss_wc#25, ss_sp#26] +Input [15]: [ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2, ss_qty#24, ss_wc#25, ss_sp#26, ws_qty#50, ws_wc#51, ws_sp#52, cs_sold_year#74, cs_item_sk#54, cs_customer_sk#75, cs_qty#76, cs_wc#77, cs_sp#78] (70) TakeOrderedAndProject -Input [13]: [ratio#88, store_qty#89, store_wholesale_cost#90, store_sales_price#91, other_chan_qty#92, other_chan_wholesale_cost#93, other_chan_sales_price#94, ss_sold_year#26, ss_item_sk#1, ss_customer_sk#2, ss_qty#27, ss_wc#28, ss_sp#29] -Arguments: 100, [ss_sold_year#26 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST, ss_qty#27 DESC NULLS LAST, ss_wc#28 DESC NULLS LAST, ss_sp#29 DESC NULLS LAST, other_chan_qty#92 ASC NULLS FIRST, other_chan_wholesale_cost#93 ASC NULLS FIRST, other_chan_sales_price#94 ASC NULLS FIRST, ratio#88 ASC NULLS FIRST], [ratio#88, store_qty#89, store_wholesale_cost#90, store_sales_price#91, other_chan_qty#92, other_chan_wholesale_cost#93, other_chan_sales_price#94] +Input [13]: [ratio#79, store_qty#80, store_wholesale_cost#81, store_sales_price#82, other_chan_qty#83, other_chan_wholesale_cost#84, other_chan_sales_price#85, ss_sold_year#23, ss_item_sk#1, ss_customer_sk#2, ss_qty#24, ss_wc#25, ss_sp#26] +Arguments: 100, [ss_sold_year#23 ASC NULLS FIRST, ss_item_sk#1 ASC NULLS FIRST, ss_customer_sk#2 ASC NULLS FIRST, ss_qty#24 DESC NULLS LAST, ss_wc#25 DESC NULLS LAST, ss_sp#26 DESC NULLS LAST, other_chan_qty#83 ASC NULLS FIRST, other_chan_wholesale_cost#84 ASC NULLS FIRST, other_chan_sales_price#85 ASC NULLS FIRST, ratio#79 ASC NULLS FIRST], [ratio#79, store_qty#80, store_wholesale_cost#81, store_sales_price#82, other_chan_qty#83, other_chan_wholesale_cost#84, other_chan_sales_price#85] ===== Subqueries ===== @@ -399,25 +399,25 @@ BroadcastExchange (74) (71) Scan parquet default.date_dim -Output [2]: [d_date_sk#14, d_year#15] +Output [2]: [d_date_sk#12, d_year#13] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_year), EqualTo(d_year,2000), IsNotNull(d_date_sk)] ReadSchema: struct (72) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#15] +Input [2]: [d_date_sk#12, d_year#13] (73) Filter [codegen id : 1] -Input [2]: [d_date_sk#14, d_year#15] -Condition : ((isnotnull(d_year#15) AND (d_year#15 = 2000)) AND isnotnull(d_date_sk#14)) +Input [2]: [d_date_sk#12, d_year#13] +Condition : ((isnotnull(d_year#13) AND (d_year#13 = 2000)) AND isnotnull(d_date_sk#12)) (74) BroadcastExchange -Input [2]: [d_date_sk#14, d_year#15] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#95] +Input [2]: [d_date_sk#12, d_year#13] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=10] -Subquery:2 Hosting operator id = 22 Hosting Expression = ws_sold_date_sk#36 IN dynamicpruning#8 +Subquery:2 Hosting operator id = 22 Hosting Expression = ws_sold_date_sk#33 IN dynamicpruning#8 -Subquery:3 Hosting operator id = 46 Hosting Expression = cs_sold_date_sk#65 IN dynamicpruning#8 +Subquery:3 Hosting operator id = 46 Hosting Expression = cs_sold_date_sk#59 IN dynamicpruning#8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt index a9ea4905b9fb7..b49808fae8d48 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a.sf100/explain.txt @@ -138,543 +138,543 @@ Condition : ((isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnul (4) Exchange Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] -Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] Arguments: [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_returns -Output [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Output [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Input [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] (8) Filter [codegen id : 3] -Input [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] -Condition : (isnotnull(sr_item_sk#10) AND isnotnull(sr_ticket_number#11)) +Input [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] +Condition : (isnotnull(sr_item_sk#9) AND isnotnull(sr_ticket_number#10)) (9) Project [codegen id : 3] -Output [4]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] -Input [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Output [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Input [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] (10) Exchange -Input [4]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] -Arguments: hashpartitioning(sr_item_sk#10, sr_ticket_number#11, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Arguments: hashpartitioning(sr_item_sk#9, sr_ticket_number#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [4]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] -Arguments: [sr_item_sk#10 ASC NULLS FIRST, sr_ticket_number#11 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Arguments: [sr_item_sk#9 ASC NULLS FIRST, sr_ticket_number#10 ASC NULLS FIRST], false, 0 (12) SortMergeJoin [codegen id : 9] Left keys [2]: [ss_item_sk#1, ss_ticket_number#4] -Right keys [2]: [sr_item_sk#10, sr_ticket_number#11] +Right keys [2]: [sr_item_sk#9, sr_ticket_number#10] Join condition: None (13) Project [codegen id : 9] -Output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13] -Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] +Output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12] +Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] (14) Scan parquet default.item -Output [2]: [i_item_sk#16, i_current_price#17] +Output [2]: [i_item_sk#14, i_current_price#15] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] ReadSchema: struct (15) ColumnarToRow [codegen id : 5] -Input [2]: [i_item_sk#16, i_current_price#17] +Input [2]: [i_item_sk#14, i_current_price#15] (16) Filter [codegen id : 5] -Input [2]: [i_item_sk#16, i_current_price#17] -Condition : ((isnotnull(i_current_price#17) AND (i_current_price#17 > 50.00)) AND isnotnull(i_item_sk#16)) +Input [2]: [i_item_sk#14, i_current_price#15] +Condition : ((isnotnull(i_current_price#15) AND (i_current_price#15 > 50.00)) AND isnotnull(i_item_sk#14)) (17) Project [codegen id : 5] -Output [1]: [i_item_sk#16] -Input [2]: [i_item_sk#16, i_current_price#17] +Output [1]: [i_item_sk#14] +Input [2]: [i_item_sk#14, i_current_price#15] (18) BroadcastExchange -Input [1]: [i_item_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#18] +Input [1]: [i_item_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=3] (19) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#16] +Right keys [1]: [i_item_sk#14] Join condition: None (20) Project [codegen id : 9] -Output [7]: [ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13] -Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13, i_item_sk#16] +Output [7]: [ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12, i_item_sk#14] (21) Scan parquet default.promotion -Output [2]: [p_promo_sk#19, p_channel_tv#20] +Output [2]: [p_promo_sk#16, p_channel_tv#17] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] ReadSchema: struct (22) ColumnarToRow [codegen id : 6] -Input [2]: [p_promo_sk#19, p_channel_tv#20] +Input [2]: [p_promo_sk#16, p_channel_tv#17] (23) Filter [codegen id : 6] -Input [2]: [p_promo_sk#19, p_channel_tv#20] -Condition : ((isnotnull(p_channel_tv#20) AND (p_channel_tv#20 = N)) AND isnotnull(p_promo_sk#19)) +Input [2]: [p_promo_sk#16, p_channel_tv#17] +Condition : ((isnotnull(p_channel_tv#17) AND (p_channel_tv#17 = N)) AND isnotnull(p_promo_sk#16)) (24) Project [codegen id : 6] -Output [1]: [p_promo_sk#19] -Input [2]: [p_promo_sk#19, p_channel_tv#20] +Output [1]: [p_promo_sk#16] +Input [2]: [p_promo_sk#16, p_channel_tv#17] (25) BroadcastExchange -Input [1]: [p_promo_sk#19] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#21] +Input [1]: [p_promo_sk#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (26) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_promo_sk#3] -Right keys [1]: [p_promo_sk#19] +Right keys [1]: [p_promo_sk#16] Join condition: None (27) Project [codegen id : 9] -Output [6]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13] -Input [8]: [ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13, p_promo_sk#19] +Output [6]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12] +Input [8]: [ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12, p_promo_sk#16] (28) ReusedExchange [Reuses operator id: 125] -Output [1]: [d_date_sk#22] +Output [1]: [d_date_sk#18] (29) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_sold_date_sk#7] -Right keys [1]: [d_date_sk#22] +Right keys [1]: [d_date_sk#18] Join condition: None (30) Project [codegen id : 9] -Output [5]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13] -Input [7]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13, d_date_sk#22] +Output [5]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12] +Input [7]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12, d_date_sk#18] (31) Scan parquet default.store -Output [2]: [s_store_sk#23, s_store_id#24] +Output [2]: [s_store_sk#19, s_store_id#20] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (32) ColumnarToRow [codegen id : 8] -Input [2]: [s_store_sk#23, s_store_id#24] +Input [2]: [s_store_sk#19, s_store_id#20] (33) Filter [codegen id : 8] -Input [2]: [s_store_sk#23, s_store_id#24] -Condition : isnotnull(s_store_sk#23) +Input [2]: [s_store_sk#19, s_store_id#20] +Condition : isnotnull(s_store_sk#19) (34) BroadcastExchange -Input [2]: [s_store_sk#23, s_store_id#24] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#25] +Input [2]: [s_store_sk#19, s_store_id#20] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] (35) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#2] -Right keys [1]: [s_store_sk#23] +Right keys [1]: [s_store_sk#19] Join condition: None (36) Project [codegen id : 9] -Output [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#24] -Input [7]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_sk#23, s_store_id#24] +Output [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#20] +Input [7]: [ss_store_sk#2, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_sk#19, s_store_id#20] (37) HashAggregate [codegen id : 9] -Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#24] -Keys [1]: [s_store_id#24] -Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [5]: [sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] -Results [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] +Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#20] +Keys [1]: [s_store_id#20] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [5]: [sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Results [6]: [s_store_id#20, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] (38) Exchange -Input [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] -Arguments: hashpartitioning(s_store_id#24, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [6]: [s_store_id#20, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Arguments: hashpartitioning(s_store_id#20, 5), ENSURE_REQUIREMENTS, [plan_id=6] (39) HashAggregate [codegen id : 10] -Input [6]: [s_store_id#24, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] -Keys [1]: [s_store_id#24] -Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#37, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39] -Results [5]: [store channel AS channel#40, concat(store, s_store_id#24) AS id#41, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#37,17,2) AS sales#42, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38 AS returns#43, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39 AS profit#44] +Input [6]: [s_store_id#20, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Keys [1]: [s_store_id#20] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#31, sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00))#32, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#33] +Results [5]: [store channel AS channel#34, concat(store, s_store_id#20) AS id#35, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#31,17,2) AS sales#36, sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00))#32 AS returns#37, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#33 AS profit#38] (40) Scan parquet default.catalog_sales -Output [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#51), dynamicpruningexpression(cs_sold_date_sk#51 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(cs_sold_date_sk#45), dynamicpruningexpression(cs_sold_date_sk#45 IN dynamicpruning#8)] PushedFilters: [IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] ReadSchema: struct (41) ColumnarToRow [codegen id : 11] -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] (42) Filter [codegen id : 11] -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] -Condition : ((isnotnull(cs_catalog_page_sk#45) AND isnotnull(cs_item_sk#46)) AND isnotnull(cs_promo_sk#47)) +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : ((isnotnull(cs_catalog_page_sk#39) AND isnotnull(cs_item_sk#40)) AND isnotnull(cs_promo_sk#41)) (43) Exchange -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] -Arguments: hashpartitioning(cs_item_sk#46, cs_order_number#48, 5), ENSURE_REQUIREMENTS, [id=#52] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: hashpartitioning(cs_item_sk#40, cs_order_number#42, 5), ENSURE_REQUIREMENTS, [plan_id=7] (44) Sort [codegen id : 12] -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] -Arguments: [cs_item_sk#46 ASC NULLS FIRST, cs_order_number#48 ASC NULLS FIRST], false, 0 +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: [cs_item_sk#40 ASC NULLS FIRST, cs_order_number#42 ASC NULLS FIRST], false, 0 (45) Scan parquet default.catalog_returns -Output [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] ReadSchema: struct (46) ColumnarToRow [codegen id : 13] -Input [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] (47) Filter [codegen id : 13] -Input [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] -Condition : (isnotnull(cr_item_sk#53) AND isnotnull(cr_order_number#54)) +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] +Condition : (isnotnull(cr_item_sk#46) AND isnotnull(cr_order_number#47)) (48) Project [codegen id : 13] -Output [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] -Input [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] (49) Exchange -Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] -Arguments: hashpartitioning(cr_item_sk#53, cr_order_number#54, 5), ENSURE_REQUIREMENTS, [id=#58] +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: hashpartitioning(cr_item_sk#46, cr_order_number#47, 5), ENSURE_REQUIREMENTS, [plan_id=8] (50) Sort [codegen id : 14] -Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] -Arguments: [cr_item_sk#53 ASC NULLS FIRST, cr_order_number#54 ASC NULLS FIRST], false, 0 +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: [cr_item_sk#46 ASC NULLS FIRST, cr_order_number#47 ASC NULLS FIRST], false, 0 (51) SortMergeJoin [codegen id : 19] -Left keys [2]: [cs_item_sk#46, cs_order_number#48] -Right keys [2]: [cr_item_sk#53, cr_order_number#54] +Left keys [2]: [cs_item_sk#40, cs_order_number#42] +Right keys [2]: [cr_item_sk#46, cr_order_number#47] Join condition: None (52) Project [codegen id : 19] -Output [8]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56] -Input [11]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] +Output [8]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49] +Input [11]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] (53) ReusedExchange [Reuses operator id: 18] -Output [1]: [i_item_sk#59] +Output [1]: [i_item_sk#51] (54) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_item_sk#46] -Right keys [1]: [i_item_sk#59] +Left keys [1]: [cs_item_sk#40] +Right keys [1]: [i_item_sk#51] Join condition: None (55) Project [codegen id : 19] -Output [7]: [cs_catalog_page_sk#45, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56] -Input [9]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56, i_item_sk#59] +Output [7]: [cs_catalog_page_sk#39, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49] +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49, i_item_sk#51] (56) ReusedExchange [Reuses operator id: 25] -Output [1]: [p_promo_sk#60] +Output [1]: [p_promo_sk#52] (57) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_promo_sk#47] -Right keys [1]: [p_promo_sk#60] +Left keys [1]: [cs_promo_sk#41] +Right keys [1]: [p_promo_sk#52] Join condition: None (58) Project [codegen id : 19] -Output [6]: [cs_catalog_page_sk#45, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56] -Input [8]: [cs_catalog_page_sk#45, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56, p_promo_sk#60] +Output [6]: [cs_catalog_page_sk#39, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49] +Input [8]: [cs_catalog_page_sk#39, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49, p_promo_sk#52] (59) ReusedExchange [Reuses operator id: 125] -Output [1]: [d_date_sk#61] +Output [1]: [d_date_sk#53] (60) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_sold_date_sk#51] -Right keys [1]: [d_date_sk#61] +Left keys [1]: [cs_sold_date_sk#45] +Right keys [1]: [d_date_sk#53] Join condition: None (61) Project [codegen id : 19] -Output [5]: [cs_catalog_page_sk#45, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56] -Input [7]: [cs_catalog_page_sk#45, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56, d_date_sk#61] +Output [5]: [cs_catalog_page_sk#39, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49] +Input [7]: [cs_catalog_page_sk#39, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49, d_date_sk#53] (62) Scan parquet default.catalog_page -Output [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] +Output [2]: [cp_catalog_page_sk#54, cp_catalog_page_id#55] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct (63) ColumnarToRow [codegen id : 18] -Input [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] +Input [2]: [cp_catalog_page_sk#54, cp_catalog_page_id#55] (64) Filter [codegen id : 18] -Input [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] -Condition : isnotnull(cp_catalog_page_sk#62) +Input [2]: [cp_catalog_page_sk#54, cp_catalog_page_id#55] +Condition : isnotnull(cp_catalog_page_sk#54) (65) BroadcastExchange -Input [2]: [cp_catalog_page_sk#62, cp_catalog_page_id#63] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#64] +Input [2]: [cp_catalog_page_sk#54, cp_catalog_page_id#55] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] (66) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_catalog_page_sk#45] -Right keys [1]: [cp_catalog_page_sk#62] +Left keys [1]: [cs_catalog_page_sk#39] +Right keys [1]: [cp_catalog_page_sk#54] Join condition: None (67) Project [codegen id : 19] -Output [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#63] -Input [7]: [cs_catalog_page_sk#45, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_sk#62, cp_catalog_page_id#63] +Output [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#55] +Input [7]: [cs_catalog_page_sk#39, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_sk#54, cp_catalog_page_id#55] (68) HashAggregate [codegen id : 19] -Input [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#63] -Keys [1]: [cp_catalog_page_id#63] -Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [5]: [sum#65, sum#66, isEmpty#67, sum#68, isEmpty#69] -Results [6]: [cp_catalog_page_id#63, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] +Input [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#55] +Keys [1]: [cp_catalog_page_id#55] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#43)), partial_sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [5]: [sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Results [6]: [cp_catalog_page_id#55, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] (69) Exchange -Input [6]: [cp_catalog_page_id#63, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] -Arguments: hashpartitioning(cp_catalog_page_id#63, 5), ENSURE_REQUIREMENTS, [id=#75] +Input [6]: [cp_catalog_page_id#55, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Arguments: hashpartitioning(cp_catalog_page_id#55, 5), ENSURE_REQUIREMENTS, [plan_id=10] (70) HashAggregate [codegen id : 20] -Input [6]: [cp_catalog_page_id#63, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] -Keys [1]: [cp_catalog_page_id#63] -Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#76, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78] -Results [5]: [catalog channel AS channel#79, concat(catalog_page, cp_catalog_page_id#63) AS id#80, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#76,17,2) AS sales#81, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77 AS returns#82, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78 AS profit#83] +Input [6]: [cp_catalog_page_id#55, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Keys [1]: [cp_catalog_page_id#55] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#43)), sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#43))#66, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67, sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#68] +Results [5]: [catalog channel AS channel#69, concat(catalog_page, cp_catalog_page_id#55) AS id#70, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#43))#66,17,2) AS sales#71, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67 AS returns#72, sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#68 AS profit#73] (71) Scan parquet default.web_sales -Output [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] +Output [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#90), dynamicpruningexpression(ws_sold_date_sk#90 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ws_sold_date_sk#80), dynamicpruningexpression(ws_sold_date_sk#80 IN dynamicpruning#8)] PushedFilters: [IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] ReadSchema: struct (72) ColumnarToRow [codegen id : 21] -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] (73) Filter [codegen id : 21] -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] -Condition : ((isnotnull(ws_web_site_sk#85) AND isnotnull(ws_item_sk#84)) AND isnotnull(ws_promo_sk#86)) +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Condition : ((isnotnull(ws_web_site_sk#75) AND isnotnull(ws_item_sk#74)) AND isnotnull(ws_promo_sk#76)) (74) Exchange -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] -Arguments: hashpartitioning(ws_item_sk#84, ws_order_number#87, 5), ENSURE_REQUIREMENTS, [id=#91] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: hashpartitioning(ws_item_sk#74, ws_order_number#77, 5), ENSURE_REQUIREMENTS, [plan_id=11] (75) Sort [codegen id : 22] -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] -Arguments: [ws_item_sk#84 ASC NULLS FIRST, ws_order_number#87 ASC NULLS FIRST], false, 0 +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: [ws_item_sk#74 ASC NULLS FIRST, ws_order_number#77 ASC NULLS FIRST], false, 0 (76) Scan parquet default.web_returns -Output [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Output [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] ReadSchema: struct (77) ColumnarToRow [codegen id : 23] -Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] (78) Filter [codegen id : 23] -Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] -Condition : (isnotnull(wr_item_sk#92) AND isnotnull(wr_order_number#93)) +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] +Condition : (isnotnull(wr_item_sk#81) AND isnotnull(wr_order_number#82)) (79) Project [codegen id : 23] -Output [4]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] -Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Output [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] (80) Exchange -Input [4]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] -Arguments: hashpartitioning(wr_item_sk#92, wr_order_number#93, 5), ENSURE_REQUIREMENTS, [id=#97] +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: hashpartitioning(wr_item_sk#81, wr_order_number#82, 5), ENSURE_REQUIREMENTS, [plan_id=12] (81) Sort [codegen id : 24] -Input [4]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] -Arguments: [wr_item_sk#92 ASC NULLS FIRST, wr_order_number#93 ASC NULLS FIRST], false, 0 +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: [wr_item_sk#81 ASC NULLS FIRST, wr_order_number#82 ASC NULLS FIRST], false, 0 (82) SortMergeJoin [codegen id : 29] -Left keys [2]: [ws_item_sk#84, ws_order_number#87] -Right keys [2]: [wr_item_sk#92, wr_order_number#93] +Left keys [2]: [ws_item_sk#74, ws_order_number#77] +Right keys [2]: [wr_item_sk#81, wr_order_number#82] Join condition: None (83) Project [codegen id : 29] -Output [8]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95] -Input [11]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] +Output [8]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84] +Input [11]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] (84) ReusedExchange [Reuses operator id: 18] -Output [1]: [i_item_sk#98] +Output [1]: [i_item_sk#86] (85) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_item_sk#84] -Right keys [1]: [i_item_sk#98] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [i_item_sk#86] Join condition: None (86) Project [codegen id : 29] -Output [7]: [ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95] -Input [9]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95, i_item_sk#98] +Output [7]: [ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84] +Input [9]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84, i_item_sk#86] (87) ReusedExchange [Reuses operator id: 25] -Output [1]: [p_promo_sk#99] +Output [1]: [p_promo_sk#87] (88) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_promo_sk#86] -Right keys [1]: [p_promo_sk#99] +Left keys [1]: [ws_promo_sk#76] +Right keys [1]: [p_promo_sk#87] Join condition: None (89) Project [codegen id : 29] -Output [6]: [ws_web_site_sk#85, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95] -Input [8]: [ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95, p_promo_sk#99] +Output [6]: [ws_web_site_sk#75, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84] +Input [8]: [ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84, p_promo_sk#87] (90) ReusedExchange [Reuses operator id: 125] -Output [1]: [d_date_sk#100] +Output [1]: [d_date_sk#88] (91) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_sold_date_sk#90] -Right keys [1]: [d_date_sk#100] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#88] Join condition: None (92) Project [codegen id : 29] -Output [5]: [ws_web_site_sk#85, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95] -Input [7]: [ws_web_site_sk#85, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95, d_date_sk#100] +Output [5]: [ws_web_site_sk#75, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84] +Input [7]: [ws_web_site_sk#75, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84, d_date_sk#88] (93) Scan parquet default.web_site -Output [2]: [web_site_sk#101, web_site_id#102] +Output [2]: [web_site_sk#89, web_site_id#90] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct (94) ColumnarToRow [codegen id : 28] -Input [2]: [web_site_sk#101, web_site_id#102] +Input [2]: [web_site_sk#89, web_site_id#90] (95) Filter [codegen id : 28] -Input [2]: [web_site_sk#101, web_site_id#102] -Condition : isnotnull(web_site_sk#101) +Input [2]: [web_site_sk#89, web_site_id#90] +Condition : isnotnull(web_site_sk#89) (96) BroadcastExchange -Input [2]: [web_site_sk#101, web_site_id#102] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#103] +Input [2]: [web_site_sk#89, web_site_id#90] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=13] (97) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_web_site_sk#85] -Right keys [1]: [web_site_sk#101] +Left keys [1]: [ws_web_site_sk#75] +Right keys [1]: [web_site_sk#89] Join condition: None (98) Project [codegen id : 29] -Output [5]: [ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#102] -Input [7]: [ws_web_site_sk#85, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_sk#101, web_site_id#102] +Output [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#90] +Input [7]: [ws_web_site_sk#75, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_sk#89, web_site_id#90] (99) HashAggregate [codegen id : 29] -Input [5]: [ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#102] -Keys [1]: [web_site_id#102] -Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#88)), partial_sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [5]: [sum#104, sum#105, isEmpty#106, sum#107, isEmpty#108] -Results [6]: [web_site_id#102, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] +Input [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#90] +Keys [1]: [web_site_id#90] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#78)), partial_sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [5]: [sum#91, sum#92, isEmpty#93, sum#94, isEmpty#95] +Results [6]: [web_site_id#90, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] (100) Exchange -Input [6]: [web_site_id#102, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] -Arguments: hashpartitioning(web_site_id#102, 5), ENSURE_REQUIREMENTS, [id=#114] +Input [6]: [web_site_id#90, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Arguments: hashpartitioning(web_site_id#90, 5), ENSURE_REQUIREMENTS, [plan_id=14] (101) HashAggregate [codegen id : 30] -Input [6]: [web_site_id#102, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] -Keys [1]: [web_site_id#102] -Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#88)), sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#88))#115, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117] -Results [5]: [web channel AS channel#118, concat(web_site, web_site_id#102) AS id#119, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#88))#115,17,2) AS sales#120, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116 AS returns#121, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117 AS profit#122] +Input [6]: [web_site_id#90, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Keys [1]: [web_site_id#90] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#78)), sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#78))#101, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102, sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#103] +Results [5]: [web channel AS channel#104, concat(web_site, web_site_id#90) AS id#105, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#78))#101,17,2) AS sales#106, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102 AS returns#107, sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#103 AS profit#108] (102) Union (103) HashAggregate [codegen id : 31] -Input [5]: [channel#40, id#41, sales#42, returns#43, profit#44] -Keys [2]: [channel#40, id#41] -Functions [3]: [partial_sum(sales#42), partial_sum(returns#43), partial_sum(profit#44)] -Aggregate Attributes [6]: [sum#123, isEmpty#124, sum#125, isEmpty#126, sum#127, isEmpty#128] -Results [8]: [channel#40, id#41, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134] +Input [5]: [channel#34, id#35, sales#36, returns#37, profit#38] +Keys [2]: [channel#34, id#35] +Functions [3]: [partial_sum(sales#36), partial_sum(returns#37), partial_sum(profit#38)] +Aggregate Attributes [6]: [sum#109, isEmpty#110, sum#111, isEmpty#112, sum#113, isEmpty#114] +Results [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] (104) Exchange -Input [8]: [channel#40, id#41, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134] -Arguments: hashpartitioning(channel#40, id#41, 5), ENSURE_REQUIREMENTS, [id=#135] +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Arguments: hashpartitioning(channel#34, id#35, 5), ENSURE_REQUIREMENTS, [plan_id=15] (105) HashAggregate [codegen id : 32] -Input [8]: [channel#40, id#41, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134] -Keys [2]: [channel#40, id#41] -Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#44)] -Aggregate Attributes [3]: [sum(sales#42)#136, sum(returns#43)#137, sum(profit#44)#138] -Results [5]: [channel#40, id#41, cast(sum(sales#42)#136 as decimal(37,2)) AS sales#139, cast(sum(returns#43)#137 as decimal(38,2)) AS returns#140, cast(sum(profit#44)#138 as decimal(38,2)) AS profit#141] +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#36), sum(returns#37), sum(profit#38)] +Aggregate Attributes [3]: [sum(sales#36)#121, sum(returns#37)#122, sum(profit#38)#123] +Results [5]: [channel#34, id#35, cast(sum(sales#36)#121 as decimal(37,2)) AS sales#124, cast(sum(returns#37)#122 as decimal(38,2)) AS returns#125, cast(sum(profit#38)#123 as decimal(38,2)) AS profit#126] (106) ReusedExchange [Reuses operator id: 104] -Output [8]: [channel#40, id#41, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134] +Output [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] (107) HashAggregate [codegen id : 64] -Input [8]: [channel#40, id#41, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134] -Keys [2]: [channel#40, id#41] -Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#44)] -Aggregate Attributes [3]: [sum(sales#42)#136, sum(returns#43)#137, sum(profit#44)#138] -Results [4]: [channel#40, sum(sales#42)#136 AS sales#142, sum(returns#43)#137 AS returns#143, sum(profit#44)#138 AS profit#144] +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#36), sum(returns#37), sum(profit#38)] +Aggregate Attributes [3]: [sum(sales#36)#121, sum(returns#37)#122, sum(profit#38)#123] +Results [4]: [channel#34, sum(sales#36)#121 AS sales#127, sum(returns#37)#122 AS returns#128, sum(profit#38)#123 AS profit#129] (108) HashAggregate [codegen id : 64] -Input [4]: [channel#40, sales#142, returns#143, profit#144] -Keys [1]: [channel#40] -Functions [3]: [partial_sum(sales#142), partial_sum(returns#143), partial_sum(profit#144)] -Aggregate Attributes [6]: [sum#145, isEmpty#146, sum#147, isEmpty#148, sum#149, isEmpty#150] -Results [7]: [channel#40, sum#151, isEmpty#152, sum#153, isEmpty#154, sum#155, isEmpty#156] +Input [4]: [channel#34, sales#127, returns#128, profit#129] +Keys [1]: [channel#34] +Functions [3]: [partial_sum(sales#127), partial_sum(returns#128), partial_sum(profit#129)] +Aggregate Attributes [6]: [sum#130, isEmpty#131, sum#132, isEmpty#133, sum#134, isEmpty#135] +Results [7]: [channel#34, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] (109) Exchange -Input [7]: [channel#40, sum#151, isEmpty#152, sum#153, isEmpty#154, sum#155, isEmpty#156] -Arguments: hashpartitioning(channel#40, 5), ENSURE_REQUIREMENTS, [id=#157] +Input [7]: [channel#34, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Arguments: hashpartitioning(channel#34, 5), ENSURE_REQUIREMENTS, [plan_id=16] (110) HashAggregate [codegen id : 65] -Input [7]: [channel#40, sum#151, isEmpty#152, sum#153, isEmpty#154, sum#155, isEmpty#156] -Keys [1]: [channel#40] -Functions [3]: [sum(sales#142), sum(returns#143), sum(profit#144)] -Aggregate Attributes [3]: [sum(sales#142)#158, sum(returns#143)#159, sum(profit#144)#160] -Results [5]: [channel#40, null AS id#161, sum(sales#142)#158 AS sales#162, sum(returns#143)#159 AS returns#163, sum(profit#144)#160 AS profit#164] +Input [7]: [channel#34, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Keys [1]: [channel#34] +Functions [3]: [sum(sales#127), sum(returns#128), sum(profit#129)] +Aggregate Attributes [3]: [sum(sales#127)#142, sum(returns#128)#143, sum(profit#129)#144] +Results [5]: [channel#34, null AS id#145, sum(sales#127)#142 AS sales#146, sum(returns#128)#143 AS returns#147, sum(profit#129)#144 AS profit#148] (111) ReusedExchange [Reuses operator id: 104] -Output [8]: [channel#40, id#41, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134] +Output [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] (112) HashAggregate [codegen id : 97] -Input [8]: [channel#40, id#41, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134] -Keys [2]: [channel#40, id#41] -Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#44)] -Aggregate Attributes [3]: [sum(sales#42)#136, sum(returns#43)#137, sum(profit#44)#138] -Results [3]: [sum(sales#42)#136 AS sales#142, sum(returns#43)#137 AS returns#143, sum(profit#44)#138 AS profit#144] +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#36), sum(returns#37), sum(profit#38)] +Aggregate Attributes [3]: [sum(sales#36)#121, sum(returns#37)#122, sum(profit#38)#123] +Results [3]: [sum(sales#36)#121 AS sales#127, sum(returns#37)#122 AS returns#128, sum(profit#38)#123 AS profit#129] (113) HashAggregate [codegen id : 97] -Input [3]: [sales#142, returns#143, profit#144] +Input [3]: [sales#127, returns#128, profit#129] Keys: [] -Functions [3]: [partial_sum(sales#142), partial_sum(returns#143), partial_sum(profit#144)] -Aggregate Attributes [6]: [sum#165, isEmpty#166, sum#167, isEmpty#168, sum#169, isEmpty#170] -Results [6]: [sum#171, isEmpty#172, sum#173, isEmpty#174, sum#175, isEmpty#176] +Functions [3]: [partial_sum(sales#127), partial_sum(returns#128), partial_sum(profit#129)] +Aggregate Attributes [6]: [sum#149, isEmpty#150, sum#151, isEmpty#152, sum#153, isEmpty#154] +Results [6]: [sum#155, isEmpty#156, sum#157, isEmpty#158, sum#159, isEmpty#160] (114) Exchange -Input [6]: [sum#171, isEmpty#172, sum#173, isEmpty#174, sum#175, isEmpty#176] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#177] +Input [6]: [sum#155, isEmpty#156, sum#157, isEmpty#158, sum#159, isEmpty#160] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=17] (115) HashAggregate [codegen id : 98] -Input [6]: [sum#171, isEmpty#172, sum#173, isEmpty#174, sum#175, isEmpty#176] +Input [6]: [sum#155, isEmpty#156, sum#157, isEmpty#158, sum#159, isEmpty#160] Keys: [] -Functions [3]: [sum(sales#142), sum(returns#143), sum(profit#144)] -Aggregate Attributes [3]: [sum(sales#142)#178, sum(returns#143)#179, sum(profit#144)#180] -Results [5]: [null AS channel#181, null AS id#182, sum(sales#142)#178 AS sales#183, sum(returns#143)#179 AS returns#184, sum(profit#144)#180 AS profit#185] +Functions [3]: [sum(sales#127), sum(returns#128), sum(profit#129)] +Aggregate Attributes [3]: [sum(sales#127)#161, sum(returns#128)#162, sum(profit#129)#163] +Results [5]: [null AS channel#164, null AS id#165, sum(sales#127)#161 AS sales#166, sum(returns#128)#162 AS returns#167, sum(profit#129)#163 AS profit#168] (116) Union (117) HashAggregate [codegen id : 99] -Input [5]: [channel#40, id#41, sales#139, returns#140, profit#141] -Keys [5]: [channel#40, id#41, sales#139, returns#140, profit#141] +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Keys [5]: [channel#34, id#35, sales#124, returns#125, profit#126] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#139, returns#140, profit#141] +Results [5]: [channel#34, id#35, sales#124, returns#125, profit#126] (118) Exchange -Input [5]: [channel#40, id#41, sales#139, returns#140, profit#141] -Arguments: hashpartitioning(channel#40, id#41, sales#139, returns#140, profit#141, 5), ENSURE_REQUIREMENTS, [id=#186] +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Arguments: hashpartitioning(channel#34, id#35, sales#124, returns#125, profit#126, 5), ENSURE_REQUIREMENTS, [plan_id=18] (119) HashAggregate [codegen id : 100] -Input [5]: [channel#40, id#41, sales#139, returns#140, profit#141] -Keys [5]: [channel#40, id#41, sales#139, returns#140, profit#141] +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Keys [5]: [channel#34, id#35, sales#124, returns#125, profit#126] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#139, returns#140, profit#141] +Results [5]: [channel#34, id#35, sales#124, returns#125, profit#126] (120) TakeOrderedAndProject -Input [5]: [channel#40, id#41, sales#139, returns#140, profit#141] -Arguments: 100, [channel#40 ASC NULLS FIRST, id#41 ASC NULLS FIRST], [channel#40, id#41, sales#139, returns#140, profit#141] +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Arguments: 100, [channel#34 ASC NULLS FIRST, id#35 ASC NULLS FIRST], [channel#34, id#35, sales#124, returns#125, profit#126] ===== Subqueries ===== @@ -687,29 +687,29 @@ BroadcastExchange (125) (121) Scan parquet default.date_dim -Output [2]: [d_date_sk#22, d_date#187] +Output [2]: [d_date_sk#18, d_date#169] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] ReadSchema: struct (122) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#22, d_date#187] +Input [2]: [d_date_sk#18, d_date#169] (123) Filter [codegen id : 1] -Input [2]: [d_date_sk#22, d_date#187] -Condition : (((isnotnull(d_date#187) AND (d_date#187 >= 1998-08-04)) AND (d_date#187 <= 1998-09-03)) AND isnotnull(d_date_sk#22)) +Input [2]: [d_date_sk#18, d_date#169] +Condition : (((isnotnull(d_date#169) AND (d_date#169 >= 1998-08-04)) AND (d_date#169 <= 1998-09-03)) AND isnotnull(d_date_sk#18)) (124) Project [codegen id : 1] -Output [1]: [d_date_sk#22] -Input [2]: [d_date_sk#22, d_date#187] +Output [1]: [d_date_sk#18] +Input [2]: [d_date_sk#18, d_date#169] (125) BroadcastExchange -Input [1]: [d_date_sk#22] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#188] +Input [1]: [d_date_sk#18] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=19] -Subquery:2 Hosting operator id = 40 Hosting Expression = cs_sold_date_sk#51 IN dynamicpruning#8 +Subquery:2 Hosting operator id = 40 Hosting Expression = cs_sold_date_sk#45 IN dynamicpruning#8 -Subquery:3 Hosting operator id = 71 Hosting Expression = ws_sold_date_sk#90 IN dynamicpruning#8 +Subquery:3 Hosting operator id = 71 Hosting Expression = ws_sold_date_sk#80 IN dynamicpruning#8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt index 03e744ac87b53..63b2856d44204 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q80a/explain.txt @@ -138,543 +138,543 @@ Condition : ((isnotnull(ss_store_sk#2) AND isnotnull(ss_item_sk#1)) AND isnotnul (4) Exchange Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] -Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [id=#9] +Arguments: hashpartitioning(ss_item_sk#1, ss_ticket_number#4, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7] Arguments: [ss_item_sk#1 ASC NULLS FIRST, ss_ticket_number#4 ASC NULLS FIRST], false, 0 (6) Scan parquet default.store_returns -Output [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Output [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] Batched: true Location [not included in comparison]/{warehouse_dir}/store_returns] PushedFilters: [IsNotNull(sr_item_sk), IsNotNull(sr_ticket_number)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Input [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] (8) Filter [codegen id : 3] -Input [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] -Condition : (isnotnull(sr_item_sk#10) AND isnotnull(sr_ticket_number#11)) +Input [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] +Condition : (isnotnull(sr_item_sk#9) AND isnotnull(sr_ticket_number#10)) (9) Project [codegen id : 3] -Output [4]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] -Input [5]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13, sr_returned_date_sk#14] +Output [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Input [5]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12, sr_returned_date_sk#13] (10) Exchange -Input [4]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] -Arguments: hashpartitioning(sr_item_sk#10, sr_ticket_number#11, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Arguments: hashpartitioning(sr_item_sk#9, sr_ticket_number#10, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) Sort [codegen id : 4] -Input [4]: [sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] -Arguments: [sr_item_sk#10 ASC NULLS FIRST, sr_ticket_number#11 ASC NULLS FIRST], false, 0 +Input [4]: [sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] +Arguments: [sr_item_sk#9 ASC NULLS FIRST, sr_ticket_number#10 ASC NULLS FIRST], false, 0 (12) SortMergeJoin [codegen id : 9] Left keys [2]: [ss_item_sk#1, ss_ticket_number#4] -Right keys [2]: [sr_item_sk#10, sr_ticket_number#11] +Right keys [2]: [sr_item_sk#9, sr_ticket_number#10] Join condition: None (13) Project [codegen id : 9] -Output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13] -Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#10, sr_ticket_number#11, sr_return_amt#12, sr_net_loss#13] +Output [8]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12] +Input [11]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ticket_number#4, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_item_sk#9, sr_ticket_number#10, sr_return_amt#11, sr_net_loss#12] (14) ReusedExchange [Reuses operator id: 125] -Output [1]: [d_date_sk#16] +Output [1]: [d_date_sk#14] (15) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_sold_date_sk#7] -Right keys [1]: [d_date_sk#16] +Right keys [1]: [d_date_sk#14] Join condition: None (16) Project [codegen id : 9] -Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13] -Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#12, sr_net_loss#13, d_date_sk#16] +Output [7]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, ss_sold_date_sk#7, sr_return_amt#11, sr_net_loss#12, d_date_sk#14] (17) Scan parquet default.store -Output [2]: [s_store_sk#17, s_store_id#18] +Output [2]: [s_store_sk#15, s_store_id#16] Batched: true Location [not included in comparison]/{warehouse_dir}/store] PushedFilters: [IsNotNull(s_store_sk)] ReadSchema: struct (18) ColumnarToRow [codegen id : 6] -Input [2]: [s_store_sk#17, s_store_id#18] +Input [2]: [s_store_sk#15, s_store_id#16] (19) Filter [codegen id : 6] -Input [2]: [s_store_sk#17, s_store_id#18] -Condition : isnotnull(s_store_sk#17) +Input [2]: [s_store_sk#15, s_store_id#16] +Condition : isnotnull(s_store_sk#15) (20) BroadcastExchange -Input [2]: [s_store_sk#17, s_store_id#18] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#19] +Input [2]: [s_store_sk#15, s_store_id#16] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_store_sk#2] -Right keys [1]: [s_store_sk#17] +Right keys [1]: [s_store_sk#15] Join condition: None (22) Project [codegen id : 9] -Output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#18] -Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_sk#17, s_store_id#18] +Output [7]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#16] +Input [9]: [ss_item_sk#1, ss_store_sk#2, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_sk#15, s_store_id#16] (23) Scan parquet default.item -Output [2]: [i_item_sk#20, i_current_price#21] +Output [2]: [i_item_sk#17, i_current_price#18] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [IsNotNull(i_current_price), GreaterThan(i_current_price,50.00), IsNotNull(i_item_sk)] ReadSchema: struct (24) ColumnarToRow [codegen id : 7] -Input [2]: [i_item_sk#20, i_current_price#21] +Input [2]: [i_item_sk#17, i_current_price#18] (25) Filter [codegen id : 7] -Input [2]: [i_item_sk#20, i_current_price#21] -Condition : ((isnotnull(i_current_price#21) AND (i_current_price#21 > 50.00)) AND isnotnull(i_item_sk#20)) +Input [2]: [i_item_sk#17, i_current_price#18] +Condition : ((isnotnull(i_current_price#18) AND (i_current_price#18 > 50.00)) AND isnotnull(i_item_sk#17)) (26) Project [codegen id : 7] -Output [1]: [i_item_sk#20] -Input [2]: [i_item_sk#20, i_current_price#21] +Output [1]: [i_item_sk#17] +Input [2]: [i_item_sk#17, i_current_price#18] (27) BroadcastExchange -Input [1]: [i_item_sk#20] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#22] +Input [1]: [i_item_sk#17] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=4] (28) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#20] +Right keys [1]: [i_item_sk#17] Join condition: None (29) Project [codegen id : 9] -Output [6]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#18] -Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#18, i_item_sk#20] +Output [6]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#16] +Input [8]: [ss_item_sk#1, ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#16, i_item_sk#17] (30) Scan parquet default.promotion -Output [2]: [p_promo_sk#23, p_channel_tv#24] +Output [2]: [p_promo_sk#19, p_channel_tv#20] Batched: true Location [not included in comparison]/{warehouse_dir}/promotion] PushedFilters: [IsNotNull(p_channel_tv), EqualTo(p_channel_tv,N), IsNotNull(p_promo_sk)] ReadSchema: struct (31) ColumnarToRow [codegen id : 8] -Input [2]: [p_promo_sk#23, p_channel_tv#24] +Input [2]: [p_promo_sk#19, p_channel_tv#20] (32) Filter [codegen id : 8] -Input [2]: [p_promo_sk#23, p_channel_tv#24] -Condition : ((isnotnull(p_channel_tv#24) AND (p_channel_tv#24 = N)) AND isnotnull(p_promo_sk#23)) +Input [2]: [p_promo_sk#19, p_channel_tv#20] +Condition : ((isnotnull(p_channel_tv#20) AND (p_channel_tv#20 = N)) AND isnotnull(p_promo_sk#19)) (33) Project [codegen id : 8] -Output [1]: [p_promo_sk#23] -Input [2]: [p_promo_sk#23, p_channel_tv#24] +Output [1]: [p_promo_sk#19] +Input [2]: [p_promo_sk#19, p_channel_tv#20] (34) BroadcastExchange -Input [1]: [p_promo_sk#23] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [p_promo_sk#19] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] (35) BroadcastHashJoin [codegen id : 9] Left keys [1]: [ss_promo_sk#3] -Right keys [1]: [p_promo_sk#23] +Right keys [1]: [p_promo_sk#19] Join condition: None (36) Project [codegen id : 9] -Output [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#18] -Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#18, p_promo_sk#23] +Output [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#16] +Input [7]: [ss_promo_sk#3, ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#16, p_promo_sk#19] (37) HashAggregate [codegen id : 9] -Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#12, sr_net_loss#13, s_store_id#18] -Keys [1]: [s_store_id#18] -Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [5]: [sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] -Results [6]: [s_store_id#18, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] +Input [5]: [ss_ext_sales_price#5, ss_net_profit#6, sr_return_amt#11, sr_net_loss#12, s_store_id#16] +Keys [1]: [s_store_id#16] +Functions [3]: [partial_sum(UnscaledValue(ss_ext_sales_price#5)), partial_sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [5]: [sum#21, sum#22, isEmpty#23, sum#24, isEmpty#25] +Results [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] (38) Exchange -Input [6]: [s_store_id#18, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] -Arguments: hashpartitioning(s_store_id#18, 5), ENSURE_REQUIREMENTS, [id=#36] +Input [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Arguments: hashpartitioning(s_store_id#16, 5), ENSURE_REQUIREMENTS, [plan_id=6] (39) HashAggregate [codegen id : 10] -Input [6]: [s_store_id#18, sum#31, sum#32, isEmpty#33, sum#34, isEmpty#35] -Keys [1]: [s_store_id#18] -Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#37, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39] -Results [5]: [store channel AS channel#40, concat(store, s_store_id#18) AS id#41, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#37,17,2) AS sales#42, sum(coalesce(cast(sr_return_amt#12 as decimal(12,2)), 0.00))#38 AS returns#43, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#13 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#39 AS profit#44] +Input [6]: [s_store_id#16, sum#26, sum#27, isEmpty#28, sum#29, isEmpty#30] +Keys [1]: [s_store_id#16] +Functions [3]: [sum(UnscaledValue(ss_ext_sales_price#5)), sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ss_ext_sales_price#5))#31, sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00))#32, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#33] +Results [5]: [store channel AS channel#34, concat(store, s_store_id#16) AS id#35, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#5))#31,17,2) AS sales#36, sum(coalesce(cast(sr_return_amt#11 as decimal(12,2)), 0.00))#32 AS returns#37, sum(CheckOverflow((promote_precision(cast(ss_net_profit#6 as decimal(13,2))) - promote_precision(cast(coalesce(cast(sr_net_loss#12 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#33 AS profit#38] (40) Scan parquet default.catalog_sales -Output [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(cs_sold_date_sk#51), dynamicpruningexpression(cs_sold_date_sk#51 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(cs_sold_date_sk#45), dynamicpruningexpression(cs_sold_date_sk#45 IN dynamicpruning#8)] PushedFilters: [IsNotNull(cs_catalog_page_sk), IsNotNull(cs_item_sk), IsNotNull(cs_promo_sk)] ReadSchema: struct (41) ColumnarToRow [codegen id : 11] -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] (42) Filter [codegen id : 11] -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] -Condition : ((isnotnull(cs_catalog_page_sk#45) AND isnotnull(cs_item_sk#46)) AND isnotnull(cs_promo_sk#47)) +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Condition : ((isnotnull(cs_catalog_page_sk#39) AND isnotnull(cs_item_sk#40)) AND isnotnull(cs_promo_sk#41)) (43) Exchange -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] -Arguments: hashpartitioning(cs_item_sk#46, cs_order_number#48, 5), ENSURE_REQUIREMENTS, [id=#52] +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: hashpartitioning(cs_item_sk#40, cs_order_number#42, 5), ENSURE_REQUIREMENTS, [plan_id=7] (44) Sort [codegen id : 12] -Input [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51] -Arguments: [cs_item_sk#46 ASC NULLS FIRST, cs_order_number#48 ASC NULLS FIRST], false, 0 +Input [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45] +Arguments: [cs_item_sk#40 ASC NULLS FIRST, cs_order_number#42 ASC NULLS FIRST], false, 0 (45) Scan parquet default.catalog_returns -Output [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_returns] PushedFilters: [IsNotNull(cr_item_sk), IsNotNull(cr_order_number)] ReadSchema: struct (46) ColumnarToRow [codegen id : 13] -Input [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] (47) Filter [codegen id : 13] -Input [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] -Condition : (isnotnull(cr_item_sk#53) AND isnotnull(cr_order_number#54)) +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] +Condition : (isnotnull(cr_item_sk#46) AND isnotnull(cr_order_number#47)) (48) Project [codegen id : 13] -Output [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] -Input [5]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56, cr_returned_date_sk#57] +Output [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Input [5]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49, cr_returned_date_sk#50] (49) Exchange -Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] -Arguments: hashpartitioning(cr_item_sk#53, cr_order_number#54, 5), ENSURE_REQUIREMENTS, [id=#58] +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: hashpartitioning(cr_item_sk#46, cr_order_number#47, 5), ENSURE_REQUIREMENTS, [plan_id=8] (50) Sort [codegen id : 14] -Input [4]: [cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] -Arguments: [cr_item_sk#53 ASC NULLS FIRST, cr_order_number#54 ASC NULLS FIRST], false, 0 +Input [4]: [cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] +Arguments: [cr_item_sk#46 ASC NULLS FIRST, cr_order_number#47 ASC NULLS FIRST], false, 0 (51) SortMergeJoin [codegen id : 19] -Left keys [2]: [cs_item_sk#46, cs_order_number#48] -Right keys [2]: [cr_item_sk#53, cr_order_number#54] +Left keys [2]: [cs_item_sk#40, cs_order_number#42] +Right keys [2]: [cr_item_sk#46, cr_order_number#47] Join condition: None (52) Project [codegen id : 19] -Output [8]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56] -Input [11]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_order_number#48, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_item_sk#53, cr_order_number#54, cr_return_amount#55, cr_net_loss#56] +Output [8]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49] +Input [11]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_order_number#42, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_item_sk#46, cr_order_number#47, cr_return_amount#48, cr_net_loss#49] (53) ReusedExchange [Reuses operator id: 125] -Output [1]: [d_date_sk#59] +Output [1]: [d_date_sk#51] (54) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_sold_date_sk#51] -Right keys [1]: [d_date_sk#59] +Left keys [1]: [cs_sold_date_sk#45] +Right keys [1]: [d_date_sk#51] Join condition: None (55) Project [codegen id : 19] -Output [7]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56] -Input [9]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cs_sold_date_sk#51, cr_return_amount#55, cr_net_loss#56, d_date_sk#59] +Output [7]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49] +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cs_sold_date_sk#45, cr_return_amount#48, cr_net_loss#49, d_date_sk#51] (56) Scan parquet default.catalog_page -Output [2]: [cp_catalog_page_sk#60, cp_catalog_page_id#61] +Output [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] Batched: true Location [not included in comparison]/{warehouse_dir}/catalog_page] PushedFilters: [IsNotNull(cp_catalog_page_sk)] ReadSchema: struct (57) ColumnarToRow [codegen id : 16] -Input [2]: [cp_catalog_page_sk#60, cp_catalog_page_id#61] +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] (58) Filter [codegen id : 16] -Input [2]: [cp_catalog_page_sk#60, cp_catalog_page_id#61] -Condition : isnotnull(cp_catalog_page_sk#60) +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Condition : isnotnull(cp_catalog_page_sk#52) (59) BroadcastExchange -Input [2]: [cp_catalog_page_sk#60, cp_catalog_page_id#61] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#62] +Input [2]: [cp_catalog_page_sk#52, cp_catalog_page_id#53] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=9] (60) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_catalog_page_sk#45] -Right keys [1]: [cp_catalog_page_sk#60] +Left keys [1]: [cs_catalog_page_sk#39] +Right keys [1]: [cp_catalog_page_sk#52] Join condition: None (61) Project [codegen id : 19] -Output [7]: [cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#61] -Input [9]: [cs_catalog_page_sk#45, cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_sk#60, cp_catalog_page_id#61] +Output [7]: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [9]: [cs_catalog_page_sk#39, cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_sk#52, cp_catalog_page_id#53] (62) ReusedExchange [Reuses operator id: 27] -Output [1]: [i_item_sk#63] +Output [1]: [i_item_sk#54] (63) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_item_sk#46] -Right keys [1]: [i_item_sk#63] +Left keys [1]: [cs_item_sk#40] +Right keys [1]: [i_item_sk#54] Join condition: None (64) Project [codegen id : 19] -Output [6]: [cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#61] -Input [8]: [cs_item_sk#46, cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#61, i_item_sk#63] +Output [6]: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [8]: [cs_item_sk#40, cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53, i_item_sk#54] (65) ReusedExchange [Reuses operator id: 34] -Output [1]: [p_promo_sk#64] +Output [1]: [p_promo_sk#55] (66) BroadcastHashJoin [codegen id : 19] -Left keys [1]: [cs_promo_sk#47] -Right keys [1]: [p_promo_sk#64] +Left keys [1]: [cs_promo_sk#41] +Right keys [1]: [p_promo_sk#55] Join condition: None (67) Project [codegen id : 19] -Output [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#61] -Input [7]: [cs_promo_sk#47, cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#61, p_promo_sk#64] +Output [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Input [7]: [cs_promo_sk#41, cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53, p_promo_sk#55] (68) HashAggregate [codegen id : 19] -Input [5]: [cs_ext_sales_price#49, cs_net_profit#50, cr_return_amount#55, cr_net_loss#56, cp_catalog_page_id#61] -Keys [1]: [cp_catalog_page_id#61] -Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#49)), partial_sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [5]: [sum#65, sum#66, isEmpty#67, sum#68, isEmpty#69] -Results [6]: [cp_catalog_page_id#61, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] +Input [5]: [cs_ext_sales_price#43, cs_net_profit#44, cr_return_amount#48, cr_net_loss#49, cp_catalog_page_id#53] +Keys [1]: [cp_catalog_page_id#53] +Functions [3]: [partial_sum(UnscaledValue(cs_ext_sales_price#43)), partial_sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [5]: [sum#56, sum#57, isEmpty#58, sum#59, isEmpty#60] +Results [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] (69) Exchange -Input [6]: [cp_catalog_page_id#61, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] -Arguments: hashpartitioning(cp_catalog_page_id#61, 5), ENSURE_REQUIREMENTS, [id=#75] +Input [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Arguments: hashpartitioning(cp_catalog_page_id#53, 5), ENSURE_REQUIREMENTS, [plan_id=10] (70) HashAggregate [codegen id : 20] -Input [6]: [cp_catalog_page_id#61, sum#70, sum#71, isEmpty#72, sum#73, isEmpty#74] -Keys [1]: [cp_catalog_page_id#61] -Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#49)), sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#49))#76, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78] -Results [5]: [catalog channel AS channel#79, concat(catalog_page, cp_catalog_page_id#61) AS id#80, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#49))#76,17,2) AS sales#81, sum(coalesce(cast(cr_return_amount#55 as decimal(12,2)), 0.00))#77 AS returns#82, sum(CheckOverflow((promote_precision(cast(cs_net_profit#50 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#56 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#78 AS profit#83] +Input [6]: [cp_catalog_page_id#53, sum#61, sum#62, isEmpty#63, sum#64, isEmpty#65] +Keys [1]: [cp_catalog_page_id#53] +Functions [3]: [sum(UnscaledValue(cs_ext_sales_price#43)), sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(cs_ext_sales_price#43))#66, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67, sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#68] +Results [5]: [catalog channel AS channel#69, concat(catalog_page, cp_catalog_page_id#53) AS id#70, MakeDecimal(sum(UnscaledValue(cs_ext_sales_price#43))#66,17,2) AS sales#71, sum(coalesce(cast(cr_return_amount#48 as decimal(12,2)), 0.00))#67 AS returns#72, sum(CheckOverflow((promote_precision(cast(cs_net_profit#44 as decimal(13,2))) - promote_precision(cast(coalesce(cast(cr_net_loss#49 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#68 AS profit#73] (71) Scan parquet default.web_sales -Output [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] +Output [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] Batched: true Location: InMemoryFileIndex [] -PartitionFilters: [isnotnull(ws_sold_date_sk#90), dynamicpruningexpression(ws_sold_date_sk#90 IN dynamicpruning#8)] +PartitionFilters: [isnotnull(ws_sold_date_sk#80), dynamicpruningexpression(ws_sold_date_sk#80 IN dynamicpruning#8)] PushedFilters: [IsNotNull(ws_web_site_sk), IsNotNull(ws_item_sk), IsNotNull(ws_promo_sk)] ReadSchema: struct (72) ColumnarToRow [codegen id : 21] -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] (73) Filter [codegen id : 21] -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] -Condition : ((isnotnull(ws_web_site_sk#85) AND isnotnull(ws_item_sk#84)) AND isnotnull(ws_promo_sk#86)) +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Condition : ((isnotnull(ws_web_site_sk#75) AND isnotnull(ws_item_sk#74)) AND isnotnull(ws_promo_sk#76)) (74) Exchange -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] -Arguments: hashpartitioning(ws_item_sk#84, ws_order_number#87, 5), ENSURE_REQUIREMENTS, [id=#91] +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: hashpartitioning(ws_item_sk#74, ws_order_number#77, 5), ENSURE_REQUIREMENTS, [plan_id=11] (75) Sort [codegen id : 22] -Input [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90] -Arguments: [ws_item_sk#84 ASC NULLS FIRST, ws_order_number#87 ASC NULLS FIRST], false, 0 +Input [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80] +Arguments: [ws_item_sk#74 ASC NULLS FIRST, ws_order_number#77 ASC NULLS FIRST], false, 0 (76) Scan parquet default.web_returns -Output [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Output [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] Batched: true Location [not included in comparison]/{warehouse_dir}/web_returns] PushedFilters: [IsNotNull(wr_item_sk), IsNotNull(wr_order_number)] ReadSchema: struct (77) ColumnarToRow [codegen id : 23] -Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] (78) Filter [codegen id : 23] -Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] -Condition : (isnotnull(wr_item_sk#92) AND isnotnull(wr_order_number#93)) +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] +Condition : (isnotnull(wr_item_sk#81) AND isnotnull(wr_order_number#82)) (79) Project [codegen id : 23] -Output [4]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] -Input [5]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95, wr_returned_date_sk#96] +Output [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Input [5]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84, wr_returned_date_sk#85] (80) Exchange -Input [4]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] -Arguments: hashpartitioning(wr_item_sk#92, wr_order_number#93, 5), ENSURE_REQUIREMENTS, [id=#97] +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: hashpartitioning(wr_item_sk#81, wr_order_number#82, 5), ENSURE_REQUIREMENTS, [plan_id=12] (81) Sort [codegen id : 24] -Input [4]: [wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] -Arguments: [wr_item_sk#92 ASC NULLS FIRST, wr_order_number#93 ASC NULLS FIRST], false, 0 +Input [4]: [wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] +Arguments: [wr_item_sk#81 ASC NULLS FIRST, wr_order_number#82 ASC NULLS FIRST], false, 0 (82) SortMergeJoin [codegen id : 29] -Left keys [2]: [ws_item_sk#84, ws_order_number#87] -Right keys [2]: [wr_item_sk#92, wr_order_number#93] +Left keys [2]: [ws_item_sk#74, ws_order_number#77] +Right keys [2]: [wr_item_sk#81, wr_order_number#82] Join condition: None (83) Project [codegen id : 29] -Output [8]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95] -Input [11]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_order_number#87, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_item_sk#92, wr_order_number#93, wr_return_amt#94, wr_net_loss#95] +Output [8]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84] +Input [11]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_order_number#77, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_item_sk#81, wr_order_number#82, wr_return_amt#83, wr_net_loss#84] (84) ReusedExchange [Reuses operator id: 125] -Output [1]: [d_date_sk#98] +Output [1]: [d_date_sk#86] (85) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_sold_date_sk#90] -Right keys [1]: [d_date_sk#98] +Left keys [1]: [ws_sold_date_sk#80] +Right keys [1]: [d_date_sk#86] Join condition: None (86) Project [codegen id : 29] -Output [7]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95] -Input [9]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, ws_sold_date_sk#90, wr_return_amt#94, wr_net_loss#95, d_date_sk#98] +Output [7]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84] +Input [9]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, ws_sold_date_sk#80, wr_return_amt#83, wr_net_loss#84, d_date_sk#86] (87) Scan parquet default.web_site -Output [2]: [web_site_sk#99, web_site_id#100] +Output [2]: [web_site_sk#87, web_site_id#88] Batched: true Location [not included in comparison]/{warehouse_dir}/web_site] PushedFilters: [IsNotNull(web_site_sk)] ReadSchema: struct (88) ColumnarToRow [codegen id : 26] -Input [2]: [web_site_sk#99, web_site_id#100] +Input [2]: [web_site_sk#87, web_site_id#88] (89) Filter [codegen id : 26] -Input [2]: [web_site_sk#99, web_site_id#100] -Condition : isnotnull(web_site_sk#99) +Input [2]: [web_site_sk#87, web_site_id#88] +Condition : isnotnull(web_site_sk#87) (90) BroadcastExchange -Input [2]: [web_site_sk#99, web_site_id#100] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#101] +Input [2]: [web_site_sk#87, web_site_id#88] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=13] (91) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_web_site_sk#85] -Right keys [1]: [web_site_sk#99] +Left keys [1]: [ws_web_site_sk#75] +Right keys [1]: [web_site_sk#87] Join condition: None (92) Project [codegen id : 29] -Output [7]: [ws_item_sk#84, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#100] -Input [9]: [ws_item_sk#84, ws_web_site_sk#85, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_sk#99, web_site_id#100] +Output [7]: [ws_item_sk#74, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [9]: [ws_item_sk#74, ws_web_site_sk#75, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_sk#87, web_site_id#88] (93) ReusedExchange [Reuses operator id: 27] -Output [1]: [i_item_sk#102] +Output [1]: [i_item_sk#89] (94) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_item_sk#84] -Right keys [1]: [i_item_sk#102] +Left keys [1]: [ws_item_sk#74] +Right keys [1]: [i_item_sk#89] Join condition: None (95) Project [codegen id : 29] -Output [6]: [ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#100] -Input [8]: [ws_item_sk#84, ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#100, i_item_sk#102] +Output [6]: [ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [8]: [ws_item_sk#74, ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88, i_item_sk#89] (96) ReusedExchange [Reuses operator id: 34] -Output [1]: [p_promo_sk#103] +Output [1]: [p_promo_sk#90] (97) BroadcastHashJoin [codegen id : 29] -Left keys [1]: [ws_promo_sk#86] -Right keys [1]: [p_promo_sk#103] +Left keys [1]: [ws_promo_sk#76] +Right keys [1]: [p_promo_sk#90] Join condition: None (98) Project [codegen id : 29] -Output [5]: [ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#100] -Input [7]: [ws_promo_sk#86, ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#100, p_promo_sk#103] +Output [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Input [7]: [ws_promo_sk#76, ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88, p_promo_sk#90] (99) HashAggregate [codegen id : 29] -Input [5]: [ws_ext_sales_price#88, ws_net_profit#89, wr_return_amt#94, wr_net_loss#95, web_site_id#100] -Keys [1]: [web_site_id#100] -Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#88)), partial_sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [5]: [sum#104, sum#105, isEmpty#106, sum#107, isEmpty#108] -Results [6]: [web_site_id#100, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] +Input [5]: [ws_ext_sales_price#78, ws_net_profit#79, wr_return_amt#83, wr_net_loss#84, web_site_id#88] +Keys [1]: [web_site_id#88] +Functions [3]: [partial_sum(UnscaledValue(ws_ext_sales_price#78)), partial_sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), partial_sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [5]: [sum#91, sum#92, isEmpty#93, sum#94, isEmpty#95] +Results [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] (100) Exchange -Input [6]: [web_site_id#100, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] -Arguments: hashpartitioning(web_site_id#100, 5), ENSURE_REQUIREMENTS, [id=#114] +Input [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Arguments: hashpartitioning(web_site_id#88, 5), ENSURE_REQUIREMENTS, [plan_id=14] (101) HashAggregate [codegen id : 30] -Input [6]: [web_site_id#100, sum#109, sum#110, isEmpty#111, sum#112, isEmpty#113] -Keys [1]: [web_site_id#100] -Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#88)), sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] -Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#88))#115, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117] -Results [5]: [web channel AS channel#118, concat(web_site, web_site_id#100) AS id#119, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#88))#115,17,2) AS sales#120, sum(coalesce(cast(wr_return_amt#94 as decimal(12,2)), 0.00))#116 AS returns#121, sum(CheckOverflow((promote_precision(cast(ws_net_profit#89 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#95 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#117 AS profit#122] +Input [6]: [web_site_id#88, sum#96, sum#97, isEmpty#98, sum#99, isEmpty#100] +Keys [1]: [web_site_id#88] +Functions [3]: [sum(UnscaledValue(ws_ext_sales_price#78)), sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00)), sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))] +Aggregate Attributes [3]: [sum(UnscaledValue(ws_ext_sales_price#78))#101, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102, sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#103] +Results [5]: [web channel AS channel#104, concat(web_site, web_site_id#88) AS id#105, MakeDecimal(sum(UnscaledValue(ws_ext_sales_price#78))#101,17,2) AS sales#106, sum(coalesce(cast(wr_return_amt#83 as decimal(12,2)), 0.00))#102 AS returns#107, sum(CheckOverflow((promote_precision(cast(ws_net_profit#79 as decimal(13,2))) - promote_precision(cast(coalesce(cast(wr_net_loss#84 as decimal(12,2)), 0.00) as decimal(13,2)))), DecimalType(13,2)))#103 AS profit#108] (102) Union (103) HashAggregate [codegen id : 31] -Input [5]: [channel#40, id#41, sales#42, returns#43, profit#44] -Keys [2]: [channel#40, id#41] -Functions [3]: [partial_sum(sales#42), partial_sum(returns#43), partial_sum(profit#44)] -Aggregate Attributes [6]: [sum#123, isEmpty#124, sum#125, isEmpty#126, sum#127, isEmpty#128] -Results [8]: [channel#40, id#41, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134] +Input [5]: [channel#34, id#35, sales#36, returns#37, profit#38] +Keys [2]: [channel#34, id#35] +Functions [3]: [partial_sum(sales#36), partial_sum(returns#37), partial_sum(profit#38)] +Aggregate Attributes [6]: [sum#109, isEmpty#110, sum#111, isEmpty#112, sum#113, isEmpty#114] +Results [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] (104) Exchange -Input [8]: [channel#40, id#41, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134] -Arguments: hashpartitioning(channel#40, id#41, 5), ENSURE_REQUIREMENTS, [id=#135] +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Arguments: hashpartitioning(channel#34, id#35, 5), ENSURE_REQUIREMENTS, [plan_id=15] (105) HashAggregate [codegen id : 32] -Input [8]: [channel#40, id#41, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134] -Keys [2]: [channel#40, id#41] -Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#44)] -Aggregate Attributes [3]: [sum(sales#42)#136, sum(returns#43)#137, sum(profit#44)#138] -Results [5]: [channel#40, id#41, cast(sum(sales#42)#136 as decimal(37,2)) AS sales#139, cast(sum(returns#43)#137 as decimal(38,2)) AS returns#140, cast(sum(profit#44)#138 as decimal(38,2)) AS profit#141] +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#36), sum(returns#37), sum(profit#38)] +Aggregate Attributes [3]: [sum(sales#36)#121, sum(returns#37)#122, sum(profit#38)#123] +Results [5]: [channel#34, id#35, cast(sum(sales#36)#121 as decimal(37,2)) AS sales#124, cast(sum(returns#37)#122 as decimal(38,2)) AS returns#125, cast(sum(profit#38)#123 as decimal(38,2)) AS profit#126] (106) ReusedExchange [Reuses operator id: 104] -Output [8]: [channel#40, id#41, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134] +Output [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] (107) HashAggregate [codegen id : 64] -Input [8]: [channel#40, id#41, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134] -Keys [2]: [channel#40, id#41] -Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#44)] -Aggregate Attributes [3]: [sum(sales#42)#136, sum(returns#43)#137, sum(profit#44)#138] -Results [4]: [channel#40, sum(sales#42)#136 AS sales#142, sum(returns#43)#137 AS returns#143, sum(profit#44)#138 AS profit#144] +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#36), sum(returns#37), sum(profit#38)] +Aggregate Attributes [3]: [sum(sales#36)#121, sum(returns#37)#122, sum(profit#38)#123] +Results [4]: [channel#34, sum(sales#36)#121 AS sales#127, sum(returns#37)#122 AS returns#128, sum(profit#38)#123 AS profit#129] (108) HashAggregate [codegen id : 64] -Input [4]: [channel#40, sales#142, returns#143, profit#144] -Keys [1]: [channel#40] -Functions [3]: [partial_sum(sales#142), partial_sum(returns#143), partial_sum(profit#144)] -Aggregate Attributes [6]: [sum#145, isEmpty#146, sum#147, isEmpty#148, sum#149, isEmpty#150] -Results [7]: [channel#40, sum#151, isEmpty#152, sum#153, isEmpty#154, sum#155, isEmpty#156] +Input [4]: [channel#34, sales#127, returns#128, profit#129] +Keys [1]: [channel#34] +Functions [3]: [partial_sum(sales#127), partial_sum(returns#128), partial_sum(profit#129)] +Aggregate Attributes [6]: [sum#130, isEmpty#131, sum#132, isEmpty#133, sum#134, isEmpty#135] +Results [7]: [channel#34, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] (109) Exchange -Input [7]: [channel#40, sum#151, isEmpty#152, sum#153, isEmpty#154, sum#155, isEmpty#156] -Arguments: hashpartitioning(channel#40, 5), ENSURE_REQUIREMENTS, [id=#157] +Input [7]: [channel#34, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Arguments: hashpartitioning(channel#34, 5), ENSURE_REQUIREMENTS, [plan_id=16] (110) HashAggregate [codegen id : 65] -Input [7]: [channel#40, sum#151, isEmpty#152, sum#153, isEmpty#154, sum#155, isEmpty#156] -Keys [1]: [channel#40] -Functions [3]: [sum(sales#142), sum(returns#143), sum(profit#144)] -Aggregate Attributes [3]: [sum(sales#142)#158, sum(returns#143)#159, sum(profit#144)#160] -Results [5]: [channel#40, null AS id#161, sum(sales#142)#158 AS sales#162, sum(returns#143)#159 AS returns#163, sum(profit#144)#160 AS profit#164] +Input [7]: [channel#34, sum#136, isEmpty#137, sum#138, isEmpty#139, sum#140, isEmpty#141] +Keys [1]: [channel#34] +Functions [3]: [sum(sales#127), sum(returns#128), sum(profit#129)] +Aggregate Attributes [3]: [sum(sales#127)#142, sum(returns#128)#143, sum(profit#129)#144] +Results [5]: [channel#34, null AS id#145, sum(sales#127)#142 AS sales#146, sum(returns#128)#143 AS returns#147, sum(profit#129)#144 AS profit#148] (111) ReusedExchange [Reuses operator id: 104] -Output [8]: [channel#40, id#41, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134] +Output [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] (112) HashAggregate [codegen id : 97] -Input [8]: [channel#40, id#41, sum#129, isEmpty#130, sum#131, isEmpty#132, sum#133, isEmpty#134] -Keys [2]: [channel#40, id#41] -Functions [3]: [sum(sales#42), sum(returns#43), sum(profit#44)] -Aggregate Attributes [3]: [sum(sales#42)#136, sum(returns#43)#137, sum(profit#44)#138] -Results [3]: [sum(sales#42)#136 AS sales#142, sum(returns#43)#137 AS returns#143, sum(profit#44)#138 AS profit#144] +Input [8]: [channel#34, id#35, sum#115, isEmpty#116, sum#117, isEmpty#118, sum#119, isEmpty#120] +Keys [2]: [channel#34, id#35] +Functions [3]: [sum(sales#36), sum(returns#37), sum(profit#38)] +Aggregate Attributes [3]: [sum(sales#36)#121, sum(returns#37)#122, sum(profit#38)#123] +Results [3]: [sum(sales#36)#121 AS sales#127, sum(returns#37)#122 AS returns#128, sum(profit#38)#123 AS profit#129] (113) HashAggregate [codegen id : 97] -Input [3]: [sales#142, returns#143, profit#144] +Input [3]: [sales#127, returns#128, profit#129] Keys: [] -Functions [3]: [partial_sum(sales#142), partial_sum(returns#143), partial_sum(profit#144)] -Aggregate Attributes [6]: [sum#165, isEmpty#166, sum#167, isEmpty#168, sum#169, isEmpty#170] -Results [6]: [sum#171, isEmpty#172, sum#173, isEmpty#174, sum#175, isEmpty#176] +Functions [3]: [partial_sum(sales#127), partial_sum(returns#128), partial_sum(profit#129)] +Aggregate Attributes [6]: [sum#149, isEmpty#150, sum#151, isEmpty#152, sum#153, isEmpty#154] +Results [6]: [sum#155, isEmpty#156, sum#157, isEmpty#158, sum#159, isEmpty#160] (114) Exchange -Input [6]: [sum#171, isEmpty#172, sum#173, isEmpty#174, sum#175, isEmpty#176] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#177] +Input [6]: [sum#155, isEmpty#156, sum#157, isEmpty#158, sum#159, isEmpty#160] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=17] (115) HashAggregate [codegen id : 98] -Input [6]: [sum#171, isEmpty#172, sum#173, isEmpty#174, sum#175, isEmpty#176] +Input [6]: [sum#155, isEmpty#156, sum#157, isEmpty#158, sum#159, isEmpty#160] Keys: [] -Functions [3]: [sum(sales#142), sum(returns#143), sum(profit#144)] -Aggregate Attributes [3]: [sum(sales#142)#178, sum(returns#143)#179, sum(profit#144)#180] -Results [5]: [null AS channel#181, null AS id#182, sum(sales#142)#178 AS sales#183, sum(returns#143)#179 AS returns#184, sum(profit#144)#180 AS profit#185] +Functions [3]: [sum(sales#127), sum(returns#128), sum(profit#129)] +Aggregate Attributes [3]: [sum(sales#127)#161, sum(returns#128)#162, sum(profit#129)#163] +Results [5]: [null AS channel#164, null AS id#165, sum(sales#127)#161 AS sales#166, sum(returns#128)#162 AS returns#167, sum(profit#129)#163 AS profit#168] (116) Union (117) HashAggregate [codegen id : 99] -Input [5]: [channel#40, id#41, sales#139, returns#140, profit#141] -Keys [5]: [channel#40, id#41, sales#139, returns#140, profit#141] +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Keys [5]: [channel#34, id#35, sales#124, returns#125, profit#126] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#139, returns#140, profit#141] +Results [5]: [channel#34, id#35, sales#124, returns#125, profit#126] (118) Exchange -Input [5]: [channel#40, id#41, sales#139, returns#140, profit#141] -Arguments: hashpartitioning(channel#40, id#41, sales#139, returns#140, profit#141, 5), ENSURE_REQUIREMENTS, [id=#186] +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Arguments: hashpartitioning(channel#34, id#35, sales#124, returns#125, profit#126, 5), ENSURE_REQUIREMENTS, [plan_id=18] (119) HashAggregate [codegen id : 100] -Input [5]: [channel#40, id#41, sales#139, returns#140, profit#141] -Keys [5]: [channel#40, id#41, sales#139, returns#140, profit#141] +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Keys [5]: [channel#34, id#35, sales#124, returns#125, profit#126] Functions: [] Aggregate Attributes: [] -Results [5]: [channel#40, id#41, sales#139, returns#140, profit#141] +Results [5]: [channel#34, id#35, sales#124, returns#125, profit#126] (120) TakeOrderedAndProject -Input [5]: [channel#40, id#41, sales#139, returns#140, profit#141] -Arguments: 100, [channel#40 ASC NULLS FIRST, id#41 ASC NULLS FIRST], [channel#40, id#41, sales#139, returns#140, profit#141] +Input [5]: [channel#34, id#35, sales#124, returns#125, profit#126] +Arguments: 100, [channel#34 ASC NULLS FIRST, id#35 ASC NULLS FIRST], [channel#34, id#35, sales#124, returns#125, profit#126] ===== Subqueries ===== @@ -687,29 +687,29 @@ BroadcastExchange (125) (121) Scan parquet default.date_dim -Output [2]: [d_date_sk#16, d_date#187] +Output [2]: [d_date_sk#14, d_date#169] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1998-08-04), LessThanOrEqual(d_date,1998-09-03), IsNotNull(d_date_sk)] ReadSchema: struct (122) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#16, d_date#187] +Input [2]: [d_date_sk#14, d_date#169] (123) Filter [codegen id : 1] -Input [2]: [d_date_sk#16, d_date#187] -Condition : (((isnotnull(d_date#187) AND (d_date#187 >= 1998-08-04)) AND (d_date#187 <= 1998-09-03)) AND isnotnull(d_date_sk#16)) +Input [2]: [d_date_sk#14, d_date#169] +Condition : (((isnotnull(d_date#169) AND (d_date#169 >= 1998-08-04)) AND (d_date#169 <= 1998-09-03)) AND isnotnull(d_date_sk#14)) (124) Project [codegen id : 1] -Output [1]: [d_date_sk#16] -Input [2]: [d_date_sk#16, d_date#187] +Output [1]: [d_date_sk#14] +Input [2]: [d_date_sk#14, d_date#169] (125) BroadcastExchange -Input [1]: [d_date_sk#16] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#188] +Input [1]: [d_date_sk#14] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=19] -Subquery:2 Hosting operator id = 40 Hosting Expression = cs_sold_date_sk#51 IN dynamicpruning#8 +Subquery:2 Hosting operator id = 40 Hosting Expression = cs_sold_date_sk#45 IN dynamicpruning#8 -Subquery:3 Hosting operator id = 71 Hosting Expression = ws_sold_date_sk#90 IN dynamicpruning#8 +Subquery:3 Hosting operator id = 71 Hosting Expression = ws_sold_date_sk#80 IN dynamicpruning#8 diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt index 1dd3dc76a8f56..4301f4cd2b2d1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a.sf100/explain.txt @@ -78,7 +78,7 @@ Condition : isnotnull(i_item_sk#6) (10) BroadcastExchange Input [3]: [i_item_sk#6, i_class#7, i_category#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ws_item_sk#1] @@ -93,115 +93,115 @@ Input [5]: [ws_item_sk#1, ws_net_paid#2, i_item_sk#6, i_class#7, i_category#8] Input [3]: [ws_net_paid#2, i_class#7, i_category#8] Keys [2]: [i_category#8, i_class#7] Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#2))] -Aggregate Attributes [1]: [sum#10] -Results [3]: [i_category#8, i_class#7, sum#11] +Aggregate Attributes [1]: [sum#9] +Results [3]: [i_category#8, i_class#7, sum#10] (14) Exchange -Input [3]: [i_category#8, i_class#7, sum#11] -Arguments: hashpartitioning(i_category#8, i_class#7, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [3]: [i_category#8, i_class#7, sum#10] +Arguments: hashpartitioning(i_category#8, i_class#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 4] -Input [3]: [i_category#8, i_class#7, sum#11] +Input [3]: [i_category#8, i_class#7, sum#10] Keys [2]: [i_category#8, i_class#7] Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#13] -Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#13,17,2) as decimal(27,2)) AS total_sum#14, i_category#8, i_class#7, 0 AS g_category#15, 0 AS g_class#16, 0 AS lochierarchy#17] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#11] +Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#11,17,2) as decimal(27,2)) AS total_sum#12, i_category#8, i_class#7, 0 AS g_category#13, 0 AS g_class#14, 0 AS lochierarchy#15] (16) ReusedExchange [Reuses operator id: 14] -Output [3]: [i_category#8, i_class#7, sum#18] +Output [3]: [i_category#8, i_class#7, sum#16] (17) HashAggregate [codegen id : 8] -Input [3]: [i_category#8, i_class#7, sum#18] +Input [3]: [i_category#8, i_class#7, sum#16] Keys [2]: [i_category#8, i_class#7] Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#13] -Results [2]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#13,17,2) AS total_sum#19, i_category#8] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#11] +Results [2]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#11,17,2) AS total_sum#17, i_category#8] (18) HashAggregate [codegen id : 8] -Input [2]: [total_sum#19, i_category#8] +Input [2]: [total_sum#17, i_category#8] Keys [1]: [i_category#8] -Functions [1]: [partial_sum(total_sum#19)] -Aggregate Attributes [2]: [sum#20, isEmpty#21] -Results [3]: [i_category#8, sum#22, isEmpty#23] +Functions [1]: [partial_sum(total_sum#17)] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [3]: [i_category#8, sum#20, isEmpty#21] (19) Exchange -Input [3]: [i_category#8, sum#22, isEmpty#23] -Arguments: hashpartitioning(i_category#8, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [3]: [i_category#8, sum#20, isEmpty#21] +Arguments: hashpartitioning(i_category#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 9] -Input [3]: [i_category#8, sum#22, isEmpty#23] +Input [3]: [i_category#8, sum#20, isEmpty#21] Keys [1]: [i_category#8] -Functions [1]: [sum(total_sum#19)] -Aggregate Attributes [1]: [sum(total_sum#19)#25] -Results [6]: [sum(total_sum#19)#25 AS total_sum#26, i_category#8, null AS i_class#27, 0 AS g_category#28, 1 AS g_class#29, 1 AS lochierarchy#30] +Functions [1]: [sum(total_sum#17)] +Aggregate Attributes [1]: [sum(total_sum#17)#22] +Results [6]: [sum(total_sum#17)#22 AS total_sum#23, i_category#8, null AS i_class#24, 0 AS g_category#25, 1 AS g_class#26, 1 AS lochierarchy#27] (21) ReusedExchange [Reuses operator id: 14] -Output [3]: [i_category#8, i_class#7, sum#31] +Output [3]: [i_category#8, i_class#7, sum#28] (22) HashAggregate [codegen id : 13] -Input [3]: [i_category#8, i_class#7, sum#31] +Input [3]: [i_category#8, i_class#7, sum#28] Keys [2]: [i_category#8, i_class#7] Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#13] -Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#13,17,2) AS total_sum#19] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#11] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#11,17,2) AS total_sum#17] (23) HashAggregate [codegen id : 13] -Input [1]: [total_sum#19] +Input [1]: [total_sum#17] Keys: [] -Functions [1]: [partial_sum(total_sum#19)] -Aggregate Attributes [2]: [sum#32, isEmpty#33] -Results [2]: [sum#34, isEmpty#35] +Functions [1]: [partial_sum(total_sum#17)] +Aggregate Attributes [2]: [sum#29, isEmpty#30] +Results [2]: [sum#31, isEmpty#32] (24) Exchange -Input [2]: [sum#34, isEmpty#35] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#36] +Input [2]: [sum#31, isEmpty#32] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (25) HashAggregate [codegen id : 14] -Input [2]: [sum#34, isEmpty#35] +Input [2]: [sum#31, isEmpty#32] Keys: [] -Functions [1]: [sum(total_sum#19)] -Aggregate Attributes [1]: [sum(total_sum#19)#37] -Results [6]: [sum(total_sum#19)#37 AS total_sum#38, null AS i_category#39, null AS i_class#40, 1 AS g_category#41, 1 AS g_class#42, 2 AS lochierarchy#43] +Functions [1]: [sum(total_sum#17)] +Aggregate Attributes [1]: [sum(total_sum#17)#33] +Results [6]: [sum(total_sum#17)#33 AS total_sum#34, null AS i_category#35, null AS i_class#36, 1 AS g_category#37, 1 AS g_class#38, 2 AS lochierarchy#39] (26) Union (27) HashAggregate [codegen id : 15] -Input [6]: [total_sum#14, i_category#8, i_class#7, g_category#15, g_class#16, lochierarchy#17] -Keys [6]: [total_sum#14, i_category#8, i_class#7, g_category#15, g_class#16, lochierarchy#17] +Input [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Keys [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] Functions: [] Aggregate Attributes: [] -Results [6]: [total_sum#14, i_category#8, i_class#7, g_category#15, g_class#16, lochierarchy#17] +Results [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] (28) Exchange -Input [6]: [total_sum#14, i_category#8, i_class#7, g_category#15, g_class#16, lochierarchy#17] -Arguments: hashpartitioning(total_sum#14, i_category#8, i_class#7, g_category#15, g_class#16, lochierarchy#17, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Arguments: hashpartitioning(total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15, 5), ENSURE_REQUIREMENTS, [plan_id=5] (29) HashAggregate [codegen id : 16] -Input [6]: [total_sum#14, i_category#8, i_class#7, g_category#15, g_class#16, lochierarchy#17] -Keys [6]: [total_sum#14, i_category#8, i_class#7, g_category#15, g_class#16, lochierarchy#17] +Input [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Keys [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] Functions: [] Aggregate Attributes: [] -Results [5]: [total_sum#14, i_category#8, i_class#7, lochierarchy#17, CASE WHEN (g_class#16 = 0) THEN i_category#8 END AS _w0#45] +Results [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, CASE WHEN (g_class#14 = 0) THEN i_category#8 END AS _w0#40] (30) Exchange -Input [5]: [total_sum#14, i_category#8, i_class#7, lochierarchy#17, _w0#45] -Arguments: hashpartitioning(lochierarchy#17, _w0#45, 5), ENSURE_REQUIREMENTS, [id=#46] +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40] +Arguments: hashpartitioning(lochierarchy#15, _w0#40, 5), ENSURE_REQUIREMENTS, [plan_id=6] (31) Sort [codegen id : 17] -Input [5]: [total_sum#14, i_category#8, i_class#7, lochierarchy#17, _w0#45] -Arguments: [lochierarchy#17 ASC NULLS FIRST, _w0#45 ASC NULLS FIRST, total_sum#14 DESC NULLS LAST], false, 0 +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40] +Arguments: [lochierarchy#15 ASC NULLS FIRST, _w0#40 ASC NULLS FIRST, total_sum#12 DESC NULLS LAST], false, 0 (32) Window -Input [5]: [total_sum#14, i_category#8, i_class#7, lochierarchy#17, _w0#45] -Arguments: [rank(total_sum#14) windowspecdefinition(lochierarchy#17, _w0#45, total_sum#14 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#47], [lochierarchy#17, _w0#45], [total_sum#14 DESC NULLS LAST] +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40] +Arguments: [rank(total_sum#12) windowspecdefinition(lochierarchy#15, _w0#40, total_sum#12 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#41], [lochierarchy#15, _w0#40], [total_sum#12 DESC NULLS LAST] (33) Project [codegen id : 18] -Output [5]: [total_sum#14, i_category#8, i_class#7, lochierarchy#17, rank_within_parent#47] -Input [6]: [total_sum#14, i_category#8, i_class#7, lochierarchy#17, _w0#45, rank_within_parent#47] +Output [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, rank_within_parent#41] +Input [6]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40, rank_within_parent#41] (34) TakeOrderedAndProject -Input [5]: [total_sum#14, i_category#8, i_class#7, lochierarchy#17, rank_within_parent#47] -Arguments: 100, [lochierarchy#17 DESC NULLS LAST, CASE WHEN (lochierarchy#17 = 0) THEN i_category#8 END ASC NULLS FIRST, rank_within_parent#47 ASC NULLS FIRST], [total_sum#14, i_category#8, i_class#7, lochierarchy#17, rank_within_parent#47] +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, rank_within_parent#41] +Arguments: 100, [lochierarchy#15 DESC NULLS LAST, CASE WHEN (lochierarchy#15 = 0) THEN i_category#8 END ASC NULLS FIRST, rank_within_parent#41 ASC NULLS FIRST], [total_sum#12, i_category#8, i_class#7, lochierarchy#15, rank_within_parent#41] ===== Subqueries ===== @@ -214,25 +214,25 @@ BroadcastExchange (39) (35) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_month_seq#48] +Output [2]: [d_date_sk#5, d_month_seq#42] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] ReadSchema: struct (36) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#48] +Input [2]: [d_date_sk#5, d_month_seq#42] (37) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#48] -Condition : (((isnotnull(d_month_seq#48) AND (d_month_seq#48 >= 1212)) AND (d_month_seq#48 <= 1223)) AND isnotnull(d_date_sk#5)) +Input [2]: [d_date_sk#5, d_month_seq#42] +Condition : (((isnotnull(d_month_seq#42) AND (d_month_seq#42 >= 1212)) AND (d_month_seq#42 <= 1223)) AND isnotnull(d_date_sk#5)) (38) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_month_seq#48] +Input [2]: [d_date_sk#5, d_month_seq#42] (39) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt index 1dd3dc76a8f56..4301f4cd2b2d1 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q86a/explain.txt @@ -78,7 +78,7 @@ Condition : isnotnull(i_item_sk#6) (10) BroadcastExchange Input [3]: [i_item_sk#6, i_class#7, i_category#8] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ws_item_sk#1] @@ -93,115 +93,115 @@ Input [5]: [ws_item_sk#1, ws_net_paid#2, i_item_sk#6, i_class#7, i_category#8] Input [3]: [ws_net_paid#2, i_class#7, i_category#8] Keys [2]: [i_category#8, i_class#7] Functions [1]: [partial_sum(UnscaledValue(ws_net_paid#2))] -Aggregate Attributes [1]: [sum#10] -Results [3]: [i_category#8, i_class#7, sum#11] +Aggregate Attributes [1]: [sum#9] +Results [3]: [i_category#8, i_class#7, sum#10] (14) Exchange -Input [3]: [i_category#8, i_class#7, sum#11] -Arguments: hashpartitioning(i_category#8, i_class#7, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [3]: [i_category#8, i_class#7, sum#10] +Arguments: hashpartitioning(i_category#8, i_class#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 4] -Input [3]: [i_category#8, i_class#7, sum#11] +Input [3]: [i_category#8, i_class#7, sum#10] Keys [2]: [i_category#8, i_class#7] Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#13] -Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#13,17,2) as decimal(27,2)) AS total_sum#14, i_category#8, i_class#7, 0 AS g_category#15, 0 AS g_class#16, 0 AS lochierarchy#17] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#11] +Results [6]: [cast(MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#11,17,2) as decimal(27,2)) AS total_sum#12, i_category#8, i_class#7, 0 AS g_category#13, 0 AS g_class#14, 0 AS lochierarchy#15] (16) ReusedExchange [Reuses operator id: 14] -Output [3]: [i_category#8, i_class#7, sum#18] +Output [3]: [i_category#8, i_class#7, sum#16] (17) HashAggregate [codegen id : 8] -Input [3]: [i_category#8, i_class#7, sum#18] +Input [3]: [i_category#8, i_class#7, sum#16] Keys [2]: [i_category#8, i_class#7] Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#13] -Results [2]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#13,17,2) AS total_sum#19, i_category#8] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#11] +Results [2]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#11,17,2) AS total_sum#17, i_category#8] (18) HashAggregate [codegen id : 8] -Input [2]: [total_sum#19, i_category#8] +Input [2]: [total_sum#17, i_category#8] Keys [1]: [i_category#8] -Functions [1]: [partial_sum(total_sum#19)] -Aggregate Attributes [2]: [sum#20, isEmpty#21] -Results [3]: [i_category#8, sum#22, isEmpty#23] +Functions [1]: [partial_sum(total_sum#17)] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [3]: [i_category#8, sum#20, isEmpty#21] (19) Exchange -Input [3]: [i_category#8, sum#22, isEmpty#23] -Arguments: hashpartitioning(i_category#8, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [3]: [i_category#8, sum#20, isEmpty#21] +Arguments: hashpartitioning(i_category#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 9] -Input [3]: [i_category#8, sum#22, isEmpty#23] +Input [3]: [i_category#8, sum#20, isEmpty#21] Keys [1]: [i_category#8] -Functions [1]: [sum(total_sum#19)] -Aggregate Attributes [1]: [sum(total_sum#19)#25] -Results [6]: [sum(total_sum#19)#25 AS total_sum#26, i_category#8, null AS i_class#27, 0 AS g_category#28, 1 AS g_class#29, 1 AS lochierarchy#30] +Functions [1]: [sum(total_sum#17)] +Aggregate Attributes [1]: [sum(total_sum#17)#22] +Results [6]: [sum(total_sum#17)#22 AS total_sum#23, i_category#8, null AS i_class#24, 0 AS g_category#25, 1 AS g_class#26, 1 AS lochierarchy#27] (21) ReusedExchange [Reuses operator id: 14] -Output [3]: [i_category#8, i_class#7, sum#31] +Output [3]: [i_category#8, i_class#7, sum#28] (22) HashAggregate [codegen id : 13] -Input [3]: [i_category#8, i_class#7, sum#31] +Input [3]: [i_category#8, i_class#7, sum#28] Keys [2]: [i_category#8, i_class#7] Functions [1]: [sum(UnscaledValue(ws_net_paid#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#13] -Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#13,17,2) AS total_sum#19] +Aggregate Attributes [1]: [sum(UnscaledValue(ws_net_paid#2))#11] +Results [1]: [MakeDecimal(sum(UnscaledValue(ws_net_paid#2))#11,17,2) AS total_sum#17] (23) HashAggregate [codegen id : 13] -Input [1]: [total_sum#19] +Input [1]: [total_sum#17] Keys: [] -Functions [1]: [partial_sum(total_sum#19)] -Aggregate Attributes [2]: [sum#32, isEmpty#33] -Results [2]: [sum#34, isEmpty#35] +Functions [1]: [partial_sum(total_sum#17)] +Aggregate Attributes [2]: [sum#29, isEmpty#30] +Results [2]: [sum#31, isEmpty#32] (24) Exchange -Input [2]: [sum#34, isEmpty#35] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#36] +Input [2]: [sum#31, isEmpty#32] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (25) HashAggregate [codegen id : 14] -Input [2]: [sum#34, isEmpty#35] +Input [2]: [sum#31, isEmpty#32] Keys: [] -Functions [1]: [sum(total_sum#19)] -Aggregate Attributes [1]: [sum(total_sum#19)#37] -Results [6]: [sum(total_sum#19)#37 AS total_sum#38, null AS i_category#39, null AS i_class#40, 1 AS g_category#41, 1 AS g_class#42, 2 AS lochierarchy#43] +Functions [1]: [sum(total_sum#17)] +Aggregate Attributes [1]: [sum(total_sum#17)#33] +Results [6]: [sum(total_sum#17)#33 AS total_sum#34, null AS i_category#35, null AS i_class#36, 1 AS g_category#37, 1 AS g_class#38, 2 AS lochierarchy#39] (26) Union (27) HashAggregate [codegen id : 15] -Input [6]: [total_sum#14, i_category#8, i_class#7, g_category#15, g_class#16, lochierarchy#17] -Keys [6]: [total_sum#14, i_category#8, i_class#7, g_category#15, g_class#16, lochierarchy#17] +Input [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Keys [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] Functions: [] Aggregate Attributes: [] -Results [6]: [total_sum#14, i_category#8, i_class#7, g_category#15, g_class#16, lochierarchy#17] +Results [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] (28) Exchange -Input [6]: [total_sum#14, i_category#8, i_class#7, g_category#15, g_class#16, lochierarchy#17] -Arguments: hashpartitioning(total_sum#14, i_category#8, i_class#7, g_category#15, g_class#16, lochierarchy#17, 5), ENSURE_REQUIREMENTS, [id=#44] +Input [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Arguments: hashpartitioning(total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15, 5), ENSURE_REQUIREMENTS, [plan_id=5] (29) HashAggregate [codegen id : 16] -Input [6]: [total_sum#14, i_category#8, i_class#7, g_category#15, g_class#16, lochierarchy#17] -Keys [6]: [total_sum#14, i_category#8, i_class#7, g_category#15, g_class#16, lochierarchy#17] +Input [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] +Keys [6]: [total_sum#12, i_category#8, i_class#7, g_category#13, g_class#14, lochierarchy#15] Functions: [] Aggregate Attributes: [] -Results [5]: [total_sum#14, i_category#8, i_class#7, lochierarchy#17, CASE WHEN (g_class#16 = 0) THEN i_category#8 END AS _w0#45] +Results [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, CASE WHEN (g_class#14 = 0) THEN i_category#8 END AS _w0#40] (30) Exchange -Input [5]: [total_sum#14, i_category#8, i_class#7, lochierarchy#17, _w0#45] -Arguments: hashpartitioning(lochierarchy#17, _w0#45, 5), ENSURE_REQUIREMENTS, [id=#46] +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40] +Arguments: hashpartitioning(lochierarchy#15, _w0#40, 5), ENSURE_REQUIREMENTS, [plan_id=6] (31) Sort [codegen id : 17] -Input [5]: [total_sum#14, i_category#8, i_class#7, lochierarchy#17, _w0#45] -Arguments: [lochierarchy#17 ASC NULLS FIRST, _w0#45 ASC NULLS FIRST, total_sum#14 DESC NULLS LAST], false, 0 +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40] +Arguments: [lochierarchy#15 ASC NULLS FIRST, _w0#40 ASC NULLS FIRST, total_sum#12 DESC NULLS LAST], false, 0 (32) Window -Input [5]: [total_sum#14, i_category#8, i_class#7, lochierarchy#17, _w0#45] -Arguments: [rank(total_sum#14) windowspecdefinition(lochierarchy#17, _w0#45, total_sum#14 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#47], [lochierarchy#17, _w0#45], [total_sum#14 DESC NULLS LAST] +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40] +Arguments: [rank(total_sum#12) windowspecdefinition(lochierarchy#15, _w0#40, total_sum#12 DESC NULLS LAST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rank_within_parent#41], [lochierarchy#15, _w0#40], [total_sum#12 DESC NULLS LAST] (33) Project [codegen id : 18] -Output [5]: [total_sum#14, i_category#8, i_class#7, lochierarchy#17, rank_within_parent#47] -Input [6]: [total_sum#14, i_category#8, i_class#7, lochierarchy#17, _w0#45, rank_within_parent#47] +Output [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, rank_within_parent#41] +Input [6]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, _w0#40, rank_within_parent#41] (34) TakeOrderedAndProject -Input [5]: [total_sum#14, i_category#8, i_class#7, lochierarchy#17, rank_within_parent#47] -Arguments: 100, [lochierarchy#17 DESC NULLS LAST, CASE WHEN (lochierarchy#17 = 0) THEN i_category#8 END ASC NULLS FIRST, rank_within_parent#47 ASC NULLS FIRST], [total_sum#14, i_category#8, i_class#7, lochierarchy#17, rank_within_parent#47] +Input [5]: [total_sum#12, i_category#8, i_class#7, lochierarchy#15, rank_within_parent#41] +Arguments: 100, [lochierarchy#15 DESC NULLS LAST, CASE WHEN (lochierarchy#15 = 0) THEN i_category#8 END ASC NULLS FIRST, rank_within_parent#41 ASC NULLS FIRST], [total_sum#12, i_category#8, i_class#7, lochierarchy#15, rank_within_parent#41] ===== Subqueries ===== @@ -214,25 +214,25 @@ BroadcastExchange (39) (35) Scan parquet default.date_dim -Output [2]: [d_date_sk#5, d_month_seq#48] +Output [2]: [d_date_sk#5, d_month_seq#42] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_month_seq), GreaterThanOrEqual(d_month_seq,1212), LessThanOrEqual(d_month_seq,1223), IsNotNull(d_date_sk)] ReadSchema: struct (36) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#48] +Input [2]: [d_date_sk#5, d_month_seq#42] (37) Filter [codegen id : 1] -Input [2]: [d_date_sk#5, d_month_seq#48] -Condition : (((isnotnull(d_month_seq#48) AND (d_month_seq#48 >= 1212)) AND (d_month_seq#48 <= 1223)) AND isnotnull(d_date_sk#5)) +Input [2]: [d_date_sk#5, d_month_seq#42] +Condition : (((isnotnull(d_month_seq#42) AND (d_month_seq#42 >= 1212)) AND (d_month_seq#42 <= 1223)) AND isnotnull(d_date_sk#5)) (38) Project [codegen id : 1] Output [1]: [d_date_sk#5] -Input [2]: [d_date_sk#5, d_month_seq#48] +Input [2]: [d_date_sk#5, d_month_seq#42] (39) BroadcastExchange Input [1]: [d_date_sk#5] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#49] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=7] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.sf100/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.sf100/explain.txt index fd1c4b503eaa8..e49d55b5e12aa 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.sf100/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98.sf100/explain.txt @@ -42,96 +42,96 @@ Condition : isnotnull(ss_item_sk#1) (4) Exchange Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] -Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [id=#5] +Arguments: hashpartitioning(ss_item_sk#1, 5), ENSURE_REQUIREMENTS, [plan_id=1] (5) Sort [codegen id : 2] Input [3]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3] Arguments: [ss_item_sk#1 ASC NULLS FIRST], false, 0 (6) Scan parquet default.item -Output [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Output [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] Batched: true Location [not included in comparison]/{warehouse_dir}/item] PushedFilters: [In(i_category, [Books ,Home ,Sports ]), IsNotNull(i_item_sk)] ReadSchema: struct (7) ColumnarToRow [codegen id : 3] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (8) Filter [codegen id : 3] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Condition : (i_category#11 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#6)) +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Condition : (i_category#10 IN (Sports ,Books ,Home ) AND isnotnull(i_item_sk#5)) (9) Exchange -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Arguments: hashpartitioning(i_item_sk#6, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Arguments: hashpartitioning(i_item_sk#5, 5), ENSURE_REQUIREMENTS, [plan_id=2] (10) Sort [codegen id : 4] -Input [6]: [i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Arguments: [i_item_sk#6 ASC NULLS FIRST], false, 0 +Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Arguments: [i_item_sk#5 ASC NULLS FIRST], false, 0 (11) SortMergeJoin [codegen id : 6] Left keys [1]: [ss_item_sk#1] -Right keys [1]: [i_item_sk#6] +Right keys [1]: [i_item_sk#5] Join condition: None (12) Project [codegen id : 6] -Output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#6, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] +Output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (13) ReusedExchange [Reuses operator id: 29] -Output [1]: [d_date_sk#13] +Output [1]: [d_date_sk#11] (14) BroadcastHashJoin [codegen id : 6] Left keys [1]: [ss_sold_date_sk#3] -Right keys [1]: [d_date_sk#13] +Right keys [1]: [d_date_sk#11] Join condition: None (15) Project [codegen id : 6] -Output [6]: [ss_ext_sales_price#2, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11, d_date_sk#13] +Output [6]: [ss_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#11] (16) HashAggregate [codegen id : 6] -Input [6]: [ss_ext_sales_price#2, i_item_id#7, i_item_desc#8, i_current_price#9, i_class#10, i_category#11] -Keys [5]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9] +Input [6]: [ss_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] +Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#14] -Results [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] (17) Exchange -Input [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] -Arguments: hashpartitioning(i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 7] -Input [6]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, sum#15] -Keys [5]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#17] -Results [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#17,17,2) AS itemrevenue#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#17,17,2) AS _w0#19, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#17,17,2) AS _w1#20] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] +Results [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w0#16, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w1#17] (19) Exchange -Input [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20] -Arguments: hashpartitioning(i_class#10, 5), ENSURE_REQUIREMENTS, [id=#21] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=4] (20) Sort [codegen id : 8] -Input [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20] -Arguments: [i_class#10 ASC NULLS FIRST], false, 0 +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: [i_class#9 ASC NULLS FIRST], false, 0 (21) Window -Input [8]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20] -Arguments: [sum(_w1#20) windowspecdefinition(i_class#10, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#22], [i_class#10] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: [sum(_w1#17) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#18], [i_class#9] (22) Project [codegen id : 9] -Output [7]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#19) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#22)), DecimalType(38,17)) AS revenueratio#23] -Input [9]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, _w0#19, _w1#20, _we0#22] +Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#16) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#18)), DecimalType(38,17)) AS revenueratio#19] +Input [9]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, _we0#18] (23) Exchange -Input [7]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, revenueratio#23] -Arguments: rangepartitioning(i_category#11 ASC NULLS FIRST, i_class#10 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#8 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] +Arguments: rangepartitioning(i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=5] (24) Sort [codegen id : 10] -Input [7]: [i_item_id#7, i_item_desc#8, i_category#11, i_class#10, i_current_price#9, itemrevenue#18, revenueratio#23] -Arguments: [i_category#11 ASC NULLS FIRST, i_class#10 ASC NULLS FIRST, i_item_id#7 ASC NULLS FIRST, i_item_desc#8 ASC NULLS FIRST, revenueratio#23 ASC NULLS FIRST], true, 0 +Input [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] +Arguments: [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST], true, 0 ===== Subqueries ===== @@ -144,25 +144,25 @@ BroadcastExchange (29) (25) Scan parquet default.date_dim -Output [2]: [d_date_sk#13, d_date#25] +Output [2]: [d_date_sk#11, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] ReadSchema: struct (26) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#13, d_date#25] +Input [2]: [d_date_sk#11, d_date#20] (27) Filter [codegen id : 1] -Input [2]: [d_date_sk#13, d_date#25] -Condition : (((isnotnull(d_date#25) AND (d_date#25 >= 1999-02-22)) AND (d_date#25 <= 1999-03-24)) AND isnotnull(d_date_sk#13)) +Input [2]: [d_date_sk#11, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-22)) AND (d_date#20 <= 1999-03-24)) AND isnotnull(d_date_sk#11)) (28) Project [codegen id : 1] -Output [1]: [d_date_sk#13] -Input [2]: [d_date_sk#13, d_date#25] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#20] (29) BroadcastExchange -Input [1]: [d_date_sk#13] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#26] +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=6] diff --git a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98/explain.txt b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98/explain.txt index 68e7dba19dbab..7d6f7604beb5a 100644 --- a/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98/explain.txt +++ b/sql/core/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q98/explain.txt @@ -53,7 +53,7 @@ Condition : (i_category#10 IN (Sports (7) BroadcastExchange Input [6]: [i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_item_sk#1] @@ -65,58 +65,58 @@ Output [7]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, i_item_desc#7 Input [9]: [ss_item_sk#1, ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_sk#5, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] (10) ReusedExchange [Reuses operator id: 26] -Output [1]: [d_date_sk#12] +Output [1]: [d_date_sk#11] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ss_sold_date_sk#3] -Right keys [1]: [d_date_sk#12] +Right keys [1]: [d_date_sk#11] Join condition: None (12) Project [codegen id : 3] Output [6]: [ss_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] -Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#12] +Input [8]: [ss_ext_sales_price#2, ss_sold_date_sk#3, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10, d_date_sk#11] (13) HashAggregate [codegen id : 3] Input [6]: [ss_ext_sales_price#2, i_item_id#6, i_item_desc#7, i_current_price#8, i_class#9, i_category#10] Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [partial_sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum#13] -Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] +Aggregate Attributes [1]: [sum#12] +Results [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] (14) Exchange -Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] -Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] +Arguments: hashpartitioning(i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (15) HashAggregate [codegen id : 4] -Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#14] +Input [6]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, sum#13] Keys [5]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8] Functions [1]: [sum(UnscaledValue(ss_ext_sales_price#2))] -Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#16] -Results [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#16,17,2) AS itemrevenue#17, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#16,17,2) AS _w0#18, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#16,17,2) AS _w1#19] +Aggregate Attributes [1]: [sum(UnscaledValue(ss_ext_sales_price#2))#14] +Results [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS itemrevenue#15, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w0#16, MakeDecimal(sum(UnscaledValue(ss_ext_sales_price#2))#14,17,2) AS _w1#17] (16) Exchange -Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19] -Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: hashpartitioning(i_class#9, 5), ENSURE_REQUIREMENTS, [plan_id=3] (17) Sort [codegen id : 5] -Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] Arguments: [i_class#9 ASC NULLS FIRST], false, 0 (18) Window -Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19] -Arguments: [sum(_w1#19) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#21], [i_class#9] +Input [8]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17] +Arguments: [sum(_w1#17) windowspecdefinition(i_class#9, specifiedwindowframe(RowFrame, unboundedpreceding$(), unboundedfollowing$())) AS _we0#18], [i_class#9] (19) Project [codegen id : 6] -Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#18) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#21)), DecimalType(38,17)) AS revenueratio#22] -Input [9]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, _w0#18, _w1#19, _we0#21] +Output [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(_w0#16) * 100.00), DecimalType(21,2)) as decimal(27,2))) / promote_precision(_we0#18)), DecimalType(38,17)) AS revenueratio#19] +Input [9]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, _w0#16, _w1#17, _we0#18] (20) Exchange -Input [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22] -Arguments: rangepartitioning(i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#22 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] +Arguments: rangepartitioning(i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=4] (21) Sort [codegen id : 7] -Input [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#17, revenueratio#22] -Arguments: [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#22 ASC NULLS FIRST], true, 0 +Input [7]: [i_item_id#6, i_item_desc#7, i_category#10, i_class#9, i_current_price#8, itemrevenue#15, revenueratio#19] +Arguments: [i_category#10 ASC NULLS FIRST, i_class#9 ASC NULLS FIRST, i_item_id#6 ASC NULLS FIRST, i_item_desc#7 ASC NULLS FIRST, revenueratio#19 ASC NULLS FIRST], true, 0 ===== Subqueries ===== @@ -129,25 +129,25 @@ BroadcastExchange (26) (22) Scan parquet default.date_dim -Output [2]: [d_date_sk#12, d_date#24] +Output [2]: [d_date_sk#11, d_date#20] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [IsNotNull(d_date), GreaterThanOrEqual(d_date,1999-02-22), LessThanOrEqual(d_date,1999-03-24), IsNotNull(d_date_sk)] ReadSchema: struct (23) ColumnarToRow [codegen id : 1] -Input [2]: [d_date_sk#12, d_date#24] +Input [2]: [d_date_sk#11, d_date#20] (24) Filter [codegen id : 1] -Input [2]: [d_date_sk#12, d_date#24] -Condition : (((isnotnull(d_date#24) AND (d_date#24 >= 1999-02-22)) AND (d_date#24 <= 1999-03-24)) AND isnotnull(d_date_sk#12)) +Input [2]: [d_date_sk#11, d_date#20] +Condition : (((isnotnull(d_date#20) AND (d_date#20 >= 1999-02-22)) AND (d_date#20 <= 1999-03-24)) AND isnotnull(d_date_sk#11)) (25) Project [codegen id : 1] -Output [1]: [d_date_sk#12] -Input [2]: [d_date_sk#12, d_date#24] +Output [1]: [d_date_sk#11] +Input [2]: [d_date_sk#11, d_date#20] (26) BroadcastExchange -Input [1]: [d_date_sk#12] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [id=#25] +Input [1]: [d_date_sk#11] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint)),false), [plan_id=5] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q1/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q1/explain.txt index c2fdfb24d2d85..85136accfbc2c 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q1/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q1/explain.txt @@ -37,20 +37,20 @@ Results [17]: [l_returnflag#5, l_linestatus#6, sum#23, isEmpty#24, sum#25, isEmp (6) Exchange Input [17]: [l_returnflag#5, l_linestatus#6, sum#23, isEmpty#24, sum#25, isEmpty#26, sum#27, isEmpty#28, sum#29, isEmpty#30, sum#31, count#32, sum#33, count#34, sum#35, count#36, count#37] -Arguments: hashpartitioning(l_returnflag#5, l_linestatus#6, 5), ENSURE_REQUIREMENTS, [id=#38] +Arguments: hashpartitioning(l_returnflag#5, l_linestatus#6, 5), ENSURE_REQUIREMENTS, [plan_id=1] (7) HashAggregate [codegen id : 2] Input [17]: [l_returnflag#5, l_linestatus#6, sum#23, isEmpty#24, sum#25, isEmpty#26, sum#27, isEmpty#28, sum#29, isEmpty#30, sum#31, count#32, sum#33, count#34, sum#35, count#36, count#37] Keys [2]: [l_returnflag#5, l_linestatus#6] Functions [8]: [sum(l_quantity#1), sum(l_extendedprice#2), sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))), sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax#4 as decimal(11,0)))), DecimalType(11,0)) as decimal(22,0)))), DecimalType(34,0))), avg(UnscaledValue(l_quantity#1)), avg(UnscaledValue(l_extendedprice#2)), avg(UnscaledValue(l_discount#3)), count(1)] -Aggregate Attributes [8]: [sum(l_quantity#1)#39, sum(l_extendedprice#2)#40, sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#41, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax#4 as decimal(11,0)))), DecimalType(11,0)) as decimal(22,0)))), DecimalType(34,0)))#42, avg(UnscaledValue(l_quantity#1))#43, avg(UnscaledValue(l_extendedprice#2))#44, avg(UnscaledValue(l_discount#3))#45, count(1)#46] -Results [10]: [l_returnflag#5, l_linestatus#6, sum(l_quantity#1)#39 AS sum_qty#47, sum(l_extendedprice#2)#40 AS sum_base_price#48, sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#41 AS sum_disc_price#49, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax#4 as decimal(11,0)))), DecimalType(11,0)) as decimal(22,0)))), DecimalType(34,0)))#42 AS sum_charge#50, cast((avg(UnscaledValue(l_quantity#1))#43 / 1.0) as decimal(14,4)) AS avg_qty#51, cast((avg(UnscaledValue(l_extendedprice#2))#44 / 1.0) as decimal(14,4)) AS avg_price#52, cast((avg(UnscaledValue(l_discount#3))#45 / 1.0) as decimal(14,4)) AS avg_disc#53, count(1)#46 AS count_order#54] +Aggregate Attributes [8]: [sum(l_quantity#1)#38, sum(l_extendedprice#2)#39, sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#40, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax#4 as decimal(11,0)))), DecimalType(11,0)) as decimal(22,0)))), DecimalType(34,0)))#41, avg(UnscaledValue(l_quantity#1))#42, avg(UnscaledValue(l_extendedprice#2))#43, avg(UnscaledValue(l_discount#3))#44, count(1)#45] +Results [10]: [l_returnflag#5, l_linestatus#6, sum(l_quantity#1)#38 AS sum_qty#46, sum(l_extendedprice#2)#39 AS sum_base_price#47, sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#40 AS sum_disc_price#48, sum(CheckOverflow((promote_precision(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0))) * promote_precision(cast(CheckOverflow((1 + promote_precision(cast(l_tax#4 as decimal(11,0)))), DecimalType(11,0)) as decimal(22,0)))), DecimalType(34,0)))#41 AS sum_charge#49, cast((avg(UnscaledValue(l_quantity#1))#42 / 1.0) as decimal(14,4)) AS avg_qty#50, cast((avg(UnscaledValue(l_extendedprice#2))#43 / 1.0) as decimal(14,4)) AS avg_price#51, cast((avg(UnscaledValue(l_discount#3))#44 / 1.0) as decimal(14,4)) AS avg_disc#52, count(1)#45 AS count_order#53] (8) Exchange -Input [10]: [l_returnflag#5, l_linestatus#6, sum_qty#47, sum_base_price#48, sum_disc_price#49, sum_charge#50, avg_qty#51, avg_price#52, avg_disc#53, count_order#54] -Arguments: rangepartitioning(l_returnflag#5 ASC NULLS FIRST, l_linestatus#6 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#55] +Input [10]: [l_returnflag#5, l_linestatus#6, sum_qty#46, sum_base_price#47, sum_disc_price#48, sum_charge#49, avg_qty#50, avg_price#51, avg_disc#52, count_order#53] +Arguments: rangepartitioning(l_returnflag#5 ASC NULLS FIRST, l_linestatus#6 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=2] (9) Sort [codegen id : 3] -Input [10]: [l_returnflag#5, l_linestatus#6, sum_qty#47, sum_base_price#48, sum_disc_price#49, sum_charge#50, avg_qty#51, avg_price#52, avg_disc#53, count_order#54] +Input [10]: [l_returnflag#5, l_linestatus#6, sum_qty#46, sum_base_price#47, sum_disc_price#48, sum_charge#49, avg_qty#50, avg_price#51, avg_disc#52, count_order#53] Arguments: [l_returnflag#5 ASC NULLS FIRST, l_linestatus#6 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpch-plan-stability/q10/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q10/explain.txt index 08be511944f36..d43b68c3fc34a 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q10/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q10/explain.txt @@ -62,7 +62,7 @@ Input [3]: [o_orderkey#8, o_custkey#9, o_orderdate#10] (8) BroadcastExchange Input [2]: [o_orderkey#8, o_custkey#9] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [id=#11] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [c_custkey#1] @@ -74,82 +74,82 @@ Output [8]: [c_custkey#1, c_name#2, c_address#3, c_nationkey#4, c_phone#5, c_acc Input [9]: [c_custkey#1, c_name#2, c_address#3, c_nationkey#4, c_phone#5, c_acctbal#6, c_comment#7, o_orderkey#8, o_custkey#9] (11) Scan parquet default.lineitem -Output [4]: [l_orderkey#12, l_extendedprice#13, l_discount#14, l_returnflag#15] +Output [4]: [l_orderkey#11, l_extendedprice#12, l_discount#13, l_returnflag#14] Batched: true Location [not included in comparison]/{warehouse_dir}/lineitem] PushedFilters: [IsNotNull(l_returnflag), EqualTo(l_returnflag,R), IsNotNull(l_orderkey)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [l_orderkey#12, l_extendedprice#13, l_discount#14, l_returnflag#15] +Input [4]: [l_orderkey#11, l_extendedprice#12, l_discount#13, l_returnflag#14] (13) Filter [codegen id : 2] -Input [4]: [l_orderkey#12, l_extendedprice#13, l_discount#14, l_returnflag#15] -Condition : ((isnotnull(l_returnflag#15) AND (l_returnflag#15 = R)) AND isnotnull(l_orderkey#12)) +Input [4]: [l_orderkey#11, l_extendedprice#12, l_discount#13, l_returnflag#14] +Condition : ((isnotnull(l_returnflag#14) AND (l_returnflag#14 = R)) AND isnotnull(l_orderkey#11)) (14) Project [codegen id : 2] -Output [3]: [l_orderkey#12, l_extendedprice#13, l_discount#14] -Input [4]: [l_orderkey#12, l_extendedprice#13, l_discount#14, l_returnflag#15] +Output [3]: [l_orderkey#11, l_extendedprice#12, l_discount#13] +Input [4]: [l_orderkey#11, l_extendedprice#12, l_discount#13, l_returnflag#14] (15) BroadcastExchange -Input [3]: [l_orderkey#12, l_extendedprice#13, l_discount#14] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#16] +Input [3]: [l_orderkey#11, l_extendedprice#12, l_discount#13] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 4] Left keys [1]: [o_orderkey#8] -Right keys [1]: [l_orderkey#12] +Right keys [1]: [l_orderkey#11] Join condition: None (17) Project [codegen id : 4] -Output [9]: [c_custkey#1, c_name#2, c_address#3, c_nationkey#4, c_phone#5, c_acctbal#6, c_comment#7, l_extendedprice#13, l_discount#14] -Input [11]: [c_custkey#1, c_name#2, c_address#3, c_nationkey#4, c_phone#5, c_acctbal#6, c_comment#7, o_orderkey#8, l_orderkey#12, l_extendedprice#13, l_discount#14] +Output [9]: [c_custkey#1, c_name#2, c_address#3, c_nationkey#4, c_phone#5, c_acctbal#6, c_comment#7, l_extendedprice#12, l_discount#13] +Input [11]: [c_custkey#1, c_name#2, c_address#3, c_nationkey#4, c_phone#5, c_acctbal#6, c_comment#7, o_orderkey#8, l_orderkey#11, l_extendedprice#12, l_discount#13] (18) Scan parquet default.nation -Output [2]: [n_nationkey#17, n_name#18] +Output [2]: [n_nationkey#15, n_name#16] Batched: true Location [not included in comparison]/{warehouse_dir}/nation] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct (19) ColumnarToRow [codegen id : 3] -Input [2]: [n_nationkey#17, n_name#18] +Input [2]: [n_nationkey#15, n_name#16] (20) Filter [codegen id : 3] -Input [2]: [n_nationkey#17, n_name#18] -Condition : isnotnull(n_nationkey#17) +Input [2]: [n_nationkey#15, n_name#16] +Condition : isnotnull(n_nationkey#15) (21) BroadcastExchange -Input [2]: [n_nationkey#17, n_name#18] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#19] +Input [2]: [n_nationkey#15, n_name#16] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=3] (22) BroadcastHashJoin [codegen id : 4] Left keys [1]: [c_nationkey#4] -Right keys [1]: [n_nationkey#17] +Right keys [1]: [n_nationkey#15] Join condition: None (23) Project [codegen id : 4] -Output [9]: [c_custkey#1, c_name#2, c_address#3, c_phone#5, c_acctbal#6, c_comment#7, l_extendedprice#13, l_discount#14, n_name#18] -Input [11]: [c_custkey#1, c_name#2, c_address#3, c_nationkey#4, c_phone#5, c_acctbal#6, c_comment#7, l_extendedprice#13, l_discount#14, n_nationkey#17, n_name#18] +Output [9]: [c_custkey#1, c_name#2, c_address#3, c_phone#5, c_acctbal#6, c_comment#7, l_extendedprice#12, l_discount#13, n_name#16] +Input [11]: [c_custkey#1, c_name#2, c_address#3, c_nationkey#4, c_phone#5, c_acctbal#6, c_comment#7, l_extendedprice#12, l_discount#13, n_nationkey#15, n_name#16] (24) HashAggregate [codegen id : 4] -Input [9]: [c_custkey#1, c_name#2, c_address#3, c_phone#5, c_acctbal#6, c_comment#7, l_extendedprice#13, l_discount#14, n_name#18] -Keys [7]: [c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#18, c_address#3, c_comment#7] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#13 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#14 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] -Aggregate Attributes [2]: [sum#20, isEmpty#21] -Results [9]: [c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#18, c_address#3, c_comment#7, sum#22, isEmpty#23] +Input [9]: [c_custkey#1, c_name#2, c_address#3, c_phone#5, c_acctbal#6, c_comment#7, l_extendedprice#12, l_discount#13, n_name#16] +Keys [7]: [c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#16, c_address#3, c_comment#7] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#12 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#13 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] +Aggregate Attributes [2]: [sum#17, isEmpty#18] +Results [9]: [c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#16, c_address#3, c_comment#7, sum#19, isEmpty#20] (25) Exchange -Input [9]: [c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#18, c_address#3, c_comment#7, sum#22, isEmpty#23] -Arguments: hashpartitioning(c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#18, c_address#3, c_comment#7, 5), ENSURE_REQUIREMENTS, [id=#24] +Input [9]: [c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#16, c_address#3, c_comment#7, sum#19, isEmpty#20] +Arguments: hashpartitioning(c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#16, c_address#3, c_comment#7, 5), ENSURE_REQUIREMENTS, [plan_id=4] (26) HashAggregate [codegen id : 5] -Input [9]: [c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#18, c_address#3, c_comment#7, sum#22, isEmpty#23] -Keys [7]: [c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#18, c_address#3, c_comment#7] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#13 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#14 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#13 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#14 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#25] -Results [8]: [c_custkey#1, c_name#2, sum(CheckOverflow((promote_precision(cast(l_extendedprice#13 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#14 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#25 AS revenue#26, c_acctbal#6, n_name#18, c_address#3, c_phone#5, c_comment#7] +Input [9]: [c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#16, c_address#3, c_comment#7, sum#19, isEmpty#20] +Keys [7]: [c_custkey#1, c_name#2, c_acctbal#6, c_phone#5, n_name#16, c_address#3, c_comment#7] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#12 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#13 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#12 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#13 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#21] +Results [8]: [c_custkey#1, c_name#2, sum(CheckOverflow((promote_precision(cast(l_extendedprice#12 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#13 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#21 AS revenue#22, c_acctbal#6, n_name#16, c_address#3, c_phone#5, c_comment#7] (27) TakeOrderedAndProject -Input [8]: [c_custkey#1, c_name#2, revenue#26, c_acctbal#6, n_name#18, c_address#3, c_phone#5, c_comment#7] -Arguments: 20, [revenue#26 DESC NULLS LAST], [c_custkey#1, c_name#2, revenue#26, c_acctbal#6, n_name#18, c_address#3, c_phone#5, c_comment#7] +Input [8]: [c_custkey#1, c_name#2, revenue#22, c_acctbal#6, n_name#16, c_address#3, c_phone#5, c_comment#7] +Arguments: 20, [revenue#22 DESC NULLS LAST], [c_custkey#1, c_name#2, revenue#22, c_acctbal#6, n_name#16, c_address#3, c_phone#5, c_comment#7] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q11/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q11/explain.txt index bc7e629fd7dd8..d538953f141b8 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q11/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q11/explain.txt @@ -53,7 +53,7 @@ Condition : (isnotnull(s_suppkey#5) AND isnotnull(s_nationkey#6)) (7) BroadcastExchange Input [2]: [s_suppkey#5, s_nationkey#6] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ps_suppkey#2] @@ -65,69 +65,69 @@ Output [4]: [ps_partkey#1, ps_availqty#3, ps_supplycost#4, s_nationkey#6] Input [6]: [ps_partkey#1, ps_suppkey#2, ps_availqty#3, ps_supplycost#4, s_suppkey#5, s_nationkey#6] (10) Scan parquet default.nation -Output [2]: [n_nationkey#8, n_name#9] +Output [2]: [n_nationkey#7, n_name#8] Batched: true Location [not included in comparison]/{warehouse_dir}/nation] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,GERMANY), IsNotNull(n_nationkey)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [2]: [n_nationkey#8, n_name#9] +Input [2]: [n_nationkey#7, n_name#8] (12) Filter [codegen id : 2] -Input [2]: [n_nationkey#8, n_name#9] -Condition : ((isnotnull(n_name#9) AND (n_name#9 = GERMANY)) AND isnotnull(n_nationkey#8)) +Input [2]: [n_nationkey#7, n_name#8] +Condition : ((isnotnull(n_name#8) AND (n_name#8 = GERMANY)) AND isnotnull(n_nationkey#7)) (13) Project [codegen id : 2] -Output [1]: [n_nationkey#8] -Input [2]: [n_nationkey#8, n_name#9] +Output [1]: [n_nationkey#7] +Input [2]: [n_nationkey#7, n_name#8] (14) BroadcastExchange -Input [1]: [n_nationkey#8] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#10] +Input [1]: [n_nationkey#7] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 3] Left keys [1]: [s_nationkey#6] -Right keys [1]: [n_nationkey#8] +Right keys [1]: [n_nationkey#7] Join condition: None (16) Project [codegen id : 3] Output [3]: [ps_partkey#1, ps_availqty#3, ps_supplycost#4] -Input [5]: [ps_partkey#1, ps_availqty#3, ps_supplycost#4, s_nationkey#6, n_nationkey#8] +Input [5]: [ps_partkey#1, ps_availqty#3, ps_supplycost#4, s_nationkey#6, n_nationkey#7] (17) HashAggregate [codegen id : 3] Input [3]: [ps_partkey#1, ps_availqty#3, ps_supplycost#4] Keys [1]: [ps_partkey#1] Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#4) * promote_precision(cast(ps_availqty#3 as decimal(10,0)))), DecimalType(21,0)))] -Aggregate Attributes [2]: [sum#11, isEmpty#12] -Results [3]: [ps_partkey#1, sum#13, isEmpty#14] +Aggregate Attributes [2]: [sum#9, isEmpty#10] +Results [3]: [ps_partkey#1, sum#11, isEmpty#12] (18) Exchange -Input [3]: [ps_partkey#1, sum#13, isEmpty#14] -Arguments: hashpartitioning(ps_partkey#1, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [3]: [ps_partkey#1, sum#11, isEmpty#12] +Arguments: hashpartitioning(ps_partkey#1, 5), ENSURE_REQUIREMENTS, [plan_id=3] (19) HashAggregate [codegen id : 4] -Input [3]: [ps_partkey#1, sum#13, isEmpty#14] +Input [3]: [ps_partkey#1, sum#11, isEmpty#12] Keys [1]: [ps_partkey#1] Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#4) * promote_precision(cast(ps_availqty#3 as decimal(10,0)))), DecimalType(21,0)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#4) * promote_precision(cast(ps_availqty#3 as decimal(10,0)))), DecimalType(21,0)))#16] -Results [2]: [ps_partkey#1, sum(CheckOverflow((promote_precision(ps_supplycost#4) * promote_precision(cast(ps_availqty#3 as decimal(10,0)))), DecimalType(21,0)))#16 AS value#17] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#4) * promote_precision(cast(ps_availqty#3 as decimal(10,0)))), DecimalType(21,0)))#13] +Results [2]: [ps_partkey#1, sum(CheckOverflow((promote_precision(ps_supplycost#4) * promote_precision(cast(ps_availqty#3 as decimal(10,0)))), DecimalType(21,0)))#13 AS value#14] (20) Filter [codegen id : 4] -Input [2]: [ps_partkey#1, value#17] -Condition : (isnotnull(value#17) AND (cast(value#17 as decimal(38,6)) > Subquery scalar-subquery#18, [id=#19])) +Input [2]: [ps_partkey#1, value#14] +Condition : (isnotnull(value#14) AND (cast(value#14 as decimal(38,6)) > Subquery scalar-subquery#15, [id=#16])) (21) Exchange -Input [2]: [ps_partkey#1, value#17] -Arguments: rangepartitioning(value#17 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [2]: [ps_partkey#1, value#14] +Arguments: rangepartitioning(value#14 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=4] (22) Sort [codegen id : 5] -Input [2]: [ps_partkey#1, value#17] -Arguments: [value#17 DESC NULLS LAST], true, 0 +Input [2]: [ps_partkey#1, value#14] +Arguments: [value#14 DESC NULLS LAST], true, 0 ===== Subqueries ===== -Subquery:1 Hosting operator id = 20 Hosting Expression = Subquery scalar-subquery#18, [id=#19] +Subquery:1 Hosting operator id = 20 Hosting Expression = Subquery scalar-subquery#15, [id=#16] * HashAggregate (34) +- Exchange (33) +- * HashAggregate (32) @@ -143,59 +143,59 @@ Subquery:1 Hosting operator id = 20 Hosting Expression = Subquery scalar-subquer (23) Scan parquet default.partsupp -Output [3]: [ps_suppkey#21, ps_availqty#22, ps_supplycost#23] +Output [3]: [ps_suppkey#17, ps_availqty#18, ps_supplycost#19] Batched: true Location [not included in comparison]/{warehouse_dir}/partsupp] PushedFilters: [IsNotNull(ps_suppkey)] ReadSchema: struct (24) ColumnarToRow [codegen id : 3] -Input [3]: [ps_suppkey#21, ps_availqty#22, ps_supplycost#23] +Input [3]: [ps_suppkey#17, ps_availqty#18, ps_supplycost#19] (25) Filter [codegen id : 3] -Input [3]: [ps_suppkey#21, ps_availqty#22, ps_supplycost#23] -Condition : isnotnull(ps_suppkey#21) +Input [3]: [ps_suppkey#17, ps_availqty#18, ps_supplycost#19] +Condition : isnotnull(ps_suppkey#17) (26) ReusedExchange [Reuses operator id: 7] -Output [2]: [s_suppkey#24, s_nationkey#25] +Output [2]: [s_suppkey#20, s_nationkey#21] (27) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [ps_suppkey#21] -Right keys [1]: [s_suppkey#24] +Left keys [1]: [ps_suppkey#17] +Right keys [1]: [s_suppkey#20] Join condition: None (28) Project [codegen id : 3] -Output [3]: [ps_availqty#22, ps_supplycost#23, s_nationkey#25] -Input [5]: [ps_suppkey#21, ps_availqty#22, ps_supplycost#23, s_suppkey#24, s_nationkey#25] +Output [3]: [ps_availqty#18, ps_supplycost#19, s_nationkey#21] +Input [5]: [ps_suppkey#17, ps_availqty#18, ps_supplycost#19, s_suppkey#20, s_nationkey#21] (29) ReusedExchange [Reuses operator id: 14] -Output [1]: [n_nationkey#26] +Output [1]: [n_nationkey#22] (30) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [s_nationkey#25] -Right keys [1]: [n_nationkey#26] +Left keys [1]: [s_nationkey#21] +Right keys [1]: [n_nationkey#22] Join condition: None (31) Project [codegen id : 3] -Output [2]: [ps_availqty#22, ps_supplycost#23] -Input [4]: [ps_availqty#22, ps_supplycost#23, s_nationkey#25, n_nationkey#26] +Output [2]: [ps_availqty#18, ps_supplycost#19] +Input [4]: [ps_availqty#18, ps_supplycost#19, s_nationkey#21, n_nationkey#22] (32) HashAggregate [codegen id : 3] -Input [2]: [ps_availqty#22, ps_supplycost#23] +Input [2]: [ps_availqty#18, ps_supplycost#19] Keys: [] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#23) * promote_precision(cast(ps_availqty#22 as decimal(10,0)))), DecimalType(21,0)))] -Aggregate Attributes [2]: [sum#27, isEmpty#28] -Results [2]: [sum#29, isEmpty#30] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(ps_supplycost#19) * promote_precision(cast(ps_availqty#18 as decimal(10,0)))), DecimalType(21,0)))] +Aggregate Attributes [2]: [sum#23, isEmpty#24] +Results [2]: [sum#25, isEmpty#26] (33) Exchange -Input [2]: [sum#29, isEmpty#30] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#31] +Input [2]: [sum#25, isEmpty#26] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] (34) HashAggregate [codegen id : 4] -Input [2]: [sum#29, isEmpty#30] +Input [2]: [sum#25, isEmpty#26] Keys: [] -Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#23) * promote_precision(cast(ps_availqty#22 as decimal(10,0)))), DecimalType(21,0)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#23) * promote_precision(cast(ps_availqty#22 as decimal(10,0)))), DecimalType(21,0)))#32] -Results [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#23) * promote_precision(cast(ps_availqty#22 as decimal(10,0)))), DecimalType(21,0)))#32 as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#33] +Functions [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#19) * promote_precision(cast(ps_availqty#18 as decimal(10,0)))), DecimalType(21,0)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(ps_supplycost#19) * promote_precision(cast(ps_availqty#18 as decimal(10,0)))), DecimalType(21,0)))#27] +Results [1]: [CheckOverflow((promote_precision(cast(sum(CheckOverflow((promote_precision(ps_supplycost#19) * promote_precision(cast(ps_availqty#18 as decimal(10,0)))), DecimalType(21,0)))#27 as decimal(38,10))) * 0.0001000000), DecimalType(38,6)) AS (sum((ps_supplycost * ps_availqty)) * 0.0001000000)#28] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q12/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q12/explain.txt index 8d696607a569c..b966c6070e8ad 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q12/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q12/explain.txt @@ -50,7 +50,7 @@ Input [5]: [l_orderkey#3, l_shipdate#4, l_commitdate#5, l_receiptdate#6, l_shipm (8) BroadcastExchange Input [2]: [l_orderkey#3, l_shipmode#7] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 2] Left keys [1]: [o_orderkey#1] @@ -65,25 +65,25 @@ Input [4]: [o_orderkey#1, o_orderpriority#2, l_orderkey#3, l_shipmode#7] Input [2]: [o_orderpriority#2, l_shipmode#7] Keys [1]: [l_shipmode#7] Functions [2]: [partial_sum(CASE WHEN ((o_orderpriority#2 = 1-URGENT) OR (o_orderpriority#2 = 2-HIGH)) THEN 1 ELSE 0 END), partial_sum(CASE WHEN (NOT (o_orderpriority#2 = 1-URGENT) AND NOT (o_orderpriority#2 = 2-HIGH)) THEN 1 ELSE 0 END)] -Aggregate Attributes [2]: [sum#9, sum#10] -Results [3]: [l_shipmode#7, sum#11, sum#12] +Aggregate Attributes [2]: [sum#8, sum#9] +Results [3]: [l_shipmode#7, sum#10, sum#11] (12) Exchange -Input [3]: [l_shipmode#7, sum#11, sum#12] -Arguments: hashpartitioning(l_shipmode#7, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [3]: [l_shipmode#7, sum#10, sum#11] +Arguments: hashpartitioning(l_shipmode#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (13) HashAggregate [codegen id : 3] -Input [3]: [l_shipmode#7, sum#11, sum#12] +Input [3]: [l_shipmode#7, sum#10, sum#11] Keys [1]: [l_shipmode#7] Functions [2]: [sum(CASE WHEN ((o_orderpriority#2 = 1-URGENT) OR (o_orderpriority#2 = 2-HIGH)) THEN 1 ELSE 0 END), sum(CASE WHEN (NOT (o_orderpriority#2 = 1-URGENT) AND NOT (o_orderpriority#2 = 2-HIGH)) THEN 1 ELSE 0 END)] -Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#2 = 1-URGENT) OR (o_orderpriority#2 = 2-HIGH)) THEN 1 ELSE 0 END)#14, sum(CASE WHEN (NOT (o_orderpriority#2 = 1-URGENT) AND NOT (o_orderpriority#2 = 2-HIGH)) THEN 1 ELSE 0 END)#15] -Results [3]: [l_shipmode#7, sum(CASE WHEN ((o_orderpriority#2 = 1-URGENT) OR (o_orderpriority#2 = 2-HIGH)) THEN 1 ELSE 0 END)#14 AS high_line_count#16, sum(CASE WHEN (NOT (o_orderpriority#2 = 1-URGENT) AND NOT (o_orderpriority#2 = 2-HIGH)) THEN 1 ELSE 0 END)#15 AS low_line_count#17] +Aggregate Attributes [2]: [sum(CASE WHEN ((o_orderpriority#2 = 1-URGENT) OR (o_orderpriority#2 = 2-HIGH)) THEN 1 ELSE 0 END)#12, sum(CASE WHEN (NOT (o_orderpriority#2 = 1-URGENT) AND NOT (o_orderpriority#2 = 2-HIGH)) THEN 1 ELSE 0 END)#13] +Results [3]: [l_shipmode#7, sum(CASE WHEN ((o_orderpriority#2 = 1-URGENT) OR (o_orderpriority#2 = 2-HIGH)) THEN 1 ELSE 0 END)#12 AS high_line_count#14, sum(CASE WHEN (NOT (o_orderpriority#2 = 1-URGENT) AND NOT (o_orderpriority#2 = 2-HIGH)) THEN 1 ELSE 0 END)#13 AS low_line_count#15] (14) Exchange -Input [3]: [l_shipmode#7, high_line_count#16, low_line_count#17] -Arguments: rangepartitioning(l_shipmode#7 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#18] +Input [3]: [l_shipmode#7, high_line_count#14, low_line_count#15] +Arguments: rangepartitioning(l_shipmode#7 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=3] (15) Sort [codegen id : 4] -Input [3]: [l_shipmode#7, high_line_count#16, low_line_count#17] +Input [3]: [l_shipmode#7, high_line_count#14, low_line_count#15] Arguments: [l_shipmode#7 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpch-plan-stability/q13/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q13/explain.txt index ade70cd509a61..01aa3d3074d6f 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q13/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q13/explain.txt @@ -47,7 +47,7 @@ Input [3]: [o_orderkey#2, o_custkey#3, o_comment#4] (7) BroadcastExchange Input [2]: [o_orderkey#2, o_custkey#3] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [id=#5] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 2] Left keys [1]: [c_custkey#1] @@ -62,43 +62,43 @@ Input [3]: [c_custkey#1, o_orderkey#2, o_custkey#3] Input [2]: [c_custkey#1, o_orderkey#2] Keys [1]: [c_custkey#1] Functions [1]: [partial_count(o_orderkey#2)] -Aggregate Attributes [1]: [count#6] -Results [2]: [c_custkey#1, count#7] +Aggregate Attributes [1]: [count#5] +Results [2]: [c_custkey#1, count#6] (11) Exchange -Input [2]: [c_custkey#1, count#7] -Arguments: hashpartitioning(c_custkey#1, 5), ENSURE_REQUIREMENTS, [id=#8] +Input [2]: [c_custkey#1, count#6] +Arguments: hashpartitioning(c_custkey#1, 5), ENSURE_REQUIREMENTS, [plan_id=2] (12) HashAggregate [codegen id : 3] -Input [2]: [c_custkey#1, count#7] +Input [2]: [c_custkey#1, count#6] Keys [1]: [c_custkey#1] Functions [1]: [count(o_orderkey#2)] -Aggregate Attributes [1]: [count(o_orderkey#2)#9] -Results [1]: [count(o_orderkey#2)#9 AS c_count#10] +Aggregate Attributes [1]: [count(o_orderkey#2)#7] +Results [1]: [count(o_orderkey#2)#7 AS c_count#8] (13) HashAggregate [codegen id : 3] -Input [1]: [c_count#10] -Keys [1]: [c_count#10] +Input [1]: [c_count#8] +Keys [1]: [c_count#8] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#11] -Results [2]: [c_count#10, count#12] +Aggregate Attributes [1]: [count#9] +Results [2]: [c_count#8, count#10] (14) Exchange -Input [2]: [c_count#10, count#12] -Arguments: hashpartitioning(c_count#10, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [2]: [c_count#8, count#10] +Arguments: hashpartitioning(c_count#8, 5), ENSURE_REQUIREMENTS, [plan_id=3] (15) HashAggregate [codegen id : 4] -Input [2]: [c_count#10, count#12] -Keys [1]: [c_count#10] +Input [2]: [c_count#8, count#10] +Keys [1]: [c_count#8] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#14] -Results [2]: [c_count#10, count(1)#14 AS custdist#15] +Aggregate Attributes [1]: [count(1)#11] +Results [2]: [c_count#8, count(1)#11 AS custdist#12] (16) Exchange -Input [2]: [c_count#10, custdist#15] -Arguments: rangepartitioning(custdist#15 DESC NULLS LAST, c_count#10 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#16] +Input [2]: [c_count#8, custdist#12] +Arguments: rangepartitioning(custdist#12 DESC NULLS LAST, c_count#8 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=4] (17) Sort [codegen id : 5] -Input [2]: [c_count#10, custdist#15] -Arguments: [custdist#15 DESC NULLS LAST, c_count#10 DESC NULLS LAST], true, 0 +Input [2]: [c_count#8, custdist#12] +Arguments: [custdist#12 DESC NULLS LAST, c_count#8 DESC NULLS LAST], true, 0 diff --git a/sql/core/src/test/resources/tpch-plan-stability/q14/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q14/explain.txt index 0e923aebe1e11..7a0dd83d4e1a0 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q14/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q14/explain.txt @@ -48,7 +48,7 @@ Condition : isnotnull(p_partkey#5) (8) BroadcastExchange Input [2]: [p_partkey#5, p_type#6] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 2] Left keys [1]: [l_partkey#1] @@ -63,17 +63,17 @@ Input [5]: [l_partkey#1, l_extendedprice#2, l_discount#3, p_partkey#5, p_type#6] Input [3]: [l_extendedprice#2, l_discount#3, p_type#6] Keys: [] Functions [2]: [partial_sum(CASE WHEN StartsWith(p_type#6, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) ELSE 0 END), partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] -Aggregate Attributes [4]: [sum#8, isEmpty#9, sum#10, isEmpty#11] -Results [4]: [sum#12, isEmpty#13, sum#14, isEmpty#15] +Aggregate Attributes [4]: [sum#7, isEmpty#8, sum#9, isEmpty#10] +Results [4]: [sum#11, isEmpty#12, sum#13, isEmpty#14] (12) Exchange -Input [4]: [sum#12, isEmpty#13, sum#14, isEmpty#15] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#16] +Input [4]: [sum#11, isEmpty#12, sum#13, isEmpty#14] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] (13) HashAggregate [codegen id : 3] -Input [4]: [sum#12, isEmpty#13, sum#14, isEmpty#15] +Input [4]: [sum#11, isEmpty#12, sum#13, isEmpty#14] Keys: [] Functions [2]: [sum(CASE WHEN StartsWith(p_type#6, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) ELSE 0 END), sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] -Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#6, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) ELSE 0 END)#17, sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#18] -Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.00 * promote_precision(cast(sum(CASE WHEN StartsWith(p_type#6, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) ELSE 0 END)#17 as decimal(34,2)))), DecimalType(38,2))) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#18 as decimal(38,2)))), DecimalType(38,6)) AS promo_revenue#19] +Aggregate Attributes [2]: [sum(CASE WHEN StartsWith(p_type#6, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) ELSE 0 END)#15, sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#16] +Results [1]: [CheckOverflow((promote_precision(CheckOverflow((100.00 * promote_precision(cast(sum(CASE WHEN StartsWith(p_type#6, PROMO) THEN CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) ELSE 0 END)#15 as decimal(34,2)))), DecimalType(38,2))) / promote_precision(cast(sum(CheckOverflow((promote_precision(cast(l_extendedprice#2 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#3 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#16 as decimal(38,2)))), DecimalType(38,6)) AS promo_revenue#17] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q15/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q15/explain.txt index a615b73893782..dc6bfda4a7309 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q15/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q15/explain.txt @@ -58,43 +58,43 @@ Results [3]: [l_suppkey#5, sum#11, isEmpty#12] (9) Exchange Input [3]: [l_suppkey#5, sum#11, isEmpty#12] -Arguments: hashpartitioning(l_suppkey#5, 5), ENSURE_REQUIREMENTS, [id=#13] +Arguments: hashpartitioning(l_suppkey#5, 5), ENSURE_REQUIREMENTS, [plan_id=1] (10) HashAggregate [codegen id : 2] Input [3]: [l_suppkey#5, sum#11, isEmpty#12] Keys [1]: [l_suppkey#5] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#14] -Results [2]: [l_suppkey#5 AS supplier_no#15, sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#14 AS total_revenue#16] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#13] +Results [2]: [l_suppkey#5 AS supplier_no#14, sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#13 AS total_revenue#15] (11) Filter [codegen id : 2] -Input [2]: [supplier_no#15, total_revenue#16] -Condition : (isnotnull(total_revenue#16) AND (total_revenue#16 = Subquery scalar-subquery#17, [id=#18])) +Input [2]: [supplier_no#14, total_revenue#15] +Condition : (isnotnull(total_revenue#15) AND (total_revenue#15 = Subquery scalar-subquery#16, [id=#17])) (12) BroadcastExchange -Input [2]: [supplier_no#15, total_revenue#16] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#19] +Input [2]: [supplier_no#14, total_revenue#15] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=2] (13) BroadcastHashJoin [codegen id : 3] Left keys [1]: [s_suppkey#1] -Right keys [1]: [supplier_no#15] +Right keys [1]: [supplier_no#14] Join condition: None (14) Project [codegen id : 3] -Output [5]: [s_suppkey#1, s_name#2, s_address#3, s_phone#4, total_revenue#16] -Input [6]: [s_suppkey#1, s_name#2, s_address#3, s_phone#4, supplier_no#15, total_revenue#16] +Output [5]: [s_suppkey#1, s_name#2, s_address#3, s_phone#4, total_revenue#15] +Input [6]: [s_suppkey#1, s_name#2, s_address#3, s_phone#4, supplier_no#14, total_revenue#15] (15) Exchange -Input [5]: [s_suppkey#1, s_name#2, s_address#3, s_phone#4, total_revenue#16] -Arguments: rangepartitioning(s_suppkey#1 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [5]: [s_suppkey#1, s_name#2, s_address#3, s_phone#4, total_revenue#15] +Arguments: rangepartitioning(s_suppkey#1 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=3] (16) Sort [codegen id : 4] -Input [5]: [s_suppkey#1, s_name#2, s_address#3, s_phone#4, total_revenue#16] +Input [5]: [s_suppkey#1, s_name#2, s_address#3, s_phone#4, total_revenue#15] Arguments: [s_suppkey#1 ASC NULLS FIRST], true, 0 ===== Subqueries ===== -Subquery:1 Hosting operator id = 11 Hosting Expression = Subquery scalar-subquery#17, [id=#18] +Subquery:1 Hosting operator id = 11 Hosting Expression = Subquery scalar-subquery#16, [id=#17] * HashAggregate (26) +- Exchange (25) +- * HashAggregate (24) @@ -129,36 +129,36 @@ Input [4]: [l_suppkey#5, l_extendedprice#6, l_discount#7, l_shipdate#8] Input [3]: [l_suppkey#5, l_extendedprice#6, l_discount#7] Keys [1]: [l_suppkey#5] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] -Aggregate Attributes [2]: [sum#21, isEmpty#22] -Results [3]: [l_suppkey#5, sum#23, isEmpty#24] +Aggregate Attributes [2]: [sum#18, isEmpty#19] +Results [3]: [l_suppkey#5, sum#20, isEmpty#21] (22) Exchange -Input [3]: [l_suppkey#5, sum#23, isEmpty#24] -Arguments: hashpartitioning(l_suppkey#5, 5), ENSURE_REQUIREMENTS, [id=#25] +Input [3]: [l_suppkey#5, sum#20, isEmpty#21] +Arguments: hashpartitioning(l_suppkey#5, 5), ENSURE_REQUIREMENTS, [plan_id=4] (23) HashAggregate [codegen id : 2] -Input [3]: [l_suppkey#5, sum#23, isEmpty#24] +Input [3]: [l_suppkey#5, sum#20, isEmpty#21] Keys [1]: [l_suppkey#5] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#14] -Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#14 AS total_revenue#16] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#13] +Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#13 AS total_revenue#15] (24) HashAggregate [codegen id : 2] -Input [1]: [total_revenue#16] +Input [1]: [total_revenue#15] Keys: [] -Functions [1]: [partial_max(total_revenue#16)] -Aggregate Attributes [1]: [max#26] -Results [1]: [max#27] +Functions [1]: [partial_max(total_revenue#15)] +Aggregate Attributes [1]: [max#22] +Results [1]: [max#23] (25) Exchange -Input [1]: [max#27] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#28] +Input [1]: [max#23] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=5] (26) HashAggregate [codegen id : 3] -Input [1]: [max#27] +Input [1]: [max#23] Keys: [] -Functions [1]: [max(total_revenue#16)] -Aggregate Attributes [1]: [max(total_revenue#16)#29] -Results [1]: [max(total_revenue#16)#29 AS max(total_revenue)#30] +Functions [1]: [max(total_revenue#15)] +Aggregate Attributes [1]: [max(total_revenue#15)#24] +Results [1]: [max(total_revenue#15)#24 AS max(total_revenue)#25] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q16/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q16/explain.txt index 462da91904e53..f1a2383668b66 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q16/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q16/explain.txt @@ -58,7 +58,7 @@ Input [2]: [s_suppkey#3, s_comment#4] (8) BroadcastExchange Input [1]: [s_suppkey#3] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),true), [id=#5] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),true), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ps_suppkey#2] @@ -66,73 +66,73 @@ Right keys [1]: [s_suppkey#3] Join condition: None (10) Scan parquet default.part -Output [4]: [p_partkey#6, p_brand#7, p_type#8, p_size#9] +Output [4]: [p_partkey#5, p_brand#6, p_type#7, p_size#8] Batched: true Location [not included in comparison]/{warehouse_dir}/part] -PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#10)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] +PushedFilters: [IsNotNull(p_brand), IsNotNull(p_type), Not(EqualTo(p_brand,Brand#9)), Not(StringStartsWith(p_type,MEDIUM POLISHED)), In(p_size, [14,19,23,3,36,45,49,9]), IsNotNull(p_partkey)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [4]: [p_partkey#6, p_brand#7, p_type#8, p_size#9] +Input [4]: [p_partkey#5, p_brand#6, p_type#7, p_size#8] (12) Filter [codegen id : 2] -Input [4]: [p_partkey#6, p_brand#7, p_type#8, p_size#9] -Condition : (((((isnotnull(p_brand#7) AND isnotnull(p_type#8)) AND NOT (p_brand#7 = Brand#10)) AND NOT StartsWith(p_type#8, MEDIUM POLISHED)) AND p_size#9 IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#6)) +Input [4]: [p_partkey#5, p_brand#6, p_type#7, p_size#8] +Condition : (((((isnotnull(p_brand#6) AND isnotnull(p_type#7)) AND NOT (p_brand#6 = Brand#9)) AND NOT StartsWith(p_type#7, MEDIUM POLISHED)) AND p_size#8 IN (49,14,23,45,19,3,36,9)) AND isnotnull(p_partkey#5)) (13) BroadcastExchange -Input [4]: [p_partkey#6, p_brand#7, p_type#8, p_size#9] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#11] +Input [4]: [p_partkey#5, p_brand#6, p_type#7, p_size#8] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 3] Left keys [1]: [ps_partkey#1] -Right keys [1]: [p_partkey#6] +Right keys [1]: [p_partkey#5] Join condition: None (15) Project [codegen id : 3] -Output [4]: [ps_suppkey#2, p_brand#7, p_type#8, p_size#9] -Input [6]: [ps_partkey#1, ps_suppkey#2, p_partkey#6, p_brand#7, p_type#8, p_size#9] +Output [4]: [ps_suppkey#2, p_brand#6, p_type#7, p_size#8] +Input [6]: [ps_partkey#1, ps_suppkey#2, p_partkey#5, p_brand#6, p_type#7, p_size#8] (16) HashAggregate [codegen id : 3] -Input [4]: [ps_suppkey#2, p_brand#7, p_type#8, p_size#9] -Keys [4]: [p_brand#7, p_type#8, p_size#9, ps_suppkey#2] +Input [4]: [ps_suppkey#2, p_brand#6, p_type#7, p_size#8] +Keys [4]: [p_brand#6, p_type#7, p_size#8, ps_suppkey#2] Functions: [] Aggregate Attributes: [] -Results [4]: [p_brand#7, p_type#8, p_size#9, ps_suppkey#2] +Results [4]: [p_brand#6, p_type#7, p_size#8, ps_suppkey#2] (17) Exchange -Input [4]: [p_brand#7, p_type#8, p_size#9, ps_suppkey#2] -Arguments: hashpartitioning(p_brand#7, p_type#8, p_size#9, ps_suppkey#2, 5), ENSURE_REQUIREMENTS, [id=#12] +Input [4]: [p_brand#6, p_type#7, p_size#8, ps_suppkey#2] +Arguments: hashpartitioning(p_brand#6, p_type#7, p_size#8, ps_suppkey#2, 5), ENSURE_REQUIREMENTS, [plan_id=3] (18) HashAggregate [codegen id : 4] -Input [4]: [p_brand#7, p_type#8, p_size#9, ps_suppkey#2] -Keys [4]: [p_brand#7, p_type#8, p_size#9, ps_suppkey#2] +Input [4]: [p_brand#6, p_type#7, p_size#8, ps_suppkey#2] +Keys [4]: [p_brand#6, p_type#7, p_size#8, ps_suppkey#2] Functions: [] Aggregate Attributes: [] -Results [4]: [p_brand#7, p_type#8, p_size#9, ps_suppkey#2] +Results [4]: [p_brand#6, p_type#7, p_size#8, ps_suppkey#2] (19) HashAggregate [codegen id : 4] -Input [4]: [p_brand#7, p_type#8, p_size#9, ps_suppkey#2] -Keys [3]: [p_brand#7, p_type#8, p_size#9] +Input [4]: [p_brand#6, p_type#7, p_size#8, ps_suppkey#2] +Keys [3]: [p_brand#6, p_type#7, p_size#8] Functions [1]: [partial_count(distinct ps_suppkey#2)] -Aggregate Attributes [1]: [count(ps_suppkey#2)#13] -Results [4]: [p_brand#7, p_type#8, p_size#9, count#14] +Aggregate Attributes [1]: [count(ps_suppkey#2)#10] +Results [4]: [p_brand#6, p_type#7, p_size#8, count#11] (20) Exchange -Input [4]: [p_brand#7, p_type#8, p_size#9, count#14] -Arguments: hashpartitioning(p_brand#7, p_type#8, p_size#9, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [p_brand#6, p_type#7, p_size#8, count#11] +Arguments: hashpartitioning(p_brand#6, p_type#7, p_size#8, 5), ENSURE_REQUIREMENTS, [plan_id=4] (21) HashAggregate [codegen id : 5] -Input [4]: [p_brand#7, p_type#8, p_size#9, count#14] -Keys [3]: [p_brand#7, p_type#8, p_size#9] +Input [4]: [p_brand#6, p_type#7, p_size#8, count#11] +Keys [3]: [p_brand#6, p_type#7, p_size#8] Functions [1]: [count(distinct ps_suppkey#2)] -Aggregate Attributes [1]: [count(ps_suppkey#2)#13] -Results [4]: [p_brand#7, p_type#8, p_size#9, count(ps_suppkey#2)#13 AS supplier_cnt#16] +Aggregate Attributes [1]: [count(ps_suppkey#2)#10] +Results [4]: [p_brand#6, p_type#7, p_size#8, count(ps_suppkey#2)#10 AS supplier_cnt#12] (22) Exchange -Input [4]: [p_brand#7, p_type#8, p_size#9, supplier_cnt#16] -Arguments: rangepartitioning(supplier_cnt#16 DESC NULLS LAST, p_brand#7 ASC NULLS FIRST, p_type#8 ASC NULLS FIRST, p_size#9 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [4]: [p_brand#6, p_type#7, p_size#8, supplier_cnt#12] +Arguments: rangepartitioning(supplier_cnt#12 DESC NULLS LAST, p_brand#6 ASC NULLS FIRST, p_type#7 ASC NULLS FIRST, p_size#8 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=5] (23) Sort [codegen id : 6] -Input [4]: [p_brand#7, p_type#8, p_size#9, supplier_cnt#16] -Arguments: [supplier_cnt#16 DESC NULLS LAST, p_brand#7 ASC NULLS FIRST, p_type#8 ASC NULLS FIRST, p_size#9 ASC NULLS FIRST], true, 0 +Input [4]: [p_brand#6, p_type#7, p_size#8, supplier_cnt#12] +Arguments: [supplier_cnt#12 DESC NULLS LAST, p_brand#6 ASC NULLS FIRST, p_type#7 ASC NULLS FIRST, p_size#8 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpch-plan-stability/q17/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q17/explain.txt index 652bf04238ca2..a3bf795610edf 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q17/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q17/explain.txt @@ -58,7 +58,7 @@ Input [3]: [p_partkey#4, p_brand#5, p_container#6] (8) BroadcastExchange Input [1]: [p_partkey#4] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 4] Left keys [1]: [l_partkey#1] @@ -70,69 +70,69 @@ Output [3]: [l_quantity#2, l_extendedprice#3, p_partkey#4] Input [4]: [l_partkey#1, l_quantity#2, l_extendedprice#3, p_partkey#4] (11) Scan parquet default.lineitem -Output [2]: [l_partkey#9, l_quantity#10] +Output [2]: [l_partkey#8, l_quantity#9] Batched: true Location [not included in comparison]/{warehouse_dir}/lineitem] PushedFilters: [IsNotNull(l_partkey)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [2]: [l_partkey#9, l_quantity#10] +Input [2]: [l_partkey#8, l_quantity#9] (13) Filter [codegen id : 2] -Input [2]: [l_partkey#9, l_quantity#10] -Condition : isnotnull(l_partkey#9) +Input [2]: [l_partkey#8, l_quantity#9] +Condition : isnotnull(l_partkey#8) (14) HashAggregate [codegen id : 2] -Input [2]: [l_partkey#9, l_quantity#10] -Keys [1]: [l_partkey#9] -Functions [1]: [partial_avg(UnscaledValue(l_quantity#10))] -Aggregate Attributes [2]: [sum#11, count#12] -Results [3]: [l_partkey#9, sum#13, count#14] +Input [2]: [l_partkey#8, l_quantity#9] +Keys [1]: [l_partkey#8] +Functions [1]: [partial_avg(UnscaledValue(l_quantity#9))] +Aggregate Attributes [2]: [sum#10, count#11] +Results [3]: [l_partkey#8, sum#12, count#13] (15) Exchange -Input [3]: [l_partkey#9, sum#13, count#14] -Arguments: hashpartitioning(l_partkey#9, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [3]: [l_partkey#8, sum#12, count#13] +Arguments: hashpartitioning(l_partkey#8, 5), ENSURE_REQUIREMENTS, [plan_id=2] (16) HashAggregate [codegen id : 3] -Input [3]: [l_partkey#9, sum#13, count#14] -Keys [1]: [l_partkey#9] -Functions [1]: [avg(UnscaledValue(l_quantity#10))] -Aggregate Attributes [1]: [avg(UnscaledValue(l_quantity#10))#16] -Results [2]: [CheckOverflow((0.2000 * promote_precision(cast((avg(UnscaledValue(l_quantity#10))#16 / 1.0) as decimal(14,4)))), DecimalType(16,5)) AS (0.2 * avg(l_quantity))#17, l_partkey#9] +Input [3]: [l_partkey#8, sum#12, count#13] +Keys [1]: [l_partkey#8] +Functions [1]: [avg(UnscaledValue(l_quantity#9))] +Aggregate Attributes [1]: [avg(UnscaledValue(l_quantity#9))#14] +Results [2]: [CheckOverflow((0.2000 * promote_precision(cast((avg(UnscaledValue(l_quantity#9))#14 / 1.0) as decimal(14,4)))), DecimalType(16,5)) AS (0.2 * avg(l_quantity))#15, l_partkey#8] (17) Filter [codegen id : 3] -Input [2]: [(0.2 * avg(l_quantity))#17, l_partkey#9] -Condition : isnotnull((0.2 * avg(l_quantity))#17) +Input [2]: [(0.2 * avg(l_quantity))#15, l_partkey#8] +Condition : isnotnull((0.2 * avg(l_quantity))#15) (18) BroadcastExchange -Input [2]: [(0.2 * avg(l_quantity))#17, l_partkey#9] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [id=#18] +Input [2]: [(0.2 * avg(l_quantity))#15, l_partkey#8] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [plan_id=3] (19) BroadcastHashJoin [codegen id : 4] Left keys [1]: [p_partkey#4] -Right keys [1]: [l_partkey#9] -Join condition: (cast(l_quantity#2 as decimal(16,5)) < (0.2 * avg(l_quantity))#17) +Right keys [1]: [l_partkey#8] +Join condition: (cast(l_quantity#2 as decimal(16,5)) < (0.2 * avg(l_quantity))#15) (20) Project [codegen id : 4] Output [1]: [l_extendedprice#3] -Input [5]: [l_quantity#2, l_extendedprice#3, p_partkey#4, (0.2 * avg(l_quantity))#17, l_partkey#9] +Input [5]: [l_quantity#2, l_extendedprice#3, p_partkey#4, (0.2 * avg(l_quantity))#15, l_partkey#8] (21) HashAggregate [codegen id : 4] Input [1]: [l_extendedprice#3] Keys: [] Functions [1]: [partial_sum(l_extendedprice#3)] -Aggregate Attributes [2]: [sum#19, isEmpty#20] -Results [2]: [sum#21, isEmpty#22] +Aggregate Attributes [2]: [sum#16, isEmpty#17] +Results [2]: [sum#18, isEmpty#19] (22) Exchange -Input [2]: [sum#21, isEmpty#22] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#23] +Input [2]: [sum#18, isEmpty#19] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (23) HashAggregate [codegen id : 5] -Input [2]: [sum#21, isEmpty#22] +Input [2]: [sum#18, isEmpty#19] Keys: [] Functions [1]: [sum(l_extendedprice#3)] -Aggregate Attributes [1]: [sum(l_extendedprice#3)#24] -Results [1]: [CheckOverflow((promote_precision(cast(sum(l_extendedprice#3)#24 as decimal(21,1))) / 7.0), DecimalType(27,6)) AS avg_yearly#25] +Aggregate Attributes [1]: [sum(l_extendedprice#3)#20] +Results [1]: [CheckOverflow((promote_precision(cast(sum(l_extendedprice#3)#20 as decimal(21,1))) / 7.0), DecimalType(27,6)) AS avg_yearly#21] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q18/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q18/explain.txt index 6425b29a8f0ae..36ff27d8bd496 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q18/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q18/explain.txt @@ -77,26 +77,26 @@ Results [3]: [l_orderkey#7, sum#11, isEmpty#12] (10) Exchange Input [3]: [l_orderkey#7, sum#11, isEmpty#12] -Arguments: hashpartitioning(l_orderkey#7, 5), ENSURE_REQUIREMENTS, [id=#13] +Arguments: hashpartitioning(l_orderkey#7, 5), ENSURE_REQUIREMENTS, [plan_id=1] (11) HashAggregate [codegen id : 2] Input [3]: [l_orderkey#7, sum#11, isEmpty#12] Keys [1]: [l_orderkey#7] Functions [1]: [sum(l_quantity#8)] -Aggregate Attributes [1]: [sum(l_quantity#8)#14] -Results [2]: [l_orderkey#7, sum(l_quantity#8)#14 AS sum(l_quantity#15)#16] +Aggregate Attributes [1]: [sum(l_quantity#8)#13] +Results [2]: [l_orderkey#7, sum(l_quantity#8)#13 AS sum(l_quantity#14)#15] (12) Filter [codegen id : 2] -Input [2]: [l_orderkey#7, sum(l_quantity#15)#16] -Condition : (isnotnull(sum(l_quantity#15)#16) AND (sum(l_quantity#15)#16 > 300)) +Input [2]: [l_orderkey#7, sum(l_quantity#14)#15] +Condition : (isnotnull(sum(l_quantity#14)#15) AND (sum(l_quantity#14)#15 > 300)) (13) Project [codegen id : 2] Output [1]: [l_orderkey#7] -Input [2]: [l_orderkey#7, sum(l_quantity#15)#16] +Input [2]: [l_orderkey#7, sum(l_quantity#14)#15] (14) BroadcastExchange Input [1]: [l_orderkey#7] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#17] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 3] Left keys [1]: [o_orderkey#3] @@ -105,7 +105,7 @@ Join condition: None (16) BroadcastExchange Input [4]: [o_orderkey#3, o_custkey#4, o_totalprice#5, o_orderdate#6] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false]),false), [id=#18] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false]),false), [plan_id=3] (17) BroadcastHashJoin [codegen id : 7] Left keys [1]: [c_custkey#1] @@ -117,59 +117,59 @@ Output [5]: [c_custkey#1, c_name#2, o_orderkey#3, o_totalprice#5, o_orderdate#6] Input [6]: [c_custkey#1, c_name#2, o_orderkey#3, o_custkey#4, o_totalprice#5, o_orderdate#6] (19) Scan parquet default.lineitem -Output [2]: [l_orderkey#19, l_quantity#15] +Output [2]: [l_orderkey#16, l_quantity#14] Batched: true Location [not included in comparison]/{warehouse_dir}/lineitem] PushedFilters: [IsNotNull(l_orderkey)] ReadSchema: struct (20) ColumnarToRow [codegen id : 6] -Input [2]: [l_orderkey#19, l_quantity#15] +Input [2]: [l_orderkey#16, l_quantity#14] (21) Filter [codegen id : 6] -Input [2]: [l_orderkey#19, l_quantity#15] -Condition : isnotnull(l_orderkey#19) +Input [2]: [l_orderkey#16, l_quantity#14] +Condition : isnotnull(l_orderkey#16) (22) ReusedExchange [Reuses operator id: 14] Output [1]: [l_orderkey#7] (23) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [l_orderkey#19] +Left keys [1]: [l_orderkey#16] Right keys [1]: [l_orderkey#7] Join condition: None (24) BroadcastExchange -Input [2]: [l_orderkey#19, l_quantity#15] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#20] +Input [2]: [l_orderkey#16, l_quantity#14] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=4] (25) BroadcastHashJoin [codegen id : 7] Left keys [1]: [o_orderkey#3] -Right keys [1]: [l_orderkey#19] +Right keys [1]: [l_orderkey#16] Join condition: None (26) Project [codegen id : 7] -Output [6]: [c_custkey#1, c_name#2, o_orderkey#3, o_totalprice#5, o_orderdate#6, l_quantity#15] -Input [7]: [c_custkey#1, c_name#2, o_orderkey#3, o_totalprice#5, o_orderdate#6, l_orderkey#19, l_quantity#15] +Output [6]: [c_custkey#1, c_name#2, o_orderkey#3, o_totalprice#5, o_orderdate#6, l_quantity#14] +Input [7]: [c_custkey#1, c_name#2, o_orderkey#3, o_totalprice#5, o_orderdate#6, l_orderkey#16, l_quantity#14] (27) HashAggregate [codegen id : 7] -Input [6]: [c_custkey#1, c_name#2, o_orderkey#3, o_totalprice#5, o_orderdate#6, l_quantity#15] +Input [6]: [c_custkey#1, c_name#2, o_orderkey#3, o_totalprice#5, o_orderdate#6, l_quantity#14] Keys [5]: [c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5] -Functions [1]: [partial_sum(l_quantity#15)] -Aggregate Attributes [2]: [sum#21, isEmpty#22] -Results [7]: [c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5, sum#23, isEmpty#24] +Functions [1]: [partial_sum(l_quantity#14)] +Aggregate Attributes [2]: [sum#17, isEmpty#18] +Results [7]: [c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5, sum#19, isEmpty#20] (28) Exchange -Input [7]: [c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5, sum#23, isEmpty#24] -Arguments: hashpartitioning(c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5, 5), ENSURE_REQUIREMENTS, [id=#25] +Input [7]: [c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5, sum#19, isEmpty#20] +Arguments: hashpartitioning(c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5, 5), ENSURE_REQUIREMENTS, [plan_id=5] (29) HashAggregate [codegen id : 8] -Input [7]: [c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5, sum#23, isEmpty#24] +Input [7]: [c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5, sum#19, isEmpty#20] Keys [5]: [c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5] -Functions [1]: [sum(l_quantity#15)] -Aggregate Attributes [1]: [sum(l_quantity#15)#26] -Results [6]: [c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5, sum(l_quantity#15)#26 AS sum(l_quantity)#27] +Functions [1]: [sum(l_quantity#14)] +Aggregate Attributes [1]: [sum(l_quantity#14)#21] +Results [6]: [c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5, sum(l_quantity#14)#21 AS sum(l_quantity)#22] (30) TakeOrderedAndProject -Input [6]: [c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5, sum(l_quantity)#27] -Arguments: 100, [o_totalprice#5 DESC NULLS LAST, o_orderdate#6 ASC NULLS FIRST], [c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5, sum(l_quantity)#27] +Input [6]: [c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5, sum(l_quantity)#22] +Arguments: 100, [o_totalprice#5 DESC NULLS LAST, o_orderdate#6 ASC NULLS FIRST], [c_name#2, c_custkey#1, o_orderkey#3, o_orderdate#6, o_totalprice#5, sum(l_quantity)#22] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q19/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q19/explain.txt index b5d84e54efc7e..e0c279b6b1e86 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q19/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q19/explain.txt @@ -48,7 +48,7 @@ Condition : (((isnotnull(p_size#9) AND (p_size#9 >= 1)) AND isnotnull(p_partkey# (8) BroadcastExchange Input [4]: [p_partkey#7, p_brand#8, p_size#9, p_container#10] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#14] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 2] Left keys [1]: [l_partkey#1] @@ -63,17 +63,17 @@ Input [8]: [l_partkey#1, l_quantity#2, l_extendedprice#3, l_discount#4, p_partke Input [2]: [l_extendedprice#3, l_discount#4] Keys: [] Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#3 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#4 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] -Aggregate Attributes [2]: [sum#15, isEmpty#16] -Results [2]: [sum#17, isEmpty#18] +Aggregate Attributes [2]: [sum#14, isEmpty#15] +Results [2]: [sum#16, isEmpty#17] (12) Exchange -Input [2]: [sum#17, isEmpty#18] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#19] +Input [2]: [sum#16, isEmpty#17] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=2] (13) HashAggregate [codegen id : 3] -Input [2]: [sum#17, isEmpty#18] +Input [2]: [sum#16, isEmpty#17] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#3 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#4 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#3 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#4 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#20] -Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#3 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#4 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#20 AS revenue#21] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#3 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#4 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#18] +Results [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#3 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#4 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#18 AS revenue#19] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q2/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q2/explain.txt index 1a938a23f7d3f..da01853dbe487 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q2/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q2/explain.txt @@ -90,7 +90,7 @@ Condition : ((isnotnull(ps_partkey#5) AND isnotnull(ps_supplycost#7)) AND isnotn (8) BroadcastExchange Input [3]: [ps_partkey#5, ps_suppkey#6, ps_supplycost#7] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 10] Left keys [1]: [p_partkey#1] @@ -102,206 +102,206 @@ Output [4]: [p_partkey#1, p_mfgr#2, ps_suppkey#6, ps_supplycost#7] Input [5]: [p_partkey#1, p_mfgr#2, ps_partkey#5, ps_suppkey#6, ps_supplycost#7] (11) Scan parquet default.partsupp -Output [3]: [ps_partkey#9, ps_suppkey#10, ps_supplycost#11] +Output [3]: [ps_partkey#8, ps_suppkey#9, ps_supplycost#10] Batched: true Location [not included in comparison]/{warehouse_dir}/partsupp] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct (12) ColumnarToRow [codegen id : 5] -Input [3]: [ps_partkey#9, ps_suppkey#10, ps_supplycost#11] +Input [3]: [ps_partkey#8, ps_suppkey#9, ps_supplycost#10] (13) Filter [codegen id : 5] -Input [3]: [ps_partkey#9, ps_suppkey#10, ps_supplycost#11] -Condition : (isnotnull(ps_suppkey#10) AND isnotnull(ps_partkey#9)) +Input [3]: [ps_partkey#8, ps_suppkey#9, ps_supplycost#10] +Condition : (isnotnull(ps_suppkey#9) AND isnotnull(ps_partkey#8)) (14) Scan parquet default.supplier -Output [2]: [s_suppkey#12, s_nationkey#13] +Output [2]: [s_suppkey#11, s_nationkey#12] Batched: true Location [not included in comparison]/{warehouse_dir}/supplier] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct (15) ColumnarToRow [codegen id : 2] -Input [2]: [s_suppkey#12, s_nationkey#13] +Input [2]: [s_suppkey#11, s_nationkey#12] (16) Filter [codegen id : 2] -Input [2]: [s_suppkey#12, s_nationkey#13] -Condition : (isnotnull(s_suppkey#12) AND isnotnull(s_nationkey#13)) +Input [2]: [s_suppkey#11, s_nationkey#12] +Condition : (isnotnull(s_suppkey#11) AND isnotnull(s_nationkey#12)) (17) BroadcastExchange -Input [2]: [s_suppkey#12, s_nationkey#13] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#14] +Input [2]: [s_suppkey#11, s_nationkey#12] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=2] (18) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [ps_suppkey#10] -Right keys [1]: [s_suppkey#12] +Left keys [1]: [ps_suppkey#9] +Right keys [1]: [s_suppkey#11] Join condition: None (19) Project [codegen id : 5] -Output [3]: [ps_partkey#9, ps_supplycost#11, s_nationkey#13] -Input [5]: [ps_partkey#9, ps_suppkey#10, ps_supplycost#11, s_suppkey#12, s_nationkey#13] +Output [3]: [ps_partkey#8, ps_supplycost#10, s_nationkey#12] +Input [5]: [ps_partkey#8, ps_suppkey#9, ps_supplycost#10, s_suppkey#11, s_nationkey#12] (20) Scan parquet default.nation -Output [2]: [n_nationkey#15, n_regionkey#16] +Output [2]: [n_nationkey#13, n_regionkey#14] Batched: true Location [not included in comparison]/{warehouse_dir}/nation] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct (21) ColumnarToRow [codegen id : 3] -Input [2]: [n_nationkey#15, n_regionkey#16] +Input [2]: [n_nationkey#13, n_regionkey#14] (22) Filter [codegen id : 3] -Input [2]: [n_nationkey#15, n_regionkey#16] -Condition : (isnotnull(n_nationkey#15) AND isnotnull(n_regionkey#16)) +Input [2]: [n_nationkey#13, n_regionkey#14] +Condition : (isnotnull(n_nationkey#13) AND isnotnull(n_regionkey#14)) (23) BroadcastExchange -Input [2]: [n_nationkey#15, n_regionkey#16] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#17] +Input [2]: [n_nationkey#13, n_regionkey#14] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [s_nationkey#13] -Right keys [1]: [n_nationkey#15] +Left keys [1]: [s_nationkey#12] +Right keys [1]: [n_nationkey#13] Join condition: None (25) Project [codegen id : 5] -Output [3]: [ps_partkey#9, ps_supplycost#11, n_regionkey#16] -Input [5]: [ps_partkey#9, ps_supplycost#11, s_nationkey#13, n_nationkey#15, n_regionkey#16] +Output [3]: [ps_partkey#8, ps_supplycost#10, n_regionkey#14] +Input [5]: [ps_partkey#8, ps_supplycost#10, s_nationkey#12, n_nationkey#13, n_regionkey#14] (26) Scan parquet default.region -Output [2]: [r_regionkey#18, r_name#19] +Output [2]: [r_regionkey#15, r_name#16] Batched: true Location [not included in comparison]/{warehouse_dir}/region] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,EUROPE), IsNotNull(r_regionkey)] ReadSchema: struct (27) ColumnarToRow [codegen id : 4] -Input [2]: [r_regionkey#18, r_name#19] +Input [2]: [r_regionkey#15, r_name#16] (28) Filter [codegen id : 4] -Input [2]: [r_regionkey#18, r_name#19] -Condition : ((isnotnull(r_name#19) AND (r_name#19 = EUROPE)) AND isnotnull(r_regionkey#18)) +Input [2]: [r_regionkey#15, r_name#16] +Condition : ((isnotnull(r_name#16) AND (r_name#16 = EUROPE)) AND isnotnull(r_regionkey#15)) (29) Project [codegen id : 4] -Output [1]: [r_regionkey#18] -Input [2]: [r_regionkey#18, r_name#19] +Output [1]: [r_regionkey#15] +Input [2]: [r_regionkey#15, r_name#16] (30) BroadcastExchange -Input [1]: [r_regionkey#18] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#20] +Input [1]: [r_regionkey#15] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=4] (31) BroadcastHashJoin [codegen id : 5] -Left keys [1]: [n_regionkey#16] -Right keys [1]: [r_regionkey#18] +Left keys [1]: [n_regionkey#14] +Right keys [1]: [r_regionkey#15] Join condition: None (32) Project [codegen id : 5] -Output [2]: [ps_partkey#9, ps_supplycost#11] -Input [4]: [ps_partkey#9, ps_supplycost#11, n_regionkey#16, r_regionkey#18] +Output [2]: [ps_partkey#8, ps_supplycost#10] +Input [4]: [ps_partkey#8, ps_supplycost#10, n_regionkey#14, r_regionkey#15] (33) HashAggregate [codegen id : 5] -Input [2]: [ps_partkey#9, ps_supplycost#11] -Keys [1]: [ps_partkey#9] -Functions [1]: [partial_min(ps_supplycost#11)] -Aggregate Attributes [1]: [min#21] -Results [2]: [ps_partkey#9, min#22] +Input [2]: [ps_partkey#8, ps_supplycost#10] +Keys [1]: [ps_partkey#8] +Functions [1]: [partial_min(ps_supplycost#10)] +Aggregate Attributes [1]: [min#17] +Results [2]: [ps_partkey#8, min#18] (34) Exchange -Input [2]: [ps_partkey#9, min#22] -Arguments: hashpartitioning(ps_partkey#9, 5), ENSURE_REQUIREMENTS, [id=#23] +Input [2]: [ps_partkey#8, min#18] +Arguments: hashpartitioning(ps_partkey#8, 5), ENSURE_REQUIREMENTS, [plan_id=5] (35) HashAggregate [codegen id : 6] -Input [2]: [ps_partkey#9, min#22] -Keys [1]: [ps_partkey#9] -Functions [1]: [min(ps_supplycost#11)] -Aggregate Attributes [1]: [min(ps_supplycost#11)#24] -Results [2]: [min(ps_supplycost#11)#24 AS min(ps_supplycost)#25, ps_partkey#9] +Input [2]: [ps_partkey#8, min#18] +Keys [1]: [ps_partkey#8] +Functions [1]: [min(ps_supplycost#10)] +Aggregate Attributes [1]: [min(ps_supplycost#10)#19] +Results [2]: [min(ps_supplycost#10)#19 AS min(ps_supplycost)#20, ps_partkey#8] (36) Filter [codegen id : 6] -Input [2]: [min(ps_supplycost)#25, ps_partkey#9] -Condition : isnotnull(min(ps_supplycost)#25) +Input [2]: [min(ps_supplycost)#20, ps_partkey#8] +Condition : isnotnull(min(ps_supplycost)#20) (37) BroadcastExchange -Input [2]: [min(ps_supplycost)#25, ps_partkey#9] -Arguments: HashedRelationBroadcastMode(List(input[0, decimal(10,0), false], input[1, bigint, true]),false), [id=#26] +Input [2]: [min(ps_supplycost)#20, ps_partkey#8] +Arguments: HashedRelationBroadcastMode(List(input[0, decimal(10,0), false], input[1, bigint, true]),false), [plan_id=6] (38) BroadcastHashJoin [codegen id : 10] Left keys [2]: [ps_supplycost#7, p_partkey#1] -Right keys [2]: [min(ps_supplycost)#25, ps_partkey#9] +Right keys [2]: [min(ps_supplycost)#20, ps_partkey#8] Join condition: None (39) Project [codegen id : 10] Output [3]: [p_partkey#1, p_mfgr#2, ps_suppkey#6] -Input [6]: [p_partkey#1, p_mfgr#2, ps_suppkey#6, ps_supplycost#7, min(ps_supplycost)#25, ps_partkey#9] +Input [6]: [p_partkey#1, p_mfgr#2, ps_suppkey#6, ps_supplycost#7, min(ps_supplycost)#20, ps_partkey#8] (40) Scan parquet default.supplier -Output [7]: [s_suppkey#27, s_name#28, s_address#29, s_nationkey#30, s_phone#31, s_acctbal#32, s_comment#33] +Output [7]: [s_suppkey#21, s_name#22, s_address#23, s_nationkey#24, s_phone#25, s_acctbal#26, s_comment#27] Batched: true Location [not included in comparison]/{warehouse_dir}/supplier] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct (41) ColumnarToRow [codegen id : 7] -Input [7]: [s_suppkey#27, s_name#28, s_address#29, s_nationkey#30, s_phone#31, s_acctbal#32, s_comment#33] +Input [7]: [s_suppkey#21, s_name#22, s_address#23, s_nationkey#24, s_phone#25, s_acctbal#26, s_comment#27] (42) Filter [codegen id : 7] -Input [7]: [s_suppkey#27, s_name#28, s_address#29, s_nationkey#30, s_phone#31, s_acctbal#32, s_comment#33] -Condition : (isnotnull(s_suppkey#27) AND isnotnull(s_nationkey#30)) +Input [7]: [s_suppkey#21, s_name#22, s_address#23, s_nationkey#24, s_phone#25, s_acctbal#26, s_comment#27] +Condition : (isnotnull(s_suppkey#21) AND isnotnull(s_nationkey#24)) (43) BroadcastExchange -Input [7]: [s_suppkey#27, s_name#28, s_address#29, s_nationkey#30, s_phone#31, s_acctbal#32, s_comment#33] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#34] +Input [7]: [s_suppkey#21, s_name#22, s_address#23, s_nationkey#24, s_phone#25, s_acctbal#26, s_comment#27] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=7] (44) BroadcastHashJoin [codegen id : 10] Left keys [1]: [ps_suppkey#6] -Right keys [1]: [s_suppkey#27] +Right keys [1]: [s_suppkey#21] Join condition: None (45) Project [codegen id : 10] -Output [8]: [p_partkey#1, p_mfgr#2, s_name#28, s_address#29, s_nationkey#30, s_phone#31, s_acctbal#32, s_comment#33] -Input [10]: [p_partkey#1, p_mfgr#2, ps_suppkey#6, s_suppkey#27, s_name#28, s_address#29, s_nationkey#30, s_phone#31, s_acctbal#32, s_comment#33] +Output [8]: [p_partkey#1, p_mfgr#2, s_name#22, s_address#23, s_nationkey#24, s_phone#25, s_acctbal#26, s_comment#27] +Input [10]: [p_partkey#1, p_mfgr#2, ps_suppkey#6, s_suppkey#21, s_name#22, s_address#23, s_nationkey#24, s_phone#25, s_acctbal#26, s_comment#27] (46) Scan parquet default.nation -Output [3]: [n_nationkey#35, n_name#36, n_regionkey#37] +Output [3]: [n_nationkey#28, n_name#29, n_regionkey#30] Batched: true Location [not included in comparison]/{warehouse_dir}/nation] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct (47) ColumnarToRow [codegen id : 8] -Input [3]: [n_nationkey#35, n_name#36, n_regionkey#37] +Input [3]: [n_nationkey#28, n_name#29, n_regionkey#30] (48) Filter [codegen id : 8] -Input [3]: [n_nationkey#35, n_name#36, n_regionkey#37] -Condition : (isnotnull(n_nationkey#35) AND isnotnull(n_regionkey#37)) +Input [3]: [n_nationkey#28, n_name#29, n_regionkey#30] +Condition : (isnotnull(n_nationkey#28) AND isnotnull(n_regionkey#30)) (49) BroadcastExchange -Input [3]: [n_nationkey#35, n_name#36, n_regionkey#37] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#38] +Input [3]: [n_nationkey#28, n_name#29, n_regionkey#30] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=8] (50) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [s_nationkey#30] -Right keys [1]: [n_nationkey#35] +Left keys [1]: [s_nationkey#24] +Right keys [1]: [n_nationkey#28] Join condition: None (51) Project [codegen id : 10] -Output [9]: [p_partkey#1, p_mfgr#2, s_name#28, s_address#29, s_phone#31, s_acctbal#32, s_comment#33, n_name#36, n_regionkey#37] -Input [11]: [p_partkey#1, p_mfgr#2, s_name#28, s_address#29, s_nationkey#30, s_phone#31, s_acctbal#32, s_comment#33, n_nationkey#35, n_name#36, n_regionkey#37] +Output [9]: [p_partkey#1, p_mfgr#2, s_name#22, s_address#23, s_phone#25, s_acctbal#26, s_comment#27, n_name#29, n_regionkey#30] +Input [11]: [p_partkey#1, p_mfgr#2, s_name#22, s_address#23, s_nationkey#24, s_phone#25, s_acctbal#26, s_comment#27, n_nationkey#28, n_name#29, n_regionkey#30] (52) ReusedExchange [Reuses operator id: 30] -Output [1]: [r_regionkey#39] +Output [1]: [r_regionkey#31] (53) BroadcastHashJoin [codegen id : 10] -Left keys [1]: [n_regionkey#37] -Right keys [1]: [r_regionkey#39] +Left keys [1]: [n_regionkey#30] +Right keys [1]: [r_regionkey#31] Join condition: None (54) Project [codegen id : 10] -Output [8]: [s_acctbal#32, s_name#28, n_name#36, p_partkey#1, p_mfgr#2, s_address#29, s_phone#31, s_comment#33] -Input [10]: [p_partkey#1, p_mfgr#2, s_name#28, s_address#29, s_phone#31, s_acctbal#32, s_comment#33, n_name#36, n_regionkey#37, r_regionkey#39] +Output [8]: [s_acctbal#26, s_name#22, n_name#29, p_partkey#1, p_mfgr#2, s_address#23, s_phone#25, s_comment#27] +Input [10]: [p_partkey#1, p_mfgr#2, s_name#22, s_address#23, s_phone#25, s_acctbal#26, s_comment#27, n_name#29, n_regionkey#30, r_regionkey#31] (55) TakeOrderedAndProject -Input [8]: [s_acctbal#32, s_name#28, n_name#36, p_partkey#1, p_mfgr#2, s_address#29, s_phone#31, s_comment#33] -Arguments: 100, [s_acctbal#32 DESC NULLS LAST, n_name#36 ASC NULLS FIRST, s_name#28 ASC NULLS FIRST, p_partkey#1 ASC NULLS FIRST], [s_acctbal#32, s_name#28, n_name#36, p_partkey#1, p_mfgr#2, s_address#29, s_phone#31, s_comment#33] +Input [8]: [s_acctbal#26, s_name#22, n_name#29, p_partkey#1, p_mfgr#2, s_address#23, s_phone#25, s_comment#27] +Arguments: 100, [s_acctbal#26 DESC NULLS LAST, n_name#29 ASC NULLS FIRST, s_name#22 ASC NULLS FIRST, p_partkey#1 ASC NULLS FIRST], [s_acctbal#26, s_name#22, n_name#29, p_partkey#1, p_mfgr#2, s_address#23, s_phone#25, s_comment#27] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q20/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q20/explain.txt index 43d5431a70f2e..45ff5e67dacb9 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q20/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q20/explain.txt @@ -86,7 +86,7 @@ Input [2]: [p_partkey#8, p_name#9] (11) BroadcastExchange Input [1]: [p_partkey#8] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=1] (12) BroadcastHashJoin [codegen id : 5] Left keys [1]: [ps_partkey#5] @@ -94,69 +94,69 @@ Right keys [1]: [p_partkey#8] Join condition: None (13) Scan parquet default.lineitem -Output [4]: [l_partkey#11, l_suppkey#12, l_quantity#13, l_shipdate#14] +Output [4]: [l_partkey#10, l_suppkey#11, l_quantity#12, l_shipdate#13] Batched: true Location [not included in comparison]/{warehouse_dir}/lineitem] PushedFilters: [IsNotNull(l_shipdate), GreaterThanOrEqual(l_shipdate,1994-01-01), LessThan(l_shipdate,1995-01-01), IsNotNull(l_partkey), IsNotNull(l_suppkey)] ReadSchema: struct (14) ColumnarToRow [codegen id : 3] -Input [4]: [l_partkey#11, l_suppkey#12, l_quantity#13, l_shipdate#14] +Input [4]: [l_partkey#10, l_suppkey#11, l_quantity#12, l_shipdate#13] (15) Filter [codegen id : 3] -Input [4]: [l_partkey#11, l_suppkey#12, l_quantity#13, l_shipdate#14] -Condition : ((((isnotnull(l_shipdate#14) AND (l_shipdate#14 >= 1994-01-01)) AND (l_shipdate#14 < 1995-01-01)) AND isnotnull(l_partkey#11)) AND isnotnull(l_suppkey#12)) +Input [4]: [l_partkey#10, l_suppkey#11, l_quantity#12, l_shipdate#13] +Condition : ((((isnotnull(l_shipdate#13) AND (l_shipdate#13 >= 1994-01-01)) AND (l_shipdate#13 < 1995-01-01)) AND isnotnull(l_partkey#10)) AND isnotnull(l_suppkey#11)) (16) Project [codegen id : 3] -Output [3]: [l_partkey#11, l_suppkey#12, l_quantity#13] -Input [4]: [l_partkey#11, l_suppkey#12, l_quantity#13, l_shipdate#14] +Output [3]: [l_partkey#10, l_suppkey#11, l_quantity#12] +Input [4]: [l_partkey#10, l_suppkey#11, l_quantity#12, l_shipdate#13] (17) ReusedExchange [Reuses operator id: 11] Output [1]: [p_partkey#8] (18) BroadcastHashJoin [codegen id : 3] -Left keys [1]: [l_partkey#11] +Left keys [1]: [l_partkey#10] Right keys [1]: [p_partkey#8] Join condition: None (19) HashAggregate [codegen id : 3] -Input [3]: [l_partkey#11, l_suppkey#12, l_quantity#13] -Keys [2]: [l_partkey#11, l_suppkey#12] -Functions [1]: [partial_sum(l_quantity#13)] -Aggregate Attributes [2]: [sum#15, isEmpty#16] -Results [4]: [l_partkey#11, l_suppkey#12, sum#17, isEmpty#18] +Input [3]: [l_partkey#10, l_suppkey#11, l_quantity#12] +Keys [2]: [l_partkey#10, l_suppkey#11] +Functions [1]: [partial_sum(l_quantity#12)] +Aggregate Attributes [2]: [sum#14, isEmpty#15] +Results [4]: [l_partkey#10, l_suppkey#11, sum#16, isEmpty#17] (20) Exchange -Input [4]: [l_partkey#11, l_suppkey#12, sum#17, isEmpty#18] -Arguments: hashpartitioning(l_partkey#11, l_suppkey#12, 5), ENSURE_REQUIREMENTS, [id=#19] +Input [4]: [l_partkey#10, l_suppkey#11, sum#16, isEmpty#17] +Arguments: hashpartitioning(l_partkey#10, l_suppkey#11, 5), ENSURE_REQUIREMENTS, [plan_id=2] (21) HashAggregate [codegen id : 4] -Input [4]: [l_partkey#11, l_suppkey#12, sum#17, isEmpty#18] -Keys [2]: [l_partkey#11, l_suppkey#12] -Functions [1]: [sum(l_quantity#13)] -Aggregate Attributes [1]: [sum(l_quantity#13)#20] -Results [3]: [CheckOverflow((0.5 * promote_precision(cast(sum(l_quantity#13)#20 as decimal(21,1)))), DecimalType(22,1)) AS (0.5 * sum(l_quantity))#21, l_partkey#11, l_suppkey#12] +Input [4]: [l_partkey#10, l_suppkey#11, sum#16, isEmpty#17] +Keys [2]: [l_partkey#10, l_suppkey#11] +Functions [1]: [sum(l_quantity#12)] +Aggregate Attributes [1]: [sum(l_quantity#12)#18] +Results [3]: [CheckOverflow((0.5 * promote_precision(cast(sum(l_quantity#12)#18 as decimal(21,1)))), DecimalType(22,1)) AS (0.5 * sum(l_quantity))#19, l_partkey#10, l_suppkey#11] (22) Filter [codegen id : 4] -Input [3]: [(0.5 * sum(l_quantity))#21, l_partkey#11, l_suppkey#12] -Condition : isnotnull((0.5 * sum(l_quantity))#21) +Input [3]: [(0.5 * sum(l_quantity))#19, l_partkey#10, l_suppkey#11] +Condition : isnotnull((0.5 * sum(l_quantity))#19) (23) BroadcastExchange -Input [3]: [(0.5 * sum(l_quantity))#21, l_partkey#11, l_suppkey#12] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true], input[2, bigint, true]),false), [id=#22] +Input [3]: [(0.5 * sum(l_quantity))#19, l_partkey#10, l_suppkey#11] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true], input[2, bigint, true]),false), [plan_id=3] (24) BroadcastHashJoin [codegen id : 5] Left keys [2]: [ps_partkey#5, ps_suppkey#6] -Right keys [2]: [l_partkey#11, l_suppkey#12] -Join condition: (cast(ps_availqty#7 as decimal(22,1)) > (0.5 * sum(l_quantity))#21) +Right keys [2]: [l_partkey#10, l_suppkey#11] +Join condition: (cast(ps_availqty#7 as decimal(22,1)) > (0.5 * sum(l_quantity))#19) (25) Project [codegen id : 5] Output [1]: [ps_suppkey#6] -Input [6]: [ps_partkey#5, ps_suppkey#6, ps_availqty#7, (0.5 * sum(l_quantity))#21, l_partkey#11, l_suppkey#12] +Input [6]: [ps_partkey#5, ps_suppkey#6, ps_availqty#7, (0.5 * sum(l_quantity))#19, l_partkey#10, l_suppkey#11] (26) BroadcastExchange Input [1]: [ps_suppkey#6] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#23] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=4] (27) BroadcastHashJoin [codegen id : 7] Left keys [1]: [s_suppkey#1] @@ -168,39 +168,39 @@ Output [3]: [s_name#2, s_address#3, s_nationkey#4] Input [4]: [s_suppkey#1, s_name#2, s_address#3, s_nationkey#4] (29) Scan parquet default.nation -Output [2]: [n_nationkey#24, n_name#25] +Output [2]: [n_nationkey#20, n_name#21] Batched: true Location [not included in comparison]/{warehouse_dir}/nation] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,CANADA), IsNotNull(n_nationkey)] ReadSchema: struct (30) ColumnarToRow [codegen id : 6] -Input [2]: [n_nationkey#24, n_name#25] +Input [2]: [n_nationkey#20, n_name#21] (31) Filter [codegen id : 6] -Input [2]: [n_nationkey#24, n_name#25] -Condition : ((isnotnull(n_name#25) AND (n_name#25 = CANADA)) AND isnotnull(n_nationkey#24)) +Input [2]: [n_nationkey#20, n_name#21] +Condition : ((isnotnull(n_name#21) AND (n_name#21 = CANADA)) AND isnotnull(n_nationkey#20)) (32) Project [codegen id : 6] -Output [1]: [n_nationkey#24] -Input [2]: [n_nationkey#24, n_name#25] +Output [1]: [n_nationkey#20] +Input [2]: [n_nationkey#20, n_name#21] (33) BroadcastExchange -Input [1]: [n_nationkey#24] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#26] +Input [1]: [n_nationkey#20] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 7] Left keys [1]: [s_nationkey#4] -Right keys [1]: [n_nationkey#24] +Right keys [1]: [n_nationkey#20] Join condition: None (35) Project [codegen id : 7] Output [2]: [s_name#2, s_address#3] -Input [4]: [s_name#2, s_address#3, s_nationkey#4, n_nationkey#24] +Input [4]: [s_name#2, s_address#3, s_nationkey#4, n_nationkey#20] (36) Exchange Input [2]: [s_name#2, s_address#3] -Arguments: rangepartitioning(s_name#2 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#27] +Arguments: rangepartitioning(s_name#2 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=6] (37) Sort [codegen id : 8] Input [2]: [s_name#2, s_address#3] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q21/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q21/explain.txt index 6a38536eb7246..5d5a5e6a430b6 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q21/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q21/explain.txt @@ -82,7 +82,7 @@ Input [2]: [l_orderkey#8, l_suppkey#9] (10) BroadcastExchange Input [2]: [l_orderkey#8, l_suppkey#9] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#10] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=1] (11) BroadcastHashJoin [codegen id : 3] Left keys [1]: [l_orderkey#4] @@ -90,35 +90,35 @@ Right keys [1]: [l_orderkey#8] Join condition: NOT (l_suppkey#9 = l_suppkey#5) (12) Scan parquet default.lineitem -Output [4]: [l_orderkey#11, l_suppkey#12, l_commitdate#13, l_receiptdate#14] +Output [4]: [l_orderkey#10, l_suppkey#11, l_commitdate#12, l_receiptdate#13] Batched: true Location [not included in comparison]/{warehouse_dir}/lineitem] PushedFilters: [IsNotNull(l_receiptdate), IsNotNull(l_commitdate)] ReadSchema: struct (13) ColumnarToRow [codegen id : 2] -Input [4]: [l_orderkey#11, l_suppkey#12, l_commitdate#13, l_receiptdate#14] +Input [4]: [l_orderkey#10, l_suppkey#11, l_commitdate#12, l_receiptdate#13] (14) Filter [codegen id : 2] -Input [4]: [l_orderkey#11, l_suppkey#12, l_commitdate#13, l_receiptdate#14] -Condition : ((isnotnull(l_receiptdate#14) AND isnotnull(l_commitdate#13)) AND (l_receiptdate#14 > l_commitdate#13)) +Input [4]: [l_orderkey#10, l_suppkey#11, l_commitdate#12, l_receiptdate#13] +Condition : ((isnotnull(l_receiptdate#13) AND isnotnull(l_commitdate#12)) AND (l_receiptdate#13 > l_commitdate#12)) (15) Project [codegen id : 2] -Output [2]: [l_orderkey#11, l_suppkey#12] -Input [4]: [l_orderkey#11, l_suppkey#12, l_commitdate#13, l_receiptdate#14] +Output [2]: [l_orderkey#10, l_suppkey#11] +Input [4]: [l_orderkey#10, l_suppkey#11, l_commitdate#12, l_receiptdate#13] (16) BroadcastExchange -Input [2]: [l_orderkey#11, l_suppkey#12] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#15] +Input [2]: [l_orderkey#10, l_suppkey#11] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=2] (17) BroadcastHashJoin [codegen id : 3] Left keys [1]: [l_orderkey#4] -Right keys [1]: [l_orderkey#11] -Join condition: NOT (l_suppkey#12 = l_suppkey#5) +Right keys [1]: [l_orderkey#10] +Join condition: NOT (l_suppkey#11 = l_suppkey#5) (18) BroadcastExchange Input [2]: [l_orderkey#4, l_suppkey#5] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [id=#16] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [plan_id=3] (19) BroadcastHashJoin [codegen id : 6] Left keys [1]: [s_suppkey#1] @@ -130,86 +130,86 @@ Output [3]: [s_name#2, s_nationkey#3, l_orderkey#4] Input [5]: [s_suppkey#1, s_name#2, s_nationkey#3, l_orderkey#4, l_suppkey#5] (21) Scan parquet default.orders -Output [2]: [o_orderkey#17, o_orderstatus#18] +Output [2]: [o_orderkey#14, o_orderstatus#15] Batched: true Location [not included in comparison]/{warehouse_dir}/orders] PushedFilters: [IsNotNull(o_orderstatus), EqualTo(o_orderstatus,F), IsNotNull(o_orderkey)] ReadSchema: struct (22) ColumnarToRow [codegen id : 4] -Input [2]: [o_orderkey#17, o_orderstatus#18] +Input [2]: [o_orderkey#14, o_orderstatus#15] (23) Filter [codegen id : 4] -Input [2]: [o_orderkey#17, o_orderstatus#18] -Condition : ((isnotnull(o_orderstatus#18) AND (o_orderstatus#18 = F)) AND isnotnull(o_orderkey#17)) +Input [2]: [o_orderkey#14, o_orderstatus#15] +Condition : ((isnotnull(o_orderstatus#15) AND (o_orderstatus#15 = F)) AND isnotnull(o_orderkey#14)) (24) Project [codegen id : 4] -Output [1]: [o_orderkey#17] -Input [2]: [o_orderkey#17, o_orderstatus#18] +Output [1]: [o_orderkey#14] +Input [2]: [o_orderkey#14, o_orderstatus#15] (25) BroadcastExchange -Input [1]: [o_orderkey#17] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#19] +Input [1]: [o_orderkey#14] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=4] (26) BroadcastHashJoin [codegen id : 6] Left keys [1]: [l_orderkey#4] -Right keys [1]: [o_orderkey#17] +Right keys [1]: [o_orderkey#14] Join condition: None (27) Project [codegen id : 6] Output [2]: [s_name#2, s_nationkey#3] -Input [4]: [s_name#2, s_nationkey#3, l_orderkey#4, o_orderkey#17] +Input [4]: [s_name#2, s_nationkey#3, l_orderkey#4, o_orderkey#14] (28) Scan parquet default.nation -Output [2]: [n_nationkey#20, n_name#21] +Output [2]: [n_nationkey#16, n_name#17] Batched: true Location [not included in comparison]/{warehouse_dir}/nation] PushedFilters: [IsNotNull(n_name), EqualTo(n_name,SAUDI ARABIA), IsNotNull(n_nationkey)] ReadSchema: struct (29) ColumnarToRow [codegen id : 5] -Input [2]: [n_nationkey#20, n_name#21] +Input [2]: [n_nationkey#16, n_name#17] (30) Filter [codegen id : 5] -Input [2]: [n_nationkey#20, n_name#21] -Condition : ((isnotnull(n_name#21) AND (n_name#21 = SAUDI ARABIA)) AND isnotnull(n_nationkey#20)) +Input [2]: [n_nationkey#16, n_name#17] +Condition : ((isnotnull(n_name#17) AND (n_name#17 = SAUDI ARABIA)) AND isnotnull(n_nationkey#16)) (31) Project [codegen id : 5] -Output [1]: [n_nationkey#20] -Input [2]: [n_nationkey#20, n_name#21] +Output [1]: [n_nationkey#16] +Input [2]: [n_nationkey#16, n_name#17] (32) BroadcastExchange -Input [1]: [n_nationkey#20] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#22] +Input [1]: [n_nationkey#16] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=5] (33) BroadcastHashJoin [codegen id : 6] Left keys [1]: [s_nationkey#3] -Right keys [1]: [n_nationkey#20] +Right keys [1]: [n_nationkey#16] Join condition: None (34) Project [codegen id : 6] Output [1]: [s_name#2] -Input [3]: [s_name#2, s_nationkey#3, n_nationkey#20] +Input [3]: [s_name#2, s_nationkey#3, n_nationkey#16] (35) HashAggregate [codegen id : 6] Input [1]: [s_name#2] Keys [1]: [s_name#2] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#23] -Results [2]: [s_name#2, count#24] +Aggregate Attributes [1]: [count#18] +Results [2]: [s_name#2, count#19] (36) Exchange -Input [2]: [s_name#2, count#24] -Arguments: hashpartitioning(s_name#2, 5), ENSURE_REQUIREMENTS, [id=#25] +Input [2]: [s_name#2, count#19] +Arguments: hashpartitioning(s_name#2, 5), ENSURE_REQUIREMENTS, [plan_id=6] (37) HashAggregate [codegen id : 7] -Input [2]: [s_name#2, count#24] +Input [2]: [s_name#2, count#19] Keys [1]: [s_name#2] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#26] -Results [2]: [s_name#2, count(1)#26 AS numwait#27] +Aggregate Attributes [1]: [count(1)#20] +Results [2]: [s_name#2, count(1)#20 AS numwait#21] (38) TakeOrderedAndProject -Input [2]: [s_name#2, numwait#27] -Arguments: 100, [numwait#27 DESC NULLS LAST, s_name#2 ASC NULLS FIRST], [s_name#2, numwait#27] +Input [2]: [s_name#2, numwait#21] +Arguments: 100, [numwait#21 DESC NULLS LAST, s_name#2 ASC NULLS FIRST], [s_name#2, numwait#21] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q22/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q22/explain.txt index d9dc261771e0e..b9bc2eb0526dc 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q22/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q22/explain.txt @@ -39,7 +39,7 @@ Input [1]: [o_custkey#6] (6) BroadcastExchange Input [1]: [o_custkey#6] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=1] (7) BroadcastHashJoin [codegen id : 2] Left keys [1]: [c_custkey#1] @@ -47,34 +47,34 @@ Right keys [1]: [o_custkey#6] Join condition: None (8) Project [codegen id : 2] -Output [2]: [substring(c_phone#2, 1, 2) AS cntrycode#8, c_acctbal#3] +Output [2]: [substring(c_phone#2, 1, 2) AS cntrycode#7, c_acctbal#3] Input [3]: [c_custkey#1, c_phone#2, c_acctbal#3] (9) HashAggregate [codegen id : 2] -Input [2]: [cntrycode#8, c_acctbal#3] -Keys [1]: [cntrycode#8] +Input [2]: [cntrycode#7, c_acctbal#3] +Keys [1]: [cntrycode#7] Functions [2]: [partial_count(1), partial_sum(c_acctbal#3)] -Aggregate Attributes [3]: [count#9, sum#10, isEmpty#11] -Results [4]: [cntrycode#8, count#12, sum#13, isEmpty#14] +Aggregate Attributes [3]: [count#8, sum#9, isEmpty#10] +Results [4]: [cntrycode#7, count#11, sum#12, isEmpty#13] (10) Exchange -Input [4]: [cntrycode#8, count#12, sum#13, isEmpty#14] -Arguments: hashpartitioning(cntrycode#8, 5), ENSURE_REQUIREMENTS, [id=#15] +Input [4]: [cntrycode#7, count#11, sum#12, isEmpty#13] +Arguments: hashpartitioning(cntrycode#7, 5), ENSURE_REQUIREMENTS, [plan_id=2] (11) HashAggregate [codegen id : 3] -Input [4]: [cntrycode#8, count#12, sum#13, isEmpty#14] -Keys [1]: [cntrycode#8] +Input [4]: [cntrycode#7, count#11, sum#12, isEmpty#13] +Keys [1]: [cntrycode#7] Functions [2]: [count(1), sum(c_acctbal#3)] -Aggregate Attributes [2]: [count(1)#16, sum(c_acctbal#3)#17] -Results [3]: [cntrycode#8, count(1)#16 AS numcust#18, sum(c_acctbal#3)#17 AS totacctbal#19] +Aggregate Attributes [2]: [count(1)#14, sum(c_acctbal#3)#15] +Results [3]: [cntrycode#7, count(1)#14 AS numcust#16, sum(c_acctbal#3)#15 AS totacctbal#17] (12) Exchange -Input [3]: [cntrycode#8, numcust#18, totacctbal#19] -Arguments: rangepartitioning(cntrycode#8 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#20] +Input [3]: [cntrycode#7, numcust#16, totacctbal#17] +Arguments: rangepartitioning(cntrycode#7 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=3] (13) Sort [codegen id : 4] -Input [3]: [cntrycode#8, numcust#18, totacctbal#19] -Arguments: [cntrycode#8 ASC NULLS FIRST], true, 0 +Input [3]: [cntrycode#7, numcust#16, totacctbal#17] +Arguments: [cntrycode#7 ASC NULLS FIRST], true, 0 ===== Subqueries ===== @@ -89,39 +89,39 @@ Subquery:1 Hosting operator id = 3 Hosting Expression = Subquery scalar-subquery (14) Scan parquet default.customer -Output [2]: [c_phone#21, c_acctbal#22] +Output [2]: [c_phone#18, c_acctbal#19] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_acctbal), GreaterThan(c_acctbal,0)] ReadSchema: struct (15) ColumnarToRow [codegen id : 1] -Input [2]: [c_phone#21, c_acctbal#22] +Input [2]: [c_phone#18, c_acctbal#19] (16) Filter [codegen id : 1] -Input [2]: [c_phone#21, c_acctbal#22] -Condition : ((isnotnull(c_acctbal#22) AND (c_acctbal#22 > 0)) AND substring(c_phone#21, 1, 2) IN (13,31,23,29,30,18,17)) +Input [2]: [c_phone#18, c_acctbal#19] +Condition : ((isnotnull(c_acctbal#19) AND (c_acctbal#19 > 0)) AND substring(c_phone#18, 1, 2) IN (13,31,23,29,30,18,17)) (17) Project [codegen id : 1] -Output [1]: [c_acctbal#22] -Input [2]: [c_phone#21, c_acctbal#22] +Output [1]: [c_acctbal#19] +Input [2]: [c_phone#18, c_acctbal#19] (18) HashAggregate [codegen id : 1] -Input [1]: [c_acctbal#22] +Input [1]: [c_acctbal#19] Keys: [] -Functions [1]: [partial_avg(UnscaledValue(c_acctbal#22))] -Aggregate Attributes [2]: [sum#23, count#24] -Results [2]: [sum#25, count#26] +Functions [1]: [partial_avg(UnscaledValue(c_acctbal#19))] +Aggregate Attributes [2]: [sum#20, count#21] +Results [2]: [sum#22, count#23] (19) Exchange -Input [2]: [sum#25, count#26] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#27] +Input [2]: [sum#22, count#23] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=4] (20) HashAggregate [codegen id : 2] -Input [2]: [sum#25, count#26] +Input [2]: [sum#22, count#23] Keys: [] -Functions [1]: [avg(UnscaledValue(c_acctbal#22))] -Aggregate Attributes [1]: [avg(UnscaledValue(c_acctbal#22))#28] -Results [1]: [cast((avg(UnscaledValue(c_acctbal#22))#28 / 1.0) as decimal(14,4)) AS avg(c_acctbal)#29] +Functions [1]: [avg(UnscaledValue(c_acctbal#19))] +Aggregate Attributes [1]: [avg(UnscaledValue(c_acctbal#19))#24] +Results [1]: [cast((avg(UnscaledValue(c_acctbal#19))#24 / 1.0) as decimal(14,4)) AS avg(c_acctbal)#25] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q3/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q3/explain.txt index e0243ce3bbd52..49285d759b09a 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q3/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q3/explain.txt @@ -56,7 +56,7 @@ Condition : (((isnotnull(o_orderdate#5) AND (o_orderdate#5 < 1995-03-15)) AND is (8) BroadcastExchange Input [4]: [o_orderkey#3, o_custkey#4, o_orderdate#5, o_shippriority#6] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false]),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false]),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 3] Left keys [1]: [c_custkey#1] @@ -68,55 +68,55 @@ Output [3]: [o_orderkey#3, o_orderdate#5, o_shippriority#6] Input [5]: [c_custkey#1, o_orderkey#3, o_custkey#4, o_orderdate#5, o_shippriority#6] (11) Scan parquet default.lineitem -Output [4]: [l_orderkey#8, l_extendedprice#9, l_discount#10, l_shipdate#11] +Output [4]: [l_orderkey#7, l_extendedprice#8, l_discount#9, l_shipdate#10] Batched: true Location [not included in comparison]/{warehouse_dir}/lineitem] PushedFilters: [IsNotNull(l_shipdate), GreaterThan(l_shipdate,1995-03-15), IsNotNull(l_orderkey)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [l_orderkey#8, l_extendedprice#9, l_discount#10, l_shipdate#11] +Input [4]: [l_orderkey#7, l_extendedprice#8, l_discount#9, l_shipdate#10] (13) Filter [codegen id : 2] -Input [4]: [l_orderkey#8, l_extendedprice#9, l_discount#10, l_shipdate#11] -Condition : ((isnotnull(l_shipdate#11) AND (l_shipdate#11 > 1995-03-15)) AND isnotnull(l_orderkey#8)) +Input [4]: [l_orderkey#7, l_extendedprice#8, l_discount#9, l_shipdate#10] +Condition : ((isnotnull(l_shipdate#10) AND (l_shipdate#10 > 1995-03-15)) AND isnotnull(l_orderkey#7)) (14) Project [codegen id : 2] -Output [3]: [l_orderkey#8, l_extendedprice#9, l_discount#10] -Input [4]: [l_orderkey#8, l_extendedprice#9, l_discount#10, l_shipdate#11] +Output [3]: [l_orderkey#7, l_extendedprice#8, l_discount#9] +Input [4]: [l_orderkey#7, l_extendedprice#8, l_discount#9, l_shipdate#10] (15) BroadcastExchange -Input [3]: [l_orderkey#8, l_extendedprice#9, l_discount#10] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#12] +Input [3]: [l_orderkey#7, l_extendedprice#8, l_discount#9] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=2] (16) BroadcastHashJoin [codegen id : 3] Left keys [1]: [o_orderkey#3] -Right keys [1]: [l_orderkey#8] +Right keys [1]: [l_orderkey#7] Join condition: None (17) Project [codegen id : 3] -Output [5]: [o_orderdate#5, o_shippriority#6, l_orderkey#8, l_extendedprice#9, l_discount#10] -Input [6]: [o_orderkey#3, o_orderdate#5, o_shippriority#6, l_orderkey#8, l_extendedprice#9, l_discount#10] +Output [5]: [o_orderdate#5, o_shippriority#6, l_orderkey#7, l_extendedprice#8, l_discount#9] +Input [6]: [o_orderkey#3, o_orderdate#5, o_shippriority#6, l_orderkey#7, l_extendedprice#8, l_discount#9] (18) HashAggregate [codegen id : 3] -Input [5]: [o_orderdate#5, o_shippriority#6, l_orderkey#8, l_extendedprice#9, l_discount#10] -Keys [3]: [l_orderkey#8, o_orderdate#5, o_shippriority#6] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] -Aggregate Attributes [2]: [sum#13, isEmpty#14] -Results [5]: [l_orderkey#8, o_orderdate#5, o_shippriority#6, sum#15, isEmpty#16] +Input [5]: [o_orderdate#5, o_shippriority#6, l_orderkey#7, l_extendedprice#8, l_discount#9] +Keys [3]: [l_orderkey#7, o_orderdate#5, o_shippriority#6] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#8 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#9 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] +Aggregate Attributes [2]: [sum#11, isEmpty#12] +Results [5]: [l_orderkey#7, o_orderdate#5, o_shippriority#6, sum#13, isEmpty#14] (19) Exchange -Input [5]: [l_orderkey#8, o_orderdate#5, o_shippriority#6, sum#15, isEmpty#16] -Arguments: hashpartitioning(l_orderkey#8, o_orderdate#5, o_shippriority#6, 5), ENSURE_REQUIREMENTS, [id=#17] +Input [5]: [l_orderkey#7, o_orderdate#5, o_shippriority#6, sum#13, isEmpty#14] +Arguments: hashpartitioning(l_orderkey#7, o_orderdate#5, o_shippriority#6, 5), ENSURE_REQUIREMENTS, [plan_id=3] (20) HashAggregate [codegen id : 4] -Input [5]: [l_orderkey#8, o_orderdate#5, o_shippriority#6, sum#15, isEmpty#16] -Keys [3]: [l_orderkey#8, o_orderdate#5, o_shippriority#6] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#18] -Results [4]: [l_orderkey#8, sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#18 AS revenue#19, o_orderdate#5, o_shippriority#6] +Input [5]: [l_orderkey#7, o_orderdate#5, o_shippriority#6, sum#13, isEmpty#14] +Keys [3]: [l_orderkey#7, o_orderdate#5, o_shippriority#6] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#8 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#9 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#8 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#9 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#15] +Results [4]: [l_orderkey#7, sum(CheckOverflow((promote_precision(cast(l_extendedprice#8 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#9 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#15 AS revenue#16, o_orderdate#5, o_shippriority#6] (21) TakeOrderedAndProject -Input [4]: [l_orderkey#8, revenue#19, o_orderdate#5, o_shippriority#6] -Arguments: 10, [revenue#19 DESC NULLS LAST, o_orderdate#5 ASC NULLS FIRST], [l_orderkey#8, revenue#19, o_orderdate#5, o_shippriority#6] +Input [4]: [l_orderkey#7, revenue#16, o_orderdate#5, o_shippriority#6] +Arguments: 10, [revenue#16 DESC NULLS LAST, o_orderdate#5 ASC NULLS FIRST], [l_orderkey#7, revenue#16, o_orderdate#5, o_shippriority#6] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q4/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q4/explain.txt index 064d659e1d4ed..b12a2d7593671 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q4/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q4/explain.txt @@ -55,7 +55,7 @@ Input [3]: [l_orderkey#4, l_commitdate#5, l_receiptdate#6] (9) BroadcastExchange Input [1]: [l_orderkey#4] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#7] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=1] (10) BroadcastHashJoin [codegen id : 2] Left keys [1]: [o_orderkey#1] @@ -70,25 +70,25 @@ Input [2]: [o_orderkey#1, o_orderpriority#3] Input [1]: [o_orderpriority#3] Keys [1]: [o_orderpriority#3] Functions [1]: [partial_count(1)] -Aggregate Attributes [1]: [count#8] -Results [2]: [o_orderpriority#3, count#9] +Aggregate Attributes [1]: [count#7] +Results [2]: [o_orderpriority#3, count#8] (13) Exchange -Input [2]: [o_orderpriority#3, count#9] -Arguments: hashpartitioning(o_orderpriority#3, 5), ENSURE_REQUIREMENTS, [id=#10] +Input [2]: [o_orderpriority#3, count#8] +Arguments: hashpartitioning(o_orderpriority#3, 5), ENSURE_REQUIREMENTS, [plan_id=2] (14) HashAggregate [codegen id : 3] -Input [2]: [o_orderpriority#3, count#9] +Input [2]: [o_orderpriority#3, count#8] Keys [1]: [o_orderpriority#3] Functions [1]: [count(1)] -Aggregate Attributes [1]: [count(1)#11] -Results [2]: [o_orderpriority#3, count(1)#11 AS order_count#12] +Aggregate Attributes [1]: [count(1)#9] +Results [2]: [o_orderpriority#3, count(1)#9 AS order_count#10] (15) Exchange -Input [2]: [o_orderpriority#3, order_count#12] -Arguments: rangepartitioning(o_orderpriority#3 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#13] +Input [2]: [o_orderpriority#3, order_count#10] +Arguments: rangepartitioning(o_orderpriority#3 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=3] (16) Sort [codegen id : 4] -Input [2]: [o_orderpriority#3, order_count#12] +Input [2]: [o_orderpriority#3, order_count#10] Arguments: [o_orderpriority#3 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpch-plan-stability/q5/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q5/explain.txt index c3dbd88338317..9ba43b07cde9f 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q5/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q5/explain.txt @@ -75,7 +75,7 @@ Input [3]: [o_orderkey#3, o_custkey#4, o_orderdate#5] (8) BroadcastExchange Input [2]: [o_orderkey#3, o_custkey#4] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [id=#6] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, true]),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 6] Left keys [1]: [c_custkey#1] @@ -87,140 +87,140 @@ Output [2]: [c_nationkey#2, o_orderkey#3] Input [4]: [c_custkey#1, c_nationkey#2, o_orderkey#3, o_custkey#4] (11) Scan parquet default.lineitem -Output [4]: [l_orderkey#7, l_suppkey#8, l_extendedprice#9, l_discount#10] +Output [4]: [l_orderkey#6, l_suppkey#7, l_extendedprice#8, l_discount#9] Batched: true Location [not included in comparison]/{warehouse_dir}/lineitem] PushedFilters: [IsNotNull(l_orderkey), IsNotNull(l_suppkey)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [4]: [l_orderkey#7, l_suppkey#8, l_extendedprice#9, l_discount#10] +Input [4]: [l_orderkey#6, l_suppkey#7, l_extendedprice#8, l_discount#9] (13) Filter [codegen id : 2] -Input [4]: [l_orderkey#7, l_suppkey#8, l_extendedprice#9, l_discount#10] -Condition : (isnotnull(l_orderkey#7) AND isnotnull(l_suppkey#8)) +Input [4]: [l_orderkey#6, l_suppkey#7, l_extendedprice#8, l_discount#9] +Condition : (isnotnull(l_orderkey#6) AND isnotnull(l_suppkey#7)) (14) BroadcastExchange -Input [4]: [l_orderkey#7, l_suppkey#8, l_extendedprice#9, l_discount#10] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#11] +Input [4]: [l_orderkey#6, l_suppkey#7, l_extendedprice#8, l_discount#9] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 6] Left keys [1]: [o_orderkey#3] -Right keys [1]: [l_orderkey#7] +Right keys [1]: [l_orderkey#6] Join condition: None (16) Project [codegen id : 6] -Output [4]: [c_nationkey#2, l_suppkey#8, l_extendedprice#9, l_discount#10] -Input [6]: [c_nationkey#2, o_orderkey#3, l_orderkey#7, l_suppkey#8, l_extendedprice#9, l_discount#10] +Output [4]: [c_nationkey#2, l_suppkey#7, l_extendedprice#8, l_discount#9] +Input [6]: [c_nationkey#2, o_orderkey#3, l_orderkey#6, l_suppkey#7, l_extendedprice#8, l_discount#9] (17) Scan parquet default.supplier -Output [2]: [s_suppkey#12, s_nationkey#13] +Output [2]: [s_suppkey#10, s_nationkey#11] Batched: true Location [not included in comparison]/{warehouse_dir}/supplier] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [2]: [s_suppkey#12, s_nationkey#13] +Input [2]: [s_suppkey#10, s_nationkey#11] (19) Filter [codegen id : 3] -Input [2]: [s_suppkey#12, s_nationkey#13] -Condition : (isnotnull(s_suppkey#12) AND isnotnull(s_nationkey#13)) +Input [2]: [s_suppkey#10, s_nationkey#11] +Condition : (isnotnull(s_suppkey#10) AND isnotnull(s_nationkey#11)) (20) BroadcastExchange -Input [2]: [s_suppkey#12, s_nationkey#13] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [id=#14] +Input [2]: [s_suppkey#10, s_nationkey#11] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false], input[1, bigint, false]),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 6] -Left keys [2]: [l_suppkey#8, c_nationkey#2] -Right keys [2]: [s_suppkey#12, s_nationkey#13] +Left keys [2]: [l_suppkey#7, c_nationkey#2] +Right keys [2]: [s_suppkey#10, s_nationkey#11] Join condition: None (22) Project [codegen id : 6] -Output [3]: [l_extendedprice#9, l_discount#10, s_nationkey#13] -Input [6]: [c_nationkey#2, l_suppkey#8, l_extendedprice#9, l_discount#10, s_suppkey#12, s_nationkey#13] +Output [3]: [l_extendedprice#8, l_discount#9, s_nationkey#11] +Input [6]: [c_nationkey#2, l_suppkey#7, l_extendedprice#8, l_discount#9, s_suppkey#10, s_nationkey#11] (23) Scan parquet default.nation -Output [3]: [n_nationkey#15, n_name#16, n_regionkey#17] +Output [3]: [n_nationkey#12, n_name#13, n_regionkey#14] Batched: true Location [not included in comparison]/{warehouse_dir}/nation] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct (24) ColumnarToRow [codegen id : 4] -Input [3]: [n_nationkey#15, n_name#16, n_regionkey#17] +Input [3]: [n_nationkey#12, n_name#13, n_regionkey#14] (25) Filter [codegen id : 4] -Input [3]: [n_nationkey#15, n_name#16, n_regionkey#17] -Condition : (isnotnull(n_nationkey#15) AND isnotnull(n_regionkey#17)) +Input [3]: [n_nationkey#12, n_name#13, n_regionkey#14] +Condition : (isnotnull(n_nationkey#12) AND isnotnull(n_regionkey#14)) (26) BroadcastExchange -Input [3]: [n_nationkey#15, n_name#16, n_regionkey#17] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#18] +Input [3]: [n_nationkey#12, n_name#13, n_regionkey#14] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=4] (27) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [s_nationkey#13] -Right keys [1]: [n_nationkey#15] +Left keys [1]: [s_nationkey#11] +Right keys [1]: [n_nationkey#12] Join condition: None (28) Project [codegen id : 6] -Output [4]: [l_extendedprice#9, l_discount#10, n_name#16, n_regionkey#17] -Input [6]: [l_extendedprice#9, l_discount#10, s_nationkey#13, n_nationkey#15, n_name#16, n_regionkey#17] +Output [4]: [l_extendedprice#8, l_discount#9, n_name#13, n_regionkey#14] +Input [6]: [l_extendedprice#8, l_discount#9, s_nationkey#11, n_nationkey#12, n_name#13, n_regionkey#14] (29) Scan parquet default.region -Output [2]: [r_regionkey#19, r_name#20] +Output [2]: [r_regionkey#15, r_name#16] Batched: true Location [not included in comparison]/{warehouse_dir}/region] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,ASIA), IsNotNull(r_regionkey)] ReadSchema: struct (30) ColumnarToRow [codegen id : 5] -Input [2]: [r_regionkey#19, r_name#20] +Input [2]: [r_regionkey#15, r_name#16] (31) Filter [codegen id : 5] -Input [2]: [r_regionkey#19, r_name#20] -Condition : ((isnotnull(r_name#20) AND (r_name#20 = ASIA)) AND isnotnull(r_regionkey#19)) +Input [2]: [r_regionkey#15, r_name#16] +Condition : ((isnotnull(r_name#16) AND (r_name#16 = ASIA)) AND isnotnull(r_regionkey#15)) (32) Project [codegen id : 5] -Output [1]: [r_regionkey#19] -Input [2]: [r_regionkey#19, r_name#20] +Output [1]: [r_regionkey#15] +Input [2]: [r_regionkey#15, r_name#16] (33) BroadcastExchange -Input [1]: [r_regionkey#19] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#21] +Input [1]: [r_regionkey#15] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=5] (34) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [n_regionkey#17] -Right keys [1]: [r_regionkey#19] +Left keys [1]: [n_regionkey#14] +Right keys [1]: [r_regionkey#15] Join condition: None (35) Project [codegen id : 6] -Output [3]: [l_extendedprice#9, l_discount#10, n_name#16] -Input [5]: [l_extendedprice#9, l_discount#10, n_name#16, n_regionkey#17, r_regionkey#19] +Output [3]: [l_extendedprice#8, l_discount#9, n_name#13] +Input [5]: [l_extendedprice#8, l_discount#9, n_name#13, n_regionkey#14, r_regionkey#15] (36) HashAggregate [codegen id : 6] -Input [3]: [l_extendedprice#9, l_discount#10, n_name#16] -Keys [1]: [n_name#16] -Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] -Aggregate Attributes [2]: [sum#22, isEmpty#23] -Results [3]: [n_name#16, sum#24, isEmpty#25] +Input [3]: [l_extendedprice#8, l_discount#9, n_name#13] +Keys [1]: [n_name#13] +Functions [1]: [partial_sum(CheckOverflow((promote_precision(cast(l_extendedprice#8 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#9 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] +Aggregate Attributes [2]: [sum#17, isEmpty#18] +Results [3]: [n_name#13, sum#19, isEmpty#20] (37) Exchange -Input [3]: [n_name#16, sum#24, isEmpty#25] -Arguments: hashpartitioning(n_name#16, 5), ENSURE_REQUIREMENTS, [id=#26] +Input [3]: [n_name#13, sum#19, isEmpty#20] +Arguments: hashpartitioning(n_name#13, 5), ENSURE_REQUIREMENTS, [plan_id=6] (38) HashAggregate [codegen id : 7] -Input [3]: [n_name#16, sum#24, isEmpty#25] -Keys [1]: [n_name#16] -Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#27] -Results [2]: [n_name#16, sum(CheckOverflow((promote_precision(cast(l_extendedprice#9 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#10 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#27 AS revenue#28] +Input [3]: [n_name#13, sum#19, isEmpty#20] +Keys [1]: [n_name#13] +Functions [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#8 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#9 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(cast(l_extendedprice#8 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#9 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#21] +Results [2]: [n_name#13, sum(CheckOverflow((promote_precision(cast(l_extendedprice#8 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#9 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)))#21 AS revenue#22] (39) Exchange -Input [2]: [n_name#16, revenue#28] -Arguments: rangepartitioning(revenue#28 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#29] +Input [2]: [n_name#13, revenue#22] +Arguments: rangepartitioning(revenue#22 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=7] (40) Sort [codegen id : 8] -Input [2]: [n_name#16, revenue#28] -Arguments: [revenue#28 DESC NULLS LAST], true, 0 +Input [2]: [n_name#13, revenue#22] +Arguments: [revenue#22 DESC NULLS LAST], true, 0 diff --git a/sql/core/src/test/resources/tpch-plan-stability/q6/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q6/explain.txt index a092574d73c57..71aee8542d0bc 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q6/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q6/explain.txt @@ -18,12 +18,12 @@ Results [2]: [sum#5, isEmpty#6] (3) Exchange Input [2]: [sum#5, isEmpty#6] -Arguments: SinglePartition, ENSURE_REQUIREMENTS, [id=#7] +Arguments: SinglePartition, ENSURE_REQUIREMENTS, [plan_id=1] (4) HashAggregate [codegen id : 2] Input [2]: [sum#5, isEmpty#6] Keys: [] Functions [1]: [sum(CheckOverflow((promote_precision(l_extendedprice#1) * promote_precision(l_discount#2)), DecimalType(21,0)))] -Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(l_extendedprice#1) * promote_precision(l_discount#2)), DecimalType(21,0)))#8] -Results [1]: [sum(CheckOverflow((promote_precision(l_extendedprice#1) * promote_precision(l_discount#2)), DecimalType(21,0)))#8 AS revenue#9] +Aggregate Attributes [1]: [sum(CheckOverflow((promote_precision(l_extendedprice#1) * promote_precision(l_discount#2)), DecimalType(21,0)))#7] +Results [1]: [sum(CheckOverflow((promote_precision(l_extendedprice#1) * promote_precision(l_discount#2)), DecimalType(21,0)))#7 AS revenue#8] diff --git a/sql/core/src/test/resources/tpch-plan-stability/q7/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q7/explain.txt index 9994d01a28e5c..ec99e035b4bac 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q7/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q7/explain.txt @@ -66,7 +66,7 @@ Condition : ((((isnotnull(l_shipdate#7) AND (l_shipdate#7 >= 1995-01-01)) AND (l (7) BroadcastExchange Input [5]: [l_orderkey#3, l_suppkey#4, l_extendedprice#5, l_discount#6, l_shipdate#7] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false]),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false]),false), [plan_id=1] (8) BroadcastHashJoin [codegen id : 6] Left keys [1]: [s_suppkey#1] @@ -78,121 +78,121 @@ Output [5]: [s_nationkey#2, l_orderkey#3, l_extendedprice#5, l_discount#6, l_shi Input [7]: [s_suppkey#1, s_nationkey#2, l_orderkey#3, l_suppkey#4, l_extendedprice#5, l_discount#6, l_shipdate#7] (10) Scan parquet default.orders -Output [2]: [o_orderkey#9, o_custkey#10] +Output [2]: [o_orderkey#8, o_custkey#9] Batched: true Location [not included in comparison]/{warehouse_dir}/orders] PushedFilters: [IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct (11) ColumnarToRow [codegen id : 2] -Input [2]: [o_orderkey#9, o_custkey#10] +Input [2]: [o_orderkey#8, o_custkey#9] (12) Filter [codegen id : 2] -Input [2]: [o_orderkey#9, o_custkey#10] -Condition : (isnotnull(o_orderkey#9) AND isnotnull(o_custkey#10)) +Input [2]: [o_orderkey#8, o_custkey#9] +Condition : (isnotnull(o_orderkey#8) AND isnotnull(o_custkey#9)) (13) BroadcastExchange -Input [2]: [o_orderkey#9, o_custkey#10] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#11] +Input [2]: [o_orderkey#8, o_custkey#9] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=2] (14) BroadcastHashJoin [codegen id : 6] Left keys [1]: [l_orderkey#3] -Right keys [1]: [o_orderkey#9] +Right keys [1]: [o_orderkey#8] Join condition: None (15) Project [codegen id : 6] -Output [5]: [s_nationkey#2, l_extendedprice#5, l_discount#6, l_shipdate#7, o_custkey#10] -Input [7]: [s_nationkey#2, l_orderkey#3, l_extendedprice#5, l_discount#6, l_shipdate#7, o_orderkey#9, o_custkey#10] +Output [5]: [s_nationkey#2, l_extendedprice#5, l_discount#6, l_shipdate#7, o_custkey#9] +Input [7]: [s_nationkey#2, l_orderkey#3, l_extendedprice#5, l_discount#6, l_shipdate#7, o_orderkey#8, o_custkey#9] (16) Scan parquet default.customer -Output [2]: [c_custkey#12, c_nationkey#13] +Output [2]: [c_custkey#10, c_nationkey#11] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct (17) ColumnarToRow [codegen id : 3] -Input [2]: [c_custkey#12, c_nationkey#13] +Input [2]: [c_custkey#10, c_nationkey#11] (18) Filter [codegen id : 3] -Input [2]: [c_custkey#12, c_nationkey#13] -Condition : (isnotnull(c_custkey#12) AND isnotnull(c_nationkey#13)) +Input [2]: [c_custkey#10, c_nationkey#11] +Condition : (isnotnull(c_custkey#10) AND isnotnull(c_nationkey#11)) (19) BroadcastExchange -Input [2]: [c_custkey#12, c_nationkey#13] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#14] +Input [2]: [c_custkey#10, c_nationkey#11] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=3] (20) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [o_custkey#10] -Right keys [1]: [c_custkey#12] +Left keys [1]: [o_custkey#9] +Right keys [1]: [c_custkey#10] Join condition: None (21) Project [codegen id : 6] -Output [5]: [s_nationkey#2, l_extendedprice#5, l_discount#6, l_shipdate#7, c_nationkey#13] -Input [7]: [s_nationkey#2, l_extendedprice#5, l_discount#6, l_shipdate#7, o_custkey#10, c_custkey#12, c_nationkey#13] +Output [5]: [s_nationkey#2, l_extendedprice#5, l_discount#6, l_shipdate#7, c_nationkey#11] +Input [7]: [s_nationkey#2, l_extendedprice#5, l_discount#6, l_shipdate#7, o_custkey#9, c_custkey#10, c_nationkey#11] (22) Scan parquet default.nation -Output [2]: [n_nationkey#15, n_name#16] +Output [2]: [n_nationkey#12, n_name#13] Batched: true Location [not included in comparison]/{warehouse_dir}/nation] PushedFilters: [IsNotNull(n_nationkey), Or(EqualTo(n_name,FRANCE),EqualTo(n_name,GERMANY))] ReadSchema: struct (23) ColumnarToRow [codegen id : 4] -Input [2]: [n_nationkey#15, n_name#16] +Input [2]: [n_nationkey#12, n_name#13] (24) Filter [codegen id : 4] -Input [2]: [n_nationkey#15, n_name#16] -Condition : (isnotnull(n_nationkey#15) AND ((n_name#16 = FRANCE) OR (n_name#16 = GERMANY))) +Input [2]: [n_nationkey#12, n_name#13] +Condition : (isnotnull(n_nationkey#12) AND ((n_name#13 = FRANCE) OR (n_name#13 = GERMANY))) (25) BroadcastExchange -Input [2]: [n_nationkey#15, n_name#16] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#17] +Input [2]: [n_nationkey#12, n_name#13] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=4] (26) BroadcastHashJoin [codegen id : 6] Left keys [1]: [s_nationkey#2] -Right keys [1]: [n_nationkey#15] +Right keys [1]: [n_nationkey#12] Join condition: None (27) Project [codegen id : 6] -Output [5]: [l_extendedprice#5, l_discount#6, l_shipdate#7, c_nationkey#13, n_name#16] -Input [7]: [s_nationkey#2, l_extendedprice#5, l_discount#6, l_shipdate#7, c_nationkey#13, n_nationkey#15, n_name#16] +Output [5]: [l_extendedprice#5, l_discount#6, l_shipdate#7, c_nationkey#11, n_name#13] +Input [7]: [s_nationkey#2, l_extendedprice#5, l_discount#6, l_shipdate#7, c_nationkey#11, n_nationkey#12, n_name#13] (28) ReusedExchange [Reuses operator id: 25] -Output [2]: [n_nationkey#18, n_name#19] +Output [2]: [n_nationkey#14, n_name#15] (29) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [c_nationkey#13] -Right keys [1]: [n_nationkey#18] -Join condition: (((n_name#16 = FRANCE) AND (n_name#19 = GERMANY)) OR ((n_name#16 = GERMANY) AND (n_name#19 = FRANCE))) +Left keys [1]: [c_nationkey#11] +Right keys [1]: [n_nationkey#14] +Join condition: (((n_name#13 = FRANCE) AND (n_name#15 = GERMANY)) OR ((n_name#13 = GERMANY) AND (n_name#15 = FRANCE))) (30) Project [codegen id : 6] -Output [4]: [n_name#16 AS supp_nation#20, n_name#19 AS cust_nation#21, year(l_shipdate#7) AS l_year#22, CheckOverflow((promote_precision(cast(l_extendedprice#5 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#6 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) AS volume#23] -Input [7]: [l_extendedprice#5, l_discount#6, l_shipdate#7, c_nationkey#13, n_name#16, n_nationkey#18, n_name#19] +Output [4]: [n_name#13 AS supp_nation#16, n_name#15 AS cust_nation#17, year(l_shipdate#7) AS l_year#18, CheckOverflow((promote_precision(cast(l_extendedprice#5 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#6 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) AS volume#19] +Input [7]: [l_extendedprice#5, l_discount#6, l_shipdate#7, c_nationkey#11, n_name#13, n_nationkey#14, n_name#15] (31) HashAggregate [codegen id : 6] -Input [4]: [supp_nation#20, cust_nation#21, l_year#22, volume#23] -Keys [3]: [supp_nation#20, cust_nation#21, l_year#22] -Functions [1]: [partial_sum(volume#23)] -Aggregate Attributes [2]: [sum#24, isEmpty#25] -Results [5]: [supp_nation#20, cust_nation#21, l_year#22, sum#26, isEmpty#27] +Input [4]: [supp_nation#16, cust_nation#17, l_year#18, volume#19] +Keys [3]: [supp_nation#16, cust_nation#17, l_year#18] +Functions [1]: [partial_sum(volume#19)] +Aggregate Attributes [2]: [sum#20, isEmpty#21] +Results [5]: [supp_nation#16, cust_nation#17, l_year#18, sum#22, isEmpty#23] (32) Exchange -Input [5]: [supp_nation#20, cust_nation#21, l_year#22, sum#26, isEmpty#27] -Arguments: hashpartitioning(supp_nation#20, cust_nation#21, l_year#22, 5), ENSURE_REQUIREMENTS, [id=#28] +Input [5]: [supp_nation#16, cust_nation#17, l_year#18, sum#22, isEmpty#23] +Arguments: hashpartitioning(supp_nation#16, cust_nation#17, l_year#18, 5), ENSURE_REQUIREMENTS, [plan_id=5] (33) HashAggregate [codegen id : 7] -Input [5]: [supp_nation#20, cust_nation#21, l_year#22, sum#26, isEmpty#27] -Keys [3]: [supp_nation#20, cust_nation#21, l_year#22] -Functions [1]: [sum(volume#23)] -Aggregate Attributes [1]: [sum(volume#23)#29] -Results [4]: [supp_nation#20, cust_nation#21, l_year#22, sum(volume#23)#29 AS revenue#30] +Input [5]: [supp_nation#16, cust_nation#17, l_year#18, sum#22, isEmpty#23] +Keys [3]: [supp_nation#16, cust_nation#17, l_year#18] +Functions [1]: [sum(volume#19)] +Aggregate Attributes [1]: [sum(volume#19)#24] +Results [4]: [supp_nation#16, cust_nation#17, l_year#18, sum(volume#19)#24 AS revenue#25] (34) Exchange -Input [4]: [supp_nation#20, cust_nation#21, l_year#22, revenue#30] -Arguments: rangepartitioning(supp_nation#20 ASC NULLS FIRST, cust_nation#21 ASC NULLS FIRST, l_year#22 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#31] +Input [4]: [supp_nation#16, cust_nation#17, l_year#18, revenue#25] +Arguments: rangepartitioning(supp_nation#16 ASC NULLS FIRST, cust_nation#17 ASC NULLS FIRST, l_year#18 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=6] (35) Sort [codegen id : 8] -Input [4]: [supp_nation#20, cust_nation#21, l_year#22, revenue#30] -Arguments: [supp_nation#20 ASC NULLS FIRST, cust_nation#21 ASC NULLS FIRST, l_year#22 ASC NULLS FIRST], true, 0 +Input [4]: [supp_nation#16, cust_nation#17, l_year#18, revenue#25] +Arguments: [supp_nation#16 ASC NULLS FIRST, cust_nation#17 ASC NULLS FIRST, l_year#18 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpch-plan-stability/q8/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q8/explain.txt index 4eb4f811035d8..651269127a090 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q8/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q8/explain.txt @@ -87,7 +87,7 @@ Condition : ((isnotnull(l_partkey#4) AND isnotnull(l_suppkey#5)) AND isnotnull(l (8) BroadcastExchange Input [5]: [l_orderkey#3, l_partkey#4, l_suppkey#5, l_extendedprice#6, l_discount#7] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false]),false), [id=#8] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false]),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 8] Left keys [1]: [p_partkey#1] @@ -99,194 +99,194 @@ Output [4]: [l_orderkey#3, l_suppkey#5, l_extendedprice#6, l_discount#7] Input [6]: [p_partkey#1, l_orderkey#3, l_partkey#4, l_suppkey#5, l_extendedprice#6, l_discount#7] (11) Scan parquet default.supplier -Output [2]: [s_suppkey#9, s_nationkey#10] +Output [2]: [s_suppkey#8, s_nationkey#9] Batched: true Location [not included in comparison]/{warehouse_dir}/supplier] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [2]: [s_suppkey#9, s_nationkey#10] +Input [2]: [s_suppkey#8, s_nationkey#9] (13) Filter [codegen id : 2] -Input [2]: [s_suppkey#9, s_nationkey#10] -Condition : (isnotnull(s_suppkey#9) AND isnotnull(s_nationkey#10)) +Input [2]: [s_suppkey#8, s_nationkey#9] +Condition : (isnotnull(s_suppkey#8) AND isnotnull(s_nationkey#9)) (14) BroadcastExchange -Input [2]: [s_suppkey#9, s_nationkey#10] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#11] +Input [2]: [s_suppkey#8, s_nationkey#9] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 8] Left keys [1]: [l_suppkey#5] -Right keys [1]: [s_suppkey#9] +Right keys [1]: [s_suppkey#8] Join condition: None (16) Project [codegen id : 8] -Output [4]: [l_orderkey#3, l_extendedprice#6, l_discount#7, s_nationkey#10] -Input [6]: [l_orderkey#3, l_suppkey#5, l_extendedprice#6, l_discount#7, s_suppkey#9, s_nationkey#10] +Output [4]: [l_orderkey#3, l_extendedprice#6, l_discount#7, s_nationkey#9] +Input [6]: [l_orderkey#3, l_suppkey#5, l_extendedprice#6, l_discount#7, s_suppkey#8, s_nationkey#9] (17) Scan parquet default.orders -Output [3]: [o_orderkey#12, o_custkey#13, o_orderdate#14] +Output [3]: [o_orderkey#10, o_custkey#11, o_orderdate#12] Batched: true Location [not included in comparison]/{warehouse_dir}/orders] PushedFilters: [IsNotNull(o_orderdate), GreaterThanOrEqual(o_orderdate,1995-01-01), LessThanOrEqual(o_orderdate,1996-12-31), IsNotNull(o_orderkey), IsNotNull(o_custkey)] ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [3]: [o_orderkey#12, o_custkey#13, o_orderdate#14] +Input [3]: [o_orderkey#10, o_custkey#11, o_orderdate#12] (19) Filter [codegen id : 3] -Input [3]: [o_orderkey#12, o_custkey#13, o_orderdate#14] -Condition : ((((isnotnull(o_orderdate#14) AND (o_orderdate#14 >= 1995-01-01)) AND (o_orderdate#14 <= 1996-12-31)) AND isnotnull(o_orderkey#12)) AND isnotnull(o_custkey#13)) +Input [3]: [o_orderkey#10, o_custkey#11, o_orderdate#12] +Condition : ((((isnotnull(o_orderdate#12) AND (o_orderdate#12 >= 1995-01-01)) AND (o_orderdate#12 <= 1996-12-31)) AND isnotnull(o_orderkey#10)) AND isnotnull(o_custkey#11)) (20) BroadcastExchange -Input [3]: [o_orderkey#12, o_custkey#13, o_orderdate#14] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#15] +Input [3]: [o_orderkey#10, o_custkey#11, o_orderdate#12] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 8] Left keys [1]: [l_orderkey#3] -Right keys [1]: [o_orderkey#12] +Right keys [1]: [o_orderkey#10] Join condition: None (22) Project [codegen id : 8] -Output [5]: [l_extendedprice#6, l_discount#7, s_nationkey#10, o_custkey#13, o_orderdate#14] -Input [7]: [l_orderkey#3, l_extendedprice#6, l_discount#7, s_nationkey#10, o_orderkey#12, o_custkey#13, o_orderdate#14] +Output [5]: [l_extendedprice#6, l_discount#7, s_nationkey#9, o_custkey#11, o_orderdate#12] +Input [7]: [l_orderkey#3, l_extendedprice#6, l_discount#7, s_nationkey#9, o_orderkey#10, o_custkey#11, o_orderdate#12] (23) Scan parquet default.customer -Output [2]: [c_custkey#16, c_nationkey#17] +Output [2]: [c_custkey#13, c_nationkey#14] Batched: true Location [not included in comparison]/{warehouse_dir}/customer] PushedFilters: [IsNotNull(c_custkey), IsNotNull(c_nationkey)] ReadSchema: struct (24) ColumnarToRow [codegen id : 4] -Input [2]: [c_custkey#16, c_nationkey#17] +Input [2]: [c_custkey#13, c_nationkey#14] (25) Filter [codegen id : 4] -Input [2]: [c_custkey#16, c_nationkey#17] -Condition : (isnotnull(c_custkey#16) AND isnotnull(c_nationkey#17)) +Input [2]: [c_custkey#13, c_nationkey#14] +Condition : (isnotnull(c_custkey#13) AND isnotnull(c_nationkey#14)) (26) BroadcastExchange -Input [2]: [c_custkey#16, c_nationkey#17] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#18] +Input [2]: [c_custkey#13, c_nationkey#14] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=4] (27) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [o_custkey#13] -Right keys [1]: [c_custkey#16] +Left keys [1]: [o_custkey#11] +Right keys [1]: [c_custkey#13] Join condition: None (28) Project [codegen id : 8] -Output [5]: [l_extendedprice#6, l_discount#7, s_nationkey#10, o_orderdate#14, c_nationkey#17] -Input [7]: [l_extendedprice#6, l_discount#7, s_nationkey#10, o_custkey#13, o_orderdate#14, c_custkey#16, c_nationkey#17] +Output [5]: [l_extendedprice#6, l_discount#7, s_nationkey#9, o_orderdate#12, c_nationkey#14] +Input [7]: [l_extendedprice#6, l_discount#7, s_nationkey#9, o_custkey#11, o_orderdate#12, c_custkey#13, c_nationkey#14] (29) Scan parquet default.nation -Output [2]: [n_nationkey#19, n_regionkey#20] +Output [2]: [n_nationkey#15, n_regionkey#16] Batched: true Location [not included in comparison]/{warehouse_dir}/nation] PushedFilters: [IsNotNull(n_nationkey), IsNotNull(n_regionkey)] ReadSchema: struct (30) ColumnarToRow [codegen id : 5] -Input [2]: [n_nationkey#19, n_regionkey#20] +Input [2]: [n_nationkey#15, n_regionkey#16] (31) Filter [codegen id : 5] -Input [2]: [n_nationkey#19, n_regionkey#20] -Condition : (isnotnull(n_nationkey#19) AND isnotnull(n_regionkey#20)) +Input [2]: [n_nationkey#15, n_regionkey#16] +Condition : (isnotnull(n_nationkey#15) AND isnotnull(n_regionkey#16)) (32) BroadcastExchange -Input [2]: [n_nationkey#19, n_regionkey#20] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#21] +Input [2]: [n_nationkey#15, n_regionkey#16] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=5] (33) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [c_nationkey#17] -Right keys [1]: [n_nationkey#19] +Left keys [1]: [c_nationkey#14] +Right keys [1]: [n_nationkey#15] Join condition: None (34) Project [codegen id : 8] -Output [5]: [l_extendedprice#6, l_discount#7, s_nationkey#10, o_orderdate#14, n_regionkey#20] -Input [7]: [l_extendedprice#6, l_discount#7, s_nationkey#10, o_orderdate#14, c_nationkey#17, n_nationkey#19, n_regionkey#20] +Output [5]: [l_extendedprice#6, l_discount#7, s_nationkey#9, o_orderdate#12, n_regionkey#16] +Input [7]: [l_extendedprice#6, l_discount#7, s_nationkey#9, o_orderdate#12, c_nationkey#14, n_nationkey#15, n_regionkey#16] (35) Scan parquet default.nation -Output [2]: [n_nationkey#22, n_name#23] +Output [2]: [n_nationkey#17, n_name#18] Batched: true Location [not included in comparison]/{warehouse_dir}/nation] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct (36) ColumnarToRow [codegen id : 6] -Input [2]: [n_nationkey#22, n_name#23] +Input [2]: [n_nationkey#17, n_name#18] (37) Filter [codegen id : 6] -Input [2]: [n_nationkey#22, n_name#23] -Condition : isnotnull(n_nationkey#22) +Input [2]: [n_nationkey#17, n_name#18] +Condition : isnotnull(n_nationkey#17) (38) BroadcastExchange -Input [2]: [n_nationkey#22, n_name#23] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#24] +Input [2]: [n_nationkey#17, n_name#18] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=6] (39) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [s_nationkey#10] -Right keys [1]: [n_nationkey#22] +Left keys [1]: [s_nationkey#9] +Right keys [1]: [n_nationkey#17] Join condition: None (40) Project [codegen id : 8] -Output [5]: [l_extendedprice#6, l_discount#7, o_orderdate#14, n_regionkey#20, n_name#23] -Input [7]: [l_extendedprice#6, l_discount#7, s_nationkey#10, o_orderdate#14, n_regionkey#20, n_nationkey#22, n_name#23] +Output [5]: [l_extendedprice#6, l_discount#7, o_orderdate#12, n_regionkey#16, n_name#18] +Input [7]: [l_extendedprice#6, l_discount#7, s_nationkey#9, o_orderdate#12, n_regionkey#16, n_nationkey#17, n_name#18] (41) Scan parquet default.region -Output [2]: [r_regionkey#25, r_name#26] +Output [2]: [r_regionkey#19, r_name#20] Batched: true Location [not included in comparison]/{warehouse_dir}/region] PushedFilters: [IsNotNull(r_name), EqualTo(r_name,AMERICA), IsNotNull(r_regionkey)] ReadSchema: struct (42) ColumnarToRow [codegen id : 7] -Input [2]: [r_regionkey#25, r_name#26] +Input [2]: [r_regionkey#19, r_name#20] (43) Filter [codegen id : 7] -Input [2]: [r_regionkey#25, r_name#26] -Condition : ((isnotnull(r_name#26) AND (r_name#26 = AMERICA)) AND isnotnull(r_regionkey#25)) +Input [2]: [r_regionkey#19, r_name#20] +Condition : ((isnotnull(r_name#20) AND (r_name#20 = AMERICA)) AND isnotnull(r_regionkey#19)) (44) Project [codegen id : 7] -Output [1]: [r_regionkey#25] -Input [2]: [r_regionkey#25, r_name#26] +Output [1]: [r_regionkey#19] +Input [2]: [r_regionkey#19, r_name#20] (45) BroadcastExchange -Input [1]: [r_regionkey#25] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [id=#27] +Input [1]: [r_regionkey#19] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, true]),false), [plan_id=7] (46) BroadcastHashJoin [codegen id : 8] -Left keys [1]: [n_regionkey#20] -Right keys [1]: [r_regionkey#25] +Left keys [1]: [n_regionkey#16] +Right keys [1]: [r_regionkey#19] Join condition: None (47) Project [codegen id : 8] -Output [3]: [year(o_orderdate#14) AS o_year#28, CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) AS volume#29, n_name#23 AS nation#30] -Input [6]: [l_extendedprice#6, l_discount#7, o_orderdate#14, n_regionkey#20, n_name#23, r_regionkey#25] +Output [3]: [year(o_orderdate#12) AS o_year#21, CheckOverflow((promote_precision(cast(l_extendedprice#6 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#7 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) AS volume#22, n_name#18 AS nation#23] +Input [6]: [l_extendedprice#6, l_discount#7, o_orderdate#12, n_regionkey#16, n_name#18, r_regionkey#19] (48) HashAggregate [codegen id : 8] -Input [3]: [o_year#28, volume#29, nation#30] -Keys [1]: [o_year#28] -Functions [2]: [partial_sum(CASE WHEN (nation#30 = BRAZIL) THEN volume#29 ELSE 0 END), partial_sum(volume#29)] -Aggregate Attributes [4]: [sum#31, isEmpty#32, sum#33, isEmpty#34] -Results [5]: [o_year#28, sum#35, isEmpty#36, sum#37, isEmpty#38] +Input [3]: [o_year#21, volume#22, nation#23] +Keys [1]: [o_year#21] +Functions [2]: [partial_sum(CASE WHEN (nation#23 = BRAZIL) THEN volume#22 ELSE 0 END), partial_sum(volume#22)] +Aggregate Attributes [4]: [sum#24, isEmpty#25, sum#26, isEmpty#27] +Results [5]: [o_year#21, sum#28, isEmpty#29, sum#30, isEmpty#31] (49) Exchange -Input [5]: [o_year#28, sum#35, isEmpty#36, sum#37, isEmpty#38] -Arguments: hashpartitioning(o_year#28, 5), ENSURE_REQUIREMENTS, [id=#39] +Input [5]: [o_year#21, sum#28, isEmpty#29, sum#30, isEmpty#31] +Arguments: hashpartitioning(o_year#21, 5), ENSURE_REQUIREMENTS, [plan_id=8] (50) HashAggregate [codegen id : 9] -Input [5]: [o_year#28, sum#35, isEmpty#36, sum#37, isEmpty#38] -Keys [1]: [o_year#28] -Functions [2]: [sum(CASE WHEN (nation#30 = BRAZIL) THEN volume#29 ELSE 0 END), sum(volume#29)] -Aggregate Attributes [2]: [sum(CASE WHEN (nation#30 = BRAZIL) THEN volume#29 ELSE 0 END)#40, sum(volume#29)#41] -Results [2]: [o_year#28, CheckOverflow((promote_precision(sum(CASE WHEN (nation#30 = BRAZIL) THEN volume#29 ELSE 0 END)#40) / promote_precision(sum(volume#29)#41)), DecimalType(38,6)) AS mkt_share#42] +Input [5]: [o_year#21, sum#28, isEmpty#29, sum#30, isEmpty#31] +Keys [1]: [o_year#21] +Functions [2]: [sum(CASE WHEN (nation#23 = BRAZIL) THEN volume#22 ELSE 0 END), sum(volume#22)] +Aggregate Attributes [2]: [sum(CASE WHEN (nation#23 = BRAZIL) THEN volume#22 ELSE 0 END)#32, sum(volume#22)#33] +Results [2]: [o_year#21, CheckOverflow((promote_precision(sum(CASE WHEN (nation#23 = BRAZIL) THEN volume#22 ELSE 0 END)#32) / promote_precision(sum(volume#22)#33)), DecimalType(38,6)) AS mkt_share#34] (51) Exchange -Input [2]: [o_year#28, mkt_share#42] -Arguments: rangepartitioning(o_year#28 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [id=#43] +Input [2]: [o_year#21, mkt_share#34] +Arguments: rangepartitioning(o_year#21 ASC NULLS FIRST, 5), ENSURE_REQUIREMENTS, [plan_id=9] (52) Sort [codegen id : 10] -Input [2]: [o_year#28, mkt_share#42] -Arguments: [o_year#28 ASC NULLS FIRST], true, 0 +Input [2]: [o_year#21, mkt_share#34] +Arguments: [o_year#21 ASC NULLS FIRST], true, 0 diff --git a/sql/core/src/test/resources/tpch-plan-stability/q9/explain.txt b/sql/core/src/test/resources/tpch-plan-stability/q9/explain.txt index 9ed3700e668e0..a13118976829e 100644 --- a/sql/core/src/test/resources/tpch-plan-stability/q9/explain.txt +++ b/sql/core/src/test/resources/tpch-plan-stability/q9/explain.txt @@ -74,7 +74,7 @@ Condition : ((isnotnull(l_partkey#4) AND isnotnull(l_suppkey#5)) AND isnotnull(l (8) BroadcastExchange Input [6]: [l_orderkey#3, l_partkey#4, l_suppkey#5, l_quantity#6, l_extendedprice#7, l_discount#8] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false]),false), [id=#9] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false]),false), [plan_id=1] (9) BroadcastHashJoin [codegen id : 6] Left keys [1]: [p_partkey#1] @@ -86,136 +86,136 @@ Output [6]: [l_orderkey#3, l_partkey#4, l_suppkey#5, l_quantity#6, l_extendedpri Input [7]: [p_partkey#1, l_orderkey#3, l_partkey#4, l_suppkey#5, l_quantity#6, l_extendedprice#7, l_discount#8] (11) Scan parquet default.supplier -Output [2]: [s_suppkey#10, s_nationkey#11] +Output [2]: [s_suppkey#9, s_nationkey#10] Batched: true Location [not included in comparison]/{warehouse_dir}/supplier] PushedFilters: [IsNotNull(s_suppkey), IsNotNull(s_nationkey)] ReadSchema: struct (12) ColumnarToRow [codegen id : 2] -Input [2]: [s_suppkey#10, s_nationkey#11] +Input [2]: [s_suppkey#9, s_nationkey#10] (13) Filter [codegen id : 2] -Input [2]: [s_suppkey#10, s_nationkey#11] -Condition : (isnotnull(s_suppkey#10) AND isnotnull(s_nationkey#11)) +Input [2]: [s_suppkey#9, s_nationkey#10] +Condition : (isnotnull(s_suppkey#9) AND isnotnull(s_nationkey#10)) (14) BroadcastExchange -Input [2]: [s_suppkey#10, s_nationkey#11] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#12] +Input [2]: [s_suppkey#9, s_nationkey#10] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=2] (15) BroadcastHashJoin [codegen id : 6] Left keys [1]: [l_suppkey#5] -Right keys [1]: [s_suppkey#10] +Right keys [1]: [s_suppkey#9] Join condition: None (16) Project [codegen id : 6] -Output [7]: [l_orderkey#3, l_partkey#4, l_suppkey#5, l_quantity#6, l_extendedprice#7, l_discount#8, s_nationkey#11] -Input [8]: [l_orderkey#3, l_partkey#4, l_suppkey#5, l_quantity#6, l_extendedprice#7, l_discount#8, s_suppkey#10, s_nationkey#11] +Output [7]: [l_orderkey#3, l_partkey#4, l_suppkey#5, l_quantity#6, l_extendedprice#7, l_discount#8, s_nationkey#10] +Input [8]: [l_orderkey#3, l_partkey#4, l_suppkey#5, l_quantity#6, l_extendedprice#7, l_discount#8, s_suppkey#9, s_nationkey#10] (17) Scan parquet default.partsupp -Output [3]: [ps_partkey#13, ps_suppkey#14, ps_supplycost#15] +Output [3]: [ps_partkey#11, ps_suppkey#12, ps_supplycost#13] Batched: true Location [not included in comparison]/{warehouse_dir}/partsupp] PushedFilters: [IsNotNull(ps_suppkey), IsNotNull(ps_partkey)] ReadSchema: struct (18) ColumnarToRow [codegen id : 3] -Input [3]: [ps_partkey#13, ps_suppkey#14, ps_supplycost#15] +Input [3]: [ps_partkey#11, ps_suppkey#12, ps_supplycost#13] (19) Filter [codegen id : 3] -Input [3]: [ps_partkey#13, ps_suppkey#14, ps_supplycost#15] -Condition : (isnotnull(ps_suppkey#14) AND isnotnull(ps_partkey#13)) +Input [3]: [ps_partkey#11, ps_suppkey#12, ps_supplycost#13] +Condition : (isnotnull(ps_suppkey#12) AND isnotnull(ps_partkey#11)) (20) BroadcastExchange -Input [3]: [ps_partkey#13, ps_suppkey#14, ps_supplycost#15] -Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [id=#16] +Input [3]: [ps_partkey#11, ps_suppkey#12, ps_supplycost#13] +Arguments: HashedRelationBroadcastMode(List(input[1, bigint, false], input[0, bigint, false]),false), [plan_id=3] (21) BroadcastHashJoin [codegen id : 6] Left keys [2]: [l_suppkey#5, l_partkey#4] -Right keys [2]: [ps_suppkey#14, ps_partkey#13] +Right keys [2]: [ps_suppkey#12, ps_partkey#11] Join condition: None (22) Project [codegen id : 6] -Output [6]: [l_orderkey#3, l_quantity#6, l_extendedprice#7, l_discount#8, s_nationkey#11, ps_supplycost#15] -Input [10]: [l_orderkey#3, l_partkey#4, l_suppkey#5, l_quantity#6, l_extendedprice#7, l_discount#8, s_nationkey#11, ps_partkey#13, ps_suppkey#14, ps_supplycost#15] +Output [6]: [l_orderkey#3, l_quantity#6, l_extendedprice#7, l_discount#8, s_nationkey#10, ps_supplycost#13] +Input [10]: [l_orderkey#3, l_partkey#4, l_suppkey#5, l_quantity#6, l_extendedprice#7, l_discount#8, s_nationkey#10, ps_partkey#11, ps_suppkey#12, ps_supplycost#13] (23) Scan parquet default.orders -Output [2]: [o_orderkey#17, o_orderdate#18] +Output [2]: [o_orderkey#14, o_orderdate#15] Batched: true Location [not included in comparison]/{warehouse_dir}/orders] PushedFilters: [IsNotNull(o_orderkey)] ReadSchema: struct (24) ColumnarToRow [codegen id : 4] -Input [2]: [o_orderkey#17, o_orderdate#18] +Input [2]: [o_orderkey#14, o_orderdate#15] (25) Filter [codegen id : 4] -Input [2]: [o_orderkey#17, o_orderdate#18] -Condition : isnotnull(o_orderkey#17) +Input [2]: [o_orderkey#14, o_orderdate#15] +Condition : isnotnull(o_orderkey#14) (26) BroadcastExchange -Input [2]: [o_orderkey#17, o_orderdate#18] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#19] +Input [2]: [o_orderkey#14, o_orderdate#15] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=4] (27) BroadcastHashJoin [codegen id : 6] Left keys [1]: [l_orderkey#3] -Right keys [1]: [o_orderkey#17] +Right keys [1]: [o_orderkey#14] Join condition: None (28) Project [codegen id : 6] -Output [6]: [l_quantity#6, l_extendedprice#7, l_discount#8, s_nationkey#11, ps_supplycost#15, o_orderdate#18] -Input [8]: [l_orderkey#3, l_quantity#6, l_extendedprice#7, l_discount#8, s_nationkey#11, ps_supplycost#15, o_orderkey#17, o_orderdate#18] +Output [6]: [l_quantity#6, l_extendedprice#7, l_discount#8, s_nationkey#10, ps_supplycost#13, o_orderdate#15] +Input [8]: [l_orderkey#3, l_quantity#6, l_extendedprice#7, l_discount#8, s_nationkey#10, ps_supplycost#13, o_orderkey#14, o_orderdate#15] (29) Scan parquet default.nation -Output [2]: [n_nationkey#20, n_name#21] +Output [2]: [n_nationkey#16, n_name#17] Batched: true Location [not included in comparison]/{warehouse_dir}/nation] PushedFilters: [IsNotNull(n_nationkey)] ReadSchema: struct (30) ColumnarToRow [codegen id : 5] -Input [2]: [n_nationkey#20, n_name#21] +Input [2]: [n_nationkey#16, n_name#17] (31) Filter [codegen id : 5] -Input [2]: [n_nationkey#20, n_name#21] -Condition : isnotnull(n_nationkey#20) +Input [2]: [n_nationkey#16, n_name#17] +Condition : isnotnull(n_nationkey#16) (32) BroadcastExchange -Input [2]: [n_nationkey#20, n_name#21] -Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [id=#22] +Input [2]: [n_nationkey#16, n_name#17] +Arguments: HashedRelationBroadcastMode(List(input[0, bigint, false]),false), [plan_id=5] (33) BroadcastHashJoin [codegen id : 6] -Left keys [1]: [s_nationkey#11] -Right keys [1]: [n_nationkey#20] +Left keys [1]: [s_nationkey#10] +Right keys [1]: [n_nationkey#16] Join condition: None (34) Project [codegen id : 6] -Output [3]: [n_name#21 AS nation#23, year(o_orderdate#18) AS o_year#24, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(l_extendedprice#7 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#8 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) as decimal(23,0))) - promote_precision(cast(CheckOverflow((promote_precision(ps_supplycost#15) * promote_precision(l_quantity#6)), DecimalType(21,0)) as decimal(23,0)))), DecimalType(23,0)) AS amount#25] -Input [8]: [l_quantity#6, l_extendedprice#7, l_discount#8, s_nationkey#11, ps_supplycost#15, o_orderdate#18, n_nationkey#20, n_name#21] +Output [3]: [n_name#17 AS nation#18, year(o_orderdate#15) AS o_year#19, CheckOverflow((promote_precision(cast(CheckOverflow((promote_precision(cast(l_extendedprice#7 as decimal(11,0))) * promote_precision(CheckOverflow((1 - promote_precision(cast(l_discount#8 as decimal(11,0)))), DecimalType(11,0)))), DecimalType(22,0)) as decimal(23,0))) - promote_precision(cast(CheckOverflow((promote_precision(ps_supplycost#13) * promote_precision(l_quantity#6)), DecimalType(21,0)) as decimal(23,0)))), DecimalType(23,0)) AS amount#20] +Input [8]: [l_quantity#6, l_extendedprice#7, l_discount#8, s_nationkey#10, ps_supplycost#13, o_orderdate#15, n_nationkey#16, n_name#17] (35) HashAggregate [codegen id : 6] -Input [3]: [nation#23, o_year#24, amount#25] -Keys [2]: [nation#23, o_year#24] -Functions [1]: [partial_sum(amount#25)] -Aggregate Attributes [2]: [sum#26, isEmpty#27] -Results [4]: [nation#23, o_year#24, sum#28, isEmpty#29] +Input [3]: [nation#18, o_year#19, amount#20] +Keys [2]: [nation#18, o_year#19] +Functions [1]: [partial_sum(amount#20)] +Aggregate Attributes [2]: [sum#21, isEmpty#22] +Results [4]: [nation#18, o_year#19, sum#23, isEmpty#24] (36) Exchange -Input [4]: [nation#23, o_year#24, sum#28, isEmpty#29] -Arguments: hashpartitioning(nation#23, o_year#24, 5), ENSURE_REQUIREMENTS, [id=#30] +Input [4]: [nation#18, o_year#19, sum#23, isEmpty#24] +Arguments: hashpartitioning(nation#18, o_year#19, 5), ENSURE_REQUIREMENTS, [plan_id=6] (37) HashAggregate [codegen id : 7] -Input [4]: [nation#23, o_year#24, sum#28, isEmpty#29] -Keys [2]: [nation#23, o_year#24] -Functions [1]: [sum(amount#25)] -Aggregate Attributes [1]: [sum(amount#25)#31] -Results [3]: [nation#23, o_year#24, sum(amount#25)#31 AS sum_profit#32] +Input [4]: [nation#18, o_year#19, sum#23, isEmpty#24] +Keys [2]: [nation#18, o_year#19] +Functions [1]: [sum(amount#20)] +Aggregate Attributes [1]: [sum(amount#20)#25] +Results [3]: [nation#18, o_year#19, sum(amount#20)#25 AS sum_profit#26] (38) Exchange -Input [3]: [nation#23, o_year#24, sum_profit#32] -Arguments: rangepartitioning(nation#23 ASC NULLS FIRST, o_year#24 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [id=#33] +Input [3]: [nation#18, o_year#19, sum_profit#26] +Arguments: rangepartitioning(nation#18 ASC NULLS FIRST, o_year#19 DESC NULLS LAST, 5), ENSURE_REQUIREMENTS, [plan_id=7] (39) Sort [codegen id : 8] -Input [3]: [nation#23, o_year#24, sum_profit#32] -Arguments: [nation#23 ASC NULLS FIRST, o_year#24 DESC NULLS LAST], true, 0 +Input [3]: [nation#18, o_year#19, sum_profit#26] +Arguments: [nation#18 ASC NULLS FIRST, o_year#19 DESC NULLS LAST], true, 0 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala index d8caf80c9a961..8cbb841e7d55e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/PlanStabilitySuite.scala @@ -80,6 +80,7 @@ trait PlanStabilitySuite extends DisableAdaptiveExecutionSuite { private val referenceRegex = "#\\d+".r private val normalizeRegex = "#\\d+L?".r + private val planIdRegex = "plan_id=\\d+".r private val clsName = this.getClass.getCanonicalName @@ -234,7 +235,15 @@ trait PlanStabilitySuite extends DisableAdaptiveExecutionSuite { val map = new mutable.HashMap[String, String]() normalizeRegex.findAllMatchIn(plan).map(_.toString) .foreach(map.getOrElseUpdate(_, (map.size + 1).toString)) - normalizeRegex.replaceAllIn(plan, regexMatch => s"#${map(regexMatch.toString)}") + val exprIdNormalized = normalizeRegex.replaceAllIn( + plan, regexMatch => s"#${map(regexMatch.toString)}") + + // Normalize the plan id in Exchange nodes. See `Exchange.stringArgs`. + val planIdMap = new mutable.HashMap[String, String]() + planIdRegex.findAllMatchIn(exprIdNormalized).map(_.toString) + .foreach(planIdMap.getOrElseUpdate(_, (planIdMap.size + 1).toString)) + planIdRegex.replaceAllIn( + exprIdNormalized, regexMatch => s"plan_id=${planIdMap(regexMatch.toString)}") } private def normalizeLocation(plan: String): String = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala index b9ca2a0f034e5..987e09adb168e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala @@ -35,6 +35,7 @@ trait SQLQueryTestHelper { protected def replaceNotIncludedMsg(line: String): String = { line.replaceAll("#\\d+", "#x") + .replaceAll("plan_id=\\d+", "plan_id=x") .replaceAll( s"Location.*$clsName/", s"Location $notIncludedMsg/{warehouse_dir}/") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala index 849ad378fc095..3a0bd35cb70fd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/debug/DebuggingSuite.scala @@ -112,8 +112,8 @@ class DebuggingSuite extends DebuggingSuiteBase with DisableAdaptiveExecutionSui val output = captured.toString() val hashedModeString = "HashedRelationBroadcastMode(List(input[0, bigint, false]),false)" - assert(output.replaceAll("\\[id=#\\d+\\]", "[id=#x]").contains( - s"""== BroadcastExchange $hashedModeString, [id=#x] == + assert(output.replaceAll("\\[plan_id=\\d+\\]", "[plan_id=x]").contains( + s"""== BroadcastExchange $hashedModeString, [plan_id=x] == |Tuples output: 0 | id LongType: {} |== WholeStageCodegen (1) == From 8077944a021a9e52e7d55681799c62dbc974d458 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 13 Jun 2022 13:25:57 -0700 Subject: [PATCH 346/535] [SPARK-39458][CORE][TESTS] Fix `UISuite` for IPv6 ### What changes were proposed in this pull request? This PR aims to fix `UISuite` to work in IPv6 environment. ### Why are the changes needed? IPv6 address contains `:`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual tests in Pure IPv6 environment. Closes #36858 from dongjoon-hyun/SPARK-39458. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 2182be81a32cdda691a3051a1591c232e8bd9f65) Signed-off-by: Dongjoon Hyun --- core/src/test/scala/org/apache/spark/ui/UISuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/ui/UISuite.scala b/core/src/test/scala/org/apache/spark/ui/UISuite.scala index 90136dd06237c..b30c6fc462be1 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISuite.scala @@ -195,7 +195,7 @@ class UISuite extends SparkFunSuite { val ui = sc.ui.get val splitUIAddress = ui.webUrl.split(':') val boundPort = ui.boundPort - assert(splitUIAddress(2).toInt == boundPort) + assert(splitUIAddress(splitUIAddress.length - 1).toInt == boundPort) } } From c6778bacd481e794ef013efd241303421f8400e4 Mon Sep 17 00:00:00 2001 From: Daniel Tenedorio Date: Tue, 14 Jun 2022 08:54:04 +0800 Subject: [PATCH 347/535] [SPARK-38796][SQL] Update to_number and try_to_number functions to allow PR with positive numbers ### What changes were proposed in this pull request? Update `to_number` and `try_to_number` functions to allow the `PR` format token with input strings comprising positive numbers. Before this bug fix, function calls like `to_number(' 123 ', '999PR')` would fail. Now they succeed, which is helpful since `PR` should allow both positive and negative numbers. This satisfies the following specification: ``` to_number(expr, fmt) fmt { ' [ MI | S ] [ L | $ ] [ 0 | 9 | G | , ] [...] [ . | D ] [ 0 | 9 ] [...] [ L | $ ] [ PR | MI | S ] ' } ``` ### Why are the changes needed? After reviewing the specification, this behavior makes the most sense. ### Does this PR introduce _any_ user-facing change? Yes, a slight change in the behavior of the format string. ### How was this patch tested? Existing and updated unit test coverage. Closes #36861 from dtenedor/to-number-fix-pr. Authored-by: Daniel Tenedorio Signed-off-by: Wenchen Fan (cherry picked from commit 4a803ca22a9a98f9bbbbd1a5a33b9ae394fb7c49) Signed-off-by: Wenchen Fan --- .../sql/catalyst/util/ToNumberParser.scala | 98 +++++++++++-------- .../expressions/StringExpressionsSuite.scala | 5 +- 2 files changed, 61 insertions(+), 42 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala index 716224983e0d0..22e655c4eb45f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala @@ -397,6 +397,9 @@ class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Seriali beforeDecimalPoint.clear() afterDecimalPoint.clear() var reachedDecimalPoint = false + // Record whether we have consumed opening angle bracket characters in the input string. + var reachedOpeningAngleBracket = false + var reachedClosingAngleBracket = false // Record whether the input specified a negative result, such as with a minus sign. var negateResult = false // This is an index into the characters of the provided input string. @@ -407,66 +410,79 @@ class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Seriali // Iterate through the tokens representing the provided format string, in order. while (formatIndex < formatTokens.size) { val token: InputToken = formatTokens(formatIndex) + val inputChar: Option[Char] = + if (inputIndex < inputLength) { + Some(inputString(inputIndex)) + } else { + Option.empty[Char] + } token match { case d: DigitGroups => inputIndex = parseDigitGroups(d, inputString, inputIndex, reachedDecimalPoint).getOrElse( return formatMatchFailure(input, numberFormat)) case DecimalPoint() => - if (inputIndex < inputLength && - inputString(inputIndex) == POINT_SIGN) { - reachedDecimalPoint = true - inputIndex += 1 - } else { - // There is no decimal point. Consume the token and remain at the same character in the - // input string. + inputChar.foreach { + case POINT_SIGN => + reachedDecimalPoint = true + inputIndex += 1 + case _ => + // There is no decimal point. Consume the token and remain at the same character in + // the input string. } case DollarSign() => - if (inputIndex >= inputLength || - inputString(inputIndex) != DOLLAR_SIGN) { - // The input string did not contain an expected dollar sign. - return formatMatchFailure(input, numberFormat) + inputChar.foreach { + case DOLLAR_SIGN => + inputIndex += 1 + case _ => + // The input string did not contain an expected dollar sign. + return formatMatchFailure(input, numberFormat) } - inputIndex += 1 case OptionalPlusOrMinusSign() => - if (inputIndex < inputLength && - inputString(inputIndex) == PLUS_SIGN) { - inputIndex += 1 - } else if (inputIndex < inputLength && - inputString(inputIndex) == MINUS_SIGN) { - negateResult = !negateResult - inputIndex += 1 - } else { - // There is no plus or minus sign. Consume the token and remain at the same character in - // the input string. + inputChar.foreach { + case PLUS_SIGN => + inputIndex += 1 + case MINUS_SIGN => + negateResult = !negateResult + inputIndex += 1 + case _ => + // There is no plus or minus sign. Consume the token and remain at the same character + // in the input string. } case OptionalMinusSign() => - if (inputIndex < inputLength && - inputString(inputIndex) == MINUS_SIGN) { - negateResult = !negateResult - inputIndex += 1 - } else { - // There is no minus sign. Consume the token and remain at the same character in the - // input string. + inputChar.foreach { + case MINUS_SIGN => + negateResult = !negateResult + inputIndex += 1 + case _ => + // There is no minus sign. Consume the token and remain at the same character in the + // input string. } case OpeningAngleBracket() => - if (inputIndex >= inputLength || - inputString(inputIndex) != ANGLE_BRACKET_OPEN) { - // The input string did not contain an expected opening angle bracket. - return formatMatchFailure(input, numberFormat) + inputChar.foreach { + case ANGLE_BRACKET_OPEN => + if (reachedOpeningAngleBracket) { + return formatMatchFailure(input, numberFormat) + } + reachedOpeningAngleBracket = true + inputIndex += 1 + case _ => } - inputIndex += 1 case ClosingAngleBracket() => - if (inputIndex >= inputLength || - inputString(inputIndex) != ANGLE_BRACKET_CLOSE) { - // The input string did not contain an expected closing angle bracket. - return formatMatchFailure(input, numberFormat) + inputChar.foreach { + case ANGLE_BRACKET_CLOSE => + if (!reachedOpeningAngleBracket) { + return formatMatchFailure(input, numberFormat) + } + reachedClosingAngleBracket = true + negateResult = !negateResult + inputIndex += 1 + case _ => } - negateResult = !negateResult - inputIndex += 1 } formatIndex += 1 } - if (inputIndex < inputLength) { + if (inputIndex < inputLength || + reachedOpeningAngleBracket != reachedClosingAngleBracket) { // If we have consumed all the tokens in the format string, but characters remain unconsumed // in the input string, then the input string does not match the format string. formatMatchFailure(input, numberFormat) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index db7aae9985516..655e9b744bf15 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -969,7 +969,8 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { ("+$89,1,2,3,45.123", "S$999,0,0,0,999.00000") -> Decimal(8912345.123), ("-454", "S999") -> Decimal(-454), ("+454", "S999") -> Decimal(454), - ("<454>", "999PR") -> Decimal(-454), + ("454", "999PR") -> Decimal(454), + (" 454 ", "999PR") -> Decimal(454), ("454-", "999MI") -> Decimal(-454), ("-$54", "MI$99") -> Decimal(-54), // The input string contains more digits than fit in a long integer. @@ -1089,6 +1090,8 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { // The trailing PR required exactly one leading < and trailing >. ("<454", "999PR"), ("454>", "999PR"), + ("<454 ", "999PR"), + (" 454>", "999PR"), ("<<454>>", "999PR"), // At least three digits were required. ("45", "S$999,099.99"), From 47b8eeeb61657f293d132f29ac5859e39e98f9d6 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 14 Jun 2022 08:43:49 +0300 Subject: [PATCH 348/535] [SPARK-38700][SQL][3.3] Use error classes in the execution errors of save mode ### What changes were proposed in this pull request? Migrate the following errors in QueryExecutionErrors: * unsupportedSaveModeError -> UNSUPPORTED_SAVE_MODE This is a backport of https://github.com/apache/spark/pull/36350. ### Why are the changes needed? Porting execution errors of unsupported saveMode to new error framework. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new UT. Closes #36852 from panbingkun/branch-3.3-SPARK-38700-new. Lead-authored-by: panbingkun Co-authored-by: panbingkun <84731559@qq.com> Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 11 ++++++ .../scala/org/apache/spark/ErrorInfo.scala | 34 +++++++++++++++++-- .../sql/errors/QueryExecutionErrors.scala | 9 +++-- .../InsertIntoHadoopFsRelationCommand.scala | 2 +- .../errors/QueryExecutionErrorsSuite.scala | 25 ++++++++++++-- 5 files changed, 73 insertions(+), 8 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 2e32482328a7e..2d16ec50a6612 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -184,6 +184,17 @@ "UNSUPPORTED_OPERATION" : { "message" : [ "The operation is not supported: " ] }, + "UNSUPPORTED_SAVE_MODE" : { + "message" : [ "The save mode is not supported for: " ], + "subClass" : { + "EXISTENT_PATH" : { + "message" : [ "an existent path." ] + }, + "NON_EXISTENT_PATH" : { + "message" : [ "a non-existent path." ] + } + } + }, "WRITING_JOB_ABORTED" : { "message" : [ "Writing job aborted" ], "sqlState" : "40000" diff --git a/core/src/main/scala/org/apache/spark/ErrorInfo.scala b/core/src/main/scala/org/apache/spark/ErrorInfo.scala index 99e081ebb45ea..fa2e1034692ac 100644 --- a/core/src/main/scala/org/apache/spark/ErrorInfo.scala +++ b/core/src/main/scala/org/apache/spark/ErrorInfo.scala @@ -28,14 +28,30 @@ import com.fasterxml.jackson.module.scala.DefaultScalaModule import org.apache.spark.util.Utils +/** + * Information associated with an error subclass. + * + * @param message C-style message format compatible with printf. + * The error message is constructed by concatenating the lines with newlines. + */ +private[spark] case class ErrorSubInfo(message: Seq[String]) { + // For compatibility with multi-line error messages + @JsonIgnore + val messageFormat: String = message.mkString("\n") +} + /** * Information associated with an error class. * * @param sqlState SQLSTATE associated with this class. + * @param subClass SubClass associated with this class. * @param message C-style message format compatible with printf. * The error message is constructed by concatenating the lines with newlines. */ -private[spark] case class ErrorInfo(message: Seq[String], sqlState: Option[String]) { +private[spark] case class ErrorInfo( + message: Seq[String], + subClass: Option[Map[String, ErrorSubInfo]], + sqlState: Option[String]) { // For compatibility with multi-line error messages @JsonIgnore val messageFormat: String = message.mkString("\n") @@ -61,13 +77,25 @@ private[spark] object SparkThrowableHelper { queryContext: String = ""): String = { val errorInfo = errorClassToInfoMap.getOrElse(errorClass, throw new IllegalArgumentException(s"Cannot find error class '$errorClass'")) + val (displayMessageParameters, displayFormat) = if (errorInfo.subClass.isDefined) { + val subClass = errorInfo.subClass.get + val subErrorClass = messageParameters.head + val errorSubInfo = subClass.getOrElse(subErrorClass, + throw new IllegalArgumentException(s"Cannot find sub error class '$subErrorClass'")) + val subMessageParameters = messageParameters.tail + (subMessageParameters, errorInfo.messageFormat + errorSubInfo.messageFormat) + } else { + (messageParameters, errorInfo.messageFormat) + } + val displayMessage = String.format( + displayFormat.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), + displayMessageParameters : _*) val displayQueryContext = if (queryContext.isEmpty) { "" } else { s"\n$queryContext" } - String.format(errorInfo.messageFormat.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), - messageParameters: _*) + displayQueryContext + s"$displayMessage$displayQueryContext" } def getSqlState(errorClass: String): String = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 9e29acf04d2ef..6c6139d2ccc92 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -652,8 +652,13 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { """.stripMargin) } - def unsupportedSaveModeError(saveMode: String, pathExists: Boolean): Throwable = { - new IllegalStateException(s"unsupported save mode $saveMode ($pathExists)") + def saveModeUnsupportedError(saveMode: Any, pathExists: Boolean): Throwable = { + pathExists match { + case true => new SparkIllegalArgumentException(errorClass = "UNSUPPORTED_SAVE_MODE", + messageParameters = Array("EXISTENT_PATH", toSQLValue(saveMode, StringType))) + case _ => new SparkIllegalArgumentException(errorClass = "UNSUPPORTED_SAVE_MODE", + messageParameters = Array("NON_EXISTENT_PATH", toSQLValue(saveMode, StringType))) + } } def cannotClearOutputDirectoryError(staticPrefixPath: Path): Throwable = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala index 74be483cd7c37..d773d4bd271b3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala @@ -133,7 +133,7 @@ case class InsertIntoHadoopFsRelationCommand( case (SaveMode.Ignore, exists) => !exists case (s, exists) => - throw QueryExecutionErrors.unsupportedSaveModeError(s.toString, exists) + throw QueryExecutionErrors.saveModeUnsupportedError(s, exists) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index e6ce1d7008039..73c5b12849a1d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -17,14 +17,15 @@ package org.apache.spark.sql.errors -import org.apache.spark.{SparkArithmeticException, SparkException, SparkRuntimeException, SparkUnsupportedOperationException, SparkUpgradeException} -import org.apache.spark.sql.{DataFrame, QueryTest} +import org.apache.spark.{SparkArithmeticException, SparkException, SparkIllegalArgumentException, SparkRuntimeException, SparkUnsupportedOperationException, SparkUpgradeException} +import org.apache.spark.sql.{DataFrame, QueryTest, SaveMode} import org.apache.spark.sql.execution.datasources.orc.OrcTest import org.apache.spark.sql.execution.datasources.parquet.ParquetTest import org.apache.spark.sql.functions.{lit, lower, struct, sum} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy.EXCEPTION import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.util.Utils class QueryExecutionErrorsSuite extends QueryTest with ParquetTest with OrcTest with SharedSparkSession { @@ -265,4 +266,24 @@ class QueryExecutionErrorsSuite extends QueryTest assert(e.getMessage === "Datetime operation overflow: add 1000000 YEAR to TIMESTAMP '2022-03-09 01:02:03'.") } + + test("UNSUPPORTED_SAVE_MODE: unsupported null saveMode whether the path exists or not") { + withTempPath { path => + val e1 = intercept[SparkIllegalArgumentException] { + val saveMode: SaveMode = null + Seq(1, 2).toDS().write.mode(saveMode).parquet(path.getAbsolutePath) + } + assert(e1.getErrorClass === "UNSUPPORTED_SAVE_MODE") + assert(e1.getMessage === "The save mode NULL is not supported for: a non-existent path.") + + Utils.createDirectory(path) + + val e2 = intercept[SparkIllegalArgumentException] { + val saveMode: SaveMode = null + Seq(1, 2).toDS().write.mode(saveMode).parquet(path.getAbsolutePath) + } + assert(e2.getErrorClass === "UNSUPPORTED_SAVE_MODE") + assert(e2.getMessage === "The save mode NULL is not supported for: an existent path.") + } + } } From bb0cce990c214d4ca9cf3828940a2ca5350acf79 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 14 Jun 2022 00:43:20 -0700 Subject: [PATCH 349/535] [SPARK-39448][SQL] Add `ReplaceCTERefWithRepartition` into `nonExcludableRules` list ### What changes were proposed in this pull request? This PR adds `ReplaceCTERefWithRepartition` into nonExcludableRules list. ### Why are the changes needed? It will throw exception if user `set spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition` before running this query: ```sql SELECT (SELECT avg(id) FROM range(10)), (SELECT sum(id) FROM range(10)), (SELECT count(distinct id) FROM range(10)) ``` Exception: ``` Caused by: java.lang.AssertionError: assertion failed: No plan for WithCTE :- CTERelationDef 0, true : +- Project [named_struct(min(id), min(id)#223L, sum(id), sum(id)#226L, count(DISTINCT id), count(DISTINCT id)#229L) AS mergedValue#240] : +- Aggregate [min(id#221L) AS min(id)#223L, sum(id#221L) AS sum(id)#226L, count(distinct id#221L) AS count(DISTINCT id)#229L] : +- Range (0, 10, step=1, splits=None) +- Project [scalar-subquery#218 [].min(id) AS scalarsubquery()#230L, scalar-subquery#219 [].sum(id) AS scalarsubquery()#231L, scalar-subquery#220 [].count(DISTINCT id) AS scalarsubquery()#232L] : :- CTERelationRef 0, true, [mergedValue#240] : :- CTERelationRef 0, true, [mergedValue#240] : +- CTERelationRef 0, true, [mergedValue#240] +- OneRowRelation ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #36847 from wangyum/SPARK-39448. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun (cherry picked from commit 0b785b3c77374fa7736f01bb55e87c796985ae14) Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/SparkOptimizer.scala | 3 ++- .../sql-tests/inputs/non-excludable-rule.sql | 6 ++++++ .../results/non-excludable-rule.sql.out | 21 +++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index 84e5975189b8f..b886171572699 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -88,7 +88,8 @@ class SparkOptimizer( GroupBasedRowLevelOperationScanPlanning.ruleName :+ V2ScanRelationPushDown.ruleName :+ V2ScanPartitioning.ruleName :+ - V2Writes.ruleName + V2Writes.ruleName :+ + ReplaceCTERefWithRepartition.ruleName /** * Optimization batches that are executed before the regular optimization batches (also before diff --git a/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql b/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql new file mode 100644 index 0000000000000..b238d199cc14a --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql @@ -0,0 +1,6 @@ +-- SPARK-39448 +SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition; +SELECT + (SELECT min(id) FROM range(10)), + (SELECT sum(id) FROM range(10)), + (SELECT count(distinct id) FROM range(10)); diff --git a/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out b/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out new file mode 100644 index 0000000000000..c7fa2f0415222 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out @@ -0,0 +1,21 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 2 + + +-- !query +SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition +-- !query schema +struct +-- !query output +spark.sql.optimizer.excludedRules org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition + + +-- !query +SELECT + (SELECT min(id) FROM range(10)), + (SELECT sum(id) FROM range(10)), + (SELECT count(distinct id) FROM range(10)) +-- !query schema +struct +-- !query output +0 45 10 From 2078838adff6730bdb6db5337ee67f2efaf9153e Mon Sep 17 00:00:00 2001 From: sychen Date: Wed, 15 Jun 2022 16:02:46 +0800 Subject: [PATCH 350/535] [SPARK-39355][SQL] Single column uses quoted to construct UnresolvedAttribute ### What changes were proposed in this pull request? Use `UnresolvedAttribute.quoted` in `Alias.toAttribute` to avoid calling `UnresolvedAttribute.apply` causing `ParseException`. ### Why are the changes needed? ```sql SELECT * FROM ( SELECT '2022-06-01' AS c1 ) a WHERE c1 IN ( SELECT date_add('2022-06-01', 0) ); ``` ``` Error in query: mismatched input '(' expecting {, '.', '-'}(line 1, pos 8) == SQL == date_add(2022-06-01, 0) --------^^^ ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? add UT Closes #36740 from cxzl25/SPARK-39355. Authored-by: sychen Signed-off-by: Wenchen Fan (cherry picked from commit 8731cb875d075b68e4e0cb1d1eb970725eab9cf9) Signed-off-by: Wenchen Fan --- .../expressions/aggregate/interfaces.scala | 2 +- .../expressions/namedExpressions.scala | 2 +- .../org/apache/spark/sql/SubquerySuite.scala | 28 +++++++++++++++++++ 3 files changed, 30 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala index f97293dc9b464..e60c07b0d8268 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala @@ -117,7 +117,7 @@ case class AggregateExpression( // This is a bit of a hack. Really we should not be constructing this container and reasoning // about datatypes / aggregation mode until after we have finished analysis and made it to // planning. - UnresolvedAttribute(aggregateFunction.toString) + UnresolvedAttribute.quoted(aggregateFunction.toString) } def filterAttributes: AttributeSet = filter.map(_.references).getOrElse(AttributeSet.empty) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 47cdf21a8729f..145f371301f37 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -193,7 +193,7 @@ case class Alias(child: Expression, name: String)( if (resolved) { AttributeReference(name, child.dataType, child.nullable, metadata)(exprId, qualifier) } else { - UnresolvedAttribute(name) + UnresolvedAttribute.quoted(name) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 5a1ea6ea29e56..e8ddf93afc334 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -2185,4 +2185,32 @@ class SubquerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } } + + test("SPARK-39355: Single column uses quoted to construct UnresolvedAttribute") { + checkAnswer( + sql(""" + |SELECT * + |FROM ( + | SELECT '2022-06-01' AS c1 + |) a + |WHERE c1 IN ( + | SELECT date_add('2022-06-01', 0) + |) + |""".stripMargin), + Row("2022-06-01")) + checkAnswer( + sql(""" + |SELECT * + |FROM ( + | SELECT '2022-06-01' AS c1 + |) a + |WHERE c1 IN ( + | SELECT date_add(a.c1.k1, 0) + | FROM ( + | SELECT named_struct('k1', '2022-06-01') AS c1 + | ) a + |) + |""".stripMargin), + Row("2022-06-01")) + } } From 68bec7342433f10dae2fbc0ac9fcb0c267aba536 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Thu, 16 Jun 2022 09:38:50 +0900 Subject: [PATCH 351/535] [SPARK-39061][SQL] Set nullable correctly for `Inline` output attributes ### What changes were proposed in this pull request? Change `Inline#elementSchema` to make each struct field nullable when the containing array has a null element. ### Why are the changes needed? This query returns incorrect results (the last row should be `NULL NULL`): ``` spark-sql> select inline(array(named_struct('a', 1, 'b', 2), null)); 1 2 -1 -1 Time taken: 4.053 seconds, Fetched 2 row(s) spark-sql> ``` And this query gets a NullPointerException: ``` spark-sql> select inline(array(named_struct('a', '1', 'b', '2'), null)); 22/04/28 16:51:54 ERROR Executor: Exception in task 0.0 in stage 2.0 (TID 2) java.lang.NullPointerException: null at org.apache.spark.sql.catalyst.expressions.codegen.UnsafeWriter.write(UnsafeWriter.java:110) ~[spark-catalyst_2.12-3.4.0-SNAPSHOT.jar:3.4.0-SNAPSHOT] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.generate_doConsume_0$(Unknown Source) ~[?:?] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source) ~[?:?] at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(Buffere ``` When an array of structs is created by `CreateArray`, and no struct field contains a literal null value, the schema for the struct will have non-nullable fields, even if the array itself has a null entry (as in the example above). As a result, the output attributes for the generator will be non-nullable. When the output attributes for `Inline` are non-nullable, `GenerateUnsafeProjection#writeExpressionsToBuffer` generates incorrect code for null structs. In more detail, the issue is this: `GenerateExec#codeGenCollection` generates code that will check if the struct instance (i.e., array element) is null and, if so, set a boolean for each struct field to indicate that the field contains a null. However, unless the generator's output attributes are nullable, `GenerateUnsafeProjection#writeExpressionsToBuffer` will not generate any code to check those booleans. Instead it will generate code to write out whatever is in the variables that normally hold the struct values (which will be garbage if the array element is null). Arrays of structs from file sources do not have this issue. In that case, each `StructField` will have nullable=true due to [this](https://github.com/apache/spark/blob/fe85d7912f86c3e337aa93b23bfa7e7e01c0a32e/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala#L417). (Note: the eval path for `Inline` has a different bug with null array elements that occurs even when `nullable` is set correctly in the schema, but I will address that in a separate PR). ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New unit test. Closes #36883 from bersprockets/inline_struct_nullability_issue. Authored-by: Bruce Robbins Signed-off-by: Hyukjin Kwon (cherry picked from commit fc385dafabe3c609b38b81deaaf36e5eb6ee341b) Signed-off-by: Hyukjin Kwon --- .../sql/catalyst/expressions/generators.scala | 3 ++- .../spark/sql/GeneratorFunctionSuite.scala | 25 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index 5214bff3543af..1079f0a333dd0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -444,7 +444,8 @@ case class Inline(child: Expression) extends UnaryExpression with CollectionGene } override def elementSchema: StructType = child.dataType match { - case ArrayType(st: StructType, _) => st + case ArrayType(st: StructType, false) => st + case ArrayType(st: StructType, true) => st.asNullable } override def collectionType: DataType = child.dataType diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala index 436ccb08294b3..ef87e10946b62 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala @@ -388,6 +388,31 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { Row(0, 1) :: Row(3, 4) :: Row(6, 7) :: Row(null, null) :: Row(null, null) :: Nil) } } + + test("SPARK-39061: inline should handle null struct") { + val df = sql( + """select * from values + |( + | 1, + | array( + | named_struct('c1', 0, 'c2', 1), + | null, + | named_struct('c1', 2, 'c2', 3), + | null + | ) + |) + |as tbl(a, b) + """.stripMargin) + df.createOrReplaceTempView("t1") + + checkAnswer( + sql("select inline(b) from t1"), + Row(0, 1) :: Row(null, null) :: Row(2, 3) :: Row(null, null) :: Nil) + + checkAnswer( + sql("select a, inline(b) from t1"), + Row(1, 0, 1) :: Row(1, null, null) :: Row(1, 2, 3) :: Row(1, null, null) :: Nil) + } } case class EmptyGenerator() extends Generator with LeafLike[Expression] { From a7554c34b59d1cacf53b2e239acd746a886bdde6 Mon Sep 17 00:00:00 2001 From: "wangguangxin.cn" Date: Thu, 16 Jun 2022 09:27:24 +0800 Subject: [PATCH 352/535] [SPARK-39476][SQL] Disable Unwrap cast optimize when casting from Long to Float/ Double or from Integer to Float ### What changes were proposed in this pull request? Cast from Integer to Float or from Long to Double/Float may loss precision if the length of Integer/Long beyonds the **significant digits** of a Double(which is 15 or 16 digits) or Float(which is 7 or 8 digits). For example, ```select *, cast(a as int) from (select cast(33554435 as foat) a )``` gives `33554436` instead of `33554435`. When it comes the optimization rule `UnwrapCastInBinaryComparison`, it may result in incorrect (confused) result . We can reproduce it with following script. ``` spark.range(10).map(i => 64707595868612313L).createOrReplaceTempView("tbl") val df = sql("select * from tbl where cast(value as double) = cast('64707595868612313' as double)") df.explain(true) df.show() ``` With we disable this optimization rule , it returns 10 records. But if we enable this optimization rule, it returns empty, since the sql is optimized to ``` select * from tbl where value = 64707595868612312L ``` ### Why are the changes needed? Fix the behavior that may confuse users (or maybe a bug?) ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add a new UT Closes #36873 from WangGuangxin/SPARK-24994-followup. Authored-by: wangguangxin.cn Signed-off-by: Wenchen Fan (cherry picked from commit 9612db3fc9c38204b2bf9f724dedb9ec5f636556) Signed-off-by: Wenchen Fan --- .../UnwrapCastInBinaryComparison.scala | 14 +++++++-- .../UnwrapCastInComparisonEndToEndSuite.scala | 31 +++++++++++++++++++ 2 files changed, 43 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala index d9ec2c0d4b4cd..94e27379b7465 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/UnwrapCastInBinaryComparison.scala @@ -358,8 +358,18 @@ object UnwrapCastInBinaryComparison extends Rule[LogicalPlan] { toType.sameType(literalType) && !fromExp.foldable && toType.isInstanceOf[NumericType] && - ((fromExp.dataType.isInstanceOf[NumericType] && Cast.canUpCast(fromExp.dataType, toType)) || - fromExp.dataType.isInstanceOf[BooleanType]) + canUnwrapCast(fromExp.dataType, toType) + } + + private def canUnwrapCast(from: DataType, to: DataType): Boolean = (from, to) match { + case (BooleanType, _) => true + // SPARK-39476: It's not safe to unwrap cast from Integer to Float or from Long to Float/Double, + // since the length of Integer/Long may exceed the significant digits of Float/Double. + case (IntegerType, FloatType) => false + case (LongType, FloatType) => false + case (LongType, DoubleType) => false + case _ if from.isInstanceOf[NumericType] => Cast.canUpCast(from, to) + case _ => false } private[optimizer] def getRange(dt: DataType): Option[(Any, Any)] = dt match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala index 2c361299b173d..1d7af84ef6096 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UnwrapCastInComparisonEndToEndSuite.scala @@ -209,5 +209,36 @@ class UnwrapCastInComparisonEndToEndSuite extends QueryTest with SharedSparkSess } } + test("SPARK-39476: Should not unwrap cast from Long to Double/Float") { + withTable(t) { + Seq((6470759586864300301L)) + .toDF("c1").write.saveAsTable(t) + val df = spark.table(t) + + checkAnswer( + df.where("cast(c1 as double) == cast(6470759586864300301L as double)") + .select("c1"), + Row(6470759586864300301L)) + + checkAnswer( + df.where("cast(c1 as float) == cast(6470759586864300301L as float)") + .select("c1"), + Row(6470759586864300301L)) + } + } + + test("SPARK-39476: Should not unwrap cast from Integer to Float") { + withTable(t) { + Seq((33554435)) + .toDF("c1").write.saveAsTable(t) + val df = spark.table(t) + + checkAnswer( + df.where("cast(c1 as float) == cast(33554435 as float)") + .select("c1"), + Row(33554435)) + } + } + private def decimal(v: BigDecimal): Decimal = Decimal(v, 5, 2) } From 4c4efdc9897fa5ff137d454d085482061475b6e5 Mon Sep 17 00:00:00 2001 From: William Hyun Date: Thu, 16 Jun 2022 16:19:15 -0700 Subject: [PATCH 353/535] [SPARK-39493][BUILD] Update ORC to 1.7.5 This PR aims to update ORC to version 1.7.5. ORC 1.7.5 is the latest version with the following bug fixes: -https://orc.apache.org/news/2022/06/16/ORC-1.7.5/ No. Pass the CIs. Closes #36892 from williamhyun/orc175. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 264d8fd7b8f2a653ddaa027adc7a194d017c9eda) Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 6 +++--- dev/deps/spark-deps-hadoop-3-hive-2.3 | 6 +++--- pom.xml | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 092531a320ab5..e426b8f030643 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -219,9 +219,9 @@ objenesis/3.2//objenesis-3.2.jar okhttp/3.12.12//okhttp-3.12.12.jar okio/1.14.0//okio-1.14.0.jar opencsv/2.3//opencsv-2.3.jar -orc-core/1.7.4//orc-core-1.7.4.jar -orc-mapreduce/1.7.4//orc-mapreduce-1.7.4.jar -orc-shims/1.7.4//orc-shims-1.7.4.jar +orc-core/1.7.5//orc-core-1.7.5.jar +orc-mapreduce/1.7.5//orc-mapreduce-1.7.5.jar +orc-shims/1.7.5//orc-shims-1.7.5.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index c4baa6bb1fc77..365ede5ca10f7 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -208,9 +208,9 @@ opencsv/2.3//opencsv-2.3.jar opentracing-api/0.33.0//opentracing-api-0.33.0.jar opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar opentracing-util/0.33.0//opentracing-util-0.33.0.jar -orc-core/1.7.4//orc-core-1.7.4.jar -orc-mapreduce/1.7.4//orc-mapreduce-1.7.4.jar -orc-shims/1.7.4//orc-shims-1.7.4.jar +orc-core/1.7.5//orc-core-1.7.5.jar +orc-mapreduce/1.7.5//orc-mapreduce-1.7.5.jar +orc-shims/1.7.5//orc-shims-1.7.5.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/pom.xml b/pom.xml index 32bd40ee83461..9d290301d2350 100644 --- a/pom.xml +++ b/pom.xml @@ -132,7 +132,7 @@ 10.14.2.0 1.12.2 - 1.7.4 + 1.7.5 9.4.46.v20220331 4.0.3 0.10.0 From ad90195de56688ce0898691eb9d04297ab0871ad Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Fri, 17 Jun 2022 09:36:49 -0700 Subject: [PATCH 354/535] [SPARK-39505][UI] Escape log content rendered in UI ### What changes were proposed in this pull request? Escape log content rendered to the UI. ### Why are the changes needed? Log content may contain reserved characters or other code in the log and be misinterpreted in the UI as HTML. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #36902 from srowen/LogViewEscape. Authored-by: Sean Owen Signed-off-by: Dongjoon Hyun --- .../src/main/resources/org/apache/spark/ui/static/log-view.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/log-view.js b/core/src/main/resources/org/apache/spark/ui/static/log-view.js index 2f416d8210e18..92df087f4bd92 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/log-view.js +++ b/core/src/main/resources/org/apache/spark/ui/static/log-view.js @@ -85,7 +85,7 @@ function loadMore() { if (retStartByte == 0) { disableMoreButton(); } - $("pre", ".log-content").prepend(cleanData); + $("pre", ".log-content").prepend(document.createTextNode(cleanData)); curLogLength = curLogLength + (startByte - retStartByte); startByte = retStartByte; @@ -115,7 +115,7 @@ function loadNew() { var retLogLength = dataInfo[2]; var cleanData = data.substring(newlineIndex + 1); - $("pre", ".log-content").append(cleanData); + $("pre", ".log-content").append(document.createTextNode(cleanData)); curLogLength = curLogLength + (retEndByte - retStartByte); endByte = retEndByte; From 1dea5746fe5af42b121f5500d0f6c0b1a7947b88 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Sat, 18 Jun 2022 09:25:11 +0900 Subject: [PATCH 355/535] [SPARK-39496][SQL] Handle null struct in `Inline.eval` ### What changes were proposed in this pull request? Change `Inline.eval` to return a row of null values rather than a null row in the case of a null input struct. ### Why are the changes needed? Consider the following query: ``` set spark.sql.codegen.wholeStage=false; select inline(array(named_struct('a', 1, 'b', 2), null)); ``` This query fails with a `NullPointerException`: ``` 22/06/16 15:10:06 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0) java.lang.NullPointerException at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.apply(Unknown Source) at org.apache.spark.sql.execution.GenerateExec.$anonfun$doExecute$11(GenerateExec.scala:122) ``` (In Spark 3.1.3, you don't need to set `spark.sql.codegen.wholeStage` to false to reproduce the error, since Spark 3.1.3 has no codegen path for `Inline`). This query fails regardless of the setting of `spark.sql.codegen.wholeStage`: ``` val dfWide = (Seq((1)) .toDF("col0") .selectExpr(Seq.tabulate(99)(x => s"$x as col${x + 1}"): _*)) val df = (dfWide .selectExpr("*", "array(named_struct('a', 1, 'b', 2), null) as struct_array")) df.selectExpr("*", "inline(struct_array)").collect ``` It fails with ``` 22/06/16 15:18:55 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0)/ 1] java.lang.NullPointerException at org.apache.spark.sql.catalyst.expressions.JoinedRow.isNullAt(JoinedRow.scala:80) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.writeFields_0_8$(Unknown Source) ``` When `Inline.eval` returns a null row in the collection, GenerateExec gets a NullPointerException either when joining the null row with required child output, or projecting the null row. This PR avoids producing the null row and produces a row of null values instead: ``` spark-sql> set spark.sql.codegen.wholeStage=false; spark.sql.codegen.wholeStage false Time taken: 3.095 seconds, Fetched 1 row(s) spark-sql> select inline(array(named_struct('a', 1, 'b', 2), null)); 1 2 NULL NULL Time taken: 1.214 seconds, Fetched 2 row(s) spark-sql> ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New unit test. Closes #36903 from bersprockets/inline_eval_null_struct_issue. Authored-by: Bruce Robbins Signed-off-by: Hyukjin Kwon (cherry picked from commit c4d5390dd032d17a40ad50e38f0ed7bd9bbd4698) Signed-off-by: Hyukjin Kwon --- .../spark/sql/catalyst/expressions/generators.scala | 8 ++++++-- .../apache/spark/sql/GeneratorFunctionSuite.scala | 13 ++++++++++++- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala index 1079f0a333dd0..d305b4d370050 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala @@ -452,13 +452,17 @@ case class Inline(child: Expression) extends UnaryExpression with CollectionGene private lazy val numFields = elementSchema.fields.length + private lazy val generatorNullRow = new GenericInternalRow(elementSchema.length) + override def eval(input: InternalRow): TraversableOnce[InternalRow] = { val inputArray = child.eval(input).asInstanceOf[ArrayData] if (inputArray == null) { Nil } else { - for (i <- 0 until inputArray.numElements()) - yield inputArray.getStruct(i, numFields) + for (i <- 0 until inputArray.numElements()) yield { + val s = inputArray.getStruct(i, numFields) + if (s == null) generatorNullRow else s + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala index ef87e10946b62..09afedea7a5f8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCo import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.trees.LeafLike import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{IntegerType, StructType} @@ -389,7 +390,7 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { } } - test("SPARK-39061: inline should handle null struct") { + def testNullStruct(): Unit = { val df = sql( """select * from values |( @@ -413,6 +414,16 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { sql("select a, inline(b) from t1"), Row(1, 0, 1) :: Row(1, null, null) :: Row(1, 2, 3) :: Row(1, null, null) :: Nil) } + + test("SPARK-39061: inline should handle null struct") { + testNullStruct + } + + test("SPARK-39496: inline eval path should handle null struct") { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { + testNullStruct + } + } } case class EmptyGenerator() extends Generator with LeafLike[Expression] { From 458be83fdac12a609433b4f8f1c72cdd0aaba097 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Sun, 19 Jun 2022 12:34:41 +0300 Subject: [PATCH 356/535] [SPARK-38688][SQL][TESTS][3.3] Use error classes in the compilation errors of deserializer ### What changes were proposed in this pull request? Migrate the following errors in QueryCompilationErrors: * dataTypeMismatchForDeserializerError -> UNSUPPORTED_DESERIALIZER.DATA_TYPE_MISMATCH * fieldNumberMismatchForDeserializerError -> UNSUPPORTED_DESERIALIZER.FIELD_NUMBER_MISMATCH This is a backport of https://github.com/apache/spark/pull/36479. ### Why are the changes needed? Porting compilation errors of unsupported deserializer to new error framework. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new UT. Closes #36897 from panbingkun/branch-3.3-SPARK-38688. Authored-by: panbingkun Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 11 ++++++ .../sql/errors/QueryCompilationErrors.scala | 10 +++--- .../spark/sql/errors/QueryErrorsBase.scala | 4 +++ .../encoders/EncoderResolutionSuite.scala | 23 +++++++------ .../org/apache/spark/sql/DatasetSuite.scala | 18 ---------- .../errors/QueryCompilationErrorsSuite.scala | 34 ++++++++++++++++++- 6 files changed, 67 insertions(+), 33 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 2d16ec50a6612..4eb3a4e8e1e51 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -174,6 +174,17 @@ "message" : [ "Unsupported data type " ], "sqlState" : "0A000" }, + "UNSUPPORTED_DESERIALIZER" : { + "message" : [ "The deserializer is not supported: " ], + "subClass" : { + "DATA_TYPE_MISMATCH" : { + "message" : [ "need a(n) field but got ." ] + }, + "FIELD_NUMBER_MISMATCH" : { + "message" : [ "try to map to Tuple, but failed as the number of fields does not line up." ] + } + } + }, "UNSUPPORTED_FEATURE" : { "message" : [ "The feature is not supported: " ], "sqlState" : "0A000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index d877bb5b2a861..04b1d5f796d9b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -146,16 +146,18 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { def dataTypeMismatchForDeserializerError( dataType: DataType, desiredType: String): Throwable = { - val quantifier = if (desiredType.equals("array")) "an" else "a" new AnalysisException( - s"need $quantifier $desiredType field but got " + dataType.catalogString) + errorClass = "UNSUPPORTED_DESERIALIZER", + messageParameters = + Array("DATA_TYPE_MISMATCH", toSQLType(desiredType), toSQLType(dataType))) } def fieldNumberMismatchForDeserializerError( schema: StructType, maxOrdinal: Int): Throwable = { new AnalysisException( - s"Try to map ${schema.catalogString} to Tuple${maxOrdinal + 1}, " + - "but failed as the number of fields does not line up.") + errorClass = "UNSUPPORTED_DESERIALIZER", + messageParameters = + Array("FIELD_NUMBER_MISMATCH", toSQLType(schema), (maxOrdinal + 1).toString)) } def upCastFailureError( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index 343a1561dcea3..558401cb4e93b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -79,6 +79,10 @@ private[sql] trait QueryErrorsBase { quoteByDefault(t.sql) } + def toSQLType(text: String): String = { + quoteByDefault(text.toUpperCase(Locale.ROOT)) + } + def toSQLConf(conf: String): String = { quoteByDefault(conf) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala index 34b8cba6e90a8..ad2d0df42ff12 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala @@ -118,7 +118,7 @@ class EncoderResolutionSuite extends PlanTest { val encoder = ExpressionEncoder[ArrayClass] val attrs = Seq('arr.int) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == - "need an array field but got int") + """The deserializer is not supported: need a(n) "ARRAY" field but got "INT".""") } test("the real type is not compatible with encoder schema: array element type") { @@ -134,7 +134,7 @@ class EncoderResolutionSuite extends PlanTest { withClue("inner element is not array") { val attrs = Seq('nestedArr.array(new StructType().add("arr", "int"))) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == - "need an array field but got int") + """The deserializer is not supported: need a(n) "ARRAY" field but got "INT".""") } withClue("nested array element type is not compatible") { @@ -168,15 +168,16 @@ class EncoderResolutionSuite extends PlanTest { { val attrs = Seq('a.string, 'b.long, 'c.int) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == - "Try to map struct to Tuple2, " + - "but failed as the number of fields does not line up.") + """The deserializer is not supported: """ + + """try to map "STRUCT" to Tuple2, """ + + """but failed as the number of fields does not line up.""") } { val attrs = Seq('a.string) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == - "Try to map struct to Tuple2, " + - "but failed as the number of fields does not line up.") + """The deserializer is not supported: try to map "STRUCT" to Tuple2, """ + + """but failed as the number of fields does not line up.""") } } @@ -186,15 +187,17 @@ class EncoderResolutionSuite extends PlanTest { { val attrs = Seq('a.string, 'b.struct('x.long, 'y.string, 'z.int)) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == - "Try to map struct to Tuple2, " + - "but failed as the number of fields does not line up.") + """The deserializer is not supported: """ + + """try to map "STRUCT" to Tuple2, """ + + """but failed as the number of fields does not line up.""") } { val attrs = Seq('a.string, 'b.struct('x.long)) assert(intercept[AnalysisException](encoder.resolveAndBind(attrs)).message == - "Try to map struct to Tuple2, " + - "but failed as the number of fields does not line up.") + """The deserializer is not supported: """ + + """try to map "STRUCT" to Tuple2, """ + + """but failed as the number of fields does not line up.""") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 8586c5f3266c2..dbe4c5a741745 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1002,24 +1002,6 @@ class DatasetSuite extends QueryTest checkDataset(cogrouped, "a13", "b24") } - test("give nice error message when the real number of fields doesn't match encoder schema") { - val ds = Seq(ClassData("a", 1), ClassData("b", 2)).toDS() - - val message = intercept[AnalysisException] { - ds.as[(String, Int, Long)] - }.message - assert(message == - "Try to map struct to Tuple3, " + - "but failed as the number of fields does not line up.") - - val message2 = intercept[AnalysisException] { - ds.as[Tuple1[String]] - }.message - assert(message2 == - "Try to map struct to Tuple1, " + - "but failed as the number of fields does not line up.") - } - test("SPARK-13440: Resolving option fields") { val df = Seq(1, 2, 3).toDS() val ds = df.as[Option[Int]] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 4a5890926c8ef..3da5202a2ad55 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.errors -import org.apache.spark.sql.{AnalysisException, IntegratedUDFTestUtils, QueryTest} +import org.apache.spark.sql.{AnalysisException, ClassData, IntegratedUDFTestUtils, QueryTest} import org.apache.spark.sql.functions.{grouping, grouping_id, sum} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession @@ -28,6 +28,8 @@ case class StringIntClass(a: String, b: Int) case class ComplexClass(a: Long, b: StringLongClass) +case class ArrayClass(arr: Seq[StringIntClass]) + class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { import testImplicits._ @@ -173,4 +175,34 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { "The feature is not supported: " + "Pandas UDF aggregate expressions don't support pivot.") } + + test("UNSUPPORTED_DESERIALIZER: data type mismatch") { + val e = intercept[AnalysisException] { + sql("select 1 as arr").as[ArrayClass] + } + assert(e.errorClass === Some("UNSUPPORTED_DESERIALIZER")) + assert(e.message === + """The deserializer is not supported: need a(n) "ARRAY" field but got "INT".""") + } + + test("UNSUPPORTED_DESERIALIZER:" + + "the real number of fields doesn't match encoder schema") { + val ds = Seq(ClassData("a", 1), ClassData("b", 2)).toDS() + + val e1 = intercept[AnalysisException] { + ds.as[(String, Int, Long)] + } + assert(e1.errorClass === Some("UNSUPPORTED_DESERIALIZER")) + assert(e1.message === + "The deserializer is not supported: try to map \"STRUCT\" " + + "to Tuple3, but failed as the number of fields does not line up.") + + val e2 = intercept[AnalysisException] { + ds.as[Tuple1[String]] + } + assert(e2.errorClass === Some("UNSUPPORTED_DESERIALIZER")) + assert(e2.message === + "The deserializer is not supported: try to map \"STRUCT\" " + + "to Tuple1, but failed as the number of fields does not line up.") + } } From 5d3f3365c0b7d4515fd97d0ff7b7b29db69b2faf Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 20 Jun 2022 11:04:14 +0900 Subject: [PATCH 357/535] [SPARK-39163][SQL][3.3] Throw an exception w/ error class for an invalid bucket file ### What changes were proposed in this pull request? In the PR, I propose to use the INVALID_BUCKET_FILE error classes for an invalid bucket file. This is a backport of https://github.com/apache/spark/pull/36603. ### Why are the changes needed? Porting the executing errors for multiple rows from a subquery used as an expression to the new error framework should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #36913 from panbingkun/branch-3.3-SPARK-39163. Authored-by: panbingkun Signed-off-by: Hyukjin Kwon --- .../main/resources/error/error-classes.json | 3 +++ .../sql/errors/QueryExecutionErrors.scala | 5 ++++ .../sql/execution/DataSourceScanExec.scala | 4 +-- .../errors/QueryExecutionErrorsSuite.scala | 25 +++++++++++++++++++ .../adaptive/AdaptiveQueryExecSuite.scala | 8 +++--- .../spark/sql/sources/BucketedReadSuite.scala | 21 ---------------- 6 files changed, 39 insertions(+), 27 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 4eb3a4e8e1e51..fc712fc9c52e3 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -95,6 +95,9 @@ "INVALID_ARRAY_INDEX_IN_ELEMENT_AT" : { "message" : [ "The index is out of bounds. The array has elements. Use `try_element_at` to tolerate accessing element at invalid index and return NULL instead. If necessary set to \"false\" to bypass this error." ] }, + "INVALID_BUCKET_FILE" : { + "message" : [ "Invalid bucket file: " ] + }, "INVALID_FIELD_NAME" : { "message" : [ "Field name is invalid: is not a struct." ], "sqlState" : "42000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 6c6139d2ccc92..161bfd3c03d84 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2075,4 +2075,9 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { new SparkException(errorClass = "NULL_COMPARISON_RESULT", messageParameters = Array(), cause = null) } + + def invalidBucketFile(path: String): Throwable = { + new SparkException(errorClass = "INVALID_BUCKET_FILE", messageParameters = Array(path), + cause = null) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 1ec93a614b779..9e8ae9a714d5f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, UnknownPartitioning} import org.apache.spark.sql.catalyst.util.truncatedString +import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat => ParquetSource} import org.apache.spark.sql.execution.datasources.v2.PushedDownOperators @@ -592,8 +593,7 @@ case class FileSourceScanExec( }.groupBy { f => BucketingUtils .getBucketId(new Path(f.filePath).getName) - // TODO(SPARK-39163): Throw an exception w/ error class for an invalid bucket file - .getOrElse(throw new IllegalStateException(s"Invalid bucket file ${f.filePath}")) + .getOrElse(throw QueryExecutionErrors.invalidBucketFile(f.filePath)) } val prunedFilesGroupedToBuckets = if (optionalBucketSet.isDefined) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 73c5b12849a1d..21acea53ed00a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -17,6 +17,9 @@ package org.apache.spark.sql.errors +import java.io.File +import java.net.URI + import org.apache.spark.{SparkArithmeticException, SparkException, SparkIllegalArgumentException, SparkRuntimeException, SparkUnsupportedOperationException, SparkUpgradeException} import org.apache.spark.sql.{DataFrame, QueryTest, SaveMode} import org.apache.spark.sql.execution.datasources.orc.OrcTest @@ -286,4 +289,26 @@ class QueryExecutionErrorsSuite extends QueryTest assert(e2.getMessage === "The save mode NULL is not supported for: an existent path.") } } + + test("INVALID_BUCKET_FILE: error if there exists any malformed bucket files") { + val df1 = (0 until 50).map(i => (i % 5, i % 13, i.toString)). + toDF("i", "j", "k").as("df1") + + withTable("bucketed_table") { + df1.write.format("parquet").bucketBy(8, "i"). + saveAsTable("bucketed_table") + val warehouseFilePath = new URI(spark.sessionState.conf.warehousePath).getPath + val tableDir = new File(warehouseFilePath, "bucketed_table") + Utils.deleteRecursively(tableDir) + df1.write.parquet(tableDir.getAbsolutePath) + + val aggregated = spark.table("bucketed_table").groupBy("i").count() + + val e = intercept[SparkException] { + aggregated.count() + } + assert(e.getErrorClass === "INVALID_BUCKET_FILE") + assert(e.getMessage.matches("Invalid bucket file: .+")) + } + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index f068ab8a4e2b8..831a998dfaaec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -23,6 +23,7 @@ import java.net.URI import org.apache.logging.log4j.Level import org.scalatest.PrivateMethodTester +import org.apache.spark.SparkException import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerJobStart} import org.apache.spark.sql.{Dataset, QueryTest, Row, SparkSession, Strategy} import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} @@ -856,12 +857,11 @@ class AdaptiveQueryExecSuite df1.write.parquet(tableDir.getAbsolutePath) val aggregated = spark.table("bucketed_table").groupBy("i").count() - val error = intercept[IllegalStateException] { + val error = intercept[SparkException] { aggregated.count() } - // TODO(SPARK-39163): Throw an exception w/ error class for an invalid bucket file - assert(error.toString contains "Invalid bucket file") - assert(error.getSuppressed.size === 0) + assert(error.getErrorClass === "INVALID_BUCKET_FILE") + assert(error.getMessage contains "Invalid bucket file") } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala index 8d593a55a7ef5..bdd642a1f908c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/BucketedReadSuite.scala @@ -17,9 +17,6 @@ package org.apache.spark.sql.sources -import java.io.File -import java.net.URI - import scala.util.Random import org.apache.spark.sql._ @@ -36,7 +33,6 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} -import org.apache.spark.util.Utils import org.apache.spark.util.collection.BitSet class BucketedReadWithoutHiveSupportSuite @@ -832,23 +828,6 @@ abstract class BucketedReadSuite extends QueryTest with SQLTestUtils with Adapti } } - test("error if there exists any malformed bucket files") { - withTable("bucketed_table") { - df1.write.format("parquet").bucketBy(8, "i").saveAsTable("bucketed_table") - val warehouseFilePath = new URI(spark.sessionState.conf.warehousePath).getPath - val tableDir = new File(warehouseFilePath, "bucketed_table") - Utils.deleteRecursively(tableDir) - df1.write.parquet(tableDir.getAbsolutePath) - - val aggregated = spark.table("bucketed_table").groupBy("i").count() - val e = intercept[IllegalStateException] { - aggregated.count() - } - // TODO(SPARK-39163): Throw an exception w/ error class for an invalid bucket file - assert(e.toString contains "Invalid bucket file") - } - } - test("disable bucketing when the output doesn't contain all bucketing columns") { withTable("bucketed_table") { df1.write.format("parquet").bucketBy(8, "i").saveAsTable("bucketed_table") From d736bec27b3b0b576db006c5ced82c63a5cb5c6b Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 21 Jun 2022 17:12:39 +0300 Subject: [PATCH 358/535] [SPARK-37939][SQL][3.3] Use error classes in the parsing errors of properties ## What changes were proposed in this pull request? Migrate the following errors in QueryParsingErrors onto use error classes: - cannotCleanReservedNamespacePropertyError => UNSUPPORTED_FEATURE - cannotCleanReservedTablePropertyError => UNSUPPORTED_FEATURE - invalidPropertyKeyForSetQuotedConfigurationError => INVALID_PROPERTY_KEY - invalidPropertyValueForSetQuotedConfigurationError => INVALID_PROPERTY_VALUE - propertiesAndDbPropertiesBothSpecifiedError => UNSUPPORTED_FEATURE This is a backport of https://github.com/apache/spark/pull/36561. ### Why are the changes needed? Porting parsing errors of partitions to new error framework, improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryParsingErrorsSuite*" ``` Closes #36916 from panbingkun/branch-3.3-SPARK-37939. Authored-by: panbingkun Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 6 ++ .../spark/sql/errors/QueryParsingErrors.scala | 29 +++--- .../sql/errors/QueryParsingErrorsSuite.scala | 92 +++++++++++++++++++ .../sql/execution/SparkSqlParserSuite.scala | 6 +- .../command/CreateNamespaceParserSuite.scala | 3 +- 5 files changed, 120 insertions(+), 16 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index fc712fc9c52e3..5eab18dfd352c 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -113,6 +113,12 @@ "message" : [ "The value of parameter(s) '' in is invalid: " ], "sqlState" : "22023" }, + "INVALID_PROPERTY_KEY" : { + "message" : [ " is an invalid property key, please use quotes, e.g. SET =" ] + }, + "INVALID_PROPERTY_VALUE" : { + "message" : [ " is an invalid property value, please use quotes, e.g. SET =" ] + }, "INVALID_SQL_SYNTAX" : { "message" : [ "Invalid SQL syntax: " ], "sqlState" : "42000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 6e8124c89e2d6..e92ed3e3b0729 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -260,17 +260,20 @@ private[sql] object QueryParsingErrors extends QueryErrorsBase { } def cannotCleanReservedNamespacePropertyError( - property: String, ctx: ParserRuleContext, msg: String): Throwable = { - new ParseException(s"$property is a reserved namespace property, $msg.", ctx) + property: String, ctx: ParserRuleContext, msg: String): ParseException = { + new ParseException("UNSUPPORTED_FEATURE", + Array(s"$property is a reserved namespace property, $msg."), ctx) } - def propertiesAndDbPropertiesBothSpecifiedError(ctx: CreateNamespaceContext): Throwable = { - new ParseException("Either PROPERTIES or DBPROPERTIES is allowed.", ctx) + def propertiesAndDbPropertiesBothSpecifiedError(ctx: CreateNamespaceContext): ParseException = { + new ParseException("UNSUPPORTED_FEATURE", + Array("set PROPERTIES and DBPROPERTIES at the same time."), ctx) } def cannotCleanReservedTablePropertyError( - property: String, ctx: ParserRuleContext, msg: String): Throwable = { - new ParseException(s"$property is a reserved table property, $msg.", ctx) + property: String, ctx: ParserRuleContext, msg: String): ParseException = { + new ParseException("UNSUPPORTED_FEATURE", + Array(s"$property is a reserved table property, $msg."), ctx) } def duplicatedTablePathsFoundError( @@ -367,15 +370,17 @@ private[sql] object QueryParsingErrors extends QueryErrorsBase { } def invalidPropertyKeyForSetQuotedConfigurationError( - keyCandidate: String, valueStr: String, ctx: ParserRuleContext): Throwable = { - new ParseException(s"'$keyCandidate' is an invalid property key, please " + - s"use quotes, e.g. SET `$keyCandidate`=`$valueStr`", ctx) + keyCandidate: String, valueStr: String, ctx: ParserRuleContext): ParseException = { + new ParseException(errorClass = "INVALID_PROPERTY_KEY", + messageParameters = Array(toSQLConf(keyCandidate), + toSQLConf(keyCandidate), toSQLConf(valueStr)), ctx) } def invalidPropertyValueForSetQuotedConfigurationError( - valueCandidate: String, keyStr: String, ctx: ParserRuleContext): Throwable = { - new ParseException(s"'$valueCandidate' is an invalid property value, please " + - s"use quotes, e.g. SET `$keyStr`=`$valueCandidate`", ctx) + valueCandidate: String, keyStr: String, ctx: ParserRuleContext): ParseException = { + new ParseException(errorClass = "INVALID_PROPERTY_VALUE", + messageParameters = Array(toSQLConf(valueCandidate), + toSQLConf(keyStr), toSQLConf(valueCandidate)), ctx) } def unexpectedFormatForResetConfigurationError(ctx: ResetConfigurationContext): Throwable = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala index 508f551bcec95..6494e541d4fab 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala @@ -213,4 +213,96 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { |--------------------------------------------^^^ |""".stripMargin) } + + test("UNSUPPORTED_FEATURE: cannot set reserved namespace property") { + val sql = "CREATE NAMESPACE IF NOT EXISTS a.b.c WITH PROPERTIES ('location'='/home/user/db')" + val msg = """The feature is not supported: location is a reserved namespace property, """ + + """please use the LOCATION clause to specify it.(line 1, pos 0)""" + validateParsingError( + sqlText = sql, + errorClass = "UNSUPPORTED_FEATURE", + sqlState = "0A000", + message = + s""" + |$msg + | + |== SQL == + |$sql + |^^^ + |""".stripMargin) + } + + test("UNSUPPORTED_FEATURE: cannot set reserved table property") { + val sql = "CREATE TABLE student (id INT, name STRING, age INT) " + + "USING PARQUET TBLPROPERTIES ('provider'='parquet')" + val msg = """The feature is not supported: provider is a reserved table property, """ + + """please use the USING clause to specify it.(line 1, pos 66)""" + validateParsingError( + sqlText = sql, + errorClass = "UNSUPPORTED_FEATURE", + sqlState = "0A000", + message = + s""" + |$msg + | + |== SQL == + |$sql + |------------------------------------------------------------------^^^ + |""".stripMargin) + } + + test("INVALID_PROPERTY_KEY: invalid property key for set quoted configuration") { + val sql = "set =`value`" + val msg = """"" is an invalid property key, please use quotes, """ + + """e.g. SET ""="value"(line 1, pos 0)""" + validateParsingError( + sqlText = sql, + errorClass = "INVALID_PROPERTY_KEY", + sqlState = null, + message = + s""" + |$msg + | + |== SQL == + |$sql + |^^^ + |""".stripMargin) + } + + test("INVALID_PROPERTY_VALUE: invalid property value for set quoted configuration") { + val sql = "set `key`=1;2;;" + val msg = """"1;2;;" is an invalid property value, please use quotes, """ + + """e.g. SET "key"="1;2;;"(line 1, pos 0)""" + validateParsingError( + sqlText = sql, + errorClass = "INVALID_PROPERTY_VALUE", + sqlState = null, + message = + s""" + |$msg + | + |== SQL == + |$sql + |^^^ + |""".stripMargin) + } + + test("UNSUPPORTED_FEATURE: cannot set Properties and DbProperties at the same time") { + val sql = "CREATE NAMESPACE IF NOT EXISTS a.b.c WITH PROPERTIES ('a'='a', 'b'='b', 'c'='c') " + + "WITH DBPROPERTIES('a'='a', 'b'='b', 'c'='c')" + val msg = """The feature is not supported: set PROPERTIES and DBPROPERTIES at the same time.""" + + """(line 1, pos 0)""" + validateParsingError( + sqlText = sql, + errorClass = "UNSUPPORTED_FEATURE", + sqlState = "0A000", + message = + s""" + |$msg + | + |== SQL == + |$sql + |^^^ + |""".stripMargin) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala index fb8f2ea6d8db2..49f65ab51cd6d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkSqlParserSuite.scala @@ -168,11 +168,11 @@ class SparkSqlParserSuite extends AnalysisTest { intercept("SET a=1;2;;", expectedErrMsg) intercept("SET a b=`1;;`", - "'a b' is an invalid property key, please use quotes, e.g. SET `a b`=`1;;`") + "\"a b\" is an invalid property key, please use quotes, e.g. SET \"a b\"=\"1;;\"") intercept("SET `a`=1;2;;", - "'1;2;;' is an invalid property value, please use quotes, e.g." + - " SET `a`=`1;2;;`") + "\"1;2;;\" is an invalid property value, please use quotes, e.g." + + " SET \"a\"=\"1;2;;\"") } test("refresh resource") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreateNamespaceParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreateNamespaceParserSuite.scala index 69a208b942429..6c59512148a53 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreateNamespaceParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/CreateNamespaceParserSuite.scala @@ -84,7 +84,8 @@ class CreateNamespaceParserSuite extends AnalysisTest { |WITH PROPERTIES ('a'='a', 'b'='b', 'c'='c') |WITH DBPROPERTIES ('a'='a', 'b'='b', 'c'='c') """.stripMargin - intercept(sql, "Either PROPERTIES or DBPROPERTIES is allowed") + intercept(sql, "The feature is not supported: " + + "set PROPERTIES and DBPROPERTIES at the same time.") } test("create namespace - support for other types in PROPERTIES") { From ac404a683a3908f5c80465aded07d6814ff80e90 Mon Sep 17 00:00:00 2001 From: itholic Date: Wed, 22 Jun 2022 19:21:18 +0900 Subject: [PATCH 359/535] [SPARK-38755][PYTHON][3.3] Add file to address missing pandas general functions ### What changes were proposed in this pull request? Backport for https://github.com/apache/spark/pull/36034 This PR proposes to add `python/pyspark/pandas/missing/general_functions.py` to track the missing [pandas general functions](https://pandas.pydata.org/docs/reference/general_functions.html) API. ### Why are the changes needed? We have scripts in `missing` directory to track & address the missing pandas APIs, but one for general functions is missing. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The existing tests should cover Closes #36034 from itholic/SPARK-38755. Authored-by: itholic Signed-off-by: Hyukjin Kwon Closes #36955 from itholic/SPARK-38755-backport. Authored-by: itholic Signed-off-by: Hyukjin Kwon --- .../pyspark.pandas/general_functions.rst | 14 +++++-- python/pyspark/pandas/__init__.py | 13 +++++- .../pandas/missing/general_functions.py | 41 +++++++++++++++++++ python/pyspark/pandas/tests/test_namespace.py | 17 ++++++++ .../pyspark/pandas/usage_logging/__init__.py | 4 +- 5 files changed, 84 insertions(+), 5 deletions(-) create mode 100644 python/pyspark/pandas/missing/general_functions.py diff --git a/python/docs/source/reference/pyspark.pandas/general_functions.rst b/python/docs/source/reference/pyspark.pandas/general_functions.rst index 1358a99a97eb4..a67c0a872e074 100644 --- a/python/docs/source/reference/pyspark.pandas/general_functions.rst +++ b/python/docs/source/reference/pyspark.pandas/general_functions.rst @@ -41,6 +41,7 @@ Data manipulations and SQL melt merge + merge_asof get_dummies concat sql @@ -52,18 +53,25 @@ Top-level missing data .. autosummary:: :toctree: api/ - to_numeric isna isnull notna notnull -Top-level dealing with datetimelike +Top-level dealing with numeric data ----------------------------------- + +.. autosummary:: + :toctree: api/ + + to_numeric + +Top-level dealing with datetimelike data +---------------------------------------- .. autosummary:: :toctree: api/ to_datetime date_range to_timedelta - timedelta_range \ No newline at end of file + timedelta_range diff --git a/python/pyspark/pandas/__init__.py b/python/pyspark/pandas/__init__.py index a11c496e2ca88..56a4f80a13ce9 100644 --- a/python/pyspark/pandas/__init__.py +++ b/python/pyspark/pandas/__init__.py @@ -22,9 +22,11 @@ import os import sys -from distutils.version import LooseVersion import warnings +from distutils.version import LooseVersion +from typing import Any +from pyspark.pandas.missing.general_functions import _MissingPandasLikeGeneralFunctions from pyspark.sql.pandas.utils import require_minimum_pandas_version, require_minimum_pyarrow_version try: @@ -151,3 +153,12 @@ def _auto_patch_pandas() -> None: from pyspark.pandas.config import get_option, options, option_context, reset_option, set_option from pyspark.pandas.namespace import * # noqa: F403 from pyspark.pandas.sql_formatter import sql + + +def __getattr__(key: str) -> Any: + if key.startswith("__"): + raise AttributeError(key) + if hasattr(_MissingPandasLikeGeneralFunctions, key): + return getattr(_MissingPandasLikeGeneralFunctions, key) + else: + raise AttributeError("module 'pyspark.pandas' has no attribute '%s'" % (key)) diff --git a/python/pyspark/pandas/missing/general_functions.py b/python/pyspark/pandas/missing/general_functions.py new file mode 100644 index 0000000000000..2fd5b877cc929 --- /dev/null +++ b/python/pyspark/pandas/missing/general_functions.py @@ -0,0 +1,41 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from pyspark.pandas.missing import unsupported_function + + +def _unsupported_function(method_name, deprecated=False, reason=""): + return unsupported_function( + class_name="pd", method_name=method_name, deprecated=deprecated, reason=reason + ) + + +class _MissingPandasLikeGeneralFunctions: + + pivot = _unsupported_function("pivot") + pivot_table = _unsupported_function("pivot_table") + crosstab = _unsupported_function("crosstab") + cut = _unsupported_function("cut") + qcut = _unsupported_function("qcut") + merge_ordered = _unsupported_function("merge_ordered") + factorize = _unsupported_function("factorize") + unique = _unsupported_function("unique") + wide_to_long = _unsupported_function("wide_to_long") + bdate_range = _unsupported_function("bdate_range") + period_range = _unsupported_function("period_range") + infer_freq = _unsupported_function("infer_freq") + interval_range = _unsupported_function("interval_range") + eval = _unsupported_function("eval") diff --git a/python/pyspark/pandas/tests/test_namespace.py b/python/pyspark/pandas/tests/test_namespace.py index b529025708905..8c5adb9bae5e1 100644 --- a/python/pyspark/pandas/tests/test_namespace.py +++ b/python/pyspark/pandas/tests/test_namespace.py @@ -16,13 +16,16 @@ # import itertools +import inspect import pandas as pd import numpy as np from pyspark import pandas as ps +from pyspark.pandas.exceptions import PandasNotImplementedError from pyspark.pandas.namespace import _get_index_map, read_delta from pyspark.pandas.utils import spark_column_equals +from pyspark.pandas.missing.general_functions import _MissingPandasLikeGeneralFunctions from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.testing.sqlutils import SQLTestUtils @@ -554,6 +557,20 @@ def test_to_numeric(self): lambda: ps.to_numeric(psser, errors="ignore"), ) + def test_missing(self): + missing_functions = inspect.getmembers( + _MissingPandasLikeGeneralFunctions, inspect.isfunction + ) + unsupported_functions = [ + name for (name, type_) in missing_functions if type_.__name__ == "unsupported_function" + ] + for name in unsupported_functions: + with self.assertRaisesRegex( + PandasNotImplementedError, + "The method.*pd.*{}.*not implemented yet.".format(name), + ): + getattr(ps, name)() + if __name__ == "__main__": import unittest diff --git a/python/pyspark/pandas/usage_logging/__init__.py b/python/pyspark/pandas/usage_logging/__init__.py index a6f1470b9f4e4..7f082623c0351 100644 --- a/python/pyspark/pandas/usage_logging/__init__.py +++ b/python/pyspark/pandas/usage_logging/__init__.py @@ -31,6 +31,7 @@ from pyspark.pandas.indexes.multi import MultiIndex from pyspark.pandas.indexes.numeric import Float64Index, Int64Index from pyspark.pandas.missing.frame import _MissingPandasLikeDataFrame +from pyspark.pandas.missing.general_functions import _MissingPandasLikeGeneralFunctions from pyspark.pandas.missing.groupby import ( MissingPandasLikeDataFrameGroupBy, MissingPandasLikeSeriesGroupBy, @@ -109,6 +110,7 @@ def attach(logger_module: Union[str, ModuleType]) -> None: modules.append(sql_formatter) missings = [ + (pd, _MissingPandasLikeGeneralFunctions), (pd.DataFrame, _MissingPandasLikeDataFrame), (pd.Series, MissingPandasLikeSeries), (pd.Index, MissingPandasLikeIndex), @@ -122,4 +124,4 @@ def attach(logger_module: Union[str, ModuleType]) -> None: (pd.core.window.RollingGroupby, MissingPandasLikeRollingGroupby), ] - _attach(logger_module, modules, classes, missings) + _attach(logger_module, modules, classes, missings) # type: ignore[arg-type] From 3cf304855be8ec04158d976d15210da1fa22ac03 Mon Sep 17 00:00:00 2001 From: Maryann Xue Date: Wed, 22 Jun 2022 22:31:58 +0800 Subject: [PATCH 360/535] [SPARK-39551][SQL] Add AQE invalid plan check This PR adds a check for invalid plans in AQE replanning process. The check will throw exceptions when it detects an invalid plan, causing AQE to void the current replanning result and keep using the latest valid plan. AQE logical optimization rules can lead to invalid physical plans and cause runtime exceptions as certain physical plan nodes are not compatible with others. E.g., `BroadcastExchangeExec` can only work as a direct child of broadcast join nodes, but it could appear under other incompatible physical plan nodes because of empty relation propagation. No. Added UT. Closes #36953 from maryannxue/validate-aqe. Authored-by: Maryann Xue Signed-off-by: Wenchen Fan (cherry picked from commit 58b91b1fa381f0a173c7b3c015337113f8f2b6c6) Signed-off-by: Wenchen Fan --- .../adaptive/AdaptiveSparkPlanExec.scala | 72 +++++++++++-------- .../adaptive/InvalidAQEPlanException.scala | 30 ++++++++ .../adaptive/ValidateSparkPlan.scala | 68 ++++++++++++++++++ .../adaptive/AdaptiveQueryExecSuite.scala | 25 ++++++- 4 files changed, 163 insertions(+), 32 deletions(-) create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InvalidAQEPlanException.scala create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ValidateSparkPlan.scala diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index df302e5dc7577..40d2e1a3a8f46 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -116,6 +116,7 @@ case class AdaptiveSparkPlanExec( Seq( RemoveRedundantProjects, ensureRequirements, + ValidateSparkPlan, ReplaceHashWithSortAgg, RemoveRedundantSorts, DisableUnnecessaryBucketedScan, @@ -299,16 +300,19 @@ case class AdaptiveSparkPlanExec( // plans are updated, we can clear the query stage list because at this point the two plans // are semantically and physically in sync again. val logicalPlan = replaceWithQueryStagesInLogicalPlan(currentLogicalPlan, stagesToReplace) - val (newPhysicalPlan, newLogicalPlan) = reOptimize(logicalPlan) - val origCost = costEvaluator.evaluateCost(currentPhysicalPlan) - val newCost = costEvaluator.evaluateCost(newPhysicalPlan) - if (newCost < origCost || + val afterReOptimize = reOptimize(logicalPlan) + if (afterReOptimize.isDefined) { + val (newPhysicalPlan, newLogicalPlan) = afterReOptimize.get + val origCost = costEvaluator.evaluateCost(currentPhysicalPlan) + val newCost = costEvaluator.evaluateCost(newPhysicalPlan) + if (newCost < origCost || (newCost == origCost && currentPhysicalPlan != newPhysicalPlan)) { - logOnLevel(s"Plan changed from $currentPhysicalPlan to $newPhysicalPlan") - cleanUpTempTags(newPhysicalPlan) - currentPhysicalPlan = newPhysicalPlan - currentLogicalPlan = newLogicalPlan - stagesToReplace = Seq.empty[QueryStageExec] + logOnLevel(s"Plan changed from $currentPhysicalPlan to $newPhysicalPlan") + cleanUpTempTags(newPhysicalPlan) + currentPhysicalPlan = newPhysicalPlan + currentLogicalPlan = newLogicalPlan + stagesToReplace = Seq.empty[QueryStageExec] + } } // Now that some stages have finished, we can try creating new stages. result = createQueryStages(currentPhysicalPlan) @@ -638,29 +642,35 @@ case class AdaptiveSparkPlanExec( /** * Re-optimize and run physical planning on the current logical plan based on the latest stats. */ - private def reOptimize(logicalPlan: LogicalPlan): (SparkPlan, LogicalPlan) = { - logicalPlan.invalidateStatsCache() - val optimized = optimizer.execute(logicalPlan) - val sparkPlan = context.session.sessionState.planner.plan(ReturnAnswer(optimized)).next() - val newPlan = applyPhysicalRules( - sparkPlan, - preprocessingRules ++ queryStagePreparationRules, - Some((planChangeLogger, "AQE Replanning"))) - - // When both enabling AQE and DPP, `PlanAdaptiveDynamicPruningFilters` rule will - // add the `BroadcastExchangeExec` node manually in the DPP subquery, - // not through `EnsureRequirements` rule. Therefore, when the DPP subquery is complicated - // and need to be re-optimized, AQE also need to manually insert the `BroadcastExchangeExec` - // node to prevent the loss of the `BroadcastExchangeExec` node in DPP subquery. - // Here, we also need to avoid to insert the `BroadcastExchangeExec` node when the newPlan - // is already the `BroadcastExchangeExec` plan after apply the `LogicalQueryStageStrategy` rule. - val finalPlan = currentPhysicalPlan match { - case b: BroadcastExchangeLike - if (!newPlan.isInstanceOf[BroadcastExchangeLike]) => b.withNewChildren(Seq(newPlan)) - case _ => newPlan - } + private def reOptimize(logicalPlan: LogicalPlan): Option[(SparkPlan, LogicalPlan)] = { + try { + logicalPlan.invalidateStatsCache() + val optimized = optimizer.execute(logicalPlan) + val sparkPlan = context.session.sessionState.planner.plan(ReturnAnswer(optimized)).next() + val newPlan = applyPhysicalRules( + sparkPlan, + preprocessingRules ++ queryStagePreparationRules, + Some((planChangeLogger, "AQE Replanning"))) + + // When both enabling AQE and DPP, `PlanAdaptiveDynamicPruningFilters` rule will + // add the `BroadcastExchangeExec` node manually in the DPP subquery, + // not through `EnsureRequirements` rule. Therefore, when the DPP subquery is complicated + // and need to be re-optimized, AQE also need to manually insert the `BroadcastExchangeExec` + // node to prevent the loss of the `BroadcastExchangeExec` node in DPP subquery. + // Here, we also need to avoid to insert the `BroadcastExchangeExec` node when the newPlan is + // already the `BroadcastExchangeExec` plan after apply the `LogicalQueryStageStrategy` rule. + val finalPlan = currentPhysicalPlan match { + case b: BroadcastExchangeLike + if (!newPlan.isInstanceOf[BroadcastExchangeLike]) => b.withNewChildren(Seq(newPlan)) + case _ => newPlan + } - (finalPlan, optimized) + Some((finalPlan, optimized)) + } catch { + case e: InvalidAQEPlanException[_] => + logOnLevel(s"Re-optimize - ${e.getMessage()}:\n${e.plan}") + None + } } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InvalidAQEPlanException.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InvalidAQEPlanException.scala new file mode 100644 index 0000000000000..71f6db8b2b9cb --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/InvalidAQEPlanException.scala @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.adaptive + +import org.apache.spark.sql.catalyst.plans.QueryPlan + +/** + * Exception thrown when an invalid query plan is detected in AQE replanning, + * in which case AQE will stop the current replanning process and keep using the latest valid plan. + * + * @param message The reason why the plan is considered invalid. + * @param plan The invalid plan/sub-plan. + */ +case class InvalidAQEPlanException[QueryType <: QueryPlan[_]](message: String, plan: QueryType) + extends Exception(message) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ValidateSparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ValidateSparkPlan.scala new file mode 100644 index 0000000000000..0fdc50e2acc8d --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ValidateSparkPlan.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.adaptive + +import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNestedLoopJoinExec} + +/** + * Detects invalid physical plans generated by AQE replanning and throws `InvalidAQEPlanException` + * if such plans are detected. This rule should be called after EnsureRequirements where all + * necessary Exchange nodes are added. + */ +object ValidateSparkPlan extends Rule[SparkPlan] { + + def apply(plan: SparkPlan): SparkPlan = { + validate(plan) + plan + } + + /** + * Validate that the plan satisfies the following condition: + * - BroadcastQueryStage only appears as the immediate child and the build side of a broadcast + * hash join or broadcast nested loop join. + */ + private def validate(plan: SparkPlan): Unit = plan match { + case b: BroadcastHashJoinExec => + val (buildPlan, probePlan) = b.buildSide match { + case BuildLeft => (b.left, b.right) + case BuildRight => (b.right, b.left) + } + if (!buildPlan.isInstanceOf[BroadcastQueryStageExec]) { + validate(buildPlan) + } + validate(probePlan) + case b: BroadcastNestedLoopJoinExec => + val (buildPlan, probePlan) = b.buildSide match { + case BuildLeft => (b.left, b.right) + case BuildRight => (b.right, b.left) + } + if (!buildPlan.isInstanceOf[BroadcastQueryStageExec]) { + validate(buildPlan) + } + validate(probePlan) + case q: BroadcastQueryStageExec => errorOnInvalidBroadcastQueryStage(q) + case _ => plan.children.foreach(validate) + } + + private def errorOnInvalidBroadcastQueryStage(plan: SparkPlan): Unit = { + throw InvalidAQEPlanException("Invalid broadcast query stage", plan) + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 831a998dfaaec..67dc359ab8f33 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.execution.command.DataWritingCommandExec import org.apache.spark.sql.execution.datasources.noop.NoopDataSource import org.apache.spark.sql.execution.datasources.v2.V2TableWriteExec import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ENSURE_REQUIREMENTS, Exchange, REPARTITION_BY_COL, REPARTITION_BY_NUM, ReusedExchangeExec, ShuffleExchangeExec, ShuffleExchangeLike, ShuffleOrigin} -import org.apache.spark.sql.execution.joins.{BaseJoinExec, BroadcastHashJoinExec, ShuffledHashJoinExec, ShuffledJoin, SortMergeJoinExec} +import org.apache.spark.sql.execution.joins.{BaseJoinExec, BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, ShuffledHashJoinExec, ShuffledJoin, SortMergeJoinExec} import org.apache.spark.sql.execution.metric.SQLShuffleReadMetricsReporter import org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate import org.apache.spark.sql.functions._ @@ -104,6 +104,12 @@ class AdaptiveQueryExecSuite } } + def findTopLevelBroadcastNestedLoopJoin(plan: SparkPlan): Seq[BaseJoinExec] = { + collect(plan) { + case j: BroadcastNestedLoopJoinExec => j + } + } + private def findTopLevelSortMergeJoin(plan: SparkPlan): Seq[SortMergeJoinExec] = { collect(plan) { case j: SortMergeJoinExec => j @@ -2576,6 +2582,23 @@ class AdaptiveQueryExecSuite assert(findTopLevelAggregate(adaptive5).size == 4) } } + + test("SPARK-39551: Invalid plan check - invalid broadcast query stage") { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { + val (_, adaptivePlan) = runAdaptiveAndVerifyResult( + """ + |SELECT /*+ BROADCAST(t3) */ t3.b, count(t3.a) FROM testData2 t1 + |INNER JOIN testData2 t2 + |ON t1.b = t2.b AND t1.a = 0 + |RIGHT OUTER JOIN testData2 t3 + |ON t1.a > t3.a + |GROUP BY t3.b + """.stripMargin + ) + assert(findTopLevelBroadcastNestedLoopJoin(adaptivePlan).size == 1) + } + } } /** From a7c21bb3ddd3e2cd62018482818c84b19ed97c97 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 22 Jun 2022 22:59:16 +0800 Subject: [PATCH 361/535] [SPARK-39340][SQL] DS v2 agg pushdown should allow dots in the name of top-level columns It turns out that I was wrong in https://github.com/apache/spark/pull/36727 . We still have the limitation (column name cannot contain dot) in master and 3.3 braches, in a very implicit way: The `V2ExpressionBuilder` has a boolean flag `nestedPredicatePushdownEnabled` whose default value is false. When it's false, it uses `PushableColumnWithoutNestedColumn` to match columns, which doesn't support dot in names. `V2ExpressionBuilder` is only used in 2 places: 1. `PushableExpression`. This is a pattern match that is only used in v2 agg pushdown 2. `PushablePredicate`. This is a pattern match that is used in various places, but all the caller sides set `nestedPredicatePushdownEnabled` to true. This PR removes the `nestedPredicatePushdownEnabled` flag from `V2ExpressionBuilder`, and makes it always support nested fields. `PushablePredicate` is also updated accordingly to remove the boolean flag, as it's always true. Fix a mistake to eliminate an unexpected limitation in DS v2 pushdown. No for end users. For data source developers, they can trigger agg pushdowm more often. a new test Closes #36945 from cloud-fan/dsv2. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit 4567ed99a52d0274312ba78024c548f91659a12a) Signed-off-by: Wenchen Fan --- .../catalyst/util/V2ExpressionBuilder.scala | 25 ++++++------ .../datasources/v2/DataSourceV2Strategy.scala | 38 +++++++------------ .../datasources/v2/PushDownUtils.scala | 2 +- .../v2/DataSourceV2StrategySuite.scala | 2 +- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 31 ++++++++++----- 5 files changed, 49 insertions(+), 49 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala index c77a040bc64e5..23560ae1d098e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/util/V2ExpressionBuilder.scala @@ -17,19 +17,15 @@ package org.apache.spark.sql.catalyst.util -import org.apache.spark.sql.catalyst.expressions.{Abs, Add, And, BinaryComparison, BinaryOperator, BitwiseAnd, BitwiseNot, BitwiseOr, BitwiseXor, CaseWhen, Cast, Ceil, Coalesce, Contains, Divide, EndsWith, EqualTo, Exp, Expression, Floor, In, InSet, IsNotNull, IsNull, Literal, Log, Multiply, Not, Or, Pow, Predicate, Remainder, Sqrt, StartsWith, StringPredicate, Subtract, UnaryMinus, WidthBucket} +import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.connector.expressions.{Cast => V2Cast, Expression => V2Expression, FieldReference, GeneralScalarExpression, LiteralValue} import org.apache.spark.sql.connector.expressions.filter.{AlwaysFalse, AlwaysTrue, And => V2And, Not => V2Not, Or => V2Or, Predicate => V2Predicate} -import org.apache.spark.sql.execution.datasources.PushableColumn import org.apache.spark.sql.types.BooleanType /** * The builder to generate V2 expressions from catalyst expressions. */ -class V2ExpressionBuilder( - e: Expression, nestedPredicatePushdownEnabled: Boolean = false, isPredicate: Boolean = false) { - - val pushableColumn = PushableColumn(nestedPredicatePushdownEnabled) +class V2ExpressionBuilder(e: Expression, isPredicate: Boolean = false) { def build(): Option[V2Expression] = generateExpression(e, isPredicate) @@ -49,12 +45,8 @@ class V2ExpressionBuilder( case Literal(true, BooleanType) => Some(new AlwaysTrue()) case Literal(false, BooleanType) => Some(new AlwaysFalse()) case Literal(value, dataType) => Some(LiteralValue(value, dataType)) - case col @ pushableColumn(name) => - val ref = if (nestedPredicatePushdownEnabled) { - FieldReference(name) - } else { - FieldReference.column(name) - } + case col @ ColumnOrField(nameParts) => + val ref = FieldReference(nameParts) if (isPredicate && col.dataType.isInstanceOf[BooleanType]) { Some(new V2Predicate("=", Array(ref, LiteralValue(true, BooleanType)))) } else { @@ -207,3 +199,12 @@ class V2ExpressionBuilder( case _ => None } } + +object ColumnOrField { + def unapply(e: Expression): Option[Seq[String]] = e match { + case a: Attribute => Some(Seq(a.name)) + case s: GetStructField => + unapply(s.child).map(_ :+ s.childSchema(s.ordinal).name) + case _ => None + } +} diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala index 95418027187cb..9be9cdda9e00a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Strategy.scala @@ -484,12 +484,9 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat private[sql] object DataSourceV2Strategy { - private def translateLeafNodeFilterV2( - predicate: Expression, - supportNestedPredicatePushdown: Boolean): Option[Predicate] = { - val pushablePredicate = PushablePredicate(supportNestedPredicatePushdown) + private def translateLeafNodeFilterV2(predicate: Expression): Option[Predicate] = { predicate match { - case pushablePredicate(expr) => Some(expr) + case PushablePredicate(expr) => Some(expr) case _ => None } } @@ -499,10 +496,8 @@ private[sql] object DataSourceV2Strategy { * * @return a `Some[Filter]` if the input [[Expression]] is convertible, otherwise a `None`. */ - protected[sql] def translateFilterV2( - predicate: Expression, - supportNestedPredicatePushdown: Boolean): Option[Predicate] = { - translateFilterV2WithMapping(predicate, None, supportNestedPredicatePushdown) + protected[sql] def translateFilterV2(predicate: Expression): Option[Predicate] = { + translateFilterV2WithMapping(predicate, None) } /** @@ -516,8 +511,7 @@ private[sql] object DataSourceV2Strategy { */ protected[sql] def translateFilterV2WithMapping( predicate: Expression, - translatedFilterToExpr: Option[mutable.HashMap[Predicate, Expression]], - nestedPredicatePushdownEnabled: Boolean) + translatedFilterToExpr: Option[mutable.HashMap[Predicate, Expression]]) : Option[Predicate] = { predicate match { case And(left, right) => @@ -531,26 +525,21 @@ private[sql] object DataSourceV2Strategy { // Pushing one leg of AND down is only safe to do at the top level. // You can see ParquetFilters' createFilter for more details. for { - leftFilter <- translateFilterV2WithMapping( - left, translatedFilterToExpr, nestedPredicatePushdownEnabled) - rightFilter <- translateFilterV2WithMapping( - right, translatedFilterToExpr, nestedPredicatePushdownEnabled) + leftFilter <- translateFilterV2WithMapping(left, translatedFilterToExpr) + rightFilter <- translateFilterV2WithMapping(right, translatedFilterToExpr) } yield new V2And(leftFilter, rightFilter) case Or(left, right) => for { - leftFilter <- translateFilterV2WithMapping( - left, translatedFilterToExpr, nestedPredicatePushdownEnabled) - rightFilter <- translateFilterV2WithMapping( - right, translatedFilterToExpr, nestedPredicatePushdownEnabled) + leftFilter <- translateFilterV2WithMapping(left, translatedFilterToExpr) + rightFilter <- translateFilterV2WithMapping(right, translatedFilterToExpr) } yield new V2Or(leftFilter, rightFilter) case Not(child) => - translateFilterV2WithMapping(child, translatedFilterToExpr, nestedPredicatePushdownEnabled) - .map(new V2Not(_)) + translateFilterV2WithMapping(child, translatedFilterToExpr).map(new V2Not(_)) case other => - val filter = translateLeafNodeFilterV2(other, nestedPredicatePushdownEnabled) + val filter = translateLeafNodeFilterV2(other) if (filter.isDefined && translatedFilterToExpr.isDefined) { translatedFilterToExpr.get(filter.get) = predicate } @@ -582,10 +571,9 @@ private[sql] object DataSourceV2Strategy { /** * Get the expression of DS V2 to represent catalyst predicate that can be pushed down. */ -case class PushablePredicate(nestedPredicatePushdownEnabled: Boolean) { - +object PushablePredicate { def unapply(e: Expression): Option[Predicate] = - new V2ExpressionBuilder(e, nestedPredicatePushdownEnabled, true).build().map { v => + new V2ExpressionBuilder(e, true).build().map { v => assert(v.isInstanceOf[Predicate]) v.asInstanceOf[Predicate] } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala index 0ebfed2fe9eef..6b366fbd68a1d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/PushDownUtils.scala @@ -80,7 +80,7 @@ object PushDownUtils extends PredicateHelper { for (filterExpr <- filters) { val translated = DataSourceV2Strategy.translateFilterV2WithMapping( - filterExpr, Some(translatedFilterToExpr), nestedPredicatePushdownEnabled = true) + filterExpr, Some(translatedFilterToExpr)) if (translated.isEmpty) { untranslatableExprs += filterExpr } else { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala index 6296da47cca51..1a5a382afdc6b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2StrategySuite.scala @@ -37,7 +37,7 @@ class DataSourceV2StrategySuite extends PlanTest with SharedSparkSession { */ def testTranslateFilter(catalystFilter: Expression, result: Option[Predicate]): Unit = { assertResult(result) { - DataSourceV2Strategy.translateFilterV2(catalystFilter, true) + DataSourceV2Strategy.translateFilterV2(catalystFilter) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index 858aeaa13653b..2f94f9ef31e83 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -81,9 +81,10 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel conn.prepareStatement( "INSERT INTO \"test\".\"employee\" VALUES (6, 'jen', 12000, 1200, true)").executeUpdate() conn.prepareStatement( - "CREATE TABLE \"test\".\"dept\" (\"dept id\" INTEGER NOT NULL)").executeUpdate() - conn.prepareStatement("INSERT INTO \"test\".\"dept\" VALUES (1)").executeUpdate() - conn.prepareStatement("INSERT INTO \"test\".\"dept\" VALUES (2)").executeUpdate() + "CREATE TABLE \"test\".\"dept\" (\"dept id\" INTEGER NOT NULL, \"dept.id\" INTEGER)") + .executeUpdate() + conn.prepareStatement("INSERT INTO \"test\".\"dept\" VALUES (1, 1)").executeUpdate() + conn.prepareStatement("INSERT INTO \"test\".\"dept\" VALUES (2, 1)").executeUpdate() // scalastyle:off conn.prepareStatement( @@ -117,10 +118,10 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(sql("SELECT name, id FROM h2.test.people"), Seq(Row("fred", 1), Row("mary", 2))) } - private def checkPushedInfo(df: DataFrame, expectedPlanFragment: String): Unit = { + private def checkPushedInfo(df: DataFrame, expectedPlanFragment: String*): Unit = { df.queryExecution.optimizedPlan.collect { case _: DataSourceV2ScanRelation => - checkKeywordsExistsInExplain(df, expectedPlanFragment) + checkKeywordsExistsInExplain(df, expectedPlanFragment: _*) } } @@ -1177,11 +1178,21 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel } test("column name with composite field") { - checkAnswer(sql("SELECT `dept id` FROM h2.test.dept"), Seq(Row(1), Row(2))) - val df = sql("SELECT COUNT(`dept id`) FROM h2.test.dept") - checkAggregateRemoved(df) - checkPushedInfo(df, "PushedAggregates: [COUNT(`dept id`)]") - checkAnswer(df, Seq(Row(2))) + checkAnswer(sql("SELECT `dept id`, `dept.id` FROM h2.test.dept"), Seq(Row(1, 1), Row(2, 1))) + + val df1 = sql("SELECT COUNT(`dept id`) FROM h2.test.dept") + checkPushedInfo(df1, "PushedAggregates: [COUNT(`dept id`)]") + checkAnswer(df1, Seq(Row(2))) + + val df2 = sql("SELECT `dept.id`, COUNT(`dept id`) FROM h2.test.dept GROUP BY `dept.id`") + checkPushedInfo(df2, + "PushedGroupByExpressions: [`dept.id`]", "PushedAggregates: [COUNT(`dept id`)]") + checkAnswer(df2, Seq(Row(1, 2))) + + val df3 = sql("SELECT `dept id`, COUNT(`dept.id`) FROM h2.test.dept GROUP BY `dept id`") + checkPushedInfo(df3, + "PushedGroupByExpressions: [`dept id`]", "PushedAggregates: [COUNT(`dept.id`)]") + checkAnswer(df3, Seq(Row(1, 1), Row(2, 1))) } test("column name with non-ascii") { From b39ed56193ea0ade80960cd920536426d85680f1 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Wed, 22 Jun 2022 18:52:14 +0300 Subject: [PATCH 362/535] [SPARK-38687][SQL][3.3] Use error classes in the compilation errors of generators ## What changes were proposed in this pull request? Migrate the following errors in QueryCompilationErrors onto use error classes: - nestedGeneratorError => UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS - moreThanOneGeneratorError => UNSUPPORTED_GENERATOR.MULTI_GENERATOR - generatorOutsideSelectError => UNSUPPORTED_GENERATOR.OUTSIDE_SELECT - generatorNotExpectedError => UNSUPPORTED_GENERATOR.NOT_GENERATOR This is a backport of https://github.com/apache/spark/pull/36617. ### Why are the changes needed? Porting compilation errors of generator to new error framework, improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" ``` Closes #36956 from panbingkun/branch-3.3-SPARK-38687. Authored-by: panbingkun Signed-off-by: Max Gekk --- .../main/resources/error/error-classes.json | 17 +++++++ .../sql/errors/QueryCompilationErrors.scala | 22 ++++----- .../analysis/AnalysisErrorSuite.scala | 23 +++++----- .../spark/sql/GeneratorFunctionSuite.scala | 9 ++-- .../errors/QueryCompilationErrorsSuite.scala | 46 +++++++++++++++++++ 5 files changed, 90 insertions(+), 27 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 5eab18dfd352c..31ec5aaa05e33 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -198,6 +198,23 @@ "message" : [ "The feature is not supported: " ], "sqlState" : "0A000" }, + "UNSUPPORTED_GENERATOR" : { + "message" : [ "The generator is not supported: " ], + "subClass" : { + "MULTI_GENERATOR" : { + "message" : [ "only one generator allowed per clause but found : " ] + }, + "NESTED_IN_EXPRESSIONS" : { + "message" : [ "nested in expressions " ] + }, + "NOT_GENERATOR" : { + "message" : [ " is expected to be a generator. However, its class is , which is not a generator." ] + }, + "OUTSIDE_SELECT" : { + "message" : [ "outside the SELECT clause, found: " ] + } + } + }, "UNSUPPORTED_GROUPING_EXPRESSION" : { "message" : [ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup" ] }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 04b1d5f796d9b..6946f9dfc9888 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeRef import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, Join, LogicalPlan, SerdeInfo, Window} import org.apache.spark.sql.catalyst.trees.{Origin, TreeNode} -import org.apache.spark.sql.catalyst.util.{toPrettySQL, FailFastMode, ParseMode, PermissiveMode} +import org.apache.spark.sql.catalyst.util.{FailFastMode, ParseMode, PermissiveMode} import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.functions.{BoundFunction, UnboundFunction} @@ -113,21 +113,19 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { } def nestedGeneratorError(trimmedNestedGenerator: Expression): Throwable = { - new AnalysisException( - "Generators are not supported when it's nested in " + - "expressions, but got: " + toPrettySQL(trimmedNestedGenerator)) + new AnalysisException(errorClass = "UNSUPPORTED_GENERATOR", + messageParameters = Array("NESTED_IN_EXPRESSIONS", toSQLExpr(trimmedNestedGenerator))) } def moreThanOneGeneratorError(generators: Seq[Expression], clause: String): Throwable = { - new AnalysisException( - s"Only one generator allowed per $clause clause but found " + - generators.size + ": " + generators.map(toPrettySQL).mkString(", ")) + new AnalysisException(errorClass = "UNSUPPORTED_GENERATOR", + messageParameters = Array("MULTI_GENERATOR", + clause, generators.size.toString, generators.map(toSQLExpr).mkString(", "))) } def generatorOutsideSelectError(plan: LogicalPlan): Throwable = { - new AnalysisException( - "Generators are not supported outside the SELECT clause, but " + - "got: " + plan.simpleString(SQLConf.get.maxToStringFields)) + new AnalysisException(errorClass = "UNSUPPORTED_GENERATOR", + messageParameters = Array("OUTSIDE_SELECT", plan.simpleString(SQLConf.get.maxToStringFields))) } def legacyStoreAssignmentPolicyError(): Throwable = { @@ -324,8 +322,8 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { } def generatorNotExpectedError(name: FunctionIdentifier, classCanonicalName: String): Throwable = { - new AnalysisException(s"$name is expected to be a generator. However, " + - s"its class is $classCanonicalName, which is not a generator.") + new AnalysisException(errorClass = "UNSUPPORTED_GENERATOR", + messageParameters = Array("NOT_GENERATOR", toSQLId(name.toString), classCanonicalName)) } def functionWithUnsupportedSyntaxError(prettyName: String, syntax: String): Throwable = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index a5b8663f5e674..890344324528c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -483,7 +483,7 @@ class AnalysisErrorSuite extends AnalysisTest { errorTest( "generator nested in expressions", listRelation.select(Explode($"list") + 1), - "Generators are not supported when it's nested in expressions, but got: (explode(list) + 1)" + "The generator is not supported: nested in expressions \"(explode(list) + 1)\"" :: Nil ) @@ -494,29 +494,29 @@ class AnalysisErrorSuite extends AnalysisTest { AttributeReference("nestedList", ArrayType(ArrayType(IntegerType)))()) nestedListRelation.select(Explode(Explode($"nestedList"))) }, - "Generators are not supported when it's nested in expressions, but got: " + - "explode(explode(nestedList))" :: Nil + "The generator is not supported: nested in expressions " + + """"explode(explode(nestedList))"""" :: Nil ) errorTest( "SPARK-30998: unsupported nested inner generators for aggregates", testRelation.select(Explode(Explode( CreateArray(CreateArray(min($"a") :: max($"a") :: Nil) :: Nil)))), - "Generators are not supported when it's nested in expressions, but got: " + - "explode(explode(array(array(min(a), max(a)))))" :: Nil + "The generator is not supported: nested in expressions " + + """"explode(explode(array(array(min(a), max(a)))))"""" :: Nil ) errorTest( "generator nested in expressions for aggregates", testRelation.select(Explode(CreateArray(min($"a") :: max($"a") :: Nil)) + 1), - "Generators are not supported when it's nested in expressions, but got: " + - "(explode(array(min(a), max(a))) + 1)" :: Nil + "The generator is not supported: nested in expressions " + + """"(explode(array(min(a), max(a))) + 1)"""" :: Nil ) errorTest( "generator appears in operator which is not Project", listRelation.sortBy(Explode($"list").asc), - "Generators are not supported outside the SELECT clause, but got: Sort" :: Nil + "The generator is not supported: outside the SELECT clause, found: Sort" :: Nil ) errorTest( @@ -534,15 +534,16 @@ class AnalysisErrorSuite extends AnalysisTest { errorTest( "more than one generators in SELECT", listRelation.select(Explode($"list"), Explode($"list")), - "Only one generator allowed per select clause but found 2: explode(list), explode(list)" :: Nil + "The generator is not supported: only one generator allowed per select clause but found 2: " + + """"explode(list)", "explode(list)"""" :: Nil ) errorTest( "more than one generators for aggregates in SELECT", testRelation.select(Explode(CreateArray(min($"a") :: Nil)), Explode(CreateArray(max($"a") :: Nil))), - "Only one generator allowed per select clause but found 2: " + - "explode(array(min(a))), explode(array(max(a)))" :: Nil + "The generator is not supported: only one generator allowed per select clause but found 2: " + + """"explode(array(min(a)))", "explode(array(max(a)))"""" :: Nil ) errorTest( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala index 09afedea7a5f8..08280c08cd2e6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala @@ -332,7 +332,7 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { val msg1 = intercept[AnalysisException] { sql("select 1 + explode(array(min(c2), max(c2))) from t1 group by c1") }.getMessage - assert(msg1.contains("Generators are not supported when it's nested in expressions")) + assert(msg1.contains("The generator is not supported: nested in expressions")) val msg2 = intercept[AnalysisException] { sql( @@ -342,7 +342,8 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { |from t1 group by c1 """.stripMargin) }.getMessage - assert(msg2.contains("Only one generator allowed per aggregate clause")) + assert(msg2.contains("The generator is not supported: " + + "only one generator allowed per aggregate clause")) } } @@ -350,8 +351,8 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { val errMsg = intercept[AnalysisException] { sql("SELECT array(array(1, 2), array(3)) v").select(explode(explode($"v"))).collect }.getMessage - assert(errMsg.contains("Generators are not supported when it's nested in expressions, " + - "but got: explode(explode(v))")) + assert(errMsg.contains("The generator is not supported: " + + "nested in expressions \"explode(explode(v))\"")) } test("SPARK-30997: generators in aggregate expressions for dataframe") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 3da5202a2ad55..9e18e4e66922b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -205,4 +205,50 @@ class QueryCompilationErrorsSuite extends QueryTest with SharedSparkSession { "The deserializer is not supported: try to map \"STRUCT\" " + "to Tuple1, but failed as the number of fields does not line up.") } + + test("UNSUPPORTED_GENERATOR: " + + "generators are not supported when it's nested in expressions") { + val e = intercept[AnalysisException]( + sql("""select explode(Array(1, 2, 3)) + 1""").collect() + ) + assert(e.errorClass === Some("UNSUPPORTED_GENERATOR")) + assert(e.message === + """The generator is not supported: """ + + """nested in expressions "(explode(array(1, 2, 3)) + 1)"""") + } + + test("UNSUPPORTED_GENERATOR: only one generator allowed") { + val e = intercept[AnalysisException]( + sql("""select explode(Array(1, 2, 3)), explode(Array(1, 2, 3))""").collect() + ) + assert(e.errorClass === Some("UNSUPPORTED_GENERATOR")) + assert(e.message === + "The generator is not supported: only one generator allowed per select clause " + + """but found 2: "explode(array(1, 2, 3))", "explode(array(1, 2, 3))"""") + } + + test("UNSUPPORTED_GENERATOR: generators are not supported outside the SELECT clause") { + val e = intercept[AnalysisException]( + sql("""select 1 from t order by explode(Array(1, 2, 3))""").collect() + ) + assert(e.errorClass === Some("UNSUPPORTED_GENERATOR")) + assert(e.message === + "The generator is not supported: outside the SELECT clause, found: " + + "'Sort [explode(array(1, 2, 3)) ASC NULLS FIRST], true") + } + + test("UNSUPPORTED_GENERATOR: not a generator") { + val e = intercept[AnalysisException]( + sql( + """ + |SELECT explodedvalue.* + |FROM VALUES array(1, 2, 3) AS (value) + |LATERAL VIEW array_contains(value, 1) AS explodedvalue""".stripMargin).collect() + ) + assert(e.errorClass === Some("UNSUPPORTED_GENERATOR")) + assert(e.message === + """The generator is not supported: `array_contains` is expected to be a generator. """ + + "However, its class is org.apache.spark.sql.catalyst.expressions.ArrayContains, " + + "which is not a generator.") + } } From bebfecb81da2de33240d8ab37bd641985281844e Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Thu, 23 Jun 2022 08:07:43 +0800 Subject: [PATCH 363/535] [SPARK-38614][SQL] Don't push down limit through window that's using percent_rank ### What changes were proposed in this pull request? Change `LimitPushDownThroughWindow` so that it no longer supports pushing down a limit through a window using percent_rank. ### Why are the changes needed? Given a query with a limit of _n_ rows, and a window whose child produces _m_ rows, percent_rank will label the _nth_ row as 100% rather than the _mth_ row. This behavior conflicts with Spark 3.1.3, Hive 2.3.9 and Prestodb 0.268. #### Example Assume this data: ``` create table t1 stored as parquet as select * from range(101); ``` And also assume this query: ``` select id, percent_rank() over (order by id) as pr from t1 limit 3; ``` With Spark 3.2.1, 3.3.0, and master, the limit is applied before the percent_rank: ``` 0 0.0 1 0.5 2 1.0 ``` With Spark 3.1.3, and Hive 2.3.9, and Prestodb 0.268, the limit is applied _after_ percent_rank: Spark 3.1.3: ``` 0 0.0 1 0.01 2 0.02 ``` Hive 2.3.9: ``` 0: jdbc:hive2://localhost:10000> select id, percent_rank() over (order by id) as pr from t1 limit 3; . . . . . . . . . . . . . . . .> . . . . . . . . . . . . . . . .> WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. +-----+-------+ | id | pr | +-----+-------+ | 0 | 0.0 | | 1 | 0.01 | | 2 | 0.02 | +-----+-------+ 3 rows selected (4.621 seconds) 0: jdbc:hive2://localhost:10000> ``` Prestodb 0.268: ``` id | pr ----+------ 0 | 0.0 1 | 0.01 2 | 0.02 (3 rows) ``` With this PR, Spark will apply the limit after percent_rank. ### Does this PR introduce _any_ user-facing change? No (besides changing percent_rank's behavior to be more like Spark 3.1.3, Hive, and Prestodb). ### How was this patch tested? New unit tests. Closes #36951 from bersprockets/percent_rank_issue. Authored-by: Bruce Robbins Signed-off-by: Yuming Wang (cherry picked from commit a63ce5676e79f15903e9fd533a26a6c3ec9bf7a8) Signed-off-by: Yuming Wang --- .../optimizer/LimitPushDownThroughWindow.scala | 5 +++-- .../optimizer/LimitPushdownThroughWindowSuite.scala | 13 ++++++++++++- .../spark/sql/DataFrameWindowFunctionsSuite.scala | 13 +++++++++++++ 3 files changed, 28 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushDownThroughWindow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushDownThroughWindow.scala index eaea167ee9ff2..635434741b944 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushDownThroughWindow.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushDownThroughWindow.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentRow, IntegerLiteral, NamedExpression, RankLike, RowFrame, RowNumberLike, SpecifiedWindowFrame, UnboundedPreceding, WindowExpression, WindowSpecDefinition} +import org.apache.spark.sql.catalyst.expressions.{Alias, CurrentRow, DenseRank, IntegerLiteral, NamedExpression, NTile, Rank, RowFrame, RowNumber, SpecifiedWindowFrame, UnboundedPreceding, WindowExpression, WindowSpecDefinition} import org.apache.spark.sql.catalyst.plans.logical.{Limit, LocalLimit, LogicalPlan, Project, Sort, Window} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.TreePattern.{LIMIT, WINDOW} @@ -33,7 +33,8 @@ object LimitPushDownThroughWindow extends Rule[LogicalPlan] { // The window frame of RankLike and RowNumberLike can only be UNBOUNDED PRECEDING to CURRENT ROW. private def supportsPushdownThroughWindow( windowExpressions: Seq[NamedExpression]): Boolean = windowExpressions.forall { - case Alias(WindowExpression(_: RankLike | _: RowNumberLike, WindowSpecDefinition(Nil, _, + case Alias(WindowExpression(_: Rank | _: DenseRank | _: NTile | _: RowNumber, + WindowSpecDefinition(Nil, _, SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow))), _) => true case _ => false } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownThroughWindowSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownThroughWindowSuite.scala index f2c1f452d0203..b09d10b260174 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownThroughWindowSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LimitPushdownThroughWindowSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.optimizer import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.{CurrentRow, Rank, RowFrame, RowNumber, SpecifiedWindowFrame, UnboundedPreceding} +import org.apache.spark.sql.catalyst.expressions.{CurrentRow, PercentRank, Rank, RowFrame, RowNumber, SpecifiedWindowFrame, UnboundedPreceding} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ @@ -187,4 +187,15 @@ class LimitPushdownThroughWindowSuite extends PlanTest { Optimize.execute(originalQuery.analyze), WithoutOptimize.execute(originalQuery.analyze)) } + + test("SPARK-38614: Should not push through percent_rank window function") { + val originalQuery = testRelation + .select(a, b, c, + windowExpr(new PercentRank(), windowSpec(Nil, c.desc :: Nil, windowFrame)).as("rn")) + .limit(2) + + comparePlans( + Optimize.execute(originalQuery.analyze), + WithoutOptimize.execute(originalQuery.analyze)) + } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 25d676f5d93bc..557b278f9c45c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -1190,4 +1190,17 @@ class DataFrameWindowFunctionsSuite extends QueryTest ) ) } + + test("SPARK-38614: percent_rank should apply before limit") { + val df = Seq.tabulate(101)(identity).toDF("id") + val w = Window.orderBy("id") + checkAnswer( + df.select($"id", percent_rank().over(w)).limit(3), + Seq( + Row(0, 0.0d), + Row(1, 0.01d), + Row(2, 0.02d) + ) + ) + } } From be9fae2e64ed7e611f2ec70b8cca0e2ae403786d Mon Sep 17 00:00:00 2001 From: Yikf Date: Thu, 23 Jun 2022 13:04:05 +0800 Subject: [PATCH 364/535] [SPARK-39543] The option of DataFrameWriterV2 should be passed to storage properties if fallback to v1 ### What changes were proposed in this pull request? The option of DataFrameWriterV2 should be passed to storage properties if fallback to v1, to support something such as compressed formats ### Why are the changes needed? example: `spark.range(0, 100).writeTo("t1").option("compression", "zstd").using("parquet").create` **before** gen: part-00000-644a65ed-0e7a-43d5-8d30-b610a0fb19dc-c000.**snappy**.parquet ... **after** gen: part-00000-6eb9d1ae-8fdb-4428-aea3-bd6553954cdd-c000.**zstd**.parquet ... ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? new test Closes #36941 from Yikf/writeV2option. Authored-by: Yikf Signed-off-by: Wenchen Fan (cherry picked from commit e5b7fb85b2d91f2e84dc60888c94e15b53751078) Signed-off-by: Wenchen Fan --- .../analysis/ResolveSessionCatalog.scala | 8 ++++++-- .../spark/sql/DataFrameWriterV2Suite.scala | 20 +++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala index dd4b545b7ff2d..e5819b509e717 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala @@ -158,11 +158,15 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager) c } - case c @ CreateTableAsSelect(ResolvedDBObjectName(catalog, name), _, _, _, _, _) + case c @ CreateTableAsSelect(ResolvedDBObjectName(catalog, name), _, _, _, writeOptions, _) if isSessionCatalog(catalog) => val (storageFormat, provider) = getStorageFormatAndProvider( - c.tableSpec.provider, c.tableSpec.options, c.tableSpec.location, c.tableSpec.serde, + c.tableSpec.provider, + c.tableSpec.options ++ writeOptions, + c.tableSpec.location, + c.tableSpec.serde, ctas = true) + if (!isV2Provider(provider)) { constructV1TableCmd(Some(c.query), c.tableSpec, name, new StructType, c.partitioning, c.ignoreIfExists, storageFormat, provider) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala index 8aef27a1b6692..86108a81da829 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWriterV2Suite.scala @@ -23,12 +23,15 @@ import scala.collection.JavaConverters._ import org.scalatest.BeforeAndAfter +import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.{CannotReplaceMissingTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.plans.logical.{AppendData, LogicalPlan, OverwriteByExpression, OverwritePartitionsDynamic} +import org.apache.spark.sql.connector.InMemoryV1Provider import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryTable, InMemoryTableCatalog, TableCatalog} import org.apache.spark.sql.connector.expressions.{BucketTransform, DaysTransform, FieldReference, HoursTransform, IdentityTransform, LiteralValue, MonthsTransform, YearsTransform} import org.apache.spark.sql.execution.QueryExecution import org.apache.spark.sql.execution.datasources.v2.DataSourceV2Relation +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources.FakeSourceOne import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{IntegerType, LongType, StringType, StructType, TimestampType} @@ -531,6 +534,23 @@ class DataFrameWriterV2Suite extends QueryTest with SharedSparkSession with Befo assert(table.properties === (Map("provider" -> "foo") ++ defaultOwnership).asJava) } + test("SPARK-39543 writeOption should be passed to storage properties when fallback to v1") { + val provider = classOf[InMemoryV1Provider].getName + + withSQLConf((SQLConf.USE_V1_SOURCE_LIST.key, provider)) { + spark.range(10) + .writeTo("table_name") + .option("compression", "zstd").option("name", "table_name") + .using(provider) + .create() + val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("table_name")) + + assert(table.identifier === TableIdentifier("table_name", Some("default"))) + assert(table.storage.properties.contains("compression")) + assert(table.storage.properties.getOrElse("compression", "foo") == "zstd") + } + } + test("Replace: basic behavior") { spark.sql( "CREATE TABLE testcat.table_name (id bigint, data string) USING foo PARTITIONED BY (id)") From 86e3514ceeac0b494ec58cbd413ae957f4d4bc8d Mon Sep 17 00:00:00 2001 From: Prashant Singh Date: Thu, 23 Jun 2022 13:25:08 +0800 Subject: [PATCH 365/535] [SPARK-39547][SQL] V2SessionCatalog should not throw NoSuchDatabaseException in loadNamspaceMetadata ### What changes were proposed in this pull request? This change attempts to make V2SessionCatalog return NoSuchNameSpaceException rather than NoSuchDataseException ### Why are the changes needed? if a catalog doesn't overrides `namespaceExists` it by default uses `loadNamespaceMetadata` and in case a `db` not exists loadNamespaceMetadata throws a `NoSuchDatabaseException` which is not catched and we see failures even with `if exists` clause. One such use case we observed was in iceberg table a post test clean up was failing with `NoSuchDatabaseException` now. Also queries such as `DROP TABLE IF EXISTS {}` fails with no such db exception. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Modified the UT to match the proposed behviour Closes #36948 from singhpk234/fix/loadNamespaceMetadata. Authored-by: Prashant Singh Signed-off-by: Wenchen Fan (cherry picked from commit 95133932a661742bf0dd1343bc7eda08f2cf752f) Signed-off-by: Wenchen Fan --- .../sql/execution/datasources/v2/V2SessionCatalog.scala | 9 +++++++-- .../execution/command/v1/DescribeNamespaceSuite.scala | 1 + .../execution/datasources/v2/V2SessionCatalogSuite.scala | 2 +- .../hive/execution/command/DescribeNamespaceSuite.scala | 1 + 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala index b9a4e0e6ba30b..eb7e3d798325d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalog.scala @@ -24,7 +24,7 @@ import scala.collection.JavaConverters._ import scala.collection.mutable import org.apache.spark.sql.catalyst.{FunctionIdentifier, SQLConfHelper, TableIdentifier} -import org.apache.spark.sql.catalyst.analysis.{NoSuchTableException, TableAlreadyExistsException} +import org.apache.spark.sql.catalyst.analysis.{NoSuchDatabaseException, NoSuchTableException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, CatalogTable, CatalogTableType, CatalogUtils, SessionCatalog} import org.apache.spark.sql.connector.catalog.{CatalogManager, CatalogV2Util, FunctionCatalog, Identifier, NamespaceChange, SupportsNamespaces, Table, TableCatalog, TableChange, V1Table} import org.apache.spark.sql.connector.catalog.NamespaceChange.RemoveProperty @@ -244,7 +244,12 @@ class V2SessionCatalog(catalog: SessionCatalog) override def loadNamespaceMetadata(namespace: Array[String]): util.Map[String, String] = { namespace match { case Array(db) => - catalog.getDatabaseMetadata(db).toMetadata + try { + catalog.getDatabaseMetadata(db).toMetadata + } catch { + case _: NoSuchDatabaseException => + throw QueryCompilationErrors.noSuchNamespaceError(namespace) + } case _ => throw QueryCompilationErrors.noSuchNamespaceError(namespace) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeNamespaceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeNamespaceSuite.scala index aa4547db1e624..e71b311d24149 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeNamespaceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v1/DescribeNamespaceSuite.scala @@ -59,5 +59,6 @@ trait DescribeNamespaceSuiteBase extends command.DescribeNamespaceSuiteBase * table catalog. */ class DescribeNamespaceSuite extends DescribeNamespaceSuiteBase with CommandSuiteBase { + override def notFoundMsgPrefix: String = if (conf.useV1Command) "Database" else "Namespace" override def commandVersion: String = super[DescribeNamespaceSuiteBase].commandVersion } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala index af0eafbc805ee..d37d5a96c656e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2SessionCatalogSuite.scala @@ -850,7 +850,7 @@ class V2SessionCatalogNamespaceSuite extends V2SessionCatalogBaseSuite { test("loadNamespaceMetadata: fail missing namespace") { val catalog = newCatalog() - val exc = intercept[NoSuchDatabaseException] { + val exc = intercept[NoSuchNamespaceException] { catalog.loadNamespaceMetadata(testNs) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DescribeNamespaceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DescribeNamespaceSuite.scala index be8423fca0b44..f730cad03ba8d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DescribeNamespaceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/DescribeNamespaceSuite.scala @@ -24,5 +24,6 @@ import org.apache.spark.sql.execution.command.v1 * table catalog. */ class DescribeNamespaceSuite extends v1.DescribeNamespaceSuiteBase with CommandSuiteBase { + override def notFoundMsgPrefix: String = if (conf.useV1Command) "Database" else "Namespace" override def commandVersion: String = super[DescribeNamespaceSuiteBase].commandVersion } From cb9492534820ce6b2b419a062926da7a7bf09b6a Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Thu, 23 Jun 2022 13:30:34 +0800 Subject: [PATCH 366/535] [SPARK-39548][SQL] CreateView Command with a window clause query hit a wrong window definition not found issue ### What changes were proposed in this pull request? 1. In the inline CTE code path, fix a bug that top down style unresolved window expression check leads to mis-clarification of a defined window expression. 2. Move unresolved window expression check in project to `CheckAnalysis`. ### Why are the changes needed? This bug fails a correct query. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? UT Closes #36947 from amaliujia/improvewindow. Authored-by: Rui Wang Signed-off-by: Wenchen Fan (cherry picked from commit 4718d59c6c4e201bf940303a4311dfb753372395) Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/Analyzer.scala | 10 +------- .../sql/catalyst/analysis/CheckAnalysis.scala | 10 +++++++- .../org/apache/spark/sql/SQLQuerySuite.scala | 23 +++++++++++++++++++ 3 files changed, 33 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 51c1d1f768f73..03f021350a269 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -450,7 +450,7 @@ class Analyzer(override val catalogManager: CatalogManager) * Substitute child plan with WindowSpecDefinitions. */ object WindowsSubstitution extends Rule[LogicalPlan] { - def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsDownWithPruning( + def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUpWithPruning( _.containsAnyPattern(WITH_WINDOW_DEFINITION, UNRESOLVED_WINDOW_EXPRESSION), ruleId) { // Lookup WindowSpecDefinitions. This rule works with unresolved children. case WithWindowDefinition(windowDefinitions, child) => child.resolveExpressions { @@ -459,14 +459,6 @@ class Analyzer(override val catalogManager: CatalogManager) throw QueryCompilationErrors.windowSpecificationNotDefinedError(windowName)) WindowExpression(c, windowSpecDefinition) } - - case p @ Project(projectList, _) => - projectList.foreach(_.transformDownWithPruning( - _.containsPattern(UNRESOLVED_WINDOW_EXPRESSION), ruleId) { - case UnresolvedWindowExpression(_, windowSpec) => - throw QueryCompilationErrors.windowSpecificationNotDefinedError(windowSpec.name) - }) - p } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index b0d1d6c2a30c4..a0319f4b715d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.optimizer.{BooleanSimplification, Decorrela import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreeNodeTag +import org.apache.spark.sql.catalyst.trees.TreePattern.UNRESOLVED_WINDOW_EXPRESSION import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils, TypeUtils} import org.apache.spark.sql.connector.catalog.{LookupCatalog, SupportsPartitionManagement} import org.apache.spark.sql.errors.QueryCompilationErrors @@ -226,7 +227,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { failAnalysis("grouping_id() can only be used with GroupingSets/Cube/Rollup") case e: Expression if e.children.exists(_.isInstanceOf[WindowFunction]) && - !e.isInstanceOf[WindowExpression] => + !e.isInstanceOf[WindowExpression] && e.resolved => val w = e.children.find(_.isInstanceOf[WindowFunction]).get failAnalysis(s"Window function $w requires an OVER clause.") @@ -523,6 +524,13 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { s"""Only a single table generating function is allowed in a SELECT clause, found: | ${exprs.map(_.sql).mkString(",")}""".stripMargin) + case p @ Project(projectList, _) => + projectList.foreach(_.transformDownWithPruning( + _.containsPattern(UNRESOLVED_WINDOW_EXPRESSION)) { + case UnresolvedWindowExpression(_, windowSpec) => + throw QueryCompilationErrors.windowSpecificationNotDefinedError(windowSpec.name) + }) + case j: Join if !j.duplicateResolved => val conflictingAttributes = j.left.outputSet.intersect(j.right.outputSet) failAnalysis( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 0761f8e274999..b0f2421d897ba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4503,6 +4503,29 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark """.stripMargin), Seq(Row(2), Row(1))) } + + test("SPARK-39548: CreateView will make queries go into inline CTE code path thus" + + "trigger a mis-clarified `window definition not found` issue") { + sql( + """ + |create or replace temporary view test_temp_view as + |with step_1 as ( + |select * , min(a) over w2 as min_a_over_w2 from + |(select 1 as a, 2 as b, 3 as c) window w2 as (partition by b order by c)) , step_2 as + |( + |select *, max(e) over w1 as max_a_over_w1 + |from (select 1 as e, 2 as f, 3 as g) + |join step_1 on true + |window w1 as (partition by f order by g) + |) + |select * + |from step_2 + |""".stripMargin) + + checkAnswer( + sql("select * from test_temp_view"), + Row(1, 2, 3, 1, 2, 3, 1, 1)) + } } case class Foo(bar: Option[String]) From bf59f6e4bd7f34f8a36bfef1e93e0ddccddf9e43 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 25 Jun 2022 00:31:54 -0700 Subject: [PATCH 367/535] [SPARK-39596][INFRA] Install `ggplot2` for GitHub Action linter job MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR aims to fix GitHub Action linter job by installing `ggplot2`. ### Why are the changes needed? It starts to fail like the following. - https://github.com/apache/spark/runs/7047294196?check_suite_focus=true ``` x Failed to parse Rd in histogram.Rd ℹ there is no package called ‘ggplot2’ ``` ### Does this PR introduce _any_ user-facing change? No. This is a dev-only change. ### How was this patch tested? Pass the GitHub Action linter job. Closes #36987 from dongjoon-hyun/SPARK-39596. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index e0e9f70556c5f..3afdb942f8e41 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -509,6 +509,7 @@ jobs: apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" Rscript -e "devtools::install_version('lintr', version='2.0.1', repos='https://cloud.r-project.org')" + Rscript -e "install.packages(c('ggplot2'), repos='https://cloud.r-project.org/')" ./R/install-dev.sh - name: Instll JavaScript linter dependencies run: | From 4c79cc7d5f0d818e479565f5d623e168d777ba0a Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 25 Jun 2022 20:37:53 +0900 Subject: [PATCH 368/535] [SPARK-39596][INFRA][FOLLOWUP] Install `mvtnorm` and `statmod` at linter job ### What changes were proposed in this pull request? ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Closes #36988 from dongjoon-hyun/SPARK-39596-2. Authored-by: Dongjoon Hyun Signed-off-by: Hyukjin Kwon --- .github/workflows/build_and_test.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 3afdb942f8e41..5f57003cbd83a 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -509,7 +509,6 @@ jobs: apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')" Rscript -e "devtools::install_version('lintr', version='2.0.1', repos='https://cloud.r-project.org')" - Rscript -e "install.packages(c('ggplot2'), repos='https://cloud.r-project.org/')" ./R/install-dev.sh - name: Instll JavaScript linter dependencies run: | @@ -532,7 +531,7 @@ jobs: python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421 apt-get update -y apt-get install -y ruby ruby-dev - Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'markdown', 'e1071', 'roxygen2'), repos='https://cloud.r-project.org/')" + Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'markdown', 'e1071', 'roxygen2', 'ggplot2', 'mvtnorm', 'statmod'), repos='https://cloud.r-project.org/')" Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" gem install bundler From fc6a6644ee12c32b8da60d29eb2e6f25fa91f30d Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Sun, 26 Jun 2022 08:18:02 -0500 Subject: [PATCH 369/535] [SPARK-39599][BUILD] Upgrade maven to 3.8.6 ### What changes were proposed in this pull request? This PR aims to upgrade Maven to 3.8.6 from 3.8.4. ### Why are the changes needed? The release notes and as follows: - https://maven.apache.org/docs/3.8.5/release-notes.html - https://maven.apache.org/docs/3.8.6/release-notes.html Note that the profile dependency bug should fixed by [MNG-7432] Resolver session contains non-MavenWorkspaceReader (#695) ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Pass GitHub Actions - Manual test 1: run `build/mvn -version` wll trigger download `apache-maven-3.8.6-bin.tar.gz` ``` exec: curl --silent --show-error -L https://www.apache.org/dyn/closer.lua/maven/maven-3/3.8.6/binaries/apache-maven-3.8.6-bin.tar.gz?action=download ``` - Manual test 2: run `./dev/test-dependencies.sh --replace-manifest ` doesn't generate git diff, this behavior is consistent with maven 3.8.4,but there will git diff of `dev/deps/spark-deps-hadoop-2-hive-2.3` when use maven 3.8.5. Closes #36978 from LuciferYang/mvn-386. Authored-by: yangjie01 Signed-off-by: Sean Owen (cherry picked from commit a9397484853843d84bd12048b5ca162acdba2549) Signed-off-by: Sean Owen --- dev/appveyor-install-dependencies.ps1 | 2 +- docs/building-spark.md | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/appveyor-install-dependencies.ps1 b/dev/appveyor-install-dependencies.ps1 index d469c98fdb3a2..fae7fe35dd7dd 100644 --- a/dev/appveyor-install-dependencies.ps1 +++ b/dev/appveyor-install-dependencies.ps1 @@ -81,7 +81,7 @@ if (!(Test-Path $tools)) { # ========================== Maven # Push-Location $tools # -# $mavenVer = "3.8.4" +# $mavenVer = "3.8.6" # Start-FileDownload "https://archive.apache.org/dist/maven/maven-3/$mavenVer/binaries/apache-maven-$mavenVer-bin.zip" "maven.zip" # # # extract diff --git a/docs/building-spark.md b/docs/building-spark.md index 4e775095a376e..caf8773b4002c 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -27,7 +27,7 @@ license: | ## Apache Maven The Maven-based build is the build of reference for Apache Spark. -Building Spark using Maven requires Maven 3.8.4 and Java 8. +Building Spark using Maven requires Maven 3.8.6 and Java 8. Spark requires Scala 2.12/2.13; support for Scala 2.11 was removed in Spark 3.0.0. ### Setting up Maven's Memory Usage diff --git a/pom.xml b/pom.xml index 9d290301d2350..3ac52a7494451 100644 --- a/pom.xml +++ b/pom.xml @@ -109,7 +109,7 @@ 1.8 ${java.version} ${java.version} - 3.8.4 + 3.8.6 1.6.0 spark 1.7.32 From 427148f86e28576ec5eb9799edff4d8106758082 Mon Sep 17 00:00:00 2001 From: "wangzixuan.wzxuan" Date: Sun, 26 Jun 2022 21:05:08 -0500 Subject: [PATCH 370/535] =?UTF-8?q?[SPARK-39575][AVRO]=20add=20ByteBuffer#?= =?UTF-8?q?rewind=20after=20ByteBuffer#get=20in=20Avr=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …oDeserializer ### What changes were proposed in this pull request? Add ByteBuffer#rewind after ByteBuffer#get in AvroDeserializer. ### Why are the changes needed? - HeapBuffer.get(bytes) puts the data from POS to the end into bytes, and sets POS as the end. The next call will return empty bytes. - The second call of AvroDeserializer will return an InternalRow with empty binary column when avro record has binary column. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Add ut in AvroCatalystDataConversionSuite. Closes #36973 from wzx140/avro-fix. Authored-by: wangzixuan.wzxuan Signed-off-by: Sean Owen (cherry picked from commit 558b395880673ec45bf9514c98983e50e21d9398) Signed-off-by: Sean Owen --- .../spark/sql/avro/AvroDeserializer.scala | 2 ++ .../AvroCatalystDataConversionSuite.scala | 21 +++++++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala index 5bb51a92977af..1192856ae7796 100644 --- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala +++ b/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala @@ -195,6 +195,8 @@ private[sql] class AvroDeserializer( case b: ByteBuffer => val bytes = new Array[Byte](b.remaining) b.get(bytes) + // Do not forget to reset the position + b.rewind() bytes case b: Array[Byte] => b case other => diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroCatalystDataConversionSuite.scala b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroCatalystDataConversionSuite.scala index a43d171fb52d3..5c0d64b4d55eb 100644 --- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroCatalystDataConversionSuite.scala +++ b/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroCatalystDataConversionSuite.scala @@ -360,4 +360,25 @@ class AvroCatalystDataConversionSuite extends SparkFunSuite None, new OrderedFilters(Seq(Not(EqualTo("Age", 39))), sqlSchema)) } + + test("AvroDeserializer with binary type") { + val jsonFormatSchema = + """ + |{ + | "type": "record", + | "name": "record", + | "fields" : [ + | {"name": "a", "type": "bytes"} + | ] + |} + """.stripMargin + val avroSchema = new Schema.Parser().parse(jsonFormatSchema) + val avroRecord = new GenericData.Record(avroSchema) + val bb = java.nio.ByteBuffer.wrap(Array[Byte](97, 48, 53)) + avroRecord.put("a", bb) + + val expected = InternalRow(Array[Byte](97, 48, 53)) + checkDeserialization(avroSchema, avroRecord, Some(expected)) + checkDeserialization(avroSchema, avroRecord, Some(expected)) + } } From 972338ae771c99fc63acb5f75fdfa2f6d2c0ffab Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 27 Jun 2022 01:29:57 -0700 Subject: [PATCH 371/535] [SPARK-39614][K8S] K8s pod name follows `DNS Subdomain Names` rule This PR aims to fix a regression at Apache Spark 3.3.0 which doesn't allow long pod name prefix whose length is greater than 63. Although Pod's `hostname` follows [DNS Label Names](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-label-names), Pod name itself follows [DNS Subdomain Names](https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-subdomain-names) whose maximum length is 253. Yes, this fixes a regression. Pass the CIs with the updated unit tests. Closes #36999 from dongjoon-hyun/SPARK-39614. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit c15508f0d6a49738db5edf7eb139cc1d438af9a9) Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/deploy/k8s/Config.scala | 11 ++++++----- .../org/apache/spark/deploy/k8s/KubernetesConf.scala | 2 +- .../k8s/features/BasicExecutorFeatureStep.scala | 4 ++-- .../k8s/features/DriverServiceFeatureStep.scala | 4 ++-- .../deploy/k8s/submit/KubernetesClientUtils.scala | 6 +++--- .../k8s/features/BasicExecutorFeatureStepSuite.scala | 8 ++++---- 6 files changed, 18 insertions(+), 17 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala index 7930cd0ce1563..e3067bc3a7db0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/Config.scala @@ -323,7 +323,7 @@ private[spark] object Config extends Logging { private def isValidExecutorPodNamePrefix(prefix: String): Boolean = { // 6 is length of '-exec-' val reservedLen = Int.MaxValue.toString.length + 6 - val validLength = prefix.length + reservedLen <= KUBERNETES_DNSNAME_MAX_LENGTH + val validLength = prefix.length + reservedLen <= KUBERNETES_DNS_SUBDOMAIN_NAME_MAX_LENGTH validLength && podConfValidator.matcher(prefix).matches() } @@ -331,15 +331,15 @@ private[spark] object Config extends Logging { ConfigBuilder("spark.kubernetes.executor.podNamePrefix") .doc("Prefix to use in front of the executor pod names. It must conform the rules defined " + "by the Kubernetes DNS Label Names. " + + "working-with-objects/names/#dns-subdomain-names\">DNS Subdomain Names. " + "The prefix will be used to generate executor pod names in the form of " + "$podNamePrefix-exec-$id, where the `id` is a positive int value, " + - "so the length of the `podNamePrefix` needs to be <= 47(= 63 - 10 - 6).") + "so the length of the `podNamePrefix` needs to be <= 237(= 253 - 10 - 6).") .version("2.3.0") .stringConf .checkValue(isValidExecutorPodNamePrefix, "must conform https://kubernetes.io/docs/concepts/overview/working-with-objects" + - "/names/#dns-label-names and the value length <= 47") + "/names/#dns-subdomain-names and the value length <= 237") .createOptional val KUBERNETES_EXECUTOR_DISABLE_CONFIGMAP = @@ -713,5 +713,6 @@ private[spark] object Config extends Logging { val KUBERNETES_DRIVER_ENV_PREFIX = "spark.kubernetes.driverEnv." - val KUBERNETES_DNSNAME_MAX_LENGTH = 63 + val KUBERNETES_DNS_SUBDOMAIN_NAME_MAX_LENGTH = 253 + val KUBERNETES_DNS_LABEL_NAME_MAX_LENGTH = 63 } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala index 8a985c31b171c..510609537cf2a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala @@ -270,7 +270,7 @@ private[spark] object KubernetesConf { .replaceAll("[^a-z0-9\\-]", "-") .replaceAll("-+", "-"), "", - KUBERNETES_DNSNAME_MAX_LENGTH + KUBERNETES_DNS_LABEL_NAME_MAX_LENGTH ).stripPrefix("-").stripSuffix("-") } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala index 15c69ad487f5f..8102ca84affcc 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStep.scala @@ -106,10 +106,10 @@ private[spark] class BasicExecutorFeatureStep( val keyToPaths = KubernetesClientUtils.buildKeyToPathObjects(confFilesMap) // According to // https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-label-names, - // hostname must be no longer than `KUBERNETES_DNSNAME_MAX_LENGTH`(63) characters, + // hostname must be no longer than `KUBERNETES_DNS_LABEL_NAME_MAX_LENGTH`(63) characters, // so take the last 63 characters of the pod name as the hostname. // This preserves uniqueness since the end of name contains executorId - val hostname = name.substring(Math.max(0, name.length - KUBERNETES_DNSNAME_MAX_LENGTH)) + val hostname = name.substring(Math.max(0, name.length - KUBERNETES_DNS_LABEL_NAME_MAX_LENGTH)) // Remove non-word characters from the start of the hostname .replaceAll("^[^\\w]+", "") // Replace dangerous characters in the remaining string with a safe alternative. diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala index 75c40584a64e4..bc18048876291 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/DriverServiceFeatureStep.scala @@ -21,7 +21,7 @@ import scala.collection.JavaConverters._ import io.fabric8.kubernetes.api.model.{HasMetadata, ServiceBuilder} import org.apache.spark.deploy.k8s.{KubernetesDriverConf, KubernetesUtils, SparkPod} -import org.apache.spark.deploy.k8s.Config.KUBERNETES_DNSNAME_MAX_LENGTH +import org.apache.spark.deploy.k8s.Config.KUBERNETES_DNS_LABEL_NAME_MAX_LENGTH import org.apache.spark.deploy.k8s.Constants._ import org.apache.spark.internal.{config, Logging} import org.apache.spark.util.{Clock, SystemClock} @@ -101,5 +101,5 @@ private[spark] object DriverServiceFeatureStep { val DRIVER_BIND_ADDRESS_KEY = config.DRIVER_BIND_ADDRESS.key val DRIVER_HOST_KEY = config.DRIVER_HOST_ADDRESS.key val DRIVER_SVC_POSTFIX = "-driver-svc" - val MAX_SERVICE_NAME_LENGTH = KUBERNETES_DNSNAME_MAX_LENGTH + val MAX_SERVICE_NAME_LENGTH = KUBERNETES_DNS_LABEL_NAME_MAX_LENGTH } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala index a26a7638a64d4..dc52babc5f32e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/submit/KubernetesClientUtils.scala @@ -29,16 +29,16 @@ import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, KeyToPath} import org.apache.spark.SparkConf import org.apache.spark.deploy.k8s.{Config, Constants, KubernetesUtils} -import org.apache.spark.deploy.k8s.Config.{KUBERNETES_DNSNAME_MAX_LENGTH, KUBERNETES_NAMESPACE} +import org.apache.spark.deploy.k8s.Config.{KUBERNETES_DNS_SUBDOMAIN_NAME_MAX_LENGTH, KUBERNETES_NAMESPACE} import org.apache.spark.deploy.k8s.Constants.ENV_SPARK_CONF_DIR import org.apache.spark.internal.Logging private[spark] object KubernetesClientUtils extends Logging { - // Config map name can be 63 chars at max. + // Config map name can be KUBERNETES_DNS_SUBDOMAIN_NAME_MAX_LENGTH chars at max. def configMapName(prefix: String): String = { val suffix = "-conf-map" - s"${prefix.take(KUBERNETES_DNSNAME_MAX_LENGTH - suffix.length)}$suffix" + s"${prefix.take(KUBERNETES_DNS_SUBDOMAIN_NAME_MAX_LENGTH - suffix.length)}$suffix" } val configMapNameExecutor: String = configMapName(s"spark-exec-${KubernetesUtils.uniqueID()}") diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala index 731a9b77d2059..84c4f3b8ba352 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/features/BasicExecutorFeatureStepSuite.scala @@ -199,18 +199,18 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { val step = new BasicExecutorFeatureStep(newExecutorConf(), new SecurityManager(baseConf), defaultProfile) assert(step.configurePod(SparkPod.initialPod()).pod.getSpec.getHostname.length === - KUBERNETES_DNSNAME_MAX_LENGTH) + KUBERNETES_DNS_LABEL_NAME_MAX_LENGTH) } } test("SPARK-35460: invalid PodNamePrefixes") { withPodNamePrefix { - Seq("_123", "spark_exec", "spark@", "a" * 48).foreach { invalid => + Seq("_123", "spark_exec", "spark@", "a" * 238).foreach { invalid => baseConf.set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, invalid) val e = intercept[IllegalArgumentException](newExecutorConf()) assert(e.getMessage === s"'$invalid' in spark.kubernetes.executor.podNamePrefix is" + s" invalid. must conform https://kubernetes.io/docs/concepts/overview/" + - "working-with-objects/names/#dns-label-names and the value length <= 47") + "working-with-objects/names/#dns-subdomain-names and the value length <= 237") } } } @@ -224,7 +224,7 @@ class BasicExecutorFeatureStepSuite extends SparkFunSuite with BeforeAndAfter { val step = new BasicExecutorFeatureStep(newExecutorConf(), new SecurityManager(baseConf), defaultProfile) val hostname = step.configurePod(SparkPod.initialPod()).pod.getSpec().getHostname() - assert(hostname.length <= KUBERNETES_DNSNAME_MAX_LENGTH) + assert(hostname.length <= KUBERNETES_DNS_LABEL_NAME_MAX_LENGTH) assert(InternetDomainName.isValid(hostname)) } } From 39413db6f011b75504bb952423f3a5b579b36d97 Mon Sep 17 00:00:00 2001 From: mcdull-zhang Date: Mon, 27 Jun 2022 21:46:53 +0800 Subject: [PATCH 372/535] [SPARK-37753][FOLLOWUP][SQL] Fix unit tests sometimes failing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This unit test sometimes fails to run. for example, https://github.com/apache/spark/pull/35715#discussion_r892247619 When the left side is completed first, and then the right side is completed, since it is known that there are many empty partitions on the left side, the broadcast on the right side is demoted. However, if the right side is completed first and the left side is still being executed, the right side does not know whether there are many empty partitions on the left side, so there is no demote, and then the right side is broadcast in the planning stage. This PR does this: When it is found that the other side is QueryStage, if the QueryStage has not been materialized, demote it first. When the other side is completed, judge again whether demote is needed. ### Why are the changes needed? Fix small problems in logic ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? manual testing Closes #36966 from mcdull-zhang/wait_other_side. Authored-by: mcdull-zhang Signed-off-by: Wenchen Fan (cherry picked from commit 8c8801cf501ddbdeb4a4a869bc27c8a2331531fe) Signed-off-by: Wenchen Fan --- .../adaptive/AdaptiveQueryExecSuite.scala | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 67dc359ab8f33..dd727855ce2fe 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -22,6 +22,7 @@ import java.net.URI import org.apache.logging.log4j.Level import org.scalatest.PrivateMethodTester +import org.scalatest.time.SpanSugar._ import org.apache.spark.SparkException import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerJobStart} @@ -710,18 +711,20 @@ class AdaptiveQueryExecSuite test("SPARK-37753: Inhibit broadcast in left outer join when there are many empty" + " partitions on outer/left side") { - withSQLConf( - SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", - SQLConf.NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN.key -> "0.5") { - // `testData` is small enough to be broadcast but has empty partition ratio over the config. - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "200") { - val (plan, adaptivePlan) = runAdaptiveAndVerifyResult( - "SELECT * FROM (select * from testData where value = '1') td" + - " left outer join testData2 ON key = a") - val smj = findTopLevelSortMergeJoin(plan) - assert(smj.size == 1) - val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) - assert(bhj.isEmpty) + eventually(timeout(15.seconds), interval(500.milliseconds)) { + withSQLConf( + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true", + SQLConf.NON_EMPTY_PARTITION_RATIO_FOR_BROADCAST_JOIN.key -> "0.5") { + // `testData` is small enough to be broadcast but has empty partition ratio over the config. + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "200") { + val (plan, adaptivePlan) = runAdaptiveAndVerifyResult( + "SELECT * FROM (select * from testData where value = '1') td" + + " left outer join testData2 ON key = a") + val smj = findTopLevelSortMergeJoin(plan) + assert(smj.size == 1) + val bhj = findTopLevelBroadcastHashJoin(adaptivePlan) + assert(bhj.isEmpty) + } } } } From 7e1a329af28820b10381526e4adab5a53d7deeda Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Mon, 27 Jun 2022 21:51:21 +0800 Subject: [PATCH 373/535] [SPARK-39577][SQL][DOCS] Add SQL reference for built-in functions ### What changes were proposed in this pull request? Currently, Spark SQL reference missing many functions. Users cannot find the needed functions. ### Why are the changes needed? Add SQL reference for built-in functions ### Does this PR introduce _any_ user-facing change? 'Yes'. Users can find needed functions in SQL reference. Before this PR, the built-in functions show below. ![image](https://user-images.githubusercontent.com/8486025/175505440-a375dbe6-988c-4647-836d-746c681be19a.png) After this PR, the built-in functions show below. ![image](https://user-images.githubusercontent.com/8486025/175916778-d55ab1eb-fd28-4362-a0b7-7e33f37eacb4.png) The part of Mathematical Functions show below. ![image](https://user-images.githubusercontent.com/8486025/175760329-1b185eca-3d92-4c4f-ba2d-980200bf1a5b.png) ![image](https://user-images.githubusercontent.com/8486025/175760340-e93ea083-90e7-4710-9bf4-f45c2b57f8bc.png) ![image](https://user-images.githubusercontent.com/8486025/175760345-638b8fb2-d7f6-4e51-943e-7915583c03db.png) ![image](https://user-images.githubusercontent.com/8486025/175760355-99d7125a-dcdb-407f-8c8d-6fbd2ca60801.png) The part of String Functions show below. ![image](https://user-images.githubusercontent.com/8486025/175760377-2e26454f-75d1-4ad4-9fdd-060a0460d439.png) ![image](https://user-images.githubusercontent.com/8486025/175760387-125483a4-0f13-45f8-9e60-4c66f1f3dc6f.png) ![image](https://user-images.githubusercontent.com/8486025/175760396-44d0f0b2-645b-408b-bdd7-ac167f98a30a.png) ![image](https://user-images.githubusercontent.com/8486025/175760405-c44b0661-c73f-437f-aa99-113dd25bb3fb.png) ![image](https://user-images.githubusercontent.com/8486025/175760416-66af659d-7db8-4c89-8686-9bf8bf1fec20.png) ![image](https://user-images.githubusercontent.com/8486025/175760422-6105ea4a-5ae1-42ef-aee2-1a88984b2c10.png) ![image](https://user-images.githubusercontent.com/8486025/175760428-5ab34058-ac78-48c6-aaa0-9a21b324822f.png) The part of Bitwise Functions show below. ![image](https://user-images.githubusercontent.com/8486025/175760440-10623eff-6330-4407-b069-f74e29966b64.png) The part of Conversion Functions show below. ![image](https://user-images.githubusercontent.com/8486025/175760458-384267f7-b7bb-44a7-8200-3c5e04dbd71a.png) The part of Conditional Functions show below. ![image](https://user-images.githubusercontent.com/8486025/175762759-7546e799-d530-47ea-ac22-27dabd1fcf4a.png) The part of Predicate Functions show below. ![image](https://user-images.githubusercontent.com/8486025/175760534-7a996e8e-188c-44de-b8e1-e36f346ae58e.png) ![image](https://user-images.githubusercontent.com/8486025/175760545-96222945-0dbf-4885-b23e-3043791f13d5.png) The part of Csv Functions show below. ![image](https://user-images.githubusercontent.com/8486025/175760588-e0860cf4-0457-4f22-a21c-880c7ef92db2.png) The part of Misc Functions show below. ![image](https://user-images.githubusercontent.com/8486025/175760614-93b5d9da-fed3-41d6-84c5-4d207b2f1175.png) ![image](https://user-images.githubusercontent.com/8486025/175760623-fb699de6-6174-496f-b15d-43155f223ee6.png) The part of Generator Functions show below. ![image](https://user-images.githubusercontent.com/8486025/175760569-5f14bf3b-5844-4821-acce-232f6ce21372.png) ### How was this patch tested? N/A Closes #36976 from beliefer/SPARK-39577. Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit 250cb912215e548b965aa2d1a27affe9f924cac7) Signed-off-by: Wenchen Fan --- docs/sql-ref-functions-builtin.md | 89 +++++++++++++++++++++++++++++++ docs/sql-ref-functions.md | 11 ++++ sql/gen-sql-functions-docs.py | 12 ++++- 3 files changed, 111 insertions(+), 1 deletion(-) diff --git a/docs/sql-ref-functions-builtin.md b/docs/sql-ref-functions-builtin.md index 08e620b0f4f86..c28905c3fdc0d 100644 --- a/docs/sql-ref-functions-builtin.md +++ b/docs/sql-ref-functions-builtin.md @@ -77,3 +77,92 @@ license: | {% endif %} {% endfor %} +{% for static_file in site.static_files %} + {% if static_file.name == 'generated-math-funcs-table.html' %} +### Mathematical Functions +{% include_relative generated-math-funcs-table.html %} +#### Examples +{% include_relative generated-math-funcs-examples.html %} + {% break %} + {% endif %} +{% endfor %} + +{% for static_file in site.static_files %} + {% if static_file.name == 'generated-string-funcs-table.html' %} +### String Functions +{% include_relative generated-string-funcs-table.html %} +#### Examples +{% include_relative generated-string-funcs-examples.html %} + {% break %} + {% endif %} +{% endfor %} + +{% for static_file in site.static_files %} + {% if static_file.name == 'generated-conditional-funcs-table.html' %} +### Conditional Functions +{% include_relative generated-conditional-funcs-table.html %} +#### Examples +{% include_relative generated-conditional-funcs-examples.html %} + {% break %} + {% endif %} +{% endfor %} + +{% for static_file in site.static_files %} + {% if static_file.name == 'generated-bitwise-funcs-table.html' %} +### Bitwise Functions +{% include_relative generated-bitwise-funcs-table.html %} +#### Examples +{% include_relative generated-bitwise-funcs-examples.html %} + {% break %} + {% endif %} +{% endfor %} + +{% for static_file in site.static_files %} + {% if static_file.name == 'generated-conversion-funcs-table.html' %} +### Conversion Functions +{% include_relative generated-conversion-funcs-table.html %} +#### Examples +{% include_relative generated-conversion-funcs-examples.html %} + {% break %} + {% endif %} +{% endfor %} + +{% for static_file in site.static_files %} + {% if static_file.name == 'generated-predicate-funcs-table.html' %} +### Predicate Functions +{% include_relative generated-predicate-funcs-table.html %} +#### Examples +{% include_relative generated-predicate-funcs-examples.html %} + {% break %} + {% endif %} +{% endfor %} + +{% for static_file in site.static_files %} + {% if static_file.name == 'generated-csv-funcs-table.html' %} +### Csv Functions +{% include_relative generated-csv-funcs-table.html %} +#### Examples +{% include_relative generated-csv-funcs-examples.html %} + {% break %} + {% endif %} +{% endfor %} + +{% for static_file in site.static_files %} + {% if static_file.name == 'generated-misc-funcs-table.html' %} +### Misc Functions +{% include_relative generated-misc-funcs-table.html %} +#### Examples +{% include_relative generated-misc-funcs-examples.html %} + {% break %} + {% endif %} +{% endfor %} + +{% for static_file in site.static_files %} + {% if static_file.name == 'generated-generator-funcs-table.html' %} +### Generator Functions +{% include_relative generated-generator-funcs-table.html %} +#### Examples +{% include_relative generated-generator-funcs-examples.html %} + {% break %} + {% endif %} +{% endfor %} diff --git a/docs/sql-ref-functions.md b/docs/sql-ref-functions.md index 67951a9695f5e..58be9e7e61963 100644 --- a/docs/sql-ref-functions.md +++ b/docs/sql-ref-functions.md @@ -32,11 +32,22 @@ This subsection presents the usages and descriptions of these functions. * [Map Functions](sql-ref-functions-builtin.html#map-functions) * [Date and Timestamp Functions](sql-ref-functions-builtin.html#date-and-timestamp-functions) * [JSON Functions](sql-ref-functions-builtin.html#json-functions) + * [Mathematical Functions](sql-ref-functions-builtin.html#mathematical-functions) + * [String Functions](sql-ref-functions-builtin.html#string-functions) + * [Bitwise Functions](sql-ref-functions-builtin.html#bitwise-functions) + * [Conversion Functions](sql-ref-functions-builtin.html#conversion-functions) + * [Conditional Functions](sql-ref-functions-builtin.html#conditional-functions) + * [Predicate Functions](sql-ref-functions-builtin.html#predicate-functions) + * [Csv Functions](sql-ref-functions-builtin.html#csv-functions) + * [Misc Functions](sql-ref-functions-builtin.html#misc-functions) #### Aggregate-like Functions * [Aggregate Functions](sql-ref-functions-builtin.html#aggregate-functions) * [Window Functions](sql-ref-functions-builtin.html#window-functions) +#### Generator Functions +* [Generator Functions](sql-ref-functions-builtin.html#generator-functions) + ### UDFs (User-Defined Functions) User-Defined Functions (UDFs) are a feature of Spark SQL that allows users to define their own functions when the system's built-in functions are not enough to perform the desired task. To use UDFs in Spark SQL, users must first define the function, then register the function with Spark, and finally call the registered function. The User-Defined Functions can act on a single row or act on multiple rows at once. Spark SQL also supports integration of existing Hive implementations of UDFs, UDAFs and UDTFs. diff --git a/sql/gen-sql-functions-docs.py b/sql/gen-sql-functions-docs.py index c07734e273051..8d18b06768c21 100644 --- a/sql/gen-sql-functions-docs.py +++ b/sql/gen-sql-functions-docs.py @@ -31,6 +31,9 @@ groups = { "agg_funcs", "array_funcs", "datetime_funcs", "json_funcs", "map_funcs", "window_funcs", + "math_funcs", "conditional_funcs", "generator_funcs", + "predicate_funcs", "string_funcs", "misc_funcs", + "bitwise_funcs", "conversion_funcs", "csv_funcs", } @@ -45,6 +48,8 @@ def _list_grouped_function_infos(jvm): for jinfo in filter(lambda x: x.getGroup() in groups, jinfos): name = jinfo.getName() + if (name == "raise_error"): + continue usage = jinfo.getUsage() usage = usage.replace("_FUNC_", name) if usage is not None else usage infos.append(ExpressionInfo( @@ -108,7 +113,12 @@ def _make_pretty_usage(infos): # Expected formats are as follows; # - `_FUNC_(...) - description`, or # - `_FUNC_ - description` - usages = iter(re.split(r"(%s.*) - " % info.name, info.usage.strip())[1:]) + func_name = info.name + if (info.name == "*" or info.name == "+"): + func_name = "\\" + func_name + elif (info.name == "when"): + func_name = "CASE WHEN" + usages = iter(re.split(r"(.*%s.*) - " % func_name, info.usage.strip())[1:]) for (sig, description) in zip(usages, usages): result.append("
        %s
        spark.kubernetes.test.driverRequestCores + Set cpu resource for each driver pod in test, this is currently only for test on cpu resource limited cluster, + it's not recommended for other scenarios. +
        spark.kubernetes.test.executorRequestCores + Set cpu resource for each executor pod in test, this is currently only for test on cpu resource limited cluster, + it's not recommended for other scenarios. +
        # Running the Kubernetes Integration Tests with SBT diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index 3d7a9313031b0..041b2886c4174 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -192,6 +192,12 @@ class KubernetesSuite extends SparkFunSuite .set("spark.kubernetes.driver.label.spark-app-locator", appLocator) .set("spark.kubernetes.executor.label.spark-app-locator", appLocator) .set(NETWORK_AUTH_ENABLED.key, "true") + sys.props.get(CONFIG_DRIVER_REQUEST_CORES).map { cpu => + sparkAppConf.set("spark.kubernetes.driver.request.cores", cpu) + } + sys.props.get(CONFIG_EXECUTOR_REQUEST_CORES).map { cpu => + sparkAppConf.set("spark.kubernetes.executor.request.cores", cpu) + } if (!kubernetesTestComponents.hasUserSpecifiedNamespace) { kubernetesTestComponents.createNamespace() } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/TestConstants.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/TestConstants.scala index c46839f1dffcc..2175d23d44977 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/TestConstants.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/TestConstants.scala @@ -34,4 +34,6 @@ object TestConstants { val CONFIG_KEY_IMAGE_TAG_FILE = "spark.kubernetes.test.imageTagFile" val CONFIG_KEY_IMAGE_REPO = "spark.kubernetes.test.imageRepo" val CONFIG_KEY_UNPACK_DIR = "spark.kubernetes.test.unpackSparkDir" + val CONFIG_DRIVER_REQUEST_CORES = "spark.kubernetes.test.driverRequestCores" + val CONFIG_EXECUTOR_REQUEST_CORES = "spark.kubernetes.test.executorRequestCores" } From 896ecd85edc31965beba1ca207863cc860b5cc45 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Fri, 8 Apr 2022 00:38:59 -0700 Subject: [PATCH 493/535] [SPARK-38803][K8S][TESTS] Lower minio cpu to 250m (0.25) from 1 in K8s IT ### What changes were proposed in this pull request? This PR aims to set minio request cpu to `250m` (0.25). - This value also recommand in [link](https://docs.gitlab.com/charts/charts/minio/#installation-command-line-options). - There are [no cpu request limitation](https://github.com/minio/minio/blob/a3e317773a2b90a433136e1ff2a8394bc5017c75/helm/minio/values.yaml#L251) on current minio. ### Why are the changes needed? In some cases (such as resource limited case), we reduce request cpu of minio. See also: https://github.com/apache/spark/pull/35830#pullrequestreview-929597027 ### Does this PR introduce _any_ user-facing change? No, test only ### How was this patch tested? IT passsed Closes #36096 from Yikun/minioRequestCores. Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun (cherry picked from commit 5ea2b386eb866e20540660cdb6ed43792cb29969) Signed-off-by: Dongjoon Hyun --- .../spark/deploy/k8s/integrationtest/DepsTestsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala index 3f3c4ef14607c..10ac197ec7715 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala @@ -58,7 +58,7 @@ private[spark] trait DepsTestsSuite { k8sSuite: KubernetesSuite => ).toArray val resources = Map( - "cpu" -> new Quantity("1"), + "cpu" -> new Quantity("250m"), "memory" -> new Quantity("512M") ).asJava From 5b81c0fd834f7cf7366e1dcb6fd715a69c270920 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 20 Sep 2022 15:27:22 +0900 Subject: [PATCH 494/535] [MINOR][DOCS][PYTHON] Document datetime.timedelta <> DayTimeIntervalType ### What changes were proposed in this pull request? This PR proposes to document datetime.timedelta support in PySpark in SQL DataType reference page. This support was added in SPARK-37275 ### Why are the changes needed? To show the support of datetime.timedelta. ### Does this PR introduce _any_ user-facing change? Yes, this fixes the documentation. ### How was this patch tested? CI in this PR should validate the build. Closes #37939 from HyukjinKwon/minor-daytimeinterval. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 2aeb8d74c45aea358c0887573f0d549f6111f119) Signed-off-by: Hyukjin Kwon --- docs/sql-ref-datatypes.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/sql-ref-datatypes.md b/docs/sql-ref-datatypes.md index d699bfe5f2820..ba070d2a89a4b 100644 --- a/docs/sql-ref-datatypes.md +++ b/docs/sql-ref-datatypes.md @@ -177,7 +177,7 @@ from pyspark.sql.types import * |**ByteType**|int or long
        **Note:** Numbers will be converted to 1-byte signed integer numbers at runtime. Please make sure that numbers are within the range of -128 to 127.|ByteType()| |**ShortType**|int or long
        **Note:** Numbers will be converted to 2-byte signed integer numbers at runtime. Please make sure that numbers are within the range of -32768 to 32767.|ShortType()| |**IntegerType**|int or long|IntegerType()| -|**LongType**|long
        **Note:** Numbers will be converted to 8-byte signed integer numbers at runtime. Please make sure that numbers are within the range of -9223372036854775808 to 9223372036854775807.Otherwise, please convert data to decimal.Decimal and use DecimalType.|LongType()| +|**LongType**|long
        **Note:** Numbers will be converted to 8-byte signed integer numbers at runtime. Please make sure that numbers are within the range of -9223372036854775808 to 9223372036854775807. Otherwise, please convert data to decimal.Decimal and use DecimalType.|LongType()| |**FloatType**|float
        **Note:** Numbers will be converted to 4-byte single-precision floating point numbers at runtime.|FloatType()| |**DoubleType**|float|DoubleType()| |**DecimalType**|decimal.Decimal|DecimalType()| @@ -186,6 +186,7 @@ from pyspark.sql.types import * |**BooleanType**|bool|BooleanType()| |**TimestampType**|datetime.datetime|TimestampType()| |**DateType**|datetime.date|DateType()| +|**DayTimeIntervalType**|datetime.timedelta|DayTimeIntervalType()| |**ArrayType**|list, tuple, or array|ArrayType(*elementType*, [*containsNull*])
        **Note:**The default value of *containsNull* is True.| |**MapType**|dict|MapType(*keyType*, *valueType*, [*valueContainsNull]*)
        **Note:**The default value of *valueContainsNull* is True.| |**StructType**|list or tuple|StructType(*fields*)
        **Note:** *fields* is a Seq of StructFields. Also, two fields with the same name are not allowed.| From 1f2b5d197dc2dc72a236289f5acf635601c4dff0 Mon Sep 17 00:00:00 2001 From: Ted Yu Date: Wed, 21 Sep 2022 14:50:09 -0700 Subject: [PATCH 495/535] [SPARK-40508][SQL][3.3] Treat unknown partitioning as UnknownPartitioning ### What changes were proposed in this pull request? When running spark application against spark 3.3, I see the following : ``` java.lang.IllegalArgumentException: Unsupported data source V2 partitioning type: CustomPartitioning at org.apache.spark.sql.execution.datasources.v2.V2ScanPartitioning$$anonfun$apply$1.applyOrElse(V2ScanPartitioning.scala:46) at org.apache.spark.sql.execution.datasources.v2.V2ScanPartitioning$$anonfun$apply$1.applyOrElse(V2ScanPartitioning.scala:34) at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:584) ``` The CustomPartitioning works fine with Spark 3.2.1 This PR proposes to relax the code and treat all unknown partitioning the same way as that for UnknownPartitioning. ### Why are the changes needed? 3.3.0 doesn't seem to warrant such behavioral change (from that of 3.2.1 release). ### Does this PR introduce _any_ user-facing change? This would allow user's custom partitioning to continue to work with 3.3.x releases. ### How was this patch tested? Existing test suite. I have run the test using Cassandra Spark connector and modified Spark (with this patch) which passes. Closes #37957 from tedyu/unk-33. Authored-by: Ted Yu Signed-off-by: Chao Sun --- .../execution/datasources/v2/V2ScanPartitioning.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala index 64e80081018a7..67d3bf479b131 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/V2ScanPartitioning.scala @@ -16,6 +16,7 @@ */ package org.apache.spark.sql.execution.datasources.v2 +import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.expressions.V2ExpressionUtils import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -30,7 +31,7 @@ import org.apache.spark.util.collection.Utils.sequenceToOption * reported by data sources to their catalyst counterparts. Then, annotates the plan with the * result. */ -object V2ScanPartitioning extends Rule[LogicalPlan] with SQLConfHelper { +object V2ScanPartitioning extends Rule[LogicalPlan] with SQLConfHelper with Logging { override def apply(plan: LogicalPlan): LogicalPlan = plan transformDown { case d @ DataSourceV2ScanRelation(relation, scan: SupportsReportPartitioning, _, None) => val funCatalogOpt = relation.catalog.flatMap { @@ -52,8 +53,10 @@ object V2ScanPartitioning extends Rule[LogicalPlan] with SQLConfHelper { } } case _: UnknownPartitioning => None - case p => throw new IllegalArgumentException("Unsupported data source V2 partitioning " + - "type: " + p.getClass.getSimpleName) + case p => + logWarning("Spark ignores the partitioning ${p.getClass.getSimpleName}." + + " Please use KeyGroupedPartitioning for better performance") + None } d.copy(keyGroupedPartitioning = catalystPartitioning) From 50f0ced2c83ef8a9c2316814a2e78cb1b049e749 Mon Sep 17 00:00:00 2001 From: Emil Ejbyfeldt Date: Thu, 22 Sep 2022 11:46:35 +0900 Subject: [PATCH 496/535] [SPARK-40385][SQL] Fix interpreted path for companion object constructor ### What changes were proposed in this pull request? Fixes encoding of classes that uses companion object constructors in the interpreted path. Without this change the that is added in this change would fail with ``` ... Cause: java.lang.RuntimeException: Error while decoding: java.lang.RuntimeException: Couldn't find a valid constructor on interface org.apache.spark.sql.catalyst.ScroogeLikeExample newInstance(interface org.apache.spark.sql.catalyst.ScroogeLikeExample) at org.apache.spark.sql.errors.QueryExecutionErrors$.expressionDecodingError(QueryExecutionErrors.scala:1199) ... ``` As far as I can tell this bug has existed since the initial implementation in SPARK-8288 https://github.com/apache/spark/pull/23062 The existing spec that tested this part of the code incorrectly provided an outerPointer which hid the bug from that test. ### Why are the changes needed? Fixes a bug, the new spec in the ExpressionsEncoderSuite shows that this is in fact a bug. ### Does this PR introduce _any_ user-facing change? Yes, it fixes a bug. ### How was this patch tested? New and existing specs in ExpressionEncoderSuite and ObjectExpressionsSuite. Closes #37837 from eejbyfeldt/spark-40385. Authored-by: Emil Ejbyfeldt Signed-off-by: Hyukjin Kwon (cherry picked from commit 73e3c36ec89242e4c586a5328e813310f82de728) Signed-off-by: Hyukjin Kwon --- .../org/apache/spark/sql/catalyst/ScalaReflection.scala | 4 ++-- .../spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala | 5 ++++- .../sql/catalyst/expressions/ObjectExpressionsSuite.scala | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index ff0488b670612..dfcb6bbb0d28c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -820,8 +820,8 @@ object ScalaReflection extends ScalaReflection { applyMethods.find { method => val params = method.typeSignature.paramLists.head // Check that the needed params are the same length and of matching types - params.size == paramTypes.tail.size && - params.zip(paramTypes.tail).forall { case(ps, pc) => + params.size == paramTypes.size && + params.zip(paramTypes).forall { case(ps, pc) => ps.typeSignature.typeSymbol == mirror.classSymbol(pc) } }.map { applyMethodSymbol => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala index e2eafb7370d18..9b481b13fee2d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala @@ -26,7 +26,7 @@ import scala.reflect.runtime.universe.TypeTag import org.apache.spark.SparkArithmeticException import org.apache.spark.sql.{Encoder, Encoders} -import org.apache.spark.sql.catalyst.{FooClassWithEnum, FooEnum, OptionalData, PrimitiveData} +import org.apache.spark.sql.catalyst.{FooClassWithEnum, FooEnum, OptionalData, PrimitiveData, ScroogeLikeExample} import org.apache.spark.sql.catalyst.analysis.AnalysisTest import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.AttributeReference @@ -477,6 +477,9 @@ class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTes encodeDecodeTest(Option("abc"), "option of string") encodeDecodeTest(Option.empty[String], "empty option of string") + encodeDecodeTest(ScroogeLikeExample(1), + "SPARK-40385 class with only a companion object constructor") + productTest(("UDT", new ExamplePoint(0.1, 0.2))) test("AnyVal class with Any fields") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala index 585191faf18bc..0f19f2b6aac2f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala @@ -423,7 +423,7 @@ class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { inputTypes = Nil, propagateNull = false, dataType = ObjectType(classOf[ScroogeLikeExample]), - outerPointer = Some(() => outerObj)) + outerPointer = None) checkObjectExprEvaluation(newInst3, ScroogeLikeExample(1)) } From b608ba3ff1070893ff3ee86f670c2de211bf3d27 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 22 Sep 2022 02:34:20 -0700 Subject: [PATCH 497/535] [SPARK-40490][YARN][TESTS][3.3] Ensure YarnShuffleIntegrationSuite tests registeredExecFile reload scenarios ### What changes were proposed in this pull request? After SPARK-17321, `YarnShuffleService` will persist data to local shuffle state db/reload data from local shuffle state db only when Yarn NodeManager start with `YarnConfiguration#NM_RECOVERY_ENABLED = true`. `YarnShuffleIntegrationSuite` not set `YarnConfiguration#NM_RECOVERY_ENABLED` and the default value of the configuration is false, so `YarnShuffleIntegrationSuite` will neither trigger data persistence to the db nor verify the reload of data. This pr aims to let `YarnShuffleIntegrationSuite` restart the verification of registeredExecFile reload scenarios, to achieve this goal, this pr make the following changes: 1. Add a new un-document configuration `spark.yarn.shuffle.testing` to `YarnShuffleService`, and Initialize `_recoveryPath` when `_recoveryPath == null && spark.yarn.shuffle.testing == true`. 2. Only set `spark.yarn.shuffle.testing = true` in `YarnShuffleIntegrationSuite`, and add assertions to check `registeredExecFile` is not null to ensure that registeredExecFile reload scenarios will be verified. ### Why are the changes needed? Fix registeredExecFile reload test scenarios. Why not test by configuring `YarnConfiguration#NM_RECOVERY_ENABLED` as true? This configuration has been tried **Hadoop 3.3.2** ``` build/mvn clean install -pl resource-managers/yarn -Pyarn -Dtest=none -DwildcardSuites=org.apache.spark.deploy.yarn.YarnShuffleIntegrationSuite -Phadoop-3 ``` ``` YarnShuffleIntegrationSuite: *** RUN ABORTED *** java.lang.NoClassDefFoundError: org/apache/hadoop/shaded/org/iq80/leveldb/DBException at org.apache.hadoop.yarn.server.nodemanager.NodeManager.initAndStartRecoveryStore(NodeManager.java:313) at org.apache.hadoop.yarn.server.nodemanager.NodeManager.serviceInit(NodeManager.java:370) at org.apache.hadoop.service.AbstractService.init(AbstractService.java:164) at org.apache.hadoop.yarn.server.MiniYARNCluster$NodeManagerWrapper.serviceInit(MiniYARNCluster.java:597) at org.apache.hadoop.service.AbstractService.init(AbstractService.java:164) at org.apache.hadoop.service.CompositeService.serviceInit(CompositeService.java:109) at org.apache.hadoop.yarn.server.MiniYARNCluster.serviceInit(MiniYARNCluster.java:327) at org.apache.hadoop.service.AbstractService.init(AbstractService.java:164) at org.apache.spark.deploy.yarn.BaseYarnClusterSuite.beforeAll(BaseYarnClusterSuite.scala:111) at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:212) ... Cause: java.lang.ClassNotFoundException: org.apache.hadoop.shaded.org.iq80.leveldb.DBException at java.net.URLClassLoader.findClass(URLClassLoader.java:387) at java.lang.ClassLoader.loadClass(ClassLoader.java:419) at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:352) at java.lang.ClassLoader.loadClass(ClassLoader.java:352) at org.apache.hadoop.yarn.server.nodemanager.NodeManager.initAndStartRecoveryStore(NodeManager.java:313) at org.apache.hadoop.yarn.server.nodemanager.NodeManager.serviceInit(NodeManager.java:370) at org.apache.hadoop.service.AbstractService.init(AbstractService.java:164) at org.apache.hadoop.yarn.server.MiniYARNCluster$NodeManagerWrapper.serviceInit(MiniYARNCluster.java:597) at org.apache.hadoop.service.AbstractService.init(AbstractService.java:164) at org.apache.hadoop.service.CompositeService.serviceInit(CompositeService.java:109) ``` **Hadoop 2.7.4** ``` build/mvn clean install -pl resource-managers/yarn -Pyarn -Dtest=none -DwildcardSuites=org.apache.spark.deploy.yarn.YarnShuffleIntegrationSuite -Phadoop-2 ``` ``` YarnShuffleIntegrationSuite: org.apache.spark.deploy.yarn.YarnShuffleIntegrationSuite *** ABORTED *** java.lang.IllegalArgumentException: Cannot support recovery with an ephemeral server port. Check the setting of yarn.nodemanager.address at org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.serviceStart(ContainerManagerImpl.java:395) at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193) at org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:120) at org.apache.hadoop.yarn.server.nodemanager.NodeManager.serviceStart(NodeManager.java:272) at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193) at org.apache.hadoop.yarn.server.MiniYARNCluster$NodeManagerWrapper.serviceStart(MiniYARNCluster.java:560) at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193) at org.apache.hadoop.service.CompositeService.serviceStart(CompositeService.java:120) at org.apache.hadoop.yarn.server.MiniYARNCluster.serviceStart(MiniYARNCluster.java:278) at org.apache.hadoop.service.AbstractService.start(AbstractService.java:193) ... Run completed in 3 seconds, 992 milliseconds. Total number of tests run: 0 Suites: completed 1, aborted 1 Tests: succeeded 0, failed 0, canceled 0, ignored 0, pending 0 *** 1 SUITE ABORTED *** ``` From the above test, we need to use a fixed port to enable Yarn NodeManager recovery, but this is difficult to be guaranteed in UT, so this pr try a workaround way. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #37962 from LuciferYang/SPARK-40490-33. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../spark/network/yarn/YarnShuffleService.java | 9 +++++++++ .../deploy/yarn/YarnShuffleIntegrationSuite.scala | 12 ++++-------- .../apache/spark/network/yarn/YarnTestAccessor.scala | 4 ++++ 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java index f1b894139149d..c5abc61f5cf33 100644 --- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java +++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java @@ -32,6 +32,7 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import com.google.common.io.Files; import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.hadoop.conf.Configuration; @@ -124,6 +125,10 @@ public class YarnShuffleService extends AuxiliaryService { // Whether failure during service initialization should stop the NM. @VisibleForTesting static final String STOP_ON_FAILURE_KEY = "spark.yarn.shuffle.stopOnFailure"; + + @VisibleForTesting + static final String INTEGRATION_TESTING = "spark.yarn.shuffle.testing"; + private static final boolean DEFAULT_STOP_ON_FAILURE = false; // just for testing when you want to find an open port @@ -222,6 +227,10 @@ protected void serviceInit(Configuration externalConf) throws Exception { boolean stopOnFailure = _conf.getBoolean(STOP_ON_FAILURE_KEY, DEFAULT_STOP_ON_FAILURE); + if (_recoveryPath == null && _conf.getBoolean(INTEGRATION_TESTING, false)) { + _recoveryPath = new Path(Files.createTempDir().toURI()); + } + try { // In case this NM was killed while there were running spark applications, we need to restore // lost state for the existing executors. We look for an existing file in the NM's local dirs. diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala index 547bfca2891f1..a3447d352bb60 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala @@ -47,6 +47,7 @@ class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite { yarnConfig.set(YarnConfiguration.NM_AUX_SERVICE_FMT.format("spark_shuffle"), classOf[YarnShuffleService].getCanonicalName) yarnConfig.set(SHUFFLE_SERVICE_PORT.key, "0") + yarnConfig.set(YarnTestAccessor.shuffleServiceIntegrationTestingKey, "true") yarnConfig } @@ -67,23 +68,18 @@ class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite { val shuffleService = YarnTestAccessor.getShuffleServiceInstance val registeredExecFile = YarnTestAccessor.getRegisteredExecutorFile(shuffleService) + assert(registeredExecFile != null) val result = File.createTempFile("result", null, tempDir) val finalState = runSpark( false, mainClassName(YarnExternalShuffleDriver.getClass), - appArgs = if (registeredExecFile != null) { - Seq(result.getAbsolutePath, registeredExecFile.getAbsolutePath) - } else { - Seq(result.getAbsolutePath) - }, + appArgs = Seq(result.getAbsolutePath, registeredExecFile.getAbsolutePath), extraConf = extraSparkConf() ) checkResult(finalState, result) - if (registeredExecFile != null) { - assert(YarnTestAccessor.getRegisteredExecutorFile(shuffleService).exists()) - } + assert(YarnTestAccessor.getRegisteredExecutorFile(shuffleService).exists()) } } diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnTestAccessor.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnTestAccessor.scala index d87cc26384729..df7bfd800b115 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnTestAccessor.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/network/yarn/YarnTestAccessor.scala @@ -37,4 +37,8 @@ object YarnTestAccessor { def getShuffleServiceConfOverlayResourceName: String = { YarnShuffleService.SHUFFLE_SERVICE_CONF_OVERLAY_RESOURCE_NAME } + + def shuffleServiceIntegrationTestingKey: String = { + YarnShuffleService.INTEGRATION_TESTING + } } From 2bae604d05ef734ed4562be5fc545d5e4abfbbd2 Mon Sep 17 00:00:00 2001 From: Bobby Wang Date: Thu, 22 Sep 2022 20:59:00 +0800 Subject: [PATCH 498/535] [SPARK-40407][SQL] Fix the potential data skew caused by df.repartition ### What changes were proposed in this pull request? ``` scala val df = spark.range(0, 100, 1, 50).repartition(4) val v = df.rdd.mapPartitions { iter => { Iterator.single(iter.length) }.collect() println(v.mkString(",")) ``` The above simple code outputs `50,0,0,50`, which means there is no data in partition 1 and partition 2. The RoundRobin seems to ensure to distribute the records evenly *in the same partition*, and not guarantee it between partitions. Below is the code to generate the key ``` scala case RoundRobinPartitioning(numPartitions) => // Distributes elements evenly across output partitions, starting from a random partition. var position = new Random(TaskContext.get().partitionId()).nextInt(numPartitions) (row: InternalRow) => { // The HashPartitioner will handle the `mod` by the number of partitions position += 1 position } ``` In this case, There are 50 partitions, each partition will only compute 2 elements. The issue for RoundRobin here is it always starts with position=2 to do the Roundrobin. See the output of Random ``` scala scala> (1 to 200).foreach(partitionId => print(new Random(partitionId).nextInt(4) + " ")) // the position is always 2. 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ``` Similarly, the below Random code also outputs the same value, ``` scala (1 to 200).foreach(partitionId => print(new Random(partitionId).nextInt(2) + " ")) (1 to 200).foreach(partitionId => print(new Random(partitionId).nextInt(4) + " ")) (1 to 200).foreach(partitionId => print(new Random(partitionId).nextInt(8) + " ")) (1 to 200).foreach(partitionId => print(new Random(partitionId).nextInt(16) + " ")) (1 to 200).foreach(partitionId => print(new Random(partitionId).nextInt(32) + " ")) ``` Consider partition 0, the total elements are [0, 1], so when shuffle writes, for element 0, the key will be (position + 1) = 2 + 1 = 3%4=3, the element 1, the key will be (position + 1)=(3+1)=4%4 = 0 consider partition 1, the total elements are [2, 3], so when shuffle writes, for element 2, the key will be (position + 1) = 2 + 1 = 3%4=3, the element 3, the key will be (position + 1)=(3+1)=4%4 = 0 The calculation is also applied for other left partitions since the starting position is always 2 for this case. So, as you can see, each partition will write its elements to Partition [0, 3], which results in Partition [1, 2] without any data. This PR changes the starting position of RoundRobin. The default position calculated by `new Random(partitionId).nextInt(numPartitions)` may always be the same for different partitions, which means each partition will output the data into the same keys when shuffle writes, and some keys may not have any data in some special cases. ### Why are the changes needed? The PR can fix the data skew issue for the special cases. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Will add some tests and watch CI pass Closes #37855 from wbo4958/roundrobin-data-skew. Authored-by: Bobby Wang Signed-off-by: Wenchen Fan (cherry picked from commit f6c4e58b85d7486c70cd6d58aae208f037e657fa) Signed-off-by: Wenchen Fan --- .../sql/execution/exchange/ShuffleExchangeExec.scala | 10 +++++++++- .../test/scala/org/apache/spark/sql/DatasetSuite.scala | 6 ++++++ .../execution/adaptive/AdaptiveQueryExecSuite.scala | 4 ++-- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala index f3eb5636bb997..9800a781402d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala @@ -21,6 +21,7 @@ import java.util.Random import java.util.function.Supplier import scala.concurrent.Future +import scala.util.hashing import org.apache.spark._ import org.apache.spark.internal.config @@ -306,7 +307,14 @@ object ShuffleExchangeExec { def getPartitionKeyExtractor(): InternalRow => Any = newPartitioning match { case RoundRobinPartitioning(numPartitions) => // Distributes elements evenly across output partitions, starting from a random partition. - var position = new Random(TaskContext.get().partitionId()).nextInt(numPartitions) + // nextInt(numPartitions) implementation has a special case when bound is a power of 2, + // which is basically taking several highest bits from the initial seed, with only a + // minimal scrambling. Due to deterministic seed, using the generator only once, + // and lack of scrambling, the position values for power-of-two numPartitions always + // end up being almost the same regardless of the index. substantially scrambling the + // seed by hashing will help. Refer to SPARK-21782 for more details. + val partitionId = TaskContext.get().partitionId() + var position = new Random(hashing.byteswap32(partitionId)).nextInt(numPartitions) (row: InternalRow) => { // The HashPartitioner will handle the `mod` by the number of partitions position += 1 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index dbe4c5a741745..ed4443542e5eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -2132,6 +2132,12 @@ class DatasetSuite extends QueryTest (2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12), (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)) } + + test("SPARK-40407: repartition should not result in severe data skew") { + val df = spark.range(0, 100, 1, 50).repartition(4) + val result = df.mapPartitions(iter => Iterator.single(iter.length)).collect() + assert(result.sorted.toSeq === Seq(19, 25, 25, 31)) + } } case class Bar(a: Int) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 55f092e2d601b..8aaafd05217eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -2090,8 +2090,8 @@ class AdaptiveQueryExecSuite withSQLConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "150") { // partition size [0,258,72,72,72] checkPartitionNumber("SELECT /*+ REBALANCE(c1) */ * FROM v", 2, 4) - // partition size [72,216,216,144,72] - checkPartitionNumber("SELECT /*+ REBALANCE */ * FROM v", 4, 7) + // partition size [144,72,144,216,144] + checkPartitionNumber("SELECT /*+ REBALANCE */ * FROM v", 2, 6) } // no skewed partition should be optimized From 1d3b8f7cb15283a1e37ecada6d751e17f30647ce Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 23 Sep 2022 08:29:17 +0000 Subject: [PATCH 499/535] Preparing Spark release v3.3.1-rc2 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 39 files changed, 41 insertions(+), 41 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index c1e490df26f4a..0e449e841cf6d 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.2 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index eff5e3419be64..32126a5e13820 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 8834464f7f6ac..21bf56094503b 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index bfadba306c5ec..43740354d84d1 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 287355ac07d96..46c875dcb0a06 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 14d41802a8b74..d6d28fe4ec687 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index f6f26a262fd25..a37bc21ca6e54 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index b3b7da8919fc5..817a30e5deea0 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 9c13be8a1f017..99b641a3658c0 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index d5267f3b32d27..0711ecc3e0744 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 667b574c867bc..15eea016135a4 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.2-SNAPSHOT -SPARK_VERSION_SHORT: 3.3.2 +SPARK_VERSION: 3.3.1 +SPARK_VERSION_SHORT: 3.3.1 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.2"] + 'facetFilters': ["version:3.3.1"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index f3934614cb810..18b30b092b273 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index fbca1101eae44..e932501b8b834 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 537e4c97b1f9d..72940cb743386 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 1ce0b53014aa7..f079671b8998a 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index e851b0a8b2c79..1b79350397482 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 695154d8ceb3f..83097460edc9d 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 2fcd0e4c2b75d..91e111ee38d10 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index af53c827711c4..e622369eb7250 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 2f8755241b3c2..a208e03e8bbf3 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index c3a1b68c82657..e464dfacc4c7e 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 9c0f78231df9d..ed0c627abb943 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 3668043c4e316..606b6cb8c5cd7 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index a97e35dae4ce1..cb5c693068114 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 092c1c7d83da6..3fc9ece3d0e05 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 24370ce56e883..d4d0fc3b6f9e6 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/pom.xml b/pom.xml index 045d299277769..99d4e265332f9 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index 3e5963da87f31..49fe5caabc028 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__: str = "3.3.2.dev0" +__version__: str = "3.3.1" diff --git a/repl/pom.xml b/repl/pom.xml index 68148f637ac0a..d5abd10e610c7 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index be3c81fbf949f..253a5aeffb521 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index fa9fc6473d330..13b0046c47b4d 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 9354ffda8e46c..1c91ae916bc4b 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index a5c123e47ac0b..eeb0ae37fa109 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index e1aaf4afa59c0..5c6188add47cf 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 9ce6e61a7abc6..c6754cf57f9dd 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 6653a4f61dbfe..15ecd5597fcab 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 79219abecf6a8..944fd8f58dbed 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 13eb55d55ebf5..91ab784016069 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 1195252ba95c3..0ea392b136b98 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml From 81c2b0cee29e1aafdd0c8943786a75b68d7938f5 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 23 Sep 2022 08:29:34 +0000 Subject: [PATCH 500/535] Preparing development version 3.3.2-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 39 files changed, 41 insertions(+), 41 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf6d..c1e490df26f4a 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.2 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 32126a5e13820..eff5e3419be64 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 21bf56094503b..8834464f7f6ac 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 43740354d84d1..bfadba306c5ec 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 46c875dcb0a06..287355ac07d96 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index d6d28fe4ec687..14d41802a8b74 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index a37bc21ca6e54..f6f26a262fd25 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 817a30e5deea0..b3b7da8919fc5 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 99b641a3658c0..9c13be8a1f017 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 0711ecc3e0744..d5267f3b32d27 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 15eea016135a4..667b574c867bc 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.1 -SPARK_VERSION_SHORT: 3.3.1 +SPARK_VERSION: 3.3.2-SNAPSHOT +SPARK_VERSION_SHORT: 3.3.2 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.1"] + 'facetFilters': ["version:3.3.2"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 18b30b092b273..f3934614cb810 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index e932501b8b834..fbca1101eae44 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 72940cb743386..537e4c97b1f9d 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index f079671b8998a..1ce0b53014aa7 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 1b79350397482..e851b0a8b2c79 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 83097460edc9d..695154d8ceb3f 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 91e111ee38d10..2fcd0e4c2b75d 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index e622369eb7250..af53c827711c4 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index a208e03e8bbf3..2f8755241b3c2 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index e464dfacc4c7e..c3a1b68c82657 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index ed0c627abb943..9c0f78231df9d 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 606b6cb8c5cd7..3668043c4e316 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index cb5c693068114..a97e35dae4ce1 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 3fc9ece3d0e05..092c1c7d83da6 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index d4d0fc3b6f9e6..24370ce56e883 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 99d4e265332f9..045d299277769 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index 49fe5caabc028..3e5963da87f31 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__: str = "3.3.1" +__version__: str = "3.3.2.dev0" diff --git a/repl/pom.xml b/repl/pom.xml index d5abd10e610c7..68148f637ac0a 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 253a5aeffb521..be3c81fbf949f 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 13b0046c47b4d..fa9fc6473d330 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 1c91ae916bc4b..9354ffda8e46c 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index eeb0ae37fa109..a5c123e47ac0b 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 5c6188add47cf..e1aaf4afa59c0 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index c6754cf57f9dd..9ce6e61a7abc6 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 15ecd5597fcab..6653a4f61dbfe 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 944fd8f58dbed..79219abecf6a8 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 91ab784016069..13eb55d55ebf5 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 0ea392b136b98..1195252ba95c3 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml From fb42c3ecd7395afeb871a9d782d3844eda7f44f4 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Fri, 23 Sep 2022 14:22:09 +0300 Subject: [PATCH 501/535] [SPARK-40535][SQL] Fix bug the buffer of AggregatingAccumulator will not be created if the input rows is empty ### What changes were proposed in this pull request? When `AggregatingAccumulator` serialize aggregate buffer, may throwing NPE. There is one test case could repeat this error. ``` val namedObservation = Observation("named") val df = spark.range(1, 10, 1, 10) val observed_df = df.observe( namedObservation, percentile_approx($"id", lit(0.5), lit(100)).as("percentile_approx_val")) observed_df.collect() ``` throws exception as follows: ``` 13:45:10.976 ERROR org.apache.spark.util.Utils: Exception encountered java.lang.NullPointerException at org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate.getBufferObject(interfaces.scala:641) at org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate.getBufferObject(interfaces.scala:602) at org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate.serializeAggregateBufferInPlace(interfaces.scala:624) at org.apache.spark.sql.execution.AggregatingAccumulator.withBufferSerialized(AggregatingAccumulator.scala:205) at org.apache.spark.sql.execution.AggregatingAccumulator.withBufferSerialized(AggregatingAccumulator.scala:33) at org.apache.spark.util.AccumulatorV2.writeReplace(AccumulatorV2.scala:186) at sun.reflect.GeneratedMethodAccessor3.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at java.io.ObjectStreamClass.invokeWriteReplace(ObjectStreamClass.java:1245) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1136) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348) at org.apache.spark.scheduler.DirectTaskResult.$anonfun$writeExternal$2(TaskResult.scala:55) at org.apache.spark.scheduler.DirectTaskResult.$anonfun$writeExternal$2$adapted(TaskResult.scala:55) at scala.collection.Iterator.foreach(Iterator.scala:943) at scala.collection.Iterator.foreach$(Iterator.scala:943) at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) at scala.collection.IterableLike.foreach(IterableLike.scala:74) at scala.collection.IterableLike.foreach$(IterableLike.scala:73) at scala.collection.AbstractIterable.foreach(Iterable.scala:56) at org.apache.spark.scheduler.DirectTaskResult.$anonfun$writeExternal$1(TaskResult.scala:55) at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1456) at org.apache.spark.scheduler.DirectTaskResult.writeExternal(TaskResult.scala:51) at java.io.ObjectOutputStream.writeExternalData(ObjectOutputStream.java:1459) at java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1430) at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178) at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348) at org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:46) at org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:115) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:663) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) ``` ### Why are the changes needed? Fix a bug. After my investigation, The root cause is the buffer of `AggregatingAccumulator` will not be created if the input rows is empty. ### Does this PR introduce _any_ user-facing change? 'Yes'. Users will see the correct results. ### How was this patch tested? New test case. Closes #37977 from beliefer/SPARK-37203_followup. Authored-by: Jiaan Geng Signed-off-by: Max Gekk (cherry picked from commit 7bbd975f165ec73c17e4604050f0828e3e5b9c0e) Signed-off-by: Max Gekk --- .../spark/sql/execution/AggregatingAccumulator.scala | 12 +++++++----- .../scala/org/apache/spark/sql/DatasetSuite.scala | 2 ++ 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/AggregatingAccumulator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/AggregatingAccumulator.scala index d528e9114baa6..667d1a67b3932 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/AggregatingAccumulator.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/AggregatingAccumulator.scala @@ -199,11 +199,13 @@ class AggregatingAccumulator private( override def withBufferSerialized(): AggregatingAccumulator = { assert(!isAtDriverSide) - var i = 0 - // AggregatingAccumulator runs on executor, we should serialize all TypedImperativeAggregate. - while (i < typedImperatives.length) { - typedImperatives(i).serializeAggregateBufferInPlace(buffer) - i += 1 + if (buffer != null) { + var i = 0 + // AggregatingAccumulator runs on executor, we should serialize all TypedImperativeAggregate. + while (i < typedImperatives.length) { + typedImperatives(i).serializeAggregateBufferInPlace(buffer) + i += 1 + } } this } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index ed4443542e5eb..c65ae966ef69b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -769,6 +769,8 @@ class DatasetSuite extends QueryTest observe(spark.range(100), Map("percentile_approx_val" -> 49)) observe(spark.range(0), Map("percentile_approx_val" -> null)) + observe(spark.range(1, 10), Map("percentile_approx_val" -> 5)) + observe(spark.range(1, 10, 1, 11), Map("percentile_approx_val" -> 5)) } test("sample with replacement") { From 431e90de3271a3e6f4a3bf8746556de9784efea5 Mon Sep 17 00:00:00 2001 From: Frank Yin Date: Fri, 23 Sep 2022 04:23:26 -0700 Subject: [PATCH 502/535] [SPARK-39200][CORE] Make Fallback Storage readFully on content ### What changes were proposed in this pull request? Looks like from bug description, fallback storage doesn't readFully and then cause `org.apache.spark.shuffle.FetchFailedException: Decompression error: Corrupted block detected`. This is an attempt to fix this by read the underlying stream fully. ### Why are the changes needed? Fix a bug documented in SPARK-39200 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Wrote a unit test Closes #37960 from ukby1234/SPARK-39200. Authored-by: Frank Yin Signed-off-by: Dongjoon Hyun (cherry picked from commit 07061f1a07a96f59ae42c9df6110eb784d2f3dab) Signed-off-by: Dongjoon Hyun --- .../spark/storage/FallbackStorage.scala | 2 +- .../spark/storage/FallbackStorageSuite.scala | 90 ++++++++++++++++++- 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala b/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala index e644ffe87e34a..5aa5c6eff7b21 100644 --- a/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala +++ b/core/src/main/scala/org/apache/spark/storage/FallbackStorage.scala @@ -193,7 +193,7 @@ private[spark] object FallbackStorage extends Logging { val array = new Array[Byte](size.toInt) val startTimeNs = System.nanoTime() f.seek(offset) - f.read(array) + f.readFully(array) logDebug(s"Took ${(System.nanoTime() - startTimeNs) / (1000 * 1000)}ms") f.close() new NioManagedBuffer(ByteBuffer.wrap(array)) diff --git a/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala b/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala index 3828e9d8297a6..83c9707bfc273 100644 --- a/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/FallbackStorageSuite.scala @@ -16,12 +16,14 @@ */ package org.apache.spark.storage -import java.io.{DataOutputStream, File, FileOutputStream, IOException} +import java.io.{DataOutputStream, File, FileOutputStream, InputStream, IOException} import java.nio.file.Files import scala.concurrent.duration._ +import scala.util.Random import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FSDataInputStream, LocalFileSystem, Path, PositionedReadable, Seekable} import org.mockito.{ArgumentMatchers => mc} import org.mockito.Mockito.{mock, never, verify, when} import org.scalatest.concurrent.Eventually.{eventually, interval, timeout} @@ -107,6 +109,49 @@ class FallbackStorageSuite extends SparkFunSuite with LocalSparkContext { FallbackStorage.read(conf, ShuffleBlockId(1, 2L, 0)) } + test("SPARK-39200: fallback storage APIs - readFully") { + val conf = new SparkConf(false) + .set("spark.app.id", "testId") + .set("spark.hadoop.fs.file.impl", classOf[ReadPartialFileSystem].getName) + .set(SHUFFLE_COMPRESS, false) + .set(STORAGE_DECOMMISSION_SHUFFLE_BLOCKS_ENABLED, true) + .set(STORAGE_DECOMMISSION_FALLBACK_STORAGE_PATH, + "file://" + Files.createTempDirectory("tmp").toFile.getAbsolutePath + "/") + val fallbackStorage = new FallbackStorage(conf) + val bmm = new BlockManagerMaster(new NoopRpcEndpointRef(conf), null, conf, false) + + val bm = mock(classOf[BlockManager]) + val dbm = new DiskBlockManager(conf, deleteFilesOnStop = false, isDriver = false) + when(bm.diskBlockManager).thenReturn(dbm) + when(bm.master).thenReturn(bmm) + val resolver = new IndexShuffleBlockResolver(conf, bm) + when(bm.migratableResolver).thenReturn(resolver) + + val length = 100000 + val content = new Array[Byte](length) + Random.nextBytes(content) + + val indexFile = resolver.getIndexFile(1, 2L) + tryWithResource(new FileOutputStream(indexFile)) { fos => + val dos = new DataOutputStream(fos) + dos.writeLong(0) + dos.writeLong(length) + } + + val dataFile = resolver.getDataFile(1, 2L) + tryWithResource(new FileOutputStream(dataFile)) { fos => + fos.write(content) + } + + fallbackStorage.copy(ShuffleBlockInfo(1, 2L), bm) + + assert(fallbackStorage.exists(1, ShuffleIndexBlockId(1, 2L, NOOP_REDUCE_ID).name)) + assert(fallbackStorage.exists(1, ShuffleDataBlockId(1, 2L, NOOP_REDUCE_ID).name)) + + val readResult = FallbackStorage.read(conf, ShuffleBlockId(1, 2L, 0)) + assert(readResult.nioByteBuffer().array().sameElements(content)) + } + test("SPARK-34142: fallback storage API - cleanUp") { withTempDir { dir => Seq(true, false).foreach { cleanUp => @@ -289,3 +334,46 @@ class FallbackStorageSuite extends SparkFunSuite with LocalSparkContext { } } } +class ReadPartialInputStream(val in: FSDataInputStream) extends InputStream + with Seekable with PositionedReadable { + override def read: Int = in.read + + override def read(b: Array[Byte], off: Int, len: Int): Int = { + if (len > 1) { + in.read(b, off, len - 1) + } else { + in.read(b, off, len) + } + } + + override def seek(pos: Long): Unit = { + in.seek(pos) + } + + override def getPos: Long = in.getPos + + override def seekToNewSource(targetPos: Long): Boolean = in.seekToNewSource(targetPos) + + override def read(position: Long, buffer: Array[Byte], offset: Int, length: Int): Int = { + if (length > 1) { + in.read(position, buffer, offset, length - 1) + } else { + in.read(position, buffer, offset, length) + } + } + + override def readFully(position: Long, buffer: Array[Byte], offset: Int, length: Int): Unit = { + in.readFully(position, buffer, offset, length) + } + + override def readFully(position: Long, buffer: Array[Byte]): Unit = { + in.readFully(position, buffer) + } +} + +class ReadPartialFileSystem extends LocalFileSystem { + override def open(f: Path): FSDataInputStream = { + val stream = super.open(f) + new FSDataInputStream(new ReadPartialInputStream(stream)) + } +} From 0f99027c2f527f4221822e5863b8b8fe4e3a1cfc Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Fri, 23 Sep 2022 08:57:59 -0700 Subject: [PATCH 503/535] [SPARK-40547][DOCS] Fix dead links in sparkr-vignettes.Rmd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR fix all dead links in sparkr-vignettes.Rmd. ### Why are the changes needed? binary-release-hadoop3.log logs: ``` yumwangLM-SHC-16508156 output % tail -n 30 binary-release-hadoop3.log * this is package ‘SparkR’ version ‘3.3.1’ * package encoding: UTF-8 * checking CRAN incoming feasibility ... NOTE Maintainer: ‘The Apache Software Foundation ’ New submission Package was archived on CRAN CRAN repository db overrides: X-CRAN-Comment: Archived on 2021-06-28 as issues were not corrected in time. Should use tools::R_user_dir(). Found the following (possibly) invalid URLs: URL: https://spark.apache.org/docs/latest/api/R/column_aggregate_functions.html From: inst/doc/sparkr-vignettes.html Status: 404 Message: Not Found URL: https://spark.apache.org/docs/latest/api/R/read.df.html From: inst/doc/sparkr-vignettes.html Status: 404 Message: Not Found URL: https://spark.apache.org/docs/latest/api/R/sparkR.session.html From: inst/doc/sparkr-vignettes.html Status: 404 Message: Not Found * checking package namespace information ... OK * checking package dependencies ...% ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? manual test. Closes #37983 from wangyum/fix-links. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun (cherry picked from commit 7d13d467a88521c81dfbd9453edda444a13e8855) Signed-off-by: Dongjoon Hyun --- R/pkg/vignettes/sparkr-vignettes.Rmd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index 1f3dd13353ffe..4a510763afb6c 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -170,7 +170,7 @@ sparkR.session(sparkHome = "/HOME/spark") ### Spark Session {#SetupSparkSession} -In addition to `sparkHome`, many other options can be specified in `sparkR.session`. For a complete list, see [Starting up: SparkSession](https://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession) and [SparkR API doc](https://spark.apache.org/docs/latest/api/R/sparkR.session.html). +In addition to `sparkHome`, many other options can be specified in `sparkR.session`. For a complete list, see [Starting up: SparkSession](https://spark.apache.org/docs/latest/sparkr.html#starting-up-sparksession) and [SparkR API doc](https://spark.apache.org/docs/latest/api/R/reference/sparkR.session.html). In particular, the following Spark driver properties can be set in `sparkConfig`. @@ -231,7 +231,7 @@ The general method for creating `SparkDataFrame` from data sources is `read.df`. sparkR.session(sparkPackages = "com.databricks:spark-avro_2.12:3.0.0") ``` -We can see how to use data sources using an example CSV input file. For more information please refer to SparkR [read.df](https://spark.apache.org/docs/latest/api/R/read.df.html) API documentation. +We can see how to use data sources using an example CSV input file. For more information please refer to SparkR [read.df](https://spark.apache.org/docs/latest/api/R/reference/read.df.html) API documentation. ```{r, eval=FALSE} df <- read.df(csvPath, "csv", header = "true", inferSchema = "true", na.strings = "NA") ``` @@ -340,7 +340,7 @@ A common flow of grouping and aggregation is 2. Feed the `GroupedData` object to `agg` or `summarize` functions, with some provided aggregation functions to compute a number within each group. -A number of widely used functions are supported to aggregate data after grouping, including `avg`, `count_distinct`, `count`, `first`, `kurtosis`, `last`, `max`, `mean`, `min`, `sd`, `skewness`, `stddev_pop`, `stddev_samp`, `sum_distinct`, `sum`, `var_pop`, `var_samp`, `var`. See the [API doc for aggregate functions](https://spark.apache.org/docs/latest/api/R/column_aggregate_functions.html) linked there. +A number of widely used functions are supported to aggregate data after grouping, including `avg`, `count_distinct`, `count`, `first`, `kurtosis`, `last`, `max`, `mean`, `min`, `sd`, `skewness`, `stddev_pop`, `stddev_samp`, `sum_distinct`, `sum`, `var_pop`, `var_samp`, `var`. See the [API doc for aggregate functions](https://spark.apache.org/docs/latest/api/R/reference/column_aggregate_functions.html) linked there. For example we can compute a histogram of the number of cylinders in the `mtcars` dataset as shown below. From 76b7ea25e155b1786ebc3d82ecebe2f37e926223 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 24 Sep 2022 17:12:16 +0900 Subject: [PATCH 504/535] [SPARK-40322][DOCS][3.3] Fix all dead links in the docs This PR backports https://github.com/apache/spark/pull/37981 to branch-3.3. The original PR description: ### What changes were proposed in this pull request? This PR fixes any dead links in the documentation. ### Why are the changes needed? Correct the document. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? manual test. Closes #37984 from wangyum/branch-3.3-SPARK-40322. Authored-by: Yuming Wang Signed-off-by: Hyukjin Kwon --- docs/ml-classification-regression.md | 34 ++++++------- docs/ml-clustering.md | 10 ++-- docs/ml-collaborative-filtering.md | 2 +- docs/ml-frequent-pattern-mining.md | 4 +- docs/rdd-programming-guide.md | 4 +- docs/sparkr.md | 48 +++++++++---------- docs/sql-getting-started.md | 10 ++-- .../structured-streaming-programming-guide.md | 16 +++---- .../getting_started/quickstart_ps.ipynb | 2 +- 9 files changed, 65 insertions(+), 65 deletions(-) diff --git a/docs/ml-classification-regression.md b/docs/ml-classification-regression.md index bad74cbcf6cc9..c3e1b6b439040 100644 --- a/docs/ml-classification-regression.md +++ b/docs/ml-classification-regression.md @@ -92,7 +92,7 @@ More details on parameters can be found in the [Python API documentation](api/py

        @@ -195,7 +195,7 @@ training summary for evaluating the model.
        -More details on parameters can be found in the [R API documentation](api/R/spark.logit.html). +More details on parameters can be found in the [R API documentation](api/R/reference/spark.logit.html). {% include_example multinomial r/ml/logit.R %}
        @@ -240,7 +240,7 @@ More details on parameters can be found in the [Python API documentation](api/py
        -Refer to the [R API docs](api/R/spark.decisionTree.html) for more details. +Refer to the [R API docs](api/R/reference/spark.decisionTree.html) for more details. {% include_example classification r/ml/decisionTree.R %} @@ -282,7 +282,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.classificatio
        -Refer to the [R API docs](api/R/spark.randomForest.html) for more details. +Refer to the [R API docs](api/R/reference/spark.randomForest.html) for more details. {% include_example classification r/ml/randomForest.R %}
        @@ -323,7 +323,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.classificatio
        -Refer to the [R API docs](api/R/spark.gbt.html) for more details. +Refer to the [R API docs](api/R/reference/spark.gbt.html) for more details. {% include_example classification r/ml/gbt.R %}
        @@ -379,7 +379,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.classificatio
        -Refer to the [R API docs](api/R/spark.mlp.html) for more details. +Refer to the [R API docs](api/R/reference/spark.mlp.html) for more details. {% include_example r/ml/mlp.R %}
        @@ -424,7 +424,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.classificatio
        -Refer to the [R API docs](api/R/spark.svmLinear.html) for more details. +Refer to the [R API docs](api/R/reference/spark.svmLinear.html) for more details. {% include_example r/ml/svmLinear.R %}
        @@ -522,7 +522,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.classificatio
        -Refer to the [R API docs](api/R/spark.naiveBayes.html) for more details. +Refer to the [R API docs](api/R/reference/spark.naiveBayes.html) for more details. {% include_example r/ml/naiveBayes.R %}
        @@ -565,7 +565,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.classificatio
        -Refer to the [R API docs](api/R/spark.fmClassifier.html) for more details. +Refer to the [R API docs](api/R/reference/spark.fmClassifier.html) for more details. Note: At the moment SparkR doesn't support feature scaling. @@ -616,7 +616,7 @@ More details on parameters can be found in the [Python API documentation](api/py
        -More details on parameters can be found in the [R API documentation](api/R/spark.lm.html). +More details on parameters can be found in the [R API documentation](api/R/reference/spark.lm.html). {% include_example r/ml/lm_with_elastic_net.R %}
        @@ -763,7 +763,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.regression.Ge
        -Refer to the [R API docs](api/R/spark.glm.html) for more details. +Refer to the [R API docs](api/R/reference/spark.glm.html) for more details. {% include_example r/ml/glm.R %}
        @@ -805,7 +805,7 @@ More details on parameters can be found in the [Python API documentation](api/py
        -Refer to the [R API docs](api/R/spark.decisionTree.html) for more details. +Refer to the [R API docs](api/R/reference/spark.decisionTree.html) for more details. {% include_example regression r/ml/decisionTree.R %}
        @@ -847,7 +847,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.regression.Ra
        -Refer to the [R API docs](api/R/spark.randomForest.html) for more details. +Refer to the [R API docs](api/R/reference/spark.randomForest.html) for more details. {% include_example regression r/ml/randomForest.R %}
        @@ -888,7 +888,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.regression.GB
        -Refer to the [R API docs](api/R/spark.gbt.html) for more details. +Refer to the [R API docs](api/R/reference/spark.gbt.html) for more details. {% include_example regression r/ml/gbt.R %}
        @@ -982,7 +982,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.regression.AF
        -Refer to the [R API docs](api/R/spark.survreg.html) for more details. +Refer to the [R API docs](api/R/reference/spark.survreg.html) for more details. {% include_example r/ml/survreg.R %}
        @@ -1060,7 +1060,7 @@ Refer to the [`IsotonicRegression` Python docs](api/python/reference/api/pyspark
        -Refer to the [`IsotonicRegression` R API docs](api/R/spark.isoreg.html) for more details on the API. +Refer to the [`IsotonicRegression` R API docs](api/R/reference/spark.isoreg.html) for more details on the API. {% include_example r/ml/isoreg.R %}
        @@ -1103,7 +1103,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.regression.FM
        -Refer to the [R API documentation](api/R/spark.fmRegressor.html) for more details. +Refer to the [R API documentation](api/R/reference/spark.fmRegressor.html) for more details. Note: At the moment SparkR doesn't support feature scaling. diff --git a/docs/ml-clustering.md b/docs/ml-clustering.md index f478776196d42..1d15f61a29da7 100644 --- a/docs/ml-clustering.md +++ b/docs/ml-clustering.md @@ -104,7 +104,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.clustering.KM
        -Refer to the [R API docs](api/R/spark.kmeans.html) for more details. +Refer to the [R API docs](api/R/reference/spark.kmeans.html) for more details. {% include_example r/ml/kmeans.R %}
        @@ -144,7 +144,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.clustering.LD
        -Refer to the [R API docs](api/R/spark.lda.html) for more details. +Refer to the [R API docs](api/R/reference/spark.lda.html) for more details. {% include_example r/ml/lda.R %}
        @@ -185,7 +185,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.clustering.Bi
        -Refer to the [R API docs](api/R/spark.bisectingKmeans.html) for more details. +Refer to the [R API docs](api/R/reference/spark.bisectingKmeans.html) for more details. {% include_example r/ml/bisectingKmeans.R %}
        @@ -274,7 +274,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.clustering.Ga
        -Refer to the [R API docs](api/R/spark.gaussianMixture.html) for more details. +Refer to the [R API docs](api/R/reference/spark.gaussianMixture.html) for more details. {% include_example r/ml/gaussianMixture.R %}
        @@ -321,7 +321,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.clustering.Po
        -Refer to the [R API docs](api/R/spark.powerIterationClustering.html) for more details. +Refer to the [R API docs](api/R/reference/spark.powerIterationClustering.html) for more details. {% include_example r/ml/powerIterationClustering.R %}
        diff --git a/docs/ml-collaborative-filtering.md b/docs/ml-collaborative-filtering.md index ddc90406648a4..8b6d2a1d14c76 100644 --- a/docs/ml-collaborative-filtering.md +++ b/docs/ml-collaborative-filtering.md @@ -195,7 +195,7 @@ als = ALS(maxIter=5, regParam=0.01, implicitPrefs=True,
        -Refer to the [R API docs](api/R/spark.als.html) for more details. +Refer to the [R API docs](api/R/reference/spark.als.html) for more details. {% include_example r/ml/als.R %}
        diff --git a/docs/ml-frequent-pattern-mining.md b/docs/ml-frequent-pattern-mining.md index 6e6ae410cb7d9..58cd29fd8f6ea 100644 --- a/docs/ml-frequent-pattern-mining.md +++ b/docs/ml-frequent-pattern-mining.md @@ -102,7 +102,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.fpm.FPGrowth.
        -Refer to the [R API docs](api/R/spark.fpGrowth.html) for more details. +Refer to the [R API docs](api/R/reference/spark.fpGrowth.html) for more details. {% include_example r/ml/fpm.R %}
        @@ -155,7 +155,7 @@ Refer to the [Python API docs](api/python/reference/api/pyspark.ml.fpm.PrefixSpa
        -Refer to the [R API docs](api/R/spark.prefixSpan.html) for more details. +Refer to the [R API docs](api/R/reference/spark.prefixSpan.html) for more details. {% include_example r/ml/prefixSpan.R %}
        diff --git a/docs/rdd-programming-guide.md b/docs/rdd-programming-guide.md index 4234eb6365f65..7e4664f2a0e97 100644 --- a/docs/rdd-programming-guide.md +++ b/docs/rdd-programming-guide.md @@ -950,7 +950,7 @@ RDD API doc ([Scala](api/scala/org/apache/spark/rdd/RDD.html), [Java](api/java/index.html?org/apache/spark/api/java/JavaRDD.html), [Python](api/python/reference/api/pyspark.RDD.html#pyspark.RDD), - [R](api/R/index.html)) + [R](api/R/reference/index.html)) and pair RDD functions doc ([Scala](api/scala/org/apache/spark/rdd/PairRDDFunctions.html), [Java](api/java/index.html?org/apache/spark/api/java/JavaPairRDD.html)) @@ -1064,7 +1064,7 @@ RDD API doc ([Scala](api/scala/org/apache/spark/rdd/RDD.html), [Java](api/java/index.html?org/apache/spark/api/java/JavaRDD.html), [Python](api/python/reference/api/pyspark.RDD.html#pyspark.RDD), - [R](api/R/index.html)) + [R](api/R/reference/index.html)) and pair RDD functions doc ([Scala](api/scala/org/apache/spark/rdd/PairRDDFunctions.html), diff --git a/docs/sparkr.md b/docs/sparkr.md index 002da5a56fa9e..2e55c7a20c0e0 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -175,7 +175,7 @@ people <- read.json(c("./examples/src/main/resources/people.json", "./examples/s {% endhighlight %}
        -The data sources API natively supports CSV formatted input files. For more information please refer to SparkR [read.df](api/R/read.df.html) API documentation. +The data sources API natively supports CSV formatted input files. For more information please refer to SparkR [read.df](api/R/reference/read.df.html) API documentation.
        {% highlight r %} @@ -536,49 +536,49 @@ SparkR supports the following machine learning algorithms currently: #### Classification -* [`spark.logit`](api/R/spark.logit.html): [`Logistic Regression`](ml-classification-regression.html#logistic-regression) -* [`spark.mlp`](api/R/spark.mlp.html): [`Multilayer Perceptron (MLP)`](ml-classification-regression.html#multilayer-perceptron-classifier) -* [`spark.naiveBayes`](api/R/spark.naiveBayes.html): [`Naive Bayes`](ml-classification-regression.html#naive-bayes) -* [`spark.svmLinear`](api/R/spark.svmLinear.html): [`Linear Support Vector Machine`](ml-classification-regression.html#linear-support-vector-machine) -* [`spark.fmClassifier`](api/R/fmClassifier.html): [`Factorization Machines classifier`](ml-classification-regression.html#factorization-machines-classifier) +* [`spark.logit`](api/R/reference/spark.logit.html): [`Logistic Regression`](ml-classification-regression.html#logistic-regression) +* [`spark.mlp`](api/R/reference/spark.mlp.html): [`Multilayer Perceptron (MLP)`](ml-classification-regression.html#multilayer-perceptron-classifier) +* [`spark.naiveBayes`](api/R/reference/spark.naiveBayes.html): [`Naive Bayes`](ml-classification-regression.html#naive-bayes) +* [`spark.svmLinear`](api/R/reference/spark.svmLinear.html): [`Linear Support Vector Machine`](ml-classification-regression.html#linear-support-vector-machine) +* [`spark.fmClassifier`](api/R/reference/fmClassifier.html): [`Factorization Machines classifier`](ml-classification-regression.html#factorization-machines-classifier) #### Regression -* [`spark.survreg`](api/R/spark.survreg.html): [`Accelerated Failure Time (AFT) Survival Model`](ml-classification-regression.html#survival-regression) -* [`spark.glm`](api/R/spark.glm.html) or [`glm`](api/R/glm.html): [`Generalized Linear Model (GLM)`](ml-classification-regression.html#generalized-linear-regression) -* [`spark.isoreg`](api/R/spark.isoreg.html): [`Isotonic Regression`](ml-classification-regression.html#isotonic-regression) -* [`spark.lm`](api/R/spark.lm.html): [`Linear Regression`](ml-classification-regression.html#linear-regression) -* [`spark.fmRegressor`](api/R/spark.fmRegressor.html): [`Factorization Machines regressor`](ml-classification-regression.html#factorization-machines-regressor) +* [`spark.survreg`](api/R/reference/spark.survreg.html): [`Accelerated Failure Time (AFT) Survival Model`](ml-classification-regression.html#survival-regression) +* [`spark.glm`](api/R/reference/spark.glm.html) or [`glm`](api/R/reference/glm.html): [`Generalized Linear Model (GLM)`](ml-classification-regression.html#generalized-linear-regression) +* [`spark.isoreg`](api/R/reference/spark.isoreg.html): [`Isotonic Regression`](ml-classification-regression.html#isotonic-regression) +* [`spark.lm`](api/R/reference/spark.lm.html): [`Linear Regression`](ml-classification-regression.html#linear-regression) +* [`spark.fmRegressor`](api/R/reference/spark.fmRegressor.html): [`Factorization Machines regressor`](ml-classification-regression.html#factorization-machines-regressor) #### Tree -* [`spark.decisionTree`](api/R/spark.decisionTree.html): `Decision Tree for` [`Regression`](ml-classification-regression.html#decision-tree-regression) `and` [`Classification`](ml-classification-regression.html#decision-tree-classifier) -* [`spark.gbt`](api/R/spark.gbt.html): `Gradient Boosted Trees for` [`Regression`](ml-classification-regression.html#gradient-boosted-tree-regression) `and` [`Classification`](ml-classification-regression.html#gradient-boosted-tree-classifier) -* [`spark.randomForest`](api/R/spark.randomForest.html): `Random Forest for` [`Regression`](ml-classification-regression.html#random-forest-regression) `and` [`Classification`](ml-classification-regression.html#random-forest-classifier) +* [`spark.decisionTree`](api/R/reference/spark.decisionTree.html): `Decision Tree for` [`Regression`](ml-classification-regression.html#decision-tree-regression) `and` [`Classification`](ml-classification-regression.html#decision-tree-classifier) +* [`spark.gbt`](api/R/reference/spark.gbt.html): `Gradient Boosted Trees for` [`Regression`](ml-classification-regression.html#gradient-boosted-tree-regression) `and` [`Classification`](ml-classification-regression.html#gradient-boosted-tree-classifier) +* [`spark.randomForest`](api/R/reference/spark.randomForest.html): `Random Forest for` [`Regression`](ml-classification-regression.html#random-forest-regression) `and` [`Classification`](ml-classification-regression.html#random-forest-classifier) #### Clustering -* [`spark.bisectingKmeans`](api/R/spark.bisectingKmeans.html): [`Bisecting k-means`](ml-clustering.html#bisecting-k-means) -* [`spark.gaussianMixture`](api/R/spark.gaussianMixture.html): [`Gaussian Mixture Model (GMM)`](ml-clustering.html#gaussian-mixture-model-gmm) -* [`spark.kmeans`](api/R/spark.kmeans.html): [`K-Means`](ml-clustering.html#k-means) -* [`spark.lda`](api/R/spark.lda.html): [`Latent Dirichlet Allocation (LDA)`](ml-clustering.html#latent-dirichlet-allocation-lda) -* [`spark.powerIterationClustering (PIC)`](api/R/spark.powerIterationClustering.html): [`Power Iteration Clustering (PIC)`](ml-clustering.html#power-iteration-clustering-pic) +* [`spark.bisectingKmeans`](api/R/reference/spark.bisectingKmeans.html): [`Bisecting k-means`](ml-clustering.html#bisecting-k-means) +* [`spark.gaussianMixture`](api/R/reference/spark.gaussianMixture.html): [`Gaussian Mixture Model (GMM)`](ml-clustering.html#gaussian-mixture-model-gmm) +* [`spark.kmeans`](api/R/reference/spark.kmeans.html): [`K-Means`](ml-clustering.html#k-means) +* [`spark.lda`](api/R/reference/spark.lda.html): [`Latent Dirichlet Allocation (LDA)`](ml-clustering.html#latent-dirichlet-allocation-lda) +* [`spark.powerIterationClustering (PIC)`](api/R/reference/spark.powerIterationClustering.html): [`Power Iteration Clustering (PIC)`](ml-clustering.html#power-iteration-clustering-pic) #### Collaborative Filtering -* [`spark.als`](api/R/spark.als.html): [`Alternating Least Squares (ALS)`](ml-collaborative-filtering.html#collaborative-filtering) +* [`spark.als`](api/R/reference/spark.als.html): [`Alternating Least Squares (ALS)`](ml-collaborative-filtering.html#collaborative-filtering) #### Frequent Pattern Mining -* [`spark.fpGrowth`](api/R/spark.fpGrowth.html) : [`FP-growth`](ml-frequent-pattern-mining.html#fp-growth) -* [`spark.prefixSpan`](api/R/spark.prefixSpan.html) : [`PrefixSpan`](ml-frequent-pattern-mining.html#prefixSpan) +* [`spark.fpGrowth`](api/R/reference/spark.fpGrowth.html) : [`FP-growth`](ml-frequent-pattern-mining.html#fp-growth) +* [`spark.prefixSpan`](api/R/reference/spark.prefixSpan.html) : [`PrefixSpan`](ml-frequent-pattern-mining.html#prefixSpan) #### Statistics -* [`spark.kstest`](api/R/spark.kstest.html): `Kolmogorov-Smirnov Test` +* [`spark.kstest`](api/R/reference/spark.kstest.html): `Kolmogorov-Smirnov Test` Under the hood, SparkR uses MLlib to train the model. Please refer to the corresponding section of MLlib user guide for example code. -Users can call `summary` to print a summary of the fitted model, [predict](api/R/predict.html) to make predictions on new data, and [write.ml](api/R/write.ml.html)/[read.ml](api/R/read.ml.html) to save/load fitted models. +Users can call `summary` to print a summary of the fitted model, [predict](api/R/reference/predict.html) to make predictions on new data, and [write.ml](api/R/reference/write.ml.html)/[read.ml](api/R/reference/read.ml.html) to save/load fitted models. SparkR supports a subset of the available R formula operators for model fitting, including ‘~’, ‘.’, ‘:’, ‘+’, and ‘-‘. diff --git a/docs/sql-getting-started.md b/docs/sql-getting-started.md index 2403d7b2a6c97..69396924e35ed 100644 --- a/docs/sql-getting-started.md +++ b/docs/sql-getting-started.md @@ -41,14 +41,14 @@ The entry point into all functionality in Spark is the [`SparkSession`](api/java
        -The entry point into all functionality in Spark is the [`SparkSession`](api/python/reference/api/pyspark.sql.SparkSession.html) class. To create a basic `SparkSession`, just use `SparkSession.builder`: +The entry point into all functionality in Spark is the [`SparkSession`](api/python/reference/pyspark.sql/api/pyspark.sql.SparkSession.html) class. To create a basic `SparkSession`, just use `SparkSession.builder`: {% include_example init_session python/sql/basic.py %}
        -The entry point into all functionality in Spark is the [`SparkSession`](api/R/sparkR.session.html) class. To initialize a basic `SparkSession`, just call `sparkR.session()`: +The entry point into all functionality in Spark is the [`SparkSession`](api/R/reference/sparkR.session.html) class. To initialize a basic `SparkSession`, just call `sparkR.session()`: {% include_example init_session r/RSparkSQLExample.R %} @@ -104,7 +104,7 @@ As an example, the following creates a DataFrame based on the content of a JSON ## Untyped Dataset Operations (aka DataFrame Operations) -DataFrames provide a domain-specific language for structured data manipulation in [Scala](api/scala/org/apache/spark/sql/Dataset.html), [Java](api/java/index.html?org/apache/spark/sql/Dataset.html), [Python](api/python/reference/api/pyspark.sql.DataFrame.html) and [R](api/R/SparkDataFrame.html). +DataFrames provide a domain-specific language for structured data manipulation in [Scala](api/scala/org/apache/spark/sql/Dataset.html), [Java](api/java/index.html?org/apache/spark/sql/Dataset.html), [Python](api/python/reference/pyspark.sql/api/pyspark.sql.DataFrame.html) and [R](api/R/reference/SparkDataFrame.html). As mentioned above, in Spark 2.0, DataFrames are just Dataset of `Row`s in Scala and Java API. These operations are also referred as "untyped transformations" in contrast to "typed transformations" come with strongly typed Scala/Java Datasets. @@ -146,9 +146,9 @@ In addition to simple column references and expressions, DataFrames also have a {% include_example untyped_ops r/RSparkSQLExample.R %} -For a complete list of the types of operations that can be performed on a DataFrame refer to the [API Documentation](api/R/index.html). +For a complete list of the types of operations that can be performed on a DataFrame refer to the [API Documentation](api/R/reference/index.html). -In addition to simple column references and expressions, DataFrames also have a rich library of functions including string manipulation, date arithmetic, common math operations and more. The complete list is available in the [DataFrame Function Reference](api/R/SparkDataFrame.html). +In addition to simple column references and expressions, DataFrames also have a rich library of functions including string manipulation, date arithmetic, common math operations and more. The complete list is available in the [DataFrame Function Reference](api/R/reference/SparkDataFrame.html).
        diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 2db4f92842cd6..48ff4b767cc11 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -498,14 +498,14 @@ to track the read position in the stream. The engine uses checkpointing and writ # API using Datasets and DataFrames Since Spark 2.0, DataFrames and Datasets can represent static, bounded data, as well as streaming, unbounded data. Similar to static Datasets/DataFrames, you can use the common entry point `SparkSession` -([Scala](api/scala/org/apache/spark/sql/SparkSession.html)/[Java](api/java/org/apache/spark/sql/SparkSession.html)/[Python](api/python/reference/api/pyspark.sql.SparkSession.html#pyspark.sql.SparkSession)/[R](api/R/sparkR.session.html) docs) +([Scala](api/scala/org/apache/spark/sql/SparkSession.html)/[Java](api/java/org/apache/spark/sql/SparkSession.html)/[Python](api/python/reference/pyspark.sql/api/pyspark.sql.SparkSession.html#pyspark.sql.SparkSession)/[R](api/R/reference/sparkR.session.html) docs) to create streaming DataFrames/Datasets from streaming sources, and apply the same operations on them as static DataFrames/Datasets. If you are not familiar with Datasets/DataFrames, you are strongly advised to familiarize yourself with them using the [DataFrame/Dataset Programming Guide](sql-programming-guide.html). ## Creating streaming DataFrames and streaming Datasets Streaming DataFrames can be created through the `DataStreamReader` interface -([Scala](api/scala/org/apache/spark/sql/streaming/DataStreamReader.html)/[Java](api/java/org/apache/spark/sql/streaming/DataStreamReader.html)/[Python](api/python/reference/api/pyspark.sql.streaming.DataStreamReader.html#pyspark.sql.streaming.DataStreamReader) docs) -returned by `SparkSession.readStream()`. In [R](api/R/read.stream.html), with the `read.stream()` method. Similar to the read interface for creating static DataFrame, you can specify the details of the source – data format, schema, options, etc. +([Scala](api/scala/org/apache/spark/sql/streaming/DataStreamReader.html)/[Java](api/java/org/apache/spark/sql/streaming/DataStreamReader.html)/[Python](api/python/reference/pyspark.ss/api/pyspark.sql.streaming.DataStreamReader.html#pyspark.sql.streaming.DataStreamReader) docs) +returned by `SparkSession.readStream()`. In [R](api/R/reference/read.stream.html), with the `read.stream()` method. Similar to the read interface for creating static DataFrame, you can specify the details of the source – data format, schema, options, etc. #### Input Sources There are a few built-in sources. @@ -560,7 +560,7 @@ Here are the details of all the sources in Spark. NOTE 3: Both delete and move actions are best effort. Failing to delete or move files will not fail the streaming query. Spark may not clean up some source files in some circumstances - e.g. the application doesn't shut down gracefully, too many files are queued to clean up.

        For file-format-specific options, see the related methods in DataStreamReader - (
        Scala/Java/Python/Scala/Java/Python/R). E.g. for "parquet" format options see DataStreamReader.parquet().

        @@ -2003,7 +2003,7 @@ User can increase Spark locality waiting configurations to avoid loading state s ## Starting Streaming Queries Once you have defined the final result DataFrame/Dataset, all that is left is for you to start the streaming computation. To do that, you have to use the `DataStreamWriter` -([Scala](api/scala/org/apache/spark/sql/streaming/DataStreamWriter.html)/[Java](api/java/org/apache/spark/sql/streaming/DataStreamWriter.html)/[Python](api/python/reference/api/pyspark.sql.streaming.DataStreamWriter.html#pyspark.sql.streaming.DataStreamWriter) docs) +([Scala](api/scala/org/apache/spark/sql/streaming/DataStreamWriter.html)/[Java](api/java/org/apache/spark/sql/streaming/DataStreamWriter.html)/[Python](api/python/reference/pyspark.ss/api/pyspark.sql.streaming.DataStreamWriter.html#pyspark.sql.streaming.DataStreamWriter) docs) returned through `Dataset.writeStream()`. You will have to specify one or more of the following in this interface. - *Details of the output sink:* Data format, location, etc. @@ -2193,7 +2193,7 @@ Here are the details of all the sinks in Spark. By default it's disabled.

        For file-format-specific options, see the related methods in DataFrameWriter - (Scala/Java/Python/Scala/Java/Python/R). E.g. for "parquet" format options see DataFrameWriter.parquet() @@ -2736,7 +2736,7 @@ Not available in R.
        -For more details, please check the docs for DataStreamReader ([Scala](api/scala/org/apache/spark/sql/streaming/DataStreamReader.html)/[Java](api/java/org/apache/spark/sql/streaming/DataStreamReader.html)/[Python](api/python/reference/api/pyspark.sql.streaming.DataStreamReader.html#pyspark.sql.streaming.DataStreamReader) docs) and DataStreamWriter ([Scala](api/scala/org/apache/spark/sql/streaming/DataStreamWriter.html)/[Java](api/java/org/apache/spark/sql/streaming/DataStreamWriter.html)/[Python](api/python/reference/api/pyspark.sql.streaming.DataStreamWriter.html#pyspark.sql.streaming.DataStreamWriter) docs). +For more details, please check the docs for DataStreamReader ([Scala](api/scala/org/apache/spark/sql/streaming/DataStreamReader.html)/[Java](api/java/org/apache/spark/sql/streaming/DataStreamReader.html)/[Python](api/python/reference/pyspark.ss/api/pyspark.sql.streaming.DataStreamReader.html#pyspark.sql.streaming.DataStreamReader) docs) and DataStreamWriter ([Scala](api/scala/org/apache/spark/sql/streaming/DataStreamWriter.html)/[Java](api/java/org/apache/spark/sql/streaming/DataStreamWriter.html)/[Python](api/python/reference/pyspark.ss/api/pyspark.sql.streaming.DataStreamWriter.html#pyspark.sql.streaming.DataStreamWriter) docs). #### Triggers The trigger settings of a streaming query define the timing of streaming data processing, whether @@ -3034,7 +3034,7 @@ lastProgress(query) # the most recent progress update of this streaming qu
        You can start any number of queries in a single SparkSession. They will all be running concurrently sharing the cluster resources. You can use `sparkSession.streams()` to get the `StreamingQueryManager` -([Scala](api/scala/org/apache/spark/sql/streaming/StreamingQueryManager.html)/[Java](api/java/org/apache/spark/sql/streaming/StreamingQueryManager.html)/[Python](api/python/reference/api/pyspark.sql.streaming.StreamingQueryManager.html#pyspark.sql.streaming.StreamingQueryManager) docs) +([Scala](api/scala/org/apache/spark/sql/streaming/StreamingQueryManager.html)/[Java](api/java/org/apache/spark/sql/streaming/StreamingQueryManager.html)/[Python](api/python/reference/pyspark.ss/api/pyspark.sql.streaming.StreamingQueryManager.html#pyspark.sql.streaming.StreamingQueryManager) docs) that can be used to manage the currently active queries.
        diff --git a/python/docs/source/getting_started/quickstart_ps.ipynb b/python/docs/source/getting_started/quickstart_ps.ipynb index 494e08da9ee15..dc47bdfa2c620 100644 --- a/python/docs/source/getting_started/quickstart_ps.ipynb +++ b/python/docs/source/getting_started/quickstart_ps.ipynb @@ -14183,7 +14183,7 @@ "source": [ "### Parquet\n", "\n", - "Parquet is an efficient and compact file format to read and write faster. See [here](https://spark.apache.org/docs/latest/api/python/reference/pyspark.pandas/api/pyspark.pandas.DataFrame.to_paruqet.html) to write a Parquet file and [here](https://spark.apache.org/docs/latest/api/python/reference/pyspark.pandas/api/pyspark.pandas.read_parquet.html) to read a Parquet file." + "Parquet is an efficient and compact file format to read and write faster. See [here](https://spark.apache.org/docs/latest/api/python/reference/pyspark.pandas/api/pyspark.pandas.DataFrame.to_parquet.html) to write a Parquet file and [here](https://spark.apache.org/docs/latest/api/python/reference/pyspark.pandas/api/pyspark.pandas.read_parquet.html) to read a Parquet file." ] }, { From ec5c2d0e7a11bf105383ab6ebc5703b89fa9f724 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Mon, 26 Sep 2022 13:07:48 +0800 Subject: [PATCH 505/535] [SPARK-38717][SQL][3.3] Handle Hive's bucket spec case preserving behaviour ### What changes were proposed in this pull request? When converting a native table metadata representation `CatalogTable` to `HiveTable` make sure bucket spec uses an existing column. ### Does this PR introduce _any_ user-facing change? Hive metastore seems to be not case preserving with columns but case preserving with bucket spec, which means the following table creation: ``` CREATE TABLE t( c STRING, B_C STRING ) PARTITIONED BY (p_c STRING) CLUSTERED BY (B_C) INTO 4 BUCKETS STORED AS PARQUET ``` followed by a query: ``` SELECT * FROM t ``` fails with: ``` Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Bucket columns B_C is not part of the table columns ([FieldSchema(name:c, type:string, comment:null), FieldSchema(name:b_c, type:string, comment:null)] ``` ### Why are the changes needed? Bug fix. ### How was this patch tested? Added new UT. Closes #37982 from peter-toth/SPARK-38717-handle-upper-case-bucket-spec-3.3. Authored-by: Peter Toth Signed-off-by: Wenchen Fan --- .../spark/sql/hive/HiveExternalCatalog.scala | 6 +-- .../spark/sql/hive/client/HiveClient.scala | 20 ++++++++-- .../sql/hive/client/HiveClientImpl.scala | 37 ++++++++++++++----- .../sql/hive/client/HiveClientSuite.scala | 4 +- .../client/HivePartitionFilteringSuite.scala | 8 ++-- .../sql/hive/execution/SQLQuerySuite.scala | 17 +++++++++ 6 files changed, 70 insertions(+), 22 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index fefa032d35105..15c587b357d82 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -1280,11 +1280,11 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat table: String, predicates: Seq[Expression], defaultTimeZoneId: String): Seq[CatalogTablePartition] = withClient { - val rawTable = getRawTable(db, table) - val catalogTable = restoreTableMetadata(rawTable) + val rawHiveTable = client.getRawHiveTable(db, table) + val catalogTable = restoreTableMetadata(rawHiveTable.toCatalogTable) val partColNameMap = buildLowerCasePartColNameMap(catalogTable) val clientPrunedPartitions = - client.getPartitionsByFilter(rawTable, predicates).map { part => + client.getPartitionsByFilter(rawHiveTable, predicates).map { part => part.copy(spec = restorePartitionSpec(part.spec, partColNameMap)) } prunePartitionsByFilter(catalogTable, clientPrunedPartitions, predicates, defaultTimeZoneId) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala index 8b2bf31007200..58cacfa1d5ded 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClient.scala @@ -26,6 +26,11 @@ import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.types.StructType +private[hive] trait RawHiveTable { + def rawTable: Object + def toCatalogTable: CatalogTable +} + /** * An externally visible interface to the Hive client. This interface is shared across both the * internal and external classloaders for a given version of Hive and thus must expose only @@ -93,6 +98,15 @@ private[hive] trait HiveClient { /** Returns the metadata for the specified table or None if it doesn't exist. */ def getTableOption(dbName: String, tableName: String): Option[CatalogTable] + /** Returns the specified catalog and Hive table, or throws `NoSuchTableException`. */ + final def getRawHiveTable(dbName: String, tableName: String): RawHiveTable = { + getRawHiveTableOption(dbName, tableName) + .getOrElse(throw new NoSuchTableException(dbName, tableName)) + } + + /** Returns the metadata for the specified catalog and Hive table or None if it doesn't exist. */ + def getRawHiveTableOption(dbName: String, tableName: String): Option[RawHiveTable] + /** Returns metadata of existing permanent tables/views for given names. */ def getTablesByName(dbName: String, tableNames: Seq[String]): Seq[CatalogTable] @@ -203,12 +217,12 @@ private[hive] trait HiveClient { db: String, table: String, spec: TablePartitionSpec): Option[CatalogTablePartition] = { - getPartitionOption(getTable(db, table), spec) + getPartitionOption(getRawHiveTable(db, table), spec) } /** Returns the specified partition or None if it does not exist. */ def getPartitionOption( - table: CatalogTable, + table: RawHiveTable, spec: TablePartitionSpec): Option[CatalogTablePartition] /** @@ -222,7 +236,7 @@ private[hive] trait HiveClient { /** Returns partitions filtered by predicates for the given table. */ def getPartitionsByFilter( - catalogTable: CatalogTable, + catalogTable: RawHiveTable, predicates: Seq[Expression]): Seq[CatalogTablePartition] /** Loads a static partition into an existing table. */ diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index d70ac781c0395..42b980900c501 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -100,6 +100,10 @@ private[hive] class HiveClientImpl( extends HiveClient with Logging { + private class RawHiveTableImpl(override val rawTable: HiveTable) extends RawHiveTable { + override lazy val toCatalogTable = convertHiveTableToCatalogTable(rawTable) + } + import HiveClientImpl._ // Circular buffer to hold what hive prints to STDOUT and ERR. Only printed when failures occur. @@ -430,6 +434,13 @@ private[hive] class HiveClientImpl( getRawTableOption(dbName, tableName).map(convertHiveTableToCatalogTable) } + override def getRawHiveTableOption( + dbName: String, + tableName: String): Option[RawHiveTable] = withHiveState { + logDebug(s"Looking up $dbName.$tableName") + getRawTableOption(dbName, tableName).map(new RawHiveTableImpl(_)) + } + private def convertHiveTableToCatalogTable(h: HiveTable): CatalogTable = { // Note: Hive separates partition columns and the schema, but for us the // partition columns are part of the schema @@ -674,13 +685,14 @@ private[hive] class HiveClientImpl( specs: Seq[TablePartitionSpec], newSpecs: Seq[TablePartitionSpec]): Unit = withHiveState { require(specs.size == newSpecs.size, "number of old and new partition specs differ") - val catalogTable = getTable(db, table) - val hiveTable = toHiveTable(catalogTable, Some(userName)) + val rawHiveTable = getRawHiveTable(db, table) + val hiveTable = rawHiveTable.rawTable.asInstanceOf[HiveTable] + hiveTable.setOwner(userName) specs.zip(newSpecs).foreach { case (oldSpec, newSpec) => if (shim.getPartition(client, hiveTable, newSpec.asJava, false) != null) { throw new PartitionAlreadyExistsException(db, table, newSpec) } - val hivePart = getPartitionOption(catalogTable, oldSpec) + val hivePart = getPartitionOption(rawHiveTable, oldSpec) .map { p => toHivePartition(p.copy(spec = newSpec), hiveTable) } .getOrElse { throw new NoSuchPartitionException(db, table, oldSpec) } shim.renamePartition(client, hiveTable, oldSpec.asJava, hivePart) @@ -698,7 +710,10 @@ private[hive] class HiveClientImpl( val original = state.getCurrentDatabase try { setCurrentDatabaseRaw(db) - val hiveTable = toHiveTable(getTable(db, table), Some(userName)) + val hiveTable = withHiveState { + getRawTableOption(db, table).getOrElse(throw new NoSuchTableException(db, table)) + } + hiveTable.setOwner(userName) shim.alterPartitions(client, table, newParts.map { toHivePartition(_, hiveTable) }.asJava) } finally { state.setCurrentDatabase(original) @@ -727,9 +742,9 @@ private[hive] class HiveClientImpl( } override def getPartitionOption( - table: CatalogTable, + rawHiveTable: RawHiveTable, spec: TablePartitionSpec): Option[CatalogTablePartition] = withHiveState { - val hiveTable = toHiveTable(table, Some(userName)) + val hiveTable = rawHiveTable.rawTable.asInstanceOf[HiveTable] val hivePartition = shim.getPartition(client, hiveTable, spec.asJava, false) Option(hivePartition).map(fromHivePartition) } @@ -759,11 +774,13 @@ private[hive] class HiveClientImpl( } override def getPartitionsByFilter( - table: CatalogTable, + rawHiveTable: RawHiveTable, predicates: Seq[Expression]): Seq[CatalogTablePartition] = withHiveState { - val hiveTable = toHiveTable(table, Some(userName)) - val parts = shim.getPartitionsByFilter(client, hiveTable, predicates, table) - .map(fromHivePartition) + val hiveTable = rawHiveTable.rawTable.asInstanceOf[HiveTable] + hiveTable.setOwner(userName) + val parts = + shim.getPartitionsByFilter(client, hiveTable, predicates, rawHiveTable.toCatalogTable) + .map(fromHivePartition) HiveCatalogMetrics.incrementFetchedPartitions(parts.length) parts } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala index ad0f9a56a8267..1727fbe240fbc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HiveClientSuite.scala @@ -413,7 +413,7 @@ class HiveClientSuite(version: String, allVersions: Seq[String]) test("getPartitionsByFilter") { // Only one partition [1, 1] for key2 == 1 - val result = client.getPartitionsByFilter(client.getTable("default", "src_part"), + val result = client.getPartitionsByFilter(client.getRawHiveTable("default", "src_part"), Seq(EqualTo(AttributeReference("key2", IntegerType)(), Literal(1)))) // Hive 0.12 doesn't support getPartitionsByFilter, it ignores the filter condition. @@ -437,7 +437,7 @@ class HiveClientSuite(version: String, allVersions: Seq[String]) test("getPartitionOption(table: CatalogTable, spec: TablePartitionSpec)") { val partition = client.getPartitionOption( - client.getTable("default", "src_part"), Map("key1" -> "1", "key2" -> "2")) + client.getRawHiveTable("default", "src_part"), Map("key1" -> "1", "key2" -> "2")) assert(partition.isDefined) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala index e9ab8edf9ad18..efbf0b0b8becb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/HivePartitionFilteringSuite.scala @@ -121,7 +121,7 @@ class HivePartitionFilteringSuite(version: String) test(s"getPartitionsByFilter returns all partitions when $fallbackKey=true") { withSQLConf(fallbackKey -> "true") { val filteredPartitions = clientWithoutDirectSql.getPartitionsByFilter( - clientWithoutDirectSql.getTable("default", "test"), + clientWithoutDirectSql.getRawHiveTable("default", "test"), Seq(attr("ds") === 20170101)) assert(filteredPartitions.size == testPartitionCount) @@ -132,7 +132,7 @@ class HivePartitionFilteringSuite(version: String) withSQLConf(fallbackKey -> "false") { val e = intercept[RuntimeException]( clientWithoutDirectSql.getPartitionsByFilter( - clientWithoutDirectSql.getTable("default", "test"), + clientWithoutDirectSql.getRawHiveTable("default", "test"), Seq(attr("ds") === 20170101))) assert(e.getMessage.contains("Caught Hive MetaException")) } @@ -628,7 +628,7 @@ class HivePartitionFilteringSuite(version: String) test(s"SPARK-35437: getPartitionsByFilter: ds=20170101 when $fallbackKey=true") { withSQLConf(fallbackKey -> "true", pruningFastFallback -> "true") { val filteredPartitions = clientWithoutDirectSql.getPartitionsByFilter( - clientWithoutDirectSql.getTable("default", "test"), + clientWithoutDirectSql.getRawHiveTable("default", "test"), Seq(attr("ds") === 20170101)) assert(filteredPartitions.size == 1 * hValue.size * chunkValue.size * @@ -705,7 +705,7 @@ class HivePartitionFilteringSuite(version: String) filterExpr: Expression, expectedPartitionCubes: Seq[(Seq[Int], Seq[Int], Seq[String], Seq[String], Seq[String])], transform: Expression => Expression): Unit = { - val filteredPartitions = client.getPartitionsByFilter(client.getTable("default", "test"), + val filteredPartitions = client.getPartitionsByFilter(client.getRawHiveTable("default", "test"), Seq( transform(filterExpr) )) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index f2711db839913..aaf918f4a108b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2694,6 +2694,23 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi } } } + + test("SPARK-38717: Handle Hive's bucket spec case preserving behaviour") { + withTable("t") { + sql( + s""" + |CREATE TABLE t( + | c STRING, + | B_C STRING + |) + |PARTITIONED BY (p_c STRING) + |CLUSTERED BY (B_C) INTO 4 BUCKETS + |STORED AS PARQUET + |""".stripMargin) + val df = sql("SELECT * FROM t") + checkAnswer(df, Seq.empty[Row]) + } + } } @SlowHiveTest From 0eb87211fc12bb221156aaf5775dec2b6a56950e Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 27 Sep 2022 02:14:22 -0700 Subject: [PATCH 506/535] [SPARK-40562][SQL] Add `spark.sql.legacy.groupingIdWithAppendedUserGroupBy` ### What changes were proposed in this pull request? This PR aims to add a new legacy configuration to keep `grouping__id` value like the released Apache Spark 3.2 and 3.3. Please note that this syntax is non-SQL standard and even Hive doesn't support it. ```SQL hive> SELECT version(); OK 3.1.3 r4df4d75bf1e16fe0af75aad0b4179c34c07fc975 Time taken: 0.111 seconds, Fetched: 1 row(s) hive> SELECT count(*), grouping__id from t GROUP BY a GROUPING SETS (b); FAILED: SemanticException 1:63 [Error 10213]: Grouping sets expression is not in GROUP BY key. Error encountered near token 'b' ``` ### Why are the changes needed? SPARK-40218 fixed a bug caused by SPARK-34932 (at Apache Spark 3.2.0). As a side effect, `grouping__id` values are changed. - Apache Spark 3.2.0, 3.2.1, 3.2.2, 3.3.0. ```scala scala> sql("SELECT count(*), grouping__id from (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY k1 GROUPING SETS (k2) ").show() +--------+------------+ |count(1)|grouping__id| +--------+------------+ | 1| 1| | 1| 1| +--------+------------+ ``` - After SPARK-40218, Apache Spark 3.4.0, 3.3.1, 3.2.3 ```scala scala> sql("SELECT count(*), grouping__id from (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY k1 GROUPING SETS (k2) ").show() +--------+------------+ |count(1)|grouping__id| +--------+------------+ | 1| 2| | 1| 2| +--------+------------+ ``` - This PR (Apache Spark 3.4.0, 3.3.1, 3.2.3) ```scala scala> sql("SELECT count(*), grouping__id from (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY k1 GROUPING SETS (k2) ").show() +--------+------------+ |count(1)|grouping__id| +--------+------------+ | 1| 2| | 1| 2| +--------+------------+ scala> sql("set spark.sql.legacy.groupingIdWithAppendedUserGroupBy=true") res1: org.apache.spark.sql.DataFrame = [key: string, value: string]scala> sql("SELECT count(*), grouping__id from (VALUES (1,1,1),(2,2,2)) AS t(k1,k2,v) GROUP BY k1 GROUPING SETS (k2) ").show() +--------+------------+ |count(1)|grouping__id| +--------+------------+ | 1| 1| | 1| 1| +--------+------------+ ``` ### Does this PR introduce _any_ user-facing change? No, this simply added back the previous behavior by the legacy configuration. ### How was this patch tested? Pass the CIs. Closes #38001 from dongjoon-hyun/SPARK-40562. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 5c0ebf3d97ae49b6e2bd2096c2d590abf4d725bd) Signed-off-by: Dongjoon Hyun --- docs/sql-migration-guide.md | 2 ++ .../sql/catalyst/expressions/grouping.scala | 18 ++++++++++++++---- .../apache/spark/sql/internal/SQLConf.scala | 12 ++++++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 4214c2b9aee63..5c46343d99401 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -68,6 +68,8 @@ license: | - Since Spark 3.3, the precision of the return type of round-like functions has been fixed. This may cause Spark throw `AnalysisException` of the `CANNOT_UP_CAST_DATATYPE` error class when using views created by prior versions. In such cases, you need to recreate the views using ALTER VIEW AS or CREATE OR REPLACE VIEW AS with newer Spark versions. + - Since Spark 3.3.1 and 3.2.3, for `SELECT ... GROUP BY a GROUPING SETS (b)`-style SQL statements, `grouping__id` returns different values from Apache Spark 3.2.0, 3.2.1, 3.2.2, and 3.3.0. It computes based on user-given group-by expressions plus grouping set columns. To restore the behavior before 3.3.1 and 3.2.3, you can set `spark.sql.legacy.groupingIdWithAppendedUserGroupBy`. For details, see [SPARK-40218](https://issues.apache.org/jira/browse/SPARK-40218) and [SPARK-40562](https://issues.apache.org/jira/browse/SPARK-40562). + ## Upgrading from Spark SQL 3.1 to 3.2 - Since Spark 3.2, ADD FILE/JAR/ARCHIVE commands require each path to be enclosed by `"` or `'` if the path contains whitespaces. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala index 22e25b31f2e1c..8856a3fe94b84 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala @@ -154,12 +154,22 @@ case class GroupingSets( // Note that, we must put `userGivenGroupByExprs` at the beginning, to preserve the order of // grouping columns specified by users. For example, GROUP BY (a, b) GROUPING SETS (b, a), the // final grouping columns should be (a, b). - override def children: Seq[Expression] = userGivenGroupByExprs ++ flatGroupingSets + override def children: Seq[Expression] = + if (SQLConf.get.groupingIdWithAppendedUserGroupByEnabled) { + flatGroupingSets ++ userGivenGroupByExprs + } else { + userGivenGroupByExprs ++ flatGroupingSets + } + override protected def withNewChildrenInternal( newChildren: IndexedSeq[Expression]): GroupingSets = - copy( - userGivenGroupByExprs = newChildren.take(userGivenGroupByExprs.length), - flatGroupingSets = newChildren.drop(userGivenGroupByExprs.length)) + if (SQLConf.get.groupingIdWithAppendedUserGroupByEnabled) { + super.legacyWithNewChildren(newChildren).asInstanceOf[GroupingSets] + } else { + copy( + userGivenGroupByExprs = newChildren.take(userGivenGroupByExprs.length), + flatGroupingSets = newChildren.drop(userGivenGroupByExprs.length)) + } } object GroupingSets { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 7f41e463d89a2..d9e38ea92586c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3450,6 +3450,15 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_GROUPING_ID_WITH_APPENDED_USER_GROUPBY = + buildConf("spark.sql.legacy.groupingIdWithAppendedUserGroupBy") + .internal() + .doc("When true, grouping_id() returns values based on grouping set columns plus " + + "user-given group-by expressions order like Spark 3.2.0, 3.2.1, 3.2.2, and 3.3.0.") + .version("3.2.3") + .booleanConf + .createWithDefault(false) + val PARQUET_INT96_REBASE_MODE_IN_WRITE = buildConf("spark.sql.parquet.int96RebaseModeInWrite") .internal() @@ -4480,6 +4489,9 @@ class SQLConf extends Serializable with Logging { def integerGroupingIdEnabled: Boolean = getConf(SQLConf.LEGACY_INTEGER_GROUPING_ID) + def groupingIdWithAppendedUserGroupByEnabled: Boolean = + getConf(SQLConf.LEGACY_GROUPING_ID_WITH_APPENDED_USER_GROUPBY) + def metadataCacheTTL: Long = getConf(StaticSQLConf.METADATA_CACHE_TTL_SECONDS) def coalesceBucketsInJoinEnabled: Boolean = getConf(SQLConf.COALESCE_BUCKETS_IN_JOIN_ENABLED) From b53106e16151acfd0c065b6999860cda8b0417b2 Mon Sep 17 00:00:00 2001 From: Daniel Ranchal Parrado Date: Tue, 27 Sep 2022 15:25:11 -0700 Subject: [PATCH 507/535] [SPARK-40583][DOCS] Fixing artifactId name in `cloud-integration.md` ### What changes were proposed in this pull request? I am changing the name of the artifactId that enables the integration with several cloud infrastructures. ### Why are the changes needed? The name of the package is wrong and it does not exist. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? It is not needed. Closes #38021 from danitico/fix/SPARK-40583. Authored-by: Daniel Ranchal Parrado Signed-off-by: Dongjoon Hyun (cherry picked from commit dac58f82d1c10fb91f85fd9670f88d88dbe2feea) Signed-off-by: Dongjoon Hyun --- docs/cloud-integration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/cloud-integration.md b/docs/cloud-integration.md index d65616ed0b8d1..c2b87b356d179 100644 --- a/docs/cloud-integration.md +++ b/docs/cloud-integration.md @@ -105,7 +105,7 @@ is set to the chosen version of Spark: ... org.apache.spark - hadoop-cloud_{{site.SCALA_BINARY_VERSION}} + spark-hadoop-cloud_{{site.SCALA_BINARY_VERSION}} ${spark.version} provided From 367d3e530e299b8cc0d2061ec3f6407b84e1787c Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Tue, 27 Sep 2022 15:32:37 -0700 Subject: [PATCH 508/535] [SPARK-40574][DOCS] Enhance DROP TABLE documentation ### What changes were proposed in this pull request? This PR adds `PURGE` in `DROP TABLE` documentation. Related documentation and code: 1. Hive `DROP TABLE` documentation: https://cwiki.apache.org/confluence/display/hive/languagemanual+ddl image 2. Hive code: https://github.com/apache/hive/blob/rel/release-2.3.9/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java#L1185-L1209 3. Spark code: https://github.com/apache/spark/blob/v3.3.0/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala#L1317-L1327 ### Why are the changes needed? Enhance documentation. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? manual test. Closes #38011 from wangyum/SPARK-40574. Authored-by: Yuming Wang Signed-off-by: Dongjoon Hyun (cherry picked from commit 11eefc81e5c1f3ec7db6df8ba068a7155f7abda3) Signed-off-by: Dongjoon Hyun --- docs/sql-ref-syntax-ddl-drop-table.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/docs/sql-ref-syntax-ddl-drop-table.md b/docs/sql-ref-syntax-ddl-drop-table.md index 6c115fd5f83aa..ce1b033a4eac8 100644 --- a/docs/sql-ref-syntax-ddl-drop-table.md +++ b/docs/sql-ref-syntax-ddl-drop-table.md @@ -31,7 +31,7 @@ If the table is cached, the command uncaches the table and all its dependents. ### Syntax ```sql -DROP TABLE [ IF EXISTS ] table_identifier +DROP TABLE [ IF EXISTS ] table_identifier [ PURGE ] ``` ### Parameter @@ -46,6 +46,10 @@ DROP TABLE [ IF EXISTS ] table_identifier **Syntax:** `[ database_name. ] table_name` +* **PURGE** + + If specified, completely purge the table skipping trash while dropping table(Note: PURGE available in Hive Metastore 0.14.0 and later). + ### Examples ```sql @@ -64,6 +68,9 @@ Error: org.apache.spark.sql.AnalysisException: Table or view not found: employee -- Assumes a table named `employeetable` does not exist,Try with IF EXISTS -- this time it will not throw exception DROP TABLE IF EXISTS employeetable; + +-- Completely purge the table skipping trash. +DROP TABLE employeetable PURGE; ``` ### Related Statements From 7cf579fc8cfc8e6de3acb1d16436ca5024f9e61e Mon Sep 17 00:00:00 2001 From: attilapiros Date: Fri, 30 Sep 2022 14:52:40 -0700 Subject: [PATCH 509/535] [SPARK-40612][CORE] Fixing the principal used for delegation token renewal on non-YARN resource managers ### What changes were proposed in this pull request? When the delegation token is fetched at the first time (see the `fetchDelegationTokens()` call at `HadoopFSDelegationTokenProvider#getTokenRenewalInterval()`) the principal is the current user but at the subsequent token renewals (see `obtainDelegationTokens()` where `getTokenRenewer()` is used to identify the principal) are using a MapReduce/Yarn specific principal even on resource managers different from YARN. This PR fixes `getTokenRenewer()` to use the current user instead of `org.apache.hadoop.mapred.Master.getMasterPrincipal(hadoopConf)` when the resource manager is not YARN. The condition `(master != null && master.contains("yarn"))` is the very same what we already have in `hadoopFSsToAccess()`. I would like to say thank you for squito who have done the investigation regarding of the problem which lead to this PR. ### Why are the changes needed? To avoid `org.apache.hadoop.security.AccessControlException: Permission denied.` for long running applications. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually. Closes #38048 from attilapiros/SPARK-40612. Authored-by: attilapiros Signed-off-by: Dongjoon Hyun (cherry picked from commit 6484992535767ae8dc93df1c79efc66420728155) Signed-off-by: Dongjoon Hyun --- .../security/HadoopFSDelegationTokenProvider.scala | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala index 3120d482f11e1..6ec281f5b4406 100644 --- a/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/security/HadoopFSDelegationTokenProvider.scala @@ -52,8 +52,8 @@ private[deploy] class HadoopFSDelegationTokenProvider val fsToExclude = sparkConf.get(YARN_KERBEROS_FILESYSTEM_RENEWAL_EXCLUDE) .map(new Path(_).getFileSystem(hadoopConf).getUri.getHost) .toSet - val fetchCreds = fetchDelegationTokens(getTokenRenewer(hadoopConf), fileSystems, creds, - fsToExclude) + val fetchCreds = fetchDelegationTokens(getTokenRenewer(sparkConf, hadoopConf), fileSystems, + creds, fsToExclude) // Get the token renewal interval if it is not set. It will only be called once. if (tokenRenewalInterval == null) { @@ -88,8 +88,13 @@ private[deploy] class HadoopFSDelegationTokenProvider UserGroupInformation.isSecurityEnabled } - private def getTokenRenewer(hadoopConf: Configuration): String = { - val tokenRenewer = Master.getMasterPrincipal(hadoopConf) + private def getTokenRenewer(sparkConf: SparkConf, hadoopConf: Configuration): String = { + val master = sparkConf.get("spark.master", null) + val tokenRenewer = if (master != null && master.contains("yarn")) { + Master.getMasterPrincipal(hadoopConf) + } else { + UserGroupInformation.getCurrentUser().getUserName() + } logDebug("Delegation token renewer is: " + tokenRenewer) if (tokenRenewer == null || tokenRenewer.length() == 0) { From 90a27757ec17c2511049114a437f365326e51225 Mon Sep 17 00:00:00 2001 From: attilapiros Date: Mon, 3 Oct 2022 06:23:51 -0700 Subject: [PATCH 510/535] [SPARK-40617] Fix race condition at the handling of ExecutorMetricsPoller's stageTCMP entries ### What changes were proposed in this pull request? Fix a race condition in ExecutorMetricsPoller between `getExecutorUpdates()` and `onTaskStart()` methods by avoiding removing entries when another stage is not started yet. ### Why are the changes needed? Spurious failures are reported because of the following assert: ``` 22/09/29 09:46:24 ERROR SparkUncaughtExceptionHandler: Uncaught exception in thread Thread[Executor task launch worker for task 3063.0 in stage 1997.0 (TID 677249),5,main] java.lang.AssertionError: assertion failed: task count shouldn't below 0 at scala.Predef$.assert(Predef.scala:223) at org.apache.spark.executor.ExecutorMetricsPoller.decrementCount$1(ExecutorMetricsPoller.scala:130) at org.apache.spark.executor.ExecutorMetricsPoller.$anonfun$onTaskCompletion$3(ExecutorMetricsPoller.scala:135) at java.base/java.util.concurrent.ConcurrentHashMap.computeIfPresent(ConcurrentHashMap.java:1822) at org.apache.spark.executor.ExecutorMetricsPoller.onTaskCompletion(ExecutorMetricsPoller.scala:135) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:737) at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) at java.base/java.lang.Thread.run(Thread.java:829) 22/09/29 09:46:24 INFO MemoryStore: MemoryStore cleared 22/09/29 09:46:24 INFO BlockManager: BlockManager stopped 22/09/29 09:46:24 INFO ShutdownHookManager: Shutdown hook called 22/09/29 09:46:24 INFO ShutdownHookManager: Deleting directory /mnt/yarn/usercache/hadoop/appcache/application_1664443624160_0001/spark-93efc2d4-84de-494b-a3b7-2cb1c3a45426 ``` I have checked the code and the basic assumption to have at least as many `onTaskStart()` calls as `onTaskCompletion()` for the same `stageId` & `stageAttemptId` pair is correct. But there is race condition between `getExecutorUpdates()` and `onTaskStart()`. First of all we have two different threads: - task runner: to execute the task and informs `ExecutorMetricsPoller` about task starts and completion - heartbeater: which uses the `ExecutorMetricsPoller` to get the metrics To show the race condition assume a task just finished which was running on its own (no other tasks was running). So this will decrease the `count` from 1 to 0. On the task runner thread let say a new task starts. So the execution is in the `onTaskStart()` method let's assume the `countAndPeaks` is already computed and here the counter is 0 but the execution is still before incrementing the counter. So we are in between the following two lines: ```scala val countAndPeaks = stageTCMP.computeIfAbsent((stageId, stageAttemptId), _ => TCMP(new AtomicLong(0), new AtomicLongArray(ExecutorMetricType.numMetrics))) val stageCount = countAndPeaks.count.incrementAndGet() ``` Let's look at the other thread (heartbeater) where the `getExecutorUpdates()` is running and it is at the `removeIfInactive()` method: ```scala def removeIfInactive(k: StageKey, v: TCMP): TCMP = { if (v.count.get == 0) { logDebug(s"removing (${k._1}, ${k._2}) from stageTCMP") null } else { v } } ``` And here this entry is removed from `stageTCMP` as the count is 0. Let's go back to the task runner thread where we increase the counter to 1 but that value will be lost as we have no entry in the `stageTCMP` for this stage and attempt. So if a new task comes instead of 2 we will have 1 in the `stageTCMP` and when those two tasks finishes the second one will decrease the counter from 0 to -1. This is when the assert raised. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Unit test. I managed to reproduce the issue with a temporary test: ```scala test("reproduce assert failure") { val testMemoryManager = new TestMemoryManager(new SparkConf()) val taskId = new AtomicLong(0) val runFlag = new AtomicBoolean(true) val poller = new ExecutorMetricsPoller(testMemoryManager, 1000, None) val callUpdates = new Thread("getExecutorOpdates") { override def run() { while (runFlag.get()) { poller.getExecutorUpdates.size } } } val taskStartRunner1 = new Thread("taskRunner1") { override def run() { while (runFlag.get) { var l = taskId.incrementAndGet() poller.onTaskStart(l, 0, 0) poller.onTaskCompletion(l, 0, 0) } } } val taskStartRunner2 = new Thread("taskRunner2") { override def run() { while (runFlag.get) { var l = taskId.incrementAndGet() poller.onTaskStart(l, 0, 0) poller.onTaskCompletion(l, 0, 0) } } } val taskStartRunner3 = new Thread("taskRunner3") { override def run() { while (runFlag.get) { var l = taskId.incrementAndGet() var m = taskId.incrementAndGet() poller.onTaskStart(l, 0, 0) poller.onTaskStart(m, 0, 0) poller.onTaskCompletion(l, 0, 0) poller.onTaskCompletion(m, 0, 0) } } } callUpdates.start() taskStartRunner1.start() taskStartRunner2.start() taskStartRunner3.start() Thread.sleep(1000 * 20) runFlag.set(false) callUpdates.join() taskStartRunner1.join() taskStartRunner2.join() taskStartRunner3.join() } ``` The assert which raised is: ``` Exception in thread "taskRunner3" java.lang.AssertionError: assertion failed: task count shouldn't below 0 at scala.Predef$.assert(Predef.scala:223) at org.apache.spark.executor.ExecutorMetricsPoller.decrementCount$1(ExecutorMetricsPoller.scala:130) at org.apache.spark.executor.ExecutorMetricsPoller.$anonfun$onTaskCompletion$3(ExecutorMetricsPoller.scala:135) at java.base/java.util.concurrent.ConcurrentHashMap.computeIfPresent(ConcurrentHashMap.java:1828) at org.apache.spark.executor.ExecutorMetricsPoller.onTaskCompletion(ExecutorMetricsPoller.scala:135) at org.apache.spark.executor.ExecutorMetricsPollerSuite$$anon$4.run(ExecutorMetricsPollerSuite.scala:64) ``` But when I switch off `removeIfInactive()` by using the following code: ```scala if (false && v.count.get == 0) { logDebug(s"removing (${k._1}, ${k._2}) from stageTCMP") null } else { v } ``` Then no assert is raised. Closes #38056 from attilapiros/SPARK-40617. Authored-by: attilapiros Signed-off-by: attilapiros (cherry picked from commit 564a51b64e71f7402c2674de073b3b18001df56f) Signed-off-by: attilapiros --- .../executor/ExecutorMetricsPoller.scala | 21 +++++++++++-------- .../executor/ExecutorMetricsPollerSuite.scala | 4 ++-- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsPoller.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsPoller.scala index 0cdb306af74a3..69182dbc27f4e 100644 --- a/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsPoller.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorMetricsPoller.scala @@ -18,7 +18,7 @@ package org.apache.spark.executor import java.lang.Long.{MAX_VALUE => LONG_MAX_VALUE} import java.util.concurrent.{ConcurrentHashMap, TimeUnit} -import java.util.concurrent.atomic.{AtomicLong, AtomicLongArray} +import java.util.concurrent.atomic.AtomicLongArray import scala.collection.mutable.HashMap @@ -53,7 +53,7 @@ private[spark] class ExecutorMetricsPoller( type StageKey = (Int, Int) // Task Count and Metric Peaks - private[executor] case class TCMP(count: AtomicLong, peaks: AtomicLongArray) + private[executor] case class TCMP(count: Long, peaks: AtomicLongArray) // Map of (stageId, stageAttemptId) to (count of running tasks, executor metric peaks) private[executor] val stageTCMP = new ConcurrentHashMap[StageKey, TCMP] @@ -112,10 +112,13 @@ private[spark] class ExecutorMetricsPoller( // Put a new entry in stageTCMP for the stage if there isn't one already. // Increment the task count. - val countAndPeaks = stageTCMP.computeIfAbsent((stageId, stageAttemptId), - _ => TCMP(new AtomicLong(0), new AtomicLongArray(ExecutorMetricType.numMetrics))) - val stageCount = countAndPeaks.count.incrementAndGet() - logDebug(s"stageTCMP: ($stageId, $stageAttemptId) -> $stageCount") + val countAndPeaks = stageTCMP.compute((stageId, stageAttemptId), (k: StageKey, v: TCMP) => + if (v == null) { + TCMP(1L, new AtomicLongArray(ExecutorMetricType.numMetrics)) + } else { + TCMP(v.count + 1, v.peaks) + }) + logDebug(s"stageTCMP: ($stageId, $stageAttemptId) -> ${countAndPeaks.count}") } /** @@ -126,10 +129,10 @@ private[spark] class ExecutorMetricsPoller( // Decrement the task count. def decrementCount(stage: StageKey, countAndPeaks: TCMP): TCMP = { - val countValue = countAndPeaks.count.decrementAndGet() + val countValue = countAndPeaks.count - 1 assert(countValue >= 0, "task count shouldn't below 0") logDebug(s"stageTCMP: (${stage._1}, ${stage._2}) -> " + countValue) - countAndPeaks + TCMP(countValue, countAndPeaks.peaks) } stageTCMP.computeIfPresent((stageId, stageAttemptId), decrementCount) @@ -172,7 +175,7 @@ private[spark] class ExecutorMetricsPoller( stageTCMP.replaceAll(getUpdateAndResetPeaks) def removeIfInactive(k: StageKey, v: TCMP): TCMP = { - if (v.count.get == 0) { + if (v.count == 0) { logDebug(s"removing (${k._1}, ${k._2}) from stageTCMP") null } else { diff --git a/core/src/test/scala/org/apache/spark/executor/ExecutorMetricsPollerSuite.scala b/core/src/test/scala/org/apache/spark/executor/ExecutorMetricsPollerSuite.scala index e471864ae240f..11593a0472019 100644 --- a/core/src/test/scala/org/apache/spark/executor/ExecutorMetricsPollerSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/ExecutorMetricsPollerSuite.scala @@ -30,13 +30,13 @@ class ExecutorMetricsPollerSuite extends SparkFunSuite { // stage (0, 0) has an active task, so it remains on stageTCMP after heartbeat. assert(poller.getExecutorUpdates.size === 1) assert(poller.stageTCMP.size === 1) - assert(poller.stageTCMP.get((0, 0)).count.get === 1) + assert(poller.stageTCMP.get((0, 0)).count === 1) poller.onTaskCompletion(0L, 0, 0) // stage (0, 0) doesn't have active tasks, but its entry will be kept until next // heartbeat. assert(poller.stageTCMP.size === 1) - assert(poller.stageTCMP.get((0, 0)).count.get === 0) + assert(poller.stageTCMP.get((0, 0)).count === 0) // the next heartbeat will report the peak metrics of stage (0, 0) during the // previous heartbeat interval, then remove it from stageTCMP. From 2adee979a025c7ba2422f6cedda6c2d697054f2e Mon Sep 17 00:00:00 2001 From: Warren Zhu Date: Tue, 4 Oct 2022 13:38:17 -0700 Subject: [PATCH 511/535] [SPARK-40636][CORE] Fix wrong remained shuffles log in BlockManagerDecommissioner ### What changes were proposed in this pull request Fix wrong remained shuffles log in BlockManagerDecommissioner ### Why are the changes needed? BlockManagerDecommissioner should log correct remained shuffles. Current log used all shuffles num as remained. ``` 4 of 24 local shuffles are added. In total, 24 shuffles are remained. 2022-09-30 17:42:15.035 PDT 0 of 24 local shuffles are added. In total, 24 shuffles are remained. 2022-09-30 17:42:45.069 PDT 0 of 24 local shuffles are added. In total, 24 shuffles are remained. ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually tested Closes #38078 from warrenzhu25/deco-log. Authored-by: Warren Zhu Signed-off-by: Dongjoon Hyun (cherry picked from commit b39f2d6acf25726d99bf2c2fa84ba6a227d0d909) Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/storage/BlockManagerDecommissioner.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala index cb01faf7d401d..ecd64b6695a30 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerDecommissioner.scala @@ -280,8 +280,9 @@ private[storage] class BlockManagerDecommissioner( .sortBy(b => (b.shuffleId, b.mapId)) shufflesToMigrate.addAll(newShufflesToMigrate.map(x => (x, 0)).asJava) migratingShuffles ++= newShufflesToMigrate + val remainedShuffles = migratingShuffles.size - numMigratedShuffles.get() logInfo(s"${newShufflesToMigrate.size} of ${localShuffles.size} local shuffles " + - s"are added. In total, ${migratingShuffles.size} shuffles are remained.") + s"are added. In total, $remainedShuffles shuffles are remained.") // Update the threads doing migrations val livePeerSet = bm.getPeers(false).toSet From d7b78a353f39e85f9426218de495b1774ace4e22 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 4 Oct 2022 13:40:37 -0700 Subject: [PATCH 512/535] [SPARK-40648][YARN][TESTS][3.3] Add @ExtendedLevelDBTest to LevelDB relevant tests in the yarn module MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? SPARK-40490 make the test case related to `YarnShuffleIntegrationSuite` starts to verify the `registeredExecFile` reload test scenario again,so this pr add `ExtendedLevelDBTest` to `LevelDB` relevant tests in the `yarn` module so that the `MacOs/Apple Silicon` can skip the tests through `-Dtest.exclude.tags=org.apache.spark.tags.ExtendedLevelDBTest`. ### Why are the changes needed? According to convention, Add `ExtendedLevelDBTest` to LevelDB relevant tests to make `yarn` module can skip these tests through `-Dtest.exclude.tags=org.apache.spark.tags.ExtendedLevelDBTest` on `MacOs/Apple Silicon`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GitHub Actions - Manual test on `MacOs/Apple Silicon` ``` build/sbt clean "yarn/testOnly *YarnShuffleIntegrationSuite*" -Pyarn -Dtest.exclude.tags=org.apache.spark.tags.ExtendedLevelDBTest build/sbt clean "yarn/testOnly *YarnShuffleAuthSuite*" -Pyarn -Dtest.exclude.tags=org.apache.spark.tags.ExtendedLevelDBTest build/sbt clean "yarn/testOnly *YarnShuffleAlternateNameConfigSuite*" -Pyarn -Dtest.exclude.tags=org.apache.spark.tags.ExtendedLevelDBTest ``` **Before** All 3 case aborted as follows ``` [info] YarnShuffleIntegrationSuite: [info] org.apache.spark.deploy.yarn.YarnShuffleIntegrationSuite *** ABORTED *** (1 second, 144 milliseconds) [info] java.lang.UnsatisfiedLinkError: Could not load library. Reasons: [no leveldbjni64-1.8 in java.library.path, no leveldbjni-1.8 in java.library.path, no leveldbjni in java.library.path, /Users/yangjie01/SourceCode/git/spark-source/target/tmp/libleveldbjni-64-1-7065283280142546801.8: dlopen(/Users/yangjie01/SourceCode/git/spark-source/target/tmp/libleveldbjni-64-1-7065283280142546801.8, 1): no suitable image found. Did find: [info] /Users/yangjie01/SourceCode/git/spark-source/target/tmp/libleveldbjni-64-1-7065283280142546801.8: no matching architecture in universal wrapper [info] /Users/yangjie01/SourceCode/git/spark-source/target/tmp/libleveldbjni-64-1-7065283280142546801.8: no matching architecture in universal wrapper] [info] at org.fusesource.hawtjni.runtime.Library.doLoad(Library.java:182) [info] at org.fusesource.hawtjni.runtime.Library.load(Library.java:140) [info] at org.fusesource.leveldbjni.JniDBFactory.(JniDBFactory.java:48) [info] at org.apache.spark.network.util.LevelDBProvider.initLevelDB(LevelDBProvider.java:48) [info] at org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.(ExternalShuffleBlockResolver.java:126) [info] at org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.(ExternalShuffleBlockResolver.java:99) [info] at org.apache.spark.network.shuffle.ExternalBlockHandler.(ExternalBlockHandler.java:90) [info] at org.apache.spark.network.yarn.YarnShuffleService.serviceInit(YarnShuffleService.java:247) [info] at org.apache.hadoop.service.AbstractService.init(AbstractService.java:164) [info] at org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServices.initAuxService(AuxServices.java:475) [info] at org.apache.hadoop.yarn.server.nodemanager.containermanager.AuxServices.serviceInit(AuxServices.java:758) [info] at org.apache.hadoop.service.AbstractService.init(AbstractService.java:164) [info] at org.apache.hadoop.service.CompositeService.serviceInit(CompositeService.java:109) [info] at org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl.serviceInit(ContainerManagerImpl.java:327) [info] at org.apache.hadoop.service.AbstractService.init(AbstractService.java:164) [info] at org.apache.hadoop.service.CompositeService.serviceInit(CompositeService.java:109) [info] at org.apache.hadoop.yarn.server.nodemanager.NodeManager.serviceInit(NodeManager.java:494) [info] at org.apache.hadoop.service.AbstractService.init(AbstractService.java:164) [info] at org.apache.hadoop.yarn.server.MiniYARNCluster$NodeManagerWrapper.serviceInit(MiniYARNCluster.java:597) [info] at org.apache.hadoop.service.AbstractService.init(AbstractService.java:164) [info] at org.apache.hadoop.service.CompositeService.serviceInit(CompositeService.java:109) [info] at org.apache.hadoop.yarn.server.MiniYARNCluster.serviceInit(MiniYARNCluster.java:327) [info] at org.apache.hadoop.service.AbstractService.init(AbstractService.java:164) [info] at org.apache.spark.deploy.yarn.BaseYarnClusterSuite.beforeAll(BaseYarnClusterSuite.scala:111) [info] at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:212) [info] at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210) [info] at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208) [info] at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:64) [info] at org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:318) [info] at org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:513) [info] at sbt.ForkMain$Run.lambda$runTest$1(ForkMain.java:413) [info] at java.util.concurrent.FutureTask.run(FutureTask.java:266) [info] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [info] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [info] at java.lang.Thread.run(Thread.java:750) ``` **After** All 3 case as follows: ``` [info] YarnShuffleAlternateNameConfigSuite: [info] Run completed in 1 second, 288 milliseconds. [info] Total number of tests run: 0 [info] Suites: completed 1, aborted 0 [info] Tests: succeeded 0, failed 0, ``` Closes #38096 from LuciferYang/SPARK-40648-33. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../deploy/yarn/YarnShuffleAlternateNameConfigSuite.scala | 3 ++- .../spark/deploy/yarn/YarnShuffleIntegrationSuite.scala | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleAlternateNameConfigSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleAlternateNameConfigSuite.scala index db001a946fddf..55ae7a4769bc5 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleAlternateNameConfigSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleAlternateNameConfigSuite.scala @@ -24,12 +24,13 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.spark._ import org.apache.spark.internal.config._ import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor} -import org.apache.spark.tags.ExtendedYarnTest +import org.apache.spark.tags.{ExtendedLevelDBTest, ExtendedYarnTest} /** * SPARK-34828: Integration test for the external shuffle service with an alternate name and * configs (by using a configuration overlay) */ +@ExtendedLevelDBTest @ExtendedYarnTest class YarnShuffleAlternateNameConfigSuite extends YarnShuffleIntegrationSuite { diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala index a3447d352bb60..c559388de1d7f 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala @@ -33,11 +33,12 @@ import org.apache.spark.internal.config._ import org.apache.spark.internal.config.Network._ import org.apache.spark.network.shuffle.ShuffleTestAccessor import org.apache.spark.network.yarn.{YarnShuffleService, YarnTestAccessor} -import org.apache.spark.tags.ExtendedYarnTest +import org.apache.spark.tags.{ExtendedLevelDBTest, ExtendedYarnTest} /** * Integration test for the external shuffle service with a yarn mini-cluster */ +@ExtendedLevelDBTest @ExtendedYarnTest class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite { @@ -86,6 +87,7 @@ class YarnShuffleIntegrationSuite extends BaseYarnClusterSuite { /** * Integration test for the external shuffle service with auth on. */ +@ExtendedLevelDBTest @ExtendedYarnTest class YarnShuffleAuthSuite extends YarnShuffleIntegrationSuite { From 5483607910aba0aaf05d029c6c813faa37cb4731 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20J=C3=B8rgensen?= <47577197+bjornjorgensen@users.noreply.github.com> Date: Tue, 4 Oct 2022 17:50:57 -0700 Subject: [PATCH 513/535] [SPARK-39725][BUILD][3.3] Upgrade `jetty` to 9.4.48.v20220622 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Upgrade jetty from 9.4.46.v20220331 to 9.4.48.v20220622 [Relase notes](https://github.com/eclipse/jetty.project/releases/tag/jetty-9.4.48.v20220622) ### Why are the changes needed? [CVE-2022-2047](https://nvd.nist.gov/vuln/detail/CVE-2022-2047) and [CVE-2022-2048](https://nvd.nist.gov/vuln/detail/CVE-2022-2048) This is a 7.5 HIGH ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass Github actions Closes #38098 from bjornjorgensen/patch-1. Lead-authored-by: Bjørn Jørgensen <47577197+bjornjorgensen@users.noreply.github.com> Co-authored-by: Bjørn Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 4 ++-- pom.xml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 8208f90efe6c1..fb9c36a26a134 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -146,7 +146,7 @@ jersey-hk2/2.36//jersey-hk2-2.36.jar jersey-server/2.36//jersey-server-2.36.jar jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar jetty-util/6.1.26//jetty-util-6.1.26.jar -jetty-util/9.4.46.v20220331//jetty-util-9.4.46.v20220331.jar +jetty-util/9.4.48.v20220622//jetty-util-9.4.48.v20220622.jar jetty/6.1.26//jetty-6.1.26.jar jline/2.14.6//jline-2.14.6.jar joda-time/2.10.13//joda-time-2.10.13.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 04be0c1d7d647..f6e09eff50aea 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -133,8 +133,8 @@ jersey-container-servlet/2.36//jersey-container-servlet-2.36.jar jersey-hk2/2.36//jersey-hk2-2.36.jar jersey-server/2.36//jersey-server-2.36.jar jettison/1.1//jettison-1.1.jar -jetty-util-ajax/9.4.46.v20220331//jetty-util-ajax-9.4.46.v20220331.jar -jetty-util/9.4.46.v20220331//jetty-util-9.4.46.v20220331.jar +jetty-util-ajax/9.4.48.v20220622//jetty-util-ajax-9.4.48.v20220622.jar +jetty-util/9.4.48.v20220622//jetty-util-9.4.48.v20220622.jar jline/2.14.6//jline-2.14.6.jar joda-time/2.10.13//joda-time-2.10.13.jar jodd-core/3.5.2//jodd-core-3.5.2.jar diff --git a/pom.xml b/pom.xml index 045d299277769..d7ed56329fd67 100644 --- a/pom.xml +++ b/pom.xml @@ -133,7 +133,7 @@ 10.14.2.0 1.12.2 1.7.6 - 9.4.46.v20220331 + 9.4.48.v20220622 4.0.3 0.10.0 2.5.0 From 5dc9ba0d22741173bd122afb387c54d7ca4bfb6d Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 5 Oct 2022 18:01:55 -0700 Subject: [PATCH 514/535] [SPARK-40669][SQL][TESTS] Parameterize `rowsNum` in `InMemoryColumnarBenchmark` This PR aims to parameterize `InMemoryColumnarBenchmark` to accept `rowsNum`. This enables us to benchmark more flexibly. ``` build/sbt "sql/test:runMain org.apache.spark.sql.execution.columnar.InMemoryColumnarBenchmark 1000000" ... [info] Running benchmark: Int In-Memory scan [info] Running case: columnar deserialization + columnar-to-row [info] Stopped after 3 iterations, 444 ms [info] Running case: row-based deserialization [info] Stopped after 3 iterations, 462 ms [info] OpenJDK 64-Bit Server VM 17.0.4+8-LTS on Mac OS X 12.6 [info] Apple M1 Max [info] Int In-Memory scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative [info] -------------------------------------------------------------------------------------------------------------------------- [info] columnar deserialization + columnar-to-row 119 148 26 8.4 118.5 1.0X [info] row-based deserialization 119 154 32 8.4 119.5 1.0X ``` ``` $ build/sbt "sql/test:runMain org.apache.spark.sql.execution.columnar.InMemoryColumnarBenchmark 10000000" ... [info] Running benchmark: Int In-Memory scan [info] Running case: columnar deserialization + columnar-to-row [info] Stopped after 3 iterations, 3855 ms [info] Running case: row-based deserialization [info] Stopped after 3 iterations, 4250 ms [info] OpenJDK 64-Bit Server VM 17.0.4+8-LTS on Mac OS X 12.6 [info] Apple M1 Max [info] Int In-Memory scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative [info] -------------------------------------------------------------------------------------------------------------------------- [info] columnar deserialization + columnar-to-row 1082 1285 199 9.2 108.2 1.0X [info] row-based deserialization 1057 1417 335 9.5 105.7 1.0X ``` ``` $ build/sbt "sql/test:runMain org.apache.spark.sql.execution.columnar.InMemoryColumnarBenchmark 20000000" [info] Running benchmark: Int In-Memory scan [info] Running case: columnar deserialization + columnar-to-row [info] Stopped after 3 iterations, 8482 ms [info] Running case: row-based deserialization [info] Stopped after 3 iterations, 7534 ms [info] OpenJDK 64-Bit Server VM 17.0.4+8-LTS on Mac OS X 12.6 [info] Apple M1 Max [info] Int In-Memory scan: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative [info] -------------------------------------------------------------------------------------------------------------------------- [info] columnar deserialization + columnar-to-row 2261 2828 555 8.8 113.1 1.0X [info] row-based deserialization 1788 2511 1187 11.2 89.4 1.3X ``` No. This is a benchmark test code. Manually. Closes #38114 from dongjoon-hyun/SPARK-40669. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 95cfdc694d3e0b68979cd06b78b52e107aa58a9f) Signed-off-by: Dongjoon Hyun --- .../columnar/InMemoryColumnarBenchmark.scala | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarBenchmark.scala index b975451e13563..55d9fb2731799 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarBenchmark.scala @@ -26,14 +26,15 @@ import org.apache.spark.sql.execution.benchmark.SqlBasedBenchmark * {{{ * 1. without sbt: * bin/spark-submit --class - * --jars - * 2. build/sbt "sql/test:runMain " - * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain " + * --jars , + * 2. build/sbt "sql/Test/runMain " + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/Test/runMain + * " * Results will be written to "benchmarks/InMemoryColumnarBenchmark-results.txt". * }}} */ object InMemoryColumnarBenchmark extends SqlBasedBenchmark { - def intCache(rowsNum: Int, numIters: Int): Unit = { + def intCache(rowsNum: Long, numIters: Int): Unit = { val data = spark.range(0, rowsNum, 1, 1).toDF("i").cache() val inMemoryScan = data.queryExecution.executedPlan.collect { @@ -59,8 +60,9 @@ object InMemoryColumnarBenchmark extends SqlBasedBenchmark { } override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { - runBenchmark("Int In-memory") { - intCache(rowsNum = 1000000, numIters = 3) + val rowsNum = if (mainArgs.length > 0) mainArgs(0).toLong else 1000000 + runBenchmark(s"Int In-memory with $rowsNum rows") { + intCache(rowsNum = rowsNum, numIters = 3) } } } From 5fe895a65a4a9d65f81d43af473b5e3a855ed8c8 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 6 Oct 2022 13:02:22 +0800 Subject: [PATCH 515/535] [SPARK-40660][SQL][3.3] Switch to XORShiftRandom to distribute elements ### What changes were proposed in this pull request? Cherry-picked from #38106 and reverted changes in RDD.scala: https://github.com/apache/spark/blob/d2952b671a3579759ad9ce326ed8389f5270fd9f/core/src/main/scala/org/apache/spark/rdd/RDD.scala#L507 ### Why are the changes needed? The number of output files has changed since SPARK-40407. [Some downstream projects](https://github.com/apache/iceberg/blob/c07f2aabc0a1d02f068ecf1514d2479c0fbdd3b0/spark/v3.3/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java#L578-L579) use repartition to determine the number of output files in the test. ``` bin/spark-shell --master "local[2]" spark.range(10).repartition(10).write.mode("overwrite").parquet("/tmp/spark/repartition") ``` Before this PR and after SPARK-40407, the number of output files is 8. After this PR or before SPARK-40407, the number of output files is 10. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #38110 from wangyum/branch-3.3-SPARK-40660. Authored-by: Yuming Wang Signed-off-by: Yuming Wang --- .../execution/exchange/ShuffleExchangeExec.scala | 5 ++--- .../scala/org/apache/spark/sql/DatasetSuite.scala | 14 +++++++++++++- .../adaptive/AdaptiveQueryExecSuite.scala | 4 ++-- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala index 9800a781402d6..964f1d6d518cb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala @@ -17,11 +17,9 @@ package org.apache.spark.sql.execution.exchange -import java.util.Random import java.util.function.Supplier import scala.concurrent.Future -import scala.util.hashing import org.apache.spark._ import org.apache.spark.internal.config @@ -40,6 +38,7 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.StructType import org.apache.spark.util.MutablePair import org.apache.spark.util.collection.unsafe.sort.{PrefixComparators, RecordComparator} +import org.apache.spark.util.random.XORShiftRandom /** * Common trait for all shuffle exchange implementations to facilitate pattern matching. @@ -314,7 +313,7 @@ object ShuffleExchangeExec { // end up being almost the same regardless of the index. substantially scrambling the // seed by hashing will help. Refer to SPARK-21782 for more details. val partitionId = TaskContext.get().partitionId() - var position = new Random(hashing.byteswap32(partitionId)).nextInt(numPartitions) + var position = new XORShiftRandom(partitionId).nextInt(numPartitions) (row: InternalRow) => { // The HashPartitioner will handle the `mod` by the number of partitions position += 1 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index c65ae966ef69b..f5e736621ebbe 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.sql import java.io.{Externalizable, ObjectInput, ObjectOutput} import java.sql.{Date, Timestamp} +import org.apache.hadoop.fs.{Path, PathFilter} import org.scalatest.Assertions._ import org.scalatest.exceptions.TestFailedException import org.scalatest.prop.TableDrivenPropertyChecks._ @@ -2138,7 +2139,18 @@ class DatasetSuite extends QueryTest test("SPARK-40407: repartition should not result in severe data skew") { val df = spark.range(0, 100, 1, 50).repartition(4) val result = df.mapPartitions(iter => Iterator.single(iter.length)).collect() - assert(result.sorted.toSeq === Seq(19, 25, 25, 31)) + assert(result.sorted.toSeq === Seq(23, 25, 25, 27)) + } + + test("SPARK-40660: Switch to XORShiftRandom to distribute elements") { + withTempDir { dir => + spark.range(10).repartition(10).write.mode(SaveMode.Overwrite).parquet(dir.getCanonicalPath) + val fs = new Path(dir.getAbsolutePath).getFileSystem(spark.sessionState.newHadoopConf()) + val parquetFiles = fs.listStatus(new Path(dir.getAbsolutePath), new PathFilter { + override def accept(path: Path): Boolean = path.getName.endsWith("parquet") + }) + assert(parquetFiles.size === 10) + } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala index 8aaafd05217eb..0055b94fa0662 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/adaptive/AdaptiveQueryExecSuite.scala @@ -2090,8 +2090,8 @@ class AdaptiveQueryExecSuite withSQLConf(SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "150") { // partition size [0,258,72,72,72] checkPartitionNumber("SELECT /*+ REBALANCE(c1) */ * FROM v", 2, 4) - // partition size [144,72,144,216,144] - checkPartitionNumber("SELECT /*+ REBALANCE */ * FROM v", 2, 6) + // partition size [144,72,144,72,72,144,72] + checkPartitionNumber("SELECT /*+ REBALANCE */ * FROM v", 6, 7) } // no skewed partition should be optimized From 7c465bc3154cdd0d578f837c9b82e4289caf0b14 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 6 Oct 2022 05:15:03 +0000 Subject: [PATCH 516/535] Preparing Spark release v3.3.1-rc3 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 39 files changed, 41 insertions(+), 41 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index c1e490df26f4a..0e449e841cf6d 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.2 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index eff5e3419be64..32126a5e13820 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 8834464f7f6ac..21bf56094503b 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index bfadba306c5ec..43740354d84d1 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 287355ac07d96..46c875dcb0a06 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 14d41802a8b74..d6d28fe4ec687 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index f6f26a262fd25..a37bc21ca6e54 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index b3b7da8919fc5..817a30e5deea0 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 9c13be8a1f017..99b641a3658c0 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index d5267f3b32d27..0711ecc3e0744 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 667b574c867bc..15eea016135a4 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.2-SNAPSHOT -SPARK_VERSION_SHORT: 3.3.2 +SPARK_VERSION: 3.3.1 +SPARK_VERSION_SHORT: 3.3.1 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.2"] + 'facetFilters': ["version:3.3.1"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index f3934614cb810..18b30b092b273 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index fbca1101eae44..e932501b8b834 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 537e4c97b1f9d..72940cb743386 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 1ce0b53014aa7..f079671b8998a 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index e851b0a8b2c79..1b79350397482 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 695154d8ceb3f..83097460edc9d 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 2fcd0e4c2b75d..91e111ee38d10 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index af53c827711c4..e622369eb7250 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 2f8755241b3c2..a208e03e8bbf3 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index c3a1b68c82657..e464dfacc4c7e 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 9c0f78231df9d..ed0c627abb943 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 3668043c4e316..606b6cb8c5cd7 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index a97e35dae4ce1..cb5c693068114 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 092c1c7d83da6..3fc9ece3d0e05 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 24370ce56e883..d4d0fc3b6f9e6 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/pom.xml b/pom.xml index d7ed56329fd67..ffd9589fc47a2 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index 3e5963da87f31..49fe5caabc028 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__: str = "3.3.2.dev0" +__version__: str = "3.3.1" diff --git a/repl/pom.xml b/repl/pom.xml index 68148f637ac0a..d5abd10e610c7 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index be3c81fbf949f..253a5aeffb521 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index fa9fc6473d330..13b0046c47b4d 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 9354ffda8e46c..1c91ae916bc4b 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index a5c123e47ac0b..eeb0ae37fa109 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index e1aaf4afa59c0..5c6188add47cf 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 9ce6e61a7abc6..c6754cf57f9dd 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 6653a4f61dbfe..15ecd5597fcab 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 79219abecf6a8..944fd8f58dbed 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 13eb55d55ebf5..91ab784016069 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 1195252ba95c3..0ea392b136b98 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml From 5a23f62806109425869752de9be1b4ab012f9af8 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Thu, 6 Oct 2022 05:15:21 +0000 Subject: [PATCH 517/535] Preparing development version 3.3.2-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 39 files changed, 41 insertions(+), 41 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf6d..c1e490df26f4a 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.2 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 32126a5e13820..eff5e3419be64 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 21bf56094503b..8834464f7f6ac 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 43740354d84d1..bfadba306c5ec 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 46c875dcb0a06..287355ac07d96 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index d6d28fe4ec687..14d41802a8b74 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index a37bc21ca6e54..f6f26a262fd25 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 817a30e5deea0..b3b7da8919fc5 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 99b641a3658c0..9c13be8a1f017 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 0711ecc3e0744..d5267f3b32d27 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 15eea016135a4..667b574c867bc 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.1 -SPARK_VERSION_SHORT: 3.3.1 +SPARK_VERSION: 3.3.2-SNAPSHOT +SPARK_VERSION_SHORT: 3.3.2 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.1"] + 'facetFilters': ["version:3.3.2"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index 18b30b092b273..f3934614cb810 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index e932501b8b834..fbca1101eae44 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 72940cb743386..537e4c97b1f9d 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index f079671b8998a..1ce0b53014aa7 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 1b79350397482..e851b0a8b2c79 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 83097460edc9d..695154d8ceb3f 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 91e111ee38d10..2fcd0e4c2b75d 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index e622369eb7250..af53c827711c4 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index a208e03e8bbf3..2f8755241b3c2 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index e464dfacc4c7e..c3a1b68c82657 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index ed0c627abb943..9c0f78231df9d 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 606b6cb8c5cd7..3668043c4e316 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index cb5c693068114..a97e35dae4ce1 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 3fc9ece3d0e05..092c1c7d83da6 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index d4d0fc3b6f9e6..24370ce56e883 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index ffd9589fc47a2..d7ed56329fd67 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index 49fe5caabc028..3e5963da87f31 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__: str = "3.3.1" +__version__: str = "3.3.2.dev0" diff --git a/repl/pom.xml b/repl/pom.xml index d5abd10e610c7..68148f637ac0a 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 253a5aeffb521..be3c81fbf949f 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 13b0046c47b4d..fa9fc6473d330 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 1c91ae916bc4b..9354ffda8e46c 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index eeb0ae37fa109..a5c123e47ac0b 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 5c6188add47cf..e1aaf4afa59c0 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index c6754cf57f9dd..9ce6e61a7abc6 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 15ecd5597fcab..6653a4f61dbfe 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 944fd8f58dbed..79219abecf6a8 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 91ab784016069..13eb55d55ebf5 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 0ea392b136b98..1195252ba95c3 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.1 + 3.3.2-SNAPSHOT ../pom.xml From 9f8eef8bc7fbb5f9a0fe7a4f5c99da0b59b74c07 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 6 Oct 2022 18:33:19 -0700 Subject: [PATCH 518/535] [SPARK-40682][SQL][TESTS] Set `spark.driver.maxResultSize` to 3g in `SqlBasedBenchmark` ### What changes were proposed in this pull request? This PR aims to set `spark.driver.maxResultSize` to `3g` from the default value, `1g`, in `SqlBasedBenchmark`. ### Why are the changes needed? Apache Spark benchmark is using `4g` JVM memory. We can utilize it for `spark.driver.maxResultSize` in some cases after this PR. ### Does this PR introduce _any_ user-facing change? No. This is a test case and unblocks only the cases which failed before. ### How was this patch tested? N/A Closes #38136 from dongjoon-hyun/SPARK-40682. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit e02d518f11c3a83780170e6b7221520639393729) Signed-off-by: Dongjoon Hyun --- .../spark/sql/execution/benchmark/SqlBasedBenchmark.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SqlBasedBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SqlBasedBenchmark.scala index f84172278bef6..78d6b01580355 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SqlBasedBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SqlBasedBenchmark.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution.benchmark import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} +import org.apache.spark.internal.config.MAX_RESULT_SIZE import org.apache.spark.internal.config.UI.UI_ENABLED import org.apache.spark.sql.{Dataset, SparkSession} import org.apache.spark.sql.SaveMode.Overwrite @@ -41,6 +42,7 @@ trait SqlBasedBenchmark extends BenchmarkBase with SQLHelper { .config(SQLConf.SHUFFLE_PARTITIONS.key, 1) .config(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, 1) .config(UI_ENABLED.key, false) + .config(MAX_RESULT_SIZE.key, "3g") .getOrCreate() } From fdc51c73fb08eb2cd234cdaf1032a4e54ff0b1a4 Mon Sep 17 00:00:00 2001 From: Ait Zeouay Amrane Date: Mon, 10 Oct 2022 10:18:51 -0500 Subject: [PATCH 519/535] [SPARK-40705][SQL] Handle case of using mutable array when converting Row to JSON for Scala 2.13 ### What changes were proposed in this pull request? I encountered an issue using Spark while reading JSON files based on a schema it throws every time an exception related to conversion of types. >Note: This issue can be reproduced only with Scala `2.13`, I'm not having this issue with `2.12` ```` Failed to convert value ArraySeq(1, 2, 3) (class of class scala.collection.mutable.ArraySeq$ofRef}) with the type of ArrayType(StringType,true) to JSON. java.lang.IllegalArgumentException: Failed to convert value ArraySeq(1, 2, 3) (class of class scala.collection.mutable.ArraySeq$ofRef}) with the type of ArrayType(StringType,true) to JSON. ```` If I add ArraySeq to the matching cases, the test that I added passed successfully ![image](https://user-images.githubusercontent.com/28459763/194669557-2f13032f-126f-4c2e-bc6d-1a4cfd0a009d.png) With the current code source, the test fails and we have this following error ![image](https://user-images.githubusercontent.com/28459763/194669654-19cefb13-180c-48ac-9206-69d8f672f64c.png) ### Why are the changes needed? If the person is using Scala 2.13, they can't parse an array. Which means they need to fallback to 2.12 to keep the project functioning ### How was this patch tested? I added a sample unit test for the case, but I can add more if you want to. Closes #38154 from Amraneze/fix/spark_40705. Authored-by: Ait Zeouay Amrane Signed-off-by: Sean Owen (cherry picked from commit 9a97f8c62bcd1ad9f34c6318792ae443af46ea85) Signed-off-by: Sean Owen --- .../src/main/scala/org/apache/spark/sql/Row.scala | 2 ++ .../src/test/scala/org/apache/spark/sql/RowTest.scala | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala index 4f6c9a8c703e3..72e1dd94c94da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala @@ -584,6 +584,8 @@ trait Row extends Serializable { case (i: CalendarInterval, _) => JString(i.toString) case (a: Array[_], ArrayType(elementType, _)) => iteratorToJsonArray(a.iterator, elementType) + case (a: mutable.ArraySeq[_], ArrayType(elementType, _)) => + iteratorToJsonArray(a.iterator, elementType) case (s: Seq[_], ArrayType(elementType, _)) => iteratorToJsonArray(s.iterator, elementType) case (m: Map[String @unchecked, _], MapType(StringType, valueType, _)) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala index 385f749736846..82731cdb220a2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala @@ -17,6 +17,9 @@ package org.apache.spark.sql +import scala.collection.mutable.ArraySeq + +import org.json4s.JsonAST.{JArray, JObject, JString} import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ @@ -91,6 +94,14 @@ class RowTest extends AnyFunSpec with Matchers { it("getAs() on type extending AnyVal does not throw exception when value is null") { sampleRowWithoutCol3.getAs[String](sampleRowWithoutCol3.fieldIndex("col1")) shouldBe null } + + it("json should convert a mutable array to JSON") { + val schema = new StructType().add(StructField("list", ArrayType(StringType))) + val values = ArraySeq("1", "2", "3") + val row = new GenericRowWithSchema(Array(values), schema) + val expectedList = JArray(JString("1") :: JString("2") :: JString("3") :: Nil) + row.jsonValue shouldBe new JObject(("list", expectedList) :: Nil) + } } describe("row equals") { From 442ae56a330e114651e9195a16b58c4c9a4a56b7 Mon Sep 17 00:00:00 2001 From: zhouyifan279 Date: Wed, 12 Oct 2022 11:34:43 +0800 Subject: [PATCH 520/535] [SPARK-8731] Beeline doesn't work with -e option when started in background ### What changes were proposed in this pull request? Append jline option "-Djline.terminal=jline.UnsupportedTerminal" to enable the Beeline process to run in background. ### Why are the changes needed? Currently, if we execute spark Beeline in background, the Beeline process stops immediately. image ### Does this PR introduce _any_ user-facing change? User will be able to execute Spark Beeline in background. ### How was this patch tested? 1. Start Spark ThriftServer 2. Execute command `./bin/beeline -u "jdbc:hive2://localhost:10000" -e "select 1;" &` 3. Verify Beeline process output in console: image ### Note Beeline works fine on Windows when backgrounded: ![image](https://user-images.githubusercontent.com/88070094/194743797-7dc4fc21-dec6-4056-8b13-21fc96f1476e.png) Closes #38172 from zhouyifan279/SPARK-8731. Authored-by: zhouyifan279 Signed-off-by: Kent Yao (cherry picked from commit cb0d6ed46acee7271597764e018558b86aa8c29b) Signed-off-by: Kent Yao --- bin/load-spark-env.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/bin/load-spark-env.sh b/bin/load-spark-env.sh index 04adaeed7ac61..fc5e881dd0dfd 100644 --- a/bin/load-spark-env.sh +++ b/bin/load-spark-env.sh @@ -63,3 +63,8 @@ if [ -z "$SPARK_SCALA_VERSION" ]; then export SPARK_SCALA_VERSION=${SCALA_VERSION_2} fi fi + +# Append jline option to enable the Beeline process to run in background. +if [[ ( ! $(ps -o stat= -p $$) =~ "+" ) && ! ( -p /dev/stdin ) ]]; then + export SPARK_BEELINE_OPTS="$SPARK_BEELINE_OPTS -Djline.terminal=jline.UnsupportedTerminal" +fi From 27ca30aaad41e4dd50834d255720fb46a36d9e6d Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 13 Oct 2022 10:29:59 -0500 Subject: [PATCH 521/535] [SPARK-40782][BUILD] Upgrade `jackson-databind` to 2.13.4.1 ### What changes were proposed in this pull request? This pr aims upgrade `jackson-databind` to 2.13.4.1. ### Why are the changes needed? This is a bug fix version related to [CVE-2022-42003] - https://github.com/FasterXML/jackson-databind/pull/3621 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #38235 from LuciferYang/SPARK-40782. Authored-by: yangjie01 Signed-off-by: Sean Owen (cherry picked from commit 2a8b2a136d5a705526bb76697596f5ad01ce391d) Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index fb9c36a26a134..55515614ab8d5 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -115,7 +115,7 @@ ivy/2.5.0//ivy-2.5.0.jar jackson-annotations/2.13.4//jackson-annotations-2.13.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.13.4//jackson-core-2.13.4.jar -jackson-databind/2.13.4//jackson-databind-2.13.4.jar +jackson-databind/2.13.4.1//jackson-databind-2.13.4.1.jar jackson-dataformat-cbor/2.13.4//jackson-dataformat-cbor-2.13.4.jar jackson-dataformat-yaml/2.13.4//jackson-dataformat-yaml-2.13.4.jar jackson-datatype-jsr310/2.13.4//jackson-datatype-jsr310-2.13.4.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index f6e09eff50aea..9fc9dca09b03e 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -105,7 +105,7 @@ ivy/2.5.0//ivy-2.5.0.jar jackson-annotations/2.13.4//jackson-annotations-2.13.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.13.4//jackson-core-2.13.4.jar -jackson-databind/2.13.4//jackson-databind-2.13.4.jar +jackson-databind/2.13.4.1//jackson-databind-2.13.4.1.jar jackson-dataformat-cbor/2.13.4//jackson-dataformat-cbor-2.13.4.jar jackson-dataformat-yaml/2.13.4//jackson-dataformat-yaml-2.13.4.jar jackson-datatype-jsr310/2.13.4//jackson-datatype-jsr310-2.13.4.jar diff --git a/pom.xml b/pom.xml index d7ed56329fd67..43f9c30422fd7 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ true 1.9.13 2.13.4 - 2.13.4 + 2.13.4.1 1.1.8.4 1.1.2 2.2.1 From ca606650f11c0bef6a2a92542228f174cddd9d44 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Sat, 15 Oct 2022 10:20:53 +0800 Subject: [PATCH 522/535] [SPARK-40703][SQL] Introduce shuffle on SinglePartition to improve parallelism ### What changes were proposed in this pull request? This PR fixes a performance regression issue when one side of a join uses `HashPartitioning` with `ShuffleExchange` while the other side uses `SinglePartition`. In this case, Spark will re-shuffle the side with `HashPartitioning` and both sides will end up with only a single partition. This could hurt query performance a lot if the side with `HashPartitioning` contains a lot of input data. ### Why are the changes needed? After SPARK-35703, when Spark sees that one side of the join has `ShuffleExchange` (meaning it needs to be shuffled anyways), and the other side doesn't, it'll try to avoid shuffling the side without `ShuffleExchange`. For instance: ``` ShuffleExchange(HashPartition(200)) <-> HashPartition(150) ``` will be converted into ``` ShuffleExchange(HashPartition(150)) <-> HashPartition(150) ``` However, when the side without `ShuffleExchange` is `SinglePartition`, like the following: ``` ShuffleExchange(HashPartition(150)) <-> SinglePartition ``` Spark will also do the same which causes the left-hand side to only use one partition. This can hurt job parallelism dramatically, especially when using DataSource V2, since `SinglePartition` is used by the V2 scan. On the other hand, it seems DataSource V1 won't be impacted much as it always report `UnknownPartitioning` in this situation. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added new unit tests in `EnsureRequirementsSuite`. Closes #38196 from sunchao/SPARK-40703. Authored-by: Chao Sun Signed-off-by: Yuming Wang (cherry picked from commit bde6423c947dea0c7529bd3a1f8a0be36b970ff5) Signed-off-by: Yuming Wang --- .../plans/physical/partitioning.scala | 3 +- .../spark/sql/catalyst/ShuffleSpecSuite.scala | 2 +- .../exchange/EnsureRequirements.scala | 6 ++- .../spark/sql/execution/PlannerSuite.scala | 4 +- .../exchange/EnsureRequirementsSuite.scala | 45 ++++++++++++++++++- 5 files changed, 52 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index 69eeab426ed01..209f369f009be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -529,7 +529,6 @@ trait ShuffleSpec { * clustering expressions. * * This will only be called when: - * - [[canCreatePartitioning]] returns true. * - [[isCompatibleWith]] returns false on the side where the `clustering` is from. */ def createPartitioning(clustering: Seq[Expression]): Partitioning = @@ -542,7 +541,7 @@ case object SinglePartitionShuffleSpec extends ShuffleSpec { other.numPartitions == 1 } - override def canCreatePartitioning: Boolean = true + override def canCreatePartitioning: Boolean = false override def createPartitioning(clustering: Seq[Expression]): Partitioning = SinglePartition diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala index 7e11d4f68392f..51e7688732265 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala @@ -367,7 +367,7 @@ class ShuffleSpecSuite extends SparkFunSuite with SQLHelper { assert(HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), distribution) .canCreatePartitioning) } - assert(SinglePartitionShuffleSpec.canCreatePartitioning) + assert(!SinglePartitionShuffleSpec.canCreatePartitioning) withSQLConf(SQLConf.REQUIRE_ALL_CLUSTER_KEYS_FOR_CO_PARTITION.key -> "false") { assert(ShuffleSpecCollection(Seq( HashShuffleSpec(HashPartitioning(Seq($"a"), 10), distribution), diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index 67a58da89625e..581fa1475b8a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -73,9 +73,13 @@ case class EnsureRequirements( case _ => false }.map(_._2) + // Special case: if all sides of the join are single partition + val allSinglePartition = + childrenIndexes.forall(children(_).outputPartitioning == SinglePartition) + // If there are more than one children, we'll need to check partitioning & distribution of them // and see if extra shuffles are necessary. - if (childrenIndexes.length > 1) { + if (childrenIndexes.length > 1 && !allSinglePartition) { val specs = childrenIndexes.map(i => { val requiredDist = requiredChildDistributions(i) assert(requiredDist.isInstanceOf[ClusteredDistribution], diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala index c3c8959d6e1ca..000bd8c84f64a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala @@ -262,7 +262,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val numExchanges = collect(plan) { case exchange: ShuffleExchangeExec => exchange }.length - assert(numExchanges === 3) + assert(numExchanges === 5) } { @@ -278,7 +278,7 @@ class PlannerSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { val numExchanges = collect(plan) { case exchange: ShuffleExchangeExec => exchange }.length - assert(numExchanges === 3) + assert(numExchanges === 5) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala index 7237cc5f0fa51..d692ba5b17073 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/exchange/EnsureRequirementsSuite.scala @@ -433,8 +433,10 @@ class EnsureRequirementsSuite extends SharedSparkSession { exprA :: exprB :: Nil, exprC :: exprD :: Nil, Inner, None, plan1, plan2) EnsureRequirements.apply(smjExec) match { case SortMergeJoinExec(_, _, _, _, - SortExec(_, _, DummySparkPlan(_, _, SinglePartition, _, _), _), - SortExec(_, _, ShuffleExchangeExec(SinglePartition, _, _), _), _) => + SortExec(_, _, ShuffleExchangeExec(left: HashPartitioning, _, _), _), + SortExec(_, _, ShuffleExchangeExec(right: HashPartitioning, _, _), _), _) => + assert(left.numPartitions == 5) + assert(right.numPartitions == 5) case other => fail(other.toString) } @@ -690,6 +692,45 @@ class EnsureRequirementsSuite extends SharedSparkSession { } } + test("SPARK-40703: shuffle for SinglePartitionShuffleSpec") { + withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> 20.toString) { + // We should re-shuffle the side with single partition when the other side is + // `HashPartitioning` with shuffle node, and respect the minimum parallelism. + var plan1: SparkPlan = ShuffleExchangeExec( + outputPartitioning = HashPartitioning(exprA :: Nil, 10), + DummySparkPlan()) + var plan2 = DummySparkPlan(outputPartitioning = SinglePartition) + var smjExec = SortMergeJoinExec(exprA :: Nil, exprC :: Nil, Inner, None, plan1, plan2) + EnsureRequirements.apply(smjExec) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, ShuffleExchangeExec(left: HashPartitioning, _, _), _), + SortExec(_, _, ShuffleExchangeExec(right: HashPartitioning, _, _), _), _) => + assert(leftKeys === Seq(exprA)) + assert(rightKeys === Seq(exprC)) + assert(left.numPartitions == 20) + assert(right.numPartitions == 20) + case other => fail(other.toString) + } + + // We should also re-shuffle the side with only a single partition even the other side does + // not have `ShuffleExchange`, but just `HashPartitioning`. However in this case the minimum + // shuffle parallelism will be ignored since we don't want to introduce extra shuffle. + plan1 = DummySparkPlan( + outputPartitioning = HashPartitioning(exprA :: Nil, 10)) + plan2 = DummySparkPlan(outputPartitioning = SinglePartition) + smjExec = SortMergeJoinExec(exprA :: Nil, exprC :: Nil, Inner, None, plan1, plan2) + EnsureRequirements.apply(smjExec) match { + case SortMergeJoinExec(leftKeys, rightKeys, _, _, + SortExec(_, _, DummySparkPlan(_, _, _: HashPartitioning, _, _), _), + SortExec(_, _, ShuffleExchangeExec(right: HashPartitioning, _, _), _), _) => + assert(leftKeys === Seq(exprA)) + assert(rightKeys === Seq(exprC)) + assert(right.numPartitions == 10) + case other => fail(other.toString) + } + } + } + test("Check with KeyGroupedPartitioning") { // simplest case: identity transforms var plan1 = DummySparkPlan( From fbbcf9434ac070dd4ced4fb9efe32899c6db12a9 Mon Sep 17 00:00:00 2001 From: Yuming Wang Date: Sat, 15 Oct 2022 05:56:02 +0000 Subject: [PATCH 523/535] Preparing Spark release v3.3.1-rc4 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 39 files changed, 41 insertions(+), 41 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index c1e490df26f4a..0e449e841cf6d 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.2 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index eff5e3419be64..32126a5e13820 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 8834464f7f6ac..21bf56094503b 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index bfadba306c5ec..43740354d84d1 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 287355ac07d96..46c875dcb0a06 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 14d41802a8b74..d6d28fe4ec687 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index f6f26a262fd25..a37bc21ca6e54 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index b3b7da8919fc5..817a30e5deea0 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 9c13be8a1f017..99b641a3658c0 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index d5267f3b32d27..0711ecc3e0744 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 667b574c867bc..15eea016135a4 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.3.2-SNAPSHOT -SPARK_VERSION_SHORT: 3.3.2 +SPARK_VERSION: 3.3.1 +SPARK_VERSION_SHORT: 3.3.1 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.15" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.3.2"] + 'facetFilters': ["version:3.3.1"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index f3934614cb810..18b30b092b273 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/external/avro/pom.xml b/external/avro/pom.xml index fbca1101eae44..e932501b8b834 100644 --- a/external/avro/pom.xml +++ b/external/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 537e4c97b1f9d..72940cb743386 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 1ce0b53014aa7..f079671b8998a 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index e851b0a8b2c79..1b79350397482 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10-token-provider/pom.xml b/external/kafka-0-10-token-provider/pom.xml index 695154d8ceb3f..83097460edc9d 100644 --- a/external/kafka-0-10-token-provider/pom.xml +++ b/external/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 2fcd0e4c2b75d..91e111ee38d10 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index af53c827711c4..e622369eb7250 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 2f8755241b3c2..a208e03e8bbf3 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index c3a1b68c82657..e464dfacc4c7e 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 9c0f78231df9d..ed0c627abb943 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 3668043c4e316..606b6cb8c5cd7 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index a97e35dae4ce1..cb5c693068114 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 092c1c7d83da6..3fc9ece3d0e05 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 24370ce56e883..d4d0fc3b6f9e6 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/pom.xml b/pom.xml index 43f9c30422fd7..8c1a2b25c0765 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index 3e5963da87f31..49fe5caabc028 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__: str = "3.3.2.dev0" +__version__: str = "3.3.1" diff --git a/repl/pom.xml b/repl/pom.xml index 68148f637ac0a..d5abd10e610c7 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index be3c81fbf949f..253a5aeffb521 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index fa9fc6473d330..13b0046c47b4d 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 9354ffda8e46c..1c91ae916bc4b 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index a5c123e47ac0b..eeb0ae37fa109 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index e1aaf4afa59c0..5c6188add47cf 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 9ce6e61a7abc6..c6754cf57f9dd 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 6653a4f61dbfe..15ecd5597fcab 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 79219abecf6a8..944fd8f58dbed 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 13eb55d55ebf5..91ab784016069 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 1195252ba95c3..0ea392b136b98 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.3.2-SNAPSHOT + 3.3.1 ../pom.xml From 860cb0598009eebc1d60b3daca8fbb0ccf8d4ecf Mon Sep 17 00:00:00 2001 From: catalinii Date: Wed, 9 Dec 2020 14:01:51 -0800 Subject: [PATCH 524/535] Log first successful container request (#20) Call internal lyft API using reflection when the executor was successfully requested from K8s --- .../org/apache/spark/util/LyftUtils.scala | 31 ++++++++++++++ .../apache/spark/util/LyftUtilsSuite.scala | 40 +++++++++++++++++++ .../cluster/k8s/ExecutorPodsAllocator.scala | 5 +++ 3 files changed, 76 insertions(+) create mode 100644 core/src/main/scala/org/apache/spark/util/LyftUtils.scala create mode 100644 core/src/test/scala/org/apache/spark/util/LyftUtilsSuite.scala diff --git a/core/src/main/scala/org/apache/spark/util/LyftUtils.scala b/core/src/main/scala/org/apache/spark/util/LyftUtils.scala new file mode 100644 index 0000000000000..d9c8a643ec57d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/LyftUtils.scala @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +private[spark] object LyftUtils { + def callObjectMethodNoArguments(objectName: String, method: String): Boolean = { + var ok = true + try { + val m = Utils.classForName(objectName).getField("MODULE$").get(null) + Utils.classForName(objectName).getDeclaredMethod(method).invoke(m) + } catch { + case e: Throwable => ok = false + } + ok + } +} diff --git a/core/src/test/scala/org/apache/spark/util/LyftUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/LyftUtilsSuite.scala new file mode 100644 index 0000000000000..7107a270ba742 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/LyftUtilsSuite.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import org.apache.spark.{SparkException, SparkFunSuite} +import org.apache.spark.internal.Logging + +object TestObjectLyftUtils { + var testVar = 0L + def setVal() = { + testVar = 1L + } +} + +class LyftUtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging { + + test("callObjectMethodNoArguments") { + // Test calling the method using reflection 1 + val v = LyftUtils.callObjectMethodNoArguments("org.apache.spark.util.TestObjectLyftUtils$", "setVal") + assert(v === true) + assert(TestObjectLyftUtils.testVar === 1) + assert(false == + LyftUtils.callObjectMethodNoArguments("org.apache.spark.util.TestObjectLyftUtils$", "setVal1")) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala index 3519efd3fcb10..bda7650895a24 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/k8s/ExecutorPodsAllocator.scala @@ -401,6 +401,11 @@ class ExecutorPodsAllocator( val resources = replacePVCsIfNeeded( podWithAttachedContainer, resolvedExecutorSpec.executorKubernetesResources, reusablePVCs) val createdExecutorPod = kubernetesClient.pods().create(podWithAttachedContainer) + + org.apache.spark.util.LyftUtils.callObjectMethodNoArguments( + "com.lyft.data.spark.AppMetrics$", + "setFirstExecutorAllocationTime") + try { addOwnerReference(createdExecutorPod, resources) resources From db105db167ef892bfbcb0b3ec17110d9868aed6b Mon Sep 17 00:00:00 2001 From: Catalin Toda Date: Wed, 14 Apr 2021 12:36:21 -0700 Subject: [PATCH 525/535] Do not localize s3 files --- .../main/scala/org/apache/spark/deploy/yarn/Client.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 5402c503908ce..ee35e609cf99f 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -553,7 +553,12 @@ private[spark] class Client( val localPath = getQualifiedLocalPath(localURI, hadoopConf) val linkname = targetDir.map(_ + "/").getOrElse("") + destName.orElse(Option(localURI.getFragment())).getOrElse(localPath.getName()) - val destPath = copyFileToRemote(destDir, localPath, replication, symlinkCache) + var destPath = localPath + if (!localPath.toUri.getScheme.startsWith("s3")) { + destPath = copyFileToRemote(destDir, localPath, replication, symlinkCache) + } else { + logInfo(s"Adding binary from location: $destPath to the distributed cache") + } val destFs = FileSystem.get(destPath.toUri(), hadoopConf) distCacheMgr.addResource( destFs, hadoopConf, destPath, localResources, resType, linkname, statCache, @@ -730,7 +735,7 @@ private[spark] class Client( pySparkArchives.foreach { f => val uri = Utils.resolveURI(f) if (uri.getScheme != Utils.LOCAL_SCHEME) { - distribute(f) + distribute(f, LocalResourceType.ARCHIVE) } } From 902996cb087a0d529a0255bf4d9883a0c140c600 Mon Sep 17 00:00:00 2001 From: Daniel Zhi Date: Wed, 14 Apr 2021 11:29:22 -0700 Subject: [PATCH 526/535] Automatic staging committer conflict-mode for dynamic partition overwrite As an attempt to support dynamic partition overwrite using S3A staging committers, we disabled the previous hard-coded exception and dynamically set the value for fs.s3a.committer.staging.conflict-mode so to ensure PartitionedStagingCommitter behaves as expected in both "INSERT INTO" and "INSERT OVERWRITE" scenarios. The details are documented at: https://docs.google.com/document/d/1fH4AtClYDiQt4fU9g-QzcoRu9SxMo8isGuLgvVxZgdc/edit?usp=sharing --- .../internal/io/HadoopMapReduceCommitProtocol.scala | 12 ++++++++++++ .../internal/io/cloud/PathOutputCommitProtocol.scala | 8 ++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala index 3a24da98ecc24..892427ef9ab6b 100644 --- a/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala +++ b/core/src/main/scala/org/apache/spark/internal/io/HadoopMapReduceCommitProtocol.scala @@ -183,6 +183,18 @@ class HadoopMapReduceCommitProtocol( jobContext.getConfiguration.setBoolean("mapreduce.task.ismap", true) jobContext.getConfiguration.setInt("mapreduce.task.partition", 0) + // Automatically set conflict-mode based on value of dynamicPartitionOverwrite, + // unless configuration auto-staging-conflict-mode exists with value false. + val autoConflictMode = jobContext.getConfiguration.get( + "spark.internal.io.hmrcp.auto-staging-conflict-mode") + if (autoConflictMode == null || autoConflictMode != "false") { + if (dynamicPartitionOverwrite) { + jobContext.getConfiguration.set("fs.s3a.committer.staging.conflict-mode", "replace") + } else { + jobContext.getConfiguration.set("fs.s3a.committer.staging.conflict-mode", "append") + } + } + val taskAttemptContext = new TaskAttemptContextImpl(jobContext.getConfiguration, taskAttemptId) committer = setupCommitter(taskAttemptContext) committer.setupJob(jobContext) diff --git a/hadoop-cloud/src/hadoop-3/main/scala/org/apache/spark/internal/io/cloud/PathOutputCommitProtocol.scala b/hadoop-cloud/src/hadoop-3/main/scala/org/apache/spark/internal/io/cloud/PathOutputCommitProtocol.scala index fc5d0a0b3a7f5..dc1b1dcc0ea60 100644 --- a/hadoop-cloud/src/hadoop-3/main/scala/org/apache/spark/internal/io/cloud/PathOutputCommitProtocol.scala +++ b/hadoop-cloud/src/hadoop-3/main/scala/org/apache/spark/internal/io/cloud/PathOutputCommitProtocol.scala @@ -17,8 +17,6 @@ package org.apache.spark.internal.io.cloud -import java.io.IOException - import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.TaskAttemptContext import org.apache.hadoop.mapreduce.lib.output.{FileOutputCommitter, PathOutputCommitter, PathOutputCommitterFactory} @@ -51,13 +49,15 @@ class PathOutputCommitProtocol( jobId: String, dest: String, dynamicPartitionOverwrite: Boolean = false) - extends HadoopMapReduceCommitProtocol(jobId, dest, false) with Serializable { + extends HadoopMapReduceCommitProtocol(jobId, dest, dynamicPartitionOverwrite) with Serializable { if (dynamicPartitionOverwrite) { // until there's explicit extensions to the PathOutputCommitProtocols // to support the spark mechanism, it's left to the individual committer // choice to handle partitioning. - throw new IOException(PathOutputCommitProtocol.UNSUPPORTED) + // throw new IOException(PathOutputCommitProtocol.UNSUPPORTED) + // The above exception is disabled with automatic value of fs.s3a.committer.staging.conflict-mode + // in HadoopMapReduceCommitProtocol. } /** The committer created. */ From 005ce7d8b0d19259e94088551a0aa8d5bf7d03d9 Mon Sep 17 00:00:00 2001 From: Catalin Toda Date: Tue, 29 Jun 2021 16:14:42 -0700 Subject: [PATCH 527/535] Backport [SPARK-30707][SQL]Window function set partitionSpec as order spec when orderSpec is empty --- .../spark/sql/catalyst/parser/AstBuilder.scala | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index ecc5360a4f784..0161082b08cd4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1986,7 +1986,19 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit override def visitWindowDef(ctx: WindowDefContext): WindowSpecDefinition = withOrigin(ctx) { // CLUSTER BY ... | PARTITION BY ... ORDER BY ... val partition = ctx.partition.asScala.map(expression) - val order = ctx.sortItem.asScala.map(visitSortItem) + val order = if (ctx.sortItem.asScala.nonEmpty) { + ctx.sortItem.asScala.map(visitSortItem) + } else if (ctx.windowFrame != null && + ctx.windowFrame().frameType.getType == SqlBaseParser.RANGE) { + // for RANGE window frame, we won't add default order spec + ctx.sortItem.asScala.map(visitSortItem) + } else { + // Same default behaviors like hive, when order spec is null + // set partition spec expression as order spec + ctx.partition.asScala.map { expr => + SortOrder(expression(expr), Ascending, Ascending.defaultNullOrdering, Seq.empty) + } + } // RANGE/ROWS BETWEEN ... val frameSpecOption = Option(ctx.windowFrame).map { frame => From 984bf786b8107f580e02bbb48595fc985b911df7 Mon Sep 17 00:00:00 2001 From: catalinii Date: Mon, 16 Aug 2021 09:45:46 -0700 Subject: [PATCH 528/535] [SPARK-28098][SQL]Support read partitioned Hive tables with (#40) --- .../apache/spark/util/LyftUtilsSuite.scala | 2 +- .../apache/spark/sql/internal/SQLConf.scala | 10 ++++++++++ .../datasources/InMemoryFileIndex.scala | 20 ++++++++++++++++++- .../spark/sql/hive/HiveMetastoreCatalog.scala | 15 +++++++++++++- 4 files changed, 44 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/util/LyftUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/LyftUtilsSuite.scala index 7107a270ba742..2a54bce44b220 100644 --- a/core/src/test/scala/org/apache/spark/util/LyftUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/LyftUtilsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.util -import org.apache.spark.{SparkException, SparkFunSuite} +import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging object TestObjectLyftUtils { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index d9e38ea92586c..35f50d6a1de7a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -3766,6 +3766,13 @@ object SQLConf { .version("3.2.2") .booleanConf .createWithDefault(false) + + val READ_PARTITION_WITH_SUBDIRECTORY_ENABLED = + buildConf("spark.sql.sources.readPartitionWithSubdirectory.enabled") + .doc("When set to true, Spark SQL could read the files of " + + " partitioned hive table from subdirectories under root path of table") + .booleanConf + .createWithDefault(true) /** * Holds information about keys that have been deprecated. @@ -4516,6 +4523,9 @@ class SQLConf extends Serializable with Logging { def decorrelateInnerQueryEnabled: Boolean = getConf(SQLConf.DECORRELATE_INNER_QUERY_ENABLED) def maxConcurrentOutputFileWriters: Int = getConf(SQLConf.MAX_CONCURRENT_OUTPUT_FILE_WRITERS) + + def readPartitionWithSubdirectoryEnabled: Boolean = + getConf(READ_PARTITION_WITH_SUBDIRECTORY_ENABLED) def inferDictAsStruct: Boolean = getConf(SQLConf.INFER_NESTED_DICT_AS_STRUCT) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala index 6c3deee2c3173..c902a9decb303 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala @@ -59,6 +59,9 @@ class InMemoryFileIndex( override val rootPaths = rootPathsSpecified.filterNot(FileStreamSink.ancestorIsMetadataDirectory(_, hadoopConf)) + val readPartitionWithSubdirectoryEnabled = + sparkSession.sessionState.conf.readPartitionWithSubdirectoryEnabled + @volatile private var cachedLeafFiles: mutable.LinkedHashMap[Path, FileStatus] = _ @volatile private var cachedLeafDirToChildrenFiles: Map[Path, Array[FileStatus]] = _ @volatile private var cachedPartitionSpec: PartitionSpec = _ @@ -94,10 +97,25 @@ class InMemoryFileIndex( val files = listLeafFiles(rootPaths) cachedLeafFiles = new mutable.LinkedHashMap[Path, FileStatus]() ++= files.map(f => f.getPath -> f) - cachedLeafDirToChildrenFiles = files.toArray.groupBy(_.getPath.getParent) + cachedLeafDirToChildrenFiles = + if (readPartitionWithSubdirectoryEnabled) { + files.toArray.groupBy(file => getRootPathsLeafDir(file.getPath.getParent, file.getPath)) + } else { + files.toArray.groupBy(_.getPath.getParent) + } cachedPartitionSpec = null } + private def getRootPathsLeafDir(path: Path, child: Path): Path = { + if (rootPaths.contains(child)) { + path + } else if (rootPaths.contains(path)) { + path + } else { + getRootPathsLeafDir(path.getParent, path) + } + } + override def equals(other: Any): Boolean = other match { case hdfs: InMemoryFileIndex => rootPaths.toSet == hdfs.rootPaths.toSet case _ => false diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala index 12b570e818650..c60516717101d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala @@ -25,6 +25,7 @@ import com.google.common.util.concurrent.Striped import org.apache.hadoop.fs.Path import org.apache.spark.SparkException +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.{QualifiedTableName, TableIdentifier} @@ -280,7 +281,7 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log LogicalRelation( DataSource( sparkSession = sparkSession, - paths = rootPath.toString :: Nil, + paths = getDirectoryPathSeq(rootPath), userSpecifiedSchema = Option(updatedTable.dataSchema), bucketSpec = hiveBucketSpec, // Do not interpret the 'path' option at all when tables are read using the Hive @@ -318,6 +319,18 @@ private[hive] class HiveMetastoreCatalog(sparkSession: SparkSession) extends Log result.copy(output = newOutput) } + private def getDirectoryPathSeq(rootPath: Path): Seq[String] = { + val enableSupportSubDirectories = + sparkSession.sessionState.conf.readPartitionWithSubdirectoryEnabled + + if (enableSupportSubDirectories) { + val fs = rootPath.getFileSystem(sparkSession.sessionState.newHadoopConf()) + SparkHadoopUtil.get.listLeafDirStatuses(fs, rootPath).map(_.getPath.toString) + } else { + rootPath.toString :: Nil + } + } + private def inferIfNeeded( relation: HiveTableRelation, options: Map[String, String], From 719c80f020194dd4d55cedf38cebb0fe89fea301 Mon Sep 17 00:00:00 2001 From: Catalin Toda Date: Mon, 4 Oct 2021 14:49:27 -0700 Subject: [PATCH 529/535] Spark Thrift Server Fixes by rmenon --- pom.xml | 6 +++--- .../org/apache/hive/service/cli/ColumnBasedSet.java | 13 +++++++++---- .../service/cli/operation/HiveCommandOperation.java | 5 ++++- .../service/cli/operation/OperationManager.java | 9 +++++---- .../hive/service/cli/operation/SQLOperation.java | 5 ++++- 5 files changed, 25 insertions(+), 13 deletions(-) diff --git a/pom.xml b/pom.xml index 8c1a2b25c0765..fcd607b9ecbfb 100644 --- a/pom.xml +++ b/pom.xml @@ -123,8 +123,8 @@ org.apache.hive core - 2.3.9 - 2.3.9 + 2.3.6.47 + 2.3.6.47 2.3 @@ -1987,7 +1987,7 @@ ${hive.group} hive-exec ${hive.classifier} - ${hive.version} + 2.3.6.47 ${hive.deps.scope} diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java index 4729fdf2e8e69..579a19ad68ef3 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/ColumnBasedSet.java @@ -47,10 +47,15 @@ public class ColumnBasedSet implements RowSet { public static final Logger LOG = LoggerFactory.getLogger(ColumnBasedSet.class); public ColumnBasedSet(TableSchema schema) { - descriptors = schema.toTypeDescriptors(); - columns = new ArrayList(); - for (ColumnDescriptor colDesc : schema.getColumnDescriptors()) { - columns.add(new ColumnBuffer(colDesc.getType())); + if (schema == null) { + descriptors = new TypeDescriptor[0]; + columns = new ArrayList(); + } else { + descriptors = schema.toTypeDescriptors(); + columns = new ArrayList(); + for (ColumnDescriptor colDesc : schema.getColumnDescriptors()) { + columns.add(new ColumnBuffer(colDesc.getType())); + } } } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java index 173256d4782f4..a972c20c3b653 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/HiveCommandOperation.java @@ -50,6 +50,7 @@ public class HiveCommandOperation extends ExecuteStatementOperation { private CommandProcessor commandProcessor; private TableSchema resultSchema = null; + private int readRows = 0; /** * For processors other than Hive queries (Driver), they output to session.out (a temp file) @@ -156,10 +157,11 @@ public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws H } List rows = readResults((int) maxRows); RowSet rowSet = RowSetFactory.create(resultSchema, getProtocolVersion(), false); - + rowSet.setStartOffset(readRows); for (String row : rows) { rowSet.addRow(new String[] {row}); } + readRows += rows.size(); return rowSet; } @@ -210,5 +212,6 @@ private void resetResultReader() { ServiceUtils.cleanup(LOG, resultReader); resultReader = null; } + readRows = 0; } } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java index 40daa1ff49367..008970d143de4 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/OperationManager.java @@ -249,6 +249,10 @@ public RowSet getOperationNextRowSet(OperationHandle opHandle, public RowSet getOperationLogRowSet(OperationHandle opHandle, FetchOrientation orientation, long maxRows) throws HiveSQLException { + TableSchema tableSchema = new TableSchema(getLogSchema()); + RowSet rowSet = RowSetFactory.create(tableSchema, + getOperation(opHandle).getProtocolVersion(), false); + // get the OperationLog object from the operation OperationLog operationLog = getOperation(opHandle).getOperationLog(); if (operationLog == null) { @@ -257,17 +261,14 @@ public RowSet getOperationLogRowSet(OperationHandle opHandle, // read logs List logs; + rowSet.setStartOffset(operationLog.getStartPosition(isFetchFirst(orientation))); try { logs = operationLog.readOperationLog(isFetchFirst(orientation), maxRows); } catch (SQLException e) { throw new HiveSQLException(e.getMessage(), e.getCause()); } - // convert logs to RowSet - TableSchema tableSchema = new TableSchema(getLogSchema()); - RowSet rowSet = RowSetFactory.create(tableSchema, - getOperation(opHandle).getProtocolVersion(), false); for (String log : logs) { rowSet.addRow(new String[] {log}); } diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java index ce704b281df1f..a7523c22d2e29 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/cli/operation/SQLOperation.java @@ -326,8 +326,11 @@ public RowSet getNextRowSet(FetchOrientation orientation, long maxRows) throws H fetchStarted = true; driver.setMaxRows((int) maxRows); if (driver.getResults(convey)) { - return decode(convey, rowSet); + decode(convey, rowSet); } + long startRowOffset = driver.getStartRowOffset(); + rowSet.setStartOffset(startRowOffset); + driver.setStartRowOffset(startRowOffset + rowSet.numRows()); return rowSet; } catch (IOException e) { throw new HiveSQLException(e); From 949dfa083c87e8b94e4b80b6af712dd428376913 Mon Sep 17 00:00:00 2001 From: catalinii Date: Mon, 24 Jan 2022 14:52:44 -0800 Subject: [PATCH 530/535] Trunc function does not contain the Q string (for quarter) (#45) * Revert "[SPARK-17658][SQL] Disallow Users to Change Table Type" This reverts commit [SPARK-17657][SQL] Disallow Users to Change Table Type. * Trunc function does not contain the Q string (for quarter) * Add comment for disabling the external table check * Ignore tests instead of removing them --- .../sql/catalyst/expressions/datetimeExpressions.scala | 2 +- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 2 +- .../org/apache/spark/sql/hive/HiveExternalCatalog.scala | 6 +----- .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 4 ++-- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 17241f47e0339..000bb665a175e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -2718,7 +2718,7 @@ object DatePart { source: Expression): Expression = extractField.toUpperCase(Locale.ROOT) match { case "YEAR" | "Y" | "YEARS" | "YR" | "YRS" => Year(source) case "YEAROFWEEK" => YearOfWeek(source) - case "QUARTER" | "QTR" => Quarter(source) + case "QUARTER" | "QTR" | "Q" => Quarter(source) case "MONTH" | "MON" | "MONS" | "MONTHS" => Month(source) case "WEEK" | "W" | "WEEKS" => WeekOfYear(source) case "DAY" | "D" | "DAYS" => DayOfMonth(source) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index cc61491dc95d7..eed1f9246e0c2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1004,7 +1004,7 @@ object DateTimeUtils { case "DAY" | "DD" => TRUNC_TO_DAY case "WEEK" => TRUNC_TO_WEEK case "MON" | "MONTH" | "MM" => TRUNC_TO_MONTH - case "QUARTER" => TRUNC_TO_QUARTER + case "QUARTER" | "QTR" | "Q" => TRUNC_TO_QUARTER case "YEAR" | "YYYY" | "YY" => TRUNC_TO_YEAR case _ => TRUNC_INVALID } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala index 15c587b357d82..d67308b2b5bb0 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala @@ -138,11 +138,7 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat s"as table property keys may not start with '$SPARK_SQL_PREFIX': " + invalidKeys.mkString("[", ", ", "]")) } - // External users are not allowed to set/switch the table type. In Hive metastore, the table - // type can be switched by changing the value of a case-sensitive table property `EXTERNAL`. - if (table.properties.contains("EXTERNAL")) { - throw new AnalysisException("Cannot set or change the preserved property key: 'EXTERNAL'") - } + // To keep the parity with Hive, the check for external table properties has been removed. } /** diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index c4cef44b6cc90..c2aaf1b99279e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -821,7 +821,7 @@ class HiveDDLSuite s"$tableName is a table. '$cmdName' expects a view. Please use ALTER TABLE instead.") } - test("create table - SET TBLPROPERTIES EXTERNAL to TRUE") { + ignore("create table - SET TBLPROPERTIES EXTERNAL to TRUE") { val tabName = "tab1" withTable(tabName) { assertAnalysisError( @@ -830,7 +830,7 @@ class HiveDDLSuite } } - test("alter table - SET TBLPROPERTIES EXTERNAL to TRUE") { + ignore("alter table - SET TBLPROPERTIES EXTERNAL to TRUE") { val tabName = "tab1" withTable(tabName) { val catalog = spark.sessionState.catalog From e90b3e0aa8dc57cc6fa3ad0983e2acebb4231dff Mon Sep 17 00:00:00 2001 From: Catalin Toda Date: Tue, 11 Oct 2022 17:01:59 -0700 Subject: [PATCH 531/535] Use IP address on the executor side instead of hostname and random port for blockmanager --- core/src/main/scala/org/apache/spark/SparkEnv.scala | 11 ++++++----- .../main/scala/org/apache/spark/util/Utils.scala | 13 ++++++++++++- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index 19467e7eca12e..ff39e8710e414 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -18,17 +18,14 @@ package org.apache.spark import java.io.File -import java.net.Socket +import java.net.{InetAddress, Socket} import java.util.Locale - import scala.collection.JavaConverters._ import scala.collection.concurrent import scala.collection.mutable import scala.util.Properties - import com.google.common.cache.CacheBuilder import org.apache.hadoop.conf.Configuration - import org.apache.spark.annotation.DeveloperApi import org.apache.spark.api.python.PythonWorkerFactory import org.apache.spark.broadcast.BroadcastManager @@ -207,11 +204,15 @@ object SparkEnv extends Logging { numCores: Int, ioEncryptionKey: Option[Array[Byte]], isLocal: Boolean): SparkEnv = { + var hostnameFinal = hostname + if (conf.getBoolean("spark.lyft.resolve", false)) { + hostnameFinal = InetAddress.getByName(hostname).getHostAddress + } val env = create( conf, executorId, bindAddress, - hostname, + hostnameFinal, None, isLocal, numCores, diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 24b3d2b6191e3..b772d10845eea 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -2365,9 +2365,16 @@ private[spark] object Utils extends Logging { * privileged ports. */ def userPort(base: Int, offset: Int): Int = { - (base + offset - 1024) % (65536 - 1024) + 1024 + (base + offset - 1024) % (65536 - 1024) + 1024 } + def randPort(base: Int, offset: Int, maxRand: Int): Int = { + val rand = new scala.util.Random + val r = rand.nextInt(maxRand) + (base + offset + r) % 65535 + } + + /** * Attempt to start a service on the given port, or fail after a number of attempts. * Each subsequent attempt uses 1 + the port used in the previous attempt (unless the port is 0). @@ -2394,6 +2401,10 @@ private[spark] object Utils extends Logging { // Do not increment port if startPort is 0, which is treated as a special port val tryPort = if (startPort == 0) { startPort + } else if (offset > 0 && conf.getInt("spark.lyft.maxrand", 0) > 0) + { + logInfo(s"Using randPort") + randPort(startPort, offset, conf.getInt("spark.lyft.maxrand", 0)) } else { userPort(startPort, offset) } From 1c1eb994c045902164de9421eeef6d9d6faf7a66 Mon Sep 17 00:00:00 2001 From: Catalin Toda Date: Tue, 25 Oct 2022 14:02:49 -0700 Subject: [PATCH 532/535] [SPARK-32838][SQL]Check DataSource insert command path with actual path --- .../sql/execution/DataSourceScanExec.scala | 2 +- .../datasources/DataSourceStrategy.scala | 6 -- .../InsertIntoHadoopFsRelationCommand.scala | 61 ++++++++++++++++++- 3 files changed, 61 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 40d29af28f908..a30fbfb7c0b6f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -263,7 +263,7 @@ case class FileSourceScanExec( // We can only determine the actual partitions at runtime when a dynamic partition filter is // present. This is because such a filter relies on information that is only available at run // time (for instance the keys used in the other side of a join). - @transient private lazy val dynamicallySelectedPartitions: Array[PartitionDirectory] = { + @transient lazy val dynamicallySelectedPartitions: Array[PartitionDirectory] = { val dynamicPartitionFilters = partitionFilters.filter(isDynamicPruningFilter) if (dynamicPartitionFilters.nonEmpty) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index e35d09320760c..7aac7a7e170b2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -219,12 +219,6 @@ object DataSourceAnalysis extends Rule[LogicalPlan] { Some(t.location), actualQuery.output.map(_.name)) - // For dynamic partition overwrite, we do not delete partition directories ahead. - // We write to staging directories and move to final partition directories after writing - // job is done. So it is ok to have outputPath try to overwrite inputpath. - if (overwrite && !insertCommand.dynamicPartitionOverwrite) { - DDLUtils.verifyNotReadPath(actualQuery, outputPath) - } insertCommand } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala index d773d4bd271b3..123839730f369 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelationCommand.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.execution.datasources +import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.internal.io.FileCommitProtocol @@ -28,7 +29,7 @@ import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} -import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.{FileSourceScanExec, SparkPlan} import org.apache.spark.sql.execution.command._ import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode import org.apache.spark.sql.util.SchemaUtils @@ -125,10 +126,12 @@ case class InsertIntoHadoopFsRelationCommand( // For dynamic partition overwrite, do not delete partition directories ahead. true } else { + checkWritePathReadFrom(child, hadoopConf, qualifiedOutputPath, customPartitionLocations) deleteMatchingPartitions(fs, qualifiedOutputPath, customPartitionLocations, committer) true } case (SaveMode.Overwrite, _) | (SaveMode.ErrorIfExists, false) => + checkWritePathReadFrom(child, hadoopConf, qualifiedOutputPath, customPartitionLocations) true case (SaveMode.Ignore, exists) => !exists @@ -271,6 +274,62 @@ case class InsertIntoHadoopFsRelationCommand( }.toMap } + private def isSubDir( + src: Path, + dest: Path, + hadoopConf: Configuration): Boolean = { + if (src == null) { + false + } else { + val srcFs = src.getFileSystem(hadoopConf) + val destFs = dest.getFileSystem(hadoopConf) + val fullSrc = srcFs.makeQualified(src).toString + Path.SEPARATOR + val fullDest = destFs.makeQualified(dest).toString + Path.SEPARATOR + val schemaSrcf = src.toUri.getScheme + val schemaDestf = dest.toUri.getScheme + if (schemaSrcf != null && schemaDestf != null && !(schemaSrcf == schemaDestf)) { + false + } else { + fullSrc.startsWith(fullDest) + } + } + } + + def checkWritePathReadFrom( + child: SparkPlan, + hadoopConf: Configuration, + qualifiedOutputPath: Path, + customPartitionLocations: Map[TablePartitionSpec, String]): Unit = { + // For dynamic partition overwrite, we do not delete partition directories ahead. + // We write to staging directories and move to final partition directories after writing + // job is done. So it is ok to have outputPath try to overwrite inputpath. + if (mode == SaveMode.Overwrite && !dynamicPartitionOverwrite) { + val inputPaths = child.collect { + case scan: FileSourceScanExec => + scan.dynamicallySelectedPartitions.flatMap(_.files.map { + case fileStatus if !fileStatus.isDirectory => fileStatus.getPath.getParent + case fileStatus => fileStatus.getPath + }) + }.flatten + val finalOutputPath = + if (staticPartitions.nonEmpty && partitionColumns.length == staticPartitions.size) { + val staticPathFragment = + PartitioningUtils.getPathFragment(staticPartitions, partitionColumns) + if (customPartitionLocations.contains(staticPartitions)) { + new Path(customPartitionLocations.getOrElse(staticPartitions, staticPathFragment)) + } else { + new Path(qualifiedOutputPath, staticPathFragment) + } + } else { + outputPath + } + if (inputPaths.exists(isSubDir(_, finalOutputPath, hadoopConf))) { + throw new AnalysisException( + s"Cannot overwrite a path that is also being read from.") + } + } + } + override protected def withNewChildInternal( newChild: LogicalPlan): InsertIntoHadoopFsRelationCommand = copy(query = newChild) } From 73f742dd5c44eb175b67d6ccb16cb9e469ffb48e Mon Sep 17 00:00:00 2001 From: Catalin Toda Date: Thu, 7 Oct 2021 12:42:29 -0700 Subject: [PATCH 533/535] Fix Long type in theschema and int32 parquet type --- .../parquet/ParquetVectorUpdaterFactory.java | 13 +++++-------- .../parquet/VectorizedPlainValuesReader.java | 10 ++++++++++ .../datasources/parquet/VectorizedReaderBase.java | 5 +++++ .../parquet/VectorizedRleValuesReader.java | 5 +++++ .../datasources/parquet/VectorizedValuesReader.java | 1 + 5 files changed, 26 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java index 53606f58dcfd2..0850c357d2e56 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java @@ -76,11 +76,8 @@ public ParquetVectorUpdater getUpdater(ColumnDescriptor descriptor, DataType spa case INT32: if (sparkType == DataTypes.IntegerType || canReadAsIntDecimal(descriptor, sparkType)) { return new IntegerUpdater(); - } else if (sparkType == DataTypes.LongType && isUnsignedIntTypeMatched(32)) { - // In `ParquetToSparkSchemaConverter`, we map parquet UINT32 to our LongType. - // For unsigned int32, it stores as plain signed int32 in Parquet when dictionary - // fallbacks. We read them as long values. - return new UnsignedIntegerUpdater(); + } else if (sparkType == DataTypes.LongType) { + return new LongIntegerUpdater(); } else if (sparkType == DataTypes.ByteType) { return new ByteUpdater(); } else if (sparkType == DataTypes.ShortType) { @@ -279,14 +276,14 @@ public void decodeSingleDictionaryId( } } - private static class UnsignedIntegerUpdater implements ParquetVectorUpdater { + private static class LongIntegerUpdater implements ParquetVectorUpdater { @Override public void readValues( int total, int offset, WritableColumnVector values, VectorizedValuesReader valuesReader) { - valuesReader.readUnsignedIntegers(total, values, offset); + valuesReader.readIntegersAsLongs(total, values, offset); } @Override @@ -299,7 +296,7 @@ public void readValue( int offset, WritableColumnVector values, VectorizedValuesReader valuesReader) { - values.putLong(offset, Integer.toUnsignedLong(valuesReader.readInteger())); + values.putLong(offset, valuesReader.readInteger()); } @Override diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java index fb40a131d2a6b..f309abbf618de 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedPlainValuesReader.java @@ -130,6 +130,16 @@ public void skipIntegers(int total) { in.skip(total * 4L); } + @Override + public final void readIntegersAsLongs(int total, WritableColumnVector c, int rowId) { + int requiredBytes = total * 4; + ByteBuffer buffer = getBuffer(requiredBytes); + for (int i = 0; i < total; i += 1) { + c.putLong(rowId + i, buffer.getInt()); + } + } + + @Override public final void readUnsignedIntegers(int total, WritableColumnVector c, int rowId) { int requiredBytes = total * 4; diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedReaderBase.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedReaderBase.java index b6715f1e7a07a..5d60124b6f62a 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedReaderBase.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedReaderBase.java @@ -66,6 +66,11 @@ public void readIntegers(int total, WritableColumnVector c, int rowId) { throw new UnsupportedOperationException(); } + @Override + public void readIntegersAsLongs(int total, WritableColumnVector c, int rowId) { + throw new UnsupportedOperationException(); + } + @Override public void readIntegersWithRebase(int total, WritableColumnVector c, int rowId, boolean failIfRebase) { diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java index 12fe5697954dc..6a4b87f9c55dc 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedRleValuesReader.java @@ -710,6 +710,11 @@ public void readIntegers(int total, WritableColumnVector c, int rowId) { } } + @Override + public void readIntegersAsLongs(int total, WritableColumnVector c, int rowId) { + throw new UnsupportedOperationException("only readInts is valid."); + } + @Override public void readUnsignedIntegers(int total, WritableColumnVector c, int rowId) { throw new UnsupportedOperationException("only readInts is valid."); diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedValuesReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedValuesReader.java index 4308614338499..06d199e0dd087 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedValuesReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedValuesReader.java @@ -45,6 +45,7 @@ public interface VectorizedValuesReader { void readShorts(int total, WritableColumnVector c, int rowId); void readIntegers(int total, WritableColumnVector c, int rowId); void readIntegersWithRebase(int total, WritableColumnVector c, int rowId, boolean failIfRebase); + void readIntegersAsLongs(int total, WritableColumnVector c, int rowId); void readUnsignedIntegers(int total, WritableColumnVector c, int rowId); void readUnsignedLongs(int total, WritableColumnVector c, int rowId); void readLongs(int total, WritableColumnVector c, int rowId); From fe62b94a5cdcda726edb7561b9edb1f2c2689fc1 Mon Sep 17 00:00:00 2001 From: Kyle Andelin <7277377+andelink@users.noreply.github.com> Date: Fri, 4 Nov 2022 12:26:53 -0400 Subject: [PATCH 534/535] Remove unnecessary logging statements --- core/src/main/scala/org/apache/spark/internal/Logging.scala | 5 ----- 1 file changed, 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/Logging.scala b/core/src/main/scala/org/apache/spark/internal/Logging.scala index d483a93464c06..6f2a0c75038ab 100644 --- a/core/src/main/scala/org/apache/spark/internal/Logging.scala +++ b/core/src/main/scala/org/apache/spark/internal/Logging.scala @@ -161,11 +161,6 @@ trait Logging { } // Update the consoleAppender threshold to replLevel if (replLevel != rootLogger.getLevel()) { - if (!silent) { - System.err.printf("Setting default log level to \"%s\".\n", replLevel) - System.err.println("To adjust logging level use sc.setLogLevel(newLevel). " + - "For SparkR, use setLogLevel(newLevel).") - } Logging.sparkShellThresholdLevel = replLevel rootLogger.getAppenders().asScala.foreach { case (_, ca: ConsoleAppender) => From c93bba8b9d4823c0d891561e041eaec91be0c11b Mon Sep 17 00:00:00 2001 From: Catalin Toda Date: Fri, 11 Nov 2022 13:52:23 -0800 Subject: [PATCH 535/535] Revert [SPARK-39865][SQL][3.3] Show proper error messages on the overflow commit - 19991047d5b5316412d8b1763807c5945a705bff --- .../spark/sql/catalyst/analysis/TableOutputResolver.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala index c723a018a6c53..e4bdb2903e356 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TableOutputResolver.scala @@ -231,8 +231,7 @@ object TableOutputResolver { } private def canCauseCastOverflow(cast: AnsiCast): Boolean = { - containsIntegralOrDecimalType(cast.dataType) && - !Cast.canUpCast(cast.child.dataType, cast.dataType) + false } private def checkField(